1 /**************************************************************************
3 * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 #include "i915_context.h"
31 #include "i915_batch.h"
32 #include "i915_debug.h"
34 #include "i915_resource.h"
36 #include "pipe/p_context.h"
37 #include "pipe/p_defines.h"
38 #include "pipe/p_format.h"
40 #include "util/u_format.h"
41 #include "util/u_math.h"
42 #include "util/u_memory.h"
/* Descriptor holding a pair of callbacks plus two unsigned fields:
 *   validate - takes the context and an out-pointer named batch_space,
 *   emit     - takes only the context,
 *   dirty, batch_space - unsigned bookkeeping values.
 * NOTE(review): nothing visible in this file instantiates the struct, and
 * the closing brace/semicolon is not present in this listing. */
44 struct i915_tracked_hw_state
{
46 void (*validate
)(struct i915_context
*, unsigned *batch_space
);
47 void (*emit
)(struct i915_context
*);
48 unsigned dirty
, batch_space
;
/* Report the batch space needed by emit_flush: 1 when any bit of
 * i915->flush_dirty is set, 0 otherwise. The result is stored through
 * batch_space. NOTE(review): the return-type line and braces are not
 * present in this listing. */
53 validate_flush(struct i915_context
*i915
, unsigned *batch_space
)
55 *batch_space
= i915
->flush_dirty
? 1 : 0;
/* Emit at most one MI_FLUSH word selected from i915->flush_dirty:
 *   - I915_FLUSH_CACHE set:          MI_FLUSH | FLUSH_MAP_CACHE
 *   - else I915_PIPELINE_FLUSH set:  MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE
 * Nothing is emitted when neither bit is set.
 * NOTE(review): the explanatory comment below is cut off in this listing
 * (its last line and terminator are missing); "request to flushes" is
 * presumably meant to read "request two flushes". */
59 emit_flush(struct i915_context
*i915
)
61 /* Cache handling is very cheap atm. State handling can request to flushes:
62 * - I915_FLUSH_CACHE which is a flush everything request and
63 * - I915_PIPELINE_FLUSH which is specifically for the draw_offset flush.
64 * Because the cache handling is so dumb, no explicit "invalidate map cache".
65 * Also, the first is a strict superset of the latter, so the following logic
67 if (i915
->flush_dirty
& I915_FLUSH_CACHE
)
68 OUT_BATCH(MI_FLUSH
| FLUSH_MAP_CACHE
);
69 else if (i915
->flush_dirty
& I915_PIPELINE_FLUSH
)
70 OUT_BATCH(MI_FLUSH
| INHIBIT_FLUSH_RENDER_CACHE
);
/* Table of 32-bit command words that emit_invariant copies into the batch
 * in one write; i915_validate_state uses its element count as the batch
 * space for the invariant atom.
 * NOTE(review): the listing's embedded numbering skips 84-92 and 101-102,
 * so the operands following "_3DSTATE_COORD_SET_BINDINGS |" and
 * "ENABLE_TEXKILL_3D_4D |" are not shown here; the comment opened before
 * _3DSTATE_LOAD_INDIRECT is also missing its terminator. Do not treat the
 * visible initializer as complete. */
73 uint32_t invariant_state
[] = {
74 _3DSTATE_AA_CMD
| AA_LINE_ECAAR_WIDTH_ENABLE
| AA_LINE_ECAAR_WIDTH_1_0
|
75 AA_LINE_REGION_WIDTH_ENABLE
| AA_LINE_REGION_WIDTH_1_0
,
77 _3DSTATE_DFLT_DIFFUSE_CMD
, 0,
79 _3DSTATE_DFLT_SPEC_CMD
, 0,
81 _3DSTATE_DFLT_Z_CMD
, 0,
83 _3DSTATE_COORD_SET_BINDINGS
|
93 _3DSTATE_RASTER_RULES_CMD
|
94 ENABLE_POINT_RASTER_RULE
|
95 OGL_POINT_RASTER_RULE
|
96 ENABLE_LINE_STRIP_PROVOKE_VRTX
|
97 ENABLE_TRI_FAN_PROVOKE_VRTX
|
98 LINE_STRIP_PROVOKE_VRTX(1) |
99 TRI_FAN_PROVOKE_VRTX(2) |
100 ENABLE_TEXKILL_3D_4D
|
103 _3DSTATE_DEPTH_SUBRECT_DISABLE
,
105 /* disable indirect state for now
107 _3DSTATE_LOAD_INDIRECT
| 0, 0};
/* Copy the whole invariant_state table into i915->batch with a single
 * i915_winsys_batchbuffer_write call; the byte length passed is
 * Elements(invariant_state) * sizeof(uint32_t). */
110 emit_invariant(struct i915_context
*i915
)
112 i915_winsys_batchbuffer_write(i915
->batch
, invariant_state
,
113 Elements(invariant_state
)*sizeof(uint32_t));
/* Compute batch space for emit_immediate and register the vertex buffer.
 *   - dirty: i915->immediate_dirty restricted to the S0..S6 bits.
 *   - When the S0 bit is dirty and i915->vbo is non-NULL, the vbo is
 *     appended to i915->validation_buffers.
 *   - *batch_space = 1 + util_bitcount(dirty), i.e. one word more than the
 *     bit count of the mask (util_bitcount presumably counts set bits).
 * NOTE(review): I915_IMMEDIATE_S3 is OR-ed into the mask twice; the repeat
 * has no effect on the value. */
117 validate_immediate(struct i915_context
*i915
, unsigned *batch_space
)
119 unsigned dirty
= (1 << I915_IMMEDIATE_S0
| 1 << I915_IMMEDIATE_S1
|
120 1 << I915_IMMEDIATE_S2
| 1 << I915_IMMEDIATE_S3
|
121 1 << I915_IMMEDIATE_S3
| 1 << I915_IMMEDIATE_S4
|
122 1 << I915_IMMEDIATE_S5
| 1 << I915_IMMEDIATE_S6
) &
123 i915
->immediate_dirty
;
125 if (i915
->immediate_dirty
& (1 << I915_IMMEDIATE_S0
) && i915
->vbo
)
126 i915
->validation_buffers
[i915
->num_validation_buffers
++] = i915
->vbo
;
128 *batch_space
= 1 + util_bitcount(dirty
);
/* Emit the dirty immediate-state words.
 *   - dirty is i915->immediate_dirty restricted to the S0..S6 bits; num is
 *     util_bitcount(dirty) and is asserted to be in 1..I915_MAX_IMMEDIATE.
 *   - Header word: _3DSTATE_LOAD_STATE_IMMEDIATE_1 | dirty << 4 | (num - 1).
 *   - S0, when dirty, is written with OUT_RELOC against i915->vbo using
 *     I915_USAGE_VERTEX and current.immediate[I915_IMMEDIATE_S0].
 *   - The loop covers indices 1..I915_MAX_IMMEDIATE-1. For S6 with
 *     current.target_fixup_format == PIPE_FORMAT_A8_UNORM a local copy
 *     (imm) has its source blend factor rewritten: DST_ALPHA becomes
 *     DST_COLR and INV_DST_ALPHA becomes INV_DST_COLR.
 * NOTE(review): the listing's embedded numbering skips 147, 150-153 and
 * 169-170, so the branches around the OUT_RELOC and the statement that
 * consumes imm are not shown. Only OUT_BATCH(current.immediate[i]) is
 * visible; confirm against the complete file that the adjusted imm is what
 * gets emitted on the A8 path. I915_IMMEDIATE_S3 is listed twice in the
 * mask, which is harmless. */
132 emit_immediate(struct i915_context
*i915
)
134 /* remove unwanted bits and S7 */
135 unsigned dirty
= (1 << I915_IMMEDIATE_S0
| 1 << I915_IMMEDIATE_S1
|
136 1 << I915_IMMEDIATE_S2
| 1 << I915_IMMEDIATE_S3
|
137 1 << I915_IMMEDIATE_S3
| 1 << I915_IMMEDIATE_S4
|
138 1 << I915_IMMEDIATE_S5
| 1 << I915_IMMEDIATE_S6
) &
139 i915
->immediate_dirty
;
140 int i
, num
= util_bitcount(dirty
);
141 assert(num
&& num
<= I915_MAX_IMMEDIATE
);
143 OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1
|
144 dirty
<< 4 | (num
- 1));
146 if (i915
->immediate_dirty
& (1 << I915_IMMEDIATE_S0
)) {
148 OUT_RELOC(i915
->vbo
, I915_USAGE_VERTEX
,
149 i915
->current
.immediate
[I915_IMMEDIATE_S0
]);
154 for (i
= 1; i
< I915_MAX_IMMEDIATE
; i
++) {
155 if (dirty
& (1 << i
)) {
156 /* Fixup blend function for A8 dst buffers.
157 * When we blend to an A8 buffer, the GPU thinks it's a G8 buffer,
158 * and therefore we need to use the color factor for alphas. */
159 if ((i
== I915_IMMEDIATE_S6
) &&
160 (i915
->current
.target_fixup_format
== PIPE_FORMAT_A8_UNORM
)) {
161 uint32_t imm
= i915
->current
.immediate
[i
];
162 uint32_t srcRGB
= (imm
>> S6_CBUF_SRC_BLEND_FACT_SHIFT
) & BLENDFACT_MASK
;
163 if (srcRGB
== BLENDFACT_DST_ALPHA
)
164 srcRGB
= BLENDFACT_DST_COLR
;
165 else if (srcRGB
== BLENDFACT_INV_DST_ALPHA
)
166 srcRGB
= BLENDFACT_INV_DST_COLR
;
167 imm
&= ~SRC_BLND_FACT(BLENDFACT_MASK
);
168 imm
|= SRC_BLND_FACT(srcRGB
);
171 OUT_BATCH(i915
->current
.immediate
[i
]);
/* Report batch space for emit_dynamic: util_bitcount of
 * i915->dynamic_dirty restricted to its low I915_MAX_DYNAMIC bits, which
 * matches the one word emit_dynamic writes per dirty slot. */
178 validate_dynamic(struct i915_context
*i915
, unsigned *batch_space
)
180 *batch_space
= util_bitcount(i915
->dynamic_dirty
& ((1 << I915_MAX_DYNAMIC
) - 1));
/* For every index below I915_MAX_DYNAMIC whose bit is set in
 * i915->dynamic_dirty, emit current.dynamic[i] as one batch word.
 * NOTE(review): the declaration of the loop index i is not present in this
 * listing. */
184 emit_dynamic(struct i915_context
*i915
)
187 for (i
= 0; i
< I915_MAX_DYNAMIC
; i
++) {
188 if (i915
->dynamic_dirty
& (1 << i
))
189 OUT_BATCH(i915
->current
.dynamic
[i
]);
/* Validation step for the static (destination buffer) atom.
 *   - When current.cbuf_bo is set and I915_DST_BUF_COLOR is dirty, the
 *     color buffer is appended to i915->validation_buffers.
 *   - Likewise for current.depth_bo with I915_DST_BUF_DEPTH.
 *   - I915_DST_VARS and I915_DST_RECT are tested as well.
 * NOTE(review): every statement that updates *batch_space, including the
 * bodies of the last two ifs, is missing from this listing (its embedded
 * numbering skips 196-197, 201-203, 207-209, 211-212 and 214-216), so the
 * reserved sizes cannot be read from here. */
194 validate_static(struct i915_context
*i915
, unsigned *batch_space
)
198 if (i915
->current
.cbuf_bo
&& (i915
->static_dirty
& I915_DST_BUF_COLOR
)) {
199 i915
->validation_buffers
[i915
->num_validation_buffers
++]
200 = i915
->current
.cbuf_bo
;
204 if (i915
->current
.depth_bo
&& (i915
->static_dirty
& I915_DST_BUF_DEPTH
)) {
205 i915
->validation_buffers
[i915
->num_validation_buffers
++]
206 = i915
->current
.depth_bo
;
210 if (i915
->static_dirty
& I915_DST_VARS
)
213 if (i915
->static_dirty
& I915_DST_RECT
)
/* Emit destination-buffer state.
 *   - Color buffer (cbuf_bo set and I915_DST_BUF_COLOR dirty):
 *     _3DSTATE_BUF_INFO_CMD, current.cbuf_flags, then a relocation to
 *     cbuf_bo.
 *   - Depth buffer (depth_bo set and I915_DST_BUF_DEPTH dirty): same
 *     sequence with depth_flags and depth_bo.
 *   - I915_DST_VARS dirty: _3DSTATE_DST_BUF_VARS_CMD followed by
 *     current.dst_buf_vars.
 * NOTE(review): the remaining arguments of both OUT_RELOC calls, the
 * terminator of the "no zbuf" comment and any trailing words are not
 * present in this listing. */
218 emit_static(struct i915_context
*i915
)
220 if (i915
->current
.cbuf_bo
&& (i915
->static_dirty
& I915_DST_BUF_COLOR
)) {
221 OUT_BATCH(_3DSTATE_BUF_INFO_CMD
);
222 OUT_BATCH(i915
->current
.cbuf_flags
);
223 OUT_RELOC(i915
->current
.cbuf_bo
,
228 /* What happens if no zbuf??
230 if (i915
->current
.depth_bo
&& (i915
->static_dirty
& I915_DST_BUF_DEPTH
)) {
231 OUT_BATCH(_3DSTATE_BUF_INFO_CMD
);
232 OUT_BATCH(i915
->current
.depth_flags
);
233 OUT_RELOC(i915
->current
.depth_bo
,
238 if (i915
->static_dirty
& I915_DST_VARS
) {
239 OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD
);
240 OUT_BATCH(i915
->current
.dst_buf_vars
);
/* Validation step for the texture-map atom.
 *   - *batch_space is 2 + 3 * current.sampler_enable_nr, or 0 when no
 *     sampler is enabled.
 *   - For each unit below I915_TEX_UNITS whose bit is set in
 *     current.sampler_enable_flags, the buffer of the texture bound through
 *     fragment_sampler_views[unit] is appended to
 *     i915->validation_buffers.
 * NOTE(review): the declaration of unit is not present in this listing,
 * and fragment_sampler_views[unit] is dereferenced without a NULL check;
 * this assumes every enabled unit has a view bound - confirm with the code
 * that sets sampler_enable_flags. */
245 validate_map(struct i915_context
*i915
, unsigned *batch_space
)
247 const uint enabled
= i915
->current
.sampler_enable_flags
;
249 struct i915_texture
*tex
;
251 *batch_space
= i915
->current
.sampler_enable_nr
?
252 2 + 3*i915
->current
.sampler_enable_nr
: 0;
254 for (unit
= 0; unit
< I915_TEX_UNITS
; unit
++) {
255 if (enabled
& (1 << unit
)) {
256 tex
= i915_texture(i915
->fragment_sampler_views
[unit
]->texture
);
257 i915
->validation_buffers
[i915
->num_validation_buffers
++] = tex
->buffer
;
/* Emit texture map state.
 *   - Header word: _3DSTATE_MAP_STATE | (3 * nr), where nr is
 *     current.sampler_enable_nr.
 *   - For each enabled unit (bit set in current.sampler_enable_flags): a
 *     relocation to the texture's buffer with I915_USAGE_SAMPLER and
 *     offset 0, then current.texbuffer[unit][0] (MS3) and [1] (MS4).
 * NOTE(review): the listing's embedded numbering skips 266, 268-269, 271
 * and 276-279, so the declaration of unit, any guard around the header and
 * any word between the header and the per-unit data are not shown.
 * validate_map reserves 2 + 3 * nr words, which implies a second header
 * word that is not visible here. */
263 emit_map(struct i915_context
*i915
)
265 const uint nr
= i915
->current
.sampler_enable_nr
;
267 const uint enabled
= i915
->current
.sampler_enable_flags
;
270 OUT_BATCH(_3DSTATE_MAP_STATE
| (3 * nr
));
272 for (unit
= 0; unit
< I915_TEX_UNITS
; unit
++) {
273 if (enabled
& (1 << unit
)) {
274 struct i915_texture
*texture
= i915_texture(i915
->fragment_sampler_views
[unit
]->texture
);
275 struct i915_winsys_buffer
*buf
= texture
->buffer
;
280 OUT_RELOC(buf
, I915_USAGE_SAMPLER
, 0);
281 OUT_BATCH(i915
->current
.texbuffer
[unit
][0]); /* MS3 */
282 OUT_BATCH(i915
->current
.texbuffer
[unit
][1]); /* MS4 */
/* Report batch space for emit_sampler: 2 header words plus 3 words per
 * enabled sampler (current.sampler_enable_nr), or 0 when none is
 * enabled. */
290 validate_sampler(struct i915_context
*i915
, unsigned *batch_space
)
292 *batch_space
= i915
->current
.sampler_enable_nr
?
293 2 + 3*i915
->current
.sampler_enable_nr
: 0;
/* Emit sampler state when at least one sampler is enabled:
 *   1. _3DSTATE_SAMPLER_STATE | (3 * current.sampler_enable_nr)
 *   2. current.sampler_enable_flags
 *   3. for each unit below I915_TEX_UNITS with its flag bit set, the three
 *      words current.sampler[i][0..2].
 * That is 2 + 3 * n words, matching validate_sampler.
 * NOTE(review): the declaration of i is not present in this listing. */
297 emit_sampler(struct i915_context
*i915
)
299 if (i915
->current
.sampler_enable_nr
) {
302 OUT_BATCH( _3DSTATE_SAMPLER_STATE
|
303 (3 * i915
->current
.sampler_enable_nr
) );
305 OUT_BATCH( i915
->current
.sampler_enable_flags
);
307 for (i
= 0; i
< I915_TEX_UNITS
; i
++) {
308 if (i915
->current
.sampler_enable_flags
& (1<<i
)) {
309 OUT_BATCH( i915
->current
.sampler
[i
][0] );
310 OUT_BATCH( i915
->current
.sampler
[i
][1] );
311 OUT_BATCH( i915
->current
.sampler
[i
][2] );
/* Classify an instruction by the top byte of its first word
 * (instruction[0] & 0xFF000000). The visible part of the return expression
 * compares that opcode with T0_TEXLD.
 * NOTE(review): the expression ends in "||" and its remaining alternatives
 * are not present in this listing. */
317 static boolean
is_tex_instruction(uint32_t* instruction
)
319 uint32_t op
= instruction
[0] &0xFF000000;
320 return ( (op
== T0_TEXLD
) ||
/* Return the sampler field of a texture instruction: the first instruction
 * word masked with T0_SAMPLER_NR_MASK (no shift is applied here). */
325 static uint32_t tex_sampler(uint32_t* instruction
)
327 return ( instruction
[0] & T0_SAMPLER_NR_MASK
);
/* Scan the fragment program (i915->fs->program, stepping 3 words per
 * instruction) for texture instructions whose sampler has
 * current.sampler_srgb[sampler] set; the sampler index is asserted to be
 * below I915_TEX_UNITS.
 * NOTE(review): the return statements and the declaration of i are not
 * present in this listing, so the returned count cannot be confirmed from
 * here. Callers multiply the result by 4 (validate_constants) and add it to
 * num_constants (emit_constants). */
330 static uint
additional_constants(struct i915_context
*i915
)
334 for (i
= 0 ; i
< i915
->fs
->program_len
; i
+=3) {
335 if ( is_tex_instruction(i915
->fs
->program
+ i
)) {
336 int sampler
= tex_sampler(i915
->fs
->program
+ i
);
337 assert(sampler
< I915_TEX_UNITS
);
338 if ( i915
->current
.sampler_srgb
[sampler
] )
/* Compute the word count for emit_constants:
 *   nr = 2 + 4 * fs->num_constants (or 0 when there are none),
 *   nr += 4 * additional_constants(i915).
 * NOTE(review): the store of nr into *batch_space is not present in this
 * listing. Also, the 2 header words are counted only when
 * fs->num_constants is non-zero; if additional_constants() can be non-zero
 * for a shader with no constants of its own, the header would be
 * unaccounted for - confirm against emit_constants in the complete file. */
346 validate_constants(struct i915_context
*i915
, unsigned *batch_space
)
348 int nr
= i915
->fs
->num_constants
?
349 2 + 4*i915
->fs
->num_constants
: 0;
351 nr
+= 4*additional_constants(i915
);
/* Emit pixel shader constants.
 *   - nr = fs->num_constants + additional_constants(i915), asserted to be
 *     below I915_MAX_CONSTANT.
 *   - Header: _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4), then the mask
 *     (1 << nr) - 1.
 *   - For each slot i the source pointer c is chosen as:
 *       constant_flags[i] == I915_CONSTFLAG_USER: data of the
 *           PIPE_SHADER_FRAGMENT constant buffer,
 *       else i < fs->num_constants: fs->constants[i],
 *       else: the local srgb_constants table, with i saved in
 *           current.srgb_const_offset for later use by the shader emit.
 *   - A printf dumps the four floats of each constant.
 * NOTE(review): the statements that offset c and write its words to the
 * batch, the declarations of i and c, any conditional compilation around
 * the printf and the terminator of the leading comment are not present in
 * this listing. */
356 emit_constants(struct i915_context
*i915
)
358 /* Collate the user-defined constants with the fragment shader's
359 * immediates according to the constant_flags[] array.
361 const uint nr
= i915
->fs
->num_constants
+ additional_constants(i915
);
363 assert(nr
< I915_MAX_CONSTANT
);
365 const float srgb_constants
[4] = {1.0/1.055, 0.055/1.055, 2.4, 0.0822};
368 OUT_BATCH( _3DSTATE_PIXEL_SHADER_CONSTANTS
| (nr
* 4) );
369 OUT_BATCH((1 << nr
) - 1);
371 for (i
= 0; i
< nr
; i
++) {
373 if (i915
->fs
->constant_flags
[i
] == I915_CONSTFLAG_USER
) {
374 /* grab user-defined constant */
375 c
= (uint
*) i915_buffer(i915
->constants
[PIPE_SHADER_FRAGMENT
])->data
;
378 else if (i
< i915
->fs
->num_constants
) {
379 /* emit program constant */
380 c
= (uint
*) i915
->fs
->constants
[i
];
382 /* emit constants for sRGB */
384 /* save const position in context for use in shader emit */
385 i915
->current
.srgb_const_offset
= i
;
387 c
= (uint
*) srgb_constants
;
391 float *f
= (float *) c
;
392 printf("Const %2d: %f %f %f %f %s\n", i
, f
[0], f
[1], f
[2], f
[3],
393 (i915
->fs
->constant_flags
[i
] == I915_CONSTFLAG_USER
394 ? "user" : "immediate"));
/* Compute the batch space for emit_program:
 *   fs->decl_len + fs->program_len + additional_size, where
 *   additional_size is 3 when current.target_fixup_format is non-zero
 *   (room for one extra instruction) plus 3 * 8 for every texture
 *   instruction whose sampler has current.sampler_srgb set.
 * emit_program calls this function again to recover additional_size. */
406 validate_program(struct i915_context
*i915
, unsigned *batch_space
)
408 uint additional_size
= 0, i
;
410 additional_size
+= i915
->current
.target_fixup_format
? 3 : 0;
412 for (i
= 0 ; i
< i915
->fs
->program_len
; i
+=3)
413 if ( is_tex_instruction(i915
->fs
->program
+ i
) &&
414 i915
->current
.sampler_srgb
[tex_sampler(i915
->fs
->program
+i
)] )
415 additional_size
+= 3 * 8 /* 8 instructions for srgb emulation */;
417 /* we need more batch space if we want to emulate rgba framebuffers
418 * or sRGB textures */
419 *batch_space
= i915
->fs
->decl_len
+ i915
->fs
->program_len
+ additional_size
;
/* Emit one instruction into the batch. Visible here are its second and
 * third words: A1_SRC0(src0_reg) | A1_SRC1(src1_reg), then
 * A2_SRC1(src1_reg) | A2_SRC2(src2_reg).
 * NOTE(review): the rest of the parameter list and the first instruction
 * word are not present in this listing. Call sites in emit_srgb_fixup pass
 * (i915, opcode, destination mask, destination register, three source
 * registers). */
422 static void emit_instruction(struct i915_context
*i915
,
436 OUT_BATCH(A1_SRC0(src0_reg
) | A1_SRC1(src1_reg
));
437 OUT_BATCH(A2_SRC1(src1_reg
) | A2_SRC2(src2_reg
));
/* Emit the eight-instruction sequence that post-processes the result of a
 * texture instruction for sRGB emulation (validate_program reserves
 * 3 * 8 words per such instruction).
 *   - The destination register and dst_mask are taken from program[0].
 *   - The constant at current.srgb_const_offset supplies three operands,
 *     its X, Y and Z components replicated across all channels.
 *   - Sequence, using temporary R1:
 *       A0_MAD  R1     <- dst_reg, cst.x, cst.y   (all channels)
 *       A0_LOG  R1.x / R1.y / R1.z   (one instruction per channel)
 *       A0_MUL  R1     <- R1, cst.z               (all channels)
 *       A0_EXP  dst_reg.x / .y / .z, each masked by dst_mask
 * NOTE(review): the second parameter (program) and the declaration that
 * receives the dst_reg expression are not present in this listing. R1 is
 * overwritten unconditionally; this assumes the shader does not keep a live
 * value there - confirm. */
441 emit_srgb_fixup(struct i915_context
*i915
,
445 (program
[0] & UREG_TYPE_NR_MASK
) >> UREG_A0_DEST_SHIFT_LEFT
;
446 int dst_mask
= program
[0] & A0_DEST_CHANNEL_ALL
;
447 int cst_idx
= i915
->current
.srgb_const_offset
;
448 int cst0_reg
= swizzle(UREG(REG_TYPE_CONST
, cst_idx
), X
, X
, X
, X
);
449 int cst1_reg
= swizzle(UREG(REG_TYPE_CONST
, cst_idx
), Y
, Y
, Y
, Y
);
450 int cst2_reg
= swizzle(UREG(REG_TYPE_CONST
, cst_idx
), Z
, Z
, Z
, Z
);
451 int t1_reg
= UREG(REG_TYPE_R
, 1);
452 int t1x_reg
= swizzle(UREG(REG_TYPE_R
, 1), X
, X
, X
, X
);
453 int t1y_reg
= swizzle(UREG(REG_TYPE_R
, 1), Y
, Y
, Y
, Y
);
454 int t1z_reg
= swizzle(UREG(REG_TYPE_R
, 1), Z
, Z
, Z
, Z
);
456 emit_instruction(i915
, A0_MAD
, A0_DEST_CHANNEL_ALL
, t1_reg
, dst_reg
, cst0_reg
, cst1_reg
);
457 emit_instruction(i915
, A0_LOG
, A0_DEST_CHANNEL_X
, t1_reg
, t1x_reg
, 0, 0);
458 emit_instruction(i915
, A0_LOG
, A0_DEST_CHANNEL_Y
, t1_reg
, t1y_reg
, 0, 0);
459 emit_instruction(i915
, A0_LOG
, A0_DEST_CHANNEL_Z
, t1_reg
, t1z_reg
, 0, 0);
460 emit_instruction(i915
, A0_MUL
, A0_DEST_CHANNEL_ALL
, t1_reg
, t1_reg
, cst2_reg
, 0);
461 emit_instruction(i915
, A0_EXP
, dst_mask
& A0_DEST_CHANNEL_X
, dst_reg
, t1x_reg
, 0, 0);
462 emit_instruction(i915
, A0_EXP
, dst_mask
& A0_DEST_CHANNEL_Y
, dst_reg
, t1y_reg
, 0, 0);
463 emit_instruction(i915
, A0_EXP
, dst_mask
& A0_DEST_CHANNEL_Z
, dst_reg
, t1z_reg
, 0, 0);
/* Emit the fragment program.
 *   1. Calls validate_program and subtracts decl_len + program_len to get
 *      the number of extra words the fixups will add.
 *   2. Asserts the program is non-empty.
 *   3. Adds the extra size to decl[0] (the first declaration word carries
 *      the size) and emits decl[1..decl_len-1].
 *   4. Asserts program_len is a multiple of 3 and emits the program three
 *      words at a time; after a texture instruction whose sampler is
 *      flagged in current.sampler_srgb it calls emit_srgb_fixup.
 *   5. When current.target_fixup_format is set, appends a mov of the
 *      output color whose last word is current.fixup_swizzle.
 * NOTE(review): the statement that emits the adjusted size word, the
 * declaration of i and the OUT_BATCH opening of the mov are not present in
 * this listing.
 * NOTE(review): emit_srgb_fixup is passed i915->fs->program (the start of
 * the program), while the surrounding checks inspect program + i. Since
 * emit_srgb_fixup reads the destination register from program[0], this
 * looks like it uses the first instruction's destination rather than the
 * current texture instruction's - confirm whether program + i was
 * intended. */
467 emit_program(struct i915_context
*i915
)
469 uint additional_size
= 0;
472 /* count how much additional space we'll need */
473 validate_program(i915
, &additional_size
);
474 additional_size
-= i915
->fs
->decl_len
+ i915
->fs
->program_len
;
476 /* we should always have, at least, a pass-through program */
477 assert(i915
->fs
->program_len
> 0);
479 /* output the declarations */
481 /* first word has the size, we have to adjust that */
482 uint size
= (i915
->fs
->decl
[0]);
483 size
+= additional_size
;
487 for (i
= 1 ; i
< i915
->fs
->decl_len
; i
++)
488 OUT_BATCH(i915
->fs
->decl
[i
]);
490 /* output the program */
491 assert(i915
->fs
->program_len
% 3 == 0);
492 for (i
= 0 ; i
< i915
->fs
->program_len
; i
+=3) {
493 OUT_BATCH(i915
->fs
->program
[i
]);
494 OUT_BATCH(i915
->fs
->program
[i
+1]);
495 OUT_BATCH(i915
->fs
->program
[i
+2]);
497 /* TEX fixup for sRGB */
498 if ( is_tex_instruction(i915
->fs
->program
+i
) &&
499 i915
->current
.sampler_srgb
[tex_sampler(i915
->fs
->program
+i
)] )
500 emit_srgb_fixup(i915
, i915
->fs
->program
);
504 /* we emit an additional mov with swizzle to fake RGBA framebuffers */
505 if (i915
->current
.target_fixup_format
) {
506 /* mov out_color, out_color.zyxw */
508 (REG_TYPE_OC
<< A0_DEST_TYPE_SHIFT
) |
509 A0_DEST_CHANNEL_ALL
|
510 (REG_TYPE_OC
<< A0_SRC0_TYPE_SHIFT
) |
511 (T_DIFFUSE
<< A0_SRC0_NR_SHIFT
));
512 OUT_BATCH(i915
->current
.fixup_swizzle
);
/* When I915_DST_RECT is set in i915->static_dirty, emit five words:
 * _3DSTATE_DRAW_RECT_CMD, DRAW_RECT_DIS_DEPTH_OFS, current.draw_offset,
 * current.draw_size, and current.draw_offset again.
 * NOTE(review): draw_offset is written twice (third and fifth word);
 * presumably these are two distinct fields of the command that share a
 * value - confirm against the hardware documentation. i915_emit_hardware_state
 * runs this atom under I915_HW_STATIC; there is no validate_draw_rect. */
518 emit_draw_rect(struct i915_context
*i915
)
520 if (i915
->static_dirty
& I915_DST_RECT
) {
521 OUT_BATCH(_3DSTATE_DRAW_RECT_CMD
);
522 OUT_BATCH(DRAW_RECT_DIS_DEPTH_OFS
);
523 OUT_BATCH(i915
->current
.draw_offset
);
524 OUT_BATCH(i915
->current
.draw_size
);
525 OUT_BATCH(i915
->current
.draw_offset
);
/* Sum the batch space needed by all dirty atoms and validate the buffers
 * they reference.
 *   - Resets i915->num_validation_buffers to 0.
 *   - When I915_HW_INVARIANT is dirty, *batch_space starts at
 *     Elements(invariant_state).
 *   - VALIDATE_ATOM(atom, flag) calls validate_<atom>(i915, &tmp) when the
 *     flag is set in i915->hardware_dirty and adds tmp to *batch_space. It
 *     runs for flush, immediate, dynamic, static, map, sampler, constants
 *     and program, in that order.
 *   - Afterwards num_validation_buffers is compared with 0 and the
 *     collected buffers are passed to i915_winsys_validate_buffers.
 * The caller uses the result as a truth value.
 * NOTE(review): VALIDATE_ATOM is defined twice (a variant that prints
 * running per-atom counters and a plain one); the preprocessor conditionals
 * selecting between them, the initial value of *batch_space when the
 * invariant bit is clear, the declaration of tmp and all return statements
 * are not present in this listing. */
530 i915_validate_state(struct i915_context
*i915
, unsigned *batch_space
)
534 i915
->num_validation_buffers
= 0;
535 if (i915
->hardware_dirty
& I915_HW_INVARIANT
)
536 *batch_space
= Elements(invariant_state
);
541 static int counter_total
= 0;
542 #define VALIDATE_ATOM(atom, hw_dirty) \
543 if (i915->hardware_dirty & hw_dirty) { \
544 static int counter_##atom = 0;\
545 validate_##atom(i915, &tmp); \
546 *batch_space += tmp;\
547 counter_##atom += tmp;\
548 counter_total += tmp;\
549 printf("%s: \t%d/%d \t%2.2f\n",#atom, counter_##atom, counter_total, counter_##atom*100.f/counter_total);}
551 #define VALIDATE_ATOM(atom, hw_dirty) \
552 if (i915->hardware_dirty & hw_dirty) { \
553 validate_##atom(i915, &tmp); \
554 *batch_space += tmp; }
556 VALIDATE_ATOM(flush
, I915_HW_FLUSH
);
557 VALIDATE_ATOM(immediate
, I915_HW_IMMEDIATE
);
558 VALIDATE_ATOM(dynamic
, I915_HW_DYNAMIC
);
559 VALIDATE_ATOM(static, I915_HW_STATIC
);
560 VALIDATE_ATOM(map
, I915_HW_MAP
);
561 VALIDATE_ATOM(sampler
, I915_HW_SAMPLER
);
562 VALIDATE_ATOM(constants
, I915_HW_CONSTANTS
);
563 VALIDATE_ATOM(program
, I915_HW_PROGRAM
);
566 if (i915
->num_validation_buffers
== 0)
569 if (!i915_winsys_validate_buffers(i915
->batch
, i915
->validation_buffers
,
570 i915
->num_validation_buffers
))
/* Entry point that writes all dirty hardware state into the batch.
 *   1. Asserts i915->dirty == 0 and optionally dumps the dirty flags when
 *      DBG_ATOMS debugging is on.
 *   2. Calls i915_validate_state to get batch_space; on failure, and again
 *      when BEGIN_BATCH(batch_space) fails, validation and BEGIN_BATCH are
 *      retried inside assert().
 *   3. Records the batch pointer, then runs EMIT_ATOM for flush, invariant,
 *      immediate, dynamic, static, map, sampler, constants, program and
 *      draw_rect (the last one under I915_HW_STATIC).
 *   4. Logs and asserts that the number of dwords written, i.e. the pointer
 *      difference divided by 4, equals batch_space.
 *   5. Clears hardware_dirty, immediate_dirty, dynamic_dirty, static_dirty
 *      and flush_dirty.
 * NOTE(review): the retry calls to i915_validate_state and BEGIN_BATCH sit
 * inside assert(); if assert compiles to nothing (NDEBUG) those calls are
 * not made at all - confirm how this tree defines assert.
 * NOTE(review): the statements between a failed attempt and its retry, the
 * body line of EMIT_ATOM, the declaration of save_ptr, the last argument of
 * the debug message and the terminator of the comment below are not present
 * in this listing. */
576 /* Push the state into the sarea and/or texture memory.
579 i915_emit_hardware_state(struct i915_context
*i915
)
581 unsigned batch_space
;
584 assert(i915
->dirty
== 0);
586 if (I915_DBG_ON(DBG_ATOMS
))
587 i915_dump_hardware_dirty(i915
, __FUNCTION__
);
589 if (!i915_validate_state(i915
, &batch_space
)) {
591 assert(i915_validate_state(i915
, &batch_space
));
594 if(!BEGIN_BATCH(batch_space
)) {
596 assert(i915_validate_state(i915
, &batch_space
));
597 assert(BEGIN_BATCH(batch_space
));
600 save_ptr
= (uintptr_t)i915
->batch
->ptr
;
602 #define EMIT_ATOM(atom, hw_dirty) \
603 if (i915->hardware_dirty & hw_dirty) \
605 EMIT_ATOM(flush
, I915_HW_FLUSH
);
606 EMIT_ATOM(invariant
, I915_HW_INVARIANT
);
607 EMIT_ATOM(immediate
, I915_HW_IMMEDIATE
);
608 EMIT_ATOM(dynamic
, I915_HW_DYNAMIC
);
609 EMIT_ATOM(static, I915_HW_STATIC
);
610 EMIT_ATOM(map
, I915_HW_MAP
);
611 EMIT_ATOM(sampler
, I915_HW_SAMPLER
);
612 EMIT_ATOM(constants
, I915_HW_CONSTANTS
);
613 EMIT_ATOM(program
, I915_HW_PROGRAM
);
614 EMIT_ATOM(draw_rect
, I915_HW_STATIC
);
617 I915_DBG(DBG_EMIT
, "%s: used %d dwords, %d dwords reserved\n", __FUNCTION__
,
618 ((uintptr_t)i915
->batch
->ptr
- save_ptr
) / 4,
620 assert(((uintptr_t)i915
->batch
->ptr
- save_ptr
) / 4 == batch_space
);
622 i915
->hardware_dirty
= 0;
623 i915
->immediate_dirty
= 0;
624 i915
->dynamic_dirty
= 0;
625 i915
->static_dirty
= 0;
626 i915
->flush_dirty
= 0;