/**************************************************************************
 *
 * Copyright (C) Intel Corp. 2006. All Rights Reserved.
 * Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
 * develop this 3D driver.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keith@tungstengraphics.com>
 */
34 #include "intel_batchbuffer.h"
35 #include "intel_fbo.h"
36 #include "intel_regions.h"
38 #include "brw_context.h"
39 #include "brw_state.h"
40 #include "brw_defines.h"
42 /* Constant single cliprect for framebuffer object or DRI2 drawing */
43 static void upload_drawing_rect(struct brw_context
*brw
)
45 struct intel_context
*intel
= &brw
->intel
;
46 struct gl_context
*ctx
= &intel
->ctx
;
49 OUT_BATCH(_3DSTATE_DRAWRECT_INFO_I965
);
50 OUT_BATCH(0); /* xmin, ymin */
51 OUT_BATCH(((ctx
->DrawBuffer
->Width
- 1) & 0xffff) |
52 ((ctx
->DrawBuffer
->Height
- 1) << 16));
57 const struct brw_tracked_state brw_drawing_rect
= {
60 .brw
= BRW_NEW_CONTEXT
,
63 .emit
= upload_drawing_rect
67 * Upload the binding table pointers, which point each stage's array of surface
70 * The binding table pointers are relative to the surface state base address,
71 * which points at the batchbuffer containing the streamed batch state.
73 static void upload_binding_table_pointers(struct brw_context
*brw
)
75 struct intel_context
*intel
= &brw
->intel
;
78 OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS
<< 16 | (6 - 2));
79 OUT_BATCH(brw
->vs
.bind_bo_offset
);
80 OUT_BATCH(0); /* gs */
81 OUT_BATCH(0); /* clip */
82 OUT_BATCH(0); /* sf */
83 OUT_BATCH(brw
->wm
.bind_bo_offset
);
87 const struct brw_tracked_state brw_binding_table_pointers
= {
91 | BRW_NEW_VS_BINDING_TABLE
92 | BRW_NEW_GS_BINDING_TABLE
93 | BRW_NEW_PS_BINDING_TABLE
,
96 .emit
= upload_binding_table_pointers
,
100 * Upload the binding table pointers, which point each stage's array of surface
103 * The binding table pointers are relative to the surface state base address,
104 * which points at the batchbuffer containing the streamed batch state.
106 static void upload_gen6_binding_table_pointers(struct brw_context
*brw
)
108 struct intel_context
*intel
= &brw
->intel
;
111 OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS
<< 16 |
112 GEN6_BINDING_TABLE_MODIFY_VS
|
113 GEN6_BINDING_TABLE_MODIFY_GS
|
114 GEN6_BINDING_TABLE_MODIFY_PS
|
116 OUT_BATCH(brw
->vs
.bind_bo_offset
); /* vs */
117 OUT_BATCH(0); /* gs */
118 OUT_BATCH(brw
->wm
.bind_bo_offset
); /* wm/ps */
122 const struct brw_tracked_state gen6_binding_table_pointers
= {
126 | BRW_NEW_VS_BINDING_TABLE
127 | BRW_NEW_GS_BINDING_TABLE
128 | BRW_NEW_PS_BINDING_TABLE
,
131 .emit
= upload_gen6_binding_table_pointers
,
135 * Upload pointers to the per-stage state.
137 * The state pointers in this packet are all relative to the general state
138 * base address set by CMD_STATE_BASE_ADDRESS, which is 0.
140 static void upload_pipelined_state_pointers(struct brw_context
*brw
)
142 struct intel_context
*intel
= &brw
->intel
;
144 if (intel
->gen
== 5) {
145 /* Need to flush before changing clip max threads for errata. */
152 OUT_BATCH(_3DSTATE_PIPELINED_POINTERS
<< 16 | (7 - 2));
153 OUT_RELOC(intel
->batch
.bo
, I915_GEM_DOMAIN_INSTRUCTION
, 0,
154 brw
->vs
.state_offset
);
155 if (brw
->gs
.prog_active
)
156 OUT_RELOC(brw
->intel
.batch
.bo
, I915_GEM_DOMAIN_INSTRUCTION
, 0,
157 brw
->gs
.state_offset
| 1);
160 OUT_RELOC(brw
->intel
.batch
.bo
, I915_GEM_DOMAIN_INSTRUCTION
, 0,
161 brw
->clip
.state_offset
| 1);
162 OUT_RELOC(brw
->intel
.batch
.bo
, I915_GEM_DOMAIN_INSTRUCTION
, 0,
163 brw
->sf
.state_offset
);
164 OUT_RELOC(brw
->intel
.batch
.bo
, I915_GEM_DOMAIN_INSTRUCTION
, 0,
165 brw
->wm
.state_offset
);
166 OUT_RELOC(brw
->intel
.batch
.bo
, I915_GEM_DOMAIN_INSTRUCTION
, 0,
167 brw
->cc
.state_offset
);
170 brw
->state
.dirty
.brw
|= BRW_NEW_PSP
;
/* Emit the pipelined state pointers together with the URB fence and CS URB
 * state, which must be kept consistent with one another.
 */
static void upload_psp_urb_cbs(struct brw_context *brw)
{
   upload_pipelined_state_pointers(brw);
   brw_upload_urb_fence(brw);
   brw_upload_cs_urb_state(brw);
}
180 const struct brw_tracked_state brw_psp_urb_cbs
= {
183 .brw
= BRW_NEW_URB_FENCE
| BRW_NEW_BATCH
,
184 .cache
= (CACHE_NEW_VS_UNIT
|
187 CACHE_NEW_CLIP_UNIT
|
192 .emit
= upload_psp_urb_cbs
,
195 static void prepare_depthbuffer(struct brw_context
*brw
)
197 struct intel_context
*intel
= &brw
->intel
;
198 struct gl_context
*ctx
= &intel
->ctx
;
199 struct gl_framebuffer
*fb
= ctx
->DrawBuffer
;
200 struct intel_renderbuffer
*drb
= intel_get_renderbuffer(fb
, BUFFER_DEPTH
);
201 struct intel_renderbuffer
*srb
= intel_get_renderbuffer(fb
, BUFFER_STENCIL
);
204 brw_add_validated_bo(brw
, drb
->region
->buffer
);
205 if (drb
&& drb
->hiz_region
)
206 brw_add_validated_bo(brw
, drb
->hiz_region
->buffer
);
208 brw_add_validated_bo(brw
, srb
->region
->buffer
);
211 static void emit_depthbuffer(struct brw_context
*brw
)
213 struct intel_context
*intel
= &brw
->intel
;
214 struct gl_context
*ctx
= &intel
->ctx
;
215 struct gl_framebuffer
*fb
= ctx
->DrawBuffer
;
217 struct intel_renderbuffer
*depth_irb
= intel_get_renderbuffer(fb
, BUFFER_DEPTH
);
218 struct intel_renderbuffer
*stencil_irb
= intel_get_renderbuffer(fb
, BUFFER_STENCIL
);
219 struct intel_region
*hiz_region
= depth_irb
? depth_irb
->hiz_region
: NULL
;
223 * If depth and stencil buffers are identical, then don't use separate
226 if (depth_irb
&& depth_irb
== stencil_irb
) {
231 * If stencil buffer uses combined depth/stencil format, but no depth buffer
232 * is attached, then use stencil buffer as depth buffer.
234 if (!depth_irb
&& stencil_irb
235 && stencil_irb
->Base
.Format
== MESA_FORMAT_S8_Z24
) {
236 depth_irb
= stencil_irb
;
242 else if (intel
->is_g4x
|| intel
->gen
== 5)
247 if (!depth_irb
&& !stencil_irb
) {
249 OUT_BATCH(_3DSTATE_DEPTH_BUFFER
<< 16 | (len
- 2));
250 OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT
<< 18) |
251 (BRW_SURFACE_NULL
<< 29));
256 if (intel
->is_g4x
|| intel
->gen
>= 5)
264 } else if (!depth_irb
&& stencil_irb
) {
266 * There exists a separate stencil buffer but no depth buffer.
268 * The stencil buffer inherits most of its fields from
269 * 3DSTATE_DEPTH_BUFFER: namely the tile walk, surface type, width, and
272 * Since the stencil buffer has quirky pitch requirements, its region
273 * was allocated with half height and double cpp. So we need
274 * a multiplier of 2 to obtain the surface's real height.
276 * Enable the hiz bit because it and the separate stencil bit must have
277 * the same value. From Section 2.11.5.6.1.1 3DSTATE_DEPTH_BUFFER, Bit
278 * 1.21 "Separate Stencil Enable":
279 * [DevIL]: If this field is enabled, Hierarchical Depth Buffer
280 * Enable must also be enabled.
282 * [DevGT]: This field must be set to the same value (enabled or
283 * disabled) as Hierarchical Depth Buffer Enable
285 assert(intel
->has_separate_stencil
);
286 assert(stencil_irb
->Base
.Format
== MESA_FORMAT_S8
);
289 OUT_BATCH(_3DSTATE_DEPTH_BUFFER
<< 16 | (len
- 2));
290 OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT
<< 18) |
291 (1 << 21) | /* separate stencil enable */
292 (1 << 22) | /* hiz enable */
293 (BRW_TILEWALK_YMAJOR
<< 26) |
294 (BRW_SURFACE_2D
<< 29));
296 OUT_BATCH(((stencil_irb
->region
->width
- 1) << 6) |
297 (2 * stencil_irb
->region
->height
- 1) << 19);
307 struct intel_region
*region
= depth_irb
->region
;
309 uint32_t tile_x
, tile_y
, offset
;
311 /* If using separate stencil, hiz must be enabled. */
312 assert(!stencil_irb
|| hiz_region
);
314 switch (region
->cpp
) {
316 format
= BRW_DEPTHFORMAT_D16_UNORM
;
319 if (intel
->depth_buffer_is_float
)
320 format
= BRW_DEPTHFORMAT_D32_FLOAT
;
322 format
= BRW_DEPTHFORMAT_D24_UNORM_X8_UINT
;
324 format
= BRW_DEPTHFORMAT_D24_UNORM_S8_UINT
;
331 offset
= intel_region_tile_offsets(region
, &tile_x
, &tile_y
);
333 assert(intel
->gen
< 6 || region
->tiling
== I915_TILING_Y
);
334 assert(!hiz_region
|| region
->tiling
== I915_TILING_Y
);
337 OUT_BATCH(_3DSTATE_DEPTH_BUFFER
<< 16 | (len
- 2));
338 OUT_BATCH(((region
->pitch
* region
->cpp
) - 1) |
340 ((hiz_region
? 1 : 0) << 21) | /* separate stencil enable */
341 ((hiz_region
? 1 : 0) << 22) | /* hiz enable */
342 (BRW_TILEWALK_YMAJOR
<< 26) |
343 ((region
->tiling
!= I915_TILING_NONE
) << 27) |
344 (BRW_SURFACE_2D
<< 29));
345 OUT_RELOC(region
->buffer
,
346 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
,
348 OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW
<< 1) |
349 ((region
->width
- 1) << 6) |
350 ((region
->height
- 1) << 19));
353 if (intel
->is_g4x
|| intel
->gen
>= 5)
354 OUT_BATCH(tile_x
| (tile_y
<< 16));
356 assert(tile_x
== 0 && tile_y
== 0);
364 /* Emit hiz buffer. */
365 if (hiz_region
|| stencil_irb
) {
367 OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER
<< 16) | (3 - 2));
368 OUT_BATCH(hiz_region
->pitch
* hiz_region
->cpp
- 1);
369 OUT_RELOC(hiz_region
->buffer
,
370 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
,
375 /* Emit stencil buffer. */
376 if (hiz_region
|| stencil_irb
) {
378 OUT_BATCH((_3DSTATE_STENCIL_BUFFER
<< 16) | (3 - 2));
379 OUT_BATCH(stencil_irb
->region
->pitch
* stencil_irb
->region
->cpp
- 1);
380 OUT_RELOC(stencil_irb
->region
->buffer
,
381 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
,
387 * On Gen >= 6, emit clear params for safety. If using hiz, then clear
388 * params must be emitted.
390 * From Section 2.11.5.6.4.1 3DSTATE_CLEAR_PARAMS:
391 * 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE packet
392 * when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
394 if (intel
->gen
>= 6 || hiz_region
) {
396 OUT_BATCH(_3DSTATE_CLEAR_PARAMS
<< 16 | (2 - 2));
402 const struct brw_tracked_state brw_depthbuffer
= {
404 .mesa
= _NEW_BUFFERS
,
405 .brw
= BRW_NEW_BATCH
,
408 .prepare
= prepare_depthbuffer
,
409 .emit
= emit_depthbuffer
,
414 /***********************************************************************
415 * Polygon stipple packet
418 static void upload_polygon_stipple(struct brw_context
*brw
)
420 struct intel_context
*intel
= &brw
->intel
;
421 struct gl_context
*ctx
= &brw
->intel
.ctx
;
424 if (!ctx
->Polygon
.StippleFlag
)
428 OUT_BATCH(_3DSTATE_POLY_STIPPLE_PATTERN
<< 16 | (33 - 2));
430 /* Polygon stipple is provided in OpenGL order, i.e. bottom
431 * row first. If we're rendering to a window (i.e. the
432 * default frame buffer object, 0), then we need to invert
433 * it to match our pixel layout. But if we're rendering
434 * to a FBO (i.e. any named frame buffer object), we *don't*
435 * need to invert - we already match the layout.
437 if (ctx
->DrawBuffer
->Name
== 0) {
438 for (i
= 0; i
< 32; i
++)
439 OUT_BATCH(ctx
->PolygonStipple
[31 - i
]); /* invert */
442 for (i
= 0; i
< 32; i
++)
443 OUT_BATCH(ctx
->PolygonStipple
[i
]);
448 const struct brw_tracked_state brw_polygon_stipple
= {
450 .mesa
= _NEW_POLYGONSTIPPLE
,
451 .brw
= BRW_NEW_CONTEXT
,
454 .emit
= upload_polygon_stipple
458 /***********************************************************************
459 * Polygon stipple offset packet
462 static void upload_polygon_stipple_offset(struct brw_context
*brw
)
464 struct intel_context
*intel
= &brw
->intel
;
465 struct gl_context
*ctx
= &brw
->intel
.ctx
;
467 if (!ctx
->Polygon
.StippleFlag
)
471 OUT_BATCH(_3DSTATE_POLY_STIPPLE_OFFSET
<< 16 | (2-2));
473 /* If we're drawing to a system window (ctx->DrawBuffer->Name == 0),
474 * we have to invert the Y axis in order to match the OpenGL
475 * pixel coordinate system, and our offset must be matched
476 * to the window position. If we're drawing to a FBO
477 * (ctx->DrawBuffer->Name != 0), then our native pixel coordinate
478 * system works just fine, and there's no window system to
481 if (brw
->intel
.ctx
.DrawBuffer
->Name
== 0)
482 OUT_BATCH((32 - (ctx
->DrawBuffer
->Height
& 31)) & 31);
488 #define _NEW_WINDOW_POS 0x40000000
490 const struct brw_tracked_state brw_polygon_stipple_offset
= {
492 .mesa
= _NEW_WINDOW_POS
| _NEW_POLYGONSTIPPLE
,
493 .brw
= BRW_NEW_CONTEXT
,
496 .emit
= upload_polygon_stipple_offset
499 /**********************************************************************
502 static void upload_aa_line_parameters(struct brw_context
*brw
)
504 struct intel_context
*intel
= &brw
->intel
;
505 struct gl_context
*ctx
= &brw
->intel
.ctx
;
507 if (!ctx
->Line
.SmoothFlag
|| !brw
->has_aa_line_parameters
)
510 OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS
<< 16 | (3 - 2));
511 /* use legacy aa line coverage computation */
517 const struct brw_tracked_state brw_aa_line_parameters
= {
520 .brw
= BRW_NEW_CONTEXT
,
523 .emit
= upload_aa_line_parameters
526 /***********************************************************************
527 * Line stipple packet
530 static void upload_line_stipple(struct brw_context
*brw
)
532 struct intel_context
*intel
= &brw
->intel
;
533 struct gl_context
*ctx
= &brw
->intel
.ctx
;
537 if (!ctx
->Line
.StippleFlag
)
541 OUT_BATCH(_3DSTATE_LINE_STIPPLE_PATTERN
<< 16 | (3 - 2));
542 OUT_BATCH(ctx
->Line
.StipplePattern
);
543 tmp
= 1.0 / (GLfloat
) ctx
->Line
.StippleFactor
;
544 tmpi
= tmp
* (1<<13);
545 OUT_BATCH(tmpi
<< 16 | ctx
->Line
.StippleFactor
);
549 const struct brw_tracked_state brw_line_stipple
= {
552 .brw
= BRW_NEW_CONTEXT
,
555 .emit
= upload_line_stipple
559 /***********************************************************************
560 * Misc invarient state packets
563 static void upload_invarient_state( struct brw_context
*brw
)
565 struct intel_context
*intel
= &brw
->intel
;
568 /* 0x61040000 Pipeline Select */
569 /* PipelineSelect : 0 */
570 struct brw_pipeline_select ps
;
572 memset(&ps
, 0, sizeof(ps
));
573 ps
.header
.opcode
= brw
->CMD_PIPELINE_SELECT
;
574 ps
.header
.pipeline_select
= 0;
575 BRW_BATCH_STRUCT(brw
, &ps
);
578 if (intel
->gen
< 6) {
579 struct brw_global_depth_offset_clamp gdo
;
580 memset(&gdo
, 0, sizeof(gdo
));
582 /* Disable depth offset clamping.
584 gdo
.header
.opcode
= _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP
;
585 gdo
.header
.length
= sizeof(gdo
)/4 - 2;
586 gdo
.depth_offset_clamp
= 0.0;
588 BRW_BATCH_STRUCT(brw
, &gdo
);
591 if (intel
->gen
>= 6) {
593 int len
= intel
->gen
>= 7 ? 4 : 3;
596 OUT_BATCH(_3DSTATE_MULTISAMPLE
<< 16 | (len
- 2));
597 OUT_BATCH(MS_PIXEL_LOCATION_CENTER
|
599 OUT_BATCH(0); /* positions for 4/8-sample */
605 OUT_BATCH(_3DSTATE_SAMPLE_MASK
<< 16 | (2 - 2));
609 if (intel
->gen
< 7) {
610 for (i
= 0; i
< 4; i
++) {
612 OUT_BATCH(_3DSTATE_GS_SVB_INDEX
<< 16 | (4 - 2));
613 OUT_BATCH(i
<< SVB_INDEX_SHIFT
);
615 OUT_BATCH(0xffffffff);
621 /* 0x61020000 State Instruction Pointer */
623 struct brw_system_instruction_pointer sip
;
624 memset(&sip
, 0, sizeof(sip
));
626 sip
.header
.opcode
= CMD_STATE_INSN_POINTER
;
627 sip
.header
.length
= 0;
629 sip
.bits0
.system_instruction_pointer
= 0;
630 BRW_BATCH_STRUCT(brw
, &sip
);
635 struct brw_vf_statistics vfs
;
636 memset(&vfs
, 0, sizeof(vfs
));
638 vfs
.opcode
= brw
->CMD_VF_STATISTICS
;
639 if (unlikely(INTEL_DEBUG
& DEBUG_STATS
))
640 vfs
.statistics_enable
= 1;
642 BRW_BATCH_STRUCT(brw
, &vfs
);
646 const struct brw_tracked_state brw_invarient_state
= {
649 .brw
= BRW_NEW_CONTEXT
,
652 .emit
= upload_invarient_state
656 * Define the base addresses which some state is referenced from.
658 * This allows us to avoid having to emit relocations for the objects,
659 * and is actually required for binding table pointers on gen6.
661 * Surface state base address covers binding table pointers and
662 * surface state objects, but not the surfaces that the surface state
665 static void upload_state_base_address( struct brw_context
*brw
)
667 struct intel_context
*intel
= &brw
->intel
;
669 if (intel
->gen
>= 6) {
671 OUT_BATCH(CMD_STATE_BASE_ADDRESS
<< 16 | (10 - 2));
672 /* General state base address: stateless DP read/write requests */
674 /* Surface state base address:
675 * BINDING_TABLE_STATE
678 OUT_RELOC(intel
->batch
.bo
, I915_GEM_DOMAIN_SAMPLER
, 0, 1);
679 /* Dynamic state base address:
681 * SAMPLER_BORDER_COLOR_STATE
682 * CLIP, SF, WM/CC viewport state
684 * DEPTH_STENCIL_STATE
686 * Push constants (when INSTPM: CONSTANT_BUFFER Address Offset
687 * Disable is clear, which we rely on)
689 OUT_RELOC(intel
->batch
.bo
, (I915_GEM_DOMAIN_RENDER
|
690 I915_GEM_DOMAIN_INSTRUCTION
), 0, 1);
692 OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */
693 OUT_BATCH(1); /* Instruction base address: shader kernels (incl. SIP) */
694 OUT_BATCH(1); /* General state upper bound */
695 OUT_BATCH(1); /* Dynamic state upper bound */
696 OUT_BATCH(1); /* Indirect object upper bound */
697 OUT_BATCH(1); /* Instruction access upper bound */
699 } else if (intel
->gen
== 5) {
701 OUT_BATCH(CMD_STATE_BASE_ADDRESS
<< 16 | (8 - 2));
702 OUT_BATCH(1); /* General state base address */
703 OUT_RELOC(intel
->batch
.bo
, I915_GEM_DOMAIN_SAMPLER
, 0,
704 1); /* Surface state base address */
705 OUT_BATCH(1); /* Indirect object base address */
706 OUT_BATCH(1); /* Instruction base address */
707 OUT_BATCH(1); /* General state upper bound */
708 OUT_BATCH(1); /* Indirect object upper bound */
709 OUT_BATCH(1); /* Instruction access upper bound */
713 OUT_BATCH(CMD_STATE_BASE_ADDRESS
<< 16 | (6 - 2));
714 OUT_BATCH(1); /* General state base address */
715 OUT_RELOC(intel
->batch
.bo
, I915_GEM_DOMAIN_SAMPLER
, 0,
716 1); /* Surface state base address */
717 OUT_BATCH(1); /* Indirect object base address */
718 OUT_BATCH(1); /* General state upper bound */
719 OUT_BATCH(1); /* Indirect object upper bound */
724 const struct brw_tracked_state brw_state_base_address
= {
727 .brw
= BRW_NEW_BATCH
,
730 .emit
= upload_state_base_address