i965: Add HiZ operation state to brw_context
[mesa.git] / src / mesa / drivers / dri / i965 / brw_misc_state.c
/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keith@tungstengraphics.com>
 */


#include "intel_batchbuffer.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_regions.h"

#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"

/* Constant single cliprect for framebuffer object or DRI2 drawing */
static void upload_drawing_rect(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
   OUT_BATCH(0); /* xmin, ymin */
   OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) |
             ((ctx->DrawBuffer->Height - 1) << 16));
   OUT_BATCH(0);
   ADVANCE_BATCH();
}
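
/*
 * Illustrative worked example (hypothetical window size, not a value the
 * driver hard-codes): for a 1920x1080 drawbuffer, the third DWord above
 * encodes the inclusive lower-right corner as
 * ((1080 - 1) << 16) | ((1920 - 1) & 0xffff) = 0x0437077f, while the
 * second and fourth DWords (upper-left corner and drawing-rectangle
 * origin) stay zero, since FBO and DRI2 rendering always starts at (0, 0).
 */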

const struct brw_tracked_state brw_drawing_rect = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_drawing_rect
};

/**
 * Upload the binding table pointers, which point to each stage's array of
 * surface state pointers.
 *
 * The binding table pointers are relative to the surface state base address,
 * which points at the batchbuffer containing the streamed batch state.
 */
static void upload_binding_table_pointers(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;

   BEGIN_BATCH(6);
   OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 | (6 - 2));
   OUT_BATCH(brw->bind.bo_offset);
   OUT_BATCH(0); /* gs */
   OUT_BATCH(0); /* clip */
   OUT_BATCH(0); /* sf */
   OUT_BATCH(brw->bind.bo_offset);
   ADVANCE_BATCH();
}
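
/*
 * A note on the offsets written above: they are not graphics addresses.
 * The hardware resolves a binding table as Surface State Base Address +
 * pointer, so a hypothetical brw->bind.bo_offset of 0x1400 names byte
 * 0x1400 within the batchbuffer that STATE_BASE_ADDRESS pointed the
 * surface state base at.
 */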

const struct brw_tracked_state brw_binding_table_pointers = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_BATCH |
              BRW_NEW_STATE_BASE_ADDRESS |
              BRW_NEW_VS_BINDING_TABLE |
              BRW_NEW_GS_BINDING_TABLE |
              BRW_NEW_PS_BINDING_TABLE),
      .cache = 0,
   },
   .emit = upload_binding_table_pointers,
};

/**
 * Upload the binding table pointers, which point to each stage's array of
 * surface state pointers.
 *
 * The binding table pointers are relative to the surface state base address,
 * which points at the batchbuffer containing the streamed batch state.
 */
static void upload_gen6_binding_table_pointers(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 |
             GEN6_BINDING_TABLE_MODIFY_VS |
             GEN6_BINDING_TABLE_MODIFY_GS |
             GEN6_BINDING_TABLE_MODIFY_PS |
             (4 - 2));
   OUT_BATCH(brw->bind.bo_offset); /* vs */
   OUT_BATCH(0); /* gs */
   OUT_BATCH(brw->bind.bo_offset); /* wm/ps */
   ADVANCE_BATCH();
}
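
/*
 * Unlike the gen4 packet above, the gen6 variant carries per-stage modify
 * enable bits in the header DWord; a pointer is latched only when its
 * GEN6_BINDING_TABLE_MODIFY_* bit is set. As a sketch, a hypothetical
 * PS-only update would look like:
 *
 *    OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 |
 *              GEN6_BINDING_TABLE_MODIFY_PS |
 *              (4 - 2));
 *
 * with the VS and GS pointer DWords then treated as don't-cares.
 */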

const struct brw_tracked_state gen6_binding_table_pointers = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_BATCH |
              BRW_NEW_STATE_BASE_ADDRESS |
              BRW_NEW_VS_BINDING_TABLE |
              BRW_NEW_GS_BINDING_TABLE |
              BRW_NEW_PS_BINDING_TABLE),
      .cache = 0,
   },
   .emit = upload_gen6_binding_table_pointers,
};

/**
 * Upload pointers to the per-stage state.
 *
 * The state pointers in this packet are all relative to the general state
 * base address set by CMD_STATE_BASE_ADDRESS, which is 0.
 */
static void upload_pipelined_state_pointers(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;

   if (intel->gen == 5) {
      /* Need to flush before changing clip max threads for errata. */
      BEGIN_BATCH(1);
      OUT_BATCH(MI_FLUSH);
      ADVANCE_BATCH();
   }

   BEGIN_BATCH(7);
   OUT_BATCH(_3DSTATE_PIPELINED_POINTERS << 16 | (7 - 2));
   OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->vs.state_offset);
   if (brw->gs.prog_active)
      OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
                brw->gs.state_offset | 1);
   else
      OUT_BATCH(0);
   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->clip.state_offset | 1);
   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->sf.state_offset);
   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->wm.state_offset);
   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->cc.state_offset);
   ADVANCE_BATCH();

   brw->state.dirty.brw |= BRW_NEW_PSP;
}
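
/*
 * A note on the "| 1" above: in 3DSTATE_PIPELINED_POINTERS, bit 0 of the
 * GS and CLIP pointer DWords is that unit's enable flag. The unit state
 * offsets are aligned well past bit 0, which is what frees the low bit
 * for the flag, so brw->clip.state_offset | 1 reads as "clip unit state
 * here, clip enabled", while the bare OUT_BATCH(0) for an inactive GS
 * leaves that stage disabled.
 */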

static void upload_psp_urb_cbs(struct brw_context *brw)
{
   upload_pipelined_state_pointers(brw);
   brw_upload_urb_fence(brw);
   brw_upload_cs_urb_state(brw);
}

const struct brw_tracked_state brw_psp_urb_cbs = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_URB_FENCE |
              BRW_NEW_BATCH |
              BRW_NEW_STATE_BASE_ADDRESS),
      .cache = (CACHE_NEW_VS_UNIT |
                CACHE_NEW_GS_UNIT |
                CACHE_NEW_GS_PROG |
                CACHE_NEW_CLIP_UNIT |
                CACHE_NEW_SF_UNIT |
                CACHE_NEW_WM_UNIT |
                CACHE_NEW_CC_UNIT)
   },
   .emit = upload_psp_urb_cbs,
};

static void emit_depthbuffer(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;
   /* _NEW_BUFFERS */
   struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
   struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
   struct intel_region *hiz_region = NULL;
   unsigned int len;

   if (depth_irb &&
       depth_irb->mt &&
       depth_irb->mt->hiz_mt) {
      hiz_region = depth_irb->mt->hiz_mt->region;
   }

   /* 3DSTATE_DEPTH_BUFFER and 3DSTATE_STENCIL_BUFFER are both
    * non-pipelined state that will need the PIPE_CONTROL workaround.
    */
   if (intel->gen == 6) {
      intel_emit_post_sync_nonzero_flush(intel);
      intel_emit_depth_stall_flushes(intel);
   }

   /*
    * If either the depth or stencil buffer has a packed depth/stencil
    * format, then don't use separate stencil. Emit only a depth buffer.
    */
   if (depth_irb && depth_irb->Base.Format == MESA_FORMAT_S8_Z24) {
      stencil_irb = NULL;
   } else if (!depth_irb && stencil_irb
              && stencil_irb->Base.Format == MESA_FORMAT_S8_Z24) {
      depth_irb = stencil_irb;
      stencil_irb = NULL;
   }

   if (intel->gen >= 6)
      len = 7;
   else if (intel->is_g4x || intel->gen == 5)
      len = 6;
   else
      len = 5;

   if (!depth_irb && !stencil_irb) {
      BEGIN_BATCH(len);
      OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
      OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
                (BRW_SURFACE_NULL << 29));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);

      if (intel->is_g4x || intel->gen >= 5)
         OUT_BATCH(0);

      if (intel->gen >= 6)
         OUT_BATCH(0);

      ADVANCE_BATCH();

   } else if (!depth_irb && stencil_irb) {
      /*
       * There exists a separate stencil buffer but no depth buffer.
       *
       * The stencil buffer inherits most of its fields from
       * 3DSTATE_DEPTH_BUFFER: namely the tile walk, surface type, width, and
       * height.
       *
       * Since the stencil buffer has quirky pitch requirements, its region
       * was allocated with half height and double cpp, so we need a
       * multiplier of 2 to obtain the surface's real height.
       *
       * Enable the hiz bit because it and the separate stencil bit must have
       * the same value. From Section 2.11.5.6.1.1 3DSTATE_DEPTH_BUFFER, Bit
       * 1.21 "Separate Stencil Enable":
       *     [DevIL]: If this field is enabled, Hierarchical Depth Buffer
       *     Enable must also be enabled.
       *
       *     [DevGT]: This field must be set to the same value (enabled or
       *     disabled) as Hierarchical Depth Buffer Enable
       */
      struct intel_region *region = stencil_irb->mt->region;

      assert(intel->has_separate_stencil);
      assert(stencil_irb->Base.Format == MESA_FORMAT_S8);

      BEGIN_BATCH(len);
      OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
      OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
                (1 << 21) | /* separate stencil enable */
                (1 << 22) | /* hiz enable */
                (BRW_TILEWALK_YMAJOR << 26) |
                (BRW_SURFACE_2D << 29));
      OUT_BATCH(0);
      OUT_BATCH(((region->width - 1) << 6) |
                ((2 * region->height - 1) << 19));
      OUT_BATCH(0);
      OUT_BATCH(0);

      if (intel->gen >= 6)
         OUT_BATCH(0);

      ADVANCE_BATCH();

   } else {
      struct intel_region *region = depth_irb->mt->region;
      unsigned int format;
      uint32_t tile_x, tile_y, offset;

      /* If using separate stencil, hiz must be enabled. */
      assert(!stencil_irb || hiz_region);

      switch (region->cpp) {
      case 2:
         format = BRW_DEPTHFORMAT_D16_UNORM;
         break;
      case 4:
         if (intel->depth_buffer_is_float)
            format = BRW_DEPTHFORMAT_D32_FLOAT;
         else if (hiz_region)
            format = BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
         else
            format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
         break;
      default:
         assert(0);
         return;
      }

      offset = intel_renderbuffer_tile_offsets(depth_irb, &tile_x, &tile_y);

      assert(intel->gen < 6 || region->tiling == I915_TILING_Y);
      assert(!hiz_region || region->tiling == I915_TILING_Y);

      BEGIN_BATCH(len);
      OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
      OUT_BATCH(((region->pitch * region->cpp) - 1) |
                (format << 18) |
                ((hiz_region ? 1 : 0) << 21) | /* separate stencil enable */
                ((hiz_region ? 1 : 0) << 22) | /* hiz enable */
                (BRW_TILEWALK_YMAJOR << 26) |
                ((region->tiling != I915_TILING_NONE) << 27) |
                (BRW_SURFACE_2D << 29));
      OUT_RELOC(region->bo,
                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                offset);
      OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
                ((region->width - 1) << 6) |
                ((region->height - 1) << 19));
      OUT_BATCH(0);

      if (intel->is_g4x || intel->gen >= 5)
         OUT_BATCH(tile_x | (tile_y << 16));
      else
         assert(tile_x == 0 && tile_y == 0);

      if (intel->gen >= 6)
         OUT_BATCH(0);

      ADVANCE_BATCH();
   }
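
   /*
    * Worked example for the encodings above (hypothetical buffer): a
    * 1024x768 Y-tiled depth buffer with region->pitch = 1024 pixels and
    * region->cpp = 4 puts its byte pitch minus one, 1024 * 4 - 1 = 4095,
    * in the low bits of the second DWord, and packs the inclusive size
    * into the fourth DWord as ((1024 - 1) << 6) | ((768 - 1) << 19).
    */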

   if (hiz_region || stencil_irb) {
      /*
       * In the 3DSTATE_DEPTH_BUFFER batch emitted above, the 'separate
       * stencil enable' and 'hiz enable' bits were set. Therefore we must
       * emit 3DSTATE_HIER_DEPTH_BUFFER and 3DSTATE_STENCIL_BUFFER. Even if
       * there is no stencil buffer, 3DSTATE_STENCIL_BUFFER must be emitted;
       * failure to do so causes hangs on gen5 and a stall on gen6.
       */

      /* Emit hiz buffer. */
      if (hiz_region) {
         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
         OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1);
         OUT_RELOC(hiz_region->bo,
                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                   0);
         ADVANCE_BATCH();
      } else {
         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
         OUT_BATCH(0);
         OUT_BATCH(0);
         ADVANCE_BATCH();
      }

      /* Emit stencil buffer. */
      if (stencil_irb) {
         struct intel_region *region = stencil_irb->mt->region;
         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
         OUT_BATCH(region->pitch * region->cpp - 1);
         OUT_RELOC(region->bo,
                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                   0);
         ADVANCE_BATCH();
      } else {
         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
         OUT_BATCH(0);
         OUT_BATCH(0);
         ADVANCE_BATCH();
      }
   }

   /*
    * On Gen >= 6, emit clear params for safety. If using hiz, then clear
    * params must be emitted.
    *
    * From Section 2.11.5.6.4.1 3DSTATE_CLEAR_PARAMS:
    *     3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE
    *     packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
    */
   if (intel->gen >= 6 || hiz_region) {
      if (intel->gen == 6)
         intel_emit_post_sync_nonzero_flush(intel);

      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 | (2 - 2));
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }
}

const struct brw_tracked_state brw_depthbuffer = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH,
      .cache = 0,
   },
   .emit = emit_depthbuffer,
};



/***********************************************************************
 * Polygon stipple packet
 */

static void upload_polygon_stipple(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &brw->intel.ctx;
   GLuint i;

   /* _NEW_POLYGON */
   if (!ctx->Polygon.StippleFlag)
      return;

   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(intel);

   BEGIN_BATCH(33);
   OUT_BATCH(_3DSTATE_POLY_STIPPLE_PATTERN << 16 | (33 - 2));

   /* Polygon stipple is provided in OpenGL order, i.e. bottom
    * row first. If we're rendering to a window (i.e. the
    * default frame buffer object, 0), then we need to invert
    * it to match our pixel layout. But if we're rendering
    * to an FBO (i.e. any named frame buffer object), we *don't*
    * need to invert - we already match the layout.
    */
   if (ctx->DrawBuffer->Name == 0) {
      for (i = 0; i < 32; i++)
         OUT_BATCH(ctx->PolygonStipple[31 - i]); /* invert */
   } else {
      for (i = 0; i < 32; i++)
         OUT_BATCH(ctx->PolygonStipple[i]);
   }
   CACHED_BATCH();
}
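
/*
 * Illustrative consequence of the inversion above (window case): row 0 of
 * ctx->PolygonStipple, the bottom row in GL's convention, is emitted as
 * the last DWord of the packet, so the pattern comes out upright in the
 * hardware's top-left-origin coordinate system.
 */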

const struct brw_tracked_state brw_polygon_stipple = {
   .dirty = {
      .mesa = (_NEW_POLYGONSTIPPLE |
               _NEW_POLYGON),
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_polygon_stipple
};


/***********************************************************************
 * Polygon stipple offset packet
 */

static void upload_polygon_stipple_offset(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &brw->intel.ctx;

   /* _NEW_POLYGON */
   if (!ctx->Polygon.StippleFlag)
      return;

   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(intel);

   BEGIN_BATCH(2);
   OUT_BATCH(_3DSTATE_POLY_STIPPLE_OFFSET << 16 | (2 - 2));

   /* _NEW_BUFFERS
    *
    * If we're drawing to a system window (ctx->DrawBuffer->Name == 0),
    * we have to invert the Y axis in order to match the OpenGL
    * pixel coordinate system, and our offset must be matched
    * to the window position. If we're drawing to an FBO
    * (ctx->DrawBuffer->Name != 0), then our native pixel coordinate
    * system works just fine, and there's no window system to
    * worry about.
    */
   if (ctx->DrawBuffer->Name == 0)
      OUT_BATCH((32 - (ctx->DrawBuffer->Height & 31)) & 31);
   else
      OUT_BATCH(0);
   CACHED_BATCH();
}
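
/*
 * Worked example (hypothetical window height): for a 1000-pixel-tall
 * system framebuffer, Height & 31 = 8, so the packet above programs a Y
 * offset of (32 - 8) & 31 = 24. That phase shift keeps the 32x32 pattern
 * anchored to the window's bottom-left corner after the Y flip.
 */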

const struct brw_tracked_state brw_polygon_stipple_offset = {
   .dirty = {
      .mesa = (_NEW_BUFFERS |
               _NEW_POLYGON),
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_polygon_stipple_offset
};

/**********************************************************************
 * AA Line parameters
 */
static void upload_aa_line_parameters(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &brw->intel.ctx;

   if (!ctx->Line.SmoothFlag || !brw->has_aa_line_parameters)
      return;

   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(intel);

   BEGIN_BATCH(3);
   OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS << 16 | (3 - 2));
   /* use legacy aa line coverage computation */
   OUT_BATCH(0);
   OUT_BATCH(0);
   CACHED_BATCH();
}

const struct brw_tracked_state brw_aa_line_parameters = {
   .dirty = {
      .mesa = _NEW_LINE,
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_aa_line_parameters
};

/***********************************************************************
 * Line stipple packet
 */

static void upload_line_stipple(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &brw->intel.ctx;
   GLfloat tmp;
   GLint tmpi;

   if (!ctx->Line.StippleFlag)
      return;

   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(intel);

   BEGIN_BATCH(3);
   OUT_BATCH(_3DSTATE_LINE_STIPPLE_PATTERN << 16 | (3 - 2));
   OUT_BATCH(ctx->Line.StipplePattern);
   tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor;
   tmpi = tmp * (1 << 13);
   OUT_BATCH(tmpi << 16 | ctx->Line.StippleFactor);
   CACHED_BATCH();
}
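
/*
 * Worked example for the fixed-point math above (hypothetical factor):
 * with ctx->Line.StippleFactor = 3, tmp = 1.0 / 3.0, and the reciprocal
 * scaled by 2^13 truncates to tmpi = 2730 (0xaaa). The final DWord is
 * then (2730 << 16) | 3 = 0x0aaa0003, giving the hardware both the
 * repeat count and its inverse in fixed point.
 */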

const struct brw_tracked_state brw_line_stipple = {
   .dirty = {
      .mesa = _NEW_LINE,
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_line_stipple
};


/***********************************************************************
 * Misc invariant state packets
 */

static void upload_invarient_state(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;

   /* 3DSTATE_SIP, 3DSTATE_MULTISAMPLE, etc. are nonpipelined. */
   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(intel);

   /* Select the 3D pipeline (as opposed to media) */
   BEGIN_BATCH(1);
   OUT_BATCH(brw->CMD_PIPELINE_SELECT << 16 | 0);
   ADVANCE_BATCH();

   if (intel->gen < 6) {
      /* Disable depth offset clamping. */
      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2));
      OUT_BATCH_F(0.0);
      ADVANCE_BATCH();
   }

   if (intel->gen >= 6) {
      int i;
      int len = intel->gen >= 7 ? 4 : 3;

      BEGIN_BATCH(len);
      OUT_BATCH(_3DSTATE_MULTISAMPLE << 16 | (len - 2));
      OUT_BATCH(MS_PIXEL_LOCATION_CENTER |
                MS_NUMSAMPLES_1);
      OUT_BATCH(0); /* positions for 4/8-sample */
      if (intel->gen >= 7)
         OUT_BATCH(0);
      ADVANCE_BATCH();

      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_SAMPLE_MASK << 16 | (2 - 2));
      OUT_BATCH(1);
      ADVANCE_BATCH();

      if (intel->gen < 7) {
         for (i = 0; i < 4; i++) {
            BEGIN_BATCH(4);
            OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
            OUT_BATCH(i << SVB_INDEX_SHIFT);
            OUT_BATCH(0);
            OUT_BATCH(0xffffffff);
            ADVANCE_BATCH();
         }
      }
   }

   BEGIN_BATCH(2);
   OUT_BATCH(CMD_STATE_SIP << 16 | (2 - 2));
   OUT_BATCH(0);
   ADVANCE_BATCH();

   BEGIN_BATCH(1);
   OUT_BATCH(brw->CMD_VF_STATISTICS << 16 |
             (unlikely(INTEL_DEBUG & DEBUG_STATS) ? 1 : 0));
   ADVANCE_BATCH();
}
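
/*
 * A note on the gen6 SVB loop above: each _3DSTATE_GS_SVB_INDEX packet
 * selects one of the four streamed-vertex-buffer index registers via
 * (i << SVB_INDEX_SHIFT), then loads index 0 and a maximum index of
 * 0xffffffff, so GS threads that later write transform-feedback output
 * start from a known state rather than whatever the register held before.
 */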

const struct brw_tracked_state brw_invarient_state = {
   .dirty = {
      .mesa = 0,
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_invarient_state
};

/**
 * Define the base addresses which some state is referenced from.
 *
 * This allows us to avoid having to emit relocations for the objects,
 * and is actually required for binding table pointers on gen6.
 *
 * Surface state base address covers binding table pointers and
 * surface state objects, but not the surfaces that the surface state
 * objects point to.
 */
static void upload_state_base_address(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;

   /* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of
    * vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be
    * programmed prior to STATE_BASE_ADDRESS.
    *
    * However, given that the instruction SBA (general state base
    * address) on this chipset is always set to 0 across X and GL,
    * maybe this isn't required for us in particular.
    */

   if (intel->gen >= 6) {
      if (intel->gen == 6)
         intel_emit_post_sync_nonzero_flush(intel);

      BEGIN_BATCH(10);
      OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
      /* General state base address: stateless DP read/write requests */
      OUT_BATCH(1);
      /* Surface state base address:
       * BINDING_TABLE_STATE
       * SURFACE_STATE
       */
      OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1);
      /* Dynamic state base address:
       * SAMPLER_STATE
       * SAMPLER_BORDER_COLOR_STATE
       * CLIP, SF, WM/CC viewport state
       * COLOR_CALC_STATE
       * DEPTH_STENCIL_STATE
       * BLEND_STATE
       * Push constants (when INSTPM: CONSTANT_BUFFER Address Offset
       * Disable is clear, which we rely on)
       */
      OUT_RELOC(intel->batch.bo, (I915_GEM_DOMAIN_RENDER |
                                  I915_GEM_DOMAIN_INSTRUCTION), 0, 1);

      OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */
      OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
                1); /* Instruction base address: shader kernels (incl. SIP) */

      OUT_BATCH(1); /* General state upper bound */
      OUT_BATCH(1); /* Dynamic state upper bound */
      OUT_BATCH(1); /* Indirect object upper bound */
      OUT_BATCH(1); /* Instruction access upper bound */
      ADVANCE_BATCH();
   } else if (intel->gen == 5) {
      BEGIN_BATCH(8);
      OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
      OUT_BATCH(1); /* General state base address */
      OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
                1); /* Surface state base address */
      OUT_BATCH(1); /* Indirect object base address */
      OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
                1); /* Instruction base address */
      OUT_BATCH(1); /* General state upper bound */
      OUT_BATCH(1); /* Indirect object upper bound */
      OUT_BATCH(1); /* Instruction access upper bound */
      ADVANCE_BATCH();
   } else {
      BEGIN_BATCH(6);
      OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
      OUT_BATCH(1); /* General state base address */
      OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
                1); /* Surface state base address */
      OUT_BATCH(1); /* Indirect object base address */
      OUT_BATCH(1); /* General state upper bound */
      OUT_BATCH(1); /* Indirect object upper bound */
      ADVANCE_BATCH();
   }

   /* According to section 3.6.1 of VOL1 of the 965 PRM,
    * STATE_BASE_ADDRESS updates require a reissue of:
    *
    * 3DSTATE_PIPELINED_POINTERS
    * 3DSTATE_BINDING_TABLE_POINTERS
    * MEDIA_STATE_POINTERS
    *
    * and this continues through Ironlake. The Sandy Bridge PRM, vol
    * 1 part 1 says that the following packets must be reissued:
    *
    * 3DSTATE_CC_POINTERS
    * 3DSTATE_BINDING_TABLE_POINTERS
    * 3DSTATE_SAMPLER_STATE_POINTERS
    * 3DSTATE_VIEWPORT_STATE_POINTERS
    * MEDIA_STATE_POINTERS
    *
    * Those are always reissued following SBA updates anyway (new
    * batch time), except in the case of the program cache BO
    * changing. Having a separate state flag makes the sequence more
    * obvious.
    */

   brw->state.dirty.brw |= BRW_NEW_STATE_BASE_ADDRESS;
}
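
/*
 * A note on the literal 1s above: every address DWord in
 * STATE_BASE_ADDRESS carries a "modify enable" flag in bit 0, so
 * OUT_BATCH(1) programs a base address (or upper bound) of zero with the
 * modify bit set, and OUT_RELOC(..., 1) does the same for a
 * buffer-relative address by folding the bit into the relocation delta.
 * With a hypothetical batchbuffer bound at 0x12345000, the surface state
 * base DWord would resolve to 0x12345001.
 */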

const struct brw_tracked_state brw_state_base_address = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_BATCH |
              BRW_NEW_PROGRAM_CACHE),
      .cache = 0,
   },
   .emit = upload_state_base_address
};