intel: Replace intel_renderbuffer::region with a miptree [v3]
src/mesa/drivers/dri/i965/brw_misc_state.c
/*
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keith@tungstengraphics.com>
 */


#include "intel_batchbuffer.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_regions.h"

#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"

/* Constant single cliprect for framebuffer object or DRI2 drawing */
static void upload_drawing_rect(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
   OUT_BATCH(0); /* xmin, ymin */
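   /* The max coordinates are inclusive, hence the width/height minus
    * one below: xmax occupies bits 15:0 and ymax bits 31:16.
    */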
   OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) |
             ((ctx->DrawBuffer->Height - 1) << 16));
   OUT_BATCH(0);
   ADVANCE_BATCH();
}

const struct brw_tracked_state brw_drawing_rect = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_drawing_rect
};

/**
 * Upload the binding table pointers, which point to each stage's array
 * of surface state pointers.
 *
 * The binding table pointers are relative to the surface state base
 * address, which points at the batchbuffer containing the streamed
 * batch state.
 */
static void upload_binding_table_pointers(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;

   BEGIN_BATCH(6);
   OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 | (6 - 2));
   OUT_BATCH(brw->bind.bo_offset);
   OUT_BATCH(0); /* gs */
   OUT_BATCH(0); /* clip */
   OUT_BATCH(0); /* sf */
   OUT_BATCH(brw->bind.bo_offset);
   ADVANCE_BATCH();
}

const struct brw_tracked_state brw_binding_table_pointers = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_BATCH |
              BRW_NEW_STATE_BASE_ADDRESS |
              BRW_NEW_VS_BINDING_TABLE |
              BRW_NEW_GS_BINDING_TABLE |
              BRW_NEW_PS_BINDING_TABLE),
      .cache = 0,
   },
   .emit = upload_binding_table_pointers,
};

/**
 * Upload the binding table pointers, which point to each stage's array
 * of surface state pointers.
 *
 * The binding table pointers are relative to the surface state base
 * address, which points at the batchbuffer containing the streamed
 * batch state.
 */
static void upload_gen6_binding_table_pointers(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 |
             GEN6_BINDING_TABLE_MODIFY_VS |
             GEN6_BINDING_TABLE_MODIFY_GS |
             GEN6_BINDING_TABLE_MODIFY_PS |
             (4 - 2));
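   /* The MODIFY bits above select which of the three pointers this
    * packet actually updates; all three are rewritten here, with the
    * unused GS binding table simply pointed at zero.
    */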
   OUT_BATCH(brw->bind.bo_offset); /* vs */
   OUT_BATCH(0); /* gs */
   OUT_BATCH(brw->bind.bo_offset); /* wm/ps */
   ADVANCE_BATCH();
}

const struct brw_tracked_state gen6_binding_table_pointers = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_BATCH |
              BRW_NEW_STATE_BASE_ADDRESS |
              BRW_NEW_VS_BINDING_TABLE |
              BRW_NEW_GS_BINDING_TABLE |
              BRW_NEW_PS_BINDING_TABLE),
      .cache = 0,
   },
   .emit = upload_gen6_binding_table_pointers,
};

/**
 * Upload pointers to the per-stage state.
 *
 * The state pointers in this packet are all relative to the general state
 * base address set by CMD_STATE_BASE_ADDRESS, which is 0.
 */
static void upload_pipelined_state_pointers(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;

   if (intel->gen == 5) {
      /* Need to flush before changing clip max threads for errata. */
      BEGIN_BATCH(1);
      OUT_BATCH(MI_FLUSH);
      ADVANCE_BATCH();
   }

   BEGIN_BATCH(7);
   OUT_BATCH(_3DSTATE_PIPELINED_POINTERS << 16 | (7 - 2));
   OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->vs.state_offset);
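   /* For the GS and CLIP entries below, bit 0 of the state offset acts
    * as the unit's enable bit, hence the "| 1".
    */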
   if (brw->gs.prog_active)
      OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
                brw->gs.state_offset | 1);
   else
      OUT_BATCH(0);
   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->clip.state_offset | 1);
   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->sf.state_offset);
   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->wm.state_offset);
   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->cc.state_offset);
   ADVANCE_BATCH();

   brw->state.dirty.brw |= BRW_NEW_PSP;
}

static void upload_psp_urb_cbs(struct brw_context *brw)
{
   upload_pipelined_state_pointers(brw);
   brw_upload_urb_fence(brw);
   brw_upload_cs_urb_state(brw);
}

const struct brw_tracked_state brw_psp_urb_cbs = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_URB_FENCE |
              BRW_NEW_BATCH |
              BRW_NEW_STATE_BASE_ADDRESS),
      .cache = (CACHE_NEW_VS_UNIT |
                CACHE_NEW_GS_UNIT |
                CACHE_NEW_GS_PROG |
                CACHE_NEW_CLIP_UNIT |
                CACHE_NEW_SF_UNIT |
                CACHE_NEW_WM_UNIT |
                CACHE_NEW_CC_UNIT)
   },
   .emit = upload_psp_urb_cbs,
};

static void emit_depthbuffer(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;
   /* _NEW_BUFFERS */
   struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
   struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
   struct intel_region *hiz_region = NULL;
   unsigned int len;

   if (depth_irb && depth_irb->mt) {
      hiz_region = depth_irb->mt->hiz_region;
   }

   /* 3DSTATE_DEPTH_BUFFER and 3DSTATE_STENCIL_BUFFER are both
    * non-pipelined state that will need the PIPE_CONTROL workaround.
    */
   if (intel->gen == 6) {
      intel_emit_post_sync_nonzero_flush(intel);
      intel_emit_depth_stall_flushes(intel);
   }

   /*
    * If either the depth or stencil buffer has a packed depth/stencil
    * format, then don't use separate stencil. Emit only a depth buffer.
    */
   if (depth_irb && depth_irb->Base.Format == MESA_FORMAT_S8_Z24) {
      stencil_irb = NULL;
   } else if (!depth_irb && stencil_irb
              && stencil_irb->Base.Format == MESA_FORMAT_S8_Z24) {
      depth_irb = stencil_irb;
      stencil_irb = NULL;
   }

   if (intel->gen >= 6)
      len = 7;
   else if (intel->is_g4x || intel->gen == 5)
      len = 6;
   else
      len = 5;

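   /* The base packet is 5 dwords on original gen4; g4x and gen5 add a
    * dword for the intra-tile offsets, and gen6 adds one more. The
    * branches below pad accordingly.
    */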
   if (!depth_irb && !stencil_irb) {
      BEGIN_BATCH(len);
      OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
      OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
                (BRW_SURFACE_NULL << 29));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);

      if (intel->is_g4x || intel->gen >= 5)
         OUT_BATCH(0);

      if (intel->gen >= 6)
         OUT_BATCH(0);

      ADVANCE_BATCH();

   } else if (!depth_irb && stencil_irb) {
      /*
       * There exists a separate stencil buffer but no depth buffer.
       *
       * The stencil buffer inherits most of its fields from
       * 3DSTATE_DEPTH_BUFFER: namely the tile walk, surface type, width,
       * and height.
       *
       * Since the stencil buffer has quirky pitch requirements, its region
       * was allocated with half height and double cpp. So we need
       * a multiplier of 2 to obtain the surface's real height.
       *
       * Enable the hiz bit because it and the separate stencil bit must have
       * the same value. From Section 2.11.5.6.1.1 3DSTATE_DEPTH_BUFFER, Bit
       * 1.21 "Separate Stencil Enable":
       *   [DevIL]: If this field is enabled, Hierarchical Depth Buffer
       *   Enable must also be enabled.
       *
       *   [DevGT]: This field must be set to the same value (enabled or
       *   disabled) as Hierarchical Depth Buffer Enable.
       */
      struct intel_region *region = stencil_irb->mt->region;

      assert(intel->has_separate_stencil);
      assert(stencil_irb->Base.Format == MESA_FORMAT_S8);

      BEGIN_BATCH(len);
      OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
      OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
                (1 << 21) | /* separate stencil enable */
                (1 << 22) | /* hiz enable */
                (BRW_TILEWALK_YMAJOR << 26) |
                (BRW_SURFACE_2D << 29));
      OUT_BATCH(0);
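      /* Width - 1 goes in bits 6 and up, height - 1 in bits 19 and up.
       * E.g. a 64x64 S8 buffer occupies a 64x32, cpp == 2 region, so
       * 2 * region->height - 1 == 63 recovers the real height minus one.
       */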
      OUT_BATCH(((region->width - 1) << 6) |
                (2 * region->height - 1) << 19);
      OUT_BATCH(0);
      OUT_BATCH(0);

      if (intel->gen >= 6)
         OUT_BATCH(0);

      ADVANCE_BATCH();

   } else {
      struct intel_region *region = depth_irb->mt->region;
      unsigned int format;
      uint32_t tile_x, tile_y, offset;

      /* If using separate stencil, hiz must be enabled. */
      assert(!stencil_irb || hiz_region);

      switch (region->cpp) {
      case 2:
         format = BRW_DEPTHFORMAT_D16_UNORM;
         break;
      case 4:
         if (intel->depth_buffer_is_float)
            format = BRW_DEPTHFORMAT_D32_FLOAT;
         else if (hiz_region)
            format = BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
         else
            format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
         break;
      default:
         assert(0);
         return;
      }

      offset = intel_renderbuffer_tile_offsets(depth_irb, &tile_x, &tile_y);
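      /* The returned offset is tile-aligned; the sub-tile position comes
       * back in tile_x/tile_y, which only g4x and later can program (see
       * the assert on older parts below).
       */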

      assert(intel->gen < 6 || region->tiling == I915_TILING_Y);
      assert(!hiz_region || region->tiling == I915_TILING_Y);

      BEGIN_BATCH(len);
      OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
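      /* DW1: surface pitch in bytes minus one (region->pitch is in
       * pixels, hence the multiply by cpp), plus format and layout bits.
       */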
      OUT_BATCH(((region->pitch * region->cpp) - 1) |
                (format << 18) |
                ((hiz_region ? 1 : 0) << 21) | /* separate stencil enable */
                ((hiz_region ? 1 : 0) << 22) | /* hiz enable */
                (BRW_TILEWALK_YMAJOR << 26) |
                ((region->tiling != I915_TILING_NONE) << 27) |
                (BRW_SURFACE_2D << 29));
      OUT_RELOC(region->bo,
                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                offset);
      OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
                ((region->width - 1) << 6) |
                ((region->height - 1) << 19));
      OUT_BATCH(0);

      if (intel->is_g4x || intel->gen >= 5)
         OUT_BATCH(tile_x | (tile_y << 16));
      else
         assert(tile_x == 0 && tile_y == 0);

      if (intel->gen >= 6)
         OUT_BATCH(0);

      ADVANCE_BATCH();
   }

   if (hiz_region || stencil_irb) {
      /*
       * In the 3DSTATE_DEPTH_BUFFER batch emitted above, the 'separate
       * stencil enable' and 'hiz enable' bits were set. Therefore we must
       * emit 3DSTATE_HIER_DEPTH_BUFFER and 3DSTATE_STENCIL_BUFFER. Even if
       * there is no stencil buffer, 3DSTATE_STENCIL_BUFFER must be emitted;
       * failure to do so causes hangs on gen5 and a stall on gen6.
       */

      /* Emit hiz buffer. */
      if (hiz_region) {
         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
         OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1);
         OUT_RELOC(hiz_region->bo,
                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                   0);
         ADVANCE_BATCH();
      } else {
         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
         OUT_BATCH(0);
         OUT_BATCH(0);
         ADVANCE_BATCH();
      }

      /* Emit stencil buffer. */
      if (stencil_irb) {
         struct intel_region *region = stencil_irb->mt->region;
         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
         OUT_BATCH(region->pitch * region->cpp - 1);
         OUT_RELOC(region->bo,
                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                   0);
         ADVANCE_BATCH();
      } else {
         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
         OUT_BATCH(0);
         OUT_BATCH(0);
         ADVANCE_BATCH();
      }
   }

   /*
    * On Gen >= 6, emit clear params for safety. If using hiz, then clear
    * params must be emitted.
    *
    * From Section 2.11.5.6.4.1 3DSTATE_CLEAR_PARAMS:
    *    3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE
    *    packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
    */
   if (intel->gen >= 6 || hiz_region) {
      if (intel->gen == 6)
         intel_emit_post_sync_nonzero_flush(intel);

      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 | (2 - 2));
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }
}

const struct brw_tracked_state brw_depthbuffer = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH,
      .cache = 0,
   },
   .emit = emit_depthbuffer,
};



/***********************************************************************
 * Polygon stipple packet
 */

static void upload_polygon_stipple(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &brw->intel.ctx;
   GLuint i;

   /* _NEW_POLYGON */
   if (!ctx->Polygon.StippleFlag)
      return;

   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(intel);

   BEGIN_BATCH(33);
   OUT_BATCH(_3DSTATE_POLY_STIPPLE_PATTERN << 16 | (33 - 2));

   /* Polygon stipple is provided in OpenGL order, i.e. bottom
    * row first. If we're rendering to a window (i.e. the
    * default frame buffer object, 0), then we need to invert
    * it to match our pixel layout. But if we're rendering
    * to a FBO (i.e. any named frame buffer object), we *don't*
    * need to invert - we already match the layout.
    */
   if (ctx->DrawBuffer->Name == 0) {
      for (i = 0; i < 32; i++)
         OUT_BATCH(ctx->PolygonStipple[31 - i]); /* invert */
   } else {
      for (i = 0; i < 32; i++)
         OUT_BATCH(ctx->PolygonStipple[i]);
   }
   CACHED_BATCH();
}

const struct brw_tracked_state brw_polygon_stipple = {
   .dirty = {
      .mesa = (_NEW_POLYGONSTIPPLE |
               _NEW_POLYGON),
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_polygon_stipple
};


/***********************************************************************
 * Polygon stipple offset packet
 */

static void upload_polygon_stipple_offset(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &brw->intel.ctx;

   /* _NEW_POLYGON */
   if (!ctx->Polygon.StippleFlag)
      return;

   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(intel);

   BEGIN_BATCH(2);
   OUT_BATCH(_3DSTATE_POLY_STIPPLE_OFFSET << 16 | (2 - 2));

   /* _NEW_BUFFERS
    *
    * If we're drawing to a system window (ctx->DrawBuffer->Name == 0),
    * we have to invert the Y axis in order to match the OpenGL
    * pixel coordinate system, and our offset must be matched
    * to the window position. If we're drawing to a FBO
    * (ctx->DrawBuffer->Name != 0), then our native pixel coordinate
    * system works just fine, and there's no window system to
    * worry about.
    */
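   /* E.g. a window 100 pixels tall gives a y offset of
    * (32 - (100 & 31)) & 31 == 28, keeping the 32x32 pattern anchored
    * to the window rather than the screen.
    */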
   if (brw->intel.ctx.DrawBuffer->Name == 0)
      OUT_BATCH((32 - (ctx->DrawBuffer->Height & 31)) & 31);
   else
      OUT_BATCH(0);
   CACHED_BATCH();
}

const struct brw_tracked_state brw_polygon_stipple_offset = {
   .dirty = {
      .mesa = (_NEW_BUFFERS |
               _NEW_POLYGON),
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_polygon_stipple_offset
};

/**********************************************************************
 * AA Line parameters
 */
static void upload_aa_line_parameters(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &brw->intel.ctx;

   if (!ctx->Line.SmoothFlag || !brw->has_aa_line_parameters)
      return;

   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(intel);

   BEGIN_BATCH(3); /* reserve batch space; OUT_BATCH requires a prior BEGIN_BATCH */
   OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS << 16 | (3 - 2));
   /* use legacy aa line coverage computation */
   OUT_BATCH(0);
   OUT_BATCH(0);
   CACHED_BATCH();
}

const struct brw_tracked_state brw_aa_line_parameters = {
   .dirty = {
      .mesa = _NEW_LINE,
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_aa_line_parameters
};

/***********************************************************************
 * Line stipple packet
 */

static void upload_line_stipple(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &brw->intel.ctx;
   GLfloat tmp;
   GLint tmpi;

   if (!ctx->Line.StippleFlag)
      return;

   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(intel);

   BEGIN_BATCH(3);
   OUT_BATCH(_3DSTATE_LINE_STIPPLE_PATTERN << 16 | (3 - 2));
   OUT_BATCH(ctx->Line.StipplePattern);
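   /* The high half of the next dword is the inverse repeat count as a
    * fixed-point value with 13 fractional bits; e.g. StippleFactor == 2
    * gives tmpi == (1.0 / 2) * 8192 == 4096.
    */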
   tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor;
   tmpi = tmp * (1 << 13);
   OUT_BATCH(tmpi << 16 | ctx->Line.StippleFactor);
   CACHED_BATCH();
}

const struct brw_tracked_state brw_line_stipple = {
   .dirty = {
      .mesa = _NEW_LINE,
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_line_stipple
};


/***********************************************************************
 * Misc invariant state packets
 */

static void upload_invarient_state(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;

   /* 3DSTATE_SIP, 3DSTATE_MULTISAMPLE, etc. are nonpipelined. */
   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(intel);

   /* Select the 3D pipeline (as opposed to media) */
   BEGIN_BATCH(1);
   OUT_BATCH(brw->CMD_PIPELINE_SELECT << 16 | 0);
   ADVANCE_BATCH();

   if (intel->gen < 6) {
      /* Disable depth offset clamping. */
      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2));
      OUT_BATCH_F(0.0);
      ADVANCE_BATCH();
   }

   if (intel->gen >= 6) {
      int i;
      int len = intel->gen >= 7 ? 4 : 3;

      BEGIN_BATCH(len);
      OUT_BATCH(_3DSTATE_MULTISAMPLE << 16 | (len - 2));
      OUT_BATCH(MS_PIXEL_LOCATION_CENTER |
                MS_NUMSAMPLES_1);
      OUT_BATCH(0); /* positions for 4/8-sample */
      if (intel->gen >= 7)
         OUT_BATCH(0);
      ADVANCE_BATCH();

      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_SAMPLE_MASK << 16 | (2 - 2));
      OUT_BATCH(1);
      ADVANCE_BATCH();

      if (intel->gen < 7) {
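         /* Initialize the four gen6 streamed-vertex-buffer index (SVBI)
          * registers used by transform feedback: each starts at index 0
          * with an unlimited (0xffffffff) max index.
          */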
         for (i = 0; i < 4; i++) {
            BEGIN_BATCH(4);
            OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
            OUT_BATCH(i << SVB_INDEX_SHIFT);
            OUT_BATCH(0);
            OUT_BATCH(0xffffffff);
            ADVANCE_BATCH();
         }
      }
   }

   BEGIN_BATCH(2);
   OUT_BATCH(CMD_STATE_SIP << 16 | (2 - 2));
   OUT_BATCH(0);
   ADVANCE_BATCH();

   BEGIN_BATCH(1);
   OUT_BATCH(brw->CMD_VF_STATISTICS << 16 |
             (unlikely(INTEL_DEBUG & DEBUG_STATS) ? 1 : 0));
   ADVANCE_BATCH();
}

const struct brw_tracked_state brw_invarient_state = {
   .dirty = {
      .mesa = 0,
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_invarient_state
};

/**
 * Define the base addresses which some state is referenced from.
 *
 * This allows us to avoid having to emit relocations for the objects,
 * and is actually required for binding table pointers on gen6.
 *
 * Surface state base address covers binding table pointers and
 * surface state objects, but not the surfaces that the surface state
 * objects point to.
 */
static void upload_state_base_address(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;

   /* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of
    * vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be
    * programmed prior to STATE_BASE_ADDRESS.
    *
    * However, given that the instruction SBA (general state base
    * address) on this chipset is always set to 0 across X and GL,
    * maybe this isn't required for us in particular.
    */

   if (intel->gen >= 6) {
      if (intel->gen == 6)
         intel_emit_post_sync_nonzero_flush(intel);

      BEGIN_BATCH(10);
      OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
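      /* Bit 0 of each address dword is its "modify enable" bit, so
       * OUT_BATCH(1) programs a base address of zero with the update
       * enabled; the relocs below use a delta of 1 for the same reason.
       */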
      /* General state base address: stateless DP read/write requests */
      OUT_BATCH(1);
      /* Surface state base address:
       * BINDING_TABLE_STATE
       * SURFACE_STATE
       */
      OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1);
      /* Dynamic state base address:
       * SAMPLER_STATE
       * SAMPLER_BORDER_COLOR_STATE
       * CLIP, SF, WM/CC viewport state
       * COLOR_CALC_STATE
       * DEPTH_STENCIL_STATE
       * BLEND_STATE
       * Push constants (when INSTPM: CONSTANT_BUFFER Address Offset
       * Disable is clear, which we rely on)
       */
      OUT_RELOC(intel->batch.bo, (I915_GEM_DOMAIN_RENDER |
                                  I915_GEM_DOMAIN_INSTRUCTION), 0, 1);

      OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */
      OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
                1); /* Instruction base address: shader kernels (incl. SIP) */

      OUT_BATCH(1); /* General state upper bound */
      OUT_BATCH(1); /* Dynamic state upper bound */
      OUT_BATCH(1); /* Indirect object upper bound */
      OUT_BATCH(1); /* Instruction access upper bound */
      ADVANCE_BATCH();
   } else if (intel->gen == 5) {
      BEGIN_BATCH(8);
      OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
      OUT_BATCH(1); /* General state base address */
      OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
                1); /* Surface state base address */
      OUT_BATCH(1); /* Indirect object base address */
      OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
                1); /* Instruction base address */
      OUT_BATCH(1); /* General state upper bound */
      OUT_BATCH(1); /* Indirect object upper bound */
      OUT_BATCH(1); /* Instruction access upper bound */
      ADVANCE_BATCH();
   } else {
      BEGIN_BATCH(6);
      OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
      OUT_BATCH(1); /* General state base address */
      OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
                1); /* Surface state base address */
      OUT_BATCH(1); /* Indirect object base address */
      OUT_BATCH(1); /* General state upper bound */
      OUT_BATCH(1); /* Indirect object upper bound */
      ADVANCE_BATCH();
   }

   /* According to section 3.6.1 of VOL1 of the 965 PRM,
    * STATE_BASE_ADDRESS updates require a reissue of:
    *
    * 3DSTATE_PIPELINE_POINTERS
    * 3DSTATE_BINDING_TABLE_POINTERS
    * MEDIA_STATE_POINTERS
    *
    * and this continues through Ironlake. The Sandy Bridge PRM, vol
    * 1 part 1 says that the following packets must be reissued:
    *
    * 3DSTATE_CC_POINTERS
    * 3DSTATE_BINDING_TABLE_POINTERS
    * 3DSTATE_SAMPLER_STATE_POINTERS
    * 3DSTATE_VIEWPORT_STATE_POINTERS
    * MEDIA_STATE_POINTERS
    *
    * Those are always reissued following SBA updates anyway (new
    * batch time), except in the case of the program cache BO
    * changing. Having a separate state flag makes the sequence more
    * obvious.
    */

   brw->state.dirty.brw |= BRW_NEW_STATE_BASE_ADDRESS;
}

const struct brw_tracked_state brw_state_base_address = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_BATCH |
              BRW_NEW_PROGRAM_CACHE),
      .cache = 0,
   },
   .emit = upload_state_base_address
};