Merge branch 'gallium-polygon-stipple'
[mesa.git] / src / mesa / drivers / dri / i965 / brw_misc_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33
34 #include "intel_batchbuffer.h"
35 #include "intel_fbo.h"
36 #include "intel_regions.h"
37
38 #include "brw_context.h"
39 #include "brw_state.h"
40 #include "brw_defines.h"
41
42 /* Constant single cliprect for framebuffer object or DRI2 drawing */
43 static void upload_drawing_rect(struct brw_context *brw)
44 {
45 struct intel_context *intel = &brw->intel;
46 struct gl_context *ctx = &intel->ctx;
47
48 BEGIN_BATCH(4);
49 OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
50 OUT_BATCH(0); /* xmin, ymin */
51 OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) |
52 ((ctx->DrawBuffer->Height - 1) << 16));
53 OUT_BATCH(0);
54 ADVANCE_BATCH();
55 }
56
57 const struct brw_tracked_state brw_drawing_rect = {
58 .dirty = {
59 .mesa = _NEW_BUFFERS,
60 .brw = BRW_NEW_CONTEXT,
61 .cache = 0
62 },
63 .emit = upload_drawing_rect
64 };
65
66 /**
67 * Upload the binding table pointers, which point each stage's array of surface
68 * state pointers.
69 *
70 * The binding table pointers are relative to the surface state base address,
71 * which points at the batchbuffer containing the streamed batch state.
72 */
73 static void upload_binding_table_pointers(struct brw_context *brw)
74 {
75 struct intel_context *intel = &brw->intel;
76
77 BEGIN_BATCH(6);
78 OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 | (6 - 2));
79 OUT_BATCH(brw->vs.bind_bo_offset);
80 OUT_BATCH(0); /* gs */
81 OUT_BATCH(0); /* clip */
82 OUT_BATCH(0); /* sf */
83 OUT_BATCH(brw->wm.bind_bo_offset);
84 ADVANCE_BATCH();
85 }
86
87 const struct brw_tracked_state brw_binding_table_pointers = {
88 .dirty = {
89 .mesa = 0,
90 .brw = (BRW_NEW_BATCH |
91 BRW_NEW_STATE_BASE_ADDRESS |
92 BRW_NEW_VS_BINDING_TABLE |
93 BRW_NEW_GS_BINDING_TABLE |
94 BRW_NEW_PS_BINDING_TABLE),
95 .cache = 0,
96 },
97 .emit = upload_binding_table_pointers,
98 };
99
100 /**
101 * Upload the binding table pointers, which point each stage's array of surface
102 * state pointers.
103 *
104 * The binding table pointers are relative to the surface state base address,
105 * which points at the batchbuffer containing the streamed batch state.
106 */
107 static void upload_gen6_binding_table_pointers(struct brw_context *brw)
108 {
109 struct intel_context *intel = &brw->intel;
110
111 BEGIN_BATCH(4);
112 OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 |
113 GEN6_BINDING_TABLE_MODIFY_VS |
114 GEN6_BINDING_TABLE_MODIFY_GS |
115 GEN6_BINDING_TABLE_MODIFY_PS |
116 (4 - 2));
117 OUT_BATCH(brw->vs.bind_bo_offset); /* vs */
118 OUT_BATCH(0); /* gs */
119 OUT_BATCH(brw->wm.bind_bo_offset); /* wm/ps */
120 ADVANCE_BATCH();
121 }
122
123 const struct brw_tracked_state gen6_binding_table_pointers = {
124 .dirty = {
125 .mesa = 0,
126 .brw = (BRW_NEW_BATCH |
127 BRW_NEW_STATE_BASE_ADDRESS |
128 BRW_NEW_VS_BINDING_TABLE |
129 BRW_NEW_GS_BINDING_TABLE |
130 BRW_NEW_PS_BINDING_TABLE),
131 .cache = 0,
132 },
133 .emit = upload_gen6_binding_table_pointers,
134 };
135
136 /**
137 * Upload pointers to the per-stage state.
138 *
139 * The state pointers in this packet are all relative to the general state
140 * base address set by CMD_STATE_BASE_ADDRESS, which is 0.
141 */
142 static void upload_pipelined_state_pointers(struct brw_context *brw )
143 {
144 struct intel_context *intel = &brw->intel;
145
146 if (intel->gen == 5) {
147 /* Need to flush before changing clip max threads for errata. */
148 BEGIN_BATCH(1);
149 OUT_BATCH(MI_FLUSH);
150 ADVANCE_BATCH();
151 }
152
153 BEGIN_BATCH(7);
154 OUT_BATCH(_3DSTATE_PIPELINED_POINTERS << 16 | (7 - 2));
155 OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
156 brw->vs.state_offset);
157 if (brw->gs.prog_active)
158 OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
159 brw->gs.state_offset | 1);
160 else
161 OUT_BATCH(0);
162 OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
163 brw->clip.state_offset | 1);
164 OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
165 brw->sf.state_offset);
166 OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
167 brw->wm.state_offset);
168 OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
169 brw->cc.state_offset);
170 ADVANCE_BATCH();
171
172 brw->state.dirty.brw |= BRW_NEW_PSP;
173 }
174
/**
 * Emit, in this order, the pipelined state pointers, the URB fence and the
 * CS URB state.
 */
static void upload_psp_urb_cbs(struct brw_context *brw )
{
   /* 1. pipelined state pointers */
   upload_pipelined_state_pointers(brw);

   /* 2. URB fence */
   brw_upload_urb_fence(brw);

   /* 3. CS URB state */
   brw_upload_cs_urb_state(brw);
}
181
182 const struct brw_tracked_state brw_psp_urb_cbs = {
183 .dirty = {
184 .mesa = 0,
185 .brw = (BRW_NEW_URB_FENCE |
186 BRW_NEW_BATCH |
187 BRW_NEW_STATE_BASE_ADDRESS),
188 .cache = (CACHE_NEW_VS_UNIT |
189 CACHE_NEW_GS_UNIT |
190 CACHE_NEW_GS_PROG |
191 CACHE_NEW_CLIP_UNIT |
192 CACHE_NEW_SF_UNIT |
193 CACHE_NEW_WM_UNIT |
194 CACHE_NEW_CC_UNIT)
195 },
196 .emit = upload_psp_urb_cbs,
197 };
198
199 static void prepare_depthbuffer(struct brw_context *brw)
200 {
201 struct intel_context *intel = &brw->intel;
202 struct gl_context *ctx = &intel->ctx;
203 struct gl_framebuffer *fb = ctx->DrawBuffer;
204 struct intel_renderbuffer *drb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
205 struct intel_renderbuffer *srb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
206
207 if (drb)
208 brw_add_validated_bo(brw, drb->region->buffer);
209 if (drb && drb->hiz_region)
210 brw_add_validated_bo(brw, drb->hiz_region->buffer);
211 if (srb)
212 brw_add_validated_bo(brw, srb->region->buffer);
213 }
214
215 static void emit_depthbuffer(struct brw_context *brw)
216 {
217 struct intel_context *intel = &brw->intel;
218 struct gl_context *ctx = &intel->ctx;
219 struct gl_framebuffer *fb = ctx->DrawBuffer;
220 /* _NEW_BUFFERS */
221 struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
222 struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
223 struct intel_region *hiz_region = depth_irb ? depth_irb->hiz_region : NULL;
224 unsigned int len;
225
226 /* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both
227 * non-pipelined state that will need the PIPE_CONTROL workaround.
228 */
229 if (intel->gen == 6)
230 intel_emit_post_sync_nonzero_flush(intel);
231
232 /*
233 * If either depth or stencil buffer has packed depth/stencil format,
234 * then don't use separate stencil. Emit only a depth buffer.
235 */
236 if (depth_irb && depth_irb->Base.Format == MESA_FORMAT_S8_Z24) {
237 stencil_irb = NULL;
238 } else if (!depth_irb && stencil_irb
239 && stencil_irb->Base.Format == MESA_FORMAT_S8_Z24) {
240 depth_irb = stencil_irb;
241 stencil_irb = NULL;
242 }
243
244 if (intel->gen >= 6)
245 len = 7;
246 else if (intel->is_g4x || intel->gen == 5)
247 len = 6;
248 else
249 len = 5;
250
251 if (!depth_irb && !stencil_irb) {
252 BEGIN_BATCH(len);
253 OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
254 OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
255 (BRW_SURFACE_NULL << 29));
256 OUT_BATCH(0);
257 OUT_BATCH(0);
258 OUT_BATCH(0);
259
260 if (intel->is_g4x || intel->gen >= 5)
261 OUT_BATCH(0);
262
263 if (intel->gen >= 6)
264 OUT_BATCH(0);
265
266 ADVANCE_BATCH();
267
268 } else if (!depth_irb && stencil_irb) {
269 /*
270 * There exists a separate stencil buffer but no depth buffer.
271 *
272 * The stencil buffer inherits most of its fields from
273 * 3DSTATE_DEPTH_BUFFER: namely the tile walk, surface type, width, and
274 * height.
275 *
276 * Since the stencil buffer has quirky pitch requirements, its region
277 * was allocated with half height and double cpp. So we need
278 * a multiplier of 2 to obtain the surface's real height.
279 *
280 * Enable the hiz bit because it and the separate stencil bit must have
281 * the same value. From Section 2.11.5.6.1.1 3DSTATE_DEPTH_BUFFER, Bit
282 * 1.21 "Separate Stencil Enable":
283 * [DevIL]: If this field is enabled, Hierarchical Depth Buffer
284 * Enable must also be enabled.
285 *
286 * [DevGT]: This field must be set to the same value (enabled or
287 * disabled) as Hierarchical Depth Buffer Enable
288 */
289 assert(intel->has_separate_stencil);
290 assert(stencil_irb->Base.Format == MESA_FORMAT_S8);
291
292 BEGIN_BATCH(len);
293 OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
294 OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
295 (1 << 21) | /* separate stencil enable */
296 (1 << 22) | /* hiz enable */
297 (BRW_TILEWALK_YMAJOR << 26) |
298 (BRW_SURFACE_2D << 29));
299 OUT_BATCH(0);
300 OUT_BATCH(((stencil_irb->region->width - 1) << 6) |
301 (2 * stencil_irb->region->height - 1) << 19);
302 OUT_BATCH(0);
303 OUT_BATCH(0);
304
305 if (intel->gen >= 6)
306 OUT_BATCH(0);
307
308 ADVANCE_BATCH();
309
310 } else {
311 struct intel_region *region = depth_irb->region;
312 unsigned int format;
313 uint32_t tile_x, tile_y, offset;
314
315 /* If using separate stencil, hiz must be enabled. */
316 assert(!stencil_irb || hiz_region);
317
318 switch (region->cpp) {
319 case 2:
320 format = BRW_DEPTHFORMAT_D16_UNORM;
321 break;
322 case 4:
323 if (intel->depth_buffer_is_float)
324 format = BRW_DEPTHFORMAT_D32_FLOAT;
325 else if (hiz_region)
326 format = BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
327 else
328 format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
329 break;
330 default:
331 assert(0);
332 return;
333 }
334
335 offset = intel_renderbuffer_tile_offsets(depth_irb, &tile_x, &tile_y);
336
337 assert(intel->gen < 6 || region->tiling == I915_TILING_Y);
338 assert(!hiz_region || region->tiling == I915_TILING_Y);
339
340 BEGIN_BATCH(len);
341 OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
342 OUT_BATCH(((region->pitch * region->cpp) - 1) |
343 (format << 18) |
344 ((hiz_region ? 1 : 0) << 21) | /* separate stencil enable */
345 ((hiz_region ? 1 : 0) << 22) | /* hiz enable */
346 (BRW_TILEWALK_YMAJOR << 26) |
347 ((region->tiling != I915_TILING_NONE) << 27) |
348 (BRW_SURFACE_2D << 29));
349 OUT_RELOC(region->buffer,
350 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
351 offset);
352 OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
353 ((region->width - 1) << 6) |
354 ((region->height - 1) << 19));
355 OUT_BATCH(0);
356
357 if (intel->is_g4x || intel->gen >= 5)
358 OUT_BATCH(tile_x | (tile_y << 16));
359 else
360 assert(tile_x == 0 && tile_y == 0);
361
362 if (intel->gen >= 6)
363 OUT_BATCH(0);
364
365 ADVANCE_BATCH();
366 }
367
368 if (hiz_region || stencil_irb) {
369 /*
370 * In the 3DSTATE_DEPTH_BUFFER batch emitted above, the 'separate
371 * stencil enable' and 'hiz enable' bits were set. Therefore we must
372 * emit 3DSTATE_HIER_DEPTH_BUFFER and 3DSTATE_STENCIL_BUFFER. Even if
373 * there is no stencil buffer, 3DSTATE_STENCIL_BUFFER must be emitted;
374 * failure to do so causes hangs on gen5 and a stall on gen6.
375 */
376
377 /* Emit hiz buffer. */
378 if (hiz_region) {
379 BEGIN_BATCH(3);
380 OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
381 OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1);
382 OUT_RELOC(hiz_region->buffer,
383 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
384 0);
385 ADVANCE_BATCH();
386 } else {
387 BEGIN_BATCH(3);
388 OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
389 OUT_BATCH(0);
390 OUT_BATCH(0);
391 ADVANCE_BATCH();
392 }
393
394 /* Emit stencil buffer. */
395 if (stencil_irb) {
396 BEGIN_BATCH(3);
397 OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
398 OUT_BATCH(stencil_irb->region->pitch * stencil_irb->region->cpp - 1);
399 OUT_RELOC(stencil_irb->region->buffer,
400 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
401 0);
402 ADVANCE_BATCH();
403 } else {
404 BEGIN_BATCH(3);
405 OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
406 OUT_BATCH(0);
407 OUT_BATCH(0);
408 ADVANCE_BATCH();
409 }
410 }
411
412 /*
413 * On Gen >= 6, emit clear params for safety. If using hiz, then clear
414 * params must be emitted.
415 *
416 * From Section 2.11.5.6.4.1 3DSTATE_CLEAR_PARAMS:
417 * 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE packet
418 * when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
419 */
420 if (intel->gen >= 6 || hiz_region) {
421 if (intel->gen == 6)
422 intel_emit_post_sync_nonzero_flush(intel);
423
424 BEGIN_BATCH(2);
425 OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 | (2 - 2));
426 OUT_BATCH(0);
427 ADVANCE_BATCH();
428 }
429 }
430
431 const struct brw_tracked_state brw_depthbuffer = {
432 .dirty = {
433 .mesa = _NEW_BUFFERS,
434 .brw = BRW_NEW_BATCH,
435 .cache = 0,
436 },
437 .prepare = prepare_depthbuffer,
438 .emit = emit_depthbuffer,
439 };
440
441
442
443 /***********************************************************************
444 * Polygon stipple packet
445 */
446
447 static void upload_polygon_stipple(struct brw_context *brw)
448 {
449 struct intel_context *intel = &brw->intel;
450 struct gl_context *ctx = &brw->intel.ctx;
451 GLuint i;
452
453 if (!ctx->Polygon.StippleFlag)
454 return;
455
456 if (intel->gen == 6)
457 intel_emit_post_sync_nonzero_flush(intel);
458
459 BEGIN_BATCH(33);
460 OUT_BATCH(_3DSTATE_POLY_STIPPLE_PATTERN << 16 | (33 - 2));
461
462 /* Polygon stipple is provided in OpenGL order, i.e. bottom
463 * row first. If we're rendering to a window (i.e. the
464 * default frame buffer object, 0), then we need to invert
465 * it to match our pixel layout. But if we're rendering
466 * to a FBO (i.e. any named frame buffer object), we *don't*
467 * need to invert - we already match the layout.
468 */
469 if (ctx->DrawBuffer->Name == 0) {
470 for (i = 0; i < 32; i++)
471 OUT_BATCH(ctx->PolygonStipple[31 - i]); /* invert */
472 }
473 else {
474 for (i = 0; i < 32; i++)
475 OUT_BATCH(ctx->PolygonStipple[i]);
476 }
477 CACHED_BATCH();
478 }
479
480 const struct brw_tracked_state brw_polygon_stipple = {
481 .dirty = {
482 .mesa = _NEW_POLYGONSTIPPLE,
483 .brw = BRW_NEW_CONTEXT,
484 .cache = 0
485 },
486 .emit = upload_polygon_stipple
487 };
488
489
490 /***********************************************************************
491 * Polygon stipple offset packet
492 */
493
494 static void upload_polygon_stipple_offset(struct brw_context *brw)
495 {
496 struct intel_context *intel = &brw->intel;
497 struct gl_context *ctx = &brw->intel.ctx;
498
499 if (!ctx->Polygon.StippleFlag)
500 return;
501
502 if (intel->gen == 6)
503 intel_emit_post_sync_nonzero_flush(intel);
504
505 BEGIN_BATCH(2);
506 OUT_BATCH(_3DSTATE_POLY_STIPPLE_OFFSET << 16 | (2-2));
507
508 /* If we're drawing to a system window (ctx->DrawBuffer->Name == 0),
509 * we have to invert the Y axis in order to match the OpenGL
510 * pixel coordinate system, and our offset must be matched
511 * to the window position. If we're drawing to a FBO
512 * (ctx->DrawBuffer->Name != 0), then our native pixel coordinate
513 * system works just fine, and there's no window system to
514 * worry about.
515 */
516 if (brw->intel.ctx.DrawBuffer->Name == 0)
517 OUT_BATCH((32 - (ctx->DrawBuffer->Height & 31)) & 31);
518 else
519 OUT_BATCH(0);
520 CACHED_BATCH();
521 }
522
523 #define _NEW_WINDOW_POS 0x40000000
524
525 const struct brw_tracked_state brw_polygon_stipple_offset = {
526 .dirty = {
527 .mesa = _NEW_WINDOW_POS | _NEW_POLYGONSTIPPLE,
528 .brw = BRW_NEW_CONTEXT,
529 .cache = 0
530 },
531 .emit = upload_polygon_stipple_offset
532 };
533
534 /**********************************************************************
535 * AA Line parameters
536 */
537 static void upload_aa_line_parameters(struct brw_context *brw)
538 {
539 struct intel_context *intel = &brw->intel;
540 struct gl_context *ctx = &brw->intel.ctx;
541
542 if (!ctx->Line.SmoothFlag || !brw->has_aa_line_parameters)
543 return;
544
545 if (intel->gen == 6)
546 intel_emit_post_sync_nonzero_flush(intel);
547
548 OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS << 16 | (3 - 2));
549 /* use legacy aa line coverage computation */
550 OUT_BATCH(0);
551 OUT_BATCH(0);
552 CACHED_BATCH();
553 }
554
555 const struct brw_tracked_state brw_aa_line_parameters = {
556 .dirty = {
557 .mesa = _NEW_LINE,
558 .brw = BRW_NEW_CONTEXT,
559 .cache = 0
560 },
561 .emit = upload_aa_line_parameters
562 };
563
564 /***********************************************************************
565 * Line stipple packet
566 */
567
568 static void upload_line_stipple(struct brw_context *brw)
569 {
570 struct intel_context *intel = &brw->intel;
571 struct gl_context *ctx = &brw->intel.ctx;
572 GLfloat tmp;
573 GLint tmpi;
574
575 if (!ctx->Line.StippleFlag)
576 return;
577
578 if (intel->gen == 6)
579 intel_emit_post_sync_nonzero_flush(intel);
580
581 BEGIN_BATCH(3);
582 OUT_BATCH(_3DSTATE_LINE_STIPPLE_PATTERN << 16 | (3 - 2));
583 OUT_BATCH(ctx->Line.StipplePattern);
584 tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor;
585 tmpi = tmp * (1<<13);
586 OUT_BATCH(tmpi << 16 | ctx->Line.StippleFactor);
587 CACHED_BATCH();
588 }
589
590 const struct brw_tracked_state brw_line_stipple = {
591 .dirty = {
592 .mesa = _NEW_LINE,
593 .brw = BRW_NEW_CONTEXT,
594 .cache = 0
595 },
596 .emit = upload_line_stipple
597 };
598
599
600 /***********************************************************************
601 * Misc invarient state packets
602 */
603
604 static void upload_invarient_state( struct brw_context *brw )
605 {
606 struct intel_context *intel = &brw->intel;
607
608 /* 3DSTATE_SIP, 3DSTATE_MULTISAMPLE, etc. are nonpipelined. */
609 if (intel->gen == 6)
610 intel_emit_post_sync_nonzero_flush(intel);
611
612 /* Select the 3D pipeline (as opposed to media) */
613 BEGIN_BATCH(1);
614 OUT_BATCH(brw->CMD_PIPELINE_SELECT << 16 | 0);
615 ADVANCE_BATCH();
616
617 if (intel->gen < 6) {
618 /* Disable depth offset clamping. */
619 BEGIN_BATCH(2);
620 OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2));
621 OUT_BATCH_F(0.0);
622 ADVANCE_BATCH();
623 }
624
625 if (intel->gen >= 6) {
626 int i;
627 int len = intel->gen >= 7 ? 4 : 3;
628
629 BEGIN_BATCH(len);
630 OUT_BATCH(_3DSTATE_MULTISAMPLE << 16 | (len - 2));
631 OUT_BATCH(MS_PIXEL_LOCATION_CENTER |
632 MS_NUMSAMPLES_1);
633 OUT_BATCH(0); /* positions for 4/8-sample */
634 if (intel->gen >= 7)
635 OUT_BATCH(0);
636 ADVANCE_BATCH();
637
638 BEGIN_BATCH(2);
639 OUT_BATCH(_3DSTATE_SAMPLE_MASK << 16 | (2 - 2));
640 OUT_BATCH(1);
641 ADVANCE_BATCH();
642
643 if (intel->gen < 7) {
644 for (i = 0; i < 4; i++) {
645 BEGIN_BATCH(4);
646 OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
647 OUT_BATCH(i << SVB_INDEX_SHIFT);
648 OUT_BATCH(0);
649 OUT_BATCH(0xffffffff);
650 ADVANCE_BATCH();
651 }
652 }
653 }
654
655 BEGIN_BATCH(2);
656 OUT_BATCH(CMD_STATE_SIP << 16 | (2 - 2));
657 OUT_BATCH(0);
658 ADVANCE_BATCH();
659
660 BEGIN_BATCH(1);
661 OUT_BATCH(brw->CMD_VF_STATISTICS << 16 |
662 (unlikely(INTEL_DEBUG & DEBUG_STATS) ? 1 : 0));
663 ADVANCE_BATCH();
664 }
665
666 const struct brw_tracked_state brw_invarient_state = {
667 .dirty = {
668 .mesa = 0,
669 .brw = BRW_NEW_CONTEXT,
670 .cache = 0
671 },
672 .emit = upload_invarient_state
673 };
674
675 /**
676 * Define the base addresses which some state is referenced from.
677 *
678 * This allows us to avoid having to emit relocations for the objects,
679 * and is actually required for binding table pointers on gen6.
680 *
681 * Surface state base address covers binding table pointers and
682 * surface state objects, but not the surfaces that the surface state
683 * objects point to.
684 */
685 static void upload_state_base_address( struct brw_context *brw )
686 {
687 struct intel_context *intel = &brw->intel;
688
689 /* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of
690 * vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be
691 * programmed prior to STATE_BASE_ADDRESS.
692 *
693 * However, given that the instruction SBA (general state base
694 * address) on this chipset is always set to 0 across X and GL,
695 * maybe this isn't required for us in particular.
696 */
697
698 if (intel->gen >= 6) {
699 if (intel->gen == 6)
700 intel_emit_post_sync_nonzero_flush(intel);
701
702 BEGIN_BATCH(10);
703 OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
704 /* General state base address: stateless DP read/write requests */
705 OUT_BATCH(1);
706 /* Surface state base address:
707 * BINDING_TABLE_STATE
708 * SURFACE_STATE
709 */
710 OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1);
711 /* Dynamic state base address:
712 * SAMPLER_STATE
713 * SAMPLER_BORDER_COLOR_STATE
714 * CLIP, SF, WM/CC viewport state
715 * COLOR_CALC_STATE
716 * DEPTH_STENCIL_STATE
717 * BLEND_STATE
718 * Push constants (when INSTPM: CONSTANT_BUFFER Address Offset
719 * Disable is clear, which we rely on)
720 */
721 OUT_RELOC(intel->batch.bo, (I915_GEM_DOMAIN_RENDER |
722 I915_GEM_DOMAIN_INSTRUCTION), 0, 1);
723
724 OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */
725 OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
726 1); /* Instruction base address: shader kernels (incl. SIP) */
727
728 OUT_BATCH(1); /* General state upper bound */
729 OUT_BATCH(1); /* Dynamic state upper bound */
730 OUT_BATCH(1); /* Indirect object upper bound */
731 OUT_BATCH(1); /* Instruction access upper bound */
732 ADVANCE_BATCH();
733 } else if (intel->gen == 5) {
734 BEGIN_BATCH(8);
735 OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
736 OUT_BATCH(1); /* General state base address */
737 OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
738 1); /* Surface state base address */
739 OUT_BATCH(1); /* Indirect object base address */
740 OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
741 1); /* Instruction base address */
742 OUT_BATCH(1); /* General state upper bound */
743 OUT_BATCH(1); /* Indirect object upper bound */
744 OUT_BATCH(1); /* Instruction access upper bound */
745 ADVANCE_BATCH();
746 } else {
747 BEGIN_BATCH(6);
748 OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
749 OUT_BATCH(1); /* General state base address */
750 OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
751 1); /* Surface state base address */
752 OUT_BATCH(1); /* Indirect object base address */
753 OUT_BATCH(1); /* General state upper bound */
754 OUT_BATCH(1); /* Indirect object upper bound */
755 ADVANCE_BATCH();
756 }
757
758 /* According to section 3.6.1 of VOL1 of the 965 PRM,
759 * STATE_BASE_ADDRESS updates require a reissue of:
760 *
761 * 3DSTATE_PIPELINE_POINTERS
762 * 3DSTATE_BINDING_TABLE_POINTERS
763 * MEDIA_STATE_POINTERS
764 *
765 * and this continues through Ironlake. The Sandy Bridge PRM, vol
766 * 1 part 1 says that the folowing packets must be reissued:
767 *
768 * 3DSTATE_CC_POINTERS
769 * 3DSTATE_BINDING_TABLE_POINTERS
770 * 3DSTATE_SAMPLER_STATE_POINTERS
771 * 3DSTATE_VIEWPORT_STATE_POINTERS
772 * MEDIA_STATE_POINTERS
773 *
774 * Those are always reissued following SBA updates anyway (new
775 * batch time), except in the case of the program cache BO
776 * changing. Having a separate state flag makes the sequence more
777 * obvious.
778 */
779
780 brw->state.dirty.brw |= BRW_NEW_STATE_BASE_ADDRESS;
781 }
782
783 const struct brw_tracked_state brw_state_base_address = {
784 .dirty = {
785 .mesa = 0,
786 .brw = (BRW_NEW_BATCH |
787 BRW_NEW_PROGRAM_CACHE),
788 .cache = 0,
789 },
790 .emit = upload_state_base_address
791 };