src/mesa/drivers/dri/i965/brw_misc_state.c
/*
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keith@tungstengraphics.com>
 */


#include "intel_batchbuffer.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_regions.h"

#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"

/* Constant single cliprect for framebuffer object or DRI2 drawing */
static void upload_drawing_rect(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;

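   /* Note: the length field of every 3DSTATE packet is encoded as the total
    * packet length in DWords minus two, hence the recurring (n - 2) pattern.
    */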
   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
   OUT_BATCH(0); /* xmin, ymin */
   OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) |
             ((ctx->DrawBuffer->Height - 1) << 16)); /* xmax, ymax */
   OUT_BATCH(0); /* drawing rectangle origin x, y */
   ADVANCE_BATCH();
}

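/* Each brw_tracked_state atom below pairs a set of dirty-flag conditions
 * with an emit function; the state upload code (brw_state_upload.c) calls
 * emit for every atom whose mesa/brw/cache bits intersect the currently
 * dirty state.
 */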
const struct brw_tracked_state brw_drawing_rect = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_drawing_rect
};

/**
 * Upload the binding table pointers, which point to each stage's array of
 * surface state pointers.
 *
 * The binding table pointers are relative to the surface state base address,
 * which points at the batchbuffer containing the streamed batch state.
 */
static void upload_binding_table_pointers(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;

   BEGIN_BATCH(6);
   OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 | (6 - 2));
   OUT_BATCH(brw->bind.bo_offset); /* vs */
   OUT_BATCH(0); /* gs */
   OUT_BATCH(0); /* clip */
   OUT_BATCH(0); /* sf */
   OUT_BATCH(brw->bind.bo_offset); /* wm/ps */
   ADVANCE_BATCH();
}

const struct brw_tracked_state brw_binding_table_pointers = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_BATCH |
              BRW_NEW_STATE_BASE_ADDRESS |
              BRW_NEW_VS_BINDING_TABLE |
              BRW_NEW_GS_BINDING_TABLE |
              BRW_NEW_PS_BINDING_TABLE),
      .cache = 0,
   },
   .emit = upload_binding_table_pointers,
};

/**
 * Upload the binding table pointers, which point to each stage's array of
 * surface state pointers.
 *
 * The binding table pointers are relative to the surface state base address,
 * which points at the batchbuffer containing the streamed batch state.
 */
static void upload_gen6_binding_table_pointers(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 |
             GEN6_BINDING_TABLE_MODIFY_VS |
             GEN6_BINDING_TABLE_MODIFY_GS |
             GEN6_BINDING_TABLE_MODIFY_PS |
             (4 - 2));
   OUT_BATCH(brw->bind.bo_offset); /* vs */
   OUT_BATCH(brw->bind.bo_offset); /* gs */
   OUT_BATCH(brw->bind.bo_offset); /* wm/ps */
   ADVANCE_BATCH();
}

const struct brw_tracked_state gen6_binding_table_pointers = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_BATCH |
              BRW_NEW_STATE_BASE_ADDRESS |
              BRW_NEW_VS_BINDING_TABLE |
              BRW_NEW_GS_BINDING_TABLE |
              BRW_NEW_PS_BINDING_TABLE),
      .cache = 0,
   },
   .emit = upload_gen6_binding_table_pointers,
};

/**
 * Upload pointers to the per-stage state.
 *
 * The state pointers in this packet are all relative to the general state
 * base address set by CMD_STATE_BASE_ADDRESS, which is 0.
 */
static void upload_pipelined_state_pointers(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;

   if (intel->gen == 5) {
      /* Need to flush before changing clip max threads for errata. */
      BEGIN_BATCH(1);
      OUT_BATCH(MI_FLUSH);
      ADVANCE_BATCH();
   }

   BEGIN_BATCH(7);
   OUT_BATCH(_3DSTATE_PIPELINED_POINTERS << 16 | (7 - 2));
   OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->vs.state_offset);
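   /* Bit 0 of the GS and clip pointers below is the unit enable bit: the
    * clip unit is always enabled, the GS unit only when a GS program is
    * active.
    */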
   if (brw->gs.prog_active)
      OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
                brw->gs.state_offset | 1);
   else
      OUT_BATCH(0);
   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->clip.state_offset | 1);
   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->sf.state_offset);
   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->wm.state_offset);
   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->cc.state_offset);
   ADVANCE_BATCH();

   brw->state.dirty.brw |= BRW_NEW_PSP;
}

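/* The pipelined state pointers, URB fence, and constant buffer state are
 * grouped into a single atom so that they are always re-emitted together.
 */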
static void upload_psp_urb_cbs(struct brw_context *brw)
{
   upload_pipelined_state_pointers(brw);
   brw_upload_urb_fence(brw);
   brw_upload_cs_urb_state(brw);
}

const struct brw_tracked_state brw_psp_urb_cbs = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_URB_FENCE |
              BRW_NEW_BATCH |
              BRW_NEW_STATE_BASE_ADDRESS),
      .cache = (CACHE_NEW_VS_UNIT |
                CACHE_NEW_GS_UNIT |
                CACHE_NEW_GS_PROG |
                CACHE_NEW_CLIP_UNIT |
                CACHE_NEW_SF_UNIT |
                CACHE_NEW_WM_UNIT |
                CACHE_NEW_CC_UNIT)
   },
   .emit = upload_psp_urb_cbs,
};

uint32_t
brw_depthbuffer_format(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;
   struct intel_renderbuffer *drb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
   struct intel_renderbuffer *srb;

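   /* With no depth attachment, a packed depth/stencil format bound as the
    * stencil attachment (and not split into a separate stencil miptree)
    * still determines the depth buffer format.
    */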
   if (!drb &&
       (srb = intel_get_renderbuffer(fb, BUFFER_STENCIL)) &&
       !srb->mt->stencil_mt &&
       (srb->Base.Format == MESA_FORMAT_S8_Z24 ||
        srb->Base.Format == MESA_FORMAT_Z32_FLOAT_X24S8)) {
      drb = srb;
   }

   if (!drb)
      return BRW_DEPTHFORMAT_D32_FLOAT;

   switch (drb->mt->format) {
   case MESA_FORMAT_Z16:
      return BRW_DEPTHFORMAT_D16_UNORM;
   case MESA_FORMAT_Z32_FLOAT:
      return BRW_DEPTHFORMAT_D32_FLOAT;
   case MESA_FORMAT_X8_Z24:
      if (intel->gen >= 5)
         return BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
      else /* Gen4 doesn't support X8; use S8 instead. */
         return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
   case MESA_FORMAT_S8_Z24:
      return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
   case MESA_FORMAT_Z32_FLOAT_X24S8:
      return BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
   default:
      _mesa_problem(ctx, "Unexpected depth format %s\n",
                    _mesa_get_format_name(drb->Base.Format));
      return BRW_DEPTHFORMAT_D16_UNORM;
   }
}

static void emit_depthbuffer(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;
   /* _NEW_BUFFERS */
   struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
   struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
   struct intel_mipmap_tree *stencil_mt = NULL;
   struct intel_region *hiz_region = NULL;
   unsigned int len;
   bool separate_stencil = false;

   if (depth_irb &&
       depth_irb->mt &&
       depth_irb->mt->hiz_mt) {
      hiz_region = depth_irb->mt->hiz_mt->region;
   }

   /* 3DSTATE_DEPTH_BUFFER and 3DSTATE_STENCIL_BUFFER are both
    * non-pipelined state that will need the PIPE_CONTROL workaround.
    */
   if (intel->gen == 6) {
      intel_emit_post_sync_nonzero_flush(intel);
      intel_emit_depth_stall_flushes(intel);
   }

   /* Find the real separate stencil mt if present. */
   if (stencil_irb) {
      stencil_mt = stencil_irb->mt;
      if (stencil_mt->stencil_mt)
         stencil_mt = stencil_mt->stencil_mt;

      if (stencil_mt->format == MESA_FORMAT_S8)
         separate_stencil = true;
   }

   /* If there's a packed depth/stencil bound to stencil only, we need to
    * emit the packed depth/stencil buffer packet.
    */
   if (!depth_irb && stencil_irb && !separate_stencil)
      depth_irb = stencil_irb;

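   /* 3DSTATE_DEPTH_BUFFER grew over the generations: 5 DWords on original
    * Gen4, 6 on G4x/Gen5 (adding the depth coordinate offset DWord written
    * below), and 7 on Gen6+.
    */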
   if (intel->gen >= 6)
      len = 7;
   else if (intel->is_g4x || intel->gen == 5)
      len = 6;
   else
      len = 5;

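   /* With neither depth nor stencil, emit a null depth buffer so the
    * hardware is still left with defined depth state.
    */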
   if (!depth_irb && !separate_stencil) {
      BEGIN_BATCH(len);
      OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
      OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
                (BRW_SURFACE_NULL << 29));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);

      if (intel->is_g4x || intel->gen >= 5)
         OUT_BATCH(0);

      if (intel->gen >= 6)
         OUT_BATCH(0);

      ADVANCE_BATCH();

   } else if (!depth_irb && separate_stencil) {
      /*
       * There exists a separate stencil buffer but no depth buffer.
       *
       * The stencil buffer inherits most of its fields from
       * 3DSTATE_DEPTH_BUFFER: namely the tile walk, surface type, width, and
       * height.
       *
       * Since the stencil buffer has quirky pitch requirements, its region
       * was allocated with half height and double cpp, so we need a
       * multiplier of 2 to obtain the surface's real height.
       *
       * Enable the hiz bit because it and the separate stencil bit must have
       * the same value. From Section 2.11.5.6.1.1 3DSTATE_DEPTH_BUFFER, Bit
       * 1.21 "Separate Stencil Enable":
       *     [DevIL]: If this field is enabled, Hierarchical Depth Buffer
       *     Enable must also be enabled.
       *
       *     [DevGT]: This field must be set to the same value (enabled or
       *     disabled) as Hierarchical Depth Buffer Enable.
       *
       * The tiled bit must be set. From the Sandybridge PRM, Volume 2, Part 1,
       * Section 7.5.5.1.1 3DSTATE_DEPTH_BUFFER, Bit 1.27 "Tiled Surface":
       *     [DevGT+]: This field must be set to TRUE.
       */
      struct intel_region *region = stencil_mt->region;

      assert(intel->has_separate_stencil);

      BEGIN_BATCH(len);
      OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
      OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
                (1 << 21) | /* separate stencil enable */
                (1 << 22) | /* hiz enable */
                (BRW_TILEWALK_YMAJOR << 26) |
                (1 << 27) | /* tiled surface */
                (BRW_SURFACE_2D << 29));
      OUT_BATCH(0);
      OUT_BATCH(((region->width - 1) << 6) |
                ((2 * region->height - 1) << 19));
      OUT_BATCH(0);
      OUT_BATCH(0);

      if (intel->gen >= 6)
         OUT_BATCH(0);

      ADVANCE_BATCH();

   } else {
      struct intel_region *region = depth_irb->mt->region;
      uint32_t tile_x, tile_y, offset;

      /* If using separate stencil, hiz must be enabled. */
      assert(!separate_stencil || hiz_region);

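      /* The depth buffer address must be tile-aligned; any sub-tile X/Y
       * offset of the renderbuffer within its region is returned here and
       * programmed via the depth coordinate offset DWord on G4x and later.
       */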
      offset = intel_renderbuffer_tile_offsets(depth_irb, &tile_x, &tile_y);

      assert(intel->gen < 6 || region->tiling == I915_TILING_Y);
      assert(!hiz_region || region->tiling == I915_TILING_Y);

      BEGIN_BATCH(len);
      OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
      OUT_BATCH(((region->pitch * region->cpp) - 1) |
                (brw_depthbuffer_format(brw) << 18) |
                ((hiz_region ? 1 : 0) << 21) | /* separate stencil enable */
                ((hiz_region ? 1 : 0) << 22) | /* hiz enable */
                (BRW_TILEWALK_YMAJOR << 26) |
                ((region->tiling != I915_TILING_NONE) << 27) |
                (BRW_SURFACE_2D << 29));
      OUT_RELOC(region->bo,
                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                offset);
      OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
                ((region->width - 1) << 6) |
                ((region->height - 1) << 19));
      OUT_BATCH(0);

      if (intel->is_g4x || intel->gen >= 5)
         OUT_BATCH(tile_x | (tile_y << 16));
      else
         assert(tile_x == 0 && tile_y == 0);

      if (intel->gen >= 6)
         OUT_BATCH(0);

      ADVANCE_BATCH();
   }

   if (hiz_region || separate_stencil) {
      /*
       * In the 3DSTATE_DEPTH_BUFFER batch emitted above, the 'separate
       * stencil enable' and 'hiz enable' bits were set. Therefore we must
       * emit 3DSTATE_HIER_DEPTH_BUFFER and 3DSTATE_STENCIL_BUFFER. Even if
       * there is no stencil buffer, 3DSTATE_STENCIL_BUFFER must be emitted;
       * failure to do so causes hangs on gen5 and a stall on gen6.
       */

      /* Emit hiz buffer. */
      if (hiz_region) {
         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
         OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1);
         OUT_RELOC(hiz_region->bo,
                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                   0);
         ADVANCE_BATCH();
      } else {
         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
         OUT_BATCH(0);
         OUT_BATCH(0);
         ADVANCE_BATCH();
      }

      /* Emit stencil buffer. */
      if (separate_stencil) {
         struct intel_region *region = stencil_mt->region;
         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
         OUT_BATCH(region->pitch * region->cpp - 1);
         OUT_RELOC(region->bo,
                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                   0);
         ADVANCE_BATCH();
      } else {
         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
         OUT_BATCH(0);
         OUT_BATCH(0);
         ADVANCE_BATCH();
      }
   }

   /*
    * On Gen >= 6, emit clear params for safety. If using hiz, then clear
    * params must be emitted.
    *
    * From Section 2.11.5.6.4.1 3DSTATE_CLEAR_PARAMS:
    *     3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE
    *     packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
    */
   if (intel->gen >= 6 || hiz_region) {
      if (intel->gen == 6)
         intel_emit_post_sync_nonzero_flush(intel);

      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 | (2 - 2));
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }
}

const struct brw_tracked_state brw_depthbuffer = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH,
      .cache = 0,
   },
   .emit = emit_depthbuffer,
};


/***********************************************************************
 * Polygon stipple packet
 */

static void upload_polygon_stipple(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &brw->intel.ctx;
   GLuint i;

   /* _NEW_POLYGON */
   if (!ctx->Polygon.StippleFlag)
      return;

   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(intel);

   BEGIN_BATCH(33);
   OUT_BATCH(_3DSTATE_POLY_STIPPLE_PATTERN << 16 | (33 - 2));

   /* Polygon stipple is provided in OpenGL order, i.e. bottom
    * row first. If we're rendering to a window (i.e. the
    * default frame buffer object, 0), then we need to invert
    * it to match our pixel layout. But if we're rendering
    * to a FBO (i.e. any named frame buffer object), we *don't*
    * need to invert - we already match the layout.
    */
   if (ctx->DrawBuffer->Name == 0) {
      for (i = 0; i < 32; i++)
         OUT_BATCH(ctx->PolygonStipple[31 - i]); /* invert */
   } else {
      for (i = 0; i < 32; i++)
         OUT_BATCH(ctx->PolygonStipple[i]);
   }
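   /* CACHED_BATCH() compares the packet just emitted with the copy saved
    * the last time around and rewinds the batch if nothing changed, so an
    * unchanged stipple pattern costs no batch space.
    */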
   CACHED_BATCH();
}

const struct brw_tracked_state brw_polygon_stipple = {
   .dirty = {
      .mesa = (_NEW_POLYGONSTIPPLE |
               _NEW_POLYGON),
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_polygon_stipple
};


/***********************************************************************
 * Polygon stipple offset packet
 */

static void upload_polygon_stipple_offset(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &brw->intel.ctx;

   /* _NEW_POLYGON */
   if (!ctx->Polygon.StippleFlag)
      return;

   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(intel);

   BEGIN_BATCH(2);
   OUT_BATCH(_3DSTATE_POLY_STIPPLE_OFFSET << 16 | (2 - 2));

   /* _NEW_BUFFERS
    *
    * If we're drawing to a system window (ctx->DrawBuffer->Name == 0),
    * we have to invert the Y axis in order to match the OpenGL
    * pixel coordinate system, and our offset must be matched
    * to the window position. If we're drawing to a FBO
    * (ctx->DrawBuffer->Name != 0), then our native pixel coordinate
    * system works just fine, and there's no window system to
    * worry about.
    */
   if (ctx->DrawBuffer->Name == 0)
      OUT_BATCH((32 - (ctx->DrawBuffer->Height & 31)) & 31);
   else
      OUT_BATCH(0);
   CACHED_BATCH();
}

const struct brw_tracked_state brw_polygon_stipple_offset = {
   .dirty = {
      .mesa = (_NEW_BUFFERS |
               _NEW_POLYGON),
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_polygon_stipple_offset
};

/**********************************************************************
 * AA Line parameters
 */
static void upload_aa_line_parameters(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &brw->intel.ctx;

   if (!ctx->Line.SmoothFlag || !brw->has_aa_line_parameters)
      return;

   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(intel);

   BEGIN_BATCH(3);
   OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS << 16 | (3 - 2));
   /* use legacy aa line coverage computation */
   OUT_BATCH(0);
   OUT_BATCH(0);
   CACHED_BATCH();
}

const struct brw_tracked_state brw_aa_line_parameters = {
   .dirty = {
      .mesa = _NEW_LINE,
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_aa_line_parameters
};

/***********************************************************************
 * Line stipple packet
 */

static void upload_line_stipple(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &brw->intel.ctx;
   GLfloat tmp;
   GLint tmpi;

   if (!ctx->Line.StippleFlag)
      return;

   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(intel);

   BEGIN_BATCH(3);
   OUT_BATCH(_3DSTATE_LINE_STIPPLE_PATTERN << 16 | (3 - 2));
   OUT_BATCH(ctx->Line.StipplePattern);
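   /* DW2 packs the repeat count in the low word and its reciprocal, as a
    * 13-bit fixed-point fraction, in the high word.
    */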
   tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor;
   tmpi = tmp * (1 << 13);
   OUT_BATCH(tmpi << 16 | ctx->Line.StippleFactor);
   CACHED_BATCH();
}

const struct brw_tracked_state brw_line_stipple = {
   .dirty = {
      .mesa = _NEW_LINE,
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_line_stipple
};


/***********************************************************************
 * Misc invariant state packets
 */

static void upload_invarient_state(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;

   /* 3DSTATE_SIP, 3DSTATE_MULTISAMPLE, etc. are nonpipelined. */
   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(intel);

   /* Select the 3D pipeline (as opposed to media) */
   BEGIN_BATCH(1);
   OUT_BATCH(brw->CMD_PIPELINE_SELECT << 16 | 0);
   ADVANCE_BATCH();

   if (intel->gen < 6) {
      /* Disable depth offset clamping. */
      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2));
      OUT_BATCH_F(0.0);
      ADVANCE_BATCH();
   }

   if (intel->gen >= 6) {
      int i;
      int len = intel->gen >= 7 ? 4 : 3;

      BEGIN_BATCH(len);
      OUT_BATCH(_3DSTATE_MULTISAMPLE << 16 | (len - 2));
      OUT_BATCH(MS_PIXEL_LOCATION_CENTER |
                MS_NUMSAMPLES_1);
      OUT_BATCH(0); /* positions for 4/8-sample */
      if (intel->gen >= 7)
         OUT_BATCH(0);
      ADVANCE_BATCH();

      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_SAMPLE_MASK << 16 | (2 - 2));
      OUT_BATCH(1);
      ADVANCE_BATCH();

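      /* Initialize all four streamed vertex buffer indices to zero with a
       * maximum index of 0xffffffff, so SVB writes behave before any
       * transform feedback state has been emitted.
       */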
      if (intel->gen < 7) {
         for (i = 0; i < 4; i++) {
            BEGIN_BATCH(4);
            OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
            OUT_BATCH(i << SVB_INDEX_SHIFT);
            OUT_BATCH(0);
            OUT_BATCH(0xffffffff);
            ADVANCE_BATCH();
         }
      }
   }

   BEGIN_BATCH(2);
   OUT_BATCH(CMD_STATE_SIP << 16 | (2 - 2));
   OUT_BATCH(0);
   ADVANCE_BATCH();

   BEGIN_BATCH(1);
   OUT_BATCH(brw->CMD_VF_STATISTICS << 16 |
             (unlikely(INTEL_DEBUG & DEBUG_STATS) ? 1 : 0));
   ADVANCE_BATCH();
}

const struct brw_tracked_state brw_invarient_state = {
   .dirty = {
      .mesa = 0,
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_invarient_state
};

/**
 * Define the base addresses which some state is referenced from.
 *
 * This allows us to avoid having to emit relocations for the objects,
 * and is actually required for binding table pointers on gen6.
 *
 * Surface state base address covers binding table pointers and
 * surface state objects, but not the surfaces that the surface state
 * objects point to.
 */
static void upload_state_base_address(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;

   /* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of
    * vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be
    * programmed prior to STATE_BASE_ADDRESS.
    *
    * However, given that the instruction SBA (general state base
    * address) on this chipset is always set to 0 across X and GL,
    * maybe this isn't required for us in particular.
    */

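   /* Bit 0 of each base address and upper bound DWord is its "modify
    * enable" bit, so the bare OUT_BATCH(1)s below program an address (or
    * bound) of zero with modify enabled; relocation deltas of 1 do the
    * same for the buffer-relative bases.
    */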
   if (intel->gen >= 6) {
      if (intel->gen == 6)
         intel_emit_post_sync_nonzero_flush(intel);

      BEGIN_BATCH(10);
      OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
      /* General state base address: stateless DP read/write requests */
      OUT_BATCH(1);
      /* Surface state base address:
       * BINDING_TABLE_STATE
       * SURFACE_STATE
       */
      OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1);
      /* Dynamic state base address:
       * SAMPLER_STATE
       * SAMPLER_BORDER_COLOR_STATE
       * CLIP, SF, WM/CC viewport state
       * COLOR_CALC_STATE
       * DEPTH_STENCIL_STATE
       * BLEND_STATE
       * Push constants (when INSTPM: CONSTANT_BUFFER Address Offset
       * Disable is clear, which we rely on)
       */
      OUT_RELOC(intel->batch.bo, (I915_GEM_DOMAIN_RENDER |
                                  I915_GEM_DOMAIN_INSTRUCTION), 0, 1);

      OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */
      OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
                1); /* Instruction base address: shader kernels (incl. SIP) */

      OUT_BATCH(1); /* General state upper bound */
      OUT_BATCH(1); /* Dynamic state upper bound */
      OUT_BATCH(1); /* Indirect object upper bound */
      OUT_BATCH(1); /* Instruction access upper bound */
      ADVANCE_BATCH();
   } else if (intel->gen == 5) {
      BEGIN_BATCH(8);
      OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
      OUT_BATCH(1); /* General state base address */
      OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
                1); /* Surface state base address */
      OUT_BATCH(1); /* Indirect object base address */
      OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
                1); /* Instruction base address */
      OUT_BATCH(1); /* General state upper bound */
      OUT_BATCH(1); /* Indirect object upper bound */
      OUT_BATCH(1); /* Instruction access upper bound */
      ADVANCE_BATCH();
   } else {
      BEGIN_BATCH(6);
      OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
      OUT_BATCH(1); /* General state base address */
      OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
                1); /* Surface state base address */
      OUT_BATCH(1); /* Indirect object base address */
      OUT_BATCH(1); /* General state upper bound */
      OUT_BATCH(1); /* Indirect object upper bound */
      ADVANCE_BATCH();
   }

   /* According to section 3.6.1 of VOL1 of the 965 PRM,
    * STATE_BASE_ADDRESS updates require a reissue of:
    *
    * 3DSTATE_PIPELINE_POINTERS
    * 3DSTATE_BINDING_TABLE_POINTERS
    * MEDIA_STATE_POINTERS
    *
    * and this continues through Ironlake. The Sandy Bridge PRM, vol
    * 1 part 1 says that the following packets must be reissued:
    *
    * 3DSTATE_CC_POINTERS
    * 3DSTATE_BINDING_TABLE_POINTERS
    * 3DSTATE_SAMPLER_STATE_POINTERS
    * 3DSTATE_VIEWPORT_STATE_POINTERS
    * MEDIA_STATE_POINTERS
    *
    * Those are always reissued following SBA updates anyway (new
    * batch time), except in the case of the program cache BO
    * changing. Having a separate state flag makes the sequence more
    * obvious.
    */

   brw->state.dirty.brw |= BRW_NEW_STATE_BASE_ADDRESS;
}

const struct brw_tracked_state brw_state_base_address = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_BATCH |
              BRW_NEW_PROGRAM_CACHE),
      .cache = 0,
   },
   .emit = upload_state_base_address
};