e87b9d1657fe03bb49628b4e28fb4029a97c5512
[mesa.git] / src / mesa / drivers / dri / i965 / gen7_blorp.cpp
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25
26 #include "intel_batchbuffer.h"
27 #include "intel_mipmap_tree.h"
28
29 #include "brw_context.h"
30 #include "brw_state.h"
31
32 #include "brw_blorp.h"
33
34
35 /* 3DSTATE_URB_VS
36 * 3DSTATE_URB_HS
37 * 3DSTATE_URB_DS
38 * 3DSTATE_URB_GS
39 *
40 * If the 3DSTATE_URB_VS is emitted, than the others must be also.
41 * From the Ivybridge PRM, Volume 2 Part 1, section 1.7.1 3DSTATE_URB_VS:
42 *
43 * 3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be
44 * programmed in order for the programming of this state to be
45 * valid.
46 */
47 static void
48 gen7_blorp_emit_urb_config(struct brw_context *brw)
49 {
50 unsigned urb_size = (brw->is_haswell && brw->gt == 3) ? 32 : 16;
51 gen7_emit_push_constant_state(brw,
52 urb_size / 2 /* vs_size */,
53 0 /* gs_size */,
54 urb_size / 2 /* fs_size */);
55
56 /* The minimum valid number of VS entries is 32. See 3DSTATE_URB_VS, Dword
57 * 1.15:0 "VS Number of URB Entries".
58 */
59 gen7_emit_urb_state(brw,
60 32 /* num_vs_entries */,
61 2 /* vs_size */,
62 2 /* vs_start */,
63 0 /* num_gs_entries */,
64 1 /* gs_size */,
65 2 /* gs_start */);
66 }
67
68
69 /* 3DSTATE_BLEND_STATE_POINTERS */
70 static void
71 gen7_blorp_emit_blend_state_pointer(struct brw_context *brw,
72 uint32_t cc_blend_state_offset)
73 {
74 BEGIN_BATCH(2);
75 OUT_BATCH(_3DSTATE_BLEND_STATE_POINTERS << 16 | (2 - 2));
76 OUT_BATCH(cc_blend_state_offset | 1);
77 ADVANCE_BATCH();
78 }
79
80
81 /* 3DSTATE_CC_STATE_POINTERS */
82 static void
83 gen7_blorp_emit_cc_state_pointer(struct brw_context *brw,
84 uint32_t cc_state_offset)
85 {
86 BEGIN_BATCH(2);
87 OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2));
88 OUT_BATCH(cc_state_offset | 1);
89 ADVANCE_BATCH();
90 }
91
92 static void
93 gen7_blorp_emit_cc_viewport(struct brw_context *brw)
94 {
95 struct brw_cc_viewport *ccv;
96 uint32_t cc_vp_offset;
97
98 ccv = (struct brw_cc_viewport *)brw_state_batch(brw, AUB_TRACE_CC_VP_STATE,
99 sizeof(*ccv), 32,
100 &cc_vp_offset);
101 ccv->min_depth = 0.0;
102 ccv->max_depth = 1.0;
103
104 BEGIN_BATCH(2);
105 OUT_BATCH(_3DSTATE_VIEWPORT_STATE_POINTERS_CC << 16 | (2 - 2));
106 OUT_BATCH(cc_vp_offset);
107 ADVANCE_BATCH();
108 }
109
110
111 /* 3DSTATE_DEPTH_STENCIL_STATE_POINTERS
112 *
113 * The offset is relative to CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
114 */
115 static void
116 gen7_blorp_emit_depth_stencil_state_pointers(struct brw_context *brw,
117 uint32_t depthstencil_offset)
118 {
119 BEGIN_BATCH(2);
120 OUT_BATCH(_3DSTATE_DEPTH_STENCIL_STATE_POINTERS << 16 | (2 - 2));
121 OUT_BATCH(depthstencil_offset | 1);
122 ADVANCE_BATCH();
123 }
124
125
126 /* SURFACE_STATE for renderbuffer or texture surface (see
127 * brw_update_renderbuffer_surface and brw_update_texture_surface)
128 */
129 static uint32_t
130 gen7_blorp_emit_surface_state(struct brw_context *brw,
131 const brw_blorp_surface_info *surface,
132 uint32_t read_domains, uint32_t write_domain,
133 bool is_render_target)
134 {
135 uint32_t wm_surf_offset;
136 uint32_t width = surface->width;
137 uint32_t height = surface->height;
138 /* Note: since gen7 uses INTEL_MSAA_LAYOUT_CMS or INTEL_MSAA_LAYOUT_UMS for
139 * color surfaces, width and height are measured in pixels; we don't need
140 * to divide them by 2 as we do for Gen6 (see
141 * gen6_blorp_emit_surface_state).
142 */
143 struct intel_mipmap_tree *mt = surface->mt;
144 uint32_t tile_x, tile_y;
145 const uint8_t mocs = GEN7_MOCS_L3;
146
147 uint32_t tiling = surface->map_stencil_as_y_tiled
148 ? I915_TILING_Y : mt->tiling;
149
150 uint32_t *surf = (uint32_t *)
151 brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 8 * 4, 32, &wm_surf_offset);
152 memset(surf, 0, 8 * 4);
153
154 surf[0] = BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
155 surface->brw_surfaceformat << BRW_SURFACE_FORMAT_SHIFT |
156 gen7_surface_tiling_mode(tiling);
157
158 if (surface->mt->valign == 4)
159 surf[0] |= GEN7_SURFACE_VALIGN_4;
160 if (surface->mt->halign == 8)
161 surf[0] |= GEN7_SURFACE_HALIGN_8;
162
163 if (surface->array_layout == ALL_SLICES_AT_EACH_LOD)
164 surf[0] |= GEN7_SURFACE_ARYSPC_LOD0;
165 else
166 surf[0] |= GEN7_SURFACE_ARYSPC_FULL;
167
168 /* reloc */
169 surf[1] =
170 surface->compute_tile_offsets(&tile_x, &tile_y) + mt->bo->offset64;
171
172 /* Note that the low bits of these fields are missing, so
173 * there's the possibility of getting in trouble.
174 */
175 assert(tile_x % 4 == 0);
176 assert(tile_y % 2 == 0);
177 surf[5] = SET_FIELD(tile_x / 4, BRW_SURFACE_X_OFFSET) |
178 SET_FIELD(tile_y / 2, BRW_SURFACE_Y_OFFSET) |
179 SET_FIELD(mocs, GEN7_SURFACE_MOCS);
180
181 surf[2] = SET_FIELD(width - 1, GEN7_SURFACE_WIDTH) |
182 SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT);
183
184 uint32_t pitch_bytes = mt->pitch;
185 if (surface->map_stencil_as_y_tiled)
186 pitch_bytes *= 2;
187 surf[3] = pitch_bytes - 1;
188
189 surf[4] = gen7_surface_msaa_bits(surface->num_samples, surface->msaa_layout);
190 if (surface->mt->mcs_mt) {
191 gen7_set_surface_mcs_info(brw, surf, wm_surf_offset, surface->mt->mcs_mt,
192 is_render_target);
193 }
194
195 surf[7] = surface->mt->fast_clear_color_value;
196
197 if (brw->is_haswell) {
198 surf[7] |= (SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) |
199 SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
200 SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) |
201 SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A));
202 }
203
204 /* Emit relocation to surface contents */
205 drm_intel_bo_emit_reloc(brw->batch.bo,
206 wm_surf_offset + 4,
207 mt->bo,
208 surf[1] - mt->bo->offset64,
209 read_domains, write_domain);
210
211 gen7_check_surface_setup(surf, is_render_target);
212
213 return wm_surf_offset;
214 }
215
216
217 /* 3DSTATE_VS
218 *
219 * Disable vertex shader.
220 */
221 static void
222 gen7_blorp_emit_vs_disable(struct brw_context *brw)
223 {
224 BEGIN_BATCH(7);
225 OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (7 - 2));
226 OUT_BATCH(0);
227 OUT_BATCH(0);
228 OUT_BATCH(0);
229 OUT_BATCH(0);
230 OUT_BATCH(0);
231 OUT_BATCH(0);
232 ADVANCE_BATCH();
233
234 BEGIN_BATCH(6);
235 OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
236 OUT_BATCH(0);
237 OUT_BATCH(0);
238 OUT_BATCH(0);
239 OUT_BATCH(0);
240 OUT_BATCH(0);
241 ADVANCE_BATCH();
242 }
243
244
245 /* 3DSTATE_HS
246 *
247 * Disable the hull shader.
248 */
249 static void
250 gen7_blorp_emit_hs_disable(struct brw_context *brw)
251 {
252 BEGIN_BATCH(7);
253 OUT_BATCH(_3DSTATE_CONSTANT_HS << 16 | (7 - 2));
254 OUT_BATCH(0);
255 OUT_BATCH(0);
256 OUT_BATCH(0);
257 OUT_BATCH(0);
258 OUT_BATCH(0);
259 OUT_BATCH(0);
260 ADVANCE_BATCH();
261
262 BEGIN_BATCH(7);
263 OUT_BATCH(_3DSTATE_HS << 16 | (7 - 2));
264 OUT_BATCH(0);
265 OUT_BATCH(0);
266 OUT_BATCH(0);
267 OUT_BATCH(0);
268 OUT_BATCH(0);
269 OUT_BATCH(0);
270 ADVANCE_BATCH();
271 }
272
273
274 /* 3DSTATE_TE
275 *
276 * Disable the tesselation engine.
277 */
278 static void
279 gen7_blorp_emit_te_disable(struct brw_context *brw)
280 {
281 BEGIN_BATCH(4);
282 OUT_BATCH(_3DSTATE_TE << 16 | (4 - 2));
283 OUT_BATCH(0);
284 OUT_BATCH(0);
285 OUT_BATCH(0);
286 ADVANCE_BATCH();
287 }
288
289
290 /* 3DSTATE_DS
291 *
292 * Disable the domain shader.
293 */
294 static void
295 gen7_blorp_emit_ds_disable(struct brw_context *brw)
296 {
297 BEGIN_BATCH(7);
298 OUT_BATCH(_3DSTATE_CONSTANT_DS << 16 | (7 - 2));
299 OUT_BATCH(0);
300 OUT_BATCH(0);
301 OUT_BATCH(0);
302 OUT_BATCH(0);
303 OUT_BATCH(0);
304 OUT_BATCH(0);
305 ADVANCE_BATCH();
306
307 BEGIN_BATCH(6);
308 OUT_BATCH(_3DSTATE_DS << 16 | (6 - 2));
309 OUT_BATCH(0);
310 OUT_BATCH(0);
311 OUT_BATCH(0);
312 OUT_BATCH(0);
313 OUT_BATCH(0);
314 ADVANCE_BATCH();
315 }
316
317 /* 3DSTATE_GS
318 *
319 * Disable the geometry shader.
320 */
321 static void
322 gen7_blorp_emit_gs_disable(struct brw_context *brw)
323 {
324 BEGIN_BATCH(7);
325 OUT_BATCH(_3DSTATE_CONSTANT_GS << 16 | (7 - 2));
326 OUT_BATCH(0);
327 OUT_BATCH(0);
328 OUT_BATCH(0);
329 OUT_BATCH(0);
330 OUT_BATCH(0);
331 OUT_BATCH(0);
332 ADVANCE_BATCH();
333
334 /**
335 * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages >
336 * Geometry > Geometry Shader > State:
337 *
338 * "Note: Because of corruption in IVB:GT2, software needs to flush the
339 * whole fixed function pipeline when the GS enable changes value in
340 * the 3DSTATE_GS."
341 *
342 * The hardware architects have clarified that in this context "flush the
343 * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS
344 * Stall" bit set.
345 */
346 if (!brw->is_haswell && brw->gt == 2 && brw->gs.enabled)
347 gen7_emit_cs_stall_flush(brw);
348
349 BEGIN_BATCH(7);
350 OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
351 OUT_BATCH(0);
352 OUT_BATCH(0);
353 OUT_BATCH(0);
354 OUT_BATCH(0);
355 OUT_BATCH(0);
356 OUT_BATCH(0);
357 ADVANCE_BATCH();
358 brw->gs.enabled = false;
359 }
360
361 /* 3DSTATE_STREAMOUT
362 *
363 * Disable streamout.
364 */
365 static void
366 gen7_blorp_emit_streamout_disable(struct brw_context *brw)
367 {
368 BEGIN_BATCH(3);
369 OUT_BATCH(_3DSTATE_STREAMOUT << 16 | (3 - 2));
370 OUT_BATCH(0);
371 OUT_BATCH(0);
372 ADVANCE_BATCH();
373 }
374
375
376 static void
377 gen7_blorp_emit_sf_config(struct brw_context *brw,
378 const brw_blorp_params *params)
379 {
380 /* 3DSTATE_SF
381 *
382 * Disable ViewportTransformEnable (dw1.1)
383 *
384 * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D
385 * Primitives Overview":
386 * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the
387 * use of screen- space coordinates).
388 *
389 * A solid rectangle must be rendered, so set FrontFaceFillMode (dw1.6:5)
390 * and BackFaceFillMode (dw1.4:3) to SOLID(0).
391 *
392 * From the Sandy Bridge PRM, Volume 2, Part 1, Section
393 * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode:
394 * SOLID: Any triangle or rectangle object found to be front-facing
395 * is rendered as a solid object. This setting is required when
396 * (rendering rectangle (RECTLIST) objects.
397 */
398 {
399 BEGIN_BATCH(7);
400 OUT_BATCH(_3DSTATE_SF << 16 | (7 - 2));
401 OUT_BATCH(params->depth_format <<
402 GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT);
403 OUT_BATCH(params->dst.num_samples > 1 ? GEN6_SF_MSRAST_ON_PATTERN : 0);
404 OUT_BATCH(0);
405 OUT_BATCH(0);
406 OUT_BATCH(0);
407 OUT_BATCH(0);
408 ADVANCE_BATCH();
409 }
410
411 /* 3DSTATE_SBE */
412 {
413 BEGIN_BATCH(14);
414 OUT_BATCH(_3DSTATE_SBE << 16 | (14 - 2));
415 OUT_BATCH(GEN7_SBE_SWIZZLE_ENABLE |
416 params->num_varyings << GEN7_SBE_NUM_OUTPUTS_SHIFT |
417 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
418 BRW_SF_URB_ENTRY_READ_OFFSET <<
419 GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT);
420 for (int i = 0; i < 12; ++i)
421 OUT_BATCH(0);
422 ADVANCE_BATCH();
423 }
424 }
425
426
427 /**
428 * Disable thread dispatch (dw5.19) and enable the HiZ op.
429 */
430 static void
431 gen7_blorp_emit_wm_config(struct brw_context *brw,
432 const brw_blorp_params *params,
433 brw_blorp_prog_data *prog_data)
434 {
435 uint32_t dw1 = 0, dw2 = 0;
436
437 switch (params->hiz_op) {
438 case GEN6_HIZ_OP_DEPTH_CLEAR:
439 dw1 |= GEN7_WM_DEPTH_CLEAR;
440 break;
441 case GEN6_HIZ_OP_DEPTH_RESOLVE:
442 dw1 |= GEN7_WM_DEPTH_RESOLVE;
443 break;
444 case GEN6_HIZ_OP_HIZ_RESOLVE:
445 dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE;
446 break;
447 case GEN6_HIZ_OP_NONE:
448 break;
449 default:
450 unreachable("not reached");
451 }
452 dw1 |= GEN7_WM_LINE_AA_WIDTH_1_0;
453 dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5;
454 dw1 |= 0 << GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT; /* No interp */
455 if (params->use_wm_prog) {
456 dw1 |= GEN7_WM_KILL_ENABLE; /* TODO: temporarily smash on */
457 dw1 |= GEN7_WM_DISPATCH_ENABLE; /* We are rendering */
458 }
459
460 if (params->dst.num_samples > 1) {
461 dw1 |= GEN7_WM_MSRAST_ON_PATTERN;
462 if (prog_data && prog_data->persample_msaa_dispatch)
463 dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE;
464 else
465 dw2 |= GEN7_WM_MSDISPMODE_PERPIXEL;
466 } else {
467 dw1 |= GEN7_WM_MSRAST_OFF_PIXEL;
468 dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE;
469 }
470
471 BEGIN_BATCH(3);
472 OUT_BATCH(_3DSTATE_WM << 16 | (3 - 2));
473 OUT_BATCH(dw1);
474 OUT_BATCH(dw2);
475 ADVANCE_BATCH();
476 }
477
478
479 /**
480 * 3DSTATE_PS
481 *
482 * Pixel shader dispatch is disabled above in 3DSTATE_WM, dw1.29. Despite
483 * that, thread dispatch info must still be specified.
484 * - Maximum Number of Threads (dw4.24:31) must be nonzero, as the
485 * valid range for this field is [0x3, 0x2f].
486 * - A dispatch mode must be given; that is, at least one of the
487 * "N Pixel Dispatch Enable" (N=8,16,32) fields must be set. This was
488 * discovered through simulator error messages.
489 */
490 static void
491 gen7_blorp_emit_ps_config(struct brw_context *brw,
492 const brw_blorp_params *params,
493 uint32_t prog_offset,
494 brw_blorp_prog_data *prog_data)
495 {
496 uint32_t dw2, dw4, dw5;
497 const int max_threads_shift = brw->is_haswell ?
498 HSW_PS_MAX_THREADS_SHIFT : IVB_PS_MAX_THREADS_SHIFT;
499
500 dw2 = dw4 = dw5 = 0;
501 dw4 |= (brw->max_wm_threads - 1) << max_threads_shift;
502
503 /* If there's a WM program, we need to do 16-pixel dispatch since that's
504 * what the program is compiled for. If there isn't, then it shouldn't
505 * matter because no program is actually being run. However, the hardware
506 * gets angry if we don't enable at least one dispatch mode, so just enable
507 * 16-pixel dispatch unconditionally.
508 */
509 dw4 |= GEN7_PS_16_DISPATCH_ENABLE;
510
511 if (brw->is_haswell)
512 dw4 |= SET_FIELD(1, HSW_PS_SAMPLE_MASK); /* 1 sample for now */
513 if (params->use_wm_prog) {
514 dw2 |= 1 << GEN7_PS_SAMPLER_COUNT_SHIFT; /* Up to 4 samplers */
515 dw4 |= GEN7_PS_PUSH_CONSTANT_ENABLE;
516 dw5 |= prog_data->first_curbe_grf << GEN7_PS_DISPATCH_START_GRF_SHIFT_0;
517 }
518
519 BEGIN_BATCH(8);
520 OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2));
521 OUT_BATCH(params->use_wm_prog ? prog_offset : 0);
522 OUT_BATCH(dw2);
523 OUT_BATCH(0);
524 OUT_BATCH(dw4);
525 OUT_BATCH(dw5);
526 OUT_BATCH(0);
527 OUT_BATCH(0);
528 ADVANCE_BATCH();
529 }
530
531
532 static void
533 gen7_blorp_emit_binding_table_pointers_ps(struct brw_context *brw,
534 uint32_t wm_bind_bo_offset)
535 {
536 BEGIN_BATCH(2);
537 OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_PS << 16 | (2 - 2));
538 OUT_BATCH(wm_bind_bo_offset);
539 ADVANCE_BATCH();
540 }
541
542
543 static void
544 gen7_blorp_emit_sampler_state_pointers_ps(struct brw_context *brw,
545 uint32_t sampler_offset)
546 {
547 BEGIN_BATCH(2);
548 OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_PS << 16 | (2 - 2));
549 OUT_BATCH(sampler_offset);
550 ADVANCE_BATCH();
551 }
552
553
554 static void
555 gen7_blorp_emit_constant_ps(struct brw_context *brw,
556 uint32_t wm_push_const_offset)
557 {
558 const uint8_t mocs = GEN7_MOCS_L3;
559
560 /* Make sure the push constants fill an exact integer number of
561 * registers.
562 */
563 assert(sizeof(brw_blorp_wm_push_constants) % 32 == 0);
564
565 /* There must be at least one register worth of push constant data. */
566 assert(BRW_BLORP_NUM_PUSH_CONST_REGS > 0);
567
568 /* Enable push constant buffer 0. */
569 BEGIN_BATCH(7);
570 OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 |
571 (7 - 2));
572 OUT_BATCH(BRW_BLORP_NUM_PUSH_CONST_REGS);
573 OUT_BATCH(0);
574 OUT_BATCH(wm_push_const_offset | mocs);
575 OUT_BATCH(0);
576 OUT_BATCH(0);
577 OUT_BATCH(0);
578 ADVANCE_BATCH();
579 }
580
581 static void
582 gen7_blorp_emit_constant_ps_disable(struct brw_context *brw)
583 {
584 BEGIN_BATCH(7);
585 OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (7 - 2));
586 OUT_BATCH(0);
587 OUT_BATCH(0);
588 OUT_BATCH(0);
589 OUT_BATCH(0);
590 OUT_BATCH(0);
591 OUT_BATCH(0);
592 ADVANCE_BATCH();
593 }
594
595 static void
596 gen7_blorp_emit_depth_stencil_config(struct brw_context *brw,
597 const brw_blorp_params *params)
598 {
599 const uint8_t mocs = GEN7_MOCS_L3;
600 uint32_t surfwidth, surfheight;
601 uint32_t surftype;
602 unsigned int depth = MAX2(params->depth.mt->logical_depth0, 1);
603 unsigned int min_array_element;
604 GLenum gl_target = params->depth.mt->target;
605 unsigned int lod;
606
607 switch (gl_target) {
608 case GL_TEXTURE_CUBE_MAP_ARRAY:
609 case GL_TEXTURE_CUBE_MAP:
610 /* The PRM claims that we should use BRW_SURFACE_CUBE for this
611 * situation, but experiments show that gl_Layer doesn't work when we do
612 * this. So we use BRW_SURFACE_2D, since for rendering purposes this is
613 * equivalent.
614 */
615 surftype = BRW_SURFACE_2D;
616 depth *= 6;
617 break;
618 default:
619 surftype = translate_tex_target(gl_target);
620 break;
621 }
622
623 min_array_element = params->depth.layer;
624 if (params->depth.mt->num_samples > 1) {
625 /* Convert physical layer to logical layer. */
626 min_array_element /= params->depth.mt->num_samples;
627 }
628
629 lod = params->depth.level - params->depth.mt->first_level;
630
631 if (params->hiz_op != GEN6_HIZ_OP_NONE && lod == 0) {
632 /* HIZ ops for lod 0 may set the width & height a little
633 * larger to allow the fast depth clear to fit the hardware
634 * alignment requirements. (8x4)
635 */
636 surfwidth = params->depth.width;
637 surfheight = params->depth.height;
638 } else {
639 surfwidth = params->depth.mt->logical_width0;
640 surfheight = params->depth.mt->logical_height0;
641 }
642
643 /* 3DSTATE_DEPTH_BUFFER */
644 {
645 brw_emit_depth_stall_flushes(brw);
646
647 BEGIN_BATCH(7);
648 OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
649 OUT_BATCH((params->depth.mt->pitch - 1) |
650 params->depth_format << 18 |
651 1 << 22 | /* hiz enable */
652 1 << 28 | /* depth write */
653 surftype << 29);
654 OUT_RELOC(params->depth.mt->bo,
655 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
656 0);
657 OUT_BATCH((surfwidth - 1) << 4 |
658 (surfheight - 1) << 18 |
659 lod);
660 OUT_BATCH(((depth - 1) << 21) |
661 (min_array_element << 10) |
662 mocs);
663 OUT_BATCH(0);
664 OUT_BATCH((depth - 1) << 21);
665 ADVANCE_BATCH();
666 }
667
668 /* 3DSTATE_HIER_DEPTH_BUFFER */
669 {
670 struct intel_miptree_aux_buffer *hiz_buf = params->depth.mt->hiz_buf;
671
672 BEGIN_BATCH(3);
673 OUT_BATCH((GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
674 OUT_BATCH((mocs << 25) |
675 (hiz_buf->pitch - 1));
676 OUT_RELOC(hiz_buf->bo,
677 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
678 0);
679 ADVANCE_BATCH();
680 }
681
682 /* 3DSTATE_STENCIL_BUFFER */
683 {
684 BEGIN_BATCH(3);
685 OUT_BATCH((GEN7_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
686 OUT_BATCH(0);
687 OUT_BATCH(0);
688 ADVANCE_BATCH();
689 }
690 }
691
692
693 static void
694 gen7_blorp_emit_depth_disable(struct brw_context *brw)
695 {
696 brw_emit_depth_stall_flushes(brw);
697
698 BEGIN_BATCH(7);
699 OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
700 OUT_BATCH(BRW_DEPTHFORMAT_D32_FLOAT << 18 | (BRW_SURFACE_NULL << 29));
701 OUT_BATCH(0);
702 OUT_BATCH(0);
703 OUT_BATCH(0);
704 OUT_BATCH(0);
705 OUT_BATCH(0);
706 ADVANCE_BATCH();
707
708 BEGIN_BATCH(3);
709 OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2));
710 OUT_BATCH(0);
711 OUT_BATCH(0);
712 ADVANCE_BATCH();
713
714 BEGIN_BATCH(3);
715 OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2));
716 OUT_BATCH(0);
717 OUT_BATCH(0);
718 ADVANCE_BATCH();
719 }
720
721
722 /* 3DSTATE_CLEAR_PARAMS
723 *
724 * From the Ivybridge PRM, Volume 2 Part 1, Section 11.5.5.4
725 * 3DSTATE_CLEAR_PARAMS:
726 * 3DSTATE_CLEAR_PARAMS must always be programmed in the along
727 * with the other Depth/Stencil state commands(i.e. 3DSTATE_DEPTH_BUFFER,
728 * 3DSTATE_STENCIL_BUFFER, or 3DSTATE_HIER_DEPTH_BUFFER).
729 */
730 static void
731 gen7_blorp_emit_clear_params(struct brw_context *brw,
732 const brw_blorp_params *params)
733 {
734 BEGIN_BATCH(3);
735 OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS << 16 | (3 - 2));
736 OUT_BATCH(params->depth.mt ? params->depth.mt->depth_clear_value : 0);
737 OUT_BATCH(GEN7_DEPTH_CLEAR_VALID);
738 ADVANCE_BATCH();
739 }
740
741
742 /* 3DPRIMITIVE */
743 static void
744 gen7_blorp_emit_primitive(struct brw_context *brw,
745 const brw_blorp_params *params)
746 {
747 BEGIN_BATCH(7);
748 OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2));
749 OUT_BATCH(GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL |
750 _3DPRIM_RECTLIST);
751 OUT_BATCH(3); /* vertex count per instance */
752 OUT_BATCH(0);
753 OUT_BATCH(params->num_layers); /* instance count */
754 OUT_BATCH(0);
755 OUT_BATCH(0);
756 ADVANCE_BATCH();
757 }
758
759
760 /**
761 * \copydoc gen6_blorp_exec()
762 */
763 void
764 gen7_blorp_exec(struct brw_context *brw,
765 const brw_blorp_params *params)
766 {
767 if (brw->gen >= 8)
768 return;
769
770 brw_blorp_prog_data *prog_data = NULL;
771 uint32_t cc_blend_state_offset = 0;
772 uint32_t cc_state_offset = 0;
773 uint32_t depthstencil_offset;
774 uint32_t wm_push_const_offset = 0;
775 uint32_t wm_bind_bo_offset = 0;
776 uint32_t sampler_offset = 0;
777
778 uint32_t prog_offset = params->get_wm_prog(brw, &prog_data);
779 gen6_emit_3dstate_multisample(brw, params->dst.num_samples);
780 gen6_emit_3dstate_sample_mask(brw,
781 params->dst.num_samples > 1 ?
782 (1 << params->dst.num_samples) - 1 : 1);
783 gen6_blorp_emit_state_base_address(brw, params);
784 gen6_blorp_emit_vertices(brw, params);
785 gen7_blorp_emit_urb_config(brw);
786 if (params->use_wm_prog) {
787 cc_blend_state_offset = gen6_blorp_emit_blend_state(brw, params);
788 cc_state_offset = gen6_blorp_emit_cc_state(brw);
789 gen7_blorp_emit_blend_state_pointer(brw, cc_blend_state_offset);
790 gen7_blorp_emit_cc_state_pointer(brw, cc_state_offset);
791 }
792 depthstencil_offset = gen6_blorp_emit_depth_stencil_state(brw, params);
793 gen7_blorp_emit_depth_stencil_state_pointers(brw, depthstencil_offset);
794 if (brw->use_resource_streamer)
795 gen7_disable_hw_binding_tables(brw);
796 if (params->use_wm_prog) {
797 uint32_t wm_surf_offset_renderbuffer;
798 uint32_t wm_surf_offset_texture = 0;
799 wm_push_const_offset = gen6_blorp_emit_wm_constants(brw, params);
800 intel_miptree_used_for_rendering(params->dst.mt);
801 wm_surf_offset_renderbuffer =
802 gen7_blorp_emit_surface_state(brw, &params->dst,
803 I915_GEM_DOMAIN_RENDER,
804 I915_GEM_DOMAIN_RENDER,
805 true /* is_render_target */);
806 if (params->src.mt) {
807 wm_surf_offset_texture =
808 gen7_blorp_emit_surface_state(brw, &params->src,
809 I915_GEM_DOMAIN_SAMPLER, 0,
810 false /* is_render_target */);
811 }
812 wm_bind_bo_offset =
813 gen6_blorp_emit_binding_table(brw,
814 wm_surf_offset_renderbuffer,
815 wm_surf_offset_texture);
816 sampler_offset =
817 gen6_blorp_emit_sampler_state(brw, BRW_MAPFILTER_LINEAR, 0, true);
818 }
819 gen7_blorp_emit_vs_disable(brw);
820 gen7_blorp_emit_hs_disable(brw);
821 gen7_blorp_emit_te_disable(brw);
822 gen7_blorp_emit_ds_disable(brw);
823 gen7_blorp_emit_gs_disable(brw);
824 gen7_blorp_emit_streamout_disable(brw);
825 gen6_blorp_emit_clip_disable(brw);
826 gen7_blorp_emit_sf_config(brw, params);
827 gen7_blorp_emit_wm_config(brw, params, prog_data);
828 if (params->use_wm_prog) {
829 gen7_blorp_emit_binding_table_pointers_ps(brw, wm_bind_bo_offset);
830 gen7_blorp_emit_sampler_state_pointers_ps(brw, sampler_offset);
831 gen7_blorp_emit_constant_ps(brw, wm_push_const_offset);
832 } else {
833 gen7_blorp_emit_constant_ps_disable(brw);
834 }
835 gen7_blorp_emit_ps_config(brw, params, prog_offset, prog_data);
836 gen7_blorp_emit_cc_viewport(brw);
837
838 if (params->depth.mt)
839 gen7_blorp_emit_depth_stencil_config(brw, params);
840 else
841 gen7_blorp_emit_depth_disable(brw);
842 gen7_blorp_emit_clear_params(brw, params);
843 gen6_blorp_emit_drawing_rectangle(brw, params);
844 gen7_blorp_emit_primitive(brw, params);
845 }