i965: split gen{6,7}_blorp_exec functions into manageable chunks.
[mesa.git] / src / mesa / drivers / dri / i965 / gen7_blorp.cpp
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25
26 #include "intel_batchbuffer.h"
27 #include "intel_fbo.h"
28 #include "intel_mipmap_tree.h"
29
30 #include "brw_context.h"
31 #include "brw_defines.h"
32 #include "brw_state.h"
33
34 #include "brw_blorp.h"
35 #include "gen7_blorp.h"
36
37
38 /* 3DSTATE_URB_VS
39 * 3DSTATE_URB_HS
40 * 3DSTATE_URB_DS
41 * 3DSTATE_URB_GS
42 *
43 * If the 3DSTATE_URB_VS is emitted, than the others must be also. From the
44 * BSpec, Volume 2a "3D Pipeline Overview", Section 1.7.1 3DSTATE_URB_VS:
45 * 3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be
46 * programmed in order for the programming of this state to be
47 * valid.
48 */
49 static void
50 gen7_blorp_emit_urb_config(struct brw_context *brw,
51 const brw_blorp_params *params)
52 {
53 struct intel_context *intel = &brw->intel;
54
55 /* The minimum valid value is 32. See 3DSTATE_URB_VS,
56 * Dword 1.15:0 "VS Number of URB Entries".
57 */
58 int num_vs_entries = 32;
59
60 BEGIN_BATCH(2);
61 OUT_BATCH(_3DSTATE_URB_VS << 16 | (2 - 2));
62 OUT_BATCH(1 << GEN7_URB_ENTRY_SIZE_SHIFT |
63 0 << GEN7_URB_STARTING_ADDRESS_SHIFT |
64 num_vs_entries);
65 ADVANCE_BATCH();
66
67 BEGIN_BATCH(2);
68 OUT_BATCH(_3DSTATE_URB_GS << 16 | (2 - 2));
69 OUT_BATCH(0);
70 ADVANCE_BATCH();
71
72 BEGIN_BATCH(2);
73 OUT_BATCH(_3DSTATE_URB_HS << 16 | (2 - 2));
74 OUT_BATCH(0);
75 ADVANCE_BATCH();
76
77 BEGIN_BATCH(2);
78 OUT_BATCH(_3DSTATE_URB_DS << 16 | (2 - 2));
79 OUT_BATCH(0);
80 ADVANCE_BATCH();
81 }
82
83
84 /* 3DSTATE_DEPTH_STENCIL_STATE_POINTERS
85 *
86 * The offset is relative to CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
87 */
88 static void
89 gen7_blorp_emit_depth_stencil_state_pointers(struct brw_context *brw,
90 const brw_blorp_params *params,
91 uint32_t depthstencil_offset)
92 {
93 struct intel_context *intel = &brw->intel;
94
95 BEGIN_BATCH(2);
96 OUT_BATCH(_3DSTATE_DEPTH_STENCIL_STATE_POINTERS << 16 | (2 - 2));
97 OUT_BATCH(depthstencil_offset | 1);
98 ADVANCE_BATCH();
99 }
100
101
102 /* 3DSTATE_HS
103 *
104 * Disable the hull shader.
105 */
106 static void
107 gen7_blorp_emit_hs_disable(struct brw_context *brw,
108 const brw_blorp_params *params)
109 {
110 struct intel_context *intel = &brw->intel;
111
112 BEGIN_BATCH(7);
113 OUT_BATCH(_3DSTATE_HS << 16 | (7 - 2));
114 OUT_BATCH(0);
115 OUT_BATCH(0);
116 OUT_BATCH(0);
117 OUT_BATCH(0);
118 OUT_BATCH(0);
119 OUT_BATCH(0);
120 ADVANCE_BATCH();
121 }
122
123
124 /* 3DSTATE_TE
125 *
126 * Disable the tesselation engine.
127 */
128 static void
129 gen7_blorp_emit_te_disable(struct brw_context *brw,
130 const brw_blorp_params *params)
131 {
132 struct intel_context *intel = &brw->intel;
133
134 BEGIN_BATCH(4);
135 OUT_BATCH(_3DSTATE_TE << 16 | (4 - 2));
136 OUT_BATCH(0);
137 OUT_BATCH(0);
138 OUT_BATCH(0);
139 ADVANCE_BATCH();
140 }
141
142
143 /* 3DSTATE_DS
144 *
145 * Disable the domain shader.
146 */
147 static void
148 gen7_blorp_emit_ds_disable(struct brw_context *brw,
149 const brw_blorp_params *params)
150 {
151 struct intel_context *intel = &brw->intel;
152
153 BEGIN_BATCH(6);
154 OUT_BATCH(_3DSTATE_DS << 16 | (6 - 2));
155 OUT_BATCH(0);
156 OUT_BATCH(0);
157 OUT_BATCH(0);
158 OUT_BATCH(0);
159 OUT_BATCH(0);
160 ADVANCE_BATCH();
161 }
162
163
164 /* 3DSTATE_STREAMOUT
165 *
166 * Disable streamout.
167 */
168 static void
169 gen7_blorp_emit_streamout_disable(struct brw_context *brw,
170 const brw_blorp_params *params)
171 {
172 struct intel_context *intel = &brw->intel;
173
174 BEGIN_BATCH(3);
175 OUT_BATCH(_3DSTATE_STREAMOUT << 16 | (3 - 2));
176 OUT_BATCH(0);
177 OUT_BATCH(0);
178 ADVANCE_BATCH();
179 }
180
181
182 static void
183 gen7_blorp_emit_sf_config(struct brw_context *brw,
184 const brw_blorp_params *params)
185 {
186 struct intel_context *intel = &brw->intel;
187
188 /* 3DSTATE_SF
189 *
190 * Disable ViewportTransformEnable (dw1.1)
191 *
192 * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D
193 * Primitives Overview":
194 * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the
195 * use of screen- space coordinates).
196 *
197 * A solid rectangle must be rendered, so set FrontFaceFillMode (dw1.6:5)
198 * and BackFaceFillMode (dw1.4:3) to SOLID(0).
199 *
200 * From the Sandy Bridge PRM, Volume 2, Part 1, Section
201 * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode:
202 * SOLID: Any triangle or rectangle object found to be front-facing
203 * is rendered as a solid object. This setting is required when
204 * (rendering rectangle (RECTLIST) objects.
205 */
206 {
207 BEGIN_BATCH(7);
208 OUT_BATCH(_3DSTATE_SF << 16 | (7 - 2));
209 OUT_BATCH(params->depth_format <<
210 GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT);
211 OUT_BATCH(0);
212 OUT_BATCH(0);
213 OUT_BATCH(0);
214 OUT_BATCH(0);
215 OUT_BATCH(0);
216 ADVANCE_BATCH();
217 }
218
219 /* 3DSTATE_SBE */
220 {
221 BEGIN_BATCH(14);
222 OUT_BATCH(_3DSTATE_SBE << 16 | (14 - 2));
223 OUT_BATCH((1 - 1) << GEN7_SBE_NUM_OUTPUTS_SHIFT | /* only position */
224 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
225 0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT);
226 for (int i = 0; i < 12; ++i)
227 OUT_BATCH(0);
228 ADVANCE_BATCH();
229 }
230 }
231
232
233 /**
234 * Disable thread dispatch (dw5.19) and enable the HiZ op.
235 */
236 static void
237 gen7_blorp_emit_wm_config(struct brw_context *brw,
238 const brw_blorp_params *params)
239 {
240 struct intel_context *intel = &brw->intel;
241
242 uint32_t dw1 = 0;
243
244 switch (params->hiz_op) {
245 case GEN6_HIZ_OP_DEPTH_CLEAR:
246 assert(!"not implemented");
247 dw1 |= GEN7_WM_DEPTH_CLEAR;
248 break;
249 case GEN6_HIZ_OP_DEPTH_RESOLVE:
250 dw1 |= GEN7_WM_DEPTH_RESOLVE;
251 break;
252 case GEN6_HIZ_OP_HIZ_RESOLVE:
253 dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE;
254 break;
255 default:
256 assert(0);
257 break;
258 }
259
260 BEGIN_BATCH(3);
261 OUT_BATCH(_3DSTATE_WM << 16 | (3 - 2));
262 OUT_BATCH(dw1);
263 OUT_BATCH(0);
264 ADVANCE_BATCH();
265 }
266
267
268 /**
269 * 3DSTATE_PS
270 *
271 * Pixel shader dispatch is disabled above in 3DSTATE_WM, dw1.29. Despite
272 * that, thread dispatch info must still be specified.
273 * - Maximum Number of Threads (dw4.24:31) must be nonzero, as the BSpec
274 * states that the valid range for this field is [0x3, 0x2f].
275 * - A dispatch mode must be given; that is, at least one of the
276 * "N Pixel Dispatch Enable" (N=8,16,32) fields must be set. This was
277 * discovered through simulator error messages.
278 */
279 static void
280 gen7_blorp_emit_ps_config(struct brw_context *brw,
281 const brw_blorp_params *params)
282 {
283 struct intel_context *intel = &brw->intel;
284
285 BEGIN_BATCH(8);
286 OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2));
287 OUT_BATCH(0);
288 OUT_BATCH(0);
289 OUT_BATCH(0);
290 OUT_BATCH(((brw->max_wm_threads - 1) << IVB_PS_MAX_THREADS_SHIFT) |
291 GEN7_PS_32_DISPATCH_ENABLE);
292 OUT_BATCH(0);
293 OUT_BATCH(0);
294 OUT_BATCH(0);
295 ADVANCE_BATCH();
296 }
297
298
299 static void
300 gen7_blorp_emit_depth_stencil_config(struct brw_context *brw,
301 const brw_blorp_params *params)
302 {
303 struct intel_context *intel = &brw->intel;
304 uint32_t draw_x, draw_y;
305 uint32_t tile_mask_x, tile_mask_y;
306
307 params->depth.get_draw_offsets(&draw_x, &draw_y);
308 gen6_blorp_compute_tile_masks(params, &tile_mask_x, &tile_mask_y);
309
310 /* 3DSTATE_DEPTH_BUFFER */
311 {
312 uint32_t width, height;
313 params->depth.get_miplevel_dims(&width, &height);
314
315 uint32_t tile_x = draw_x & tile_mask_x;
316 uint32_t tile_y = draw_y & tile_mask_y;
317 uint32_t offset =
318 intel_region_get_aligned_offset(params->depth.mt->region,
319 draw_x & ~tile_mask_x,
320 draw_y & ~tile_mask_y);
321
322 /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
323 * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
324 * Coordinate Offset X/Y":
325 *
326 * "The 3 LSBs of both offsets must be zero to ensure correct
327 * alignment"
328 *
329 * We have no guarantee that tile_x and tile_y are correctly aligned,
330 * since they are determined by the mipmap layout, which is only aligned
331 * to multiples of 4.
332 *
333 * So, to avoid hanging the GPU, just smash the low order 3 bits of
334 * tile_x and tile_y to 0. This is a temporary workaround until we come
335 * up with a better solution.
336 */
337 tile_x &= ~7;
338 tile_y &= ~7;
339
340 intel_emit_depth_stall_flushes(intel);
341
342 BEGIN_BATCH(7);
343 OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
344 uint32_t pitch_bytes =
345 params->depth.mt->region->pitch * params->depth.mt->region->cpp;
346 OUT_BATCH((pitch_bytes - 1) |
347 params->depth_format << 18 |
348 1 << 22 | /* hiz enable */
349 1 << 28 | /* depth write */
350 BRW_SURFACE_2D << 29);
351 OUT_RELOC(params->depth.mt->region->bo,
352 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
353 offset);
354 OUT_BATCH((width + tile_x - 1) << 4 |
355 (height + tile_y - 1) << 18);
356 OUT_BATCH(0);
357 OUT_BATCH(tile_x |
358 tile_y << 16);
359 OUT_BATCH(0);
360 ADVANCE_BATCH();
361 }
362
363 /* 3DSTATE_HIER_DEPTH_BUFFER */
364 {
365 struct intel_region *hiz_region = params->depth.mt->hiz_mt->region;
366 uint32_t hiz_offset =
367 intel_region_get_aligned_offset(hiz_region,
368 draw_x & ~tile_mask_x,
369 (draw_y & ~tile_mask_y) / 2);
370
371 BEGIN_BATCH(3);
372 OUT_BATCH((GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
373 OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1);
374 OUT_RELOC(hiz_region->bo,
375 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
376 hiz_offset);
377 ADVANCE_BATCH();
378 }
379
380 /* 3DSTATE_STENCIL_BUFFER */
381 {
382 BEGIN_BATCH(3);
383 OUT_BATCH((GEN7_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
384 OUT_BATCH(0);
385 OUT_BATCH(0);
386 ADVANCE_BATCH();
387 }
388 }
389
390
391 /* 3DSTATE_CLEAR_PARAMS
392 *
393 * From the BSpec, Volume 2a.11 Windower, Section 1.5.6.3.2
394 * 3DSTATE_CLEAR_PARAMS:
395 * [DevIVB] 3DSTATE_CLEAR_PARAMS must always be programmed in the along
396 * with the other Depth/Stencil state commands(i.e. 3DSTATE_DEPTH_BUFFER,
397 * 3DSTATE_STENCIL_BUFFER, or 3DSTATE_HIER_DEPTH_BUFFER).
398 */
399 static void
400 gen7_blorp_emit_clear_params(struct brw_context *brw,
401 const brw_blorp_params *params)
402 {
403 struct intel_context *intel = &brw->intel;
404
405 BEGIN_BATCH(3);
406 OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS << 16 | (3 - 2));
407 OUT_BATCH(0);
408 OUT_BATCH(0);
409 ADVANCE_BATCH();
410 }
411
412
413 /* 3DPRIMITIVE */
414 static void
415 gen7_blorp_emit_primitive(struct brw_context *brw,
416 const brw_blorp_params *params)
417 {
418 struct intel_context *intel = &brw->intel;
419
420 BEGIN_BATCH(7);
421 OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2));
422 OUT_BATCH(GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL |
423 _3DPRIM_RECTLIST);
424 OUT_BATCH(3); /* vertex count per instance */
425 OUT_BATCH(0);
426 OUT_BATCH(1); /* instance count */
427 OUT_BATCH(0);
428 OUT_BATCH(0);
429 ADVANCE_BATCH();
430 }
431
432
433 /**
434 * \copydoc gen6_blorp_exec()
435 */
436 void
437 gen7_blorp_exec(struct intel_context *intel,
438 const brw_blorp_params *params)
439 {
440 struct gl_context *ctx = &intel->ctx;
441 struct brw_context *brw = brw_context(ctx);
442 uint32_t depthstencil_offset;
443
444 gen6_blorp_emit_batch_head(brw, params);
445 gen6_blorp_emit_vertices(brw, params);
446 gen7_blorp_emit_urb_config(brw, params);
447 depthstencil_offset = gen6_blorp_emit_depth_stencil_state(brw, params);
448 gen7_blorp_emit_depth_stencil_state_pointers(brw, params,
449 depthstencil_offset);
450 gen6_blorp_emit_vs_disable(brw, params);
451 gen7_blorp_emit_hs_disable(brw, params);
452 gen7_blorp_emit_te_disable(brw, params);
453 gen7_blorp_emit_ds_disable(brw, params);
454 gen6_blorp_emit_gs_disable(brw, params);
455 gen7_blorp_emit_streamout_disable(brw, params);
456 gen6_blorp_emit_clip_disable(brw, params);
457 gen7_blorp_emit_sf_config(brw, params);
458 gen7_blorp_emit_wm_config(brw, params);
459 gen7_blorp_emit_ps_config(brw, params);
460
461 gen7_blorp_emit_depth_stencil_config(brw, params);
462 gen7_blorp_emit_clear_params(brw, params);
463 gen6_blorp_emit_drawing_rectangle(brw, params);
464 gen7_blorp_emit_primitive(brw, params);
465
466 /* See comments above at first invocation of intel_flush() in
467 * gen6_blorp_emit_batch_head().
468 */
469 intel_flush(ctx);
470
471 /* Be safe. */
472 brw->state.dirty.brw = ~0;
473 brw->state.dirty.cache = ~0;
474 }
475
476 /** \copydoc gen6_resolve_hiz_slice() */
477 void
478 gen7_resolve_hiz_slice(struct intel_context *intel,
479 struct intel_mipmap_tree *mt,
480 uint32_t level,
481 uint32_t layer)
482 {
483 brw_hiz_op_params params(mt, level, layer, GEN6_HIZ_OP_HIZ_RESOLVE);
484 gen7_blorp_exec(intel, &params);
485 }
486
487 /** \copydoc gen6_resolve_depth_slice() */
488 void
489 gen7_resolve_depth_slice(struct intel_context *intel,
490 struct intel_mipmap_tree *mt,
491 uint32_t level,
492 uint32_t layer)
493 {
494 brw_hiz_op_params params(mt, level, layer, GEN6_HIZ_OP_DEPTH_RESOLVE);
495 gen7_blorp_exec(intel, &params);
496 }