i965/blorp: Add genxml-based sampler state emit function
[mesa.git] / src / mesa / drivers / dri / i965 / genX_blorp_exec.c
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25
26 #include "intel_batchbuffer.h"
27 #include "intel_mipmap_tree.h"
28
29 #include "brw_context.h"
30 #include "brw_state.h"
31
32 #include "blorp_priv.h"
33
34 #include "genxml/gen_macros.h"
35
36 static void *
37 blorp_emit_dwords(struct brw_context *brw, unsigned n)
38 {
39 intel_batchbuffer_begin(brw, n, RENDER_RING);
40 uint32_t *map = brw->batch.map_next;
41 brw->batch.map_next += n;
42 intel_batchbuffer_advance(brw);
43 return map;
44 }
45
46 struct blorp_address {
47 drm_intel_bo *buffer;
48 uint32_t read_domains;
49 uint32_t write_domain;
50 uint32_t offset;
51 };
52
53 static uint64_t
54 blorp_emit_reloc(struct brw_context *brw, void *location,
55 struct blorp_address address, uint32_t delta)
56 {
57 uint32_t offset = (char *)location - (char *)brw->batch.map;
58 if (brw->gen >= 8) {
59 return intel_batchbuffer_reloc64(brw, address.buffer, offset,
60 address.read_domains,
61 address.write_domain,
62 address.offset + delta);
63 } else {
64 return intel_batchbuffer_reloc(brw, address.buffer, offset,
65 address.read_domains,
66 address.write_domain,
67 address.offset + delta);
68 }
69 }
70
71 #define __gen_address_type struct blorp_address
72 #define __gen_user_data struct brw_context
73
74 static uint64_t
75 __gen_combine_address(struct brw_context *brw, void *location,
76 struct blorp_address address, uint32_t delta)
77 {
78 if (address.buffer == NULL) {
79 return address.offset + delta;
80 } else {
81 return blorp_emit_reloc(brw, location, address, delta);
82 }
83 }
84
85 #include "genxml/genX_pack.h"
86
/* Token-pasting helpers.  They are separate macros so that a command name
 * passed to blorp_emit() as a macro invocation is fully expanded before the
 * suffix is pasted on.
 */
#define _blorp_cmd_length(cmd) cmd ## _length
#define _blorp_cmd_header(cmd) cmd ## _header
#define _blorp_cmd_pack(cmd) cmd ## _pack

/**
 * Emit one command into the batch.
 *
 * Usage:
 *    blorp_emit(brw, CMD, name) { name.Field = value; ... }
 *
 * This expands to a for loop that runs its body exactly once:
 *  - the init clause declares `struct CMD name` initialised from the
 *    command's header and reserves the command's dwords in the batch;
 *  - the body (supplied by the caller, possibly empty) fills in fields;
 *  - the step clause packs the struct into the reserved dwords and clears
 *    the destination pointer, which terminates the loop.
 */
#define blorp_emit(brw, cmd, name)                                \
   for (struct cmd name = { _blorp_cmd_header(cmd) },             \
        *_dst = blorp_emit_dwords(brw, _blorp_cmd_length(cmd));   \
        __builtin_expect(_dst != NULL, 1);                        \
        _blorp_cmd_pack(cmd)(brw, (void *)_dst, &name),           \
        _dst = NULL)
97
98 static void
99 blorp_emit_sf_config(struct brw_context *brw,
100 const struct brw_blorp_params *params)
101 {
102 const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;
103
104 /* 3DSTATE_SF
105 *
106 * Disable ViewportTransformEnable (dw2.1)
107 *
108 * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D
109 * Primitives Overview":
110 * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the
111 * use of screen- space coordinates).
112 *
113 * A solid rectangle must be rendered, so set FrontFaceFillMode (dw2.4:3)
114 * and BackFaceFillMode (dw2.5:6) to SOLID(0).
115 *
116 * From the Sandy Bridge PRM, Volume 2, Part 1, Section
117 * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode:
118 * SOLID: Any triangle or rectangle object found to be front-facing
119 * is rendered as a solid object. This setting is required when
120 * (rendering rectangle (RECTLIST) objects.
121 */
122 blorp_emit(brw, GENX(3DSTATE_SF), sf) {
123 sf.FrontFaceFillMode = FILL_MODE_SOLID;
124 sf.BackFaceFillMode = FILL_MODE_SOLID;
125
126 sf.MultisampleRasterizationMode = params->dst.surf.samples > 1 ?
127 MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL;
128
129 sf.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;
130 if (prog_data) {
131 sf.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
132 sf.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data);
133 sf.ConstantInterpolationEnable = prog_data->flat_inputs;
134 } else {
135 sf.NumberofSFOutputAttributes = 0;
136 sf.VertexURBEntryReadLength = 1;
137 }
138 }
139 }
140
141 static void
142 blorp_emit_wm_config(struct brw_context *brw,
143 const struct brw_blorp_params *params)
144 {
145 const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;
146
147 /* Even when thread dispatch is disabled, max threads (dw5.25:31) must be
148 * nonzero to prevent the GPU from hanging. While the documentation doesn't
149 * mention this explicitly, it notes that the valid range for the field is
150 * [1,39] = [2,40] threads, which excludes zero.
151 *
152 * To be safe (and to minimize extraneous code) we go ahead and fully
153 * configure the WM state whether or not there is a WM program.
154 */
155 blorp_emit(brw, GENX(3DSTATE_WM), wm) {
156 wm.MaximumNumberofThreads = brw->max_wm_threads - 1;
157
158 switch (params->hiz_op) {
159 case GEN6_HIZ_OP_DEPTH_CLEAR:
160 wm.DepthBufferClear = true;
161 break;
162 case GEN6_HIZ_OP_DEPTH_RESOLVE:
163 wm.DepthBufferResolveEnable = true;
164 break;
165 case GEN6_HIZ_OP_HIZ_RESOLVE:
166 wm.HierarchicalDepthBufferResolveEnable = true;
167 break;
168 case GEN6_HIZ_OP_NONE:
169 break;
170 default:
171 unreachable("not reached");
172 }
173
174 if (prog_data) {
175 wm.ThreadDispatchEnable = true;
176
177 wm.DispatchGRFStartRegisterforConstantSetupData0 =
178 prog_data->first_curbe_grf_0;
179 wm.DispatchGRFStartRegisterforConstantSetupData2 =
180 prog_data->first_curbe_grf_2;
181
182 wm.KernelStartPointer0 = params->wm_prog_kernel;
183 wm.KernelStartPointer2 =
184 params->wm_prog_kernel + prog_data->ksp_offset_2;
185
186 wm._8PixelDispatchEnable = prog_data->dispatch_8;
187 wm._16PixelDispatchEnable = prog_data->dispatch_16;
188
189 wm.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
190 }
191
192 if (params->src.bo) {
193 wm.SamplerCount = 1; /* Up to 4 samplers */
194 wm.PixelShaderKillPixel = true; /* TODO: temporarily smash on */
195 }
196
197 if (params->dst.surf.samples > 1) {
198 wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
199 wm.MultisampleDispatchMode =
200 (prog_data && prog_data->persample_msaa_dispatch) ?
201 MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL;
202 } else {
203 wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
204 wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
205 }
206 }
207 }
208
209
210 static void
211 blorp_emit_depth_stencil_config(struct brw_context *brw,
212 const struct brw_blorp_params *params)
213 {
214 brw_emit_depth_stall_flushes(brw);
215
216 blorp_emit(brw, GENX(3DSTATE_DEPTH_BUFFER), db) {
217 switch (params->depth.surf.dim) {
218 case ISL_SURF_DIM_1D:
219 db.SurfaceType = SURFTYPE_1D;
220 break;
221 case ISL_SURF_DIM_2D:
222 db.SurfaceType = SURFTYPE_2D;
223 break;
224 case ISL_SURF_DIM_3D:
225 db.SurfaceType = SURFTYPE_3D;
226 break;
227 }
228
229 db.SurfaceFormat = params->depth_format;
230
231 db.TiledSurface = true;
232 db.TileWalk = TILEWALK_YMAJOR;
233 db.MIPMapLayoutMode = MIPLAYOUT_BELOW;
234
235 db.HierarchicalDepthBufferEnable = true;
236 db.SeparateStencilBufferEnable = true;
237
238 db.Width = params->depth.surf.logical_level0_px.width - 1;
239 db.Height = params->depth.surf.logical_level0_px.height - 1;
240 db.RenderTargetViewExtent = db.Depth =
241 MAX2(params->depth.surf.logical_level0_px.depth,
242 params->depth.surf.logical_level0_px.array_len) - 1;
243
244 db.LOD = params->depth.view.base_level;
245 db.MinimumArrayElement = params->depth.view.base_array_layer;
246
247 db.SurfacePitch = params->depth.surf.row_pitch - 1;
248 db.SurfaceBaseAddress = (struct blorp_address) {
249 .buffer = params->depth.bo,
250 .read_domains = I915_GEM_DOMAIN_RENDER,
251 .write_domain = I915_GEM_DOMAIN_RENDER,
252 .offset = params->depth.offset,
253 };
254 }
255
256 blorp_emit(brw, GENX(3DSTATE_HIER_DEPTH_BUFFER), hiz) {
257 hiz.SurfacePitch = params->depth.aux_surf.row_pitch - 1;
258 hiz.SurfaceBaseAddress = (struct blorp_address) {
259 .buffer = params->depth.aux_bo,
260 .read_domains = I915_GEM_DOMAIN_RENDER,
261 .write_domain = I915_GEM_DOMAIN_RENDER,
262 .offset = params->depth.aux_offset,
263 };
264 }
265
266 blorp_emit(brw, GENX(3DSTATE_STENCIL_BUFFER), sb);
267 }
268
269 static uint32_t
270 blorp_emit_blend_state(struct brw_context *brw,
271 const struct brw_blorp_params *params)
272 {
273 struct GENX(BLEND_STATE) blend;
274 memset(&blend, 0, sizeof(blend));
275
276 for (unsigned i = 0; i < params->num_draw_buffers; ++i) {
277 blend.Entry[i].PreBlendColorClampEnable = true;
278 blend.Entry[i].PostBlendColorClampEnable = true;
279 blend.Entry[i].ColorClampRange = COLORCLAMP_RTFORMAT;
280
281 blend.Entry[i].WriteDisableRed = params->color_write_disable[0];
282 blend.Entry[i].WriteDisableGreen = params->color_write_disable[1];
283 blend.Entry[i].WriteDisableBlue = params->color_write_disable[2];
284 blend.Entry[i].WriteDisableAlpha = params->color_write_disable[3];
285 }
286
287 uint32_t offset;
288 void *state = brw_state_batch(brw, AUB_TRACE_BLEND_STATE,
289 GENX(BLEND_STATE_length) * 4, 64, &offset);
290 GENX(BLEND_STATE_pack)(NULL, state, &blend);
291
292 return offset;
293 }
294
295 static uint32_t
296 blorp_emit_color_calc_state(struct brw_context *brw,
297 const struct brw_blorp_params *params)
298 {
299 uint32_t offset;
300 void *state = brw_state_batch(brw, AUB_TRACE_CC_STATE,
301 GENX(COLOR_CALC_STATE_length) * 4, 64, &offset);
302 memset(state, 0, GENX(COLOR_CALC_STATE_length) * 4);
303
304 return offset;
305 }
306
307 static uint32_t
308 blorp_emit_depth_stencil_state(struct brw_context *brw,
309 const struct brw_blorp_params *params)
310 {
311 /* See the following sections of the Sandy Bridge PRM, Volume 1, Part2:
312 * - 7.5.3.1 Depth Buffer Clear
313 * - 7.5.3.2 Depth Buffer Resolve
314 * - 7.5.3.3 Hierarchical Depth Buffer Resolve
315 */
316 struct GENX(DEPTH_STENCIL_STATE) ds = {
317 .DepthBufferWriteEnable = true,
318 };
319
320 if (params->hiz_op == GEN6_HIZ_OP_DEPTH_RESOLVE) {
321 ds.DepthTestEnable = true;
322 ds.DepthTestFunction = COMPAREFUNCTION_NEVER;
323 }
324
325 uint32_t offset;
326 void *state = brw_state_batch(brw, AUB_TRACE_DEPTH_STENCIL_STATE,
327 GENX(DEPTH_STENCIL_STATE_length) * 4, 64,
328 &offset);
329 GENX(DEPTH_STENCIL_STATE_pack)(NULL, state, &ds);
330
331 return offset;
332 }
333
334 static void
335 blorp_emit_sampler_state(struct brw_context *brw,
336 const struct brw_blorp_params *params)
337 {
338 struct GENX(SAMPLER_STATE) sampler = {
339 .MipModeFilter = MIPFILTER_NONE,
340 .MagModeFilter = MAPFILTER_LINEAR,
341 .MinModeFilter = MAPFILTER_LINEAR,
342 .MinLOD = 0,
343 .MaxLOD = 0,
344 .TCXAddressControlMode = TCM_CLAMP,
345 .TCYAddressControlMode = TCM_CLAMP,
346 .TCZAddressControlMode = TCM_CLAMP,
347 .MaximumAnisotropy = RATIO21,
348 .RAddressMinFilterRoundingEnable = true,
349 .RAddressMagFilterRoundingEnable = true,
350 .VAddressMinFilterRoundingEnable = true,
351 .VAddressMagFilterRoundingEnable = true,
352 .UAddressMinFilterRoundingEnable = true,
353 .UAddressMagFilterRoundingEnable = true,
354 .NonnormalizedCoordinateEnable = true,
355 };
356
357 uint32_t offset;
358 void *state = brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE,
359 GENX(SAMPLER_STATE_length) * 4, 32, &offset);
360 GENX(SAMPLER_STATE_pack)(NULL, state, &sampler);
361
362 blorp_emit(brw, GENX(3DSTATE_SAMPLER_STATE_POINTERS), ssp) {
363 ssp.VSSamplerStateChange = true;
364 ssp.GSSamplerStateChange = true;
365 ssp.PSSamplerStateChange = true;
366 ssp.PointertoPSSamplerState = offset;
367 }
368 }
369
370 /* 3DSTATE_VIEWPORT_STATE_POINTERS */
371 static void
372 blorp_emit_viewport_state(struct brw_context *brw,
373 const struct brw_blorp_params *params)
374 {
375 uint32_t cc_vp_offset;
376
377 void *state = brw_state_batch(brw, AUB_TRACE_CC_VP_STATE,
378 GENX(CC_VIEWPORT_length) * 4, 32,
379 &cc_vp_offset);
380
381 GENX(CC_VIEWPORT_pack)(brw, state,
382 &(struct GENX(CC_VIEWPORT)) {
383 .MinimumDepth = 0.0,
384 .MaximumDepth = 1.0,
385 });
386
387 blorp_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vsp) {
388 vsp.CCViewportStateChange = true;
389 vsp.PointertoCC_VIEWPORT = cc_vp_offset;
390 }
391 }
392
393
394 /**
395 * \brief Execute a blit or render pass operation.
396 *
397 * To execute the operation, this function manually constructs and emits a
398 * batch to draw a rectangle primitive. The batchbuffer is flushed before
399 * constructing and after emitting the batch.
400 *
401 * This function alters no GL state.
402 */
403 void
404 genX(blorp_exec)(struct brw_context *brw,
405 const struct brw_blorp_params *params)
406 {
407 uint32_t blend_state_offset = 0;
408 uint32_t color_calc_state_offset = 0;
409 uint32_t depth_stencil_state_offset;
410 uint32_t wm_bind_bo_offset = 0;
411
412 /* Emit workaround flushes when we switch from drawing to blorping. */
413 brw_emit_post_sync_nonzero_flush(brw);
414
415 brw_upload_state_base_address(brw);
416
417 gen6_blorp_emit_vertices(brw, params);
418
419 /* 3DSTATE_URB
420 *
421 * Assign the entire URB to the VS. Even though the VS disabled, URB space
422 * is still needed because the clipper loads the VUE's from the URB. From
423 * the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE,
424 * Dword 1.15:0 "VS Number of URB Entries":
425 * This field is always used (even if VS Function Enable is DISABLED).
426 *
427 * The warning below appears in the PRM (Section 3DSTATE_URB), but we can
428 * safely ignore it because this batch contains only one draw call.
429 * Because of URB corruption caused by allocating a previous GS unit
430 * URB entry to the VS unit, software is required to send a “GS NULL
431 * Fence” (Send URB fence with VS URB size == 1 and GS URB size == 0)
432 * plus a dummy DRAW call before any case where VS will be taking over
433 * GS URB space.
434 */
435 blorp_emit(brw, GENX(3DSTATE_URB), urb) {
436 urb.VSNumberofURBEntries = brw->urb.max_vs_entries;
437 }
438
439 if (params->wm_prog_data) {
440 blend_state_offset = blorp_emit_blend_state(brw, params);
441 color_calc_state_offset = blorp_emit_color_calc_state(brw, params);
442 }
443 depth_stencil_state_offset = blorp_emit_depth_stencil_state(brw, params);
444
445 /* 3DSTATE_CC_STATE_POINTERS
446 *
447 * The pointer offsets are relative to
448 * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
449 *
450 * The HiZ op doesn't use BLEND_STATE or COLOR_CALC_STATE.
451 */
452 blorp_emit(brw, GENX(3DSTATE_CC_STATE_POINTERS), cc) {
453 cc.BLEND_STATEChange = true;
454 cc.COLOR_CALC_STATEChange = true;
455 cc.DEPTH_STENCIL_STATEChange = true;
456 cc.PointertoBLEND_STATE = blend_state_offset;
457 cc.PointertoCOLOR_CALC_STATE = color_calc_state_offset;
458 cc.PointertoDEPTH_STENCIL_STATE = depth_stencil_state_offset;
459 }
460
461 blorp_emit(brw, GENX(3DSTATE_CONSTANT_VS), vs);
462 blorp_emit(brw, GENX(3DSTATE_CONSTANT_GS), gs);
463 blorp_emit(brw, GENX(3DSTATE_CONSTANT_PS), ps);
464
465 if (params->wm_prog_data) {
466 uint32_t wm_surf_offset_renderbuffer;
467 uint32_t wm_surf_offset_texture = 0;
468
469 wm_surf_offset_renderbuffer =
470 brw_blorp_emit_surface_state(brw, &params->dst,
471 I915_GEM_DOMAIN_RENDER,
472 I915_GEM_DOMAIN_RENDER, true);
473 if (params->src.bo) {
474 wm_surf_offset_texture =
475 brw_blorp_emit_surface_state(brw, &params->src,
476 I915_GEM_DOMAIN_SAMPLER, 0, false);
477 }
478 wm_bind_bo_offset =
479 gen6_blorp_emit_binding_table(brw,
480 wm_surf_offset_renderbuffer,
481 wm_surf_offset_texture);
482
483 blorp_emit(brw, GENX(3DSTATE_BINDING_TABLE_POINTERS), bt) {
484 bt.PSBindingTableChange = true;
485 bt.PointertoPSBindingTable = wm_bind_bo_offset;
486 }
487 }
488
489 if (params->src.bo)
490 blorp_emit_sampler_state(brw, params);
491
492 gen6_emit_3dstate_multisample(brw, params->dst.surf.samples);
493
494 blorp_emit(brw, GENX(3DSTATE_SAMPLE_MASK), mask) {
495 mask.SampleMask = (1 << params->dst.surf.samples) - 1;
496 }
497
498 /* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State,
499 * 3DSTATE_VS, Dword 5.0 "VS Function Enable":
500 *
501 * [DevSNB] A pipeline flush must be programmed prior to a
502 * 3DSTATE_VS command that causes the VS Function Enable to
503 * toggle. Pipeline flush can be executed by sending a PIPE_CONTROL
504 * command with CS stall bit set and a post sync operation.
505 *
506 * We've already done one at the start of the BLORP operation.
507 */
508 blorp_emit(brw, GENX(3DSTATE_VS), vs);
509 blorp_emit(brw, GENX(3DSTATE_GS), gs);
510
511 blorp_emit(brw, GENX(3DSTATE_CLIP), clip) {
512 clip.PerspectiveDivideDisable = true;
513 }
514
515 blorp_emit_sf_config(brw, params);
516 blorp_emit_wm_config(brw, params);
517
518 blorp_emit_viewport_state(brw, params);
519
520 if (params->depth.bo) {
521 blorp_emit_depth_stencil_config(brw, params);
522 } else {
523 brw_emit_depth_stall_flushes(brw);
524
525 blorp_emit(brw, GENX(3DSTATE_DEPTH_BUFFER), db) {
526 db.SurfaceType = SURFTYPE_NULL;
527 db.SurfaceFormat = D32_FLOAT;
528 }
529 blorp_emit(brw, GENX(3DSTATE_HIER_DEPTH_BUFFER), hiz);
530 blorp_emit(brw, GENX(3DSTATE_STENCIL_BUFFER), sb);
531 }
532
533 /* 3DSTATE_CLEAR_PARAMS
534 *
535 * From the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE_CLEAR_PARAMS:
536 * [DevSNB] 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE
537 * packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
538 */
539 blorp_emit(brw, GENX(3DSTATE_CLEAR_PARAMS), clear) {
540 clear.DepthClearValueValid = true;
541 clear.DepthClearValue = params->depth.clear_color.u32[0];
542 }
543
544 blorp_emit(brw, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {
545 rect.ClippedDrawingRectangleXMax = MAX2(params->x1, params->x0) - 1;
546 rect.ClippedDrawingRectangleYMax = MAX2(params->y1, params->y0) - 1;
547 }
548
549 blorp_emit(brw, GENX(3DPRIMITIVE), prim) {
550 prim.VertexAccessType = SEQUENTIAL;
551 prim.PrimitiveTopologyType = _3DPRIM_RECTLIST;
552 prim.VertexCountPerInstance = 3;
553 prim.InstanceCount = params->num_layers;
554 }
555 }