i965/gen8+: Add aux buffer alignment assertions
[mesa.git] / src / mesa / drivers / dri / i965 / gen8_surface_state.c
1 /*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "main/blend.h"
25 #include "main/mtypes.h"
26 #include "main/samplerobj.h"
27 #include "main/texformat.h"
28 #include "main/teximage.h"
29 #include "program/prog_parameter.h"
30
31 #include "intel_mipmap_tree.h"
32 #include "intel_batchbuffer.h"
33 #include "intel_tex.h"
34 #include "intel_fbo.h"
35 #include "intel_buffer_objects.h"
36
37 #include "brw_context.h"
38 #include "brw_state.h"
39 #include "brw_defines.h"
40 #include "brw_wm.h"
41
/**
 * Translate a swizzle selector (SWIZZLE_X and friends) into the matching
 * Gen7.5+ "Shader Channel Select" value (HSW_SCS_RED and friends).
 *
 *    swizzle   X     Y       Z      W       ZERO   ONE
 *    value     0     1       2      3       4      5
 *    SCS       RED   GREEN   BLUE   ALPHA   ZERO   ONE
 *    value     4     5       6      7       0      1
 *
 * In other words, the SCS value is the swizzle value plus 4, wrapped
 * modulo 8.
 */
static unsigned
swizzle_to_scs(unsigned swizzle)
{
   const unsigned shifted = swizzle + 4;

   return shifted % 8;
}
58
59 static uint32_t
60 surface_tiling_mode(uint32_t tiling)
61 {
62 switch (tiling) {
63 case I915_TILING_X:
64 return GEN8_SURFACE_TILING_X;
65 case I915_TILING_Y:
66 return GEN8_SURFACE_TILING_Y;
67 default:
68 return GEN8_SURFACE_TILING_NONE;
69 }
70 }
71
72 static unsigned
73 vertical_alignment(const struct intel_mipmap_tree *mt)
74 {
75 switch (mt->align_h) {
76 case 4:
77 return GEN8_SURFACE_VALIGN_4;
78 case 8:
79 return GEN8_SURFACE_VALIGN_8;
80 case 16:
81 return GEN8_SURFACE_VALIGN_16;
82 default:
83 unreachable("Unsupported vertical surface alignment.");
84 }
85 }
86
87 static unsigned
88 horizontal_alignment(const struct intel_mipmap_tree *mt)
89 {
90 switch (mt->align_w) {
91 case 4:
92 return GEN8_SURFACE_HALIGN_4;
93 case 8:
94 return GEN8_SURFACE_HALIGN_8;
95 case 16:
96 return GEN8_SURFACE_HALIGN_16;
97 default:
98 unreachable("Unsupported horizontal surface alignment.");
99 }
100 }
101
102 static uint32_t *
103 allocate_surface_state(struct brw_context *brw, uint32_t *out_offset, int index)
104 {
105 int dwords = brw->gen >= 9 ? 16 : 13;
106 uint32_t *surf = __brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
107 dwords * 4, 64, index, out_offset);
108 memset(surf, 0, dwords * 4);
109 return surf;
110 }
111
112 static void
113 gen8_emit_buffer_surface_state(struct brw_context *brw,
114 uint32_t *out_offset,
115 drm_intel_bo *bo,
116 unsigned buffer_offset,
117 unsigned surface_format,
118 unsigned buffer_size,
119 unsigned pitch,
120 bool rw)
121 {
122 const unsigned mocs = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
123 uint32_t *surf = allocate_surface_state(brw, out_offset, -1);
124
125 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
126 surface_format << BRW_SURFACE_FORMAT_SHIFT |
127 BRW_SURFACE_RC_READ_WRITE;
128 surf[1] = SET_FIELD(mocs, GEN8_SURFACE_MOCS);
129
130 surf[2] = SET_FIELD((buffer_size - 1) & 0x7f, GEN7_SURFACE_WIDTH) |
131 SET_FIELD(((buffer_size - 1) >> 7) & 0x3fff, GEN7_SURFACE_HEIGHT);
132 if (surface_format == BRW_SURFACEFORMAT_RAW)
133 surf[3] = SET_FIELD(((buffer_size - 1) >> 21) & 0x3ff, BRW_SURFACE_DEPTH);
134 else
135 surf[3] = SET_FIELD(((buffer_size - 1) >> 21) & 0x3f, BRW_SURFACE_DEPTH);
136 surf[3] |= (pitch - 1);
137 surf[7] = SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) |
138 SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
139 SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) |
140 SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A);
141 /* reloc */
142 *((uint64_t *) &surf[8]) = (bo ? bo->offset64 : 0) + buffer_offset;
143
144 /* Emit relocation to surface contents. */
145 if (bo) {
146 drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 8 * 4,
147 bo, buffer_offset, I915_GEM_DOMAIN_SAMPLER,
148 rw ? I915_GEM_DOMAIN_SAMPLER : 0);
149 }
150 }
151
152 static void
153 gen8_emit_texture_surface_state(struct brw_context *brw,
154 struct intel_mipmap_tree *mt,
155 GLenum target,
156 unsigned min_layer, unsigned max_layer,
157 unsigned min_level, unsigned max_level,
158 unsigned format,
159 unsigned swizzle,
160 uint32_t *surf_offset,
161 bool rw, bool for_gather)
162 {
163 const unsigned depth = max_layer - min_layer;
164 struct intel_mipmap_tree *aux_mt = NULL;
165 uint32_t aux_mode = 0;
166 uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
167 int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
168 unsigned tiling_mode, pitch;
169
170 if (mt->format == MESA_FORMAT_S_UINT8) {
171 tiling_mode = GEN8_SURFACE_TILING_W;
172 pitch = 2 * mt->pitch;
173 } else {
174 tiling_mode = surface_tiling_mode(mt->tiling);
175 pitch = mt->pitch;
176 }
177
178 if (mt->mcs_mt) {
179 aux_mt = mt->mcs_mt;
180 aux_mode = GEN8_SURFACE_AUX_MODE_MCS;
181
182 /*
183 * From the BDW PRM, Volume 2d, page 260 (RENDER_SURFACE_STATE):
184 * "When MCS is enabled for non-MSRT, HALIGN_16 must be used"
185 *
186 * From the hardware spec for GEN9:
187 * "When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN
188 * 16 must be used."
189 */
190 assert(brw->gen < 9 || mt->align_w == 16);
191 assert(brw->gen < 8 || mt->num_samples > 1 || mt->align_w == 16);
192 }
193
194 uint32_t *surf = allocate_surface_state(brw, surf_offset, surf_index);
195
196 surf[0] = translate_tex_target(target) << BRW_SURFACE_TYPE_SHIFT |
197 format << BRW_SURFACE_FORMAT_SHIFT |
198 vertical_alignment(mt) |
199 horizontal_alignment(mt) |
200 tiling_mode;
201
202 if (target == GL_TEXTURE_CUBE_MAP ||
203 target == GL_TEXTURE_CUBE_MAP_ARRAY) {
204 surf[0] |= BRW_SURFACE_CUBEFACE_ENABLES;
205 }
206
207 if (_mesa_is_array_texture(target) || target == GL_TEXTURE_CUBE_MAP)
208 surf[0] |= GEN8_SURFACE_IS_ARRAY;
209
210 surf[1] = SET_FIELD(mocs_wb, GEN8_SURFACE_MOCS) | mt->qpitch >> 2;
211
212 surf[2] = SET_FIELD(mt->logical_width0 - 1, GEN7_SURFACE_WIDTH) |
213 SET_FIELD(mt->logical_height0 - 1, GEN7_SURFACE_HEIGHT);
214
215 surf[3] = SET_FIELD(depth - 1, BRW_SURFACE_DEPTH) | (pitch - 1);
216
217 surf[4] = gen7_surface_msaa_bits(mt->num_samples, mt->msaa_layout) |
218 SET_FIELD(min_layer, GEN7_SURFACE_MIN_ARRAY_ELEMENT) |
219 SET_FIELD(depth - 1, GEN7_SURFACE_RENDER_TARGET_VIEW_EXTENT);
220
221 surf[5] = SET_FIELD(min_level - mt->first_level, GEN7_SURFACE_MIN_LOD) |
222 (max_level - min_level - 1); /* mip count */
223
224 if (aux_mt) {
225 surf[6] = SET_FIELD(mt->qpitch / 4, GEN8_SURFACE_AUX_QPITCH) |
226 SET_FIELD((aux_mt->pitch / 128) - 1, GEN8_SURFACE_AUX_PITCH) |
227 aux_mode;
228 } else {
229 surf[6] = 0;
230 }
231
232 surf[7] = mt->fast_clear_color_value |
233 SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 0)), GEN7_SURFACE_SCS_R) |
234 SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 1)), GEN7_SURFACE_SCS_G) |
235 SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 2)), GEN7_SURFACE_SCS_B) |
236 SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 3)), GEN7_SURFACE_SCS_A);
237
238 *((uint64_t *) &surf[8]) = mt->bo->offset64 + mt->offset; /* reloc */
239
240 if (aux_mt) {
241 *((uint64_t *) &surf[10]) = aux_mt->bo->offset64;
242 drm_intel_bo_emit_reloc(brw->batch.bo, *surf_offset + 10 * 4,
243 aux_mt->bo, 0,
244 I915_GEM_DOMAIN_SAMPLER,
245 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
246 } else {
247 surf[10] = 0;
248 surf[11] = 0;
249 }
250 surf[12] = 0;
251
252 /* Emit relocation to surface contents */
253 drm_intel_bo_emit_reloc(brw->batch.bo,
254 *surf_offset + 8 * 4,
255 mt->bo,
256 mt->offset,
257 I915_GEM_DOMAIN_SAMPLER,
258 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
259 }
260
261 static void
262 gen8_update_texture_surface(struct gl_context *ctx,
263 unsigned unit,
264 uint32_t *surf_offset,
265 bool for_gather)
266 {
267 struct brw_context *brw = brw_context(ctx);
268 struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current;
269
270 if (obj->Target == GL_TEXTURE_BUFFER) {
271 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
272
273 } else {
274 struct gl_texture_image *firstImage = obj->Image[0][obj->BaseLevel];
275 struct intel_texture_object *intel_obj = intel_texture_object(obj);
276 struct intel_mipmap_tree *mt = intel_obj->mt;
277 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
278 /* If this is a view with restricted NumLayers, then our effective depth
279 * is not just the miptree depth.
280 */
281 const unsigned depth = (obj->Immutable && obj->Target != GL_TEXTURE_3D ?
282 obj->NumLayers : mt->logical_depth0);
283
284 /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
285 * texturing functions that return a float, as our code generation always
286 * selects the .x channel (which would always be 0).
287 */
288 const bool alpha_depth = obj->DepthMode == GL_ALPHA &&
289 (firstImage->_BaseFormat == GL_DEPTH_COMPONENT ||
290 firstImage->_BaseFormat == GL_DEPTH_STENCIL);
291 const unsigned swizzle = (unlikely(alpha_depth) ? SWIZZLE_XYZW :
292 brw_get_texture_swizzle(&brw->ctx, obj));
293
294 unsigned format = translate_tex_format(brw, intel_obj->_Format,
295 sampler->sRGBDecode);
296 if (obj->StencilSampling && firstImage->_BaseFormat == GL_DEPTH_STENCIL) {
297 mt = mt->stencil_mt;
298 format = BRW_SURFACEFORMAT_R8_UINT;
299 }
300
301 gen8_emit_texture_surface_state(brw, mt, obj->Target,
302 obj->MinLayer, obj->MinLayer + depth,
303 obj->MinLevel + obj->BaseLevel,
304 obj->MinLevel + intel_obj->_MaxLevel + 1,
305 format, swizzle, surf_offset,
306 false, for_gather);
307 }
308 }
309
310 /**
311 * Creates a null surface.
312 *
313 * This is used when the shader doesn't write to any color output. An FB
314 * write to target 0 will still be emitted, because that's how the thread is
315 * terminated (and computed depth is returned), so we need to have the
316 * hardware discard the target 0 color output..
317 */
318 static void
319 gen8_emit_null_surface_state(struct brw_context *brw,
320 unsigned width,
321 unsigned height,
322 unsigned samples,
323 uint32_t *out_offset)
324 {
325 uint32_t *surf = allocate_surface_state(brw, out_offset, -1);
326
327 surf[0] = BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT |
328 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT |
329 GEN8_SURFACE_TILING_Y;
330 surf[2] = SET_FIELD(width - 1, GEN7_SURFACE_WIDTH) |
331 SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT);
332 }
333
334 /**
335 * Sets up a surface state structure to point at the given region.
336 * While it is only used for the front/back buffer currently, it should be
337 * usable for further buffers when doing ARB_draw_buffer support.
338 */
339 static uint32_t
340 gen8_update_renderbuffer_surface(struct brw_context *brw,
341 struct gl_renderbuffer *rb,
342 bool layered, unsigned unit /* unused */,
343 uint32_t surf_index)
344 {
345 struct gl_context *ctx = &brw->ctx;
346 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
347 struct intel_mipmap_tree *mt = irb->mt;
348 struct intel_mipmap_tree *aux_mt = NULL;
349 uint32_t aux_mode = 0;
350 unsigned width = mt->logical_width0;
351 unsigned height = mt->logical_height0;
352 unsigned pitch = mt->pitch;
353 uint32_t tiling = mt->tiling;
354 uint32_t format = 0;
355 uint32_t surf_type;
356 uint32_t offset;
357 bool is_array = false;
358 int depth = MAX2(irb->layer_count, 1);
359 const int min_array_element = (mt->format == MESA_FORMAT_S_UINT8) ?
360 irb->mt_layer : (irb->mt_layer / MAX2(mt->num_samples, 1));
361 GLenum gl_target =
362 rb->TexImage ? rb->TexImage->TexObject->Target : GL_TEXTURE_2D;
363 /* FINISHME: Use PTE MOCS on Skylake. */
364 uint32_t mocs = brw->gen >= 9 ? SKL_MOCS_WT : BDW_MOCS_PTE;
365
366 intel_miptree_used_for_rendering(mt);
367
368 switch (gl_target) {
369 case GL_TEXTURE_CUBE_MAP_ARRAY:
370 case GL_TEXTURE_CUBE_MAP:
371 surf_type = BRW_SURFACE_2D;
372 is_array = true;
373 depth *= 6;
374 break;
375 case GL_TEXTURE_3D:
376 depth = MAX2(irb->mt->logical_depth0, 1);
377 /* fallthrough */
378 default:
379 surf_type = translate_tex_target(gl_target);
380 is_array = _mesa_tex_target_is_array(gl_target);
381 break;
382 }
383
384 /* _NEW_BUFFERS */
385 /* Render targets can't use IMS layout. Stencil in turn gets configured as
386 * single sampled and indexed manually by the program.
387 */
388 if (mt->format == MESA_FORMAT_S_UINT8) {
389 brw_configure_w_tiled(mt, true, &width, &height, &pitch,
390 &tiling, &format);
391 } else {
392 assert(mt->msaa_layout != INTEL_MSAA_LAYOUT_IMS);
393 assert(brw_render_target_supported(brw, rb));
394 mesa_format rb_format = _mesa_get_render_format(ctx,
395 intel_rb_format(irb));
396 format = brw->render_target_format[rb_format];
397 if (unlikely(!brw->format_supported_as_render_target[rb_format]))
398 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
399 __func__, _mesa_get_format_name(rb_format));
400 }
401
402 if (mt->mcs_mt) {
403 aux_mt = mt->mcs_mt;
404 aux_mode = GEN8_SURFACE_AUX_MODE_MCS;
405
406 /*
407 * From the BDW PRM, Volume 2d, page 260 (RENDER_SURFACE_STATE):
408 * "When MCS is enabled for non-MSRT, HALIGN_16 must be used"
409 *
410 * From the hardware spec for GEN9:
411 * "When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN
412 * 16 must be used."
413 */
414 assert(brw->gen < 9 || mt->align_w == 16);
415 assert(brw->gen < 8 || mt->num_samples > 1 || mt->align_w == 16);
416 }
417
418 uint32_t *surf = allocate_surface_state(brw, &offset, surf_index);
419
420 surf[0] = (surf_type << BRW_SURFACE_TYPE_SHIFT) |
421 (is_array ? GEN7_SURFACE_IS_ARRAY : 0) |
422 (format << BRW_SURFACE_FORMAT_SHIFT) |
423 vertical_alignment(mt) |
424 horizontal_alignment(mt) |
425 surface_tiling_mode(tiling);
426
427 surf[1] = SET_FIELD(mocs, GEN8_SURFACE_MOCS) | mt->qpitch >> 2;
428
429 surf[2] = SET_FIELD(width - 1, GEN7_SURFACE_WIDTH) |
430 SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT);
431
432 surf[3] = (depth - 1) << BRW_SURFACE_DEPTH_SHIFT |
433 (pitch - 1); /* Surface Pitch */
434
435 surf[4] = min_array_element << GEN7_SURFACE_MIN_ARRAY_ELEMENT_SHIFT |
436 (depth - 1) << GEN7_SURFACE_RENDER_TARGET_VIEW_EXTENT_SHIFT;
437
438 if (mt->format != MESA_FORMAT_S_UINT8)
439 surf[4] |= gen7_surface_msaa_bits(mt->num_samples, mt->msaa_layout);
440
441 surf[5] = irb->mt_level - irb->mt->first_level;
442
443 if (aux_mt) {
444 surf[6] = SET_FIELD(mt->qpitch / 4, GEN8_SURFACE_AUX_QPITCH) |
445 SET_FIELD((aux_mt->pitch / 128) - 1, GEN8_SURFACE_AUX_PITCH) |
446 aux_mode;
447 } else {
448 surf[6] = 0;
449 }
450
451 surf[7] = mt->fast_clear_color_value |
452 SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) |
453 SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
454 SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) |
455 SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A);
456
457 assert(mt->offset % mt->cpp == 0);
458 *((uint64_t *) &surf[8]) = mt->bo->offset64 + mt->offset; /* reloc */
459
460 if (aux_mt) {
461 *((uint64_t *) &surf[10]) = aux_mt->bo->offset64;
462 drm_intel_bo_emit_reloc(brw->batch.bo,
463 offset + 10 * 4,
464 aux_mt->bo, 0,
465 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
466 } else {
467 surf[10] = 0;
468 surf[11] = 0;
469 }
470 surf[12] = 0;
471
472 drm_intel_bo_emit_reloc(brw->batch.bo,
473 offset + 8 * 4,
474 mt->bo,
475 mt->offset,
476 I915_GEM_DOMAIN_RENDER,
477 I915_GEM_DOMAIN_RENDER);
478
479 return offset;
480 }
481
482 void
483 gen8_init_vtable_surface_functions(struct brw_context *brw)
484 {
485 brw->vtbl.update_texture_surface = gen8_update_texture_surface;
486 brw->vtbl.update_renderbuffer_surface = gen8_update_renderbuffer_surface;
487 brw->vtbl.emit_null_surface_state = gen8_emit_null_surface_state;
488 brw->vtbl.emit_texture_surface_state = gen8_emit_texture_surface_state;
489 brw->vtbl.emit_buffer_surface_state = gen8_emit_buffer_surface_state;
490 }