i965/gen9: Set tiled resource mode in surface state
[mesa.git] / src / mesa / drivers / dri / i965 / gen8_surface_state.c
1 /*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "main/blend.h"
25 #include "main/mtypes.h"
26 #include "main/samplerobj.h"
27 #include "main/texformat.h"
28 #include "main/teximage.h"
29 #include "program/prog_parameter.h"
30
31 #include "intel_mipmap_tree.h"
32 #include "intel_batchbuffer.h"
33 #include "intel_tex.h"
34 #include "intel_fbo.h"
35 #include "intel_buffer_objects.h"
36
37 #include "brw_context.h"
38 #include "brw_state.h"
39 #include "brw_defines.h"
40 #include "brw_wm.h"
41
/**
 * Translate a swizzle enumeration (e.g. SWIZZLE_X) into the corresponding
 * Gen7.5+ "Shader Channel Select" enumeration (e.g. HSW_SCS_RED).
 *
 * The two encodings line up as follows:
 *
 *    SWIZZLE_X  SWIZZLE_Y  SWIZZLE_Z  SWIZZLE_W  SWIZZLE_ZERO  SWIZZLE_ONE
 *        0          1          2          3           4             5
 *        4          5          6          7           0             1
 *    SCS_RED    SCS_GREEN  SCS_BLUE   SCS_ALPHA  SCS_ZERO      SCS_ONE
 *
 * i.e. the SCS value is the swizzle value plus four, taken modulo eight.
 *
 * \param swizzle  swizzle selector to convert
 * \return the matching shader channel select value
 */
static unsigned
swizzle_to_scs(unsigned swizzle)
{
   const unsigned shifted = swizzle + 4;

   return shifted % 8;
}
58
59 static uint32_t
60 surface_tiling_resource_mode(uint32_t tr_mode)
61 {
62 switch (tr_mode) {
63 case INTEL_MIPTREE_TRMODE_YF:
64 return GEN9_SURFACE_TRMODE_TILEYF;
65 case INTEL_MIPTREE_TRMODE_YS:
66 return GEN9_SURFACE_TRMODE_TILEYS;
67 default:
68 return GEN9_SURFACE_TRMODE_NONE;
69 }
70 }
71
72 static uint32_t
73 surface_tiling_mode(uint32_t tiling)
74 {
75 switch (tiling) {
76 case I915_TILING_X:
77 return GEN8_SURFACE_TILING_X;
78 case I915_TILING_Y:
79 return GEN8_SURFACE_TILING_Y;
80 default:
81 return GEN8_SURFACE_TILING_NONE;
82 }
83 }
84
85 static unsigned
86 vertical_alignment(const struct intel_mipmap_tree *mt)
87 {
88 switch (mt->align_h) {
89 case 4:
90 return GEN8_SURFACE_VALIGN_4;
91 case 8:
92 return GEN8_SURFACE_VALIGN_8;
93 case 16:
94 return GEN8_SURFACE_VALIGN_16;
95 default:
96 unreachable("Unsupported vertical surface alignment.");
97 }
98 }
99
100 static unsigned
101 horizontal_alignment(const struct intel_mipmap_tree *mt)
102 {
103 switch (mt->align_w) {
104 case 4:
105 return GEN8_SURFACE_HALIGN_4;
106 case 8:
107 return GEN8_SURFACE_HALIGN_8;
108 case 16:
109 return GEN8_SURFACE_HALIGN_16;
110 default:
111 unreachable("Unsupported horizontal surface alignment.");
112 }
113 }
114
115 static uint32_t *
116 allocate_surface_state(struct brw_context *brw, uint32_t *out_offset, int index)
117 {
118 int dwords = brw->gen >= 9 ? 16 : 13;
119 uint32_t *surf = __brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
120 dwords * 4, 64, index, out_offset);
121 memset(surf, 0, dwords * 4);
122 return surf;
123 }
124
125 static void
126 gen8_emit_buffer_surface_state(struct brw_context *brw,
127 uint32_t *out_offset,
128 drm_intel_bo *bo,
129 unsigned buffer_offset,
130 unsigned surface_format,
131 unsigned buffer_size,
132 unsigned pitch,
133 bool rw)
134 {
135 const unsigned mocs = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
136 uint32_t *surf = allocate_surface_state(brw, out_offset, -1);
137
138 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
139 surface_format << BRW_SURFACE_FORMAT_SHIFT |
140 BRW_SURFACE_RC_READ_WRITE;
141 surf[1] = SET_FIELD(mocs, GEN8_SURFACE_MOCS);
142
143 surf[2] = SET_FIELD((buffer_size - 1) & 0x7f, GEN7_SURFACE_WIDTH) |
144 SET_FIELD(((buffer_size - 1) >> 7) & 0x3fff, GEN7_SURFACE_HEIGHT);
145 if (surface_format == BRW_SURFACEFORMAT_RAW)
146 surf[3] = SET_FIELD(((buffer_size - 1) >> 21) & 0x3ff, BRW_SURFACE_DEPTH);
147 else
148 surf[3] = SET_FIELD(((buffer_size - 1) >> 21) & 0x3f, BRW_SURFACE_DEPTH);
149 surf[3] |= (pitch - 1);
150 surf[7] = SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) |
151 SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
152 SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) |
153 SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A);
154 /* reloc */
155 *((uint64_t *) &surf[8]) = (bo ? bo->offset64 : 0) + buffer_offset;
156
157 /* Emit relocation to surface contents. */
158 if (bo) {
159 drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 8 * 4,
160 bo, buffer_offset, I915_GEM_DOMAIN_SAMPLER,
161 rw ? I915_GEM_DOMAIN_SAMPLER : 0);
162 }
163 }
164
165 static void
166 gen8_emit_texture_surface_state(struct brw_context *brw,
167 struct intel_mipmap_tree *mt,
168 GLenum target,
169 unsigned min_layer, unsigned max_layer,
170 unsigned min_level, unsigned max_level,
171 unsigned format,
172 unsigned swizzle,
173 uint32_t *surf_offset,
174 bool rw, bool for_gather)
175 {
176 const unsigned depth = max_layer - min_layer;
177 struct intel_mipmap_tree *aux_mt = NULL;
178 uint32_t aux_mode = 0;
179 uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
180 int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
181 unsigned tiling_mode, pitch;
182 const unsigned tr_mode = surface_tiling_resource_mode(mt->tr_mode);
183
184 if (mt->format == MESA_FORMAT_S_UINT8) {
185 tiling_mode = GEN8_SURFACE_TILING_W;
186 pitch = 2 * mt->pitch;
187 } else {
188 tiling_mode = surface_tiling_mode(mt->tiling);
189 pitch = mt->pitch;
190 }
191
192 if (mt->mcs_mt) {
193 aux_mt = mt->mcs_mt;
194 aux_mode = GEN8_SURFACE_AUX_MODE_MCS;
195
196 /*
197 * From the BDW PRM, Volume 2d, page 260 (RENDER_SURFACE_STATE):
198 * "When MCS is enabled for non-MSRT, HALIGN_16 must be used"
199 *
200 * From the hardware spec for GEN9:
201 * "When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN
202 * 16 must be used."
203 */
204 assert(brw->gen < 9 || mt->align_w == 16);
205 assert(brw->gen < 8 || mt->num_samples > 1 || mt->align_w == 16);
206 }
207
208 uint32_t *surf = allocate_surface_state(brw, surf_offset, surf_index);
209
210 surf[0] = translate_tex_target(target) << BRW_SURFACE_TYPE_SHIFT |
211 format << BRW_SURFACE_FORMAT_SHIFT |
212 vertical_alignment(mt) |
213 horizontal_alignment(mt) |
214 tiling_mode;
215
216 if (target == GL_TEXTURE_CUBE_MAP ||
217 target == GL_TEXTURE_CUBE_MAP_ARRAY) {
218 surf[0] |= BRW_SURFACE_CUBEFACE_ENABLES;
219 }
220
221 if (_mesa_is_array_texture(target) || target == GL_TEXTURE_CUBE_MAP)
222 surf[0] |= GEN8_SURFACE_IS_ARRAY;
223
224 surf[1] = SET_FIELD(mocs_wb, GEN8_SURFACE_MOCS) | mt->qpitch >> 2;
225
226 surf[2] = SET_FIELD(mt->logical_width0 - 1, GEN7_SURFACE_WIDTH) |
227 SET_FIELD(mt->logical_height0 - 1, GEN7_SURFACE_HEIGHT);
228
229 surf[3] = SET_FIELD(depth - 1, BRW_SURFACE_DEPTH) | (pitch - 1);
230
231 surf[4] = gen7_surface_msaa_bits(mt->num_samples, mt->msaa_layout) |
232 SET_FIELD(min_layer, GEN7_SURFACE_MIN_ARRAY_ELEMENT) |
233 SET_FIELD(depth - 1, GEN7_SURFACE_RENDER_TARGET_VIEW_EXTENT);
234
235 surf[5] = SET_FIELD(min_level - mt->first_level, GEN7_SURFACE_MIN_LOD) |
236 (max_level - min_level - 1); /* mip count */
237
238 if (brw->gen >= 9)
239 surf[5] |= SET_FIELD(tr_mode, GEN9_SURFACE_TRMODE);
240
241 if (aux_mt) {
242 surf[6] = SET_FIELD(mt->qpitch / 4, GEN8_SURFACE_AUX_QPITCH) |
243 SET_FIELD((aux_mt->pitch / 128) - 1, GEN8_SURFACE_AUX_PITCH) |
244 aux_mode;
245 } else {
246 surf[6] = 0;
247 }
248
249 surf[7] = mt->fast_clear_color_value |
250 SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 0)), GEN7_SURFACE_SCS_R) |
251 SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 1)), GEN7_SURFACE_SCS_G) |
252 SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 2)), GEN7_SURFACE_SCS_B) |
253 SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 3)), GEN7_SURFACE_SCS_A);
254
255 *((uint64_t *) &surf[8]) = mt->bo->offset64 + mt->offset; /* reloc */
256
257 if (aux_mt) {
258 *((uint64_t *) &surf[10]) = aux_mt->bo->offset64;
259 drm_intel_bo_emit_reloc(brw->batch.bo, *surf_offset + 10 * 4,
260 aux_mt->bo, 0,
261 I915_GEM_DOMAIN_SAMPLER,
262 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
263 } else {
264 surf[10] = 0;
265 surf[11] = 0;
266 }
267 surf[12] = 0;
268
269 /* Emit relocation to surface contents */
270 drm_intel_bo_emit_reloc(brw->batch.bo,
271 *surf_offset + 8 * 4,
272 mt->bo,
273 mt->offset,
274 I915_GEM_DOMAIN_SAMPLER,
275 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
276 }
277
278 static void
279 gen8_update_texture_surface(struct gl_context *ctx,
280 unsigned unit,
281 uint32_t *surf_offset,
282 bool for_gather)
283 {
284 struct brw_context *brw = brw_context(ctx);
285 struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current;
286
287 if (obj->Target == GL_TEXTURE_BUFFER) {
288 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
289
290 } else {
291 struct gl_texture_image *firstImage = obj->Image[0][obj->BaseLevel];
292 struct intel_texture_object *intel_obj = intel_texture_object(obj);
293 struct intel_mipmap_tree *mt = intel_obj->mt;
294 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
295 /* If this is a view with restricted NumLayers, then our effective depth
296 * is not just the miptree depth.
297 */
298 const unsigned depth = (obj->Immutable && obj->Target != GL_TEXTURE_3D ?
299 obj->NumLayers : mt->logical_depth0);
300
301 /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
302 * texturing functions that return a float, as our code generation always
303 * selects the .x channel (which would always be 0).
304 */
305 const bool alpha_depth = obj->DepthMode == GL_ALPHA &&
306 (firstImage->_BaseFormat == GL_DEPTH_COMPONENT ||
307 firstImage->_BaseFormat == GL_DEPTH_STENCIL);
308 const unsigned swizzle = (unlikely(alpha_depth) ? SWIZZLE_XYZW :
309 brw_get_texture_swizzle(&brw->ctx, obj));
310
311 unsigned format = translate_tex_format(brw, intel_obj->_Format,
312 sampler->sRGBDecode);
313 if (obj->StencilSampling && firstImage->_BaseFormat == GL_DEPTH_STENCIL) {
314 mt = mt->stencil_mt;
315 format = BRW_SURFACEFORMAT_R8_UINT;
316 }
317
318 gen8_emit_texture_surface_state(brw, mt, obj->Target,
319 obj->MinLayer, obj->MinLayer + depth,
320 obj->MinLevel + obj->BaseLevel,
321 obj->MinLevel + intel_obj->_MaxLevel + 1,
322 format, swizzle, surf_offset,
323 false, for_gather);
324 }
325 }
326
327 /**
328 * Creates a null surface.
329 *
330 * This is used when the shader doesn't write to any color output. An FB
331 * write to target 0 will still be emitted, because that's how the thread is
332 * terminated (and computed depth is returned), so we need to have the
333 * hardware discard the target 0 color output..
334 */
335 static void
336 gen8_emit_null_surface_state(struct brw_context *brw,
337 unsigned width,
338 unsigned height,
339 unsigned samples,
340 uint32_t *out_offset)
341 {
342 uint32_t *surf = allocate_surface_state(brw, out_offset, -1);
343
344 surf[0] = BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT |
345 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT |
346 GEN8_SURFACE_TILING_Y;
347 surf[2] = SET_FIELD(width - 1, GEN7_SURFACE_WIDTH) |
348 SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT);
349 }
350
351 /**
352 * Sets up a surface state structure to point at the given region.
353 * While it is only used for the front/back buffer currently, it should be
354 * usable for further buffers when doing ARB_draw_buffer support.
355 */
356 static uint32_t
357 gen8_update_renderbuffer_surface(struct brw_context *brw,
358 struct gl_renderbuffer *rb,
359 bool layered, unsigned unit /* unused */,
360 uint32_t surf_index)
361 {
362 struct gl_context *ctx = &brw->ctx;
363 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
364 struct intel_mipmap_tree *mt = irb->mt;
365 struct intel_mipmap_tree *aux_mt = NULL;
366 uint32_t aux_mode = 0;
367 unsigned width = mt->logical_width0;
368 unsigned height = mt->logical_height0;
369 unsigned pitch = mt->pitch;
370 uint32_t tiling = mt->tiling;
371 unsigned tr_mode = surface_tiling_resource_mode(mt->tr_mode);
372 uint32_t format = 0;
373 uint32_t surf_type;
374 uint32_t offset;
375 bool is_array = false;
376 int depth = MAX2(irb->layer_count, 1);
377 const int min_array_element = (mt->format == MESA_FORMAT_S_UINT8) ?
378 irb->mt_layer : (irb->mt_layer / MAX2(mt->num_samples, 1));
379 GLenum gl_target =
380 rb->TexImage ? rb->TexImage->TexObject->Target : GL_TEXTURE_2D;
381 /* FINISHME: Use PTE MOCS on Skylake. */
382 uint32_t mocs = brw->gen >= 9 ? SKL_MOCS_WT : BDW_MOCS_PTE;
383
384 intel_miptree_used_for_rendering(mt);
385
386 switch (gl_target) {
387 case GL_TEXTURE_CUBE_MAP_ARRAY:
388 case GL_TEXTURE_CUBE_MAP:
389 surf_type = BRW_SURFACE_2D;
390 is_array = true;
391 depth *= 6;
392 break;
393 case GL_TEXTURE_3D:
394 depth = MAX2(irb->mt->logical_depth0, 1);
395 /* fallthrough */
396 default:
397 surf_type = translate_tex_target(gl_target);
398 is_array = _mesa_tex_target_is_array(gl_target);
399 break;
400 }
401
402 /* _NEW_BUFFERS */
403 /* Render targets can't use IMS layout. Stencil in turn gets configured as
404 * single sampled and indexed manually by the program.
405 */
406 if (mt->format == MESA_FORMAT_S_UINT8) {
407 brw_configure_w_tiled(mt, true, &width, &height, &pitch,
408 &tiling, &format);
409 } else {
410 assert(mt->msaa_layout != INTEL_MSAA_LAYOUT_IMS);
411 assert(brw_render_target_supported(brw, rb));
412 mesa_format rb_format = _mesa_get_render_format(ctx,
413 intel_rb_format(irb));
414 format = brw->render_target_format[rb_format];
415 if (unlikely(!brw->format_supported_as_render_target[rb_format]))
416 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
417 __func__, _mesa_get_format_name(rb_format));
418 }
419
420 if (mt->mcs_mt) {
421 aux_mt = mt->mcs_mt;
422 aux_mode = GEN8_SURFACE_AUX_MODE_MCS;
423
424 /*
425 * From the BDW PRM, Volume 2d, page 260 (RENDER_SURFACE_STATE):
426 * "When MCS is enabled for non-MSRT, HALIGN_16 must be used"
427 *
428 * From the hardware spec for GEN9:
429 * "When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN
430 * 16 must be used."
431 */
432 assert(brw->gen < 9 || mt->align_w == 16);
433 assert(brw->gen < 8 || mt->num_samples > 1 || mt->align_w == 16);
434 }
435
436 uint32_t *surf = allocate_surface_state(brw, &offset, surf_index);
437
438 surf[0] = (surf_type << BRW_SURFACE_TYPE_SHIFT) |
439 (is_array ? GEN7_SURFACE_IS_ARRAY : 0) |
440 (format << BRW_SURFACE_FORMAT_SHIFT) |
441 vertical_alignment(mt) |
442 horizontal_alignment(mt) |
443 surface_tiling_mode(tiling);
444
445 surf[1] = SET_FIELD(mocs, GEN8_SURFACE_MOCS) | mt->qpitch >> 2;
446
447 surf[2] = SET_FIELD(width - 1, GEN7_SURFACE_WIDTH) |
448 SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT);
449
450 surf[3] = (depth - 1) << BRW_SURFACE_DEPTH_SHIFT |
451 (pitch - 1); /* Surface Pitch */
452
453 surf[4] = min_array_element << GEN7_SURFACE_MIN_ARRAY_ELEMENT_SHIFT |
454 (depth - 1) << GEN7_SURFACE_RENDER_TARGET_VIEW_EXTENT_SHIFT;
455
456 if (mt->format != MESA_FORMAT_S_UINT8)
457 surf[4] |= gen7_surface_msaa_bits(mt->num_samples, mt->msaa_layout);
458
459 surf[5] = irb->mt_level - irb->mt->first_level;
460
461 if (brw->gen >= 9)
462 surf[5] |= SET_FIELD(tr_mode, GEN9_SURFACE_TRMODE);
463
464 if (aux_mt) {
465 surf[6] = SET_FIELD(mt->qpitch / 4, GEN8_SURFACE_AUX_QPITCH) |
466 SET_FIELD((aux_mt->pitch / 128) - 1, GEN8_SURFACE_AUX_PITCH) |
467 aux_mode;
468 } else {
469 surf[6] = 0;
470 }
471
472 surf[7] = mt->fast_clear_color_value |
473 SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) |
474 SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
475 SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) |
476 SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A);
477
478 assert(mt->offset % mt->cpp == 0);
479 *((uint64_t *) &surf[8]) = mt->bo->offset64 + mt->offset; /* reloc */
480
481 if (aux_mt) {
482 *((uint64_t *) &surf[10]) = aux_mt->bo->offset64;
483 drm_intel_bo_emit_reloc(brw->batch.bo,
484 offset + 10 * 4,
485 aux_mt->bo, 0,
486 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
487 } else {
488 surf[10] = 0;
489 surf[11] = 0;
490 }
491 surf[12] = 0;
492
493 drm_intel_bo_emit_reloc(brw->batch.bo,
494 offset + 8 * 4,
495 mt->bo,
496 mt->offset,
497 I915_GEM_DOMAIN_RENDER,
498 I915_GEM_DOMAIN_RENDER);
499
500 return offset;
501 }
502
503 void
504 gen8_init_vtable_surface_functions(struct brw_context *brw)
505 {
506 brw->vtbl.update_texture_surface = gen8_update_texture_surface;
507 brw->vtbl.update_renderbuffer_surface = gen8_update_renderbuffer_surface;
508 brw->vtbl.emit_null_surface_state = gen8_emit_null_surface_state;
509 brw->vtbl.emit_texture_surface_state = gen8_emit_texture_surface_state;
510 brw->vtbl.emit_buffer_surface_state = gen8_emit_buffer_surface_state;
511 }