i965: Micro-optimize swizzle_to_scs() and make it inlinable.
[mesa.git] / src / mesa / drivers / dri / i965 / gen8_surface_state.c
1 /*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "main/blend.h"
25 #include "main/mtypes.h"
26 #include "main/samplerobj.h"
27 #include "main/texformat.h"
28 #include "program/prog_parameter.h"
29
30 #include "intel_mipmap_tree.h"
31 #include "intel_batchbuffer.h"
32 #include "intel_tex.h"
33 #include "intel_fbo.h"
34 #include "intel_buffer_objects.h"
35
36 #include "brw_context.h"
37 #include "brw_state.h"
38 #include "brw_defines.h"
39 #include "brw_wm.h"
40
/**
 * Translate a swizzle selector (e.g. SWIZZLE_X) into the corresponding
 * Gen7.5+ "Shader Channel Select" value (e.g. HSW_SCS_RED).
 *
 * The two enumerations line up like this:
 *
 *    SWIZZLE_X  SWIZZLE_Y  SWIZZLE_Z  SWIZZLE_W  SWIZZLE_ZERO  SWIZZLE_ONE
 *        0          1          2          3           4             5
 *        4          5          6          7           0             1
 *    SCS_RED    SCS_GREEN  SCS_BLUE   SCS_ALPHA  SCS_ZERO      SCS_ONE
 *
 * so the conversion is "add 4, then wrap modulo 8".
 */
static unsigned
swizzle_to_scs(unsigned swizzle)
{
   const unsigned scs_rotation = 4; /* distance from SWIZZLE_X to SCS_RED */
   const unsigned scs_modulus = 8;  /* channel selects wrap at three bits */

   return (swizzle + scs_rotation) % scs_modulus;
}
57
58 static uint32_t
59 surface_tiling_mode(uint32_t tiling)
60 {
61 switch (tiling) {
62 case I915_TILING_X:
63 return GEN8_SURFACE_TILING_X;
64 case I915_TILING_Y:
65 return GEN8_SURFACE_TILING_Y;
66 default:
67 return GEN8_SURFACE_TILING_NONE;
68 }
69 }
70
71 static unsigned
72 vertical_alignment(struct intel_mipmap_tree *mt)
73 {
74 switch (mt->align_h) {
75 case 4:
76 return GEN8_SURFACE_VALIGN_4;
77 case 8:
78 return GEN8_SURFACE_VALIGN_8;
79 case 16:
80 return GEN8_SURFACE_VALIGN_16;
81 default:
82 unreachable("Unsupported vertical surface alignment.");
83 }
84 }
85
86 static unsigned
87 horizontal_alignment(struct intel_mipmap_tree *mt)
88 {
89 switch (mt->align_w) {
90 case 4:
91 return GEN8_SURFACE_HALIGN_4;
92 case 8:
93 return GEN8_SURFACE_HALIGN_8;
94 case 16:
95 return GEN8_SURFACE_HALIGN_16;
96 default:
97 unreachable("Unsupported horizontal surface alignment.");
98 }
99 }
100
101 static uint32_t *
102 allocate_surface_state(struct brw_context *brw, uint32_t *out_offset)
103 {
104 int dwords = brw->gen >= 9 ? 16 : 13;
105 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
106 dwords * 4, 64, out_offset);
107 memset(surf, 0, dwords * 4);
108 return surf;
109 }
110
111 static void
112 gen8_emit_buffer_surface_state(struct brw_context *brw,
113 uint32_t *out_offset,
114 drm_intel_bo *bo,
115 unsigned buffer_offset,
116 unsigned surface_format,
117 unsigned buffer_size,
118 unsigned pitch,
119 unsigned mocs,
120 bool rw)
121 {
122 uint32_t *surf = allocate_surface_state(brw, out_offset);
123
124 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
125 surface_format << BRW_SURFACE_FORMAT_SHIFT |
126 BRW_SURFACE_RC_READ_WRITE;
127 surf[1] = SET_FIELD(mocs, GEN8_SURFACE_MOCS);
128
129 surf[2] = SET_FIELD((buffer_size - 1) & 0x7f, GEN7_SURFACE_WIDTH) |
130 SET_FIELD(((buffer_size - 1) >> 7) & 0x3fff, GEN7_SURFACE_HEIGHT);
131 surf[3] = SET_FIELD(((buffer_size - 1) >> 21) & 0x3f, BRW_SURFACE_DEPTH) |
132 (pitch - 1);
133 surf[7] = SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) |
134 SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
135 SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) |
136 SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A);
137 /* reloc */
138 *((uint64_t *) &surf[8]) = (bo ? bo->offset64 : 0) + buffer_offset;
139
140 /* Emit relocation to surface contents. */
141 if (bo) {
142 drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 8 * 4,
143 bo, buffer_offset, I915_GEM_DOMAIN_SAMPLER,
144 rw ? I915_GEM_DOMAIN_SAMPLER : 0);
145 }
146 }
147
148 static void
149 gen8_update_texture_surface(struct gl_context *ctx,
150 unsigned unit,
151 uint32_t *surf_offset,
152 bool for_gather)
153 {
154 struct brw_context *brw = brw_context(ctx);
155 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
156 struct intel_texture_object *intelObj = intel_texture_object(tObj);
157 struct intel_mipmap_tree *mt = intelObj->mt;
158 struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel];
159 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
160 struct intel_mipmap_tree *aux_mt = NULL;
161 uint32_t aux_mode = 0;
162 mesa_format format = intelObj->_Format;
163 uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
164
165 if (tObj->Target == GL_TEXTURE_BUFFER) {
166 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
167 return;
168 }
169
170 if (tObj->StencilSampling && firstImage->_BaseFormat == GL_DEPTH_STENCIL) {
171 mt = mt->stencil_mt;
172 format = MESA_FORMAT_S_UINT8;
173 }
174
175 unsigned tiling_mode, pitch;
176 if (format == MESA_FORMAT_S_UINT8) {
177 tiling_mode = GEN8_SURFACE_TILING_W;
178 pitch = 2 * mt->pitch;
179 } else {
180 tiling_mode = surface_tiling_mode(mt->tiling);
181 pitch = mt->pitch;
182 }
183
184 if (mt->mcs_mt) {
185 aux_mt = mt->mcs_mt;
186 aux_mode = GEN8_SURFACE_AUX_MODE_MCS;
187 }
188
189 /* If this is a view with restricted NumLayers, then our effective depth
190 * is not just the miptree depth.
191 */
192 uint32_t effective_depth =
193 (tObj->Immutable && tObj->Target != GL_TEXTURE_3D) ? tObj->NumLayers
194 : mt->logical_depth0;
195
196 uint32_t tex_format = translate_tex_format(brw, format, sampler->sRGBDecode);
197
198 uint32_t *surf = allocate_surface_state(brw, surf_offset);
199
200 surf[0] = translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
201 tex_format << BRW_SURFACE_FORMAT_SHIFT |
202 vertical_alignment(mt) |
203 horizontal_alignment(mt) |
204 tiling_mode;
205
206 if (tObj->Target == GL_TEXTURE_CUBE_MAP ||
207 tObj->Target == GL_TEXTURE_CUBE_MAP_ARRAY) {
208 surf[0] |= BRW_SURFACE_CUBEFACE_ENABLES;
209 }
210
211 if (mt->logical_depth0 > 1 && tObj->Target != GL_TEXTURE_3D)
212 surf[0] |= GEN8_SURFACE_IS_ARRAY;
213
214 surf[1] = SET_FIELD(mocs_wb, GEN8_SURFACE_MOCS) | mt->qpitch >> 2;
215
216 surf[2] = SET_FIELD(mt->logical_width0 - 1, GEN7_SURFACE_WIDTH) |
217 SET_FIELD(mt->logical_height0 - 1, GEN7_SURFACE_HEIGHT);
218
219 surf[3] = SET_FIELD(effective_depth - 1, BRW_SURFACE_DEPTH) | (pitch - 1);
220
221 surf[4] = gen7_surface_msaa_bits(mt->num_samples, mt->msaa_layout) |
222 SET_FIELD(tObj->MinLayer, GEN7_SURFACE_MIN_ARRAY_ELEMENT) |
223 SET_FIELD(effective_depth - 1,
224 GEN7_SURFACE_RENDER_TARGET_VIEW_EXTENT);
225
226 surf[5] = SET_FIELD(tObj->MinLevel + tObj->BaseLevel - mt->first_level,
227 GEN7_SURFACE_MIN_LOD) |
228 (intelObj->_MaxLevel - tObj->BaseLevel); /* mip count */
229
230 if (aux_mt) {
231 surf[6] = SET_FIELD(mt->qpitch / 4, GEN8_SURFACE_AUX_QPITCH) |
232 SET_FIELD((aux_mt->pitch / 128) - 1, GEN8_SURFACE_AUX_PITCH) |
233 aux_mode;
234 } else {
235 surf[6] = 0;
236 }
237
238 /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
239 * texturing functions that return a float, as our code generation always
240 * selects the .x channel (which would always be 0).
241 */
242 const bool alpha_depth = tObj->DepthMode == GL_ALPHA &&
243 (firstImage->_BaseFormat == GL_DEPTH_COMPONENT ||
244 firstImage->_BaseFormat == GL_DEPTH_STENCIL);
245
246 surf[7] = mt->fast_clear_color_value;
247
248 const int swizzle =
249 unlikely(alpha_depth) ? SWIZZLE_XYZW : brw_get_texture_swizzle(ctx, tObj);
250 surf[7] |=
251 SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 0)), GEN7_SURFACE_SCS_R) |
252 SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 1)), GEN7_SURFACE_SCS_G) |
253 SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 2)), GEN7_SURFACE_SCS_B) |
254 SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 3)), GEN7_SURFACE_SCS_A);
255
256 *((uint64_t *) &surf[8]) = mt->bo->offset64 + mt->offset; /* reloc */
257
258 if (aux_mt) {
259 *((uint64_t *) &surf[10]) = aux_mt->bo->offset64;
260 drm_intel_bo_emit_reloc(brw->batch.bo, *surf_offset + 10 * 4,
261 aux_mt->bo, 0,
262 I915_GEM_DOMAIN_SAMPLER, 0);
263 } else {
264 surf[10] = 0;
265 surf[11] = 0;
266 }
267 surf[12] = 0;
268
269 /* Emit relocation to surface contents */
270 drm_intel_bo_emit_reloc(brw->batch.bo,
271 *surf_offset + 8 * 4,
272 mt->bo,
273 mt->offset,
274 I915_GEM_DOMAIN_SAMPLER, 0);
275 }
276
277 static void
278 gen8_create_raw_surface(struct brw_context *brw, drm_intel_bo *bo,
279 uint32_t offset, uint32_t size,
280 uint32_t *out_offset, bool rw)
281 {
282 gen8_emit_buffer_surface_state(brw,
283 out_offset,
284 bo,
285 offset,
286 BRW_SURFACEFORMAT_RAW,
287 size,
288 1,
289 0 /* mocs */,
290 true /* rw */);
291 }
292
293 /**
294 * Creates a null renderbuffer surface.
295 *
296 * This is used when the shader doesn't write to any color output. An FB
297 * write to target 0 will still be emitted, because that's how the thread is
298 * terminated (and computed depth is returned), so we need to have the
299 * hardware discard the target 0 color output..
300 */
301 static void
302 gen8_update_null_renderbuffer_surface(struct brw_context *brw, unsigned unit)
303 {
304 struct gl_context *ctx = &brw->ctx;
305
306 /* _NEW_BUFFERS */
307 const struct gl_framebuffer *fb = ctx->DrawBuffer;
308 uint32_t surf_index =
309 brw->wm.prog_data->binding_table.render_target_start + unit;
310
311 uint32_t *surf =
312 allocate_surface_state(brw, &brw->wm.base.surf_offset[surf_index]);
313
314 surf[0] = BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT |
315 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT |
316 GEN8_SURFACE_TILING_Y;
317 surf[2] = SET_FIELD(fb->Width - 1, GEN7_SURFACE_WIDTH) |
318 SET_FIELD(fb->Height - 1, GEN7_SURFACE_HEIGHT);
319 }
320
321 /**
322 * Sets up a surface state structure to point at the given region.
323 * While it is only used for the front/back buffer currently, it should be
324 * usable for further buffers when doing ARB_draw_buffer support.
325 */
326 static void
327 gen8_update_renderbuffer_surface(struct brw_context *brw,
328 struct gl_renderbuffer *rb,
329 bool layered,
330 unsigned unit)
331 {
332 struct gl_context *ctx = &brw->ctx;
333 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
334 struct intel_mipmap_tree *mt = irb->mt;
335 struct intel_mipmap_tree *aux_mt = NULL;
336 uint32_t aux_mode = 0;
337 unsigned width = mt->logical_width0;
338 unsigned height = mt->logical_height0;
339 unsigned pitch = mt->pitch;
340 uint32_t tiling = mt->tiling;
341 uint32_t format = 0;
342 uint32_t surf_type;
343 bool is_array = false;
344 int depth = MAX2(irb->layer_count, 1);
345 const int min_array_element = (mt->format == MESA_FORMAT_S_UINT8) ?
346 irb->mt_layer : (irb->mt_layer / MAX2(mt->num_samples, 1));
347 GLenum gl_target =
348 rb->TexImage ? rb->TexImage->TexObject->Target : GL_TEXTURE_2D;
349 uint32_t surf_index =
350 brw->wm.prog_data->binding_table.render_target_start + unit;
351 /* FINISHME: Use PTE MOCS on Skylake. */
352 uint32_t mocs = brw->gen >= 9 ? SKL_MOCS_WT : BDW_MOCS_PTE;
353
354 intel_miptree_used_for_rendering(mt);
355
356 switch (gl_target) {
357 case GL_TEXTURE_CUBE_MAP_ARRAY:
358 case GL_TEXTURE_CUBE_MAP:
359 surf_type = BRW_SURFACE_2D;
360 is_array = true;
361 depth *= 6;
362 break;
363 case GL_TEXTURE_3D:
364 depth = MAX2(irb->mt->logical_depth0, 1);
365 /* fallthrough */
366 default:
367 surf_type = translate_tex_target(gl_target);
368 is_array = _mesa_tex_target_is_array(gl_target);
369 break;
370 }
371
372 /* _NEW_BUFFERS */
373 /* Render targets can't use IMS layout. Stencil in turn gets configured as
374 * single sampled and indexed manually by the program.
375 */
376 if (mt->format == MESA_FORMAT_S_UINT8) {
377 brw_configure_w_tiled(mt, true, &width, &height, &pitch,
378 &tiling, &format);
379 } else {
380 assert(mt->msaa_layout != INTEL_MSAA_LAYOUT_IMS);
381 assert(brw_render_target_supported(brw, rb));
382 mesa_format rb_format = _mesa_get_render_format(ctx,
383 intel_rb_format(irb));
384 format = brw->render_target_format[rb_format];
385 if (unlikely(!brw->format_supported_as_render_target[rb_format]))
386 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
387 __FUNCTION__, _mesa_get_format_name(rb_format));
388 }
389
390 if (mt->mcs_mt) {
391 aux_mt = mt->mcs_mt;
392 aux_mode = GEN8_SURFACE_AUX_MODE_MCS;
393 }
394
395 uint32_t *surf =
396 allocate_surface_state(brw, &brw->wm.base.surf_offset[surf_index]);
397
398 surf[0] = (surf_type << BRW_SURFACE_TYPE_SHIFT) |
399 (is_array ? GEN7_SURFACE_IS_ARRAY : 0) |
400 (format << BRW_SURFACE_FORMAT_SHIFT) |
401 vertical_alignment(mt) |
402 horizontal_alignment(mt) |
403 surface_tiling_mode(tiling);
404
405 surf[1] = SET_FIELD(mocs, GEN8_SURFACE_MOCS) | mt->qpitch >> 2;
406
407 surf[2] = SET_FIELD(width - 1, GEN7_SURFACE_WIDTH) |
408 SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT);
409
410 surf[3] = (depth - 1) << BRW_SURFACE_DEPTH_SHIFT |
411 (pitch - 1); /* Surface Pitch */
412
413 surf[4] = min_array_element << GEN7_SURFACE_MIN_ARRAY_ELEMENT_SHIFT |
414 (depth - 1) << GEN7_SURFACE_RENDER_TARGET_VIEW_EXTENT_SHIFT;
415
416 if (mt->format != MESA_FORMAT_S_UINT8)
417 surf[4] |= gen7_surface_msaa_bits(mt->num_samples, mt->msaa_layout);
418
419 surf[5] = irb->mt_level - irb->mt->first_level;
420
421 if (aux_mt) {
422 surf[6] = SET_FIELD(mt->qpitch / 4, GEN8_SURFACE_AUX_QPITCH) |
423 SET_FIELD((aux_mt->pitch / 128) - 1, GEN8_SURFACE_AUX_PITCH) |
424 aux_mode;
425 } else {
426 surf[6] = 0;
427 }
428
429 surf[7] = mt->fast_clear_color_value |
430 SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) |
431 SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
432 SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) |
433 SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A);
434
435 *((uint64_t *) &surf[8]) = mt->bo->offset64; /* reloc */
436
437 if (aux_mt) {
438 *((uint64_t *) &surf[10]) = aux_mt->bo->offset64;
439 drm_intel_bo_emit_reloc(brw->batch.bo,
440 brw->wm.base.surf_offset[surf_index] + 10 * 4,
441 aux_mt->bo, 0,
442 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
443 } else {
444 surf[10] = 0;
445 surf[11] = 0;
446 }
447 surf[12] = 0;
448
449 drm_intel_bo_emit_reloc(brw->batch.bo,
450 brw->wm.base.surf_offset[surf_index] + 8 * 4,
451 mt->bo,
452 0,
453 I915_GEM_DOMAIN_RENDER,
454 I915_GEM_DOMAIN_RENDER);
455 }
456
457 void
458 gen8_init_vtable_surface_functions(struct brw_context *brw)
459 {
460 brw->vtbl.update_texture_surface = gen8_update_texture_surface;
461 brw->vtbl.update_renderbuffer_surface = gen8_update_renderbuffer_surface;
462 brw->vtbl.update_null_renderbuffer_surface =
463 gen8_update_null_renderbuffer_surface;
464 brw->vtbl.create_raw_surface = gen8_create_raw_surface;
465 brw->vtbl.emit_buffer_surface_state = gen8_emit_buffer_surface_state;
466 }