i965/vec4: Don't lose the force_writemask_all flag during CSE.
[mesa.git] / src / mesa / drivers / dri / i965 / gen8_surface_state.c
1 /*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "main/blend.h"
25 #include "main/mtypes.h"
26 #include "main/samplerobj.h"
27 #include "main/texformat.h"
28 #include "program/prog_parameter.h"
29
30 #include "intel_mipmap_tree.h"
31 #include "intel_batchbuffer.h"
32 #include "intel_tex.h"
33 #include "intel_fbo.h"
34 #include "intel_buffer_objects.h"
35
36 #include "brw_context.h"
37 #include "brw_state.h"
38 #include "brw_defines.h"
39 #include "brw_wm.h"
40
/**
 * Convert a Mesa swizzle channel (SWIZZLE_X .. SWIZZLE_ONE) to the Gen7.5+
 * "Shader Channel Select" encoding (HSW_SCS_RED .. HSW_SCS_ONE).
 *
 * The two enumerations are rotations of one another:
 *
 *    SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
 *        0          1          2          3          4            5
 *        4          5          6          7          0            1
 *    SCS_RED,   SCS_GREEN, SCS_BLUE,  SCS_ALPHA, SCS_ZERO,    SCS_ONE
 *
 * so the mapping is simply "add 4 modulo 8".
 */
static unsigned
swizzle_to_scs(unsigned swizzle)
{
   return (swizzle + 4) % 8;
}
57
58 static uint32_t
59 surface_tiling_mode(uint32_t tiling)
60 {
61 switch (tiling) {
62 case I915_TILING_X:
63 return GEN8_SURFACE_TILING_X;
64 case I915_TILING_Y:
65 return GEN8_SURFACE_TILING_Y;
66 default:
67 return GEN8_SURFACE_TILING_NONE;
68 }
69 }
70
71 static unsigned
72 vertical_alignment(struct intel_mipmap_tree *mt)
73 {
74 switch (mt->align_h) {
75 case 4:
76 return GEN8_SURFACE_VALIGN_4;
77 case 8:
78 return GEN8_SURFACE_VALIGN_8;
79 case 16:
80 return GEN8_SURFACE_VALIGN_16;
81 default:
82 unreachable("Unsupported vertical surface alignment.");
83 }
84 }
85
86 static unsigned
87 horizontal_alignment(struct intel_mipmap_tree *mt)
88 {
89 switch (mt->align_w) {
90 case 4:
91 return GEN8_SURFACE_HALIGN_4;
92 case 8:
93 return GEN8_SURFACE_HALIGN_8;
94 case 16:
95 return GEN8_SURFACE_HALIGN_16;
96 default:
97 unreachable("Unsupported horizontal surface alignment.");
98 }
99 }
100
101 static uint32_t *
102 allocate_surface_state(struct brw_context *brw, uint32_t *out_offset)
103 {
104 int dwords = brw->gen >= 9 ? 16 : 13;
105 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
106 dwords * 4, 64, out_offset);
107 memset(surf, 0, dwords * 4);
108 return surf;
109 }
110
111 static void
112 gen8_emit_buffer_surface_state(struct brw_context *brw,
113 uint32_t *out_offset,
114 drm_intel_bo *bo,
115 unsigned buffer_offset,
116 unsigned surface_format,
117 unsigned buffer_size,
118 unsigned pitch,
119 bool rw)
120 {
121 const unsigned mocs = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
122 uint32_t *surf = allocate_surface_state(brw, out_offset);
123
124 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
125 surface_format << BRW_SURFACE_FORMAT_SHIFT |
126 BRW_SURFACE_RC_READ_WRITE;
127 surf[1] = SET_FIELD(mocs, GEN8_SURFACE_MOCS);
128
129 surf[2] = SET_FIELD((buffer_size - 1) & 0x7f, GEN7_SURFACE_WIDTH) |
130 SET_FIELD(((buffer_size - 1) >> 7) & 0x3fff, GEN7_SURFACE_HEIGHT);
131 surf[3] = SET_FIELD(((buffer_size - 1) >> 21) & 0x3f, BRW_SURFACE_DEPTH) |
132 (pitch - 1);
133 surf[7] = SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) |
134 SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
135 SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) |
136 SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A);
137 /* reloc */
138 *((uint64_t *) &surf[8]) = (bo ? bo->offset64 : 0) + buffer_offset;
139
140 /* Emit relocation to surface contents. */
141 if (bo) {
142 drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 8 * 4,
143 bo, buffer_offset, I915_GEM_DOMAIN_SAMPLER,
144 rw ? I915_GEM_DOMAIN_SAMPLER : 0);
145 }
146 }
147
/**
 * Emit a Gen8+ SURFACE_STATE for the texture bound to texture unit \p unit,
 * storing its batchbuffer offset in *surf_offset.
 *
 * Buffer textures are delegated to the gen-shared buffer path.  Otherwise
 * this handles stencil texturing, texture views with restricted layer
 * counts, MCS auxiliary surfaces, and channel swizzling via the Shader
 * Channel Select fields.
 *
 * NOTE(review): \p for_gather is unused here — presumably Gen8 needs no
 * gather-specific format overrides; confirm against the gen7 counterpart.
 */
static void
gen8_update_texture_surface(struct gl_context *ctx,
                            unsigned unit,
                            uint32_t *surf_offset,
                            bool for_gather)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   struct intel_texture_object *intelObj = intel_texture_object(tObj);
   struct intel_mipmap_tree *mt = intelObj->mt;
   struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel];
   struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
   struct intel_mipmap_tree *aux_mt = NULL;
   uint32_t aux_mode = 0;
   mesa_format format = intelObj->_Format;
   uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;

   /* Buffer textures use an entirely different layout; hand them off. */
   if (tObj->Target == GL_TEXTURE_BUFFER) {
      brw_update_buffer_texture_surface(ctx, unit, surf_offset);
      return;
   }

   /* Stencil texturing of a packed depth/stencil texture samples from the
    * separate S8 miptree instead.
    */
   if (tObj->StencilSampling && firstImage->_BaseFormat == GL_DEPTH_STENCIL) {
      mt = mt->stencil_mt;
      format = MESA_FORMAT_S_UINT8;
   }

   unsigned tiling_mode, pitch;
   if (format == MESA_FORMAT_S_UINT8) {
      /* Stencil is W-tiled; the surface pitch is specified in terms of the
       * equivalent Y-tiled view, which is twice the W-tile pitch.
       */
      tiling_mode = GEN8_SURFACE_TILING_W;
      pitch = 2 * mt->pitch;
   } else {
      tiling_mode = surface_tiling_mode(mt->tiling);
      pitch = mt->pitch;
   }

   /* If a multisample control surface exists, enable MCS auxiliary mode. */
   if (mt->mcs_mt) {
      aux_mt = mt->mcs_mt;
      aux_mode = GEN8_SURFACE_AUX_MODE_MCS;
   }

   /* If this is a view with restricted NumLayers, then our effective depth
    * is not just the miptree depth.
    */
   uint32_t effective_depth =
      (tObj->Immutable && tObj->Target != GL_TEXTURE_3D) ? tObj->NumLayers
                                                         : mt->logical_depth0;

   uint32_t tex_format = translate_tex_format(brw, format, sampler->sRGBDecode);

   uint32_t *surf = allocate_surface_state(brw, surf_offset);

   /* DWORD 0: surface type, format, alignments and tiling. */
   surf[0] = translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
             tex_format << BRW_SURFACE_FORMAT_SHIFT |
             vertical_alignment(mt) |
             horizontal_alignment(mt) |
             tiling_mode;

   if (tObj->Target == GL_TEXTURE_CUBE_MAP ||
       tObj->Target == GL_TEXTURE_CUBE_MAP_ARRAY) {
      surf[0] |= BRW_SURFACE_CUBEFACE_ENABLES;
   }

   if (mt->logical_depth0 > 1 && tObj->Target != GL_TEXTURE_3D)
      surf[0] |= GEN8_SURFACE_IS_ARRAY;

   /* DWORD 1: MOCS and QPitch (in units of 4 rows). */
   surf[1] = SET_FIELD(mocs_wb, GEN8_SURFACE_MOCS) | mt->qpitch >> 2;

   surf[2] = SET_FIELD(mt->logical_width0 - 1, GEN7_SURFACE_WIDTH) |
             SET_FIELD(mt->logical_height0 - 1, GEN7_SURFACE_HEIGHT);

   surf[3] = SET_FIELD(effective_depth - 1, BRW_SURFACE_DEPTH) | (pitch - 1);

   surf[4] = gen7_surface_msaa_bits(mt->num_samples, mt->msaa_layout) |
             SET_FIELD(tObj->MinLayer, GEN7_SURFACE_MIN_ARRAY_ELEMENT) |
             SET_FIELD(effective_depth - 1,
                       GEN7_SURFACE_RENDER_TARGET_VIEW_EXTENT);

   /* DWORD 5: base mip level (adjusted for views) and mip count. */
   surf[5] = SET_FIELD(tObj->MinLevel + tObj->BaseLevel - mt->first_level,
                       GEN7_SURFACE_MIN_LOD) |
             (intelObj->_MaxLevel - tObj->BaseLevel); /* mip count */

   if (aux_mt) {
      /* Auxiliary surface QPitch is in units of 4 rows, pitch in tiles. */
      surf[6] = SET_FIELD(mt->qpitch / 4, GEN8_SURFACE_AUX_QPITCH) |
                SET_FIELD((aux_mt->pitch / 128) - 1, GEN8_SURFACE_AUX_PITCH) |
                aux_mode;
   } else {
      surf[6] = 0;
   }

   /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
    * texturing functions that return a float, as our code generation always
    * selects the .x channel (which would always be 0).
    */
   const bool alpha_depth = tObj->DepthMode == GL_ALPHA &&
      (firstImage->_BaseFormat == GL_DEPTH_COMPONENT ||
       firstImage->_BaseFormat == GL_DEPTH_STENCIL);

   surf[7] = mt->fast_clear_color_value;

   /* Program the channel selects; GL_ALPHA depth textures bypass the
    * computed swizzle (see comment above).
    */
   const int swizzle =
      unlikely(alpha_depth) ? SWIZZLE_XYZW : brw_get_texture_swizzle(ctx, tObj);
   surf[7] |=
      SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 0)), GEN7_SURFACE_SCS_R) |
      SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 1)), GEN7_SURFACE_SCS_G) |
      SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 2)), GEN7_SURFACE_SCS_B) |
      SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 3)), GEN7_SURFACE_SCS_A);

   /* DWORDs 8-9: 48-bit presumed surface address (patched by reloc). */
   *((uint64_t *) &surf[8]) = mt->bo->offset64 + mt->offset; /* reloc */

   if (aux_mt) {
      /* DWORDs 10-11: presumed auxiliary surface address plus its reloc. */
      *((uint64_t *) &surf[10]) = aux_mt->bo->offset64;
      drm_intel_bo_emit_reloc(brw->batch.bo, *surf_offset + 10 * 4,
                              aux_mt->bo, 0,
                              I915_GEM_DOMAIN_SAMPLER, 0);
   } else {
      surf[10] = 0;
      surf[11] = 0;
   }
   surf[12] = 0;

   /* Emit relocation to surface contents */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           *surf_offset + 8 * 4,
                           mt->bo,
                           mt->offset,
                           I915_GEM_DOMAIN_SAMPLER, 0);
}
276
277 /**
278 * Creates a null surface.
279 *
280 * This is used when the shader doesn't write to any color output. An FB
281 * write to target 0 will still be emitted, because that's how the thread is
282 * terminated (and computed depth is returned), so we need to have the
283 * hardware discard the target 0 color output..
284 */
285 static void
286 gen8_emit_null_surface_state(struct brw_context *brw,
287 unsigned width,
288 unsigned height,
289 unsigned samples,
290 uint32_t *out_offset)
291 {
292 uint32_t *surf = allocate_surface_state(brw, out_offset);
293
294 surf[0] = BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT |
295 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT |
296 GEN8_SURFACE_TILING_Y;
297 surf[2] = SET_FIELD(width - 1, GEN7_SURFACE_WIDTH) |
298 SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT);
299 }
300
301 /**
302 * Sets up a surface state structure to point at the given region.
303 * While it is only used for the front/back buffer currently, it should be
304 * usable for further buffers when doing ARB_draw_buffer support.
305 */
306 static void
307 gen8_update_renderbuffer_surface(struct brw_context *brw,
308 struct gl_renderbuffer *rb,
309 bool layered,
310 unsigned unit)
311 {
312 struct gl_context *ctx = &brw->ctx;
313 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
314 struct intel_mipmap_tree *mt = irb->mt;
315 struct intel_mipmap_tree *aux_mt = NULL;
316 uint32_t aux_mode = 0;
317 unsigned width = mt->logical_width0;
318 unsigned height = mt->logical_height0;
319 unsigned pitch = mt->pitch;
320 uint32_t tiling = mt->tiling;
321 uint32_t format = 0;
322 uint32_t surf_type;
323 bool is_array = false;
324 int depth = MAX2(irb->layer_count, 1);
325 const int min_array_element = (mt->format == MESA_FORMAT_S_UINT8) ?
326 irb->mt_layer : (irb->mt_layer / MAX2(mt->num_samples, 1));
327 GLenum gl_target =
328 rb->TexImage ? rb->TexImage->TexObject->Target : GL_TEXTURE_2D;
329 uint32_t surf_index =
330 brw->wm.prog_data->binding_table.render_target_start + unit;
331 /* FINISHME: Use PTE MOCS on Skylake. */
332 uint32_t mocs = brw->gen >= 9 ? SKL_MOCS_WT : BDW_MOCS_PTE;
333
334 intel_miptree_used_for_rendering(mt);
335
336 switch (gl_target) {
337 case GL_TEXTURE_CUBE_MAP_ARRAY:
338 case GL_TEXTURE_CUBE_MAP:
339 surf_type = BRW_SURFACE_2D;
340 is_array = true;
341 depth *= 6;
342 break;
343 case GL_TEXTURE_3D:
344 depth = MAX2(irb->mt->logical_depth0, 1);
345 /* fallthrough */
346 default:
347 surf_type = translate_tex_target(gl_target);
348 is_array = _mesa_tex_target_is_array(gl_target);
349 break;
350 }
351
352 /* _NEW_BUFFERS */
353 /* Render targets can't use IMS layout. Stencil in turn gets configured as
354 * single sampled and indexed manually by the program.
355 */
356 if (mt->format == MESA_FORMAT_S_UINT8) {
357 brw_configure_w_tiled(mt, true, &width, &height, &pitch,
358 &tiling, &format);
359 } else {
360 assert(mt->msaa_layout != INTEL_MSAA_LAYOUT_IMS);
361 assert(brw_render_target_supported(brw, rb));
362 mesa_format rb_format = _mesa_get_render_format(ctx,
363 intel_rb_format(irb));
364 format = brw->render_target_format[rb_format];
365 if (unlikely(!brw->format_supported_as_render_target[rb_format]))
366 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
367 __FUNCTION__, _mesa_get_format_name(rb_format));
368 }
369
370 if (mt->mcs_mt) {
371 aux_mt = mt->mcs_mt;
372 aux_mode = GEN8_SURFACE_AUX_MODE_MCS;
373 }
374
375 uint32_t *surf =
376 allocate_surface_state(brw, &brw->wm.base.surf_offset[surf_index]);
377
378 surf[0] = (surf_type << BRW_SURFACE_TYPE_SHIFT) |
379 (is_array ? GEN7_SURFACE_IS_ARRAY : 0) |
380 (format << BRW_SURFACE_FORMAT_SHIFT) |
381 vertical_alignment(mt) |
382 horizontal_alignment(mt) |
383 surface_tiling_mode(tiling);
384
385 surf[1] = SET_FIELD(mocs, GEN8_SURFACE_MOCS) | mt->qpitch >> 2;
386
387 surf[2] = SET_FIELD(width - 1, GEN7_SURFACE_WIDTH) |
388 SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT);
389
390 surf[3] = (depth - 1) << BRW_SURFACE_DEPTH_SHIFT |
391 (pitch - 1); /* Surface Pitch */
392
393 surf[4] = min_array_element << GEN7_SURFACE_MIN_ARRAY_ELEMENT_SHIFT |
394 (depth - 1) << GEN7_SURFACE_RENDER_TARGET_VIEW_EXTENT_SHIFT;
395
396 if (mt->format != MESA_FORMAT_S_UINT8)
397 surf[4] |= gen7_surface_msaa_bits(mt->num_samples, mt->msaa_layout);
398
399 surf[5] = irb->mt_level - irb->mt->first_level;
400
401 if (aux_mt) {
402 surf[6] = SET_FIELD(mt->qpitch / 4, GEN8_SURFACE_AUX_QPITCH) |
403 SET_FIELD((aux_mt->pitch / 128) - 1, GEN8_SURFACE_AUX_PITCH) |
404 aux_mode;
405 } else {
406 surf[6] = 0;
407 }
408
409 surf[7] = mt->fast_clear_color_value |
410 SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) |
411 SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
412 SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) |
413 SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A);
414
415 assert(mt->offset % mt->cpp == 0);
416 *((uint64_t *) &surf[8]) = mt->bo->offset64 + mt->offset; /* reloc */
417
418 if (aux_mt) {
419 *((uint64_t *) &surf[10]) = aux_mt->bo->offset64;
420 drm_intel_bo_emit_reloc(brw->batch.bo,
421 brw->wm.base.surf_offset[surf_index] + 10 * 4,
422 aux_mt->bo, 0,
423 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
424 } else {
425 surf[10] = 0;
426 surf[11] = 0;
427 }
428 surf[12] = 0;
429
430 drm_intel_bo_emit_reloc(brw->batch.bo,
431 brw->wm.base.surf_offset[surf_index] + 8 * 4,
432 mt->bo,
433 mt->offset,
434 I915_GEM_DOMAIN_RENDER,
435 I915_GEM_DOMAIN_RENDER);
436 }
437
438 void
439 gen8_init_vtable_surface_functions(struct brw_context *brw)
440 {
441 brw->vtbl.update_texture_surface = gen8_update_texture_surface;
442 brw->vtbl.update_renderbuffer_surface = gen8_update_renderbuffer_surface;
443 brw->vtbl.emit_null_surface_state = gen8_emit_null_surface_state;
444 brw->vtbl.emit_buffer_surface_state = gen8_emit_buffer_surface_state;
445 }