/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
 /*
  * Authors:
  *   Keith Whitwell <keithw@vmware.com>
  */


#include "compiler/nir/nir.h"
#include "main/context.h"
#include "main/blend.h"
#include "main/mtypes.h"
#include "main/samplerobj.h"
#include "main/shaderimage.h"
#include "main/teximage.h"
#include "program/prog_parameter.h"
#include "program/prog_instruction.h"
#include "main/framebuffer.h"
#include "main/shaderapi.h"

#include "isl/isl.h"

#include "intel_mipmap_tree.h"
#include "intel_batchbuffer.h"
#include "intel_tex.h"
#include "intel_fbo.h"
#include "intel_buffer_objects.h"

#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_wm.h"

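/* Memory Object Control State (MOCS) entries for sampled surfaces (tex_mocs)
 * and render targets (rb_mocs), indexed by hardware generation.  Render
 * targets use the PTE-based entries on gen8+ so their cacheability follows
 * the kernel's page-table setup (e.g. for buffers that may be scanned out).
 */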
uint32_t tex_mocs[] = {
   [7] = GEN7_MOCS_L3,
   [8] = BDW_MOCS_WB,
   [9] = SKL_MOCS_WB,
   [10] = CNL_MOCS_WB,
};

uint32_t rb_mocs[] = {
   [7] = GEN7_MOCS_L3,
   [8] = BDW_MOCS_PTE,
   [9] = SKL_MOCS_PTE,
   [10] = CNL_MOCS_PTE,
};

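/* Copy mt->surf into *surf, adjusted for use with the given texture target.
 * If the miptree's layout is not compatible with the target, the copy is
 * rebased (via a hardware tile offset) to the single level/layer named by
 * the view; see the comment in the function body.
 */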
static void
get_isl_surf(struct brw_context *brw, struct intel_mipmap_tree *mt,
             GLenum target, struct isl_view *view,
             uint32_t *tile_x, uint32_t *tile_y,
             uint32_t *offset, struct isl_surf *surf)
{
   *surf = mt->surf;

   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   const enum isl_dim_layout dim_layout =
      get_isl_dim_layout(devinfo, mt->surf.tiling, target);

   if (surf->dim_layout == dim_layout)
      return;

   /* The layout of the specified texture target is not compatible with the
    * actual layout of the miptree structure in memory -- You're entering
    * dangerous territory, this can only possibly work if you only intended
    * to access a single level and slice of the texture, and the hardware
    * supports the tile offset feature in order to allow non-tile-aligned
    * base offsets, since we'll have to point the hardware to the first
    * texel of the level instead of relying on the usual base level/layer
    * controls.
    */
   assert(devinfo->has_surface_tile_offset);
   assert(view->levels == 1 && view->array_len == 1);
   assert(*tile_x == 0 && *tile_y == 0);

   *offset += intel_miptree_get_tile_offsets(mt, view->base_level,
                                             view->base_array_layer,
                                             tile_x, tile_y);

   /* Minify the logical dimensions of the texture. */
   const unsigned l = view->base_level - mt->first_level;
   surf->logical_level0_px.width = minify(surf->logical_level0_px.width, l);
   surf->logical_level0_px.height = surf->dim <= ISL_SURF_DIM_1D ? 1 :
      minify(surf->logical_level0_px.height, l);
   surf->logical_level0_px.depth = surf->dim <= ISL_SURF_DIM_2D ? 1 :
      minify(surf->logical_level0_px.depth, l);

   /* Only the base level and layer can be addressed with the overridden
    * layout.
    */
   surf->logical_level0_px.array_len = 1;
   surf->levels = 1;
   surf->dim_layout = dim_layout;

   /* The requested slice of the texture is now at the base level and
    * layer.
    */
   view->base_level = 0;
   view->base_array_layer = 0;
}

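/* Emit a SURFACE_STATE entry for the given miptree and view, including the
 * auxiliary (MCS/CCS/HiZ) surface and fast-clear color when aux_usage names
 * one.  The offset of the new surface state within the batch is written to
 * *surf_offset.
 */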
static void
brw_emit_surface_state(struct brw_context *brw,
                       struct intel_mipmap_tree *mt,
                       GLenum target, struct isl_view view,
                       enum isl_aux_usage aux_usage,
                       uint32_t mocs, uint32_t *surf_offset, int surf_index,
                       unsigned reloc_flags)
{
   uint32_t tile_x = mt->level[0].level_x;
   uint32_t tile_y = mt->level[0].level_y;
   uint32_t offset = mt->offset;

   struct isl_surf surf;

   get_isl_surf(brw, mt, target, &view, &tile_x, &tile_y, &offset, &surf);

   union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } };

   struct brw_bo *aux_bo;
   struct isl_surf *aux_surf = NULL;
   uint64_t aux_offset = 0;
   switch (aux_usage) {
   case ISL_AUX_USAGE_MCS:
   case ISL_AUX_USAGE_CCS_D:
   case ISL_AUX_USAGE_CCS_E:
      aux_surf = &mt->mcs_buf->surf;
      aux_bo = mt->mcs_buf->bo;
      aux_offset = mt->mcs_buf->offset;
      break;

   case ISL_AUX_USAGE_HIZ:
      aux_surf = &mt->hiz_buf->surf;
      aux_bo = mt->hiz_buf->bo;
      aux_offset = 0;
      break;

   case ISL_AUX_USAGE_NONE:
      break;
   }

   if (aux_usage != ISL_AUX_USAGE_NONE) {
      /* We only really need a clear color if we also have an auxiliary
       * surface.  Without one, it does nothing.
       */
      clear_color = mt->fast_clear_color;
   }

   void *state = brw_state_batch(brw,
                                 brw->isl_dev.ss.size,
                                 brw->isl_dev.ss.align,
                                 surf_offset);

   isl_surf_fill_state(&brw->isl_dev, state, .surf = &surf, .view = &view,
                       .address = brw_state_reloc(&brw->batch,
                                                  *surf_offset + brw->isl_dev.ss.addr_offset,
                                                  mt->bo, offset, reloc_flags),
                       .aux_surf = aux_surf, .aux_usage = aux_usage,
                       .aux_address = aux_offset,
                       .mocs = mocs, .clear_color = clear_color,
                       .x_offset_sa = tile_x, .y_offset_sa = tile_y);
   if (aux_surf) {
      /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
       * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
       * contain other control information.  Since buffer addresses are always
       * on 4k boundaries (and thus have their lower 12 bits zero), we can use
       * an ordinary reloc to do the necessary address translation.
       *
       * FIXME: move to the point of assignment.
       */
      assert((aux_offset & 0xfff) == 0);
      uint32_t *aux_addr = state + brw->isl_dev.ss.aux_addr_offset;
      *aux_addr = brw_state_reloc(&brw->batch,
                                  *surf_offset +
                                  brw->isl_dev.ss.aux_addr_offset,
                                  aux_bo, *aux_addr,
                                  reloc_flags);
   }
}

static uint32_t
gen6_update_renderbuffer_surface(struct brw_context *brw,
                                 struct gl_renderbuffer *rb,
                                 unsigned unit,
                                 uint32_t surf_index)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   struct intel_mipmap_tree *mt = irb->mt;

   assert(brw_render_target_supported(brw, rb));

   mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
   if (unlikely(!brw->mesa_format_supports_render[rb_format])) {
      _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
                    __func__, _mesa_get_format_name(rb_format));
   }
   enum isl_format isl_format = brw->mesa_to_isl_render_format[rb_format];

   enum isl_aux_usage aux_usage =
      brw->draw_aux_buffer_disabled[unit] ? ISL_AUX_USAGE_NONE :
      intel_miptree_render_aux_usage(brw, mt, isl_format,
                                     ctx->Color.BlendEnabled & (1 << unit));

   struct isl_view view = {
      .format = isl_format,
      .base_level = irb->mt_level - irb->mt->first_level,
      .levels = 1,
      .base_array_layer = irb->mt_layer,
      .array_len = MAX2(irb->layer_count, 1),
      .swizzle = ISL_SWIZZLE_IDENTITY,
      .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
   };

   uint32_t offset;
   brw_emit_surface_state(brw, mt, mt->target, view, aux_usage,
                          rb_mocs[devinfo->gen],
                          &offset, surf_index,
                          RELOC_WRITE);
   return offset;
}

GLuint
translate_tex_target(GLenum target)
{
   switch (target) {
   case GL_TEXTURE_1D:
   case GL_TEXTURE_1D_ARRAY_EXT:
      return BRW_SURFACE_1D;

   case GL_TEXTURE_RECTANGLE_NV:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_2D:
   case GL_TEXTURE_2D_ARRAY_EXT:
   case GL_TEXTURE_EXTERNAL_OES:
   case GL_TEXTURE_2D_MULTISAMPLE:
   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_3D:
      return BRW_SURFACE_3D;

   case GL_TEXTURE_CUBE_MAP:
   case GL_TEXTURE_CUBE_MAP_ARRAY:
      return BRW_SURFACE_CUBE;

   default:
      unreachable("not reached");
   }
}

uint32_t
brw_get_surface_tiling_bits(enum isl_tiling tiling)
{
   switch (tiling) {
   case ISL_TILING_X:
      return BRW_SURFACE_TILED;
   case ISL_TILING_Y0:
      return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
   default:
      return 0;
   }
}


uint32_t
brw_get_surface_num_multisamples(unsigned num_samples)
{
   if (num_samples > 1)
      return BRW_SURFACE_MULTISAMPLECOUNT_4;
   else
      return BRW_SURFACE_MULTISAMPLECOUNT_1;
}

/**
 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 * swizzling.
 */
int
brw_get_texture_swizzle(const struct gl_context *ctx,
                        const struct gl_texture_object *t)
{
   const struct gl_texture_image *img = t->Image[0][t->BaseLevel];

   int swizzles[SWIZZLE_NIL + 1] = {
      SWIZZLE_X,
      SWIZZLE_Y,
      SWIZZLE_Z,
      SWIZZLE_W,
      SWIZZLE_ZERO,
      SWIZZLE_ONE,
      SWIZZLE_NIL
   };

   if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
       img->_BaseFormat == GL_DEPTH_STENCIL) {
      GLenum depth_mode = t->DepthMode;

      /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
       * with depth component data specified with a sized internal format.
       * Otherwise, it's left at the old default, GL_LUMINANCE.
       */
      if (_mesa_is_gles3(ctx) &&
          img->InternalFormat != GL_DEPTH_COMPONENT &&
          img->InternalFormat != GL_DEPTH_STENCIL) {
         depth_mode = GL_RED;
      }

      switch (depth_mode) {
      case GL_ALPHA:
         swizzles[0] = SWIZZLE_ZERO;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_LUMINANCE:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
         break;
      case GL_INTENSITY:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_RED:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_ONE;
         break;
      }
   }

   GLenum datatype = _mesa_get_format_datatype(img->TexFormat);

   /* If the texture's format is alpha-only, force R, G, and B to
    * 0.0. Similarly, if the texture's format has no alpha channel,
    * force the alpha value read to 1.0. This allows for the
    * implementation to use an RGBA texture for any of these formats
    * without leaking any unexpected values.
    */
   switch (img->_BaseFormat) {
   case GL_ALPHA:
      swizzles[0] = SWIZZLE_ZERO;
      swizzles[1] = SWIZZLE_ZERO;
      swizzles[2] = SWIZZLE_ZERO;
      break;
   case GL_LUMINANCE:
      if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
      }
      break;
   case GL_LUMINANCE_ALPHA:
      if (datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_W;
      }
      break;
   case GL_INTENSITY:
      if (datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
      }
      break;
   case GL_RED:
   case GL_RG:
   case GL_RGB:
      if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0 ||
          img->TexFormat == MESA_FORMAT_RGB_DXT1 ||
          img->TexFormat == MESA_FORMAT_SRGB_DXT1)
         swizzles[3] = SWIZZLE_ONE;
      break;
   }

   return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
                        swizzles[GET_SWZ(t->_Swizzle, 1)],
                        swizzles[GET_SWZ(t->_Swizzle, 2)],
                        swizzles[GET_SWZ(t->_Swizzle, 3)]);
}

/**
 * Convert a swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+
 * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED).  The mappings are
 *
 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
 *         0          1          2          3             4            5
 *         4          5          6          7             0            1
 *   SCS_RED, SCS_GREEN,  SCS_BLUE, SCS_ALPHA,     SCS_ZERO,     SCS_ONE
 *
 * which is simply adding 4 then modding by 8 (or anding with 7).
 *
 * We then may need to apply workarounds for textureGather hardware bugs.
 */
static unsigned
swizzle_to_scs(GLenum swizzle, bool need_green_to_blue)
{
   unsigned scs = (swizzle + 4) & 7;

   return (need_green_to_blue && scs == HSW_SCS_GREEN) ? HSW_SCS_BLUE : scs;
}

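/* Return true if the given miptree is bound as a color draw buffer whose
 * auxiliary buffer has been disabled for the current draw, in which case
 * sampling from it must bypass the aux surface as well.
 */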
static bool
brw_aux_surface_disabled(const struct brw_context *brw,
                         const struct intel_mipmap_tree *mt)
{
   const struct gl_framebuffer *fb = brw->ctx.DrawBuffer;

   for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
      const struct intel_renderbuffer *irb =
         intel_renderbuffer(fb->_ColorDrawBuffers[i]);

      if (irb && irb->mt == mt)
         return brw->draw_aux_buffer_disabled[i];
   }

   return false;
}

static void
brw_update_texture_surface(struct gl_context *ctx,
                           unsigned unit,
                           uint32_t *surf_offset,
                           bool for_gather,
                           bool for_txf,
                           uint32_t plane)
{
   struct brw_context *brw = brw_context(ctx);
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current;

   if (obj->Target == GL_TEXTURE_BUFFER) {
      brw_update_buffer_texture_surface(ctx, unit, surf_offset);

   } else {
      struct intel_texture_object *intel_obj = intel_texture_object(obj);
      struct intel_mipmap_tree *mt = intel_obj->mt;

      if (plane > 0) {
         if (mt->plane[plane - 1] == NULL)
            return;
         mt = mt->plane[plane - 1];
      }

      struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
      /* If this is a view with restricted NumLayers, then our effective depth
       * is not just the miptree depth.
       */
      unsigned view_num_layers;
      if (obj->Immutable && obj->Target != GL_TEXTURE_3D) {
         view_num_layers = obj->NumLayers;
      } else {
         view_num_layers = mt->surf.dim == ISL_SURF_DIM_3D ?
                              mt->surf.logical_level0_px.depth :
                              mt->surf.logical_level0_px.array_len;
      }

      /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
       * texturing functions that return a float, as our code generation always
       * selects the .x channel (which would always be 0).
       */
      struct gl_texture_image *firstImage = obj->Image[0][obj->BaseLevel];
      const bool alpha_depth = obj->DepthMode == GL_ALPHA &&
         (firstImage->_BaseFormat == GL_DEPTH_COMPONENT ||
          firstImage->_BaseFormat == GL_DEPTH_STENCIL);
      const unsigned swizzle = (unlikely(alpha_depth) ? SWIZZLE_XYZW :
                                brw_get_texture_swizzle(&brw->ctx, obj));

      mesa_format mesa_fmt = plane == 0 ? intel_obj->_Format : mt->format;
      enum isl_format format = translate_tex_format(brw, mesa_fmt,
                                                    for_txf ? GL_DECODE_EXT :
                                                    sampler->sRGBDecode);

      /* Implement gen6 and gen7 gather work-around */
      bool need_green_to_blue = false;
      if (for_gather) {
         if (devinfo->gen == 7 && (format == ISL_FORMAT_R32G32_FLOAT ||
                                   format == ISL_FORMAT_R32G32_SINT ||
                                   format == ISL_FORMAT_R32G32_UINT)) {
            format = ISL_FORMAT_R32G32_FLOAT_LD;
            need_green_to_blue = devinfo->is_haswell;
         } else if (devinfo->gen == 6) {
            /* Sandybridge's gather4 message is broken for integer formats.
             * To work around this, we pretend the surface is UNORM for
             * 8 or 16-bit formats, and emit shader instructions to recover
             * the real INT/UINT value.  For 32-bit formats, we pretend
             * the surface is FLOAT, and simply reinterpret the resulting
             * bits.
             */
            switch (format) {
            case ISL_FORMAT_R8_SINT:
            case ISL_FORMAT_R8_UINT:
               format = ISL_FORMAT_R8_UNORM;
               break;

            case ISL_FORMAT_R16_SINT:
            case ISL_FORMAT_R16_UINT:
               format = ISL_FORMAT_R16_UNORM;
               break;

            case ISL_FORMAT_R32_SINT:
            case ISL_FORMAT_R32_UINT:
               format = ISL_FORMAT_R32_FLOAT;
               break;

            default:
               break;
            }
         }
      }

      if (obj->StencilSampling && firstImage->_BaseFormat == GL_DEPTH_STENCIL) {
         if (devinfo->gen <= 7) {
            assert(mt->r8stencil_mt && !mt->stencil_mt->r8stencil_needs_update);
            mt = mt->r8stencil_mt;
         } else {
            mt = mt->stencil_mt;
         }
         format = ISL_FORMAT_R8_UINT;
      } else if (devinfo->gen <= 7 && mt->format == MESA_FORMAT_S_UINT8) {
         assert(mt->r8stencil_mt && !mt->r8stencil_needs_update);
         mt = mt->r8stencil_mt;
         format = ISL_FORMAT_R8_UINT;
      }

      const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];

      struct isl_view view = {
         .format = format,
         .base_level = obj->MinLevel + obj->BaseLevel,
         .levels = intel_obj->_MaxLevel - obj->BaseLevel + 1,
         .base_array_layer = obj->MinLayer,
         .array_len = view_num_layers,
         .swizzle = {
            .r = swizzle_to_scs(GET_SWZ(swizzle, 0), need_green_to_blue),
            .g = swizzle_to_scs(GET_SWZ(swizzle, 1), need_green_to_blue),
            .b = swizzle_to_scs(GET_SWZ(swizzle, 2), need_green_to_blue),
            .a = swizzle_to_scs(GET_SWZ(swizzle, 3), need_green_to_blue),
         },
         .usage = ISL_SURF_USAGE_TEXTURE_BIT,
      };

      if (obj->Target == GL_TEXTURE_CUBE_MAP ||
          obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY)
         view.usage |= ISL_SURF_USAGE_CUBE_BIT;

      enum isl_aux_usage aux_usage =
         intel_miptree_texture_aux_usage(brw, mt, format);

      if (brw_aux_surface_disabled(brw, mt))
         aux_usage = ISL_AUX_USAGE_NONE;

      brw_emit_surface_state(brw, mt, mt->target, view, aux_usage,
                             tex_mocs[devinfo->gen],
                             surf_offset, surf_index,
                             0);
   }
}

void
brw_emit_buffer_surface_state(struct brw_context *brw,
                              uint32_t *out_offset,
                              struct brw_bo *bo,
                              unsigned buffer_offset,
                              unsigned surface_format,
                              unsigned buffer_size,
                              unsigned pitch,
                              unsigned reloc_flags)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   uint32_t *dw = brw_state_batch(brw,
                                  brw->isl_dev.ss.size,
                                  brw->isl_dev.ss.align,
                                  out_offset);

   isl_buffer_fill_state(&brw->isl_dev, dw,
                         .address = !bo ? buffer_offset :
                                    brw_state_reloc(&brw->batch,
                                                    *out_offset + brw->isl_dev.ss.addr_offset,
                                                    bo, buffer_offset,
                                                    reloc_flags),
                         .size = buffer_size,
                         .format = surface_format,
                         .stride = pitch,
                         .mocs = tex_mocs[devinfo->gen]);
}

void
brw_update_buffer_texture_surface(struct gl_context *ctx,
                                  unsigned unit,
                                  uint32_t *surf_offset)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   struct intel_buffer_object *intel_obj =
      intel_buffer_object(tObj->BufferObject);
   uint32_t size = tObj->BufferSize;
   struct brw_bo *bo = NULL;
   mesa_format format = tObj->_BufferObjectFormat;
   const enum isl_format isl_format = brw_isl_format_for_mesa_format(format);
   int texel_size = _mesa_get_format_bytes(format);

   if (intel_obj) {
      size = MIN2(size, intel_obj->Base.Size);
      bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size,
                                  false);
   }

   /* The ARB_texture_buffer_specification says:
    *
    *    "The number of texels in the buffer texture's texel array is given by
    *
    *       floor(<buffer_size> / (<components> * sizeof(<base_type>)),
    *
    *     where <buffer_size> is the size of the buffer object, in basic
    *     machine units and <components> and <base_type> are the element count
    *     and base data type for elements, as specified in Table X.1.  The
    *     number of texels in the texel array is then clamped to the
    *     implementation-dependent limit MAX_TEXTURE_BUFFER_SIZE_ARB."
    *
    * We need to clamp the size in bytes to MAX_TEXTURE_BUFFER_SIZE * stride,
    * so that when ISL divides by stride to obtain the number of texels, that
    * texel count is clamped to MAX_TEXTURE_BUFFER_SIZE.
    */
   size = MIN2(size, ctx->Const.MaxTextureBufferSize * (unsigned) texel_size);

   if (isl_format == ISL_FORMAT_UNSUPPORTED) {
      _mesa_problem(NULL, "bad format %s for texture buffer\n",
                    _mesa_get_format_name(format));
   }

   brw_emit_buffer_surface_state(brw, surf_offset, bo,
                                 tObj->BufferOffset,
                                 isl_format,
                                 size,
                                 texel_size,
                                 0);
}

/**
 * Create the constant buffer surface.  Vertex/fragment shader constants will be
 * read from this buffer with Data Port Read instructions/messages.
 */
void
brw_create_constant_surface(struct brw_context *brw,
                            struct brw_bo *bo,
                            uint32_t offset,
                            uint32_t size,
                            uint32_t *out_offset)
{
   brw_emit_buffer_surface_state(brw, out_offset, bo, offset,
                                 ISL_FORMAT_R32G32B32A32_FLOAT,
                                 size, 1, 0);
}

/**
 * Create the buffer surface.  Shader buffer variables will be
 * read from / write to this buffer with Data Port Read/Write
 * instructions/messages.
 */
void
brw_create_buffer_surface(struct brw_context *brw,
                          struct brw_bo *bo,
                          uint32_t offset,
                          uint32_t size,
                          uint32_t *out_offset)
{
   /* Use a raw surface so we can reuse existing untyped read/write/atomic
    * messages.  We need these specifically for the fragment shader since they
    * include a pixel mask header that we need to ensure correct behavior
    * with helper invocations, which cannot write to the buffer.
    */
   brw_emit_buffer_surface_state(brw, out_offset, bo, offset,
                                 ISL_FORMAT_RAW,
                                 size, 1, RELOC_WRITE);
}

/**
 * Set up a binding table entry for use by stream output logic (transform
 * feedback).
 *
 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
 */
void
brw_update_sol_surface(struct brw_context *brw,
                       struct gl_buffer_object *buffer_obj,
                       uint32_t *out_offset, unsigned num_vector_components,
                       unsigned stride_dwords, unsigned offset_dwords)
{
   struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
   uint32_t offset_bytes = 4 * offset_dwords;
   struct brw_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
                                              offset_bytes,
                                              buffer_obj->Size - offset_bytes,
                                              true);
   uint32_t *surf = brw_state_batch(brw, 6 * 4, 32, out_offset);
   uint32_t pitch_minus_1 = 4*stride_dwords - 1;
   size_t size_dwords = buffer_obj->Size / 4;
   uint32_t buffer_size_minus_1, width, height, depth, surface_format;

   /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
    * too big to map using a single binding table entry?
    */
   assert((size_dwords - offset_dwords) / stride_dwords
          <= BRW_MAX_NUM_BUFFER_ENTRIES);

   if (size_dwords > offset_dwords + num_vector_components) {
      /* There is room for at least 1 transform feedback output in the buffer.
       * Compute the number of additional transform feedback outputs the
       * buffer has room for.
       */
      buffer_size_minus_1 =
         (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
   } else {
      /* There isn't even room for a single transform feedback output in the
       * buffer.  We can't configure the binding table entry to prevent output
       * entirely; we'll have to rely on the geometry shader to detect
       * overflow.  But to minimize the damage in case of a bug, set up the
       * binding table entry to just allow a single output.
       */
      buffer_size_minus_1 = 0;
   }
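   /* A buffer surface encodes its size as if it were a 3D texture: the low
    * 7 bits of (size - 1) go in the width field, the next 13 bits in the
    * height field, and the next 7 bits in the depth field.
    */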
   width = buffer_size_minus_1 & 0x7f;
   height = (buffer_size_minus_1 & 0xfff80) >> 7;
   depth = (buffer_size_minus_1 & 0x7f00000) >> 20;

   switch (num_vector_components) {
   case 1:
      surface_format = ISL_FORMAT_R32_FLOAT;
      break;
   case 2:
      surface_format = ISL_FORMAT_R32G32_FLOAT;
      break;
   case 3:
      surface_format = ISL_FORMAT_R32G32B32_FLOAT;
      break;
   case 4:
      surface_format = ISL_FORMAT_R32G32B32A32_FLOAT;
      break;
   default:
      unreachable("Invalid vector size for transform feedback output");
   }

   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
             BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
             surface_format << BRW_SURFACE_FORMAT_SHIFT |
             BRW_SURFACE_RC_READ_WRITE;
   surf[1] = brw_state_reloc(&brw->batch,
                             *out_offset + 4, bo, offset_bytes, RELOC_WRITE);
   surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
              height << BRW_SURFACE_HEIGHT_SHIFT);
   surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = 0;
   surf[5] = 0;
}

/* Creates a new WM constant buffer reflecting the current fragment program's
 * constants, if needed by the fragment program.
 *
 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 * state atom.
 */
static void
brw_upload_wm_pull_constants(struct brw_context *brw)
{
   struct brw_stage_state *stage_state = &brw->wm.base;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct brw_program *fp =
      (struct brw_program *) brw->programs[MESA_SHADER_FRAGMENT];

   /* BRW_NEW_FS_PROG_DATA */
   struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data;

   _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_FRAGMENT);
   /* _NEW_PROGRAM_CONSTANTS */
   brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program,
                             stage_state, prog_data);
}

const struct brw_tracked_state brw_wm_pull_constants = {
   .dirty = {
      .mesa = _NEW_PROGRAM_CONSTANTS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = brw_upload_wm_pull_constants,
};

/**
 * Creates a null renderbuffer surface.
 *
 * This is used when the shader doesn't write to any color output.  An FB
 * write to target 0 will still be emitted, because that's how the thread is
 * terminated (and computed depth is returned), so we need to have the
 * hardware discard the target 0 color output.
 */
static void
emit_null_surface_state(struct brw_context *brw,
                        const struct gl_framebuffer *fb,
                        uint32_t *out_offset)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   uint32_t *surf = brw_state_batch(brw,
                                    brw->isl_dev.ss.size,
                                    brw->isl_dev.ss.align,
                                    out_offset);

   /* Use the fb dimensions or 1x1x1 */
   const unsigned width = fb ? _mesa_geometric_width(fb) : 1;
   const unsigned height = fb ? _mesa_geometric_height(fb) : 1;
   const unsigned samples = fb ? _mesa_geometric_samples(fb) : 1;

   if (devinfo->gen != 6 || samples <= 1) {
      isl_null_fill_state(&brw->isl_dev, surf,
                          isl_extent3d(width, height, 1));
      return;
   }

   /* On Gen6, null render targets seem to cause GPU hangs when multisampling.
    * So work around this problem by rendering into a dummy color buffer.
    *
    * To decrease the amount of memory needed by the workaround buffer, we
    * set its pitch to 128 bytes (the width of a Y tile).  This means that
    * the amount of memory needed for the workaround buffer is
    * (width_in_tiles + height_in_tiles - 1) tiles.
    *
    * Note that since the workaround buffer will be interpreted by the
    * hardware as an interleaved multisampled buffer, we need to compute
    * width_in_tiles and height_in_tiles by dividing the width and height
    * by 16 rather than the normal Y-tile size of 32.
    */
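   /* As a worked example (hypothetical numbers): a 1920x1080 window needs
    * ALIGN(1920, 16) / 16 = 120 tile columns and ALIGN(1080, 16) / 16 = 68
    * tile rows, so the workaround buffer is (120 + 68 - 1) * 4096 = 765952
    * bytes instead of a full-sized multisampled color buffer.
    */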
   unsigned width_in_tiles = ALIGN(width, 16) / 16;
   unsigned height_in_tiles = ALIGN(height, 16) / 16;
   unsigned pitch_minus_1 = 127;
   unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
   brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
                      size_needed);

   surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
              ISL_FORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
   surf[1] = brw_state_reloc(&brw->batch, *out_offset + 4,
                             brw->wm.multisampled_null_render_target_bo,
                             0, RELOC_WRITE);

   surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
    * Notes):
    *
    *     If Surface Type is SURFTYPE_NULL, this field must be TRUE
    */
   surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = BRW_SURFACE_MULTISAMPLECOUNT_4;
   surf[5] = 0;
}

/**
 * Sets up a surface state structure to point at the given region.
 * While it is only used for the front/back buffer currently, it should be
 * usable for further buffers when doing ARB_draw_buffer support.
 */
static uint32_t
gen4_update_renderbuffer_surface(struct brw_context *brw,
                                 struct gl_renderbuffer *rb,
                                 unsigned unit,
                                 uint32_t surf_index)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   struct intel_mipmap_tree *mt = irb->mt;
   uint32_t *surf;
   uint32_t tile_x, tile_y;
   enum isl_format format;
   uint32_t offset;
   /* _NEW_BUFFERS */
   mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
   /* BRW_NEW_FS_PROG_DATA */

   if (rb->TexImage && !devinfo->has_surface_tile_offset) {
      intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);

      if (tile_x != 0 || tile_y != 0) {
         /* Original gen4 hardware couldn't draw to a non-tile-aligned
          * destination in a miptree unless you actually setup your renderbuffer
          * as a miptree and used the fragile lod/array_index/etc. controls to
          * select the image.  So, instead, we just make a new single-level
          * miptree and render into that.
          */
         intel_renderbuffer_move_to_temp(brw, irb, false);
         assert(irb->align_wa_mt);
         mt = irb->align_wa_mt;
      }
   }

   surf = brw_state_batch(brw, 6 * 4, 32, &offset);

   format = brw->mesa_to_isl_render_format[rb_format];
   if (unlikely(!brw->mesa_format_supports_render[rb_format])) {
      _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
                    __func__, _mesa_get_format_name(rb_format));
   }

   surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
              format << BRW_SURFACE_FORMAT_SHIFT);

   /* reloc */
   assert(mt->offset % mt->cpp == 0);
   surf[1] = brw_state_reloc(&brw->batch, offset + 4, mt->bo,
                             mt->offset +
                             intel_renderbuffer_get_tile_offsets(irb,
                                                                 &tile_x,
                                                                 &tile_y),
                             RELOC_WRITE);

   surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(mt->surf.tiling) |
              (mt->surf.row_pitch - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = brw_get_surface_num_multisamples(mt->surf.samples);

   assert(devinfo->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
   /* Note that the low bits of these fields are missing, so
    * there's the possibility of getting in trouble.
    */
   assert(tile_x % 4 == 0);
   assert(tile_y % 2 == 0);
   surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
              (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
              (mt->surf.image_alignment_el.height == 4 ?
               BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));

   if (devinfo->gen < 6) {
      /* _NEW_COLOR */
      if (!ctx->Color.ColorLogicOpEnabled && !ctx->Color._AdvancedBlendMode &&
          (ctx->Color.BlendEnabled & (1 << unit)))
         surf[0] |= BRW_SURFACE_BLEND_ENABLED;

      if (!ctx->Color.ColorMask[unit][0])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
      if (!ctx->Color.ColorMask[unit][1])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
      if (!ctx->Color.ColorMask[unit][2])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;

      /* As mentioned above, disable writes to the alpha component when the
       * renderbuffer is XRGB.
       */
      if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
          !ctx->Color.ColorMask[unit][3]) {
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
      }
   }

   return offset;
}

static void
update_renderbuffer_surfaces(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   const struct gl_context *ctx = &brw->ctx;

   /* _NEW_BUFFERS | _NEW_COLOR */
   const struct gl_framebuffer *fb = ctx->DrawBuffer;

   /* Render targets always start at binding table index 0. */
   const unsigned rt_start = 0;

   uint32_t *surf_offsets = brw->wm.base.surf_offset;

   /* Update surfaces for drawing buffers */
   if (fb->_NumColorDrawBuffers >= 1) {
      for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
         struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];

         if (intel_renderbuffer(rb)) {
            surf_offsets[rt_start + i] = devinfo->gen >= 6 ?
               gen6_update_renderbuffer_surface(brw, rb, i, rt_start + i) :
               gen4_update_renderbuffer_surface(brw, rb, i, rt_start + i);
         } else {
            emit_null_surface_state(brw, fb, &surf_offsets[rt_start + i]);
         }
      }
   } else {
      emit_null_surface_state(brw, fb, &surf_offsets[rt_start]);
   }

   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS |
              _NEW_COLOR,
      .brw = BRW_NEW_BATCH,
   },
   .emit = update_renderbuffer_surfaces,
};

const struct brw_tracked_state gen6_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_AUX_STATE,
   },
   .emit = update_renderbuffer_surfaces,
};

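/* Emit texture surfaces for the current color draw buffers so the fragment
 * shader can read back framebuffer contents (render target reads).  Only
 * needed when the coherent MESA_shader_framebuffer_fetch path is not
 * exposed and the reads have to be emulated by sampling.
 */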
static void
update_renderbuffer_read_surfaces(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   const struct gl_context *ctx = &brw->ctx;

   /* BRW_NEW_FS_PROG_DATA */
   const struct brw_wm_prog_data *wm_prog_data =
      brw_wm_prog_data(brw->wm.base.prog_data);

   if (wm_prog_data->has_render_target_reads &&
       !ctx->Extensions.MESA_shader_framebuffer_fetch) {
      /* _NEW_BUFFERS */
      const struct gl_framebuffer *fb = ctx->DrawBuffer;

      for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
         struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];
         const struct intel_renderbuffer *irb = intel_renderbuffer(rb);
         const unsigned surf_index =
            wm_prog_data->binding_table.render_target_read_start + i;
         uint32_t *surf_offset = &brw->wm.base.surf_offset[surf_index];

         if (irb) {
            const enum isl_format format = brw->mesa_to_isl_render_format[
               _mesa_get_render_format(ctx, intel_rb_format(irb))];
            assert(isl_format_supports_sampling(&brw->screen->devinfo,
                                                format));

            /* Override the target of the texture if the render buffer is a
             * single slice of a 3D texture (since the minimum array element
             * field of the surface state structure is ignored by the sampler
             * unit for 3D textures on some hardware), or if the render buffer
             * is a 1D array (since shaders always provide the array index
             * coordinate at the Z component to avoid state-dependent
             * recompiles when changing the texture target of the
             * framebuffer).
             */
            const GLenum target =
               (irb->mt->target == GL_TEXTURE_3D &&
                irb->layer_count == 1) ? GL_TEXTURE_2D :
               irb->mt->target == GL_TEXTURE_1D_ARRAY ? GL_TEXTURE_2D_ARRAY :
               irb->mt->target;

            const struct isl_view view = {
               .format = format,
               .base_level = irb->mt_level - irb->mt->first_level,
               .levels = 1,
               .base_array_layer = irb->mt_layer,
               .array_len = irb->layer_count,
               .swizzle = ISL_SWIZZLE_IDENTITY,
               .usage = ISL_SURF_USAGE_TEXTURE_BIT,
            };

            enum isl_aux_usage aux_usage =
               intel_miptree_texture_aux_usage(brw, irb->mt, format);
            if (brw->draw_aux_buffer_disabled[i])
               aux_usage = ISL_AUX_USAGE_NONE;

            brw_emit_surface_state(brw, irb->mt, target, view, aux_usage,
                                   tex_mocs[devinfo->gen],
                                   surf_offset, surf_index,
                                   0);

         } else {
            emit_null_surface_state(brw, fb, surf_offset);
         }
      }

      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
   }
}

const struct brw_tracked_state brw_renderbuffer_read_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_AUX_STATE |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = update_renderbuffer_read_surfaces,
};

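/* Emit texture surface state for every sampler used by the given program,
 * writing the offsets into the stage's binding table at either the gather
 * or the per-plane start, as requested.  Slots for unused samplers are
 * zeroed.
 */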
static void
update_stage_texture_surfaces(struct brw_context *brw,
                              const struct gl_program *prog,
                              struct brw_stage_state *stage_state,
                              bool for_gather, uint32_t plane)
{
   if (!prog)
      return;

   struct gl_context *ctx = &brw->ctx;

   uint32_t *surf_offset = stage_state->surf_offset;

   /* BRW_NEW_*_PROG_DATA */
   if (for_gather)
      surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
   else
      surf_offset += stage_state->prog_data->binding_table.plane_start[plane];

   unsigned num_samplers = util_last_bit(prog->SamplersUsed);
   for (unsigned s = 0; s < num_samplers; s++) {
      surf_offset[s] = 0;

      if (prog->SamplersUsed & (1 << s)) {
         const unsigned unit = prog->SamplerUnits[s];
         const bool used_by_txf = prog->info.textures_used_by_txf & (1 << s);

         /* _NEW_TEXTURE */
         if (ctx->Texture.Unit[unit]._Current) {
            brw_update_texture_surface(ctx, unit, surf_offset + s, for_gather,
                                       used_by_txf, plane);
         }
      }
   }
}


/**
 * Construct SURFACE_STATE objects for enabled textures.
 */
static void
brw_update_texture_surfaces(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   /* BRW_NEW_VERTEX_PROGRAM */
   struct gl_program *vs = brw->programs[MESA_SHADER_VERTEX];

   /* BRW_NEW_TESS_PROGRAMS */
   struct gl_program *tcs = brw->programs[MESA_SHADER_TESS_CTRL];
   struct gl_program *tes = brw->programs[MESA_SHADER_TESS_EVAL];

   /* BRW_NEW_GEOMETRY_PROGRAM */
   struct gl_program *gs = brw->programs[MESA_SHADER_GEOMETRY];

   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct gl_program *fs = brw->programs[MESA_SHADER_FRAGMENT];

   /* _NEW_TEXTURE */
   update_stage_texture_surfaces(brw, vs, &brw->vs.base, false, 0);
   update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false, 0);
   update_stage_texture_surfaces(brw, tes, &brw->tes.base, false, 0);
   update_stage_texture_surfaces(brw, gs, &brw->gs.base, false, 0);
   update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 0);

   /* emit alternate set of surface state for gather. this
    * allows the surface format to be overridden for only the
    * gather4 messages. */
   if (devinfo->gen < 8) {
      if (vs && vs->nir->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, vs, &brw->vs.base, true, 0);
      if (tcs && tcs->nir->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true, 0);
      if (tes && tes->nir->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, tes, &brw->tes.base, true, 0);
      if (gs && gs->nir->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, gs, &brw->gs.base, true, 0);
      if (fs && fs->nir->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, fs, &brw->wm.base, true, 0);
   }

   if (fs) {
      update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 1);
      update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 2);
   }

   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_AUX_STATE |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_GEOMETRY_PROGRAM |
             BRW_NEW_GS_PROG_DATA |
             BRW_NEW_TESS_PROGRAMS |
             BRW_NEW_TCS_PROG_DATA |
             BRW_NEW_TES_PROG_DATA |
             BRW_NEW_TEXTURE_BUFFER |
             BRW_NEW_VERTEX_PROGRAM |
             BRW_NEW_VS_PROG_DATA,
   },
   .emit = brw_update_texture_surfaces,
};

static void
brw_update_cs_texture_surfaces(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   /* BRW_NEW_COMPUTE_PROGRAM */
   struct gl_program *cs = brw->programs[MESA_SHADER_COMPUTE];

   /* _NEW_TEXTURE */
   update_stage_texture_surfaces(brw, cs, &brw->cs.base, false, 0);

   /* emit alternate set of surface state for gather. this
    * allows the surface format to be overridden for only the
    * gather4 messages.
    */
   if (devinfo->gen < 8) {
      if (cs && cs->nir->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, cs, &brw->cs.base, true, 0);
   }

   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_cs_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_COMPUTE_PROGRAM |
             BRW_NEW_AUX_STATE,
   },
   .emit = brw_update_cs_texture_surfaces,
};


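/* Emit constant-buffer surfaces for the program's bound UBOs and raw buffer
 * surfaces for its bound SSBOs; binding points with no buffer attached get
 * a null surface instead.
 */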
void
brw_upload_ubo_surfaces(struct brw_context *brw, struct gl_program *prog,
                        struct brw_stage_state *stage_state,
                        struct brw_stage_prog_data *prog_data)
{
   struct gl_context *ctx = &brw->ctx;

   if (!prog)
      return;

   uint32_t *ubo_surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.ubo_start];

   for (int i = 0; i < prog->info.num_ubos; i++) {
      struct gl_buffer_binding *binding =
         &ctx->UniformBufferBindings[prog->sh.UniformBlocks[i]->Binding];

      if (binding->BufferObject == ctx->Shared->NullBufferObj) {
         emit_null_surface_state(brw, NULL, &ubo_surf_offsets[i]);
      } else {
         struct intel_buffer_object *intel_bo =
            intel_buffer_object(binding->BufferObject);
         GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
         if (!binding->AutomaticSize)
            size = MIN2(size, binding->Size);
         struct brw_bo *bo =
            intel_bufferobj_buffer(brw, intel_bo,
                                   binding->Offset,
                                   size, false);
         brw_create_constant_surface(brw, bo, binding->Offset,
                                     size,
                                     &ubo_surf_offsets[i]);
      }
   }

   uint32_t *ssbo_surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.ssbo_start];

   for (int i = 0; i < prog->info.num_ssbos; i++) {
      struct gl_buffer_binding *binding =
         &ctx->ShaderStorageBufferBindings[prog->sh.ShaderStorageBlocks[i]->Binding];

      if (binding->BufferObject == ctx->Shared->NullBufferObj) {
         emit_null_surface_state(brw, NULL, &ssbo_surf_offsets[i]);
      } else {
         struct intel_buffer_object *intel_bo =
            intel_buffer_object(binding->BufferObject);
         GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
         if (!binding->AutomaticSize)
            size = MIN2(size, binding->Size);
         struct brw_bo *bo =
            intel_bufferobj_buffer(brw, intel_bo,
                                   binding->Offset,
                                   size, true);
         brw_create_buffer_surface(brw, bo, binding->Offset,
                                   size,
                                   &ssbo_surf_offsets[i]);
      }
   }

   stage_state->push_constants_dirty = true;

   if (prog->info.num_ubos || prog->info.num_ssbos)
      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

static void
brw_upload_wm_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_program *prog = ctx->FragmentProgram._Current;

   /* BRW_NEW_FS_PROG_DATA */
   brw_upload_ubo_surfaces(brw, prog, &brw->wm.base, brw->wm.base.prog_data);
}

const struct brw_tracked_state brw_wm_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_UNIFORM_BUFFER,
   },
   .emit = brw_upload_wm_ubo_surfaces,
};

static void
brw_upload_cs_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];

   /* BRW_NEW_CS_PROG_DATA */
   brw_upload_ubo_surfaces(brw, prog, &brw->cs.base, brw->cs.base.prog_data);
}

const struct brw_tracked_state brw_cs_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_CS_PROG_DATA |
             BRW_NEW_UNIFORM_BUFFER,
   },
   .emit = brw_upload_cs_ubo_surfaces,
};

void
brw_upload_abo_surfaces(struct brw_context *brw,
                        const struct gl_program *prog,
                        struct brw_stage_state *stage_state,
                        struct brw_stage_prog_data *prog_data)
{
   struct gl_context *ctx = &brw->ctx;
   uint32_t *surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.abo_start];

   if (prog->info.num_abos) {
      for (unsigned i = 0; i < prog->info.num_abos; i++) {
         struct gl_buffer_binding *binding =
            &ctx->AtomicBufferBindings[prog->sh.AtomicBuffers[i]->Binding];
         struct intel_buffer_object *intel_bo =
            intel_buffer_object(binding->BufferObject);
         struct brw_bo *bo =
            intel_bufferobj_buffer(brw, intel_bo, binding->Offset,
                                   intel_bo->Base.Size - binding->Offset,
                                   true);

         brw_emit_buffer_surface_state(brw, &surf_offsets[i], bo,
                                       binding->Offset, ISL_FORMAT_RAW,
                                       bo->size - binding->Offset, 1,
                                       RELOC_WRITE);
      }

      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
   }
}

static void
brw_upload_wm_abo_surfaces(struct brw_context *brw)
{
   /* _NEW_PROGRAM */
   const struct gl_program *wm = brw->programs[MESA_SHADER_FRAGMENT];

   if (wm) {
      /* BRW_NEW_FS_PROG_DATA */
      brw_upload_abo_surfaces(brw, wm, &brw->wm.base, brw->wm.base.prog_data);
   }
}

const struct brw_tracked_state brw_wm_abo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_ATOMIC_BUFFER |
             BRW_NEW_BATCH |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = brw_upload_wm_abo_surfaces,
};

static void
brw_upload_cs_abo_surfaces(struct brw_context *brw)
{
   /* _NEW_PROGRAM */
   const struct gl_program *cp = brw->programs[MESA_SHADER_COMPUTE];

   if (cp) {
      /* BRW_NEW_CS_PROG_DATA */
      brw_upload_abo_surfaces(brw, cp, &brw->cs.base, brw->cs.base.prog_data);
   }
}

const struct brw_tracked_state brw_cs_abo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_ATOMIC_BUFFER |
             BRW_NEW_BATCH |
             BRW_NEW_CS_PROG_DATA,
   },
   .emit = brw_upload_cs_abo_surfaces,
};

static void
brw_upload_cs_image_surfaces(struct brw_context *brw)
{
   /* _NEW_PROGRAM */
   const struct gl_program *cp = brw->programs[MESA_SHADER_COMPUTE];

   if (cp) {
      /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
      brw_upload_image_surfaces(brw, cp, &brw->cs.base,
                                brw->cs.base.prog_data);
   }
}

const struct brw_tracked_state brw_cs_image_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_CS_PROG_DATA |
             BRW_NEW_AUX_STATE |
             BRW_NEW_IMAGE_UNITS
   },
   .emit = brw_upload_cs_image_surfaces,
};

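/* Pick the hardware format used to access a shader image: write-only access
 * can use the requested format directly, typed reads are lowered to the
 * closest format the hardware can read, and anything else falls back to raw
 * untyped access.
 */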
static uint32_t
get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   enum isl_format hw_format = brw_isl_format_for_mesa_format(format);
   if (access == GL_WRITE_ONLY) {
      return hw_format;
   } else if (isl_has_matching_typed_storage_image_format(devinfo, hw_format)) {
      /* Typed surface reads support a very limited subset of the shader
       * image formats.  Translate it into the closest format the
       * hardware supports.
       */
      return isl_lower_storage_image_format(devinfo, hw_format);
   } else {
      /* The hardware doesn't actually support a typed format that we can use
       * so we have to fall back to untyped read/write messages.
       */
      return ISL_FORMAT_RAW;
   }
}

static void
update_default_image_param(struct brw_context *brw,
                           struct gl_image_unit *u,
                           unsigned surface_idx,
                           struct brw_image_param *param)
{
   memset(param, 0, sizeof(*param));
   param->surface_idx = surface_idx;
   /* Set the swizzling shifts to all-ones to effectively disable swizzling --
    * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
    * detailed explanation of these parameters.
    */
   param->swizzling[0] = 0xff;
   param->swizzling[1] = 0xff;
}

static void
update_buffer_image_param(struct brw_context *brw,
                          struct gl_image_unit *u,
                          unsigned surface_idx,
                          struct brw_image_param *param)
{
   struct gl_buffer_object *obj = u->TexObj->BufferObject;
   const uint32_t size = MIN2((uint32_t)u->TexObj->BufferSize, obj->Size);
   update_default_image_param(brw, u, surface_idx, param);

   param->size[0] = size / _mesa_get_format_bytes(u->_ActualFormat);
   param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
}

static unsigned
get_image_num_layers(const struct intel_mipmap_tree *mt, GLenum target,
                     unsigned level)
{
   if (target == GL_TEXTURE_CUBE_MAP)
      return 6;

   return target == GL_TEXTURE_3D ?
      minify(mt->surf.logical_level0_px.depth, level) :
      mt->surf.logical_level0_px.array_len;
}

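/* Emit the surface state and brw_image_param metadata for a single image
 * unit: a buffer surface for buffer textures, a raw buffer surface covering
 * the whole BO when only untyped access is possible, and a typed surface
 * (with no auxiliary surface) otherwise.
 */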
static void
update_image_surface(struct brw_context *brw,
                     struct gl_image_unit *u,
                     GLenum access,
                     unsigned surface_idx,
                     uint32_t *surf_offset,
                     struct brw_image_param *param)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
      struct gl_texture_object *obj = u->TexObj;
      const unsigned format = get_image_format(brw, u->_ActualFormat, access);

      if (obj->Target == GL_TEXTURE_BUFFER) {
         struct intel_buffer_object *intel_obj =
            intel_buffer_object(obj->BufferObject);
         const unsigned texel_size = (format == ISL_FORMAT_RAW ? 1 :
                                      _mesa_get_format_bytes(u->_ActualFormat));

         brw_emit_buffer_surface_state(
            brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
            format, intel_obj->Base.Size, texel_size,
            access != GL_READ_ONLY ? RELOC_WRITE : 0);

         update_buffer_image_param(brw, u, surface_idx, param);

      } else {
         struct intel_texture_object *intel_obj = intel_texture_object(obj);
         struct intel_mipmap_tree *mt = intel_obj->mt;
         const unsigned num_layers = u->Layered ?
            get_image_num_layers(mt, obj->Target, u->Level) : 1;

         struct isl_view view = {
            .format = format,
            .base_level = obj->MinLevel + u->Level,
            .levels = 1,
            .base_array_layer = obj->MinLayer + u->_Layer,
            .array_len = num_layers,
            .swizzle = ISL_SWIZZLE_IDENTITY,
            .usage = ISL_SURF_USAGE_STORAGE_BIT,
         };

         if (format == ISL_FORMAT_RAW) {
            brw_emit_buffer_surface_state(
               brw, surf_offset, mt->bo, mt->offset,
               format, mt->bo->size - mt->offset, 1 /* pitch */,
               access != GL_READ_ONLY ? RELOC_WRITE : 0);

         } else {
            const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
            assert(!intel_miptree_has_color_unresolved(mt,
                                                       view.base_level, 1,
                                                       view.base_array_layer,
                                                       view.array_len));
            brw_emit_surface_state(brw, mt, mt->target, view,
                                   ISL_AUX_USAGE_NONE, tex_mocs[devinfo->gen],
                                   surf_offset, surf_index,
                                   access == GL_READ_ONLY ? 0 : RELOC_WRITE);
         }

         isl_surf_fill_image_param(&brw->isl_dev, param, &mt->surf, &view);
         param->surface_idx = surface_idx;
      }

   } else {
      emit_null_surface_state(brw, NULL, surf_offset);
      update_default_image_param(brw, u, surface_idx, param);
   }
}

void
brw_upload_image_surfaces(struct brw_context *brw,
                          const struct gl_program *prog,
                          struct brw_stage_state *stage_state,
                          struct brw_stage_prog_data *prog_data)
{
   assert(prog);
   struct gl_context *ctx = &brw->ctx;

   if (prog->info.num_images) {
      for (unsigned i = 0; i < prog->info.num_images; i++) {
         struct gl_image_unit *u = &ctx->ImageUnits[prog->sh.ImageUnits[i]];
         const unsigned surf_idx = prog_data->binding_table.image_start + i;

         update_image_surface(brw, u, prog->sh.ImageAccess[i],
                              surf_idx,
                              &stage_state->surf_offset[surf_idx],
                              &stage_state->image_param[i]);
      }

      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
      /* This may have changed the image metadata dependent on the context
       * image unit state and passed to the program as uniforms, so make sure
       * that push and pull constants are reuploaded.
       */
      brw->NewGLState |= _NEW_PROGRAM_CONSTANTS;
   }
}

static void
brw_upload_wm_image_surfaces(struct brw_context *brw)
{
   /* BRW_NEW_FRAGMENT_PROGRAM */
   const struct gl_program *wm = brw->programs[MESA_SHADER_FRAGMENT];

   if (wm) {
      /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
      brw_upload_image_surfaces(brw, wm, &brw->wm.base,
                                brw->wm.base.prog_data);
   }
}

const struct brw_tracked_state brw_wm_image_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_AUX_STATE |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_IMAGE_UNITS
   },
   .emit = brw_upload_wm_image_surfaces,
};

static void
brw_upload_cs_work_groups_surface(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
   /* BRW_NEW_CS_PROG_DATA */
   const struct brw_cs_prog_data *cs_prog_data =
      brw_cs_prog_data(brw->cs.base.prog_data);

   if (prog && cs_prog_data->uses_num_work_groups) {
      const unsigned surf_idx =
         cs_prog_data->binding_table.work_groups_start;
      uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
      struct brw_bo *bo;
      uint32_t bo_offset;

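      /* For a direct dispatch the work group count only lives in CPU memory,
       * so upload it here; for an indirect dispatch it is assumed to already
       * be in a buffer object filled in by the dispatch path.
       */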
      if (brw->compute.num_work_groups_bo == NULL) {
         bo = NULL;
         intel_upload_data(brw,
                           (void *)brw->compute.num_work_groups,
                           3 * sizeof(GLuint),
                           sizeof(GLuint),
                           &bo,
                           &bo_offset);
      } else {
         bo = brw->compute.num_work_groups_bo;
         bo_offset = brw->compute.num_work_groups_offset;
      }

      brw_emit_buffer_surface_state(brw, surf_offset,
                                    bo, bo_offset,
                                    ISL_FORMAT_RAW,
                                    3 * sizeof(GLuint), 1,
                                    RELOC_WRITE);
      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
   }
}

const struct brw_tracked_state brw_cs_work_groups_surface = {
   .dirty = {
      .brw = BRW_NEW_CS_PROG_DATA |
             BRW_NEW_CS_WORK_GROUPS
   },
   .emit = brw_upload_cs_work_groups_surface,
};