i965/icl: Add render target flush after uploading binding table
[mesa.git] src/mesa/drivers/dri/i965/brw_wm_surface_state.c
/*
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */


#include "compiler/nir/nir.h"
#include "main/context.h"
#include "main/blend.h"
#include "main/mtypes.h"
#include "main/samplerobj.h"
#include "main/shaderimage.h"
#include "main/teximage.h"
#include "program/prog_parameter.h"
#include "program/prog_instruction.h"
#include "main/framebuffer.h"
#include "main/shaderapi.h"

#include "isl/isl.h"

#include "intel_mipmap_tree.h"
#include "intel_batchbuffer.h"
#include "intel_tex.h"
#include "intel_fbo.h"
#include "intel_buffer_objects.h"

#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_wm.h"

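/* MOCS (Memory Object Control State) entries for each supported hardware
 * generation, indexed by devinfo->gen.  The WB table selects a write-back
 * cacheable policy for driver-internal buffers; the PTE table defers the
 * cacheability decision to the kernel's page tables, which is what shared
 * or external buffers (e.g. scanout) require.
 */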
static const uint32_t wb_mocs[] = {
   [7] = GEN7_MOCS_L3,
   [8] = BDW_MOCS_WB,
   [9] = SKL_MOCS_WB,
   [10] = CNL_MOCS_WB,
   [11] = ICL_MOCS_WB,
};

static const uint32_t pte_mocs[] = {
   [7] = GEN7_MOCS_L3,
   [8] = BDW_MOCS_PTE,
   [9] = SKL_MOCS_PTE,
   [10] = CNL_MOCS_PTE,
   [11] = ICL_MOCS_PTE,
};

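/* Pick the MOCS entry for a buffer: external (imported or exported) BOs
 * must follow the page-table settings so that scanout and cross-process
 * sharing behave, while everything else can be write-back cached.
 */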
uint32_t
brw_get_bo_mocs(const struct gen_device_info *devinfo, struct brw_bo *bo)
{
   return (bo && bo->external ? pte_mocs : wb_mocs)[devinfo->gen];
}

static void
get_isl_surf(struct brw_context *brw, struct intel_mipmap_tree *mt,
             GLenum target, struct isl_view *view,
             uint32_t *tile_x, uint32_t *tile_y,
             uint32_t *offset, struct isl_surf *surf)
{
   *surf = mt->surf;

   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   const enum isl_dim_layout dim_layout =
      get_isl_dim_layout(devinfo, mt->surf.tiling, target);

   if (surf->dim_layout == dim_layout)
      return;

   /* The layout of the specified texture target is not compatible with the
    * actual layout of the miptree structure in memory -- you're entering
    * dangerous territory.  This can only work if you intend to access a
    * single level and slice of the texture, and the hardware supports the
    * tile offset feature to allow non-tile-aligned base offsets, since we
    * will have to point the hardware at the first texel of the level
    * instead of relying on the usual base level/layer controls.
    */
   assert(devinfo->has_surface_tile_offset);
   assert(view->levels == 1 && view->array_len == 1);
   assert(*tile_x == 0 && *tile_y == 0);

   *offset += intel_miptree_get_tile_offsets(mt, view->base_level,
                                             view->base_array_layer,
                                             tile_x, tile_y);

   /* Minify the logical dimensions of the texture. */
   const unsigned l = view->base_level - mt->first_level;
   surf->logical_level0_px.width = minify(surf->logical_level0_px.width, l);
   surf->logical_level0_px.height = surf->dim <= ISL_SURF_DIM_1D ? 1 :
      minify(surf->logical_level0_px.height, l);
   surf->logical_level0_px.depth = surf->dim <= ISL_SURF_DIM_2D ? 1 :
      minify(surf->logical_level0_px.depth, l);

   /* Only the base level and layer can be addressed with the overridden
    * layout.
    */
   surf->logical_level0_px.array_len = 1;
   surf->levels = 1;
   surf->dim_layout = dim_layout;

   /* The requested slice of the texture is now at the base level and
    * layer.
    */
   view->base_level = 0;
   view->base_array_layer = 0;
}

static void
brw_emit_surface_state(struct brw_context *brw,
                       struct intel_mipmap_tree *mt,
                       GLenum target, struct isl_view view,
                       enum isl_aux_usage aux_usage,
                       uint32_t *surf_offset, int surf_index,
                       unsigned reloc_flags)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   uint32_t tile_x = mt->level[0].level_x;
   uint32_t tile_y = mt->level[0].level_y;
   uint32_t offset = mt->offset;

   struct isl_surf surf;

   get_isl_surf(brw, mt, target, &view, &tile_x, &tile_y, &offset, &surf);

   union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } };

   struct brw_bo *aux_bo = NULL;
   struct isl_surf *aux_surf = NULL;
   uint64_t aux_offset = 0;
   switch (aux_usage) {
   case ISL_AUX_USAGE_MCS:
   case ISL_AUX_USAGE_CCS_D:
   case ISL_AUX_USAGE_CCS_E:
      aux_surf = &mt->mcs_buf->surf;
      aux_bo = mt->mcs_buf->bo;
      aux_offset = mt->mcs_buf->offset;
      break;

   case ISL_AUX_USAGE_HIZ:
      aux_surf = &mt->hiz_buf->surf;
      aux_bo = mt->hiz_buf->bo;
      aux_offset = 0;
      break;

   case ISL_AUX_USAGE_NONE:
      break;
   }

   if (aux_usage != ISL_AUX_USAGE_NONE) {
      /* We only really need a clear color if we also have an auxiliary
       * surface.  Without one, it does nothing.
       */
      clear_color = mt->fast_clear_color;
   }

   void *state = brw_state_batch(brw,
                                 brw->isl_dev.ss.size,
                                 brw->isl_dev.ss.align,
                                 surf_offset);

   isl_surf_fill_state(&brw->isl_dev, state, .surf = &surf, .view = &view,
                       .address = brw_state_reloc(&brw->batch,
                                                  *surf_offset + brw->isl_dev.ss.addr_offset,
                                                  mt->bo, offset, reloc_flags),
                       .aux_surf = aux_surf, .aux_usage = aux_usage,
                       .aux_address = aux_offset,
                       .mocs = brw_get_bo_mocs(devinfo, mt->bo),
                       .clear_color = clear_color,
                       .x_offset_sa = tile_x, .y_offset_sa = tile_y);
   if (aux_surf) {
      /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
       * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
       * contain other control information.  Since buffer addresses are always
       * on 4k boundaries (and thus have their lower 12 bits zero), we can use
       * an ordinary reloc to do the necessary address translation.
       *
       * FIXME: move to the point of assignment.
       */
      assert((aux_offset & 0xfff) == 0);
      uint32_t *aux_addr = state + brw->isl_dev.ss.aux_addr_offset;
      *aux_addr = brw_state_reloc(&brw->batch,
                                  *surf_offset +
                                  brw->isl_dev.ss.aux_addr_offset,
                                  aux_bo, *aux_addr,
                                  reloc_flags);
   }
}

static uint32_t
gen6_update_renderbuffer_surface(struct brw_context *brw,
                                 struct gl_renderbuffer *rb,
                                 unsigned unit,
                                 uint32_t surf_index)
{
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   struct intel_mipmap_tree *mt = irb->mt;

   assert(brw_render_target_supported(brw, rb));

   mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
   if (unlikely(!brw->mesa_format_supports_render[rb_format])) {
      _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
                    __func__, _mesa_get_format_name(rb_format));
   }
   enum isl_format isl_format = brw->mesa_to_isl_render_format[rb_format];

   struct isl_view view = {
      .format = isl_format,
      .base_level = irb->mt_level - irb->mt->first_level,
      .levels = 1,
      .base_array_layer = irb->mt_layer,
      .array_len = MAX2(irb->layer_count, 1),
      .swizzle = ISL_SWIZZLE_IDENTITY,
      .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
   };

   uint32_t offset;
   brw_emit_surface_state(brw, mt, mt->target, view,
                          brw->draw_aux_usage[unit],
                          &offset, surf_index,
                          RELOC_WRITE);
   return offset;
}

GLuint
translate_tex_target(GLenum target)
{
   switch (target) {
   case GL_TEXTURE_1D:
   case GL_TEXTURE_1D_ARRAY_EXT:
      return BRW_SURFACE_1D;

   case GL_TEXTURE_RECTANGLE_NV:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_2D:
   case GL_TEXTURE_2D_ARRAY_EXT:
   case GL_TEXTURE_EXTERNAL_OES:
   case GL_TEXTURE_2D_MULTISAMPLE:
   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_3D:
      return BRW_SURFACE_3D;

   case GL_TEXTURE_CUBE_MAP:
   case GL_TEXTURE_CUBE_MAP_ARRAY:
      return BRW_SURFACE_CUBE;

   default:
      unreachable("not reached");
   }
}

uint32_t
brw_get_surface_tiling_bits(enum isl_tiling tiling)
{
   switch (tiling) {
   case ISL_TILING_X:
      return BRW_SURFACE_TILED;
   case ISL_TILING_Y0:
      return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
   default:
      return 0;
   }
}

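/* Legacy (pre-gen7) SURFACE_STATE distinguishes only the 1x and 4x
 * multisample-count encodings used below, so any multisampled surface is
 * reported as 4x here.
 */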
uint32_t
brw_get_surface_num_multisamples(unsigned num_samples)
{
   if (num_samples > 1)
      return BRW_SURFACE_MULTISAMPLECOUNT_4;
   else
      return BRW_SURFACE_MULTISAMPLECOUNT_1;
}

/**
 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 * swizzling.
 */
int
brw_get_texture_swizzle(const struct gl_context *ctx,
                        const struct gl_texture_object *t)
{
   const struct gl_texture_image *img = t->Image[0][t->BaseLevel];

   int swizzles[SWIZZLE_NIL + 1] = {
      SWIZZLE_X,
      SWIZZLE_Y,
      SWIZZLE_Z,
      SWIZZLE_W,
      SWIZZLE_ZERO,
      SWIZZLE_ONE,
      SWIZZLE_NIL
   };

   if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
       img->_BaseFormat == GL_DEPTH_STENCIL) {
      GLenum depth_mode = t->DepthMode;

      /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
       * with depth component data specified with a sized internal format.
       * Otherwise, it's left at the old default, GL_LUMINANCE.
       */
      if (_mesa_is_gles3(ctx) &&
          img->InternalFormat != GL_DEPTH_COMPONENT &&
          img->InternalFormat != GL_DEPTH_STENCIL) {
         depth_mode = GL_RED;
      }

      switch (depth_mode) {
      case GL_ALPHA:
         swizzles[0] = SWIZZLE_ZERO;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_LUMINANCE:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
         break;
      case GL_INTENSITY:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_RED:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_ONE;
         break;
      }
   }

   GLenum datatype = _mesa_get_format_datatype(img->TexFormat);

   /* If the texture's format is alpha-only, force R, G, and B to
    * 0.0.  Similarly, if the texture's format has no alpha channel,
    * force the alpha value read to 1.0.  This allows for the
    * implementation to use an RGBA texture for any of these formats
    * without leaking any unexpected values.
    */
   switch (img->_BaseFormat) {
   case GL_ALPHA:
      swizzles[0] = SWIZZLE_ZERO;
      swizzles[1] = SWIZZLE_ZERO;
      swizzles[2] = SWIZZLE_ZERO;
      break;
   case GL_LUMINANCE:
      if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
      }
      break;
   case GL_LUMINANCE_ALPHA:
      if (datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_W;
      }
      break;
   case GL_INTENSITY:
      if (datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
      }
      break;
   case GL_RED:
   case GL_RG:
   case GL_RGB:
      if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0 ||
          img->TexFormat == MESA_FORMAT_RGB_DXT1 ||
          img->TexFormat == MESA_FORMAT_SRGB_DXT1)
         swizzles[3] = SWIZZLE_ONE;
      break;
   }

   return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
                        swizzles[GET_SWZ(t->_Swizzle, 1)],
                        swizzles[GET_SWZ(t->_Swizzle, 2)],
                        swizzles[GET_SWZ(t->_Swizzle, 3)]);
}

/**
 * Convert a swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+
 * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED).  The mappings are
 *
 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
 *         0          1          2          3             4            5
 *         4          5          6          7             0            1
 *   SCS_RED, SCS_GREEN,  SCS_BLUE, SCS_ALPHA,     SCS_ZERO,     SCS_ONE
 *
 * which is simply adding 4 then modding by 8 (or anding with 7).
 *
 * We then may need to apply workarounds for textureGather hardware bugs.
 */
static unsigned
swizzle_to_scs(GLenum swizzle, bool need_green_to_blue)
{
   unsigned scs = (swizzle + 4) & 7;

   return (need_green_to_blue && scs == HSW_SCS_GREEN) ? HSW_SCS_BLUE : scs;
}

static void brw_update_texture_surface(struct gl_context *ctx,
                                       unsigned unit,
                                       uint32_t *surf_offset,
                                       bool for_gather,
                                       bool for_txf,
                                       uint32_t plane)
{
   struct brw_context *brw = brw_context(ctx);
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current;

   if (obj->Target == GL_TEXTURE_BUFFER) {
      brw_update_buffer_texture_surface(ctx, unit, surf_offset);

   } else {
      struct intel_texture_object *intel_obj = intel_texture_object(obj);
      struct intel_mipmap_tree *mt = intel_obj->mt;

      if (plane > 0) {
         if (mt->plane[plane - 1] == NULL)
            return;
         mt = mt->plane[plane - 1];
      }

      struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
      /* If this is a view with restricted NumLayers, then our effective depth
       * is not just the miptree depth.
       */
      unsigned view_num_layers;
      if (obj->Immutable && obj->Target != GL_TEXTURE_3D) {
         view_num_layers = obj->NumLayers;
      } else {
         view_num_layers = mt->surf.dim == ISL_SURF_DIM_3D ?
                              mt->surf.logical_level0_px.depth :
                              mt->surf.logical_level0_px.array_len;
      }

      /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
       * texturing functions that return a float, as our code generation always
       * selects the .x channel (which would always be 0).
       */
      struct gl_texture_image *firstImage = obj->Image[0][obj->BaseLevel];
      const bool alpha_depth = obj->DepthMode == GL_ALPHA &&
         (firstImage->_BaseFormat == GL_DEPTH_COMPONENT ||
          firstImage->_BaseFormat == GL_DEPTH_STENCIL);
      const unsigned swizzle = (unlikely(alpha_depth) ? SWIZZLE_XYZW :
                                brw_get_texture_swizzle(&brw->ctx, obj));

      mesa_format mesa_fmt = plane == 0 ? intel_obj->_Format : mt->format;
      enum isl_format format = translate_tex_format(brw, mesa_fmt,
                                                    for_txf ? GL_DECODE_EXT :
                                                    sampler->sRGBDecode);

      /* Implement gen6 and gen7 gather work-around */
      bool need_green_to_blue = false;
      if (for_gather) {
         if (devinfo->gen == 7 && (format == ISL_FORMAT_R32G32_FLOAT ||
                                   format == ISL_FORMAT_R32G32_SINT ||
                                   format == ISL_FORMAT_R32G32_UINT)) {
            format = ISL_FORMAT_R32G32_FLOAT_LD;
            need_green_to_blue = devinfo->is_haswell;
         } else if (devinfo->gen == 6) {
            /* Sandybridge's gather4 message is broken for integer formats.
             * To work around this, we pretend the surface is UNORM for
             * 8 or 16-bit formats, and emit shader instructions to recover
             * the real INT/UINT value.  For 32-bit formats, we pretend
             * the surface is FLOAT, and simply reinterpret the resulting
             * bits.
             */
            switch (format) {
            case ISL_FORMAT_R8_SINT:
            case ISL_FORMAT_R8_UINT:
               format = ISL_FORMAT_R8_UNORM;
               break;

            case ISL_FORMAT_R16_SINT:
            case ISL_FORMAT_R16_UINT:
               format = ISL_FORMAT_R16_UNORM;
               break;

            case ISL_FORMAT_R32_SINT:
            case ISL_FORMAT_R32_UINT:
               format = ISL_FORMAT_R32_FLOAT;
               break;

            default:
               break;
            }
         }
      }

      if (obj->StencilSampling && firstImage->_BaseFormat == GL_DEPTH_STENCIL) {
         if (devinfo->gen <= 7) {
            assert(mt->r8stencil_mt && !mt->stencil_mt->r8stencil_needs_update);
            mt = mt->r8stencil_mt;
         } else {
            mt = mt->stencil_mt;
         }
         format = ISL_FORMAT_R8_UINT;
      } else if (devinfo->gen <= 7 && mt->format == MESA_FORMAT_S_UINT8) {
         assert(mt->r8stencil_mt && !mt->r8stencil_needs_update);
         mt = mt->r8stencil_mt;
         format = ISL_FORMAT_R8_UINT;
      }

      const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];

      struct isl_view view = {
         .format = format,
         .base_level = obj->MinLevel + obj->BaseLevel,
         .levels = intel_obj->_MaxLevel - obj->BaseLevel + 1,
         .base_array_layer = obj->MinLayer,
         .array_len = view_num_layers,
         .swizzle = {
            .r = swizzle_to_scs(GET_SWZ(swizzle, 0), need_green_to_blue),
            .g = swizzle_to_scs(GET_SWZ(swizzle, 1), need_green_to_blue),
            .b = swizzle_to_scs(GET_SWZ(swizzle, 2), need_green_to_blue),
            .a = swizzle_to_scs(GET_SWZ(swizzle, 3), need_green_to_blue),
         },
         .usage = ISL_SURF_USAGE_TEXTURE_BIT,
      };

      if (obj->Target == GL_TEXTURE_CUBE_MAP ||
          obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY)
         view.usage |= ISL_SURF_USAGE_CUBE_BIT;

      enum isl_aux_usage aux_usage =
         intel_miptree_texture_aux_usage(brw, mt, format);

      brw_emit_surface_state(brw, mt, mt->target, view, aux_usage,
                             surf_offset, surf_index,
                             0);
   }
}

void
brw_emit_buffer_surface_state(struct brw_context *brw,
                              uint32_t *out_offset,
                              struct brw_bo *bo,
                              unsigned buffer_offset,
                              unsigned surface_format,
                              unsigned buffer_size,
                              unsigned pitch,
                              unsigned reloc_flags)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   uint32_t *dw = brw_state_batch(brw,
                                  brw->isl_dev.ss.size,
                                  brw->isl_dev.ss.align,
                                  out_offset);

   isl_buffer_fill_state(&brw->isl_dev, dw,
                         .address = !bo ? buffer_offset :
                                    brw_state_reloc(&brw->batch,
                                                    *out_offset + brw->isl_dev.ss.addr_offset,
                                                    bo, buffer_offset,
                                                    reloc_flags),
                         .size = buffer_size,
                         .format = surface_format,
                         .stride = pitch,
                         .mocs = brw_get_bo_mocs(devinfo, bo));
}

void
brw_update_buffer_texture_surface(struct gl_context *ctx,
                                  unsigned unit,
                                  uint32_t *surf_offset)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   struct intel_buffer_object *intel_obj =
      intel_buffer_object(tObj->BufferObject);
   uint32_t size = tObj->BufferSize;
   struct brw_bo *bo = NULL;
   mesa_format format = tObj->_BufferObjectFormat;
   const enum isl_format isl_format = brw_isl_format_for_mesa_format(format);
   int texel_size = _mesa_get_format_bytes(format);

   if (intel_obj) {
      size = MIN2(size, intel_obj->Base.Size);
      bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size,
                                  false);
   }

   /* The ARB_texture_buffer_object spec says:
    *
    *    "The number of texels in the buffer texture's texel array is given by
    *
    *       floor(<buffer_size> / (<components> * sizeof(<base_type>))),
    *
    *     where <buffer_size> is the size of the buffer object, in basic
    *     machine units and <components> and <base_type> are the element count
    *     and base data type for elements, as specified in Table X.1.  The
    *     number of texels in the texel array is then clamped to the
    *     implementation-dependent limit MAX_TEXTURE_BUFFER_SIZE_ARB."
    *
    * We need to clamp the size in bytes to MAX_TEXTURE_BUFFER_SIZE * stride,
    * so that when ISL divides by stride to obtain the number of texels, that
    * texel count is clamped to MAX_TEXTURE_BUFFER_SIZE.
    */
   size = MIN2(size, ctx->Const.MaxTextureBufferSize * (unsigned) texel_size);

   if (isl_format == ISL_FORMAT_UNSUPPORTED) {
      _mesa_problem(NULL, "bad format %s for texture buffer\n",
                    _mesa_get_format_name(format));
   }

   brw_emit_buffer_surface_state(brw, surf_offset, bo,
                                 tObj->BufferOffset,
                                 isl_format,
                                 size,
                                 texel_size,
                                 0);
}

/**
 * Set up a binding table entry for use by stream output logic (transform
 * feedback).
 *
 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
 */
void
brw_update_sol_surface(struct brw_context *brw,
                       struct gl_buffer_object *buffer_obj,
                       uint32_t *out_offset, unsigned num_vector_components,
                       unsigned stride_dwords, unsigned offset_dwords)
{
   struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
   uint32_t offset_bytes = 4 * offset_dwords;
   struct brw_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
                                              offset_bytes,
                                              buffer_obj->Size - offset_bytes,
                                              true);
   uint32_t *surf = brw_state_batch(brw, 6 * 4, 32, out_offset);
   uint32_t pitch_minus_1 = 4 * stride_dwords - 1;
   size_t size_dwords = buffer_obj->Size / 4;
   uint32_t buffer_size_minus_1, width, height, depth, surface_format;

   /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
    * too big to map using a single binding table entry?
    */
   assert((size_dwords - offset_dwords) / stride_dwords
          <= BRW_MAX_NUM_BUFFER_ENTRIES);

   if (size_dwords > offset_dwords + num_vector_components) {
      /* There is room for at least 1 transform feedback output in the buffer.
       * Compute the number of additional transform feedback outputs the
       * buffer has room for.
       */
      buffer_size_minus_1 =
         (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
   } else {
      /* There isn't even room for a single transform feedback output in the
       * buffer.  We can't configure the binding table entry to prevent output
       * entirely; we'll have to rely on the geometry shader to detect
       * overflow.  But to minimize the damage in case of a bug, set up the
       * binding table entry to just allow a single output.
       */
      buffer_size_minus_1 = 0;
   }
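   /* SURFTYPE_BUFFER surfaces encode the entry count minus one split across
    * the Width (bits 6:0), Height (bits 19:7), and Depth (bits 26:20)
    * SURFACE_STATE fields; the masks and shifts below pull the three pieces
    * apart.  For example, buffer_size_minus_1 = 0x12345 yields width 0x45,
    * height 0x246, and depth 0.
    */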
   width = buffer_size_minus_1 & 0x7f;
   height = (buffer_size_minus_1 & 0xfff80) >> 7;
   depth = (buffer_size_minus_1 & 0x7f00000) >> 20;

   switch (num_vector_components) {
   case 1:
      surface_format = ISL_FORMAT_R32_FLOAT;
      break;
   case 2:
      surface_format = ISL_FORMAT_R32G32_FLOAT;
      break;
   case 3:
      surface_format = ISL_FORMAT_R32G32B32_FLOAT;
      break;
   case 4:
      surface_format = ISL_FORMAT_R32G32B32A32_FLOAT;
      break;
   default:
      unreachable("Invalid vector size for transform feedback output");
   }

   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
             BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
             surface_format << BRW_SURFACE_FORMAT_SHIFT |
             BRW_SURFACE_RC_READ_WRITE;
   surf[1] = brw_state_reloc(&brw->batch,
                             *out_offset + 4, bo, offset_bytes, RELOC_WRITE);
   surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
              height << BRW_SURFACE_HEIGHT_SHIFT);
   surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = 0;
   surf[5] = 0;
}

/* Creates a new WM constant buffer reflecting the current fragment program's
 * constants, if needed by the fragment program.
 *
 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 * state atom.
 */
static void
brw_upload_wm_pull_constants(struct brw_context *brw)
{
   struct brw_stage_state *stage_state = &brw->wm.base;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct brw_program *fp =
      (struct brw_program *) brw->programs[MESA_SHADER_FRAGMENT];

   /* BRW_NEW_FS_PROG_DATA */
   struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data;

   _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_FRAGMENT);
   /* _NEW_PROGRAM_CONSTANTS */
   brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program,
                             stage_state, prog_data);
}

const struct brw_tracked_state brw_wm_pull_constants = {
   .dirty = {
      .mesa = _NEW_PROGRAM_CONSTANTS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = brw_upload_wm_pull_constants,
};

/**
 * Creates a null renderbuffer surface.
 *
 * This is used when the shader doesn't write to any color output.  An FB
 * write to target 0 will still be emitted, because that's how the thread is
 * terminated (and computed depth is returned), so we need to have the
 * hardware discard the target 0 color output.
 */
static void
emit_null_surface_state(struct brw_context *brw,
                        const struct gl_framebuffer *fb,
                        uint32_t *out_offset)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   uint32_t *surf = brw_state_batch(brw,
                                    brw->isl_dev.ss.size,
                                    brw->isl_dev.ss.align,
                                    out_offset);

   /* Use the fb dimensions or 1x1x1 */
   const unsigned width = fb ? _mesa_geometric_width(fb) : 1;
   const unsigned height = fb ? _mesa_geometric_height(fb) : 1;
   const unsigned samples = fb ? _mesa_geometric_samples(fb) : 1;

   if (devinfo->gen != 6 || samples <= 1) {
      isl_null_fill_state(&brw->isl_dev, surf,
                          isl_extent3d(width, height, 1));
      return;
   }

   /* On Gen6, null render targets seem to cause GPU hangs when multisampling.
    * So work around this problem by rendering into a dummy color buffer.
    *
    * To decrease the amount of memory needed by the workaround buffer, we
    * set its pitch to 128 bytes (the width of a Y tile).  This means that
    * the amount of memory needed for the workaround buffer is
    * (width_in_tiles + height_in_tiles - 1) tiles.
    *
    * Note that since the workaround buffer will be interpreted by the
    * hardware as an interleaved multisampled buffer, we need to compute
    * width_in_tiles and height_in_tiles by dividing the width and height
    * by 16 rather than the normal Y-tile size of 32.
    */
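   /* For instance, a 1920x1080 window needs ALIGN(1920, 16) / 16 = 120 and
    * ALIGN(1080, 16) / 16 = 68 tiles, so the workaround buffer takes
    * (120 + 68 - 1) * 4096 bytes, i.e. 748 KiB.
    */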
   unsigned width_in_tiles = ALIGN(width, 16) / 16;
   unsigned height_in_tiles = ALIGN(height, 16) / 16;
   unsigned pitch_minus_1 = 127;
   unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
   brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
                      size_needed);

   surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
              ISL_FORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
   surf[1] = brw_state_reloc(&brw->batch, *out_offset + 4,
                             brw->wm.multisampled_null_render_target_bo,
                             0, RELOC_WRITE);

   surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   /* From the Sandy Bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
    * Notes):
    *
    *   If Surface Type is SURFTYPE_NULL, this field must be TRUE
    */
   surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = BRW_SURFACE_MULTISAMPLECOUNT_4;
   surf[5] = 0;
}

/**
 * Sets up a surface state structure to point at the given region.
 * While it is only used for the front/back buffer currently, it should be
 * usable for further buffers when doing ARB_draw_buffer support.
 */
static uint32_t
gen4_update_renderbuffer_surface(struct brw_context *brw,
                                 struct gl_renderbuffer *rb,
                                 unsigned unit,
                                 uint32_t surf_index)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   struct intel_mipmap_tree *mt = irb->mt;
   uint32_t *surf;
   uint32_t tile_x, tile_y;
   enum isl_format format;
   uint32_t offset;
   /* _NEW_BUFFERS */
   mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
   /* BRW_NEW_FS_PROG_DATA */

   if (rb->TexImage && !devinfo->has_surface_tile_offset) {
      intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);

      if (tile_x != 0 || tile_y != 0) {
         /* Original gen4 hardware couldn't draw to a non-tile-aligned
          * destination in a miptree unless you actually set up your
          * renderbuffer as a miptree and used the fragile
          * lod/array_index/etc. controls to select the image.  So, instead,
          * we just make a new single-level miptree and render into that.
          */
         intel_renderbuffer_move_to_temp(brw, irb, false);
         assert(irb->align_wa_mt);
         mt = irb->align_wa_mt;
      }
   }

   surf = brw_state_batch(brw, 6 * 4, 32, &offset);

   format = brw->mesa_to_isl_render_format[rb_format];
   if (unlikely(!brw->mesa_format_supports_render[rb_format])) {
      _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
                    __func__, _mesa_get_format_name(rb_format));
   }

   surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
              format << BRW_SURFACE_FORMAT_SHIFT);

   /* reloc */
   assert(mt->offset % mt->cpp == 0);
   surf[1] = brw_state_reloc(&brw->batch, offset + 4, mt->bo,
                             mt->offset +
                             intel_renderbuffer_get_tile_offsets(irb,
                                                                 &tile_x,
                                                                 &tile_y),
                             RELOC_WRITE);

   surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(mt->surf.tiling) |
              (mt->surf.row_pitch - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = brw_get_surface_num_multisamples(mt->surf.samples);

   assert(devinfo->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
   /* Note that the low bits of these fields are missing, so
    * there's the possibility of getting in trouble.
    */
   assert(tile_x % 4 == 0);
   assert(tile_y % 2 == 0);
   surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
              (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
              (mt->surf.image_alignment_el.height == 4 ?
               BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));

   if (devinfo->gen < 6) {
      /* _NEW_COLOR */
      if (!ctx->Color.ColorLogicOpEnabled && !ctx->Color._AdvancedBlendMode &&
          (ctx->Color.BlendEnabled & (1 << unit)))
         surf[0] |= BRW_SURFACE_BLEND_ENABLED;

      if (!GET_COLORMASK_BIT(ctx->Color.ColorMask, unit, 0))
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
      if (!GET_COLORMASK_BIT(ctx->Color.ColorMask, unit, 1))
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
      if (!GET_COLORMASK_BIT(ctx->Color.ColorMask, unit, 2))
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;

      /* Disable writes to the alpha component when the renderbuffer is
       * XRGB.
       */
      if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
          !GET_COLORMASK_BIT(ctx->Color.ColorMask, unit, 3)) {
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
      }
   }

   return offset;
}

static void
update_renderbuffer_surfaces(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   const struct gl_context *ctx = &brw->ctx;

   /* _NEW_BUFFERS | _NEW_COLOR */
   const struct gl_framebuffer *fb = ctx->DrawBuffer;

   /* Render targets always start at binding table index 0. */
   const unsigned rt_start = 0;

   uint32_t *surf_offsets = brw->wm.base.surf_offset;

   /* Update surfaces for drawing buffers */
   if (fb->_NumColorDrawBuffers >= 1) {
      for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
         struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];

         if (intel_renderbuffer(rb)) {
            surf_offsets[rt_start + i] = devinfo->gen >= 6 ?
               gen6_update_renderbuffer_surface(brw, rb, i, rt_start + i) :
               gen4_update_renderbuffer_surface(brw, rb, i, rt_start + i);
         } else {
            emit_null_surface_state(brw, fb, &surf_offsets[rt_start + i]);
         }
      }
   } else {
      emit_null_surface_state(brw, fb, &surf_offsets[rt_start]);
   }

   /* The PIPE_CONTROL command description says:
    *
    *   "Whenever a Binding Table Index (BTI) used by a Render Target Message
    *    points to a different RENDER_SURFACE_STATE, SW must issue a Render
    *    Target Cache Flush by enabling this bit.  When render target flush
    *    is set due to new association of BTI, PS Scoreboard Stall bit must
    *    be set in this packet."
    */
   if (devinfo->gen >= 11) {
      brw_emit_pipe_control_flush(brw,
                                  PIPE_CONTROL_RENDER_TARGET_FLUSH |
                                  PIPE_CONTROL_STALL_AT_SCOREBOARD);
   }

   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS |
              _NEW_COLOR,
      .brw = BRW_NEW_BATCH,
   },
   .emit = update_renderbuffer_surfaces,
};

const struct brw_tracked_state gen6_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_AUX_STATE,
   },
   .emit = update_renderbuffer_surfaces,
};

static void
update_renderbuffer_read_surfaces(struct brw_context *brw)
{
   const struct gl_context *ctx = &brw->ctx;

   /* BRW_NEW_FS_PROG_DATA */
   const struct brw_wm_prog_data *wm_prog_data =
      brw_wm_prog_data(brw->wm.base.prog_data);

   if (wm_prog_data->has_render_target_reads &&
       !ctx->Extensions.MESA_shader_framebuffer_fetch) {
      /* _NEW_BUFFERS */
      const struct gl_framebuffer *fb = ctx->DrawBuffer;

      for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
         struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];
         const struct intel_renderbuffer *irb = intel_renderbuffer(rb);
         const unsigned surf_index =
            wm_prog_data->binding_table.render_target_read_start + i;
         uint32_t *surf_offset = &brw->wm.base.surf_offset[surf_index];

         if (irb) {
            const enum isl_format format = brw->mesa_to_isl_render_format[
               _mesa_get_render_format(ctx, intel_rb_format(irb))];
            assert(isl_format_supports_sampling(&brw->screen->devinfo,
                                                format));

            /* Override the target of the texture if the render buffer is a
             * single slice of a 3D texture (since the minimum array element
             * field of the surface state structure is ignored by the sampler
             * unit for 3D textures on some hardware), or if the render buffer
             * is a 1D array (since shaders always provide the array index
             * coordinate at the Z component to avoid state-dependent
             * recompiles when changing the texture target of the
             * framebuffer).
             */
            const GLenum target =
               (irb->mt->target == GL_TEXTURE_3D &&
                irb->layer_count == 1) ? GL_TEXTURE_2D :
               irb->mt->target == GL_TEXTURE_1D_ARRAY ? GL_TEXTURE_2D_ARRAY :
               irb->mt->target;

            const struct isl_view view = {
               .format = format,
               .base_level = irb->mt_level - irb->mt->first_level,
               .levels = 1,
               .base_array_layer = irb->mt_layer,
               .array_len = irb->layer_count,
               .swizzle = ISL_SWIZZLE_IDENTITY,
               .usage = ISL_SURF_USAGE_TEXTURE_BIT,
            };

            enum isl_aux_usage aux_usage =
               intel_miptree_texture_aux_usage(brw, irb->mt, format);
            if (brw->draw_aux_usage[i] == ISL_AUX_USAGE_NONE)
               aux_usage = ISL_AUX_USAGE_NONE;

            brw_emit_surface_state(brw, irb->mt, target, view, aux_usage,
                                   surf_offset, surf_index,
                                   0);

         } else {
            emit_null_surface_state(brw, fb, surf_offset);
         }
      }

      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
   }
}

const struct brw_tracked_state brw_renderbuffer_read_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_AUX_STATE |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = update_renderbuffer_read_surfaces,
};

static bool
is_depth_texture(struct intel_texture_object *iobj)
{
   GLenum base_format = _mesa_get_format_base_format(iobj->_Format);
   return base_format == GL_DEPTH_COMPONENT ||
          (base_format == GL_DEPTH_STENCIL && !iobj->base.StencilSampling);
}

static void
update_stage_texture_surfaces(struct brw_context *brw,
                              const struct gl_program *prog,
                              struct brw_stage_state *stage_state,
                              bool for_gather, uint32_t plane)
{
   if (!prog)
      return;

   struct gl_context *ctx = &brw->ctx;

   uint32_t *surf_offset = stage_state->surf_offset;

   /* BRW_NEW_*_PROG_DATA */
   if (for_gather)
      surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
   else
      surf_offset += stage_state->prog_data->binding_table.plane_start[plane];

   unsigned num_samplers = util_last_bit(prog->SamplersUsed);
   for (unsigned s = 0; s < num_samplers; s++) {
      surf_offset[s] = 0;

      if (prog->SamplersUsed & (1 << s)) {
         const unsigned unit = prog->SamplerUnits[s];
         const bool used_by_txf = prog->info.textures_used_by_txf & (1 << s);
         struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current;
         struct intel_texture_object *iobj = intel_texture_object(obj);

         /* _NEW_TEXTURE */
         if (!obj)
            continue;

         if ((prog->ShadowSamplers & (1 << s)) && !is_depth_texture(iobj)) {
            /* A programming note for the sample_c message says:
             *
             *   "The Surface Format of the associated surface must be
             *    indicated as supporting shadow mapping as indicated in the
             *    surface format table."
             *
             * Accessing non-depth textures via a sampler*Shadow type is
             * undefined.  GLSL 4.50 page 162 says:
             *
             *   "If a shadow texture call is made to a sampler that does not
             *    represent a depth texture, then results are undefined."
             *
             * We give them a null surface (zeros) for undefined.  We've seen
             * GPU hangs with color buffers and sample_c, so we try to avoid
             * those with this hack.
             */
            emit_null_surface_state(brw, NULL, surf_offset + s);
         } else {
            brw_update_texture_surface(ctx, unit, surf_offset + s, for_gather,
                                       used_by_txf, plane);
         }
      }
   }
}


/**
 * Construct SURFACE_STATE objects for enabled textures.
 */
static void
brw_update_texture_surfaces(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   /* BRW_NEW_VERTEX_PROGRAM */
   struct gl_program *vs = brw->programs[MESA_SHADER_VERTEX];

   /* BRW_NEW_TESS_PROGRAMS */
   struct gl_program *tcs = brw->programs[MESA_SHADER_TESS_CTRL];
   struct gl_program *tes = brw->programs[MESA_SHADER_TESS_EVAL];

   /* BRW_NEW_GEOMETRY_PROGRAM */
   struct gl_program *gs = brw->programs[MESA_SHADER_GEOMETRY];

   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct gl_program *fs = brw->programs[MESA_SHADER_FRAGMENT];

   /* _NEW_TEXTURE */
   update_stage_texture_surfaces(brw, vs, &brw->vs.base, false, 0);
   update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false, 0);
   update_stage_texture_surfaces(brw, tes, &brw->tes.base, false, 0);
   update_stage_texture_surfaces(brw, gs, &brw->gs.base, false, 0);
   update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 0);

   /* Emit an alternate set of surface states for gather.  This allows the
    * surface format to be overridden for only the gather4 messages.
    */
   if (devinfo->gen < 8) {
      if (vs && vs->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, vs, &brw->vs.base, true, 0);
      if (tcs && tcs->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true, 0);
      if (tes && tes->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, tes, &brw->tes.base, true, 0);
      if (gs && gs->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, gs, &brw->gs.base, true, 0);
      if (fs && fs->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, fs, &brw->wm.base, true, 0);
   }

   if (fs) {
      update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 1);
      update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 2);
   }

   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_AUX_STATE |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_GEOMETRY_PROGRAM |
             BRW_NEW_GS_PROG_DATA |
             BRW_NEW_TESS_PROGRAMS |
             BRW_NEW_TCS_PROG_DATA |
             BRW_NEW_TES_PROG_DATA |
             BRW_NEW_TEXTURE_BUFFER |
             BRW_NEW_VERTEX_PROGRAM |
             BRW_NEW_VS_PROG_DATA,
   },
   .emit = brw_update_texture_surfaces,
};

static void
brw_update_cs_texture_surfaces(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   /* BRW_NEW_COMPUTE_PROGRAM */
   struct gl_program *cs = brw->programs[MESA_SHADER_COMPUTE];

   /* _NEW_TEXTURE */
   update_stage_texture_surfaces(brw, cs, &brw->cs.base, false, 0);

   /* Emit an alternate set of surface states for gather.  This allows the
    * surface format to be overridden for only the gather4 messages.
    */
   if (devinfo->gen < 8) {
      if (cs && cs->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, cs, &brw->cs.base, true, 0);
   }

   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_cs_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_COMPUTE_PROGRAM |
             BRW_NEW_AUX_STATE,
   },
   .emit = brw_update_cs_texture_surfaces,
};

static void
upload_buffer_surface(struct brw_context *brw,
                      struct gl_buffer_binding *binding,
                      uint32_t *out_offset,
                      enum isl_format format,
                      unsigned reloc_flags)
{
   struct gl_context *ctx = &brw->ctx;

   if (binding->BufferObject == ctx->Shared->NullBufferObj) {
      emit_null_surface_state(brw, NULL, out_offset);
   } else {
      ptrdiff_t size = binding->BufferObject->Size - binding->Offset;
      if (!binding->AutomaticSize)
         size = MIN2(size, binding->Size);

      struct intel_buffer_object *iobj =
         intel_buffer_object(binding->BufferObject);
      struct brw_bo *bo =
         intel_bufferobj_buffer(brw, iobj, binding->Offset, size,
                                (reloc_flags & RELOC_WRITE) != 0);

      brw_emit_buffer_surface_state(brw, out_offset, bo, binding->Offset,
                                    format, size, 1, reloc_flags);
   }
}

void
brw_upload_ubo_surfaces(struct brw_context *brw, struct gl_program *prog,
                        struct brw_stage_state *stage_state,
                        struct brw_stage_prog_data *prog_data)
{
   struct gl_context *ctx = &brw->ctx;

   if (!prog || (prog->info.num_ubos == 0 &&
                 prog->info.num_ssbos == 0 &&
                 prog->info.num_abos == 0))
      return;

   uint32_t *ubo_surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.ubo_start];

   for (int i = 0; i < prog->info.num_ubos; i++) {
      struct gl_buffer_binding *binding =
         &ctx->UniformBufferBindings[prog->sh.UniformBlocks[i]->Binding];
      upload_buffer_surface(brw, binding, &ubo_surf_offsets[i],
                            ISL_FORMAT_R32G32B32A32_FLOAT, 0);
   }

   uint32_t *abo_surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.ssbo_start];
   uint32_t *ssbo_surf_offsets = abo_surf_offsets + prog->info.num_abos;

   for (int i = 0; i < prog->info.num_abos; i++) {
      struct gl_buffer_binding *binding =
         &ctx->AtomicBufferBindings[prog->sh.AtomicBuffers[i]->Binding];
      upload_buffer_surface(brw, binding, &abo_surf_offsets[i],
                            ISL_FORMAT_RAW, RELOC_WRITE);
   }

   for (int i = 0; i < prog->info.num_ssbos; i++) {
      struct gl_buffer_binding *binding =
         &ctx->ShaderStorageBufferBindings[prog->sh.ShaderStorageBlocks[i]->Binding];

      upload_buffer_surface(brw, binding, &ssbo_surf_offsets[i],
                            ISL_FORMAT_RAW, RELOC_WRITE);
   }

   stage_state->push_constants_dirty = true;
   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

static void
brw_upload_wm_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_program *prog = ctx->FragmentProgram._Current;

   /* BRW_NEW_FS_PROG_DATA */
   brw_upload_ubo_surfaces(brw, prog, &brw->wm.base, brw->wm.base.prog_data);
}

const struct brw_tracked_state brw_wm_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_UNIFORM_BUFFER,
   },
   .emit = brw_upload_wm_ubo_surfaces,
};

static void
brw_upload_cs_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];

   /* BRW_NEW_CS_PROG_DATA */
   brw_upload_ubo_surfaces(brw, prog, &brw->cs.base, brw->cs.base.prog_data);
}

const struct brw_tracked_state brw_cs_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_CS_PROG_DATA |
             BRW_NEW_UNIFORM_BUFFER,
   },
   .emit = brw_upload_cs_ubo_surfaces,
};

static void
brw_upload_cs_image_surfaces(struct brw_context *brw)
{
   /* _NEW_PROGRAM */
   const struct gl_program *cp = brw->programs[MESA_SHADER_COMPUTE];

   if (cp) {
      /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
      brw_upload_image_surfaces(brw, cp, &brw->cs.base,
                                brw->cs.base.prog_data);
   }
}

const struct brw_tracked_state brw_cs_image_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_CS_PROG_DATA |
             BRW_NEW_AUX_STATE |
             BRW_NEW_IMAGE_UNITS
   },
   .emit = brw_upload_cs_image_surfaces,
};

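/* As an illustration: on hardware without typed-read support for a format,
 * a read/write GL_RGBA8 image may be lowered by
 * isl_lower_storage_image_format() to a one-channel format such as R32_UINT,
 * with the shader doing the pack/unpack; formats with no usable typed
 * alternative fall back to RAW and untyped messages (the exact mapping
 * lives in ISL).
 */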
static uint32_t
get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   enum isl_format hw_format = brw_isl_format_for_mesa_format(format);
   if (access == GL_WRITE_ONLY) {
      return hw_format;
   } else if (isl_has_matching_typed_storage_image_format(devinfo, hw_format)) {
      /* Typed surface reads support a very limited subset of the shader
       * image formats.  Translate it into the closest format the
       * hardware supports.
       */
      return isl_lower_storage_image_format(devinfo, hw_format);
   } else {
      /* The hardware doesn't actually support a typed format that we can use
       * so we have to fall back to untyped read/write messages.
       */
      return ISL_FORMAT_RAW;
   }
}

static void
update_default_image_param(struct brw_context *brw,
                           struct gl_image_unit *u,
                           unsigned surface_idx,
                           struct brw_image_param *param)
{
   memset(param, 0, sizeof(*param));
   param->surface_idx = surface_idx;
   /* Set the swizzling shifts to all-ones to effectively disable swizzling --
    * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
    * detailed explanation of these parameters.
    */
   param->swizzling[0] = 0xff;
   param->swizzling[1] = 0xff;
}

static void
update_buffer_image_param(struct brw_context *brw,
                          struct gl_image_unit *u,
                          unsigned surface_idx,
                          struct brw_image_param *param)
{
   struct gl_buffer_object *obj = u->TexObj->BufferObject;
   const uint32_t size = MIN2((uint32_t) u->TexObj->BufferSize, obj->Size);
   update_default_image_param(brw, u, surface_idx, param);

   param->size[0] = size / _mesa_get_format_bytes(u->_ActualFormat);
   param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
}

static unsigned
get_image_num_layers(const struct intel_mipmap_tree *mt, GLenum target,
                     unsigned level)
{
   if (target == GL_TEXTURE_CUBE_MAP)
      return 6;

   return target == GL_TEXTURE_3D ?
      minify(mt->surf.logical_level0_px.depth, level) :
      mt->surf.logical_level0_px.array_len;
}

static void
update_image_surface(struct brw_context *brw,
                     struct gl_image_unit *u,
                     GLenum access,
                     unsigned surface_idx,
                     uint32_t *surf_offset,
                     struct brw_image_param *param)
{
   if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
      struct gl_texture_object *obj = u->TexObj;
      const unsigned format = get_image_format(brw, u->_ActualFormat, access);

      if (obj->Target == GL_TEXTURE_BUFFER) {
         struct intel_buffer_object *intel_obj =
            intel_buffer_object(obj->BufferObject);
         const unsigned texel_size = (format == ISL_FORMAT_RAW ? 1 :
                                      _mesa_get_format_bytes(u->_ActualFormat));

         brw_emit_buffer_surface_state(
            brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
            format, intel_obj->Base.Size, texel_size,
            access != GL_READ_ONLY ? RELOC_WRITE : 0);

         update_buffer_image_param(brw, u, surface_idx, param);

      } else {
         struct intel_texture_object *intel_obj = intel_texture_object(obj);
         struct intel_mipmap_tree *mt = intel_obj->mt;
         const unsigned num_layers = u->Layered ?
            get_image_num_layers(mt, obj->Target, u->Level) : 1;

         struct isl_view view = {
            .format = format,
            .base_level = obj->MinLevel + u->Level,
            .levels = 1,
            .base_array_layer = obj->MinLayer + u->_Layer,
            .array_len = num_layers,
            .swizzle = ISL_SWIZZLE_IDENTITY,
            .usage = ISL_SURF_USAGE_STORAGE_BIT,
         };

         if (format == ISL_FORMAT_RAW) {
            brw_emit_buffer_surface_state(
               brw, surf_offset, mt->bo, mt->offset,
               format, mt->bo->size - mt->offset, 1 /* pitch */,
               access != GL_READ_ONLY ? RELOC_WRITE : 0);

         } else {
            const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
            assert(!intel_miptree_has_color_unresolved(mt,
                                                       view.base_level, 1,
                                                       view.base_array_layer,
                                                       view.array_len));
            brw_emit_surface_state(brw, mt, mt->target, view,
                                   ISL_AUX_USAGE_NONE,
                                   surf_offset, surf_index,
                                   access == GL_READ_ONLY ? 0 : RELOC_WRITE);
         }

         isl_surf_fill_image_param(&brw->isl_dev, param, &mt->surf, &view);
         param->surface_idx = surface_idx;
      }

   } else {
      emit_null_surface_state(brw, NULL, surf_offset);
      update_default_image_param(brw, u, surface_idx, param);
   }
}

void
brw_upload_image_surfaces(struct brw_context *brw,
                          const struct gl_program *prog,
                          struct brw_stage_state *stage_state,
                          struct brw_stage_prog_data *prog_data)
{
   assert(prog);
   struct gl_context *ctx = &brw->ctx;

   if (prog->info.num_images) {
      for (unsigned i = 0; i < prog->info.num_images; i++) {
         struct gl_image_unit *u = &ctx->ImageUnits[prog->sh.ImageUnits[i]];
         const unsigned surf_idx = prog_data->binding_table.image_start + i;

         update_image_surface(brw, u, prog->sh.ImageAccess[i],
                              surf_idx,
                              &stage_state->surf_offset[surf_idx],
                              &stage_state->image_param[i]);
      }

      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
      /* This may have changed the image metadata that depends on the context
       * image unit state and is passed to the program as uniforms, so make
       * sure that push and pull constants are reuploaded.
       */
      brw->NewGLState |= _NEW_PROGRAM_CONSTANTS;
   }
}

static void
brw_upload_wm_image_surfaces(struct brw_context *brw)
{
   /* BRW_NEW_FRAGMENT_PROGRAM */
   const struct gl_program *wm = brw->programs[MESA_SHADER_FRAGMENT];

   if (wm) {
      /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
      brw_upload_image_surfaces(brw, wm, &brw->wm.base,
                                brw->wm.base.prog_data);
   }
}

const struct brw_tracked_state brw_wm_image_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_AUX_STATE |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_IMAGE_UNITS
   },
   .emit = brw_upload_wm_image_surfaces,
};

static void
brw_upload_cs_work_groups_surface(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
   /* BRW_NEW_CS_PROG_DATA */
   const struct brw_cs_prog_data *cs_prog_data =
      brw_cs_prog_data(brw->cs.base.prog_data);

   if (prog && cs_prog_data->uses_num_work_groups) {
      const unsigned surf_idx =
         cs_prog_data->binding_table.work_groups_start;
      uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
      struct brw_bo *bo;
      uint32_t bo_offset;

      if (brw->compute.num_work_groups_bo == NULL) {
         bo = NULL;
         intel_upload_data(brw,
                           (void *) brw->compute.num_work_groups,
                           3 * sizeof(GLuint),
                           sizeof(GLuint),
                           &bo,
                           &bo_offset);
      } else {
         bo = brw->compute.num_work_groups_bo;
         bo_offset = brw->compute.num_work_groups_offset;
      }

      brw_emit_buffer_surface_state(brw, surf_offset,
                                    bo, bo_offset,
                                    ISL_FORMAT_RAW,
                                    3 * sizeof(GLuint), 1,
                                    RELOC_WRITE);
      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
   }
}

const struct brw_tracked_state brw_cs_work_groups_surface = {
   .dirty = {
      .brw = BRW_NEW_CS_PROG_DATA |
             BRW_NEW_CS_WORK_GROUPS
   },
   .emit = brw_upload_cs_work_groups_surface,
};