i965/miptree: Replace is_lossless_compressed with mt->aux_usage checks
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "compiler/nir/nir.h"
34 #include "main/context.h"
35 #include "main/blend.h"
36 #include "main/mtypes.h"
37 #include "main/samplerobj.h"
38 #include "main/shaderimage.h"
39 #include "main/teximage.h"
40 #include "program/prog_parameter.h"
41 #include "program/prog_instruction.h"
42 #include "main/framebuffer.h"
43 #include "main/shaderapi.h"
44
45 #include "isl/isl.h"
46
47 #include "intel_mipmap_tree.h"
48 #include "intel_batchbuffer.h"
49 #include "intel_tex.h"
50 #include "intel_fbo.h"
51 #include "intel_buffer_objects.h"
52
53 #include "brw_context.h"
54 #include "brw_state.h"
55 #include "brw_defines.h"
56 #include "brw_wm.h"
57
58 enum {
59 INTEL_RENDERBUFFER_LAYERED = 1 << 0,
60 INTEL_AUX_BUFFER_DISABLED = 1 << 1,
61 };
62
63 uint32_t tex_mocs[] = {
64 [7] = GEN7_MOCS_L3,
65 [8] = BDW_MOCS_WB,
66 [9] = SKL_MOCS_WB,
67 [10] = CNL_MOCS_WB,
68 };
69
70 uint32_t rb_mocs[] = {
71 [7] = GEN7_MOCS_L3,
72 [8] = BDW_MOCS_PTE,
73 [9] = SKL_MOCS_PTE,
74 [10] = CNL_MOCS_PTE,
75 };
76
77 static void
78 brw_emit_surface_state(struct brw_context *brw,
79 struct intel_mipmap_tree *mt, uint32_t flags,
80 GLenum target, struct isl_view view,
81 uint32_t mocs, uint32_t *surf_offset, int surf_index,
82 unsigned read_domains, unsigned write_domains)
83 {
84 uint32_t tile_x = mt->level[0].slice[0].x_offset;
85 uint32_t tile_y = mt->level[0].slice[0].y_offset;
86 uint32_t offset = mt->offset;
87
88 struct isl_surf surf;
89 intel_miptree_get_isl_surf(brw, mt, &surf);
90
91 surf.dim = get_isl_surf_dim(target);
92
93 const enum isl_dim_layout dim_layout =
94 get_isl_dim_layout(&brw->screen->devinfo, mt->tiling, target,
95 mt->array_layout);
96
97 if (surf.dim_layout != dim_layout) {
98 /* The layout of the specified texture target is not compatible with the
99 * actual layout of the miptree structure in memory -- you're entering
100 * dangerous territory. This can only work if you intend
101 * to access a single level and slice of the texture, and the hardware
102 * supports the tile offset feature in order to allow non-tile-aligned
103 * base offsets, since we'll have to point the hardware to the first
104 * texel of the level instead of relying on the usual base level/layer
105 * controls.
106 */
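/* Worked example of the override below (hypothetical values, for
* illustration only): with view.base_level == 2, mt->first_level == 0 and a
* 256x256 2D miptree, l == 2, so:
*
*    surf.logical_level0_px.width  = minify(256, 2) == 64
*    surf.logical_level0_px.height = minify(256, 2) == 64
*    surf.levels = 1, array_len = 1, view.base_level = 0
*
* i.e. the hardware is handed a standalone single-level 64x64 surface whose
* base address is the tile offset of the requested level and slice.
*/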
107 assert(brw->has_surface_tile_offset);
108 assert(view.levels == 1 && view.array_len == 1);
109 assert(tile_x == 0 && tile_y == 0);
110
111 offset += intel_miptree_get_tile_offsets(mt, view.base_level,
112 view.base_array_layer,
113 &tile_x, &tile_y);
114
115 /* Minify the logical dimensions of the texture. */
116 const unsigned l = view.base_level - mt->first_level;
117 surf.logical_level0_px.width = minify(surf.logical_level0_px.width, l);
118 surf.logical_level0_px.height = surf.dim <= ISL_SURF_DIM_1D ? 1 :
119 minify(surf.logical_level0_px.height, l);
120 surf.logical_level0_px.depth = surf.dim <= ISL_SURF_DIM_2D ? 1 :
121 minify(surf.logical_level0_px.depth, l);
122
123 /* Only the base level and layer can be addressed with the overridden
124 * layout.
125 */
126 surf.logical_level0_px.array_len = 1;
127 surf.levels = 1;
128 surf.dim_layout = dim_layout;
129
130 /* The requested slice of the texture is now at the base level and
131 * layer.
132 */
133 view.base_level = 0;
134 view.base_array_layer = 0;
135 }
136
137 union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } };
138
139 struct brw_bo *aux_bo;
140 struct isl_surf *aux_surf = NULL;
141 uint64_t aux_offset = 0;
142 enum isl_aux_usage aux_usage = ISL_AUX_USAGE_NONE;
143 if ((mt->mcs_buf || intel_miptree_sample_with_hiz(brw, mt)) &&
144 !(flags & INTEL_AUX_BUFFER_DISABLED)) {
145 aux_usage = intel_miptree_get_aux_isl_usage(brw, mt);
146
147 if (mt->mcs_buf) {
148 aux_surf = &mt->mcs_buf->surf;
149
150 assert(mt->mcs_buf->offset == 0);
151 aux_bo = mt->mcs_buf->bo;
152 aux_offset = mt->mcs_buf->bo->offset64 + mt->mcs_buf->offset;
153 } else {
154 aux_surf = &mt->hiz_buf->surf;
155
156 aux_bo = mt->hiz_buf->bo;
157 aux_offset = mt->hiz_buf->bo->offset64;
158 }
159
160 /* We only really need a clear color if we also have an auxiliary
161 * surface. Without one, it does nothing.
162 */
163 clear_color = mt->fast_clear_color;
164 }
165
166 void *state = brw_state_batch(brw,
167 brw->isl_dev.ss.size,
168 brw->isl_dev.ss.align,
169 surf_offset);
170
171 isl_surf_fill_state(&brw->isl_dev, state, .surf = &surf, .view = &view,
172 .address = mt->bo->offset64 + offset,
173 .aux_surf = aux_surf, .aux_usage = aux_usage,
174 .aux_address = aux_offset,
175 .mocs = mocs, .clear_color = clear_color,
176 .x_offset_sa = tile_x, .y_offset_sa = tile_y);
177
178 brw_emit_reloc(&brw->batch, *surf_offset + brw->isl_dev.ss.addr_offset,
179 mt->bo, offset, read_domains, write_domains);
180
181 if (aux_surf) {
182 /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
183 * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
184 * contain other control information. Since buffer addresses are always
185 * on 4k boundaries (and thus have their lower 12 bits zero), we can use
186 * an ordinary reloc to do the necessary address translation.
187 */
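/* For illustration (assumed numbers only): if ISL packed DWORD 6 as
* 0x0012f000 | 0x5 (aux address | control bits), the delta passed to
* brw_emit_reloc() below is that value minus aux_bo->offset64, so the
* kernel's "bo address + delta" fixup lands on another 4k-aligned address
* and the low control bits 0x5 survive untouched.
*/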
188 assert((aux_offset & 0xfff) == 0);
189 uint32_t *aux_addr = state + brw->isl_dev.ss.aux_addr_offset;
190 brw_emit_reloc(&brw->batch,
191 *surf_offset + brw->isl_dev.ss.aux_addr_offset,
192 aux_bo, *aux_addr - aux_bo->offset64,
193 read_domains, write_domains);
194 }
195 }
196
197 uint32_t
198 brw_update_renderbuffer_surface(struct brw_context *brw,
199 struct gl_renderbuffer *rb,
200 uint32_t flags, unsigned unit /* unused */,
201 uint32_t surf_index)
202 {
203 struct gl_context *ctx = &brw->ctx;
204 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
205 struct intel_mipmap_tree *mt = irb->mt;
206
207 if (brw->gen < 9) {
208 assert(!(flags & INTEL_AUX_BUFFER_DISABLED));
209 }
210
211 assert(brw_render_target_supported(brw, rb));
212
213 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
214 if (unlikely(!brw->mesa_format_supports_render[rb_format])) {
215 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
216 __func__, _mesa_get_format_name(rb_format));
217 }
218
219 const unsigned layer_multiplier =
220 (irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_UMS ||
221 irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) ?
222 MAX2(irb->mt->num_samples, 1) : 1;
223
224 struct isl_view view = {
225 .format = brw->mesa_to_isl_render_format[rb_format],
226 .base_level = irb->mt_level - irb->mt->first_level,
227 .levels = 1,
228 .base_array_layer = irb->mt_layer / layer_multiplier,
229 .array_len = MAX2(irb->layer_count, 1),
230 .swizzle = ISL_SWIZZLE_IDENTITY,
231 .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
232 };
233
234 uint32_t offset;
235 brw_emit_surface_state(brw, mt, flags, mt->target, view,
236 rb_mocs[brw->gen],
237 &offset, surf_index,
238 I915_GEM_DOMAIN_RENDER,
239 I915_GEM_DOMAIN_RENDER);
240 return offset;
241 }
242
243 GLuint
244 translate_tex_target(GLenum target)
245 {
246 switch (target) {
247 case GL_TEXTURE_1D:
248 case GL_TEXTURE_1D_ARRAY_EXT:
249 return BRW_SURFACE_1D;
250
251 case GL_TEXTURE_RECTANGLE_NV:
252 return BRW_SURFACE_2D;
253
254 case GL_TEXTURE_2D:
255 case GL_TEXTURE_2D_ARRAY_EXT:
256 case GL_TEXTURE_EXTERNAL_OES:
257 case GL_TEXTURE_2D_MULTISAMPLE:
258 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
259 return BRW_SURFACE_2D;
260
261 case GL_TEXTURE_3D:
262 return BRW_SURFACE_3D;
263
264 case GL_TEXTURE_CUBE_MAP:
265 case GL_TEXTURE_CUBE_MAP_ARRAY:
266 return BRW_SURFACE_CUBE;
267
268 default:
269 unreachable("not reached");
270 }
271 }
272
273 uint32_t
274 brw_get_surface_tiling_bits(uint32_t tiling)
275 {
276 switch (tiling) {
277 case I915_TILING_X:
278 return BRW_SURFACE_TILED;
279 case I915_TILING_Y:
280 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
281 default:
282 return 0;
283 }
284 }
285
286
287 uint32_t
288 brw_get_surface_num_multisamples(unsigned num_samples)
289 {
290 if (num_samples > 1)
291 return BRW_SURFACE_MULTISAMPLECOUNT_4;
292 else
293 return BRW_SURFACE_MULTISAMPLECOUNT_1;
294 }
295
296 /**
297 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
298 * swizzling.
299 */
300 int
301 brw_get_texture_swizzle(const struct gl_context *ctx,
302 const struct gl_texture_object *t)
303 {
304 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
305
306 int swizzles[SWIZZLE_NIL + 1] = {
307 SWIZZLE_X,
308 SWIZZLE_Y,
309 SWIZZLE_Z,
310 SWIZZLE_W,
311 SWIZZLE_ZERO,
312 SWIZZLE_ONE,
313 SWIZZLE_NIL
314 };
315
316 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
317 img->_BaseFormat == GL_DEPTH_STENCIL) {
318 GLenum depth_mode = t->DepthMode;
319
320 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
321 * with depth component data specified with a sized internal format.
322 * Otherwise, it's left at the old default, GL_LUMINANCE.
323 */
324 if (_mesa_is_gles3(ctx) &&
325 img->InternalFormat != GL_DEPTH_COMPONENT &&
326 img->InternalFormat != GL_DEPTH_STENCIL) {
327 depth_mode = GL_RED;
328 }
329
330 switch (depth_mode) {
331 case GL_ALPHA:
332 swizzles[0] = SWIZZLE_ZERO;
333 swizzles[1] = SWIZZLE_ZERO;
334 swizzles[2] = SWIZZLE_ZERO;
335 swizzles[3] = SWIZZLE_X;
336 break;
337 case GL_LUMINANCE:
338 swizzles[0] = SWIZZLE_X;
339 swizzles[1] = SWIZZLE_X;
340 swizzles[2] = SWIZZLE_X;
341 swizzles[3] = SWIZZLE_ONE;
342 break;
343 case GL_INTENSITY:
344 swizzles[0] = SWIZZLE_X;
345 swizzles[1] = SWIZZLE_X;
346 swizzles[2] = SWIZZLE_X;
347 swizzles[3] = SWIZZLE_X;
348 break;
349 case GL_RED:
350 swizzles[0] = SWIZZLE_X;
351 swizzles[1] = SWIZZLE_ZERO;
352 swizzles[2] = SWIZZLE_ZERO;
353 swizzles[3] = SWIZZLE_ONE;
354 break;
355 }
356 }
357
358 GLenum datatype = _mesa_get_format_datatype(img->TexFormat);
359
360 /* If the texture's format is alpha-only, force R, G, and B to
361 * 0.0. Similarly, if the texture's format has no alpha channel,
362 * force the alpha value read to 1.0. This allows for the
363 * implementation to use an RGBA texture for any of these formats
364 * without leaking any unexpected values.
365 */
366 switch (img->_BaseFormat) {
367 case GL_ALPHA:
368 swizzles[0] = SWIZZLE_ZERO;
369 swizzles[1] = SWIZZLE_ZERO;
370 swizzles[2] = SWIZZLE_ZERO;
371 break;
372 case GL_LUMINANCE:
373 if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
374 swizzles[0] = SWIZZLE_X;
375 swizzles[1] = SWIZZLE_X;
376 swizzles[2] = SWIZZLE_X;
377 swizzles[3] = SWIZZLE_ONE;
378 }
379 break;
380 case GL_LUMINANCE_ALPHA:
381 if (datatype == GL_SIGNED_NORMALIZED) {
382 swizzles[0] = SWIZZLE_X;
383 swizzles[1] = SWIZZLE_X;
384 swizzles[2] = SWIZZLE_X;
385 swizzles[3] = SWIZZLE_W;
386 }
387 break;
388 case GL_INTENSITY:
389 if (datatype == GL_SIGNED_NORMALIZED) {
390 swizzles[0] = SWIZZLE_X;
391 swizzles[1] = SWIZZLE_X;
392 swizzles[2] = SWIZZLE_X;
393 swizzles[3] = SWIZZLE_X;
394 }
395 break;
396 case GL_RED:
397 case GL_RG:
398 case GL_RGB:
399 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0 ||
400 img->TexFormat == MESA_FORMAT_RGB_DXT1 ||
401 img->TexFormat == MESA_FORMAT_SRGB_DXT1)
402 swizzles[3] = SWIZZLE_ONE;
403 break;
404 }
405
406 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
407 swizzles[GET_SWZ(t->_Swizzle, 1)],
408 swizzles[GET_SWZ(t->_Swizzle, 2)],
409 swizzles[GET_SWZ(t->_Swizzle, 3)]);
410 }
411
412 /**
413 * Convert a swizzle enumeration (e.g. SWIZZLE_X) to one of the Gen7.5+
414 * "Shader Channel Select" enumerations (e.g. HSW_SCS_RED). The mappings are
415 *
416 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
417 * 0 1 2 3 4 5
418 * 4 5 6 7 0 1
419 * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE
420 *
421 * which is simply adding 4 then modding by 8 (or anding with 7).
422 *
423 * We then may need to apply workarounds for textureGather hardware bugs.
424 */
425 static unsigned
426 swizzle_to_scs(GLenum swizzle, bool need_green_to_blue)
427 {
428 unsigned scs = (swizzle + 4) & 7;
429
430 return (need_green_to_blue && scs == HSW_SCS_GREEN) ? HSW_SCS_BLUE : scs;
431 }
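/* A couple of concrete instances of the mapping above (enum values as given
* in the table, shown only for illustration):
*
*    swizzle_to_scs(SWIZZLE_X, false)   == (0 + 4) & 7 == 4 == HSW_SCS_RED
*    swizzle_to_scs(SWIZZLE_ONE, false) == (5 + 4) & 7 == 1 == HSW_SCS_ONE
*    swizzle_to_scs(SWIZZLE_Y, true)    == HSW_SCS_BLUE  (Haswell gather w/a)
*/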
432
433 static unsigned
434 brw_find_matching_rb(const struct gl_framebuffer *fb,
435 const struct intel_mipmap_tree *mt)
436 {
437 for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
438 const struct intel_renderbuffer *irb =
439 intel_renderbuffer(fb->_ColorDrawBuffers[i]);
440
441 if (irb && irb->mt == mt)
442 return i;
443 }
444
445 return fb->_NumColorDrawBuffers;
446 }
447
448 static inline bool
449 brw_texture_view_sane(const struct brw_context *brw,
450 const struct intel_mipmap_tree *mt,
451 const struct isl_view *view)
452 {
453 /* There are special cases only for lossless compression. */
454 if (mt->aux_usage != ISL_AUX_USAGE_CCS_E)
455 return true;
456
457 if (isl_format_supports_ccs_e(&brw->screen->devinfo, view->format))
458 return true;
459
460 /* Logic elsewhere needs to take care to resolve the color buffer prior
461 * to sampling it as non-compressed.
462 */
463 if (intel_miptree_has_color_unresolved(mt, view->base_level, view->levels,
464 view->base_array_layer,
465 view->array_len))
466 return false;
467
468 const struct gl_framebuffer *fb = brw->ctx.DrawBuffer;
469 const unsigned rb_index = brw_find_matching_rb(fb, mt);
470
471 if (rb_index == fb->_NumColorDrawBuffers)
472 return true;
473
474 /* Underlying surface is compressed but it is sampled using a format that
475 * the sampling engine doesn't support as compressed. Compression must be
476 * disabled for both the sampling engine and the data port in case the same
477 * surface is also used as a render target.
478 */
479 return brw->draw_aux_buffer_disabled[rb_index];
480 }
481
482 static bool
483 brw_disable_aux_surface(const struct brw_context *brw,
484 const struct intel_mipmap_tree *mt,
485 const struct isl_view *view)
486 {
487 /* Nothing to disable. */
488 if (!mt->mcs_buf)
489 return false;
490
491 const bool is_unresolved = intel_miptree_has_color_unresolved(
492 mt, view->base_level, view->levels,
493 view->base_array_layer, view->array_len);
494
495 /* There are special cases only for lossless compression. */
496 if (mt->aux_usage != ISL_AUX_USAGE_CCS_E)
497 return !is_unresolved;
498
499 const struct gl_framebuffer *fb = brw->ctx.DrawBuffer;
500 const unsigned rb_index = brw_find_matching_rb(fb, mt);
501
502 /* If we are drawing into this with compression enabled, then we must also
503 * enable compression when texturing from it regardless of
504 * fast_clear_state. If we don't, then after the first draw call with
505 * this setup there will be data in the CCS which won't get picked up by
506 * subsequent texturing operations as required by ARB_texture_barrier.
507 * Since we don't want to re-emit the binding table or do a resolve
508 * operation every draw call, the easiest thing to do is just enable
509 * compression on the texturing side. This is completely safe to do
510 * since, if compressed texturing weren't allowed, we would have disabled
511 * compression of render targets in whatever_that_function_is_called().
512 */
513 if (rb_index < fb->_NumColorDrawBuffers) {
514 if (brw->draw_aux_buffer_disabled[rb_index]) {
515 assert(!is_unresolved);
516 }
517
518 return brw->draw_aux_buffer_disabled[rb_index];
519 }
520
521 return !is_unresolved;
522 }
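/* Quick reference for brw_disable_aux_surface() (derived from the logic
* above, a reading aid rather than anything normative):
*
*    no mt->mcs_buf                         -> false (nothing to disable)
*    aux_usage != CCS_E, fully resolved     -> true
*    aux_usage != CCS_E, unresolved data    -> false
*    CCS_E, also bound as draw buffer i     -> draw_aux_buffer_disabled[i]
*    CCS_E, not bound as a draw buffer      -> true iff fully resolved
*/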
523
524 void
525 brw_update_texture_surface(struct gl_context *ctx,
526 unsigned unit,
527 uint32_t *surf_offset,
528 bool for_gather,
529 uint32_t plane)
530 {
531 struct brw_context *brw = brw_context(ctx);
532 struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current;
533
534 if (obj->Target == GL_TEXTURE_BUFFER) {
535 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
536
537 } else {
538 struct intel_texture_object *intel_obj = intel_texture_object(obj);
539 struct intel_mipmap_tree *mt = intel_obj->mt;
540
541 if (plane > 0) {
542 if (mt->plane[plane - 1] == NULL)
543 return;
544 mt = mt->plane[plane - 1];
545 }
546
547 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
548 /* If this is a view with restricted NumLayers, then our effective depth
549 * is not just the miptree depth.
550 */
551 const unsigned view_num_layers =
552 (obj->Immutable && obj->Target != GL_TEXTURE_3D) ? obj->NumLayers :
553 mt->logical_depth0;
554
555 /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
556 * texturing functions that return a float, as our code generation always
557 * selects the .x channel (which would always be 0).
558 */
559 struct gl_texture_image *firstImage = obj->Image[0][obj->BaseLevel];
560 const bool alpha_depth = obj->DepthMode == GL_ALPHA &&
561 (firstImage->_BaseFormat == GL_DEPTH_COMPONENT ||
562 firstImage->_BaseFormat == GL_DEPTH_STENCIL);
563 const unsigned swizzle = (unlikely(alpha_depth) ? SWIZZLE_XYZW :
564 brw_get_texture_swizzle(&brw->ctx, obj));
565
566 mesa_format mesa_fmt = plane == 0 ? intel_obj->_Format : mt->format;
567 enum isl_format format = translate_tex_format(brw, mesa_fmt,
568 sampler->sRGBDecode);
569
570 /* Implement gen6 and gen7 gather work-around */
571 bool need_green_to_blue = false;
572 if (for_gather) {
573 if (brw->gen == 7 && (format == ISL_FORMAT_R32G32_FLOAT ||
574 format == ISL_FORMAT_R32G32_SINT ||
575 format == ISL_FORMAT_R32G32_UINT)) {
576 format = ISL_FORMAT_R32G32_FLOAT_LD;
577 need_green_to_blue = brw->is_haswell;
578 } else if (brw->gen == 6) {
579 /* Sandybridge's gather4 message is broken for integer formats.
580 * To work around this, we pretend the surface is UNORM for
581 * 8 or 16-bit formats, and emit shader instructions to recover
582 * the real INT/UINT value. For 32-bit formats, we pretend
583 * the surface is FLOAT, and simply reinterpret the resulting
584 * bits.
585 */
586 switch (format) {
587 case ISL_FORMAT_R8_SINT:
588 case ISL_FORMAT_R8_UINT:
589 format = ISL_FORMAT_R8_UNORM;
590 break;
591
592 case ISL_FORMAT_R16_SINT:
593 case ISL_FORMAT_R16_UINT:
594 format = ISL_FORMAT_R16_UNORM;
595 break;
596
597 case ISL_FORMAT_R32_SINT:
598 case ISL_FORMAT_R32_UINT:
599 format = ISL_FORMAT_R32_FLOAT;
600 break;
601
602 default:
603 break;
604 }
605 }
606 }
607
608 if (obj->StencilSampling && firstImage->_BaseFormat == GL_DEPTH_STENCIL) {
609 if (brw->gen <= 7) {
610 assert(mt->r8stencil_mt && !mt->stencil_mt->r8stencil_needs_update);
611 mt = mt->r8stencil_mt;
612 } else {
613 mt = mt->stencil_mt;
614 }
615 format = ISL_FORMAT_R8_UINT;
616 } else if (brw->gen <= 7 && mt->format == MESA_FORMAT_S_UINT8) {
617 assert(mt->r8stencil_mt && !mt->r8stencil_needs_update);
618 mt = mt->r8stencil_mt;
619 format = ISL_FORMAT_R8_UINT;
620 }
621
622 const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
623
624 struct isl_view view = {
625 .format = format,
626 .base_level = obj->MinLevel + obj->BaseLevel,
627 .levels = intel_obj->_MaxLevel - obj->BaseLevel + 1,
628 .base_array_layer = obj->MinLayer,
629 .array_len = view_num_layers,
630 .swizzle = {
631 .r = swizzle_to_scs(GET_SWZ(swizzle, 0), need_green_to_blue),
632 .g = swizzle_to_scs(GET_SWZ(swizzle, 1), need_green_to_blue),
633 .b = swizzle_to_scs(GET_SWZ(swizzle, 2), need_green_to_blue),
634 .a = swizzle_to_scs(GET_SWZ(swizzle, 3), need_green_to_blue),
635 },
636 .usage = ISL_SURF_USAGE_TEXTURE_BIT,
637 };
638
639 if (obj->Target == GL_TEXTURE_CUBE_MAP ||
640 obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY)
641 view.usage |= ISL_SURF_USAGE_CUBE_BIT;
642
643 assert(brw_texture_view_sane(brw, mt, &view));
644
645 const int flags = brw_disable_aux_surface(brw, mt, &view) ?
646 INTEL_AUX_BUFFER_DISABLED : 0;
647 brw_emit_surface_state(brw, mt, flags, mt->target, view,
648 tex_mocs[brw->gen],
649 surf_offset, surf_index,
650 I915_GEM_DOMAIN_SAMPLER, 0);
651 }
652 }
653
654 void
655 brw_emit_buffer_surface_state(struct brw_context *brw,
656 uint32_t *out_offset,
657 struct brw_bo *bo,
658 unsigned buffer_offset,
659 unsigned surface_format,
660 unsigned buffer_size,
661 unsigned pitch,
662 bool rw)
663 {
664 uint32_t *dw = brw_state_batch(brw,
665 brw->isl_dev.ss.size,
666 brw->isl_dev.ss.align,
667 out_offset);
668
669 isl_buffer_fill_state(&brw->isl_dev, dw,
670 .address = (bo ? bo->offset64 : 0) + buffer_offset,
671 .size = buffer_size,
672 .format = surface_format,
673 .stride = pitch,
674 .mocs = tex_mocs[brw->gen]);
675
676 if (bo) {
677 brw_emit_reloc(&brw->batch, *out_offset + brw->isl_dev.ss.addr_offset,
678 bo, buffer_offset,
679 I915_GEM_DOMAIN_SAMPLER,
680 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
681 }
682 }
683
684 void
685 brw_update_buffer_texture_surface(struct gl_context *ctx,
686 unsigned unit,
687 uint32_t *surf_offset)
688 {
689 struct brw_context *brw = brw_context(ctx);
690 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
691 struct intel_buffer_object *intel_obj =
692 intel_buffer_object(tObj->BufferObject);
693 uint32_t size = tObj->BufferSize;
694 struct brw_bo *bo = NULL;
695 mesa_format format = tObj->_BufferObjectFormat;
696 const enum isl_format isl_format = brw_isl_format_for_mesa_format(format);
697 int texel_size = _mesa_get_format_bytes(format);
698
699 if (intel_obj) {
700 size = MIN2(size, intel_obj->Base.Size);
701 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size,
702 false);
703 }
704
705 /* The ARB_texture_buffer_specification says:
706 *
707 * "The number of texels in the buffer texture's texel array is given by
708 *
709 * floor(<buffer_size> / (<components> * sizeof(<base_type>)),
710 *
711 * where <buffer_size> is the size of the buffer object, in basic
712 * machine units and <components> and <base_type> are the element count
713 * and base data type for elements, as specified in Table X.1. The
714 * number of texels in the texel array is then clamped to the
715 * implementation-dependent limit MAX_TEXTURE_BUFFER_SIZE_ARB."
716 *
717 * We need to clamp the size in bytes to MAX_TEXTURE_BUFFER_SIZE * stride,
718 * so that when ISL divides by stride to obtain the number of texels, that
719 * texel count is clamped to MAX_TEXTURE_BUFFER_SIZE.
720 */
721 size = MIN2(size, ctx->Const.MaxTextureBufferSize * (unsigned) texel_size);
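/* Worked example of the clamp above (hypothetical numbers): a 1000-byte
* buffer bound with an RGBA32F format has texel_size == 16, so ISL's
* size / stride division yields floor(1000 / 16) == 62 texels, and a buffer
* larger than MaxTextureBufferSize * 16 bytes is first clipped so the
* resulting texel count never exceeds MAX_TEXTURE_BUFFER_SIZE.
*/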
722
723 if (isl_format == ISL_FORMAT_UNSUPPORTED) {
724 _mesa_problem(NULL, "bad format %s for texture buffer\n",
725 _mesa_get_format_name(format));
726 }
727
728 brw_emit_buffer_surface_state(brw, surf_offset, bo,
729 tObj->BufferOffset,
730 isl_format,
731 size,
732 texel_size,
733 false /* rw */);
734 }
735
736 /**
737 * Create the constant buffer surface. Vertex/fragment shader constants will be
738 * read from this buffer with Data Port Read instructions/messages.
739 */
740 void
741 brw_create_constant_surface(struct brw_context *brw,
742 struct brw_bo *bo,
743 uint32_t offset,
744 uint32_t size,
745 uint32_t *out_offset)
746 {
747 brw_emit_buffer_surface_state(brw, out_offset, bo, offset,
748 ISL_FORMAT_R32G32B32A32_FLOAT,
749 size, 1, false);
750 }
751
752 /**
753 * Create the buffer surface. Shader buffer variables will be
754 * read from / written to this buffer with Data Port Read/Write
755 * instructions/messages.
756 */
757 void
758 brw_create_buffer_surface(struct brw_context *brw,
759 struct brw_bo *bo,
760 uint32_t offset,
761 uint32_t size,
762 uint32_t *out_offset)
763 {
764 /* Use a raw surface so we can reuse existing untyped read/write/atomic
765 * messages. We need these specifically for the fragment shader since they
766 * include a pixel mask header that we need to ensure correct behavior
767 * with helper invocations, which cannot write to the buffer.
768 */
769 brw_emit_buffer_surface_state(brw, out_offset, bo, offset,
770 ISL_FORMAT_RAW,
771 size, 1, true);
772 }
773
774 /**
775 * Set up a binding table entry for use by stream output logic (transform
776 * feedback).
777 *
778 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
779 */
780 void
781 brw_update_sol_surface(struct brw_context *brw,
782 struct gl_buffer_object *buffer_obj,
783 uint32_t *out_offset, unsigned num_vector_components,
784 unsigned stride_dwords, unsigned offset_dwords)
785 {
786 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
787 uint32_t offset_bytes = 4 * offset_dwords;
788 struct brw_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
789 offset_bytes,
790 buffer_obj->Size - offset_bytes,
791 true);
792 uint32_t *surf = brw_state_batch(brw, 6 * 4, 32, out_offset);
793 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
794 size_t size_dwords = buffer_obj->Size / 4;
795 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
796
797 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
798 * too big to map using a single binding table entry?
799 */
800 assert((size_dwords - offset_dwords) / stride_dwords
801 <= BRW_MAX_NUM_BUFFER_ENTRIES);
802
803 if (size_dwords > offset_dwords + num_vector_components) {
804 /* There is room for at least 1 transform feedback output in the buffer.
805 * Compute the number of additional transform feedback outputs the
806 * buffer has room for.
807 */
808 buffer_size_minus_1 =
809 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
810 } else {
811 /* There isn't even room for a single transform feedback output in the
812 * buffer. We can't configure the binding table entry to prevent output
813 * entirely; we'll have to rely on the geometry shader to detect
814 * overflow. But to minimize the damage in case of a bug, set up the
815 * binding table entry to just allow a single output.
816 */
817 buffer_size_minus_1 = 0;
818 }
819 width = buffer_size_minus_1 & 0x7f;
820 height = (buffer_size_minus_1 & 0xfff80) >> 7;
821 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
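/* The three fields above are just buffer_size_minus_1 split into the 7/13/7
* bit groups the legacy SURFACE_STATE layout expects; e.g. (made-up number)
* buffer_size_minus_1 == 300000 == 0x493e0 decomposes into width == 0x60,
* height == 0x927, depth == 0, and width + (height << 7) + (depth << 20)
* reassembles 300000.
*/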
822
823 switch (num_vector_components) {
824 case 1:
825 surface_format = ISL_FORMAT_R32_FLOAT;
826 break;
827 case 2:
828 surface_format = ISL_FORMAT_R32G32_FLOAT;
829 break;
830 case 3:
831 surface_format = ISL_FORMAT_R32G32B32_FLOAT;
832 break;
833 case 4:
834 surface_format = ISL_FORMAT_R32G32B32A32_FLOAT;
835 break;
836 default:
837 unreachable("Invalid vector size for transform feedback output");
838 }
839
840 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
841 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
842 surface_format << BRW_SURFACE_FORMAT_SHIFT |
843 BRW_SURFACE_RC_READ_WRITE;
844 surf[1] = bo->offset64 + offset_bytes; /* reloc */
845 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
846 height << BRW_SURFACE_HEIGHT_SHIFT);
847 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
848 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
849 surf[4] = 0;
850 surf[5] = 0;
851
852 /* Emit relocation to surface contents. */
853 brw_emit_reloc(&brw->batch, *out_offset + 4, bo, offset_bytes,
854 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
855 }
856
857 /* Creates a new WM constant buffer reflecting the current fragment program's
858 * constants, if needed by the fragment program.
859 *
860 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
861 * state atom.
862 */
863 static void
864 brw_upload_wm_pull_constants(struct brw_context *brw)
865 {
866 struct brw_stage_state *stage_state = &brw->wm.base;
867 /* BRW_NEW_FRAGMENT_PROGRAM */
868 struct brw_program *fp = (struct brw_program *) brw->fragment_program;
869 /* BRW_NEW_FS_PROG_DATA */
870 struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data;
871
872 _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_FRAGMENT);
873 /* _NEW_PROGRAM_CONSTANTS */
874 brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program,
875 stage_state, prog_data);
876 }
877
878 const struct brw_tracked_state brw_wm_pull_constants = {
879 .dirty = {
880 .mesa = _NEW_PROGRAM_CONSTANTS,
881 .brw = BRW_NEW_BATCH |
882 BRW_NEW_BLORP |
883 BRW_NEW_FRAGMENT_PROGRAM |
884 BRW_NEW_FS_PROG_DATA,
885 },
886 .emit = brw_upload_wm_pull_constants,
887 };
888
889 /**
890 * Creates a null renderbuffer surface.
891 *
892 * This is used when the shader doesn't write to any color output. An FB
893 * write to target 0 will still be emitted, because that's how the thread is
894 * terminated (and computed depth is returned), so we need to have the
895 * hardware discard the target 0 color output.
896 */
897 static void
898 brw_emit_null_surface_state(struct brw_context *brw,
899 unsigned width,
900 unsigned height,
901 unsigned samples,
902 uint32_t *out_offset)
903 {
904 /* From the Sandy Bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
905 * Notes):
906 *
907 * A null surface will be used in instances where an actual surface is
908 * not bound. When a write message is generated to a null surface, no
909 * actual surface is written to. When a read message (including any
910 * sampling engine message) is generated to a null surface, the result
911 * is all zeros. Note that a null surface type is allowed to be used
912 * with all messages, even if it is not specifically indicated as
913 * supported. All of the remaining fields in surface state are ignored
914 * for null surfaces, with the following exceptions:
915 *
916 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
917 * depth buffer’s corresponding state for all render target surfaces,
918 * including null.
919 *
920 * - Surface Format must be R8G8B8A8_UNORM.
921 */
922 unsigned surface_type = BRW_SURFACE_NULL;
923 struct brw_bo *bo = NULL;
924 unsigned pitch_minus_1 = 0;
925 uint32_t multisampling_state = 0;
926 uint32_t *surf = brw_state_batch(brw, 6 * 4, 32, out_offset);
927
928 if (samples > 1) {
929 /* On Gen6, null render targets seem to cause GPU hangs when
930 * multisampling. So work around this problem by rendering into a dummy
931 * color buffer.
932 *
933 * To decrease the amount of memory needed by the workaround buffer, we
934 * set its pitch to 128 bytes (the width of a Y tile). This means that
935 * the amount of memory needed for the workaround buffer is
936 * (width_in_tiles + height_in_tiles - 1) tiles.
937 *
938 * Note that since the workaround buffer will be interpreted by the
939 * hardware as an interleaved multisampled buffer, we need to compute
940 * width_in_tiles and height_in_tiles by dividing the width and height
941 * by 16 rather than the normal Y-tile size of 32.
942 */
943 unsigned width_in_tiles = ALIGN(width, 16) / 16;
944 unsigned height_in_tiles = ALIGN(height, 16) / 16;
945 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
946 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
947 size_needed);
948 bo = brw->wm.multisampled_null_render_target_bo;
949 surface_type = BRW_SURFACE_2D;
950 pitch_minus_1 = 127;
951 multisampling_state = brw_get_surface_num_multisamples(samples);
952 }
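/* Rough cost of the workaround buffer (illustrative numbers): for a
* 1920x1080 multisampled null target, width_in_tiles == 120 and
* height_in_tiles == 68, so size_needed == (120 + 68 - 1) * 4096
* == 765952 bytes -- well under the ~8 MB a fully allocated 1920x1080
* RGBA8 dummy buffer would take.
*/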
953
954 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
955 ISL_FORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
956 if (brw->gen < 6) {
957 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
958 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
959 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
960 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
961 }
962 surf[1] = bo ? bo->offset64 : 0;
963 surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
964 (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
965
966 /* From the Sandy Bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
967 * Notes):
968 *
969 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
970 */
971 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
972 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
973 surf[4] = multisampling_state;
974 surf[5] = 0;
975
976 if (bo) {
977 brw_emit_reloc(&brw->batch, *out_offset + 4, bo, 0,
978 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
979 }
980 }
981
982 /**
983 * Sets up a surface state structure to point at the given region.
984 * While it is only used for the front/back buffer currently, it should be
985 * usable for further buffers when doing ARB_draw_buffer support.
986 */
987 static uint32_t
988 gen4_update_renderbuffer_surface(struct brw_context *brw,
989 struct gl_renderbuffer *rb,
990 uint32_t flags, unsigned unit,
991 uint32_t surf_index)
992 {
993 struct gl_context *ctx = &brw->ctx;
994 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
995 struct intel_mipmap_tree *mt = irb->mt;
996 uint32_t *surf;
997 uint32_t tile_x, tile_y;
998 enum isl_format format;
999 uint32_t offset;
1000 /* _NEW_BUFFERS */
1001 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
1002 /* BRW_NEW_FS_PROG_DATA */
1003
1004 assert(!(flags & INTEL_RENDERBUFFER_LAYERED));
1005 assert(!(flags & INTEL_AUX_BUFFER_DISABLED));
1006
1007 if (rb->TexImage && !brw->has_surface_tile_offset) {
1008 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
1009
1010 if (tile_x != 0 || tile_y != 0) {
1011 /* Original gen4 hardware couldn't draw to a non-tile-aligned
1012 * destination in a miptree unless you actually set up your renderbuffer
1013 * as a miptree and used the fragile lod/array_index/etc. controls to
1014 * select the image. So, instead, we just make a new single-level
1015 * miptree and render into that.
1016 */
1017 intel_renderbuffer_move_to_temp(brw, irb, false);
1018 assert(irb->align_wa_mt);
1019 mt = irb->align_wa_mt;
1020 }
1021 }
1022
1023 surf = brw_state_batch(brw, 6 * 4, 32, &offset);
1024
1025 format = brw->mesa_to_isl_render_format[rb_format];
1026 if (unlikely(!brw->mesa_format_supports_render[rb_format])) {
1027 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
1028 __func__, _mesa_get_format_name(rb_format));
1029 }
1030
1031 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
1032 format << BRW_SURFACE_FORMAT_SHIFT);
1033
1034 /* reloc */
1035 assert(mt->offset % mt->cpp == 0);
1036 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
1037 mt->bo->offset64 + mt->offset);
1038
1039 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
1040 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
1041
1042 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
1043 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
1044
1045 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
1046
1047 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
1048 /* Note that the low bits of these fields are missing, so
1049 * there's the possibility of getting in trouble.
1050 */
1051 assert(tile_x % 4 == 0);
1052 assert(tile_y % 2 == 0);
1053 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
1054 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
1055 (mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
1056
1057 if (brw->gen < 6) {
1058 /* _NEW_COLOR */
1059 if (!ctx->Color.ColorLogicOpEnabled && !ctx->Color._AdvancedBlendMode &&
1060 (ctx->Color.BlendEnabled & (1 << unit)))
1061 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
1062
1063 if (!ctx->Color.ColorMask[unit][0])
1064 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
1065 if (!ctx->Color.ColorMask[unit][1])
1066 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
1067 if (!ctx->Color.ColorMask[unit][2])
1068 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
1069
1070 /* As mentioned above, disable writes to the alpha component when the
1071 * renderbuffer is XRGB.
1072 */
1073 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
1074 !ctx->Color.ColorMask[unit][3]) {
1075 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
1076 }
1077 }
1078
1079 brw_emit_reloc(&brw->batch, offset + 4, mt->bo, surf[1] - mt->bo->offset64,
1080 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
1081
1082 return offset;
1083 }
1084
1085 /**
1086 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
1087 */
1088 void
1089 brw_update_renderbuffer_surfaces(struct brw_context *brw,
1090 const struct gl_framebuffer *fb,
1091 uint32_t render_target_start,
1092 uint32_t *surf_offset)
1093 {
1094 GLuint i;
1095 const unsigned int w = _mesa_geometric_width(fb);
1096 const unsigned int h = _mesa_geometric_height(fb);
1097 const unsigned int s = _mesa_geometric_samples(fb);
1098
1099 /* Update surfaces for drawing buffers */
1100 if (fb->_NumColorDrawBuffers >= 1) {
1101 for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
1102 const uint32_t surf_index = render_target_start + i;
1103 const int flags = (_mesa_geometric_layers(fb) > 0 ?
1104 INTEL_RENDERBUFFER_LAYERED : 0) |
1105 (brw->draw_aux_buffer_disabled[i] ?
1106 INTEL_AUX_BUFFER_DISABLED : 0);
1107
1108 if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
1109 surf_offset[surf_index] =
1110 brw->vtbl.update_renderbuffer_surface(
1111 brw, fb->_ColorDrawBuffers[i], flags, i, surf_index);
1112 } else {
1113 brw->vtbl.emit_null_surface_state(brw, w, h, s,
1114 &surf_offset[surf_index]);
1115 }
1116 }
1117 } else {
1118 const uint32_t surf_index = render_target_start;
1119 brw->vtbl.emit_null_surface_state(brw, w, h, s,
1120 &surf_offset[surf_index]);
1121 }
1122 }
1123
1124 static void
1125 update_renderbuffer_surfaces(struct brw_context *brw)
1126 {
1127 const struct gl_context *ctx = &brw->ctx;
1128
1129 /* BRW_NEW_FS_PROG_DATA */
1130 const struct brw_wm_prog_data *wm_prog_data =
1131 brw_wm_prog_data(brw->wm.base.prog_data);
1132
1133 /* _NEW_BUFFERS | _NEW_COLOR */
1134 const struct gl_framebuffer *fb = ctx->DrawBuffer;
1135 brw_update_renderbuffer_surfaces(
1136 brw, fb,
1137 wm_prog_data->binding_table.render_target_start,
1138 brw->wm.base.surf_offset);
1139 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1140 }
1141
1142 const struct brw_tracked_state brw_renderbuffer_surfaces = {
1143 .dirty = {
1144 .mesa = _NEW_BUFFERS |
1145 _NEW_COLOR,
1146 .brw = BRW_NEW_BATCH |
1147 BRW_NEW_BLORP |
1148 BRW_NEW_FS_PROG_DATA,
1149 },
1150 .emit = update_renderbuffer_surfaces,
1151 };
1152
1153 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
1154 .dirty = {
1155 .mesa = _NEW_BUFFERS,
1156 .brw = BRW_NEW_BATCH |
1157 BRW_NEW_BLORP,
1158 },
1159 .emit = update_renderbuffer_surfaces,
1160 };
1161
1162 static void
1163 update_renderbuffer_read_surfaces(struct brw_context *brw)
1164 {
1165 const struct gl_context *ctx = &brw->ctx;
1166
1167 /* BRW_NEW_FS_PROG_DATA */
1168 const struct brw_wm_prog_data *wm_prog_data =
1169 brw_wm_prog_data(brw->wm.base.prog_data);
1170
1171 /* BRW_NEW_FRAGMENT_PROGRAM */
1172 if (!ctx->Extensions.MESA_shader_framebuffer_fetch &&
1173 brw->fragment_program && brw->fragment_program->info.outputs_read) {
1174 /* _NEW_BUFFERS */
1175 const struct gl_framebuffer *fb = ctx->DrawBuffer;
1176
1177 for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
1178 struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];
1179 const struct intel_renderbuffer *irb = intel_renderbuffer(rb);
1180 const unsigned surf_index =
1181 wm_prog_data->binding_table.render_target_read_start + i;
1182 uint32_t *surf_offset = &brw->wm.base.surf_offset[surf_index];
1183
1184 if (irb) {
1185 const enum isl_format format = brw->mesa_to_isl_render_format[
1186 _mesa_get_render_format(ctx, intel_rb_format(irb))];
1187 assert(isl_format_supports_sampling(&brw->screen->devinfo,
1188 format));
1189
1190 /* Override the target of the texture if the render buffer is a
1191 * single slice of a 3D texture (since the minimum array element
1192 * field of the surface state structure is ignored by the sampler
1193 * unit for 3D textures on some hardware), or if the render buffer
1194 * is a 1D array (since shaders always provide the array index
1195 * coordinate at the Z component to avoid state-dependent
1196 * recompiles when changing the texture target of the
1197 * framebuffer).
1198 */
1199 const GLenum target =
1200 (irb->mt->target == GL_TEXTURE_3D &&
1201 irb->layer_count == 1) ? GL_TEXTURE_2D :
1202 irb->mt->target == GL_TEXTURE_1D_ARRAY ? GL_TEXTURE_2D_ARRAY :
1203 irb->mt->target;
1204
1205 /* intel_renderbuffer::mt_layer is expressed in sample units for
1206 * the UMS and CMS multisample layouts, but
1207 * intel_renderbuffer::layer_count is expressed in units of whole
1208 * logical layers regardless of the multisample layout.
1209 */
1210 const unsigned mt_layer_unit =
1211 (irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_UMS ||
1212 irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) ?
1213 MAX2(irb->mt->num_samples, 1) : 1;
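/* E.g. (hypothetical setup) for an 8x CMS surface, irb->mt_layer counts
* samples, so logical layer 2 is stored as mt_layer == 16 and the division
* by mt_layer_unit == 8 below recovers base_array_layer == 2.
*/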
1214
1215 const struct isl_view view = {
1216 .format = format,
1217 .base_level = irb->mt_level - irb->mt->first_level,
1218 .levels = 1,
1219 .base_array_layer = irb->mt_layer / mt_layer_unit,
1220 .array_len = irb->layer_count,
1221 .swizzle = ISL_SWIZZLE_IDENTITY,
1222 .usage = ISL_SURF_USAGE_TEXTURE_BIT,
1223 };
1224
1225 const int flags = brw->draw_aux_buffer_disabled[i] ?
1226 INTEL_AUX_BUFFER_DISABLED : 0;
1227 brw_emit_surface_state(brw, irb->mt, flags, target, view,
1228 tex_mocs[brw->gen],
1229 surf_offset, surf_index,
1230 I915_GEM_DOMAIN_SAMPLER, 0);
1231
1232 } else {
1233 brw->vtbl.emit_null_surface_state(
1234 brw, _mesa_geometric_width(fb), _mesa_geometric_height(fb),
1235 _mesa_geometric_samples(fb), surf_offset);
1236 }
1237 }
1238
1239 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1240 }
1241 }
1242
1243 const struct brw_tracked_state brw_renderbuffer_read_surfaces = {
1244 .dirty = {
1245 .mesa = _NEW_BUFFERS,
1246 .brw = BRW_NEW_BATCH |
1247 BRW_NEW_FRAGMENT_PROGRAM |
1248 BRW_NEW_FS_PROG_DATA,
1249 },
1250 .emit = update_renderbuffer_read_surfaces,
1251 };
1252
1253 static void
1254 update_stage_texture_surfaces(struct brw_context *brw,
1255 const struct gl_program *prog,
1256 struct brw_stage_state *stage_state,
1257 bool for_gather, uint32_t plane)
1258 {
1259 if (!prog)
1260 return;
1261
1262 struct gl_context *ctx = &brw->ctx;
1263
1264 uint32_t *surf_offset = stage_state->surf_offset;
1265
1266 /* BRW_NEW_*_PROG_DATA */
1267 if (for_gather)
1268 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
1269 else
1270 surf_offset += stage_state->prog_data->binding_table.plane_start[plane];
1271
1272 unsigned num_samplers = util_last_bit(prog->SamplersUsed);
1273 for (unsigned s = 0; s < num_samplers; s++) {
1274 surf_offset[s] = 0;
1275
1276 if (prog->SamplersUsed & (1 << s)) {
1277 const unsigned unit = prog->SamplerUnits[s];
1278
1279 /* _NEW_TEXTURE */
1280 if (ctx->Texture.Unit[unit]._Current) {
1281 brw_update_texture_surface(ctx, unit, surf_offset + s, for_gather, plane);
1282 }
1283 }
1284 }
1285 }
1286
1287
1288 /**
1289 * Construct SURFACE_STATE objects for enabled textures.
1290 */
1291 static void
1292 brw_update_texture_surfaces(struct brw_context *brw)
1293 {
1294 /* BRW_NEW_VERTEX_PROGRAM */
1295 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
1296
1297 /* BRW_NEW_TESS_PROGRAMS */
1298 struct gl_program *tcs = (struct gl_program *) brw->tess_ctrl_program;
1299 struct gl_program *tes = (struct gl_program *) brw->tess_eval_program;
1300
1301 /* BRW_NEW_GEOMETRY_PROGRAM */
1302 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
1303
1304 /* BRW_NEW_FRAGMENT_PROGRAM */
1305 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
1306
1307 /* _NEW_TEXTURE */
1308 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false, 0);
1309 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false, 0);
1310 update_stage_texture_surfaces(brw, tes, &brw->tes.base, false, 0);
1311 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false, 0);
1312 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 0);
1313
1314 /* Emit an alternate set of surface state for gather. This
1315 * allows the surface format to be overridden for only the
1316 * gather4 messages. */
1317 if (brw->gen < 8) {
1318 if (vs && vs->nir->info.uses_texture_gather)
1319 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true, 0);
1320 if (tcs && tcs->nir->info.uses_texture_gather)
1321 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true, 0);
1322 if (tes && tes->nir->info.uses_texture_gather)
1323 update_stage_texture_surfaces(brw, tes, &brw->tes.base, true, 0);
1324 if (gs && gs->nir->info.uses_texture_gather)
1325 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true, 0);
1326 if (fs && fs->nir->info.uses_texture_gather)
1327 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true, 0);
1328 }
1329
1330 if (fs) {
1331 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 1);
1332 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 2);
1333 }
1334
1335 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1336 }
1337
1338 const struct brw_tracked_state brw_texture_surfaces = {
1339 .dirty = {
1340 .mesa = _NEW_TEXTURE,
1341 .brw = BRW_NEW_BATCH |
1342 BRW_NEW_BLORP |
1343 BRW_NEW_FRAGMENT_PROGRAM |
1344 BRW_NEW_FS_PROG_DATA |
1345 BRW_NEW_GEOMETRY_PROGRAM |
1346 BRW_NEW_GS_PROG_DATA |
1347 BRW_NEW_TESS_PROGRAMS |
1348 BRW_NEW_TCS_PROG_DATA |
1349 BRW_NEW_TES_PROG_DATA |
1350 BRW_NEW_TEXTURE_BUFFER |
1351 BRW_NEW_VERTEX_PROGRAM |
1352 BRW_NEW_VS_PROG_DATA,
1353 },
1354 .emit = brw_update_texture_surfaces,
1355 };
1356
1357 static void
1358 brw_update_cs_texture_surfaces(struct brw_context *brw)
1359 {
1360 /* BRW_NEW_COMPUTE_PROGRAM */
1361 struct gl_program *cs = (struct gl_program *) brw->compute_program;
1362
1363 /* _NEW_TEXTURE */
1364 update_stage_texture_surfaces(brw, cs, &brw->cs.base, false, 0);
1365
1366 /* Emit an alternate set of surface state for gather. This
1367 * allows the surface format to be overridden for only the
1368 * gather4 messages.
1369 */
1370 if (brw->gen < 8) {
1371 if (cs && cs->nir->info.uses_texture_gather)
1372 update_stage_texture_surfaces(brw, cs, &brw->cs.base, true, 0);
1373 }
1374
1375 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1376 }
1377
1378 const struct brw_tracked_state brw_cs_texture_surfaces = {
1379 .dirty = {
1380 .mesa = _NEW_TEXTURE,
1381 .brw = BRW_NEW_BATCH |
1382 BRW_NEW_BLORP |
1383 BRW_NEW_COMPUTE_PROGRAM,
1384 },
1385 .emit = brw_update_cs_texture_surfaces,
1386 };
1387
1388
1389 void
1390 brw_upload_ubo_surfaces(struct brw_context *brw, struct gl_program *prog,
1391 struct brw_stage_state *stage_state,
1392 struct brw_stage_prog_data *prog_data)
1393 {
1394 struct gl_context *ctx = &brw->ctx;
1395
1396 if (!prog)
1397 return;
1398
1399 uint32_t *ubo_surf_offsets =
1400 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
1401
1402 for (int i = 0; i < prog->info.num_ubos; i++) {
1403 struct gl_uniform_buffer_binding *binding =
1404 &ctx->UniformBufferBindings[prog->sh.UniformBlocks[i]->Binding];
1405
1406 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
1407 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]);
1408 } else {
1409 struct intel_buffer_object *intel_bo =
1410 intel_buffer_object(binding->BufferObject);
1411 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
1412 if (!binding->AutomaticSize)
1413 size = MIN2(size, binding->Size);
1414 struct brw_bo *bo =
1415 intel_bufferobj_buffer(brw, intel_bo,
1416 binding->Offset,
1417 size, false);
1418 brw_create_constant_surface(brw, bo, binding->Offset,
1419 size,
1420 &ubo_surf_offsets[i]);
1421 }
1422 }
1423
1424 uint32_t *ssbo_surf_offsets =
1425 &stage_state->surf_offset[prog_data->binding_table.ssbo_start];
1426
1427 for (int i = 0; i < prog->info.num_ssbos; i++) {
1428 struct gl_shader_storage_buffer_binding *binding =
1429 &ctx->ShaderStorageBufferBindings[prog->sh.ShaderStorageBlocks[i]->Binding];
1430
1431 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
1432 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]);
1433 } else {
1434 struct intel_buffer_object *intel_bo =
1435 intel_buffer_object(binding->BufferObject);
1436 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
1437 if (!binding->AutomaticSize)
1438 size = MIN2(size, binding->Size);
1439 struct brw_bo *bo =
1440 intel_bufferobj_buffer(brw, intel_bo,
1441 binding->Offset,
1442 size, true);
1443 brw_create_buffer_surface(brw, bo, binding->Offset,
1444 size,
1445 &ssbo_surf_offsets[i]);
1446 }
1447 }
1448
1449 stage_state->push_constants_dirty = true;
1450
1451 if (prog->info.num_ubos || prog->info.num_ssbos)
1452 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1453 }
1454
1455 static void
1456 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
1457 {
1458 struct gl_context *ctx = &brw->ctx;
1459 /* _NEW_PROGRAM */
1460 struct gl_program *prog = ctx->FragmentProgram._Current;
1461
1462 /* BRW_NEW_FS_PROG_DATA */
1463 brw_upload_ubo_surfaces(brw, prog, &brw->wm.base, brw->wm.base.prog_data);
1464 }
1465
1466 const struct brw_tracked_state brw_wm_ubo_surfaces = {
1467 .dirty = {
1468 .mesa = _NEW_PROGRAM,
1469 .brw = BRW_NEW_BATCH |
1470 BRW_NEW_BLORP |
1471 BRW_NEW_FS_PROG_DATA |
1472 BRW_NEW_UNIFORM_BUFFER,
1473 },
1474 .emit = brw_upload_wm_ubo_surfaces,
1475 };
1476
1477 static void
1478 brw_upload_cs_ubo_surfaces(struct brw_context *brw)
1479 {
1480 struct gl_context *ctx = &brw->ctx;
1481 /* _NEW_PROGRAM */
1482 struct gl_program *prog =
1483 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1484
1485 /* BRW_NEW_CS_PROG_DATA */
1486 brw_upload_ubo_surfaces(brw, prog, &brw->cs.base, brw->cs.base.prog_data);
1487 }
1488
1489 const struct brw_tracked_state brw_cs_ubo_surfaces = {
1490 .dirty = {
1491 .mesa = _NEW_PROGRAM,
1492 .brw = BRW_NEW_BATCH |
1493 BRW_NEW_BLORP |
1494 BRW_NEW_CS_PROG_DATA |
1495 BRW_NEW_UNIFORM_BUFFER,
1496 },
1497 .emit = brw_upload_cs_ubo_surfaces,
1498 };
1499
1500 void
1501 brw_upload_abo_surfaces(struct brw_context *brw,
1502 const struct gl_program *prog,
1503 struct brw_stage_state *stage_state,
1504 struct brw_stage_prog_data *prog_data)
1505 {
1506 struct gl_context *ctx = &brw->ctx;
1507 uint32_t *surf_offsets =
1508 &stage_state->surf_offset[prog_data->binding_table.abo_start];
1509
1510 if (prog->info.num_abos) {
1511 for (unsigned i = 0; i < prog->info.num_abos; i++) {
1512 struct gl_atomic_buffer_binding *binding =
1513 &ctx->AtomicBufferBindings[prog->sh.AtomicBuffers[i]->Binding];
1514 struct intel_buffer_object *intel_bo =
1515 intel_buffer_object(binding->BufferObject);
1516 struct brw_bo *bo =
1517 intel_bufferobj_buffer(brw, intel_bo, binding->Offset,
1518 intel_bo->Base.Size - binding->Offset,
1519 true);
1520
1521 brw_emit_buffer_surface_state(brw, &surf_offsets[i], bo,
1522 binding->Offset, ISL_FORMAT_RAW,
1523 bo->size - binding->Offset, 1, true);
1524 }
1525
1526 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1527 }
1528 }
1529
1530 static void
1531 brw_upload_wm_abo_surfaces(struct brw_context *brw)
1532 {
1533 /* _NEW_PROGRAM */
1534 const struct gl_program *wm = brw->fragment_program;
1535
1536 if (wm) {
1537 /* BRW_NEW_FS_PROG_DATA */
1538 brw_upload_abo_surfaces(brw, wm, &brw->wm.base, brw->wm.base.prog_data);
1539 }
1540 }
1541
1542 const struct brw_tracked_state brw_wm_abo_surfaces = {
1543 .dirty = {
1544 .mesa = _NEW_PROGRAM,
1545 .brw = BRW_NEW_ATOMIC_BUFFER |
1546 BRW_NEW_BLORP |
1547 BRW_NEW_BATCH |
1548 BRW_NEW_FS_PROG_DATA,
1549 },
1550 .emit = brw_upload_wm_abo_surfaces,
1551 };
1552
1553 static void
1554 brw_upload_cs_abo_surfaces(struct brw_context *brw)
1555 {
1556 /* _NEW_PROGRAM */
1557 const struct gl_program *cp = brw->compute_program;
1558
1559 if (cp) {
1560 /* BRW_NEW_CS_PROG_DATA */
1561 brw_upload_abo_surfaces(brw, cp, &brw->cs.base, brw->cs.base.prog_data);
1562 }
1563 }
1564
1565 const struct brw_tracked_state brw_cs_abo_surfaces = {
1566 .dirty = {
1567 .mesa = _NEW_PROGRAM,
1568 .brw = BRW_NEW_ATOMIC_BUFFER |
1569 BRW_NEW_BLORP |
1570 BRW_NEW_BATCH |
1571 BRW_NEW_CS_PROG_DATA,
1572 },
1573 .emit = brw_upload_cs_abo_surfaces,
1574 };
1575
1576 static void
1577 brw_upload_cs_image_surfaces(struct brw_context *brw)
1578 {
1579 /* _NEW_PROGRAM */
1580 const struct gl_program *cp = brw->compute_program;
1581
1582 if (cp) {
1583 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1584 brw_upload_image_surfaces(brw, cp, &brw->cs.base,
1585 brw->cs.base.prog_data);
1586 }
1587 }
1588
1589 const struct brw_tracked_state brw_cs_image_surfaces = {
1590 .dirty = {
1591 .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
1592 .brw = BRW_NEW_BATCH |
1593 BRW_NEW_BLORP |
1594 BRW_NEW_CS_PROG_DATA |
1595 BRW_NEW_IMAGE_UNITS
1596 },
1597 .emit = brw_upload_cs_image_surfaces,
1598 };
1599
1600 static uint32_t
1601 get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
1602 {
1603 const struct gen_device_info *devinfo = &brw->screen->devinfo;
1604 enum isl_format hw_format = brw_isl_format_for_mesa_format(format);
1605 if (access == GL_WRITE_ONLY) {
1606 return hw_format;
1607 } else if (isl_has_matching_typed_storage_image_format(devinfo, hw_format)) {
1608 /* Typed surface reads support a very limited subset of the shader
1609 * image formats. Translate it into the closest format the
1610 * hardware supports.
1611 */
1612 return isl_lower_storage_image_format(devinfo, hw_format);
1613 } else {
1614 /* The hardware doesn't actually support a typed format that we can use
1615 * so we have to fall back to untyped read/write messages.
1616 */
1617 return ISL_FORMAT_RAW;
1618 }
1619 }
1620
1621 static void
1622 update_default_image_param(struct brw_context *brw,
1623 struct gl_image_unit *u,
1624 unsigned surface_idx,
1625 struct brw_image_param *param)
1626 {
1627 memset(param, 0, sizeof(*param));
1628 param->surface_idx = surface_idx;
1629 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1630 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1631 * detailed explanation of these parameters.
1632 */
1633 param->swizzling[0] = 0xff;
1634 param->swizzling[1] = 0xff;
1635 }
1636
1637 static void
1638 update_buffer_image_param(struct brw_context *brw,
1639 struct gl_image_unit *u,
1640 unsigned surface_idx,
1641 struct brw_image_param *param)
1642 {
1643 struct gl_buffer_object *obj = u->TexObj->BufferObject;
1644 const uint32_t size = MIN2((uint32_t)u->TexObj->BufferSize, obj->Size);
1645 update_default_image_param(brw, u, surface_idx, param);
1646
1647 param->size[0] = size / _mesa_get_format_bytes(u->_ActualFormat);
1648 param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
1649 }
1650
1651 static void
1652 update_texture_image_param(struct brw_context *brw,
1653 struct gl_image_unit *u,
1654 unsigned surface_idx,
1655 struct brw_image_param *param)
1656 {
1657 struct intel_mipmap_tree *mt = intel_texture_object(u->TexObj)->mt;
1658
1659 update_default_image_param(brw, u, surface_idx, param);
1660
1661 param->size[0] = minify(mt->logical_width0, u->Level);
1662 param->size[1] = minify(mt->logical_height0, u->Level);
1663 param->size[2] = (!u->Layered ? 1 :
1664 u->TexObj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1665 u->TexObj->Target == GL_TEXTURE_3D ?
1666 minify(mt->logical_depth0, u->Level) :
1667 mt->logical_depth0);
1668
1669 intel_miptree_get_image_offset(mt, u->Level, u->_Layer,
1670 &param->offset[0],
1671 &param->offset[1]);
1672
1673 param->stride[0] = mt->cpp;
1674 param->stride[1] = mt->pitch / mt->cpp;
1675 param->stride[2] =
1676 brw_miptree_get_horizontal_slice_pitch(brw, mt, u->Level);
1677 param->stride[3] =
1678 brw_miptree_get_vertical_slice_pitch(brw, mt, u->Level);
1679
1680 if (mt->tiling == I915_TILING_X) {
1681 /* An X tile is a rectangular block of 512x8 bytes. */
1682 param->tiling[0] = _mesa_logbase2(512 / mt->cpp);
1683 param->tiling[1] = _mesa_logbase2(8);
1684
1685 if (brw->has_swizzling) {
1686 /* Right shifts required to swizzle bits 9 and 10 of the memory
1687 * address with bit 6.
1688 */
1689 param->swizzling[0] = 3;
1690 param->swizzling[1] = 4;
1691 }
1692 } else if (mt->tiling == I915_TILING_Y) {
1693 /* The layout of a Y-tiled surface in memory isn't really fundamentally
1694 * different from the layout of an X-tiled surface; we simply pretend that
1695 * the surface is broken up into a number of smaller 16Bx32 tiles, each
1696 * one arranged in X-major order just like is the case for X-tiling.
1697 */
1698 param->tiling[0] = _mesa_logbase2(16 / mt->cpp);
1699 param->tiling[1] = _mesa_logbase2(32);
1700
1701 if (brw->has_swizzling) {
1702 /* Right shift required to swizzle bit 9 of the memory address with
1703 * bit 6.
1704 */
1705 param->swizzling[0] = 3;
1706 }
1707 }
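/* Concrete values for the common 4-byte-per-texel case (for illustration,
* assuming mt->cpp == 4): X-tiled gives tiling[0] == log2(512 / 4) == 7 and
* tiling[1] == log2(8) == 3; Y-tiled gives tiling[0] == log2(16 / 4) == 2
* and tiling[1] == log2(32) == 5.
*/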
1708
1709 /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The
1710 * address calculation algorithm (emit_address_calculation() in
1711 * brw_fs_surface_builder.cpp) handles this as a sort of tiling with
1712 * modulus equal to the LOD.
1713 */
1714 param->tiling[2] = (u->TexObj->Target == GL_TEXTURE_3D ? u->Level :
1715 0);
1716 }
1717
1718 static void
1719 update_image_surface(struct brw_context *brw,
1720 struct gl_image_unit *u,
1721 GLenum access,
1722 unsigned surface_idx,
1723 uint32_t *surf_offset,
1724 struct brw_image_param *param)
1725 {
1726 if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
1727 struct gl_texture_object *obj = u->TexObj;
1728 const unsigned format = get_image_format(brw, u->_ActualFormat, access);
1729
1730 if (obj->Target == GL_TEXTURE_BUFFER) {
1731 struct intel_buffer_object *intel_obj =
1732 intel_buffer_object(obj->BufferObject);
1733 const unsigned texel_size = (format == ISL_FORMAT_RAW ? 1 :
1734 _mesa_get_format_bytes(u->_ActualFormat));
1735
1736 brw_emit_buffer_surface_state(
1737 brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
1738 format, intel_obj->Base.Size, texel_size,
1739 access != GL_READ_ONLY);
1740
1741 update_buffer_image_param(brw, u, surface_idx, param);
1742
1743 } else {
1744 struct intel_texture_object *intel_obj = intel_texture_object(obj);
1745 struct intel_mipmap_tree *mt = intel_obj->mt;
1746
1747 if (format == ISL_FORMAT_RAW) {
1748 brw_emit_buffer_surface_state(
1749 brw, surf_offset, mt->bo, mt->offset,
1750 format, mt->bo->size - mt->offset, 1 /* pitch */,
1751 access != GL_READ_ONLY);
1752
1753 } else {
1754 const unsigned num_layers = (!u->Layered ? 1 :
1755 obj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1756 mt->logical_depth0);
1757
1758 struct isl_view view = {
1759 .format = format,
1760 .base_level = obj->MinLevel + u->Level,
1761 .levels = 1,
1762 .base_array_layer = obj->MinLayer + u->_Layer,
1763 .array_len = num_layers,
1764 .swizzle = ISL_SWIZZLE_IDENTITY,
1765 .usage = ISL_SURF_USAGE_STORAGE_BIT,
1766 };
1767
1768 const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
1769 assert(!intel_miptree_has_color_unresolved(mt,
1770 view.base_level, 1,
1771 view.base_array_layer,
1772 view.array_len));
1773 brw_emit_surface_state(brw, mt, INTEL_AUX_BUFFER_DISABLED,
1774 mt->target, view, tex_mocs[brw->gen],
1775 surf_offset, surf_index,
1776 I915_GEM_DOMAIN_SAMPLER,
1777 access == GL_READ_ONLY ? 0 :
1778 I915_GEM_DOMAIN_SAMPLER);
1779 }
1780
1781 update_texture_image_param(brw, u, surface_idx, param);
1782 }
1783
1784 } else {
1785 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset);
1786 update_default_image_param(brw, u, surface_idx, param);
1787 }
1788 }
1789
1790 void
1791 brw_upload_image_surfaces(struct brw_context *brw,
1792 const struct gl_program *prog,
1793 struct brw_stage_state *stage_state,
1794 struct brw_stage_prog_data *prog_data)
1795 {
1796 assert(prog);
1797 struct gl_context *ctx = &brw->ctx;
1798
1799 if (prog->info.num_images) {
1800 for (unsigned i = 0; i < prog->info.num_images; i++) {
1801 struct gl_image_unit *u = &ctx->ImageUnits[prog->sh.ImageUnits[i]];
1802 const unsigned surf_idx = prog_data->binding_table.image_start + i;
1803
1804 update_image_surface(brw, u, prog->sh.ImageAccess[i],
1805 surf_idx,
1806 &stage_state->surf_offset[surf_idx],
1807 &prog_data->image_param[i]);
1808 }
1809
1810 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1811 /* This may have changed the image metadata dependent on the context
1812 * image unit state and passed to the program as uniforms, make sure
1813 * that push and pull constants are reuploaded.
1814 */
1815 brw->NewGLState |= _NEW_PROGRAM_CONSTANTS;
1816 }
1817 }
1818
1819 static void
1820 brw_upload_wm_image_surfaces(struct brw_context *brw)
1821 {
1822 /* BRW_NEW_FRAGMENT_PROGRAM */
1823 const struct gl_program *wm = brw->fragment_program;
1824
1825 if (wm) {
1826 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1827 brw_upload_image_surfaces(brw, wm, &brw->wm.base,
1828 brw->wm.base.prog_data);
1829 }
1830 }
1831
1832 const struct brw_tracked_state brw_wm_image_surfaces = {
1833 .dirty = {
1834 .mesa = _NEW_TEXTURE,
1835 .brw = BRW_NEW_BATCH |
1836 BRW_NEW_BLORP |
1837 BRW_NEW_FRAGMENT_PROGRAM |
1838 BRW_NEW_FS_PROG_DATA |
1839 BRW_NEW_IMAGE_UNITS
1840 },
1841 .emit = brw_upload_wm_image_surfaces,
1842 };
1843
1844 void
1845 gen4_init_vtable_surface_functions(struct brw_context *brw)
1846 {
1847 brw->vtbl.update_renderbuffer_surface = gen4_update_renderbuffer_surface;
1848 brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
1849 }
1850
1851 void
1852 gen6_init_vtable_surface_functions(struct brw_context *brw)
1853 {
1854 gen4_init_vtable_surface_functions(brw);
1855 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
1856 }
1857
1858 static void
1859 brw_upload_cs_work_groups_surface(struct brw_context *brw)
1860 {
1861 struct gl_context *ctx = &brw->ctx;
1862 /* _NEW_PROGRAM */
1863 struct gl_program *prog =
1864 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1865 /* BRW_NEW_CS_PROG_DATA */
1866 const struct brw_cs_prog_data *cs_prog_data =
1867 brw_cs_prog_data(brw->cs.base.prog_data);
1868
1869 if (prog && cs_prog_data->uses_num_work_groups) {
1870 const unsigned surf_idx =
1871 cs_prog_data->binding_table.work_groups_start;
1872 uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
1873 struct brw_bo *bo;
1874 uint32_t bo_offset;
1875
1876 if (brw->compute.num_work_groups_bo == NULL) {
1877 bo = NULL;
1878 intel_upload_data(brw,
1879 (void *)brw->compute.num_work_groups,
1880 3 * sizeof(GLuint),
1881 sizeof(GLuint),
1882 &bo,
1883 &bo_offset);
1884 } else {
1885 bo = brw->compute.num_work_groups_bo;
1886 bo_offset = brw->compute.num_work_groups_offset;
1887 }
1888
1889 brw_emit_buffer_surface_state(brw, surf_offset,
1890 bo, bo_offset,
1891 ISL_FORMAT_RAW,
1892 3 * sizeof(GLuint), 1, true);
1893 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1894 }
1895 }
1896
1897 const struct brw_tracked_state brw_cs_work_groups_surface = {
1898 .dirty = {
1899 .brw = BRW_NEW_BLORP |
1900 BRW_NEW_CS_PROG_DATA |
1901 BRW_NEW_CS_WORK_GROUPS
1902 },
1903 .emit = brw_upload_cs_work_groups_surface,
1904 };