i965: Refactor miptree to isl converter and adjustment
[mesa.git] src/mesa/drivers/dri/i965/brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "compiler/nir/nir.h"
34 #include "main/context.h"
35 #include "main/blend.h"
36 #include "main/mtypes.h"
37 #include "main/samplerobj.h"
38 #include "main/shaderimage.h"
39 #include "main/teximage.h"
40 #include "program/prog_parameter.h"
41 #include "program/prog_instruction.h"
42 #include "main/framebuffer.h"
43 #include "main/shaderapi.h"
44
45 #include "isl/isl.h"
46
47 #include "intel_mipmap_tree.h"
48 #include "intel_batchbuffer.h"
49 #include "intel_tex.h"
50 #include "intel_fbo.h"
51 #include "intel_buffer_objects.h"
52
53 #include "brw_context.h"
54 #include "brw_state.h"
55 #include "brw_defines.h"
56 #include "brw_wm.h"
57
58 enum {
59 INTEL_RENDERBUFFER_LAYERED = 1 << 0,
60 INTEL_AUX_BUFFER_DISABLED = 1 << 1,
61 };
62
63 uint32_t tex_mocs[] = {
64 [7] = GEN7_MOCS_L3,
65 [8] = BDW_MOCS_WB,
66 [9] = SKL_MOCS_WB,
67 [10] = CNL_MOCS_WB,
68 };
69
70 uint32_t rb_mocs[] = {
71 [7] = GEN7_MOCS_L3,
72 [8] = BDW_MOCS_PTE,
73 [9] = SKL_MOCS_PTE,
74 [10] = CNL_MOCS_PTE,
75 };
76
77 static void
78 get_isl_surf(struct brw_context *brw, struct intel_mipmap_tree *mt,
79 GLenum target, struct isl_view *view,
80 uint32_t *tile_x, uint32_t *tile_y,
81 uint32_t *offset, struct isl_surf *surf)
82 {
83 intel_miptree_get_isl_surf(brw, mt, surf);
84
85 surf->dim = get_isl_surf_dim(target);
86
87 const enum isl_dim_layout dim_layout =
88 get_isl_dim_layout(&brw->screen->devinfo, mt->surf.tiling, target,
89 mt->array_layout);
90
91 if (surf->dim_layout == dim_layout)
92 return;
93
94 /* The layout of the specified texture target is not compatible with the
95      * actual layout of the miptree structure in memory -- you're entering
96      * dangerous territory: this can only possibly work if you only intended
97 * to access a single level and slice of the texture, and the hardware
98 * supports the tile offset feature in order to allow non-tile-aligned
99 * base offsets, since we'll have to point the hardware to the first
100 * texel of the level instead of relying on the usual base level/layer
101 * controls.
102 */
103 assert(brw->has_surface_tile_offset);
104 assert(view->levels == 1 && view->array_len == 1);
105 assert(*tile_x == 0 && *tile_y == 0);
106
107    *offset += intel_miptree_get_tile_offsets(mt, view->base_level,
108 view->base_array_layer,
109 tile_x, tile_y);
110
111 /* Minify the logical dimensions of the texture. */
112 const unsigned l = view->base_level - mt->first_level;
113 surf->logical_level0_px.width = minify(surf->logical_level0_px.width, l);
114 surf->logical_level0_px.height = surf->dim <= ISL_SURF_DIM_1D ? 1 :
115 minify(surf->logical_level0_px.height, l);
116 surf->logical_level0_px.depth = surf->dim <= ISL_SURF_DIM_2D ? 1 :
117 minify(surf->logical_level0_px.depth, l);
118
119 /* Only the base level and layer can be addressed with the overridden
120 * layout.
121 */
122 surf->logical_level0_px.array_len = 1;
123 surf->levels = 1;
124 surf->dim_layout = dim_layout;
125
126 /* The requested slice of the texture is now at the base level and
127 * layer.
128 */
129 view->base_level = 0;
130 view->base_array_layer = 0;
131 }
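/* A rough illustration of the override path above (the numbers are
 * hypothetical, not taken from the driver): asking for level 3, layer 0 of a
 * 256x256x16 3D miptree through a 2D target would rewrite the isl_surf as a
 * single-level, single-layer 32x32 surface (256 minified three times, depth
 * forced to 1), switch dim_layout to the 2D layout, and leave *offset plus
 * tile_x/tile_y pointing at that level's first texel, with view->base_level
 * and view->base_array_layer forced back to 0.
 */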
132
133 static void
134 brw_emit_surface_state(struct brw_context *brw,
135 struct intel_mipmap_tree *mt, uint32_t flags,
136 GLenum target, struct isl_view view,
137 uint32_t mocs, uint32_t *surf_offset, int surf_index,
138 unsigned read_domains, unsigned write_domains)
139 {
140 uint32_t tile_x = mt->level[0].level_x;
141 uint32_t tile_y = mt->level[0].level_y;
142 uint32_t offset = mt->offset;
143
144 struct isl_surf surf;
145
146 get_isl_surf(brw, mt, target, &view, &tile_x, &tile_y, &offset, &surf);
147
148 union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } };
149
150 struct brw_bo *aux_bo;
151 struct isl_surf *aux_surf = NULL;
152 uint64_t aux_offset = 0;
153 enum isl_aux_usage aux_usage = ISL_AUX_USAGE_NONE;
154 if ((mt->mcs_buf || intel_miptree_sample_with_hiz(brw, mt)) &&
155 !(flags & INTEL_AUX_BUFFER_DISABLED)) {
156 aux_usage = intel_miptree_get_aux_isl_usage(brw, mt);
157
158 if (mt->mcs_buf) {
159 aux_surf = &mt->mcs_buf->surf;
160
161 aux_bo = mt->mcs_buf->bo;
162 aux_offset = mt->mcs_buf->bo->offset64 + mt->mcs_buf->offset;
163 } else {
164 aux_surf = &mt->hiz_buf->surf;
165
166 aux_bo = mt->hiz_buf->bo;
167 aux_offset = mt->hiz_buf->bo->offset64;
168 }
169
170 /* We only really need a clear color if we also have an auxiliary
171 * surface. Without one, it does nothing.
172 */
173 clear_color = mt->fast_clear_color;
174 }
175
176 void *state = brw_state_batch(brw,
177 brw->isl_dev.ss.size,
178 brw->isl_dev.ss.align,
179 surf_offset);
180
181 isl_surf_fill_state(&brw->isl_dev, state, .surf = &surf, .view = &view,
182 .address = mt->bo->offset64 + offset,
183 .aux_surf = aux_surf, .aux_usage = aux_usage,
184 .aux_address = aux_offset,
185 .mocs = mocs, .clear_color = clear_color,
186 .x_offset_sa = tile_x, .y_offset_sa = tile_y);
187
188 brw_emit_reloc(&brw->batch, *surf_offset + brw->isl_dev.ss.addr_offset,
189 mt->bo, offset, read_domains, write_domains);
190
191 if (aux_surf) {
192 /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
193 * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
194 * contain other control information. Since buffer addresses are always
195 * on 4k boundaries (and thus have their lower 12 bits zero), we can use
196 * an ordinary reloc to do the necessary address translation.
197 */
198 assert((aux_offset & 0xfff) == 0);
199 uint32_t *aux_addr = state + brw->isl_dev.ss.aux_addr_offset;
200 brw_emit_reloc(&brw->batch,
201 *surf_offset + brw->isl_dev.ss.aux_addr_offset,
202 aux_bo, *aux_addr - aux_bo->offset64,
203 read_domains, write_domains);
204 }
205 }
206
207 uint32_t
208 brw_update_renderbuffer_surface(struct brw_context *brw,
209 struct gl_renderbuffer *rb,
210 uint32_t flags, unsigned unit /* unused */,
211 uint32_t surf_index)
212 {
213 struct gl_context *ctx = &brw->ctx;
214 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
215 struct intel_mipmap_tree *mt = irb->mt;
216
217 if (brw->gen < 9) {
218 assert(!(flags & INTEL_AUX_BUFFER_DISABLED));
219 }
220
221 assert(brw_render_target_supported(brw, rb));
222
223 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
224 if (unlikely(!brw->mesa_format_supports_render[rb_format])) {
225 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
226 __func__, _mesa_get_format_name(rb_format));
227 }
228
229 struct isl_view view = {
230 .format = brw->mesa_to_isl_render_format[rb_format],
231 .base_level = irb->mt_level - irb->mt->first_level,
232 .levels = 1,
233 .base_array_layer = irb->mt_layer,
234 .array_len = MAX2(irb->layer_count, 1),
235 .swizzle = ISL_SWIZZLE_IDENTITY,
236 .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
237 };
238
239 uint32_t offset;
240 brw_emit_surface_state(brw, mt, flags, mt->target, view,
241 rb_mocs[brw->gen],
242 &offset, surf_index,
243 I915_GEM_DOMAIN_RENDER,
244 I915_GEM_DOMAIN_RENDER);
245 return offset;
246 }
247
248 GLuint
249 translate_tex_target(GLenum target)
250 {
251 switch (target) {
252 case GL_TEXTURE_1D:
253 case GL_TEXTURE_1D_ARRAY_EXT:
254 return BRW_SURFACE_1D;
255
256 case GL_TEXTURE_RECTANGLE_NV:
257 return BRW_SURFACE_2D;
258
259 case GL_TEXTURE_2D:
260 case GL_TEXTURE_2D_ARRAY_EXT:
261 case GL_TEXTURE_EXTERNAL_OES:
262 case GL_TEXTURE_2D_MULTISAMPLE:
263 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
264 return BRW_SURFACE_2D;
265
266 case GL_TEXTURE_3D:
267 return BRW_SURFACE_3D;
268
269 case GL_TEXTURE_CUBE_MAP:
270 case GL_TEXTURE_CUBE_MAP_ARRAY:
271 return BRW_SURFACE_CUBE;
272
273 default:
274 unreachable("not reached");
275 }
276 }
277
278 uint32_t
279 brw_get_surface_tiling_bits(enum isl_tiling tiling)
280 {
281 switch (tiling) {
282 case ISL_TILING_X:
283 return BRW_SURFACE_TILED;
284 case ISL_TILING_Y0:
285 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
286 default:
287 return 0;
288 }
289 }
290
291
292 uint32_t
293 brw_get_surface_num_multisamples(unsigned num_samples)
294 {
295 if (num_samples > 1)
296 return BRW_SURFACE_MULTISAMPLECOUNT_4;
297 else
298 return BRW_SURFACE_MULTISAMPLECOUNT_1;
299 }
300
301 /**
302 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
303 * swizzling.
304 */
305 int
306 brw_get_texture_swizzle(const struct gl_context *ctx,
307 const struct gl_texture_object *t)
308 {
309 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
310
311 int swizzles[SWIZZLE_NIL + 1] = {
312 SWIZZLE_X,
313 SWIZZLE_Y,
314 SWIZZLE_Z,
315 SWIZZLE_W,
316 SWIZZLE_ZERO,
317 SWIZZLE_ONE,
318 SWIZZLE_NIL
319 };
320
321 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
322 img->_BaseFormat == GL_DEPTH_STENCIL) {
323 GLenum depth_mode = t->DepthMode;
324
325 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
326 * with depth component data specified with a sized internal format.
327 * Otherwise, it's left at the old default, GL_LUMINANCE.
328 */
329 if (_mesa_is_gles3(ctx) &&
330 img->InternalFormat != GL_DEPTH_COMPONENT &&
331 img->InternalFormat != GL_DEPTH_STENCIL) {
332 depth_mode = GL_RED;
333 }
334
335 switch (depth_mode) {
336 case GL_ALPHA:
337 swizzles[0] = SWIZZLE_ZERO;
338 swizzles[1] = SWIZZLE_ZERO;
339 swizzles[2] = SWIZZLE_ZERO;
340 swizzles[3] = SWIZZLE_X;
341 break;
342 case GL_LUMINANCE:
343 swizzles[0] = SWIZZLE_X;
344 swizzles[1] = SWIZZLE_X;
345 swizzles[2] = SWIZZLE_X;
346 swizzles[3] = SWIZZLE_ONE;
347 break;
348 case GL_INTENSITY:
349 swizzles[0] = SWIZZLE_X;
350 swizzles[1] = SWIZZLE_X;
351 swizzles[2] = SWIZZLE_X;
352 swizzles[3] = SWIZZLE_X;
353 break;
354 case GL_RED:
355 swizzles[0] = SWIZZLE_X;
356 swizzles[1] = SWIZZLE_ZERO;
357 swizzles[2] = SWIZZLE_ZERO;
358 swizzles[3] = SWIZZLE_ONE;
359 break;
360 }
361 }
362
363 GLenum datatype = _mesa_get_format_datatype(img->TexFormat);
364
365 /* If the texture's format is alpha-only, force R, G, and B to
366 * 0.0. Similarly, if the texture's format has no alpha channel,
367 * force the alpha value read to 1.0. This allows for the
368 * implementation to use an RGBA texture for any of these formats
369 * without leaking any unexpected values.
370 */
371 switch (img->_BaseFormat) {
372 case GL_ALPHA:
373 swizzles[0] = SWIZZLE_ZERO;
374 swizzles[1] = SWIZZLE_ZERO;
375 swizzles[2] = SWIZZLE_ZERO;
376 break;
377 case GL_LUMINANCE:
378 if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
379 swizzles[0] = SWIZZLE_X;
380 swizzles[1] = SWIZZLE_X;
381 swizzles[2] = SWIZZLE_X;
382 swizzles[3] = SWIZZLE_ONE;
383 }
384 break;
385 case GL_LUMINANCE_ALPHA:
386 if (datatype == GL_SIGNED_NORMALIZED) {
387 swizzles[0] = SWIZZLE_X;
388 swizzles[1] = SWIZZLE_X;
389 swizzles[2] = SWIZZLE_X;
390 swizzles[3] = SWIZZLE_W;
391 }
392 break;
393 case GL_INTENSITY:
394 if (datatype == GL_SIGNED_NORMALIZED) {
395 swizzles[0] = SWIZZLE_X;
396 swizzles[1] = SWIZZLE_X;
397 swizzles[2] = SWIZZLE_X;
398 swizzles[3] = SWIZZLE_X;
399 }
400 break;
401 case GL_RED:
402 case GL_RG:
403 case GL_RGB:
404 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0 ||
405 img->TexFormat == MESA_FORMAT_RGB_DXT1 ||
406 img->TexFormat == MESA_FORMAT_SRGB_DXT1)
407 swizzles[3] = SWIZZLE_ONE;
408 break;
409 }
410
411 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
412 swizzles[GET_SWZ(t->_Swizzle, 1)],
413 swizzles[GET_SWZ(t->_Swizzle, 2)],
414 swizzles[GET_SWZ(t->_Swizzle, 3)]);
415 }
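/* Worked example (illustration only, not driver code): a GL_DEPTH_COMPONENT
 * texture with DepthMode GL_LUMINANCE produces
 * swizzles[] = { X, X, X, ONE, ZERO, ONE, NIL } above.  If the application
 * also sets a texture swizzle of (RED, ALPHA, GREEN, BLUE), the final
 * composition is MAKE_SWIZZLE4(X, ONE, X, X): each user-selected channel
 * indexes into swizzles[], so the "ALPHA" selection picks up the forced ONE.
 */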
416
417 /**
418  * Convert a swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+
419 * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED). The mappings are
420 *
421 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
422 * 0 1 2 3 4 5
423 * 4 5 6 7 0 1
424 * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE
425 *
426 * which is simply adding 4 then modding by 8 (or anding with 7).
427 *
428 * We then may need to apply workarounds for textureGather hardware bugs.
429 */
430 static unsigned
431 swizzle_to_scs(GLenum swizzle, bool need_green_to_blue)
432 {
433 unsigned scs = (swizzle + 4) & 7;
434
435 return (need_green_to_blue && scs == HSW_SCS_GREEN) ? HSW_SCS_BLUE : scs;
436 }
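/* For illustration (not driver code), working the "add 4, mask with 7"
 * mapping through with the values listed in the comment above:
 *
 *    swizzle_to_scs(SWIZZLE_X, false)    = (0 + 4) & 7 = 4 = HSW_SCS_RED
 *    swizzle_to_scs(SWIZZLE_ZERO, false) = (4 + 4) & 7 = 0 = HSW_SCS_ZERO
 *    swizzle_to_scs(SWIZZLE_Y, true)     = (1 + 4) & 7 = 5 = HSW_SCS_GREEN,
 *                                          then remapped to HSW_SCS_BLUE
 *                                          by the gather workaround.
 */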
437
438 static unsigned
439 brw_find_matching_rb(const struct gl_framebuffer *fb,
440 const struct intel_mipmap_tree *mt)
441 {
442 for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
443 const struct intel_renderbuffer *irb =
444 intel_renderbuffer(fb->_ColorDrawBuffers[i]);
445
446 if (irb && irb->mt == mt)
447 return i;
448 }
449
450 return fb->_NumColorDrawBuffers;
451 }
452
453 static inline bool
454 brw_texture_view_sane(const struct brw_context *brw,
455 const struct intel_mipmap_tree *mt,
456 const struct isl_view *view)
457 {
458 /* There are special cases only for lossless compression. */
459 if (mt->aux_usage != ISL_AUX_USAGE_CCS_E)
460 return true;
461
462 if (isl_format_supports_ccs_e(&brw->screen->devinfo, view->format))
463 return true;
464
465 /* Logic elsewhere needs to take care to resolve the color buffer prior
466 * to sampling it as non-compressed.
467 */
468 if (intel_miptree_has_color_unresolved(mt, view->base_level, view->levels,
469 view->base_array_layer,
470 view->array_len))
471 return false;
472
473 const struct gl_framebuffer *fb = brw->ctx.DrawBuffer;
474 const unsigned rb_index = brw_find_matching_rb(fb, mt);
475
476 if (rb_index == fb->_NumColorDrawBuffers)
477 return true;
478
479 /* Underlying surface is compressed but it is sampled using a format that
480 * the sampling engine doesn't support as compressed. Compression must be
481 * disabled for both sampling engine and data port in case the same surface
482 * is used also as render target.
483 */
484 return brw->draw_aux_buffer_disabled[rb_index];
485 }
486
487 static bool
488 brw_disable_aux_surface(const struct brw_context *brw,
489 const struct intel_mipmap_tree *mt,
490 const struct isl_view *view)
491 {
492 /* Nothing to disable. */
493 if (!mt->mcs_buf)
494 return false;
495
496 const bool is_unresolved = intel_miptree_has_color_unresolved(
497 mt, view->base_level, view->levels,
498 view->base_array_layer, view->array_len);
499
500 /* There are special cases only for lossless compression. */
501 if (mt->aux_usage != ISL_AUX_USAGE_CCS_E)
502 return !is_unresolved;
503
504 const struct gl_framebuffer *fb = brw->ctx.DrawBuffer;
505 const unsigned rb_index = brw_find_matching_rb(fb, mt);
506
507 /* If we are drawing into this with compression enabled, then we must also
508 * enable compression when texturing from it regardless of
509  * fast_clear_state. If we don't, then after the first draw call with
510 * this setup, there will be data in the CCS which won't get picked up by
511 * subsequent texturing operations as required by ARB_texture_barrier.
512 * Since we don't want to re-emit the binding table or do a resolve
513 * operation every draw call, the easiest thing to do is just enable
514 * compression on the texturing side. This is completely safe to do
515 * since, if compressed texturing weren't allowed, we would have disabled
516 * compression of render targets in whatever_that_function_is_called().
517 */
518 if (rb_index < fb->_NumColorDrawBuffers) {
519 if (brw->draw_aux_buffer_disabled[rb_index]) {
520 assert(!is_unresolved);
521 }
522
523 return brw->draw_aux_buffer_disabled[rb_index];
524 }
525
526 return !is_unresolved;
527 }
528
529 void
530 brw_update_texture_surface(struct gl_context *ctx,
531 unsigned unit,
532 uint32_t *surf_offset,
533 bool for_gather,
534 uint32_t plane)
535 {
536 struct brw_context *brw = brw_context(ctx);
537 struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current;
538
539 if (obj->Target == GL_TEXTURE_BUFFER) {
540 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
541
542 } else {
543 struct intel_texture_object *intel_obj = intel_texture_object(obj);
544 struct intel_mipmap_tree *mt = intel_obj->mt;
545
546 if (plane > 0) {
547 if (mt->plane[plane - 1] == NULL)
548 return;
549 mt = mt->plane[plane - 1];
550 }
551
552 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
553 /* If this is a view with restricted NumLayers, then our effective depth
554 * is not just the miptree depth.
555 */
556 const unsigned view_num_layers =
557 (obj->Immutable && obj->Target != GL_TEXTURE_3D) ? obj->NumLayers :
558 mt->logical_depth0;
559
560 /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
561 * texturing functions that return a float, as our code generation always
562 * selects the .x channel (which would always be 0).
563 */
564 struct gl_texture_image *firstImage = obj->Image[0][obj->BaseLevel];
565 const bool alpha_depth = obj->DepthMode == GL_ALPHA &&
566 (firstImage->_BaseFormat == GL_DEPTH_COMPONENT ||
567 firstImage->_BaseFormat == GL_DEPTH_STENCIL);
568 const unsigned swizzle = (unlikely(alpha_depth) ? SWIZZLE_XYZW :
569 brw_get_texture_swizzle(&brw->ctx, obj));
570
571 mesa_format mesa_fmt = plane == 0 ? intel_obj->_Format : mt->format;
572 enum isl_format format = translate_tex_format(brw, mesa_fmt,
573 sampler->sRGBDecode);
574
575 /* Implement gen6 and gen7 gather work-around */
576 bool need_green_to_blue = false;
577 if (for_gather) {
578 if (brw->gen == 7 && (format == ISL_FORMAT_R32G32_FLOAT ||
579 format == ISL_FORMAT_R32G32_SINT ||
580 format == ISL_FORMAT_R32G32_UINT)) {
581 format = ISL_FORMAT_R32G32_FLOAT_LD;
582 need_green_to_blue = brw->is_haswell;
583 } else if (brw->gen == 6) {
584 /* Sandybridge's gather4 message is broken for integer formats.
585 * To work around this, we pretend the surface is UNORM for
586 * 8 or 16-bit formats, and emit shader instructions to recover
587 * the real INT/UINT value. For 32-bit formats, we pretend
588 * the surface is FLOAT, and simply reinterpret the resulting
589 * bits.
590 */
591 switch (format) {
592 case ISL_FORMAT_R8_SINT:
593 case ISL_FORMAT_R8_UINT:
594 format = ISL_FORMAT_R8_UNORM;
595 break;
596
597 case ISL_FORMAT_R16_SINT:
598 case ISL_FORMAT_R16_UINT:
599 format = ISL_FORMAT_R16_UNORM;
600 break;
601
602 case ISL_FORMAT_R32_SINT:
603 case ISL_FORMAT_R32_UINT:
604 format = ISL_FORMAT_R32_FLOAT;
605 break;
606
607 default:
608 break;
609 }
610 }
611 }
612
613 if (obj->StencilSampling && firstImage->_BaseFormat == GL_DEPTH_STENCIL) {
614 if (brw->gen <= 7) {
615 assert(mt->r8stencil_mt && !mt->stencil_mt->r8stencil_needs_update);
616 mt = mt->r8stencil_mt;
617 } else {
618 mt = mt->stencil_mt;
619 }
620 format = ISL_FORMAT_R8_UINT;
621 } else if (brw->gen <= 7 && mt->format == MESA_FORMAT_S_UINT8) {
622 assert(mt->r8stencil_mt && !mt->r8stencil_needs_update);
623 mt = mt->r8stencil_mt;
624 format = ISL_FORMAT_R8_UINT;
625 }
626
627 const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
628
629 struct isl_view view = {
630 .format = format,
631 .base_level = obj->MinLevel + obj->BaseLevel,
632 .levels = intel_obj->_MaxLevel - obj->BaseLevel + 1,
633 .base_array_layer = obj->MinLayer,
634 .array_len = view_num_layers,
635 .swizzle = {
636 .r = swizzle_to_scs(GET_SWZ(swizzle, 0), need_green_to_blue),
637 .g = swizzle_to_scs(GET_SWZ(swizzle, 1), need_green_to_blue),
638 .b = swizzle_to_scs(GET_SWZ(swizzle, 2), need_green_to_blue),
639 .a = swizzle_to_scs(GET_SWZ(swizzle, 3), need_green_to_blue),
640 },
641 .usage = ISL_SURF_USAGE_TEXTURE_BIT,
642 };
643
644 if (obj->Target == GL_TEXTURE_CUBE_MAP ||
645 obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY)
646 view.usage |= ISL_SURF_USAGE_CUBE_BIT;
647
648 assert(brw_texture_view_sane(brw, mt, &view));
649
650 const int flags = brw_disable_aux_surface(brw, mt, &view) ?
651 INTEL_AUX_BUFFER_DISABLED : 0;
652 brw_emit_surface_state(brw, mt, flags, mt->target, view,
653 tex_mocs[brw->gen],
654 surf_offset, surf_index,
655 I915_GEM_DOMAIN_SAMPLER, 0);
656 }
657 }
658
659 void
660 brw_emit_buffer_surface_state(struct brw_context *brw,
661 uint32_t *out_offset,
662 struct brw_bo *bo,
663 unsigned buffer_offset,
664 unsigned surface_format,
665 unsigned buffer_size,
666 unsigned pitch,
667 bool rw)
668 {
669 uint32_t *dw = brw_state_batch(brw,
670 brw->isl_dev.ss.size,
671 brw->isl_dev.ss.align,
672 out_offset);
673
674 isl_buffer_fill_state(&brw->isl_dev, dw,
675 .address = (bo ? bo->offset64 : 0) + buffer_offset,
676 .size = buffer_size,
677 .format = surface_format,
678 .stride = pitch,
679 .mocs = tex_mocs[brw->gen]);
680
681 if (bo) {
682 brw_emit_reloc(&brw->batch, *out_offset + brw->isl_dev.ss.addr_offset,
683 bo, buffer_offset,
684 I915_GEM_DOMAIN_SAMPLER,
685 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
686 }
687 }
688
689 void
690 brw_update_buffer_texture_surface(struct gl_context *ctx,
691 unsigned unit,
692 uint32_t *surf_offset)
693 {
694 struct brw_context *brw = brw_context(ctx);
695 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
696 struct intel_buffer_object *intel_obj =
697 intel_buffer_object(tObj->BufferObject);
698 uint32_t size = tObj->BufferSize;
699 struct brw_bo *bo = NULL;
700 mesa_format format = tObj->_BufferObjectFormat;
701 const enum isl_format isl_format = brw_isl_format_for_mesa_format(format);
702 int texel_size = _mesa_get_format_bytes(format);
703
704 if (intel_obj) {
705 size = MIN2(size, intel_obj->Base.Size);
706 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size,
707 false);
708 }
709
710 /* The ARB_texture_buffer_specification says:
711 *
712 * "The number of texels in the buffer texture's texel array is given by
713 *
714    *       floor(<buffer_size> / (<components> * sizeof(<base_type>))),
715 *
716 * where <buffer_size> is the size of the buffer object, in basic
717 * machine units and <components> and <base_type> are the element count
718 * and base data type for elements, as specified in Table X.1. The
719 * number of texels in the texel array is then clamped to the
720 * implementation-dependent limit MAX_TEXTURE_BUFFER_SIZE_ARB."
721 *
722 * We need to clamp the size in bytes to MAX_TEXTURE_BUFFER_SIZE * stride,
723 * so that when ISL divides by stride to obtain the number of texels, that
724 * texel count is clamped to MAX_TEXTURE_BUFFER_SIZE.
725 */
726 size = MIN2(size, ctx->Const.MaxTextureBufferSize * (unsigned) texel_size);
727
728 if (isl_format == ISL_FORMAT_UNSUPPORTED) {
729 _mesa_problem(NULL, "bad format %s for texture buffer\n",
730 _mesa_get_format_name(format));
731 }
732
733 brw_emit_buffer_surface_state(brw, surf_offset, bo,
734 tObj->BufferOffset,
735 isl_format,
736 size,
737 texel_size,
738 false /* rw */);
739 }
740
741 /**
742 * Create the constant buffer surface. Vertex/fragment shader constants will be
743 * read from this buffer with Data Port Read instructions/messages.
744 */
745 void
746 brw_create_constant_surface(struct brw_context *brw,
747 struct brw_bo *bo,
748 uint32_t offset,
749 uint32_t size,
750 uint32_t *out_offset)
751 {
752 brw_emit_buffer_surface_state(brw, out_offset, bo, offset,
753 ISL_FORMAT_R32G32B32A32_FLOAT,
754 size, 1, false);
755 }
756
757 /**
758 * Create the buffer surface. Shader buffer variables will be
759  * read from / written to this buffer with Data Port Read/Write
760 * instructions/messages.
761 */
762 void
763 brw_create_buffer_surface(struct brw_context *brw,
764 struct brw_bo *bo,
765 uint32_t offset,
766 uint32_t size,
767 uint32_t *out_offset)
768 {
769 /* Use a raw surface so we can reuse existing untyped read/write/atomic
770 * messages. We need these specifically for the fragment shader since they
771    * include a pixel mask header that we need in order to ensure correct
772    * behavior with helper invocations, which cannot write to the buffer.
773 */
774 brw_emit_buffer_surface_state(brw, out_offset, bo, offset,
775 ISL_FORMAT_RAW,
776 size, 1, true);
777 }
778
779 /**
780 * Set up a binding table entry for use by stream output logic (transform
781 * feedback).
782 *
783 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
784 */
785 void
786 brw_update_sol_surface(struct brw_context *brw,
787 struct gl_buffer_object *buffer_obj,
788 uint32_t *out_offset, unsigned num_vector_components,
789 unsigned stride_dwords, unsigned offset_dwords)
790 {
791 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
792 uint32_t offset_bytes = 4 * offset_dwords;
793 struct brw_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
794 offset_bytes,
795 buffer_obj->Size - offset_bytes,
796 true);
797 uint32_t *surf = brw_state_batch(brw, 6 * 4, 32, out_offset);
798 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
799 size_t size_dwords = buffer_obj->Size / 4;
800 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
801
802 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
803 * too big to map using a single binding table entry?
804 */
805 assert((size_dwords - offset_dwords) / stride_dwords
806 <= BRW_MAX_NUM_BUFFER_ENTRIES);
807
808 if (size_dwords > offset_dwords + num_vector_components) {
809 /* There is room for at least 1 transform feedback output in the buffer.
810 * Compute the number of additional transform feedback outputs the
811 * buffer has room for.
812 */
813 buffer_size_minus_1 =
814 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
815 } else {
816 /* There isn't even room for a single transform feedback output in the
817 * buffer. We can't configure the binding table entry to prevent output
818 * entirely; we'll have to rely on the geometry shader to detect
819 * overflow. But to minimize the damage in case of a bug, set up the
820 * binding table entry to just allow a single output.
821 */
822 buffer_size_minus_1 = 0;
823 }
824 width = buffer_size_minus_1 & 0x7f;
825 height = (buffer_size_minus_1 & 0xfff80) >> 7;
826 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
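   /* Illustration only: buffer_size_minus_1 is split across the 7-bit width,
    * 13-bit height and 7-bit depth fields of the surface state.  For
    * example, buffer_size_minus_1 = 1000 gives
    *
    *    width  = 1000 & 0x7f              = 104
    *    height = (1000 & 0xfff80) >> 7    = 7
    *    depth  = (1000 & 0x7f00000) >> 20 = 0
    *
    * and the hardware reassembles depth * 2^20 + height * 2^7 + width = 1000.
    */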
827
828 switch (num_vector_components) {
829 case 1:
830 surface_format = ISL_FORMAT_R32_FLOAT;
831 break;
832 case 2:
833 surface_format = ISL_FORMAT_R32G32_FLOAT;
834 break;
835 case 3:
836 surface_format = ISL_FORMAT_R32G32B32_FLOAT;
837 break;
838 case 4:
839 surface_format = ISL_FORMAT_R32G32B32A32_FLOAT;
840 break;
841 default:
842 unreachable("Invalid vector size for transform feedback output");
843 }
844
845 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
846 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
847 surface_format << BRW_SURFACE_FORMAT_SHIFT |
848 BRW_SURFACE_RC_READ_WRITE;
849 surf[1] = bo->offset64 + offset_bytes; /* reloc */
850 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
851 height << BRW_SURFACE_HEIGHT_SHIFT);
852 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
853 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
854 surf[4] = 0;
855 surf[5] = 0;
856
857 /* Emit relocation to surface contents. */
858 brw_emit_reloc(&brw->batch, *out_offset + 4, bo, offset_bytes,
859 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
860 }
861
862 /* Creates a new WM constant buffer reflecting the current fragment program's
863 * constants, if needed by the fragment program.
864 *
865 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
866 * state atom.
867 */
868 static void
869 brw_upload_wm_pull_constants(struct brw_context *brw)
870 {
871 struct brw_stage_state *stage_state = &brw->wm.base;
872 /* BRW_NEW_FRAGMENT_PROGRAM */
873 struct brw_program *fp = (struct brw_program *) brw->fragment_program;
874 /* BRW_NEW_FS_PROG_DATA */
875 struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data;
876
877 _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_FRAGMENT);
878 /* _NEW_PROGRAM_CONSTANTS */
879 brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program,
880 stage_state, prog_data);
881 }
882
883 const struct brw_tracked_state brw_wm_pull_constants = {
884 .dirty = {
885 .mesa = _NEW_PROGRAM_CONSTANTS,
886 .brw = BRW_NEW_BATCH |
887 BRW_NEW_BLORP |
888 BRW_NEW_FRAGMENT_PROGRAM |
889 BRW_NEW_FS_PROG_DATA,
890 },
891 .emit = brw_upload_wm_pull_constants,
892 };
893
894 /**
895 * Creates a null renderbuffer surface.
896 *
897 * This is used when the shader doesn't write to any color output. An FB
898 * write to target 0 will still be emitted, because that's how the thread is
899 * terminated (and computed depth is returned), so we need to have the
900  * hardware discard the target 0 color output.
901 */
902 static void
903 brw_emit_null_surface_state(struct brw_context *brw,
904 unsigned width,
905 unsigned height,
906 unsigned samples,
907 uint32_t *out_offset)
908 {
909    /* From the Sandy Bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
910 * Notes):
911 *
912 * A null surface will be used in instances where an actual surface is
913 * not bound. When a write message is generated to a null surface, no
914 * actual surface is written to. When a read message (including any
915 * sampling engine message) is generated to a null surface, the result
916 * is all zeros. Note that a null surface type is allowed to be used
917     *     with all messages, even if it is not specifically indicated as
918 * supported. All of the remaining fields in surface state are ignored
919 * for null surfaces, with the following exceptions:
920 *
921 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
922 * depth buffer’s corresponding state for all render target surfaces,
923 * including null.
924 *
925 * - Surface Format must be R8G8B8A8_UNORM.
926 */
927 unsigned surface_type = BRW_SURFACE_NULL;
928 struct brw_bo *bo = NULL;
929 unsigned pitch_minus_1 = 0;
930 uint32_t multisampling_state = 0;
931 uint32_t *surf = brw_state_batch(brw, 6 * 4, 32, out_offset);
932
933 if (samples > 1) {
934 /* On Gen6, null render targets seem to cause GPU hangs when
935        * multisampling. So work around this problem by rendering into a dummy
936 * color buffer.
937 *
938 * To decrease the amount of memory needed by the workaround buffer, we
939 * set its pitch to 128 bytes (the width of a Y tile). This means that
940 * the amount of memory needed for the workaround buffer is
941 * (width_in_tiles + height_in_tiles - 1) tiles.
942 *
943 * Note that since the workaround buffer will be interpreted by the
944 * hardware as an interleaved multisampled buffer, we need to compute
945 * width_in_tiles and height_in_tiles by dividing the width and height
946 * by 16 rather than the normal Y-tile size of 32.
947 */
948 unsigned width_in_tiles = ALIGN(width, 16) / 16;
949 unsigned height_in_tiles = ALIGN(height, 16) / 16;
950 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
951 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
952 size_needed);
953 bo = brw->wm.multisampled_null_render_target_bo;
954 surface_type = BRW_SURFACE_2D;
955 pitch_minus_1 = 127;
956 multisampling_state = brw_get_surface_num_multisamples(samples);
957 }
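   /* To put numbers on the workaround above (illustration only): for a
    * 1920x1080 multisampled framebuffer, width_in_tiles = 120 and
    * height_in_tiles = 68 (1080 aligns up to 1088), so the dummy buffer
    * needs (120 + 68 - 1) * 4096 = 765952 bytes -- about 748 KiB rather
    * than a full-size dummy render target.
    */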
958
959 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
960 ISL_FORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
961 if (brw->gen < 6) {
962 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
963 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
964 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
965 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
966 }
967 surf[1] = bo ? bo->offset64 : 0;
968 surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
969 (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
970
971    /* From the Sandy Bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
972 * Notes):
973 *
974 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
975 */
976 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
977 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
978 surf[4] = multisampling_state;
979 surf[5] = 0;
980
981 if (bo) {
982 brw_emit_reloc(&brw->batch, *out_offset + 4, bo, 0,
983 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
984 }
985 }
986
987 /**
988 * Sets up a surface state structure to point at the given region.
989 * While it is only used for the front/back buffer currently, it should be
990  * usable for further buffers when doing ARB_draw_buffers support.
991 */
992 static uint32_t
993 gen4_update_renderbuffer_surface(struct brw_context *brw,
994 struct gl_renderbuffer *rb,
995 uint32_t flags, unsigned unit,
996 uint32_t surf_index)
997 {
998 struct gl_context *ctx = &brw->ctx;
999 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
1000 struct intel_mipmap_tree *mt = irb->mt;
1001 uint32_t *surf;
1002 uint32_t tile_x, tile_y;
1003 enum isl_format format;
1004 uint32_t offset;
1005 /* _NEW_BUFFERS */
1006 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
1007 /* BRW_NEW_FS_PROG_DATA */
1008
1009 assert(!(flags & INTEL_RENDERBUFFER_LAYERED));
1010 assert(!(flags & INTEL_AUX_BUFFER_DISABLED));
1011
1012 if (rb->TexImage && !brw->has_surface_tile_offset) {
1013 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
1014
1015 if (tile_x != 0 || tile_y != 0) {
1016 /* Original gen4 hardware couldn't draw to a non-tile-aligned
1017          * destination in a miptree unless you actually set up your renderbuffer
1018 * as a miptree and used the fragile lod/array_index/etc. controls to
1019 * select the image. So, instead, we just make a new single-level
1020 * miptree and render into that.
1021 */
1022 intel_renderbuffer_move_to_temp(brw, irb, false);
1023 assert(irb->align_wa_mt);
1024 mt = irb->align_wa_mt;
1025 }
1026 }
1027
1028 surf = brw_state_batch(brw, 6 * 4, 32, &offset);
1029
1030 format = brw->mesa_to_isl_render_format[rb_format];
1031 if (unlikely(!brw->mesa_format_supports_render[rb_format])) {
1032 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
1033 __func__, _mesa_get_format_name(rb_format));
1034 }
1035
1036 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
1037 format << BRW_SURFACE_FORMAT_SHIFT);
1038
1039 /* reloc */
1040 assert(mt->offset % mt->cpp == 0);
1041 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
1042 mt->bo->offset64 + mt->offset);
1043
1044 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
1045 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
1046
1047 surf[3] = (brw_get_surface_tiling_bits(mt->surf.tiling) |
1048 (mt->surf.row_pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
1049
1050 surf[4] = brw_get_surface_num_multisamples(mt->surf.samples);
1051
1052 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
1053 /* Note that the low bits of these fields are missing, so
1054 * there's the possibility of getting in trouble.
1055 */
1056 assert(tile_x % 4 == 0);
1057 assert(tile_y % 2 == 0);
1058 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
1059 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
1060 (mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
1061
1062 if (brw->gen < 6) {
1063 /* _NEW_COLOR */
1064 if (!ctx->Color.ColorLogicOpEnabled && !ctx->Color._AdvancedBlendMode &&
1065 (ctx->Color.BlendEnabled & (1 << unit)))
1066 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
1067
1068 if (!ctx->Color.ColorMask[unit][0])
1069 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
1070 if (!ctx->Color.ColorMask[unit][1])
1071 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
1072 if (!ctx->Color.ColorMask[unit][2])
1073 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
1074
1075       /* Disable writes to the alpha component when the
1076 * renderbuffer is XRGB.
1077 */
1078 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
1079 !ctx->Color.ColorMask[unit][3]) {
1080 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
1081 }
1082 }
1083
1084 brw_emit_reloc(&brw->batch, offset + 4, mt->bo, surf[1] - mt->bo->offset64,
1085 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
1086
1087 return offset;
1088 }
1089
1090 /**
1091 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
1092 */
1093 void
1094 brw_update_renderbuffer_surfaces(struct brw_context *brw,
1095 const struct gl_framebuffer *fb,
1096 uint32_t render_target_start,
1097 uint32_t *surf_offset)
1098 {
1099 GLuint i;
1100 const unsigned int w = _mesa_geometric_width(fb);
1101 const unsigned int h = _mesa_geometric_height(fb);
1102 const unsigned int s = _mesa_geometric_samples(fb);
1103
1104 /* Update surfaces for drawing buffers */
1105 if (fb->_NumColorDrawBuffers >= 1) {
1106 for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
1107 const uint32_t surf_index = render_target_start + i;
1108 const int flags = (_mesa_geometric_layers(fb) > 0 ?
1109 INTEL_RENDERBUFFER_LAYERED : 0) |
1110 (brw->draw_aux_buffer_disabled[i] ?
1111 INTEL_AUX_BUFFER_DISABLED : 0);
1112
1113 if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
1114 surf_offset[surf_index] =
1115 brw->vtbl.update_renderbuffer_surface(
1116 brw, fb->_ColorDrawBuffers[i], flags, i, surf_index);
1117 } else {
1118 brw->vtbl.emit_null_surface_state(brw, w, h, s,
1119 &surf_offset[surf_index]);
1120 }
1121 }
1122 } else {
1123 const uint32_t surf_index = render_target_start;
1124 brw->vtbl.emit_null_surface_state(brw, w, h, s,
1125 &surf_offset[surf_index]);
1126 }
1127 }
1128
1129 static void
1130 update_renderbuffer_surfaces(struct brw_context *brw)
1131 {
1132 const struct gl_context *ctx = &brw->ctx;
1133
1134 /* BRW_NEW_FS_PROG_DATA */
1135 const struct brw_wm_prog_data *wm_prog_data =
1136 brw_wm_prog_data(brw->wm.base.prog_data);
1137
1138 /* _NEW_BUFFERS | _NEW_COLOR */
1139 const struct gl_framebuffer *fb = ctx->DrawBuffer;
1140 brw_update_renderbuffer_surfaces(
1141 brw, fb,
1142 wm_prog_data->binding_table.render_target_start,
1143 brw->wm.base.surf_offset);
1144 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1145 }
1146
1147 const struct brw_tracked_state brw_renderbuffer_surfaces = {
1148 .dirty = {
1149 .mesa = _NEW_BUFFERS |
1150 _NEW_COLOR,
1151 .brw = BRW_NEW_BATCH |
1152 BRW_NEW_BLORP |
1153 BRW_NEW_FS_PROG_DATA,
1154 },
1155 .emit = update_renderbuffer_surfaces,
1156 };
1157
1158 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
1159 .dirty = {
1160 .mesa = _NEW_BUFFERS,
1161 .brw = BRW_NEW_BATCH |
1162 BRW_NEW_BLORP,
1163 },
1164 .emit = update_renderbuffer_surfaces,
1165 };
1166
1167 static void
1168 update_renderbuffer_read_surfaces(struct brw_context *brw)
1169 {
1170 const struct gl_context *ctx = &brw->ctx;
1171
1172 /* BRW_NEW_FS_PROG_DATA */
1173 const struct brw_wm_prog_data *wm_prog_data =
1174 brw_wm_prog_data(brw->wm.base.prog_data);
1175
1176 /* BRW_NEW_FRAGMENT_PROGRAM */
1177 if (!ctx->Extensions.MESA_shader_framebuffer_fetch &&
1178 brw->fragment_program && brw->fragment_program->info.outputs_read) {
1179 /* _NEW_BUFFERS */
1180 const struct gl_framebuffer *fb = ctx->DrawBuffer;
1181
1182 for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
1183 struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];
1184 const struct intel_renderbuffer *irb = intel_renderbuffer(rb);
1185 const unsigned surf_index =
1186 wm_prog_data->binding_table.render_target_read_start + i;
1187 uint32_t *surf_offset = &brw->wm.base.surf_offset[surf_index];
1188
1189 if (irb) {
1190 const enum isl_format format = brw->mesa_to_isl_render_format[
1191 _mesa_get_render_format(ctx, intel_rb_format(irb))];
1192 assert(isl_format_supports_sampling(&brw->screen->devinfo,
1193 format));
1194
1195 /* Override the target of the texture if the render buffer is a
1196 * single slice of a 3D texture (since the minimum array element
1197 * field of the surface state structure is ignored by the sampler
1198 * unit for 3D textures on some hardware), or if the render buffer
1199 * is a 1D array (since shaders always provide the array index
1200 * coordinate at the Z component to avoid state-dependent
1201 * recompiles when changing the texture target of the
1202 * framebuffer).
1203 */
1204 const GLenum target =
1205 (irb->mt->target == GL_TEXTURE_3D &&
1206 irb->layer_count == 1) ? GL_TEXTURE_2D :
1207 irb->mt->target == GL_TEXTURE_1D_ARRAY ? GL_TEXTURE_2D_ARRAY :
1208 irb->mt->target;
1209
1210 const struct isl_view view = {
1211 .format = format,
1212 .base_level = irb->mt_level - irb->mt->first_level,
1213 .levels = 1,
1214 .base_array_layer = irb->mt_layer,
1215 .array_len = irb->layer_count,
1216 .swizzle = ISL_SWIZZLE_IDENTITY,
1217 .usage = ISL_SURF_USAGE_TEXTURE_BIT,
1218 };
1219
1220 const int flags = brw->draw_aux_buffer_disabled[i] ?
1221 INTEL_AUX_BUFFER_DISABLED : 0;
1222 brw_emit_surface_state(brw, irb->mt, flags, target, view,
1223 tex_mocs[brw->gen],
1224 surf_offset, surf_index,
1225 I915_GEM_DOMAIN_SAMPLER, 0);
1226
1227 } else {
1228 brw->vtbl.emit_null_surface_state(
1229 brw, _mesa_geometric_width(fb), _mesa_geometric_height(fb),
1230 _mesa_geometric_samples(fb), surf_offset);
1231 }
1232 }
1233
1234 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1235 }
1236 }
1237
1238 const struct brw_tracked_state brw_renderbuffer_read_surfaces = {
1239 .dirty = {
1240 .mesa = _NEW_BUFFERS,
1241 .brw = BRW_NEW_BATCH |
1242 BRW_NEW_FRAGMENT_PROGRAM |
1243 BRW_NEW_FS_PROG_DATA,
1244 },
1245 .emit = update_renderbuffer_read_surfaces,
1246 };
1247
1248 static void
1249 update_stage_texture_surfaces(struct brw_context *brw,
1250 const struct gl_program *prog,
1251 struct brw_stage_state *stage_state,
1252 bool for_gather, uint32_t plane)
1253 {
1254 if (!prog)
1255 return;
1256
1257 struct gl_context *ctx = &brw->ctx;
1258
1259 uint32_t *surf_offset = stage_state->surf_offset;
1260
1261 /* BRW_NEW_*_PROG_DATA */
1262 if (for_gather)
1263 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
1264 else
1265 surf_offset += stage_state->prog_data->binding_table.plane_start[plane];
1266
1267 unsigned num_samplers = util_last_bit(prog->SamplersUsed);
1268 for (unsigned s = 0; s < num_samplers; s++) {
1269 surf_offset[s] = 0;
1270
1271 if (prog->SamplersUsed & (1 << s)) {
1272 const unsigned unit = prog->SamplerUnits[s];
1273
1274 /* _NEW_TEXTURE */
1275 if (ctx->Texture.Unit[unit]._Current) {
1276 brw_update_texture_surface(ctx, unit, surf_offset + s, for_gather, plane);
1277 }
1278 }
1279 }
1280 }
1281
1282
1283 /**
1284 * Construct SURFACE_STATE objects for enabled textures.
1285 */
1286 static void
1287 brw_update_texture_surfaces(struct brw_context *brw)
1288 {
1289 /* BRW_NEW_VERTEX_PROGRAM */
1290 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
1291
1292 /* BRW_NEW_TESS_PROGRAMS */
1293 struct gl_program *tcs = (struct gl_program *) brw->tess_ctrl_program;
1294 struct gl_program *tes = (struct gl_program *) brw->tess_eval_program;
1295
1296 /* BRW_NEW_GEOMETRY_PROGRAM */
1297 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
1298
1299 /* BRW_NEW_FRAGMENT_PROGRAM */
1300 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
1301
1302 /* _NEW_TEXTURE */
1303 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false, 0);
1304 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false, 0);
1305 update_stage_texture_surfaces(brw, tes, &brw->tes.base, false, 0);
1306 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false, 0);
1307 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 0);
1308
1309    /* Emit an alternate set of surface state for gather. This
1310     * allows the surface format to be overridden for only the
1311     * gather4 messages. */
1312 if (brw->gen < 8) {
1313 if (vs && vs->nir->info.uses_texture_gather)
1314 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true, 0);
1315 if (tcs && tcs->nir->info.uses_texture_gather)
1316 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true, 0);
1317 if (tes && tes->nir->info.uses_texture_gather)
1318 update_stage_texture_surfaces(brw, tes, &brw->tes.base, true, 0);
1319 if (gs && gs->nir->info.uses_texture_gather)
1320 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true, 0);
1321 if (fs && fs->nir->info.uses_texture_gather)
1322 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true, 0);
1323 }
1324
1325 if (fs) {
1326 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 1);
1327 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 2);
1328 }
1329
1330 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1331 }
1332
1333 const struct brw_tracked_state brw_texture_surfaces = {
1334 .dirty = {
1335 .mesa = _NEW_TEXTURE,
1336 .brw = BRW_NEW_BATCH |
1337 BRW_NEW_BLORP |
1338 BRW_NEW_FRAGMENT_PROGRAM |
1339 BRW_NEW_FS_PROG_DATA |
1340 BRW_NEW_GEOMETRY_PROGRAM |
1341 BRW_NEW_GS_PROG_DATA |
1342 BRW_NEW_TESS_PROGRAMS |
1343 BRW_NEW_TCS_PROG_DATA |
1344 BRW_NEW_TES_PROG_DATA |
1345 BRW_NEW_TEXTURE_BUFFER |
1346 BRW_NEW_VERTEX_PROGRAM |
1347 BRW_NEW_VS_PROG_DATA,
1348 },
1349 .emit = brw_update_texture_surfaces,
1350 };
1351
1352 static void
1353 brw_update_cs_texture_surfaces(struct brw_context *brw)
1354 {
1355 /* BRW_NEW_COMPUTE_PROGRAM */
1356 struct gl_program *cs = (struct gl_program *) brw->compute_program;
1357
1358 /* _NEW_TEXTURE */
1359 update_stage_texture_surfaces(brw, cs, &brw->cs.base, false, 0);
1360
1361    /* Emit an alternate set of surface state for gather. This
1362     * allows the surface format to be overridden for only the
1363     * gather4 messages.
1364 */
1365 if (brw->gen < 8) {
1366 if (cs && cs->nir->info.uses_texture_gather)
1367 update_stage_texture_surfaces(brw, cs, &brw->cs.base, true, 0);
1368 }
1369
1370 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1371 }
1372
1373 const struct brw_tracked_state brw_cs_texture_surfaces = {
1374 .dirty = {
1375 .mesa = _NEW_TEXTURE,
1376 .brw = BRW_NEW_BATCH |
1377 BRW_NEW_BLORP |
1378 BRW_NEW_COMPUTE_PROGRAM,
1379 },
1380 .emit = brw_update_cs_texture_surfaces,
1381 };
1382
1383
1384 void
1385 brw_upload_ubo_surfaces(struct brw_context *brw, struct gl_program *prog,
1386 struct brw_stage_state *stage_state,
1387 struct brw_stage_prog_data *prog_data)
1388 {
1389 struct gl_context *ctx = &brw->ctx;
1390
1391 if (!prog)
1392 return;
1393
1394 uint32_t *ubo_surf_offsets =
1395 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
1396
1397 for (int i = 0; i < prog->info.num_ubos; i++) {
1398 struct gl_uniform_buffer_binding *binding =
1399 &ctx->UniformBufferBindings[prog->sh.UniformBlocks[i]->Binding];
1400
1401 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
1402 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]);
1403 } else {
1404 struct intel_buffer_object *intel_bo =
1405 intel_buffer_object(binding->BufferObject);
1406 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
1407 if (!binding->AutomaticSize)
1408 size = MIN2(size, binding->Size);
1409 struct brw_bo *bo =
1410 intel_bufferobj_buffer(brw, intel_bo,
1411 binding->Offset,
1412 size, false);
1413 brw_create_constant_surface(brw, bo, binding->Offset,
1414 size,
1415 &ubo_surf_offsets[i]);
1416 }
1417 }
1418
1419 uint32_t *ssbo_surf_offsets =
1420 &stage_state->surf_offset[prog_data->binding_table.ssbo_start];
1421
1422 for (int i = 0; i < prog->info.num_ssbos; i++) {
1423 struct gl_shader_storage_buffer_binding *binding =
1424 &ctx->ShaderStorageBufferBindings[prog->sh.ShaderStorageBlocks[i]->Binding];
1425
1426 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
1427 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]);
1428 } else {
1429 struct intel_buffer_object *intel_bo =
1430 intel_buffer_object(binding->BufferObject);
1431 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
1432 if (!binding->AutomaticSize)
1433 size = MIN2(size, binding->Size);
1434 struct brw_bo *bo =
1435 intel_bufferobj_buffer(brw, intel_bo,
1436 binding->Offset,
1437 size, true);
1438 brw_create_buffer_surface(brw, bo, binding->Offset,
1439 size,
1440 &ssbo_surf_offsets[i]);
1441 }
1442 }
1443
1444 stage_state->push_constants_dirty = true;
1445
1446 if (prog->info.num_ubos || prog->info.num_ssbos)
1447 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1448 }
1449
1450 static void
1451 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
1452 {
1453 struct gl_context *ctx = &brw->ctx;
1454 /* _NEW_PROGRAM */
1455 struct gl_program *prog = ctx->FragmentProgram._Current;
1456
1457 /* BRW_NEW_FS_PROG_DATA */
1458 brw_upload_ubo_surfaces(brw, prog, &brw->wm.base, brw->wm.base.prog_data);
1459 }
1460
1461 const struct brw_tracked_state brw_wm_ubo_surfaces = {
1462 .dirty = {
1463 .mesa = _NEW_PROGRAM,
1464 .brw = BRW_NEW_BATCH |
1465 BRW_NEW_BLORP |
1466 BRW_NEW_FS_PROG_DATA |
1467 BRW_NEW_UNIFORM_BUFFER,
1468 },
1469 .emit = brw_upload_wm_ubo_surfaces,
1470 };
1471
1472 static void
1473 brw_upload_cs_ubo_surfaces(struct brw_context *brw)
1474 {
1475 struct gl_context *ctx = &brw->ctx;
1476 /* _NEW_PROGRAM */
1477 struct gl_program *prog =
1478 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1479
1480 /* BRW_NEW_CS_PROG_DATA */
1481 brw_upload_ubo_surfaces(brw, prog, &brw->cs.base, brw->cs.base.prog_data);
1482 }
1483
1484 const struct brw_tracked_state brw_cs_ubo_surfaces = {
1485 .dirty = {
1486 .mesa = _NEW_PROGRAM,
1487 .brw = BRW_NEW_BATCH |
1488 BRW_NEW_BLORP |
1489 BRW_NEW_CS_PROG_DATA |
1490 BRW_NEW_UNIFORM_BUFFER,
1491 },
1492 .emit = brw_upload_cs_ubo_surfaces,
1493 };
1494
1495 void
1496 brw_upload_abo_surfaces(struct brw_context *brw,
1497 const struct gl_program *prog,
1498 struct brw_stage_state *stage_state,
1499 struct brw_stage_prog_data *prog_data)
1500 {
1501 struct gl_context *ctx = &brw->ctx;
1502 uint32_t *surf_offsets =
1503 &stage_state->surf_offset[prog_data->binding_table.abo_start];
1504
1505 if (prog->info.num_abos) {
1506 for (unsigned i = 0; i < prog->info.num_abos; i++) {
1507 struct gl_atomic_buffer_binding *binding =
1508 &ctx->AtomicBufferBindings[prog->sh.AtomicBuffers[i]->Binding];
1509 struct intel_buffer_object *intel_bo =
1510 intel_buffer_object(binding->BufferObject);
1511 struct brw_bo *bo =
1512 intel_bufferobj_buffer(brw, intel_bo, binding->Offset,
1513 intel_bo->Base.Size - binding->Offset,
1514 true);
1515
1516 brw_emit_buffer_surface_state(brw, &surf_offsets[i], bo,
1517 binding->Offset, ISL_FORMAT_RAW,
1518 bo->size - binding->Offset, 1, true);
1519 }
1520
1521 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1522 }
1523 }
1524
1525 static void
1526 brw_upload_wm_abo_surfaces(struct brw_context *brw)
1527 {
1528 /* _NEW_PROGRAM */
1529 const struct gl_program *wm = brw->fragment_program;
1530
1531 if (wm) {
1532 /* BRW_NEW_FS_PROG_DATA */
1533 brw_upload_abo_surfaces(brw, wm, &brw->wm.base, brw->wm.base.prog_data);
1534 }
1535 }
1536
1537 const struct brw_tracked_state brw_wm_abo_surfaces = {
1538 .dirty = {
1539 .mesa = _NEW_PROGRAM,
1540 .brw = BRW_NEW_ATOMIC_BUFFER |
1541 BRW_NEW_BLORP |
1542 BRW_NEW_BATCH |
1543 BRW_NEW_FS_PROG_DATA,
1544 },
1545 .emit = brw_upload_wm_abo_surfaces,
1546 };
1547
1548 static void
1549 brw_upload_cs_abo_surfaces(struct brw_context *brw)
1550 {
1551 /* _NEW_PROGRAM */
1552 const struct gl_program *cp = brw->compute_program;
1553
1554 if (cp) {
1555 /* BRW_NEW_CS_PROG_DATA */
1556 brw_upload_abo_surfaces(brw, cp, &brw->cs.base, brw->cs.base.prog_data);
1557 }
1558 }
1559
1560 const struct brw_tracked_state brw_cs_abo_surfaces = {
1561 .dirty = {
1562 .mesa = _NEW_PROGRAM,
1563 .brw = BRW_NEW_ATOMIC_BUFFER |
1564 BRW_NEW_BLORP |
1565 BRW_NEW_BATCH |
1566 BRW_NEW_CS_PROG_DATA,
1567 },
1568 .emit = brw_upload_cs_abo_surfaces,
1569 };
1570
1571 static void
1572 brw_upload_cs_image_surfaces(struct brw_context *brw)
1573 {
1574 /* _NEW_PROGRAM */
1575 const struct gl_program *cp = brw->compute_program;
1576
1577 if (cp) {
1578 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1579 brw_upload_image_surfaces(brw, cp, &brw->cs.base,
1580 brw->cs.base.prog_data);
1581 }
1582 }
1583
1584 const struct brw_tracked_state brw_cs_image_surfaces = {
1585 .dirty = {
1586 .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
1587 .brw = BRW_NEW_BATCH |
1588 BRW_NEW_BLORP |
1589 BRW_NEW_CS_PROG_DATA |
1590 BRW_NEW_IMAGE_UNITS
1591 },
1592 .emit = brw_upload_cs_image_surfaces,
1593 };
1594
1595 static uint32_t
1596 get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
1597 {
1598 const struct gen_device_info *devinfo = &brw->screen->devinfo;
1599 enum isl_format hw_format = brw_isl_format_for_mesa_format(format);
1600 if (access == GL_WRITE_ONLY) {
1601 return hw_format;
1602 } else if (isl_has_matching_typed_storage_image_format(devinfo, hw_format)) {
1603 /* Typed surface reads support a very limited subset of the shader
1604 * image formats. Translate it into the closest format the
1605 * hardware supports.
1606 */
1607 return isl_lower_storage_image_format(devinfo, hw_format);
1608 } else {
1609 /* The hardware doesn't actually support a typed format that we can use
1610 * so we have to fall back to untyped read/write messages.
1611 */
1612 return ISL_FORMAT_RAW;
1613 }
1614 }
1615
1616 static void
1617 update_default_image_param(struct brw_context *brw,
1618 struct gl_image_unit *u,
1619 unsigned surface_idx,
1620 struct brw_image_param *param)
1621 {
1622 memset(param, 0, sizeof(*param));
1623 param->surface_idx = surface_idx;
1624 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1625 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1626 * detailed explanation of these parameters.
1627 */
1628 param->swizzling[0] = 0xff;
1629 param->swizzling[1] = 0xff;
1630 }
1631
1632 static void
1633 update_buffer_image_param(struct brw_context *brw,
1634 struct gl_image_unit *u,
1635 unsigned surface_idx,
1636 struct brw_image_param *param)
1637 {
1638 struct gl_buffer_object *obj = u->TexObj->BufferObject;
1639 const uint32_t size = MIN2((uint32_t)u->TexObj->BufferSize, obj->Size);
1640 update_default_image_param(brw, u, surface_idx, param);
1641
1642 param->size[0] = size / _mesa_get_format_bytes(u->_ActualFormat);
1643 param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
1644 }
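/* For instance (illustration only): a 4096-byte buffer bound as an RGBA32F
 * image has a 16-byte texel, so the shader-visible parameters become
 * param->size[0] = 256 texels and param->stride[0] = 16 bytes.
 */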
1645
1646 static unsigned
1647 get_image_num_layers(const struct intel_mipmap_tree *mt, GLenum target,
1648 unsigned level)
1649 {
1650 if (target == GL_TEXTURE_CUBE_MAP)
1651 return 6;
1652
1653 if (mt->surf.size > 0) {
1654 return target == GL_TEXTURE_3D ?
1655 minify(mt->surf.logical_level0_px.depth, level) :
1656 mt->surf.logical_level0_px.array_len;
1657 }
1658
1659 return target == GL_TEXTURE_3D ?
1660 minify(mt->logical_depth0, level) : mt->logical_depth0;
1661 }
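/* Example (illustration only): a layered GL_TEXTURE_3D image bound at
 * level 2 of a 64-deep miptree reports minify(64, 2) = 16 layers, while a
 * cube map always reports 6 regardless of level.
 */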
1662
1663 static void
1664 update_image_surface(struct brw_context *brw,
1665 struct gl_image_unit *u,
1666 GLenum access,
1667 unsigned surface_idx,
1668 uint32_t *surf_offset,
1669 struct brw_image_param *param)
1670 {
1671 if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
1672 struct gl_texture_object *obj = u->TexObj;
1673 const unsigned format = get_image_format(brw, u->_ActualFormat, access);
1674
1675 if (obj->Target == GL_TEXTURE_BUFFER) {
1676 struct intel_buffer_object *intel_obj =
1677 intel_buffer_object(obj->BufferObject);
1678 const unsigned texel_size = (format == ISL_FORMAT_RAW ? 1 :
1679 _mesa_get_format_bytes(u->_ActualFormat));
1680
1681 brw_emit_buffer_surface_state(
1682 brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
1683 format, intel_obj->Base.Size, texel_size,
1684 access != GL_READ_ONLY);
1685
1686 update_buffer_image_param(brw, u, surface_idx, param);
1687
1688 } else {
1689 struct intel_texture_object *intel_obj = intel_texture_object(obj);
1690 struct intel_mipmap_tree *mt = intel_obj->mt;
1691 const unsigned num_layers = u->Layered ?
1692 get_image_num_layers(mt, obj->Target, u->Level) : 1;
1693
1694 struct isl_view view = {
1695 .format = format,
1696 .base_level = obj->MinLevel + u->Level,
1697 .levels = 1,
1698 .base_array_layer = obj->MinLayer + u->_Layer,
1699 .array_len = num_layers,
1700 .swizzle = ISL_SWIZZLE_IDENTITY,
1701 .usage = ISL_SURF_USAGE_STORAGE_BIT,
1702 };
1703
1704 if (format == ISL_FORMAT_RAW) {
1705 brw_emit_buffer_surface_state(
1706 brw, surf_offset, mt->bo, mt->offset,
1707 format, mt->bo->size - mt->offset, 1 /* pitch */,
1708 access != GL_READ_ONLY);
1709
1710 } else {
1711 const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
1712 assert(!intel_miptree_has_color_unresolved(mt,
1713 view.base_level, 1,
1714 view.base_array_layer,
1715 view.array_len));
1716 brw_emit_surface_state(brw, mt, INTEL_AUX_BUFFER_DISABLED,
1717 mt->target, view, tex_mocs[brw->gen],
1718 surf_offset, surf_index,
1719 I915_GEM_DOMAIN_SAMPLER,
1720 access == GL_READ_ONLY ? 0 :
1721 I915_GEM_DOMAIN_SAMPLER);
1722 }
1723
1724 struct isl_surf surf;
1725 intel_miptree_get_isl_surf(brw, mt, &surf);
1726
1727 isl_surf_fill_image_param(&brw->isl_dev, param, &surf, &view);
1728 param->surface_idx = surface_idx;
1729 }
1730
1731 } else {
1732 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset);
1733 update_default_image_param(brw, u, surface_idx, param);
1734 }
1735 }
1736
1737 void
1738 brw_upload_image_surfaces(struct brw_context *brw,
1739 const struct gl_program *prog,
1740 struct brw_stage_state *stage_state,
1741 struct brw_stage_prog_data *prog_data)
1742 {
1743 assert(prog);
1744 struct gl_context *ctx = &brw->ctx;
1745
1746 if (prog->info.num_images) {
1747 for (unsigned i = 0; i < prog->info.num_images; i++) {
1748 struct gl_image_unit *u = &ctx->ImageUnits[prog->sh.ImageUnits[i]];
1749 const unsigned surf_idx = prog_data->binding_table.image_start + i;
1750
1751 update_image_surface(brw, u, prog->sh.ImageAccess[i],
1752 surf_idx,
1753 &stage_state->surf_offset[surf_idx],
1754 &prog_data->image_param[i]);
1755 }
1756
1757 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1758 /* This may have changed the image metadata dependent on the context
1759 * image unit state and passed to the program as uniforms, make sure
1760 * that push and pull constants are reuploaded.
1761 */
1762 brw->NewGLState |= _NEW_PROGRAM_CONSTANTS;
1763 }
1764 }
1765
1766 static void
1767 brw_upload_wm_image_surfaces(struct brw_context *brw)
1768 {
1769 /* BRW_NEW_FRAGMENT_PROGRAM */
1770 const struct gl_program *wm = brw->fragment_program;
1771
1772 if (wm) {
1773 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1774 brw_upload_image_surfaces(brw, wm, &brw->wm.base,
1775 brw->wm.base.prog_data);
1776 }
1777 }
1778
1779 const struct brw_tracked_state brw_wm_image_surfaces = {
1780 .dirty = {
1781 .mesa = _NEW_TEXTURE,
1782 .brw = BRW_NEW_BATCH |
1783 BRW_NEW_BLORP |
1784 BRW_NEW_FRAGMENT_PROGRAM |
1785 BRW_NEW_FS_PROG_DATA |
1786 BRW_NEW_IMAGE_UNITS
1787 },
1788 .emit = brw_upload_wm_image_surfaces,
1789 };
1790
1791 void
1792 gen4_init_vtable_surface_functions(struct brw_context *brw)
1793 {
1794 brw->vtbl.update_renderbuffer_surface = gen4_update_renderbuffer_surface;
1795 brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
1796 }
1797
1798 void
1799 gen6_init_vtable_surface_functions(struct brw_context *brw)
1800 {
1801 gen4_init_vtable_surface_functions(brw);
1802 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
1803 }
1804
1805 static void
1806 brw_upload_cs_work_groups_surface(struct brw_context *brw)
1807 {
1808 struct gl_context *ctx = &brw->ctx;
1809 /* _NEW_PROGRAM */
1810 struct gl_program *prog =
1811 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1812 /* BRW_NEW_CS_PROG_DATA */
1813 const struct brw_cs_prog_data *cs_prog_data =
1814 brw_cs_prog_data(brw->cs.base.prog_data);
1815
1816 if (prog && cs_prog_data->uses_num_work_groups) {
1817 const unsigned surf_idx =
1818 cs_prog_data->binding_table.work_groups_start;
1819 uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
1820 struct brw_bo *bo;
1821 uint32_t bo_offset;
1822
1823 if (brw->compute.num_work_groups_bo == NULL) {
1824 bo = NULL;
1825 intel_upload_data(brw,
1826 (void *)brw->compute.num_work_groups,
1827 3 * sizeof(GLuint),
1828 sizeof(GLuint),
1829 &bo,
1830 &bo_offset);
1831 } else {
1832 bo = brw->compute.num_work_groups_bo;
1833 bo_offset = brw->compute.num_work_groups_offset;
1834 }
1835
1836 brw_emit_buffer_surface_state(brw, surf_offset,
1837 bo, bo_offset,
1838 ISL_FORMAT_RAW,
1839 3 * sizeof(GLuint), 1, true);
1840 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1841 }
1842 }
1843
1844 const struct brw_tracked_state brw_cs_work_groups_surface = {
1845 .dirty = {
1846 .brw = BRW_NEW_BLORP |
1847 BRW_NEW_CS_PROG_DATA |
1848 BRW_NEW_CS_WORK_GROUPS
1849 },
1850 .emit = brw_upload_cs_work_groups_surface,
1851 };