i965/surface_state: Get the aux usage from the miptree code
src/mesa/drivers/dri/i965/brw_wm_surface_state.c

/*
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */


#include "compiler/nir/nir.h"
#include "main/context.h"
#include "main/blend.h"
#include "main/mtypes.h"
#include "main/samplerobj.h"
#include "main/shaderimage.h"
#include "main/teximage.h"
#include "program/prog_parameter.h"
#include "program/prog_instruction.h"
#include "main/framebuffer.h"
#include "main/shaderapi.h"

#include "isl/isl.h"

#include "intel_mipmap_tree.h"
#include "intel_batchbuffer.h"
#include "intel_tex.h"
#include "intel_fbo.h"
#include "intel_buffer_objects.h"

#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_wm.h"

enum {
   INTEL_RENDERBUFFER_LAYERED = 1 << 0,
   INTEL_AUX_BUFFER_DISABLED = 1 << 1,
};

uint32_t tex_mocs[] = {
   [7] = GEN7_MOCS_L3,
   [8] = BDW_MOCS_WB,
   [9] = SKL_MOCS_WB,
   [10] = CNL_MOCS_WB,
};

uint32_t rb_mocs[] = {
   [7] = GEN7_MOCS_L3,
   [8] = BDW_MOCS_PTE,
   [9] = SKL_MOCS_PTE,
   [10] = CNL_MOCS_PTE,
};
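
/* Both tables above are indexed by brw->gen.  Sampled surfaces use a
 * write-back (WB) cacheability policy, while render targets use the MOCS
 * value that defers to the page table entry (PTE), so that buffers which may
 * be scanned out (and therefore mapped uncached) keep the cacheability their
 * mapping requires.
 */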

static void
get_isl_surf(struct brw_context *brw, struct intel_mipmap_tree *mt,
             GLenum target, struct isl_view *view,
             uint32_t *tile_x, uint32_t *tile_y,
             uint32_t *offset, struct isl_surf *surf)
{
   *surf = mt->surf;

   const enum isl_dim_layout dim_layout =
      get_isl_dim_layout(&brw->screen->devinfo, mt->surf.tiling, target);

   if (surf->dim_layout == dim_layout)
      return;

   /* The layout of the specified texture target is not compatible with the
    * actual layout of the miptree structure in memory -- you're entering
    * dangerous territory.  This can only work if you only intend to access
    * a single level and slice of the texture, and if the hardware supports
    * the tile offset feature to allow non-tile-aligned base offsets, since
    * we'll have to point the hardware at the first texel of the level
    * instead of relying on the usual base level/layer controls.
    */
   assert(brw->has_surface_tile_offset);
   assert(view->levels == 1 && view->array_len == 1);
   assert(*tile_x == 0 && *tile_y == 0);

   *offset += intel_miptree_get_tile_offsets(mt, view->base_level,
                                             view->base_array_layer,
                                             tile_x, tile_y);

   /* Minify the logical dimensions of the texture. */
   const unsigned l = view->base_level - mt->first_level;
   surf->logical_level0_px.width = minify(surf->logical_level0_px.width, l);
   surf->logical_level0_px.height = surf->dim <= ISL_SURF_DIM_1D ? 1 :
      minify(surf->logical_level0_px.height, l);
   surf->logical_level0_px.depth = surf->dim <= ISL_SURF_DIM_2D ? 1 :
      minify(surf->logical_level0_px.depth, l);

   /* Only the base level and layer can be addressed with the overridden
    * layout.
    */
   surf->logical_level0_px.array_len = 1;
   surf->levels = 1;
   surf->dim_layout = dim_layout;

   /* The requested slice of the texture is now at the base level and
    * layer.
    */
   view->base_level = 0;
   view->base_array_layer = 0;
}
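
/* Illustrative note (not from the original sources): after the override
 * above, a request for, say, level 2/layer 0 of an incompatibly laid out
 * miptree is presented to the hardware as a one-level, one-layer surface
 * whose base address (plus intra-tile x/y offsets) points at the first
 * texel of that level, with view->base_level/base_array_layer reset to 0.
 */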

static void
brw_emit_surface_state(struct brw_context *brw,
                       struct intel_mipmap_tree *mt,
                       GLenum target, struct isl_view view,
                       enum isl_aux_usage aux_usage,
                       uint32_t mocs, uint32_t *surf_offset, int surf_index,
                       unsigned read_domains, unsigned write_domains)
{
   uint32_t tile_x = mt->level[0].level_x;
   uint32_t tile_y = mt->level[0].level_y;
   uint32_t offset = mt->offset;

   struct isl_surf surf;

   get_isl_surf(brw, mt, target, &view, &tile_x, &tile_y, &offset, &surf);

   union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } };

   struct brw_bo *aux_bo;
   struct isl_surf *aux_surf = NULL;
   uint64_t aux_offset = 0;
   switch (aux_usage) {
   case ISL_AUX_USAGE_MCS:
   case ISL_AUX_USAGE_CCS_D:
   case ISL_AUX_USAGE_CCS_E:
      aux_surf = &mt->mcs_buf->surf;
      aux_bo = mt->mcs_buf->bo;
      aux_offset = mt->mcs_buf->bo->offset64 + mt->mcs_buf->offset;
      break;

   case ISL_AUX_USAGE_HIZ:
      aux_surf = &mt->hiz_buf->surf;
      aux_bo = mt->hiz_buf->bo;
      aux_offset = mt->hiz_buf->bo->offset64;
      break;

   case ISL_AUX_USAGE_NONE:
      break;
   }

   if (aux_usage != ISL_AUX_USAGE_NONE) {
      /* We only really need a clear color if we also have an auxiliary
       * surface.  Without one, it does nothing.
       */
      clear_color = mt->fast_clear_color;
   }

   void *state = brw_state_batch(brw,
                                 brw->isl_dev.ss.size,
                                 brw->isl_dev.ss.align,
                                 surf_offset);

   isl_surf_fill_state(&brw->isl_dev, state, .surf = &surf, .view = &view,
                       .address = mt->bo->offset64 + offset,
                       .aux_surf = aux_surf, .aux_usage = aux_usage,
                       .aux_address = aux_offset,
                       .mocs = mocs, .clear_color = clear_color,
                       .x_offset_sa = tile_x, .y_offset_sa = tile_y);

   brw_emit_reloc(&brw->batch, *surf_offset + brw->isl_dev.ss.addr_offset,
                  mt->bo, offset, read_domains, write_domains);

   if (aux_surf) {
      /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
       * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
       * contain other control information.  Since buffer addresses are always
       * on 4k boundaries (and thus have their lower 12 bits zero), we can use
       * an ordinary reloc to do the necessary address translation.
       */
      assert((aux_offset & 0xfff) == 0);
      uint32_t *aux_addr = state + brw->isl_dev.ss.aux_addr_offset;
      brw_emit_reloc(&brw->batch,
                     *surf_offset + brw->isl_dev.ss.aux_addr_offset,
                     aux_bo, *aux_addr - aux_bo->offset64,
                     read_domains, write_domains);
   }
}

uint32_t
brw_update_renderbuffer_surface(struct brw_context *brw,
                                struct gl_renderbuffer *rb,
                                uint32_t flags, unsigned unit /* unused */,
                                uint32_t surf_index)
{
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   struct intel_mipmap_tree *mt = irb->mt;

   enum isl_aux_usage aux_usage =
      intel_miptree_render_aux_usage(brw, mt, ctx->Color.sRGBEnabled);

   if (flags & INTEL_AUX_BUFFER_DISABLED) {
      assert(brw->gen >= 9);
      aux_usage = ISL_AUX_USAGE_NONE;
   }

   assert(brw_render_target_supported(brw, rb));

   mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
   if (unlikely(!brw->mesa_format_supports_render[rb_format])) {
      _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
                    __func__, _mesa_get_format_name(rb_format));
   }

   struct isl_view view = {
      .format = brw->mesa_to_isl_render_format[rb_format],
      .base_level = irb->mt_level - irb->mt->first_level,
      .levels = 1,
      .base_array_layer = irb->mt_layer,
      .array_len = MAX2(irb->layer_count, 1),
      .swizzle = ISL_SWIZZLE_IDENTITY,
      .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
   };

   uint32_t offset;
   brw_emit_surface_state(brw, mt, mt->target, view, aux_usage,
                          rb_mocs[brw->gen],
                          &offset, surf_index,
                          I915_GEM_DOMAIN_RENDER,
                          I915_GEM_DOMAIN_RENDER);
   return offset;
}

GLuint
translate_tex_target(GLenum target)
{
   switch (target) {
   case GL_TEXTURE_1D:
   case GL_TEXTURE_1D_ARRAY_EXT:
      return BRW_SURFACE_1D;

   case GL_TEXTURE_RECTANGLE_NV:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_2D:
   case GL_TEXTURE_2D_ARRAY_EXT:
   case GL_TEXTURE_EXTERNAL_OES:
   case GL_TEXTURE_2D_MULTISAMPLE:
   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_3D:
      return BRW_SURFACE_3D;

   case GL_TEXTURE_CUBE_MAP:
   case GL_TEXTURE_CUBE_MAP_ARRAY:
      return BRW_SURFACE_CUBE;

   default:
      unreachable("not reached");
   }
}

uint32_t
brw_get_surface_tiling_bits(enum isl_tiling tiling)
{
   switch (tiling) {
   case ISL_TILING_X:
      return BRW_SURFACE_TILED;
   case ISL_TILING_Y0:
      return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
   default:
      return 0;
   }
}


uint32_t
brw_get_surface_num_multisamples(unsigned num_samples)
{
   if (num_samples > 1)
      return BRW_SURFACE_MULTISAMPLECOUNT_4;
   else
      return BRW_SURFACE_MULTISAMPLECOUNT_1;
}
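
/* Gen6 supports only 4x MSAA, so any multisampled surface can be programmed
 * as MULTISAMPLECOUNT_4 here; gen4-5 do not support multisampling at all.
 */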

/**
 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 * swizzling.
 */
int
brw_get_texture_swizzle(const struct gl_context *ctx,
                        const struct gl_texture_object *t)
{
   const struct gl_texture_image *img = t->Image[0][t->BaseLevel];

   int swizzles[SWIZZLE_NIL + 1] = {
      SWIZZLE_X,
      SWIZZLE_Y,
      SWIZZLE_Z,
      SWIZZLE_W,
      SWIZZLE_ZERO,
      SWIZZLE_ONE,
      SWIZZLE_NIL
   };

   if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
       img->_BaseFormat == GL_DEPTH_STENCIL) {
      GLenum depth_mode = t->DepthMode;

      /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
       * with depth component data specified with a sized internal format.
       * Otherwise, it's left at the old default, GL_LUMINANCE.
       */
      if (_mesa_is_gles3(ctx) &&
          img->InternalFormat != GL_DEPTH_COMPONENT &&
          img->InternalFormat != GL_DEPTH_STENCIL) {
         depth_mode = GL_RED;
      }

      switch (depth_mode) {
      case GL_ALPHA:
         swizzles[0] = SWIZZLE_ZERO;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_LUMINANCE:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
         break;
      case GL_INTENSITY:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_RED:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_ONE;
         break;
      }
   }

   GLenum datatype = _mesa_get_format_datatype(img->TexFormat);

   /* If the texture's format is alpha-only, force R, G, and B to
    * 0.0. Similarly, if the texture's format has no alpha channel,
    * force the alpha value read to 1.0. This allows for the
    * implementation to use an RGBA texture for any of these formats
    * without leaking any unexpected values.
    */
   switch (img->_BaseFormat) {
   case GL_ALPHA:
      swizzles[0] = SWIZZLE_ZERO;
      swizzles[1] = SWIZZLE_ZERO;
      swizzles[2] = SWIZZLE_ZERO;
      break;
   case GL_LUMINANCE:
      if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
      }
      break;
   case GL_LUMINANCE_ALPHA:
      if (datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_W;
      }
      break;
   case GL_INTENSITY:
      if (datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
      }
      break;
   case GL_RED:
   case GL_RG:
   case GL_RGB:
      if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0 ||
          img->TexFormat == MESA_FORMAT_RGB_DXT1 ||
          img->TexFormat == MESA_FORMAT_SRGB_DXT1)
         swizzles[3] = SWIZZLE_ONE;
      break;
   }

   return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
                        swizzles[GET_SWZ(t->_Swizzle, 1)],
                        swizzles[GET_SWZ(t->_Swizzle, 2)],
                        swizzles[GET_SWZ(t->_Swizzle, 3)]);
}
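
/* Worked example: for a GL_DEPTH_COMPONENT texture with DepthMode
 * GL_LUMINANCE and identity EXT_texture_swizzle state, the code above
 * yields MAKE_SWIZZLE4(X, X, X, ONE), i.e. the depth value is replicated
 * into r/g/b and alpha reads as 1.0.
 */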

/**
 * Convert a swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+
 * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED).  The mappings are
 *
 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
 *         0          1          2          3             4            5
 *         4          5          6          7             0            1
 *   SCS_RED, SCS_GREEN,  SCS_BLUE, SCS_ALPHA,     SCS_ZERO,     SCS_ONE
 *
 * which is simply adding 4 then modding by 8 (or anding with 7).
 *
 * We then may need to apply workarounds for textureGather hardware bugs.
 */
static unsigned
swizzle_to_scs(GLenum swizzle, bool need_green_to_blue)
{
   unsigned scs = (swizzle + 4) & 7;

   return (need_green_to_blue && scs == HSW_SCS_GREEN) ? HSW_SCS_BLUE : scs;
}
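
/* For instance, SWIZZLE_W (3) maps to (3 + 4) & 7 == 7 == HSW_SCS_ALPHA and
 * SWIZZLE_ZERO (4) maps to (4 + 4) & 7 == 0 == HSW_SCS_ZERO, matching the
 * table in the comment above.
 */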

static bool
brw_aux_surface_disabled(const struct brw_context *brw,
                         const struct intel_mipmap_tree *mt)
{
   const struct gl_framebuffer *fb = brw->ctx.DrawBuffer;

   for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
      const struct intel_renderbuffer *irb =
         intel_renderbuffer(fb->_ColorDrawBuffers[i]);

      if (irb && irb->mt == mt)
         return brw->draw_aux_buffer_disabled[i];
   }

   return false;
}

void
brw_update_texture_surface(struct gl_context *ctx,
                           unsigned unit,
                           uint32_t *surf_offset,
                           bool for_gather,
                           uint32_t plane)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current;

   if (obj->Target == GL_TEXTURE_BUFFER) {
      brw_update_buffer_texture_surface(ctx, unit, surf_offset);

   } else {
      struct intel_texture_object *intel_obj = intel_texture_object(obj);
      struct intel_mipmap_tree *mt = intel_obj->mt;

      if (plane > 0) {
         if (mt->plane[plane - 1] == NULL)
            return;
         mt = mt->plane[plane - 1];
      }

      struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
      /* If this is a view with restricted NumLayers, then our effective depth
       * is not just the miptree depth.
       */
      unsigned view_num_layers;
      if (obj->Immutable && obj->Target != GL_TEXTURE_3D) {
         view_num_layers = obj->NumLayers;
      } else {
         view_num_layers = mt->surf.dim == ISL_SURF_DIM_3D ?
                              mt->surf.logical_level0_px.depth :
                              mt->surf.logical_level0_px.array_len;
      }

      /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
       * texturing functions that return a float, as our code generation always
       * selects the .x channel (which would always be 0).
       */
      struct gl_texture_image *firstImage = obj->Image[0][obj->BaseLevel];
      const bool alpha_depth = obj->DepthMode == GL_ALPHA &&
         (firstImage->_BaseFormat == GL_DEPTH_COMPONENT ||
          firstImage->_BaseFormat == GL_DEPTH_STENCIL);
      const unsigned swizzle = (unlikely(alpha_depth) ? SWIZZLE_XYZW :
                                brw_get_texture_swizzle(&brw->ctx, obj));

      mesa_format mesa_fmt = plane == 0 ? intel_obj->_Format : mt->format;
      enum isl_format format = translate_tex_format(brw, mesa_fmt,
                                                    sampler->sRGBDecode);

      /* Implement gen6 and gen7 gather work-around */
      bool need_green_to_blue = false;
      if (for_gather) {
         if (brw->gen == 7 && (format == ISL_FORMAT_R32G32_FLOAT ||
                               format == ISL_FORMAT_R32G32_SINT ||
                               format == ISL_FORMAT_R32G32_UINT)) {
            format = ISL_FORMAT_R32G32_FLOAT_LD;
            need_green_to_blue = brw->is_haswell;
         } else if (brw->gen == 6) {
            /* Sandybridge's gather4 message is broken for integer formats.
             * To work around this, we pretend the surface is UNORM for
             * 8 or 16-bit formats, and emit shader instructions to recover
             * the real INT/UINT value.  For 32-bit formats, we pretend
             * the surface is FLOAT, and simply reinterpret the resulting
             * bits.
             */
            switch (format) {
            case ISL_FORMAT_R8_SINT:
            case ISL_FORMAT_R8_UINT:
               format = ISL_FORMAT_R8_UNORM;
               break;

            case ISL_FORMAT_R16_SINT:
            case ISL_FORMAT_R16_UINT:
               format = ISL_FORMAT_R16_UNORM;
               break;

            case ISL_FORMAT_R32_SINT:
            case ISL_FORMAT_R32_UINT:
               format = ISL_FORMAT_R32_FLOAT;
               break;

            default:
               break;
            }
         }
      }

      if (obj->StencilSampling && firstImage->_BaseFormat == GL_DEPTH_STENCIL) {
         if (brw->gen <= 7) {
            assert(mt->r8stencil_mt && !mt->stencil_mt->r8stencil_needs_update);
            mt = mt->r8stencil_mt;
         } else {
            mt = mt->stencil_mt;
         }
         format = ISL_FORMAT_R8_UINT;
      } else if (brw->gen <= 7 && mt->format == MESA_FORMAT_S_UINT8) {
         assert(mt->r8stencil_mt && !mt->r8stencil_needs_update);
         mt = mt->r8stencil_mt;
         format = ISL_FORMAT_R8_UINT;
      }

      const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];

      struct isl_view view = {
         .format = format,
         .base_level = obj->MinLevel + obj->BaseLevel,
         .levels = intel_obj->_MaxLevel - obj->BaseLevel + 1,
         .base_array_layer = obj->MinLayer,
         .array_len = view_num_layers,
         .swizzle = {
            .r = swizzle_to_scs(GET_SWZ(swizzle, 0), need_green_to_blue),
            .g = swizzle_to_scs(GET_SWZ(swizzle, 1), need_green_to_blue),
            .b = swizzle_to_scs(GET_SWZ(swizzle, 2), need_green_to_blue),
            .a = swizzle_to_scs(GET_SWZ(swizzle, 3), need_green_to_blue),
         },
         .usage = ISL_SURF_USAGE_TEXTURE_BIT,
      };

      if (obj->Target == GL_TEXTURE_CUBE_MAP ||
          obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY)
         view.usage |= ISL_SURF_USAGE_CUBE_BIT;

      enum isl_aux_usage aux_usage =
         intel_miptree_texture_aux_usage(brw, mt, format);

      if (brw_aux_surface_disabled(brw, mt))
         aux_usage = ISL_AUX_USAGE_NONE;

      brw_emit_surface_state(brw, mt, mt->target, view, aux_usage,
                             tex_mocs[brw->gen],
                             surf_offset, surf_index,
                             I915_GEM_DOMAIN_SAMPLER, 0);
   }
}

void
brw_emit_buffer_surface_state(struct brw_context *brw,
                              uint32_t *out_offset,
                              struct brw_bo *bo,
                              unsigned buffer_offset,
                              unsigned surface_format,
                              unsigned buffer_size,
                              unsigned pitch,
                              bool rw)
{
   uint32_t *dw = brw_state_batch(brw,
                                  brw->isl_dev.ss.size,
                                  brw->isl_dev.ss.align,
                                  out_offset);

   isl_buffer_fill_state(&brw->isl_dev, dw,
                         .address = (bo ? bo->offset64 : 0) + buffer_offset,
                         .size = buffer_size,
                         .format = surface_format,
                         .stride = pitch,
                         .mocs = tex_mocs[brw->gen]);

   if (bo) {
      brw_emit_reloc(&brw->batch, *out_offset + brw->isl_dev.ss.addr_offset,
                     bo, buffer_offset,
                     I915_GEM_DOMAIN_SAMPLER,
                     (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
   }
}

void
brw_update_buffer_texture_surface(struct gl_context *ctx,
                                  unsigned unit,
                                  uint32_t *surf_offset)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   struct intel_buffer_object *intel_obj =
      intel_buffer_object(tObj->BufferObject);
   uint32_t size = tObj->BufferSize;
   struct brw_bo *bo = NULL;
   mesa_format format = tObj->_BufferObjectFormat;
   const enum isl_format isl_format = brw_isl_format_for_mesa_format(format);
   int texel_size = _mesa_get_format_bytes(format);

   if (intel_obj) {
      size = MIN2(size, intel_obj->Base.Size);
      bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size,
                                  false);
   }

   /* The ARB_texture_buffer_object spec says:
    *
    *    "The number of texels in the buffer texture's texel array is given by
    *
    *       floor(<buffer_size> / (<components> * sizeof(<base_type>))),
    *
    *    where <buffer_size> is the size of the buffer object, in basic
    *    machine units and <components> and <base_type> are the element count
    *    and base data type for elements, as specified in Table X.1.  The
    *    number of texels in the texel array is then clamped to the
    *    implementation-dependent limit MAX_TEXTURE_BUFFER_SIZE_ARB."
    *
    * We need to clamp the size in bytes to MAX_TEXTURE_BUFFER_SIZE * stride,
    * so that when ISL divides by stride to obtain the number of texels, that
    * texel count is clamped to MAX_TEXTURE_BUFFER_SIZE.
    */
   size = MIN2(size, ctx->Const.MaxTextureBufferSize * (unsigned) texel_size);
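
   /* E.g. (hypothetical numbers): with an RGBA32F buffer texture
    * (texel_size == 16) and MaxTextureBufferSize == 1 << 27 texels, the
    * clamp above caps the surface at 2 GiB, so the texel count ISL derives
    * by dividing by the stride never exceeds the advertised limit.
    */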

   if (isl_format == ISL_FORMAT_UNSUPPORTED) {
      _mesa_problem(NULL, "bad format %s for texture buffer\n",
                    _mesa_get_format_name(format));
   }

   brw_emit_buffer_surface_state(brw, surf_offset, bo,
                                 tObj->BufferOffset,
                                 isl_format,
                                 size,
                                 texel_size,
                                 false /* rw */);
}

/**
 * Create the constant buffer surface.  Vertex/fragment shader constants will be
 * read from this buffer with Data Port Read instructions/messages.
 */
void
brw_create_constant_surface(struct brw_context *brw,
                            struct brw_bo *bo,
                            uint32_t offset,
                            uint32_t size,
                            uint32_t *out_offset)
{
   brw_emit_buffer_surface_state(brw, out_offset, bo, offset,
                                 ISL_FORMAT_R32G32B32A32_FLOAT,
                                 size, 1, false);
}

/**
 * Create the buffer surface.  Shader buffer variables will be
 * read from / written to this buffer with Data Port Read/Write
 * instructions/messages.
 */
void
brw_create_buffer_surface(struct brw_context *brw,
                          struct brw_bo *bo,
                          uint32_t offset,
                          uint32_t size,
                          uint32_t *out_offset)
{
   /* Use a raw surface so we can reuse existing untyped read/write/atomic
    * messages.  We need these specifically for the fragment shader since they
    * include a pixel mask header that we need to ensure correct behavior
    * with helper invocations, which cannot write to the buffer.
    */
   brw_emit_buffer_surface_state(brw, out_offset, bo, offset,
                                 ISL_FORMAT_RAW,
                                 size, 1, true);
}

/**
 * Set up a binding table entry for use by stream output logic (transform
 * feedback).
 *
 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
 */
void
brw_update_sol_surface(struct brw_context *brw,
                       struct gl_buffer_object *buffer_obj,
                       uint32_t *out_offset, unsigned num_vector_components,
                       unsigned stride_dwords, unsigned offset_dwords)
{
   struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
   uint32_t offset_bytes = 4 * offset_dwords;
   struct brw_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
                                              offset_bytes,
                                              buffer_obj->Size - offset_bytes,
                                              true);
   uint32_t *surf = brw_state_batch(brw, 6 * 4, 32, out_offset);
   uint32_t pitch_minus_1 = 4*stride_dwords - 1;
   size_t size_dwords = buffer_obj->Size / 4;
   uint32_t buffer_size_minus_1, width, height, depth, surface_format;

   /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
    * too big to map using a single binding table entry?
    */
   assert((size_dwords - offset_dwords) / stride_dwords
          <= BRW_MAX_NUM_BUFFER_ENTRIES);

   if (size_dwords > offset_dwords + num_vector_components) {
      /* There is room for at least 1 transform feedback output in the buffer.
       * Compute the number of additional transform feedback outputs the
       * buffer has room for.
       */
      buffer_size_minus_1 =
         (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
   } else {
      /* There isn't even room for a single transform feedback output in the
       * buffer.  We can't configure the binding table entry to prevent output
       * entirely; we'll have to rely on the geometry shader to detect
       * overflow.  But to minimize the damage in case of a bug, set up the
       * binding table entry to just allow a single output.
       */
      buffer_size_minus_1 = 0;
   }
   width = buffer_size_minus_1 & 0x7f;
   height = (buffer_size_minus_1 & 0xfff80) >> 7;
   depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
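
   /* buffer_size_minus_1 is spread across the Width (bits 6:0), Height
    * (bits 19:7) and Depth (bits 26:20) fields of SURFACE_STATE below, which
    * is how gen4-6 encode the element count of a buffer surface.
    */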

   switch (num_vector_components) {
   case 1:
      surface_format = ISL_FORMAT_R32_FLOAT;
      break;
   case 2:
      surface_format = ISL_FORMAT_R32G32_FLOAT;
      break;
   case 3:
      surface_format = ISL_FORMAT_R32G32B32_FLOAT;
      break;
   case 4:
      surface_format = ISL_FORMAT_R32G32B32A32_FLOAT;
      break;
   default:
      unreachable("Invalid vector size for transform feedback output");
   }

   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
             BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
             surface_format << BRW_SURFACE_FORMAT_SHIFT |
             BRW_SURFACE_RC_READ_WRITE;
   surf[1] = bo->offset64 + offset_bytes; /* reloc */
   surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
              height << BRW_SURFACE_HEIGHT_SHIFT);
   surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = 0;
   surf[5] = 0;

   /* Emit relocation to surface contents. */
   brw_emit_reloc(&brw->batch, *out_offset + 4, bo, offset_bytes,
                  I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
}

/* Creates a new WM constant buffer reflecting the current fragment program's
 * constants, if needed by the fragment program.
 *
 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 * state atom.
 */
static void
brw_upload_wm_pull_constants(struct brw_context *brw)
{
   struct brw_stage_state *stage_state = &brw->wm.base;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct brw_program *fp = (struct brw_program *) brw->fragment_program;
   /* BRW_NEW_FS_PROG_DATA */
   struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data;

   _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_FRAGMENT);
   /* _NEW_PROGRAM_CONSTANTS */
   brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program,
                             stage_state, prog_data);
}

const struct brw_tracked_state brw_wm_pull_constants = {
   .dirty = {
      .mesa = _NEW_PROGRAM_CONSTANTS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_BLORP |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = brw_upload_wm_pull_constants,
};

/**
 * Creates a null renderbuffer surface.
 *
 * This is used when the shader doesn't write to any color output.  An FB
 * write to target 0 will still be emitted, because that's how the thread is
 * terminated (and computed depth is returned), so we need to have the
 * hardware discard the target 0 color output.
 */
static void
brw_emit_null_surface_state(struct brw_context *brw,
                            unsigned width,
                            unsigned height,
                            unsigned samples,
                            uint32_t *out_offset)
{
   /* From the Sandy Bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
    * Notes):
    *
    *    A null surface will be used in instances where an actual surface is
    *    not bound.  When a write message is generated to a null surface, no
    *    actual surface is written to.  When a read message (including any
    *    sampling engine message) is generated to a null surface, the result
    *    is all zeros.  Note that a null surface type is allowed to be used
    *    with all messages, even if it is not specifically indicated as
    *    supported.  All of the remaining fields in surface state are ignored
    *    for null surfaces, with the following exceptions:
    *
    *    - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
    *      depth buffer's corresponding state for all render target surfaces,
    *      including null.
    *
    *    - Surface Format must be R8G8B8A8_UNORM.
    */
   unsigned surface_type = BRW_SURFACE_NULL;
   struct brw_bo *bo = NULL;
   unsigned pitch_minus_1 = 0;
   uint32_t multisampling_state = 0;
   uint32_t *surf = brw_state_batch(brw, 6 * 4, 32, out_offset);

   if (samples > 1) {
      /* On Gen6, null render targets seem to cause GPU hangs when
       * multisampling.  So work around this problem by rendering into a
       * dummy color buffer.
       *
       * To decrease the amount of memory needed by the workaround buffer, we
       * set its pitch to 128 bytes (the width of a Y tile).  This means that
       * the amount of memory needed for the workaround buffer is
       * (width_in_tiles + height_in_tiles - 1) tiles.
       *
       * Note that since the workaround buffer will be interpreted by the
       * hardware as an interleaved multisampled buffer, we need to compute
       * width_in_tiles and height_in_tiles by dividing the width and height
       * by 16 rather than the normal Y-tile size of 32.
       */
      unsigned width_in_tiles = ALIGN(width, 16) / 16;
      unsigned height_in_tiles = ALIGN(height, 16) / 16;
      unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
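
      /* E.g. (hypothetical numbers): a 256x256 null render target needs
       * ALIGN(256, 16) / 16 == 16 tiles in each dimension, i.e.
       * (16 + 16 - 1) * 4096 == 126976 bytes of scratch.
       */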
      brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
                         size_needed);
      bo = brw->wm.multisampled_null_render_target_bo;
      surface_type = BRW_SURFACE_2D;
      pitch_minus_1 = 127;
      multisampling_state = brw_get_surface_num_multisamples(samples);
   }

   surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
              ISL_FORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
   if (brw->gen < 6) {
      surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
   }
   surf[1] = bo ? bo->offset64 : 0;
   surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   /* From the Sandy Bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
    * Notes):
    *
    *    If Surface Type is SURFTYPE_NULL, this field must be TRUE
    */
   surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = multisampling_state;
   surf[5] = 0;

   if (bo) {
      brw_emit_reloc(&brw->batch, *out_offset + 4, bo, 0,
                     I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
   }
}

/**
 * Sets up a surface state structure to point at the given region.
 * While it is only used for the front/back buffer currently, it should be
 * usable for further buffers when doing ARB_draw_buffers support.
 */
static uint32_t
gen4_update_renderbuffer_surface(struct brw_context *brw,
                                 struct gl_renderbuffer *rb,
                                 uint32_t flags, unsigned unit,
                                 uint32_t surf_index)
{
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   struct intel_mipmap_tree *mt = irb->mt;
   uint32_t *surf;
   uint32_t tile_x, tile_y;
   enum isl_format format;
   uint32_t offset;
   /* _NEW_BUFFERS */
   mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
   /* BRW_NEW_FS_PROG_DATA */

   assert(!(flags & INTEL_RENDERBUFFER_LAYERED));
   assert(!(flags & INTEL_AUX_BUFFER_DISABLED));

   if (rb->TexImage && !brw->has_surface_tile_offset) {
      intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);

      if (tile_x != 0 || tile_y != 0) {
         /* Original gen4 hardware couldn't draw to a non-tile-aligned
          * destination in a miptree unless you actually set up your
          * renderbuffer as a miptree and used the fragile
          * lod/array_index/etc. controls to select the image.  So, instead,
          * we just make a new single-level miptree and render into that.
          */
         intel_renderbuffer_move_to_temp(brw, irb, false);
         assert(irb->align_wa_mt);
         mt = irb->align_wa_mt;
      }
   }

   surf = brw_state_batch(brw, 6 * 4, 32, &offset);

   format = brw->mesa_to_isl_render_format[rb_format];
   if (unlikely(!brw->mesa_format_supports_render[rb_format])) {
      _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
                    __func__, _mesa_get_format_name(rb_format));
   }

   surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
              format << BRW_SURFACE_FORMAT_SHIFT);

   /* reloc */
   assert(mt->offset % mt->cpp == 0);
   surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
              mt->bo->offset64 + mt->offset);

   surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(mt->surf.tiling) |
              (mt->surf.row_pitch - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = brw_get_surface_num_multisamples(mt->surf.samples);

   assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
   /* Note that the low bits of these fields are missing, so
    * there's the possibility of getting in trouble.
    */
   assert(tile_x % 4 == 0);
   assert(tile_y % 2 == 0);
   surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
              (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
              (mt->surf.image_alignment_el.height == 4 ?
               BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));

   if (brw->gen < 6) {
      /* _NEW_COLOR */
      if (!ctx->Color.ColorLogicOpEnabled && !ctx->Color._AdvancedBlendMode &&
          (ctx->Color.BlendEnabled & (1 << unit)))
         surf[0] |= BRW_SURFACE_BLEND_ENABLED;

      if (!ctx->Color.ColorMask[unit][0])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
      if (!ctx->Color.ColorMask[unit][1])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
      if (!ctx->Color.ColorMask[unit][2])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;

      /* Disable writes to the alpha component when the renderbuffer is
       * XRGB.
       */
      if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
          !ctx->Color.ColorMask[unit][3]) {
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
      }
   }

   brw_emit_reloc(&brw->batch, offset + 4, mt->bo, surf[1] - mt->bo->offset64,
                  I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);

   return offset;
}

/**
 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
 */
void
brw_update_renderbuffer_surfaces(struct brw_context *brw,
                                 const struct gl_framebuffer *fb,
                                 uint32_t render_target_start,
                                 uint32_t *surf_offset)
{
   GLuint i;
   const unsigned int w = _mesa_geometric_width(fb);
   const unsigned int h = _mesa_geometric_height(fb);
   const unsigned int s = _mesa_geometric_samples(fb);

   /* Update surfaces for drawing buffers */
   if (fb->_NumColorDrawBuffers >= 1) {
      for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
         const uint32_t surf_index = render_target_start + i;
         const int flags = (_mesa_geometric_layers(fb) > 0 ?
                            INTEL_RENDERBUFFER_LAYERED : 0) |
                           (brw->draw_aux_buffer_disabled[i] ?
                            INTEL_AUX_BUFFER_DISABLED : 0);

         if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
            surf_offset[surf_index] =
               brw->vtbl.update_renderbuffer_surface(
                  brw, fb->_ColorDrawBuffers[i], flags, i, surf_index);
         } else {
            brw->vtbl.emit_null_surface_state(brw, w, h, s,
                                              &surf_offset[surf_index]);
         }
      }
   } else {
      const uint32_t surf_index = render_target_start;
      brw->vtbl.emit_null_surface_state(brw, w, h, s,
                                        &surf_offset[surf_index]);
   }
}

static void
update_renderbuffer_surfaces(struct brw_context *brw)
{
   const struct gl_context *ctx = &brw->ctx;

   /* BRW_NEW_FS_PROG_DATA */
   const struct brw_wm_prog_data *wm_prog_data =
      brw_wm_prog_data(brw->wm.base.prog_data);

   /* _NEW_BUFFERS | _NEW_COLOR */
   const struct gl_framebuffer *fb = ctx->DrawBuffer;
   brw_update_renderbuffer_surfaces(
      brw, fb,
      wm_prog_data->binding_table.render_target_start,
      brw->wm.base.surf_offset);
   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS |
              _NEW_COLOR,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_BLORP |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = update_renderbuffer_surfaces,
};

const struct brw_tracked_state gen6_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_BLORP,
   },
   .emit = update_renderbuffer_surfaces,
};

static void
update_renderbuffer_read_surfaces(struct brw_context *brw)
{
   const struct gl_context *ctx = &brw->ctx;

   /* BRW_NEW_FS_PROG_DATA */
   const struct brw_wm_prog_data *wm_prog_data =
      brw_wm_prog_data(brw->wm.base.prog_data);

   /* BRW_NEW_FRAGMENT_PROGRAM */
   if (!ctx->Extensions.MESA_shader_framebuffer_fetch &&
       brw->fragment_program && brw->fragment_program->info.outputs_read) {
      /* _NEW_BUFFERS */
      const struct gl_framebuffer *fb = ctx->DrawBuffer;

      for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
         struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];
         const struct intel_renderbuffer *irb = intel_renderbuffer(rb);
         const unsigned surf_index =
            wm_prog_data->binding_table.render_target_read_start + i;
         uint32_t *surf_offset = &brw->wm.base.surf_offset[surf_index];

         if (irb) {
            const enum isl_format format = brw->mesa_to_isl_render_format[
               _mesa_get_render_format(ctx, intel_rb_format(irb))];
            assert(isl_format_supports_sampling(&brw->screen->devinfo,
                                                format));

            /* Override the target of the texture if the render buffer is a
             * single slice of a 3D texture (since the minimum array element
             * field of the surface state structure is ignored by the sampler
             * unit for 3D textures on some hardware), or if the render buffer
             * is a 1D array (since shaders always provide the array index
             * coordinate at the Z component to avoid state-dependent
             * recompiles when changing the texture target of the
             * framebuffer).
             */
            const GLenum target =
               (irb->mt->target == GL_TEXTURE_3D &&
                irb->layer_count == 1) ? GL_TEXTURE_2D :
               irb->mt->target == GL_TEXTURE_1D_ARRAY ? GL_TEXTURE_2D_ARRAY :
               irb->mt->target;

            const struct isl_view view = {
               .format = format,
               .base_level = irb->mt_level - irb->mt->first_level,
               .levels = 1,
               .base_array_layer = irb->mt_layer,
               .array_len = irb->layer_count,
               .swizzle = ISL_SWIZZLE_IDENTITY,
               .usage = ISL_SURF_USAGE_TEXTURE_BIT,
            };

            enum isl_aux_usage aux_usage =
               intel_miptree_texture_aux_usage(brw, irb->mt, format);
            if (brw->draw_aux_buffer_disabled[i])
               aux_usage = ISL_AUX_USAGE_NONE;

            brw_emit_surface_state(brw, irb->mt, target, view, aux_usage,
                                   tex_mocs[brw->gen],
                                   surf_offset, surf_index,
                                   I915_GEM_DOMAIN_SAMPLER, 0);

         } else {
            brw->vtbl.emit_null_surface_state(
               brw, _mesa_geometric_width(fb), _mesa_geometric_height(fb),
               _mesa_geometric_samples(fb), surf_offset);
         }
      }

      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
   }
}

const struct brw_tracked_state brw_renderbuffer_read_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = update_renderbuffer_read_surfaces,
};

static void
update_stage_texture_surfaces(struct brw_context *brw,
                              const struct gl_program *prog,
                              struct brw_stage_state *stage_state,
                              bool for_gather, uint32_t plane)
{
   if (!prog)
      return;

   struct gl_context *ctx = &brw->ctx;

   uint32_t *surf_offset = stage_state->surf_offset;

   /* BRW_NEW_*_PROG_DATA */
   if (for_gather)
      surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
   else
      surf_offset += stage_state->prog_data->binding_table.plane_start[plane];

   unsigned num_samplers = util_last_bit(prog->SamplersUsed);
   for (unsigned s = 0; s < num_samplers; s++) {
      surf_offset[s] = 0;

      if (prog->SamplersUsed & (1 << s)) {
         const unsigned unit = prog->SamplerUnits[s];

         /* _NEW_TEXTURE */
         if (ctx->Texture.Unit[unit]._Current) {
            brw_update_texture_surface(ctx, unit, surf_offset + s, for_gather, plane);
         }
      }
   }
}


/**
 * Construct SURFACE_STATE objects for enabled textures.
 */
static void
brw_update_texture_surfaces(struct brw_context *brw)
{
   /* BRW_NEW_VERTEX_PROGRAM */
   struct gl_program *vs = (struct gl_program *) brw->vertex_program;

   /* BRW_NEW_TESS_PROGRAMS */
   struct gl_program *tcs = (struct gl_program *) brw->tess_ctrl_program;
   struct gl_program *tes = (struct gl_program *) brw->tess_eval_program;

   /* BRW_NEW_GEOMETRY_PROGRAM */
   struct gl_program *gs = (struct gl_program *) brw->geometry_program;

   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct gl_program *fs = (struct gl_program *) brw->fragment_program;

   /* _NEW_TEXTURE */
   update_stage_texture_surfaces(brw, vs, &brw->vs.base, false, 0);
   update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false, 0);
   update_stage_texture_surfaces(brw, tes, &brw->tes.base, false, 0);
   update_stage_texture_surfaces(brw, gs, &brw->gs.base, false, 0);
   update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 0);

   /* Emit an alternate set of surface states for gather.  This allows the
    * surface format to be overridden for only the gather4 messages.
    */
   if (brw->gen < 8) {
      if (vs && vs->nir->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, vs, &brw->vs.base, true, 0);
      if (tcs && tcs->nir->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true, 0);
      if (tes && tes->nir->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, tes, &brw->tes.base, true, 0);
      if (gs && gs->nir->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, gs, &brw->gs.base, true, 0);
      if (fs && fs->nir->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, fs, &brw->wm.base, true, 0);
   }

   if (fs) {
      update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 1);
      update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 2);
   }

   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_BLORP |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_GEOMETRY_PROGRAM |
             BRW_NEW_GS_PROG_DATA |
             BRW_NEW_TESS_PROGRAMS |
             BRW_NEW_TCS_PROG_DATA |
             BRW_NEW_TES_PROG_DATA |
             BRW_NEW_TEXTURE_BUFFER |
             BRW_NEW_VERTEX_PROGRAM |
             BRW_NEW_VS_PROG_DATA,
   },
   .emit = brw_update_texture_surfaces,
};

static void
brw_update_cs_texture_surfaces(struct brw_context *brw)
{
   /* BRW_NEW_COMPUTE_PROGRAM */
   struct gl_program *cs = (struct gl_program *) brw->compute_program;

   /* _NEW_TEXTURE */
   update_stage_texture_surfaces(brw, cs, &brw->cs.base, false, 0);

   /* Emit an alternate set of surface states for gather.  This allows the
    * surface format to be overridden for only the gather4 messages.
    */
   if (brw->gen < 8) {
      if (cs && cs->nir->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, cs, &brw->cs.base, true, 0);
   }

   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_cs_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_BLORP |
             BRW_NEW_COMPUTE_PROGRAM,
   },
   .emit = brw_update_cs_texture_surfaces,
};


void
brw_upload_ubo_surfaces(struct brw_context *brw, struct gl_program *prog,
                        struct brw_stage_state *stage_state,
                        struct brw_stage_prog_data *prog_data)
{
   struct gl_context *ctx = &brw->ctx;

   if (!prog)
      return;

   uint32_t *ubo_surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.ubo_start];

   for (int i = 0; i < prog->info.num_ubos; i++) {
      struct gl_uniform_buffer_binding *binding =
         &ctx->UniformBufferBindings[prog->sh.UniformBlocks[i]->Binding];

      if (binding->BufferObject == ctx->Shared->NullBufferObj) {
         brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]);
      } else {
         struct intel_buffer_object *intel_bo =
            intel_buffer_object(binding->BufferObject);
         GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
         if (!binding->AutomaticSize)
            size = MIN2(size, binding->Size);
         struct brw_bo *bo =
            intel_bufferobj_buffer(brw, intel_bo,
                                   binding->Offset,
                                   size, false);
         brw_create_constant_surface(brw, bo, binding->Offset,
                                     size,
                                     &ubo_surf_offsets[i]);
      }
   }

   uint32_t *ssbo_surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.ssbo_start];

   for (int i = 0; i < prog->info.num_ssbos; i++) {
      struct gl_shader_storage_buffer_binding *binding =
         &ctx->ShaderStorageBufferBindings[prog->sh.ShaderStorageBlocks[i]->Binding];

      if (binding->BufferObject == ctx->Shared->NullBufferObj) {
         brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]);
      } else {
         struct intel_buffer_object *intel_bo =
            intel_buffer_object(binding->BufferObject);
         GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
         if (!binding->AutomaticSize)
            size = MIN2(size, binding->Size);
         struct brw_bo *bo =
            intel_bufferobj_buffer(brw, intel_bo,
                                   binding->Offset,
                                   size, true);
         brw_create_buffer_surface(brw, bo, binding->Offset,
                                   size,
                                   &ssbo_surf_offsets[i]);
      }
   }

   stage_state->push_constants_dirty = true;

   if (prog->info.num_ubos || prog->info.num_ssbos)
      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

static void
brw_upload_wm_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_program *prog = ctx->FragmentProgram._Current;

   /* BRW_NEW_FS_PROG_DATA */
   brw_upload_ubo_surfaces(brw, prog, &brw->wm.base, brw->wm.base.prog_data);
}

const struct brw_tracked_state brw_wm_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_BLORP |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_UNIFORM_BUFFER,
   },
   .emit = brw_upload_wm_ubo_surfaces,
};

static void
brw_upload_cs_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];

   /* BRW_NEW_CS_PROG_DATA */
   brw_upload_ubo_surfaces(brw, prog, &brw->cs.base, brw->cs.base.prog_data);
}

const struct brw_tracked_state brw_cs_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_BLORP |
             BRW_NEW_CS_PROG_DATA |
             BRW_NEW_UNIFORM_BUFFER,
   },
   .emit = brw_upload_cs_ubo_surfaces,
};

void
brw_upload_abo_surfaces(struct brw_context *brw,
                        const struct gl_program *prog,
                        struct brw_stage_state *stage_state,
                        struct brw_stage_prog_data *prog_data)
{
   struct gl_context *ctx = &brw->ctx;
   uint32_t *surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.abo_start];

   if (prog->info.num_abos) {
      for (unsigned i = 0; i < prog->info.num_abos; i++) {
         struct gl_atomic_buffer_binding *binding =
            &ctx->AtomicBufferBindings[prog->sh.AtomicBuffers[i]->Binding];
         struct intel_buffer_object *intel_bo =
            intel_buffer_object(binding->BufferObject);
         struct brw_bo *bo =
            intel_bufferobj_buffer(brw, intel_bo, binding->Offset,
                                   intel_bo->Base.Size - binding->Offset,
                                   true);

         brw_emit_buffer_surface_state(brw, &surf_offsets[i], bo,
                                       binding->Offset, ISL_FORMAT_RAW,
                                       bo->size - binding->Offset, 1, true);
      }

      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
   }
}

static void
brw_upload_wm_abo_surfaces(struct brw_context *brw)
{
   /* _NEW_PROGRAM */
   const struct gl_program *wm = brw->fragment_program;

   if (wm) {
      /* BRW_NEW_FS_PROG_DATA */
      brw_upload_abo_surfaces(brw, wm, &brw->wm.base, brw->wm.base.prog_data);
   }
}

const struct brw_tracked_state brw_wm_abo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_ATOMIC_BUFFER |
             BRW_NEW_BLORP |
             BRW_NEW_BATCH |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = brw_upload_wm_abo_surfaces,
};

static void
brw_upload_cs_abo_surfaces(struct brw_context *brw)
{
   /* _NEW_PROGRAM */
   const struct gl_program *cp = brw->compute_program;

   if (cp) {
      /* BRW_NEW_CS_PROG_DATA */
      brw_upload_abo_surfaces(brw, cp, &brw->cs.base, brw->cs.base.prog_data);
   }
}

const struct brw_tracked_state brw_cs_abo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_ATOMIC_BUFFER |
             BRW_NEW_BLORP |
             BRW_NEW_BATCH |
             BRW_NEW_CS_PROG_DATA,
   },
   .emit = brw_upload_cs_abo_surfaces,
};

static void
brw_upload_cs_image_surfaces(struct brw_context *brw)
{
   /* _NEW_PROGRAM */
   const struct gl_program *cp = brw->compute_program;

   if (cp) {
      /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
      brw_upload_image_surfaces(brw, cp, &brw->cs.base,
                                brw->cs.base.prog_data);
   }
}

const struct brw_tracked_state brw_cs_image_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_BLORP |
             BRW_NEW_CS_PROG_DATA |
             BRW_NEW_IMAGE_UNITS
   },
   .emit = brw_upload_cs_image_surfaces,
};

static uint32_t
get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   enum isl_format hw_format = brw_isl_format_for_mesa_format(format);
   if (access == GL_WRITE_ONLY) {
      return hw_format;
   } else if (isl_has_matching_typed_storage_image_format(devinfo, hw_format)) {
      /* Typed surface reads support a very limited subset of the shader
       * image formats.  Translate it into the closest format the
       * hardware supports.
       */
      return isl_lower_storage_image_format(devinfo, hw_format);
   } else {
      /* The hardware doesn't actually support a typed format that we can use
       * so we have to fall back to untyped read/write messages.
       */
      return ISL_FORMAT_RAW;
   }
}
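
/* For example, a GL_READ_WRITE image whose packed format has no direct
 * typed-read support on the hardware ends up accessed through a lowered
 * format (or raw untyped messages) and unpacked by shader code; the exact
 * mapping is isl_lower_storage_image_format()'s decision, not something
 * chosen here.
 */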

static void
update_default_image_param(struct brw_context *brw,
                           struct gl_image_unit *u,
                           unsigned surface_idx,
                           struct brw_image_param *param)
{
   memset(param, 0, sizeof(*param));
   param->surface_idx = surface_idx;
   /* Set the swizzling shifts to all-ones to effectively disable swizzling --
    * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
    * detailed explanation of these parameters.
    */
   param->swizzling[0] = 0xff;
   param->swizzling[1] = 0xff;
}

static void
update_buffer_image_param(struct brw_context *brw,
                          struct gl_image_unit *u,
                          unsigned surface_idx,
                          struct brw_image_param *param)
{
   struct gl_buffer_object *obj = u->TexObj->BufferObject;
   const uint32_t size = MIN2((uint32_t) u->TexObj->BufferSize, obj->Size);
   update_default_image_param(brw, u, surface_idx, param);

   param->size[0] = size / _mesa_get_format_bytes(u->_ActualFormat);
   param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
}
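
/* E.g. (hypothetical numbers): a 4096-byte buffer bound with an RGBA32F
 * image format (16 bytes per texel) yields param->size[0] == 256 and
 * param->stride[0] == 16.
 */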

static unsigned
get_image_num_layers(const struct intel_mipmap_tree *mt, GLenum target,
                     unsigned level)
{
   if (target == GL_TEXTURE_CUBE_MAP)
      return 6;

   return target == GL_TEXTURE_3D ?
      minify(mt->surf.logical_level0_px.depth, level) :
      mt->surf.logical_level0_px.array_len;
}

static void
update_image_surface(struct brw_context *brw,
                     struct gl_image_unit *u,
                     GLenum access,
                     unsigned surface_idx,
                     uint32_t *surf_offset,
                     struct brw_image_param *param)
{
   if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
      struct gl_texture_object *obj = u->TexObj;
      const unsigned format = get_image_format(brw, u->_ActualFormat, access);

      if (obj->Target == GL_TEXTURE_BUFFER) {
         struct intel_buffer_object *intel_obj =
            intel_buffer_object(obj->BufferObject);
         const unsigned texel_size = (format == ISL_FORMAT_RAW ? 1 :
                                      _mesa_get_format_bytes(u->_ActualFormat));

         brw_emit_buffer_surface_state(
            brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
            format, intel_obj->Base.Size, texel_size,
            access != GL_READ_ONLY);

         update_buffer_image_param(brw, u, surface_idx, param);

      } else {
         struct intel_texture_object *intel_obj = intel_texture_object(obj);
         struct intel_mipmap_tree *mt = intel_obj->mt;
         const unsigned num_layers = u->Layered ?
            get_image_num_layers(mt, obj->Target, u->Level) : 1;

         struct isl_view view = {
            .format = format,
            .base_level = obj->MinLevel + u->Level,
            .levels = 1,
            .base_array_layer = obj->MinLayer + u->_Layer,
            .array_len = num_layers,
            .swizzle = ISL_SWIZZLE_IDENTITY,
            .usage = ISL_SURF_USAGE_STORAGE_BIT,
         };

         if (format == ISL_FORMAT_RAW) {
            brw_emit_buffer_surface_state(
               brw, surf_offset, mt->bo, mt->offset,
               format, mt->bo->size - mt->offset, 1 /* pitch */,
               access != GL_READ_ONLY);

         } else {
            const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
            assert(!intel_miptree_has_color_unresolved(mt,
                                                       view.base_level, 1,
                                                       view.base_array_layer,
                                                       view.array_len));
            brw_emit_surface_state(brw, mt, mt->target, view,
                                   ISL_AUX_USAGE_NONE, tex_mocs[brw->gen],
                                   surf_offset, surf_index,
                                   I915_GEM_DOMAIN_SAMPLER,
                                   access == GL_READ_ONLY ? 0 :
                                             I915_GEM_DOMAIN_SAMPLER);
         }

         isl_surf_fill_image_param(&brw->isl_dev, param, &mt->surf, &view);
         param->surface_idx = surface_idx;
      }

   } else {
      brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset);
      update_default_image_param(brw, u, surface_idx, param);
   }
}

void
brw_upload_image_surfaces(struct brw_context *brw,
                          const struct gl_program *prog,
                          struct brw_stage_state *stage_state,
                          struct brw_stage_prog_data *prog_data)
{
   assert(prog);
   struct gl_context *ctx = &brw->ctx;

   if (prog->info.num_images) {
      for (unsigned i = 0; i < prog->info.num_images; i++) {
         struct gl_image_unit *u = &ctx->ImageUnits[prog->sh.ImageUnits[i]];
         const unsigned surf_idx = prog_data->binding_table.image_start + i;

         update_image_surface(brw, u, prog->sh.ImageAccess[i],
                              surf_idx,
                              &stage_state->surf_offset[surf_idx],
                              &prog_data->image_param[i]);
      }

      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
      /* This may have changed the image metadata that depends on the context
       * image unit state and is passed to the program as uniforms, so make
       * sure that push and pull constants are reuploaded.
       */
      brw->NewGLState |= _NEW_PROGRAM_CONSTANTS;
   }
}

static void
brw_upload_wm_image_surfaces(struct brw_context *brw)
{
   /* BRW_NEW_FRAGMENT_PROGRAM */
   const struct gl_program *wm = brw->fragment_program;

   if (wm) {
      /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
      brw_upload_image_surfaces(brw, wm, &brw->wm.base,
                                brw->wm.base.prog_data);
   }
}

const struct brw_tracked_state brw_wm_image_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_BLORP |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_IMAGE_UNITS
   },
   .emit = brw_upload_wm_image_surfaces,
};

void
gen4_init_vtable_surface_functions(struct brw_context *brw)
{
   brw->vtbl.update_renderbuffer_surface = gen4_update_renderbuffer_surface;
   brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
}

void
gen6_init_vtable_surface_functions(struct brw_context *brw)
{
   gen4_init_vtable_surface_functions(brw);
   brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
}

static void
brw_upload_cs_work_groups_surface(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
   /* BRW_NEW_CS_PROG_DATA */
   const struct brw_cs_prog_data *cs_prog_data =
      brw_cs_prog_data(brw->cs.base.prog_data);

   if (prog && cs_prog_data->uses_num_work_groups) {
      const unsigned surf_idx =
         cs_prog_data->binding_table.work_groups_start;
      uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
      struct brw_bo *bo;
      uint32_t bo_offset;

      if (brw->compute.num_work_groups_bo == NULL) {
         bo = NULL;
         intel_upload_data(brw,
                           (void *) brw->compute.num_work_groups,
                           3 * sizeof(GLuint),
                           sizeof(GLuint),
                           &bo,
                           &bo_offset);
      } else {
         bo = brw->compute.num_work_groups_bo;
         bo_offset = brw->compute.num_work_groups_offset;
      }

      brw_emit_buffer_surface_state(brw, surf_offset,
                                    bo, bo_offset,
                                    ISL_FORMAT_RAW,
                                    3 * sizeof(GLuint), 1, true);
      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
   }
}

const struct brw_tracked_state brw_cs_work_groups_surface = {
   .dirty = {
      .brw = BRW_NEW_BLORP |
             BRW_NEW_CS_PROG_DATA |
             BRW_NEW_CS_WORK_GROUPS
   },
   .emit = brw_upload_cs_work_groups_surface,
};