i965: Enable regular fast-clears (CCS_D) on gen9+
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "compiler/nir/nir.h"
34 #include "main/context.h"
35 #include "main/blend.h"
36 #include "main/mtypes.h"
37 #include "main/samplerobj.h"
38 #include "main/shaderimage.h"
39 #include "main/teximage.h"
40 #include "program/prog_parameter.h"
41 #include "program/prog_instruction.h"
42 #include "main/framebuffer.h"
43 #include "main/shaderapi.h"
44
45 #include "isl/isl.h"
46
47 #include "intel_mipmap_tree.h"
48 #include "intel_batchbuffer.h"
49 #include "intel_tex.h"
50 #include "intel_fbo.h"
51 #include "intel_buffer_objects.h"
52
53 #include "brw_context.h"
54 #include "brw_state.h"
55 #include "brw_defines.h"
56 #include "brw_wm.h"
57
58 enum {
59 INTEL_RENDERBUFFER_LAYERED = 1 << 0,
60 INTEL_AUX_BUFFER_DISABLED = 1 << 1,
61 };
62
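/* Memory object control state (MOCS) entries for SURFACE_STATE, indexed by
 * hardware generation. Sampled surfaces use a write-back cacheable setting,
 * while render targets follow the page-table entry (PTE) so that, for
 * example, uncached scanout buffers are handled correctly.
 */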
63 uint32_t tex_mocs[] = {
64 [7] = GEN7_MOCS_L3,
65 [8] = BDW_MOCS_WB,
66 [9] = SKL_MOCS_WB,
67 [10] = CNL_MOCS_WB,
68 };
69
70 uint32_t rb_mocs[] = {
71 [7] = GEN7_MOCS_L3,
72 [8] = BDW_MOCS_PTE,
73 [9] = SKL_MOCS_PTE,
74 [10] = CNL_MOCS_PTE,
75 };
76
77 static void
78 get_isl_surf(struct brw_context *brw, struct intel_mipmap_tree *mt,
79 GLenum target, struct isl_view *view,
80 uint32_t *tile_x, uint32_t *tile_y,
81 uint32_t *offset, struct isl_surf *surf)
82 {
83 *surf = mt->surf;
84
85 const enum isl_dim_layout dim_layout =
86 get_isl_dim_layout(&brw->screen->devinfo, mt->surf.tiling, target);
87
88 if (surf->dim_layout == dim_layout)
89 return;
90
91 /* The layout of the specified texture target is not compatible with the
92     * actual layout of the miptree structure in memory -- you're entering
93     * dangerous territory. This can only possibly work if you only intend
94 * to access a single level and slice of the texture, and the hardware
95 * supports the tile offset feature in order to allow non-tile-aligned
96 * base offsets, since we'll have to point the hardware to the first
97 * texel of the level instead of relying on the usual base level/layer
98 * controls.
99 */
100 assert(brw->has_surface_tile_offset);
101 assert(view->levels == 1 && view->array_len == 1);
102 assert(*tile_x == 0 && *tile_y == 0);
103
104    *offset += intel_miptree_get_tile_offsets(mt, view->base_level,
105 view->base_array_layer,
106 tile_x, tile_y);
107
108 /* Minify the logical dimensions of the texture. */
109 const unsigned l = view->base_level - mt->first_level;
110 surf->logical_level0_px.width = minify(surf->logical_level0_px.width, l);
111 surf->logical_level0_px.height = surf->dim <= ISL_SURF_DIM_1D ? 1 :
112 minify(surf->logical_level0_px.height, l);
113 surf->logical_level0_px.depth = surf->dim <= ISL_SURF_DIM_2D ? 1 :
114 minify(surf->logical_level0_px.depth, l);
115
116 /* Only the base level and layer can be addressed with the overridden
117 * layout.
118 */
119 surf->logical_level0_px.array_len = 1;
120 surf->levels = 1;
121 surf->dim_layout = dim_layout;
122
123 /* The requested slice of the texture is now at the base level and
124 * layer.
125 */
126 view->base_level = 0;
127 view->base_array_layer = 0;
128 }
129
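/* Emit a SURFACE_STATE for the given miptree and view, letting isl pack the
 * dwords, and emit relocations for both the primary surface and its
 * auxiliary (HiZ/MCS/CCS) surface, if any.
 */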
130 static void
131 brw_emit_surface_state(struct brw_context *brw,
132 struct intel_mipmap_tree *mt,
133 GLenum target, struct isl_view view,
134 enum isl_aux_usage aux_usage,
135 uint32_t mocs, uint32_t *surf_offset, int surf_index,
136 unsigned read_domains, unsigned write_domains)
137 {
138 uint32_t tile_x = mt->level[0].level_x;
139 uint32_t tile_y = mt->level[0].level_y;
140 uint32_t offset = mt->offset;
141
142 struct isl_surf surf;
143
144 get_isl_surf(brw, mt, target, &view, &tile_x, &tile_y, &offset, &surf);
145
146 union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } };
147
148 struct brw_bo *aux_bo;
149 struct isl_surf *aux_surf = NULL;
150 uint64_t aux_offset = 0;
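   /* MCS (multisample compression) and CCS (single-sample fast-clear and
    * lossless compression) data both live in mt->mcs_buf, while HiZ has a
    * dedicated buffer.
    */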
151 switch (aux_usage) {
152 case ISL_AUX_USAGE_MCS:
153 case ISL_AUX_USAGE_CCS_D:
154 case ISL_AUX_USAGE_CCS_E:
155 aux_surf = &mt->mcs_buf->surf;
156 aux_bo = mt->mcs_buf->bo;
157 aux_offset = mt->mcs_buf->bo->offset64 + mt->mcs_buf->offset;
158 break;
159
160 case ISL_AUX_USAGE_HIZ:
161 aux_surf = &mt->hiz_buf->surf;
162 aux_bo = mt->hiz_buf->bo;
163 aux_offset = mt->hiz_buf->bo->offset64;
164 break;
165
166 case ISL_AUX_USAGE_NONE:
167 break;
168 }
169
170 if (aux_usage != ISL_AUX_USAGE_NONE) {
171 /* We only really need a clear color if we also have an auxiliary
172 * surface. Without one, it does nothing.
173 */
174 clear_color = mt->fast_clear_color;
175 }
176
177 void *state = brw_state_batch(brw,
178 brw->isl_dev.ss.size,
179 brw->isl_dev.ss.align,
180 surf_offset);
181
182    isl_surf_fill_state(&brw->isl_dev, state, .surf = &surf, .view = &view,
183 .address = mt->bo->offset64 + offset,
184 .aux_surf = aux_surf, .aux_usage = aux_usage,
185 .aux_address = aux_offset,
186 .mocs = mocs, .clear_color = clear_color,
187 .x_offset_sa = tile_x, .y_offset_sa = tile_y);
188
189 brw_emit_reloc(&brw->batch, *surf_offset + brw->isl_dev.ss.addr_offset,
190 mt->bo, offset, read_domains, write_domains);
191
192 if (aux_surf) {
193 /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
194 * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
195 * contain other control information. Since buffer addresses are always
196 * on 4k boundaries (and thus have their lower 12 bits zero), we can use
197 * an ordinary reloc to do the necessary address translation.
198 */
199 assert((aux_offset & 0xfff) == 0);
200 uint32_t *aux_addr = state + brw->isl_dev.ss.aux_addr_offset;
201 brw_emit_reloc(&brw->batch,
202 *surf_offset + brw->isl_dev.ss.aux_addr_offset,
203 aux_bo, *aux_addr - aux_bo->offset64,
204 read_domains, write_domains);
205 }
206 }
207
208 uint32_t
209 brw_update_renderbuffer_surface(struct brw_context *brw,
210 struct gl_renderbuffer *rb,
211 uint32_t flags, unsigned unit,
212 uint32_t surf_index)
213 {
214 struct gl_context *ctx = &brw->ctx;
215 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
216 struct intel_mipmap_tree *mt = irb->mt;
217
218 enum isl_aux_usage aux_usage =
219 intel_miptree_render_aux_usage(brw, mt, ctx->Color.sRGBEnabled,
220 ctx->Color.BlendEnabled & (1 << unit));
221
222 if (flags & INTEL_AUX_BUFFER_DISABLED) {
223 assert(brw->gen >= 9);
224 aux_usage = ISL_AUX_USAGE_NONE;
225 }
226
227 assert(brw_render_target_supported(brw, rb));
228
229 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
230 if (unlikely(!brw->mesa_format_supports_render[rb_format])) {
231 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
232 __func__, _mesa_get_format_name(rb_format));
233 }
234
235 struct isl_view view = {
236 .format = brw->mesa_to_isl_render_format[rb_format],
237 .base_level = irb->mt_level - irb->mt->first_level,
238 .levels = 1,
239 .base_array_layer = irb->mt_layer,
240 .array_len = MAX2(irb->layer_count, 1),
241 .swizzle = ISL_SWIZZLE_IDENTITY,
242 .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
243 };
244
245 uint32_t offset;
246 brw_emit_surface_state(brw, mt, mt->target, view, aux_usage,
247 rb_mocs[brw->gen],
248 &offset, surf_index,
249 I915_GEM_DOMAIN_RENDER,
250 I915_GEM_DOMAIN_RENDER);
251 return offset;
252 }
253
254 GLuint
255 translate_tex_target(GLenum target)
256 {
257 switch (target) {
258 case GL_TEXTURE_1D:
259 case GL_TEXTURE_1D_ARRAY_EXT:
260 return BRW_SURFACE_1D;
261
262 case GL_TEXTURE_RECTANGLE_NV:
263 return BRW_SURFACE_2D;
264
265 case GL_TEXTURE_2D:
266 case GL_TEXTURE_2D_ARRAY_EXT:
267 case GL_TEXTURE_EXTERNAL_OES:
268 case GL_TEXTURE_2D_MULTISAMPLE:
269 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
270 return BRW_SURFACE_2D;
271
272 case GL_TEXTURE_3D:
273 return BRW_SURFACE_3D;
274
275 case GL_TEXTURE_CUBE_MAP:
276 case GL_TEXTURE_CUBE_MAP_ARRAY:
277 return BRW_SURFACE_CUBE;
278
279 default:
280 unreachable("not reached");
281 }
282 }
283
284 uint32_t
285 brw_get_surface_tiling_bits(enum isl_tiling tiling)
286 {
287 switch (tiling) {
288 case ISL_TILING_X:
289 return BRW_SURFACE_TILED;
290 case ISL_TILING_Y0:
291 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
292 default:
293 return 0;
294 }
295 }
296
297
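/* The legacy BRW_SURFACE_MULTISAMPLECOUNT field only distinguishes
 * single-sampled surfaces from 4x multisampled ones, so any sample count
 * above one is encoded as MULTISAMPLECOUNT_4.
 */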
298 uint32_t
299 brw_get_surface_num_multisamples(unsigned num_samples)
300 {
301 if (num_samples > 1)
302 return BRW_SURFACE_MULTISAMPLECOUNT_4;
303 else
304 return BRW_SURFACE_MULTISAMPLECOUNT_1;
305 }
306
307 /**
308 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
309 * swizzling.
310 */
311 int
312 brw_get_texture_swizzle(const struct gl_context *ctx,
313 const struct gl_texture_object *t)
314 {
315 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
316
317 int swizzles[SWIZZLE_NIL + 1] = {
318 SWIZZLE_X,
319 SWIZZLE_Y,
320 SWIZZLE_Z,
321 SWIZZLE_W,
322 SWIZZLE_ZERO,
323 SWIZZLE_ONE,
324 SWIZZLE_NIL
325 };
326
327 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
328 img->_BaseFormat == GL_DEPTH_STENCIL) {
329 GLenum depth_mode = t->DepthMode;
330
331 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
332 * with depth component data specified with a sized internal format.
333 * Otherwise, it's left at the old default, GL_LUMINANCE.
334 */
335 if (_mesa_is_gles3(ctx) &&
336 img->InternalFormat != GL_DEPTH_COMPONENT &&
337 img->InternalFormat != GL_DEPTH_STENCIL) {
338 depth_mode = GL_RED;
339 }
340
341 switch (depth_mode) {
342 case GL_ALPHA:
343 swizzles[0] = SWIZZLE_ZERO;
344 swizzles[1] = SWIZZLE_ZERO;
345 swizzles[2] = SWIZZLE_ZERO;
346 swizzles[3] = SWIZZLE_X;
347 break;
348 case GL_LUMINANCE:
349 swizzles[0] = SWIZZLE_X;
350 swizzles[1] = SWIZZLE_X;
351 swizzles[2] = SWIZZLE_X;
352 swizzles[3] = SWIZZLE_ONE;
353 break;
354 case GL_INTENSITY:
355 swizzles[0] = SWIZZLE_X;
356 swizzles[1] = SWIZZLE_X;
357 swizzles[2] = SWIZZLE_X;
358 swizzles[3] = SWIZZLE_X;
359 break;
360 case GL_RED:
361 swizzles[0] = SWIZZLE_X;
362 swizzles[1] = SWIZZLE_ZERO;
363 swizzles[2] = SWIZZLE_ZERO;
364 swizzles[3] = SWIZZLE_ONE;
365 break;
366 }
367 }
368
369 GLenum datatype = _mesa_get_format_datatype(img->TexFormat);
370
371 /* If the texture's format is alpha-only, force R, G, and B to
372 * 0.0. Similarly, if the texture's format has no alpha channel,
373 * force the alpha value read to 1.0. This allows for the
374 * implementation to use an RGBA texture for any of these formats
375 * without leaking any unexpected values.
376 */
377 switch (img->_BaseFormat) {
378 case GL_ALPHA:
379 swizzles[0] = SWIZZLE_ZERO;
380 swizzles[1] = SWIZZLE_ZERO;
381 swizzles[2] = SWIZZLE_ZERO;
382 break;
383 case GL_LUMINANCE:
384 if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
385 swizzles[0] = SWIZZLE_X;
386 swizzles[1] = SWIZZLE_X;
387 swizzles[2] = SWIZZLE_X;
388 swizzles[3] = SWIZZLE_ONE;
389 }
390 break;
391 case GL_LUMINANCE_ALPHA:
392 if (datatype == GL_SIGNED_NORMALIZED) {
393 swizzles[0] = SWIZZLE_X;
394 swizzles[1] = SWIZZLE_X;
395 swizzles[2] = SWIZZLE_X;
396 swizzles[3] = SWIZZLE_W;
397 }
398 break;
399 case GL_INTENSITY:
400 if (datatype == GL_SIGNED_NORMALIZED) {
401 swizzles[0] = SWIZZLE_X;
402 swizzles[1] = SWIZZLE_X;
403 swizzles[2] = SWIZZLE_X;
404 swizzles[3] = SWIZZLE_X;
405 }
406 break;
407 case GL_RED:
408 case GL_RG:
409 case GL_RGB:
410 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0 ||
411 img->TexFormat == MESA_FORMAT_RGB_DXT1 ||
412 img->TexFormat == MESA_FORMAT_SRGB_DXT1)
413 swizzles[3] = SWIZZLE_ONE;
414 break;
415 }
416
417 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
418 swizzles[GET_SWZ(t->_Swizzle, 1)],
419 swizzles[GET_SWZ(t->_Swizzle, 2)],
420 swizzles[GET_SWZ(t->_Swizzle, 3)]);
421 }
422
423 /**
424  * Convert a swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+
425 * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED). The mappings are
426 *
427 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
428 * 0 1 2 3 4 5
429 * 4 5 6 7 0 1
430 * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE
431 *
432 * which is simply adding 4 then modding by 8 (or anding with 7).
433 *
434 * We then may need to apply workarounds for textureGather hardware bugs.
435 */
436 static unsigned
437 swizzle_to_scs(GLenum swizzle, bool need_green_to_blue)
438 {
439 unsigned scs = (swizzle + 4) & 7;
440
441 return (need_green_to_blue && scs == HSW_SCS_GREEN) ? HSW_SCS_BLUE : scs;
442 }
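/* Worked example of the mapping above: SWIZZLE_X (0) maps to (0 + 4) & 7 = 4
 * (SCS_RED) and SWIZZLE_ZERO (4) maps to (4 + 4) & 7 = 0 (SCS_ZERO). With
 * need_green_to_blue set, SCS_GREEN is additionally remapped to SCS_BLUE.
 */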
443
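/* Returns true if auxiliary (fast-clear/compression) data has to be ignored
 * when sampling from this miptree, because the miptree is also bound as one
 * of the current color draw buffers and aux use was disabled for that render
 * target (see brw->draw_aux_buffer_disabled).
 */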
444 static bool
445 brw_aux_surface_disabled(const struct brw_context *brw,
446 const struct intel_mipmap_tree *mt)
447 {
448 const struct gl_framebuffer *fb = brw->ctx.DrawBuffer;
449
450 for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
451 const struct intel_renderbuffer *irb =
452 intel_renderbuffer(fb->_ColorDrawBuffers[i]);
453
454 if (irb && irb->mt == mt)
455 return brw->draw_aux_buffer_disabled[i];
456 }
457
458 return false;
459 }
460
461 void
462 brw_update_texture_surface(struct gl_context *ctx,
463 unsigned unit,
464 uint32_t *surf_offset,
465 bool for_gather,
466 uint32_t plane)
467 {
468 struct brw_context *brw = brw_context(ctx);
469 struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current;
470
471 if (obj->Target == GL_TEXTURE_BUFFER) {
472 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
473
474 } else {
475 struct intel_texture_object *intel_obj = intel_texture_object(obj);
476 struct intel_mipmap_tree *mt = intel_obj->mt;
477
478 if (plane > 0) {
479 if (mt->plane[plane - 1] == NULL)
480 return;
481 mt = mt->plane[plane - 1];
482 }
483
484 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
485 /* If this is a view with restricted NumLayers, then our effective depth
486 * is not just the miptree depth.
487 */
488 unsigned view_num_layers;
489 if (obj->Immutable && obj->Target != GL_TEXTURE_3D) {
490 view_num_layers = obj->NumLayers;
491 } else {
492 view_num_layers = mt->surf.dim == ISL_SURF_DIM_3D ?
493 mt->surf.logical_level0_px.depth :
494 mt->surf.logical_level0_px.array_len;
495 }
496
497 /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
498 * texturing functions that return a float, as our code generation always
499 * selects the .x channel (which would always be 0).
500 */
501 struct gl_texture_image *firstImage = obj->Image[0][obj->BaseLevel];
502 const bool alpha_depth = obj->DepthMode == GL_ALPHA &&
503 (firstImage->_BaseFormat == GL_DEPTH_COMPONENT ||
504 firstImage->_BaseFormat == GL_DEPTH_STENCIL);
505 const unsigned swizzle = (unlikely(alpha_depth) ? SWIZZLE_XYZW :
506 brw_get_texture_swizzle(&brw->ctx, obj));
507
508 mesa_format mesa_fmt = plane == 0 ? intel_obj->_Format : mt->format;
509 enum isl_format format = translate_tex_format(brw, mesa_fmt,
510 sampler->sRGBDecode);
511
512 /* Implement gen6 and gen7 gather work-around */
513 bool need_green_to_blue = false;
514 if (for_gather) {
515 if (brw->gen == 7 && (format == ISL_FORMAT_R32G32_FLOAT ||
516 format == ISL_FORMAT_R32G32_SINT ||
517 format == ISL_FORMAT_R32G32_UINT)) {
518 format = ISL_FORMAT_R32G32_FLOAT_LD;
519 need_green_to_blue = brw->is_haswell;
520 } else if (brw->gen == 6) {
521 /* Sandybridge's gather4 message is broken for integer formats.
522 * To work around this, we pretend the surface is UNORM for
523 * 8 or 16-bit formats, and emit shader instructions to recover
524 * the real INT/UINT value. For 32-bit formats, we pretend
525 * the surface is FLOAT, and simply reinterpret the resulting
526 * bits.
527 */
528 switch (format) {
529 case ISL_FORMAT_R8_SINT:
530 case ISL_FORMAT_R8_UINT:
531 format = ISL_FORMAT_R8_UNORM;
532 break;
533
534 case ISL_FORMAT_R16_SINT:
535 case ISL_FORMAT_R16_UINT:
536 format = ISL_FORMAT_R16_UNORM;
537 break;
538
539 case ISL_FORMAT_R32_SINT:
540 case ISL_FORMAT_R32_UINT:
541 format = ISL_FORMAT_R32_FLOAT;
542 break;
543
544 default:
545 break;
546 }
547 }
548 }
549
550 if (obj->StencilSampling && firstImage->_BaseFormat == GL_DEPTH_STENCIL) {
551 if (brw->gen <= 7) {
552 assert(mt->r8stencil_mt && !mt->stencil_mt->r8stencil_needs_update);
553 mt = mt->r8stencil_mt;
554 } else {
555 mt = mt->stencil_mt;
556 }
557 format = ISL_FORMAT_R8_UINT;
558 } else if (brw->gen <= 7 && mt->format == MESA_FORMAT_S_UINT8) {
559 assert(mt->r8stencil_mt && !mt->r8stencil_needs_update);
560 mt = mt->r8stencil_mt;
561 format = ISL_FORMAT_R8_UINT;
562 }
563
564 const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
565
566 struct isl_view view = {
567 .format = format,
568 .base_level = obj->MinLevel + obj->BaseLevel,
569 .levels = intel_obj->_MaxLevel - obj->BaseLevel + 1,
570 .base_array_layer = obj->MinLayer,
571 .array_len = view_num_layers,
572 .swizzle = {
573 .r = swizzle_to_scs(GET_SWZ(swizzle, 0), need_green_to_blue),
574 .g = swizzle_to_scs(GET_SWZ(swizzle, 1), need_green_to_blue),
575 .b = swizzle_to_scs(GET_SWZ(swizzle, 2), need_green_to_blue),
576 .a = swizzle_to_scs(GET_SWZ(swizzle, 3), need_green_to_blue),
577 },
578 .usage = ISL_SURF_USAGE_TEXTURE_BIT,
579 };
580
581 if (obj->Target == GL_TEXTURE_CUBE_MAP ||
582 obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY)
583 view.usage |= ISL_SURF_USAGE_CUBE_BIT;
584
585 enum isl_aux_usage aux_usage =
586 intel_miptree_texture_aux_usage(brw, mt, format);
587
588 if (brw_aux_surface_disabled(brw, mt))
589 aux_usage = ISL_AUX_USAGE_NONE;
590
591 brw_emit_surface_state(brw, mt, mt->target, view, aux_usage,
592 tex_mocs[brw->gen],
593 surf_offset, surf_index,
594 I915_GEM_DOMAIN_SAMPLER, 0);
595 }
596 }
597
598 void
599 brw_emit_buffer_surface_state(struct brw_context *brw,
600 uint32_t *out_offset,
601 struct brw_bo *bo,
602 unsigned buffer_offset,
603 unsigned surface_format,
604 unsigned buffer_size,
605 unsigned pitch,
606 bool rw)
607 {
608 uint32_t *dw = brw_state_batch(brw,
609 brw->isl_dev.ss.size,
610 brw->isl_dev.ss.align,
611 out_offset);
612
613 isl_buffer_fill_state(&brw->isl_dev, dw,
614 .address = (bo ? bo->offset64 : 0) + buffer_offset,
615 .size = buffer_size,
616 .format = surface_format,
617 .stride = pitch,
618 .mocs = tex_mocs[brw->gen]);
619
620 if (bo) {
621 brw_emit_reloc(&brw->batch, *out_offset + brw->isl_dev.ss.addr_offset,
622 bo, buffer_offset,
623 I915_GEM_DOMAIN_SAMPLER,
624 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
625 }
626 }
627
628 void
629 brw_update_buffer_texture_surface(struct gl_context *ctx,
630 unsigned unit,
631 uint32_t *surf_offset)
632 {
633 struct brw_context *brw = brw_context(ctx);
634 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
635 struct intel_buffer_object *intel_obj =
636 intel_buffer_object(tObj->BufferObject);
637 uint32_t size = tObj->BufferSize;
638 struct brw_bo *bo = NULL;
639 mesa_format format = tObj->_BufferObjectFormat;
640 const enum isl_format isl_format = brw_isl_format_for_mesa_format(format);
641 int texel_size = _mesa_get_format_bytes(format);
642
643 if (intel_obj) {
644 size = MIN2(size, intel_obj->Base.Size);
645 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size,
646 false);
647 }
648
649    /* The ARB_texture_buffer_object specification says:
650 *
651 * "The number of texels in the buffer texture's texel array is given by
652 *
653    *        floor(<buffer_size> / (<components> * sizeof(<base_type>))),
654 *
655 * where <buffer_size> is the size of the buffer object, in basic
656 * machine units and <components> and <base_type> are the element count
657 * and base data type for elements, as specified in Table X.1. The
658 * number of texels in the texel array is then clamped to the
659 * implementation-dependent limit MAX_TEXTURE_BUFFER_SIZE_ARB."
660 *
661 * We need to clamp the size in bytes to MAX_TEXTURE_BUFFER_SIZE * stride,
662 * so that when ISL divides by stride to obtain the number of texels, that
663 * texel count is clamped to MAX_TEXTURE_BUFFER_SIZE.
664 */
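   /* For illustration: with 16-byte RGBA32F texels and a hypothetical
    * 2^27-texel MAX_TEXTURE_BUFFER_SIZE, this clamp caps the surface at
    * 2 GiB regardless of how large the underlying buffer object is.
    */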
665 size = MIN2(size, ctx->Const.MaxTextureBufferSize * (unsigned) texel_size);
666
667 if (isl_format == ISL_FORMAT_UNSUPPORTED) {
668 _mesa_problem(NULL, "bad format %s for texture buffer\n",
669 _mesa_get_format_name(format));
670 }
671
672 brw_emit_buffer_surface_state(brw, surf_offset, bo,
673 tObj->BufferOffset,
674 isl_format,
675 size,
676 texel_size,
677 false /* rw */);
678 }
679
680 /**
681 * Create the constant buffer surface. Vertex/fragment shader constants will be
682 * read from this buffer with Data Port Read instructions/messages.
683 */
684 void
685 brw_create_constant_surface(struct brw_context *brw,
686 struct brw_bo *bo,
687 uint32_t offset,
688 uint32_t size,
689 uint32_t *out_offset)
690 {
691 brw_emit_buffer_surface_state(brw, out_offset, bo, offset,
692 ISL_FORMAT_R32G32B32A32_FLOAT,
693 size, 1, false);
694 }
695
696 /**
697 * Create the buffer surface. Shader buffer variables will be
698  * read from / written to this buffer with Data Port Read/Write
699 * instructions/messages.
700 */
701 void
702 brw_create_buffer_surface(struct brw_context *brw,
703 struct brw_bo *bo,
704 uint32_t offset,
705 uint32_t size,
706 uint32_t *out_offset)
707 {
708 /* Use a raw surface so we can reuse existing untyped read/write/atomic
709 * messages. We need these specifically for the fragment shader since they
710     * include a pixel mask header that we need in order to ensure correct
711     * behavior for helper invocations, which must not write to the buffer.
712 */
713 brw_emit_buffer_surface_state(brw, out_offset, bo, offset,
714 ISL_FORMAT_RAW,
715 size, 1, true);
716 }
717
718 /**
719 * Set up a binding table entry for use by stream output logic (transform
720 * feedback).
721 *
722 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
723 */
724 void
725 brw_update_sol_surface(struct brw_context *brw,
726 struct gl_buffer_object *buffer_obj,
727 uint32_t *out_offset, unsigned num_vector_components,
728 unsigned stride_dwords, unsigned offset_dwords)
729 {
730 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
731 uint32_t offset_bytes = 4 * offset_dwords;
732 struct brw_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
733 offset_bytes,
734 buffer_obj->Size - offset_bytes,
735 true);
736 uint32_t *surf = brw_state_batch(brw, 6 * 4, 32, out_offset);
737 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
738 size_t size_dwords = buffer_obj->Size / 4;
739 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
740
741 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
742 * too big to map using a single binding table entry?
743 */
744 assert((size_dwords - offset_dwords) / stride_dwords
745 <= BRW_MAX_NUM_BUFFER_ENTRIES);
746
747 if (size_dwords > offset_dwords + num_vector_components) {
748 /* There is room for at least 1 transform feedback output in the buffer.
749 * Compute the number of additional transform feedback outputs the
750 * buffer has room for.
751 */
752 buffer_size_minus_1 =
753 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
754 } else {
755 /* There isn't even room for a single transform feedback output in the
756 * buffer. We can't configure the binding table entry to prevent output
757 * entirely; we'll have to rely on the geometry shader to detect
758 * overflow. But to minimize the damage in case of a bug, set up the
759 * binding table entry to just allow a single output.
760 */
761 buffer_size_minus_1 = 0;
762 }
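   /* A BUFFER surface encodes its element count minus one split across the
    * Width (bits 6:0), Height (bits 19:7) and Depth (bits 26:20) fields, so
    * unpack buffer_size_minus_1 accordingly.
    */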
763 width = buffer_size_minus_1 & 0x7f;
764 height = (buffer_size_minus_1 & 0xfff80) >> 7;
765 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
766
767 switch (num_vector_components) {
768 case 1:
769 surface_format = ISL_FORMAT_R32_FLOAT;
770 break;
771 case 2:
772 surface_format = ISL_FORMAT_R32G32_FLOAT;
773 break;
774 case 3:
775 surface_format = ISL_FORMAT_R32G32B32_FLOAT;
776 break;
777 case 4:
778 surface_format = ISL_FORMAT_R32G32B32A32_FLOAT;
779 break;
780 default:
781 unreachable("Invalid vector size for transform feedback output");
782 }
783
784 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
785 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
786 surface_format << BRW_SURFACE_FORMAT_SHIFT |
787 BRW_SURFACE_RC_READ_WRITE;
788 surf[1] = bo->offset64 + offset_bytes; /* reloc */
789 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
790 height << BRW_SURFACE_HEIGHT_SHIFT);
791 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
792 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
793 surf[4] = 0;
794 surf[5] = 0;
795
796 /* Emit relocation to surface contents. */
797 brw_emit_reloc(&brw->batch, *out_offset + 4, bo, offset_bytes,
798 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
799 }
800
801 /* Creates a new WM constant buffer reflecting the current fragment program's
802 * constants, if needed by the fragment program.
803 *
804 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
805 * state atom.
806 */
807 static void
808 brw_upload_wm_pull_constants(struct brw_context *brw)
809 {
810 struct brw_stage_state *stage_state = &brw->wm.base;
811 /* BRW_NEW_FRAGMENT_PROGRAM */
812 struct brw_program *fp = (struct brw_program *) brw->fragment_program;
813 /* BRW_NEW_FS_PROG_DATA */
814 struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data;
815
816 _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_FRAGMENT);
817 /* _NEW_PROGRAM_CONSTANTS */
818 brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program,
819 stage_state, prog_data);
820 }
821
822 const struct brw_tracked_state brw_wm_pull_constants = {
823 .dirty = {
824 .mesa = _NEW_PROGRAM_CONSTANTS,
825 .brw = BRW_NEW_BATCH |
826 BRW_NEW_BLORP |
827 BRW_NEW_FRAGMENT_PROGRAM |
828 BRW_NEW_FS_PROG_DATA,
829 },
830 .emit = brw_upload_wm_pull_constants,
831 };
832
833 /**
834 * Creates a null renderbuffer surface.
835 *
836 * This is used when the shader doesn't write to any color output. An FB
837 * write to target 0 will still be emitted, because that's how the thread is
838 * terminated (and computed depth is returned), so we need to have the
839  * hardware discard the target 0 color output.
840 */
841 static void
842 brw_emit_null_surface_state(struct brw_context *brw,
843 unsigned width,
844 unsigned height,
845 unsigned samples,
846 uint32_t *out_offset)
847 {
848 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
849 * Notes):
850 *
851 * A null surface will be used in instances where an actual surface is
852 * not bound. When a write message is generated to a null surface, no
853 * actual surface is written to. When a read message (including any
854 * sampling engine message) is generated to a null surface, the result
855 * is all zeros. Note that a null surface type is allowed to be used
856  *     with all messages, even if it is not specifically indicated as
857 * supported. All of the remaining fields in surface state are ignored
858 * for null surfaces, with the following exceptions:
859 *
860 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
861 * depth buffer’s corresponding state for all render target surfaces,
862 * including null.
863 *
864 * - Surface Format must be R8G8B8A8_UNORM.
865 */
866 unsigned surface_type = BRW_SURFACE_NULL;
867 struct brw_bo *bo = NULL;
868 unsigned pitch_minus_1 = 0;
869 uint32_t multisampling_state = 0;
870 uint32_t *surf = brw_state_batch(brw, 6 * 4, 32, out_offset);
871
872 if (samples > 1) {
873 /* On Gen6, null render targets seem to cause GPU hangs when
874       * multisampling. So work around this problem by rendering into a dummy
875 * color buffer.
876 *
877 * To decrease the amount of memory needed by the workaround buffer, we
878 * set its pitch to 128 bytes (the width of a Y tile). This means that
879 * the amount of memory needed for the workaround buffer is
880 * (width_in_tiles + height_in_tiles - 1) tiles.
881 *
882 * Note that since the workaround buffer will be interpreted by the
883 * hardware as an interleaved multisampled buffer, we need to compute
884 * width_in_tiles and height_in_tiles by dividing the width and height
885 * by 16 rather than the normal Y-tile size of 32.
886 */
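      /* For example, a 1920x1080 target gives width_in_tiles = 120 and
       * height_in_tiles = 68, so the workaround BO only needs
       * (120 + 68 - 1) * 4096 bytes (~748 KiB) instead of a full-size
       * multisampled color buffer.
       */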
887 unsigned width_in_tiles = ALIGN(width, 16) / 16;
888 unsigned height_in_tiles = ALIGN(height, 16) / 16;
889 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
890 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
891 size_needed);
892 bo = brw->wm.multisampled_null_render_target_bo;
893 surface_type = BRW_SURFACE_2D;
894 pitch_minus_1 = 127;
895 multisampling_state = brw_get_surface_num_multisamples(samples);
896 }
897
898 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
899 ISL_FORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
900 if (brw->gen < 6) {
901 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
902 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
903 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
904 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
905 }
906 surf[1] = bo ? bo->offset64 : 0;
907 surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
908 (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
909
910 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
911 * Notes):
912 *
913 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
914 */
915 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
916 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
917 surf[4] = multisampling_state;
918 surf[5] = 0;
919
920 if (bo) {
921 brw_emit_reloc(&brw->batch, *out_offset + 4, bo, 0,
922 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
923 }
924 }
925
926 /**
927 * Sets up a surface state structure to point at the given region.
928 * While it is only used for the front/back buffer currently, it should be
929  * usable for further buffers when doing ARB_draw_buffers support.
930 */
931 static uint32_t
932 gen4_update_renderbuffer_surface(struct brw_context *brw,
933 struct gl_renderbuffer *rb,
934 uint32_t flags, unsigned unit,
935 uint32_t surf_index)
936 {
937 struct gl_context *ctx = &brw->ctx;
938 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
939 struct intel_mipmap_tree *mt = irb->mt;
940 uint32_t *surf;
941 uint32_t tile_x, tile_y;
942 enum isl_format format;
943 uint32_t offset;
944 /* _NEW_BUFFERS */
945 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
946 /* BRW_NEW_FS_PROG_DATA */
947
948 assert(!(flags & INTEL_RENDERBUFFER_LAYERED));
949 assert(!(flags & INTEL_AUX_BUFFER_DISABLED));
950
951 if (rb->TexImage && !brw->has_surface_tile_offset) {
952 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
953
954 if (tile_x != 0 || tile_y != 0) {
955 /* Original gen4 hardware couldn't draw to a non-tile-aligned
956 * destination in a miptree unless you actually setup your renderbuffer
957 * as a miptree and used the fragile lod/array_index/etc. controls to
958 * select the image. So, instead, we just make a new single-level
959 * miptree and render into that.
960 */
961 intel_renderbuffer_move_to_temp(brw, irb, false);
962 assert(irb->align_wa_mt);
963 mt = irb->align_wa_mt;
964 }
965 }
966
967 surf = brw_state_batch(brw, 6 * 4, 32, &offset);
968
969 format = brw->mesa_to_isl_render_format[rb_format];
970 if (unlikely(!brw->mesa_format_supports_render[rb_format])) {
971 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
972 __func__, _mesa_get_format_name(rb_format));
973 }
974
975 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
976 format << BRW_SURFACE_FORMAT_SHIFT);
977
978 /* reloc */
979 assert(mt->offset % mt->cpp == 0);
980 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
981 mt->bo->offset64 + mt->offset);
982
983 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
984 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
985
986 surf[3] = (brw_get_surface_tiling_bits(mt->surf.tiling) |
987 (mt->surf.row_pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
988
989 surf[4] = brw_get_surface_num_multisamples(mt->surf.samples);
990
991 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
992 /* Note that the low bits of these fields are missing, so
993 * there's the possibility of getting in trouble.
994 */
995 assert(tile_x % 4 == 0);
996 assert(tile_y % 2 == 0);
997 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
998 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
999 (mt->surf.image_alignment_el.height == 4 ?
1000 BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
1001
1002 if (brw->gen < 6) {
1003 /* _NEW_COLOR */
1004 if (!ctx->Color.ColorLogicOpEnabled && !ctx->Color._AdvancedBlendMode &&
1005 (ctx->Color.BlendEnabled & (1 << unit)))
1006 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
1007
1008 if (!ctx->Color.ColorMask[unit][0])
1009 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
1010 if (!ctx->Color.ColorMask[unit][1])
1011 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
1012 if (!ctx->Color.ColorMask[unit][2])
1013 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
1014
1015 /* As mentioned above, disable writes to the alpha component when the
1016 * renderbuffer is XRGB.
1017 */
1018 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
1019 !ctx->Color.ColorMask[unit][3]) {
1020 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
1021 }
1022 }
1023
1024 brw_emit_reloc(&brw->batch, offset + 4, mt->bo, surf[1] - mt->bo->offset64,
1025 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
1026
1027 return offset;
1028 }
1029
1030 /**
1031 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
1032 */
1033 void
1034 brw_update_renderbuffer_surfaces(struct brw_context *brw,
1035 const struct gl_framebuffer *fb,
1036 uint32_t render_target_start,
1037 uint32_t *surf_offset)
1038 {
1039 GLuint i;
1040 const unsigned int w = _mesa_geometric_width(fb);
1041 const unsigned int h = _mesa_geometric_height(fb);
1042 const unsigned int s = _mesa_geometric_samples(fb);
1043
1044 /* Update surfaces for drawing buffers */
1045 if (fb->_NumColorDrawBuffers >= 1) {
1046 for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
1047 const uint32_t surf_index = render_target_start + i;
1048 const int flags = (_mesa_geometric_layers(fb) > 0 ?
1049 INTEL_RENDERBUFFER_LAYERED : 0) |
1050 (brw->draw_aux_buffer_disabled[i] ?
1051 INTEL_AUX_BUFFER_DISABLED : 0);
1052
1053 if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
1054 surf_offset[surf_index] =
1055 brw->vtbl.update_renderbuffer_surface(
1056 brw, fb->_ColorDrawBuffers[i], flags, i, surf_index);
1057 } else {
1058 brw->vtbl.emit_null_surface_state(brw, w, h, s,
1059 &surf_offset[surf_index]);
1060 }
1061 }
1062 } else {
1063 const uint32_t surf_index = render_target_start;
1064 brw->vtbl.emit_null_surface_state(brw, w, h, s,
1065 &surf_offset[surf_index]);
1066 }
1067 }
1068
1069 static void
1070 update_renderbuffer_surfaces(struct brw_context *brw)
1071 {
1072 const struct gl_context *ctx = &brw->ctx;
1073
1074 /* BRW_NEW_FS_PROG_DATA */
1075 const struct brw_wm_prog_data *wm_prog_data =
1076 brw_wm_prog_data(brw->wm.base.prog_data);
1077
1078 /* _NEW_BUFFERS | _NEW_COLOR */
1079 const struct gl_framebuffer *fb = ctx->DrawBuffer;
1080 brw_update_renderbuffer_surfaces(
1081 brw, fb,
1082 wm_prog_data->binding_table.render_target_start,
1083 brw->wm.base.surf_offset);
1084 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1085 }
1086
1087 const struct brw_tracked_state brw_renderbuffer_surfaces = {
1088 .dirty = {
1089 .mesa = _NEW_BUFFERS |
1090 _NEW_COLOR,
1091 .brw = BRW_NEW_BATCH |
1092 BRW_NEW_BLORP |
1093 BRW_NEW_FS_PROG_DATA,
1094 },
1095 .emit = update_renderbuffer_surfaces,
1096 };
1097
1098 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
1099 .dirty = {
1100 .mesa = _NEW_BUFFERS,
1101 .brw = BRW_NEW_BATCH |
1102 BRW_NEW_BLORP,
1103 },
1104 .emit = update_renderbuffer_surfaces,
1105 };
1106
1107 static void
1108 update_renderbuffer_read_surfaces(struct brw_context *brw)
1109 {
1110 const struct gl_context *ctx = &brw->ctx;
1111
1112 /* BRW_NEW_FS_PROG_DATA */
1113 const struct brw_wm_prog_data *wm_prog_data =
1114 brw_wm_prog_data(brw->wm.base.prog_data);
1115
1116 /* BRW_NEW_FRAGMENT_PROGRAM */
1117 if (!ctx->Extensions.MESA_shader_framebuffer_fetch &&
1118 brw->fragment_program && brw->fragment_program->info.outputs_read) {
1119 /* _NEW_BUFFERS */
1120 const struct gl_framebuffer *fb = ctx->DrawBuffer;
1121
1122 for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
1123 struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];
1124 const struct intel_renderbuffer *irb = intel_renderbuffer(rb);
1125 const unsigned surf_index =
1126 wm_prog_data->binding_table.render_target_read_start + i;
1127 uint32_t *surf_offset = &brw->wm.base.surf_offset[surf_index];
1128
1129 if (irb) {
1130 const enum isl_format format = brw->mesa_to_isl_render_format[
1131 _mesa_get_render_format(ctx, intel_rb_format(irb))];
1132 assert(isl_format_supports_sampling(&brw->screen->devinfo,
1133 format));
1134
1135 /* Override the target of the texture if the render buffer is a
1136 * single slice of a 3D texture (since the minimum array element
1137 * field of the surface state structure is ignored by the sampler
1138 * unit for 3D textures on some hardware), or if the render buffer
1139 * is a 1D array (since shaders always provide the array index
1140 * coordinate at the Z component to avoid state-dependent
1141 * recompiles when changing the texture target of the
1142 * framebuffer).
1143 */
1144 const GLenum target =
1145 (irb->mt->target == GL_TEXTURE_3D &&
1146 irb->layer_count == 1) ? GL_TEXTURE_2D :
1147 irb->mt->target == GL_TEXTURE_1D_ARRAY ? GL_TEXTURE_2D_ARRAY :
1148 irb->mt->target;
1149
1150 const struct isl_view view = {
1151 .format = format,
1152 .base_level = irb->mt_level - irb->mt->first_level,
1153 .levels = 1,
1154 .base_array_layer = irb->mt_layer,
1155 .array_len = irb->layer_count,
1156 .swizzle = ISL_SWIZZLE_IDENTITY,
1157 .usage = ISL_SURF_USAGE_TEXTURE_BIT,
1158 };
1159
1160 enum isl_aux_usage aux_usage =
1161 intel_miptree_texture_aux_usage(brw, irb->mt, format);
1162 if (brw->draw_aux_buffer_disabled[i])
1163 aux_usage = ISL_AUX_USAGE_NONE;
1164
1165 brw_emit_surface_state(brw, irb->mt, target, view, aux_usage,
1166 tex_mocs[brw->gen],
1167 surf_offset, surf_index,
1168 I915_GEM_DOMAIN_SAMPLER, 0);
1169
1170 } else {
1171 brw->vtbl.emit_null_surface_state(
1172 brw, _mesa_geometric_width(fb), _mesa_geometric_height(fb),
1173 _mesa_geometric_samples(fb), surf_offset);
1174 }
1175 }
1176
1177 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1178 }
1179 }
1180
1181 const struct brw_tracked_state brw_renderbuffer_read_surfaces = {
1182 .dirty = {
1183 .mesa = _NEW_BUFFERS,
1184 .brw = BRW_NEW_BATCH |
1185 BRW_NEW_FRAGMENT_PROGRAM |
1186 BRW_NEW_FS_PROG_DATA,
1187 },
1188 .emit = update_renderbuffer_read_surfaces,
1189 };
1190
1191 static void
1192 update_stage_texture_surfaces(struct brw_context *brw,
1193 const struct gl_program *prog,
1194 struct brw_stage_state *stage_state,
1195 bool for_gather, uint32_t plane)
1196 {
1197 if (!prog)
1198 return;
1199
1200 struct gl_context *ctx = &brw->ctx;
1201
1202 uint32_t *surf_offset = stage_state->surf_offset;
1203
1204 /* BRW_NEW_*_PROG_DATA */
1205 if (for_gather)
1206 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
1207 else
1208 surf_offset += stage_state->prog_data->binding_table.plane_start[plane];
1209
1210 unsigned num_samplers = util_last_bit(prog->SamplersUsed);
1211 for (unsigned s = 0; s < num_samplers; s++) {
1212 surf_offset[s] = 0;
1213
1214 if (prog->SamplersUsed & (1 << s)) {
1215 const unsigned unit = prog->SamplerUnits[s];
1216
1217 /* _NEW_TEXTURE */
1218 if (ctx->Texture.Unit[unit]._Current) {
1219 brw_update_texture_surface(ctx, unit, surf_offset + s, for_gather, plane);
1220 }
1221 }
1222 }
1223 }
1224
1225
1226 /**
1227 * Construct SURFACE_STATE objects for enabled textures.
1228 */
1229 static void
1230 brw_update_texture_surfaces(struct brw_context *brw)
1231 {
1232 /* BRW_NEW_VERTEX_PROGRAM */
1233 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
1234
1235 /* BRW_NEW_TESS_PROGRAMS */
1236 struct gl_program *tcs = (struct gl_program *) brw->tess_ctrl_program;
1237 struct gl_program *tes = (struct gl_program *) brw->tess_eval_program;
1238
1239 /* BRW_NEW_GEOMETRY_PROGRAM */
1240 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
1241
1242 /* BRW_NEW_FRAGMENT_PROGRAM */
1243 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
1244
1245 /* _NEW_TEXTURE */
1246 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false, 0);
1247 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false, 0);
1248 update_stage_texture_surfaces(brw, tes, &brw->tes.base, false, 0);
1249 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false, 0);
1250 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 0);
1251
1252    /* Emit an alternate set of surface state for gather. This
1253     * allows the surface format to be overridden for only the
1254     * gather4 messages. */
1255 if (brw->gen < 8) {
1256 if (vs && vs->nir->info.uses_texture_gather)
1257 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true, 0);
1258 if (tcs && tcs->nir->info.uses_texture_gather)
1259 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true, 0);
1260 if (tes && tes->nir->info.uses_texture_gather)
1261 update_stage_texture_surfaces(brw, tes, &brw->tes.base, true, 0);
1262 if (gs && gs->nir->info.uses_texture_gather)
1263 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true, 0);
1264 if (fs && fs->nir->info.uses_texture_gather)
1265 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true, 0);
1266 }
1267
1268 if (fs) {
1269 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 1);
1270 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 2);
1271 }
1272
1273 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1274 }
1275
1276 const struct brw_tracked_state brw_texture_surfaces = {
1277 .dirty = {
1278 .mesa = _NEW_TEXTURE,
1279 .brw = BRW_NEW_BATCH |
1280 BRW_NEW_BLORP |
1281 BRW_NEW_FRAGMENT_PROGRAM |
1282 BRW_NEW_FS_PROG_DATA |
1283 BRW_NEW_GEOMETRY_PROGRAM |
1284 BRW_NEW_GS_PROG_DATA |
1285 BRW_NEW_TESS_PROGRAMS |
1286 BRW_NEW_TCS_PROG_DATA |
1287 BRW_NEW_TES_PROG_DATA |
1288 BRW_NEW_TEXTURE_BUFFER |
1289 BRW_NEW_VERTEX_PROGRAM |
1290 BRW_NEW_VS_PROG_DATA,
1291 },
1292 .emit = brw_update_texture_surfaces,
1293 };
1294
1295 static void
1296 brw_update_cs_texture_surfaces(struct brw_context *brw)
1297 {
1298 /* BRW_NEW_COMPUTE_PROGRAM */
1299 struct gl_program *cs = (struct gl_program *) brw->compute_program;
1300
1301 /* _NEW_TEXTURE */
1302 update_stage_texture_surfaces(brw, cs, &brw->cs.base, false, 0);
1303
1304    /* Emit an alternate set of surface state for gather. This
1305     * allows the surface format to be overridden for only the
1306     * gather4 messages.
1307 */
1308 if (brw->gen < 8) {
1309 if (cs && cs->nir->info.uses_texture_gather)
1310 update_stage_texture_surfaces(brw, cs, &brw->cs.base, true, 0);
1311 }
1312
1313 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1314 }
1315
1316 const struct brw_tracked_state brw_cs_texture_surfaces = {
1317 .dirty = {
1318 .mesa = _NEW_TEXTURE,
1319 .brw = BRW_NEW_BATCH |
1320 BRW_NEW_BLORP |
1321 BRW_NEW_COMPUTE_PROGRAM,
1322 },
1323 .emit = brw_update_cs_texture_surfaces,
1324 };
1325
1326
1327 void
1328 brw_upload_ubo_surfaces(struct brw_context *brw, struct gl_program *prog,
1329 struct brw_stage_state *stage_state,
1330 struct brw_stage_prog_data *prog_data)
1331 {
1332 struct gl_context *ctx = &brw->ctx;
1333
1334 if (!prog)
1335 return;
1336
1337 uint32_t *ubo_surf_offsets =
1338 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
1339
1340 for (int i = 0; i < prog->info.num_ubos; i++) {
1341 struct gl_uniform_buffer_binding *binding =
1342 &ctx->UniformBufferBindings[prog->sh.UniformBlocks[i]->Binding];
1343
1344 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
1345 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]);
1346 } else {
1347 struct intel_buffer_object *intel_bo =
1348 intel_buffer_object(binding->BufferObject);
1349 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
1350 if (!binding->AutomaticSize)
1351 size = MIN2(size, binding->Size);
1352 struct brw_bo *bo =
1353 intel_bufferobj_buffer(brw, intel_bo,
1354 binding->Offset,
1355 size, false);
1356 brw_create_constant_surface(brw, bo, binding->Offset,
1357 size,
1358 &ubo_surf_offsets[i]);
1359 }
1360 }
1361
1362 uint32_t *ssbo_surf_offsets =
1363 &stage_state->surf_offset[prog_data->binding_table.ssbo_start];
1364
1365 for (int i = 0; i < prog->info.num_ssbos; i++) {
1366 struct gl_shader_storage_buffer_binding *binding =
1367 &ctx->ShaderStorageBufferBindings[prog->sh.ShaderStorageBlocks[i]->Binding];
1368
1369 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
1370 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]);
1371 } else {
1372 struct intel_buffer_object *intel_bo =
1373 intel_buffer_object(binding->BufferObject);
1374 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
1375 if (!binding->AutomaticSize)
1376 size = MIN2(size, binding->Size);
1377 struct brw_bo *bo =
1378 intel_bufferobj_buffer(brw, intel_bo,
1379 binding->Offset,
1380 size, true);
1381 brw_create_buffer_surface(brw, bo, binding->Offset,
1382 size,
1383 &ssbo_surf_offsets[i]);
1384 }
1385 }
1386
1387 stage_state->push_constants_dirty = true;
1388
1389 if (prog->info.num_ubos || prog->info.num_ssbos)
1390 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1391 }
1392
1393 static void
1394 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
1395 {
1396 struct gl_context *ctx = &brw->ctx;
1397 /* _NEW_PROGRAM */
1398 struct gl_program *prog = ctx->FragmentProgram._Current;
1399
1400 /* BRW_NEW_FS_PROG_DATA */
1401 brw_upload_ubo_surfaces(brw, prog, &brw->wm.base, brw->wm.base.prog_data);
1402 }
1403
1404 const struct brw_tracked_state brw_wm_ubo_surfaces = {
1405 .dirty = {
1406 .mesa = _NEW_PROGRAM,
1407 .brw = BRW_NEW_BATCH |
1408 BRW_NEW_BLORP |
1409 BRW_NEW_FS_PROG_DATA |
1410 BRW_NEW_UNIFORM_BUFFER,
1411 },
1412 .emit = brw_upload_wm_ubo_surfaces,
1413 };
1414
1415 static void
1416 brw_upload_cs_ubo_surfaces(struct brw_context *brw)
1417 {
1418 struct gl_context *ctx = &brw->ctx;
1419 /* _NEW_PROGRAM */
1420 struct gl_program *prog =
1421 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1422
1423 /* BRW_NEW_CS_PROG_DATA */
1424 brw_upload_ubo_surfaces(brw, prog, &brw->cs.base, brw->cs.base.prog_data);
1425 }
1426
1427 const struct brw_tracked_state brw_cs_ubo_surfaces = {
1428 .dirty = {
1429 .mesa = _NEW_PROGRAM,
1430 .brw = BRW_NEW_BATCH |
1431 BRW_NEW_BLORP |
1432 BRW_NEW_CS_PROG_DATA |
1433 BRW_NEW_UNIFORM_BUFFER,
1434 },
1435 .emit = brw_upload_cs_ubo_surfaces,
1436 };
1437
1438 void
1439 brw_upload_abo_surfaces(struct brw_context *brw,
1440 const struct gl_program *prog,
1441 struct brw_stage_state *stage_state,
1442 struct brw_stage_prog_data *prog_data)
1443 {
1444 struct gl_context *ctx = &brw->ctx;
1445 uint32_t *surf_offsets =
1446 &stage_state->surf_offset[prog_data->binding_table.abo_start];
1447
1448 if (prog->info.num_abos) {
1449 for (unsigned i = 0; i < prog->info.num_abos; i++) {
1450 struct gl_atomic_buffer_binding *binding =
1451 &ctx->AtomicBufferBindings[prog->sh.AtomicBuffers[i]->Binding];
1452 struct intel_buffer_object *intel_bo =
1453 intel_buffer_object(binding->BufferObject);
1454 struct brw_bo *bo =
1455 intel_bufferobj_buffer(brw, intel_bo, binding->Offset,
1456 intel_bo->Base.Size - binding->Offset,
1457 true);
1458
1459 brw_emit_buffer_surface_state(brw, &surf_offsets[i], bo,
1460 binding->Offset, ISL_FORMAT_RAW,
1461 bo->size - binding->Offset, 1, true);
1462 }
1463
1464 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1465 }
1466 }
1467
1468 static void
1469 brw_upload_wm_abo_surfaces(struct brw_context *brw)
1470 {
1471 /* _NEW_PROGRAM */
1472 const struct gl_program *wm = brw->fragment_program;
1473
1474 if (wm) {
1475 /* BRW_NEW_FS_PROG_DATA */
1476 brw_upload_abo_surfaces(brw, wm, &brw->wm.base, brw->wm.base.prog_data);
1477 }
1478 }
1479
1480 const struct brw_tracked_state brw_wm_abo_surfaces = {
1481 .dirty = {
1482 .mesa = _NEW_PROGRAM,
1483 .brw = BRW_NEW_ATOMIC_BUFFER |
1484 BRW_NEW_BLORP |
1485 BRW_NEW_BATCH |
1486 BRW_NEW_FS_PROG_DATA,
1487 },
1488 .emit = brw_upload_wm_abo_surfaces,
1489 };
1490
1491 static void
1492 brw_upload_cs_abo_surfaces(struct brw_context *brw)
1493 {
1494 /* _NEW_PROGRAM */
1495 const struct gl_program *cp = brw->compute_program;
1496
1497 if (cp) {
1498 /* BRW_NEW_CS_PROG_DATA */
1499 brw_upload_abo_surfaces(brw, cp, &brw->cs.base, brw->cs.base.prog_data);
1500 }
1501 }
1502
1503 const struct brw_tracked_state brw_cs_abo_surfaces = {
1504 .dirty = {
1505 .mesa = _NEW_PROGRAM,
1506 .brw = BRW_NEW_ATOMIC_BUFFER |
1507 BRW_NEW_BLORP |
1508 BRW_NEW_BATCH |
1509 BRW_NEW_CS_PROG_DATA,
1510 },
1511 .emit = brw_upload_cs_abo_surfaces,
1512 };
1513
1514 static void
1515 brw_upload_cs_image_surfaces(struct brw_context *brw)
1516 {
1517 /* _NEW_PROGRAM */
1518 const struct gl_program *cp = brw->compute_program;
1519
1520 if (cp) {
1521 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1522 brw_upload_image_surfaces(brw, cp, &brw->cs.base,
1523 brw->cs.base.prog_data);
1524 }
1525 }
1526
1527 const struct brw_tracked_state brw_cs_image_surfaces = {
1528 .dirty = {
1529 .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
1530 .brw = BRW_NEW_BATCH |
1531 BRW_NEW_BLORP |
1532 BRW_NEW_CS_PROG_DATA |
1533 BRW_NEW_IMAGE_UNITS
1534 },
1535 .emit = brw_upload_cs_image_surfaces,
1536 };
1537
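/* Choose the SURFACE_STATE format used to back a shader image: write-only
 * access can use the real format, reads need a typed format the data port
 * can actually return, and anything else falls back to RAW with untyped
 * read/write messages.
 */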
1538 static uint32_t
1539 get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
1540 {
1541 const struct gen_device_info *devinfo = &brw->screen->devinfo;
1542 enum isl_format hw_format = brw_isl_format_for_mesa_format(format);
1543 if (access == GL_WRITE_ONLY) {
1544 return hw_format;
1545 } else if (isl_has_matching_typed_storage_image_format(devinfo, hw_format)) {
1546 /* Typed surface reads support a very limited subset of the shader
1547 * image formats. Translate it into the closest format the
1548 * hardware supports.
1549 */
1550 return isl_lower_storage_image_format(devinfo, hw_format);
1551 } else {
1552 /* The hardware doesn't actually support a typed format that we can use
1553 * so we have to fall back to untyped read/write messages.
1554 */
1555 return ISL_FORMAT_RAW;
1556 }
1557 }
1558
1559 static void
1560 update_default_image_param(struct brw_context *brw,
1561 struct gl_image_unit *u,
1562 unsigned surface_idx,
1563 struct brw_image_param *param)
1564 {
1565 memset(param, 0, sizeof(*param));
1566 param->surface_idx = surface_idx;
1567 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1568 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1569 * detailed explanation of these parameters.
1570 */
1571 param->swizzling[0] = 0xff;
1572 param->swizzling[1] = 0xff;
1573 }
1574
1575 static void
1576 update_buffer_image_param(struct brw_context *brw,
1577 struct gl_image_unit *u,
1578 unsigned surface_idx,
1579 struct brw_image_param *param)
1580 {
1581 struct gl_buffer_object *obj = u->TexObj->BufferObject;
1582 const uint32_t size = MIN2((uint32_t)u->TexObj->BufferSize, obj->Size);
1583 update_default_image_param(brw, u, surface_idx, param);
1584
1585 param->size[0] = size / _mesa_get_format_bytes(u->_ActualFormat);
1586 param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
1587 }
1588
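/* Number of layers a layered image binding covers: all six faces of a cube
 * map, the (minified) depth of a 3D texture at the given level, or the
 * array length otherwise.
 */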
1589 static unsigned
1590 get_image_num_layers(const struct intel_mipmap_tree *mt, GLenum target,
1591 unsigned level)
1592 {
1593 if (target == GL_TEXTURE_CUBE_MAP)
1594 return 6;
1595
1596 return target == GL_TEXTURE_3D ?
1597 minify(mt->surf.logical_level0_px.depth, level) :
1598 mt->surf.logical_level0_px.array_len;
1599 }
1600
1601 static void
1602 update_image_surface(struct brw_context *brw,
1603 struct gl_image_unit *u,
1604 GLenum access,
1605 unsigned surface_idx,
1606 uint32_t *surf_offset,
1607 struct brw_image_param *param)
1608 {
1609 if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
1610 struct gl_texture_object *obj = u->TexObj;
1611 const unsigned format = get_image_format(brw, u->_ActualFormat, access);
1612
1613 if (obj->Target == GL_TEXTURE_BUFFER) {
1614 struct intel_buffer_object *intel_obj =
1615 intel_buffer_object(obj->BufferObject);
1616 const unsigned texel_size = (format == ISL_FORMAT_RAW ? 1 :
1617 _mesa_get_format_bytes(u->_ActualFormat));
1618
1619 brw_emit_buffer_surface_state(
1620 brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
1621 format, intel_obj->Base.Size, texel_size,
1622 access != GL_READ_ONLY);
1623
1624 update_buffer_image_param(brw, u, surface_idx, param);
1625
1626 } else {
1627 struct intel_texture_object *intel_obj = intel_texture_object(obj);
1628 struct intel_mipmap_tree *mt = intel_obj->mt;
1629 const unsigned num_layers = u->Layered ?
1630 get_image_num_layers(mt, obj->Target, u->Level) : 1;
1631
1632 struct isl_view view = {
1633 .format = format,
1634 .base_level = obj->MinLevel + u->Level,
1635 .levels = 1,
1636 .base_array_layer = obj->MinLayer + u->_Layer,
1637 .array_len = num_layers,
1638 .swizzle = ISL_SWIZZLE_IDENTITY,
1639 .usage = ISL_SURF_USAGE_STORAGE_BIT,
1640 };
1641
1642 if (format == ISL_FORMAT_RAW) {
1643 brw_emit_buffer_surface_state(
1644 brw, surf_offset, mt->bo, mt->offset,
1645 format, mt->bo->size - mt->offset, 1 /* pitch */,
1646 access != GL_READ_ONLY);
1647
1648 } else {
1649 const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
1650 assert(!intel_miptree_has_color_unresolved(mt,
1651 view.base_level, 1,
1652 view.base_array_layer,
1653 view.array_len));
1654 brw_emit_surface_state(brw, mt, mt->target, view,
1655 ISL_AUX_USAGE_NONE, tex_mocs[brw->gen],
1656 surf_offset, surf_index,
1657 I915_GEM_DOMAIN_SAMPLER,
1658 access == GL_READ_ONLY ? 0 :
1659 I915_GEM_DOMAIN_SAMPLER);
1660 }
1661
1662 isl_surf_fill_image_param(&brw->isl_dev, param, &mt->surf, &view);
1663 param->surface_idx = surface_idx;
1664 }
1665
1666 } else {
1667 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset);
1668 update_default_image_param(brw, u, surface_idx, param);
1669 }
1670 }
1671
1672 void
1673 brw_upload_image_surfaces(struct brw_context *brw,
1674 const struct gl_program *prog,
1675 struct brw_stage_state *stage_state,
1676 struct brw_stage_prog_data *prog_data)
1677 {
1678 assert(prog);
1679 struct gl_context *ctx = &brw->ctx;
1680
1681 if (prog->info.num_images) {
1682 for (unsigned i = 0; i < prog->info.num_images; i++) {
1683 struct gl_image_unit *u = &ctx->ImageUnits[prog->sh.ImageUnits[i]];
1684 const unsigned surf_idx = prog_data->binding_table.image_start + i;
1685
1686 update_image_surface(brw, u, prog->sh.ImageAccess[i],
1687 surf_idx,
1688 &stage_state->surf_offset[surf_idx],
1689 &prog_data->image_param[i]);
1690 }
1691
1692 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1693       /* This may have changed the image metadata that depends on the context
1694        * image unit state and is passed to the program as uniforms, so make
1695        * sure that push and pull constants are re-uploaded.
1696 */
1697 brw->NewGLState |= _NEW_PROGRAM_CONSTANTS;
1698 }
1699 }
1700
1701 static void
1702 brw_upload_wm_image_surfaces(struct brw_context *brw)
1703 {
1704 /* BRW_NEW_FRAGMENT_PROGRAM */
1705 const struct gl_program *wm = brw->fragment_program;
1706
1707 if (wm) {
1708 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1709 brw_upload_image_surfaces(brw, wm, &brw->wm.base,
1710 brw->wm.base.prog_data);
1711 }
1712 }
1713
1714 const struct brw_tracked_state brw_wm_image_surfaces = {
1715 .dirty = {
1716 .mesa = _NEW_TEXTURE,
1717 .brw = BRW_NEW_BATCH |
1718 BRW_NEW_BLORP |
1719 BRW_NEW_FRAGMENT_PROGRAM |
1720 BRW_NEW_FS_PROG_DATA |
1721 BRW_NEW_IMAGE_UNITS
1722 },
1723 .emit = brw_upload_wm_image_surfaces,
1724 };
1725
1726 void
1727 gen4_init_vtable_surface_functions(struct brw_context *brw)
1728 {
1729 brw->vtbl.update_renderbuffer_surface = gen4_update_renderbuffer_surface;
1730 brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
1731 }
1732
1733 void
1734 gen6_init_vtable_surface_functions(struct brw_context *brw)
1735 {
1736 gen4_init_vtable_surface_functions(brw);
1737 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
1738 }
1739
1740 static void
1741 brw_upload_cs_work_groups_surface(struct brw_context *brw)
1742 {
1743 struct gl_context *ctx = &brw->ctx;
1744 /* _NEW_PROGRAM */
1745 struct gl_program *prog =
1746 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1747 /* BRW_NEW_CS_PROG_DATA */
1748 const struct brw_cs_prog_data *cs_prog_data =
1749 brw_cs_prog_data(brw->cs.base.prog_data);
1750
1751 if (prog && cs_prog_data->uses_num_work_groups) {
1752 const unsigned surf_idx =
1753 cs_prog_data->binding_table.work_groups_start;
1754 uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
1755 struct brw_bo *bo;
1756 uint32_t bo_offset;
1757
1758 if (brw->compute.num_work_groups_bo == NULL) {
1759 bo = NULL;
1760 intel_upload_data(brw,
1761 (void *)brw->compute.num_work_groups,
1762 3 * sizeof(GLuint),
1763 sizeof(GLuint),
1764 &bo,
1765 &bo_offset);
1766 } else {
1767 bo = brw->compute.num_work_groups_bo;
1768 bo_offset = brw->compute.num_work_groups_offset;
1769 }
1770
1771 brw_emit_buffer_surface_state(brw, surf_offset,
1772 bo, bo_offset,
1773 ISL_FORMAT_RAW,
1774 3 * sizeof(GLuint), 1, true);
1775 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1776 }
1777 }
1778
1779 const struct brw_tracked_state brw_cs_work_groups_surface = {
1780 .dirty = {
1781 .brw = BRW_NEW_BLORP |
1782 BRW_NEW_CS_PROG_DATA |
1783 BRW_NEW_CS_WORK_GROUPS
1784 },
1785 .emit = brw_upload_cs_work_groups_surface,
1786 };