i965/state: Add generic surface update functions based on ISL
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "main/shaderimage.h"
38 #include "main/teximage.h"
39 #include "program/prog_parameter.h"
40 #include "program/prog_instruction.h"
41 #include "main/framebuffer.h"
42
43 #include "isl/isl.h"
44
45 #include "intel_mipmap_tree.h"
46 #include "intel_batchbuffer.h"
47 #include "intel_tex.h"
48 #include "intel_fbo.h"
49 #include "intel_buffer_objects.h"
50
51 #include "brw_context.h"
52 #include "brw_state.h"
53 #include "brw_defines.h"
54 #include "brw_wm.h"
55
56 struct surface_state_info {
57    unsigned num_dwords; /* Size of RENDER_SURFACE_STATE in dwords */
58    unsigned ss_align; /* Required alignment of RENDER_SURFACE_STATE in bytes */
59    unsigned reloc_dw; /* Dword holding the surface base address (reloc target) */
60    unsigned aux_reloc_dw; /* Dword holding the auxiliary (MCS) surface address */
61    unsigned tex_mocs; /* MOCS value used for texture surfaces */
62    unsigned rb_mocs; /* MOCS value used for render target surfaces */
63 };
64
65 static const struct surface_state_info surface_state_infos[] = {
66 [4] = {6, 32, 1, 0},
67 [5] = {6, 32, 1, 0},
68 [6] = {6, 32, 1, 0},
69 [7] = {8, 32, 1, 6, GEN7_MOCS_L3, GEN7_MOCS_L3},
70 [8] = {13, 64, 8, 10, BDW_MOCS_WB, BDW_MOCS_PTE},
71 [9] = {16, 64, 8, 10, SKL_MOCS_WB, SKL_MOCS_PTE},
72 };
73
74 void
75 brw_emit_surface_state(struct brw_context *brw,
76 struct intel_mipmap_tree *mt,
77 const struct isl_view *view,
78 uint32_t mocs, bool for_gather,
79 uint32_t *surf_offset, int surf_index,
80 unsigned read_domains, unsigned write_domains)
81 {
82 const struct surface_state_info ss_info = surface_state_infos[brw->gen];
83
84 struct isl_surf surf;
85 intel_miptree_get_isl_surf(brw, mt, &surf);
86
87 union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } };
88
89 struct isl_surf *aux_surf = NULL, aux_surf_s;
90 uint64_t aux_offset = 0;
91 enum isl_aux_usage aux_usage = ISL_AUX_USAGE_NONE;
92 if (mt->mcs_mt &&
93 ((view->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) ||
94 mt->fast_clear_state != INTEL_FAST_CLEAR_STATE_RESOLVED)) {
95 intel_miptree_get_aux_isl_surf(brw, mt, &aux_surf_s, &aux_usage);
96 aux_surf = &aux_surf_s;
97 assert(mt->mcs_mt->offset == 0);
98 aux_offset = mt->mcs_mt->bo->offset64;
99
100 /* We only really need a clear color if we also have an auxiliary
101       * surface. Without one, it does nothing.
102 */
103 clear_color = intel_miptree_get_isl_clear_color(brw, mt);
104 }
105
106 uint32_t *dw = __brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
107 ss_info.num_dwords * 4, ss_info.ss_align,
108 surf_index, surf_offset);
109
110 isl_surf_fill_state(&brw->isl_dev, dw, .surf = &surf, .view = view,
111 .address = mt->bo->offset64 + mt->offset,
112 .aux_surf = aux_surf, .aux_usage = aux_usage,
113 .aux_address = aux_offset,
114 .mocs = mocs, .clear_color = clear_color);
115
116 drm_intel_bo_emit_reloc(brw->batch.bo,
117 *surf_offset + 4 * ss_info.reloc_dw,
118 mt->bo, mt->offset,
119 read_domains, write_domains);
120
121 if (aux_surf) {
122 /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
123 * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
124 * contain other control information. Since buffer addresses are always
125 * on 4k boundaries (and thus have their lower 12 bits zero), we can use
126 * an ordinary reloc to do the necessary address translation.
127 */
128 assert((aux_offset & 0xfff) == 0);
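      /* Pass the packed control bits (dw & 0xfff) as the reloc delta so they
       * are preserved when the kernel patches in the final MCS address.
       */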
129 drm_intel_bo_emit_reloc(brw->batch.bo,
130 *surf_offset + 4 * ss_info.aux_reloc_dw,
131 mt->mcs_mt->bo, dw[ss_info.aux_reloc_dw] & 0xfff,
132 read_domains, write_domains);
133 }
134 }
135
136 uint32_t
137 brw_update_renderbuffer_surface(struct brw_context *brw,
138 struct gl_renderbuffer *rb,
139 bool layered, unsigned unit /* unused */,
140 uint32_t surf_index)
141 {
142 struct gl_context *ctx = &brw->ctx;
143 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
144 struct intel_mipmap_tree *mt = irb->mt;
145
146 assert(brw_render_target_supported(brw, rb));
147 intel_miptree_used_for_rendering(mt);
148
149 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
150 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
151 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
152 __func__, _mesa_get_format_name(rb_format));
153 }
154
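   /* For UMS and CMS layouts the miptree stores each sample in its own array
    * slice, so irb->mt_layer counts sample slices rather than logical layers
    * and has to be divided back down here.
    */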
155 const unsigned layer_multiplier =
156 (irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_UMS ||
157 irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) ?
158 MAX2(irb->mt->num_samples, 1) : 1;
159
160 struct isl_view view = {
161 .format = brw->render_target_format[rb_format],
162 .base_level = irb->mt_level - irb->mt->first_level,
163 .levels = 1,
164 .base_array_layer = irb->mt_layer / layer_multiplier,
165 .array_len = MAX2(irb->layer_count, 1),
166 .channel_select = {
167 ISL_CHANNEL_SELECT_RED,
168 ISL_CHANNEL_SELECT_GREEN,
169 ISL_CHANNEL_SELECT_BLUE,
170 ISL_CHANNEL_SELECT_ALPHA,
171 },
172 .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
173 };
174
175 uint32_t offset;
176 brw_emit_surface_state(brw, mt, &view,
177 surface_state_infos[brw->gen].rb_mocs, false,
178 &offset, surf_index,
179 I915_GEM_DOMAIN_RENDER,
180 I915_GEM_DOMAIN_RENDER);
181 return offset;
182 }
183
184 GLuint
185 translate_tex_target(GLenum target)
186 {
187 switch (target) {
188 case GL_TEXTURE_1D:
189 case GL_TEXTURE_1D_ARRAY_EXT:
190 return BRW_SURFACE_1D;
191
192 case GL_TEXTURE_RECTANGLE_NV:
193 return BRW_SURFACE_2D;
194
195 case GL_TEXTURE_2D:
196 case GL_TEXTURE_2D_ARRAY_EXT:
197 case GL_TEXTURE_EXTERNAL_OES:
198 case GL_TEXTURE_2D_MULTISAMPLE:
199 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
200 return BRW_SURFACE_2D;
201
202 case GL_TEXTURE_3D:
203 return BRW_SURFACE_3D;
204
205 case GL_TEXTURE_CUBE_MAP:
206 case GL_TEXTURE_CUBE_MAP_ARRAY:
207 return BRW_SURFACE_CUBE;
208
209 default:
210 unreachable("not reached");
211 }
212 }
213
214 uint32_t
215 brw_get_surface_tiling_bits(uint32_t tiling)
216 {
217 switch (tiling) {
218 case I915_TILING_X:
219 return BRW_SURFACE_TILED;
220 case I915_TILING_Y:
221 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
222 default:
223 return 0;
224 }
225 }
226
227
228 uint32_t
229 brw_get_surface_num_multisamples(unsigned num_samples)
230 {
231 if (num_samples > 1)
232 return BRW_SURFACE_MULTISAMPLECOUNT_4;
233 else
234 return BRW_SURFACE_MULTISAMPLECOUNT_1;
235 }
236
237 /**
238 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
239 * swizzling.
240 */
241 int
242 brw_get_texture_swizzle(const struct gl_context *ctx,
243 const struct gl_texture_object *t)
244 {
245 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
246
247 int swizzles[SWIZZLE_NIL + 1] = {
248 SWIZZLE_X,
249 SWIZZLE_Y,
250 SWIZZLE_Z,
251 SWIZZLE_W,
252 SWIZZLE_ZERO,
253 SWIZZLE_ONE,
254 SWIZZLE_NIL
255 };
256
257 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
258 img->_BaseFormat == GL_DEPTH_STENCIL) {
259 GLenum depth_mode = t->DepthMode;
260
261 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
262 * with depth component data specified with a sized internal format.
263 * Otherwise, it's left at the old default, GL_LUMINANCE.
264 */
265 if (_mesa_is_gles3(ctx) &&
266 img->InternalFormat != GL_DEPTH_COMPONENT &&
267 img->InternalFormat != GL_DEPTH_STENCIL) {
268 depth_mode = GL_RED;
269 }
270
271 switch (depth_mode) {
272 case GL_ALPHA:
273 swizzles[0] = SWIZZLE_ZERO;
274 swizzles[1] = SWIZZLE_ZERO;
275 swizzles[2] = SWIZZLE_ZERO;
276 swizzles[3] = SWIZZLE_X;
277 break;
278 case GL_LUMINANCE:
279 swizzles[0] = SWIZZLE_X;
280 swizzles[1] = SWIZZLE_X;
281 swizzles[2] = SWIZZLE_X;
282 swizzles[3] = SWIZZLE_ONE;
283 break;
284 case GL_INTENSITY:
285 swizzles[0] = SWIZZLE_X;
286 swizzles[1] = SWIZZLE_X;
287 swizzles[2] = SWIZZLE_X;
288 swizzles[3] = SWIZZLE_X;
289 break;
290 case GL_RED:
291 swizzles[0] = SWIZZLE_X;
292 swizzles[1] = SWIZZLE_ZERO;
293 swizzles[2] = SWIZZLE_ZERO;
294 swizzles[3] = SWIZZLE_ONE;
295 break;
296 }
297 }
298
299 GLenum datatype = _mesa_get_format_datatype(img->TexFormat);
300
301 /* If the texture's format is alpha-only, force R, G, and B to
302 * 0.0. Similarly, if the texture's format has no alpha channel,
303 * force the alpha value read to 1.0. This allows for the
304 * implementation to use an RGBA texture for any of these formats
305 * without leaking any unexpected values.
306 */
307 switch (img->_BaseFormat) {
308 case GL_ALPHA:
309 swizzles[0] = SWIZZLE_ZERO;
310 swizzles[1] = SWIZZLE_ZERO;
311 swizzles[2] = SWIZZLE_ZERO;
312 break;
313 case GL_LUMINANCE:
314 if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
315 swizzles[0] = SWIZZLE_X;
316 swizzles[1] = SWIZZLE_X;
317 swizzles[2] = SWIZZLE_X;
318 swizzles[3] = SWIZZLE_ONE;
319 }
320 break;
321 case GL_LUMINANCE_ALPHA:
322 if (datatype == GL_SIGNED_NORMALIZED) {
323 swizzles[0] = SWIZZLE_X;
324 swizzles[1] = SWIZZLE_X;
325 swizzles[2] = SWIZZLE_X;
326 swizzles[3] = SWIZZLE_W;
327 }
328 break;
329 case GL_INTENSITY:
330 if (datatype == GL_SIGNED_NORMALIZED) {
331 swizzles[0] = SWIZZLE_X;
332 swizzles[1] = SWIZZLE_X;
333 swizzles[2] = SWIZZLE_X;
334 swizzles[3] = SWIZZLE_X;
335 }
336 break;
337 case GL_RED:
338 case GL_RG:
339 case GL_RGB:
340 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
341 swizzles[3] = SWIZZLE_ONE;
342 break;
343 }
344
345 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
346 swizzles[GET_SWZ(t->_Swizzle, 1)],
347 swizzles[GET_SWZ(t->_Swizzle, 2)],
348 swizzles[GET_SWZ(t->_Swizzle, 3)]);
349 }
350
351 /**
352  * Convert a swizzle enumeration (e.g. SWIZZLE_X) to one of the Gen7.5+
353 * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED). The mappings are
354 *
355 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
356 * 0 1 2 3 4 5
357 * 4 5 6 7 0 1
358 * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE
359 *
360 * which is simply adding 4 then modding by 8 (or anding with 7).
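 *    For example, SWIZZLE_ZERO (4) maps to (4 + 4) & 7 = 0 = SCS_ZERO, and
 *    SWIZZLE_X (0) maps to (0 + 4) & 7 = 4 = SCS_RED.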
361 *
362 * We then may need to apply workarounds for textureGather hardware bugs.
363 */
364 static unsigned
365 swizzle_to_scs(GLenum swizzle, bool need_green_to_blue)
366 {
367 unsigned scs = (swizzle + 4) & 7;
368
369 return (need_green_to_blue && scs == HSW_SCS_GREEN) ? HSW_SCS_BLUE : scs;
370 }
371
372 void
373 brw_update_texture_surface(struct gl_context *ctx,
374 unsigned unit,
375 uint32_t *surf_offset,
376 bool for_gather,
377 uint32_t plane)
378 {
379 struct brw_context *brw = brw_context(ctx);
380 struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current;
381
382 if (obj->Target == GL_TEXTURE_BUFFER) {
383 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
384
385 } else {
386 struct intel_texture_object *intel_obj = intel_texture_object(obj);
387 struct intel_mipmap_tree *mt = intel_obj->mt;
388 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
389 /* If this is a view with restricted NumLayers, then our effective depth
390 * is not just the miptree depth.
391 */
392 const unsigned mt_num_layers =
393 mt->logical_depth0 * (_mesa_is_cube_map_texture(mt->target) ? 6 : 1);
394 const unsigned view_num_layers =
395 (obj->Immutable && obj->Target != GL_TEXTURE_3D) ? obj->NumLayers :
396 mt_num_layers;
397
398 /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
399 * texturing functions that return a float, as our code generation always
400 * selects the .x channel (which would always be 0).
401 */
402 struct gl_texture_image *firstImage = obj->Image[0][obj->BaseLevel];
403 const bool alpha_depth = obj->DepthMode == GL_ALPHA &&
404 (firstImage->_BaseFormat == GL_DEPTH_COMPONENT ||
405 firstImage->_BaseFormat == GL_DEPTH_STENCIL);
406 const unsigned swizzle = (unlikely(alpha_depth) ? SWIZZLE_XYZW :
407 brw_get_texture_swizzle(&brw->ctx, obj));
408
409 unsigned format = translate_tex_format(
410 brw, intel_obj->_Format, sampler->sRGBDecode);
411
412 /* Implement gen6 and gen7 gather work-around */
413 bool need_green_to_blue = false;
414 if (for_gather) {
415 if (brw->gen == 7 && format == BRW_SURFACEFORMAT_R32G32_FLOAT) {
416 format = BRW_SURFACEFORMAT_R32G32_FLOAT_LD;
417 need_green_to_blue = brw->is_haswell;
418 } else if (brw->gen == 6) {
419 /* Sandybridge's gather4 message is broken for integer formats.
420 * To work around this, we pretend the surface is UNORM for
421 * 8 or 16-bit formats, and emit shader instructions to recover
422 * the real INT/UINT value. For 32-bit formats, we pretend
423 * the surface is FLOAT, and simply reinterpret the resulting
424 * bits.
425 */
426 switch (format) {
427 case BRW_SURFACEFORMAT_R8_SINT:
428 case BRW_SURFACEFORMAT_R8_UINT:
429 format = BRW_SURFACEFORMAT_R8_UNORM;
430 break;
431
432 case BRW_SURFACEFORMAT_R16_SINT:
433 case BRW_SURFACEFORMAT_R16_UINT:
434 format = BRW_SURFACEFORMAT_R16_UNORM;
435 break;
436
437 case BRW_SURFACEFORMAT_R32_SINT:
438 case BRW_SURFACEFORMAT_R32_UINT:
439 format = BRW_SURFACEFORMAT_R32_FLOAT;
440 break;
441
442 default:
443 break;
444 }
445 }
446 }
447
448 if (obj->StencilSampling && firstImage->_BaseFormat == GL_DEPTH_STENCIL) {
449 assert(brw->gen >= 8);
450 mt = mt->stencil_mt;
451 format = BRW_SURFACEFORMAT_R8_UINT;
452 } else if (obj->Target == GL_TEXTURE_EXTERNAL_OES) {
453 if (plane > 0)
454 mt = mt->plane[plane - 1];
455 if (mt == NULL)
456 return;
457 format = translate_tex_format(brw, mt->format, sampler->sRGBDecode);
458 }
459
460 const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
461
462 struct isl_view view = {
463 .format = format,
464 .base_level = obj->MinLevel + obj->BaseLevel,
465 .levels = intel_obj->_MaxLevel - obj->BaseLevel + 1,
466 .base_array_layer = obj->MinLayer,
467 .array_len = view_num_layers,
468 .channel_select = {
469 swizzle_to_scs(GET_SWZ(swizzle, 0), need_green_to_blue),
470 swizzle_to_scs(GET_SWZ(swizzle, 1), need_green_to_blue),
471 swizzle_to_scs(GET_SWZ(swizzle, 2), need_green_to_blue),
472 swizzle_to_scs(GET_SWZ(swizzle, 3), need_green_to_blue),
473 },
474 .usage = ISL_SURF_USAGE_TEXTURE_BIT,
475 };
476
477 if (obj->Target == GL_TEXTURE_CUBE_MAP ||
478 obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY)
479 view.usage |= ISL_SURF_USAGE_CUBE_BIT;
480
481 brw_emit_surface_state(brw, mt, &view,
482 surface_state_infos[brw->gen].tex_mocs, for_gather,
483 surf_offset, surf_index,
484 I915_GEM_DOMAIN_SAMPLER, 0);
485 }
486 }
487
488 static void
489 gen4_emit_buffer_surface_state(struct brw_context *brw,
490 uint32_t *out_offset,
491 drm_intel_bo *bo,
492 unsigned buffer_offset,
493 unsigned surface_format,
494 unsigned buffer_size,
495 unsigned pitch,
496 bool rw)
497 {
498 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
499 6 * 4, 32, out_offset);
500 memset(surf, 0, 6 * 4);
501
502 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
503 surface_format << BRW_SURFACE_FORMAT_SHIFT |
504 (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
505 surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
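   /* The buffer size minus one is packed across the Width [6:0],
    * Height [19:7] and Depth [26:20] fields of the surface state.
    */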
506 surf[2] = ((buffer_size - 1) & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
507 (((buffer_size - 1) >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
508 surf[3] = (((buffer_size - 1) >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
509 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
510
511 /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
512 * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
513 * physical cache. It is mapped in hardware to the sampler cache."
514 */
515 if (bo) {
516 drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
517 bo, buffer_offset,
518 I915_GEM_DOMAIN_SAMPLER,
519 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
520 }
521 }
522
523 void
524 brw_update_buffer_texture_surface(struct gl_context *ctx,
525 unsigned unit,
526 uint32_t *surf_offset)
527 {
528 struct brw_context *brw = brw_context(ctx);
529 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
530 struct intel_buffer_object *intel_obj =
531 intel_buffer_object(tObj->BufferObject);
532 uint32_t size = tObj->BufferSize;
533 drm_intel_bo *bo = NULL;
534 mesa_format format = tObj->_BufferObjectFormat;
535 uint32_t brw_format = brw_format_for_mesa_format(format);
536 int texel_size = _mesa_get_format_bytes(format);
537
538 if (intel_obj) {
539 size = MIN2(size, intel_obj->Base.Size);
540 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
541 }
542
543 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
544 _mesa_problem(NULL, "bad format %s for texture buffer\n",
545 _mesa_get_format_name(format));
546 }
547
548 brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
549 tObj->BufferOffset,
550 brw_format,
551 size / texel_size,
552 texel_size,
553 false /* rw */);
554 }
555
556 static void
557 gen4_update_texture_surface(struct gl_context *ctx,
558 unsigned unit,
559 uint32_t *surf_offset,
560 bool for_gather,
561 uint32_t plane)
562 {
563 struct brw_context *brw = brw_context(ctx);
564 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
565 struct intel_texture_object *intelObj = intel_texture_object(tObj);
566 struct intel_mipmap_tree *mt = intelObj->mt;
567 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
568 uint32_t *surf;
569
570 /* BRW_NEW_TEXTURE_BUFFER */
571 if (tObj->Target == GL_TEXTURE_BUFFER) {
572 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
573 return;
574 }
575
576 if (plane > 0) {
577 if (mt->plane[plane - 1] == NULL)
578 return;
579 mt = mt->plane[plane - 1];
580 }
581
582 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
583 6 * 4, 32, surf_offset);
584
585 mesa_format mesa_fmt = plane == 0 ? intelObj->_Format : mt->format;
586 uint32_t tex_format = translate_tex_format(brw, mesa_fmt,
587 sampler->sRGBDecode);
588
589 if (for_gather) {
590 /* Sandybridge's gather4 message is broken for integer formats.
591 * To work around this, we pretend the surface is UNORM for
592 * 8 or 16-bit formats, and emit shader instructions to recover
593 * the real INT/UINT value. For 32-bit formats, we pretend
594 * the surface is FLOAT, and simply reinterpret the resulting
595 * bits.
596 */
597 switch (tex_format) {
598 case BRW_SURFACEFORMAT_R8_SINT:
599 case BRW_SURFACEFORMAT_R8_UINT:
600 tex_format = BRW_SURFACEFORMAT_R8_UNORM;
601 break;
602
603 case BRW_SURFACEFORMAT_R16_SINT:
604 case BRW_SURFACEFORMAT_R16_UINT:
605 tex_format = BRW_SURFACEFORMAT_R16_UNORM;
606 break;
607
608 case BRW_SURFACEFORMAT_R32_SINT:
609 case BRW_SURFACEFORMAT_R32_UINT:
610 tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
611 break;
612
613 default:
614 break;
615 }
616 }
617
618 surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
619 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
620 BRW_SURFACE_CUBEFACE_ENABLES |
621 tex_format << BRW_SURFACE_FORMAT_SHIFT);
622
623 surf[1] = mt->bo->offset64 + mt->offset; /* reloc */
624
625 surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
626 (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
627 (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
628
629 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
630 (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
631 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
632
633 const unsigned min_lod = tObj->MinLevel + tObj->BaseLevel - mt->first_level;
634 surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
635 SET_FIELD(min_lod, BRW_SURFACE_MIN_LOD) |
636 SET_FIELD(tObj->MinLayer, BRW_SURFACE_MIN_ARRAY_ELEMENT));
637
638 surf[5] = mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
639
640 /* Emit relocation to surface contents */
641 drm_intel_bo_emit_reloc(brw->batch.bo,
642 *surf_offset + 4,
643 mt->bo,
644 surf[1] - mt->bo->offset64,
645 I915_GEM_DOMAIN_SAMPLER, 0);
646 }
647
648 /**
649 * Create the constant buffer surface. Vertex/fragment shader constants will be
650 * read from this buffer with Data Port Read instructions/messages.
651 */
652 void
653 brw_create_constant_surface(struct brw_context *brw,
654 drm_intel_bo *bo,
655 uint32_t offset,
656 uint32_t size,
657 uint32_t *out_offset)
658 {
659 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
660 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
661 size, 1, false);
662 }
663
664 /**
665 * Create the buffer surface. Shader buffer variables will be
666 * read from / write to this buffer with Data Port Read/Write
667 * instructions/messages.
668 */
669 void
670 brw_create_buffer_surface(struct brw_context *brw,
671 drm_intel_bo *bo,
672 uint32_t offset,
673 uint32_t size,
674 uint32_t *out_offset)
675 {
676 /* Use a raw surface so we can reuse existing untyped read/write/atomic
677 * messages. We need these specifically for the fragment shader since they
678 * include a pixel mask header that we need to ensure correct behavior
679 * with helper invocations, which cannot write to the buffer.
680 */
681 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
682 BRW_SURFACEFORMAT_RAW,
683 size, 1, true);
684 }
685
686 /**
687 * Set up a binding table entry for use by stream output logic (transform
688 * feedback).
689 *
690 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
691 */
692 void
693 brw_update_sol_surface(struct brw_context *brw,
694 struct gl_buffer_object *buffer_obj,
695 uint32_t *out_offset, unsigned num_vector_components,
696 unsigned stride_dwords, unsigned offset_dwords)
697 {
698 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
699 uint32_t offset_bytes = 4 * offset_dwords;
700 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
701 offset_bytes,
702 buffer_obj->Size - offset_bytes);
703 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
704 out_offset);
705 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
706 size_t size_dwords = buffer_obj->Size / 4;
707 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
708
709 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
710 * too big to map using a single binding table entry?
711 */
712 assert((size_dwords - offset_dwords) / stride_dwords
713 <= BRW_MAX_NUM_BUFFER_ENTRIES);
714
715 if (size_dwords > offset_dwords + num_vector_components) {
716 /* There is room for at least 1 transform feedback output in the buffer.
717 * Compute the number of additional transform feedback outputs the
718 * buffer has room for.
719 */
720 buffer_size_minus_1 =
721 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
722 } else {
723 /* There isn't even room for a single transform feedback output in the
724 * buffer. We can't configure the binding table entry to prevent output
725 * entirely; we'll have to rely on the geometry shader to detect
726 * overflow. But to minimize the damage in case of a bug, set up the
727 * binding table entry to just allow a single output.
728 */
729 buffer_size_minus_1 = 0;
730 }
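   /* buffer_size_minus_1 is split across the Width [6:0], Height [19:7] and
    * Depth [26:20] fields below, just as for an ordinary buffer surface.
    */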
731 width = buffer_size_minus_1 & 0x7f;
732 height = (buffer_size_minus_1 & 0xfff80) >> 7;
733 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
734
735 switch (num_vector_components) {
736 case 1:
737 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
738 break;
739 case 2:
740 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
741 break;
742 case 3:
743 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
744 break;
745 case 4:
746 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
747 break;
748 default:
749 unreachable("Invalid vector size for transform feedback output");
750 }
751
752 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
753 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
754 surface_format << BRW_SURFACE_FORMAT_SHIFT |
755 BRW_SURFACE_RC_READ_WRITE;
756 surf[1] = bo->offset64 + offset_bytes; /* reloc */
757 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
758 height << BRW_SURFACE_HEIGHT_SHIFT);
759 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
760 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
761 surf[4] = 0;
762 surf[5] = 0;
763
764 /* Emit relocation to surface contents. */
765 drm_intel_bo_emit_reloc(brw->batch.bo,
766 *out_offset + 4,
767 bo, offset_bytes,
768 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
769 }
770
771 /* Creates a new WM constant buffer reflecting the current fragment program's
772 * constants, if needed by the fragment program.
773 *
774 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
775 * state atom.
776 */
777 static void
778 brw_upload_wm_pull_constants(struct brw_context *brw)
779 {
780 struct brw_stage_state *stage_state = &brw->wm.base;
781 /* BRW_NEW_FRAGMENT_PROGRAM */
782 struct brw_fragment_program *fp =
783 (struct brw_fragment_program *) brw->fragment_program;
784 /* BRW_NEW_FS_PROG_DATA */
785 struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;
786
787 /* _NEW_PROGRAM_CONSTANTS */
788 brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
789 stage_state, prog_data);
790 }
791
792 const struct brw_tracked_state brw_wm_pull_constants = {
793 .dirty = {
794 .mesa = _NEW_PROGRAM_CONSTANTS,
795 .brw = BRW_NEW_BATCH |
796 BRW_NEW_BLORP |
797 BRW_NEW_FRAGMENT_PROGRAM |
798 BRW_NEW_FS_PROG_DATA,
799 },
800 .emit = brw_upload_wm_pull_constants,
801 };
802
803 /**
804 * Creates a null renderbuffer surface.
805 *
806 * This is used when the shader doesn't write to any color output. An FB
807 * write to target 0 will still be emitted, because that's how the thread is
808 * terminated (and computed depth is returned), so we need to have the
809  * hardware discard the target 0 color output.
810 */
811 static void
812 brw_emit_null_surface_state(struct brw_context *brw,
813 unsigned width,
814 unsigned height,
815 unsigned samples,
816 uint32_t *out_offset)
817 {
818 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
819 * Notes):
820 *
821 * A null surface will be used in instances where an actual surface is
822 * not bound. When a write message is generated to a null surface, no
823 * actual surface is written to. When a read message (including any
824 * sampling engine message) is generated to a null surface, the result
825 * is all zeros. Note that a null surface type is allowed to be used
826  *     with all messages, even if it is not specifically indicated as
827 * supported. All of the remaining fields in surface state are ignored
828 * for null surfaces, with the following exceptions:
829 *
830 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
831 * depth buffer’s corresponding state for all render target surfaces,
832 * including null.
833 *
834 * - Surface Format must be R8G8B8A8_UNORM.
835 */
836 unsigned surface_type = BRW_SURFACE_NULL;
837 drm_intel_bo *bo = NULL;
838 unsigned pitch_minus_1 = 0;
839 uint32_t multisampling_state = 0;
840 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
841 out_offset);
842
843 if (samples > 1) {
844 /* On Gen6, null render targets seem to cause GPU hangs when
845       * multisampling. So work around this problem by rendering into a dummy
846       * color buffer.
847 *
848 * To decrease the amount of memory needed by the workaround buffer, we
849 * set its pitch to 128 bytes (the width of a Y tile). This means that
850 * the amount of memory needed for the workaround buffer is
851 * (width_in_tiles + height_in_tiles - 1) tiles.
852 *
853 * Note that since the workaround buffer will be interpreted by the
854 * hardware as an interleaved multisampled buffer, we need to compute
855 * width_in_tiles and height_in_tiles by dividing the width and height
856 * by 16 rather than the normal Y-tile size of 32.
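       *
       * For example, a hypothetical 1920x1080 target needs
       * (120 + 68 - 1) * 4096 bytes, i.e. roughly 748 KiB.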
857 */
858 unsigned width_in_tiles = ALIGN(width, 16) / 16;
859 unsigned height_in_tiles = ALIGN(height, 16) / 16;
860 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
861 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
862 size_needed);
863 bo = brw->wm.multisampled_null_render_target_bo;
864 surface_type = BRW_SURFACE_2D;
865 pitch_minus_1 = 127;
866 multisampling_state = brw_get_surface_num_multisamples(samples);
867 }
868
869 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
870 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
871 if (brw->gen < 6) {
872 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
873 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
874 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
875 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
876 }
877 surf[1] = bo ? bo->offset64 : 0;
878 surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
879 (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
880
881 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
882 * Notes):
883 *
884 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
885 */
886 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
887 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
888 surf[4] = multisampling_state;
889 surf[5] = 0;
890
891 if (bo) {
892 drm_intel_bo_emit_reloc(brw->batch.bo,
893 *out_offset + 4,
894 bo, 0,
895 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
896 }
897 }
898
899 /**
900 * Sets up a surface state structure to point at the given region.
901 * While it is only used for the front/back buffer currently, it should be
902  * usable for further buffers when doing ARB_draw_buffers support.
903 */
904 static uint32_t
905 gen4_update_renderbuffer_surface(struct brw_context *brw,
906 struct gl_renderbuffer *rb,
907 bool layered, unsigned unit,
908 uint32_t surf_index)
909 {
910 struct gl_context *ctx = &brw->ctx;
911 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
912 struct intel_mipmap_tree *mt = irb->mt;
913 uint32_t *surf;
914 uint32_t tile_x, tile_y;
915 uint32_t format = 0;
916 uint32_t offset;
917 /* _NEW_BUFFERS */
918 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
919 /* BRW_NEW_FS_PROG_DATA */
920
921 assert(!layered);
922
923 if (rb->TexImage && !brw->has_surface_tile_offset) {
924 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
925
926 if (tile_x != 0 || tile_y != 0) {
927 /* Original gen4 hardware couldn't draw to a non-tile-aligned
928       * destination in a miptree unless you actually set up your renderbuffer
929 * as a miptree and used the fragile lod/array_index/etc. controls to
930 * select the image. So, instead, we just make a new single-level
931 * miptree and render into that.
932 */
933 intel_renderbuffer_move_to_temp(brw, irb, false);
934 mt = irb->mt;
935 }
936 }
937
938 intel_miptree_used_for_rendering(irb->mt);
939
940 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, &offset);
941
942 format = brw->render_target_format[rb_format];
943 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
944 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
945 __func__, _mesa_get_format_name(rb_format));
946 }
947
948 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
949 format << BRW_SURFACE_FORMAT_SHIFT);
950
951 /* reloc */
952 assert(mt->offset % mt->cpp == 0);
953 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
954 mt->bo->offset64 + mt->offset);
955
956 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
957 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
958
959 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
960 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
961
962 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
963
964 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
965 /* Note that the low bits of these fields are missing, so
966 * there's the possibility of getting in trouble.
967 */
968 assert(tile_x % 4 == 0);
969 assert(tile_y % 2 == 0);
970 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
971 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
972 (mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
973
974 if (brw->gen < 6) {
975 /* _NEW_COLOR */
976 if (!ctx->Color.ColorLogicOpEnabled &&
977 (ctx->Color.BlendEnabled & (1 << unit)))
978 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
979
980 if (!ctx->Color.ColorMask[unit][0])
981 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
982 if (!ctx->Color.ColorMask[unit][1])
983 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
984 if (!ctx->Color.ColorMask[unit][2])
985 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
986
987 /* As mentioned above, disable writes to the alpha component when the
988 * renderbuffer is XRGB.
989 */
990 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
991 !ctx->Color.ColorMask[unit][3]) {
992 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
993 }
994 }
995
996 drm_intel_bo_emit_reloc(brw->batch.bo,
997 offset + 4,
998 mt->bo,
999 surf[1] - mt->bo->offset64,
1000 I915_GEM_DOMAIN_RENDER,
1001 I915_GEM_DOMAIN_RENDER);
1002
1003 return offset;
1004 }
1005
1006 /**
1007 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
1008 */
1009 void
1010 brw_update_renderbuffer_surfaces(struct brw_context *brw,
1011 const struct gl_framebuffer *fb,
1012 uint32_t render_target_start,
1013 uint32_t *surf_offset)
1014 {
1015 GLuint i;
1016 const unsigned int w = _mesa_geometric_width(fb);
1017 const unsigned int h = _mesa_geometric_height(fb);
1018 const unsigned int s = _mesa_geometric_samples(fb);
1019
1020 /* Update surfaces for drawing buffers */
1021 if (fb->_NumColorDrawBuffers >= 1) {
1022 for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
1023 const uint32_t surf_index = render_target_start + i;
1024
1025 if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
1026 surf_offset[surf_index] =
1027 brw->vtbl.update_renderbuffer_surface(
1028 brw, fb->_ColorDrawBuffers[i],
1029 _mesa_geometric_layers(fb) > 0, i, surf_index);
1030 } else {
1031 brw->vtbl.emit_null_surface_state(brw, w, h, s,
1032 &surf_offset[surf_index]);
1033 }
1034 }
1035 } else {
1036 const uint32_t surf_index = render_target_start;
1037 brw->vtbl.emit_null_surface_state(brw, w, h, s,
1038 &surf_offset[surf_index]);
1039 }
1040 }
1041
1042 static void
1043 update_renderbuffer_surfaces(struct brw_context *brw)
1044 {
1045 const struct gl_context *ctx = &brw->ctx;
1046
1047 /* _NEW_BUFFERS | _NEW_COLOR */
1048 const struct gl_framebuffer *fb = ctx->DrawBuffer;
1049 brw_update_renderbuffer_surfaces(
1050 brw, fb,
1051 brw->wm.prog_data->binding_table.render_target_start,
1052 brw->wm.base.surf_offset);
1053 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1054 }
1055
1056 const struct brw_tracked_state brw_renderbuffer_surfaces = {
1057 .dirty = {
1058 .mesa = _NEW_BUFFERS |
1059 _NEW_COLOR,
1060 .brw = BRW_NEW_BATCH |
1061 BRW_NEW_BLORP |
1062 BRW_NEW_FS_PROG_DATA,
1063 },
1064 .emit = update_renderbuffer_surfaces,
1065 };
1066
1067 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
1068 .dirty = {
1069 .mesa = _NEW_BUFFERS,
1070 .brw = BRW_NEW_BATCH |
1071 BRW_NEW_BLORP,
1072 },
1073 .emit = update_renderbuffer_surfaces,
1074 };
1075
1076
1077 static void
1078 update_stage_texture_surfaces(struct brw_context *brw,
1079 const struct gl_program *prog,
1080 struct brw_stage_state *stage_state,
1081 bool for_gather, uint32_t plane)
1082 {
1083 if (!prog)
1084 return;
1085
1086 struct gl_context *ctx = &brw->ctx;
1087
1088 uint32_t *surf_offset = stage_state->surf_offset;
1089
1090 /* BRW_NEW_*_PROG_DATA */
1091 if (for_gather)
1092 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
1093 else
1094 surf_offset += stage_state->prog_data->binding_table.plane_start[plane];
1095
1096 unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
1097 for (unsigned s = 0; s < num_samplers; s++) {
1098 surf_offset[s] = 0;
1099
1100 if (prog->SamplersUsed & (1 << s)) {
1101 const unsigned unit = prog->SamplerUnits[s];
1102
1103 /* _NEW_TEXTURE */
1104 if (ctx->Texture.Unit[unit]._Current) {
1105 brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather, plane);
1106 }
1107 }
1108 }
1109 }
1110
1111
1112 /**
1113 * Construct SURFACE_STATE objects for enabled textures.
1114 */
1115 static void
1116 brw_update_texture_surfaces(struct brw_context *brw)
1117 {
1118 /* BRW_NEW_VERTEX_PROGRAM */
1119 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
1120
1121 /* BRW_NEW_TESS_PROGRAMS */
1122 struct gl_program *tcs = (struct gl_program *) brw->tess_ctrl_program;
1123 struct gl_program *tes = (struct gl_program *) brw->tess_eval_program;
1124
1125 /* BRW_NEW_GEOMETRY_PROGRAM */
1126 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
1127
1128 /* BRW_NEW_FRAGMENT_PROGRAM */
1129 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
1130
1131 /* _NEW_TEXTURE */
1132 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false, 0);
1133 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false, 0);
1134 update_stage_texture_surfaces(brw, tes, &brw->tes.base, false, 0);
1135 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false, 0);
1136 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 0);
1137
1138    /* Emit an alternate set of surface states for gather. This
1139     * allows the surface format to be overridden for only the
1140     * gather4 messages. */
1141 if (brw->gen < 8) {
1142 if (vs && vs->UsesGather)
1143 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true, 0);
1144 if (tcs && tcs->UsesGather)
1145 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true, 0);
1146 if (tes && tes->UsesGather)
1147 update_stage_texture_surfaces(brw, tes, &brw->tes.base, true, 0);
1148 if (gs && gs->UsesGather)
1149 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true, 0);
1150 if (fs && fs->UsesGather)
1151 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true, 0);
1152 }
1153
1154 if (fs) {
1155 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 1);
1156 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 2);
1157 }
1158
1159 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1160 }
1161
1162 const struct brw_tracked_state brw_texture_surfaces = {
1163 .dirty = {
1164 .mesa = _NEW_TEXTURE,
1165 .brw = BRW_NEW_BATCH |
1166 BRW_NEW_BLORP |
1167 BRW_NEW_FRAGMENT_PROGRAM |
1168 BRW_NEW_FS_PROG_DATA |
1169 BRW_NEW_GEOMETRY_PROGRAM |
1170 BRW_NEW_GS_PROG_DATA |
1171 BRW_NEW_TESS_PROGRAMS |
1172 BRW_NEW_TCS_PROG_DATA |
1173 BRW_NEW_TES_PROG_DATA |
1174 BRW_NEW_TEXTURE_BUFFER |
1175 BRW_NEW_VERTEX_PROGRAM |
1176 BRW_NEW_VS_PROG_DATA,
1177 },
1178 .emit = brw_update_texture_surfaces,
1179 };
1180
1181 static void
1182 brw_update_cs_texture_surfaces(struct brw_context *brw)
1183 {
1184 /* BRW_NEW_COMPUTE_PROGRAM */
1185 struct gl_program *cs = (struct gl_program *) brw->compute_program;
1186
1187 /* _NEW_TEXTURE */
1188 update_stage_texture_surfaces(brw, cs, &brw->cs.base, false, 0);
1189
1190    /* Emit an alternate set of surface states for gather. This
1191     * allows the surface format to be overridden for only the
1192     * gather4 messages.
1193 */
1194 if (brw->gen < 8) {
1195 if (cs && cs->UsesGather)
1196 update_stage_texture_surfaces(brw, cs, &brw->cs.base, true, 0);
1197 }
1198
1199 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1200 }
1201
1202 const struct brw_tracked_state brw_cs_texture_surfaces = {
1203 .dirty = {
1204 .mesa = _NEW_TEXTURE,
1205 .brw = BRW_NEW_BATCH |
1206 BRW_NEW_BLORP |
1207 BRW_NEW_COMPUTE_PROGRAM,
1208 },
1209 .emit = brw_update_cs_texture_surfaces,
1210 };
1211
1212
1213 void
1214 brw_upload_ubo_surfaces(struct brw_context *brw,
1215 struct gl_linked_shader *shader,
1216 struct brw_stage_state *stage_state,
1217 struct brw_stage_prog_data *prog_data)
1218 {
1219 struct gl_context *ctx = &brw->ctx;
1220
1221 if (!shader)
1222 return;
1223
1224 uint32_t *ubo_surf_offsets =
1225 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
1226
1227 for (int i = 0; i < shader->NumUniformBlocks; i++) {
1228 struct gl_uniform_buffer_binding *binding =
1229 &ctx->UniformBufferBindings[shader->UniformBlocks[i]->Binding];
1230
1231 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
1232 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]);
1233 } else {
1234 struct intel_buffer_object *intel_bo =
1235 intel_buffer_object(binding->BufferObject);
1236 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
1237 if (!binding->AutomaticSize)
1238 size = MIN2(size, binding->Size);
1239 drm_intel_bo *bo =
1240 intel_bufferobj_buffer(brw, intel_bo,
1241 binding->Offset,
1242 size);
1243 brw_create_constant_surface(brw, bo, binding->Offset,
1244 size,
1245 &ubo_surf_offsets[i]);
1246 }
1247 }
1248
1249 uint32_t *ssbo_surf_offsets =
1250 &stage_state->surf_offset[prog_data->binding_table.ssbo_start];
1251
1252 for (int i = 0; i < shader->NumShaderStorageBlocks; i++) {
1253 struct gl_shader_storage_buffer_binding *binding =
1254 &ctx->ShaderStorageBufferBindings[shader->ShaderStorageBlocks[i]->Binding];
1255
1256 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
1257 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]);
1258 } else {
1259 struct intel_buffer_object *intel_bo =
1260 intel_buffer_object(binding->BufferObject);
1261 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
1262 if (!binding->AutomaticSize)
1263 size = MIN2(size, binding->Size);
1264 drm_intel_bo *bo =
1265 intel_bufferobj_buffer(brw, intel_bo,
1266 binding->Offset,
1267 size);
1268 brw_create_buffer_surface(brw, bo, binding->Offset,
1269 size,
1270 &ssbo_surf_offsets[i]);
1271 }
1272 }
1273
1274 if (shader->NumUniformBlocks || shader->NumShaderStorageBlocks)
1275 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1276 }
1277
1278 static void
1279 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
1280 {
1281 struct gl_context *ctx = &brw->ctx;
1282 /* _NEW_PROGRAM */
1283 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1284
1285 if (!prog)
1286 return;
1287
1288 /* BRW_NEW_FS_PROG_DATA */
1289 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1290 &brw->wm.base, &brw->wm.prog_data->base);
1291 }
1292
1293 const struct brw_tracked_state brw_wm_ubo_surfaces = {
1294 .dirty = {
1295 .mesa = _NEW_PROGRAM,
1296 .brw = BRW_NEW_BATCH |
1297 BRW_NEW_BLORP |
1298 BRW_NEW_FS_PROG_DATA |
1299 BRW_NEW_UNIFORM_BUFFER,
1300 },
1301 .emit = brw_upload_wm_ubo_surfaces,
1302 };
1303
1304 static void
1305 brw_upload_cs_ubo_surfaces(struct brw_context *brw)
1306 {
1307 struct gl_context *ctx = &brw->ctx;
1308 /* _NEW_PROGRAM */
1309 struct gl_shader_program *prog =
1310 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1311
1312 if (!prog)
1313 return;
1314
1315 /* BRW_NEW_CS_PROG_DATA */
1316 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1317 &brw->cs.base, &brw->cs.prog_data->base);
1318 }
1319
1320 const struct brw_tracked_state brw_cs_ubo_surfaces = {
1321 .dirty = {
1322 .mesa = _NEW_PROGRAM,
1323 .brw = BRW_NEW_BATCH |
1324 BRW_NEW_BLORP |
1325 BRW_NEW_CS_PROG_DATA |
1326 BRW_NEW_UNIFORM_BUFFER,
1327 },
1328 .emit = brw_upload_cs_ubo_surfaces,
1329 };
1330
1331 void
1332 brw_upload_abo_surfaces(struct brw_context *brw,
1333 struct gl_linked_shader *shader,
1334 struct brw_stage_state *stage_state,
1335 struct brw_stage_prog_data *prog_data)
1336 {
1337 struct gl_context *ctx = &brw->ctx;
1338 uint32_t *surf_offsets =
1339 &stage_state->surf_offset[prog_data->binding_table.abo_start];
1340
1341 if (shader && shader->NumAtomicBuffers) {
1342 for (unsigned i = 0; i < shader->NumAtomicBuffers; i++) {
1343 struct gl_atomic_buffer_binding *binding =
1344 &ctx->AtomicBufferBindings[shader->AtomicBuffers[i]->Binding];
1345 struct intel_buffer_object *intel_bo =
1346 intel_buffer_object(binding->BufferObject);
1347 drm_intel_bo *bo = intel_bufferobj_buffer(
1348 brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
1349
1350 brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
1351 binding->Offset, BRW_SURFACEFORMAT_RAW,
1352 bo->size - binding->Offset, 1, true);
1353 }
1354
1355 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1356 }
1357 }
1358
1359 static void
1360 brw_upload_wm_abo_surfaces(struct brw_context *brw)
1361 {
1362 struct gl_context *ctx = &brw->ctx;
1363 /* _NEW_PROGRAM */
1364 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1365
1366 if (prog) {
1367 /* BRW_NEW_FS_PROG_DATA */
1368 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1369 &brw->wm.base, &brw->wm.prog_data->base);
1370 }
1371 }
1372
1373 const struct brw_tracked_state brw_wm_abo_surfaces = {
1374 .dirty = {
1375 .mesa = _NEW_PROGRAM,
1376 .brw = BRW_NEW_ATOMIC_BUFFER |
1377 BRW_NEW_BLORP |
1378 BRW_NEW_BATCH |
1379 BRW_NEW_FS_PROG_DATA,
1380 },
1381 .emit = brw_upload_wm_abo_surfaces,
1382 };
1383
1384 static void
1385 brw_upload_cs_abo_surfaces(struct brw_context *brw)
1386 {
1387 struct gl_context *ctx = &brw->ctx;
1388 /* _NEW_PROGRAM */
1389 struct gl_shader_program *prog =
1390 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1391
1392 if (prog) {
1393 /* BRW_NEW_CS_PROG_DATA */
1394 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1395 &brw->cs.base, &brw->cs.prog_data->base);
1396 }
1397 }
1398
1399 const struct brw_tracked_state brw_cs_abo_surfaces = {
1400 .dirty = {
1401 .mesa = _NEW_PROGRAM,
1402 .brw = BRW_NEW_ATOMIC_BUFFER |
1403 BRW_NEW_BLORP |
1404 BRW_NEW_BATCH |
1405 BRW_NEW_CS_PROG_DATA,
1406 },
1407 .emit = brw_upload_cs_abo_surfaces,
1408 };
1409
1410 static void
1411 brw_upload_cs_image_surfaces(struct brw_context *brw)
1412 {
1413 struct gl_context *ctx = &brw->ctx;
1414 /* _NEW_PROGRAM */
1415 struct gl_shader_program *prog =
1416 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1417
1418 if (prog) {
1419 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1420 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1421 &brw->cs.base, &brw->cs.prog_data->base);
1422 }
1423 }
1424
1425 const struct brw_tracked_state brw_cs_image_surfaces = {
1426 .dirty = {
1427 .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
1428 .brw = BRW_NEW_BATCH |
1429 BRW_NEW_BLORP |
1430 BRW_NEW_CS_PROG_DATA |
1431 BRW_NEW_IMAGE_UNITS
1432 },
1433 .emit = brw_upload_cs_image_surfaces,
1434 };
1435
1436 static uint32_t
1437 get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
1438 {
1439 const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
1440 uint32_t hw_format = brw_format_for_mesa_format(format);
1441 if (access == GL_WRITE_ONLY) {
1442 return hw_format;
1443 } else if (isl_has_matching_typed_storage_image_format(devinfo, hw_format)) {
1444 /* Typed surface reads support a very limited subset of the shader
1445 * image formats. Translate it into the closest format the
1446 * hardware supports.
1447 */
1448 return isl_lower_storage_image_format(devinfo, hw_format);
1449 } else {
1450 /* The hardware doesn't actually support a typed format that we can use
1451 * so we have to fall back to untyped read/write messages.
1452 */
1453 return BRW_SURFACEFORMAT_RAW;
1454 }
1455 }
1456
1457 static void
1458 update_default_image_param(struct brw_context *brw,
1459 struct gl_image_unit *u,
1460 unsigned surface_idx,
1461 struct brw_image_param *param)
1462 {
1463 memset(param, 0, sizeof(*param));
1464 param->surface_idx = surface_idx;
1465 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1466 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1467 * detailed explanation of these parameters.
1468 */
1469 param->swizzling[0] = 0xff;
1470 param->swizzling[1] = 0xff;
1471 }
1472
1473 static void
1474 update_buffer_image_param(struct brw_context *brw,
1475 struct gl_image_unit *u,
1476 unsigned surface_idx,
1477 struct brw_image_param *param)
1478 {
1479 struct gl_buffer_object *obj = u->TexObj->BufferObject;
1480
1481 update_default_image_param(brw, u, surface_idx, param);
1482
1483 param->size[0] = obj->Size / _mesa_get_format_bytes(u->_ActualFormat);
1484 param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
1485 }
1486
1487 static void
1488 update_texture_image_param(struct brw_context *brw,
1489 struct gl_image_unit *u,
1490 unsigned surface_idx,
1491 struct brw_image_param *param)
1492 {
1493 struct intel_mipmap_tree *mt = intel_texture_object(u->TexObj)->mt;
1494
1495 update_default_image_param(brw, u, surface_idx, param);
1496
1497 param->size[0] = minify(mt->logical_width0, u->Level);
1498 param->size[1] = minify(mt->logical_height0, u->Level);
1499 param->size[2] = (!u->Layered ? 1 :
1500 u->TexObj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1501 u->TexObj->Target == GL_TEXTURE_3D ?
1502 minify(mt->logical_depth0, u->Level) :
1503 mt->logical_depth0);
1504
1505 intel_miptree_get_image_offset(mt, u->Level, u->_Layer,
1506 &param->offset[0],
1507 &param->offset[1]);
1508
1509 param->stride[0] = mt->cpp;
1510 param->stride[1] = mt->pitch / mt->cpp;
1511 param->stride[2] =
1512 brw_miptree_get_horizontal_slice_pitch(brw, mt, u->Level);
1513 param->stride[3] =
1514 brw_miptree_get_vertical_slice_pitch(brw, mt, u->Level);
1515
1516 if (mt->tiling == I915_TILING_X) {
1517 /* An X tile is a rectangular block of 512x8 bytes. */
1518 param->tiling[0] = _mesa_logbase2(512 / mt->cpp);
1519 param->tiling[1] = _mesa_logbase2(8);
1520
1521 if (brw->has_swizzling) {
1522 /* Right shifts required to swizzle bits 9 and 10 of the memory
1523 * address with bit 6.
1524 */
1525 param->swizzling[0] = 3;
1526 param->swizzling[1] = 4;
1527 }
1528 } else if (mt->tiling == I915_TILING_Y) {
1529       /* The layout of a Y-tiled surface in memory isn't fundamentally
1530        * different from the layout of an X-tiled surface; we simply pretend
1531        * that the surface is broken up into a number of smaller 16Bx32 tiles,
1532        * each one arranged in X-major order just as for X-tiling.
1533 */
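      /* For a hypothetical 32-bit (cpp == 4) format this works out to
       * tiling[0] = log2(16 / 4) = 2 and tiling[1] = log2(32) = 5.
       */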
1534 param->tiling[0] = _mesa_logbase2(16 / mt->cpp);
1535 param->tiling[1] = _mesa_logbase2(32);
1536
1537 if (brw->has_swizzling) {
1538 /* Right shift required to swizzle bit 9 of the memory address with
1539 * bit 6.
1540 */
1541 param->swizzling[0] = 3;
1542 }
1543 }
1544
1545 /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The
1546 * address calculation algorithm (emit_address_calculation() in
1547 * brw_fs_surface_builder.cpp) handles this as a sort of tiling with
1548 * modulus equal to the LOD.
1549 */
1550 param->tiling[2] = (u->TexObj->Target == GL_TEXTURE_3D ? u->Level :
1551 0);
1552 }
1553
1554 static void
1555 update_image_surface(struct brw_context *brw,
1556 struct gl_image_unit *u,
1557 GLenum access,
1558 unsigned surface_idx,
1559 uint32_t *surf_offset,
1560 struct brw_image_param *param)
1561 {
1562 if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
1563 struct gl_texture_object *obj = u->TexObj;
1564 const unsigned format = get_image_format(brw, u->_ActualFormat, access);
1565
1566 if (obj->Target == GL_TEXTURE_BUFFER) {
1567 struct intel_buffer_object *intel_obj =
1568 intel_buffer_object(obj->BufferObject);
1569 const unsigned texel_size = (format == BRW_SURFACEFORMAT_RAW ? 1 :
1570 _mesa_get_format_bytes(u->_ActualFormat));
1571
1572 brw->vtbl.emit_buffer_surface_state(
1573 brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
1574 format, intel_obj->Base.Size / texel_size, texel_size,
1575 access != GL_READ_ONLY);
1576
1577 update_buffer_image_param(brw, u, surface_idx, param);
1578
1579 } else {
1580 struct intel_texture_object *intel_obj = intel_texture_object(obj);
1581 struct intel_mipmap_tree *mt = intel_obj->mt;
1582
1583 if (format == BRW_SURFACEFORMAT_RAW) {
1584 brw->vtbl.emit_buffer_surface_state(
1585 brw, surf_offset, mt->bo, mt->offset,
1586 format, mt->bo->size - mt->offset, 1 /* pitch */,
1587 access != GL_READ_ONLY);
1588
1589 } else {
1590 const unsigned num_layers = (!u->Layered ? 1 :
1591 obj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1592 mt->logical_depth0);
1593
1594 struct isl_view view = {
1595 .format = format,
1596 .base_level = obj->MinLevel + u->Level,
1597 .levels = 1,
1598 .base_array_layer = obj->MinLayer + u->_Layer,
1599 .array_len = num_layers,
1600 .channel_select = {
1601 ISL_CHANNEL_SELECT_RED,
1602 ISL_CHANNEL_SELECT_GREEN,
1603 ISL_CHANNEL_SELECT_BLUE,
1604 ISL_CHANNEL_SELECT_ALPHA,
1605 },
1606 .usage = ISL_SURF_USAGE_STORAGE_BIT,
1607 };
1608
1609 const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
1610
1611 brw_emit_surface_state(brw, mt, &view,
1612 surface_state_infos[brw->gen].rb_mocs, false,
1613 surf_offset, surf_index,
1614 I915_GEM_DOMAIN_SAMPLER,
1615 access == GL_READ_ONLY ? 0 :
1616 I915_GEM_DOMAIN_SAMPLER);
1617 }
1618
1619 update_texture_image_param(brw, u, surface_idx, param);
1620 }
1621
1622 } else {
1623 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset);
1624 update_default_image_param(brw, u, surface_idx, param);
1625 }
1626 }
1627
1628 void
1629 brw_upload_image_surfaces(struct brw_context *brw,
1630 struct gl_linked_shader *shader,
1631 struct brw_stage_state *stage_state,
1632 struct brw_stage_prog_data *prog_data)
1633 {
1634 struct gl_context *ctx = &brw->ctx;
1635
1636 if (shader && shader->NumImages) {
1637 for (unsigned i = 0; i < shader->NumImages; i++) {
1638 struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[i]];
1639 const unsigned surf_idx = prog_data->binding_table.image_start + i;
1640
1641 update_image_surface(brw, u, shader->ImageAccess[i],
1642 surf_idx,
1643 &stage_state->surf_offset[surf_idx],
1644 &prog_data->image_param[i]);
1645 }
1646
1647 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1648       /* This may have changed the image metadata that depends on the context
1649        * image unit state and is passed to the program as uniforms, so make
1650        * sure that push and pull constants are re-uploaded.
1651 */
1652 brw->NewGLState |= _NEW_PROGRAM_CONSTANTS;
1653 }
1654 }
1655
1656 static void
1657 brw_upload_wm_image_surfaces(struct brw_context *brw)
1658 {
1659 struct gl_context *ctx = &brw->ctx;
1660 /* BRW_NEW_FRAGMENT_PROGRAM */
1661 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1662
1663 if (prog) {
1664 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1665 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1666 &brw->wm.base, &brw->wm.prog_data->base);
1667 }
1668 }
1669
1670 const struct brw_tracked_state brw_wm_image_surfaces = {
1671 .dirty = {
1672 .mesa = _NEW_TEXTURE,
1673 .brw = BRW_NEW_BATCH |
1674 BRW_NEW_BLORP |
1675 BRW_NEW_FRAGMENT_PROGRAM |
1676 BRW_NEW_FS_PROG_DATA |
1677 BRW_NEW_IMAGE_UNITS
1678 },
1679 .emit = brw_upload_wm_image_surfaces,
1680 };
1681
1682 void
1683 gen4_init_vtable_surface_functions(struct brw_context *brw)
1684 {
1685 brw->vtbl.update_texture_surface = gen4_update_texture_surface;
1686 brw->vtbl.update_renderbuffer_surface = gen4_update_renderbuffer_surface;
1687 brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
1688 brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
1689 }
1690
1691 static void
1692 brw_upload_cs_work_groups_surface(struct brw_context *brw)
1693 {
1694 struct gl_context *ctx = &brw->ctx;
1695 /* _NEW_PROGRAM */
1696 struct gl_shader_program *prog =
1697 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1698
1699 if (prog && brw->cs.prog_data->uses_num_work_groups) {
1700 const unsigned surf_idx =
1701 brw->cs.prog_data->binding_table.work_groups_start;
1702 uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
1703 drm_intel_bo *bo;
1704 uint32_t bo_offset;
1705
1706 if (brw->compute.num_work_groups_bo == NULL) {
1707 bo = NULL;
1708 intel_upload_data(brw,
1709 (void *)brw->compute.num_work_groups,
1710 3 * sizeof(GLuint),
1711 sizeof(GLuint),
1712 &bo,
1713 &bo_offset);
1714 } else {
1715 bo = brw->compute.num_work_groups_bo;
1716 bo_offset = brw->compute.num_work_groups_offset;
1717 }
1718
1719 brw->vtbl.emit_buffer_surface_state(brw, surf_offset,
1720 bo, bo_offset,
1721 BRW_SURFACEFORMAT_RAW,
1722 3 * sizeof(GLuint), 1, true);
1723 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1724 }
1725 }
1726
1727 const struct brw_tracked_state brw_cs_work_groups_surface = {
1728 .dirty = {
1729 .brw = BRW_NEW_BLORP |
1730 BRW_NEW_CS_WORK_GROUPS
1731 },
1732 .emit = brw_upload_cs_work_groups_surface,
1733 };