i965: Rename some vague format members of brw_context
[mesa.git] / src/mesa/drivers/dri/i965/brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "compiler/nir/nir.h"
34 #include "main/context.h"
35 #include "main/blend.h"
36 #include "main/mtypes.h"
37 #include "main/samplerobj.h"
38 #include "main/shaderimage.h"
39 #include "main/teximage.h"
40 #include "program/prog_parameter.h"
41 #include "program/prog_instruction.h"
42 #include "main/framebuffer.h"
43 #include "main/shaderapi.h"
44
45 #include "isl/isl.h"
46
47 #include "intel_mipmap_tree.h"
48 #include "intel_batchbuffer.h"
49 #include "intel_tex.h"
50 #include "intel_fbo.h"
51 #include "intel_buffer_objects.h"
52
53 #include "brw_context.h"
54 #include "brw_state.h"
55 #include "brw_defines.h"
56 #include "brw_wm.h"
57
58 enum {
59 INTEL_RENDERBUFFER_LAYERED = 1 << 0,
60 INTEL_AUX_BUFFER_DISABLED = 1 << 1,
61 };
62
63 uint32_t tex_mocs[] = {
64 [7] = GEN7_MOCS_L3,
65 [8] = BDW_MOCS_WB,
66 [9] = SKL_MOCS_WB,
67 [10] = CNL_MOCS_WB,
68 };
69
70 uint32_t rb_mocs[] = {
71 [7] = GEN7_MOCS_L3,
72 [8] = BDW_MOCS_PTE,
73 [9] = SKL_MOCS_PTE,
74 [10] = CNL_MOCS_PTE,
75 };
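/* These tables are indexed by hardware generation (brw->gen).  As an
 * illustration, on a gen9 part a texture surface picks up tex_mocs[9]
 * (SKL_MOCS_WB) while a render target picks up rb_mocs[9] (SKL_MOCS_PTE);
 * the chosen value is simply forwarded as the mocs argument of
 * brw_emit_surface_state() below.
 */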
76
77 static void
78 brw_emit_surface_state(struct brw_context *brw,
79 struct intel_mipmap_tree *mt, uint32_t flags,
80 GLenum target, struct isl_view view,
81 uint32_t mocs, uint32_t *surf_offset, int surf_index,
82 unsigned read_domains, unsigned write_domains)
83 {
84 uint32_t tile_x = mt->level[0].slice[0].x_offset;
85 uint32_t tile_y = mt->level[0].slice[0].y_offset;
86 uint32_t offset = mt->offset;
87
88 struct isl_surf surf;
89 intel_miptree_get_isl_surf(brw, mt, &surf);
90
91 surf.dim = get_isl_surf_dim(target);
92
93 const enum isl_dim_layout dim_layout =
94 get_isl_dim_layout(&brw->screen->devinfo, mt->tiling, target,
95 mt->array_layout);
96
97 if (surf.dim_layout != dim_layout) {
98 /* The layout of the specified texture target is not compatible with the
99 * actual layout of the miptree structure in memory.  You're entering
100 * dangerous territory: this can only work if you intend to access a
101 * single level and slice of the texture, and if the hardware supports
102 * the tile offset feature in order to allow non-tile-aligned base
103 * offsets, since we'll have to point the hardware to the first texel
104 * of the level instead of relying on the usual base level/layer
105 * controls.
106 */
107 assert(brw->has_surface_tile_offset);
108 assert(view.levels == 1 && view.array_len == 1);
109 assert(tile_x == 0 && tile_y == 0);
110
111 offset += intel_miptree_get_tile_offsets(mt, view.base_level,
112 view.base_array_layer,
113 &tile_x, &tile_y);
114
115 /* Minify the logical dimensions of the texture. */
116 const unsigned l = view.base_level - mt->first_level;
117 surf.logical_level0_px.width = minify(surf.logical_level0_px.width, l);
118 surf.logical_level0_px.height = surf.dim <= ISL_SURF_DIM_1D ? 1 :
119 minify(surf.logical_level0_px.height, l);
120 surf.logical_level0_px.depth = surf.dim <= ISL_SURF_DIM_2D ? 1 :
121 minify(surf.logical_level0_px.depth, l);
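/* For example (hypothetical sizes): viewing base_level 2 of a 256x128 2D
 * miptree whose first_level is 0 gives l == 2, so the overridden surface
 * is described as a single 64x32 level.
 */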
122
123 /* Only the base level and layer can be addressed with the overridden
124 * layout.
125 */
126 surf.logical_level0_px.array_len = 1;
127 surf.levels = 1;
128 surf.dim_layout = dim_layout;
129
130 /* The requested slice of the texture is now at the base level and
131 * layer.
132 */
133 view.base_level = 0;
134 view.base_array_layer = 0;
135 }
136
137 union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } };
138
139 struct brw_bo *aux_bo;
140 struct isl_surf *aux_surf = NULL;
141 uint64_t aux_offset = 0;
142 enum isl_aux_usage aux_usage = ISL_AUX_USAGE_NONE;
143 if ((mt->mcs_buf || intel_miptree_sample_with_hiz(brw, mt)) &&
144 !(flags & INTEL_AUX_BUFFER_DISABLED)) {
145 aux_usage = intel_miptree_get_aux_isl_usage(brw, mt);
146
147 if (mt->mcs_buf) {
148 aux_surf = &mt->mcs_buf->surf;
149
150 assert(mt->mcs_buf->offset == 0);
151 aux_bo = mt->mcs_buf->bo;
152 aux_offset = mt->mcs_buf->bo->offset64 + mt->mcs_buf->offset;
153 } else {
154 aux_surf = &mt->hiz_buf->surf;
155
156 aux_bo = mt->hiz_buf->bo;
157 aux_offset = mt->hiz_buf->bo->offset64;
158 }
159
160 /* We only really need a clear color if we also have an auxiliary
161 * surface. Without one, it does nothing.
162 */
163 clear_color = mt->fast_clear_color;
164 }
165
166 void *state = brw_state_batch(brw,
167 brw->isl_dev.ss.size,
168 brw->isl_dev.ss.align,
169 surf_offset);
170
171 isl_surf_fill_state(&brw->isl_dev, state, .surf = &surf, .view = &view,
172 .address = mt->bo->offset64 + offset,
173 .aux_surf = aux_surf, .aux_usage = aux_usage,
174 .aux_address = aux_offset,
175 .mocs = mocs, .clear_color = clear_color,
176 .x_offset_sa = tile_x, .y_offset_sa = tile_y);
177
178 brw_emit_reloc(&brw->batch, *surf_offset + brw->isl_dev.ss.addr_offset,
179 mt->bo, offset, read_domains, write_domains);
180
181 if (aux_surf) {
182 /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
183 * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
184 * contain other control information. Since buffer addresses are always
185 * on 4k boundaries (and thus have their lower 12 bits zero), we can use
186 * an ordinary reloc to do the necessary address translation.
187 */
188 assert((aux_offset & 0xfff) == 0);
189 uint32_t *aux_addr = state + brw->isl_dev.ss.aux_addr_offset;
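/* isl_surf_fill_state() above wrote the presumed aux address (aux_offset,
 * i.e. the aux BO's offset64 plus any offset within it) into this DWORD,
 * OR'd with the control bits in its low 12 bits, so subtracting
 * aux_bo->offset64 leaves exactly the reloc delta we want: the offset
 * within the BO plus the control bits.
 */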
190 brw_emit_reloc(&brw->batch,
191 *surf_offset + brw->isl_dev.ss.aux_addr_offset,
192 aux_bo, *aux_addr - aux_bo->offset64,
193 read_domains, write_domains);
194 }
195 }
196
197 uint32_t
198 brw_update_renderbuffer_surface(struct brw_context *brw,
199 struct gl_renderbuffer *rb,
200 uint32_t flags, unsigned unit /* unused */,
201 uint32_t surf_index)
202 {
203 struct gl_context *ctx = &brw->ctx;
204 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
205 struct intel_mipmap_tree *mt = irb->mt;
206
207 if (brw->gen < 9) {
208 assert(!(flags & INTEL_AUX_BUFFER_DISABLED));
209 }
210
211 assert(brw_render_target_supported(brw, rb));
212
213 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
214 if (unlikely(!brw->mesa_format_supports_render[rb_format])) {
215 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
216 __func__, _mesa_get_format_name(rb_format));
217 }
218
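/* With the UMS and CMS multisample layouts irb->mt_layer is expressed in
 * sample units rather than whole logical layers (see the matching comment
 * in update_renderbuffer_read_surfaces()), so e.g. a 4x multisampled
 * renderbuffer divides mt_layer by 4 to recover the logical layer index.
 */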
219 const unsigned layer_multiplier =
220 (irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_UMS ||
221 irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) ?
222 MAX2(irb->mt->num_samples, 1) : 1;
223
224 struct isl_view view = {
225 .format = brw->mesa_to_isl_render_format[rb_format],
226 .base_level = irb->mt_level - irb->mt->first_level,
227 .levels = 1,
228 .base_array_layer = irb->mt_layer / layer_multiplier,
229 .array_len = MAX2(irb->layer_count, 1),
230 .swizzle = ISL_SWIZZLE_IDENTITY,
231 .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
232 };
233
234 uint32_t offset;
235 brw_emit_surface_state(brw, mt, flags, mt->target, view,
236 rb_mocs[brw->gen],
237 &offset, surf_index,
238 I915_GEM_DOMAIN_RENDER,
239 I915_GEM_DOMAIN_RENDER);
240 return offset;
241 }
242
243 GLuint
244 translate_tex_target(GLenum target)
245 {
246 switch (target) {
247 case GL_TEXTURE_1D:
248 case GL_TEXTURE_1D_ARRAY_EXT:
249 return BRW_SURFACE_1D;
250
251 case GL_TEXTURE_RECTANGLE_NV:
252 return BRW_SURFACE_2D;
253
254 case GL_TEXTURE_2D:
255 case GL_TEXTURE_2D_ARRAY_EXT:
256 case GL_TEXTURE_EXTERNAL_OES:
257 case GL_TEXTURE_2D_MULTISAMPLE:
258 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
259 return BRW_SURFACE_2D;
260
261 case GL_TEXTURE_3D:
262 return BRW_SURFACE_3D;
263
264 case GL_TEXTURE_CUBE_MAP:
265 case GL_TEXTURE_CUBE_MAP_ARRAY:
266 return BRW_SURFACE_CUBE;
267
268 default:
269 unreachable("not reached");
270 }
271 }
272
273 uint32_t
274 brw_get_surface_tiling_bits(uint32_t tiling)
275 {
276 switch (tiling) {
277 case I915_TILING_X:
278 return BRW_SURFACE_TILED;
279 case I915_TILING_Y:
280 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
281 default:
282 return 0;
283 }
284 }
285
286
287 uint32_t
288 brw_get_surface_num_multisamples(unsigned num_samples)
289 {
290 if (num_samples > 1)
291 return BRW_SURFACE_MULTISAMPLECOUNT_4;
292 else
293 return BRW_SURFACE_MULTISAMPLECOUNT_1;
294 }
295
296 /**
297 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
298 * swizzling.
299 */
300 int
301 brw_get_texture_swizzle(const struct gl_context *ctx,
302 const struct gl_texture_object *t)
303 {
304 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
305
306 int swizzles[SWIZZLE_NIL + 1] = {
307 SWIZZLE_X,
308 SWIZZLE_Y,
309 SWIZZLE_Z,
310 SWIZZLE_W,
311 SWIZZLE_ZERO,
312 SWIZZLE_ONE,
313 SWIZZLE_NIL
314 };
315
316 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
317 img->_BaseFormat == GL_DEPTH_STENCIL) {
318 GLenum depth_mode = t->DepthMode;
319
320 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
321 * with depth component data specified with a sized internal format.
322 * Otherwise, it's left at the old default, GL_LUMINANCE.
323 */
324 if (_mesa_is_gles3(ctx) &&
325 img->InternalFormat != GL_DEPTH_COMPONENT &&
326 img->InternalFormat != GL_DEPTH_STENCIL) {
327 depth_mode = GL_RED;
328 }
329
330 switch (depth_mode) {
331 case GL_ALPHA:
332 swizzles[0] = SWIZZLE_ZERO;
333 swizzles[1] = SWIZZLE_ZERO;
334 swizzles[2] = SWIZZLE_ZERO;
335 swizzles[3] = SWIZZLE_X;
336 break;
337 case GL_LUMINANCE:
338 swizzles[0] = SWIZZLE_X;
339 swizzles[1] = SWIZZLE_X;
340 swizzles[2] = SWIZZLE_X;
341 swizzles[3] = SWIZZLE_ONE;
342 break;
343 case GL_INTENSITY:
344 swizzles[0] = SWIZZLE_X;
345 swizzles[1] = SWIZZLE_X;
346 swizzles[2] = SWIZZLE_X;
347 swizzles[3] = SWIZZLE_X;
348 break;
349 case GL_RED:
350 swizzles[0] = SWIZZLE_X;
351 swizzles[1] = SWIZZLE_ZERO;
352 swizzles[2] = SWIZZLE_ZERO;
353 swizzles[3] = SWIZZLE_ONE;
354 break;
355 }
356 }
357
358 GLenum datatype = _mesa_get_format_datatype(img->TexFormat);
359
360 /* If the texture's format is alpha-only, force R, G, and B to
361 * 0.0. Similarly, if the texture's format has no alpha channel,
362 * force the alpha value read to 1.0. This allows the
363 * implementation to use an RGBA texture for any of these formats
364 * without leaking any unexpected values.
365 */
366 switch (img->_BaseFormat) {
367 case GL_ALPHA:
368 swizzles[0] = SWIZZLE_ZERO;
369 swizzles[1] = SWIZZLE_ZERO;
370 swizzles[2] = SWIZZLE_ZERO;
371 break;
372 case GL_LUMINANCE:
373 if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
374 swizzles[0] = SWIZZLE_X;
375 swizzles[1] = SWIZZLE_X;
376 swizzles[2] = SWIZZLE_X;
377 swizzles[3] = SWIZZLE_ONE;
378 }
379 break;
380 case GL_LUMINANCE_ALPHA:
381 if (datatype == GL_SIGNED_NORMALIZED) {
382 swizzles[0] = SWIZZLE_X;
383 swizzles[1] = SWIZZLE_X;
384 swizzles[2] = SWIZZLE_X;
385 swizzles[3] = SWIZZLE_W;
386 }
387 break;
388 case GL_INTENSITY:
389 if (datatype == GL_SIGNED_NORMALIZED) {
390 swizzles[0] = SWIZZLE_X;
391 swizzles[1] = SWIZZLE_X;
392 swizzles[2] = SWIZZLE_X;
393 swizzles[3] = SWIZZLE_X;
394 }
395 break;
396 case GL_RED:
397 case GL_RG:
398 case GL_RGB:
399 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0 ||
400 img->TexFormat == MESA_FORMAT_RGB_DXT1 ||
401 img->TexFormat == MESA_FORMAT_SRGB_DXT1)
402 swizzles[3] = SWIZZLE_ONE;
403 break;
404 }
405
406 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
407 swizzles[GET_SWZ(t->_Swizzle, 1)],
408 swizzles[GET_SWZ(t->_Swizzle, 2)],
409 swizzles[GET_SWZ(t->_Swizzle, 3)]);
410 }
411
412 /**
413 * Convert a swizzle enumeration (e.g. SWIZZLE_X) to one of the Gen7.5+
414 * "Shader Channel Select" enumerations (e.g. HSW_SCS_RED). The mappings are
415 *
416 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
417 * 0 1 2 3 4 5
418 * 4 5 6 7 0 1
419 * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE
420 *
421 * which is simply adding 4 then modding by 8 (or anding with 7).
422 *
423 * We then may need to apply workarounds for textureGather hardware bugs.
424 */
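/* Worked examples of that mapping: SWIZZLE_X is 0, so (0 + 4) & 7 == 4,
 * i.e. SCS_RED; SWIZZLE_ZERO is 4, so (4 + 4) & 7 == 0, i.e. SCS_ZERO.
 * With need_green_to_blue set, SWIZZLE_Y maps to HSW_SCS_GREEN and is
 * then swapped for HSW_SCS_BLUE by the helper below.
 */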
425 static unsigned
426 swizzle_to_scs(GLenum swizzle, bool need_green_to_blue)
427 {
428 unsigned scs = (swizzle + 4) & 7;
429
430 return (need_green_to_blue && scs == HSW_SCS_GREEN) ? HSW_SCS_BLUE : scs;
431 }
432
433 static unsigned
434 brw_find_matching_rb(const struct gl_framebuffer *fb,
435 const struct intel_mipmap_tree *mt)
436 {
437 for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
438 const struct intel_renderbuffer *irb =
439 intel_renderbuffer(fb->_ColorDrawBuffers[i]);
440
441 if (irb && irb->mt == mt)
442 return i;
443 }
444
445 return fb->_NumColorDrawBuffers;
446 }
447
448 static inline bool
449 brw_texture_view_sane(const struct brw_context *brw,
450 const struct intel_mipmap_tree *mt,
451 const struct isl_view *view)
452 {
453 /* There are special cases only for lossless compression. */
454 if (!intel_miptree_is_lossless_compressed(brw, mt))
455 return true;
456
457 if (isl_format_supports_ccs_e(&brw->screen->devinfo, view->format))
458 return true;
459
460 /* Logic elsewhere needs to take care to resolve the color buffer prior
461 * to sampling it as non-compressed.
462 */
463 if (intel_miptree_has_color_unresolved(mt, view->base_level, view->levels,
464 view->base_array_layer,
465 view->array_len))
466 return false;
467
468 const struct gl_framebuffer *fb = brw->ctx.DrawBuffer;
469 const unsigned rb_index = brw_find_matching_rb(fb, mt);
470
471 if (rb_index == fb->_NumColorDrawBuffers)
472 return true;
473
474 /* The underlying surface is compressed but it is sampled using a format
475 * that the sampling engine doesn't support as compressed. Compression must
476 * be disabled for both the sampling engine and the data port in case the
477 * same surface is also used as a render target.
478 */
479 return brw->draw_aux_buffer_disabled[rb_index];
480 }
481
482 static bool
483 brw_disable_aux_surface(const struct brw_context *brw,
484 const struct intel_mipmap_tree *mt,
485 const struct isl_view *view)
486 {
487 /* Nothing to disable. */
488 if (!mt->mcs_buf)
489 return false;
490
491 const bool is_unresolved = intel_miptree_has_color_unresolved(
492 mt, view->base_level, view->levels,
493 view->base_array_layer, view->array_len);
494
495 /* There are special cases only for lossless compression. */
496 if (!intel_miptree_is_lossless_compressed(brw, mt))
497 return !is_unresolved;
498
499 const struct gl_framebuffer *fb = brw->ctx.DrawBuffer;
500 const unsigned rb_index = brw_find_matching_rb(fb, mt);
501
502 /* If we are drawing into this with compression enabled, then we must also
503 * enable compression when texturing from it regardless of
504 fast_clear_state. If we don't, then after the first draw call with
505 * this setup, there will be data in the CCS which won't get picked up by
506 * subsequent texturing operations as required by ARB_texture_barrier.
507 * Since we don't want to re-emit the binding table or do a resolve
508 * operation every draw call, the easiest thing to do is just enable
509 * compression on the texturing side. This is completely safe to do
510 * since, if compressed texturing weren't allowed, we would have disabled
511 * compression of render targets in whatever_that_function_is_called().
512 */
513 if (rb_index < fb->_NumColorDrawBuffers) {
514 if (brw->draw_aux_buffer_disabled[rb_index]) {
515 assert(!is_unresolved);
516 }
517
518 return brw->draw_aux_buffer_disabled[rb_index];
519 }
520
521 return !is_unresolved;
522 }
523
524 void
525 brw_update_texture_surface(struct gl_context *ctx,
526 unsigned unit,
527 uint32_t *surf_offset,
528 bool for_gather,
529 uint32_t plane)
530 {
531 struct brw_context *brw = brw_context(ctx);
532 struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current;
533
534 if (obj->Target == GL_TEXTURE_BUFFER) {
535 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
536
537 } else {
538 struct intel_texture_object *intel_obj = intel_texture_object(obj);
539 struct intel_mipmap_tree *mt = intel_obj->mt;
540
541 if (plane > 0) {
542 if (mt->plane[plane - 1] == NULL)
543 return;
544 mt = mt->plane[plane - 1];
545 }
546
547 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
548 /* If this is a view with restricted NumLayers, then our effective depth
549 * is not just the miptree depth.
550 */
551 const unsigned view_num_layers =
552 (obj->Immutable && obj->Target != GL_TEXTURE_3D) ? obj->NumLayers :
553 mt->logical_depth0;
554
555 /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
556 * texturing functions that return a float, as our code generation always
557 * selects the .x channel (which would always be 0).
558 */
559 struct gl_texture_image *firstImage = obj->Image[0][obj->BaseLevel];
560 const bool alpha_depth = obj->DepthMode == GL_ALPHA &&
561 (firstImage->_BaseFormat == GL_DEPTH_COMPONENT ||
562 firstImage->_BaseFormat == GL_DEPTH_STENCIL);
563 const unsigned swizzle = (unlikely(alpha_depth) ? SWIZZLE_XYZW :
564 brw_get_texture_swizzle(&brw->ctx, obj));
565
566 mesa_format mesa_fmt = plane == 0 ? intel_obj->_Format : mt->format;
567 enum isl_format format = translate_tex_format(brw, mesa_fmt,
568 sampler->sRGBDecode);
569
570 /* Implement gen6 and gen7 gather work-around */
571 bool need_green_to_blue = false;
572 if (for_gather) {
573 if (brw->gen == 7 && (format == ISL_FORMAT_R32G32_FLOAT ||
574 format == ISL_FORMAT_R32G32_SINT ||
575 format == ISL_FORMAT_R32G32_UINT)) {
576 format = ISL_FORMAT_R32G32_FLOAT_LD;
577 need_green_to_blue = brw->is_haswell;
578 } else if (brw->gen == 6) {
579 /* Sandybridge's gather4 message is broken for integer formats.
580 * To work around this, we pretend the surface is UNORM for
581 * 8 or 16-bit formats, and emit shader instructions to recover
582 * the real INT/UINT value. For 32-bit formats, we pretend
583 * the surface is FLOAT, and simply reinterpret the resulting
584 * bits.
585 */
586 switch (format) {
587 case ISL_FORMAT_R8_SINT:
588 case ISL_FORMAT_R8_UINT:
589 format = ISL_FORMAT_R8_UNORM;
590 break;
591
592 case ISL_FORMAT_R16_SINT:
593 case ISL_FORMAT_R16_UINT:
594 format = ISL_FORMAT_R16_UNORM;
595 break;
596
597 case ISL_FORMAT_R32_SINT:
598 case ISL_FORMAT_R32_UINT:
599 format = ISL_FORMAT_R32_FLOAT;
600 break;
601
602 default:
603 break;
604 }
605 }
606 }
607
608 if (obj->StencilSampling && firstImage->_BaseFormat == GL_DEPTH_STENCIL) {
609 if (brw->gen <= 7) {
610 assert(mt->r8stencil_mt && !mt->stencil_mt->r8stencil_needs_update);
611 mt = mt->r8stencil_mt;
612 } else {
613 mt = mt->stencil_mt;
614 }
615 format = ISL_FORMAT_R8_UINT;
616 } else if (brw->gen <= 7 && mt->format == MESA_FORMAT_S_UINT8) {
617 assert(mt->r8stencil_mt && !mt->r8stencil_needs_update);
618 mt = mt->r8stencil_mt;
619 format = ISL_FORMAT_R8_UINT;
620 }
621
622 const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
623
624 struct isl_view view = {
625 .format = format,
626 .base_level = obj->MinLevel + obj->BaseLevel,
627 .levels = intel_obj->_MaxLevel - obj->BaseLevel + 1,
628 .base_array_layer = obj->MinLayer,
629 .array_len = view_num_layers,
630 .swizzle = {
631 .r = swizzle_to_scs(GET_SWZ(swizzle, 0), need_green_to_blue),
632 .g = swizzle_to_scs(GET_SWZ(swizzle, 1), need_green_to_blue),
633 .b = swizzle_to_scs(GET_SWZ(swizzle, 2), need_green_to_blue),
634 .a = swizzle_to_scs(GET_SWZ(swizzle, 3), need_green_to_blue),
635 },
636 .usage = ISL_SURF_USAGE_TEXTURE_BIT,
637 };
638
639 if (obj->Target == GL_TEXTURE_CUBE_MAP ||
640 obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY)
641 view.usage |= ISL_SURF_USAGE_CUBE_BIT;
642
643 assert(brw_texture_view_sane(brw, mt, &view));
644
645 const int flags = brw_disable_aux_surface(brw, mt, &view) ?
646 INTEL_AUX_BUFFER_DISABLED : 0;
647 brw_emit_surface_state(brw, mt, flags, mt->target, view,
648 tex_mocs[brw->gen],
649 surf_offset, surf_index,
650 I915_GEM_DOMAIN_SAMPLER, 0);
651 }
652 }
653
654 void
655 brw_emit_buffer_surface_state(struct brw_context *brw,
656 uint32_t *out_offset,
657 struct brw_bo *bo,
658 unsigned buffer_offset,
659 unsigned surface_format,
660 unsigned buffer_size,
661 unsigned pitch,
662 bool rw)
663 {
664 uint32_t *dw = brw_state_batch(brw,
665 brw->isl_dev.ss.size,
666 brw->isl_dev.ss.align,
667 out_offset);
668
669 isl_buffer_fill_state(&brw->isl_dev, dw,
670 .address = (bo ? bo->offset64 : 0) + buffer_offset,
671 .size = buffer_size,
672 .format = surface_format,
673 .stride = pitch,
674 .mocs = tex_mocs[brw->gen]);
675
676 if (bo) {
677 brw_emit_reloc(&brw->batch, *out_offset + brw->isl_dev.ss.addr_offset,
678 bo, buffer_offset,
679 I915_GEM_DOMAIN_SAMPLER,
680 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
681 }
682 }
683
684 void
685 brw_update_buffer_texture_surface(struct gl_context *ctx,
686 unsigned unit,
687 uint32_t *surf_offset)
688 {
689 struct brw_context *brw = brw_context(ctx);
690 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
691 struct intel_buffer_object *intel_obj =
692 intel_buffer_object(tObj->BufferObject);
693 uint32_t size = tObj->BufferSize;
694 struct brw_bo *bo = NULL;
695 mesa_format format = tObj->_BufferObjectFormat;
696 const enum isl_format isl_format = brw_isl_format_for_mesa_format(format);
697 int texel_size = _mesa_get_format_bytes(format);
698
699 if (intel_obj) {
700 size = MIN2(size, intel_obj->Base.Size);
701 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
702 }
703
704 /* The ARB_texture_buffer_object specification says:
705 *
706 * "The number of texels in the buffer texture's texel array is given by
707 *
708 * floor(<buffer_size> / (<components> * sizeof(<base_type>))),
709 *
710 * where <buffer_size> is the size of the buffer object, in basic
711 * machine units and <components> and <base_type> are the element count
712 * and base data type for elements, as specified in Table X.1. The
713 * number of texels in the texel array is then clamped to the
714 * implementation-dependent limit MAX_TEXTURE_BUFFER_SIZE_ARB."
715 *
716 * We need to clamp the size in bytes to MAX_TEXTURE_BUFFER_SIZE * stride,
717 * so that when ISL divides by stride to obtain the number of texels, that
718 * texel count is clamped to MAX_TEXTURE_BUFFER_SIZE.
719 */
720 size = MIN2(size, ctx->Const.MaxTextureBufferSize * (unsigned) texel_size);
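/* Worked example (made-up limit): for an RGBA32F buffer texture the texel
 * size is 16 bytes, so with a MAX_TEXTURE_BUFFER_SIZE of 2^27 texels the
 * byte size is clamped to 2^31, and ISL's divide-by-stride then yields at
 * most 2^27 texels as required.
 */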
721
722 if (isl_format == ISL_FORMAT_UNSUPPORTED) {
723 _mesa_problem(NULL, "bad format %s for texture buffer\n",
724 _mesa_get_format_name(format));
725 }
726
727 brw_emit_buffer_surface_state(brw, surf_offset, bo,
728 tObj->BufferOffset,
729 isl_format,
730 size,
731 texel_size,
732 false /* rw */);
733 }
734
735 /**
736 * Create the constant buffer surface. Vertex/fragment shader constants will be
737 * read from this buffer with Data Port Read instructions/messages.
738 */
739 void
740 brw_create_constant_surface(struct brw_context *brw,
741 struct brw_bo *bo,
742 uint32_t offset,
743 uint32_t size,
744 uint32_t *out_offset)
745 {
746 brw_emit_buffer_surface_state(brw, out_offset, bo, offset,
747 ISL_FORMAT_R32G32B32A32_FLOAT,
748 size, 1, false);
749 }
750
751 /**
752 * Create the buffer surface. Shader buffer variables will be
753 * read from / written to this buffer with Data Port Read/Write
754 * instructions/messages.
755 */
756 void
757 brw_create_buffer_surface(struct brw_context *brw,
758 struct brw_bo *bo,
759 uint32_t offset,
760 uint32_t size,
761 uint32_t *out_offset)
762 {
763 /* Use a raw surface so we can reuse existing untyped read/write/atomic
764 * messages. The fragment shader needs these in particular because they
765 * include a pixel mask header that we need in order to ensure correct
766 * behavior with helper invocations, which cannot write to the buffer.
767 */
768 brw_emit_buffer_surface_state(brw, out_offset, bo, offset,
769 ISL_FORMAT_RAW,
770 size, 1, true);
771 }
772
773 /**
774 * Set up a binding table entry for use by stream output logic (transform
775 * feedback).
776 *
777 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
778 */
779 void
780 brw_update_sol_surface(struct brw_context *brw,
781 struct gl_buffer_object *buffer_obj,
782 uint32_t *out_offset, unsigned num_vector_components,
783 unsigned stride_dwords, unsigned offset_dwords)
784 {
785 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
786 uint32_t offset_bytes = 4 * offset_dwords;
787 struct brw_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
788 offset_bytes,
789 buffer_obj->Size - offset_bytes);
790 uint32_t *surf = brw_state_batch(brw, 6 * 4, 32, out_offset);
791 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
792 size_t size_dwords = buffer_obj->Size / 4;
793 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
794
795 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
796 * too big to map using a single binding table entry?
797 */
798 assert((size_dwords - offset_dwords) / stride_dwords
799 <= BRW_MAX_NUM_BUFFER_ENTRIES);
800
801 if (size_dwords > offset_dwords + num_vector_components) {
802 /* There is room for at least 1 transform feedback output in the buffer.
803 * Compute the number of additional transform feedback outputs the
804 * buffer has room for.
805 */
806 buffer_size_minus_1 =
807 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
808 } else {
809 /* There isn't even room for a single transform feedback output in the
810 * buffer. We can't configure the binding table entry to prevent output
811 * entirely; we'll have to rely on the geometry shader to detect
812 * overflow. But to minimize the damage in case of a bug, set up the
813 * binding table entry to just allow a single output.
814 */
815 buffer_size_minus_1 = 0;
816 }
817 width = buffer_size_minus_1 & 0x7f;
818 height = (buffer_size_minus_1 & 0xfff80) >> 7;
819 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
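/* Worked example with a hypothetical buffer_size_minus_1 of 0x123456:
 * the fields come out as width = 0x56 (bits 6:0), height = 0x468
 * (bits 19:7) and depth = 0x1 (bits 26:20), which the hardware
 * reassembles into the same 27-bit entry count.
 */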
820
821 switch (num_vector_components) {
822 case 1:
823 surface_format = ISL_FORMAT_R32_FLOAT;
824 break;
825 case 2:
826 surface_format = ISL_FORMAT_R32G32_FLOAT;
827 break;
828 case 3:
829 surface_format = ISL_FORMAT_R32G32B32_FLOAT;
830 break;
831 case 4:
832 surface_format = ISL_FORMAT_R32G32B32A32_FLOAT;
833 break;
834 default:
835 unreachable("Invalid vector size for transform feedback output");
836 }
837
838 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
839 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
840 surface_format << BRW_SURFACE_FORMAT_SHIFT |
841 BRW_SURFACE_RC_READ_WRITE;
842 surf[1] = bo->offset64 + offset_bytes; /* reloc */
843 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
844 height << BRW_SURFACE_HEIGHT_SHIFT);
845 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
846 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
847 surf[4] = 0;
848 surf[5] = 0;
849
850 /* Emit relocation to surface contents. */
851 brw_emit_reloc(&brw->batch, *out_offset + 4, bo, offset_bytes,
852 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
853 }
854
855 /* Creates a new WM constant buffer reflecting the current fragment program's
856 * constants, if needed by the fragment program.
857 *
858 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
859 * state atom.
860 */
861 static void
862 brw_upload_wm_pull_constants(struct brw_context *brw)
863 {
864 struct brw_stage_state *stage_state = &brw->wm.base;
865 /* BRW_NEW_FRAGMENT_PROGRAM */
866 struct brw_program *fp = (struct brw_program *) brw->fragment_program;
867 /* BRW_NEW_FS_PROG_DATA */
868 struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data;
869
870 _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_FRAGMENT);
871 /* _NEW_PROGRAM_CONSTANTS */
872 brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program,
873 stage_state, prog_data);
874 }
875
876 const struct brw_tracked_state brw_wm_pull_constants = {
877 .dirty = {
878 .mesa = _NEW_PROGRAM_CONSTANTS,
879 .brw = BRW_NEW_BATCH |
880 BRW_NEW_BLORP |
881 BRW_NEW_FRAGMENT_PROGRAM |
882 BRW_NEW_FS_PROG_DATA,
883 },
884 .emit = brw_upload_wm_pull_constants,
885 };
886
887 /**
888 * Creates a null renderbuffer surface.
889 *
890 * This is used when the shader doesn't write to any color output. An FB
891 * write to target 0 will still be emitted, because that's how the thread is
892 * terminated (and computed depth is returned), so we need to have the
893 * hardware discard the target 0 color output..
894 */
895 static void
896 brw_emit_null_surface_state(struct brw_context *brw,
897 unsigned width,
898 unsigned height,
899 unsigned samples,
900 uint32_t *out_offset)
901 {
902 /* From the Sandy Bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
903 * Notes):
904 *
905 * A null surface will be used in instances where an actual surface is
906 * not bound. When a write message is generated to a null surface, no
907 * actual surface is written to. When a read message (including any
908 * sampling engine message) is generated to a null surface, the result
909 * is all zeros. Note that a null surface type is allowed to be used
910 * with all messages, even if it is not specifically indicated as
911 * supported. All of the remaining fields in surface state are ignored
912 * for null surfaces, with the following exceptions:
913 *
914 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
915 * depth buffer’s corresponding state for all render target surfaces,
916 * including null.
917 *
918 * - Surface Format must be R8G8B8A8_UNORM.
919 */
920 unsigned surface_type = BRW_SURFACE_NULL;
921 struct brw_bo *bo = NULL;
922 unsigned pitch_minus_1 = 0;
923 uint32_t multisampling_state = 0;
924 uint32_t *surf = brw_state_batch(brw, 6 * 4, 32, out_offset);
925
926 if (samples > 1) {
927 /* On Gen6, null render targets seem to cause GPU hangs when
928 * multisampling. So work around this problem by rendering into a dummy
929 * color buffer.
930 *
931 * To decrease the amount of memory needed by the workaround buffer, we
932 * set its pitch to 128 bytes (the width of a Y tile). This means that
933 * the amount of memory needed for the workaround buffer is
934 * (width_in_tiles + height_in_tiles - 1) tiles.
935 *
936 * Note that since the workaround buffer will be interpreted by the
937 * hardware as an interleaved multisampled buffer, we need to compute
938 * width_in_tiles and height_in_tiles by dividing the width and height
939 * by 16 rather than the normal Y-tile size of 32.
940 */
941 unsigned width_in_tiles = ALIGN(width, 16) / 16;
942 unsigned height_in_tiles = ALIGN(height, 16) / 16;
943 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
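/* As a rough example (hypothetical framebuffer): 1920x1080 gives
 * width_in_tiles = 120 and height_in_tiles = 68, so the dummy buffer only
 * needs (120 + 68 - 1) * 4096 bytes, i.e. about 748 KB.
 */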
944 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
945 size_needed);
946 bo = brw->wm.multisampled_null_render_target_bo;
947 surface_type = BRW_SURFACE_2D;
948 pitch_minus_1 = 127;
949 multisampling_state = brw_get_surface_num_multisamples(samples);
950 }
951
952 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
953 ISL_FORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
954 if (brw->gen < 6) {
955 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
956 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
957 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
958 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
959 }
960 surf[1] = bo ? bo->offset64 : 0;
961 surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
962 (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
963
964 /* From the Sandy Bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
965 * Notes):
966 *
967 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
968 */
969 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
970 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
971 surf[4] = multisampling_state;
972 surf[5] = 0;
973
974 if (bo) {
975 brw_emit_reloc(&brw->batch, *out_offset + 4, bo, 0,
976 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
977 }
978 }
979
980 /**
981 * Sets up a surface state structure to point at the given region.
982 * While it is only used for the front/back buffer currently, it should be
983 * usable for further buffers when doing ARB_draw_buffers support.
984 */
985 static uint32_t
986 gen4_update_renderbuffer_surface(struct brw_context *brw,
987 struct gl_renderbuffer *rb,
988 uint32_t flags, unsigned unit,
989 uint32_t surf_index)
990 {
991 struct gl_context *ctx = &brw->ctx;
992 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
993 struct intel_mipmap_tree *mt = irb->mt;
994 uint32_t *surf;
995 uint32_t tile_x, tile_y;
996 enum isl_format format;
997 uint32_t offset;
998 /* _NEW_BUFFERS */
999 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
1000 /* BRW_NEW_FS_PROG_DATA */
1001
1002 assert(!(flags & INTEL_RENDERBUFFER_LAYERED));
1003 assert(!(flags & INTEL_AUX_BUFFER_DISABLED));
1004
1005 if (rb->TexImage && !brw->has_surface_tile_offset) {
1006 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
1007
1008 if (tile_x != 0 || tile_y != 0) {
1009 /* Original gen4 hardware couldn't draw to a non-tile-aligned
1010 * destination in a miptree unless you actually set up your renderbuffer
1011 * as a miptree and used the fragile lod/array_index/etc. controls to
1012 * select the image. So, instead, we just make a new single-level
1013 * miptree and render into that.
1014 */
1015 intel_renderbuffer_move_to_temp(brw, irb, false);
1016 assert(irb->align_wa_mt);
1017 mt = irb->align_wa_mt;
1018 }
1019 }
1020
1021 surf = brw_state_batch(brw, 6 * 4, 32, &offset);
1022
1023 format = brw->mesa_to_isl_render_format[rb_format];
1024 if (unlikely(!brw->mesa_format_supports_render[rb_format])) {
1025 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
1026 __func__, _mesa_get_format_name(rb_format));
1027 }
1028
1029 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
1030 format << BRW_SURFACE_FORMAT_SHIFT);
1031
1032 /* reloc */
1033 assert(mt->offset % mt->cpp == 0);
1034 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
1035 mt->bo->offset64 + mt->offset);
1036
1037 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
1038 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
1039
1040 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
1041 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
1042
1043 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
1044
1045 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
1046 /* Note that the low bits of these fields are missing, so
1047 * there's the possibility of getting in trouble.
1048 */
1049 assert(tile_x % 4 == 0);
1050 assert(tile_y % 2 == 0);
1051 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
1052 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
1053 (mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
1054
1055 if (brw->gen < 6) {
1056 /* _NEW_COLOR */
1057 if (!ctx->Color.ColorLogicOpEnabled && !ctx->Color._AdvancedBlendMode &&
1058 (ctx->Color.BlendEnabled & (1 << unit)))
1059 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
1060
1061 if (!ctx->Color.ColorMask[unit][0])
1062 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
1063 if (!ctx->Color.ColorMask[unit][1])
1064 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
1065 if (!ctx->Color.ColorMask[unit][2])
1066 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
1067
1068 /* As mentioned above, disable writes to the alpha component when the
1069 * renderbuffer is XRGB.
1070 */
1071 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
1072 !ctx->Color.ColorMask[unit][3]) {
1073 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
1074 }
1075 }
1076
1077 brw_emit_reloc(&brw->batch, offset + 4, mt->bo, surf[1] - mt->bo->offset64,
1078 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
1079
1080 return offset;
1081 }
1082
1083 /**
1084 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
1085 */
1086 void
1087 brw_update_renderbuffer_surfaces(struct brw_context *brw,
1088 const struct gl_framebuffer *fb,
1089 uint32_t render_target_start,
1090 uint32_t *surf_offset)
1091 {
1092 GLuint i;
1093 const unsigned int w = _mesa_geometric_width(fb);
1094 const unsigned int h = _mesa_geometric_height(fb);
1095 const unsigned int s = _mesa_geometric_samples(fb);
1096
1097 /* Update surfaces for drawing buffers */
1098 if (fb->_NumColorDrawBuffers >= 1) {
1099 for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
1100 const uint32_t surf_index = render_target_start + i;
1101 const int flags = (_mesa_geometric_layers(fb) > 0 ?
1102 INTEL_RENDERBUFFER_LAYERED : 0) |
1103 (brw->draw_aux_buffer_disabled[i] ?
1104 INTEL_AUX_BUFFER_DISABLED : 0);
1105
1106 if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
1107 surf_offset[surf_index] =
1108 brw->vtbl.update_renderbuffer_surface(
1109 brw, fb->_ColorDrawBuffers[i], flags, i, surf_index);
1110 } else {
1111 brw->vtbl.emit_null_surface_state(brw, w, h, s,
1112 &surf_offset[surf_index]);
1113 }
1114 }
1115 } else {
1116 const uint32_t surf_index = render_target_start;
1117 brw->vtbl.emit_null_surface_state(brw, w, h, s,
1118 &surf_offset[surf_index]);
1119 }
1120 }
1121
1122 static void
1123 update_renderbuffer_surfaces(struct brw_context *brw)
1124 {
1125 const struct gl_context *ctx = &brw->ctx;
1126
1127 /* BRW_NEW_FS_PROG_DATA */
1128 const struct brw_wm_prog_data *wm_prog_data =
1129 brw_wm_prog_data(brw->wm.base.prog_data);
1130
1131 /* _NEW_BUFFERS | _NEW_COLOR */
1132 const struct gl_framebuffer *fb = ctx->DrawBuffer;
1133 brw_update_renderbuffer_surfaces(
1134 brw, fb,
1135 wm_prog_data->binding_table.render_target_start,
1136 brw->wm.base.surf_offset);
1137 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1138 }
1139
1140 const struct brw_tracked_state brw_renderbuffer_surfaces = {
1141 .dirty = {
1142 .mesa = _NEW_BUFFERS |
1143 _NEW_COLOR,
1144 .brw = BRW_NEW_BATCH |
1145 BRW_NEW_BLORP |
1146 BRW_NEW_FS_PROG_DATA,
1147 },
1148 .emit = update_renderbuffer_surfaces,
1149 };
1150
1151 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
1152 .dirty = {
1153 .mesa = _NEW_BUFFERS,
1154 .brw = BRW_NEW_BATCH |
1155 BRW_NEW_BLORP,
1156 },
1157 .emit = update_renderbuffer_surfaces,
1158 };
1159
1160 static void
1161 update_renderbuffer_read_surfaces(struct brw_context *brw)
1162 {
1163 const struct gl_context *ctx = &brw->ctx;
1164
1165 /* BRW_NEW_FS_PROG_DATA */
1166 const struct brw_wm_prog_data *wm_prog_data =
1167 brw_wm_prog_data(brw->wm.base.prog_data);
1168
1169 /* BRW_NEW_FRAGMENT_PROGRAM */
1170 if (!ctx->Extensions.MESA_shader_framebuffer_fetch &&
1171 brw->fragment_program && brw->fragment_program->info.outputs_read) {
1172 /* _NEW_BUFFERS */
1173 const struct gl_framebuffer *fb = ctx->DrawBuffer;
1174
1175 for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
1176 struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];
1177 const struct intel_renderbuffer *irb = intel_renderbuffer(rb);
1178 const unsigned surf_index =
1179 wm_prog_data->binding_table.render_target_read_start + i;
1180 uint32_t *surf_offset = &brw->wm.base.surf_offset[surf_index];
1181
1182 if (irb) {
1183 const enum isl_format format = brw->mesa_to_isl_render_format[
1184 _mesa_get_render_format(ctx, intel_rb_format(irb))];
1185 assert(isl_format_supports_sampling(&brw->screen->devinfo,
1186 format));
1187
1188 /* Override the target of the texture if the render buffer is a
1189 * single slice of a 3D texture (since the minimum array element
1190 * field of the surface state structure is ignored by the sampler
1191 * unit for 3D textures on some hardware), or if the render buffer
1192 * is a 1D array (since shaders always provide the array index
1193 * coordinate at the Z component to avoid state-dependent
1194 * recompiles when changing the texture target of the
1195 * framebuffer).
1196 */
1197 const GLenum target =
1198 (irb->mt->target == GL_TEXTURE_3D &&
1199 irb->layer_count == 1) ? GL_TEXTURE_2D :
1200 irb->mt->target == GL_TEXTURE_1D_ARRAY ? GL_TEXTURE_2D_ARRAY :
1201 irb->mt->target;
1202
1203 /* intel_renderbuffer::mt_layer is expressed in sample units for
1204 * the UMS and CMS multisample layouts, but
1205 * intel_renderbuffer::layer_count is expressed in units of whole
1206 * logical layers regardless of the multisample layout.
1207 */
1208 const unsigned mt_layer_unit =
1209 (irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_UMS ||
1210 irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) ?
1211 MAX2(irb->mt->num_samples, 1) : 1;
1212
1213 const struct isl_view view = {
1214 .format = format,
1215 .base_level = irb->mt_level - irb->mt->first_level,
1216 .levels = 1,
1217 .base_array_layer = irb->mt_layer / mt_layer_unit,
1218 .array_len = irb->layer_count,
1219 .swizzle = ISL_SWIZZLE_IDENTITY,
1220 .usage = ISL_SURF_USAGE_TEXTURE_BIT,
1221 };
1222
1223 const int flags = brw->draw_aux_buffer_disabled[i] ?
1224 INTEL_AUX_BUFFER_DISABLED : 0;
1225 brw_emit_surface_state(brw, irb->mt, flags, target, view,
1226 tex_mocs[brw->gen],
1227 surf_offset, surf_index,
1228 I915_GEM_DOMAIN_SAMPLER, 0);
1229
1230 } else {
1231 brw->vtbl.emit_null_surface_state(
1232 brw, _mesa_geometric_width(fb), _mesa_geometric_height(fb),
1233 _mesa_geometric_samples(fb), surf_offset);
1234 }
1235 }
1236
1237 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1238 }
1239 }
1240
1241 const struct brw_tracked_state brw_renderbuffer_read_surfaces = {
1242 .dirty = {
1243 .mesa = _NEW_BUFFERS,
1244 .brw = BRW_NEW_BATCH |
1245 BRW_NEW_FRAGMENT_PROGRAM |
1246 BRW_NEW_FS_PROG_DATA,
1247 },
1248 .emit = update_renderbuffer_read_surfaces,
1249 };
1250
1251 static void
1252 update_stage_texture_surfaces(struct brw_context *brw,
1253 const struct gl_program *prog,
1254 struct brw_stage_state *stage_state,
1255 bool for_gather, uint32_t plane)
1256 {
1257 if (!prog)
1258 return;
1259
1260 struct gl_context *ctx = &brw->ctx;
1261
1262 uint32_t *surf_offset = stage_state->surf_offset;
1263
1264 /* BRW_NEW_*_PROG_DATA */
1265 if (for_gather)
1266 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
1267 else
1268 surf_offset += stage_state->prog_data->binding_table.plane_start[plane];
1269
1270 unsigned num_samplers = util_last_bit(prog->SamplersUsed);
1271 for (unsigned s = 0; s < num_samplers; s++) {
1272 surf_offset[s] = 0;
1273
1274 if (prog->SamplersUsed & (1 << s)) {
1275 const unsigned unit = prog->SamplerUnits[s];
1276
1277 /* _NEW_TEXTURE */
1278 if (ctx->Texture.Unit[unit]._Current) {
1279 brw_update_texture_surface(ctx, unit, surf_offset + s, for_gather, plane);
1280 }
1281 }
1282 }
1283 }
1284
1285
1286 /**
1287 * Construct SURFACE_STATE objects for enabled textures.
1288 */
1289 static void
1290 brw_update_texture_surfaces(struct brw_context *brw)
1291 {
1292 /* BRW_NEW_VERTEX_PROGRAM */
1293 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
1294
1295 /* BRW_NEW_TESS_PROGRAMS */
1296 struct gl_program *tcs = (struct gl_program *) brw->tess_ctrl_program;
1297 struct gl_program *tes = (struct gl_program *) brw->tess_eval_program;
1298
1299 /* BRW_NEW_GEOMETRY_PROGRAM */
1300 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
1301
1302 /* BRW_NEW_FRAGMENT_PROGRAM */
1303 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
1304
1305 /* _NEW_TEXTURE */
1306 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false, 0);
1307 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false, 0);
1308 update_stage_texture_surfaces(brw, tes, &brw->tes.base, false, 0);
1309 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false, 0);
1310 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 0);
1311
1312 /* Emit an alternate set of surface state for gather. This
1313 * allows the surface format to be overridden for only the
1314 * gather4 messages. */
1315 if (brw->gen < 8) {
1316 if (vs && vs->nir->info.uses_texture_gather)
1317 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true, 0);
1318 if (tcs && tcs->nir->info.uses_texture_gather)
1319 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true, 0);
1320 if (tes && tes->nir->info.uses_texture_gather)
1321 update_stage_texture_surfaces(brw, tes, &brw->tes.base, true, 0);
1322 if (gs && gs->nir->info.uses_texture_gather)
1323 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true, 0);
1324 if (fs && fs->nir->info.uses_texture_gather)
1325 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true, 0);
1326 }
1327
1328 if (fs) {
1329 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 1);
1330 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 2);
1331 }
1332
1333 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1334 }
1335
1336 const struct brw_tracked_state brw_texture_surfaces = {
1337 .dirty = {
1338 .mesa = _NEW_TEXTURE,
1339 .brw = BRW_NEW_BATCH |
1340 BRW_NEW_BLORP |
1341 BRW_NEW_FRAGMENT_PROGRAM |
1342 BRW_NEW_FS_PROG_DATA |
1343 BRW_NEW_GEOMETRY_PROGRAM |
1344 BRW_NEW_GS_PROG_DATA |
1345 BRW_NEW_TESS_PROGRAMS |
1346 BRW_NEW_TCS_PROG_DATA |
1347 BRW_NEW_TES_PROG_DATA |
1348 BRW_NEW_TEXTURE_BUFFER |
1349 BRW_NEW_VERTEX_PROGRAM |
1350 BRW_NEW_VS_PROG_DATA,
1351 },
1352 .emit = brw_update_texture_surfaces,
1353 };
1354
1355 static void
1356 brw_update_cs_texture_surfaces(struct brw_context *brw)
1357 {
1358 /* BRW_NEW_COMPUTE_PROGRAM */
1359 struct gl_program *cs = (struct gl_program *) brw->compute_program;
1360
1361 /* _NEW_TEXTURE */
1362 update_stage_texture_surfaces(brw, cs, &brw->cs.base, false, 0);
1363
1364 /* Emit an alternate set of surface state for gather. This
1365 * allows the surface format to be overridden for only the
1366 * gather4 messages.
1367 */
1368 if (brw->gen < 8) {
1369 if (cs && cs->nir->info.uses_texture_gather)
1370 update_stage_texture_surfaces(brw, cs, &brw->cs.base, true, 0);
1371 }
1372
1373 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1374 }
1375
1376 const struct brw_tracked_state brw_cs_texture_surfaces = {
1377 .dirty = {
1378 .mesa = _NEW_TEXTURE,
1379 .brw = BRW_NEW_BATCH |
1380 BRW_NEW_BLORP |
1381 BRW_NEW_COMPUTE_PROGRAM,
1382 },
1383 .emit = brw_update_cs_texture_surfaces,
1384 };
1385
1386
1387 void
1388 brw_upload_ubo_surfaces(struct brw_context *brw, struct gl_program *prog,
1389 struct brw_stage_state *stage_state,
1390 struct brw_stage_prog_data *prog_data)
1391 {
1392 struct gl_context *ctx = &brw->ctx;
1393
1394 if (!prog)
1395 return;
1396
1397 uint32_t *ubo_surf_offsets =
1398 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
1399
1400 for (int i = 0; i < prog->info.num_ubos; i++) {
1401 struct gl_uniform_buffer_binding *binding =
1402 &ctx->UniformBufferBindings[prog->sh.UniformBlocks[i]->Binding];
1403
1404 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
1405 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]);
1406 } else {
1407 struct intel_buffer_object *intel_bo =
1408 intel_buffer_object(binding->BufferObject);
1409 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
1410 if (!binding->AutomaticSize)
1411 size = MIN2(size, binding->Size);
1412 struct brw_bo *bo =
1413 intel_bufferobj_buffer(brw, intel_bo,
1414 binding->Offset,
1415 size);
1416 brw_create_constant_surface(brw, bo, binding->Offset,
1417 size,
1418 &ubo_surf_offsets[i]);
1419 }
1420 }
1421
1422 uint32_t *ssbo_surf_offsets =
1423 &stage_state->surf_offset[prog_data->binding_table.ssbo_start];
1424
1425 for (int i = 0; i < prog->info.num_ssbos; i++) {
1426 struct gl_shader_storage_buffer_binding *binding =
1427 &ctx->ShaderStorageBufferBindings[prog->sh.ShaderStorageBlocks[i]->Binding];
1428
1429 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
1430 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]);
1431 } else {
1432 struct intel_buffer_object *intel_bo =
1433 intel_buffer_object(binding->BufferObject);
1434 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
1435 if (!binding->AutomaticSize)
1436 size = MIN2(size, binding->Size);
1437 struct brw_bo *bo =
1438 intel_bufferobj_buffer(brw, intel_bo,
1439 binding->Offset,
1440 size);
1441 brw_create_buffer_surface(brw, bo, binding->Offset,
1442 size,
1443 &ssbo_surf_offsets[i]);
1444 }
1445 }
1446
1447 if (prog->info.num_ubos || prog->info.num_ssbos)
1448 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1449 }
1450
1451 static void
1452 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
1453 {
1454 struct gl_context *ctx = &brw->ctx;
1455 /* _NEW_PROGRAM */
1456 struct gl_program *prog = ctx->FragmentProgram._Current;
1457
1458 /* BRW_NEW_FS_PROG_DATA */
1459 brw_upload_ubo_surfaces(brw, prog, &brw->wm.base, brw->wm.base.prog_data);
1460 }
1461
1462 const struct brw_tracked_state brw_wm_ubo_surfaces = {
1463 .dirty = {
1464 .mesa = _NEW_PROGRAM,
1465 .brw = BRW_NEW_BATCH |
1466 BRW_NEW_BLORP |
1467 BRW_NEW_FS_PROG_DATA |
1468 BRW_NEW_UNIFORM_BUFFER,
1469 },
1470 .emit = brw_upload_wm_ubo_surfaces,
1471 };
1472
1473 static void
1474 brw_upload_cs_ubo_surfaces(struct brw_context *brw)
1475 {
1476 struct gl_context *ctx = &brw->ctx;
1477 /* _NEW_PROGRAM */
1478 struct gl_program *prog =
1479 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1480
1481 /* BRW_NEW_CS_PROG_DATA */
1482 brw_upload_ubo_surfaces(brw, prog, &brw->cs.base, brw->cs.base.prog_data);
1483 }
1484
1485 const struct brw_tracked_state brw_cs_ubo_surfaces = {
1486 .dirty = {
1487 .mesa = _NEW_PROGRAM,
1488 .brw = BRW_NEW_BATCH |
1489 BRW_NEW_BLORP |
1490 BRW_NEW_CS_PROG_DATA |
1491 BRW_NEW_UNIFORM_BUFFER,
1492 },
1493 .emit = brw_upload_cs_ubo_surfaces,
1494 };
1495
1496 void
1497 brw_upload_abo_surfaces(struct brw_context *brw,
1498 const struct gl_program *prog,
1499 struct brw_stage_state *stage_state,
1500 struct brw_stage_prog_data *prog_data)
1501 {
1502 struct gl_context *ctx = &brw->ctx;
1503 uint32_t *surf_offsets =
1504 &stage_state->surf_offset[prog_data->binding_table.abo_start];
1505
1506 if (prog->info.num_abos) {
1507 for (unsigned i = 0; i < prog->info.num_abos; i++) {
1508 struct gl_atomic_buffer_binding *binding =
1509 &ctx->AtomicBufferBindings[prog->sh.AtomicBuffers[i]->Binding];
1510 struct intel_buffer_object *intel_bo =
1511 intel_buffer_object(binding->BufferObject);
1512 struct brw_bo *bo = intel_bufferobj_buffer(
1513 brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
1514
1515 brw_emit_buffer_surface_state(brw, &surf_offsets[i], bo,
1516 binding->Offset, ISL_FORMAT_RAW,
1517 bo->size - binding->Offset, 1, true);
1518 }
1519
1520 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1521 }
1522 }
1523
1524 static void
1525 brw_upload_wm_abo_surfaces(struct brw_context *brw)
1526 {
1527 /* _NEW_PROGRAM */
1528 const struct gl_program *wm = brw->fragment_program;
1529
1530 if (wm) {
1531 /* BRW_NEW_FS_PROG_DATA */
1532 brw_upload_abo_surfaces(brw, wm, &brw->wm.base, brw->wm.base.prog_data);
1533 }
1534 }
1535
1536 const struct brw_tracked_state brw_wm_abo_surfaces = {
1537 .dirty = {
1538 .mesa = _NEW_PROGRAM,
1539 .brw = BRW_NEW_ATOMIC_BUFFER |
1540 BRW_NEW_BLORP |
1541 BRW_NEW_BATCH |
1542 BRW_NEW_FS_PROG_DATA,
1543 },
1544 .emit = brw_upload_wm_abo_surfaces,
1545 };
1546
1547 static void
1548 brw_upload_cs_abo_surfaces(struct brw_context *brw)
1549 {
1550 /* _NEW_PROGRAM */
1551 const struct gl_program *cp = brw->compute_program;
1552
1553 if (cp) {
1554 /* BRW_NEW_CS_PROG_DATA */
1555 brw_upload_abo_surfaces(brw, cp, &brw->cs.base, brw->cs.base.prog_data);
1556 }
1557 }
1558
1559 const struct brw_tracked_state brw_cs_abo_surfaces = {
1560 .dirty = {
1561 .mesa = _NEW_PROGRAM,
1562 .brw = BRW_NEW_ATOMIC_BUFFER |
1563 BRW_NEW_BLORP |
1564 BRW_NEW_BATCH |
1565 BRW_NEW_CS_PROG_DATA,
1566 },
1567 .emit = brw_upload_cs_abo_surfaces,
1568 };
1569
1570 static void
1571 brw_upload_cs_image_surfaces(struct brw_context *brw)
1572 {
1573 /* _NEW_PROGRAM */
1574 const struct gl_program *cp = brw->compute_program;
1575
1576 if (cp) {
1577 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1578 brw_upload_image_surfaces(brw, cp, &brw->cs.base,
1579 brw->cs.base.prog_data);
1580 }
1581 }
1582
1583 const struct brw_tracked_state brw_cs_image_surfaces = {
1584 .dirty = {
1585 .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
1586 .brw = BRW_NEW_BATCH |
1587 BRW_NEW_BLORP |
1588 BRW_NEW_CS_PROG_DATA |
1589 BRW_NEW_IMAGE_UNITS
1590 },
1591 .emit = brw_upload_cs_image_surfaces,
1592 };
1593
1594 static uint32_t
1595 get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
1596 {
1597 const struct gen_device_info *devinfo = &brw->screen->devinfo;
1598 enum isl_format hw_format = brw_isl_format_for_mesa_format(format);
1599 if (access == GL_WRITE_ONLY) {
1600 return hw_format;
1601 } else if (isl_has_matching_typed_storage_image_format(devinfo, hw_format)) {
1602 /* Typed surface reads support a very limited subset of the shader
1603 * image formats. Translate it into the closest format the
1604 * hardware supports.
1605 */
1606 return isl_lower_storage_image_format(devinfo, hw_format);
1607 } else {
1608 /* The hardware doesn't actually support a typed format that we can use
1609 * so we have to fall back to untyped read/write messages.
1610 */
1611 return ISL_FORMAT_RAW;
1612 }
1613 }
1614
1615 static void
1616 update_default_image_param(struct brw_context *brw,
1617 struct gl_image_unit *u,
1618 unsigned surface_idx,
1619 struct brw_image_param *param)
1620 {
1621 memset(param, 0, sizeof(*param));
1622 param->surface_idx = surface_idx;
1623 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1624 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1625 * detailed explanation of these parameters.
1626 */
1627 param->swizzling[0] = 0xff;
1628 param->swizzling[1] = 0xff;
1629 }
1630
1631 static void
1632 update_buffer_image_param(struct brw_context *brw,
1633 struct gl_image_unit *u,
1634 unsigned surface_idx,
1635 struct brw_image_param *param)
1636 {
1637 struct gl_buffer_object *obj = u->TexObj->BufferObject;
1638 const uint32_t size = MIN2((uint32_t)u->TexObj->BufferSize, obj->Size);
1639 update_default_image_param(brw, u, surface_idx, param);
1640
1641 param->size[0] = size / _mesa_get_format_bytes(u->_ActualFormat);
1642 param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
1643 }
1644
1645 static void
1646 update_texture_image_param(struct brw_context *brw,
1647 struct gl_image_unit *u,
1648 unsigned surface_idx,
1649 struct brw_image_param *param)
1650 {
1651 struct intel_mipmap_tree *mt = intel_texture_object(u->TexObj)->mt;
1652
1653 update_default_image_param(brw, u, surface_idx, param);
1654
1655 param->size[0] = minify(mt->logical_width0, u->Level);
1656 param->size[1] = minify(mt->logical_height0, u->Level);
1657 param->size[2] = (!u->Layered ? 1 :
1658 u->TexObj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1659 u->TexObj->Target == GL_TEXTURE_3D ?
1660 minify(mt->logical_depth0, u->Level) :
1661 mt->logical_depth0);
1662
1663 intel_miptree_get_image_offset(mt, u->Level, u->_Layer,
1664 &param->offset[0],
1665 &param->offset[1]);
1666
1667 param->stride[0] = mt->cpp;
1668 param->stride[1] = mt->pitch / mt->cpp;
1669 param->stride[2] =
1670 brw_miptree_get_horizontal_slice_pitch(brw, mt, u->Level);
1671 param->stride[3] =
1672 brw_miptree_get_vertical_slice_pitch(brw, mt, u->Level);
1673
1674 if (mt->tiling == I915_TILING_X) {
1675 /* An X tile is a rectangular block of 512x8 bytes. */
1676 param->tiling[0] = _mesa_logbase2(512 / mt->cpp);
1677 param->tiling[1] = _mesa_logbase2(8);
1678
1679 if (brw->has_swizzling) {
1680 /* Right shifts required to swizzle bits 9 and 10 of the memory
1681 * address with bit 6.
1682 */
1683 param->swizzling[0] = 3;
1684 param->swizzling[1] = 4;
1685 }
1686 } else if (mt->tiling == I915_TILING_Y) {
1687 /* The layout of a Y-tiled surface in memory isn't really fundamentally
1688 * different from the layout of an X-tiled surface; we simply pretend that
1689 * the surface is broken up into a number of smaller 16Bx32 tiles, each
1690 * one arranged in X-major order just as is the case for X-tiling.
1691 */
1692 param->tiling[0] = _mesa_logbase2(16 / mt->cpp);
1693 param->tiling[1] = _mesa_logbase2(32);
1694
1695 if (brw->has_swizzling) {
1696 /* Right shift required to swizzle bit 9 of the memory address with
1697 * bit 6.
1698 */
1699 param->swizzling[0] = 3;
1700 }
1701 }
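/* For instance, with a hypothetical 4-byte-per-texel image the X-tiled
 * case above gives tiling[0] = log2(512 / 4) = 7 and tiling[1] = log2(8)
 * = 3, while the Y-tiled case gives tiling[0] = log2(16 / 4) = 2 and
 * tiling[1] = log2(32) = 5.
 */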
1702
1703 /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The
1704 * address calculation algorithm (emit_address_calculation() in
1705 * brw_fs_surface_builder.cpp) handles this as a sort of tiling with
1706 * modulus equal to the LOD.
1707 */
1708 param->tiling[2] = (u->TexObj->Target == GL_TEXTURE_3D ? u->Level :
1709 0);
1710 }
1711
1712 static void
1713 update_image_surface(struct brw_context *brw,
1714 struct gl_image_unit *u,
1715 GLenum access,
1716 unsigned surface_idx,
1717 uint32_t *surf_offset,
1718 struct brw_image_param *param)
1719 {
1720 if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
1721 struct gl_texture_object *obj = u->TexObj;
1722 const unsigned format = get_image_format(brw, u->_ActualFormat, access);
1723
1724 if (obj->Target == GL_TEXTURE_BUFFER) {
1725 struct intel_buffer_object *intel_obj =
1726 intel_buffer_object(obj->BufferObject);
1727 const unsigned texel_size = (format == ISL_FORMAT_RAW ? 1 :
1728 _mesa_get_format_bytes(u->_ActualFormat));
1729
1730 brw_emit_buffer_surface_state(
1731 brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
1732 format, intel_obj->Base.Size, texel_size,
1733 access != GL_READ_ONLY);
1734
1735 update_buffer_image_param(brw, u, surface_idx, param);
1736
1737 } else {
1738 struct intel_texture_object *intel_obj = intel_texture_object(obj);
1739 struct intel_mipmap_tree *mt = intel_obj->mt;
1740
1741 if (format == ISL_FORMAT_RAW) {
1742 brw_emit_buffer_surface_state(
1743 brw, surf_offset, mt->bo, mt->offset,
1744 format, mt->bo->size - mt->offset, 1 /* pitch */,
1745 access != GL_READ_ONLY);
1746
1747 } else {
1748 const unsigned num_layers = (!u->Layered ? 1 :
1749 obj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1750 mt->logical_depth0);
1751
1752 struct isl_view view = {
1753 .format = format,
1754 .base_level = obj->MinLevel + u->Level,
1755 .levels = 1,
1756 .base_array_layer = obj->MinLayer + u->_Layer,
1757 .array_len = num_layers,
1758 .swizzle = ISL_SWIZZLE_IDENTITY,
1759 .usage = ISL_SURF_USAGE_STORAGE_BIT,
1760 };
1761
1762 const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
1763 assert(!intel_miptree_has_color_unresolved(mt,
1764 view.base_level, 1,
1765 view.base_array_layer,
1766 view.array_len));
1767 brw_emit_surface_state(brw, mt, INTEL_AUX_BUFFER_DISABLED,
1768 mt->target, view, tex_mocs[brw->gen],
1769 surf_offset, surf_index,
1770 I915_GEM_DOMAIN_SAMPLER,
1771 access == GL_READ_ONLY ? 0 :
1772 I915_GEM_DOMAIN_SAMPLER);
1773 }
1774
1775 update_texture_image_param(brw, u, surface_idx, param);
1776 }
1777
1778 } else {
1779 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset);
1780 update_default_image_param(brw, u, surface_idx, param);
1781 }
1782 }
1783
1784 void
1785 brw_upload_image_surfaces(struct brw_context *brw,
1786 const struct gl_program *prog,
1787 struct brw_stage_state *stage_state,
1788 struct brw_stage_prog_data *prog_data)
1789 {
1790 assert(prog);
1791 struct gl_context *ctx = &brw->ctx;
1792
1793 if (prog->info.num_images) {
1794 for (unsigned i = 0; i < prog->info.num_images; i++) {
1795 struct gl_image_unit *u = &ctx->ImageUnits[prog->sh.ImageUnits[i]];
1796 const unsigned surf_idx = prog_data->binding_table.image_start + i;
1797
1798 update_image_surface(brw, u, prog->sh.ImageAccess[i],
1799 surf_idx,
1800 &stage_state->surf_offset[surf_idx],
1801 &prog_data->image_param[i]);
1802 }
1803
1804 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1805 /* This may have changed the image metadata that depends on the context
1806 * image unit state and is passed to the program as uniforms, so make
1807 * sure that push and pull constants are reuploaded.
1808 */
1809 brw->NewGLState |= _NEW_PROGRAM_CONSTANTS;
1810 }
1811 }
1812
1813 static void
1814 brw_upload_wm_image_surfaces(struct brw_context *brw)
1815 {
1816 /* BRW_NEW_FRAGMENT_PROGRAM */
1817 const struct gl_program *wm = brw->fragment_program;
1818
1819 if (wm) {
1820 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1821 brw_upload_image_surfaces(brw, wm, &brw->wm.base,
1822 brw->wm.base.prog_data);
1823 }
1824 }
1825
1826 const struct brw_tracked_state brw_wm_image_surfaces = {
1827 .dirty = {
1828 .mesa = _NEW_TEXTURE,
1829 .brw = BRW_NEW_BATCH |
1830 BRW_NEW_BLORP |
1831 BRW_NEW_FRAGMENT_PROGRAM |
1832 BRW_NEW_FS_PROG_DATA |
1833 BRW_NEW_IMAGE_UNITS
1834 },
1835 .emit = brw_upload_wm_image_surfaces,
1836 };
1837
1838 void
1839 gen4_init_vtable_surface_functions(struct brw_context *brw)
1840 {
1841 brw->vtbl.update_renderbuffer_surface = gen4_update_renderbuffer_surface;
1842 brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
1843 }
1844
1845 void
1846 gen6_init_vtable_surface_functions(struct brw_context *brw)
1847 {
1848 gen4_init_vtable_surface_functions(brw);
1849 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
1850 }
1851
1852 static void
1853 brw_upload_cs_work_groups_surface(struct brw_context *brw)
1854 {
1855 struct gl_context *ctx = &brw->ctx;
1856 /* _NEW_PROGRAM */
1857 struct gl_program *prog =
1858 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1859 /* BRW_NEW_CS_PROG_DATA */
1860 const struct brw_cs_prog_data *cs_prog_data =
1861 brw_cs_prog_data(brw->cs.base.prog_data);
1862
1863 if (prog && cs_prog_data->uses_num_work_groups) {
1864 const unsigned surf_idx =
1865 cs_prog_data->binding_table.work_groups_start;
1866 uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
1867 struct brw_bo *bo;
1868 uint32_t bo_offset;
1869
1870 if (brw->compute.num_work_groups_bo == NULL) {
1871 bo = NULL;
1872 intel_upload_data(brw,
1873 (void *)brw->compute.num_work_groups,
1874 3 * sizeof(GLuint),
1875 sizeof(GLuint),
1876 &bo,
1877 &bo_offset);
1878 } else {
1879 bo = brw->compute.num_work_groups_bo;
1880 bo_offset = brw->compute.num_work_groups_offset;
1881 }
1882
1883 brw_emit_buffer_surface_state(brw, surf_offset,
1884 bo, bo_offset,
1885 ISL_FORMAT_RAW,
1886 3 * sizeof(GLuint), 1, true);
1887 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1888 }
1889 }
1890
1891 const struct brw_tracked_state brw_cs_work_groups_surface = {
1892 .dirty = {
1893 .brw = BRW_NEW_BLORP |
1894 BRW_NEW_CS_PROG_DATA |
1895 BRW_NEW_CS_WORK_GROUPS
1896 },
1897 .emit = brw_upload_cs_work_groups_surface,
1898 };