i965: "Fix" aux offsets
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "compiler/nir/nir.h"
34 #include "main/context.h"
35 #include "main/blend.h"
36 #include "main/mtypes.h"
37 #include "main/samplerobj.h"
38 #include "main/shaderimage.h"
39 #include "main/teximage.h"
40 #include "program/prog_parameter.h"
41 #include "program/prog_instruction.h"
42 #include "main/framebuffer.h"
43 #include "main/shaderapi.h"
44
45 #include "isl/isl.h"
46
47 #include "intel_mipmap_tree.h"
48 #include "intel_batchbuffer.h"
49 #include "intel_tex.h"
50 #include "intel_fbo.h"
51 #include "intel_buffer_objects.h"
52
53 #include "brw_context.h"
54 #include "brw_state.h"
55 #include "brw_defines.h"
56 #include "brw_wm.h"
57
58 enum {
59 INTEL_RENDERBUFFER_LAYERED = 1 << 0,
60 INTEL_AUX_BUFFER_DISABLED = 1 << 1,
61 };
62
63 uint32_t tex_mocs[] = {
64 [7] = GEN7_MOCS_L3,
65 [8] = BDW_MOCS_WB,
66 [9] = SKL_MOCS_WB,
67 };
68
69 uint32_t rb_mocs[] = {
70 [7] = GEN7_MOCS_L3,
71 [8] = BDW_MOCS_PTE,
72 [9] = SKL_MOCS_PTE,
73 };
74
75 static void
76 brw_emit_surface_state(struct brw_context *brw,
77 struct intel_mipmap_tree *mt, uint32_t flags,
78 GLenum target, struct isl_view view,
79 uint32_t mocs, uint32_t *surf_offset, int surf_index,
80 unsigned read_domains, unsigned write_domains)
81 {
82 uint32_t tile_x = mt->level[0].slice[0].x_offset;
83 uint32_t tile_y = mt->level[0].slice[0].y_offset;
84 uint32_t offset = mt->offset;
85
86 struct isl_surf surf;
87 intel_miptree_get_isl_surf(brw, mt, &surf);
88
89 surf.dim = get_isl_surf_dim(target);
90
91 const enum isl_dim_layout dim_layout =
92 get_isl_dim_layout(&brw->screen->devinfo, mt->tiling, target);
93
94 if (surf.dim_layout != dim_layout) {
95 /* The layout of the specified texture target is not compatible with the
96 * actual layout of the miptree structure in memory -- you're entering
97 * dangerous territory.  This can only work if you intend to access a
98 * single level and slice of the texture, and the hardware supports the
99 * tile offset feature in order to allow non-tile-aligned base offsets,
100 * since we'll have to point the hardware at the first texel of the
101 * level instead of relying on the usual base level/layer
102 * controls.
103 */
104 assert(brw->has_surface_tile_offset);
105 assert(view.levels == 1 && view.array_len == 1);
106 assert(tile_x == 0 && tile_y == 0);
107
108 offset += intel_miptree_get_tile_offsets(mt, view.base_level,
109 view.base_array_layer,
110 &tile_x, &tile_y);
111
112 /* Minify the logical dimensions of the texture. */
113 const unsigned l = view.base_level - mt->first_level;
114 surf.logical_level0_px.width = minify(surf.logical_level0_px.width, l);
115 surf.logical_level0_px.height = surf.dim <= ISL_SURF_DIM_1D ? 1 :
116 minify(surf.logical_level0_px.height, l);
117 surf.logical_level0_px.depth = surf.dim <= ISL_SURF_DIM_2D ? 1 :
118 minify(surf.logical_level0_px.depth, l);
119
120 /* Only the base level and layer can be addressed with the overridden
121 * layout.
122 */
123 surf.logical_level0_px.array_len = 1;
124 surf.levels = 1;
125 surf.dim_layout = dim_layout;
126
127 /* The requested slice of the texture is now at the base level and
128 * layer.
129 */
130 view.base_level = 0;
131 view.base_array_layer = 0;
132 }
133
134 union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } };
135
136 drm_intel_bo *aux_bo;
137 struct isl_surf *aux_surf = NULL, aux_surf_s;
138 uint64_t aux_offset = 0;
139 enum isl_aux_usage aux_usage = ISL_AUX_USAGE_NONE;
140 if ((mt->mcs_buf || intel_miptree_sample_with_hiz(brw, mt)) &&
141 !(flags & INTEL_AUX_BUFFER_DISABLED)) {
142 intel_miptree_get_aux_isl_surf(brw, mt, &aux_surf_s, &aux_usage);
143 aux_surf = &aux_surf_s;
144
145 if (mt->mcs_buf) {
146 assert(mt->mcs_buf->offset == 0);
147 aux_bo = mt->mcs_buf->bo;
148 aux_offset = mt->mcs_buf->bo->offset64 + mt->mcs_buf->offset;
149 } else {
150 aux_bo = mt->hiz_buf->aux_base.bo;
151 aux_offset = mt->hiz_buf->aux_base.bo->offset64 +
152 mt->hiz_buf->mt->offset;
153 }
154
155 /* We only really need a clear color if we also have an auxiliary
156 * surface. Without one, it does nothing.
157 */
158 clear_color = intel_miptree_get_isl_clear_color(brw, mt);
159 }
160
161 void *state = __brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
162 brw->isl_dev.ss.size,
163 brw->isl_dev.ss.align,
164 surf_index, surf_offset);
165
166 isl_surf_fill_state(&brw->isl_dev, state, .surf = &surf, .view = &view,
167 .address = mt->bo->offset64 + offset,
168 .aux_surf = aux_surf, .aux_usage = aux_usage,
169 .aux_address = aux_offset,
170 .mocs = mocs, .clear_color = clear_color,
171 .x_offset_sa = tile_x, .y_offset_sa = tile_y);
172
173 drm_intel_bo_emit_reloc(brw->batch.bo,
174 *surf_offset + brw->isl_dev.ss.addr_offset,
175 mt->bo, offset,
176 read_domains, write_domains);
177
178 if (aux_surf) {
179 /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
180 * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
181 * contain other control information. Since buffer addresses are always
182 * on 4k boundaries (and thus have their lower 12 bits zero), we can use
183 * an ordinary reloc to do the necessary address translation.
184 */
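/* Note: the reloc delta below ("*aux_addr & 0xfff") is the low 12 bits of
 * the value isl_surf_fill_state() already wrote into the aux address dword,
 * i.e. the aux control bits.  After the kernel patches in the real GPU
 * address of aux_bo, those bits are preserved while the 4k-aligned upper
 * bits become the MCS/HiZ base address.
 */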
185 assert((aux_offset & 0xfff) == 0);
186 uint32_t *aux_addr = state + brw->isl_dev.ss.aux_addr_offset;
187 drm_intel_bo_emit_reloc(brw->batch.bo,
188 *surf_offset + brw->isl_dev.ss.aux_addr_offset,
189 aux_bo, *aux_addr & 0xfff,
190 read_domains, write_domains);
191 }
192 }
193
194 uint32_t
195 brw_update_renderbuffer_surface(struct brw_context *brw,
196 struct gl_renderbuffer *rb,
197 uint32_t flags, unsigned unit /* unused */,
198 uint32_t surf_index)
199 {
200 struct gl_context *ctx = &brw->ctx;
201 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
202 struct intel_mipmap_tree *mt = irb->mt;
203
204 if (brw->gen < 9) {
205 assert(!(flags & INTEL_AUX_BUFFER_DISABLED));
206 }
207
208 assert(brw_render_target_supported(brw, rb));
209 intel_miptree_used_for_rendering(mt);
210
211 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
212 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
213 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
214 __func__, _mesa_get_format_name(rb_format));
215 }
216
217 const unsigned layer_multiplier =
218 (irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_UMS ||
219 irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) ?
220 MAX2(irb->mt->num_samples, 1) : 1;
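/* Note: for the UMS/CMS multisample layouts irb->mt_layer is expressed in
 * sample units (see the matching comment in
 * update_renderbuffer_read_surfaces()), so dividing by layer_multiplier
 * below recovers the logical array layer.
 */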
221
222 struct isl_view view = {
223 .format = brw->render_target_format[rb_format],
224 .base_level = irb->mt_level - irb->mt->first_level,
225 .levels = 1,
226 .base_array_layer = irb->mt_layer / layer_multiplier,
227 .array_len = MAX2(irb->layer_count, 1),
228 .swizzle = ISL_SWIZZLE_IDENTITY,
229 .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
230 };
231
232 uint32_t offset;
233 brw_emit_surface_state(brw, mt, flags, mt->target, view,
234 rb_mocs[brw->gen],
235 &offset, surf_index,
236 I915_GEM_DOMAIN_RENDER,
237 I915_GEM_DOMAIN_RENDER);
238 return offset;
239 }
240
241 GLuint
242 translate_tex_target(GLenum target)
243 {
244 switch (target) {
245 case GL_TEXTURE_1D:
246 case GL_TEXTURE_1D_ARRAY_EXT:
247 return BRW_SURFACE_1D;
248
249 case GL_TEXTURE_RECTANGLE_NV:
250 return BRW_SURFACE_2D;
251
252 case GL_TEXTURE_2D:
253 case GL_TEXTURE_2D_ARRAY_EXT:
254 case GL_TEXTURE_EXTERNAL_OES:
255 case GL_TEXTURE_2D_MULTISAMPLE:
256 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
257 return BRW_SURFACE_2D;
258
259 case GL_TEXTURE_3D:
260 return BRW_SURFACE_3D;
261
262 case GL_TEXTURE_CUBE_MAP:
263 case GL_TEXTURE_CUBE_MAP_ARRAY:
264 return BRW_SURFACE_CUBE;
265
266 default:
267 unreachable("not reached");
268 }
269 }
270
271 uint32_t
272 brw_get_surface_tiling_bits(uint32_t tiling)
273 {
274 switch (tiling) {
275 case I915_TILING_X:
276 return BRW_SURFACE_TILED;
277 case I915_TILING_Y:
278 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
279 default:
280 return 0;
281 }
282 }
283
284
285 uint32_t
286 brw_get_surface_num_multisamples(unsigned num_samples)
287 {
288 if (num_samples > 1)
289 return BRW_SURFACE_MULTISAMPLECOUNT_4;
290 else
291 return BRW_SURFACE_MULTISAMPLECOUNT_1;
292 }
293
294 /**
295 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
296 * swizzling.
297 */
298 int
299 brw_get_texture_swizzle(const struct gl_context *ctx,
300 const struct gl_texture_object *t)
301 {
302 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
303
304 int swizzles[SWIZZLE_NIL + 1] = {
305 SWIZZLE_X,
306 SWIZZLE_Y,
307 SWIZZLE_Z,
308 SWIZZLE_W,
309 SWIZZLE_ZERO,
310 SWIZZLE_ONE,
311 SWIZZLE_NIL
312 };
313
314 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
315 img->_BaseFormat == GL_DEPTH_STENCIL) {
316 GLenum depth_mode = t->DepthMode;
317
318 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
319 * with depth component data specified with a sized internal format.
320 * Otherwise, it's left at the old default, GL_LUMINANCE.
321 */
322 if (_mesa_is_gles3(ctx) &&
323 img->InternalFormat != GL_DEPTH_COMPONENT &&
324 img->InternalFormat != GL_DEPTH_STENCIL) {
325 depth_mode = GL_RED;
326 }
327
328 switch (depth_mode) {
329 case GL_ALPHA:
330 swizzles[0] = SWIZZLE_ZERO;
331 swizzles[1] = SWIZZLE_ZERO;
332 swizzles[2] = SWIZZLE_ZERO;
333 swizzles[3] = SWIZZLE_X;
334 break;
335 case GL_LUMINANCE:
336 swizzles[0] = SWIZZLE_X;
337 swizzles[1] = SWIZZLE_X;
338 swizzles[2] = SWIZZLE_X;
339 swizzles[3] = SWIZZLE_ONE;
340 break;
341 case GL_INTENSITY:
342 swizzles[0] = SWIZZLE_X;
343 swizzles[1] = SWIZZLE_X;
344 swizzles[2] = SWIZZLE_X;
345 swizzles[3] = SWIZZLE_X;
346 break;
347 case GL_RED:
348 swizzles[0] = SWIZZLE_X;
349 swizzles[1] = SWIZZLE_ZERO;
350 swizzles[2] = SWIZZLE_ZERO;
351 swizzles[3] = SWIZZLE_ONE;
352 break;
353 }
354 }
355
356 GLenum datatype = _mesa_get_format_datatype(img->TexFormat);
357
358 /* If the texture's format is alpha-only, force R, G, and B to
359 * 0.0. Similarly, if the texture's format has no alpha channel,
360 * force the alpha value read to 1.0. This allows for the
361 * implementation to use an RGBA texture for any of these formats
362 * without leaking any unexpected values.
363 */
364 switch (img->_BaseFormat) {
365 case GL_ALPHA:
366 swizzles[0] = SWIZZLE_ZERO;
367 swizzles[1] = SWIZZLE_ZERO;
368 swizzles[2] = SWIZZLE_ZERO;
369 break;
370 case GL_LUMINANCE:
371 if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
372 swizzles[0] = SWIZZLE_X;
373 swizzles[1] = SWIZZLE_X;
374 swizzles[2] = SWIZZLE_X;
375 swizzles[3] = SWIZZLE_ONE;
376 }
377 break;
378 case GL_LUMINANCE_ALPHA:
379 if (datatype == GL_SIGNED_NORMALIZED) {
380 swizzles[0] = SWIZZLE_X;
381 swizzles[1] = SWIZZLE_X;
382 swizzles[2] = SWIZZLE_X;
383 swizzles[3] = SWIZZLE_W;
384 }
385 break;
386 case GL_INTENSITY:
387 if (datatype == GL_SIGNED_NORMALIZED) {
388 swizzles[0] = SWIZZLE_X;
389 swizzles[1] = SWIZZLE_X;
390 swizzles[2] = SWIZZLE_X;
391 swizzles[3] = SWIZZLE_X;
392 }
393 break;
394 case GL_RED:
395 case GL_RG:
396 case GL_RGB:
397 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
398 swizzles[3] = SWIZZLE_ONE;
399 break;
400 }
401
402 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
403 swizzles[GET_SWZ(t->_Swizzle, 1)],
404 swizzles[GET_SWZ(t->_Swizzle, 2)],
405 swizzles[GET_SWZ(t->_Swizzle, 3)]);
406 }
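/* Worked example (illustrative): for a GL_ALPHA base format the table above
 * becomes {ZERO, ZERO, ZERO, W}.  If the application additionally sets
 * GL_TEXTURE_SWIZZLE_RGBA to (ALPHA, ALPHA, ALPHA, ALPHA), every GET_SWZ()
 * lookup selects index 3, so the final result is (W, W, W, W) -- the app
 * swizzle composes with the format-derived swizzle rather than replacing it.
 */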
407
408 /**
409 * Convert a swizzle enumeration (e.g. SWIZZLE_X) to one of the Gen7.5+
410 * "Shader Channel Select" enumerations (e.g. HSW_SCS_RED). The mappings are
411 *
412 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
413 * 0 1 2 3 4 5
414 * 4 5 6 7 0 1
415 * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE
416 *
417 * which is simply adding 4 then modding by 8 (or anding with 7).
418 *
419 * We then may need to apply workarounds for textureGather hardware bugs.
420 */
421 static unsigned
422 swizzle_to_scs(GLenum swizzle, bool need_green_to_blue)
423 {
424 unsigned scs = (swizzle + 4) & 7;
425
426 return (need_green_to_blue && scs == HSW_SCS_GREEN) ? HSW_SCS_BLUE : scs;
427 }
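/* For example, SWIZZLE_X (0) maps to (0 + 4) & 7 == 4 == HSW_SCS_RED, and
 * SWIZZLE_ZERO (4) maps to (4 + 4) & 7 == 0 == HSW_SCS_ZERO, matching the
 * table above.  With need_green_to_blue set (the Haswell textureGather
 * workaround), a green select is redirected to blue.
 */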
428
429 static unsigned
430 brw_find_matching_rb(const struct gl_framebuffer *fb,
431 const struct intel_mipmap_tree *mt)
432 {
433 for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
434 const struct intel_renderbuffer *irb =
435 intel_renderbuffer(fb->_ColorDrawBuffers[i]);
436
437 if (irb && irb->mt == mt)
438 return i;
439 }
440
441 return fb->_NumColorDrawBuffers;
442 }
443
444 static inline bool
445 brw_texture_view_sane(const struct brw_context *brw,
446 const struct intel_mipmap_tree *mt, unsigned format)
447 {
448 /* There are special cases only for lossless compression. */
449 if (!intel_miptree_is_lossless_compressed(brw, mt))
450 return true;
451
452 if (isl_format_supports_lossless_compression(&brw->screen->devinfo,
453 format))
454 return true;
455
456 /* Logic elsewhere needs to take care to resolve the color buffer prior
457 * to sampling it as non-compressed.
458 */
459 if (mt->fast_clear_state != INTEL_FAST_CLEAR_STATE_RESOLVED)
460 return false;
461
462 const struct gl_framebuffer *fb = brw->ctx.DrawBuffer;
463 const unsigned rb_index = brw_find_matching_rb(fb, mt);
464
465 if (rb_index == fb->_NumColorDrawBuffers)
466 return true;
467
468 /* Underlying surface is compressed but it is sampled using a format that
469 * the sampling engine doesn't support as compressed. Compression must be
470 * disabled for both sampling engine and data port in case the same surface
471 * is used also as render target.
472 */
473 return brw->draw_aux_buffer_disabled[rb_index];
474 }
475
476 static bool
477 brw_disable_aux_surface(const struct brw_context *brw,
478 const struct intel_mipmap_tree *mt)
479 {
480 /* Nothing to disable. */
481 if (!mt->mcs_buf)
482 return false;
483
484 /* There are special cases only for lossless compression. */
485 if (!intel_miptree_is_lossless_compressed(brw, mt))
486 return mt->fast_clear_state == INTEL_FAST_CLEAR_STATE_RESOLVED;
487
488 const struct gl_framebuffer *fb = brw->ctx.DrawBuffer;
489 const unsigned rb_index = brw_find_matching_rb(fb, mt);
490
491 /* If we are drawing into this with compression enabled, then we must also
492 * enable compression when texturing from it regardless of
493 * fast_clear_state.  If we don't, then after the first draw call with
494 * this setup, there will be data in the CCS which won't get picked up by
495 * subsequent texturing operations as required by ARB_texture_barrier.
496 * Since we don't want to re-emit the binding table or do a resolve
497 * operation every draw call, the easiest thing to do is just enable
498 * compression on the texturing side. This is completely safe to do
499 * since, if compressed texturing weren't allowed, we would have disabled
500 * compression of render targets in whatever_that_function_is_called().
501 */
502 if (rb_index < fb->_NumColorDrawBuffers) {
503 if (brw->draw_aux_buffer_disabled[rb_index]) {
504 assert(mt->fast_clear_state == INTEL_FAST_CLEAR_STATE_RESOLVED);
505 }
506
507 return brw->draw_aux_buffer_disabled[rb_index];
508 }
509
510 return mt->fast_clear_state == INTEL_FAST_CLEAR_STATE_RESOLVED;
511 }
512
513 void
514 brw_update_texture_surface(struct gl_context *ctx,
515 unsigned unit,
516 uint32_t *surf_offset,
517 bool for_gather,
518 uint32_t plane)
519 {
520 struct brw_context *brw = brw_context(ctx);
521 struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current;
522
523 if (obj->Target == GL_TEXTURE_BUFFER) {
524 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
525
526 } else {
527 struct intel_texture_object *intel_obj = intel_texture_object(obj);
528 struct intel_mipmap_tree *mt = intel_obj->mt;
529
530 if (plane > 0) {
531 if (mt->plane[plane - 1] == NULL)
532 return;
533 mt = mt->plane[plane - 1];
534 }
535
536 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
537 /* If this is a view with restricted NumLayers, then our effective depth
538 * is not just the miptree depth.
539 */
540 const unsigned view_num_layers =
541 (obj->Immutable && obj->Target != GL_TEXTURE_3D) ? obj->NumLayers :
542 mt->logical_depth0;
543
544 /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
545 * texturing functions that return a float, as our code generation always
546 * selects the .x channel (which would always be 0).
547 */
548 struct gl_texture_image *firstImage = obj->Image[0][obj->BaseLevel];
549 const bool alpha_depth = obj->DepthMode == GL_ALPHA &&
550 (firstImage->_BaseFormat == GL_DEPTH_COMPONENT ||
551 firstImage->_BaseFormat == GL_DEPTH_STENCIL);
552 const unsigned swizzle = (unlikely(alpha_depth) ? SWIZZLE_XYZW :
553 brw_get_texture_swizzle(&brw->ctx, obj));
554
555 mesa_format mesa_fmt = plane == 0 ? intel_obj->_Format : mt->format;
556 unsigned format = translate_tex_format(brw, mesa_fmt,
557 sampler->sRGBDecode);
558
559 /* Implement gen6 and gen7 gather work-around */
560 bool need_green_to_blue = false;
561 if (for_gather) {
562 if (brw->gen == 7 && format == BRW_SURFACEFORMAT_R32G32_FLOAT) {
563 format = BRW_SURFACEFORMAT_R32G32_FLOAT_LD;
564 need_green_to_blue = brw->is_haswell;
565 } else if (brw->gen == 6) {
566 /* Sandybridge's gather4 message is broken for integer formats.
567 * To work around this, we pretend the surface is UNORM for
568 * 8 or 16-bit formats, and emit shader instructions to recover
569 * the real INT/UINT value. For 32-bit formats, we pretend
570 * the surface is FLOAT, and simply reinterpret the resulting
571 * bits.
572 */
573 switch (format) {
574 case BRW_SURFACEFORMAT_R8_SINT:
575 case BRW_SURFACEFORMAT_R8_UINT:
576 format = BRW_SURFACEFORMAT_R8_UNORM;
577 break;
578
579 case BRW_SURFACEFORMAT_R16_SINT:
580 case BRW_SURFACEFORMAT_R16_UINT:
581 format = BRW_SURFACEFORMAT_R16_UNORM;
582 break;
583
584 case BRW_SURFACEFORMAT_R32_SINT:
585 case BRW_SURFACEFORMAT_R32_UINT:
586 format = BRW_SURFACEFORMAT_R32_FLOAT;
587 break;
588
589 default:
590 break;
591 }
592 }
593 }
594
595 if (obj->StencilSampling && firstImage->_BaseFormat == GL_DEPTH_STENCIL) {
596 if (brw->gen <= 7) {
597 assert(mt->r8stencil_mt && !mt->stencil_mt->r8stencil_needs_update);
598 mt = mt->r8stencil_mt;
599 } else {
600 mt = mt->stencil_mt;
601 }
602 format = BRW_SURFACEFORMAT_R8_UINT;
603 } else if (brw->gen <= 7 && mt->format == MESA_FORMAT_S_UINT8) {
604 assert(mt->r8stencil_mt && !mt->r8stencil_needs_update);
605 mt = mt->r8stencil_mt;
606 format = BRW_SURFACEFORMAT_R8_UINT;
607 }
608
609 const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
610
611 struct isl_view view = {
612 .format = format,
613 .base_level = obj->MinLevel + obj->BaseLevel,
614 .levels = intel_obj->_MaxLevel - obj->BaseLevel + 1,
615 .base_array_layer = obj->MinLayer,
616 .array_len = view_num_layers,
617 .swizzle = {
618 .r = swizzle_to_scs(GET_SWZ(swizzle, 0), need_green_to_blue),
619 .g = swizzle_to_scs(GET_SWZ(swizzle, 1), need_green_to_blue),
620 .b = swizzle_to_scs(GET_SWZ(swizzle, 2), need_green_to_blue),
621 .a = swizzle_to_scs(GET_SWZ(swizzle, 3), need_green_to_blue),
622 },
623 .usage = ISL_SURF_USAGE_TEXTURE_BIT,
624 };
625
626 if (obj->Target == GL_TEXTURE_CUBE_MAP ||
627 obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY)
628 view.usage |= ISL_SURF_USAGE_CUBE_BIT;
629
630 assert(brw_texture_view_sane(brw, mt, format));
631
632 const int flags =
633 brw_disable_aux_surface(brw, mt) ? INTEL_AUX_BUFFER_DISABLED : 0;
634 brw_emit_surface_state(brw, mt, flags, mt->target, view,
635 tex_mocs[brw->gen],
636 surf_offset, surf_index,
637 I915_GEM_DOMAIN_SAMPLER, 0);
638 }
639 }
640
641 void
642 brw_emit_buffer_surface_state(struct brw_context *brw,
643 uint32_t *out_offset,
644 drm_intel_bo *bo,
645 unsigned buffer_offset,
646 unsigned surface_format,
647 unsigned buffer_size,
648 unsigned pitch,
649 bool rw)
650 {
651 uint32_t *dw = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
652 brw->isl_dev.ss.size,
653 brw->isl_dev.ss.align,
654 out_offset);
655
656 isl_buffer_fill_state(&brw->isl_dev, dw,
657 .address = (bo ? bo->offset64 : 0) + buffer_offset,
658 .size = buffer_size,
659 .format = surface_format,
660 .stride = pitch,
661 .mocs = tex_mocs[brw->gen]);
662
663 if (bo) {
664 drm_intel_bo_emit_reloc(brw->batch.bo,
665 *out_offset + brw->isl_dev.ss.addr_offset,
666 bo, buffer_offset,
667 I915_GEM_DOMAIN_SAMPLER,
668 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
669 }
670 }
671
672 void
673 brw_update_buffer_texture_surface(struct gl_context *ctx,
674 unsigned unit,
675 uint32_t *surf_offset)
676 {
677 struct brw_context *brw = brw_context(ctx);
678 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
679 struct intel_buffer_object *intel_obj =
680 intel_buffer_object(tObj->BufferObject);
681 uint32_t size = tObj->BufferSize;
682 drm_intel_bo *bo = NULL;
683 mesa_format format = tObj->_BufferObjectFormat;
684 uint32_t brw_format = brw_format_for_mesa_format(format);
685 int texel_size = _mesa_get_format_bytes(format);
686
687 if (intel_obj) {
688 size = MIN2(size, intel_obj->Base.Size);
689 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
690 }
691
692 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
693 _mesa_problem(NULL, "bad format %s for texture buffer\n",
694 _mesa_get_format_name(format));
695 }
696
697 brw_emit_buffer_surface_state(brw, surf_offset, bo,
698 tObj->BufferOffset,
699 brw_format,
700 size,
701 texel_size,
702 false /* rw */);
703 }
704
705 /**
706 * Create the constant buffer surface. Vertex/fragment shader constants will be
707 * read from this buffer with Data Port Read instructions/messages.
708 */
709 void
710 brw_create_constant_surface(struct brw_context *brw,
711 drm_intel_bo *bo,
712 uint32_t offset,
713 uint32_t size,
714 uint32_t *out_offset)
715 {
716 brw_emit_buffer_surface_state(brw, out_offset, bo, offset,
717 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
718 size, 1, false);
719 }
720
721 /**
722 * Create the buffer surface. Shader buffer variables will be
723 * read from / written to this buffer with Data Port Read/Write
724 * instructions/messages.
725 */
726 void
727 brw_create_buffer_surface(struct brw_context *brw,
728 drm_intel_bo *bo,
729 uint32_t offset,
730 uint32_t size,
731 uint32_t *out_offset)
732 {
733 /* Use a raw surface so we can reuse existing untyped read/write/atomic
734 * messages. We need these specifically for the fragment shader since they
735 * include a pixel mask header that we need in order to ensure correct
736 * behavior with helper invocations, which must not write to the buffer.
737 */
738 brw_emit_buffer_surface_state(brw, out_offset, bo, offset,
739 BRW_SURFACEFORMAT_RAW,
740 size, 1, true);
741 }
742
743 /**
744 * Set up a binding table entry for use by stream output logic (transform
745 * feedback).
746 *
747 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
748 */
749 void
750 brw_update_sol_surface(struct brw_context *brw,
751 struct gl_buffer_object *buffer_obj,
752 uint32_t *out_offset, unsigned num_vector_components,
753 unsigned stride_dwords, unsigned offset_dwords)
754 {
755 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
756 uint32_t offset_bytes = 4 * offset_dwords;
757 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
758 offset_bytes,
759 buffer_obj->Size - offset_bytes);
760 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
761 out_offset);
762 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
763 size_t size_dwords = buffer_obj->Size / 4;
764 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
765
766 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
767 * too big to map using a single binding table entry?
768 */
769 assert((size_dwords - offset_dwords) / stride_dwords
770 <= BRW_MAX_NUM_BUFFER_ENTRIES);
771
772 if (size_dwords > offset_dwords + num_vector_components) {
773 /* There is room for at least 1 transform feedback output in the buffer.
774 * Compute the number of additional transform feedback outputs the
775 * buffer has room for.
776 */
777 buffer_size_minus_1 =
778 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
779 } else {
780 /* There isn't even room for a single transform feedback output in the
781 * buffer. We can't configure the binding table entry to prevent output
782 * entirely; we'll have to rely on the geometry shader to detect
783 * overflow. But to minimize the damage in case of a bug, set up the
784 * binding table entry to just allow a single output.
785 */
786 buffer_size_minus_1 = 0;
787 }
788 width = buffer_size_minus_1 & 0x7f;
789 height = (buffer_size_minus_1 & 0xfff80) >> 7;
790 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
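/* buffer_size_minus_1 is decomposed into its low 7 bits (width), the next
 * 13 bits (height) and the next 7 bits (depth), for a 27-bit entry count.
 * For example, buffer_size_minus_1 == 0x12345 gives width == 0x45,
 * height == 0x246 and depth == 0.
 */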
791
792 switch (num_vector_components) {
793 case 1:
794 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
795 break;
796 case 2:
797 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
798 break;
799 case 3:
800 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
801 break;
802 case 4:
803 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
804 break;
805 default:
806 unreachable("Invalid vector size for transform feedback output");
807 }
808
809 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
810 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
811 surface_format << BRW_SURFACE_FORMAT_SHIFT |
812 BRW_SURFACE_RC_READ_WRITE;
813 surf[1] = bo->offset64 + offset_bytes; /* reloc */
814 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
815 height << BRW_SURFACE_HEIGHT_SHIFT);
816 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
817 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
818 surf[4] = 0;
819 surf[5] = 0;
820
821 /* Emit relocation to surface contents. */
822 drm_intel_bo_emit_reloc(brw->batch.bo,
823 *out_offset + 4,
824 bo, offset_bytes,
825 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
826 }
827
828 /* Creates a new WM constant buffer reflecting the current fragment program's
829 * constants, if needed by the fragment program.
830 *
831 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
832 * state atom.
833 */
834 static void
835 brw_upload_wm_pull_constants(struct brw_context *brw)
836 {
837 struct brw_stage_state *stage_state = &brw->wm.base;
838 /* BRW_NEW_FRAGMENT_PROGRAM */
839 struct brw_program *fp = (struct brw_program *) brw->fragment_program;
840 /* BRW_NEW_FS_PROG_DATA */
841 struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data;
842
843 _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_FRAGMENT);
844 /* _NEW_PROGRAM_CONSTANTS */
845 brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program,
846 stage_state, prog_data);
847 }
848
849 const struct brw_tracked_state brw_wm_pull_constants = {
850 .dirty = {
851 .mesa = _NEW_PROGRAM_CONSTANTS,
852 .brw = BRW_NEW_BATCH |
853 BRW_NEW_BLORP |
854 BRW_NEW_FRAGMENT_PROGRAM |
855 BRW_NEW_FS_PROG_DATA,
856 },
857 .emit = brw_upload_wm_pull_constants,
858 };
859
860 /**
861 * Creates a null renderbuffer surface.
862 *
863 * This is used when the shader doesn't write to any color output. An FB
864 * write to target 0 will still be emitted, because that's how the thread is
865 * terminated (and computed depth is returned), so we need to have the
866 * hardware discard the target 0 color output.
867 */
868 static void
869 brw_emit_null_surface_state(struct brw_context *brw,
870 unsigned width,
871 unsigned height,
872 unsigned samples,
873 uint32_t *out_offset)
874 {
875 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
876 * Notes):
877 *
878 * A null surface will be used in instances where an actual surface is
879 * not bound. When a write message is generated to a null surface, no
880 * actual surface is written to. When a read message (including any
881 * sampling engine message) is generated to a null surface, the result
882 * is all zeros. Note that a null surface type is allowed to be used
883 * with all messages, even if it is not specifically indicated as
884 * supported. All of the remaining fields in surface state are ignored
885 * for null surfaces, with the following exceptions:
886 *
887 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
888 * depth buffer’s corresponding state for all render target surfaces,
889 * including null.
890 *
891 * - Surface Format must be R8G8B8A8_UNORM.
892 */
893 unsigned surface_type = BRW_SURFACE_NULL;
894 drm_intel_bo *bo = NULL;
895 unsigned pitch_minus_1 = 0;
896 uint32_t multisampling_state = 0;
897 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
898 out_offset);
899
900 if (samples > 1) {
901 /* On Gen6, null render targets seem to cause GPU hangs when
902 * multisampling.  So work around this problem by rendering into a dummy
903 * color buffer.
904 *
905 * To decrease the amount of memory needed by the workaround buffer, we
906 * set its pitch to 128 bytes (the width of a Y tile). This means that
907 * the amount of memory needed for the workaround buffer is
908 * (width_in_tiles + height_in_tiles - 1) tiles.
909 *
910 * Note that since the workaround buffer will be interpreted by the
911 * hardware as an interleaved multisampled buffer, we need to compute
912 * width_in_tiles and height_in_tiles by dividing the width and height
913 * by 16 rather than the normal Y-tile size of 32.
914 */
915 unsigned width_in_tiles = ALIGN(width, 16) / 16;
916 unsigned height_in_tiles = ALIGN(height, 16) / 16;
917 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
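/* For example, a 1920x1080 target gives width_in_tiles == 120 and
 * height_in_tiles == 68, so the scratch BO only needs
 * (120 + 68 - 1) * 4096 bytes (about 748 KiB) rather than a full-size
 * dummy color buffer.
 */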
918 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
919 size_needed);
920 bo = brw->wm.multisampled_null_render_target_bo;
921 surface_type = BRW_SURFACE_2D;
922 pitch_minus_1 = 127;
923 multisampling_state = brw_get_surface_num_multisamples(samples);
924 }
925
926 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
927 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
928 if (brw->gen < 6) {
929 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
930 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
931 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
932 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
933 }
934 surf[1] = bo ? bo->offset64 : 0;
935 surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
936 (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
937
938 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
939 * Notes):
940 *
941 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
942 */
943 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
944 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
945 surf[4] = multisampling_state;
946 surf[5] = 0;
947
948 if (bo) {
949 drm_intel_bo_emit_reloc(brw->batch.bo,
950 *out_offset + 4,
951 bo, 0,
952 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
953 }
954 }
955
956 /**
957 * Sets up a surface state structure to point at the given region.
958 * While it is only used for the front/back buffer currently, it should be
959 * usable for further buffers when doing ARB_draw_buffers support.
960 */
961 static uint32_t
962 gen4_update_renderbuffer_surface(struct brw_context *brw,
963 struct gl_renderbuffer *rb,
964 uint32_t flags, unsigned unit,
965 uint32_t surf_index)
966 {
967 struct gl_context *ctx = &brw->ctx;
968 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
969 struct intel_mipmap_tree *mt = irb->mt;
970 uint32_t *surf;
971 uint32_t tile_x, tile_y;
972 uint32_t format = 0;
973 uint32_t offset;
974 /* _NEW_BUFFERS */
975 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
976 /* BRW_NEW_FS_PROG_DATA */
977
978 assert(!(flags & INTEL_RENDERBUFFER_LAYERED));
979 assert(!(flags & INTEL_AUX_BUFFER_DISABLED));
980
981 if (rb->TexImage && !brw->has_surface_tile_offset) {
982 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
983
984 if (tile_x != 0 || tile_y != 0) {
985 /* Original gen4 hardware couldn't draw to a non-tile-aligned
986 * destination in a miptree unless you actually set up your renderbuffer
987 * as a miptree and used the fragile lod/array_index/etc. controls to
988 * select the image. So, instead, we just make a new single-level
989 * miptree and render into that.
990 */
991 intel_renderbuffer_move_to_temp(brw, irb, false);
992 mt = irb->mt;
993 }
994 }
995
996 intel_miptree_used_for_rendering(irb->mt);
997
998 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, &offset);
999
1000 format = brw->render_target_format[rb_format];
1001 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
1002 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
1003 __func__, _mesa_get_format_name(rb_format));
1004 }
1005
1006 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
1007 format << BRW_SURFACE_FORMAT_SHIFT);
1008
1009 /* reloc */
1010 assert(mt->offset % mt->cpp == 0);
1011 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
1012 mt->bo->offset64 + mt->offset);
1013
1014 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
1015 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
1016
1017 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
1018 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
1019
1020 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
1021
1022 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
1023 /* Note that the low bits of these fields are missing, so
1024 * there's the possibility of getting in trouble.
1025 */
1026 assert(tile_x % 4 == 0);
1027 assert(tile_y % 2 == 0);
1028 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
1029 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
1030 (mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
1031
1032 if (brw->gen < 6) {
1033 /* _NEW_COLOR */
1034 if (!ctx->Color.ColorLogicOpEnabled && !ctx->Color._AdvancedBlendMode &&
1035 (ctx->Color.BlendEnabled & (1 << unit)))
1036 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
1037
1038 if (!ctx->Color.ColorMask[unit][0])
1039 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
1040 if (!ctx->Color.ColorMask[unit][1])
1041 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
1042 if (!ctx->Color.ColorMask[unit][2])
1043 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
1044
1045 /* Disable writes to the alpha component when the
1046 * renderbuffer is XRGB.
1047 */
1048 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
1049 !ctx->Color.ColorMask[unit][3]) {
1050 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
1051 }
1052 }
1053
1054 drm_intel_bo_emit_reloc(brw->batch.bo,
1055 offset + 4,
1056 mt->bo,
1057 surf[1] - mt->bo->offset64,
1058 I915_GEM_DOMAIN_RENDER,
1059 I915_GEM_DOMAIN_RENDER);
1060
1061 return offset;
1062 }
1063
1064 /**
1065 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
1066 */
1067 void
1068 brw_update_renderbuffer_surfaces(struct brw_context *brw,
1069 const struct gl_framebuffer *fb,
1070 uint32_t render_target_start,
1071 uint32_t *surf_offset)
1072 {
1073 GLuint i;
1074 const unsigned int w = _mesa_geometric_width(fb);
1075 const unsigned int h = _mesa_geometric_height(fb);
1076 const unsigned int s = _mesa_geometric_samples(fb);
1077
1078 /* Update surfaces for drawing buffers */
1079 if (fb->_NumColorDrawBuffers >= 1) {
1080 for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
1081 const uint32_t surf_index = render_target_start + i;
1082 const int flags = (_mesa_geometric_layers(fb) > 0 ?
1083 INTEL_RENDERBUFFER_LAYERED : 0) |
1084 (brw->draw_aux_buffer_disabled[i] ?
1085 INTEL_AUX_BUFFER_DISABLED : 0);
1086
1087 if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
1088 surf_offset[surf_index] =
1089 brw->vtbl.update_renderbuffer_surface(
1090 brw, fb->_ColorDrawBuffers[i], flags, i, surf_index);
1091 } else {
1092 brw->vtbl.emit_null_surface_state(brw, w, h, s,
1093 &surf_offset[surf_index]);
1094 }
1095 }
1096 } else {
1097 const uint32_t surf_index = render_target_start;
1098 brw->vtbl.emit_null_surface_state(brw, w, h, s,
1099 &surf_offset[surf_index]);
1100 }
1101 }
1102
1103 static void
1104 update_renderbuffer_surfaces(struct brw_context *brw)
1105 {
1106 const struct gl_context *ctx = &brw->ctx;
1107
1108 /* BRW_NEW_FS_PROG_DATA */
1109 const struct brw_wm_prog_data *wm_prog_data =
1110 brw_wm_prog_data(brw->wm.base.prog_data);
1111
1112 /* _NEW_BUFFERS | _NEW_COLOR */
1113 const struct gl_framebuffer *fb = ctx->DrawBuffer;
1114 brw_update_renderbuffer_surfaces(
1115 brw, fb,
1116 wm_prog_data->binding_table.render_target_start,
1117 brw->wm.base.surf_offset);
1118 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1119 }
1120
1121 const struct brw_tracked_state brw_renderbuffer_surfaces = {
1122 .dirty = {
1123 .mesa = _NEW_BUFFERS |
1124 _NEW_COLOR,
1125 .brw = BRW_NEW_BATCH |
1126 BRW_NEW_BLORP |
1127 BRW_NEW_FS_PROG_DATA,
1128 },
1129 .emit = update_renderbuffer_surfaces,
1130 };
1131
1132 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
1133 .dirty = {
1134 .mesa = _NEW_BUFFERS,
1135 .brw = BRW_NEW_BATCH |
1136 BRW_NEW_BLORP,
1137 },
1138 .emit = update_renderbuffer_surfaces,
1139 };
1140
1141 static void
1142 update_renderbuffer_read_surfaces(struct brw_context *brw)
1143 {
1144 const struct gl_context *ctx = &brw->ctx;
1145
1146 /* BRW_NEW_FS_PROG_DATA */
1147 const struct brw_wm_prog_data *wm_prog_data =
1148 brw_wm_prog_data(brw->wm.base.prog_data);
1149
1150 /* BRW_NEW_FRAGMENT_PROGRAM */
1151 if (!ctx->Extensions.MESA_shader_framebuffer_fetch &&
1152 brw->fragment_program && brw->fragment_program->info.outputs_read) {
1153 /* _NEW_BUFFERS */
1154 const struct gl_framebuffer *fb = ctx->DrawBuffer;
1155
1156 for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
1157 struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];
1158 const struct intel_renderbuffer *irb = intel_renderbuffer(rb);
1159 const unsigned surf_index =
1160 wm_prog_data->binding_table.render_target_read_start + i;
1161 uint32_t *surf_offset = &brw->wm.base.surf_offset[surf_index];
1162
1163 if (irb) {
1164 const unsigned format = brw->render_target_format[
1165 _mesa_get_render_format(ctx, intel_rb_format(irb))];
1166 assert(isl_format_supports_sampling(&brw->screen->devinfo,
1167 format));
1168
1169 /* Override the target of the texture if the render buffer is a
1170 * single slice of a 3D texture (since the minimum array element
1171 * field of the surface state structure is ignored by the sampler
1172 * unit for 3D textures on some hardware), or if the render buffer
1173 * is a 1D array (since shaders always provide the array index
1174 * coordinate at the Z component to avoid state-dependent
1175 * recompiles when changing the texture target of the
1176 * framebuffer).
1177 */
1178 const GLenum target =
1179 (irb->mt->target == GL_TEXTURE_3D &&
1180 irb->layer_count == 1) ? GL_TEXTURE_2D :
1181 irb->mt->target == GL_TEXTURE_1D_ARRAY ? GL_TEXTURE_2D_ARRAY :
1182 irb->mt->target;
1183
1184 /* intel_renderbuffer::mt_layer is expressed in sample units for
1185 * the UMS and CMS multisample layouts, but
1186 * intel_renderbuffer::layer_count is expressed in units of whole
1187 * logical layers regardless of the multisample layout.
1188 */
1189 const unsigned mt_layer_unit =
1190 (irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_UMS ||
1191 irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) ?
1192 MAX2(irb->mt->num_samples, 1) : 1;
1193
1194 const struct isl_view view = {
1195 .format = format,
1196 .base_level = irb->mt_level - irb->mt->first_level,
1197 .levels = 1,
1198 .base_array_layer = irb->mt_layer / mt_layer_unit,
1199 .array_len = irb->layer_count,
1200 .swizzle = ISL_SWIZZLE_IDENTITY,
1201 .usage = ISL_SURF_USAGE_TEXTURE_BIT,
1202 };
1203
1204 const int flags = brw->draw_aux_buffer_disabled[i] ?
1205 INTEL_AUX_BUFFER_DISABLED : 0;
1206 brw_emit_surface_state(brw, irb->mt, flags, target, view,
1207 tex_mocs[brw->gen],
1208 surf_offset, surf_index,
1209 I915_GEM_DOMAIN_SAMPLER, 0);
1210
1211 } else {
1212 brw->vtbl.emit_null_surface_state(
1213 brw, _mesa_geometric_width(fb), _mesa_geometric_height(fb),
1214 _mesa_geometric_samples(fb), surf_offset);
1215 }
1216 }
1217
1218 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1219 }
1220 }
1221
1222 const struct brw_tracked_state brw_renderbuffer_read_surfaces = {
1223 .dirty = {
1224 .mesa = _NEW_BUFFERS,
1225 .brw = BRW_NEW_BATCH |
1226 BRW_NEW_FRAGMENT_PROGRAM |
1227 BRW_NEW_FS_PROG_DATA,
1228 },
1229 .emit = update_renderbuffer_read_surfaces,
1230 };
1231
1232 static void
1233 update_stage_texture_surfaces(struct brw_context *brw,
1234 const struct gl_program *prog,
1235 struct brw_stage_state *stage_state,
1236 bool for_gather, uint32_t plane)
1237 {
1238 if (!prog)
1239 return;
1240
1241 struct gl_context *ctx = &brw->ctx;
1242
1243 uint32_t *surf_offset = stage_state->surf_offset;
1244
1245 /* BRW_NEW_*_PROG_DATA */
1246 if (for_gather)
1247 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
1248 else
1249 surf_offset += stage_state->prog_data->binding_table.plane_start[plane];
1250
1251 unsigned num_samplers = util_last_bit(prog->SamplersUsed);
1252 for (unsigned s = 0; s < num_samplers; s++) {
1253 surf_offset[s] = 0;
1254
1255 if (prog->SamplersUsed & (1 << s)) {
1256 const unsigned unit = prog->SamplerUnits[s];
1257
1258 /* _NEW_TEXTURE */
1259 if (ctx->Texture.Unit[unit]._Current) {
1260 brw_update_texture_surface(ctx, unit, surf_offset + s, for_gather, plane);
1261 }
1262 }
1263 }
1264 }
1265
1266
1267 /**
1268 * Construct SURFACE_STATE objects for enabled textures.
1269 */
1270 static void
1271 brw_update_texture_surfaces(struct brw_context *brw)
1272 {
1273 /* BRW_NEW_VERTEX_PROGRAM */
1274 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
1275
1276 /* BRW_NEW_TESS_PROGRAMS */
1277 struct gl_program *tcs = (struct gl_program *) brw->tess_ctrl_program;
1278 struct gl_program *tes = (struct gl_program *) brw->tess_eval_program;
1279
1280 /* BRW_NEW_GEOMETRY_PROGRAM */
1281 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
1282
1283 /* BRW_NEW_FRAGMENT_PROGRAM */
1284 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
1285
1286 /* _NEW_TEXTURE */
1287 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false, 0);
1288 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false, 0);
1289 update_stage_texture_surfaces(brw, tes, &brw->tes.base, false, 0);
1290 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false, 0);
1291 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 0);
1292
1293 /* emit alternate set of surface state for gather. this
1294 * allows the surface format to be overridden for only the
1295 * gather4 messages. */
1296 if (brw->gen < 8) {
1297 if (vs && vs->nir->info->uses_texture_gather)
1298 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true, 0);
1299 if (tcs && tcs->nir->info->uses_texture_gather)
1300 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true, 0);
1301 if (tes && tes->nir->info->uses_texture_gather)
1302 update_stage_texture_surfaces(brw, tes, &brw->tes.base, true, 0);
1303 if (gs && gs->nir->info->uses_texture_gather)
1304 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true, 0);
1305 if (fs && fs->nir->info->uses_texture_gather)
1306 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true, 0);
1307 }
1308
1309 if (fs) {
1310 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 1);
1311 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 2);
1312 }
1313
1314 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1315 }
1316
1317 const struct brw_tracked_state brw_texture_surfaces = {
1318 .dirty = {
1319 .mesa = _NEW_TEXTURE,
1320 .brw = BRW_NEW_BATCH |
1321 BRW_NEW_BLORP |
1322 BRW_NEW_FRAGMENT_PROGRAM |
1323 BRW_NEW_FS_PROG_DATA |
1324 BRW_NEW_GEOMETRY_PROGRAM |
1325 BRW_NEW_GS_PROG_DATA |
1326 BRW_NEW_TESS_PROGRAMS |
1327 BRW_NEW_TCS_PROG_DATA |
1328 BRW_NEW_TES_PROG_DATA |
1329 BRW_NEW_TEXTURE_BUFFER |
1330 BRW_NEW_VERTEX_PROGRAM |
1331 BRW_NEW_VS_PROG_DATA,
1332 },
1333 .emit = brw_update_texture_surfaces,
1334 };
1335
1336 static void
1337 brw_update_cs_texture_surfaces(struct brw_context *brw)
1338 {
1339 /* BRW_NEW_COMPUTE_PROGRAM */
1340 struct gl_program *cs = (struct gl_program *) brw->compute_program;
1341
1342 /* _NEW_TEXTURE */
1343 update_stage_texture_surfaces(brw, cs, &brw->cs.base, false, 0);
1344
1345 /* emit alternate set of surface state for gather. this
1346 * allows the surface format to be overridden for only the
1347 * gather4 messages.
1348 */
1349 if (brw->gen < 8) {
1350 if (cs && cs->nir->info->uses_texture_gather)
1351 update_stage_texture_surfaces(brw, cs, &brw->cs.base, true, 0);
1352 }
1353
1354 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1355 }
1356
1357 const struct brw_tracked_state brw_cs_texture_surfaces = {
1358 .dirty = {
1359 .mesa = _NEW_TEXTURE,
1360 .brw = BRW_NEW_BATCH |
1361 BRW_NEW_BLORP |
1362 BRW_NEW_COMPUTE_PROGRAM,
1363 },
1364 .emit = brw_update_cs_texture_surfaces,
1365 };
1366
1367
1368 void
1369 brw_upload_ubo_surfaces(struct brw_context *brw,
1370 struct gl_linked_shader *shader,
1371 struct brw_stage_state *stage_state,
1372 struct brw_stage_prog_data *prog_data)
1373 {
1374 struct gl_context *ctx = &brw->ctx;
1375
1376 if (!shader)
1377 return;
1378
1379 uint32_t *ubo_surf_offsets =
1380 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
1381
1382 for (int i = 0; i < shader->NumUniformBlocks; i++) {
1383 struct gl_uniform_buffer_binding *binding =
1384 &ctx->UniformBufferBindings[shader->UniformBlocks[i]->Binding];
1385
1386 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
1387 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]);
1388 } else {
1389 struct intel_buffer_object *intel_bo =
1390 intel_buffer_object(binding->BufferObject);
1391 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
1392 if (!binding->AutomaticSize)
1393 size = MIN2(size, binding->Size);
1394 drm_intel_bo *bo =
1395 intel_bufferobj_buffer(brw, intel_bo,
1396 binding->Offset,
1397 size);
1398 brw_create_constant_surface(brw, bo, binding->Offset,
1399 size,
1400 &ubo_surf_offsets[i]);
1401 }
1402 }
1403
1404 uint32_t *ssbo_surf_offsets =
1405 &stage_state->surf_offset[prog_data->binding_table.ssbo_start];
1406
1407 for (int i = 0; i < shader->NumShaderStorageBlocks; i++) {
1408 struct gl_shader_storage_buffer_binding *binding =
1409 &ctx->ShaderStorageBufferBindings[shader->ShaderStorageBlocks[i]->Binding];
1410
1411 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
1412 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]);
1413 } else {
1414 struct intel_buffer_object *intel_bo =
1415 intel_buffer_object(binding->BufferObject);
1416 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
1417 if (!binding->AutomaticSize)
1418 size = MIN2(size, binding->Size);
1419 drm_intel_bo *bo =
1420 intel_bufferobj_buffer(brw, intel_bo,
1421 binding->Offset,
1422 size);
1423 brw_create_buffer_surface(brw, bo, binding->Offset,
1424 size,
1425 &ssbo_surf_offsets[i]);
1426 }
1427 }
1428
1429 if (shader->NumUniformBlocks || shader->NumShaderStorageBlocks)
1430 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1431 }
1432
1433 static void
1434 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
1435 {
1436 struct gl_context *ctx = &brw->ctx;
1437 /* _NEW_PROGRAM */
1438 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1439
1440 if (!prog)
1441 return;
1442
1443 /* BRW_NEW_FS_PROG_DATA */
1444 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1445 &brw->wm.base, brw->wm.base.prog_data);
1446 }
1447
1448 const struct brw_tracked_state brw_wm_ubo_surfaces = {
1449 .dirty = {
1450 .mesa = _NEW_PROGRAM,
1451 .brw = BRW_NEW_BATCH |
1452 BRW_NEW_BLORP |
1453 BRW_NEW_FS_PROG_DATA |
1454 BRW_NEW_UNIFORM_BUFFER,
1455 },
1456 .emit = brw_upload_wm_ubo_surfaces,
1457 };
1458
1459 static void
1460 brw_upload_cs_ubo_surfaces(struct brw_context *brw)
1461 {
1462 struct gl_context *ctx = &brw->ctx;
1463 /* _NEW_PROGRAM */
1464 struct gl_shader_program *prog =
1465 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1466
1467 if (!prog)
1468 return;
1469
1470 /* BRW_NEW_CS_PROG_DATA */
1471 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1472 &brw->cs.base, brw->cs.base.prog_data);
1473 }
1474
1475 const struct brw_tracked_state brw_cs_ubo_surfaces = {
1476 .dirty = {
1477 .mesa = _NEW_PROGRAM,
1478 .brw = BRW_NEW_BATCH |
1479 BRW_NEW_BLORP |
1480 BRW_NEW_CS_PROG_DATA |
1481 BRW_NEW_UNIFORM_BUFFER,
1482 },
1483 .emit = brw_upload_cs_ubo_surfaces,
1484 };
1485
1486 void
1487 brw_upload_abo_surfaces(struct brw_context *brw,
1488 struct gl_linked_shader *shader,
1489 struct brw_stage_state *stage_state,
1490 struct brw_stage_prog_data *prog_data)
1491 {
1492 struct gl_context *ctx = &brw->ctx;
1493 uint32_t *surf_offsets =
1494 &stage_state->surf_offset[prog_data->binding_table.abo_start];
1495
1496 if (shader && shader->NumAtomicBuffers) {
1497 for (unsigned i = 0; i < shader->NumAtomicBuffers; i++) {
1498 struct gl_atomic_buffer_binding *binding =
1499 &ctx->AtomicBufferBindings[shader->AtomicBuffers[i]->Binding];
1500 struct intel_buffer_object *intel_bo =
1501 intel_buffer_object(binding->BufferObject);
1502 drm_intel_bo *bo = intel_bufferobj_buffer(
1503 brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
1504
1505 brw_emit_buffer_surface_state(brw, &surf_offsets[i], bo,
1506 binding->Offset, BRW_SURFACEFORMAT_RAW,
1507 bo->size - binding->Offset, 1, true);
1508 }
1509
1510 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1511 }
1512 }
1513
1514 static void
1515 brw_upload_wm_abo_surfaces(struct brw_context *brw)
1516 {
1517 struct gl_context *ctx = &brw->ctx;
1518 /* _NEW_PROGRAM */
1519 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1520
1521 if (prog) {
1522 /* BRW_NEW_FS_PROG_DATA */
1523 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1524 &brw->wm.base, brw->wm.base.prog_data);
1525 }
1526 }
1527
1528 const struct brw_tracked_state brw_wm_abo_surfaces = {
1529 .dirty = {
1530 .mesa = _NEW_PROGRAM,
1531 .brw = BRW_NEW_ATOMIC_BUFFER |
1532 BRW_NEW_BLORP |
1533 BRW_NEW_BATCH |
1534 BRW_NEW_FS_PROG_DATA,
1535 },
1536 .emit = brw_upload_wm_abo_surfaces,
1537 };
1538
1539 static void
1540 brw_upload_cs_abo_surfaces(struct brw_context *brw)
1541 {
1542 struct gl_context *ctx = &brw->ctx;
1543 /* _NEW_PROGRAM */
1544 struct gl_shader_program *prog =
1545 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1546
1547 if (prog) {
1548 /* BRW_NEW_CS_PROG_DATA */
1549 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1550 &brw->cs.base, brw->cs.base.prog_data);
1551 }
1552 }
1553
1554 const struct brw_tracked_state brw_cs_abo_surfaces = {
1555 .dirty = {
1556 .mesa = _NEW_PROGRAM,
1557 .brw = BRW_NEW_ATOMIC_BUFFER |
1558 BRW_NEW_BLORP |
1559 BRW_NEW_BATCH |
1560 BRW_NEW_CS_PROG_DATA,
1561 },
1562 .emit = brw_upload_cs_abo_surfaces,
1563 };
1564
1565 static void
1566 brw_upload_cs_image_surfaces(struct brw_context *brw)
1567 {
1568 struct gl_context *ctx = &brw->ctx;
1569 /* _NEW_PROGRAM */
1570 struct gl_shader_program *prog =
1571 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1572
1573 if (prog) {
1574 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1575 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1576 &brw->cs.base, brw->cs.base.prog_data);
1577 }
1578 }
1579
1580 const struct brw_tracked_state brw_cs_image_surfaces = {
1581 .dirty = {
1582 .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
1583 .brw = BRW_NEW_BATCH |
1584 BRW_NEW_BLORP |
1585 BRW_NEW_CS_PROG_DATA |
1586 BRW_NEW_IMAGE_UNITS
1587 },
1588 .emit = brw_upload_cs_image_surfaces,
1589 };
1590
1591 static uint32_t
1592 get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
1593 {
1594 const struct gen_device_info *devinfo = &brw->screen->devinfo;
1595 uint32_t hw_format = brw_format_for_mesa_format(format);
1596 if (access == GL_WRITE_ONLY) {
1597 return hw_format;
1598 } else if (isl_has_matching_typed_storage_image_format(devinfo, hw_format)) {
1599 /* Typed surface reads support a very limited subset of the shader
1600 * image formats. Translate it into the closest format the
1601 * hardware supports.
1602 */
1603 return isl_lower_storage_image_format(devinfo, hw_format);
1604 } else {
1605 /* The hardware doesn't actually support a typed format that we can use
1606 * so we have to fall back to untyped read/write messages.
1607 */
1608 return BRW_SURFACEFORMAT_RAW;
1609 }
1610 }
1611
1612 static void
1613 update_default_image_param(struct brw_context *brw,
1614 struct gl_image_unit *u,
1615 unsigned surface_idx,
1616 struct brw_image_param *param)
1617 {
1618 memset(param, 0, sizeof(*param));
1619 param->surface_idx = surface_idx;
1620 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1621 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1622 * detailed explanation of these parameters.
1623 */
1624 param->swizzling[0] = 0xff;
1625 param->swizzling[1] = 0xff;
1626 }
1627
1628 static void
1629 update_buffer_image_param(struct brw_context *brw,
1630 struct gl_image_unit *u,
1631 unsigned surface_idx,
1632 struct brw_image_param *param)
1633 {
1634 struct gl_buffer_object *obj = u->TexObj->BufferObject;
1635 const uint32_t size = MIN2((uint32_t)u->TexObj->BufferSize, obj->Size);
1636 update_default_image_param(brw, u, surface_idx, param);
1637
1638 param->size[0] = size / _mesa_get_format_bytes(u->_ActualFormat);
1639 param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
1640 }
1641
1642 static void
1643 update_texture_image_param(struct brw_context *brw,
1644 struct gl_image_unit *u,
1645 unsigned surface_idx,
1646 struct brw_image_param *param)
1647 {
1648 struct intel_mipmap_tree *mt = intel_texture_object(u->TexObj)->mt;
1649
1650 update_default_image_param(brw, u, surface_idx, param);
1651
1652 param->size[0] = minify(mt->logical_width0, u->Level);
1653 param->size[1] = minify(mt->logical_height0, u->Level);
1654 param->size[2] = (!u->Layered ? 1 :
1655 u->TexObj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1656 u->TexObj->Target == GL_TEXTURE_3D ?
1657 minify(mt->logical_depth0, u->Level) :
1658 mt->logical_depth0);
1659
1660 intel_miptree_get_image_offset(mt, u->Level, u->_Layer,
1661 &param->offset[0],
1662 &param->offset[1]);
1663
1664 param->stride[0] = mt->cpp;
1665 param->stride[1] = mt->pitch / mt->cpp;
1666 param->stride[2] =
1667 brw_miptree_get_horizontal_slice_pitch(brw, mt, u->Level);
1668 param->stride[3] =
1669 brw_miptree_get_vertical_slice_pitch(brw, mt, u->Level);
1670
1671 if (mt->tiling == I915_TILING_X) {
1672 /* An X tile is a rectangular block of 512x8 bytes. */
1673 param->tiling[0] = _mesa_logbase2(512 / mt->cpp);
1674 param->tiling[1] = _mesa_logbase2(8);
1675
1676 if (brw->has_swizzling) {
1677 /* Right shifts required to swizzle bits 9 and 10 of the memory
1678 * address with bit 6.
1679 */
1680 param->swizzling[0] = 3;
1681 param->swizzling[1] = 4;
1682 }
1683 } else if (mt->tiling == I915_TILING_Y) {
1684 /* The layout of a Y-tiled surface in memory isn't really fundamentally
1685 * different from the layout of an X-tiled surface; we simply pretend that
1686 * the surface is broken up into a number of smaller 16Bx32 tiles, each
1687 * one arranged in X-major order just as is the case for X-tiling.
1688 */
1689 param->tiling[0] = _mesa_logbase2(16 / mt->cpp);
1690 param->tiling[1] = _mesa_logbase2(32);
1691
1692 if (brw->has_swizzling) {
1693 /* Right shift required to swizzle bit 9 of the memory address with
1694 * bit 6.
1695 */
1696 param->swizzling[0] = 3;
1697 }
1698 }
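/* Illustrative values for a 4-byte-per-pixel (cpp == 4) format: X-tiling
 * gives tiling[0] == log2(512 / 4) == 7 and tiling[1] == log2(8) == 3
 * (a 128x8-pixel tile), while Y-tiling gives tiling[0] == log2(16 / 4) == 2
 * and tiling[1] == log2(32) == 5 (a 4x32-pixel sub-tile).
 */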
1699
1700 /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The
1701 * address calculation algorithm (emit_address_calculation() in
1702 * brw_fs_surface_builder.cpp) handles this as a sort of tiling with
1703 * modulus equal to the LOD.
1704 */
1705 param->tiling[2] = (u->TexObj->Target == GL_TEXTURE_3D ? u->Level :
1706 0);
1707 }
1708
1709 static void
1710 update_image_surface(struct brw_context *brw,
1711 struct gl_image_unit *u,
1712 GLenum access,
1713 unsigned surface_idx,
1714 uint32_t *surf_offset,
1715 struct brw_image_param *param)
1716 {
1717 if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
1718 struct gl_texture_object *obj = u->TexObj;
1719 const unsigned format = get_image_format(brw, u->_ActualFormat, access);
1720
1721 if (obj->Target == GL_TEXTURE_BUFFER) {
1722 struct intel_buffer_object *intel_obj =
1723 intel_buffer_object(obj->BufferObject);
1724 const unsigned texel_size = (format == BRW_SURFACEFORMAT_RAW ? 1 :
1725 _mesa_get_format_bytes(u->_ActualFormat));
1726
1727 brw_emit_buffer_surface_state(
1728 brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
1729 format, intel_obj->Base.Size, texel_size,
1730 access != GL_READ_ONLY);
1731
1732 update_buffer_image_param(brw, u, surface_idx, param);
1733
1734 } else {
1735 struct intel_texture_object *intel_obj = intel_texture_object(obj);
1736 struct intel_mipmap_tree *mt = intel_obj->mt;
1737
1738 if (format == BRW_SURFACEFORMAT_RAW) {
1739 brw_emit_buffer_surface_state(
1740 brw, surf_offset, mt->bo, mt->offset,
1741 format, mt->bo->size - mt->offset, 1 /* pitch */,
1742 access != GL_READ_ONLY);
1743
1744 } else {
1745 const unsigned num_layers = (!u->Layered ? 1 :
1746 obj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1747 mt->logical_depth0);
1748
1749 struct isl_view view = {
1750 .format = format,
1751 .base_level = obj->MinLevel + u->Level,
1752 .levels = 1,
1753 .base_array_layer = obj->MinLayer + u->_Layer,
1754 .array_len = num_layers,
1755 .swizzle = ISL_SWIZZLE_IDENTITY,
1756 .usage = ISL_SURF_USAGE_STORAGE_BIT,
1757 };
1758
1759 const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
1760 const int flags =
1761 mt->fast_clear_state == INTEL_FAST_CLEAR_STATE_RESOLVED ?
1762 INTEL_AUX_BUFFER_DISABLED : 0;
1763 brw_emit_surface_state(brw, mt, flags, mt->target, view,
1764 tex_mocs[brw->gen],
1765 surf_offset, surf_index,
1766 I915_GEM_DOMAIN_SAMPLER,
1767 access == GL_READ_ONLY ? 0 :
1768 I915_GEM_DOMAIN_SAMPLER);
1769 }
1770
1771 update_texture_image_param(brw, u, surface_idx, param);
1772 }
1773
1774 } else {
1775 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset);
1776 update_default_image_param(brw, u, surface_idx, param);
1777 }
1778 }
1779
1780 void
1781 brw_upload_image_surfaces(struct brw_context *brw,
1782 struct gl_linked_shader *shader,
1783 struct brw_stage_state *stage_state,
1784 struct brw_stage_prog_data *prog_data)
1785 {
1786 struct gl_context *ctx = &brw->ctx;
1787
1788 if (shader && shader->NumImages) {
1789 for (unsigned i = 0; i < shader->NumImages; i++) {
1790 struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[i]];
1791 const unsigned surf_idx = prog_data->binding_table.image_start + i;
1792
1793 update_image_surface(brw, u, shader->ImageAccess[i],
1794 surf_idx,
1795 &stage_state->surf_offset[surf_idx],
1796 &prog_data->image_param[i]);
1797 }
1798
1799 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1800 /* This may have changed the image metadata that depends on the context
1801 * image unit state and is passed to the program as uniforms, so make sure
1802 * that push and pull constants are re-uploaded.
1803 */
1804 brw->NewGLState |= _NEW_PROGRAM_CONSTANTS;
1805 }
1806 }
1807
1808 static void
1809 brw_upload_wm_image_surfaces(struct brw_context *brw)
1810 {
1811 struct gl_context *ctx = &brw->ctx;
1812 /* BRW_NEW_FRAGMENT_PROGRAM */
1813 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1814
1815 if (prog) {
1816 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1817 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1818 &brw->wm.base, brw->wm.base.prog_data);
1819 }
1820 }
1821
1822 const struct brw_tracked_state brw_wm_image_surfaces = {
1823 .dirty = {
1824 .mesa = _NEW_TEXTURE,
1825 .brw = BRW_NEW_BATCH |
1826 BRW_NEW_BLORP |
1827 BRW_NEW_FRAGMENT_PROGRAM |
1828 BRW_NEW_FS_PROG_DATA |
1829 BRW_NEW_IMAGE_UNITS
1830 },
1831 .emit = brw_upload_wm_image_surfaces,
1832 };
1833
1834 void
1835 gen4_init_vtable_surface_functions(struct brw_context *brw)
1836 {
1837 brw->vtbl.update_renderbuffer_surface = gen4_update_renderbuffer_surface;
1838 brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
1839 }
1840
1841 void
1842 gen6_init_vtable_surface_functions(struct brw_context *brw)
1843 {
1844 gen4_init_vtable_surface_functions(brw);
1845 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
1846 }
1847
1848 static void
1849 brw_upload_cs_work_groups_surface(struct brw_context *brw)
1850 {
1851 struct gl_context *ctx = &brw->ctx;
1852 /* _NEW_PROGRAM */
1853 struct gl_shader_program *prog =
1854 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1855 /* BRW_NEW_CS_PROG_DATA */
1856 const struct brw_cs_prog_data *cs_prog_data =
1857 brw_cs_prog_data(brw->cs.base.prog_data);
1858
1859 if (prog && cs_prog_data->uses_num_work_groups) {
1860 const unsigned surf_idx =
1861 cs_prog_data->binding_table.work_groups_start;
1862 uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
1863 drm_intel_bo *bo;
1864 uint32_t bo_offset;
1865
1866 if (brw->compute.num_work_groups_bo == NULL) {
1867 bo = NULL;
1868 intel_upload_data(brw,
1869 (void *)brw->compute.num_work_groups,
1870 3 * sizeof(GLuint),
1871 sizeof(GLuint),
1872 &bo,
1873 &bo_offset);
1874 } else {
1875 bo = brw->compute.num_work_groups_bo;
1876 bo_offset = brw->compute.num_work_groups_offset;
1877 }
1878
1879 brw_emit_buffer_surface_state(brw, surf_offset,
1880 bo, bo_offset,
1881 BRW_SURFACEFORMAT_RAW,
1882 3 * sizeof(GLuint), 1, true);
1883 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1884 }
1885 }
1886
1887 const struct brw_tracked_state brw_cs_work_groups_surface = {
1888 .dirty = {
1889 .brw = BRW_NEW_BLORP |
1890 BRW_NEW_CS_PROG_DATA |
1891 BRW_NEW_CS_WORK_GROUPS
1892 },
1893 .emit = brw_upload_cs_work_groups_surface,
1894 };