2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keithw@vmware.com>
33 #include "compiler/nir/nir.h"
34 #include "main/context.h"
35 #include "main/blend.h"
36 #include "main/mtypes.h"
37 #include "main/samplerobj.h"
38 #include "main/shaderimage.h"
39 #include "main/teximage.h"
40 #include "program/prog_parameter.h"
41 #include "program/prog_instruction.h"
42 #include "main/framebuffer.h"
43 #include "main/shaderapi.h"
47 #include "intel_mipmap_tree.h"
48 #include "intel_batchbuffer.h"
49 #include "intel_tex.h"
50 #include "intel_fbo.h"
51 #include "intel_buffer_objects.h"
53 #include "brw_context.h"
54 #include "brw_state.h"
55 #include "brw_defines.h"
/* Flag bits passed through the "flags" parameter of the surface-state
 * emission helpers in this file.  The extract was missing the enum wrapper;
 * only the two enumerators and their values were visible.
 */
enum {
   /* The surface describes a layered (array) render target; gen4 asserts
    * this is never set (see gen4_update_renderbuffer_surface).
    */
   INTEL_RENDERBUFFER_LAYERED = 1 << 0,
   /* Do not hook up the auxiliary (MCS/HiZ) buffer for this surface
    * (checked in brw_emit_surface_state).
    */
   INTEL_AUX_BUFFER_DISABLED = 1 << 1,
};
/* Memory Object Control State (MOCS) lookup tables, indexed by GPU
 * generation (see the tex_mocs[brw->gen] use in
 * brw_emit_buffer_surface_state): tex_mocs for sampled surfaces, rb_mocs
 * for render targets.
 * NOTE(review): the per-generation initializer entries are missing from
 * this extract -- confirm the values against the original file.
 */
63 uint32_t tex_mocs
[] = {
69 uint32_t rb_mocs
[] = {
/* Build a SURFACE_STATE for @mt via isl and write it into the batch,
 * returning its offset through *surf_offset.  Handles the non-matching
 * dim-layout case by addressing a single level/slice through tile offsets,
 * hooks up the auxiliary (MCS or HiZ) surface unless
 * INTEL_AUX_BUFFER_DISABLED is set in @flags, and emits the relocations for
 * the main and aux buffer addresses.
 * NOTE(review): this extract is lossy -- several original lines (braces,
 * if/else headers, argument lines) are missing, so the text below is not
 * compilable as-is; compare against the original file before editing.
 */
76 brw_emit_surface_state(struct brw_context
*brw
,
77 struct intel_mipmap_tree
*mt
, uint32_t flags
,
78 GLenum target
, struct isl_view view
,
79 uint32_t mocs
, uint32_t *surf_offset
, int surf_index
,
80 unsigned read_domains
, unsigned write_domains
)
82 uint32_t tile_x
= mt
->level
[0].slice
[0].x_offset
;
83 uint32_t tile_y
= mt
->level
[0].slice
[0].y_offset
;
84 uint32_t offset
= mt
->offset
;
87 intel_miptree_get_isl_surf(brw
, mt
, &surf
);
89 surf
.dim
= get_isl_surf_dim(target
);
91 const enum isl_dim_layout dim_layout
=
92 get_isl_dim_layout(&brw
->screen
->devinfo
, mt
->tiling
, target
);
94 if (surf
.dim_layout
!= dim_layout
) {
95 /* The layout of the specified texture target is not compatible with the
96 * actual layout of the miptree structure in memory -- You're entering
97 * dangerous territory, this can only possibly work if you only intended
98 * to access a single level and slice of the texture, and the hardware
99 * supports the tile offset feature in order to allow non-tile-aligned
100 * base offsets, since we'll have to point the hardware to the first
101 * texel of the level instead of relying on the usual base level/layer
104 assert(brw
->has_surface_tile_offset
);
105 assert(view
.levels
== 1 && view
.array_len
== 1);
106 assert(tile_x
== 0 && tile_y
== 0);
108 offset
+= intel_miptree_get_tile_offsets(mt
, view
.base_level
,
109 view
.base_array_layer
,
112 /* Minify the logical dimensions of the texture. */
113 const unsigned l
= view
.base_level
- mt
->first_level
;
114 surf
.logical_level0_px
.width
= minify(surf
.logical_level0_px
.width
, l
);
115 surf
.logical_level0_px
.height
= surf
.dim
<= ISL_SURF_DIM_1D
? 1 :
116 minify(surf
.logical_level0_px
.height
, l
);
117 surf
.logical_level0_px
.depth
= surf
.dim
<= ISL_SURF_DIM_2D
? 1 :
118 minify(surf
.logical_level0_px
.depth
, l
);
120 /* Only the base level and layer can be addressed with the overridden
123 surf
.logical_level0_px
.array_len
= 1;
125 surf
.dim_layout
= dim_layout
;
127 /* The requested slice of the texture is now at the base level and
131 view
.base_array_layer
= 0;
134 union isl_color_value clear_color
= { .u32
= { 0, 0, 0, 0 } };
136 drm_intel_bo
*aux_bo
;
137 struct isl_surf
*aux_surf
= NULL
, aux_surf_s
;
138 uint64_t aux_offset
= 0;
139 enum isl_aux_usage aux_usage
= ISL_AUX_USAGE_NONE
;
140 if ((mt
->mcs_buf
|| intel_miptree_sample_with_hiz(brw
, mt
)) &&
141 !(flags
& INTEL_AUX_BUFFER_DISABLED
)) {
142 intel_miptree_get_aux_isl_surf(brw
, mt
, &aux_surf_s
, &aux_usage
);
143 aux_surf
= &aux_surf_s
;
146 assert(mt
->mcs_buf
->offset
== 0);
147 aux_bo
= mt
->mcs_buf
->bo
;
148 aux_offset
= mt
->mcs_buf
->bo
->offset64
+ mt
->mcs_buf
->offset
;
150 aux_bo
= mt
->hiz_buf
->aux_base
.bo
;
151 aux_offset
= mt
->hiz_buf
->aux_base
.bo
->offset64
+
152 mt
->hiz_buf
->mt
->offset
;
155 /* We only really need a clear color if we also have an auxiliary
156 * surface. Without one, it does nothing.
158 clear_color
= intel_miptree_get_isl_clear_color(brw
, mt
);
161 void *state
= __brw_state_batch(brw
, AUB_TRACE_SURFACE_STATE
,
162 brw
->isl_dev
.ss
.size
,
163 brw
->isl_dev
.ss
.align
,
164 surf_index
, surf_offset
);
166 isl_surf_fill_state(&brw
->isl_dev
, state
, .surf
= &surf
, .view
= &view
,
167 .address
= mt
->bo
->offset64
+ offset
,
168 .aux_surf
= aux_surf
, .aux_usage
= aux_usage
,
169 .aux_address
= aux_offset
,
170 .mocs
= mocs
, .clear_color
= clear_color
,
171 .x_offset_sa
= tile_x
, .y_offset_sa
= tile_y
);
173 drm_intel_bo_emit_reloc(brw
->batch
.bo
,
174 *surf_offset
+ brw
->isl_dev
.ss
.addr_offset
,
176 read_domains
, write_domains
);
179 /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
180 * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
181 * contain other control information. Since buffer addresses are always
182 * on 4k boundaries (and thus have their lower 12 bits zero), we can use
183 * an ordinary reloc to do the necessary address translation.
185 assert((aux_offset
& 0xfff) == 0);
186 uint32_t *aux_addr
= state
+ brw
->isl_dev
.ss
.aux_addr_offset
;
187 drm_intel_bo_emit_reloc(brw
->batch
.bo
,
188 *surf_offset
+ brw
->isl_dev
.ss
.aux_addr_offset
,
189 aux_bo
, *aux_addr
& 0xfff,
190 read_domains
, write_domains
);
/* Gen6+ render-target path: build an isl_view for the renderbuffer's
 * miptree (demoting the layer index by the UMS/CMS sample multiplier) and
 * hand it to brw_emit_surface_state with render domains.  The "unit"
 * parameter is explicitly unused here.
 * NOTE(review): this extract is lossy -- several original lines are
 * missing, so the text below is not compilable as-is.
 */
195 brw_update_renderbuffer_surface(struct brw_context
*brw
,
196 struct gl_renderbuffer
*rb
,
197 uint32_t flags
, unsigned unit
/* unused */,
200 struct gl_context
*ctx
= &brw
->ctx
;
201 struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
202 struct intel_mipmap_tree
*mt
= irb
->mt
;
205 assert(!(flags
& INTEL_AUX_BUFFER_DISABLED
));
208 assert(brw_render_target_supported(brw
, rb
));
209 intel_miptree_used_for_rendering(mt
);
211 mesa_format rb_format
= _mesa_get_render_format(ctx
, intel_rb_format(irb
));
212 if (unlikely(!brw
->format_supported_as_render_target
[rb_format
])) {
213 _mesa_problem(ctx
, "%s: renderbuffer format %s unsupported\n",
214 __func__
, _mesa_get_format_name(rb_format
));
217 const unsigned layer_multiplier
=
218 (irb
->mt
->msaa_layout
== INTEL_MSAA_LAYOUT_UMS
||
219 irb
->mt
->msaa_layout
== INTEL_MSAA_LAYOUT_CMS
) ?
220 MAX2(irb
->mt
->num_samples
, 1) : 1;
222 struct isl_view view
= {
223 .format
= brw
->render_target_format
[rb_format
],
224 .base_level
= irb
->mt_level
- irb
->mt
->first_level
,
226 .base_array_layer
= irb
->mt_layer
/ layer_multiplier
,
227 .array_len
= MAX2(irb
->layer_count
, 1),
228 .swizzle
= ISL_SWIZZLE_IDENTITY
,
229 .usage
= ISL_SURF_USAGE_RENDER_TARGET_BIT
,
233 brw_emit_surface_state(brw
, mt
, flags
, mt
->target
, view
,
236 I915_GEM_DOMAIN_RENDER
,
237 I915_GEM_DOMAIN_RENDER
);
242 translate_tex_target(GLenum target
)
246 case GL_TEXTURE_1D_ARRAY_EXT
:
247 return BRW_SURFACE_1D
;
249 case GL_TEXTURE_RECTANGLE_NV
:
250 return BRW_SURFACE_2D
;
253 case GL_TEXTURE_2D_ARRAY_EXT
:
254 case GL_TEXTURE_EXTERNAL_OES
:
255 case GL_TEXTURE_2D_MULTISAMPLE
:
256 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY
:
257 return BRW_SURFACE_2D
;
260 return BRW_SURFACE_3D
;
262 case GL_TEXTURE_CUBE_MAP
:
263 case GL_TEXTURE_CUBE_MAP_ARRAY
:
264 return BRW_SURFACE_CUBE
;
267 unreachable("not reached");
272 brw_get_surface_tiling_bits(uint32_t tiling
)
276 return BRW_SURFACE_TILED
;
278 return BRW_SURFACE_TILED
| BRW_SURFACE_TILED_Y
;
286 brw_get_surface_num_multisamples(unsigned num_samples
)
289 return BRW_SURFACE_MULTISAMPLECOUNT_4
;
291 return BRW_SURFACE_MULTISAMPLECOUNT_1
;
/**
 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 * for a texture object: fill a SWIZZLE_* table from the depth mode (or
 * from the base format's missing channels), then compose it with the
 * application's _Swizzle via GET_SWZ/MAKE_SWIZZLE4.
 * NOTE(review): this extract is lossy -- the switch case labels (GL_RED,
 * GL_ALPHA, GL_LUMINANCE, ...), default initializers and closing braces
 * are missing, so the text below is not compilable as-is.
 */
295 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
299 brw_get_texture_swizzle(const struct gl_context
*ctx
,
300 const struct gl_texture_object
*t
)
302 const struct gl_texture_image
*img
= t
->Image
[0][t
->BaseLevel
];
304 int swizzles
[SWIZZLE_NIL
+ 1] = {
314 if (img
->_BaseFormat
== GL_DEPTH_COMPONENT
||
315 img
->_BaseFormat
== GL_DEPTH_STENCIL
) {
316 GLenum depth_mode
= t
->DepthMode
;
318 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
319 * with depth component data specified with a sized internal format.
320 * Otherwise, it's left at the old default, GL_LUMINANCE.
322 if (_mesa_is_gles3(ctx
) &&
323 img
->InternalFormat
!= GL_DEPTH_COMPONENT
&&
324 img
->InternalFormat
!= GL_DEPTH_STENCIL
) {
328 switch (depth_mode
) {
330 swizzles
[0] = SWIZZLE_ZERO
;
331 swizzles
[1] = SWIZZLE_ZERO
;
332 swizzles
[2] = SWIZZLE_ZERO
;
333 swizzles
[3] = SWIZZLE_X
;
336 swizzles
[0] = SWIZZLE_X
;
337 swizzles
[1] = SWIZZLE_X
;
338 swizzles
[2] = SWIZZLE_X
;
339 swizzles
[3] = SWIZZLE_ONE
;
342 swizzles
[0] = SWIZZLE_X
;
343 swizzles
[1] = SWIZZLE_X
;
344 swizzles
[2] = SWIZZLE_X
;
345 swizzles
[3] = SWIZZLE_X
;
348 swizzles
[0] = SWIZZLE_X
;
349 swizzles
[1] = SWIZZLE_ZERO
;
350 swizzles
[2] = SWIZZLE_ZERO
;
351 swizzles
[3] = SWIZZLE_ONE
;
356 GLenum datatype
= _mesa_get_format_datatype(img
->TexFormat
);
358 /* If the texture's format is alpha-only, force R, G, and B to
359 * 0.0. Similarly, if the texture's format has no alpha channel,
360 * force the alpha value read to 1.0. This allows for the
361 * implementation to use an RGBA texture for any of these formats
362 * without leaking any unexpected values.
364 switch (img
->_BaseFormat
) {
366 swizzles
[0] = SWIZZLE_ZERO
;
367 swizzles
[1] = SWIZZLE_ZERO
;
368 swizzles
[2] = SWIZZLE_ZERO
;
371 if (t
->_IsIntegerFormat
|| datatype
== GL_SIGNED_NORMALIZED
) {
372 swizzles
[0] = SWIZZLE_X
;
373 swizzles
[1] = SWIZZLE_X
;
374 swizzles
[2] = SWIZZLE_X
;
375 swizzles
[3] = SWIZZLE_ONE
;
378 case GL_LUMINANCE_ALPHA
:
379 if (datatype
== GL_SIGNED_NORMALIZED
) {
380 swizzles
[0] = SWIZZLE_X
;
381 swizzles
[1] = SWIZZLE_X
;
382 swizzles
[2] = SWIZZLE_X
;
383 swizzles
[3] = SWIZZLE_W
;
387 if (datatype
== GL_SIGNED_NORMALIZED
) {
388 swizzles
[0] = SWIZZLE_X
;
389 swizzles
[1] = SWIZZLE_X
;
390 swizzles
[2] = SWIZZLE_X
;
391 swizzles
[3] = SWIZZLE_X
;
397 if (_mesa_get_format_bits(img
->TexFormat
, GL_ALPHA_BITS
) > 0)
398 swizzles
[3] = SWIZZLE_ONE
;
402 return MAKE_SWIZZLE4(swizzles
[GET_SWZ(t
->_Swizzle
, 0)],
403 swizzles
[GET_SWZ(t
->_Swizzle
, 1)],
404 swizzles
[GET_SWZ(t
->_Swizzle
, 2)],
405 swizzles
[GET_SWZ(t
->_Swizzle
, 3)]);
409 * Convert an swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+
410 * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED). The mappings are
412 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
415 * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE
417 * which is simply adding 4 then modding by 8 (or anding with 7).
419 * We then may need to apply workarounds for textureGather hardware bugs.
422 swizzle_to_scs(GLenum swizzle
, bool need_green_to_blue
)
424 unsigned scs
= (swizzle
+ 4) & 7;
426 return (need_green_to_blue
&& scs
== HSW_SCS_GREEN
) ? HSW_SCS_BLUE
: scs
;
430 brw_find_matching_rb(const struct gl_framebuffer
*fb
,
431 const struct intel_mipmap_tree
*mt
)
433 for (unsigned i
= 0; i
< fb
->_NumColorDrawBuffers
; i
++) {
434 const struct intel_renderbuffer
*irb
=
435 intel_renderbuffer(fb
->_ColorDrawBuffers
[i
]);
437 if (irb
&& irb
->mt
== mt
)
441 return fb
->_NumColorDrawBuffers
;
445 brw_texture_view_sane(const struct brw_context
*brw
,
446 const struct intel_mipmap_tree
*mt
, unsigned format
)
448 /* There are special cases only for lossless compression. */
449 if (!intel_miptree_is_lossless_compressed(brw
, mt
))
452 if (isl_format_supports_lossless_compression(&brw
->screen
->devinfo
,
456 /* Logic elsewhere needs to take care to resolve the color buffer prior
457 * to sampling it as non-compressed.
459 if (mt
->fast_clear_state
!= INTEL_FAST_CLEAR_STATE_RESOLVED
)
462 const struct gl_framebuffer
*fb
= brw
->ctx
.DrawBuffer
;
463 const unsigned rb_index
= brw_find_matching_rb(fb
, mt
);
465 if (rb_index
== fb
->_NumColorDrawBuffers
)
468 /* Underlying surface is compressed but it is sampled using a format that
469 * the sampling engine doesn't support as compressed. Compression must be
470 * disabled for both sampling engine and data port in case the same surface
471 * is used also as render target.
473 return brw
->draw_aux_buffer_disabled
[rb_index
];
477 brw_disable_aux_surface(const struct brw_context
*brw
,
478 const struct intel_mipmap_tree
*mt
)
480 /* Nothing to disable. */
484 /* There are special cases only for lossless compression. */
485 if (!intel_miptree_is_lossless_compressed(brw
, mt
))
486 return mt
->fast_clear_state
== INTEL_FAST_CLEAR_STATE_RESOLVED
;
488 const struct gl_framebuffer
*fb
= brw
->ctx
.DrawBuffer
;
489 const unsigned rb_index
= brw_find_matching_rb(fb
, mt
);
491 /* If we are drawing into this with compression enabled, then we must also
492 * enable compression when texturing from it regardless of
493 * fast_clear_state. If we don't then, after the first draw call with
494 * this setup, there will be data in the CCS which won't get picked up by
495 * subsequent texturing operations as required by ARB_texture_barrier.
496 * Since we don't want to re-emit the binding table or do a resolve
497 * operation every draw call, the easiest thing to do is just enable
498 * compression on the texturing side. This is completely safe to do
499 * since, if compressed texturing weren't allowed, we would have disabled
500 * compression of render targets in whatever_that_function_is_called().
502 if (rb_index
< fb
->_NumColorDrawBuffers
) {
503 if (brw
->draw_aux_buffer_disabled
[rb_index
]) {
504 assert(mt
->fast_clear_state
== INTEL_FAST_CLEAR_STATE_RESOLVED
);
507 return brw
->draw_aux_buffer_disabled
[rb_index
];
510 return mt
->fast_clear_state
== INTEL_FAST_CLEAR_STATE_RESOLVED
;
/* Build and emit the SURFACE_STATE for a sampled texture unit: dispatches
 * buffer textures to brw_update_buffer_texture_surface, selects the plane
 * miptree for multi-plane (YUV) objects, computes the swizzle (including
 * the GL_ALPHA depth-mode override), applies gen6/gen7 gather format
 * workarounds and stencil-sampling redirection to r8stencil_mt, then fills
 * an isl_view and calls brw_emit_surface_state with the sampler domain.
 * NOTE(review): this extract is lossy -- switch skeletons, returns and
 * braces are missing, so the text below is not compilable as-is.
 */
514 brw_update_texture_surface(struct gl_context
*ctx
,
516 uint32_t *surf_offset
,
520 struct brw_context
*brw
= brw_context(ctx
);
521 struct gl_texture_object
*obj
= ctx
->Texture
.Unit
[unit
]._Current
;
523 if (obj
->Target
== GL_TEXTURE_BUFFER
) {
524 brw_update_buffer_texture_surface(ctx
, unit
, surf_offset
);
527 struct intel_texture_object
*intel_obj
= intel_texture_object(obj
);
528 struct intel_mipmap_tree
*mt
= intel_obj
->mt
;
531 if (mt
->plane
[plane
- 1] == NULL
)
533 mt
= mt
->plane
[plane
- 1];
536 struct gl_sampler_object
*sampler
= _mesa_get_samplerobj(ctx
, unit
);
537 /* If this is a view with restricted NumLayers, then our effective depth
538 * is not just the miptree depth.
540 const unsigned view_num_layers
=
541 (obj
->Immutable
&& obj
->Target
!= GL_TEXTURE_3D
) ? obj
->NumLayers
:
544 /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
545 * texturing functions that return a float, as our code generation always
546 * selects the .x channel (which would always be 0).
548 struct gl_texture_image
*firstImage
= obj
->Image
[0][obj
->BaseLevel
];
549 const bool alpha_depth
= obj
->DepthMode
== GL_ALPHA
&&
550 (firstImage
->_BaseFormat
== GL_DEPTH_COMPONENT
||
551 firstImage
->_BaseFormat
== GL_DEPTH_STENCIL
);
552 const unsigned swizzle
= (unlikely(alpha_depth
) ? SWIZZLE_XYZW
:
553 brw_get_texture_swizzle(&brw
->ctx
, obj
));
555 mesa_format mesa_fmt
= plane
== 0 ? intel_obj
->_Format
: mt
->format
;
556 unsigned format
= translate_tex_format(brw
, mesa_fmt
,
557 sampler
->sRGBDecode
);
559 /* Implement gen6 and gen7 gather work-around */
560 bool need_green_to_blue
= false;
562 if (brw
->gen
== 7 && format
== BRW_SURFACEFORMAT_R32G32_FLOAT
) {
563 format
= BRW_SURFACEFORMAT_R32G32_FLOAT_LD
;
564 need_green_to_blue
= brw
->is_haswell
;
565 } else if (brw
->gen
== 6) {
566 /* Sandybridge's gather4 message is broken for integer formats.
567 * To work around this, we pretend the surface is UNORM for
568 * 8 or 16-bit formats, and emit shader instructions to recover
569 * the real INT/UINT value. For 32-bit formats, we pretend
570 * the surface is FLOAT, and simply reinterpret the resulting
574 case BRW_SURFACEFORMAT_R8_SINT
:
575 case BRW_SURFACEFORMAT_R8_UINT
:
576 format
= BRW_SURFACEFORMAT_R8_UNORM
;
579 case BRW_SURFACEFORMAT_R16_SINT
:
580 case BRW_SURFACEFORMAT_R16_UINT
:
581 format
= BRW_SURFACEFORMAT_R16_UNORM
;
584 case BRW_SURFACEFORMAT_R32_SINT
:
585 case BRW_SURFACEFORMAT_R32_UINT
:
586 format
= BRW_SURFACEFORMAT_R32_FLOAT
;
595 if (obj
->StencilSampling
&& firstImage
->_BaseFormat
== GL_DEPTH_STENCIL
) {
597 assert(mt
->r8stencil_mt
&& !mt
->stencil_mt
->r8stencil_needs_update
);
598 mt
= mt
->r8stencil_mt
;
602 format
= BRW_SURFACEFORMAT_R8_UINT
;
603 } else if (brw
->gen
<= 7 && mt
->format
== MESA_FORMAT_S_UINT8
) {
604 assert(mt
->r8stencil_mt
&& !mt
->r8stencil_needs_update
);
605 mt
= mt
->r8stencil_mt
;
606 format
= BRW_SURFACEFORMAT_R8_UINT
;
609 const int surf_index
= surf_offset
- &brw
->wm
.base
.surf_offset
[0];
611 struct isl_view view
= {
613 .base_level
= obj
->MinLevel
+ obj
->BaseLevel
,
614 .levels
= intel_obj
->_MaxLevel
- obj
->BaseLevel
+ 1,
615 .base_array_layer
= obj
->MinLayer
,
616 .array_len
= view_num_layers
,
618 .r
= swizzle_to_scs(GET_SWZ(swizzle
, 0), need_green_to_blue
),
619 .g
= swizzle_to_scs(GET_SWZ(swizzle
, 1), need_green_to_blue
),
620 .b
= swizzle_to_scs(GET_SWZ(swizzle
, 2), need_green_to_blue
),
621 .a
= swizzle_to_scs(GET_SWZ(swizzle
, 3), need_green_to_blue
),
623 .usage
= ISL_SURF_USAGE_TEXTURE_BIT
,
626 if (obj
->Target
== GL_TEXTURE_CUBE_MAP
||
627 obj
->Target
== GL_TEXTURE_CUBE_MAP_ARRAY
)
628 view
.usage
|= ISL_SURF_USAGE_CUBE_BIT
;
630 assert(brw_texture_view_sane(brw
, mt
, format
));
633 brw_disable_aux_surface(brw
, mt
) ? INTEL_AUX_BUFFER_DISABLED
: 0;
634 brw_emit_surface_state(brw
, mt
, flags
, mt
->target
, view
,
636 surf_offset
, surf_index
,
637 I915_GEM_DOMAIN_SAMPLER
, 0);
/* Emit a buffer SURFACE_STATE via isl_buffer_fill_state (using the
 * generation-indexed tex_mocs table) and the relocation for its address;
 * "rw" selects whether the sampler write domain is also set.
 * NOTE(review): this extract is lossy -- some parameter lines and
 * arguments are missing, so the text below is not compilable as-is.
 */
642 brw_emit_buffer_surface_state(struct brw_context
*brw
,
643 uint32_t *out_offset
,
645 unsigned buffer_offset
,
646 unsigned surface_format
,
647 unsigned buffer_size
,
651 uint32_t *dw
= brw_state_batch(brw
, AUB_TRACE_SURFACE_STATE
,
652 brw
->isl_dev
.ss
.size
,
653 brw
->isl_dev
.ss
.align
,
656 isl_buffer_fill_state(&brw
->isl_dev
, dw
,
657 .address
= (bo
? bo
->offset64
: 0) + buffer_offset
,
659 .format
= surface_format
,
661 .mocs
= tex_mocs
[brw
->gen
]);
664 drm_intel_bo_emit_reloc(brw
->batch
.bo
,
665 *out_offset
+ brw
->isl_dev
.ss
.addr_offset
,
667 I915_GEM_DOMAIN_SAMPLER
,
668 (rw
? I915_GEM_DOMAIN_SAMPLER
: 0));
/* Emit the surface state for a GL_TEXTURE_BUFFER texture unit: clamp the
 * view size to the underlying buffer object, translate the mesa format to
 * a BRW surface format (complaining for unsupported formats other than
 * the RGBA_FLOAT32 fallback), and delegate to
 * brw_emit_buffer_surface_state.
 * NOTE(review): this extract is lossy -- some lines are missing, so the
 * text below is not compilable as-is.
 */
673 brw_update_buffer_texture_surface(struct gl_context
*ctx
,
675 uint32_t *surf_offset
)
677 struct brw_context
*brw
= brw_context(ctx
);
678 struct gl_texture_object
*tObj
= ctx
->Texture
.Unit
[unit
]._Current
;
679 struct intel_buffer_object
*intel_obj
=
680 intel_buffer_object(tObj
->BufferObject
);
681 uint32_t size
= tObj
->BufferSize
;
682 drm_intel_bo
*bo
= NULL
;
683 mesa_format format
= tObj
->_BufferObjectFormat
;
684 uint32_t brw_format
= brw_format_for_mesa_format(format
);
685 int texel_size
= _mesa_get_format_bytes(format
);
688 size
= MIN2(size
, intel_obj
->Base
.Size
);
689 bo
= intel_bufferobj_buffer(brw
, intel_obj
, tObj
->BufferOffset
, size
);
692 if (brw_format
== 0 && format
!= MESA_FORMAT_RGBA_FLOAT32
) {
693 _mesa_problem(NULL
, "bad format %s for texture buffer\n",
694 _mesa_get_format_name(format
));
697 brw_emit_buffer_surface_state(brw
, surf_offset
, bo
,
/* Thin wrapper over brw_emit_buffer_surface_state that creates a constant
 * buffer surface in R32G32B32A32_FLOAT format.
 * NOTE(review): this extract is lossy -- parameter and size-argument
 * lines are missing, so the text below is not compilable as-is.
 */
706 * Create the constant buffer surface. Vertex/fragment shader constants will be
707 * read from this buffer with Data Port Read instructions/messages.
710 brw_create_constant_surface(struct brw_context
*brw
,
714 uint32_t *out_offset
)
716 brw_emit_buffer_surface_state(brw
, out_offset
, bo
, offset
,
717 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT
,
/* Thin wrapper over brw_emit_buffer_surface_state that creates an SSBO
 * surface using the RAW surface format (see the comment in the body for
 * why raw untyped messages are required).
 * NOTE(review): this extract is lossy -- parameter and size-argument
 * lines are missing, so the text below is not compilable as-is.
 */
722 * Create the buffer surface. Shader buffer variables will be
723 * read from / write to this buffer with Data Port Read/Write
724 * instructions/messages.
727 brw_create_buffer_surface(struct brw_context
*brw
,
731 uint32_t *out_offset
)
733 /* Use a raw surface so we can reuse existing untyped read/write/atomic
734 * messages. We need these specifically for the fragment shader since they
735 * include a pixel mask header that we need to ensure correct behavior
736 * with helper invocations, which cannot write to the buffer.
738 brw_emit_buffer_surface_state(brw
, out_offset
, bo
, offset
,
739 BRW_SURFACEFORMAT_RAW
,
/* Hand-build a gen6-style buffer SURFACE_STATE for transform feedback
 * (stream output): compute the entry count that fits in the buffer, pack
 * it into the width/height/depth fields, pick the R32* surface format for
 * the vector size, and emit the surface dwords plus the address reloc.
 * NOTE(review): this extract is lossy -- switch case labels, braces and
 * some argument lines are missing, so the text below is not compilable
 * as-is.
 */
744 * Set up a binding table entry for use by stream output logic (transform
747 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
750 brw_update_sol_surface(struct brw_context
*brw
,
751 struct gl_buffer_object
*buffer_obj
,
752 uint32_t *out_offset
, unsigned num_vector_components
,
753 unsigned stride_dwords
, unsigned offset_dwords
)
755 struct intel_buffer_object
*intel_bo
= intel_buffer_object(buffer_obj
);
756 uint32_t offset_bytes
= 4 * offset_dwords
;
757 drm_intel_bo
*bo
= intel_bufferobj_buffer(brw
, intel_bo
,
759 buffer_obj
->Size
- offset_bytes
);
760 uint32_t *surf
= brw_state_batch(brw
, AUB_TRACE_SURFACE_STATE
, 6 * 4, 32,
762 uint32_t pitch_minus_1
= 4*stride_dwords
- 1;
763 size_t size_dwords
= buffer_obj
->Size
/ 4;
764 uint32_t buffer_size_minus_1
, width
, height
, depth
, surface_format
;
766 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
767 * too big to map using a single binding table entry?
769 assert((size_dwords
- offset_dwords
) / stride_dwords
770 <= BRW_MAX_NUM_BUFFER_ENTRIES
);
772 if (size_dwords
> offset_dwords
+ num_vector_components
) {
773 /* There is room for at least 1 transform feedback output in the buffer.
774 * Compute the number of additional transform feedback outputs the
775 * buffer has room for.
777 buffer_size_minus_1
=
778 (size_dwords
- offset_dwords
- num_vector_components
) / stride_dwords
;
780 /* There isn't even room for a single transform feedback output in the
781 * buffer. We can't configure the binding table entry to prevent output
782 * entirely; we'll have to rely on the geometry shader to detect
783 * overflow. But to minimize the damage in case of a bug, set up the
784 * binding table entry to just allow a single output.
786 buffer_size_minus_1
= 0;
788 width
= buffer_size_minus_1
& 0x7f;
789 height
= (buffer_size_minus_1
& 0xfff80) >> 7;
790 depth
= (buffer_size_minus_1
& 0x7f00000) >> 20;
792 switch (num_vector_components
) {
794 surface_format
= BRW_SURFACEFORMAT_R32_FLOAT
;
797 surface_format
= BRW_SURFACEFORMAT_R32G32_FLOAT
;
800 surface_format
= BRW_SURFACEFORMAT_R32G32B32_FLOAT
;
803 surface_format
= BRW_SURFACEFORMAT_R32G32B32A32_FLOAT
;
806 unreachable("Invalid vector size for transform feedback output");
809 surf
[0] = BRW_SURFACE_BUFFER
<< BRW_SURFACE_TYPE_SHIFT
|
810 BRW_SURFACE_MIPMAPLAYOUT_BELOW
<< BRW_SURFACE_MIPLAYOUT_SHIFT
|
811 surface_format
<< BRW_SURFACE_FORMAT_SHIFT
|
812 BRW_SURFACE_RC_READ_WRITE
;
813 surf
[1] = bo
->offset64
+ offset_bytes
; /* reloc */
814 surf
[2] = (width
<< BRW_SURFACE_WIDTH_SHIFT
|
815 height
<< BRW_SURFACE_HEIGHT_SHIFT
);
816 surf
[3] = (depth
<< BRW_SURFACE_DEPTH_SHIFT
|
817 pitch_minus_1
<< BRW_SURFACE_PITCH_SHIFT
);
821 /* Emit relocation to surface contents. */
822 drm_intel_bo_emit_reloc(brw
->batch
.bo
,
825 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
);
/* State-atom emit function: upload the fragment program's pull constants
 * via brw_upload_pull_constants, flagging BRW_NEW_SURFACES.  Also writes
 * the fragment-stage subroutine indices.
 * NOTE(review): this extract is lossy -- braces and some comment lines
 * are missing, so the text below is not compilable as-is.
 */
828 /* Creates a new WM constant buffer reflecting the current fragment program's
829 * constants, if needed by the fragment program.
831 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
835 brw_upload_wm_pull_constants(struct brw_context
*brw
)
837 struct brw_stage_state
*stage_state
= &brw
->wm
.base
;
838 /* BRW_NEW_FRAGMENT_PROGRAM */
839 struct brw_program
*fp
= (struct brw_program
*) brw
->fragment_program
;
840 /* BRW_NEW_FS_PROG_DATA */
841 struct brw_stage_prog_data
*prog_data
= brw
->wm
.base
.prog_data
;
843 _mesa_shader_write_subroutine_indices(&brw
->ctx
, MESA_SHADER_FRAGMENT
);
844 /* _NEW_PROGRAM_CONSTANTS */
845 brw_upload_pull_constants(brw
, BRW_NEW_SURFACES
, &fp
->program
,
846 stage_state
, prog_data
);
/* Tracked-state atom for WM pull constants: re-emits when program
 * constants, the batch, the fragment program or the FS prog data change.
 * NOTE(review): this extract is lossy -- the .dirty wrapper, one flag
 * line and closing braces are missing.
 */
849 const struct brw_tracked_state brw_wm_pull_constants
= {
851 .mesa
= _NEW_PROGRAM_CONSTANTS
,
852 .brw
= BRW_NEW_BATCH
|
854 BRW_NEW_FRAGMENT_PROGRAM
|
855 BRW_NEW_FS_PROG_DATA
,
857 .emit
= brw_upload_wm_pull_constants
,
/* Emit a gen4-6 null-surface SURFACE_STATE with all four channel writes
 * disabled.  On Gen6 with multisampling, substitutes a dummy scratch BO
 * as a 2D render target to work around GPU hangs (see the in-body
 * comment for the sizing math).
 * NOTE(review): this extract is lossy -- parameter lines, if/else
 * headers and the final reloc arguments are missing, so the text below
 * is not compilable as-is.
 */
861 * Creates a null renderbuffer surface.
863 * This is used when the shader doesn't write to any color output. An FB
864 * write to target 0 will still be emitted, because that's how the thread is
865 * terminated (and computed depth is returned), so we need to have the
866 * hardware discard the target 0 color output..
869 brw_emit_null_surface_state(struct brw_context
*brw
,
873 uint32_t *out_offset
)
875 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
878 * A null surface will be used in instances where an actual surface is
879 * not bound. When a write message is generated to a null surface, no
880 * actual surface is written to. When a read message (including any
881 * sampling engine message) is generated to a null surface, the result
882 * is all zeros. Note that a null surface type is allowed to be used
883 * with all messages, even if it is not specificially indicated as
884 * supported. All of the remaining fields in surface state are ignored
885 * for null surfaces, with the following exceptions:
887 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
888 * depth buffer’s corresponding state for all render target surfaces,
891 * - Surface Format must be R8G8B8A8_UNORM.
893 unsigned surface_type
= BRW_SURFACE_NULL
;
894 drm_intel_bo
*bo
= NULL
;
895 unsigned pitch_minus_1
= 0;
896 uint32_t multisampling_state
= 0;
897 uint32_t *surf
= brw_state_batch(brw
, AUB_TRACE_SURFACE_STATE
, 6 * 4, 32,
901 /* On Gen6, null render targets seem to cause GPU hangs when
902 * multisampling. So work around this problem by rendering into dummy
905 * To decrease the amount of memory needed by the workaround buffer, we
906 * set its pitch to 128 bytes (the width of a Y tile). This means that
907 * the amount of memory needed for the workaround buffer is
908 * (width_in_tiles + height_in_tiles - 1) tiles.
910 * Note that since the workaround buffer will be interpreted by the
911 * hardware as an interleaved multisampled buffer, we need to compute
912 * width_in_tiles and height_in_tiles by dividing the width and height
913 * by 16 rather than the normal Y-tile size of 32.
915 unsigned width_in_tiles
= ALIGN(width
, 16) / 16;
916 unsigned height_in_tiles
= ALIGN(height
, 16) / 16;
917 unsigned size_needed
= (width_in_tiles
+ height_in_tiles
- 1) * 4096;
918 brw_get_scratch_bo(brw
, &brw
->wm
.multisampled_null_render_target_bo
,
920 bo
= brw
->wm
.multisampled_null_render_target_bo
;
921 surface_type
= BRW_SURFACE_2D
;
923 multisampling_state
= brw_get_surface_num_multisamples(samples
);
926 surf
[0] = (surface_type
<< BRW_SURFACE_TYPE_SHIFT
|
927 BRW_SURFACEFORMAT_B8G8R8A8_UNORM
<< BRW_SURFACE_FORMAT_SHIFT
);
929 surf
[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT
|
930 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT
|
931 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT
|
932 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT
);
934 surf
[1] = bo
? bo
->offset64
: 0;
935 surf
[2] = ((width
- 1) << BRW_SURFACE_WIDTH_SHIFT
|
936 (height
- 1) << BRW_SURFACE_HEIGHT_SHIFT
);
938 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
941 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
943 surf
[3] = (BRW_SURFACE_TILED
| BRW_SURFACE_TILED_Y
|
944 pitch_minus_1
<< BRW_SURFACE_PITCH_SHIFT
);
945 surf
[4] = multisampling_state
;
949 drm_intel_bo_emit_reloc(brw
->batch
.bo
,
952 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
);
/* Gen4-5 render-target path: hand-build the six SURFACE_STATE dwords for
 * a renderbuffer, moving non-tile-aligned images to a temporary miptree
 * when the hardware lacks surface tile offsets, applying blend-enable and
 * per-channel color-mask write disables from GL state, and emitting the
 * address reloc.
 * NOTE(review): this extract is lossy -- braces, returns and some reloc
 * arguments are missing, so the text below is not compilable as-is.
 */
957 * Sets up a surface state structure to point at the given region.
958 * While it is only used for the front/back buffer currently, it should be
959 * usable for further buffers when doing ARB_draw_buffer support.
962 gen4_update_renderbuffer_surface(struct brw_context
*brw
,
963 struct gl_renderbuffer
*rb
,
964 uint32_t flags
, unsigned unit
,
967 struct gl_context
*ctx
= &brw
->ctx
;
968 struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
969 struct intel_mipmap_tree
*mt
= irb
->mt
;
971 uint32_t tile_x
, tile_y
;
975 mesa_format rb_format
= _mesa_get_render_format(ctx
, intel_rb_format(irb
));
976 /* BRW_NEW_FS_PROG_DATA */
978 assert(!(flags
& INTEL_RENDERBUFFER_LAYERED
));
979 assert(!(flags
& INTEL_AUX_BUFFER_DISABLED
));
981 if (rb
->TexImage
&& !brw
->has_surface_tile_offset
) {
982 intel_renderbuffer_get_tile_offsets(irb
, &tile_x
, &tile_y
);
984 if (tile_x
!= 0 || tile_y
!= 0) {
985 /* Original gen4 hardware couldn't draw to a non-tile-aligned
986 * destination in a miptree unless you actually setup your renderbuffer
987 * as a miptree and used the fragile lod/array_index/etc. controls to
988 * select the image. So, instead, we just make a new single-level
989 * miptree and render into that.
991 intel_renderbuffer_move_to_temp(brw
, irb
, false);
996 intel_miptree_used_for_rendering(irb
->mt
);
998 surf
= brw_state_batch(brw
, AUB_TRACE_SURFACE_STATE
, 6 * 4, 32, &offset
);
1000 format
= brw
->render_target_format
[rb_format
];
1001 if (unlikely(!brw
->format_supported_as_render_target
[rb_format
])) {
1002 _mesa_problem(ctx
, "%s: renderbuffer format %s unsupported\n",
1003 __func__
, _mesa_get_format_name(rb_format
));
1006 surf
[0] = (BRW_SURFACE_2D
<< BRW_SURFACE_TYPE_SHIFT
|
1007 format
<< BRW_SURFACE_FORMAT_SHIFT
);
1010 assert(mt
->offset
% mt
->cpp
== 0);
1011 surf
[1] = (intel_renderbuffer_get_tile_offsets(irb
, &tile_x
, &tile_y
) +
1012 mt
->bo
->offset64
+ mt
->offset
);
1014 surf
[2] = ((rb
->Width
- 1) << BRW_SURFACE_WIDTH_SHIFT
|
1015 (rb
->Height
- 1) << BRW_SURFACE_HEIGHT_SHIFT
);
1017 surf
[3] = (brw_get_surface_tiling_bits(mt
->tiling
) |
1018 (mt
->pitch
- 1) << BRW_SURFACE_PITCH_SHIFT
);
1020 surf
[4] = brw_get_surface_num_multisamples(mt
->num_samples
);
1022 assert(brw
->has_surface_tile_offset
|| (tile_x
== 0 && tile_y
== 0));
1023 /* Note that the low bits of these fields are missing, so
1024 * there's the possibility of getting in trouble.
1026 assert(tile_x
% 4 == 0);
1027 assert(tile_y
% 2 == 0);
1028 surf
[5] = ((tile_x
/ 4) << BRW_SURFACE_X_OFFSET_SHIFT
|
1029 (tile_y
/ 2) << BRW_SURFACE_Y_OFFSET_SHIFT
|
1030 (mt
->valign
== 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE
: 0));
1034 if (!ctx
->Color
.ColorLogicOpEnabled
&& !ctx
->Color
._AdvancedBlendMode
&&
1035 (ctx
->Color
.BlendEnabled
& (1 << unit
)))
1036 surf
[0] |= BRW_SURFACE_BLEND_ENABLED
;
1038 if (!ctx
->Color
.ColorMask
[unit
][0])
1039 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT
;
1040 if (!ctx
->Color
.ColorMask
[unit
][1])
1041 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT
;
1042 if (!ctx
->Color
.ColorMask
[unit
][2])
1043 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT
;
1045 /* As mentioned above, disable writes to the alpha component when the
1046 * renderbuffer is XRGB.
1048 if (ctx
->DrawBuffer
->Visual
.alphaBits
== 0 ||
1049 !ctx
->Color
.ColorMask
[unit
][3]) {
1050 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT
;
1054 drm_intel_bo_emit_reloc(brw
->batch
.bo
,
1057 surf
[1] - mt
->bo
->offset64
,
1058 I915_GEM_DOMAIN_RENDER
,
1059 I915_GEM_DOMAIN_RENDER
);
/* Walk the framebuffer's color draw buffers and emit a surface per bound
 * renderbuffer via the per-gen vtbl hook (null surfaces for unbound
 * slots, and a single null surface when there are no draw buffers),
 * writing each offset into surf_offset[render_target_start + i].
 * NOTE(review): this extract is lossy -- braces and else lines are
 * missing, so the text below is not compilable as-is.
 */
1065 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
1068 brw_update_renderbuffer_surfaces(struct brw_context
*brw
,
1069 const struct gl_framebuffer
*fb
,
1070 uint32_t render_target_start
,
1071 uint32_t *surf_offset
)
1074 const unsigned int w
= _mesa_geometric_width(fb
);
1075 const unsigned int h
= _mesa_geometric_height(fb
);
1076 const unsigned int s
= _mesa_geometric_samples(fb
);
1078 /* Update surfaces for drawing buffers */
1079 if (fb
->_NumColorDrawBuffers
>= 1) {
1080 for (i
= 0; i
< fb
->_NumColorDrawBuffers
; i
++) {
1081 const uint32_t surf_index
= render_target_start
+ i
;
1082 const int flags
= (_mesa_geometric_layers(fb
) > 0 ?
1083 INTEL_RENDERBUFFER_LAYERED
: 0) |
1084 (brw
->draw_aux_buffer_disabled
[i
] ?
1085 INTEL_AUX_BUFFER_DISABLED
: 0);
1087 if (intel_renderbuffer(fb
->_ColorDrawBuffers
[i
])) {
1088 surf_offset
[surf_index
] =
1089 brw
->vtbl
.update_renderbuffer_surface(
1090 brw
, fb
->_ColorDrawBuffers
[i
], flags
, i
, surf_index
);
1092 brw
->vtbl
.emit_null_surface_state(brw
, w
, h
, s
,
1093 &surf_offset
[surf_index
]);
1097 const uint32_t surf_index
= render_target_start
;
1098 brw
->vtbl
.emit_null_surface_state(brw
, w
, h
, s
,
1099 &surf_offset
[surf_index
]);
1104 update_renderbuffer_surfaces(struct brw_context
*brw
)
1106 const struct gl_context
*ctx
= &brw
->ctx
;
1108 /* BRW_NEW_FS_PROG_DATA */
1109 const struct brw_wm_prog_data
*wm_prog_data
=
1110 brw_wm_prog_data(brw
->wm
.base
.prog_data
);
1112 /* _NEW_BUFFERS | _NEW_COLOR */
1113 const struct gl_framebuffer
*fb
= ctx
->DrawBuffer
;
1114 brw_update_renderbuffer_surfaces(
1116 wm_prog_data
->binding_table
.render_target_start
,
1117 brw
->wm
.base
.surf_offset
);
1118 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1121 const struct brw_tracked_state brw_renderbuffer_surfaces
= {
1123 .mesa
= _NEW_BUFFERS
|
1125 .brw
= BRW_NEW_BATCH
|
1127 BRW_NEW_FS_PROG_DATA
,
1129 .emit
= update_renderbuffer_surfaces
,
1132 const struct brw_tracked_state gen6_renderbuffer_surfaces
= {
1134 .mesa
= _NEW_BUFFERS
,
1135 .brw
= BRW_NEW_BATCH
|
1138 .emit
= update_renderbuffer_surfaces
,
1142 update_renderbuffer_read_surfaces(struct brw_context
*brw
)
1144 const struct gl_context
*ctx
= &brw
->ctx
;
1146 /* BRW_NEW_FS_PROG_DATA */
1147 const struct brw_wm_prog_data
*wm_prog_data
=
1148 brw_wm_prog_data(brw
->wm
.base
.prog_data
);
1150 /* BRW_NEW_FRAGMENT_PROGRAM */
1151 if (!ctx
->Extensions
.MESA_shader_framebuffer_fetch
&&
1152 brw
->fragment_program
&& brw
->fragment_program
->info
.outputs_read
) {
1154 const struct gl_framebuffer
*fb
= ctx
->DrawBuffer
;
1156 for (unsigned i
= 0; i
< fb
->_NumColorDrawBuffers
; i
++) {
1157 struct gl_renderbuffer
*rb
= fb
->_ColorDrawBuffers
[i
];
1158 const struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
1159 const unsigned surf_index
=
1160 wm_prog_data
->binding_table
.render_target_read_start
+ i
;
1161 uint32_t *surf_offset
= &brw
->wm
.base
.surf_offset
[surf_index
];
1164 const unsigned format
= brw
->render_target_format
[
1165 _mesa_get_render_format(ctx
, intel_rb_format(irb
))];
1166 assert(isl_format_supports_sampling(&brw
->screen
->devinfo
,
1169 /* Override the target of the texture if the render buffer is a
1170 * single slice of a 3D texture (since the minimum array element
1171 * field of the surface state structure is ignored by the sampler
1172 * unit for 3D textures on some hardware), or if the render buffer
1173 * is a 1D array (since shaders always provide the array index
1174 * coordinate at the Z component to avoid state-dependent
1175 * recompiles when changing the texture target of the
1178 const GLenum target
=
1179 (irb
->mt
->target
== GL_TEXTURE_3D
&&
1180 irb
->layer_count
== 1) ? GL_TEXTURE_2D
:
1181 irb
->mt
->target
== GL_TEXTURE_1D_ARRAY
? GL_TEXTURE_2D_ARRAY
:
1184 /* intel_renderbuffer::mt_layer is expressed in sample units for
1185 * the UMS and CMS multisample layouts, but
1186 * intel_renderbuffer::layer_count is expressed in units of whole
1187 * logical layers regardless of the multisample layout.
1189 const unsigned mt_layer_unit
=
1190 (irb
->mt
->msaa_layout
== INTEL_MSAA_LAYOUT_UMS
||
1191 irb
->mt
->msaa_layout
== INTEL_MSAA_LAYOUT_CMS
) ?
1192 MAX2(irb
->mt
->num_samples
, 1) : 1;
1194 const struct isl_view view
= {
1196 .base_level
= irb
->mt_level
- irb
->mt
->first_level
,
1198 .base_array_layer
= irb
->mt_layer
/ mt_layer_unit
,
1199 .array_len
= irb
->layer_count
,
1200 .swizzle
= ISL_SWIZZLE_IDENTITY
,
1201 .usage
= ISL_SURF_USAGE_TEXTURE_BIT
,
1204 const int flags
= brw
->draw_aux_buffer_disabled
[i
] ?
1205 INTEL_AUX_BUFFER_DISABLED
: 0;
1206 brw_emit_surface_state(brw
, irb
->mt
, flags
, target
, view
,
1208 surf_offset
, surf_index
,
1209 I915_GEM_DOMAIN_SAMPLER
, 0);
1212 brw
->vtbl
.emit_null_surface_state(
1213 brw
, _mesa_geometric_width(fb
), _mesa_geometric_height(fb
),
1214 _mesa_geometric_samples(fb
), surf_offset
);
1218 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1222 const struct brw_tracked_state brw_renderbuffer_read_surfaces
= {
1224 .mesa
= _NEW_BUFFERS
,
1225 .brw
= BRW_NEW_BATCH
|
1226 BRW_NEW_FRAGMENT_PROGRAM
|
1227 BRW_NEW_FS_PROG_DATA
,
1229 .emit
= update_renderbuffer_read_surfaces
,
1233 update_stage_texture_surfaces(struct brw_context
*brw
,
1234 const struct gl_program
*prog
,
1235 struct brw_stage_state
*stage_state
,
1236 bool for_gather
, uint32_t plane
)
1241 struct gl_context
*ctx
= &brw
->ctx
;
1243 uint32_t *surf_offset
= stage_state
->surf_offset
;
1245 /* BRW_NEW_*_PROG_DATA */
1247 surf_offset
+= stage_state
->prog_data
->binding_table
.gather_texture_start
;
1249 surf_offset
+= stage_state
->prog_data
->binding_table
.plane_start
[plane
];
1251 unsigned num_samplers
= util_last_bit(prog
->SamplersUsed
);
1252 for (unsigned s
= 0; s
< num_samplers
; s
++) {
1255 if (prog
->SamplersUsed
& (1 << s
)) {
1256 const unsigned unit
= prog
->SamplerUnits
[s
];
1259 if (ctx
->Texture
.Unit
[unit
]._Current
) {
1260 brw_update_texture_surface(ctx
, unit
, surf_offset
+ s
, for_gather
, plane
);
1268 * Construct SURFACE_STATE objects for enabled textures.
1271 brw_update_texture_surfaces(struct brw_context
*brw
)
1273 /* BRW_NEW_VERTEX_PROGRAM */
1274 struct gl_program
*vs
= (struct gl_program
*) brw
->vertex_program
;
1276 /* BRW_NEW_TESS_PROGRAMS */
1277 struct gl_program
*tcs
= (struct gl_program
*) brw
->tess_ctrl_program
;
1278 struct gl_program
*tes
= (struct gl_program
*) brw
->tess_eval_program
;
1280 /* BRW_NEW_GEOMETRY_PROGRAM */
1281 struct gl_program
*gs
= (struct gl_program
*) brw
->geometry_program
;
1283 /* BRW_NEW_FRAGMENT_PROGRAM */
1284 struct gl_program
*fs
= (struct gl_program
*) brw
->fragment_program
;
1287 update_stage_texture_surfaces(brw
, vs
, &brw
->vs
.base
, false, 0);
1288 update_stage_texture_surfaces(brw
, tcs
, &brw
->tcs
.base
, false, 0);
1289 update_stage_texture_surfaces(brw
, tes
, &brw
->tes
.base
, false, 0);
1290 update_stage_texture_surfaces(brw
, gs
, &brw
->gs
.base
, false, 0);
1291 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, false, 0);
1293 /* emit alternate set of surface state for gather. this
1294 * allows the surface format to be overriden for only the
1295 * gather4 messages. */
1297 if (vs
&& vs
->nir
->info
->uses_texture_gather
)
1298 update_stage_texture_surfaces(brw
, vs
, &brw
->vs
.base
, true, 0);
1299 if (tcs
&& tcs
->nir
->info
->uses_texture_gather
)
1300 update_stage_texture_surfaces(brw
, tcs
, &brw
->tcs
.base
, true, 0);
1301 if (tes
&& tes
->nir
->info
->uses_texture_gather
)
1302 update_stage_texture_surfaces(brw
, tes
, &brw
->tes
.base
, true, 0);
1303 if (gs
&& gs
->nir
->info
->uses_texture_gather
)
1304 update_stage_texture_surfaces(brw
, gs
, &brw
->gs
.base
, true, 0);
1305 if (fs
&& fs
->nir
->info
->uses_texture_gather
)
1306 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, true, 0);
1310 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, false, 1);
1311 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, false, 2);
1314 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1317 const struct brw_tracked_state brw_texture_surfaces
= {
1319 .mesa
= _NEW_TEXTURE
,
1320 .brw
= BRW_NEW_BATCH
|
1322 BRW_NEW_FRAGMENT_PROGRAM
|
1323 BRW_NEW_FS_PROG_DATA
|
1324 BRW_NEW_GEOMETRY_PROGRAM
|
1325 BRW_NEW_GS_PROG_DATA
|
1326 BRW_NEW_TESS_PROGRAMS
|
1327 BRW_NEW_TCS_PROG_DATA
|
1328 BRW_NEW_TES_PROG_DATA
|
1329 BRW_NEW_TEXTURE_BUFFER
|
1330 BRW_NEW_VERTEX_PROGRAM
|
1331 BRW_NEW_VS_PROG_DATA
,
1333 .emit
= brw_update_texture_surfaces
,
1337 brw_update_cs_texture_surfaces(struct brw_context
*brw
)
1339 /* BRW_NEW_COMPUTE_PROGRAM */
1340 struct gl_program
*cs
= (struct gl_program
*) brw
->compute_program
;
1343 update_stage_texture_surfaces(brw
, cs
, &brw
->cs
.base
, false, 0);
1345 /* emit alternate set of surface state for gather. this
1346 * allows the surface format to be overriden for only the
1350 if (cs
&& cs
->nir
->info
->uses_texture_gather
)
1351 update_stage_texture_surfaces(brw
, cs
, &brw
->cs
.base
, true, 0);
1354 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1357 const struct brw_tracked_state brw_cs_texture_surfaces
= {
1359 .mesa
= _NEW_TEXTURE
,
1360 .brw
= BRW_NEW_BATCH
|
1362 BRW_NEW_COMPUTE_PROGRAM
,
1364 .emit
= brw_update_cs_texture_surfaces
,
1369 brw_upload_ubo_surfaces(struct brw_context
*brw
,
1370 struct gl_linked_shader
*shader
,
1371 struct brw_stage_state
*stage_state
,
1372 struct brw_stage_prog_data
*prog_data
)
1374 struct gl_context
*ctx
= &brw
->ctx
;
1379 uint32_t *ubo_surf_offsets
=
1380 &stage_state
->surf_offset
[prog_data
->binding_table
.ubo_start
];
1382 for (int i
= 0; i
< shader
->NumUniformBlocks
; i
++) {
1383 struct gl_uniform_buffer_binding
*binding
=
1384 &ctx
->UniformBufferBindings
[shader
->UniformBlocks
[i
]->Binding
];
1386 if (binding
->BufferObject
== ctx
->Shared
->NullBufferObj
) {
1387 brw
->vtbl
.emit_null_surface_state(brw
, 1, 1, 1, &ubo_surf_offsets
[i
]);
1389 struct intel_buffer_object
*intel_bo
=
1390 intel_buffer_object(binding
->BufferObject
);
1391 GLsizeiptr size
= binding
->BufferObject
->Size
- binding
->Offset
;
1392 if (!binding
->AutomaticSize
)
1393 size
= MIN2(size
, binding
->Size
);
1395 intel_bufferobj_buffer(brw
, intel_bo
,
1398 brw_create_constant_surface(brw
, bo
, binding
->Offset
,
1400 &ubo_surf_offsets
[i
]);
1404 uint32_t *ssbo_surf_offsets
=
1405 &stage_state
->surf_offset
[prog_data
->binding_table
.ssbo_start
];
1407 for (int i
= 0; i
< shader
->NumShaderStorageBlocks
; i
++) {
1408 struct gl_shader_storage_buffer_binding
*binding
=
1409 &ctx
->ShaderStorageBufferBindings
[shader
->ShaderStorageBlocks
[i
]->Binding
];
1411 if (binding
->BufferObject
== ctx
->Shared
->NullBufferObj
) {
1412 brw
->vtbl
.emit_null_surface_state(brw
, 1, 1, 1, &ssbo_surf_offsets
[i
]);
1414 struct intel_buffer_object
*intel_bo
=
1415 intel_buffer_object(binding
->BufferObject
);
1416 GLsizeiptr size
= binding
->BufferObject
->Size
- binding
->Offset
;
1417 if (!binding
->AutomaticSize
)
1418 size
= MIN2(size
, binding
->Size
);
1420 intel_bufferobj_buffer(brw
, intel_bo
,
1423 brw_create_buffer_surface(brw
, bo
, binding
->Offset
,
1425 &ssbo_surf_offsets
[i
]);
1429 if (shader
->NumUniformBlocks
|| shader
->NumShaderStorageBlocks
)
1430 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1434 brw_upload_wm_ubo_surfaces(struct brw_context
*brw
)
1436 struct gl_context
*ctx
= &brw
->ctx
;
1438 struct gl_shader_program
*prog
= ctx
->_Shader
->_CurrentFragmentProgram
;
1443 /* BRW_NEW_FS_PROG_DATA */
1444 brw_upload_ubo_surfaces(brw
, prog
->_LinkedShaders
[MESA_SHADER_FRAGMENT
],
1445 &brw
->wm
.base
, brw
->wm
.base
.prog_data
);
1448 const struct brw_tracked_state brw_wm_ubo_surfaces
= {
1450 .mesa
= _NEW_PROGRAM
,
1451 .brw
= BRW_NEW_BATCH
|
1453 BRW_NEW_FS_PROG_DATA
|
1454 BRW_NEW_UNIFORM_BUFFER
,
1456 .emit
= brw_upload_wm_ubo_surfaces
,
1460 brw_upload_cs_ubo_surfaces(struct brw_context
*brw
)
1462 struct gl_context
*ctx
= &brw
->ctx
;
1464 struct gl_shader_program
*prog
=
1465 ctx
->_Shader
->CurrentProgram
[MESA_SHADER_COMPUTE
];
1470 /* BRW_NEW_CS_PROG_DATA */
1471 brw_upload_ubo_surfaces(brw
, prog
->_LinkedShaders
[MESA_SHADER_COMPUTE
],
1472 &brw
->cs
.base
, brw
->cs
.base
.prog_data
);
1475 const struct brw_tracked_state brw_cs_ubo_surfaces
= {
1477 .mesa
= _NEW_PROGRAM
,
1478 .brw
= BRW_NEW_BATCH
|
1480 BRW_NEW_CS_PROG_DATA
|
1481 BRW_NEW_UNIFORM_BUFFER
,
1483 .emit
= brw_upload_cs_ubo_surfaces
,
1487 brw_upload_abo_surfaces(struct brw_context
*brw
,
1488 struct gl_linked_shader
*shader
,
1489 struct brw_stage_state
*stage_state
,
1490 struct brw_stage_prog_data
*prog_data
)
1492 struct gl_context
*ctx
= &brw
->ctx
;
1493 uint32_t *surf_offsets
=
1494 &stage_state
->surf_offset
[prog_data
->binding_table
.abo_start
];
1496 if (shader
&& shader
->NumAtomicBuffers
) {
1497 for (unsigned i
= 0; i
< shader
->NumAtomicBuffers
; i
++) {
1498 struct gl_atomic_buffer_binding
*binding
=
1499 &ctx
->AtomicBufferBindings
[shader
->AtomicBuffers
[i
]->Binding
];
1500 struct intel_buffer_object
*intel_bo
=
1501 intel_buffer_object(binding
->BufferObject
);
1502 drm_intel_bo
*bo
= intel_bufferobj_buffer(
1503 brw
, intel_bo
, binding
->Offset
, intel_bo
->Base
.Size
- binding
->Offset
);
1505 brw_emit_buffer_surface_state(brw
, &surf_offsets
[i
], bo
,
1506 binding
->Offset
, BRW_SURFACEFORMAT_RAW
,
1507 bo
->size
- binding
->Offset
, 1, true);
1510 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1515 brw_upload_wm_abo_surfaces(struct brw_context
*brw
)
1517 struct gl_context
*ctx
= &brw
->ctx
;
1519 struct gl_shader_program
*prog
= ctx
->_Shader
->_CurrentFragmentProgram
;
1522 /* BRW_NEW_FS_PROG_DATA */
1523 brw_upload_abo_surfaces(brw
, prog
->_LinkedShaders
[MESA_SHADER_FRAGMENT
],
1524 &brw
->wm
.base
, brw
->wm
.base
.prog_data
);
1528 const struct brw_tracked_state brw_wm_abo_surfaces
= {
1530 .mesa
= _NEW_PROGRAM
,
1531 .brw
= BRW_NEW_ATOMIC_BUFFER
|
1534 BRW_NEW_FS_PROG_DATA
,
1536 .emit
= brw_upload_wm_abo_surfaces
,
1540 brw_upload_cs_abo_surfaces(struct brw_context
*brw
)
1542 struct gl_context
*ctx
= &brw
->ctx
;
1544 struct gl_shader_program
*prog
=
1545 ctx
->_Shader
->CurrentProgram
[MESA_SHADER_COMPUTE
];
1548 /* BRW_NEW_CS_PROG_DATA */
1549 brw_upload_abo_surfaces(brw
, prog
->_LinkedShaders
[MESA_SHADER_COMPUTE
],
1550 &brw
->cs
.base
, brw
->cs
.base
.prog_data
);
1554 const struct brw_tracked_state brw_cs_abo_surfaces
= {
1556 .mesa
= _NEW_PROGRAM
,
1557 .brw
= BRW_NEW_ATOMIC_BUFFER
|
1560 BRW_NEW_CS_PROG_DATA
,
1562 .emit
= brw_upload_cs_abo_surfaces
,
1566 brw_upload_cs_image_surfaces(struct brw_context
*brw
)
1568 struct gl_context
*ctx
= &brw
->ctx
;
1570 struct gl_shader_program
*prog
=
1571 ctx
->_Shader
->CurrentProgram
[MESA_SHADER_COMPUTE
];
1574 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1575 brw_upload_image_surfaces(brw
, prog
->_LinkedShaders
[MESA_SHADER_COMPUTE
],
1576 &brw
->cs
.base
, brw
->cs
.base
.prog_data
);
1580 const struct brw_tracked_state brw_cs_image_surfaces
= {
1582 .mesa
= _NEW_TEXTURE
| _NEW_PROGRAM
,
1583 .brw
= BRW_NEW_BATCH
|
1585 BRW_NEW_CS_PROG_DATA
|
1588 .emit
= brw_upload_cs_image_surfaces
,
1592 get_image_format(struct brw_context
*brw
, mesa_format format
, GLenum access
)
1594 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
1595 uint32_t hw_format
= brw_format_for_mesa_format(format
);
1596 if (access
== GL_WRITE_ONLY
) {
1598 } else if (isl_has_matching_typed_storage_image_format(devinfo
, hw_format
)) {
1599 /* Typed surface reads support a very limited subset of the shader
1600 * image formats. Translate it into the closest format the
1601 * hardware supports.
1603 return isl_lower_storage_image_format(devinfo
, hw_format
);
1605 /* The hardware doesn't actually support a typed format that we can use
1606 * so we have to fall back to untyped read/write messages.
1608 return BRW_SURFACEFORMAT_RAW
;
1613 update_default_image_param(struct brw_context
*brw
,
1614 struct gl_image_unit
*u
,
1615 unsigned surface_idx
,
1616 struct brw_image_param
*param
)
1618 memset(param
, 0, sizeof(*param
));
1619 param
->surface_idx
= surface_idx
;
1620 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1621 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1622 * detailed explanation of these parameters.
1624 param
->swizzling
[0] = 0xff;
1625 param
->swizzling
[1] = 0xff;
1629 update_buffer_image_param(struct brw_context
*brw
,
1630 struct gl_image_unit
*u
,
1631 unsigned surface_idx
,
1632 struct brw_image_param
*param
)
1634 struct gl_buffer_object
*obj
= u
->TexObj
->BufferObject
;
1635 const uint32_t size
= MIN2((uint32_t)u
->TexObj
->BufferSize
, obj
->Size
);
1636 update_default_image_param(brw
, u
, surface_idx
, param
);
1638 param
->size
[0] = size
/ _mesa_get_format_bytes(u
->_ActualFormat
);
1639 param
->stride
[0] = _mesa_get_format_bytes(u
->_ActualFormat
);
1643 update_texture_image_param(struct brw_context
*brw
,
1644 struct gl_image_unit
*u
,
1645 unsigned surface_idx
,
1646 struct brw_image_param
*param
)
1648 struct intel_mipmap_tree
*mt
= intel_texture_object(u
->TexObj
)->mt
;
1650 update_default_image_param(brw
, u
, surface_idx
, param
);
1652 param
->size
[0] = minify(mt
->logical_width0
, u
->Level
);
1653 param
->size
[1] = minify(mt
->logical_height0
, u
->Level
);
1654 param
->size
[2] = (!u
->Layered
? 1 :
1655 u
->TexObj
->Target
== GL_TEXTURE_CUBE_MAP
? 6 :
1656 u
->TexObj
->Target
== GL_TEXTURE_3D
?
1657 minify(mt
->logical_depth0
, u
->Level
) :
1658 mt
->logical_depth0
);
1660 intel_miptree_get_image_offset(mt
, u
->Level
, u
->_Layer
,
1664 param
->stride
[0] = mt
->cpp
;
1665 param
->stride
[1] = mt
->pitch
/ mt
->cpp
;
1667 brw_miptree_get_horizontal_slice_pitch(brw
, mt
, u
->Level
);
1669 brw_miptree_get_vertical_slice_pitch(brw
, mt
, u
->Level
);
1671 if (mt
->tiling
== I915_TILING_X
) {
1672 /* An X tile is a rectangular block of 512x8 bytes. */
1673 param
->tiling
[0] = _mesa_logbase2(512 / mt
->cpp
);
1674 param
->tiling
[1] = _mesa_logbase2(8);
1676 if (brw
->has_swizzling
) {
1677 /* Right shifts required to swizzle bits 9 and 10 of the memory
1678 * address with bit 6.
1680 param
->swizzling
[0] = 3;
1681 param
->swizzling
[1] = 4;
1683 } else if (mt
->tiling
== I915_TILING_Y
) {
1684 /* The layout of a Y-tiled surface in memory isn't really fundamentally
1685 * different to the layout of an X-tiled surface, we simply pretend that
1686 * the surface is broken up in a number of smaller 16Bx32 tiles, each
1687 * one arranged in X-major order just like is the case for X-tiling.
1689 param
->tiling
[0] = _mesa_logbase2(16 / mt
->cpp
);
1690 param
->tiling
[1] = _mesa_logbase2(32);
1692 if (brw
->has_swizzling
) {
1693 /* Right shift required to swizzle bit 9 of the memory address with
1696 param
->swizzling
[0] = 3;
1700 /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The
1701 * address calculation algorithm (emit_address_calculation() in
1702 * brw_fs_surface_builder.cpp) handles this as a sort of tiling with
1703 * modulus equal to the LOD.
1705 param
->tiling
[2] = (u
->TexObj
->Target
== GL_TEXTURE_3D
? u
->Level
:
1710 update_image_surface(struct brw_context
*brw
,
1711 struct gl_image_unit
*u
,
1713 unsigned surface_idx
,
1714 uint32_t *surf_offset
,
1715 struct brw_image_param
*param
)
1717 if (_mesa_is_image_unit_valid(&brw
->ctx
, u
)) {
1718 struct gl_texture_object
*obj
= u
->TexObj
;
1719 const unsigned format
= get_image_format(brw
, u
->_ActualFormat
, access
);
1721 if (obj
->Target
== GL_TEXTURE_BUFFER
) {
1722 struct intel_buffer_object
*intel_obj
=
1723 intel_buffer_object(obj
->BufferObject
);
1724 const unsigned texel_size
= (format
== BRW_SURFACEFORMAT_RAW
? 1 :
1725 _mesa_get_format_bytes(u
->_ActualFormat
));
1727 brw_emit_buffer_surface_state(
1728 brw
, surf_offset
, intel_obj
->buffer
, obj
->BufferOffset
,
1729 format
, intel_obj
->Base
.Size
, texel_size
,
1730 access
!= GL_READ_ONLY
);
1732 update_buffer_image_param(brw
, u
, surface_idx
, param
);
1735 struct intel_texture_object
*intel_obj
= intel_texture_object(obj
);
1736 struct intel_mipmap_tree
*mt
= intel_obj
->mt
;
1738 if (format
== BRW_SURFACEFORMAT_RAW
) {
1739 brw_emit_buffer_surface_state(
1740 brw
, surf_offset
, mt
->bo
, mt
->offset
,
1741 format
, mt
->bo
->size
- mt
->offset
, 1 /* pitch */,
1742 access
!= GL_READ_ONLY
);
1745 const unsigned num_layers
= (!u
->Layered
? 1 :
1746 obj
->Target
== GL_TEXTURE_CUBE_MAP
? 6 :
1747 mt
->logical_depth0
);
1749 struct isl_view view
= {
1751 .base_level
= obj
->MinLevel
+ u
->Level
,
1753 .base_array_layer
= obj
->MinLayer
+ u
->_Layer
,
1754 .array_len
= num_layers
,
1755 .swizzle
= ISL_SWIZZLE_IDENTITY
,
1756 .usage
= ISL_SURF_USAGE_STORAGE_BIT
,
1759 const int surf_index
= surf_offset
- &brw
->wm
.base
.surf_offset
[0];
1761 mt
->fast_clear_state
== INTEL_FAST_CLEAR_STATE_RESOLVED
?
1762 INTEL_AUX_BUFFER_DISABLED
: 0;
1763 brw_emit_surface_state(brw
, mt
, flags
, mt
->target
, view
,
1765 surf_offset
, surf_index
,
1766 I915_GEM_DOMAIN_SAMPLER
,
1767 access
== GL_READ_ONLY
? 0 :
1768 I915_GEM_DOMAIN_SAMPLER
);
1771 update_texture_image_param(brw
, u
, surface_idx
, param
);
1775 brw
->vtbl
.emit_null_surface_state(brw
, 1, 1, 1, surf_offset
);
1776 update_default_image_param(brw
, u
, surface_idx
, param
);
1781 brw_upload_image_surfaces(struct brw_context
*brw
,
1782 struct gl_linked_shader
*shader
,
1783 struct brw_stage_state
*stage_state
,
1784 struct brw_stage_prog_data
*prog_data
)
1786 struct gl_context
*ctx
= &brw
->ctx
;
1788 if (shader
&& shader
->NumImages
) {
1789 for (unsigned i
= 0; i
< shader
->NumImages
; i
++) {
1790 struct gl_image_unit
*u
= &ctx
->ImageUnits
[shader
->ImageUnits
[i
]];
1791 const unsigned surf_idx
= prog_data
->binding_table
.image_start
+ i
;
1793 update_image_surface(brw
, u
, shader
->ImageAccess
[i
],
1795 &stage_state
->surf_offset
[surf_idx
],
1796 &prog_data
->image_param
[i
]);
1799 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1800 /* This may have changed the image metadata dependent on the context
1801 * image unit state and passed to the program as uniforms, make sure
1802 * that push and pull constants are reuploaded.
1804 brw
->NewGLState
|= _NEW_PROGRAM_CONSTANTS
;
1809 brw_upload_wm_image_surfaces(struct brw_context
*brw
)
1811 struct gl_context
*ctx
= &brw
->ctx
;
1812 /* BRW_NEW_FRAGMENT_PROGRAM */
1813 struct gl_shader_program
*prog
= ctx
->_Shader
->_CurrentFragmentProgram
;
1816 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1817 brw_upload_image_surfaces(brw
, prog
->_LinkedShaders
[MESA_SHADER_FRAGMENT
],
1818 &brw
->wm
.base
, brw
->wm
.base
.prog_data
);
1822 const struct brw_tracked_state brw_wm_image_surfaces
= {
1824 .mesa
= _NEW_TEXTURE
,
1825 .brw
= BRW_NEW_BATCH
|
1827 BRW_NEW_FRAGMENT_PROGRAM
|
1828 BRW_NEW_FS_PROG_DATA
|
1831 .emit
= brw_upload_wm_image_surfaces
,
1835 gen4_init_vtable_surface_functions(struct brw_context
*brw
)
1837 brw
->vtbl
.update_renderbuffer_surface
= gen4_update_renderbuffer_surface
;
1838 brw
->vtbl
.emit_null_surface_state
= brw_emit_null_surface_state
;
1842 gen6_init_vtable_surface_functions(struct brw_context
*brw
)
1844 gen4_init_vtable_surface_functions(brw
);
1845 brw
->vtbl
.update_renderbuffer_surface
= brw_update_renderbuffer_surface
;
1849 brw_upload_cs_work_groups_surface(struct brw_context
*brw
)
1851 struct gl_context
*ctx
= &brw
->ctx
;
1853 struct gl_shader_program
*prog
=
1854 ctx
->_Shader
->CurrentProgram
[MESA_SHADER_COMPUTE
];
1855 /* BRW_NEW_CS_PROG_DATA */
1856 const struct brw_cs_prog_data
*cs_prog_data
=
1857 brw_cs_prog_data(brw
->cs
.base
.prog_data
);
1859 if (prog
&& cs_prog_data
->uses_num_work_groups
) {
1860 const unsigned surf_idx
=
1861 cs_prog_data
->binding_table
.work_groups_start
;
1862 uint32_t *surf_offset
= &brw
->cs
.base
.surf_offset
[surf_idx
];
1866 if (brw
->compute
.num_work_groups_bo
== NULL
) {
1868 intel_upload_data(brw
,
1869 (void *)brw
->compute
.num_work_groups
,
1875 bo
= brw
->compute
.num_work_groups_bo
;
1876 bo_offset
= brw
->compute
.num_work_groups_offset
;
1879 brw_emit_buffer_surface_state(brw
, surf_offset
,
1881 BRW_SURFACEFORMAT_RAW
,
1882 3 * sizeof(GLuint
), 1, true);
1883 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1887 const struct brw_tracked_state brw_cs_work_groups_surface
= {
1889 .brw
= BRW_NEW_BLORP
|
1890 BRW_NEW_CS_PROG_DATA
|
1891 BRW_NEW_CS_WORK_GROUPS
1893 .emit
= brw_upload_cs_work_groups_surface
,