2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keithw@vmware.com>
33 #include "compiler/nir/nir.h"
34 #include "main/context.h"
35 #include "main/blend.h"
36 #include "main/mtypes.h"
37 #include "main/samplerobj.h"
38 #include "main/shaderimage.h"
39 #include "main/teximage.h"
40 #include "program/prog_parameter.h"
41 #include "program/prog_instruction.h"
42 #include "main/framebuffer.h"
43 #include "main/shaderapi.h"
47 #include "intel_mipmap_tree.h"
48 #include "intel_batchbuffer.h"
49 #include "intel_tex.h"
50 #include "intel_fbo.h"
51 #include "intel_buffer_objects.h"
53 #include "brw_context.h"
54 #include "brw_state.h"
55 #include "brw_defines.h"
/* Per-surface flags passed through brw_emit_surface_state() and the
 * renderbuffer-surface vtbl hooks.
 */
enum {
   INTEL_RENDERBUFFER_LAYERED = 1 << 0,  /* layered rendering requested */
   INTEL_AUX_BUFFER_DISABLED = 1 << 1,   /* skip MCS/HiZ aux surface setup */
};
/* Memory Object Control State values for texture and render-target
 * surfaces, indexed by hardware generation (used below as
 * tex_mocs[brw->gen] / rb_mocs[brw->gen]).
 * NOTE(review): the initializer entries are missing from this extraction —
 * restore the per-gen MOCS values from the upstream file before building.
 */
63 uint32_t tex_mocs
[] = {
70 uint32_t rb_mocs
[] = {
78 brw_emit_surface_state(struct brw_context
*brw
,
79 struct intel_mipmap_tree
*mt
, uint32_t flags
,
80 GLenum target
, struct isl_view view
,
81 uint32_t mocs
, uint32_t *surf_offset
, int surf_index
,
82 unsigned read_domains
, unsigned write_domains
)
84 uint32_t tile_x
= mt
->level
[0].slice
[0].x_offset
;
85 uint32_t tile_y
= mt
->level
[0].slice
[0].y_offset
;
86 uint32_t offset
= mt
->offset
;
89 intel_miptree_get_isl_surf(brw
, mt
, &surf
);
91 surf
.dim
= get_isl_surf_dim(target
);
93 const enum isl_dim_layout dim_layout
=
94 get_isl_dim_layout(&brw
->screen
->devinfo
, mt
->tiling
, target
,
97 if (surf
.dim_layout
!= dim_layout
) {
98 /* The layout of the specified texture target is not compatible with the
99 * actual layout of the miptree structure in memory -- You're entering
100 * dangerous territory, this can only possibly work if you only intended
101 * to access a single level and slice of the texture, and the hardware
102 * supports the tile offset feature in order to allow non-tile-aligned
103 * base offsets, since we'll have to point the hardware to the first
104 * texel of the level instead of relying on the usual base level/layer
107 assert(brw
->has_surface_tile_offset
);
108 assert(view
.levels
== 1 && view
.array_len
== 1);
109 assert(tile_x
== 0 && tile_y
== 0);
111 offset
+= intel_miptree_get_tile_offsets(mt
, view
.base_level
,
112 view
.base_array_layer
,
115 /* Minify the logical dimensions of the texture. */
116 const unsigned l
= view
.base_level
- mt
->first_level
;
117 surf
.logical_level0_px
.width
= minify(surf
.logical_level0_px
.width
, l
);
118 surf
.logical_level0_px
.height
= surf
.dim
<= ISL_SURF_DIM_1D
? 1 :
119 minify(surf
.logical_level0_px
.height
, l
);
120 surf
.logical_level0_px
.depth
= surf
.dim
<= ISL_SURF_DIM_2D
? 1 :
121 minify(surf
.logical_level0_px
.depth
, l
);
123 /* Only the base level and layer can be addressed with the overridden
126 surf
.logical_level0_px
.array_len
= 1;
128 surf
.dim_layout
= dim_layout
;
130 /* The requested slice of the texture is now at the base level and
134 view
.base_array_layer
= 0;
137 union isl_color_value clear_color
= { .u32
= { 0, 0, 0, 0 } };
139 struct brw_bo
*aux_bo
;
140 struct isl_surf
*aux_surf
= NULL
;
141 uint64_t aux_offset
= 0;
142 enum isl_aux_usage aux_usage
= ISL_AUX_USAGE_NONE
;
143 if ((mt
->mcs_buf
|| intel_miptree_sample_with_hiz(brw
, mt
)) &&
144 !(flags
& INTEL_AUX_BUFFER_DISABLED
)) {
145 aux_usage
= intel_miptree_get_aux_isl_usage(brw
, mt
);
148 aux_surf
= &mt
->mcs_buf
->surf
;
150 assert(mt
->mcs_buf
->offset
== 0);
151 aux_bo
= mt
->mcs_buf
->bo
;
152 aux_offset
= mt
->mcs_buf
->bo
->offset64
+ mt
->mcs_buf
->offset
;
154 aux_surf
= &mt
->hiz_buf
->surf
;
156 aux_bo
= mt
->hiz_buf
->bo
;
157 aux_offset
= mt
->hiz_buf
->bo
->offset64
;
160 /* We only really need a clear color if we also have an auxiliary
161 * surface. Without one, it does nothing.
163 clear_color
= mt
->fast_clear_color
;
166 void *state
= brw_state_batch(brw
,
167 brw
->isl_dev
.ss
.size
,
168 brw
->isl_dev
.ss
.align
,
171 isl_surf_fill_state(&brw
->isl_dev
, state
, .surf
= &surf
, .view
= &view
,
172 .address
= mt
->bo
->offset64
+ offset
,
173 .aux_surf
= aux_surf
, .aux_usage
= aux_usage
,
174 .aux_address
= aux_offset
,
175 .mocs
= mocs
, .clear_color
= clear_color
,
176 .x_offset_sa
= tile_x
, .y_offset_sa
= tile_y
);
178 brw_emit_reloc(&brw
->batch
, *surf_offset
+ brw
->isl_dev
.ss
.addr_offset
,
179 mt
->bo
, offset
, read_domains
, write_domains
);
182 /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
183 * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
184 * contain other control information. Since buffer addresses are always
185 * on 4k boundaries (and thus have their lower 12 bits zero), we can use
186 * an ordinary reloc to do the necessary address translation.
188 assert((aux_offset
& 0xfff) == 0);
189 uint32_t *aux_addr
= state
+ brw
->isl_dev
.ss
.aux_addr_offset
;
190 brw_emit_reloc(&brw
->batch
,
191 *surf_offset
+ brw
->isl_dev
.ss
.aux_addr_offset
,
192 aux_bo
, *aux_addr
- aux_bo
->offset64
,
193 read_domains
, write_domains
);
198 brw_update_renderbuffer_surface(struct brw_context
*brw
,
199 struct gl_renderbuffer
*rb
,
200 uint32_t flags
, unsigned unit
/* unused */,
203 struct gl_context
*ctx
= &brw
->ctx
;
204 struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
205 struct intel_mipmap_tree
*mt
= irb
->mt
;
208 assert(!(flags
& INTEL_AUX_BUFFER_DISABLED
));
211 assert(brw_render_target_supported(brw
, rb
));
213 mesa_format rb_format
= _mesa_get_render_format(ctx
, intel_rb_format(irb
));
214 if (unlikely(!brw
->mesa_format_supports_render
[rb_format
])) {
215 _mesa_problem(ctx
, "%s: renderbuffer format %s unsupported\n",
216 __func__
, _mesa_get_format_name(rb_format
));
219 const unsigned layer_multiplier
=
220 (irb
->mt
->msaa_layout
== INTEL_MSAA_LAYOUT_UMS
||
221 irb
->mt
->msaa_layout
== INTEL_MSAA_LAYOUT_CMS
) ?
222 MAX2(irb
->mt
->num_samples
, 1) : 1;
224 struct isl_view view
= {
225 .format
= brw
->mesa_to_isl_render_format
[rb_format
],
226 .base_level
= irb
->mt_level
- irb
->mt
->first_level
,
228 .base_array_layer
= irb
->mt_layer
/ layer_multiplier
,
229 .array_len
= MAX2(irb
->layer_count
, 1),
230 .swizzle
= ISL_SWIZZLE_IDENTITY
,
231 .usage
= ISL_SURF_USAGE_RENDER_TARGET_BIT
,
235 brw_emit_surface_state(brw
, mt
, flags
, mt
->target
, view
,
238 I915_GEM_DOMAIN_RENDER
,
239 I915_GEM_DOMAIN_RENDER
);
244 translate_tex_target(GLenum target
)
248 case GL_TEXTURE_1D_ARRAY_EXT
:
249 return BRW_SURFACE_1D
;
251 case GL_TEXTURE_RECTANGLE_NV
:
252 return BRW_SURFACE_2D
;
255 case GL_TEXTURE_2D_ARRAY_EXT
:
256 case GL_TEXTURE_EXTERNAL_OES
:
257 case GL_TEXTURE_2D_MULTISAMPLE
:
258 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY
:
259 return BRW_SURFACE_2D
;
262 return BRW_SURFACE_3D
;
264 case GL_TEXTURE_CUBE_MAP
:
265 case GL_TEXTURE_CUBE_MAP_ARRAY
:
266 return BRW_SURFACE_CUBE
;
269 unreachable("not reached");
274 brw_get_surface_tiling_bits(uint32_t tiling
)
278 return BRW_SURFACE_TILED
;
280 return BRW_SURFACE_TILED
| BRW_SURFACE_TILED_Y
;
288 brw_get_surface_num_multisamples(unsigned num_samples
)
291 return BRW_SURFACE_MULTISAMPLECOUNT_4
;
293 return BRW_SURFACE_MULTISAMPLECOUNT_1
;
297 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
301 brw_get_texture_swizzle(const struct gl_context
*ctx
,
302 const struct gl_texture_object
*t
)
304 const struct gl_texture_image
*img
= t
->Image
[0][t
->BaseLevel
];
306 int swizzles
[SWIZZLE_NIL
+ 1] = {
316 if (img
->_BaseFormat
== GL_DEPTH_COMPONENT
||
317 img
->_BaseFormat
== GL_DEPTH_STENCIL
) {
318 GLenum depth_mode
= t
->DepthMode
;
320 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
321 * with depth component data specified with a sized internal format.
322 * Otherwise, it's left at the old default, GL_LUMINANCE.
324 if (_mesa_is_gles3(ctx
) &&
325 img
->InternalFormat
!= GL_DEPTH_COMPONENT
&&
326 img
->InternalFormat
!= GL_DEPTH_STENCIL
) {
330 switch (depth_mode
) {
332 swizzles
[0] = SWIZZLE_ZERO
;
333 swizzles
[1] = SWIZZLE_ZERO
;
334 swizzles
[2] = SWIZZLE_ZERO
;
335 swizzles
[3] = SWIZZLE_X
;
338 swizzles
[0] = SWIZZLE_X
;
339 swizzles
[1] = SWIZZLE_X
;
340 swizzles
[2] = SWIZZLE_X
;
341 swizzles
[3] = SWIZZLE_ONE
;
344 swizzles
[0] = SWIZZLE_X
;
345 swizzles
[1] = SWIZZLE_X
;
346 swizzles
[2] = SWIZZLE_X
;
347 swizzles
[3] = SWIZZLE_X
;
350 swizzles
[0] = SWIZZLE_X
;
351 swizzles
[1] = SWIZZLE_ZERO
;
352 swizzles
[2] = SWIZZLE_ZERO
;
353 swizzles
[3] = SWIZZLE_ONE
;
358 GLenum datatype
= _mesa_get_format_datatype(img
->TexFormat
);
360 /* If the texture's format is alpha-only, force R, G, and B to
361 * 0.0. Similarly, if the texture's format has no alpha channel,
362 * force the alpha value read to 1.0. This allows for the
363 * implementation to use an RGBA texture for any of these formats
364 * without leaking any unexpected values.
366 switch (img
->_BaseFormat
) {
368 swizzles
[0] = SWIZZLE_ZERO
;
369 swizzles
[1] = SWIZZLE_ZERO
;
370 swizzles
[2] = SWIZZLE_ZERO
;
373 if (t
->_IsIntegerFormat
|| datatype
== GL_SIGNED_NORMALIZED
) {
374 swizzles
[0] = SWIZZLE_X
;
375 swizzles
[1] = SWIZZLE_X
;
376 swizzles
[2] = SWIZZLE_X
;
377 swizzles
[3] = SWIZZLE_ONE
;
380 case GL_LUMINANCE_ALPHA
:
381 if (datatype
== GL_SIGNED_NORMALIZED
) {
382 swizzles
[0] = SWIZZLE_X
;
383 swizzles
[1] = SWIZZLE_X
;
384 swizzles
[2] = SWIZZLE_X
;
385 swizzles
[3] = SWIZZLE_W
;
389 if (datatype
== GL_SIGNED_NORMALIZED
) {
390 swizzles
[0] = SWIZZLE_X
;
391 swizzles
[1] = SWIZZLE_X
;
392 swizzles
[2] = SWIZZLE_X
;
393 swizzles
[3] = SWIZZLE_X
;
399 if (_mesa_get_format_bits(img
->TexFormat
, GL_ALPHA_BITS
) > 0 ||
400 img
->TexFormat
== MESA_FORMAT_RGB_DXT1
||
401 img
->TexFormat
== MESA_FORMAT_SRGB_DXT1
)
402 swizzles
[3] = SWIZZLE_ONE
;
406 return MAKE_SWIZZLE4(swizzles
[GET_SWZ(t
->_Swizzle
, 0)],
407 swizzles
[GET_SWZ(t
->_Swizzle
, 1)],
408 swizzles
[GET_SWZ(t
->_Swizzle
, 2)],
409 swizzles
[GET_SWZ(t
->_Swizzle
, 3)]);
413 * Convert an swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+
414 * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED). The mappings are
416 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
419 * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE
421 * which is simply adding 4 then modding by 8 (or anding with 7).
423 * We then may need to apply workarounds for textureGather hardware bugs.
426 swizzle_to_scs(GLenum swizzle
, bool need_green_to_blue
)
428 unsigned scs
= (swizzle
+ 4) & 7;
430 return (need_green_to_blue
&& scs
== HSW_SCS_GREEN
) ? HSW_SCS_BLUE
: scs
;
434 brw_find_matching_rb(const struct gl_framebuffer
*fb
,
435 const struct intel_mipmap_tree
*mt
)
437 for (unsigned i
= 0; i
< fb
->_NumColorDrawBuffers
; i
++) {
438 const struct intel_renderbuffer
*irb
=
439 intel_renderbuffer(fb
->_ColorDrawBuffers
[i
]);
441 if (irb
&& irb
->mt
== mt
)
445 return fb
->_NumColorDrawBuffers
;
449 brw_texture_view_sane(const struct brw_context
*brw
,
450 const struct intel_mipmap_tree
*mt
,
451 const struct isl_view
*view
)
453 /* There are special cases only for lossless compression. */
454 if (mt
->aux_usage
!= ISL_AUX_USAGE_CCS_E
)
457 if (isl_format_supports_ccs_e(&brw
->screen
->devinfo
, view
->format
))
460 /* Logic elsewhere needs to take care to resolve the color buffer prior
461 * to sampling it as non-compressed.
463 if (intel_miptree_has_color_unresolved(mt
, view
->base_level
, view
->levels
,
464 view
->base_array_layer
,
468 const struct gl_framebuffer
*fb
= brw
->ctx
.DrawBuffer
;
469 const unsigned rb_index
= brw_find_matching_rb(fb
, mt
);
471 if (rb_index
== fb
->_NumColorDrawBuffers
)
474 /* Underlying surface is compressed but it is sampled using a format that
475 * the sampling engine doesn't support as compressed. Compression must be
476 * disabled for both sampling engine and data port in case the same surface
477 * is used also as render target.
479 return brw
->draw_aux_buffer_disabled
[rb_index
];
483 brw_disable_aux_surface(const struct brw_context
*brw
,
484 const struct intel_mipmap_tree
*mt
,
485 const struct isl_view
*view
)
487 /* Nothing to disable. */
491 const bool is_unresolved
= intel_miptree_has_color_unresolved(
492 mt
, view
->base_level
, view
->levels
,
493 view
->base_array_layer
, view
->array_len
);
495 /* There are special cases only for lossless compression. */
496 if (mt
->aux_usage
!= ISL_AUX_USAGE_CCS_E
)
497 return !is_unresolved
;
499 const struct gl_framebuffer
*fb
= brw
->ctx
.DrawBuffer
;
500 const unsigned rb_index
= brw_find_matching_rb(fb
, mt
);
502 /* If we are drawing into this with compression enabled, then we must also
503 * enable compression when texturing from it regardless of
504 * fast_clear_state. If we don't then, after the first draw call with
505 * this setup, there will be data in the CCS which won't get picked up by
506 * subsequent texturing operations as required by ARB_texture_barrier.
507 * Since we don't want to re-emit the binding table or do a resolve
508 * operation every draw call, the easiest thing to do is just enable
509 * compression on the texturing side. This is completely safe to do
510 * since, if compressed texturing weren't allowed, we would have disabled
511 * compression of render targets in whatever_that_function_is_called().
513 if (rb_index
< fb
->_NumColorDrawBuffers
) {
514 if (brw
->draw_aux_buffer_disabled
[rb_index
]) {
515 assert(!is_unresolved
);
518 return brw
->draw_aux_buffer_disabled
[rb_index
];
521 return !is_unresolved
;
525 brw_update_texture_surface(struct gl_context
*ctx
,
527 uint32_t *surf_offset
,
531 struct brw_context
*brw
= brw_context(ctx
);
532 struct gl_texture_object
*obj
= ctx
->Texture
.Unit
[unit
]._Current
;
534 if (obj
->Target
== GL_TEXTURE_BUFFER
) {
535 brw_update_buffer_texture_surface(ctx
, unit
, surf_offset
);
538 struct intel_texture_object
*intel_obj
= intel_texture_object(obj
);
539 struct intel_mipmap_tree
*mt
= intel_obj
->mt
;
542 if (mt
->plane
[plane
- 1] == NULL
)
544 mt
= mt
->plane
[plane
- 1];
547 struct gl_sampler_object
*sampler
= _mesa_get_samplerobj(ctx
, unit
);
548 /* If this is a view with restricted NumLayers, then our effective depth
549 * is not just the miptree depth.
551 const unsigned view_num_layers
=
552 (obj
->Immutable
&& obj
->Target
!= GL_TEXTURE_3D
) ? obj
->NumLayers
:
555 /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
556 * texturing functions that return a float, as our code generation always
557 * selects the .x channel (which would always be 0).
559 struct gl_texture_image
*firstImage
= obj
->Image
[0][obj
->BaseLevel
];
560 const bool alpha_depth
= obj
->DepthMode
== GL_ALPHA
&&
561 (firstImage
->_BaseFormat
== GL_DEPTH_COMPONENT
||
562 firstImage
->_BaseFormat
== GL_DEPTH_STENCIL
);
563 const unsigned swizzle
= (unlikely(alpha_depth
) ? SWIZZLE_XYZW
:
564 brw_get_texture_swizzle(&brw
->ctx
, obj
));
566 mesa_format mesa_fmt
= plane
== 0 ? intel_obj
->_Format
: mt
->format
;
567 enum isl_format format
= translate_tex_format(brw
, mesa_fmt
,
568 sampler
->sRGBDecode
);
570 /* Implement gen6 and gen7 gather work-around */
571 bool need_green_to_blue
= false;
573 if (brw
->gen
== 7 && (format
== ISL_FORMAT_R32G32_FLOAT
||
574 format
== ISL_FORMAT_R32G32_SINT
||
575 format
== ISL_FORMAT_R32G32_UINT
)) {
576 format
= ISL_FORMAT_R32G32_FLOAT_LD
;
577 need_green_to_blue
= brw
->is_haswell
;
578 } else if (brw
->gen
== 6) {
579 /* Sandybridge's gather4 message is broken for integer formats.
580 * To work around this, we pretend the surface is UNORM for
581 * 8 or 16-bit formats, and emit shader instructions to recover
582 * the real INT/UINT value. For 32-bit formats, we pretend
583 * the surface is FLOAT, and simply reinterpret the resulting
587 case ISL_FORMAT_R8_SINT
:
588 case ISL_FORMAT_R8_UINT
:
589 format
= ISL_FORMAT_R8_UNORM
;
592 case ISL_FORMAT_R16_SINT
:
593 case ISL_FORMAT_R16_UINT
:
594 format
= ISL_FORMAT_R16_UNORM
;
597 case ISL_FORMAT_R32_SINT
:
598 case ISL_FORMAT_R32_UINT
:
599 format
= ISL_FORMAT_R32_FLOAT
;
608 if (obj
->StencilSampling
&& firstImage
->_BaseFormat
== GL_DEPTH_STENCIL
) {
610 assert(mt
->r8stencil_mt
&& !mt
->stencil_mt
->r8stencil_needs_update
);
611 mt
= mt
->r8stencil_mt
;
615 format
= ISL_FORMAT_R8_UINT
;
616 } else if (brw
->gen
<= 7 && mt
->format
== MESA_FORMAT_S_UINT8
) {
617 assert(mt
->r8stencil_mt
&& !mt
->r8stencil_needs_update
);
618 mt
= mt
->r8stencil_mt
;
619 format
= ISL_FORMAT_R8_UINT
;
622 const int surf_index
= surf_offset
- &brw
->wm
.base
.surf_offset
[0];
624 struct isl_view view
= {
626 .base_level
= obj
->MinLevel
+ obj
->BaseLevel
,
627 .levels
= intel_obj
->_MaxLevel
- obj
->BaseLevel
+ 1,
628 .base_array_layer
= obj
->MinLayer
,
629 .array_len
= view_num_layers
,
631 .r
= swizzle_to_scs(GET_SWZ(swizzle
, 0), need_green_to_blue
),
632 .g
= swizzle_to_scs(GET_SWZ(swizzle
, 1), need_green_to_blue
),
633 .b
= swizzle_to_scs(GET_SWZ(swizzle
, 2), need_green_to_blue
),
634 .a
= swizzle_to_scs(GET_SWZ(swizzle
, 3), need_green_to_blue
),
636 .usage
= ISL_SURF_USAGE_TEXTURE_BIT
,
639 if (obj
->Target
== GL_TEXTURE_CUBE_MAP
||
640 obj
->Target
== GL_TEXTURE_CUBE_MAP_ARRAY
)
641 view
.usage
|= ISL_SURF_USAGE_CUBE_BIT
;
643 assert(brw_texture_view_sane(brw
, mt
, &view
));
645 const int flags
= brw_disable_aux_surface(brw
, mt
, &view
) ?
646 INTEL_AUX_BUFFER_DISABLED
: 0;
647 brw_emit_surface_state(brw
, mt
, flags
, mt
->target
, view
,
649 surf_offset
, surf_index
,
650 I915_GEM_DOMAIN_SAMPLER
, 0);
655 brw_emit_buffer_surface_state(struct brw_context
*brw
,
656 uint32_t *out_offset
,
658 unsigned buffer_offset
,
659 unsigned surface_format
,
660 unsigned buffer_size
,
664 uint32_t *dw
= brw_state_batch(brw
,
665 brw
->isl_dev
.ss
.size
,
666 brw
->isl_dev
.ss
.align
,
669 isl_buffer_fill_state(&brw
->isl_dev
, dw
,
670 .address
= (bo
? bo
->offset64
: 0) + buffer_offset
,
672 .format
= surface_format
,
674 .mocs
= tex_mocs
[brw
->gen
]);
677 brw_emit_reloc(&brw
->batch
, *out_offset
+ brw
->isl_dev
.ss
.addr_offset
,
679 I915_GEM_DOMAIN_SAMPLER
,
680 (rw
? I915_GEM_DOMAIN_SAMPLER
: 0));
685 brw_update_buffer_texture_surface(struct gl_context
*ctx
,
687 uint32_t *surf_offset
)
689 struct brw_context
*brw
= brw_context(ctx
);
690 struct gl_texture_object
*tObj
= ctx
->Texture
.Unit
[unit
]._Current
;
691 struct intel_buffer_object
*intel_obj
=
692 intel_buffer_object(tObj
->BufferObject
);
693 uint32_t size
= tObj
->BufferSize
;
694 struct brw_bo
*bo
= NULL
;
695 mesa_format format
= tObj
->_BufferObjectFormat
;
696 const enum isl_format isl_format
= brw_isl_format_for_mesa_format(format
);
697 int texel_size
= _mesa_get_format_bytes(format
);
700 size
= MIN2(size
, intel_obj
->Base
.Size
);
701 bo
= intel_bufferobj_buffer(brw
, intel_obj
, tObj
->BufferOffset
, size
,
705 /* The ARB_texture_buffer_specification says:
707 * "The number of texels in the buffer texture's texel array is given by
709 * floor(<buffer_size> / (<components> * sizeof(<base_type>)),
711 * where <buffer_size> is the size of the buffer object, in basic
712 * machine units and <components> and <base_type> are the element count
713 * and base data type for elements, as specified in Table X.1. The
714 * number of texels in the texel array is then clamped to the
715 * implementation-dependent limit MAX_TEXTURE_BUFFER_SIZE_ARB."
717 * We need to clamp the size in bytes to MAX_TEXTURE_BUFFER_SIZE * stride,
718 * so that when ISL divides by stride to obtain the number of texels, that
719 * texel count is clamped to MAX_TEXTURE_BUFFER_SIZE.
721 size
= MIN2(size
, ctx
->Const
.MaxTextureBufferSize
* (unsigned) texel_size
);
723 if (isl_format
== ISL_FORMAT_UNSUPPORTED
) {
724 _mesa_problem(NULL
, "bad format %s for texture buffer\n",
725 _mesa_get_format_name(format
));
728 brw_emit_buffer_surface_state(brw
, surf_offset
, bo
,
737 * Create the constant buffer surface. Vertex/fragment shader constants will be
738 * read from this buffer with Data Port Read instructions/messages.
741 brw_create_constant_surface(struct brw_context
*brw
,
745 uint32_t *out_offset
)
747 brw_emit_buffer_surface_state(brw
, out_offset
, bo
, offset
,
748 ISL_FORMAT_R32G32B32A32_FLOAT
,
753 * Create the buffer surface. Shader buffer variables will be
754 * read from / write to this buffer with Data Port Read/Write
755 * instructions/messages.
758 brw_create_buffer_surface(struct brw_context
*brw
,
762 uint32_t *out_offset
)
764 /* Use a raw surface so we can reuse existing untyped read/write/atomic
765 * messages. We need these specifically for the fragment shader since they
766 * include a pixel mask header that we need to ensure correct behavior
767 * with helper invocations, which cannot write to the buffer.
769 brw_emit_buffer_surface_state(brw
, out_offset
, bo
, offset
,
775 * Set up a binding table entry for use by stream output logic (transform
778 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
781 brw_update_sol_surface(struct brw_context
*brw
,
782 struct gl_buffer_object
*buffer_obj
,
783 uint32_t *out_offset
, unsigned num_vector_components
,
784 unsigned stride_dwords
, unsigned offset_dwords
)
786 struct intel_buffer_object
*intel_bo
= intel_buffer_object(buffer_obj
);
787 uint32_t offset_bytes
= 4 * offset_dwords
;
788 struct brw_bo
*bo
= intel_bufferobj_buffer(brw
, intel_bo
,
790 buffer_obj
->Size
- offset_bytes
,
792 uint32_t *surf
= brw_state_batch(brw
, 6 * 4, 32, out_offset
);
793 uint32_t pitch_minus_1
= 4*stride_dwords
- 1;
794 size_t size_dwords
= buffer_obj
->Size
/ 4;
795 uint32_t buffer_size_minus_1
, width
, height
, depth
, surface_format
;
797 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
798 * too big to map using a single binding table entry?
800 assert((size_dwords
- offset_dwords
) / stride_dwords
801 <= BRW_MAX_NUM_BUFFER_ENTRIES
);
803 if (size_dwords
> offset_dwords
+ num_vector_components
) {
804 /* There is room for at least 1 transform feedback output in the buffer.
805 * Compute the number of additional transform feedback outputs the
806 * buffer has room for.
808 buffer_size_minus_1
=
809 (size_dwords
- offset_dwords
- num_vector_components
) / stride_dwords
;
811 /* There isn't even room for a single transform feedback output in the
812 * buffer. We can't configure the binding table entry to prevent output
813 * entirely; we'll have to rely on the geometry shader to detect
814 * overflow. But to minimize the damage in case of a bug, set up the
815 * binding table entry to just allow a single output.
817 buffer_size_minus_1
= 0;
819 width
= buffer_size_minus_1
& 0x7f;
820 height
= (buffer_size_minus_1
& 0xfff80) >> 7;
821 depth
= (buffer_size_minus_1
& 0x7f00000) >> 20;
823 switch (num_vector_components
) {
825 surface_format
= ISL_FORMAT_R32_FLOAT
;
828 surface_format
= ISL_FORMAT_R32G32_FLOAT
;
831 surface_format
= ISL_FORMAT_R32G32B32_FLOAT
;
834 surface_format
= ISL_FORMAT_R32G32B32A32_FLOAT
;
837 unreachable("Invalid vector size for transform feedback output");
840 surf
[0] = BRW_SURFACE_BUFFER
<< BRW_SURFACE_TYPE_SHIFT
|
841 BRW_SURFACE_MIPMAPLAYOUT_BELOW
<< BRW_SURFACE_MIPLAYOUT_SHIFT
|
842 surface_format
<< BRW_SURFACE_FORMAT_SHIFT
|
843 BRW_SURFACE_RC_READ_WRITE
;
844 surf
[1] = bo
->offset64
+ offset_bytes
; /* reloc */
845 surf
[2] = (width
<< BRW_SURFACE_WIDTH_SHIFT
|
846 height
<< BRW_SURFACE_HEIGHT_SHIFT
);
847 surf
[3] = (depth
<< BRW_SURFACE_DEPTH_SHIFT
|
848 pitch_minus_1
<< BRW_SURFACE_PITCH_SHIFT
);
852 /* Emit relocation to surface contents. */
853 brw_emit_reloc(&brw
->batch
, *out_offset
+ 4, bo
, offset_bytes
,
854 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
);
857 /* Creates a new WM constant buffer reflecting the current fragment program's
858 * constants, if needed by the fragment program.
860 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
864 brw_upload_wm_pull_constants(struct brw_context
*brw
)
866 struct brw_stage_state
*stage_state
= &brw
->wm
.base
;
867 /* BRW_NEW_FRAGMENT_PROGRAM */
868 struct brw_program
*fp
= (struct brw_program
*) brw
->fragment_program
;
869 /* BRW_NEW_FS_PROG_DATA */
870 struct brw_stage_prog_data
*prog_data
= brw
->wm
.base
.prog_data
;
872 _mesa_shader_write_subroutine_indices(&brw
->ctx
, MESA_SHADER_FRAGMENT
);
873 /* _NEW_PROGRAM_CONSTANTS */
874 brw_upload_pull_constants(brw
, BRW_NEW_SURFACES
, &fp
->program
,
875 stage_state
, prog_data
);
878 const struct brw_tracked_state brw_wm_pull_constants
= {
880 .mesa
= _NEW_PROGRAM_CONSTANTS
,
881 .brw
= BRW_NEW_BATCH
|
883 BRW_NEW_FRAGMENT_PROGRAM
|
884 BRW_NEW_FS_PROG_DATA
,
886 .emit
= brw_upload_wm_pull_constants
,
890 * Creates a null renderbuffer surface.
892 * This is used when the shader doesn't write to any color output. An FB
893 * write to target 0 will still be emitted, because that's how the thread is
894 * terminated (and computed depth is returned), so we need to have the
895 * hardware discard the target 0 color output..
898 brw_emit_null_surface_state(struct brw_context
*brw
,
902 uint32_t *out_offset
)
904 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
907 * A null surface will be used in instances where an actual surface is
908 * not bound. When a write message is generated to a null surface, no
909 * actual surface is written to. When a read message (including any
910 * sampling engine message) is generated to a null surface, the result
911 * is all zeros. Note that a null surface type is allowed to be used
912 * with all messages, even if it is not specificially indicated as
913 * supported. All of the remaining fields in surface state are ignored
914 * for null surfaces, with the following exceptions:
916 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
917 * depth buffer’s corresponding state for all render target surfaces,
920 * - Surface Format must be R8G8B8A8_UNORM.
922 unsigned surface_type
= BRW_SURFACE_NULL
;
923 struct brw_bo
*bo
= NULL
;
924 unsigned pitch_minus_1
= 0;
925 uint32_t multisampling_state
= 0;
926 uint32_t *surf
= brw_state_batch(brw
, 6 * 4, 32, out_offset
);
929 /* On Gen6, null render targets seem to cause GPU hangs when
930 * multisampling. So work around this problem by rendering into dummy
933 * To decrease the amount of memory needed by the workaround buffer, we
934 * set its pitch to 128 bytes (the width of a Y tile). This means that
935 * the amount of memory needed for the workaround buffer is
936 * (width_in_tiles + height_in_tiles - 1) tiles.
938 * Note that since the workaround buffer will be interpreted by the
939 * hardware as an interleaved multisampled buffer, we need to compute
940 * width_in_tiles and height_in_tiles by dividing the width and height
941 * by 16 rather than the normal Y-tile size of 32.
943 unsigned width_in_tiles
= ALIGN(width
, 16) / 16;
944 unsigned height_in_tiles
= ALIGN(height
, 16) / 16;
945 unsigned size_needed
= (width_in_tiles
+ height_in_tiles
- 1) * 4096;
946 brw_get_scratch_bo(brw
, &brw
->wm
.multisampled_null_render_target_bo
,
948 bo
= brw
->wm
.multisampled_null_render_target_bo
;
949 surface_type
= BRW_SURFACE_2D
;
951 multisampling_state
= brw_get_surface_num_multisamples(samples
);
954 surf
[0] = (surface_type
<< BRW_SURFACE_TYPE_SHIFT
|
955 ISL_FORMAT_B8G8R8A8_UNORM
<< BRW_SURFACE_FORMAT_SHIFT
);
957 surf
[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT
|
958 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT
|
959 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT
|
960 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT
);
962 surf
[1] = bo
? bo
->offset64
: 0;
963 surf
[2] = ((width
- 1) << BRW_SURFACE_WIDTH_SHIFT
|
964 (height
- 1) << BRW_SURFACE_HEIGHT_SHIFT
);
966 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
969 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
971 surf
[3] = (BRW_SURFACE_TILED
| BRW_SURFACE_TILED_Y
|
972 pitch_minus_1
<< BRW_SURFACE_PITCH_SHIFT
);
973 surf
[4] = multisampling_state
;
977 brw_emit_reloc(&brw
->batch
, *out_offset
+ 4, bo
, 0,
978 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
);
983 * Sets up a surface state structure to point at the given region.
984 * While it is only used for the front/back buffer currently, it should be
985 * usable for further buffers when doing ARB_draw_buffer support.
988 gen4_update_renderbuffer_surface(struct brw_context
*brw
,
989 struct gl_renderbuffer
*rb
,
990 uint32_t flags
, unsigned unit
,
993 struct gl_context
*ctx
= &brw
->ctx
;
994 struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
995 struct intel_mipmap_tree
*mt
= irb
->mt
;
997 uint32_t tile_x
, tile_y
;
998 enum isl_format format
;
1001 mesa_format rb_format
= _mesa_get_render_format(ctx
, intel_rb_format(irb
));
1002 /* BRW_NEW_FS_PROG_DATA */
1004 assert(!(flags
& INTEL_RENDERBUFFER_LAYERED
));
1005 assert(!(flags
& INTEL_AUX_BUFFER_DISABLED
));
1007 if (rb
->TexImage
&& !brw
->has_surface_tile_offset
) {
1008 intel_renderbuffer_get_tile_offsets(irb
, &tile_x
, &tile_y
);
1010 if (tile_x
!= 0 || tile_y
!= 0) {
1011 /* Original gen4 hardware couldn't draw to a non-tile-aligned
1012 * destination in a miptree unless you actually setup your renderbuffer
1013 * as a miptree and used the fragile lod/array_index/etc. controls to
1014 * select the image. So, instead, we just make a new single-level
1015 * miptree and render into that.
1017 intel_renderbuffer_move_to_temp(brw
, irb
, false);
1018 assert(irb
->align_wa_mt
);
1019 mt
= irb
->align_wa_mt
;
1023 surf
= brw_state_batch(brw
, 6 * 4, 32, &offset
);
1025 format
= brw
->mesa_to_isl_render_format
[rb_format
];
1026 if (unlikely(!brw
->mesa_format_supports_render
[rb_format
])) {
1027 _mesa_problem(ctx
, "%s: renderbuffer format %s unsupported\n",
1028 __func__
, _mesa_get_format_name(rb_format
));
1031 surf
[0] = (BRW_SURFACE_2D
<< BRW_SURFACE_TYPE_SHIFT
|
1032 format
<< BRW_SURFACE_FORMAT_SHIFT
);
1035 assert(mt
->offset
% mt
->cpp
== 0);
1036 surf
[1] = (intel_renderbuffer_get_tile_offsets(irb
, &tile_x
, &tile_y
) +
1037 mt
->bo
->offset64
+ mt
->offset
);
1039 surf
[2] = ((rb
->Width
- 1) << BRW_SURFACE_WIDTH_SHIFT
|
1040 (rb
->Height
- 1) << BRW_SURFACE_HEIGHT_SHIFT
);
1042 surf
[3] = (brw_get_surface_tiling_bits(mt
->tiling
) |
1043 (mt
->pitch
- 1) << BRW_SURFACE_PITCH_SHIFT
);
1045 surf
[4] = brw_get_surface_num_multisamples(mt
->num_samples
);
1047 assert(brw
->has_surface_tile_offset
|| (tile_x
== 0 && tile_y
== 0));
1048 /* Note that the low bits of these fields are missing, so
1049 * there's the possibility of getting in trouble.
1051 assert(tile_x
% 4 == 0);
1052 assert(tile_y
% 2 == 0);
1053 surf
[5] = ((tile_x
/ 4) << BRW_SURFACE_X_OFFSET_SHIFT
|
1054 (tile_y
/ 2) << BRW_SURFACE_Y_OFFSET_SHIFT
|
1055 (mt
->valign
== 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE
: 0));
1059 if (!ctx
->Color
.ColorLogicOpEnabled
&& !ctx
->Color
._AdvancedBlendMode
&&
1060 (ctx
->Color
.BlendEnabled
& (1 << unit
)))
1061 surf
[0] |= BRW_SURFACE_BLEND_ENABLED
;
1063 if (!ctx
->Color
.ColorMask
[unit
][0])
1064 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT
;
1065 if (!ctx
->Color
.ColorMask
[unit
][1])
1066 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT
;
1067 if (!ctx
->Color
.ColorMask
[unit
][2])
1068 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT
;
1070 /* As mentioned above, disable writes to the alpha component when the
1071 * renderbuffer is XRGB.
1073 if (ctx
->DrawBuffer
->Visual
.alphaBits
== 0 ||
1074 !ctx
->Color
.ColorMask
[unit
][3]) {
1075 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT
;
1079 brw_emit_reloc(&brw
->batch
, offset
+ 4, mt
->bo
, surf
[1] - mt
->bo
->offset64
,
1080 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
);
1086 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
1089 brw_update_renderbuffer_surfaces(struct brw_context
*brw
,
1090 const struct gl_framebuffer
*fb
,
1091 uint32_t render_target_start
,
1092 uint32_t *surf_offset
)
1095 const unsigned int w
= _mesa_geometric_width(fb
);
1096 const unsigned int h
= _mesa_geometric_height(fb
);
1097 const unsigned int s
= _mesa_geometric_samples(fb
);
1099 /* Update surfaces for drawing buffers */
1100 if (fb
->_NumColorDrawBuffers
>= 1) {
1101 for (i
= 0; i
< fb
->_NumColorDrawBuffers
; i
++) {
1102 const uint32_t surf_index
= render_target_start
+ i
;
1103 const int flags
= (_mesa_geometric_layers(fb
) > 0 ?
1104 INTEL_RENDERBUFFER_LAYERED
: 0) |
1105 (brw
->draw_aux_buffer_disabled
[i
] ?
1106 INTEL_AUX_BUFFER_DISABLED
: 0);
1108 if (intel_renderbuffer(fb
->_ColorDrawBuffers
[i
])) {
1109 surf_offset
[surf_index
] =
1110 brw
->vtbl
.update_renderbuffer_surface(
1111 brw
, fb
->_ColorDrawBuffers
[i
], flags
, i
, surf_index
);
1113 brw
->vtbl
.emit_null_surface_state(brw
, w
, h
, s
,
1114 &surf_offset
[surf_index
]);
1118 const uint32_t surf_index
= render_target_start
;
1119 brw
->vtbl
.emit_null_surface_state(brw
, w
, h
, s
,
1120 &surf_offset
[surf_index
]);
1125 update_renderbuffer_surfaces(struct brw_context
*brw
)
1127 const struct gl_context
*ctx
= &brw
->ctx
;
1129 /* BRW_NEW_FS_PROG_DATA */
1130 const struct brw_wm_prog_data
*wm_prog_data
=
1131 brw_wm_prog_data(brw
->wm
.base
.prog_data
);
1133 /* _NEW_BUFFERS | _NEW_COLOR */
1134 const struct gl_framebuffer
*fb
= ctx
->DrawBuffer
;
1135 brw_update_renderbuffer_surfaces(
1137 wm_prog_data
->binding_table
.render_target_start
,
1138 brw
->wm
.base
.surf_offset
);
1139 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1142 const struct brw_tracked_state brw_renderbuffer_surfaces
= {
1144 .mesa
= _NEW_BUFFERS
|
1146 .brw
= BRW_NEW_BATCH
|
1148 BRW_NEW_FS_PROG_DATA
,
1150 .emit
= update_renderbuffer_surfaces
,
1153 const struct brw_tracked_state gen6_renderbuffer_surfaces
= {
1155 .mesa
= _NEW_BUFFERS
,
1156 .brw
= BRW_NEW_BATCH
|
1159 .emit
= update_renderbuffer_surfaces
,
1163 update_renderbuffer_read_surfaces(struct brw_context
*brw
)
1165 const struct gl_context
*ctx
= &brw
->ctx
;
1167 /* BRW_NEW_FS_PROG_DATA */
1168 const struct brw_wm_prog_data
*wm_prog_data
=
1169 brw_wm_prog_data(brw
->wm
.base
.prog_data
);
1171 /* BRW_NEW_FRAGMENT_PROGRAM */
1172 if (!ctx
->Extensions
.MESA_shader_framebuffer_fetch
&&
1173 brw
->fragment_program
&& brw
->fragment_program
->info
.outputs_read
) {
1175 const struct gl_framebuffer
*fb
= ctx
->DrawBuffer
;
1177 for (unsigned i
= 0; i
< fb
->_NumColorDrawBuffers
; i
++) {
1178 struct gl_renderbuffer
*rb
= fb
->_ColorDrawBuffers
[i
];
1179 const struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
1180 const unsigned surf_index
=
1181 wm_prog_data
->binding_table
.render_target_read_start
+ i
;
1182 uint32_t *surf_offset
= &brw
->wm
.base
.surf_offset
[surf_index
];
1185 const enum isl_format format
= brw
->mesa_to_isl_render_format
[
1186 _mesa_get_render_format(ctx
, intel_rb_format(irb
))];
1187 assert(isl_format_supports_sampling(&brw
->screen
->devinfo
,
1190 /* Override the target of the texture if the render buffer is a
1191 * single slice of a 3D texture (since the minimum array element
1192 * field of the surface state structure is ignored by the sampler
1193 * unit for 3D textures on some hardware), or if the render buffer
1194 * is a 1D array (since shaders always provide the array index
1195 * coordinate at the Z component to avoid state-dependent
1196 * recompiles when changing the texture target of the
1199 const GLenum target
=
1200 (irb
->mt
->target
== GL_TEXTURE_3D
&&
1201 irb
->layer_count
== 1) ? GL_TEXTURE_2D
:
1202 irb
->mt
->target
== GL_TEXTURE_1D_ARRAY
? GL_TEXTURE_2D_ARRAY
:
1205 /* intel_renderbuffer::mt_layer is expressed in sample units for
1206 * the UMS and CMS multisample layouts, but
1207 * intel_renderbuffer::layer_count is expressed in units of whole
1208 * logical layers regardless of the multisample layout.
1210 const unsigned mt_layer_unit
=
1211 (irb
->mt
->msaa_layout
== INTEL_MSAA_LAYOUT_UMS
||
1212 irb
->mt
->msaa_layout
== INTEL_MSAA_LAYOUT_CMS
) ?
1213 MAX2(irb
->mt
->num_samples
, 1) : 1;
1215 const struct isl_view view
= {
1217 .base_level
= irb
->mt_level
- irb
->mt
->first_level
,
1219 .base_array_layer
= irb
->mt_layer
/ mt_layer_unit
,
1220 .array_len
= irb
->layer_count
,
1221 .swizzle
= ISL_SWIZZLE_IDENTITY
,
1222 .usage
= ISL_SURF_USAGE_TEXTURE_BIT
,
1225 const int flags
= brw
->draw_aux_buffer_disabled
[i
] ?
1226 INTEL_AUX_BUFFER_DISABLED
: 0;
1227 brw_emit_surface_state(brw
, irb
->mt
, flags
, target
, view
,
1229 surf_offset
, surf_index
,
1230 I915_GEM_DOMAIN_SAMPLER
, 0);
1233 brw
->vtbl
.emit_null_surface_state(
1234 brw
, _mesa_geometric_width(fb
), _mesa_geometric_height(fb
),
1235 _mesa_geometric_samples(fb
), surf_offset
);
1239 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1243 const struct brw_tracked_state brw_renderbuffer_read_surfaces
= {
1245 .mesa
= _NEW_BUFFERS
,
1246 .brw
= BRW_NEW_BATCH
|
1247 BRW_NEW_FRAGMENT_PROGRAM
|
1248 BRW_NEW_FS_PROG_DATA
,
1250 .emit
= update_renderbuffer_read_surfaces
,
1254 update_stage_texture_surfaces(struct brw_context
*brw
,
1255 const struct gl_program
*prog
,
1256 struct brw_stage_state
*stage_state
,
1257 bool for_gather
, uint32_t plane
)
1262 struct gl_context
*ctx
= &brw
->ctx
;
1264 uint32_t *surf_offset
= stage_state
->surf_offset
;
1266 /* BRW_NEW_*_PROG_DATA */
1268 surf_offset
+= stage_state
->prog_data
->binding_table
.gather_texture_start
;
1270 surf_offset
+= stage_state
->prog_data
->binding_table
.plane_start
[plane
];
1272 unsigned num_samplers
= util_last_bit(prog
->SamplersUsed
);
1273 for (unsigned s
= 0; s
< num_samplers
; s
++) {
1276 if (prog
->SamplersUsed
& (1 << s
)) {
1277 const unsigned unit
= prog
->SamplerUnits
[s
];
1280 if (ctx
->Texture
.Unit
[unit
]._Current
) {
1281 brw_update_texture_surface(ctx
, unit
, surf_offset
+ s
, for_gather
, plane
);
1289 * Construct SURFACE_STATE objects for enabled textures.
1292 brw_update_texture_surfaces(struct brw_context
*brw
)
1294 /* BRW_NEW_VERTEX_PROGRAM */
1295 struct gl_program
*vs
= (struct gl_program
*) brw
->vertex_program
;
1297 /* BRW_NEW_TESS_PROGRAMS */
1298 struct gl_program
*tcs
= (struct gl_program
*) brw
->tess_ctrl_program
;
1299 struct gl_program
*tes
= (struct gl_program
*) brw
->tess_eval_program
;
1301 /* BRW_NEW_GEOMETRY_PROGRAM */
1302 struct gl_program
*gs
= (struct gl_program
*) brw
->geometry_program
;
1304 /* BRW_NEW_FRAGMENT_PROGRAM */
1305 struct gl_program
*fs
= (struct gl_program
*) brw
->fragment_program
;
1308 update_stage_texture_surfaces(brw
, vs
, &brw
->vs
.base
, false, 0);
1309 update_stage_texture_surfaces(brw
, tcs
, &brw
->tcs
.base
, false, 0);
1310 update_stage_texture_surfaces(brw
, tes
, &brw
->tes
.base
, false, 0);
1311 update_stage_texture_surfaces(brw
, gs
, &brw
->gs
.base
, false, 0);
1312 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, false, 0);
1314 /* emit alternate set of surface state for gather. this
1315 * allows the surface format to be overriden for only the
1316 * gather4 messages. */
1318 if (vs
&& vs
->nir
->info
.uses_texture_gather
)
1319 update_stage_texture_surfaces(brw
, vs
, &brw
->vs
.base
, true, 0);
1320 if (tcs
&& tcs
->nir
->info
.uses_texture_gather
)
1321 update_stage_texture_surfaces(brw
, tcs
, &brw
->tcs
.base
, true, 0);
1322 if (tes
&& tes
->nir
->info
.uses_texture_gather
)
1323 update_stage_texture_surfaces(brw
, tes
, &brw
->tes
.base
, true, 0);
1324 if (gs
&& gs
->nir
->info
.uses_texture_gather
)
1325 update_stage_texture_surfaces(brw
, gs
, &brw
->gs
.base
, true, 0);
1326 if (fs
&& fs
->nir
->info
.uses_texture_gather
)
1327 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, true, 0);
1331 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, false, 1);
1332 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, false, 2);
1335 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1338 const struct brw_tracked_state brw_texture_surfaces
= {
1340 .mesa
= _NEW_TEXTURE
,
1341 .brw
= BRW_NEW_BATCH
|
1343 BRW_NEW_FRAGMENT_PROGRAM
|
1344 BRW_NEW_FS_PROG_DATA
|
1345 BRW_NEW_GEOMETRY_PROGRAM
|
1346 BRW_NEW_GS_PROG_DATA
|
1347 BRW_NEW_TESS_PROGRAMS
|
1348 BRW_NEW_TCS_PROG_DATA
|
1349 BRW_NEW_TES_PROG_DATA
|
1350 BRW_NEW_TEXTURE_BUFFER
|
1351 BRW_NEW_VERTEX_PROGRAM
|
1352 BRW_NEW_VS_PROG_DATA
,
1354 .emit
= brw_update_texture_surfaces
,
1358 brw_update_cs_texture_surfaces(struct brw_context
*brw
)
1360 /* BRW_NEW_COMPUTE_PROGRAM */
1361 struct gl_program
*cs
= (struct gl_program
*) brw
->compute_program
;
1364 update_stage_texture_surfaces(brw
, cs
, &brw
->cs
.base
, false, 0);
1366 /* emit alternate set of surface state for gather. this
1367 * allows the surface format to be overriden for only the
1371 if (cs
&& cs
->nir
->info
.uses_texture_gather
)
1372 update_stage_texture_surfaces(brw
, cs
, &brw
->cs
.base
, true, 0);
1375 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1378 const struct brw_tracked_state brw_cs_texture_surfaces
= {
1380 .mesa
= _NEW_TEXTURE
,
1381 .brw
= BRW_NEW_BATCH
|
1383 BRW_NEW_COMPUTE_PROGRAM
,
1385 .emit
= brw_update_cs_texture_surfaces
,
1390 brw_upload_ubo_surfaces(struct brw_context
*brw
, struct gl_program
*prog
,
1391 struct brw_stage_state
*stage_state
,
1392 struct brw_stage_prog_data
*prog_data
)
1394 struct gl_context
*ctx
= &brw
->ctx
;
1399 uint32_t *ubo_surf_offsets
=
1400 &stage_state
->surf_offset
[prog_data
->binding_table
.ubo_start
];
1402 for (int i
= 0; i
< prog
->info
.num_ubos
; i
++) {
1403 struct gl_uniform_buffer_binding
*binding
=
1404 &ctx
->UniformBufferBindings
[prog
->sh
.UniformBlocks
[i
]->Binding
];
1406 if (binding
->BufferObject
== ctx
->Shared
->NullBufferObj
) {
1407 brw
->vtbl
.emit_null_surface_state(brw
, 1, 1, 1, &ubo_surf_offsets
[i
]);
1409 struct intel_buffer_object
*intel_bo
=
1410 intel_buffer_object(binding
->BufferObject
);
1411 GLsizeiptr size
= binding
->BufferObject
->Size
- binding
->Offset
;
1412 if (!binding
->AutomaticSize
)
1413 size
= MIN2(size
, binding
->Size
);
1415 intel_bufferobj_buffer(brw
, intel_bo
,
1418 brw_create_constant_surface(brw
, bo
, binding
->Offset
,
1420 &ubo_surf_offsets
[i
]);
1424 uint32_t *ssbo_surf_offsets
=
1425 &stage_state
->surf_offset
[prog_data
->binding_table
.ssbo_start
];
1427 for (int i
= 0; i
< prog
->info
.num_ssbos
; i
++) {
1428 struct gl_shader_storage_buffer_binding
*binding
=
1429 &ctx
->ShaderStorageBufferBindings
[prog
->sh
.ShaderStorageBlocks
[i
]->Binding
];
1431 if (binding
->BufferObject
== ctx
->Shared
->NullBufferObj
) {
1432 brw
->vtbl
.emit_null_surface_state(brw
, 1, 1, 1, &ssbo_surf_offsets
[i
]);
1434 struct intel_buffer_object
*intel_bo
=
1435 intel_buffer_object(binding
->BufferObject
);
1436 GLsizeiptr size
= binding
->BufferObject
->Size
- binding
->Offset
;
1437 if (!binding
->AutomaticSize
)
1438 size
= MIN2(size
, binding
->Size
);
1440 intel_bufferobj_buffer(brw
, intel_bo
,
1443 brw_create_buffer_surface(brw
, bo
, binding
->Offset
,
1445 &ssbo_surf_offsets
[i
]);
1449 stage_state
->push_constants_dirty
= true;
1451 if (prog
->info
.num_ubos
|| prog
->info
.num_ssbos
)
1452 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1456 brw_upload_wm_ubo_surfaces(struct brw_context
*brw
)
1458 struct gl_context
*ctx
= &brw
->ctx
;
1460 struct gl_program
*prog
= ctx
->FragmentProgram
._Current
;
1462 /* BRW_NEW_FS_PROG_DATA */
1463 brw_upload_ubo_surfaces(brw
, prog
, &brw
->wm
.base
, brw
->wm
.base
.prog_data
);
1466 const struct brw_tracked_state brw_wm_ubo_surfaces
= {
1468 .mesa
= _NEW_PROGRAM
,
1469 .brw
= BRW_NEW_BATCH
|
1471 BRW_NEW_FS_PROG_DATA
|
1472 BRW_NEW_UNIFORM_BUFFER
,
1474 .emit
= brw_upload_wm_ubo_surfaces
,
1478 brw_upload_cs_ubo_surfaces(struct brw_context
*brw
)
1480 struct gl_context
*ctx
= &brw
->ctx
;
1482 struct gl_program
*prog
=
1483 ctx
->_Shader
->CurrentProgram
[MESA_SHADER_COMPUTE
];
1485 /* BRW_NEW_CS_PROG_DATA */
1486 brw_upload_ubo_surfaces(brw
, prog
, &brw
->cs
.base
, brw
->cs
.base
.prog_data
);
1489 const struct brw_tracked_state brw_cs_ubo_surfaces
= {
1491 .mesa
= _NEW_PROGRAM
,
1492 .brw
= BRW_NEW_BATCH
|
1494 BRW_NEW_CS_PROG_DATA
|
1495 BRW_NEW_UNIFORM_BUFFER
,
1497 .emit
= brw_upload_cs_ubo_surfaces
,
1501 brw_upload_abo_surfaces(struct brw_context
*brw
,
1502 const struct gl_program
*prog
,
1503 struct brw_stage_state
*stage_state
,
1504 struct brw_stage_prog_data
*prog_data
)
1506 struct gl_context
*ctx
= &brw
->ctx
;
1507 uint32_t *surf_offsets
=
1508 &stage_state
->surf_offset
[prog_data
->binding_table
.abo_start
];
1510 if (prog
->info
.num_abos
) {
1511 for (unsigned i
= 0; i
< prog
->info
.num_abos
; i
++) {
1512 struct gl_atomic_buffer_binding
*binding
=
1513 &ctx
->AtomicBufferBindings
[prog
->sh
.AtomicBuffers
[i
]->Binding
];
1514 struct intel_buffer_object
*intel_bo
=
1515 intel_buffer_object(binding
->BufferObject
);
1517 intel_bufferobj_buffer(brw
, intel_bo
, binding
->Offset
,
1518 intel_bo
->Base
.Size
- binding
->Offset
,
1521 brw_emit_buffer_surface_state(brw
, &surf_offsets
[i
], bo
,
1522 binding
->Offset
, ISL_FORMAT_RAW
,
1523 bo
->size
- binding
->Offset
, 1, true);
1526 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1531 brw_upload_wm_abo_surfaces(struct brw_context
*brw
)
1534 const struct gl_program
*wm
= brw
->fragment_program
;
1537 /* BRW_NEW_FS_PROG_DATA */
1538 brw_upload_abo_surfaces(brw
, wm
, &brw
->wm
.base
, brw
->wm
.base
.prog_data
);
1542 const struct brw_tracked_state brw_wm_abo_surfaces
= {
1544 .mesa
= _NEW_PROGRAM
,
1545 .brw
= BRW_NEW_ATOMIC_BUFFER
|
1548 BRW_NEW_FS_PROG_DATA
,
1550 .emit
= brw_upload_wm_abo_surfaces
,
1554 brw_upload_cs_abo_surfaces(struct brw_context
*brw
)
1557 const struct gl_program
*cp
= brw
->compute_program
;
1560 /* BRW_NEW_CS_PROG_DATA */
1561 brw_upload_abo_surfaces(brw
, cp
, &brw
->cs
.base
, brw
->cs
.base
.prog_data
);
1565 const struct brw_tracked_state brw_cs_abo_surfaces
= {
1567 .mesa
= _NEW_PROGRAM
,
1568 .brw
= BRW_NEW_ATOMIC_BUFFER
|
1571 BRW_NEW_CS_PROG_DATA
,
1573 .emit
= brw_upload_cs_abo_surfaces
,
1577 brw_upload_cs_image_surfaces(struct brw_context
*brw
)
1580 const struct gl_program
*cp
= brw
->compute_program
;
1583 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1584 brw_upload_image_surfaces(brw
, cp
, &brw
->cs
.base
,
1585 brw
->cs
.base
.prog_data
);
1589 const struct brw_tracked_state brw_cs_image_surfaces
= {
1591 .mesa
= _NEW_TEXTURE
| _NEW_PROGRAM
,
1592 .brw
= BRW_NEW_BATCH
|
1594 BRW_NEW_CS_PROG_DATA
|
1597 .emit
= brw_upload_cs_image_surfaces
,
1601 get_image_format(struct brw_context
*brw
, mesa_format format
, GLenum access
)
1603 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
1604 enum isl_format hw_format
= brw_isl_format_for_mesa_format(format
);
1605 if (access
== GL_WRITE_ONLY
) {
1607 } else if (isl_has_matching_typed_storage_image_format(devinfo
, hw_format
)) {
1608 /* Typed surface reads support a very limited subset of the shader
1609 * image formats. Translate it into the closest format the
1610 * hardware supports.
1612 return isl_lower_storage_image_format(devinfo
, hw_format
);
1614 /* The hardware doesn't actually support a typed format that we can use
1615 * so we have to fall back to untyped read/write messages.
1617 return ISL_FORMAT_RAW
;
1622 update_default_image_param(struct brw_context
*brw
,
1623 struct gl_image_unit
*u
,
1624 unsigned surface_idx
,
1625 struct brw_image_param
*param
)
1627 memset(param
, 0, sizeof(*param
));
1628 param
->surface_idx
= surface_idx
;
1629 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1630 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1631 * detailed explanation of these parameters.
1633 param
->swizzling
[0] = 0xff;
1634 param
->swizzling
[1] = 0xff;
1638 update_buffer_image_param(struct brw_context
*brw
,
1639 struct gl_image_unit
*u
,
1640 unsigned surface_idx
,
1641 struct brw_image_param
*param
)
1643 struct gl_buffer_object
*obj
= u
->TexObj
->BufferObject
;
1644 const uint32_t size
= MIN2((uint32_t)u
->TexObj
->BufferSize
, obj
->Size
);
1645 update_default_image_param(brw
, u
, surface_idx
, param
);
1647 param
->size
[0] = size
/ _mesa_get_format_bytes(u
->_ActualFormat
);
1648 param
->stride
[0] = _mesa_get_format_bytes(u
->_ActualFormat
);
1652 update_texture_image_param(struct brw_context
*brw
,
1653 struct gl_image_unit
*u
,
1654 unsigned surface_idx
,
1655 struct brw_image_param
*param
)
1657 struct intel_mipmap_tree
*mt
= intel_texture_object(u
->TexObj
)->mt
;
1659 update_default_image_param(brw
, u
, surface_idx
, param
);
1661 param
->size
[0] = minify(mt
->logical_width0
, u
->Level
);
1662 param
->size
[1] = minify(mt
->logical_height0
, u
->Level
);
1663 param
->size
[2] = (!u
->Layered
? 1 :
1664 u
->TexObj
->Target
== GL_TEXTURE_CUBE_MAP
? 6 :
1665 u
->TexObj
->Target
== GL_TEXTURE_3D
?
1666 minify(mt
->logical_depth0
, u
->Level
) :
1667 mt
->logical_depth0
);
1669 intel_miptree_get_image_offset(mt
, u
->Level
, u
->_Layer
,
1673 param
->stride
[0] = mt
->cpp
;
1674 param
->stride
[1] = mt
->pitch
/ mt
->cpp
;
1676 brw_miptree_get_horizontal_slice_pitch(brw
, mt
, u
->Level
);
1678 brw_miptree_get_vertical_slice_pitch(brw
, mt
, u
->Level
);
1680 if (mt
->tiling
== I915_TILING_X
) {
1681 /* An X tile is a rectangular block of 512x8 bytes. */
1682 param
->tiling
[0] = _mesa_logbase2(512 / mt
->cpp
);
1683 param
->tiling
[1] = _mesa_logbase2(8);
1685 if (brw
->has_swizzling
) {
1686 /* Right shifts required to swizzle bits 9 and 10 of the memory
1687 * address with bit 6.
1689 param
->swizzling
[0] = 3;
1690 param
->swizzling
[1] = 4;
1692 } else if (mt
->tiling
== I915_TILING_Y
) {
1693 /* The layout of a Y-tiled surface in memory isn't really fundamentally
1694 * different to the layout of an X-tiled surface, we simply pretend that
1695 * the surface is broken up in a number of smaller 16Bx32 tiles, each
1696 * one arranged in X-major order just like is the case for X-tiling.
1698 param
->tiling
[0] = _mesa_logbase2(16 / mt
->cpp
);
1699 param
->tiling
[1] = _mesa_logbase2(32);
1701 if (brw
->has_swizzling
) {
1702 /* Right shift required to swizzle bit 9 of the memory address with
1705 param
->swizzling
[0] = 3;
1709 /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The
1710 * address calculation algorithm (emit_address_calculation() in
1711 * brw_fs_surface_builder.cpp) handles this as a sort of tiling with
1712 * modulus equal to the LOD.
1714 param
->tiling
[2] = (u
->TexObj
->Target
== GL_TEXTURE_3D
? u
->Level
:
1719 update_image_surface(struct brw_context
*brw
,
1720 struct gl_image_unit
*u
,
1722 unsigned surface_idx
,
1723 uint32_t *surf_offset
,
1724 struct brw_image_param
*param
)
1726 if (_mesa_is_image_unit_valid(&brw
->ctx
, u
)) {
1727 struct gl_texture_object
*obj
= u
->TexObj
;
1728 const unsigned format
= get_image_format(brw
, u
->_ActualFormat
, access
);
1730 if (obj
->Target
== GL_TEXTURE_BUFFER
) {
1731 struct intel_buffer_object
*intel_obj
=
1732 intel_buffer_object(obj
->BufferObject
);
1733 const unsigned texel_size
= (format
== ISL_FORMAT_RAW
? 1 :
1734 _mesa_get_format_bytes(u
->_ActualFormat
));
1736 brw_emit_buffer_surface_state(
1737 brw
, surf_offset
, intel_obj
->buffer
, obj
->BufferOffset
,
1738 format
, intel_obj
->Base
.Size
, texel_size
,
1739 access
!= GL_READ_ONLY
);
1741 update_buffer_image_param(brw
, u
, surface_idx
, param
);
1744 struct intel_texture_object
*intel_obj
= intel_texture_object(obj
);
1745 struct intel_mipmap_tree
*mt
= intel_obj
->mt
;
1747 if (format
== ISL_FORMAT_RAW
) {
1748 brw_emit_buffer_surface_state(
1749 brw
, surf_offset
, mt
->bo
, mt
->offset
,
1750 format
, mt
->bo
->size
- mt
->offset
, 1 /* pitch */,
1751 access
!= GL_READ_ONLY
);
1754 const unsigned num_layers
= (!u
->Layered
? 1 :
1755 obj
->Target
== GL_TEXTURE_CUBE_MAP
? 6 :
1756 mt
->logical_depth0
);
1758 struct isl_view view
= {
1760 .base_level
= obj
->MinLevel
+ u
->Level
,
1762 .base_array_layer
= obj
->MinLayer
+ u
->_Layer
,
1763 .array_len
= num_layers
,
1764 .swizzle
= ISL_SWIZZLE_IDENTITY
,
1765 .usage
= ISL_SURF_USAGE_STORAGE_BIT
,
1768 const int surf_index
= surf_offset
- &brw
->wm
.base
.surf_offset
[0];
1769 assert(!intel_miptree_has_color_unresolved(mt
,
1771 view
.base_array_layer
,
1773 brw_emit_surface_state(brw
, mt
, INTEL_AUX_BUFFER_DISABLED
,
1774 mt
->target
, view
, tex_mocs
[brw
->gen
],
1775 surf_offset
, surf_index
,
1776 I915_GEM_DOMAIN_SAMPLER
,
1777 access
== GL_READ_ONLY
? 0 :
1778 I915_GEM_DOMAIN_SAMPLER
);
1781 update_texture_image_param(brw
, u
, surface_idx
, param
);
1785 brw
->vtbl
.emit_null_surface_state(brw
, 1, 1, 1, surf_offset
);
1786 update_default_image_param(brw
, u
, surface_idx
, param
);
1791 brw_upload_image_surfaces(struct brw_context
*brw
,
1792 const struct gl_program
*prog
,
1793 struct brw_stage_state
*stage_state
,
1794 struct brw_stage_prog_data
*prog_data
)
1797 struct gl_context
*ctx
= &brw
->ctx
;
1799 if (prog
->info
.num_images
) {
1800 for (unsigned i
= 0; i
< prog
->info
.num_images
; i
++) {
1801 struct gl_image_unit
*u
= &ctx
->ImageUnits
[prog
->sh
.ImageUnits
[i
]];
1802 const unsigned surf_idx
= prog_data
->binding_table
.image_start
+ i
;
1804 update_image_surface(brw
, u
, prog
->sh
.ImageAccess
[i
],
1806 &stage_state
->surf_offset
[surf_idx
],
1807 &prog_data
->image_param
[i
]);
1810 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1811 /* This may have changed the image metadata dependent on the context
1812 * image unit state and passed to the program as uniforms, make sure
1813 * that push and pull constants are reuploaded.
1815 brw
->NewGLState
|= _NEW_PROGRAM_CONSTANTS
;
1820 brw_upload_wm_image_surfaces(struct brw_context
*brw
)
1822 /* BRW_NEW_FRAGMENT_PROGRAM */
1823 const struct gl_program
*wm
= brw
->fragment_program
;
1826 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1827 brw_upload_image_surfaces(brw
, wm
, &brw
->wm
.base
,
1828 brw
->wm
.base
.prog_data
);
1832 const struct brw_tracked_state brw_wm_image_surfaces
= {
1834 .mesa
= _NEW_TEXTURE
,
1835 .brw
= BRW_NEW_BATCH
|
1837 BRW_NEW_FRAGMENT_PROGRAM
|
1838 BRW_NEW_FS_PROG_DATA
|
1841 .emit
= brw_upload_wm_image_surfaces
,
1845 gen4_init_vtable_surface_functions(struct brw_context
*brw
)
1847 brw
->vtbl
.update_renderbuffer_surface
= gen4_update_renderbuffer_surface
;
1848 brw
->vtbl
.emit_null_surface_state
= brw_emit_null_surface_state
;
1852 gen6_init_vtable_surface_functions(struct brw_context
*brw
)
1854 gen4_init_vtable_surface_functions(brw
);
1855 brw
->vtbl
.update_renderbuffer_surface
= brw_update_renderbuffer_surface
;
1859 brw_upload_cs_work_groups_surface(struct brw_context
*brw
)
1861 struct gl_context
*ctx
= &brw
->ctx
;
1863 struct gl_program
*prog
=
1864 ctx
->_Shader
->CurrentProgram
[MESA_SHADER_COMPUTE
];
1865 /* BRW_NEW_CS_PROG_DATA */
1866 const struct brw_cs_prog_data
*cs_prog_data
=
1867 brw_cs_prog_data(brw
->cs
.base
.prog_data
);
1869 if (prog
&& cs_prog_data
->uses_num_work_groups
) {
1870 const unsigned surf_idx
=
1871 cs_prog_data
->binding_table
.work_groups_start
;
1872 uint32_t *surf_offset
= &brw
->cs
.base
.surf_offset
[surf_idx
];
1876 if (brw
->compute
.num_work_groups_bo
== NULL
) {
1878 intel_upload_data(brw
,
1879 (void *)brw
->compute
.num_work_groups
,
1885 bo
= brw
->compute
.num_work_groups_bo
;
1886 bo_offset
= brw
->compute
.num_work_groups_offset
;
1889 brw_emit_buffer_surface_state(brw
, surf_offset
,
1892 3 * sizeof(GLuint
), 1, true);
1893 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1897 const struct brw_tracked_state brw_cs_work_groups_surface
= {
1899 .brw
= BRW_NEW_BLORP
|
1900 BRW_NEW_CS_PROG_DATA
|
1901 BRW_NEW_CS_WORK_GROUPS
1903 .emit
= brw_upload_cs_work_groups_surface
,