2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keithw@vmware.com>
33 #include "compiler/nir/nir.h"
34 #include "main/context.h"
35 #include "main/blend.h"
36 #include "main/mtypes.h"
37 #include "main/samplerobj.h"
38 #include "main/shaderimage.h"
39 #include "main/teximage.h"
40 #include "program/prog_parameter.h"
41 #include "program/prog_instruction.h"
42 #include "main/framebuffer.h"
43 #include "main/shaderapi.h"
47 #include "intel_mipmap_tree.h"
48 #include "intel_batchbuffer.h"
49 #include "intel_tex.h"
50 #include "intel_fbo.h"
51 #include "intel_buffer_objects.h"
53 #include "brw_context.h"
54 #include "brw_state.h"
55 #include "brw_defines.h"
/* Flag bits carried in the `flags` argument of the surface-state emitters in
 * this file, followed by the MOCS lookup tables.  tex_mocs is indexed by
 * brw->gen when buffer surface state is emitted; the table initialisers are
 * not visible in this view.
 */
59 INTEL_RENDERBUFFER_LAYERED
= 1 << 0,
60 INTEL_AUX_BUFFER_DISABLED
= 1 << 1,
63 uint32_t tex_mocs
[] = {
70 uint32_t rb_mocs
[] = {
/* Fill `surf` with the ISL surface description for miptree `mt` as it should
 * be seen through `target`.
 *
 * The dim layout implied by `target` is compared with surf->dim_layout.  On
 * a mismatch the surface is re-described as a single level/slice: the view
 * must cover exactly one level and one array layer, the tile offsets of that
 * slice are added to the offset, the logical level-0 extent is minified by
 * (view->base_level - mt->first_level), array_len is forced to 1,
 * surf->dim_layout is overridden, and the view's base_level and
 * base_array_layer are reset to 0.
 *
 * The override path requires brw->has_surface_tile_offset and expects
 * *tile_x and *tile_y to be 0 on entry (both asserted).
 */
78 get_isl_surf(struct brw_context
*brw
, struct intel_mipmap_tree
*mt
,
79 GLenum target
, struct isl_view
*view
,
80 uint32_t *tile_x
, uint32_t *tile_y
,
81 uint32_t *offset
, struct isl_surf
*surf
)
83 if (mt
->surf
.size
> 0) {
86 intel_miptree_get_isl_surf(brw
, mt
, surf
);
88 surf
->dim
= get_isl_surf_dim(target
);
91 const enum isl_dim_layout dim_layout
=
92 get_isl_dim_layout(&brw
->screen
->devinfo
, mt
->surf
.tiling
, target
,
95 if (surf
->dim_layout
== dim_layout
)
98 /* The layout of the specified texture target is not compatible with the
99 * actual layout of the miptree structure in memory -- You're entering
100 * dangerous territory, this can only possibly work if you only intended
101 * to access a single level and slice of the texture, and the hardware
102 * supports the tile offset feature in order to allow non-tile-aligned
103 * base offsets, since we'll have to point the hardware to the first
104 * texel of the level instead of relying on the usual base level/layer
107 assert(brw
->has_surface_tile_offset
);
108 assert(view
->levels
== 1 && view
->array_len
== 1);
109 assert(*tile_x
== 0 && *tile_y
== 0);
111 offset
+= intel_miptree_get_tile_offsets(mt
, view
->base_level
,
112 view
->base_array_layer
,
115 /* Minify the logical dimensions of the texture. */
116 const unsigned l
= view
->base_level
- mt
->first_level
;
117 surf
->logical_level0_px
.width
= minify(surf
->logical_level0_px
.width
, l
);
118 surf
->logical_level0_px
.height
= surf
->dim
<= ISL_SURF_DIM_1D
? 1 :
119 minify(surf
->logical_level0_px
.height
, l
);
120 surf
->logical_level0_px
.depth
= surf
->dim
<= ISL_SURF_DIM_2D
? 1 :
121 minify(surf
->logical_level0_px
.depth
, l
);
123 /* Only the base level and layer can be addressed with the overridden
126 surf
->logical_level0_px
.array_len
= 1;
128 surf
->dim_layout
= dim_layout
;
130 /* The requested slice of the texture is now at the base level and
133 view
->base_level
= 0;
134 view
->base_array_layer
= 0;
/* Emit a SURFACE_STATE for miptree `mt` viewed through `view`.
 *
 * Resolves the ISL surface via get_isl_surf(), selects an auxiliary surface
 * from mt->mcs_buf or mt->hiz_buf unless `flags` contains
 * INTEL_AUX_BUFFER_DISABLED, allocates the state with brw_state_batch(),
 * fills it with isl_surf_fill_state() and emits a relocation for the main
 * buffer address.  A second relocation is emitted for the auxiliary buffer
 * address, whose low 12 bits must be zero (asserted).
 *
 * `read_domains` and `write_domains` are passed through to brw_emit_reloc().
 * `surf_index` is not referenced in the lines visible here.
 */
138 brw_emit_surface_state(struct brw_context
*brw
,
139 struct intel_mipmap_tree
*mt
, uint32_t flags
,
140 GLenum target
, struct isl_view view
,
141 uint32_t mocs
, uint32_t *surf_offset
, int surf_index
,
142 unsigned read_domains
, unsigned write_domains
)
144 uint32_t tile_x
= mt
->level
[0].level_x
;
145 uint32_t tile_y
= mt
->level
[0].level_y
;
146 uint32_t offset
= mt
->offset
;
148 struct isl_surf surf
;
150 get_isl_surf(brw
, mt
, target
, &view
, &tile_x
, &tile_y
, &offset
, &surf
);
152 union isl_color_value clear_color
= { .u32
= { 0, 0, 0, 0 } };
154 struct brw_bo
*aux_bo
;
155 struct isl_surf
*aux_surf
= NULL
;
156 uint64_t aux_offset
= 0;
157 enum isl_aux_usage aux_usage
= ISL_AUX_USAGE_NONE
;
158 if ((mt
->mcs_buf
|| intel_miptree_sample_with_hiz(brw
, mt
)) &&
159 !(flags
& INTEL_AUX_BUFFER_DISABLED
)) {
160 aux_usage
= intel_miptree_get_aux_isl_usage(brw
, mt
);
163 aux_surf
= &mt
->mcs_buf
->surf
;
165 aux_bo
= mt
->mcs_buf
->bo
;
166 aux_offset
= mt
->mcs_buf
->bo
->offset64
+ mt
->mcs_buf
->offset
;
168 aux_surf
= &mt
->hiz_buf
->surf
;
170 aux_bo
= mt
->hiz_buf
->bo
;
171 aux_offset
= mt
->hiz_buf
->bo
->offset64
;
174 /* We only really need a clear color if we also have an auxiliary
175 * surface. Without one, it does nothing.
177 clear_color
= mt
->fast_clear_color
;
180 void *state
= brw_state_batch(brw
,
181 brw
->isl_dev
.ss
.size
,
182 brw
->isl_dev
.ss
.align
,
185 isl_surf_fill_state(&brw
->isl_dev
, state
, .surf
= &surf
, .view
= &view
,
186 .address
= mt
->bo
->offset64
+ offset
,
187 .aux_surf
= aux_surf
, .aux_usage
= aux_usage
,
188 .aux_address
= aux_offset
,
189 .mocs
= mocs
, .clear_color
= clear_color
,
190 .x_offset_sa
= tile_x
, .y_offset_sa
= tile_y
);
192 brw_emit_reloc(&brw
->batch
, *surf_offset
+ brw
->isl_dev
.ss
.addr_offset
,
193 mt
->bo
, offset
, read_domains
, write_domains
);
196 /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
197 * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
198 * contain other control information. Since buffer addresses are always
199 * on 4k boundaries (and thus have their lower 12 bits zero), we can use
200 * an ordinary reloc to do the necessary address translation.
202 assert((aux_offset
& 0xfff) == 0);
203 uint32_t *aux_addr
= state
+ brw
->isl_dev
.ss
.aux_addr_offset
;
204 brw_emit_reloc(&brw
->batch
,
205 *surf_offset
+ brw
->isl_dev
.ss
.aux_addr_offset
,
206 aux_bo
, *aux_addr
- aux_bo
->offset64
,
207 read_domains
, write_domains
);
/* Build the surface state used to render to `rb`.
 *
 * Looks up the render format for the renderbuffer (reporting a
 * _mesa_problem() when brw->mesa_format_supports_render[] says it is not
 * renderable), describes the bound level/layer range as an isl_view with
 * RENDER_TARGET usage and passes it to brw_emit_surface_state() with the
 * render domain for both reads and writes.
 *
 * `unit` is unused.
 */
212 brw_update_renderbuffer_surface(struct brw_context
*brw
,
213 struct gl_renderbuffer
*rb
,
214 uint32_t flags
, unsigned unit
/* unused */,
217 struct gl_context
*ctx
= &brw
->ctx
;
218 struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
219 struct intel_mipmap_tree
*mt
= irb
->mt
;
222 assert(!(flags
& INTEL_AUX_BUFFER_DISABLED
));
225 assert(brw_render_target_supported(brw
, rb
));
227 mesa_format rb_format
= _mesa_get_render_format(ctx
, intel_rb_format(irb
));
228 if (unlikely(!brw
->mesa_format_supports_render
[rb_format
])) {
229 _mesa_problem(ctx
, "%s: renderbuffer format %s unsupported\n",
230 __func__
, _mesa_get_format_name(rb_format
));
233 struct isl_view view
= {
234 .format
= brw
->mesa_to_isl_render_format
[rb_format
],
235 .base_level
= irb
->mt_level
- irb
->mt
->first_level
,
237 .base_array_layer
= irb
->mt_layer
,
238 .array_len
= MAX2(irb
->layer_count
, 1),
239 .swizzle
= ISL_SWIZZLE_IDENTITY
,
240 .usage
= ISL_SURF_USAGE_RENDER_TARGET_BIT
,
244 brw_emit_surface_state(brw
, mt
, flags
, mt
->target
, view
,
247 I915_GEM_DOMAIN_RENDER
,
248 I915_GEM_DOMAIN_RENDER
);
/* Map a GL texture target enum onto a BRW_SURFACE_* surface type.  The array
 * and multisample 2D targets share BRW_SURFACE_2D and both cube targets map
 * to BRW_SURFACE_CUBE.  An unhandled target reaches unreachable().
 */
253 translate_tex_target(GLenum target
)
257 case GL_TEXTURE_1D_ARRAY_EXT
:
258 return BRW_SURFACE_1D
;
260 case GL_TEXTURE_RECTANGLE_NV
:
261 return BRW_SURFACE_2D
;
264 case GL_TEXTURE_2D_ARRAY_EXT
:
265 case GL_TEXTURE_EXTERNAL_OES
:
266 case GL_TEXTURE_2D_MULTISAMPLE
:
267 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY
:
268 return BRW_SURFACE_2D
;
271 return BRW_SURFACE_3D
;
273 case GL_TEXTURE_CUBE_MAP
:
274 case GL_TEXTURE_CUBE_MAP_ARRAY
:
275 return BRW_SURFACE_CUBE
;
278 unreachable("not reached");
/* Return the BRW_SURFACE_TILED* bits for an ISL tiling mode: either
 * BRW_SURFACE_TILED alone or BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y.  The
 * case labels that choose between them are not visible in this view.
 */
283 brw_get_surface_tiling_bits(enum isl_tiling tiling
)
287 return BRW_SURFACE_TILED
;
289 return BRW_SURFACE_TILED
| BRW_SURFACE_TILED_Y
;
/* Return the BRW_SURFACE_MULTISAMPLECOUNT_* encoding for `num_samples`,
 * which is either ..._4 or ..._1.  The condition that chooses between them
 * is not visible in this view.
 */
297 brw_get_surface_num_multisamples(unsigned num_samples
)
300 return BRW_SURFACE_MULTISAMPLECOUNT_4
;
302 return BRW_SURFACE_MULTISAMPLECOUNT_1
;
306 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
/* Return the effective swizzle for texture object `t`.
 *
 * A per-channel lookup table is built from the base-level image.  Depth and
 * depth/stencil textures are remapped according to the depth texture mode;
 * other base formats get ZERO/ONE/X substitutions so that channels missing
 * from the format read back fixed values.  The swizzle in t->_Swizzle is
 * then applied on top by indexing that table with each of its four
 * selectors.
 */
310 brw_get_texture_swizzle(const struct gl_context
*ctx
,
311 const struct gl_texture_object
*t
)
313 const struct gl_texture_image
*img
= t
->Image
[0][t
->BaseLevel
];
315 int swizzles
[SWIZZLE_NIL
+ 1] = {
325 if (img
->_BaseFormat
== GL_DEPTH_COMPONENT
||
326 img
->_BaseFormat
== GL_DEPTH_STENCIL
) {
327 GLenum depth_mode
= t
->DepthMode
;
329 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
330 * with depth component data specified with a sized internal format.
331 * Otherwise, it's left at the old default, GL_LUMINANCE.
333 if (_mesa_is_gles3(ctx
) &&
334 img
->InternalFormat
!= GL_DEPTH_COMPONENT
&&
335 img
->InternalFormat
!= GL_DEPTH_STENCIL
) {
339 switch (depth_mode
) {
341 swizzles
[0] = SWIZZLE_ZERO
;
342 swizzles
[1] = SWIZZLE_ZERO
;
343 swizzles
[2] = SWIZZLE_ZERO
;
344 swizzles
[3] = SWIZZLE_X
;
347 swizzles
[0] = SWIZZLE_X
;
348 swizzles
[1] = SWIZZLE_X
;
349 swizzles
[2] = SWIZZLE_X
;
350 swizzles
[3] = SWIZZLE_ONE
;
353 swizzles
[0] = SWIZZLE_X
;
354 swizzles
[1] = SWIZZLE_X
;
355 swizzles
[2] = SWIZZLE_X
;
356 swizzles
[3] = SWIZZLE_X
;
359 swizzles
[0] = SWIZZLE_X
;
360 swizzles
[1] = SWIZZLE_ZERO
;
361 swizzles
[2] = SWIZZLE_ZERO
;
362 swizzles
[3] = SWIZZLE_ONE
;
367 GLenum datatype
= _mesa_get_format_datatype(img
->TexFormat
);
369 /* If the texture's format is alpha-only, force R, G, and B to
370 * 0.0. Similarly, if the texture's format has no alpha channel,
371 * force the alpha value read to 1.0. This allows for the
372 * implementation to use an RGBA texture for any of these formats
373 * without leaking any unexpected values.
375 switch (img
->_BaseFormat
) {
377 swizzles
[0] = SWIZZLE_ZERO
;
378 swizzles
[1] = SWIZZLE_ZERO
;
379 swizzles
[2] = SWIZZLE_ZERO
;
382 if (t
->_IsIntegerFormat
|| datatype
== GL_SIGNED_NORMALIZED
) {
383 swizzles
[0] = SWIZZLE_X
;
384 swizzles
[1] = SWIZZLE_X
;
385 swizzles
[2] = SWIZZLE_X
;
386 swizzles
[3] = SWIZZLE_ONE
;
389 case GL_LUMINANCE_ALPHA
:
390 if (datatype
== GL_SIGNED_NORMALIZED
) {
391 swizzles
[0] = SWIZZLE_X
;
392 swizzles
[1] = SWIZZLE_X
;
393 swizzles
[2] = SWIZZLE_X
;
394 swizzles
[3] = SWIZZLE_W
;
398 if (datatype
== GL_SIGNED_NORMALIZED
) {
399 swizzles
[0] = SWIZZLE_X
;
400 swizzles
[1] = SWIZZLE_X
;
401 swizzles
[2] = SWIZZLE_X
;
402 swizzles
[3] = SWIZZLE_X
;
408 if (_mesa_get_format_bits(img
->TexFormat
, GL_ALPHA_BITS
) > 0 ||
409 img
->TexFormat
== MESA_FORMAT_RGB_DXT1
||
410 img
->TexFormat
== MESA_FORMAT_SRGB_DXT1
)
411 swizzles
[3] = SWIZZLE_ONE
;
415 return MAKE_SWIZZLE4(swizzles
[GET_SWZ(t
->_Swizzle
, 0)],
416 swizzles
[GET_SWZ(t
->_Swizzle
, 1)],
417 swizzles
[GET_SWZ(t
->_Swizzle
, 2)],
418 swizzles
[GET_SWZ(t
->_Swizzle
, 3)]);
422 * Convert a swizzle enumeration (e.g. SWIZZLE_X) to one of the Gen7.5+
423 * "Shader Channel Select" enumerations (e.g. HSW_SCS_RED). The mappings are
425 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
428 * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE
430 * which is simply adding 4 then modding by 8 (or anding with 7).
432 * We then may need to apply workarounds for textureGather hardware bugs.
/* Translate a SWIZZLE_* selector into the hardware channel-select value by
 * computing (swizzle + 4) & 7.  When `need_green_to_blue` is set, a result
 * of HSW_SCS_GREEN is replaced by HSW_SCS_BLUE.
 */
435 swizzle_to_scs(GLenum swizzle
, bool need_green_to_blue
)
437 unsigned scs
= (swizzle
+ 4) & 7;
439 return (need_green_to_blue
&& scs
== HSW_SCS_GREEN
) ? HSW_SCS_BLUE
: scs
;
/* Search the colour draw buffers of `fb` for one whose renderbuffer is backed
 * by miptree `mt`.  If the loop finds none, fb->_NumColorDrawBuffers is
 * returned; callers in this file treat that value as "not found".
 * NOTE(review): the statement executed on a match is not visible here;
 * presumably it returns the loop index.
 */
443 brw_find_matching_rb(const struct gl_framebuffer
*fb
,
444 const struct intel_mipmap_tree
*mt
)
446 for (unsigned i
= 0; i
< fb
->_NumColorDrawBuffers
; i
++) {
447 const struct intel_renderbuffer
*irb
=
448 intel_renderbuffer(fb
->_ColorDrawBuffers
[i
]);
450 if (irb
&& irb
->mt
== mt
)
454 return fb
->_NumColorDrawBuffers
;
/* Consistency check, called from an assert() in brw_update_texture_surface(),
 * on sampling `mt` through `view` with respect to lossless compression.
 *
 * The checks look at whether the miptree uses CCS_E, whether the view format
 * supports CCS_E, whether the viewed range still has unresolved colour data,
 * and whether the miptree is also bound as a colour draw buffer.  In that
 * last case the result is draw_aux_buffer_disabled[] for that buffer.  The
 * results returned by the earlier checks are not visible in this view.
 */
458 brw_texture_view_sane(const struct brw_context
*brw
,
459 const struct intel_mipmap_tree
*mt
,
460 const struct isl_view
*view
)
462 /* There are special cases only for lossless compression. */
463 if (mt
->aux_usage
!= ISL_AUX_USAGE_CCS_E
)
466 if (isl_format_supports_ccs_e(&brw
->screen
->devinfo
, view
->format
))
469 /* Logic elsewhere needs to take care to resolve the color buffer prior
470 * to sampling it as non-compressed.
472 if (intel_miptree_has_color_unresolved(mt
, view
->base_level
, view
->levels
,
473 view
->base_array_layer
,
477 const struct gl_framebuffer
*fb
= brw
->ctx
.DrawBuffer
;
478 const unsigned rb_index
= brw_find_matching_rb(fb
, mt
);
480 if (rb_index
== fb
->_NumColorDrawBuffers
)
483 /* Underlying surface is compressed but it is sampled using a format that
484 * the sampling engine doesn't support as compressed. Compression must be
485 * disabled for both sampling engine and data port in case the same surface
486 * is used also as render target.
488 return brw
->draw_aux_buffer_disabled
[rb_index
];
/* Decide whether the auxiliary surface should be left out when texturing
 * from `mt` through `view`.  A true result makes the caller pass
 * INTEL_AUX_BUFFER_DISABLED.
 *
 * For miptrees that do not use CCS_E the result is true exactly when the
 * viewed range has no unresolved colour data.  For CCS_E, if the miptree is
 * currently bound as a colour draw buffer, the draw-side setting in
 * draw_aux_buffer_disabled[] is returned; otherwise the unresolved check
 * decides again.
 */
492 brw_disable_aux_surface(const struct brw_context
*brw
,
493 const struct intel_mipmap_tree
*mt
,
494 const struct isl_view
*view
)
496 /* Nothing to disable. */
500 const bool is_unresolved
= intel_miptree_has_color_unresolved(
501 mt
, view
->base_level
, view
->levels
,
502 view
->base_array_layer
, view
->array_len
);
504 /* There are special cases only for lossless compression. */
505 if (mt
->aux_usage
!= ISL_AUX_USAGE_CCS_E
)
506 return !is_unresolved
;
508 const struct gl_framebuffer
*fb
= brw
->ctx
.DrawBuffer
;
509 const unsigned rb_index
= brw_find_matching_rb(fb
, mt
);
511 /* If we are drawing into this with compression enabled, then we must also
512 * enable compression when texturing from it regardless of
513 * fast_clear_state. If we don't then, after the first draw call with
514 * this setup, there will be data in the CCS which won't get picked up by
515 * subsequent texturing operations as required by ARB_texture_barrier.
516 * Since we don't want to re-emit the binding table or do a resolve
517 * operation every draw call, the easiest thing to do is just enable
518 * compression on the texturing side. This is completely safe to do
519 * since, if compressed texturing weren't allowed, we would have disabled
520 * compression of render targets in whatever_that_function_is_called().
522 if (rb_index
< fb
->_NumColorDrawBuffers
) {
523 if (brw
->draw_aux_buffer_disabled
[rb_index
]) {
524 assert(!is_unresolved
);
527 return brw
->draw_aux_buffer_disabled
[rb_index
];
530 return !is_unresolved
;
/* Emit the sampler surface state for the texture bound to `unit`.
 *
 * Buffer textures are forwarded to brw_update_buffer_texture_surface().
 * For other targets this selects the miptree (optionally one of its planes),
 * works out the number of layers, the combined swizzle and the ISL format
 * (including the gen6/gen7 gather format overrides and the R8_UINT stencil
 * paths), assembles an isl_view with TEXTURE usage (plus CUBE usage for the
 * cube targets) and calls brw_emit_surface_state() with the sampler read
 * domain and no write domain.
 */
534 brw_update_texture_surface(struct gl_context
*ctx
,
536 uint32_t *surf_offset
,
540 struct brw_context
*brw
= brw_context(ctx
);
541 struct gl_texture_object
*obj
= ctx
->Texture
.Unit
[unit
]._Current
;
543 if (obj
->Target
== GL_TEXTURE_BUFFER
) {
544 brw_update_buffer_texture_surface(ctx
, unit
, surf_offset
);
547 struct intel_texture_object
*intel_obj
= intel_texture_object(obj
);
548 struct intel_mipmap_tree
*mt
= intel_obj
->mt
;
551 if (mt
->plane
[plane
- 1] == NULL
)
553 mt
= mt
->plane
[plane
- 1];
556 struct gl_sampler_object
*sampler
= _mesa_get_samplerobj(ctx
, unit
);
557 /* If this is a view with restricted NumLayers, then our effective depth
558 * is not just the miptree depth.
560 unsigned view_num_layers
;
561 if (obj
->Immutable
&& obj
->Target
!= GL_TEXTURE_3D
) {
562 view_num_layers
= obj
->NumLayers
;
563 } else if (mt
->surf
.size
> 0) {
564 view_num_layers
= mt
->surf
.dim
== ISL_SURF_DIM_3D
?
565 mt
->surf
.logical_level0_px
.depth
:
566 mt
->surf
.logical_level0_px
.array_len
;
568 view_num_layers
= mt
->logical_depth0
;
571 /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
572 * texturing functions that return a float, as our code generation always
573 * selects the .x channel (which would always be 0).
575 struct gl_texture_image
*firstImage
= obj
->Image
[0][obj
->BaseLevel
];
576 const bool alpha_depth
= obj
->DepthMode
== GL_ALPHA
&&
577 (firstImage
->_BaseFormat
== GL_DEPTH_COMPONENT
||
578 firstImage
->_BaseFormat
== GL_DEPTH_STENCIL
);
579 const unsigned swizzle
= (unlikely(alpha_depth
) ? SWIZZLE_XYZW
:
580 brw_get_texture_swizzle(&brw
->ctx
, obj
));
582 mesa_format mesa_fmt
= plane
== 0 ? intel_obj
->_Format
: mt
->format
;
583 enum isl_format format
= translate_tex_format(brw
, mesa_fmt
,
584 sampler
->sRGBDecode
);
586 /* Implement gen6 and gen7 gather work-around */
587 bool need_green_to_blue
= false;
589 if (brw
->gen
== 7 && (format
== ISL_FORMAT_R32G32_FLOAT
||
590 format
== ISL_FORMAT_R32G32_SINT
||
591 format
== ISL_FORMAT_R32G32_UINT
)) {
592 format
= ISL_FORMAT_R32G32_FLOAT_LD
;
593 need_green_to_blue
= brw
->is_haswell
;
594 } else if (brw
->gen
== 6) {
595 /* Sandybridge's gather4 message is broken for integer formats.
596 * To work around this, we pretend the surface is UNORM for
597 * 8 or 16-bit formats, and emit shader instructions to recover
598 * the real INT/UINT value. For 32-bit formats, we pretend
599 * the surface is FLOAT, and simply reinterpret the resulting
603 case ISL_FORMAT_R8_SINT
:
604 case ISL_FORMAT_R8_UINT
:
605 format
= ISL_FORMAT_R8_UNORM
;
608 case ISL_FORMAT_R16_SINT
:
609 case ISL_FORMAT_R16_UINT
:
610 format
= ISL_FORMAT_R16_UNORM
;
613 case ISL_FORMAT_R32_SINT
:
614 case ISL_FORMAT_R32_UINT
:
615 format
= ISL_FORMAT_R32_FLOAT
;
624 if (obj
->StencilSampling
&& firstImage
->_BaseFormat
== GL_DEPTH_STENCIL
) {
626 assert(mt
->r8stencil_mt
&& !mt
->stencil_mt
->r8stencil_needs_update
);
627 mt
= mt
->r8stencil_mt
;
631 format
= ISL_FORMAT_R8_UINT
;
632 } else if (brw
->gen
<= 7 && mt
->format
== MESA_FORMAT_S_UINT8
) {
633 assert(mt
->r8stencil_mt
&& !mt
->r8stencil_needs_update
);
634 mt
= mt
->r8stencil_mt
;
635 format
= ISL_FORMAT_R8_UINT
;
638 const int surf_index
= surf_offset
- &brw
->wm
.base
.surf_offset
[0];
640 struct isl_view view
= {
642 .base_level
= obj
->MinLevel
+ obj
->BaseLevel
,
643 .levels
= intel_obj
->_MaxLevel
- obj
->BaseLevel
+ 1,
644 .base_array_layer
= obj
->MinLayer
,
645 .array_len
= view_num_layers
,
647 .r
= swizzle_to_scs(GET_SWZ(swizzle
, 0), need_green_to_blue
),
648 .g
= swizzle_to_scs(GET_SWZ(swizzle
, 1), need_green_to_blue
),
649 .b
= swizzle_to_scs(GET_SWZ(swizzle
, 2), need_green_to_blue
),
650 .a
= swizzle_to_scs(GET_SWZ(swizzle
, 3), need_green_to_blue
),
652 .usage
= ISL_SURF_USAGE_TEXTURE_BIT
,
655 if (obj
->Target
== GL_TEXTURE_CUBE_MAP
||
656 obj
->Target
== GL_TEXTURE_CUBE_MAP_ARRAY
)
657 view
.usage
|= ISL_SURF_USAGE_CUBE_BIT
;
659 assert(brw_texture_view_sane(brw
, mt
, &view
));
661 const int flags
= brw_disable_aux_surface(brw
, mt
, &view
) ?
662 INTEL_AUX_BUFFER_DISABLED
: 0;
663 brw_emit_surface_state(brw
, mt
, flags
, mt
->target
, view
,
665 surf_offset
, surf_index
,
666 I915_GEM_DOMAIN_SAMPLER
, 0);
/* Emit a buffer SURFACE_STATE.
 *
 * Allocates the state with brw_state_batch() and fills it through
 * isl_buffer_fill_state(), using the offset64 of `bo` (or 0 when `bo` is
 * NULL) plus `buffer_offset` as the address, `surface_format` as the format
 * and tex_mocs[brw->gen] as MOCS.  A relocation is then emitted for the
 * address; the sampler domain is also the write domain when `rw` is set.
 */
671 brw_emit_buffer_surface_state(struct brw_context
*brw
,
672 uint32_t *out_offset
,
674 unsigned buffer_offset
,
675 unsigned surface_format
,
676 unsigned buffer_size
,
680 uint32_t *dw
= brw_state_batch(brw
,
681 brw
->isl_dev
.ss
.size
,
682 brw
->isl_dev
.ss
.align
,
685 isl_buffer_fill_state(&brw
->isl_dev
, dw
,
686 .address
= (bo
? bo
->offset64
: 0) + buffer_offset
,
688 .format
= surface_format
,
690 .mocs
= tex_mocs
[brw
->gen
]);
693 brw_emit_reloc(&brw
->batch
, *out_offset
+ brw
->isl_dev
.ss
.addr_offset
,
695 I915_GEM_DOMAIN_SAMPLER
,
696 (rw
? I915_GEM_DOMAIN_SAMPLER
: 0));
/* Emit the surface state for the buffer texture bound to `unit`.
 *
 * The size starts at tObj->BufferSize, is limited to the buffer object's
 * size, and is then limited to MaxTextureBufferSize texels expressed in
 * bytes through the format's texel size.  An unsupported ISL format is
 * reported with _mesa_problem().  The result goes to
 * brw_emit_buffer_surface_state().
 */
701 brw_update_buffer_texture_surface(struct gl_context
*ctx
,
703 uint32_t *surf_offset
)
705 struct brw_context
*brw
= brw_context(ctx
);
706 struct gl_texture_object
*tObj
= ctx
->Texture
.Unit
[unit
]._Current
;
707 struct intel_buffer_object
*intel_obj
=
708 intel_buffer_object(tObj
->BufferObject
);
709 uint32_t size
= tObj
->BufferSize
;
710 struct brw_bo
*bo
= NULL
;
711 mesa_format format
= tObj
->_BufferObjectFormat
;
712 const enum isl_format isl_format
= brw_isl_format_for_mesa_format(format
);
713 int texel_size
= _mesa_get_format_bytes(format
);
716 size
= MIN2(size
, intel_obj
->Base
.Size
);
717 bo
= intel_bufferobj_buffer(brw
, intel_obj
, tObj
->BufferOffset
, size
,
721 /* The ARB_texture_buffer_specification says:
723 * "The number of texels in the buffer texture's texel array is given by
725 * floor(<buffer_size> / (<components> * sizeof(<base_type>)),
727 * where <buffer_size> is the size of the buffer object, in basic
728 * machine units and <components> and <base_type> are the element count
729 * and base data type for elements, as specified in Table X.1. The
730 * number of texels in the texel array is then clamped to the
731 * implementation-dependent limit MAX_TEXTURE_BUFFER_SIZE_ARB."
733 * We need to clamp the size in bytes to MAX_TEXTURE_BUFFER_SIZE * stride,
734 * so that when ISL divides by stride to obtain the number of texels, that
735 * texel count is clamped to MAX_TEXTURE_BUFFER_SIZE.
737 size
= MIN2(size
, ctx
->Const
.MaxTextureBufferSize
* (unsigned) texel_size
);
739 if (isl_format
== ISL_FORMAT_UNSUPPORTED
) {
740 _mesa_problem(NULL
, "bad format %s for texture buffer\n",
741 _mesa_get_format_name(format
));
744 brw_emit_buffer_surface_state(brw
, surf_offset
, bo
,
753 * Create the constant buffer surface. Vertex/fragment shader constants will be
754 * read from this buffer with Data Port Read instructions/messages.
/* Wrapper over brw_emit_buffer_surface_state() that forwards `out_offset`,
 * `bo` and `offset` and uses ISL_FORMAT_R32G32B32A32_FLOAT as the surface
 * format.  The remaining arguments are not visible in this view.
 */
757 brw_create_constant_surface(struct brw_context
*brw
,
761 uint32_t *out_offset
)
763 brw_emit_buffer_surface_state(brw
, out_offset
, bo
, offset
,
764 ISL_FORMAT_R32G32B32A32_FLOAT
,
769 * Create the buffer surface. Shader buffer variables will be
770 * read from / write to this buffer with Data Port Read/Write
771 * instructions/messages.
/* Wrapper over brw_emit_buffer_surface_state() that forwards `out_offset`,
 * `bo` and `offset`.  The format and remaining arguments are not visible in
 * this view; the comment in the body says a raw surface is used.
 */
774 brw_create_buffer_surface(struct brw_context
*brw
,
778 uint32_t *out_offset
)
780 /* Use a raw surface so we can reuse existing untyped read/write/atomic
781 * messages. We need these specifically for the fragment shader since they
782 * include a pixel mask header that we need to ensure correct behavior
783 * with helper invocations, which cannot write to the buffer.
785 brw_emit_buffer_surface_state(brw
, out_offset
, bo
, offset
,
791 * Set up a binding table entry for use by stream output logic (transform
794 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
/* Hand-pack a buffer surface state for transform feedback output.
 *
 * buffer_size_minus_1 is (size - offset - components) / stride, all in
 * dwords, when the buffer has room for more than one output's worth of
 * components past the offset, and 0 otherwise.  It is split across the
 * width (bits 0-6), height (bits 7-19) and depth (bits 20-26) fields.  The
 * surface format is a 32-bit float format chosen by
 * `num_vector_components`; an unexpected count reaches unreachable().
 * A relocation is emitted for the address stored in surf[1].
 */
797 brw_update_sol_surface(struct brw_context
*brw
,
798 struct gl_buffer_object
*buffer_obj
,
799 uint32_t *out_offset
, unsigned num_vector_components
,
800 unsigned stride_dwords
, unsigned offset_dwords
)
802 struct intel_buffer_object
*intel_bo
= intel_buffer_object(buffer_obj
);
803 uint32_t offset_bytes
= 4 * offset_dwords
;
804 struct brw_bo
*bo
= intel_bufferobj_buffer(brw
, intel_bo
,
806 buffer_obj
->Size
- offset_bytes
,
808 uint32_t *surf
= brw_state_batch(brw
, 6 * 4, 32, out_offset
);
809 uint32_t pitch_minus_1
= 4*stride_dwords
- 1;
810 size_t size_dwords
= buffer_obj
->Size
/ 4;
811 uint32_t buffer_size_minus_1
, width
, height
, depth
, surface_format
;
813 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
814 * too big to map using a single binding table entry?
816 assert((size_dwords
- offset_dwords
) / stride_dwords
817 <= BRW_MAX_NUM_BUFFER_ENTRIES
);
819 if (size_dwords
> offset_dwords
+ num_vector_components
) {
820 /* There is room for at least 1 transform feedback output in the buffer.
821 * Compute the number of additional transform feedback outputs the
822 * buffer has room for.
824 buffer_size_minus_1
=
825 (size_dwords
- offset_dwords
- num_vector_components
) / stride_dwords
;
827 /* There isn't even room for a single transform feedback output in the
828 * buffer. We can't configure the binding table entry to prevent output
829 * entirely; we'll have to rely on the geometry shader to detect
830 * overflow. But to minimize the damage in case of a bug, set up the
831 * binding table entry to just allow a single output.
833 buffer_size_minus_1
= 0;
835 width
= buffer_size_minus_1
& 0x7f;
836 height
= (buffer_size_minus_1
& 0xfff80) >> 7;
837 depth
= (buffer_size_minus_1
& 0x7f00000) >> 20;
839 switch (num_vector_components
) {
841 surface_format
= ISL_FORMAT_R32_FLOAT
;
844 surface_format
= ISL_FORMAT_R32G32_FLOAT
;
847 surface_format
= ISL_FORMAT_R32G32B32_FLOAT
;
850 surface_format
= ISL_FORMAT_R32G32B32A32_FLOAT
;
853 unreachable("Invalid vector size for transform feedback output");
856 surf
[0] = BRW_SURFACE_BUFFER
<< BRW_SURFACE_TYPE_SHIFT
|
857 BRW_SURFACE_MIPMAPLAYOUT_BELOW
<< BRW_SURFACE_MIPLAYOUT_SHIFT
|
858 surface_format
<< BRW_SURFACE_FORMAT_SHIFT
|
859 BRW_SURFACE_RC_READ_WRITE
;
860 surf
[1] = bo
->offset64
+ offset_bytes
; /* reloc */
861 surf
[2] = (width
<< BRW_SURFACE_WIDTH_SHIFT
|
862 height
<< BRW_SURFACE_HEIGHT_SHIFT
);
863 surf
[3] = (depth
<< BRW_SURFACE_DEPTH_SHIFT
|
864 pitch_minus_1
<< BRW_SURFACE_PITCH_SHIFT
);
868 /* Emit relocation to surface contents. */
869 brw_emit_reloc(&brw
->batch
, *out_offset
+ 4, bo
, offset_bytes
,
870 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
);
873 /* Creates a new WM constant buffer reflecting the current fragment program's
874 * constants, if needed by the fragment program.
876 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
/* Emit hook of the brw_wm_pull_constants state atom: writes the fragment
 * stage's subroutine indices with _mesa_shader_write_subroutine_indices()
 * and then uploads the stage's pull constants through
 * brw_upload_pull_constants().
 */
880 brw_upload_wm_pull_constants(struct brw_context
*brw
)
882 struct brw_stage_state
*stage_state
= &brw
->wm
.base
;
883 /* BRW_NEW_FRAGMENT_PROGRAM */
884 struct brw_program
*fp
= (struct brw_program
*) brw
->fragment_program
;
885 /* BRW_NEW_FS_PROG_DATA */
886 struct brw_stage_prog_data
*prog_data
= brw
->wm
.base
.prog_data
;
888 _mesa_shader_write_subroutine_indices(&brw
->ctx
, MESA_SHADER_FRAGMENT
);
889 /* _NEW_PROGRAM_CONSTANTS */
890 brw_upload_pull_constants(brw
, BRW_NEW_SURFACES
, &fp
->program
,
891 stage_state
, prog_data
);
/* Tracked-state atom that binds brw_upload_wm_pull_constants() as the emit
 * hook to the Mesa and BRW state flags listed below.
 */
894 const struct brw_tracked_state brw_wm_pull_constants
= {
896 .mesa
= _NEW_PROGRAM_CONSTANTS
,
897 .brw
= BRW_NEW_BATCH
|
899 BRW_NEW_FRAGMENT_PROGRAM
|
900 BRW_NEW_FS_PROG_DATA
,
902 .emit
= brw_upload_wm_pull_constants
,
906 * Creates a null renderbuffer surface.
908 * This is used when the shader doesn't write to any color output. An FB
909 * write to target 0 will still be emitted, because that's how the thread is
910 * terminated (and computed depth is returned), so we need to have the
911 * hardware discard the target 0 color output.
/* Hand-pack a null render-target surface state of the given width/height.
 *
 * The defaults are surface type BRW_SURFACE_NULL, no backing buffer and a
 * zero multisampling state.  On the workaround path described in the body,
 * a scratch buffer is bound instead, the type becomes BRW_SURFACE_2D and the
 * multisample count is derived from `samples`.  surf[1] holds the buffer
 * address (0 without a buffer) and a relocation is emitted for it.
 */
914 brw_emit_null_surface_state(struct brw_context
*brw
,
918 uint32_t *out_offset
)
920 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
923 * A null surface will be used in instances where an actual surface is
924 * not bound. When a write message is generated to a null surface, no
925 * actual surface is written to. When a read message (including any
926 * sampling engine message) is generated to a null surface, the result
927 * is all zeros. Note that a null surface type is allowed to be used
928 * with all messages, even if it is not specificially indicated as
929 * supported. All of the remaining fields in surface state are ignored
930 * for null surfaces, with the following exceptions:
932 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
933 * depth buffer’s corresponding state for all render target surfaces,
936 * - Surface Format must be R8G8B8A8_UNORM.
938 unsigned surface_type
= BRW_SURFACE_NULL
;
939 struct brw_bo
*bo
= NULL
;
940 unsigned pitch_minus_1
= 0;
941 uint32_t multisampling_state
= 0;
942 uint32_t *surf
= brw_state_batch(brw
, 6 * 4, 32, out_offset
);
945 /* On Gen6, null render targets seem to cause GPU hangs when
946 * multisampling. So work around this problem by rendering into dummy
949 * To decrease the amount of memory needed by the workaround buffer, we
950 * set its pitch to 128 bytes (the width of a Y tile). This means that
951 * the amount of memory needed for the workaround buffer is
952 * (width_in_tiles + height_in_tiles - 1) tiles.
954 * Note that since the workaround buffer will be interpreted by the
955 * hardware as an interleaved multisampled buffer, we need to compute
956 * width_in_tiles and height_in_tiles by dividing the width and height
957 * by 16 rather than the normal Y-tile size of 32.
959 unsigned width_in_tiles
= ALIGN(width
, 16) / 16;
960 unsigned height_in_tiles
= ALIGN(height
, 16) / 16;
961 unsigned size_needed
= (width_in_tiles
+ height_in_tiles
- 1) * 4096;
962 brw_get_scratch_bo(brw
, &brw
->wm
.multisampled_null_render_target_bo
,
964 bo
= brw
->wm
.multisampled_null_render_target_bo
;
965 surface_type
= BRW_SURFACE_2D
;
967 multisampling_state
= brw_get_surface_num_multisamples(samples
);
970 surf
[0] = (surface_type
<< BRW_SURFACE_TYPE_SHIFT
|
971 ISL_FORMAT_B8G8R8A8_UNORM
<< BRW_SURFACE_FORMAT_SHIFT
);
973 surf
[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT
|
974 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT
|
975 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT
|
976 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT
);
978 surf
[1] = bo
? bo
->offset64
: 0;
979 surf
[2] = ((width
- 1) << BRW_SURFACE_WIDTH_SHIFT
|
980 (height
- 1) << BRW_SURFACE_HEIGHT_SHIFT
);
982 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
985 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
987 surf
[3] = (BRW_SURFACE_TILED
| BRW_SURFACE_TILED_Y
|
988 pitch_minus_1
<< BRW_SURFACE_PITCH_SHIFT
);
989 surf
[4] = multisampling_state
;
993 brw_emit_reloc(&brw
->batch
, *out_offset
+ 4, bo
, 0,
994 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
);
999 * Sets up a surface state structure to point at the given region.
1000 * While it is only used for the front/back buffer currently, it should be
1001 * usable for further buffers when doing ARB_draw_buffer support.
/* Hand-pack the render-target surface state for `rb`.
 *
 * If the renderbuffer wraps a texture image and the hardware lacks the
 * surface tile offset feature, a non-zero tile offset causes the
 * renderbuffer to be moved to a temporary miptree (irb->align_wa_mt), which
 * is then used instead.  The blend-enable bit and the per-channel
 * write-disable bits in surf[0] are derived from the context's colour state
 * for `unit`; alpha writes are also disabled when the draw buffer has no
 * alpha bits.  A relocation is emitted for the address in surf[1].
 *
 * INTEL_RENDERBUFFER_LAYERED and INTEL_AUX_BUFFER_DISABLED must not be set
 * in `flags` (asserted).
 */
1004 gen4_update_renderbuffer_surface(struct brw_context
*brw
,
1005 struct gl_renderbuffer
*rb
,
1006 uint32_t flags
, unsigned unit
,
1007 uint32_t surf_index
)
1009 struct gl_context
*ctx
= &brw
->ctx
;
1010 struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
1011 struct intel_mipmap_tree
*mt
= irb
->mt
;
1013 uint32_t tile_x
, tile_y
;
1014 enum isl_format format
;
1017 mesa_format rb_format
= _mesa_get_render_format(ctx
, intel_rb_format(irb
));
1018 /* BRW_NEW_FS_PROG_DATA */
1020 assert(!(flags
& INTEL_RENDERBUFFER_LAYERED
));
1021 assert(!(flags
& INTEL_AUX_BUFFER_DISABLED
));
1023 if (rb
->TexImage
&& !brw
->has_surface_tile_offset
) {
1024 intel_renderbuffer_get_tile_offsets(irb
, &tile_x
, &tile_y
);
1026 if (tile_x
!= 0 || tile_y
!= 0) {
1027 /* Original gen4 hardware couldn't draw to a non-tile-aligned
1028 * destination in a miptree unless you actually setup your renderbuffer
1029 * as a miptree and used the fragile lod/array_index/etc. controls to
1030 * select the image. So, instead, we just make a new single-level
1031 * miptree and render into that.
1033 intel_renderbuffer_move_to_temp(brw
, irb
, false);
1034 assert(irb
->align_wa_mt
);
1035 mt
= irb
->align_wa_mt
;
1039 surf
= brw_state_batch(brw
, 6 * 4, 32, &offset
);
1041 format
= brw
->mesa_to_isl_render_format
[rb_format
];
1042 if (unlikely(!brw
->mesa_format_supports_render
[rb_format
])) {
1043 _mesa_problem(ctx
, "%s: renderbuffer format %s unsupported\n",
1044 __func__
, _mesa_get_format_name(rb_format
));
1047 surf
[0] = (BRW_SURFACE_2D
<< BRW_SURFACE_TYPE_SHIFT
|
1048 format
<< BRW_SURFACE_FORMAT_SHIFT
);
1051 assert(mt
->offset
% mt
->cpp
== 0);
1052 surf
[1] = (intel_renderbuffer_get_tile_offsets(irb
, &tile_x
, &tile_y
) +
1053 mt
->bo
->offset64
+ mt
->offset
);
1055 surf
[2] = ((rb
->Width
- 1) << BRW_SURFACE_WIDTH_SHIFT
|
1056 (rb
->Height
- 1) << BRW_SURFACE_HEIGHT_SHIFT
);
1058 surf
[3] = (brw_get_surface_tiling_bits(mt
->surf
.tiling
) |
1059 (mt
->surf
.row_pitch
- 1) << BRW_SURFACE_PITCH_SHIFT
);
1061 surf
[4] = brw_get_surface_num_multisamples(mt
->surf
.samples
);
1063 assert(brw
->has_surface_tile_offset
|| (tile_x
== 0 && tile_y
== 0));
1064 /* Note that the low bits of these fields are missing, so
1065 * there's the possibility of getting in trouble.
1067 assert(tile_x
% 4 == 0);
1068 assert(tile_y
% 2 == 0);
1069 surf
[5] = ((tile_x
/ 4) << BRW_SURFACE_X_OFFSET_SHIFT
|
1070 (tile_y
/ 2) << BRW_SURFACE_Y_OFFSET_SHIFT
|
1071 (mt
->valign
== 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE
: 0));
1075 if (!ctx
->Color
.ColorLogicOpEnabled
&& !ctx
->Color
._AdvancedBlendMode
&&
1076 (ctx
->Color
.BlendEnabled
& (1 << unit
)))
1077 surf
[0] |= BRW_SURFACE_BLEND_ENABLED
;
1079 if (!ctx
->Color
.ColorMask
[unit
][0])
1080 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT
;
1081 if (!ctx
->Color
.ColorMask
[unit
][1])
1082 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT
;
1083 if (!ctx
->Color
.ColorMask
[unit
][2])
1084 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT
;
1086 /* As mentioned above, disable writes to the alpha component when the
1087 * renderbuffer is XRGB.
1089 if (ctx
->DrawBuffer
->Visual
.alphaBits
== 0 ||
1090 !ctx
->Color
.ColorMask
[unit
][3]) {
1091 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT
;
1095 brw_emit_reloc(&brw
->batch
, offset
+ 4, mt
->bo
, surf
[1] - mt
->bo
->offset64
,
1096 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
);
1102 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
1105 brw_update_renderbuffer_surfaces(struct brw_context
*brw
,
1106 const struct gl_framebuffer
*fb
,
1107 uint32_t render_target_start
,
1108 uint32_t *surf_offset
)
1111 const unsigned int w
= _mesa_geometric_width(fb
);
1112 const unsigned int h
= _mesa_geometric_height(fb
);
1113 const unsigned int s
= _mesa_geometric_samples(fb
);
1115 /* Update surfaces for drawing buffers */
1116 if (fb
->_NumColorDrawBuffers
>= 1) {
1117 for (i
= 0; i
< fb
->_NumColorDrawBuffers
; i
++) {
1118 const uint32_t surf_index
= render_target_start
+ i
;
1119 const int flags
= (_mesa_geometric_layers(fb
) > 0 ?
1120 INTEL_RENDERBUFFER_LAYERED
: 0) |
1121 (brw
->draw_aux_buffer_disabled
[i
] ?
1122 INTEL_AUX_BUFFER_DISABLED
: 0);
1124 if (intel_renderbuffer(fb
->_ColorDrawBuffers
[i
])) {
1125 surf_offset
[surf_index
] =
1126 brw
->vtbl
.update_renderbuffer_surface(
1127 brw
, fb
->_ColorDrawBuffers
[i
], flags
, i
, surf_index
);
1129 brw
->vtbl
.emit_null_surface_state(brw
, w
, h
, s
,
1130 &surf_offset
[surf_index
]);
1134 const uint32_t surf_index
= render_target_start
;
1135 brw
->vtbl
.emit_null_surface_state(brw
, w
, h
, s
,
1136 &surf_offset
[surf_index
]);
1141 update_renderbuffer_surfaces(struct brw_context
*brw
)
1143 const struct gl_context
*ctx
= &brw
->ctx
;
1145 /* BRW_NEW_FS_PROG_DATA */
1146 const struct brw_wm_prog_data
*wm_prog_data
=
1147 brw_wm_prog_data(brw
->wm
.base
.prog_data
);
1149 /* _NEW_BUFFERS | _NEW_COLOR */
1150 const struct gl_framebuffer
*fb
= ctx
->DrawBuffer
;
1151 brw_update_renderbuffer_surfaces(
1153 wm_prog_data
->binding_table
.render_target_start
,
1154 brw
->wm
.base
.surf_offset
);
1155 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1158 const struct brw_tracked_state brw_renderbuffer_surfaces
= {
1160 .mesa
= _NEW_BUFFERS
|
1162 .brw
= BRW_NEW_BATCH
|
1164 BRW_NEW_FS_PROG_DATA
,
1166 .emit
= update_renderbuffer_surfaces
,
1169 const struct brw_tracked_state gen6_renderbuffer_surfaces
= {
1171 .mesa
= _NEW_BUFFERS
,
1172 .brw
= BRW_NEW_BATCH
|
1175 .emit
= update_renderbuffer_surfaces
,
1179 update_renderbuffer_read_surfaces(struct brw_context
*brw
)
1181 const struct gl_context
*ctx
= &brw
->ctx
;
1183 /* BRW_NEW_FS_PROG_DATA */
1184 const struct brw_wm_prog_data
*wm_prog_data
=
1185 brw_wm_prog_data(brw
->wm
.base
.prog_data
);
1187 /* BRW_NEW_FRAGMENT_PROGRAM */
1188 if (!ctx
->Extensions
.MESA_shader_framebuffer_fetch
&&
1189 brw
->fragment_program
&& brw
->fragment_program
->info
.outputs_read
) {
1191 const struct gl_framebuffer
*fb
= ctx
->DrawBuffer
;
1193 for (unsigned i
= 0; i
< fb
->_NumColorDrawBuffers
; i
++) {
1194 struct gl_renderbuffer
*rb
= fb
->_ColorDrawBuffers
[i
];
1195 const struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
1196 const unsigned surf_index
=
1197 wm_prog_data
->binding_table
.render_target_read_start
+ i
;
1198 uint32_t *surf_offset
= &brw
->wm
.base
.surf_offset
[surf_index
];
1201 const enum isl_format format
= brw
->mesa_to_isl_render_format
[
1202 _mesa_get_render_format(ctx
, intel_rb_format(irb
))];
1203 assert(isl_format_supports_sampling(&brw
->screen
->devinfo
,
1206 /* Override the target of the texture if the render buffer is a
1207 * single slice of a 3D texture (since the minimum array element
1208 * field of the surface state structure is ignored by the sampler
1209 * unit for 3D textures on some hardware), or if the render buffer
1210 * is a 1D array (since shaders always provide the array index
1211 * coordinate at the Z component to avoid state-dependent
1212 * recompiles when changing the texture target of the
1215 const GLenum target
=
1216 (irb
->mt
->target
== GL_TEXTURE_3D
&&
1217 irb
->layer_count
== 1) ? GL_TEXTURE_2D
:
1218 irb
->mt
->target
== GL_TEXTURE_1D_ARRAY
? GL_TEXTURE_2D_ARRAY
:
1221 const struct isl_view view
= {
1223 .base_level
= irb
->mt_level
- irb
->mt
->first_level
,
1225 .base_array_layer
= irb
->mt_layer
,
1226 .array_len
= irb
->layer_count
,
1227 .swizzle
= ISL_SWIZZLE_IDENTITY
,
1228 .usage
= ISL_SURF_USAGE_TEXTURE_BIT
,
1231 const int flags
= brw
->draw_aux_buffer_disabled
[i
] ?
1232 INTEL_AUX_BUFFER_DISABLED
: 0;
1233 brw_emit_surface_state(brw
, irb
->mt
, flags
, target
, view
,
1235 surf_offset
, surf_index
,
1236 I915_GEM_DOMAIN_SAMPLER
, 0);
1239 brw
->vtbl
.emit_null_surface_state(
1240 brw
, _mesa_geometric_width(fb
), _mesa_geometric_height(fb
),
1241 _mesa_geometric_samples(fb
), surf_offset
);
1245 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1249 const struct brw_tracked_state brw_renderbuffer_read_surfaces
= {
1251 .mesa
= _NEW_BUFFERS
,
1252 .brw
= BRW_NEW_BATCH
|
1253 BRW_NEW_FRAGMENT_PROGRAM
|
1254 BRW_NEW_FS_PROG_DATA
,
1256 .emit
= update_renderbuffer_read_surfaces
,
1260 update_stage_texture_surfaces(struct brw_context
*brw
,
1261 const struct gl_program
*prog
,
1262 struct brw_stage_state
*stage_state
,
1263 bool for_gather
, uint32_t plane
)
1268 struct gl_context
*ctx
= &brw
->ctx
;
1270 uint32_t *surf_offset
= stage_state
->surf_offset
;
1272 /* BRW_NEW_*_PROG_DATA */
1274 surf_offset
+= stage_state
->prog_data
->binding_table
.gather_texture_start
;
1276 surf_offset
+= stage_state
->prog_data
->binding_table
.plane_start
[plane
];
1278 unsigned num_samplers
= util_last_bit(prog
->SamplersUsed
);
1279 for (unsigned s
= 0; s
< num_samplers
; s
++) {
1282 if (prog
->SamplersUsed
& (1 << s
)) {
1283 const unsigned unit
= prog
->SamplerUnits
[s
];
1286 if (ctx
->Texture
.Unit
[unit
]._Current
) {
1287 brw_update_texture_surface(ctx
, unit
, surf_offset
+ s
, for_gather
, plane
);
1295 * Construct SURFACE_STATE objects for enabled textures.
1298 brw_update_texture_surfaces(struct brw_context
*brw
)
1300 /* BRW_NEW_VERTEX_PROGRAM */
1301 struct gl_program
*vs
= (struct gl_program
*) brw
->vertex_program
;
1303 /* BRW_NEW_TESS_PROGRAMS */
1304 struct gl_program
*tcs
= (struct gl_program
*) brw
->tess_ctrl_program
;
1305 struct gl_program
*tes
= (struct gl_program
*) brw
->tess_eval_program
;
1307 /* BRW_NEW_GEOMETRY_PROGRAM */
1308 struct gl_program
*gs
= (struct gl_program
*) brw
->geometry_program
;
1310 /* BRW_NEW_FRAGMENT_PROGRAM */
1311 struct gl_program
*fs
= (struct gl_program
*) brw
->fragment_program
;
1314 update_stage_texture_surfaces(brw
, vs
, &brw
->vs
.base
, false, 0);
1315 update_stage_texture_surfaces(brw
, tcs
, &brw
->tcs
.base
, false, 0);
1316 update_stage_texture_surfaces(brw
, tes
, &brw
->tes
.base
, false, 0);
1317 update_stage_texture_surfaces(brw
, gs
, &brw
->gs
.base
, false, 0);
1318 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, false, 0);
1320 /* emit alternate set of surface state for gather. this
1321 * allows the surface format to be overridden for only the
1322 * gather4 messages. */
1324 if (vs
&& vs
->nir
->info
.uses_texture_gather
)
1325 update_stage_texture_surfaces(brw
, vs
, &brw
->vs
.base
, true, 0);
1326 if (tcs
&& tcs
->nir
->info
.uses_texture_gather
)
1327 update_stage_texture_surfaces(brw
, tcs
, &brw
->tcs
.base
, true, 0);
1328 if (tes
&& tes
->nir
->info
.uses_texture_gather
)
1329 update_stage_texture_surfaces(brw
, tes
, &brw
->tes
.base
, true, 0);
1330 if (gs
&& gs
->nir
->info
.uses_texture_gather
)
1331 update_stage_texture_surfaces(brw
, gs
, &brw
->gs
.base
, true, 0);
1332 if (fs
&& fs
->nir
->info
.uses_texture_gather
)
1333 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, true, 0);
1337 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, false, 1);
1338 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, false, 2);
1341 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1344 const struct brw_tracked_state brw_texture_surfaces
= {
1346 .mesa
= _NEW_TEXTURE
,
1347 .brw
= BRW_NEW_BATCH
|
1349 BRW_NEW_FRAGMENT_PROGRAM
|
1350 BRW_NEW_FS_PROG_DATA
|
1351 BRW_NEW_GEOMETRY_PROGRAM
|
1352 BRW_NEW_GS_PROG_DATA
|
1353 BRW_NEW_TESS_PROGRAMS
|
1354 BRW_NEW_TCS_PROG_DATA
|
1355 BRW_NEW_TES_PROG_DATA
|
1356 BRW_NEW_TEXTURE_BUFFER
|
1357 BRW_NEW_VERTEX_PROGRAM
|
1358 BRW_NEW_VS_PROG_DATA
,
1360 .emit
= brw_update_texture_surfaces
,
1364 brw_update_cs_texture_surfaces(struct brw_context
*brw
)
1366 /* BRW_NEW_COMPUTE_PROGRAM */
1367 struct gl_program
*cs
= (struct gl_program
*) brw
->compute_program
;
1370 update_stage_texture_surfaces(brw
, cs
, &brw
->cs
.base
, false, 0);
1372 /* emit alternate set of surface state for gather. this
1373 * allows the surface format to be overridden for only the
1377 if (cs
&& cs
->nir
->info
.uses_texture_gather
)
1378 update_stage_texture_surfaces(brw
, cs
, &brw
->cs
.base
, true, 0);
1381 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1384 const struct brw_tracked_state brw_cs_texture_surfaces
= {
1386 .mesa
= _NEW_TEXTURE
,
1387 .brw
= BRW_NEW_BATCH
|
1389 BRW_NEW_COMPUTE_PROGRAM
,
1391 .emit
= brw_update_cs_texture_surfaces
,
1396 brw_upload_ubo_surfaces(struct brw_context
*brw
, struct gl_program
*prog
,
1397 struct brw_stage_state
*stage_state
,
1398 struct brw_stage_prog_data
*prog_data
)
1400 struct gl_context
*ctx
= &brw
->ctx
;
1405 uint32_t *ubo_surf_offsets
=
1406 &stage_state
->surf_offset
[prog_data
->binding_table
.ubo_start
];
1408 for (int i
= 0; i
< prog
->info
.num_ubos
; i
++) {
1409 struct gl_uniform_buffer_binding
*binding
=
1410 &ctx
->UniformBufferBindings
[prog
->sh
.UniformBlocks
[i
]->Binding
];
1412 if (binding
->BufferObject
== ctx
->Shared
->NullBufferObj
) {
1413 brw
->vtbl
.emit_null_surface_state(brw
, 1, 1, 1, &ubo_surf_offsets
[i
]);
1415 struct intel_buffer_object
*intel_bo
=
1416 intel_buffer_object(binding
->BufferObject
);
1417 GLsizeiptr size
= binding
->BufferObject
->Size
- binding
->Offset
;
1418 if (!binding
->AutomaticSize
)
1419 size
= MIN2(size
, binding
->Size
);
1421 intel_bufferobj_buffer(brw
, intel_bo
,
1424 brw_create_constant_surface(brw
, bo
, binding
->Offset
,
1426 &ubo_surf_offsets
[i
]);
1430 uint32_t *ssbo_surf_offsets
=
1431 &stage_state
->surf_offset
[prog_data
->binding_table
.ssbo_start
];
1433 for (int i
= 0; i
< prog
->info
.num_ssbos
; i
++) {
1434 struct gl_shader_storage_buffer_binding
*binding
=
1435 &ctx
->ShaderStorageBufferBindings
[prog
->sh
.ShaderStorageBlocks
[i
]->Binding
];
1437 if (binding
->BufferObject
== ctx
->Shared
->NullBufferObj
) {
1438 brw
->vtbl
.emit_null_surface_state(brw
, 1, 1, 1, &ssbo_surf_offsets
[i
]);
1440 struct intel_buffer_object
*intel_bo
=
1441 intel_buffer_object(binding
->BufferObject
);
1442 GLsizeiptr size
= binding
->BufferObject
->Size
- binding
->Offset
;
1443 if (!binding
->AutomaticSize
)
1444 size
= MIN2(size
, binding
->Size
);
1446 intel_bufferobj_buffer(brw
, intel_bo
,
1449 brw_create_buffer_surface(brw
, bo
, binding
->Offset
,
1451 &ssbo_surf_offsets
[i
]);
1455 stage_state
->push_constants_dirty
= true;
1457 if (prog
->info
.num_ubos
|| prog
->info
.num_ssbos
)
1458 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1462 brw_upload_wm_ubo_surfaces(struct brw_context
*brw
)
1464 struct gl_context
*ctx
= &brw
->ctx
;
1466 struct gl_program
*prog
= ctx
->FragmentProgram
._Current
;
1468 /* BRW_NEW_FS_PROG_DATA */
1469 brw_upload_ubo_surfaces(brw
, prog
, &brw
->wm
.base
, brw
->wm
.base
.prog_data
);
1472 const struct brw_tracked_state brw_wm_ubo_surfaces
= {
1474 .mesa
= _NEW_PROGRAM
,
1475 .brw
= BRW_NEW_BATCH
|
1477 BRW_NEW_FS_PROG_DATA
|
1478 BRW_NEW_UNIFORM_BUFFER
,
1480 .emit
= brw_upload_wm_ubo_surfaces
,
1484 brw_upload_cs_ubo_surfaces(struct brw_context
*brw
)
1486 struct gl_context
*ctx
= &brw
->ctx
;
1488 struct gl_program
*prog
=
1489 ctx
->_Shader
->CurrentProgram
[MESA_SHADER_COMPUTE
];
1491 /* BRW_NEW_CS_PROG_DATA */
1492 brw_upload_ubo_surfaces(brw
, prog
, &brw
->cs
.base
, brw
->cs
.base
.prog_data
);
1495 const struct brw_tracked_state brw_cs_ubo_surfaces
= {
1497 .mesa
= _NEW_PROGRAM
,
1498 .brw
= BRW_NEW_BATCH
|
1500 BRW_NEW_CS_PROG_DATA
|
1501 BRW_NEW_UNIFORM_BUFFER
,
1503 .emit
= brw_upload_cs_ubo_surfaces
,
1507 brw_upload_abo_surfaces(struct brw_context
*brw
,
1508 const struct gl_program
*prog
,
1509 struct brw_stage_state
*stage_state
,
1510 struct brw_stage_prog_data
*prog_data
)
1512 struct gl_context
*ctx
= &brw
->ctx
;
1513 uint32_t *surf_offsets
=
1514 &stage_state
->surf_offset
[prog_data
->binding_table
.abo_start
];
1516 if (prog
->info
.num_abos
) {
1517 for (unsigned i
= 0; i
< prog
->info
.num_abos
; i
++) {
1518 struct gl_atomic_buffer_binding
*binding
=
1519 &ctx
->AtomicBufferBindings
[prog
->sh
.AtomicBuffers
[i
]->Binding
];
1520 struct intel_buffer_object
*intel_bo
=
1521 intel_buffer_object(binding
->BufferObject
);
1523 intel_bufferobj_buffer(brw
, intel_bo
, binding
->Offset
,
1524 intel_bo
->Base
.Size
- binding
->Offset
,
1527 brw_emit_buffer_surface_state(brw
, &surf_offsets
[i
], bo
,
1528 binding
->Offset
, ISL_FORMAT_RAW
,
1529 bo
->size
- binding
->Offset
, 1, true);
1532 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1537 brw_upload_wm_abo_surfaces(struct brw_context
*brw
)
1540 const struct gl_program
*wm
= brw
->fragment_program
;
1543 /* BRW_NEW_FS_PROG_DATA */
1544 brw_upload_abo_surfaces(brw
, wm
, &brw
->wm
.base
, brw
->wm
.base
.prog_data
);
1548 const struct brw_tracked_state brw_wm_abo_surfaces
= {
1550 .mesa
= _NEW_PROGRAM
,
1551 .brw
= BRW_NEW_ATOMIC_BUFFER
|
1554 BRW_NEW_FS_PROG_DATA
,
1556 .emit
= brw_upload_wm_abo_surfaces
,
1560 brw_upload_cs_abo_surfaces(struct brw_context
*brw
)
1563 const struct gl_program
*cp
= brw
->compute_program
;
1566 /* BRW_NEW_CS_PROG_DATA */
1567 brw_upload_abo_surfaces(brw
, cp
, &brw
->cs
.base
, brw
->cs
.base
.prog_data
);
1571 const struct brw_tracked_state brw_cs_abo_surfaces
= {
1573 .mesa
= _NEW_PROGRAM
,
1574 .brw
= BRW_NEW_ATOMIC_BUFFER
|
1577 BRW_NEW_CS_PROG_DATA
,
1579 .emit
= brw_upload_cs_abo_surfaces
,
1583 brw_upload_cs_image_surfaces(struct brw_context
*brw
)
1586 const struct gl_program
*cp
= brw
->compute_program
;
1589 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1590 brw_upload_image_surfaces(brw
, cp
, &brw
->cs
.base
,
1591 brw
->cs
.base
.prog_data
);
1595 const struct brw_tracked_state brw_cs_image_surfaces
= {
1597 .mesa
= _NEW_TEXTURE
| _NEW_PROGRAM
,
1598 .brw
= BRW_NEW_BATCH
|
1600 BRW_NEW_CS_PROG_DATA
|
1603 .emit
= brw_upload_cs_image_surfaces
,
1607 get_image_format(struct brw_context
*brw
, mesa_format format
, GLenum access
)
1609 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
1610 enum isl_format hw_format
= brw_isl_format_for_mesa_format(format
);
1611 if (access
== GL_WRITE_ONLY
) {
1613 } else if (isl_has_matching_typed_storage_image_format(devinfo
, hw_format
)) {
1614 /* Typed surface reads support a very limited subset of the shader
1615 * image formats. Translate it into the closest format the
1616 * hardware supports.
1618 return isl_lower_storage_image_format(devinfo
, hw_format
);
1620 /* The hardware doesn't actually support a typed format that we can use
1621 * so we have to fall back to untyped read/write messages.
1623 return ISL_FORMAT_RAW
;
1628 update_default_image_param(struct brw_context
*brw
,
1629 struct gl_image_unit
*u
,
1630 unsigned surface_idx
,
1631 struct brw_image_param
*param
)
1633 memset(param
, 0, sizeof(*param
));
1634 param
->surface_idx
= surface_idx
;
1635 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1636 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1637 * detailed explanation of these parameters.
1639 param
->swizzling
[0] = 0xff;
1640 param
->swizzling
[1] = 0xff;
1644 update_buffer_image_param(struct brw_context
*brw
,
1645 struct gl_image_unit
*u
,
1646 unsigned surface_idx
,
1647 struct brw_image_param
*param
)
1649 struct gl_buffer_object
*obj
= u
->TexObj
->BufferObject
;
1650 const uint32_t size
= MIN2((uint32_t)u
->TexObj
->BufferSize
, obj
->Size
);
1651 update_default_image_param(brw
, u
, surface_idx
, param
);
1653 param
->size
[0] = size
/ _mesa_get_format_bytes(u
->_ActualFormat
);
1654 param
->stride
[0] = _mesa_get_format_bytes(u
->_ActualFormat
);
1658 get_image_num_layers(const struct intel_mipmap_tree
*mt
, GLenum target
,
1661 if (target
== GL_TEXTURE_CUBE_MAP
)
1664 if (mt
->surf
.size
> 0) {
1665 return target
== GL_TEXTURE_3D
?
1666 minify(mt
->surf
.logical_level0_px
.depth
, level
) :
1667 mt
->surf
.logical_level0_px
.array_len
;
1670 return target
== GL_TEXTURE_3D
?
1671 minify(mt
->logical_depth0
, level
) : mt
->logical_depth0
;
1675 update_image_surface(struct brw_context
*brw
,
1676 struct gl_image_unit
*u
,
1678 unsigned surface_idx
,
1679 uint32_t *surf_offset
,
1680 struct brw_image_param
*param
)
1682 if (_mesa_is_image_unit_valid(&brw
->ctx
, u
)) {
1683 struct gl_texture_object
*obj
= u
->TexObj
;
1684 const unsigned format
= get_image_format(brw
, u
->_ActualFormat
, access
);
1686 if (obj
->Target
== GL_TEXTURE_BUFFER
) {
1687 struct intel_buffer_object
*intel_obj
=
1688 intel_buffer_object(obj
->BufferObject
);
1689 const unsigned texel_size
= (format
== ISL_FORMAT_RAW
? 1 :
1690 _mesa_get_format_bytes(u
->_ActualFormat
));
1692 brw_emit_buffer_surface_state(
1693 brw
, surf_offset
, intel_obj
->buffer
, obj
->BufferOffset
,
1694 format
, intel_obj
->Base
.Size
, texel_size
,
1695 access
!= GL_READ_ONLY
);
1697 update_buffer_image_param(brw
, u
, surface_idx
, param
);
1700 struct intel_texture_object
*intel_obj
= intel_texture_object(obj
);
1701 struct intel_mipmap_tree
*mt
= intel_obj
->mt
;
1702 const unsigned num_layers
= u
->Layered
?
1703 get_image_num_layers(mt
, obj
->Target
, u
->Level
) : 1;
1705 struct isl_view view
= {
1707 .base_level
= obj
->MinLevel
+ u
->Level
,
1709 .base_array_layer
= obj
->MinLayer
+ u
->_Layer
,
1710 .array_len
= num_layers
,
1711 .swizzle
= ISL_SWIZZLE_IDENTITY
,
1712 .usage
= ISL_SURF_USAGE_STORAGE_BIT
,
1715 if (format
== ISL_FORMAT_RAW
) {
1716 brw_emit_buffer_surface_state(
1717 brw
, surf_offset
, mt
->bo
, mt
->offset
,
1718 format
, mt
->bo
->size
- mt
->offset
, 1 /* pitch */,
1719 access
!= GL_READ_ONLY
);
1722 const int surf_index
= surf_offset
- &brw
->wm
.base
.surf_offset
[0];
1723 assert(!intel_miptree_has_color_unresolved(mt
,
1725 view
.base_array_layer
,
1727 brw_emit_surface_state(brw
, mt
, INTEL_AUX_BUFFER_DISABLED
,
1728 mt
->target
, view
, tex_mocs
[brw
->gen
],
1729 surf_offset
, surf_index
,
1730 I915_GEM_DOMAIN_SAMPLER
,
1731 access
== GL_READ_ONLY
? 0 :
1732 I915_GEM_DOMAIN_SAMPLER
);
1735 struct isl_surf surf
;
1736 intel_miptree_get_isl_surf(brw
, mt
, &surf
);
1738 isl_surf_fill_image_param(&brw
->isl_dev
, param
, &surf
, &view
);
1739 param
->surface_idx
= surface_idx
;
1743 brw
->vtbl
.emit_null_surface_state(brw
, 1, 1, 1, surf_offset
);
1744 update_default_image_param(brw
, u
, surface_idx
, param
);
1749 brw_upload_image_surfaces(struct brw_context
*brw
,
1750 const struct gl_program
*prog
,
1751 struct brw_stage_state
*stage_state
,
1752 struct brw_stage_prog_data
*prog_data
)
1755 struct gl_context
*ctx
= &brw
->ctx
;
1757 if (prog
->info
.num_images
) {
1758 for (unsigned i
= 0; i
< prog
->info
.num_images
; i
++) {
1759 struct gl_image_unit
*u
= &ctx
->ImageUnits
[prog
->sh
.ImageUnits
[i
]];
1760 const unsigned surf_idx
= prog_data
->binding_table
.image_start
+ i
;
1762 update_image_surface(brw
, u
, prog
->sh
.ImageAccess
[i
],
1764 &stage_state
->surf_offset
[surf_idx
],
1765 &prog_data
->image_param
[i
]);
1768 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1769 /* This may have changed the image metadata dependent on the context
1770 * image unit state and passed to the program as uniforms, make sure
1771 * that push and pull constants are reuploaded.
1773 brw
->NewGLState
|= _NEW_PROGRAM_CONSTANTS
;
1778 brw_upload_wm_image_surfaces(struct brw_context
*brw
)
1780 /* BRW_NEW_FRAGMENT_PROGRAM */
1781 const struct gl_program
*wm
= brw
->fragment_program
;
1784 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1785 brw_upload_image_surfaces(brw
, wm
, &brw
->wm
.base
,
1786 brw
->wm
.base
.prog_data
);
1790 const struct brw_tracked_state brw_wm_image_surfaces
= {
1792 .mesa
= _NEW_TEXTURE
,
1793 .brw
= BRW_NEW_BATCH
|
1795 BRW_NEW_FRAGMENT_PROGRAM
|
1796 BRW_NEW_FS_PROG_DATA
|
1799 .emit
= brw_upload_wm_image_surfaces
,
1803 gen4_init_vtable_surface_functions(struct brw_context
*brw
)
1805 brw
->vtbl
.update_renderbuffer_surface
= gen4_update_renderbuffer_surface
;
1806 brw
->vtbl
.emit_null_surface_state
= brw_emit_null_surface_state
;
1810 gen6_init_vtable_surface_functions(struct brw_context
*brw
)
1812 gen4_init_vtable_surface_functions(brw
);
1813 brw
->vtbl
.update_renderbuffer_surface
= brw_update_renderbuffer_surface
;
1817 brw_upload_cs_work_groups_surface(struct brw_context
*brw
)
1819 struct gl_context
*ctx
= &brw
->ctx
;
1821 struct gl_program
*prog
=
1822 ctx
->_Shader
->CurrentProgram
[MESA_SHADER_COMPUTE
];
1823 /* BRW_NEW_CS_PROG_DATA */
1824 const struct brw_cs_prog_data
*cs_prog_data
=
1825 brw_cs_prog_data(brw
->cs
.base
.prog_data
);
1827 if (prog
&& cs_prog_data
->uses_num_work_groups
) {
1828 const unsigned surf_idx
=
1829 cs_prog_data
->binding_table
.work_groups_start
;
1830 uint32_t *surf_offset
= &brw
->cs
.base
.surf_offset
[surf_idx
];
1834 if (brw
->compute
.num_work_groups_bo
== NULL
) {
1836 intel_upload_data(brw
,
1837 (void *)brw
->compute
.num_work_groups
,
1843 bo
= brw
->compute
.num_work_groups_bo
;
1844 bo_offset
= brw
->compute
.num_work_groups_offset
;
1847 brw_emit_buffer_surface_state(brw
, surf_offset
,
1850 3 * sizeof(GLuint
), 1, true);
1851 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1855 const struct brw_tracked_state brw_cs_work_groups_surface
= {
1857 .brw
= BRW_NEW_BLORP
|
1858 BRW_NEW_CS_PROG_DATA
|
1859 BRW_NEW_CS_WORK_GROUPS
1861 .emit
= brw_upload_cs_work_groups_surface
,