2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keithw@vmware.com>
33 #include "compiler/nir/nir.h"
34 #include "main/context.h"
35 #include "main/blend.h"
36 #include "main/mtypes.h"
37 #include "main/samplerobj.h"
38 #include "main/shaderimage.h"
39 #include "main/teximage.h"
40 #include "program/prog_parameter.h"
41 #include "program/prog_instruction.h"
42 #include "main/framebuffer.h"
43 #include "main/shaderapi.h"
47 #include "intel_mipmap_tree.h"
48 #include "intel_batchbuffer.h"
49 #include "intel_tex.h"
50 #include "intel_fbo.h"
51 #include "intel_buffer_objects.h"
53 #include "brw_context.h"
54 #include "brw_state.h"
55 #include "brw_defines.h"
/* Flag bits passed through the "flags" argument of the surface-state
 * emitters below.
 */
enum {
   INTEL_RENDERBUFFER_LAYERED = 1 << 0,  /* emit a layered (array) view */
   INTEL_AUX_BUFFER_DISABLED = 1 << 1,   /* suppress MCS/HiZ aux surface */
};
63 uint32_t tex_mocs
[] = {
70 uint32_t rb_mocs
[] = {
78 get_isl_surf(struct brw_context
*brw
, struct intel_mipmap_tree
*mt
,
79 GLenum target
, struct isl_view
*view
,
80 uint32_t *tile_x
, uint32_t *tile_y
,
81 uint32_t *offset
, struct isl_surf
*surf
)
83 intel_miptree_get_isl_surf(brw
, mt
, surf
);
85 surf
->dim
= get_isl_surf_dim(target
);
87 const enum isl_dim_layout dim_layout
=
88 get_isl_dim_layout(&brw
->screen
->devinfo
, mt
->surf
.tiling
, target
,
91 if (surf
->dim_layout
== dim_layout
)
94 /* The layout of the specified texture target is not compatible with the
95 * actual layout of the miptree structure in memory -- You're entering
96 * dangerous territory, this can only possibly work if you only intended
97 * to access a single level and slice of the texture, and the hardware
98 * supports the tile offset feature in order to allow non-tile-aligned
99 * base offsets, since we'll have to point the hardware to the first
100 * texel of the level instead of relying on the usual base level/layer
103 assert(brw
->has_surface_tile_offset
);
104 assert(view
->levels
== 1 && view
->array_len
== 1);
105 assert(*tile_x
== 0 && *tile_y
== 0);
107 offset
+= intel_miptree_get_tile_offsets(mt
, view
->base_level
,
108 view
->base_array_layer
,
111 /* Minify the logical dimensions of the texture. */
112 const unsigned l
= view
->base_level
- mt
->first_level
;
113 surf
->logical_level0_px
.width
= minify(surf
->logical_level0_px
.width
, l
);
114 surf
->logical_level0_px
.height
= surf
->dim
<= ISL_SURF_DIM_1D
? 1 :
115 minify(surf
->logical_level0_px
.height
, l
);
116 surf
->logical_level0_px
.depth
= surf
->dim
<= ISL_SURF_DIM_2D
? 1 :
117 minify(surf
->logical_level0_px
.depth
, l
);
119 /* Only the base level and layer can be addressed with the overridden
122 surf
->logical_level0_px
.array_len
= 1;
124 surf
->dim_layout
= dim_layout
;
126 /* The requested slice of the texture is now at the base level and
129 view
->base_level
= 0;
130 view
->base_array_layer
= 0;
134 brw_emit_surface_state(struct brw_context
*brw
,
135 struct intel_mipmap_tree
*mt
, uint32_t flags
,
136 GLenum target
, struct isl_view view
,
137 uint32_t mocs
, uint32_t *surf_offset
, int surf_index
,
138 unsigned read_domains
, unsigned write_domains
)
140 uint32_t tile_x
= mt
->level
[0].level_x
;
141 uint32_t tile_y
= mt
->level
[0].level_y
;
142 uint32_t offset
= mt
->offset
;
144 struct isl_surf surf
;
146 get_isl_surf(brw
, mt
, target
, &view
, &tile_x
, &tile_y
, &offset
, &surf
);
148 union isl_color_value clear_color
= { .u32
= { 0, 0, 0, 0 } };
150 struct brw_bo
*aux_bo
;
151 struct isl_surf
*aux_surf
= NULL
;
152 uint64_t aux_offset
= 0;
153 enum isl_aux_usage aux_usage
= ISL_AUX_USAGE_NONE
;
154 if ((mt
->mcs_buf
|| intel_miptree_sample_with_hiz(brw
, mt
)) &&
155 !(flags
& INTEL_AUX_BUFFER_DISABLED
)) {
156 aux_usage
= intel_miptree_get_aux_isl_usage(brw
, mt
);
159 aux_surf
= &mt
->mcs_buf
->surf
;
161 aux_bo
= mt
->mcs_buf
->bo
;
162 aux_offset
= mt
->mcs_buf
->bo
->offset64
+ mt
->mcs_buf
->offset
;
164 aux_surf
= &mt
->hiz_buf
->surf
;
166 aux_bo
= mt
->hiz_buf
->bo
;
167 aux_offset
= mt
->hiz_buf
->bo
->offset64
;
170 /* We only really need a clear color if we also have an auxiliary
171 * surface. Without one, it does nothing.
173 clear_color
= mt
->fast_clear_color
;
176 void *state
= brw_state_batch(brw
,
177 brw
->isl_dev
.ss
.size
,
178 brw
->isl_dev
.ss
.align
,
181 isl_surf_fill_state(&brw
->isl_dev
, state
, .surf
= &surf
, .view
= &view
,
182 .address
= mt
->bo
->offset64
+ offset
,
183 .aux_surf
= aux_surf
, .aux_usage
= aux_usage
,
184 .aux_address
= aux_offset
,
185 .mocs
= mocs
, .clear_color
= clear_color
,
186 .x_offset_sa
= tile_x
, .y_offset_sa
= tile_y
);
188 brw_emit_reloc(&brw
->batch
, *surf_offset
+ brw
->isl_dev
.ss
.addr_offset
,
189 mt
->bo
, offset
, read_domains
, write_domains
);
192 /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
193 * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
194 * contain other control information. Since buffer addresses are always
195 * on 4k boundaries (and thus have their lower 12 bits zero), we can use
196 * an ordinary reloc to do the necessary address translation.
198 assert((aux_offset
& 0xfff) == 0);
199 uint32_t *aux_addr
= state
+ brw
->isl_dev
.ss
.aux_addr_offset
;
200 brw_emit_reloc(&brw
->batch
,
201 *surf_offset
+ brw
->isl_dev
.ss
.aux_addr_offset
,
202 aux_bo
, *aux_addr
- aux_bo
->offset64
,
203 read_domains
, write_domains
);
208 brw_update_renderbuffer_surface(struct brw_context
*brw
,
209 struct gl_renderbuffer
*rb
,
210 uint32_t flags
, unsigned unit
/* unused */,
213 struct gl_context
*ctx
= &brw
->ctx
;
214 struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
215 struct intel_mipmap_tree
*mt
= irb
->mt
;
218 assert(!(flags
& INTEL_AUX_BUFFER_DISABLED
));
221 assert(brw_render_target_supported(brw
, rb
));
223 mesa_format rb_format
= _mesa_get_render_format(ctx
, intel_rb_format(irb
));
224 if (unlikely(!brw
->mesa_format_supports_render
[rb_format
])) {
225 _mesa_problem(ctx
, "%s: renderbuffer format %s unsupported\n",
226 __func__
, _mesa_get_format_name(rb_format
));
229 struct isl_view view
= {
230 .format
= brw
->mesa_to_isl_render_format
[rb_format
],
231 .base_level
= irb
->mt_level
- irb
->mt
->first_level
,
233 .base_array_layer
= irb
->mt_layer
,
234 .array_len
= MAX2(irb
->layer_count
, 1),
235 .swizzle
= ISL_SWIZZLE_IDENTITY
,
236 .usage
= ISL_SURF_USAGE_RENDER_TARGET_BIT
,
240 brw_emit_surface_state(brw
, mt
, flags
, mt
->target
, view
,
243 I915_GEM_DOMAIN_RENDER
,
244 I915_GEM_DOMAIN_RENDER
);
249 translate_tex_target(GLenum target
)
253 case GL_TEXTURE_1D_ARRAY_EXT
:
254 return BRW_SURFACE_1D
;
256 case GL_TEXTURE_RECTANGLE_NV
:
257 return BRW_SURFACE_2D
;
260 case GL_TEXTURE_2D_ARRAY_EXT
:
261 case GL_TEXTURE_EXTERNAL_OES
:
262 case GL_TEXTURE_2D_MULTISAMPLE
:
263 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY
:
264 return BRW_SURFACE_2D
;
267 return BRW_SURFACE_3D
;
269 case GL_TEXTURE_CUBE_MAP
:
270 case GL_TEXTURE_CUBE_MAP_ARRAY
:
271 return BRW_SURFACE_CUBE
;
274 unreachable("not reached");
279 brw_get_surface_tiling_bits(enum isl_tiling tiling
)
283 return BRW_SURFACE_TILED
;
285 return BRW_SURFACE_TILED
| BRW_SURFACE_TILED_Y
;
293 brw_get_surface_num_multisamples(unsigned num_samples
)
296 return BRW_SURFACE_MULTISAMPLECOUNT_4
;
298 return BRW_SURFACE_MULTISAMPLECOUNT_1
;
302 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
306 brw_get_texture_swizzle(const struct gl_context
*ctx
,
307 const struct gl_texture_object
*t
)
309 const struct gl_texture_image
*img
= t
->Image
[0][t
->BaseLevel
];
311 int swizzles
[SWIZZLE_NIL
+ 1] = {
321 if (img
->_BaseFormat
== GL_DEPTH_COMPONENT
||
322 img
->_BaseFormat
== GL_DEPTH_STENCIL
) {
323 GLenum depth_mode
= t
->DepthMode
;
325 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
326 * with depth component data specified with a sized internal format.
327 * Otherwise, it's left at the old default, GL_LUMINANCE.
329 if (_mesa_is_gles3(ctx
) &&
330 img
->InternalFormat
!= GL_DEPTH_COMPONENT
&&
331 img
->InternalFormat
!= GL_DEPTH_STENCIL
) {
335 switch (depth_mode
) {
337 swizzles
[0] = SWIZZLE_ZERO
;
338 swizzles
[1] = SWIZZLE_ZERO
;
339 swizzles
[2] = SWIZZLE_ZERO
;
340 swizzles
[3] = SWIZZLE_X
;
343 swizzles
[0] = SWIZZLE_X
;
344 swizzles
[1] = SWIZZLE_X
;
345 swizzles
[2] = SWIZZLE_X
;
346 swizzles
[3] = SWIZZLE_ONE
;
349 swizzles
[0] = SWIZZLE_X
;
350 swizzles
[1] = SWIZZLE_X
;
351 swizzles
[2] = SWIZZLE_X
;
352 swizzles
[3] = SWIZZLE_X
;
355 swizzles
[0] = SWIZZLE_X
;
356 swizzles
[1] = SWIZZLE_ZERO
;
357 swizzles
[2] = SWIZZLE_ZERO
;
358 swizzles
[3] = SWIZZLE_ONE
;
363 GLenum datatype
= _mesa_get_format_datatype(img
->TexFormat
);
365 /* If the texture's format is alpha-only, force R, G, and B to
366 * 0.0. Similarly, if the texture's format has no alpha channel,
367 * force the alpha value read to 1.0. This allows for the
368 * implementation to use an RGBA texture for any of these formats
369 * without leaking any unexpected values.
371 switch (img
->_BaseFormat
) {
373 swizzles
[0] = SWIZZLE_ZERO
;
374 swizzles
[1] = SWIZZLE_ZERO
;
375 swizzles
[2] = SWIZZLE_ZERO
;
378 if (t
->_IsIntegerFormat
|| datatype
== GL_SIGNED_NORMALIZED
) {
379 swizzles
[0] = SWIZZLE_X
;
380 swizzles
[1] = SWIZZLE_X
;
381 swizzles
[2] = SWIZZLE_X
;
382 swizzles
[3] = SWIZZLE_ONE
;
385 case GL_LUMINANCE_ALPHA
:
386 if (datatype
== GL_SIGNED_NORMALIZED
) {
387 swizzles
[0] = SWIZZLE_X
;
388 swizzles
[1] = SWIZZLE_X
;
389 swizzles
[2] = SWIZZLE_X
;
390 swizzles
[3] = SWIZZLE_W
;
394 if (datatype
== GL_SIGNED_NORMALIZED
) {
395 swizzles
[0] = SWIZZLE_X
;
396 swizzles
[1] = SWIZZLE_X
;
397 swizzles
[2] = SWIZZLE_X
;
398 swizzles
[3] = SWIZZLE_X
;
404 if (_mesa_get_format_bits(img
->TexFormat
, GL_ALPHA_BITS
) > 0 ||
405 img
->TexFormat
== MESA_FORMAT_RGB_DXT1
||
406 img
->TexFormat
== MESA_FORMAT_SRGB_DXT1
)
407 swizzles
[3] = SWIZZLE_ONE
;
411 return MAKE_SWIZZLE4(swizzles
[GET_SWZ(t
->_Swizzle
, 0)],
412 swizzles
[GET_SWZ(t
->_Swizzle
, 1)],
413 swizzles
[GET_SWZ(t
->_Swizzle
, 2)],
414 swizzles
[GET_SWZ(t
->_Swizzle
, 3)]);
418 * Convert an swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+
419 * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED). The mappings are
421 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
424 * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE
426 * which is simply adding 4 then modding by 8 (or anding with 7).
428 * We then may need to apply workarounds for textureGather hardware bugs.
431 swizzle_to_scs(GLenum swizzle
, bool need_green_to_blue
)
433 unsigned scs
= (swizzle
+ 4) & 7;
435 return (need_green_to_blue
&& scs
== HSW_SCS_GREEN
) ? HSW_SCS_BLUE
: scs
;
439 brw_find_matching_rb(const struct gl_framebuffer
*fb
,
440 const struct intel_mipmap_tree
*mt
)
442 for (unsigned i
= 0; i
< fb
->_NumColorDrawBuffers
; i
++) {
443 const struct intel_renderbuffer
*irb
=
444 intel_renderbuffer(fb
->_ColorDrawBuffers
[i
]);
446 if (irb
&& irb
->mt
== mt
)
450 return fb
->_NumColorDrawBuffers
;
454 brw_texture_view_sane(const struct brw_context
*brw
,
455 const struct intel_mipmap_tree
*mt
,
456 const struct isl_view
*view
)
458 /* There are special cases only for lossless compression. */
459 if (mt
->aux_usage
!= ISL_AUX_USAGE_CCS_E
)
462 if (isl_format_supports_ccs_e(&brw
->screen
->devinfo
, view
->format
))
465 /* Logic elsewhere needs to take care to resolve the color buffer prior
466 * to sampling it as non-compressed.
468 if (intel_miptree_has_color_unresolved(mt
, view
->base_level
, view
->levels
,
469 view
->base_array_layer
,
473 const struct gl_framebuffer
*fb
= brw
->ctx
.DrawBuffer
;
474 const unsigned rb_index
= brw_find_matching_rb(fb
, mt
);
476 if (rb_index
== fb
->_NumColorDrawBuffers
)
479 /* Underlying surface is compressed but it is sampled using a format that
480 * the sampling engine doesn't support as compressed. Compression must be
481 * disabled for both sampling engine and data port in case the same surface
482 * is used also as render target.
484 return brw
->draw_aux_buffer_disabled
[rb_index
];
488 brw_disable_aux_surface(const struct brw_context
*brw
,
489 const struct intel_mipmap_tree
*mt
,
490 const struct isl_view
*view
)
492 /* Nothing to disable. */
496 const bool is_unresolved
= intel_miptree_has_color_unresolved(
497 mt
, view
->base_level
, view
->levels
,
498 view
->base_array_layer
, view
->array_len
);
500 /* There are special cases only for lossless compression. */
501 if (mt
->aux_usage
!= ISL_AUX_USAGE_CCS_E
)
502 return !is_unresolved
;
504 const struct gl_framebuffer
*fb
= brw
->ctx
.DrawBuffer
;
505 const unsigned rb_index
= brw_find_matching_rb(fb
, mt
);
507 /* If we are drawing into this with compression enabled, then we must also
508 * enable compression when texturing from it regardless of
509 * fast_clear_state. If we don't then, after the first draw call with
510 * this setup, there will be data in the CCS which won't get picked up by
511 * subsequent texturing operations as required by ARB_texture_barrier.
512 * Since we don't want to re-emit the binding table or do a resolve
513 * operation every draw call, the easiest thing to do is just enable
514 * compression on the texturing side. This is completely safe to do
515 * since, if compressed texturing weren't allowed, we would have disabled
516 * compression of render targets in whatever_that_function_is_called().
518 if (rb_index
< fb
->_NumColorDrawBuffers
) {
519 if (brw
->draw_aux_buffer_disabled
[rb_index
]) {
520 assert(!is_unresolved
);
523 return brw
->draw_aux_buffer_disabled
[rb_index
];
526 return !is_unresolved
;
530 brw_update_texture_surface(struct gl_context
*ctx
,
532 uint32_t *surf_offset
,
536 struct brw_context
*brw
= brw_context(ctx
);
537 struct gl_texture_object
*obj
= ctx
->Texture
.Unit
[unit
]._Current
;
539 if (obj
->Target
== GL_TEXTURE_BUFFER
) {
540 brw_update_buffer_texture_surface(ctx
, unit
, surf_offset
);
543 struct intel_texture_object
*intel_obj
= intel_texture_object(obj
);
544 struct intel_mipmap_tree
*mt
= intel_obj
->mt
;
547 if (mt
->plane
[plane
- 1] == NULL
)
549 mt
= mt
->plane
[plane
- 1];
552 struct gl_sampler_object
*sampler
= _mesa_get_samplerobj(ctx
, unit
);
553 /* If this is a view with restricted NumLayers, then our effective depth
554 * is not just the miptree depth.
556 const unsigned view_num_layers
=
557 (obj
->Immutable
&& obj
->Target
!= GL_TEXTURE_3D
) ? obj
->NumLayers
:
560 /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
561 * texturing functions that return a float, as our code generation always
562 * selects the .x channel (which would always be 0).
564 struct gl_texture_image
*firstImage
= obj
->Image
[0][obj
->BaseLevel
];
565 const bool alpha_depth
= obj
->DepthMode
== GL_ALPHA
&&
566 (firstImage
->_BaseFormat
== GL_DEPTH_COMPONENT
||
567 firstImage
->_BaseFormat
== GL_DEPTH_STENCIL
);
568 const unsigned swizzle
= (unlikely(alpha_depth
) ? SWIZZLE_XYZW
:
569 brw_get_texture_swizzle(&brw
->ctx
, obj
));
571 mesa_format mesa_fmt
= plane
== 0 ? intel_obj
->_Format
: mt
->format
;
572 enum isl_format format
= translate_tex_format(brw
, mesa_fmt
,
573 sampler
->sRGBDecode
);
575 /* Implement gen6 and gen7 gather work-around */
576 bool need_green_to_blue
= false;
578 if (brw
->gen
== 7 && (format
== ISL_FORMAT_R32G32_FLOAT
||
579 format
== ISL_FORMAT_R32G32_SINT
||
580 format
== ISL_FORMAT_R32G32_UINT
)) {
581 format
= ISL_FORMAT_R32G32_FLOAT_LD
;
582 need_green_to_blue
= brw
->is_haswell
;
583 } else if (brw
->gen
== 6) {
584 /* Sandybridge's gather4 message is broken for integer formats.
585 * To work around this, we pretend the surface is UNORM for
586 * 8 or 16-bit formats, and emit shader instructions to recover
587 * the real INT/UINT value. For 32-bit formats, we pretend
588 * the surface is FLOAT, and simply reinterpret the resulting
592 case ISL_FORMAT_R8_SINT
:
593 case ISL_FORMAT_R8_UINT
:
594 format
= ISL_FORMAT_R8_UNORM
;
597 case ISL_FORMAT_R16_SINT
:
598 case ISL_FORMAT_R16_UINT
:
599 format
= ISL_FORMAT_R16_UNORM
;
602 case ISL_FORMAT_R32_SINT
:
603 case ISL_FORMAT_R32_UINT
:
604 format
= ISL_FORMAT_R32_FLOAT
;
613 if (obj
->StencilSampling
&& firstImage
->_BaseFormat
== GL_DEPTH_STENCIL
) {
615 assert(mt
->r8stencil_mt
&& !mt
->stencil_mt
->r8stencil_needs_update
);
616 mt
= mt
->r8stencil_mt
;
620 format
= ISL_FORMAT_R8_UINT
;
621 } else if (brw
->gen
<= 7 && mt
->format
== MESA_FORMAT_S_UINT8
) {
622 assert(mt
->r8stencil_mt
&& !mt
->r8stencil_needs_update
);
623 mt
= mt
->r8stencil_mt
;
624 format
= ISL_FORMAT_R8_UINT
;
627 const int surf_index
= surf_offset
- &brw
->wm
.base
.surf_offset
[0];
629 struct isl_view view
= {
631 .base_level
= obj
->MinLevel
+ obj
->BaseLevel
,
632 .levels
= intel_obj
->_MaxLevel
- obj
->BaseLevel
+ 1,
633 .base_array_layer
= obj
->MinLayer
,
634 .array_len
= view_num_layers
,
636 .r
= swizzle_to_scs(GET_SWZ(swizzle
, 0), need_green_to_blue
),
637 .g
= swizzle_to_scs(GET_SWZ(swizzle
, 1), need_green_to_blue
),
638 .b
= swizzle_to_scs(GET_SWZ(swizzle
, 2), need_green_to_blue
),
639 .a
= swizzle_to_scs(GET_SWZ(swizzle
, 3), need_green_to_blue
),
641 .usage
= ISL_SURF_USAGE_TEXTURE_BIT
,
644 if (obj
->Target
== GL_TEXTURE_CUBE_MAP
||
645 obj
->Target
== GL_TEXTURE_CUBE_MAP_ARRAY
)
646 view
.usage
|= ISL_SURF_USAGE_CUBE_BIT
;
648 assert(brw_texture_view_sane(brw
, mt
, &view
));
650 const int flags
= brw_disable_aux_surface(brw
, mt
, &view
) ?
651 INTEL_AUX_BUFFER_DISABLED
: 0;
652 brw_emit_surface_state(brw
, mt
, flags
, mt
->target
, view
,
654 surf_offset
, surf_index
,
655 I915_GEM_DOMAIN_SAMPLER
, 0);
660 brw_emit_buffer_surface_state(struct brw_context
*brw
,
661 uint32_t *out_offset
,
663 unsigned buffer_offset
,
664 unsigned surface_format
,
665 unsigned buffer_size
,
669 uint32_t *dw
= brw_state_batch(brw
,
670 brw
->isl_dev
.ss
.size
,
671 brw
->isl_dev
.ss
.align
,
674 isl_buffer_fill_state(&brw
->isl_dev
, dw
,
675 .address
= (bo
? bo
->offset64
: 0) + buffer_offset
,
677 .format
= surface_format
,
679 .mocs
= tex_mocs
[brw
->gen
]);
682 brw_emit_reloc(&brw
->batch
, *out_offset
+ brw
->isl_dev
.ss
.addr_offset
,
684 I915_GEM_DOMAIN_SAMPLER
,
685 (rw
? I915_GEM_DOMAIN_SAMPLER
: 0));
690 brw_update_buffer_texture_surface(struct gl_context
*ctx
,
692 uint32_t *surf_offset
)
694 struct brw_context
*brw
= brw_context(ctx
);
695 struct gl_texture_object
*tObj
= ctx
->Texture
.Unit
[unit
]._Current
;
696 struct intel_buffer_object
*intel_obj
=
697 intel_buffer_object(tObj
->BufferObject
);
698 uint32_t size
= tObj
->BufferSize
;
699 struct brw_bo
*bo
= NULL
;
700 mesa_format format
= tObj
->_BufferObjectFormat
;
701 const enum isl_format isl_format
= brw_isl_format_for_mesa_format(format
);
702 int texel_size
= _mesa_get_format_bytes(format
);
705 size
= MIN2(size
, intel_obj
->Base
.Size
);
706 bo
= intel_bufferobj_buffer(brw
, intel_obj
, tObj
->BufferOffset
, size
,
710 /* The ARB_texture_buffer_specification says:
712 * "The number of texels in the buffer texture's texel array is given by
714 * floor(<buffer_size> / (<components> * sizeof(<base_type>)),
716 * where <buffer_size> is the size of the buffer object, in basic
717 * machine units and <components> and <base_type> are the element count
718 * and base data type for elements, as specified in Table X.1. The
719 * number of texels in the texel array is then clamped to the
720 * implementation-dependent limit MAX_TEXTURE_BUFFER_SIZE_ARB."
722 * We need to clamp the size in bytes to MAX_TEXTURE_BUFFER_SIZE * stride,
723 * so that when ISL divides by stride to obtain the number of texels, that
724 * texel count is clamped to MAX_TEXTURE_BUFFER_SIZE.
726 size
= MIN2(size
, ctx
->Const
.MaxTextureBufferSize
* (unsigned) texel_size
);
728 if (isl_format
== ISL_FORMAT_UNSUPPORTED
) {
729 _mesa_problem(NULL
, "bad format %s for texture buffer\n",
730 _mesa_get_format_name(format
));
733 brw_emit_buffer_surface_state(brw
, surf_offset
, bo
,
742 * Create the constant buffer surface. Vertex/fragment shader constants will be
743 * read from this buffer with Data Port Read instructions/messages.
746 brw_create_constant_surface(struct brw_context
*brw
,
750 uint32_t *out_offset
)
752 brw_emit_buffer_surface_state(brw
, out_offset
, bo
, offset
,
753 ISL_FORMAT_R32G32B32A32_FLOAT
,
758 * Create the buffer surface. Shader buffer variables will be
759 * read from / write to this buffer with Data Port Read/Write
760 * instructions/messages.
763 brw_create_buffer_surface(struct brw_context
*brw
,
767 uint32_t *out_offset
)
769 /* Use a raw surface so we can reuse existing untyped read/write/atomic
770 * messages. We need these specifically for the fragment shader since they
771 * include a pixel mask header that we need to ensure correct behavior
772 * with helper invocations, which cannot write to the buffer.
774 brw_emit_buffer_surface_state(brw
, out_offset
, bo
, offset
,
780 * Set up a binding table entry for use by stream output logic (transform
783 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
786 brw_update_sol_surface(struct brw_context
*brw
,
787 struct gl_buffer_object
*buffer_obj
,
788 uint32_t *out_offset
, unsigned num_vector_components
,
789 unsigned stride_dwords
, unsigned offset_dwords
)
791 struct intel_buffer_object
*intel_bo
= intel_buffer_object(buffer_obj
);
792 uint32_t offset_bytes
= 4 * offset_dwords
;
793 struct brw_bo
*bo
= intel_bufferobj_buffer(brw
, intel_bo
,
795 buffer_obj
->Size
- offset_bytes
,
797 uint32_t *surf
= brw_state_batch(brw
, 6 * 4, 32, out_offset
);
798 uint32_t pitch_minus_1
= 4*stride_dwords
- 1;
799 size_t size_dwords
= buffer_obj
->Size
/ 4;
800 uint32_t buffer_size_minus_1
, width
, height
, depth
, surface_format
;
802 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
803 * too big to map using a single binding table entry?
805 assert((size_dwords
- offset_dwords
) / stride_dwords
806 <= BRW_MAX_NUM_BUFFER_ENTRIES
);
808 if (size_dwords
> offset_dwords
+ num_vector_components
) {
809 /* There is room for at least 1 transform feedback output in the buffer.
810 * Compute the number of additional transform feedback outputs the
811 * buffer has room for.
813 buffer_size_minus_1
=
814 (size_dwords
- offset_dwords
- num_vector_components
) / stride_dwords
;
816 /* There isn't even room for a single transform feedback output in the
817 * buffer. We can't configure the binding table entry to prevent output
818 * entirely; we'll have to rely on the geometry shader to detect
819 * overflow. But to minimize the damage in case of a bug, set up the
820 * binding table entry to just allow a single output.
822 buffer_size_minus_1
= 0;
824 width
= buffer_size_minus_1
& 0x7f;
825 height
= (buffer_size_minus_1
& 0xfff80) >> 7;
826 depth
= (buffer_size_minus_1
& 0x7f00000) >> 20;
828 switch (num_vector_components
) {
830 surface_format
= ISL_FORMAT_R32_FLOAT
;
833 surface_format
= ISL_FORMAT_R32G32_FLOAT
;
836 surface_format
= ISL_FORMAT_R32G32B32_FLOAT
;
839 surface_format
= ISL_FORMAT_R32G32B32A32_FLOAT
;
842 unreachable("Invalid vector size for transform feedback output");
845 surf
[0] = BRW_SURFACE_BUFFER
<< BRW_SURFACE_TYPE_SHIFT
|
846 BRW_SURFACE_MIPMAPLAYOUT_BELOW
<< BRW_SURFACE_MIPLAYOUT_SHIFT
|
847 surface_format
<< BRW_SURFACE_FORMAT_SHIFT
|
848 BRW_SURFACE_RC_READ_WRITE
;
849 surf
[1] = bo
->offset64
+ offset_bytes
; /* reloc */
850 surf
[2] = (width
<< BRW_SURFACE_WIDTH_SHIFT
|
851 height
<< BRW_SURFACE_HEIGHT_SHIFT
);
852 surf
[3] = (depth
<< BRW_SURFACE_DEPTH_SHIFT
|
853 pitch_minus_1
<< BRW_SURFACE_PITCH_SHIFT
);
857 /* Emit relocation to surface contents. */
858 brw_emit_reloc(&brw
->batch
, *out_offset
+ 4, bo
, offset_bytes
,
859 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
);
862 /* Creates a new WM constant buffer reflecting the current fragment program's
863 * constants, if needed by the fragment program.
865 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
869 brw_upload_wm_pull_constants(struct brw_context
*brw
)
871 struct brw_stage_state
*stage_state
= &brw
->wm
.base
;
872 /* BRW_NEW_FRAGMENT_PROGRAM */
873 struct brw_program
*fp
= (struct brw_program
*) brw
->fragment_program
;
874 /* BRW_NEW_FS_PROG_DATA */
875 struct brw_stage_prog_data
*prog_data
= brw
->wm
.base
.prog_data
;
877 _mesa_shader_write_subroutine_indices(&brw
->ctx
, MESA_SHADER_FRAGMENT
);
878 /* _NEW_PROGRAM_CONSTANTS */
879 brw_upload_pull_constants(brw
, BRW_NEW_SURFACES
, &fp
->program
,
880 stage_state
, prog_data
);
883 const struct brw_tracked_state brw_wm_pull_constants
= {
885 .mesa
= _NEW_PROGRAM_CONSTANTS
,
886 .brw
= BRW_NEW_BATCH
|
888 BRW_NEW_FRAGMENT_PROGRAM
|
889 BRW_NEW_FS_PROG_DATA
,
891 .emit
= brw_upload_wm_pull_constants
,
895 * Creates a null renderbuffer surface.
897 * This is used when the shader doesn't write to any color output. An FB
898 * write to target 0 will still be emitted, because that's how the thread is
899 * terminated (and computed depth is returned), so we need to have the
900 * hardware discard the target 0 color output..
903 brw_emit_null_surface_state(struct brw_context
*brw
,
907 uint32_t *out_offset
)
909 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
912 * A null surface will be used in instances where an actual surface is
913 * not bound. When a write message is generated to a null surface, no
914 * actual surface is written to. When a read message (including any
915 * sampling engine message) is generated to a null surface, the result
916 * is all zeros. Note that a null surface type is allowed to be used
917 * with all messages, even if it is not specificially indicated as
918 * supported. All of the remaining fields in surface state are ignored
919 * for null surfaces, with the following exceptions:
921 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
922 * depth buffer’s corresponding state for all render target surfaces,
925 * - Surface Format must be R8G8B8A8_UNORM.
927 unsigned surface_type
= BRW_SURFACE_NULL
;
928 struct brw_bo
*bo
= NULL
;
929 unsigned pitch_minus_1
= 0;
930 uint32_t multisampling_state
= 0;
931 uint32_t *surf
= brw_state_batch(brw
, 6 * 4, 32, out_offset
);
934 /* On Gen6, null render targets seem to cause GPU hangs when
935 * multisampling. So work around this problem by rendering into dummy
938 * To decrease the amount of memory needed by the workaround buffer, we
939 * set its pitch to 128 bytes (the width of a Y tile). This means that
940 * the amount of memory needed for the workaround buffer is
941 * (width_in_tiles + height_in_tiles - 1) tiles.
943 * Note that since the workaround buffer will be interpreted by the
944 * hardware as an interleaved multisampled buffer, we need to compute
945 * width_in_tiles and height_in_tiles by dividing the width and height
946 * by 16 rather than the normal Y-tile size of 32.
948 unsigned width_in_tiles
= ALIGN(width
, 16) / 16;
949 unsigned height_in_tiles
= ALIGN(height
, 16) / 16;
950 unsigned size_needed
= (width_in_tiles
+ height_in_tiles
- 1) * 4096;
951 brw_get_scratch_bo(brw
, &brw
->wm
.multisampled_null_render_target_bo
,
953 bo
= brw
->wm
.multisampled_null_render_target_bo
;
954 surface_type
= BRW_SURFACE_2D
;
956 multisampling_state
= brw_get_surface_num_multisamples(samples
);
959 surf
[0] = (surface_type
<< BRW_SURFACE_TYPE_SHIFT
|
960 ISL_FORMAT_B8G8R8A8_UNORM
<< BRW_SURFACE_FORMAT_SHIFT
);
962 surf
[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT
|
963 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT
|
964 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT
|
965 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT
);
967 surf
[1] = bo
? bo
->offset64
: 0;
968 surf
[2] = ((width
- 1) << BRW_SURFACE_WIDTH_SHIFT
|
969 (height
- 1) << BRW_SURFACE_HEIGHT_SHIFT
);
971 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
974 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
976 surf
[3] = (BRW_SURFACE_TILED
| BRW_SURFACE_TILED_Y
|
977 pitch_minus_1
<< BRW_SURFACE_PITCH_SHIFT
);
978 surf
[4] = multisampling_state
;
982 brw_emit_reloc(&brw
->batch
, *out_offset
+ 4, bo
, 0,
983 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
);
988 * Sets up a surface state structure to point at the given region.
989 * While it is only used for the front/back buffer currently, it should be
990 * usable for further buffers when doing ARB_draw_buffer support.
993 gen4_update_renderbuffer_surface(struct brw_context
*brw
,
994 struct gl_renderbuffer
*rb
,
995 uint32_t flags
, unsigned unit
,
998 struct gl_context
*ctx
= &brw
->ctx
;
999 struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
1000 struct intel_mipmap_tree
*mt
= irb
->mt
;
1002 uint32_t tile_x
, tile_y
;
1003 enum isl_format format
;
1006 mesa_format rb_format
= _mesa_get_render_format(ctx
, intel_rb_format(irb
));
1007 /* BRW_NEW_FS_PROG_DATA */
1009 assert(!(flags
& INTEL_RENDERBUFFER_LAYERED
));
1010 assert(!(flags
& INTEL_AUX_BUFFER_DISABLED
));
1012 if (rb
->TexImage
&& !brw
->has_surface_tile_offset
) {
1013 intel_renderbuffer_get_tile_offsets(irb
, &tile_x
, &tile_y
);
1015 if (tile_x
!= 0 || tile_y
!= 0) {
1016 /* Original gen4 hardware couldn't draw to a non-tile-aligned
1017 * destination in a miptree unless you actually setup your renderbuffer
1018 * as a miptree and used the fragile lod/array_index/etc. controls to
1019 * select the image. So, instead, we just make a new single-level
1020 * miptree and render into that.
1022 intel_renderbuffer_move_to_temp(brw
, irb
, false);
1023 assert(irb
->align_wa_mt
);
1024 mt
= irb
->align_wa_mt
;
1028 surf
= brw_state_batch(brw
, 6 * 4, 32, &offset
);
1030 format
= brw
->mesa_to_isl_render_format
[rb_format
];
1031 if (unlikely(!brw
->mesa_format_supports_render
[rb_format
])) {
1032 _mesa_problem(ctx
, "%s: renderbuffer format %s unsupported\n",
1033 __func__
, _mesa_get_format_name(rb_format
));
1036 surf
[0] = (BRW_SURFACE_2D
<< BRW_SURFACE_TYPE_SHIFT
|
1037 format
<< BRW_SURFACE_FORMAT_SHIFT
);
1040 assert(mt
->offset
% mt
->cpp
== 0);
1041 surf
[1] = (intel_renderbuffer_get_tile_offsets(irb
, &tile_x
, &tile_y
) +
1042 mt
->bo
->offset64
+ mt
->offset
);
1044 surf
[2] = ((rb
->Width
- 1) << BRW_SURFACE_WIDTH_SHIFT
|
1045 (rb
->Height
- 1) << BRW_SURFACE_HEIGHT_SHIFT
);
1047 surf
[3] = (brw_get_surface_tiling_bits(mt
->surf
.tiling
) |
1048 (mt
->surf
.row_pitch
- 1) << BRW_SURFACE_PITCH_SHIFT
);
1050 surf
[4] = brw_get_surface_num_multisamples(mt
->surf
.samples
);
1052 assert(brw
->has_surface_tile_offset
|| (tile_x
== 0 && tile_y
== 0));
1053 /* Note that the low bits of these fields are missing, so
1054 * there's the possibility of getting in trouble.
1056 assert(tile_x
% 4 == 0);
1057 assert(tile_y
% 2 == 0);
1058 surf
[5] = ((tile_x
/ 4) << BRW_SURFACE_X_OFFSET_SHIFT
|
1059 (tile_y
/ 2) << BRW_SURFACE_Y_OFFSET_SHIFT
|
1060 (mt
->valign
== 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE
: 0));
1064 if (!ctx
->Color
.ColorLogicOpEnabled
&& !ctx
->Color
._AdvancedBlendMode
&&
1065 (ctx
->Color
.BlendEnabled
& (1 << unit
)))
1066 surf
[0] |= BRW_SURFACE_BLEND_ENABLED
;
1068 if (!ctx
->Color
.ColorMask
[unit
][0])
1069 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT
;
1070 if (!ctx
->Color
.ColorMask
[unit
][1])
1071 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT
;
1072 if (!ctx
->Color
.ColorMask
[unit
][2])
1073 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT
;
1075 /* As mentioned above, disable writes to the alpha component when the
1076 * renderbuffer is XRGB.
1078 if (ctx
->DrawBuffer
->Visual
.alphaBits
== 0 ||
1079 !ctx
->Color
.ColorMask
[unit
][3]) {
1080 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT
;
1084 brw_emit_reloc(&brw
->batch
, offset
+ 4, mt
->bo
, surf
[1] - mt
->bo
->offset64
,
1085 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
);
1091 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
1094 brw_update_renderbuffer_surfaces(struct brw_context
*brw
,
1095 const struct gl_framebuffer
*fb
,
1096 uint32_t render_target_start
,
1097 uint32_t *surf_offset
)
1100 const unsigned int w
= _mesa_geometric_width(fb
);
1101 const unsigned int h
= _mesa_geometric_height(fb
);
1102 const unsigned int s
= _mesa_geometric_samples(fb
);
1104 /* Update surfaces for drawing buffers */
1105 if (fb
->_NumColorDrawBuffers
>= 1) {
1106 for (i
= 0; i
< fb
->_NumColorDrawBuffers
; i
++) {
1107 const uint32_t surf_index
= render_target_start
+ i
;
1108 const int flags
= (_mesa_geometric_layers(fb
) > 0 ?
1109 INTEL_RENDERBUFFER_LAYERED
: 0) |
1110 (brw
->draw_aux_buffer_disabled
[i
] ?
1111 INTEL_AUX_BUFFER_DISABLED
: 0);
1113 if (intel_renderbuffer(fb
->_ColorDrawBuffers
[i
])) {
1114 surf_offset
[surf_index
] =
1115 brw
->vtbl
.update_renderbuffer_surface(
1116 brw
, fb
->_ColorDrawBuffers
[i
], flags
, i
, surf_index
);
1118 brw
->vtbl
.emit_null_surface_state(brw
, w
, h
, s
,
1119 &surf_offset
[surf_index
]);
1123 const uint32_t surf_index
= render_target_start
;
1124 brw
->vtbl
.emit_null_surface_state(brw
, w
, h
, s
,
1125 &surf_offset
[surf_index
]);
1130 update_renderbuffer_surfaces(struct brw_context
*brw
)
1132 const struct gl_context
*ctx
= &brw
->ctx
;
1134 /* BRW_NEW_FS_PROG_DATA */
1135 const struct brw_wm_prog_data
*wm_prog_data
=
1136 brw_wm_prog_data(brw
->wm
.base
.prog_data
);
1138 /* _NEW_BUFFERS | _NEW_COLOR */
1139 const struct gl_framebuffer
*fb
= ctx
->DrawBuffer
;
1140 brw_update_renderbuffer_surfaces(
1142 wm_prog_data
->binding_table
.render_target_start
,
1143 brw
->wm
.base
.surf_offset
);
1144 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1147 const struct brw_tracked_state brw_renderbuffer_surfaces
= {
1149 .mesa
= _NEW_BUFFERS
|
1151 .brw
= BRW_NEW_BATCH
|
1153 BRW_NEW_FS_PROG_DATA
,
1155 .emit
= update_renderbuffer_surfaces
,
1158 const struct brw_tracked_state gen6_renderbuffer_surfaces
= {
1160 .mesa
= _NEW_BUFFERS
,
1161 .brw
= BRW_NEW_BATCH
|
1164 .emit
= update_renderbuffer_surfaces
,
1168 update_renderbuffer_read_surfaces(struct brw_context
*brw
)
1170 const struct gl_context
*ctx
= &brw
->ctx
;
1172 /* BRW_NEW_FS_PROG_DATA */
1173 const struct brw_wm_prog_data
*wm_prog_data
=
1174 brw_wm_prog_data(brw
->wm
.base
.prog_data
);
1176 /* BRW_NEW_FRAGMENT_PROGRAM */
1177 if (!ctx
->Extensions
.MESA_shader_framebuffer_fetch
&&
1178 brw
->fragment_program
&& brw
->fragment_program
->info
.outputs_read
) {
1180 const struct gl_framebuffer
*fb
= ctx
->DrawBuffer
;
1182 for (unsigned i
= 0; i
< fb
->_NumColorDrawBuffers
; i
++) {
1183 struct gl_renderbuffer
*rb
= fb
->_ColorDrawBuffers
[i
];
1184 const struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
1185 const unsigned surf_index
=
1186 wm_prog_data
->binding_table
.render_target_read_start
+ i
;
1187 uint32_t *surf_offset
= &brw
->wm
.base
.surf_offset
[surf_index
];
1190 const enum isl_format format
= brw
->mesa_to_isl_render_format
[
1191 _mesa_get_render_format(ctx
, intel_rb_format(irb
))];
1192 assert(isl_format_supports_sampling(&brw
->screen
->devinfo
,
1195 /* Override the target of the texture if the render buffer is a
1196 * single slice of a 3D texture (since the minimum array element
1197 * field of the surface state structure is ignored by the sampler
1198 * unit for 3D textures on some hardware), or if the render buffer
1199 * is a 1D array (since shaders always provide the array index
1200 * coordinate at the Z component to avoid state-dependent
1201 * recompiles when changing the texture target of the
1204 const GLenum target
=
1205 (irb
->mt
->target
== GL_TEXTURE_3D
&&
1206 irb
->layer_count
== 1) ? GL_TEXTURE_2D
:
1207 irb
->mt
->target
== GL_TEXTURE_1D_ARRAY
? GL_TEXTURE_2D_ARRAY
:
1210 const struct isl_view view
= {
1212 .base_level
= irb
->mt_level
- irb
->mt
->first_level
,
1214 .base_array_layer
= irb
->mt_layer
,
1215 .array_len
= irb
->layer_count
,
1216 .swizzle
= ISL_SWIZZLE_IDENTITY
,
1217 .usage
= ISL_SURF_USAGE_TEXTURE_BIT
,
1220 const int flags
= brw
->draw_aux_buffer_disabled
[i
] ?
1221 INTEL_AUX_BUFFER_DISABLED
: 0;
1222 brw_emit_surface_state(brw
, irb
->mt
, flags
, target
, view
,
1224 surf_offset
, surf_index
,
1225 I915_GEM_DOMAIN_SAMPLER
, 0);
1228 brw
->vtbl
.emit_null_surface_state(
1229 brw
, _mesa_geometric_width(fb
), _mesa_geometric_height(fb
),
1230 _mesa_geometric_samples(fb
), surf_offset
);
1234 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1238 const struct brw_tracked_state brw_renderbuffer_read_surfaces
= {
1240 .mesa
= _NEW_BUFFERS
,
1241 .brw
= BRW_NEW_BATCH
|
1242 BRW_NEW_FRAGMENT_PROGRAM
|
1243 BRW_NEW_FS_PROG_DATA
,
1245 .emit
= update_renderbuffer_read_surfaces
,
1249 update_stage_texture_surfaces(struct brw_context
*brw
,
1250 const struct gl_program
*prog
,
1251 struct brw_stage_state
*stage_state
,
1252 bool for_gather
, uint32_t plane
)
1257 struct gl_context
*ctx
= &brw
->ctx
;
1259 uint32_t *surf_offset
= stage_state
->surf_offset
;
1261 /* BRW_NEW_*_PROG_DATA */
1263 surf_offset
+= stage_state
->prog_data
->binding_table
.gather_texture_start
;
1265 surf_offset
+= stage_state
->prog_data
->binding_table
.plane_start
[plane
];
1267 unsigned num_samplers
= util_last_bit(prog
->SamplersUsed
);
1268 for (unsigned s
= 0; s
< num_samplers
; s
++) {
1271 if (prog
->SamplersUsed
& (1 << s
)) {
1272 const unsigned unit
= prog
->SamplerUnits
[s
];
1275 if (ctx
->Texture
.Unit
[unit
]._Current
) {
1276 brw_update_texture_surface(ctx
, unit
, surf_offset
+ s
, for_gather
, plane
);
1284 * Construct SURFACE_STATE objects for enabled textures.
1287 brw_update_texture_surfaces(struct brw_context
*brw
)
1289 /* BRW_NEW_VERTEX_PROGRAM */
1290 struct gl_program
*vs
= (struct gl_program
*) brw
->vertex_program
;
1292 /* BRW_NEW_TESS_PROGRAMS */
1293 struct gl_program
*tcs
= (struct gl_program
*) brw
->tess_ctrl_program
;
1294 struct gl_program
*tes
= (struct gl_program
*) brw
->tess_eval_program
;
1296 /* BRW_NEW_GEOMETRY_PROGRAM */
1297 struct gl_program
*gs
= (struct gl_program
*) brw
->geometry_program
;
1299 /* BRW_NEW_FRAGMENT_PROGRAM */
1300 struct gl_program
*fs
= (struct gl_program
*) brw
->fragment_program
;
1303 update_stage_texture_surfaces(brw
, vs
, &brw
->vs
.base
, false, 0);
1304 update_stage_texture_surfaces(brw
, tcs
, &brw
->tcs
.base
, false, 0);
1305 update_stage_texture_surfaces(brw
, tes
, &brw
->tes
.base
, false, 0);
1306 update_stage_texture_surfaces(brw
, gs
, &brw
->gs
.base
, false, 0);
1307 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, false, 0);
1309 /* emit alternate set of surface state for gather. this
1310 * allows the surface format to be overriden for only the
1311 * gather4 messages. */
1313 if (vs
&& vs
->nir
->info
.uses_texture_gather
)
1314 update_stage_texture_surfaces(brw
, vs
, &brw
->vs
.base
, true, 0);
1315 if (tcs
&& tcs
->nir
->info
.uses_texture_gather
)
1316 update_stage_texture_surfaces(brw
, tcs
, &brw
->tcs
.base
, true, 0);
1317 if (tes
&& tes
->nir
->info
.uses_texture_gather
)
1318 update_stage_texture_surfaces(brw
, tes
, &brw
->tes
.base
, true, 0);
1319 if (gs
&& gs
->nir
->info
.uses_texture_gather
)
1320 update_stage_texture_surfaces(brw
, gs
, &brw
->gs
.base
, true, 0);
1321 if (fs
&& fs
->nir
->info
.uses_texture_gather
)
1322 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, true, 0);
1326 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, false, 1);
1327 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, false, 2);
1330 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1333 const struct brw_tracked_state brw_texture_surfaces
= {
1335 .mesa
= _NEW_TEXTURE
,
1336 .brw
= BRW_NEW_BATCH
|
1338 BRW_NEW_FRAGMENT_PROGRAM
|
1339 BRW_NEW_FS_PROG_DATA
|
1340 BRW_NEW_GEOMETRY_PROGRAM
|
1341 BRW_NEW_GS_PROG_DATA
|
1342 BRW_NEW_TESS_PROGRAMS
|
1343 BRW_NEW_TCS_PROG_DATA
|
1344 BRW_NEW_TES_PROG_DATA
|
1345 BRW_NEW_TEXTURE_BUFFER
|
1346 BRW_NEW_VERTEX_PROGRAM
|
1347 BRW_NEW_VS_PROG_DATA
,
1349 .emit
= brw_update_texture_surfaces
,
1353 brw_update_cs_texture_surfaces(struct brw_context
*brw
)
1355 /* BRW_NEW_COMPUTE_PROGRAM */
1356 struct gl_program
*cs
= (struct gl_program
*) brw
->compute_program
;
1359 update_stage_texture_surfaces(brw
, cs
, &brw
->cs
.base
, false, 0);
1361 /* emit alternate set of surface state for gather. this
1362 * allows the surface format to be overriden for only the
1366 if (cs
&& cs
->nir
->info
.uses_texture_gather
)
1367 update_stage_texture_surfaces(brw
, cs
, &brw
->cs
.base
, true, 0);
1370 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1373 const struct brw_tracked_state brw_cs_texture_surfaces
= {
1375 .mesa
= _NEW_TEXTURE
,
1376 .brw
= BRW_NEW_BATCH
|
1378 BRW_NEW_COMPUTE_PROGRAM
,
1380 .emit
= brw_update_cs_texture_surfaces
,
1385 brw_upload_ubo_surfaces(struct brw_context
*brw
, struct gl_program
*prog
,
1386 struct brw_stage_state
*stage_state
,
1387 struct brw_stage_prog_data
*prog_data
)
1389 struct gl_context
*ctx
= &brw
->ctx
;
1394 uint32_t *ubo_surf_offsets
=
1395 &stage_state
->surf_offset
[prog_data
->binding_table
.ubo_start
];
1397 for (int i
= 0; i
< prog
->info
.num_ubos
; i
++) {
1398 struct gl_uniform_buffer_binding
*binding
=
1399 &ctx
->UniformBufferBindings
[prog
->sh
.UniformBlocks
[i
]->Binding
];
1401 if (binding
->BufferObject
== ctx
->Shared
->NullBufferObj
) {
1402 brw
->vtbl
.emit_null_surface_state(brw
, 1, 1, 1, &ubo_surf_offsets
[i
]);
1404 struct intel_buffer_object
*intel_bo
=
1405 intel_buffer_object(binding
->BufferObject
);
1406 GLsizeiptr size
= binding
->BufferObject
->Size
- binding
->Offset
;
1407 if (!binding
->AutomaticSize
)
1408 size
= MIN2(size
, binding
->Size
);
1410 intel_bufferobj_buffer(brw
, intel_bo
,
1413 brw_create_constant_surface(brw
, bo
, binding
->Offset
,
1415 &ubo_surf_offsets
[i
]);
1419 uint32_t *ssbo_surf_offsets
=
1420 &stage_state
->surf_offset
[prog_data
->binding_table
.ssbo_start
];
1422 for (int i
= 0; i
< prog
->info
.num_ssbos
; i
++) {
1423 struct gl_shader_storage_buffer_binding
*binding
=
1424 &ctx
->ShaderStorageBufferBindings
[prog
->sh
.ShaderStorageBlocks
[i
]->Binding
];
1426 if (binding
->BufferObject
== ctx
->Shared
->NullBufferObj
) {
1427 brw
->vtbl
.emit_null_surface_state(brw
, 1, 1, 1, &ssbo_surf_offsets
[i
]);
1429 struct intel_buffer_object
*intel_bo
=
1430 intel_buffer_object(binding
->BufferObject
);
1431 GLsizeiptr size
= binding
->BufferObject
->Size
- binding
->Offset
;
1432 if (!binding
->AutomaticSize
)
1433 size
= MIN2(size
, binding
->Size
);
1435 intel_bufferobj_buffer(brw
, intel_bo
,
1438 brw_create_buffer_surface(brw
, bo
, binding
->Offset
,
1440 &ssbo_surf_offsets
[i
]);
1444 stage_state
->push_constants_dirty
= true;
1446 if (prog
->info
.num_ubos
|| prog
->info
.num_ssbos
)
1447 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1451 brw_upload_wm_ubo_surfaces(struct brw_context
*brw
)
1453 struct gl_context
*ctx
= &brw
->ctx
;
1455 struct gl_program
*prog
= ctx
->FragmentProgram
._Current
;
1457 /* BRW_NEW_FS_PROG_DATA */
1458 brw_upload_ubo_surfaces(brw
, prog
, &brw
->wm
.base
, brw
->wm
.base
.prog_data
);
1461 const struct brw_tracked_state brw_wm_ubo_surfaces
= {
1463 .mesa
= _NEW_PROGRAM
,
1464 .brw
= BRW_NEW_BATCH
|
1466 BRW_NEW_FS_PROG_DATA
|
1467 BRW_NEW_UNIFORM_BUFFER
,
1469 .emit
= brw_upload_wm_ubo_surfaces
,
1473 brw_upload_cs_ubo_surfaces(struct brw_context
*brw
)
1475 struct gl_context
*ctx
= &brw
->ctx
;
1477 struct gl_program
*prog
=
1478 ctx
->_Shader
->CurrentProgram
[MESA_SHADER_COMPUTE
];
1480 /* BRW_NEW_CS_PROG_DATA */
1481 brw_upload_ubo_surfaces(brw
, prog
, &brw
->cs
.base
, brw
->cs
.base
.prog_data
);
1484 const struct brw_tracked_state brw_cs_ubo_surfaces
= {
1486 .mesa
= _NEW_PROGRAM
,
1487 .brw
= BRW_NEW_BATCH
|
1489 BRW_NEW_CS_PROG_DATA
|
1490 BRW_NEW_UNIFORM_BUFFER
,
1492 .emit
= brw_upload_cs_ubo_surfaces
,
1496 brw_upload_abo_surfaces(struct brw_context
*brw
,
1497 const struct gl_program
*prog
,
1498 struct brw_stage_state
*stage_state
,
1499 struct brw_stage_prog_data
*prog_data
)
1501 struct gl_context
*ctx
= &brw
->ctx
;
1502 uint32_t *surf_offsets
=
1503 &stage_state
->surf_offset
[prog_data
->binding_table
.abo_start
];
1505 if (prog
->info
.num_abos
) {
1506 for (unsigned i
= 0; i
< prog
->info
.num_abos
; i
++) {
1507 struct gl_atomic_buffer_binding
*binding
=
1508 &ctx
->AtomicBufferBindings
[prog
->sh
.AtomicBuffers
[i
]->Binding
];
1509 struct intel_buffer_object
*intel_bo
=
1510 intel_buffer_object(binding
->BufferObject
);
1512 intel_bufferobj_buffer(brw
, intel_bo
, binding
->Offset
,
1513 intel_bo
->Base
.Size
- binding
->Offset
,
1516 brw_emit_buffer_surface_state(brw
, &surf_offsets
[i
], bo
,
1517 binding
->Offset
, ISL_FORMAT_RAW
,
1518 bo
->size
- binding
->Offset
, 1, true);
1521 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1526 brw_upload_wm_abo_surfaces(struct brw_context
*brw
)
1529 const struct gl_program
*wm
= brw
->fragment_program
;
1532 /* BRW_NEW_FS_PROG_DATA */
1533 brw_upload_abo_surfaces(brw
, wm
, &brw
->wm
.base
, brw
->wm
.base
.prog_data
);
1537 const struct brw_tracked_state brw_wm_abo_surfaces
= {
1539 .mesa
= _NEW_PROGRAM
,
1540 .brw
= BRW_NEW_ATOMIC_BUFFER
|
1543 BRW_NEW_FS_PROG_DATA
,
1545 .emit
= brw_upload_wm_abo_surfaces
,
1549 brw_upload_cs_abo_surfaces(struct brw_context
*brw
)
1552 const struct gl_program
*cp
= brw
->compute_program
;
1555 /* BRW_NEW_CS_PROG_DATA */
1556 brw_upload_abo_surfaces(brw
, cp
, &brw
->cs
.base
, brw
->cs
.base
.prog_data
);
1560 const struct brw_tracked_state brw_cs_abo_surfaces
= {
1562 .mesa
= _NEW_PROGRAM
,
1563 .brw
= BRW_NEW_ATOMIC_BUFFER
|
1566 BRW_NEW_CS_PROG_DATA
,
1568 .emit
= brw_upload_cs_abo_surfaces
,
1572 brw_upload_cs_image_surfaces(struct brw_context
*brw
)
1575 const struct gl_program
*cp
= brw
->compute_program
;
1578 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1579 brw_upload_image_surfaces(brw
, cp
, &brw
->cs
.base
,
1580 brw
->cs
.base
.prog_data
);
1584 const struct brw_tracked_state brw_cs_image_surfaces
= {
1586 .mesa
= _NEW_TEXTURE
| _NEW_PROGRAM
,
1587 .brw
= BRW_NEW_BATCH
|
1589 BRW_NEW_CS_PROG_DATA
|
1592 .emit
= brw_upload_cs_image_surfaces
,
1596 get_image_format(struct brw_context
*brw
, mesa_format format
, GLenum access
)
1598 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
1599 enum isl_format hw_format
= brw_isl_format_for_mesa_format(format
);
1600 if (access
== GL_WRITE_ONLY
) {
1602 } else if (isl_has_matching_typed_storage_image_format(devinfo
, hw_format
)) {
1603 /* Typed surface reads support a very limited subset of the shader
1604 * image formats. Translate it into the closest format the
1605 * hardware supports.
1607 return isl_lower_storage_image_format(devinfo
, hw_format
);
1609 /* The hardware doesn't actually support a typed format that we can use
1610 * so we have to fall back to untyped read/write messages.
1612 return ISL_FORMAT_RAW
;
1617 update_default_image_param(struct brw_context
*brw
,
1618 struct gl_image_unit
*u
,
1619 unsigned surface_idx
,
1620 struct brw_image_param
*param
)
1622 memset(param
, 0, sizeof(*param
));
1623 param
->surface_idx
= surface_idx
;
1624 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1625 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1626 * detailed explanation of these parameters.
1628 param
->swizzling
[0] = 0xff;
1629 param
->swizzling
[1] = 0xff;
1633 update_buffer_image_param(struct brw_context
*brw
,
1634 struct gl_image_unit
*u
,
1635 unsigned surface_idx
,
1636 struct brw_image_param
*param
)
1638 struct gl_buffer_object
*obj
= u
->TexObj
->BufferObject
;
1639 const uint32_t size
= MIN2((uint32_t)u
->TexObj
->BufferSize
, obj
->Size
);
1640 update_default_image_param(brw
, u
, surface_idx
, param
);
1642 param
->size
[0] = size
/ _mesa_get_format_bytes(u
->_ActualFormat
);
1643 param
->stride
[0] = _mesa_get_format_bytes(u
->_ActualFormat
);
1647 get_image_num_layers(const struct intel_mipmap_tree
*mt
, GLenum target
,
1650 if (target
== GL_TEXTURE_CUBE_MAP
)
1653 if (mt
->surf
.size
> 0) {
1654 return target
== GL_TEXTURE_3D
?
1655 minify(mt
->surf
.logical_level0_px
.depth
, level
) :
1656 mt
->surf
.logical_level0_px
.array_len
;
1659 return target
== GL_TEXTURE_3D
?
1660 minify(mt
->logical_depth0
, level
) : mt
->logical_depth0
;
1664 update_image_surface(struct brw_context
*brw
,
1665 struct gl_image_unit
*u
,
1667 unsigned surface_idx
,
1668 uint32_t *surf_offset
,
1669 struct brw_image_param
*param
)
1671 if (_mesa_is_image_unit_valid(&brw
->ctx
, u
)) {
1672 struct gl_texture_object
*obj
= u
->TexObj
;
1673 const unsigned format
= get_image_format(brw
, u
->_ActualFormat
, access
);
1675 if (obj
->Target
== GL_TEXTURE_BUFFER
) {
1676 struct intel_buffer_object
*intel_obj
=
1677 intel_buffer_object(obj
->BufferObject
);
1678 const unsigned texel_size
= (format
== ISL_FORMAT_RAW
? 1 :
1679 _mesa_get_format_bytes(u
->_ActualFormat
));
1681 brw_emit_buffer_surface_state(
1682 brw
, surf_offset
, intel_obj
->buffer
, obj
->BufferOffset
,
1683 format
, intel_obj
->Base
.Size
, texel_size
,
1684 access
!= GL_READ_ONLY
);
1686 update_buffer_image_param(brw
, u
, surface_idx
, param
);
1689 struct intel_texture_object
*intel_obj
= intel_texture_object(obj
);
1690 struct intel_mipmap_tree
*mt
= intel_obj
->mt
;
1691 const unsigned num_layers
= u
->Layered
?
1692 get_image_num_layers(mt
, obj
->Target
, u
->Level
) : 1;
1694 struct isl_view view
= {
1696 .base_level
= obj
->MinLevel
+ u
->Level
,
1698 .base_array_layer
= obj
->MinLayer
+ u
->_Layer
,
1699 .array_len
= num_layers
,
1700 .swizzle
= ISL_SWIZZLE_IDENTITY
,
1701 .usage
= ISL_SURF_USAGE_STORAGE_BIT
,
1704 if (format
== ISL_FORMAT_RAW
) {
1705 brw_emit_buffer_surface_state(
1706 brw
, surf_offset
, mt
->bo
, mt
->offset
,
1707 format
, mt
->bo
->size
- mt
->offset
, 1 /* pitch */,
1708 access
!= GL_READ_ONLY
);
1711 const int surf_index
= surf_offset
- &brw
->wm
.base
.surf_offset
[0];
1712 assert(!intel_miptree_has_color_unresolved(mt
,
1714 view
.base_array_layer
,
1716 brw_emit_surface_state(brw
, mt
, INTEL_AUX_BUFFER_DISABLED
,
1717 mt
->target
, view
, tex_mocs
[brw
->gen
],
1718 surf_offset
, surf_index
,
1719 I915_GEM_DOMAIN_SAMPLER
,
1720 access
== GL_READ_ONLY
? 0 :
1721 I915_GEM_DOMAIN_SAMPLER
);
1724 struct isl_surf surf
;
1725 intel_miptree_get_isl_surf(brw
, mt
, &surf
);
1727 isl_surf_fill_image_param(&brw
->isl_dev
, param
, &surf
, &view
);
1728 param
->surface_idx
= surface_idx
;
1732 brw
->vtbl
.emit_null_surface_state(brw
, 1, 1, 1, surf_offset
);
1733 update_default_image_param(brw
, u
, surface_idx
, param
);
1738 brw_upload_image_surfaces(struct brw_context
*brw
,
1739 const struct gl_program
*prog
,
1740 struct brw_stage_state
*stage_state
,
1741 struct brw_stage_prog_data
*prog_data
)
1744 struct gl_context
*ctx
= &brw
->ctx
;
1746 if (prog
->info
.num_images
) {
1747 for (unsigned i
= 0; i
< prog
->info
.num_images
; i
++) {
1748 struct gl_image_unit
*u
= &ctx
->ImageUnits
[prog
->sh
.ImageUnits
[i
]];
1749 const unsigned surf_idx
= prog_data
->binding_table
.image_start
+ i
;
1751 update_image_surface(brw
, u
, prog
->sh
.ImageAccess
[i
],
1753 &stage_state
->surf_offset
[surf_idx
],
1754 &prog_data
->image_param
[i
]);
1757 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1758 /* This may have changed the image metadata dependent on the context
1759 * image unit state and passed to the program as uniforms, make sure
1760 * that push and pull constants are reuploaded.
1762 brw
->NewGLState
|= _NEW_PROGRAM_CONSTANTS
;
1767 brw_upload_wm_image_surfaces(struct brw_context
*brw
)
1769 /* BRW_NEW_FRAGMENT_PROGRAM */
1770 const struct gl_program
*wm
= brw
->fragment_program
;
1773 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1774 brw_upload_image_surfaces(brw
, wm
, &brw
->wm
.base
,
1775 brw
->wm
.base
.prog_data
);
1779 const struct brw_tracked_state brw_wm_image_surfaces
= {
1781 .mesa
= _NEW_TEXTURE
,
1782 .brw
= BRW_NEW_BATCH
|
1784 BRW_NEW_FRAGMENT_PROGRAM
|
1785 BRW_NEW_FS_PROG_DATA
|
1788 .emit
= brw_upload_wm_image_surfaces
,
1792 gen4_init_vtable_surface_functions(struct brw_context
*brw
)
1794 brw
->vtbl
.update_renderbuffer_surface
= gen4_update_renderbuffer_surface
;
1795 brw
->vtbl
.emit_null_surface_state
= brw_emit_null_surface_state
;
1799 gen6_init_vtable_surface_functions(struct brw_context
*brw
)
1801 gen4_init_vtable_surface_functions(brw
);
1802 brw
->vtbl
.update_renderbuffer_surface
= brw_update_renderbuffer_surface
;
1806 brw_upload_cs_work_groups_surface(struct brw_context
*brw
)
1808 struct gl_context
*ctx
= &brw
->ctx
;
1810 struct gl_program
*prog
=
1811 ctx
->_Shader
->CurrentProgram
[MESA_SHADER_COMPUTE
];
1812 /* BRW_NEW_CS_PROG_DATA */
1813 const struct brw_cs_prog_data
*cs_prog_data
=
1814 brw_cs_prog_data(brw
->cs
.base
.prog_data
);
1816 if (prog
&& cs_prog_data
->uses_num_work_groups
) {
1817 const unsigned surf_idx
=
1818 cs_prog_data
->binding_table
.work_groups_start
;
1819 uint32_t *surf_offset
= &brw
->cs
.base
.surf_offset
[surf_idx
];
1823 if (brw
->compute
.num_work_groups_bo
== NULL
) {
1825 intel_upload_data(brw
,
1826 (void *)brw
->compute
.num_work_groups
,
1832 bo
= brw
->compute
.num_work_groups_bo
;
1833 bo_offset
= brw
->compute
.num_work_groups_offset
;
1836 brw_emit_buffer_surface_state(brw
, surf_offset
,
1839 3 * sizeof(GLuint
), 1, true);
1840 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1844 const struct brw_tracked_state brw_cs_work_groups_surface
= {
1846 .brw
= BRW_NEW_BLORP
|
1847 BRW_NEW_CS_PROG_DATA
|
1848 BRW_NEW_CS_WORK_GROUPS
1850 .emit
= brw_upload_cs_work_groups_surface
,