/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */
#include "compiler/nir/nir.h"
#include "main/context.h"
#include "main/blend.h"
#include "main/mtypes.h"
#include "main/samplerobj.h"
#include "main/shaderimage.h"
#include "main/teximage.h"
#include "program/prog_parameter.h"
#include "program/prog_instruction.h"
#include "main/framebuffer.h"
#include "main/shaderapi.h"

#include "intel_mipmap_tree.h"
#include "intel_batchbuffer.h"
#include "intel_tex.h"
#include "intel_fbo.h"
#include "intel_buffer_objects.h"

#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
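/* Memory Object Control State (MOCS) entries for texture and render-target
 * surfaces.  These tables are indexed by hardware generation (see the
 * tex_mocs[devinfo->gen] and rb_mocs[devinfo->gen] lookups below) and select
 * the per-gen cacheability settings programmed into SURFACE_STATE.
 */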
uint32_t tex_mocs[] = {
   /* per-gen MOCS values */
};

uint32_t rb_mocs[] = {
   /* per-gen MOCS values */
};
static void
get_isl_surf(struct brw_context *brw, struct intel_mipmap_tree *mt,
             GLenum target, struct isl_view *view,
             uint32_t *tile_x, uint32_t *tile_y,
             uint32_t *offset, struct isl_surf *surf)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   const enum isl_dim_layout dim_layout =
      get_isl_dim_layout(devinfo, mt->surf.tiling, target);

   if (surf->dim_layout == dim_layout)
      return;

   /* The layout of the specified texture target is not compatible with the
    * actual layout of the miptree structure in memory -- You're entering
    * dangerous territory, this can only possibly work if you only intended
    * to access a single level and slice of the texture, and the hardware
    * supports the tile offset feature in order to allow non-tile-aligned
    * base offsets, since we'll have to point the hardware to the first
    * texel of the level instead of relying on the usual base level/layer
    * controls.
    */
   assert(devinfo->has_surface_tile_offset);
   assert(view->levels == 1 && view->array_len == 1);
   assert(*tile_x == 0 && *tile_y == 0);

   *offset += intel_miptree_get_tile_offsets(mt, view->base_level,
                                             view->base_array_layer,
                                             tile_x, tile_y);

   /* Minify the logical dimensions of the texture. */
   const unsigned l = view->base_level - mt->first_level;
   surf->logical_level0_px.width = minify(surf->logical_level0_px.width, l);
   surf->logical_level0_px.height = surf->dim <= ISL_SURF_DIM_1D ? 1 :
      minify(surf->logical_level0_px.height, l);
   surf->logical_level0_px.depth = surf->dim <= ISL_SURF_DIM_2D ? 1 :
      minify(surf->logical_level0_px.depth, l);

   /* Only the base level and layer can be addressed with the overridden
    * layout.
    */
   surf->logical_level0_px.array_len = 1;
   surf->dim_layout = dim_layout;

   /* The requested slice of the texture is now at the base level and
    * layer.
    */
   view->base_level = 0;
   view->base_array_layer = 0;
}
static void
brw_emit_surface_state(struct brw_context *brw,
                       struct intel_mipmap_tree *mt,
                       GLenum target, struct isl_view view,
                       enum isl_aux_usage aux_usage,
                       uint32_t mocs, uint32_t *surf_offset, int surf_index,
                       unsigned reloc_flags)
{
   uint32_t tile_x = mt->level[0].level_x;
   uint32_t tile_y = mt->level[0].level_y;
   uint32_t offset = mt->offset;

   struct isl_surf surf;

   get_isl_surf(brw, mt, target, &view, &tile_x, &tile_y, &offset, &surf);

   union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } };

   struct brw_bo *aux_bo;
   struct isl_surf *aux_surf = NULL;
   uint64_t aux_offset = 0;
   switch (aux_usage) {
   case ISL_AUX_USAGE_MCS:
   case ISL_AUX_USAGE_CCS_D:
   case ISL_AUX_USAGE_CCS_E:
      aux_surf = &mt->mcs_buf->surf;
      aux_bo = mt->mcs_buf->bo;
      aux_offset = mt->mcs_buf->offset;
      break;

   case ISL_AUX_USAGE_HIZ:
      aux_surf = &mt->hiz_buf->surf;
      aux_bo = mt->hiz_buf->bo;
      break;

   case ISL_AUX_USAGE_NONE:
      break;
   }

   if (aux_usage != ISL_AUX_USAGE_NONE) {
      /* We only really need a clear color if we also have an auxiliary
       * surface.  Without one, it does nothing.
       */
      clear_color = mt->fast_clear_color;
   }

   void *state = brw_state_batch(brw,
                                 brw->isl_dev.ss.size,
                                 brw->isl_dev.ss.align,
                                 surf_offset);

   isl_surf_fill_state(&brw->isl_dev, state, .surf = &mt->surf, .view = &view,
                       .address = brw_state_reloc(&brw->batch,
                                                  *surf_offset + brw->isl_dev.ss.addr_offset,
                                                  mt->bo, offset, reloc_flags),
                       .aux_surf = aux_surf, .aux_usage = aux_usage,
                       .aux_address = aux_offset,
                       .mocs = mocs, .clear_color = clear_color,
                       .x_offset_sa = tile_x, .y_offset_sa = tile_y);

   if (aux_surf) {
      /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
       * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
       * contain other control information.  Since buffer addresses are always
       * on 4k boundaries (and thus have their lower 12 bits zero), we can use
       * an ordinary reloc to do the necessary address translation.
       *
       * FIXME: move to the point of assignment.
       */
      assert((aux_offset & 0xfff) == 0);
      uint32_t *aux_addr = state + brw->isl_dev.ss.aux_addr_offset;
      *aux_addr = brw_state_reloc(&brw->batch,
                                  *surf_offset +
                                  brw->isl_dev.ss.aux_addr_offset,
                                  aux_bo, *aux_addr, reloc_flags);
   }
}
static uint32_t
gen6_update_renderbuffer_surface(struct brw_context *brw,
                                 struct gl_renderbuffer *rb,
                                 unsigned unit,
                                 uint32_t surf_index)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   struct intel_mipmap_tree *mt = irb->mt;

   assert(brw_render_target_supported(brw, rb));

   mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
   if (unlikely(!brw->mesa_format_supports_render[rb_format])) {
      _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
                    __func__, _mesa_get_format_name(rb_format));
   }
   enum isl_format isl_format = brw->mesa_to_isl_render_format[rb_format];

   enum isl_aux_usage aux_usage =
      brw->draw_aux_buffer_disabled[unit] ? ISL_AUX_USAGE_NONE :
      intel_miptree_render_aux_usage(brw, mt, isl_format,
                                     ctx->Color.BlendEnabled & (1 << unit));

   struct isl_view view = {
      .format = isl_format,
      .base_level = irb->mt_level - irb->mt->first_level,
      .base_array_layer = irb->mt_layer,
      .array_len = MAX2(irb->layer_count, 1),
      .swizzle = ISL_SWIZZLE_IDENTITY,
      .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
   };

   uint32_t offset;
   brw_emit_surface_state(brw, mt, mt->target, view, aux_usage,
                          rb_mocs[devinfo->gen],
                          &offset, surf_index,
                          RELOC_WRITE);
   return offset;
}
static uint32_t
translate_tex_target(GLenum target)
{
   switch (target) {
   case GL_TEXTURE_1D:
   case GL_TEXTURE_1D_ARRAY_EXT:
      return BRW_SURFACE_1D;

   case GL_TEXTURE_RECTANGLE_NV:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_2D:
   case GL_TEXTURE_2D_ARRAY_EXT:
   case GL_TEXTURE_EXTERNAL_OES:
   case GL_TEXTURE_2D_MULTISAMPLE:
   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_3D:
      return BRW_SURFACE_3D;

   case GL_TEXTURE_CUBE_MAP:
   case GL_TEXTURE_CUBE_MAP_ARRAY:
      return BRW_SURFACE_CUBE;

   default:
      unreachable("not reached");
   }
}
uint32_t
brw_get_surface_tiling_bits(enum isl_tiling tiling)
{
   switch (tiling) {
   case ISL_TILING_X:
      return BRW_SURFACE_TILED;
   case ISL_TILING_Y0:
      return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
   default:
      return 0;
   }
}

uint32_t
brw_get_surface_num_multisamples(unsigned num_samples)
{
   if (num_samples > 1)
      return BRW_SURFACE_MULTISAMPLECOUNT_4;
   else
      return BRW_SURFACE_MULTISAMPLECOUNT_1;
}
/**
 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 * swizzling.
 */
int
brw_get_texture_swizzle(const struct gl_context *ctx,
                        const struct gl_texture_object *t)
{
   const struct gl_texture_image *img = t->Image[0][t->BaseLevel];

   int swizzles[SWIZZLE_NIL + 1] = {
      SWIZZLE_X,
      SWIZZLE_Y,
      SWIZZLE_Z,
      SWIZZLE_W,
      SWIZZLE_ZERO,
      SWIZZLE_ONE,
      SWIZZLE_NIL
   };

   if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
       img->_BaseFormat == GL_DEPTH_STENCIL) {
      GLenum depth_mode = t->DepthMode;

      /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
       * with depth component data specified with a sized internal format.
       * Otherwise, it's left at the old default, GL_LUMINANCE.
       */
      if (_mesa_is_gles3(ctx) &&
          img->InternalFormat != GL_DEPTH_COMPONENT &&
          img->InternalFormat != GL_DEPTH_STENCIL) {
         depth_mode = GL_RED;
      }

      switch (depth_mode) {
      case GL_ALPHA:
         swizzles[0] = SWIZZLE_ZERO;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_LUMINANCE:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
         break;
      case GL_INTENSITY:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_RED:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_ONE;
         break;
      }
   }

   GLenum datatype = _mesa_get_format_datatype(img->TexFormat);

   /* If the texture's format is alpha-only, force R, G, and B to
    * 0.0. Similarly, if the texture's format has no alpha channel,
    * force the alpha value read to 1.0. This allows for the
    * implementation to use an RGBA texture for any of these formats
    * without leaking any unexpected values.
    */
   switch (img->_BaseFormat) {
   case GL_ALPHA:
      swizzles[0] = SWIZZLE_ZERO;
      swizzles[1] = SWIZZLE_ZERO;
      swizzles[2] = SWIZZLE_ZERO;
      break;
   case GL_LUMINANCE:
      if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
      }
      break;
   case GL_LUMINANCE_ALPHA:
      if (datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_W;
      }
      break;
   case GL_INTENSITY:
      if (datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
      }
      break;
   case GL_RED:
   case GL_RG:
   case GL_RGB:
      if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0 ||
          img->TexFormat == MESA_FORMAT_RGB_DXT1 ||
          img->TexFormat == MESA_FORMAT_SRGB_DXT1)
         swizzles[3] = SWIZZLE_ONE;
      break;
   }

   return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
                        swizzles[GET_SWZ(t->_Swizzle, 1)],
                        swizzles[GET_SWZ(t->_Swizzle, 2)],
                        swizzles[GET_SWZ(t->_Swizzle, 3)]);
}
/**
 * Convert a swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+
 * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED).  The mappings are
 *
 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
 * and
 * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE
 *
 * which is simply adding 4 then modding by 8 (or anding with 7).
 *
 * We then may need to apply workarounds for textureGather hardware bugs.
 */
static unsigned
swizzle_to_scs(GLenum swizzle, bool need_green_to_blue)
{
   unsigned scs = (swizzle + 4) & 7;

   return (need_green_to_blue && scs == HSW_SCS_GREEN) ? HSW_SCS_BLUE : scs;
}
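/* Worked example of the mapping above (assuming the usual enum values, i.e.
 * SWIZZLE_X..SWIZZLE_ONE are 0..5 and HSW_SCS_ZERO, HSW_SCS_ONE,
 * HSW_SCS_RED..HSW_SCS_ALPHA are 0, 1, 4..7): SWIZZLE_X (0) + 4 = 4 =
 * HSW_SCS_RED, SWIZZLE_W (3) + 4 = 7 = HSW_SCS_ALPHA, and SWIZZLE_ONE (5)
 * + 4 = 9, which the "& 7" wraps to 1 = HSW_SCS_ONE.
 */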
static bool
brw_aux_surface_disabled(const struct brw_context *brw,
                         const struct intel_mipmap_tree *mt)
{
   const struct gl_framebuffer *fb = brw->ctx.DrawBuffer;

   for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
      const struct intel_renderbuffer *irb =
         intel_renderbuffer(fb->_ColorDrawBuffers[i]);

      if (irb && irb->mt == mt)
         return brw->draw_aux_buffer_disabled[i];
   }

   return false;
}
static void
brw_update_texture_surface(struct gl_context *ctx,
                           unsigned unit,
                           uint32_t *surf_offset,
                           bool for_gather,
                           bool for_txf,
                           uint32_t plane)
{
   struct brw_context *brw = brw_context(ctx);
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current;

   if (obj->Target == GL_TEXTURE_BUFFER) {
      brw_update_buffer_texture_surface(ctx, unit, surf_offset);

   } else {
      struct intel_texture_object *intel_obj = intel_texture_object(obj);
      struct intel_mipmap_tree *mt = intel_obj->mt;

      if (plane > 0) {
         if (mt->plane[plane - 1] == NULL)
            return;
         mt = mt->plane[plane - 1];
      }

      struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
      /* If this is a view with restricted NumLayers, then our effective depth
       * is not just the miptree depth.
       */
      unsigned view_num_layers;
      if (obj->Immutable && obj->Target != GL_TEXTURE_3D) {
         view_num_layers = obj->NumLayers;
      } else {
         view_num_layers = mt->surf.dim == ISL_SURF_DIM_3D ?
                              mt->surf.logical_level0_px.depth :
                              mt->surf.logical_level0_px.array_len;
      }

      /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
       * texturing functions that return a float, as our code generation always
       * selects the .x channel (which would always be 0).
       */
      struct gl_texture_image *firstImage = obj->Image[0][obj->BaseLevel];
      const bool alpha_depth = obj->DepthMode == GL_ALPHA &&
         (firstImage->_BaseFormat == GL_DEPTH_COMPONENT ||
          firstImage->_BaseFormat == GL_DEPTH_STENCIL);
      const unsigned swizzle = (unlikely(alpha_depth) ? SWIZZLE_XYZW :
                                brw_get_texture_swizzle(&brw->ctx, obj));

      mesa_format mesa_fmt = plane == 0 ? intel_obj->_Format : mt->format;
      enum isl_format format = translate_tex_format(brw, mesa_fmt,
                                                    for_txf ? GL_DECODE_EXT :
                                                    sampler->sRGBDecode);

      /* Implement gen6 and gen7 gather work-around */
      bool need_green_to_blue = false;

      if (devinfo->gen == 7 && (format == ISL_FORMAT_R32G32_FLOAT ||
                                format == ISL_FORMAT_R32G32_SINT ||
                                format == ISL_FORMAT_R32G32_UINT)) {
         format = ISL_FORMAT_R32G32_FLOAT_LD;
         need_green_to_blue = devinfo->is_haswell;
      } else if (devinfo->gen == 6) {
         /* Sandybridge's gather4 message is broken for integer formats.
          * To work around this, we pretend the surface is UNORM for
          * 8 or 16-bit formats, and emit shader instructions to recover
          * the real INT/UINT value.  For 32-bit formats, we pretend
          * the surface is FLOAT, and simply reinterpret the resulting
          * bits.
          */
         switch (format) {
         case ISL_FORMAT_R8_SINT:
         case ISL_FORMAT_R8_UINT:
            format = ISL_FORMAT_R8_UNORM;
            break;

         case ISL_FORMAT_R16_SINT:
         case ISL_FORMAT_R16_UINT:
            format = ISL_FORMAT_R16_UNORM;
            break;

         case ISL_FORMAT_R32_SINT:
         case ISL_FORMAT_R32_UINT:
            format = ISL_FORMAT_R32_FLOAT;
            break;

         default:
            break;
         }
      }

      if (obj->StencilSampling && firstImage->_BaseFormat == GL_DEPTH_STENCIL) {
         if (devinfo->gen <= 7) {
            assert(mt->r8stencil_mt && !mt->stencil_mt->r8stencil_needs_update);
            mt = mt->r8stencil_mt;
         }
         format = ISL_FORMAT_R8_UINT;
      } else if (devinfo->gen <= 7 && mt->format == MESA_FORMAT_S_UINT8) {
         assert(mt->r8stencil_mt && !mt->r8stencil_needs_update);
         mt = mt->r8stencil_mt;
         format = ISL_FORMAT_R8_UINT;
      }

      const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];

      struct isl_view view = {
         .format = format,
         .base_level = obj->MinLevel + obj->BaseLevel,
         .levels = intel_obj->_MaxLevel - obj->BaseLevel + 1,
         .base_array_layer = obj->MinLayer,
         .array_len = view_num_layers,
         .swizzle = {
            .r = swizzle_to_scs(GET_SWZ(swizzle, 0), need_green_to_blue),
            .g = swizzle_to_scs(GET_SWZ(swizzle, 1), need_green_to_blue),
            .b = swizzle_to_scs(GET_SWZ(swizzle, 2), need_green_to_blue),
            .a = swizzle_to_scs(GET_SWZ(swizzle, 3), need_green_to_blue),
         },
         .usage = ISL_SURF_USAGE_TEXTURE_BIT,
      };

      if (obj->Target == GL_TEXTURE_CUBE_MAP ||
          obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY)
         view.usage |= ISL_SURF_USAGE_CUBE_BIT;

      enum isl_aux_usage aux_usage =
         intel_miptree_texture_aux_usage(brw, mt, format);

      if (brw_aux_surface_disabled(brw, mt))
         aux_usage = ISL_AUX_USAGE_NONE;

      brw_emit_surface_state(brw, mt, mt->target, view, aux_usage,
                             tex_mocs[devinfo->gen],
                             surf_offset, surf_index,
                             0);
   }
}
void
brw_emit_buffer_surface_state(struct brw_context *brw,
                              uint32_t *out_offset,
                              struct brw_bo *bo,
                              unsigned buffer_offset,
                              unsigned surface_format,
                              unsigned buffer_size,
                              unsigned pitch,
                              unsigned reloc_flags)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   uint32_t *dw = brw_state_batch(brw,
                                  brw->isl_dev.ss.size,
                                  brw->isl_dev.ss.align,
                                  out_offset);

   isl_buffer_fill_state(&brw->isl_dev, dw,
                         .address = !bo ? buffer_offset :
                                    brw_state_reloc(&brw->batch,
                                                    *out_offset + brw->isl_dev.ss.addr_offset,
                                                    bo, buffer_offset, reloc_flags),
                         .size = buffer_size,
                         .format = surface_format,
                         .stride = pitch,
                         .mocs = tex_mocs[devinfo->gen]);
}
static void
brw_update_buffer_texture_surface(struct gl_context *ctx,
                                  unsigned unit,
                                  uint32_t *surf_offset)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   struct intel_buffer_object *intel_obj =
      intel_buffer_object(tObj->BufferObject);
   uint32_t size = tObj->BufferSize;
   struct brw_bo *bo = NULL;
   mesa_format format = tObj->_BufferObjectFormat;
   const enum isl_format isl_format = brw_isl_format_for_mesa_format(format);
   int texel_size = _mesa_get_format_bytes(format);

   if (intel_obj) {
      size = MIN2(size, intel_obj->Base.Size);
      bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size,
                                  false);
   }

   /* The ARB_texture_buffer_specification says:
    *
    *    "The number of texels in the buffer texture's texel array is given by
    *
    *       floor(<buffer_size> / (<components> * sizeof(<base_type>)),
    *
    *     where <buffer_size> is the size of the buffer object, in basic
    *     machine units and <components> and <base_type> are the element count
    *     and base data type for elements, as specified in Table X.1.  The
    *     number of texels in the texel array is then clamped to the
    *     implementation-dependent limit MAX_TEXTURE_BUFFER_SIZE_ARB."
    *
    * We need to clamp the size in bytes to MAX_TEXTURE_BUFFER_SIZE * stride,
    * so that when ISL divides by stride to obtain the number of texels, that
    * texel count is clamped to MAX_TEXTURE_BUFFER_SIZE.
    */
   size = MIN2(size, ctx->Const.MaxTextureBufferSize * (unsigned) texel_size);
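   /* Illustrative numbers: with a hypothetical MaxTextureBufferSize of 2^27
    * texels and an RGBA8 buffer format (texel_size = 4), the clamp above
    * limits size to 2^29 bytes, so the texel count ISL derives from it
    * (size / 4 = 2^27) stays within the advertised limit.
    */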
   if (isl_format == ISL_FORMAT_UNSUPPORTED) {
      _mesa_problem(NULL, "bad format %s for texture buffer\n",
                    _mesa_get_format_name(format));
   }

   brw_emit_buffer_surface_state(brw, surf_offset, bo,
                                 tObj->BufferOffset,
                                 isl_format,
                                 size,
                                 texel_size,
                                 0);
}
/**
 * Create the constant buffer surface.  Vertex/fragment shader constants will be
 * read from this buffer with Data Port Read instructions/messages.
 */
void
brw_create_constant_surface(struct brw_context *brw,
                            struct brw_bo *bo,
                            uint32_t offset,
                            uint32_t size,
                            uint32_t *out_offset)
{
   brw_emit_buffer_surface_state(brw, out_offset, bo, offset,
                                 ISL_FORMAT_R32G32B32A32_FLOAT,
                                 size, 1, 0);
}
/**
 * Create the buffer surface.  Shader buffer variables will be
 * read from / written to this buffer with Data Port Read/Write
 * instructions/messages.
 */
void
brw_create_buffer_surface(struct brw_context *brw,
                          struct brw_bo *bo,
                          uint32_t offset,
                          uint32_t size,
                          uint32_t *out_offset)
{
   /* Use a raw surface so we can reuse existing untyped read/write/atomic
    * messages.  We need these specifically for the fragment shader since they
    * include a pixel mask header that we need to ensure correct behavior
    * with helper invocations, which cannot write to the buffer.
    */
   brw_emit_buffer_surface_state(brw, out_offset, bo, offset,
                                 ISL_FORMAT_RAW,
                                 size, 1, RELOC_WRITE);
}
/**
 * Set up a binding table entry for use by stream output logic (transform
 * feedback).
 *
 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
 */
void
brw_update_sol_surface(struct brw_context *brw,
                       struct gl_buffer_object *buffer_obj,
                       uint32_t *out_offset, unsigned num_vector_components,
                       unsigned stride_dwords, unsigned offset_dwords)
{
   struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
   uint32_t offset_bytes = 4 * offset_dwords;
   struct brw_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
                                              offset_bytes,
                                              buffer_obj->Size - offset_bytes,
                                              true);
   uint32_t *surf = brw_state_batch(brw, 6 * 4, 32, out_offset);
   uint32_t pitch_minus_1 = 4*stride_dwords - 1;
   size_t size_dwords = buffer_obj->Size / 4;
   uint32_t buffer_size_minus_1, width, height, depth, surface_format;

   /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
    * too big to map using a single binding table entry?
    */
   assert((size_dwords - offset_dwords) / stride_dwords
          <= BRW_MAX_NUM_BUFFER_ENTRIES);

   if (size_dwords > offset_dwords + num_vector_components) {
      /* There is room for at least 1 transform feedback output in the buffer.
       * Compute the number of additional transform feedback outputs the
       * buffer has room for.
       */
      buffer_size_minus_1 =
         (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
   } else {
      /* There isn't even room for a single transform feedback output in the
       * buffer.  We can't configure the binding table entry to prevent output
       * entirely; we'll have to rely on the geometry shader to detect
       * overflow.  But to minimize the damage in case of a bug, set up the
       * binding table entry to just allow a single output.
       */
      buffer_size_minus_1 = 0;
   }
   width = buffer_size_minus_1 & 0x7f;
   height = (buffer_size_minus_1 & 0xfff80) >> 7;
   depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
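   /* The entry count is spread across the SURFACE_STATE Width (bits 6:0),
    * Height (bits 19:7) and Depth (bits 26:20) fields, which is how buffer
    * surfaces are sized on this hardware.  Worked example (illustrative
    * value): buffer_size_minus_1 = 0x123456 splits into width = 0x56,
    * height = 0x468 and depth = 0x1, and 0x1 << 20 | 0x468 << 7 | 0x56
    * reassembles 0x123456.
    */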
   switch (num_vector_components) {
   case 1:
      surface_format = ISL_FORMAT_R32_FLOAT;
      break;
   case 2:
      surface_format = ISL_FORMAT_R32G32_FLOAT;
      break;
   case 3:
      surface_format = ISL_FORMAT_R32G32B32_FLOAT;
      break;
   case 4:
      surface_format = ISL_FORMAT_R32G32B32A32_FLOAT;
      break;
   default:
      unreachable("Invalid vector size for transform feedback output");
   }

   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
             BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
             surface_format << BRW_SURFACE_FORMAT_SHIFT |
             BRW_SURFACE_RC_READ_WRITE;
   surf[1] = brw_state_reloc(&brw->batch,
                             *out_offset + 4, bo, offset_bytes, RELOC_WRITE);
   surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
              height << BRW_SURFACE_HEIGHT_SHIFT);
   surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
}
/* Creates a new WM constant buffer reflecting the current fragment program's
 * constants, if needed by the fragment program.
 *
 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 * state atom.
 */
static void
brw_upload_wm_pull_constants(struct brw_context *brw)
{
   struct brw_stage_state *stage_state = &brw->wm.base;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct brw_program *fp =
      (struct brw_program *) brw->programs[MESA_SHADER_FRAGMENT];

   /* BRW_NEW_FS_PROG_DATA */
   struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data;

   _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_FRAGMENT);
   /* _NEW_PROGRAM_CONSTANTS */
   brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program,
                             stage_state, prog_data);
}

const struct brw_tracked_state brw_wm_pull_constants = {
   .dirty = {
      .mesa = _NEW_PROGRAM_CONSTANTS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = brw_upload_wm_pull_constants,
};
/**
 * Creates a null renderbuffer surface.
 *
 * This is used when the shader doesn't write to any color output.  An FB
 * write to target 0 will still be emitted, because that's how the thread is
 * terminated (and computed depth is returned), so we need to have the
 * hardware discard the target 0 color output.
 */
static void
emit_null_surface_state(struct brw_context *brw,
                        const struct gl_framebuffer *fb,
                        uint32_t *out_offset)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   uint32_t *surf = brw_state_batch(brw,
                                    brw->isl_dev.ss.size,
                                    brw->isl_dev.ss.align,
                                    out_offset);

   /* Use the fb dimensions or 1x1x1 */
   const unsigned width   = fb ? _mesa_geometric_width(fb)   : 1;
   const unsigned height  = fb ? _mesa_geometric_height(fb)  : 1;
   const unsigned samples = fb ? _mesa_geometric_samples(fb) : 1;

   if (devinfo->gen != 6 || samples <= 1) {
      isl_null_fill_state(&brw->isl_dev, surf,
                          isl_extent3d(width, height, 1));
      return;
   }

   /* On Gen6, null render targets seem to cause GPU hangs when multisampling.
    * So work around this problem by rendering into a dummy color buffer.
    *
    * To decrease the amount of memory needed by the workaround buffer, we
    * set its pitch to 128 bytes (the width of a Y tile).  This means that
    * the amount of memory needed for the workaround buffer is
    * (width_in_tiles + height_in_tiles - 1) tiles.
    *
    * Note that since the workaround buffer will be interpreted by the
    * hardware as an interleaved multisampled buffer, we need to compute
    * width_in_tiles and height_in_tiles by dividing the width and height
    * by 16 rather than the normal Y-tile size of 32.
    */
   unsigned width_in_tiles = ALIGN(width, 16) / 16;
   unsigned height_in_tiles = ALIGN(height, 16) / 16;
   unsigned pitch_minus_1 = 127;
   unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
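   /* Worked example (illustrative numbers): for a 1920x1080 framebuffer,
    * width_in_tiles = ALIGN(1920, 16) / 16 = 120 and height_in_tiles =
    * ALIGN(1080, 16) / 16 = 68, so the workaround buffer only needs
    * (120 + 68 - 1) * 4096 bytes (about 766 KB) rather than a full
    * multisampled 1920x1080 allocation.
    */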
   brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
                      size_needed);

   surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
              ISL_FORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
   surf[1] = brw_state_reloc(&brw->batch, *out_offset + 4,
                             brw->wm.multisampled_null_render_target_bo,
                             0, RELOC_WRITE);
   surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
    * Notes):
    *
    *     If Surface Type is SURFTYPE_NULL, this field must be TRUE
    */
   surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = BRW_SURFACE_MULTISAMPLECOUNT_4;
}
/**
 * Sets up a surface state structure to point at the given region.
 * While it is only used for the front/back buffer currently, it should be
 * usable for further buffers when doing ARB_draw_buffer support.
 */
static uint32_t
gen4_update_renderbuffer_surface(struct brw_context *brw,
                                 struct gl_renderbuffer *rb,
                                 unsigned unit,
                                 uint32_t surf_index)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   struct intel_mipmap_tree *mt = irb->mt;
   uint32_t *surf;
   uint32_t tile_x, tile_y;
   enum isl_format format;
   uint32_t offset;
   mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
   /* BRW_NEW_FS_PROG_DATA */

   if (rb->TexImage && !devinfo->has_surface_tile_offset) {
      intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);

      if (tile_x != 0 || tile_y != 0) {
         /* Original gen4 hardware couldn't draw to a non-tile-aligned
          * destination in a miptree unless you actually set up your renderbuffer
          * as a miptree and used the fragile lod/array_index/etc. controls to
          * select the image.  So, instead, we just make a new single-level
          * miptree and render into that.
          */
         intel_renderbuffer_move_to_temp(brw, irb, false);
         assert(irb->align_wa_mt);
         mt = irb->align_wa_mt;
      }
   }

   surf = brw_state_batch(brw, 6 * 4, 32, &offset);

   format = brw->mesa_to_isl_render_format[rb_format];
   if (unlikely(!brw->mesa_format_supports_render[rb_format])) {
      _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
                    __func__, _mesa_get_format_name(rb_format));
   }

   surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
              format << BRW_SURFACE_FORMAT_SHIFT);

   /* reloc */
   assert(mt->offset % mt->cpp == 0);
   surf[1] = brw_state_reloc(&brw->batch, offset + 4, mt->bo,
                             mt->offset +
                             intel_renderbuffer_get_tile_offsets(irb,
                                                                 &tile_x,
                                                                 &tile_y),
                             RELOC_WRITE);

   surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(mt->surf.tiling) |
              (mt->surf.row_pitch - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = brw_get_surface_num_multisamples(mt->surf.samples);

   assert(devinfo->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
   /* Note that the low bits of these fields are missing, so
    * there's the possibility of getting in trouble.
    */
   assert(tile_x % 4 == 0);
   assert(tile_y % 2 == 0);
   surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
              (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
              (mt->surf.image_alignment_el.height == 4 ?
               BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));

   if (devinfo->gen < 6) {
      if (!ctx->Color.ColorLogicOpEnabled && !ctx->Color._AdvancedBlendMode &&
          (ctx->Color.BlendEnabled & (1 << unit)))
         surf[0] |= BRW_SURFACE_BLEND_ENABLED;

      if (!ctx->Color.ColorMask[unit][0])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
      if (!ctx->Color.ColorMask[unit][1])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
      if (!ctx->Color.ColorMask[unit][2])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;

      /* As mentioned above, disable writes to the alpha component when the
       * renderbuffer is XRGB.
       */
      if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
          !ctx->Color.ColorMask[unit][3]) {
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
      }
   }

   return offset;
}
static void
update_renderbuffer_surfaces(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   const struct gl_context *ctx = &brw->ctx;

   /* _NEW_BUFFERS | _NEW_COLOR */
   const struct gl_framebuffer *fb = ctx->DrawBuffer;

   /* Render targets always start at binding table index 0. */
   const unsigned rt_start = 0;

   uint32_t *surf_offsets = brw->wm.base.surf_offset;

   /* Update surfaces for drawing buffers */
   if (fb->_NumColorDrawBuffers >= 1) {
      for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
         struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];

         if (intel_renderbuffer(rb)) {
            surf_offsets[rt_start + i] = devinfo->gen >= 6 ?
               gen6_update_renderbuffer_surface(brw, rb, i, rt_start + i) :
               gen4_update_renderbuffer_surface(brw, rb, i, rt_start + i);
         } else {
            emit_null_surface_state(brw, fb, &surf_offsets[rt_start + i]);
         }
      }
   } else {
      emit_null_surface_state(brw, fb, &surf_offsets[rt_start]);
   }

   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS |
              _NEW_COLOR,
      .brw = BRW_NEW_BATCH,
   },
   .emit = update_renderbuffer_surfaces,
};
= {
1044 .mesa
= _NEW_BUFFERS
,
1045 .brw
= BRW_NEW_BATCH
|
1048 .emit
= update_renderbuffer_surfaces
,
static void
update_renderbuffer_read_surfaces(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   const struct gl_context *ctx = &brw->ctx;

   /* BRW_NEW_FS_PROG_DATA */
   const struct brw_wm_prog_data *wm_prog_data =
      brw_wm_prog_data(brw->wm.base.prog_data);

   if (wm_prog_data->has_render_target_reads &&
       !ctx->Extensions.MESA_shader_framebuffer_fetch) {
      /* _NEW_BUFFERS */
      const struct gl_framebuffer *fb = ctx->DrawBuffer;

      for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
         struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];
         const struct intel_renderbuffer *irb = intel_renderbuffer(rb);
         const unsigned surf_index =
            wm_prog_data->binding_table.render_target_read_start + i;
         uint32_t *surf_offset = &brw->wm.base.surf_offset[surf_index];

         if (irb) {
            const enum isl_format format = brw->mesa_to_isl_render_format[
               _mesa_get_render_format(ctx, intel_rb_format(irb))];
            assert(isl_format_supports_sampling(&brw->screen->devinfo,
                                                format));

            /* Override the target of the texture if the render buffer is a
             * single slice of a 3D texture (since the minimum array element
             * field of the surface state structure is ignored by the sampler
             * unit for 3D textures on some hardware), or if the render buffer
             * is a 1D array (since shaders always provide the array index
             * coordinate at the Z component to avoid state-dependent
             * recompiles when changing the texture target of the
             * framebuffer).
             */
            const GLenum target =
               (irb->mt->target == GL_TEXTURE_3D &&
                irb->layer_count == 1) ? GL_TEXTURE_2D :
               irb->mt->target == GL_TEXTURE_1D_ARRAY ? GL_TEXTURE_2D_ARRAY :
               irb->mt->target;

            const struct isl_view view = {
               .format = format,
               .base_level = irb->mt_level - irb->mt->first_level,
               .base_array_layer = irb->mt_layer,
               .array_len = irb->layer_count,
               .swizzle = ISL_SWIZZLE_IDENTITY,
               .usage = ISL_SURF_USAGE_TEXTURE_BIT,
            };

            enum isl_aux_usage aux_usage =
               intel_miptree_texture_aux_usage(brw, irb->mt, format);
            if (brw->draw_aux_buffer_disabled[i])
               aux_usage = ISL_AUX_USAGE_NONE;

            brw_emit_surface_state(brw, irb->mt, target, view, aux_usage,
                                   tex_mocs[devinfo->gen],
                                   surf_offset, surf_index,
                                   0);
         } else {
            emit_null_surface_state(brw, fb, surf_offset);
         }
      }

      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
   }
}

const struct brw_tracked_state brw_renderbuffer_read_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = update_renderbuffer_read_surfaces,
};
static void
update_stage_texture_surfaces(struct brw_context *brw,
                              const struct gl_program *prog,
                              struct brw_stage_state *stage_state,
                              bool for_gather, uint32_t plane)
{
   if (!prog)
      return;

   struct gl_context *ctx = &brw->ctx;

   uint32_t *surf_offset = stage_state->surf_offset;

   /* BRW_NEW_*_PROG_DATA */
   if (for_gather)
      surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
   else
      surf_offset += stage_state->prog_data->binding_table.plane_start[plane];

   unsigned num_samplers = util_last_bit(prog->SamplersUsed);
   for (unsigned s = 0; s < num_samplers; s++) {
      if (prog->SamplersUsed & (1 << s)) {
         const unsigned unit = prog->SamplerUnits[s];
         const bool used_by_txf = prog->info.textures_used_by_txf & (1 << s);

         if (ctx->Texture.Unit[unit]._Current) {
            brw_update_texture_surface(ctx, unit, surf_offset + s, for_gather,
                                       used_by_txf, plane);
         }
      }
   }
}
/**
 * Construct SURFACE_STATE objects for enabled textures.
 */
static void
brw_update_texture_surfaces(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   /* BRW_NEW_VERTEX_PROGRAM */
   struct gl_program *vs = brw->programs[MESA_SHADER_VERTEX];

   /* BRW_NEW_TESS_PROGRAMS */
   struct gl_program *tcs = brw->programs[MESA_SHADER_TESS_CTRL];
   struct gl_program *tes = brw->programs[MESA_SHADER_TESS_EVAL];

   /* BRW_NEW_GEOMETRY_PROGRAM */
   struct gl_program *gs = brw->programs[MESA_SHADER_GEOMETRY];

   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct gl_program *fs = brw->programs[MESA_SHADER_FRAGMENT];

   update_stage_texture_surfaces(brw, vs, &brw->vs.base, false, 0);
   update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false, 0);
   update_stage_texture_surfaces(brw, tes, &brw->tes.base, false, 0);
   update_stage_texture_surfaces(brw, gs, &brw->gs.base, false, 0);
   update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 0);

   /* emit alternate set of surface state for gather. this
    * allows the surface format to be overridden for only the
    * gather4 messages. */
   if (devinfo->gen < 8) {
      if (vs && vs->nir->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, vs, &brw->vs.base, true, 0);
      if (tcs && tcs->nir->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true, 0);
      if (tes && tes->nir->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, tes, &brw->tes.base, true, 0);
      if (gs && gs->nir->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, gs, &brw->gs.base, true, 0);
      if (fs && fs->nir->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, fs, &brw->wm.base, true, 0);
   }

   update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 1);
   update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 2);

   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_GEOMETRY_PROGRAM |
             BRW_NEW_GS_PROG_DATA |
             BRW_NEW_TESS_PROGRAMS |
             BRW_NEW_TCS_PROG_DATA |
             BRW_NEW_TES_PROG_DATA |
             BRW_NEW_TEXTURE_BUFFER |
             BRW_NEW_VERTEX_PROGRAM |
             BRW_NEW_VS_PROG_DATA,
   },
   .emit = brw_update_texture_surfaces,
};
static void
brw_update_cs_texture_surfaces(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   /* BRW_NEW_COMPUTE_PROGRAM */
   struct gl_program *cs = brw->programs[MESA_SHADER_COMPUTE];

   update_stage_texture_surfaces(brw, cs, &brw->cs.base, false, 0);

   /* emit alternate set of surface state for gather. this
    * allows the surface format to be overridden for only the
    * gather4 messages.
    */
   if (devinfo->gen < 8) {
      if (cs && cs->nir->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, cs, &brw->cs.base, true, 0);
   }

   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_cs_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_COMPUTE_PROGRAM,
   },
   .emit = brw_update_cs_texture_surfaces,
};
static void
brw_upload_ubo_surfaces(struct brw_context *brw, struct gl_program *prog,
                        struct brw_stage_state *stage_state,
                        struct brw_stage_prog_data *prog_data)
{
   struct gl_context *ctx = &brw->ctx;

   if (!prog)
      return;

   uint32_t *ubo_surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.ubo_start];

   for (int i = 0; i < prog->info.num_ubos; i++) {
      struct gl_buffer_binding *binding =
         &ctx->UniformBufferBindings[prog->sh.UniformBlocks[i]->Binding];

      if (binding->BufferObject == ctx->Shared->NullBufferObj) {
         emit_null_surface_state(brw, NULL, &ubo_surf_offsets[i]);
      } else {
         struct intel_buffer_object *intel_bo =
            intel_buffer_object(binding->BufferObject);
         GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
         if (!binding->AutomaticSize)
            size = MIN2(size, binding->Size);
         struct brw_bo *bo =
            intel_bufferobj_buffer(brw, intel_bo,
                                   binding->Offset, size, false);
         brw_create_constant_surface(brw, bo, binding->Offset,
                                     size,
                                     &ubo_surf_offsets[i]);
      }
   }

   uint32_t *ssbo_surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.ssbo_start];

   for (int i = 0; i < prog->info.num_ssbos; i++) {
      struct gl_buffer_binding *binding =
         &ctx->ShaderStorageBufferBindings[prog->sh.ShaderStorageBlocks[i]->Binding];

      if (binding->BufferObject == ctx->Shared->NullBufferObj) {
         emit_null_surface_state(brw, NULL, &ssbo_surf_offsets[i]);
      } else {
         struct intel_buffer_object *intel_bo =
            intel_buffer_object(binding->BufferObject);
         GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
         if (!binding->AutomaticSize)
            size = MIN2(size, binding->Size);
         struct brw_bo *bo =
            intel_bufferobj_buffer(brw, intel_bo,
                                   binding->Offset, size, true);
         brw_create_buffer_surface(brw, bo, binding->Offset,
                                   size,
                                   &ssbo_surf_offsets[i]);
      }
   }

   stage_state->push_constants_dirty = true;

   if (prog->info.num_ubos || prog->info.num_ssbos)
      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}
static void
brw_upload_wm_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   struct gl_program *prog = ctx->FragmentProgram._Current;

   /* BRW_NEW_FS_PROG_DATA */
   brw_upload_ubo_surfaces(brw, prog, &brw->wm.base, brw->wm.base.prog_data);
}

const struct brw_tracked_state brw_wm_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_UNIFORM_BUFFER,
   },
   .emit = brw_upload_wm_ubo_surfaces,
};

static void
brw_upload_cs_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   struct gl_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];

   /* BRW_NEW_CS_PROG_DATA */
   brw_upload_ubo_surfaces(brw, prog, &brw->cs.base, brw->cs.base.prog_data);
}

const struct brw_tracked_state brw_cs_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_CS_PROG_DATA |
             BRW_NEW_UNIFORM_BUFFER,
   },
   .emit = brw_upload_cs_ubo_surfaces,
};
static void
brw_upload_abo_surfaces(struct brw_context *brw,
                        const struct gl_program *prog,
                        struct brw_stage_state *stage_state,
                        struct brw_stage_prog_data *prog_data)
{
   struct gl_context *ctx = &brw->ctx;
   uint32_t *surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.abo_start];

   if (prog->info.num_abos) {
      for (unsigned i = 0; i < prog->info.num_abos; i++) {
         struct gl_buffer_binding *binding =
            &ctx->AtomicBufferBindings[prog->sh.AtomicBuffers[i]->Binding];
         struct intel_buffer_object *intel_bo =
            intel_buffer_object(binding->BufferObject);
         struct brw_bo *bo =
            intel_bufferobj_buffer(brw, intel_bo, binding->Offset,
                                   intel_bo->Base.Size - binding->Offset,
                                   true);

         brw_emit_buffer_surface_state(brw, &surf_offsets[i], bo,
                                       binding->Offset, ISL_FORMAT_RAW,
                                       bo->size - binding->Offset, 1,
                                       RELOC_WRITE);
      }

      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
   }
}

static void
brw_upload_wm_abo_surfaces(struct brw_context *brw)
{
   const struct gl_program *wm = brw->programs[MESA_SHADER_FRAGMENT];

   if (wm) {
      /* BRW_NEW_FS_PROG_DATA */
      brw_upload_abo_surfaces(brw, wm, &brw->wm.base, brw->wm.base.prog_data);
   }
}

const struct brw_tracked_state brw_wm_abo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_ATOMIC_BUFFER |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = brw_upload_wm_abo_surfaces,
};

static void
brw_upload_cs_abo_surfaces(struct brw_context *brw)
{
   const struct gl_program *cp = brw->programs[MESA_SHADER_COMPUTE];

   if (cp) {
      /* BRW_NEW_CS_PROG_DATA */
      brw_upload_abo_surfaces(brw, cp, &brw->cs.base, brw->cs.base.prog_data);
   }
}

const struct brw_tracked_state brw_cs_abo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_ATOMIC_BUFFER |
             BRW_NEW_CS_PROG_DATA,
   },
   .emit = brw_upload_cs_abo_surfaces,
};
static void
brw_upload_cs_image_surfaces(struct brw_context *brw)
{
   const struct gl_program *cp = brw->programs[MESA_SHADER_COMPUTE];

   if (cp) {
      /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
      brw_upload_image_surfaces(brw, cp, &brw->cs.base,
                                brw->cs.base.prog_data);
   }
}

const struct brw_tracked_state brw_cs_image_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_CS_PROG_DATA |
             BRW_NEW_IMAGE_UNITS,
   },
   .emit = brw_upload_cs_image_surfaces,
};
static enum isl_format
get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   enum isl_format hw_format = brw_isl_format_for_mesa_format(format);
   if (access == GL_WRITE_ONLY) {
      return hw_format;
   } else if (isl_has_matching_typed_storage_image_format(devinfo, hw_format)) {
      /* Typed surface reads support a very limited subset of the shader
       * image formats.  Translate it into the closest format the
       * hardware supports.
       */
      return isl_lower_storage_image_format(devinfo, hw_format);
   } else {
      /* The hardware doesn't actually support a typed format that we can use
       * so we have to fall back to untyped read/write messages.
       */
      return ISL_FORMAT_RAW;
   }
}
static void
update_default_image_param(struct brw_context *brw,
                           struct gl_image_unit *u,
                           unsigned surface_idx,
                           struct brw_image_param *param)
{
   memset(param, 0, sizeof(*param));
   param->surface_idx = surface_idx;
   /* Set the swizzling shifts to all-ones to effectively disable swizzling --
    * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
    * detailed explanation of these parameters.
    */
   param->swizzling[0] = 0xff;
   param->swizzling[1] = 0xff;
}
static void
update_buffer_image_param(struct brw_context *brw,
                          struct gl_image_unit *u,
                          unsigned surface_idx,
                          struct brw_image_param *param)
{
   struct gl_buffer_object *obj = u->TexObj->BufferObject;
   const uint32_t size = MIN2((uint32_t)u->TexObj->BufferSize, obj->Size);
   update_default_image_param(brw, u, surface_idx, param);

   param->size[0] = size / _mesa_get_format_bytes(u->_ActualFormat);
   param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
}
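/* Illustrative numbers: a 4096-byte buffer bound as a GL_RGBA32F image has
 * 16-byte texels, so stride[0] = 16 and size[0] = 4096 / 16 = 256 texels;
 * the remaining dimensions stay zero from update_default_image_param().
 */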
static unsigned
get_image_num_layers(const struct intel_mipmap_tree *mt, GLenum target,
                     unsigned level)
{
   if (target == GL_TEXTURE_CUBE_MAP)
      return 6;

   return target == GL_TEXTURE_3D ?
      minify(mt->surf.logical_level0_px.depth, level) :
      mt->surf.logical_level0_px.array_len;
}
static void
update_image_surface(struct brw_context *brw,
                     struct gl_image_unit *u,
                     GLenum access,
                     unsigned surface_idx,
                     uint32_t *surf_offset,
                     struct brw_image_param *param)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
      struct gl_texture_object *obj = u->TexObj;
      const unsigned format = get_image_format(brw, u->_ActualFormat, access);

      if (obj->Target == GL_TEXTURE_BUFFER) {
         struct intel_buffer_object *intel_obj =
            intel_buffer_object(obj->BufferObject);
         const unsigned texel_size = (format == ISL_FORMAT_RAW ? 1 :
                                      _mesa_get_format_bytes(u->_ActualFormat));

         brw_emit_buffer_surface_state(
            brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
            format, intel_obj->Base.Size, texel_size,
            access != GL_READ_ONLY ? RELOC_WRITE : 0);

         update_buffer_image_param(brw, u, surface_idx, param);

      } else {
         struct intel_texture_object *intel_obj = intel_texture_object(obj);
         struct intel_mipmap_tree *mt = intel_obj->mt;
         const unsigned num_layers = u->Layered ?
            get_image_num_layers(mt, obj->Target, u->Level) : 1;

         struct isl_view view = {
            .format = format,
            .base_level = obj->MinLevel + u->Level,
            .base_array_layer = obj->MinLayer + u->_Layer,
            .array_len = num_layers,
            .swizzle = ISL_SWIZZLE_IDENTITY,
            .usage = ISL_SURF_USAGE_STORAGE_BIT,
         };

         if (format == ISL_FORMAT_RAW) {
            brw_emit_buffer_surface_state(
               brw, surf_offset, mt->bo, mt->offset,
               format, mt->bo->size - mt->offset, 1 /* pitch */,
               access != GL_READ_ONLY ? RELOC_WRITE : 0);

         } else {
            const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
            assert(!intel_miptree_has_color_unresolved(mt,
                                                       view.base_level, 1,
                                                       view.base_array_layer,
                                                       view.array_len));
            brw_emit_surface_state(brw, mt, mt->target, view,
                                   ISL_AUX_USAGE_NONE, tex_mocs[devinfo->gen],
                                   surf_offset, surf_index,
                                   access == GL_READ_ONLY ? 0 : RELOC_WRITE);
         }

         isl_surf_fill_image_param(&brw->isl_dev, param, &mt->surf, &view);
         param->surface_idx = surface_idx;
      }

   } else {
      emit_null_surface_state(brw, NULL, surf_offset);
      update_default_image_param(brw, u, surface_idx, param);
   }
}
void
brw_upload_image_surfaces(struct brw_context *brw,
                          const struct gl_program *prog,
                          struct brw_stage_state *stage_state,
                          struct brw_stage_prog_data *prog_data)
{
   struct gl_context *ctx = &brw->ctx;

   if (prog->info.num_images) {
      for (unsigned i = 0; i < prog->info.num_images; i++) {
         struct gl_image_unit *u = &ctx->ImageUnits[prog->sh.ImageUnits[i]];
         const unsigned surf_idx = prog_data->binding_table.image_start + i;

         update_image_surface(brw, u, prog->sh.ImageAccess[i],
                              surf_idx,
                              &stage_state->surf_offset[surf_idx],
                              &stage_state->image_param[i]);
      }

      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
      /* This may have changed the image metadata that depends on the context
       * image unit state and is passed to the program as uniforms; make sure
       * push and pull constants are re-uploaded.
       */
      brw->NewGLState |= _NEW_PROGRAM_CONSTANTS;
   }
}
static void
brw_upload_wm_image_surfaces(struct brw_context *brw)
{
   /* BRW_NEW_FRAGMENT_PROGRAM */
   const struct gl_program *wm = brw->programs[MESA_SHADER_FRAGMENT];

   if (wm) {
      /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
      brw_upload_image_surfaces(brw, wm, &brw->wm.base,
                                brw->wm.base.prog_data);
   }
}

const struct brw_tracked_state brw_wm_image_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_IMAGE_UNITS,
   },
   .emit = brw_upload_wm_image_surfaces,
};
static void
brw_upload_cs_work_groups_surface(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   struct gl_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
   /* BRW_NEW_CS_PROG_DATA */
   const struct brw_cs_prog_data *cs_prog_data =
      brw_cs_prog_data(brw->cs.base.prog_data);

   if (prog && cs_prog_data->uses_num_work_groups) {
      const unsigned surf_idx =
         cs_prog_data->binding_table.work_groups_start;
      uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
      struct brw_bo *bo;
      uint32_t bo_offset;

      if (brw->compute.num_work_groups_bo == NULL) {
         bo = NULL;
         intel_upload_data(brw,
                           (void *)brw->compute.num_work_groups,
                           3 * sizeof(GLuint), sizeof(GLuint),
                           &bo, &bo_offset);
      } else {
         bo = brw->compute.num_work_groups_bo;
         bo_offset = brw->compute.num_work_groups_offset;
      }

      brw_emit_buffer_surface_state(brw, surf_offset,
                                    bo, bo_offset,
                                    ISL_FORMAT_RAW,
                                    3 * sizeof(GLuint), 1,
                                    RELOC_WRITE);
      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
   }
}

const struct brw_tracked_state brw_cs_work_groups_surface = {
   .dirty = {
      .brw = BRW_NEW_CS_PROG_DATA |
             BRW_NEW_CS_WORK_GROUPS
   },
   .emit = brw_upload_cs_work_groups_surface,
};