2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keithw@vmware.com>
33 #include "compiler/nir/nir.h"
34 #include "main/context.h"
35 #include "main/blend.h"
36 #include "main/mtypes.h"
37 #include "main/samplerobj.h"
38 #include "main/shaderimage.h"
39 #include "main/teximage.h"
40 #include "program/prog_parameter.h"
41 #include "program/prog_instruction.h"
42 #include "main/framebuffer.h"
43 #include "main/shaderapi.h"
47 #include "intel_mipmap_tree.h"
48 #include "intel_batchbuffer.h"
49 #include "intel_tex.h"
50 #include "intel_fbo.h"
51 #include "intel_buffer_objects.h"
53 #include "brw_context.h"
54 #include "brw_state.h"
55 #include "brw_defines.h"
58 uint32_t wb_mocs
[] = {
66 uint32_t pte_mocs
[] = {
75 brw_get_bo_mocs(const struct gen_device_info
*devinfo
, struct brw_bo
*bo
)
77 return (bo
&& bo
->external
? pte_mocs
: wb_mocs
)[devinfo
->gen
];
81 get_isl_surf(struct brw_context
*brw
, struct intel_mipmap_tree
*mt
,
82 GLenum target
, struct isl_view
*view
,
83 uint32_t *tile_x
, uint32_t *tile_y
,
84 uint32_t *offset
, struct isl_surf
*surf
)
88 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
89 const enum isl_dim_layout dim_layout
=
90 get_isl_dim_layout(devinfo
, mt
->surf
.tiling
, target
);
92 if (surf
->dim_layout
== dim_layout
)
95 /* The layout of the specified texture target is not compatible with the
96 * actual layout of the miptree structure in memory -- You're entering
97 * dangerous territory, this can only possibly work if you only intended
98 * to access a single level and slice of the texture, and the hardware
99 * supports the tile offset feature in order to allow non-tile-aligned
100 * base offsets, since we'll have to point the hardware to the first
101 * texel of the level instead of relying on the usual base level/layer
104 assert(devinfo
->has_surface_tile_offset
);
105 assert(view
->levels
== 1 && view
->array_len
== 1);
106 assert(*tile_x
== 0 && *tile_y
== 0);
108 *offset
+= intel_miptree_get_tile_offsets(mt
, view
->base_level
,
109 view
->base_array_layer
,
112 /* Minify the logical dimensions of the texture. */
113 const unsigned l
= view
->base_level
- mt
->first_level
;
114 surf
->logical_level0_px
.width
= minify(surf
->logical_level0_px
.width
, l
);
115 surf
->logical_level0_px
.height
= surf
->dim
<= ISL_SURF_DIM_1D
? 1 :
116 minify(surf
->logical_level0_px
.height
, l
);
117 surf
->logical_level0_px
.depth
= surf
->dim
<= ISL_SURF_DIM_2D
? 1 :
118 minify(surf
->logical_level0_px
.depth
, l
);
120 /* Only the base level and layer can be addressed with the overridden
123 surf
->logical_level0_px
.array_len
= 1;
125 surf
->dim_layout
= dim_layout
;
127 /* The requested slice of the texture is now at the base level and
130 view
->base_level
= 0;
131 view
->base_array_layer
= 0;
135 brw_emit_surface_state(struct brw_context
*brw
,
136 struct intel_mipmap_tree
*mt
,
137 GLenum target
, struct isl_view view
,
138 enum isl_aux_usage aux_usage
,
139 uint32_t *surf_offset
, int surf_index
,
140 unsigned reloc_flags
)
142 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
143 uint32_t tile_x
= mt
->level
[0].level_x
;
144 uint32_t tile_y
= mt
->level
[0].level_y
;
145 uint32_t offset
= mt
->offset
;
147 struct isl_surf surf
;
149 get_isl_surf(brw
, mt
, target
, &view
, &tile_x
, &tile_y
, &offset
, &surf
);
151 union isl_color_value clear_color
= { .u32
= { 0, 0, 0, 0 } };
153 struct brw_bo
*aux_bo
;
154 struct isl_surf
*aux_surf
= NULL
;
155 uint64_t aux_offset
= 0;
157 case ISL_AUX_USAGE_MCS
:
158 case ISL_AUX_USAGE_CCS_D
:
159 case ISL_AUX_USAGE_CCS_E
:
160 aux_surf
= &mt
->mcs_buf
->surf
;
161 aux_bo
= mt
->mcs_buf
->bo
;
162 aux_offset
= mt
->mcs_buf
->offset
;
165 case ISL_AUX_USAGE_HIZ
:
166 aux_surf
= &mt
->hiz_buf
->surf
;
167 aux_bo
= mt
->hiz_buf
->bo
;
171 case ISL_AUX_USAGE_NONE
:
175 if (aux_usage
!= ISL_AUX_USAGE_NONE
) {
176 /* We only really need a clear color if we also have an auxiliary
177 * surface. Without one, it does nothing.
179 clear_color
= mt
->fast_clear_color
;
182 void *state
= brw_state_batch(brw
,
183 brw
->isl_dev
.ss
.size
,
184 brw
->isl_dev
.ss
.align
,
187 isl_surf_fill_state(&brw
->isl_dev
, state
, .surf
= &mt
->surf
, .view
= &view
,
188 .address
= brw_state_reloc(&brw
->batch
,
189 *surf_offset
+ brw
->isl_dev
.ss
.addr_offset
,
190 mt
->bo
, offset
, reloc_flags
),
191 .aux_surf
= aux_surf
, .aux_usage
= aux_usage
,
192 .aux_address
= aux_offset
,
193 .mocs
= brw_get_bo_mocs(devinfo
, mt
->bo
),
194 .clear_color
= clear_color
,
195 .x_offset_sa
= tile_x
, .y_offset_sa
= tile_y
);
197 /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
198 * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
199 * contain other control information. Since buffer addresses are always
200 * on 4k boundaries (and thus have their lower 12 bits zero), we can use
201 * an ordinary reloc to do the necessary address translation.
203 * FIXME: move to the point of assignment.
205 assert((aux_offset
& 0xfff) == 0);
206 uint32_t *aux_addr
= state
+ brw
->isl_dev
.ss
.aux_addr_offset
;
207 *aux_addr
= brw_state_reloc(&brw
->batch
,
209 brw
->isl_dev
.ss
.aux_addr_offset
,
216 gen6_update_renderbuffer_surface(struct brw_context
*brw
,
217 struct gl_renderbuffer
*rb
,
221 struct gl_context
*ctx
= &brw
->ctx
;
222 struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
223 struct intel_mipmap_tree
*mt
= irb
->mt
;
225 assert(brw_render_target_supported(brw
, rb
));
227 mesa_format rb_format
= _mesa_get_render_format(ctx
, intel_rb_format(irb
));
228 if (unlikely(!brw
->mesa_format_supports_render
[rb_format
])) {
229 _mesa_problem(ctx
, "%s: renderbuffer format %s unsupported\n",
230 __func__
, _mesa_get_format_name(rb_format
));
232 enum isl_format isl_format
= brw
->mesa_to_isl_render_format
[rb_format
];
234 struct isl_view view
= {
235 .format
= isl_format
,
236 .base_level
= irb
->mt_level
- irb
->mt
->first_level
,
238 .base_array_layer
= irb
->mt_layer
,
239 .array_len
= MAX2(irb
->layer_count
, 1),
240 .swizzle
= ISL_SWIZZLE_IDENTITY
,
241 .usage
= ISL_SURF_USAGE_RENDER_TARGET_BIT
,
245 brw_emit_surface_state(brw
, mt
, mt
->target
, view
,
246 brw
->draw_aux_usage
[unit
],
253 translate_tex_target(GLenum target
)
257 case GL_TEXTURE_1D_ARRAY_EXT
:
258 return BRW_SURFACE_1D
;
260 case GL_TEXTURE_RECTANGLE_NV
:
261 return BRW_SURFACE_2D
;
264 case GL_TEXTURE_2D_ARRAY_EXT
:
265 case GL_TEXTURE_EXTERNAL_OES
:
266 case GL_TEXTURE_2D_MULTISAMPLE
:
267 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY
:
268 return BRW_SURFACE_2D
;
271 return BRW_SURFACE_3D
;
273 case GL_TEXTURE_CUBE_MAP
:
274 case GL_TEXTURE_CUBE_MAP_ARRAY
:
275 return BRW_SURFACE_CUBE
;
278 unreachable("not reached");
283 brw_get_surface_tiling_bits(enum isl_tiling tiling
)
287 return BRW_SURFACE_TILED
;
289 return BRW_SURFACE_TILED
| BRW_SURFACE_TILED_Y
;
297 brw_get_surface_num_multisamples(unsigned num_samples
)
300 return BRW_SURFACE_MULTISAMPLECOUNT_4
;
302 return BRW_SURFACE_MULTISAMPLECOUNT_1
;
306 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
310 brw_get_texture_swizzle(const struct gl_context
*ctx
,
311 const struct gl_texture_object
*t
)
313 const struct gl_texture_image
*img
= t
->Image
[0][t
->BaseLevel
];
315 int swizzles
[SWIZZLE_NIL
+ 1] = {
325 if (img
->_BaseFormat
== GL_DEPTH_COMPONENT
||
326 img
->_BaseFormat
== GL_DEPTH_STENCIL
) {
327 GLenum depth_mode
= t
->DepthMode
;
329 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
330 * with depth component data specified with a sized internal format.
331 * Otherwise, it's left at the old default, GL_LUMINANCE.
333 if (_mesa_is_gles3(ctx
) &&
334 img
->InternalFormat
!= GL_DEPTH_COMPONENT
&&
335 img
->InternalFormat
!= GL_DEPTH_STENCIL
) {
339 switch (depth_mode
) {
341 swizzles
[0] = SWIZZLE_ZERO
;
342 swizzles
[1] = SWIZZLE_ZERO
;
343 swizzles
[2] = SWIZZLE_ZERO
;
344 swizzles
[3] = SWIZZLE_X
;
347 swizzles
[0] = SWIZZLE_X
;
348 swizzles
[1] = SWIZZLE_X
;
349 swizzles
[2] = SWIZZLE_X
;
350 swizzles
[3] = SWIZZLE_ONE
;
353 swizzles
[0] = SWIZZLE_X
;
354 swizzles
[1] = SWIZZLE_X
;
355 swizzles
[2] = SWIZZLE_X
;
356 swizzles
[3] = SWIZZLE_X
;
359 swizzles
[0] = SWIZZLE_X
;
360 swizzles
[1] = SWIZZLE_ZERO
;
361 swizzles
[2] = SWIZZLE_ZERO
;
362 swizzles
[3] = SWIZZLE_ONE
;
367 GLenum datatype
= _mesa_get_format_datatype(img
->TexFormat
);
369 /* If the texture's format is alpha-only, force R, G, and B to
370 * 0.0. Similarly, if the texture's format has no alpha channel,
371 * force the alpha value read to 1.0. This allows for the
372 * implementation to use an RGBA texture for any of these formats
373 * without leaking any unexpected values.
375 switch (img
->_BaseFormat
) {
377 swizzles
[0] = SWIZZLE_ZERO
;
378 swizzles
[1] = SWIZZLE_ZERO
;
379 swizzles
[2] = SWIZZLE_ZERO
;
382 if (t
->_IsIntegerFormat
|| datatype
== GL_SIGNED_NORMALIZED
) {
383 swizzles
[0] = SWIZZLE_X
;
384 swizzles
[1] = SWIZZLE_X
;
385 swizzles
[2] = SWIZZLE_X
;
386 swizzles
[3] = SWIZZLE_ONE
;
389 case GL_LUMINANCE_ALPHA
:
390 if (datatype
== GL_SIGNED_NORMALIZED
) {
391 swizzles
[0] = SWIZZLE_X
;
392 swizzles
[1] = SWIZZLE_X
;
393 swizzles
[2] = SWIZZLE_X
;
394 swizzles
[3] = SWIZZLE_W
;
398 if (datatype
== GL_SIGNED_NORMALIZED
) {
399 swizzles
[0] = SWIZZLE_X
;
400 swizzles
[1] = SWIZZLE_X
;
401 swizzles
[2] = SWIZZLE_X
;
402 swizzles
[3] = SWIZZLE_X
;
408 if (_mesa_get_format_bits(img
->TexFormat
, GL_ALPHA_BITS
) > 0 ||
409 img
->TexFormat
== MESA_FORMAT_RGB_DXT1
||
410 img
->TexFormat
== MESA_FORMAT_SRGB_DXT1
)
411 swizzles
[3] = SWIZZLE_ONE
;
415 return MAKE_SWIZZLE4(swizzles
[GET_SWZ(t
->_Swizzle
, 0)],
416 swizzles
[GET_SWZ(t
->_Swizzle
, 1)],
417 swizzles
[GET_SWZ(t
->_Swizzle
, 2)],
418 swizzles
[GET_SWZ(t
->_Swizzle
, 3)]);
422 * Convert an swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+
423 * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED). The mappings are
425 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
428 * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE
430 * which is simply adding 4 then modding by 8 (or anding with 7).
432 * We then may need to apply workarounds for textureGather hardware bugs.
435 swizzle_to_scs(GLenum swizzle
, bool need_green_to_blue
)
437 unsigned scs
= (swizzle
+ 4) & 7;
439 return (need_green_to_blue
&& scs
== HSW_SCS_GREEN
) ? HSW_SCS_BLUE
: scs
;
442 static void brw_update_texture_surface(struct gl_context
*ctx
,
444 uint32_t *surf_offset
,
449 struct brw_context
*brw
= brw_context(ctx
);
450 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
451 struct gl_texture_object
*obj
= ctx
->Texture
.Unit
[unit
]._Current
;
453 if (obj
->Target
== GL_TEXTURE_BUFFER
) {
454 brw_update_buffer_texture_surface(ctx
, unit
, surf_offset
);
457 struct intel_texture_object
*intel_obj
= intel_texture_object(obj
);
458 struct intel_mipmap_tree
*mt
= intel_obj
->mt
;
461 if (mt
->plane
[plane
- 1] == NULL
)
463 mt
= mt
->plane
[plane
- 1];
466 struct gl_sampler_object
*sampler
= _mesa_get_samplerobj(ctx
, unit
);
467 /* If this is a view with restricted NumLayers, then our effective depth
468 * is not just the miptree depth.
470 unsigned view_num_layers
;
471 if (obj
->Immutable
&& obj
->Target
!= GL_TEXTURE_3D
) {
472 view_num_layers
= obj
->NumLayers
;
474 view_num_layers
= mt
->surf
.dim
== ISL_SURF_DIM_3D
?
475 mt
->surf
.logical_level0_px
.depth
:
476 mt
->surf
.logical_level0_px
.array_len
;
479 /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
480 * texturing functions that return a float, as our code generation always
481 * selects the .x channel (which would always be 0).
483 struct gl_texture_image
*firstImage
= obj
->Image
[0][obj
->BaseLevel
];
484 const bool alpha_depth
= obj
->DepthMode
== GL_ALPHA
&&
485 (firstImage
->_BaseFormat
== GL_DEPTH_COMPONENT
||
486 firstImage
->_BaseFormat
== GL_DEPTH_STENCIL
);
487 const unsigned swizzle
= (unlikely(alpha_depth
) ? SWIZZLE_XYZW
:
488 brw_get_texture_swizzle(&brw
->ctx
, obj
));
490 mesa_format mesa_fmt
= plane
== 0 ? intel_obj
->_Format
: mt
->format
;
491 enum isl_format format
= translate_tex_format(brw
, mesa_fmt
,
492 for_txf
? GL_DECODE_EXT
:
493 sampler
->sRGBDecode
);
495 /* Implement gen6 and gen7 gather work-around */
496 bool need_green_to_blue
= false;
498 if (devinfo
->gen
== 7 && (format
== ISL_FORMAT_R32G32_FLOAT
||
499 format
== ISL_FORMAT_R32G32_SINT
||
500 format
== ISL_FORMAT_R32G32_UINT
)) {
501 format
= ISL_FORMAT_R32G32_FLOAT_LD
;
502 need_green_to_blue
= devinfo
->is_haswell
;
503 } else if (devinfo
->gen
== 6) {
504 /* Sandybridge's gather4 message is broken for integer formats.
505 * To work around this, we pretend the surface is UNORM for
506 * 8 or 16-bit formats, and emit shader instructions to recover
507 * the real INT/UINT value. For 32-bit formats, we pretend
508 * the surface is FLOAT, and simply reinterpret the resulting
512 case ISL_FORMAT_R8_SINT
:
513 case ISL_FORMAT_R8_UINT
:
514 format
= ISL_FORMAT_R8_UNORM
;
517 case ISL_FORMAT_R16_SINT
:
518 case ISL_FORMAT_R16_UINT
:
519 format
= ISL_FORMAT_R16_UNORM
;
522 case ISL_FORMAT_R32_SINT
:
523 case ISL_FORMAT_R32_UINT
:
524 format
= ISL_FORMAT_R32_FLOAT
;
533 if (obj
->StencilSampling
&& firstImage
->_BaseFormat
== GL_DEPTH_STENCIL
) {
534 if (devinfo
->gen
<= 7) {
535 assert(mt
->r8stencil_mt
&& !mt
->stencil_mt
->r8stencil_needs_update
);
536 mt
= mt
->r8stencil_mt
;
540 format
= ISL_FORMAT_R8_UINT
;
541 } else if (devinfo
->gen
<= 7 && mt
->format
== MESA_FORMAT_S_UINT8
) {
542 assert(mt
->r8stencil_mt
&& !mt
->r8stencil_needs_update
);
543 mt
= mt
->r8stencil_mt
;
544 format
= ISL_FORMAT_R8_UINT
;
547 const int surf_index
= surf_offset
- &brw
->wm
.base
.surf_offset
[0];
549 struct isl_view view
= {
551 .base_level
= obj
->MinLevel
+ obj
->BaseLevel
,
552 .levels
= intel_obj
->_MaxLevel
- obj
->BaseLevel
+ 1,
553 .base_array_layer
= obj
->MinLayer
,
554 .array_len
= view_num_layers
,
556 .r
= swizzle_to_scs(GET_SWZ(swizzle
, 0), need_green_to_blue
),
557 .g
= swizzle_to_scs(GET_SWZ(swizzle
, 1), need_green_to_blue
),
558 .b
= swizzle_to_scs(GET_SWZ(swizzle
, 2), need_green_to_blue
),
559 .a
= swizzle_to_scs(GET_SWZ(swizzle
, 3), need_green_to_blue
),
561 .usage
= ISL_SURF_USAGE_TEXTURE_BIT
,
564 if (obj
->Target
== GL_TEXTURE_CUBE_MAP
||
565 obj
->Target
== GL_TEXTURE_CUBE_MAP_ARRAY
)
566 view
.usage
|= ISL_SURF_USAGE_CUBE_BIT
;
568 enum isl_aux_usage aux_usage
=
569 intel_miptree_texture_aux_usage(brw
, mt
, format
);
571 brw_emit_surface_state(brw
, mt
, mt
->target
, view
, aux_usage
,
572 surf_offset
, surf_index
,
578 brw_emit_buffer_surface_state(struct brw_context
*brw
,
579 uint32_t *out_offset
,
581 unsigned buffer_offset
,
582 unsigned surface_format
,
583 unsigned buffer_size
,
585 unsigned reloc_flags
)
587 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
588 uint32_t *dw
= brw_state_batch(brw
,
589 brw
->isl_dev
.ss
.size
,
590 brw
->isl_dev
.ss
.align
,
593 isl_buffer_fill_state(&brw
->isl_dev
, dw
,
594 .address
= !bo
? buffer_offset
:
595 brw_state_reloc(&brw
->batch
,
596 *out_offset
+ brw
->isl_dev
.ss
.addr_offset
,
600 .format
= surface_format
,
602 .mocs
= brw_get_bo_mocs(devinfo
, bo
));
606 brw_update_buffer_texture_surface(struct gl_context
*ctx
,
608 uint32_t *surf_offset
)
610 struct brw_context
*brw
= brw_context(ctx
);
611 struct gl_texture_object
*tObj
= ctx
->Texture
.Unit
[unit
]._Current
;
612 struct intel_buffer_object
*intel_obj
=
613 intel_buffer_object(tObj
->BufferObject
);
614 uint32_t size
= tObj
->BufferSize
;
615 struct brw_bo
*bo
= NULL
;
616 mesa_format format
= tObj
->_BufferObjectFormat
;
617 const enum isl_format isl_format
= brw_isl_format_for_mesa_format(format
);
618 int texel_size
= _mesa_get_format_bytes(format
);
621 size
= MIN2(size
, intel_obj
->Base
.Size
);
622 bo
= intel_bufferobj_buffer(brw
, intel_obj
, tObj
->BufferOffset
, size
,
626 /* The ARB_texture_buffer_object specification says:
628 * "The number of texels in the buffer texture's texel array is given by
630 * floor(<buffer_size> / (<components> * sizeof(<base_type>)),
632 * where <buffer_size> is the size of the buffer object, in basic
633 * machine units and <components> and <base_type> are the element count
634 * and base data type for elements, as specified in Table X.1. The
635 * number of texels in the texel array is then clamped to the
636 * implementation-dependent limit MAX_TEXTURE_BUFFER_SIZE_ARB."
638 * We need to clamp the size in bytes to MAX_TEXTURE_BUFFER_SIZE * stride,
639 * so that when ISL divides by stride to obtain the number of texels, that
640 * texel count is clamped to MAX_TEXTURE_BUFFER_SIZE.
642 size
= MIN2(size
, ctx
->Const
.MaxTextureBufferSize
* (unsigned) texel_size
);
644 if (isl_format
== ISL_FORMAT_UNSUPPORTED
) {
645 _mesa_problem(NULL
, "bad format %s for texture buffer\n",
646 _mesa_get_format_name(format
));
649 brw_emit_buffer_surface_state(brw
, surf_offset
, bo
,
658 * Set up a binding table entry for use by stream output logic (transform
661 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
664 brw_update_sol_surface(struct brw_context
*brw
,
665 struct gl_buffer_object
*buffer_obj
,
666 uint32_t *out_offset
, unsigned num_vector_components
,
667 unsigned stride_dwords
, unsigned offset_dwords
)
669 struct intel_buffer_object
*intel_bo
= intel_buffer_object(buffer_obj
);
670 uint32_t offset_bytes
= 4 * offset_dwords
;
671 struct brw_bo
*bo
= intel_bufferobj_buffer(brw
, intel_bo
,
673 buffer_obj
->Size
- offset_bytes
,
675 uint32_t *surf
= brw_state_batch(brw
, 6 * 4, 32, out_offset
);
676 uint32_t pitch_minus_1
= 4*stride_dwords
- 1;
677 size_t size_dwords
= buffer_obj
->Size
/ 4;
678 uint32_t buffer_size_minus_1
, width
, height
, depth
, surface_format
;
680 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
681 * too big to map using a single binding table entry?
683 assert((size_dwords
- offset_dwords
) / stride_dwords
684 <= BRW_MAX_NUM_BUFFER_ENTRIES
);
686 if (size_dwords
> offset_dwords
+ num_vector_components
) {
687 /* There is room for at least 1 transform feedback output in the buffer.
688 * Compute the number of additional transform feedback outputs the
689 * buffer has room for.
691 buffer_size_minus_1
=
692 (size_dwords
- offset_dwords
- num_vector_components
) / stride_dwords
;
694 /* There isn't even room for a single transform feedback output in the
695 * buffer. We can't configure the binding table entry to prevent output
696 * entirely; we'll have to rely on the geometry shader to detect
697 * overflow. But to minimize the damage in case of a bug, set up the
698 * binding table entry to just allow a single output.
700 buffer_size_minus_1
= 0;
702 width
= buffer_size_minus_1
& 0x7f;
703 height
= (buffer_size_minus_1
& 0xfff80) >> 7;
704 depth
= (buffer_size_minus_1
& 0x7f00000) >> 20;
706 switch (num_vector_components
) {
708 surface_format
= ISL_FORMAT_R32_FLOAT
;
711 surface_format
= ISL_FORMAT_R32G32_FLOAT
;
714 surface_format
= ISL_FORMAT_R32G32B32_FLOAT
;
717 surface_format
= ISL_FORMAT_R32G32B32A32_FLOAT
;
720 unreachable("Invalid vector size for transform feedback output");
723 surf
[0] = BRW_SURFACE_BUFFER
<< BRW_SURFACE_TYPE_SHIFT
|
724 BRW_SURFACE_MIPMAPLAYOUT_BELOW
<< BRW_SURFACE_MIPLAYOUT_SHIFT
|
725 surface_format
<< BRW_SURFACE_FORMAT_SHIFT
|
726 BRW_SURFACE_RC_READ_WRITE
;
727 surf
[1] = brw_state_reloc(&brw
->batch
,
728 *out_offset
+ 4, bo
, offset_bytes
, RELOC_WRITE
);
729 surf
[2] = (width
<< BRW_SURFACE_WIDTH_SHIFT
|
730 height
<< BRW_SURFACE_HEIGHT_SHIFT
);
731 surf
[3] = (depth
<< BRW_SURFACE_DEPTH_SHIFT
|
732 pitch_minus_1
<< BRW_SURFACE_PITCH_SHIFT
);
737 /* Creates a new WM constant buffer reflecting the current fragment program's
738 * constants, if needed by the fragment program.
740 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
744 brw_upload_wm_pull_constants(struct brw_context
*brw
)
746 struct brw_stage_state
*stage_state
= &brw
->wm
.base
;
747 /* BRW_NEW_FRAGMENT_PROGRAM */
748 struct brw_program
*fp
=
749 (struct brw_program
*) brw
->programs
[MESA_SHADER_FRAGMENT
];
751 /* BRW_NEW_FS_PROG_DATA */
752 struct brw_stage_prog_data
*prog_data
= brw
->wm
.base
.prog_data
;
754 _mesa_shader_write_subroutine_indices(&brw
->ctx
, MESA_SHADER_FRAGMENT
);
755 /* _NEW_PROGRAM_CONSTANTS */
756 brw_upload_pull_constants(brw
, BRW_NEW_SURFACES
, &fp
->program
,
757 stage_state
, prog_data
);
760 const struct brw_tracked_state brw_wm_pull_constants
= {
762 .mesa
= _NEW_PROGRAM_CONSTANTS
,
763 .brw
= BRW_NEW_BATCH
|
764 BRW_NEW_FRAGMENT_PROGRAM
|
765 BRW_NEW_FS_PROG_DATA
,
767 .emit
= brw_upload_wm_pull_constants
,
771 * Creates a null renderbuffer surface.
773 * This is used when the shader doesn't write to any color output. An FB
774 * write to target 0 will still be emitted, because that's how the thread is
775 * terminated (and computed depth is returned), so we need to have the
776 * hardware discard the target 0 color output.
779 emit_null_surface_state(struct brw_context
*brw
,
780 const struct gl_framebuffer
*fb
,
781 uint32_t *out_offset
)
783 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
784 uint32_t *surf
= brw_state_batch(brw
,
785 brw
->isl_dev
.ss
.size
,
786 brw
->isl_dev
.ss
.align
,
789 /* Use the fb dimensions or 1x1x1 */
790 const unsigned width
= fb
? _mesa_geometric_width(fb
) : 1;
791 const unsigned height
= fb
? _mesa_geometric_height(fb
) : 1;
792 const unsigned samples
= fb
? _mesa_geometric_samples(fb
) : 1;
794 if (devinfo
->gen
!= 6 || samples
<= 1) {
795 isl_null_fill_state(&brw
->isl_dev
, surf
,
796 isl_extent3d(width
, height
, 1));
800 /* On Gen6, null render targets seem to cause GPU hangs when multisampling.
801 * So work around this problem by rendering into dummy color buffer.
803 * To decrease the amount of memory needed by the workaround buffer, we
804 * set its pitch to 128 bytes (the width of a Y tile). This means that
805 * the amount of memory needed for the workaround buffer is
806 * (width_in_tiles + height_in_tiles - 1) tiles.
808 * Note that since the workaround buffer will be interpreted by the
809 * hardware as an interleaved multisampled buffer, we need to compute
810 * width_in_tiles and height_in_tiles by dividing the width and height
811 * by 16 rather than the normal Y-tile size of 32.
813 unsigned width_in_tiles
= ALIGN(width
, 16) / 16;
814 unsigned height_in_tiles
= ALIGN(height
, 16) / 16;
815 unsigned pitch_minus_1
= 127;
816 unsigned size_needed
= (width_in_tiles
+ height_in_tiles
- 1) * 4096;
817 brw_get_scratch_bo(brw
, &brw
->wm
.multisampled_null_render_target_bo
,
820 surf
[0] = (BRW_SURFACE_2D
<< BRW_SURFACE_TYPE_SHIFT
|
821 ISL_FORMAT_B8G8R8A8_UNORM
<< BRW_SURFACE_FORMAT_SHIFT
);
822 surf
[1] = brw_state_reloc(&brw
->batch
, *out_offset
+ 4,
823 brw
->wm
.multisampled_null_render_target_bo
,
826 surf
[2] = ((width
- 1) << BRW_SURFACE_WIDTH_SHIFT
|
827 (height
- 1) << BRW_SURFACE_HEIGHT_SHIFT
);
829 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
832 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
834 surf
[3] = (BRW_SURFACE_TILED
| BRW_SURFACE_TILED_Y
|
835 pitch_minus_1
<< BRW_SURFACE_PITCH_SHIFT
);
836 surf
[4] = BRW_SURFACE_MULTISAMPLECOUNT_4
;
841 * Sets up a surface state structure to point at the given region.
842 * While it is only used for the front/back buffer currently, it should be
843 * usable for further buffers when doing ARB_draw_buffers support.
846 gen4_update_renderbuffer_surface(struct brw_context
*brw
,
847 struct gl_renderbuffer
*rb
,
851 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
852 struct gl_context
*ctx
= &brw
->ctx
;
853 struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
854 struct intel_mipmap_tree
*mt
= irb
->mt
;
856 uint32_t tile_x
, tile_y
;
857 enum isl_format format
;
860 mesa_format rb_format
= _mesa_get_render_format(ctx
, intel_rb_format(irb
));
861 /* BRW_NEW_FS_PROG_DATA */
863 if (rb
->TexImage
&& !devinfo
->has_surface_tile_offset
) {
864 intel_renderbuffer_get_tile_offsets(irb
, &tile_x
, &tile_y
);
866 if (tile_x
!= 0 || tile_y
!= 0) {
867 /* Original gen4 hardware couldn't draw to a non-tile-aligned
868 * destination in a miptree unless you actually setup your renderbuffer
869 * as a miptree and used the fragile lod/array_index/etc. controls to
870 * select the image. So, instead, we just make a new single-level
871 * miptree and render into that.
873 intel_renderbuffer_move_to_temp(brw
, irb
, false);
874 assert(irb
->align_wa_mt
);
875 mt
= irb
->align_wa_mt
;
879 surf
= brw_state_batch(brw
, 6 * 4, 32, &offset
);
881 format
= brw
->mesa_to_isl_render_format
[rb_format
];
882 if (unlikely(!brw
->mesa_format_supports_render
[rb_format
])) {
883 _mesa_problem(ctx
, "%s: renderbuffer format %s unsupported\n",
884 __func__
, _mesa_get_format_name(rb_format
));
887 surf
[0] = (BRW_SURFACE_2D
<< BRW_SURFACE_TYPE_SHIFT
|
888 format
<< BRW_SURFACE_FORMAT_SHIFT
);
891 assert(mt
->offset
% mt
->cpp
== 0);
892 surf
[1] = brw_state_reloc(&brw
->batch
, offset
+ 4, mt
->bo
,
894 intel_renderbuffer_get_tile_offsets(irb
,
899 surf
[2] = ((rb
->Width
- 1) << BRW_SURFACE_WIDTH_SHIFT
|
900 (rb
->Height
- 1) << BRW_SURFACE_HEIGHT_SHIFT
);
902 surf
[3] = (brw_get_surface_tiling_bits(mt
->surf
.tiling
) |
903 (mt
->surf
.row_pitch
- 1) << BRW_SURFACE_PITCH_SHIFT
);
905 surf
[4] = brw_get_surface_num_multisamples(mt
->surf
.samples
);
907 assert(devinfo
->has_surface_tile_offset
|| (tile_x
== 0 && tile_y
== 0));
908 /* Note that the low bits of these fields are missing, so
909 * there's the possibility of getting in trouble.
911 assert(tile_x
% 4 == 0);
912 assert(tile_y
% 2 == 0);
913 surf
[5] = ((tile_x
/ 4) << BRW_SURFACE_X_OFFSET_SHIFT
|
914 (tile_y
/ 2) << BRW_SURFACE_Y_OFFSET_SHIFT
|
915 (mt
->surf
.image_alignment_el
.height
== 4 ?
916 BRW_SURFACE_VERTICAL_ALIGN_ENABLE
: 0));
918 if (devinfo
->gen
< 6) {
920 if (!ctx
->Color
.ColorLogicOpEnabled
&& !ctx
->Color
._AdvancedBlendMode
&&
921 (ctx
->Color
.BlendEnabled
& (1 << unit
)))
922 surf
[0] |= BRW_SURFACE_BLEND_ENABLED
;
924 if (!GET_COLORMASK_BIT(ctx
->Color
.ColorMask
, unit
, 0))
925 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT
;
926 if (!GET_COLORMASK_BIT(ctx
->Color
.ColorMask
, unit
, 1))
927 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT
;
928 if (!GET_COLORMASK_BIT(ctx
->Color
.ColorMask
, unit
, 2))
929 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT
;
931 /* As mentioned above, disable writes to the alpha component when the
932 * renderbuffer is XRGB.
934 if (ctx
->DrawBuffer
->Visual
.alphaBits
== 0 ||
935 !GET_COLORMASK_BIT(ctx
->Color
.ColorMask
, unit
, 3)) {
936 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT
;
944 update_renderbuffer_surfaces(struct brw_context
*brw
)
946 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
947 const struct gl_context
*ctx
= &brw
->ctx
;
949 /* _NEW_BUFFERS | _NEW_COLOR */
950 const struct gl_framebuffer
*fb
= ctx
->DrawBuffer
;
952 /* Render targets always start at binding table index 0. */
953 const unsigned rt_start
= 0;
955 uint32_t *surf_offsets
= brw
->wm
.base
.surf_offset
;
957 /* Update surfaces for drawing buffers */
958 if (fb
->_NumColorDrawBuffers
>= 1) {
959 for (unsigned i
= 0; i
< fb
->_NumColorDrawBuffers
; i
++) {
960 struct gl_renderbuffer
*rb
= fb
->_ColorDrawBuffers
[i
];
962 if (intel_renderbuffer(rb
)) {
963 surf_offsets
[rt_start
+ i
] = devinfo
->gen
>= 6 ?
964 gen6_update_renderbuffer_surface(brw
, rb
, i
, rt_start
+ i
) :
965 gen4_update_renderbuffer_surface(brw
, rb
, i
, rt_start
+ i
);
967 emit_null_surface_state(brw
, fb
, &surf_offsets
[rt_start
+ i
]);
971 emit_null_surface_state(brw
, fb
, &surf_offsets
[rt_start
]);
974 /* The PIPE_CONTROL command description says:
976 * "Whenever a Binding Table Index (BTI) used by a Render Taget Message
977 * points to a different RENDER_SURFACE_STATE, SW must issue a Render
978 * Target Cache Flush by enabling this bit. When render target flush
979 * is set due to new association of BTI, PS Scoreboard Stall bit must
980 * be set in this packet."
982 if (devinfo
->gen
>= 11) {
983 brw_emit_pipe_control_flush(brw
,
984 PIPE_CONTROL_RENDER_TARGET_FLUSH
|
985 PIPE_CONTROL_STALL_AT_SCOREBOARD
);
988 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
991 const struct brw_tracked_state brw_renderbuffer_surfaces
= {
993 .mesa
= _NEW_BUFFERS
|
995 .brw
= BRW_NEW_BATCH
,
997 .emit
= update_renderbuffer_surfaces
,
1000 const struct brw_tracked_state gen6_renderbuffer_surfaces
= {
1002 .mesa
= _NEW_BUFFERS
,
1003 .brw
= BRW_NEW_BATCH
|
1006 .emit
= update_renderbuffer_surfaces
,
// Emits texture-usage surface state for the framebuffer's color draw buffers
// so that render targets can be sampled.
//
// The visible condition requires wm_prog_data->has_render_target_reads and
// that ctx->Extensions.MESA_shader_framebuffer_fetch is not set. For draw
// buffer i the binding-table slot is render_target_read_start + i, and the
// matching entry of brw->wm.base.surf_offset receives the emitted state.
// BRW_NEW_SURFACES is OR-ed into NewDriverState afterwards.
//
// NOTE(review): the embedded line numbers of this listing have gaps (for
// example 1011, 1029-1030, 1034, 1049, 1052, 1054, 1068-1070), so the return
// type, braces, some initializer fields and the condition that selects
// emit_null_surface_state() are not visible here.
1010 update_renderbuffer_read_surfaces(struct brw_context
*brw
)
1012 const struct gl_context
*ctx
= &brw
->ctx
;
1014 /* BRW_NEW_FS_PROG_DATA */
1015 const struct brw_wm_prog_data
*wm_prog_data
=
1016 brw_wm_prog_data(brw
->wm
.base
.prog_data
);
1018 if (wm_prog_data
->has_render_target_reads
&&
1019 !ctx
->Extensions
.MESA_shader_framebuffer_fetch
) {
1021 const struct gl_framebuffer
*fb
= ctx
->DrawBuffer
;
1023 for (unsigned i
= 0; i
< fb
->_NumColorDrawBuffers
; i
++) {
1024 struct gl_renderbuffer
*rb
= fb
->_ColorDrawBuffers
[i
];
1025 const struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
1026 const unsigned surf_index
=
1027 wm_prog_data
->binding_table
.render_target_read_start
+ i
;
1028 uint32_t *surf_offset
= &brw
->wm
.base
.surf_offset
[surf_index
];
1031 const enum isl_format format
= brw
->mesa_to_isl_render_format
[
1032 _mesa_get_render_format(ctx
, intel_rb_format(irb
))];
1033 assert(isl_format_supports_sampling(&brw
->screen
->devinfo
,
1036 /* Override the target of the texture if the render buffer is a
1037 * single slice of a 3D texture (since the minimum array element
1038 * field of the surface state structure is ignored by the sampler
1039 * unit for 3D textures on some hardware), or if the render buffer
1040 * is a 1D array (since shaders always provide the array index
1041 * coordinate at the Z component to avoid state-dependent
1042 * recompiles when changing the texture target of the
1045 const GLenum target
=
1046 (irb
->mt
->target
== GL_TEXTURE_3D
&&
1047 irb
->layer_count
== 1) ? GL_TEXTURE_2D
:
1048 irb
->mt
->target
== GL_TEXTURE_1D_ARRAY
? GL_TEXTURE_2D_ARRAY
:
1051 const struct isl_view view
= {
1053 .base_level
= irb
->mt_level
- irb
->mt
->first_level
,
1055 .base_array_layer
= irb
->mt_layer
,
1056 .array_len
= irb
->layer_count
,
1057 .swizzle
= ISL_SWIZZLE_IDENTITY
,
1058 .usage
= ISL_SURF_USAGE_TEXTURE_BIT
,
// Start from the miptree's texturing aux usage, but fall back to
// ISL_AUX_USAGE_NONE when the draw-side aux usage recorded for this
// buffer index is NONE.
1061 enum isl_aux_usage aux_usage
=
1062 intel_miptree_texture_aux_usage(brw
, irb
->mt
, format
);
1063 if (brw
->draw_aux_usage
[i
] == ISL_AUX_USAGE_NONE
)
1064 aux_usage
= ISL_AUX_USAGE_NONE
;
1066 brw_emit_surface_state(brw
, irb
->mt
, target
, view
, aux_usage
,
1067 surf_offset
, surf_index
,
1071 emit_null_surface_state(brw
, fb
, surf_offset
);
1075 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
// Tracked-state atom whose emit callback is
// update_renderbuffer_read_surfaces. Visible dirty bits: _NEW_BUFFERS on the
// mesa side, BRW_NEW_BATCH and BRW_NEW_FS_PROG_DATA on the brw side.
// NOTE(review): embedded line 1083 is absent from this listing, so the brw
// mask may carry one more flag than shown.
1079 const struct brw_tracked_state brw_renderbuffer_read_surfaces
= {
1081 .mesa
= _NEW_BUFFERS
,
1082 .brw
= BRW_NEW_BATCH
|
1084 BRW_NEW_FS_PROG_DATA
,
1086 .emit
= update_renderbuffer_read_surfaces
,
// Reports whether a texture object should be treated as a depth texture:
// the base format of iobj->_Format is GL_DEPTH_COMPONENT, or it is
// GL_DEPTH_STENCIL and StencilSampling is not enabled on the object.
1090 is_depth_texture(struct intel_texture_object
*iobj
)
1092 GLenum base_format
= _mesa_get_format_base_format(iobj
->_Format
);
1093 return base_format
== GL_DEPTH_COMPONENT
||
1094 (base_format
== GL_DEPTH_STENCIL
&& !iobj
->base
.StencilSampling
);
// Fills the texture entries of one shader stage's binding table.
//
// Parameters (as used in the visible lines):
//   prog        - program whose SamplersUsed / SamplerUnits / ShadowSamplers
//                 bitfields drive the loop.
//   stage_state - supplies surf_offset and the prog_data binding table.
//   for_gather  - forwarded to brw_update_texture_surface(); the listing also
//                 shows an offset by binding_table.gather_texture_start.
//   plane       - indexes binding_table.plane_start[] and is forwarded to
//                 brw_update_texture_surface().
//
// For each used sampler s, a shadow sampler bound to a texture that
// is_depth_texture() rejects gets a null surface; the other visible call is
// brw_update_texture_surface() for the sampler's texture unit.
//
// NOTE(review): embedded lines 1102-1105, 1111, 1113, 1118, 1125-1129 and
// 1148 are absent from this listing, so the choice between the two
// surf_offset adjustments and any guards on prog or obj cannot be seen here.
1098 update_stage_texture_surfaces(struct brw_context
*brw
,
1099 const struct gl_program
*prog
,
1100 struct brw_stage_state
*stage_state
,
1101 bool for_gather
, uint32_t plane
)
1106 struct gl_context
*ctx
= &brw
->ctx
;
1108 uint32_t *surf_offset
= stage_state
->surf_offset
;
1110 /* BRW_NEW_*_PROG_DATA */
1112 surf_offset
+= stage_state
->prog_data
->binding_table
.gather_texture_start
;
1114 surf_offset
+= stage_state
->prog_data
->binding_table
.plane_start
[plane
];
1116 unsigned num_samplers
= util_last_bit(prog
->SamplersUsed
);
1117 for (unsigned s
= 0; s
< num_samplers
; s
++) {
// NOTE(review): the masks below are built with a signed shift (1 << s).
// num_samplers derives from util_last_bit(prog->SamplersUsed); if bit 31 of
// that field can be set, s reaches 31 and the shift is undefined behaviour
// in C. An unsigned literal (1u << s) would avoid it. Confirm the field
// width before changing.
1120 if (prog
->SamplersUsed
& (1 << s
)) {
1121 const unsigned unit
= prog
->SamplerUnits
[s
];
1122 const bool used_by_txf
= prog
->info
.textures_used_by_txf
& (1 << s
);
1123 struct gl_texture_object
*obj
= ctx
->Texture
.Unit
[unit
]._Current
;
1124 struct intel_texture_object
*iobj
= intel_texture_object(obj
);
1130 if ((prog
->ShadowSamplers
& (1 << s
)) && !is_depth_texture(iobj
)) {
1131 /* A programming note for the sample_c message says:
1133 * "The Surface Format of the associated surface must be
1134 * indicated as supporting shadow mapping as indicated in the
1135 * surface format table."
1137 * Accessing non-depth textures via a sampler*Shadow type is
1138 * undefined. GLSL 4.50 page 162 says:
1140 * "If a shadow texture call is made to a sampler that does not
1141 * represent a depth texture, then results are undefined."
1143 * We give them a null surface (zeros) for undefined. We've seen
1144 * GPU hangs with color buffers and sample_c, so we try and avoid
1145 * those with this hack.
1147 emit_null_surface_state(brw
, NULL
, surf_offset
+ s
);
1149 brw_update_texture_surface(ctx
, unit
, surf_offset
+ s
, for_gather
,
1150 used_by_txf
, plane
);
// Updates texture surface state for the vertex, tessellation control,
// tessellation evaluation, geometry and fragment programs held in
// brw->programs[], each with for_gather = false and plane 0.
//
// Under the visible devinfo->gen < 8 test, any of those programs that is
// non-null and reports info.uses_texture_gather is processed a second time
// with for_gather = true. The fragment program is also processed for planes
// 1 and 2. BRW_NEW_SURFACES is then OR-ed into NewDriverState.
//
// NOTE(review): embedded lines 1199-1201 are absent from this listing, so it
// cannot be seen here whether the plane 1 and 2 calls sit under a condition.
1158 * Construct SURFACE_STATE objects for enabled textures.
1161 brw_update_texture_surfaces(struct brw_context
*brw
)
1163 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
1165 /* BRW_NEW_VERTEX_PROGRAM */
1166 struct gl_program
*vs
= brw
->programs
[MESA_SHADER_VERTEX
];
1168 /* BRW_NEW_TESS_PROGRAMS */
1169 struct gl_program
*tcs
= brw
->programs
[MESA_SHADER_TESS_CTRL
];
1170 struct gl_program
*tes
= brw
->programs
[MESA_SHADER_TESS_EVAL
];
1172 /* BRW_NEW_GEOMETRY_PROGRAM */
1173 struct gl_program
*gs
= brw
->programs
[MESA_SHADER_GEOMETRY
];
1175 /* BRW_NEW_FRAGMENT_PROGRAM */
1176 struct gl_program
*fs
= brw
->programs
[MESA_SHADER_FRAGMENT
];
1179 update_stage_texture_surfaces(brw
, vs
, &brw
->vs
.base
, false, 0);
1180 update_stage_texture_surfaces(brw
, tcs
, &brw
->tcs
.base
, false, 0);
1181 update_stage_texture_surfaces(brw
, tes
, &brw
->tes
.base
, false, 0);
1182 update_stage_texture_surfaces(brw
, gs
, &brw
->gs
.base
, false, 0);
1183 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, false, 0);
1185 /* emit alternate set of surface state for gather. this
1186 * allows the surface format to be overriden for only the
1187 * gather4 messages. */
1188 if (devinfo
->gen
< 8) {
1189 if (vs
&& vs
->info
.uses_texture_gather
)
1190 update_stage_texture_surfaces(brw
, vs
, &brw
->vs
.base
, true, 0);
1191 if (tcs
&& tcs
->info
.uses_texture_gather
)
1192 update_stage_texture_surfaces(brw
, tcs
, &brw
->tcs
.base
, true, 0);
1193 if (tes
&& tes
->info
.uses_texture_gather
)
1194 update_stage_texture_surfaces(brw
, tes
, &brw
->tes
.base
, true, 0);
1195 if (gs
&& gs
->info
.uses_texture_gather
)
1196 update_stage_texture_surfaces(brw
, gs
, &brw
->gs
.base
, true, 0);
1197 if (fs
&& fs
->info
.uses_texture_gather
)
1198 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, true, 0);
1202 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, false, 1);
1203 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, false, 2);
1206 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
// Tracked-state atom whose emit callback is brw_update_texture_surfaces.
// The mesa mask is _NEW_TEXTURE; the visible brw mask covers the batch, the
// program and program-data flags of the VS, TCS, TES, GS and FS stages, and
// BRW_NEW_TEXTURE_BUFFER.
// NOTE(review): embedded line 1213 is absent from this listing, so one more
// flag may belong to the brw mask.
1209 const struct brw_tracked_state brw_texture_surfaces
= {
1211 .mesa
= _NEW_TEXTURE
,
1212 .brw
= BRW_NEW_BATCH
|
1214 BRW_NEW_FRAGMENT_PROGRAM
|
1215 BRW_NEW_FS_PROG_DATA
|
1216 BRW_NEW_GEOMETRY_PROGRAM
|
1217 BRW_NEW_GS_PROG_DATA
|
1218 BRW_NEW_TESS_PROGRAMS
|
1219 BRW_NEW_TCS_PROG_DATA
|
1220 BRW_NEW_TES_PROG_DATA
|
1221 BRW_NEW_TEXTURE_BUFFER
|
1222 BRW_NEW_VERTEX_PROGRAM
|
1223 BRW_NEW_VS_PROG_DATA
,
1225 .emit
= brw_update_texture_surfaces
,
// Compute-stage counterpart of the graphics texture surface update: runs
// update_stage_texture_surfaces() on brw->programs[MESA_SHADER_COMPUTE] with
// for_gather = false and plane 0, repeats it with for_gather = true when
// devinfo->gen < 8 and the program is non-null and uses texture gather, then
// OR-s BRW_NEW_SURFACES into NewDriverState.
1229 brw_update_cs_texture_surfaces(struct brw_context
*brw
)
1231 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
1233 /* BRW_NEW_COMPUTE_PROGRAM */
1234 struct gl_program
*cs
= brw
->programs
[MESA_SHADER_COMPUTE
];
1237 update_stage_texture_surfaces(brw
, cs
, &brw
->cs
.base
, false, 0);
1239 /* emit alternate set of surface state for gather. this
1240 * allows the surface format to be overriden for only the
1243 if (devinfo
->gen
< 8) {
1244 if (cs
&& cs
->info
.uses_texture_gather
)
1245 update_stage_texture_surfaces(brw
, cs
, &brw
->cs
.base
, true, 0);
1248 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
// Tracked-state atom whose emit callback is brw_update_cs_texture_surfaces.
// Visible dirty bits: _NEW_TEXTURE (mesa), BRW_NEW_BATCH and
// BRW_NEW_COMPUTE_PROGRAM (brw).
// NOTE(review): the brw mask ends on a trailing OR and embedded lines
// 1256-1257 are absent from this listing, so further flags follow that are
// not visible here.
1251 const struct brw_tracked_state brw_cs_texture_surfaces
= {
1253 .mesa
= _NEW_TEXTURE
,
1254 .brw
= BRW_NEW_BATCH
|
1255 BRW_NEW_COMPUTE_PROGRAM
|
1258 .emit
= brw_update_cs_texture_surfaces
,
// Emits surface state for one buffer binding and stores it at out_offset.
//
// Parameters (as used in the visible lines):
//   binding     - buffer binding whose BufferObject, Offset, Size and
//                 AutomaticSize fields are read.
//   out_offset  - destination handed to the emit helpers.
//   format      - ISL format passed to brw_emit_buffer_surface_state().
//   reloc_flags - forwarded to the emit helper; its RELOC_WRITE bit is also
//                 turned into the boolean passed to intel_bufferobj_buffer().
//
// A binding that refers to ctx->Shared->NullBufferObj gets a null surface.
// NOTE(review): `bo` is used below but its declaration (embedded line 1279)
// is absent from this listing, as is the else keyword (1272).
1262 upload_buffer_surface(struct brw_context
*brw
,
1263 struct gl_buffer_binding
*binding
,
1264 uint32_t *out_offset
,
1265 enum isl_format format
,
1266 unsigned reloc_flags
)
1268 struct gl_context
*ctx
= &brw
->ctx
;
1270 if (binding
->BufferObject
== ctx
->Shared
->NullBufferObj
) {
1271 emit_null_surface_state(brw
, NULL
, out_offset
);
// Size is what remains of the buffer after Offset, clamped to binding->Size
// unless AutomaticSize is set.
// NOTE(review): no guard is visible for a zero or negative result (Offset
// at or beyond the buffer size). Confirm callers rule that out.
1273 ptrdiff_t size
= binding
->BufferObject
->Size
- binding
->Offset
;
1274 if (!binding
->AutomaticSize
)
1275 size
= MIN2(size
, binding
->Size
);
1277 struct intel_buffer_object
*iobj
=
1278 intel_buffer_object(binding
->BufferObject
);
1280 intel_bufferobj_buffer(brw
, iobj
, binding
->Offset
, size
,
1281 (reloc_flags
& RELOC_WRITE
) != 0);
1283 brw_emit_buffer_surface_state(brw
, out_offset
, bo
, binding
->Offset
,
1284 format
, size
, 1, reloc_flags
);
// Uploads surface state for a program's uniform buffers, atomic counter
// buffers and shader storage buffers.
//
// The visible guard tests for a null prog or for num_ubos, num_ssbos and
// num_abos all being zero. The statement it controls (embedded line 1298)
// is absent from this listing.
//
// UBO surfaces start at binding_table.ubo_start and use
// ISL_FORMAT_R32G32B32A32_FLOAT with reloc flags 0. Atomic and SSBO
// surfaces use ISL_FORMAT_RAW with RELOC_WRITE. Afterwards
// stage_state->push_constants_dirty is set and BRW_NEW_SURFACES is OR-ed
// into NewDriverState.
1289 brw_upload_ubo_surfaces(struct brw_context
*brw
, struct gl_program
*prog
,
1290 struct brw_stage_state
*stage_state
,
1291 struct brw_stage_prog_data
*prog_data
)
1293 struct gl_context
*ctx
= &brw
->ctx
;
1295 if (!prog
|| (prog
->info
.num_ubos
== 0 &&
1296 prog
->info
.num_ssbos
== 0 &&
1297 prog
->info
.num_abos
== 0))
1300 uint32_t *ubo_surf_offsets
=
1301 &stage_state
->surf_offset
[prog_data
->binding_table
.ubo_start
];
1303 for (int i
= 0; i
< prog
->info
.num_ubos
; i
++) {
1304 struct gl_buffer_binding
*binding
=
1305 &ctx
->UniformBufferBindings
[prog
->sh
.UniformBlocks
[i
]->Binding
];
1306 upload_buffer_surface(brw
, binding
, &ubo_surf_offsets
[i
],
1307 ISL_FORMAT_R32G32B32A32_FLOAT
, 0);
// Atomic buffer surfaces occupy the slots starting at ssbo_start; the SSBO
// surfaces follow immediately after the num_abos atomic entries.
1310 uint32_t *abo_surf_offsets
=
1311 &stage_state
->surf_offset
[prog_data
->binding_table
.ssbo_start
];
1312 uint32_t *ssbo_surf_offsets
= abo_surf_offsets
+ prog
->info
.num_abos
;
1314 for (int i
= 0; i
< prog
->info
.num_abos
; i
++) {
1315 struct gl_buffer_binding
*binding
=
1316 &ctx
->AtomicBufferBindings
[prog
->sh
.AtomicBuffers
[i
]->Binding
];
1317 upload_buffer_surface(brw
, binding
, &abo_surf_offsets
[i
],
1318 ISL_FORMAT_RAW
, RELOC_WRITE
);
1321 for (int i
= 0; i
< prog
->info
.num_ssbos
; i
++) {
1322 struct gl_buffer_binding
*binding
=
1323 &ctx
->ShaderStorageBufferBindings
[prog
->sh
.ShaderStorageBlocks
[i
]->Binding
];
1325 upload_buffer_surface(brw
, binding
, &ssbo_surf_offsets
[i
],
1326 ISL_FORMAT_RAW
, RELOC_WRITE
);
1329 stage_state
->push_constants_dirty
= true;
1330 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
// Fragment-stage wrapper: passes ctx->FragmentProgram._Current together with
// the WM stage state and its prog_data to brw_upload_ubo_surfaces().
1334 brw_upload_wm_ubo_surfaces(struct brw_context
*brw
)
1336 struct gl_context
*ctx
= &brw
->ctx
;
1338 struct gl_program
*prog
= ctx
->FragmentProgram
._Current
;
1340 /* BRW_NEW_FS_PROG_DATA */
1341 brw_upload_ubo_surfaces(brw
, prog
, &brw
->wm
.base
, brw
->wm
.base
.prog_data
);
// Tracked-state atom whose emit callback is brw_upload_wm_ubo_surfaces.
// Dirty bits shown: _NEW_PROGRAM (mesa); BRW_NEW_BATCH,
// BRW_NEW_FS_PROG_DATA and BRW_NEW_UNIFORM_BUFFER (brw).
1344 const struct brw_tracked_state brw_wm_ubo_surfaces
= {
1346 .mesa
= _NEW_PROGRAM
,
1347 .brw
= BRW_NEW_BATCH
|
1348 BRW_NEW_FS_PROG_DATA
|
1349 BRW_NEW_UNIFORM_BUFFER
,
1351 .emit
= brw_upload_wm_ubo_surfaces
,
// Compute-stage wrapper: passes ctx->_Shader->CurrentProgram for
// MESA_SHADER_COMPUTE together with the CS stage state and its prog_data to
// brw_upload_ubo_surfaces().
1355 brw_upload_cs_ubo_surfaces(struct brw_context
*brw
)
1357 struct gl_context
*ctx
= &brw
->ctx
;
1359 struct gl_program
*prog
=
1360 ctx
->_Shader
->CurrentProgram
[MESA_SHADER_COMPUTE
];
1362 /* BRW_NEW_CS_PROG_DATA */
1363 brw_upload_ubo_surfaces(brw
, prog
, &brw
->cs
.base
, brw
->cs
.base
.prog_data
);
// Tracked-state atom whose emit callback is brw_upload_cs_ubo_surfaces.
// Dirty bits shown: _NEW_PROGRAM (mesa); BRW_NEW_BATCH,
// BRW_NEW_CS_PROG_DATA and BRW_NEW_UNIFORM_BUFFER (brw).
1366 const struct brw_tracked_state brw_cs_ubo_surfaces
= {
1368 .mesa
= _NEW_PROGRAM
,
1369 .brw
= BRW_NEW_BATCH
|
1370 BRW_NEW_CS_PROG_DATA
|
1371 BRW_NEW_UNIFORM_BUFFER
,
1373 .emit
= brw_upload_cs_ubo_surfaces
,
// Compute-stage wrapper around brw_upload_image_surfaces(): passes
// brw->programs[MESA_SHADER_COMPUTE] with the CS stage state and prog_data.
// NOTE(review): embedded lines 1381-1382 are absent from this listing, so
// any null check on cp before the call cannot be seen here.
1377 brw_upload_cs_image_surfaces(struct brw_context
*brw
)
1380 const struct gl_program
*cp
= brw
->programs
[MESA_SHADER_COMPUTE
];
1383 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1384 brw_upload_image_surfaces(brw
, cp
, &brw
->cs
.base
,
1385 brw
->cs
.base
.prog_data
);
// Tracked-state atom whose emit callback is brw_upload_cs_image_surfaces.
// Visible dirty bits: _NEW_TEXTURE and _NEW_PROGRAM (mesa); BRW_NEW_BATCH
// and BRW_NEW_CS_PROG_DATA (brw).
// NOTE(review): the brw mask ends on a trailing OR and embedded lines
// 1394-1396 are absent from this listing, so further flags follow.
1389 const struct brw_tracked_state brw_cs_image_surfaces
= {
1391 .mesa
= _NEW_TEXTURE
| _NEW_PROGRAM
,
1392 .brw
= BRW_NEW_BATCH
|
1393 BRW_NEW_CS_PROG_DATA
|
1397 .emit
= brw_upload_cs_image_surfaces
,
// Chooses the hardware surface format for a shader image, given its Mesa
// format and the GL access qualifier.
//
// The Mesa format is first translated with brw_isl_format_for_mesa_format().
// When access is not GL_WRITE_ONLY and a matching typed storage format
// exists for the device, the result is isl_lower_storage_image_format();
// the remaining visible return is ISL_FORMAT_RAW, the untyped fallback.
// NOTE(review): the GL_WRITE_ONLY branch body (embedded line 1406) is
// absent from this listing.
1401 get_image_format(struct brw_context
*brw
, mesa_format format
, GLenum access
)
1403 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
1404 enum isl_format hw_format
= brw_isl_format_for_mesa_format(format
);
1405 if (access
== GL_WRITE_ONLY
) {
1407 } else if (isl_has_matching_typed_storage_image_format(devinfo
, hw_format
)) {
1408 /* Typed surface reads support a very limited subset of the shader
1409 * image formats. Translate it into the closest format the
1410 * hardware supports.
1412 return isl_lower_storage_image_format(devinfo
, hw_format
);
1414 /* The hardware doesn't actually support a typed format that we can use
1415 * so we have to fall back to untyped read/write messages.
1417 return ISL_FORMAT_RAW
;
// Resets *param to a default state: the whole structure is zeroed,
// surface_idx is stored, and both swizzling entries are set to 0xff.
// The brw and u arguments are not referenced in the visible lines.
1422 update_default_image_param(struct brw_context
*brw
,
1423 struct gl_image_unit
*u
,
1424 unsigned surface_idx
,
1425 struct brw_image_param
*param
)
1427 memset(param
, 0, sizeof(*param
));
1428 param
->surface_idx
= surface_idx
;
1429 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1430 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1431 * detailed explanation of these parameters.
1433 param
->swizzling
[0] = 0xff;
1434 param
->swizzling
[1] = 0xff;
// Fills *param for an image unit backed by a buffer texture.
//
// After applying the defaults, size[0] becomes the usable byte size divided
// by the byte size of u->_ActualFormat, and stride[0] becomes that byte
// size. The usable byte size is the smaller of the texture object's
// BufferSize (cast to uint32_t) and the buffer object's Size.
// NOTE(review): the uint32_t cast changes negative or very large BufferSize
// values before the MIN2. Presumably intentional; confirm against how
// BufferSize is defined.
1438 update_buffer_image_param(struct brw_context
*brw
,
1439 struct gl_image_unit
*u
,
1440 unsigned surface_idx
,
1441 struct brw_image_param
*param
)
1443 struct gl_buffer_object
*obj
= u
->TexObj
->BufferObject
;
1444 const uint32_t size
= MIN2((uint32_t)u
->TexObj
->BufferSize
, obj
->Size
);
1445 update_default_image_param(brw
, u
, surface_idx
, param
);
1447 param
->size
[0] = size
/ _mesa_get_format_bytes(u
->_ActualFormat
);
1448 param
->stride
[0] = _mesa_get_format_bytes(u
->_ActualFormat
);
// Returns the number of layers an image view of the miptree exposes.
//
// For GL_TEXTURE_3D this is the level-0 logical depth minified by `level`;
// for other targets that reach the final return it is the level-0
// array_len. GL_TEXTURE_CUBE_MAP is tested first.
// NOTE(review): the declaration of `level` (embedded line 1453) and the
// cube-map branch body (1456) are absent from this listing.
1452 get_image_num_layers(const struct intel_mipmap_tree
*mt
, GLenum target
,
1455 if (target
== GL_TEXTURE_CUBE_MAP
)
1458 return target
== GL_TEXTURE_3D
?
1459 minify(mt
->surf
.logical_level0_px
.depth
, level
) :
1460 mt
->surf
.logical_level0_px
.array_len
;
// Emits surface state and fills the brw_image_param for one image unit.
//
// Paths visible in this listing:
//  - valid unit, GL_TEXTURE_BUFFER target: buffer surface over the texture's
//    buffer object, then update_buffer_image_param().
//  - valid unit, other targets: an isl_view with storage usage is built. A
//    RAW format yields a buffer surface over the miptree BO; the other
//    visible call is brw_emit_surface_state(). isl_surf_fill_image_param()
//    then fills *param and surface_idx is stored.
//  - the remaining visible calls emit a null surface and default parameters.
// Access other than GL_READ_ONLY requests RELOC_WRITE.
//
// NOTE(review): `access` is used throughout but its parameter declaration
// (embedded line 1466) is absent from this listing, as are the else arms.
1464 update_image_surface(struct brw_context
*brw
,
1465 struct gl_image_unit
*u
,
1467 unsigned surface_idx
,
1468 uint32_t *surf_offset
,
1469 struct brw_image_param
*param
)
1471 if (_mesa_is_image_unit_valid(&brw
->ctx
, u
)) {
1472 struct gl_texture_object
*obj
= u
->TexObj
;
1473 const unsigned format
= get_image_format(brw
, u
->_ActualFormat
, access
);
1475 if (obj
->Target
== GL_TEXTURE_BUFFER
) {
1476 struct intel_buffer_object
*intel_obj
=
1477 intel_buffer_object(obj
->BufferObject
);
1478 const unsigned texel_size
= (format
== ISL_FORMAT_RAW
? 1 :
1479 _mesa_get_format_bytes(u
->_ActualFormat
));
// NOTE(review): the surface starts at obj->BufferOffset but is given the
// full intel_obj->Base.Size as its size, so the range is not reduced by the
// offset. Confirm this cannot extend past the end of the buffer.
1481 brw_emit_buffer_surface_state(
1482 brw
, surf_offset
, intel_obj
->buffer
, obj
->BufferOffset
,
1483 format
, intel_obj
->Base
.Size
, texel_size
,
1484 access
!= GL_READ_ONLY
? RELOC_WRITE
: 0);
1486 update_buffer_image_param(brw
, u
, surface_idx
, param
);
1489 struct intel_texture_object
*intel_obj
= intel_texture_object(obj
);
1490 struct intel_mipmap_tree
*mt
= intel_obj
->mt
;
1491 const unsigned num_layers
= u
->Layered
?
1492 get_image_num_layers(mt
, obj
->Target
, u
->Level
) : 1;
1494 struct isl_view view
= {
1496 .base_level
= obj
->MinLevel
+ u
->Level
,
1498 .base_array_layer
= obj
->MinLayer
+ u
->_Layer
,
1499 .array_len
= num_layers
,
1500 .swizzle
= ISL_SWIZZLE_IDENTITY
,
1501 .usage
= ISL_SURF_USAGE_STORAGE_BIT
,
1504 if (format
== ISL_FORMAT_RAW
) {
1505 brw_emit_buffer_surface_state(
1506 brw
, surf_offset
, mt
->bo
, mt
->offset
,
1507 format
, mt
->bo
->size
- mt
->offset
, 1 /* pitch */,
1508 access
!= GL_READ_ONLY
? RELOC_WRITE
: 0);
// NOTE(review): surf_index is derived by subtracting the start of the WM
// stage's surf_offset array, yet callers in this file also pass pointers
// into the CS stage's array. For a non-WM stage the difference is not a
// binding-table index, and subtracting pointers into different arrays is
// undefined in C. Confirm how brw_emit_surface_state() uses surf_index.
1511 const int surf_index
= surf_offset
- &brw
->wm
.base
.surf_offset
[0];
1512 assert(!intel_miptree_has_color_unresolved(mt
,
1514 view
.base_array_layer
,
1516 brw_emit_surface_state(brw
, mt
, mt
->target
, view
,
1518 surf_offset
, surf_index
,
1519 access
== GL_READ_ONLY
? 0 : RELOC_WRITE
);
1522 isl_surf_fill_image_param(&brw
->isl_dev
, param
, &mt
->surf
, &view
);
1523 param
->surface_idx
= surface_idx
;
1527 emit_null_surface_state(brw
, NULL
, surf_offset
);
1528 update_default_image_param(brw
, u
, surface_idx
, param
);
// Uploads surface state for every image a program uses.
//
// For image i the context image unit is ctx->ImageUnits[ImageUnits[i]], the
// binding-table slot is binding_table.image_start + i, and the parameters
// are written to stage_state->image_param[i]. BRW_NEW_SURFACES is then set,
// and _NEW_PROGRAM_CONSTANTS is OR-ed into brw->NewGLState so constants that
// depend on image metadata are re-uploaded.
//
// NOTE(review): prog is dereferenced without a visible null check. Embedded
// lines 1537-1538 and 1540 are absent from this listing, so a guard may sit
// there.
1533 brw_upload_image_surfaces(struct brw_context
*brw
,
1534 const struct gl_program
*prog
,
1535 struct brw_stage_state
*stage_state
,
1536 struct brw_stage_prog_data
*prog_data
)
1539 struct gl_context
*ctx
= &brw
->ctx
;
1541 if (prog
->info
.num_images
) {
1542 for (unsigned i
= 0; i
< prog
->info
.num_images
; i
++) {
1543 struct gl_image_unit
*u
= &ctx
->ImageUnits
[prog
->sh
.ImageUnits
[i
]];
1544 const unsigned surf_idx
= prog_data
->binding_table
.image_start
+ i
;
1546 update_image_surface(brw
, u
, prog
->sh
.ImageAccess
[i
],
1548 &stage_state
->surf_offset
[surf_idx
],
1549 &stage_state
->image_param
[i
]);
1552 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1553 /* This may have changed the image metadata dependent on the context
1554 * image unit state and passed to the program as uniforms, make sure
1555 * that push and pull constants are reuploaded.
1557 brw
->NewGLState
|= _NEW_PROGRAM_CONSTANTS
;
// Fragment-stage wrapper around brw_upload_image_surfaces(): passes
// brw->programs[MESA_SHADER_FRAGMENT] with the WM stage state and prog_data.
// NOTE(review): embedded lines 1566-1567 are absent from this listing, so
// any null check on wm before the call cannot be seen here.
1562 brw_upload_wm_image_surfaces(struct brw_context
*brw
)
1564 /* BRW_NEW_FRAGMENT_PROGRAM */
1565 const struct gl_program
*wm
= brw
->programs
[MESA_SHADER_FRAGMENT
];
1568 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1569 brw_upload_image_surfaces(brw
, wm
, &brw
->wm
.base
,
1570 brw
->wm
.base
.prog_data
);
// Tracked-state atom whose emit callback is brw_upload_wm_image_surfaces.
// Visible dirty bits: _NEW_TEXTURE (mesa); BRW_NEW_BATCH,
// BRW_NEW_FRAGMENT_PROGRAM and BRW_NEW_FS_PROG_DATA (brw).
// NOTE(review): embedded lines 1578 and 1581-1582 are absent from this
// listing and the mask ends on a trailing OR, so further flags belong to it.
1574 const struct brw_tracked_state brw_wm_image_surfaces
= {
1576 .mesa
= _NEW_TEXTURE
,
1577 .brw
= BRW_NEW_BATCH
|
1579 BRW_NEW_FRAGMENT_PROGRAM
|
1580 BRW_NEW_FS_PROG_DATA
|
1583 .emit
= brw_upload_wm_image_surfaces
,
// Emits the buffer surface through which a compute shader reads the number
// of work groups.
//
// Work is done only when a compute program is current and
// cs_prog_data->uses_num_work_groups is set. The binding-table slot is
// binding_table.work_groups_start. When brw->compute.num_work_groups_bo is
// NULL, intel_upload_data() is called with brw->compute.num_work_groups as
// its source. The other visible assignments take bo and bo_offset from
// brw->compute (presumably the else arm; line 1612 is absent). The surface
// spans 3 * sizeof(GLuint) bytes, and BRW_NEW_SURFACES is set afterwards.
//
// NOTE(review): embedded lines 1601-1603, 1605, 1608-1612, 1615-1616,
// 1618-1619 and 1621 are absent from this listing, so the bo / bo_offset
// declarations and several call arguments are not visible here.
1587 brw_upload_cs_work_groups_surface(struct brw_context
*brw
)
1589 struct gl_context
*ctx
= &brw
->ctx
;
1591 struct gl_program
*prog
=
1592 ctx
->_Shader
->CurrentProgram
[MESA_SHADER_COMPUTE
];
1593 /* BRW_NEW_CS_PROG_DATA */
1594 const struct brw_cs_prog_data
*cs_prog_data
=
1595 brw_cs_prog_data(brw
->cs
.base
.prog_data
);
1597 if (prog
&& cs_prog_data
->uses_num_work_groups
) {
1598 const unsigned surf_idx
=
1599 cs_prog_data
->binding_table
.work_groups_start
;
1600 uint32_t *surf_offset
= &brw
->cs
.base
.surf_offset
[surf_idx
];
1604 if (brw
->compute
.num_work_groups_bo
== NULL
) {
1606 intel_upload_data(brw
,
1607 (void *)brw
->compute
.num_work_groups
,
1613 bo
= brw
->compute
.num_work_groups_bo
;
1614 bo_offset
= brw
->compute
.num_work_groups_offset
;
1617 brw_emit_buffer_surface_state(brw
, surf_offset
,
1620 3 * sizeof(GLuint
), 1,
1622 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1626 const struct brw_tracked_state brw_cs_work_groups_surface
= {
1628 .brw
= BRW_NEW_CS_PROG_DATA
|
1629 BRW_NEW_CS_WORK_GROUPS
1631 .emit
= brw_upload_cs_work_groups_surface
,