2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keithw@vmware.com>
33 #include "compiler/nir/nir.h"
34 #include "main/context.h"
35 #include "main/blend.h"
36 #include "main/mtypes.h"
37 #include "main/samplerobj.h"
38 #include "main/shaderimage.h"
39 #include "main/teximage.h"
40 #include "program/prog_parameter.h"
41 #include "program/prog_instruction.h"
42 #include "main/framebuffer.h"
43 #include "main/shaderapi.h"
47 #include "intel_mipmap_tree.h"
48 #include "intel_batchbuffer.h"
49 #include "intel_tex.h"
50 #include "intel_fbo.h"
51 #include "intel_buffer_objects.h"
53 #include "brw_context.h"
54 #include "brw_state.h"
55 #include "brw_defines.h"
58 uint32_t wb_mocs
[] = {
66 uint32_t pte_mocs
[] = {
75 brw_get_bo_mocs(const struct gen_device_info
*devinfo
, struct brw_bo
*bo
)
77 return (bo
&& bo
->external
? pte_mocs
: wb_mocs
)[devinfo
->gen
];
81 get_isl_surf(struct brw_context
*brw
, struct intel_mipmap_tree
*mt
,
82 GLenum target
, struct isl_view
*view
,
83 uint32_t *tile_x
, uint32_t *tile_y
,
84 uint32_t *offset
, struct isl_surf
*surf
)
88 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
89 const enum isl_dim_layout dim_layout
=
90 get_isl_dim_layout(devinfo
, mt
->surf
.tiling
, target
);
92 if (surf
->dim_layout
== dim_layout
)
95 /* The layout of the specified texture target is not compatible with the
96 * actual layout of the miptree structure in memory -- You're entering
97 * dangerous territory, this can only possibly work if you only intended
98 * to access a single level and slice of the texture, and the hardware
99 * supports the tile offset feature in order to allow non-tile-aligned
100 * base offsets, since we'll have to point the hardware to the first
101 * texel of the level instead of relying on the usual base level/layer
104 assert(devinfo
->has_surface_tile_offset
);
105 assert(view
->levels
== 1 && view
->array_len
== 1);
106 assert(*tile_x
== 0 && *tile_y
== 0);
108 *offset
+= intel_miptree_get_tile_offsets(mt
, view
->base_level
,
109 view
->base_array_layer
,
112 /* Minify the logical dimensions of the texture. */
113 const unsigned l
= view
->base_level
- mt
->first_level
;
114 surf
->logical_level0_px
.width
= minify(surf
->logical_level0_px
.width
, l
);
115 surf
->logical_level0_px
.height
= surf
->dim
<= ISL_SURF_DIM_1D
? 1 :
116 minify(surf
->logical_level0_px
.height
, l
);
117 surf
->logical_level0_px
.depth
= surf
->dim
<= ISL_SURF_DIM_2D
? 1 :
118 minify(surf
->logical_level0_px
.depth
, l
);
120 /* Only the base level and layer can be addressed with the overridden
123 surf
->logical_level0_px
.array_len
= 1;
125 surf
->dim_layout
= dim_layout
;
127 /* The requested slice of the texture is now at the base level and
130 view
->base_level
= 0;
131 view
->base_array_layer
= 0;
135 brw_emit_surface_state(struct brw_context
*brw
,
136 struct intel_mipmap_tree
*mt
,
137 GLenum target
, struct isl_view view
,
138 enum isl_aux_usage aux_usage
,
139 uint32_t *surf_offset
, int surf_index
,
140 unsigned reloc_flags
)
142 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
143 uint32_t tile_x
= mt
->level
[0].level_x
;
144 uint32_t tile_y
= mt
->level
[0].level_y
;
145 uint32_t offset
= mt
->offset
;
147 struct isl_surf surf
;
149 get_isl_surf(brw
, mt
, target
, &view
, &tile_x
, &tile_y
, &offset
, &surf
);
151 union isl_color_value clear_color
= { .u32
= { 0, 0, 0, 0 } };
153 struct brw_bo
*aux_bo
;
154 struct isl_surf
*aux_surf
= NULL
;
155 uint64_t aux_offset
= 0;
157 case ISL_AUX_USAGE_MCS
:
158 case ISL_AUX_USAGE_CCS_D
:
159 case ISL_AUX_USAGE_CCS_E
:
160 aux_surf
= &mt
->mcs_buf
->surf
;
161 aux_bo
= mt
->mcs_buf
->bo
;
162 aux_offset
= mt
->mcs_buf
->offset
;
165 case ISL_AUX_USAGE_HIZ
:
166 aux_surf
= &mt
->hiz_buf
->surf
;
167 aux_bo
= mt
->hiz_buf
->bo
;
171 case ISL_AUX_USAGE_NONE
:
175 if (aux_usage
!= ISL_AUX_USAGE_NONE
) {
176 /* We only really need a clear color if we also have an auxiliary
177 * surface. Without one, it does nothing.
179 clear_color
= mt
->fast_clear_color
;
182 void *state
= brw_state_batch(brw
,
183 brw
->isl_dev
.ss
.size
,
184 brw
->isl_dev
.ss
.align
,
187 isl_surf_fill_state(&brw
->isl_dev
, state
, .surf
= &mt
->surf
, .view
= &view
,
188 .address
= brw_state_reloc(&brw
->batch
,
189 *surf_offset
+ brw
->isl_dev
.ss
.addr_offset
,
190 mt
->bo
, offset
, reloc_flags
),
191 .aux_surf
= aux_surf
, .aux_usage
= aux_usage
,
192 .aux_address
= aux_offset
,
193 .mocs
= brw_get_bo_mocs(devinfo
, mt
->bo
),
194 .clear_color
= clear_color
,
195 .x_offset_sa
= tile_x
, .y_offset_sa
= tile_y
);
197 /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
198 * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
199 * contain other control information. Since buffer addresses are always
200 * on 4k boundaries (and thus have their lower 12 bits zero), we can use
201 * an ordinary reloc to do the necessary address translation.
203 * FIXME: move to the point of assignment.
205 assert((aux_offset
& 0xfff) == 0);
206 uint32_t *aux_addr
= state
+ brw
->isl_dev
.ss
.aux_addr_offset
;
207 *aux_addr
= brw_state_reloc(&brw
->batch
,
209 brw
->isl_dev
.ss
.aux_addr_offset
,
216 gen6_update_renderbuffer_surface(struct brw_context
*brw
,
217 struct gl_renderbuffer
*rb
,
221 struct gl_context
*ctx
= &brw
->ctx
;
222 struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
223 struct intel_mipmap_tree
*mt
= irb
->mt
;
225 assert(brw_render_target_supported(brw
, rb
));
227 mesa_format rb_format
= _mesa_get_render_format(ctx
, intel_rb_format(irb
));
228 if (unlikely(!brw
->mesa_format_supports_render
[rb_format
])) {
229 _mesa_problem(ctx
, "%s: renderbuffer format %s unsupported\n",
230 __func__
, _mesa_get_format_name(rb_format
));
232 enum isl_format isl_format
= brw
->mesa_to_isl_render_format
[rb_format
];
234 struct isl_view view
= {
235 .format
= isl_format
,
236 .base_level
= irb
->mt_level
- irb
->mt
->first_level
,
238 .base_array_layer
= irb
->mt_layer
,
239 .array_len
= MAX2(irb
->layer_count
, 1),
240 .swizzle
= ISL_SWIZZLE_IDENTITY
,
241 .usage
= ISL_SURF_USAGE_RENDER_TARGET_BIT
,
245 brw_emit_surface_state(brw
, mt
, mt
->target
, view
,
246 brw
->draw_aux_usage
[unit
],
253 translate_tex_target(GLenum target
)
257 case GL_TEXTURE_1D_ARRAY_EXT
:
258 return BRW_SURFACE_1D
;
260 case GL_TEXTURE_RECTANGLE_NV
:
261 return BRW_SURFACE_2D
;
264 case GL_TEXTURE_2D_ARRAY_EXT
:
265 case GL_TEXTURE_EXTERNAL_OES
:
266 case GL_TEXTURE_2D_MULTISAMPLE
:
267 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY
:
268 return BRW_SURFACE_2D
;
271 return BRW_SURFACE_3D
;
273 case GL_TEXTURE_CUBE_MAP
:
274 case GL_TEXTURE_CUBE_MAP_ARRAY
:
275 return BRW_SURFACE_CUBE
;
278 unreachable("not reached");
283 brw_get_surface_tiling_bits(enum isl_tiling tiling
)
287 return BRW_SURFACE_TILED
;
289 return BRW_SURFACE_TILED
| BRW_SURFACE_TILED_Y
;
297 brw_get_surface_num_multisamples(unsigned num_samples
)
300 return BRW_SURFACE_MULTISAMPLECOUNT_4
;
302 return BRW_SURFACE_MULTISAMPLECOUNT_1
;
306 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
310 brw_get_texture_swizzle(const struct gl_context
*ctx
,
311 const struct gl_texture_object
*t
)
313 const struct gl_texture_image
*img
= t
->Image
[0][t
->BaseLevel
];
315 int swizzles
[SWIZZLE_NIL
+ 1] = {
325 if (img
->_BaseFormat
== GL_DEPTH_COMPONENT
||
326 img
->_BaseFormat
== GL_DEPTH_STENCIL
) {
327 GLenum depth_mode
= t
->DepthMode
;
329 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
330 * with depth component data specified with a sized internal format.
331 * Otherwise, it's left at the old default, GL_LUMINANCE.
333 if (_mesa_is_gles3(ctx
) &&
334 img
->InternalFormat
!= GL_DEPTH_COMPONENT
&&
335 img
->InternalFormat
!= GL_DEPTH_STENCIL
) {
339 switch (depth_mode
) {
341 swizzles
[0] = SWIZZLE_ZERO
;
342 swizzles
[1] = SWIZZLE_ZERO
;
343 swizzles
[2] = SWIZZLE_ZERO
;
344 swizzles
[3] = SWIZZLE_X
;
347 swizzles
[0] = SWIZZLE_X
;
348 swizzles
[1] = SWIZZLE_X
;
349 swizzles
[2] = SWIZZLE_X
;
350 swizzles
[3] = SWIZZLE_ONE
;
353 swizzles
[0] = SWIZZLE_X
;
354 swizzles
[1] = SWIZZLE_X
;
355 swizzles
[2] = SWIZZLE_X
;
356 swizzles
[3] = SWIZZLE_X
;
359 swizzles
[0] = SWIZZLE_X
;
360 swizzles
[1] = SWIZZLE_ZERO
;
361 swizzles
[2] = SWIZZLE_ZERO
;
362 swizzles
[3] = SWIZZLE_ONE
;
367 GLenum datatype
= _mesa_get_format_datatype(img
->TexFormat
);
369 /* If the texture's format is alpha-only, force R, G, and B to
370 * 0.0. Similarly, if the texture's format has no alpha channel,
371 * force the alpha value read to 1.0. This allows for the
372 * implementation to use an RGBA texture for any of these formats
373 * without leaking any unexpected values.
375 switch (img
->_BaseFormat
) {
377 swizzles
[0] = SWIZZLE_ZERO
;
378 swizzles
[1] = SWIZZLE_ZERO
;
379 swizzles
[2] = SWIZZLE_ZERO
;
382 if (t
->_IsIntegerFormat
|| datatype
== GL_SIGNED_NORMALIZED
) {
383 swizzles
[0] = SWIZZLE_X
;
384 swizzles
[1] = SWIZZLE_X
;
385 swizzles
[2] = SWIZZLE_X
;
386 swizzles
[3] = SWIZZLE_ONE
;
389 case GL_LUMINANCE_ALPHA
:
390 if (datatype
== GL_SIGNED_NORMALIZED
) {
391 swizzles
[0] = SWIZZLE_X
;
392 swizzles
[1] = SWIZZLE_X
;
393 swizzles
[2] = SWIZZLE_X
;
394 swizzles
[3] = SWIZZLE_W
;
398 if (datatype
== GL_SIGNED_NORMALIZED
) {
399 swizzles
[0] = SWIZZLE_X
;
400 swizzles
[1] = SWIZZLE_X
;
401 swizzles
[2] = SWIZZLE_X
;
402 swizzles
[3] = SWIZZLE_X
;
408 if (_mesa_get_format_bits(img
->TexFormat
, GL_ALPHA_BITS
) > 0 ||
409 img
->TexFormat
== MESA_FORMAT_RGB_DXT1
||
410 img
->TexFormat
== MESA_FORMAT_SRGB_DXT1
)
411 swizzles
[3] = SWIZZLE_ONE
;
415 return MAKE_SWIZZLE4(swizzles
[GET_SWZ(t
->_Swizzle
, 0)],
416 swizzles
[GET_SWZ(t
->_Swizzle
, 1)],
417 swizzles
[GET_SWZ(t
->_Swizzle
, 2)],
418 swizzles
[GET_SWZ(t
->_Swizzle
, 3)]);
422 * Convert an swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+
423 * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED). The mappings are
425 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
428 * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE
430 * which is simply adding 4 then modding by 8 (or anding with 7).
432 * We then may need to apply workarounds for textureGather hardware bugs.
435 swizzle_to_scs(GLenum swizzle
, bool need_green_to_blue
)
437 unsigned scs
= (swizzle
+ 4) & 7;
439 return (need_green_to_blue
&& scs
== HSW_SCS_GREEN
) ? HSW_SCS_BLUE
: scs
;
442 static void brw_update_texture_surface(struct gl_context
*ctx
,
444 uint32_t *surf_offset
,
449 struct brw_context
*brw
= brw_context(ctx
);
450 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
451 struct gl_texture_object
*obj
= ctx
->Texture
.Unit
[unit
]._Current
;
453 if (obj
->Target
== GL_TEXTURE_BUFFER
) {
454 brw_update_buffer_texture_surface(ctx
, unit
, surf_offset
);
457 struct intel_texture_object
*intel_obj
= intel_texture_object(obj
);
458 struct intel_mipmap_tree
*mt
= intel_obj
->mt
;
461 if (mt
->plane
[plane
- 1] == NULL
)
463 mt
= mt
->plane
[plane
- 1];
466 struct gl_sampler_object
*sampler
= _mesa_get_samplerobj(ctx
, unit
);
467 /* If this is a view with restricted NumLayers, then our effective depth
468 * is not just the miptree depth.
470 unsigned view_num_layers
;
471 if (obj
->Immutable
&& obj
->Target
!= GL_TEXTURE_3D
) {
472 view_num_layers
= obj
->NumLayers
;
474 view_num_layers
= mt
->surf
.dim
== ISL_SURF_DIM_3D
?
475 mt
->surf
.logical_level0_px
.depth
:
476 mt
->surf
.logical_level0_px
.array_len
;
479 /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
480 * texturing functions that return a float, as our code generation always
481 * selects the .x channel (which would always be 0).
483 struct gl_texture_image
*firstImage
= obj
->Image
[0][obj
->BaseLevel
];
484 const bool alpha_depth
= obj
->DepthMode
== GL_ALPHA
&&
485 (firstImage
->_BaseFormat
== GL_DEPTH_COMPONENT
||
486 firstImage
->_BaseFormat
== GL_DEPTH_STENCIL
);
487 const unsigned swizzle
= (unlikely(alpha_depth
) ? SWIZZLE_XYZW
:
488 brw_get_texture_swizzle(&brw
->ctx
, obj
));
490 mesa_format mesa_fmt
= plane
== 0 ? intel_obj
->_Format
: mt
->format
;
491 enum isl_format format
= translate_tex_format(brw
, mesa_fmt
,
492 for_txf
? GL_DECODE_EXT
:
493 sampler
->sRGBDecode
);
495 /* Implement gen6 and gen7 gather work-around */
496 bool need_green_to_blue
= false;
498 if (devinfo
->gen
== 7 && (format
== ISL_FORMAT_R32G32_FLOAT
||
499 format
== ISL_FORMAT_R32G32_SINT
||
500 format
== ISL_FORMAT_R32G32_UINT
)) {
501 format
= ISL_FORMAT_R32G32_FLOAT_LD
;
502 need_green_to_blue
= devinfo
->is_haswell
;
503 } else if (devinfo
->gen
== 6) {
504 /* Sandybridge's gather4 message is broken for integer formats.
505 * To work around this, we pretend the surface is UNORM for
506 * 8 or 16-bit formats, and emit shader instructions to recover
507 * the real INT/UINT value. For 32-bit formats, we pretend
508 * the surface is FLOAT, and simply reinterpret the resulting
512 case ISL_FORMAT_R8_SINT
:
513 case ISL_FORMAT_R8_UINT
:
514 format
= ISL_FORMAT_R8_UNORM
;
517 case ISL_FORMAT_R16_SINT
:
518 case ISL_FORMAT_R16_UINT
:
519 format
= ISL_FORMAT_R16_UNORM
;
522 case ISL_FORMAT_R32_SINT
:
523 case ISL_FORMAT_R32_UINT
:
524 format
= ISL_FORMAT_R32_FLOAT
;
533 if (obj
->StencilSampling
&& firstImage
->_BaseFormat
== GL_DEPTH_STENCIL
) {
534 if (devinfo
->gen
<= 7) {
535 assert(mt
->r8stencil_mt
&& !mt
->stencil_mt
->r8stencil_needs_update
);
536 mt
= mt
->r8stencil_mt
;
540 format
= ISL_FORMAT_R8_UINT
;
541 } else if (devinfo
->gen
<= 7 && mt
->format
== MESA_FORMAT_S_UINT8
) {
542 assert(mt
->r8stencil_mt
&& !mt
->r8stencil_needs_update
);
543 mt
= mt
->r8stencil_mt
;
544 format
= ISL_FORMAT_R8_UINT
;
547 const int surf_index
= surf_offset
- &brw
->wm
.base
.surf_offset
[0];
549 struct isl_view view
= {
551 .base_level
= obj
->MinLevel
+ obj
->BaseLevel
,
552 .levels
= intel_obj
->_MaxLevel
- obj
->BaseLevel
+ 1,
553 .base_array_layer
= obj
->MinLayer
,
554 .array_len
= view_num_layers
,
556 .r
= swizzle_to_scs(GET_SWZ(swizzle
, 0), need_green_to_blue
),
557 .g
= swizzle_to_scs(GET_SWZ(swizzle
, 1), need_green_to_blue
),
558 .b
= swizzle_to_scs(GET_SWZ(swizzle
, 2), need_green_to_blue
),
559 .a
= swizzle_to_scs(GET_SWZ(swizzle
, 3), need_green_to_blue
),
561 .usage
= ISL_SURF_USAGE_TEXTURE_BIT
,
564 if (obj
->Target
== GL_TEXTURE_CUBE_MAP
||
565 obj
->Target
== GL_TEXTURE_CUBE_MAP_ARRAY
)
566 view
.usage
|= ISL_SURF_USAGE_CUBE_BIT
;
568 enum isl_aux_usage aux_usage
=
569 intel_miptree_texture_aux_usage(brw
, mt
, format
);
571 brw_emit_surface_state(brw
, mt
, mt
->target
, view
, aux_usage
,
572 surf_offset
, surf_index
,
578 brw_emit_buffer_surface_state(struct brw_context
*brw
,
579 uint32_t *out_offset
,
581 unsigned buffer_offset
,
582 unsigned surface_format
,
583 unsigned buffer_size
,
585 unsigned reloc_flags
)
587 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
588 uint32_t *dw
= brw_state_batch(brw
,
589 brw
->isl_dev
.ss
.size
,
590 brw
->isl_dev
.ss
.align
,
593 isl_buffer_fill_state(&brw
->isl_dev
, dw
,
594 .address
= !bo
? buffer_offset
:
595 brw_state_reloc(&brw
->batch
,
596 *out_offset
+ brw
->isl_dev
.ss
.addr_offset
,
600 .format
= surface_format
,
602 .mocs
= brw_get_bo_mocs(devinfo
, bo
));
606 brw_update_buffer_texture_surface(struct gl_context
*ctx
,
608 uint32_t *surf_offset
)
610 struct brw_context
*brw
= brw_context(ctx
);
611 struct gl_texture_object
*tObj
= ctx
->Texture
.Unit
[unit
]._Current
;
612 struct intel_buffer_object
*intel_obj
=
613 intel_buffer_object(tObj
->BufferObject
);
614 uint32_t size
= tObj
->BufferSize
;
615 struct brw_bo
*bo
= NULL
;
616 mesa_format format
= tObj
->_BufferObjectFormat
;
617 const enum isl_format isl_format
= brw_isl_format_for_mesa_format(format
);
618 int texel_size
= _mesa_get_format_bytes(format
);
621 size
= MIN2(size
, intel_obj
->Base
.Size
);
622 bo
= intel_bufferobj_buffer(brw
, intel_obj
, tObj
->BufferOffset
, size
,
626 /* The ARB_texture_buffer_object specification says:
628 * "The number of texels in the buffer texture's texel array is given by
630 * floor(<buffer_size> / (<components> * sizeof(<base_type>)),
632 * where <buffer_size> is the size of the buffer object, in basic
633 * machine units and <components> and <base_type> are the element count
634 * and base data type for elements, as specified in Table X.1. The
635 * number of texels in the texel array is then clamped to the
636 * implementation-dependent limit MAX_TEXTURE_BUFFER_SIZE_ARB."
638 * We need to clamp the size in bytes to MAX_TEXTURE_BUFFER_SIZE * stride,
639 * so that when ISL divides by stride to obtain the number of texels, that
640 * texel count is clamped to MAX_TEXTURE_BUFFER_SIZE.
642 size
= MIN2(size
, ctx
->Const
.MaxTextureBufferSize
* (unsigned) texel_size
);
644 if (isl_format
== ISL_FORMAT_UNSUPPORTED
) {
645 _mesa_problem(NULL
, "bad format %s for texture buffer\n",
646 _mesa_get_format_name(format
));
649 brw_emit_buffer_surface_state(brw
, surf_offset
, bo
,
658 * Set up a binding table entry for use by stream output logic (transform
661 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
664 brw_update_sol_surface(struct brw_context
*brw
,
665 struct gl_buffer_object
*buffer_obj
,
666 uint32_t *out_offset
, unsigned num_vector_components
,
667 unsigned stride_dwords
, unsigned offset_dwords
)
669 struct intel_buffer_object
*intel_bo
= intel_buffer_object(buffer_obj
);
670 uint32_t offset_bytes
= 4 * offset_dwords
;
671 struct brw_bo
*bo
= intel_bufferobj_buffer(brw
, intel_bo
,
673 buffer_obj
->Size
- offset_bytes
,
675 uint32_t *surf
= brw_state_batch(brw
, 6 * 4, 32, out_offset
);
676 uint32_t pitch_minus_1
= 4*stride_dwords
- 1;
677 size_t size_dwords
= buffer_obj
->Size
/ 4;
678 uint32_t buffer_size_minus_1
, width
, height
, depth
, surface_format
;
680 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
681 * too big to map using a single binding table entry?
683 assert((size_dwords
- offset_dwords
) / stride_dwords
684 <= BRW_MAX_NUM_BUFFER_ENTRIES
);
686 if (size_dwords
> offset_dwords
+ num_vector_components
) {
687 /* There is room for at least 1 transform feedback output in the buffer.
688 * Compute the number of additional transform feedback outputs the
689 * buffer has room for.
691 buffer_size_minus_1
=
692 (size_dwords
- offset_dwords
- num_vector_components
) / stride_dwords
;
694 /* There isn't even room for a single transform feedback output in the
695 * buffer. We can't configure the binding table entry to prevent output
696 * entirely; we'll have to rely on the geometry shader to detect
697 * overflow. But to minimize the damage in case of a bug, set up the
698 * binding table entry to just allow a single output.
700 buffer_size_minus_1
= 0;
702 width
= buffer_size_minus_1
& 0x7f;
703 height
= (buffer_size_minus_1
& 0xfff80) >> 7;
704 depth
= (buffer_size_minus_1
& 0x7f00000) >> 20;
706 switch (num_vector_components
) {
708 surface_format
= ISL_FORMAT_R32_FLOAT
;
711 surface_format
= ISL_FORMAT_R32G32_FLOAT
;
714 surface_format
= ISL_FORMAT_R32G32B32_FLOAT
;
717 surface_format
= ISL_FORMAT_R32G32B32A32_FLOAT
;
720 unreachable("Invalid vector size for transform feedback output");
723 surf
[0] = BRW_SURFACE_BUFFER
<< BRW_SURFACE_TYPE_SHIFT
|
724 BRW_SURFACE_MIPMAPLAYOUT_BELOW
<< BRW_SURFACE_MIPLAYOUT_SHIFT
|
725 surface_format
<< BRW_SURFACE_FORMAT_SHIFT
|
726 BRW_SURFACE_RC_READ_WRITE
;
727 surf
[1] = brw_state_reloc(&brw
->batch
,
728 *out_offset
+ 4, bo
, offset_bytes
, RELOC_WRITE
);
729 surf
[2] = (width
<< BRW_SURFACE_WIDTH_SHIFT
|
730 height
<< BRW_SURFACE_HEIGHT_SHIFT
);
731 surf
[3] = (depth
<< BRW_SURFACE_DEPTH_SHIFT
|
732 pitch_minus_1
<< BRW_SURFACE_PITCH_SHIFT
);
737 /* Creates a new WM constant buffer reflecting the current fragment program's
738 * constants, if needed by the fragment program.
740 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
744 brw_upload_wm_pull_constants(struct brw_context
*brw
)
746 struct brw_stage_state
*stage_state
= &brw
->wm
.base
;
747 /* BRW_NEW_FRAGMENT_PROGRAM */
748 struct brw_program
*fp
=
749 (struct brw_program
*) brw
->programs
[MESA_SHADER_FRAGMENT
];
751 /* BRW_NEW_FS_PROG_DATA */
752 struct brw_stage_prog_data
*prog_data
= brw
->wm
.base
.prog_data
;
754 _mesa_shader_write_subroutine_indices(&brw
->ctx
, MESA_SHADER_FRAGMENT
);
755 /* _NEW_PROGRAM_CONSTANTS */
756 brw_upload_pull_constants(brw
, BRW_NEW_SURFACES
, &fp
->program
,
757 stage_state
, prog_data
);
760 const struct brw_tracked_state brw_wm_pull_constants
= {
762 .mesa
= _NEW_PROGRAM_CONSTANTS
,
763 .brw
= BRW_NEW_BATCH
|
764 BRW_NEW_FRAGMENT_PROGRAM
|
765 BRW_NEW_FS_PROG_DATA
,
767 .emit
= brw_upload_wm_pull_constants
,
771 * Creates a null renderbuffer surface.
773 * This is used when the shader doesn't write to any color output. An FB
774 * write to target 0 will still be emitted, because that's how the thread is
775 * terminated (and computed depth is returned), so we need to have the
776 * hardware discard the target 0 color output.
779 emit_null_surface_state(struct brw_context
*brw
,
780 const struct gl_framebuffer
*fb
,
781 uint32_t *out_offset
)
783 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
784 uint32_t *surf
= brw_state_batch(brw
,
785 brw
->isl_dev
.ss
.size
,
786 brw
->isl_dev
.ss
.align
,
789 /* Use the fb dimensions or 1x1x1 */
790 const unsigned width
= fb
? _mesa_geometric_width(fb
) : 1;
791 const unsigned height
= fb
? _mesa_geometric_height(fb
) : 1;
792 const unsigned samples
= fb
? _mesa_geometric_samples(fb
) : 1;
794 if (devinfo
->gen
!= 6 || samples
<= 1) {
795 isl_null_fill_state(&brw
->isl_dev
, surf
,
796 isl_extent3d(width
, height
, 1));
800 /* On Gen6, null render targets seem to cause GPU hangs when multisampling.
801 * So work around this problem by rendering into dummy color buffer.
803 * To decrease the amount of memory needed by the workaround buffer, we
804 * set its pitch to 128 bytes (the width of a Y tile). This means that
805 * the amount of memory needed for the workaround buffer is
806 * (width_in_tiles + height_in_tiles - 1) tiles.
808 * Note that since the workaround buffer will be interpreted by the
809 * hardware as an interleaved multisampled buffer, we need to compute
810 * width_in_tiles and height_in_tiles by dividing the width and height
811 * by 16 rather than the normal Y-tile size of 32.
813 unsigned width_in_tiles
= ALIGN(width
, 16) / 16;
814 unsigned height_in_tiles
= ALIGN(height
, 16) / 16;
815 unsigned pitch_minus_1
= 127;
816 unsigned size_needed
= (width_in_tiles
+ height_in_tiles
- 1) * 4096;
817 brw_get_scratch_bo(brw
, &brw
->wm
.multisampled_null_render_target_bo
,
820 surf
[0] = (BRW_SURFACE_2D
<< BRW_SURFACE_TYPE_SHIFT
|
821 ISL_FORMAT_B8G8R8A8_UNORM
<< BRW_SURFACE_FORMAT_SHIFT
);
822 surf
[1] = brw_state_reloc(&brw
->batch
, *out_offset
+ 4,
823 brw
->wm
.multisampled_null_render_target_bo
,
826 surf
[2] = ((width
- 1) << BRW_SURFACE_WIDTH_SHIFT
|
827 (height
- 1) << BRW_SURFACE_HEIGHT_SHIFT
);
829 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
832 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
834 surf
[3] = (BRW_SURFACE_TILED
| BRW_SURFACE_TILED_Y
|
835 pitch_minus_1
<< BRW_SURFACE_PITCH_SHIFT
);
836 surf
[4] = BRW_SURFACE_MULTISAMPLECOUNT_4
;
841 * Sets up a surface state structure to point at the given region.
842 * While it is only used for the front/back buffer currently, it should be
843 * usable for further buffers when doing ARB_draw_buffer support.
846 gen4_update_renderbuffer_surface(struct brw_context
*brw
,
847 struct gl_renderbuffer
*rb
,
851 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
852 struct gl_context
*ctx
= &brw
->ctx
;
853 struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
854 struct intel_mipmap_tree
*mt
= irb
->mt
;
856 uint32_t tile_x
, tile_y
;
857 enum isl_format format
;
860 mesa_format rb_format
= _mesa_get_render_format(ctx
, intel_rb_format(irb
));
861 /* BRW_NEW_FS_PROG_DATA */
863 if (rb
->TexImage
&& !devinfo
->has_surface_tile_offset
) {
864 intel_renderbuffer_get_tile_offsets(irb
, &tile_x
, &tile_y
);
866 if (tile_x
!= 0 || tile_y
!= 0) {
867 /* Original gen4 hardware couldn't draw to a non-tile-aligned
868 * destination in a miptree unless you actually setup your renderbuffer
869 * as a miptree and used the fragile lod/array_index/etc. controls to
870 * select the image. So, instead, we just make a new single-level
871 * miptree and render into that.
873 intel_renderbuffer_move_to_temp(brw
, irb
, false);
874 assert(irb
->align_wa_mt
);
875 mt
= irb
->align_wa_mt
;
879 surf
= brw_state_batch(brw
, 6 * 4, 32, &offset
);
881 format
= brw
->mesa_to_isl_render_format
[rb_format
];
882 if (unlikely(!brw
->mesa_format_supports_render
[rb_format
])) {
883 _mesa_problem(ctx
, "%s: renderbuffer format %s unsupported\n",
884 __func__
, _mesa_get_format_name(rb_format
));
887 surf
[0] = (BRW_SURFACE_2D
<< BRW_SURFACE_TYPE_SHIFT
|
888 format
<< BRW_SURFACE_FORMAT_SHIFT
);
891 assert(mt
->offset
% mt
->cpp
== 0);
892 surf
[1] = brw_state_reloc(&brw
->batch
, offset
+ 4, mt
->bo
,
894 intel_renderbuffer_get_tile_offsets(irb
,
899 surf
[2] = ((rb
->Width
- 1) << BRW_SURFACE_WIDTH_SHIFT
|
900 (rb
->Height
- 1) << BRW_SURFACE_HEIGHT_SHIFT
);
902 surf
[3] = (brw_get_surface_tiling_bits(mt
->surf
.tiling
) |
903 (mt
->surf
.row_pitch
- 1) << BRW_SURFACE_PITCH_SHIFT
);
905 surf
[4] = brw_get_surface_num_multisamples(mt
->surf
.samples
);
907 assert(devinfo
->has_surface_tile_offset
|| (tile_x
== 0 && tile_y
== 0));
908 /* Note that the low bits of these fields are missing, so
909 * there's the possibility of getting in trouble.
911 assert(tile_x
% 4 == 0);
912 assert(tile_y
% 2 == 0);
913 surf
[5] = ((tile_x
/ 4) << BRW_SURFACE_X_OFFSET_SHIFT
|
914 (tile_y
/ 2) << BRW_SURFACE_Y_OFFSET_SHIFT
|
915 (mt
->surf
.image_alignment_el
.height
== 4 ?
916 BRW_SURFACE_VERTICAL_ALIGN_ENABLE
: 0));
918 if (devinfo
->gen
< 6) {
920 if (!ctx
->Color
.ColorLogicOpEnabled
&& !ctx
->Color
._AdvancedBlendMode
&&
921 (ctx
->Color
.BlendEnabled
& (1 << unit
)))
922 surf
[0] |= BRW_SURFACE_BLEND_ENABLED
;
924 if (!GET_COLORMASK_BIT(ctx
->Color
.ColorMask
, unit
, 0))
925 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT
;
926 if (!GET_COLORMASK_BIT(ctx
->Color
.ColorMask
, unit
, 1))
927 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT
;
928 if (!GET_COLORMASK_BIT(ctx
->Color
.ColorMask
, unit
, 2))
929 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT
;
931 /* As mentioned above, disable writes to the alpha component when the
932 * renderbuffer is XRGB.
934 if (ctx
->DrawBuffer
->Visual
.alphaBits
== 0 ||
935 !GET_COLORMASK_BIT(ctx
->Color
.ColorMask
, unit
, 3)) {
936 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT
;
944 update_renderbuffer_surfaces(struct brw_context
*brw
)
946 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
947 const struct gl_context
*ctx
= &brw
->ctx
;
949 /* _NEW_BUFFERS | _NEW_COLOR */
950 const struct gl_framebuffer
*fb
= ctx
->DrawBuffer
;
952 /* Render targets always start at binding table index 0. */
953 const unsigned rt_start
= 0;
955 uint32_t *surf_offsets
= brw
->wm
.base
.surf_offset
;
957 /* Update surfaces for drawing buffers */
958 if (fb
->_NumColorDrawBuffers
>= 1) {
959 for (unsigned i
= 0; i
< fb
->_NumColorDrawBuffers
; i
++) {
960 struct gl_renderbuffer
*rb
= fb
->_ColorDrawBuffers
[i
];
962 if (intel_renderbuffer(rb
)) {
963 surf_offsets
[rt_start
+ i
] = devinfo
->gen
>= 6 ?
964 gen6_update_renderbuffer_surface(brw
, rb
, i
, rt_start
+ i
) :
965 gen4_update_renderbuffer_surface(brw
, rb
, i
, rt_start
+ i
);
967 emit_null_surface_state(brw
, fb
, &surf_offsets
[rt_start
+ i
]);
971 emit_null_surface_state(brw
, fb
, &surf_offsets
[rt_start
]);
974 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
977 const struct brw_tracked_state brw_renderbuffer_surfaces
= {
979 .mesa
= _NEW_BUFFERS
|
981 .brw
= BRW_NEW_BATCH
,
983 .emit
= update_renderbuffer_surfaces
,
986 const struct brw_tracked_state gen6_renderbuffer_surfaces
= {
988 .mesa
= _NEW_BUFFERS
,
989 .brw
= BRW_NEW_BATCH
|
992 .emit
= update_renderbuffer_surfaces
,
996 update_renderbuffer_read_surfaces(struct brw_context
*brw
)
998 const struct gl_context
*ctx
= &brw
->ctx
;
1000 /* BRW_NEW_FS_PROG_DATA */
1001 const struct brw_wm_prog_data
*wm_prog_data
=
1002 brw_wm_prog_data(brw
->wm
.base
.prog_data
);
1004 if (wm_prog_data
->has_render_target_reads
&&
1005 !ctx
->Extensions
.MESA_shader_framebuffer_fetch
) {
1007 const struct gl_framebuffer
*fb
= ctx
->DrawBuffer
;
1009 for (unsigned i
= 0; i
< fb
->_NumColorDrawBuffers
; i
++) {
1010 struct gl_renderbuffer
*rb
= fb
->_ColorDrawBuffers
[i
];
1011 const struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
1012 const unsigned surf_index
=
1013 wm_prog_data
->binding_table
.render_target_read_start
+ i
;
1014 uint32_t *surf_offset
= &brw
->wm
.base
.surf_offset
[surf_index
];
1017 const enum isl_format format
= brw
->mesa_to_isl_render_format
[
1018 _mesa_get_render_format(ctx
, intel_rb_format(irb
))];
1019 assert(isl_format_supports_sampling(&brw
->screen
->devinfo
,
1022 /* Override the target of the texture if the render buffer is a
1023 * single slice of a 3D texture (since the minimum array element
1024 * field of the surface state structure is ignored by the sampler
1025 * unit for 3D textures on some hardware), or if the render buffer
1026 * is a 1D array (since shaders always provide the array index
1027 * coordinate at the Z component to avoid state-dependent
1028 * recompiles when changing the texture target of the
1031 const GLenum target
=
1032 (irb
->mt
->target
== GL_TEXTURE_3D
&&
1033 irb
->layer_count
== 1) ? GL_TEXTURE_2D
:
1034 irb
->mt
->target
== GL_TEXTURE_1D_ARRAY
? GL_TEXTURE_2D_ARRAY
:
1037 const struct isl_view view
= {
1039 .base_level
= irb
->mt_level
- irb
->mt
->first_level
,
1041 .base_array_layer
= irb
->mt_layer
,
1042 .array_len
= irb
->layer_count
,
1043 .swizzle
= ISL_SWIZZLE_IDENTITY
,
1044 .usage
= ISL_SURF_USAGE_TEXTURE_BIT
,
1047 enum isl_aux_usage aux_usage
=
1048 intel_miptree_texture_aux_usage(brw
, irb
->mt
, format
);
1049 if (brw
->draw_aux_usage
[i
] == ISL_AUX_USAGE_NONE
)
1050 aux_usage
= ISL_AUX_USAGE_NONE
;
1052 brw_emit_surface_state(brw
, irb
->mt
, target
, view
, aux_usage
,
1053 surf_offset
, surf_index
,
1057 emit_null_surface_state(brw
, fb
, surf_offset
);
1061 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1065 const struct brw_tracked_state brw_renderbuffer_read_surfaces
= {
1067 .mesa
= _NEW_BUFFERS
,
1068 .brw
= BRW_NEW_BATCH
|
1070 BRW_NEW_FS_PROG_DATA
,
1072 .emit
= update_renderbuffer_read_surfaces
,
1076 is_depth_texture(struct intel_texture_object
*iobj
)
1078 GLenum base_format
= _mesa_get_format_base_format(iobj
->_Format
);
1079 return base_format
== GL_DEPTH_COMPONENT
||
1080 (base_format
== GL_DEPTH_STENCIL
&& !iobj
->base
.StencilSampling
);
1084 update_stage_texture_surfaces(struct brw_context
*brw
,
1085 const struct gl_program
*prog
,
1086 struct brw_stage_state
*stage_state
,
1087 bool for_gather
, uint32_t plane
)
1092 struct gl_context
*ctx
= &brw
->ctx
;
1094 uint32_t *surf_offset
= stage_state
->surf_offset
;
1096 /* BRW_NEW_*_PROG_DATA */
1098 surf_offset
+= stage_state
->prog_data
->binding_table
.gather_texture_start
;
1100 surf_offset
+= stage_state
->prog_data
->binding_table
.plane_start
[plane
];
1102 unsigned num_samplers
= util_last_bit(prog
->SamplersUsed
);
1103 for (unsigned s
= 0; s
< num_samplers
; s
++) {
1106 if (prog
->SamplersUsed
& (1 << s
)) {
1107 const unsigned unit
= prog
->SamplerUnits
[s
];
1108 const bool used_by_txf
= prog
->info
.textures_used_by_txf
& (1 << s
);
1109 struct gl_texture_object
*obj
= ctx
->Texture
.Unit
[unit
]._Current
;
1110 struct intel_texture_object
*iobj
= intel_texture_object(obj
);
1116 if ((prog
->ShadowSamplers
& (1 << s
)) && !is_depth_texture(iobj
)) {
1117 /* A programming note for the sample_c message says:
1119 * "The Surface Format of the associated surface must be
1120 * indicated as supporting shadow mapping as indicated in the
1121 * surface format table."
1123 * Accessing non-depth textures via a sampler*Shadow type is
1124 * undefined. GLSL 4.50 page 162 says:
1126 * "If a shadow texture call is made to a sampler that does not
1127 * represent a depth texture, then results are undefined."
1129 * We give them a null surface (zeros) for undefined. We've seen
1130 * GPU hangs with color buffers and sample_c, so we try and avoid
1131 * those with this hack.
1133 emit_null_surface_state(brw
, NULL
, surf_offset
+ s
);
1135 brw_update_texture_surface(ctx
, unit
, surf_offset
+ s
, for_gather
,
1136 used_by_txf
, plane
);
1144 * Construct SURFACE_STATE objects for enabled textures.
1147 brw_update_texture_surfaces(struct brw_context
*brw
)
1149 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
1151 /* BRW_NEW_VERTEX_PROGRAM */
1152 struct gl_program
*vs
= brw
->programs
[MESA_SHADER_VERTEX
];
1154 /* BRW_NEW_TESS_PROGRAMS */
1155 struct gl_program
*tcs
= brw
->programs
[MESA_SHADER_TESS_CTRL
];
1156 struct gl_program
*tes
= brw
->programs
[MESA_SHADER_TESS_EVAL
];
1158 /* BRW_NEW_GEOMETRY_PROGRAM */
1159 struct gl_program
*gs
= brw
->programs
[MESA_SHADER_GEOMETRY
];
1161 /* BRW_NEW_FRAGMENT_PROGRAM */
1162 struct gl_program
*fs
= brw
->programs
[MESA_SHADER_FRAGMENT
];
1165 update_stage_texture_surfaces(brw
, vs
, &brw
->vs
.base
, false, 0);
1166 update_stage_texture_surfaces(brw
, tcs
, &brw
->tcs
.base
, false, 0);
1167 update_stage_texture_surfaces(brw
, tes
, &brw
->tes
.base
, false, 0);
1168 update_stage_texture_surfaces(brw
, gs
, &brw
->gs
.base
, false, 0);
1169 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, false, 0);
1171 /* emit alternate set of surface state for gather. this
1172 * allows the surface format to be overriden for only the
1173 * gather4 messages. */
1174 if (devinfo
->gen
< 8) {
1175 if (vs
&& vs
->info
.uses_texture_gather
)
1176 update_stage_texture_surfaces(brw
, vs
, &brw
->vs
.base
, true, 0);
1177 if (tcs
&& tcs
->info
.uses_texture_gather
)
1178 update_stage_texture_surfaces(brw
, tcs
, &brw
->tcs
.base
, true, 0);
1179 if (tes
&& tes
->info
.uses_texture_gather
)
1180 update_stage_texture_surfaces(brw
, tes
, &brw
->tes
.base
, true, 0);
1181 if (gs
&& gs
->info
.uses_texture_gather
)
1182 update_stage_texture_surfaces(brw
, gs
, &brw
->gs
.base
, true, 0);
1183 if (fs
&& fs
->info
.uses_texture_gather
)
1184 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, true, 0);
1188 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, false, 1);
1189 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, false, 2);
1192 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1195 const struct brw_tracked_state brw_texture_surfaces
= {
1197 .mesa
= _NEW_TEXTURE
,
1198 .brw
= BRW_NEW_BATCH
|
1200 BRW_NEW_FRAGMENT_PROGRAM
|
1201 BRW_NEW_FS_PROG_DATA
|
1202 BRW_NEW_GEOMETRY_PROGRAM
|
1203 BRW_NEW_GS_PROG_DATA
|
1204 BRW_NEW_TESS_PROGRAMS
|
1205 BRW_NEW_TCS_PROG_DATA
|
1206 BRW_NEW_TES_PROG_DATA
|
1207 BRW_NEW_TEXTURE_BUFFER
|
1208 BRW_NEW_VERTEX_PROGRAM
|
1209 BRW_NEW_VS_PROG_DATA
,
1211 .emit
= brw_update_texture_surfaces
,
1215 brw_update_cs_texture_surfaces(struct brw_context
*brw
)
1217 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
1219 /* BRW_NEW_COMPUTE_PROGRAM */
1220 struct gl_program
*cs
= brw
->programs
[MESA_SHADER_COMPUTE
];
1223 update_stage_texture_surfaces(brw
, cs
, &brw
->cs
.base
, false, 0);
1225 /* emit alternate set of surface state for gather. this
1226 * allows the surface format to be overriden for only the
1229 if (devinfo
->gen
< 8) {
1230 if (cs
&& cs
->info
.uses_texture_gather
)
1231 update_stage_texture_surfaces(brw
, cs
, &brw
->cs
.base
, true, 0);
1234 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1237 const struct brw_tracked_state brw_cs_texture_surfaces
= {
1239 .mesa
= _NEW_TEXTURE
,
1240 .brw
= BRW_NEW_BATCH
|
1241 BRW_NEW_COMPUTE_PROGRAM
|
1244 .emit
= brw_update_cs_texture_surfaces
,
1248 upload_buffer_surface(struct brw_context
*brw
,
1249 struct gl_buffer_binding
*binding
,
1250 uint32_t *out_offset
,
1251 enum isl_format format
,
1252 unsigned reloc_flags
)
1254 struct gl_context
*ctx
= &brw
->ctx
;
1256 if (binding
->BufferObject
== ctx
->Shared
->NullBufferObj
) {
1257 emit_null_surface_state(brw
, NULL
, out_offset
);
1259 ptrdiff_t size
= binding
->BufferObject
->Size
- binding
->Offset
;
1260 if (!binding
->AutomaticSize
)
1261 size
= MIN2(size
, binding
->Size
);
1263 struct intel_buffer_object
*iobj
=
1264 intel_buffer_object(binding
->BufferObject
);
1266 intel_bufferobj_buffer(brw
, iobj
, binding
->Offset
, size
,
1267 (reloc_flags
& RELOC_WRITE
) != 0);
1269 brw_emit_buffer_surface_state(brw
, out_offset
, bo
, binding
->Offset
,
1270 format
, size
, 1, reloc_flags
);
1275 brw_upload_ubo_surfaces(struct brw_context
*brw
, struct gl_program
*prog
,
1276 struct brw_stage_state
*stage_state
,
1277 struct brw_stage_prog_data
*prog_data
)
1279 struct gl_context
*ctx
= &brw
->ctx
;
1281 if (!prog
|| (prog
->info
.num_ubos
== 0 &&
1282 prog
->info
.num_ssbos
== 0 &&
1283 prog
->info
.num_abos
== 0))
1286 uint32_t *ubo_surf_offsets
=
1287 &stage_state
->surf_offset
[prog_data
->binding_table
.ubo_start
];
1289 for (int i
= 0; i
< prog
->info
.num_ubos
; i
++) {
1290 struct gl_buffer_binding
*binding
=
1291 &ctx
->UniformBufferBindings
[prog
->sh
.UniformBlocks
[i
]->Binding
];
1292 upload_buffer_surface(brw
, binding
, &ubo_surf_offsets
[i
],
1293 ISL_FORMAT_R32G32B32A32_FLOAT
, 0);
1296 uint32_t *abo_surf_offsets
=
1297 &stage_state
->surf_offset
[prog_data
->binding_table
.ssbo_start
];
1298 uint32_t *ssbo_surf_offsets
= abo_surf_offsets
+ prog
->info
.num_abos
;
1300 for (int i
= 0; i
< prog
->info
.num_abos
; i
++) {
1301 struct gl_buffer_binding
*binding
=
1302 &ctx
->AtomicBufferBindings
[prog
->sh
.AtomicBuffers
[i
]->Binding
];
1303 upload_buffer_surface(brw
, binding
, &abo_surf_offsets
[i
],
1304 ISL_FORMAT_RAW
, RELOC_WRITE
);
1307 for (int i
= 0; i
< prog
->info
.num_ssbos
; i
++) {
1308 struct gl_buffer_binding
*binding
=
1309 &ctx
->ShaderStorageBufferBindings
[prog
->sh
.ShaderStorageBlocks
[i
]->Binding
];
1311 upload_buffer_surface(brw
, binding
, &ssbo_surf_offsets
[i
],
1312 ISL_FORMAT_RAW
, RELOC_WRITE
);
1315 stage_state
->push_constants_dirty
= true;
1316 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1320 brw_upload_wm_ubo_surfaces(struct brw_context
*brw
)
1322 struct gl_context
*ctx
= &brw
->ctx
;
1324 struct gl_program
*prog
= ctx
->FragmentProgram
._Current
;
1326 /* BRW_NEW_FS_PROG_DATA */
1327 brw_upload_ubo_surfaces(brw
, prog
, &brw
->wm
.base
, brw
->wm
.base
.prog_data
);
1330 const struct brw_tracked_state brw_wm_ubo_surfaces
= {
1332 .mesa
= _NEW_PROGRAM
,
1333 .brw
= BRW_NEW_BATCH
|
1334 BRW_NEW_FS_PROG_DATA
|
1335 BRW_NEW_UNIFORM_BUFFER
,
1337 .emit
= brw_upload_wm_ubo_surfaces
,
1341 brw_upload_cs_ubo_surfaces(struct brw_context
*brw
)
1343 struct gl_context
*ctx
= &brw
->ctx
;
1345 struct gl_program
*prog
=
1346 ctx
->_Shader
->CurrentProgram
[MESA_SHADER_COMPUTE
];
1348 /* BRW_NEW_CS_PROG_DATA */
1349 brw_upload_ubo_surfaces(brw
, prog
, &brw
->cs
.base
, brw
->cs
.base
.prog_data
);
1352 const struct brw_tracked_state brw_cs_ubo_surfaces
= {
1354 .mesa
= _NEW_PROGRAM
,
1355 .brw
= BRW_NEW_BATCH
|
1356 BRW_NEW_CS_PROG_DATA
|
1357 BRW_NEW_UNIFORM_BUFFER
,
1359 .emit
= brw_upload_cs_ubo_surfaces
,
1363 brw_upload_cs_image_surfaces(struct brw_context
*brw
)
1366 const struct gl_program
*cp
= brw
->programs
[MESA_SHADER_COMPUTE
];
1369 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1370 brw_upload_image_surfaces(brw
, cp
, &brw
->cs
.base
,
1371 brw
->cs
.base
.prog_data
);
1375 const struct brw_tracked_state brw_cs_image_surfaces
= {
1377 .mesa
= _NEW_TEXTURE
| _NEW_PROGRAM
,
1378 .brw
= BRW_NEW_BATCH
|
1379 BRW_NEW_CS_PROG_DATA
|
1383 .emit
= brw_upload_cs_image_surfaces
,
1387 get_image_format(struct brw_context
*brw
, mesa_format format
, GLenum access
)
1389 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
1390 enum isl_format hw_format
= brw_isl_format_for_mesa_format(format
);
1391 if (access
== GL_WRITE_ONLY
) {
1393 } else if (isl_has_matching_typed_storage_image_format(devinfo
, hw_format
)) {
1394 /* Typed surface reads support a very limited subset of the shader
1395 * image formats. Translate it into the closest format the
1396 * hardware supports.
1398 return isl_lower_storage_image_format(devinfo
, hw_format
);
1400 /* The hardware doesn't actually support a typed format that we can use
1401 * so we have to fall back to untyped read/write messages.
1403 return ISL_FORMAT_RAW
;
1408 update_default_image_param(struct brw_context
*brw
,
1409 struct gl_image_unit
*u
,
1410 unsigned surface_idx
,
1411 struct brw_image_param
*param
)
1413 memset(param
, 0, sizeof(*param
));
1414 param
->surface_idx
= surface_idx
;
1415 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1416 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1417 * detailed explanation of these parameters.
1419 param
->swizzling
[0] = 0xff;
1420 param
->swizzling
[1] = 0xff;
1424 update_buffer_image_param(struct brw_context
*brw
,
1425 struct gl_image_unit
*u
,
1426 unsigned surface_idx
,
1427 struct brw_image_param
*param
)
1429 struct gl_buffer_object
*obj
= u
->TexObj
->BufferObject
;
1430 const uint32_t size
= MIN2((uint32_t)u
->TexObj
->BufferSize
, obj
->Size
);
1431 update_default_image_param(brw
, u
, surface_idx
, param
);
1433 param
->size
[0] = size
/ _mesa_get_format_bytes(u
->_ActualFormat
);
1434 param
->stride
[0] = _mesa_get_format_bytes(u
->_ActualFormat
);
1438 get_image_num_layers(const struct intel_mipmap_tree
*mt
, GLenum target
,
1441 if (target
== GL_TEXTURE_CUBE_MAP
)
1444 return target
== GL_TEXTURE_3D
?
1445 minify(mt
->surf
.logical_level0_px
.depth
, level
) :
1446 mt
->surf
.logical_level0_px
.array_len
;
1450 update_image_surface(struct brw_context
*brw
,
1451 struct gl_image_unit
*u
,
1453 unsigned surface_idx
,
1454 uint32_t *surf_offset
,
1455 struct brw_image_param
*param
)
1457 if (_mesa_is_image_unit_valid(&brw
->ctx
, u
)) {
1458 struct gl_texture_object
*obj
= u
->TexObj
;
1459 const unsigned format
= get_image_format(brw
, u
->_ActualFormat
, access
);
1461 if (obj
->Target
== GL_TEXTURE_BUFFER
) {
1462 struct intel_buffer_object
*intel_obj
=
1463 intel_buffer_object(obj
->BufferObject
);
1464 const unsigned texel_size
= (format
== ISL_FORMAT_RAW
? 1 :
1465 _mesa_get_format_bytes(u
->_ActualFormat
));
1467 brw_emit_buffer_surface_state(
1468 brw
, surf_offset
, intel_obj
->buffer
, obj
->BufferOffset
,
1469 format
, intel_obj
->Base
.Size
, texel_size
,
1470 access
!= GL_READ_ONLY
? RELOC_WRITE
: 0);
1472 update_buffer_image_param(brw
, u
, surface_idx
, param
);
1475 struct intel_texture_object
*intel_obj
= intel_texture_object(obj
);
1476 struct intel_mipmap_tree
*mt
= intel_obj
->mt
;
1477 const unsigned num_layers
= u
->Layered
?
1478 get_image_num_layers(mt
, obj
->Target
, u
->Level
) : 1;
1480 struct isl_view view
= {
1482 .base_level
= obj
->MinLevel
+ u
->Level
,
1484 .base_array_layer
= obj
->MinLayer
+ u
->_Layer
,
1485 .array_len
= num_layers
,
1486 .swizzle
= ISL_SWIZZLE_IDENTITY
,
1487 .usage
= ISL_SURF_USAGE_STORAGE_BIT
,
1490 if (format
== ISL_FORMAT_RAW
) {
1491 brw_emit_buffer_surface_state(
1492 brw
, surf_offset
, mt
->bo
, mt
->offset
,
1493 format
, mt
->bo
->size
- mt
->offset
, 1 /* pitch */,
1494 access
!= GL_READ_ONLY
? RELOC_WRITE
: 0);
1497 const int surf_index
= surf_offset
- &brw
->wm
.base
.surf_offset
[0];
1498 assert(!intel_miptree_has_color_unresolved(mt
,
1500 view
.base_array_layer
,
1502 brw_emit_surface_state(brw
, mt
, mt
->target
, view
,
1504 surf_offset
, surf_index
,
1505 access
== GL_READ_ONLY
? 0 : RELOC_WRITE
);
1508 isl_surf_fill_image_param(&brw
->isl_dev
, param
, &mt
->surf
, &view
);
1509 param
->surface_idx
= surface_idx
;
1513 emit_null_surface_state(brw
, NULL
, surf_offset
);
1514 update_default_image_param(brw
, u
, surface_idx
, param
);
1519 brw_upload_image_surfaces(struct brw_context
*brw
,
1520 const struct gl_program
*prog
,
1521 struct brw_stage_state
*stage_state
,
1522 struct brw_stage_prog_data
*prog_data
)
1525 struct gl_context
*ctx
= &brw
->ctx
;
1527 if (prog
->info
.num_images
) {
1528 for (unsigned i
= 0; i
< prog
->info
.num_images
; i
++) {
1529 struct gl_image_unit
*u
= &ctx
->ImageUnits
[prog
->sh
.ImageUnits
[i
]];
1530 const unsigned surf_idx
= prog_data
->binding_table
.image_start
+ i
;
1532 update_image_surface(brw
, u
, prog
->sh
.ImageAccess
[i
],
1534 &stage_state
->surf_offset
[surf_idx
],
1535 &stage_state
->image_param
[i
]);
1538 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1539 /* This may have changed the image metadata dependent on the context
1540 * image unit state and passed to the program as uniforms, make sure
1541 * that push and pull constants are reuploaded.
1543 brw
->NewGLState
|= _NEW_PROGRAM_CONSTANTS
;
1548 brw_upload_wm_image_surfaces(struct brw_context
*brw
)
1550 /* BRW_NEW_FRAGMENT_PROGRAM */
1551 const struct gl_program
*wm
= brw
->programs
[MESA_SHADER_FRAGMENT
];
1554 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1555 brw_upload_image_surfaces(brw
, wm
, &brw
->wm
.base
,
1556 brw
->wm
.base
.prog_data
);
1560 const struct brw_tracked_state brw_wm_image_surfaces
= {
1562 .mesa
= _NEW_TEXTURE
,
1563 .brw
= BRW_NEW_BATCH
|
1565 BRW_NEW_FRAGMENT_PROGRAM
|
1566 BRW_NEW_FS_PROG_DATA
|
1569 .emit
= brw_upload_wm_image_surfaces
,
1573 brw_upload_cs_work_groups_surface(struct brw_context
*brw
)
1575 struct gl_context
*ctx
= &brw
->ctx
;
1577 struct gl_program
*prog
=
1578 ctx
->_Shader
->CurrentProgram
[MESA_SHADER_COMPUTE
];
1579 /* BRW_NEW_CS_PROG_DATA */
1580 const struct brw_cs_prog_data
*cs_prog_data
=
1581 brw_cs_prog_data(brw
->cs
.base
.prog_data
);
1583 if (prog
&& cs_prog_data
->uses_num_work_groups
) {
1584 const unsigned surf_idx
=
1585 cs_prog_data
->binding_table
.work_groups_start
;
1586 uint32_t *surf_offset
= &brw
->cs
.base
.surf_offset
[surf_idx
];
1590 if (brw
->compute
.num_work_groups_bo
== NULL
) {
1592 intel_upload_data(brw
,
1593 (void *)brw
->compute
.num_work_groups
,
1599 bo
= brw
->compute
.num_work_groups_bo
;
1600 bo_offset
= brw
->compute
.num_work_groups_offset
;
1603 brw_emit_buffer_surface_state(brw
, surf_offset
,
1606 3 * sizeof(GLuint
), 1,
1608 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1612 const struct brw_tracked_state brw_cs_work_groups_surface
= {
1614 .brw
= BRW_NEW_CS_PROG_DATA
|
1615 BRW_NEW_CS_WORK_GROUPS
1617 .emit
= brw_upload_cs_work_groups_surface
,