2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keithw@vmware.com>
33 #include "compiler/nir/nir.h"
34 #include "main/context.h"
35 #include "main/blend.h"
36 #include "main/mtypes.h"
37 #include "main/samplerobj.h"
38 #include "main/shaderimage.h"
39 #include "main/teximage.h"
40 #include "program/prog_parameter.h"
41 #include "program/prog_instruction.h"
42 #include "main/framebuffer.h"
43 #include "main/shaderapi.h"
47 #include "intel_mipmap_tree.h"
48 #include "intel_batchbuffer.h"
49 #include "intel_tex.h"
50 #include "intel_fbo.h"
51 #include "intel_buffer_objects.h"
53 #include "brw_context.h"
54 #include "brw_state.h"
55 #include "brw_defines.h"
59 INTEL_RENDERBUFFER_LAYERED
= 1 << 0,
60 INTEL_AUX_BUFFER_DISABLED
= 1 << 1,
63 uint32_t tex_mocs
[] = {
70 uint32_t rb_mocs
[] = {
78 get_isl_surf(struct brw_context
*brw
, struct intel_mipmap_tree
*mt
,
79 GLenum target
, struct isl_view
*view
,
80 uint32_t *tile_x
, uint32_t *tile_y
,
81 uint32_t *offset
, struct isl_surf
*surf
)
85 const enum isl_dim_layout dim_layout
=
86 get_isl_dim_layout(&brw
->screen
->devinfo
, mt
->surf
.tiling
, target
);
88 if (surf
->dim_layout
== dim_layout
)
91 /* The layout of the specified texture target is not compatible with the
92 * actual layout of the miptree structure in memory -- You're entering
93 * dangerous territory, this can only possibly work if you only intended
94 * to access a single level and slice of the texture, and the hardware
95 * supports the tile offset feature in order to allow non-tile-aligned
96 * base offsets, since we'll have to point the hardware to the first
97 * texel of the level instead of relying on the usual base level/layer
100 assert(brw
->has_surface_tile_offset
);
101 assert(view
->levels
== 1 && view
->array_len
== 1);
102 assert(*tile_x
== 0 && *tile_y
== 0);
104 offset
+= intel_miptree_get_tile_offsets(mt
, view
->base_level
,
105 view
->base_array_layer
,
108 /* Minify the logical dimensions of the texture. */
109 const unsigned l
= view
->base_level
- mt
->first_level
;
110 surf
->logical_level0_px
.width
= minify(surf
->logical_level0_px
.width
, l
);
111 surf
->logical_level0_px
.height
= surf
->dim
<= ISL_SURF_DIM_1D
? 1 :
112 minify(surf
->logical_level0_px
.height
, l
);
113 surf
->logical_level0_px
.depth
= surf
->dim
<= ISL_SURF_DIM_2D
? 1 :
114 minify(surf
->logical_level0_px
.depth
, l
);
116 /* Only the base level and layer can be addressed with the overridden
119 surf
->logical_level0_px
.array_len
= 1;
121 surf
->dim_layout
= dim_layout
;
123 /* The requested slice of the texture is now at the base level and
126 view
->base_level
= 0;
127 view
->base_array_layer
= 0;
131 brw_emit_surface_state(struct brw_context
*brw
,
132 struct intel_mipmap_tree
*mt
,
133 GLenum target
, struct isl_view view
,
134 enum isl_aux_usage aux_usage
,
135 uint32_t mocs
, uint32_t *surf_offset
, int surf_index
,
136 unsigned read_domains
, unsigned write_domains
)
138 uint32_t tile_x
= mt
->level
[0].level_x
;
139 uint32_t tile_y
= mt
->level
[0].level_y
;
140 uint32_t offset
= mt
->offset
;
142 struct isl_surf surf
;
144 get_isl_surf(brw
, mt
, target
, &view
, &tile_x
, &tile_y
, &offset
, &surf
);
146 union isl_color_value clear_color
= { .u32
= { 0, 0, 0, 0 } };
148 struct brw_bo
*aux_bo
;
149 struct isl_surf
*aux_surf
= NULL
;
150 uint64_t aux_offset
= 0;
152 case ISL_AUX_USAGE_MCS
:
153 case ISL_AUX_USAGE_CCS_D
:
154 case ISL_AUX_USAGE_CCS_E
:
155 aux_surf
= &mt
->mcs_buf
->surf
;
156 aux_bo
= mt
->mcs_buf
->bo
;
157 aux_offset
= mt
->mcs_buf
->bo
->offset64
+ mt
->mcs_buf
->offset
;
160 case ISL_AUX_USAGE_HIZ
:
161 aux_surf
= &mt
->hiz_buf
->surf
;
162 aux_bo
= mt
->hiz_buf
->bo
;
163 aux_offset
= mt
->hiz_buf
->bo
->offset64
;
166 case ISL_AUX_USAGE_NONE
:
170 if (aux_usage
!= ISL_AUX_USAGE_NONE
) {
171 /* We only really need a clear color if we also have an auxiliary
172 * surface. Without one, it does nothing.
174 clear_color
= mt
->fast_clear_color
;
177 void *state
= brw_state_batch(brw
,
178 brw
->isl_dev
.ss
.size
,
179 brw
->isl_dev
.ss
.align
,
182 isl_surf_fill_state(&brw
->isl_dev
, state
, .surf
= &mt
->surf
, .view
= &view
,
183 .address
= mt
->bo
->offset64
+ offset
,
184 .aux_surf
= aux_surf
, .aux_usage
= aux_usage
,
185 .aux_address
= aux_offset
,
186 .mocs
= mocs
, .clear_color
= clear_color
,
187 .x_offset_sa
= tile_x
, .y_offset_sa
= tile_y
);
189 brw_emit_reloc(&brw
->batch
, *surf_offset
+ brw
->isl_dev
.ss
.addr_offset
,
190 mt
->bo
, offset
, read_domains
, write_domains
);
193 /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
194 * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
195 * contain other control information. Since buffer addresses are always
196 * on 4k boundaries (and thus have their lower 12 bits zero), we can use
197 * an ordinary reloc to do the necessary address translation.
199 assert((aux_offset
& 0xfff) == 0);
200 uint32_t *aux_addr
= state
+ brw
->isl_dev
.ss
.aux_addr_offset
;
201 brw_emit_reloc(&brw
->batch
,
202 *surf_offset
+ brw
->isl_dev
.ss
.aux_addr_offset
,
203 aux_bo
, *aux_addr
- aux_bo
->offset64
,
204 read_domains
, write_domains
);
209 brw_update_renderbuffer_surface(struct brw_context
*brw
,
210 struct gl_renderbuffer
*rb
,
211 uint32_t flags
, unsigned unit
,
214 struct gl_context
*ctx
= &brw
->ctx
;
215 struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
216 struct intel_mipmap_tree
*mt
= irb
->mt
;
218 enum isl_aux_usage aux_usage
=
219 intel_miptree_render_aux_usage(brw
, mt
, ctx
->Color
.sRGBEnabled
,
220 ctx
->Color
.BlendEnabled
& (1 << unit
));
222 if (flags
& INTEL_AUX_BUFFER_DISABLED
) {
223 assert(brw
->gen
>= 9);
224 aux_usage
= ISL_AUX_USAGE_NONE
;
227 assert(brw_render_target_supported(brw
, rb
));
229 mesa_format rb_format
= _mesa_get_render_format(ctx
, intel_rb_format(irb
));
230 if (unlikely(!brw
->mesa_format_supports_render
[rb_format
])) {
231 _mesa_problem(ctx
, "%s: renderbuffer format %s unsupported\n",
232 __func__
, _mesa_get_format_name(rb_format
));
235 struct isl_view view
= {
236 .format
= brw
->mesa_to_isl_render_format
[rb_format
],
237 .base_level
= irb
->mt_level
- irb
->mt
->first_level
,
239 .base_array_layer
= irb
->mt_layer
,
240 .array_len
= MAX2(irb
->layer_count
, 1),
241 .swizzle
= ISL_SWIZZLE_IDENTITY
,
242 .usage
= ISL_SURF_USAGE_RENDER_TARGET_BIT
,
246 brw_emit_surface_state(brw
, mt
, mt
->target
, view
, aux_usage
,
249 I915_GEM_DOMAIN_RENDER
,
250 I915_GEM_DOMAIN_RENDER
);
255 translate_tex_target(GLenum target
)
259 case GL_TEXTURE_1D_ARRAY_EXT
:
260 return BRW_SURFACE_1D
;
262 case GL_TEXTURE_RECTANGLE_NV
:
263 return BRW_SURFACE_2D
;
266 case GL_TEXTURE_2D_ARRAY_EXT
:
267 case GL_TEXTURE_EXTERNAL_OES
:
268 case GL_TEXTURE_2D_MULTISAMPLE
:
269 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY
:
270 return BRW_SURFACE_2D
;
273 return BRW_SURFACE_3D
;
275 case GL_TEXTURE_CUBE_MAP
:
276 case GL_TEXTURE_CUBE_MAP_ARRAY
:
277 return BRW_SURFACE_CUBE
;
280 unreachable("not reached");
285 brw_get_surface_tiling_bits(enum isl_tiling tiling
)
289 return BRW_SURFACE_TILED
;
291 return BRW_SURFACE_TILED
| BRW_SURFACE_TILED_Y
;
299 brw_get_surface_num_multisamples(unsigned num_samples
)
302 return BRW_SURFACE_MULTISAMPLECOUNT_4
;
304 return BRW_SURFACE_MULTISAMPLECOUNT_1
;
308 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
312 brw_get_texture_swizzle(const struct gl_context
*ctx
,
313 const struct gl_texture_object
*t
)
315 const struct gl_texture_image
*img
= t
->Image
[0][t
->BaseLevel
];
317 int swizzles
[SWIZZLE_NIL
+ 1] = {
327 if (img
->_BaseFormat
== GL_DEPTH_COMPONENT
||
328 img
->_BaseFormat
== GL_DEPTH_STENCIL
) {
329 GLenum depth_mode
= t
->DepthMode
;
331 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
332 * with depth component data specified with a sized internal format.
333 * Otherwise, it's left at the old default, GL_LUMINANCE.
335 if (_mesa_is_gles3(ctx
) &&
336 img
->InternalFormat
!= GL_DEPTH_COMPONENT
&&
337 img
->InternalFormat
!= GL_DEPTH_STENCIL
) {
341 switch (depth_mode
) {
343 swizzles
[0] = SWIZZLE_ZERO
;
344 swizzles
[1] = SWIZZLE_ZERO
;
345 swizzles
[2] = SWIZZLE_ZERO
;
346 swizzles
[3] = SWIZZLE_X
;
349 swizzles
[0] = SWIZZLE_X
;
350 swizzles
[1] = SWIZZLE_X
;
351 swizzles
[2] = SWIZZLE_X
;
352 swizzles
[3] = SWIZZLE_ONE
;
355 swizzles
[0] = SWIZZLE_X
;
356 swizzles
[1] = SWIZZLE_X
;
357 swizzles
[2] = SWIZZLE_X
;
358 swizzles
[3] = SWIZZLE_X
;
361 swizzles
[0] = SWIZZLE_X
;
362 swizzles
[1] = SWIZZLE_ZERO
;
363 swizzles
[2] = SWIZZLE_ZERO
;
364 swizzles
[3] = SWIZZLE_ONE
;
369 GLenum datatype
= _mesa_get_format_datatype(img
->TexFormat
);
371 /* If the texture's format is alpha-only, force R, G, and B to
372 * 0.0. Similarly, if the texture's format has no alpha channel,
373 * force the alpha value read to 1.0. This allows for the
374 * implementation to use an RGBA texture for any of these formats
375 * without leaking any unexpected values.
377 switch (img
->_BaseFormat
) {
379 swizzles
[0] = SWIZZLE_ZERO
;
380 swizzles
[1] = SWIZZLE_ZERO
;
381 swizzles
[2] = SWIZZLE_ZERO
;
384 if (t
->_IsIntegerFormat
|| datatype
== GL_SIGNED_NORMALIZED
) {
385 swizzles
[0] = SWIZZLE_X
;
386 swizzles
[1] = SWIZZLE_X
;
387 swizzles
[2] = SWIZZLE_X
;
388 swizzles
[3] = SWIZZLE_ONE
;
391 case GL_LUMINANCE_ALPHA
:
392 if (datatype
== GL_SIGNED_NORMALIZED
) {
393 swizzles
[0] = SWIZZLE_X
;
394 swizzles
[1] = SWIZZLE_X
;
395 swizzles
[2] = SWIZZLE_X
;
396 swizzles
[3] = SWIZZLE_W
;
400 if (datatype
== GL_SIGNED_NORMALIZED
) {
401 swizzles
[0] = SWIZZLE_X
;
402 swizzles
[1] = SWIZZLE_X
;
403 swizzles
[2] = SWIZZLE_X
;
404 swizzles
[3] = SWIZZLE_X
;
410 if (_mesa_get_format_bits(img
->TexFormat
, GL_ALPHA_BITS
) > 0 ||
411 img
->TexFormat
== MESA_FORMAT_RGB_DXT1
||
412 img
->TexFormat
== MESA_FORMAT_SRGB_DXT1
)
413 swizzles
[3] = SWIZZLE_ONE
;
417 return MAKE_SWIZZLE4(swizzles
[GET_SWZ(t
->_Swizzle
, 0)],
418 swizzles
[GET_SWZ(t
->_Swizzle
, 1)],
419 swizzles
[GET_SWZ(t
->_Swizzle
, 2)],
420 swizzles
[GET_SWZ(t
->_Swizzle
, 3)]);
424 * Convert an swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+
425 * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED). The mappings are
427 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
430 * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE
432 * which is simply adding 4 then modding by 8 (or anding with 7).
434 * We then may need to apply workarounds for textureGather hardware bugs.
437 swizzle_to_scs(GLenum swizzle
, bool need_green_to_blue
)
439 unsigned scs
= (swizzle
+ 4) & 7;
441 return (need_green_to_blue
&& scs
== HSW_SCS_GREEN
) ? HSW_SCS_BLUE
: scs
;
445 brw_aux_surface_disabled(const struct brw_context
*brw
,
446 const struct intel_mipmap_tree
*mt
)
448 const struct gl_framebuffer
*fb
= brw
->ctx
.DrawBuffer
;
450 for (unsigned i
= 0; i
< fb
->_NumColorDrawBuffers
; i
++) {
451 const struct intel_renderbuffer
*irb
=
452 intel_renderbuffer(fb
->_ColorDrawBuffers
[i
]);
454 if (irb
&& irb
->mt
== mt
)
455 return brw
->draw_aux_buffer_disabled
[i
];
462 brw_update_texture_surface(struct gl_context
*ctx
,
464 uint32_t *surf_offset
,
468 struct brw_context
*brw
= brw_context(ctx
);
469 struct gl_texture_object
*obj
= ctx
->Texture
.Unit
[unit
]._Current
;
471 if (obj
->Target
== GL_TEXTURE_BUFFER
) {
472 brw_update_buffer_texture_surface(ctx
, unit
, surf_offset
);
475 struct intel_texture_object
*intel_obj
= intel_texture_object(obj
);
476 struct intel_mipmap_tree
*mt
= intel_obj
->mt
;
479 if (mt
->plane
[plane
- 1] == NULL
)
481 mt
= mt
->plane
[plane
- 1];
484 struct gl_sampler_object
*sampler
= _mesa_get_samplerobj(ctx
, unit
);
485 /* If this is a view with restricted NumLayers, then our effective depth
486 * is not just the miptree depth.
488 unsigned view_num_layers
;
489 if (obj
->Immutable
&& obj
->Target
!= GL_TEXTURE_3D
) {
490 view_num_layers
= obj
->NumLayers
;
492 view_num_layers
= mt
->surf
.dim
== ISL_SURF_DIM_3D
?
493 mt
->surf
.logical_level0_px
.depth
:
494 mt
->surf
.logical_level0_px
.array_len
;
497 /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
498 * texturing functions that return a float, as our code generation always
499 * selects the .x channel (which would always be 0).
501 struct gl_texture_image
*firstImage
= obj
->Image
[0][obj
->BaseLevel
];
502 const bool alpha_depth
= obj
->DepthMode
== GL_ALPHA
&&
503 (firstImage
->_BaseFormat
== GL_DEPTH_COMPONENT
||
504 firstImage
->_BaseFormat
== GL_DEPTH_STENCIL
);
505 const unsigned swizzle
= (unlikely(alpha_depth
) ? SWIZZLE_XYZW
:
506 brw_get_texture_swizzle(&brw
->ctx
, obj
));
508 mesa_format mesa_fmt
= plane
== 0 ? intel_obj
->_Format
: mt
->format
;
509 enum isl_format format
= translate_tex_format(brw
, mesa_fmt
,
510 sampler
->sRGBDecode
);
512 /* Implement gen6 and gen7 gather work-around */
513 bool need_green_to_blue
= false;
515 if (brw
->gen
== 7 && (format
== ISL_FORMAT_R32G32_FLOAT
||
516 format
== ISL_FORMAT_R32G32_SINT
||
517 format
== ISL_FORMAT_R32G32_UINT
)) {
518 format
= ISL_FORMAT_R32G32_FLOAT_LD
;
519 need_green_to_blue
= brw
->is_haswell
;
520 } else if (brw
->gen
== 6) {
521 /* Sandybridge's gather4 message is broken for integer formats.
522 * To work around this, we pretend the surface is UNORM for
523 * 8 or 16-bit formats, and emit shader instructions to recover
524 * the real INT/UINT value. For 32-bit formats, we pretend
525 * the surface is FLOAT, and simply reinterpret the resulting
529 case ISL_FORMAT_R8_SINT
:
530 case ISL_FORMAT_R8_UINT
:
531 format
= ISL_FORMAT_R8_UNORM
;
534 case ISL_FORMAT_R16_SINT
:
535 case ISL_FORMAT_R16_UINT
:
536 format
= ISL_FORMAT_R16_UNORM
;
539 case ISL_FORMAT_R32_SINT
:
540 case ISL_FORMAT_R32_UINT
:
541 format
= ISL_FORMAT_R32_FLOAT
;
550 if (obj
->StencilSampling
&& firstImage
->_BaseFormat
== GL_DEPTH_STENCIL
) {
552 assert(mt
->r8stencil_mt
&& !mt
->stencil_mt
->r8stencil_needs_update
);
553 mt
= mt
->r8stencil_mt
;
557 format
= ISL_FORMAT_R8_UINT
;
558 } else if (brw
->gen
<= 7 && mt
->format
== MESA_FORMAT_S_UINT8
) {
559 assert(mt
->r8stencil_mt
&& !mt
->r8stencil_needs_update
);
560 mt
= mt
->r8stencil_mt
;
561 format
= ISL_FORMAT_R8_UINT
;
564 const int surf_index
= surf_offset
- &brw
->wm
.base
.surf_offset
[0];
566 struct isl_view view
= {
568 .base_level
= obj
->MinLevel
+ obj
->BaseLevel
,
569 .levels
= intel_obj
->_MaxLevel
- obj
->BaseLevel
+ 1,
570 .base_array_layer
= obj
->MinLayer
,
571 .array_len
= view_num_layers
,
573 .r
= swizzle_to_scs(GET_SWZ(swizzle
, 0), need_green_to_blue
),
574 .g
= swizzle_to_scs(GET_SWZ(swizzle
, 1), need_green_to_blue
),
575 .b
= swizzle_to_scs(GET_SWZ(swizzle
, 2), need_green_to_blue
),
576 .a
= swizzle_to_scs(GET_SWZ(swizzle
, 3), need_green_to_blue
),
578 .usage
= ISL_SURF_USAGE_TEXTURE_BIT
,
581 if (obj
->Target
== GL_TEXTURE_CUBE_MAP
||
582 obj
->Target
== GL_TEXTURE_CUBE_MAP_ARRAY
)
583 view
.usage
|= ISL_SURF_USAGE_CUBE_BIT
;
585 enum isl_aux_usage aux_usage
=
586 intel_miptree_texture_aux_usage(brw
, mt
, format
);
588 if (brw_aux_surface_disabled(brw
, mt
))
589 aux_usage
= ISL_AUX_USAGE_NONE
;
591 brw_emit_surface_state(brw
, mt
, mt
->target
, view
, aux_usage
,
593 surf_offset
, surf_index
,
594 I915_GEM_DOMAIN_SAMPLER
, 0);
599 brw_emit_buffer_surface_state(struct brw_context
*brw
,
600 uint32_t *out_offset
,
602 unsigned buffer_offset
,
603 unsigned surface_format
,
604 unsigned buffer_size
,
608 uint32_t *dw
= brw_state_batch(brw
,
609 brw
->isl_dev
.ss
.size
,
610 brw
->isl_dev
.ss
.align
,
613 isl_buffer_fill_state(&brw
->isl_dev
, dw
,
614 .address
= (bo
? bo
->offset64
: 0) + buffer_offset
,
616 .format
= surface_format
,
618 .mocs
= tex_mocs
[brw
->gen
]);
621 brw_emit_reloc(&brw
->batch
, *out_offset
+ brw
->isl_dev
.ss
.addr_offset
,
623 I915_GEM_DOMAIN_SAMPLER
,
624 (rw
? I915_GEM_DOMAIN_SAMPLER
: 0));
629 brw_update_buffer_texture_surface(struct gl_context
*ctx
,
631 uint32_t *surf_offset
)
633 struct brw_context
*brw
= brw_context(ctx
);
634 struct gl_texture_object
*tObj
= ctx
->Texture
.Unit
[unit
]._Current
;
635 struct intel_buffer_object
*intel_obj
=
636 intel_buffer_object(tObj
->BufferObject
);
637 uint32_t size
= tObj
->BufferSize
;
638 struct brw_bo
*bo
= NULL
;
639 mesa_format format
= tObj
->_BufferObjectFormat
;
640 const enum isl_format isl_format
= brw_isl_format_for_mesa_format(format
);
641 int texel_size
= _mesa_get_format_bytes(format
);
644 size
= MIN2(size
, intel_obj
->Base
.Size
);
645 bo
= intel_bufferobj_buffer(brw
, intel_obj
, tObj
->BufferOffset
, size
,
649 /* The ARB_texture_buffer_specification says:
651 * "The number of texels in the buffer texture's texel array is given by
653 * floor(<buffer_size> / (<components> * sizeof(<base_type>)),
655 * where <buffer_size> is the size of the buffer object, in basic
656 * machine units and <components> and <base_type> are the element count
657 * and base data type for elements, as specified in Table X.1. The
658 * number of texels in the texel array is then clamped to the
659 * implementation-dependent limit MAX_TEXTURE_BUFFER_SIZE_ARB."
661 * We need to clamp the size in bytes to MAX_TEXTURE_BUFFER_SIZE * stride,
662 * so that when ISL divides by stride to obtain the number of texels, that
663 * texel count is clamped to MAX_TEXTURE_BUFFER_SIZE.
665 size
= MIN2(size
, ctx
->Const
.MaxTextureBufferSize
* (unsigned) texel_size
);
667 if (isl_format
== ISL_FORMAT_UNSUPPORTED
) {
668 _mesa_problem(NULL
, "bad format %s for texture buffer\n",
669 _mesa_get_format_name(format
));
672 brw_emit_buffer_surface_state(brw
, surf_offset
, bo
,
681 * Create the constant buffer surface. Vertex/fragment shader constants will be
682 * read from this buffer with Data Port Read instructions/messages.
685 brw_create_constant_surface(struct brw_context
*brw
,
689 uint32_t *out_offset
)
691 brw_emit_buffer_surface_state(brw
, out_offset
, bo
, offset
,
692 ISL_FORMAT_R32G32B32A32_FLOAT
,
697 * Create the buffer surface. Shader buffer variables will be
698 * read from / write to this buffer with Data Port Read/Write
699 * instructions/messages.
702 brw_create_buffer_surface(struct brw_context
*brw
,
706 uint32_t *out_offset
)
708 /* Use a raw surface so we can reuse existing untyped read/write/atomic
709 * messages. We need these specifically for the fragment shader since they
710 * include a pixel mask header that we need to ensure correct behavior
711 * with helper invocations, which cannot write to the buffer.
713 brw_emit_buffer_surface_state(brw
, out_offset
, bo
, offset
,
719 * Set up a binding table entry for use by stream output logic (transform
722 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
725 brw_update_sol_surface(struct brw_context
*brw
,
726 struct gl_buffer_object
*buffer_obj
,
727 uint32_t *out_offset
, unsigned num_vector_components
,
728 unsigned stride_dwords
, unsigned offset_dwords
)
730 struct intel_buffer_object
*intel_bo
= intel_buffer_object(buffer_obj
);
731 uint32_t offset_bytes
= 4 * offset_dwords
;
732 struct brw_bo
*bo
= intel_bufferobj_buffer(brw
, intel_bo
,
734 buffer_obj
->Size
- offset_bytes
,
736 uint32_t *surf
= brw_state_batch(brw
, 6 * 4, 32, out_offset
);
737 uint32_t pitch_minus_1
= 4*stride_dwords
- 1;
738 size_t size_dwords
= buffer_obj
->Size
/ 4;
739 uint32_t buffer_size_minus_1
, width
, height
, depth
, surface_format
;
741 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
742 * too big to map using a single binding table entry?
744 assert((size_dwords
- offset_dwords
) / stride_dwords
745 <= BRW_MAX_NUM_BUFFER_ENTRIES
);
747 if (size_dwords
> offset_dwords
+ num_vector_components
) {
748 /* There is room for at least 1 transform feedback output in the buffer.
749 * Compute the number of additional transform feedback outputs the
750 * buffer has room for.
752 buffer_size_minus_1
=
753 (size_dwords
- offset_dwords
- num_vector_components
) / stride_dwords
;
755 /* There isn't even room for a single transform feedback output in the
756 * buffer. We can't configure the binding table entry to prevent output
757 * entirely; we'll have to rely on the geometry shader to detect
758 * overflow. But to minimize the damage in case of a bug, set up the
759 * binding table entry to just allow a single output.
761 buffer_size_minus_1
= 0;
763 width
= buffer_size_minus_1
& 0x7f;
764 height
= (buffer_size_minus_1
& 0xfff80) >> 7;
765 depth
= (buffer_size_minus_1
& 0x7f00000) >> 20;
767 switch (num_vector_components
) {
769 surface_format
= ISL_FORMAT_R32_FLOAT
;
772 surface_format
= ISL_FORMAT_R32G32_FLOAT
;
775 surface_format
= ISL_FORMAT_R32G32B32_FLOAT
;
778 surface_format
= ISL_FORMAT_R32G32B32A32_FLOAT
;
781 unreachable("Invalid vector size for transform feedback output");
784 surf
[0] = BRW_SURFACE_BUFFER
<< BRW_SURFACE_TYPE_SHIFT
|
785 BRW_SURFACE_MIPMAPLAYOUT_BELOW
<< BRW_SURFACE_MIPLAYOUT_SHIFT
|
786 surface_format
<< BRW_SURFACE_FORMAT_SHIFT
|
787 BRW_SURFACE_RC_READ_WRITE
;
788 surf
[1] = bo
->offset64
+ offset_bytes
; /* reloc */
789 surf
[2] = (width
<< BRW_SURFACE_WIDTH_SHIFT
|
790 height
<< BRW_SURFACE_HEIGHT_SHIFT
);
791 surf
[3] = (depth
<< BRW_SURFACE_DEPTH_SHIFT
|
792 pitch_minus_1
<< BRW_SURFACE_PITCH_SHIFT
);
796 /* Emit relocation to surface contents. */
797 brw_emit_reloc(&brw
->batch
, *out_offset
+ 4, bo
, offset_bytes
,
798 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
);
801 /* Creates a new WM constant buffer reflecting the current fragment program's
802 * constants, if needed by the fragment program.
804 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
808 brw_upload_wm_pull_constants(struct brw_context
*brw
)
810 struct brw_stage_state
*stage_state
= &brw
->wm
.base
;
811 /* BRW_NEW_FRAGMENT_PROGRAM */
812 struct brw_program
*fp
= (struct brw_program
*) brw
->fragment_program
;
813 /* BRW_NEW_FS_PROG_DATA */
814 struct brw_stage_prog_data
*prog_data
= brw
->wm
.base
.prog_data
;
816 _mesa_shader_write_subroutine_indices(&brw
->ctx
, MESA_SHADER_FRAGMENT
);
817 /* _NEW_PROGRAM_CONSTANTS */
818 brw_upload_pull_constants(brw
, BRW_NEW_SURFACES
, &fp
->program
,
819 stage_state
, prog_data
);
822 const struct brw_tracked_state brw_wm_pull_constants
= {
824 .mesa
= _NEW_PROGRAM_CONSTANTS
,
825 .brw
= BRW_NEW_BATCH
|
827 BRW_NEW_FRAGMENT_PROGRAM
|
828 BRW_NEW_FS_PROG_DATA
,
830 .emit
= brw_upload_wm_pull_constants
,
834 * Creates a null renderbuffer surface.
836 * This is used when the shader doesn't write to any color output. An FB
837 * write to target 0 will still be emitted, because that's how the thread is
838 * terminated (and computed depth is returned), so we need to have the
839 * hardware discard the target 0 color output..
842 brw_emit_null_surface_state(struct brw_context
*brw
,
846 uint32_t *out_offset
)
848 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
851 * A null surface will be used in instances where an actual surface is
852 * not bound. When a write message is generated to a null surface, no
853 * actual surface is written to. When a read message (including any
854 * sampling engine message) is generated to a null surface, the result
855 * is all zeros. Note that a null surface type is allowed to be used
856 * with all messages, even if it is not specificially indicated as
857 * supported. All of the remaining fields in surface state are ignored
858 * for null surfaces, with the following exceptions:
860 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
861 * depth buffer’s corresponding state for all render target surfaces,
864 * - Surface Format must be R8G8B8A8_UNORM.
866 unsigned surface_type
= BRW_SURFACE_NULL
;
867 struct brw_bo
*bo
= NULL
;
868 unsigned pitch_minus_1
= 0;
869 uint32_t multisampling_state
= 0;
870 uint32_t *surf
= brw_state_batch(brw
, 6 * 4, 32, out_offset
);
873 /* On Gen6, null render targets seem to cause GPU hangs when
874 * multisampling. So work around this problem by rendering into dummy
877 * To decrease the amount of memory needed by the workaround buffer, we
878 * set its pitch to 128 bytes (the width of a Y tile). This means that
879 * the amount of memory needed for the workaround buffer is
880 * (width_in_tiles + height_in_tiles - 1) tiles.
882 * Note that since the workaround buffer will be interpreted by the
883 * hardware as an interleaved multisampled buffer, we need to compute
884 * width_in_tiles and height_in_tiles by dividing the width and height
885 * by 16 rather than the normal Y-tile size of 32.
887 unsigned width_in_tiles
= ALIGN(width
, 16) / 16;
888 unsigned height_in_tiles
= ALIGN(height
, 16) / 16;
889 unsigned size_needed
= (width_in_tiles
+ height_in_tiles
- 1) * 4096;
890 brw_get_scratch_bo(brw
, &brw
->wm
.multisampled_null_render_target_bo
,
892 bo
= brw
->wm
.multisampled_null_render_target_bo
;
893 surface_type
= BRW_SURFACE_2D
;
895 multisampling_state
= brw_get_surface_num_multisamples(samples
);
898 surf
[0] = (surface_type
<< BRW_SURFACE_TYPE_SHIFT
|
899 ISL_FORMAT_B8G8R8A8_UNORM
<< BRW_SURFACE_FORMAT_SHIFT
);
901 surf
[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT
|
902 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT
|
903 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT
|
904 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT
);
906 surf
[1] = bo
? bo
->offset64
: 0;
907 surf
[2] = ((width
- 1) << BRW_SURFACE_WIDTH_SHIFT
|
908 (height
- 1) << BRW_SURFACE_HEIGHT_SHIFT
);
910 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
913 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
915 surf
[3] = (BRW_SURFACE_TILED
| BRW_SURFACE_TILED_Y
|
916 pitch_minus_1
<< BRW_SURFACE_PITCH_SHIFT
);
917 surf
[4] = multisampling_state
;
921 brw_emit_reloc(&brw
->batch
, *out_offset
+ 4, bo
, 0,
922 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
);
927 * Sets up a surface state structure to point at the given region.
928 * While it is only used for the front/back buffer currently, it should be
929 * usable for further buffers when doing ARB_draw_buffer support.
932 gen4_update_renderbuffer_surface(struct brw_context
*brw
,
933 struct gl_renderbuffer
*rb
,
934 uint32_t flags
, unsigned unit
,
937 struct gl_context
*ctx
= &brw
->ctx
;
938 struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
939 struct intel_mipmap_tree
*mt
= irb
->mt
;
941 uint32_t tile_x
, tile_y
;
942 enum isl_format format
;
945 mesa_format rb_format
= _mesa_get_render_format(ctx
, intel_rb_format(irb
));
946 /* BRW_NEW_FS_PROG_DATA */
948 assert(!(flags
& INTEL_RENDERBUFFER_LAYERED
));
949 assert(!(flags
& INTEL_AUX_BUFFER_DISABLED
));
951 if (rb
->TexImage
&& !brw
->has_surface_tile_offset
) {
952 intel_renderbuffer_get_tile_offsets(irb
, &tile_x
, &tile_y
);
954 if (tile_x
!= 0 || tile_y
!= 0) {
955 /* Original gen4 hardware couldn't draw to a non-tile-aligned
956 * destination in a miptree unless you actually setup your renderbuffer
957 * as a miptree and used the fragile lod/array_index/etc. controls to
958 * select the image. So, instead, we just make a new single-level
959 * miptree and render into that.
961 intel_renderbuffer_move_to_temp(brw
, irb
, false);
962 assert(irb
->align_wa_mt
);
963 mt
= irb
->align_wa_mt
;
967 surf
= brw_state_batch(brw
, 6 * 4, 32, &offset
);
969 format
= brw
->mesa_to_isl_render_format
[rb_format
];
970 if (unlikely(!brw
->mesa_format_supports_render
[rb_format
])) {
971 _mesa_problem(ctx
, "%s: renderbuffer format %s unsupported\n",
972 __func__
, _mesa_get_format_name(rb_format
));
975 surf
[0] = (BRW_SURFACE_2D
<< BRW_SURFACE_TYPE_SHIFT
|
976 format
<< BRW_SURFACE_FORMAT_SHIFT
);
979 assert(mt
->offset
% mt
->cpp
== 0);
980 surf
[1] = (intel_renderbuffer_get_tile_offsets(irb
, &tile_x
, &tile_y
) +
981 mt
->bo
->offset64
+ mt
->offset
);
983 surf
[2] = ((rb
->Width
- 1) << BRW_SURFACE_WIDTH_SHIFT
|
984 (rb
->Height
- 1) << BRW_SURFACE_HEIGHT_SHIFT
);
986 surf
[3] = (brw_get_surface_tiling_bits(mt
->surf
.tiling
) |
987 (mt
->surf
.row_pitch
- 1) << BRW_SURFACE_PITCH_SHIFT
);
989 surf
[4] = brw_get_surface_num_multisamples(mt
->surf
.samples
);
991 assert(brw
->has_surface_tile_offset
|| (tile_x
== 0 && tile_y
== 0));
992 /* Note that the low bits of these fields are missing, so
993 * there's the possibility of getting in trouble.
995 assert(tile_x
% 4 == 0);
996 assert(tile_y
% 2 == 0);
997 surf
[5] = ((tile_x
/ 4) << BRW_SURFACE_X_OFFSET_SHIFT
|
998 (tile_y
/ 2) << BRW_SURFACE_Y_OFFSET_SHIFT
|
999 (mt
->surf
.image_alignment_el
.height
== 4 ?
1000 BRW_SURFACE_VERTICAL_ALIGN_ENABLE
: 0));
1004 if (!ctx
->Color
.ColorLogicOpEnabled
&& !ctx
->Color
._AdvancedBlendMode
&&
1005 (ctx
->Color
.BlendEnabled
& (1 << unit
)))
1006 surf
[0] |= BRW_SURFACE_BLEND_ENABLED
;
1008 if (!ctx
->Color
.ColorMask
[unit
][0])
1009 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT
;
1010 if (!ctx
->Color
.ColorMask
[unit
][1])
1011 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT
;
1012 if (!ctx
->Color
.ColorMask
[unit
][2])
1013 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT
;
1015 /* As mentioned above, disable writes to the alpha component when the
1016 * renderbuffer is XRGB.
1018 if (ctx
->DrawBuffer
->Visual
.alphaBits
== 0 ||
1019 !ctx
->Color
.ColorMask
[unit
][3]) {
1020 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT
;
1024 brw_emit_reloc(&brw
->batch
, offset
+ 4, mt
->bo
, surf
[1] - mt
->bo
->offset64
,
1025 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
);
1031 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
1034 brw_update_renderbuffer_surfaces(struct brw_context
*brw
,
1035 const struct gl_framebuffer
*fb
,
1036 uint32_t render_target_start
,
1037 uint32_t *surf_offset
)
1040 const unsigned int w
= _mesa_geometric_width(fb
);
1041 const unsigned int h
= _mesa_geometric_height(fb
);
1042 const unsigned int s
= _mesa_geometric_samples(fb
);
1044 /* Update surfaces for drawing buffers */
1045 if (fb
->_NumColorDrawBuffers
>= 1) {
1046 for (i
= 0; i
< fb
->_NumColorDrawBuffers
; i
++) {
1047 const uint32_t surf_index
= render_target_start
+ i
;
1048 const int flags
= (_mesa_geometric_layers(fb
) > 0 ?
1049 INTEL_RENDERBUFFER_LAYERED
: 0) |
1050 (brw
->draw_aux_buffer_disabled
[i
] ?
1051 INTEL_AUX_BUFFER_DISABLED
: 0);
1053 if (intel_renderbuffer(fb
->_ColorDrawBuffers
[i
])) {
1054 surf_offset
[surf_index
] =
1055 brw
->vtbl
.update_renderbuffer_surface(
1056 brw
, fb
->_ColorDrawBuffers
[i
], flags
, i
, surf_index
);
1058 brw
->vtbl
.emit_null_surface_state(brw
, w
, h
, s
,
1059 &surf_offset
[surf_index
]);
1063 const uint32_t surf_index
= render_target_start
;
1064 brw
->vtbl
.emit_null_surface_state(brw
, w
, h
, s
,
1065 &surf_offset
[surf_index
]);
1070 update_renderbuffer_surfaces(struct brw_context
*brw
)
1072 const struct gl_context
*ctx
= &brw
->ctx
;
1074 /* BRW_NEW_FS_PROG_DATA */
1075 const struct brw_wm_prog_data
*wm_prog_data
=
1076 brw_wm_prog_data(brw
->wm
.base
.prog_data
);
1078 /* _NEW_BUFFERS | _NEW_COLOR */
1079 const struct gl_framebuffer
*fb
= ctx
->DrawBuffer
;
1080 brw_update_renderbuffer_surfaces(
1082 wm_prog_data
->binding_table
.render_target_start
,
1083 brw
->wm
.base
.surf_offset
);
1084 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1087 const struct brw_tracked_state brw_renderbuffer_surfaces
= {
1089 .mesa
= _NEW_BUFFERS
|
1091 .brw
= BRW_NEW_BATCH
|
1093 BRW_NEW_FS_PROG_DATA
,
1095 .emit
= update_renderbuffer_surfaces
,
1098 const struct brw_tracked_state gen6_renderbuffer_surfaces
= {
1100 .mesa
= _NEW_BUFFERS
,
1101 .brw
= BRW_NEW_BATCH
|
1104 .emit
= update_renderbuffer_surfaces
,
1108 update_renderbuffer_read_surfaces(struct brw_context
*brw
)
1110 const struct gl_context
*ctx
= &brw
->ctx
;
1112 /* BRW_NEW_FS_PROG_DATA */
1113 const struct brw_wm_prog_data
*wm_prog_data
=
1114 brw_wm_prog_data(brw
->wm
.base
.prog_data
);
1116 /* BRW_NEW_FRAGMENT_PROGRAM */
1117 if (!ctx
->Extensions
.MESA_shader_framebuffer_fetch
&&
1118 brw
->fragment_program
&& brw
->fragment_program
->info
.outputs_read
) {
1120 const struct gl_framebuffer
*fb
= ctx
->DrawBuffer
;
1122 for (unsigned i
= 0; i
< fb
->_NumColorDrawBuffers
; i
++) {
1123 struct gl_renderbuffer
*rb
= fb
->_ColorDrawBuffers
[i
];
1124 const struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
1125 const unsigned surf_index
=
1126 wm_prog_data
->binding_table
.render_target_read_start
+ i
;
1127 uint32_t *surf_offset
= &brw
->wm
.base
.surf_offset
[surf_index
];
1130 const enum isl_format format
= brw
->mesa_to_isl_render_format
[
1131 _mesa_get_render_format(ctx
, intel_rb_format(irb
))];
1132 assert(isl_format_supports_sampling(&brw
->screen
->devinfo
,
1135 /* Override the target of the texture if the render buffer is a
1136 * single slice of a 3D texture (since the minimum array element
1137 * field of the surface state structure is ignored by the sampler
1138 * unit for 3D textures on some hardware), or if the render buffer
1139 * is a 1D array (since shaders always provide the array index
1140 * coordinate at the Z component to avoid state-dependent
1141 * recompiles when changing the texture target of the
1144 const GLenum target
=
1145 (irb
->mt
->target
== GL_TEXTURE_3D
&&
1146 irb
->layer_count
== 1) ? GL_TEXTURE_2D
:
1147 irb
->mt
->target
== GL_TEXTURE_1D_ARRAY
? GL_TEXTURE_2D_ARRAY
:
1150 const struct isl_view view
= {
1152 .base_level
= irb
->mt_level
- irb
->mt
->first_level
,
1154 .base_array_layer
= irb
->mt_layer
,
1155 .array_len
= irb
->layer_count
,
1156 .swizzle
= ISL_SWIZZLE_IDENTITY
,
1157 .usage
= ISL_SURF_USAGE_TEXTURE_BIT
,
1160 enum isl_aux_usage aux_usage
=
1161 intel_miptree_texture_aux_usage(brw
, irb
->mt
, format
);
1162 if (brw
->draw_aux_buffer_disabled
[i
])
1163 aux_usage
= ISL_AUX_USAGE_NONE
;
1165 brw_emit_surface_state(brw
, irb
->mt
, target
, view
, aux_usage
,
1167 surf_offset
, surf_index
,
1168 I915_GEM_DOMAIN_SAMPLER
, 0);
1171 brw
->vtbl
.emit_null_surface_state(
1172 brw
, _mesa_geometric_width(fb
), _mesa_geometric_height(fb
),
1173 _mesa_geometric_samples(fb
), surf_offset
);
1177 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1181 const struct brw_tracked_state brw_renderbuffer_read_surfaces
= {
1183 .mesa
= _NEW_BUFFERS
,
1184 .brw
= BRW_NEW_BATCH
|
1185 BRW_NEW_FRAGMENT_PROGRAM
|
1186 BRW_NEW_FS_PROG_DATA
,
1188 .emit
= update_renderbuffer_read_surfaces
,
1192 update_stage_texture_surfaces(struct brw_context
*brw
,
1193 const struct gl_program
*prog
,
1194 struct brw_stage_state
*stage_state
,
1195 bool for_gather
, uint32_t plane
)
1200 struct gl_context
*ctx
= &brw
->ctx
;
1202 uint32_t *surf_offset
= stage_state
->surf_offset
;
1204 /* BRW_NEW_*_PROG_DATA */
1206 surf_offset
+= stage_state
->prog_data
->binding_table
.gather_texture_start
;
1208 surf_offset
+= stage_state
->prog_data
->binding_table
.plane_start
[plane
];
1210 unsigned num_samplers
= util_last_bit(prog
->SamplersUsed
);
1211 for (unsigned s
= 0; s
< num_samplers
; s
++) {
1214 if (prog
->SamplersUsed
& (1 << s
)) {
1215 const unsigned unit
= prog
->SamplerUnits
[s
];
1218 if (ctx
->Texture
.Unit
[unit
]._Current
) {
1219 brw_update_texture_surface(ctx
, unit
, surf_offset
+ s
, for_gather
, plane
);
1227 * Construct SURFACE_STATE objects for enabled textures.
1230 brw_update_texture_surfaces(struct brw_context
*brw
)
1232 /* BRW_NEW_VERTEX_PROGRAM */
1233 struct gl_program
*vs
= (struct gl_program
*) brw
->vertex_program
;
1235 /* BRW_NEW_TESS_PROGRAMS */
1236 struct gl_program
*tcs
= (struct gl_program
*) brw
->tess_ctrl_program
;
1237 struct gl_program
*tes
= (struct gl_program
*) brw
->tess_eval_program
;
1239 /* BRW_NEW_GEOMETRY_PROGRAM */
1240 struct gl_program
*gs
= (struct gl_program
*) brw
->geometry_program
;
1242 /* BRW_NEW_FRAGMENT_PROGRAM */
1243 struct gl_program
*fs
= (struct gl_program
*) brw
->fragment_program
;
1246 update_stage_texture_surfaces(brw
, vs
, &brw
->vs
.base
, false, 0);
1247 update_stage_texture_surfaces(brw
, tcs
, &brw
->tcs
.base
, false, 0);
1248 update_stage_texture_surfaces(brw
, tes
, &brw
->tes
.base
, false, 0);
1249 update_stage_texture_surfaces(brw
, gs
, &brw
->gs
.base
, false, 0);
1250 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, false, 0);
1252 /* emit alternate set of surface state for gather. this
1253 * allows the surface format to be overriden for only the
1254 * gather4 messages. */
1256 if (vs
&& vs
->nir
->info
.uses_texture_gather
)
1257 update_stage_texture_surfaces(brw
, vs
, &brw
->vs
.base
, true, 0);
1258 if (tcs
&& tcs
->nir
->info
.uses_texture_gather
)
1259 update_stage_texture_surfaces(brw
, tcs
, &brw
->tcs
.base
, true, 0);
1260 if (tes
&& tes
->nir
->info
.uses_texture_gather
)
1261 update_stage_texture_surfaces(brw
, tes
, &brw
->tes
.base
, true, 0);
1262 if (gs
&& gs
->nir
->info
.uses_texture_gather
)
1263 update_stage_texture_surfaces(brw
, gs
, &brw
->gs
.base
, true, 0);
1264 if (fs
&& fs
->nir
->info
.uses_texture_gather
)
1265 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, true, 0);
1269 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, false, 1);
1270 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, false, 2);
1273 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1276 const struct brw_tracked_state brw_texture_surfaces
= {
1278 .mesa
= _NEW_TEXTURE
,
1279 .brw
= BRW_NEW_BATCH
|
1281 BRW_NEW_FRAGMENT_PROGRAM
|
1282 BRW_NEW_FS_PROG_DATA
|
1283 BRW_NEW_GEOMETRY_PROGRAM
|
1284 BRW_NEW_GS_PROG_DATA
|
1285 BRW_NEW_TESS_PROGRAMS
|
1286 BRW_NEW_TCS_PROG_DATA
|
1287 BRW_NEW_TES_PROG_DATA
|
1288 BRW_NEW_TEXTURE_BUFFER
|
1289 BRW_NEW_VERTEX_PROGRAM
|
1290 BRW_NEW_VS_PROG_DATA
,
1292 .emit
= brw_update_texture_surfaces
,
1296 brw_update_cs_texture_surfaces(struct brw_context
*brw
)
1298 /* BRW_NEW_COMPUTE_PROGRAM */
1299 struct gl_program
*cs
= (struct gl_program
*) brw
->compute_program
;
1302 update_stage_texture_surfaces(brw
, cs
, &brw
->cs
.base
, false, 0);
1304 /* emit alternate set of surface state for gather. this
1305 * allows the surface format to be overriden for only the
1309 if (cs
&& cs
->nir
->info
.uses_texture_gather
)
1310 update_stage_texture_surfaces(brw
, cs
, &brw
->cs
.base
, true, 0);
1313 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1316 const struct brw_tracked_state brw_cs_texture_surfaces
= {
1318 .mesa
= _NEW_TEXTURE
,
1319 .brw
= BRW_NEW_BATCH
|
1321 BRW_NEW_COMPUTE_PROGRAM
,
1323 .emit
= brw_update_cs_texture_surfaces
,
1328 brw_upload_ubo_surfaces(struct brw_context
*brw
, struct gl_program
*prog
,
1329 struct brw_stage_state
*stage_state
,
1330 struct brw_stage_prog_data
*prog_data
)
1332 struct gl_context
*ctx
= &brw
->ctx
;
1337 uint32_t *ubo_surf_offsets
=
1338 &stage_state
->surf_offset
[prog_data
->binding_table
.ubo_start
];
1340 for (int i
= 0; i
< prog
->info
.num_ubos
; i
++) {
1341 struct gl_uniform_buffer_binding
*binding
=
1342 &ctx
->UniformBufferBindings
[prog
->sh
.UniformBlocks
[i
]->Binding
];
1344 if (binding
->BufferObject
== ctx
->Shared
->NullBufferObj
) {
1345 brw
->vtbl
.emit_null_surface_state(brw
, 1, 1, 1, &ubo_surf_offsets
[i
]);
1347 struct intel_buffer_object
*intel_bo
=
1348 intel_buffer_object(binding
->BufferObject
);
1349 GLsizeiptr size
= binding
->BufferObject
->Size
- binding
->Offset
;
1350 if (!binding
->AutomaticSize
)
1351 size
= MIN2(size
, binding
->Size
);
1353 intel_bufferobj_buffer(brw
, intel_bo
,
1356 brw_create_constant_surface(brw
, bo
, binding
->Offset
,
1358 &ubo_surf_offsets
[i
]);
1362 uint32_t *ssbo_surf_offsets
=
1363 &stage_state
->surf_offset
[prog_data
->binding_table
.ssbo_start
];
1365 for (int i
= 0; i
< prog
->info
.num_ssbos
; i
++) {
1366 struct gl_shader_storage_buffer_binding
*binding
=
1367 &ctx
->ShaderStorageBufferBindings
[prog
->sh
.ShaderStorageBlocks
[i
]->Binding
];
1369 if (binding
->BufferObject
== ctx
->Shared
->NullBufferObj
) {
1370 brw
->vtbl
.emit_null_surface_state(brw
, 1, 1, 1, &ssbo_surf_offsets
[i
]);
1372 struct intel_buffer_object
*intel_bo
=
1373 intel_buffer_object(binding
->BufferObject
);
1374 GLsizeiptr size
= binding
->BufferObject
->Size
- binding
->Offset
;
1375 if (!binding
->AutomaticSize
)
1376 size
= MIN2(size
, binding
->Size
);
1378 intel_bufferobj_buffer(brw
, intel_bo
,
1381 brw_create_buffer_surface(brw
, bo
, binding
->Offset
,
1383 &ssbo_surf_offsets
[i
]);
1387 stage_state
->push_constants_dirty
= true;
1389 if (prog
->info
.num_ubos
|| prog
->info
.num_ssbos
)
1390 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1394 brw_upload_wm_ubo_surfaces(struct brw_context
*brw
)
1396 struct gl_context
*ctx
= &brw
->ctx
;
1398 struct gl_program
*prog
= ctx
->FragmentProgram
._Current
;
1400 /* BRW_NEW_FS_PROG_DATA */
1401 brw_upload_ubo_surfaces(brw
, prog
, &brw
->wm
.base
, brw
->wm
.base
.prog_data
);
1404 const struct brw_tracked_state brw_wm_ubo_surfaces
= {
1406 .mesa
= _NEW_PROGRAM
,
1407 .brw
= BRW_NEW_BATCH
|
1409 BRW_NEW_FS_PROG_DATA
|
1410 BRW_NEW_UNIFORM_BUFFER
,
1412 .emit
= brw_upload_wm_ubo_surfaces
,
1416 brw_upload_cs_ubo_surfaces(struct brw_context
*brw
)
1418 struct gl_context
*ctx
= &brw
->ctx
;
1420 struct gl_program
*prog
=
1421 ctx
->_Shader
->CurrentProgram
[MESA_SHADER_COMPUTE
];
1423 /* BRW_NEW_CS_PROG_DATA */
1424 brw_upload_ubo_surfaces(brw
, prog
, &brw
->cs
.base
, brw
->cs
.base
.prog_data
);
1427 const struct brw_tracked_state brw_cs_ubo_surfaces
= {
1429 .mesa
= _NEW_PROGRAM
,
1430 .brw
= BRW_NEW_BATCH
|
1432 BRW_NEW_CS_PROG_DATA
|
1433 BRW_NEW_UNIFORM_BUFFER
,
1435 .emit
= brw_upload_cs_ubo_surfaces
,
1439 brw_upload_abo_surfaces(struct brw_context
*brw
,
1440 const struct gl_program
*prog
,
1441 struct brw_stage_state
*stage_state
,
1442 struct brw_stage_prog_data
*prog_data
)
1444 struct gl_context
*ctx
= &brw
->ctx
;
1445 uint32_t *surf_offsets
=
1446 &stage_state
->surf_offset
[prog_data
->binding_table
.abo_start
];
1448 if (prog
->info
.num_abos
) {
1449 for (unsigned i
= 0; i
< prog
->info
.num_abos
; i
++) {
1450 struct gl_atomic_buffer_binding
*binding
=
1451 &ctx
->AtomicBufferBindings
[prog
->sh
.AtomicBuffers
[i
]->Binding
];
1452 struct intel_buffer_object
*intel_bo
=
1453 intel_buffer_object(binding
->BufferObject
);
1455 intel_bufferobj_buffer(brw
, intel_bo
, binding
->Offset
,
1456 intel_bo
->Base
.Size
- binding
->Offset
,
1459 brw_emit_buffer_surface_state(brw
, &surf_offsets
[i
], bo
,
1460 binding
->Offset
, ISL_FORMAT_RAW
,
1461 bo
->size
- binding
->Offset
, 1, true);
1464 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1469 brw_upload_wm_abo_surfaces(struct brw_context
*brw
)
1472 const struct gl_program
*wm
= brw
->fragment_program
;
1475 /* BRW_NEW_FS_PROG_DATA */
1476 brw_upload_abo_surfaces(brw
, wm
, &brw
->wm
.base
, brw
->wm
.base
.prog_data
);
1480 const struct brw_tracked_state brw_wm_abo_surfaces
= {
1482 .mesa
= _NEW_PROGRAM
,
1483 .brw
= BRW_NEW_ATOMIC_BUFFER
|
1486 BRW_NEW_FS_PROG_DATA
,
1488 .emit
= brw_upload_wm_abo_surfaces
,
1492 brw_upload_cs_abo_surfaces(struct brw_context
*brw
)
1495 const struct gl_program
*cp
= brw
->compute_program
;
1498 /* BRW_NEW_CS_PROG_DATA */
1499 brw_upload_abo_surfaces(brw
, cp
, &brw
->cs
.base
, brw
->cs
.base
.prog_data
);
1503 const struct brw_tracked_state brw_cs_abo_surfaces
= {
1505 .mesa
= _NEW_PROGRAM
,
1506 .brw
= BRW_NEW_ATOMIC_BUFFER
|
1509 BRW_NEW_CS_PROG_DATA
,
1511 .emit
= brw_upload_cs_abo_surfaces
,
1515 brw_upload_cs_image_surfaces(struct brw_context
*brw
)
1518 const struct gl_program
*cp
= brw
->compute_program
;
1521 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1522 brw_upload_image_surfaces(brw
, cp
, &brw
->cs
.base
,
1523 brw
->cs
.base
.prog_data
);
1527 const struct brw_tracked_state brw_cs_image_surfaces
= {
1529 .mesa
= _NEW_TEXTURE
| _NEW_PROGRAM
,
1530 .brw
= BRW_NEW_BATCH
|
1532 BRW_NEW_CS_PROG_DATA
|
1535 .emit
= brw_upload_cs_image_surfaces
,
1539 get_image_format(struct brw_context
*brw
, mesa_format format
, GLenum access
)
1541 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
1542 enum isl_format hw_format
= brw_isl_format_for_mesa_format(format
);
1543 if (access
== GL_WRITE_ONLY
) {
1545 } else if (isl_has_matching_typed_storage_image_format(devinfo
, hw_format
)) {
1546 /* Typed surface reads support a very limited subset of the shader
1547 * image formats. Translate it into the closest format the
1548 * hardware supports.
1550 return isl_lower_storage_image_format(devinfo
, hw_format
);
1552 /* The hardware doesn't actually support a typed format that we can use
1553 * so we have to fall back to untyped read/write messages.
1555 return ISL_FORMAT_RAW
;
1560 update_default_image_param(struct brw_context
*brw
,
1561 struct gl_image_unit
*u
,
1562 unsigned surface_idx
,
1563 struct brw_image_param
*param
)
1565 memset(param
, 0, sizeof(*param
));
1566 param
->surface_idx
= surface_idx
;
1567 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1568 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1569 * detailed explanation of these parameters.
1571 param
->swizzling
[0] = 0xff;
1572 param
->swizzling
[1] = 0xff;
1576 update_buffer_image_param(struct brw_context
*brw
,
1577 struct gl_image_unit
*u
,
1578 unsigned surface_idx
,
1579 struct brw_image_param
*param
)
1581 struct gl_buffer_object
*obj
= u
->TexObj
->BufferObject
;
1582 const uint32_t size
= MIN2((uint32_t)u
->TexObj
->BufferSize
, obj
->Size
);
1583 update_default_image_param(brw
, u
, surface_idx
, param
);
1585 param
->size
[0] = size
/ _mesa_get_format_bytes(u
->_ActualFormat
);
1586 param
->stride
[0] = _mesa_get_format_bytes(u
->_ActualFormat
);
1590 get_image_num_layers(const struct intel_mipmap_tree
*mt
, GLenum target
,
1593 if (target
== GL_TEXTURE_CUBE_MAP
)
1596 return target
== GL_TEXTURE_3D
?
1597 minify(mt
->surf
.logical_level0_px
.depth
, level
) :
1598 mt
->surf
.logical_level0_px
.array_len
;
1602 update_image_surface(struct brw_context
*brw
,
1603 struct gl_image_unit
*u
,
1605 unsigned surface_idx
,
1606 uint32_t *surf_offset
,
1607 struct brw_image_param
*param
)
1609 if (_mesa_is_image_unit_valid(&brw
->ctx
, u
)) {
1610 struct gl_texture_object
*obj
= u
->TexObj
;
1611 const unsigned format
= get_image_format(brw
, u
->_ActualFormat
, access
);
1613 if (obj
->Target
== GL_TEXTURE_BUFFER
) {
1614 struct intel_buffer_object
*intel_obj
=
1615 intel_buffer_object(obj
->BufferObject
);
1616 const unsigned texel_size
= (format
== ISL_FORMAT_RAW
? 1 :
1617 _mesa_get_format_bytes(u
->_ActualFormat
));
1619 brw_emit_buffer_surface_state(
1620 brw
, surf_offset
, intel_obj
->buffer
, obj
->BufferOffset
,
1621 format
, intel_obj
->Base
.Size
, texel_size
,
1622 access
!= GL_READ_ONLY
);
1624 update_buffer_image_param(brw
, u
, surface_idx
, param
);
1627 struct intel_texture_object
*intel_obj
= intel_texture_object(obj
);
1628 struct intel_mipmap_tree
*mt
= intel_obj
->mt
;
1629 const unsigned num_layers
= u
->Layered
?
1630 get_image_num_layers(mt
, obj
->Target
, u
->Level
) : 1;
1632 struct isl_view view
= {
1634 .base_level
= obj
->MinLevel
+ u
->Level
,
1636 .base_array_layer
= obj
->MinLayer
+ u
->_Layer
,
1637 .array_len
= num_layers
,
1638 .swizzle
= ISL_SWIZZLE_IDENTITY
,
1639 .usage
= ISL_SURF_USAGE_STORAGE_BIT
,
1642 if (format
== ISL_FORMAT_RAW
) {
1643 brw_emit_buffer_surface_state(
1644 brw
, surf_offset
, mt
->bo
, mt
->offset
,
1645 format
, mt
->bo
->size
- mt
->offset
, 1 /* pitch */,
1646 access
!= GL_READ_ONLY
);
1649 const int surf_index
= surf_offset
- &brw
->wm
.base
.surf_offset
[0];
1650 assert(!intel_miptree_has_color_unresolved(mt
,
1652 view
.base_array_layer
,
1654 brw_emit_surface_state(brw
, mt
, mt
->target
, view
,
1655 ISL_AUX_USAGE_NONE
, tex_mocs
[brw
->gen
],
1656 surf_offset
, surf_index
,
1657 I915_GEM_DOMAIN_SAMPLER
,
1658 access
== GL_READ_ONLY
? 0 :
1659 I915_GEM_DOMAIN_SAMPLER
);
1662 isl_surf_fill_image_param(&brw
->isl_dev
, param
, &mt
->surf
, &view
);
1663 param
->surface_idx
= surface_idx
;
1667 brw
->vtbl
.emit_null_surface_state(brw
, 1, 1, 1, surf_offset
);
1668 update_default_image_param(brw
, u
, surface_idx
, param
);
1673 brw_upload_image_surfaces(struct brw_context
*brw
,
1674 const struct gl_program
*prog
,
1675 struct brw_stage_state
*stage_state
,
1676 struct brw_stage_prog_data
*prog_data
)
1679 struct gl_context
*ctx
= &brw
->ctx
;
1681 if (prog
->info
.num_images
) {
1682 for (unsigned i
= 0; i
< prog
->info
.num_images
; i
++) {
1683 struct gl_image_unit
*u
= &ctx
->ImageUnits
[prog
->sh
.ImageUnits
[i
]];
1684 const unsigned surf_idx
= prog_data
->binding_table
.image_start
+ i
;
1686 update_image_surface(brw
, u
, prog
->sh
.ImageAccess
[i
],
1688 &stage_state
->surf_offset
[surf_idx
],
1689 &prog_data
->image_param
[i
]);
1692 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1693 /* This may have changed the image metadata dependent on the context
1694 * image unit state and passed to the program as uniforms, make sure
1695 * that push and pull constants are reuploaded.
1697 brw
->NewGLState
|= _NEW_PROGRAM_CONSTANTS
;
1702 brw_upload_wm_image_surfaces(struct brw_context
*brw
)
1704 /* BRW_NEW_FRAGMENT_PROGRAM */
1705 const struct gl_program
*wm
= brw
->fragment_program
;
1708 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1709 brw_upload_image_surfaces(brw
, wm
, &brw
->wm
.base
,
1710 brw
->wm
.base
.prog_data
);
1714 const struct brw_tracked_state brw_wm_image_surfaces
= {
1716 .mesa
= _NEW_TEXTURE
,
1717 .brw
= BRW_NEW_BATCH
|
1719 BRW_NEW_FRAGMENT_PROGRAM
|
1720 BRW_NEW_FS_PROG_DATA
|
1723 .emit
= brw_upload_wm_image_surfaces
,
1727 gen4_init_vtable_surface_functions(struct brw_context
*brw
)
1729 brw
->vtbl
.update_renderbuffer_surface
= gen4_update_renderbuffer_surface
;
1730 brw
->vtbl
.emit_null_surface_state
= brw_emit_null_surface_state
;
1734 gen6_init_vtable_surface_functions(struct brw_context
*brw
)
1736 gen4_init_vtable_surface_functions(brw
);
1737 brw
->vtbl
.update_renderbuffer_surface
= brw_update_renderbuffer_surface
;
1741 brw_upload_cs_work_groups_surface(struct brw_context
*brw
)
1743 struct gl_context
*ctx
= &brw
->ctx
;
1745 struct gl_program
*prog
=
1746 ctx
->_Shader
->CurrentProgram
[MESA_SHADER_COMPUTE
];
1747 /* BRW_NEW_CS_PROG_DATA */
1748 const struct brw_cs_prog_data
*cs_prog_data
=
1749 brw_cs_prog_data(brw
->cs
.base
.prog_data
);
1751 if (prog
&& cs_prog_data
->uses_num_work_groups
) {
1752 const unsigned surf_idx
=
1753 cs_prog_data
->binding_table
.work_groups_start
;
1754 uint32_t *surf_offset
= &brw
->cs
.base
.surf_offset
[surf_idx
];
1758 if (brw
->compute
.num_work_groups_bo
== NULL
) {
1760 intel_upload_data(brw
,
1761 (void *)brw
->compute
.num_work_groups
,
1767 bo
= brw
->compute
.num_work_groups_bo
;
1768 bo_offset
= brw
->compute
.num_work_groups_offset
;
1771 brw_emit_buffer_surface_state(brw
, surf_offset
,
1774 3 * sizeof(GLuint
), 1, true);
1775 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1779 const struct brw_tracked_state brw_cs_work_groups_surface
= {
1781 .brw
= BRW_NEW_BLORP
|
1782 BRW_NEW_CS_PROG_DATA
|
1783 BRW_NEW_CS_WORK_GROUPS
1785 .emit
= brw_upload_cs_work_groups_surface
,