/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/* Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */
33 #include "compiler/nir/nir.h"
34 #include "main/context.h"
35 #include "main/blend.h"
36 #include "main/mtypes.h"
37 #include "main/samplerobj.h"
38 #include "main/shaderimage.h"
39 #include "main/teximage.h"
40 #include "program/prog_parameter.h"
41 #include "program/prog_instruction.h"
42 #include "main/framebuffer.h"
43 #include "main/shaderapi.h"
47 #include "intel_mipmap_tree.h"
48 #include "intel_batchbuffer.h"
49 #include "intel_tex.h"
50 #include "intel_fbo.h"
51 #include "intel_buffer_objects.h"
53 #include "brw_context.h"
54 #include "brw_state.h"
55 #include "brw_defines.h"
59 INTEL_RENDERBUFFER_LAYERED
= 1 << 0,
60 INTEL_AUX_BUFFER_DISABLED
= 1 << 1,
63 uint32_t tex_mocs
[] = {
70 uint32_t rb_mocs
[] = {
78 get_isl_surf(struct brw_context
*brw
, struct intel_mipmap_tree
*mt
,
79 GLenum target
, struct isl_view
*view
,
80 uint32_t *tile_x
, uint32_t *tile_y
,
81 uint32_t *offset
, struct isl_surf
*surf
)
85 const enum isl_dim_layout dim_layout
=
86 get_isl_dim_layout(&brw
->screen
->devinfo
, mt
->surf
.tiling
, target
);
88 if (surf
->dim_layout
== dim_layout
)
91 /* The layout of the specified texture target is not compatible with the
92 * actual layout of the miptree structure in memory -- You're entering
93 * dangerous territory, this can only possibly work if you only intended
94 * to access a single level and slice of the texture, and the hardware
95 * supports the tile offset feature in order to allow non-tile-aligned
96 * base offsets, since we'll have to point the hardware to the first
97 * texel of the level instead of relying on the usual base level/layer
100 assert(brw
->has_surface_tile_offset
);
101 assert(view
->levels
== 1 && view
->array_len
== 1);
102 assert(*tile_x
== 0 && *tile_y
== 0);
104 offset
+= intel_miptree_get_tile_offsets(mt
, view
->base_level
,
105 view
->base_array_layer
,
108 /* Minify the logical dimensions of the texture. */
109 const unsigned l
= view
->base_level
- mt
->first_level
;
110 surf
->logical_level0_px
.width
= minify(surf
->logical_level0_px
.width
, l
);
111 surf
->logical_level0_px
.height
= surf
->dim
<= ISL_SURF_DIM_1D
? 1 :
112 minify(surf
->logical_level0_px
.height
, l
);
113 surf
->logical_level0_px
.depth
= surf
->dim
<= ISL_SURF_DIM_2D
? 1 :
114 minify(surf
->logical_level0_px
.depth
, l
);
116 /* Only the base level and layer can be addressed with the overridden
119 surf
->logical_level0_px
.array_len
= 1;
121 surf
->dim_layout
= dim_layout
;
123 /* The requested slice of the texture is now at the base level and
126 view
->base_level
= 0;
127 view
->base_array_layer
= 0;
131 brw_emit_surface_state(struct brw_context
*brw
,
132 struct intel_mipmap_tree
*mt
,
133 GLenum target
, struct isl_view view
,
134 enum isl_aux_usage aux_usage
,
135 uint32_t mocs
, uint32_t *surf_offset
, int surf_index
,
136 unsigned read_domains
, unsigned write_domains
)
138 uint32_t tile_x
= mt
->level
[0].level_x
;
139 uint32_t tile_y
= mt
->level
[0].level_y
;
140 uint32_t offset
= mt
->offset
;
142 struct isl_surf surf
;
144 get_isl_surf(brw
, mt
, target
, &view
, &tile_x
, &tile_y
, &offset
, &surf
);
146 union isl_color_value clear_color
= { .u32
= { 0, 0, 0, 0 } };
148 struct brw_bo
*aux_bo
;
149 struct isl_surf
*aux_surf
= NULL
;
150 uint64_t aux_offset
= 0;
152 case ISL_AUX_USAGE_MCS
:
153 case ISL_AUX_USAGE_CCS_D
:
154 case ISL_AUX_USAGE_CCS_E
:
155 aux_surf
= &mt
->mcs_buf
->surf
;
156 aux_bo
= mt
->mcs_buf
->bo
;
157 aux_offset
= mt
->mcs_buf
->bo
->offset64
+ mt
->mcs_buf
->offset
;
160 case ISL_AUX_USAGE_HIZ
:
161 aux_surf
= &mt
->hiz_buf
->surf
;
162 aux_bo
= mt
->hiz_buf
->bo
;
163 aux_offset
= mt
->hiz_buf
->bo
->offset64
;
166 case ISL_AUX_USAGE_NONE
:
170 if (aux_usage
!= ISL_AUX_USAGE_NONE
) {
171 /* We only really need a clear color if we also have an auxiliary
172 * surface. Without one, it does nothing.
174 clear_color
= mt
->fast_clear_color
;
177 void *state
= brw_state_batch(brw
,
178 brw
->isl_dev
.ss
.size
,
179 brw
->isl_dev
.ss
.align
,
182 isl_surf_fill_state(&brw
->isl_dev
, state
, .surf
= &mt
->surf
, .view
= &view
,
183 .address
= mt
->bo
->offset64
+ offset
,
184 .aux_surf
= aux_surf
, .aux_usage
= aux_usage
,
185 .aux_address
= aux_offset
,
186 .mocs
= mocs
, .clear_color
= clear_color
,
187 .x_offset_sa
= tile_x
, .y_offset_sa
= tile_y
);
189 brw_emit_reloc(&brw
->batch
, *surf_offset
+ brw
->isl_dev
.ss
.addr_offset
,
190 mt
->bo
, offset
, read_domains
, write_domains
);
193 /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
194 * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
195 * contain other control information. Since buffer addresses are always
196 * on 4k boundaries (and thus have their lower 12 bits zero), we can use
197 * an ordinary reloc to do the necessary address translation.
199 assert((aux_offset
& 0xfff) == 0);
200 uint32_t *aux_addr
= state
+ brw
->isl_dev
.ss
.aux_addr_offset
;
201 brw_emit_reloc(&brw
->batch
,
202 *surf_offset
+ brw
->isl_dev
.ss
.aux_addr_offset
,
203 aux_bo
, *aux_addr
- aux_bo
->offset64
,
204 read_domains
, write_domains
);
209 brw_update_renderbuffer_surface(struct brw_context
*brw
,
210 struct gl_renderbuffer
*rb
,
211 uint32_t flags
, unsigned unit
/* unused */,
214 struct gl_context
*ctx
= &brw
->ctx
;
215 struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
216 struct intel_mipmap_tree
*mt
= irb
->mt
;
218 enum isl_aux_usage aux_usage
=
219 intel_miptree_render_aux_usage(brw
, mt
, ctx
->Color
.sRGBEnabled
);
221 if (flags
& INTEL_AUX_BUFFER_DISABLED
) {
222 assert(brw
->gen
>= 9);
223 aux_usage
= ISL_AUX_USAGE_NONE
;
226 assert(brw_render_target_supported(brw
, rb
));
228 mesa_format rb_format
= _mesa_get_render_format(ctx
, intel_rb_format(irb
));
229 if (unlikely(!brw
->mesa_format_supports_render
[rb_format
])) {
230 _mesa_problem(ctx
, "%s: renderbuffer format %s unsupported\n",
231 __func__
, _mesa_get_format_name(rb_format
));
234 struct isl_view view
= {
235 .format
= brw
->mesa_to_isl_render_format
[rb_format
],
236 .base_level
= irb
->mt_level
- irb
->mt
->first_level
,
238 .base_array_layer
= irb
->mt_layer
,
239 .array_len
= MAX2(irb
->layer_count
, 1),
240 .swizzle
= ISL_SWIZZLE_IDENTITY
,
241 .usage
= ISL_SURF_USAGE_RENDER_TARGET_BIT
,
245 brw_emit_surface_state(brw
, mt
, mt
->target
, view
, aux_usage
,
248 I915_GEM_DOMAIN_RENDER
,
249 I915_GEM_DOMAIN_RENDER
);
254 translate_tex_target(GLenum target
)
258 case GL_TEXTURE_1D_ARRAY_EXT
:
259 return BRW_SURFACE_1D
;
261 case GL_TEXTURE_RECTANGLE_NV
:
262 return BRW_SURFACE_2D
;
265 case GL_TEXTURE_2D_ARRAY_EXT
:
266 case GL_TEXTURE_EXTERNAL_OES
:
267 case GL_TEXTURE_2D_MULTISAMPLE
:
268 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY
:
269 return BRW_SURFACE_2D
;
272 return BRW_SURFACE_3D
;
274 case GL_TEXTURE_CUBE_MAP
:
275 case GL_TEXTURE_CUBE_MAP_ARRAY
:
276 return BRW_SURFACE_CUBE
;
279 unreachable("not reached");
284 brw_get_surface_tiling_bits(enum isl_tiling tiling
)
288 return BRW_SURFACE_TILED
;
290 return BRW_SURFACE_TILED
| BRW_SURFACE_TILED_Y
;
298 brw_get_surface_num_multisamples(unsigned num_samples
)
301 return BRW_SURFACE_MULTISAMPLECOUNT_4
;
303 return BRW_SURFACE_MULTISAMPLECOUNT_1
;
307 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
311 brw_get_texture_swizzle(const struct gl_context
*ctx
,
312 const struct gl_texture_object
*t
)
314 const struct gl_texture_image
*img
= t
->Image
[0][t
->BaseLevel
];
316 int swizzles
[SWIZZLE_NIL
+ 1] = {
326 if (img
->_BaseFormat
== GL_DEPTH_COMPONENT
||
327 img
->_BaseFormat
== GL_DEPTH_STENCIL
) {
328 GLenum depth_mode
= t
->DepthMode
;
330 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
331 * with depth component data specified with a sized internal format.
332 * Otherwise, it's left at the old default, GL_LUMINANCE.
334 if (_mesa_is_gles3(ctx
) &&
335 img
->InternalFormat
!= GL_DEPTH_COMPONENT
&&
336 img
->InternalFormat
!= GL_DEPTH_STENCIL
) {
340 switch (depth_mode
) {
342 swizzles
[0] = SWIZZLE_ZERO
;
343 swizzles
[1] = SWIZZLE_ZERO
;
344 swizzles
[2] = SWIZZLE_ZERO
;
345 swizzles
[3] = SWIZZLE_X
;
348 swizzles
[0] = SWIZZLE_X
;
349 swizzles
[1] = SWIZZLE_X
;
350 swizzles
[2] = SWIZZLE_X
;
351 swizzles
[3] = SWIZZLE_ONE
;
354 swizzles
[0] = SWIZZLE_X
;
355 swizzles
[1] = SWIZZLE_X
;
356 swizzles
[2] = SWIZZLE_X
;
357 swizzles
[3] = SWIZZLE_X
;
360 swizzles
[0] = SWIZZLE_X
;
361 swizzles
[1] = SWIZZLE_ZERO
;
362 swizzles
[2] = SWIZZLE_ZERO
;
363 swizzles
[3] = SWIZZLE_ONE
;
368 GLenum datatype
= _mesa_get_format_datatype(img
->TexFormat
);
370 /* If the texture's format is alpha-only, force R, G, and B to
371 * 0.0. Similarly, if the texture's format has no alpha channel,
372 * force the alpha value read to 1.0. This allows for the
373 * implementation to use an RGBA texture for any of these formats
374 * without leaking any unexpected values.
376 switch (img
->_BaseFormat
) {
378 swizzles
[0] = SWIZZLE_ZERO
;
379 swizzles
[1] = SWIZZLE_ZERO
;
380 swizzles
[2] = SWIZZLE_ZERO
;
383 if (t
->_IsIntegerFormat
|| datatype
== GL_SIGNED_NORMALIZED
) {
384 swizzles
[0] = SWIZZLE_X
;
385 swizzles
[1] = SWIZZLE_X
;
386 swizzles
[2] = SWIZZLE_X
;
387 swizzles
[3] = SWIZZLE_ONE
;
390 case GL_LUMINANCE_ALPHA
:
391 if (datatype
== GL_SIGNED_NORMALIZED
) {
392 swizzles
[0] = SWIZZLE_X
;
393 swizzles
[1] = SWIZZLE_X
;
394 swizzles
[2] = SWIZZLE_X
;
395 swizzles
[3] = SWIZZLE_W
;
399 if (datatype
== GL_SIGNED_NORMALIZED
) {
400 swizzles
[0] = SWIZZLE_X
;
401 swizzles
[1] = SWIZZLE_X
;
402 swizzles
[2] = SWIZZLE_X
;
403 swizzles
[3] = SWIZZLE_X
;
409 if (_mesa_get_format_bits(img
->TexFormat
, GL_ALPHA_BITS
) > 0 ||
410 img
->TexFormat
== MESA_FORMAT_RGB_DXT1
||
411 img
->TexFormat
== MESA_FORMAT_SRGB_DXT1
)
412 swizzles
[3] = SWIZZLE_ONE
;
416 return MAKE_SWIZZLE4(swizzles
[GET_SWZ(t
->_Swizzle
, 0)],
417 swizzles
[GET_SWZ(t
->_Swizzle
, 1)],
418 swizzles
[GET_SWZ(t
->_Swizzle
, 2)],
419 swizzles
[GET_SWZ(t
->_Swizzle
, 3)]);
423 * Convert an swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+
424 * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED). The mappings are
426 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
429 * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE
431 * which is simply adding 4 then modding by 8 (or anding with 7).
433 * We then may need to apply workarounds for textureGather hardware bugs.
436 swizzle_to_scs(GLenum swizzle
, bool need_green_to_blue
)
438 unsigned scs
= (swizzle
+ 4) & 7;
440 return (need_green_to_blue
&& scs
== HSW_SCS_GREEN
) ? HSW_SCS_BLUE
: scs
;
444 brw_aux_surface_disabled(const struct brw_context
*brw
,
445 const struct intel_mipmap_tree
*mt
)
447 const struct gl_framebuffer
*fb
= brw
->ctx
.DrawBuffer
;
449 for (unsigned i
= 0; i
< fb
->_NumColorDrawBuffers
; i
++) {
450 const struct intel_renderbuffer
*irb
=
451 intel_renderbuffer(fb
->_ColorDrawBuffers
[i
]);
453 if (irb
&& irb
->mt
== mt
)
454 return brw
->draw_aux_buffer_disabled
[i
];
461 brw_update_texture_surface(struct gl_context
*ctx
,
463 uint32_t *surf_offset
,
467 struct brw_context
*brw
= brw_context(ctx
);
468 struct gl_texture_object
*obj
= ctx
->Texture
.Unit
[unit
]._Current
;
470 if (obj
->Target
== GL_TEXTURE_BUFFER
) {
471 brw_update_buffer_texture_surface(ctx
, unit
, surf_offset
);
474 struct intel_texture_object
*intel_obj
= intel_texture_object(obj
);
475 struct intel_mipmap_tree
*mt
= intel_obj
->mt
;
478 if (mt
->plane
[plane
- 1] == NULL
)
480 mt
= mt
->plane
[plane
- 1];
483 struct gl_sampler_object
*sampler
= _mesa_get_samplerobj(ctx
, unit
);
484 /* If this is a view with restricted NumLayers, then our effective depth
485 * is not just the miptree depth.
487 unsigned view_num_layers
;
488 if (obj
->Immutable
&& obj
->Target
!= GL_TEXTURE_3D
) {
489 view_num_layers
= obj
->NumLayers
;
491 view_num_layers
= mt
->surf
.dim
== ISL_SURF_DIM_3D
?
492 mt
->surf
.logical_level0_px
.depth
:
493 mt
->surf
.logical_level0_px
.array_len
;
496 /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
497 * texturing functions that return a float, as our code generation always
498 * selects the .x channel (which would always be 0).
500 struct gl_texture_image
*firstImage
= obj
->Image
[0][obj
->BaseLevel
];
501 const bool alpha_depth
= obj
->DepthMode
== GL_ALPHA
&&
502 (firstImage
->_BaseFormat
== GL_DEPTH_COMPONENT
||
503 firstImage
->_BaseFormat
== GL_DEPTH_STENCIL
);
504 const unsigned swizzle
= (unlikely(alpha_depth
) ? SWIZZLE_XYZW
:
505 brw_get_texture_swizzle(&brw
->ctx
, obj
));
507 mesa_format mesa_fmt
= plane
== 0 ? intel_obj
->_Format
: mt
->format
;
508 enum isl_format format
= translate_tex_format(brw
, mesa_fmt
,
509 sampler
->sRGBDecode
);
511 /* Implement gen6 and gen7 gather work-around */
512 bool need_green_to_blue
= false;
514 if (brw
->gen
== 7 && (format
== ISL_FORMAT_R32G32_FLOAT
||
515 format
== ISL_FORMAT_R32G32_SINT
||
516 format
== ISL_FORMAT_R32G32_UINT
)) {
517 format
= ISL_FORMAT_R32G32_FLOAT_LD
;
518 need_green_to_blue
= brw
->is_haswell
;
519 } else if (brw
->gen
== 6) {
520 /* Sandybridge's gather4 message is broken for integer formats.
521 * To work around this, we pretend the surface is UNORM for
522 * 8 or 16-bit formats, and emit shader instructions to recover
523 * the real INT/UINT value. For 32-bit formats, we pretend
524 * the surface is FLOAT, and simply reinterpret the resulting
528 case ISL_FORMAT_R8_SINT
:
529 case ISL_FORMAT_R8_UINT
:
530 format
= ISL_FORMAT_R8_UNORM
;
533 case ISL_FORMAT_R16_SINT
:
534 case ISL_FORMAT_R16_UINT
:
535 format
= ISL_FORMAT_R16_UNORM
;
538 case ISL_FORMAT_R32_SINT
:
539 case ISL_FORMAT_R32_UINT
:
540 format
= ISL_FORMAT_R32_FLOAT
;
549 if (obj
->StencilSampling
&& firstImage
->_BaseFormat
== GL_DEPTH_STENCIL
) {
551 assert(mt
->r8stencil_mt
&& !mt
->stencil_mt
->r8stencil_needs_update
);
552 mt
= mt
->r8stencil_mt
;
556 format
= ISL_FORMAT_R8_UINT
;
557 } else if (brw
->gen
<= 7 && mt
->format
== MESA_FORMAT_S_UINT8
) {
558 assert(mt
->r8stencil_mt
&& !mt
->r8stencil_needs_update
);
559 mt
= mt
->r8stencil_mt
;
560 format
= ISL_FORMAT_R8_UINT
;
563 const int surf_index
= surf_offset
- &brw
->wm
.base
.surf_offset
[0];
565 struct isl_view view
= {
567 .base_level
= obj
->MinLevel
+ obj
->BaseLevel
,
568 .levels
= intel_obj
->_MaxLevel
- obj
->BaseLevel
+ 1,
569 .base_array_layer
= obj
->MinLayer
,
570 .array_len
= view_num_layers
,
572 .r
= swizzle_to_scs(GET_SWZ(swizzle
, 0), need_green_to_blue
),
573 .g
= swizzle_to_scs(GET_SWZ(swizzle
, 1), need_green_to_blue
),
574 .b
= swizzle_to_scs(GET_SWZ(swizzle
, 2), need_green_to_blue
),
575 .a
= swizzle_to_scs(GET_SWZ(swizzle
, 3), need_green_to_blue
),
577 .usage
= ISL_SURF_USAGE_TEXTURE_BIT
,
580 if (obj
->Target
== GL_TEXTURE_CUBE_MAP
||
581 obj
->Target
== GL_TEXTURE_CUBE_MAP_ARRAY
)
582 view
.usage
|= ISL_SURF_USAGE_CUBE_BIT
;
584 enum isl_aux_usage aux_usage
=
585 intel_miptree_texture_aux_usage(brw
, mt
, format
);
587 if (brw_aux_surface_disabled(brw
, mt
))
588 aux_usage
= ISL_AUX_USAGE_NONE
;
590 brw_emit_surface_state(brw
, mt
, mt
->target
, view
, aux_usage
,
592 surf_offset
, surf_index
,
593 I915_GEM_DOMAIN_SAMPLER
, 0);
598 brw_emit_buffer_surface_state(struct brw_context
*brw
,
599 uint32_t *out_offset
,
601 unsigned buffer_offset
,
602 unsigned surface_format
,
603 unsigned buffer_size
,
607 uint32_t *dw
= brw_state_batch(brw
,
608 brw
->isl_dev
.ss
.size
,
609 brw
->isl_dev
.ss
.align
,
612 isl_buffer_fill_state(&brw
->isl_dev
, dw
,
613 .address
= (bo
? bo
->offset64
: 0) + buffer_offset
,
615 .format
= surface_format
,
617 .mocs
= tex_mocs
[brw
->gen
]);
620 brw_emit_reloc(&brw
->batch
, *out_offset
+ brw
->isl_dev
.ss
.addr_offset
,
622 I915_GEM_DOMAIN_SAMPLER
,
623 (rw
? I915_GEM_DOMAIN_SAMPLER
: 0));
628 brw_update_buffer_texture_surface(struct gl_context
*ctx
,
630 uint32_t *surf_offset
)
632 struct brw_context
*brw
= brw_context(ctx
);
633 struct gl_texture_object
*tObj
= ctx
->Texture
.Unit
[unit
]._Current
;
634 struct intel_buffer_object
*intel_obj
=
635 intel_buffer_object(tObj
->BufferObject
);
636 uint32_t size
= tObj
->BufferSize
;
637 struct brw_bo
*bo
= NULL
;
638 mesa_format format
= tObj
->_BufferObjectFormat
;
639 const enum isl_format isl_format
= brw_isl_format_for_mesa_format(format
);
640 int texel_size
= _mesa_get_format_bytes(format
);
643 size
= MIN2(size
, intel_obj
->Base
.Size
);
644 bo
= intel_bufferobj_buffer(brw
, intel_obj
, tObj
->BufferOffset
, size
,
648 /* The ARB_texture_buffer_specification says:
650 * "The number of texels in the buffer texture's texel array is given by
652 * floor(<buffer_size> / (<components> * sizeof(<base_type>)),
654 * where <buffer_size> is the size of the buffer object, in basic
655 * machine units and <components> and <base_type> are the element count
656 * and base data type for elements, as specified in Table X.1. The
657 * number of texels in the texel array is then clamped to the
658 * implementation-dependent limit MAX_TEXTURE_BUFFER_SIZE_ARB."
660 * We need to clamp the size in bytes to MAX_TEXTURE_BUFFER_SIZE * stride,
661 * so that when ISL divides by stride to obtain the number of texels, that
662 * texel count is clamped to MAX_TEXTURE_BUFFER_SIZE.
664 size
= MIN2(size
, ctx
->Const
.MaxTextureBufferSize
* (unsigned) texel_size
);
666 if (isl_format
== ISL_FORMAT_UNSUPPORTED
) {
667 _mesa_problem(NULL
, "bad format %s for texture buffer\n",
668 _mesa_get_format_name(format
));
671 brw_emit_buffer_surface_state(brw
, surf_offset
, bo
,
680 * Create the constant buffer surface. Vertex/fragment shader constants will be
681 * read from this buffer with Data Port Read instructions/messages.
684 brw_create_constant_surface(struct brw_context
*brw
,
688 uint32_t *out_offset
)
690 brw_emit_buffer_surface_state(brw
, out_offset
, bo
, offset
,
691 ISL_FORMAT_R32G32B32A32_FLOAT
,
696 * Create the buffer surface. Shader buffer variables will be
697 * read from / write to this buffer with Data Port Read/Write
698 * instructions/messages.
701 brw_create_buffer_surface(struct brw_context
*brw
,
705 uint32_t *out_offset
)
707 /* Use a raw surface so we can reuse existing untyped read/write/atomic
708 * messages. We need these specifically for the fragment shader since they
709 * include a pixel mask header that we need to ensure correct behavior
710 * with helper invocations, which cannot write to the buffer.
712 brw_emit_buffer_surface_state(brw
, out_offset
, bo
, offset
,
718 * Set up a binding table entry for use by stream output logic (transform
721 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
724 brw_update_sol_surface(struct brw_context
*brw
,
725 struct gl_buffer_object
*buffer_obj
,
726 uint32_t *out_offset
, unsigned num_vector_components
,
727 unsigned stride_dwords
, unsigned offset_dwords
)
729 struct intel_buffer_object
*intel_bo
= intel_buffer_object(buffer_obj
);
730 uint32_t offset_bytes
= 4 * offset_dwords
;
731 struct brw_bo
*bo
= intel_bufferobj_buffer(brw
, intel_bo
,
733 buffer_obj
->Size
- offset_bytes
,
735 uint32_t *surf
= brw_state_batch(brw
, 6 * 4, 32, out_offset
);
736 uint32_t pitch_minus_1
= 4*stride_dwords
- 1;
737 size_t size_dwords
= buffer_obj
->Size
/ 4;
738 uint32_t buffer_size_minus_1
, width
, height
, depth
, surface_format
;
740 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
741 * too big to map using a single binding table entry?
743 assert((size_dwords
- offset_dwords
) / stride_dwords
744 <= BRW_MAX_NUM_BUFFER_ENTRIES
);
746 if (size_dwords
> offset_dwords
+ num_vector_components
) {
747 /* There is room for at least 1 transform feedback output in the buffer.
748 * Compute the number of additional transform feedback outputs the
749 * buffer has room for.
751 buffer_size_minus_1
=
752 (size_dwords
- offset_dwords
- num_vector_components
) / stride_dwords
;
754 /* There isn't even room for a single transform feedback output in the
755 * buffer. We can't configure the binding table entry to prevent output
756 * entirely; we'll have to rely on the geometry shader to detect
757 * overflow. But to minimize the damage in case of a bug, set up the
758 * binding table entry to just allow a single output.
760 buffer_size_minus_1
= 0;
762 width
= buffer_size_minus_1
& 0x7f;
763 height
= (buffer_size_minus_1
& 0xfff80) >> 7;
764 depth
= (buffer_size_minus_1
& 0x7f00000) >> 20;
766 switch (num_vector_components
) {
768 surface_format
= ISL_FORMAT_R32_FLOAT
;
771 surface_format
= ISL_FORMAT_R32G32_FLOAT
;
774 surface_format
= ISL_FORMAT_R32G32B32_FLOAT
;
777 surface_format
= ISL_FORMAT_R32G32B32A32_FLOAT
;
780 unreachable("Invalid vector size for transform feedback output");
783 surf
[0] = BRW_SURFACE_BUFFER
<< BRW_SURFACE_TYPE_SHIFT
|
784 BRW_SURFACE_MIPMAPLAYOUT_BELOW
<< BRW_SURFACE_MIPLAYOUT_SHIFT
|
785 surface_format
<< BRW_SURFACE_FORMAT_SHIFT
|
786 BRW_SURFACE_RC_READ_WRITE
;
787 surf
[1] = bo
->offset64
+ offset_bytes
; /* reloc */
788 surf
[2] = (width
<< BRW_SURFACE_WIDTH_SHIFT
|
789 height
<< BRW_SURFACE_HEIGHT_SHIFT
);
790 surf
[3] = (depth
<< BRW_SURFACE_DEPTH_SHIFT
|
791 pitch_minus_1
<< BRW_SURFACE_PITCH_SHIFT
);
795 /* Emit relocation to surface contents. */
796 brw_emit_reloc(&brw
->batch
, *out_offset
+ 4, bo
, offset_bytes
,
797 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
);
800 /* Creates a new WM constant buffer reflecting the current fragment program's
801 * constants, if needed by the fragment program.
803 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
807 brw_upload_wm_pull_constants(struct brw_context
*brw
)
809 struct brw_stage_state
*stage_state
= &brw
->wm
.base
;
810 /* BRW_NEW_FRAGMENT_PROGRAM */
811 struct brw_program
*fp
= (struct brw_program
*) brw
->fragment_program
;
812 /* BRW_NEW_FS_PROG_DATA */
813 struct brw_stage_prog_data
*prog_data
= brw
->wm
.base
.prog_data
;
815 _mesa_shader_write_subroutine_indices(&brw
->ctx
, MESA_SHADER_FRAGMENT
);
816 /* _NEW_PROGRAM_CONSTANTS */
817 brw_upload_pull_constants(brw
, BRW_NEW_SURFACES
, &fp
->program
,
818 stage_state
, prog_data
);
821 const struct brw_tracked_state brw_wm_pull_constants
= {
823 .mesa
= _NEW_PROGRAM_CONSTANTS
,
824 .brw
= BRW_NEW_BATCH
|
826 BRW_NEW_FRAGMENT_PROGRAM
|
827 BRW_NEW_FS_PROG_DATA
,
829 .emit
= brw_upload_wm_pull_constants
,
833 * Creates a null renderbuffer surface.
835 * This is used when the shader doesn't write to any color output. An FB
836 * write to target 0 will still be emitted, because that's how the thread is
837 * terminated (and computed depth is returned), so we need to have the
838 * hardware discard the target 0 color output..
841 brw_emit_null_surface_state(struct brw_context
*brw
,
845 uint32_t *out_offset
)
847 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
850 * A null surface will be used in instances where an actual surface is
851 * not bound. When a write message is generated to a null surface, no
852 * actual surface is written to. When a read message (including any
853 * sampling engine message) is generated to a null surface, the result
854 * is all zeros. Note that a null surface type is allowed to be used
855 * with all messages, even if it is not specificially indicated as
856 * supported. All of the remaining fields in surface state are ignored
857 * for null surfaces, with the following exceptions:
859 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
860 * depth buffer’s corresponding state for all render target surfaces,
863 * - Surface Format must be R8G8B8A8_UNORM.
865 unsigned surface_type
= BRW_SURFACE_NULL
;
866 struct brw_bo
*bo
= NULL
;
867 unsigned pitch_minus_1
= 0;
868 uint32_t multisampling_state
= 0;
869 uint32_t *surf
= brw_state_batch(brw
, 6 * 4, 32, out_offset
);
872 /* On Gen6, null render targets seem to cause GPU hangs when
873 * multisampling. So work around this problem by rendering into dummy
876 * To decrease the amount of memory needed by the workaround buffer, we
877 * set its pitch to 128 bytes (the width of a Y tile). This means that
878 * the amount of memory needed for the workaround buffer is
879 * (width_in_tiles + height_in_tiles - 1) tiles.
881 * Note that since the workaround buffer will be interpreted by the
882 * hardware as an interleaved multisampled buffer, we need to compute
883 * width_in_tiles and height_in_tiles by dividing the width and height
884 * by 16 rather than the normal Y-tile size of 32.
886 unsigned width_in_tiles
= ALIGN(width
, 16) / 16;
887 unsigned height_in_tiles
= ALIGN(height
, 16) / 16;
888 unsigned size_needed
= (width_in_tiles
+ height_in_tiles
- 1) * 4096;
889 brw_get_scratch_bo(brw
, &brw
->wm
.multisampled_null_render_target_bo
,
891 bo
= brw
->wm
.multisampled_null_render_target_bo
;
892 surface_type
= BRW_SURFACE_2D
;
894 multisampling_state
= brw_get_surface_num_multisamples(samples
);
897 surf
[0] = (surface_type
<< BRW_SURFACE_TYPE_SHIFT
|
898 ISL_FORMAT_B8G8R8A8_UNORM
<< BRW_SURFACE_FORMAT_SHIFT
);
900 surf
[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT
|
901 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT
|
902 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT
|
903 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT
);
905 surf
[1] = bo
? bo
->offset64
: 0;
906 surf
[2] = ((width
- 1) << BRW_SURFACE_WIDTH_SHIFT
|
907 (height
- 1) << BRW_SURFACE_HEIGHT_SHIFT
);
909 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
912 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
914 surf
[3] = (BRW_SURFACE_TILED
| BRW_SURFACE_TILED_Y
|
915 pitch_minus_1
<< BRW_SURFACE_PITCH_SHIFT
);
916 surf
[4] = multisampling_state
;
920 brw_emit_reloc(&brw
->batch
, *out_offset
+ 4, bo
, 0,
921 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
);
926 * Sets up a surface state structure to point at the given region.
927 * While it is only used for the front/back buffer currently, it should be
928 * usable for further buffers when doing ARB_draw_buffer support.
931 gen4_update_renderbuffer_surface(struct brw_context
*brw
,
932 struct gl_renderbuffer
*rb
,
933 uint32_t flags
, unsigned unit
,
936 struct gl_context
*ctx
= &brw
->ctx
;
937 struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
938 struct intel_mipmap_tree
*mt
= irb
->mt
;
940 uint32_t tile_x
, tile_y
;
941 enum isl_format format
;
944 mesa_format rb_format
= _mesa_get_render_format(ctx
, intel_rb_format(irb
));
945 /* BRW_NEW_FS_PROG_DATA */
947 assert(!(flags
& INTEL_RENDERBUFFER_LAYERED
));
948 assert(!(flags
& INTEL_AUX_BUFFER_DISABLED
));
950 if (rb
->TexImage
&& !brw
->has_surface_tile_offset
) {
951 intel_renderbuffer_get_tile_offsets(irb
, &tile_x
, &tile_y
);
953 if (tile_x
!= 0 || tile_y
!= 0) {
954 /* Original gen4 hardware couldn't draw to a non-tile-aligned
955 * destination in a miptree unless you actually setup your renderbuffer
956 * as a miptree and used the fragile lod/array_index/etc. controls to
957 * select the image. So, instead, we just make a new single-level
958 * miptree and render into that.
960 intel_renderbuffer_move_to_temp(brw
, irb
, false);
961 assert(irb
->align_wa_mt
);
962 mt
= irb
->align_wa_mt
;
966 surf
= brw_state_batch(brw
, 6 * 4, 32, &offset
);
968 format
= brw
->mesa_to_isl_render_format
[rb_format
];
969 if (unlikely(!brw
->mesa_format_supports_render
[rb_format
])) {
970 _mesa_problem(ctx
, "%s: renderbuffer format %s unsupported\n",
971 __func__
, _mesa_get_format_name(rb_format
));
974 surf
[0] = (BRW_SURFACE_2D
<< BRW_SURFACE_TYPE_SHIFT
|
975 format
<< BRW_SURFACE_FORMAT_SHIFT
);
978 assert(mt
->offset
% mt
->cpp
== 0);
979 surf
[1] = (intel_renderbuffer_get_tile_offsets(irb
, &tile_x
, &tile_y
) +
980 mt
->bo
->offset64
+ mt
->offset
);
982 surf
[2] = ((rb
->Width
- 1) << BRW_SURFACE_WIDTH_SHIFT
|
983 (rb
->Height
- 1) << BRW_SURFACE_HEIGHT_SHIFT
);
985 surf
[3] = (brw_get_surface_tiling_bits(mt
->surf
.tiling
) |
986 (mt
->surf
.row_pitch
- 1) << BRW_SURFACE_PITCH_SHIFT
);
988 surf
[4] = brw_get_surface_num_multisamples(mt
->surf
.samples
);
990 assert(brw
->has_surface_tile_offset
|| (tile_x
== 0 && tile_y
== 0));
991 /* Note that the low bits of these fields are missing, so
992 * there's the possibility of getting in trouble.
994 assert(tile_x
% 4 == 0);
995 assert(tile_y
% 2 == 0);
996 surf
[5] = ((tile_x
/ 4) << BRW_SURFACE_X_OFFSET_SHIFT
|
997 (tile_y
/ 2) << BRW_SURFACE_Y_OFFSET_SHIFT
|
998 (mt
->surf
.image_alignment_el
.height
== 4 ?
999 BRW_SURFACE_VERTICAL_ALIGN_ENABLE
: 0));
1003 if (!ctx
->Color
.ColorLogicOpEnabled
&& !ctx
->Color
._AdvancedBlendMode
&&
1004 (ctx
->Color
.BlendEnabled
& (1 << unit
)))
1005 surf
[0] |= BRW_SURFACE_BLEND_ENABLED
;
1007 if (!ctx
->Color
.ColorMask
[unit
][0])
1008 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT
;
1009 if (!ctx
->Color
.ColorMask
[unit
][1])
1010 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT
;
1011 if (!ctx
->Color
.ColorMask
[unit
][2])
1012 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT
;
1014 /* As mentioned above, disable writes to the alpha component when the
1015 * renderbuffer is XRGB.
1017 if (ctx
->DrawBuffer
->Visual
.alphaBits
== 0 ||
1018 !ctx
->Color
.ColorMask
[unit
][3]) {
1019 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT
;
1023 brw_emit_reloc(&brw
->batch
, offset
+ 4, mt
->bo
, surf
[1] - mt
->bo
->offset64
,
1024 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
);
1030 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
1033 brw_update_renderbuffer_surfaces(struct brw_context
*brw
,
1034 const struct gl_framebuffer
*fb
,
1035 uint32_t render_target_start
,
1036 uint32_t *surf_offset
)
1039 const unsigned int w
= _mesa_geometric_width(fb
);
1040 const unsigned int h
= _mesa_geometric_height(fb
);
1041 const unsigned int s
= _mesa_geometric_samples(fb
);
1043 /* Update surfaces for drawing buffers */
1044 if (fb
->_NumColorDrawBuffers
>= 1) {
1045 for (i
= 0; i
< fb
->_NumColorDrawBuffers
; i
++) {
1046 const uint32_t surf_index
= render_target_start
+ i
;
1047 const int flags
= (_mesa_geometric_layers(fb
) > 0 ?
1048 INTEL_RENDERBUFFER_LAYERED
: 0) |
1049 (brw
->draw_aux_buffer_disabled
[i
] ?
1050 INTEL_AUX_BUFFER_DISABLED
: 0);
1052 if (intel_renderbuffer(fb
->_ColorDrawBuffers
[i
])) {
1053 surf_offset
[surf_index
] =
1054 brw
->vtbl
.update_renderbuffer_surface(
1055 brw
, fb
->_ColorDrawBuffers
[i
], flags
, i
, surf_index
);
1057 brw
->vtbl
.emit_null_surface_state(brw
, w
, h
, s
,
1058 &surf_offset
[surf_index
]);
1062 const uint32_t surf_index
= render_target_start
;
1063 brw
->vtbl
.emit_null_surface_state(brw
, w
, h
, s
,
1064 &surf_offset
[surf_index
]);
1069 update_renderbuffer_surfaces(struct brw_context
*brw
)
1071 const struct gl_context
*ctx
= &brw
->ctx
;
1073 /* BRW_NEW_FS_PROG_DATA */
1074 const struct brw_wm_prog_data
*wm_prog_data
=
1075 brw_wm_prog_data(brw
->wm
.base
.prog_data
);
1077 /* _NEW_BUFFERS | _NEW_COLOR */
1078 const struct gl_framebuffer
*fb
= ctx
->DrawBuffer
;
1079 brw_update_renderbuffer_surfaces(
1081 wm_prog_data
->binding_table
.render_target_start
,
1082 brw
->wm
.base
.surf_offset
);
1083 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
/* Tracked-state atom: re-emits draw-buffer SURFACE_STATE via
 * update_renderbuffer_surfaces().  Visible dirty bits: _NEW_BUFFERS (mesa),
 * BRW_NEW_BATCH | BRW_NEW_FS_PROG_DATA (brw).  NOTE(review): this view of
 * the file is elided/re-wrapped; additional flag lines may be missing --
 * confirm against upstream before relying on the exact bit set.
 */
1086 const struct brw_tracked_state brw_renderbuffer_surfaces
= {
1088 .mesa
= _NEW_BUFFERS
|
1090 .brw
= BRW_NEW_BATCH
|
1092 BRW_NEW_FS_PROG_DATA
,
1094 .emit
= update_renderbuffer_surfaces
,
/* Gen6+ variant of the render-target surface atom: same emit callback but a
 * smaller visible dirty set (_NEW_BUFFERS / BRW_NEW_BATCH).  NOTE(review):
 * lines are elided in this view; verify full flag list against upstream.
 */
1097 const struct brw_tracked_state gen6_renderbuffer_surfaces
= {
1099 .mesa
= _NEW_BUFFERS
,
1100 .brw
= BRW_NEW_BATCH
|
1103 .emit
= update_renderbuffer_surfaces
,
/* Emits SURFACE_STATE that lets the fragment shader *sample* from the
 * current draw buffers -- the software framebuffer-fetch path used when the
 * fragment program reads its outputs but MESA_shader_framebuffer_fetch is
 * not exposed.  The texture target is overridden for 3D single slices and
 * 1D arrays (see the comment block preserved below).  Aux usage is forced
 * off for buffers listed in brw->draw_aux_buffer_disabled.
 * NOTE(review): this view is elided/re-wrapped (e.g. parts of the isl_view
 * initializer and the assert argument list are missing); tokens below are
 * preserved verbatim.
 */
1107 update_renderbuffer_read_surfaces(struct brw_context
*brw
)
1109 const struct gl_context
*ctx
= &brw
->ctx
;
1111 /* BRW_NEW_FS_PROG_DATA */
1112 const struct brw_wm_prog_data
*wm_prog_data
=
1113 brw_wm_prog_data(brw
->wm
.base
.prog_data
);
1115 /* BRW_NEW_FRAGMENT_PROGRAM */
1116 if (!ctx
->Extensions
.MESA_shader_framebuffer_fetch
&&
1117 brw
->fragment_program
&& brw
->fragment_program
->info
.outputs_read
) {
1119 const struct gl_framebuffer
*fb
= ctx
->DrawBuffer
;
1121 for (unsigned i
= 0; i
< fb
->_NumColorDrawBuffers
; i
++) {
1122 struct gl_renderbuffer
*rb
= fb
->_ColorDrawBuffers
[i
];
1123 const struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
1124 const unsigned surf_index
=
1125 wm_prog_data
->binding_table
.render_target_read_start
+ i
;
1126 uint32_t *surf_offset
= &brw
->wm
.base
.surf_offset
[surf_index
];
1129 const enum isl_format format
= brw
->mesa_to_isl_render_format
[
1130 _mesa_get_render_format(ctx
, intel_rb_format(irb
))];
1131 assert(isl_format_supports_sampling(&brw
->screen
->devinfo
,
1134 /* Override the target of the texture if the render buffer is a
1135 * single slice of a 3D texture (since the minimum array element
1136 * field of the surface state structure is ignored by the sampler
1137 * unit for 3D textures on some hardware), or if the render buffer
1138 * is a 1D array (since shaders always provide the array index
1139 * coordinate at the Z component to avoid state-dependent
1140 * recompiles when changing the texture target of the
1143 const GLenum target
=
1144 (irb
->mt
->target
== GL_TEXTURE_3D
&&
1145 irb
->layer_count
== 1) ? GL_TEXTURE_2D
:
1146 irb
->mt
->target
== GL_TEXTURE_1D_ARRAY
? GL_TEXTURE_2D_ARRAY
:
1149 const struct isl_view view
= {
1151 .base_level
= irb
->mt_level
- irb
->mt
->first_level
,
1153 .base_array_layer
= irb
->mt_layer
,
1154 .array_len
= irb
->layer_count
,
1155 .swizzle
= ISL_SWIZZLE_IDENTITY
,
1156 .usage
= ISL_SURF_USAGE_TEXTURE_BIT
,
1159 enum isl_aux_usage aux_usage
=
1160 intel_miptree_texture_aux_usage(brw
, irb
->mt
, format
);
1161 if (brw
->draw_aux_buffer_disabled
[i
])
1162 aux_usage
= ISL_AUX_USAGE_NONE
;
1164 brw_emit_surface_state(brw
, irb
->mt
, target
, view
, aux_usage
,
1166 surf_offset
, surf_index
,
1167 I915_GEM_DOMAIN_SAMPLER
, 0);
1170 brw
->vtbl
.emit_null_surface_state(
1171 brw
, _mesa_geometric_width(fb
), _mesa_geometric_height(fb
),
1172 _mesa_geometric_samples(fb
), surf_offset
);
1176 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
/* Tracked-state atom for the framebuffer-fetch read surfaces above.
 * NOTE(review): elided view; flag list may be incomplete.
 */
1180 const struct brw_tracked_state brw_renderbuffer_read_surfaces
= {
1182 .mesa
= _NEW_BUFFERS
,
1183 .brw
= BRW_NEW_BATCH
|
1184 BRW_NEW_FRAGMENT_PROGRAM
|
1185 BRW_NEW_FS_PROG_DATA
,
1187 .emit
= update_renderbuffer_read_surfaces
,
/* Emits texture SURFACE_STATE for one shader stage.  surf_offset is
 * advanced to the stage's gather-texture or per-plane section of the
 * binding table (the for_gather/plane selection between the two "+="
 * lines below is elided in this view), then one surface is emitted per
 * used sampler whose texture unit has a bound texture.
 * NOTE(review): elided/re-wrapped view; tokens preserved verbatim.
 */
1191 update_stage_texture_surfaces(struct brw_context
*brw
,
1192 const struct gl_program
*prog
,
1193 struct brw_stage_state
*stage_state
,
1194 bool for_gather
, uint32_t plane
)
1199 struct gl_context
*ctx
= &brw
->ctx
;
1201 uint32_t *surf_offset
= stage_state
->surf_offset
;
1203 /* BRW_NEW_*_PROG_DATA */
1205 surf_offset
+= stage_state
->prog_data
->binding_table
.gather_texture_start
;
1207 surf_offset
+= stage_state
->prog_data
->binding_table
.plane_start
[plane
];
1209 unsigned num_samplers
= util_last_bit(prog
->SamplersUsed
);
1210 for (unsigned s
= 0; s
< num_samplers
; s
++) {
1213 if (prog
->SamplersUsed
& (1 << s
)) {
1214 const unsigned unit
= prog
->SamplerUnits
[s
];
1217 if (ctx
->Texture
.Unit
[unit
]._Current
) {
1218 brw_update_texture_surface(ctx
, unit
, surf_offset
+ s
, for_gather
, plane
);
/* Construct SURFACE_STATE objects for enabled textures: runs the per-stage
 * helper for VS/TCS/TES/GS/FS, then a second gather4-override pass for each
 * stage whose NIR reports uses_texture_gather, and finally extra FS passes
 * for planes 1 and 2 (planar/multi-plane sampling).  NOTE(review): this
 * view is elided (a guard around the plane-1/2 calls may be missing);
 * tokens below are preserved verbatim.
 */
1226 * Construct SURFACE_STATE objects for enabled textures.
1229 brw_update_texture_surfaces(struct brw_context
*brw
)
1231 /* BRW_NEW_VERTEX_PROGRAM */
1232 struct gl_program
*vs
= (struct gl_program
*) brw
->vertex_program
;
1234 /* BRW_NEW_TESS_PROGRAMS */
1235 struct gl_program
*tcs
= (struct gl_program
*) brw
->tess_ctrl_program
;
1236 struct gl_program
*tes
= (struct gl_program
*) brw
->tess_eval_program
;
1238 /* BRW_NEW_GEOMETRY_PROGRAM */
1239 struct gl_program
*gs
= (struct gl_program
*) brw
->geometry_program
;
1241 /* BRW_NEW_FRAGMENT_PROGRAM */
1242 struct gl_program
*fs
= (struct gl_program
*) brw
->fragment_program
;
1245 update_stage_texture_surfaces(brw
, vs
, &brw
->vs
.base
, false, 0);
1246 update_stage_texture_surfaces(brw
, tcs
, &brw
->tcs
.base
, false, 0);
1247 update_stage_texture_surfaces(brw
, tes
, &brw
->tes
.base
, false, 0);
1248 update_stage_texture_surfaces(brw
, gs
, &brw
->gs
.base
, false, 0);
1249 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, false, 0);
1251 /* emit alternate set of surface state for gather. this
1252 * allows the surface format to be overriden for only the
1253 * gather4 messages. */
1255 if (vs
&& vs
->nir
->info
.uses_texture_gather
)
1256 update_stage_texture_surfaces(brw
, vs
, &brw
->vs
.base
, true, 0);
1257 if (tcs
&& tcs
->nir
->info
.uses_texture_gather
)
1258 update_stage_texture_surfaces(brw
, tcs
, &brw
->tcs
.base
, true, 0);
1259 if (tes
&& tes
->nir
->info
.uses_texture_gather
)
1260 update_stage_texture_surfaces(brw
, tes
, &brw
->tes
.base
, true, 0);
1261 if (gs
&& gs
->nir
->info
.uses_texture_gather
)
1262 update_stage_texture_surfaces(brw
, gs
, &brw
->gs
.base
, true, 0);
1263 if (fs
&& fs
->nir
->info
.uses_texture_gather
)
1264 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, true, 0);
1268 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, false, 1);
1269 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, false, 2);
1272 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
/* Tracked-state atom for all-stage texture surfaces: dirty on any stage's
 * program or prog-data change, texture-buffer changes, and _NEW_TEXTURE.
 * NOTE(review): elided view; tokens preserved verbatim.
 */
1275 const struct brw_tracked_state brw_texture_surfaces
= {
1277 .mesa
= _NEW_TEXTURE
,
1278 .brw
= BRW_NEW_BATCH
|
1280 BRW_NEW_FRAGMENT_PROGRAM
|
1281 BRW_NEW_FS_PROG_DATA
|
1282 BRW_NEW_GEOMETRY_PROGRAM
|
1283 BRW_NEW_GS_PROG_DATA
|
1284 BRW_NEW_TESS_PROGRAMS
|
1285 BRW_NEW_TCS_PROG_DATA
|
1286 BRW_NEW_TES_PROG_DATA
|
1287 BRW_NEW_TEXTURE_BUFFER
|
1288 BRW_NEW_VERTEX_PROGRAM
|
1289 BRW_NEW_VS_PROG_DATA
,
1291 .emit
= brw_update_texture_surfaces
,
1295 brw_update_cs_texture_surfaces(struct brw_context
*brw
)
1297 /* BRW_NEW_COMPUTE_PROGRAM */
1298 struct gl_program
*cs
= (struct gl_program
*) brw
->compute_program
;
1301 update_stage_texture_surfaces(brw
, cs
, &brw
->cs
.base
, false, 0);
1303 /* emit alternate set of surface state for gather. this
1304 * allows the surface format to be overriden for only the
1308 if (cs
&& cs
->nir
->info
.uses_texture_gather
)
1309 update_stage_texture_surfaces(brw
, cs
, &brw
->cs
.base
, true, 0);
1312 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
/* Tracked-state atom for compute-stage texture surfaces.
 * NOTE(review): elided view; flag list may be incomplete.
 */
1315 const struct brw_tracked_state brw_cs_texture_surfaces
= {
1317 .mesa
= _NEW_TEXTURE
,
1318 .brw
= BRW_NEW_BATCH
|
1320 BRW_NEW_COMPUTE_PROGRAM
,
1322 .emit
= brw_update_cs_texture_surfaces
,
/* Uploads constant-buffer (UBO) and shader-storage-buffer (SSBO) surfaces
 * for one stage.  Unbound slots (NullBufferObj) get a 1x1x1 null surface;
 * bound slots get a constant/buffer surface over the binding's offset and
 * (possibly clamped) size.  Marks push constants dirty and, when any
 * UBO/SSBO exists, flags BRW_NEW_SURFACES.
 * NOTE(review): elided/re-wrapped view -- the `bo` declarations and the
 * trailing size arguments of the intel_bufferobj_buffer()/
 * brw_create_*_surface() calls are missing here; tokens preserved verbatim.
 */
1327 brw_upload_ubo_surfaces(struct brw_context
*brw
, struct gl_program
*prog
,
1328 struct brw_stage_state
*stage_state
,
1329 struct brw_stage_prog_data
*prog_data
)
1331 struct gl_context
*ctx
= &brw
->ctx
;
1336 uint32_t *ubo_surf_offsets
=
1337 &stage_state
->surf_offset
[prog_data
->binding_table
.ubo_start
];
1339 for (int i
= 0; i
< prog
->info
.num_ubos
; i
++) {
1340 struct gl_uniform_buffer_binding
*binding
=
1341 &ctx
->UniformBufferBindings
[prog
->sh
.UniformBlocks
[i
]->Binding
];
1343 if (binding
->BufferObject
== ctx
->Shared
->NullBufferObj
) {
1344 brw
->vtbl
.emit_null_surface_state(brw
, 1, 1, 1, &ubo_surf_offsets
[i
]);
1346 struct intel_buffer_object
*intel_bo
=
1347 intel_buffer_object(binding
->BufferObject
);
1348 GLsizeiptr size
= binding
->BufferObject
->Size
- binding
->Offset
;
1349 if (!binding
->AutomaticSize
)
1350 size
= MIN2(size
, binding
->Size
);
1352 intel_bufferobj_buffer(brw
, intel_bo
,
1355 brw_create_constant_surface(brw
, bo
, binding
->Offset
,
1357 &ubo_surf_offsets
[i
]);
1361 uint32_t *ssbo_surf_offsets
=
1362 &stage_state
->surf_offset
[prog_data
->binding_table
.ssbo_start
];
1364 for (int i
= 0; i
< prog
->info
.num_ssbos
; i
++) {
1365 struct gl_shader_storage_buffer_binding
*binding
=
1366 &ctx
->ShaderStorageBufferBindings
[prog
->sh
.ShaderStorageBlocks
[i
]->Binding
];
1368 if (binding
->BufferObject
== ctx
->Shared
->NullBufferObj
) {
1369 brw
->vtbl
.emit_null_surface_state(brw
, 1, 1, 1, &ssbo_surf_offsets
[i
]);
1371 struct intel_buffer_object
*intel_bo
=
1372 intel_buffer_object(binding
->BufferObject
);
1373 GLsizeiptr size
= binding
->BufferObject
->Size
- binding
->Offset
;
1374 if (!binding
->AutomaticSize
)
1375 size
= MIN2(size
, binding
->Size
);
1377 intel_bufferobj_buffer(brw
, intel_bo
,
1380 brw_create_buffer_surface(brw
, bo
, binding
->Offset
,
1382 &ssbo_surf_offsets
[i
]);
1386 stage_state
->push_constants_dirty
= true;
1388 if (prog
->info
.num_ubos
|| prog
->info
.num_ssbos
)
1389 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
/* Fragment-stage wrapper: upload UBO/SSBO surfaces for the current
 * fragment program.  NOTE(review): one elided line between the prog
 * declaration and the comment below may be a NULL-program guard -- confirm
 * against upstream.  Tokens preserved verbatim.
 */
1393 brw_upload_wm_ubo_surfaces(struct brw_context
*brw
)
1395 struct gl_context
*ctx
= &brw
->ctx
;
1397 struct gl_program
*prog
= ctx
->FragmentProgram
._Current
;
1399 /* BRW_NEW_FS_PROG_DATA */
1400 brw_upload_ubo_surfaces(brw
, prog
, &brw
->wm
.base
, brw
->wm
.base
.prog_data
);
/* Tracked-state atom for fragment-stage UBO/SSBO surfaces.
 * NOTE(review): elided view; flag list may be incomplete.
 */
1403 const struct brw_tracked_state brw_wm_ubo_surfaces
= {
1405 .mesa
= _NEW_PROGRAM
,
1406 .brw
= BRW_NEW_BATCH
|
1408 BRW_NEW_FS_PROG_DATA
|
1409 BRW_NEW_UNIFORM_BUFFER
,
1411 .emit
= brw_upload_wm_ubo_surfaces
,
/* Compute-stage wrapper: upload UBO/SSBO surfaces for the current compute
 * program.  NOTE(review): an elided line after the prog declaration may be
 * a NULL-program guard -- confirm against upstream.  Tokens verbatim.
 */
1415 brw_upload_cs_ubo_surfaces(struct brw_context
*brw
)
1417 struct gl_context
*ctx
= &brw
->ctx
;
1419 struct gl_program
*prog
=
1420 ctx
->_Shader
->CurrentProgram
[MESA_SHADER_COMPUTE
];
1422 /* BRW_NEW_CS_PROG_DATA */
1423 brw_upload_ubo_surfaces(brw
, prog
, &brw
->cs
.base
, brw
->cs
.base
.prog_data
);
/* Tracked-state atom for compute-stage UBO/SSBO surfaces.
 * NOTE(review): elided view; flag list may be incomplete.
 */
1426 const struct brw_tracked_state brw_cs_ubo_surfaces
= {
1428 .mesa
= _NEW_PROGRAM
,
1429 .brw
= BRW_NEW_BATCH
|
1431 BRW_NEW_CS_PROG_DATA
|
1432 BRW_NEW_UNIFORM_BUFFER
,
1434 .emit
= brw_upload_cs_ubo_surfaces
,
/* Uploads atomic-counter-buffer (ABO) surfaces for one stage: a RAW-format
 * writable buffer surface per atomic buffer binding, starting at the
 * stage's abo_start binding-table slot.  Flags BRW_NEW_SURFACES when any
 * ABO exists.  NOTE(review): elided/re-wrapped view -- the `bo`
 * declaration and the tail of the intel_bufferobj_buffer() call are
 * missing here; tokens preserved verbatim.
 */
1438 brw_upload_abo_surfaces(struct brw_context
*brw
,
1439 const struct gl_program
*prog
,
1440 struct brw_stage_state
*stage_state
,
1441 struct brw_stage_prog_data
*prog_data
)
1443 struct gl_context
*ctx
= &brw
->ctx
;
1444 uint32_t *surf_offsets
=
1445 &stage_state
->surf_offset
[prog_data
->binding_table
.abo_start
];
1447 if (prog
->info
.num_abos
) {
1448 for (unsigned i
= 0; i
< prog
->info
.num_abos
; i
++) {
1449 struct gl_atomic_buffer_binding
*binding
=
1450 &ctx
->AtomicBufferBindings
[prog
->sh
.AtomicBuffers
[i
]->Binding
];
1451 struct intel_buffer_object
*intel_bo
=
1452 intel_buffer_object(binding
->BufferObject
);
1454 intel_bufferobj_buffer(brw
, intel_bo
, binding
->Offset
,
1455 intel_bo
->Base
.Size
- binding
->Offset
,
1458 brw_emit_buffer_surface_state(brw
, &surf_offsets
[i
], bo
,
1459 binding
->Offset
, ISL_FORMAT_RAW
,
1460 bo
->size
- binding
->Offset
, 1, true);
1463 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
/* Fragment-stage wrapper for atomic-buffer surface upload.
 * NOTE(review): elided lines before the call likely guard on `wm` being
 * non-NULL -- confirm against upstream.  Tokens preserved verbatim.
 */
1468 brw_upload_wm_abo_surfaces(struct brw_context
*brw
)
1471 const struct gl_program
*wm
= brw
->fragment_program
;
1474 /* BRW_NEW_FS_PROG_DATA */
1475 brw_upload_abo_surfaces(brw
, wm
, &brw
->wm
.base
, brw
->wm
.base
.prog_data
);
/* Tracked-state atom for fragment-stage atomic-buffer surfaces.
 * NOTE(review): elided view; flag list may be incomplete.
 */
1479 const struct brw_tracked_state brw_wm_abo_surfaces
= {
1481 .mesa
= _NEW_PROGRAM
,
1482 .brw
= BRW_NEW_ATOMIC_BUFFER
|
1485 BRW_NEW_FS_PROG_DATA
,
1487 .emit
= brw_upload_wm_abo_surfaces
,
/* Compute-stage wrapper for atomic-buffer surface upload.
 * NOTE(review): elided lines before the call likely guard on `cp` being
 * non-NULL -- confirm against upstream.  Tokens preserved verbatim.
 */
1491 brw_upload_cs_abo_surfaces(struct brw_context
*brw
)
1494 const struct gl_program
*cp
= brw
->compute_program
;
1497 /* BRW_NEW_CS_PROG_DATA */
1498 brw_upload_abo_surfaces(brw
, cp
, &brw
->cs
.base
, brw
->cs
.base
.prog_data
);
/* Tracked-state atom for compute-stage atomic-buffer surfaces.
 * NOTE(review): elided view; flag list may be incomplete.
 */
1502 const struct brw_tracked_state brw_cs_abo_surfaces
= {
1504 .mesa
= _NEW_PROGRAM
,
1505 .brw
= BRW_NEW_ATOMIC_BUFFER
|
1508 BRW_NEW_CS_PROG_DATA
,
1510 .emit
= brw_upload_cs_abo_surfaces
,
/* Compute-stage wrapper for shader-image surface upload.
 * NOTE(review): elided lines before the call likely guard on `cp` being
 * non-NULL -- confirm against upstream.  Tokens preserved verbatim.
 */
1514 brw_upload_cs_image_surfaces(struct brw_context
*brw
)
1517 const struct gl_program
*cp
= brw
->compute_program
;
1520 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1521 brw_upload_image_surfaces(brw
, cp
, &brw
->cs
.base
,
1522 brw
->cs
.base
.prog_data
);
/* Tracked-state atom for compute-stage shader-image surfaces.
 * NOTE(review): elided view; flag list may be incomplete.
 */
1526 const struct brw_tracked_state brw_cs_image_surfaces
= {
1528 .mesa
= _NEW_TEXTURE
| _NEW_PROGRAM
,
1529 .brw
= BRW_NEW_BATCH
|
1531 BRW_NEW_CS_PROG_DATA
|
1534 .emit
= brw_upload_cs_image_surfaces
,
1538 get_image_format(struct brw_context
*brw
, mesa_format format
, GLenum access
)
1540 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
1541 enum isl_format hw_format
= brw_isl_format_for_mesa_format(format
);
1542 if (access
== GL_WRITE_ONLY
) {
1544 } else if (isl_has_matching_typed_storage_image_format(devinfo
, hw_format
)) {
1545 /* Typed surface reads support a very limited subset of the shader
1546 * image formats. Translate it into the closest format the
1547 * hardware supports.
1549 return isl_lower_storage_image_format(devinfo
, hw_format
);
1551 /* The hardware doesn't actually support a typed format that we can use
1552 * so we have to fall back to untyped read/write messages.
1554 return ISL_FORMAT_RAW
;
1559 update_default_image_param(struct brw_context
*brw
,
1560 struct gl_image_unit
*u
,
1561 unsigned surface_idx
,
1562 struct brw_image_param
*param
)
1564 memset(param
, 0, sizeof(*param
));
1565 param
->surface_idx
= surface_idx
;
1566 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1567 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1568 * detailed explanation of these parameters.
1570 param
->swizzling
[0] = 0xff;
1571 param
->swizzling
[1] = 0xff;
1575 update_buffer_image_param(struct brw_context
*brw
,
1576 struct gl_image_unit
*u
,
1577 unsigned surface_idx
,
1578 struct brw_image_param
*param
)
1580 struct gl_buffer_object
*obj
= u
->TexObj
->BufferObject
;
1581 const uint32_t size
= MIN2((uint32_t)u
->TexObj
->BufferSize
, obj
->Size
);
1582 update_default_image_param(brw
, u
, surface_idx
, param
);
1584 param
->size
[0] = size
/ _mesa_get_format_bytes(u
->_ActualFormat
);
1585 param
->stride
[0] = _mesa_get_format_bytes(u
->_ActualFormat
);
1589 get_image_num_layers(const struct intel_mipmap_tree
*mt
, GLenum target
,
1592 if (target
== GL_TEXTURE_CUBE_MAP
)
1595 return target
== GL_TEXTURE_3D
?
1596 minify(mt
->surf
.logical_level0_px
.depth
, level
) :
1597 mt
->surf
.logical_level0_px
.array_len
;
/* Emits SURFACE_STATE and brw_image_param data for a single shader-image
 * unit: buffer textures get a (possibly RAW) buffer surface; other targets
 * get either a RAW buffer surface over the miptree BO (when no typed
 * format exists) or a regular typed surface with aux disabled.  Invalid
 * image units get a null surface plus default parameters.
 * NOTE(review): elided/re-wrapped view -- the `access` parameter line,
 * parts of the isl_view initializer (.format), the assert argument list
 * and some else-branch braces are missing; tokens preserved verbatim.
 */
1601 update_image_surface(struct brw_context
*brw
,
1602 struct gl_image_unit
*u
,
1604 unsigned surface_idx
,
1605 uint32_t *surf_offset
,
1606 struct brw_image_param
*param
)
1608 if (_mesa_is_image_unit_valid(&brw
->ctx
, u
)) {
1609 struct gl_texture_object
*obj
= u
->TexObj
;
1610 const unsigned format
= get_image_format(brw
, u
->_ActualFormat
, access
);
1612 if (obj
->Target
== GL_TEXTURE_BUFFER
) {
1613 struct intel_buffer_object
*intel_obj
=
1614 intel_buffer_object(obj
->BufferObject
);
1615 const unsigned texel_size
= (format
== ISL_FORMAT_RAW
? 1 :
1616 _mesa_get_format_bytes(u
->_ActualFormat
));
1618 brw_emit_buffer_surface_state(
1619 brw
, surf_offset
, intel_obj
->buffer
, obj
->BufferOffset
,
1620 format
, intel_obj
->Base
.Size
, texel_size
,
1621 access
!= GL_READ_ONLY
);
1623 update_buffer_image_param(brw
, u
, surface_idx
, param
);
1626 struct intel_texture_object
*intel_obj
= intel_texture_object(obj
);
1627 struct intel_mipmap_tree
*mt
= intel_obj
->mt
;
1628 const unsigned num_layers
= u
->Layered
?
1629 get_image_num_layers(mt
, obj
->Target
, u
->Level
) : 1;
1631 struct isl_view view
= {
1633 .base_level
= obj
->MinLevel
+ u
->Level
,
1635 .base_array_layer
= obj
->MinLayer
+ u
->_Layer
,
1636 .array_len
= num_layers
,
1637 .swizzle
= ISL_SWIZZLE_IDENTITY
,
1638 .usage
= ISL_SURF_USAGE_STORAGE_BIT
,
1641 if (format
== ISL_FORMAT_RAW
) {
1642 brw_emit_buffer_surface_state(
1643 brw
, surf_offset
, mt
->bo
, mt
->offset
,
1644 format
, mt
->bo
->size
- mt
->offset
, 1 /* pitch */,
1645 access
!= GL_READ_ONLY
);
1648 const int surf_index
= surf_offset
- &brw
->wm
.base
.surf_offset
[0];
1649 assert(!intel_miptree_has_color_unresolved(mt
,
1651 view
.base_array_layer
,
1653 brw_emit_surface_state(brw
, mt
, mt
->target
, view
,
1654 ISL_AUX_USAGE_NONE
, tex_mocs
[brw
->gen
],
1655 surf_offset
, surf_index
,
1656 I915_GEM_DOMAIN_SAMPLER
,
1657 access
== GL_READ_ONLY
? 0 :
1658 I915_GEM_DOMAIN_SAMPLER
);
1661 isl_surf_fill_image_param(&brw
->isl_dev
, param
, &mt
->surf
, &view
);
1662 param
->surface_idx
= surface_idx
;
1666 brw
->vtbl
.emit_null_surface_state(brw
, 1, 1, 1, surf_offset
);
1667 update_default_image_param(brw
, u
, surface_idx
, param
);
1672 brw_upload_image_surfaces(struct brw_context
*brw
,
1673 const struct gl_program
*prog
,
1674 struct brw_stage_state
*stage_state
,
1675 struct brw_stage_prog_data
*prog_data
)
1678 struct gl_context
*ctx
= &brw
->ctx
;
1680 if (prog
->info
.num_images
) {
1681 for (unsigned i
= 0; i
< prog
->info
.num_images
; i
++) {
1682 struct gl_image_unit
*u
= &ctx
->ImageUnits
[prog
->sh
.ImageUnits
[i
]];
1683 const unsigned surf_idx
= prog_data
->binding_table
.image_start
+ i
;
1685 update_image_surface(brw
, u
, prog
->sh
.ImageAccess
[i
],
1687 &stage_state
->surf_offset
[surf_idx
],
1688 &prog_data
->image_param
[i
]);
1691 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1692 /* This may have changed the image metadata dependent on the context
1693 * image unit state and passed to the program as uniforms, make sure
1694 * that push and pull constants are reuploaded.
1696 brw
->NewGLState
|= _NEW_PROGRAM_CONSTANTS
;
/* Fragment-stage wrapper for shader-image surface upload.
 * NOTE(review): elided lines before the call likely guard on `wm` being
 * non-NULL -- confirm against upstream.  Tokens preserved verbatim.
 */
1701 brw_upload_wm_image_surfaces(struct brw_context
*brw
)
1703 /* BRW_NEW_FRAGMENT_PROGRAM */
1704 const struct gl_program
*wm
= brw
->fragment_program
;
1707 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1708 brw_upload_image_surfaces(brw
, wm
, &brw
->wm
.base
,
1709 brw
->wm
.base
.prog_data
);
/* Tracked-state atom for fragment-stage shader-image surfaces.
 * NOTE(review): elided view; flag list may be incomplete.
 */
1713 const struct brw_tracked_state brw_wm_image_surfaces
= {
1715 .mesa
= _NEW_TEXTURE
,
1716 .brw
= BRW_NEW_BATCH
|
1718 BRW_NEW_FRAGMENT_PROGRAM
|
1719 BRW_NEW_FS_PROG_DATA
|
1722 .emit
= brw_upload_wm_image_surfaces
,
1726 gen4_init_vtable_surface_functions(struct brw_context
*brw
)
1728 brw
->vtbl
.update_renderbuffer_surface
= gen4_update_renderbuffer_surface
;
1729 brw
->vtbl
.emit_null_surface_state
= brw_emit_null_surface_state
;
1733 gen6_init_vtable_surface_functions(struct brw_context
*brw
)
1735 gen4_init_vtable_surface_functions(brw
);
1736 brw
->vtbl
.update_renderbuffer_surface
= brw_update_renderbuffer_surface
;
/* When the compute program reads gl_NumWorkGroups, emits a buffer surface
 * over the BO holding the three GLuint work-group counts.  If no BO was
 * provided (glDispatchCompute rather than the Indirect variant), the counts
 * are first uploaded via intel_upload_data().
 * NOTE(review): elided/re-wrapped view -- the `bo`/`bo_offset`
 * declarations, intel_upload_data() size/alignment arguments and part of
 * the surface-state argument list are missing; tokens preserved verbatim.
 */
1740 brw_upload_cs_work_groups_surface(struct brw_context
*brw
)
1742 struct gl_context
*ctx
= &brw
->ctx
;
1744 struct gl_program
*prog
=
1745 ctx
->_Shader
->CurrentProgram
[MESA_SHADER_COMPUTE
];
1746 /* BRW_NEW_CS_PROG_DATA */
1747 const struct brw_cs_prog_data
*cs_prog_data
=
1748 brw_cs_prog_data(brw
->cs
.base
.prog_data
);
1750 if (prog
&& cs_prog_data
->uses_num_work_groups
) {
1751 const unsigned surf_idx
=
1752 cs_prog_data
->binding_table
.work_groups_start
;
1753 uint32_t *surf_offset
= &brw
->cs
.base
.surf_offset
[surf_idx
];
1757 if (brw
->compute
.num_work_groups_bo
== NULL
) {
1759 intel_upload_data(brw
,
1760 (void *)brw
->compute
.num_work_groups
,
1766 bo
= brw
->compute
.num_work_groups_bo
;
1767 bo_offset
= brw
->compute
.num_work_groups_offset
;
1770 brw_emit_buffer_surface_state(brw
, surf_offset
,
1773 3 * sizeof(GLuint
), 1, true);
1774 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
/* Tracked-state atom for the gl_NumWorkGroups surface.
 * NOTE(review): elided view; tokens preserved verbatim.
 */
1778 const struct brw_tracked_state brw_cs_work_groups_surface
= {
1780 .brw
= BRW_NEW_BLORP
|
1781 BRW_NEW_CS_PROG_DATA
|
1782 BRW_NEW_CS_WORK_GROUPS
1784 .emit
= brw_upload_cs_work_groups_surface
,