2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keithw@vmware.com>
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "main/shaderimage.h"
38 #include "main/teximage.h"
39 #include "program/prog_parameter.h"
40 #include "program/prog_instruction.h"
41 #include "main/framebuffer.h"
45 #include "intel_mipmap_tree.h"
46 #include "intel_batchbuffer.h"
47 #include "intel_tex.h"
48 #include "intel_fbo.h"
49 #include "intel_buffer_objects.h"
51 #include "brw_context.h"
52 #include "brw_state.h"
53 #include "brw_defines.h"
56 struct surface_state_info
{
58 unsigned ss_align
; /* Required alignment of RENDER_SURFACE_STATE in bytes */
60 unsigned aux_reloc_dw
;
65 static const struct surface_state_info surface_state_infos
[] = {
69 [7] = {8, 32, 1, 6, GEN7_MOCS_L3
, GEN7_MOCS_L3
},
70 [8] = {13, 64, 8, 10, BDW_MOCS_WB
, BDW_MOCS_PTE
},
71 [9] = {16, 64, 8, 10, SKL_MOCS_WB
, SKL_MOCS_PTE
},
75 brw_emit_surface_state(struct brw_context
*brw
,
76 struct intel_mipmap_tree
*mt
,
77 const struct isl_view
*view
,
78 uint32_t mocs
, bool for_gather
,
79 uint32_t *surf_offset
, int surf_index
,
80 unsigned read_domains
, unsigned write_domains
)
82 const struct surface_state_info ss_info
= surface_state_infos
[brw
->gen
];
85 intel_miptree_get_isl_surf(brw
, mt
, &surf
);
87 union isl_color_value clear_color
= { .u32
= { 0, 0, 0, 0 } };
89 struct isl_surf
*aux_surf
= NULL
, aux_surf_s
;
90 uint64_t aux_offset
= 0;
91 enum isl_aux_usage aux_usage
= ISL_AUX_USAGE_NONE
;
93 ((view
->usage
& ISL_SURF_USAGE_RENDER_TARGET_BIT
) ||
94 mt
->fast_clear_state
!= INTEL_FAST_CLEAR_STATE_RESOLVED
)) {
95 intel_miptree_get_aux_isl_surf(brw
, mt
, &aux_surf_s
, &aux_usage
);
96 aux_surf
= &aux_surf_s
;
97 assert(mt
->mcs_mt
->offset
== 0);
98 aux_offset
= mt
->mcs_mt
->bo
->offset64
;
100 /* We only really need a clear color if we also have an auxiliary
101 * surfacae. Without one, it does nothing.
103 clear_color
= intel_miptree_get_isl_clear_color(brw
, mt
);
106 uint32_t *dw
= __brw_state_batch(brw
, AUB_TRACE_SURFACE_STATE
,
107 ss_info
.num_dwords
* 4, ss_info
.ss_align
,
108 surf_index
, surf_offset
);
110 isl_surf_fill_state(&brw
->isl_dev
, dw
, .surf
= &surf
, .view
= view
,
111 .address
= mt
->bo
->offset64
+ mt
->offset
,
112 .aux_surf
= aux_surf
, .aux_usage
= aux_usage
,
113 .aux_address
= aux_offset
,
114 .mocs
= mocs
, .clear_color
= clear_color
);
116 drm_intel_bo_emit_reloc(brw
->batch
.bo
,
117 *surf_offset
+ 4 * ss_info
.reloc_dw
,
119 read_domains
, write_domains
);
122 /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
123 * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
124 * contain other control information. Since buffer addresses are always
125 * on 4k boundaries (and thus have their lower 12 bits zero), we can use
126 * an ordinary reloc to do the necessary address translation.
128 assert((aux_offset
& 0xfff) == 0);
129 drm_intel_bo_emit_reloc(brw
->batch
.bo
,
130 *surf_offset
+ 4 * ss_info
.aux_reloc_dw
,
131 mt
->mcs_mt
->bo
, dw
[ss_info
.aux_reloc_dw
] & 0xfff,
132 read_domains
, write_domains
);
137 brw_update_renderbuffer_surface(struct brw_context
*brw
,
138 struct gl_renderbuffer
*rb
,
139 bool layered
, unsigned unit
/* unused */,
142 struct gl_context
*ctx
= &brw
->ctx
;
143 struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
144 struct intel_mipmap_tree
*mt
= irb
->mt
;
146 assert(brw_render_target_supported(brw
, rb
));
147 intel_miptree_used_for_rendering(mt
);
149 mesa_format rb_format
= _mesa_get_render_format(ctx
, intel_rb_format(irb
));
150 if (unlikely(!brw
->format_supported_as_render_target
[rb_format
])) {
151 _mesa_problem(ctx
, "%s: renderbuffer format %s unsupported\n",
152 __func__
, _mesa_get_format_name(rb_format
));
155 const unsigned layer_multiplier
=
156 (irb
->mt
->msaa_layout
== INTEL_MSAA_LAYOUT_UMS
||
157 irb
->mt
->msaa_layout
== INTEL_MSAA_LAYOUT_CMS
) ?
158 MAX2(irb
->mt
->num_samples
, 1) : 1;
160 struct isl_view view
= {
161 .format
= brw
->render_target_format
[rb_format
],
162 .base_level
= irb
->mt_level
- irb
->mt
->first_level
,
164 .base_array_layer
= irb
->mt_layer
/ layer_multiplier
,
165 .array_len
= MAX2(irb
->layer_count
, 1),
167 ISL_CHANNEL_SELECT_RED
,
168 ISL_CHANNEL_SELECT_GREEN
,
169 ISL_CHANNEL_SELECT_BLUE
,
170 ISL_CHANNEL_SELECT_ALPHA
,
172 .usage
= ISL_SURF_USAGE_RENDER_TARGET_BIT
,
176 brw_emit_surface_state(brw
, mt
, &view
,
177 surface_state_infos
[brw
->gen
].rb_mocs
, false,
179 I915_GEM_DOMAIN_RENDER
,
180 I915_GEM_DOMAIN_RENDER
);
185 translate_tex_target(GLenum target
)
189 case GL_TEXTURE_1D_ARRAY_EXT
:
190 return BRW_SURFACE_1D
;
192 case GL_TEXTURE_RECTANGLE_NV
:
193 return BRW_SURFACE_2D
;
196 case GL_TEXTURE_2D_ARRAY_EXT
:
197 case GL_TEXTURE_EXTERNAL_OES
:
198 case GL_TEXTURE_2D_MULTISAMPLE
:
199 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY
:
200 return BRW_SURFACE_2D
;
203 return BRW_SURFACE_3D
;
205 case GL_TEXTURE_CUBE_MAP
:
206 case GL_TEXTURE_CUBE_MAP_ARRAY
:
207 return BRW_SURFACE_CUBE
;
210 unreachable("not reached");
215 brw_get_surface_tiling_bits(uint32_t tiling
)
219 return BRW_SURFACE_TILED
;
221 return BRW_SURFACE_TILED
| BRW_SURFACE_TILED_Y
;
229 brw_get_surface_num_multisamples(unsigned num_samples
)
232 return BRW_SURFACE_MULTISAMPLECOUNT_4
;
234 return BRW_SURFACE_MULTISAMPLECOUNT_1
;
238 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
242 brw_get_texture_swizzle(const struct gl_context
*ctx
,
243 const struct gl_texture_object
*t
)
245 const struct gl_texture_image
*img
= t
->Image
[0][t
->BaseLevel
];
247 int swizzles
[SWIZZLE_NIL
+ 1] = {
257 if (img
->_BaseFormat
== GL_DEPTH_COMPONENT
||
258 img
->_BaseFormat
== GL_DEPTH_STENCIL
) {
259 GLenum depth_mode
= t
->DepthMode
;
261 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
262 * with depth component data specified with a sized internal format.
263 * Otherwise, it's left at the old default, GL_LUMINANCE.
265 if (_mesa_is_gles3(ctx
) &&
266 img
->InternalFormat
!= GL_DEPTH_COMPONENT
&&
267 img
->InternalFormat
!= GL_DEPTH_STENCIL
) {
271 switch (depth_mode
) {
273 swizzles
[0] = SWIZZLE_ZERO
;
274 swizzles
[1] = SWIZZLE_ZERO
;
275 swizzles
[2] = SWIZZLE_ZERO
;
276 swizzles
[3] = SWIZZLE_X
;
279 swizzles
[0] = SWIZZLE_X
;
280 swizzles
[1] = SWIZZLE_X
;
281 swizzles
[2] = SWIZZLE_X
;
282 swizzles
[3] = SWIZZLE_ONE
;
285 swizzles
[0] = SWIZZLE_X
;
286 swizzles
[1] = SWIZZLE_X
;
287 swizzles
[2] = SWIZZLE_X
;
288 swizzles
[3] = SWIZZLE_X
;
291 swizzles
[0] = SWIZZLE_X
;
292 swizzles
[1] = SWIZZLE_ZERO
;
293 swizzles
[2] = SWIZZLE_ZERO
;
294 swizzles
[3] = SWIZZLE_ONE
;
299 GLenum datatype
= _mesa_get_format_datatype(img
->TexFormat
);
301 /* If the texture's format is alpha-only, force R, G, and B to
302 * 0.0. Similarly, if the texture's format has no alpha channel,
303 * force the alpha value read to 1.0. This allows for the
304 * implementation to use an RGBA texture for any of these formats
305 * without leaking any unexpected values.
307 switch (img
->_BaseFormat
) {
309 swizzles
[0] = SWIZZLE_ZERO
;
310 swizzles
[1] = SWIZZLE_ZERO
;
311 swizzles
[2] = SWIZZLE_ZERO
;
314 if (t
->_IsIntegerFormat
|| datatype
== GL_SIGNED_NORMALIZED
) {
315 swizzles
[0] = SWIZZLE_X
;
316 swizzles
[1] = SWIZZLE_X
;
317 swizzles
[2] = SWIZZLE_X
;
318 swizzles
[3] = SWIZZLE_ONE
;
321 case GL_LUMINANCE_ALPHA
:
322 if (datatype
== GL_SIGNED_NORMALIZED
) {
323 swizzles
[0] = SWIZZLE_X
;
324 swizzles
[1] = SWIZZLE_X
;
325 swizzles
[2] = SWIZZLE_X
;
326 swizzles
[3] = SWIZZLE_W
;
330 if (datatype
== GL_SIGNED_NORMALIZED
) {
331 swizzles
[0] = SWIZZLE_X
;
332 swizzles
[1] = SWIZZLE_X
;
333 swizzles
[2] = SWIZZLE_X
;
334 swizzles
[3] = SWIZZLE_X
;
340 if (_mesa_get_format_bits(img
->TexFormat
, GL_ALPHA_BITS
) > 0)
341 swizzles
[3] = SWIZZLE_ONE
;
345 return MAKE_SWIZZLE4(swizzles
[GET_SWZ(t
->_Swizzle
, 0)],
346 swizzles
[GET_SWZ(t
->_Swizzle
, 1)],
347 swizzles
[GET_SWZ(t
->_Swizzle
, 2)],
348 swizzles
[GET_SWZ(t
->_Swizzle
, 3)]);
352 * Convert an swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+
353 * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED). The mappings are
355 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
358 * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE
360 * which is simply adding 4 then modding by 8 (or anding with 7).
362 * We then may need to apply workarounds for textureGather hardware bugs.
365 swizzle_to_scs(GLenum swizzle
, bool need_green_to_blue
)
367 unsigned scs
= (swizzle
+ 4) & 7;
369 return (need_green_to_blue
&& scs
== HSW_SCS_GREEN
) ? HSW_SCS_BLUE
: scs
;
373 brw_update_texture_surface(struct gl_context
*ctx
,
375 uint32_t *surf_offset
,
379 struct brw_context
*brw
= brw_context(ctx
);
380 struct gl_texture_object
*obj
= ctx
->Texture
.Unit
[unit
]._Current
;
382 if (obj
->Target
== GL_TEXTURE_BUFFER
) {
383 brw_update_buffer_texture_surface(ctx
, unit
, surf_offset
);
386 struct intel_texture_object
*intel_obj
= intel_texture_object(obj
);
387 struct intel_mipmap_tree
*mt
= intel_obj
->mt
;
388 struct gl_sampler_object
*sampler
= _mesa_get_samplerobj(ctx
, unit
);
389 /* If this is a view with restricted NumLayers, then our effective depth
390 * is not just the miptree depth.
392 const unsigned mt_num_layers
=
393 mt
->logical_depth0
* (_mesa_is_cube_map_texture(mt
->target
) ? 6 : 1);
394 const unsigned view_num_layers
=
395 (obj
->Immutable
&& obj
->Target
!= GL_TEXTURE_3D
) ? obj
->NumLayers
:
398 /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
399 * texturing functions that return a float, as our code generation always
400 * selects the .x channel (which would always be 0).
402 struct gl_texture_image
*firstImage
= obj
->Image
[0][obj
->BaseLevel
];
403 const bool alpha_depth
= obj
->DepthMode
== GL_ALPHA
&&
404 (firstImage
->_BaseFormat
== GL_DEPTH_COMPONENT
||
405 firstImage
->_BaseFormat
== GL_DEPTH_STENCIL
);
406 const unsigned swizzle
= (unlikely(alpha_depth
) ? SWIZZLE_XYZW
:
407 brw_get_texture_swizzle(&brw
->ctx
, obj
));
409 unsigned format
= translate_tex_format(
410 brw
, intel_obj
->_Format
, sampler
->sRGBDecode
);
412 /* Implement gen6 and gen7 gather work-around */
413 bool need_green_to_blue
= false;
415 if (brw
->gen
== 7 && format
== BRW_SURFACEFORMAT_R32G32_FLOAT
) {
416 format
= BRW_SURFACEFORMAT_R32G32_FLOAT_LD
;
417 need_green_to_blue
= brw
->is_haswell
;
418 } else if (brw
->gen
== 6) {
419 /* Sandybridge's gather4 message is broken for integer formats.
420 * To work around this, we pretend the surface is UNORM for
421 * 8 or 16-bit formats, and emit shader instructions to recover
422 * the real INT/UINT value. For 32-bit formats, we pretend
423 * the surface is FLOAT, and simply reinterpret the resulting
427 case BRW_SURFACEFORMAT_R8_SINT
:
428 case BRW_SURFACEFORMAT_R8_UINT
:
429 format
= BRW_SURFACEFORMAT_R8_UNORM
;
432 case BRW_SURFACEFORMAT_R16_SINT
:
433 case BRW_SURFACEFORMAT_R16_UINT
:
434 format
= BRW_SURFACEFORMAT_R16_UNORM
;
437 case BRW_SURFACEFORMAT_R32_SINT
:
438 case BRW_SURFACEFORMAT_R32_UINT
:
439 format
= BRW_SURFACEFORMAT_R32_FLOAT
;
448 if (obj
->StencilSampling
&& firstImage
->_BaseFormat
== GL_DEPTH_STENCIL
) {
449 assert(brw
->gen
>= 8);
451 format
= BRW_SURFACEFORMAT_R8_UINT
;
452 } else if (obj
->Target
== GL_TEXTURE_EXTERNAL_OES
) {
454 mt
= mt
->plane
[plane
- 1];
457 format
= translate_tex_format(brw
, mt
->format
, sampler
->sRGBDecode
);
460 const int surf_index
= surf_offset
- &brw
->wm
.base
.surf_offset
[0];
462 struct isl_view view
= {
464 .base_level
= obj
->MinLevel
+ obj
->BaseLevel
,
465 .levels
= intel_obj
->_MaxLevel
- obj
->BaseLevel
+ 1,
466 .base_array_layer
= obj
->MinLayer
,
467 .array_len
= view_num_layers
,
469 swizzle_to_scs(GET_SWZ(swizzle
, 0), need_green_to_blue
),
470 swizzle_to_scs(GET_SWZ(swizzle
, 1), need_green_to_blue
),
471 swizzle_to_scs(GET_SWZ(swizzle
, 2), need_green_to_blue
),
472 swizzle_to_scs(GET_SWZ(swizzle
, 3), need_green_to_blue
),
474 .usage
= ISL_SURF_USAGE_TEXTURE_BIT
,
477 if (obj
->Target
== GL_TEXTURE_CUBE_MAP
||
478 obj
->Target
== GL_TEXTURE_CUBE_MAP_ARRAY
)
479 view
.usage
|= ISL_SURF_USAGE_CUBE_BIT
;
481 brw_emit_surface_state(brw
, mt
, &view
,
482 surface_state_infos
[brw
->gen
].tex_mocs
, for_gather
,
483 surf_offset
, surf_index
,
484 I915_GEM_DOMAIN_SAMPLER
, 0);
489 gen4_emit_buffer_surface_state(struct brw_context
*brw
,
490 uint32_t *out_offset
,
492 unsigned buffer_offset
,
493 unsigned surface_format
,
494 unsigned buffer_size
,
498 uint32_t *surf
= brw_state_batch(brw
, AUB_TRACE_SURFACE_STATE
,
499 6 * 4, 32, out_offset
);
500 memset(surf
, 0, 6 * 4);
502 surf
[0] = BRW_SURFACE_BUFFER
<< BRW_SURFACE_TYPE_SHIFT
|
503 surface_format
<< BRW_SURFACE_FORMAT_SHIFT
|
504 (brw
->gen
>= 6 ? BRW_SURFACE_RC_READ_WRITE
: 0);
505 surf
[1] = (bo
? bo
->offset64
: 0) + buffer_offset
; /* reloc */
506 surf
[2] = ((buffer_size
- 1) & 0x7f) << BRW_SURFACE_WIDTH_SHIFT
|
507 (((buffer_size
- 1) >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT
;
508 surf
[3] = (((buffer_size
- 1) >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT
|
509 (pitch
- 1) << BRW_SURFACE_PITCH_SHIFT
;
511 /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
512 * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
513 * physical cache. It is mapped in hardware to the sampler cache."
516 drm_intel_bo_emit_reloc(brw
->batch
.bo
, *out_offset
+ 4,
518 I915_GEM_DOMAIN_SAMPLER
,
519 (rw
? I915_GEM_DOMAIN_SAMPLER
: 0));
524 brw_update_buffer_texture_surface(struct gl_context
*ctx
,
526 uint32_t *surf_offset
)
528 struct brw_context
*brw
= brw_context(ctx
);
529 struct gl_texture_object
*tObj
= ctx
->Texture
.Unit
[unit
]._Current
;
530 struct intel_buffer_object
*intel_obj
=
531 intel_buffer_object(tObj
->BufferObject
);
532 uint32_t size
= tObj
->BufferSize
;
533 drm_intel_bo
*bo
= NULL
;
534 mesa_format format
= tObj
->_BufferObjectFormat
;
535 uint32_t brw_format
= brw_format_for_mesa_format(format
);
536 int texel_size
= _mesa_get_format_bytes(format
);
539 size
= MIN2(size
, intel_obj
->Base
.Size
);
540 bo
= intel_bufferobj_buffer(brw
, intel_obj
, tObj
->BufferOffset
, size
);
543 if (brw_format
== 0 && format
!= MESA_FORMAT_RGBA_FLOAT32
) {
544 _mesa_problem(NULL
, "bad format %s for texture buffer\n",
545 _mesa_get_format_name(format
));
548 brw
->vtbl
.emit_buffer_surface_state(brw
, surf_offset
, bo
,
557 gen4_update_texture_surface(struct gl_context
*ctx
,
559 uint32_t *surf_offset
,
563 struct brw_context
*brw
= brw_context(ctx
);
564 struct gl_texture_object
*tObj
= ctx
->Texture
.Unit
[unit
]._Current
;
565 struct intel_texture_object
*intelObj
= intel_texture_object(tObj
);
566 struct intel_mipmap_tree
*mt
= intelObj
->mt
;
567 struct gl_sampler_object
*sampler
= _mesa_get_samplerobj(ctx
, unit
);
570 /* BRW_NEW_TEXTURE_BUFFER */
571 if (tObj
->Target
== GL_TEXTURE_BUFFER
) {
572 brw_update_buffer_texture_surface(ctx
, unit
, surf_offset
);
577 if (mt
->plane
[plane
- 1] == NULL
)
579 mt
= mt
->plane
[plane
- 1];
582 surf
= brw_state_batch(brw
, AUB_TRACE_SURFACE_STATE
,
583 6 * 4, 32, surf_offset
);
585 mesa_format mesa_fmt
= plane
== 0 ? intelObj
->_Format
: mt
->format
;
586 uint32_t tex_format
= translate_tex_format(brw
, mesa_fmt
,
587 sampler
->sRGBDecode
);
590 /* Sandybridge's gather4 message is broken for integer formats.
591 * To work around this, we pretend the surface is UNORM for
592 * 8 or 16-bit formats, and emit shader instructions to recover
593 * the real INT/UINT value. For 32-bit formats, we pretend
594 * the surface is FLOAT, and simply reinterpret the resulting
597 switch (tex_format
) {
598 case BRW_SURFACEFORMAT_R8_SINT
:
599 case BRW_SURFACEFORMAT_R8_UINT
:
600 tex_format
= BRW_SURFACEFORMAT_R8_UNORM
;
603 case BRW_SURFACEFORMAT_R16_SINT
:
604 case BRW_SURFACEFORMAT_R16_UINT
:
605 tex_format
= BRW_SURFACEFORMAT_R16_UNORM
;
608 case BRW_SURFACEFORMAT_R32_SINT
:
609 case BRW_SURFACEFORMAT_R32_UINT
:
610 tex_format
= BRW_SURFACEFORMAT_R32_FLOAT
;
618 surf
[0] = (translate_tex_target(tObj
->Target
) << BRW_SURFACE_TYPE_SHIFT
|
619 BRW_SURFACE_MIPMAPLAYOUT_BELOW
<< BRW_SURFACE_MIPLAYOUT_SHIFT
|
620 BRW_SURFACE_CUBEFACE_ENABLES
|
621 tex_format
<< BRW_SURFACE_FORMAT_SHIFT
);
623 surf
[1] = mt
->bo
->offset64
+ mt
->offset
; /* reloc */
625 surf
[2] = ((intelObj
->_MaxLevel
- tObj
->BaseLevel
) << BRW_SURFACE_LOD_SHIFT
|
626 (mt
->logical_width0
- 1) << BRW_SURFACE_WIDTH_SHIFT
|
627 (mt
->logical_height0
- 1) << BRW_SURFACE_HEIGHT_SHIFT
);
629 surf
[3] = (brw_get_surface_tiling_bits(mt
->tiling
) |
630 (mt
->logical_depth0
- 1) << BRW_SURFACE_DEPTH_SHIFT
|
631 (mt
->pitch
- 1) << BRW_SURFACE_PITCH_SHIFT
);
633 const unsigned min_lod
= tObj
->MinLevel
+ tObj
->BaseLevel
- mt
->first_level
;
634 surf
[4] = (brw_get_surface_num_multisamples(mt
->num_samples
) |
635 SET_FIELD(min_lod
, BRW_SURFACE_MIN_LOD
) |
636 SET_FIELD(tObj
->MinLayer
, BRW_SURFACE_MIN_ARRAY_ELEMENT
));
638 surf
[5] = mt
->valign
== 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE
: 0;
640 /* Emit relocation to surface contents */
641 drm_intel_bo_emit_reloc(brw
->batch
.bo
,
644 surf
[1] - mt
->bo
->offset64
,
645 I915_GEM_DOMAIN_SAMPLER
, 0);
649 * Create the constant buffer surface. Vertex/fragment shader constants will be
650 * read from this buffer with Data Port Read instructions/messages.
653 brw_create_constant_surface(struct brw_context
*brw
,
657 uint32_t *out_offset
)
659 brw
->vtbl
.emit_buffer_surface_state(brw
, out_offset
, bo
, offset
,
660 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT
,
665 * Create the buffer surface. Shader buffer variables will be
666 * read from / write to this buffer with Data Port Read/Write
667 * instructions/messages.
670 brw_create_buffer_surface(struct brw_context
*brw
,
674 uint32_t *out_offset
)
676 /* Use a raw surface so we can reuse existing untyped read/write/atomic
677 * messages. We need these specifically for the fragment shader since they
678 * include a pixel mask header that we need to ensure correct behavior
679 * with helper invocations, which cannot write to the buffer.
681 brw
->vtbl
.emit_buffer_surface_state(brw
, out_offset
, bo
, offset
,
682 BRW_SURFACEFORMAT_RAW
,
687 * Set up a binding table entry for use by stream output logic (transform
690 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
693 brw_update_sol_surface(struct brw_context
*brw
,
694 struct gl_buffer_object
*buffer_obj
,
695 uint32_t *out_offset
, unsigned num_vector_components
,
696 unsigned stride_dwords
, unsigned offset_dwords
)
698 struct intel_buffer_object
*intel_bo
= intel_buffer_object(buffer_obj
);
699 uint32_t offset_bytes
= 4 * offset_dwords
;
700 drm_intel_bo
*bo
= intel_bufferobj_buffer(brw
, intel_bo
,
702 buffer_obj
->Size
- offset_bytes
);
703 uint32_t *surf
= brw_state_batch(brw
, AUB_TRACE_SURFACE_STATE
, 6 * 4, 32,
705 uint32_t pitch_minus_1
= 4*stride_dwords
- 1;
706 size_t size_dwords
= buffer_obj
->Size
/ 4;
707 uint32_t buffer_size_minus_1
, width
, height
, depth
, surface_format
;
709 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
710 * too big to map using a single binding table entry?
712 assert((size_dwords
- offset_dwords
) / stride_dwords
713 <= BRW_MAX_NUM_BUFFER_ENTRIES
);
715 if (size_dwords
> offset_dwords
+ num_vector_components
) {
716 /* There is room for at least 1 transform feedback output in the buffer.
717 * Compute the number of additional transform feedback outputs the
718 * buffer has room for.
720 buffer_size_minus_1
=
721 (size_dwords
- offset_dwords
- num_vector_components
) / stride_dwords
;
723 /* There isn't even room for a single transform feedback output in the
724 * buffer. We can't configure the binding table entry to prevent output
725 * entirely; we'll have to rely on the geometry shader to detect
726 * overflow. But to minimize the damage in case of a bug, set up the
727 * binding table entry to just allow a single output.
729 buffer_size_minus_1
= 0;
731 width
= buffer_size_minus_1
& 0x7f;
732 height
= (buffer_size_minus_1
& 0xfff80) >> 7;
733 depth
= (buffer_size_minus_1
& 0x7f00000) >> 20;
735 switch (num_vector_components
) {
737 surface_format
= BRW_SURFACEFORMAT_R32_FLOAT
;
740 surface_format
= BRW_SURFACEFORMAT_R32G32_FLOAT
;
743 surface_format
= BRW_SURFACEFORMAT_R32G32B32_FLOAT
;
746 surface_format
= BRW_SURFACEFORMAT_R32G32B32A32_FLOAT
;
749 unreachable("Invalid vector size for transform feedback output");
752 surf
[0] = BRW_SURFACE_BUFFER
<< BRW_SURFACE_TYPE_SHIFT
|
753 BRW_SURFACE_MIPMAPLAYOUT_BELOW
<< BRW_SURFACE_MIPLAYOUT_SHIFT
|
754 surface_format
<< BRW_SURFACE_FORMAT_SHIFT
|
755 BRW_SURFACE_RC_READ_WRITE
;
756 surf
[1] = bo
->offset64
+ offset_bytes
; /* reloc */
757 surf
[2] = (width
<< BRW_SURFACE_WIDTH_SHIFT
|
758 height
<< BRW_SURFACE_HEIGHT_SHIFT
);
759 surf
[3] = (depth
<< BRW_SURFACE_DEPTH_SHIFT
|
760 pitch_minus_1
<< BRW_SURFACE_PITCH_SHIFT
);
764 /* Emit relocation to surface contents. */
765 drm_intel_bo_emit_reloc(brw
->batch
.bo
,
768 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
);
771 /* Creates a new WM constant buffer reflecting the current fragment program's
772 * constants, if needed by the fragment program.
774 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
778 brw_upload_wm_pull_constants(struct brw_context
*brw
)
780 struct brw_stage_state
*stage_state
= &brw
->wm
.base
;
781 /* BRW_NEW_FRAGMENT_PROGRAM */
782 struct brw_fragment_program
*fp
=
783 (struct brw_fragment_program
*) brw
->fragment_program
;
784 /* BRW_NEW_FS_PROG_DATA */
785 struct brw_stage_prog_data
*prog_data
= &brw
->wm
.prog_data
->base
;
787 /* _NEW_PROGRAM_CONSTANTS */
788 brw_upload_pull_constants(brw
, BRW_NEW_SURFACES
, &fp
->program
.Base
,
789 stage_state
, prog_data
);
792 const struct brw_tracked_state brw_wm_pull_constants
= {
794 .mesa
= _NEW_PROGRAM_CONSTANTS
,
795 .brw
= BRW_NEW_BATCH
|
797 BRW_NEW_FRAGMENT_PROGRAM
|
798 BRW_NEW_FS_PROG_DATA
,
800 .emit
= brw_upload_wm_pull_constants
,
804 * Creates a null renderbuffer surface.
806 * This is used when the shader doesn't write to any color output. An FB
807 * write to target 0 will still be emitted, because that's how the thread is
808 * terminated (and computed depth is returned), so we need to have the
809 * hardware discard the target 0 color output..
812 brw_emit_null_surface_state(struct brw_context
*brw
,
816 uint32_t *out_offset
)
818 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
821 * A null surface will be used in instances where an actual surface is
822 * not bound. When a write message is generated to a null surface, no
823 * actual surface is written to. When a read message (including any
824 * sampling engine message) is generated to a null surface, the result
825 * is all zeros. Note that a null surface type is allowed to be used
826 * with all messages, even if it is not specificially indicated as
827 * supported. All of the remaining fields in surface state are ignored
828 * for null surfaces, with the following exceptions:
830 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
831 * depth buffer’s corresponding state for all render target surfaces,
834 * - Surface Format must be R8G8B8A8_UNORM.
836 unsigned surface_type
= BRW_SURFACE_NULL
;
837 drm_intel_bo
*bo
= NULL
;
838 unsigned pitch_minus_1
= 0;
839 uint32_t multisampling_state
= 0;
840 uint32_t *surf
= brw_state_batch(brw
, AUB_TRACE_SURFACE_STATE
, 6 * 4, 32,
844 /* On Gen6, null render targets seem to cause GPU hangs when
845 * multisampling. So work around this problem by rendering into dummy
848 * To decrease the amount of memory needed by the workaround buffer, we
849 * set its pitch to 128 bytes (the width of a Y tile). This means that
850 * the amount of memory needed for the workaround buffer is
851 * (width_in_tiles + height_in_tiles - 1) tiles.
853 * Note that since the workaround buffer will be interpreted by the
854 * hardware as an interleaved multisampled buffer, we need to compute
855 * width_in_tiles and height_in_tiles by dividing the width and height
856 * by 16 rather than the normal Y-tile size of 32.
858 unsigned width_in_tiles
= ALIGN(width
, 16) / 16;
859 unsigned height_in_tiles
= ALIGN(height
, 16) / 16;
860 unsigned size_needed
= (width_in_tiles
+ height_in_tiles
- 1) * 4096;
861 brw_get_scratch_bo(brw
, &brw
->wm
.multisampled_null_render_target_bo
,
863 bo
= brw
->wm
.multisampled_null_render_target_bo
;
864 surface_type
= BRW_SURFACE_2D
;
866 multisampling_state
= brw_get_surface_num_multisamples(samples
);
869 surf
[0] = (surface_type
<< BRW_SURFACE_TYPE_SHIFT
|
870 BRW_SURFACEFORMAT_B8G8R8A8_UNORM
<< BRW_SURFACE_FORMAT_SHIFT
);
872 surf
[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT
|
873 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT
|
874 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT
|
875 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT
);
877 surf
[1] = bo
? bo
->offset64
: 0;
878 surf
[2] = ((width
- 1) << BRW_SURFACE_WIDTH_SHIFT
|
879 (height
- 1) << BRW_SURFACE_HEIGHT_SHIFT
);
881 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
884 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
886 surf
[3] = (BRW_SURFACE_TILED
| BRW_SURFACE_TILED_Y
|
887 pitch_minus_1
<< BRW_SURFACE_PITCH_SHIFT
);
888 surf
[4] = multisampling_state
;
892 drm_intel_bo_emit_reloc(brw
->batch
.bo
,
895 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
);
900 * Sets up a surface state structure to point at the given region.
901 * While it is only used for the front/back buffer currently, it should be
902 * usable for further buffers when doing ARB_draw_buffer support.
905 gen4_update_renderbuffer_surface(struct brw_context
*brw
,
906 struct gl_renderbuffer
*rb
,
907 bool layered
, unsigned unit
,
910 struct gl_context
*ctx
= &brw
->ctx
;
911 struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
912 struct intel_mipmap_tree
*mt
= irb
->mt
;
914 uint32_t tile_x
, tile_y
;
918 mesa_format rb_format
= _mesa_get_render_format(ctx
, intel_rb_format(irb
));
919 /* BRW_NEW_FS_PROG_DATA */
923 if (rb
->TexImage
&& !brw
->has_surface_tile_offset
) {
924 intel_renderbuffer_get_tile_offsets(irb
, &tile_x
, &tile_y
);
926 if (tile_x
!= 0 || tile_y
!= 0) {
927 /* Original gen4 hardware couldn't draw to a non-tile-aligned
928 * destination in a miptree unless you actually setup your renderbuffer
929 * as a miptree and used the fragile lod/array_index/etc. controls to
930 * select the image. So, instead, we just make a new single-level
931 * miptree and render into that.
933 intel_renderbuffer_move_to_temp(brw
, irb
, false);
938 intel_miptree_used_for_rendering(irb
->mt
);
940 surf
= brw_state_batch(brw
, AUB_TRACE_SURFACE_STATE
, 6 * 4, 32, &offset
);
942 format
= brw
->render_target_format
[rb_format
];
943 if (unlikely(!brw
->format_supported_as_render_target
[rb_format
])) {
944 _mesa_problem(ctx
, "%s: renderbuffer format %s unsupported\n",
945 __func__
, _mesa_get_format_name(rb_format
));
948 surf
[0] = (BRW_SURFACE_2D
<< BRW_SURFACE_TYPE_SHIFT
|
949 format
<< BRW_SURFACE_FORMAT_SHIFT
);
952 assert(mt
->offset
% mt
->cpp
== 0);
953 surf
[1] = (intel_renderbuffer_get_tile_offsets(irb
, &tile_x
, &tile_y
) +
954 mt
->bo
->offset64
+ mt
->offset
);
956 surf
[2] = ((rb
->Width
- 1) << BRW_SURFACE_WIDTH_SHIFT
|
957 (rb
->Height
- 1) << BRW_SURFACE_HEIGHT_SHIFT
);
959 surf
[3] = (brw_get_surface_tiling_bits(mt
->tiling
) |
960 (mt
->pitch
- 1) << BRW_SURFACE_PITCH_SHIFT
);
962 surf
[4] = brw_get_surface_num_multisamples(mt
->num_samples
);
964 assert(brw
->has_surface_tile_offset
|| (tile_x
== 0 && tile_y
== 0));
965 /* Note that the low bits of these fields are missing, so
966 * there's the possibility of getting in trouble.
968 assert(tile_x
% 4 == 0);
969 assert(tile_y
% 2 == 0);
970 surf
[5] = ((tile_x
/ 4) << BRW_SURFACE_X_OFFSET_SHIFT
|
971 (tile_y
/ 2) << BRW_SURFACE_Y_OFFSET_SHIFT
|
972 (mt
->valign
== 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE
: 0));
976 if (!ctx
->Color
.ColorLogicOpEnabled
&&
977 (ctx
->Color
.BlendEnabled
& (1 << unit
)))
978 surf
[0] |= BRW_SURFACE_BLEND_ENABLED
;
980 if (!ctx
->Color
.ColorMask
[unit
][0])
981 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT
;
982 if (!ctx
->Color
.ColorMask
[unit
][1])
983 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT
;
984 if (!ctx
->Color
.ColorMask
[unit
][2])
985 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT
;
987 /* As mentioned above, disable writes to the alpha component when the
988 * renderbuffer is XRGB.
990 if (ctx
->DrawBuffer
->Visual
.alphaBits
== 0 ||
991 !ctx
->Color
.ColorMask
[unit
][3]) {
992 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT
;
996 drm_intel_bo_emit_reloc(brw
->batch
.bo
,
999 surf
[1] - mt
->bo
->offset64
,
1000 I915_GEM_DOMAIN_RENDER
,
1001 I915_GEM_DOMAIN_RENDER
);
1007 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
1010 brw_update_renderbuffer_surfaces(struct brw_context
*brw
,
1011 const struct gl_framebuffer
*fb
,
1012 uint32_t render_target_start
,
1013 uint32_t *surf_offset
)
1016 const unsigned int w
= _mesa_geometric_width(fb
);
1017 const unsigned int h
= _mesa_geometric_height(fb
);
1018 const unsigned int s
= _mesa_geometric_samples(fb
);
1020 /* Update surfaces for drawing buffers */
1021 if (fb
->_NumColorDrawBuffers
>= 1) {
1022 for (i
= 0; i
< fb
->_NumColorDrawBuffers
; i
++) {
1023 const uint32_t surf_index
= render_target_start
+ i
;
1025 if (intel_renderbuffer(fb
->_ColorDrawBuffers
[i
])) {
1026 surf_offset
[surf_index
] =
1027 brw
->vtbl
.update_renderbuffer_surface(
1028 brw
, fb
->_ColorDrawBuffers
[i
],
1029 _mesa_geometric_layers(fb
) > 0, i
, surf_index
);
1031 brw
->vtbl
.emit_null_surface_state(brw
, w
, h
, s
,
1032 &surf_offset
[surf_index
]);
1036 const uint32_t surf_index
= render_target_start
;
1037 brw
->vtbl
.emit_null_surface_state(brw
, w
, h
, s
,
1038 &surf_offset
[surf_index
]);
1043 update_renderbuffer_surfaces(struct brw_context
*brw
)
1045 const struct gl_context
*ctx
= &brw
->ctx
;
1047 /* _NEW_BUFFERS | _NEW_COLOR */
1048 const struct gl_framebuffer
*fb
= ctx
->DrawBuffer
;
1049 brw_update_renderbuffer_surfaces(
1051 brw
->wm
.prog_data
->binding_table
.render_target_start
,
1052 brw
->wm
.base
.surf_offset
);
1053 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1056 const struct brw_tracked_state brw_renderbuffer_surfaces
= {
1058 .mesa
= _NEW_BUFFERS
|
1060 .brw
= BRW_NEW_BATCH
|
1062 BRW_NEW_FS_PROG_DATA
,
1064 .emit
= update_renderbuffer_surfaces
,
1067 const struct brw_tracked_state gen6_renderbuffer_surfaces
= {
1069 .mesa
= _NEW_BUFFERS
,
1070 .brw
= BRW_NEW_BATCH
|
1073 .emit
= update_renderbuffer_surfaces
,
/**
 * Emit texture surface states for one shader stage.
 *
 * Walks the program's SamplersUsed bitmask and emits one surface per
 * used sampler.  The destination region of the stage's surf_offset
 * array is selected by for_gather/plane: gather4 surfaces go into the
 * gather_texture_start block, otherwise surfaces land in the per-plane
 * block (plane 0 is the ordinary texture block; planes 1/2 are used
 * for multi-planar formats — see callers).
 *
 * \param prog        may be NULL when the stage is unused (no-op then)
 * \param for_gather  emit the alternate gather4 surface set
 * \param plane       plane index used when for_gather is false
 */
static void
update_stage_texture_surfaces(struct brw_context *brw,
                              const struct gl_program *prog,
                              struct brw_stage_state *stage_state,
                              bool for_gather, uint32_t plane)
{
   if (!prog)
      return;

   struct gl_context *ctx = &brw->ctx;

   uint32_t *surf_offset = stage_state->surf_offset;

   /* BRW_NEW_*_PROG_DATA */
   if (for_gather)
      surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
   else
      surf_offset += stage_state->prog_data->binding_table.plane_start[plane];

   /* Highest used sampler + 1 bounds the loop. */
   unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
   for (unsigned s = 0; s < num_samplers; s++) {
      surf_offset[s] = 0;

      if (prog->SamplersUsed & (1 << s)) {
         const unsigned unit = prog->SamplerUnits[s];

         /* _NEW_TEXTURE: only emit state for units with a complete texture */
         if (ctx->Texture.Unit[unit]._Current) {
            brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s,
                                             for_gather, plane);
         }
      }
   }
}
/**
 * Construct SURFACE_STATE objects for enabled textures.
 *
 * Emits the regular texture surfaces for every render-pipeline stage,
 * the alternate gather4 surface sets for stages that use textureGather
 * (NOTE(review): upstream gates the gather set on pre-gen8 hardware —
 * confirm the guard around the UsesGather block), and the extra plane
 * surfaces (planes 1 and 2) for the fragment stage's multi-planar
 * textures.
 */
static void
brw_update_texture_surfaces(struct brw_context *brw)
{
   /* BRW_NEW_VERTEX_PROGRAM */
   struct gl_program *vs = (struct gl_program *) brw->vertex_program;

   /* BRW_NEW_TESS_PROGRAMS */
   struct gl_program *tcs = (struct gl_program *) brw->tess_ctrl_program;
   struct gl_program *tes = (struct gl_program *) brw->tess_eval_program;

   /* BRW_NEW_GEOMETRY_PROGRAM */
   struct gl_program *gs = (struct gl_program *) brw->geometry_program;

   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct gl_program *fs = (struct gl_program *) brw->fragment_program;

   /* _NEW_TEXTURE */
   update_stage_texture_surfaces(brw, vs, &brw->vs.base, false, 0);
   update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false, 0);
   update_stage_texture_surfaces(brw, tes, &brw->tes.base, false, 0);
   update_stage_texture_surfaces(brw, gs, &brw->gs.base, false, 0);
   update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 0);

   /* emit alternate set of surface state for gather. this
    * allows the surface format to be overriden for only the
    * gather4 messages. */
   if (brw->gen < 8) {
      if (vs && vs->UsesGather)
         update_stage_texture_surfaces(brw, vs, &brw->vs.base, true, 0);
      if (tcs && tcs->UsesGather)
         update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true, 0);
      if (tes && tes->UsesGather)
         update_stage_texture_surfaces(brw, tes, &brw->tes.base, true, 0);
      if (gs && gs->UsesGather)
         update_stage_texture_surfaces(brw, gs, &brw->gs.base, true, 0);
      if (fs && fs->UsesGather)
         update_stage_texture_surfaces(brw, fs, &brw->wm.base, true, 0);
   }

   if (fs) {
      /* Extra planes for multi-planar (e.g. YUV) textures in the FS. */
      update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 1);
      update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 2);
   }

   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}
/* Atom for the render pipeline's texture surfaces: dirty on any stage's
 * program or prog_data change, since each stage's binding table layout
 * can move the texture block.
 */
const struct brw_tracked_state brw_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_BLORP |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_GEOMETRY_PROGRAM |
             BRW_NEW_GS_PROG_DATA |
             BRW_NEW_TESS_PROGRAMS |
             BRW_NEW_TCS_PROG_DATA |
             BRW_NEW_TES_PROG_DATA |
             BRW_NEW_TEXTURE_BUFFER |
             BRW_NEW_VERTEX_PROGRAM |
             BRW_NEW_VS_PROG_DATA,
   },
   .emit = brw_update_texture_surfaces,
};
/* Compute-shader counterpart of brw_update_texture_surfaces(): emits the
 * CS texture surfaces plus the alternate gather4 set when used.
 */
static void
brw_update_cs_texture_surfaces(struct brw_context *brw)
{
   /* BRW_NEW_COMPUTE_PROGRAM */
   struct gl_program *cs = (struct gl_program *) brw->compute_program;

   /* _NEW_TEXTURE */
   update_stage_texture_surfaces(brw, cs, &brw->cs.base, false, 0);

   /* emit alternate set of surface state for gather. this
    * allows the surface format to be overriden for only the
    * gather4 messages.
    * NOTE(review): upstream limits this to pre-gen8 — confirm guard.
    */
   if (brw->gen < 8) {
      if (cs && cs->UsesGather)
         update_stage_texture_surfaces(brw, cs, &brw->cs.base, true, 0);
   }

   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}
/* Atom for compute-shader texture surfaces. */
const struct brw_tracked_state brw_cs_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_BLORP |
             BRW_NEW_COMPUTE_PROGRAM,
   },
   .emit = brw_update_cs_texture_surfaces,
};
/**
 * Emit constant-buffer (UBO) and shader-storage-buffer (SSBO) surfaces
 * for one linked shader stage.
 *
 * For each uniform block and each shader-storage block, a surface is
 * emitted into the stage's surf_offset array at the binding table's
 * ubo_start / ssbo_start regions.  Bindings still pointing at the
 * shared NullBufferObj get a 1x1x1 null surface instead.  The bound
 * range is clamped to binding->Size unless AutomaticSize is set.
 *
 * \param shader  may be NULL when the stage is unused (no-op then)
 */
void
brw_upload_ubo_surfaces(struct brw_context *brw,
                        struct gl_linked_shader *shader,
                        struct brw_stage_state *stage_state,
                        struct brw_stage_prog_data *prog_data)
{
   struct gl_context *ctx = &brw->ctx;

   if (!shader)
      return;

   uint32_t *ubo_surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.ubo_start];

   for (int i = 0; i < shader->NumUniformBlocks; i++) {
      struct gl_uniform_buffer_binding *binding =
         &ctx->UniformBufferBindings[shader->UniformBlocks[i]->Binding];

      if (binding->BufferObject == ctx->Shared->NullBufferObj) {
         /* Unbound binding point: keep the slot valid with a null surface. */
         brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]);
      } else {
         struct intel_buffer_object *intel_bo =
            intel_buffer_object(binding->BufferObject);
         GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
         if (!binding->AutomaticSize)
            size = MIN2(size, binding->Size);
         drm_intel_bo *bo =
            intel_bufferobj_buffer(brw, intel_bo,
                                   binding->Offset,
                                   size);
         brw_create_constant_surface(brw, bo, binding->Offset,
                                     size,
                                     &ubo_surf_offsets[i]);
      }
   }

   uint32_t *ssbo_surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.ssbo_start];

   for (int i = 0; i < shader->NumShaderStorageBlocks; i++) {
      struct gl_shader_storage_buffer_binding *binding =
         &ctx->ShaderStorageBufferBindings[shader->ShaderStorageBlocks[i]->Binding];

      if (binding->BufferObject == ctx->Shared->NullBufferObj) {
         brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]);
      } else {
         struct intel_buffer_object *intel_bo =
            intel_buffer_object(binding->BufferObject);
         GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
         if (!binding->AutomaticSize)
            size = MIN2(size, binding->Size);
         drm_intel_bo *bo =
            intel_bufferobj_buffer(brw, intel_bo,
                                   binding->Offset,
                                   size);
         /* SSBOs need a writable (RAW) buffer surface, unlike the
          * read-only constant surface used for UBOs.
          */
         brw_create_buffer_surface(brw, bo, binding->Offset,
                                   size,
                                   &ssbo_surf_offsets[i]);
      }
   }

   if (shader->NumUniformBlocks || shader->NumShaderStorageBlocks)
      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}
/* Atom emit: UBO/SSBO surfaces for the current fragment program. */
static void
brw_upload_wm_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;

   if (!prog)
      return;

   /* BRW_NEW_FS_PROG_DATA */
   brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
                           &brw->wm.base, &brw->wm.prog_data->base);
}
/* Atom for fragment-stage UBO/SSBO surfaces. */
const struct brw_tracked_state brw_wm_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_BLORP |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_UNIFORM_BUFFER,
   },
   .emit = brw_upload_wm_ubo_surfaces,
};
/* Atom emit: UBO/SSBO surfaces for the current compute program. */
static void
brw_upload_cs_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_shader_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];

   if (!prog)
      return;

   /* BRW_NEW_CS_PROG_DATA */
   brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
                           &brw->cs.base, &brw->cs.prog_data->base);
}
/* Atom for compute-stage UBO/SSBO surfaces. */
const struct brw_tracked_state brw_cs_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_BLORP |
             BRW_NEW_CS_PROG_DATA |
             BRW_NEW_UNIFORM_BUFFER,
   },
   .emit = brw_upload_cs_ubo_surfaces,
};
/**
 * Emit atomic-counter-buffer (ABO) surfaces for one linked shader stage.
 *
 * One writable RAW buffer surface is emitted per atomic buffer, into
 * the binding table's abo_start region.  Does nothing (and does not
 * flag BRW_NEW_SURFACES) when the stage is absent or uses no atomic
 * buffers.
 */
void
brw_upload_abo_surfaces(struct brw_context *brw,
                        struct gl_linked_shader *shader,
                        struct brw_stage_state *stage_state,
                        struct brw_stage_prog_data *prog_data)
{
   struct gl_context *ctx = &brw->ctx;
   uint32_t *surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.abo_start];

   if (shader && shader->NumAtomicBuffers) {
      for (unsigned i = 0; i < shader->NumAtomicBuffers; i++) {
         struct gl_atomic_buffer_binding *binding =
            &ctx->AtomicBufferBindings[shader->AtomicBuffers[i]->Binding];
         struct intel_buffer_object *intel_bo =
            intel_buffer_object(binding->BufferObject);
         drm_intel_bo *bo = intel_bufferobj_buffer(
            brw, intel_bo, binding->Offset,
            intel_bo->Base.Size - binding->Offset);

         /* RAW format + writable: atomics are untyped read/modify/write. */
         brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
                                             binding->Offset,
                                             BRW_SURFACEFORMAT_RAW,
                                             bo->size - binding->Offset,
                                             1, true);
      }

      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
   }
}
/* Atom emit: atomic-buffer surfaces for the current fragment program. */
static void
brw_upload_wm_abo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;

   if (prog) {
      /* BRW_NEW_FS_PROG_DATA */
      brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
                              &brw->wm.base, &brw->wm.prog_data->base);
   }
}
/* Atom for fragment-stage atomic-buffer surfaces. */
const struct brw_tracked_state brw_wm_abo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_ATOMIC_BUFFER |
             BRW_NEW_BLORP |
             BRW_NEW_BATCH |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = brw_upload_wm_abo_surfaces,
};
/* Atom emit: atomic-buffer surfaces for the current compute program. */
static void
brw_upload_cs_abo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_shader_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];

   if (prog) {
      /* BRW_NEW_CS_PROG_DATA */
      brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
                              &brw->cs.base, &brw->cs.prog_data->base);
   }
}
/* Atom for compute-stage atomic-buffer surfaces. */
const struct brw_tracked_state brw_cs_abo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_ATOMIC_BUFFER |
             BRW_NEW_BLORP |
             BRW_NEW_BATCH |
             BRW_NEW_CS_PROG_DATA,
   },
   .emit = brw_upload_cs_abo_surfaces,
};
/* Atom emit: shader-image surfaces for the current compute program. */
static void
brw_upload_cs_image_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_shader_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];

   if (prog) {
      /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
      brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
                                &brw->cs.base, &brw->cs.prog_data->base);
   }
}
/* Atom for compute-stage shader-image surfaces. */
const struct brw_tracked_state brw_cs_image_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_BLORP |
             BRW_NEW_CS_PROG_DATA |
             BRW_NEW_IMAGE_UNITS
   },
   .emit = brw_upload_cs_image_surfaces,
};
/**
 * Pick the hardware surface format to use for a shader image.
 *
 * Write-only access can use the native format directly.  Read access is
 * restricted to the small set of formats typed surface reads support:
 * if a matching typed format exists it is lowered to the closest one
 * the hardware can read; otherwise the driver falls back to untyped
 * (RAW) read/write messages.
 *
 * \return a BRW_SURFACEFORMAT_* value
 */
static uint32_t
get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
{
   const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
   uint32_t hw_format = brw_format_for_mesa_format(format);
   if (access == GL_WRITE_ONLY) {
      return hw_format;
   } else if (isl_has_matching_typed_storage_image_format(devinfo, hw_format)) {
      /* Typed surface reads support a very limited subset of the shader
       * image formats.  Translate it into the closest format the
       * hardware supports.
       */
      return isl_lower_storage_image_format(devinfo, hw_format);
   } else {
      /* The hardware doesn't actually support a typed format that we can use
       * so we have to fall back to untyped read/write messages.
       */
      return BRW_SURFACEFORMAT_RAW;
   }
}
/* Initialize a brw_image_param to safe defaults: everything zeroed
 * except the surface index and the swizzle shifts, which are set to
 * all-ones so address swizzling is effectively disabled.
 */
static void
update_default_image_param(struct brw_context *brw,
                           struct gl_image_unit *u,
                           unsigned surface_idx,
                           struct brw_image_param *param)
{
   memset(param, 0, sizeof(*param));
   param->surface_idx = surface_idx;
   /* Set the swizzling shifts to all-ones to effectively disable swizzling --
    * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
    * detailed explanation of these parameters.
    */
   param->swizzling[0] = 0xff;
   param->swizzling[1] = 0xff;
}
/* Fill in image params for a buffer texture image: 1-D size in texels
 * and the per-texel byte stride; everything else keeps the defaults.
 */
static void
update_buffer_image_param(struct brw_context *brw,
                          struct gl_image_unit *u,
                          unsigned surface_idx,
                          struct brw_image_param *param)
{
   struct gl_buffer_object *obj = u->TexObj->BufferObject;

   update_default_image_param(brw, u, surface_idx, param);

   /* Size is measured in texels of the image unit's actual format. */
   param->size[0] = obj->Size / _mesa_get_format_bytes(u->_ActualFormat);
   param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
}
/**
 * Fill in image params for a regular (miptree-backed) texture image:
 * logical size at the bound level, miptree offsets, byte/row/slice
 * strides, and the tiling/swizzling parameters the untyped-access
 * address calculation in brw_fs_surface_builder.cpp consumes.
 */
static void
update_texture_image_param(struct brw_context *brw,
                           struct gl_image_unit *u,
                           unsigned surface_idx,
                           struct brw_image_param *param)
{
   struct intel_mipmap_tree *mt = intel_texture_object(u->TexObj)->mt;

   update_default_image_param(brw, u, surface_idx, param);

   param->size[0] = minify(mt->logical_width0, u->Level);
   param->size[1] = minify(mt->logical_height0, u->Level);
   /* Depth: 1 for non-layered bindings, 6 faces for cube maps,
    * minified depth for 3D, else the array length.
    */
   param->size[2] = (!u->Layered ? 1 :
                     u->TexObj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
                     u->TexObj->Target == GL_TEXTURE_3D ?
                     minify(mt->logical_depth0, u->Level) :
                     mt->logical_depth0);

   intel_miptree_get_image_offset(mt, u->Level, u->_Layer,
                                  &param->offset[0],
                                  &param->offset[1]);

   param->stride[0] = mt->cpp;
   param->stride[1] = mt->pitch / mt->cpp;
   param->stride[2] =
      brw_miptree_get_horizontal_slice_pitch(brw, mt, u->Level);
   param->stride[3] =
      brw_miptree_get_vertical_slice_pitch(brw, mt, u->Level);

   if (mt->tiling == I915_TILING_X) {
      /* An X tile is a rectangular block of 512x8 bytes. */
      param->tiling[0] = _mesa_logbase2(512 / mt->cpp);
      param->tiling[1] = _mesa_logbase2(8);

      if (brw->has_swizzling) {
         /* Right shifts required to swizzle bits 9 and 10 of the memory
          * address with bit 6.
          */
         param->swizzling[0] = 3;
         param->swizzling[1] = 4;
      }
   } else if (mt->tiling == I915_TILING_Y) {
      /* The layout of a Y-tiled surface in memory isn't really fundamentally
       * different to the layout of an X-tiled surface, we simply pretend that
       * the surface is broken up in a number of smaller 16Bx32 tiles, each
       * one arranged in X-major order just like is the case for X-tiling.
       */
      param->tiling[0] = _mesa_logbase2(16 / mt->cpp);
      param->tiling[1] = _mesa_logbase2(32);

      if (brw->has_swizzling) {
         /* Right shift required to swizzle bit 9 of the memory address with
          * bit 6.
          */
         param->swizzling[0] = 3;
      }
   }

   /* 3D textures are arranged in 2D in memory with 2^lod slices per row.  The
    * address calculation algorithm (emit_address_calculation() in
    * brw_fs_surface_builder.cpp) handles this as a sort of tiling with
    * modulus equal to the LOD.
    */
   param->tiling[2] = (u->TexObj->Target == GL_TEXTURE_3D ? u->Level :
                       0);
}
/**
 * Emit the surface state and image params for a single image unit.
 *
 * Dispatches on the bound object: buffer textures get a buffer surface;
 * regular textures get either a RAW buffer surface over the whole BO
 * (when the format had to fall back to untyped access) or a typed
 * RENDER_SURFACE_STATE with ISL_SURF_USAGE_STORAGE_BIT.  Invalid image
 * units get a null surface and default params.
 *
 * \param access  GL_READ_ONLY / GL_WRITE_ONLY / GL_READ_WRITE from the
 *                shader's image declaration
 */
static void
update_image_surface(struct brw_context *brw,
                     struct gl_image_unit *u,
                     GLenum access,
                     unsigned surface_idx,
                     uint32_t *surf_offset,
                     struct brw_image_param *param)
{
   if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
      struct gl_texture_object *obj = u->TexObj;
      const unsigned format = get_image_format(brw, u->_ActualFormat, access);

      if (obj->Target == GL_TEXTURE_BUFFER) {
         struct intel_buffer_object *intel_obj =
            intel_buffer_object(obj->BufferObject);
         /* RAW surfaces are addressed in bytes (texel size 1). */
         const unsigned texel_size = (format == BRW_SURFACEFORMAT_RAW ? 1 :
                                      _mesa_get_format_bytes(u->_ActualFormat));

         brw->vtbl.emit_buffer_surface_state(
            brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
            format, intel_obj->Base.Size / texel_size, texel_size,
            access != GL_READ_ONLY);

         update_buffer_image_param(brw, u, surface_idx, param);

      } else {
         struct intel_texture_object *intel_obj = intel_texture_object(obj);
         struct intel_mipmap_tree *mt = intel_obj->mt;

         if (format == BRW_SURFACEFORMAT_RAW) {
            /* Untyped fallback: expose the miptree's BO as a raw buffer. */
            brw->vtbl.emit_buffer_surface_state(
               brw, surf_offset, mt->bo, mt->offset,
               format, mt->bo->size - mt->offset, 1 /* pitch */,
               access != GL_READ_ONLY);

         } else {
            const unsigned num_layers = (!u->Layered ? 1 :
                                         obj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
                                         mt->logical_depth0);

            /* MinLevel/MinLayer account for glTextureView re-basing. */
            struct isl_view view = {
               .format = format,
               .base_level = obj->MinLevel + u->Level,
               .levels = 1,
               .base_array_layer = obj->MinLayer + u->_Layer,
               .array_len = num_layers,
               .channel_select = {
                  ISL_CHANNEL_SELECT_RED,
                  ISL_CHANNEL_SELECT_GREEN,
                  ISL_CHANNEL_SELECT_BLUE,
                  ISL_CHANNEL_SELECT_ALPHA,
               },
               .usage = ISL_SURF_USAGE_STORAGE_BIT,
            };

            /* Recover the binding-table index from the offset pointer.
             * NOTE(review): assumes surf_offset points into
             * brw->wm.base.surf_offset — confirm for non-FS callers.
             */
            const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];

            brw_emit_surface_state(brw, mt, &view,
                                   surface_state_infos[brw->gen].rb_mocs, false,
                                   surf_offset, surf_index,
                                   I915_GEM_DOMAIN_SAMPLER,
                                   access == GL_READ_ONLY ? 0 :
                                             I915_GEM_DOMAIN_SAMPLER);
         }

         update_texture_image_param(brw, u, surface_idx, param);
      }

   } else {
      brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset);
      update_default_image_param(brw, u, surface_idx, param);
   }
}
/**
 * Emit shader-image surfaces and image params for one linked stage.
 *
 * One surface per image uniform goes into the binding table's
 * image_start region; the matching brw_image_param is written into
 * prog_data so it reaches the shader as push/pull constants, hence the
 * _NEW_PROGRAM_CONSTANTS flag at the end.
 *
 * \param shader  may be NULL when the stage is unused (no-op then)
 */
void
brw_upload_image_surfaces(struct brw_context *brw,
                          struct gl_linked_shader *shader,
                          struct brw_stage_state *stage_state,
                          struct brw_stage_prog_data *prog_data)
{
   struct gl_context *ctx = &brw->ctx;

   if (shader && shader->NumImages) {
      for (unsigned i = 0; i < shader->NumImages; i++) {
         struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[i]];
         const unsigned surf_idx = prog_data->binding_table.image_start + i;

         update_image_surface(brw, u, shader->ImageAccess[i],
                              surf_idx,
                              &stage_state->surf_offset[surf_idx],
                              &prog_data->image_param[i]);
      }

      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
      /* This may have changed the image metadata dependent on the context
       * image unit state and passed to the program as uniforms, make sure
       * that push and pull constants are reuploaded.
       */
      brw->NewGLState |= _NEW_PROGRAM_CONSTANTS;
   }
}
/* Atom emit: shader-image surfaces for the current fragment program. */
static void
brw_upload_wm_image_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;

   if (prog) {
      /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
      brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
                                &brw->wm.base, &brw->wm.prog_data->base);
   }
}
/* Atom for fragment-stage shader-image surfaces. */
const struct brw_tracked_state brw_wm_image_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_BLORP |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_IMAGE_UNITS
   },
   .emit = brw_upload_wm_image_surfaces,
};
/* Install the gen4-5 surface-state vtbl entry points; later gens
 * override these with their own implementations.
 */
void
gen4_init_vtable_surface_functions(struct brw_context *brw)
{
   brw->vtbl.update_texture_surface = gen4_update_texture_surface;
   brw->vtbl.update_renderbuffer_surface = gen4_update_renderbuffer_surface;
   brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
   brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
}
/**
 * Emit the surface backing gl_NumWorkGroups for the compute stage.
 *
 * For glDispatchComputeIndirect the existing indirect BO is referenced
 * directly; for a direct dispatch the three GLuint counts are uploaded
 * into the streaming upload buffer first.  Either way a RAW buffer
 * surface of 3 * sizeof(GLuint) is emitted at the binding table's
 * work_groups_start slot.
 */
static void
brw_upload_cs_work_groups_surface(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_shader_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];

   if (prog && brw->cs.prog_data->uses_num_work_groups) {
      const unsigned surf_idx =
         brw->cs.prog_data->binding_table.work_groups_start;
      uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
      drm_intel_bo *bo;
      uint32_t bo_offset;

      if (brw->compute.num_work_groups_bo == NULL) {
         /* Direct dispatch: stream the counts into the upload buffer. */
         bo = NULL;
         intel_upload_data(brw,
                           (void *)brw->compute.num_work_groups,
                           3 * sizeof(GLuint),
                           sizeof(GLuint),
                           &bo,
                           &bo_offset);
      } else {
         /* Indirect dispatch: counts already live in a BO. */
         bo = brw->compute.num_work_groups_bo;
         bo_offset = brw->compute.num_work_groups_offset;
      }

      brw->vtbl.emit_buffer_surface_state(brw, surf_offset,
                                          bo, bo_offset,
                                          BRW_SURFACEFORMAT_RAW,
                                          3 * sizeof(GLuint), 1, true);
      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
   }
}
1727 const struct brw_tracked_state brw_cs_work_groups_surface
= {
1729 .brw
= BRW_NEW_BLORP
|
1730 BRW_NEW_CS_WORK_GROUPS
1732 .emit
= brw_upload_cs_work_groups_surface
,