/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
 * Keith Whitwell <keithw@vmware.com>
 */

#include "main/context.h"
#include "main/blend.h"
#include "main/mtypes.h"
#include "main/samplerobj.h"
#include "main/shaderimage.h"
#include "main/teximage.h"
#include "program/prog_parameter.h"
#include "program/prog_instruction.h"
#include "main/framebuffer.h"

#include "intel_mipmap_tree.h"
#include "intel_batchbuffer.h"
#include "intel_tex.h"
#include "intel_fbo.h"
#include "intel_buffer_objects.h"

#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"

struct surface_state_info {
   unsigned num_dwords;
   unsigned ss_align; /* Required alignment of RENDER_SURFACE_STATE in bytes */
   unsigned reloc_dw;
   unsigned aux_reloc_dw;
   unsigned tex_mocs;
   unsigned rb_mocs;
};

static const struct surface_state_info surface_state_infos[] = {
   [7] = {8, 32, 1, 6, GEN7_MOCS_L3, GEN7_MOCS_L3},
   [8] = {13, 64, 8, 10, BDW_MOCS_WB, BDW_MOCS_PTE},
   [9] = {16, 64, 8, 10, SKL_MOCS_WB, SKL_MOCS_PTE},
};

static void
brw_emit_surface_state(struct brw_context *brw,
                       struct intel_mipmap_tree *mt,
                       const struct isl_view *view,
                       uint32_t mocs, bool for_gather,
                       uint32_t *surf_offset, int surf_index,
                       unsigned read_domains, unsigned write_domains)
{
   const struct surface_state_info ss_info = surface_state_infos[brw->gen];

   struct isl_surf surf;
   intel_miptree_get_isl_surf(brw, mt, &surf);

   union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } };

   struct isl_surf *aux_surf = NULL, aux_surf_s;
   uint64_t aux_offset = 0;
   enum isl_aux_usage aux_usage = ISL_AUX_USAGE_NONE;
   if (mt->mcs_mt &&
       ((view->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) ||
        mt->fast_clear_state != INTEL_FAST_CLEAR_STATE_RESOLVED)) {
      intel_miptree_get_aux_isl_surf(brw, mt, &aux_surf_s, &aux_usage);
      aux_surf = &aux_surf_s;
      assert(mt->mcs_mt->offset == 0);
      aux_offset = mt->mcs_mt->bo->offset64;

      /* We only really need a clear color if we also have an auxiliary
       * surface.  Without one, it does nothing.
       */
      clear_color = intel_miptree_get_isl_clear_color(brw, mt);
   }

   uint32_t *dw = __brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                                    ss_info.num_dwords * 4, ss_info.ss_align,
                                    surf_index, surf_offset);

   isl_surf_fill_state(&brw->isl_dev, dw, .surf = &surf, .view = view,
                       .address = mt->bo->offset64 + mt->offset,
                       .aux_surf = aux_surf, .aux_usage = aux_usage,
                       .aux_address = aux_offset,
                       .mocs = mocs, .clear_color = clear_color);

   drm_intel_bo_emit_reloc(brw->batch.bo,
                           *surf_offset + 4 * ss_info.reloc_dw,
                           mt->bo, mt->offset,
                           read_domains, write_domains);

   if (aux_surf) {
      /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
       * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
       * contain other control information.  Since buffer addresses are always
       * on 4k boundaries (and thus have their lower 12 bits zero), we can use
       * an ordinary reloc to do the necessary address translation.
       */
      assert((aux_offset & 0xfff) == 0);
      drm_intel_bo_emit_reloc(brw->batch.bo,
                              *surf_offset + 4 * ss_info.aux_reloc_dw,
                              mt->mcs_mt->bo, dw[ss_info.aux_reloc_dw] & 0xfff,
                              read_domains, write_domains);
   }
}
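
/* A worked example of the MCS reloc emitted above, with illustrative values:
 * suppose the MCS buffer object ends up at GPU address 0x12345000 (4k-aligned,
 * so its low 12 bits are zero) and the aux dword currently holds control bits
 * 0x583 in its low 12 bits.  Using a reloc delta of (dw & 0xfff) == 0x583
 * makes the kernel patch the dword to 0x12345000 + 0x583 = 0x12345583: the
 * buffer address lands in the upper 20 bits while the control bits in the
 * low 12 bits are preserved.
 */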

uint32_t
translate_tex_target(GLenum target)
{
   switch (target) {
   case GL_TEXTURE_1D:
   case GL_TEXTURE_1D_ARRAY_EXT:
      return BRW_SURFACE_1D;

   case GL_TEXTURE_RECTANGLE_NV:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_2D:
   case GL_TEXTURE_2D_ARRAY_EXT:
   case GL_TEXTURE_EXTERNAL_OES:
   case GL_TEXTURE_2D_MULTISAMPLE:
   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_3D:
      return BRW_SURFACE_3D;

   case GL_TEXTURE_CUBE_MAP:
   case GL_TEXTURE_CUBE_MAP_ARRAY:
      return BRW_SURFACE_CUBE;

   default:
      unreachable("not reached");
   }
}

uint32_t
brw_get_surface_tiling_bits(uint32_t tiling)
{
   switch (tiling) {
   case I915_TILING_X:
      return BRW_SURFACE_TILED;
   case I915_TILING_Y:
      return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
   default:
      return 0;
   }
}

uint32_t
brw_get_surface_num_multisamples(unsigned num_samples)
{
   if (num_samples > 1)
      return BRW_SURFACE_MULTISAMPLECOUNT_4;
   else
      return BRW_SURFACE_MULTISAMPLECOUNT_1;
}

/**
 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 * swizzling.
 */
int
brw_get_texture_swizzle(const struct gl_context *ctx,
                        const struct gl_texture_object *t)
{
   const struct gl_texture_image *img = t->Image[0][t->BaseLevel];

   int swizzles[SWIZZLE_NIL + 1] = {
      SWIZZLE_X,
      SWIZZLE_Y,
      SWIZZLE_Z,
      SWIZZLE_W,
      SWIZZLE_ZERO,
      SWIZZLE_ONE,
      SWIZZLE_NIL
   };

   if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
       img->_BaseFormat == GL_DEPTH_STENCIL) {
      GLenum depth_mode = t->DepthMode;

      /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
       * with depth component data specified with a sized internal format.
       * Otherwise, it's left at the old default, GL_LUMINANCE.
       */
      if (_mesa_is_gles3(ctx) &&
          img->InternalFormat != GL_DEPTH_COMPONENT &&
          img->InternalFormat != GL_DEPTH_STENCIL) {
         depth_mode = GL_RED;
      }

      switch (depth_mode) {
      case GL_ALPHA:
         swizzles[0] = SWIZZLE_ZERO;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_LUMINANCE:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
         break;
      case GL_INTENSITY:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_RED:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_ONE;
         break;
      }
   }

   GLenum datatype = _mesa_get_format_datatype(img->TexFormat);

   /* If the texture's format is alpha-only, force R, G, and B to
    * 0.0. Similarly, if the texture's format has no alpha channel,
    * force the alpha value read to 1.0. This allows for the
    * implementation to use an RGBA texture for any of these formats
    * without leaking any unexpected values.
    */
   switch (img->_BaseFormat) {
   case GL_ALPHA:
      swizzles[0] = SWIZZLE_ZERO;
      swizzles[1] = SWIZZLE_ZERO;
      swizzles[2] = SWIZZLE_ZERO;
      break;
   case GL_LUMINANCE:
      if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
      }
      break;
   case GL_LUMINANCE_ALPHA:
      if (datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_W;
      }
      break;
   case GL_INTENSITY:
      if (datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
      }
      break;
   case GL_RED:
   case GL_RG:
   case GL_RGB:
      if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
         swizzles[3] = SWIZZLE_ONE;
      break;
   }

   return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
                        swizzles[GET_SWZ(t->_Swizzle, 1)],
                        swizzles[GET_SWZ(t->_Swizzle, 2)],
                        swizzles[GET_SWZ(t->_Swizzle, 3)]);
}

static void
gen4_emit_buffer_surface_state(struct brw_context *brw,
                               uint32_t *out_offset,
                               drm_intel_bo *bo,
                               unsigned buffer_offset,
                               unsigned surface_format,
                               unsigned buffer_size,
                               unsigned pitch,
                               bool rw)
{
   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                                    6 * 4, 32, out_offset);
   memset(surf, 0, 6 * 4);

   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
             surface_format << BRW_SURFACE_FORMAT_SHIFT |
             (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
   surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
   surf[2] = ((buffer_size - 1) & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
             (((buffer_size - 1) >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
   surf[3] = (((buffer_size - 1) >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
             (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
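
   /* For illustration, with an example value of buffer_size = 65536: the
    * encoded value is buffer_size - 1 = 0xFFFF, which splits into a width
    * field of 0xFFFF & 0x7f = 127, a height field of (0xFFFF >> 7) & 0x1fff
    * = 511, and a depth field of (0xFFFF >> 20) & 0x7f = 0, so that
    * (0 << 20) + (511 << 7) + 127 reassembles to 65535.
    */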

   /* Emit relocation to surface contents.  The 965 PRM, Volume 4, section
    * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
    * physical cache.  It is mapped in hardware to the sampler cache."
    */
   if (bo) {
      drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
                              bo, buffer_offset,
                              I915_GEM_DOMAIN_SAMPLER,
                              (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
   }
}

void
brw_update_buffer_texture_surface(struct gl_context *ctx,
                                  unsigned unit,
                                  uint32_t *surf_offset)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   struct intel_buffer_object *intel_obj =
      intel_buffer_object(tObj->BufferObject);
   uint32_t size = tObj->BufferSize;
   drm_intel_bo *bo = NULL;
   mesa_format format = tObj->_BufferObjectFormat;
   uint32_t brw_format = brw_format_for_mesa_format(format);
   int texel_size = _mesa_get_format_bytes(format);

   if (intel_obj) {
      size = MIN2(size, intel_obj->Base.Size);
      bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
   }

   if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
      _mesa_problem(NULL, "bad format %s for texture buffer\n",
                    _mesa_get_format_name(format));
   }

   brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
                                       tObj->BufferOffset,
                                       brw_format, size,
                                       texel_size, false /* rw */);
}

static void
brw_update_texture_surface(struct gl_context *ctx,
                           unsigned unit,
                           uint32_t *surf_offset,
                           bool for_gather,
                           uint32_t plane)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   struct intel_texture_object *intelObj = intel_texture_object(tObj);
   struct intel_mipmap_tree *mt = intelObj->mt;
   struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
   uint32_t *surf;

   /* BRW_NEW_TEXTURE_BUFFER */
   if (tObj->Target == GL_TEXTURE_BUFFER) {
      brw_update_buffer_texture_surface(ctx, unit, surf_offset);
      return;
   }

   if (plane > 0) {
      if (mt->plane[plane - 1] == NULL)
         return;
      mt = mt->plane[plane - 1];
   }

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                          6 * 4, 32, surf_offset);

   mesa_format mesa_fmt = plane == 0 ? intelObj->_Format : mt->format;
   uint32_t tex_format = translate_tex_format(brw, mesa_fmt,
                                              sampler->sRGBDecode);

   if (for_gather) {
      /* Sandybridge's gather4 message is broken for integer formats.
       * To work around this, we pretend the surface is UNORM for
       * 8 or 16-bit formats, and emit shader instructions to recover
       * the real INT/UINT value.  For 32-bit formats, we pretend
       * the surface is FLOAT, and simply reinterpret the resulting
       * bits.
       */
      switch (tex_format) {
      case BRW_SURFACEFORMAT_R8_SINT:
      case BRW_SURFACEFORMAT_R8_UINT:
         tex_format = BRW_SURFACEFORMAT_R8_UNORM;
         break;

      case BRW_SURFACEFORMAT_R16_SINT:
      case BRW_SURFACEFORMAT_R16_UINT:
         tex_format = BRW_SURFACEFORMAT_R16_UNORM;
         break;

      case BRW_SURFACEFORMAT_R32_SINT:
      case BRW_SURFACEFORMAT_R32_UINT:
         tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
         break;

      default:
         break;
      }
   }

   surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
              BRW_SURFACE_CUBEFACE_ENABLES |
              tex_format << BRW_SURFACE_FORMAT_SHIFT);

   surf[1] = mt->bo->offset64 + mt->offset; /* reloc */

   surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
              (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
              (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
              (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);

   const unsigned min_lod = tObj->MinLevel + tObj->BaseLevel - mt->first_level;
   surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
              SET_FIELD(min_lod, BRW_SURFACE_MIN_LOD) |
              SET_FIELD(tObj->MinLayer, BRW_SURFACE_MIN_ARRAY_ELEMENT));

   surf[5] = mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;

   /* Emit relocation to surface contents */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           *surf_offset + 4,
                           mt->bo,
                           surf[1] - mt->bo->offset64,
                           I915_GEM_DOMAIN_SAMPLER, 0);
}

/**
 * Create the constant buffer surface.  Vertex/fragment shader constants will
 * be read from this buffer with Data Port Read instructions/messages.
 */
void
brw_create_constant_surface(struct brw_context *brw,
                            drm_intel_bo *bo,
                            uint32_t offset,
                            uint32_t size,
                            uint32_t *out_offset)
{
   brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
                                       BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
                                       size, 1, false);
}

/**
 * Create the buffer surface.  Shader buffer variables will be
 * read from / written to this buffer with Data Port Read/Write
 * instructions/messages.
 */
void
brw_create_buffer_surface(struct brw_context *brw,
                          drm_intel_bo *bo,
                          uint32_t offset,
                          uint32_t size,
                          uint32_t *out_offset)
{
   /* Use a raw surface so we can reuse existing untyped read/write/atomic
    * messages.  We need these specifically for the fragment shader since they
    * include a pixel mask header that we need to ensure correct behavior
    * with helper invocations, which cannot write to the buffer.
    */
   brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
                                       BRW_SURFACEFORMAT_RAW,
                                       size, 1, true);
}

/**
 * Set up a binding table entry for use by stream output logic (transform
 * feedback).
 *
 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
 */
void
brw_update_sol_surface(struct brw_context *brw,
                       struct gl_buffer_object *buffer_obj,
                       uint32_t *out_offset, unsigned num_vector_components,
                       unsigned stride_dwords, unsigned offset_dwords)
{
   struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
   uint32_t offset_bytes = 4 * offset_dwords;
   drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
                                             offset_bytes,
                                             buffer_obj->Size - offset_bytes);
   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                                    out_offset);
   uint32_t pitch_minus_1 = 4 * stride_dwords - 1;
   size_t size_dwords = buffer_obj->Size / 4;
   uint32_t buffer_size_minus_1, width, height, depth, surface_format;

   /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
    * too big to map using a single binding table entry?
    */
   assert((size_dwords - offset_dwords) / stride_dwords
          <= BRW_MAX_NUM_BUFFER_ENTRIES);

   if (size_dwords > offset_dwords + num_vector_components) {
      /* There is room for at least 1 transform feedback output in the buffer.
       * Compute the number of additional transform feedback outputs the
       * buffer has room for.
       */
      buffer_size_minus_1 =
         (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
   } else {
      /* There isn't even room for a single transform feedback output in the
       * buffer.  We can't configure the binding table entry to prevent output
       * entirely; we'll have to rely on the geometry shader to detect
       * overflow.  But to minimize the damage in case of a bug, set up the
       * binding table entry to just allow a single output.
       */
      buffer_size_minus_1 = 0;
   }

   width = buffer_size_minus_1 & 0x7f;
   height = (buffer_size_minus_1 & 0xfff80) >> 7;
   depth = (buffer_size_minus_1 & 0x7f00000) >> 20;

   switch (num_vector_components) {
   case 1:
      surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
      break;
   case 2:
      surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
      break;
   case 3:
      surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
      break;
   case 4:
      surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
      break;
   default:
      unreachable("Invalid vector size for transform feedback output");
   }

   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
             BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
             surface_format << BRW_SURFACE_FORMAT_SHIFT |
             BRW_SURFACE_RC_READ_WRITE;
   surf[1] = bo->offset64 + offset_bytes; /* reloc */
   surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
              height << BRW_SURFACE_HEIGHT_SHIFT);
   surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);

   /* Emit relocation to surface contents. */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           *out_offset + 4,
                           bo, offset_bytes,
                           I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
}

/* Creates a new WM constant buffer reflecting the current fragment program's
 * constants, if needed by the fragment program.
 *
 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 * state atom.
 */
static void
brw_upload_wm_pull_constants(struct brw_context *brw)
{
   struct brw_stage_state *stage_state = &brw->wm.base;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct brw_fragment_program *fp =
      (struct brw_fragment_program *) brw->fragment_program;
   /* BRW_NEW_FS_PROG_DATA */
   struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;

   /* _NEW_PROGRAM_CONSTANTS */
   brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
                             stage_state, prog_data);
}

const struct brw_tracked_state brw_wm_pull_constants = {
   .dirty = {
      .mesa = _NEW_PROGRAM_CONSTANTS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = brw_upload_wm_pull_constants,
};

/**
 * Creates a null renderbuffer surface.
 *
 * This is used when the shader doesn't write to any color output.  An FB
 * write to target 0 will still be emitted, because that's how the thread is
 * terminated (and computed depth is returned), so we need to have the
 * hardware discard the target 0 color output.
 */
static void
brw_emit_null_surface_state(struct brw_context *brw,
                            unsigned width,
                            unsigned height,
                            unsigned samples,
                            uint32_t *out_offset)
{
   /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
    * Notes):
    *
    *     A null surface will be used in instances where an actual surface is
    *     not bound. When a write message is generated to a null surface, no
    *     actual surface is written to. When a read message (including any
    *     sampling engine message) is generated to a null surface, the result
    *     is all zeros. Note that a null surface type is allowed to be used
    *     with all messages, even if it is not specifically indicated as
    *     supported. All of the remaining fields in surface state are ignored
    *     for null surfaces, with the following exceptions:
    *
    *     - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
    *       depth buffer’s corresponding state for all render target surfaces,
    *       including null.
    *
    *     - Surface Format must be R8G8B8A8_UNORM.
    */
   unsigned surface_type = BRW_SURFACE_NULL;
   drm_intel_bo *bo = NULL;
   unsigned pitch_minus_1 = 0;
   uint32_t multisampling_state = 0;
   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                                    out_offset);

   if (samples > 1) {
      /* On Gen6, null render targets seem to cause GPU hangs when
       * multisampling.  So work around this problem by rendering into dummy
       * color buffer.
       *
       * To decrease the amount of memory needed by the workaround buffer, we
       * set its pitch to 128 bytes (the width of a Y tile).  This means that
       * the amount of memory needed for the workaround buffer is
       * (width_in_tiles + height_in_tiles - 1) tiles.
       *
       * Note that since the workaround buffer will be interpreted by the
       * hardware as an interleaved multisampled buffer, we need to compute
       * width_in_tiles and height_in_tiles by dividing the width and height
       * by 16 rather than the normal Y-tile size of 32.
       */
      unsigned width_in_tiles = ALIGN(width, 16) / 16;
      unsigned height_in_tiles = ALIGN(height, 16) / 16;
      unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
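
      /* For illustration, with an example 1024x768 multisampled render target:
       * width_in_tiles = ALIGN(1024, 16) / 16 = 64,
       * height_in_tiles = ALIGN(768, 16) / 16 = 48, and
       * size_needed = (64 + 48 - 1) * 4096 = 454656 bytes (roughly 444 KiB).
       */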
      brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
                         size_needed);
      bo = brw->wm.multisampled_null_render_target_bo;
      surface_type = BRW_SURFACE_2D;
      pitch_minus_1 = 127;
      multisampling_state = brw_get_surface_num_multisamples(samples);
   }

   surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
   if (brw->gen < 6) {
      surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
   }
   surf[1] = bo ? bo->offset64 : 0;
   surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
    * Notes):
    *
    *     If Surface Type is SURFTYPE_NULL, this field must be TRUE
    */
   surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = multisampling_state;

   if (bo) {
      drm_intel_bo_emit_reloc(brw->batch.bo,
                              *out_offset + 4,
                              bo, 0,
                              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
   }
}

/**
 * Sets up a surface state structure to point at the given region.
 * While it is only used for the front/back buffer currently, it should be
 * usable for further buffers when doing ARB_draw_buffer support.
 */
static uint32_t
brw_update_renderbuffer_surface(struct brw_context *brw,
                                struct gl_renderbuffer *rb,
                                bool layered, unsigned unit,
                                uint32_t surf_index)
{
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   struct intel_mipmap_tree *mt = irb->mt;
   uint32_t *surf;
   uint32_t tile_x, tile_y;
   uint32_t format = 0;
   uint32_t offset;
   /* _NEW_BUFFERS */
   mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
   /* BRW_NEW_FS_PROG_DATA */

   if (rb->TexImage && !brw->has_surface_tile_offset) {
      intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);

      if (tile_x != 0 || tile_y != 0) {
         /* Original gen4 hardware couldn't draw to a non-tile-aligned
          * destination in a miptree unless you actually setup your renderbuffer
          * as a miptree and used the fragile lod/array_index/etc. controls to
          * select the image.  So, instead, we just make a new single-level
          * miptree and render into that.
          */
         intel_renderbuffer_move_to_temp(brw, irb, false);
         mt = irb->mt;
      }
   }

   intel_miptree_used_for_rendering(irb->mt);

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, &offset);

   format = brw->render_target_format[rb_format];
   if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
      _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
                    __func__, _mesa_get_format_name(rb_format));
   }

   surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
              format << BRW_SURFACE_FORMAT_SHIFT);

   /* reloc */
   assert(mt->offset % mt->cpp == 0);
   surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
              mt->bo->offset64 + mt->offset);

   surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
              (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = brw_get_surface_num_multisamples(mt->num_samples);

   assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
   /* Note that the low bits of these fields are missing, so
    * there's the possibility of getting in trouble.
    */
   assert(tile_x % 4 == 0);
   assert(tile_y % 2 == 0);
   surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
              (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
              (mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));

   if (!ctx->Color.ColorLogicOpEnabled &&
       (ctx->Color.BlendEnabled & (1 << unit)))
      surf[0] |= BRW_SURFACE_BLEND_ENABLED;

   if (!ctx->Color.ColorMask[unit][0])
      surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
   if (!ctx->Color.ColorMask[unit][1])
      surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
   if (!ctx->Color.ColorMask[unit][2])
      surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;

   /* As mentioned above, disable writes to the alpha component when the
    * renderbuffer is XRGB.
    */
   if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
       !ctx->Color.ColorMask[unit][3]) {
      surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
   }

   drm_intel_bo_emit_reloc(brw->batch.bo,
                           offset + 4,
                           mt->bo,
                           surf[1] - mt->bo->offset64,
                           I915_GEM_DOMAIN_RENDER,
                           I915_GEM_DOMAIN_RENDER);

   return offset;
}

/**
 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
 */
void
brw_update_renderbuffer_surfaces(struct brw_context *brw,
                                 const struct gl_framebuffer *fb,
                                 uint32_t render_target_start,
                                 uint32_t *surf_offset)
{
   GLuint i;
   const unsigned int w = _mesa_geometric_width(fb);
   const unsigned int h = _mesa_geometric_height(fb);
   const unsigned int s = _mesa_geometric_samples(fb);

   /* Update surfaces for drawing buffers */
   if (fb->_NumColorDrawBuffers >= 1) {
      for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
         const uint32_t surf_index = render_target_start + i;

         if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
            surf_offset[surf_index] =
               brw->vtbl.update_renderbuffer_surface(
                  brw, fb->_ColorDrawBuffers[i],
                  _mesa_geometric_layers(fb) > 0, i, surf_index);
         } else {
            brw->vtbl.emit_null_surface_state(brw, w, h, s,
                                              &surf_offset[surf_index]);
         }
      }
   } else {
      const uint32_t surf_index = render_target_start;
      brw->vtbl.emit_null_surface_state(brw, w, h, s,
                                        &surf_offset[surf_index]);
   }
}

static void
update_renderbuffer_surfaces(struct brw_context *brw)
{
   const struct gl_context *ctx = &brw->ctx;

   /* _NEW_BUFFERS | _NEW_COLOR */
   const struct gl_framebuffer *fb = ctx->DrawBuffer;
   brw_update_renderbuffer_surfaces(
      brw, fb,
      brw->wm.prog_data->binding_table.render_target_start,
      brw->wm.base.surf_offset);
   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS |
              _NEW_COLOR,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = update_renderbuffer_surfaces,
};

const struct brw_tracked_state gen6_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH,
   },
   .emit = update_renderbuffer_surfaces,
};

static void
update_stage_texture_surfaces(struct brw_context *brw,
                              const struct gl_program *prog,
                              struct brw_stage_state *stage_state,
                              bool for_gather, uint32_t plane)
{
   if (!prog)
      return;

   struct gl_context *ctx = &brw->ctx;

   uint32_t *surf_offset = stage_state->surf_offset;

   /* BRW_NEW_*_PROG_DATA */
   if (for_gather)
      surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
   else
      surf_offset += stage_state->prog_data->binding_table.plane_start[plane];

   unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
   for (unsigned s = 0; s < num_samplers; s++) {
      if (prog->SamplersUsed & (1 << s)) {
         const unsigned unit = prog->SamplerUnits[s];

         if (ctx->Texture.Unit[unit]._Current) {
            brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather, plane);
         }
      }
   }
}

/**
 * Construct SURFACE_STATE objects for enabled textures.
 */
static void
brw_update_texture_surfaces(struct brw_context *brw)
{
   /* BRW_NEW_VERTEX_PROGRAM */
   struct gl_program *vs = (struct gl_program *) brw->vertex_program;

   /* BRW_NEW_TESS_PROGRAMS */
   struct gl_program *tcs = (struct gl_program *) brw->tess_ctrl_program;
   struct gl_program *tes = (struct gl_program *) brw->tess_eval_program;

   /* BRW_NEW_GEOMETRY_PROGRAM */
   struct gl_program *gs = (struct gl_program *) brw->geometry_program;

   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct gl_program *fs = (struct gl_program *) brw->fragment_program;

   update_stage_texture_surfaces(brw, vs, &brw->vs.base, false, 0);
   update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false, 0);
   update_stage_texture_surfaces(brw, tes, &brw->tes.base, false, 0);
   update_stage_texture_surfaces(brw, gs, &brw->gs.base, false, 0);
   update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 0);

   /* Emit alternate set of surface state for gather.  This allows the
    * surface format to be overridden for only the gather4 messages.
    */
   if (vs && vs->UsesGather)
      update_stage_texture_surfaces(brw, vs, &brw->vs.base, true, 0);
   if (tcs && tcs->UsesGather)
      update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true, 0);
   if (tes && tes->UsesGather)
      update_stage_texture_surfaces(brw, tes, &brw->tes.base, true, 0);
   if (gs && gs->UsesGather)
      update_stage_texture_surfaces(brw, gs, &brw->gs.base, true, 0);
   if (fs && fs->UsesGather)
      update_stage_texture_surfaces(brw, fs, &brw->wm.base, true, 0);

   update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 1);
   update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 2);

   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_GEOMETRY_PROGRAM |
             BRW_NEW_GS_PROG_DATA |
             BRW_NEW_TESS_PROGRAMS |
             BRW_NEW_TCS_PROG_DATA |
             BRW_NEW_TES_PROG_DATA |
             BRW_NEW_TEXTURE_BUFFER |
             BRW_NEW_VERTEX_PROGRAM |
             BRW_NEW_VS_PROG_DATA,
   },
   .emit = brw_update_texture_surfaces,
};

static void
brw_update_cs_texture_surfaces(struct brw_context *brw)
{
   /* BRW_NEW_COMPUTE_PROGRAM */
   struct gl_program *cs = (struct gl_program *) brw->compute_program;

   update_stage_texture_surfaces(brw, cs, &brw->cs.base, false, 0);

   /* Emit alternate set of surface state for gather.  This allows the
    * surface format to be overridden for only the gather4 messages.
    */
   if (cs && cs->UsesGather)
      update_stage_texture_surfaces(brw, cs, &brw->cs.base, true, 0);

   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_cs_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_COMPUTE_PROGRAM,
   },
   .emit = brw_update_cs_texture_surfaces,
};

void
brw_upload_ubo_surfaces(struct brw_context *brw,
                        struct gl_linked_shader *shader,
                        struct brw_stage_state *stage_state,
                        struct brw_stage_prog_data *prog_data)
{
   struct gl_context *ctx = &brw->ctx;

   uint32_t *ubo_surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.ubo_start];

   for (int i = 0; i < shader->NumUniformBlocks; i++) {
      struct gl_uniform_buffer_binding *binding =
         &ctx->UniformBufferBindings[shader->UniformBlocks[i]->Binding];

      if (binding->BufferObject == ctx->Shared->NullBufferObj) {
         brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]);
      } else {
         struct intel_buffer_object *intel_bo =
            intel_buffer_object(binding->BufferObject);
         GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
         if (!binding->AutomaticSize)
            size = MIN2(size, binding->Size);
         drm_intel_bo *bo =
            intel_bufferobj_buffer(brw, intel_bo,
                                   binding->Offset, size);
         brw_create_constant_surface(brw, bo, binding->Offset, size,
                                     &ubo_surf_offsets[i]);
      }
   }

   uint32_t *ssbo_surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.ssbo_start];

   for (int i = 0; i < shader->NumShaderStorageBlocks; i++) {
      struct gl_shader_storage_buffer_binding *binding =
         &ctx->ShaderStorageBufferBindings[shader->ShaderStorageBlocks[i]->Binding];

      if (binding->BufferObject == ctx->Shared->NullBufferObj) {
         brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]);
      } else {
         struct intel_buffer_object *intel_bo =
            intel_buffer_object(binding->BufferObject);
         GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
         if (!binding->AutomaticSize)
            size = MIN2(size, binding->Size);
         drm_intel_bo *bo =
            intel_bufferobj_buffer(brw, intel_bo,
                                   binding->Offset, size);
         brw_create_buffer_surface(brw, bo, binding->Offset, size,
                                   &ssbo_surf_offsets[i]);
      }
   }

   if (shader->NumUniformBlocks || shader->NumShaderStorageBlocks)
      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

static void
brw_upload_wm_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;

   if (!prog)
      return;

   /* BRW_NEW_FS_PROG_DATA */
   brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
                           &brw->wm.base, &brw->wm.prog_data->base);
}

const struct brw_tracked_state brw_wm_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_UNIFORM_BUFFER,
   },
   .emit = brw_upload_wm_ubo_surfaces,
};

static void
brw_upload_cs_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   struct gl_shader_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];

   if (!prog)
      return;

   /* BRW_NEW_CS_PROG_DATA */
   brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
                           &brw->cs.base, &brw->cs.prog_data->base);
}

const struct brw_tracked_state brw_cs_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_CS_PROG_DATA |
             BRW_NEW_UNIFORM_BUFFER,
   },
   .emit = brw_upload_cs_ubo_surfaces,
};

static void
brw_upload_abo_surfaces(struct brw_context *brw,
                        struct gl_linked_shader *shader,
                        struct brw_stage_state *stage_state,
                        struct brw_stage_prog_data *prog_data)
{
   struct gl_context *ctx = &brw->ctx;
   uint32_t *surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.abo_start];

   if (shader && shader->NumAtomicBuffers) {
      for (unsigned i = 0; i < shader->NumAtomicBuffers; i++) {
         struct gl_atomic_buffer_binding *binding =
            &ctx->AtomicBufferBindings[shader->AtomicBuffers[i]->Binding];
         struct intel_buffer_object *intel_bo =
            intel_buffer_object(binding->BufferObject);
         drm_intel_bo *bo = intel_bufferobj_buffer(
            brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);

         brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
                                             binding->Offset, BRW_SURFACEFORMAT_RAW,
                                             bo->size - binding->Offset, 1, true);
      }

      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
   }
}

static void
brw_upload_wm_abo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;

   if (prog) {
      /* BRW_NEW_FS_PROG_DATA */
      brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
                              &brw->wm.base, &brw->wm.prog_data->base);
   }
}

const struct brw_tracked_state brw_wm_abo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_ATOMIC_BUFFER |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = brw_upload_wm_abo_surfaces,
};

static void
brw_upload_cs_abo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   struct gl_shader_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];

   if (prog) {
      /* BRW_NEW_CS_PROG_DATA */
      brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
                              &brw->cs.base, &brw->cs.prog_data->base);
   }
}

const struct brw_tracked_state brw_cs_abo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_ATOMIC_BUFFER |
             BRW_NEW_CS_PROG_DATA,
   },
   .emit = brw_upload_cs_abo_surfaces,
};

static void
brw_upload_cs_image_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   struct gl_shader_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];

   if (prog) {
      /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
      brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
                                &brw->cs.base, &brw->cs.prog_data->base);
   }
}

const struct brw_tracked_state brw_cs_image_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_CS_PROG_DATA |
             BRW_NEW_IMAGE_UNITS,
   },
   .emit = brw_upload_cs_image_surfaces,
};

static uint32_t
get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
{
   const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
   uint32_t hw_format = brw_format_for_mesa_format(format);
   if (access == GL_WRITE_ONLY) {
      return hw_format;
   } else if (isl_has_matching_typed_storage_image_format(devinfo, hw_format)) {
      /* Typed surface reads support a very limited subset of the shader
       * image formats.  Translate it into the closest format the
       * hardware supports.
       */
      return isl_lower_storage_image_format(devinfo, hw_format);
   } else {
      /* The hardware doesn't actually support a typed format that we can use
       * so we have to fall back to untyped read/write messages.
       */
      return BRW_SURFACEFORMAT_RAW;
   }
}

static void
update_default_image_param(struct brw_context *brw,
                           struct gl_image_unit *u,
                           unsigned surface_idx,
                           struct brw_image_param *param)
{
   memset(param, 0, sizeof(*param));
   param->surface_idx = surface_idx;
   /* Set the swizzling shifts to all-ones to effectively disable swizzling --
    * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
    * detailed explanation of these parameters.
    */
   param->swizzling[0] = 0xff;
   param->swizzling[1] = 0xff;
}

static void
update_buffer_image_param(struct brw_context *brw,
                          struct gl_image_unit *u,
                          unsigned surface_idx,
                          struct brw_image_param *param)
{
   struct gl_buffer_object *obj = u->TexObj->BufferObject;

   update_default_image_param(brw, u, surface_idx, param);

   param->size[0] = obj->Size / _mesa_get_format_bytes(u->_ActualFormat);
   param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
}

static void
update_texture_image_param(struct brw_context *brw,
                           struct gl_image_unit *u,
                           unsigned surface_idx,
                           struct brw_image_param *param)
{
   struct intel_mipmap_tree *mt = intel_texture_object(u->TexObj)->mt;

   update_default_image_param(brw, u, surface_idx, param);

   param->size[0] = minify(mt->logical_width0, u->Level);
   param->size[1] = minify(mt->logical_height0, u->Level);
   param->size[2] = (!u->Layered ? 1 :
                     u->TexObj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
                     u->TexObj->Target == GL_TEXTURE_3D ?
                     minify(mt->logical_depth0, u->Level) :
                     mt->logical_depth0);

   intel_miptree_get_image_offset(mt, u->Level, u->_Layer,
                                  &param->offset[0],
                                  &param->offset[1]);

   param->stride[0] = mt->cpp;
   param->stride[1] = mt->pitch / mt->cpp;
   param->stride[2] =
      brw_miptree_get_horizontal_slice_pitch(brw, mt, u->Level);
   param->stride[3] =
      brw_miptree_get_vertical_slice_pitch(brw, mt, u->Level);

   if (mt->tiling == I915_TILING_X) {
      /* An X tile is a rectangular block of 512x8 bytes. */
      param->tiling[0] = _mesa_logbase2(512 / mt->cpp);
      param->tiling[1] = _mesa_logbase2(8);
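
      /* For illustration, with an example 4-byte-per-texel format (mt->cpp ==
       * 4): tiling[0] = _mesa_logbase2(512 / 4) = 7 and
       * tiling[1] = _mesa_logbase2(8) = 3, i.e. the 512x8-byte X tile is
       * treated as a 128x8-texel block (2^7 x 2^3 texels).
       */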

      if (brw->has_swizzling) {
         /* Right shifts required to swizzle bits 9 and 10 of the memory
          * address with bit 6.
          */
         param->swizzling[0] = 3;
         param->swizzling[1] = 4;
      }
   } else if (mt->tiling == I915_TILING_Y) {
      /* The layout of a Y-tiled surface in memory isn't really fundamentally
       * different from the layout of an X-tiled surface; we simply pretend
       * that the surface is broken up into a number of smaller 16Bx32 tiles,
       * each one arranged in X-major order just like is the case for X-tiling.
       */
      param->tiling[0] = _mesa_logbase2(16 / mt->cpp);
      param->tiling[1] = _mesa_logbase2(32);

      if (brw->has_swizzling) {
         /* Right shift required to swizzle bit 9 of the memory address with
          * bit 6.
          */
         param->swizzling[0] = 3;
      }
   }

   /* 3D textures are arranged in 2D in memory with 2^lod slices per row.  The
    * address calculation algorithm (emit_address_calculation() in
    * brw_fs_surface_builder.cpp) handles this as a sort of tiling with
    * modulus equal to the LOD.
    */
   param->tiling[2] = (u->TexObj->Target == GL_TEXTURE_3D ? u->Level : 0);
}

static void
update_image_surface(struct brw_context *brw,
                     struct gl_image_unit *u,
                     GLenum access,
                     unsigned surface_idx,
                     uint32_t *surf_offset,
                     struct brw_image_param *param)
{
   if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
      struct gl_texture_object *obj = u->TexObj;
      const unsigned format = get_image_format(brw, u->_ActualFormat, access);

      if (obj->Target == GL_TEXTURE_BUFFER) {
         struct intel_buffer_object *intel_obj =
            intel_buffer_object(obj->BufferObject);
         const unsigned texel_size = (format == BRW_SURFACEFORMAT_RAW ? 1 :
                                      _mesa_get_format_bytes(u->_ActualFormat));

         brw->vtbl.emit_buffer_surface_state(
            brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
            format, intel_obj->Base.Size / texel_size, texel_size,
            access != GL_READ_ONLY);

         update_buffer_image_param(brw, u, surface_idx, param);

      } else {
         struct intel_texture_object *intel_obj = intel_texture_object(obj);
         struct intel_mipmap_tree *mt = intel_obj->mt;

         if (format == BRW_SURFACEFORMAT_RAW) {
            brw->vtbl.emit_buffer_surface_state(
               brw, surf_offset, mt->bo, mt->offset,
               format, mt->bo->size - mt->offset, 1 /* pitch */,
               access != GL_READ_ONLY);

         } else {
            const unsigned num_layers = (!u->Layered ? 1 :
                                         obj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
                                         mt->logical_depth0);

            struct isl_view view = {
               .format = format,
               .base_level = obj->MinLevel + u->Level,
               .levels = 1,
               .base_array_layer = obj->MinLayer + u->_Layer,
               .array_len = num_layers,
               .channel_select = {
                  ISL_CHANNEL_SELECT_RED,
                  ISL_CHANNEL_SELECT_GREEN,
                  ISL_CHANNEL_SELECT_BLUE,
                  ISL_CHANNEL_SELECT_ALPHA,
               },
               .usage = ISL_SURF_USAGE_STORAGE_BIT,
            };

            const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];

            brw_emit_surface_state(brw, mt, &view,
                                   surface_state_infos[brw->gen].rb_mocs, false,
                                   surf_offset, surf_index,
                                   I915_GEM_DOMAIN_SAMPLER,
                                   access == GL_READ_ONLY ? 0 :
                                             I915_GEM_DOMAIN_SAMPLER);
         }

         update_texture_image_param(brw, u, surface_idx, param);
      }

   } else {
      brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset);
      update_default_image_param(brw, u, surface_idx, param);
   }
}

void
brw_upload_image_surfaces(struct brw_context *brw,
                          struct gl_linked_shader *shader,
                          struct brw_stage_state *stage_state,
                          struct brw_stage_prog_data *prog_data)
{
   struct gl_context *ctx = &brw->ctx;

   if (shader && shader->NumImages) {
      for (unsigned i = 0; i < shader->NumImages; i++) {
         struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[i]];
         const unsigned surf_idx = prog_data->binding_table.image_start + i;

         update_image_surface(brw, u, shader->ImageAccess[i],
                              surf_idx,
                              &stage_state->surf_offset[surf_idx],
                              &prog_data->image_param[i]);
      }

      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
      /* This may have changed the image metadata that depends on the context
       * image unit state and is passed to the program as uniforms; make sure
       * that push and pull constants are reuploaded.
       */
      brw->NewGLState |= _NEW_PROGRAM_CONSTANTS;
   }
}

static void
brw_upload_wm_image_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;

   if (prog) {
      /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
      brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
                                &brw->wm.base, &brw->wm.prog_data->base);
   }
}

const struct brw_tracked_state brw_wm_image_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_IMAGE_UNITS,
   },
   .emit = brw_upload_wm_image_surfaces,
};

void
gen4_init_vtable_surface_functions(struct brw_context *brw)
{
   brw->vtbl.update_texture_surface = brw_update_texture_surface;
   brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
   brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
   brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
}

static void
brw_upload_cs_work_groups_surface(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   struct gl_shader_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];

   if (prog && brw->cs.prog_data->uses_num_work_groups) {
      const unsigned surf_idx =
         brw->cs.prog_data->binding_table.work_groups_start;
      uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
      drm_intel_bo *bo;
      uint32_t bo_offset;

      if (brw->compute.num_work_groups_bo == NULL) {
         bo = NULL;
         intel_upload_data(brw,
                           (void *)brw->compute.num_work_groups,
                           3 * sizeof(GLuint),
                           sizeof(GLuint),
                           &bo, &bo_offset);
      } else {
         bo = brw->compute.num_work_groups_bo;
         bo_offset = brw->compute.num_work_groups_offset;
      }

      brw->vtbl.emit_buffer_surface_state(brw, surf_offset,
                                          bo, bo_offset,
                                          BRW_SURFACEFORMAT_RAW,
                                          3 * sizeof(GLuint), 1, true);
      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
   }
}

const struct brw_tracked_state brw_cs_work_groups_surface = {
   .dirty = {
      .brw = BRW_NEW_BLORP |
             BRW_NEW_CS_WORK_GROUPS
   },
   .emit = brw_upload_cs_work_groups_surface,
};