/*
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */
#include "main/context.h"
#include "main/blend.h"
#include "main/mtypes.h"
#include "main/samplerobj.h"
#include "program/prog_parameter.h"
#include "main/framebuffer.h"

#include "intel_mipmap_tree.h"
#include "intel_batchbuffer.h"
#include "intel_tex.h"
#include "intel_fbo.h"
#include "intel_buffer_objects.h"

#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
static uint32_t
translate_tex_target(GLenum target)
{
   switch (target) {
   case GL_TEXTURE_1D:
   case GL_TEXTURE_1D_ARRAY_EXT:
      return BRW_SURFACE_1D;

   case GL_TEXTURE_RECTANGLE_NV:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_2D:
   case GL_TEXTURE_2D_ARRAY_EXT:
   case GL_TEXTURE_EXTERNAL_OES:
   case GL_TEXTURE_2D_MULTISAMPLE:
   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_3D:
      return BRW_SURFACE_3D;

   case GL_TEXTURE_CUBE_MAP:
   case GL_TEXTURE_CUBE_MAP_ARRAY:
      return BRW_SURFACE_CUBE;

   default:
      unreachable("not reached");
   }
}
uint32_t
brw_get_surface_tiling_bits(uint32_t tiling)
{
   switch (tiling) {
   case I915_TILING_X:
      return BRW_SURFACE_TILED;
   case I915_TILING_Y:
      return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
   default:
      return 0;
   }
}
uint32_t
brw_get_surface_num_multisamples(unsigned num_samples)
{
   if (num_samples > 1)
      return BRW_SURFACE_MULTISAMPLECOUNT_4;
   else
      return BRW_SURFACE_MULTISAMPLECOUNT_1;
}
void
brw_configure_w_tiled(const struct intel_mipmap_tree *mt,
                      bool is_render_target,
                      unsigned *width, unsigned *height,
                      unsigned *pitch, uint32_t *tiling, unsigned *format)
{
   static const unsigned halign_stencil = 8;

   /* In Y-tiling row is twice as wide as in W-tiling, and subsequently
    * there are half as many rows.
    * In addition, mip-levels are accessed manually by the program and
    * therefore the surface is setup to cover all the mip-levels for one slice.
    * (Hardware is still used to access individual slices).
    */
   *tiling = I915_TILING_Y;
   *pitch = mt->pitch * 2;
   *width = ALIGN(mt->total_width, halign_stencil) * 2;
   *height = (mt->total_height / mt->physical_depth0) / 2;

   if (is_render_target) {
      *format = BRW_SURFACEFORMAT_R8_UINT;
   }
}
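/* Note on the math in brw_configure_w_tiled() above: a W tile is 64x64
 * bytes while a Y tile is 128 bytes wide by 32 rows, so presenting a
 * W-tiled stencil buffer through a Y-tiled surface means advertising a
 * pitch twice the W-tiled pitch and half as many rows.  As a rough,
 * illustrative example (numbers not taken from the original source): a
 * stencil miptree with pitch 128, total_height 256 and physical_depth0 2
 * would be exposed as pitch 256 and height (256 / 2) / 2 = 64.
 */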
/**
 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 * swizzling.
 */
int
brw_get_texture_swizzle(const struct gl_context *ctx,
                        const struct gl_texture_object *t)
{
   const struct gl_texture_image *img = t->Image[0][t->BaseLevel];

   int swizzles[SWIZZLE_NIL + 1] = {
      SWIZZLE_X,
      SWIZZLE_Y,
      SWIZZLE_Z,
      SWIZZLE_W,
      SWIZZLE_ZERO,
      SWIZZLE_ONE,
      SWIZZLE_NIL
   };

   if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
       img->_BaseFormat == GL_DEPTH_STENCIL) {
      GLenum depth_mode = t->DepthMode;

      /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
       * with depth component data specified with a sized internal format.
       * Otherwise, it's left at the old default, GL_LUMINANCE.
       */
      if (_mesa_is_gles3(ctx) &&
          img->InternalFormat != GL_DEPTH_COMPONENT &&
          img->InternalFormat != GL_DEPTH_STENCIL) {
         depth_mode = GL_RED;
      }

      switch (depth_mode) {
      case GL_ALPHA:
         swizzles[0] = SWIZZLE_ZERO;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_LUMINANCE:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
         break;
      case GL_INTENSITY:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_RED:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_ONE;
         break;
      }
   }

   GLenum datatype = _mesa_get_format_datatype(img->TexFormat);

   /* If the texture's format is alpha-only, force R, G, and B to
    * 0.0. Similarly, if the texture's format has no alpha channel,
    * force the alpha value read to 1.0. This allows for the
    * implementation to use an RGBA texture for any of these formats
    * without leaking any unexpected values.
    */
   switch (img->_BaseFormat) {
   case GL_ALPHA:
      swizzles[0] = SWIZZLE_ZERO;
      swizzles[1] = SWIZZLE_ZERO;
      swizzles[2] = SWIZZLE_ZERO;
      break;
   case GL_LUMINANCE:
      if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
      }
      break;
   case GL_LUMINANCE_ALPHA:
      if (datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_W;
      }
      break;
   case GL_INTENSITY:
      if (datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
      }
      break;
   case GL_RED:
   case GL_RG:
   case GL_RGB:
      if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
         swizzles[3] = SWIZZLE_ONE;
      break;
   }

   return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
                        swizzles[GET_SWZ(t->_Swizzle, 1)],
                        swizzles[GET_SWZ(t->_Swizzle, 2)],
                        swizzles[GET_SWZ(t->_Swizzle, 3)]);
}
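/* How the return value above composes: MAKE_SWIZZLE4() packs four 3-bit
 * channel selectors, and GET_SWZ(t->_Swizzle, i) picks the application's
 * selector for output channel i; indexing the format-derived swizzles[]
 * table with it chains the two.  Hypothetical example: an application
 * swizzle of (ALPHA, ALPHA, ALPHA, ALPHA) on a GL_ALPHA texture yields
 * swizzles[SWIZZLE_W] == SWIZZLE_W for every channel, so the sampler
 * fetches .wwww from the underlying RGBA surface.
 */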
static void
gen4_emit_buffer_surface_state(struct brw_context *brw,
                               uint32_t *out_offset,
                               drm_intel_bo *bo,
                               unsigned buffer_offset,
                               unsigned surface_format,
                               unsigned buffer_size,
                               unsigned pitch,
                               bool rw)
{
   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                                    6 * 4, 32, out_offset);
   memset(surf, 0, 6 * 4);

   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
             surface_format << BRW_SURFACE_FORMAT_SHIFT |
             (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
   surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
   surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
             ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
   surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
             (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;

   /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
    * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
    * physical cache. It is mapped in hardware to the sampler cache."
    */
   if (bo) {
      drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
                              bo, buffer_offset,
                              I915_GEM_DOMAIN_SAMPLER,
                              (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
   }
}
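/* The size encoding above follows directly from the shifts and masks:
 * buffer_size is split across the Width (bits 6:0), Height (bits 19:7) and
 * Depth (bits 26:20) fields of the SURFACE_STATE, so a buffer surface can
 * describe at most 2^27 elements, each 'pitch' bytes apart.
 */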
static void
brw_update_buffer_texture_surface(struct gl_context *ctx,
                                  unsigned unit,
                                  uint32_t *surf_offset)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   struct intel_buffer_object *intel_obj =
      intel_buffer_object(tObj->BufferObject);
   uint32_t size = tObj->BufferSize;
   drm_intel_bo *bo = NULL;
   mesa_format format = tObj->_BufferObjectFormat;
   uint32_t brw_format = brw_format_for_mesa_format(format);
   int texel_size = _mesa_get_format_bytes(format);

   if (intel_obj) {
      size = MIN2(size, intel_obj->Base.Size);
      bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
   }

   if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
      _mesa_problem(NULL, "bad format %s for texture buffer\n",
                    _mesa_get_format_name(format));
   }

   brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
                                       tObj->BufferOffset,
                                       brw_format,
                                       size / texel_size,
                                       texel_size,
                                       false /* rw */);
}
static void
brw_update_texture_surface(struct gl_context *ctx,
                           unsigned unit,
                           uint32_t *surf_offset,
                           bool for_gather)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   struct intel_texture_object *intelObj = intel_texture_object(tObj);
   struct intel_mipmap_tree *mt = intelObj->mt;
   struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
   uint32_t *surf;

   /* BRW_NEW_TEXTURE_BUFFER */
   if (tObj->Target == GL_TEXTURE_BUFFER) {
      brw_update_buffer_texture_surface(ctx, unit, surf_offset);
      return;
   }

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                          6 * 4, 32, surf_offset);

   uint32_t tex_format = translate_tex_format(brw, mt->format,
                                               sampler->sRGBDecode);

   if (for_gather) {
      /* Sandybridge's gather4 message is broken for integer formats.
       * To work around this, we pretend the surface is UNORM for
       * 8 or 16-bit formats, and emit shader instructions to recover
       * the real INT/UINT value. For 32-bit formats, we pretend
       * the surface is FLOAT, and simply reinterpret the resulting
       * bits.
       */
      switch (tex_format) {
      case BRW_SURFACEFORMAT_R8_SINT:
      case BRW_SURFACEFORMAT_R8_UINT:
         tex_format = BRW_SURFACEFORMAT_R8_UNORM;
         break;

      case BRW_SURFACEFORMAT_R16_SINT:
      case BRW_SURFACEFORMAT_R16_UINT:
         tex_format = BRW_SURFACEFORMAT_R16_UNORM;
         break;

      case BRW_SURFACEFORMAT_R32_SINT:
      case BRW_SURFACEFORMAT_R32_UINT:
         tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
         break;

      default:
         break;
      }
   }

   surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
              BRW_SURFACE_CUBEFACE_ENABLES |
              tex_format << BRW_SURFACE_FORMAT_SHIFT);

   surf[1] = mt->bo->offset64 + mt->offset; /* reloc */

   surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
              (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
              (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
              (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
              SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));

   surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;

   /* Emit relocation to surface contents */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           *surf_offset + 4,
                           mt->bo,
                           surf[1] - mt->bo->offset64,
                           I915_GEM_DOMAIN_SAMPLER, 0);
}
/**
 * Create the constant buffer surface. Vertex/fragment shader constants will be
 * read from this buffer with Data Port Read instructions/messages.
 */
void
brw_create_constant_surface(struct brw_context *brw,
                            drm_intel_bo *bo,
                            uint32_t offset,
                            uint32_t size,
                            uint32_t *out_offset,
                            bool dword_pitch)
{
   uint32_t stride = dword_pitch ? 4 : 16;
   uint32_t elements = ALIGN(size, stride) / stride;

   brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
                                       BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
                                       elements, stride, false);
}
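/* Design note (an interpretation, not from the original comments): with
 * dword_pitch the constant buffer is described as a stream of 4-byte
 * elements, otherwise as 16-byte vec4 elements; 'elements' rounds the byte
 * size up to whole strides.  For example, size = 100 bytes without
 * dword_pitch gives stride 16 and ALIGN(100, 16) / 16 = 7 elements.
 */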
/**
 * Set up a binding table entry for use by stream output logic (transform
 * feedback).
 *
 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
 */
void
brw_update_sol_surface(struct brw_context *brw,
                       struct gl_buffer_object *buffer_obj,
                       uint32_t *out_offset, unsigned num_vector_components,
                       unsigned stride_dwords, unsigned offset_dwords)
{
   struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
   uint32_t offset_bytes = 4 * offset_dwords;
   drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
                                             offset_bytes,
                                             buffer_obj->Size - offset_bytes);
   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                                    out_offset);
   uint32_t pitch_minus_1 = 4*stride_dwords - 1;
   size_t size_dwords = buffer_obj->Size / 4;
   uint32_t buffer_size_minus_1, width, height, depth, surface_format;

   /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
    * too big to map using a single binding table entry?
    */
   assert((size_dwords - offset_dwords) / stride_dwords
          <= BRW_MAX_NUM_BUFFER_ENTRIES);

   if (size_dwords > offset_dwords + num_vector_components) {
      /* There is room for at least 1 transform feedback output in the buffer.
       * Compute the number of additional transform feedback outputs the
       * buffer has room for.
       */
      buffer_size_minus_1 =
         (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
   } else {
      /* There isn't even room for a single transform feedback output in the
       * buffer. We can't configure the binding table entry to prevent output
       * entirely; we'll have to rely on the geometry shader to detect
       * overflow. But to minimize the damage in case of a bug, set up the
       * binding table entry to just allow a single output.
       */
      buffer_size_minus_1 = 0;
   }
   width = buffer_size_minus_1 & 0x7f;
   height = (buffer_size_minus_1 & 0xfff80) >> 7;
   depth = (buffer_size_minus_1 & 0x7f00000) >> 20;

   switch (num_vector_components) {
   case 1:
      surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
      break;
   case 2:
      surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
      break;
   case 3:
      surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
      break;
   case 4:
      surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
      break;
   default:
      unreachable("Invalid vector size for transform feedback output");
   }

   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
             BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
             surface_format << BRW_SURFACE_FORMAT_SHIFT |
             BRW_SURFACE_RC_READ_WRITE;
   surf[1] = bo->offset64 + offset_bytes; /* reloc */
   surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
              height << BRW_SURFACE_HEIGHT_SHIFT);
   surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = 0;
   surf[5] = 0;

   /* Emit relocation to surface contents. */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           *out_offset + 4,
                           bo, offset_bytes,
                           I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
}
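/* Worked example for the sizing above (illustrative numbers only): a
 * 1024-byte buffer gives size_dwords = 256; with offset_dwords = 0 and a
 * single vec4 output per vertex (num_vector_components = 4, stride_dwords
 * = 4), buffer_size_minus_1 = (256 - 0 - 4) / 4 = 63, i.e. room for 64
 * outputs.  width/height/depth then carry bits 6:0, 19:7 and 26:20 of that
 * count into the surface state.
 */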
/* Creates a new WM constant buffer reflecting the current fragment program's
 * constants, if needed by the fragment program.
 *
 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 * state atom.
 */
static void
brw_upload_wm_pull_constants(struct brw_context *brw)
{
   struct brw_stage_state *stage_state = &brw->wm.base;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct brw_fragment_program *fp =
      (struct brw_fragment_program *) brw->fragment_program;
   /* BRW_NEW_FS_PROG_DATA */
   struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;

   /* _NEW_PROGRAM_CONSTANTS */
   brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
                             stage_state, prog_data, true);
}

const struct brw_tracked_state brw_wm_pull_constants = {
   .dirty = {
      .mesa = _NEW_PROGRAM_CONSTANTS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = brw_upload_wm_pull_constants,
};
/**
 * Creates a null renderbuffer surface.
 *
 * This is used when the shader doesn't write to any color output. An FB
 * write to target 0 will still be emitted, because that's how the thread is
 * terminated (and computed depth is returned), so we need to have the
 * hardware discard the target 0 color output.
 */
static void
brw_emit_null_surface_state(struct brw_context *brw,
                            unsigned width,
                            unsigned height,
                            unsigned samples,
                            uint32_t *out_offset)
{
   /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
    * Notes):
    *
    *     A null surface will be used in instances where an actual surface is
    *     not bound. When a write message is generated to a null surface, no
    *     actual surface is written to. When a read message (including any
    *     sampling engine message) is generated to a null surface, the result
    *     is all zeros. Note that a null surface type is allowed to be used
    *     with all messages, even if it is not specificially indicated as
    *     supported. All of the remaining fields in surface state are ignored
    *     for null surfaces, with the following exceptions:
    *
    *     - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
    *       depth buffer’s corresponding state for all render target surfaces,
    *       including null.
    *
    *     - Surface Format must be R8G8B8A8_UNORM.
    */
   unsigned surface_type = BRW_SURFACE_NULL;
   drm_intel_bo *bo = NULL;
   unsigned pitch_minus_1 = 0;
   uint32_t multisampling_state = 0;
   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                                    out_offset);

   if (samples > 1) {
      /* On Gen6, null render targets seem to cause GPU hangs when
       * multisampling. So work around this problem by rendering into dummy
       * color buffer.
       *
       * To decrease the amount of memory needed by the workaround buffer, we
       * set its pitch to 128 bytes (the width of a Y tile). This means that
       * the amount of memory needed for the workaround buffer is
       * (width_in_tiles + height_in_tiles - 1) tiles.
       *
       * Note that since the workaround buffer will be interpreted by the
       * hardware as an interleaved multisampled buffer, we need to compute
       * width_in_tiles and height_in_tiles by dividing the width and height
       * by 16 rather than the normal Y-tile size of 32.
       */
      unsigned width_in_tiles = ALIGN(width, 16) / 16;
      unsigned height_in_tiles = ALIGN(height, 16) / 16;
      unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
      brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
                         size_needed);
      bo = brw->wm.multisampled_null_render_target_bo;
      surface_type = BRW_SURFACE_2D;
      pitch_minus_1 = 127;
      multisampling_state = brw_get_surface_num_multisamples(samples);
   }

   surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
   if (brw->gen < 6) {
      surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
   }
   surf[1] = bo ? bo->offset64 : 0;
   surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
    * Notes):
    *
    *     If Surface Type is SURFTYPE_NULL, this field must be TRUE
    */
   surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = multisampling_state;
   surf[5] = 0;

   if (bo) {
      drm_intel_bo_emit_reloc(brw->batch.bo,
                              *out_offset + 4,
                              bo, 0,
                              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
   }
}
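/* Rough cost of the multisample workaround buffer above, with made-up
 * numbers: for a 2048x1536 multisampled framebuffer, width_in_tiles = 128
 * and height_in_tiles = 96, so size_needed = (128 + 96 - 1) * 4096 =
 * 913408 bytes, far smaller than a full-size dummy color buffer would be.
 */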
/**
 * Sets up a surface state structure to point at the given region.
 * While it is only used for the front/back buffer currently, it should be
 * usable for further buffers when doing ARB_draw_buffer support.
 */
static uint32_t
brw_update_renderbuffer_surface(struct brw_context *brw,
                                struct gl_renderbuffer *rb,
                                bool layered, unsigned unit,
                                uint32_t surf_index)
{
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   struct intel_mipmap_tree *mt = irb->mt;
   uint32_t *surf;
   uint32_t tile_x, tile_y;
   uint32_t format = 0;
   uint32_t offset;
   /* _NEW_BUFFERS */
   mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
   /* BRW_NEW_FS_PROG_DATA */

   if (rb->TexImage && !brw->has_surface_tile_offset) {
      intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);

      if (tile_x != 0 || tile_y != 0) {
         /* Original gen4 hardware couldn't draw to a non-tile-aligned
          * destination in a miptree unless you actually setup your renderbuffer
          * as a miptree and used the fragile lod/array_index/etc. controls to
          * select the image.  So, instead, we just make a new single-level
          * miptree and render into that.
          */
         intel_renderbuffer_move_to_temp(brw, irb, false);
         mt = irb->mt;
      }
   }

   intel_miptree_used_for_rendering(irb->mt);

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, &offset);

   format = brw->render_target_format[rb_format];
   if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
      _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
                    __func__, _mesa_get_format_name(rb_format));
   }

   surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
              format << BRW_SURFACE_FORMAT_SHIFT);

   assert(mt->offset % mt->cpp == 0);
   surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
              mt->bo->offset64 + mt->offset);

   surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
              (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = brw_get_surface_num_multisamples(mt->num_samples);

   assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
   /* Note that the low bits of these fields are missing, so
    * there's the possibility of getting in trouble.
    */
   assert(tile_x % 4 == 0);
   assert(tile_y % 2 == 0);
   surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
              (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
              (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));

   if (brw->gen < 6) {
      /* _NEW_COLOR */
      if (!ctx->Color.ColorLogicOpEnabled &&
          (ctx->Color.BlendEnabled & (1 << unit)))
         surf[0] |= BRW_SURFACE_BLEND_ENABLED;

      if (!ctx->Color.ColorMask[unit][0])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
      if (!ctx->Color.ColorMask[unit][1])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
      if (!ctx->Color.ColorMask[unit][2])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;

      /* As mentioned above, disable writes to the alpha component when the
       * renderbuffer is XRGB.
       */
      if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
          !ctx->Color.ColorMask[unit][3]) {
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
      }
   }

   drm_intel_bo_emit_reloc(brw->batch.bo,
                           offset + 4,
                           mt->bo,
                           surf[1] - mt->bo->offset64,
                           I915_GEM_DOMAIN_RENDER,
                           I915_GEM_DOMAIN_RENDER);

   return offset;
}
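/* The X/Y offset fields written into surf[5] above only have 4-pixel and
 * 2-row granularity in SURFACE_STATE, which is why the low bits are
 * asserted to be zero: a tile_x of, say, 6 pixels simply cannot be
 * expressed, hence the "low bits missing" caveat in the comment.
 */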
/**
 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
 */
void
brw_update_renderbuffer_surfaces(struct brw_context *brw,
                                 const struct gl_framebuffer *fb,
                                 uint32_t render_target_start,
                                 uint32_t *surf_offset)
{
   GLuint i;
   const unsigned int w = _mesa_geometric_width(fb);
   const unsigned int h = _mesa_geometric_height(fb);
   const unsigned int s = _mesa_geometric_samples(fb);

   /* Update surfaces for drawing buffers */
   if (fb->_NumColorDrawBuffers >= 1) {
      for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
         const uint32_t surf_index = render_target_start + i;

         if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
            surf_offset[surf_index] =
               brw->vtbl.update_renderbuffer_surface(
                  brw, fb->_ColorDrawBuffers[i],
                  _mesa_geometric_layers(fb) > 0, i, surf_index);
         } else {
            brw->vtbl.emit_null_surface_state(brw, w, h, s,
                                              &surf_offset[surf_index]);
         }
      }
   } else {
      const uint32_t surf_index = render_target_start;
      brw->vtbl.emit_null_surface_state(brw, w, h, s,
                                        &surf_offset[surf_index]);
   }
}

static void
update_renderbuffer_surfaces(struct brw_context *brw)
{
   const struct gl_context *ctx = &brw->ctx;

   /* _NEW_BUFFERS | _NEW_COLOR */
   const struct gl_framebuffer *fb = ctx->DrawBuffer;
   brw_update_renderbuffer_surfaces(
      brw, fb,
      brw->wm.prog_data->binding_table.render_target_start,
      brw->wm.base.surf_offset);
   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS |
              _NEW_COLOR,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = update_renderbuffer_surfaces,
};

const struct brw_tracked_state gen6_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH,
   },
   .emit = update_renderbuffer_surfaces,
};
static void
update_stage_texture_surfaces(struct brw_context *brw,
                              const struct gl_program *prog,
                              struct brw_stage_state *stage_state,
                              bool for_gather)
{
   if (!prog)
      return;

   struct gl_context *ctx = &brw->ctx;

   uint32_t *surf_offset = stage_state->surf_offset;

   /* BRW_NEW_*_PROG_DATA */
   if (for_gather)
      surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
   else
      surf_offset += stage_state->prog_data->binding_table.texture_start;

   unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
   for (unsigned s = 0; s < num_samplers; s++) {
      surf_offset[s] = 0;

      if (prog->SamplersUsed & (1 << s)) {
         const unsigned unit = prog->SamplerUnits[s];

         if (ctx->Texture.Unit[unit]._Current) {
            brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather);
         }
      }
   }
}
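/* Binding-table layout implied by the offsets above: for a stage using
 * sampler s, the regular surface lands at slot texture_start + s and, when
 * for_gather is set, a second copy of the state lands at
 * gather_texture_start + s.  (Illustrative description of the indexing,
 * not a quote from the original comments.)
 */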
/**
 * Construct SURFACE_STATE objects for enabled textures.
 */
static void
brw_update_texture_surfaces(struct brw_context *brw)
{
   /* BRW_NEW_VERTEX_PROGRAM */
   struct gl_program *vs = (struct gl_program *) brw->vertex_program;

   /* BRW_NEW_GEOMETRY_PROGRAM */
   struct gl_program *gs = (struct gl_program *) brw->geometry_program;

   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct gl_program *fs = (struct gl_program *) brw->fragment_program;

   /* _NEW_TEXTURE */
   update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
   update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
   update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);

   /* emit alternate set of surface state for gather. this
    * allows the surface format to be overridden for only the
    * gather4 messages. */
   if (brw->gen < 8) {
      if (vs && vs->UsesGather)
         update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
      if (gs && gs->UsesGather)
         update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
      if (fs && fs->UsesGather)
         update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
   }

   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_GEOMETRY_PROGRAM |
             BRW_NEW_GS_PROG_DATA |
             BRW_NEW_TEXTURE_BUFFER |
             BRW_NEW_VERTEX_PROGRAM |
             BRW_NEW_VS_PROG_DATA,
   },
   .emit = brw_update_texture_surfaces,
};
void
brw_upload_ubo_surfaces(struct brw_context *brw,
                        struct gl_shader *shader,
                        struct brw_stage_state *stage_state,
                        struct brw_stage_prog_data *prog_data,
                        bool dword_pitch)
{
   struct gl_context *ctx = &brw->ctx;

   if (!shader)
      return;

   uint32_t *surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.ubo_start];

   for (int i = 0; i < shader->NumUniformBlocks; i++) {
      struct gl_uniform_buffer_binding *binding;
      struct intel_buffer_object *intel_bo;

      binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
      intel_bo = intel_buffer_object(binding->BufferObject);
      drm_intel_bo *bo =
         intel_bufferobj_buffer(brw, intel_bo,
                                binding->Offset,
                                binding->BufferObject->Size - binding->Offset);

      /* Because behavior for referencing outside of the binding's size in the
       * glBindBufferRange case is undefined, we can just bind the whole buffer
       * glBindBufferBase wants and be a correct implementation.
       */
      brw_create_constant_surface(brw, bo, binding->Offset,
                                  bo->size - binding->Offset,
                                  &surf_offsets[i],
                                  dword_pitch);
   }

   if (shader->NumUniformBlocks)
      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}
static void
brw_upload_wm_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;

   if (!prog)
      return;

   /* BRW_NEW_FS_PROG_DATA */
   brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
                           &brw->wm.base, &brw->wm.prog_data->base, true);
}

const struct brw_tracked_state brw_wm_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_UNIFORM_BUFFER,
   },
   .emit = brw_upload_wm_ubo_surfaces,
};
static void
brw_upload_abo_surfaces(struct brw_context *brw,
                        struct gl_shader_program *prog,
                        struct brw_stage_state *stage_state,
                        struct brw_stage_prog_data *prog_data)
{
   struct gl_context *ctx = &brw->ctx;
   uint32_t *surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.abo_start];

   for (int i = 0; i < prog->NumAtomicBuffers; i++) {
      struct gl_atomic_buffer_binding *binding =
         &ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding];
      struct intel_buffer_object *intel_bo =
         intel_buffer_object(binding->BufferObject);
      drm_intel_bo *bo = intel_bufferobj_buffer(
         brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);

      brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
                                          binding->Offset, BRW_SURFACEFORMAT_RAW,
                                          bo->size - binding->Offset, 1, true);
   }

   if (prog->NumAtomicBuffers)
      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}
static void
brw_upload_wm_abo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;

   if (prog) {
      /* BRW_NEW_FS_PROG_DATA */
      brw_upload_abo_surfaces(brw, prog, &brw->wm.base,
                              &brw->wm.prog_data->base);
   }
}

const struct brw_tracked_state brw_wm_abo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_ATOMIC_BUFFER |
             BRW_NEW_BATCH |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = brw_upload_wm_abo_surfaces,
};
static void
brw_upload_cs_abo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   struct gl_shader_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];

   if (prog) {
      /* BRW_NEW_CS_PROG_DATA */
      brw_upload_abo_surfaces(brw, prog, &brw->cs.base,
                              &brw->cs.prog_data->base);
   }
}

const struct brw_tracked_state brw_cs_abo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_ATOMIC_BUFFER |
             BRW_NEW_BATCH |
             BRW_NEW_CS_PROG_DATA,
   },
   .emit = brw_upload_cs_abo_surfaces,
};
void
gen4_init_vtable_surface_functions(struct brw_context *brw)
{
   brw->vtbl.update_texture_surface = brw_update_texture_surface;
   brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
   brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
   brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
}