/**********************************************************************
 *
 * Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 * Intel funded Tungsten Graphics to
 * develop this 3D driver.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "program/prog_parameter.h"
39 #include "intel_mipmap_tree.h"
40 #include "intel_batchbuffer.h"
41 #include "intel_tex.h"
42 #include "intel_fbo.h"
43 #include "intel_buffer_objects.h"
45 #include "brw_context.h"
46 #include "brw_state.h"
47 #include "brw_defines.h"
51 translate_tex_target(GLenum target
)
55 case GL_TEXTURE_1D_ARRAY_EXT
:
56 return BRW_SURFACE_1D
;
58 case GL_TEXTURE_RECTANGLE_NV
:
59 return BRW_SURFACE_2D
;
62 case GL_TEXTURE_2D_ARRAY_EXT
:
63 case GL_TEXTURE_EXTERNAL_OES
:
64 case GL_TEXTURE_2D_MULTISAMPLE
:
65 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY
:
66 return BRW_SURFACE_2D
;
69 return BRW_SURFACE_3D
;
71 case GL_TEXTURE_CUBE_MAP
:
72 case GL_TEXTURE_CUBE_MAP_ARRAY
:
73 return BRW_SURFACE_CUBE
;
82 brw_get_surface_tiling_bits(uint32_t tiling
)
86 return BRW_SURFACE_TILED
;
88 return BRW_SURFACE_TILED
| BRW_SURFACE_TILED_Y
;
96 brw_get_surface_num_multisamples(unsigned num_samples
)
99 return BRW_SURFACE_MULTISAMPLECOUNT_4
;
101 return BRW_SURFACE_MULTISAMPLECOUNT_1
;
105 brw_configure_w_tiled(const struct intel_mipmap_tree
*mt
,
106 bool is_render_target
,
107 unsigned *width
, unsigned *height
,
108 unsigned *pitch
, uint32_t *tiling
, unsigned *format
)
110 static const unsigned halign_stencil
= 8;
112 /* In Y-tiling row is twice as wide as in W-tiling, and subsequently
113 * there are half as many rows.
114 * In addition, mip-levels are accessed manually by the program and
115 * therefore the surface is setup to cover all the mip-levels for one slice.
116 * (Hardware is still used to access individual slices).
118 *tiling
= I915_TILING_Y
;
119 *pitch
= mt
->pitch
* 2;
120 *width
= ALIGN(mt
->total_width
, halign_stencil
) * 2;
121 *height
= (mt
->total_height
/ mt
->physical_depth0
) / 2;
123 if (is_render_target
) {
124 *format
= BRW_SURFACEFORMAT_R8_UINT
;
130 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
134 brw_get_texture_swizzle(const struct gl_context
*ctx
,
135 const struct gl_texture_object
*t
)
137 const struct gl_texture_image
*img
= t
->Image
[0][t
->BaseLevel
];
139 int swizzles
[SWIZZLE_NIL
+ 1] = {
149 if (img
->_BaseFormat
== GL_DEPTH_COMPONENT
||
150 img
->_BaseFormat
== GL_DEPTH_STENCIL
) {
151 GLenum depth_mode
= t
->DepthMode
;
153 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
154 * with depth component data specified with a sized internal format.
155 * Otherwise, it's left at the old default, GL_LUMINANCE.
157 if (_mesa_is_gles3(ctx
) &&
158 img
->InternalFormat
!= GL_DEPTH_COMPONENT
&&
159 img
->InternalFormat
!= GL_DEPTH_STENCIL
) {
163 switch (depth_mode
) {
165 swizzles
[0] = SWIZZLE_ZERO
;
166 swizzles
[1] = SWIZZLE_ZERO
;
167 swizzles
[2] = SWIZZLE_ZERO
;
168 swizzles
[3] = SWIZZLE_X
;
171 swizzles
[0] = SWIZZLE_X
;
172 swizzles
[1] = SWIZZLE_X
;
173 swizzles
[2] = SWIZZLE_X
;
174 swizzles
[3] = SWIZZLE_ONE
;
177 swizzles
[0] = SWIZZLE_X
;
178 swizzles
[1] = SWIZZLE_X
;
179 swizzles
[2] = SWIZZLE_X
;
180 swizzles
[3] = SWIZZLE_X
;
183 swizzles
[0] = SWIZZLE_X
;
184 swizzles
[1] = SWIZZLE_ZERO
;
185 swizzles
[2] = SWIZZLE_ZERO
;
186 swizzles
[3] = SWIZZLE_ONE
;
191 /* If the texture's format is alpha-only, force R, G, and B to
192 * 0.0. Similarly, if the texture's format has no alpha channel,
193 * force the alpha value read to 1.0. This allows for the
194 * implementation to use an RGBA texture for any of these formats
195 * without leaking any unexpected values.
197 switch (img
->_BaseFormat
) {
199 swizzles
[0] = SWIZZLE_ZERO
;
200 swizzles
[1] = SWIZZLE_ZERO
;
201 swizzles
[2] = SWIZZLE_ZERO
;
206 if (_mesa_get_format_bits(img
->TexFormat
, GL_ALPHA_BITS
) > 0)
207 swizzles
[3] = SWIZZLE_ONE
;
211 return MAKE_SWIZZLE4(swizzles
[GET_SWZ(t
->_Swizzle
, 0)],
212 swizzles
[GET_SWZ(t
->_Swizzle
, 1)],
213 swizzles
[GET_SWZ(t
->_Swizzle
, 2)],
214 swizzles
[GET_SWZ(t
->_Swizzle
, 3)]);
218 gen4_emit_buffer_surface_state(struct brw_context
*brw
,
219 uint32_t *out_offset
,
221 unsigned buffer_offset
,
222 unsigned surface_format
,
223 unsigned buffer_size
,
228 uint32_t *surf
= brw_state_batch(brw
, AUB_TRACE_SURFACE_STATE
,
229 6 * 4, 32, out_offset
);
230 memset(surf
, 0, 6 * 4);
232 surf
[0] = BRW_SURFACE_BUFFER
<< BRW_SURFACE_TYPE_SHIFT
|
233 surface_format
<< BRW_SURFACE_FORMAT_SHIFT
|
234 (brw
->gen
>= 6 ? BRW_SURFACE_RC_READ_WRITE
: 0);
235 surf
[1] = (bo
? bo
->offset64
: 0) + buffer_offset
; /* reloc */
236 surf
[2] = (buffer_size
& 0x7f) << BRW_SURFACE_WIDTH_SHIFT
|
237 ((buffer_size
>> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT
;
238 surf
[3] = ((buffer_size
>> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT
|
239 (pitch
- 1) << BRW_SURFACE_PITCH_SHIFT
;
241 /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
242 * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
243 * physical cache. It is mapped in hardware to the sampler cache."
246 drm_intel_bo_emit_reloc(brw
->batch
.bo
, *out_offset
+ 4,
248 I915_GEM_DOMAIN_SAMPLER
,
249 (rw
? I915_GEM_DOMAIN_SAMPLER
: 0));
254 brw_update_buffer_texture_surface(struct gl_context
*ctx
,
256 uint32_t *surf_offset
)
258 struct brw_context
*brw
= brw_context(ctx
);
259 struct gl_texture_object
*tObj
= ctx
->Texture
.Unit
[unit
]._Current
;
260 struct intel_buffer_object
*intel_obj
=
261 intel_buffer_object(tObj
->BufferObject
);
262 uint32_t size
= tObj
->BufferSize
;
263 drm_intel_bo
*bo
= NULL
;
264 mesa_format format
= tObj
->_BufferObjectFormat
;
265 uint32_t brw_format
= brw_format_for_mesa_format(format
);
266 int texel_size
= _mesa_get_format_bytes(format
);
269 size
= MIN2(size
, intel_obj
->Base
.Size
);
270 bo
= intel_bufferobj_buffer(brw
, intel_obj
, tObj
->BufferOffset
, size
);
273 if (brw_format
== 0 && format
!= MESA_FORMAT_RGBA_FLOAT32
) {
274 _mesa_problem(NULL
, "bad format %s for texture buffer\n",
275 _mesa_get_format_name(format
));
278 brw
->vtbl
.emit_buffer_surface_state(brw
, surf_offset
, bo
,
288 brw_update_texture_surface(struct gl_context
*ctx
,
290 uint32_t *surf_offset
,
293 struct brw_context
*brw
= brw_context(ctx
);
294 struct gl_texture_object
*tObj
= ctx
->Texture
.Unit
[unit
]._Current
;
295 struct intel_texture_object
*intelObj
= intel_texture_object(tObj
);
296 struct intel_mipmap_tree
*mt
= intelObj
->mt
;
297 struct gl_sampler_object
*sampler
= _mesa_get_samplerobj(ctx
, unit
);
300 /* BRW_NEW_UNIFORM_BUFFER */
301 if (tObj
->Target
== GL_TEXTURE_BUFFER
) {
302 brw_update_buffer_texture_surface(ctx
, unit
, surf_offset
);
306 surf
= brw_state_batch(brw
, AUB_TRACE_SURFACE_STATE
,
307 6 * 4, 32, surf_offset
);
309 uint32_t tex_format
= translate_tex_format(brw
, mt
->format
,
310 sampler
->sRGBDecode
);
313 /* Sandybridge's gather4 message is broken for integer formats.
314 * To work around this, we pretend the surface is UNORM for
315 * 8 or 16-bit formats, and emit shader instructions to recover
316 * the real INT/UINT value. For 32-bit formats, we pretend
317 * the surface is FLOAT, and simply reinterpret the resulting
320 switch (tex_format
) {
321 case BRW_SURFACEFORMAT_R8_SINT
:
322 case BRW_SURFACEFORMAT_R8_UINT
:
323 tex_format
= BRW_SURFACEFORMAT_R8_UNORM
;
326 case BRW_SURFACEFORMAT_R16_SINT
:
327 case BRW_SURFACEFORMAT_R16_UINT
:
328 tex_format
= BRW_SURFACEFORMAT_R16_UNORM
;
331 case BRW_SURFACEFORMAT_R32_SINT
:
332 case BRW_SURFACEFORMAT_R32_UINT
:
333 tex_format
= BRW_SURFACEFORMAT_R32_FLOAT
;
341 surf
[0] = (translate_tex_target(tObj
->Target
) << BRW_SURFACE_TYPE_SHIFT
|
342 BRW_SURFACE_MIPMAPLAYOUT_BELOW
<< BRW_SURFACE_MIPLAYOUT_SHIFT
|
343 BRW_SURFACE_CUBEFACE_ENABLES
|
344 tex_format
<< BRW_SURFACE_FORMAT_SHIFT
);
346 surf
[1] = mt
->bo
->offset64
+ mt
->offset
; /* reloc */
348 surf
[2] = ((intelObj
->_MaxLevel
- tObj
->BaseLevel
) << BRW_SURFACE_LOD_SHIFT
|
349 (mt
->logical_width0
- 1) << BRW_SURFACE_WIDTH_SHIFT
|
350 (mt
->logical_height0
- 1) << BRW_SURFACE_HEIGHT_SHIFT
);
352 surf
[3] = (brw_get_surface_tiling_bits(mt
->tiling
) |
353 (mt
->logical_depth0
- 1) << BRW_SURFACE_DEPTH_SHIFT
|
354 (mt
->pitch
- 1) << BRW_SURFACE_PITCH_SHIFT
);
356 surf
[4] = (brw_get_surface_num_multisamples(mt
->num_samples
) |
357 SET_FIELD(tObj
->BaseLevel
- mt
->first_level
, BRW_SURFACE_MIN_LOD
));
359 surf
[5] = mt
->align_h
== 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE
: 0;
361 /* Emit relocation to surface contents */
362 drm_intel_bo_emit_reloc(brw
->batch
.bo
,
365 surf
[1] - mt
->bo
->offset64
,
366 I915_GEM_DOMAIN_SAMPLER
, 0);
370 * Create the constant buffer surface. Vertex/fragment shader constants will be
371 * read from this buffer with Data Port Read instructions/messages.
374 brw_create_constant_surface(struct brw_context
*brw
,
378 uint32_t *out_offset
,
381 uint32_t stride
= dword_pitch
? 4 : 16;
382 uint32_t elements
= ALIGN(size
, stride
) / stride
;
384 brw
->vtbl
.emit_buffer_surface_state(brw
, out_offset
, bo
, offset
,
385 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT
,
386 elements
, stride
, 0, false);
390 * Set up a binding table entry for use by stream output logic (transform
393 * buffer_size_minus_1 must me less than BRW_MAX_NUM_BUFFER_ENTRIES.
396 brw_update_sol_surface(struct brw_context
*brw
,
397 struct gl_buffer_object
*buffer_obj
,
398 uint32_t *out_offset
, unsigned num_vector_components
,
399 unsigned stride_dwords
, unsigned offset_dwords
)
401 struct intel_buffer_object
*intel_bo
= intel_buffer_object(buffer_obj
);
402 uint32_t offset_bytes
= 4 * offset_dwords
;
403 drm_intel_bo
*bo
= intel_bufferobj_buffer(brw
, intel_bo
,
405 buffer_obj
->Size
- offset_bytes
);
406 uint32_t *surf
= brw_state_batch(brw
, AUB_TRACE_SURFACE_STATE
, 6 * 4, 32,
408 uint32_t pitch_minus_1
= 4*stride_dwords
- 1;
409 size_t size_dwords
= buffer_obj
->Size
/ 4;
410 uint32_t buffer_size_minus_1
, width
, height
, depth
, surface_format
;
412 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
413 * too big to map using a single binding table entry?
415 assert((size_dwords
- offset_dwords
) / stride_dwords
416 <= BRW_MAX_NUM_BUFFER_ENTRIES
);
418 if (size_dwords
> offset_dwords
+ num_vector_components
) {
419 /* There is room for at least 1 transform feedback output in the buffer.
420 * Compute the number of additional transform feedback outputs the
421 * buffer has room for.
423 buffer_size_minus_1
=
424 (size_dwords
- offset_dwords
- num_vector_components
) / stride_dwords
;
426 /* There isn't even room for a single transform feedback output in the
427 * buffer. We can't configure the binding table entry to prevent output
428 * entirely; we'll have to rely on the geometry shader to detect
429 * overflow. But to minimize the damage in case of a bug, set up the
430 * binding table entry to just allow a single output.
432 buffer_size_minus_1
= 0;
434 width
= buffer_size_minus_1
& 0x7f;
435 height
= (buffer_size_minus_1
& 0xfff80) >> 7;
436 depth
= (buffer_size_minus_1
& 0x7f00000) >> 20;
438 switch (num_vector_components
) {
440 surface_format
= BRW_SURFACEFORMAT_R32_FLOAT
;
443 surface_format
= BRW_SURFACEFORMAT_R32G32_FLOAT
;
446 surface_format
= BRW_SURFACEFORMAT_R32G32B32_FLOAT
;
449 surface_format
= BRW_SURFACEFORMAT_R32G32B32A32_FLOAT
;
452 assert(!"Invalid vector size for transform feedback output");
453 surface_format
= BRW_SURFACEFORMAT_R32_FLOAT
;
457 surf
[0] = BRW_SURFACE_BUFFER
<< BRW_SURFACE_TYPE_SHIFT
|
458 BRW_SURFACE_MIPMAPLAYOUT_BELOW
<< BRW_SURFACE_MIPLAYOUT_SHIFT
|
459 surface_format
<< BRW_SURFACE_FORMAT_SHIFT
|
460 BRW_SURFACE_RC_READ_WRITE
;
461 surf
[1] = bo
->offset64
+ offset_bytes
; /* reloc */
462 surf
[2] = (width
<< BRW_SURFACE_WIDTH_SHIFT
|
463 height
<< BRW_SURFACE_HEIGHT_SHIFT
);
464 surf
[3] = (depth
<< BRW_SURFACE_DEPTH_SHIFT
|
465 pitch_minus_1
<< BRW_SURFACE_PITCH_SHIFT
);
469 /* Emit relocation to surface contents. */
470 drm_intel_bo_emit_reloc(brw
->batch
.bo
,
473 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
);
476 /* Creates a new WM constant buffer reflecting the current fragment program's
477 * constants, if needed by the fragment program.
479 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
483 brw_upload_wm_pull_constants(struct brw_context
*brw
)
485 struct gl_context
*ctx
= &brw
->ctx
;
486 /* BRW_NEW_FRAGMENT_PROGRAM */
487 struct brw_fragment_program
*fp
=
488 (struct brw_fragment_program
*) brw
->fragment_program
;
489 struct gl_program_parameter_list
*params
= fp
->program
.Base
.Parameters
;
490 const int size
= brw
->wm
.prog_data
->base
.nr_pull_params
* sizeof(float);
491 const int surf_index
=
492 brw
->wm
.prog_data
->base
.binding_table
.pull_constants_start
;
495 _mesa_load_state_parameters(ctx
, params
);
497 /* CACHE_NEW_WM_PROG */
498 if (brw
->wm
.prog_data
->base
.nr_pull_params
== 0) {
499 if (brw
->wm
.base
.surf_offset
[surf_index
]) {
500 brw
->wm
.base
.surf_offset
[surf_index
] = 0;
501 brw
->state
.dirty
.brw
|= BRW_NEW_SURFACES
;
506 /* _NEW_PROGRAM_CONSTANTS */
507 drm_intel_bo
*const_bo
= NULL
;
508 uint32_t const_offset
;
509 float *constants
= intel_upload_space(brw
, size
, 64,
510 &const_bo
, &const_offset
);
511 for (i
= 0; i
< brw
->wm
.prog_data
->base
.nr_pull_params
; i
++) {
512 constants
[i
] = *brw
->wm
.prog_data
->base
.pull_param
[i
];
515 brw_create_constant_surface(brw
, const_bo
, const_offset
, size
,
516 &brw
->wm
.base
.surf_offset
[surf_index
],
518 drm_intel_bo_unreference(const_bo
);
520 brw
->state
.dirty
.brw
|= BRW_NEW_SURFACES
;
523 const struct brw_tracked_state brw_wm_pull_constants
= {
525 .mesa
= (_NEW_PROGRAM_CONSTANTS
),
526 .brw
= (BRW_NEW_BATCH
| BRW_NEW_FRAGMENT_PROGRAM
),
527 .cache
= CACHE_NEW_WM_PROG
,
529 .emit
= brw_upload_wm_pull_constants
,
533 brw_update_null_renderbuffer_surface(struct brw_context
*brw
, unsigned int unit
)
535 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
538 * A null surface will be used in instances where an actual surface is
539 * not bound. When a write message is generated to a null surface, no
540 * actual surface is written to. When a read message (including any
541 * sampling engine message) is generated to a null surface, the result
542 * is all zeros. Note that a null surface type is allowed to be used
543 * with all messages, even if it is not specificially indicated as
544 * supported. All of the remaining fields in surface state are ignored
545 * for null surfaces, with the following exceptions:
547 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
548 * depth buffer’s corresponding state for all render target surfaces,
551 * - Surface Format must be R8G8B8A8_UNORM.
553 struct gl_context
*ctx
= &brw
->ctx
;
555 unsigned surface_type
= BRW_SURFACE_NULL
;
556 drm_intel_bo
*bo
= NULL
;
557 unsigned pitch_minus_1
= 0;
558 uint32_t multisampling_state
= 0;
559 uint32_t surf_index
=
560 brw
->wm
.prog_data
->binding_table
.render_target_start
+ unit
;
563 const struct gl_framebuffer
*fb
= ctx
->DrawBuffer
;
565 surf
= brw_state_batch(brw
, AUB_TRACE_SURFACE_STATE
, 6 * 4, 32,
566 &brw
->wm
.base
.surf_offset
[surf_index
]);
568 if (fb
->Visual
.samples
> 1) {
569 /* On Gen6, null render targets seem to cause GPU hangs when
570 * multisampling. So work around this problem by rendering into dummy
573 * To decrease the amount of memory needed by the workaround buffer, we
574 * set its pitch to 128 bytes (the width of a Y tile). This means that
575 * the amount of memory needed for the workaround buffer is
576 * (width_in_tiles + height_in_tiles - 1) tiles.
578 * Note that since the workaround buffer will be interpreted by the
579 * hardware as an interleaved multisampled buffer, we need to compute
580 * width_in_tiles and height_in_tiles by dividing the width and height
581 * by 16 rather than the normal Y-tile size of 32.
583 unsigned width_in_tiles
= ALIGN(fb
->Width
, 16) / 16;
584 unsigned height_in_tiles
= ALIGN(fb
->Height
, 16) / 16;
585 unsigned size_needed
= (width_in_tiles
+ height_in_tiles
- 1) * 4096;
586 brw_get_scratch_bo(brw
, &brw
->wm
.multisampled_null_render_target_bo
,
588 bo
= brw
->wm
.multisampled_null_render_target_bo
;
589 surface_type
= BRW_SURFACE_2D
;
591 multisampling_state
=
592 brw_get_surface_num_multisamples(fb
->Visual
.samples
);
595 surf
[0] = (surface_type
<< BRW_SURFACE_TYPE_SHIFT
|
596 BRW_SURFACEFORMAT_B8G8R8A8_UNORM
<< BRW_SURFACE_FORMAT_SHIFT
);
598 surf
[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT
|
599 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT
|
600 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT
|
601 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT
);
603 surf
[1] = bo
? bo
->offset64
: 0;
604 surf
[2] = ((fb
->Width
- 1) << BRW_SURFACE_WIDTH_SHIFT
|
605 (fb
->Height
- 1) << BRW_SURFACE_HEIGHT_SHIFT
);
607 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
610 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
612 surf
[3] = (BRW_SURFACE_TILED
| BRW_SURFACE_TILED_Y
|
613 pitch_minus_1
<< BRW_SURFACE_PITCH_SHIFT
);
614 surf
[4] = multisampling_state
;
618 drm_intel_bo_emit_reloc(brw
->batch
.bo
,
619 brw
->wm
.base
.surf_offset
[surf_index
] + 4,
621 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
);
626 * Sets up a surface state structure to point at the given region.
627 * While it is only used for the front/back buffer currently, it should be
628 * usable for further buffers when doing ARB_draw_buffer support.
631 brw_update_renderbuffer_surface(struct brw_context
*brw
,
632 struct gl_renderbuffer
*rb
,
636 struct gl_context
*ctx
= &brw
->ctx
;
637 struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
638 struct intel_mipmap_tree
*mt
= irb
->mt
;
640 uint32_t tile_x
, tile_y
;
643 mesa_format rb_format
= _mesa_get_render_format(ctx
, intel_rb_format(irb
));
644 uint32_t surf_index
=
645 brw
->wm
.prog_data
->binding_table
.render_target_start
+ unit
;
649 if (rb
->TexImage
&& !brw
->has_surface_tile_offset
) {
650 intel_renderbuffer_get_tile_offsets(irb
, &tile_x
, &tile_y
);
652 if (tile_x
!= 0 || tile_y
!= 0) {
653 /* Original gen4 hardware couldn't draw to a non-tile-aligned
654 * destination in a miptree unless you actually setup your renderbuffer
655 * as a miptree and used the fragile lod/array_index/etc. controls to
656 * select the image. So, instead, we just make a new single-level
657 * miptree and render into that.
659 intel_renderbuffer_move_to_temp(brw
, irb
, false);
664 intel_miptree_used_for_rendering(irb
->mt
);
666 surf
= brw_state_batch(brw
, AUB_TRACE_SURFACE_STATE
, 6 * 4, 32,
667 &brw
->wm
.base
.surf_offset
[surf_index
]);
669 format
= brw
->render_target_format
[rb_format
];
670 if (unlikely(!brw
->format_supported_as_render_target
[rb_format
])) {
671 _mesa_problem(ctx
, "%s: renderbuffer format %s unsupported\n",
672 __FUNCTION__
, _mesa_get_format_name(rb_format
));
675 surf
[0] = (BRW_SURFACE_2D
<< BRW_SURFACE_TYPE_SHIFT
|
676 format
<< BRW_SURFACE_FORMAT_SHIFT
);
679 surf
[1] = (intel_renderbuffer_get_tile_offsets(irb
, &tile_x
, &tile_y
) +
682 surf
[2] = ((rb
->Width
- 1) << BRW_SURFACE_WIDTH_SHIFT
|
683 (rb
->Height
- 1) << BRW_SURFACE_HEIGHT_SHIFT
);
685 surf
[3] = (brw_get_surface_tiling_bits(mt
->tiling
) |
686 (mt
->pitch
- 1) << BRW_SURFACE_PITCH_SHIFT
);
688 surf
[4] = brw_get_surface_num_multisamples(mt
->num_samples
);
690 assert(brw
->has_surface_tile_offset
|| (tile_x
== 0 && tile_y
== 0));
691 /* Note that the low bits of these fields are missing, so
692 * there's the possibility of getting in trouble.
694 assert(tile_x
% 4 == 0);
695 assert(tile_y
% 2 == 0);
696 surf
[5] = ((tile_x
/ 4) << BRW_SURFACE_X_OFFSET_SHIFT
|
697 (tile_y
/ 2) << BRW_SURFACE_Y_OFFSET_SHIFT
|
698 (mt
->align_h
== 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE
: 0));
702 if (!ctx
->Color
.ColorLogicOpEnabled
&&
703 (ctx
->Color
.BlendEnabled
& (1 << unit
)))
704 surf
[0] |= BRW_SURFACE_BLEND_ENABLED
;
706 if (!ctx
->Color
.ColorMask
[unit
][0])
707 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT
;
708 if (!ctx
->Color
.ColorMask
[unit
][1])
709 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT
;
710 if (!ctx
->Color
.ColorMask
[unit
][2])
711 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT
;
713 /* As mentioned above, disable writes to the alpha component when the
714 * renderbuffer is XRGB.
716 if (ctx
->DrawBuffer
->Visual
.alphaBits
== 0 ||
717 !ctx
->Color
.ColorMask
[unit
][3]) {
718 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT
;
722 drm_intel_bo_emit_reloc(brw
->batch
.bo
,
723 brw
->wm
.base
.surf_offset
[surf_index
] + 4,
725 surf
[1] - mt
->bo
->offset64
,
726 I915_GEM_DOMAIN_RENDER
,
727 I915_GEM_DOMAIN_RENDER
);
731 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
734 brw_update_renderbuffer_surfaces(struct brw_context
*brw
)
736 struct gl_context
*ctx
= &brw
->ctx
;
739 /* _NEW_BUFFERS | _NEW_COLOR */
740 /* Update surfaces for drawing buffers */
741 if (ctx
->DrawBuffer
->_NumColorDrawBuffers
>= 1) {
742 for (i
= 0; i
< ctx
->DrawBuffer
->_NumColorDrawBuffers
; i
++) {
743 if (intel_renderbuffer(ctx
->DrawBuffer
->_ColorDrawBuffers
[i
])) {
744 brw
->vtbl
.update_renderbuffer_surface(brw
, ctx
->DrawBuffer
->_ColorDrawBuffers
[i
],
745 ctx
->DrawBuffer
->MaxNumLayers
> 0, i
);
747 brw
->vtbl
.update_null_renderbuffer_surface(brw
, i
);
751 brw
->vtbl
.update_null_renderbuffer_surface(brw
, 0);
753 brw
->state
.dirty
.brw
|= BRW_NEW_SURFACES
;
756 const struct brw_tracked_state brw_renderbuffer_surfaces
= {
758 .mesa
= (_NEW_COLOR
|
760 .brw
= BRW_NEW_BATCH
,
763 .emit
= brw_update_renderbuffer_surfaces
,
766 const struct brw_tracked_state gen6_renderbuffer_surfaces
= {
768 .mesa
= _NEW_BUFFERS
,
769 .brw
= BRW_NEW_BATCH
,
772 .emit
= brw_update_renderbuffer_surfaces
,
777 update_stage_texture_surfaces(struct brw_context
*brw
,
778 const struct gl_program
*prog
,
779 struct brw_stage_state
*stage_state
,
785 struct gl_context
*ctx
= &brw
->ctx
;
787 uint32_t *surf_offset
= stage_state
->surf_offset
;
789 surf_offset
+= stage_state
->prog_data
->binding_table
.gather_texture_start
;
791 surf_offset
+= stage_state
->prog_data
->binding_table
.texture_start
;
793 unsigned num_samplers
= _mesa_fls(prog
->SamplersUsed
);
794 for (unsigned s
= 0; s
< num_samplers
; s
++) {
797 if (prog
->SamplersUsed
& (1 << s
)) {
798 const unsigned unit
= prog
->SamplerUnits
[s
];
801 if (ctx
->Texture
.Unit
[unit
]._Current
) {
802 brw
->vtbl
.update_texture_surface(ctx
, unit
, surf_offset
+ s
, for_gather
);
810 * Construct SURFACE_STATE objects for enabled textures.
813 brw_update_texture_surfaces(struct brw_context
*brw
)
815 /* BRW_NEW_VERTEX_PROGRAM */
816 struct gl_program
*vs
= (struct gl_program
*) brw
->vertex_program
;
818 /* BRW_NEW_GEOMETRY_PROGRAM */
819 struct gl_program
*gs
= (struct gl_program
*) brw
->geometry_program
;
821 /* BRW_NEW_FRAGMENT_PROGRAM */
822 struct gl_program
*fs
= (struct gl_program
*) brw
->fragment_program
;
825 update_stage_texture_surfaces(brw
, vs
, &brw
->vs
.base
, false);
826 update_stage_texture_surfaces(brw
, gs
, &brw
->gs
.base
, false);
827 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, false);
829 /* emit alternate set of surface state for gather. this
830 * allows the surface format to be overriden for only the
831 * gather4 messages. */
832 if (vs
&& vs
->UsesGather
)
833 update_stage_texture_surfaces(brw
, vs
, &brw
->vs
.base
, true);
834 if (gs
&& gs
->UsesGather
)
835 update_stage_texture_surfaces(brw
, gs
, &brw
->gs
.base
, true);
836 if (fs
&& fs
->UsesGather
)
837 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, true);
839 brw
->state
.dirty
.brw
|= BRW_NEW_SURFACES
;
842 const struct brw_tracked_state brw_texture_surfaces
= {
844 .mesa
= _NEW_TEXTURE
,
845 .brw
= BRW_NEW_BATCH
|
846 BRW_NEW_UNIFORM_BUFFER
|
847 BRW_NEW_VERTEX_PROGRAM
|
848 BRW_NEW_GEOMETRY_PROGRAM
|
849 BRW_NEW_FRAGMENT_PROGRAM
,
852 .emit
= brw_update_texture_surfaces
,
856 brw_upload_ubo_surfaces(struct brw_context
*brw
,
857 struct gl_shader
*shader
,
858 struct brw_stage_state
*stage_state
,
859 struct brw_stage_prog_data
*prog_data
)
861 struct gl_context
*ctx
= &brw
->ctx
;
866 uint32_t *surf_offsets
=
867 &stage_state
->surf_offset
[prog_data
->binding_table
.ubo_start
];
869 for (int i
= 0; i
< shader
->NumUniformBlocks
; i
++) {
870 struct gl_uniform_buffer_binding
*binding
;
871 struct intel_buffer_object
*intel_bo
;
873 binding
= &ctx
->UniformBufferBindings
[shader
->UniformBlocks
[i
].Binding
];
874 intel_bo
= intel_buffer_object(binding
->BufferObject
);
876 intel_bufferobj_buffer(brw
, intel_bo
,
878 binding
->BufferObject
->Size
- binding
->Offset
);
880 /* Because behavior for referencing outside of the binding's size in the
881 * glBindBufferRange case is undefined, we can just bind the whole buffer
882 * glBindBufferBase wants and be a correct implementation.
884 brw_create_constant_surface(brw
, bo
, binding
->Offset
,
885 bo
->size
- binding
->Offset
,
887 shader
->Stage
== MESA_SHADER_FRAGMENT
);
890 if (shader
->NumUniformBlocks
)
891 brw
->state
.dirty
.brw
|= BRW_NEW_SURFACES
;
895 brw_upload_wm_ubo_surfaces(struct brw_context
*brw
)
897 struct gl_context
*ctx
= &brw
->ctx
;
899 struct gl_shader_program
*prog
= ctx
->_Shader
->_CurrentFragmentProgram
;
904 /* CACHE_NEW_WM_PROG */
905 brw_upload_ubo_surfaces(brw
, prog
->_LinkedShaders
[MESA_SHADER_FRAGMENT
],
906 &brw
->wm
.base
, &brw
->wm
.prog_data
->base
);
909 const struct brw_tracked_state brw_wm_ubo_surfaces
= {
911 .mesa
= _NEW_PROGRAM
,
912 .brw
= BRW_NEW_BATCH
| BRW_NEW_UNIFORM_BUFFER
,
913 .cache
= CACHE_NEW_WM_PROG
,
915 .emit
= brw_upload_wm_ubo_surfaces
,
919 brw_upload_abo_surfaces(struct brw_context
*brw
,
920 struct gl_shader_program
*prog
,
921 struct brw_stage_state
*stage_state
,
922 struct brw_stage_prog_data
*prog_data
)
924 struct gl_context
*ctx
= &brw
->ctx
;
925 uint32_t *surf_offsets
=
926 &stage_state
->surf_offset
[prog_data
->binding_table
.abo_start
];
928 for (int i
= 0; i
< prog
->NumAtomicBuffers
; i
++) {
929 struct gl_atomic_buffer_binding
*binding
=
930 &ctx
->AtomicBufferBindings
[prog
->AtomicBuffers
[i
].Binding
];
931 struct intel_buffer_object
*intel_bo
=
932 intel_buffer_object(binding
->BufferObject
);
933 drm_intel_bo
*bo
= intel_bufferobj_buffer(
934 brw
, intel_bo
, binding
->Offset
, intel_bo
->Base
.Size
- binding
->Offset
);
936 brw
->vtbl
.create_raw_surface(brw
, bo
, binding
->Offset
,
937 bo
->size
- binding
->Offset
,
938 &surf_offsets
[i
], true);
941 if (prog
->NumUniformBlocks
)
942 brw
->state
.dirty
.brw
|= BRW_NEW_SURFACES
;
946 brw_upload_wm_abo_surfaces(struct brw_context
*brw
)
948 struct gl_context
*ctx
= &brw
->ctx
;
950 struct gl_shader_program
*prog
= ctx
->Shader
._CurrentFragmentProgram
;
953 /* CACHE_NEW_WM_PROG */
954 brw_upload_abo_surfaces(brw
, prog
, &brw
->wm
.base
,
955 &brw
->wm
.prog_data
->base
);
959 const struct brw_tracked_state brw_wm_abo_surfaces
= {
961 .mesa
= _NEW_PROGRAM
,
962 .brw
= BRW_NEW_BATCH
| BRW_NEW_ATOMIC_BUFFER
,
963 .cache
= CACHE_NEW_WM_PROG
,
965 .emit
= brw_upload_wm_abo_surfaces
,
969 gen4_init_vtable_surface_functions(struct brw_context
*brw
)
971 brw
->vtbl
.update_texture_surface
= brw_update_texture_surface
;
972 brw
->vtbl
.update_renderbuffer_surface
= brw_update_renderbuffer_surface
;
973 brw
->vtbl
.update_null_renderbuffer_surface
=
974 brw_update_null_renderbuffer_surface
;
975 brw
->vtbl
.emit_buffer_surface_state
= gen4_emit_buffer_surface_state
;