2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keithw@vmware.com>
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "program/prog_parameter.h"
39 #include "intel_mipmap_tree.h"
40 #include "intel_batchbuffer.h"
41 #include "intel_tex.h"
42 #include "intel_fbo.h"
43 #include "intel_buffer_objects.h"
45 #include "brw_context.h"
46 #include "brw_state.h"
47 #include "brw_defines.h"
51 translate_tex_target(GLenum target
)
55 case GL_TEXTURE_1D_ARRAY_EXT
:
56 return BRW_SURFACE_1D
;
58 case GL_TEXTURE_RECTANGLE_NV
:
59 return BRW_SURFACE_2D
;
62 case GL_TEXTURE_2D_ARRAY_EXT
:
63 case GL_TEXTURE_EXTERNAL_OES
:
64 case GL_TEXTURE_2D_MULTISAMPLE
:
65 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY
:
66 return BRW_SURFACE_2D
;
69 return BRW_SURFACE_3D
;
71 case GL_TEXTURE_CUBE_MAP
:
72 case GL_TEXTURE_CUBE_MAP_ARRAY
:
73 return BRW_SURFACE_CUBE
;
76 unreachable("not reached");
81 brw_get_surface_tiling_bits(uint32_t tiling
)
85 return BRW_SURFACE_TILED
;
87 return BRW_SURFACE_TILED
| BRW_SURFACE_TILED_Y
;
95 brw_get_surface_num_multisamples(unsigned num_samples
)
98 return BRW_SURFACE_MULTISAMPLECOUNT_4
;
100 return BRW_SURFACE_MULTISAMPLECOUNT_1
;
104 brw_configure_w_tiled(const struct intel_mipmap_tree
*mt
,
105 bool is_render_target
,
106 unsigned *width
, unsigned *height
,
107 unsigned *pitch
, uint32_t *tiling
, unsigned *format
)
109 static const unsigned halign_stencil
= 8;
111 /* In Y-tiling row is twice as wide as in W-tiling, and subsequently
112 * there are half as many rows.
113 * In addition, mip-levels are accessed manually by the program and
114 * therefore the surface is setup to cover all the mip-levels for one slice.
115 * (Hardware is still used to access individual slices).
117 *tiling
= I915_TILING_Y
;
118 *pitch
= mt
->pitch
* 2;
119 *width
= ALIGN(mt
->total_width
, halign_stencil
) * 2;
120 *height
= (mt
->total_height
/ mt
->physical_depth0
) / 2;
122 if (is_render_target
) {
123 *format
= BRW_SURFACEFORMAT_R8_UINT
;
129 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
133 brw_get_texture_swizzle(const struct gl_context
*ctx
,
134 const struct gl_texture_object
*t
)
136 const struct gl_texture_image
*img
= t
->Image
[0][t
->BaseLevel
];
138 int swizzles
[SWIZZLE_NIL
+ 1] = {
148 if (img
->_BaseFormat
== GL_DEPTH_COMPONENT
||
149 img
->_BaseFormat
== GL_DEPTH_STENCIL
) {
150 GLenum depth_mode
= t
->DepthMode
;
152 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
153 * with depth component data specified with a sized internal format.
154 * Otherwise, it's left at the old default, GL_LUMINANCE.
156 if (_mesa_is_gles3(ctx
) &&
157 img
->InternalFormat
!= GL_DEPTH_COMPONENT
&&
158 img
->InternalFormat
!= GL_DEPTH_STENCIL
) {
162 switch (depth_mode
) {
164 swizzles
[0] = SWIZZLE_ZERO
;
165 swizzles
[1] = SWIZZLE_ZERO
;
166 swizzles
[2] = SWIZZLE_ZERO
;
167 swizzles
[3] = SWIZZLE_X
;
170 swizzles
[0] = SWIZZLE_X
;
171 swizzles
[1] = SWIZZLE_X
;
172 swizzles
[2] = SWIZZLE_X
;
173 swizzles
[3] = SWIZZLE_ONE
;
176 swizzles
[0] = SWIZZLE_X
;
177 swizzles
[1] = SWIZZLE_X
;
178 swizzles
[2] = SWIZZLE_X
;
179 swizzles
[3] = SWIZZLE_X
;
182 swizzles
[0] = SWIZZLE_X
;
183 swizzles
[1] = SWIZZLE_ZERO
;
184 swizzles
[2] = SWIZZLE_ZERO
;
185 swizzles
[3] = SWIZZLE_ONE
;
190 /* If the texture's format is alpha-only, force R, G, and B to
191 * 0.0. Similarly, if the texture's format has no alpha channel,
192 * force the alpha value read to 1.0. This allows for the
193 * implementation to use an RGBA texture for any of these formats
194 * without leaking any unexpected values.
196 switch (img
->_BaseFormat
) {
198 swizzles
[0] = SWIZZLE_ZERO
;
199 swizzles
[1] = SWIZZLE_ZERO
;
200 swizzles
[2] = SWIZZLE_ZERO
;
205 if (_mesa_get_format_bits(img
->TexFormat
, GL_ALPHA_BITS
) > 0)
206 swizzles
[3] = SWIZZLE_ONE
;
210 return MAKE_SWIZZLE4(swizzles
[GET_SWZ(t
->_Swizzle
, 0)],
211 swizzles
[GET_SWZ(t
->_Swizzle
, 1)],
212 swizzles
[GET_SWZ(t
->_Swizzle
, 2)],
213 swizzles
[GET_SWZ(t
->_Swizzle
, 3)]);
217 gen4_emit_buffer_surface_state(struct brw_context
*brw
,
218 uint32_t *out_offset
,
220 unsigned buffer_offset
,
221 unsigned surface_format
,
222 unsigned buffer_size
,
227 uint32_t *surf
= brw_state_batch(brw
, AUB_TRACE_SURFACE_STATE
,
228 6 * 4, 32, out_offset
);
229 memset(surf
, 0, 6 * 4);
231 surf
[0] = BRW_SURFACE_BUFFER
<< BRW_SURFACE_TYPE_SHIFT
|
232 surface_format
<< BRW_SURFACE_FORMAT_SHIFT
|
233 (brw
->gen
>= 6 ? BRW_SURFACE_RC_READ_WRITE
: 0);
234 surf
[1] = (bo
? bo
->offset64
: 0) + buffer_offset
; /* reloc */
235 surf
[2] = (buffer_size
& 0x7f) << BRW_SURFACE_WIDTH_SHIFT
|
236 ((buffer_size
>> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT
;
237 surf
[3] = ((buffer_size
>> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT
|
238 (pitch
- 1) << BRW_SURFACE_PITCH_SHIFT
;
240 /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
241 * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
242 * physical cache. It is mapped in hardware to the sampler cache."
245 drm_intel_bo_emit_reloc(brw
->batch
.bo
, *out_offset
+ 4,
247 I915_GEM_DOMAIN_SAMPLER
,
248 (rw
? I915_GEM_DOMAIN_SAMPLER
: 0));
253 brw_update_buffer_texture_surface(struct gl_context
*ctx
,
255 uint32_t *surf_offset
)
257 struct brw_context
*brw
= brw_context(ctx
);
258 struct gl_texture_object
*tObj
= ctx
->Texture
.Unit
[unit
]._Current
;
259 struct intel_buffer_object
*intel_obj
=
260 intel_buffer_object(tObj
->BufferObject
);
261 uint32_t size
= tObj
->BufferSize
;
262 drm_intel_bo
*bo
= NULL
;
263 mesa_format format
= tObj
->_BufferObjectFormat
;
264 uint32_t brw_format
= brw_format_for_mesa_format(format
);
265 int texel_size
= _mesa_get_format_bytes(format
);
268 size
= MIN2(size
, intel_obj
->Base
.Size
);
269 bo
= intel_bufferobj_buffer(brw
, intel_obj
, tObj
->BufferOffset
, size
);
272 if (brw_format
== 0 && format
!= MESA_FORMAT_RGBA_FLOAT32
) {
273 _mesa_problem(NULL
, "bad format %s for texture buffer\n",
274 _mesa_get_format_name(format
));
277 brw
->vtbl
.emit_buffer_surface_state(brw
, surf_offset
, bo
,
287 brw_update_texture_surface(struct gl_context
*ctx
,
289 uint32_t *surf_offset
,
292 struct brw_context
*brw
= brw_context(ctx
);
293 struct gl_texture_object
*tObj
= ctx
->Texture
.Unit
[unit
]._Current
;
294 struct intel_texture_object
*intelObj
= intel_texture_object(tObj
);
295 struct intel_mipmap_tree
*mt
= intelObj
->mt
;
296 struct gl_sampler_object
*sampler
= _mesa_get_samplerobj(ctx
, unit
);
299 /* BRW_NEW_UNIFORM_BUFFER */
300 if (tObj
->Target
== GL_TEXTURE_BUFFER
) {
301 brw_update_buffer_texture_surface(ctx
, unit
, surf_offset
);
305 surf
= brw_state_batch(brw
, AUB_TRACE_SURFACE_STATE
,
306 6 * 4, 32, surf_offset
);
308 uint32_t tex_format
= translate_tex_format(brw
, mt
->format
,
309 sampler
->sRGBDecode
);
312 /* Sandybridge's gather4 message is broken for integer formats.
313 * To work around this, we pretend the surface is UNORM for
314 * 8 or 16-bit formats, and emit shader instructions to recover
315 * the real INT/UINT value. For 32-bit formats, we pretend
316 * the surface is FLOAT, and simply reinterpret the resulting
319 switch (tex_format
) {
320 case BRW_SURFACEFORMAT_R8_SINT
:
321 case BRW_SURFACEFORMAT_R8_UINT
:
322 tex_format
= BRW_SURFACEFORMAT_R8_UNORM
;
325 case BRW_SURFACEFORMAT_R16_SINT
:
326 case BRW_SURFACEFORMAT_R16_UINT
:
327 tex_format
= BRW_SURFACEFORMAT_R16_UNORM
;
330 case BRW_SURFACEFORMAT_R32_SINT
:
331 case BRW_SURFACEFORMAT_R32_UINT
:
332 tex_format
= BRW_SURFACEFORMAT_R32_FLOAT
;
340 surf
[0] = (translate_tex_target(tObj
->Target
) << BRW_SURFACE_TYPE_SHIFT
|
341 BRW_SURFACE_MIPMAPLAYOUT_BELOW
<< BRW_SURFACE_MIPLAYOUT_SHIFT
|
342 BRW_SURFACE_CUBEFACE_ENABLES
|
343 tex_format
<< BRW_SURFACE_FORMAT_SHIFT
);
345 surf
[1] = mt
->bo
->offset64
+ mt
->offset
; /* reloc */
347 surf
[2] = ((intelObj
->_MaxLevel
- tObj
->BaseLevel
) << BRW_SURFACE_LOD_SHIFT
|
348 (mt
->logical_width0
- 1) << BRW_SURFACE_WIDTH_SHIFT
|
349 (mt
->logical_height0
- 1) << BRW_SURFACE_HEIGHT_SHIFT
);
351 surf
[3] = (brw_get_surface_tiling_bits(mt
->tiling
) |
352 (mt
->logical_depth0
- 1) << BRW_SURFACE_DEPTH_SHIFT
|
353 (mt
->pitch
- 1) << BRW_SURFACE_PITCH_SHIFT
);
355 surf
[4] = (brw_get_surface_num_multisamples(mt
->num_samples
) |
356 SET_FIELD(tObj
->BaseLevel
- mt
->first_level
, BRW_SURFACE_MIN_LOD
));
358 surf
[5] = mt
->align_h
== 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE
: 0;
360 /* Emit relocation to surface contents */
361 drm_intel_bo_emit_reloc(brw
->batch
.bo
,
364 surf
[1] - mt
->bo
->offset64
,
365 I915_GEM_DOMAIN_SAMPLER
, 0);
369 * Create the constant buffer surface. Vertex/fragment shader constants will be
370 * read from this buffer with Data Port Read instructions/messages.
373 brw_create_constant_surface(struct brw_context
*brw
,
377 uint32_t *out_offset
,
380 uint32_t stride
= dword_pitch
? 4 : 16;
381 uint32_t elements
= ALIGN(size
, stride
) / stride
;
383 brw
->vtbl
.emit_buffer_surface_state(brw
, out_offset
, bo
, offset
,
384 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT
,
385 elements
, stride
, 0, false);
389 * Set up a binding table entry for use by stream output logic (transform
392 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
395 brw_update_sol_surface(struct brw_context
*brw
,
396 struct gl_buffer_object
*buffer_obj
,
397 uint32_t *out_offset
, unsigned num_vector_components
,
398 unsigned stride_dwords
, unsigned offset_dwords
)
400 struct intel_buffer_object
*intel_bo
= intel_buffer_object(buffer_obj
);
401 uint32_t offset_bytes
= 4 * offset_dwords
;
402 drm_intel_bo
*bo
= intel_bufferobj_buffer(brw
, intel_bo
,
404 buffer_obj
->Size
- offset_bytes
);
405 uint32_t *surf
= brw_state_batch(brw
, AUB_TRACE_SURFACE_STATE
, 6 * 4, 32,
407 uint32_t pitch_minus_1
= 4*stride_dwords
- 1;
408 size_t size_dwords
= buffer_obj
->Size
/ 4;
409 uint32_t buffer_size_minus_1
, width
, height
, depth
, surface_format
;
411 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
412 * too big to map using a single binding table entry?
414 assert((size_dwords
- offset_dwords
) / stride_dwords
415 <= BRW_MAX_NUM_BUFFER_ENTRIES
);
417 if (size_dwords
> offset_dwords
+ num_vector_components
) {
418 /* There is room for at least 1 transform feedback output in the buffer.
419 * Compute the number of additional transform feedback outputs the
420 * buffer has room for.
422 buffer_size_minus_1
=
423 (size_dwords
- offset_dwords
- num_vector_components
) / stride_dwords
;
425 /* There isn't even room for a single transform feedback output in the
426 * buffer. We can't configure the binding table entry to prevent output
427 * entirely; we'll have to rely on the geometry shader to detect
428 * overflow. But to minimize the damage in case of a bug, set up the
429 * binding table entry to just allow a single output.
431 buffer_size_minus_1
= 0;
433 width
= buffer_size_minus_1
& 0x7f;
434 height
= (buffer_size_minus_1
& 0xfff80) >> 7;
435 depth
= (buffer_size_minus_1
& 0x7f00000) >> 20;
437 switch (num_vector_components
) {
439 surface_format
= BRW_SURFACEFORMAT_R32_FLOAT
;
442 surface_format
= BRW_SURFACEFORMAT_R32G32_FLOAT
;
445 surface_format
= BRW_SURFACEFORMAT_R32G32B32_FLOAT
;
448 surface_format
= BRW_SURFACEFORMAT_R32G32B32A32_FLOAT
;
451 unreachable("Invalid vector size for transform feedback output");
454 surf
[0] = BRW_SURFACE_BUFFER
<< BRW_SURFACE_TYPE_SHIFT
|
455 BRW_SURFACE_MIPMAPLAYOUT_BELOW
<< BRW_SURFACE_MIPLAYOUT_SHIFT
|
456 surface_format
<< BRW_SURFACE_FORMAT_SHIFT
|
457 BRW_SURFACE_RC_READ_WRITE
;
458 surf
[1] = bo
->offset64
+ offset_bytes
; /* reloc */
459 surf
[2] = (width
<< BRW_SURFACE_WIDTH_SHIFT
|
460 height
<< BRW_SURFACE_HEIGHT_SHIFT
);
461 surf
[3] = (depth
<< BRW_SURFACE_DEPTH_SHIFT
|
462 pitch_minus_1
<< BRW_SURFACE_PITCH_SHIFT
);
466 /* Emit relocation to surface contents. */
467 drm_intel_bo_emit_reloc(brw
->batch
.bo
,
470 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
);
473 /* Creates a new WM constant buffer reflecting the current fragment program's
474 * constants, if needed by the fragment program.
476 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
480 brw_upload_wm_pull_constants(struct brw_context
*brw
)
482 struct gl_context
*ctx
= &brw
->ctx
;
483 /* BRW_NEW_FRAGMENT_PROGRAM */
484 struct brw_fragment_program
*fp
=
485 (struct brw_fragment_program
*) brw
->fragment_program
;
486 struct gl_program_parameter_list
*params
= fp
->program
.Base
.Parameters
;
487 const int size
= brw
->wm
.prog_data
->base
.nr_pull_params
* sizeof(float);
488 const int surf_index
=
489 brw
->wm
.prog_data
->base
.binding_table
.pull_constants_start
;
492 _mesa_load_state_parameters(ctx
, params
);
494 /* CACHE_NEW_WM_PROG */
495 if (brw
->wm
.prog_data
->base
.nr_pull_params
== 0) {
496 if (brw
->wm
.base
.surf_offset
[surf_index
]) {
497 brw
->wm
.base
.surf_offset
[surf_index
] = 0;
498 brw
->state
.dirty
.brw
|= BRW_NEW_SURFACES
;
503 /* _NEW_PROGRAM_CONSTANTS */
504 drm_intel_bo
*const_bo
= NULL
;
505 uint32_t const_offset
;
506 float *constants
= intel_upload_space(brw
, size
, 64,
507 &const_bo
, &const_offset
);
508 for (i
= 0; i
< brw
->wm
.prog_data
->base
.nr_pull_params
; i
++) {
509 constants
[i
] = *brw
->wm
.prog_data
->base
.pull_param
[i
];
512 brw_create_constant_surface(brw
, const_bo
, const_offset
, size
,
513 &brw
->wm
.base
.surf_offset
[surf_index
],
515 drm_intel_bo_unreference(const_bo
);
517 brw
->state
.dirty
.brw
|= BRW_NEW_SURFACES
;
520 const struct brw_tracked_state brw_wm_pull_constants
= {
522 .mesa
= (_NEW_PROGRAM_CONSTANTS
),
523 .brw
= (BRW_NEW_BATCH
| BRW_NEW_FRAGMENT_PROGRAM
),
524 .cache
= CACHE_NEW_WM_PROG
,
526 .emit
= brw_upload_wm_pull_constants
,
530 brw_update_null_renderbuffer_surface(struct brw_context
*brw
, unsigned int unit
)
532 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
535 * A null surface will be used in instances where an actual surface is
536 * not bound. When a write message is generated to a null surface, no
537 * actual surface is written to. When a read message (including any
538 * sampling engine message) is generated to a null surface, the result
539 * is all zeros. Note that a null surface type is allowed to be used
540 * with all messages, even if it is not specificially indicated as
541 * supported. All of the remaining fields in surface state are ignored
542 * for null surfaces, with the following exceptions:
544 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
545 * depth buffer’s corresponding state for all render target surfaces,
548 * - Surface Format must be R8G8B8A8_UNORM.
550 struct gl_context
*ctx
= &brw
->ctx
;
552 unsigned surface_type
= BRW_SURFACE_NULL
;
553 drm_intel_bo
*bo
= NULL
;
554 unsigned pitch_minus_1
= 0;
555 uint32_t multisampling_state
= 0;
556 uint32_t surf_index
=
557 brw
->wm
.prog_data
->binding_table
.render_target_start
+ unit
;
560 const struct gl_framebuffer
*fb
= ctx
->DrawBuffer
;
562 surf
= brw_state_batch(brw
, AUB_TRACE_SURFACE_STATE
, 6 * 4, 32,
563 &brw
->wm
.base
.surf_offset
[surf_index
]);
565 if (fb
->Visual
.samples
> 1) {
566 /* On Gen6, null render targets seem to cause GPU hangs when
567 * multisampling. So work around this problem by rendering into dummy
570 * To decrease the amount of memory needed by the workaround buffer, we
571 * set its pitch to 128 bytes (the width of a Y tile). This means that
572 * the amount of memory needed for the workaround buffer is
573 * (width_in_tiles + height_in_tiles - 1) tiles.
575 * Note that since the workaround buffer will be interpreted by the
576 * hardware as an interleaved multisampled buffer, we need to compute
577 * width_in_tiles and height_in_tiles by dividing the width and height
578 * by 16 rather than the normal Y-tile size of 32.
580 unsigned width_in_tiles
= ALIGN(fb
->Width
, 16) / 16;
581 unsigned height_in_tiles
= ALIGN(fb
->Height
, 16) / 16;
582 unsigned size_needed
= (width_in_tiles
+ height_in_tiles
- 1) * 4096;
583 brw_get_scratch_bo(brw
, &brw
->wm
.multisampled_null_render_target_bo
,
585 bo
= brw
->wm
.multisampled_null_render_target_bo
;
586 surface_type
= BRW_SURFACE_2D
;
588 multisampling_state
=
589 brw_get_surface_num_multisamples(fb
->Visual
.samples
);
592 surf
[0] = (surface_type
<< BRW_SURFACE_TYPE_SHIFT
|
593 BRW_SURFACEFORMAT_B8G8R8A8_UNORM
<< BRW_SURFACE_FORMAT_SHIFT
);
595 surf
[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT
|
596 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT
|
597 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT
|
598 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT
);
600 surf
[1] = bo
? bo
->offset64
: 0;
601 surf
[2] = ((fb
->Width
- 1) << BRW_SURFACE_WIDTH_SHIFT
|
602 (fb
->Height
- 1) << BRW_SURFACE_HEIGHT_SHIFT
);
604 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
607 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
609 surf
[3] = (BRW_SURFACE_TILED
| BRW_SURFACE_TILED_Y
|
610 pitch_minus_1
<< BRW_SURFACE_PITCH_SHIFT
);
611 surf
[4] = multisampling_state
;
615 drm_intel_bo_emit_reloc(brw
->batch
.bo
,
616 brw
->wm
.base
.surf_offset
[surf_index
] + 4,
618 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
);
623 * Sets up a surface state structure to point at the given region.
624 * While it is only used for the front/back buffer currently, it should be
625 * usable for further buffers when doing ARB_draw_buffer support.
628 brw_update_renderbuffer_surface(struct brw_context
*brw
,
629 struct gl_renderbuffer
*rb
,
633 struct gl_context
*ctx
= &brw
->ctx
;
634 struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
635 struct intel_mipmap_tree
*mt
= irb
->mt
;
637 uint32_t tile_x
, tile_y
;
640 mesa_format rb_format
= _mesa_get_render_format(ctx
, intel_rb_format(irb
));
641 uint32_t surf_index
=
642 brw
->wm
.prog_data
->binding_table
.render_target_start
+ unit
;
646 if (rb
->TexImage
&& !brw
->has_surface_tile_offset
) {
647 intel_renderbuffer_get_tile_offsets(irb
, &tile_x
, &tile_y
);
649 if (tile_x
!= 0 || tile_y
!= 0) {
650 /* Original gen4 hardware couldn't draw to a non-tile-aligned
651 * destination in a miptree unless you actually setup your renderbuffer
652 * as a miptree and used the fragile lod/array_index/etc. controls to
653 * select the image. So, instead, we just make a new single-level
654 * miptree and render into that.
656 intel_renderbuffer_move_to_temp(brw
, irb
, false);
661 intel_miptree_used_for_rendering(irb
->mt
);
663 surf
= brw_state_batch(brw
, AUB_TRACE_SURFACE_STATE
, 6 * 4, 32,
664 &brw
->wm
.base
.surf_offset
[surf_index
]);
666 format
= brw
->render_target_format
[rb_format
];
667 if (unlikely(!brw
->format_supported_as_render_target
[rb_format
])) {
668 _mesa_problem(ctx
, "%s: renderbuffer format %s unsupported\n",
669 __FUNCTION__
, _mesa_get_format_name(rb_format
));
672 surf
[0] = (BRW_SURFACE_2D
<< BRW_SURFACE_TYPE_SHIFT
|
673 format
<< BRW_SURFACE_FORMAT_SHIFT
);
676 surf
[1] = (intel_renderbuffer_get_tile_offsets(irb
, &tile_x
, &tile_y
) +
679 surf
[2] = ((rb
->Width
- 1) << BRW_SURFACE_WIDTH_SHIFT
|
680 (rb
->Height
- 1) << BRW_SURFACE_HEIGHT_SHIFT
);
682 surf
[3] = (brw_get_surface_tiling_bits(mt
->tiling
) |
683 (mt
->pitch
- 1) << BRW_SURFACE_PITCH_SHIFT
);
685 surf
[4] = brw_get_surface_num_multisamples(mt
->num_samples
);
687 assert(brw
->has_surface_tile_offset
|| (tile_x
== 0 && tile_y
== 0));
688 /* Note that the low bits of these fields are missing, so
689 * there's the possibility of getting in trouble.
691 assert(tile_x
% 4 == 0);
692 assert(tile_y
% 2 == 0);
693 surf
[5] = ((tile_x
/ 4) << BRW_SURFACE_X_OFFSET_SHIFT
|
694 (tile_y
/ 2) << BRW_SURFACE_Y_OFFSET_SHIFT
|
695 (mt
->align_h
== 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE
: 0));
699 if (!ctx
->Color
.ColorLogicOpEnabled
&&
700 (ctx
->Color
.BlendEnabled
& (1 << unit
)))
701 surf
[0] |= BRW_SURFACE_BLEND_ENABLED
;
703 if (!ctx
->Color
.ColorMask
[unit
][0])
704 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT
;
705 if (!ctx
->Color
.ColorMask
[unit
][1])
706 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT
;
707 if (!ctx
->Color
.ColorMask
[unit
][2])
708 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT
;
710 /* As mentioned above, disable writes to the alpha component when the
711 * renderbuffer is XRGB.
713 if (ctx
->DrawBuffer
->Visual
.alphaBits
== 0 ||
714 !ctx
->Color
.ColorMask
[unit
][3]) {
715 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT
;
719 drm_intel_bo_emit_reloc(brw
->batch
.bo
,
720 brw
->wm
.base
.surf_offset
[surf_index
] + 4,
722 surf
[1] - mt
->bo
->offset64
,
723 I915_GEM_DOMAIN_RENDER
,
724 I915_GEM_DOMAIN_RENDER
);
728 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
731 brw_update_renderbuffer_surfaces(struct brw_context
*brw
)
733 struct gl_context
*ctx
= &brw
->ctx
;
736 /* _NEW_BUFFERS | _NEW_COLOR */
737 /* Update surfaces for drawing buffers */
738 if (ctx
->DrawBuffer
->_NumColorDrawBuffers
>= 1) {
739 for (i
= 0; i
< ctx
->DrawBuffer
->_NumColorDrawBuffers
; i
++) {
740 if (intel_renderbuffer(ctx
->DrawBuffer
->_ColorDrawBuffers
[i
])) {
741 brw
->vtbl
.update_renderbuffer_surface(brw
, ctx
->DrawBuffer
->_ColorDrawBuffers
[i
],
742 ctx
->DrawBuffer
->MaxNumLayers
> 0, i
);
744 brw
->vtbl
.update_null_renderbuffer_surface(brw
, i
);
748 brw
->vtbl
.update_null_renderbuffer_surface(brw
, 0);
750 brw
->state
.dirty
.brw
|= BRW_NEW_SURFACES
;
753 const struct brw_tracked_state brw_renderbuffer_surfaces
= {
755 .mesa
= (_NEW_COLOR
|
757 .brw
= BRW_NEW_BATCH
,
760 .emit
= brw_update_renderbuffer_surfaces
,
763 const struct brw_tracked_state gen6_renderbuffer_surfaces
= {
765 .mesa
= _NEW_BUFFERS
,
766 .brw
= BRW_NEW_BATCH
,
769 .emit
= brw_update_renderbuffer_surfaces
,
774 update_stage_texture_surfaces(struct brw_context
*brw
,
775 const struct gl_program
*prog
,
776 struct brw_stage_state
*stage_state
,
782 struct gl_context
*ctx
= &brw
->ctx
;
784 uint32_t *surf_offset
= stage_state
->surf_offset
;
786 surf_offset
+= stage_state
->prog_data
->binding_table
.gather_texture_start
;
788 surf_offset
+= stage_state
->prog_data
->binding_table
.texture_start
;
790 unsigned num_samplers
= _mesa_fls(prog
->SamplersUsed
);
791 for (unsigned s
= 0; s
< num_samplers
; s
++) {
794 if (prog
->SamplersUsed
& (1 << s
)) {
795 const unsigned unit
= prog
->SamplerUnits
[s
];
798 if (ctx
->Texture
.Unit
[unit
]._Current
) {
799 brw
->vtbl
.update_texture_surface(ctx
, unit
, surf_offset
+ s
, for_gather
);
807 * Construct SURFACE_STATE objects for enabled textures.
810 brw_update_texture_surfaces(struct brw_context
*brw
)
812 /* BRW_NEW_VERTEX_PROGRAM */
813 struct gl_program
*vs
= (struct gl_program
*) brw
->vertex_program
;
815 /* BRW_NEW_GEOMETRY_PROGRAM */
816 struct gl_program
*gs
= (struct gl_program
*) brw
->geometry_program
;
818 /* BRW_NEW_FRAGMENT_PROGRAM */
819 struct gl_program
*fs
= (struct gl_program
*) brw
->fragment_program
;
822 update_stage_texture_surfaces(brw
, vs
, &brw
->vs
.base
, false);
823 update_stage_texture_surfaces(brw
, gs
, &brw
->gs
.base
, false);
824 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, false);
826 /* emit alternate set of surface state for gather. this
827 * allows the surface format to be overridden for only the
828 * gather4 messages. */
830 if (vs
&& vs
->UsesGather
)
831 update_stage_texture_surfaces(brw
, vs
, &brw
->vs
.base
, true);
832 if (gs
&& gs
->UsesGather
)
833 update_stage_texture_surfaces(brw
, gs
, &brw
->gs
.base
, true);
834 if (fs
&& fs
->UsesGather
)
835 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, true);
838 brw
->state
.dirty
.brw
|= BRW_NEW_SURFACES
;
841 const struct brw_tracked_state brw_texture_surfaces
= {
843 .mesa
= _NEW_TEXTURE
,
844 .brw
= BRW_NEW_BATCH
|
845 BRW_NEW_UNIFORM_BUFFER
|
846 BRW_NEW_VERTEX_PROGRAM
|
847 BRW_NEW_GEOMETRY_PROGRAM
|
848 BRW_NEW_FRAGMENT_PROGRAM
,
851 .emit
= brw_update_texture_surfaces
,
855 brw_upload_ubo_surfaces(struct brw_context
*brw
,
856 struct gl_shader
*shader
,
857 struct brw_stage_state
*stage_state
,
858 struct brw_stage_prog_data
*prog_data
)
860 struct gl_context
*ctx
= &brw
->ctx
;
865 uint32_t *surf_offsets
=
866 &stage_state
->surf_offset
[prog_data
->binding_table
.ubo_start
];
868 for (int i
= 0; i
< shader
->NumUniformBlocks
; i
++) {
869 struct gl_uniform_buffer_binding
*binding
;
870 struct intel_buffer_object
*intel_bo
;
872 binding
= &ctx
->UniformBufferBindings
[shader
->UniformBlocks
[i
].Binding
];
873 intel_bo
= intel_buffer_object(binding
->BufferObject
);
875 intel_bufferobj_buffer(brw
, intel_bo
,
877 binding
->BufferObject
->Size
- binding
->Offset
);
879 /* Because behavior for referencing outside of the binding's size in the
880 * glBindBufferRange case is undefined, we can just bind the whole buffer
881 * glBindBufferBase wants and be a correct implementation.
883 brw_create_constant_surface(brw
, bo
, binding
->Offset
,
884 bo
->size
- binding
->Offset
,
886 shader
->Stage
== MESA_SHADER_FRAGMENT
);
889 if (shader
->NumUniformBlocks
)
890 brw
->state
.dirty
.brw
|= BRW_NEW_SURFACES
;
894 brw_upload_wm_ubo_surfaces(struct brw_context
*brw
)
896 struct gl_context
*ctx
= &brw
->ctx
;
898 struct gl_shader_program
*prog
= ctx
->_Shader
->_CurrentFragmentProgram
;
903 /* CACHE_NEW_WM_PROG */
904 brw_upload_ubo_surfaces(brw
, prog
->_LinkedShaders
[MESA_SHADER_FRAGMENT
],
905 &brw
->wm
.base
, &brw
->wm
.prog_data
->base
);
908 const struct brw_tracked_state brw_wm_ubo_surfaces
= {
910 .mesa
= _NEW_PROGRAM
,
911 .brw
= BRW_NEW_BATCH
| BRW_NEW_UNIFORM_BUFFER
,
912 .cache
= CACHE_NEW_WM_PROG
,
914 .emit
= brw_upload_wm_ubo_surfaces
,
918 brw_upload_abo_surfaces(struct brw_context
*brw
,
919 struct gl_shader_program
*prog
,
920 struct brw_stage_state
*stage_state
,
921 struct brw_stage_prog_data
*prog_data
)
923 struct gl_context
*ctx
= &brw
->ctx
;
924 uint32_t *surf_offsets
=
925 &stage_state
->surf_offset
[prog_data
->binding_table
.abo_start
];
927 for (int i
= 0; i
< prog
->NumAtomicBuffers
; i
++) {
928 struct gl_atomic_buffer_binding
*binding
=
929 &ctx
->AtomicBufferBindings
[prog
->AtomicBuffers
[i
].Binding
];
930 struct intel_buffer_object
*intel_bo
=
931 intel_buffer_object(binding
->BufferObject
);
932 drm_intel_bo
*bo
= intel_bufferobj_buffer(
933 brw
, intel_bo
, binding
->Offset
, intel_bo
->Base
.Size
- binding
->Offset
);
935 brw
->vtbl
.create_raw_surface(brw
, bo
, binding
->Offset
,
936 bo
->size
- binding
->Offset
,
937 &surf_offsets
[i
], true);
940 if (prog
->NumUniformBlocks
)
941 brw
->state
.dirty
.brw
|= BRW_NEW_SURFACES
;
945 brw_upload_wm_abo_surfaces(struct brw_context
*brw
)
947 struct gl_context
*ctx
= &brw
->ctx
;
949 struct gl_shader_program
*prog
= ctx
->Shader
._CurrentFragmentProgram
;
952 /* CACHE_NEW_WM_PROG */
953 brw_upload_abo_surfaces(brw
, prog
, &brw
->wm
.base
,
954 &brw
->wm
.prog_data
->base
);
958 const struct brw_tracked_state brw_wm_abo_surfaces
= {
960 .mesa
= _NEW_PROGRAM
,
961 .brw
= BRW_NEW_BATCH
| BRW_NEW_ATOMIC_BUFFER
,
962 .cache
= CACHE_NEW_WM_PROG
,
964 .emit
= brw_upload_wm_abo_surfaces
,
968 gen4_init_vtable_surface_functions(struct brw_context
*brw
)
970 brw
->vtbl
.update_texture_surface
= brw_update_texture_surface
;
971 brw
->vtbl
.update_renderbuffer_surface
= brw_update_renderbuffer_surface
;
972 brw
->vtbl
.update_null_renderbuffer_surface
=
973 brw_update_null_renderbuffer_surface
;
974 brw
->vtbl
.emit_buffer_surface_state
= gen4_emit_buffer_surface_state
;