2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keithw@vmware.com>
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "program/prog_parameter.h"
39 #include "intel_mipmap_tree.h"
40 #include "intel_batchbuffer.h"
41 #include "intel_tex.h"
42 #include "intel_fbo.h"
43 #include "intel_buffer_objects.h"
45 #include "brw_context.h"
46 #include "brw_state.h"
47 #include "brw_defines.h"
51 translate_tex_target(GLenum target
)
55 case GL_TEXTURE_1D_ARRAY_EXT
:
56 return BRW_SURFACE_1D
;
58 case GL_TEXTURE_RECTANGLE_NV
:
59 return BRW_SURFACE_2D
;
62 case GL_TEXTURE_2D_ARRAY_EXT
:
63 case GL_TEXTURE_EXTERNAL_OES
:
64 case GL_TEXTURE_2D_MULTISAMPLE
:
65 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY
:
66 return BRW_SURFACE_2D
;
69 return BRW_SURFACE_3D
;
71 case GL_TEXTURE_CUBE_MAP
:
72 case GL_TEXTURE_CUBE_MAP_ARRAY
:
73 return BRW_SURFACE_CUBE
;
76 unreachable("not reached");
81 brw_get_surface_tiling_bits(uint32_t tiling
)
85 return BRW_SURFACE_TILED
;
87 return BRW_SURFACE_TILED
| BRW_SURFACE_TILED_Y
;
95 brw_get_surface_num_multisamples(unsigned num_samples
)
98 return BRW_SURFACE_MULTISAMPLECOUNT_4
;
100 return BRW_SURFACE_MULTISAMPLECOUNT_1
;
104 brw_configure_w_tiled(const struct intel_mipmap_tree
*mt
,
105 bool is_render_target
,
106 unsigned *width
, unsigned *height
,
107 unsigned *pitch
, uint32_t *tiling
, unsigned *format
)
109 static const unsigned halign_stencil
= 8;
111 /* In Y-tiling a row is twice as wide as in W-tiling, and consequently
112 * there are half as many rows.
113 * In addition, mip-levels are accessed manually by the program and
114 * therefore the surface is set up to cover all the mip-levels for one slice.
115 * (Hardware is still used to access individual slices).
117 *tiling
= I915_TILING_Y
;
118 *pitch
= mt
->pitch
* 2;
119 *width
= ALIGN(mt
->total_width
, halign_stencil
) * 2;
120 *height
= (mt
->total_height
/ mt
->physical_depth0
) / 2;
122 if (is_render_target
) {
123 *format
= BRW_SURFACEFORMAT_R8_UINT
;
129 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
133 brw_get_texture_swizzle(const struct gl_context
*ctx
,
134 const struct gl_texture_object
*t
)
136 const struct gl_texture_image
*img
= t
->Image
[0][t
->BaseLevel
];
138 int swizzles
[SWIZZLE_NIL
+ 1] = {
148 if (img
->_BaseFormat
== GL_DEPTH_COMPONENT
||
149 img
->_BaseFormat
== GL_DEPTH_STENCIL
) {
150 GLenum depth_mode
= t
->DepthMode
;
152 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
153 * with depth component data specified with a sized internal format.
154 * Otherwise, it's left at the old default, GL_LUMINANCE.
156 if (_mesa_is_gles3(ctx
) &&
157 img
->InternalFormat
!= GL_DEPTH_COMPONENT
&&
158 img
->InternalFormat
!= GL_DEPTH_STENCIL
) {
162 switch (depth_mode
) {
164 swizzles
[0] = SWIZZLE_ZERO
;
165 swizzles
[1] = SWIZZLE_ZERO
;
166 swizzles
[2] = SWIZZLE_ZERO
;
167 swizzles
[3] = SWIZZLE_X
;
170 swizzles
[0] = SWIZZLE_X
;
171 swizzles
[1] = SWIZZLE_X
;
172 swizzles
[2] = SWIZZLE_X
;
173 swizzles
[3] = SWIZZLE_ONE
;
176 swizzles
[0] = SWIZZLE_X
;
177 swizzles
[1] = SWIZZLE_X
;
178 swizzles
[2] = SWIZZLE_X
;
179 swizzles
[3] = SWIZZLE_X
;
182 swizzles
[0] = SWIZZLE_X
;
183 swizzles
[1] = SWIZZLE_ZERO
;
184 swizzles
[2] = SWIZZLE_ZERO
;
185 swizzles
[3] = SWIZZLE_ONE
;
190 /* If the texture's format is alpha-only, force R, G, and B to
191 * 0.0. Similarly, if the texture's format has no alpha channel,
192 * force the alpha value read to 1.0. This allows for the
193 * implementation to use an RGBA texture for any of these formats
194 * without leaking any unexpected values.
196 switch (img
->_BaseFormat
) {
198 swizzles
[0] = SWIZZLE_ZERO
;
199 swizzles
[1] = SWIZZLE_ZERO
;
200 swizzles
[2] = SWIZZLE_ZERO
;
203 if (t
->_IsIntegerFormat
) {
204 swizzles
[0] = SWIZZLE_X
;
205 swizzles
[1] = SWIZZLE_X
;
206 swizzles
[2] = SWIZZLE_X
;
207 swizzles
[3] = SWIZZLE_ONE
;
213 if (_mesa_get_format_bits(img
->TexFormat
, GL_ALPHA_BITS
) > 0)
214 swizzles
[3] = SWIZZLE_ONE
;
218 return MAKE_SWIZZLE4(swizzles
[GET_SWZ(t
->_Swizzle
, 0)],
219 swizzles
[GET_SWZ(t
->_Swizzle
, 1)],
220 swizzles
[GET_SWZ(t
->_Swizzle
, 2)],
221 swizzles
[GET_SWZ(t
->_Swizzle
, 3)]);
225 gen4_emit_buffer_surface_state(struct brw_context
*brw
,
226 uint32_t *out_offset
,
228 unsigned buffer_offset
,
229 unsigned surface_format
,
230 unsigned buffer_size
,
234 uint32_t *surf
= brw_state_batch(brw
, AUB_TRACE_SURFACE_STATE
,
235 6 * 4, 32, out_offset
);
236 memset(surf
, 0, 6 * 4);
238 surf
[0] = BRW_SURFACE_BUFFER
<< BRW_SURFACE_TYPE_SHIFT
|
239 surface_format
<< BRW_SURFACE_FORMAT_SHIFT
|
240 (brw
->gen
>= 6 ? BRW_SURFACE_RC_READ_WRITE
: 0);
241 surf
[1] = (bo
? bo
->offset64
: 0) + buffer_offset
; /* reloc */
242 surf
[2] = (buffer_size
& 0x7f) << BRW_SURFACE_WIDTH_SHIFT
|
243 ((buffer_size
>> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT
;
244 surf
[3] = ((buffer_size
>> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT
|
245 (pitch
- 1) << BRW_SURFACE_PITCH_SHIFT
;
247 /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
248 * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
249 * physical cache. It is mapped in hardware to the sampler cache."
252 drm_intel_bo_emit_reloc(brw
->batch
.bo
, *out_offset
+ 4,
254 I915_GEM_DOMAIN_SAMPLER
,
255 (rw
? I915_GEM_DOMAIN_SAMPLER
: 0));
260 brw_update_buffer_texture_surface(struct gl_context
*ctx
,
262 uint32_t *surf_offset
)
264 struct brw_context
*brw
= brw_context(ctx
);
265 struct gl_texture_object
*tObj
= ctx
->Texture
.Unit
[unit
]._Current
;
266 struct intel_buffer_object
*intel_obj
=
267 intel_buffer_object(tObj
->BufferObject
);
268 uint32_t size
= tObj
->BufferSize
;
269 drm_intel_bo
*bo
= NULL
;
270 mesa_format format
= tObj
->_BufferObjectFormat
;
271 uint32_t brw_format
= brw_format_for_mesa_format(format
);
272 int texel_size
= _mesa_get_format_bytes(format
);
275 size
= MIN2(size
, intel_obj
->Base
.Size
);
276 bo
= intel_bufferobj_buffer(brw
, intel_obj
, tObj
->BufferOffset
, size
);
279 if (brw_format
== 0 && format
!= MESA_FORMAT_RGBA_FLOAT32
) {
280 _mesa_problem(NULL
, "bad format %s for texture buffer\n",
281 _mesa_get_format_name(format
));
284 brw
->vtbl
.emit_buffer_surface_state(brw
, surf_offset
, bo
,
293 brw_update_texture_surface(struct gl_context
*ctx
,
295 uint32_t *surf_offset
,
298 struct brw_context
*brw
= brw_context(ctx
);
299 struct gl_texture_object
*tObj
= ctx
->Texture
.Unit
[unit
]._Current
;
300 struct intel_texture_object
*intelObj
= intel_texture_object(tObj
);
301 struct intel_mipmap_tree
*mt
= intelObj
->mt
;
302 struct gl_sampler_object
*sampler
= _mesa_get_samplerobj(ctx
, unit
);
305 /* BRW_NEW_TEXTURE_BUFFER */
306 if (tObj
->Target
== GL_TEXTURE_BUFFER
) {
307 brw_update_buffer_texture_surface(ctx
, unit
, surf_offset
);
311 surf
= brw_state_batch(brw
, AUB_TRACE_SURFACE_STATE
,
312 6 * 4, 32, surf_offset
);
314 uint32_t tex_format
= translate_tex_format(brw
, mt
->format
,
315 sampler
->sRGBDecode
);
318 /* Sandybridge's gather4 message is broken for integer formats.
319 * To work around this, we pretend the surface is UNORM for
320 * 8 or 16-bit formats, and emit shader instructions to recover
321 * the real INT/UINT value. For 32-bit formats, we pretend
322 * the surface is FLOAT, and simply reinterpret the resulting
325 switch (tex_format
) {
326 case BRW_SURFACEFORMAT_R8_SINT
:
327 case BRW_SURFACEFORMAT_R8_UINT
:
328 tex_format
= BRW_SURFACEFORMAT_R8_UNORM
;
331 case BRW_SURFACEFORMAT_R16_SINT
:
332 case BRW_SURFACEFORMAT_R16_UINT
:
333 tex_format
= BRW_SURFACEFORMAT_R16_UNORM
;
336 case BRW_SURFACEFORMAT_R32_SINT
:
337 case BRW_SURFACEFORMAT_R32_UINT
:
338 tex_format
= BRW_SURFACEFORMAT_R32_FLOAT
;
346 surf
[0] = (translate_tex_target(tObj
->Target
) << BRW_SURFACE_TYPE_SHIFT
|
347 BRW_SURFACE_MIPMAPLAYOUT_BELOW
<< BRW_SURFACE_MIPLAYOUT_SHIFT
|
348 BRW_SURFACE_CUBEFACE_ENABLES
|
349 tex_format
<< BRW_SURFACE_FORMAT_SHIFT
);
351 surf
[1] = mt
->bo
->offset64
+ mt
->offset
; /* reloc */
353 surf
[2] = ((intelObj
->_MaxLevel
- tObj
->BaseLevel
) << BRW_SURFACE_LOD_SHIFT
|
354 (mt
->logical_width0
- 1) << BRW_SURFACE_WIDTH_SHIFT
|
355 (mt
->logical_height0
- 1) << BRW_SURFACE_HEIGHT_SHIFT
);
357 surf
[3] = (brw_get_surface_tiling_bits(mt
->tiling
) |
358 (mt
->logical_depth0
- 1) << BRW_SURFACE_DEPTH_SHIFT
|
359 (mt
->pitch
- 1) << BRW_SURFACE_PITCH_SHIFT
);
361 surf
[4] = (brw_get_surface_num_multisamples(mt
->num_samples
) |
362 SET_FIELD(tObj
->BaseLevel
- mt
->first_level
, BRW_SURFACE_MIN_LOD
));
364 surf
[5] = mt
->align_h
== 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE
: 0;
366 /* Emit relocation to surface contents */
367 drm_intel_bo_emit_reloc(brw
->batch
.bo
,
370 surf
[1] - mt
->bo
->offset64
,
371 I915_GEM_DOMAIN_SAMPLER
, 0);
375 * Create the constant buffer surface. Vertex/fragment shader constants will be
376 * read from this buffer with Data Port Read instructions/messages.
379 brw_create_constant_surface(struct brw_context
*brw
,
383 uint32_t *out_offset
,
386 uint32_t stride
= dword_pitch
? 4 : 16;
387 uint32_t elements
= ALIGN(size
, stride
) / stride
;
389 brw
->vtbl
.emit_buffer_surface_state(brw
, out_offset
, bo
, offset
,
390 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT
,
391 elements
, stride
, false);
395 * Set up a binding table entry for use by stream output logic (transform
398 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
401 brw_update_sol_surface(struct brw_context
*brw
,
402 struct gl_buffer_object
*buffer_obj
,
403 uint32_t *out_offset
, unsigned num_vector_components
,
404 unsigned stride_dwords
, unsigned offset_dwords
)
406 struct intel_buffer_object
*intel_bo
= intel_buffer_object(buffer_obj
);
407 uint32_t offset_bytes
= 4 * offset_dwords
;
408 drm_intel_bo
*bo
= intel_bufferobj_buffer(brw
, intel_bo
,
410 buffer_obj
->Size
- offset_bytes
);
411 uint32_t *surf
= brw_state_batch(brw
, AUB_TRACE_SURFACE_STATE
, 6 * 4, 32,
413 uint32_t pitch_minus_1
= 4*stride_dwords
- 1;
414 size_t size_dwords
= buffer_obj
->Size
/ 4;
415 uint32_t buffer_size_minus_1
, width
, height
, depth
, surface_format
;
417 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
418 * too big to map using a single binding table entry?
420 assert((size_dwords
- offset_dwords
) / stride_dwords
421 <= BRW_MAX_NUM_BUFFER_ENTRIES
);
423 if (size_dwords
> offset_dwords
+ num_vector_components
) {
424 /* There is room for at least 1 transform feedback output in the buffer.
425 * Compute the number of additional transform feedback outputs the
426 * buffer has room for.
428 buffer_size_minus_1
=
429 (size_dwords
- offset_dwords
- num_vector_components
) / stride_dwords
;
431 /* There isn't even room for a single transform feedback output in the
432 * buffer. We can't configure the binding table entry to prevent output
433 * entirely; we'll have to rely on the geometry shader to detect
434 * overflow. But to minimize the damage in case of a bug, set up the
435 * binding table entry to just allow a single output.
437 buffer_size_minus_1
= 0;
439 width
= buffer_size_minus_1
& 0x7f;
440 height
= (buffer_size_minus_1
& 0xfff80) >> 7;
441 depth
= (buffer_size_minus_1
& 0x7f00000) >> 20;
443 switch (num_vector_components
) {
445 surface_format
= BRW_SURFACEFORMAT_R32_FLOAT
;
448 surface_format
= BRW_SURFACEFORMAT_R32G32_FLOAT
;
451 surface_format
= BRW_SURFACEFORMAT_R32G32B32_FLOAT
;
454 surface_format
= BRW_SURFACEFORMAT_R32G32B32A32_FLOAT
;
457 unreachable("Invalid vector size for transform feedback output");
460 surf
[0] = BRW_SURFACE_BUFFER
<< BRW_SURFACE_TYPE_SHIFT
|
461 BRW_SURFACE_MIPMAPLAYOUT_BELOW
<< BRW_SURFACE_MIPLAYOUT_SHIFT
|
462 surface_format
<< BRW_SURFACE_FORMAT_SHIFT
|
463 BRW_SURFACE_RC_READ_WRITE
;
464 surf
[1] = bo
->offset64
+ offset_bytes
; /* reloc */
465 surf
[2] = (width
<< BRW_SURFACE_WIDTH_SHIFT
|
466 height
<< BRW_SURFACE_HEIGHT_SHIFT
);
467 surf
[3] = (depth
<< BRW_SURFACE_DEPTH_SHIFT
|
468 pitch_minus_1
<< BRW_SURFACE_PITCH_SHIFT
);
472 /* Emit relocation to surface contents. */
473 drm_intel_bo_emit_reloc(brw
->batch
.bo
,
476 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
);
479 /* Creates a new WM constant buffer reflecting the current fragment program's
480 * constants, if needed by the fragment program.
482 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
486 brw_upload_wm_pull_constants(struct brw_context
*brw
)
488 struct brw_stage_state
*stage_state
= &brw
->wm
.base
;
489 /* BRW_NEW_FRAGMENT_PROGRAM */
490 struct brw_fragment_program
*fp
=
491 (struct brw_fragment_program
*) brw
->fragment_program
;
492 /* BRW_NEW_FS_PROG_DATA */
493 struct brw_stage_prog_data
*prog_data
= &brw
->wm
.prog_data
->base
;
495 /* _NEW_PROGRAM_CONSTANTS */
496 brw_upload_pull_constants(brw
, BRW_NEW_SURFACES
, &fp
->program
.Base
,
497 stage_state
, prog_data
, true);
500 const struct brw_tracked_state brw_wm_pull_constants
= {
502 .mesa
= _NEW_PROGRAM_CONSTANTS
,
503 .brw
= BRW_NEW_BATCH
|
504 BRW_NEW_FRAGMENT_PROGRAM
|
505 BRW_NEW_FS_PROG_DATA
,
507 .emit
= brw_upload_wm_pull_constants
,
511 * Creates a null renderbuffer surface.
513 * This is used when the shader doesn't write to any color output. An FB
514 * write to target 0 will still be emitted, because that's how the thread is
515 * terminated (and computed depth is returned), so we need to have the
516 * hardware discard the target 0 color output.
519 brw_update_null_renderbuffer_surface(struct brw_context
*brw
, unsigned int unit
)
521 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
524 * A null surface will be used in instances where an actual surface is
525 * not bound. When a write message is generated to a null surface, no
526 * actual surface is written to. When a read message (including any
527 * sampling engine message) is generated to a null surface, the result
528 * is all zeros. Note that a null surface type is allowed to be used
529 * with all messages, even if it is not specificially indicated as
530 * supported. All of the remaining fields in surface state are ignored
531 * for null surfaces, with the following exceptions:
533 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
534 * depth buffer’s corresponding state for all render target surfaces,
537 * - Surface Format must be R8G8B8A8_UNORM.
539 struct gl_context
*ctx
= &brw
->ctx
;
541 unsigned surface_type
= BRW_SURFACE_NULL
;
542 drm_intel_bo
*bo
= NULL
;
543 unsigned pitch_minus_1
= 0;
544 uint32_t multisampling_state
= 0;
545 /* BRW_NEW_FS_PROG_DATA */
546 uint32_t surf_index
=
547 brw
->wm
.prog_data
->binding_table
.render_target_start
+ unit
;
550 const struct gl_framebuffer
*fb
= ctx
->DrawBuffer
;
552 surf
= brw_state_batch(brw
, AUB_TRACE_SURFACE_STATE
, 6 * 4, 32,
553 &brw
->wm
.base
.surf_offset
[surf_index
]);
555 if (fb
->Visual
.samples
> 1) {
556 /* On Gen6, null render targets seem to cause GPU hangs when
557 * multisampling. So work around this problem by rendering into dummy
560 * To decrease the amount of memory needed by the workaround buffer, we
561 * set its pitch to 128 bytes (the width of a Y tile). This means that
562 * the amount of memory needed for the workaround buffer is
563 * (width_in_tiles + height_in_tiles - 1) tiles.
565 * Note that since the workaround buffer will be interpreted by the
566 * hardware as an interleaved multisampled buffer, we need to compute
567 * width_in_tiles and height_in_tiles by dividing the width and height
568 * by 16 rather than the normal Y-tile size of 32.
570 unsigned width_in_tiles
= ALIGN(fb
->Width
, 16) / 16;
571 unsigned height_in_tiles
= ALIGN(fb
->Height
, 16) / 16;
572 unsigned size_needed
= (width_in_tiles
+ height_in_tiles
- 1) * 4096;
573 brw_get_scratch_bo(brw
, &brw
->wm
.multisampled_null_render_target_bo
,
575 bo
= brw
->wm
.multisampled_null_render_target_bo
;
576 surface_type
= BRW_SURFACE_2D
;
578 multisampling_state
=
579 brw_get_surface_num_multisamples(fb
->Visual
.samples
);
582 surf
[0] = (surface_type
<< BRW_SURFACE_TYPE_SHIFT
|
583 BRW_SURFACEFORMAT_B8G8R8A8_UNORM
<< BRW_SURFACE_FORMAT_SHIFT
);
585 surf
[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT
|
586 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT
|
587 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT
|
588 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT
);
590 surf
[1] = bo
? bo
->offset64
: 0;
591 surf
[2] = ((fb
->Width
- 1) << BRW_SURFACE_WIDTH_SHIFT
|
592 (fb
->Height
- 1) << BRW_SURFACE_HEIGHT_SHIFT
);
594 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
597 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
599 surf
[3] = (BRW_SURFACE_TILED
| BRW_SURFACE_TILED_Y
|
600 pitch_minus_1
<< BRW_SURFACE_PITCH_SHIFT
);
601 surf
[4] = multisampling_state
;
605 drm_intel_bo_emit_reloc(brw
->batch
.bo
,
606 brw
->wm
.base
.surf_offset
[surf_index
] + 4,
608 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
);
613 * Sets up a surface state structure to point at the given region.
614 * While it is only used for the front/back buffer currently, it should be
615 * usable for further buffers when doing ARB_draw_buffer support.
618 brw_update_renderbuffer_surface(struct brw_context
*brw
,
619 struct gl_renderbuffer
*rb
,
623 struct gl_context
*ctx
= &brw
->ctx
;
624 struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
625 struct intel_mipmap_tree
*mt
= irb
->mt
;
627 uint32_t tile_x
, tile_y
;
630 mesa_format rb_format
= _mesa_get_render_format(ctx
, intel_rb_format(irb
));
631 /* BRW_NEW_FS_PROG_DATA */
632 uint32_t surf_index
=
633 brw
->wm
.prog_data
->binding_table
.render_target_start
+ unit
;
637 if (rb
->TexImage
&& !brw
->has_surface_tile_offset
) {
638 intel_renderbuffer_get_tile_offsets(irb
, &tile_x
, &tile_y
);
640 if (tile_x
!= 0 || tile_y
!= 0) {
641 /* Original gen4 hardware couldn't draw to a non-tile-aligned
642 * destination in a miptree unless you actually set up your renderbuffer
643 * as a miptree and used the fragile lod/array_index/etc. controls to
644 * select the image. So, instead, we just make a new single-level
645 * miptree and render into that.
647 intel_renderbuffer_move_to_temp(brw
, irb
, false);
652 intel_miptree_used_for_rendering(irb
->mt
);
654 surf
= brw_state_batch(brw
, AUB_TRACE_SURFACE_STATE
, 6 * 4, 32,
655 &brw
->wm
.base
.surf_offset
[surf_index
]);
657 format
= brw
->render_target_format
[rb_format
];
658 if (unlikely(!brw
->format_supported_as_render_target
[rb_format
])) {
659 _mesa_problem(ctx
, "%s: renderbuffer format %s unsupported\n",
660 __FUNCTION__
, _mesa_get_format_name(rb_format
));
663 surf
[0] = (BRW_SURFACE_2D
<< BRW_SURFACE_TYPE_SHIFT
|
664 format
<< BRW_SURFACE_FORMAT_SHIFT
);
667 assert(mt
->offset
% mt
->cpp
== 0);
668 surf
[1] = (intel_renderbuffer_get_tile_offsets(irb
, &tile_x
, &tile_y
) +
669 mt
->bo
->offset64
+ mt
->offset
);
671 surf
[2] = ((rb
->Width
- 1) << BRW_SURFACE_WIDTH_SHIFT
|
672 (rb
->Height
- 1) << BRW_SURFACE_HEIGHT_SHIFT
);
674 surf
[3] = (brw_get_surface_tiling_bits(mt
->tiling
) |
675 (mt
->pitch
- 1) << BRW_SURFACE_PITCH_SHIFT
);
677 surf
[4] = brw_get_surface_num_multisamples(mt
->num_samples
);
679 assert(brw
->has_surface_tile_offset
|| (tile_x
== 0 && tile_y
== 0));
680 /* Note that the low bits of these fields are missing, so
681 * there's the possibility of getting in trouble.
683 assert(tile_x
% 4 == 0);
684 assert(tile_y
% 2 == 0);
685 surf
[5] = ((tile_x
/ 4) << BRW_SURFACE_X_OFFSET_SHIFT
|
686 (tile_y
/ 2) << BRW_SURFACE_Y_OFFSET_SHIFT
|
687 (mt
->align_h
== 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE
: 0));
691 if (!ctx
->Color
.ColorLogicOpEnabled
&&
692 (ctx
->Color
.BlendEnabled
& (1 << unit
)))
693 surf
[0] |= BRW_SURFACE_BLEND_ENABLED
;
695 if (!ctx
->Color
.ColorMask
[unit
][0])
696 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT
;
697 if (!ctx
->Color
.ColorMask
[unit
][1])
698 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT
;
699 if (!ctx
->Color
.ColorMask
[unit
][2])
700 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT
;
702 /* As mentioned above, disable writes to the alpha component when the
703 * renderbuffer is XRGB.
705 if (ctx
->DrawBuffer
->Visual
.alphaBits
== 0 ||
706 !ctx
->Color
.ColorMask
[unit
][3]) {
707 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT
;
711 drm_intel_bo_emit_reloc(brw
->batch
.bo
,
712 brw
->wm
.base
.surf_offset
[surf_index
] + 4,
714 surf
[1] - mt
->bo
->offset64
,
715 I915_GEM_DOMAIN_RENDER
,
716 I915_GEM_DOMAIN_RENDER
);
720 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
723 brw_update_renderbuffer_surfaces(struct brw_context
*brw
)
725 struct gl_context
*ctx
= &brw
->ctx
;
728 /* _NEW_BUFFERS | _NEW_COLOR */
729 /* Update surfaces for drawing buffers */
730 if (ctx
->DrawBuffer
->_NumColorDrawBuffers
>= 1) {
731 for (i
= 0; i
< ctx
->DrawBuffer
->_NumColorDrawBuffers
; i
++) {
732 if (intel_renderbuffer(ctx
->DrawBuffer
->_ColorDrawBuffers
[i
])) {
733 brw
->vtbl
.update_renderbuffer_surface(brw
, ctx
->DrawBuffer
->_ColorDrawBuffers
[i
],
734 ctx
->DrawBuffer
->MaxNumLayers
> 0, i
);
736 brw
->vtbl
.update_null_renderbuffer_surface(brw
, i
);
740 brw
->vtbl
.update_null_renderbuffer_surface(brw
, 0);
742 brw
->state
.dirty
.brw
|= BRW_NEW_SURFACES
;
745 const struct brw_tracked_state brw_renderbuffer_surfaces
= {
747 .mesa
= _NEW_BUFFERS
|
749 .brw
= BRW_NEW_BATCH
|
750 BRW_NEW_FS_PROG_DATA
,
752 .emit
= brw_update_renderbuffer_surfaces
,
755 const struct brw_tracked_state gen6_renderbuffer_surfaces
= {
757 .mesa
= _NEW_BUFFERS
,
758 .brw
= BRW_NEW_BATCH
,
760 .emit
= brw_update_renderbuffer_surfaces
,
765 update_stage_texture_surfaces(struct brw_context
*brw
,
766 const struct gl_program
*prog
,
767 struct brw_stage_state
*stage_state
,
773 struct gl_context
*ctx
= &brw
->ctx
;
775 uint32_t *surf_offset
= stage_state
->surf_offset
;
777 /* BRW_NEW_*_PROG_DATA */
779 surf_offset
+= stage_state
->prog_data
->binding_table
.gather_texture_start
;
781 surf_offset
+= stage_state
->prog_data
->binding_table
.texture_start
;
783 unsigned num_samplers
= _mesa_fls(prog
->SamplersUsed
);
784 for (unsigned s
= 0; s
< num_samplers
; s
++) {
787 if (prog
->SamplersUsed
& (1 << s
)) {
788 const unsigned unit
= prog
->SamplerUnits
[s
];
791 if (ctx
->Texture
.Unit
[unit
]._Current
) {
792 brw
->vtbl
.update_texture_surface(ctx
, unit
, surf_offset
+ s
, for_gather
);
800 * Construct SURFACE_STATE objects for enabled textures.
803 brw_update_texture_surfaces(struct brw_context
*brw
)
805 /* BRW_NEW_VERTEX_PROGRAM */
806 struct gl_program
*vs
= (struct gl_program
*) brw
->vertex_program
;
808 /* BRW_NEW_GEOMETRY_PROGRAM */
809 struct gl_program
*gs
= (struct gl_program
*) brw
->geometry_program
;
811 /* BRW_NEW_FRAGMENT_PROGRAM */
812 struct gl_program
*fs
= (struct gl_program
*) brw
->fragment_program
;
815 update_stage_texture_surfaces(brw
, vs
, &brw
->vs
.base
, false);
816 update_stage_texture_surfaces(brw
, gs
, &brw
->gs
.base
, false);
817 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, false);
819 /* Emit an alternate set of surface state for gather. This
820 * allows the surface format to be overridden for only the
821 * gather4 messages. */
823 if (vs
&& vs
->UsesGather
)
824 update_stage_texture_surfaces(brw
, vs
, &brw
->vs
.base
, true);
825 if (gs
&& gs
->UsesGather
)
826 update_stage_texture_surfaces(brw
, gs
, &brw
->gs
.base
, true);
827 if (fs
&& fs
->UsesGather
)
828 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, true);
831 brw
->state
.dirty
.brw
|= BRW_NEW_SURFACES
;
834 const struct brw_tracked_state brw_texture_surfaces
= {
836 .mesa
= _NEW_TEXTURE
,
837 .brw
= BRW_NEW_BATCH
|
838 BRW_NEW_FRAGMENT_PROGRAM
|
839 BRW_NEW_FS_PROG_DATA
|
840 BRW_NEW_GEOMETRY_PROGRAM
|
841 BRW_NEW_GS_PROG_DATA
|
842 BRW_NEW_TEXTURE_BUFFER
|
843 BRW_NEW_VERTEX_PROGRAM
|
844 BRW_NEW_VS_PROG_DATA
,
846 .emit
= brw_update_texture_surfaces
,
850 brw_upload_ubo_surfaces(struct brw_context
*brw
,
851 struct gl_shader
*shader
,
852 struct brw_stage_state
*stage_state
,
853 struct brw_stage_prog_data
*prog_data
,
856 struct gl_context
*ctx
= &brw
->ctx
;
861 uint32_t *surf_offsets
=
862 &stage_state
->surf_offset
[prog_data
->binding_table
.ubo_start
];
864 for (int i
= 0; i
< shader
->NumUniformBlocks
; i
++) {
865 struct gl_uniform_buffer_binding
*binding
;
866 struct intel_buffer_object
*intel_bo
;
868 binding
= &ctx
->UniformBufferBindings
[shader
->UniformBlocks
[i
].Binding
];
869 intel_bo
= intel_buffer_object(binding
->BufferObject
);
871 intel_bufferobj_buffer(brw
, intel_bo
,
873 binding
->BufferObject
->Size
- binding
->Offset
);
875 /* Because behavior for referencing outside of the binding's size in the
876 * glBindBufferRange case is undefined, we can just bind the whole buffer
877 * glBindBufferBase wants and be a correct implementation.
879 brw_create_constant_surface(brw
, bo
, binding
->Offset
,
880 bo
->size
- binding
->Offset
,
885 if (shader
->NumUniformBlocks
)
886 brw
->state
.dirty
.brw
|= BRW_NEW_SURFACES
;
890 brw_upload_wm_ubo_surfaces(struct brw_context
*brw
)
892 struct gl_context
*ctx
= &brw
->ctx
;
894 struct gl_shader_program
*prog
= ctx
->_Shader
->_CurrentFragmentProgram
;
899 /* BRW_NEW_FS_PROG_DATA */
900 brw_upload_ubo_surfaces(brw
, prog
->_LinkedShaders
[MESA_SHADER_FRAGMENT
],
901 &brw
->wm
.base
, &brw
->wm
.prog_data
->base
, true);
904 const struct brw_tracked_state brw_wm_ubo_surfaces
= {
906 .mesa
= _NEW_PROGRAM
,
907 .brw
= BRW_NEW_BATCH
|
908 BRW_NEW_FS_PROG_DATA
|
909 BRW_NEW_UNIFORM_BUFFER
,
911 .emit
= brw_upload_wm_ubo_surfaces
,
915 brw_upload_abo_surfaces(struct brw_context
*brw
,
916 struct gl_shader_program
*prog
,
917 struct brw_stage_state
*stage_state
,
918 struct brw_stage_prog_data
*prog_data
)
920 struct gl_context
*ctx
= &brw
->ctx
;
921 uint32_t *surf_offsets
=
922 &stage_state
->surf_offset
[prog_data
->binding_table
.abo_start
];
924 for (int i
= 0; i
< prog
->NumAtomicBuffers
; i
++) {
925 struct gl_atomic_buffer_binding
*binding
=
926 &ctx
->AtomicBufferBindings
[prog
->AtomicBuffers
[i
].Binding
];
927 struct intel_buffer_object
*intel_bo
=
928 intel_buffer_object(binding
->BufferObject
);
929 drm_intel_bo
*bo
= intel_bufferobj_buffer(
930 brw
, intel_bo
, binding
->Offset
, intel_bo
->Base
.Size
- binding
->Offset
);
932 brw
->vtbl
.create_raw_surface(brw
, bo
, binding
->Offset
,
933 bo
->size
- binding
->Offset
,
934 &surf_offsets
[i
], true);
937 if (prog
->NumAtomicBuffers
)
938 brw
->state
.dirty
.brw
|= BRW_NEW_SURFACES
;
942 brw_upload_wm_abo_surfaces(struct brw_context
*brw
)
944 struct gl_context
*ctx
= &brw
->ctx
;
946 struct gl_shader_program
*prog
= ctx
->Shader
._CurrentFragmentProgram
;
949 /* BRW_NEW_FS_PROG_DATA */
950 brw_upload_abo_surfaces(brw
, prog
, &brw
->wm
.base
,
951 &brw
->wm
.prog_data
->base
);
955 const struct brw_tracked_state brw_wm_abo_surfaces
= {
957 .mesa
= _NEW_PROGRAM
,
958 .brw
= BRW_NEW_ATOMIC_BUFFER
|
960 BRW_NEW_FS_PROG_DATA
,
962 .emit
= brw_upload_wm_abo_surfaces
,
966 gen4_init_vtable_surface_functions(struct brw_context
*brw
)
968 brw
->vtbl
.update_texture_surface
= brw_update_texture_surface
;
969 brw
->vtbl
.update_renderbuffer_surface
= brw_update_renderbuffer_surface
;
970 brw
->vtbl
.update_null_renderbuffer_surface
=
971 brw_update_null_renderbuffer_surface
;
972 brw
->vtbl
.emit_buffer_surface_state
= gen4_emit_buffer_surface_state
;