2 * Copyright © 2013 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 #include "main/teximage.h"
25 #include "main/blend.h"
26 #include "main/fbobject.h"
27 #include "main/renderbuffer.h"
28 #include "main/glformats.h"
30 #include "util/ralloc.h"
32 #include "intel_fbo.h"
34 #include "brw_blorp.h"
35 #include "brw_meta_util.h"
36 #include "brw_context.h"
38 #include "brw_state.h"
40 #define FILE_DEBUG_FLAG DEBUG_BLORP
42 struct brw_blorp_const_color_prog_key
44 bool use_simd16_replicated_data
;
49 * Parameters for a blorp operation where the fragment shader outputs a
50 * constant color. This is used for both fast color clears and color
53 class brw_blorp_const_color_params
: public brw_blorp_params
56 brw_blorp_const_color_prog_key wm_prog_key
;
59 class brw_blorp_clear_params
: public brw_blorp_const_color_params
62 brw_blorp_clear_params(struct brw_context
*brw
,
63 struct gl_framebuffer
*fb
,
64 struct gl_renderbuffer
*rb
,
73 * Parameters for a blorp operation that performs a "render target resolve".
74 * This is used to resolve pending fast clear pixels before a color buffer is
75 * used for texturing, ReadPixels, or scanout.
77 class brw_blorp_rt_resolve_params
: public brw_blorp_const_color_params
80 brw_blorp_rt_resolve_params(struct brw_context
*brw
,
81 struct intel_mipmap_tree
*mt
);
85 class brw_blorp_const_color_program
88 brw_blorp_const_color_program(struct brw_context
*brw
,
89 const brw_blorp_const_color_prog_key
*key
);
90 ~brw_blorp_const_color_program();
92 const GLuint
*compile(struct brw_context
*brw
, GLuint
*program_size
);
94 brw_blorp_prog_data prog_data
;
100 const brw_blorp_const_color_prog_key
*key
;
101 struct brw_codegen func
;
103 /* Thread dispatch header */
106 /* Pixel X/Y coordinates (always in R1). */
109 /* Register with push constants (a single vec4) */
110 struct brw_reg clear_rgba
;
112 /* MRF used for render target writes */
116 brw_blorp_const_color_program::brw_blorp_const_color_program(
117 struct brw_context
*brw
,
118 const brw_blorp_const_color_prog_key
*key
)
119 : mem_ctx(ralloc_context(NULL
)),
126 prog_data
.first_curbe_grf
= 0;
127 prog_data
.persample_msaa_dispatch
= false;
128 brw_init_codegen(brw
->intelScreen
->devinfo
, &func
, mem_ctx
);
131 brw_blorp_const_color_program::~brw_blorp_const_color_program()
133 ralloc_free(mem_ctx
);
137 brw_blorp_params_get_clear_kernel(struct brw_context
*brw
,
138 struct brw_blorp_params
*params
,
139 brw_blorp_const_color_prog_key
*wm_prog_key
)
141 if (!brw_search_cache(&brw
->cache
, BRW_CACHE_BLORP_PROG
,
142 wm_prog_key
, sizeof(*wm_prog_key
),
143 ¶ms
->wm_prog_kernel
, ¶ms
->wm_prog_data
)) {
144 brw_blorp_const_color_program
prog(brw
, wm_prog_key
);
146 const GLuint
*program
= prog
.compile(brw
, &program_size
);
147 brw_upload_cache(&brw
->cache
, BRW_CACHE_BLORP_PROG
,
148 wm_prog_key
, sizeof(*wm_prog_key
),
149 program
, program_size
,
150 &prog
.prog_data
, sizeof(prog
.prog_data
),
151 ¶ms
->wm_prog_kernel
, ¶ms
->wm_prog_data
);
156 set_write_disables(const struct intel_renderbuffer
*irb
,
157 const GLubyte
*color_mask
, bool *color_write_disable
)
159 /* Format information in the renderbuffer represents the requirements
160 * given by the client. There are cases where the backing miptree uses,
161 * for example, RGBA to represent RGBX. Since the client is only expecting
162 * RGB we can treat alpha as not used and write whatever we like into it.
164 const GLenum base_format
= irb
->Base
.Base
._BaseFormat
;
165 const int components
= _mesa_base_format_component_count(base_format
);
166 bool disables
= false;
168 assert(components
> 0);
170 for (int i
= 0; i
< components
; i
++) {
171 color_write_disable
[i
] = !color_mask
[i
];
172 disables
= disables
|| !color_mask
[i
];
178 brw_blorp_clear_params::brw_blorp_clear_params(struct brw_context
*brw
,
179 struct gl_framebuffer
*fb
,
180 struct gl_renderbuffer
*rb
,
186 struct gl_context
*ctx
= &brw
->ctx
;
187 struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
188 mesa_format format
= irb
->mt
->format
;
190 if (!encode_srgb
&& _mesa_get_format_color_encoding(format
) == GL_SRGB
)
191 format
= _mesa_get_srgb_format_linear(format
);
193 dst
.set(brw
, irb
->mt
, irb
->mt_level
, layer
, format
, true);
195 /* Override the surface format according to the context's sRGB rules. */
196 dst
.brw_surfaceformat
= brw
->render_target_format
[format
];
204 y0
= rb
->Height
- fb
->_Ymax
;
205 y1
= rb
->Height
- fb
->_Ymin
;
208 memcpy(&wm_push_consts
.dst_x0
, ctx
->Color
.ClearColor
.f
, sizeof(float) * 4);
210 memset(&wm_prog_key
, 0, sizeof(wm_prog_key
));
212 wm_prog_key
.use_simd16_replicated_data
= true;
214 /* From the SNB PRM (Vol4_Part1):
216 * "Replicated data (Message Type = 111) is only supported when
217 * accessing tiled memory. Using this Message Type to access linear
218 * (untiled) memory is UNDEFINED."
220 if (irb
->mt
->tiling
== I915_TILING_NONE
)
221 wm_prog_key
.use_simd16_replicated_data
= false;
223 /* Constant color writes ignore everything in blend and color calculator
224 * state. This is not documented.
226 if (set_write_disables(irb
, color_mask
, color_write_disable
))
227 wm_prog_key
.use_simd16_replicated_data
= false;
229 if (irb
->mt
->fast_clear_state
!= INTEL_FAST_CLEAR_STATE_NO_MCS
&&
230 !partial_clear
&& wm_prog_key
.use_simd16_replicated_data
&&
231 brw_is_color_fast_clear_compatible(brw
, irb
->mt
,
232 &ctx
->Color
.ClearColor
)) {
233 memset(&wm_push_consts
, 0xff, 4*sizeof(float));
234 fast_clear_op
= GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE
;
236 brw_get_fast_clear_rect(brw
, fb
, irb
->mt
, &x0
, &y0
, &x1
, &y1
);
238 brw_meta_get_buffer_rect(fb
, &x0
, &y0
, &x1
, &y1
);
241 brw_blorp_params_get_clear_kernel(brw
, this, &wm_prog_key
);
245 brw_blorp_rt_resolve_params::brw_blorp_rt_resolve_params(
246 struct brw_context
*brw
,
247 struct intel_mipmap_tree
*mt
)
249 const mesa_format format
= _mesa_get_srgb_format_linear(mt
->format
);
251 dst
.set(brw
, mt
, 0 /* level */, 0 /* layer */, format
, true);
253 brw_get_resolve_rect(brw
, mt
, &x0
, &y0
, &x1
, &y1
);
255 fast_clear_op
= GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE
;
257 /* Note: there is no need to initialize push constants because it doesn't
258 * matter what data gets dispatched to the render target. However, we must
259 * ensure that the fragment shader delivers the data using the "replicated
262 memset(&wm_prog_key
, 0, sizeof(wm_prog_key
));
263 wm_prog_key
.use_simd16_replicated_data
= true;
265 brw_blorp_params_get_clear_kernel(brw
, this, &wm_prog_key
);
270 brw_blorp_const_color_program::alloc_regs()
273 this->R0
= retype(brw_vec8_grf(reg
++, 0), BRW_REGISTER_TYPE_UW
);
274 this->R1
= retype(brw_vec8_grf(reg
++, 0), BRW_REGISTER_TYPE_UW
);
276 prog_data
.first_curbe_grf
= reg
;
277 clear_rgba
= retype(brw_vec4_grf(reg
++, 0), BRW_REGISTER_TYPE_F
);
278 reg
+= BRW_BLORP_NUM_PUSH_CONST_REGS
;
280 /* Make sure we didn't run out of registers */
281 assert(reg
<= GEN7_MRF_HACK_START
);
287 brw_blorp_const_color_program::compile(struct brw_context
*brw
,
288 GLuint
*program_size
)
290 /* Set up prog_data */
291 memset(&prog_data
, 0, sizeof(prog_data
));
292 prog_data
.persample_msaa_dispatch
= false;
296 brw_set_default_compression_control(&func
, BRW_COMPRESSION_COMPRESSED
);
298 struct brw_reg mrf_rt_write
=
299 retype(vec16(brw_message_reg(base_mrf
)), BRW_REGISTER_TYPE_F
);
301 uint32_t mlen
, msg_type
;
302 if (key
->use_simd16_replicated_data
) {
303 /* The message payload is a single register with the low 4 floats/ints
304 * filled with the constant clear color.
306 brw_set_default_exec_size(&func
, BRW_EXECUTE_4
);
307 brw_set_default_mask_control(&func
, BRW_MASK_DISABLE
);
308 brw_MOV(&func
, vec4(brw_message_reg(base_mrf
)), clear_rgba
);
309 brw_set_default_mask_control(&func
, BRW_MASK_ENABLE
);
310 brw_set_default_exec_size(&func
, BRW_EXECUTE_16
);
312 msg_type
= BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED
;
315 brw_set_default_exec_size(&func
, BRW_EXECUTE_16
);
316 for (int i
= 0; i
< 4; i
++) {
317 /* The message payload is pairs of registers for 16 pixels each of r,
321 brw_message_reg(base_mrf
+ i
* 2),
322 brw_vec1_grf(clear_rgba
.nr
, i
));
325 msg_type
= BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE
;
329 /* Now write to the render target and terminate the thread */
331 16 /* dispatch_width */,
332 base_mrf
>= 0 ? brw_message_reg(base_mrf
) : mrf_rt_write
,
333 brw_null_reg() /* header */,
335 BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX
,
337 0 /* response_length */,
339 true /* last render target */,
340 false /* header present */);
342 if (unlikely(INTEL_DEBUG
& DEBUG_BLORP
)) {
343 fprintf(stderr
, "Native code for BLORP clear:\n");
344 brw_disassemble(brw
->intelScreen
->devinfo
,
345 func
.store
, 0, func
.next_insn_offset
, stderr
);
346 fprintf(stderr
, "\n");
349 brw_compact_instructions(&func
, 0, 0, NULL
);
350 return brw_get_program(&func
, program_size
);
355 do_single_blorp_clear(struct brw_context
*brw
, struct gl_framebuffer
*fb
,
356 struct gl_renderbuffer
*rb
, unsigned buf
,
357 bool partial_clear
, bool encode_srgb
, unsigned layer
)
359 struct gl_context
*ctx
= &brw
->ctx
;
360 struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
362 brw_blorp_clear_params
params(brw
, fb
, rb
, ctx
->Color
.ColorMask
[buf
],
363 partial_clear
, encode_srgb
, layer
);
365 const bool is_fast_clear
=
366 params
.fast_clear_op
== GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE
;
368 /* Record the clear color in the miptree so that it will be
369 * programmed in SURFACE_STATE by later rendering and resolve
372 brw_meta_set_fast_clear_color(brw
, irb
->mt
, &ctx
->Color
.ClearColor
);
374 /* If the buffer is already in INTEL_FAST_CLEAR_STATE_CLEAR, the clear
375 * is redundant and can be skipped.
377 if (irb
->mt
->fast_clear_state
== INTEL_FAST_CLEAR_STATE_CLEAR
)
380 /* If the MCS buffer hasn't been allocated yet, we need to allocate
383 if (!irb
->mt
->mcs_mt
) {
384 if (!intel_miptree_alloc_non_msrt_mcs(brw
, irb
->mt
)) {
385 /* MCS allocation failed--probably this will only happen in
386 * out-of-memory conditions. But in any case, try to recover
387 * by falling back to a non-blorp clear technique.
394 const char *clear_type
;
397 else if (params
.wm_prog_key
.use_simd16_replicated_data
)
398 clear_type
= "replicated";
402 DBG("%s (%s) to mt %p level %d layer %d\n", __FUNCTION__
, clear_type
,
403 irb
->mt
, irb
->mt_level
, irb
->mt_layer
);
405 brw_blorp_exec(brw
, ¶ms
);
408 /* Now that the fast clear has occurred, put the buffer in
409 * INTEL_FAST_CLEAR_STATE_CLEAR so that we won't waste time doing
412 irb
->mt
->fast_clear_state
= INTEL_FAST_CLEAR_STATE_CLEAR
;
421 brw_blorp_clear_color(struct brw_context
*brw
, struct gl_framebuffer
*fb
,
422 GLbitfield mask
, bool partial_clear
, bool encode_srgb
)
424 for (unsigned buf
= 0; buf
< fb
->_NumColorDrawBuffers
; buf
++) {
425 struct gl_renderbuffer
*rb
= fb
->_ColorDrawBuffers
[buf
];
426 struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
428 /* Only clear the buffers present in the provided mask */
429 if (((1 << fb
->_ColorDrawBufferIndexes
[buf
]) & mask
) == 0)
432 /* If this is an ES2 context or GL_ARB_ES2_compatibility is supported,
433 * the framebuffer can be complete with some attachments missing. In
434 * this case the _ColorDrawBuffers pointer will be NULL.
439 if (fb
->MaxNumLayers
> 0) {
440 unsigned layer_multiplier
=
441 (irb
->mt
->msaa_layout
== INTEL_MSAA_LAYOUT_UMS
||
442 irb
->mt
->msaa_layout
== INTEL_MSAA_LAYOUT_CMS
) ?
443 irb
->mt
->num_samples
: 1;
444 unsigned num_layers
= irb
->layer_count
;
445 for (unsigned layer
= 0; layer
< num_layers
; layer
++) {
446 if (!do_single_blorp_clear(
447 brw
, fb
, rb
, buf
, partial_clear
, encode_srgb
,
448 irb
->mt_layer
+ layer
* layer_multiplier
)) {
453 unsigned layer
= irb
->mt_layer
;
454 if (!do_single_blorp_clear(brw
, fb
, rb
, buf
, partial_clear
,
459 irb
->need_downsample
= true;
466 brw_blorp_resolve_color(struct brw_context
*brw
, struct intel_mipmap_tree
*mt
)
468 DBG("%s to mt %p\n", __FUNCTION__
, mt
);
470 brw_blorp_rt_resolve_params
params(brw
, mt
);
471 brw_blorp_exec(brw
, ¶ms
);
472 mt
->fast_clear_state
= INTEL_FAST_CLEAR_STATE_RESOLVED
;