i965/blorp: Write blorp code to do render target resolves.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_blorp_clear.cpp
1 /*
2 * Copyright © 2013 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 extern "C" {
25 #include "main/teximage.h"
26 #include "main/blend.h"
27 #include "main/fbobject.h"
28 #include "main/renderbuffer.h"
29 }
30
31 #include "glsl/ralloc.h"
32
33 #include "intel_fbo.h"
34
35 #include "brw_blorp.h"
36 #include "brw_context.h"
37 #include "brw_eu.h"
38 #include "brw_state.h"
39
/**
 * Key identifying a variant of the constant color WM program.
 *
 * The key is handed to the program cache by address and size, and users
 * memset() it to zero before filling it in.
 */
struct brw_blorp_const_color_prog_key
{
   /* Emit the "SIMD16 single source replicated" render target write. */
   bool use_simd16_replicated_data;

   /* Explicit padding -- presumably so the key contains no uninitialized
    * bytes when it is compared by the cache; TODO confirm.
    */
   bool pad[3];
};
45
46 /**
47 * Parameters for a blorp operation where the fragment shader outputs a
48 * constant color. This is used for both fast color clears and color
49 * resolves.
50 */
51 class brw_blorp_const_color_params : public brw_blorp_params
52 {
53 public:
54 virtual uint32_t get_wm_prog(struct brw_context *brw,
55 brw_blorp_prog_data **prog_data) const;
56
57 protected:
58 brw_blorp_const_color_prog_key wm_prog_key;
59 };
60
61 class brw_blorp_clear_params : public brw_blorp_const_color_params
62 {
63 public:
64 brw_blorp_clear_params(struct brw_context *brw,
65 struct gl_framebuffer *fb,
66 struct gl_renderbuffer *rb,
67 GLubyte *color_mask,
68 bool partial_clear);
69 };
70
71
72 /**
73 * Parameters for a blorp operation that performs a "render target resolve".
74 * This is used to resolve pending fast clear pixels before a color buffer is
75 * used for texturing, ReadPixels, or scanout.
76 */
77 class brw_blorp_rt_resolve_params : public brw_blorp_const_color_params
78 {
79 public:
80 brw_blorp_rt_resolve_params(struct brw_context *brw,
81 struct intel_mipmap_tree *mt);
82 };
83
84
85 class brw_blorp_const_color_program
86 {
87 public:
88 brw_blorp_const_color_program(struct brw_context *brw,
89 const brw_blorp_const_color_prog_key *key);
90 ~brw_blorp_const_color_program();
91
92 const GLuint *compile(struct brw_context *brw, GLuint *program_size);
93
94 brw_blorp_prog_data prog_data;
95
96 private:
97 void alloc_regs();
98
99 void *mem_ctx;
100 struct brw_context *brw;
101 const brw_blorp_const_color_prog_key *key;
102 struct brw_compile func;
103
104 /* Thread dispatch header */
105 struct brw_reg R0;
106
107 /* Pixel X/Y coordinates (always in R1). */
108 struct brw_reg R1;
109
110 /* Register with push constants (a single vec4) */
111 struct brw_reg clear_rgba;
112
113 /* MRF used for render target writes */
114 GLuint base_mrf;
115 };
116
117 brw_blorp_const_color_program::brw_blorp_const_color_program(
118 struct brw_context *brw,
119 const brw_blorp_const_color_prog_key *key)
120 : mem_ctx(ralloc_context(NULL)),
121 brw(brw),
122 key(key)
123 {
124 brw_init_compile(brw, &func, mem_ctx);
125 }
126
127 brw_blorp_const_color_program::~brw_blorp_const_color_program()
128 {
129 ralloc_free(mem_ctx);
130 }
131
132
133 /**
134 * Determine if fast color clear supports the given clear color.
135 *
136 * Fast color clear can only clear to color values of 1.0 or 0.0. At the
137 * moment we only support floating point, unorm, and snorm buffers.
138 */
139 static bool
140 is_color_fast_clear_compatible(struct intel_context *intel,
141 gl_format format,
142 const union gl_color_union *color)
143 {
144 if (_mesa_is_format_integer_color(format))
145 return false;
146
147 for (int i = 0; i < 4; i++) {
148 if (color->f[i] != 0.0 && color->f[i] != 1.0) {
149 perf_debug("Clear color unsupported by fast color clear. "
150 "Falling back to slow clear.");
151 return false;
152 }
153 }
154 return true;
155 }
156
157
158 /**
159 * Convert the given color to a bitfield suitable for ORing into DWORD 7 of
160 * SURFACE_STATE.
161 */
162 static uint32_t
163 compute_fast_clear_color_bits(const union gl_color_union *color)
164 {
165 uint32_t bits = 0;
166 for (int i = 0; i < 4; i++) {
167 if (color->f[i] != 0.0)
168 bits |= 1 << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + (3 - i));
169 }
170 return bits;
171 }
172
173
174 brw_blorp_clear_params::brw_blorp_clear_params(struct brw_context *brw,
175 struct gl_framebuffer *fb,
176 struct gl_renderbuffer *rb,
177 GLubyte *color_mask,
178 bool partial_clear)
179 {
180 struct intel_context *intel = &brw->intel;
181 struct gl_context *ctx = &intel->ctx;
182 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
183
184 dst.set(brw, irb->mt, irb->mt_level, irb->mt_layer);
185
186 /* Override the surface format according to the context's sRGB rules. */
187 gl_format format = _mesa_get_render_format(ctx, irb->mt->format);
188 dst.brw_surfaceformat = brw->render_target_format[format];
189
190 x0 = fb->_Xmin;
191 x1 = fb->_Xmax;
192 if (rb->Name != 0) {
193 y0 = fb->_Ymin;
194 y1 = fb->_Ymax;
195 } else {
196 y0 = rb->Height - fb->_Ymax;
197 y1 = rb->Height - fb->_Ymin;
198 }
199
200 float *push_consts = (float *)&wm_push_consts;
201
202 push_consts[0] = ctx->Color.ClearColor.f[0];
203 push_consts[1] = ctx->Color.ClearColor.f[1];
204 push_consts[2] = ctx->Color.ClearColor.f[2];
205 push_consts[3] = ctx->Color.ClearColor.f[3];
206
207 use_wm_prog = true;
208
209 memset(&wm_prog_key, 0, sizeof(wm_prog_key));
210
211 wm_prog_key.use_simd16_replicated_data = true;
212
213 /* From the SNB PRM (Vol4_Part1):
214 *
215 * "Replicated data (Message Type = 111) is only supported when
216 * accessing tiled memory. Using this Message Type to access linear
217 * (untiled) memory is UNDEFINED."
218 */
219 if (irb->mt->region->tiling == I915_TILING_NONE)
220 wm_prog_key.use_simd16_replicated_data = false;
221
222 /* Constant color writes ignore everyting in blend and color calculator
223 * state. This is not documented.
224 */
225 for (int i = 0; i < 4; i++) {
226 if (!color_mask[i]) {
227 color_write_disable[i] = true;
228 wm_prog_key.use_simd16_replicated_data = false;
229 }
230 }
231
232 /* If we can do this as a fast color clear, do so. */
233 if (irb->mt->mcs_state != INTEL_MCS_STATE_NONE && !partial_clear &&
234 wm_prog_key.use_simd16_replicated_data &&
235 is_color_fast_clear_compatible(intel, format, &ctx->Color.ClearColor)) {
236 memset(push_consts, 0xff, 4*sizeof(float));
237 fast_clear_op = GEN7_FAST_CLEAR_OP_FAST_CLEAR;
238
239 /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
240 * Target(s)", beneath the "Fast Color Clear" bullet (p327):
241 *
242 * Clear pass must have a clear rectangle that must follow alignment
243 * rules in terms of pixels and lines as shown in the table
244 * below. Further, the clear-rectangle height and width must be
245 * multiple of the following dimensions. If the height and width of
246 * the render target being cleared do not meet these requirements,
247 * an MCS buffer can be created such that it follows the requirement
248 * and covers the RT.
249 *
250 * The alignment size in the table that follows is related to the
251 * alignment size returned by intel_get_non_msrt_mcs_alignment(), but
252 * with X alignment multiplied by 16 and Y alignment multiplied by 32.
253 */
254 unsigned x_align, y_align;
255 intel_get_non_msrt_mcs_alignment(intel, irb->mt, &x_align, &y_align);
256 x_align *= 16;
257 y_align *= 32;
258 x0 = ROUND_DOWN_TO(x0, x_align);
259 y0 = ROUND_DOWN_TO(y0, y_align);
260 x1 = ALIGN(x1, x_align);
261 y1 = ALIGN(y1, y_align);
262
263 /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
264 * Target(s)", beneath the "Fast Color Clear" bullet (p327):
265 *
266 * In order to optimize the performance MCS buffer (when bound to 1X
267 * RT) clear similarly to MCS buffer clear for MSRT case, clear rect
268 * is required to be scaled by the following factors in the
269 * horizontal and vertical directions:
270 *
271 * The X and Y scale down factors in the table that follows are each
272 * equal to half the alignment value computed above.
273 */
274 unsigned x_scaledown = x_align / 2;
275 unsigned y_scaledown = y_align / 2;
276 x0 /= x_scaledown;
277 y0 /= y_scaledown;
278 x1 /= x_scaledown;
279 y1 /= y_scaledown;
280 }
281 }
282
283
284 brw_blorp_rt_resolve_params::brw_blorp_rt_resolve_params(
285 struct brw_context *brw,
286 struct intel_mipmap_tree *mt)
287 {
288 dst.set(brw, mt, 0 /* level */, 0 /* layer */);
289
290 /* From the Ivy Bridge PRM, Vol2 Part1 11.9 "Render Target Resolve":
291 *
292 * A rectangle primitive must be scaled down by the following factors
293 * with respect to render target being resolved.
294 *
295 * The scaledown factors in the table that follows are related to the
296 * alignment size returned by intel_get_non_msrt_mcs_alignment(), but with
297 * X and Y alignment each divided by 2.
298 */
299 unsigned x_align, y_align;
300 intel_get_non_msrt_mcs_alignment(&brw->intel, mt, &x_align, &y_align);
301 unsigned x_scaledown = x_align / 2;
302 unsigned y_scaledown = y_align / 2;
303 x0 = y0 = 0;
304 x1 = ALIGN(mt->logical_width0, x_scaledown) / x_scaledown;
305 y1 = ALIGN(mt->logical_height0, y_scaledown) / y_scaledown;
306
307 fast_clear_op = GEN7_FAST_CLEAR_OP_RESOLVE;
308
309 /* Note: there is no need to initialize push constants because it doesn't
310 * matter what data gets dispatched to the render target. However, we must
311 * ensure that the fragment shader delivers the data using the "replicated
312 * color" message.
313 */
314 use_wm_prog = true;
315 memset(&wm_prog_key, 0, sizeof(wm_prog_key));
316 wm_prog_key.use_simd16_replicated_data = true;
317 }
318
319
320 uint32_t
321 brw_blorp_const_color_params::get_wm_prog(struct brw_context *brw,
322 brw_blorp_prog_data **prog_data)
323 const
324 {
325 uint32_t prog_offset;
326 if (!brw_search_cache(&brw->cache, BRW_BLORP_CONST_COLOR_PROG,
327 &this->wm_prog_key, sizeof(this->wm_prog_key),
328 &prog_offset, prog_data)) {
329 brw_blorp_const_color_program prog(brw, &this->wm_prog_key);
330 GLuint program_size;
331 const GLuint *program = prog.compile(brw, &program_size);
332 brw_upload_cache(&brw->cache, BRW_BLORP_CONST_COLOR_PROG,
333 &this->wm_prog_key, sizeof(this->wm_prog_key),
334 program, program_size,
335 &prog.prog_data, sizeof(prog.prog_data),
336 &prog_offset, prog_data);
337 }
338 return prog_offset;
339 }
340
341 void
342 brw_blorp_const_color_program::alloc_regs()
343 {
344 int reg = 0;
345 this->R0 = retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW);
346 this->R1 = retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW);
347
348 prog_data.first_curbe_grf = reg;
349 clear_rgba = retype(brw_vec4_grf(reg++, 0), BRW_REGISTER_TYPE_F);
350 reg += BRW_BLORP_NUM_PUSH_CONST_REGS;
351
352 /* Make sure we didn't run out of registers */
353 assert(reg <= GEN7_MRF_HACK_START);
354
355 this->base_mrf = 2;
356 }
357
358 const GLuint *
359 brw_blorp_const_color_program::compile(struct brw_context *brw,
360 GLuint *program_size)
361 {
362 /* Set up prog_data */
363 memset(&prog_data, 0, sizeof(prog_data));
364 prog_data.persample_msaa_dispatch = false;
365
366 alloc_regs();
367
368 brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
369
370 struct brw_reg mrf_rt_write =
371 retype(vec16(brw_message_reg(base_mrf)), BRW_REGISTER_TYPE_F);
372
373 uint32_t mlen, msg_type;
374 if (key->use_simd16_replicated_data) {
375 /* The message payload is a single register with the low 4 floats/ints
376 * filled with the constant clear color.
377 */
378 brw_set_mask_control(&func, BRW_MASK_DISABLE);
379 brw_MOV(&func, vec4(brw_message_reg(base_mrf)), clear_rgba);
380 brw_set_mask_control(&func, BRW_MASK_ENABLE);
381
382 msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED;
383 mlen = 1;
384 } else {
385 for (int i = 0; i < 4; i++) {
386 /* The message payload is pairs of registers for 16 pixels each of r,
387 * g, b, and a.
388 */
389 brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED);
390 brw_MOV(&func,
391 brw_message_reg(base_mrf + i * 2),
392 brw_vec1_grf(clear_rgba.nr, i));
393 brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
394 }
395
396 msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
397 mlen = 8;
398 }
399
400 /* Now write to the render target and terminate the thread */
401 brw_fb_WRITE(&func,
402 16 /* dispatch_width */,
403 base_mrf /* msg_reg_nr */,
404 mrf_rt_write /* src0 */,
405 msg_type,
406 BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX,
407 mlen,
408 0 /* response_length */,
409 true /* eot */,
410 false /* header present */);
411
412 if (unlikely(INTEL_DEBUG & DEBUG_BLORP)) {
413 printf("Native code for BLORP clear:\n");
414 brw_dump_compile(&func, stdout, 0, func.next_insn_offset);
415 printf("\n");
416 }
417 return brw_get_program(&func, program_size);
418 }
419
420 extern "C" {
421 bool
422 brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb,
423 bool partial_clear)
424 {
425 struct gl_context *ctx = &intel->ctx;
426 struct brw_context *brw = brw_context(ctx);
427
428 /* The constant color clear code doesn't work for multisampled surfaces, so
429 * we need to support falling back to other clear mechanisms.
430 * Unfortunately, our clear code is based on a bitmask that doesn't
431 * distinguish individual color attachments, so we walk the attachments to
432 * see if any require fallback, and fall back for all if any of them need
433 * to.
434 */
435 for (unsigned buf = 0; buf < ctx->DrawBuffer->_NumColorDrawBuffers; buf++) {
436 struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[buf];
437 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
438
439 if (irb && irb->mt->msaa_layout != INTEL_MSAA_LAYOUT_NONE)
440 return false;
441 }
442
443 for (unsigned buf = 0; buf < ctx->DrawBuffer->_NumColorDrawBuffers; buf++) {
444 struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[buf];
445 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
446
447 /* If this is an ES2 context or GL_ARB_ES2_compatibility is supported,
448 * the framebuffer can be complete with some attachments missing. In
449 * this case the _ColorDrawBuffers pointer will be NULL.
450 */
451 if (rb == NULL)
452 continue;
453
454 brw_blorp_clear_params params(brw, fb, rb, ctx->Color.ColorMask[buf],
455 partial_clear);
456
457 bool is_fast_clear =
458 (params.fast_clear_op == GEN7_FAST_CLEAR_OP_FAST_CLEAR);
459 if (is_fast_clear) {
460 /* Record the clear color in the miptree so that it will be
461 * programmed in SURFACE_STATE by later rendering and resolve
462 * operations.
463 */
464 uint32_t new_color_value =
465 compute_fast_clear_color_bits(&ctx->Color.ClearColor);
466 if (irb->mt->fast_clear_color_value != new_color_value) {
467 irb->mt->fast_clear_color_value = new_color_value;
468 brw->state.dirty.brw |= BRW_NEW_SURFACES;
469 }
470
471 /* If the buffer is already in INTEL_MCS_STATE_CLEAR, the clear is
472 * redundant and can be skipped.
473 */
474 if (irb->mt->mcs_state == INTEL_MCS_STATE_CLEAR)
475 continue;
476
477 /* If the MCS buffer hasn't been allocated yet, we need to allocate
478 * it now.
479 */
480 if (!irb->mt->mcs_mt) {
481 if (!intel_miptree_alloc_non_msrt_mcs(intel, irb->mt)) {
482 /* MCS allocation failed--probably this will only happen in
483 * out-of-memory conditions. But in any case, try to recover
484 * by falling back to a non-blorp clear technique.
485 */
486 return false;
487 }
488 brw->state.dirty.brw |= BRW_NEW_SURFACES;
489 }
490 }
491
492 brw_blorp_exec(intel, &params);
493
494 if (is_fast_clear) {
495 /* Now that the fast clear has occurred, put the buffer in
496 * INTEL_MCS_STATE_CLEAR so that we won't waste time doing redundant
497 * clears.
498 */
499 irb->mt->mcs_state = INTEL_MCS_STATE_CLEAR;
500 }
501 }
502
503 return true;
504 }
505
506 void
507 brw_blorp_resolve_color(struct intel_context *intel, struct intel_mipmap_tree *mt)
508 {
509 struct brw_context *brw = brw_context(&intel->ctx);
510 brw_blorp_rt_resolve_params params(brw, mt);
511 brw_blorp_exec(intel, &params);
512 mt->mcs_state = INTEL_MCS_STATE_RESOLVED;
513 }
514
515 } /* extern "C" */