i965/blorp: Refactor to get rid of the get_wm_prog virtual function
[mesa.git] / src / mesa / drivers / dri / i965 / brw_blorp_clear.cpp
1 /*
2 * Copyright © 2013 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "main/teximage.h"
25 #include "main/blend.h"
26 #include "main/fbobject.h"
27 #include "main/renderbuffer.h"
28 #include "main/glformats.h"
29
30 #include "util/ralloc.h"
31
32 #include "intel_fbo.h"
33
34 #include "brw_blorp.h"
35 #include "brw_meta_util.h"
36 #include "brw_context.h"
37 #include "brw_eu.h"
38 #include "brw_state.h"
39
/* Debug category selected for this file.
 * NOTE(review): presumably consumed by the DBG() macro used further down;
 * confirm against the header that defines DBG.
 */
#define FILE_DEBUG_FLAG DEBUG_BLORP
41
/**
 * Key identifying one variant of the constant-color WM program.
 *
 * The key is handed to brw_search_cache()/brw_upload_cache() together with
 * sizeof(*key), and the users in this file memset() it to zero before filling
 * it in.
 */
struct brw_blorp_const_color_prog_key
{
   /* When true, compile() emits the SIMD16 "single source replicated" render
    * target write; otherwise the regular SIMD16 single source message.
    */
   bool use_simd16_replicated_data;

   /* Explicit padding.
    * NOTE(review): presumably rounds the key up to four bytes so that every
    * byte of the raw key is well defined -- confirm.
    */
   bool pad[3];
};
47
/**
 * Parameters for a blorp operation where the fragment shader outputs a
 * constant color.  This is used for both fast color clears and color
 * resolves.
 *
 * Extends brw_blorp_params with the key used to look up (or compile) the
 * constant-color program.
 */
class brw_blorp_const_color_params : public brw_blorp_params
{
public:
   /* Key selecting the constant-color program variant for this operation. */
   brw_blorp_const_color_prog_key wm_prog_key;
};
58
/**
 * Parameters for a blorp color clear of a single renderbuffer layer.
 *
 * The constructor fills in the destination surface, the clear rectangle, the
 * push constants, the per-channel write disables and the program key, and
 * fetches the matching kernel.
 *
 * \param brw            context the clear is performed in
 * \param fb             framebuffer supplying the clear bounds
 * \param rb             renderbuffer to clear
 * \param color_mask     per-channel write mask; a zero entry disables writes
 *                       to that channel
 * \param partial_clear  when true, the fast clear path is not used
 * \param encode_srgb    when false and the miptree format is sRGB, the linear
 *                       equivalent of the format is used instead
 * \param layer          layer of the miptree to clear
 */
class brw_blorp_clear_params : public brw_blorp_const_color_params
{
public:
   brw_blorp_clear_params(struct brw_context *brw,
                          struct gl_framebuffer *fb,
                          struct gl_renderbuffer *rb,
                          GLubyte *color_mask,
                          bool partial_clear,
                          bool encode_srgb,
                          unsigned layer);
};
70
71
/**
 * Parameters for a blorp operation that performs a "render target resolve".
 * This is used to resolve pending fast clear pixels before a color buffer is
 * used for texturing, ReadPixels, or scanout.
 *
 * \param brw  context the resolve is performed in
 * \param mt   miptree to resolve; the constructor targets level 0, layer 0
 */
class brw_blorp_rt_resolve_params : public brw_blorp_const_color_params
{
public:
   brw_blorp_rt_resolve_params(struct brw_context *brw,
                               struct intel_mipmap_tree *mt);
};
83
84
/**
 * Code generator for the constant-color WM program selected by a
 * brw_blorp_const_color_prog_key.
 *
 * The object creates a private ralloc context in its constructor and frees
 * it in its destructor.  compile() emits the program and fills in prog_data.
 */
class brw_blorp_const_color_program
{
public:
   brw_blorp_const_color_program(struct brw_context *brw,
                                 const brw_blorp_const_color_prog_key *key);
   ~brw_blorp_const_color_program();

   /* Emits the program and returns the result of brw_get_program();
    * program_size is forwarded to that call.
    */
   const GLuint *compile(struct brw_context *brw, GLuint *program_size);

   /* Program metadata; reset and filled in by compile(). */
   brw_blorp_prog_data prog_data;

private:
   /* Assigns R0, R1, clear_rgba, base_mrf and prog_data.first_curbe_grf. */
   void alloc_regs();

   /* ralloc context created by the constructor, freed by the destructor. */
   void *mem_ctx;

   /* Key supplied by the caller; stored by pointer, not copied. */
   const brw_blorp_const_color_prog_key *key;

   /* Code generation state, initialized with mem_ctx. */
   struct brw_codegen func;

   /* Thread dispatch header */
   struct brw_reg R0;

   /* Pixel X/Y coordinates (always in R1). */
   struct brw_reg R1;

   /* Register with push constants (a single vec4) */
   struct brw_reg clear_rgba;

   /* MRF used for render target writes */
   GLuint base_mrf;
};
115
116 brw_blorp_const_color_program::brw_blorp_const_color_program(
117 struct brw_context *brw,
118 const brw_blorp_const_color_prog_key *key)
119 : mem_ctx(ralloc_context(NULL)),
120 key(key),
121 R0(),
122 R1(),
123 clear_rgba(),
124 base_mrf(0)
125 {
126 prog_data.first_curbe_grf = 0;
127 prog_data.persample_msaa_dispatch = false;
128 brw_init_codegen(brw->intelScreen->devinfo, &func, mem_ctx);
129 }
130
131 brw_blorp_const_color_program::~brw_blorp_const_color_program()
132 {
133 ralloc_free(mem_ctx);
134 }
135
136 static void
137 brw_blorp_params_get_clear_kernel(struct brw_context *brw,
138 struct brw_blorp_params *params,
139 brw_blorp_const_color_prog_key *wm_prog_key)
140 {
141 if (!brw_search_cache(&brw->cache, BRW_CACHE_BLORP_PROG,
142 wm_prog_key, sizeof(*wm_prog_key),
143 &params->wm_prog_kernel, &params->wm_prog_data)) {
144 brw_blorp_const_color_program prog(brw, wm_prog_key);
145 GLuint program_size;
146 const GLuint *program = prog.compile(brw, &program_size);
147 brw_upload_cache(&brw->cache, BRW_CACHE_BLORP_PROG,
148 wm_prog_key, sizeof(*wm_prog_key),
149 program, program_size,
150 &prog.prog_data, sizeof(prog.prog_data),
151 &params->wm_prog_kernel, &params->wm_prog_data);
152 }
153 }
154
155 static bool
156 set_write_disables(const struct intel_renderbuffer *irb,
157 const GLubyte *color_mask, bool *color_write_disable)
158 {
159 /* Format information in the renderbuffer represents the requirements
160 * given by the client. There are cases where the backing miptree uses,
161 * for example, RGBA to represent RGBX. Since the client is only expecting
162 * RGB we can treat alpha as not used and write whatever we like into it.
163 */
164 const GLenum base_format = irb->Base.Base._BaseFormat;
165 const int components = _mesa_base_format_component_count(base_format);
166 bool disables = false;
167
168 assert(components > 0);
169
170 for (int i = 0; i < components; i++) {
171 color_write_disable[i] = !color_mask[i];
172 disables = disables || !color_mask[i];
173 }
174
175 return disables;
176 }
177
178 brw_blorp_clear_params::brw_blorp_clear_params(struct brw_context *brw,
179 struct gl_framebuffer *fb,
180 struct gl_renderbuffer *rb,
181 GLubyte *color_mask,
182 bool partial_clear,
183 bool encode_srgb,
184 unsigned layer)
185 {
186 struct gl_context *ctx = &brw->ctx;
187 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
188 mesa_format format = irb->mt->format;
189
190 if (!encode_srgb && _mesa_get_format_color_encoding(format) == GL_SRGB)
191 format = _mesa_get_srgb_format_linear(format);
192
193 dst.set(brw, irb->mt, irb->mt_level, layer, format, true);
194
195 /* Override the surface format according to the context's sRGB rules. */
196 dst.brw_surfaceformat = brw->render_target_format[format];
197
198 x0 = fb->_Xmin;
199 x1 = fb->_Xmax;
200 if (rb->Name != 0) {
201 y0 = fb->_Ymin;
202 y1 = fb->_Ymax;
203 } else {
204 y0 = rb->Height - fb->_Ymax;
205 y1 = rb->Height - fb->_Ymin;
206 }
207
208 memcpy(&wm_push_consts.dst_x0, ctx->Color.ClearColor.f, sizeof(float) * 4);
209
210 memset(&wm_prog_key, 0, sizeof(wm_prog_key));
211
212 wm_prog_key.use_simd16_replicated_data = true;
213
214 /* From the SNB PRM (Vol4_Part1):
215 *
216 * "Replicated data (Message Type = 111) is only supported when
217 * accessing tiled memory. Using this Message Type to access linear
218 * (untiled) memory is UNDEFINED."
219 */
220 if (irb->mt->tiling == I915_TILING_NONE)
221 wm_prog_key.use_simd16_replicated_data = false;
222
223 /* Constant color writes ignore everyting in blend and color calculator
224 * state. This is not documented.
225 */
226 if (set_write_disables(irb, color_mask, color_write_disable))
227 wm_prog_key.use_simd16_replicated_data = false;
228
229 if (irb->mt->fast_clear_state != INTEL_FAST_CLEAR_STATE_NO_MCS &&
230 !partial_clear && wm_prog_key.use_simd16_replicated_data &&
231 brw_is_color_fast_clear_compatible(brw, irb->mt,
232 &ctx->Color.ClearColor)) {
233 memset(&wm_push_consts, 0xff, 4*sizeof(float));
234 fast_clear_op = GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE;
235
236 brw_get_fast_clear_rect(brw, fb, irb->mt, &x0, &y0, &x1, &y1);
237 } else {
238 brw_meta_get_buffer_rect(fb, &x0, &y0, &x1, &y1);
239 }
240
241 brw_blorp_params_get_clear_kernel(brw, this, &wm_prog_key);
242 }
243
244
245 brw_blorp_rt_resolve_params::brw_blorp_rt_resolve_params(
246 struct brw_context *brw,
247 struct intel_mipmap_tree *mt)
248 {
249 const mesa_format format = _mesa_get_srgb_format_linear(mt->format);
250
251 dst.set(brw, mt, 0 /* level */, 0 /* layer */, format, true);
252
253 brw_get_resolve_rect(brw, mt, &x0, &y0, &x1, &y1);
254
255 fast_clear_op = GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE;
256
257 /* Note: there is no need to initialize push constants because it doesn't
258 * matter what data gets dispatched to the render target. However, we must
259 * ensure that the fragment shader delivers the data using the "replicated
260 * color" message.
261 */
262 memset(&wm_prog_key, 0, sizeof(wm_prog_key));
263 wm_prog_key.use_simd16_replicated_data = true;
264
265 brw_blorp_params_get_clear_kernel(brw, this, &wm_prog_key);
266 }
267
268
269 void
270 brw_blorp_const_color_program::alloc_regs()
271 {
272 int reg = 0;
273 this->R0 = retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW);
274 this->R1 = retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW);
275
276 prog_data.first_curbe_grf = reg;
277 clear_rgba = retype(brw_vec4_grf(reg++, 0), BRW_REGISTER_TYPE_F);
278 reg += BRW_BLORP_NUM_PUSH_CONST_REGS;
279
280 /* Make sure we didn't run out of registers */
281 assert(reg <= GEN7_MRF_HACK_START);
282
283 this->base_mrf = 2;
284 }
285
286 const GLuint *
287 brw_blorp_const_color_program::compile(struct brw_context *brw,
288 GLuint *program_size)
289 {
290 /* Set up prog_data */
291 memset(&prog_data, 0, sizeof(prog_data));
292 prog_data.persample_msaa_dispatch = false;
293
294 alloc_regs();
295
296 brw_set_default_compression_control(&func, BRW_COMPRESSION_COMPRESSED);
297
298 struct brw_reg mrf_rt_write =
299 retype(vec16(brw_message_reg(base_mrf)), BRW_REGISTER_TYPE_F);
300
301 uint32_t mlen, msg_type;
302 if (key->use_simd16_replicated_data) {
303 /* The message payload is a single register with the low 4 floats/ints
304 * filled with the constant clear color.
305 */
306 brw_set_default_exec_size(&func, BRW_EXECUTE_4);
307 brw_set_default_mask_control(&func, BRW_MASK_DISABLE);
308 brw_MOV(&func, vec4(brw_message_reg(base_mrf)), clear_rgba);
309 brw_set_default_mask_control(&func, BRW_MASK_ENABLE);
310 brw_set_default_exec_size(&func, BRW_EXECUTE_16);
311
312 msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED;
313 mlen = 1;
314 } else {
315 brw_set_default_exec_size(&func, BRW_EXECUTE_16);
316 for (int i = 0; i < 4; i++) {
317 /* The message payload is pairs of registers for 16 pixels each of r,
318 * g, b, and a.
319 */
320 brw_MOV(&func,
321 brw_message_reg(base_mrf + i * 2),
322 brw_vec1_grf(clear_rgba.nr, i));
323 }
324
325 msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
326 mlen = 8;
327 }
328
329 /* Now write to the render target and terminate the thread */
330 brw_fb_WRITE(&func,
331 16 /* dispatch_width */,
332 base_mrf >= 0 ? brw_message_reg(base_mrf) : mrf_rt_write,
333 brw_null_reg() /* header */,
334 msg_type,
335 BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX,
336 mlen,
337 0 /* response_length */,
338 true /* eot */,
339 true /* last render target */,
340 false /* header present */);
341
342 if (unlikely(INTEL_DEBUG & DEBUG_BLORP)) {
343 fprintf(stderr, "Native code for BLORP clear:\n");
344 brw_disassemble(brw->intelScreen->devinfo,
345 func.store, 0, func.next_insn_offset, stderr);
346 fprintf(stderr, "\n");
347 }
348
349 brw_compact_instructions(&func, 0, 0, NULL);
350 return brw_get_program(&func, program_size);
351 }
352
353
354 static bool
355 do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,
356 struct gl_renderbuffer *rb, unsigned buf,
357 bool partial_clear, bool encode_srgb, unsigned layer)
358 {
359 struct gl_context *ctx = &brw->ctx;
360 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
361
362 brw_blorp_clear_params params(brw, fb, rb, ctx->Color.ColorMask[buf],
363 partial_clear, encode_srgb, layer);
364
365 const bool is_fast_clear =
366 params.fast_clear_op == GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE;
367 if (is_fast_clear) {
368 /* Record the clear color in the miptree so that it will be
369 * programmed in SURFACE_STATE by later rendering and resolve
370 * operations.
371 */
372 brw_meta_set_fast_clear_color(brw, irb->mt, &ctx->Color.ClearColor);
373
374 /* If the buffer is already in INTEL_FAST_CLEAR_STATE_CLEAR, the clear
375 * is redundant and can be skipped.
376 */
377 if (irb->mt->fast_clear_state == INTEL_FAST_CLEAR_STATE_CLEAR)
378 return true;
379
380 /* If the MCS buffer hasn't been allocated yet, we need to allocate
381 * it now.
382 */
383 if (!irb->mt->mcs_mt) {
384 if (!intel_miptree_alloc_non_msrt_mcs(brw, irb->mt)) {
385 /* MCS allocation failed--probably this will only happen in
386 * out-of-memory conditions. But in any case, try to recover
387 * by falling back to a non-blorp clear technique.
388 */
389 return false;
390 }
391 }
392 }
393
394 const char *clear_type;
395 if (is_fast_clear)
396 clear_type = "fast";
397 else if (params.wm_prog_key.use_simd16_replicated_data)
398 clear_type = "replicated";
399 else
400 clear_type = "slow";
401
402 DBG("%s (%s) to mt %p level %d layer %d\n", __FUNCTION__, clear_type,
403 irb->mt, irb->mt_level, irb->mt_layer);
404
405 brw_blorp_exec(brw, &params);
406
407 if (is_fast_clear) {
408 /* Now that the fast clear has occurred, put the buffer in
409 * INTEL_FAST_CLEAR_STATE_CLEAR so that we won't waste time doing
410 * redundant clears.
411 */
412 irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR;
413 }
414
415 return true;
416 }
417
418
419 extern "C" {
420 bool
421 brw_blorp_clear_color(struct brw_context *brw, struct gl_framebuffer *fb,
422 GLbitfield mask, bool partial_clear, bool encode_srgb)
423 {
424 for (unsigned buf = 0; buf < fb->_NumColorDrawBuffers; buf++) {
425 struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[buf];
426 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
427
428 /* Only clear the buffers present in the provided mask */
429 if (((1 << fb->_ColorDrawBufferIndexes[buf]) & mask) == 0)
430 continue;
431
432 /* If this is an ES2 context or GL_ARB_ES2_compatibility is supported,
433 * the framebuffer can be complete with some attachments missing. In
434 * this case the _ColorDrawBuffers pointer will be NULL.
435 */
436 if (rb == NULL)
437 continue;
438
439 if (fb->MaxNumLayers > 0) {
440 unsigned layer_multiplier =
441 (irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_UMS ||
442 irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) ?
443 irb->mt->num_samples : 1;
444 unsigned num_layers = irb->layer_count;
445 for (unsigned layer = 0; layer < num_layers; layer++) {
446 if (!do_single_blorp_clear(
447 brw, fb, rb, buf, partial_clear, encode_srgb,
448 irb->mt_layer + layer * layer_multiplier)) {
449 return false;
450 }
451 }
452 } else {
453 unsigned layer = irb->mt_layer;
454 if (!do_single_blorp_clear(brw, fb, rb, buf, partial_clear,
455 encode_srgb, layer))
456 return false;
457 }
458
459 irb->need_downsample = true;
460 }
461
462 return true;
463 }
464
465 void
466 brw_blorp_resolve_color(struct brw_context *brw, struct intel_mipmap_tree *mt)
467 {
468 DBG("%s to mt %p\n", __FUNCTION__, mt);
469
470 brw_blorp_rt_resolve_params params(brw, mt);
471 brw_blorp_exec(brw, &params);
472 mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
473 }
474
475 } /* extern "C" */