r300g: fix and re-enable 8x8 zbuffer compression mode
[mesa.git] / src / gallium / drivers / r300 / r300_blit.c
1 /*
2 * Copyright 2009 Marek Olšák <maraeo@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
22
23 #include "r300_context.h"
24 #include "r300_emit.h"
25 #include "r300_hyperz.h"
26 #include "r300_texture.h"
27 #include "r300_winsys.h"
28
29 #include "util/u_format.h"
30 #include "util/u_pack_color.h"
31
/* Kinds of operation the blitter helpers below are asked to perform.
 * Every enumerator occupies its own bit so that several of them can be
 * tested with a single mask. */
enum r300_blitter_op /* bitmask */
{
    R300_CLEAR         = 1 << 0,
    R300_CLEAR_SURFACE = 1 << 1,
    R300_COPY          = 1 << 2
};
38
39 static void r300_blitter_begin(struct r300_context* r300, enum r300_blitter_op op)
40 {
41 if (r300->query_current) {
42 r300->blitter_saved_query = r300->query_current;
43 r300_stop_query(r300);
44 }
45
46 /* Yeah we have to save all those states to ensure the blitter operation
47 * is really transparent. The states will be restored by the blitter once
48 * copying is done. */
49 util_blitter_save_blend(r300->blitter, r300->blend_state.state);
50 util_blitter_save_depth_stencil_alpha(r300->blitter, r300->dsa_state.state);
51 util_blitter_save_stencil_ref(r300->blitter, &(r300->stencil_ref));
52 util_blitter_save_rasterizer(r300->blitter, r300->rs_state.state);
53 util_blitter_save_fragment_shader(r300->blitter, r300->fs.state);
54 util_blitter_save_vertex_shader(r300->blitter, r300->vs_state.state);
55 util_blitter_save_viewport(r300->blitter, &r300->viewport);
56 util_blitter_save_clip(r300->blitter, (struct pipe_clip_state*)r300->clip_state.state);
57 util_blitter_save_vertex_elements(r300->blitter, r300->velems);
58 util_blitter_save_vertex_buffers(r300->blitter, r300->vertex_buffer_count,
59 r300->vertex_buffer);
60
61 if (op & (R300_CLEAR_SURFACE | R300_COPY)) {
62 util_blitter_save_framebuffer(r300->blitter, r300->fb_state.state);
63 }
64
65 if (op & R300_COPY) {
66 struct r300_textures_state* state =
67 (struct r300_textures_state*)r300->textures_state.state;
68
69 util_blitter_save_fragment_sampler_states(
70 r300->blitter, state->sampler_state_count,
71 (void**)state->sampler_states);
72
73 util_blitter_save_fragment_sampler_views(
74 r300->blitter, state->sampler_view_count,
75 (struct pipe_sampler_view**)state->sampler_views);
76 }
77 }
78
79 static void r300_blitter_end(struct r300_context *r300)
80 {
81 if (r300->blitter_saved_query) {
82 r300_resume_query(r300, r300->blitter_saved_query);
83 r300->blitter_saved_query = NULL;
84 }
85 }
86
87 static uint32_t r300_depth_clear_cb_value(enum pipe_format format,
88 const float* rgba)
89 {
90 union util_color uc;
91 util_pack_color(rgba, format, &uc);
92
93 if (util_format_get_blocksizebits(format) == 32)
94 return uc.ui;
95 else
96 return uc.us | (uc.us << 16);
97 }
98
99 static boolean r300_cbzb_clear_allowed(struct r300_context *r300,
100 unsigned clear_buffers)
101 {
102 struct pipe_framebuffer_state *fb =
103 (struct pipe_framebuffer_state*)r300->fb_state.state;
104
105 /* Only color clear allowed, and only one colorbuffer. */
106 if (clear_buffers != PIPE_CLEAR_COLOR || fb->nr_cbufs != 1)
107 return FALSE;
108
109 return r300_surface(fb->cbufs[0])->cbzb_allowed;
110 }
111
112 static boolean r300_fast_zclear_allowed(struct r300_context *r300)
113 {
114 struct pipe_framebuffer_state *fb =
115 (struct pipe_framebuffer_state*)r300->fb_state.state;
116
117 return r300_texture(fb->zsbuf->texture)->desc.zmask_dwords[fb->zsbuf->u.tex.level];
118 }
119
120 static uint32_t r300_depth_clear_value(enum pipe_format format,
121 double depth, unsigned stencil)
122 {
123 switch (format) {
124 case PIPE_FORMAT_Z16_UNORM:
125 case PIPE_FORMAT_X8Z24_UNORM:
126 return util_pack_z(format, depth);
127
128 case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
129 return util_pack_z_stencil(format, depth, stencil);
130
131 default:
132 assert(0);
133 return 0;
134 }
135 }
136
137 /* Clear currently bound buffers. */
138 static void r300_clear(struct pipe_context* pipe,
139 unsigned buffers,
140 const float* rgba,
141 double depth,
142 unsigned stencil)
143 {
144 /* My notes about Zbuffer compression:
145 *
146 * 1) The zbuffer must be micro-tiled and whole microtiles must be
147 * written if compression is enabled. If microtiling is disabled,
148 * it locks up.
149 *
150 * 2) There is ZMASK RAM which contains a compressed zbuffer.
151 * Each dword of the Z Mask contains compression information
152 * for 16 4x4 pixel tiles, that is 2 bits for each tile.
153 * On chips with 2 Z pipes, every other dword maps to a different
154 * pipe. On newer chipsets, there is a new compression mode
155 * with 8x8 pixel tiles per 2 bits.
156 *
157 * 3) The FASTFILL bit has nothing to do with filling. It only tells hw
158 * it should look in the ZMASK RAM first before fetching from a real
159 * zbuffer.
160 *
161 * 4) If a pixel is in a cleared state, ZB_DEPTHCLEARVALUE is returned
162 * during zbuffer reads instead of the value that is actually stored
163 * in the zbuffer memory. A pixel is in a cleared state when its ZMASK
164 * is equal to 0. Therefore, if you clear ZMASK with zeros, you may
165 * leave the zbuffer memory uninitialized, but then you must enable
166 * compression, so that the ZMASK RAM is actually used.
167 *
168 * 5) Each 4x4 (or 8x8) tile is automatically decompressed and recompressed
169 * during zbuffer updates. A special decompressing operation should be
170 * used to fully decompress a zbuffer, which basically just stores all
171 * compressed tiles in ZMASK to the zbuffer memory.
172 *
173 * 6) For a 16-bit zbuffer, compression causes a hung with one or
174 * two samples and should not be used.
175 *
176 * 7) FORCE_COMPRESSED_STENCIL_VALUE should be enabled for stencil clears
177 * to avoid needless decompression.
178 *
179 * 8) Fastfill must not be used if reading of compressed Z data is disabled
180 * and writing of compressed Z data is enabled (RD/WR_COMP_ENABLE),
181 * i.e. it cannot be used to compress the zbuffer.
182 *
183 * 9) ZB_CB_CLEAR does not interact with zbuffer compression in any way.
184 *
185 * - Marek
186 */
187
188 struct r300_context* r300 = r300_context(pipe);
189 struct pipe_framebuffer_state *fb =
190 (struct pipe_framebuffer_state*)r300->fb_state.state;
191 struct r300_hyperz_state *hyperz =
192 (struct r300_hyperz_state*)r300->hyperz_state.state;
193 struct r300_texture *zstex =
194 fb->zsbuf ? r300_texture(fb->zsbuf->texture) : NULL;
195 uint32_t width = fb->width;
196 uint32_t height = fb->height;
197 boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ);
198 uint32_t hyperz_dcv = hyperz->zb_depthclearvalue;
199
200 /* Enable fast Z clear.
201 * The zbuffer must be in micro-tiled mode, otherwise it locks up. */
202 if ((buffers & PIPE_CLEAR_DEPTHSTENCIL) && can_hyperz) {
203 hyperz_dcv = hyperz->zb_depthclearvalue =
204 r300_depth_clear_value(fb->zsbuf->format, depth, stencil);
205
206 if (r300_fast_zclear_allowed(r300)) {
207 r300_mark_atom_dirty(r300, &r300->zmask_clear);
208 buffers &= ~PIPE_CLEAR_DEPTHSTENCIL;
209 }
210
211 if (zstex->hiz_mem[fb->zsbuf->u.tex.level])
212 r300_mark_atom_dirty(r300, &r300->hiz_clear);
213
214 /* XXX Change this to r300_mark_atom_dirty(r300, &r300->hyperz_state);
215 * once hiz offset is constant. */
216 r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG);
217 }
218
219 /* Enable CBZB clear. */
220 if (r300_cbzb_clear_allowed(r300, buffers)) {
221 struct r300_surface *surf = r300_surface(fb->cbufs[0]);
222
223 hyperz->zb_depthclearvalue =
224 r300_depth_clear_cb_value(surf->base.format, rgba);
225
226 width = surf->cbzb_width;
227 height = surf->cbzb_height;
228
229 r300->cbzb_clear = TRUE;
230 r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG);
231 }
232
233 /* Clear. */
234 if (buffers) {
235 /* Clear using the blitter. */
236 r300_blitter_begin(r300, R300_CLEAR);
237 util_blitter_clear(r300->blitter,
238 width,
239 height,
240 fb->nr_cbufs,
241 buffers, rgba, depth, stencil);
242 r300_blitter_end(r300);
243 } else if (r300->zmask_clear.dirty) {
244 /* Just clear zmask and hiz now, this does not use the standard draw
245 * procedure. */
246 unsigned dwords;
247
248 /* Calculate zmask_clear and hiz_clear atom sizes. */
249 r300_update_hyperz_state(r300);
250 dwords = r300->zmask_clear.size +
251 (r300->hiz_clear.dirty ? r300->hiz_clear.size : 0) +
252 r300_get_num_cs_end_dwords(r300);
253
254 /* Reserve CS space. */
255 if (dwords > (R300_MAX_CMDBUF_DWORDS - r300->cs->cdw)) {
256 r300->context.flush(&r300->context, 0, NULL);
257 }
258
259 /* Emit clear packets. */
260 r300_emit_zmask_clear(r300, r300->zmask_clear.size,
261 r300->zmask_clear.state);
262 r300->zmask_clear.dirty = FALSE;
263 if (r300->hiz_clear.dirty) {
264 r300_emit_hiz_clear(r300, r300->hiz_clear.size,
265 r300->hiz_clear.state);
266 r300->hiz_clear.dirty = FALSE;
267 }
268 } else {
269 assert(0);
270 }
271
272 /* Disable CBZB clear. */
273 if (r300->cbzb_clear) {
274 r300->cbzb_clear = FALSE;
275 hyperz->zb_depthclearvalue = hyperz_dcv;
276 r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG);
277 }
278
279 /* Enable fastfill and/or hiz.
280 *
281 * If we cleared zmask/hiz, it's in use now. The Hyper-Z state update
282 * looks if zmask/hiz is in use and enables fastfill accordingly. */
283 if (r300->zmask_in_use ||
284 (zstex && zstex->hiz_in_use[fb->zsbuf->u.tex.level])) {
285 r300_mark_atom_dirty(r300, &r300->hyperz_state);
286 }
287 }
288
289 /* Clear a region of a color surface to a constant value. */
290 static void r300_clear_render_target(struct pipe_context *pipe,
291 struct pipe_surface *dst,
292 const float *rgba,
293 unsigned dstx, unsigned dsty,
294 unsigned width, unsigned height)
295 {
296 struct r300_context *r300 = r300_context(pipe);
297
298 r300->zmask_locked = TRUE;
299 r300_mark_atom_dirty(r300, &r300->hyperz_state);
300
301 r300_blitter_begin(r300, R300_CLEAR_SURFACE);
302 util_blitter_clear_render_target(r300->blitter, dst, rgba,
303 dstx, dsty, width, height);
304 r300_blitter_end(r300);
305
306 r300->zmask_locked = FALSE;
307 r300_mark_atom_dirty(r300, &r300->hyperz_state);
308 }
309
310 /* Clear a region of a depth stencil surface. */
311 static void r300_clear_depth_stencil(struct pipe_context *pipe,
312 struct pipe_surface *dst,
313 unsigned clear_flags,
314 double depth,
315 unsigned stencil,
316 unsigned dstx, unsigned dsty,
317 unsigned width, unsigned height)
318 {
319 struct r300_context *r300 = r300_context(pipe);
320 struct pipe_framebuffer_state *fb =
321 (struct pipe_framebuffer_state*)r300->fb_state.state;
322
323 if (r300->zmask_in_use && !r300->zmask_locked) {
324 if (fb->zsbuf->texture == dst->texture) {
325 r300_decompress_zmask(r300);
326 } else {
327 r300->zmask_locked = TRUE;
328 r300_mark_atom_dirty(r300, &r300->hyperz_state);
329 }
330 }
331
332 r300_blitter_begin(r300, R300_CLEAR_SURFACE);
333 util_blitter_clear_depth_stencil(r300->blitter, dst, clear_flags, depth, stencil,
334 dstx, dsty, width, height);
335 r300_blitter_end(r300);
336
337 if (r300->zmask_locked) {
338 r300->zmask_locked = FALSE;
339 r300_mark_atom_dirty(r300, &r300->hyperz_state);
340 }
341 }
342
343 void r300_decompress_zmask(struct r300_context *r300)
344 {
345 struct pipe_framebuffer_state *fb =
346 (struct pipe_framebuffer_state*)r300->fb_state.state;
347
348 if (!r300->zmask_in_use || r300->zmask_locked)
349 return;
350
351 r300->zmask_decompress = TRUE;
352 r300_mark_atom_dirty(r300, &r300->hyperz_state);
353
354 r300_blitter_begin(r300, R300_CLEAR);
355 util_blitter_clear_depth_custom(r300->blitter, fb->width, fb->height, 0,
356 r300->dsa_decompress_zmask);
357 r300_blitter_end(r300);
358
359 r300->zmask_decompress = FALSE;
360 r300->zmask_in_use = FALSE;
361 r300_mark_atom_dirty(r300, &r300->hyperz_state);
362 }
363
364 void r300_decompress_zmask_locked_unsafe(struct r300_context *r300)
365 {
366 struct pipe_framebuffer_state fb = {0};
367 fb.width = r300->locked_zbuffer->width;
368 fb.height = r300->locked_zbuffer->height;
369 fb.nr_cbufs = 0;
370 fb.zsbuf = r300->locked_zbuffer;
371
372 r300->context.set_framebuffer_state(&r300->context, &fb);
373 r300_decompress_zmask(r300);
374 }
375
376 void r300_decompress_zmask_locked(struct r300_context *r300)
377 {
378 struct pipe_framebuffer_state saved_fb = {0};
379
380 util_copy_framebuffer_state(&saved_fb, r300->fb_state.state);
381 r300_decompress_zmask_locked_unsafe(r300);
382 r300->context.set_framebuffer_state(&r300->context, &saved_fb);
383 util_unreference_framebuffer_state(&saved_fb);
384 }
385
386 /* Copy a block of pixels from one surface to another using HW. */
387 static void r300_hw_copy_region(struct pipe_context* pipe,
388 struct pipe_resource *dst,
389 unsigned dst_level,
390 unsigned dstx, unsigned dsty, unsigned dstz,
391 struct pipe_resource *src,
392 unsigned src_level,
393 const struct pipe_box *src_box)
394 {
395 struct r300_context* r300 = r300_context(pipe);
396
397 r300_blitter_begin(r300, R300_COPY);
398 util_blitter_copy_region(r300->blitter, dst, dst_level, dstx, dsty, dstz,
399 src, src_level, src_box, TRUE);
400 r300_blitter_end(r300);
401 }
402
403 /* Copy a block of pixels from one surface to another. */
404 static void r300_resource_copy_region(struct pipe_context *pipe,
405 struct pipe_resource *dst,
406 unsigned dst_level,
407 unsigned dstx, unsigned dsty, unsigned dstz,
408 struct pipe_resource *src,
409 unsigned src_level,
410 const struct pipe_box *src_box)
411 {
412 struct r300_context *r300 = r300_context(pipe);
413 struct pipe_framebuffer_state *fb =
414 (struct pipe_framebuffer_state*)r300->fb_state.state;
415 enum pipe_format old_format = dst->format;
416 enum pipe_format new_format = old_format;
417 const struct util_format_description *desc = util_format_description(old_format);
418
419 if (r300->zmask_in_use && !r300->zmask_locked) {
420 if (fb->zsbuf->texture == src ||
421 fb->zsbuf->texture == dst) {
422 r300_decompress_zmask(r300);
423 } else {
424 r300->zmask_locked = TRUE;
425 r300_mark_atom_dirty(r300, &r300->hyperz_state);
426 }
427 }
428
429 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB ||
430 (!pipe->screen->is_format_supported(pipe->screen,
431 old_format, src->target,
432 src->nr_samples,
433 PIPE_BIND_RENDER_TARGET |
434 PIPE_BIND_SAMPLER_VIEW, 0) &&
435 desc->layout == UTIL_FORMAT_LAYOUT_PLAIN)) {
436 switch (util_format_get_blocksize(old_format)) {
437 case 1:
438 new_format = PIPE_FORMAT_I8_UNORM;
439 break;
440 case 2:
441 new_format = PIPE_FORMAT_B4G4R4A4_UNORM;
442 break;
443 case 4:
444 new_format = PIPE_FORMAT_B8G8R8A8_UNORM;
445 break;
446 case 8:
447 new_format = PIPE_FORMAT_R16G16B16A16_UNORM;
448 break;
449 default:
450 debug_printf("r300: surface_copy: Unhandled format: %s. Falling back to software.\n"
451 "r300: surface_copy: Software fallback doesn't work for tiled textures.\n",
452 util_format_short_name(old_format));
453 }
454 }
455
456 if (old_format != new_format) {
457 r300_texture_reinterpret_format(pipe->screen,
458 dst, new_format);
459 r300_texture_reinterpret_format(pipe->screen,
460 src, new_format);
461 }
462
463 r300_hw_copy_region(pipe, dst, dst_level, dstx, dsty, dstz,
464 src, src_level, src_box);
465
466 if (old_format != new_format) {
467 r300_texture_reinterpret_format(pipe->screen,
468 dst, old_format);
469 r300_texture_reinterpret_format(pipe->screen,
470 src, old_format);
471 }
472
473 if (r300->zmask_locked) {
474 r300->zmask_locked = FALSE;
475 r300_mark_atom_dirty(r300, &r300->hyperz_state);
476 }
477 }
478
479 void r300_init_blit_functions(struct r300_context *r300)
480 {
481 r300->context.clear = r300_clear;
482 r300->context.clear_render_target = r300_clear_render_target;
483 r300->context.clear_depth_stencil = r300_clear_depth_stencil;
484 r300->context.resource_copy_region = r300_resource_copy_region;
485 }