From d99ec708afbb785ce05031661222b38c9447059f Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 28 Feb 2011 13:38:01 +0100 Subject: [PATCH] r300g: fix HiZ memory size computation and deciding when to use HiZ I removed the HiZ memory management, because the HiZ RAM is too small and I also did it in hope that HiZ will be enabled more often. This also sets aligned strides to HIZ_PITCH and ZMASK_PITCH. --- src/gallium/drivers/r300/r300_blit.c | 63 ++++++------- src/gallium/drivers/r300/r300_context.c | 17 ++-- src/gallium/drivers/r300/r300_context.h | 26 +++--- src/gallium/drivers/r300/r300_emit.c | 66 +++----------- src/gallium/drivers/r300/r300_hyperz.c | 69 +------------- src/gallium/drivers/r300/r300_hyperz.h | 35 -------- src/gallium/drivers/r300/r300_state.c | 30 ++----- src/gallium/drivers/r300/r300_state_derived.c | 3 +- src/gallium/drivers/r300/r300_texture.c | 8 +- src/gallium/drivers/r300/r300_texture_desc.c | 89 ++++++++++++++----- 10 files changed, 148 insertions(+), 258 deletions(-) delete mode 100644 src/gallium/drivers/r300/r300_hyperz.h diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 4f86db39926..fd8ef444dc4 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -22,7 +22,6 @@ #include "r300_context.h" #include "r300_emit.h" -#include "r300_hyperz.h" #include "r300_texture.h" #include "r300_winsys.h" @@ -117,6 +116,14 @@ static boolean r300_fast_zclear_allowed(struct r300_context *r300) return r300_resource(fb->zsbuf->texture)->tex.zmask_dwords[fb->zsbuf->u.tex.level]; } +static boolean r300_hiz_clear_allowed(struct r300_context *r300) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + + return r300_resource(fb->zsbuf->texture)->tex.hiz_dwords[fb->zsbuf->u.tex.level]; +} + static uint32_t r300_depth_clear_value(enum pipe_format format, double depth, unsigned stencil) { @@ -190,8 +197,6 @@ static void r300_clear(struct pipe_context* pipe, (struct pipe_framebuffer_state*)r300->fb_state.state; struct r300_hyperz_state *hyperz = (struct r300_hyperz_state*)r300->hyperz_state.state; - struct r300_resource *zstex = - fb->zsbuf ? r300_resource(fb->zsbuf->texture) : NULL; uint32_t width = fb->width; uint32_t height = fb->height; boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); @@ -200,20 +205,17 @@ static void r300_clear(struct pipe_context* pipe, /* Enable fast Z clear. * The zbuffer must be in micro-tiled mode, otherwise it locks up. */ if ((buffers & PIPE_CLEAR_DEPTHSTENCIL) && can_hyperz) { - hyperz_dcv = hyperz->zb_depthclearvalue = - r300_depth_clear_value(fb->zsbuf->format, depth, stencil); - if (r300_fast_zclear_allowed(r300)) { + hyperz_dcv = hyperz->zb_depthclearvalue = + r300_depth_clear_value(fb->zsbuf->format, depth, stencil); + r300_mark_atom_dirty(r300, &r300->zmask_clear); buffers &= ~PIPE_CLEAR_DEPTHSTENCIL; } - if (zstex->hiz_mem[fb->zsbuf->u.tex.level]) + if (r300_hiz_clear_allowed(r300)) { r300_mark_atom_dirty(r300, &r300->hiz_clear); - - /* XXX Change this to r300_mark_atom_dirty(r300, &r300->hyperz_state); - * once hiz offset is constant. */ - r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG); + } } /* Enable CBZB clear. */ @@ -240,14 +242,14 @@ static void r300_clear(struct pipe_context* pipe, fb->nr_cbufs, buffers, rgba, depth, stencil); r300_blitter_end(r300); - } else if (r300->zmask_clear.dirty) { + } else if (r300->zmask_clear.dirty || r300->hiz_clear.dirty) { /* Just clear zmask and hiz now, this does not use the standard draw * procedure. */ unsigned dwords; /* Calculate zmask_clear and hiz_clear atom sizes. */ r300_update_hyperz_state(r300); - dwords = r300->zmask_clear.size + + dwords = (r300->zmask_clear.dirty ? r300->zmask_clear.size : 0) + (r300->hiz_clear.dirty ? r300->hiz_clear.size : 0) + r300_get_num_cs_end_dwords(r300); @@ -257,9 +259,11 @@ static void r300_clear(struct pipe_context* pipe, } /* Emit clear packets. */ - r300_emit_zmask_clear(r300, r300->zmask_clear.size, - r300->zmask_clear.state); - r300->zmask_clear.dirty = FALSE; + if (r300->zmask_clear.dirty) { + r300_emit_zmask_clear(r300, r300->zmask_clear.size, + r300->zmask_clear.state); + r300->zmask_clear.dirty = FALSE; + } if (r300->hiz_clear.dirty) { r300_emit_hiz_clear(r300, r300->hiz_clear.size, r300->hiz_clear.state); @@ -279,9 +283,8 @@ static void r300_clear(struct pipe_context* pipe, /* Enable fastfill and/or hiz. * * If we cleared zmask/hiz, it's in use now. The Hyper-Z state update - * looks if zmask/hiz is in use and enables fastfill accordingly. */ - if (r300->zmask_in_use || - (zstex && zstex->hiz_in_use[fb->zsbuf->u.tex.level])) { + * looks if zmask/hiz is in use and programs hardware accordingly. */ + if (r300->zmask_in_use || r300->hiz_in_use) { r300_mark_atom_dirty(r300, &r300->hyperz_state); } } @@ -295,7 +298,7 @@ static void r300_clear_render_target(struct pipe_context *pipe, { struct r300_context *r300 = r300_context(pipe); - r300->zmask_locked = TRUE; + r300->hyperz_locked = TRUE; r300_mark_atom_dirty(r300, &r300->hyperz_state); r300_blitter_begin(r300, R300_CLEAR_SURFACE); @@ -303,7 +306,7 @@ static void r300_clear_render_target(struct pipe_context *pipe, dstx, dsty, width, height); r300_blitter_end(r300); - r300->zmask_locked = FALSE; + r300->hyperz_locked = FALSE; r300_mark_atom_dirty(r300, &r300->hyperz_state); } @@ -320,11 +323,11 @@ static void r300_clear_depth_stencil(struct pipe_context *pipe, struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - if (r300->zmask_in_use && !r300->zmask_locked) { + if (r300->zmask_in_use && !r300->hyperz_locked) { if (fb->zsbuf->texture == dst->texture) { r300_decompress_zmask(r300); } else { - r300->zmask_locked = TRUE; + r300->hyperz_locked = TRUE; r300_mark_atom_dirty(r300, &r300->hyperz_state); } } @@ -334,8 +337,8 @@ static void r300_clear_depth_stencil(struct pipe_context *pipe, dstx, dsty, width, height); r300_blitter_end(r300); - if (r300->zmask_locked) { - r300->zmask_locked = FALSE; + if (r300->hyperz_locked) { + r300->hyperz_locked = FALSE; r300_mark_atom_dirty(r300, &r300->hyperz_state); } } @@ -345,7 +348,7 @@ void r300_decompress_zmask(struct r300_context *r300) struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - if (!r300->zmask_in_use || r300->zmask_locked) + if (!r300->zmask_in_use || r300->hyperz_locked) return; r300->zmask_decompress = TRUE; @@ -420,12 +423,12 @@ static void r300_resource_copy_region(struct pipe_context *pipe, util_format_description(dst->format); struct pipe_box box; - if (r300->zmask_in_use && !r300->zmask_locked) { + if (r300->zmask_in_use && !r300->hyperz_locked) { if (fb->zsbuf->texture == src || fb->zsbuf->texture == dst) { r300_decompress_zmask(r300); } else { - r300->zmask_locked = TRUE; + r300->hyperz_locked = TRUE; r300_mark_atom_dirty(r300, &r300->hyperz_state); } } @@ -502,8 +505,8 @@ static void r300_resource_copy_region(struct pipe_context *pipe, if (old_dst.format != new_dst.format) r300_resource_set_properties(pipe->screen, dst, 0, &old_dst); - if (r300->zmask_locked) { - r300->zmask_locked = FALSE; + if (r300->hyperz_locked) { + r300->hyperz_locked = FALSE; r300_mark_atom_dirty(r300, &r300->hyperz_state); } } diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index d422ffe03f8..61041bfce23 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -30,7 +30,6 @@ #include "r300_cb.h" #include "r300_context.h" #include "r300_emit.h" -#include "r300_hyperz.h" #include "r300_screen.h" #include "r300_screen_buffer.h" #include "r300_winsys.h" @@ -227,7 +226,7 @@ static boolean r300_setup_atoms(struct r300_context* r300) if (can_hyperz) { /* HiZ Clear */ if (has_hiz_ram) - R300_INIT_ATOM(hiz_clear, 0); + R300_INIT_ATOM(hiz_clear, 4); /* zmask clear */ R300_INIT_ATOM(zmask_clear, 4); } @@ -447,16 +446,10 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, /* Render functions must be initialized after blitter. */ r300_init_render_functions(r300); + r300_init_states(&r300->context); rws->cs_set_flush(r300->cs, r300_flush_cb, r300); - /* setup hyper-z mm */ - if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) - if (!r300_hyperz_init_mm(r300)) - goto fail; - - r300_init_states(&r300->context); - /* The KIL opcode needs the first texture unit to be enabled * on r3xx-r4xx. In order to calm down the CS checker, we bind this * dummy texture there. */ @@ -507,10 +500,10 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, } /* Print driver info. */ -#ifdef NDEBUG - if (DBG_ON(r300, DBG_INFO)) { -#else +#ifdef DEBUG { +#else + if (DBG_ON(r300, DBG_INFO)) { #endif fprintf(stderr, "r300: DRM version: %d.%d.%d, Name: %s, ID: 0x%04x, GB: %d, Z: %d\n" diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index e9c7d7bf63f..e18f876fc2d 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -295,6 +295,8 @@ struct r300_surface { uint32_t offset; /* COLOROFFSET or DEPTHOFFSET. */ uint32_t pitch; /* COLORPITCH or DEPTHPITCH. */ + uint32_t pitch_zmask; /* ZMASK_PITCH */ + uint32_t pitch_hiz; /* HIZ_PITCH */ uint32_t format; /* US_OUT_FMT or ZB_FORMAT. */ /* Parameters dedicated to the CBZB clear. */ @@ -363,8 +365,12 @@ struct r300_texture_desc { /* Zbuffer compression info for each miplevel. */ boolean zcomp8x8[R300_MAX_TEXTURE_LEVELS]; - /* If zero, then disable compression. */ + /* If zero, then disable Z compression/HiZ. */ unsigned zmask_dwords[R300_MAX_TEXTURE_LEVELS]; + unsigned hiz_dwords[R300_MAX_TEXTURE_LEVELS]; + /* Zmask/HiZ strides for each miplevel. */ + unsigned zmask_stride_in_pixels[R300_MAX_TEXTURE_LEVELS]; + unsigned hiz_stride_in_pixels[R300_MAX_TEXTURE_LEVELS]; }; struct r300_resource @@ -390,10 +396,6 @@ struct r300_resource /* Where the texture starts in the buffer. */ unsigned tex_offset; - /* HiZ memory allocations. */ - struct mem_block *hiz_mem[R300_MAX_TEXTURE_LEVELS]; - boolean hiz_in_use[R300_MAX_TEXTURE_LEVELS]; - /* This is the level tiling flags were last time set for. * It's used to prevent redundant tiling-flags changes from happening.*/ unsigned surface_level; @@ -545,22 +547,21 @@ struct r300_context { int sprite_coord_enable; /* Whether two-sided color selection is enabled (AKA light_twoside). */ boolean two_sided_color; - + /* Whether fast color clear is enabled. */ boolean cbzb_clear; /* Whether ZMASK is enabled. */ boolean zmask_in_use; /* Whether ZMASK is being decompressed. */ boolean zmask_decompress; - /* Whether ZMASK is locked, i.e. should be disabled and cannot be taken over. */ - boolean zmask_locked; + /* Whether ZMASK/HIZ is locked, i.e. should be disabled and cannot be taken over. */ + boolean hyperz_locked; /* The zbuffer the ZMASK of which is locked. */ struct pipe_surface *locked_zbuffer; + /* Whether HIZ is enabled. */ + boolean hiz_in_use; void *dsa_decompress_zmask; - /* two mem block managers for hiz/zmask ram space */ - struct mem_block *hiz_mm; - struct u_vbuf_mgr *vbuf_mgr; struct util_slab_mempool pool_transfers; @@ -644,6 +645,9 @@ void r300_decompress_zmask(struct r300_context *r300); void r300_decompress_zmask_locked_unsafe(struct r300_context *r300); void r300_decompress_zmask_locked(struct r300_context *r300); +/* r300_hyperz.c */ +void r300_update_hyperz_state(struct r300_context* r300); + /* r300_query.c */ void r300_resume_query(struct r300_context *r300, struct r300_query *query); diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index e2e4719ec82..1adac3454b4 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -425,27 +425,12 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_RELOC(surf); if (can_hyperz) { - uint32_t surf_pitch; - struct r300_resource *tex; - int level = surf->base.u.tex.level; - tex = r300_resource(surf->base.texture); - - surf_pitch = surf->pitch & R300_DEPTHPITCH_MASK; - /* HiZ RAM. */ - if (r300->screen->caps.hiz_ram) { - if (tex->hiz_mem[level]) { - OUT_CS_REG(R300_ZB_HIZ_OFFSET, tex->hiz_mem[level]->ofs << 2); - OUT_CS_REG(R300_ZB_HIZ_PITCH, surf_pitch); - } else { - OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0); - OUT_CS_REG(R300_ZB_HIZ_PITCH, 0); - } - } - + OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0); + OUT_CS_REG(R300_ZB_HIZ_PITCH, surf->pitch_hiz); /* Z Mask RAM. (compressed zbuffer) */ OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0); - OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf_pitch); + OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf->pitch_zmask); } } @@ -1039,56 +1024,29 @@ void r300_emit_viewport_state(struct r300_context* r300, END_CS; } -static void r300_emit_hiz_line_clear(struct r300_context *r300, int start, uint16_t count, uint32_t val) -{ - CS_LOCALS(r300); - BEGIN_CS(4); - OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_HIZ, 2); - OUT_CS(start); - OUT_CS(count); - OUT_CS(val); - END_CS; -} - -#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) - void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state) { struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; struct r300_hyperz_state *z = (struct r300_hyperz_state*)r300->hyperz_state.state; - struct r300_screen* r300screen = r300->screen; - uint32_t stride, offset = 0, height, offset_shift; struct r300_resource* tex; - int i; + CS_LOCALS(r300); tex = r300_resource(fb->zsbuf->texture); - offset = tex->hiz_mem[fb->zsbuf->u.tex.level]->ofs; - stride = tex->tex.stride_in_pixels[fb->zsbuf->u.tex.level]; - - /* convert from pixels to 4x4 blocks */ - stride = ALIGN_DIVUP(stride, 4); - - stride = ALIGN_DIVUP(stride, r300screen->caps.num_frag_pipes); - /* there are 4 blocks per dwords */ - stride = ALIGN_DIVUP(stride, 4); - - height = ALIGN_DIVUP(fb->zsbuf->height, 4); - - offset_shift = 2; - offset_shift += (r300screen->caps.num_frag_pipes / 2); + BEGIN_CS(size); + OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_HIZ, 2); + OUT_CS(0); + OUT_CS(tex->tex.hiz_dwords[fb->zsbuf->u.tex.level]); + OUT_CS(0xffffffff); + END_CS; - for (i = 0; i < height; i++) { - offset = i * stride; - offset <<= offset_shift; - r300_emit_hiz_line_clear(r300, offset, stride, 0xffffffff); - } z->current_func = -1; /* Mark the current zbuffer's hiz ram as in use. */ - tex->hiz_in_use[fb->zsbuf->u.tex.level] = TRUE; + r300->hiz_in_use = TRUE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); } void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state) diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c index 873e0209d42..7ff643f84db 100644 --- a/src/gallium/drivers/r300/r300_hyperz.c +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -22,7 +22,6 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "r300_context.h" -#include "r300_hyperz.h" #include "r300_reg.h" #include "r300_fs.h" #include "r300_winsys.h" @@ -100,6 +99,7 @@ static boolean r300_can_hiz(struct r300_context *r300) if (r300->query_current) return FALSE; + /* if stencil fail/zfail op is not KEEP */ if (r300_dsa_stencil_op_not_keep(&dsa->stencil[0]) || r300_dsa_stencil_op_not_keep(&dsa->stencil[1])) @@ -139,7 +139,6 @@ static void r300_update_hyperz(struct r300_context* r300) (struct pipe_framebuffer_state*)r300->fb_state.state; struct r300_resource *zstex = fb->zsbuf ? r300_resource(fb->zsbuf->texture) : NULL; - boolean hiz_in_use = FALSE; z->gb_z_peq_config = 0; z->zb_bw_cntl = 0; @@ -157,10 +156,8 @@ static void r300_update_hyperz(struct r300_context* r300) if (!r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) return; - hiz_in_use = zstex->hiz_in_use[fb->zsbuf->u.tex.level]; - /* Zbuffer compression. */ - if (r300->zmask_in_use && !r300->zmask_locked) { + if (r300->zmask_in_use && !r300->hyperz_locked) { z->zb_bw_cntl |= R300_FAST_FILL_ENABLE | /*R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE |*/ R300_RD_COMP_ENABLE; @@ -174,7 +171,8 @@ static void r300_update_hyperz(struct r300_context* r300) z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8; } - if (hiz_in_use && r300_can_hiz(r300)) { + /* XXX Use can_hiz to disable hyperz for good, instead of turning it off/on. */ + if (r300->hiz_in_use && !r300->hyperz_locked && r300_can_hiz(r300)) { z->zb_bw_cntl |= R300_HIZ_ENABLE | r300_get_hiz_min(r300); @@ -282,18 +280,6 @@ static void r300_update_ztop(struct r300_context* r300) r300_mark_atom_dirty(r300, &r300->ztop_state); } -#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) - -static void r300_update_hiz_clear(struct r300_context *r300) -{ - struct pipe_framebuffer_state *fb = - (struct pipe_framebuffer_state*)r300->fb_state.state; - uint32_t height; - - height = ALIGN_DIVUP(fb->zsbuf->height, 4); - r300->hiz_clear.size = height * 4; -} - void r300_update_hyperz_state(struct r300_context* r300) { r300_update_ztop(r300); @@ -301,51 +287,4 @@ void r300_update_hyperz_state(struct r300_context* r300) if (r300->hyperz_state.dirty) { r300_update_hyperz(r300); } - - if (r300->hiz_clear.dirty) { - r300_update_hiz_clear(r300); - } -} - -void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf) -{ - struct r300_resource *tex; - uint32_t zsize, ndw; - int level = surf->base.u.tex.level; - - tex = r300_resource(surf->base.texture); - - if (tex->hiz_mem[level]) - return; - - zsize = tex->tex.layer_size_in_bytes[level]; - zsize /= util_format_get_blocksize(tex->b.b.b.format); - ndw = ALIGN_DIVUP(zsize, 64); - - tex->hiz_mem[level] = u_mmAllocMem(r300->hiz_mm, ndw, 0, 0); -} - -boolean r300_hyperz_init_mm(struct r300_context *r300) -{ - struct r300_screen* r300screen = r300->screen; - int frag_pipes = r300screen->caps.num_frag_pipes; - - if (r300screen->caps.hiz_ram) { - r300->hiz_mm = u_mmInit(0, r300screen->caps.hiz_ram * frag_pipes); - if (!r300->hiz_mm) { - return FALSE; - } - } - - return TRUE; -} - -void r300_hyperz_destroy_mm(struct r300_context *r300) -{ - struct r300_screen* r300screen = r300->screen; - - if (r300screen->caps.hiz_ram) { - u_mmDestroy(r300->hiz_mm); - r300->hiz_mm = NULL; - } } diff --git a/src/gallium/drivers/r300/r300_hyperz.h b/src/gallium/drivers/r300/r300_hyperz.h deleted file mode 100644 index d4c8e7c60a9..00000000000 --- a/src/gallium/drivers/r300/r300_hyperz.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright 2010 Marek Olšák - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#ifndef R300_HYPERZ_H -#define R300_HYPERZ_H - -struct r300_context; - -void r300_update_hyperz_state(struct r300_context* r300); - -void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf); - -boolean r300_hyperz_init_mm(struct r300_context *r300); -void r300_hyperz_destroy_mm(struct r300_context *r300); - -#endif diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 09f18b3e624..be222155e67 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -45,7 +45,6 @@ #include "r300_texture.h" #include "r300_vs.h" #include "r300_winsys.h" -#include "r300_hyperz.h" /* r300_state: Functions used to intialize state context by translating * Gallium state objects into semi-native r300 state objects. */ @@ -707,7 +706,7 @@ void r300_mark_fb_state_dirty(struct r300_context *r300, else if (state->zsbuf) { r300->fb_state.size += 10; if (can_hyperz) - r300->fb_state.size += r300->screen->caps.hiz_ram ? 8 : 4; + r300->fb_state.size += 8; } /* The size of the rest of atoms stays the same. */ @@ -720,7 +719,6 @@ r300_set_framebuffer_state(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state; struct pipe_framebuffer_state *old_state = r300->fb_state.state; - boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); unsigned max_width, max_height, i; uint32_t zbuffer_bpp = 0; @@ -738,28 +736,30 @@ r300_set_framebuffer_state(struct pipe_context* pipe, return; } - if (old_state->zsbuf && r300->zmask_in_use && !r300->zmask_locked) { + if (old_state->zsbuf && r300->zmask_in_use && !r300->hyperz_locked) { /* There is a zmask in use, what are we gonna do? */ if (state->zsbuf) { if (!pipe_surface_equal(old_state->zsbuf, state->zsbuf)) { /* Decompress the currently bound zbuffer before we bind another one. */ r300_decompress_zmask(r300); + r300->hiz_in_use = FALSE; } } else { /* We don't bind another zbuffer, so lock the current one. */ - r300->zmask_locked = TRUE; + r300->hyperz_locked = TRUE; pipe_surface_reference(&r300->locked_zbuffer, old_state->zsbuf); } - } else if (r300->zmask_locked && r300->locked_zbuffer) { + } else if (r300->hyperz_locked && r300->locked_zbuffer) { /* We have a locked zbuffer now, what are we gonna do? */ if (state->zsbuf) { if (!pipe_surface_equal(r300->locked_zbuffer, state->zsbuf)) { /* We are binding some other zbuffer, so decompress the locked one, * it gets unlocked automatically. */ r300_decompress_zmask_locked_unsafe(r300); + r300->hiz_in_use = FALSE; } else { /* We are binding the locked zbuffer again, so unlock it. */ - r300->zmask_locked = FALSE; + r300->hyperz_locked = FALSE; } } } @@ -778,7 +778,7 @@ r300_set_framebuffer_state(struct pipe_context* pipe, util_copy_framebuffer_state(r300->fb_state.state, state); - if (!r300->zmask_locked) { + if (!r300->hyperz_locked) { pipe_surface_reference(&r300->locked_zbuffer, NULL); } @@ -794,20 +794,6 @@ r300_set_framebuffer_state(struct pipe_context* pipe, break; } - /* Setup Hyper-Z. */ - if (can_hyperz) { - struct r300_surface *zs_surf = r300_surface(state->zsbuf); - struct r300_resource *tex = r300_resource(zs_surf->base.texture); - int level = zs_surf->base.u.tex.level; - - /* work out whether we can support hiz on this buffer */ - r300_hiz_alloc_block(r300, zs_surf); - - DBG(r300, DBG_HYPERZ, - "hyper-z features: hiz: %d @ %08x\n", tex->hiz_mem[level] ? 1 : 0, - tex->hiz_mem[level] ? tex->hiz_mem[level]->ofs : 0xdeadbeef); - } - /* Polygon offset depends on the zbuffer bit depth. */ if (r300->zbuffer_bpp != zbuffer_bpp) { r300->zbuffer_bpp = zbuffer_bpp; diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 003fe9a58cd..7776ab5a3ff 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -29,7 +29,6 @@ #include "r300_context.h" #include "r300_fs.h" -#include "r300_hyperz.h" #include "r300_screen.h" #include "r300_shader_semantics.h" #include "r300_state_inlines.h" @@ -937,7 +936,7 @@ static void r300_decompress_depth_textures(struct r300_context *r300) state->sampler_state_count); unsigned i; - if (!r300->zmask_locked || !r300->locked_zbuffer) { + if (!r300->hyperz_locked || !r300->locked_zbuffer) { return; } diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index b97c45ac198..bbd3f972a55 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -676,6 +676,8 @@ static void r300_texture_setup_fb_state(struct r300_surface *surf) R300_DEPTHMACROTILE(tex->tex.macrotile[level]) | R300_DEPTHMICROTILE(tex->tex.microtile); surf->format = r300_translate_zsformat(surf->base.format); + surf->pitch_zmask = tex->tex.zmask_stride_in_pixels[level]; + surf->pitch_hiz = tex->tex.hiz_stride_in_pixels[level]; } else { surf->pitch = tex->tex.stride_in_pixels[level] | @@ -713,14 +715,8 @@ static void r300_texture_destroy(struct pipe_screen *screen, struct pipe_resource* texture) { struct r300_resource* tex = (struct r300_resource*)texture; - int i; r300_winsys_bo_reference(&tex->buf, NULL); - for (i = 0; i < R300_MAX_TEXTURE_LEVELS; i++) { - if (tex->hiz_mem[i]) - u_mmFreeMem(tex->hiz_mem[i]); - } - FREE(tex); } diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c index 2cfeec7d751..9dcdf153e35 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.c +++ b/src/gallium/drivers/r300/r300_texture_desc.c @@ -334,12 +334,17 @@ static void r300_setup_cbzb_flags(struct r300_screen *rscreen, tex->tex.cbzb_allowed[i] = first_level_valid && tex->tex.macrotile[i]; } -#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) +static unsigned r300_pixels_to_dwords(unsigned stride, + unsigned height, + unsigned xblock, unsigned yblock) +{ + return (align(stride, xblock) * align(height, yblock)) / (xblock * yblock); +} -static void r300_setup_zmask_flags(struct r300_screen *screen, - struct r300_resource *tex) +static void r300_setup_hyperz_properties(struct r300_screen *screen, + struct r300_resource *tex) { - /* The tile size of 1 DWORD is: + /* The tile size of 1 DWORD in ZMASK RAM is: * * GPU Pipes 4x4 mode 8x8 mode * ------------------------------------------ @@ -348,8 +353,31 @@ static void r300_setup_zmask_flags(struct r300_screen *screen, * RV530 1P/2Z 32x16 64x32 * 1P/1Z 16x16 32x32 */ - static unsigned num_blocks_x_per_dw[4] = {4, 8, 12, 8}; - static unsigned num_blocks_y_per_dw[4] = {4, 4, 4, 8}; + static unsigned zmask_blocks_x_per_dw[4] = {4, 8, 12, 8}; + static unsigned zmask_blocks_y_per_dw[4] = {4, 4, 4, 8}; + + /* In HIZ RAM, one dword is always 8x8 pixels (each byte is 4x4 pixels), + * but the blocks have very weird ordering. + * + * With 2 pipes and an image of size 8xY, where Y >= 1, + * clearing 4 dwords clears blocks like this: + * + * 01012323 + * + * where numbers correspond to dword indices. The blocks are interleaved + * in the X direction, so the alignment must be 4x1 blocks (32x8 pixels). + * + * With 4 pipes and an image of size 8xY, where Y >= 4, + * clearing 8 dwords clears blocks like this: + * 01012323 + * 45456767 + * 01012323 + * 45456767 + * where numbers correspond to dword indices. The blocks are interleaved + * in both directions, so the alignment must be 4x4 blocks (32x32 pixels) + */ + static unsigned hiz_align_x[4] = {8, 32, 48, 32}; + static unsigned hiz_align_y[4] = {8, 8, 8, 32}; if (util_format_is_depth_or_stencil(tex->b.b.b.format) && util_format_get_blocksizebits(tex->b.b.b.format) == 32 && @@ -363,30 +391,49 @@ static void r300_setup_zmask_flags(struct r300_screen *screen, } for (i = 0; i <= tex->b.b.b.last_level; i++) { - unsigned numdw, compsize; + unsigned zcomp_numdw, zcompsize, hiz_numdw, stride, height; + + stride = align(tex->tex.stride_in_pixels[i], 16); + height = u_minify(tex->b.b.b.height0, i); /* The 8x8 compression mode needs macrotiling. */ - compsize = screen->caps.z_compress == R300_ZCOMP_8X8 && + zcompsize = screen->caps.z_compress == R300_ZCOMP_8X8 && tex->tex.macrotile[i] && tex->b.b.b.nr_samples <= 1 ? 8 : 4; - /* Get the zbuffer size (with the aligned width and height). */ - numdw = align(tex->tex.stride_in_pixels[i], - num_blocks_x_per_dw[pipes-1] * compsize) * - align(u_minify(tex->b.b.b.height0, i), - num_blocks_y_per_dw[pipes-1] * compsize); + /* Get the ZMASK buffer size in dwords. */ + zcomp_numdw = r300_pixels_to_dwords(stride, height, + zmask_blocks_x_per_dw[pipes-1] * zcompsize, + zmask_blocks_y_per_dw[pipes-1] * zcompsize); - /* Convert pixels -> dwords. */ - numdw = ALIGN_DIVUP(numdw, num_blocks_x_per_dw[pipes-1] * compsize * - num_blocks_y_per_dw[pipes-1] * compsize); + /* Check whether we have enough ZMASK memory. */ + if (util_format_get_blocksizebits(tex->b.b.b.format) == 32 && + zcomp_numdw <= screen->caps.zmask_ram * pipes) { + tex->tex.zmask_dwords[i] = zcomp_numdw; + tex->tex.zcomp8x8[i] = zcompsize == 8; - /* Check that we have enough ZMASK memory. */ - if (numdw <= screen->caps.zmask_ram * pipes) { - tex->tex.zmask_dwords[i] = numdw; - tex->tex.zcomp8x8[i] = compsize == 8; + tex->tex.zmask_stride_in_pixels[i] = + align(stride, zmask_blocks_x_per_dw[pipes-1] * zcompsize); } else { tex->tex.zmask_dwords[i] = 0; tex->tex.zcomp8x8[i] = FALSE; + tex->tex.zmask_stride_in_pixels[i] = 0; + } + + /* Now setup HIZ. */ + stride = align(stride, hiz_align_x[pipes-1]); + height = align(height, hiz_align_y[pipes-1]); + + /* Get the HIZ buffer size in dwords. */ + hiz_numdw = (stride * height) / (8*8 * pipes); + + /* Check whether we have enough HIZ memory. */ + if (hiz_numdw <= screen->caps.hiz_ram * pipes) { + tex->tex.hiz_dwords[i] = hiz_numdw; + tex->tex.hiz_stride_in_pixels[i] = stride; + } else { + tex->tex.hiz_dwords[i] = 0; + tex->tex.hiz_stride_in_pixels[i] = 0; } } } @@ -495,7 +542,7 @@ boolean r300_texture_desc_init(struct r300_screen *rscreen, } r300_texture_3d_fix_mipmapping(rscreen, tex); - r300_setup_zmask_flags(rscreen, tex); + r300_setup_hyperz_properties(rscreen, tex); if (tex->buf_size) { /* Make sure the buffer we got is large enough. */ -- 2.30.2