r600g: add htile support v16
authorJerome Glisse <jglisse@redhat.com>
Thu, 11 Oct 2012 14:40:30 +0000 (10:40 -0400)
committerJerome Glisse <jglisse@redhat.com>
Thu, 20 Dec 2012 23:23:51 +0000 (18:23 -0500)
htile is used for HiZ and HiS support and fast Z/S clears.
This commit just adds the htile setup and Fast Z clear.
We don't take full advantage of HiS with that patch.

v2 really use fast clear, still random issue with some tiles
   need to try more flush combination, fix depth/stencil
   texture decompression
v3 fix random issue on r6xx/r7xx
v4 rebase on top of latest mesa, disable CB export when clearing
   htile surface to avoid wasting bandwidth
v5 resummarize htile surface when uploading z value. Fix z/stencil
   decompression, the custom blitter with custom dsa is no longer
   needed.
v6 Reorganize render control/override update mechanism, fixing more
   issues in the process.
v7 Add nop after depth surface base update to work around some htile
   flushing issue. Force htile to 8x8 on r6xx/r7xx as other combinations
   have issues. Do not enable hyperz when flushing/uncompressing
   depth buffer.
v8 Fix htile surface, preload and prefetch setup. Only set preload
   and prefetch on htile surface clear like fglrx. Record depth
   clear value per level. Support several level for the htile
   surface. First depth clear can't be a fast clear.
v9 Fix comments, properly account new register in emit function,
   disable fast zclear if clearing different layer of texture
   array to different value
v10 Disable hyperz for texture array making test simpler. Force
    db_misc_state update when no depth buffer is bound. Remove
    unused variable, rename depth_clearstencil to depth_clear.
    Don't allocate htile surface for flushed depth. Something
    broke the cliprect change; this needs to be investigated.
v11 Rebase on top of newer mesa
v12 Rebase on top of newer mesa
v13 Rebase on top of newer mesa, htile surface needs to be initialized
    to zero, somehow special casing first clear to not use fast clear
    and thus initialize the htile surface with proper value does not
    work in all cases.
v14 Use resource not texture for htile buffer, making the htile buffer
    size computation easier and simpler. Disable preload on evergreen
    as it's still troublesome in some cases
v15 Clean up some comments and remove some leftovers
v16 Define name for bit 20 of CP_COHER_CNTL

Signed-off-by: Pierre-Eric Pelloux-Prayer <pelloux@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
src/gallium/drivers/r600/evergreen_state.c
src/gallium/drivers/r600/evergreend.h
src/gallium/drivers/r600/r600_blit.c
src/gallium/drivers/r600/r600_hw_context.c
src/gallium/drivers/r600/r600_pipe.c
src/gallium/drivers/r600/r600_pipe.h
src/gallium/drivers/r600/r600_resource.h
src/gallium/drivers/r600/r600_state.c
src/gallium/drivers/r600/r600_texture.c
src/gallium/drivers/r600/r600d.h

index 58964c47675e92ebcc859a0c0d3ec868abe1fa62..032af78c1f1197da62fb7891cb27c9e7924b2eb6 100644 (file)
@@ -1545,6 +1545,18 @@ static void evergreen_init_depth_surface(struct r600_context *rctx,
                                        S_028044_FORMAT(V_028044_STENCIL_8);
        }
 
+       surf->htile_enabled = 0;
+       /* use htile only for first level */
+       if (rtex->htile && !level) {
+               surf->htile_enabled = 1;
+               surf->db_htile_data_base = 0;
+               surf->db_htile_surface = S_028ABC_HTILE_WIDTH(1) |
+                                       S_028ABC_HTILE_HEIGHT(1) |
+                                       S_028ABC_LINEAR(1);
+               surf->db_depth_info |= S_028040_TILE_SURFACE_ENABLE(1);
+               surf->db_preload_control = 0;
+       }
+
        surf->depth_initialized = true;
 }
 
@@ -1625,6 +1637,16 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
                        rctx->poly_offset_state.zs_format = state->zsbuf->format;
                        rctx->poly_offset_state.atom.dirty = true;
                }
+
+               if (rctx->db_state.rsurf != surf) {
+                       rctx->db_state.rsurf = surf;
+                       rctx->db_state.atom.dirty = true;
+                       rctx->db_misc_state.atom.dirty = true;
+               }
+       } else if (rctx->db_state.rsurf) {
+               rctx->db_state.rsurf = NULL;
+               rctx->db_state.atom.dirty = true;
+               rctx->db_misc_state.atom.dirty = true;
        }
 
        if (rctx->cb_misc_state.nr_cbufs != state->nr_cbufs) {
@@ -2081,6 +2103,28 @@ static void evergreen_emit_cb_misc_state(struct r600_context *rctx, struct r600_
        r600_write_value(cs, 0xf | (a->dual_src_blend ? ps_colormask : 0) | fb_colormask); /* R_02823C_CB_SHADER_MASK */
 }
 
+static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom *atom)
+{ /* Emit the htile depth-buffer registers for the surface recorded in the db_state atom. */
+       struct radeon_winsys_cs *cs = rctx->cs;
+       struct r600_db_state *a = (struct r600_db_state*)atom;
+
+       if (a->rsurf && a->rsurf->htile_enabled) { /* a depth surface with htile enabled is recorded */
+               struct r600_texture *rtex = (struct r600_texture *)a->rsurf->base.texture;
+               unsigned reloc_idx;
+
+               r600_write_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear)); /* depth clear value stored on the texture */
+               r600_write_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, a->rsurf->db_htile_surface);
+               r600_write_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, a->rsurf->db_preload_control);
+               r600_write_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
+               reloc_idx = r600_context_bo_reloc(rctx, rtex->htile, RADEON_USAGE_READWRITE); /* relocation for the htile buffer */
+               cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); /* NOP packet carrying reloc_idx, placed right after the DB_HTILE_DATA_BASE write */
+               cs->buf[cs->cdw++] = reloc_idx;
+       } else { /* no htile surface: write zero to the htile surface and preload control registers */
+               r600_write_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, 0);
+               r600_write_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, 0);
+       }
+}
+
 static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_atom *atom)
 {
        struct radeon_winsys_cs *cs = rctx->cs;
@@ -2088,7 +2132,6 @@ static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_
        unsigned db_render_control = 0;
        unsigned db_count_control = 0;
        unsigned db_render_override =
-               S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_DISABLE) |
                S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
                S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE);
 
@@ -2099,7 +2142,12 @@ static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_
                }
                db_render_override |= S_02800C_NOOP_CULL_DISABLE(1);
        }
-
+       if (rctx->db_state.rsurf && rctx->db_state.rsurf->htile_enabled) {
+               /* FORCE_OFF means HiZ/HiS are determined by DB_SHADER_CONTROL */
+               db_render_override |= S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_OFF);
+       } else {
+               db_render_override |= S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_DISABLE);
+       }
        if (a->flush_depthstencil_through_cb) {
                assert(a->copy_depth || a->copy_stencil);
 
@@ -2112,6 +2160,10 @@ static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_
                                     S_028000_STENCIL_COMPRESS_DISABLE(1);
                db_render_override |= S_02800C_DISABLE_PIXEL_RATE_TILES(1);
        }
+       if (a->htile_clear) {
+               /* FIXME we might want to disable cliprect here */
+               db_render_control |= S_028000_DEPTH_CLEAR_ENABLE(1);
+       }
 
        r600_write_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2);
        r600_write_value(cs, db_render_control); /* R_028000_DB_RENDER_CONTROL */
@@ -2424,6 +2476,7 @@ void evergreen_init_state_functions(struct r600_context *rctx)
        r600_init_atom(rctx, &rctx->clip_misc_state.atom, id++, r600_emit_clip_misc_state, 6);
        r600_init_atom(rctx, &rctx->clip_state.atom, id++, evergreen_emit_clip_state, 26);
        r600_init_atom(rctx, &rctx->db_misc_state.atom, id++, evergreen_emit_db_misc_state, 10);
+       r600_init_atom(rctx, &rctx->db_state.atom, id++, evergreen_emit_db_state, 14);
        r600_init_atom(rctx, &rctx->dsa_state.atom, id++, r600_emit_cso_state, 0);
        r600_init_atom(rctx, &rctx->poly_offset_state.atom, id++, evergreen_emit_polygon_offset, 6);
        r600_init_atom(rctx, &rctx->rasterizer_state.atom, id++, r600_emit_cso_state, 0);
@@ -2544,9 +2597,7 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx)
 
        r600_store_ctl_const(cb, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0);
 
-       r600_store_context_reg_seq(cb, R_028028_DB_STENCIL_CLEAR, 2);
-       r600_store_value(cb, 0); /* R_028028_DB_STENCIL_CLEAR */
-       r600_store_value(cb, 0x3F800000); /* R_02802C_DB_DEPTH_CLEAR */
+       r600_store_context_reg(cb, R_028028_DB_STENCIL_CLEAR, 0);
 
        r600_store_context_reg(cb, R_0286DC_SPI_FOG_CNTL, 0);
 
@@ -2992,9 +3043,7 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
 
        r600_store_ctl_const(cb, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0);
 
-       r600_store_context_reg_seq(cb, R_028028_DB_STENCIL_CLEAR, 2);
-       r600_store_value(cb, 0); /* R_028028_DB_STENCIL_CLEAR */
-       r600_store_value(cb, 0x3F800000); /* R_02802C_DB_DEPTH_CLEAR */
+       r600_store_context_reg(cb, R_028028_DB_STENCIL_CLEAR, 0);
 
        r600_store_context_reg(cb, R_028200_PA_SC_WINDOW_OFFSET, 0);
        r600_store_context_reg(cb, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
index c91b2d820d29282c054da3c1db374cfa652839df..d9dba959bd57b673ebe127f6a71ff7a047bdd060 100644 (file)
 #define R_028AC0_DB_SRESULTS_COMPARE_STATE0          0x00028AC0
 #define R_028AC4_DB_SRESULTS_COMPARE_STATE1          0x00028AC4
 #define R_028AC8_DB_PRELOAD_CONTROL                  0x00028AC8
+#define   S_028AC8_MAX_X(x)                            (((x) & 0xff) << 16)
+#define   S_028AC8_MAX_Y(x)                            (((x) & 0xff) << 24)
 #define R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0          0x028AD0
 #define R_028AD4_VGT_STRMOUT_VTX_STRIDE_0           0x028AD4
 #define R_028AD8_VGT_STRMOUT_BUFFER_BASE_0          0x028AD8
index 219d940b3c1e45a7b8040780885b943d5662a3ae..6ef1d78c6fef89a0cb57c69086ca1cd4cb76cbeb 100644 (file)
@@ -433,11 +433,39 @@ static void r600_clear(struct pipe_context *ctx, unsigned buffers,
        struct r600_context *rctx = (struct r600_context *)ctx;
        struct pipe_framebuffer_state *fb = &rctx->framebuffer.state;
 
+       /* if hyperz enabled just clear hyperz */
+       if (fb->zsbuf && (buffers & PIPE_CLEAR_DEPTH)) {
+               struct r600_texture *rtex;
+               unsigned level = fb->zsbuf->u.tex.level;
+
+               rtex = (struct r600_texture*)fb->zsbuf->texture;
+
+               /* We can't use hyperz fast clear if each slice of a texture
+                * array is cleared to a different value. To simplify the code, just
+                * disable fast clear for texture arrays.
+                */
+               /* Only use htile for first level */
+               if (rtex->htile && !level && rtex->surface.array_size == 1) {
+                       if (rtex->depth_clear != depth) {
+                               rtex->depth_clear = depth;
+                               rctx->db_state.atom.dirty = true;
+                       }
+                       rctx->db_misc_state.htile_clear = true;
+                       rctx->db_misc_state.atom.dirty = true;
+               }
+       }
+
        r600_blitter_begin(ctx, R600_CLEAR);
        util_blitter_clear(rctx->blitter, fb->width, fb->height,
                           fb->nr_cbufs, buffers, fb->nr_cbufs ? fb->cbufs[0]->format : PIPE_FORMAT_NONE,
                           color, depth, stencil);
        r600_blitter_end(ctx);
+
+       /* disable fast clear */
+       if (rctx->db_misc_state.htile_clear) {
+               rctx->db_misc_state.htile_clear = false;
+               rctx->db_misc_state.atom.dirty = true;
+       }
 }
 
 static void r600_clear_render_target(struct pipe_context *ctx,
index 8a22b885d2a246864b1bfd8395a204758574e37e..cdd31a4fcf753d0fd618cf0ed896cf6b8b663925 100644 (file)
@@ -630,20 +630,20 @@ void r600_flush_emit(struct r600_context *rctx)
                                        S_0085F0_DB_ACTION_ENA(1) |
                                        S_0085F0_SH_ACTION_ENA(1) |
                                        S_0085F0_SMX_ACTION_ENA(1) |
-                                       (1 << 20); /* unknown bit */
+                                       S_0085F0_FULL_CACHE_ENA(1);
                } else {
                        cp_coher_cntl = S_0085F0_SMX_ACTION_ENA(1) |
                                        S_0085F0_SH_ACTION_ENA(1) |
                                        S_0085F0_VC_ACTION_ENA(1) |
                                        S_0085F0_TC_ACTION_ENA(1) |
-                                       (1 << 20); /* unknown bit */
+                                       S_0085F0_FULL_CACHE_ENA(1);
                }
        }
 
        if (rctx->flags & R600_CONTEXT_GPU_FLUSH) {
                cp_coher_cntl |= S_0085F0_VC_ACTION_ENA(1) |
                                S_0085F0_TC_ACTION_ENA(1) |
-                               (1 << 20); /* unknown bit */
+                               S_0085F0_FULL_CACHE_ENA(1);
                emit_flush = 1;
        }
 
@@ -740,6 +740,7 @@ void r600_begin_new_cs(struct r600_context *ctx)
        ctx->clip_misc_state.atom.dirty = true;
        ctx->clip_state.atom.dirty = true;
        ctx->db_misc_state.atom.dirty = true;
+       ctx->db_state.atom.dirty = true;
        ctx->framebuffer.atom.dirty = true;
        ctx->poly_offset_state.atom.dirty = true;
        ctx->vgt_state.atom.dirty = true;
index 6e39fd2d9dbe5484e387d775da4ed42d10a449b5..290aa51aa0a3a8b152da89438a1146918ec72ab4 100644 (file)
@@ -1032,6 +1032,14 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws)
        LIST_INITHEAD(&rscreen->fences.blocks);
        pipe_mutex_init(rscreen->fences.mutex);
 
+       /* Hyperz is very lockup prone; any code that touches related parts should be
+        * carefully tested, especially on r6xx/r7xx. Development showed that some piglit
+        * cases were triggering lockups quickly, such as:
+        * piglit/bin/depthstencil-render-miplevels 1024 d=s=z24_s8
+        */
+       rscreen->use_hyperz = debug_get_bool_option("R600_HYPERZ", TRUE);
+       rscreen->use_hyperz = rscreen->info.drm_minor >= 26 ? rscreen->use_hyperz : FALSE;
+
        rscreen->global_pool = compute_memory_pool_new(rscreen);
 
        return &rscreen->screen;
index 8df50e9bc835afbc103f9712ee1451515563facc..515174a2e6e14aae73a0e13234b0a0fdfb8f5b83 100644 (file)
@@ -35,7 +35,7 @@
 #include "r600_resource.h"
 #include "evergreen_compute.h"
 
-#define R600_NUM_ATOMS 36
+#define R600_NUM_ATOMS 37
 
 #define R600_MAX_USER_CONST_BUFFERS 1
 #define R600_MAX_DRIVER_CONST_BUFFERS 2
@@ -77,15 +77,21 @@ struct r600_command_buffer {
        unsigned pkt_flags;
 };
 
+struct r600_db_state { /* state atom tracking the depth surface whose htile registers get emitted */
+       struct r600_atom                atom; /* first member, so the emit callbacks can cast the atom pointer back to this struct */
+       struct r600_surface             *rsurf; /* surface recorded by the set_framebuffer_state hooks; reset to NULL there (presumably when no depth buffer is bound) */
+};
+
 struct r600_db_misc_state {
-       struct r600_atom atom;
-       bool occlusion_query_enabled;
-       bool flush_depthstencil_through_cb;
-       bool flush_depthstencil_in_place;
-       bool copy_depth, copy_stencil;
-       unsigned copy_sample;
-       unsigned log_samples;
-       unsigned db_shader_control;
+       struct r600_atom                atom;
+       bool                            occlusion_query_enabled;
+       bool                            flush_depthstencil_through_cb;
+       bool                            flush_depthstencil_in_place;
+       bool                            copy_depth, copy_stencil;
+       unsigned                        copy_sample;
+       unsigned                        log_samples;
+       unsigned                        db_shader_control;
+       bool                            htile_clear;
 };
 
 struct r600_cb_misc_state {
@@ -220,6 +226,7 @@ struct r600_screen {
        bool                            has_streamout;
        bool                            has_msaa;
        enum r600_msaa_texture_mode     msaa_texture_support;
+       bool                            use_hyperz;
        struct r600_tiling_info         tiling_info;
        struct r600_pipe_fences         fences;
 
@@ -439,6 +446,7 @@ struct r600_context {
        struct r600_clip_misc_state     clip_misc_state;
        struct r600_clip_state          clip_state;
        struct r600_db_misc_state       db_misc_state;
+       struct r600_db_state            db_state;
        struct r600_cso_state           dsa_state;
        struct r600_framebuffer         framebuffer;
        struct r600_poly_offset_state   poly_offset_state;
index 007d5e08d3594b8ed20e3f201957971efebc99f6..dd0b613485c086f4bb3bf4e141d8530ee923add8 100644 (file)
@@ -60,6 +60,10 @@ struct r600_texture {
         * MSAA textures cannot have mipmaps. */
        unsigned                        fmask_offset, fmask_size, fmask_bank_height;
        unsigned                        cmask_offset, cmask_size, cmask_slice_tile_max;
+
+       struct r600_resource            *htile;
+       /* use htile only for first level */
+       float                           depth_clear;
 };
 
 #define R600_TEX_IS_TILED(tex, level) ((tex)->array_mode[level] != V_038000_ARRAY_LINEAR_GENERAL && (tex)->array_mode[level] != V_038000_ARRAY_LINEAR_ALIGNED)
@@ -113,6 +117,11 @@ struct r600_surface {
        unsigned db_stencil_info;       /* EG only */
        unsigned db_prefetch_limit;     /* R600 only */
        unsigned pa_su_poly_offset_db_fmt_cntl;
+
+       unsigned                        htile_enabled;
+       unsigned                        db_htile_surface;
+       unsigned                        db_htile_data_base;
+       unsigned                        db_preload_control;
 };
 
 /* Return if the depth format can be read without the DB->CB copy on r6xx-r7xx. */
index f969808603b70c57bdfcf4cf3e5c5ca9cf268619..ef4edca807ab3abd3f7166d0fceee9f3a58307a1 100644 (file)
@@ -1440,6 +1440,18 @@ static void r600_init_depth_surface(struct r600_context *rctx,
        default:;
        }
 
+       surf->htile_enabled = 0;
+       /* use htile only for first level */
+       if (rtex->htile && !level) {
+               surf->htile_enabled = 1;
+               surf->db_htile_data_base = 0;
+               surf->db_htile_surface = S_028D24_HTILE_WIDTH(1) |
+                                       S_028D24_HTILE_HEIGHT(1) |
+                                       S_028D24_LINEAR(1);
+               /* preload is not working properly on r6xx/r7xx */
+               surf->db_depth_info |= S_028010_TILE_SURFACE_ENABLE(1);
+       }
+
        surf->depth_initialized = true;
 }
 
@@ -1530,6 +1542,16 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
                        rctx->poly_offset_state.zs_format = state->zsbuf->format;
                        rctx->poly_offset_state.atom.dirty = true;
                }
+
+               if (rctx->db_state.rsurf != surf) {
+                       rctx->db_state.rsurf = surf;
+                       rctx->db_state.atom.dirty = true;
+                       rctx->db_misc_state.atom.dirty = true;
+               }
+       } else if (rctx->db_state.rsurf) {
+               rctx->db_state.rsurf = NULL;
+               rctx->db_state.atom.dirty = true;
+               rctx->db_misc_state.atom.dirty = true;
        }
 
        if (rctx->cb_misc_state.nr_cbufs != state->nr_cbufs) {
@@ -1831,13 +1853,32 @@ static void r600_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom
        }
 }
 
+static void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom)
+{ /* Emit the htile depth-buffer registers for the surface recorded in the db_state atom. */
+       struct radeon_winsys_cs *cs = rctx->cs;
+       struct r600_db_state *a = (struct r600_db_state*)atom;
+
+       if (a->rsurf && a->rsurf->htile_enabled) { /* a depth surface with htile enabled is recorded */
+               struct r600_texture *rtex = (struct r600_texture *)a->rsurf->base.texture;
+               unsigned reloc_idx;
+
+               r600_write_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear)); /* depth clear value stored on the texture */
+               r600_write_context_reg(cs, R_028D24_DB_HTILE_SURFACE, a->rsurf->db_htile_surface);
+               r600_write_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
+               reloc_idx = r600_context_bo_reloc(rctx, rtex->htile, RADEON_USAGE_READWRITE); /* relocation for the htile buffer */
+               cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); /* NOP packet carrying reloc_idx, placed right after the DB_HTILE_DATA_BASE write */
+               cs->buf[cs->cdw++] = reloc_idx;
+       } else { /* no htile surface: write zero to the htile surface register */
+               r600_write_context_reg(cs, R_028D24_DB_HTILE_SURFACE, 0);
+       }
+}
+
 static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom *atom)
 {
        struct radeon_winsys_cs *cs = rctx->cs;
        struct r600_db_misc_state *a = (struct r600_db_misc_state*)atom;
        unsigned db_render_control = 0;
        unsigned db_render_override =
-               S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE) |
                S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) |
                S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE);
 
@@ -1847,6 +1888,12 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom
                }
                db_render_override |= S_028D10_NOOP_CULL_DISABLE(1);
        }
+       if (rctx->db_state.rsurf && rctx->db_state.rsurf->htile_enabled) {
+               /* FORCE_OFF means HiZ/HiS are determined by DB_SHADER_CONTROL */
+               db_render_override |= S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_OFF);
+       } else {
+               db_render_override |= S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE);
+       }
        if (a->flush_depthstencil_through_cb) {
                assert(a->copy_depth || a->copy_stencil);
 
@@ -1859,6 +1906,9 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom
                                     S_028D0C_STENCIL_COMPRESS_DISABLE(1);
                db_render_override |= S_028D10_NOOP_CULL_DISABLE(1);
        }
+       if (a->htile_clear) {
+               db_render_control |= S_028D0C_DEPTH_CLEAR_ENABLE(1);
+       }
 
        r600_write_context_reg_seq(cs, R_028D0C_DB_RENDER_CONTROL, 2);
        r600_write_value(cs, db_render_control); /* R_028D0C_DB_RENDER_CONTROL */
@@ -2175,6 +2225,7 @@ void r600_init_state_functions(struct r600_context *rctx)
        r600_init_atom(rctx, &rctx->clip_misc_state.atom, id++, r600_emit_clip_misc_state, 6);
        r600_init_atom(rctx, &rctx->clip_state.atom, id++, r600_emit_clip_state, 26);
        r600_init_atom(rctx, &rctx->db_misc_state.atom, id++, r600_emit_db_misc_state, 7);
+       r600_init_atom(rctx, &rctx->db_state.atom, id++, r600_emit_db_state, 11);
        r600_init_atom(rctx, &rctx->dsa_state.atom, id++, r600_emit_cso_state, 0);
        r600_init_atom(rctx, &rctx->poly_offset_state.atom, id++, r600_emit_polygon_offset, 6);
        r600_init_atom(rctx, &rctx->rasterizer_state.atom, id++, r600_emit_cso_state, 0);
@@ -2530,9 +2581,7 @@ void r600_init_atom_start_cs(struct r600_context *rctx)
 
        r600_store_ctl_const(cb, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0);
 
-       r600_store_context_reg_seq(cb, R_028028_DB_STENCIL_CLEAR, 2);
-       r600_store_value(cb, 0); /* R_028028_DB_STENCIL_CLEAR */
-       r600_store_value(cb, 0x3F800000); /* R_02802C_DB_DEPTH_CLEAR */
+       r600_store_context_reg(cb, R_028028_DB_STENCIL_CLEAR, 0);
 
        r600_store_context_reg_seq(cb, R_0286DC_SPI_FOG_CNTL, 3);
        r600_store_value(cb, 0); /* R_0286DC_SPI_FOG_CNTL */
index 0925333236adb339417ffa1d7c1f93f067e72ee9..111183eb0a04a11b2e077ee5b4574253acc8a8e5 100644 (file)
@@ -438,6 +438,44 @@ r600_texture_create_object(struct pipe_screen *screen,
        /* Tiled depth textures utilize the non-displayable tile order. */
        rtex->non_disp_tiling = rtex->is_depth && rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D;
 
+       /* only enable hyperz for PIPE_TEXTURE_2D not for PIPE_TEXTURE_2D_ARRAY
+        * Though it might still be interesting to use hyperz for texture
+        * arrays without using fast clear features
+        */
+       rtex->htile = NULL;
+       if (!(base->flags & (R600_RESOURCE_FLAG_TRANSFER | R600_RESOURCE_FLAG_FLUSHED_DEPTH)) &&
+           util_format_is_depth_or_stencil(base->format) &&
+           rscreen->use_hyperz &&
+           base->target == PIPE_TEXTURE_2D &&
+           rtex->surface.level[0].nblk_x >= 32 &&
+           rtex->surface.level[0].nblk_y >= 32) {
+               unsigned sw = rtex->surface.level[0].nblk_x * rtex->surface.blk_w;
+               unsigned sh = rtex->surface.level[0].nblk_y * rtex->surface.blk_h;
+               unsigned htile_size;
+               unsigned npipes = rscreen->info.r600_num_tile_pipes;
+
+               /* this alignment and htile size only apply to linear htile buffer */
+               sw = align(sw, 16 << 3);
+               sh = align(sh, npipes << 3);
+               htile_size = (sw >> 3) * (sh >> 3) * 4;
+               /* must be aligned with 2K * npipes */
+               htile_size = align(htile_size, (2 << 10) * npipes);
+
+               rtex->htile = (struct r600_resource*)pipe_buffer_create(&rscreen->screen, PIPE_BIND_CUSTOM,
+                                                                       PIPE_USAGE_STATIC, htile_size);
+               if (rtex->htile == NULL) {
+                       /* this is not a fatal error as we can still keep rendering
+                        * without htile buffer
+                        */
+                       R600_ERR("r600: failed to create bo for htile buffers\n");
+               } else {
+                       void *ptr;
+                       ptr = rscreen->ws->buffer_map(rtex->htile->cs_buf, NULL, PIPE_TRANSFER_WRITE);
+                       memset(ptr, 0x0, htile_size);
+                       rscreen->ws->buffer_unmap(rtex->htile->cs_buf);
+               }
+       }
+
        /* Now create the backing buffer. */
        if (!buf && alloc_bo) {
                unsigned base_align = rtex->surface.bo_alignment;
index 78fa6b689ec66e4e62c6c329523b2426152635b7..69bfd7a2f8744fe229b74c4283cabaebb6263053 100644 (file)
 #define   S_028010_ZRANGE_PRECISION(x)                 (((x) & 0x1) << 31)
 #define   G_028010_ZRANGE_PRECISION(x)                 (((x) >> 31) & 0x1)
 #define   C_028010_ZRANGE_PRECISION                    0x7FFFFFFF
+#define R_028014_DB_HTILE_DATA_BASE                  0x00028014
 #define R_028414_CB_BLEND_RED                        0x028414
 #define   S_028414_BLEND_RED(x)                        (((x) & 0xFFFFFFFF) << 0)
 #define   G_028414_BLEND_RED(x)                        (((x) >> 0) & 0xFFFFFFFF)
 #define   S_0085F0_CB11_DEST_BASE_ENA(x)               (((x) & 0x1) << 18)
 #define   G_0085F0_CB11_DEST_BASE_ENA(x)               (((x) >> 18) & 0x1)
 /* evergreen only end */
+/* evergreen and r7xx only */
+#define   S_0085F0_FULL_CACHE_ENA(x)                   (((x) & 0x1) << 20)
+#define   G_0085F0_FULL_CACHE_ENA(x)                   (((x) >> 20) & 0x1)
+/* evergreen and r7xx only end */
 #define   S_0085F0_TC_ACTION_ENA(x)                    (((x) & 0x1) << 23)
 #define   G_0085F0_TC_ACTION_ENA(x)                    (((x) >> 23) & 0x1)
 #define   C_0085F0_TC_ACTION_ENA                       0xFF7FFFFF