From 7fdab3b201bd2a011e8e0b0b15aca7b7fb5a7aa5 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Tue, 14 Jan 2014 14:51:51 +0800 Subject: [PATCH] ilo: disable HiZ for misaligned levels We need to disable HiZ for non-8x4 aligned levels, except for level 0, layer 0. For the very first layer we can adjust Width and Height fields of 3DSTATE_DEPTH_BUFFER to make it aligned. Specifically, add ILO_TEXTURE_HIZ and set the flag only for properly aligned levels. ilo_texture_can_enable_hiz() is updated to check for the flag. In tex_layout_validate(), align the depth bo to 8x4 so that we can adjust Width/Height of 3DSTATE_DEPTH_BUFFER without introducing out-of-bound access. Finally in rectlist blitter, add the ability to adjust 3DSTATE_DEPTH_BUFFER. --- .../drivers/ilo/ilo_blitter_rectlist.c | 153 +++++++++--------- src/gallium/drivers/ilo/ilo_gpe_gen6.h | 35 ++++ src/gallium/drivers/ilo/ilo_resource.c | 85 +++++++++- src/gallium/drivers/ilo/ilo_resource.h | 24 ++- 4 files changed, 215 insertions(+), 82 deletions(-) diff --git a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c index 015cfa459a5..472ab6a1755 100644 --- a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c +++ b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c @@ -32,7 +32,8 @@ #include "ilo_3d.h" #include "ilo_3d_pipeline.h" #include "ilo_gpe.h" -#include "ilo_gpe_gen6.h" /* for ve_init_cso_with_components */ +#include "ilo_gpe_gen6.h" /* for ve_init_cso_with_components and + zs_align_surface */ /** * Set the states that are invariant between all ops. 
@@ -223,49 +224,18 @@ ilo_blitter_set_uses(struct ilo_blitter *blitter, uint32_t uses) } static void -hiz_emit_rectlist(struct ilo_blitter *blitter) +hiz_align_fb(struct ilo_blitter *blitter) { - struct ilo_3d *hw3d = blitter->ilo->hw3d; - struct ilo_3d_pipeline *p = hw3d->pipeline; - - ilo_3d_own_render_ring(hw3d); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 313: - * - * "If other rendering operations have preceded this clear, a - * PIPE_CONTROL with write cache flush enabled and Z-inhibit - * disabled must be issued before the rectangle primitive used for - * the depth buffer clear operation." - * - * From the Sandy Bridge PRM, volume 2 part 1, page 314: - * - * "Depth buffer clear pass must be followed by a PIPE_CONTROL - * command with DEPTH_STALL bit set and Then followed by Depth - * FLUSH" - * - * But the pipeline has to be flushed both before and after not only - * because of these workarounds. We need them for reasons such as - * - * - we may sample from a texture that was rendered to - * - we may sample from the fb shortly after - */ - if (!ilo_cp_empty(p->cp)) - ilo_3d_pipeline_emit_flush(p); - - ilo_3d_pipeline_emit_rectlist(p, blitter); - - ilo_3d_pipeline_emit_flush(p); -} + unsigned align_w, align_h; -/** - * This must be called after ilo_blitter_set_fb(). - */ -static void -hiz_set_rectlist(struct ilo_blitter *blitter, bool aligned) -{ - unsigned width = blitter->fb.width; - unsigned height = blitter->fb.height; + switch (blitter->op) { + case ILO_BLITTER_RECTLIST_CLEAR_ZS: + case ILO_BLITTER_RECTLIST_RESOLVE_Z: + break; + default: + return; + break; + } /* * From the Sandy Bridge PRM, volume 2 part 1, page 313-314: @@ -296,38 +266,76 @@ hiz_set_rectlist(struct ilo_blitter *blitter, bool aligned) * buffer clear operation must be delivered, and depth buffer state * cannot have changed since the previous depth buffer clear * operation." - * - * Making the RECTLIST aligned to 8x4 is easy. 
But how about - 3DSTATE_DRAWING_RECTANGLE and 3DSTATE_DEPTH_BUFFER? Since we use - HALIGN_8 and VALIGN_4 for depth buffers, we can safely align the drawing - rectangle, except that the PRM requires the drawing rectangle to be - clampped to the render target boundary. For 3DSTATE_DEPTH_BUFFER, we - cannot align the Width and Height fields if level or slice is greater - than zero. */ - if (aligned) { - switch (blitter->fb.num_samples) { - case 1: - width = align(width, 8); - height = align(height, 4); - break; - case 2: - width = align(width, 4); - height = align(height, 4); - break; - case 4: - width = align(width, 4); - height = align(height, 2); - break; - case 8: - default: - width = align(width, 2); - height = align(height, 2); - break; - } + switch (blitter->fb.num_samples) { + case 1: + align_w = 8; + align_h = 4; + break; + case 2: + align_w = 4; + align_h = 4; + break; + case 4: + align_w = 4; + align_h = 2; + break; + case 8: + default: + align_w = 2; + align_h = 2; + break; } - ilo_blitter_set_rectlist(blitter, 0, 0, width, height); + if (blitter->fb.width % align_w || blitter->fb.height % align_h) { + blitter->fb.width = align(blitter->fb.width, align_w); + blitter->fb.height = align(blitter->fb.height, align_h); + + assert(!blitter->fb.dst.is_rt); + zs_align_surface(blitter->ilo->dev, align_w, align_h, + &blitter->fb.dst.u.zs); + } +} + +static void +hiz_emit_rectlist(struct ilo_blitter *blitter) +{ + struct ilo_3d *hw3d = blitter->ilo->hw3d; + struct ilo_3d_pipeline *p = hw3d->pipeline; + + hiz_align_fb(blitter); + + ilo_blitter_set_rectlist(blitter, 0, 0, + blitter->fb.width, blitter->fb.height); + + ilo_3d_own_render_ring(hw3d); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 313: + * + * "If other rendering operations have preceded this clear, a + * PIPE_CONTROL with write cache flush enabled and Z-inhibit + * disabled must be issued before the rectangle primitive used for + * the depth buffer clear operation."
+ * + * From the Sandy Bridge PRM, volume 2 part 1, page 314: + * + * "Depth buffer clear pass must be followed by a PIPE_CONTROL + * command with DEPTH_STALL bit set and Then followed by Depth + * FLUSH" + * + * But the pipeline has to be flushed both before and after not only + * because of these workarounds. We need them for reasons such as + * + * - we may sample from a texture that was rendered to + * - we may sample from the fb shortly after + */ + if (!ilo_cp_empty(p->cp)) + ilo_3d_pipeline_emit_flush(p); + + ilo_3d_pipeline_emit_rectlist(p, blitter); + + ilo_3d_pipeline_emit_flush(p); } static bool @@ -452,7 +460,6 @@ ilo_blitter_rectlist_clear_zs(struct ilo_blitter *blitter, uses |= ILO_BLITTER_USE_CC | ILO_BLITTER_USE_FB_STENCIL; ilo_blitter_set_uses(blitter, uses); - hiz_set_rectlist(blitter, true); hiz_emit_rectlist(blitter); return true; @@ -489,7 +496,6 @@ ilo_blitter_rectlist_resolve_z(struct ilo_blitter *blitter, ilo_blitter_set_uses(blitter, ILO_BLITTER_USE_DSA | ILO_BLITTER_USE_FB_DEPTH); - hiz_set_rectlist(blitter, true); hiz_emit_rectlist(blitter); } @@ -522,6 +528,5 @@ ilo_blitter_rectlist_resolve_hiz(struct ilo_blitter *blitter, ilo_blitter_set_uses(blitter, ILO_BLITTER_USE_DSA | ILO_BLITTER_USE_FB_DEPTH); - hiz_set_rectlist(blitter, false); hiz_emit_rectlist(blitter); } diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen6.h b/src/gallium/drivers/ilo/ilo_gpe_gen6.h index 76288d267f9..3c63a7108c9 100644 --- a/src/gallium/drivers/ilo/ilo_gpe_gen6.h +++ b/src/gallium/drivers/ilo/ilo_gpe_gen6.h @@ -1490,6 +1490,41 @@ gen6_emit_3DSTATE_DRAWING_RECTANGLE(const struct ilo_dev_info *dev, ilo_cp_end(cp); } +static inline void +zs_align_surface(const struct ilo_dev_info *dev, + unsigned align_w, unsigned align_h, + struct ilo_zs_surface *zs) +{ + unsigned mask, shift_w, shift_h; + unsigned width, height; + uint32_t dw3; + + ILO_GPE_VALID_GEN(dev, 6, 7.5); + + if (dev->gen >= ILO_GEN(7)) { + shift_w = 4; + shift_h = 18; + mask = 0x3fff; + } + else { + 
shift_w = 6; + shift_h = 19; + mask = 0x1fff; + } + + dw3 = zs->payload[2]; + + /* aligned width and height */ + width = align(((dw3 >> shift_w) & mask) + 1, align_w); + height = align(((dw3 >> shift_h) & mask) + 1, align_h); + + dw3 = (dw3 & ~((mask << shift_w) | (mask << shift_h))) | + (width - 1) << shift_w | + (height - 1) << shift_h; + + zs->payload[2] = dw3; +} + static inline void gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info *dev, const struct ilo_zs_surface *zs, diff --git a/src/gallium/drivers/ilo/ilo_resource.c b/src/gallium/drivers/ilo/ilo_resource.c index 1048fe3d03c..f9a53318613 100644 --- a/src/gallium/drivers/ilo/ilo_resource.c +++ b/src/gallium/drivers/ilo/ilo_resource.c @@ -852,6 +852,16 @@ tex_layout_validate(struct tex_layout *layout) layout->height = align(layout->height, 64); } + /* + * Depth Buffer Clear/Resolve works in 8x4 sample blocks. In + * ilo_texture_can_enable_hiz(), we always return true for the first slice. + * To avoid out-of-bound access, we have to pad. 
+ */ + if (layout->hiz) { + layout->width = align(layout->width, 8); + layout->height = align(layout->height, 4); + } + assert(layout->width % layout->block_width == 0); assert(layout->height % layout->block_height == 0); assert(layout->qpitch % layout->block_height == 0); @@ -1037,9 +1047,8 @@ tex_create_hiz(struct ilo_texture *tex, const struct tex_layout *layout) struct ilo_screen *is = ilo_screen(tex->base.screen); const struct pipe_resource *templ = layout->templ; const int hz_align_j = 8; - unsigned hz_width, hz_height; + unsigned hz_width, hz_height, lv; unsigned long pitch; - int i; /* * See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge @@ -1054,9 +1063,9 @@ tex_create_hiz(struct ilo_texture *tex, const struct tex_layout *layout) if (templ->target == PIPE_TEXTURE_3D) { hz_height = 0; - for (i = 0; i <= templ->last_level; i++) { - const unsigned h = align(layout->levels[i].h, hz_align_j); - hz_height += h * layout->levels[i].d; + for (lv = 0; lv <= templ->last_level; lv++) { + const unsigned h = align(layout->levels[lv].h, hz_align_j); + hz_height += h * layout->levels[lv].d; } hz_height /= 2; @@ -1087,6 +1096,72 @@ tex_create_hiz(struct ilo_texture *tex, const struct tex_layout *layout) tex->hiz.bo_stride = pitch; + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 313-314: + * + * "A rectangle primitive representing the clear area is delivered. The + * primitive must adhere to the following restrictions on size: + * + * - If Number of Multisamples is NUMSAMPLES_1, the rectangle must be + * aligned to an 8x4 pixel block relative to the upper left corner + * of the depth buffer, and contain an integer number of these pixel + * blocks, and all 8x4 pixels must be lit. 
+ * + * - If Number of Multisamples is NUMSAMPLES_4, the rectangle must be + * aligned to a 4x2 pixel block (8x4 sample block) relative to the + * upper left corner of the depth buffer, and contain an integer + * number of these pixel blocks, and all samples of the 4x2 pixels + * must be lit + * + * - If Number of Multisamples is NUMSAMPLES_8, the rectangle must be + * aligned to a 2x2 pixel block (8x4 sample block) relative to the + * upper left corner of the depth buffer, and contain an integer + * number of these pixel blocks, and all samples of the 2x2 pixels + * must be list." + * + * "The following is required when performing a depth buffer resolve: + * + * - A rectangle primitive of the same size as the previous depth + * buffer clear operation must be delivered, and depth buffer state + * cannot have changed since the previous depth buffer clear + * operation." + * + * Experiments on Haswell show that depth buffer resolves have the same + * alignment requirements, and aligning the RECTLIST primitive and + * 3DSTATE_DRAWING_RECTANGLE alone are not enough. The mipmap size must be + * aligned. + */ + for (lv = 0; lv <= templ->last_level; lv++) { + unsigned align_w = 8, align_h = 4; + + switch (templ->nr_samples) { + case 0: + case 1: + break; + case 2: + align_w /= 2; + break; + case 4: + align_w /= 2; + align_h /= 2; + break; + case 8: + default: + align_w /= 4; + align_h /= 2; + break; + } + + if (u_minify(templ->width0, lv) % align_w == 0 && + u_minify(templ->height0, lv) % align_h == 0) { + const unsigned num_slices = (templ->target == PIPE_TEXTURE_3D) ? 
+ u_minify(templ->depth0, lv) : templ->array_size; + + ilo_texture_set_slice_flags(tex, lv, 0, num_slices, + ILO_TEXTURE_HIZ, ILO_TEXTURE_HIZ); + } + } + return true; } diff --git a/src/gallium/drivers/ilo/ilo_resource.h b/src/gallium/drivers/ilo/ilo_resource.h index 125535a2771..fb4fde77d08 100644 --- a/src/gallium/drivers/ilo/ilo_resource.h +++ b/src/gallium/drivers/ilo/ilo_resource.h @@ -31,6 +31,7 @@ #include "intel_winsys.h" #include "ilo_common.h" +#include "ilo_screen.h" enum ilo_texture_flags { ILO_TEXTURE_RENDER_WRITE = 1 << 0, @@ -40,10 +41,9 @@ enum ilo_texture_flags { ILO_TEXTURE_BLT_READ = 1 << 4, ILO_TEXTURE_CPU_READ = 1 << 5, ILO_TEXTURE_CLEAR = 1 << 6, + ILO_TEXTURE_HIZ = 1 << 7, }; -struct ilo_screen; - struct ilo_buffer { struct pipe_resource base; @@ -161,7 +161,25 @@ static inline bool ilo_texture_can_enable_hiz(const struct ilo_texture *tex, unsigned level, unsigned first_slice, unsigned num_slices) { - return (tex->hiz.bo != NULL); + const struct ilo_screen *is = ilo_screen(tex->base.screen); + const struct ilo_texture_slice *slice = + ilo_texture_get_slice(tex, level, first_slice); + + if (!tex->hiz.bo) + return false; + + /* we can adjust 3DSTATE_DEPTH_BUFFER for the first slice */ + if (level == 0 && first_slice == 0 && num_slices == 1) + return true; + + /* HiZ is non-mipmapped and non-array on GEN6 */ + assert(is->dev.gen > ILO_GEN(6)); + + /* + * Either all or none of the slices in the same level have ILO_TEXTURE_HIZ + * set. It suffices to check only the first slice. + */ + return (slice->flags & ILO_TEXTURE_HIZ); } #endif /* ILO_RESOURCE_H */ -- 2.30.2