#include "ilo_3d.h"
#include "ilo_3d_pipeline.h"
#include "ilo_gpe.h"
-#include "ilo_gpe_gen6.h" /* for ve_init_cso_with_components */
+#include "ilo_gpe_gen6.h" /* for ve_init_cso_with_components and
+ zs_align_surface */
/**
* Set the states that are invariant between all ops.
}
+/**
+ * Align the framebuffer size, and the depth surface state, to the pixel
+ * block size required by depth buffer clears and resolves.  The block size
+ * is chosen from blitter->fb.num_samples.  This is a no-op for any other
+ * blitter op, and when the size is already aligned.
+ */
static void
-hiz_emit_rectlist(struct ilo_blitter *blitter)
+hiz_align_fb(struct ilo_blitter *blitter)
{
- struct ilo_3d *hw3d = blitter->ilo->hw3d;
- struct ilo_3d_pipeline *p = hw3d->pipeline;
-
- ilo_3d_own_render_ring(hw3d);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 313:
- *
- * "If other rendering operations have preceded this clear, a
- * PIPE_CONTROL with write cache flush enabled and Z-inhibit
- * disabled must be issued before the rectangle primitive used for
- * the depth buffer clear operation."
- *
- * From the Sandy Bridge PRM, volume 2 part 1, page 314:
- *
- * "Depth buffer clear pass must be followed by a PIPE_CONTROL
- * command with DEPTH_STALL bit set and Then followed by Depth
- * FLUSH"
- *
- * But the pipeline has to be flushed both before and after not only
- * because of these workarounds. We need them for reasons such as
- *
- * - we may sample from a texture that was rendered to
- * - we may sample from the fb shortly after
- */
- if (!ilo_cp_empty(p->cp))
- ilo_3d_pipeline_emit_flush(p);
-
- ilo_3d_pipeline_emit_rectlist(p, blitter);
-
- ilo_3d_pipeline_emit_flush(p);
-}
+ unsigned align_w, align_h;
-/**
- * This must be called after ilo_blitter_set_fb().
- */
-static void
-hiz_set_rectlist(struct ilo_blitter *blitter, bool aligned)
-{
- unsigned width = blitter->fb.width;
- unsigned height = blitter->fb.height;
+ switch (blitter->op) {
+ case ILO_BLITTER_RECTLIST_CLEAR_ZS:
+ case ILO_BLITTER_RECTLIST_RESOLVE_Z:
+ break;
+ default:
+ return;
+ }
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 313-314:
* buffer clear operation must be delivered, and depth buffer state
* cannot have changed since the previous depth buffer clear
* operation."
- *
- * Making the RECTLIST aligned to 8x4 is easy. But how about
- * 3DSTATE_DRAWING_RECTANGLE and 3DSTATE_DEPTH_BUFFER? Since we use
- * HALIGN_8 and VALIGN_4 for depth buffers, we can safely align the drawing
- * rectangle, except that the PRM requires the drawing rectangle to be
- * clampped to the render target boundary. For 3DSTATE_DEPTH_BUFFER, we
- * cannot align the Width and Height fields if level or slice is greater
- * than zero.
*/
- if (aligned) {
- switch (blitter->fb.num_samples) {
- case 1:
- width = align(width, 8);
- height = align(height, 4);
- break;
- case 2:
- width = align(width, 4);
- height = align(height, 4);
- break;
- case 4:
- width = align(width, 4);
- height = align(height, 2);
- break;
- case 8:
- default:
- width = align(width, 2);
- height = align(height, 2);
- break;
- }
+ switch (blitter->fb.num_samples) {
+ case 1:
+ align_w = 8;
+ align_h = 4;
+ break;
+ case 2:
+ align_w = 4;
+ align_h = 4;
+ break;
+ case 4:
+ align_w = 4;
+ align_h = 2;
+ break;
+ case 8:
+ default:
+ align_w = 2;
+ align_h = 2;
+ break;
}
- ilo_blitter_set_rectlist(blitter, 0, 0, width, height);
+ if (blitter->fb.width % align_w || blitter->fb.height % align_h) {
+ /* each dimension is rounded up from its own current value */
+ blitter->fb.width = align(blitter->fb.width, align_w);
+ blitter->fb.height = align(blitter->fb.height, align_h);
+
+ /* apply the same alignment to the size in the depth surface state */
+ assert(!blitter->fb.dst.is_rt);
+ zs_align_surface(blitter->ilo->dev, align_w, align_h,
+ &blitter->fb.dst.u.zs);
+ }
+}
+
+static void
+hiz_emit_rectlist(struct ilo_blitter *blitter)
+{
+ struct ilo_3d *hw3d = blitter->ilo->hw3d;
+ struct ilo_3d_pipeline *p = hw3d->pipeline;
+
+ hiz_align_fb(blitter);
+
+ ilo_blitter_set_rectlist(blitter, 0, 0,
+ blitter->fb.width, blitter->fb.height);
+
+ ilo_3d_own_render_ring(hw3d);
+
+ /*
+ * Summary of this function: hiz_align_fb() may adjust blitter->fb.width
+ * and blitter->fb.height, the rectangle is then set to span that size
+ * from (0, 0), and the RECTLIST is emitted with a flush after it and,
+ * unless ilo_cp_empty(p->cp) is true, a flush before it.
+ *
+ * From the Sandy Bridge PRM, volume 2 part 1, page 313:
+ *
+ * "If other rendering operations have preceded this clear, a
+ * PIPE_CONTROL with write cache flush enabled and Z-inhibit
+ * disabled must be issued before the rectangle primitive used for
+ * the depth buffer clear operation."
+ *
+ * From the Sandy Bridge PRM, volume 2 part 1, page 314:
+ *
+ * "Depth buffer clear pass must be followed by a PIPE_CONTROL
+ * command with DEPTH_STALL bit set and Then followed by Depth
+ * FLUSH"
+ *
+ * But the pipeline has to be flushed both before and after not only
+ * because of these workarounds. We need them for reasons such as
+ *
+ * - we may sample from a texture that was rendered to
+ * - we may sample from the fb shortly after
+ */
+ if (!ilo_cp_empty(p->cp))
+ ilo_3d_pipeline_emit_flush(p);
+
+ ilo_3d_pipeline_emit_rectlist(p, blitter);
+
+ ilo_3d_pipeline_emit_flush(p);
}
static bool
uses |= ILO_BLITTER_USE_CC | ILO_BLITTER_USE_FB_STENCIL;
ilo_blitter_set_uses(blitter, uses);
- hiz_set_rectlist(blitter, true);
hiz_emit_rectlist(blitter);
return true;
ilo_blitter_set_uses(blitter,
ILO_BLITTER_USE_DSA | ILO_BLITTER_USE_FB_DEPTH);
- hiz_set_rectlist(blitter, true);
hiz_emit_rectlist(blitter);
}
ilo_blitter_set_uses(blitter,
ILO_BLITTER_USE_DSA | ILO_BLITTER_USE_FB_DEPTH);
- hiz_set_rectlist(blitter, false);
hiz_emit_rectlist(blitter);
}
ilo_cp_end(cp);
}
+/**
+ * Round the width and height packed in zs->payload[2] up to multiples of
+ * align_w and align_h.  Both fields are stored minus one, and their bit
+ * positions and widths depend on the GEN of dev.
+ */
+static inline void
+zs_align_surface(const struct ilo_dev_info *dev,
+                 unsigned align_w, unsigned align_h,
+                 struct ilo_zs_surface *zs)
+{
+   unsigned field_mask, width_shift, height_shift;
+   unsigned aligned_w, aligned_h;
+   uint32_t dw;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7.5);
+
+   /* 13-bit fields before GEN7, 14-bit fields from GEN7 on */
+   if (dev->gen < ILO_GEN(7)) {
+      width_shift = 6;
+      height_shift = 19;
+      field_mask = 0x1fff;
+   }
+   else {
+      width_shift = 4;
+      height_shift = 18;
+      field_mask = 0x3fff;
+   }
+
+   dw = zs->payload[2];
+
+   /* decode the current size (stored minus one) and round it up */
+   aligned_w = align(((dw >> width_shift) & field_mask) + 1, align_w);
+   aligned_h = align(((dw >> height_shift) & field_mask) + 1, align_h);
+
+   /* clear both fields, then store the aligned size minus one */
+   dw &= ~((field_mask << width_shift) | (field_mask << height_shift));
+   dw |= (aligned_w - 1) << width_shift;
+   dw |= (aligned_h - 1) << height_shift;
+
+   zs->payload[2] = dw;
+}
+
static inline void
gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info *dev,
const struct ilo_zs_surface *zs,
layout->height = align(layout->height, 64);
}
+ /*
+ * Depth Buffer Clear/Resolve works in 8x4 sample blocks. In
+ * ilo_texture_can_enable_hiz(), we always return true for the first slice.
+ * To avoid out-of-bound access, we have to pad.
+ */
+ if (layout->hiz) {
+ layout->width = align(layout->width, 8);
+ layout->height = align(layout->height, 4);
+ }
+
assert(layout->width % layout->block_width == 0);
assert(layout->height % layout->block_height == 0);
assert(layout->qpitch % layout->block_height == 0);
struct ilo_screen *is = ilo_screen(tex->base.screen);
const struct pipe_resource *templ = layout->templ;
const int hz_align_j = 8;
- unsigned hz_width, hz_height;
+ unsigned hz_width, hz_height, lv;
unsigned long pitch;
- int i;
/*
* See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge
if (templ->target == PIPE_TEXTURE_3D) {
hz_height = 0;
- for (i = 0; i <= templ->last_level; i++) {
- const unsigned h = align(layout->levels[i].h, hz_align_j);
- hz_height += h * layout->levels[i].d;
+ for (lv = 0; lv <= templ->last_level; lv++) {
+ const unsigned h = align(layout->levels[lv].h, hz_align_j);
+ hz_height += h * layout->levels[lv].d;
}
hz_height /= 2;
tex->hiz.bo_stride = pitch;
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 313-314:
+ *
+ * "A rectangle primitive representing the clear area is delivered. The
+ * primitive must adhere to the following restrictions on size:
+ *
+ * - If Number of Multisamples is NUMSAMPLES_1, the rectangle must be
+ * aligned to an 8x4 pixel block relative to the upper left corner
+ * of the depth buffer, and contain an integer number of these pixel
+ * blocks, and all 8x4 pixels must be lit.
+ *
+ * - If Number of Multisamples is NUMSAMPLES_4, the rectangle must be
+ * aligned to a 4x2 pixel block (8x4 sample block) relative to the
+ * upper left corner of the depth buffer, and contain an integer
+ * number of these pixel blocks, and all samples of the 4x2 pixels
+ * must be lit
+ *
+ * - If Number of Multisamples is NUMSAMPLES_8, the rectangle must be
+ * aligned to a 2x2 pixel block (8x4 sample block) relative to the
+ * upper left corner of the depth buffer, and contain an integer
+ * number of these pixel blocks, and all samples of the 2x2 pixels
+ * must be list."
+ *
+ * "The following is required when performing a depth buffer resolve:
+ *
+ * - A rectangle primitive of the same size as the previous depth
+ * buffer clear operation must be delivered, and depth buffer state
+ * cannot have changed since the previous depth buffer clear
+ * operation."
+ *
+ * Experiments on Haswell show that depth buffer resolves have the same
+ * alignment requirements, and that aligning the RECTLIST primitive and
+ * 3DSTATE_DRAWING_RECTANGLE alone is not enough. The mipmap size itself
+ * must be aligned.
+ */
+ for (lv = 0; lv <= templ->last_level; lv++) {
+ unsigned align_w = 8, align_h = 4;
+
+ switch (templ->nr_samples) {
+ case 0:
+ case 1:
+ break;
+ case 2:
+ align_w /= 2;
+ break;
+ case 4:
+ align_w /= 2;
+ align_h /= 2;
+ break;
+ case 8:
+ default:
+ align_w /= 4;
+ align_h /= 2;
+ break;
+ }
+
+ if (u_minify(templ->width0, lv) % align_w == 0 &&
+ u_minify(templ->height0, lv) % align_h == 0) {
+ const unsigned num_slices = (templ->target == PIPE_TEXTURE_3D) ?
+ u_minify(templ->depth0, lv) : templ->array_size;
+
+ ilo_texture_set_slice_flags(tex, lv, 0, num_slices,
+ ILO_TEXTURE_HIZ, ILO_TEXTURE_HIZ);
+ }
+ }
+
return true;
}
#include "intel_winsys.h"
#include "ilo_common.h"
+#include "ilo_screen.h"
enum ilo_texture_flags {
ILO_TEXTURE_RENDER_WRITE = 1 << 0,
ILO_TEXTURE_BLT_READ = 1 << 4,
ILO_TEXTURE_CPU_READ = 1 << 5,
ILO_TEXTURE_CLEAR = 1 << 6,
+ ILO_TEXTURE_HIZ = 1 << 7,
};
-struct ilo_screen;
-
struct ilo_buffer {
struct pipe_resource base;
ilo_texture_can_enable_hiz(const struct ilo_texture *tex, unsigned level,
unsigned first_slice, unsigned num_slices)
{
- return (tex->hiz.bo != NULL);
+ const struct ilo_screen *is = ilo_screen(tex->base.screen);
+ const struct ilo_texture_slice *slice =
+ ilo_texture_get_slice(tex, level, first_slice);
+
+ if (!tex->hiz.bo)
+ return false;
+
+ /* we can adjust 3DSTATE_DEPTH_BUFFER for the first slice */
+ if (level == 0 && first_slice == 0 && num_slices == 1)
+ return true;
+
+ /* HiZ is non-mipmapped and non-array on GEN6 */
+ assert(is->dev.gen > ILO_GEN(6));
+
+ /*
+ * Either all or none of the slices in the same level have ILO_TEXTURE_HIZ
+ * set. It suffices to check only the first slice.
+ */
+ return (slice->flags & ILO_TEXTURE_HIZ);
}
#endif /* ILO_RESOURCE_H */