const struct ilo_dev_info *dev;
const struct pipe_resource *templ;
+ bool has_depth, has_stencil;
+ bool hiz, separate_stencil;
+
enum pipe_format format;
unsigned block_width, block_height, block_size;
bool compressed;
- bool has_depth, has_stencil, separate_stencil;
enum intel_tiling_mode tiling;
bool can_be_linear;
layout->align_j = 8;
break;
default:
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 319:
- *
- * "The 3 LSBs of both offsets (Depth Coordinate Offset Y and
- * Depth Coordinate Offset X) must be zero to ensure correct
- * alignment"
- *
- * We will make use of them and setting align_i to 8 help us meet
- * the requirement.
- */
- layout->align_i = (templ->last_level > 0) ? 8 : 4;
+ layout->align_i = 4;
layout->align_j = 4;
break;
}
* "The cursor surface address must be 4K byte aligned. The cursor must
* be in linear memory, it cannot be tiled."
*/
- if (unlikely(templ->bind & PIPE_BIND_CURSOR))
+ if (unlikely(templ->bind & (PIPE_BIND_CURSOR | PIPE_BIND_LINEAR)))
valid_tilings &= tile_none;
/*
{
const struct pipe_resource *templ = layout->templ;
enum pipe_format format;
- const struct util_format_description *desc;
- bool separate_stencil;
-
- /* GEN7+ requires separate stencil buffers */
- separate_stencil = (layout->dev->gen >= ILO_GEN(7));
switch (templ->format) {
case PIPE_FORMAT_ETC1_RGB8:
format = PIPE_FORMAT_R8G8B8X8_UNORM;
break;
case PIPE_FORMAT_Z24_UNORM_S8_UINT:
- if (separate_stencil) {
+ if (layout->separate_stencil)
format = PIPE_FORMAT_Z24X8_UNORM;
- layout->separate_stencil = true;
- }
- else {
+ else
format = templ->format;
- }
break;
case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
- if (separate_stencil) {
+ if (layout->separate_stencil)
format = PIPE_FORMAT_Z32_FLOAT;
- layout->separate_stencil = true;
- }
- else {
+ else
format = templ->format;
- }
break;
default:
format = templ->format;
layout->block_height = util_format_get_blockheight(format);
layout->block_size = util_format_get_blocksize(format);
layout->compressed = util_format_is_compressed(format);
+}
- desc = util_format_description(format);
+static void
+tex_layout_init_hiz(struct tex_layout *layout)
+{
+ const struct pipe_resource *templ = layout->templ;
+ const struct util_format_description *desc;
+
+ desc = util_format_description(templ->format);
layout->has_depth = util_format_has_depth(desc);
layout->has_stencil = util_format_has_stencil(desc);
+
+ if (!layout->has_depth)
+ return;
+
+ layout->hiz = true;
+
+ /* no point in having HiZ */
+ if (templ->usage == PIPE_USAGE_STAGING)
+ layout->hiz = false;
+
+ if (layout->dev->gen == ILO_GEN(6)) {
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 312:
+ *
+ * "The hierarchical depth buffer does not support the LOD field, it
+ * is assumed by hardware to be zero. A separate hierarachical
+ * depth buffer is required for each LOD used, and the
+ * corresponding buffer's state delivered to hardware each time a
+ * new depth buffer state with modified LOD is delivered."
+ *
+ * But we have a stronger requirement. Because of layer offsetting
+ * (check out the callers of ilo_texture_get_slice_offset()), we already
+ * have to require the texture to be non-mipmapped and non-array.
+ */
+ if (templ->last_level > 0 || templ->array_size > 1 || templ->depth0 > 1)
+ layout->hiz = false;
+ }
+
+ if (ilo_debug & ILO_DEBUG_NOHIZ)
+ layout->hiz = false;
+
+ if (layout->has_stencil) {
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 317:
+ *
+ * "This field (Separate Stencil Buffer Enable) must be set to the
+ * same value (enabled or disabled) as Hierarchical Depth Buffer
+ * Enable."
+ *
+ * GEN7+ requires separate stencil buffers.
+ */
+ if (layout->dev->gen >= ILO_GEN(7))
+ layout->separate_stencil = true;
+ else
+ layout->separate_stencil = layout->hiz;
+
+ if (layout->separate_stencil)
+ layout->has_stencil = false;
+ }
}
static void
layout->templ = templ;
/* note that there are dependencies between these functions */
+ tex_layout_init_hiz(layout);
tex_layout_init_format(layout);
tex_layout_init_tiling(layout);
tex_layout_init_spacing(layout);
static void
tex_layout_validate(struct tex_layout *layout)
{
+ /*
+ * From the Sandy Bridge PRM, volume 1 part 1, page 118:
+ *
+ * "To determine the necessary padding on the bottom and right side of
+ * the surface, refer to the table in Section 7.18.3.4 for the i and j
+ * parameters for the surface format in use. The surface must then be
+ * extended to the next multiple of the alignment unit size in each
+ * dimension, and all texels contained in this extended surface must
+ * have valid GTT entries."
+ *
+ * "For cube surfaces, an additional two rows of padding are required
+ * at the bottom of the surface. This must be ensured regardless of
+ * whether the surface is stored tiled or linear. This is due to the
+ * potential rotation of cache line orientation from memory to cache."
+ *
+ * "For compressed textures (BC* and FXT1 surface formats), padding at
+ * the bottom of the surface is to an even compressed row, which is
+ * equal to a multiple of 8 uncompressed texel rows. Thus, for padding
+ * purposes, these surfaces behave as if j = 8 only for surface
+ * padding purposes. The value of 4 for j still applies for mip level
+ * alignment and QPitch calculation."
+ */
+ if (layout->templ->bind & PIPE_BIND_SAMPLER_VIEW) {
+ layout->width = align(layout->width, layout->align_i);
+ layout->height = align(layout->height, layout->align_j);
+
+ if (layout->templ->target == PIPE_TEXTURE_CUBE)
+ layout->height += 2;
+
+ if (layout->compressed)
+ layout->height = align(layout->height, layout->align_j * 2);
+ }
+
+ /*
+ * From the Sandy Bridge PRM, volume 1 part 1, page 118:
+ *
+ * "If the surface contains an odd number of rows of data, a final row
+ * below the surface must be allocated."
+ */
+ if (layout->templ->bind & PIPE_BIND_RENDER_TARGET)
+ layout->height = align(layout->height, 2);
+
/*
* From the Sandy Bridge PRM, volume 1 part 2, page 22:
*
layout->height = align(layout->height, 64);
}
+ /*
+ * Depth Buffer Clear/Resolve works in 8x4 sample blocks. In
+ * ilo_texture_can_enable_hiz(), we always return true for the first slice.
+ * To avoid out-of-bound access, we have to pad.
+ */
+ if (layout->hiz) {
+ layout->width = align(layout->width, 8);
+ layout->height = align(layout->height, 4);
+ }
+
assert(layout->width % layout->block_width == 0);
assert(layout->height % layout->block_height == 0);
assert(layout->qpitch % layout->block_height == 0);
static void
tex_free_slices(struct ilo_texture *tex)
{
- FREE(tex->slice_offsets[0]);
+ FREE(tex->slices[0]);
}
static bool
if (!slices)
return false;
- tex->slice_offsets[0] = slices;
+ tex->slices[0] = slices;
/* point to the respective positions in the buffer */
for (lv = 1; lv <= templ->last_level; lv++) {
- tex->slice_offsets[lv] = tex->slice_offsets[lv - 1] +
+ tex->slices[lv] = tex->slices[lv - 1] +
u_minify(templ->depth0, lv - 1) * templ->array_size;
}
struct ilo_screen *is = ilo_screen(tex->base.screen);
const char *name;
struct intel_bo *bo;
+ enum intel_tiling_mode tiling;
+ unsigned long pitch;
switch (tex->base.target) {
case PIPE_TEXTURE_1D:
}
if (handle) {
- bo = intel_winsys_import_handle(is->winsys, name,
- tex->bo_width, tex->bo_height, tex->bo_cpp, handle);
+ bo = intel_winsys_import_handle(is->winsys, name, handle,
+ tex->bo_width, tex->bo_height, tex->bo_cpp,
+ &tiling, &pitch);
}
else {
- bo = intel_winsys_alloc(is->winsys, name,
+ const uint32_t initial_domain =
+ (tex->base.bind & (PIPE_BIND_DEPTH_STENCIL |
+ PIPE_BIND_RENDER_TARGET)) ?
+ INTEL_DOMAIN_RENDER : 0;
+
+ bo = intel_winsys_alloc_texture(is->winsys, name,
tex->bo_width, tex->bo_height, tex->bo_cpp,
- tex->tiling, tex->bo_flags);
+ tex->tiling, initial_domain, &pitch);
+
+ tiling = tex->tiling;
}
if (!bo)
intel_bo_unreference(tex->bo);
tex->bo = bo;
- tex->tiling = intel_bo_get_tiling(bo);
- tex->bo_stride = intel_bo_get_pitch(bo);
+ tex->tiling = tiling;
+ tex->bo_stride = pitch;
+
+ return true;
+}
+
+static bool
+tex_create_separate_stencil(struct ilo_texture *tex)
+{
+ struct pipe_resource templ = tex->base;
+ struct pipe_resource *s8;
+
+ /*
+ * Unless PIPE_BIND_DEPTH_STENCIL is set, the resource may have other
+ * tilings. But that should be fine since it will never be bound as the
+ * stencil buffer, and our transfer code can handle all tilings.
+ */
+ templ.format = PIPE_FORMAT_S8_UINT;
+
+ s8 = tex->base.screen->resource_create(tex->base.screen, &templ);
+ if (!s8)
+ return false;
+
+ tex->separate_s8 = ilo_texture(s8);
+
+ assert(tex->separate_s8->bo_format == PIPE_FORMAT_S8_UINT);
+
+ return true;
+}
+
+static bool
+tex_create_hiz(struct ilo_texture *tex, const struct tex_layout *layout)
+{
+ struct ilo_screen *is = ilo_screen(tex->base.screen);
+ const struct pipe_resource *templ = layout->templ;
+ const int hz_align_j = 8;
+ unsigned hz_width, hz_height, lv;
+ unsigned long pitch;
+
+ /*
+ * See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge
+ * PRM, volume 2 part 1, page 312-313.
+ *
+ * It seems HiZ buffer is aligned to 8x8, with every two rows packed into a
+ * memory row.
+ */
+
+ hz_width = align(layout->levels[0].w, 16);
+
+ if (templ->target == PIPE_TEXTURE_3D) {
+ hz_height = 0;
+
+ for (lv = 0; lv <= templ->last_level; lv++) {
+ const unsigned h = align(layout->levels[lv].h, hz_align_j);
+ hz_height += h * layout->levels[lv].d;
+ }
+
+ hz_height /= 2;
+ }
+ else {
+ const unsigned h0 = align(layout->levels[0].h, hz_align_j);
+ unsigned hz_qpitch = h0;
+
+ if (layout->array_spacing_full) {
+ const unsigned h1 = align(layout->levels[1].h, hz_align_j);
+ const unsigned htail =
+ ((layout->dev->gen >= ILO_GEN(7)) ? 12 : 11) * hz_align_j;
+
+ hz_qpitch += h1 + htail;
+ }
+
+ hz_height = hz_qpitch * templ->array_size / 2;
+
+ if (layout->dev->gen >= ILO_GEN(7))
+ hz_height = align(hz_height, 8);
+ }
+
+ tex->hiz.bo = intel_winsys_alloc_texture(is->winsys,
+ "hiz texture", hz_width, hz_height, 1,
+ INTEL_TILING_Y, INTEL_DOMAIN_RENDER, &pitch);
+ if (!tex->hiz.bo)
+ return false;
+
+ tex->hiz.bo_stride = pitch;
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 313-314:
+ *
+ * "A rectangle primitive representing the clear area is delivered. The
+ * primitive must adhere to the following restrictions on size:
+ *
+ * - If Number of Multisamples is NUMSAMPLES_1, the rectangle must be
+ * aligned to an 8x4 pixel block relative to the upper left corner
+ * of the depth buffer, and contain an integer number of these pixel
+ * blocks, and all 8x4 pixels must be lit.
+ *
+ * - If Number of Multisamples is NUMSAMPLES_4, the rectangle must be
+ * aligned to a 4x2 pixel block (8x4 sample block) relative to the
+ * upper left corner of the depth buffer, and contain an integer
+ * number of these pixel blocks, and all samples of the 4x2 pixels
+ * must be lit
+ *
+ * - If Number of Multisamples is NUMSAMPLES_8, the rectangle must be
+ * aligned to a 2x2 pixel block (8x4 sample block) relative to the
+ * upper left corner of the depth buffer, and contain an integer
+ * number of these pixel blocks, and all samples of the 2x2 pixels
+ * must be list."
+ *
+ * "The following is required when performing a depth buffer resolve:
+ *
+ * - A rectangle primitive of the same size as the previous depth
+ * buffer clear operation must be delivered, and depth buffer state
+ * cannot have changed since the previous depth buffer clear
+ * operation."
+ *
+ * Experiments on Haswell show that depth buffer resolves have the same
+ * alignment requirements, and aligning the RECTLIST primitive and
+ * 3DSTATE_DRAWING_RECTANGLE alone are not enough. The mipmap size must be
+ * aligned.
+ */
+ for (lv = 0; lv <= templ->last_level; lv++) {
+ unsigned align_w = 8, align_h = 4;
+ unsigned flags = 0;
+
+ switch (templ->nr_samples) {
+ case 0:
+ case 1:
+ break;
+ case 2:
+ align_w /= 2;
+ break;
+ case 4:
+ align_w /= 2;
+ align_h /= 2;
+ break;
+ case 8:
+ default:
+ align_w /= 4;
+ align_h /= 2;
+ break;
+ }
+
+ if (u_minify(templ->width0, lv) % align_w == 0 &&
+ u_minify(templ->height0, lv) % align_h == 0) {
+ flags |= ILO_TEXTURE_HIZ;
+
+ /* this will trigger a HiZ resolve */
+ if (tex->imported)
+ flags |= ILO_TEXTURE_CPU_WRITE;
+ }
+
+ if (flags) {
+ const unsigned num_slices = (templ->target == PIPE_TEXTURE_3D) ?
+ u_minify(templ->depth0, lv) : templ->array_size;
+ ilo_texture_set_slice_flags(tex, lv, 0, num_slices, flags, flags);
+ }
+ }
return true;
}
static void
tex_destroy(struct ilo_texture *tex)
{
+ if (tex->hiz.bo)
+ intel_bo_unreference(tex->hiz.bo);
+
if (tex->separate_s8)
tex_destroy(tex->separate_s8);
tex->imported = (handle != NULL);
- if (tex->base.bind & (PIPE_BIND_DEPTH_STENCIL |
- PIPE_BIND_RENDER_TARGET))
- tex->bo_flags |= INTEL_ALLOC_FOR_RENDER;
-
- tex_layout_init(&layout, screen, templ, tex->slice_offsets);
+ tex_layout_init(&layout, screen, templ, tex->slices);
switch (templ->target) {
case PIPE_TEXTURE_1D:
}
/* allocate separate stencil resource */
- if (layout.separate_stencil) {
- struct pipe_resource s8_templ = *layout.templ;
- struct pipe_resource *s8;
-
- /*
- * Unless PIPE_BIND_DEPTH_STENCIL is set, the resource may have other
- * tilings. But that should be fine since it will never be bound as the
- * stencil buffer, and our transfer code can handle all tilings.
- */
- s8_templ.format = PIPE_FORMAT_S8_UINT;
+ if (layout.separate_stencil && !tex_create_separate_stencil(tex)) {
+ tex_destroy(tex);
+ return NULL;
+ }
- s8 = screen->resource_create(screen, &s8_templ);
- if (!s8) {
+ if (layout.hiz && !tex_create_hiz(tex, &layout)) {
+ /* Separate Stencil Buffer requires HiZ to be enabled */
+ if (layout.dev->gen == ILO_GEN(6) && layout.separate_stencil) {
tex_destroy(tex);
return NULL;
}
-
- tex->separate_s8 = ilo_texture(s8);
-
- assert(tex->separate_s8->bo_format == PIPE_FORMAT_S8_UINT);
}
return &tex->base;
static bool
tex_get_handle(struct ilo_texture *tex, struct winsys_handle *handle)
{
+ struct ilo_screen *is = ilo_screen(tex->base.screen);
int err;
- err = intel_bo_export_handle(tex->bo, handle);
+ err = intel_winsys_export_handle(is->winsys, tex->bo,
+ tex->tiling, tex->bo_stride, handle);
return !err;
}
static bool
buf_create_bo(struct ilo_buffer *buf)
{
+ const uint32_t initial_domain =
+ (buf->base.bind & PIPE_BIND_STREAM_OUTPUT) ?
+ INTEL_DOMAIN_RENDER : 0;
struct ilo_screen *is = ilo_screen(buf->base.screen);
const char *name;
struct intel_bo *bo;
}
bo = intel_winsys_alloc_buffer(is->winsys,
- name, buf->bo_size, buf->bo_flags);
+ name, buf->bo_size, initial_domain);
if (!bo)
return false;
pipe_reference_init(&buf->base.reference, 1);
buf->bo_size = templ->width0;
- buf->bo_flags = 0;
+
+ /*
+ * From the Sandy Bridge PRM, volume 1 part 1, page 118:
+ *
+ * "For buffers, which have no inherent "height," padding requirements
+ * are different. A buffer must be padded to the next multiple of 256
+ * array elements, with an additional 16 bytes added beyond that to
+ * account for the L1 cache line."
+ */
+ if (templ->bind & PIPE_BIND_SAMPLER_VIEW)
+ buf->bo_size = align(buf->bo_size, 256) + 16;
+
+ if (templ->bind & PIPE_BIND_VERTEX_BUFFER) {
+ /*
+ * As noted in ilo_translate_format(), we treat some 3-component formats
+ * as 4-component formats to work around hardware limitations. Imagine
+ * the case where the vertex buffer holds a single
+ * PIPE_FORMAT_R16G16B16_FLOAT vertex, and buf->bo_size is 6. The
+ * hardware would fail to fetch it at boundary check because the vertex
+ * buffer is expected to hold a PIPE_FORMAT_R16G16B16A16_FLOAT vertex
+ * and that takes at least 8 bytes.
+ *
+ * For the workaround to work, we should add 2 to the bo size. But that
+ * would waste a page when the bo size is already page aligned. Let's
+ * round it to page size for now and revisit this when needed.
+ */
+ buf->bo_size = align(buf->bo_size, 4096);
+ }
if (!buf_create_bo(buf)) {
FREE(buf);
*/
unsigned
ilo_texture_get_slice_offset(const struct ilo_texture *tex,
- int level, int slice,
+ unsigned level, unsigned slice,
unsigned *x_offset, unsigned *y_offset)
{
+ const struct ilo_texture_slice *s =
+ ilo_texture_get_slice(tex, level, slice);
unsigned tile_w, tile_h, tile_size, row_size;
unsigned x, y, slice_offset;
row_size = tex->bo_stride * tile_h;
/* in bytes */
- x = tex->slice_offsets[level][slice].x / tex->block_width * tex->bo_cpp;
- y = tex->slice_offsets[level][slice].y / tex->block_height;
+ x = s->x / tex->block_width * tex->bo_cpp;
+ y = s->y / tex->block_height;
slice_offset = row_size * (y / tile_h) + tile_size * (x / tile_w);
/*