inst->mlen = 1;
inst->size_written = 4 * REG_SIZE;
- bld.MOV(retype(dest, ret_payload.type), component(ret_payload, 0));
+ /* SKL PRM, vol07, 3D Media GPGPU Engine, Bounds Checking and Faulting:
+ *
+ * "Out-of-bounds checking is always performed at a DWord granularity. If
+ * any part of the DWord is out-of-bounds then the whole DWord is
+ * considered out-of-bounds."
+ *
+ * This implies that types with size smaller than 4-bytes need to be
+ * padded if they don't complete the last dword of the buffer. But as we
+ * need to maintain the original size we need to reverse the padding
+ * calculation to return the correct size to know the number of elements
+ * of an unsized array. As we stored in the last two bits of the surface
+ * size the needed padding for the buffer, we calculate here the
+ * original buffer_size reversing the surface_size calculation:
+ *
+ * surface_size = isl_align(buffer_size, 4) +
+ * (isl_align(buffer_size, 4) - buffer_size)
+ *
+ * buffer_size = (surface_size & ~3) - (surface_size & 3)
+ */
+
+ fs_reg size_aligned4 = ubld.vgrf(BRW_REGISTER_TYPE_UD);
+ fs_reg size_padding = ubld.vgrf(BRW_REGISTER_TYPE_UD);
+ fs_reg buffer_size = ubld.vgrf(BRW_REGISTER_TYPE_UD);
+
+ ubld.AND(size_padding, ret_payload, brw_imm_ud(3));
+ ubld.AND(size_aligned4, ret_payload, brw_imm_ud(~3));
+ ubld.ADD(buffer_size, size_aligned4, negate(size_padding));
+
+ bld.MOV(retype(dest, ret_payload.type), component(buffer_size, 0));
+
brw_mark_surface_used(prog_data, index);
break;
}
isl_genX(buffer_fill_state_s)(void *state,
const struct isl_buffer_fill_state_info *restrict info)
{
- uint32_t num_elements = info->size / info->stride;
+ uint64_t buffer_size = info->size;
+
+ /* Uniform and Storage buffers need to have a surface size not less than the
+ * aligned 32-bit size of the buffer. To calculate the array length of
+ * unsized arrays in StorageBuffer, the last 2 bits store the padding size
+ * added to the surface, so we can later calculate the original buffer
+ * size to know the number of elements.
+ *
+ * surface_size = isl_align(buffer_size, 4) +
+ * (isl_align(buffer_size, 4) - buffer_size)
+ *
+ * buffer_size = (surface_size & ~3) - (surface_size & 3)
+ */
+ if (info->format == ISL_FORMAT_RAW ||
+ info->stride < isl_format_get_layout(info->format)->bpb / 8) {
+ assert(info->stride == 1);
+ uint64_t aligned_size = isl_align(buffer_size, 4);
+ buffer_size = aligned_size + (aligned_size - buffer_size);
+ }
+
+ uint32_t num_elements = buffer_size / info->stride;
if (GEN_GEN >= 7) {
/* From the IVB PRM, SURFACE_STATE::Height,
pMemoryRequirements->size = buffer->size;
pMemoryRequirements->alignment = alignment;
+
+ /* Storage and Uniform buffers should have their size aligned to
+ * 32-bits to avoid boundary checks when the last DWord is not complete.
+ * This ensures that no internal padding is needed for
+ * 16-bit types.
+ */
+ if (device->robust_buffer_access &&
+ (buffer->usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT ||
+ buffer->usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT))
+ pMemoryRequirements->size = align_u64(buffer->size, 4);
+
pMemoryRequirements->memoryTypeBits = memory_types;
}