num_records = size / stride;
num_records = MIN2(num_records, (buf->b.b.width0 - offset) / stride);
- if (screen->b.chip_class == VI)
+ /* The NUM_RECORDS field has a different meaning depending on the chip,
+ * instruction type, STRIDE, and SWIZZLE_ENABLE.
+ *
+ * SI-CIK:
+ * - If STRIDE == 0, it's in byte units.
+ * - If STRIDE != 0, it's in units of STRIDE, used with inst.IDXEN.
+ *
+ * VI:
+ * - For SMEM and STRIDE == 0, it's in byte units.
+ * - For SMEM and STRIDE != 0, it's in units of STRIDE.
+ * - For VMEM and STRIDE == 0 or SWIZZLE_ENABLE == 0, it's in byte units.
+ * - For VMEM and STRIDE != 0 and SWIZZLE_ENABLE == 1, it's in units of STRIDE.
+ * NOTE: There is incompatibility between VMEM and SMEM opcodes due to SWIZZLE_-
+ * ENABLE. The workaround is to set STRIDE = 0 if SWIZZLE_ENABLE == 0 when
+ * using SMEM. This can be done in the shader by clearing STRIDE with s_and.
+ * That way the same descriptor can be used by both SMEM and VMEM.
+ *
+ * GFX9:
+ * - For SMEM and STRIDE == 0, it's in byte units.
+ * - For SMEM and STRIDE != 0, it's in units of STRIDE.
+ * - For VMEM and inst.IDXEN == 0 or STRIDE == 0, it's in byte units.
+ * - For VMEM and inst.IDXEN == 1 and STRIDE != 0, it's in units of STRIDE.
+ */
+ if (screen->b.chip_class >= GFX9)
+ /* When vindex == 0, LLVM sets IDXEN = 0, thus changing units
+ * from STRIDE to bytes. This works around it by setting
+ * NUM_RECORDS to at least the size of one element, so that
+ * the first element is readable when IDXEN == 0.
+ *
+ * TODO: Fix this in LLVM, but do we need a new intrinsic where
+ * IDXEN is enforced?
+ */
+ num_records = num_records ? MAX2(num_records, stride) : 0;
+ else if (screen->b.chip_class == VI)
num_records *= stride;
state[4] = 0;