[BI_CMP] = BI_GENERIC | BI_MODS | BI_SCHED_ALL,
[BI_BLEND] = BI_ADD,
[BI_BITWISE] = BI_GENERIC | BI_SCHED_ALL,
- [BI_CONVERT] = BI_SCHED_ALL,
+ [BI_CONVERT] = BI_SCHED_ALL | BI_SWIZZLABLE,
[BI_CSEL] = BI_SCHED_FMA,
[BI_DISCARD] = BI_SCHED_ADD,
[BI_FMA] = BI_ROUNDMODE | BI_SCHED_FMA,
[BI_STORE] = BI_SCHED_ADD,
[BI_STORE_VAR] = BI_SCHED_ADD,
[BI_SPECIAL] = BI_SCHED_ADD | BI_SCHED_SLOW,
+ [BI_SWIZZLE] = BI_SCHED_ALL | BI_SWIZZLABLE,
[BI_TEX] = BI_SCHED_ADD,
[BI_ROUND] = BI_GENERIC | BI_ROUNDMODE | BI_SCHED_ALL,
};
BI_STORE,
BI_STORE_VAR,
BI_SPECIAL, /* _FAST, _TABLE on supported GPUs */
+ BI_SWIZZLE,
BI_TEX,
BI_ROUND,
BI_NUM_CLASSES
* nopped out. Used for _FAST operations. */
#define BI_SCHED_SLOW (1 << 5)
+/* Class property: a swizzle may be applied to an 8-bit or 16-bit source */
+#define BI_SWIZZLABLE (1 << 6)
+
/* It can't get any worse than csel4... can it? */
#define BIR_SRC_COUNT 4
* int). Zero if there is no destination. Bitsize included */
nir_alu_type dest_type;
+ /* If the source type is 8-bit or 16-bit such that SIMD is possible, and
+ * the class has BI_SWIZZLABLE, this is a swizzle for the input. Swizzles
+ * in practice only occur on one-source instructions (conversions,
+ * dedicated swizzle ops) and as component selection on two-source
+ * instructions where it is unambiguous which source is meant. Each
+ * entry is bounded by 32/type_size (4 for 8-bit, 2 for 16-bit). */
+ unsigned swizzle[4];
+
/* A class-specific op from which the actual opcode can be derived
* (along with the above information) */