bc->cf_last->ndw += 2;
bc->ndw += 2;
- bc->cf_last->kcache0_mode = 2;
+ /* The following configuration provides 64 128-bit constants.
+ * Each cacheline holds 16 128-bit constants, each kcache can
+ * lock 2 cachelines, and there are 2 kcaches per ALU clause,
+ * for a maximum of 64 constants.
+ * To support more than 64 constants, the code needs
+ * to be broken down into multiple ALU clauses.
+ */
+ /* select the constant buffer (0-15) for each kcache */
+ bc->cf_last->kcache0_bank = 0;
+ bc->cf_last->kcache1_bank = 0;
+ /* lock 2 cachelines per kcache; 4 total */
+ bc->cf_last->kcache0_mode = V_SQ_CF_KCACHE_LOCK_2;
+ bc->cf_last->kcache1_mode = V_SQ_CF_KCACHE_LOCK_2;
+ /* set the cacheline offsets for each kcache */
+ bc->cf_last->kcache0_addr = 0;
+ bc->cf_last->kcache1_addr = 2;
/* process cur ALU instructions for bank swizzle */
if (alu->last) {
/* KCACHE_MODE0 field: 2 bits wide, located at bits 31:30.
 * S_ masks a value to 2 bits and shifts it into position, G_ extracts
 * the field from a word, and C_ is the AND-mask that clears the field
 * while preserving the remaining bits.
 */
#define S_SQ_CF_ALU_WORD0_KCACHE_MODE0(x) (((x) & 0x3) << 30)
#define G_SQ_CF_ALU_WORD0_KCACHE_MODE0(x) (((x) >> 30) & 0x3)
#define C_SQ_CF_ALU_WORD0_KCACHE_MODE0 0x3FFFFFFF
/* Named values for a 2-bit kcache mode field. LOCK_2 is the value the
 * assembler uses to lock 2 cachelines per kcache.
 * NOTE(review): NOP presumably locks nothing, LOCK_1 presumably locks a
 * single cacheline, and LOCK_LOOP_INDEX presumably selects a
 * loop-index-relative lock -- confirm against the hardware ISA docs.
 */
+#define V_SQ_CF_KCACHE_NOP 0x00000000
+#define V_SQ_CF_KCACHE_LOCK_1 0x00000001
+#define V_SQ_CF_KCACHE_LOCK_2 0x00000002
+#define V_SQ_CF_KCACHE_LOCK_LOOP_INDEX 0x00000003
/* Empty marker macro; presumably introduces the WORD1 field
 * definitions that follow -- TODO confirm how it is consumed.
 */
#define P_SQ_CF_ALU_WORD1
/* KCACHE_MODE1 field: 2 bits wide, located at bits 1:0.
 * S_ masks a value to 2 bits (shift of 0), G_ extracts the field.
 */
#define S_SQ_CF_ALU_WORD1_KCACHE_MODE1(x) (((x) & 0x3) << 0)
#define G_SQ_CF_ALU_WORD1_KCACHE_MODE1(x) (((x) >> 0) & 0x3)