:3:0006:0014[7f64a39ax_609f35bcx] (sy)(jp)(rpt3)(ul)sad.s16 hr38.z, hc367.x, (neg)hc50.y, (r)hr39.w
        :7:0007:0018[f352cfcbx_ecad502bx] (sy)unknown(7,6).g
        :4:0008:0019[818209d0x_74021646x] (rpt1)unknown(4,12) hr52.x, (r)hc401.z
-       :6:0009:0021[c90972c0x_8e905e80x] (jp)stl.s16 l[hr48.x], hr976.x, 142
+       :6:0009:0021[c90972c0x_8e905e80x] (jp)stl.s16 l[hr48.x], hr16.x, 142
        :5:0010:0022[a4827242x_46248300x] gather4b.a (s8)(y)hr16.z, hr32.x, s#1, t#35
        :4:0011:0023[82342205x_cd064d21x] (rpt2)(ul)unknown(4,17) r1.y, (neg)c<a0.x + 289>
        :5:0012:0026[a923bf8bx_81f95908x] (jp)samb.3d.a.p (u32)(xyzw)r34.w, hr33.x, hr43.x, s#15, t#64
-       :1:0013:0027[3dda8123x_a0d91ccdx] (sy)(jp)(rpt1)cov.u8u16 (even)(pos_infinity)hr<a0.x + 35>, 0xa0d91ccd
-../src/freedreno/decode/instr-a3xx.h:979: is_cat6_legacy: Assertion `instr->cat6.opc == 0' failed.
+
+../src/freedreno/ir3/disasm-a3xx.c:173: regmask_set: Assertion `num < MAX_REG' failed.
        -----------------------------------------------
        8192 (0x2000) bytes
        000000: a018c54a 0600e824 11fa09c3 edc6145b     |J...$.......[...|
       size: 2048
        :2:0000:0000[40846422x_d81251c5x] (sat)(ul)sign.f r8.z, (neg)hc113.y
        :4:0001:0001[938a16e2x_520c369ax] (sy)(ss)(sat)(rpt2)unknown(4,28) hr56.z, -358
-       :1:0002:0004[200a00c1x_094864d2x] cov.u16f16 hr<a0.x + 193>, hr308.z
-       :2:0003:0005[44109084x_4a201507x] (ss)unknown(2,32) (ei)r33.x, c321.w, (neg)r<a0.x + 544>
-       :4:0004:0006[882fadabx_14a391b1x] (jp)(sat)(rpt1)(ul)rsq hr42.w, (abs)(r)hc108.y
-       :3:0005:0008[6060f068x_7106601ax] (ss)(ul)mad.u16 r26.x, (neg)hr6.z, (neg)hc48.y, (r)hc65.z
-       :3:0006:0009[60ed4212x_02900201x] (rpt2)madsh.u16 hr4.z, r128.y, r54.z, r164.x
-       :0:0007:0012[005b6589x_8a054280x] (eq)(rpt5)bkt #17024
-       :3:0008:0018[7cebfff7x_dbae7deex] (sy)(ss)(jp)(sat)(rpt3)(ul)sel.b32 a3.x, (neg)(r)c891.z, (neg)c53.w, (neg)c747.z
-       :5:0009:0022[aff86b27x_fd7472ffx] (jp)unknown(5,31).o.p.base4 (u8)(xyw)hr9.w, r14.y, a1.x
-       :0:0010:0023[0ed959d7x_6d7a21a4x] (ss)(jp)(eq)(rpt1)unknown(0,13)
-       :2:0011:0025[445a8ebex_8d6e703bx] (sat)(rpt2)cmpv.s.gt (ei)r47.z, (neg)(r)59, (abs)(r)c<a0.x + 366>
-../src/freedreno/decode/instr-a3xx.h:979: is_cat6_legacy: Assertion `instr->cat6.opc == 0' failed.
+       :1:0002:0004[200a00c1x_094864d2x] cov.u16f16 hr<a0.x + 193>, 
+../src/freedreno/ir3/disasm-a3xx.c:185: regmask_get: Assertion `num < MAX_REG' failed.
        -----------------------------------------------
        8192 (0x2000) bytes
        000000: d81251c5 40846422 520c369a 938a16e2     |.Q.."d.@.6.R....|
       size: 2048
        :0:0000:0000[00000000x_00003002x] nop
        :0:0001:0001[00000000x_00000000x] nop
-       :6:0002:0002[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0003:0003[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0004:0004[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0005:0005[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0006:0006[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0007:0007[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0008:0008[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0009:0009[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0010:0010[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0011:0011[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0012:0012[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0013:0013[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0014:0014[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0015:0015[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0016:0016[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0017:0017[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0018:0018[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0019:0019[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0020:0020[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0021:0021[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0022:0022[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0023:0023[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0024:0024[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0025:0025[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0026:0026[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0027:0027[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0028:0028[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0029:0029[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0030:0030[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0031:0031[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0032:0032[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0033:0033[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0034:0034[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0035:0035[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0036:0036[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0037:0037[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0038:0038[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0039:0039[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0040:0040[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0041:0041[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0042:0042[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0043:0043[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0044:0044[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0045:0045[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0046:0046[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0047:0047[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0048:0048[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0049:0049[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0050:0050[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0051:0051[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0052:0052[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0053:0053[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0054:0054[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0055:0055[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0056:0056[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0057:0057[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0058:0058[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0059:0059[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0060:0060[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0061:0061[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0062:0062[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0063:0063[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0064:0064[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0065:0065[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0066:0066[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0067:0067[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0068:0068[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0069:0069[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0070:0070[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0071:0071[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0072:0072[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0073:0073[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0074:0074[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0075:0075[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0076:0076[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0077:0077[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0078:0078[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0079:0079[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0080:0080[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0081:0081[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0082:0082[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0083:0083[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0084:0084[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0085:0085[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0086:0086[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0087:0087[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0088:0088[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0089:0089[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0090:0090[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0091:0091[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0092:0092[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0093:0093[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0094:0094[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0095:0095[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0096:0096[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0097:0097[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0098:0098[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0099:0099[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0100:0100[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0101:0101[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0102:0102[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0103:0103[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0104:0104[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0105:0105[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0106:0106[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0107:0107[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0108:0108[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0109:0109[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0110:0110[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0111:0111[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0112:0112[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0113:0113[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0114:0114[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0115:0115[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0116:0116[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0117:0117[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0118:0118[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0119:0119[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0120:0120[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0121:0121[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0122:0122[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0123:0123[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0124:0124[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0125:0125[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0126:0126[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0127:0127[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0128:0128[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0129:0129[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0130:0130[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0131:0131[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0132:0132[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0133:0133[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0134:0134[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0135:0135[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0136:0136[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0137:0137[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0138:0138[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0139:0139[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0140:0140[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0141:0141[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0142:0142[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0143:0143[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0144:0144[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0145:0145[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0146:0146[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0147:0147[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0148:0148[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0149:0149[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0150:0150[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0151:0151[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0152:0152[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0153:0153[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0154:0154[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0155:0155[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0156:0156[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0157:0157[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0158:0158[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0159:0159[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0160:0160[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0161:0161[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0162:0162[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0163:0163[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0164:0164[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0165:0165[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0166:0166[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0167:0167[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0168:0168[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0169:0169[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0170:0170[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0171:0171[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0172:0172[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0173:0173[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0174:0174[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0175:0175[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0176:0176[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0177:0177[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0178:0178[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0179:0179[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0180:0180[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0181:0181[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0182:0182[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0183:0183[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0184:0184[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0185:0185[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0186:0186[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0187:0187[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0188:0188[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0189:0189[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0190:0190[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0191:0191[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0192:0192[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0193:0193[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0194:0194[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0195:0195[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0196:0196[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0197:0197[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0198:0198[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0199:0199[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0200:0200[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0201:0201[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0202:0202[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0203:0203[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0204:0204[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0205:0205[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0206:0206[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0207:0207[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0208:0208[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0209:0209[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0210:0210[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0211:0211[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0212:0212[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0213:0213[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0214:0214[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0215:0215[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0216:0216[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0217:0217[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0218:0218[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0219:0219[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0220:0220[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0221:0221[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0222:0222[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0223:0223[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0224:0224[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0225:0225[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0226:0226[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0227:0227[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0228:0228[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0229:0229[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0230:0230[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0231:0231[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0232:0232[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0233:0233[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0234:0234[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0235:0235[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0236:0236[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0237:0237[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0238:0238[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0239:0239[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0240:0240[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0241:0241[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0242:0242[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0243:0243[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0244:0244[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0245:0245[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0246:0246[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0247:0247[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0248:0248[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0249:0249[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0250:0250[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0251:0251[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0252:0252[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0253:0253[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0254:0254[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0255:0255[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0256:0256[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0257:0257[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0258:0258[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0259:0259[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0260:0260[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0261:0261[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0262:0262[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0263:0263[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0264:0264[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0265:0265[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0266:0266[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0267:0267[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0268:0268[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0269:0269[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0270:0270[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0271:0271[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0272:0272[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0273:0273[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0274:0274[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0275:0275[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0276:0276[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0277:0277[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0278:0278[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0279:0279[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0280:0280[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0281:0281[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0282:0282[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0283:0283[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0284:0284[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0285:0285[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0286:0286[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0287:0287[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0288:0288[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0289:0289[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0290:0290[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0291:0291[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0292:0292[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0293:0293[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0294:0294[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0295:0295[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0296:0296[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0297:0297[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0298:0298[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0299:0299[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0300:0300[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0301:0301[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0302:0302[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0303:0303[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0304:0304[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0305:0305[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0306:0306[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0307:0307[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0308:0308[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0309:0309[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0310:0310[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0311:0311[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0312:0312[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0313:0313[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0314:0314[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0315:0315[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0316:0316[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0317:0317[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0318:0318[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0319:0319[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0320:0320[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0321:0321[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0322:0322[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0323:0323[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0324:0324[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0325:0325[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0326:0326[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0327:0327[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0328:0328[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0329:0329[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0330:0330[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0331:0331[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0332:0332[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0333:0333[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0334:0334[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0335:0335[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0336:0336[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0337:0337[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0338:0338[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0339:0339[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0340:0340[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0341:0341[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0342:0342[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0343:0343[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0344:0344[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0345:0345[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0346:0346[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0347:0347[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0348:0348[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0349:0349[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0350:0350[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0351:0351[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0352:0352[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0353:0353[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0354:0354[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0355:0355[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0356:0356[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0357:0357[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0358:0358[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0359:0359[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0360:0360[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0361:0361[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0362:0362[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0363:0363[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0364:0364[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0365:0365[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0366:0366[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0367:0367[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0368:0368[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0369:0369[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0370:0370[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0371:0371[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0372:0372[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0373:0373[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0374:0374[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0375:0375[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0376:0376[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0377:0377[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0378:0378[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0379:0379[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0380:0380[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0381:0381[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0382:0382[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0383:0383[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0384:0384[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0385:0385[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0386:0386[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0387:0387[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0388:0388[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0389:0389[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0390:0390[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0391:0391[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0392:0392[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0393:0393[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0394:0394[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0395:0395[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0396:0396[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0397:0397[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0398:0398[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0399:0399[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0400:0400[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0401:0401[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0402:0402[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0403:0403[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0404:0404[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0405:0405[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0406:0406[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0407:0407[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0408:0408[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0409:0409[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0410:0410[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0411:0411[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0412:0412[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0413:0413[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0414:0414[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0415:0415[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0416:0416[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0417:0417[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0418:0418[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0419:0419[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0420:0420[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0421:0421[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0422:0422[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0423:0423[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0424:0424[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0425:0425[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0426:0426[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0427:0427[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0428:0428[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0429:0429[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0430:0430[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0431:0431[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0432:0432[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0433:0433[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0434:0434[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0435:0435[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0436:0436[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0437:0437[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0438:0438[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0439:0439[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0440:0440[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0441:0441[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0442:0442[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0443:0443[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0444:0444[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0445:0445[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0446:0446[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0447:0447[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0448:0448[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0449:0449[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0450:0450[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0451:0451[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0452:0452[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0453:0453[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0454:0454[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0455:0455[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0456:0456[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0457:0457[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0458:0458[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0459:0459[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0460:0460[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0461:0461[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0462:0462[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0463:0463[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0464:0464[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0465:0465[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0466:0466[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0467:0467[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0468:0468[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0469:0469[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0470:0470[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0471:0471[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0472:0472[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0473:0473[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0474:0474[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0475:0475[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0476:0476[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0477:0477[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0478:0478[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0479:0479[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0480:0480[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0481:0481[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0482:0482[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0483:0483[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0484:0484[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0485:0485[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0486:0486[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0487:0487[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0488:0488[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0489:0489[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0490:0490[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0491:0491[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0492:0492[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0493:0493[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0494:0494[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0495:0495[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0496:0496[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0497:0497[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0498:0498[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0499:0499[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0500:0500[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0501:0501[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0502:0502[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0503:0503[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0504:0504[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0505:0505[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0506:0506[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0507:0507[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0508:0508[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0509:0509[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0510:0510[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0511:0511[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :0:0512:0512[00500240x_00024000x] (rpt2)nop
-       :0:0513:0515[00402020x_00000000x] nop
-       :0:0514:0516[00000040x_00001000x] nop
-       :0:0515:0517[00510401x_00024020x] (eq)(rpt4)nop
-       :0:0516:0522[00100080x_00000008x] nop
-       :0:0517:0523[00000044x_00002080x] nop
-       :0:0518:0524[00001000x_00000000x] (ss)nop
-       :0:0519:0525[00200000x_00000008x] nop
-       :0:0520:0526[00000044x_00048110x] nop
-       :0:0521:0527[00000040x_00508000x] nop
-       :0:0522:0528[00010200x_00020044x] (eq)(rpt2)nop
-       :0:0523:0531[00000000x_00201014x] nop
-       :0:0524:0532[00012100x_00101100x] (eq)(rpt1)nop
-       :0:0525:0534[00000012x_00005000x] nop
-       :0:0526:0535[00000010x_00005000x] nop
-       :0:0527:0536[00040000x_00000020x] nop
-       :0:0528:0537[00002101x_00082514x] (rpt1)nop
-       :0:0529:0539[00000000x_00210020x] nop
-       :0:0530:0540[00440004x_00010002x] nop
-       :0:0531:0541[00000002x_00000250x] nop
-       :0:0532:0542[00000040x_00100000x] nop
-       :0:0533:0543[00000000x_00020014x] nop
-       :0:0534:0544[000400a0x_00050020x] nop
-       :0:0535:0545[00100000x_00000000x] nop
-       :0:0536:0546[00000000x_00044081x] nop
-       :0:0537:0547[00000000x_00000000x] nop
-       :0:0538:0548[00200048x_00000100x] nop
-       :0:0539:0549[00080020x_00000000x] nop
-       :0:0540:0550[00200002x_00200001x] nop
-       :0:0541:0551[002000a4x_00000404x] nop
-       :0:0542:0552[00440246x_00000004x] (rpt2)nop
-       :0:0543:0555[0008c040x_00442000x] nop
-       :0:0544:0556[002112a0x_00200000x] (ss)(eq)(rpt2)nop
-       :0:0545:0559[00000000x_00000000x] nop
-       :0:0546:0560[00000240x_00400001x] (rpt2)nop
-       :0:0547:0563[00000000x_00040400x] nop
-       :0:0548:0564[0000a100x_00104010x] (rpt1)nop
-       :0:0549:0566[00008480x_00002001x] (rpt4)nop
-       :0:0550:0571[00000001x_00000040x] nop
-       :0:0551:0572[00040001x_00040400x] nop
-       :0:0552:0573[00200000x_00040600x] nop
-       :0:0553:0574[00000100x_00100000x] (rpt1)nop
-       :0:0554:0576[00504180x_0020a200x] (rpt1)nop
-       :0:0555:0578[00000000x_00000000x] nop
-       :0:0556:0579[00000024x_00004000x] nop
-       :0:0557:0580[00200000x_00100008x] nop
-       :0:0558:0581[00010080x_00000000x] (eq)nop
-       :0:0559:0582[00080000x_00000000x] nop
-       :0:0560:0583[00084000x_00500400x] nop
-       :0:0561:0584[00004000x_00008000x] nop
-       :0:0562:0585[00200000x_00000300x] nop
-       :0:0563:0586[00000042x_00020001x] nop
-       :0:0564:0587[00005600x_00400088x] (ss)(rpt6)nop
-       :0:0565:0594[00000002x_00000000x] nop
-       :0:0566:0595[0002005ex_00400008x] bkt #8
-       :0:0567:0596[00020020x_00200000x] bkt #0
-       :0:0568:0597[001e0414x_00055480x] (rpt4)bkt #21632
-       :0:0569:0602[00000000x_00000000x] nop
-       :0:0570:0603[00000442x_00000480x] (rpt4)nop
-       :0:0571:0608[00000200x_00080000x] (rpt2)nop
-       :0:0572:0611[00520000x_00600400x] bkt #1024
-       :0:0573:0612[00001200x_00000008x] (ss)(rpt2)nop
-       :0:0574:0615[00400114x_00201000x] (rpt1)nop
-       :0:0575:0617[00110100x_00100002x] (eq)(rpt1)nop
-       :0:0576:0619[00404200x_00200683x] (rpt2)nop
-       :0:0577:0622[00000090x_00000004x] nop
-       :0:0578:0623[00502000x_00002000x] nop
-       :0:0579:0624[00000004x_00000020x] nop
-       :0:0580:0625[00103100x_00600010x] (ss)(rpt1)nop
-       :0:0581:0627[00000002x_00000010x] nop
-       :0:0582:0628[00004000x_00021200x] nop
-       :0:0583:0629[00000000x_00000000x] nop
-       :0:0584:0630[00201400x_0010220ax] (ss)(rpt4)nop
-       :0:0585:0635[00000000x_00030000x] nop
-       :0:0586:0636[00080040x_00400000x] nop
-       :0:0587:0637[00000080x_00000002x] nop
-       :0:0588:0638[00000580x_00000400x] (rpt5)nop
-       :0:0589:0644[00000200x_00000022x] (rpt2)nop
-       :0:0590:0647[00080000x_00300042x] nop
-       :0:0591:0648[00008000x_00040200x] nop
-       :0:0592:0649[00000000x_00040000x] nop
-       :0:0593:0650[0012008ax_00000010x] bkt #16
-       :0:0594:0651[00000100x_00000000x] (rpt1)nop
-       :0:0595:0653[00010000x_00010018x] (eq)nop
-       :0:0596:0654[00500011x_00440020x] nop
-       :0:0597:0655[00100000x_00000000x] nop
-       :0:0598:0656[00008200x_0004020cx] (rpt2)nop
-       :0:0599:0659[00000400x_00100010x] (rpt4)nop
-       :0:0600:0664[00000004x_00118000x] nop
-       :0:0601:0665[00000002x_00004200x] nop
-       :0:0602:0666[00026300x_00000210x] (rpt3)bkt #528
-       :0:0603:0670[0000a002x_00000040x] nop
-       :0:0604:0671[00081100x_00004082x] (ss)(rpt1)nop
-       :0:0605:0673[00000008x_00210000x] nop
-       :0:0606:0674[00020004x_00020000x] bkt #0
-       :0:0607:0675[00020000x_00064108x] bkt #16648
-       :0:0608:0676[00000084x_00020000x] nop
-       :0:0609:0677[00000181x_00000430x] (rpt1)nop
-       :0:0610:0679[001c8100x_00100002x] (rpt1)nop
-       :0:0611:0681[00000000x_00200020x] nop
-       :0:0612:0682[00100081x_00002000x] nop
-       :0:0613:0683[00000000x_00000008x] nop
-       :0:0614:0684[00009420x_00000024x] (ss)(rpt4)nop
-       :0:0615:0689[00000100x_00002010x] (rpt1)nop
-       :0:0616:0691[00004188x_00000000x] (rpt1)nop
-       :0:0617:0693[00100000x_00002000x] nop
-       :0:0618:0694[00120102x_00040000x] (rpt1)bkt #0
-       :0:0619:0696[00040002x_00000000x] nop
-       :0:0620:0697[00224200x_00210201x] (rpt2)bkt #513
-       :0:0621:0700[00000200x_00040000x] (rpt2)nop
-       :0:0622:0703[0000000cx_00000000x] nop
-       :0:0623:0704[00000000x_00005000x] nop
-       :0:0624:0705[00082208x_00010200x] (rpt2)nop
-       :0:0625:0708[00194011x_00000000x] (eq)nop
-       :0:0626:0709[00012100x_00000502x] (eq)(rpt1)nop
-       :0:0627:0711[00000240x_00040050x] (rpt2)nop
-       :0:0628:0714[00080211x_00004180x] (rpt2)nop
-       :0:0629:0717[00000000x_00001008x] nop
-       :0:0630:0718[00020490x_002004a0x] (rpt4)bkt #1184
-       :0:0631:0723[00210004x_00001080x] (eq)nop
-       :0:0632:0724[00000000x_00300040x] nop
-       :0:0633:0725[00008002x_00000020x] nop
-       :0:0634:0726[00000000x_00041098x] nop
-       :0:0635:0727[002000a0x_00000000x] nop
-       :0:0636:0728[00000000x_000c0400x] nop
-       :0:0637:0729[00000401x_00000402x] (rpt4)nop
-       :0:0638:0734[00002000x_00200400x] nop
-       :0:0639:0735[00000101x_00001000x] (rpt1)nop
-       :0:0640:0737[00500240x_00024000x] (rpt2)nop
-       :0:0641:0740[00402020x_00000000x] nop
-       :0:0642:0741[00000040x_00001000x] nop
-       :0:0643:0742[00510401x_00024020x] (eq)(rpt4)nop
-       :0:0644:0747[00100080x_00000008x] nop
-       :0:0645:0748[00000044x_00002080x] nop
-       :0:0646:0749[00001000x_00000000x] (ss)nop
-       :0:0647:0750[00200000x_00000008x] nop
-       :0:0648:0751[00000044x_00048110x] nop
-       :0:0649:0752[00000040x_00508000x] nop
-       :0:0650:0753[00010200x_00020044x] (eq)(rpt2)nop
-       :0:0651:0756[00000000x_00201014x] nop
-       :0:0652:0757[00012100x_00101100x] (eq)(rpt1)nop
-       :0:0653:0759[00000012x_00005000x] nop
-       :0:0654:0760[00000010x_00005000x] nop
-       :0:0655:0761[00040000x_00000020x] nop
-       :0:0656:0762[00002101x_00082514x] (rpt1)nop
-       :0:0657:0764[00000000x_00210020x] nop
-       :0:0658:0765[00440004x_00010002x] nop
-       :0:0659:0766[00000002x_00000250x] nop
-       :0:0660:0767[00000040x_00100000x] nop
-       :0:0661:0768[00000000x_00020014x] nop
-       :0:0662:0769[000400a0x_00050020x] nop
-       :0:0663:0770[00100000x_00000000x] nop
-       :0:0664:0771[00000000x_00044081x] nop
-       :0:0665:0772[00000000x_00000000x] nop
-       :0:0666:0773[00200048x_00000100x] nop
-       :0:0667:0774[00080020x_00000000x] nop
-       :0:0668:0775[00200002x_00200001x] nop
-       :0:0669:0776[002000a4x_00000404x] nop
-       :0:0670:0777[00440246x_00000004x] (rpt2)nop
-       :0:0671:0780[0008c040x_00442000x] nop
-       :0:0672:0781[002112a0x_00200000x] (ss)(eq)(rpt2)nop
-       :0:0673:0784[00000000x_00000000x] nop
-       :0:0674:0785[00000240x_00400001x] (rpt2)nop
-       :0:0675:0788[00000000x_00040400x] nop
-       :0:0676:0789[0000a100x_00104010x] (rpt1)nop
-       :0:0677:0791[00008480x_00002001x] (rpt4)nop
-       :0:0678:0796[00000001x_00000040x] nop
-       :0:0679:0797[00040001x_00040400x] nop
-       :0:0680:0798[00200000x_00040600x] nop
-       :0:0681:0799[00000100x_00100000x] (rpt1)nop
-       :0:0682:0801[00504180x_0020a200x] (rpt1)nop
-       :0:0683:0803[00000000x_00000000x] nop
-       :0:0684:0804[00000024x_00004000x] nop
-       :0:0685:0805[00200000x_00100008x] nop
-       :0:0686:0806[00010080x_00000000x] (eq)nop
-       :0:0687:0807[00080000x_00000000x] nop
-       :0:0688:0808[00084000x_00500400x] nop
-       :0:0689:0809[00004000x_00008000x] nop
-       :0:0690:0810[00200000x_00000300x] nop
-       :0:0691:0811[00000042x_00020001x] nop
-       :0:0692:0812[00005600x_00400088x] (ss)(rpt6)nop
-       :0:0693:0819[00000002x_00000000x] nop
-       :0:0694:0820[0002005ex_00400008x] bkt #8
-       :0:0695:0821[00020020x_00200000x] bkt #0
-       :0:0696:0822[001e0414x_00055480x] (rpt4)bkt #21632
-       :0:0697:0827[00000000x_00000000x] nop
-       :0:0698:0828[00000442x_00000480x] (rpt4)nop
-       :0:0699:0833[00000200x_00080000x] (rpt2)nop
-       :0:0700:0836[00520000x_00600400x] bkt #1024
-       :0:0701:0837[00001200x_00000008x] (ss)(rpt2)nop
-       :0:0702:0840[00400114x_00201000x] (rpt1)nop
-       :0:0703:0842[00110100x_00100002x] (eq)(rpt1)nop
-       :0:0704:0844[00404200x_00200683x] (rpt2)nop
-       :0:0705:0847[00000090x_00000004x] nop
-       :0:0706:0848[00502000x_00002000x] nop
-       :0:0707:0849[00000004x_00000020x] nop
-       :0:0708:0850[00103100x_00600010x] (ss)(rpt1)nop
-       :0:0709:0852[00000002x_00000010x] nop
-       :0:0710:0853[00004000x_00021200x] nop
-       :0:0711:0854[00000000x_00000000x] nop
-       :0:0712:0855[00201400x_0010220ax] (ss)(rpt4)nop
-       :0:0713:0860[00000000x_00030000x] nop
-       :0:0714:0861[00080040x_00400000x] nop
-       :0:0715:0862[00000080x_00000002x] nop
-       :0:0716:0863[00000580x_00000400x] (rpt5)nop
-       :0:0717:0869[00000200x_00000022x] (rpt2)nop
-       :0:0718:0872[00080000x_00300042x] nop
-       :0:0719:0873[00008000x_00040200x] nop
-       :0:0720:0874[00000000x_00040000x] nop
-       :0:0721:0875[0012008ax_00000010x] bkt #16
-       :0:0722:0876[00000100x_00000000x] (rpt1)nop
-       :0:0723:0878[00010000x_00010018x] (eq)nop
-       :0:0724:0879[00500011x_00440020x] nop
-       :0:0725:0880[00100000x_00000000x] nop
-       :0:0726:0881[00008200x_0004020cx] (rpt2)nop
-       :0:0727:0884[00000400x_00100010x] (rpt4)nop
-       :0:0728:0889[00000004x_00118000x] nop
-       :0:0729:0890[00000002x_00004200x] nop
-       :0:0730:0891[00026300x_00000210x] (rpt3)bkt #528
-       :0:0731:0895[0000a002x_00000040x] nop
-       :0:0732:0896[00081100x_00004082x] (ss)(rpt1)nop
-       :0:0733:0898[00000008x_00210000x] nop
-       :0:0734:0899[00020004x_00020000x] bkt #0
-       :0:0735:0900[00020000x_00064108x] bkt #16648
-       :0:0736:0901[00000084x_00020000x] nop
-       :0:0737:0902[00000181x_00000430x] (rpt1)nop
-       :0:0738:0904[001c8100x_00100002x] (rpt1)nop
-       :0:0739:0906[00000000x_00200020x] nop
-       :0:0740:0907[00100081x_00002000x] nop
-       :0:0741:0908[00000000x_00000008x] nop
-       :0:0742:0909[00009420x_00000024x] (ss)(rpt4)nop
-       :0:0743:0914[00000100x_00002010x] (rpt1)nop
-       :0:0744:0916[00004188x_00000000x] (rpt1)nop
-       :0:0745:0918[00100000x_00002000x] nop
-       :0:0746:0919[00120102x_00040000x] (rpt1)bkt #0
-       :0:0747:0921[00040002x_00000000x] nop
-       :0:0748:0922[00224200x_00210201x] (rpt2)bkt #513
-       :0:0749:0925[00000200x_00040000x] (rpt2)nop
-       :0:0750:0928[0000000cx_00000000x] nop
-       :0:0751:0929[00000000x_00005000x] nop
-       :0:0752:0930[00082208x_00010200x] (rpt2)nop
-       :0:0753:0933[00194011x_00000000x] (eq)nop
-       :0:0754:0934[00012100x_00000502x] (eq)(rpt1)nop
-       :0:0755:0936[00000240x_00040050x] (rpt2)nop
-       :0:0756:0939[00080211x_00004180x] (rpt2)nop
-       :0:0757:0942[00000000x_00001008x] nop
-       :0:0758:0943[00020490x_002004a0x] (rpt4)bkt #1184
-       :0:0759:0948[00210004x_00001080x] (eq)nop
-       :0:0760:0949[00000000x_00300040x] nop
-       :0:0761:0950[00008002x_00000020x] nop
-       :0:0762:0951[00000000x_00041098x] nop
-       :0:0763:0952[002000a0x_00000000x] nop
-       :0:0764:0953[00000000x_000c0400x] nop
-       :0:0765:0954[00000401x_00000402x] (rpt4)nop
-       :0:0766:0959[00002000x_00200400x] nop
-       :0:0767:0960[00000101x_00001000x] (rpt1)nop
-       :0:0768:0962[00000000x_00000000x] nop
-       :0:0769:0963[00000000x_00000000x] nop
-       :0:0770:0964[00000000x_00000000x] nop
-       :0:0771:0965[00000000x_00000000x] nop
-       :0:0772:0966[00000000x_00000000x] nop
-       :0:0773:0967[00000000x_00000000x] nop
-       :0:0774:0968[00000000x_00000000x] nop
-       :0:0775:0969[00000000x_00000000x] nop
-       :0:0776:0970[00000000x_00000000x] nop
-       :0:0777:0971[00000000x_00000000x] nop
-       :0:0778:0972[00000000x_00000000x] nop
-       :0:0779:0973[00000000x_00000000x] nop
-       :0:0780:0974[00000000x_00000000x] nop
-       :0:0781:0975[00000000x_00000000x] nop
-       :0:0782:0976[00000000x_00000000x] nop
-       :0:0783:0977[00000000x_00000000x] nop
-       :0:0784:0978[00000000x_00000000x] nop
-       :0:0785:0979[00000000x_00000000x] nop
-       :0:0786:0980[00000000x_00000000x] nop
-       :0:0787:0981[00000000x_00000000x] nop
-       :0:0788:0982[00000000x_00000000x] nop
-       :0:0789:0983[00000000x_00000000x] nop
-       :0:0790:0984[00000000x_00000000x] nop
-       :0:0791:0985[00000000x_00000000x] nop
-       :0:0792:0986[00000000x_00000000x] nop
-       :0:0793:0987[00000000x_00000000x] nop
-       :0:0794:0988[00000000x_00000000x] nop
-       :0:0795:0989[00000000x_00000000x] nop
-       :0:0796:0990[00000000x_00000000x] nop
-       :0:0797:0991[00000000x_00000000x] nop
-       :0:0798:0992[00000000x_00000000x] nop
-       :0:0799:0993[00000000x_00000000x] nop
-       :0:0800:0994[00000000x_00000000x] nop
-       :0:0801:0995[00000000x_00000000x] nop
-       :0:0802:0996[00000000x_00000000x] nop
-       :0:0803:0997[00000000x_00000000x] nop
-       :0:0804:0998[00000000x_00000000x] nop
-       :0:0805:0999[00000000x_00000000x] nop
-       :0:0806:1000[00000000x_00000000x] nop
-       :0:0807:1001[00000000x_00000000x] nop
-       :0:0808:1002[00000000x_00000000x] nop
-       :0:0809:1003[00000000x_00000000x] nop
-       :0:0810:1004[00000000x_00000000x] nop
-       :0:0811:1005[00000000x_00000000x] nop
-       :0:0812:1006[00000000x_00000000x] nop
-       :0:0813:1007[00000000x_00000000x] nop
-       :0:0814:1008[00000000x_00000000x] nop
-       :0:0815:1009[00000000x_00000000x] nop
-       :0:0816:1010[00000000x_00000000x] nop
-       :0:0817:1011[00000000x_00000000x] nop
-       :0:0818:1012[00000000x_00000000x] nop
-       :0:0819:1013[00000000x_00000000x] nop
-       :0:0820:1014[00000000x_00000000x] nop
-       :0:0821:1015[00000000x_00000000x] nop
-       :0:0822:1016[00000000x_00000000x] nop
-       :0:0823:1017[00000000x_00000000x] nop
-       :0:0824:1018[00000000x_00000000x] nop
-       :0:0825:1019[00000000x_00000000x] nop
-       :0:0826:1020[00000000x_00000000x] nop
-       :0:0827:1021[00000000x_00000000x] nop
-       :0:0828:1022[00000000x_00000000x] nop
-       :0:0829:1023[00000000x_00000000x] nop
-       :0:0830:1024[00000000x_00000000x] nop
-       :0:0831:1025[00000000x_00000000x] nop
-       :0:0832:1026[00000000x_00000000x] nop
-       :0:0833:1027[00000000x_00000000x] nop
-       :0:0834:1028[00000000x_00000000x] nop
-       :0:0835:1029[00000000x_00000000x] nop
-       :0:0836:1030[00000000x_00000000x] nop
-       :0:0837:1031[00000000x_00000000x] nop
-       :0:0838:1032[00000000x_00000000x] nop
-       :0:0839:1033[00000000x_00000000x] nop
-       :0:0840:1034[00000000x_00000000x] nop
-       :0:0841:1035[00000000x_00000000x] nop
-       :0:0842:1036[00000000x_00000000x] nop
-       :0:0843:1037[00000000x_00000000x] nop
-       :0:0844:1038[00000000x_00000000x] nop
-       :0:0845:1039[00000000x_00000000x] nop
-       :0:0846:1040[00000000x_00000000x] nop
-       :0:0847:1041[00000000x_00000000x] nop
-       :0:0848:1042[00000000x_00000000x] nop
-       :0:0849:1043[00000000x_00000000x] nop
-       :0:0850:1044[00000000x_00000000x] nop
-       :0:0851:1045[00000000x_00000000x] nop
-       :0:0852:1046[00000000x_00000000x] nop
-       :0:0853:1047[00000000x_00000000x] nop
-       :0:0854:1048[00000000x_00000000x] nop
-       :0:0855:1049[00000000x_00000000x] nop
-       :0:0856:1050[00000000x_00000000x] nop
-       :0:0857:1051[00000000x_00000000x] nop
-       :0:0858:1052[00000000x_00000000x] nop
-       :0:0859:1053[00000000x_00000000x] nop
-       :0:0860:1054[00000000x_00000000x] nop
-       :0:0861:1055[00000000x_00000000x] nop
-       :0:0862:1056[00000000x_00000000x] nop
-       :0:0863:1057[00000000x_00000000x] nop
-       :0:0864:1058[00000000x_00000000x] nop
-       :0:0865:1059[00000000x_00000000x] nop
-       :0:0866:1060[00000000x_00000000x] nop
-       :0:0867:1061[00000000x_00000000x] nop
-       :0:0868:1062[00000000x_00000000x] nop
-       :0:0869:1063[00000000x_00000000x] nop
-       :0:0870:1064[00000000x_00000000x] nop
-       :0:0871:1065[00000000x_00000000x] nop
-       :0:0872:1066[00000000x_00000000x] nop
-       :0:0873:1067[00000000x_00000000x] nop
-       :0:0874:1068[00000000x_00000000x] nop
-       :0:0875:1069[00000000x_00000000x] nop
-       :0:0876:1070[00000000x_00000000x] nop
-       :0:0877:1071[00000000x_00000000x] nop
-       :0:0878:1072[00000000x_00000000x] nop
-       :0:0879:1073[00000000x_00000000x] nop
-       :0:0880:1074[00000000x_00000000x] nop
-       :0:0881:1075[00000000x_00000000x] nop
-       :0:0882:1076[00000000x_00000000x] nop
-       :0:0883:1077[00000000x_00000000x] nop
-       :0:0884:1078[00000000x_00000000x] nop
-       :0:0885:1079[00000000x_00000000x] nop
-       :0:0886:1080[00000000x_00000000x] nop
-       :0:0887:1081[00000000x_00000000x] nop
-       :0:0888:1082[00000000x_00000000x] nop
-       :0:0889:1083[00000000x_00000000x] nop
-       :0:0890:1084[00000000x_00000000x] nop
-       :0:0891:1085[00000000x_00000000x] nop
-       :0:0892:1086[00000000x_00000000x] nop
-       :0:0893:1087[00000000x_00000000x] nop
-       :0:0894:1088[00000000x_00000000x] nop
-       :0:0895:1089[00000000x_00000000x] nop
-       :0:0896:1090[00000000x_00000000x] nop
-       :0:0897:1091[00000000x_00000000x] nop
-       :0:0898:1092[00000000x_00000000x] nop
-       :0:0899:1093[00000000x_00000000x] nop
-       :0:0900:1094[00000000x_00000000x] nop
-       :0:0901:1095[00000000x_00000000x] nop
-       :0:0902:1096[00000000x_00000000x] nop
-       :0:0903:1097[00000000x_00000000x] nop
-       :0:0904:1098[00000000x_00000000x] nop
-       :0:0905:1099[00000000x_00000000x] nop
-       :0:0906:1100[00000000x_00000000x] nop
-       :0:0907:1101[00000000x_00000000x] nop
-       :0:0908:1102[00000000x_00000000x] nop
-       :0:0909:1103[00000000x_00000000x] nop
-       :0:0910:1104[00000000x_00000000x] nop
-       :0:0911:1105[00000000x_00000000x] nop
-       :0:0912:1106[00000000x_00000000x] nop
-       :0:0913:1107[00000000x_00000000x] nop
-       :0:0914:1108[00000000x_00000000x] nop
-       :0:0915:1109[00000000x_00000000x] nop
-       :0:0916:1110[00000000x_00000000x] nop
-       :0:0917:1111[00000000x_00000000x] nop
-       :0:0918:1112[00000000x_00000000x] nop
-       :0:0919:1113[00000000x_00000000x] nop
-       :0:0920:1114[00000000x_00000000x] nop
-       :0:0921:1115[00000000x_00000000x] nop
-       :0:0922:1116[00000000x_00000000x] nop
-       :0:0923:1117[00000000x_00000000x] nop
-       :0:0924:1118[00000000x_00000000x] nop
-       :0:0925:1119[00000000x_00000000x] nop
-       :0:0926:1120[00000000x_00000000x] nop
-       :0:0927:1121[00000000x_00000000x] nop
-       :0:0928:1122[00000000x_00000000x] nop
-       :0:0929:1123[00000000x_00000000x] nop
-       :0:0930:1124[00000000x_00000000x] nop
-       :0:0931:1125[00000000x_00000000x] nop
-       :0:0932:1126[00000000x_00000000x] nop
-       :0:0933:1127[00000000x_00000000x] nop
-       :0:0934:1128[00000000x_00000000x] nop
-       :0:0935:1129[00000000x_00000000x] nop
-       :0:0936:1130[00000000x_00000000x] nop
-       :0:0937:1131[00000000x_00000000x] nop
-       :0:0938:1132[00000000x_00000000x] nop
-       :0:0939:1133[00000000x_00000000x] nop
-       :0:0940:1134[00000000x_00000000x] nop
-       :0:0941:1135[00000000x_00000000x] nop
-       :0:0942:1136[00000000x_00000000x] nop
-       :0:0943:1137[00000000x_00000000x] nop
-       :0:0944:1138[00000000x_00000000x] nop
-       :0:0945:1139[00000000x_00000000x] nop
-       :0:0946:1140[00000000x_00000000x] nop
-       :0:0947:1141[00000000x_00000000x] nop
-       :0:0948:1142[00000000x_00000000x] nop
-       :0:0949:1143[00000000x_00000000x] nop
-       :0:0950:1144[00000000x_00000000x] nop
-       :0:0951:1145[00000000x_00000000x] nop
-       :0:0952:1146[00000000x_00000000x] nop
-       :0:0953:1147[00000000x_00000000x] nop
-       :0:0954:1148[00000000x_00000000x] nop
-       :0:0955:1149[00000000x_00000000x] nop
-       :0:0956:1150[00000000x_00000000x] nop
-       :0:0957:1151[00000000x_00000000x] nop
-       :0:0958:1152[00000000x_00000000x] nop
-       :0:0959:1153[00000000x_00000000x] nop
-       :0:0960:1154[00000000x_00000000x] nop
-       :0:0961:1155[00000000x_00000000x] nop
-       :0:0962:1156[00000000x_00000000x] nop
-       :0:0963:1157[00000000x_00000000x] nop
-       :0:0964:1158[00000000x_00000000x] nop
-       :0:0965:1159[00000000x_00000000x] nop
-       :0:0966:1160[00000000x_00000000x] nop
-       :0:0967:1161[00000000x_00000000x] nop
-       :0:0968:1162[00000000x_00000000x] nop
-       :0:0969:1163[00000000x_00000000x] nop
-       :0:0970:1164[00000000x_00000000x] nop
-       :0:0971:1165[00000000x_00000000x] nop
-       :0:0972:1166[00000000x_00000000x] nop
-       :0:0973:1167[00000000x_00000000x] nop
-       :0:0974:1168[00000000x_00000000x] nop
-       :0:0975:1169[00000000x_00000000x] nop
-       :0:0976:1170[00000000x_00000000x] nop
-       :0:0977:1171[00000000x_00000000x] nop
-       :0:0978:1172[00000000x_00000000x] nop
-       :0:0979:1173[00000000x_00000000x] nop
-       :0:0980:1174[00000000x_00000000x] nop
-       :0:0981:1175[00000000x_00000000x] nop
-       :0:0982:1176[00000000x_00000000x] nop
-       :0:0983:1177[00000000x_00000000x] nop
-       :0:0984:1178[00000000x_00000000x] nop
-       :0:0985:1179[00000000x_00000000x] nop
-       :0:0986:1180[00000000x_00000000x] nop
-       :0:0987:1181[00000000x_00000000x] nop
-       :0:0988:1182[00000000x_00000000x] nop
-       :0:0989:1183[00000000x_00000000x] nop
-       :0:0990:1184[00000000x_00000000x] nop
-       :0:0991:1185[00000000x_00000000x] nop
-       :0:0992:1186[00000000x_00000000x] nop
-       :0:0993:1187[00000000x_00000000x] nop
-       :0:0994:1188[00000000x_00000000x] nop
-       :0:0995:1189[00000000x_00000000x] nop
-       :0:0996:1190[00000000x_00000000x] nop
-       :0:0997:1191[00000000x_00000000x] nop
-       :0:0998:1192[00000000x_00000000x] nop
-       :0:0999:1193[00000000x_00000000x] nop
-       :0:1000:1194[00000000x_00000000x] nop
-       :0:1001:1195[00000000x_00000000x] nop
-       :0:1002:1196[00000000x_00000000x] nop
-       :0:1003:1197[00000000x_00000000x] nop
-       :0:1004:1198[00000000x_00000000x] nop
-       :0:1005:1199[00000000x_00000000x] nop
-       :0:1006:1200[00000000x_00000000x] nop
-       :0:1007:1201[00000000x_00000000x] nop
-       :0:1008:1202[00000000x_00000000x] nop
-       :0:1009:1203[00000000x_00000000x] nop
-       :0:1010:1204[00000000x_00000000x] nop
-       :0:1011:1205[00000000x_00000000x] nop
-       :0:1012:1206[00000000x_00000000x] nop
-       :0:1013:1207[00000000x_00000000x] nop
-       :0:1014:1208[00000000x_00000000x] nop
-       :0:1015:1209[00000000x_00000000x] nop
-       :0:1016:1210[00000000x_00000000x] nop
-       :0:1017:1211[00000000x_00000000x] nop
-       :0:1018:1212[00000000x_00000000x] nop
-       :0:1019:1213[00000000x_00000000x] nop
-       :0:1020:1214[00000000x_00000000x] nop
-       :0:1021:1215[00000000x_00000000x] nop
-       :0:1022:1216[00000000x_00000000x] nop
-       :0:1023:1217[00000000x_00000000x] nop
-       Register Stats:
-       - used (half): 173 239 (cnt=2, max=173)
-       - used (full): 182 190 (cnt=2, max=190)
-       - used (merged): 173 239 364-365 380-381 (cnt=6, max=173)
-       - input (half): 173 239 (cnt=2, max=173)
-       - input (full): 182 190 (cnt=2, max=190)
-       - const (half): (cnt=0, max=0)
-       - const (full): (cnt=0, max=0)
-       - output (half): (cnt=0, max=0)  (estimated)
-       - output (full): (cnt=0, max=0)  (estimated)
-       - shaderdb: 1218 instructions, 658 nops, 560 non-nops, (1024 instlen), 44 half, 48 full
-       - shaderdb: 16 (ss), 510 (sy)
+       :6:0002:0002[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[
+../src/freedreno/ir3/disasm-a3xx.c:173: regmask_set: Assertion `num < MAX_REG' failed.
        -----------------------------------------------
        8192 (0x2000) bytes
        000000: 00003002 00000000 00000000 00000000     |.0..............|
 
                                                        - used (merged): (cnt=0, max=0)
                                                        - input (half): (cnt=0, max=0)
                                                        - input (full): (cnt=0, max=0)
-                                                       - const (half): (cnt=0, max=0)
-                                                       - const (full): (cnt=0, max=0)
+                                                       - max const: 0
+
                                                        - output (half): (cnt=0, max=0)  (estimated)
                                                        - output (full): (cnt=0, max=0)  (estimated)
                                                        - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 half, 0 full
                                                - used (merged): (cnt=0, max=0)
                                                - input (half): (cnt=0, max=0)
                                                - input (full): (cnt=0, max=0)
-                                               - const (half): (cnt=0, max=0)
-                                               - const (full): (cnt=0, max=0)
+                                               - max const: 0
+
                                                - output (half): (cnt=0, max=0)  (estimated)
                                                - output (full): (cnt=0, max=0)  (estimated)
                                                - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 half, 0 full
                                - used (merged): (cnt=0, max=0)
                                - input (half): (cnt=0, max=0)
                                - input (full): (cnt=0, max=0)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): (cnt=0, max=0)
+                               - max const: 0
+
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): (cnt=0, max=0)  (estimated)
                                - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 half, 0 full
                                                        - used (merged): (cnt=0, max=0)
                                                        - input (half): (cnt=0, max=0)
                                                        - input (full): (cnt=0, max=0)
-                                                       - const (half): (cnt=0, max=0)
-                                                       - const (full): (cnt=0, max=0)
+                                                       - max const: 0
+
                                                        - output (half): (cnt=0, max=0)  (estimated)
                                                        - output (full): (cnt=0, max=0)  (estimated)
                                                        - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 half, 0 full
                                                - used (merged): (cnt=0, max=0)
                                                - input (half): (cnt=0, max=0)
                                                - input (full): (cnt=0, max=0)
-                                               - const (half): (cnt=0, max=0)
-                                               - const (full): (cnt=0, max=0)
+                                               - max const: 0
+
                                                - output (half): (cnt=0, max=0)  (estimated)
                                                - output (full): (cnt=0, max=0)  (estimated)
                                                - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 half, 0 full
                                                        - used (merged): 0-147 (cnt=148, max=147)
                                                        - input (half): (cnt=0, max=0)
                                                        - input (full): 19-20 (cnt=2, max=20)
-                                                       - const (half): (cnt=0, max=0)
-                                                       - const (full): 0-1 3-5 8-9 32-113 (cnt=89, max=113)
+                                                       - max const: 113
+
                                                        - output (half): (cnt=0, max=0)  (estimated)
                                                        - output (full): 4-7 (cnt=4, max=7)  (estimated)
                                                        - shaderdb: 2414 instructions, 1355 nops, 1059 non-nops, (1406 instlen), 0 half, 19 full
                                                - used (merged): 0-147 (cnt=148, max=147)
                                                - input (half): (cnt=0, max=0)
                                                - input (full): 19-20 (cnt=2, max=20)
-                                               - const (half): (cnt=0, max=0)
-                                               - const (full): 0-1 3-5 8-9 32-113 (cnt=89, max=113)
+                                               - max const: 113
+
                                                - output (half): (cnt=0, max=0)  (estimated)
                                                - output (full): 4-7 (cnt=4, max=7)  (estimated)
                                                - shaderdb: 2414 instructions, 1355 nops, 1059 non-nops, (1406 instlen), 0 half, 19 full
                                - used (merged): (cnt=0, max=0)
                                - input (half): (cnt=0, max=0)
                                - input (full): (cnt=0, max=0)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): (cnt=0, max=0)
+                               - max const: 0
+
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): (cnt=0, max=0)  (estimated)
                                - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 half, 0 full
                                - used (merged): 0-147 (cnt=148, max=147)
                                - input (half): (cnt=0, max=0)
                                - input (full): 19-20 (cnt=2, max=20)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): 0-1 3-5 8-9 32-113 (cnt=89, max=113)
+                               - max const: 113
+
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): 4-7 (cnt=4, max=7)  (estimated)
                                - shaderdb: 2414 instructions, 1355 nops, 1059 non-nops, (1406 instlen), 0 half, 19 full
 
                                Register Stats:
                                - used (half): (cnt=0, max=0)
                                - used (full): (cnt=0, max=0)
-                               - used (merged): (cnt=0, max=0)
                                - input (half): (cnt=0, max=0)
                                - input (full): (cnt=0, max=0)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): (cnt=0, max=0)
+                               - max const: 0
+
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): (cnt=0, max=0)  (estimated)
                                - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 half, 0 full
                                Register Stats:
                                - used (half): (cnt=0, max=0)
                                - used (full): 0-3 (cnt=4, max=3)
-                               - used (merged): 0-7 (cnt=8, max=7)
                                - input (half): (cnt=0, max=0)
                                - input (full): (cnt=0, max=0)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): 0-3 (cnt=4, max=3)
+                               - max const: 3
+
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): 0-3 (cnt=4, max=3)  (estimated)
                                - shaderdb: 9 instructions, 8 nops, 1 non-nops, (9 instlen), 0 half, 1 full
                                Register Stats:
                                - used (half): (cnt=0, max=0)
                                - used (full): 0-13 (cnt=14, max=13)
-                               - used (merged): 0-27 (cnt=28, max=27)
                                - input (half): (cnt=0, max=0)
                                - input (full): 2-5 (cnt=4, max=5)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): 0-18 20-26 32-34 36-38 40-42 52 (cnt=36, max=52)
+                               - max const: 52
+
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): 6-13 (cnt=8, max=13)  (estimated)
                                - shaderdb: 74 instructions, 38 nops, 36 non-nops, (61 instlen), 0 half, 4 full
                                :0:0010:0010[00000000x_00000000x] nop
                                Register Stats:
                                - used (half): (cnt=0, max=0)
-                               - used (full): 0-3 252 (cnt=5, max=3)
-                               - used (merged): 0-7 504-505 (cnt=10, max=7)
+                               - used (full): 0-3 (cnt=4, max=3)
                                - input (half): (cnt=0, max=0)
                                - input (full): 0-3 (cnt=4, max=3)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): (cnt=0, max=0)
+                               - max const: 0
+
                                - output (half): (cnt=0, max=0)  (estimated)
-                               - output (full): 252 (cnt=1, max=0)  (estimated)
+                               - output (full): (cnt=0, max=0)  (estimated)
                                - shaderdb: 11 instructions, 5 nops, 6 non-nops, (11 instlen), 0 half, 1 full
                                - shaderdb: 1 (ss), 0 (sy)
 109ce878:                      0000: c0213000 00700000 00000000 00000000 00000000 01c00000 c7c60000 01c00002
                                Register Stats:
                                - used (half): (cnt=0, max=0)
                                - used (full): 0-8 10-17 (cnt=17, max=17)
-                               - used (merged): 0-17 20-35 (cnt=34, max=35)
                                - input (half): (cnt=0, max=0)
                                - input (full): 2-8 (cnt=7, max=8)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): 0-22 28-30 32-34 36-38 52 (cnt=33, max=52)
+                               - max const: 52
+
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): 10-17 (cnt=8, max=17)  (estimated)
                                - shaderdb: 67 instructions, 31 nops, 36 non-nops, (56 instlen), 0 half, 5 full
                                :0:0010:0010[00000000x_00000000x] nop
                                Register Stats:
                                - used (half): (cnt=0, max=0)
-                               - used (full): 0-3 252 (cnt=5, max=3)
-                               - used (merged): 0-7 504-505 (cnt=10, max=7)
+                               - used (full): 0-3 (cnt=4, max=3)
                                - input (half): (cnt=0, max=0)
                                - input (full): 0-3 (cnt=4, max=3)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): (cnt=0, max=0)
+                               - max const: 0
+
                                - output (half): (cnt=0, max=0)  (estimated)
-                               - output (full): 252 (cnt=1, max=0)  (estimated)
+                               - output (full): (cnt=0, max=0)  (estimated)
                                - shaderdb: 11 instructions, 5 nops, 6 non-nops, (11 instlen), 0 half, 1 full
                                - shaderdb: 1 (ss), 0 (sy)
 109cf040:                      0000: c0213000 00700000 00000000 00000000 00000000 01c00000 c7c60000 01c00002
                                Register Stats:
                                - used (half): (cnt=0, max=0)
                                - used (full): 0-8 10-17 (cnt=17, max=17)
-                               - used (merged): 0-17 20-35 (cnt=34, max=35)
                                - input (half): (cnt=0, max=0)
                                - input (full): 2-8 (cnt=7, max=8)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): 0-22 28-30 32-34 36-38 52 (cnt=33, max=52)
+                               - max const: 52
+
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): 10-17 (cnt=8, max=17)  (estimated)
                                - shaderdb: 67 instructions, 31 nops, 36 non-nops, (56 instlen), 0 half, 5 full
                                Register Stats:
                                - used (half): (cnt=0, max=0)
                                - used (full): 0 2-5 (cnt=5, max=5)
-                               - used (merged): 0-1 4-11 (cnt=10, max=11)
                                - input (half): (cnt=0, max=0)
                                - input (full): 0 (cnt=1, max=0)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): (cnt=0, max=0)
+                               - max const: 0
+
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): 2-5 (cnt=4, max=5)  (estimated)
                                - shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 0 half, 2 full
                                Register Stats:
                                - used (half): (cnt=0, max=0)
                                - used (full): 0-13 (cnt=14, max=13)
-                               - used (merged): 0-27 (cnt=28, max=27)
                                - input (half): (cnt=0, max=0)
                                - input (full): 2-5 (cnt=4, max=5)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): 0-18 20-26 32-34 36-38 40-42 52 (cnt=36, max=52)
+                               - max const: 52
+
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): 6-13 (cnt=8, max=13)  (estimated)
                                - shaderdb: 74 instructions, 38 nops, 36 non-nops, (61 instlen), 0 half, 4 full
                                :0:0010:0010[00000000x_00000000x] nop
                                Register Stats:
                                - used (half): (cnt=0, max=0)
-                               - used (full): 0-3 252 (cnt=5, max=3)
-                               - used (merged): 0-7 504-505 (cnt=10, max=7)
+                               - used (full): 0-3 (cnt=4, max=3)
                                - input (half): (cnt=0, max=0)
                                - input (full): 0-3 (cnt=4, max=3)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): (cnt=0, max=0)
+                               - max const: 0
+
                                - output (half): (cnt=0, max=0)  (estimated)
-                               - output (full): 252 (cnt=1, max=0)  (estimated)
+                               - output (full): (cnt=0, max=0)  (estimated)
                                - shaderdb: 11 instructions, 5 nops, 6 non-nops, (11 instlen), 0 half, 1 full
                                - shaderdb: 1 (ss), 0 (sy)
 109cfb78:                      0000: c0213000 00700000 00000000 00000000 00000000 01c00000 c7c60000 01c00002
                                Register Stats:
                                - used (half): (cnt=0, max=0)
                                - used (full): 0-8 10-17 (cnt=17, max=17)
-                               - used (merged): 0-17 20-35 (cnt=34, max=35)
                                - input (half): (cnt=0, max=0)
                                - input (full): 2-8 (cnt=7, max=8)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): 0-22 28-30 32-34 36-38 52 (cnt=33, max=52)
+                               - max const: 52
+
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): 10-17 (cnt=8, max=17)  (estimated)
                                - shaderdb: 67 instructions, 31 nops, 36 non-nops, (56 instlen), 0 half, 5 full
                                :0:0010:0010[00000000x_00000000x] nop
                                Register Stats:
                                - used (half): (cnt=0, max=0)
-                               - used (full): 0-3 252 (cnt=5, max=3)
-                               - used (merged): 0-7 504-505 (cnt=10, max=7)
+                               - used (full): 0-3 (cnt=4, max=3)
                                - input (half): (cnt=0, max=0)
                                - input (full): 0-3 (cnt=4, max=3)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): (cnt=0, max=0)
+                               - max const: 0
+
                                - output (half): (cnt=0, max=0)  (estimated)
-                               - output (full): 252 (cnt=1, max=0)  (estimated)
+                               - output (full): (cnt=0, max=0)  (estimated)
                                - shaderdb: 11 instructions, 5 nops, 6 non-nops, (11 instlen), 0 half, 1 full
                                - shaderdb: 1 (ss), 0 (sy)
 109d02c0:                      0000: c0213000 00700000 00000000 00000000 00000000 01c00000 c7c60000 01c00002
                                Register Stats:
                                - used (half): (cnt=0, max=0)
                                - used (full): 0-8 10-17 (cnt=17, max=17)
-                               - used (merged): 0-17 20-35 (cnt=34, max=35)
                                - input (half): (cnt=0, max=0)
                                - input (full): 2-8 (cnt=7, max=8)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): 0-22 28-30 32-34 36-38 52 (cnt=33, max=52)
+                               - max const: 52
+
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): 10-17 (cnt=8, max=17)  (estimated)
                                - shaderdb: 67 instructions, 31 nops, 36 non-nops, (56 instlen), 0 half, 5 full
                                Register Stats:
                                - used (half): (cnt=0, max=0)
                                - used (full): 0 2-5 (cnt=5, max=5)
-                               - used (merged): 0-1 4-11 (cnt=10, max=11)
                                - input (half): (cnt=0, max=0)
                                - input (full): 0 (cnt=1, max=0)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): (cnt=0, max=0)
+                               - max const: 0
+
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): 2-5 (cnt=4, max=5)  (estimated)
                                - shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 0 half, 2 full
                                Register Stats:
                                - used (half): (cnt=0, max=0)
                                - used (full): 0-13 (cnt=14, max=13)
-                               - used (merged): 0-27 (cnt=28, max=27)
                                - input (half): (cnt=0, max=0)
                                - input (full): 2-5 (cnt=4, max=5)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): 0-18 20-26 32-34 36-38 40-42 52 (cnt=36, max=52)
+                               - max const: 52
+
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): 6-13 (cnt=8, max=13)  (estimated)
                                - shaderdb: 74 instructions, 38 nops, 36 non-nops, (61 instlen), 0 half, 4 full
                                :0:0010:0010[00000000x_00000000x] nop
                                Register Stats:
                                - used (half): (cnt=0, max=0)
-                               - used (full): 0-3 252 (cnt=5, max=3)
-                               - used (merged): 0-7 504-505 (cnt=10, max=7)
+                               - used (full): 0-3 (cnt=4, max=3)
                                - input (half): (cnt=0, max=0)
                                - input (full): 0-3 (cnt=4, max=3)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): (cnt=0, max=0)
+                               - max const: 0
+
                                - output (half): (cnt=0, max=0)  (estimated)
-                               - output (full): 252 (cnt=1, max=0)  (estimated)
+                               - output (full): (cnt=0, max=0)  (estimated)
                                - shaderdb: 11 instructions, 5 nops, 6 non-nops, (11 instlen), 0 half, 1 full
                                - shaderdb: 1 (ss), 0 (sy)
 109d0df8:                      0000: c0213000 00700000 00000000 00000000 00000000 01c00000 c7c60000 01c00002
                                Register Stats:
                                - used (half): (cnt=0, max=0)
                                - used (full): 0-8 10-17 (cnt=17, max=17)
-                               - used (merged): 0-17 20-35 (cnt=34, max=35)
                                - input (half): (cnt=0, max=0)
                                - input (full): 2-8 (cnt=7, max=8)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): 0-22 28-30 32-34 36-38 52 (cnt=33, max=52)
+                               - max const: 52
+
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): 10-17 (cnt=8, max=17)  (estimated)
                                - shaderdb: 67 instructions, 31 nops, 36 non-nops, (56 instlen), 0 half, 5 full
                                :0:0010:0010[00000000x_00000000x] nop
                                Register Stats:
                                - used (half): (cnt=0, max=0)
-                               - used (full): 0-3 252 (cnt=5, max=3)
-                               - used (merged): 0-7 504-505 (cnt=10, max=7)
+                               - used (full): 0-3 (cnt=4, max=3)
                                - input (half): (cnt=0, max=0)
                                - input (full): 0-3 (cnt=4, max=3)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): (cnt=0, max=0)
+                               - max const: 0
+
                                - output (half): (cnt=0, max=0)  (estimated)
-                               - output (full): 252 (cnt=1, max=0)  (estimated)
+                               - output (full): (cnt=0, max=0)  (estimated)
                                - shaderdb: 11 instructions, 5 nops, 6 non-nops, (11 instlen), 0 half, 1 full
                                - shaderdb: 1 (ss), 0 (sy)
 109d1540:                      0000: c0213000 00700000 00000000 00000000 00000000 01c00000 c7c60000 01c00002
                                Register Stats:
                                - used (half): (cnt=0, max=0)
                                - used (full): 0-8 10-17 (cnt=17, max=17)
-                               - used (merged): 0-17 20-35 (cnt=34, max=35)
                                - input (half): (cnt=0, max=0)
                                - input (full): 2-8 (cnt=7, max=8)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): 0-22 28-30 32-34 36-38 52 (cnt=33, max=52)
+                               - max const: 52
+
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): 10-17 (cnt=8, max=17)  (estimated)
                                - shaderdb: 67 instructions, 31 nops, 36 non-nops, (56 instlen), 0 half, 5 full
                                Register Stats:
                                - used (half): (cnt=0, max=0)
                                - used (full): 0 2-5 (cnt=5, max=5)
-                               - used (merged): 0-1 4-11 (cnt=10, max=11)
                                - input (half): (cnt=0, max=0)
                                - input (full): 0 (cnt=1, max=0)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): (cnt=0, max=0)
+                               - max const: 0
+
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): 2-5 (cnt=4, max=5)  (estimated)
                                - shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 0 half, 2 full
                        Register Stats:
                        - used (half): (cnt=0, max=0)
                        - used (full): (cnt=0, max=0)
-                       - used (merged): (cnt=0, max=0)
                        - input (half): (cnt=0, max=0)
                        - input (full): (cnt=0, max=0)
-                       - const (half): (cnt=0, max=0)
-                       - const (full): (cnt=0, max=0)
+                       - max const: 0
+
                        - output (half): (cnt=0, max=0)  (estimated)
                        - output (full): (cnt=0, max=0)  (estimated)
                        - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 half, 0 full
                        Register Stats:
                        - used (half): (cnt=0, max=0)
                        - used (full): 0-3 (cnt=4, max=3)
-                       - used (merged): 0-7 (cnt=8, max=7)
                        - input (half): (cnt=0, max=0)
                        - input (full): (cnt=0, max=0)
-                       - const (half): (cnt=0, max=0)
-                       - const (full): 0-3 (cnt=4, max=3)
+                       - max const: 3
+
                        - output (half): (cnt=0, max=0)  (estimated)
                        - output (full): 0-3 (cnt=4, max=3)  (estimated)
                        - shaderdb: 9 instructions, 8 nops, 1 non-nops, (9 instlen), 0 half, 1 full
 
--- /dev/null
+/*
+ * Copyright © 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef DISASM_H_
+#define DISASM_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include "compiler/shader_enums.h"
+
+/* bitmask of debug flags; values are OR'd together and passed to disasm_*_set_debug() */
+enum debug_t {
+       PRINT_RAW      = 0x1,    /* dump raw hexdump */
+       PRINT_VERBOSE  = 0x2,    /* extra detail, e.g. non-zero dummy/must_be_0 fields */
+       PRINT_STATS    = 0x4,    /* presumably enables the register/shaderdb stats dump -- TODO confirm */
+       EXPAND_REPEAT  = 0x8,    /* presumably expands (rptN) instructions when printing -- TODO confirm */
+};
+
+struct shader_stats {
+       /* 'instructions' counts rptN repeats, and 'instlen' does not */
+       int instructions, instlen;
+       int nops;
+       int ss, sy;
+       int constlen;
+};
+
+int disasm_a2xx(uint32_t *dwords, int sizedwords, int level, gl_shader_stage type);
+int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id);
+int disasm_a3xx_stat(uint32_t *dwords, int sizedwords, int level, FILE *out,
+               unsigned gpu_id, struct shader_stats *stats);
+
+void disasm_a2xx_set_debug(enum debug_t debug);
+void disasm_a3xx_set_debug(enum debug_t debug);
+
+#endif /* DISASM_H_ */
 
 libfreedreno_common = static_library(
   'freedreno_common',
   [
+    'disasm.h',
     'freedreno_uuid.c',
     'freedreno_uuid.h',
     'freedreno_guardband.h',
 
 
 int main(int argc, char **argv)
 {
+       enum debug_t debug = PRINT_RAW | PRINT_STATS;
        int ret = -1;
        int start = 0, end = 0x7ffffff, draw = -1;
        int c;
                        /* option that set a flag, nothing to do */
                        break;
                case 'v':
-                       disasm_set_debug(PRINT_RAW | EXPAND_REPEAT | PRINT_VERBOSE);
+                       debug |= (PRINT_RAW | EXPAND_REPEAT | PRINT_VERBOSE);
                        break;
                case 's':
                        options.summary = true;
                }
        }
 
+       disasm_a2xx_set_debug(debug);
+       disasm_a3xx_set_debug(debug);
+
        if (interactive) {
                pager_open();
        }
 
 #include "pager.h"
 #include "rnnutil.h"
 #include "util.h"
-#include "instr-a3xx.h"
+#include "ir3/instr-a3xx.h"
 
 
 static FILE *in;
 ir3_assert_handler(const char *expr, const char *file, int line,
                const char *func)
 {
-       printf("%s:%u: %s: Assertion `%s' failed.\n", file, line, func, expr);
+       printf("\n%s:%u: %s: Assertion `%s' failed.\n", file, line, func, expr);
        if (jmp_env_valid)
                longjmp(jmp_env, 1);
        abort();
                }
        }
 
+       disasm_a3xx_set_debug(PRINT_RAW);
+
        if (interactive) {
                pager_open();
        }
 
                "x",
 };
 
-enum debug_t debug;
+static enum debug_t debug;
 
 static struct rnn *rnn;
 
        return 0;
 }
 
-void disasm_set_debug(enum debug_t d)
+void disasm_a2xx_set_debug(enum debug_t d)
 {
        debug = d;
 }
 
+++ /dev/null
-/*
- * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <stdbool.h>
-#include <string.h>
-#include <assert.h>
-
-#include "disasm.h"
-#include "instr-a3xx.h"
-
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
-
-extern enum debug_t debug;
-
-static const char *levels[] = {
-               "",
-               "\t",
-               "\t\t",
-               "\t\t\t",
-               "\t\t\t\t",
-               "\t\t\t\t\t",
-               "\t\t\t\t\t\t",
-               "\t\t\t\t\t\t\t",
-               "\t\t\t\t\t\t\t\t",
-               "\t\t\t\t\t\t\t\t\t",
-               "x",
-               "x",
-               "x",
-               "x",
-               "x",
-               "x",
-};
-
-static const char *component = "xyzw";
-
-static const char *type[] = {
-               [TYPE_F16] = "f16",
-               [TYPE_F32] = "f32",
-               [TYPE_U16] = "u16",
-               [TYPE_U32] = "u32",
-               [TYPE_S16] = "s16",
-               [TYPE_S32] = "s32",
-               [TYPE_U8]  = "u8",
-               [TYPE_S8]  = "s8",
-};
-
-
-#define MAX_REG 4096
-
-typedef struct {
-       uint8_t full[MAX_REG/8];
-       uint8_t half[MAX_REG/8];
-} regmask_t;
-
-struct disasm_ctx {
-       FILE *out;
-       int level;
-       unsigned gpu_id;
-
-       struct shader_stats *stats;
-
-       /* we have to process the dst register after src to avoid tripping up
-        * the read-before-write detection
-        */
-       unsigned last_dst;
-       bool last_dst_full;
-       bool last_dst_valid;
-
-       /* current instruction repeat flag: */
-       unsigned repeat;
-       /* current instruction repeat indx/offset (for --expand): */
-       unsigned repeatidx;
-
-       /* tracking for register usage */
-       struct {
-               regmask_t used;
-               regmask_t used_merged;
-               regmask_t rbw;      /* read before write */
-               regmask_t war;      /* write after read */
-               regmask_t cnst;     /* used consts */
-       } regs;
-};
-
-static const char *float_imms[] = {
-       "0.0",
-       "0.5",
-       "1.0",
-       "2.0",
-       "e",
-       "pi",
-       "1/pi",
-       "1/log2(e)",
-       "log2(e)",
-       "1/log2(10)",
-       "log2(10)",
-       "4.0",
-};
-
-static void print_reg(struct disasm_ctx *ctx, reg_t reg, bool full,
-               bool is_float, bool r,
-               bool c, bool im, bool neg, bool abs, bool addr_rel)
-{
-       const char type = c ? 'c' : 'r';
-
-       // XXX I prefer - and || for neg/abs, but preserving format used
-       // by libllvm-a3xx for easy diffing..
-
-       if (abs && neg)
-               fprintf(ctx->out, "(absneg)");
-       else if (neg)
-               fprintf(ctx->out, "(neg)");
-       else if (abs)
-               fprintf(ctx->out, "(abs)");
-
-       if (r)
-               fprintf(ctx->out, "(r)");
-
-       if (im) {
-               if (is_float && full && reg.iim_val < ARRAY_SIZE(float_imms)) {
-                       fprintf(ctx->out, "(%s)", float_imms[reg.iim_val]);
-               } else {
-                       fprintf(ctx->out, "%d", reg.iim_val);
-               }
-       } else if (addr_rel) {
-               /* I would just use %+d but trying to make it diff'able with
-                * libllvm-a3xx...
-                */
-               if (reg.iim_val < 0)
-                       fprintf(ctx->out, "%s%c<a0.x - %d>", full ? "" : "h", type, -reg.iim_val);
-               else if (reg.iim_val > 0)
-                       fprintf(ctx->out, "%s%c<a0.x + %d>", full ? "" : "h", type, reg.iim_val);
-               else
-                       fprintf(ctx->out, "%s%c<a0.x>", full ? "" : "h", type);
-       } else if ((reg.num == REG_A0) && !c) {
-               /* This matches libllvm output, the second (scalar) address register
-                * seems to be called a1.x instead of a0.y.
-                */
-               fprintf(ctx->out, "a%d.x", reg.comp);
-       } else if ((reg.num == REG_P0) && !c) {
-               fprintf(ctx->out, "p0.%c", component[reg.comp]);
-       } else {
-               fprintf(ctx->out, "%s%c%d.%c", full ? "" : "h", type, reg.num, component[reg.comp]);
-       }
-}
-
-/* Tracking for registers used, read-before-write (input), and
- * write-after-read (output.. but not 100%)..
- */
-
-static void regmask_set(regmask_t *regmask, unsigned num, bool full, unsigned val)
-{
-       unsigned i = num / 8;
-       unsigned j = num % 8;
-       ir3_assert(num < MAX_REG);
-       if (full) {
-               regmask->full[i] = (regmask->full[i] & ~(1 << j)) | (val << j);
-       } else {
-               regmask->half[i] = (regmask->half[i] & ~(1 << j)) | (val << j);
-       }
-}
-
-static unsigned regmask_get(regmask_t *regmask, unsigned num, bool full)
-{
-       unsigned i = num / 8;
-       unsigned j = num % 8;
-       ir3_assert(num < MAX_REG);
-       if (full) {
-               return (regmask->full[i] >> j) & 0x1;
-       } else {
-               return (regmask->half[i] >> j) & 0x1;
-       }
-}
-
-static unsigned regidx(reg_t reg)
-{
-       return (4 * reg.num) + reg.comp;
-}
-
-static reg_t idxreg(unsigned idx)
-{
-       return (reg_t){
-               .comp = idx & 0x3,
-               .num  = idx >> 2,
-       };
-}
-
-static int print_regs(struct disasm_ctx *ctx, regmask_t *regmask, bool full)
-{
-       int num, max = 0, cnt = 0;
-       int first, last;
-
-       void print_sequence(void)
-       {
-               if (first != MAX_REG) {
-                       if (first == last) {
-                               fprintf(ctx->out, " %d", first);
-                       } else {
-                               fprintf(ctx->out, " %d-%d", first, last);
-                       }
-               }
-       }
-
-       first = last = MAX_REG;
-
-       for (num = 0; num < MAX_REG; num++) {
-               if (regmask_get(regmask, num, full)) {
-                       if (num != (last + 1)) {
-                               print_sequence();
-                               first = num;
-                       }
-                       last = num;
-                       if (num < (48*4))
-                               max = num;
-                       cnt++;
-               }
-       }
-
-       print_sequence();
-
-       fprintf(ctx->out, " (cnt=%d, max=%d)", cnt, max);
-
-       return max;
-}
-
-static void print_reg_stats(struct disasm_ctx *ctx)
-{
-       int fullreg, halfreg;
-
-       fprintf(ctx->out, "%sRegister Stats:\n", levels[ctx->level]);
-       fprintf(ctx->out, "%s- used (half):", levels[ctx->level]);
-       halfreg = print_regs(ctx, &ctx->regs.used, false);
-       fprintf(ctx->out, "\n");
-       fprintf(ctx->out, "%s- used (full):", levels[ctx->level]);
-       fullreg = print_regs(ctx, &ctx->regs.used, true);
-       fprintf(ctx->out, "\n");
-       fprintf(ctx->out, "%s- used (merged):", levels[ctx->level]);
-       print_regs(ctx, &ctx->regs.used_merged, false);
-       fprintf(ctx->out, "\n");
-       fprintf(ctx->out, "%s- input (half):", levels[ctx->level]);
-       print_regs(ctx, &ctx->regs.rbw, false);
-       fprintf(ctx->out, "\n");
-       fprintf(ctx->out, "%s- input (full):", levels[ctx->level]);
-       print_regs(ctx, &ctx->regs.rbw, true);
-       fprintf(ctx->out, "\n");
-       fprintf(ctx->out, "%s- const (half):", levels[ctx->level]);
-       print_regs(ctx, &ctx->regs.cnst, false);
-       fprintf(ctx->out, "\n");
-       fprintf(ctx->out, "%s- const (full):", levels[ctx->level]);
-       print_regs(ctx, &ctx->regs.cnst, true);
-       fprintf(ctx->out, "\n");
-       fprintf(ctx->out, "%s- output (half):", levels[ctx->level]);
-       print_regs(ctx, &ctx->regs.war, false);
-       fprintf(ctx->out, "  (estimated)\n");
-       fprintf(ctx->out, "%s- output (full):", levels[ctx->level]);
-       print_regs(ctx, &ctx->regs.war, true);
-       fprintf(ctx->out, "  (estimated)\n");
-
-       /* convert to vec4, which is the granularity that registers are
-        * assigned to shader:
-        */
-       fullreg = (fullreg + 3) / 4;
-       halfreg = (halfreg + 3) / 4;
-
-       // Note this count of instructions includes rptN, which matches
-       // up to how mesa prints this:
-       fprintf(ctx->out, "%s- shaderdb: %d instructions, %d nops, %d non-nops, "
-                       "(%d instlen), %d half, %d full\n",
-                       levels[ctx->level], ctx->stats->instructions, ctx->stats->nops,
-                       ctx->stats->instructions - ctx->stats->nops, ctx->stats->instlen,
-                       halfreg, fullreg);
-       fprintf(ctx->out, "%s- shaderdb: %d (ss), %d (sy)\n", levels[ctx->level],
-                       ctx->stats->ss, ctx->stats->sy);
-}
-
-static void process_reg_dst(struct disasm_ctx *ctx)
-{
-       int i;
-
-       if (!ctx->last_dst_valid)
-               return;
-
-       for (i = 0; i <= ctx->repeat; i++) {
-               unsigned dst = ctx->last_dst + i;
-
-               regmask_set(&ctx->regs.war, dst, ctx->last_dst_full, 1);
-               regmask_set(&ctx->regs.used, dst, ctx->last_dst_full, 1);
-
-               if (ctx->last_dst_full) {
-                       regmask_set(&ctx->regs.used_merged, (dst*2)+0, false, 1);
-                       regmask_set(&ctx->regs.used_merged, (dst*2)+1, false, 1);
-               } else {
-                       regmask_set(&ctx->regs.used_merged, dst, false, 1);
-               }
-       }
-
-       ctx->last_dst_valid = false;
-}
-
-static void print_reg_dst(struct disasm_ctx *ctx, reg_t reg, bool full, bool addr_rel)
-{
-       /* presumably the special registers a0.c and p0.c don't count.. */
-       if (!(addr_rel || (reg.num == 61) || (reg.num == 62))) {
-               ctx->last_dst = regidx(reg);
-               ctx->last_dst_full = full;
-               ctx->last_dst_valid = true;
-       }
-       reg = idxreg(regidx(reg) + ctx->repeatidx);
-       print_reg(ctx, reg, full, false, false, false, false, false, false, addr_rel);
-}
-
-static void print_reg_src(struct disasm_ctx *ctx, reg_t reg, bool full, bool f, bool r,
-               bool c, bool im, bool neg, bool abs, bool addr_rel)
-{
-       /* presumably the special registers a0.c and p0.c don't count.. */
-       if (!(addr_rel || c || im || (reg.num == 61) || (reg.num == 62))) {
-               int i, num = regidx(reg);
-               for (i = 0; i <= ctx->repeat; i++) {
-                       unsigned src = num + i;
-
-                       if (!regmask_get(&ctx->regs.used, src, full))
-                               regmask_set(&ctx->regs.rbw, src, full, 1);
-
-                       regmask_set(&ctx->regs.war, src, full, 0);
-                       regmask_set(&ctx->regs.used, src, full, 1);
-
-                       if (full) {
-                               regmask_set(&ctx->regs.used_merged, (src*2)+0, false, 1);
-                               regmask_set(&ctx->regs.used_merged, (src*2)+1, false, 1);
-                       } else {
-                               regmask_set(&ctx->regs.used_merged, src, false, 1);
-                       }
-
-                       if (!r)
-                               break;
-               }
-       } else if (c) {
-               int i, num = regidx(reg);
-               for (i = 0; i <= ctx->repeat; i++) {
-                       unsigned src = num + i;
-
-                       regmask_set(&ctx->regs.cnst, src, full, 1);
-
-                       if (!r)
-                               break;
-               }
-
-               unsigned max = (num + ctx->repeat + 1 + 3) / 4;
-               if (max > ctx->stats->constlen)
-                       ctx->stats->constlen = max;
-       }
-
-       if (r)
-               reg = idxreg(regidx(reg) + ctx->repeatidx);
-
-       print_reg(ctx, reg, full, f, r, c, im, neg, abs, addr_rel);
-}
-
-/* TODO switch to using reginfo struct everywhere, since more readable
- * than passing a bunch of bools to print_reg_src
- */
-
-struct reginfo {
-       reg_t reg;
-       bool full;
-       bool r;
-       bool c;
-       bool f; /* src reg is interpreted as float, used for printing immediates */
-       bool im;
-       bool neg;
-       bool abs;
-       bool addr_rel;
-};
-
-static void print_src(struct disasm_ctx *ctx, struct reginfo *info)
-{
-       reg_t reg = info->reg;
-
-       if (info->r)
-               reg = idxreg(regidx(info->reg) + ctx->repeatidx);
-
-       print_reg_src(ctx, reg, info->full, info->f, info->r, info->c, info->im,
-                       info->neg, info->abs, info->addr_rel);
-}
-
-//static void print_dst(struct disasm_ctx *ctx, struct reginfo *info)
-//{
-//     print_reg_dst(ctx, info->reg, info->full, info->addr_rel);
-//}
-
-static void print_instr_cat0(struct disasm_ctx *ctx, instr_t *instr)
-{
-       static const struct {
-               const char *suffix;
-               int nsrc;
-               bool idx;
-       } brinfo[7] = {
-               [BRANCH_PLAIN] = { "r",   1, false },
-               [BRANCH_OR]    = { "rao", 2, false },
-               [BRANCH_AND]   = { "raa", 2, false },
-               [BRANCH_CONST] = { "rac", 0, true  },
-               [BRANCH_ANY]   = { "any", 1, false },
-               [BRANCH_ALL]   = { "all", 1, false },
-               [BRANCH_X]     = { "rax", 0, false },
-       };
-       instr_cat0_t *cat0 = &instr->cat0;
-
-       switch (instr_opc(instr, ctx->gpu_id)) {
-       case OPC_KILL:
-       case OPC_PREDT:
-       case OPC_PREDF:
-               fprintf(ctx->out, " %sp0.%c", cat0->inv0 ? "!" : "",
-                               component[cat0->comp0]);
-               break;
-       case OPC_B:
-               fprintf(ctx->out, "%s", brinfo[cat0->brtype].suffix);
-               if (brinfo[cat0->brtype].idx) {
-                       fprintf(ctx->out, ".%u", cat0->idx);
-               }
-               if (brinfo[cat0->brtype].nsrc >= 1) {
-                       fprintf(ctx->out, " %sp0.%c,", cat0->inv0 ? "!" : "",
-                                       component[cat0->comp0]);
-               }
-               if (brinfo[cat0->brtype].nsrc >= 2) {
-                       fprintf(ctx->out, " %sp0.%c,", cat0->inv1 ? "!" : "",
-                                       component[cat0->comp1]);
-               }
-               fprintf(ctx->out, " #%d", cat0->a3xx.immed);
-               break;
-       case OPC_JUMP:
-       case OPC_CALL:
-       case OPC_BKT:
-       case OPC_GETONE:
-       case OPC_SHPS:
-               fprintf(ctx->out, " #%d", cat0->a3xx.immed);
-               break;
-       }
-
-       if ((debug & PRINT_VERBOSE) && (cat0->dummy3|cat0->dummy4))
-               fprintf(ctx->out, "\t{0: %x,%x}", cat0->dummy3, cat0->dummy4);
-}
-
-static void print_instr_cat1(struct disasm_ctx *ctx, instr_t *instr)
-{
-       instr_cat1_t *cat1 = &instr->cat1;
-
-       if (cat1->ul)
-               fprintf(ctx->out, "(ul)");
-
-       if (cat1->src_type == cat1->dst_type) {
-               if ((cat1->src_type == TYPE_S16) && (((reg_t)cat1->dst).num == REG_A0)) {
-                       /* special case (nmemonic?): */
-                       fprintf(ctx->out, "mova");
-               } else {
-                       fprintf(ctx->out, "mov.%s%s", type[cat1->src_type], type[cat1->dst_type]);
-               }
-       } else {
-               fprintf(ctx->out, "cov.%s%s", type[cat1->src_type], type[cat1->dst_type]);
-       }
-
-       fprintf(ctx->out, " ");
-
-       if (cat1->even)
-               fprintf(ctx->out, "(even)");
-
-       if (cat1->pos_inf)
-               fprintf(ctx->out, "(pos_infinity)");
-
-       print_reg_dst(ctx, (reg_t)(cat1->dst), type_size(cat1->dst_type) == 32,
-                       cat1->dst_rel);
-
-       fprintf(ctx->out, ", ");
-
-       /* ugg, have to special case this.. vs print_reg().. */
-       if (cat1->src_im) {
-               if (type_float(cat1->src_type))
-                       fprintf(ctx->out, "(%f)", cat1->fim_val);
-               else if (type_uint(cat1->src_type))
-                       fprintf(ctx->out, "0x%08x", cat1->uim_val);
-               else
-                       fprintf(ctx->out, "%d", cat1->iim_val);
-       } else if (cat1->src_rel && !cat1->src_c) {
-               /* I would just use %+d but trying to make it diff'able with
-                * libllvm-a3xx...
-                */
-               char type = cat1->src_rel_c ? 'c' : 'r';
-               const char *full = (type_size(cat1->src_type) == 32) ? "" : "h";
-               if (cat1->off < 0)
-                       fprintf(ctx->out, "%s%c<a0.x - %d>", full, type, -cat1->off);
-               else if (cat1->off > 0)
-                       fprintf(ctx->out, "%s%c<a0.x + %d>", full, type, cat1->off);
-               else
-                       fprintf(ctx->out, "%s%c<a0.x>", full, type);
-       } else {
-               struct reginfo src = {
-                       .reg = (reg_t)cat1->src,
-                       .full = type_size(cat1->src_type) == 32,
-                       .r = cat1->src_r,
-                       .c = cat1->src_c,
-                       .im = cat1->src_im,
-               };
-               print_src(ctx, &src);
-       }
-
-       if ((debug & PRINT_VERBOSE) && (cat1->must_be_0))
-               fprintf(ctx->out, "\t{1: %x}", cat1->must_be_0);
-}
-
-static void print_instr_cat2(struct disasm_ctx *ctx, instr_t *instr)
-{
-       instr_cat2_t *cat2 = &instr->cat2;
-       int opc = _OPC(2, cat2->opc);
-       static const char *cond[] = {
-                       "lt",
-                       "le",
-                       "gt",
-                       "ge",
-                       "eq",
-                       "ne",
-                       "?6?",
-       };
-
-       switch (opc) {
-       case OPC_CMPS_F:
-       case OPC_CMPS_U:
-       case OPC_CMPS_S:
-       case OPC_CMPV_F:
-       case OPC_CMPV_U:
-       case OPC_CMPV_S:
-               fprintf(ctx->out, ".%s", cond[cat2->cond]);
-               break;
-       }
-
-       fprintf(ctx->out, " ");
-       if (cat2->ei)
-               fprintf(ctx->out, "(ei)");
-       print_reg_dst(ctx, (reg_t)(cat2->dst), cat2->full ^ cat2->dst_half, false);
-       fprintf(ctx->out, ", ");
-
-       struct reginfo src1 = {
-               .full = cat2->full,
-               .r = cat2->repeat ? cat2->src1_r : 0,
-               .f = is_cat2_float(opc),
-               .im = cat2->src1_im,
-               .abs = cat2->src1_abs,
-               .neg = cat2->src1_neg,
-       };
-
-       if (cat2->c1.src1_c) {
-               src1.reg = (reg_t)(cat2->c1.src1);
-               src1.c = true;
-       } else if (cat2->rel1.src1_rel) {
-               src1.reg = (reg_t)(cat2->rel1.src1);
-               src1.c = cat2->rel1.src1_c;
-               src1.addr_rel = true;
-       } else {
-               src1.reg = (reg_t)(cat2->src1);
-       }
-       print_src(ctx, &src1);
-
-       struct reginfo src2 = {
-               .r = cat2->repeat ? cat2->src2_r : 0,
-               .full = cat2->full,
-               .f = is_cat2_float(opc),
-               .abs = cat2->src2_abs,
-               .neg = cat2->src2_neg,
-               .im = cat2->src2_im,
-       };
-       switch (opc) {
-       case OPC_ABSNEG_F:
-       case OPC_ABSNEG_S:
-       case OPC_CLZ_B:
-       case OPC_CLZ_S:
-       case OPC_SIGN_F:
-       case OPC_FLOOR_F:
-       case OPC_CEIL_F:
-       case OPC_RNDNE_F:
-       case OPC_RNDAZ_F:
-       case OPC_TRUNC_F:
-       case OPC_NOT_B:
-       case OPC_BFREV_B:
-       case OPC_SETRM:
-       case OPC_CBITS_B:
-               /* these only have one src reg */
-               break;
-       default:
-               fprintf(ctx->out, ", ");
-               if (cat2->c2.src2_c) {
-                       src2.reg = (reg_t)(cat2->c2.src2);
-                       src2.c = true;
-               } else if (cat2->rel2.src2_rel) {
-                       src2.reg = (reg_t)(cat2->rel2.src2);
-                       src2.c = cat2->rel2.src2_c;
-                       src2.addr_rel = true;
-               } else {
-                       src2.reg = (reg_t)(cat2->src2);
-               }
-               print_src(ctx, &src2);
-               break;
-       }
-}
-
-static void print_instr_cat3(struct disasm_ctx *ctx, instr_t *instr)
-{
-       instr_cat3_t *cat3 = &instr->cat3;
-       bool full = instr_cat3_full(cat3);
-
-       fprintf(ctx->out, " ");
-       print_reg_dst(ctx, (reg_t)(cat3->dst), full ^ cat3->dst_half, false);
-       fprintf(ctx->out, ", ");
-
-       struct reginfo src1 = {
-               .r = cat3->repeat ? cat3->src1_r : 0,
-               .full = full,
-               .neg = cat3->src1_neg,
-       };
-       if (cat3->c1.src1_c) {
-               src1.reg = (reg_t)(cat3->c1.src1);
-               src1.c = true;
-       } else if (cat3->rel1.src1_rel) {
-               src1.reg = (reg_t)(cat3->rel1.src1);
-               src1.c = cat3->rel1.src1_c;
-               src1.addr_rel = true;
-       } else {
-               src1.reg = (reg_t)(cat3->src1);
-       }
-       print_src(ctx, &src1);
-
-       fprintf(ctx->out, ", ");
-       struct reginfo src2 = {
-               .reg = (reg_t)cat3->src2,
-               .full = full,
-               .r = cat3->repeat ? cat3->src2_r : 0,
-               .c = cat3->src2_c,
-               .neg = cat3->src2_neg,
-       };
-       print_src(ctx, &src2);
-
-       fprintf(ctx->out, ", ");
-       struct reginfo src3 = {
-               .r = cat3->src3_r,
-               .full = full,
-               .neg = cat3->src3_neg,
-       };
-       if (cat3->c2.src3_c) {
-               src3.reg = (reg_t)(cat3->c2.src3);
-               src3.c = true;
-       } else if (cat3->rel2.src3_rel) {
-               src3.reg = (reg_t)(cat3->rel2.src3);
-               src3.c = cat3->rel2.src3_c;
-               src3.addr_rel = true;
-       } else {
-               src3.reg = (reg_t)(cat3->src3);
-       }
-       print_src(ctx, &src3);
-}
-
-static void print_instr_cat4(struct disasm_ctx *ctx, instr_t *instr)
-{
-       instr_cat4_t *cat4 = &instr->cat4;
-
-       fprintf(ctx->out, " ");
-       print_reg_dst(ctx, (reg_t)(cat4->dst), cat4->full ^ cat4->dst_half, false);
-       fprintf(ctx->out, ", ");
-
-       struct reginfo src = {
-               .r = cat4->src_r,
-               .im = cat4->src_im,
-               .full = cat4->full,
-               .neg = cat4->src_neg,
-               .abs = cat4->src_abs,
-       };
-       if (cat4->c.src_c) {
-               src.reg = (reg_t)(cat4->c.src);
-               src.c = true;
-       } else if (cat4->rel.src_rel) {
-               src.reg = (reg_t)(cat4->rel.src);
-               src.c = cat4->rel.src_c;
-               src.addr_rel = true;
-       } else {
-               src.reg = (reg_t)(cat4->src);
-       }
-       print_src(ctx, &src);
-
-       if ((debug & PRINT_VERBOSE) && (cat4->dummy1|cat4->dummy2))
-               fprintf(ctx->out, "\t{4: %x,%x}", cat4->dummy1, cat4->dummy2);
-}
-
-static void print_instr_cat5(struct disasm_ctx *ctx, instr_t *instr)
-{
-       static const struct {
-               bool src1, src2, samp, tex;
-       } info[0x1f] = {
-                       [opc_op(OPC_ISAM)]     = { true,  false, true,  true,  },
-                       [opc_op(OPC_ISAML)]    = { true,  true,  true,  true,  },
-                       [opc_op(OPC_ISAMM)]    = { true,  false, true,  true,  },
-                       [opc_op(OPC_SAM)]      = { true,  false, true,  true,  },
-                       [opc_op(OPC_SAMB)]     = { true,  true,  true,  true,  },
-                       [opc_op(OPC_SAML)]     = { true,  true,  true,  true,  },
-                       [opc_op(OPC_SAMGQ)]    = { true,  false, true,  true,  },
-                       [opc_op(OPC_GETLOD)]   = { true,  false, true,  true,  },
-                       [opc_op(OPC_CONV)]     = { true,  true,  true,  true,  },
-                       [opc_op(OPC_CONVM)]    = { true,  true,  true,  true,  },
-                       [opc_op(OPC_GETSIZE)]  = { true,  false, false, true,  },
-                       [opc_op(OPC_GETBUF)]   = { false, false, false, true,  },
-                       [opc_op(OPC_GETPOS)]   = { true,  false, false, true,  },
-                       [opc_op(OPC_GETINFO)]  = { false, false, false, true,  },
-                       [opc_op(OPC_DSX)]      = { true,  false, false, false, },
-                       [opc_op(OPC_DSY)]      = { true,  false, false, false, },
-                       [opc_op(OPC_GATHER4R)] = { true,  false, true,  true,  },
-                       [opc_op(OPC_GATHER4G)] = { true,  false, true,  true,  },
-                       [opc_op(OPC_GATHER4B)] = { true,  false, true,  true,  },
-                       [opc_op(OPC_GATHER4A)] = { true,  false, true,  true,  },
-                       [opc_op(OPC_SAMGP0)]   = { true,  false, true,  true,  },
-                       [opc_op(OPC_SAMGP1)]   = { true,  false, true,  true,  },
-                       [opc_op(OPC_SAMGP2)]   = { true,  false, true,  true,  },
-                       [opc_op(OPC_SAMGP3)]   = { true,  false, true,  true,  },
-                       [opc_op(OPC_DSXPP_1)]  = { true,  false, false, false, },
-                       [opc_op(OPC_DSYPP_1)]  = { true,  false, false, false, },
-                       [opc_op(OPC_RGETPOS)]  = { true,  false, false, false, },
-                       [opc_op(OPC_RGETINFO)] = { false, false, false, false, },
-       };
-
-       static const struct {
-               bool indirect;
-               bool bindless;
-               bool use_a1;
-               bool uniform;
-       } desc_features[8] = {
-               [CAT5_NONUNIFORM] = { .indirect = true, },
-               [CAT5_UNIFORM] = { .indirect = true, .uniform = true, },
-               [CAT5_BINDLESS_IMM] = { .bindless = true, },
-               [CAT5_BINDLESS_UNIFORM] = {
-                       .bindless = true,
-                       .indirect = true,
-                       .uniform = true,
-               },
-               [CAT5_BINDLESS_NONUNIFORM] = {
-                       .bindless = true,
-                       .indirect = true,
-               },
-               [CAT5_BINDLESS_A1_IMM] = {
-                       .bindless = true,
-                       .use_a1 = true,
-               },
-               [CAT5_BINDLESS_A1_UNIFORM] = {
-                       .bindless = true,
-                       .indirect = true,
-                       .uniform = true,
-                       .use_a1 = true,
-               },
-               [CAT5_BINDLESS_A1_NONUNIFORM] = {
-                       .bindless = true,
-                       .indirect = true,
-                       .use_a1 = true,
-               },
-       };
-
-       instr_cat5_t *cat5 = &instr->cat5;
-       int i;
-
-       bool desc_indirect =
-               cat5->is_s2en_bindless &&
-               desc_features[cat5->s2en_bindless.desc_mode].indirect;
-       bool bindless =
-               cat5->is_s2en_bindless &&
-               desc_features[cat5->s2en_bindless.desc_mode].bindless;
-       bool use_a1 =
-               cat5->is_s2en_bindless &&
-               desc_features[cat5->s2en_bindless.desc_mode].use_a1;
-       bool uniform =
-               cat5->is_s2en_bindless &&
-               desc_features[cat5->s2en_bindless.desc_mode].uniform;
-
-       if (cat5->is_3d)   fprintf(ctx->out, ".3d");
-       if (cat5->is_a)    fprintf(ctx->out, ".a");
-       if (cat5->is_o)    fprintf(ctx->out, ".o");
-       if (cat5->is_p)    fprintf(ctx->out, ".p");
-       if (cat5->is_s)    fprintf(ctx->out, ".s");
-       if (desc_indirect) fprintf(ctx->out, ".s2en");
-       if (uniform)       fprintf(ctx->out, ".uniform");
-
-       if (bindless) {
-               unsigned base = (cat5->s2en_bindless.base_hi << 1) | cat5->base_lo;
-               fprintf(ctx->out, ".base%d", base);
-       }
-
-       fprintf(ctx->out, " ");
-
-       switch (_OPC(5, cat5->opc)) {
-       case OPC_DSXPP_1:
-       case OPC_DSYPP_1:
-               break;
-       default:
-               fprintf(ctx->out, "(%s)", type[cat5->type]);
-               break;
-       }
-
-       fprintf(ctx->out, "(");
-       for (i = 0; i < 4; i++)
-               if (cat5->wrmask & (1 << i))
-                       fprintf(ctx->out, "%c", "xyzw"[i]);
-       fprintf(ctx->out, ")");
-
-       print_reg_dst(ctx, (reg_t)(cat5->dst), type_size(cat5->type) == 32, false);
-
-       if (info[cat5->opc].src1) {
-               fprintf(ctx->out, ", ");
-               struct reginfo src = { .reg = (reg_t)(cat5->src1), .full = cat5->full };
-               print_src(ctx, &src);
-       }
-
-       if (cat5->is_o || info[cat5->opc].src2) {
-               fprintf(ctx->out, ", ");
-               struct reginfo src = { .reg = (reg_t)(cat5->src2), .full = cat5->full };
-               print_src(ctx, &src);
-       }
-       if (cat5->is_s2en_bindless) {
-               if (!desc_indirect) {
-                       if (info[cat5->opc].samp) {
-                               if (use_a1)
-                                       fprintf(ctx->out, ", s#%d", cat5->s2en_bindless.src3);
-                               else
-                                       fprintf(ctx->out, ", s#%d", cat5->s2en_bindless.src3 & 0xf);
-                       }
-
-                       if (info[cat5->opc].tex && !use_a1) {
-                               fprintf(ctx->out, ", t#%d", cat5->s2en_bindless.src3 >> 4);
-                       }
-               }
-       } else {
-               if (info[cat5->opc].samp)
-                       fprintf(ctx->out, ", s#%d", cat5->norm.samp);
-               if (info[cat5->opc].tex)
-                       fprintf(ctx->out, ", t#%d", cat5->norm.tex);
-       }
-
-       if (desc_indirect) {
-               fprintf(ctx->out, ", ");
-               struct reginfo src = { .reg = (reg_t)(cat5->s2en_bindless.src3), .full = bindless };
-               print_src(ctx, &src);
-       }
-
-       if (use_a1)
-               fprintf(ctx->out, ", a1.x");
-
-       if (debug & PRINT_VERBOSE) {
-               if (cat5->is_s2en_bindless) {
-                       if ((debug & PRINT_VERBOSE) && cat5->s2en_bindless.dummy1)
-                               fprintf(ctx->out, "\t{5: %x}", cat5->s2en_bindless.dummy1);
-               } else {
-                       if ((debug & PRINT_VERBOSE) && cat5->norm.dummy1)
-                               fprintf(ctx->out, "\t{5: %x}", cat5->norm.dummy1);
-               }
-       }
-}
-
-static void print_instr_cat6_a3xx(struct disasm_ctx *ctx, instr_t *instr)
-{
-       instr_cat6_t *cat6 = &instr->cat6;
-       char sd = 0, ss = 0;  /* dst/src address space */
-       bool nodst = false;
-       struct reginfo dst, src1, src2;
-       int src1off = 0, dstoff = 0;
-
-       memset(&dst, 0, sizeof(dst));
-       memset(&src1, 0, sizeof(src1));
-       memset(&src2, 0, sizeof(src2));
-
-       switch (_OPC(6, cat6->opc)) {
-       case OPC_RESINFO:
-       case OPC_RESFMT:
-               dst.full  = type_size(cat6->type) == 32;
-               src1.full = type_size(cat6->type) == 32;
-               src2.full = type_size(cat6->type) == 32;
-               break;
-       case OPC_L2G:
-       case OPC_G2L:
-               dst.full = true;
-               src1.full = true;
-               src2.full = true;
-               break;
-       case OPC_STG:
-       case OPC_STL:
-       case OPC_STP:
-       case OPC_STLW:
-       case OPC_STIB:
-               dst.full  = type_size(cat6->type) == 32;
-               src1.full = type_size(cat6->type) == 32;
-               src2.full = type_size(cat6->type) == 32;
-               break;
-       default:
-               dst.full  = type_size(cat6->type) == 32;
-               src1.full = true;
-               src2.full = true;
-               break;
-       }
-
-       switch (_OPC(6, cat6->opc)) {
-       case OPC_PREFETCH:
-               break;
-       case OPC_RESINFO:
-               fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1);
-               break;
-       case OPC_LDGB:
-               fprintf(ctx->out, ".%s", cat6->ldgb.typed ? "typed" : "untyped");
-               fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1);
-               fprintf(ctx->out, ".%s", type[cat6->type]);
-               fprintf(ctx->out, ".%d", cat6->ldgb.type_size + 1);
-               break;
-       case OPC_STGB:
-       case OPC_STIB:
-               fprintf(ctx->out, ".%s", cat6->stgb.typed ? "typed" : "untyped");
-               fprintf(ctx->out, ".%dd", cat6->stgb.d + 1);
-               fprintf(ctx->out, ".%s", type[cat6->type]);
-               fprintf(ctx->out, ".%d", cat6->stgb.type_size + 1);
-               break;
-       case OPC_ATOMIC_ADD:
-       case OPC_ATOMIC_SUB:
-       case OPC_ATOMIC_XCHG:
-       case OPC_ATOMIC_INC:
-       case OPC_ATOMIC_DEC:
-       case OPC_ATOMIC_CMPXCHG:
-       case OPC_ATOMIC_MIN:
-       case OPC_ATOMIC_MAX:
-       case OPC_ATOMIC_AND:
-       case OPC_ATOMIC_OR:
-       case OPC_ATOMIC_XOR:
-               ss = cat6->g ? 'g' : 'l';
-               fprintf(ctx->out, ".%s", cat6->ldgb.typed ? "typed" : "untyped");
-               fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1);
-               fprintf(ctx->out, ".%s", type[cat6->type]);
-               fprintf(ctx->out, ".%d", cat6->ldgb.type_size + 1);
-               fprintf(ctx->out, ".%c", ss);
-               break;
-       default:
-               dst.im = cat6->g && !cat6->dst_off;
-               fprintf(ctx->out, ".%s", type[cat6->type]);
-               break;
-       }
-       fprintf(ctx->out, " ");
-
-       switch (_OPC(6, cat6->opc)) {
-       case OPC_STG:
-               sd = 'g';
-               break;
-       case OPC_STP:
-               sd = 'p';
-               break;
-       case OPC_STL:
-       case OPC_STLW:
-               sd = 'l';
-               break;
-
-       case OPC_LDG:
-       case OPC_LDC:
-               ss = 'g';
-               break;
-       case OPC_LDP:
-               ss = 'p';
-               break;
-       case OPC_LDL:
-       case OPC_LDLW:
-       case OPC_LDLV:
-               ss = 'l';
-               break;
-
-       case OPC_L2G:
-               ss = 'l';
-               sd = 'g';
-               break;
-
-       case OPC_G2L:
-               ss = 'g';
-               sd = 'l';
-               break;
-
-       case OPC_PREFETCH:
-               ss = 'g';
-               nodst = true;
-               break;
-       }
-
-       if ((_OPC(6, cat6->opc) == OPC_STGB) || (_OPC(6, cat6->opc) == OPC_STIB)) {
-               struct reginfo src3;
-
-               memset(&src3, 0, sizeof(src3));
-
-               src1.reg = (reg_t)(cat6->stgb.src1);
-               src2.reg = (reg_t)(cat6->stgb.src2);
-               src2.im  = cat6->stgb.src2_im;
-               src3.reg = (reg_t)(cat6->stgb.src3);
-               src3.im  = cat6->stgb.src3_im;
-               src3.full = true;
-
-               fprintf(ctx->out, "g[%u], ", cat6->stgb.dst_ssbo);
-               print_src(ctx, &src1);
-               fprintf(ctx->out, ", ");
-               print_src(ctx, &src2);
-               fprintf(ctx->out, ", ");
-               print_src(ctx, &src3);
-
-               if (debug & PRINT_VERBOSE)
-                       fprintf(ctx->out, " (pad0=%x, pad3=%x)", cat6->stgb.pad0, cat6->stgb.pad3);
-
-               return;
-       }
-
-       if (is_atomic(_OPC(6, cat6->opc))) {
-
-               src1.reg = (reg_t)(cat6->ldgb.src1);
-               src1.im  = cat6->ldgb.src1_im;
-               src2.reg = (reg_t)(cat6->ldgb.src2);
-               src2.im  = cat6->ldgb.src2_im;
-               dst.reg  = (reg_t)(cat6->ldgb.dst);
-
-               print_src(ctx, &dst);
-               fprintf(ctx->out, ", ");
-               if (ss == 'g') {
-                       struct reginfo src3;
-                       memset(&src3, 0, sizeof(src3));
-
-                       src3.reg = (reg_t)(cat6->ldgb.src3);
-                       src3.full = true;
-
-                       /* For images, the ".typed" variant is used and src2 is
-                        * the ivecN coordinates, ie ivec2 for 2d.
-                        *
-                        * For SSBOs, the ".untyped" variant is used and src2 is
-                        * a simple dword offset..  src3 appears to be
-                        * uvec2(offset * 4, 0).  Not sure the point of that.
-                        */
-
-                       fprintf(ctx->out, "g[%u], ", cat6->ldgb.src_ssbo);
-                       print_src(ctx, &src1);  /* value */
-                       fprintf(ctx->out, ", ");
-                       print_src(ctx, &src2);  /* offset/coords */
-                       fprintf(ctx->out, ", ");
-                       print_src(ctx, &src3);  /* 64b byte offset.. */
-
-                       if (debug & PRINT_VERBOSE) {
-                               fprintf(ctx->out, " (pad0=%x, pad3=%x, mustbe0=%x)", cat6->ldgb.pad0,
-                                               cat6->ldgb.pad3, cat6->ldgb.mustbe0);
-                       }
-               } else { /* ss == 'l' */
-                       fprintf(ctx->out, "l[");
-                       print_src(ctx, &src1);  /* simple byte offset */
-                       fprintf(ctx->out, "], ");
-                       print_src(ctx, &src2);  /* value */
-
-                       if (debug & PRINT_VERBOSE) {
-                               fprintf(ctx->out, " (src3=%x, pad0=%x, pad3=%x, mustbe0=%x)",
-                                               cat6->ldgb.src3, cat6->ldgb.pad0,
-                                               cat6->ldgb.pad3, cat6->ldgb.mustbe0);
-                       }
-               }
-
-               return;
-       } else if (_OPC(6, cat6->opc) == OPC_RESINFO) {
-               dst.reg  = (reg_t)(cat6->ldgb.dst);
-
-               print_src(ctx, &dst);
-               fprintf(ctx->out, ", ");
-               fprintf(ctx->out, "g[%u]", cat6->ldgb.src_ssbo);
-
-               return;
-       } else if (_OPC(6, cat6->opc) == OPC_LDGB) {
-
-               src1.reg = (reg_t)(cat6->ldgb.src1);
-               src1.im  = cat6->ldgb.src1_im;
-               src2.reg = (reg_t)(cat6->ldgb.src2);
-               src2.im  = cat6->ldgb.src2_im;
-               dst.reg  = (reg_t)(cat6->ldgb.dst);
-
-               print_src(ctx, &dst);
-               fprintf(ctx->out, ", ");
-               fprintf(ctx->out, "g[%u], ", cat6->ldgb.src_ssbo);
-               print_src(ctx, &src1);
-               fprintf(ctx->out, ", ");
-               print_src(ctx, &src2);
-
-               if (debug & PRINT_VERBOSE)
-                       fprintf(ctx->out, " (pad0=%x, pad3=%x, mustbe0=%x)", cat6->ldgb.pad0, cat6->ldgb.pad3, cat6->ldgb.mustbe0);
-
-               return;
-       } else if (_OPC(6, cat6->opc) == OPC_LDG && cat6->a.src1_im && cat6->a.src2_im) {
-               struct reginfo src3;
-
-               memset(&src3, 0, sizeof(src3));
-               src1.reg = (reg_t)(cat6->a.src1);
-               src2.reg = (reg_t)(cat6->a.src2);
-               src2.im  = cat6->a.src2_im;
-               src3.reg = (reg_t)(cat6->a.off);
-               src3.full = true;
-               dst.reg  = (reg_t)(cat6->d.dst);
-
-               print_src(ctx, &dst);
-               fprintf(ctx->out, ", g[");
-               print_src(ctx, &src1);
-               fprintf(ctx->out, "+");
-               print_src(ctx, &src3);
-               fprintf(ctx->out, "], ");
-               print_src(ctx, &src2);
-
-               return;
-       }
-       if (cat6->dst_off) {
-               dst.reg = (reg_t)(cat6->c.dst);
-               dstoff  = cat6->c.off;
-       } else {
-               dst.reg = (reg_t)(cat6->d.dst);
-       }
-
-       if (cat6->src_off) {
-               src1.reg = (reg_t)(cat6->a.src1);
-               src1.im  = cat6->a.src1_im;
-               src2.reg = (reg_t)(cat6->a.src2);
-               src2.im  = cat6->a.src2_im;
-               src1off  = cat6->a.off;
-       } else {
-               src1.reg = (reg_t)(cat6->b.src1);
-               src1.im  = cat6->b.src1_im;
-               src2.reg = (reg_t)(cat6->b.src2);
-               src2.im  = cat6->b.src2_im;
-       }
-
-       if (!nodst) {
-               if (sd)
-                       fprintf(ctx->out, "%c[", sd);
-               /* note: dst might actually be a src (ie. address to store to) */
-               print_src(ctx, &dst);
-               if (cat6->dst_off && cat6->g) {
-                       struct reginfo dstoff_reg = {0};
-                       dstoff_reg.reg = (reg_t) cat6->c.off;
-                       dstoff_reg.full  = true;
-                       fprintf(ctx->out, "+");
-                       print_src(ctx, &dstoff_reg);
-               } else if (dstoff)
-                       fprintf(ctx->out, "%+d", dstoff);
-               if (sd)
-                       fprintf(ctx->out, "]");
-               fprintf(ctx->out, ", ");
-       }
-
-       if (ss)
-               fprintf(ctx->out, "%c[", ss);
-
-       /* can have a larger than normal immed, so hack: */
-       if (src1.im) {
-               fprintf(ctx->out, "%u", src1.reg.dummy13);
-       } else {
-               print_src(ctx, &src1);
-       }
-
-       if (cat6->src_off && cat6->g)
-               print_src(ctx, &src2);
-       else if (src1off)
-               fprintf(ctx->out, "%+d", src1off);
-       if (ss)
-               fprintf(ctx->out, "]");
-
-       switch (_OPC(6, cat6->opc)) {
-       case OPC_RESINFO:
-       case OPC_RESFMT:
-               break;
-       default:
-               fprintf(ctx->out, ", ");
-               print_src(ctx, &src2);
-               break;
-       }
-}
-
-static void print_instr_cat6_a6xx(struct disasm_ctx *ctx, instr_t *instr)
-{
-       instr_cat6_a6xx_t *cat6 = &instr->cat6_a6xx;
-       struct reginfo src1, src2, ssbo;
-       bool uses_type = _OPC(6, cat6->opc) != OPC_LDC;
-
-       static const struct {
-               bool indirect;
-               bool bindless;
-               const char *name;
-       } desc_features[8] = {
-               [CAT6_IMM] = {
-                       .name = "imm"
-               },
-               [CAT6_UNIFORM] = {
-                       .indirect = true,
-                       .name = "uniform"
-               },
-               [CAT6_NONUNIFORM] = {
-                       .indirect = true,
-                       .name = "nonuniform"
-               },
-               [CAT6_BINDLESS_IMM] = {
-                       .bindless = true,
-                       .name = "imm"
-               },
-               [CAT6_BINDLESS_UNIFORM] = {
-                       .bindless = true,
-                       .indirect = true,
-                       .name = "uniform"
-               },
-               [CAT6_BINDLESS_NONUNIFORM] = {
-                       .bindless = true,
-                       .indirect = true,
-                       .name = "nonuniform"
-               },
-       };
-
-       bool indirect_ssbo = desc_features[cat6->desc_mode].indirect;
-       bool bindless = desc_features[cat6->desc_mode].bindless;
-       bool type_full = cat6->type != TYPE_U16;
-
-
-       memset(&src1, 0, sizeof(src1));
-       memset(&src2, 0, sizeof(src2));
-       memset(&ssbo, 0, sizeof(ssbo));
-
-       if (uses_type) {
-               fprintf(ctx->out, ".%s", cat6->typed ? "typed" : "untyped");
-               fprintf(ctx->out, ".%dd", cat6->d + 1);
-               fprintf(ctx->out, ".%s", type[cat6->type]);
-       } else {
-               fprintf(ctx->out, ".offset%d", cat6->d);
-       }
-       fprintf(ctx->out, ".%u", cat6->type_size + 1);
-
-       fprintf(ctx->out, ".%s", desc_features[cat6->desc_mode].name);
-       if (bindless)
-               fprintf(ctx->out, ".base%d", cat6->base);
-       fprintf(ctx->out, " ");
-
-       src2.reg = (reg_t)(cat6->src2);
-       src2.full = type_full;
-       print_src(ctx, &src2);
-       fprintf(ctx->out, ", ");
-
-       src1.reg = (reg_t)(cat6->src1);
-       src1.full = true; // XXX
-       print_src(ctx, &src1);
-       fprintf(ctx->out, ", ");
-       ssbo.reg = (reg_t)(cat6->ssbo);
-       ssbo.im = !indirect_ssbo;
-       ssbo.full = true;
-       print_src(ctx, &ssbo);
-
-       if (debug & PRINT_VERBOSE) {
-               fprintf(ctx->out, " (pad1=%x, pad2=%x, pad3=%x, pad4=%x, pad5=%x)",
-                               cat6->pad1, cat6->pad2, cat6->pad3, cat6->pad4, cat6->pad5);
-       }
-}
-
-static void print_instr_cat6(struct disasm_ctx *ctx, instr_t *instr)
-{
-       if (!is_cat6_legacy(instr, ctx->gpu_id)) {
-               print_instr_cat6_a6xx(ctx, instr);
-               if (debug & PRINT_VERBOSE)
-                       fprintf(ctx->out, " NEW");
-       } else {
-               print_instr_cat6_a3xx(ctx, instr);
-               if (debug & PRINT_VERBOSE)
-                       fprintf(ctx->out, " LEGACY");
-       }
-}
-static void print_instr_cat7(struct disasm_ctx *ctx, instr_t *instr)
-{
-       instr_cat7_t *cat7 = &instr->cat7;
-
-       if (cat7->g)
-               fprintf(ctx->out, ".g");
-       if (cat7->l)
-               fprintf(ctx->out, ".l");
-
-       if (_OPC(7, cat7->opc) == OPC_FENCE) {
-               if (cat7->r)
-                       fprintf(ctx->out, ".r");
-               if (cat7->w)
-                       fprintf(ctx->out, ".w");
-       }
-}
-
-/* size of largest OPC field of all the instruction categories: */
-#define NOPC_BITS 6
-
-static const struct opc_info {
-       uint16_t cat;
-       uint16_t opc;
-       const char *name;
-       void (*print)(struct disasm_ctx *ctx, instr_t *instr);
-} opcs[1 << (3+NOPC_BITS)] = {
-#define OPC(cat, opc, name) [(opc)] = { (cat), (opc), #name, print_instr_cat##cat }
-       /* category 0: */
-       OPC(0, OPC_NOP,          nop),
-       OPC(0, OPC_B,            b),
-       OPC(0, OPC_JUMP,         jump),
-       OPC(0, OPC_CALL,         call),
-       OPC(0, OPC_RET,          ret),
-       OPC(0, OPC_KILL,         kill),
-       OPC(0, OPC_END,          end),
-       OPC(0, OPC_EMIT,         emit),
-       OPC(0, OPC_CUT,          cut),
-       OPC(0, OPC_CHMASK,       chmask),
-       OPC(0, OPC_CHSH,         chsh),
-       OPC(0, OPC_FLOW_REV,     flow_rev),
-       OPC(0, OPC_PREDT,        predt),
-       OPC(0, OPC_PREDF,        predf),
-       OPC(0, OPC_PREDE,        prede),
-       OPC(0, OPC_BKT,          bkt),
-       OPC(0, OPC_STKS,         stks),
-       OPC(0, OPC_STKR,         stkr),
-       OPC(0, OPC_XSET,         xset),
-       OPC(0, OPC_XCLR,         xclr),
-       OPC(0, OPC_GETONE,       getone),
-       OPC(0, OPC_DBG,          dbg),
-       OPC(0, OPC_SHPS,         shps),
-       OPC(0, OPC_SHPE,         shpe),
-
-       /* category 1: */
-       OPC(1, OPC_MOV, ),
-
-       /* category 2: */
-       OPC(2, OPC_ADD_F,        add.f),
-       OPC(2, OPC_MIN_F,        min.f),
-       OPC(2, OPC_MAX_F,        max.f),
-       OPC(2, OPC_MUL_F,        mul.f),
-       OPC(2, OPC_SIGN_F,       sign.f),
-       OPC(2, OPC_CMPS_F,       cmps.f),
-       OPC(2, OPC_ABSNEG_F,     absneg.f),
-       OPC(2, OPC_CMPV_F,       cmpv.f),
-       OPC(2, OPC_FLOOR_F,      floor.f),
-       OPC(2, OPC_CEIL_F,       ceil.f),
-       OPC(2, OPC_RNDNE_F,      rndne.f),
-       OPC(2, OPC_RNDAZ_F,      rndaz.f),
-       OPC(2, OPC_TRUNC_F,      trunc.f),
-       OPC(2, OPC_ADD_U,        add.u),
-       OPC(2, OPC_ADD_S,        add.s),
-       OPC(2, OPC_SUB_U,        sub.u),
-       OPC(2, OPC_SUB_S,        sub.s),
-       OPC(2, OPC_CMPS_U,       cmps.u),
-       OPC(2, OPC_CMPS_S,       cmps.s),
-       OPC(2, OPC_MIN_U,        min.u),
-       OPC(2, OPC_MIN_S,        min.s),
-       OPC(2, OPC_MAX_U,        max.u),
-       OPC(2, OPC_MAX_S,        max.s),
-       OPC(2, OPC_ABSNEG_S,     absneg.s),
-       OPC(2, OPC_AND_B,        and.b),
-       OPC(2, OPC_OR_B,         or.b),
-       OPC(2, OPC_NOT_B,        not.b),
-       OPC(2, OPC_XOR_B,        xor.b),
-       OPC(2, OPC_CMPV_U,       cmpv.u),
-       OPC(2, OPC_CMPV_S,       cmpv.s),
-       OPC(2, OPC_MUL_U24,      mul.u24),
-       OPC(2, OPC_MUL_S24,      mul.s24),
-       OPC(2, OPC_MULL_U,       mull.u),
-       OPC(2, OPC_BFREV_B,      bfrev.b),
-       OPC(2, OPC_CLZ_S,        clz.s),
-       OPC(2, OPC_CLZ_B,        clz.b),
-       OPC(2, OPC_SHL_B,        shl.b),
-       OPC(2, OPC_SHR_B,        shr.b),
-       OPC(2, OPC_ASHR_B,       ashr.b),
-       OPC(2, OPC_BARY_F,       bary.f),
-       OPC(2, OPC_MGEN_B,       mgen.b),
-       OPC(2, OPC_GETBIT_B,     getbit.b),
-       OPC(2, OPC_SETRM,        setrm),
-       OPC(2, OPC_CBITS_B,      cbits.b),
-       OPC(2, OPC_SHB,          shb),
-       OPC(2, OPC_MSAD,         msad),
-
-       /* category 3: */
-       OPC(3, OPC_MAD_U16,      mad.u16),
-       OPC(3, OPC_MADSH_U16,    madsh.u16),
-       OPC(3, OPC_MAD_S16,      mad.s16),
-       OPC(3, OPC_MADSH_M16,    madsh.m16),
-       OPC(3, OPC_MAD_U24,      mad.u24),
-       OPC(3, OPC_MAD_S24,      mad.s24),
-       OPC(3, OPC_MAD_F16,      mad.f16),
-       OPC(3, OPC_MAD_F32,      mad.f32),
-       OPC(3, OPC_SEL_B16,      sel.b16),
-       OPC(3, OPC_SEL_B32,      sel.b32),
-       OPC(3, OPC_SEL_S16,      sel.s16),
-       OPC(3, OPC_SEL_S32,      sel.s32),
-       OPC(3, OPC_SEL_F16,      sel.f16),
-       OPC(3, OPC_SEL_F32,      sel.f32),
-       OPC(3, OPC_SAD_S16,      sad.s16),
-       OPC(3, OPC_SAD_S32,      sad.s32),
-
-       /* category 4: */
-       OPC(4, OPC_RCP,          rcp),
-       OPC(4, OPC_RSQ,          rsq),
-       OPC(4, OPC_LOG2,         log2),
-       OPC(4, OPC_EXP2,         exp2),
-       OPC(4, OPC_SIN,          sin),
-       OPC(4, OPC_COS,          cos),
-       OPC(4, OPC_SQRT,         sqrt),
-       OPC(4, OPC_HRSQ,         hrsq),
-       OPC(4, OPC_HLOG2,        hlog2),
-       OPC(4, OPC_HEXP2,        hexp2),
-
-       /* category 5: */
-       OPC(5, OPC_ISAM,         isam),
-       OPC(5, OPC_ISAML,        isaml),
-       OPC(5, OPC_ISAMM,        isamm),
-       OPC(5, OPC_SAM,          sam),
-       OPC(5, OPC_SAMB,         samb),
-       OPC(5, OPC_SAML,         saml),
-       OPC(5, OPC_SAMGQ,        samgq),
-       OPC(5, OPC_GETLOD,       getlod),
-       OPC(5, OPC_CONV,         conv),
-       OPC(5, OPC_CONVM,        convm),
-       OPC(5, OPC_GETSIZE,      getsize),
-       OPC(5, OPC_GETBUF,       getbuf),
-       OPC(5, OPC_GETPOS,       getpos),
-       OPC(5, OPC_GETINFO,      getinfo),
-       OPC(5, OPC_DSX,          dsx),
-       OPC(5, OPC_DSY,          dsy),
-       OPC(5, OPC_GATHER4R,     gather4r),
-       OPC(5, OPC_GATHER4G,     gather4g),
-       OPC(5, OPC_GATHER4B,     gather4b),
-       OPC(5, OPC_GATHER4A,     gather4a),
-       OPC(5, OPC_SAMGP0,       samgp0),
-       OPC(5, OPC_SAMGP1,       samgp1),
-       OPC(5, OPC_SAMGP2,       samgp2),
-       OPC(5, OPC_SAMGP3,       samgp3),
-       OPC(5, OPC_DSXPP_1,      dsxpp.1),
-       OPC(5, OPC_DSYPP_1,      dsypp.1),
-       OPC(5, OPC_RGETPOS,      rgetpos),
-       OPC(5, OPC_RGETINFO,     rgetinfo),
-
-
-       /* category 6: */
-       OPC(6, OPC_LDG,          ldg),
-       OPC(6, OPC_LDL,          ldl),
-       OPC(6, OPC_LDP,          ldp),
-       OPC(6, OPC_STG,          stg),
-       OPC(6, OPC_STL,          stl),
-       OPC(6, OPC_STP,          stp),
-       OPC(6, OPC_LDIB,         ldib),
-       OPC(6, OPC_G2L,          g2l),
-       OPC(6, OPC_L2G,          l2g),
-       OPC(6, OPC_PREFETCH,     prefetch),
-       OPC(6, OPC_LDLW,         ldlw),
-       OPC(6, OPC_STLW,         stlw),
-       OPC(6, OPC_RESFMT,       resfmt),
-       OPC(6, OPC_RESINFO,      resinfo),
-       OPC(6, OPC_ATOMIC_ADD,     atomic.add),
-       OPC(6, OPC_ATOMIC_SUB,     atomic.sub),
-       OPC(6, OPC_ATOMIC_XCHG,    atomic.xchg),
-       OPC(6, OPC_ATOMIC_INC,     atomic.inc),
-       OPC(6, OPC_ATOMIC_DEC,     atomic.dec),
-       OPC(6, OPC_ATOMIC_CMPXCHG, atomic.cmpxchg),
-       OPC(6, OPC_ATOMIC_MIN,     atomic.min),
-       OPC(6, OPC_ATOMIC_MAX,     atomic.max),
-       OPC(6, OPC_ATOMIC_AND,     atomic.and),
-       OPC(6, OPC_ATOMIC_OR,      atomic.or),
-       OPC(6, OPC_ATOMIC_XOR,     atomic.xor),
-       OPC(6, OPC_LDGB,         ldgb),
-       OPC(6, OPC_STGB,         stgb),
-       OPC(6, OPC_STIB,         stib),
-       OPC(6, OPC_LDC,          ldc),
-       OPC(6, OPC_LDLV,         ldlv),
-
-       OPC(7, OPC_BAR,          bar),
-       OPC(7, OPC_FENCE,        fence),
-
-
-#undef OPC
-};
-
-#define GETINFO(instr) (&(opcs[((instr)->opc_cat << NOPC_BITS) | instr_opc(instr, ctx->gpu_id)]))
-
-static void print_single_instr(struct disasm_ctx *ctx, instr_t *instr)
-{
-       const char *name = GETINFO(instr)->name;
-       uint32_t opc = instr_opc(instr, ctx->gpu_id);
-
-       if (name) {
-               fprintf(ctx->out, "%s", name);
-               GETINFO(instr)->print(ctx, instr);
-       } else {
-               fprintf(ctx->out, "unknown(%d,%d)", instr->opc_cat, opc);
-
-               switch (instr->opc_cat) {
-               case 0: print_instr_cat0(ctx, instr); break;
-               case 1: print_instr_cat1(ctx, instr); break;
-               case 2: print_instr_cat2(ctx, instr); break;
-               case 3: print_instr_cat3(ctx, instr); break;
-               case 4: print_instr_cat4(ctx, instr); break;
-               case 5: print_instr_cat5(ctx, instr); break;
-               case 6: print_instr_cat6(ctx, instr); break;
-               case 7: print_instr_cat7(ctx, instr); break;
-               }
-       }
-}
-
-static bool print_instr(struct disasm_ctx *ctx, uint32_t *dwords, int n)
-{
-       instr_t *instr = (instr_t *)dwords;
-       uint32_t opc = instr_opc(instr, ctx->gpu_id);
-       unsigned nop = 0;
-       unsigned cycles = ctx->stats->instructions;
-
-       fprintf(ctx->out, "%s:%d:%04d:%04d[%08xx_%08xx] ", levels[ctx->level],
-                       instr->opc_cat, n, cycles++, dwords[1], dwords[0]);
-
-#if 0
-       /* print unknown bits: */
-       if (debug & PRINT_RAW)
-               fprintf(ctx->out, "[%08xx_%08xx] ", dwords[1] & 0x001ff800, dwords[0] & 0x00000000);
-
-       if (debug & PRINT_VERBOSE)
-               fprintf(ctx->out, "%d,%02d ", instr->opc_cat, opc);
-#endif
-
-       /* NOTE: order flags are printed is a bit fugly.. but for now I
-        * try to match the order in llvm-a3xx disassembler for easy
-        * diff'ing..
-        */
-
-       ctx->repeat = instr_repeat(instr);
-       ctx->stats->instructions += 1 + ctx->repeat;
-       ctx->stats->instlen++;
-
-       if (instr->sync) {
-               fprintf(ctx->out, "(sy)");
-               ctx->stats->sy++;
-       }
-       if (instr->ss && ((instr->opc_cat <= 4) || (instr->opc_cat == 7))) {
-               fprintf(ctx->out, "(ss)");
-               ctx->stats->ss++;
-       }
-       if (instr->jmp_tgt)
-               fprintf(ctx->out, "(jp)");
-       if ((instr->opc_cat == 0) && instr->cat0.eq)
-               fprintf(ctx->out, "(eq)");
-       if (instr_sat(instr))
-               fprintf(ctx->out, "(sat)");
-       if (ctx->repeat)
-               fprintf(ctx->out, "(rpt%d)", ctx->repeat);
-       else if ((instr->opc_cat == 2) && (instr->cat2.src1_r || instr->cat2.src2_r))
-               nop = (instr->cat2.src2_r * 2) + instr->cat2.src1_r;
-       else if ((instr->opc_cat == 3) && (instr->cat3.src1_r || instr->cat3.src2_r))
-               nop = (instr->cat3.src2_r * 2) + instr->cat3.src1_r;
-       ctx->stats->instructions += nop;
-       ctx->stats->nops += nop;
-       if (opc == OPC_NOP)
-               ctx->stats->nops += 1 + ctx->repeat;
-       if (nop)
-               fprintf(ctx->out, "(nop%d) ", nop);
-
-       if (instr->ul && ((2 <= instr->opc_cat) && (instr->opc_cat <= 4)))
-               fprintf(ctx->out, "(ul)");
-
-       print_single_instr(ctx, instr);
-       fprintf(ctx->out, "\n");
-
-       process_reg_dst(ctx);
-
-       if ((instr->opc_cat <= 4) && (debug & EXPAND_REPEAT)) {
-               int i;
-               for (i = 0; i < nop; i++) {
-                       fprintf(ctx->out, "%s:%d:%04d:%04d[                   ] ",
-                                       levels[ctx->level], instr->opc_cat, n, cycles++);
-                       fprintf(ctx->out, "nop\n");
-               }
-               for (i = 0; i < ctx->repeat; i++) {
-                       ctx->repeatidx = i + 1;
-                       fprintf(ctx->out, "%s:%d:%04d:%04d[                   ] ",
-                                       levels[ctx->level], instr->opc_cat, n, cycles++);
-
-                       print_single_instr(ctx, instr);
-                       fprintf(ctx->out, "\n");
-               }
-               ctx->repeatidx = 0;
-       }
-
-       return (instr->opc_cat == 0) &&
-               ((opc == OPC_END) || (opc == OPC_CHSH));
-}
-
-int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id)
-{
-       struct shader_stats stats;
-       return disasm_a3xx_stat(dwords, sizedwords, level, out, gpu_id, &stats);
-}
-
-int disasm_a3xx_stat(uint32_t *dwords, int sizedwords, int level, FILE *out,
-               unsigned gpu_id, struct shader_stats *stats)
-{
-       struct disasm_ctx ctx;
-       int i;
-       int nop_count = 0;
-       bool has_end = false;
-
-//     ir3_assert((sizedwords % 2) == 0);
-
-       memset(&ctx, 0, sizeof(ctx));
-       ctx.out = out;
-       ctx.level = level;
-       ctx.gpu_id = gpu_id;
-       ctx.stats = stats;
-       memset(ctx.stats, 0, sizeof(*ctx.stats));
-
-       for (i = 0; i < sizedwords; i += 2) {
-               has_end |= print_instr(&ctx, &dwords[i], i/2);
-               if (!has_end)
-                       continue;
-               if (dwords[i] == 0 && dwords[i + 1] == 0)
-                       nop_count++;
-               else
-                       nop_count = 0;
-               if (nop_count > 3)
-                       break;
-       }
-
-       print_reg_stats(&ctx);
-
-       return 0;
-}
 
+++ /dev/null
-/*
- * Copyright © 2012 Rob Clark <robclark@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef DISASM_H_
-#define DISASM_H_
-
-#include <stdio.h>
-
-#include "compiler/shader_enums.h"
-
-/* bitmask of debug flags */
-enum debug_t {
-       PRINT_RAW      = 0x1,    /* dump raw hexdump */
-       PRINT_VERBOSE  = 0x2,
-       EXPAND_REPEAT  = 0x4,
-};
-
-struct shader_stats {
-       /* instructions counts rpnN, and instlen does not */
-       int instructions, instlen;
-       int nops;
-       int ss, sy;
-       int constlen;
-};
-
-int disasm_a2xx(uint32_t *dwords, int sizedwords, int level, gl_shader_stage type);
-int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id);
-int disasm_a3xx_stat(uint32_t *dwords, int sizedwords, int level, FILE *out,
-               unsigned gpu_id, struct shader_stats *stats);
-void disasm_set_debug(enum debug_t debug);
-
-#endif /* DISASM_H_ */
 
+++ /dev/null
-/*
- * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef INSTR_A3XX_H_
-#define INSTR_A3XX_H_
-
-#define PACKED __attribute__((__packed__))
-
-#include <stdint.h>
-#include <stdbool.h>
-#include <assert.h>
-
-void ir3_assert_handler(const char *expr, const char *file, int line,
-               const char *func) __attribute__((weak)) __attribute__ ((__noreturn__));
-
-/* A wrapper for assert() that allows overriding handling of a failed
- * assert.  This is needed for tools like crashdec which can want to
- * attempt to disassemble memory that might not actually be valid
- * instructions.
- */
-#define ir3_assert(expr) do { \
-               if (!(expr)) { \
-                       if (ir3_assert_handler) { \
-                               ir3_assert_handler(#expr, __FILE__, __LINE__, __func__); \
-                       } \
-                       assert(expr); \
-               } \
-       } while (0)
-
-/* size of largest OPC field of all the instruction categories: */
-#define NOPC_BITS 6
-
-#define _OPC(cat, opc)   (((cat) << NOPC_BITS) | opc)
-
-typedef enum {
-       /* category 0: */
-       OPC_NOP             = _OPC(0, 0),
-       OPC_B               = _OPC(0, 1),
-       OPC_JUMP            = _OPC(0, 2),
-       OPC_CALL            = _OPC(0, 3),
-       OPC_RET             = _OPC(0, 4),
-       OPC_KILL            = _OPC(0, 5),
-       OPC_END             = _OPC(0, 6),
-       OPC_EMIT            = _OPC(0, 7),
-       OPC_CUT             = _OPC(0, 8),
-       OPC_CHMASK          = _OPC(0, 9),
-       OPC_CHSH            = _OPC(0, 10),
-       OPC_FLOW_REV        = _OPC(0, 11),
-
-       OPC_BKT             = _OPC(0, 16),
-       OPC_STKS            = _OPC(0, 17),
-       OPC_STKR            = _OPC(0, 18),
-       OPC_XSET            = _OPC(0, 19),
-       OPC_XCLR            = _OPC(0, 20),
-       OPC_GETONE          = _OPC(0, 21),
-       OPC_DBG             = _OPC(0, 22),
-       OPC_SHPS            = _OPC(0, 23),   /* shader prologue start */
-       OPC_SHPE            = _OPC(0, 24),   /* shader prologue end */
-
-       OPC_PREDT           = _OPC(0, 29),   /* predicated true */
-       OPC_PREDF           = _OPC(0, 30),   /* predicated false */
-       OPC_PREDE           = _OPC(0, 31),   /* predicated end */
-
-       /* category 1: */
-       OPC_MOV             = _OPC(1, 0),
-
-       /* category 2: */
-       OPC_ADD_F           = _OPC(2, 0),
-       OPC_MIN_F           = _OPC(2, 1),
-       OPC_MAX_F           = _OPC(2, 2),
-       OPC_MUL_F           = _OPC(2, 3),
-       OPC_SIGN_F          = _OPC(2, 4),
-       OPC_CMPS_F          = _OPC(2, 5),
-       OPC_ABSNEG_F        = _OPC(2, 6),
-       OPC_CMPV_F          = _OPC(2, 7),
-       /* 8 - invalid */
-       OPC_FLOOR_F         = _OPC(2, 9),
-       OPC_CEIL_F          = _OPC(2, 10),
-       OPC_RNDNE_F         = _OPC(2, 11),
-       OPC_RNDAZ_F         = _OPC(2, 12),
-       OPC_TRUNC_F         = _OPC(2, 13),
-       /* 14-15 - invalid */
-       OPC_ADD_U           = _OPC(2, 16),
-       OPC_ADD_S           = _OPC(2, 17),
-       OPC_SUB_U           = _OPC(2, 18),
-       OPC_SUB_S           = _OPC(2, 19),
-       OPC_CMPS_U          = _OPC(2, 20),
-       OPC_CMPS_S          = _OPC(2, 21),
-       OPC_MIN_U           = _OPC(2, 22),
-       OPC_MIN_S           = _OPC(2, 23),
-       OPC_MAX_U           = _OPC(2, 24),
-       OPC_MAX_S           = _OPC(2, 25),
-       OPC_ABSNEG_S        = _OPC(2, 26),
-       /* 27 - invalid */
-       OPC_AND_B           = _OPC(2, 28),
-       OPC_OR_B            = _OPC(2, 29),
-       OPC_NOT_B           = _OPC(2, 30),
-       OPC_XOR_B           = _OPC(2, 31),
-       /* 32 - invalid */
-       OPC_CMPV_U          = _OPC(2, 33),
-       OPC_CMPV_S          = _OPC(2, 34),
-       /* 35-47 - invalid */
-       OPC_MUL_U24         = _OPC(2, 48), /* 24b mul into 32b result */
-       OPC_MUL_S24         = _OPC(2, 49), /* 24b mul into 32b result with sign extension */
-       OPC_MULL_U          = _OPC(2, 50),
-       OPC_BFREV_B         = _OPC(2, 51),
-       OPC_CLZ_S           = _OPC(2, 52),
-       OPC_CLZ_B           = _OPC(2, 53),
-       OPC_SHL_B           = _OPC(2, 54),
-       OPC_SHR_B           = _OPC(2, 55),
-       OPC_ASHR_B          = _OPC(2, 56),
-       OPC_BARY_F          = _OPC(2, 57),
-       OPC_MGEN_B          = _OPC(2, 58),
-       OPC_GETBIT_B        = _OPC(2, 59),
-       OPC_SETRM           = _OPC(2, 60),
-       OPC_CBITS_B         = _OPC(2, 61),
-       OPC_SHB             = _OPC(2, 62),
-       OPC_MSAD            = _OPC(2, 63),
-
-       /* category 3: */
-       OPC_MAD_U16         = _OPC(3, 0),
-       OPC_MADSH_U16       = _OPC(3, 1),
-       OPC_MAD_S16         = _OPC(3, 2),
-       OPC_MADSH_M16       = _OPC(3, 3),   /* should this be .s16? */
-       OPC_MAD_U24         = _OPC(3, 4),
-       OPC_MAD_S24         = _OPC(3, 5),
-       OPC_MAD_F16         = _OPC(3, 6),
-       OPC_MAD_F32         = _OPC(3, 7),
-       OPC_SEL_B16         = _OPC(3, 8),
-       OPC_SEL_B32         = _OPC(3, 9),
-       OPC_SEL_S16         = _OPC(3, 10),
-       OPC_SEL_S32         = _OPC(3, 11),
-       OPC_SEL_F16         = _OPC(3, 12),
-       OPC_SEL_F32         = _OPC(3, 13),
-       OPC_SAD_S16         = _OPC(3, 14),
-       OPC_SAD_S32         = _OPC(3, 15),
-
-       /* category 4: */
-       OPC_RCP             = _OPC(4, 0),
-       OPC_RSQ             = _OPC(4, 1),
-       OPC_LOG2            = _OPC(4, 2),
-       OPC_EXP2            = _OPC(4, 3),
-       OPC_SIN             = _OPC(4, 4),
-       OPC_COS             = _OPC(4, 5),
-       OPC_SQRT            = _OPC(4, 6),
-       /* NOTE that these are 8+opc from their highp equivs, so it's possible
-        * that the high order bit in the opc field has been repurposed for
-        * half-precision use?  But note that other ops (rcp/lsin/cos/sqrt)
-        * still use the same opc as highp
-        */
-       OPC_HRSQ            = _OPC(4, 9),
-       OPC_HLOG2           = _OPC(4, 10),
-       OPC_HEXP2           = _OPC(4, 11),
-
-       /* category 5: */
-       OPC_ISAM            = _OPC(5, 0),
-       OPC_ISAML           = _OPC(5, 1),
-       OPC_ISAMM           = _OPC(5, 2),
-       OPC_SAM             = _OPC(5, 3),
-       OPC_SAMB            = _OPC(5, 4),
-       OPC_SAML            = _OPC(5, 5),
-       OPC_SAMGQ           = _OPC(5, 6),
-       OPC_GETLOD          = _OPC(5, 7),
-       OPC_CONV            = _OPC(5, 8),
-       OPC_CONVM           = _OPC(5, 9),
-       OPC_GETSIZE         = _OPC(5, 10),
-       OPC_GETBUF          = _OPC(5, 11),
-       OPC_GETPOS          = _OPC(5, 12),
-       OPC_GETINFO         = _OPC(5, 13),
-       OPC_DSX             = _OPC(5, 14),
-       OPC_DSY             = _OPC(5, 15),
-       OPC_GATHER4R        = _OPC(5, 16),
-       OPC_GATHER4G        = _OPC(5, 17),
-       OPC_GATHER4B        = _OPC(5, 18),
-       OPC_GATHER4A        = _OPC(5, 19),
-       OPC_SAMGP0          = _OPC(5, 20),
-       OPC_SAMGP1          = _OPC(5, 21),
-       OPC_SAMGP2          = _OPC(5, 22),
-       OPC_SAMGP3          = _OPC(5, 23),
-       OPC_DSXPP_1         = _OPC(5, 24),
-       OPC_DSYPP_1         = _OPC(5, 25),
-       OPC_RGETPOS         = _OPC(5, 26),
-       OPC_RGETINFO        = _OPC(5, 27),
-
-       /* category 6: */
-       OPC_LDG             = _OPC(6, 0),        /* load-global */
-       OPC_LDL             = _OPC(6, 1),
-       OPC_LDP             = _OPC(6, 2),
-       OPC_STG             = _OPC(6, 3),        /* store-global */
-       OPC_STL             = _OPC(6, 4),
-       OPC_STP             = _OPC(6, 5),
-       OPC_LDIB            = _OPC(6, 6),
-       OPC_G2L             = _OPC(6, 7),
-       OPC_L2G             = _OPC(6, 8),
-       OPC_PREFETCH        = _OPC(6, 9),
-       OPC_LDLW            = _OPC(6, 10),
-       OPC_STLW            = _OPC(6, 11),
-       OPC_RESFMT          = _OPC(6, 14),
-       OPC_RESINFO         = _OPC(6, 15),
-       OPC_ATOMIC_ADD      = _OPC(6, 16),
-       OPC_ATOMIC_SUB      = _OPC(6, 17),
-       OPC_ATOMIC_XCHG     = _OPC(6, 18),
-       OPC_ATOMIC_INC      = _OPC(6, 19),
-       OPC_ATOMIC_DEC      = _OPC(6, 20),
-       OPC_ATOMIC_CMPXCHG  = _OPC(6, 21),
-       OPC_ATOMIC_MIN      = _OPC(6, 22),
-       OPC_ATOMIC_MAX      = _OPC(6, 23),
-       OPC_ATOMIC_AND      = _OPC(6, 24),
-       OPC_ATOMIC_OR       = _OPC(6, 25),
-       OPC_ATOMIC_XOR      = _OPC(6, 26),
-       OPC_LDGB            = _OPC(6, 27),
-       OPC_STGB            = _OPC(6, 28),
-       OPC_STIB            = _OPC(6, 29),
-       OPC_LDC             = _OPC(6, 30),
-       OPC_LDLV            = _OPC(6, 31),
-
-       /* category 7: */
-       OPC_BAR             = _OPC(7, 0),
-       OPC_FENCE           = _OPC(7, 1),
-} opc_t;
-
-#define opc_cat(opc) ((int)((opc) >> NOPC_BITS))
-#define opc_op(opc)  ((unsigned)((opc) & ((1 << NOPC_BITS) - 1)))
-
-typedef enum {
-       TYPE_F16 = 0,
-       TYPE_F32 = 1,
-       TYPE_U16 = 2,
-       TYPE_U32 = 3,
-       TYPE_S16 = 4,
-       TYPE_S32 = 5,
-       TYPE_U8  = 6,
-       TYPE_S8  = 7,  // XXX I assume?
-} type_t;
-
-static inline uint32_t type_size(type_t type)
-{
-       switch (type) {
-       case TYPE_F32:
-       case TYPE_U32:
-       case TYPE_S32:
-               return 32;
-       case TYPE_F16:
-       case TYPE_U16:
-       case TYPE_S16:
-               return 16;
-       case TYPE_U8:
-       case TYPE_S8:
-               return 8;
-       default:
-               ir3_assert(0); /* invalid type */
-               return 0;
-       }
-}
-
-static inline int type_float(type_t type)
-{
-       return (type == TYPE_F32) || (type == TYPE_F16);
-}
-
-static inline int type_uint(type_t type)
-{
-       return (type == TYPE_U32) || (type == TYPE_U16) || (type == TYPE_U8);
-}
-
-static inline int type_sint(type_t type)
-{
-       return (type == TYPE_S32) || (type == TYPE_S16) || (type == TYPE_S8);
-}
-
-typedef union PACKED {
-       /* normal gpr or const src register: */
-       struct PACKED {
-               uint32_t comp  : 2;
-               uint32_t num   : 10;
-       };
-       /* for immediate val: */
-       int32_t  iim_val   : 11;
-       /* to make compiler happy: */
-       uint32_t dummy32;
-       uint32_t dummy10   : 10;
-       int32_t  idummy10  : 10;
-       uint32_t dummy11   : 11;
-       uint32_t dummy12   : 12;
-       uint32_t dummy13   : 13;
-       uint32_t dummy8    : 8;
-       int32_t  idummy13  : 13;
-       int32_t  idummy8   : 8;
-} reg_t;
-
-/* special registers: */
-#define REG_A0 61       /* address register */
-#define REG_P0 62       /* predicate register */
-
-static inline int reg_special(reg_t reg)
-{
-       return (reg.num == REG_A0) || (reg.num == REG_P0);
-}
-
-typedef enum {
-       BRANCH_PLAIN = 0,   /* br */
-       BRANCH_OR    = 1,   /* brao */
-       BRANCH_AND   = 2,   /* braa */
-       BRANCH_CONST = 3,   /* brac */
-       BRANCH_ANY   = 4,   /* bany */
-       BRANCH_ALL   = 5,   /* ball */
-       BRANCH_X     = 6,   /* brax ??? */
-} brtype_t;
-
-typedef struct PACKED {
-       /* dword0: */
-       union PACKED {
-               struct PACKED {
-                       int16_t  immed    : 16;
-                       uint32_t dummy1   : 16;
-               } a3xx;
-               struct PACKED {
-                       int32_t  immed    : 20;
-                       uint32_t dummy1   : 12;
-               } a4xx;
-               struct PACKED {
-                       int32_t immed     : 32;
-               } a5xx;
-       };
-
-       /* dword1: */
-       uint32_t idx      : 5;  /* brac.N index */
-       uint32_t brtype   : 3;  /* branch type, see brtype_t */
-       uint32_t repeat   : 3;
-       uint32_t dummy3   : 1;
-       uint32_t ss       : 1;
-       uint32_t inv1     : 1;
-       uint32_t comp1    : 2;
-       uint32_t eq       : 1;
-       uint32_t opc_hi   : 1;  /* at least one bit */
-       uint32_t dummy4   : 2;
-       uint32_t inv0     : 1;
-       uint32_t comp0    : 2;  /* component for first src */
-       uint32_t opc      : 4;
-       uint32_t jmp_tgt  : 1;
-       uint32_t sync     : 1;
-       uint32_t opc_cat  : 3;
-} instr_cat0_t;
-
-typedef struct PACKED {
-       /* dword0: */
-       union PACKED {
-               /* for normal src register: */
-               struct PACKED {
-                       uint32_t src : 11;
-                       /* at least low bit of pad must be zero or it will
-                        * look like a address relative src
-                        */
-                       uint32_t pad : 21;
-               };
-               /* for address relative: */
-               struct PACKED {
-                       int32_t  off : 10;
-                       uint32_t src_rel_c : 1;
-                       uint32_t src_rel : 1;
-                       uint32_t unknown : 20;
-               };
-               /* for immediate: */
-               int32_t  iim_val;
-               uint32_t uim_val;
-               float    fim_val;
-       };
-
-       /* dword1: */
-       uint32_t dst        : 8;
-       uint32_t repeat     : 3;
-       uint32_t src_r      : 1;
-       uint32_t ss         : 1;
-       uint32_t ul         : 1;
-       uint32_t dst_type   : 3;
-       uint32_t dst_rel    : 1;
-       uint32_t src_type   : 3;
-       uint32_t src_c      : 1;
-       uint32_t src_im     : 1;
-       uint32_t even       : 1;
-       uint32_t pos_inf    : 1;
-       uint32_t must_be_0  : 2;
-       uint32_t jmp_tgt    : 1;
-       uint32_t sync       : 1;
-       uint32_t opc_cat    : 3;
-} instr_cat1_t;
-
-typedef struct PACKED {
-       /* dword0: */
-       union PACKED {
-               struct PACKED {
-                       uint32_t src1         : 11;
-                       uint32_t must_be_zero1: 2;
-                       uint32_t src1_im      : 1;   /* immediate */
-                       uint32_t src1_neg     : 1;   /* negate */
-                       uint32_t src1_abs     : 1;   /* absolute value */
-               };
-               struct PACKED {
-                       uint32_t src1         : 10;
-                       uint32_t src1_c       : 1;   /* relative-const */
-                       uint32_t src1_rel     : 1;   /* relative address */
-                       uint32_t must_be_zero : 1;
-                       uint32_t dummy        : 3;
-               } rel1;
-               struct PACKED {
-                       uint32_t src1         : 12;
-                       uint32_t src1_c       : 1;   /* const */
-                       uint32_t dummy        : 3;
-               } c1;
-       };
-
-       union PACKED {
-               struct PACKED {
-                       uint32_t src2         : 11;
-                       uint32_t must_be_zero2: 2;
-                       uint32_t src2_im      : 1;   /* immediate */
-                       uint32_t src2_neg     : 1;   /* negate */
-                       uint32_t src2_abs     : 1;   /* absolute value */
-               };
-               struct PACKED {
-                       uint32_t src2         : 10;
-                       uint32_t src2_c       : 1;   /* relative-const */
-                       uint32_t src2_rel     : 1;   /* relative address */
-                       uint32_t must_be_zero : 1;
-                       uint32_t dummy        : 3;
-               } rel2;
-               struct PACKED {
-                       uint32_t src2         : 12;
-                       uint32_t src2_c       : 1;   /* const */
-                       uint32_t dummy        : 3;
-               } c2;
-       };
-
-       /* dword1: */
-       uint32_t dst      : 8;
-       uint32_t repeat   : 2;
-       uint32_t sat      : 1;
-       uint32_t src1_r   : 1;   /* doubles as nop0 if repeat==0 */
-       uint32_t ss       : 1;
-       uint32_t ul       : 1;   /* dunno */
-       uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
-       uint32_t ei       : 1;
-       uint32_t cond     : 3;
-       uint32_t src2_r   : 1;   /* doubles as nop1 if repeat==0 */
-       uint32_t full     : 1;   /* not half */
-       uint32_t opc      : 6;
-       uint32_t jmp_tgt  : 1;
-       uint32_t sync     : 1;
-       uint32_t opc_cat  : 3;
-} instr_cat2_t;
-
-typedef struct PACKED {
-       /* dword0: */
-       union PACKED {
-               struct PACKED {
-                       uint32_t src1         : 11;
-                       uint32_t must_be_zero1: 2;
-                       uint32_t src2_c       : 1;
-                       uint32_t src1_neg     : 1;
-                       uint32_t src2_r       : 1;  /* doubles as nop1 if repeat==0 */
-               };
-               struct PACKED {
-                       uint32_t src1         : 10;
-                       uint32_t src1_c       : 1;
-                       uint32_t src1_rel     : 1;
-                       uint32_t must_be_zero : 1;
-                       uint32_t dummy        : 3;
-               } rel1;
-               struct PACKED {
-                       uint32_t src1         : 12;
-                       uint32_t src1_c       : 1;
-                       uint32_t dummy        : 3;
-               } c1;
-       };
-
-       union PACKED {
-               struct PACKED {
-                       uint32_t src3         : 11;
-                       uint32_t must_be_zero2: 2;
-                       uint32_t src3_r       : 1;
-                       uint32_t src2_neg     : 1;
-                       uint32_t src3_neg     : 1;
-               };
-               struct PACKED {
-                       uint32_t src3         : 10;
-                       uint32_t src3_c       : 1;
-                       uint32_t src3_rel     : 1;
-                       uint32_t must_be_zero : 1;
-                       uint32_t dummy        : 3;
-               } rel2;
-               struct PACKED {
-                       uint32_t src3         : 12;
-                       uint32_t src3_c       : 1;
-                       uint32_t dummy        : 3;
-               } c2;
-       };
-
-       /* dword1: */
-       uint32_t dst      : 8;
-       uint32_t repeat   : 2;
-       uint32_t sat      : 1;
-       uint32_t src1_r   : 1;   /* doubles as nop0 if repeat==0 */
-       uint32_t ss       : 1;
-       uint32_t ul       : 1;
-       uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
-       uint32_t src2     : 8;
-       uint32_t opc      : 4;
-       uint32_t jmp_tgt  : 1;
-       uint32_t sync     : 1;
-       uint32_t opc_cat  : 3;
-} instr_cat3_t;
-
-static inline bool instr_cat3_full(instr_cat3_t *cat3)
-{
-       switch (_OPC(3, cat3->opc)) {
-       case OPC_MAD_F16:
-       case OPC_MAD_U16:
-       case OPC_MAD_S16:
-       case OPC_SEL_B16:
-       case OPC_SEL_S16:
-       case OPC_SEL_F16:
-       case OPC_SAD_S16:
-       case OPC_SAD_S32:  // really??
-               return false;
-       default:
-               return true;
-       }
-}
-
-typedef struct PACKED {
-       /* dword0: */
-       union PACKED {
-               struct PACKED {
-                       uint32_t src          : 11;
-                       uint32_t must_be_zero1: 2;
-                       uint32_t src_im       : 1;   /* immediate */
-                       uint32_t src_neg      : 1;   /* negate */
-                       uint32_t src_abs      : 1;   /* absolute value */
-               };
-               struct PACKED {
-                       uint32_t src          : 10;
-                       uint32_t src_c        : 1;   /* relative-const */
-                       uint32_t src_rel      : 1;   /* relative address */
-                       uint32_t must_be_zero : 1;
-                       uint32_t dummy        : 3;
-               } rel;
-               struct PACKED {
-                       uint32_t src          : 12;
-                       uint32_t src_c        : 1;   /* const */
-                       uint32_t dummy        : 3;
-               } c;
-       };
-       uint32_t dummy1   : 16;  /* seem to be ignored */
-
-       /* dword1: */
-       uint32_t dst      : 8;
-       uint32_t repeat   : 2;
-       uint32_t sat      : 1;
-       uint32_t src_r    : 1;
-       uint32_t ss       : 1;
-       uint32_t ul       : 1;
-       uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
-       uint32_t dummy2   : 5;   /* seem to be ignored */
-       uint32_t full     : 1;   /* not half */
-       uint32_t opc      : 6;
-       uint32_t jmp_tgt  : 1;
-       uint32_t sync     : 1;
-       uint32_t opc_cat  : 3;
-} instr_cat4_t;
-
-/* With is_bindless_s2en = 1, this determines whether bindless is enabled and
- * if so, how to get the (base, index) pair for both sampler and texture.
- * There is a single base embedded in the instruction, which is always used
- * for the texture.
- */
-typedef enum {
-       /* Use traditional GL binding model, get texture and sampler index
-        * from src3 which is not presumed to be uniform. This is
-        * backwards-compatible with earlier generations, where this field was
-        * always 0 and nonuniform-indexed sampling always worked.
-        */
-       CAT5_NONUNIFORM = 0,
-
-       /* The sampler base comes from the low 3 bits of a1.x, and the sampler
-        * and texture index come from src3 which is presumed to be uniform.
-        */
-       CAT5_BINDLESS_A1_UNIFORM = 1,
-
-       /* The texture and sampler share the same base, and the sampler and
-        * texture index come from src3 which is *not* presumed to be uniform.
-        */
-       CAT5_BINDLESS_NONUNIFORM = 2,
-
-       /* The sampler base comes from the low 3 bits of a1.x, and the sampler
-        * and texture index come from src3 which is *not* presumed to be
-        * uniform.
-        */
-       CAT5_BINDLESS_A1_NONUNIFORM = 3,
-
-       /* Use traditional GL binding model, get texture and sampler index
-        * from src3 which is presumed to be uniform.
-        */
-       CAT5_UNIFORM = 4,
-
-       /* The texture and sampler share the same base, and the sampler and
-        * texture index come from src3 which is presumed to be uniform.
-        */
-       CAT5_BINDLESS_UNIFORM = 5,
-
-       /* The texture and sampler share the same base, get sampler index from low
-        * 4 bits of src3 and texture index from high 4 bits.
-        */
-       CAT5_BINDLESS_IMM = 6,
-
-       /* The sampler base comes from the low 3 bits of a1.x, and the texture
-        * index comes from the next 8 bits of a1.x. The sampler index is an
-        * immediate in src3.
-        */
-       CAT5_BINDLESS_A1_IMM = 7,
-} cat5_desc_mode_t;
-
-typedef struct PACKED {
-       /* dword0: */
-       union PACKED {
-               /* normal case: */
-               struct PACKED {
-                       uint32_t full     : 1;   /* not half */
-                       uint32_t src1     : 8;
-                       uint32_t src2     : 8;
-                       uint32_t dummy1   : 4;   /* seem to be ignored */
-                       uint32_t samp     : 4;
-                       uint32_t tex      : 7;
-               } norm;
-               /* s2en case: */
-               struct PACKED {
-                       uint32_t full         : 1;   /* not half */
-                       uint32_t src1         : 8;
-                       uint32_t src2         : 8;
-                       uint32_t dummy1       : 2;
-                       uint32_t base_hi      : 2;
-                       uint32_t src3         : 8;
-                       uint32_t desc_mode    : 3;
-               } s2en_bindless;
-               /* same in either case: */
-               // XXX I think, confirm this
-               struct PACKED {
-                       uint32_t full     : 1;   /* not half */
-                       uint32_t src1     : 8;
-                       uint32_t src2     : 8;
-                       uint32_t pad      : 15;
-               };
-       };
-
-       /* dword1: */
-       uint32_t dst              : 8;
-       uint32_t wrmask           : 4;   /* write-mask */
-       uint32_t type             : 3;
-       uint32_t base_lo          : 1;   /* used with bindless */
-       uint32_t is_3d            : 1;
-
-       uint32_t is_a             : 1;
-       uint32_t is_s             : 1;
-       uint32_t is_s2en_bindless : 1;
-       uint32_t is_o             : 1;
-       uint32_t is_p             : 1;
-
-       uint32_t opc              : 5;
-       uint32_t jmp_tgt          : 1;
-       uint32_t sync             : 1;
-       uint32_t opc_cat          : 3;
-} instr_cat5_t;
-
-/* dword0 encoding for src_off: [src1 + off], src2: */
-typedef struct PACKED {
-       /* dword0: */
-       uint32_t mustbe1  : 1;
-       int32_t  off      : 13;
-       uint32_t src1     : 8;
-       uint32_t src1_im  : 1;
-       uint32_t src2_im  : 1;
-       uint32_t src2     : 8;
-
-       /* dword1: */
-       uint32_t dword1;
-} instr_cat6a_t;
-
-/* dword0 encoding for !src_off: [src1], src2 */
-typedef struct PACKED {
-       /* dword0: */
-       uint32_t mustbe0  : 1;
-       uint32_t src1     : 13;
-       uint32_t ignore0  : 8;
-       uint32_t src1_im  : 1;
-       uint32_t src2_im  : 1;
-       uint32_t src2     : 8;
-
-       /* dword1: */
-       uint32_t dword1;
-} instr_cat6b_t;
-
-/* dword1 encoding for dst_off: */
-typedef struct PACKED {
-       /* dword0: */
-       uint32_t dword0;
-
-       /* note: there is some weird stuff going on where sometimes
-        * cat6->a.off is involved.. but that seems like a bug in
-        * the blob, since it is used even if !cat6->src_off
-        * It would make sense for there to be some more bits to
-        * bring us to 11 bits worth of offset, but not sure..
-        */
-       int32_t off       : 8;
-       uint32_t mustbe1  : 1;
-       uint32_t dst      : 8;
-       uint32_t pad1     : 15;
-} instr_cat6c_t;
-
-/* dword1 encoding for !dst_off: */
-typedef struct PACKED {
-       /* dword0: */
-       uint32_t dword0;
-
-       uint32_t dst      : 8;
-       uint32_t mustbe0  : 1;
-       uint32_t idx      : 8;
-       uint32_t pad0     : 15;
-} instr_cat6d_t;
-
-/* ldgb and atomics..
- *
- * ldgb:      pad0=0, pad3=1
- * atomic .g: pad0=1, pad3=1
- *        .l: pad0=1, pad3=0
- */
-typedef struct PACKED {
-       /* dword0: */
-       uint32_t pad0     : 1;
-       uint32_t src3     : 8;
-       uint32_t d        : 2;
-       uint32_t typed    : 1;
-       uint32_t type_size : 2;
-       uint32_t src1     : 8;
-       uint32_t src1_im  : 1;
-       uint32_t src2_im  : 1;
-       uint32_t src2     : 8;
-
-       /* dword1: */
-       uint32_t dst      : 8;
-       uint32_t mustbe0  : 1;
-       uint32_t src_ssbo : 8;
-       uint32_t pad2     : 3;  // type
-       uint32_t g        : 1;
-       uint32_t pad3     : 1;
-       uint32_t pad4     : 10; // opc/jmp_tgt/sync/opc_cat
-} instr_cat6ldgb_t;
-
-/* stgb, pad0=0, pad3=2
- */
-typedef struct PACKED {
-       /* dword0: */
-       uint32_t mustbe1  : 1;  // ???
-       uint32_t src1     : 8;
-       uint32_t d        : 2;
-       uint32_t typed    : 1;
-       uint32_t type_size : 2;
-       uint32_t pad0     : 9;
-       uint32_t src2_im  : 1;
-       uint32_t src2     : 8;
-
-       /* dword1: */
-       uint32_t src3     : 8;
-       uint32_t src3_im  : 1;
-       uint32_t dst_ssbo : 8;
-       uint32_t pad2     : 3;  // type
-       uint32_t pad3     : 2;
-       uint32_t pad4     : 10; // opc/jmp_tgt/sync/opc_cat
-} instr_cat6stgb_t;
-
-typedef union PACKED {
-       instr_cat6a_t a;
-       instr_cat6b_t b;
-       instr_cat6c_t c;
-       instr_cat6d_t d;
-       instr_cat6ldgb_t ldgb;
-       instr_cat6stgb_t stgb;
-       struct PACKED {
-               /* dword0: */
-               uint32_t src_off  : 1;
-               uint32_t pad1     : 31;
-
-               /* dword1: */
-               uint32_t pad2     : 8;
-               uint32_t dst_off  : 1;
-               uint32_t pad3     : 8;
-               uint32_t type     : 3;
-               uint32_t g        : 1;  /* or in some cases it means dst immed */
-               uint32_t pad4     : 1;
-               uint32_t opc      : 5;
-               uint32_t jmp_tgt  : 1;
-               uint32_t sync     : 1;
-               uint32_t opc_cat  : 3;
-       };
-} instr_cat6_t;
-
-/* Similar to cat5_desc_mode_t, describes how the descriptor is loaded.
- */
-typedef enum {
-       /* Use old GL binding model with an immediate index. */
-       CAT6_IMM = 0,
-
-       CAT6_UNIFORM = 1,
-
-       CAT6_NONUNIFORM = 2,
-
-       /* Use the bindless model, with an immediate index.
-        */
-       CAT6_BINDLESS_IMM = 4,
-
-       /* Use the bindless model, with a uniform register index.
-        */
-       CAT6_BINDLESS_UNIFORM = 5,
-
-       /* Use the bindless model, with a register index that isn't guaranteed
-        * to be uniform. This presumably checks if the indices are equal and
-        * splits up the load/store, because it works the way you would
-        * expect.
-        */
-       CAT6_BINDLESS_NONUNIFORM = 6,
-} cat6_desc_mode_t;
-
-/**
- * For atomic ops (which return a value):
- *
- *    pad1=1, pad3=c, pad5=3
- *    src1    - vecN offset/coords
- *    src2.x  - is actually dest register
- *    src2.y  - is 'data' except for cmpxchg where src2.y is 'compare'
- *              and src2.z is 'data'
- *
- * For stib (which does not return a value):
- *    pad1=0, pad3=c, pad5=2
- *    src1    - vecN offset/coords
- *    src2    - value to store
- *
- * For ldib:
- *    pad1=1, pad3=c, pad5=2
- *    src1    - vecN offset/coords
- *
- * for ldc (load from UBO using descriptor):
- *    pad1=0, pad3=8, pad5=2
- *
- * pad2 and pad5 are only observed to be 0.
- */
-typedef struct PACKED {
-       /* dword0: */
-       uint32_t pad1     : 1;
-       uint32_t base     : 3;
-       uint32_t pad2     : 2;
-       uint32_t desc_mode : 3;
-       uint32_t d        : 2;
-       uint32_t typed    : 1;
-       uint32_t type_size : 2;
-       uint32_t opc      : 5;
-       uint32_t pad3     : 5;
-       uint32_t src1     : 8;  /* coordinate/offset */
-
-       /* dword1: */
-       uint32_t src2     : 8;  /* or the dst for load instructions */
-       uint32_t pad4     : 1;  //mustbe0 ??
-       uint32_t ssbo     : 8;  /* ssbo/image binding point */
-       uint32_t type     : 3;
-       uint32_t pad5     : 7;
-       uint32_t jmp_tgt  : 1;
-       uint32_t sync     : 1;
-       uint32_t opc_cat  : 3;
-} instr_cat6_a6xx_t;
-
-typedef struct PACKED {
-       /* dword0: */
-       uint32_t pad1     : 32;
-
-       /* dword1: */
-       uint32_t pad2     : 12;
-       uint32_t ss       : 1;  /* maybe in the encoding, but blob only uses (sy) */
-       uint32_t pad3     : 6;
-       uint32_t w        : 1;  /* write */
-       uint32_t r        : 1;  /* read */
-       uint32_t l        : 1;  /* local */
-       uint32_t g        : 1;  /* global */
-       uint32_t opc      : 4;  /* presumed, but only a couple known OPCs */
-       uint32_t jmp_tgt  : 1;  /* (jp) */
-       uint32_t sync     : 1;  /* (sy) */
-       uint32_t opc_cat  : 3;
-} instr_cat7_t;
-
-typedef union PACKED {
-       instr_cat0_t cat0;
-       instr_cat1_t cat1;
-       instr_cat2_t cat2;
-       instr_cat3_t cat3;
-       instr_cat4_t cat4;
-       instr_cat5_t cat5;
-       instr_cat6_t cat6;
-       instr_cat6_a6xx_t cat6_a6xx;
-       instr_cat7_t cat7;
-       struct PACKED {
-               /* dword0: */
-               uint32_t pad1     : 32;
-
-               /* dword1: */
-               uint32_t pad2     : 12;
-               uint32_t ss       : 1;  /* cat1-cat4 (cat0??) and cat7 (?) */
-               uint32_t ul       : 1;  /* cat2-cat4 (and cat1 in blob.. which may be bug??) */
-               uint32_t pad3     : 13;
-               uint32_t jmp_tgt  : 1;
-               uint32_t sync     : 1;
-               uint32_t opc_cat  : 3;
-
-       };
-} instr_t;
-
-static inline uint32_t instr_repeat(instr_t *instr)
-{
-       switch (instr->opc_cat) {
-       case 0:  return instr->cat0.repeat;
-       case 1:  return instr->cat1.repeat;
-       case 2:  return instr->cat2.repeat;
-       case 3:  return instr->cat3.repeat;
-       case 4:  return instr->cat4.repeat;
-       default: return 0;
-       }
-}
-
-static inline bool instr_sat(instr_t *instr)
-{
-       switch (instr->opc_cat) {
-       case 2:  return instr->cat2.sat;
-       case 3:  return instr->cat3.sat;
-       case 4:  return instr->cat4.sat;
-       default: return false;
-       }
-}
-
-/* We can probably drop the gpu_id arg, but keeping it for now so we can
- * assert if we see something we think should be new encoding on an older
- * gpu.
- */
-static inline bool is_cat6_legacy(instr_t *instr, unsigned gpu_id)
-{
-       instr_cat6_a6xx_t *cat6 = &instr->cat6_a6xx;
-
-       /* At least one of these two bits is pad in all the possible
-        * "legacy" cat6 encodings, and a analysis of all the pre-a6xx
-        * cmdstream traces I have indicates that the pad bit is zero
-        * in all cases.  So we can use this to detect new encoding:
-        */
-       if ((cat6->pad3 & 0x8) && (cat6->pad5 & 0x2)) {
-               ir3_assert(gpu_id >= 600);
-               ir3_assert(instr->cat6.opc == 0);
-               return false;
-       }
-
-       return true;
-}
-
-static inline uint32_t instr_opc(instr_t *instr, unsigned gpu_id)
-{
-       switch (instr->opc_cat) {
-       case 0:  return instr->cat0.opc | instr->cat0.opc_hi << 4;
-       case 1:  return 0;
-       case 2:  return instr->cat2.opc;
-       case 3:  return instr->cat3.opc;
-       case 4:  return instr->cat4.opc;
-       case 5:  return instr->cat5.opc;
-       case 6:
-               if (!is_cat6_legacy(instr, gpu_id))
-                       return instr->cat6_a6xx.opc;
-               return instr->cat6.opc;
-       case 7:  return instr->cat7.opc;
-       default: return 0;
-       }
-}
-
-static inline bool is_mad(opc_t opc)
-{
-       switch (opc) {
-       case OPC_MAD_U16:
-       case OPC_MAD_S16:
-       case OPC_MAD_U24:
-       case OPC_MAD_S24:
-       case OPC_MAD_F16:
-       case OPC_MAD_F32:
-               return true;
-       default:
-               return false;
-       }
-}
-
-static inline bool is_madsh(opc_t opc)
-{
-       switch (opc) {
-       case OPC_MADSH_U16:
-       case OPC_MADSH_M16:
-               return true;
-       default:
-               return false;
-       }
-}
-
-static inline bool is_atomic(opc_t opc)
-{
-       switch (opc) {
-       case OPC_ATOMIC_ADD:
-       case OPC_ATOMIC_SUB:
-       case OPC_ATOMIC_XCHG:
-       case OPC_ATOMIC_INC:
-       case OPC_ATOMIC_DEC:
-       case OPC_ATOMIC_CMPXCHG:
-       case OPC_ATOMIC_MIN:
-       case OPC_ATOMIC_MAX:
-       case OPC_ATOMIC_AND:
-       case OPC_ATOMIC_OR:
-       case OPC_ATOMIC_XOR:
-               return true;
-       default:
-               return false;
-       }
-}
-
-static inline bool is_ssbo(opc_t opc)
-{
-       switch (opc) {
-       case OPC_RESFMT:
-       case OPC_RESINFO:
-       case OPC_LDGB:
-       case OPC_STGB:
-       case OPC_STIB:
-               return true;
-       default:
-               return false;
-       }
-}
-
-static inline bool is_isam(opc_t opc)
-{
-       switch (opc) {
-       case OPC_ISAM:
-       case OPC_ISAML:
-       case OPC_ISAMM:
-               return true;
-       default:
-               return false;
-       }
-}
-
-
-static inline bool is_cat2_float(opc_t opc)
-{
-       switch (opc) {
-       case OPC_ADD_F:
-       case OPC_MIN_F:
-       case OPC_MAX_F:
-       case OPC_MUL_F:
-       case OPC_SIGN_F:
-       case OPC_CMPS_F:
-       case OPC_ABSNEG_F:
-       case OPC_CMPV_F:
-       case OPC_FLOOR_F:
-       case OPC_CEIL_F:
-       case OPC_RNDNE_F:
-       case OPC_RNDAZ_F:
-       case OPC_TRUNC_F:
-               return true;
-
-       default:
-               return false;
-       }
-}
-
-static inline bool is_cat3_float(opc_t opc)
-{
-       switch (opc) {
-       case OPC_MAD_F16:
-       case OPC_MAD_F32:
-       case OPC_SEL_F16:
-       case OPC_SEL_F32:
-               return true;
-       default:
-               return false;
-       }
-}
-
-int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id);
-
-#endif /* INSTR_A3XX_H_ */
 
     'cffdec.c',
     'cffdec.h',
     'disasm-a2xx.c',
-    'disasm-a3xx.c',
-    'disasm.h',
     'instr-a2xx.h',
-    'instr-a3xx.h',
     'pager.c',
     'pager.h',
     'rnnutil.c',
     'util.h',
   ],
   include_directories: [
+    inc_freedreno,
     inc_freedreno_rnn,
     inc_include,
     inc_src,
   c_args : [ no_override_init_args ],
   gnu_symbol_visibility: 'hidden',
   dependencies: [],
-  link_with: libfreedreno_rnn,
+  link_with: [
+    libfreedreno_rnn,
+    libfreedreno_ir3,  # for disasm_a3xx
+  ],
   build_by_default: false,
 )
 
       'script.h'
     ],
     include_directories: [
+      inc_freedreno,
       inc_freedreno_rnn,
       inc_include,
       inc_src,
   'crashdec',
   'crashdec.c',
   include_directories: [
+    inc_freedreno,
     inc_freedreno_rnn,
     inc_include,
     inc_src,
     'pgmdump',
     'pgmdump.c',
     include_directories: [
+      inc_freedreno,
       inc_include,
       inc_src,
     ],
     link_with: [
       libfreedreno_cffdec,
       libfreedreno_io,
+      libfreedreno_ir3,  # for disasm_a3xx
     ],
     build_by_default: with_tools.contains('freedreno'),
     install: false,
     'pgmdump2',
     'pgmdump2.c',
     include_directories: [
+      inc_freedreno,
       inc_include,
       inc_src,
     ],
     link_with: [
       libfreedreno_cffdec,
       libfreedreno_io,
+      libfreedreno_ir3,  # for disasm_a3xx
     ],
     build_by_default: with_tools.contains('freedreno'),
     install: false,
 
 int main(int argc, char **argv)
 {
        enum rd_sect_type type = RD_NONE;
-       enum debug_t debug = 0;
+       enum debug_t debug = PRINT_RAW | PRINT_STATS;
        void *buf = NULL;
        int sz;
        struct io *io;
                return -1;
        }
 
-       disasm_set_debug(debug);
+       disasm_a2xx_set_debug(debug);
+       disasm_a3xx_set_debug(debug);
 
        infile = argv[1];
 
 
 int main(int argc, char **argv)
 {
        enum rd_sect_type type = RD_NONE;
-       enum debug_t debug = 0;
+       enum debug_t debug = PRINT_RAW | PRINT_STATS;
        void *buf = NULL;
        int sz;
        struct io *io;
                return -1;
        }
 
-       disasm_set_debug(debug);
+       disasm_a3xx_set_debug(debug);
 
        infile = argv[1];
 
 
 
 #include <util/u_debug.h>
 
+#include "disasm.h"
 #include "instr-a3xx.h"
-
-/* bitmask of debug flags */
-enum debug_t {
-       PRINT_RAW      = 0x1,    /* dump raw hexdump */
-       PRINT_VERBOSE  = 0x2,
-       EXPAND_REPEAT  = 0x4,
-};
+#include "regmask.h"
 
 static enum debug_t debug;
 
        int level;
        unsigned gpu_id;
 
+       struct shader_stats *stats;
+
+       /* we have to process the dst register after src to avoid tripping up
+        * the read-before-write detection
+        */
+       unsigned last_dst;
+       bool last_dst_full;
+       bool last_dst_valid;
+
        /* current instruction repeat flag: */
        unsigned repeat;
        /* current instruction repeat indx/offset (for --expand): */
        unsigned repeatidx;
 
-       unsigned instructions;
+       /* tracking for register usage */
+       struct {
+               regmask_t used;
+               regmask_t used_merged;
+               regmask_t rbw;      /* read before write */
+               regmask_t war;      /* write after read */
+               unsigned max_const;
+       } regs;
 };
 
 static const char *float_imms[] = {
        }
 }
 
+/* Set the bit for register index `num` in `regmask`.  `full` picks which
+ * view of the mask is touched; __regmask_set() is handed the inverted
+ * flag (presumably its "half" selector).  Trips ir3_assert() if `num` is
+ * not below MAX_REG.
+ */
+static void regmask_set(regmask_t *regmask, unsigned num, bool full)
+{
+       const bool half = !full;
+
+       ir3_assert(num < MAX_REG);
+       __regmask_set(regmask, half, num);
+}
+
+/* Clear the bit for register index `num` in `regmask`.  `full` picks which
+ * view of the mask is touched; __regmask_clear() is handed the inverted
+ * flag (presumably its "half" selector).  Trips ir3_assert() if `num` is
+ * not below MAX_REG.
+ */
+static void regmask_clear(regmask_t *regmask, unsigned num, bool full)
+{
+       const bool half = !full;
+
+       ir3_assert(num < MAX_REG);
+       __regmask_clear(regmask, half, num);
+}
+
+/* Query the bit for register index `num` in `regmask` and return whatever
+ * __regmask_get() reports for it.  `full` picks which view of the mask is
+ * read; the helper is handed the inverted flag (presumably its "half"
+ * selector).  Trips ir3_assert() if `num` is not below MAX_REG.
+ */
+static unsigned regmask_get(regmask_t *regmask, unsigned num, bool full)
+{
+       const bool half = !full;
+
+       ir3_assert(num < MAX_REG);
+       return __regmask_get(regmask, half, num);
+}
+
 static unsigned regidx(reg_t reg)
 {
        return (4 * reg.num) + reg.comp;
        };
 }
 
+/* Print one run of consecutive register indices to ctx->out: " N" when the
+ * run is a single index, " N-M" otherwise.  A `first` equal to MAX_REG is
+ * the "no run started" sentinel used by print_regs() and prints nothing.
+ */
+static void print_sequence(struct disasm_ctx *ctx, int first, int last)
+{
+       if (first == MAX_REG)
+               return;
+
+       if (first != last)
+               fprintf(ctx->out, " %d-%d", first, last);
+       else
+               fprintf(ctx->out, " %d", first);
+}
+
+/* Print every register index set in `regmask` (in the view selected by
+ * `full`) as a list of compacted ranges, followed by a " (cnt=N, max=M)"
+ * summary.
+ *
+ * Returns the highest set index below 48*4, or 0 when there is none.
+ */
+static int print_regs(struct disasm_ctx *ctx, regmask_t *regmask, bool full)
+{
+       int run_start = MAX_REG;   /* MAX_REG means no run in progress */
+       int run_end = MAX_REG;
+       int highest = 0;
+       int count = 0;
+
+       for (int idx = 0; idx < MAX_REG; idx++) {
+               if (!regmask_get(regmask, idx, full))
+                       continue;
+
+               /* a gap ends the current run: flush it and start a new one */
+               if (idx != (run_end + 1)) {
+                       print_sequence(ctx, run_start, run_end);
+                       run_start = idx;
+               }
+               run_end = idx;
+
+               /* indices at or above 48*4 are counted but never become max */
+               if (idx < (48*4))
+                       highest = idx;
+
+               count++;
+       }
+
+       /* flush the final run (prints nothing if no bit was set) */
+       print_sequence(ctx, run_start, run_end);
+
+       fprintf(ctx->out, " (cnt=%d, max=%d)", count, highest);
+
+       return highest;
+}
+
+/* Write the register-usage summary collected in ctx->regs, plus the
+ * shaderdb-style counters from ctx->stats, to ctx->out.  Each line starts
+ * with the levels[] string for ctx->level.  The "used (merged)" line is
+ * only emitted when ctx->gpu_id >= 600.
+ */
+static void print_reg_stats(struct disasm_ctx *ctx)
+{
+       FILE *out = ctx->out;
+       const char *prefix = levels[ctx->level];
+       struct shader_stats *stats = ctx->stats;
+       int halfreg, fullreg;
+
+       fprintf(out, "%sRegister Stats:\n", prefix);
+
+       /* registers touched at all; the returned max index feeds the
+        * shaderdb line below
+        */
+       fprintf(out, "%s- used (half):", prefix);
+       halfreg = print_regs(ctx, &ctx->regs.used, false);
+       fprintf(out, "\n");
+
+       fprintf(out, "%s- used (full):", prefix);
+       fullreg = print_regs(ctx, &ctx->regs.used, true);
+       fprintf(out, "\n");
+
+       if (ctx->gpu_id >= 600) {
+               fprintf(out, "%s- used (merged):", prefix);
+               print_regs(ctx, &ctx->regs.used_merged, false);
+               fprintf(out, "\n");
+       }
+
+       /* read-before-write registers are reported as inputs */
+       fprintf(out, "%s- input (half):", prefix);
+       print_regs(ctx, &ctx->regs.rbw, false);
+       fprintf(out, "\n");
+
+       fprintf(out, "%s- input (full):", prefix);
+       print_regs(ctx, &ctx->regs.rbw, true);
+       fprintf(out, "\n");
+
+       fprintf(out, "%s- max const: %u\n", prefix, ctx->regs.max_const);
+       fprintf(out, "\n");
+
+       /* registers written and not read afterwards are reported as outputs */
+       fprintf(out, "%s- output (half):", prefix);
+       print_regs(ctx, &ctx->regs.war, false);
+       fprintf(out, "  (estimated)\n");
+
+       fprintf(out, "%s- output (full):", prefix);
+       print_regs(ctx, &ctx->regs.war, true);
+       fprintf(out, "  (estimated)\n");
+
+       /* convert to vec4, which is the granularity that registers are
+        * assigned to shader:
+        */
+       halfreg = (halfreg + 3) / 4;
+       fullreg = (fullreg + 3) / 4;
+
+       // Note this count of instructions includes rptN, which matches
+       // up to how mesa prints this:
+       fprintf(out, "%s- shaderdb: %d instructions, %d nops, %d non-nops, "
+                       "(%d instlen), %d half, %d full\n",
+                       prefix, stats->instructions, stats->nops,
+                       stats->instructions - stats->nops, stats->instlen,
+                       halfreg, fullreg);
+       fprintf(out, "%s- shaderdb: %d (ss), %d (sy)\n", prefix,
+                       stats->ss, stats->sy);
+}
+
+/* Fold the destination register remembered in ctx->last_dst into the
+ * usage masks, once for each of the (ctx->repeat + 1) consecutive
+ * registers the instruction writes.  Does nothing unless last_dst_valid is
+ * set, and clears that flag when done.  Note that the early return for an
+ * invalid register leaves last_dst_valid untouched.
+ */
+static void process_reg_dst(struct disasm_ctx *ctx)
+{
+       const bool full = ctx->last_dst_full;
+       const bool merged = ctx->gpu_id >= 600;
+
+       if (!ctx->last_dst_valid)
+               return;
+
+       /* ignore dummy writes (ie. r63.x): */
+       if (!VALIDREG(ctx->last_dst))
+               return;
+
+       for (unsigned i = 0; i <= ctx->repeat; i++) {
+               const unsigned dst = ctx->last_dst + i;
+
+               regmask_set(&ctx->regs.war, dst, full);
+               regmask_set(&ctx->regs.used, dst, full);
+
+               /* the merged mask is only maintained for gpu_id >= 600 */
+               if (!merged)
+                       continue;
+
+               if (full) {
+                       /* a full register occupies two slots of the merged mask */
+                       regmask_set(&ctx->regs.used_merged, (dst*2)+0, false);
+                       regmask_set(&ctx->regs.used_merged, (dst*2)+1, false);
+               } else {
+                       regmask_set(&ctx->regs.used_merged, dst, false);
+               }
+       }
+
+       ctx->last_dst_valid = false;
+}
 static void print_reg_dst(struct disasm_ctx *ctx, reg_t reg, bool full, bool addr_rel)
 {
+       /* presumably the special registers a0.c and p0.c don't count.. */
+       if (!(addr_rel || (reg.num == REG_A0) || (reg.num == REG_P0))) {
+               ctx->last_dst = regidx(reg);
+               ctx->last_dst_full = full;
+               ctx->last_dst_valid = true;
+       }
        reg = idxreg(regidx(reg) + ctx->repeatidx);
        print_reg(ctx, reg, full, false, false, false, false, false, false, addr_rel);
 }
 {
        reg_t reg = info->reg;
 
+       /* presumably the special registers a0.c and p0.c don't count.. */
+       if (!(info->addr_rel || info->c || info->im ||
+                       (reg.num == REG_A0) || (reg.num == REG_P0))) {
+               int i, num = regidx(reg);
+               for (i = 0; i <= ctx->repeat; i++) {
+                       unsigned src = num + i;
+
+                       if (!regmask_get(&ctx->regs.used, src, info->full))
+                               regmask_set(&ctx->regs.rbw, src, info->full);
+
+                       regmask_clear(&ctx->regs.war, src, info->full);
+                       regmask_set(&ctx->regs.used, src, info->full);
+
+                       if (info->full) {
+                               regmask_set(&ctx->regs.used_merged, (src*2)+0, false);
+                               regmask_set(&ctx->regs.used_merged, (src*2)+1, false);
+                       } else {
+                               regmask_set(&ctx->regs.used_merged, src, false);
+                       }
+
+                       if (!info->r)
+                               break;
+               }
+       } else if (info->c) {
+               int i, num = regidx(reg);
+               for (i = 0; i <= ctx->repeat; i++) {
+                       unsigned src = num + i;
+
+                       ctx->regs.max_const = MAX2(ctx->regs.max_const, src);
+
+                       if (!info->r)
+                               break;
+               }
+
+               unsigned max = (num + ctx->repeat + 1 + 3) / 4;
+               if (max > ctx->stats->constlen)
+                       ctx->stats->constlen = max;
+       }
+
        if (info->r)
                reg = idxreg(regidx(info->reg) + ctx->repeatidx);
 
 
 #define GETINFO(instr) (&(opcs[((instr)->opc_cat << NOPC_BITS) | instr_opc(instr, ctx->gpu_id)]))
 
-// XXX hack.. probably should move this table somewhere common:
-#include "ir3.h"
-const char *ir3_instr_name(struct ir3_instruction *instr)
+const char *disasm_a3xx_instr_name(opc_t opc)
 {
-       if (opc_cat(instr->opc) == -1) return "??meta??";
-       return opcs[instr->opc].name;
+       if (opc_cat(opc) == -1) return "??meta??";
+       return opcs[opc].name;
 }
 
 static void print_single_instr(struct disasm_ctx *ctx, instr_t *instr)
        instr_t *instr = (instr_t *)dwords;
        uint32_t opc = instr_opc(instr, ctx->gpu_id);
        unsigned nop = 0;
-       unsigned cycles = ctx->instructions;
+       unsigned cycles = ctx->stats->instructions;
 
-       if (debug & PRINT_VERBOSE) {
-               fprintf(ctx->out, "%s%04d:%04d[%08xx_%08xx] ", levels[ctx->level],
-                               n, cycles++, dwords[1], dwords[0]);
+       if (debug & PRINT_RAW) {
+               fprintf(ctx->out, "%s:%d:%04d:%04d[%08xx_%08xx] ", levels[ctx->level],
+                               instr->opc_cat, n, cycles++, dwords[1], dwords[0]);
        }
 
        /* NOTE: order flags are printed is a bit fugly.. but for now I
         */
 
        ctx->repeat = instr_repeat(instr);
-       ctx->instructions += 1 + ctx->repeat;
+       ctx->stats->instructions += 1 + ctx->repeat;
+       ctx->stats->instlen++;
 
        if (instr->sync) {
                fprintf(ctx->out, "(sy)");
+               ctx->stats->sy++;
        }
        if (instr->ss && ((instr->opc_cat <= 4) || (instr->opc_cat == 7))) {
                fprintf(ctx->out, "(ss)");
+               ctx->stats->ss++;
        }
        if (instr->jmp_tgt)
                fprintf(ctx->out, "(jp)");
                nop = (instr->cat2.src2_r * 2) + instr->cat2.src1_r;
        else if ((instr->opc_cat == 3) && (instr->cat3.src1_r || instr->cat3.src2_r))
                nop = (instr->cat3.src2_r * 2) + instr->cat3.src1_r;
-       ctx->instructions += nop;
+       ctx->stats->instructions += nop;
+       ctx->stats->nops += nop;
+       if (opc == OPC_NOP)
+               ctx->stats->nops += 1 + ctx->repeat;
        if (nop)
                fprintf(ctx->out, "(nop%d) ", nop);
 
        print_single_instr(ctx, instr);
        fprintf(ctx->out, "\n");
 
+       process_reg_dst(ctx);
+
        if ((instr->opc_cat <= 4) && (debug & EXPAND_REPEAT)) {
                int i;
                for (i = 0; i < nop; i++) {
                        if (debug & PRINT_VERBOSE) {
-                               fprintf(ctx->out, "%s%04d:%04d[                   ] ",
-                                               levels[ctx->level], n, cycles++);
+                               fprintf(ctx->out, "%s:%d:%04d:%04d[                   ] ",
+                                               levels[ctx->level], instr->opc_cat, n, cycles++);
                        }
                        fprintf(ctx->out, "nop\n");
                }
                for (i = 0; i < ctx->repeat; i++) {
                        ctx->repeatidx = i + 1;
                        if (debug & PRINT_VERBOSE) {
-                               fprintf(ctx->out, "%s%04d:%04d[                   ] ",
-                                               levels[ctx->level], n, cycles++);
+                               fprintf(ctx->out, "%s:%d:%04d:%04d[                   ] ",
+                                               levels[ctx->level], instr->opc_cat, n, cycles++);
                        }
                        print_single_instr(ctx, instr);
                        fprintf(ctx->out, "\n");
                ctx->repeatidx = 0;
        }
 
-       return (instr->opc_cat == 0) && (opc == OPC_END);
+       return (instr->opc_cat == 0) &&
+               ((opc == OPC_END) || (opc == OPC_CHSH));
 }
 
 int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id)
+{
+       struct shader_stats stats;
+       return disasm_a3xx_stat(dwords, sizedwords, level, out, gpu_id, &stats);
+}
+
+int disasm_a3xx_stat(uint32_t *dwords, int sizedwords, int level, FILE *out,
+               unsigned gpu_id, struct shader_stats *stats)
 {
        struct disasm_ctx ctx;
        int i;
        int nop_count = 0;
+       bool has_end = false;
 
-       assert((sizedwords % 2) == 0);
+       ir3_assert((sizedwords % 2) == 0);
 
        memset(&ctx, 0, sizeof(ctx));
        ctx.out = out;
        ctx.level = level;
        ctx.gpu_id = gpu_id;
+       ctx.stats = stats;
+       memset(ctx.stats, 0, sizeof(*ctx.stats));
 
        for (i = 0; i < sizedwords; i += 2) {
-               print_instr(&ctx, &dwords[i], i/2);
+               has_end |= print_instr(&ctx, &dwords[i], i/2);
+               if (!has_end)
+                       continue;
                if (dwords[i] == 0 && dwords[i + 1] == 0)
                        nop_count++;
                else
                        break;
        }
 
+       if (debug & PRINT_STATS)
+               print_reg_stats(&ctx);
+
        return 0;
 }
+
+void disasm_a3xx_set_debug(enum debug_t d)
+{
+       debug = d;
+}
 
 #include <stdbool.h>
 #include <assert.h>
 
+void ir3_assert_handler(const char *expr, const char *file, int line,
+               const char *func) __attribute__((weak)) __attribute__ ((__noreturn__));
+
+/* A wrapper for assert() that allows overriding handling of a failed
+ * assert.  This is needed for tools like crashdec, which may want to
+ * attempt to disassemble memory that might not actually be valid
+ * instructions.
+ */
+#define ir3_assert(expr) do { \
+               if (!(expr)) { \
+                       if (ir3_assert_handler) { \
+                               ir3_assert_handler(#expr, __FILE__, __LINE__, __func__); \
+                       } \
+                       assert(expr); \
+               } \
+       } while (0)
 /* size of largest OPC field of all the instruction categories: */
 #define NOPC_BITS 6
 
 #define opc_cat(opc) ((int)((opc) >> NOPC_BITS))
 #define opc_op(opc)  ((unsigned)((opc) & ((1 << NOPC_BITS) - 1)))
 
+const char *disasm_a3xx_instr_name(opc_t opc);
+
 typedef enum {
        TYPE_F16 = 0,
        TYPE_F32 = 1,
        case TYPE_S8:
                return 8;
        default:
-               assert(0); /* invalid type */
+               ir3_assert(0); /* invalid type */
                return 0;
        }
 }
        int32_t  idummy8   : 8;
 } reg_t;
 
+/* comp:
+ *   0 - x
+ *   1 - y
+ *   2 - z
+ *   3 - w
+ */
+static inline uint32_t regid(int num, int comp)
+{
+       return (num << 2) | (comp & 0x3);
+}
+
+#define INVALID_REG      regid(63, 0)
+#define VALIDREG(r)      ((r) != INVALID_REG)
+#define CONDREG(r, val)  COND(VALIDREG(r), (val))
+
 /* special registers: */
 #define REG_A0 61       /* address register */
 #define REG_P0 62       /* predicate register */
         * in all cases.  So we can use this to detect new encoding:
         */
        if ((cat6->pad3 & 0x8) && (cat6->pad5 & 0x2)) {
-               assert(gpu_id >= 600);
-               assert(instr->cat6.opc == 0);
+               ir3_assert(gpu_id >= 600);
+               ir3_assert(instr->cat6.opc == 0);
                return false;
        }
 
        }
 }
 
-int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id);
-
 #endif /* INSTR_A3XX_H_ */
 
                set_foreach ((__instr)->uses, __entry) \
                        if ((__use = (void *)__entry->key))
 
-#define MAX_ARRAYS 16
-
-/* comp:
- *   0 - x
- *   1 - y
- *   2 - z
- *   3 - w
- */
-static inline uint32_t regid(int num, int comp)
-{
-       return (num << 2) | (comp & 0x3);
-}
-
 static inline uint32_t reg_num(struct ir3_register *reg)
 {
        return reg->num >> 2;
        return reg->num & 0x3;
 }
 
-#define INVALID_REG      regid(63, 0)
-#define VALIDREG(r)      ((r) != INVALID_REG)
-#define CONDREG(r, val)  COND(VALIDREG(r), (val))
-
 static inline bool is_flow(struct ir3_instruction *instr)
 {
        return (opc_cat(instr->opc) == 0);
 
                printf(".%s%s", type_name(instr->cat1.src_type),
                                type_name(instr->cat1.dst_type));
        } else {
-               printf("%s", ir3_instr_name(instr));
+               printf("%s", disasm_a3xx_instr_name(instr->opc));
                if (instr->flags & IR3_INSTR_3D)
                        printf(".3d");
                if (instr->flags & IR3_INSTR_A)
 
 #include "ir3_compiler.h"
 #include "ir3_nir.h"
 
+#include "disasm.h"
+
 int
 ir3_glsl_type_size(const struct glsl_type *type, bool bindless)
 {
 
        }
 }
 
+static inline void
+__regmask_clear(regmask_t *regmask, bool half, unsigned n)
+{
+       if (regmask->mergedregs) {
+               /* a6xx+ case, with merged register file, we track things in terms
+                * of half-precision registers, with a full-precision register
+                * using two half-precision slots:
+                */
+               if (half) {
+                       BITSET_CLEAR(regmask->mask, n);
+               } else {
+                       n *= 2;
+                       BITSET_CLEAR(regmask->mask, n);
+                       BITSET_CLEAR(regmask->mask, n+1);
+               }
+       } else {
+               /* pre a6xx case, with separate register file for half and full
+                * precision:
+                */
+               if (half)
+                       n += MAX_REG;
+               BITSET_CLEAR(regmask->mask, n);
+       }
+}
+
 static inline void
 regmask_init(regmask_t *regmask, bool mergedregs)
 {
 
 #include <stdlib.h>
 #include <string.h>
 #include "util/macros.h"
-#include "instr-a3xx.h"
+#include "disasm.h"
 
 #define INSTR_5XX(i, d) { .gpu_id = 540, .instr = #i, .expected = d }
 #define INSTR_6XX(i, d) { .gpu_id = 630, .instr = #i, .expected = d }
 
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
-inc_freedreno = include_directories(['.', './registers'])
+inc_freedreno = include_directories(['.', './registers', './common'])
 inc_freedreno_rnn = include_directories('rnn')
 
 subdir('common')
 
                }
                break;
        default:
-               unreachable("not reached");
+               assert(!"not reached");
        }
        /* if we had a symbol table here, we could look
         * up the name of the varying..
        return 0;
 }
 
-void disasm_set_debug(enum debug_t d)
+void disasm_a2xx_set_debug(enum debug_t d)
 {
        debug = d;
 }
 
+++ /dev/null
-/*
- * Copyright © 2012 Rob Clark <robclark@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef DISASM_H_
-#define DISASM_H_
-
-#include <stdio.h>
-#include <stdbool.h>
-
-#include "compiler/shader_enums.h"
-#include "util/u_debug.h"
-
-/* bitmask of debug flags */
-enum debug_t {
-       PRINT_RAW      = 0x1,    /* dump raw hexdump */
-       PRINT_VERBOSE  = 0x2,
-};
-
-int disasm_a2xx(uint32_t *dwords, int sizedwords, int level, gl_shader_stage type);
-int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id);
-void disasm_set_debug(enum debug_t debug);
-
-#endif /* DISASM_H_ */
 
 # SOFTWARE.
 
 files_libfreedreno = files(
-  'disasm.h',
   'freedreno_batch.c',
   'freedreno_batch.h',
   'freedreno_batch_cache.c',