- uint32_t flags = blob_read_uint32(ctx->blob);
- alu->exact = flags & 1;
- alu->no_signed_wrap = flags & 2;
- alu->no_unsigned_wrap = flags & 4;
- alu->dest.saturate = flags & 8;
- alu->dest.write_mask = flags >> 4;
-
- read_dest(ctx, &alu->dest.dest, &alu->instr);
-
- for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) {
- read_src(ctx, &alu->src[i].src, &alu->instr);
- flags = blob_read_uint32(ctx->blob);
- alu->src[i].negate = flags & 1;
- alu->src[i].abs = flags & 2;
- for (unsigned j = 0; j < 4; j++)
- alu->src[i].swizzle[j] = (flags >> (2 * j + 2)) & 3;
+ alu->exact = header.alu.exact;
+ alu->no_signed_wrap = header.alu.no_signed_wrap;
+ alu->no_unsigned_wrap = header.alu.no_unsigned_wrap;
+ alu->dest.saturate = header.alu.saturate;
+
+ read_dest(ctx, &alu->dest.dest, &alu->instr, header);
+
+ unsigned dst_components = nir_dest_num_components(alu->dest.dest);
+
+ if (alu->dest.dest.is_ssa) {
+ alu->dest.write_mask = u_bit_consecutive(0, dst_components);
+ } else if (dst_components <= 4) {
+ alu->dest.write_mask = header.alu.writemask_or_two_swizzles;
+ } else {
+ alu->dest.write_mask = blob_read_uint32(ctx->blob);
+ }
+
+ if (header.alu.packed_src_ssa_16bit) {
+ for (unsigned i = 0; i < num_srcs; i++) {
+ nir_alu_src *src = &alu->src[i];
+ src->src.is_ssa = true;
+ src->src.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
+
+ memset(&src->swizzle, 0, sizeof(src->swizzle));
+
+ unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);
+
+ for (unsigned chan = 0; chan < src_components; chan++)
+ src->swizzle[chan] = chan;
+ }
+ } else {
+ for (unsigned i = 0; i < num_srcs; i++) {
+ union packed_src src = read_src(ctx, &alu->src[i].src, &alu->instr);
+ unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
+ unsigned src_components = nir_src_num_components(alu->src[i].src);
+ bool packed = src_components <= 4 && src_channels <= 4;
+
+ alu->src[i].negate = src.alu.negate;
+ alu->src[i].abs = src.alu.abs;
+
+ memset(&alu->src[i].swizzle, 0, sizeof(alu->src[i].swizzle));
+
+ if (packed) {
+ alu->src[i].swizzle[0] = src.alu.swizzle_x;
+ alu->src[i].swizzle[1] = src.alu.swizzle_y;
+ alu->src[i].swizzle[2] = src.alu.swizzle_z;
+ alu->src[i].swizzle[3] = src.alu.swizzle_w;
+ } else {
+ /* Load swizzles for vec8 and vec16. */
+ for (unsigned o = 0; o < src_channels; o += 8) {
+ unsigned value = blob_read_uint32(ctx->blob);
+
+ for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
+ alu->src[i].swizzle[o + j] =
+ (value >> (4 * j)) & 0xf; /* 4 bits per swizzle */
+ }
+ }
+ }
+ }
+ }
+
+ if (header.alu.packed_src_ssa_16bit &&
+ alu->dest.dest.is_ssa) {
+ alu->src[0].swizzle[0] = header.alu.writemask_or_two_swizzles & 0x3;
+ if (num_srcs > 1)
+ alu->src[1].swizzle[0] = header.alu.writemask_or_two_swizzles >> 2;