aco: reserve 2 sgprs for each branch

[mesa.git] / src / amd / compiler / aco_instruction_selection.cpp
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp

index a3b47d6e98a16b143c9d72d56966dfcc087f5fcc..8ce4afc829cd72c9cc1c47be959df16bafff7571 100644 (file)
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -38,6 +38,23 @@
  namespace aco {
  namespace {
  
+#define isel_err(...) _isel_err(ctx, __FILE__, __LINE__, __VA_ARGS__) /* Wrapper that passes the call-site file and line; relies on a variable named ctx being in scope where it is used. */
+/* Reports an instruction-selection error: builds the text msg, a colon and a space, then the printed NIR instruction, and hands it to _aco_err together with file and line. */
+static void _isel_err(isel_context *ctx, const char *file, unsigned line,
+                      const nir_instr *instr, const char *msg)
+{
+   char *out; /* text buffer filled in through the memory stream; released with free() at the end */
+   size_t outsize; /* receives the buffer size from the stream; not read anywhere else in this function */
+   FILE *memf = open_memstream(&out, &outsize); /* NOTE(review): the result is not checked; open_memstream is documented to return NULL on failure */
+
+   fprintf(memf, "%s: ", msg);
+   nir_print_instr(instr, memf);
+   fclose(memf); /* closing the stream makes the final contents available through out */
+
+   _aco_err(ctx->program, file, line, out); /* NOTE(review): if this argument is a printf-style format in _aco_err, a percent sign in the printed instruction would be misread; confirm against its declaration */
+   free(out);
+}
+
  class loop_info_RAII {
     isel_context* ctx;
     unsigned header_idx_old;
@@ -951,9 +968,7 @@ void emit_bcsel(isel_context *ctx, nir_alu_instr *instr, Temp dst)
  
           bld.pseudo(aco_opcode::p_create_vector, Definition(dst), dst0, dst1);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        return;
     }
@@ -971,9 +986,7 @@ void emit_bcsel(isel_context *ctx, nir_alu_instr *instr, Temp dst)
           aco_opcode op = dst.regClass() == s1 ? aco_opcode::s_cselect_b32 : aco_opcode::s_cselect_b64;
           bld.sop2(op, Definition(dst), then, els, bld.scc(bool_to_scalar_condition(ctx, cond)));
        } else {
-         fprintf(stderr, "Unimplemented uniform bcsel bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented uniform bcsel bit size");
        }
        return;
     }
@@ -1128,9 +1141,7 @@ Temp emit_floor_f64(isel_context *ctx, Builder& bld, Definition dst, Temp val)
  void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
  {
     if (!instr->dest.dest.is_ssa) {
-      fprintf(stderr, "nir alu dst not in ssa: ");
-      nir_print_instr(&instr->instr, stderr);
-      fprintf(stderr, "\n");
+      isel_err(&instr->instr, "nir alu dst not in ssa");
        abort();
     }
     Builder bld(ctx->program, ctx->block);
@@ -1224,9 +1235,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
           aco_opcode opcode = dst.size() == 1 ? aco_opcode::s_not_b32 : aco_opcode::s_not_b64;
           bld.sop1(opcode, Definition(dst), bld.def(s1, scc), src);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -1253,9 +1262,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
              bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper);
           }
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -1266,9 +1273,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
           Temp src = get_alu_src(ctx, instr->src[0]);
           bld.vop2(aco_opcode::v_max_i32, Definition(dst), src, bld.vsub32(bld.def(v1), Operand(0u), src));
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -1296,9 +1301,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
           upper = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand(0u), neg, gtz);
           bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -1308,9 +1311,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
        } else if (dst.regClass() == s1) {
           emit_sop2_instruction(ctx, instr, aco_opcode::s_max_i32, dst, true);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -1320,9 +1321,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
        } else if (dst.regClass() == s1) {
           emit_sop2_instruction(ctx, instr, aco_opcode::s_max_u32, dst, true);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -1332,9 +1331,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
        } else if (dst.regClass() == s1) {
           emit_sop2_instruction(ctx, instr, aco_opcode::s_min_i32, dst, true);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -1344,9 +1341,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
        } else if (dst.regClass() == s1) {
           emit_sop2_instruction(ctx, instr, aco_opcode::s_min_u32, dst, true);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -1362,9 +1357,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
        } else if (dst.regClass() == s2) {
           emit_sop2_instruction(ctx, instr, aco_opcode::s_or_b64, dst, true);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -1380,9 +1373,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
        } else if (dst.regClass() == s2) {
           emit_sop2_instruction(ctx, instr, aco_opcode::s_and_b64, dst, true);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -1398,9 +1389,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
        } else if (dst.regClass() == s2) {
           emit_sop2_instruction(ctx, instr, aco_opcode::s_xor_b64, dst, true);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -1418,9 +1407,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
        } else if (dst.regClass() == s1) {
           emit_sop2_instruction(ctx, instr, aco_opcode::s_lshr_b32, dst, true);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -1438,9 +1425,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
        } else if (dst.regClass() == s2) {
           emit_sop2_instruction(ctx, instr, aco_opcode::s_lshl_b64, dst, true);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -1458,9 +1443,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
        } else if (dst.regClass() == s2) {
           emit_sop2_instruction(ctx, instr, aco_opcode::s_ashr_i64, dst, true);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -1473,9 +1456,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
        } else if (src.regClass() == s2) {
           bld.sop1(aco_opcode::s_ff1_i32_b64, Definition(dst), src);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -1502,9 +1483,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
           Temp carry = bld.vsub32(Definition(msb), Operand(31u), Operand(msb_rev), true).def(1).getTemp();
           bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(dst), msb, Operand((uint32_t)-1), carry);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -1514,9 +1493,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
        } else if (dst.regClass() == v1) {
           bld.vop1(aco_opcode::v_bfrev_b32, Definition(dst), get_alu_src(ctx, instr->src[0]));
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -1552,9 +1529,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
           Temp dst1 = bld.vadd32(bld.def(v1), src01, src11, false, carry);
           bld.pseudo(aco_opcode::p_create_vector, Definition(dst), dst0, dst1);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -1583,9 +1558,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
              bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(dst), tmp, Operand((uint32_t) -1), carry);
           }
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -1619,9 +1592,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
           carry = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand(0u), Operand(1u), carry);
           bld.pseudo(aco_opcode::p_create_vector, Definition(dst), carry, Operand(0u));
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -1655,9 +1626,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
           Temp upper = bld.vsub32(bld.def(v1), src01, src11, false, borrow);
           bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -1690,9 +1659,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
           borrow = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand(0u), Operand(1u), borrow);
           bld.pseudo(aco_opcode::p_create_vector, Definition(dst), borrow, Operand(0u));
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -1703,9 +1670,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
        } else if (dst.regClass() == s1) {
           emit_sop2_instruction(ctx, instr, aco_opcode::s_mul_i32, dst, false);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -1719,9 +1684,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
                               as_vgpr(ctx, get_alu_src(ctx, instr->src[1])));
           bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), tmp);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -1735,9 +1698,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
                               as_vgpr(ctx, get_alu_src(ctx, instr->src[1])));
           bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), tmp);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -1751,9 +1712,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
        } else if (dst.regClass() == v2) {
           bld.vop3(aco_opcode::v_mul_f64, Definition(dst), src0, src1);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -1767,9 +1726,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
        } else if (dst.regClass() == v2) {
           bld.vop3(aco_opcode::v_add_f64, Definition(dst), src0, src1);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -1792,9 +1749,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
           VOP3A_instruction* sub = static_cast<VOP3A_instruction*>(add);
           sub->neg[1] = true;
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -1814,9 +1769,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
              bld.vop3(aco_opcode::v_max_f64, Definition(dst), src0, src1);
           }
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -1836,105 +1789,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
              bld.vop3(aco_opcode::v_min_f64, Definition(dst), src0, src1);
           }
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
-      }
-      break;
-   }
-   case nir_op_fmax3: {
-      if (dst.regClass() == v2b) {
-         emit_vop3a_instruction(ctx, instr, aco_opcode::v_max3_f16, dst, false);
-      } else if (dst.regClass() == v1) {
-         emit_vop3a_instruction(ctx, instr, aco_opcode::v_max3_f32, dst, ctx->block->fp_mode.must_flush_denorms32);
-      } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
-      }
-      break;
-   }
-   case nir_op_fmin3: {
-      if (dst.regClass() == v2b) {
-         emit_vop3a_instruction(ctx, instr, aco_opcode::v_min3_f16, dst, false);
-      } else if (dst.regClass() == v1) {
-         emit_vop3a_instruction(ctx, instr, aco_opcode::v_min3_f32, dst, ctx->block->fp_mode.must_flush_denorms32);
-      } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
-      }
-      break;
-   }
-   case nir_op_fmed3: {
-      if (dst.regClass() == v2b) {
-         emit_vop3a_instruction(ctx, instr, aco_opcode::v_med3_f16, dst, false);
-      } else if (dst.regClass() == v1) {
-         emit_vop3a_instruction(ctx, instr, aco_opcode::v_med3_f32, dst, ctx->block->fp_mode.must_flush_denorms32);
-      } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
-      }
-      break;
-   }
-   case nir_op_umax3: {
-      if (dst.size() == 1) {
-         emit_vop3a_instruction(ctx, instr, aco_opcode::v_max3_u32, dst);
-      } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
-      }
-      break;
-   }
-   case nir_op_umin3: {
-      if (dst.size() == 1) {
-         emit_vop3a_instruction(ctx, instr, aco_opcode::v_min3_u32, dst);
-      } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
-      }
-      break;
-   }
-   case nir_op_umed3: {
-      if (dst.size() == 1) {
-         emit_vop3a_instruction(ctx, instr, aco_opcode::v_med3_u32, dst);
-      } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
-      }
-      break;
-   }
-   case nir_op_imax3: {
-      if (dst.size() == 1) {
-         emit_vop3a_instruction(ctx, instr, aco_opcode::v_max3_i32, dst);
-      } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
-      }
-      break;
-   }
-   case nir_op_imin3: {
-      if (dst.size() == 1) {
-         emit_vop3a_instruction(ctx, instr, aco_opcode::v_min3_i32, dst);
-      } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
-      }
-      break;
-   }
-   case nir_op_imed3: {
-      if (dst.size() == 1) {
-         emit_vop3a_instruction(ctx, instr, aco_opcode::v_med3_i32, dst);
-      } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -1947,8 +1802,10 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
        ma = bld.vop1(aco_opcode::v_rcp_f32, bld.def(v1), ma);
        Temp sc = bld.vop3(aco_opcode::v_cubesc_f32, bld.def(v1), src[0], src[1], src[2]);
        Temp tc = bld.vop3(aco_opcode::v_cubetc_f32, bld.def(v1), src[0], src[1], src[2]);
-      sc = bld.vop2(aco_opcode::v_madak_f32, bld.def(v1), sc, ma, Operand(0x3f000000u/*0.5*/));
-      tc = bld.vop2(aco_opcode::v_madak_f32, bld.def(v1), tc, ma, Operand(0x3f000000u/*0.5*/));
+      sc = bld.vop2(aco_opcode::v_add_f32, bld.def(v1),
+                    bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), sc, ma), Operand(0x3f000000u/*0.5*/));
+      tc = bld.vop2(aco_opcode::v_add_f32, bld.def(v1),
+                    bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), tc, ma), Operand(0x3f000000u/*0.5*/));
        bld.pseudo(aco_opcode::p_create_vector, Definition(dst), sc, tc);
        break;
     }
@@ -1974,9 +1831,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
           /* Lowered at NIR level for precision reasons. */
           emit_vop1_instruction(ctx, instr, aco_opcode::v_rsq_f64, dst);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -1998,9 +1853,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
           upper = bld.vop2(aco_opcode::v_xor_b32, bld.def(v1), Operand(0x80000000u), upper);
           bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -2022,9 +1875,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
           upper = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(0x7FFFFFFFu), upper);
           bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -2041,9 +1892,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
           VOP3A_instruction* vop3 = static_cast<VOP3A_instruction*>(add);
           vop3->clamp = true;
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -2054,9 +1903,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
        } else if (dst.regClass() == v1) {
           emit_log2(ctx, bld, Definition(dst), src);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -2070,9 +1917,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
           /* Lowered at NIR level for precision reasons. */
           emit_vop1_instruction(ctx, instr, aco_opcode::v_rcp_f64, dst);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -2082,9 +1927,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
        } else if (dst.regClass() == v1) {
           emit_vop1_instruction(ctx, instr, aco_opcode::v_exp_f32, dst);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -2098,9 +1941,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
           /* Lowered at NIR level for precision reasons. */
           emit_vop1_instruction(ctx, instr, aco_opcode::v_sqrt_f64, dst);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -2112,9 +1953,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
        } else if (dst.regClass() == v2) {
           emit_vop1_instruction(ctx, instr, aco_opcode::v_fract_f64, dst);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -2127,9 +1966,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
        } else if (dst.regClass() == v2) {
           emit_floor_f64(ctx, bld, Definition(dst), src);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -2157,9 +1994,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
              bld.vop3(aco_opcode::v_add_f64, Definition(dst), trunc, add);
           }
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -2172,9 +2007,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
        } else if (dst.regClass() == v2) {
           emit_trunc_f64(ctx, bld, Definition(dst), src);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -2212,9 +2045,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
              bld.pseudo(aco_opcode::p_create_vector, Definition(dst), dst0, dst1);
           }
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -2238,9 +2069,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
           aco_opcode opcode = instr->op == nir_op_fsin ? aco_opcode::v_sin_f32 : aco_opcode::v_cos_f32;
           bld.vop1(opcode, Definition(dst), tmp);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -2254,9 +2083,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
        } else if (dst.regClass() == v2) {
           bld.vop3(aco_opcode::v_ldexp_f64, Definition(dst), as_vgpr(ctx, src0), src1);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -2269,9 +2096,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
        } else if (dst.regClass() == v2) {
           bld.vop1(aco_opcode::v_frexp_mant_f64, Definition(dst), src);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -2286,9 +2111,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
        } else if (instr->src[0].src.ssa->bit_size == 64) {
           bld.vop1(aco_opcode::v_frexp_exp_i32_f64, Definition(dst), src);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -2317,9 +2140,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
  
           bld.pseudo(aco_opcode::p_create_vector, Definition(dst), Operand(0u), upper);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -2350,9 +2171,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
        } else if (instr->src[0].src.ssa->bit_size == 64) {
           emit_vop1_instruction(ctx, instr, aco_opcode::v_cvt_f32_f64, dst);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -2398,9 +2217,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
           bld.vop3(aco_opcode::v_add_f64, Definition(dst), lower, upper);
  
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -2442,9 +2259,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
           upper = bld.vop3(aco_opcode::v_ldexp_f64, bld.def(v2), upper, Operand(32u));
           bld.vop3(aco_opcode::v_add_f64, Definition(dst), lower, upper);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -2483,9 +2298,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
        } else if (instr->src[0].src.ssa->bit_size == 64) {
           emit_vop1_instruction(ctx, instr, aco_opcode::v_cvt_i32_f64, dst);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -2504,9 +2317,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
        } else if (instr->src[0].src.ssa->bit_size == 64) {
           emit_vop1_instruction(ctx, instr, aco_opcode::v_cvt_u32_f64, dst);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -2583,9 +2394,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
           bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper);
  
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -2655,9 +2464,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
           bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper);
  
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -2829,16 +2636,20 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
           Temp src0 = bld.tmp(v1);
           Temp src1 = bld.tmp(v1);
           bld.pseudo(aco_opcode::p_split_vector, Definition(src0), Definition(src1), src);
-         if (!ctx->block->fp_mode.care_about_round32 || ctx->block->fp_mode.round32 == fp_round_tz)
+         if (0 && (!ctx->block->fp_mode.care_about_round32 || ctx->block->fp_mode.round32 == fp_round_tz)) {
              bld.vop3(aco_opcode::v_cvt_pkrtz_f16_f32, Definition(dst), src0, src1);
-         else
-            bld.vop3(aco_opcode::v_cvt_pk_u16_u32, Definition(dst),
-                     bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), src0),
-                     bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), src1));
+         } else {
+            src0 = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v1), src0);
+            src1 = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v1), src1);
+            if (ctx->program->chip_class >= GFX10) {
+               /* the high bits of v_cvt_f16_f32 aren't zeroed on GFX10 */
+               bld.vop3(aco_opcode::v_pack_b32_f16, Definition(dst), src0, src1);
+            } else {
+               bld.vop3(aco_opcode::v_cvt_pk_u16_u32, Definition(dst), src0, src1);
+            }
+         }
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -2846,9 +2657,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
        if (dst.regClass() == v1) {
           bld.vop1(aco_opcode::v_cvt_f32_f16, Definition(dst), get_alu_src(ctx, instr->src[0]));
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -2858,9 +2667,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
           bld.vop1(aco_opcode::v_cvt_f32_f16, Definition(dst),
                    bld.vop2(aco_opcode::v_lshrrev_b32, bld.def(v1), Operand(16u), as_vgpr(ctx, get_alu_src(ctx, instr->src[0]))));
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -2901,9 +2708,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
        } else if (dst.regClass() == v1) {
           bld.vop3(aco_opcode::v_bfm_b32, Definition(dst), bits, offset);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -2946,9 +2751,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
           bld.vop3(aco_opcode::v_bfi_b32, Definition(dst), bitmask, insert, base);
  
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -3021,9 +2824,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
        } else if (src.regClass() == s2) {
           bld.sop1(aco_opcode::s_bcnt1_i32_b64, Definition(dst), bld.def(s1, scc), src);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -3039,7 +2840,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
        emit_comparison(ctx, instr, dst, aco_opcode::v_cmp_eq_f16, aco_opcode::v_cmp_eq_f32, aco_opcode::v_cmp_eq_f64);
        break;
     }
-   case nir_op_fne: {
+   case nir_op_fneu: {
        emit_comparison(ctx, instr, dst, aco_opcode::v_cmp_neq_f16, aco_opcode::v_cmp_neq_f32, aco_opcode::v_cmp_neq_f64);
        break;
     }
@@ -3110,9 +2911,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
        break;
     }
     default:
-      fprintf(stderr, "Unknown NIR ALU instr: ");
-      nir_print_instr(&instr->instr, stderr);
-      fprintf(stderr, "\n");
+      isel_err(&instr->instr, "Unknown NIR ALU instr");
     }
  }
  
@@ -4519,9 +4318,7 @@ void visit_store_output(isel_context *ctx, nir_intrinsic_instr *instr)
         ctx->shader->info.stage == MESA_SHADER_GEOMETRY) {
        bool stored_to_temps = store_output_to_temps(ctx, instr);
        if (!stored_to_temps) {
-         fprintf(stderr, "Unimplemented output offset instruction:\n");
-         nir_print_instr(instr->src[1].ssa->parent_instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(instr->src[1].ssa->parent_instr, "Unimplemented output offset instruction");
           abort();
        }
     } else if (ctx->stage == vertex_es ||
@@ -4738,9 +4535,7 @@ void visit_load_input(isel_context *ctx, nir_intrinsic_instr *instr)
  
        nir_instr *off_instr = instr->src[0].ssa->parent_instr;
        if (off_instr->type != nir_instr_type_load_const) {
-         fprintf(stderr, "Unimplemented nir_intrinsic_load_input offset\n");
-         nir_print_instr(off_instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(off_instr, "Unimplemented nir_intrinsic_load_input offset");
        }
        uint32_t offset = nir_instr_as_load_const(off_instr)->value[0].u32;
  
@@ -4952,9 +4747,7 @@ void visit_load_input(isel_context *ctx, nir_intrinsic_instr *instr)
        nir_instr *off_instr = instr->src[offset_idx].ssa->parent_instr;
        if (off_instr->type != nir_instr_type_load_const ||
            nir_instr_as_load_const(off_instr)->value[0].u32 != 0) {
-         fprintf(stderr, "Unimplemented nir_intrinsic_load_input offset\n");
-         nir_print_instr(off_instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(off_instr, "Unimplemented nir_intrinsic_load_input offset");
        }
  
        Temp prim_mask = get_arg(ctx, ctx->args->ac.prim_mask);
@@ -5474,7 +5267,7 @@ void visit_discard(isel_context* ctx, nir_intrinsic_instr *instr)
           assert(nir_instr_is_last(&instr->instr));
           ctx->block->kind |= block_kind_uniform;
           ctx->cf_info.has_branch = true;
-         bld.branch(aco_opcode::p_branch);
+         bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
           add_linear_edge(ctx->block->index, linear_target);
           return;
        }
@@ -5487,14 +5280,14 @@ void visit_discard(isel_context* ctx, nir_intrinsic_instr *instr)
        ctx->cf_info.nir_to_aco[instr->instr.block->index] = idx;
  
        /* remove critical edges from linear CFG */
-      bld.branch(aco_opcode::p_branch);
+      bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
        Block* break_block = ctx->program->create_and_insert_block();
        break_block->loop_nest_depth = ctx->cf_info.loop_nest_depth;
        break_block->kind |= block_kind_uniform;
        add_linear_edge(idx, break_block);
        add_linear_edge(break_block->index, linear_target);
        bld.reset(break_block);
-      bld.branch(aco_opcode::p_branch);
+      bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
  
        Block* continue_block = ctx->program->create_and_insert_block();
        continue_block->loop_nest_depth = ctx->cf_info.loop_nest_depth;
@@ -6209,6 +6002,7 @@ void visit_image_size(isel_context *ctx, nir_intrinsic_instr *instr)
     }
  
     /* LOD */
+   assert(nir_src_as_uint(instr->src[1]) == 0);
     Temp lod = bld.vop1(aco_opcode::v_mov_b32, bld.def(v1), Operand(0u));
  
     /* Resource */
@@ -6303,6 +6097,7 @@ void visit_store_ssbo(isel_context *ctx, nir_intrinsic_instr *instr)
  
     bool smem = !nir_src_is_divergent(instr->src[2]) &&
                 ctx->options->chip_class >= GFX8 &&
+               ctx->options->chip_class < GFX10_3 &&
                 (elem_size_bytes >= 4 || can_subdword_ssbo_store_use_smem(instr)) &&
                 allow_smem;
     if (smem)
@@ -7260,9 +7055,7 @@ void emit_uniform_subgroup(isel_context *ctx, nir_intrinsic_instr *instr, Temp s
     } else if (src.regClass() == s2) {
        bld.sop1(aco_opcode::s_mov_b64, dst, src);
     } else {
-      fprintf(stderr, "Unimplemented NIR instr bit size: ");
-      nir_print_instr(&instr->instr, stderr);
-      fprintf(stderr, "\n");
+      isel_err(&instr->instr, "Unimplemented NIR instr bit size");
     }
  }
  
@@ -7300,10 +7093,11 @@ void emit_interp_center(isel_context *ctx, Temp dst, Temp pos1, Temp pos2)
     }
  
     /* res_k = p_k + ddx_k * pos1 + ddy_k * pos2 */
-   Temp tmp1 = bld.vop3(aco_opcode::v_mad_f32, bld.def(v1), ddx_1, pos1, p1);
-   Temp tmp2 = bld.vop3(aco_opcode::v_mad_f32, bld.def(v1), ddx_2, pos1, p2);
-   tmp1 = bld.vop3(aco_opcode::v_mad_f32, bld.def(v1), ddy_1, pos2, tmp1);
-   tmp2 = bld.vop3(aco_opcode::v_mad_f32, bld.def(v1), ddy_2, pos2, tmp2);
+   aco_opcode mad = ctx->program->chip_class >= GFX10_3 ? aco_opcode::v_fma_f32 : aco_opcode::v_mad_f32;
+   Temp tmp1 = bld.vop3(mad, bld.def(v1), ddx_1, pos1, p1);
+   Temp tmp2 = bld.vop3(mad, bld.def(v1), ddx_2, pos1, p2);
+   tmp1 = bld.vop3(mad, bld.def(v1), ddy_1, pos2, tmp1);
+   tmp2 = bld.vop3(mad, bld.def(v1), ddy_2, pos2, tmp2);
     Temp wqm1 = bld.tmp(v1);
     emit_wqm(ctx, tmp1, wqm1, true);
     Temp wqm2 = bld.tmp(v1);
@@ -7692,9 +7486,7 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
        } else if (instr->src[0].ssa->bit_size == 64 && src.regClass() == v2) {
           bld.vopc(aco_opcode::v_cmp_lg_u64, lanemask_tmp, Operand(0u), src);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        if (dst.size() != bld.lm.size()) {
           /* Wave32 with ballot size set to 64 */
@@ -7746,9 +7538,7 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
              tmp = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(1u), tmp);
              emit_wqm(ctx, bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand(0u), tmp), dst);
           } else {
-            fprintf(stderr, "Unimplemented NIR instr bit size: ");
-            nir_print_instr(&instr->instr, stderr);
-            fprintf(stderr, "\n");
+            isel_err(&instr->instr, "Unimplemented NIR instr bit size");
           }
        }
        break;
@@ -7786,9 +7576,7 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
        } else if (src.regClass() == s2) {
           bld.pseudo(aco_opcode::p_create_vector, Definition(dst), src);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -7959,9 +7747,7 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
              bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi);
              emit_split_vector(ctx, dst, 2);
           } else {
-            fprintf(stderr, "Unimplemented NIR instr bit size: ");
-            nir_print_instr(&instr->instr, stderr);
-            fprintf(stderr, "\n");
+            isel_err(&instr->instr, "Unimplemented NIR instr bit size");
           }
        }
        break;
@@ -8039,9 +7825,7 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
           bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi);
           emit_split_vector(ctx, dst, 2);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -8075,9 +7859,7 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
           bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi);
           emit_split_vector(ctx, dst, 2);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -8099,9 +7881,7 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
           bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi);
           emit_split_vector(ctx, dst, 2);
        } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
        break;
     }
@@ -8252,9 +8032,7 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
        break;
     }
     default:
-      fprintf(stderr, "Unimplemented intrinsic instr: ");
-      nir_print_instr(&instr->instr, stderr);
-      fprintf(stderr, "\n");
+      isel_err(&instr->instr, "Unimplemented intrinsic instr");
        abort();
  
        break;
@@ -8380,6 +8158,8 @@ void prepare_cube_coords(isel_context *ctx, std::vector<Temp>& coords, Temp* ddx
  {
     Builder bld(ctx->program, ctx->block);
     Temp ma, tc, sc, id;
+   aco_opcode madak = ctx->program->chip_class >= GFX10_3 ? aco_opcode::v_fmaak_f32 : aco_opcode::v_madak_f32;
+   aco_opcode madmk = ctx->program->chip_class >= GFX10_3 ? aco_opcode::v_fmamk_f32 : aco_opcode::v_madmk_f32;
  
     if (is_array) {
        coords[3] = bld.vop1(aco_opcode::v_rndne_f32, bld.def(v1), coords[3]);
@@ -8400,11 +8180,11 @@ void prepare_cube_coords(isel_context *ctx, std::vector<Temp>& coords, Temp* ddx
  
     sc = bld.vop3(aco_opcode::v_cubesc_f32, bld.def(v1), coords[0], coords[1], coords[2]);
     if (!is_deriv)
-      sc = bld.vop2(aco_opcode::v_madak_f32, bld.def(v1), sc, invma, Operand(0x3fc00000u/*1.5*/));
+      sc = bld.vop2(madak, bld.def(v1), sc, invma, Operand(0x3fc00000u/*1.5*/));
  
     tc = bld.vop3(aco_opcode::v_cubetc_f32, bld.def(v1), coords[0], coords[1], coords[2]);
     if (!is_deriv)
-      tc = bld.vop2(aco_opcode::v_madak_f32, bld.def(v1), tc, invma, Operand(0x3fc00000u/*1.5*/));
+      tc = bld.vop2(madak, bld.def(v1), tc, invma, Operand(0x3fc00000u/*1.5*/));
  
     id = bld.vop3(aco_opcode::v_cubeid_f32, bld.def(v1), coords[0], coords[1], coords[2]);
  
@@ -8435,7 +8215,7 @@ void prepare_cube_coords(isel_context *ctx, std::vector<Temp>& coords, Temp* ddx
     }
  
     if (is_array)
-      id = bld.vop2(aco_opcode::v_madmk_f32, bld.def(v1), coords[3], id, Operand(0x41000000u/*8.0*/));
+      id = bld.vop2(madmk, bld.def(v1), coords[3], id, Operand(0x41000000u/*8.0*/));
     coords.resize(3);
     coords[0] = sc;
     coords[1] = tc;
@@ -9291,7 +9071,7 @@ void visit_jump(isel_context *ctx, nir_jump_instr *instr)
           /* uniform break - directly jump out of the loop */
           ctx->block->kind |= block_kind_uniform;
           ctx->cf_info.has_branch = true;
-         bld.branch(aco_opcode::p_branch);
+         bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
           add_linear_edge(idx, logical_target);
           return;
        }
@@ -9313,15 +9093,13 @@ void visit_jump(isel_context *ctx, nir_jump_instr *instr)
           /* uniform continue - directly jump to the loop header */
           ctx->block->kind |= block_kind_uniform;
           ctx->cf_info.has_branch = true;
-         bld.branch(aco_opcode::p_branch);
+         bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
           add_linear_edge(idx, logical_target);
           return;
        }
        break;
     default:
-      fprintf(stderr, "Unknown NIR jump instr: ");
-      nir_print_instr(&instr->instr, stderr);
-      fprintf(stderr, "\n");
+      isel_err(&instr->instr, "Unknown NIR jump instr");
        abort();
     }
  
@@ -9331,7 +9109,7 @@ void visit_jump(isel_context *ctx, nir_jump_instr *instr)
     }
  
     /* remove critical edges from linear CFG */
-   bld.branch(aco_opcode::p_branch);
+   bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
     Block* break_block = ctx->program->create_and_insert_block();
     break_block->loop_nest_depth = ctx->cf_info.loop_nest_depth;
     break_block->kind |= block_kind_uniform;
@@ -9341,7 +9119,7 @@ void visit_jump(isel_context *ctx, nir_jump_instr *instr)
        logical_target = &ctx->program->blocks[ctx->cf_info.parent_loop.header_idx];
     add_linear_edge(break_block->index, logical_target);
     bld.reset(break_block);
-   bld.branch(aco_opcode::p_branch);
+   bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
  
     Block* continue_block = ctx->program->create_and_insert_block();
     continue_block->loop_nest_depth = ctx->cf_info.loop_nest_depth;
@@ -9379,9 +9157,7 @@ void visit_block(isel_context *ctx, nir_block *block)
           visit_jump(ctx, nir_instr_as_jump(instr));
           break;
        default:
-         fprintf(stderr, "Unknown NIR instr type: ");
-         nir_print_instr(instr, stderr);
-         fprintf(stderr, "\n");
+         isel_err(instr, "Unknown NIR instr type");
           //abort();
        }
     }
@@ -9443,7 +9219,7 @@ static void visit_loop(isel_context *ctx, nir_loop *loop)
     append_logical_end(ctx->block);
     ctx->block->kind |= block_kind_loop_preheader | block_kind_uniform;
     Builder bld(ctx->program, ctx->block);
-   bld.branch(aco_opcode::p_branch);
+   bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
     unsigned loop_preheader_idx = ctx->block->index;
  
     Block loop_exit = Block();
@@ -9478,7 +9254,7 @@ static void visit_loop(isel_context *ctx, nir_loop *loop)
           break_block->loop_nest_depth = ctx->cf_info.loop_nest_depth;
           break_block->kind = block_kind_uniform;
           bld.reset(break_block);
-         bld.branch(aco_opcode::p_branch);
+         bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
           add_linear_edge(block_idx, break_block);
           add_linear_edge(break_block->index, &loop_exit);
  
@@ -9486,7 +9262,7 @@ static void visit_loop(isel_context *ctx, nir_loop *loop)
           continue_block->loop_nest_depth = ctx->cf_info.loop_nest_depth;
           continue_block->kind = block_kind_uniform;
           bld.reset(continue_block);
-         bld.branch(aco_opcode::p_branch);
+         bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
           add_linear_edge(block_idx, continue_block);
           add_linear_edge(continue_block->index, &ctx->program->blocks[loop_header_idx]);
  
@@ -9502,7 +9278,7 @@ static void visit_loop(isel_context *ctx, nir_loop *loop)
        }
  
        bld.reset(ctx->block);
-      bld.branch(aco_opcode::p_branch);
+      bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
     }
  
     /* Fixup phis in loop header from unreachable blocks.
@@ -9580,7 +9356,9 @@ static void begin_divergent_if_then(isel_context *ctx, if_context *ic, Temp cond
     /* branch to linear then block */
     assert(cond.regClass() == ctx->program->lane_mask);
     aco_ptr<Pseudo_branch_instruction> branch;
-   branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_cbranch_z, Format::PSEUDO_BRANCH, 1, 0));
+   branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_cbranch_z, Format::PSEUDO_BRANCH, 1, 1));
+   branch->definitions[0] = {ctx->program->allocateId(), s2};
+   branch->definitions[0].setHint(vcc);
     branch->operands[0] = Operand(cond);
     ctx->block->instructions.push_back(std::move(branch));
  
@@ -9619,7 +9397,9 @@ static void begin_divergent_if_else(isel_context *ctx, if_context *ic)
     append_logical_end(BB_then_logical);
      /* branch from logical then block to invert block */
     aco_ptr<Pseudo_branch_instruction> branch;
-   branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0));
+   branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1));
+   branch->definitions[0] = {ctx->program->allocateId(), s2};
+   branch->definitions[0].setHint(vcc);
     BB_then_logical->instructions.emplace_back(std::move(branch));
     add_linear_edge(BB_then_logical->index, &ic->BB_invert);
     if (!ctx->cf_info.parent_loop.has_divergent_branch)
@@ -9635,7 +9415,9 @@ static void begin_divergent_if_else(isel_context *ctx, if_context *ic)
     BB_then_linear->kind |= block_kind_uniform;
     add_linear_edge(ic->BB_if_idx, BB_then_linear);
     /* branch from linear then block to invert block */
-   branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0));
+   branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1));
+   branch->definitions[0] = {ctx->program->allocateId(), s2};
+   branch->definitions[0].setHint(vcc);
     BB_then_linear->instructions.emplace_back(std::move(branch));
     add_linear_edge(BB_then_linear->index, &ic->BB_invert);
  
@@ -9644,7 +9426,9 @@ static void begin_divergent_if_else(isel_context *ctx, if_context *ic)
     ic->invert_idx = ctx->block->index;
  
     /* branch to linear else block (skip else) */
-   branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_cbranch_nz, Format::PSEUDO_BRANCH, 1, 0));
+   branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_cbranch_nz, Format::PSEUDO_BRANCH, 1, 1));
+   branch->definitions[0] = {ctx->program->allocateId(), s2};
+   branch->definitions[0].setHint(vcc);
     branch->operands[0] = Operand(ic->cond);
     ctx->block->instructions.push_back(std::move(branch));
  
@@ -9673,7 +9457,9 @@ static void end_divergent_if(isel_context *ctx, if_context *ic)
  
     /* branch from logical else block to endif block */
     aco_ptr<Pseudo_branch_instruction> branch;
-   branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0));
+   branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1));
+   branch->definitions[0] = {ctx->program->allocateId(), s2};
+   branch->definitions[0].setHint(vcc);
     BB_else_logical->instructions.emplace_back(std::move(branch));
     add_linear_edge(BB_else_logical->index, &ic->BB_endif);
     if (!ctx->cf_info.parent_loop.has_divergent_branch)
@@ -9691,7 +9477,9 @@ static void end_divergent_if(isel_context *ctx, if_context *ic)
     add_linear_edge(ic->invert_idx, BB_else_linear);
  
     /* branch from linear else block to endif block */
-   branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0));
+   branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1));
+   branch->definitions[0] = {ctx->program->allocateId(), s2};
+   branch->definitions[0].setHint(vcc);
     BB_else_linear->instructions.emplace_back(std::move(branch));
     add_linear_edge(BB_else_linear->index, &ic->BB_endif);
  
@@ -9728,7 +9516,9 @@ static void begin_uniform_if_then(isel_context *ctx, if_context *ic, Temp cond)
  
     aco_ptr<Pseudo_branch_instruction> branch;
     aco_opcode branch_opcode = aco_opcode::p_cbranch_z;
-   branch.reset(create_instruction<Pseudo_branch_instruction>(branch_opcode, Format::PSEUDO_BRANCH, 1, 0));
+   branch.reset(create_instruction<Pseudo_branch_instruction>(branch_opcode, Format::PSEUDO_BRANCH, 1, 1));
+   branch->definitions[0] = {ctx->program->allocateId(), s2};
+   branch->definitions[0].setHint(vcc);
     branch->operands[0] = Operand(cond);
     branch->operands[0].setFixed(scc);
     ctx->block->instructions.emplace_back(std::move(branch));
@@ -9760,7 +9550,9 @@ static void begin_uniform_if_else(isel_context *ctx, if_context *ic)
        append_logical_end(BB_then);
        /* branch from then block to endif block */
        aco_ptr<Pseudo_branch_instruction> branch;
-      branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0));
+      branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1));
+      branch->definitions[0] = {ctx->program->allocateId(), s2};
+      branch->definitions[0].setHint(vcc);
        BB_then->instructions.emplace_back(std::move(branch));
        add_linear_edge(BB_then->index, &ic->BB_endif);
        if (!ic->then_branch_divergent)
@@ -9787,7 +9579,9 @@ static void end_uniform_if(isel_context *ctx, if_context *ic)
        append_logical_end(BB_else);
        /* branch from then block to endif block */
        aco_ptr<Pseudo_branch_instruction> branch;
-      branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0));
+      branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1));
+      branch->definitions[0] = {ctx->program->allocateId(), s2};
+      branch->definitions[0].setHint(vcc);
        BB_else->instructions.emplace_back(std::move(branch));
        add_linear_edge(BB_else->index, &ic->BB_endif);
        if (!ctx->cf_info.parent_loop.has_divergent_branch)
@@ -9937,10 +9731,10 @@ static bool export_vs_varying(isel_context *ctx, int slot, bool is_pos, int *nex
        else
           exp->operands[i] = Operand(v1);
     }
-   /* Navi10-14 skip POS0 exports if EXEC=0 and DONE=0, causing a hang.
+   /* GFX10 (Navi1x) skip POS0 exports if EXEC=0 and DONE=0, causing a hang.
      * Setting valid_mask=1 prevents it and has no other effect.
      */
-   exp->valid_mask = ctx->options->chip_class >= GFX10 && is_pos && *next_pos == 0;
+   exp->valid_mask = ctx->options->chip_class == GFX10 && is_pos && *next_pos == 0;
     exp->done = false;
     exp->compressed = false;
     if (is_pos)
@@ -9982,7 +9776,7 @@ static void export_vs_psiz_layer_viewport(isel_context *ctx, int *next_pos)
           exp->enabled_mask |= 0x4;
        }
     }
-   exp->valid_mask = ctx->options->chip_class >= GFX10 && *next_pos == 0;
+   exp->valid_mask = ctx->options->chip_class == GFX10 && *next_pos == 0;
     exp->done = false;
     exp->compressed = false;
     exp->dest = V_008DFC_SQ_EXP_POS + (*next_pos)++;
@@ -11148,7 +10942,7 @@ void select_gs_copy_shader(Program *program, struct nir_shader *gs_shader,
           Temp cond = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), stream_id, Operand(stream));
           append_logical_end(ctx.block);
           ctx.block->kind |= block_kind_uniform;
-         bld.branch(aco_opcode::p_cbranch_z, cond);
+         bld.branch(aco_opcode::p_cbranch_z, bld.hint_vcc(bld.def(s2)), cond);
  
           BB_endif.kind |= ctx.block->kind & block_kind_top_level;
  
@@ -11210,7 +11004,7 @@ void select_gs_copy_shader(Program *program, struct nir_shader *gs_shader,
           append_logical_end(ctx.block);
  
           /* branch from then block to endif block */
-         bld.branch(aco_opcode::p_branch);
+         bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
           add_edge(ctx.block->index, &BB_endif);
           ctx.block->kind |= block_kind_uniform;
  
@@ -11232,7 +11026,7 @@ void select_gs_copy_shader(Program *program, struct nir_shader *gs_shader,
  
        append_logical_end(BB_else);
        /* branch from else block to endif block */
-      bld.branch(aco_opcode::p_branch);
+      bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
        add_edge(BB_else->index, &BB_endif);
        BB_else->kind |= block_kind_uniform;
  
@@ -11250,4 +11044,64 @@ void select_gs_copy_shader(Program *program, struct nir_shader *gs_shader,
  
     cleanup_cfg(program);
  }
+
+void select_trap_handler_shader(Program *program, struct nir_shader *shader, /* `shader` is not referenced in this body */
+                                ac_shader_config* config,
+                                struct radv_shader_args *args)
+{ /* Emits a fixed instruction sequence that stores TTMP0-TTMP1 and a few hardware registers into the buffer whose descriptor is loaded from TMA. */
+   assert(args->options->chip_class == GFX8); /* only GFX8 is accepted here */
+
+   init_program(program, compute_cs, args->shader_info,
+                args->options->chip_class, args->options->family, config);
+
+   isel_context ctx = {}; /* zero-initialized; only the fields set below are filled in */
+   ctx.program = program;
+   ctx.args = args;
+   ctx.options = args->options;
+   ctx.stage = program->stage;
+
+   ctx.block = ctx.program->create_and_insert_block(); /* the single block that receives every instruction emitted below */
+   ctx.block->loop_nest_depth = 0;
+   ctx.block->kind = block_kind_top_level;
+
+   program->workgroup_size = 1; /* XXX: hard-coded; presumably a placeholder value - TODO confirm */
+
+   add_startpgm(&ctx);
+   append_logical_start(ctx.block);
+
+   Builder bld(ctx.program, ctx.block);
+
+   /* Load the buffer descriptor from TMA into the four SGPRs starting at TTMP4. */
+   bld.smem(aco_opcode::s_load_dwordx4, Definition(PhysReg{ttmp4}, s4),
+            Operand(PhysReg{tma}, s2), Operand(0u));
+
+   /* Store TTMP0-TTMP1 through the descriptor held in TTMP4 (second operand is 0u, presumably the buffer offset). */
+   bld.smem(aco_opcode::s_buffer_store_dwordx2, Operand(PhysReg{ttmp4}, s4),
+            Operand(0u), Operand(PhysReg{ttmp0}, s2), memory_sync_info(), true);
+
+   uint32_t hw_regs_idx[] = { /* register ids passed to s_getreg_b32 below, in the order they are stored */
+      2, /* HW_REG_STATUS */
+      3, /* HW_REG_TRAP_STS */
+      4, /* HW_REG_HW_ID */
+      7, /* HW_REG_IB_STS */
+   };
+
+   /* Store some hardware registers, one dword each; TTMP8 is reused as the staging register on every iteration. */
+   for (unsigned i = 0; i < ARRAY_SIZE(hw_regs_idx); i++) {
+      /* Immediate encoding: "((size - 1) << 11) | register"; size is 20 here. */
+      bld.sopk(aco_opcode::s_getreg_b32, Definition(PhysReg{ttmp8}, s1),
+               ((20 - 1) << 11) | hw_regs_idx[i]);
+
+      bld.smem(aco_opcode::s_buffer_store_dword, Operand(PhysReg{ttmp4}, s4),
+               Operand(8u + i * 4), Operand(PhysReg{ttmp8}, s1), memory_sync_info(), true); /* 8 + 4*i: presumably the byte offset just past the 8 bytes written for TTMP0-TTMP1 */
+   }
+
+   program->config->float_mode = program->blocks[0].fp_mode.val; /* float mode is taken from the first block's fp_mode */
+
+   append_logical_end(ctx.block);
+   ctx.block->kind |= block_kind_uniform;
+   bld.sopp(aco_opcode::s_endpgm); /* last instruction emitted into the block */
+
+   cleanup_cfg(program);
+}
  }