+static void fix_branches_gfx10(asm_context& ctx, std::vector<uint32_t>& out)
+{
+ /* Branches with an offset of 0x3f are buggy on GFX10, we workaround by inserting NOPs if needed. */
+ bool gfx10_3f_bug = false;
+
+ do {
+ auto buggy_branch_it = std::find_if(ctx.branches.begin(), ctx.branches.end(), [&ctx](const auto &branch) -> bool {
+ return ((int)ctx.program->blocks[branch.second->block].offset - branch.first - 1) == 0x3f;
+ });
+
+ gfx10_3f_bug = buggy_branch_it != ctx.branches.end();
+
+ if (gfx10_3f_bug) {
+ /* Insert an s_nop after the branch */
+ constexpr uint32_t s_nop_0 = 0xbf800000u;
+ int s_nop_pos = buggy_branch_it->first + 1;
+ auto out_pos = std::next(out.begin(), s_nop_pos);
+ out.insert(out_pos, s_nop_0);
+
+ /* Update the offset of each affected block */
+ for (Block& block : ctx.program->blocks) {
+ if (block.offset > (unsigned)buggy_branch_it->first)
+ block.offset++;
+ }
+
+ /* Update the branches following the current one */
+ for (auto branch_it = std::next(buggy_branch_it); branch_it != ctx.branches.end(); ++branch_it)
+ branch_it->first++;
+
+ /* Find first constant address after the inserted instruction */
+ auto caddr_it = std::find_if(ctx.constaddrs.begin(), ctx.constaddrs.end(), [s_nop_pos](const int &caddr_pos) -> bool {
+ return caddr_pos >= s_nop_pos;
+ });
+
+ /* Update the locations of constant addresses */
+ for (; caddr_it != ctx.constaddrs.end(); ++caddr_it)
+ (*caddr_it)++;
+
+ }
+ } while (gfx10_3f_bug);
+}
+