if (instr->intrinsic == nir_intrinsic_read_invocation || !nir_src_is_divergent(instr->src[1]))
tid = bld.as_uniform(tid);
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
- if (src.regClass() == v1) {
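+ /* Sub-dword (8/16-bit) sources: bpermute shuffles full dwords, so permute in a v1 temporary and extract the low bits afterwards. */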
+ if (src.regClass() == v1b || src.regClass() == v2b) {
+ Temp tmp = bld.tmp(v1);
+ tmp = emit_wqm(ctx, emit_bpermute(ctx, bld, tid, src), tmp);
+ if (dst.type() == RegType::vgpr)
+ bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(src.regClass() == v1b ? v3b : v2b), tmp);
+ else
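+ /* SGPR destinations hold sub-dword values in a full register, so no split is needed. */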
+ bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), tmp);
+ } else if (src.regClass() == v1) {
emit_wqm(ctx, emit_bpermute(ctx, bld, tid, src), dst);
} else if (src.regClass() == v2) {
   Temp lo = bld.tmp(v1), hi = bld.tmp(v1);
   bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src);
   lo = emit_wqm(ctx, emit_bpermute(ctx, bld, tid, lo));
   hi = emit_wqm(ctx, emit_bpermute(ctx, bld, tid, hi));
   bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi);
}

if (instr->dest.ssa.bit_size == 1) {
   Temp tmp = bld.tmp(bld.lm);
   bld.sop1(Builder::s_wqm, Definition(tmp),
            bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), mask_tmp,
                     bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm))));
   emit_wqm(ctx, tmp, dst);
+ } else if (instr->dest.ssa.bit_size == 8) {
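+ /* DPP moves whole dwords: broadcast into a v1 temporary, then split off the low byte (the v3b definition receives the unused upper bytes). */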
+ Temp tmp = bld.tmp(v1);
+ emit_wqm(ctx, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl), tmp);
+ bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v3b), tmp);
+ } else if (instr->dest.ssa.bit_size == 16) {
+ Temp tmp = bld.tmp(v1);
+ emit_wqm(ctx, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl), tmp);
+ bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v2b), tmp);
} else if (instr->dest.ssa.bit_size == 32) {
   if (ctx->program->chip_class >= GFX8)
      emit_wqm(ctx, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl), dst);
   else
      emit_wqm(ctx, bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, (1 << 15) | dpp_ctrl), dst);
}

if (instr->dest.ssa.bit_size == 1) {
   src = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand(0u), Operand((uint32_t)-1), src);
   src = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, (1 << 15) | dpp_ctrl);
   Temp tmp = bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand(0u), src);
   emit_wqm(ctx, tmp, dst);
+ } else if (instr->dest.ssa.bit_size == 8) {
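+ /* As above: move the full dword, then extract the low byte/halfword. */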
+ Temp tmp = bld.tmp(v1);
+ emit_wqm(ctx, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl), tmp);
+ bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v3b), tmp);
+ } else if (instr->dest.ssa.bit_size == 16) {
+ Temp tmp = bld.tmp(v1);
+ emit_wqm(ctx, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl), tmp);
+ bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v2b), tmp);
} else if (instr->dest.ssa.bit_size == 32) {
   Temp tmp;
   if (ctx->program->chip_class >= GFX8)
      tmp = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl);
   else
      tmp = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, (1 << 15) | dpp_ctrl);
   emit_wqm(ctx, tmp, get_ssa_temp(ctx, &instr->dest.ssa));
}
break;
}
- case nir_intrinsic_shader_clock:
- bld.smem(aco_opcode::s_memtime, Definition(get_ssa_temp(ctx, &instr->dest.ssa)), false);
+ case nir_intrinsic_shader_clock: {
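+ /* Device scope requires s_memrealtime (constant-rate, consistent across shader engines); s_memtime counts shader-core clocks local to one SE. */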
+ aco_opcode opcode =
+ nir_intrinsic_memory_scope(instr) == NIR_SCOPE_DEVICE ?
+ aco_opcode::s_memrealtime : aco_opcode::s_memtime;
+ bld.smem(opcode, Definition(get_ssa_temp(ctx, &instr->dest.ssa)), false);
emit_split_vector(ctx, get_ssa_temp(ctx, &instr->dest.ssa), 2);
break;
+ }
case nir_intrinsic_load_vertex_id_zero_base: {
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
bld.copy(Definition(dst), get_arg(ctx, ctx->args->ac.vertex_id));
   break;
}
if (instr->op == nir_texop_tg4) {
- if (has_offset) {
- opcode = aco_opcode::image_gather4_lz_o;
- if (has_compare)
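+ /* tg4 normally uses the _lz opcodes; the explicit LOD/bias forms below are needed e.g. for VK_AMD_texture_gather_bias_lod. */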
+ if (has_offset) { /* image_gather4_*_o */
+ if (has_compare) {
opcode = aco_opcode::image_gather4_c_lz_o;
+ if (has_lod)
+ opcode = aco_opcode::image_gather4_c_l_o;
+ if (has_bias)
+ opcode = aco_opcode::image_gather4_c_b_o;
+ } else {
+ opcode = aco_opcode::image_gather4_lz_o;
+ if (has_lod)
+ opcode = aco_opcode::image_gather4_l_o;
+ if (has_bias)
+ opcode = aco_opcode::image_gather4_b_o;
+ }
} else {
- opcode = aco_opcode::image_gather4_lz;
- if (has_compare)
+ if (has_compare) {
opcode = aco_opcode::image_gather4_c_lz;
+ if (has_lod)
+ opcode = aco_opcode::image_gather4_c_l;
+ if (has_bias)
+ opcode = aco_opcode::image_gather4_c_b;
+ } else {
+ opcode = aco_opcode::image_gather4_lz;
+ if (has_lod)
+ opcode = aco_opcode::image_gather4_l;
+ if (has_bias)
+ opcode = aco_opcode::image_gather4_b;
+ }
}
} else if (instr->op == nir_texop_lod) {
opcode = aco_opcode::image_get_lod;