aco: implement 16-bit reduce operations on GFX6-GFX7
authorSamuel Pitoiset <samuel.pitoiset@gmail.com>
Mon, 25 May 2020 17:59:57 +0000 (19:59 +0200)
committerSamuel Pitoiset <samuel.pitoiset@gmail.com>
Wed, 3 Jun 2020 17:48:37 +0000 (19:48 +0200)
No fp16 on GFX6-GFX7.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5227>

src/amd/compiler/aco_lower_to_hw_instr.cpp

index 05790366a71de605f7940e3d3c78af0938247cb4..df7b571c529971df1486c14b086ece3f928e4845 100644 (file)
@@ -48,19 +48,67 @@ aco_opcode get_reduce_opcode(chip_class chip, ReduceOp op) {
     */
    switch (op) {
    case iadd8:
-   case iadd16: return chip >= GFX10 ? aco_opcode::v_add_u32 : aco_opcode::v_add_u16;
+   case iadd16:
+      if (chip >= GFX10) {
+         return aco_opcode::v_add_u32;
+      } else if (chip >= GFX8) {
+         return aco_opcode::v_add_u16;
+      } else {
+         return aco_opcode::v_add_co_u32;
+      }
+      break;
    case imul8:
-   case imul16: return chip >= GFX10 ? aco_opcode::v_mul_lo_u16_e64 : aco_opcode::v_mul_lo_u16;
+   case imul16:
+      if (chip >= GFX10) {
+         return aco_opcode::v_mul_lo_u16_e64;
+      } else if (chip >= GFX8) {
+         return aco_opcode::v_mul_lo_u16;
+      } else {
+         return aco_opcode::v_mul_u32_u24;
+      }
+      break;
    case fadd16: return aco_opcode::v_add_f16;
    case fmul16: return aco_opcode::v_mul_f16;
    case imax8:
-   case imax16: return chip >= GFX10 ? aco_opcode::v_max_i32 : aco_opcode::v_max_i16;
+   case imax16:
+      if (chip >= GFX10) {
+         return aco_opcode::v_max_i32;
+      } else if (chip >= GFX8) {
+         return aco_opcode::v_max_i16;
+      } else {
+         return aco_opcode::v_max_i32;
+      }
+      break;
    case imin8:
-   case imin16: return chip >= GFX10 ? aco_opcode::v_min_i32 : aco_opcode::v_min_i16;
+   case imin16:
+      if (chip >= GFX10) {
+         return aco_opcode::v_min_i32;
+      } else if (chip >= GFX8) {
+         return aco_opcode::v_min_i16;
+      } else {
+         return aco_opcode::v_min_i32;
+      }
+      break;
    case umin8:
-   case umin16: return chip >= GFX10 ? aco_opcode::v_min_u32 : aco_opcode::v_min_u16;
+   case umin16:
+      if (chip >= GFX10) {
+         return aco_opcode::v_min_u32;
+      } else if (chip >= GFX8) {
+         return aco_opcode::v_min_u16;
+      } else {
+         return aco_opcode::v_min_u32;
+      }
+      break;
    case umax8:
-   case umax16: return chip >= GFX10 ? aco_opcode::v_max_u32 : aco_opcode::v_max_u16;
+   case umax16:
+      if (chip >= GFX10) {
+         return aco_opcode::v_max_u32;
+      } else if (chip >= GFX8) {
+         return aco_opcode::v_max_u16;
+      } else {
+         return aco_opcode::v_max_u32;
+      }
+      break;
    case fmin16: return aco_opcode::v_min_f16;
    case fmax16: return aco_opcode::v_max_f16;
    case iadd32: return chip >= GFX9 ? aco_opcode::v_add_u32 : aco_opcode::v_add_co_u32;