aarch64: fix vector multiplication costs

author Andre Vieira <andre.simoesdiasvieira@arm.com>

Tue, 9 Feb 2021 18:09:19 +0000 (18:09 +0000)

committer Andre Vieira <andre.simoesdiasvieira@arm.com>

Tue, 9 Feb 2021 18:24:22 +0000 (18:24 +0000)
author Andre Vieira <andre.simoesdiasvieira@arm.com>
Tue, 9 Feb 2021 18:09:19 +0000 (18:09 +0000)
committer Andre Vieira <andre.simoesdiasvieira@arm.com>
Tue, 9 Feb 2021 18:24:22 +0000 (18:24 +0000)
diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h

index c309f88cbd56f0d2347996d860c982a3a6744492..dd2e7e7cbb13d24f0b51092270cd7e2d75fabf29 100644 (file)
--- a/gcc/config/aarch64/aarch64-cost-tables.h
+++ b/gcc/config/aarch64/aarch64-cost-tables.h
@@ -123,7 +123,8 @@ const struct cpu_cost_table qdf24xx_extra_costs =
    },
    /* Vector */
    {
-    COSTS_N_INSNS (1)  /* alu.  */
+    COSTS_N_INSNS (1),  /* alu.  */
+    COSTS_N_INSNS (4)   /* mult.  */
    }
  };
  
@@ -227,7 +228,8 @@ const struct cpu_cost_table thunderx_extra_costs =
    },
    /* Vector */
    {
-    COSTS_N_INSNS (1)  /* Alu.  */
+    COSTS_N_INSNS (1), /* Alu.  */
+    COSTS_N_INSNS (4)  /* mult.  */
    }
  };
  
@@ -330,7 +332,8 @@ const struct cpu_cost_table thunderx2t99_extra_costs =
    },
    /* Vector */
    {
-    COSTS_N_INSNS (1)  /* Alu.  */
+    COSTS_N_INSNS (1), /* Alu.  */
+    COSTS_N_INSNS (4)  /* Mult.  */
    }
  };
  
@@ -433,7 +436,8 @@ const struct cpu_cost_table thunderx3t110_extra_costs =
    },
    /* Vector */
    {
-    COSTS_N_INSNS (1)  /* Alu.  */
+    COSTS_N_INSNS (1), /* Alu.  */
+    COSTS_N_INSNS (4)  /* Mult.  */
    }
  };
  
@@ -537,7 +541,8 @@ const struct cpu_cost_table tsv110_extra_costs =
    },
    /* Vector */
    {
-    COSTS_N_INSNS (1)  /* alu.  */
+    COSTS_N_INSNS (1),  /* alu.  */
+    COSTS_N_INSNS (4)   /* mult.  */
    }
  };
  
@@ -640,7 +645,8 @@ const struct cpu_cost_table a64fx_extra_costs =
    },
    /* Vector */
    {
-    COSTS_N_INSNS (1)  /* alu.  */
+    COSTS_N_INSNS (1),  /* alu.  */
+    COSTS_N_INSNS (4)   /* mult.  */
    }
  };
  
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c

index b6192e55521004ae70cd13acbdb4dab142216845..146ed8c1b693d7204a754bc4e6d17025e0af544b 100644 (file)
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -11568,7 +11568,6 @@ aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
    if (VECTOR_MODE_P (mode))
      {
        unsigned int vec_flags = aarch64_classify_vector_mode (mode);
-      mode = GET_MODE_INNER (mode);
        if (vec_flags & VEC_ADVSIMD)
         {
           /* The by-element versions of the instruction have the same costs as
@@ -11582,6 +11581,17 @@ aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
           else if (GET_CODE (op1) == VEC_DUPLICATE)
             op1 = XEXP (op1, 0);
         }
+      cost += rtx_cost (op0, mode, MULT, 0, speed);
+      cost += rtx_cost (op1, mode, MULT, 1, speed);
+      if (speed)
+       {
+         if (GET_CODE (x) == MULT)
+           cost += extra_cost->vect.mult;
+         /* This is to catch the SSRA costing currently flowing here.  */
+         else
+           cost += extra_cost->vect.alu;
+       }
+      return cost;
      }
  
    /* Integer multiply/fma.  */
diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h

index 251de3d61a833a2bb4b77e9211cac7fbc17c0b75..7a9cf3d324c103de74af741abe9ef30b76fea5ce 100644 (file)
--- a/gcc/config/arm/aarch-common-protos.h
+++ b/gcc/config/arm/aarch-common-protos.h
@@ -132,6 +132,7 @@ struct fp_cost_table
  struct vector_cost_table
  {
    const int alu;
+  const int mult;
  };
  
  struct cpu_cost_table
diff --git a/gcc/config/arm/aarch-cost-tables.h b/gcc/config/arm/aarch-cost-tables.h

index d4baee4f83ad7bcdb1835a471e4eafedbf63ee2d..25ff702f01fab50d749b9a7b7b072c2be2504562 100644 (file)
--- a/gcc/config/arm/aarch-cost-tables.h
+++ b/gcc/config/arm/aarch-cost-tables.h
@@ -121,7 +121,8 @@ const struct cpu_cost_table generic_extra_costs =
    },
    /* Vector */
    {
-    COSTS_N_INSNS (1)  /* alu.  */
+    COSTS_N_INSNS (1), /* alu.  */
+    COSTS_N_INSNS (4)   /* mult.  */
    }
  };
  
@@ -224,7 +225,8 @@ const struct cpu_cost_table cortexa53_extra_costs =
    },
    /* Vector */
    {
-    COSTS_N_INSNS (1)  /* alu.  */
+    COSTS_N_INSNS (1), /* alu.  */
+    COSTS_N_INSNS (4)   /* mult.  */
    }
  };
  
@@ -327,7 +329,8 @@ const struct cpu_cost_table cortexa57_extra_costs =
    },
    /* Vector */
    {
-    COSTS_N_INSNS (1)  /* alu.  */
+    COSTS_N_INSNS (1),  /* alu.  */
+    COSTS_N_INSNS (4)   /* mult.  */
    }
  };
  
@@ -430,7 +433,8 @@ const struct cpu_cost_table cortexa76_extra_costs =
    },
    /* Vector */
    {
-    COSTS_N_INSNS (1)  /* alu.  */
+    COSTS_N_INSNS (1),  /* alu.  */
+    COSTS_N_INSNS (4)   /* mult.  */
    }
  };
  
@@ -533,7 +537,8 @@ const struct cpu_cost_table exynosm1_extra_costs =
    },
    /* Vector */
    {
-    COSTS_N_INSNS (0)  /* alu.  */
+    COSTS_N_INSNS (0),  /* alu.  */
+    COSTS_N_INSNS (4)   /* mult.  */
    }
  };
  
@@ -636,7 +641,8 @@ const struct cpu_cost_table xgene1_extra_costs =
    },
    /* Vector */
    {
-    COSTS_N_INSNS (2)  /* alu.  */
+    COSTS_N_INSNS (2),  /* alu.  */
+    COSTS_N_INSNS (8)   /* mult.  */
    }
  };
  
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c

index e22396dbcd59ee41d06224e356eea2b9d4df9339..d254f417541b4f69a52a026c0342cf7e62d2db4d 100644 (file)
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -1192,7 +1192,8 @@ const struct cpu_cost_table cortexa9_extra_costs =
    },
    /* Vector */
    {
-    COSTS_N_INSNS (1)  /* alu.  */
+    COSTS_N_INSNS (1), /* alu.  */
+    COSTS_N_INSNS (4)  /* mult.  */
    }
  };
  
@@ -1295,7 +1296,8 @@ const struct cpu_cost_table cortexa8_extra_costs =
    },
    /* Vector */
    {
-    COSTS_N_INSNS (1)  /* alu.  */
+    COSTS_N_INSNS (1), /* alu.  */
+    COSTS_N_INSNS (4)  /* mult.  */
    }
  };
  
@@ -1399,7 +1401,8 @@ const struct cpu_cost_table cortexa5_extra_costs =
    },
    /* Vector */
    {
-    COSTS_N_INSNS (1)  /* alu.  */
+    COSTS_N_INSNS (1), /* alu.  */
+    COSTS_N_INSNS (4)  /* mult.  */
    }
  };
  
@@ -1504,7 +1507,8 @@ const struct cpu_cost_table cortexa7_extra_costs =
    },
    /* Vector */
    {
-    COSTS_N_INSNS (1)  /* alu.  */
+    COSTS_N_INSNS (1), /* alu.  */
+    COSTS_N_INSNS (4)  /* mult.  */
    }
  };
  
@@ -1607,7 +1611,8 @@ const struct cpu_cost_table cortexa12_extra_costs =
    },
    /* Vector */
    {
-    COSTS_N_INSNS (1)  /* alu.  */
+    COSTS_N_INSNS (1), /* alu.  */
+    COSTS_N_INSNS (4)  /* mult.  */
    }
  };
  
@@ -1710,7 +1715,8 @@ const struct cpu_cost_table cortexa15_extra_costs =
    },
    /* Vector */
    {
-    COSTS_N_INSNS (1)  /* alu.  */
+    COSTS_N_INSNS (1), /* alu.  */
+    COSTS_N_INSNS (4)  /* mult.  */
    }
  };
  
@@ -1813,7 +1819,8 @@ const struct cpu_cost_table v7m_extra_costs =
    },
    /* Vector */
    {
-    COSTS_N_INSNS (1)  /* alu.  */
+    COSTS_N_INSNS (1), /* alu.  */
+    COSTS_N_INSNS (4)  /* mult.  */
    }
  };
  
diff --git a/gcc/testsuite/gcc.target/aarch64/asimd-mul-to-shl-sub.c b/gcc/testsuite/gcc.target/aarch64/asimd-mul-to-shl-sub.c

new file mode 100644 (file)

index 0000000..d7c5e5f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/asimd-mul-to-shl-sub.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-options "-Ofast" } */
+
+/*
+**foo:
+**     shl     v1.4s, v0.4s, 16
+**     sub     v0.4s, v1.4s, v0.4s
+**     ret
+*/
+#include <arm_neon.h>
+uint32x4_t foo (uint32x4_t a)
+{
+  return a * 65535;
+}
+
+/* { dg-final { check-function-bodies "**" "" "" } } */
author	Andre Vieira <andre.simoesdiasvieira@arm.com>
	Tue, 9 Feb 2021 18:09:19 +0000 (18:09 +0000)
committer	Andre Vieira <andre.simoesdiasvieira@arm.com>
	Tue, 9 Feb 2021 18:24:22 +0000 (18:24 +0000)
gcc/config/aarch64/aarch64-cost-tables.h		patch \| blob \| history
gcc/config/aarch64/aarch64.c		patch \| blob \| history
gcc/config/arm/aarch-common-protos.h		patch \| blob \| history
gcc/config/arm/aarch-cost-tables.h		patch \| blob \| history
gcc/config/arm/arm.c		patch \| blob \| history
gcc/testsuite/gcc.target/aarch64/asimd-mul-to-shl-sub.c	[new file with mode: 0644]	patch \| blob