S/390: Handle long-running instructions.
authorRobin Dapp <rdapp@linux.vnet.ibm.com>
Wed, 18 Oct 2017 09:10:09 +0000 (09:10 +0000)
committerRobin Dapp <rdapp@gcc.gnu.org>
Wed, 18 Oct 2017 09:10:09 +0000 (09:10 +0000)
gcc/ChangeLog:

2017-10-18  Robin Dapp  <rdapp@linux.vnet.ibm.com>

* config/s390/s390.c (NUM_SIDES): New variable.
(LONGRUNNING_THRESHOLD): New variable.
(LATENCY_FACTOR): New variable.
(s390_sched_score): Decrease score for long-running instructions on
wrong side.
(s390_sched_variable_issue): Perform bookkeeping for long-running
instructions.

From-SVN: r253849

gcc/ChangeLog
gcc/config/s390/s390.c

index 5d0dd0b2cef93e408d6ae83c09b44345ae7934f7..5850f0035dcb76ea8d75c9273d480447d57ce3fe 100644 (file)
@@ -1,3 +1,13 @@
+2017-10-18  Robin Dapp  <rdapp@linux.vnet.ibm.com>
+
+       * config/s390/s390.c (NUM_SIDES): New variable.
+       (LONGRUNNING_THRESHOLD): New variable.
+       (LATENCY_FACTOR): New variable.
+       (s390_sched_score): Decrease score for long-running instructions on
+       wrong side.
+       (s390_sched_variable_issue): Perform bookkeeping for long-running
+       instructions.
+
 2017-10-18  Richard Biener  <rguenther@suse.de>
 
        * graphite-isl-ast-to-gimple.c (gcc_expression_from_isl_ast_expr_id):
index 6e3ac3b7d0f96dac56df0f8fa7573cdcc1b87ea3..021993f79421dad2f7c10fb20810c52f943599fc 100644 (file)
@@ -355,6 +355,18 @@ static rtx_insn *last_scheduled_insn;
 #define MAX_SCHED_UNITS 3
 static int last_scheduled_unit_distance[MAX_SCHED_UNITS];
 
+#define NUM_SIDES 2
+static int current_side = 1;
+#define LONGRUNNING_THRESHOLD 5
+
+/* Estimate of number of cycles a long-running insn occupies an
+   execution unit.  */
+static unsigned fxu_longrunning[NUM_SIDES];
+static unsigned vfu_longrunning[NUM_SIDES];
+
+/* Factor to scale latencies by, determined by measurements.  */
+#define LATENCY_FACTOR 4
+
 /* The maximum score added for an instruction whose unit hasn't been
    in use for MAX_SCHED_MIX_DISTANCE steps.  Increase this value to
    give instruction mix scheduling more priority over instruction
@@ -14755,7 +14767,24 @@ s390_sched_score (rtx_insn *insn)
        if (m & unit_mask)
          score += (last_scheduled_unit_distance[i] * MAX_SCHED_MIX_SCORE /
                    MAX_SCHED_MIX_DISTANCE);
+
+      unsigned latency = insn_default_latency (insn);
+
+      int other_side = 1 - current_side;
+
+      /* Try to delay long-running insns when side is busy.  */
+      if (latency > LONGRUNNING_THRESHOLD)
+       {
+         if (get_attr_z13_unit_fxu (insn) && fxu_longrunning[current_side]
+             && fxu_longrunning[other_side] <= fxu_longrunning[current_side])
+           score = MAX (0, score - 10);
+
+         if (get_attr_z13_unit_vfu (insn) && vfu_longrunning[current_side]
+             && vfu_longrunning[other_side] <= vfu_longrunning[current_side])
+           score = MAX (0, score - 10);
+       }
     }
+
   return score;
 }
 
@@ -14874,12 +14903,19 @@ s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
 {
   last_scheduled_insn = insn;
 
+  bool starts_group = false;
+
   if (s390_tune >= PROCESSOR_2827_ZEC12
       && reload_completed
       && recog_memoized (insn) >= 0)
     {
       unsigned int mask = s390_get_sched_attrmask (insn);
 
+      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
+         || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0
+         || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
+       starts_group = true;
+
       if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
          || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
        s390_sched_state = S390_SCHED_STATE_CRACKED;
@@ -14892,14 +14928,15 @@ s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
          switch (s390_sched_state)
            {
            case 0:
+             starts_group = true;
+             /* fallthrough */
            case 1:
            case 2:
+             s390_sched_state++;
+             break;
            case S390_SCHED_STATE_NORMAL:
-             if (s390_sched_state == S390_SCHED_STATE_NORMAL)
-               s390_sched_state = 1;
-             else
-               s390_sched_state++;
-
+             starts_group = true;
+             s390_sched_state = 1;
              break;
            case S390_SCHED_STATE_CRACKED:
              s390_sched_state = S390_SCHED_STATE_NORMAL;
@@ -14922,6 +14959,27 @@ s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
              last_scheduled_unit_distance[i]++;
        }
 
+      /* If this insn started a new group, the side flipped.  */
+      if (starts_group)
+       current_side = current_side ? 0 : 1;
+
+      for (int i = 0; i < 2; i++)
+       {
+         if (fxu_longrunning[i] >= 1)
+           fxu_longrunning[i] -= 1;
+         if (vfu_longrunning[i] >= 1)
+           vfu_longrunning[i] -= 1;
+       }
+
+      unsigned latency = insn_default_latency (insn);
+      if (latency > LONGRUNNING_THRESHOLD)
+       {
+         if (get_attr_z13_unit_fxu (insn))
+           fxu_longrunning[current_side] = latency * LATENCY_FACTOR;
+         else
+           vfu_longrunning[current_side] = latency * LATENCY_FACTOR;
+       }
+
       if (verbose > 5)
        {
          unsigned int sched_mask;