lima/gpir: Always schedule complex2 and *_impl right after complex1

author Connor Abbott <cwabbott0@gmail.com>

Sat, 27 Jul 2019 23:13:10 +0000 (01:13 +0200)

committer Connor Abbott <cwabbott0@gmail.com>

Tue, 30 Jul 2019 21:00:41 +0000 (23:00 +0200)
author Connor Abbott <cwabbott0@gmail.com>
Sat, 27 Jul 2019 23:13:10 +0000 (01:13 +0200)
committer Connor Abbott <cwabbott0@gmail.com>
Tue, 30 Jul 2019 21:00:41 +0000 (23:00 +0200)
diff --git a/src/gallium/drivers/lima/ir/gp/gpir.h b/src/gallium/drivers/lima/ir/gp/gpir.h

index de571c3c1c4e6d36aa9148468338ed5ac7fb018f..e7707814b7c215b7faf807a14bc64e8eee1cf38d 100644 (file)
--- a/src/gallium/drivers/lima/ir/gp/gpir.h
+++ b/src/gallium/drivers/lima/ir/gp/gpir.h
@@ -120,6 +120,7 @@ typedef struct {
     int *slots;
     gpir_node_type type;
     bool spillless;
+   bool schedule_first;
     bool may_consume_two_slots;
  } gpir_op_info;
  
@@ -299,14 +300,20 @@ typedef struct gpir_instr {
      *
      * (1) alu_num_slot_free >= alu_num_slot_needed_by_store +
      *       alu_num_slot_needed_by_max +
-    *       alu_num_slot_needed_by_next_max
+    *       max(alu_num_unscheduled_next_max - alu_max_allowed_next_max, 0)
      * (2) alu_non_cplx_slot_free >= alu_num_slot_needed_by_max +
      *       alu_num_slot_neede_by_non_cplx_store
+    *
+    * alu_max_allowed_next_max is normally 5 (since there can be at most 5 max
+    * nodes for the next instruction) but when there is a complex1 node in
+    * this instruction it reduces to 4 to reserve a slot for complex2 in the
+    * next instruction.
      */
     int alu_num_slot_needed_by_store;
     int alu_num_slot_needed_by_non_cplx_store;
     int alu_num_slot_needed_by_max;
-   int alu_num_slot_needed_by_next_max;
+   int alu_num_unscheduled_next_max;
+   int alu_max_allowed_next_max;
  
     /* Used to communicate to the scheduler how many slots need to be cleared
      * up in order to satisfy the invariants.
diff --git a/src/gallium/drivers/lima/ir/gp/instr.c b/src/gallium/drivers/lima/ir/gp/instr.c

index e07a2c9b7c266b175b82d84d8b5a355403da7c79..45e9d81714359c7e81afc43985d4334e57b56201 100644 (file)
--- a/src/gallium/drivers/lima/ir/gp/instr.c
+++ b/src/gallium/drivers/lima/ir/gp/instr.c
@@ -37,6 +37,7 @@ gpir_instr *gpir_instr_create(gpir_block *block)
     instr->index = block->sched.instr_index++;
     instr->alu_num_slot_free = 6;
     instr->alu_non_cplx_slot_free = 5;
+   instr->alu_max_allowed_next_max = 5;
  
     list_add(&instr->list, &block->instr_list);
     return instr;
@@ -96,6 +97,8 @@ static bool gpir_instr_insert_alu_check(gpir_instr *instr, gpir_node *node)
     int non_cplx_store_reduce_slot = 0;
     int max_reduce_slot = node->sched.max_node ? 1 : 0;
     int next_max_reduce_slot = node->sched.next_max_node ? 1 : 0;
+   int alu_new_max_allowed_next_max =
+      node->op == gpir_op_complex1 ? 4 : instr->alu_max_allowed_next_max;
  
     /* check if this node is child of one store node.
      * complex1 won't be any of this instr's store node's child,
@@ -117,7 +120,8 @@ static bool gpir_instr_insert_alu_check(gpir_instr *instr, gpir_node *node)
     int slot_difference = 
         instr->alu_num_slot_needed_by_store - store_reduce_slot +
         instr->alu_num_slot_needed_by_max - max_reduce_slot +
-       MAX2(instr->alu_num_slot_needed_by_next_max - next_max_reduce_slot, 0) -
+       MAX2(instr->alu_num_unscheduled_next_max - next_max_reduce_slot -
+            alu_new_max_allowed_next_max, 0) -
        (instr->alu_num_slot_free - consume_slot);
     if (slot_difference > 0) {
        gpir_debug("failed %d because of alu slot\n", node->index);
@@ -141,7 +145,8 @@ static bool gpir_instr_insert_alu_check(gpir_instr *instr, gpir_node *node)
     instr->alu_num_slot_needed_by_store -= store_reduce_slot;
     instr->alu_num_slot_needed_by_non_cplx_store -= non_cplx_store_reduce_slot;
     instr->alu_num_slot_needed_by_max -= max_reduce_slot;
-   instr->alu_num_slot_needed_by_next_max -= next_max_reduce_slot;
+   instr->alu_num_unscheduled_next_max -= next_max_reduce_slot;
+   instr->alu_max_allowed_next_max = alu_new_max_allowed_next_max;
     return true;
  }
  
@@ -165,7 +170,9 @@ static void gpir_instr_remove_alu(gpir_instr *instr, gpir_node *node)
     if (node->sched.max_node)
        instr->alu_num_slot_needed_by_max++;
     if (node->sched.next_max_node)
-      instr->alu_num_slot_needed_by_next_max++;
+      instr->alu_num_unscheduled_next_max++;
+   if (node->op == gpir_op_complex1)
+      instr->alu_max_allowed_next_max = 5;
  }
  
  static bool gpir_instr_insert_reg0_check(gpir_instr *instr, gpir_node *node)
@@ -312,7 +319,7 @@ static bool gpir_instr_insert_store_check(gpir_instr *instr, gpir_node *node)
      */
     int slot_difference = instr->alu_num_slot_needed_by_store + 1
        + instr->alu_num_slot_needed_by_max +
-      MAX2(instr->alu_num_slot_needed_by_next_max, 0) -
+      MAX2(instr->alu_num_unscheduled_next_max - instr->alu_max_allowed_next_max, 0) -
        instr->alu_num_slot_free;
     if (slot_difference > 0) {
        instr->slot_difference = slot_difference;
diff --git a/src/gallium/drivers/lima/ir/gp/node.c b/src/gallium/drivers/lima/ir/gp/node.c

index decda5f1246e66771fa0508f7d930c4f65760cd8..a8706627f38e8c5c6bc75ab5f57bec21acfa456a 100644 (file)
--- a/src/gallium/drivers/lima/ir/gp/node.c
+++ b/src/gallium/drivers/lima/ir/gp/node.c
@@ -58,6 +58,7 @@ const gpir_op_info gpir_op_infos[] = {
        .name = "complex2",
        .slots = (int []) { GPIR_INSTR_SLOT_MUL0, GPIR_INSTR_SLOT_END },
        .spillless = true,
+      .schedule_first = true,
     },
     [gpir_op_add] = {
        .name = "add",
@@ -154,11 +155,13 @@ const gpir_op_info gpir_op_infos[] = {
        .name = "rcp_impl",
        .slots = (int []) { GPIR_INSTR_SLOT_COMPLEX, GPIR_INSTR_SLOT_END },
        .spillless = true,
+      .schedule_first = true,
     },
     [gpir_op_rsqrt_impl] = {
        .name = "rsqrt_impl",
        .slots = (int []) { GPIR_INSTR_SLOT_COMPLEX, GPIR_INSTR_SLOT_END },
        .spillless = true,
+      .schedule_first = true,
     },
     [gpir_op_load_uniform] = {
        .name = "ld_uni",
diff --git a/src/gallium/drivers/lima/ir/gp/scheduler.c b/src/gallium/drivers/lima/ir/gp/scheduler.c

index 35925a1af51c8fa65c8ba7fa4f1f2deab8bf81f1..f06089b7992183b44470d9b0ced04dc53c54e5d8 100644 (file)
--- a/src/gallium/drivers/lima/ir/gp/scheduler.c
+++ b/src/gallium/drivers/lima/ir/gp/scheduler.c
@@ -441,7 +441,8 @@ static void schedule_insert_ready_list(sched_ctx *ctx,
  
     struct list_head *insert_pos = &ctx->ready_list;
     list_for_each_entry(gpir_node, node, &ctx->ready_list, list) {
-      if (insert_node->sched.dist > node->sched.dist) {
+      if (insert_node->sched.dist > node->sched.dist ||
+          gpir_op_infos[insert_node->op].schedule_first) {
           insert_pos = &node->list;
           break;
        }
@@ -916,7 +917,7 @@ static void spill_node(sched_ctx *ctx, gpir_node *node, gpir_store_node *store)
        }
        if (node->sched.next_max_node) {
           node->sched.next_max_node = false;
-         ctx->instr->alu_num_slot_needed_by_next_max--;
+         ctx->instr->alu_num_unscheduled_next_max--;
        }
     }
  }
@@ -1153,7 +1154,7 @@ static bool can_use_complex(gpir_node *node)
  
  static void sched_find_max_nodes(sched_ctx *ctx)
  {
-   ctx->instr->alu_num_slot_needed_by_next_max = -5;
+   ctx->instr->alu_num_unscheduled_next_max = 0;
     ctx->instr->alu_num_slot_needed_by_max = 0;
  
     list_for_each_entry(gpir_node, node, &ctx->ready_list, list) {
@@ -1169,7 +1170,7 @@ static void sched_find_max_nodes(sched_ctx *ctx)
        if (node->sched.max_node)
           ctx->instr->alu_num_slot_needed_by_max++;
        if (node->sched.next_max_node)
-         ctx->instr->alu_num_slot_needed_by_next_max++;
+         ctx->instr->alu_num_unscheduled_next_max++;
     }
  }
  
@@ -1179,9 +1180,10 @@ static void sched_find_max_nodes(sched_ctx *ctx)
  static void verify_max_nodes(sched_ctx *ctx)
  {
     int alu_num_slot_needed_by_max = 0;
-   int alu_num_slot_needed_by_next_max = -5;
+   int alu_num_unscheduled_next_max = 0;
     int alu_num_slot_needed_by_store = 0;
     int alu_num_slot_needed_by_non_cplx_store = 0;
+   int alu_max_allowed_next_max = 5;
  
     list_for_each_entry(gpir_node, node, &ctx->ready_list, list) {
        if (!gpir_is_input_node(node))
@@ -1190,7 +1192,7 @@ static void verify_max_nodes(sched_ctx *ctx)
        if (node->sched.max_node)
           alu_num_slot_needed_by_max++;
        if (node->sched.next_max_node)
-         alu_num_slot_needed_by_next_max++;
+         alu_num_unscheduled_next_max++;
        if (used_by_store(node, ctx->instr)) {
           alu_num_slot_needed_by_store++;
           if (node->sched.next_max_node && !node->sched.complex_allowed)
@@ -1198,12 +1200,17 @@ static void verify_max_nodes(sched_ctx *ctx)
        }
     }
  
+   if (ctx->instr->slots[GPIR_INSTR_SLOT_MUL0] &&
+       ctx->instr->slots[GPIR_INSTR_SLOT_MUL0]->op == gpir_op_complex1)
+      alu_max_allowed_next_max = 4;
+
     assert(ctx->instr->alu_num_slot_needed_by_max == alu_num_slot_needed_by_max);
-   assert(ctx->instr->alu_num_slot_needed_by_next_max == alu_num_slot_needed_by_next_max);
+   assert(ctx->instr->alu_num_unscheduled_next_max == alu_num_unscheduled_next_max);
+   assert(ctx->instr->alu_max_allowed_next_max == alu_max_allowed_next_max);
     assert(ctx->instr->alu_num_slot_needed_by_store == alu_num_slot_needed_by_store);
     assert(ctx->instr->alu_num_slot_needed_by_non_cplx_store ==
            alu_num_slot_needed_by_non_cplx_store);
-   assert(ctx->instr->alu_num_slot_free >= alu_num_slot_needed_by_store + alu_num_slot_needed_by_max + MAX2(alu_num_slot_needed_by_next_max, 0));
+   assert(ctx->instr->alu_num_slot_free >= alu_num_slot_needed_by_store + alu_num_slot_needed_by_max + MAX2(alu_num_unscheduled_next_max - alu_max_allowed_next_max, 0));
     assert(ctx->instr->alu_non_cplx_slot_free >= alu_num_slot_needed_by_max + alu_num_slot_needed_by_non_cplx_store);
  }
  
@@ -1237,6 +1244,13 @@ static bool try_node(sched_ctx *ctx)
              score = schedule_try_node(ctx, node, true);
           }
  
+         /* schedule_first nodes must be scheduled if possible */
+         if (gpir_op_infos[node->op].schedule_first && score != INT_MIN) {
+            best_node = node;
+            best_score = score;
+            break;
+         }
+
           if (score > best_score) {
              best_score = score;
              best_node = node;
@@ -1382,7 +1396,8 @@ static bool sched_move(sched_ctx *ctx)
      * need to insert the move.
      */
  
-   if (ctx->instr->alu_num_slot_needed_by_next_max > 0) {
+   if (ctx->instr->alu_num_unscheduled_next_max >
+       ctx->instr->alu_max_allowed_next_max) {
        list_for_each_entry(gpir_node, node, &ctx->ready_list, list) {
           if (!can_place_move(ctx, node))
              continue;
author	Connor Abbott <cwabbott0@gmail.com>
	Sat, 27 Jul 2019 23:13:10 +0000 (01:13 +0200)
committer	Connor Abbott <cwabbott0@gmail.com>
	Tue, 30 Jul 2019 21:00:41 +0000 (23:00 +0200)
src/gallium/drivers/lima/ir/gp/gpir.h		patch | blob | history
src/gallium/drivers/lima/ir/gp/instr.c		patch | blob | history
src/gallium/drivers/lima/ir/gp/node.c		patch | blob | history
src/gallium/drivers/lima/ir/gp/scheduler.c		patch | blob | history