lima/gp: Support exp2 and log2
author Connor Abbott <cwabbott0@gmail.com>
Sun, 21 Apr 2019 19:46:46 +0000 (21:46 +0200)
committer Connor Abbott <cwabbott0@gmail.com>
Tue, 30 Jul 2019 21:01:15 +0000 (23:01 +0200)
log2 is tricky because there cannot be a move between complex1 and
postlog2. We can't guarantee that scheduling complex1 will succeed when
we schedule postlog2, so we try to schedule complex1 and if it fails we
back out by rewriting the postlog2 as a move and introducing a new
postlog2 so that we can try again later.

Signed-off-by: Connor Abbott <cwabbott0@gmail.com>
Acked-by: Qiang Yu <yuq825@gmail.com>
src/gallium/drivers/lima/ir/gp/codegen.c
src/gallium/drivers/lima/ir/gp/lower.c
src/gallium/drivers/lima/ir/gp/nir.c
src/gallium/drivers/lima/ir/gp/node.c
src/gallium/drivers/lima/ir/gp/scheduler.c

index 9bc279e0119a8b257223b02a834df4c353292499..19eb38c18a77d68b12fae0c8fe9d17fdae78b7c9 100644 (file)
@@ -376,6 +376,8 @@ static void gpir_codegen_complex_slot(gpir_codegen_instr *code, gpir_instr *inst
    case gpir_op_mov:
    case gpir_op_rcp_impl:
    case gpir_op_rsqrt_impl:
+   case gpir_op_exp2_impl:
+   case gpir_op_log2_impl:
    {
       gpir_alu_node *alu = gpir_node_to_alu(node);
       code->complex_src = gpir_get_alu_input(node, alu->children[0]);
@@ -395,6 +397,12 @@ static void gpir_codegen_complex_slot(gpir_codegen_instr *code, gpir_instr *inst
    case gpir_op_rsqrt_impl:
       code->complex_op = gpir_codegen_complex_op_rsqrt;
       break;
+   case gpir_op_exp2_impl:
+      code->complex_op = gpir_codegen_complex_op_exp2;
+      break;
+   case gpir_op_log2_impl:
+      code->complex_op = gpir_codegen_complex_op_log2;
+      break;
    default:
       assert(0);
    }
@@ -410,14 +418,19 @@ static void gpir_codegen_pass_slot(gpir_codegen_instr *code, gpir_instr *instr)
       return;
    }
 
+   gpir_alu_node *alu = gpir_node_to_alu(node);
+   code->pass_src = gpir_get_alu_input(node, alu->children[0]);
+
    switch (node->op) {
    case gpir_op_mov:
-   {
-      gpir_alu_node *alu = gpir_node_to_alu(node);
-      code->pass_src = gpir_get_alu_input(node, alu->children[0]);
       code->pass_op = gpir_codegen_pass_op_pass;
       break;
-   }
+   case gpir_op_preexp2:
+      code->pass_op = gpir_codegen_pass_op_preexp2;
+      break;
+   case gpir_op_postlog2:
+      code->pass_op = gpir_codegen_pass_op_postlog2;
+      break;
    default:
       assert(0);
    }
index 38c24851c2ebf64a1c348f08e9a00e8883094bcb..6c5f2db7e333161731718816aa7de06e08845df0 100644 (file)
@@ -177,6 +177,19 @@ static bool gpir_lower_complex(gpir_block *block, gpir_node *node)
    gpir_alu_node *alu = gpir_node_to_alu(node);
    gpir_node *child = alu->children[0];
 
+   if (node->op == gpir_op_exp2) {
+      gpir_alu_node *preexp2 = gpir_node_create(block, gpir_op_preexp2);
+      if (unlikely(!preexp2))
+         return false;
+
+      preexp2->children[0] = child;
+      preexp2->num_child = 1;
+      gpir_node_add_dep(&preexp2->node, child, GPIR_DEP_INPUT);
+      list_addtail(&preexp2->node.list, &node->list);
+
+      child = &preexp2->node;
+   }
+
    gpir_alu_node *complex2 = gpir_node_create(block, gpir_op_complex2);
    if (unlikely(!complex2))
       return false;
@@ -194,6 +207,12 @@ static bool gpir_lower_complex(gpir_block *block, gpir_node *node)
    case gpir_op_rsqrt:
       impl_op = gpir_op_rsqrt_impl;
       break;
+   case gpir_op_exp2:
+      impl_op = gpir_op_exp2_impl;
+      break;
+   case gpir_op_log2:
+      impl_op = gpir_op_log2_impl;
+      break;
    default:
       assert(0);
    }
@@ -207,14 +226,33 @@ static bool gpir_lower_complex(gpir_block *block, gpir_node *node)
    gpir_node_add_dep(&impl->node, child, GPIR_DEP_INPUT);
    list_addtail(&impl->node.list, &node->list);
 
-   /* change node to complex1 node */
-   node->op = gpir_op_complex1;
-   alu->children[0] = &impl->node;
-   alu->children[1] = &complex2->node;
-   alu->children[2] = child;
-   alu->num_child = 3;
-   gpir_node_add_dep(node, &impl->node, GPIR_DEP_INPUT);
-   gpir_node_add_dep(node, &complex2->node, GPIR_DEP_INPUT);
+   gpir_alu_node *complex1 = gpir_node_create(block, gpir_op_complex1);
+   complex1->children[0] = &impl->node;
+   complex1->children[1] = &complex2->node;
+   complex1->children[2] = child;
+   complex1->num_child = 3;
+   gpir_node_add_dep(&complex1->node, child, GPIR_DEP_INPUT);
+   gpir_node_add_dep(&complex1->node, &impl->node, GPIR_DEP_INPUT);
+   gpir_node_add_dep(&complex1->node, &complex2->node, GPIR_DEP_INPUT);
+   list_addtail(&complex1->node.list, &node->list);
+
+   gpir_node *result = &complex1->node;
+
+   if (node->op == gpir_op_log2) {
+      gpir_alu_node *postlog2 = gpir_node_create(block, gpir_op_postlog2);
+      if (unlikely(!postlog2))
+         return false;
+
+      postlog2->children[0] = result;
+      postlog2->num_child = 1;
+      gpir_node_add_dep(&postlog2->node, result, GPIR_DEP_INPUT);
+      list_addtail(&postlog2->node.list, &node->list);
+
+      result = &postlog2->node;
+   }
+
+   gpir_node_replace_succ(result, node);
+   gpir_node_delete(node);
 
    return true;
 }
@@ -384,6 +422,8 @@ static bool (*gpir_post_rsched_lower_funcs[gpir_op_num])(gpir_block *, gpir_node
    [gpir_op_neg] = gpir_lower_neg,
    [gpir_op_rcp] = gpir_lower_complex,
    [gpir_op_rsqrt] = gpir_lower_complex,
+   [gpir_op_exp2] = gpir_lower_complex,
+   [gpir_op_log2] = gpir_lower_complex,
    [gpir_op_eq] = gpir_lower_eq_ne,
    [gpir_op_ne] = gpir_lower_eq_ne,
    [gpir_op_abs] = gpir_lower_abs,
index d1da7ed3754b4a2e8471f94e6351d1ebc2e1d1ef..18121b9a914621fe9d46fcdc846a8780d887cb5a 100644 (file)
@@ -118,6 +118,8 @@ static int nir_to_gpir_opcodes[nir_num_opcodes] = {
    [nir_op_fmax] = gpir_op_max,
    [nir_op_frcp] = gpir_op_rcp,
    [nir_op_frsq] = gpir_op_rsqrt,
+   [nir_op_fexp2] = gpir_op_exp2,
+   [nir_op_flog2] = gpir_op_log2,
    [nir_op_slt] = gpir_op_lt,
    [nir_op_sge] = gpir_op_ge,
    [nir_op_fcsel] = gpir_op_select,
index a8706627f38e8c5c6bc75ab5f57bec21acfa456a..1bf9d806c30fe3e341d4d9fe7dcc4d57c01b89f4 100644 (file)
@@ -141,15 +141,25 @@ const gpir_op_info gpir_op_infos[] = {
    },
    [gpir_op_preexp2] = {
       .name = "preexp2",
+      .slots = (int []) { GPIR_INSTR_SLOT_PASS, GPIR_INSTR_SLOT_END },
+      .spillless = true,
+      .schedule_first = true,
    },
    [gpir_op_postlog2] = {
       .name = "postlog2",
+      .slots = (int []) { GPIR_INSTR_SLOT_PASS, GPIR_INSTR_SLOT_END },
    },
    [gpir_op_exp2_impl] = {
       .name = "exp2_impl",
+      .slots = (int []) { GPIR_INSTR_SLOT_COMPLEX, GPIR_INSTR_SLOT_END },
+      .spillless = true,
+      .schedule_first = true,
    },
    [gpir_op_log2_impl] = {
       .name = "log2_impl",
+      .slots = (int []) { GPIR_INSTR_SLOT_COMPLEX, GPIR_INSTR_SLOT_END },
+      .spillless = true,
+      .schedule_first = true,
    },
    [gpir_op_rcp_impl] = {
       .name = "rcp_impl",
index f06089b7992183b44470d9b0ced04dc53c54e5d8..1a727aebc923f025a080df05c9121d0add2b3751 100644 (file)
@@ -627,23 +627,26 @@ static bool schedule_try_place_node(sched_ctx *ctx, gpir_node *node,
    return true;
 }
 
-static gpir_node *create_move(sched_ctx *ctx, gpir_node *node)
+/* Create a new node with "node" as the child, replace all uses of "node" with
+ * this new node, and replace "node" with it in the ready list.
+ */
+static gpir_node *create_replacement(sched_ctx *ctx, gpir_node *node,
+                                     gpir_op op)
 {
-   gpir_alu_node *move = gpir_node_create(node->block, gpir_op_mov);
-   if (unlikely(!move))
-      return NULL;
 
-   move->children[0] = node;
-   move->num_child = 1;
+   gpir_alu_node *new_node = gpir_node_create(node->block, op);
+   if (unlikely(!new_node))
+      return NULL;
 
-   move->node.sched.instr = NULL;
-   move->node.sched.pos = -1;
-   move->node.sched.dist = node->sched.dist;
-   move->node.sched.max_node = node->sched.max_node;
-   move->node.sched.next_max_node = node->sched.next_max_node;
-   move->node.sched.complex_allowed = node->sched.complex_allowed;
+   new_node->children[0] = node;
+   new_node->num_child = 1;
 
-   gpir_debug("create move %d for %d\n", move->node.index, node->index);
+   new_node->node.sched.instr = NULL;
+   new_node->node.sched.pos = -1;
+   new_node->node.sched.dist = node->sched.dist;
+   new_node->node.sched.max_node = node->sched.max_node;
+   new_node->node.sched.next_max_node = node->sched.next_max_node;
+   new_node->node.sched.complex_allowed = node->sched.complex_allowed;
 
    ctx->ready_list_slots--;
    list_del(&node->list);
@@ -651,12 +654,26 @@ static gpir_node *create_move(sched_ctx *ctx, gpir_node *node)
    node->sched.next_max_node = false;
    node->sched.ready = false;
    node->sched.inserted = false;
-   gpir_node_replace_succ(&move->node, node);
-   gpir_node_add_dep(&move->node, node, GPIR_DEP_INPUT);
-   schedule_insert_ready_list(ctx, &move->node);
-   return &move->node;
+   gpir_node_replace_succ(&new_node->node, node);
+   gpir_node_add_dep(&new_node->node, node, GPIR_DEP_INPUT);
+   schedule_insert_ready_list(ctx, &new_node->node);
+   return &new_node->node;
+}
+
+/* Create a mov with "node" as its child via create_replacement(), so that the
+ * mov takes over the uses of "node" and its place in the ready list.
+ *
+ * Returns the new mov, or NULL if create_replacement() could not create it.
+ */
+static gpir_node *create_move(sched_ctx *ctx, gpir_node *node)
+{
+   gpir_node *move = create_replacement(ctx, node, gpir_op_mov);
+   /* create_replacement() returns NULL when node creation fails; bail out
+    * before the debug message dereferences the result.
+    */
+   if (unlikely(!move))
+      return NULL;
+
+   gpir_debug("create move %d for %d\n", move->index, node->index);
+   return move;
 }
 
+/* Create a postlog2 with the complex1 "node" as its child via
+ * create_replacement(), so that the postlog2 takes over the uses of "node"
+ * and its place in the ready list.
+ *
+ * Returns the new postlog2, or NULL if create_replacement() could not create
+ * it.
+ */
+static gpir_node *create_postlog2(sched_ctx *ctx, gpir_node *node)
+{
+   assert(node->op == gpir_op_complex1);
+   gpir_node *postlog2 = create_replacement(ctx, node, gpir_op_postlog2);
+   /* create_replacement() returns NULL when node creation fails; bail out
+    * before the debug message dereferences the result.
+    */
+   if (unlikely(!postlog2))
+      return NULL;
+
+   gpir_debug("create postlog2 %d for %d\n", postlog2->index, node->index);
+   return postlog2;
+}
 
 /* Once we schedule the successor, would the predecessor be fully ready? */
 static bool pred_almost_ready(gpir_dep *dep)
@@ -936,7 +953,22 @@ static bool used_by_store(gpir_node *node, gpir_instr *instr)
    return false;
 }
 
+/* If "node" is a complex1 and the first input-type successor found is a
+ * postlog2, return that postlog2. Return NULL in every other case.
+ */
+static gpir_node *consuming_postlog2(gpir_node *node)
+{
+   if (node->op != gpir_op_complex1)
+      return NULL;

+   gpir_node_foreach_succ(node, succ_dep) {
+      /* Skip non-input dependencies; the first input use decides the result. */
+      if (succ_dep->type == GPIR_DEP_INPUT) {
+         gpir_node *user = succ_dep->succ;
+         return user->op == gpir_op_postlog2 ? user : NULL;
+      }
+   }
+
+   return NULL;
+}
 
 static bool try_spill_node(sched_ctx *ctx, gpir_node *node)
 {
@@ -961,6 +993,16 @@ static bool try_spill_node(sched_ctx *ctx, gpir_node *node)
       if (available == 0)
          return false;
 
+      /* Don't spill complex1 if it's used by a postlog2: turn the postlog2
+       * into a move, replace the complex1 with a new postlog2 and spill that
+       * instead. The store needs a move anyway so the postlog2 is usually free.
+       */
+      gpir_node *postlog2 = consuming_postlog2(node);
+      if (postlog2) {
+         postlog2->op = gpir_op_mov;
+         node = create_postlog2(ctx, node);
+      }
+
       /* TODO: use a better heuristic for choosing an available register? */
       int physreg = ffsll(available) - 1;
 
@@ -1305,7 +1347,17 @@ static bool sched_move(sched_ctx *ctx)
 {
    list_for_each_entry(gpir_node, node, &ctx->ready_list, list) {
       if (node->sched.max_node) {
-         place_move(ctx, node);
+         /* For complex1 that is consumed by a postlog2, we cannot allow any
+          * moves in between. Convert the postlog2 to a move and insert a new
+          * postlog2, and try to schedule it again in try_node().
+          */
+         gpir_node *postlog2 = consuming_postlog2(node);
+         if (postlog2) {
+            postlog2->op = gpir_op_mov;
+            create_postlog2(ctx, node);
+         } else {
+            place_move(ctx, node);
+         }
          return true;
       }
    }