From 11a49f289d17128c24dd9aa339b940b819cb7e41 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Sun, 21 Apr 2019 21:46:46 +0200 Subject: [PATCH] lima/gp: Support exp2 and log2 log2 is tricky because there cannot be a move between complex1 and postlog2. We can't guarantee that scheduling complex1 will succeed when we schedule postlog2, so we try to schedule complex1 and if it fails we back out by rewriting the postlog2 as a move and introducing a new postlog2 so that we can try again later. Signed-off-by: Connor Abbott Acked-by: Qiang Yu --- src/gallium/drivers/lima/ir/gp/codegen.c | 21 +++++- src/gallium/drivers/lima/ir/gp/lower.c | 56 ++++++++++++-- src/gallium/drivers/lima/ir/gp/nir.c | 2 + src/gallium/drivers/lima/ir/gp/node.c | 10 +++ src/gallium/drivers/lima/ir/gp/scheduler.c | 88 +++++++++++++++++----- 5 files changed, 147 insertions(+), 30 deletions(-) diff --git a/src/gallium/drivers/lima/ir/gp/codegen.c b/src/gallium/drivers/lima/ir/gp/codegen.c index 9bc279e0119..19eb38c18a7 100644 --- a/src/gallium/drivers/lima/ir/gp/codegen.c +++ b/src/gallium/drivers/lima/ir/gp/codegen.c @@ -376,6 +376,8 @@ static void gpir_codegen_complex_slot(gpir_codegen_instr *code, gpir_instr *inst case gpir_op_mov: case gpir_op_rcp_impl: case gpir_op_rsqrt_impl: + case gpir_op_exp2_impl: + case gpir_op_log2_impl: { gpir_alu_node *alu = gpir_node_to_alu(node); code->complex_src = gpir_get_alu_input(node, alu->children[0]); @@ -395,6 +397,12 @@ static void gpir_codegen_complex_slot(gpir_codegen_instr *code, gpir_instr *inst case gpir_op_rsqrt_impl: code->complex_op = gpir_codegen_complex_op_rsqrt; break; + case gpir_op_exp2_impl: + code->complex_op = gpir_codegen_complex_op_exp2; + break; + case gpir_op_log2_impl: + code->complex_op = gpir_codegen_complex_op_log2; + break; default: assert(0); } @@ -410,14 +418,19 @@ static void gpir_codegen_pass_slot(gpir_codegen_instr *code, gpir_instr *instr) return; } + gpir_alu_node *alu = gpir_node_to_alu(node); + code->pass_src = 
gpir_get_alu_input(node, alu->children[0]); + switch (node->op) { case gpir_op_mov: - { - gpir_alu_node *alu = gpir_node_to_alu(node); - code->pass_src = gpir_get_alu_input(node, alu->children[0]); code->pass_op = gpir_codegen_pass_op_pass; break; - } + case gpir_op_preexp2: + code->pass_op = gpir_codegen_pass_op_preexp2; + break; + case gpir_op_postlog2: + code->pass_op = gpir_codegen_pass_op_postlog2; + break; default: assert(0); } diff --git a/src/gallium/drivers/lima/ir/gp/lower.c b/src/gallium/drivers/lima/ir/gp/lower.c index 38c24851c2e..6c5f2db7e33 100644 --- a/src/gallium/drivers/lima/ir/gp/lower.c +++ b/src/gallium/drivers/lima/ir/gp/lower.c @@ -177,6 +177,19 @@ static bool gpir_lower_complex(gpir_block *block, gpir_node *node) gpir_alu_node *alu = gpir_node_to_alu(node); gpir_node *child = alu->children[0]; + if (node->op == gpir_op_exp2) { + gpir_alu_node *preexp2 = gpir_node_create(block, gpir_op_preexp2); + if (unlikely(!preexp2)) + return false; + + preexp2->children[0] = child; + preexp2->num_child = 1; + gpir_node_add_dep(&preexp2->node, child, GPIR_DEP_INPUT); + list_addtail(&preexp2->node.list, &node->list); + + child = &preexp2->node; + } + gpir_alu_node *complex2 = gpir_node_create(block, gpir_op_complex2); if (unlikely(!complex2)) return false; @@ -194,6 +207,12 @@ static bool gpir_lower_complex(gpir_block *block, gpir_node *node) case gpir_op_rsqrt: impl_op = gpir_op_rsqrt_impl; break; + case gpir_op_exp2: + impl_op = gpir_op_exp2_impl; + break; + case gpir_op_log2: + impl_op = gpir_op_log2_impl; + break; default: assert(0); } @@ -207,14 +226,33 @@ static bool gpir_lower_complex(gpir_block *block, gpir_node *node) gpir_node_add_dep(&impl->node, child, GPIR_DEP_INPUT); list_addtail(&impl->node.list, &node->list); - /* change node to complex1 node */ - node->op = gpir_op_complex1; - alu->children[0] = &impl->node; - alu->children[1] = &complex2->node; - alu->children[2] = child; - alu->num_child = 3; - gpir_node_add_dep(node, &impl->node, 
GPIR_DEP_INPUT); - gpir_node_add_dep(node, &complex2->node, GPIR_DEP_INPUT); + gpir_alu_node *complex1 = gpir_node_create(block, gpir_op_complex1); + complex1->children[0] = &impl->node; + complex1->children[1] = &complex2->node; + complex1->children[2] = child; + complex1->num_child = 3; + gpir_node_add_dep(&complex1->node, child, GPIR_DEP_INPUT); + gpir_node_add_dep(&complex1->node, &impl->node, GPIR_DEP_INPUT); + gpir_node_add_dep(&complex1->node, &complex2->node, GPIR_DEP_INPUT); + list_addtail(&complex1->node.list, &node->list); + + gpir_node *result = &complex1->node; + + if (node->op == gpir_op_log2) { + gpir_alu_node *postlog2 = gpir_node_create(block, gpir_op_postlog2); + if (unlikely(!postlog2)) + return false; + + postlog2->children[0] = result; + postlog2->num_child = 1; + gpir_node_add_dep(&postlog2->node, result, GPIR_DEP_INPUT); + list_addtail(&postlog2->node.list, &node->list); + + result = &postlog2->node; + } + + gpir_node_replace_succ(result, node); + gpir_node_delete(node); return true; } @@ -384,6 +422,8 @@ static bool (*gpir_post_rsched_lower_funcs[gpir_op_num])(gpir_block *, gpir_node [gpir_op_neg] = gpir_lower_neg, [gpir_op_rcp] = gpir_lower_complex, [gpir_op_rsqrt] = gpir_lower_complex, + [gpir_op_exp2] = gpir_lower_complex, + [gpir_op_log2] = gpir_lower_complex, [gpir_op_eq] = gpir_lower_eq_ne, [gpir_op_ne] = gpir_lower_eq_ne, [gpir_op_abs] = gpir_lower_abs, diff --git a/src/gallium/drivers/lima/ir/gp/nir.c b/src/gallium/drivers/lima/ir/gp/nir.c index d1da7ed3754..18121b9a914 100644 --- a/src/gallium/drivers/lima/ir/gp/nir.c +++ b/src/gallium/drivers/lima/ir/gp/nir.c @@ -118,6 +118,8 @@ static int nir_to_gpir_opcodes[nir_num_opcodes] = { [nir_op_fmax] = gpir_op_max, [nir_op_frcp] = gpir_op_rcp, [nir_op_frsq] = gpir_op_rsqrt, + [nir_op_fexp2] = gpir_op_exp2, + [nir_op_flog2] = gpir_op_log2, [nir_op_slt] = gpir_op_lt, [nir_op_sge] = gpir_op_ge, [nir_op_fcsel] = gpir_op_select, diff --git a/src/gallium/drivers/lima/ir/gp/node.c 
b/src/gallium/drivers/lima/ir/gp/node.c index a8706627f38..1bf9d806c30 100644 --- a/src/gallium/drivers/lima/ir/gp/node.c +++ b/src/gallium/drivers/lima/ir/gp/node.c @@ -141,15 +141,25 @@ const gpir_op_info gpir_op_infos[] = { }, [gpir_op_preexp2] = { .name = "preexp2", + .slots = (int []) { GPIR_INSTR_SLOT_PASS, GPIR_INSTR_SLOT_END }, + .spillless = true, + .schedule_first = true, }, [gpir_op_postlog2] = { .name = "postlog2", + .slots = (int []) { GPIR_INSTR_SLOT_PASS, GPIR_INSTR_SLOT_END }, }, [gpir_op_exp2_impl] = { .name = "exp2_impl", + .slots = (int []) { GPIR_INSTR_SLOT_COMPLEX, GPIR_INSTR_SLOT_END }, + .spillless = true, + .schedule_first = true, }, [gpir_op_log2_impl] = { .name = "log2_impl", + .slots = (int []) { GPIR_INSTR_SLOT_COMPLEX, GPIR_INSTR_SLOT_END }, + .spillless = true, + .schedule_first = true, }, [gpir_op_rcp_impl] = { .name = "rcp_impl", diff --git a/src/gallium/drivers/lima/ir/gp/scheduler.c b/src/gallium/drivers/lima/ir/gp/scheduler.c index f06089b7992..1a727aebc92 100644 --- a/src/gallium/drivers/lima/ir/gp/scheduler.c +++ b/src/gallium/drivers/lima/ir/gp/scheduler.c @@ -627,23 +627,26 @@ static bool schedule_try_place_node(sched_ctx *ctx, gpir_node *node, return true; } -static gpir_node *create_move(sched_ctx *ctx, gpir_node *node) +/* Create a new node with "node" as the child, replace all uses of "node" with + * this new node, and replace "node" with it in the ready list. 
+ */ +static gpir_node *create_replacement(sched_ctx *ctx, gpir_node *node, + gpir_op op) { - gpir_alu_node *move = gpir_node_create(node->block, gpir_op_mov); - if (unlikely(!move)) - return NULL; - move->children[0] = node; - move->num_child = 1; + gpir_alu_node *new_node = gpir_node_create(node->block, op); + if (unlikely(!new_node)) + return NULL; - move->node.sched.instr = NULL; - move->node.sched.pos = -1; - move->node.sched.dist = node->sched.dist; - move->node.sched.max_node = node->sched.max_node; - move->node.sched.next_max_node = node->sched.next_max_node; - move->node.sched.complex_allowed = node->sched.complex_allowed; + new_node->children[0] = node; + new_node->num_child = 1; - gpir_debug("create move %d for %d\n", move->node.index, node->index); + new_node->node.sched.instr = NULL; + new_node->node.sched.pos = -1; + new_node->node.sched.dist = node->sched.dist; + new_node->node.sched.max_node = node->sched.max_node; + new_node->node.sched.next_max_node = node->sched.next_max_node; + new_node->node.sched.complex_allowed = node->sched.complex_allowed; ctx->ready_list_slots--; list_del(&node->list); @@ -651,12 +654,26 @@ static gpir_node *create_move(sched_ctx *ctx, gpir_node *node) node->sched.next_max_node = false; node->sched.ready = false; node->sched.inserted = false; - gpir_node_replace_succ(&move->node, node); - gpir_node_add_dep(&move->node, node, GPIR_DEP_INPUT); - schedule_insert_ready_list(ctx, &move->node); - return &move->node; + gpir_node_replace_succ(&new_node->node, node); + gpir_node_add_dep(&new_node->node, node, GPIR_DEP_INPUT); + schedule_insert_ready_list(ctx, &new_node->node); + return &new_node->node; +} + +static gpir_node *create_move(sched_ctx *ctx, gpir_node *node) +{ + gpir_node *move = create_replacement(ctx, node, gpir_op_mov); + gpir_debug("create move %d for %d\n", move->index, node->index); + return move; } +static gpir_node *create_postlog2(sched_ctx *ctx, gpir_node *node) +{ + assert(node->op == gpir_op_complex1); + 
gpir_node *postlog2 = create_replacement(ctx, node, gpir_op_postlog2); + gpir_debug("create postlog2 %d for %d\n", postlog2->index, node->index); + return postlog2; +} /* Once we schedule the successor, would the predecessor be fully ready? */ static bool pred_almost_ready(gpir_dep *dep) @@ -936,7 +953,22 @@ static bool used_by_store(gpir_node *node, gpir_instr *instr) return false; } +static gpir_node *consuming_postlog2(gpir_node *node) +{ + if (node->op != gpir_op_complex1) + return NULL; + gpir_node_foreach_succ(node, dep) { + if (dep->type != GPIR_DEP_INPUT) + continue; + if (dep->succ->op == gpir_op_postlog2) + return dep->succ; + else + return NULL; + } + + return NULL; +} static bool try_spill_node(sched_ctx *ctx, gpir_node *node) { @@ -961,6 +993,16 @@ static bool try_spill_node(sched_ctx *ctx, gpir_node *node) if (available == 0) return false; + /* Don't spill complex1 if it's used by postlog2: turn the postlog2 into a + * move, replace the complex1 with postlog2 and spill that instead. The + * store needs a move anyway so the postlog2 is usually free. + */ + gpir_node *postlog2 = consuming_postlog2(node); + if (postlog2) { + postlog2->op = gpir_op_mov; + node = create_postlog2(ctx, node); + } + /* TODO: use a better heuristic for choosing an available register? */ int physreg = ffsll(available) - 1; @@ -1305,7 +1347,17 @@ static bool sched_move(sched_ctx *ctx) { list_for_each_entry(gpir_node, node, &ctx->ready_list, list) { if (node->sched.max_node) { - place_move(ctx, node); + /* For complex1 that is consumed by a postlog2, we cannot allow any + * moves in between. Convert the postlog2 to a move and insert a new + * postlog2, and try to schedule it again in try_node(). + */ + gpir_node *postlog2 = consuming_postlog2(node); + if (postlog2) { + postlog2->op = gpir_op_mov; + create_postlog2(ctx, node); + } else { + place_move(ctx, node); + } return true; } } -- 2.30.2