From 8161a87b249c4e4e58a6163f09a52391aa20588a Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 21 Nov 2018 13:46:51 -0800 Subject: [PATCH] nir/phi_builder: Use per-value hash table to store [block] -> def mapping Replace the old array in each value with a hash table in each value. Changes in peak memory usage according to Valgrind massif: mean soft fp64 using uint64: 5,499,875,082 => 1,343,991,403 gfxbench5 aztec ruins high 11: 63,619,971 => 63,619,971 deus ex mankind divided 148: 62,887,728 => 62,887,728 deus ex mankind divided 2890: 72,402,222 => 72,399,750 dirt showdown 676: 74,466,431 => 69,464,023 dolphin ubershaders 210: 109,630,376 => 78,359,728 Run-time change for a full run on shader-db on my Haswell desktop (with -march=native) is 1.22245% +/- 0.463879% (n=11). This is about +2.9 seconds on a 237 second run. The first time I sent this version of this patch out, the run-time data was quite different. I had misconfigured the script that ran the test, and none of the tests from higher GLSL versions were run. These are generally more complex shaders, and they are more affected by this change. The previous version of this patch used a single hash table for the whole phi builder. The mapping was from [value, block] -> def, so a separate allocation was needed for each [value, block] tuple. There was quite a bit of per-allocation overhead (due to ralloc), so the patch was followed by a patch that added the use of the slab allocator. The results of those two patches was not quite as good: mean soft fp64 using uint64: 5,499,875,082 => 1,343,991,403 gfxbench5 aztec ruins high 11: 63,619,971 => 63,619,971 deus ex mankind divided 148: 62,887,728 => 62,887,728 deus ex mankind divided 2890: 72,402,222 => 72,402,222 * dirt showdown 676: 74,466,431 => 72,443,591 * dolphin ubershaders 210: 109,630,376 => 81,034,320 * The * denote tests that are better now. In the tests that are the same in both patches, the "after" peak memory usage was at a different location. I did not check the local peaks. Signed-off-by: Ian Romanick Suggested-by: Jason Ekstrand Reviewed-by: Jason Ekstrand --- src/compiler/nir/nir_phi_builder.c | 45 ++++++++++++++++++++++++------ 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/src/compiler/nir/nir_phi_builder.c b/src/compiler/nir/nir_phi_builder.c index add3efd2dfc..621777d6ecc 100644 --- a/src/compiler/nir/nir_phi_builder.c +++ b/src/compiler/nir/nir_phi_builder.c @@ -75,9 +75,18 @@ struct nir_phi_builder_value { * - A regular SSA def. This will be either the result of a phi node or * one of the defs provided by nir_phi_builder_value_set_blocK_def(). */ - nir_ssa_def *defs[0]; + struct hash_table ht; }; +/** + * Convert a block index into a value that can be used as a key for a hash table + * + * The hash table functions want a pointer that is not \c NULL. + * _mesa_hash_pointer drops the two least significant bits, but that's where + * most of our data likely is. Shift by 2 and add 1 to make everything happy. + */ +#define INDEX_TO_KEY(x) ((void *)(uintptr_t) ((x << 2) + 1)) + struct nir_phi_builder * nir_phi_builder_create(nir_function_impl *impl) { @@ -111,13 +120,16 @@ nir_phi_builder_add_value(struct nir_phi_builder *pb, unsigned num_components, struct nir_phi_builder_value *val; unsigned i, w_start = 0, w_end = 0; - val = rzalloc_size(pb, sizeof(*val) + sizeof(val->defs[0]) * pb->num_blocks); + val = rzalloc_size(pb, sizeof(*val)); val->builder = pb; val->num_components = num_components; val->bit_size = bit_size; exec_list_make_empty(&val->phis); exec_list_push_tail(&pb->values, &val->node); + _mesa_hash_table_init(&val->ht, pb, _mesa_hash_pointer, + _mesa_key_pointer_equal); + pb->iter_count++; BITSET_WORD tmp; @@ -142,7 +154,7 @@ nir_phi_builder_add_value(struct nir_phi_builder *pb, unsigned num_components, if (next == pb->impl->end_block) continue; - if (val->defs[next->index] == NULL) { + if (_mesa_hash_table_search(&val->ht, INDEX_TO_KEY(next->index)) == NULL) { /* Instead of creating a phi node immediately, we simply set the * value to the magic value NEEDS_PHI. Later, we create phi nodes * on demand in nir_phi_builder_value_get_block_def(). @@ -164,7 +176,7 @@ void nir_phi_builder_value_set_block_def(struct nir_phi_builder_value *val, nir_block *block, nir_ssa_def *def) { - val->defs[block->index] = def; + _mesa_hash_table_insert(&val->ht, INDEX_TO_KEY(block->index), def); } nir_ssa_def * @@ -175,8 +187,18 @@ nir_phi_builder_value_get_block_def(struct nir_phi_builder_value *val, * have a valid ssa_def, if any. */ nir_block *dom = block; - while (dom && val->defs[dom->index] == NULL) + struct hash_entry *he = NULL; + + while (dom != NULL) { + he = _mesa_hash_table_search(&val->ht, INDEX_TO_KEY(dom->index)); + if (he != NULL) + break; + dom = dom->imm_dom; + } + + /* Exactly one of (he != NULL) and (dom == NULL) must be true. */ + assert((he != NULL) != (dom == NULL)); nir_ssa_def *def; if (dom == NULL) { @@ -191,7 +213,7 @@ nir_phi_builder_value_get_block_def(struct nir_phi_builder_value *val, nir_instr_insert(nir_before_cf_list(&val->builder->impl->body), &undef->instr); def = &undef->def; - } else if (val->defs[dom->index] == NEEDS_PHI) { + } else if (he->data == NEEDS_PHI) { /* The magic value NEEDS_PHI indicates that the block needs a phi node * but none has been created. We need to create one now so we can * return it to the caller. @@ -215,13 +237,14 @@ nir_phi_builder_value_get_block_def(struct nir_phi_builder_value *val, val->bit_size, NULL); phi->instr.block = dom; exec_list_push_tail(&val->phis, &phi->instr.node); - def = val->defs[dom->index] = &phi->dest.ssa; + def = &phi->dest.ssa; + he->data = def; } else { /* In this case, we have an actual SSA def. It's either the result of a * phi node created by the case above or one passed to us through * nir_phi_builder_value_set_block_def(). */ - def = val->defs[dom->index]; + def = (struct nir_ssa_def *) he->data; } /* Walk the chain and stash the def in all of the applicable blocks. We do @@ -231,8 +254,12 @@ nir_phi_builder_value_get_block_def(struct nir_phi_builder_value *val, * block that is not dominated by this one. * 2) To avoid unneeded recreation of phi nodes and undefs. */ - for (dom = block; dom && val->defs[dom->index] == NULL; dom = dom->imm_dom) + for (dom = block; dom != NULL; dom = dom->imm_dom) { + if (_mesa_hash_table_search(&val->ht, INDEX_TO_KEY(dom->index)) != NULL) + break; + nir_phi_builder_value_set_block_def(val, dom, def); + } return def; } -- 2.30.2