#include "compiler/nir/nir.h"
#include "util/u_dynarray.h"
+#include "util/register_allocate.h"
+
+/* To be shoved inside panfrost_screen for the Gallium driver, or somewhere
+ * else for Vulkan/standalone. The single compiler "screen" to be shared across
+ * all shader compiles, used to store complex initialization (for instance,
+ * related to register allocation) */
+
+struct midgard_screen {
+ /* Precomputed register allocation sets for varying numbers of work
+ * registers, indexed by (work register count - 8). The zeroth entry
+ * corresponds to 8 work registers. The eighth entry corresponds to
+ * 16 work registers. NULL if this set has not been allocated yet. */
+
+ struct ra_regs *regs[9];
+
+ /* Work register classes corresponding to the above register sets,
+ * using the same index. Each row holds the vec1, vec2, vec3 and vec4
+ * class, in that order. */
+ unsigned reg_classes[9][4];
+};
/* Define the general compiler entry point */
} midgard_program;
int
-midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_blend);
+midgard_compile_shader_nir(struct midgard_screen *screen, nir_shader *nir, midgard_program *program, bool is_blend);
/* NIR options are shared between the standalone compiler and the online
* compiler. Defining it here is the simplest, though maybe not the Right
return r;
}
-/* This routine performs the actual register allocation. It should be succeeded
- * by install_registers */
+/* This routine creates a register set. Should be called infrequently since
+ * it's slow and can be cached */
-struct ra_graph *
-allocate_registers(compiler_context *ctx, bool *spilled)
+static struct ra_regs *
+create_register_set(unsigned work_count, unsigned *classes)
{
- /* The number of vec4 work registers available depends on when the
- * uniforms start, so compute that first */
-
- int work_count = 16 - MAX2((ctx->uniform_cutoff - 8), 0);
-
int virtual_count = work_count * WORK_STRIDE;
/* First, initialize the RA */
int work_vec2 = ra_alloc_reg_class(regs);
int work_vec1 = ra_alloc_reg_class(regs);
- unsigned classes[4] = {
- work_vec1,
- work_vec2,
- work_vec3,
- work_vec4
- };
+ classes[0] = work_vec1;
+ classes[1] = work_vec2;
+ classes[2] = work_vec3;
+ classes[3] = work_vec4;
/* Add the full set of work registers */
for (unsigned i = 0; i < work_count; ++i) {
/* We're done setting up */
ra_set_finalize(regs, NULL);
+ return regs;
+}
+
+/* Returns the register set for the given number of work registers, along
+ * with its register classes. Sets are cached on the screen, indexed by
+ * (work_count - 8): the first request for a given work_count builds the set
+ * with create_register_set and stores it; later requests reuse it.
+ *
+ * work_count must be in [8, 16] (asserted). *classes is pointed at the
+ * screen-owned array of four class IDs matching the returned set, so the
+ * caller must not free it. */
+
+static struct ra_regs *
+get_register_set(struct midgard_screen *screen, unsigned work_count, unsigned **classes)
+{
+        /* Bounds check: the screen only has slots for 8..16 work registers */
+        assert(work_count >= 8);
+        assert(work_count <= 16);
+
+        /* Slot 0 holds the 8-register set, slot 8 the 16-register set */
+        unsigned index = work_count - 8;
+
+        /* Build and cache the set on first use. The class IDs are written
+         * straight into the screen, so they are cached alongside the set.
+         * (reg_classes[index] is an embedded array and can never be NULL,
+         * so there is nothing to assert about it.) */
+        if (!screen->regs[index])
+                screen->regs[index] = create_register_set(work_count, screen->reg_classes[index]);
+
+        *classes = screen->reg_classes[index];
+        return screen->regs[index];
+}
+
+/* This routine performs the actual register allocation. It should be succeeded
+ * by install_registers */
+
+struct ra_graph *
+allocate_registers(compiler_context *ctx, bool *spilled)
+{
+ /* The number of vec4 work registers available depends on when the
+ * uniforms start, so compute that first */
+ int work_count = 16 - MAX2((ctx->uniform_cutoff - 8), 0);
+ unsigned *classes = NULL;
+ struct ra_regs *regs = get_register_set(ctx->screen, work_count, &classes);
+
+ assert(regs != NULL);
+ assert(classes != NULL);
+
/* No register allocation to do with no SSA */
if (!ctx->temp_count)