From 7e8de5a707f7279929d7396550024b8cdc6a8c61 Mon Sep 17 00:00:00 2001
From: Alyssa Rosenzweig
Date: Wed, 3 Apr 2019 01:48:09 +0000
Subject: [PATCH] panfrost: Implement system values

This patch implements system values via specially-crafted uniforms. While we
previously had an ad hoc system for passing the viewport into the vertex
shader, this commit generalizes the system to allow for arbitrary system
values to be added to both shader stages.

While we're at it, we clean up uniform handling code (which was considerably
muddied to handle the ad hoc viewport uniform). This commit serves as both a
cleanup of the existing codebase and the precursor to new functionality, like
implementing textureSize().

Concurrent with these changes is respecting the depth transform, which was not
possible with the old fixed uniform system and here serves as a
proof-of-correctness test (as well as justifying the NIR changes).

Signed-off-by: Alyssa Rosenzweig
---
 src/gallium/drivers/panfrost/meson.build      |   1 +
 .../drivers/panfrost/midgard/helpers.h        |   4 -
 .../panfrost/midgard/midgard_compile.c        | 272 +++++++++---------
 .../panfrost/midgard/midgard_compile.h        |  27 +-
 src/gallium/drivers/panfrost/pan_assemble.c   |   5 +
 src/gallium/drivers/panfrost/pan_context.c    | 103 +++----
 src/gallium/drivers/panfrost/pan_context.h    |   5 +
 7 files changed, 229 insertions(+), 188 deletions(-)

diff --git a/src/gallium/drivers/panfrost/meson.build b/src/gallium/drivers/panfrost/meson.build
index 371a11a934b..b7e7d0c5808 100644
--- a/src/gallium/drivers/panfrost/meson.build
+++ b/src/gallium/drivers/panfrost/meson.build
@@ -109,6 +109,7 @@ midgard_compiler = executable(
   link_with : [
     libgallium,
     libglsl_standalone,
+    libmesa_gallium, # for st_glsl_storage_type_size
     libmesa_util
   ],
   build_by_default : true
diff --git a/src/gallium/drivers/panfrost/midgard/helpers.h b/src/gallium/drivers/panfrost/midgard/helpers.h
index 530a086e928..54960c7e599 100644
--- a/src/gallium/drivers/panfrost/midgard/helpers.h
+++ b/src/gallium/drivers/panfrost/midgard/helpers.h
@@ -95,10 +95,6 @@
 #define REGISTER_TEXTURE_BASE 28
 #define REGISTER_SELECT 31

-/* Special uniforms used for e.g. vertex epilogues */
-#define SPECIAL_UNIFORM_BASE (1 << 24)
-#define UNIFORM_VIEWPORT (SPECIAL_UNIFORM_BASE + 0)
-
 /* SSA helper aliases to mimic the registers. UNUSED_0 encoded as an inline
  * constant. UNUSED_1 encoded as REGISTER_UNUSED */
diff --git a/src/gallium/drivers/panfrost/midgard/midgard_compile.c b/src/gallium/drivers/panfrost/midgard/midgard_compile.c
index ea8c0153c96..c68067d50a5 100644
--- a/src/gallium/drivers/panfrost/midgard/midgard_compile.c
+++ b/src/gallium/drivers/panfrost/midgard/midgard_compile.c
@@ -32,6 +32,7 @@
 #include "main/mtypes.h"
 #include "compiler/glsl/glsl_to_nir.h"
+#include "mesa/state_tracker/st_glsl_types.h"
 #include "compiler/nir_types.h"
 #include "main/imports.h"
 #include "compiler/nir/nir_builder.h"
@@ -176,6 +177,7 @@ typedef struct midgard_block {
  * driver seems to do it that way */
 #define EMIT(op, ...) emit_mir_instruction(ctx, v_##op(__VA_ARGS__));
+#define SWIZZLE_XYZW SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_Z, COMPONENT_W)

 #define M_LOAD_STORE(name, rname, uname) \
         static midgard_instruction m_##name(unsigned ssa, unsigned address) { \
@@ -189,7 +191,7 @@ typedef struct midgard_block {
                         .load_store = { \
                                 .op = midgard_op_##name, \
                                 .mask = 0xF, \
-                                .swizzle = SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_Z, COMPONENT_W), \
+                                .swizzle = SWIZZLE_XYZW, \
                                 .address = address \
                         } \
                 }; \
@@ -432,10 +434,6 @@ typedef struct compiler_context {
         int temp_count;
         int max_hash;

-        /* Uniform IDs for mdg */
-        struct hash_table_u64 *uniform_nir_to_mdg;
-        int uniform_count;
-
         /* Just the count of the max register used. Higher count => higher
          * register pressure */
         int work_registers;
@@ -447,9 +445,6 @@ typedef struct compiler_context {
         /* Mapping of texture register -> SSA index for unaliasing */
         int texture_index[2];

-        /* Count of special uniforms (viewport, etc) in vec4 units */
-        int special_uniforms;
-
         /* If any path hits a discard instruction */
         bool can_discard;
@@ -464,6 +459,11 @@ typedef struct compiler_context {
         /* The index corresponding to the fragment output */
         unsigned fragment_output;
+
+        /* The mapping of sysvals to uniforms, the count, and the off-by-one inverse */
+        unsigned sysvals[MAX_SYSVAL_COUNT];
+        unsigned sysval_count;
+        struct hash_table_u64 *sysval_to_id;
 } compiler_context;

 /* Append instruction to end of current block */
@@ -645,6 +645,12 @@ glsl_type_size(const struct glsl_type *type)
         return glsl_count_attribute_slots(type, false);
 }

+static int
+uniform_type_size(const struct glsl_type *type)
+{
+        return st_glsl_storage_type_size(type, false);
+}
+
 /* Lower fdot2 to a vector multiplication followed by channel addition */
 static void
 midgard_nir_lower_fdot2_body(nir_builder *b, nir_alu_instr *alu)
@@ -667,6 +673,60 @@ midgard_nir_lower_fdot2_body(nir_builder *b, nir_alu_instr *alu)
         nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(sum));
 }

+static int
+midgard_nir_sysval_for_intrinsic(nir_intrinsic_instr *instr)
+{
+        switch (instr->intrinsic) {
+        case nir_intrinsic_load_viewport_scale:
+                return PAN_SYSVAL_VIEWPORT_SCALE;
+        case nir_intrinsic_load_viewport_offset:
+                return PAN_SYSVAL_VIEWPORT_OFFSET;
+        default:
+                return -1;
+        }
+}
+
+static void
+midgard_nir_assign_sysval_body(compiler_context *ctx, nir_instr *instr)
+{
+        int sysval = -1;
+
+        if (instr->type == nir_instr_type_intrinsic) {
+                nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+                sysval = midgard_nir_sysval_for_intrinsic(intr);
+        }
+
+        if (sysval < 0)
+                return;
+
+        /* We have a sysval load; check if it's already been assigned */
+
+        if (_mesa_hash_table_u64_search(ctx->sysval_to_id, sysval))
+                return;
+
+        /* It hasn't -- so assign it now! */
+
+        unsigned id = ctx->sysval_count++;
+        _mesa_hash_table_u64_insert(ctx->sysval_to_id, sysval, (void *) ((uintptr_t) id + 1));
+        ctx->sysvals[id] = sysval;
+}
+
+static void
+midgard_nir_assign_sysvals(compiler_context *ctx, nir_shader *shader)
+{
+        ctx->sysval_count = 0;
+
+        nir_foreach_function(function, shader) {
+                if (!function->impl) continue;
+
+                nir_foreach_block(block, function->impl) {
+                        nir_foreach_instr_safe(instr, block) {
+                                midgard_nir_assign_sysval_body(ctx, instr);
+                        }
+                }
+        }
+}
+
 static bool
 midgard_nir_lower_fdot2(nir_shader *shader)
 {
@@ -715,7 +775,6 @@ optimise_nir(nir_shader *nir)
                 progress = false;

                 NIR_PASS(progress, nir, midgard_nir_lower_algebraic);
-                NIR_PASS(progress, nir, nir_lower_io, nir_var_all, glsl_type_size, 0);
                 NIR_PASS(progress, nir, nir_lower_var_copies);
                 NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
@@ -1206,6 +1265,52 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)

 #undef ALU_CASE

+static void
+emit_uniform_read(compiler_context *ctx, unsigned dest, unsigned offset)
+{
+        /* TODO: half-floats */
+
+        if (offset < ctx->uniform_cutoff) {
+                /* Fast path: For the first 16 uniform,
+                 * accesses are 0-cycle, since they're
+                 * just a register fetch in the usual
+                 * case. So, we alias the registers
+                 * while we're still in SSA-space */
+
+                int reg_slot = 23 - offset;
+                alias_ssa(ctx, dest, SSA_FIXED_REGISTER(reg_slot));
+        } else {
+                /* Otherwise, read from the 'special'
+                 * UBO to access higher-indexed
+                 * uniforms, at a performance cost */
+
+                midgard_instruction ins = m_load_uniform_32(dest, offset);
+
+                /* TODO: Don't split */
+                ins.load_store.varying_parameters = (offset & 7) << 7;
+                ins.load_store.address = offset >> 3;
+
+                ins.load_store.unknown = 0x1E00; /* xxx: what is this? */
+                emit_mir_instruction(ctx, ins);
+        }
+}
+
+static void
+emit_sysval_read(compiler_context *ctx, nir_intrinsic_instr *instr)
+{
+        /* First, pull out the destination */
+        unsigned dest = nir_dest_index(ctx, &instr->dest);
+
+        /* Now, figure out which uniform this is */
+        int sysval = midgard_nir_sysval_for_intrinsic(instr);
+        void *val = _mesa_hash_table_u64_search(ctx->sysval_to_id, sysval);
+
+        /* Sysvals are prefix uniforms */
+        unsigned uniform = ((uintptr_t) val) - 1;
+
+        emit_uniform_read(ctx, dest, uniform);
+}
+
 static void
 emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
 {
@@ -1238,52 +1343,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
                 reg = nir_dest_index(ctx, &instr->dest);

                 if (instr->intrinsic == nir_intrinsic_load_uniform && !ctx->is_blend) {
-                        /* TODO: half-floats */
-
-                        int uniform_offset = 0;
-
-                        if (offset >= SPECIAL_UNIFORM_BASE) {
-                                /* XXX: Resolve which uniform */
-                                uniform_offset = 0;
-                        } else {
-                                /* Offset away from the special
-                                 * uniform block */
-
-                                void *entry = _mesa_hash_table_u64_search(ctx->uniform_nir_to_mdg, offset + 1);
-
-                                /* XXX */
-                                if (!entry) {
-                                        DBG("WARNING: Unknown uniform %d\n", offset);
-                                        break;
-                                }
-
-                                uniform_offset = (uintptr_t) (entry) - 1;
-                                uniform_offset += ctx->special_uniforms;
-                        }
-
-                        if (uniform_offset < ctx->uniform_cutoff) {
-                                /* Fast path: For the first 16 uniform,
-                                 * accesses are 0-cycle, since they're
-                                 * just a register fetch in the usual
-                                 * case. So, we alias the registers
-                                 * while we're still in SSA-space */
-
-                                int reg_slot = 23 - uniform_offset;
-                                alias_ssa(ctx, reg, SSA_FIXED_REGISTER(reg_slot));
-                        } else {
-                                /* Otherwise, read from the 'special'
-                                 * UBO to access higher-indexed
-                                 * uniforms, at a performance cost */
-
-                                midgard_instruction ins = m_load_uniform_32(reg, uniform_offset);
-
-                                /* TODO: Don't split */
-                                ins.load_store.varying_parameters = (uniform_offset & 7) << 7;
-                                ins.load_store.address = uniform_offset >> 3;
-
-                                ins.load_store.unknown = 0x1E00; /* xxx: what is this? */
-                                emit_mir_instruction(ctx, ins);
-                        }
+                        emit_uniform_read(ctx, reg, ctx->sysval_count + offset);
                 } else if (ctx->stage == MESA_SHADER_FRAGMENT && !ctx->is_blend) {
                         /* XXX: Half-floats? */
                         /* TODO: swizzle, mask */
@@ -1490,6 +1550,10 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
                 _mesa_hash_table_u64_insert(ctx->ssa_constants, instr->dest.ssa.index + 1, v);
                 break;

+        case nir_intrinsic_load_viewport_scale:
+        case nir_intrinsic_load_viewport_offset:
+                emit_sysval_read(ctx, instr);
+                break;

         default:
                 printf ("Unhandled intrinsic\n");
@@ -3005,41 +3069,17 @@ actualise_ssa_to_alias(compiler_context *ctx)
  *
  */

 static void
-write_transformed_position(nir_builder *b, nir_src input_point_src, int uniform_no)
+write_transformed_position(nir_builder *b, nir_src input_point_src)
 {
         nir_ssa_def *input_point = nir_ssa_for_src(b, input_point_src, 4);
+        nir_ssa_def *scale = nir_load_viewport_scale(b);
+        nir_ssa_def *offset = nir_load_viewport_offset(b);

-        /* Get viewport from the uniforms */
-        nir_intrinsic_instr *load;
-        load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform);
-        load->num_components = 4;
-        load->src[0] = nir_src_for_ssa(nir_imm_int(b, uniform_no));
-        nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
-        nir_builder_instr_insert(b, &load->instr);
-
-        /* Formatted as */
-        nir_ssa_def *viewport_vec4 = &load->dest.ssa;
-        nir_ssa_def *viewport_width_2 = nir_channel(b, viewport_vec4, 0);
-        nir_ssa_def *viewport_height_2 = nir_channel(b, viewport_vec4, 1);
-        nir_ssa_def *viewport_offset = nir_channels(b, viewport_vec4, 0x8 | 0x4);
-
-        /* XXX: From uniforms? */
-        nir_ssa_def *depth_near = nir_imm_float(b, 0.0);
-        nir_ssa_def *depth_far = nir_imm_float(b, 1.0);
-
-        /* World space to normalised device coordinates */
+        /* World space to normalised device coordinates to screen space */

         nir_ssa_def *w_recip = nir_frcp(b, nir_channel(b, input_point, 3));
         nir_ssa_def *ndc_point = nir_fmul(b, nir_channels(b, input_point, 0x7), w_recip);
-
-        /* Normalised device coordinates to screen space */
-
-        nir_ssa_def *viewport_multiplier = nir_vec2(b, viewport_width_2, viewport_height_2);
-        nir_ssa_def *viewport_xy = nir_fadd(b, nir_fmul(b, nir_channels(b, ndc_point, 0x3), viewport_multiplier), viewport_offset);
-
-        nir_ssa_def *depth_multiplier = nir_fmul(b, nir_fsub(b, depth_far, depth_near), nir_imm_float(b, 0.5f));
-        nir_ssa_def *depth_offset = nir_fmul(b, nir_fadd(b, depth_far, depth_near), nir_imm_float(b, 0.5f));
-        nir_ssa_def *screen_depth = nir_fadd(b, nir_fmul(b, nir_channel(b, ndc_point, 2), depth_multiplier), depth_offset);
+        nir_ssa_def *screen = nir_fadd(b, nir_fmul(b, ndc_point, scale), offset);

         /* gl_Position will be written out in screenspace xyz, with w set to
          * the reciprocal we computed earlier. The transformed w component is
          *
@@ -3048,9 +3088,9 @@ write_transformed_position(nir_builder *b, nir_src input_point_src, int uniform_
          * used in depth clipping computations */

         nir_ssa_def *screen_space = nir_vec4(b,
-                                             nir_channel(b, viewport_xy, 0),
-                                             nir_channel(b, viewport_xy, 1),
-                                             screen_depth,
+                                             nir_channel(b, screen, 0),
+                                             nir_channel(b, screen, 1),
+                                             nir_channel(b, screen, 2),
                                              w_recip);

         /* Finally, write out the transformed values to the varying */
@@ -3107,7 +3147,7 @@ transform_position_writes(nir_shader *shader)
                         nir_builder_init(&b, func->impl);
                         b.cursor = nir_before_instr(instr);

-                        write_transformed_position(&b, intr->src[0], UNIFORM_VIEWPORT);
+                        write_transformed_position(&b, intr->src[0]);
                         nir_instr_remove(instr);
                 }
         }
@@ -3457,28 +3497,11 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl
         /* TODO: Decide this at runtime */
         ctx->uniform_cutoff = 8;

-        switch (ctx->stage) {
-        case MESA_SHADER_VERTEX:
-                ctx->special_uniforms = 1;
-                break;
-
-        default:
-                ctx->special_uniforms = 0;
-                break;
-        }
-
-        /* Append epilogue uniforms if necessary. The cmdstream depends on
-         * these being at the -end-; see assign_var_locations. */
-
-        if (ctx->stage == MESA_SHADER_VERTEX) {
-                nir_variable_create(nir, nir_var_uniform, glsl_vec4_type(), "viewport");
-        }
-
         /* Assign var locations early, so the epilogue can use them if necessary */
         nir_assign_var_locations(&nir->outputs, &nir->num_outputs, glsl_type_size);
         nir_assign_var_locations(&nir->inputs, &nir->num_inputs, glsl_type_size);
-        nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms, glsl_type_size);
+        nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms, uniform_type_size);

         /* Initialize at a global (not block) level hash tables */

@@ -3487,31 +3510,9 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl
         ctx->ssa_to_alias = _mesa_hash_table_u64_create(NULL);
         ctx->ssa_to_register = _mesa_hash_table_u64_create(NULL);
         ctx->hash_to_temp = _mesa_hash_table_u64_create(NULL);
+        ctx->sysval_to_id = _mesa_hash_table_u64_create(NULL);
         ctx->leftover_ssa_to_alias = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);

-        /* Assign actual uniform location, skipping over samplers */
-
-        ctx->uniform_nir_to_mdg = _mesa_hash_table_u64_create(NULL);
-
-        nir_foreach_variable(var, &nir->uniforms) {
-                if (glsl_get_base_type(var->type) == GLSL_TYPE_SAMPLER) continue;
-
-                unsigned length = glsl_get_aoa_size(var->type);
-
-                if (!length) {
-                        length = glsl_get_length(var->type);
-                }
-
-                if (!length) {
-                        length = glsl_get_matrix_columns(var->type);
-                }
-
-                for (int col = 0; col < length; ++col) {
-                        int id = ctx->uniform_count++;
-                        _mesa_hash_table_u64_insert(ctx->uniform_nir_to_mdg, var->data.driver_location + col + 1, (void *) ((uintptr_t) (id + 1)));
-                }
-        }
-
         /* Record the varying mapping for the command stream's bookkeeping */

         struct exec_list *varyings =
@@ -3531,7 +3532,9 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl
         NIR_PASS_V(nir, nir_lower_global_vars_to_local);
         NIR_PASS_V(nir, nir_lower_var_copies);
         NIR_PASS_V(nir, nir_lower_vars_to_ssa);
-        NIR_PASS_V(nir, nir_lower_io, nir_var_all, glsl_type_size, 0);
+
+        NIR_PASS_V(nir, nir_lower_io, nir_var_uniform, uniform_type_size, 0);
+        NIR_PASS_V(nir, nir_lower_io, nir_var_all & ~nir_var_uniform, glsl_type_size, 0);

         /* Append vertex epilogue before optimisation, so the epilogue itself
          * is optimised */
@@ -3547,13 +3550,18 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl
                 nir_print_shader(nir, stdout);
         }

-        /* Assign counts, now that we're sure (post-optimisation) */
+        /* Assign sysvals and counts, now that we're sure
+         * (post-optimisation) */
+
+        midgard_nir_assign_sysvals(ctx, nir);
+
         program->uniform_count = nir->num_uniforms;
+        program->sysval_count = ctx->sysval_count;
+        memcpy(program->sysvals, ctx->sysvals, sizeof(ctx->sysvals[0]) * ctx->sysval_count);

         program->attribute_count = (ctx->stage == MESA_SHADER_VERTEX) ? nir->num_inputs : 0;
         program->varying_count = (ctx->stage == MESA_SHADER_VERTEX) ? nir->num_outputs : ((ctx->stage == MESA_SHADER_FRAGMENT) ? nir->num_inputs : 0);
-
         nir_foreach_function(func, nir) {
                 if (!func->impl) continue;
diff --git a/src/gallium/drivers/panfrost/midgard/midgard_compile.h b/src/gallium/drivers/panfrost/midgard/midgard_compile.h
index a4dfb25648d..ffc1b1f70e6 100644
--- a/src/gallium/drivers/panfrost/midgard/midgard_compile.h
+++ b/src/gallium/drivers/panfrost/midgard/midgard_compile.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2018 Alyssa Rosenzweig
+ * Copyright (C) 2018-2019 Alyssa Rosenzweig
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -21,12 +21,29 @@
  * SOFTWARE.
  */

+#ifndef __MIDGARD_H_
+#define __MIDGARD_H_
+
 #include "compiler/nir/nir.h"
 #include "util/u_dynarray.h"

 /* Define the general compiler entry point */

+#define MAX_SYSVAL_COUNT 32
+
+/* Allow 2D of sysval IDs, while allowing nonparametric sysvals to equal
+ * their class for equal comparison */
+
+#define PAN_SYSVAL(type, no) ((no << 16) | PAN_SYSVAL_##type)
+
+/* Define some common types. We start at one for easy indexing of hash
+ * tables internal to the compiler */
+
+enum {
+        PAN_SYSVAL_VIEWPORT_SCALE = 1,
+        PAN_SYSVAL_VIEWPORT_OFFSET = 2,
+} pan_sysval;
+
 typedef struct {
         int work_register_count;
         int uniform_count;
@@ -35,6 +52,12 @@ typedef struct {
         int attribute_count;
         int varying_count;

+        /* Prepended before uniforms, mapping to SYSVAL_ names for the
+         * sysval */
+
+        unsigned sysval_count;
+        unsigned sysvals[MAX_SYSVAL_COUNT];
+
         unsigned varyings[32];

         /* Boolean properties of the program */
@@ -81,3 +104,5 @@ static const nir_shader_compiler_options midgard_nir_options = {
         .native_integers = true
 };
+
+#endif
diff --git a/src/gallium/drivers/panfrost/pan_assemble.c b/src/gallium/drivers/panfrost/pan_assemble.c
index c9abe9e6687..52226311832 100644
--- a/src/gallium/drivers/panfrost/pan_assemble.c
+++ b/src/gallium/drivers/panfrost/pan_assemble.c
@@ -81,6 +81,11 @@ panfrost_shader_compile(struct panfrost_context *ctx, struct mali_shader_meta *m

         util_dynarray_fini(&program.compiled);

+        /* Sysvals are prepended */
+        program.uniform_count += program.sysval_count;
+        state->sysval_count = program.sysval_count;
+        memcpy(state->sysval, program.sysvals, sizeof(state->sysval[0]) * state->sysval_count);
+
         meta->midgard1.uniform_count = MIN2(program.uniform_count, program.uniform_cutoff);
         meta->attribute_count = program.attribute_count;
         meta->varying_count = program.varying_count;
diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c
index bafe67e138f..9f401b1a7a1 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -1048,7 +1048,6 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
                 }
         }

-        /* Generate the viewport vector of the form: */
         const struct pipe_viewport_state *vp = &ctx->pipe_viewport;

         /* For flipped-Y buffers (signaled by negative scale), the translate is
@@ -1060,71 +1059,73 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
         if (invert_y)
                 translate_y = ctx->pipe_framebuffer.height - translate_y;

-        float viewport_vec4[] = {
-                vp->scale[0],
-                fabsf(vp->scale[1]),
-
-                vp->translate[0],
-                translate_y
-        };
-
-        for (int i = 0; i < PIPE_SHADER_TYPES; ++i) {
+        for (int i = 0; i <= PIPE_SHADER_FRAGMENT; ++i) {
                 struct panfrost_constant_buffer *buf = &ctx->constant_buffer[i];

-                if (i == PIPE_SHADER_VERTEX || i == PIPE_SHADER_FRAGMENT) {
-                        /* It doesn't matter if we don't use all the memory;
-                         * we'd need a dummy UBO anyway. Compute the max */
-
-                        size_t size = sizeof(viewport_vec4) + buf->size;
-                        struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, size);
+                struct panfrost_shader_state *vs = &ctx->vs->variants[ctx->vs->active_variant];
+                struct panfrost_shader_state *fs = &ctx->fs->variants[ctx->fs->active_variant];
+                struct panfrost_shader_state *ss = (i == PIPE_SHADER_FRAGMENT) ? fs : vs;

-                        /* Keep track how much we've uploaded */
-                        off_t offset = 0;
+                /* Allocate room for the sysval and the uniforms */
+                size_t sys_size = sizeof(float) * 4 * ss->sysval_count;
+                size_t size = sys_size + buf->size;
+                struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, size);

-                        if (i == PIPE_SHADER_VERTEX) {
-                                /* Upload viewport */
-                                memcpy(transfer.cpu + offset, viewport_vec4, sizeof(viewport_vec4));
-                                offset += sizeof(viewport_vec4);
+                /* Upload sysvals requested by the shader */
+                float *uniforms = (float *) transfer.cpu;
+                for (unsigned i = 0; i < ss->sysval_count; ++i) {
+                        int sysval = ss->sysval[i];
+
+                        if (sysval == PAN_SYSVAL_VIEWPORT_SCALE) {
+                                uniforms[4*i + 0] = vp->scale[0];
+                                uniforms[4*i + 1] = fabsf(vp->scale[1]);
+                                uniforms[4*i + 2] = vp->scale[2];
+                        } else if (sysval == PAN_SYSVAL_VIEWPORT_OFFSET) {
+                                uniforms[4*i + 0] = vp->translate[0];
+                                uniforms[4*i + 1] = translate_y;
+                                uniforms[4*i + 2] = vp->translate[2];
+                        } else {
+                                assert(0);
                         }
+                }

-                        /* Upload uniforms */
-                        memcpy(transfer.cpu + offset, buf->buffer, buf->size);
+                /* Upload uniforms */
+                memcpy(transfer.cpu + sys_size, buf->buffer, buf->size);

-                        int uniform_count = 0;
+                int uniform_count = 0;

-                        struct mali_vertex_tiler_postfix *postfix;
+                struct mali_vertex_tiler_postfix *postfix;

-                        switch (i) {
-                        case PIPE_SHADER_VERTEX:
-                                uniform_count = ctx->vs->variants[ctx->vs->active_variant].uniform_count;
-                                postfix = &ctx->payload_vertex.postfix;
-                                break;
+                switch (i) {
+                case PIPE_SHADER_VERTEX:
+                        uniform_count = ctx->vs->variants[ctx->vs->active_variant].uniform_count;
+                        postfix = &ctx->payload_vertex.postfix;
+                        break;

-                        case PIPE_SHADER_FRAGMENT:
-                                uniform_count = ctx->fs->variants[ctx->fs->active_variant].uniform_count;
-                                postfix = &ctx->payload_tiler.postfix;
-                                break;
+                case PIPE_SHADER_FRAGMENT:
+                        uniform_count = ctx->fs->variants[ctx->fs->active_variant].uniform_count;
+                        postfix = &ctx->payload_tiler.postfix;
+                        break;

-                        default:
-                                DBG("Unknown shader stage %d in uniform upload\n", i);
-                                assert(0);
-                        }
+                default:
+                        DBG("Unknown shader stage %d in uniform upload\n", i);
+                        assert(0);
+                }

-                        /* Also attach the same buffer as a UBO for extended access */
+                /* Also attach the same buffer as a UBO for extended access */

-                        struct mali_uniform_buffer_meta uniform_buffers[] = {
-                                {
-                                        .size = MALI_POSITIVE((2 + uniform_count)),
-                                        .ptr = transfer.gpu >> 2,
-                                },
-                        };
+                struct mali_uniform_buffer_meta uniform_buffers[] = {
+                        {
+                                .size = MALI_POSITIVE((2 + uniform_count)),
+                                .ptr = transfer.gpu >> 2,
+                        },
+                };

-                        mali_ptr ubufs = panfrost_upload_transient(ctx, uniform_buffers, sizeof(uniform_buffers));
-                        postfix->uniforms = transfer.gpu;
-                        postfix->uniform_buffers = ubufs;
+                mali_ptr ubufs = panfrost_upload_transient(ctx, uniform_buffers, sizeof(uniform_buffers));
+                postfix->uniforms = transfer.gpu;
+                postfix->uniform_buffers = ubufs;

-                        buf->dirty = 0;
-                }
+                buf->dirty = 0;
         }

         /* TODO: Upload the viewport somewhere more appropriate */
diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h
index 7947169f83b..d071da1c62f 100644
--- a/src/gallium/drivers/panfrost/pan_context.h
+++ b/src/gallium/drivers/panfrost/pan_context.h
@@ -42,6 +42,8 @@
 #include "util/u_blitter.h"
 #include "util/hash_table.h"

+#include "midgard/midgard_compile.h"
+
 /* Forward declare to avoid extra header dep */
 struct prim_convert_context;

@@ -264,6 +266,9 @@ struct panfrost_shader_state {
         unsigned general_varying_stride;
         struct mali_attr_meta varyings[PIPE_MAX_ATTRIBS];

+        unsigned sysval_count;
+        unsigned sysval[MAX_SYSVAL_COUNT];
+
         /* Information on this particular shader variant */
         struct pipe_alpha_state alpha_state;
 };
-- 
2.30.2