#include "main/mtypes.h"
#include "compiler/glsl/glsl_to_nir.h"
+#include "mesa/state_tracker/st_glsl_types.h"
#include "compiler/nir_types.h"
#include "main/imports.h"
#include "compiler/nir/nir_builder.h"
* driver seems to do it that way */
#define EMIT(op, ...) emit_mir_instruction(ctx, v_##op(__VA_ARGS__));
+#define SWIZZLE_XYZW SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_Z, COMPONENT_W)
#define M_LOAD_STORE(name, rname, uname) \
static midgard_instruction m_##name(unsigned ssa, unsigned address) { \
.load_store = { \
.op = midgard_op_##name, \
.mask = 0xF, \
- .swizzle = SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_Z, COMPONENT_W), \
+ .swizzle = SWIZZLE_XYZW, \
.address = address \
} \
}; \
int temp_count;
int max_hash;
- /* Uniform IDs for mdg */
- struct hash_table_u64 *uniform_nir_to_mdg;
- int uniform_count;
-
/* Just the count of the max register used. Higher count => higher
* register pressure */
int work_registers;
/* Mapping of texture register -> SSA index for unaliasing */
int texture_index[2];
- /* Count of special uniforms (viewport, etc) in vec4 units */
- int special_uniforms;
-
/* If any path hits a discard instruction */
bool can_discard;
/* The index corresponding to the fragment output */
unsigned fragment_output;
+
+ /* The mapping of sysvals to uniform slots, the count, and the inverse
+ * map (stored off-by-one so that zero means 'unassigned') */
+ unsigned sysvals[MAX_SYSVAL_COUNT];
+ unsigned sysval_count;
+ struct hash_table_u64 *sysval_to_id;
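+ /* e.g. after one load_viewport_scale: sysvals[0] = PAN_SYSVAL_VIEWPORT_SCALE,
+ * sysval_count = 1, and sysval_to_id maps that key to 0 + 1 */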
} compiler_context;
/* Append instruction to end of current block */
return glsl_count_attribute_slots(type, false);
}
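+/* Uniform variables get the state tracker's storage sizing rather than
+ * attribute-slot counting, presumably so the offsets produced by
+ * nir_lower_io line up with the uploaded uniform buffer */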
+static int
+uniform_type_size(const struct glsl_type *type)
+{
+ return st_glsl_storage_type_size(type, false);
+}
+
/* Lower fdot2 to a vector multiplication followed by channel addition */
static void
midgard_nir_lower_fdot2_body(nir_builder *b, nir_alu_instr *alu)
nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(sum));
}
+static int
+midgard_nir_sysval_for_intrinsic(nir_intrinsic_instr *instr)
+{
+ switch (instr->intrinsic) {
+ case nir_intrinsic_load_viewport_scale:
+ return PAN_SYSVAL_VIEWPORT_SCALE;
+ case nir_intrinsic_load_viewport_offset:
+ return PAN_SYSVAL_VIEWPORT_OFFSET;
+ default:
+ return -1;
+ }
+}
+
+static void
+midgard_nir_assign_sysval_body(compiler_context *ctx, nir_instr *instr)
+{
+ int sysval = -1;
+
+ if (instr->type == nir_instr_type_intrinsic) {
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ sysval = midgard_nir_sysval_for_intrinsic(intr);
+ }
+
+ if (sysval < 0)
+ return;
+
+ /* We have a sysval load; check if it's already been assigned */
+
+ if (_mesa_hash_table_u64_search(ctx->sysval_to_id, sysval))
+ return;
+
+ /* It hasn't -- so assign it now! */
+
+ unsigned id = ctx->sysval_count++;
+ _mesa_hash_table_u64_insert(ctx->sysval_to_id, sysval, (void *) ((uintptr_t) id + 1));
+ ctx->sysvals[id] = sysval;
+}
+
+static void
+midgard_nir_assign_sysvals(compiler_context *ctx, nir_shader *shader)
+{
+ ctx->sysval_count = 0;
+
+ nir_foreach_function(function, shader) {
+ if (!function->impl) continue;
+
+ nir_foreach_block(block, function->impl) {
+ nir_foreach_instr_safe(instr, block) {
+ midgard_nir_assign_sysval_body(ctx, instr);
+ }
+ }
+ }
+}
+
static bool
midgard_nir_lower_fdot2(nir_shader *shader)
{
progress = false;
NIR_PASS(progress, nir, midgard_nir_lower_algebraic);
- NIR_PASS(progress, nir, nir_lower_io, nir_var_all, glsl_type_size, 0);
NIR_PASS(progress, nir, nir_lower_var_copies);
NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
#undef ALU_CASE
+static void
+emit_uniform_read(compiler_context *ctx, unsigned dest, unsigned offset)
+{
+ /* TODO: half-floats */
+
+ if (offset < ctx->uniform_cutoff) {
+ /* Fast path: For the first 16 uniforms,
+ * accesses are 0-cycle, since they're
+ * just a register fetch in the usual
+ * case. So, we alias the registers
+ * while we're still in SSA-space */
+
+ int reg_slot = 23 - offset;
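+ /* e.g. uniform 0 aliases r23, uniform 1 aliases r22, and so on */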
+ alias_ssa(ctx, dest, SSA_FIXED_REGISTER(reg_slot));
+ } else {
+ /* Otherwise, read from the 'special'
+ * UBO to access higher-indexed
+ * uniforms, at a performance cost */
+
+ midgard_instruction ins = m_load_uniform_32(dest, offset);
+
+ /* TODO: Don't split */
+ ins.load_store.varying_parameters = (offset & 7) << 7;
+ ins.load_store.address = offset >> 3;
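+ /* e.g. offset 19 splits into address 2 with (19 & 7) << 7
+ * stored in varying_parameters */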
+
+ ins.load_store.unknown = 0x1E00; /* xxx: what is this? */
+ emit_mir_instruction(ctx, ins);
+ }
+}
+
+static void
+emit_sysval_read(compiler_context *ctx, nir_intrinsic_instr *instr)
+{
+ /* First, pull out the destination */
+ unsigned dest = nir_dest_index(ctx, &instr->dest);
+
+ /* Now, figure out which uniform this is */
+ int sysval = midgard_nir_sysval_for_intrinsic(instr);
+ void *val = _mesa_hash_table_u64_search(ctx->sysval_to_id, sysval);
+
+ /* Sysvals are prefix uniforms */
+ unsigned uniform = ((uintptr_t) val) - 1;
+
+ emit_uniform_read(ctx, dest, uniform);
+}
+
static void
emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
{
reg = nir_dest_index(ctx, &instr->dest);
if (instr->intrinsic == nir_intrinsic_load_uniform && !ctx->is_blend) {
- /* TODO: half-floats */
-
- int uniform_offset = 0;
-
- if (offset >= SPECIAL_UNIFORM_BASE) {
- /* XXX: Resolve which uniform */
- uniform_offset = 0;
- } else {
- /* Offset away from the special
- * uniform block */
-
- void *entry = _mesa_hash_table_u64_search(ctx->uniform_nir_to_mdg, offset + 1);
-
- /* XXX */
- if (!entry) {
- DBG("WARNING: Unknown uniform %d\n", offset);
- break;
- }
-
- uniform_offset = (uintptr_t) (entry) - 1;
- uniform_offset += ctx->special_uniforms;
- }
-
- if (uniform_offset < ctx->uniform_cutoff) {
- /* Fast path: For the first 16 uniform,
- * accesses are 0-cycle, since they're
- * just a register fetch in the usual
- * case. So, we alias the registers
- * while we're still in SSA-space */
-
- int reg_slot = 23 - uniform_offset;
- alias_ssa(ctx, reg, SSA_FIXED_REGISTER(reg_slot));
- } else {
- /* Otherwise, read from the 'special'
- * UBO to access higher-indexed
- * uniforms, at a performance cost */
-
- midgard_instruction ins = m_load_uniform_32(reg, uniform_offset);
-
- /* TODO: Don't split */
- ins.load_store.varying_parameters = (uniform_offset & 7) << 7;
- ins.load_store.address = uniform_offset >> 3;
-
- ins.load_store.unknown = 0x1E00; /* xxx: what is this? */
- emit_mir_instruction(ctx, ins);
- }
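+ /* Sysvals are emitted first, so user uniforms start after
+ * sysval_count slots */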
+ emit_uniform_read(ctx, reg, ctx->sysval_count + offset);
} else if (ctx->stage == MESA_SHADER_FRAGMENT && !ctx->is_blend) {
/* XXX: Half-floats? */
/* TODO: swizzle, mask */
_mesa_hash_table_u64_insert(ctx->ssa_constants, instr->dest.ssa.index + 1, v);
break;
+ case nir_intrinsic_load_viewport_scale:
+ case nir_intrinsic_load_viewport_offset:
+ emit_sysval_read(ctx, instr);
+ break;
default:
printf ("Unhandled intrinsic\n");
* */
static void
-write_transformed_position(nir_builder *b, nir_src input_point_src, int uniform_no)
+write_transformed_position(nir_builder *b, nir_src input_point_src)
{
nir_ssa_def *input_point = nir_ssa_for_src(b, input_point_src, 4);
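+
+ /* The scale/offset sysvals are filled from the pipe viewport state by
+ * the command stream, so one multiply-add maps NDC to screen space */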
+ nir_ssa_def *scale = nir_load_viewport_scale(b);
+ nir_ssa_def *offset = nir_load_viewport_offset(b);
- /* Get viewport from the uniforms */
- nir_intrinsic_instr *load;
- load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform);
- load->num_components = 4;
- load->src[0] = nir_src_for_ssa(nir_imm_int(b, uniform_no));
- nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
- nir_builder_instr_insert(b, &load->instr);
-
- /* Formatted as <width, height, centerx, centery> */
- nir_ssa_def *viewport_vec4 = &load->dest.ssa;
- nir_ssa_def *viewport_width_2 = nir_channel(b, viewport_vec4, 0);
- nir_ssa_def *viewport_height_2 = nir_channel(b, viewport_vec4, 1);
- nir_ssa_def *viewport_offset = nir_channels(b, viewport_vec4, 0x8 | 0x4);
-
- /* XXX: From uniforms? */
- nir_ssa_def *depth_near = nir_imm_float(b, 0.0);
- nir_ssa_def *depth_far = nir_imm_float(b, 1.0);
-
- /* World space to normalised device coordinates */
+ /* World space to normalised device coordinates to screen space */
nir_ssa_def *w_recip = nir_frcp(b, nir_channel(b, input_point, 3));
nir_ssa_def *ndc_point = nir_fmul(b, nir_channels(b, input_point, 0x7), w_recip);
-
- /* Normalised device coordinates to screen space */
-
- nir_ssa_def *viewport_multiplier = nir_vec2(b, viewport_width_2, viewport_height_2);
- nir_ssa_def *viewport_xy = nir_fadd(b, nir_fmul(b, nir_channels(b, ndc_point, 0x3), viewport_multiplier), viewport_offset);
-
- nir_ssa_def *depth_multiplier = nir_fmul(b, nir_fsub(b, depth_far, depth_near), nir_imm_float(b, 0.5f));
- nir_ssa_def *depth_offset = nir_fmul(b, nir_fadd(b, depth_far, depth_near), nir_imm_float(b, 0.5f));
- nir_ssa_def *screen_depth = nir_fadd(b, nir_fmul(b, nir_channel(b, ndc_point, 2), depth_multiplier), depth_offset);
+ nir_ssa_def *screen = nir_fadd(b, nir_fmul(b, ndc_point, scale), offset);
/* gl_Position will be written out in screenspace xyz, with w set to
* the reciprocal we computed earlier. The transformed w component is
* used in depth clipping computations */
nir_ssa_def *screen_space = nir_vec4(b,
- nir_channel(b, viewport_xy, 0),
- nir_channel(b, viewport_xy, 1),
- screen_depth,
+ nir_channel(b, screen, 0),
+ nir_channel(b, screen, 1),
+ nir_channel(b, screen, 2),
w_recip);
/* Finally, write out the transformed values to the varying */
nir_builder_init(&b, func->impl);
b.cursor = nir_before_instr(instr);
- write_transformed_position(&b, intr->src[0], UNIFORM_VIEWPORT);
+ write_transformed_position(&b, intr->src[0]);
nir_instr_remove(instr);
}
}
/* TODO: Decide this at runtime */
ctx->uniform_cutoff = 8;
- switch (ctx->stage) {
- case MESA_SHADER_VERTEX:
- ctx->special_uniforms = 1;
- break;
-
- default:
- ctx->special_uniforms = 0;
- break;
- }
-
- /* Append epilogue uniforms if necessary. The cmdstream depends on
- * these being at the -end-; see assign_var_locations. */
-
- if (ctx->stage == MESA_SHADER_VERTEX) {
- nir_variable_create(nir, nir_var_uniform, glsl_vec4_type(), "viewport");
- }
-
/* Assign var locations early, so the epilogue can use them if necessary */
nir_assign_var_locations(&nir->outputs, &nir->num_outputs, glsl_type_size);
nir_assign_var_locations(&nir->inputs, &nir->num_inputs, glsl_type_size);
- nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms, glsl_type_size);
+ nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms, uniform_type_size);
/* Initialize at a global (not block) level hash tables */
ctx->ssa_to_alias = _mesa_hash_table_u64_create(NULL);
ctx->ssa_to_register = _mesa_hash_table_u64_create(NULL);
ctx->hash_to_temp = _mesa_hash_table_u64_create(NULL);
+ ctx->sysval_to_id = _mesa_hash_table_u64_create(NULL);
ctx->leftover_ssa_to_alias = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
- /* Assign actual uniform location, skipping over samplers */
-
- ctx->uniform_nir_to_mdg = _mesa_hash_table_u64_create(NULL);
-
- nir_foreach_variable(var, &nir->uniforms) {
- if (glsl_get_base_type(var->type) == GLSL_TYPE_SAMPLER) continue;
-
- unsigned length = glsl_get_aoa_size(var->type);
-
- if (!length) {
- length = glsl_get_length(var->type);
- }
-
- if (!length) {
- length = glsl_get_matrix_columns(var->type);
- }
-
- for (int col = 0; col < length; ++col) {
- int id = ctx->uniform_count++;
- _mesa_hash_table_u64_insert(ctx->uniform_nir_to_mdg, var->data.driver_location + col + 1, (void *) ((uintptr_t) (id + 1)));
- }
- }
-
/* Record the varying mapping for the command stream's bookkeeping */
struct exec_list *varyings =
NIR_PASS_V(nir, nir_lower_global_vars_to_local);
NIR_PASS_V(nir, nir_lower_var_copies);
NIR_PASS_V(nir, nir_lower_vars_to_ssa);
- NIR_PASS_V(nir, nir_lower_io, nir_var_all, glsl_type_size, 0);
+
+ NIR_PASS_V(nir, nir_lower_io, nir_var_uniform, uniform_type_size, 0);
+ NIR_PASS_V(nir, nir_lower_io, nir_var_all & ~nir_var_uniform, glsl_type_size, 0);
/* Append vertex epilogue before optimisation, so the epilogue itself
* is optimised */
nir_print_shader(nir, stdout);
}
- /* Assign counts, now that we're sure (post-optimisation) */
+ /* Assign sysvals and counts, now that we're sure
+ * (post-optimisation) */
+
+ midgard_nir_assign_sysvals(ctx, nir);
+
program->uniform_count = nir->num_uniforms;
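+ /* Export the sysval table so the command stream knows which values to
+ * write (one vec4 each) ahead of the user uniforms */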
+ program->sysval_count = ctx->sysval_count;
+ memcpy(program->sysvals, ctx->sysvals, sizeof(ctx->sysvals[0]) * ctx->sysval_count);
program->attribute_count = (ctx->stage == MESA_SHADER_VERTEX) ? nir->num_inputs : 0;
program->varying_count = (ctx->stage == MESA_SHADER_VERTEX) ? nir->num_outputs : ((ctx->stage == MESA_SHADER_FRAGMENT) ? nir->num_inputs : 0);
-
nir_foreach_function(func, nir) {
if (!func->impl)
continue;
}
}
- /* Generate the viewport vector of the form: <width/2, height/2, centerx, centery> */
const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
/* For flipped-Y buffers (signaled by negative scale), the translate is
if (invert_y)
translate_y = ctx->pipe_framebuffer.height - translate_y;
- float viewport_vec4[] = {
- vp->scale[0],
- fabsf(vp->scale[1]),
-
- vp->translate[0],
- translate_y
- };
-
- for (int i = 0; i < PIPE_SHADER_TYPES; ++i) {
+ for (int i = 0; i <= PIPE_SHADER_FRAGMENT; ++i) {
struct panfrost_constant_buffer *buf = &ctx->constant_buffer[i];
- if (i == PIPE_SHADER_VERTEX || i == PIPE_SHADER_FRAGMENT) {
- /* It doesn't matter if we don't use all the memory;
- * we'd need a dummy UBO anyway. Compute the max */
-
- size_t size = sizeof(viewport_vec4) + buf->size;
- struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, size);
+ struct panfrost_shader_state *vs = &ctx->vs->variants[ctx->vs->active_variant];
+ struct panfrost_shader_state *fs = &ctx->fs->variants[ctx->fs->active_variant];
+ struct panfrost_shader_state *ss = (i == PIPE_SHADER_FRAGMENT) ? fs : vs;
- /* Keep track how much we've uploaded */
- off_t offset = 0;
+ /* Allocate room for the sysvals and the uniforms */
+ size_t sys_size = sizeof(float) * 4 * ss->sysval_count;
+ size_t size = sys_size + buf->size;
+ struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, size);
- if (i == PIPE_SHADER_VERTEX) {
- /* Upload viewport */
- memcpy(transfer.cpu + offset, viewport_vec4, sizeof(viewport_vec4));
- offset += sizeof(viewport_vec4);
+ /* Upload sysvals requested by the shader */
+ float *uniforms = (float *) transfer.cpu;
+ for (unsigned i = 0; i < ss->sysval_count; ++i) {
+ int sysval = ss->sysval[i];
+
+ if (sysval == PAN_SYSVAL_VIEWPORT_SCALE) {
+ uniforms[4*i + 0] = vp->scale[0];
+ uniforms[4*i + 1] = fabsf(vp->scale[1]);
+ uniforms[4*i + 2] = vp->scale[2];
+ } else if (sysval == PAN_SYSVAL_VIEWPORT_OFFSET) {
+ uniforms[4*i + 0] = vp->translate[0];
+ uniforms[4*i + 1] = translate_y;
+ uniforms[4*i + 2] = vp->translate[2];
+ } else {
+ assert(0);
}
+ }
- /* Upload uniforms */
- memcpy(transfer.cpu + offset, buf->buffer, buf->size);
+ /* Upload uniforms */
+ memcpy(transfer.cpu + sys_size, buf->buffer, buf->size);
- int uniform_count = 0;
+ int uniform_count = 0;
- struct mali_vertex_tiler_postfix *postfix;
+ struct mali_vertex_tiler_postfix *postfix;
- switch (i) {
- case PIPE_SHADER_VERTEX:
- uniform_count = ctx->vs->variants[ctx->vs->active_variant].uniform_count;
- postfix = &ctx->payload_vertex.postfix;
- break;
+ switch (i) {
+ case PIPE_SHADER_VERTEX:
+ uniform_count = ctx->vs->variants[ctx->vs->active_variant].uniform_count;
+ postfix = &ctx->payload_vertex.postfix;
+ break;
- case PIPE_SHADER_FRAGMENT:
- uniform_count = ctx->fs->variants[ctx->fs->active_variant].uniform_count;
- postfix = &ctx->payload_tiler.postfix;
- break;
+ case PIPE_SHADER_FRAGMENT:
+ uniform_count = ctx->fs->variants[ctx->fs->active_variant].uniform_count;
+ postfix = &ctx->payload_tiler.postfix;
+ break;
- default:
- DBG("Unknown shader stage %d in uniform upload\n", i);
- assert(0);
- }
+ default:
+ DBG("Unknown shader stage %d in uniform upload\n", i);
+ assert(0);
+ }
- /* Also attach the same buffer as a UBO for extended access */
+ /* Also attach the same buffer as a UBO for extended access */
- struct mali_uniform_buffer_meta uniform_buffers[] = {
- {
- .size = MALI_POSITIVE((2 + uniform_count)),
- .ptr = transfer.gpu >> 2,
- },
- };
+ struct mali_uniform_buffer_meta uniform_buffers[] = {
+ {
+ .size = MALI_POSITIVE((2 + uniform_count)),
+ .ptr = transfer.gpu >> 2,
+ },
+ };
- mali_ptr ubufs = panfrost_upload_transient(ctx, uniform_buffers, sizeof(uniform_buffers));
- postfix->uniforms = transfer.gpu;
- postfix->uniform_buffers = ubufs;
+ mali_ptr ubufs = panfrost_upload_transient(ctx, uniform_buffers, sizeof(uniform_buffers));
+ postfix->uniforms = transfer.gpu;
+ postfix->uniform_buffers = ubufs;
- buf->dirty = 0;
- }
+ buf->dirty = 0;
}
/* TODO: Upload the viewport somewhere more appropriate */