vc4: Rewrite the kernel ABI to support texture uniform relocation.
author	Eric Anholt <eric@anholt.net>
Mon, 21 Jul 2014 18:27:35 +0000 (11:27 -0700)
committer	Eric Anholt <eric@anholt.net>
Mon, 11 Aug 2014 21:45:28 +0000 (14:45 -0700)
This required building a shader parser that would walk the program to find
where the texturing-related uniforms are in the uniforms stream.

Note that as of this commit, a new kernel is required for rendering on
actual VC4 hardware (currently that commit is named "drm/vc4: Introduce
shader validation and better command stream validation.", but is likely to
be squashed as part of an eventual merge of the kernel driver).
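
As an aside (illustration only, none of this is in the commit), the heart of that
shader walk can be sketched roughly as follows.  Each texture sample is triggered
by a write of the S coordinate to a TMU, so counting the QPU_W_TMU0_S/QPU_W_TMU1_S
writes tells you how many uniform-stream relocations a program will need; the
QPU_GET_FIELD()/QPU_WADDR_* names are the same ones used by the validator added
further down.

static uint32_t
count_texture_samples(const uint64_t *insts, uint32_t num_insts)
{
        uint32_t samples = 0;

        for (uint32_t ip = 0; ip < num_insts; ip++) {
                uint32_t waddr_add = QPU_GET_FIELD(insts[ip], QPU_WADDR_ADD);
                uint32_t waddr_mul = QPU_GET_FIELD(insts[ip], QPU_WADDR_MUL);

                /* A TMUn_S write from either the add or mul ALU triggers a sample. */
                if (waddr_add == QPU_W_TMU0_S || waddr_add == QPU_W_TMU1_S ||
                    waddr_mul == QPU_W_TMU0_S || waddr_mul == QPU_W_TMU1_S)
                        samples++;
        }

        return samples;
}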

src/gallium/drivers/vc4/Makefile.sources
src/gallium/drivers/vc4/vc4_context.c
src/gallium/drivers/vc4/vc4_context.h
src/gallium/drivers/vc4/vc4_draw.c
src/gallium/drivers/vc4/vc4_drm.h
src/gallium/drivers/vc4/vc4_program.c
src/gallium/drivers/vc4/vc4_simulator.c
src/gallium/drivers/vc4/vc4_simulator_validate.c
src/gallium/drivers/vc4/vc4_simulator_validate.h
src/gallium/drivers/vc4/vc4_simulator_validate_shaders.c [new file with mode: 0644]

diff --git a/src/gallium/drivers/vc4/Makefile.sources b/src/gallium/drivers/vc4/Makefile.sources
index ee351835896fc797713e6e5d697d15c438818245..414a64ab4722ccac5b31f098ab6e5a55e1a8510b 100644
@@ -17,5 +17,6 @@ C_SOURCES := \
        vc4_screen.c \
        vc4_simulator.c \
        vc4_simulator_validate.c \
+       vc4_simulator_validate_shaders.c \
        vc4_state.c \
        $()
diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c
index a9fa7ef70f1ec0ade309a289aaf524cd7a530682..08e85ed6312e538b8f240501d12d94f44563c07a 100644
@@ -107,6 +107,8 @@ vc4_flush(struct pipe_context *pctx)
         submit.shader_records = vc4->shader_rec.base;
         submit.shader_record_len = vc4->shader_rec.next - vc4->shader_rec.base;
         submit.shader_record_count = vc4->shader_rec_count;
+        submit.uniforms = vc4->uniforms.base;
+        submit.uniforms_len = vc4->uniforms.next - vc4->uniforms.base;
 
         if (!(vc4_debug & VC4_DEBUG_NORAST)) {
                 int ret;
@@ -123,6 +125,7 @@ vc4_flush(struct pipe_context *pctx)
         vc4_reset_cl(&vc4->bcl);
         vc4_reset_cl(&vc4->rcl);
         vc4_reset_cl(&vc4->shader_rec);
+        vc4_reset_cl(&vc4->uniforms);
         vc4_reset_cl(&vc4->bo_handles);
 #ifdef USE_VC4_SIMULATOR
         vc4_reset_cl(&vc4->bo_pointers);
diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h
index ee9ddcfd82b38e446e62957eae3ac0df4254552c..010727ff4de9026454b836c01dc4906e1fc5b1d6 100644
@@ -70,6 +70,7 @@ struct vc4_shader_uniform_info {
         enum quniform_contents *contents;
         uint32_t *data;
         uint32_t count;
+        uint32_t num_texture_samples;
 };
 
 struct vc4_compiled_shader {
@@ -120,6 +121,7 @@ struct vc4_context {
         struct vc4_cl bcl;
         struct vc4_cl rcl;
         struct vc4_cl shader_rec;
+        struct vc4_cl uniforms;
         struct vc4_cl bo_handles;
 #ifdef USE_VC4_SIMULATOR
         struct vc4_cl bo_pointers;
@@ -195,12 +197,11 @@ int vc4_simulator_flush(struct vc4_context *vc4,
                         struct drm_vc4_submit_cl *args,
                         struct vc4_surface *color_surf);
 
-void vc4_get_uniform_bo(struct vc4_context *vc4,
+void vc4_write_uniforms(struct vc4_context *vc4,
                         struct vc4_compiled_shader *shader,
                         struct vc4_constbuf_stateobj *cb,
                         struct vc4_texture_stateobj *texstate,
-                        int shader_index, struct vc4_bo **out_bo,
-                        uint32_t *out_offset);
+                        int shader_index);
 
 void vc4_flush(struct pipe_context *pctx);
 void vc4_emit_state(struct pipe_context *pctx);
diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c
index d5628d0d3ca6e745aa0ab7a124c87dc781c89174..8559bf3b2fe0ba6c77c9fb3e100277452cc0a261 100644
@@ -162,40 +162,38 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
 
 // Shader Record
 
-        struct vc4_bo *fs_ubo, *vs_ubo, *cs_ubo;
-        uint32_t fs_ubo_offset, vs_ubo_offset, cs_ubo_offset;
-        vc4_get_uniform_bo(vc4, vc4->prog.fs,
+        vc4_write_uniforms(vc4, vc4->prog.fs,
                            &vc4->constbuf[PIPE_SHADER_FRAGMENT],
                            &vc4->fragtex,
-                           0, &fs_ubo, &fs_ubo_offset);
-        vc4_get_uniform_bo(vc4, vc4->prog.vs,
+                           0);
+        vc4_write_uniforms(vc4, vc4->prog.vs,
                            &vc4->constbuf[PIPE_SHADER_VERTEX],
                            &vc4->verttex,
-                           0, &vs_ubo, &vs_ubo_offset);
-        vc4_get_uniform_bo(vc4, vc4->prog.vs,
+                           0);
+        vc4_write_uniforms(vc4, vc4->prog.vs,
                            &vc4->constbuf[PIPE_SHADER_VERTEX],
                            &vc4->verttex,
-                           1, &cs_ubo, &cs_ubo_offset);
+                           1);
 
-        cl_start_shader_reloc(&vc4->shader_rec, 6 + vtx->num_elements);
+        cl_start_shader_reloc(&vc4->shader_rec, 3 + vtx->num_elements);
         cl_u16(&vc4->shader_rec, VC4_SHADER_FLAG_ENABLE_CLIPPING);
         cl_u8(&vc4->shader_rec, 0); /* fs num uniforms (unused) */
         cl_u8(&vc4->shader_rec, vc4->prog.fs->num_inputs);
         cl_reloc(vc4, &vc4->shader_rec, vc4->prog.fs->bo, 0);
-        cl_reloc(vc4, &vc4->shader_rec, fs_ubo, fs_ubo_offset);
+        cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */
 
         cl_u16(&vc4->shader_rec, 0); /* vs num uniforms */
         cl_u8(&vc4->shader_rec, (1 << vtx->num_elements) - 1); /* vs attribute array bitfield */
         cl_u8(&vc4->shader_rec, 16 * vtx->num_elements); /* vs total attribute size */
         cl_reloc(vc4, &vc4->shader_rec, vc4->prog.vs->bo, 0);
-        cl_reloc(vc4, &vc4->shader_rec, vs_ubo, vs_ubo_offset);
+        cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */
 
         cl_u16(&vc4->shader_rec, 0); /* cs num uniforms */
         cl_u8(&vc4->shader_rec, (1 << vtx->num_elements) - 1); /* cs attribute array bitfield */
         cl_u8(&vc4->shader_rec, 16 * vtx->num_elements); /* vs total attribute size */
         cl_reloc(vc4, &vc4->shader_rec, vc4->prog.vs->bo,
                 vc4->prog.vs->coord_shader_offset);
-        cl_reloc(vc4, &vc4->shader_rec, cs_ubo, cs_ubo_offset);
+        cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */
 
         for (int i = 0; i < vtx->num_elements; i++) {
                 struct pipe_vertex_element *elem = &vtx->pipe[i];
diff --git a/src/gallium/drivers/vc4/vc4_drm.h b/src/gallium/drivers/vc4/vc4_drm.h
index b958f1d03d0af5d8f47bc776f07a52240b99e54d..cc4c735d881e60111bed30bad724fa94bb0cd845 100644
@@ -74,6 +74,21 @@ struct drm_vc4_submit_cl {
         */
        void __user *shader_records;
 
+       /* Pointer to uniform data and texture handles for the textures
+        * referenced by the shader.
+        *
+        * For each shader state record, there is a set of uniform data in the
+        * order referenced by the record (FS, VS, then CS).  Each set of
+        * uniform data has a uint32_t index into bo_handles per texture
+        * sample operation, in the order the QPU_W_TMUn_S writes appear in
+        * the program.  Following the texture BO handle indices is the actual
+        * uniform data.
+        *
+        * The individual uniform state blocks don't have sizes passed in,
+        * because the kernel has to determine the sizes anyway during shader
+        * code validation.
+        */
+       void __user *uniforms;
        void __user *bo_handles;
 
        /* Size in bytes of the binner command list. */
@@ -84,11 +99,13 @@ struct drm_vc4_submit_cl {
        uint32_t shader_record_len;
        /* Number of shader records.
         *
-        * This could just be computed from the contents of shader_records,
-        * but it keeps the kernel from having to resize various allocations
-        * it makes.
+        * This could just be computed from the contents of shader_records and
+        * the address bits of references to them from the bin CL, but it
+        * keeps the kernel from having to resize some allocations it makes.
         */
        uint32_t shader_record_count;
+       /** Size in bytes of the uniform state. */
+       uint32_t uniforms_len;
 
        /* Number of BO handles passed in (size is that times 4). */
        uint32_t bo_handle_count;
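
Sketch only (not part of the commit): the per-shader block size implied by the
uniforms layout documented above.  num_texture_samples is the number of
QPU_W_TMUn_S writes in the program and num_uniform_reads is the number of
uniform-stream reads it performs (which already includes the texture P0/P1
config words); uniforms_len is then the sum of these blocks over every shader
record's FS, VS and CS uniforms, in that order.

static uint32_t
uniform_block_size(uint32_t num_texture_samples, uint32_t num_uniform_reads)
{
        uint32_t handle_indices = num_texture_samples * 4; /* indices into bo_handles */
        uint32_t uniform_data = num_uniform_reads * 4;     /* the uniform words themselves */

        /* Texture BO handle indices come first, then the uniform data. */
        return handle_indices + uniform_data;
}
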
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 15e1ff25b04d7842a2bfca3fa4f38c3217737bbb..b7ed1bf60a0f4157ad04912d8030839a2a098593 100644
@@ -57,6 +57,7 @@ struct tgsi_to_qir {
         enum quniform_contents *uniform_contents;
         uint32_t num_uniforms;
         uint32_t num_outputs;
+        uint32_t num_texture_samples;
 };
 
 struct vc4_key {
@@ -332,6 +333,7 @@ tgsi_to_qir_tex(struct tgsi_to_qir *trans,
                 qir_TEX_S(c, s, sampler_p1);
         }
 
+        trans->num_texture_samples++;
         qir_emit(c, qir_inst(QOP_TEX_RESULT, c->undef, c->undef, c->undef));
 
         for (int i = 0; i < 4; i++) {
@@ -938,6 +940,7 @@ copy_uniform_state_to_shader(struct vc4_compiled_shader *shader,
         uinfo->contents = malloc(count * sizeof(*uinfo->contents));
         memcpy(uinfo->contents, trans->uniform_contents,
                count * sizeof(*uinfo->contents));
+        uinfo->num_texture_samples = trans->num_texture_samples;
 }
 
 static void
@@ -1141,26 +1144,23 @@ static uint32_t translate_wrap(uint32_t p_wrap)
         }
 }
 
-static uint32_t
-get_texture_p0(struct vc4_texture_stateobj *texstate,
-               uint32_t tex_and_sampler)
+static void
+write_texture_p0(struct vc4_context *vc4,
+                 struct vc4_texture_stateobj *texstate,
+                 uint32_t tex_and_sampler)
 {
         uint32_t texi = (tex_and_sampler >> 0) & 0xff;
         struct pipe_sampler_view *texture = texstate->textures[texi];
         struct vc4_resource *rsc = vc4_resource(texture->texture);
 
-        return (texture->u.tex.last_level |
-#if USE_VC4_SIMULATOR
-                simpenrose_hw_addr(rsc->bo->map) /* XXX */
-#else
-                0 /* XXX */
-#endif
-                /* XXX: data type */);
+        cl_reloc(vc4, &vc4->uniforms, rsc->bo,
+                 texture->u.tex.last_level);
 }
 
-static uint32_t
-get_texture_p1(struct vc4_texture_stateobj *texstate,
-               uint32_t tex_and_sampler)
+static void
+write_texture_p1(struct vc4_context *vc4,
+                 struct vc4_texture_stateobj *texstate,
+                 uint32_t tex_and_sampler)
 {
         uint32_t texi = (tex_and_sampler >> 0) & 0xff;
         uint32_t sampi = (tex_and_sampler >> 8) & 0xff;
@@ -1176,14 +1176,15 @@ get_texture_p1(struct vc4_texture_stateobj *texstate,
                 [PIPE_TEX_FILTER_LINEAR] = 0,
         };
 
-        return ((1 << 31) /* XXX: data type */|
-                (texture->texture->height0 << 20) |
-                (texture->texture->width0 << 8) |
-                (imgfilter_map[sampler->mag_img_filter] << 7) |
-                ((imgfilter_map[sampler->min_img_filter] +
-                  mipfilter_map[sampler->min_mip_filter]) << 4) |
-                (translate_wrap(sampler->wrap_t) << 2) |
-                (translate_wrap(sampler->wrap_s) << 0));
+        cl_u32(&vc4->uniforms,
+               (1 << 31) /* XXX: data type */|
+               (texture->texture->height0 << 20) |
+               (texture->texture->width0 << 8) |
+               (imgfilter_map[sampler->mag_img_filter] << 7) |
+               ((imgfilter_map[sampler->min_img_filter] +
+                 mipfilter_map[sampler->min_mip_filter]) << 4) |
+               (translate_wrap(sampler->wrap_t) << 2) |
+               (translate_wrap(sampler->wrap_s) << 0));
 }
 
 static uint32_t
@@ -1203,56 +1204,57 @@ get_texrect_scale(struct vc4_texture_stateobj *texstate,
 }
 
 void
-vc4_get_uniform_bo(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
+vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
                    struct vc4_constbuf_stateobj *cb,
                    struct vc4_texture_stateobj *texstate,
-                   int shader_index, struct vc4_bo **out_bo,
-                   uint32_t *out_offset)
+                   int shader_index)
 {
         struct vc4_shader_uniform_info *uinfo = &shader->uniforms[shader_index];
-        struct vc4_bo *ubo = vc4_bo_alloc(vc4->screen,
-                                          MAX2(1, uinfo->count * 4), "ubo");
-        uint32_t *map = vc4_bo_map(ubo);
+        const uint32_t *gallium_uniforms = cb->cb[0].user_buffer;
+
+        cl_start_shader_reloc(&vc4->uniforms, uinfo->num_texture_samples);
 
         for (int i = 0; i < uinfo->count; i++) {
 
                 switch (uinfo->contents[i]) {
                 case QUNIFORM_CONSTANT:
-                        map[i] = uinfo->data[i];
+                        cl_u32(&vc4->uniforms, uinfo->data[i]);
                         break;
                 case QUNIFORM_UNIFORM:
-                        map[i] = ((uint32_t *)cb->cb[0].user_buffer)[uinfo->data[i]];
+                        cl_u32(&vc4->uniforms,
+                               gallium_uniforms[uinfo->data[i]]);
                         break;
                 case QUNIFORM_VIEWPORT_X_SCALE:
-                        map[i] = fui(vc4->framebuffer.width * 16.0f / 2.0f);
+                        cl_u32(&vc4->uniforms, fui(vc4->framebuffer.width *
+                                                   16.0f / 2.0f));
                         break;
                 case QUNIFORM_VIEWPORT_Y_SCALE:
-                        map[i] = fui(vc4->framebuffer.height * -16.0f / 2.0f);
+                        cl_u32(&vc4->uniforms, fui(vc4->framebuffer.height *
+                                                   -16.0f / 2.0f));
                         break;
 
                 case QUNIFORM_TEXTURE_CONFIG_P0:
-                        map[i] = get_texture_p0(texstate, uinfo->data[i]);
+                        write_texture_p0(vc4, texstate, uinfo->data[i]);
                         break;
 
                 case QUNIFORM_TEXTURE_CONFIG_P1:
-                        map[i] = get_texture_p1(texstate, uinfo->data[i]);
+                        write_texture_p1(vc4, texstate, uinfo->data[i]);
                         break;
 
                 case QUNIFORM_TEXRECT_SCALE_X:
                 case QUNIFORM_TEXRECT_SCALE_Y:
-                        map[i] = get_texrect_scale(texstate,
-                                                   uinfo->contents[i],
-                                                   uinfo->data[i]);
+                        cl_u32(&vc4->uniforms,
+                               get_texrect_scale(texstate,
+                                                 uinfo->contents[i],
+                                                 uinfo->data[i]));
                         break;
                 }
 #if 0
+                uint32_t written_val = *(uint32_t *)(vc4->uniforms.next - 4);
                 fprintf(stderr, "%p/%d: %d: 0x%08x (%f)\n",
-                        shader, shader_index, i, map[i], uif(map[i]));
+                        shader, shader_index, i, written_val, uif(written_val));
 #endif
         }
-
-        *out_bo = ubo;
-        *out_offset = 0;
 }
 
 static void
diff --git a/src/gallium/drivers/vc4/vc4_simulator.c b/src/gallium/drivers/vc4/vc4_simulator.c
index 2b59aa53f5a7ba710dfd5d506d4c72e4ac34eabe..0dada68791162537242ef363a1ff94d8d85ff0ce 100644
@@ -63,9 +63,9 @@ drm_gem_cma_create(struct drm_device *dev, size_t size)
 }
 
 static int
-vc4_simulator_pin_bos(struct drm_device *dev, struct drm_vc4_submit_cl *args,
-                      struct exec_info *exec)
+vc4_simulator_pin_bos(struct drm_device *dev, struct exec_info *exec)
 {
+        struct drm_vc4_submit_cl *args = exec->args;
         struct vc4_context *vc4 = dev->vc4;
         struct vc4_bo **bos = vc4->bo_pointers.base;
 
@@ -84,8 +84,7 @@ vc4_simulator_pin_bos(struct drm_device *dev, struct drm_vc4_submit_cl *args,
 }
 
 static int
-vc4_simulator_unpin_bos(struct drm_vc4_submit_cl *args,
-                        struct exec_info *exec)
+vc4_simulator_unpin_bos(struct exec_info *exec)
 {
         for (int i = 0; i < exec->bo_count; i++) {
                 struct drm_gem_cma_object *obj = exec->bo[i];
@@ -102,9 +101,9 @@ vc4_simulator_unpin_bos(struct drm_vc4_submit_cl *args,
 }
 
 static int
-vc4_cl_validate(struct drm_device *dev, struct drm_vc4_submit_cl *args,
-               struct exec_info *exec)
+vc4_cl_validate(struct drm_device *dev, struct exec_info *exec)
 {
+       struct drm_vc4_submit_cl *args = exec->args;
        void *temp = NULL;
        void *bin, *render, *shader_rec;
        int ret = 0;
@@ -112,12 +111,14 @@ vc4_cl_validate(struct drm_device *dev, struct drm_vc4_submit_cl *args,
        uint32_t render_offset = bin_offset + args->bin_cl_len;
        uint32_t shader_rec_offset = roundup(render_offset +
                                             args->render_cl_len, 16);
-       uint32_t exec_size = shader_rec_offset + args->shader_record_len;
+       uint32_t uniforms_offset = shader_rec_offset + args->shader_record_len;
+       uint32_t exec_size = uniforms_offset + args->uniforms_len;
        uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) *
                                          args->shader_record_count);
 
        if (shader_rec_offset < render_offset ||
-           exec_size < shader_rec_offset ||
+           uniforms_offset < shader_rec_offset ||
+           exec_size < uniforms_offset ||
            args->shader_record_count >= (UINT_MAX /
                                          sizeof(struct vc4_shader_state)) ||
            temp_size < exec_size) {
@@ -142,6 +143,7 @@ vc4_cl_validate(struct drm_device *dev, struct drm_vc4_submit_cl *args,
        bin = temp + bin_offset;
        render = temp + render_offset;
        shader_rec = temp + shader_rec_offset;
+       exec->uniforms_u = temp + uniforms_offset;
        exec->shader_state = temp + exec_size;
        exec->shader_state_size = args->shader_record_count;
 
@@ -164,6 +166,13 @@ vc4_cl_validate(struct drm_device *dev, struct drm_vc4_submit_cl *args,
                goto fail;
        }
 
+       ret = copy_from_user(exec->uniforms_u, args->uniforms,
+                            args->uniforms_len);
+       if (ret) {
+               DRM_ERROR("Failed to copy in uniforms cl\n");
+               goto fail;
+       }
+
        exec->exec_bo = drm_gem_cma_create(dev, exec_size);
 #if 0
        if (IS_ERR(exec->exec_bo)) {
@@ -180,6 +189,10 @@ vc4_cl_validate(struct drm_device *dev, struct drm_vc4_submit_cl *args,
        exec->ct1ea = exec->ct1ca + args->render_cl_len;
        exec->shader_paddr = exec->exec_bo->paddr + shader_rec_offset;
 
+       exec->uniforms_v = exec->exec_bo->vaddr + uniforms_offset;
+       exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset;
+       exec->uniforms_size = args->uniforms_len;
+
        ret = vc4_validate_cl(dev,
                              exec->exec_bo->vaddr + bin_offset,
                              bin,
@@ -243,18 +256,20 @@ vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args,
                 }
         }
 
-        ret = vc4_simulator_pin_bos(dev, args, &exec);
+        exec.args = args;
+
+        ret = vc4_simulator_pin_bos(dev, &exec);
         if (ret)
                 return ret;
 
-        ret = vc4_cl_validate(dev, args, &exec);
+        ret = vc4_cl_validate(dev, &exec);
         if (ret)
                 return ret;
 
         simpenrose_do_binning(exec.ct0ca, exec.ct0ea);
         simpenrose_do_rendering(exec.ct1ca, exec.ct1ea);
 
-        ret = vc4_simulator_unpin_bos(args, &exec);
+        ret = vc4_simulator_unpin_bos(&exec);
         if (ret)
                 return ret;
 
diff --git a/src/gallium/drivers/vc4/vc4_simulator_validate.c b/src/gallium/drivers/vc4/vc4_simulator_validate.c
index 14701b171c70122ce30e3fececebd66bc6ca3bc2..a67e2345b11446e27859d882b1479845beaf949c 100644
@@ -347,6 +347,30 @@ vc4_validate_cl(struct drm_device *dev,
        return 0;
 }
 
+static bool
+reloc_tex(struct exec_info *exec,
+         void *uniform_data_u,
+         struct vc4_texture_sample_info *sample,
+         uint32_t texture_handle_index)
+
+{
+       struct drm_gem_cma_object *tex;
+       uint32_t unvalidated_p0 = *(uint32_t *)(uniform_data_u +
+                                               sample->p_offset[0]);
+       uint32_t *validated_p0 = exec->uniforms_v + sample->p_offset[0];
+
+       if (texture_handle_index >= exec->bo_count) {
+               DRM_ERROR("texture handle index %d >= %d\n",
+                         texture_handle_index, exec->bo_count);
+               return false;
+       }
+       tex = exec->bo[texture_handle_index];
+
+       *validated_p0 = tex->paddr + unvalidated_p0;
+
+       return true;
+}
+
 static int
 validate_shader_rec(struct drm_device *dev,
                    struct exec_info *exec,
@@ -358,45 +382,54 @@ validate_shader_rec(struct drm_device *dev,
        uint32_t *src_handles = unvalidated;
        void *src_pkt;
        void *dst_pkt = validated;
-       static const int gl_bo_offsets[] = {
-               4, 8, /* fs code, ubo */
-               16, 20, /* vs code, ubo */
-               28, 32, /* cs code, ubo */
+       enum shader_rec_reloc_type {
+               RELOC_CODE,
+               RELOC_VBO,
+       };
+       struct shader_rec_reloc {
+               enum shader_rec_reloc_type type;
+               uint32_t offset;
+       };
+       static const struct shader_rec_reloc gl_relocs[] = {
+               { RELOC_CODE, 4 },  /* fs */
+               { RELOC_CODE, 16 }, /* vs */
+               { RELOC_CODE, 28 }, /* cs */
        };
-       static const int nv_bo_offsets[] = {
-               4, 8, /* fs code, ubo */
-               12, /* vbo */
+       static const struct shader_rec_reloc nv_relocs[] = {
+               { RELOC_CODE, 4 }, /* fs */
+               { RELOC_VBO, 12 }
        };
-       struct drm_gem_cma_object *bo[ARRAY_SIZE(gl_bo_offsets) + 8];
-       const int *bo_offsets;
-       uint32_t nr_attributes = 0, nr_bo, packet_size;
+       const struct shader_rec_reloc *relocs;
+       struct drm_gem_cma_object *bo[ARRAY_SIZE(gl_relocs) + 8];
+       uint32_t nr_attributes = 0, nr_relocs, packet_size;
        int i;
+       struct vc4_validated_shader_info *validated_shader = NULL;
 
        if (state->packet == VC4_PACKET_NV_SHADER_STATE) {
-               bo_offsets = nv_bo_offsets;
-               nr_bo = ARRAY_SIZE(nv_bo_offsets);
+               relocs = nv_relocs;
+               nr_relocs = ARRAY_SIZE(nv_relocs);
 
                packet_size = 16;
        } else {
-               bo_offsets = gl_bo_offsets;
-               nr_bo = ARRAY_SIZE(gl_bo_offsets);
+               relocs = gl_relocs;
+               nr_relocs = ARRAY_SIZE(gl_relocs);
 
                nr_attributes = state->addr & 0x7;
                if (nr_attributes == 0)
                        nr_attributes = 8;
                packet_size = 36 + nr_attributes * 8;
        }
-       if ((nr_bo + nr_attributes) * 4 + packet_size > len) {
+       if ((nr_relocs + nr_attributes) * 4 + packet_size > len) {
                DRM_ERROR("overflowed shader packet read "
                          "(handles %d, packet %d, len %d)\n",
-                         (nr_bo + nr_attributes) * 4, packet_size, len);
+                         (nr_relocs + nr_attributes) * 4, packet_size, len);
                return -EINVAL;
        }
 
-       src_pkt = unvalidated + 4 * (nr_bo + nr_attributes);
+       src_pkt = unvalidated + 4 * (nr_relocs + nr_attributes);
        memcpy(dst_pkt, src_pkt, packet_size);
 
-       for (i = 0; i < nr_bo + nr_attributes; i++) {
+       for (i = 0; i < nr_relocs + nr_attributes; i++) {
                if (src_handles[i] >= exec->bo_count) {
                        DRM_ERROR("shader rec bo index %d > %d\n",
                                  src_handles[i], exec->bo_count);
@@ -405,21 +438,73 @@ validate_shader_rec(struct drm_device *dev,
                bo[i] = exec->bo[src_handles[i]];
        }
 
-       for (i = 0; i < nr_bo; i++) {
-               /* XXX: validation */
-               uint32_t o = bo_offsets[i];
-               *(uint32_t *)(dst_pkt + o) =
-                       bo[i]->paddr + *(uint32_t *)(src_pkt + o);
+       for (i = 0; i < nr_relocs; i++) {
+               uint32_t o = relocs[i].offset;
+               uint32_t src_offset = *(uint32_t *)(src_pkt + o);
+               *(uint32_t *)(dst_pkt + o) = bo[i]->paddr + src_offset;
+               uint32_t *texture_handles_u;
+               void *uniform_data_u;
+               uint32_t tex;
+
+               switch (relocs[i].type) {
+               case RELOC_CODE:
+                       kfree(validated_shader);
+                       validated_shader = vc4_validate_shader(bo[i],
+                                                              src_offset);
+                       if (!validated_shader)
+                               goto fail;
+
+                       if (validated_shader->uniforms_src_size >
+                           exec->uniforms_size) {
+                               DRM_ERROR("Uniforms src buffer overflow\n");
+                               goto fail;
+                       }
+
+                       texture_handles_u = exec->uniforms_u;
+                       uniform_data_u = (texture_handles_u +
+                                         validated_shader->num_texture_samples);
+
+                       memcpy(exec->uniforms_v, uniform_data_u,
+                              validated_shader->uniforms_size);
+
+                       for (tex = 0;
+                            tex < validated_shader->num_texture_samples;
+                            tex++) {
+                               if (!reloc_tex(exec,
+                                              uniform_data_u,
+                                              &validated_shader->texture_samples[tex],
+                                              texture_handles_u[tex])) {
+                                       goto fail;
+                               }
+                       }
+
+                       *(uint32_t *)(dst_pkt + o + 4) = exec->uniforms_p;
+
+                       exec->uniforms_u += validated_shader->uniforms_src_size;
+                       exec->uniforms_v += validated_shader->uniforms_size;
+                       exec->uniforms_p += validated_shader->uniforms_size;
+
+                       break;
+
+               case RELOC_VBO:
+                       break;
+               }
        }
 
        for (i = 0; i < nr_attributes; i++) {
                /* XXX: validation */
                uint32_t o = 36 + i * 8;
                *(uint32_t *)(dst_pkt + o) =
-                       bo[nr_bo + i]->paddr + *(uint32_t *)(src_pkt + o);
+                       bo[nr_relocs + i]->paddr + *(uint32_t *)(src_pkt + o);
        }
 
+       kfree(validated_shader);
+
        return 0;
+
+fail:
+       kfree(validated_shader);
+       return -EINVAL;
 }
 
 int
diff --git a/src/gallium/drivers/vc4/vc4_simulator_validate.h b/src/gallium/drivers/vc4/vc4_simulator_validate.h
index 4a2a2181ab43348bbd9ba44ee9c315795da8548f..885a754a9d56a6b00cbb6e234cf92b255f2cb253 100644
 
 #include <stdbool.h>
 #include <string.h>
+#include <stdlib.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <errno.h>
 
+#include "vc4_context.h"
+#include "vc4_qpu_defines.h"
+
 #define DRM_INFO(...) fprintf(stderr, __VA_ARGS__)
 #define DRM_ERROR(...) fprintf(stderr, __VA_ARGS__)
 #define kmalloc(size, arg) malloc(size)
+#define kcalloc(size, count, arg) calloc(size, count)
 #define kfree(ptr) free(ptr)
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#define krealloc(ptr, size, args) realloc(ptr, size)
 #define roundup(x, y) align(x, y)
 
 static inline int
@@ -64,6 +69,9 @@ struct drm_gem_cma_object {
 };
 
 struct exec_info {
+       /* Kernel-space copy of the ioctl arguments */
+       struct drm_vc4_submit_cl *args;
+
        /* This is the array of BOs that were looked up at the start of exec.
         * Command validation will use indices into this array.
         */
@@ -79,9 +87,8 @@ struct exec_info {
        uint32_t bo_index[2];
        uint32_t max_width, max_height;
 
-       /**
-        * This is the BO where we store the validated command lists
-        * and shader records.
+       /* This is the BO where we store the validated command lists, shader
+        * records, and uniforms.
         */
        struct drm_gem_cma_object *exec_bo;
 
@@ -108,6 +115,50 @@ struct exec_info {
        uint32_t ct0ca, ct0ea;
        uint32_t ct1ca, ct1ea;
        uint32_t shader_paddr;
+
+       /* Pointers to the uniform data.  These pointers are incremented, and
+        * size decremented, as each batch of uniforms is uploaded.
+        */
+       void *uniforms_u;
+       void *uniforms_v;
+       uint32_t uniforms_p;
+       uint32_t uniforms_size;
+};
+
+/**
+ * struct vc4_texture_sample_info - saves the offsets into the UBO for texture
+ * setup parameters.
+ *
+ * This will be used at draw time to relocate the reference to the texture
+ * contents in p0, and validate that the offset combined with
+ * width/height/stride/etc. from p1 and p2/p3 doesn't sample outside the BO.
+ * Note that the hardware treats unprovided config parameters as 0, so not all
+ * of them need to be set up for every texture sample, and we'll store ~0 as
+ * the offset to mark the unused ones.
+ *
+ * See the VC4 3D architecture guide page 41 ("Texture and Memory Lookup Unit
+ * Setup") for definitions of the texture parameters.
+ */
+struct vc4_texture_sample_info {
+       uint32_t p_offset[4];
+};
+
+/**
+ * struct vc4_validated_shader_info - information about validated shaders that
+ * needs to be used from command list validation.
+ *
+ * For a given shader, each time a shader state record references it, we need
+ * to verify that the shader doesn't read more uniforms than the shader state
+ * record's uniform BO pointer can provide, and we need to apply relocations
+ * and validate the shader state record's uniforms that define the texture
+ * samples.
+ */
+struct vc4_validated_shader_info
+{
+       uint32_t uniforms_size;
+       uint32_t uniforms_src_size;
+       uint32_t num_texture_samples;
+       struct vc4_texture_sample_info *texture_samples;
 };
 
 int vc4_validate_cl(struct drm_device *dev,
@@ -123,4 +174,8 @@ int vc4_validate_shader_recs(struct drm_device *dev,
                              uint32_t len,
                              struct exec_info *exec);
 
+struct vc4_validated_shader_info *
+vc4_validate_shader(struct drm_gem_cma_object *shader_obj,
+                    uint32_t start_offset);
+
 #endif /* VC4_SIMULATOR_VALIDATE_H */
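
As an illustration of the p_offset bookkeeping described in the
vc4_texture_sample_info comment above (hypothetical offsets, not part of the
commit): for a sample that only writes TMU0_S, with its P0 config word being the
third uniform the shader reads, the validator would record something like:

/* Only P0 was provided; the unwritten parameters are marked unused with ~0. */
static const struct vc4_texture_sample_info example_sample = {
        .p_offset = { 8, ~0u, ~0u, ~0u },
};
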
diff --git a/src/gallium/drivers/vc4/vc4_simulator_validate_shaders.c b/src/gallium/drivers/vc4/vc4_simulator_validate_shaders.c
new file mode 100644
index 0000000..c02deb4
--- /dev/null
+++ b/src/gallium/drivers/vc4/vc4_simulator_validate_shaders.c
@@ -0,0 +1,334 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * DOC: Shader validator for VC4.
+ *
+ * The VC4 has no IOMMU between it and system memory.  So, a user with access
+ * to execute shaders could escalate privilege by overwriting system memory
+ * (using the VPM write address register in the general-purpose DMA mode) or
+ * reading system memory it shouldn't (reading it as a texture, or uniform
+ * data, or vertex data).
+ *
+ * This walks over a shader starting from some offset within a BO, ensuring
+ * that its accesses are appropriately bounded, and recording how many texture
+ * accesses are made and where so that we can do relocations for them in the
+ * uniform stream.
+ *
+ * The kernel API has shaders stored in user-mapped BOs.  The BOs will be
+ * forcibly unmapped from the process before validation, and any cache of
+ * validated state will be flushed if the mapping is faulted back in.
+ *
+ * Storing the shaders in BOs means that the validation process will be slow
+ * due to uncached reads, but since shaders are long-lived and shader BOs are
+ * never actually modified, this shouldn't be a problem.
+ */
+
+#include "vc4_simulator_validate.h"
+#include "vc4_qpu.h"
+#include "vc4_qpu_defines.h"
+
+struct vc4_shader_validation_state {
+       struct vc4_texture_sample_info tmu_setup[2];
+       int tmu_write_count[2];
+};
+
+static bool
+is_tmu_write(uint32_t waddr)
+{
+       return (waddr >= QPU_W_TMU0_S &&
+               waddr <= QPU_W_TMU1_B);
+}
+
+static bool
+check_register_write(uint32_t waddr, bool is_b)
+{
+       switch (waddr) {
+       case QPU_W_UNIFORMS_ADDRESS:
+               /* XXX: We'll probably need to support this for reladdr, but
+                * it's definitely a security-related one.
+                */
+               DRM_ERROR("uniforms address load unsupported\n");
+               return false;
+
+       case QPU_W_TLB_COLOR_MS:
+       case QPU_W_TLB_COLOR_ALL:
+       case QPU_W_TLB_Z:
+               /* XXX: We need to track which buffers get written by the
+                * shader, to make sure that we have those buffers set up by
+                * the config packets.  But we need to pass them for now to
+                * get things up and running.
+                */
+               return true;
+
+       case QPU_W_TMU0_S:
+       case QPU_W_TMU0_T:
+       case QPU_W_TMU0_R:
+       case QPU_W_TMU0_B:
+       case QPU_W_TMU1_S:
+       case QPU_W_TMU1_T:
+       case QPU_W_TMU1_R:
+       case QPU_W_TMU1_B:
+               /* XXX: We need to track where the uniforms get loaded for
+                * texturing so that we can do relocations, and to validate
+                * those uniform contents.
+                */
+               return true;
+
+       case QPU_W_HOST_INT:
+       case QPU_W_TMU_NOSWAP:
+       case QPU_W_TLB_STENCIL_SETUP:
+       case QPU_W_TLB_ALPHA_MASK:
+       case QPU_W_MUTEX_RELEASE:
+               /* XXX: I haven't thought about these, so don't support them
+                * for now.
+                */
+               DRM_ERROR("Unsupported waddr %d\n", waddr);
+               return false;
+
+       case QPU_W_VPM_ADDR:
+               DRM_ERROR("General VPM DMA unsupported\n");
+               return false;
+
+       case QPU_W_VPM:
+       case QPU_W_VPMVCD_SETUP:
+               /* We allow VPM setup in general, even including VPM DMA
+                * configuration setup, because the (unsafe) DMA can only be
+                * triggered by QPU_W_VPM_ADDR writes.
+                */
+               return true;
+       }
+
+       return true;
+}
+
+static bool
+record_validated_texture_sample(struct vc4_validated_shader_info *validated_shader,
+                               struct vc4_shader_validation_state *validation_state,
+                               int tmu)
+{
+       uint32_t s = validated_shader->num_texture_samples;
+       int i;
+       struct vc4_texture_sample_info *temp_samples;
+
+       temp_samples = krealloc(validated_shader->texture_samples,
+                               (s + 1) * sizeof(*temp_samples),
+                               GFP_KERNEL);
+       if (!temp_samples)
+               return false;
+
+       memcpy(temp_samples[s].p_offset,
+              validation_state->tmu_setup[tmu].p_offset,
+              validation_state->tmu_write_count[tmu] * sizeof(uint32_t));
+       for (i = validation_state->tmu_write_count[tmu]; i < 4; i++)
+               temp_samples[s].p_offset[i] = ~0;
+
+       validated_shader->num_texture_samples = s + 1;
+       validated_shader->texture_samples = temp_samples;
+
+       return true;
+}
+
+static bool
+check_tmu_writes(uint64_t inst,
+                struct vc4_validated_shader_info *validated_shader,
+                struct vc4_shader_validation_state *validation_state,
+                uint32_t waddr)
+{
+       int tmu = waddr > QPU_W_TMU0_B;
+
+       if (!is_tmu_write(waddr))
+               return true;
+
+       if (validation_state->tmu_write_count[tmu] >= 4) {
+               DRM_ERROR("TMU%d got too many parameters before dispatch\n",
+                         tmu);
+               return false;
+       }
+       validation_state->tmu_setup[tmu].p_offset[validation_state->tmu_write_count[tmu]] =
+               validated_shader->uniforms_size;
+       validation_state->tmu_write_count[tmu]++;
+       validated_shader->uniforms_size += 4;
+
+       if (waddr == QPU_W_TMU0_S || waddr == QPU_W_TMU1_S) {
+               if (!record_validated_texture_sample(validated_shader,
+                                                    validation_state, tmu)) {
+                       return false;
+               }
+
+               validation_state->tmu_write_count[tmu] = 0;
+       }
+
+       return true;
+}
+
+static bool
+check_instruction_writes(uint64_t inst,
+                        struct vc4_validated_shader_info *validated_shader,
+                        struct vc4_shader_validation_state *validation_state)
+{
+       uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
+       uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
+       bool ws = inst & QPU_WS;
+
+       if (is_tmu_write(waddr_add) && is_tmu_write(waddr_mul)) {
+               DRM_ERROR("ADD and MUL both set up textures\n");
+               return false;
+       }
+
+       if (!check_tmu_writes(inst, validated_shader, validation_state,
+                             waddr_add)) {
+               return false;
+       }
+
+       if (!check_tmu_writes(inst, validated_shader, validation_state,
+                             waddr_mul)) {
+               return false;
+       }
+
+       return (check_register_write(waddr_add, ws) &&
+               check_register_write(waddr_mul, !ws));
+}
+
+static bool
+check_instruction_reads(uint64_t inst,
+                       struct vc4_validated_shader_info *validated_shader)
+{
+       uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
+       uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
+       uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
+       uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
+
+       if (raddr_a == QPU_R_UNIF ||
+           raddr_b == QPU_R_UNIF) {
+               if (is_tmu_write(waddr_add) || is_tmu_write(waddr_mul)) {
+                       DRM_ERROR("uniform read in the same instruction as "
+                                 "texture setup");
+                       return false;
+               }
+
+               /* This can't overflow the uint32_t, because we're reading 8
+                * bytes of instruction to increment by 4 here, so we'd
+                * already be OOM.
+                */
+               validated_shader->uniforms_size += 4;
+       }
+
+       return true;
+}
+
+struct vc4_validated_shader_info *
+vc4_validate_shader(struct drm_gem_cma_object *shader_obj,
+                   uint32_t start_offset)
+{
+       bool found_shader_end = false;
+       int shader_end_ip = 0;
+       uint32_t ip, max_ip;
+       uint64_t *shader;
+       struct vc4_validated_shader_info *validated_shader;
+       struct vc4_shader_validation_state validation_state;
+
+       memset(&validation_state, 0, sizeof(validation_state));
+
+       if (start_offset + sizeof(uint64_t) > shader_obj->base.size) {
+               DRM_ERROR("shader starting at %d outside of BO sized %d\n",
+                         start_offset,
+                         shader_obj->base.size);
+               return NULL;
+       }
+       shader = shader_obj->vaddr + start_offset;
+       max_ip = (shader_obj->base.size - start_offset) / sizeof(uint64_t);
+
+       validated_shader = kcalloc(sizeof(*validated_shader), 1, GFP_KERNEL);
+       if (!validated_shader)
+               return NULL;
+
+       for (ip = 0; ip < max_ip; ip++) {
+               uint64_t inst = shader[ip];
+               uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
+
+               switch (sig) {
+               case QPU_SIG_NONE:
+               case QPU_SIG_WAIT_FOR_SCOREBOARD:
+               case QPU_SIG_SCOREBOARD_UNLOCK:
+               case QPU_SIG_LOAD_TMU0:
+               case QPU_SIG_LOAD_TMU1:
+                       if (!check_instruction_writes(inst, validated_shader,
+                                                     &validation_state)) {
+                               DRM_ERROR("Bad write at ip %d\n", ip);
+                               goto fail;
+                       }
+
+                       if (!check_instruction_reads(inst, validated_shader))
+                               goto fail;
+
+                       break;
+
+               case QPU_SIG_LOAD_IMM:
+                       if (!check_instruction_writes(inst, validated_shader,
+                                                     &validation_state)) {
+                               DRM_ERROR("Bad LOAD_IMM write at ip %d\n", ip);
+                               goto fail;
+                       }
+                       break;
+
+               case QPU_SIG_PROG_END:
+                       found_shader_end = true;
+                       shader_end_ip = ip;
+                       break;
+
+               default:
+                       DRM_ERROR("Unsupported QPU signal %d at "
+                                 "instruction %d\n", sig, ip);
+                       goto fail;
+               }
+
+               /* There are two delay slots after program end is signaled
+                * that are still executed, then we're finished.
+                */
+               if (found_shader_end && ip == shader_end_ip + 2)
+                       break;
+       }
+
+       if (ip == max_ip) {
+               DRM_ERROR("shader starting at %d failed to terminate before "
+                         "shader BO end at %d\n",
+                         start_offset,
+                         shader_obj->base.size);
+               goto fail;
+       }
+
+       /* Again, no chance of integer overflow here because the worst case
+        * scenario is 8 bytes of uniforms plus handles per 8-byte
+        * instruction.
+        */
+       validated_shader->uniforms_src_size =
+               (validated_shader->uniforms_size +
+                4 * validated_shader->num_texture_samples);
+
+       return validated_shader;
+
+fail:
+       kfree(validated_shader);
+       return NULL;
+}
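
Finally, a usage sketch (not part of the commit) of the new entry point; obj and
start_offset are assumed to come from shader record validation, as in
validate_shader_rec() above.

static int
example_check_shader(struct drm_gem_cma_object *obj, uint32_t start_offset,
                     uint32_t *uniforms_block_size)
{
        struct vc4_validated_shader_info *info;

        info = vc4_validate_shader(obj, start_offset);
        if (!info)
                return -EINVAL;

        /* Bytes this shader will consume from the submitted uniforms stream:
         * one bo_handles index per texture sample plus its uniform data.
         */
        *uniforms_block_size = info->uniforms_src_size;

        kfree(info);
        return 0;
}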