Merge commit 'origin/master' into gallium-sw-api-2
authorKeith Whitwell <keithw@vmware.com>
Wed, 10 Mar 2010 08:29:27 +0000 (08:29 +0000)
committerKeith Whitwell <keithw@vmware.com>
Wed, 10 Mar 2010 08:29:27 +0000 (08:29 +0000)
138 files changed:
progs/gallium/python/retrace/interpreter.py
progs/tests/fbotest1.c
progs/tests/fbotest2.c
progs/tests/fbotest3.c
src/gallium/auxiliary/cso_cache/cso_cache.c
src/gallium/auxiliary/cso_cache/cso_cache.h
src/gallium/auxiliary/cso_cache/cso_context.c
src/gallium/auxiliary/cso_cache/cso_context.h
src/gallium/auxiliary/draw/draw_pt.c
src/gallium/auxiliary/gallivm/lp_bld_arit.c
src/gallium/auxiliary/gallivm/lp_bld_const.c
src/gallium/auxiliary/gallivm/lp_bld_logic.c
src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
src/gallium/auxiliary/gallivm/lp_bld_type.h
src/gallium/auxiliary/util/u_blit.c
src/gallium/auxiliary/util/u_blitter.c
src/gallium/auxiliary/util/u_blitter.h
src/gallium/auxiliary/util/u_draw_quad.c
src/gallium/auxiliary/util/u_dump_state.c
src/gallium/auxiliary/util/u_format.h
src/gallium/auxiliary/util/u_format_pack.py
src/gallium/auxiliary/util/u_gen_mipmap.c
src/gallium/auxiliary/vl/vl_compositor.c
src/gallium/auxiliary/vl/vl_compositor.h
src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
src/gallium/docs/source/context.rst
src/gallium/docs/source/cso/velems.rst [new file with mode: 0644]
src/gallium/drivers/cell/ppu/cell_context.h
src/gallium/drivers/cell/ppu/cell_state_vertex.c
src/gallium/drivers/failover/fo_context.h
src/gallium/drivers/failover/fo_state.c
src/gallium/drivers/failover/fo_state_emit.c
src/gallium/drivers/i915/i915_context.h
src/gallium/drivers/i915/i915_state.c
src/gallium/drivers/i965/brw_context.h
src/gallium/drivers/i965/brw_draw_upload.c
src/gallium/drivers/i965/brw_pipe_vertex.c
src/gallium/drivers/i965/brw_structs.h
src/gallium/drivers/identity/id_context.c
src/gallium/drivers/llvmpipe/lp_context.c
src/gallium/drivers/llvmpipe/lp_context.h
src/gallium/drivers/llvmpipe/lp_state.h
src/gallium/drivers/llvmpipe/lp_state_vertex.c
src/gallium/drivers/nouveau/nouveau_screen.c
src/gallium/drivers/nouveau/nouveau_util.h
src/gallium/drivers/nv30/nv30_context.h
src/gallium/drivers/nv30/nv30_miptree.c
src/gallium/drivers/nv30/nv30_state.c
src/gallium/drivers/nv30/nv30_vbo.c
src/gallium/drivers/nv40/nv40_context.h
src/gallium/drivers/nv40/nv40_state.c
src/gallium/drivers/nv40/nv40_state_emit.c
src/gallium/drivers/nv40/nv40_vbo.c
src/gallium/drivers/nv50/Makefile
src/gallium/drivers/nv50/nv50_clear.c
src/gallium/drivers/nv50/nv50_context.c
src/gallium/drivers/nv50/nv50_context.h
src/gallium/drivers/nv50/nv50_program.c
src/gallium/drivers/nv50/nv50_push.c [new file with mode: 0644]
src/gallium/drivers/nv50/nv50_screen.c
src/gallium/drivers/nv50/nv50_screen.h
src/gallium/drivers/nv50/nv50_state.c
src/gallium/drivers/nv50/nv50_state_validate.c
src/gallium/drivers/nv50/nv50_tex.c
src/gallium/drivers/nv50/nv50_vbo.c
src/gallium/drivers/r300/r300_blit.c
src/gallium/drivers/r300/r300_context.c
src/gallium/drivers/r300/r300_context.h
src/gallium/drivers/r300/r300_emit.c
src/gallium/drivers/r300/r300_render.c
src/gallium/drivers/r300/r300_state.c
src/gallium/drivers/r300/r300_state_derived.c
src/gallium/drivers/r300/r300_state_inlines.h
src/gallium/drivers/softpipe/sp_context.c
src/gallium/drivers/softpipe/sp_context.h
src/gallium/drivers/softpipe/sp_state.h
src/gallium/drivers/softpipe/sp_state_vertex.c
src/gallium/drivers/svga/svga_context.h
src/gallium/drivers/svga/svga_pipe_vertex.c
src/gallium/drivers/svga/svga_state_need_swtnl.c
src/gallium/drivers/svga/svga_state_rss.c
src/gallium/drivers/svga/svga_state_vdecl.c
src/gallium/drivers/svga/svga_state_vs.c
src/gallium/drivers/svga/svga_swtnl_state.c
src/gallium/drivers/trace/tr_context.c
src/gallium/drivers/trace/tr_dump_state.c
src/gallium/include/pipe/p_compiler.h
src/gallium/include/pipe/p_context.h
src/gallium/include/pipe/p_state.h
src/gallium/state_trackers/python/p_context.i
src/gallium/state_trackers/vega/api_masks.c
src/gallium/state_trackers/vega/polygon.c
src/gallium/state_trackers/vega/renderer.c
src/gallium/state_trackers/vega/vg_context.c
src/gallium/state_trackers/vega/vg_context.h
src/gallium/state_trackers/xorg/xorg_renderer.c
src/gallium/state_trackers/xorg/xorg_renderer.h
src/mesa/SConscript
src/mesa/drivers/dri/i965/brw_disasm.c
src/mesa/drivers/dri/i965/brw_program.c
src/mesa/drivers/dri/i965/brw_vs_emit.c
src/mesa/drivers/dri/i965/brw_wm_glsl.c
src/mesa/drivers/dri/r200/Makefile
src/mesa/drivers/dri/r200/r200_context.c
src/mesa/drivers/dri/r200/radeon_pixel_read.c [new symlink]
src/mesa/drivers/dri/r300/Makefile
src/mesa/drivers/dri/r300/r300_cmdbuf.c
src/mesa/drivers/dri/r300/r300_context.c
src/mesa/drivers/dri/r300/r300_state.c
src/mesa/drivers/dri/r300/r300_tex.c
src/mesa/drivers/dri/r300/r300_tex.h
src/mesa/drivers/dri/r300/radeon_pixel_read.c [new symlink]
src/mesa/drivers/dri/r600/Makefile
src/mesa/drivers/dri/r600/r600_context.c
src/mesa/drivers/dri/r600/radeon_pixel_read.c [new symlink]
src/mesa/drivers/dri/radeon/Makefile
src/mesa/drivers/dri/radeon/radeon_common.h
src/mesa/drivers/dri/radeon/radeon_common_context.h
src/mesa/drivers/dri/radeon/radeon_context.c
src/mesa/drivers/dri/radeon/radeon_fbo.c
src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
src/mesa/drivers/dri/radeon/radeon_pixel_read.c [new file with mode: 0644]
src/mesa/drivers/dri/radeon/radeon_texture.c
src/mesa/drivers/dri/radeon/radeon_texture.h
src/mesa/glapi/glapi.c
src/mesa/glapi/glapi.h
src/mesa/glapi/glapi_entrypoint.c [new file with mode: 0644]
src/mesa/glapi/glapi_getproc.c
src/mesa/glapi/glapi_priv.h [new file with mode: 0644]
src/mesa/sources.mak
src/mesa/state_tracker/st_cb_bitmap.c
src/mesa/state_tracker/st_cb_clear.c
src/mesa/state_tracker/st_cb_drawpixels.c
src/mesa/state_tracker/st_context.c
src/mesa/state_tracker/st_context.h
src/mesa/state_tracker/st_draw.c
src/mesa/state_tracker/st_draw_feedback.c

index b30469dfaeebc75b8c670bd5c790f77b8595784d..1a9618125535768250f054924229e3d7719d1f05 100755 (executable)
@@ -551,7 +551,6 @@ class Context(Object):
                 data = vbuf.buffer.read()
                 values = unpack_from(format, data, offset)
                 sys.stdout.write('\t\t{' + ', '.join(map(str, values)) + '},\n')
-                assert len(values) == velem.nr_components
             sys.stdout.write('\t},\n')
         sys.stdout.flush()
 
index 0cd7f95c355c616755777adf861b0e9c78b61e8f..a95fdff74c35b32ac74ca125a460b51a25de5142 100644 (file)
@@ -36,8 +36,8 @@ Display( void )
 
    /* draw to user framebuffer */
    glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, MyFB);
-   glDrawBuffer(GL_COLOR_ATTACHMENT1_EXT);
-   glReadBuffer(GL_COLOR_ATTACHMENT1_EXT);
+   glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
+   glReadBuffer(GL_COLOR_ATTACHMENT0_EXT);
 
    status = glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT);
    if (status != GL_FRAMEBUFFER_COMPLETE_EXT) {
@@ -161,7 +161,7 @@ Init( void )
    assert(i == MyFB);
 
    CheckError(__LINE__);
-   glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT1_EXT,
+   glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT,
                                 GL_RENDERBUFFER_EXT, MyRB);
 
    glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_RGB, Width, Height);
index f9c506193f64196e27c5fd391b10905fd43f2939..872b46279e6acf802e9cfce186bbfdc05c445e2b 100644 (file)
@@ -40,8 +40,8 @@ Display( void )
 
    /* draw to user framebuffer */
    glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, MyFB);
-   glDrawBuffer(GL_COLOR_ATTACHMENT1_EXT);
-   glReadBuffer(GL_COLOR_ATTACHMENT1_EXT);
+   glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
+   glReadBuffer(GL_COLOR_ATTACHMENT0_EXT);
 
    status = glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT);
    if (status != GL_FRAMEBUFFER_COMPLETE_EXT) {
@@ -163,7 +163,7 @@ Init( void )
    glGenRenderbuffersEXT(1, &ColorRb);
    glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, ColorRb);
    assert(glIsRenderbufferEXT(ColorRb));
-   glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT1_EXT,
+   glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT,
                                 GL_RENDERBUFFER_EXT, ColorRb);
    glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_RGB, Width, Height);
 
index 8e288b38b8323b4a7f7b0220da851904c2625dd8..c176f82d2ba9661f4a18d91a7215121f17e74f7a 100644 (file)
@@ -50,8 +50,8 @@ Display( void )
 
    /* draw to user framebuffer */
    glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, MyFB);
-   glDrawBuffer(GL_COLOR_ATTACHMENT1_EXT);
-   glReadBuffer(GL_COLOR_ATTACHMENT1_EXT);
+   glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
+   glReadBuffer(GL_COLOR_ATTACHMENT0_EXT);
 
    status = glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT);
    if (status != GL_FRAMEBUFFER_COMPLETE_EXT) {
@@ -189,7 +189,7 @@ Init( void )
    glGenRenderbuffersEXT(1, &ColorRb);
    glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, ColorRb);
    assert(glIsRenderbufferEXT(ColorRb));
-   glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT1_EXT,
+   glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT,
                                 GL_RENDERBUFFER_EXT, ColorRb);
    glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_RGB, Width, Height);
 
index a6a07e72c2f985bdad571aa95bb0a64f07e09747..900c64df4b93279ac951a780a63bfb15a0b1b045 100644 (file)
@@ -43,6 +43,7 @@ struct cso_cache {
    struct cso_hash *vs_hash;
    struct cso_hash *rasterizer_hash;
    struct cso_hash *sampler_hash;
+   struct cso_hash *velements_hash;
    int    max_size;
 
    cso_sanitize_callback sanitize_cb;
@@ -108,6 +109,9 @@ static struct cso_hash *_cso_hash_for_type(struct cso_cache *sc, enum cso_cache_
    case CSO_VERTEX_SHADER:
       hash = sc->vs_hash;
       break;
+   case CSO_VELEMENTS:
+      hash = sc->velements_hash;
+      break;
    }
 
    return hash;
@@ -161,6 +165,13 @@ static void delete_vs_state(void *state, void *data)
    FREE(state);
 }
 
+static void delete_velements(void *state, void *data)
+{
+   struct cso_velements *cso = (struct cso_velements *)state;
+   if (cso->delete_state)
+      cso->delete_state(cso->context, cso->data);
+   FREE(state);
+}
 
 static INLINE void delete_cso(void *state, enum cso_cache_type type)
 {
@@ -183,6 +194,9 @@ static INLINE void delete_cso(void *state, enum cso_cache_type type)
    case CSO_VERTEX_SHADER:
       delete_vs_state(state, 0);
       break;
+   case CSO_VELEMENTS:
+      delete_velements(state, 0);
+      break;
    default:
       assert(0);
       FREE(state);
@@ -294,6 +308,7 @@ struct cso_cache *cso_cache_create(void)
    sc->rasterizer_hash    = cso_hash_create();
    sc->fs_hash            = cso_hash_create();
    sc->vs_hash            = cso_hash_create();
+   sc->velements_hash     = cso_hash_create();
    sc->sanitize_cb        = sanitize_cb;
    sc->sanitize_data      = 0;
 
@@ -325,6 +340,9 @@ void cso_for_each_state(struct cso_cache *sc, enum cso_cache_type type,
    case CSO_VERTEX_SHADER:
       hash = sc->vs_hash;
       break;
+   case CSO_VELEMENTS:
+      hash = sc->velements_hash;
+      break;
    }
 
    iter = cso_hash_first_node(hash);
@@ -351,6 +369,7 @@ void cso_cache_delete(struct cso_cache *sc)
    cso_for_each_state(sc, CSO_VERTEX_SHADER, delete_vs_state, 0);
    cso_for_each_state(sc, CSO_RASTERIZER, delete_rasterizer_state, 0);
    cso_for_each_state(sc, CSO_SAMPLER, delete_sampler_state, 0);
+   cso_for_each_state(sc, CSO_VELEMENTS, delete_velements, 0);
 
    cso_hash_delete(sc->blend_hash);
    cso_hash_delete(sc->sampler_hash);
@@ -358,6 +377,7 @@ void cso_cache_delete(struct cso_cache *sc)
    cso_hash_delete(sc->rasterizer_hash);
    cso_hash_delete(sc->fs_hash);
    cso_hash_delete(sc->vs_hash);
+   cso_hash_delete(sc->velements_hash);
    FREE(sc);
 }
 
@@ -372,6 +392,7 @@ void cso_set_maximum_cache_size(struct cso_cache *sc, int number)
    sanitize_hash(sc, sc->vs_hash, CSO_VERTEX_SHADER, sc->max_size);
    sanitize_hash(sc, sc->rasterizer_hash, CSO_RASTERIZER, sc->max_size);
    sanitize_hash(sc, sc->sampler_hash, CSO_SAMPLER, sc->max_size);
+   sanitize_hash(sc, sc->velements_hash, CSO_VELEMENTS, sc->max_size);
 }
 
 int cso_maximum_cache_size(const struct cso_cache *sc)
index eea60b940bb8663f5037a804f1f0cec4068bbfc6..fb09b83c623ef61be9ca4f540540667126dc8d80 100644 (file)
@@ -53,6 +53,7 @@
   * - rasterizer (old setup)
   * - sampler
   * - vertex shader
+  * - vertex elements
   *
   * Things that are not constant state objects include:
   * - blend_color
@@ -90,7 +91,8 @@ enum cso_cache_type {
    CSO_DEPTH_STENCIL_ALPHA,
    CSO_RASTERIZER,
    CSO_FRAGMENT_SHADER,
-   CSO_VERTEX_SHADER
+   CSO_VERTEX_SHADER,
+   CSO_VELEMENTS
 };
 
 typedef void (*cso_state_callback)(void *ctx, void *obj);
@@ -144,6 +146,18 @@ struct cso_sampler {
    struct pipe_context *context;
 };
 
+struct cso_velems_state {
+   unsigned count;
+   struct pipe_vertex_element velems[PIPE_MAX_ATTRIBS];
+};
+
+struct cso_velements {
+   struct cso_velems_state state;
+   void *data;
+   cso_state_callback delete_state;
+   struct pipe_context *context;
+};
+
 unsigned cso_construct_key(void *item, int item_size);
 
 struct cso_cache *cso_cache_create(void);
index a7335c340ca8d930235aa30605fcfacef3414293..6500891a10cb6cae31ca45bcfb1c2edfb702e97c 100644 (file)
@@ -89,6 +89,7 @@ struct cso_context {
    void *rasterizer, *rasterizer_saved;
    void *fragment_shader, *fragment_shader_saved, *geometry_shader;
    void *vertex_shader, *vertex_shader_saved, *geometry_shader_saved;
+   void *velements, *velements_saved;
 
    struct pipe_clip_state clip;
    struct pipe_clip_state clip_saved;
@@ -174,6 +175,20 @@ static boolean delete_vs_state(struct cso_context *ctx, void *state)
    return FALSE;
 }
 
+static boolean delete_vertex_elements(struct cso_context *ctx,
+                                      void *state)
+{
+   struct cso_velements *cso = (struct cso_velements *)state;
+
+   if (ctx->velements == cso->data)
+      return FALSE;
+
+   if (cso->delete_state)
+      cso->delete_state(cso->context, cso->data);
+   FREE(state);
+   return TRUE;
+}
+
 
 static INLINE boolean delete_cso(struct cso_context *ctx,
                                  void *state, enum cso_cache_type type)
@@ -197,6 +212,9 @@ static INLINE boolean delete_cso(struct cso_context *ctx,
    case CSO_VERTEX_SHADER:
       return delete_vs_state(ctx, state);
       break;
+   case CSO_VELEMENTS:
+      return delete_vertex_elements(ctx, state);
+      break;
    default:
       assert(0);
       FREE(state);
@@ -271,6 +289,7 @@ void cso_release_all( struct cso_context *ctx )
       ctx->pipe->bind_depth_stencil_alpha_state( ctx->pipe, NULL );
       ctx->pipe->bind_fs_state( ctx->pipe, NULL );
       ctx->pipe->bind_vs_state( ctx->pipe, NULL );
+      ctx->pipe->bind_vertex_elements_state( ctx->pipe, NULL );
    }
 
    for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
@@ -1130,7 +1149,6 @@ void cso_restore_geometry_shader(struct cso_context *ctx)
    ctx->geometry_shader_saved = NULL;
 }
 
-
 /* clip state */
 
 static INLINE void
@@ -1180,3 +1198,66 @@ cso_restore_clip(struct cso_context *ctx)
       ctx->pipe->set_clip_state(ctx->pipe, &ctx->clip_saved);
    }
 }
+
+enum pipe_error cso_set_vertex_elements(struct cso_context *ctx,
+                                        unsigned count,
+                                        const struct pipe_vertex_element *states)
+{
+   unsigned key_size, hash_key;
+   struct cso_hash_iter iter;
+   void *handle;
+   struct cso_velems_state velems_state;
+
+   /* need to include the count into the stored state data too.
+      Otherwise first few count pipe_vertex_elements could be identical even if count
+      is different, and there's no guarantee the hash would be different in that
+      case neither */
+   key_size = sizeof(struct pipe_vertex_element) * count + sizeof(unsigned);
+   velems_state.count = count;
+   memcpy(velems_state.velems, states, sizeof(struct pipe_vertex_element) * count);
+   hash_key = cso_construct_key((void*)&velems_state, key_size);
+   iter = cso_find_state_template(ctx->cache, hash_key, CSO_VELEMENTS, (void*)&velems_state, key_size);
+
+   if (cso_hash_iter_is_null(iter)) {
+      struct cso_velements *cso = MALLOC(sizeof(struct cso_velements));
+      if (!cso)
+         return PIPE_ERROR_OUT_OF_MEMORY;
+
+      memcpy(&cso->state, &velems_state, key_size);
+      cso->data = ctx->pipe->create_vertex_elements_state(ctx->pipe, count, &cso->state.velems[0]);
+      cso->delete_state = (cso_state_callback)ctx->pipe->delete_vertex_elements_state;
+      cso->context = ctx->pipe;
+
+      iter = cso_insert_state(ctx->cache, hash_key, CSO_VELEMENTS, cso);
+      if (cso_hash_iter_is_null(iter)) {
+         FREE(cso);
+         return PIPE_ERROR_OUT_OF_MEMORY;
+      }
+
+      handle = cso->data;
+   }
+   else {
+      handle = ((struct cso_velements *)cso_hash_iter_data(iter))->data;
+   }
+
+   if (ctx->velements != handle) {
+      ctx->velements = handle;
+      ctx->pipe->bind_vertex_elements_state(ctx->pipe, handle);
+   }
+   return PIPE_OK;
+}
+
+void cso_save_vertex_elements(struct cso_context *ctx)
+{
+   assert(!ctx->velements_saved);
+   ctx->velements_saved = ctx->velements;
+}
+
+void cso_restore_vertex_elements(struct cso_context *ctx)
+{
+   if (ctx->velements != ctx->velements_saved) {
+      ctx->velements = ctx->velements_saved;
+      ctx->pipe->bind_vertex_elements_state(ctx->pipe, ctx->velements_saved);
+   }
+   ctx->velements_saved = NULL;
+}
index 251a9a644f85a1b46f928d9048ff984339b6a04c..9c16abd28dd483738ff6d424e625dd12a6dce334 100644 (file)
@@ -122,6 +122,12 @@ void
 cso_restore_vertex_sampler_textures(struct cso_context *cso);
 
 
+enum pipe_error cso_set_vertex_elements(struct cso_context *ctx,
+                                        unsigned count,
+                                        const struct pipe_vertex_element *states);
+void cso_save_vertex_elements(struct cso_context *ctx);
+void cso_restore_vertex_elements(struct cso_context *ctx);
+
 
 /* These aren't really sensible -- most of the time the api provides
  * object semantics for shaders anyway, and the cases where it doesn't
@@ -157,7 +163,6 @@ void cso_save_geometry_shader(struct cso_context *cso);
 void cso_restore_geometry_shader(struct cso_context *cso);
 
 
-
 enum pipe_error cso_set_framebuffer(struct cso_context *cso,
                                     const struct pipe_framebuffer_state *fb);
 void cso_save_framebuffer(struct cso_context *cso);
index 6d90a6c42fde3be6f3faadaede06e2c3de66f19b..a8cdc57ad962012fc91ee8707e051195a7f4b20d 100644 (file)
@@ -307,9 +307,8 @@ draw_arrays_instanced(struct draw_context *draw,
       tgsi_dump(draw->vs.vertex_shader->state.tokens, 0);
       debug_printf("Elements:\n");
       for (i = 0; i < draw->pt.nr_vertex_elements; i++) {
-         debug_printf("  format=%s comps=%u\n",
-                      util_format_name(draw->pt.vertex_element[i].src_format),
-                      draw->pt.vertex_element[i].nr_components);
+         debug_printf("  format=%s\n",
+                      util_format_name(draw->pt.vertex_element[i].src_format));
       }
       debug_printf("Buffers:\n");
       for (i = 0; i < draw->pt.nr_vertex_buffers; i++) {
index 32f9e5201c54fa566c0e02eb67f450290594d0f2..e2c67883972e032f4c75f551237242ce9fb169d3 100644 (file)
@@ -644,13 +644,26 @@ lp_build_abs(struct lp_build_context *bld,
 
    if(type.floating) {
       /* Mask out the sign bit */
-      LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
-      unsigned long long absMask = ~(1ULL << (type.width - 1));
-      LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long) absMask));
-      a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
-      a = LLVMBuildAnd(bld->builder, a, mask, "");
-      a = LLVMBuildBitCast(bld->builder, a, vec_type, "");
-      return a;
+      if (type.length == 1) {
+         LLVMTypeRef int_type = LLVMIntType(type.width);
+         LLVMTypeRef float_type = LLVMFloatType();
+         unsigned long long absMask = ~(1ULL << (type.width - 1));
+         LLVMValueRef mask = LLVMConstInt(int_type, absMask, 0);
+         a = LLVMBuildBitCast(bld->builder, a, int_type, "");
+         a = LLVMBuildAnd(bld->builder, a, mask, "");
+         a = LLVMBuildBitCast(bld->builder, a, float_type, "");
+         return a;
+      }
+      else {
+         /* vector of floats */
+         LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+         unsigned long long absMask = ~(1ULL << (type.width - 1));
+         LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long) absMask));
+         a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+         a = LLVMBuildAnd(bld->builder, a, mask, "");
+         a = LLVMBuildBitCast(bld->builder, a, vec_type, "");
+         return a;
+      }
    }
 
    if(type.width*type.length == 128 && util_cpu_caps.has_ssse3) {
@@ -753,7 +766,7 @@ lp_build_set_sign(struct lp_build_context *bld,
 
 
 /**
- * Convert vector of int to vector of float.
+ * Convert vector of (or scalar) int to vector of (or scalar) float.
  */
 LLVMValueRef
 lp_build_int_to_float(struct lp_build_context *bld,
@@ -764,7 +777,11 @@ lp_build_int_to_float(struct lp_build_context *bld,
    assert(type.floating);
    /*assert(lp_check_value(type, a));*/
 
-   {
+   if (type.length == 1) {
+      LLVMTypeRef float_type = LLVMFloatType();
+      return LLVMBuildSIToFP(bld->builder, a, float_type, "");
+   }
+   else {
       LLVMTypeRef vec_type = lp_build_vec_type(type);
       /*LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);*/
       LLVMValueRef res;
@@ -921,12 +938,18 @@ lp_build_itrunc(struct lp_build_context *bld,
                 LLVMValueRef a)
 {
    const struct lp_type type = bld->type;
-   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
 
    assert(type.floating);
-   assert(lp_check_value(type, a));
 
-   return LLVMBuildFPToSI(bld->builder, a, int_vec_type, "");
+   if (type.length == 1) {
+      LLVMTypeRef int_type = LLVMIntType(type.width);
+      return LLVMBuildFPTrunc(bld->builder, a, int_type, "");
+   }
+   else {
+      LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+      assert(lp_check_value(type, a));
+      return LLVMBuildFPToSI(bld->builder, a, int_vec_type, "");
+   }
 }
 
 
@@ -939,6 +962,15 @@ lp_build_iround(struct lp_build_context *bld,
    LLVMValueRef res;
 
    assert(type.floating);
+
+   if (type.length == 1) {
+      /* scalar float to int */
+      LLVMTypeRef int_type = LLVMIntType(type.width);
+      /* XXX we want rounding here! */
+      res = LLVMBuildFPToSI(bld->builder, a, int_type, "");
+      return res;
+   }
+
    assert(lp_check_value(type, a));
 
    if(util_cpu_caps.has_sse4_1) {
@@ -1207,6 +1239,7 @@ lp_build_polynomial(struct lp_build_context *bld,
                     unsigned num_coeffs)
 {
    const struct lp_type type = bld->type;
+   LLVMTypeRef float_type = LLVMFloatType();
    LLVMValueRef res = NULL;
    unsigned i;
 
@@ -1216,7 +1249,13 @@ lp_build_polynomial(struct lp_build_context *bld,
                    __FUNCTION__);
 
    for (i = num_coeffs; i--; ) {
-      LLVMValueRef coeff = lp_build_const_scalar(type, coeffs[i]);
+      LLVMValueRef coeff;
+
+      if (type.length == 1)
+         coeff = LLVMConstReal(float_type, coeffs[i]);
+      else
+         coeff = lp_build_const_scalar(type, coeffs[i]);
+
       if(res)
          res = lp_build_add(bld, coeff, lp_build_mul(bld, x, res));
       else
@@ -1410,11 +1449,87 @@ lp_build_log2_approx(struct lp_build_context *bld,
 }
 
 
+/** scalar version of above function */
+static void
+lp_build_float_log2_approx(struct lp_build_context *bld,
+                           LLVMValueRef x,
+                           LLVMValueRef *p_exp,
+                           LLVMValueRef *p_floor_log2,
+                           LLVMValueRef *p_log2)
+{
+   const struct lp_type type = bld->type;
+   LLVMTypeRef float_type = LLVMFloatType();
+   LLVMTypeRef int_type = LLVMIntType(type.width);
+
+   LLVMValueRef expmask = LLVMConstInt(int_type, 0x7f800000, 0);
+   LLVMValueRef mantmask = LLVMConstInt(int_type, 0x007fffff, 0);
+   LLVMValueRef one = LLVMConstBitCast(bld->one, int_type);
+
+   LLVMValueRef i = NULL;
+   LLVMValueRef exp = NULL;
+   LLVMValueRef mant = NULL;
+   LLVMValueRef logexp = NULL;
+   LLVMValueRef logmant = NULL;
+   LLVMValueRef res = NULL;
+
+   if(p_exp || p_floor_log2 || p_log2) {
+      /* TODO: optimize the constant case */
+      if(LLVMIsConstant(x))
+         debug_printf("%s: inefficient/imprecise constant arithmetic\n",
+                      __FUNCTION__);
+
+      assert(type.floating && type.width == 32);
+
+      i = LLVMBuildBitCast(bld->builder, x, int_type, "");
+
+      /* exp = (float) exponent(x) */
+      exp = LLVMBuildAnd(bld->builder, i, expmask, "");
+   }
+
+   if(p_floor_log2 || p_log2) {
+      LLVMValueRef c23 = LLVMConstInt(int_type, 23, 0);
+      LLVMValueRef c127 = LLVMConstInt(int_type, 127, 0);
+      logexp = LLVMBuildLShr(bld->builder, exp, c23, "");
+      logexp = LLVMBuildSub(bld->builder, logexp, c127, "");
+      logexp = LLVMBuildSIToFP(bld->builder, logexp, float_type, "");
+   }
+
+   if(p_log2) {
+      /* mant = (float) mantissa(x) */
+      mant = LLVMBuildAnd(bld->builder, i, mantmask, "");
+      mant = LLVMBuildOr(bld->builder, mant, one, "");
+      mant = LLVMBuildBitCast(bld->builder, mant, float_type, "");
+
+      logmant = lp_build_polynomial(bld, mant, lp_build_log2_polynomial,
+                                    Elements(lp_build_log2_polynomial));
+
+      /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/
+      logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildSub(bld->builder, mant, bld->one, ""), "");
+
+      res = LLVMBuildAdd(bld->builder, logmant, logexp, "");
+   }
+
+   if(p_exp)
+      *p_exp = exp;
+
+   if(p_floor_log2)
+      *p_floor_log2 = logexp;
+
+   if(p_log2)
+      *p_log2 = res;
+}
+
+
 LLVMValueRef
 lp_build_log2(struct lp_build_context *bld,
               LLVMValueRef x)
 {
    LLVMValueRef res;
-   lp_build_log2_approx(bld, x, NULL, NULL, &res);
+   if (bld->type.length == 1) {
+      lp_build_float_log2_approx(bld, x, NULL, NULL, &res);
+   }
+   else {
+      lp_build_log2_approx(bld, x, NULL, NULL, &res);
+   }
    return res;
 }
index c8eaa8c394068225080b7bd1a4190013569b4d29..53447757e8e23ba6f24a0c089af4761983f52cc1 100644 (file)
@@ -264,10 +264,16 @@ lp_build_one(struct lp_type type)
    for(i = 1; i < type.length; ++i)
       elems[i] = elems[0];
 
-   return LLVMConstVector(elems, type.length);
+   if (type.length == 1)
+      return elems[0];
+   else
+      return LLVMConstVector(elems, type.length);
 }
                
 
+/**
+ * Build constant-valued vector from a scalar value.
+ */
 LLVMValueRef
 lp_build_const_scalar(struct lp_type type,
                       double val)
index 2726747eaea5698934c03eefa6f2b64d6e949a52..7c585fda7885c08b36ec75cf28beed28f98d6739 100644 (file)
@@ -198,7 +198,7 @@ lp_build_compare(LLVMBuilderRef builder,
 
          return res;
       }
-   }
+   } /* if (type.width * type.length == 128) */
 #endif
 
    if(type.floating) {
@@ -238,20 +238,25 @@ lp_build_compare(LLVMBuilderRef builder,
       cond = LLVMBuildFCmp(builder, op, a, b, "");
       res = LLVMBuildSelect(builder, cond, ones, zeros, "");
 #else
-      debug_printf("%s: warning: using slow element-wise vector comparison\n",
-                   __FUNCTION__);
       res = LLVMGetUndef(int_vec_type);
-      for(i = 0; i < type.length; ++i) {
-         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
-         cond = LLVMBuildFCmp(builder, op,
-                              LLVMBuildExtractElement(builder, a, index, ""),
-                              LLVMBuildExtractElement(builder, b, index, ""),
-                              "");
-         cond = LLVMBuildSelect(builder, cond,
-                                LLVMConstExtractElement(ones, index),
-                                LLVMConstExtractElement(zeros, index),
-                                "");
-         res = LLVMBuildInsertElement(builder, res, cond, index, "");
+      if (type.length == 1) {
+         res = LLVMBuildFCmp(builder, op, a, b, "");
+      }
+      else {
+         debug_printf("%s: warning: using slow element-wise float"
+                      " vector comparison\n", __FUNCTION__);
+         for (i = 0; i < type.length; ++i) {
+            LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+            cond = LLVMBuildFCmp(builder, op,
+                                 LLVMBuildExtractElement(builder, a, index, ""),
+                                 LLVMBuildExtractElement(builder, b, index, ""),
+                                 "");
+            cond = LLVMBuildSelect(builder, cond,
+                                   LLVMConstExtractElement(ones, index),
+                                   LLVMConstExtractElement(zeros, index),
+                                   "");
+            res = LLVMBuildInsertElement(builder, res, cond, index, "");
+         }
       }
 #endif
    }
@@ -286,20 +291,26 @@ lp_build_compare(LLVMBuilderRef builder,
       cond = LLVMBuildICmp(builder, op, a, b, "");
       res = LLVMBuildSelect(builder, cond, ones, zeros, "");
 #else
-      debug_printf("%s: warning: using slow element-wise int vector comparison\n",
-                   __FUNCTION__);
       res = LLVMGetUndef(int_vec_type);
-      for(i = 0; i < type.length; ++i) {
-         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
-         cond = LLVMBuildICmp(builder, op,
-                              LLVMBuildExtractElement(builder, a, index, ""),
-                              LLVMBuildExtractElement(builder, b, index, ""),
-                              "");
-         cond = LLVMBuildSelect(builder, cond,
-                                LLVMConstExtractElement(ones, index),
-                                LLVMConstExtractElement(zeros, index),
-                                "");
-         res = LLVMBuildInsertElement(builder, res, cond, index, "");
+      if (type.length == 1) {
+         res = LLVMBuildICmp(builder, op, a, b, "");
+      }
+      else {
+         debug_printf("%s: warning: using slow element-wise int"
+                      " vector comparison\n", __FUNCTION__);
+
+         for(i = 0; i < type.length; ++i) {
+            LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+            cond = LLVMBuildICmp(builder, op,
+                                 LLVMBuildExtractElement(builder, a, index, ""),
+                                 LLVMBuildExtractElement(builder, b, index, ""),
+                                 "");
+            cond = LLVMBuildSelect(builder, cond,
+                                   LLVMConstExtractElement(ones, index),
+                                   LLVMConstExtractElement(zeros, index),
+                                   "");
+            res = LLVMBuildInsertElement(builder, res, cond, index, "");
+         }
       }
 #endif
    }
@@ -339,26 +350,31 @@ lp_build_select(struct lp_build_context *bld,
    if(a == b)
       return a;
 
-   if(type.floating) {
-      LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
-      a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
-      b = LLVMBuildBitCast(bld->builder, b, int_vec_type, "");
+   if (type.length == 1) {
+      res = LLVMBuildSelect(bld->builder, mask, a, b, "");
    }
+   else {
+      if(type.floating) {
+         LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+         a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+         b = LLVMBuildBitCast(bld->builder, b, int_vec_type, "");
+      }
 
-   a = LLVMBuildAnd(bld->builder, a, mask, "");
+      a = LLVMBuildAnd(bld->builder, a, mask, "");
 
-   /* This often gets translated to PANDN, but sometimes the NOT is
-    * pre-computed and stored in another constant. The best strategy depends
-    * on available registers, so it is not a big deal -- hopefully LLVM does
-    * the right decision attending the rest of the program.
-    */
-   b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), "");
+      /* This often gets translated to PANDN, but sometimes the NOT is
+       * pre-computed and stored in another constant. The best strategy depends
+       * on available registers, so it is not a big deal -- hopefully LLVM does
+       * the right decision attending the rest of the program.
+       */
+      b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), "");
 
-   res = LLVMBuildOr(bld->builder, a, b, "");
+      res = LLVMBuildOr(bld->builder, a, b, "");
 
-   if(type.floating) {
-      LLVMTypeRef vec_type = lp_build_vec_type(type);
-      res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
+      if(type.floating) {
+         LLVMTypeRef vec_type = lp_build_vec_type(type);
+         res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
+      }
    }
 
    return res;
index 1dca29cdd5894559c2a90e05c294a314f03fe61e..a965d394f44b7292d8bf6ddcbe3a02ed0d6a2514 100644 (file)
@@ -65,6 +65,14 @@ struct lp_build_sample_context
 
    const struct util_format_description *format_desc;
 
+   /** regular scalar float type */
+   struct lp_type float_type;
+   struct lp_build_context float_bld;
+
+   /** regular scalar float type */
+   struct lp_type int_type;
+   struct lp_build_context int_bld;
+
    /** Incoming coordinates type and build context */
    struct lp_type coord_type;
    struct lp_build_context coord_bld;
@@ -108,6 +116,27 @@ wrap_mode_uses_border_color(unsigned mode)
 }
 
 
+static LLVMValueRef
+lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
+                          LLVMValueRef data_array, LLVMValueRef level)
+{
+   LLVMValueRef indexes[2], data_ptr;
+   indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
+   indexes[1] = level;
+   data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, "");
+   data_ptr = LLVMBuildLoad(bld->builder, data_ptr, "");
+   return data_ptr;
+}
+
+
+static LLVMValueRef
+lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
+                                LLVMValueRef data_array, int level)
+{
+   LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
+   return lp_build_get_mipmap_level(bld, data_array, lvl);
+}
+
 
 /**
  * Gen code to fetch a texel from a texture at int coords (x, y).
@@ -124,14 +153,13 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
                           LLVMValueRef x,
                           LLVMValueRef y,
                           LLVMValueRef y_stride,
-                          LLVMValueRef data_array,
+                          LLVMValueRef data_ptr,
                           LLVMValueRef *texel)
 {
    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
    LLVMValueRef offset;
    LLVMValueRef packed;
    LLVMValueRef use_border = NULL;
-   LLVMValueRef data_ptr;
 
    /* use_border = x < 0 || x >= width || y < 0 || y >= height */
    if (wrap_mode_uses_border_color(bld->static_state->wrap_s)) {
@@ -154,16 +182,6 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
       }
    }
 
-   /* XXX always use mipmap level 0 for now */
-   {
-      const int level = 0;
-      LLVMValueRef indexes[2];
-      indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
-      indexes[1] = LLVMConstInt(LLVMInt32Type(), level, 0);
-      data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, "");
-      data_ptr = LLVMBuildLoad(bld->builder, data_ptr, "");
-   }
-
    /*
     * Note: if we find an app which frequently samples the texture border
     * we might want to implement a true conditional here to avoid sampling
@@ -233,17 +251,8 @@ lp_build_sample_packed(struct lp_build_sample_context *bld,
    assert(bld->format_desc->block.height == 1);
    assert(bld->format_desc->block.bits <= bld->texel_type.width);
 
-   /* XXX always use mipmap level 0 for now */
-   {
-      const int level = 0;
-      LLVMValueRef indexes[2];
-      /* get data_ptr[level] */
-      indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
-      indexes[1] = LLVMConstInt(LLVMInt32Type(), level, 0);
-      data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, "");
-      /* load texture base address */
-      data_ptr = LLVMBuildLoad(bld->builder, data_ptr, "");
-   }
+   /* get pointer to mipmap level 0 data */
+   data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
 
    return lp_build_gather(bld->builder,
                           bld->texel_type.length,
@@ -733,7 +742,210 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
 
 
 /**
- * Sample 2D texture with nearest filtering.
+ * Codegen equivalent for u_minify().
+ * Return max(1, base_size >> level);
+ */
+static LLVMValueRef
+lp_build_minify(struct lp_build_sample_context *bld,
+                LLVMValueRef base_size,
+                LLVMValueRef level)
+{
+   LLVMValueRef size = LLVMBuildAShr(bld->builder, base_size, level, "minify");
+   size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one);
+   return size;
+}
+
+
+static int
+texture_dims(enum pipe_texture_target tex)
+{
+   switch (tex) {
+   case PIPE_TEXTURE_1D:
+      return 1;
+   case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_CUBE:
+      return 2;
+   case PIPE_TEXTURE_3D:
+      return 3;
+   default:
+      assert(0 && "bad texture target in texture_dims()");
+      return 2;
+   }
+}
+
+
+/**
+ * Generate code to compute texture level of detail (lambda).
+ * \param s  vector of texcoord s values
+ * \param t  vector of texcoord t values
+ * \param r  vector of texcoord r values
+ * \param width  scalar int texture width
+ * \param height  scalar int texture height
+ * \param depth  scalar int texture depth
+ */
+static LLVMValueRef
+lp_build_lod_selector(struct lp_build_sample_context *bld,
+                      LLVMValueRef s,
+                      LLVMValueRef t,
+                      LLVMValueRef r,
+                      LLVMValueRef width,
+                      LLVMValueRef height,
+                      LLVMValueRef depth)
+
+{
+   const int dims = texture_dims(bld->static_state->target);
+   struct lp_build_context *coord_bld = &bld->coord_bld;
+   struct lp_build_context *float_bld = &bld->float_bld;
+   LLVMValueRef lod_bias = LLVMConstReal(LLVMFloatType(), bld->static_state->lod_bias);
+   LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod);
+   LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(), bld->static_state->max_lod);
+
+   LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
+   LLVMValueRef index1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
+   LLVMValueRef index2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
+
+   LLVMValueRef s0, s1, s2;
+   LLVMValueRef t0, t1, t2;
+   LLVMValueRef r0, r1, r2;
+   LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy;
+   LLVMValueRef rho, lod;
+
+   /*
+    * dsdx = abs(s[1] - s[0]);
+    * dsdy = abs(s[2] - s[0]);
+    * dtdx = abs(t[1] - t[0]);
+    * dtdy = abs(t[2] - t[0]);
+    * drdx = abs(r[1] - r[0]);
+    * drdy = abs(r[2] - r[0]);
+    * XXX we're assuming a four-element quad in 2x2 layout here.
+    */
+   s0 = LLVMBuildExtractElement(bld->builder, s, index0, "s0");
+   s1 = LLVMBuildExtractElement(bld->builder, s, index1, "s1");
+   s2 = LLVMBuildExtractElement(bld->builder, s, index2, "s2");
+   dsdx = LLVMBuildSub(bld->builder, s1, s0, "");
+   dsdx = lp_build_abs(float_bld, dsdx);
+   dsdy = LLVMBuildSub(bld->builder, s2, s0, "");
+   dsdy = lp_build_abs(float_bld, dsdy);
+   if (dims > 1) {
+      t0 = LLVMBuildExtractElement(bld->builder, t, index0, "t0");
+      t1 = LLVMBuildExtractElement(bld->builder, t, index1, "t1");
+      t2 = LLVMBuildExtractElement(bld->builder, t, index2, "t2");
+      dtdx = LLVMBuildSub(bld->builder, t1, t0, "");
+      dtdx = lp_build_abs(float_bld, dtdx);
+      dtdy = LLVMBuildSub(bld->builder, t2, t0, "");
+      dtdy = lp_build_abs(float_bld, dtdy);
+      if (dims > 2) {
+         r0 = LLVMBuildExtractElement(bld->builder, r, index0, "r0");
+         r1 = LLVMBuildExtractElement(bld->builder, r, index1, "r1");
+         r2 = LLVMBuildExtractElement(bld->builder, r, index2, "r2");
+         drdx = LLVMBuildSub(bld->builder, r1, r0, "");
+         drdx = lp_build_abs(float_bld, drdx);
+         drdy = LLVMBuildSub(bld->builder, r2, r0, "");
+         drdy = lp_build_abs(float_bld, drdy);
+      }
+   }
+
+   /* Compute rho = max of all partial derivatives scaled by texture size.
+    * XXX this could be vectorized somewhat
+    */
+   rho = LLVMBuildMul(bld->builder,
+                      lp_build_max(float_bld, dsdx, dsdy),
+                      lp_build_int_to_float(float_bld, width), "");
+   if (dims > 1) {
+      LLVMValueRef max;
+      max = LLVMBuildMul(bld->builder,
+                         lp_build_max(float_bld, dtdx, dtdy),
+                         lp_build_int_to_float(float_bld, height), "");
+      rho = lp_build_max(float_bld, rho, max);
+      if (dims > 2) {
+         max = LLVMBuildMul(bld->builder,
+                            lp_build_max(float_bld, drdx, drdy),
+                            lp_build_int_to_float(float_bld, depth), "");
+         rho = lp_build_max(float_bld, rho, max);
+      }
+   }
+
+   /* compute lod = log2(rho) */
+   lod = lp_build_log2(float_bld, rho);
+
+   /* add lod bias */
+   lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "LOD bias");
+
+   /* clamp lod */
+   lod = lp_build_clamp(float_bld, lod, min_lod, max_lod);
+
+   return lod;
+}
+
+
+/**
+ * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer
+ * mipmap level index.
+ * Note: this is all scalar code.
+ * \param lod  scalar float texture level of detail
+ * \param level_out  returns integer 
+ */
+static void
+lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
+                           unsigned unit,
+                           LLVMValueRef lod,
+                           LLVMValueRef *level_out)
+{
+   struct lp_build_context *float_bld = &bld->float_bld;
+   struct lp_build_context *int_bld = &bld->int_bld;
+   LLVMValueRef last_level, level;
+
+   LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0);
+
+   last_level = bld->dynamic_state->last_level(bld->dynamic_state,
+                                               bld->builder, unit);
+
+   /* convert float lod to integer */
+   level = lp_build_iround(float_bld, lod);
+
+   /* clamp level to legal range of levels */
+   *level_out = lp_build_clamp(int_bld, level, zero, last_level);
+}
+
+
+/**
+ * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to integer to
+ * two (adjacent) mipmap level indexes.  Later, we'll sample from those
+ * two mipmap levels and interpolate between them.
+ */
+static void
+lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
+                           unsigned unit,
+                           LLVMValueRef lod,
+                           LLVMValueRef *level0_out,
+                           LLVMValueRef *level1_out,
+                           LLVMValueRef *weight_out)
+{
+   struct lp_build_context *coord_bld = &bld->coord_bld;
+   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
+   LLVMValueRef last_level, level;
+
+   last_level = bld->dynamic_state->last_level(bld->dynamic_state,
+                                               bld->builder, unit);
+
+   /* convert float lod to integer */
+   level = lp_build_ifloor(coord_bld, lod);
+
+   /* compute level 0 and clamp to legal range of levels */
+   *level0_out = lp_build_clamp(int_coord_bld, level,
+                                int_coord_bld->zero,
+                                last_level);
+   /* compute level 1 and clamp to legal range of levels */
+   *level1_out = lp_build_add(int_coord_bld, *level0_out, int_coord_bld->one);
+   *level1_out = lp_build_min(int_coord_bld, *level1_out, int_coord_bld->zero);
+
+   *weight_out = lp_build_fract(coord_bld, lod);
+}
+
+
+
+/**
+ * Sample 2D texture with nearest filtering, no mipmapping.
  */
 static void
 lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld,
@@ -746,6 +958,7 @@ lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld,
                                LLVMValueRef *texel)
 {
    LLVMValueRef x, y;
+   LLVMValueRef data_ptr;
 
    x = lp_build_sample_wrap_nearest(bld, s, width,
                                     bld->static_state->pot_width,
@@ -757,7 +970,63 @@ lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld,
    lp_build_name(x, "tex.x.wrapped");
    lp_build_name(y, "tex.y.wrapped");
 
-   lp_build_sample_texel_soa(bld, width, height, x, y, stride, data_array, texel);
+   /* get pointer to mipmap level 0 data */
+   data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
+
+   lp_build_sample_texel_soa(bld, width, height, x, y, stride, data_ptr, texel);
+}
+
+
+/**
+ * Sample 2D texture with nearest filtering, nearest mipmap.
+ */
+static void
+lp_build_sample_2d_nearest_mip_nearest_soa(struct lp_build_sample_context *bld,
+                                           unsigned unit,
+                                           LLVMValueRef s,
+                                           LLVMValueRef t,
+                                           LLVMValueRef width,
+                                           LLVMValueRef height,
+                                           LLVMValueRef width_vec,
+                                           LLVMValueRef height_vec,
+                                           LLVMValueRef stride,
+                                           LLVMValueRef data_array,
+                                           LLVMValueRef *texel)
+{
+   LLVMValueRef x, y;
+   LLVMValueRef lod, ilevel, ilevel_vec;
+   LLVMValueRef data_ptr;
+
+   /* compute float LOD */
+   lod = lp_build_lod_selector(bld, s, t, NULL, width, height, NULL);
+
+   /* convert LOD to int */
+   lp_build_nearest_mip_level(bld, unit, lod, &ilevel);
+
+   ilevel_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel);
+
+   /* compute width_vec, height at mipmap level 'ilevel' */
+   width_vec = lp_build_minify(bld, width_vec, ilevel_vec);
+   height_vec = lp_build_minify(bld, height_vec, ilevel_vec);
+   stride = lp_build_minify(bld, stride, ilevel_vec);
+
+   x = lp_build_sample_wrap_nearest(bld, s, width_vec,
+                                    bld->static_state->pot_width,
+                                    bld->static_state->wrap_s);
+   y = lp_build_sample_wrap_nearest(bld, t, height_vec,
+                                    bld->static_state->pot_height,
+                                    bld->static_state->wrap_t);
+
+   lp_build_name(x, "tex.x.wrapped");
+   lp_build_name(y, "tex.y.wrapped");
+
+   /* get pointer to mipmap level [ilevel] data */
+   if (0)
+      data_ptr = lp_build_get_mipmap_level(bld, data_array, ilevel);
+   else
+      data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
+
+   lp_build_sample_texel_soa(bld, width_vec, height_vec, x, y, stride, data_ptr, texel);
 }
 
 
@@ -779,6 +1048,7 @@ lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld,
    LLVMValueRef x0, x1;
    LLVMValueRef y0, y1;
    LLVMValueRef neighbors[2][2][4];
+   LLVMValueRef data_ptr;
    unsigned chan;
 
    lp_build_sample_wrap_linear(bld, s, width, bld->static_state->pot_width,
@@ -786,10 +1056,13 @@ lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld,
    lp_build_sample_wrap_linear(bld, t, height, bld->static_state->pot_height,
                                bld->static_state->wrap_t, &y0, &y1, &t_fpart);
 
-   lp_build_sample_texel_soa(bld, width, height, x0, y0, stride, data_array, neighbors[0][0]);
-   lp_build_sample_texel_soa(bld, width, height, x1, y0, stride, data_array, neighbors[0][1]);
-   lp_build_sample_texel_soa(bld, width, height, x0, y1, stride, data_array, neighbors[1][0]);
-   lp_build_sample_texel_soa(bld, width, height, x1, y1, stride, data_array, neighbors[1][1]);
+   /* get pointer to mipmap level 0 data */
+   data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
+
+   lp_build_sample_texel_soa(bld, width, height, x0, y0, stride, data_ptr, neighbors[0][0]);
+   lp_build_sample_texel_soa(bld, width, height, x1, y0, stride, data_ptr, neighbors[0][1]);
+   lp_build_sample_texel_soa(bld, width, height, x0, y1, stride, data_ptr, neighbors[1][0]);
+   lp_build_sample_texel_soa(bld, width, height, x1, y1, stride, data_ptr, neighbors[1][1]);
 
    /* TODO: Don't interpolate missing channels */
    for(chan = 0; chan < 4; ++chan) {
@@ -857,7 +1130,7 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
    LLVMValueRef packed, packed_lo, packed_hi;
    LLVMValueRef unswizzled[4];
 
-   lp_build_context_init(&i32, builder, lp_type_int(32));
+   lp_build_context_init(&i32, builder, lp_type_int_vec(32));
    lp_build_context_init(&h16, builder, lp_type_ufixed(16));
    lp_build_context_init(&u8n, builder, lp_type_unorm(8));
 
@@ -1066,194 +1339,11 @@ lp_build_sample_compare(struct lp_build_sample_context *bld,
 }
 
 
-static int
-texture_dims(enum pipe_texture_target tex)
-{
-   switch (tex) {
-   case PIPE_TEXTURE_1D:
-      return 1;
-   case PIPE_TEXTURE_2D:
-   case PIPE_TEXTURE_CUBE:
-      return 2;
-   case PIPE_TEXTURE_3D:
-      return 3;
-   default:
-      assert(0 && "bad texture target in texture_dims()");
-      return 2;
-   }
-}
-
-
-/**
- * Generate code to compute texture level of detail (lambda).
- * \param s  vector of texcoord s values
- * \param t  vector of texcoord t values
- * \param r  vector of texcoord r values
- * \param width  scalar int texture width
- * \param height  scalar int texture height
- * \param depth  scalar int texture depth
- */
-static LLVMValueRef
-lp_build_lod_selector(struct lp_build_sample_context *bld,
-                      LLVMValueRef s,
-                      LLVMValueRef t,
-                      LLVMValueRef r,
-                      LLVMValueRef width,
-                      LLVMValueRef height,
-                      LLVMValueRef depth)
-
-{
-   const int dims = texture_dims(bld->static_state->target);
-   struct lp_build_context *coord_bld = &bld->coord_bld;
-
-   LLVMValueRef lod_bias = lp_build_const_scalar(bld->coord_bld.type,
-                                                 bld->static_state->lod_bias);
-   LLVMValueRef min_lod = lp_build_const_scalar(bld->coord_bld.type,
-                                                bld->static_state->min_lod);
-   LLVMValueRef max_lod = lp_build_const_scalar(bld->coord_bld.type,
-                                                bld->static_state->max_lod);
-
-   LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
-   LLVMValueRef index1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
-   LLVMValueRef index2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
-
-   LLVMValueRef s0, s1, s2;
-   LLVMValueRef t0, t1, t2;
-   LLVMValueRef r0, r1, r2;
-   LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy;
-   LLVMValueRef rho, lod;
-
-   /*
-    * dsdx = abs(s[1] - s[0]);
-    * dsdy = abs(s[2] - s[0]);
-    * dtdx = abs(t[1] - t[0]);
-    * dtdy = abs(t[2] - t[0]);
-    * drdx = abs(r[1] - r[0]);
-    * drdy = abs(r[2] - r[0]);
-    * XXX we're assuming a four-element quad in 2x2 layout here.
-    */
-   s0 = LLVMBuildExtractElement(bld->builder, s, index0, "s0");
-   s1 = LLVMBuildExtractElement(bld->builder, s, index1, "s1");
-   s2 = LLVMBuildExtractElement(bld->builder, s, index2, "s2");
-   dsdx = lp_build_abs(coord_bld, lp_build_sub(coord_bld, s1, s0));
-   dsdy = lp_build_abs(coord_bld, lp_build_sub(coord_bld, s2, s0));
-   if (dims > 1) {
-      t0 = LLVMBuildExtractElement(bld->builder, t, index0, "t0");
-      t1 = LLVMBuildExtractElement(bld->builder, t, index1, "t1");
-      t2 = LLVMBuildExtractElement(bld->builder, t, index2, "t2");
-      dtdx = lp_build_abs(coord_bld, lp_build_sub(coord_bld, t1, t0));
-      dtdy = lp_build_abs(coord_bld, lp_build_sub(coord_bld, t2, t0));
-      if (dims > 2) {
-         r0 = LLVMBuildExtractElement(bld->builder, r, index0, "r0");
-         r1 = LLVMBuildExtractElement(bld->builder, r, index1, "r1");
-         r2 = LLVMBuildExtractElement(bld->builder, r, index2, "r2");
-         drdx = lp_build_abs(coord_bld, lp_build_sub(coord_bld, r1, r0));
-         drdy = lp_build_abs(coord_bld, lp_build_sub(coord_bld, r2, r0));
-      }
-   }
-
-   /* Compute rho = max of all partial derivatives scaled by texture size.
-    * XXX this can be vectorized somewhat
-    */
-   rho = lp_build_mul(coord_bld,
-                       lp_build_max(coord_bld, dsdx, dsdy),
-                       lp_build_int_to_float(coord_bld, width));
-   if (dims > 1) {
-      LLVMValueRef max;
-      max = lp_build_mul(coord_bld,
-                         lp_build_max(coord_bld, dtdx, dtdy),
-                         lp_build_int_to_float(coord_bld, height));
-      rho = lp_build_max(coord_bld, rho, max);
-      if (dims > 2) {
-         max = lp_build_mul(coord_bld,
-                            lp_build_max(coord_bld, drdx, drdy),
-                            lp_build_int_to_float(coord_bld, depth));
-         rho = lp_build_max(coord_bld, rho, max);
-      }
-   }
-
-   /* compute lod = log2(rho) */
-   lod = lp_build_log2(coord_bld, rho);
-
-   /* add lod bias */
-   lod = lp_build_add(coord_bld, lod, lod_bias);
-
-   /* clamp lod */
-   lod = lp_build_clamp(coord_bld, lod, min_lod, max_lod);
-
-   return lod;
-}
-
-
-/**
- * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer
- * mipmap level index.
- * \param lod  scalar float texture level of detail
- * \param level_out  returns integer 
- */
-static void
-lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
-                           unsigned unit,
-                           LLVMValueRef lod,
-                           LLVMValueRef *level_out)
-{
-   struct lp_build_context *coord_bld = &bld->coord_bld;
-   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
-   LLVMValueRef last_level, level;
-
-   last_level = bld->dynamic_state->last_level(bld->dynamic_state,
-                                               bld->builder, unit);
-
-   /* convert float lod to integer */
-   level = lp_build_iround(coord_bld, lod);
-
-   /* clamp level to legal range of levels */
-   *level_out = lp_build_clamp(int_coord_bld, level,
-                               int_coord_bld->zero,
-                               last_level);
-}
-
-
-/**
- * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to integer to
- * two (adjacent) mipmap level indexes.  Later, we'll sample from those
- * two mipmap levels and interpolate between them.
- */
-static void
-lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
-                           unsigned unit,
-                           LLVMValueRef lod,
-                           LLVMValueRef *level0_out,
-                           LLVMValueRef *level1_out,
-                           LLVMValueRef *weight_out)
-{
-   struct lp_build_context *coord_bld = &bld->coord_bld;
-   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
-   LLVMValueRef last_level, level;
-
-   last_level = bld->dynamic_state->last_level(bld->dynamic_state,
-                                               bld->builder, unit);
-
-   /* convert float lod to integer */
-   level = lp_build_ifloor(coord_bld, lod);
-
-   /* compute level 0 and clamp to legal range of levels */
-   *level0_out = lp_build_clamp(int_coord_bld, level,
-                                int_coord_bld->zero,
-                                last_level);
-   /* compute level 1 and clamp to legal range of levels */
-   *level1_out = lp_build_add(int_coord_bld, *level0_out, int_coord_bld->one);
-   *level1_out = lp_build_min(int_coord_bld, *level1_out, int_coord_bld->zero);
-
-   *weight_out = lp_build_fract(coord_bld, lod);
-}
-
-
-
 /**
  * Build texture sampling code.
  * 'texel' will return a vector of four LLVMValueRefs corresponding to
  * R, G, B, A.
+ * \param type  vector float type to use for coords, etc.
  */
 void
 lp_build_sample_soa(LLVMBuilderRef builder,
@@ -1267,17 +1357,19 @@ lp_build_sample_soa(LLVMBuilderRef builder,
                     LLVMValueRef *texel)
 {
    struct lp_build_sample_context bld;
-   LLVMValueRef width;
-   LLVMValueRef height;
-   LLVMValueRef stride;
+   LLVMValueRef width, width_vec;
+   LLVMValueRef height, height_vec;
+   LLVMValueRef stride, stride_vec;
    LLVMValueRef data_array;
    LLVMValueRef s;
    LLVMValueRef t;
    LLVMValueRef r;
+   boolean done = FALSE;
 
    (void) lp_build_lod_selector;   /* temporary to silence warning */
    (void) lp_build_nearest_mip_level;
    (void) lp_build_linear_mip_levels;
+   (void) lp_build_minify;
 
    /* Setup our build context */
    memset(&bld, 0, sizeof bld);
@@ -1285,10 +1377,16 @@ lp_build_sample_soa(LLVMBuilderRef builder,
    bld.static_state = static_state;
    bld.dynamic_state = dynamic_state;
    bld.format_desc = util_format_description(static_state->format);
+
+   bld.float_type = lp_type_float(32);
+   bld.int_type = lp_type_int(32);
    bld.coord_type = type;
    bld.uint_coord_type = lp_uint_type(type);
    bld.int_coord_type = lp_int_type(type);
    bld.texel_type = type;
+
+   lp_build_context_init(&bld.float_bld, builder, bld.float_type);
+   lp_build_context_init(&bld.int_bld, builder, bld.int_type);
    lp_build_context_init(&bld.coord_bld, builder, bld.coord_type);
    lp_build_context_init(&bld.uint_coord_bld, builder, bld.uint_coord_type);
    lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type);
@@ -1305,30 +1403,56 @@ lp_build_sample_soa(LLVMBuilderRef builder,
    t = coords[1];
    r = coords[2];
 
-   width = lp_build_broadcast_scalar(&bld.uint_coord_bld, width);
-   height = lp_build_broadcast_scalar(&bld.uint_coord_bld, height);
-   stride = lp_build_broadcast_scalar(&bld.uint_coord_bld, stride);
+   width_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, width);
+   height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height);
+   stride_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, stride);
 
    if(static_state->target == PIPE_TEXTURE_1D)
       t = bld.coord_bld.zero;
 
-   switch (static_state->min_img_filter) {
-   case PIPE_TEX_FILTER_NEAREST:
-      lp_build_sample_2d_nearest_soa(&bld, s, t, width, height,
-                                     stride, data_array, texel);
+   switch (static_state->min_mip_filter) {
+   case PIPE_TEX_MIPFILTER_NONE:
       break;
-   case PIPE_TEX_FILTER_LINEAR:
-      if(lp_format_is_rgba8(bld.format_desc) &&
-         is_simple_wrap_mode(static_state->wrap_s) &&
-         is_simple_wrap_mode(static_state->wrap_t))
-         lp_build_sample_2d_linear_aos(&bld, s, t, width, height,
-                                       stride, data_array, texel);
-      else
-         lp_build_sample_2d_linear_soa(&bld, s, t, width, height,
-                                       stride, data_array, texel);
+   case PIPE_TEX_MIPFILTER_NEAREST:
+
+      switch (static_state->min_img_filter) {
+      case PIPE_TEX_FILTER_NEAREST:
+         lp_build_sample_2d_nearest_mip_nearest_soa(&bld, unit,
+                                                    s, t,
+                                                    width, height,
+                                                    width_vec, height_vec,
+                                                    stride_vec,
+                                                    data_array, texel);
+         done = TRUE;
+         break;
+      }
+
+      break;
+   case PIPE_TEX_MIPFILTER_LINEAR:
       break;
    default:
-      assert(0);
+      assert(0 && "invalid mip filter");
+   }
+
+   if (!done) {
+      switch (static_state->min_img_filter) {
+      case PIPE_TEX_FILTER_NEAREST:
+         lp_build_sample_2d_nearest_soa(&bld, s, t, width_vec, height_vec,
+                                        stride_vec, data_array, texel);
+         break;
+      case PIPE_TEX_FILTER_LINEAR:
+         if(lp_format_is_rgba8(bld.format_desc) &&
+            is_simple_wrap_mode(static_state->wrap_s) &&
+            is_simple_wrap_mode(static_state->wrap_t))
+            lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec,
+                                          stride_vec, data_array, texel);
+         else
+            lp_build_sample_2d_linear_soa(&bld, s, t, width_vec, height_vec,
+                                          stride_vec, data_array, texel);
+         break;
+      default:
+         assert(0);
+      }
    }
 
    /* FIXME: respect static_state->min_mip_filter */;
index 16946cc28a2f9aa63fa4664cfa99aa5f62107003..4daa904e633c8e73457236e190c48c1964c5a6e9 100644 (file)
@@ -103,7 +103,7 @@ struct lp_type {
    unsigned width:14;
 
    /**
-    * Vector length.
+    * Vector length.  If length==1, this is a scalar (float/int) type.
     *
     * width*length should be a power of two greater or equal to eight.
     *
@@ -139,11 +139,28 @@ struct lp_build_context
 };
 
 
+/** Create scalar float type */
 static INLINE struct lp_type
 lp_type_float(unsigned width)
 {
    struct lp_type res_type;
 
+   memset(&res_type, 0, sizeof res_type);
+   res_type.floating = TRUE;
+   res_type.sign = TRUE;
+   res_type.width = width;
+   res_type.length = 1;
+
+   return res_type;
+}
+
+
+/** Create vector of float type */
+static INLINE struct lp_type
+lp_type_float_vec(unsigned width)
+{
+   struct lp_type res_type;
+
    memset(&res_type, 0, sizeof res_type);
    res_type.floating = TRUE;
    res_type.sign = TRUE;
@@ -154,11 +171,27 @@ lp_type_float(unsigned width)
 }
 
 
+/** Create scalar int type */
 static INLINE struct lp_type
 lp_type_int(unsigned width)
 {
    struct lp_type res_type;
 
+   memset(&res_type, 0, sizeof res_type);
+   res_type.sign = TRUE;
+   res_type.width = width;
+   res_type.length = 1;
+
+   return res_type;
+}
+
+
+/** Create vector int type */
+static INLINE struct lp_type
+lp_type_int_vec(unsigned width)
+{
+   struct lp_type res_type;
+
    memset(&res_type, 0, sizeof res_type);
    res_type.sign = TRUE;
    res_type.width = width;
@@ -168,11 +201,26 @@ lp_type_int(unsigned width)
 }
 
 
+/** Create scalar uint type */
 static INLINE struct lp_type
 lp_type_uint(unsigned width)
 {
    struct lp_type res_type;
 
+   memset(&res_type, 0, sizeof res_type);
+   res_type.width = width;
+   res_type.length = 1;
+
+   return res_type;
+}
+
+
+/** Create vector uint type */
+static INLINE struct lp_type
+lp_type_uint_vec(unsigned width)
+{
+   struct lp_type res_type;
+
    memset(&res_type, 0, sizeof res_type);
    res_type.width = width;
    res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
index 0b263a9db5ca31148421b003307eb41220d217bd..4d0737ccd3d03ed0fea6b2ca22b739b4d672c6ab 100644 (file)
@@ -63,6 +63,7 @@ struct blit_state
    struct pipe_sampler_state sampler;
    struct pipe_viewport_state viewport;
    struct pipe_clip_state clip;
+   struct pipe_vertex_element velem[2];
 
    void *vs;
    void *fs[TGSI_WRITEMASK_XYZW + 1];
@@ -114,6 +115,15 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso)
    ctx->sampler.mag_img_filter = 0; /* set later */
    ctx->sampler.normalized_coords = 1;
 
+   /* vertex elements state */
+   memset(&ctx->velem[0], 0, sizeof(ctx->velem[0]) * 2);
+   for (i = 0; i < 2; i++) {
+      ctx->velem[i].src_offset = i * 4 * sizeof(float);
+      ctx->velem[i].instance_divisor = 0;
+      ctx->velem[i].vertex_buffer_index = 0;
+      ctx->velem[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   }
+
    /* vertex shader - still required to provide the linkage between
     * fragment shader input semantics and vertex_element/buffers.
     */
@@ -410,12 +420,14 @@ util_blit_pixels_writemask(struct blit_state *ctx,
    cso_save_fragment_shader(ctx->cso);
    cso_save_vertex_shader(ctx->cso);
    cso_save_clip(ctx->cso);
+   cso_save_vertex_elements(ctx->cso);
 
    /* set misc state we care about */
    cso_set_blend(ctx->cso, &ctx->blend);
    cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil);
    cso_set_rasterizer(ctx->cso, &ctx->rasterizer);
    cso_set_clip(ctx->cso, &ctx->clip);
+   cso_set_vertex_elements(ctx->cso, 2, ctx->velem);
 
    /* sampler */
    ctx->sampler.min_img_filter = filter;
@@ -480,6 +492,7 @@ util_blit_pixels_writemask(struct blit_state *ctx,
    cso_restore_fragment_shader(ctx->cso);
    cso_restore_vertex_shader(ctx->cso);
    cso_restore_clip(ctx->cso);
+   cso_restore_vertex_elements(ctx->cso);
 
    pipe_texture_reference(&tex, NULL);
 }
@@ -564,12 +577,14 @@ util_blit_pixels_tex(struct blit_state *ctx,
    cso_save_fragment_shader(ctx->cso);
    cso_save_vertex_shader(ctx->cso);
    cso_save_clip(ctx->cso);
+   cso_save_vertex_elements(ctx->cso);
 
    /* set misc state we care about */
    cso_set_blend(ctx->cso, &ctx->blend);
    cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil);
    cso_set_rasterizer(ctx->cso, &ctx->rasterizer);
    cso_set_clip(ctx->cso, &ctx->clip);
+   cso_set_vertex_elements(ctx->cso, 2, ctx->velem);
 
    /* sampler */
    ctx->sampler.min_img_filter = filter;
@@ -628,4 +643,5 @@ util_blit_pixels_tex(struct blit_state *ctx,
    cso_restore_fragment_shader(ctx->cso);
    cso_restore_vertex_shader(ctx->cso);
    cso_restore_clip(ctx->cso);
+   cso_restore_vertex_elements(ctx->cso);
 }
index 0ba09d33bfcf6aad9a5f4f2f87db1a15ff3d092f..33d09085f0bf743e2418192a41930815d7f6bff7 100644 (file)
@@ -88,6 +88,8 @@ struct blitter_context_priv
    void *dsa_write_depth_keep_stencil;
    void *dsa_keep_depth_stencil;
 
+   void *velem_state;
+
    /* Sampler state for clamping to a miplevel. */
    void *sampler_state[PIPE_MAX_TEXTURE_LEVELS];
 
@@ -108,6 +110,7 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
    struct pipe_depth_stencil_alpha_state dsa = { { 0 } };
    struct pipe_rasterizer_state rs_state = { 0 };
    struct pipe_sampler_state *sampler_state;
+   struct pipe_vertex_element velem[2];
    unsigned i;
 
    ctx = CALLOC_STRUCT(blitter_context_priv);
@@ -122,6 +125,7 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
    ctx->blitter.saved_rs_state = INVALID_PTR;
    ctx->blitter.saved_fs = INVALID_PTR;
    ctx->blitter.saved_vs = INVALID_PTR;
+   ctx->blitter.saved_velem_state = INVALID_PTR;
    ctx->blitter.saved_fb_state.nr_cbufs = ~0;
    ctx->blitter.saved_num_textures = ~0;
    ctx->blitter.saved_num_sampler_states = ~0;
@@ -170,6 +174,16 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
    rs_state.flatshade = 1;
    ctx->rs_state = pipe->create_rasterizer_state(pipe, &rs_state);
 
+   /* vertex elements state */
+   memset(&velem[0], 0, sizeof(velem[0]) * 2);
+   for (i = 0; i < 2; i++) {
+      velem[i].src_offset = i * 4 * sizeof(float);
+      velem[i].instance_divisor = 0;
+      velem[i].vertex_buffer_index = 0;
+      velem[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   }
+   ctx->velem_state = pipe->create_vertex_elements_state(pipe, 2, &velem[0]);
+
    /* fragment shaders are created on-demand */
 
    /* vertex shaders */
@@ -219,6 +233,7 @@ void util_blitter_destroy(struct blitter_context *blitter)
    pipe->delete_rasterizer_state(pipe, ctx->rs_state);
    pipe->delete_vs_state(pipe, ctx->vs_col);
    pipe->delete_vs_state(pipe, ctx->vs_tex);
+   pipe->delete_vertex_elements_state(pipe, ctx->velem_state);
 
    for (i = 0; i < PIPE_MAX_TEXTURE_TYPES; i++) {
       if (ctx->fs_texfetch_col[i])
@@ -246,7 +261,8 @@ static void blitter_check_saved_CSOs(struct blitter_context_priv *ctx)
           ctx->blitter.saved_dsa_state != INVALID_PTR &&
           ctx->blitter.saved_rs_state != INVALID_PTR &&
           ctx->blitter.saved_fs != INVALID_PTR &&
-          ctx->blitter.saved_vs != INVALID_PTR);
+          ctx->blitter.saved_vs != INVALID_PTR &&
+          ctx->blitter.saved_velem_state != INVALID_PTR);
 }
 
 static void blitter_restore_CSOs(struct blitter_context_priv *ctx)
@@ -259,12 +275,14 @@ static void blitter_restore_CSOs(struct blitter_context_priv *ctx)
    pipe->bind_rasterizer_state(pipe, ctx->blitter.saved_rs_state);
    pipe->bind_fs_state(pipe, ctx->blitter.saved_fs);
    pipe->bind_vs_state(pipe, ctx->blitter.saved_vs);
+   pipe->bind_vertex_elements_state(pipe, ctx->blitter.saved_velem_state);
 
    ctx->blitter.saved_blend_state = INVALID_PTR;
    ctx->blitter.saved_dsa_state = INVALID_PTR;
    ctx->blitter.saved_rs_state = INVALID_PTR;
    ctx->blitter.saved_fs = INVALID_PTR;
    ctx->blitter.saved_vs = INVALID_PTR;
+   ctx->blitter.saved_velem_state = INVALID_PTR;
 
    pipe->set_stencil_ref(pipe, &ctx->blitter.saved_stencil_ref);
 
@@ -569,6 +587,7 @@ void util_blitter_clear(struct blitter_context *blitter,
       pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
 
    pipe->bind_rasterizer_state(pipe, ctx->rs_state);
+   pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
    pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, num_cbufs));
    pipe->bind_vs_state(pipe, ctx->vs_col);
 
@@ -634,6 +653,7 @@ static void util_blitter_do_copy(struct blitter_context *blitter,
    pipe->bind_vs_state(pipe, ctx->vs_tex);
    pipe->bind_fragment_sampler_states(pipe, 1,
       blitter_get_sampler_state(ctx, src->level));
+   pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
    pipe->set_fragment_sampler_textures(pipe, 1, &src->texture);
    pipe->set_framebuffer_state(pipe, &fb_state);
 
@@ -807,6 +827,7 @@ void util_blitter_fill(struct blitter_context *blitter,
    pipe->bind_rasterizer_state(pipe, ctx->rs_state);
    pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, 1));
    pipe->bind_vs_state(pipe, ctx->vs_col);
+   pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
 
    /* set a framebuffer state */
    fb_state.width = dst->width;
index 92008fce9925e438fba8d11ca1dc2cb205138731..ecafdabafae79ffa3431ca9d78d3bf79c350c8e0 100644 (file)
@@ -43,6 +43,7 @@ struct blitter_context
    /* Private members, really. */
    void *saved_blend_state;   /**< blend state */
    void *saved_dsa_state;     /**< depth stencil alpha state */
+   void *saved_velem_state;   /**< vertex elements state */
    void *saved_rs_state;      /**< rasterizer state */
    void *saved_fs, *saved_vs; /**< fragment shader, vertex shader */
 
@@ -172,6 +173,13 @@ void util_blitter_save_depth_stencil_alpha(struct blitter_context *blitter,
    blitter->saved_dsa_state = state;
 }
 
+static INLINE
+void util_blitter_save_vertex_elements(struct blitter_context *blitter,
+                                       void *state)
+{
+   blitter->saved_velem_state = state;
+}
+
 static INLINE
 void util_blitter_save_stencil_ref(struct blitter_context *blitter,
                                    const struct pipe_stencil_ref *state)
index 14506e8451968b238373eba26675b8d238cff26a..8c194102bfca2cf632d5e0bce4c98ec99a5e917a 100644 (file)
@@ -45,8 +45,6 @@ util_draw_vertex_buffer(struct pipe_context *pipe,
                         uint num_attribs)
 {
    struct pipe_vertex_buffer vbuffer;
-   struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS];
-   uint i;
 
    assert(num_attribs <= PIPE_MAX_ATTRIBS);
 
@@ -58,15 +56,7 @@ util_draw_vertex_buffer(struct pipe_context *pipe,
    vbuffer.max_index = num_verts - 1;
    pipe->set_vertex_buffers(pipe, 1, &vbuffer);
 
-   /* tell pipe about the vertex attributes */
-   for (i = 0; i < num_attribs; i++) {
-      velements[i].src_offset = i * 4 * sizeof(float);
-      velements[i].instance_divisor = 0;
-      velements[i].vertex_buffer_index = 0;
-      velements[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
-      velements[i].nr_components = 4;
-   }
-   pipe->set_vertex_elements(pipe, num_attribs, velements);
+   /* note: vertex elements already set by caller */
 
    /* draw */
    pipe->draw_arrays(pipe, prim_type, 0, num_verts);
index ae7afd7311e608ddebd67b8c8505aec6d9fc6469..52cf3ef4ce048139300044c39c481adda830034f 100644 (file)
@@ -700,7 +700,6 @@ util_dump_vertex_element(struct os_stream *stream, const struct pipe_vertex_elem
    util_dump_member(stream, uint, state, src_offset);
 
    util_dump_member(stream, uint, state, vertex_buffer_index);
-   util_dump_member(stream, uint, state, nr_components);
 
    util_dump_member(stream, format, state, src_format);
 
index b2aa5bfb188cba57ab3b78a75a937b1b97964d81..c08fdcafcc8deca38f5013e5570924687f15c070 100644 (file)
@@ -415,6 +415,16 @@ util_format_has_alpha(enum pipe_format format)
    }
 }
 
+/**
+ * Return the number of components stored.
+ * Formats with block size != 1x1 will always have 1 component (the block).
+ */
+static INLINE unsigned
+util_format_get_nr_components(enum pipe_format format)
+{
+   const struct util_format_description *desc = util_format_description(format);
+   return desc->nr_channels;
+}
 
 /*
  * Format access functions.
index b49039db39b03990c7b98fc3c8c9bd479675681f..409d024c63726cb88dd986302fcbcae492638b4e 100644 (file)
@@ -418,31 +418,70 @@ def generate_format_pack(format, src_channel, src_native_type, src_suffix):
 
     dst_native_type = native_type(format)
 
+    assert format.layout == PLAIN
+
+    inv_swizzle = format.inv_swizzles()
+    
     print 'static INLINE void'
     print 'util_format_%s_pack_%s(void *dst, %s r, %s g, %s b, %s a)' % (name, src_suffix, src_native_type, src_native_type, src_native_type, src_native_type)
     print '{'
-    print '   union util_format_%s pixel;' % format.short_name()
-
-    assert format.layout == PLAIN
+    
+    if format.is_bitmask():
+        depth = format.block_size()
+        print '   uint%u_t value = 0;' % depth 
 
-    inv_swizzle = format.inv_swizzles()
+        shift = 0
+        for i in range(4):
+            dst_channel = format.channels[i]
+            if inv_swizzle[i] is not None:
+                value = 'rgba'[inv_swizzle[i]]
+                value = conversion_expr(src_channel, dst_channel, dst_native_type, value)
+                if format.colorspace == ZS:
+                    if i == 3:
+                        value = get_one(dst_channel)
+                    elif i >= 1:
+                        value = '0'
+                if dst_channel.type in (UNSIGNED, SIGNED):
+                    if shift + dst_channel.size < depth:
+                        value = '(%s) & 0x%x' % (value, (1 << dst_channel.size) - 1)
+                    if shift:
+                        value = '(%s) << %u' % (value, shift)
+                    if dst_channel.type == SIGNED:
+                        # Cast to unsigned
+                        value = '(uint%u_t)(%s) ' % (depth, value)
+                else:
+                    value = None
+                if value is not None:
+                    print '   value |= %s;' % (value)
+                
+            shift += dst_channel.size
 
-    for i in range(4):
-        dst_channel = format.channels[i]
-        width = dst_channel.size
-        if inv_swizzle[i] is None:
-            continue
-        value = 'rgba'[inv_swizzle[i]]
-        value = conversion_expr(src_channel, dst_channel, dst_native_type, value)
-        if format.colorspace == ZS:
-            if i == 3:
-                value = get_one(dst_channel)
-            elif i >= 1:
-                value = '0'
-        print '   pixel.chan.%s = %s;' % (dst_channel.name, value)
+        print '#ifdef PIPE_ARCH_BIG_ENDIAN'
+        print '   value = util_bswap%u(value);' % depth
+        print '#endif'
+        
+        print '   *(uint%u_t *)dst = value;' % depth 
 
-    bswap_format(format)
-    print '   memcpy(dst, &pixel, sizeof pixel);'
+    else:
+        print '   union util_format_%s pixel;' % format.short_name()
+    
+        for i in range(4):
+            dst_channel = format.channels[i]
+            width = dst_channel.size
+            if inv_swizzle[i] is None:
+                continue
+            value = 'rgba'[inv_swizzle[i]]
+            value = conversion_expr(src_channel, dst_channel, dst_native_type, value)
+            if format.colorspace == ZS:
+                if i == 3:
+                    value = get_one(dst_channel)
+                elif i >= 1:
+                    value = '0'
+            print '   pixel.chan.%s = %s;' % (dst_channel.name, value)
+    
+        bswap_format(format)
+        print '   memcpy(dst, &pixel, sizeof pixel);'
+        
     print '}'
     print
     
index fc027e48e4ebb7e8b1cffdb6686ecd55da38a390..d421bee8efe1e26311274996048313fca543d1d2 100644 (file)
@@ -62,6 +62,7 @@ struct gen_mipmap_state
    struct pipe_rasterizer_state rasterizer;
    struct pipe_sampler_state sampler;
    struct pipe_clip_state clip;
+   struct pipe_vertex_element velem[2];
 
    void *vs;
    void *fs2d, *fsCube;
@@ -1307,6 +1308,15 @@ util_create_gen_mipmap(struct pipe_context *pipe,
    ctx->sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST;
    ctx->sampler.normalized_coords = 1;
 
+   /* vertex elements state */
+   memset(&ctx->velem[0], 0, sizeof(ctx->velem[0]) * 2);
+   for (i = 0; i < 2; i++) {
+      ctx->velem[i].src_offset = i * 4 * sizeof(float);
+      ctx->velem[i].instance_divisor = 0;
+      ctx->velem[i].vertex_buffer_index = 0;
+      ctx->velem[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   }
+
    /* vertex shader - still needed to specify mapping from fragment
     * shader input semantics to vertex elements 
     */
@@ -1501,12 +1511,14 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
    cso_save_vertex_shader(ctx->cso);
    cso_save_viewport(ctx->cso);
    cso_save_clip(ctx->cso);
+   cso_save_vertex_elements(ctx->cso);
 
    /* bind our state */
    cso_set_blend(ctx->cso, &ctx->blend);
    cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil);
    cso_set_rasterizer(ctx->cso, &ctx->rasterizer);
    cso_set_clip(ctx->cso, &ctx->clip);
+   cso_set_vertex_elements(ctx->cso, 2, ctx->velem);
 
    cso_set_fragment_shader_handle(ctx->cso, fs);
    cso_set_vertex_shader_handle(ctx->cso, ctx->vs);
@@ -1593,4 +1605,5 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
    cso_restore_vertex_shader(ctx->cso);
    cso_restore_viewport(ctx->cso);
    cso_restore_clip(ctx->cso);
+   cso_restore_vertex_elements(ctx->cso);
 }
index ba23435f698240257bf29fe798937e66dab6dbed..6d461cb88001e7db34972dcf6f95be52a193ea75 100644 (file)
@@ -230,6 +230,7 @@ static bool
 init_pipe_state(struct vl_compositor *c)
 {
    struct pipe_sampler_state sampler;
+   struct pipe_vertex_element vertex_elems[2];
 
    assert(c);
 
@@ -251,15 +252,27 @@ init_pipe_state(struct vl_compositor *c)
    /*sampler.border_color[i] = ;*/
    /*sampler.max_anisotropy = ;*/
    c->sampler = c->pipe->create_sampler_state(c->pipe, &sampler);
-       
+
+   vertex_elems[0].src_offset = 0;
+   vertex_elems[0].instance_divisor = 0;
+   vertex_elems[0].vertex_buffer_index = 0;
+   vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
+   vertex_elems[1].src_offset = 0;
+   vertex_elems[1].instance_divisor = 0;
+   vertex_elems[1].vertex_buffer_index = 1;
+   vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
+   c->vertex_elems = c->pipe->create_vertex_elements_state(c->pipe, 2, vertex_elems);
+
+
    return true;
 }
 
 static void cleanup_pipe_state(struct vl_compositor *c)
 {
    assert(c);
-       
+
    c->pipe->delete_sampler_state(c->pipe, c->sampler);
+   c->pipe->delete_vertex_elements_state(c->pipe, c->vertex_elems);
 }
 
 static bool
@@ -314,12 +327,6 @@ init_buffers(struct vl_compositor *c)
 
    pipe_buffer_unmap(c->pipe->screen, c->vertex_bufs[0].buffer);
 
-   c->vertex_elems[0].src_offset = 0;
-   c->vertex_elems[0].instance_divisor = 0;
-   c->vertex_elems[0].vertex_buffer_index = 0;
-   c->vertex_elems[0].nr_components = 2;
-   c->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
    /*
     * Create our texcoord buffer and texcoord buffer element
     * Texcoord buffer contains the TCs for mapping the rendered surface to the 4 vertices
@@ -344,12 +351,6 @@ init_buffers(struct vl_compositor *c)
 
    pipe_buffer_unmap(c->pipe->screen, c->vertex_bufs[1].buffer);
 
-   c->vertex_elems[1].src_offset = 0;
-   c->vertex_elems[1].instance_divisor = 0;
-   c->vertex_elems[1].vertex_buffer_index = 1;
-   c->vertex_elems[1].nr_components = 2;
-   c->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
    /*
     * Create our vertex shader's constant buffer
     * Const buffer contains scaling and translation vectors
@@ -483,7 +484,7 @@ void vl_compositor_render(struct vl_compositor          *compositor,
    compositor->pipe->bind_vs_state(compositor->pipe, compositor->vertex_shader);
    compositor->pipe->bind_fs_state(compositor->pipe, compositor->fragment_shader);
    compositor->pipe->set_vertex_buffers(compositor->pipe, 2, compositor->vertex_bufs);
-   compositor->pipe->set_vertex_elements(compositor->pipe, 2, compositor->vertex_elems);
+   compositor->pipe->bind_vertex_elements_state(compositor->pipe, compositor->vertex_elems);
    compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_VERTEX, 0, compositor->vs_const_buf);
    compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_FRAGMENT, 0, compositor->fs_const_buf);
 
index 6a9a3fd7af111dc327f98fe416abe6b34e984b3f..51755554da1b9098b8cf4dd2c885df07fd6cb36c 100644 (file)
@@ -43,10 +43,10 @@ struct vl_compositor
    void *sampler;
    void *vertex_shader;
    void *fragment_shader;
+   void *vertex_elems;
    struct pipe_viewport_state viewport;
    struct pipe_scissor_state scissor;
    struct pipe_vertex_buffer vertex_bufs[2];
-   struct pipe_vertex_element vertex_elems[2];
    struct pipe_buffer *vs_const_buf, *fs_const_buf;
 };
 
index f323de0ea55d0bfe2ac84ddcdd3c10af52a29d25..0763b5bb0e46763361e2c6cc995edab01d6554d8 100644 (file)
@@ -708,6 +708,7 @@ static bool
 init_pipe_state(struct vl_mpeg12_mc_renderer *r)
 {
    struct pipe_sampler_state sampler;
+   struct pipe_vertex_element vertex_elems[8];
    unsigned filters[5];
    unsigned i;
 
@@ -771,6 +772,59 @@ init_pipe_state(struct vl_mpeg12_mc_renderer *r)
       r->samplers.all[i] = r->pipe->create_sampler_state(r->pipe, &sampler);
    }
 
+   /* Position element */
+   vertex_elems[0].src_offset = 0;
+   vertex_elems[0].instance_divisor = 0;
+   vertex_elems[0].vertex_buffer_index = 0;
+   vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* Luma, texcoord element */
+   vertex_elems[1].src_offset = sizeof(struct vertex2f);
+   vertex_elems[1].instance_divisor = 0;
+   vertex_elems[1].vertex_buffer_index = 0;
+   vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* Chroma Cr texcoord element */
+   vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2;
+   vertex_elems[2].instance_divisor = 0;
+   vertex_elems[2].vertex_buffer_index = 0;
+   vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* Chroma Cb texcoord element */
+   vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3;
+   vertex_elems[3].instance_divisor = 0;
+   vertex_elems[3].vertex_buffer_index = 0;
+   vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* First ref surface top field texcoord element */
+   vertex_elems[4].src_offset = 0;
+   vertex_elems[4].instance_divisor = 0;
+   vertex_elems[4].vertex_buffer_index = 1;
+   vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* First ref surface bottom field texcoord element */
+   vertex_elems[5].src_offset = sizeof(struct vertex2f);
+   vertex_elems[5].instance_divisor = 0;
+   vertex_elems[5].vertex_buffer_index = 1;
+   vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* Second ref surface top field texcoord element */
+   vertex_elems[6].src_offset = 0;
+   vertex_elems[6].instance_divisor = 0;
+   vertex_elems[6].vertex_buffer_index = 2;
+   vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* Second ref surface bottom field texcoord element */
+   vertex_elems[7].src_offset = sizeof(struct vertex2f);
+   vertex_elems[7].instance_divisor = 0;
+   vertex_elems[7].vertex_buffer_index = 2;
+   vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* need versions with 4,6 and 8 vertex elems */
+   r->vertex_elems[0] = r->pipe->create_vertex_elements_state(r->pipe, 4, vertex_elems);
+   r->vertex_elems[1] = r->pipe->create_vertex_elements_state(r->pipe, 6, vertex_elems);
+   r->vertex_elems[2] = r->pipe->create_vertex_elements_state(r->pipe, 8, vertex_elems);
+
    return true;
 }
 
@@ -783,6 +837,8 @@ cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r)
 
    for (i = 0; i < 5; ++i)
       r->pipe->delete_sampler_state(r->pipe, r->samplers.all[i]);
+   for (i = 0; i < 3; i++)
+      r->pipe->delete_vertex_elements_state(r->pipe, r->vertex_elems[i]);
 }
 
 static bool
@@ -888,62 +944,6 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
       );
    }
 
-   /* Position element */
-   r->vertex_elems[0].src_offset = 0;
-   r->vertex_elems[0].instance_divisor = 0;
-   r->vertex_elems[0].vertex_buffer_index = 0;
-   r->vertex_elems[0].nr_components = 2;
-   r->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
-   /* Luma, texcoord element */
-   r->vertex_elems[1].src_offset = sizeof(struct vertex2f);
-   r->vertex_elems[1].instance_divisor = 0;
-   r->vertex_elems[1].vertex_buffer_index = 0;
-   r->vertex_elems[1].nr_components = 2;
-   r->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
-   /* Chroma Cr texcoord element */
-   r->vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2;
-   r->vertex_elems[2].instance_divisor = 0;
-   r->vertex_elems[2].vertex_buffer_index = 0;
-   r->vertex_elems[2].nr_components = 2;
-   r->vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
-   /* Chroma Cb texcoord element */
-   r->vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3;
-   r->vertex_elems[3].instance_divisor = 0;
-   r->vertex_elems[3].vertex_buffer_index = 0;
-   r->vertex_elems[3].nr_components = 2;
-   r->vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
-   /* First ref surface top field texcoord element */
-   r->vertex_elems[4].src_offset = 0;
-   r->vertex_elems[4].instance_divisor = 0;
-   r->vertex_elems[4].vertex_buffer_index = 1;
-   r->vertex_elems[4].nr_components = 2;
-   r->vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
-   /* First ref surface bottom field texcoord element */
-   r->vertex_elems[5].src_offset = sizeof(struct vertex2f);
-   r->vertex_elems[5].instance_divisor = 0;
-   r->vertex_elems[5].vertex_buffer_index = 1;
-   r->vertex_elems[5].nr_components = 2;
-   r->vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
-   /* Second ref surface top field texcoord element */
-   r->vertex_elems[6].src_offset = 0;
-   r->vertex_elems[6].instance_divisor = 0;
-   r->vertex_elems[6].vertex_buffer_index = 2;
-   r->vertex_elems[6].nr_components = 2;
-   r->vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
-   /* Second ref surface bottom field texcoord element */
-   r->vertex_elems[7].src_offset = sizeof(struct vertex2f);
-   r->vertex_elems[7].instance_divisor = 0;
-   r->vertex_elems[7].vertex_buffer_index = 2;
-   r->vertex_elems[7].nr_components = 2;
-   r->vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
    r->vs_const_buf = pipe_buffer_create
    (
       r->pipe->screen,
@@ -1307,7 +1307,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
 
    if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) {
       r->pipe->set_vertex_buffers(r->pipe, 1, r->vertex_bufs.all);
-      r->pipe->set_vertex_elements(r->pipe, 4, r->vertex_elems);
+      r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[0]);
       r->pipe->set_fragment_sampler_textures(r->pipe, 3, r->textures.all);
       r->pipe->bind_fragment_sampler_states(r->pipe, 3, r->samplers.all);
       r->pipe->bind_vs_state(r->pipe, r->i_vs);
@@ -1320,7 +1320,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
 
    if (num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0) {
       r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
-      r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
+      r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[1]);
       r->textures.individual.ref[0] = r->past;
       r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
       r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
@@ -1334,7 +1334,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
 
    if (false /*num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0 */ ) {
       r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
-      r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
+      r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[1]);
       r->textures.individual.ref[0] = r->past;
       r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
       r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
@@ -1348,7 +1348,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
 
    if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0) {
       r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
-      r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
+      r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[1]);
       r->textures.individual.ref[0] = r->future;
       r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
       r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
@@ -1362,7 +1362,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
 
    if (false /*num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0 */ ) {
       r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
-      r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
+      r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[1]);
       r->textures.individual.ref[0] = r->future;
       r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
       r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
@@ -1376,7 +1376,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
 
    if (num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] > 0) {
       r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
-      r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems);
+      r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[2]);
       r->textures.individual.ref[0] = r->past;
       r->textures.individual.ref[1] = r->future;
       r->pipe->set_fragment_sampler_textures(r->pipe, 5, r->textures.all);
@@ -1391,7 +1391,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
 
    if (false /*num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0 */ ) {
       r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
-      r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems);
+      r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[2]);
       r->textures.individual.ref[0] = r->past;
       r->textures.individual.ref[1] = r->future;
       r->pipe->set_fragment_sampler_textures(r->pipe, 5, r->textures.all);
index f00b8c7b8b12e71c746598c2482be66c5f0c1a67..a11a3e7307b4e3dd80a2ce610a2ad1c03644035d 100644 (file)
@@ -66,8 +66,8 @@ struct vl_mpeg12_mc_renderer
    struct pipe_buffer *vs_const_buf;
    struct pipe_buffer *fs_const_buf;
    struct pipe_framebuffer_state fb_state;
-   struct pipe_vertex_element vertex_elems[8];
-       
+   void *vertex_elems[3];
+
    union
    {
       void *all[5];
index 9080addba444f94aa0439a8ee7bf37d8d8f7839b..4608e97adbb18ee0241bcfab37769bbb6c7c7db7 100644 (file)
@@ -24,6 +24,7 @@ CSO objects handled by the context object:
 * :ref:`Depth, Stencil, & Alpha`: ``*_depth_stencil_alpha_state``
 * :ref:`Shader`: These have two sets of methods. ``*_fs_state`` is for
   fragment shaders, and ``*_vs_state`` is for vertex shaders.
+* :ref:`Vertex Elements`: ``*_vertex_elements_state``
 
 
 Resource Binding State
@@ -60,7 +61,6 @@ objects. They all follow simple, one-method binding calls, e.g.
   not have the scissor test enabled, then the scissor bounds never need to
   be set since they will not be used.
 * ``set_viewport_state``
-* ``set_vertex_elements``
 
 
 Clearing
diff --git a/src/gallium/docs/source/cso/velems.rst b/src/gallium/docs/source/cso/velems.rst
new file mode 100644 (file)
index 0000000..8e758fa
--- /dev/null
@@ -0,0 +1,24 @@
+.. _vertex,elements
+
+Vertex Elements
+===============
+
+This state controls format etc. of the input attributes contained
+in the pipe_vertex_buffer(s). There's one pipe_vertex_element array member
+for each input attribute.
+
+Members
+-------
+
+src_offset
+    The byte offset of the attribute in the buffer given by
+    vertex_buffer_index for the first vertex.
+instance_divisor
+    The instance data rate divisor, used for instancing.
+    0 means this is per-vertex data, n means per-instance data used for
+    n consecutive instances (n > 0).
+vertex_buffer_index
+    The vertex buffer this attribute lives in. Several attributes may
+    live in the same vertex buffer.
+src_format
+    The format of the attribute data. One of the PIPE_FORMAT tokens.
index 233b91dec6b301021a500c23b8511e6b359ad9e8..28f80b82cd590d3336ff5274066e9c780c5a093f 100644 (file)
@@ -93,6 +93,11 @@ struct cell_buffer_list
    struct cell_buffer_node *head;
 };
 
+struct cell_velems_state
+{
+   unsigned count;
+   struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
+}
 
 /**
  * Per-context state, subclass of pipe_context.
@@ -110,6 +115,7 @@ struct cell_context
    const struct pipe_rasterizer_state *rasterizer;
    const struct cell_vertex_shader_state *vs;
    const struct cell_fragment_shader_state *fs;
+   const struct cell_velems_state *velems;
 
    struct spe_function logic_op;
 
@@ -125,8 +131,6 @@ struct cell_context
    struct pipe_viewport_state viewport;
    struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
    uint num_vertex_buffers;
-   struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
-   uint num_vertex_elements;
 
    ubyte *cbuf_map[PIPE_MAX_COLOR_BUFS];
    ubyte *zsbuf_map;
index fbe55c84721497b572fb8e970eff6e0e4584cc17..d3efb8ecea21fce5f8e2add2a1e1d37ce26b0ed8 100644 (file)
 #include "cell_context.h"
 #include "cell_state.h"
 
+#include "util/u_memory.h"
 #include "draw/draw_context.h"
 
 
-static void
-cell_set_vertex_elements(struct pipe_context *pipe,
-                         unsigned count,
-                         const struct pipe_vertex_element *elements)
+void *
+cell_create_vertex_elements_state(struct pipe_context *pipe,
+                                  unsigned count,
+                                  const struct pipe_vertex_element *attribs)
 {
-   struct cell_context *cell = cell_context(pipe);
-
+   struct cell_velems_state *velems;
    assert(count <= PIPE_MAX_ATTRIBS);
+   velems = (struct cell_velems_state *) MALLOC(sizeof(struct cell_velems_state));
+   if (velems) {
+      velems->count = count;
+      memcpy(velems->velem, attribs, sizeof(*attribs) * count);
+   }
+   return velems;
+}
+
+void
+cell_bind_vertex_elements_state(struct pipe_context *pipe,
+                                void *velems)
+{
+   struct cell_context *cell = cell_context(pipe);
+   struct cell_velems_state *cell_velems = (struct cell_velems_state *) velems;
 
-   memcpy(cell->vertex_element, elements, count * sizeof(elements[0]));
-   cell->num_vertex_elements = count;
+   cell->velems = cell_velems;
 
    cell->dirty |= CELL_NEW_VERTEX;
 
-   draw_set_vertex_elements(cell->draw, count, elements);
+   draw_set_vertex_elements(cell->draw, cell_velems->count, cell_velems->velem);
+}
+
+void
+cell_delete_vertex_elements_state(struct pipe_context *pipe, void *velems)
+{
+   FREE( velems );
 }
 
 
@@ -75,5 +94,7 @@ void
 cell_init_vertex_functions(struct cell_context *cell)
 {
    cell->pipe.set_vertex_buffers = cell_set_vertex_buffers;
-   cell->pipe.set_vertex_elements = cell_set_vertex_elements;
+   cell->pipe.create_vertex_elements_state = cell_create_vertex_elements_state;
+   cell->pipe.bind_vertex_elements_state = cell_bind_vertex_elements_state;
+   cell->pipe.delete_vertex_elements_state = cell_delete_vertex_elements_state;
 }
index bb1a168ea7aeb4d46c051ea0e1b28b461ca15731..4a754465bbe9f88e8930e072cafde99a49ce8a1c 100644 (file)
@@ -78,6 +78,7 @@ struct failover_context {
    const struct fo_state     *rasterizer;
    const struct fo_state     *fragment_shader;
    const struct fo_state     *vertex_shader;
+   const struct fo_state     *vertex_elements;
 
    struct pipe_blend_color blend_color;
    struct pipe_stencil_ref stencil_ref;
@@ -89,10 +90,8 @@ struct failover_context {
    struct pipe_texture *vertex_textures[PIPE_MAX_VERTEX_SAMPLERS];
    struct pipe_viewport_state viewport;
    struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];
-   struct pipe_vertex_element vertex_elements[PIPE_MAX_ATTRIBS];
 
    uint num_vertex_buffers;
-   uint num_vertex_elements;
 
    void *sw_sampler_state[PIPE_MAX_SAMPLERS];
    void *hw_sampler_state[PIPE_MAX_SAMPLERS];
index 970606a3f50a0cd3561e2e14c7fcaab1fa0b8d6e..0247fb803b297baa6d12b8d9f8ca894b72cd5c1d 100644 (file)
@@ -255,9 +255,52 @@ failover_delete_vs_state(struct pipe_context *pipe,
    free(state);
 }
 
+
+
+static void *
+failover_create_vertex_elements_state( struct pipe_context *pipe,
+                                       unsigned count,
+                                       const struct pipe_vertex_element *velems )
+{
+   struct fo_state *state = malloc(sizeof(struct fo_state));
+   struct failover_context *failover = failover_context(pipe);
+
+   state->sw_state = failover->sw->create_vertex_elements_state(failover->sw, count, velems);
+   state->hw_state = failover->hw->create_vertex_elements_state(failover->hw, count, velems);
+
+   return state;
+}
+
+static void
+failover_bind_vertex_elements_state(struct pipe_context *pipe,
+                                    void *velems )
+{
+   struct failover_context *failover = failover_context(pipe);
+   struct fo_state *state = (struct fo_state*)velems;
+
+   failover->vertex_elements = state;
+   failover->dirty |= FO_NEW_VERTEX_ELEMENT;
+   failover->sw->bind_vertex_elements_state( failover->sw, velems );
+   failover->hw->bind_vertex_elements_state( failover->hw, velems );
+}
+
+static void
+failover_delete_vertex_elements_state( struct pipe_context *pipe,
+                                       void *velems )
+{
+   struct fo_state *state = (struct fo_state*)velems;
+   struct failover_context *failover = failover_context(pipe);
+
+   failover->sw->delete_vertex_elements_state(failover->sw, state->sw_state);
+   failover->hw->delete_vertex_elements_state(failover->hw, state->hw_state);
+   state->sw_state = 0;
+   state->hw_state = 0;
+   free(state);
+}
+
 static void 
 failover_set_polygon_stipple( struct pipe_context *pipe,
-                             const struct pipe_poly_stipple *stipple )
+                              const struct pipe_poly_stipple *stipple )
 {
    struct failover_context *failover = failover_context(pipe);
 
@@ -490,22 +533,6 @@ failover_set_vertex_buffers(struct pipe_context *pipe,
 }
 
 
-static void
-failover_set_vertex_elements(struct pipe_context *pipe,
-                             unsigned count,
-                             const struct pipe_vertex_element *vertex_elements)
-{
-   struct failover_context *failover = failover_context(pipe);
-
-   memcpy(failover->vertex_elements, vertex_elements,
-          count * sizeof(vertex_elements[0]));
-
-   failover->dirty |= FO_NEW_VERTEX_ELEMENT;
-   failover->num_vertex_elements = count;
-   failover->sw->set_vertex_elements( failover->sw, count, vertex_elements );
-   failover->hw->set_vertex_elements( failover->hw, count, vertex_elements );
-}
-
 void
 failover_set_constant_buffer(struct pipe_context *pipe,
                              uint shader, uint index,
@@ -543,6 +570,9 @@ failover_init_state_functions( struct failover_context *failover )
    failover->pipe.create_vs_state = failover_create_vs_state;
    failover->pipe.bind_vs_state   = failover_bind_vs_state;
    failover->pipe.delete_vs_state = failover_delete_vs_state;
+   failover->pipe.create_vertex_elements_state = failover_create_vertex_elements_state;
+   failover->pipe.bind_vertex_elements_state = failover_bind_vertex_elements_state;
+   failover->pipe.delete_vertex_elements_state = failover_delete_vertex_elements_state;
 
    failover->pipe.set_blend_color = failover_set_blend_color;
    failover->pipe.set_stencil_ref = failover_set_stencil_ref;
@@ -554,6 +584,5 @@ failover_init_state_functions( struct failover_context *failover )
    failover->pipe.set_vertex_sampler_textures = failover_set_vertex_sampler_textures;
    failover->pipe.set_viewport_state = failover_set_viewport_state;
    failover->pipe.set_vertex_buffers = failover_set_vertex_buffers;
-   failover->pipe.set_vertex_elements = failover_set_vertex_elements;
    failover->pipe.set_constant_buffer = failover_set_constant_buffer;
 }
index 5c000808425cdeedfd11271e47e73c541eab57f7..09ca19449713229a997072a7e82636b4fcd5b950 100644 (file)
@@ -81,6 +81,10 @@ failover_state_emit( struct failover_context *failover )
       failover->sw->bind_vs_state( failover->sw,
                                    failover->vertex_shader->sw_state );
 
+   if (failover->dirty & FO_NEW_VERTEX_ELEMENT)
+      failover->sw->bind_vertex_elements_state( failover->sw,
+                                                failover->vertex_elements->sw_state );
+
    if (failover->dirty & FO_NEW_STIPPLE)
       failover->sw->set_polygon_stipple( failover->sw, &failover->poly_stipple );
 
@@ -116,11 +120,5 @@ failover_state_emit( struct failover_context *failover )
                                         failover->vertex_buffers );
    }
 
-   if (failover->dirty & FO_NEW_VERTEX_ELEMENT) {
-      failover->sw->set_vertex_elements( failover->sw,
-                                         failover->num_vertex_elements,
-                                         failover->vertex_elements );
-   }
-
    failover->dirty = 0;
 }
index 499a727314d9746d347305c07a8d17257f4d4d25..49945376837cee71929670930c2f70ef29d0aa30 100644 (file)
@@ -148,7 +148,7 @@ struct i915_state
 
    /** Describes the current hardware vertex layout */
    struct vertex_info vertex_info;
-   
+
    unsigned id;                        /* track lost context events */
 };
 
@@ -187,6 +187,11 @@ struct i915_sampler_state {
    unsigned maxlod;
 };
 
+struct i915_velems_state {
+   unsigned count;
+   struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
+};
+
 #define I915_MAX_TEXTURE_2D_LEVELS 11  /* max 1024x1024 */
 #define I915_MAX_TEXTURE_3D_LEVELS  8  /* max 128x128x128 */
 
@@ -250,7 +255,6 @@ struct i915_context
 
    unsigned num_samplers;
    unsigned num_textures;
-   unsigned num_vertex_elements;
    unsigned num_vertex_buffers;
 
    struct intel_batchbuffer *batch;
index 62169918e2b715bedf8decb306ecde8ffaba4c20..8927dfc33d474177aeae2080b704399fefd119d5 100644 (file)
@@ -742,21 +742,42 @@ static void i915_set_vertex_buffers(struct pipe_context *pipe,
    draw_set_vertex_buffers(i915->draw, count, buffers);
 }
 
-static void i915_set_vertex_elements(struct pipe_context *pipe,
-                                     unsigned count,
-                                     const struct pipe_vertex_element *elements)
+static void *
+i915_create_vertex_elements_state(struct pipe_context *pipe,
+                                  unsigned count,
+                                  const struct pipe_vertex_element *attribs)
+{
+   struct i915_velems_state *velems;
+   assert(count <= PIPE_MAX_ATTRIBS);
+   velems = (struct i915_velems_state *) MALLOC(sizeof(struct i915_velems_state));
+   if (velems) {
+      velems->count = count;
+      memcpy(velems->velem, attribs, sizeof(*attribs) * count);
+   }
+   return velems;
+}
+
+static void
+i915_bind_vertex_elements_state(struct pipe_context *pipe,
+                                void *velems)
 {
    struct i915_context *i915 = i915_context(pipe);
+   struct i915_velems_state *i915_velems = (struct i915_velems_state *) velems;
+
    /* Because we change state before the draw_set_vertex_buffers call
     * we need a flush here, just to be sure.
     */
    draw_flush(i915->draw);
 
-   i915->num_vertex_elements = count;
    /* pass-through to draw module */
-   draw_set_vertex_elements(i915->draw, count, elements);
+   draw_set_vertex_elements(i915->draw, i915_velems->count, i915_velems->velem);
 }
 
+static void
+i915_delete_vertex_elements_state(struct pipe_context *pipe, void *velems)
+{
+   FREE( velems );
+}
 
 void
 i915_init_state_functions( struct i915_context *i915 )
@@ -782,6 +803,9 @@ i915_init_state_functions( struct i915_context *i915 )
    i915->base.create_vs_state = i915_create_vs_state;
    i915->base.bind_vs_state = i915_bind_vs_state;
    i915->base.delete_vs_state = i915_delete_vs_state;
+   i915->base.create_vertex_elements_state = i915_create_vertex_elements_state;
+   i915->base.bind_vertex_elements_state = i915_bind_vertex_elements_state;
+   i915->base.delete_vertex_elements_state = i915_delete_vertex_elements_state;
 
    i915->base.set_blend_color = i915_set_blend_color;
    i915->base.set_stencil_ref = i915_set_stencil_ref;
@@ -794,5 +818,4 @@ i915_init_state_functions( struct i915_context *i915 )
    i915->base.set_fragment_sampler_textures = i915_set_sampler_textures;
    i915->base.set_viewport_state = i915_set_viewport_state;
    i915->base.set_vertex_buffers = i915_set_vertex_buffers;
-   i915->base.set_vertex_elements = i915_set_vertex_elements;
 }
index 12cfa7b049c2276de81f82f807ad42be0fb9bfc0..f5b1a06576bd1dda50f535e6dd8173bc0832c8f2 100644 (file)
@@ -351,7 +351,7 @@ struct brw_vs_prog_data {
 
 /* Size == 0 if output either not written, or always [0,0,0,1]
  */
-struct brw_vs_ouput_sizes {
+struct brw_vs_output_sizes {
    GLubyte output_size[PIPE_MAX_SHADER_OUTPUTS];
 };
 
@@ -546,14 +546,13 @@ struct brw_context
       const struct brw_blend_state *blend;
       const struct brw_rasterizer_state *rast;
       const struct brw_depth_stencil_state *zstencil;
+      const struct brw_vertex_element_packet *velems;
 
       const struct brw_sampler *sampler[PIPE_MAX_SAMPLERS];
       unsigned num_samplers;
 
       struct pipe_texture *texture[PIPE_MAX_SAMPLERS];
       struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
-      struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
-      unsigned num_vertex_elements;
       unsigned num_textures;
       unsigned num_vertex_buffers;
 
index 9f136eec71cf1ef9384f619829da854c2fd70ffb..0820ba20a08fc7c4fa61d7b5e5ac70b7787f0e37 100644 (file)
 
 
 
-static unsigned brw_translate_surface_format( unsigned id )
-{
-   switch (id) {
-   case PIPE_FORMAT_R64_FLOAT:
-      return BRW_SURFACEFORMAT_R64_FLOAT;
-   case PIPE_FORMAT_R64G64_FLOAT:
-      return BRW_SURFACEFORMAT_R64G64_FLOAT;
-   case PIPE_FORMAT_R64G64B64_FLOAT:
-      return BRW_SURFACEFORMAT_R64G64B64_FLOAT;
-   case PIPE_FORMAT_R64G64B64A64_FLOAT:
-      return BRW_SURFACEFORMAT_R64G64B64A64_FLOAT;
-
-   case PIPE_FORMAT_R32_FLOAT:
-      return BRW_SURFACEFORMAT_R32_FLOAT;
-   case PIPE_FORMAT_R32G32_FLOAT:
-      return BRW_SURFACEFORMAT_R32G32_FLOAT;
-   case PIPE_FORMAT_R32G32B32_FLOAT:
-      return BRW_SURFACEFORMAT_R32G32B32_FLOAT;
-   case PIPE_FORMAT_R32G32B32A32_FLOAT:
-      return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
-
-   case PIPE_FORMAT_R32_UNORM:
-      return BRW_SURFACEFORMAT_R32_UNORM;
-   case PIPE_FORMAT_R32G32_UNORM:
-      return BRW_SURFACEFORMAT_R32G32_UNORM;
-   case PIPE_FORMAT_R32G32B32_UNORM:
-      return BRW_SURFACEFORMAT_R32G32B32_UNORM;
-   case PIPE_FORMAT_R32G32B32A32_UNORM:
-      return BRW_SURFACEFORMAT_R32G32B32A32_UNORM;
-
-   case PIPE_FORMAT_R32_USCALED:
-      return BRW_SURFACEFORMAT_R32_USCALED;
-   case PIPE_FORMAT_R32G32_USCALED:
-      return BRW_SURFACEFORMAT_R32G32_USCALED;
-   case PIPE_FORMAT_R32G32B32_USCALED:
-      return BRW_SURFACEFORMAT_R32G32B32_USCALED;
-   case PIPE_FORMAT_R32G32B32A32_USCALED:
-      return BRW_SURFACEFORMAT_R32G32B32A32_USCALED;
-
-   case PIPE_FORMAT_R32_SNORM:
-      return BRW_SURFACEFORMAT_R32_SNORM;
-   case PIPE_FORMAT_R32G32_SNORM:
-      return BRW_SURFACEFORMAT_R32G32_SNORM;
-   case PIPE_FORMAT_R32G32B32_SNORM:
-      return BRW_SURFACEFORMAT_R32G32B32_SNORM;
-   case PIPE_FORMAT_R32G32B32A32_SNORM:
-      return BRW_SURFACEFORMAT_R32G32B32A32_SNORM;
-
-   case PIPE_FORMAT_R32_SSCALED:
-      return BRW_SURFACEFORMAT_R32_SSCALED;
-   case PIPE_FORMAT_R32G32_SSCALED:
-      return BRW_SURFACEFORMAT_R32G32_SSCALED;
-   case PIPE_FORMAT_R32G32B32_SSCALED:
-      return BRW_SURFACEFORMAT_R32G32B32_SSCALED;
-   case PIPE_FORMAT_R32G32B32A32_SSCALED:
-      return BRW_SURFACEFORMAT_R32G32B32A32_SSCALED;
-
-   case PIPE_FORMAT_R16_UNORM:
-      return BRW_SURFACEFORMAT_R16_UNORM;
-   case PIPE_FORMAT_R16G16_UNORM:
-      return BRW_SURFACEFORMAT_R16G16_UNORM;
-   case PIPE_FORMAT_R16G16B16_UNORM:
-      return BRW_SURFACEFORMAT_R16G16B16_UNORM;
-   case PIPE_FORMAT_R16G16B16A16_UNORM:
-      return BRW_SURFACEFORMAT_R16G16B16A16_UNORM;
-
-   case PIPE_FORMAT_R16_USCALED:
-      return BRW_SURFACEFORMAT_R16_USCALED;
-   case PIPE_FORMAT_R16G16_USCALED:
-      return BRW_SURFACEFORMAT_R16G16_USCALED;
-   case PIPE_FORMAT_R16G16B16_USCALED:
-      return BRW_SURFACEFORMAT_R16G16B16_USCALED;
-   case PIPE_FORMAT_R16G16B16A16_USCALED:
-      return BRW_SURFACEFORMAT_R16G16B16A16_USCALED;
-
-   case PIPE_FORMAT_R16_SNORM:
-      return BRW_SURFACEFORMAT_R16_SNORM;
-   case PIPE_FORMAT_R16G16_SNORM:
-      return BRW_SURFACEFORMAT_R16G16_SNORM;
-   case PIPE_FORMAT_R16G16B16_SNORM:
-      return BRW_SURFACEFORMAT_R16G16B16_SNORM;
-   case PIPE_FORMAT_R16G16B16A16_SNORM:
-      return BRW_SURFACEFORMAT_R16G16B16A16_SNORM;
-
-   case PIPE_FORMAT_R16_SSCALED:
-      return BRW_SURFACEFORMAT_R16_SSCALED;
-   case PIPE_FORMAT_R16G16_SSCALED:
-      return BRW_SURFACEFORMAT_R16G16_SSCALED;
-   case PIPE_FORMAT_R16G16B16_SSCALED:
-      return BRW_SURFACEFORMAT_R16G16B16_SSCALED;
-   case PIPE_FORMAT_R16G16B16A16_SSCALED:
-      return BRW_SURFACEFORMAT_R16G16B16A16_SSCALED;
-
-   case PIPE_FORMAT_R8_UNORM:
-      return BRW_SURFACEFORMAT_R8_UNORM;
-   case PIPE_FORMAT_R8G8_UNORM:
-      return BRW_SURFACEFORMAT_R8G8_UNORM;
-   case PIPE_FORMAT_R8G8B8_UNORM:
-      return BRW_SURFACEFORMAT_R8G8B8_UNORM;
-   case PIPE_FORMAT_R8G8B8A8_UNORM:
-      return BRW_SURFACEFORMAT_R8G8B8A8_UNORM;
-
-   case PIPE_FORMAT_R8_USCALED:
-      return BRW_SURFACEFORMAT_R8_USCALED;
-   case PIPE_FORMAT_R8G8_USCALED:
-      return BRW_SURFACEFORMAT_R8G8_USCALED;
-   case PIPE_FORMAT_R8G8B8_USCALED:
-      return BRW_SURFACEFORMAT_R8G8B8_USCALED;
-   case PIPE_FORMAT_R8G8B8A8_USCALED:
-      return BRW_SURFACEFORMAT_R8G8B8A8_USCALED;
-
-   case PIPE_FORMAT_R8_SNORM:
-      return BRW_SURFACEFORMAT_R8_SNORM;
-   case PIPE_FORMAT_R8G8_SNORM:
-      return BRW_SURFACEFORMAT_R8G8_SNORM;
-   case PIPE_FORMAT_R8G8B8_SNORM:
-      return BRW_SURFACEFORMAT_R8G8B8_SNORM;
-   case PIPE_FORMAT_R8G8B8A8_SNORM:
-      return BRW_SURFACEFORMAT_R8G8B8A8_SNORM;
-
-   case PIPE_FORMAT_R8_SSCALED:
-      return BRW_SURFACEFORMAT_R8_SSCALED;
-   case PIPE_FORMAT_R8G8_SSCALED:
-      return BRW_SURFACEFORMAT_R8G8_SSCALED;
-   case PIPE_FORMAT_R8G8B8_SSCALED:
-      return BRW_SURFACEFORMAT_R8G8B8_SSCALED;
-   case PIPE_FORMAT_R8G8B8A8_SSCALED:
-      return BRW_SURFACEFORMAT_R8G8B8A8_SSCALED;
-
-   default:
-      assert(0);
-      return 0;
-   }
-}
-
 static unsigned get_index_type(int type)
 {
    switch (type) {
@@ -315,75 +180,16 @@ static int brw_emit_vertex_buffers( struct brw_context *brw )
 
 
 
-
 static int brw_emit_vertex_elements(struct brw_context *brw)
 {
-   GLuint nr = brw->curr.num_vertex_elements;
-   GLuint i;
+   const struct brw_vertex_element_packet *brw_velems = brw->curr.velems;
+   unsigned size = brw_velems->header.length + 2;
 
+   /* why is this here */
    brw_emit_query_begin(brw);
 
-   /* If the VS doesn't read any inputs (calculating vertex position from
-    * a state variable for some reason, for example), emit a single pad
-    * VERTEX_ELEMENT struct and bail.
-    *
-    * The stale VB state stays in place, but they don't do anything unless
-    * a VE loads from them.
-    */
-   if (nr == 0) {
-      BEGIN_BATCH(3, IGNORE_CLIPRECTS);
-      OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1);
-      OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) |
-               BRW_VE0_VALID |
-               (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
-               (0 << BRW_VE0_SRC_OFFSET_SHIFT));
-      OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) |
-               (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
-               (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
-               (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT));
-      ADVANCE_BATCH();
-      return 0;
-   }
-
-   /* Now emit vertex element (VEP) state packets.
-    *
-    */
-   BEGIN_BATCH(1 + nr * 2, IGNORE_CLIPRECTS);
-   OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + nr * 2) - 2));
-   for (i = 0; i < nr; i++) {
-      const struct pipe_vertex_element *input = &brw->curr.vertex_element[i];
-      uint32_t format = brw_translate_surface_format( input->src_format );
-      uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC;
-      uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC;
-      uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC;
-      uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC;
-
-      switch (input->nr_components) {
-      case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
-      case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
-      case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
-      case 3: comp3 = BRW_VE1_COMPONENT_STORE_1_FLT;
-        break;
-      }
-
-      OUT_BATCH((input->vertex_buffer_index << BRW_VE0_INDEX_SHIFT) |
-               BRW_VE0_VALID |
-               (format << BRW_VE0_FORMAT_SHIFT) |
-               (input->src_offset << BRW_VE0_SRC_OFFSET_SHIFT));
+   brw_batchbuffer_data(brw->batch, brw_velems, size * 4, IGNORE_CLIPRECTS);
 
-      if (BRW_IS_IGDNG(brw))
-          OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
-                    (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
-                    (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
-                    (comp3 << BRW_VE1_COMPONENT_3_SHIFT));
-      else
-          OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
-                    (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
-                    (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
-                    (comp3 << BRW_VE1_COMPONENT_3_SHIFT) |
-                    ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
-   }
-   ADVANCE_BATCH();
    return 0;
 }
 
@@ -396,10 +202,11 @@ static int brw_emit_vertices( struct brw_context *brw )
    if (ret)
       return ret;
 
+   /* XXX should separate this? */
    ret = brw_emit_vertex_elements( brw );
    if (ret)
       return ret;
-   
+
    return 0;
 }
 
@@ -407,7 +214,8 @@ static int brw_emit_vertices( struct brw_context *brw )
 const struct brw_tracked_state brw_vertices = {
    .dirty = {
       .mesa = (PIPE_NEW_INDEX_RANGE |
-               PIPE_NEW_VERTEX_BUFFER),
+               PIPE_NEW_VERTEX_BUFFER |
+               PIPE_NEW_VERTEX_ELEMENT),
       .brw = BRW_NEW_BATCH,
       .cache = 0,
    },
index e3c48e3149357d88dc11fd51653324280998898a..d6a840857ec0fd7b20e853847ccc683e624b210c 100644 (file)
 #include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_structs.h"
 
+#include "util/u_memory.h"
+#include "util/u_format.h"
 
-static void brw_set_vertex_elements( struct pipe_context *pipe,
-                                    unsigned count,
-                                    const struct pipe_vertex_element *elements )
+
+static unsigned brw_translate_surface_format( unsigned id )
+{
+   switch (id) {
+   case PIPE_FORMAT_R64_FLOAT:
+      return BRW_SURFACEFORMAT_R64_FLOAT;
+   case PIPE_FORMAT_R64G64_FLOAT:
+      return BRW_SURFACEFORMAT_R64G64_FLOAT;
+   case PIPE_FORMAT_R64G64B64_FLOAT:
+      return BRW_SURFACEFORMAT_R64G64B64_FLOAT;
+   case PIPE_FORMAT_R64G64B64A64_FLOAT:
+      return BRW_SURFACEFORMAT_R64G64B64A64_FLOAT;
+
+   case PIPE_FORMAT_R32_FLOAT:
+      return BRW_SURFACEFORMAT_R32_FLOAT;
+   case PIPE_FORMAT_R32G32_FLOAT:
+      return BRW_SURFACEFORMAT_R32G32_FLOAT;
+   case PIPE_FORMAT_R32G32B32_FLOAT:
+      return BRW_SURFACEFORMAT_R32G32B32_FLOAT;
+   case PIPE_FORMAT_R32G32B32A32_FLOAT:
+      return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
+
+   case PIPE_FORMAT_R32_UNORM:
+      return BRW_SURFACEFORMAT_R32_UNORM;
+   case PIPE_FORMAT_R32G32_UNORM:
+      return BRW_SURFACEFORMAT_R32G32_UNORM;
+   case PIPE_FORMAT_R32G32B32_UNORM:
+      return BRW_SURFACEFORMAT_R32G32B32_UNORM;
+   case PIPE_FORMAT_R32G32B32A32_UNORM:
+      return BRW_SURFACEFORMAT_R32G32B32A32_UNORM;
+
+   case PIPE_FORMAT_R32_USCALED:
+      return BRW_SURFACEFORMAT_R32_USCALED;
+   case PIPE_FORMAT_R32G32_USCALED:
+      return BRW_SURFACEFORMAT_R32G32_USCALED;
+   case PIPE_FORMAT_R32G32B32_USCALED:
+      return BRW_SURFACEFORMAT_R32G32B32_USCALED;
+   case PIPE_FORMAT_R32G32B32A32_USCALED:
+      return BRW_SURFACEFORMAT_R32G32B32A32_USCALED;
+
+   case PIPE_FORMAT_R32_SNORM:
+      return BRW_SURFACEFORMAT_R32_SNORM;
+   case PIPE_FORMAT_R32G32_SNORM:
+      return BRW_SURFACEFORMAT_R32G32_SNORM;
+   case PIPE_FORMAT_R32G32B32_SNORM:
+      return BRW_SURFACEFORMAT_R32G32B32_SNORM;
+   case PIPE_FORMAT_R32G32B32A32_SNORM:
+      return BRW_SURFACEFORMAT_R32G32B32A32_SNORM;
+
+   case PIPE_FORMAT_R32_SSCALED:
+      return BRW_SURFACEFORMAT_R32_SSCALED;
+   case PIPE_FORMAT_R32G32_SSCALED:
+      return BRW_SURFACEFORMAT_R32G32_SSCALED;
+   case PIPE_FORMAT_R32G32B32_SSCALED:
+      return BRW_SURFACEFORMAT_R32G32B32_SSCALED;
+   case PIPE_FORMAT_R32G32B32A32_SSCALED:
+      return BRW_SURFACEFORMAT_R32G32B32A32_SSCALED;
+
+   case PIPE_FORMAT_R16_UNORM:
+      return BRW_SURFACEFORMAT_R16_UNORM;
+   case PIPE_FORMAT_R16G16_UNORM:
+      return BRW_SURFACEFORMAT_R16G16_UNORM;
+   case PIPE_FORMAT_R16G16B16_UNORM:
+      return BRW_SURFACEFORMAT_R16G16B16_UNORM;
+   case PIPE_FORMAT_R16G16B16A16_UNORM:
+      return BRW_SURFACEFORMAT_R16G16B16A16_UNORM;
+
+   case PIPE_FORMAT_R16_USCALED:
+      return BRW_SURFACEFORMAT_R16_USCALED;
+   case PIPE_FORMAT_R16G16_USCALED:
+      return BRW_SURFACEFORMAT_R16G16_USCALED;
+   case PIPE_FORMAT_R16G16B16_USCALED:
+      return BRW_SURFACEFORMAT_R16G16B16_USCALED;
+   case PIPE_FORMAT_R16G16B16A16_USCALED:
+      return BRW_SURFACEFORMAT_R16G16B16A16_USCALED;
+
+   case PIPE_FORMAT_R16_SNORM:
+      return BRW_SURFACEFORMAT_R16_SNORM;
+   case PIPE_FORMAT_R16G16_SNORM:
+      return BRW_SURFACEFORMAT_R16G16_SNORM;
+   case PIPE_FORMAT_R16G16B16_SNORM:
+      return BRW_SURFACEFORMAT_R16G16B16_SNORM;
+   case PIPE_FORMAT_R16G16B16A16_SNORM:
+      return BRW_SURFACEFORMAT_R16G16B16A16_SNORM;
+
+   case PIPE_FORMAT_R16_SSCALED:
+      return BRW_SURFACEFORMAT_R16_SSCALED;
+   case PIPE_FORMAT_R16G16_SSCALED:
+      return BRW_SURFACEFORMAT_R16G16_SSCALED;
+   case PIPE_FORMAT_R16G16B16_SSCALED:
+      return BRW_SURFACEFORMAT_R16G16B16_SSCALED;
+   case PIPE_FORMAT_R16G16B16A16_SSCALED:
+      return BRW_SURFACEFORMAT_R16G16B16A16_SSCALED;
+
+   case PIPE_FORMAT_R8_UNORM:
+      return BRW_SURFACEFORMAT_R8_UNORM;
+   case PIPE_FORMAT_R8G8_UNORM:
+      return BRW_SURFACEFORMAT_R8G8_UNORM;
+   case PIPE_FORMAT_R8G8B8_UNORM:
+      return BRW_SURFACEFORMAT_R8G8B8_UNORM;
+   case PIPE_FORMAT_R8G8B8A8_UNORM:
+      return BRW_SURFACEFORMAT_R8G8B8A8_UNORM;
+
+   case PIPE_FORMAT_R8_USCALED:
+      return BRW_SURFACEFORMAT_R8_USCALED;
+   case PIPE_FORMAT_R8G8_USCALED:
+      return BRW_SURFACEFORMAT_R8G8_USCALED;
+   case PIPE_FORMAT_R8G8B8_USCALED:
+      return BRW_SURFACEFORMAT_R8G8B8_USCALED;
+   case PIPE_FORMAT_R8G8B8A8_USCALED:
+      return BRW_SURFACEFORMAT_R8G8B8A8_USCALED;
+
+   case PIPE_FORMAT_R8_SNORM:
+      return BRW_SURFACEFORMAT_R8_SNORM;
+   case PIPE_FORMAT_R8G8_SNORM:
+      return BRW_SURFACEFORMAT_R8G8_SNORM;
+   case PIPE_FORMAT_R8G8B8_SNORM:
+      return BRW_SURFACEFORMAT_R8G8B8_SNORM;
+   case PIPE_FORMAT_R8G8B8A8_SNORM:
+      return BRW_SURFACEFORMAT_R8G8B8A8_SNORM;
+
+   case PIPE_FORMAT_R8_SSCALED:
+      return BRW_SURFACEFORMAT_R8_SSCALED;
+   case PIPE_FORMAT_R8G8_SSCALED:
+      return BRW_SURFACEFORMAT_R8G8_SSCALED;
+   case PIPE_FORMAT_R8G8B8_SSCALED:
+      return BRW_SURFACEFORMAT_R8G8B8_SSCALED;
+   case PIPE_FORMAT_R8G8B8A8_SSCALED:
+      return BRW_SURFACEFORMAT_R8G8B8A8_SSCALED;
+
+   default:
+      assert(0);
+      return 0;
+   }
+}
+
+static void brw_translate_vertex_elements(struct brw_context *brw,
+                                          struct brw_vertex_element_packet *brw_velems,
+                                          const struct pipe_vertex_element *attribs,
+                                          unsigned count)
+{
+   unsigned i;
+
+   /* If the VS doesn't read any inputs (calculating vertex position from
+    * a state variable for some reason, for example), emit a single pad
+    * VERTEX_ELEMENT struct and bail.
+    *
+    * The stale VB state stays in place, but they don't do anything unless
+    * a VE loads from them.
+    */
+   brw_velems->header.opcode = CMD_VERTEX_ELEMENT;
+
+   if (count == 0) {
+      brw_velems->header.length = 1;
+      brw_velems->ve[0].ve0.src_offset = 0;
+      brw_velems->ve[0].ve0.src_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
+      brw_velems->ve[0].ve0.valid = 1;
+      brw_velems->ve[0].ve0.vertex_buffer_index = 0;
+      brw_velems->ve[0].ve1.dst_offset = 0;
+      brw_velems->ve[0].ve1.vfcomponent0 = BRW_VE1_COMPONENT_STORE_0;
+      brw_velems->ve[0].ve1.vfcomponent1 = BRW_VE1_COMPONENT_STORE_0;
+      brw_velems->ve[0].ve1.vfcomponent2 = BRW_VE1_COMPONENT_STORE_0;
+      brw_velems->ve[0].ve1.vfcomponent3 = BRW_VE1_COMPONENT_STORE_1_FLT;
+      return;
+   }
+
+
+   /* Now emit vertex element (VEP) state packets.
+    *
+    */
+   brw_velems->header.length = (1 + count * 2) - 2;
+   for (i = 0; i < count; i++) {
+      const struct pipe_vertex_element *input = &attribs[i];
+      unsigned nr_components = util_format_get_nr_components(input->src_format);
+
+      uint32_t format = brw_translate_surface_format( input->src_format );
+      uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC;
+      uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC;
+      uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC;
+      uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC;
+
+      switch (nr_components) {
+      case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
+      case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
+      case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
+      case 3: comp3 = BRW_VE1_COMPONENT_STORE_1_FLT;
+         break;
+      }
+
+      brw_velems->ve[i].ve0.src_offset = input->src_offset;
+      brw_velems->ve[i].ve0.src_format = format;
+      brw_velems->ve[i].ve0.valid = 1;
+      brw_velems->ve[i].ve0.vertex_buffer_index = input->vertex_buffer_index;
+      brw_velems->ve[i].ve1.vfcomponent0 = comp0;
+      brw_velems->ve[i].ve1.vfcomponent1 = comp1;
+      brw_velems->ve[i].ve1.vfcomponent2 = comp2;
+      brw_velems->ve[i].ve1.vfcomponent3 = comp3;
+
+      if (BRW_IS_IGDNG(brw))
+         brw_velems->ve[i].ve1.dst_offset = 0;
+      else
+         brw_velems->ve[i].ve1.dst_offset = i * 4;
+   }
+}
+
+static void* brw_create_vertex_elements_state( struct pipe_context *pipe,
+                                               unsigned count,
+                                               const struct pipe_vertex_element *attribs )
 {
+   /* note: for the brw_swtnl.c code (if ever we need draw fallback) we'd also need
+      to store the original data */
    struct brw_context *brw = brw_context(pipe);
+   struct brw_vertex_element_packet *velems;
+   assert(count <= BRW_VEP_MAX);
+   velems = (struct brw_vertex_element_packet *) MALLOC(sizeof(struct brw_vertex_element_packet));
+   if (velems) {
+      brw_translate_vertex_elements(brw, velems, attribs, count);
+   }
+   return velems;
+}
 
-   memcpy(brw->curr.vertex_element, elements, count * sizeof(elements[0]));
-   brw->curr.num_vertex_elements = count;
+static void brw_bind_vertex_elements_state(struct pipe_context *pipe,
+                                           void *velems)
+{
+   struct brw_context *brw = brw_context(pipe);
+   struct brw_vertex_element_packet *brw_velems = (struct brw_vertex_element_packet *) velems;
+
+   brw->curr.velems = brw_velems;
 
    brw->state.dirty.mesa |= PIPE_NEW_VERTEX_ELEMENT;
 }
 
+static void brw_delete_vertex_elements_state(struct pipe_context *pipe, void *velems)
+{
+   FREE( velems );
+}
+
 
 static void brw_set_vertex_buffers(struct pipe_context *pipe,
-                                  unsigned count,
-                                  const struct pipe_vertex_buffer *buffers)
+                                   unsigned count,
+                                   const struct pipe_vertex_buffer *buffers)
 {
    struct brw_context *brw = brw_context(pipe);
    unsigned i;
@@ -49,7 +278,9 @@ void
 brw_pipe_vertex_init( struct brw_context *brw )
 {
    brw->base.set_vertex_buffers = brw_set_vertex_buffers;
-   brw->base.set_vertex_elements = brw_set_vertex_elements;
+   brw->base.create_vertex_elements_state = brw_create_vertex_elements_state;
+   brw->base.bind_vertex_elements_state = brw_bind_vertex_elements_state;
+   brw->base.delete_vertex_elements_state = brw_delete_vertex_elements_state;
 }
 
 
index bf10bc04de75ebdf84c61a7abba2f092b96b4bd9..e97ddeb5e1cf729759b39afccc7e71c2e817b55a 100644 (file)
@@ -28,7 +28,7 @@
   * Authors:
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
-        
+
 
 #ifndef BRW_STRUCTS_H
 #define BRW_STRUCTS_H
@@ -1149,7 +1149,7 @@ struct brw_vertex_element_state
       GLuint valid:1; 
       GLuint vertex_buffer_index:5; 
    } ve0;
-   
+
    struct
    {
       GLuint dst_offset:8; 
index 8248b2a4132f9f8497d4c052e4202f7ab38ef5da..baf0ae440166762c525b966a0dbd2729a243a6f2 100644 (file)
@@ -377,6 +377,42 @@ identity_delete_vs_state(struct pipe_context *_pipe,
                          vs);
 }
 
+
+static void *
+identity_create_vertex_elements_state(struct pipe_context *_pipe,
+                                      unsigned num_elements,
+                                      const struct pipe_vertex_element *vertex_elements)
+{
+   struct identity_context *id_pipe = identity_context(_pipe);
+   struct pipe_context *pipe = id_pipe->pipe;
+
+   return pipe->create_vertex_elements_state(pipe,
+                                             num_elements,
+                                             vertex_elements);
+}
+
+static void
+identity_bind_vertex_elements_state(struct pipe_context *_pipe,
+                                    void *velems)
+{
+   struct identity_context *id_pipe = identity_context(_pipe);
+   struct pipe_context *pipe = id_pipe->pipe;
+
+   pipe->bind_vertex_elements_state(pipe,
+                                    velems);
+}
+
+static void
+identity_delete_vertex_elements_state(struct pipe_context *_pipe,
+                                      void *velems)
+{
+   struct identity_context *id_pipe = identity_context(_pipe);
+   struct pipe_context *pipe = id_pipe->pipe;
+
+   pipe->delete_vertex_elements_state(pipe,
+                                      velems);
+}
+
 static void
 identity_set_blend_color(struct pipe_context *_pipe,
                          const struct pipe_blend_color *blend_color)
@@ -563,20 +599,6 @@ identity_set_vertex_buffers(struct pipe_context *_pipe,
                             num_buffers,
                             buffers);
 }
-
-static void
-identity_set_vertex_elements(struct pipe_context *_pipe,
-                             unsigned num_elements,
-                             const struct pipe_vertex_element *vertex_elements)
-{
-   struct identity_context *id_pipe = identity_context(_pipe);
-   struct pipe_context *pipe = id_pipe->pipe;
-
-   pipe->set_vertex_elements(pipe,
-                             num_elements,
-                             vertex_elements);
-}
-
 static void
 identity_surface_copy(struct pipe_context *_pipe,
                       struct pipe_surface *_dst,
@@ -733,6 +755,9 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
    id_pipe->base.create_vs_state = identity_create_vs_state;
    id_pipe->base.bind_vs_state = identity_bind_vs_state;
    id_pipe->base.delete_vs_state = identity_delete_vs_state;
+   id_pipe->base.create_vertex_elements_state = identity_create_vertex_elements_state;
+   id_pipe->base.bind_vertex_elements_state = identity_bind_vertex_elements_state;
+   id_pipe->base.delete_vertex_elements_state = identity_delete_vertex_elements_state;
    id_pipe->base.set_blend_color = identity_set_blend_color;
    id_pipe->base.set_stencil_ref = identity_set_stencil_ref;
    id_pipe->base.set_clip_state = identity_set_clip_state;
@@ -744,7 +769,6 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
    id_pipe->base.set_fragment_sampler_textures = identity_set_fragment_sampler_textures;
    id_pipe->base.set_vertex_sampler_textures = identity_set_vertex_sampler_textures;
    id_pipe->base.set_vertex_buffers = identity_set_vertex_buffers;
-   id_pipe->base.set_vertex_elements = identity_set_vertex_elements;
    id_pipe->base.surface_copy = identity_surface_copy;
    id_pipe->base.surface_fill = identity_surface_fill;
    id_pipe->base.clear = identity_clear;
index e31ae6a3fc167cda917dfe44ce4b3c28fdcaed4f..d94efec16a45a1b6092c8a61f04f24433c1eabf1 100644 (file)
@@ -145,6 +145,10 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv )
    llvmpipe->pipe.bind_vs_state   = llvmpipe_bind_vs_state;
    llvmpipe->pipe.delete_vs_state = llvmpipe_delete_vs_state;
 
+   llvmpipe->pipe.create_vertex_elements_state = llvmpipe_create_vertex_elements_state;
+   llvmpipe->pipe.bind_vertex_elements_state = llvmpipe_bind_vertex_elements_state;
+   llvmpipe->pipe.delete_vertex_elements_state = llvmpipe_delete_vertex_elements_state;
+
    llvmpipe->pipe.set_blend_color = llvmpipe_set_blend_color;
    llvmpipe->pipe.set_stencil_ref = llvmpipe_set_stencil_ref;
    llvmpipe->pipe.set_clip_state = llvmpipe_set_clip_state;
@@ -157,7 +161,6 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv )
    llvmpipe->pipe.set_viewport_state = llvmpipe_set_viewport_state;
 
    llvmpipe->pipe.set_vertex_buffers = llvmpipe_set_vertex_buffers;
-   llvmpipe->pipe.set_vertex_elements = llvmpipe_set_vertex_elements;
 
    llvmpipe->pipe.draw_arrays = llvmpipe_draw_arrays;
    llvmpipe->pipe.draw_elements = llvmpipe_draw_elements;
index 955c7eb8e0e97c0ce4e0289a00e14296a8615a67..217ec59b68847eb7e6dc7d55d4b8dfe845aca58b 100644 (file)
@@ -46,6 +46,7 @@ struct lp_fragment_shader;
 struct lp_vertex_shader;
 struct lp_blend_state;
 struct setup_context;
+struct lp_velems_state;
 
 struct llvmpipe_context {
    struct pipe_context pipe;  /**< base class */
@@ -58,6 +59,7 @@ struct llvmpipe_context {
    const struct pipe_rasterizer_state *rasterizer;
    struct lp_fragment_shader *fs;
    const struct lp_vertex_shader *vs;
+   const struct lp_velems_state *velems;
 
    /** Other rendering state */
    struct pipe_blend_color blend_color;
@@ -71,13 +73,11 @@ struct llvmpipe_context {
    struct pipe_texture *vertex_textures[PIPE_MAX_VERTEX_SAMPLERS];
    struct pipe_viewport_state viewport;
    struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
-   struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
 
    unsigned num_samplers;
    unsigned num_textures;
    unsigned num_vertex_samplers;
    unsigned num_vertex_textures;
-   unsigned num_vertex_elements;
    unsigned num_vertex_buffers;
 
    unsigned dirty; /**< Mask of LP_NEW_x flags */
index 9beba32271f5022f805d45fc3176b5b5c1d65260..6dbdc195bfc3065f10ba3f95c94b6181d25c4fe5 100644 (file)
@@ -119,6 +119,10 @@ struct lp_vertex_shader {
    struct draw_vertex_shader *draw_data;
 };
 
+struct lp_velems_state {
+   unsigned count;
+   struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
+};
 
 
 void *
@@ -176,8 +180,14 @@ void *llvmpipe_create_vs_state(struct pipe_context *,
 void llvmpipe_bind_vs_state(struct pipe_context *, void *);
 void llvmpipe_delete_vs_state(struct pipe_context *, void *);
 
+void *llvmpipe_create_vertex_elements_state(struct pipe_context *,
+                                            unsigned count,
+                                            const struct pipe_vertex_element *);
+void llvmpipe_bind_vertex_elements_state(struct pipe_context *, void *);
+void llvmpipe_delete_vertex_elements_state(struct pipe_context *, void *);
+
 void llvmpipe_set_polygon_stipple( struct pipe_context *,
-                                 const struct pipe_poly_stipple * );
+                                   const struct pipe_poly_stipple * );
 
 void llvmpipe_set_scissor_state( struct pipe_context *,
                                  const struct pipe_scissor_state * );
@@ -194,10 +204,6 @@ llvmpipe_set_vertex_sampler_textures(struct pipe_context *,
 void llvmpipe_set_viewport_state( struct pipe_context *,
                                   const struct pipe_viewport_state * );
 
-void llvmpipe_set_vertex_elements(struct pipe_context *,
-                                  unsigned count,
-                                  const struct pipe_vertex_element *);
-
 void llvmpipe_set_vertex_buffers(struct pipe_context *,
                                  unsigned count,
                                  const struct pipe_vertex_buffer *);
index 57ac25ea0cba07d27deaaa1920b088096ece5bbf..f6427aa908e2c84c8acddf3738a9bccc672901d0 100644 (file)
 #include "draw/draw_context.h"
 
 
+void *
+llvmpipe_create_vertex_elements_state(struct pipe_context *pipe,
+                                      unsigned count,
+                                      const struct pipe_vertex_element *attribs)
+{
+   struct lp_velems_state *velems;
+   assert(count <= PIPE_MAX_ATTRIBS);
+   velems = (struct lp_velems_state *) MALLOC(sizeof(struct lp_velems_state));
+   if (velems) {
+      velems->count = count;
+      memcpy(velems->velem, attribs, sizeof(*attribs) * count);
+   }
+   return velems;
+}
+
 void
-llvmpipe_set_vertex_elements(struct pipe_context *pipe,
-                             unsigned count,
-                             const struct pipe_vertex_element *attribs)
+llvmpipe_bind_vertex_elements_state(struct pipe_context *pipe,
+                                    void *velems)
 {
    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+   struct lp_velems_state *lp_velems = (struct lp_velems_state *) velems;
 
-   assert(count <= PIPE_MAX_ATTRIBS);
-
-   memcpy(llvmpipe->vertex_element, attribs,
-          count * sizeof(struct pipe_vertex_element));
-   llvmpipe->num_vertex_elements = count;
+   llvmpipe->velems = lp_velems;
 
    llvmpipe->dirty |= LP_NEW_VERTEX;
 
-   draw_set_vertex_elements(llvmpipe->draw, count, attribs);
+   if (velems)
+      draw_set_vertex_elements(llvmpipe->draw, lp_velems->count, lp_velems->velem);
 }
 
+void
+llvmpipe_delete_vertex_elements_state(struct pipe_context *pipe, void *velems)
+{
+   FREE( velems );
+}
 
 void
 llvmpipe_set_vertex_buffers(struct pipe_context *pipe,
index f7d10a591f62324230ec4a9654f495d482e66150..b1ad686022a0905f838cca62dfb3804f3e2046c8 100644 (file)
@@ -124,7 +124,7 @@ nouveau_screen_map_flags(unsigned pipe)
        if (pipe & PIPE_BUFFER_USAGE_DONTBLOCK)
                flags |= NOUVEAU_BO_NOWAIT;
        else
-       if (pipe & 0 /*PIPE_BUFFER_USAGE_UNSYNCHRONIZED*/)
+       if (pipe & PIPE_BUFFER_USAGE_UNSYNCHRONIZED)
                flags |= NOUVEAU_BO_NOSYNC;
 
        return flags;
index a10114beab98d9ffb9204a448d6ed0d0692cbcc3..7f16e31c3f0838b706edb0e4f8216a2ae2ba2309 100644 (file)
@@ -88,4 +88,104 @@ static INLINE unsigned log2i(unsigned i)
        return r;
 }
 
+struct u_split_prim {
+   void *priv;
+   void (*emit)(void *priv, unsigned start, unsigned count);
+   void (*edge)(void *priv, boolean enabled);
+
+   unsigned mode;
+   unsigned start;
+   unsigned p_start;
+   unsigned p_end;
+
+   int repeat_first:1;
+   int close_first:1;
+   int edgeflag_off:1;
+};
+
+static inline void
+u_split_prim_init(struct u_split_prim *s,
+                  unsigned mode, unsigned start, unsigned count)
+{
+   if (mode == PIPE_PRIM_LINE_LOOP) {
+      s->mode = PIPE_PRIM_LINE_STRIP;
+      s->close_first = 1;
+   } else {
+      s->mode = mode;
+      s->close_first = 0;
+   }
+   s->start = start;
+   s->p_start = start;
+   s->p_end = start + count;
+   s->edgeflag_off = 0;
+   s->repeat_first = 0;
+}
+
+static INLINE boolean
+u_split_prim_next(struct u_split_prim *s, unsigned max_verts)
+{
+   int repeat = 0;
+
+   if (s->repeat_first) {
+      s->emit(s->priv, s->start, 1);
+      max_verts--;
+      if (s->edgeflag_off) {
+         s->edge(s->priv, TRUE);
+         s->edgeflag_off = FALSE;
+      }
+   }
+
+   if (s->p_start + s->close_first + max_verts >= s->p_end) {
+      s->emit(s->priv, s->p_start, s->p_end - s->p_start);
+      if (s->close_first)
+         s->emit(s->priv, s->start, 1);
+      return TRUE;
+   }
+
+   switch (s->mode) {
+   case PIPE_PRIM_LINES:
+      max_verts &= ~1;
+      break;
+   case PIPE_PRIM_LINE_STRIP:
+      repeat = 1;
+      break;
+   case PIPE_PRIM_POLYGON:
+      max_verts--;
+      s->emit(s->priv, s->p_start, max_verts);
+      s->edge(s->priv, FALSE);
+      s->emit(s->priv, s->p_start + max_verts, 1);
+      s->p_start += max_verts;
+      s->repeat_first = TRUE;
+      s->edgeflag_off = TRUE;
+      return FALSE;
+   case PIPE_PRIM_TRIANGLES:
+      max_verts = max_verts - (max_verts % 3);
+      break;
+   case PIPE_PRIM_TRIANGLE_STRIP:
+      /* to ensure winding stays correct, always split
+       * on an even number of generated triangles
+       */
+      max_verts = max_verts & ~1;
+      repeat = 2;
+      break;
+   case PIPE_PRIM_TRIANGLE_FAN:
+      s->repeat_first = TRUE;
+      repeat = 1;
+      break;
+   case PIPE_PRIM_QUADS:
+      max_verts &= ~3;
+      break;
+   case PIPE_PRIM_QUAD_STRIP:
+      max_verts &= ~1;
+      repeat = 2;
+      break;
+   default:
+      break;
+   }
+
+   s->emit (s->priv, s->p_start, max_verts);
+   s->p_start += (max_verts - repeat);
+   return FALSE;
+}
+
 #endif
index ea259aadf359f28222d68a5f089098c2afab467e..1786460aec10b9a4ad1274bfb39048c5834e170c 100644 (file)
@@ -107,6 +107,11 @@ struct nv30_state {
        struct nouveau_stateobj *hw[NV30_STATE_MAX];
 };
 
+struct nv30_vtxelt_state {
+       struct pipe_vertex_element pipe[16];
+       unsigned num_elements;
+};
+
 struct nv30_context {
        struct pipe_context pipe;
 
@@ -142,8 +147,7 @@ struct nv30_context {
        unsigned dirty_samplers;
        struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
        unsigned vtxbuf_nr;
-       struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
-       unsigned vtxelt_nr;
+       struct nv30_vtxelt_state *vtxelt;
 };
 
 static INLINE struct nv30_context *
index 5ef74a832dcea3835f74e344626ceca0a1410a35..bfa27b632f70e24a5f759ddcaff10a2d6a46ecac 100644 (file)
@@ -236,5 +236,5 @@ nv30_screen_init_miptree_functions(struct pipe_screen *pscreen)
        pscreen->get_tex_surface = nv30_miptree_surface_new;
        pscreen->tex_surface_destroy = nv30_miptree_surface_del;
 
-       nouveau_screen(pscreen)->texture_blanket = nv50_miptree_blanket;
+       nouveau_screen(pscreen)->texture_blanket = nv30_miptree_blanket;
 }
index d911c80707404656a07a15090912432f03159a13..24b15a63ac4a5956ebf3bfb81511f3af988346da 100644 (file)
@@ -669,15 +669,34 @@ nv30_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
        /*nv30->draw_dirty |= NV30_NEW_ARRAYS;*/
 }
 
+static void *
+nv30_vtxelts_state_create(struct pipe_context *pipe,
+                         unsigned num_elements,
+                         const struct pipe_vertex_element *elements)
+{
+       struct nv30_vtxelt_state *cso = CALLOC_STRUCT(nv30_vtxelt_state);
+
+       assert(num_elements < 16); /* not doing fallbacks yet */
+       cso->num_elements = num_elements;
+       memcpy(cso->pipe, elements, num_elements * sizeof(*elements));
+
+/*     nv30_vtxelt_construct(cso);*/
+
+       return (void *)cso;
+}
+
 static void
-nv30_set_vertex_elements(struct pipe_context *pipe, unsigned count,
-                        const struct pipe_vertex_element *ve)
+nv30_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso)
 {
-       struct nv30_context *nv30 = nv30_context(pipe);
+       FREE(hwcso);
+}
 
-       memcpy(nv30->vtxelt, ve, sizeof(*ve) * count);
-       nv30->vtxelt_nr = count;
+static void
+nv30_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+       struct nv30_context *nv30 = nv30_context(pipe);
 
+       nv30->vtxelt = hwcso;
        nv30->dirty |= NV30_NEW_ARRAYS;
        /*nv30->draw_dirty |= NV30_NEW_ARRAYS;*/
 }
@@ -722,7 +741,10 @@ nv30_init_state_functions(struct nv30_context *nv30)
        nv30->pipe.set_scissor_state = nv30_set_scissor_state;
        nv30->pipe.set_viewport_state = nv30_set_viewport_state;
 
+       nv30->pipe.create_vertex_elements_state = nv30_vtxelts_state_create;
+       nv30->pipe.delete_vertex_elements_state = nv30_vtxelts_state_delete;
+       nv30->pipe.bind_vertex_elements_state = nv30_vtxelts_state_bind;
+
        nv30->pipe.set_vertex_buffers = nv30_set_vertex_buffers;
-       nv30->pipe.set_vertex_elements = nv30_set_vertex_elements;
 }
 
index e48823a91381657a21ba43708db7fb4eac30a51f..f3856bb5a5ebe7773f4ec525c09ca45dcf1b10a4 100644 (file)
@@ -492,16 +492,16 @@ nv30_vbo_validate(struct nv30_context *nv30)
        int hw;
 
        vtxbuf = so_new(3, 17, 18);
-       so_method(vtxbuf, rankine, NV34TCL_VTXBUF_ADDRESS(0), nv30->vtxelt_nr);
+       so_method(vtxbuf, rankine, NV34TCL_VTXBUF_ADDRESS(0), nv30->vtxelt->num_elements);
        vtxfmt = so_new(1, 16, 0);
-       so_method(vtxfmt, rankine, NV34TCL_VTXFMT(0), nv30->vtxelt_nr);
+       so_method(vtxfmt, rankine, NV34TCL_VTXFMT(0), nv30->vtxelt->num_elements);
 
-       for (hw = 0; hw < nv30->vtxelt_nr; hw++) {
+       for (hw = 0; hw < nv30->vtxelt->num_elements; hw++) {
                struct pipe_vertex_element *ve;
                struct pipe_vertex_buffer *vb;
                unsigned type, ncomp;
 
-               ve = &nv30->vtxelt[hw];
+               ve = &nv30->vtxelt->pipe[hw];
                vb = &nv30->vtxbuf[ve->vertex_buffer_index];
 
                if (!vb->stride) {
index 97fb6a2ef947b4c1495979273e981efcfe07fe26..2550ec654b31d4aec4b91a6b3901afa8d7c1ef9e 100644 (file)
@@ -107,6 +107,12 @@ struct nv40_state {
        struct nouveau_stateobj *hw[NV40_STATE_MAX];
 };
 
+
+struct nv40_vtxelt_state {
+       struct pipe_vertex_element pipe[16];
+       unsigned num_elements;
+};
+
 struct nv40_context {
        struct pipe_context pipe;
 
@@ -157,8 +163,7 @@ struct nv40_context {
        unsigned dirty_samplers;
        struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
        unsigned vtxbuf_nr;
-       struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
-       unsigned vtxelt_nr;
+       struct nv40_vtxelt_state *vtxelt;
 };
 
 static INLINE struct nv40_context *
index 4f28675e64c5f9b8d6bf305a958c91fe59a9bad8..ee471e6ce4c2c94c650ddbfd81a1a2dae3968d42 100644 (file)
@@ -684,15 +684,34 @@ nv40_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
        nv40->draw_dirty |= NV40_NEW_ARRAYS;
 }
 
+static void *
+nv40_vtxelts_state_create(struct pipe_context *pipe,
+                         unsigned num_elements,
+                         const struct pipe_vertex_element *elements)
+{
+       struct nv40_vtxelt_state *cso = CALLOC_STRUCT(nv40_vtxelt_state);
+
+       assert(num_elements < 16); /* not doing fallbacks yet */
+       cso->num_elements = num_elements;
+       memcpy(cso->pipe, elements, num_elements * sizeof(*elements));
+
+/*     nv40_vtxelt_construct(cso);*/
+
+       return (void *)cso;
+}
+
 static void
-nv40_set_vertex_elements(struct pipe_context *pipe, unsigned count,
-                        const struct pipe_vertex_element *ve)
+nv40_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso)
 {
-       struct nv40_context *nv40 = nv40_context(pipe);
+       FREE(hwcso);
+}
 
-       memcpy(nv40->vtxelt, ve, sizeof(*ve) * count);
-       nv40->vtxelt_nr = count;
+static void
+nv40_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+       struct nv40_context *nv40 = nv40_context(pipe);
 
+       nv40->vtxelt = hwcso;
        nv40->dirty |= NV40_NEW_ARRAYS;
        nv40->draw_dirty |= NV40_NEW_ARRAYS;
 }
@@ -737,7 +756,10 @@ nv40_init_state_functions(struct nv40_context *nv40)
        nv40->pipe.set_scissor_state = nv40_set_scissor_state;
        nv40->pipe.set_viewport_state = nv40_set_viewport_state;
 
+       nv40->pipe.create_vertex_elements_state = nv40_vtxelts_state_create;
+       nv40->pipe.delete_vertex_elements_state = nv40_vtxelts_state_delete;
+       nv40->pipe.bind_vertex_elements_state = nv40_vtxelts_state_bind;
+
        nv40->pipe.set_vertex_buffers = nv40_set_vertex_buffers;
-       nv40->pipe.set_vertex_elements = nv40_set_vertex_elements;
 }
 
index 8990f303ce4e300d7465d5da6fe38571d2768270..297d71f4fac67eb6fd53a97e3994c7b87ee8a474 100644 (file)
@@ -174,7 +174,7 @@ nv40_state_validate_swtnl(struct nv40_context *nv40)
 
        if (nv40->draw_dirty & NV40_NEW_ARRAYS) {
                draw_set_vertex_buffers(draw, nv40->vtxbuf_nr, nv40->vtxbuf);
-               draw_set_vertex_elements(draw, nv40->vtxelt_nr, nv40->vtxelt);  
+               draw_set_vertex_elements(draw, nv40->vtxelt->num_elements, nv40->vtxelt->pipe); 
        }
 
        nv40_state_do_validate(nv40, swtnl_states);
index 7812460d2ed2ad1e26f239c9a9c2bd93c13b6641..fabdf4bf23b95b35e8f56580924cebc93ca34b56 100644 (file)
@@ -493,16 +493,16 @@ nv40_vbo_validate(struct nv40_context *nv40)
        int hw;
 
        vtxbuf = so_new(3, 17, 18);
-       so_method(vtxbuf, curie, NV40TCL_VTXBUF_ADDRESS(0), nv40->vtxelt_nr);
+       so_method(vtxbuf, curie, NV40TCL_VTXBUF_ADDRESS(0), nv40->vtxelt->num_elements);
        vtxfmt = so_new(1, 16, 0);
-       so_method(vtxfmt, curie, NV40TCL_VTXFMT(0), nv40->vtxelt_nr);
+       so_method(vtxfmt, curie, NV40TCL_VTXFMT(0), nv40->vtxelt->num_elements);
 
-       for (hw = 0; hw < nv40->vtxelt_nr; hw++) {
+       for (hw = 0; hw < nv40->vtxelt->num_elements; hw++) {
                struct pipe_vertex_element *ve;
                struct pipe_vertex_buffer *vb;
                unsigned type, ncomp;
 
-               ve = &nv40->vtxelt[hw];
+               ve = &nv40->vtxelt->pipe[hw];
                vb = &nv40->vtxbuf[ve->vertex_buffer_index];
 
                if (!vb->stride) {
index 612aea28a34b5c001752bf398f26386cd84dabbf..5d622e1c13cdb60a94f95b703704f79163b67ed3 100644 (file)
@@ -16,6 +16,7 @@ C_SOURCES = \
        nv50_surface.c \
        nv50_tex.c \
        nv50_transfer.c \
-       nv50_vbo.c
+       nv50_vbo.c \
+       nv50_push.c
 
 include ../../Makefile.template
index e0b2d2880b00b1e42bfe7d02a3ba121ea06c208b..8afc95c9fc6b7942ce9e5bd974244f240861fffb 100644 (file)
@@ -36,7 +36,7 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers,
        struct pipe_framebuffer_state *fb = &nv50->framebuffer;
        unsigned mode = 0, i;
 
-       if (!nv50_state_validate(nv50))
+       if (!nv50_state_validate(nv50, 64))
                return;
 
        if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) {
index 7be12fcdef466a27681a66cda1141522f47299bf..0eb42f323ffac218b71d1edd65a27f8f73a24ff7 100644 (file)
@@ -46,43 +46,13 @@ static void
 nv50_destroy(struct pipe_context *pipe)
 {
        struct nv50_context *nv50 = nv50_context(pipe);
+       int i;
 
-        if (nv50->state.fb)
-               so_ref(NULL, &nv50->state.fb);
-       if (nv50->state.blend)
-               so_ref(NULL, &nv50->state.blend);
-       if (nv50->state.blend_colour)
-               so_ref(NULL, &nv50->state.blend_colour);
-       if (nv50->state.zsa)
-               so_ref(NULL, &nv50->state.zsa);
-       if (nv50->state.rast)
-               so_ref(NULL, &nv50->state.rast);
-       if (nv50->state.stipple)
-               so_ref(NULL, &nv50->state.stipple);
-       if (nv50->state.scissor)
-               so_ref(NULL, &nv50->state.scissor);
-       if (nv50->state.viewport)
-               so_ref(NULL, &nv50->state.viewport);
-       if (nv50->state.tsc_upload)
-               so_ref(NULL, &nv50->state.tsc_upload);
-       if (nv50->state.tic_upload)
-               so_ref(NULL, &nv50->state.tic_upload);
-       if (nv50->state.vertprog)
-               so_ref(NULL, &nv50->state.vertprog);
-       if (nv50->state.fragprog)
-               so_ref(NULL, &nv50->state.fragprog);
-       if (nv50->state.geomprog)
-               so_ref(NULL, &nv50->state.geomprog);
-       if (nv50->state.fp_linkage)
-               so_ref(NULL, &nv50->state.fp_linkage);
-       if (nv50->state.gp_linkage)
-               so_ref(NULL, &nv50->state.gp_linkage);
-       if (nv50->state.vtxfmt)
-               so_ref(NULL, &nv50->state.vtxfmt);
-       if (nv50->state.vtxbuf)
-               so_ref(NULL, &nv50->state.vtxbuf);
-       if (nv50->state.vtxattr)
-               so_ref(NULL, &nv50->state.vtxattr);
+       for (i = 0; i < 64; i++) {
+               if (!nv50->state.hw[i])
+                       continue;
+               so_ref(NULL, &nv50->state.hw[i]);
+       }
 
        draw_destroy(nv50->draw);
 
@@ -123,7 +93,6 @@ nv50_create(struct pipe_screen *pscreen, void *priv)
        nv50->pipe.is_buffer_referenced = nouveau_is_buffer_referenced;
 
        screen->base.channel->user_private = nv50;
-       screen->base.channel->flush_notify = nv50_state_flush_notify;
 
        nv50_init_surface_functions(nv50);
        nv50_init_state_functions(nv50);
index 044437e75fa290f3c9e704f8b3ea168359ab1fc5..8793c2aac5d340b7915c2e9c7ce6271cc0ed6d41 100644 (file)
@@ -72,6 +72,12 @@ struct nv50_sampler_stateobj {
        unsigned tsc[8];
 };
 
+struct nv50_vtxelt_stateobj {
+       struct pipe_vertex_element pipe[16];
+       unsigned num_elements;
+       uint32_t hw[16];
+};
+
 static INLINE unsigned
 get_tile_height(uint32_t tile_mode)
 {
@@ -117,30 +123,12 @@ nv50_surface(struct pipe_surface *pt)
 }
 
 struct nv50_state {
-       unsigned dirty;
+       struct nouveau_stateobj *hw[64];
+       uint64_t hw_dirty;
 
-       struct nouveau_stateobj *fb;
-       struct nouveau_stateobj *blend;
-       struct nouveau_stateobj *blend_colour;
-       struct nouveau_stateobj *zsa;
-       struct nouveau_stateobj *stencil_ref;
-       struct nouveau_stateobj *rast;
-       struct nouveau_stateobj *stipple;
-       struct nouveau_stateobj *scissor;
-       unsigned scissor_enabled;
-       struct nouveau_stateobj *viewport;
-       struct nouveau_stateobj *tsc_upload;
-       struct nouveau_stateobj *tic_upload;
        unsigned miptree_nr[PIPE_SHADER_TYPES];
-       struct nouveau_stateobj *vertprog;
-       struct nouveau_stateobj *fragprog;
-       struct nouveau_stateobj *geomprog;
-       struct nouveau_stateobj *fp_linkage;
-       struct nouveau_stateobj *gp_linkage;
-       struct nouveau_stateobj *vtxfmt;
        struct nouveau_stateobj *vtxbuf;
        struct nouveau_stateobj *vtxattr;
-       struct nouveau_stateobj *instbuf;
        unsigned vtxelt_nr;
 };
 
@@ -169,14 +157,13 @@ struct nv50_context {
        struct pipe_buffer *constbuf[PIPE_SHADER_TYPES];
        struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
        unsigned vtxbuf_nr;
-       struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
-       unsigned vtxelt_nr;
+       struct nv50_vtxelt_stateobj *vtxelt;
        struct nv50_sampler_stateobj *sampler[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
        unsigned sampler_nr[PIPE_SHADER_TYPES];
        struct nv50_miptree *miptree[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
        unsigned miptree_nr[PIPE_SHADER_TYPES];
 
-       uint16_t vbo_fifo;
+       unsigned vbo_fifo;
 };
 
 static INLINE struct nv50_context *
@@ -218,24 +205,36 @@ extern void nv50_draw_elements_instanced(struct pipe_context *pipe,
                                         unsigned count,
                                         unsigned startInstance,
                                         unsigned instanceCount);
-extern void nv50_vbo_validate(struct nv50_context *nv50);
+extern void nv50_vtxelt_construct(struct nv50_vtxelt_stateobj *cso);
+extern struct nouveau_stateobj *nv50_vbo_validate(struct nv50_context *nv50);
+
+/* nv50_push.c */
+extern void
+nv50_push_elements_instanced(struct pipe_context *, struct pipe_buffer *,
+                            unsigned idxsize, unsigned mode, unsigned start,
+                            unsigned count, unsigned i_start,
+                            unsigned i_count);
 
 /* nv50_clear.c */
 extern void nv50_clear(struct pipe_context *pipe, unsigned buffers,
                       const float *rgba, double depth, unsigned stencil);
 
 /* nv50_program.c */
-extern void nv50_vertprog_validate(struct nv50_context *nv50);
-extern void nv50_fragprog_validate(struct nv50_context *nv50);
-extern void nv50_geomprog_validate(struct nv50_context *nv50);
-extern void nv50_fp_linkage_validate(struct nv50_context *nv50);
-extern void nv50_gp_linkage_validate(struct nv50_context *nv50);
+extern struct nouveau_stateobj *
+nv50_vertprog_validate(struct nv50_context *nv50);
+extern struct nouveau_stateobj *
+nv50_fragprog_validate(struct nv50_context *nv50);
+extern struct nouveau_stateobj *
+nv50_geomprog_validate(struct nv50_context *nv50);
+extern struct nouveau_stateobj *
+nv50_fp_linkage_validate(struct nv50_context *nv50);
+extern struct nouveau_stateobj *
+nv50_gp_linkage_validate(struct nv50_context *nv50);
 extern void nv50_program_destroy(struct nv50_context *nv50,
                                 struct nv50_program *p);
 
 /* nv50_state_validate.c */
-extern boolean nv50_state_validate(struct nv50_context *nv50);
-extern void nv50_state_flush_notify(struct nouveau_channel *chan);
+extern boolean nv50_state_validate(struct nv50_context *nv50, unsigned dwords);
 
 extern void nv50_so_init_sifc(struct nv50_context *nv50,
                              struct nouveau_stateobj *so,
@@ -243,7 +242,8 @@ extern void nv50_so_init_sifc(struct nv50_context *nv50,
                              unsigned offset, unsigned size);
 
 /* nv50_tex.c */
-extern void nv50_tex_validate(struct nv50_context *);
+extern void nv50_tex_relocs(struct nv50_context *);
+extern struct nouveau_stateobj *nv50_tex_validate(struct nv50_context *);
 
 /* nv50_transfer.c */
 extern void
@@ -257,4 +257,35 @@ nv50_upload_sifc(struct nv50_context *nv50,
 struct pipe_context *
 nv50_create(struct pipe_screen *pscreen, void *priv);
 
+static INLINE unsigned
+nv50_prim(unsigned mode)
+{
+       switch (mode) {
+       case PIPE_PRIM_POINTS: return NV50TCL_VERTEX_BEGIN_POINTS;
+       case PIPE_PRIM_LINES: return NV50TCL_VERTEX_BEGIN_LINES;
+       case PIPE_PRIM_LINE_LOOP: return NV50TCL_VERTEX_BEGIN_LINE_LOOP;
+       case PIPE_PRIM_LINE_STRIP: return NV50TCL_VERTEX_BEGIN_LINE_STRIP;
+       case PIPE_PRIM_TRIANGLES: return NV50TCL_VERTEX_BEGIN_TRIANGLES;
+       case PIPE_PRIM_TRIANGLE_STRIP:
+               return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP;
+       case PIPE_PRIM_TRIANGLE_FAN: return NV50TCL_VERTEX_BEGIN_TRIANGLE_FAN;
+       case PIPE_PRIM_QUADS: return NV50TCL_VERTEX_BEGIN_QUADS;
+       case PIPE_PRIM_QUAD_STRIP: return NV50TCL_VERTEX_BEGIN_QUAD_STRIP;
+       case PIPE_PRIM_POLYGON: return NV50TCL_VERTEX_BEGIN_POLYGON;
+       case PIPE_PRIM_LINES_ADJACENCY:
+               return NV50TCL_VERTEX_BEGIN_LINES_ADJACENCY;
+       case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+               return NV50TCL_VERTEX_BEGIN_LINE_STRIP_ADJACENCY;
+       case PIPE_PRIM_TRIANGLES_ADJACENCY:
+               return NV50TCL_VERTEX_BEGIN_TRIANGLES_ADJACENCY;
+       case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
+               return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP_ADJACENCY;
+       default:
+               break;
+       }
+
+       NOUVEAU_ERR("invalid primitive type %d\n", mode);
+       return NV50TCL_VERTEX_BEGIN_POINTS;
+}
+
 #endif
index 2372cbbef69e17eb2b603f663b154057d0cc2246..c857816b31a70a2cad7063aba3a1a3c78fa3d89c 100644 (file)
@@ -4270,7 +4270,7 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
        FREE(up);
 }
 
-void
+struct nouveau_stateobj *
 nv50_vertprog_validate(struct nv50_context *nv50)
 {
        struct nouveau_grobj *tesla = nv50->screen->tesla;
@@ -4286,6 +4286,9 @@ nv50_vertprog_validate(struct nv50_context *nv50)
        nv50_program_validate_data(nv50, p);
        nv50_program_validate_code(nv50, p);
 
+       if (!(nv50->dirty & NV50_NEW_VERTPROG))
+               return NULL;
+
        so = so_new(5, 7, 2);
        so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
        so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
@@ -4301,11 +4304,10 @@ nv50_vertprog_validate(struct nv50_context *nv50)
        so_data  (so, p->cfg.high_temp);
        so_method(so, tesla, NV50TCL_VP_START_ID, 1);
        so_data  (so, 0); /* program start offset */
-       so_ref(so, &nv50->state.vertprog);
-       so_ref(NULL, &so);
+       return so;
 }
 
-void
+struct nouveau_stateobj *
 nv50_fragprog_validate(struct nv50_context *nv50)
 {
        struct nouveau_grobj *tesla = nv50->screen->tesla;
@@ -4321,6 +4323,9 @@ nv50_fragprog_validate(struct nv50_context *nv50)
        nv50_program_validate_data(nv50, p);
        nv50_program_validate_code(nv50, p);
 
+       if (!(nv50->dirty & NV50_NEW_FRAGPROG))
+               return NULL;
+
        so = so_new(6, 7, 2);
        so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
        so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
@@ -4337,11 +4342,10 @@ nv50_fragprog_validate(struct nv50_context *nv50)
        so_data  (so, p->cfg.regs[3]);
        so_method(so, tesla, NV50TCL_FP_START_ID, 1);
        so_data  (so, 0); /* program start offset */
-       so_ref(so, &nv50->state.fragprog);
-       so_ref(NULL, &so);
+       return so;
 }
 
-void
+struct nouveau_stateobj *
 nv50_geomprog_validate(struct nv50_context *nv50)
 {
        struct nouveau_grobj *tesla = nv50->screen->tesla;
@@ -4357,6 +4361,9 @@ nv50_geomprog_validate(struct nv50_context *nv50)
        nv50_program_validate_data(nv50, p);
        nv50_program_validate_code(nv50, p);
 
+       if (!(nv50->dirty & NV50_NEW_GEOMPROG))
+               return NULL;
+
        so = so_new(6, 7, 2);
        so_method(so, tesla, NV50TCL_GP_ADDRESS_HIGH, 2);
        so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
@@ -4373,8 +4380,7 @@ nv50_geomprog_validate(struct nv50_context *nv50)
        so_data  (so, p->cfg.vert_count);
        so_method(so, tesla, NV50TCL_GP_START_ID, 1);
        so_data  (so, 0);
-       so_ref(so, &nv50->state.geomprog);
-       so_ref(NULL, &so);
+       return so;
 }
 
 static uint32_t
@@ -4454,7 +4460,7 @@ nv50_vec4_map(uint32_t *map32, int mid, uint8_t zval, uint32_t lin[4],
        return mid;
 }
 
-void
+struct nouveau_stateobj *
 nv50_fp_linkage_validate(struct nv50_context *nv50)
 {
        struct nouveau_grobj *tesla = nv50->screen->tesla;
@@ -4580,8 +4586,7 @@ nv50_fp_linkage_validate(struct nv50_context *nv50)
        so_method(so, tesla, NV50TCL_GP_ENABLE, 1);
        so_data  (so, (vp->type == PIPE_SHADER_GEOMETRY) ? 1 : 0);
 
-       so_ref(so, &nv50->state.fp_linkage);
-       so_ref(NULL, &so);
+       return so;
 }
 
 static int
@@ -4615,7 +4620,7 @@ construct_vp_gp_mapping(uint32_t *map32, int m,
        return m;
 }
 
-void
+struct nouveau_stateobj *
 nv50_gp_linkage_validate(struct nv50_context *nv50)
 {
        struct nouveau_grobj *tesla = nv50->screen->tesla;
@@ -4625,10 +4630,8 @@ nv50_gp_linkage_validate(struct nv50_context *nv50)
        uint32_t map[16];
        int m = 0;
 
-       if (!gp) {
-               so_ref(NULL, &nv50->state.gp_linkage);
-               return;
-       }
+       if (!gp)
+               return NULL;
        memset(map, 0, sizeof(map));
 
        m = construct_vp_gp_mapping(map, m, vp, gp);
@@ -4646,8 +4649,7 @@ nv50_gp_linkage_validate(struct nv50_context *nv50)
        so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), m);
        so_datap (so, map, m);
 
-       so_ref(so, &nv50->state.gp_linkage);
-       so_ref(NULL, &so);
+       return so;
 }
 
 void
diff --git a/src/gallium/drivers/nv50/nv50_push.c b/src/gallium/drivers/nv50/nv50_push.c
new file mode 100644 (file)
index 0000000..96a1f32
--- /dev/null
@@ -0,0 +1,326 @@
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+
+#include "nouveau/nouveau_util.h"
+#include "nv50_context.h"
+
+struct push_context {
+   struct nv50_context *nv50;
+
+   unsigned vtx_size;
+
+   void *idxbuf;
+   unsigned idxsize;
+
+   float edgeflag;
+   int edgeflag_attr;
+
+   struct {
+      void *map;
+      unsigned stride;
+      unsigned divisor;
+      unsigned step;
+      void (*push)(struct nouveau_channel *, void *);
+   } attr[16];
+   unsigned attr_nr;
+};
+
+static void
+emit_b32_1(struct nouveau_channel *chan, void *data)
+{
+   uint32_t *v = data;
+
+   OUT_RING(chan, v[0]);
+}
+
+static void
+emit_b32_2(struct nouveau_channel *chan, void *data)
+{
+   uint32_t *v = data;
+
+   OUT_RING(chan, v[0]);
+   OUT_RING(chan, v[1]);
+}
+
+static void
+emit_b32_3(struct nouveau_channel *chan, void *data)
+{
+   uint32_t *v = data;
+
+   OUT_RING(chan, v[0]);
+   OUT_RING(chan, v[1]);
+   OUT_RING(chan, v[2]);
+}
+
+static void
+emit_b32_4(struct nouveau_channel *chan, void *data)
+{
+   uint32_t *v = data;
+
+   OUT_RING(chan, v[0]);
+   OUT_RING(chan, v[1]);
+   OUT_RING(chan, v[2]);
+   OUT_RING(chan, v[3]);
+}
+
+static void
+emit_b16_1(struct nouveau_channel *chan, void *data)
+{
+   uint16_t *v = data;
+
+   OUT_RING(chan, v[0]);
+}
+
+static void
+emit_b16_3(struct nouveau_channel *chan, void *data)
+{
+   uint16_t *v = data;
+
+   OUT_RING(chan, (v[1] << 16) | v[0]);
+   OUT_RING(chan, v[2]);
+}
+
+static void
+emit_b08_1(struct nouveau_channel *chan, void *data)
+{
+   uint8_t *v = data;
+
+   OUT_RING(chan, v[0]);
+}
+
+static void
+emit_b08_3(struct nouveau_channel *chan, void *data)
+{
+   uint8_t *v = data;
+
+   OUT_RING(chan, (v[2] << 16) | (v[1] << 8) | v[0]);
+}
+
+static INLINE void
+emit_vertex(struct push_context *ctx, unsigned n)
+{
+   struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
+   struct nouveau_channel *chan = tesla->channel;
+   int i;
+
+   if (ctx->edgeflag_attr < 16) {
+      float *edgeflag = ctx->attr[ctx->edgeflag_attr].map +
+                        ctx->attr[ctx->edgeflag_attr].stride * n;
+
+      if (*edgeflag != ctx->edgeflag) {
+         BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
+         OUT_RING  (chan, *edgeflag ? 1 : 0);
+         ctx->edgeflag = *edgeflag;
+      }
+   }
+
+   BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, ctx->vtx_size);
+   for (i = 0; i < ctx->attr_nr; i++)
+      ctx->attr[i].push(chan, ctx->attr[i].map + ctx->attr[i].stride * n);
+}
+
+static void
+emit_edgeflag(void *priv, boolean enabled)
+{
+   struct push_context *ctx = priv;
+   struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
+   struct nouveau_channel *chan = tesla->channel;
+
+   BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
+   OUT_RING  (chan, enabled ? 1 : 0);
+}
+
+static void
+emit_elt08(void *priv, unsigned start, unsigned count)
+{
+   struct push_context *ctx = priv;
+   uint8_t *idxbuf = ctx->idxbuf;
+
+   while (count--)
+      emit_vertex(ctx, idxbuf[start++]);
+}
+
+static void
+emit_elt16(void *priv, unsigned start, unsigned count)
+{
+   struct push_context *ctx = priv;
+   uint16_t *idxbuf = ctx->idxbuf;
+
+   while (count--)
+      emit_vertex(ctx, idxbuf[start++]);
+}
+
+static void
+emit_elt32(void *priv, unsigned start, unsigned count)
+{
+   struct push_context *ctx = priv;
+   uint32_t *idxbuf = ctx->idxbuf;
+
+   while (count--)
+      emit_vertex(ctx, idxbuf[start++]);
+}
+
+static void
+emit_verts(void *priv, unsigned start, unsigned count)
+{
+   while (count--)
+      emit_vertex(priv, start++);
+}
+
+void
+nv50_push_elements_instanced(struct pipe_context *pipe,
+                             struct pipe_buffer *idxbuf, unsigned idxsize,
+                             unsigned mode, unsigned start, unsigned count,
+                             unsigned i_start, unsigned i_count)
+{
+   struct nv50_context *nv50 = nv50_context(pipe);
+   struct nouveau_grobj *tesla = nv50->screen->tesla;
+   struct nouveau_channel *chan = tesla->channel;
+   struct push_context ctx;
+   const unsigned p_overhead = 4 + /* begin/end */
+                               4; /* potential edgeflag enable/disable */
+   const unsigned v_overhead = 1 + /* VERTEX_DATA packet header */
+                               2; /* potential edgeflag modification */
+   struct u_split_prim s;
+   unsigned vtx_size;
+   boolean nzi = FALSE;
+   int i;
+
+   ctx.nv50 = nv50;
+   ctx.attr_nr = 0;
+   ctx.idxbuf = NULL;
+   ctx.vtx_size = 0;
+   ctx.edgeflag = 0.5f;
+   ctx.edgeflag_attr = nv50->vertprog->cfg.edgeflag_in;
+
+   /* map vertex buffers, determine vertex size */
+   for (i = 0; i < nv50->vtxelt->num_elements; i++) {
+      struct pipe_vertex_element *ve = &nv50->vtxelt->pipe[i];
+      struct pipe_vertex_buffer *vb = &nv50->vtxbuf[ve->vertex_buffer_index];
+      struct nouveau_bo *bo = nouveau_bo(vb->buffer);
+      unsigned size, nr_components, n;
+
+      if (!(nv50->vbo_fifo & (1 << i)))
+         continue;
+      n = ctx.attr_nr++;
+
+      if (nouveau_bo_map(bo, NOUVEAU_BO_RD)) {
+         assert(bo->map);
+         return;
+      }
+      ctx.attr[n].map = bo->map + vb->buffer_offset + ve->src_offset;
+      nouveau_bo_unmap(bo);
+
+      ctx.attr[n].stride = vb->stride;
+      ctx.attr[n].divisor = ve->instance_divisor;
+      if (ctx.attr[n].divisor) {
+         ctx.attr[n].step = i_start % ve->instance_divisor;
+         ctx.attr[n].map += i_start * vb->stride;
+      }
+
+      size = util_format_get_component_bits(ve->src_format,
+                                            UTIL_FORMAT_COLORSPACE_RGB, 0);
+      nr_components = util_format_get_nr_components(ve->src_format);
+      switch (size) {
+      case 8:
+         switch (nr_components) {
+         case 1: ctx.attr[n].push = emit_b08_1; break;
+         case 2: ctx.attr[n].push = emit_b16_1; break;
+         case 3: ctx.attr[n].push = emit_b08_3; break;
+         case 4: ctx.attr[n].push = emit_b32_1; break;
+         }
+         ctx.vtx_size++;
+         break;
+      case 16:
+         switch (nr_components) {
+         case 1: ctx.attr[n].push = emit_b16_1; break;
+         case 2: ctx.attr[n].push = emit_b32_1; break;
+         case 3: ctx.attr[n].push = emit_b16_3; break;
+         case 4: ctx.attr[n].push = emit_b32_2; break;
+         }
+         ctx.vtx_size += (nr_components + 1) >> 1;
+         break;
+      case 32:
+         switch (nr_components) {
+         case 1: ctx.attr[n].push = emit_b32_1; break;
+         case 2: ctx.attr[n].push = emit_b32_2; break;
+         case 3: ctx.attr[n].push = emit_b32_3; break;
+         case 4: ctx.attr[n].push = emit_b32_4; break;
+         }
+         ctx.vtx_size += nr_components;
+         break;
+      default:
+         assert(0);
+         return;
+      }
+   }
+   vtx_size = ctx.vtx_size + v_overhead;
+
+   /* map index buffer, if present */
+   if (idxbuf) {
+      struct nouveau_bo *bo = nouveau_bo(idxbuf);
+
+      if (nouveau_bo_map(bo, NOUVEAU_BO_RD)) {
+         assert(bo->map);
+         return;
+      }
+      ctx.idxbuf = bo->map;
+      ctx.idxsize = idxsize;
+      nouveau_bo_unmap(bo);
+   }
+
+   s.priv = &ctx;
+   s.edge = emit_edgeflag;
+   if (idxbuf) {
+      if (idxsize == 1)
+         s.emit = emit_elt08;
+      else
+      if (idxsize == 2)
+         s.emit = emit_elt16;
+      else
+         s.emit = emit_elt32;
+   } else
+      s.emit = emit_verts;
+
+   /* per-instance loop */
+   BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
+   OUT_RING  (chan, NV50_CB_AUX | (24 << 8));
+   OUT_RING  (chan, i_start);
+   while (i_count--) {
+      unsigned max_verts;
+      boolean done;
+
+      for (i = 0; i < ctx.attr_nr; i++) {
+         if (!ctx.attr[i].divisor ||
+              ctx.attr[i].divisor != ++ctx.attr[i].step)
+            continue;
+         ctx.attr[i].step = 0;
+         ctx.attr[i].map += ctx.attr[i].stride;
+      }
+
+      u_split_prim_init(&s, mode, start, count);
+      do {
+         if (AVAIL_RING(chan) < p_overhead + (6 * vtx_size)) {
+            FIRE_RING(chan);
+            if (!nv50_state_validate(nv50, p_overhead + (6 * vtx_size))) {
+               assert(0);
+               return;
+            }
+         }
+
+         max_verts  = AVAIL_RING(chan);
+         max_verts -= p_overhead;
+         max_verts /= vtx_size;
+
+         BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
+         OUT_RING  (chan, nv50_prim(s.mode) | (nzi ? (1 << 28) : 0));
+         done = u_split_prim_next(&s, max_verts);
+         BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
+         OUT_RING  (chan, 0);
+      } while (!done);
+
+      nzi = TRUE;
+   }
+}
index eed6031eafab715247115bad6992eb2e4e6ca530..7e2e8aa336e7baa58b46c5f1144a3c5478de9a93 100644 (file)
@@ -95,6 +95,8 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen,
 static int
 nv50_screen_get_param(struct pipe_screen *pscreen, int param)
 {
+       struct nv50_screen *screen = nv50_screen(pscreen);
+
        switch (param) {
        case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
                return 32;
@@ -132,9 +134,9 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param)
        case PIPE_CAP_BLEND_EQUATION_SEPARATE:
                return 1;
        case NOUVEAU_CAP_HW_VTXBUF:
-               return 1;
+               return screen->force_push ? 0 : 1;
        case NOUVEAU_CAP_HW_IDXBUF:
-               return 1;
+               return screen->force_push ? 0 : 1;
        case PIPE_CAP_INDEP_BLEND_ENABLE:
                return 1;
        case PIPE_CAP_INDEP_BLEND_FUNC:
@@ -202,28 +204,6 @@ nv50_screen_destroy(struct pipe_screen *pscreen)
        FREE(screen);
 }
 
-static int
-nv50_pre_pipebuffer_map(struct pipe_screen *pscreen, struct pipe_buffer *pb,
-       unsigned usage)
-{
-       struct nv50_screen *screen = nv50_screen(pscreen);
-       struct nv50_context *ctx = screen->cur_ctx;
-
-       if (!(pb->usage & PIPE_BUFFER_USAGE_VERTEX))
-               return 0;
-
-       /* Our vtxbuf got mapped, it can no longer be considered part of current
-        * state, remove it to avoid emitting reloc markers.
-        */
-       if (ctx && ctx->state.vtxbuf && so_bo_is_reloc(ctx->state.vtxbuf,
-                       nouveau_bo(pb))) {
-               so_ref(NULL, &ctx->state.vtxbuf);
-               ctx->dirty |= NV50_NEW_ARRAYS;
-       }
-
-       return 0;
-}
-
 struct pipe_screen *
 nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 {
@@ -252,7 +232,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
        pscreen->get_paramf = nv50_screen_get_paramf;
        pscreen->is_format_supported = nv50_screen_is_format_supported;
        pscreen->context_create = nv50_create;
-       screen->base.pre_pipebuffer_map_callback = nv50_pre_pipebuffer_map;
 
        nv50_screen_init_miptree_functions(pscreen);
        nv50_transfer_init_screen_functions(pscreen);
@@ -508,10 +487,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
        so_method(so, screen->tesla, NV50TCL_LINKED_TSC, 1);
        so_data  (so, 1);
 
-       /* activate first scissor rectangle */
-       so_method(so, screen->tesla, NV50TCL_SCISSOR_ENABLE(0), 1);
-       so_data  (so, 1);
-
        so_method(so, screen->tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
        so_data  (so, 1); /* default edgeflag to TRUE */
 
@@ -520,6 +495,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
        so_ref (NULL, &so);
        nouveau_pushbuf_flush(chan, 0);
 
+       screen->force_push = debug_get_bool_option("NV50_ALWAYS_PUSH", FALSE);
        return pscreen;
 }
 
index 2687b72127797a208edfa8db5996a59f8b37b6f1..d1bc80cb9edfcf51667e1ce8d3a486af0a9303ac 100644 (file)
@@ -28,6 +28,8 @@ struct nv50_screen {
        struct nouveau_bo *tsc;
 
        struct nouveau_stateobj *static_init;
+
+       boolean force_push;
 };
 
 static INLINE struct nv50_screen *
index 7d304907b652240da70bc57878c56511e7deef34..b0e5552eff45957a838f7c47c2dfa4f2a25cc8af 100644 (file)
@@ -302,7 +302,7 @@ static void *
 nv50_rasterizer_state_create(struct pipe_context *pipe,
                             const struct pipe_rasterizer_state *cso)
 {
-       struct nouveau_stateobj *so = so_new(15, 21, 0);
+       struct nouveau_stateobj *so = so_new(16, 22, 0);
        struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla;
        struct nv50_rasterizer_stateobj *rso =
                CALLOC_STRUCT(nv50_rasterizer_stateobj);
@@ -314,6 +314,9 @@ nv50_rasterizer_state_create(struct pipe_context *pipe,
         *      - point_sprite / sprite_coord_mode
         */
 
+       so_method(so, tesla, NV50TCL_SCISSOR_ENABLE(0), 1);
+       so_data  (so, cso->scissor);
+
        so_method(so, tesla, NV50TCL_SHADE_MODEL, 1);
        so_data  (so, cso->flatshade ? NV50TCL_SHADE_MODEL_FLAT :
                                       NV50TCL_SHADE_MODEL_SMOOTH);
@@ -720,15 +723,34 @@ nv50_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
        nv50->dirty |= NV50_NEW_ARRAYS;
 }
 
+static void *
+nv50_vtxelts_state_create(struct pipe_context *pipe,
+                         unsigned num_elements,
+                         const struct pipe_vertex_element *elements)
+{
+       struct nv50_vtxelt_stateobj *cso = CALLOC_STRUCT(nv50_vtxelt_stateobj);
+
+       assert(num_elements < 16); /* not doing fallbacks yet */
+       cso->num_elements = num_elements;
+       memcpy(cso->pipe, elements, num_elements * sizeof(*elements));
+
+       nv50_vtxelt_construct(cso);
+
+       return (void *)cso;
+}
+
 static void
-nv50_set_vertex_elements(struct pipe_context *pipe, unsigned count,
-                        const struct pipe_vertex_element *ve)
+nv50_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso)
 {
-       struct nv50_context *nv50 = nv50_context(pipe);
+       FREE(hwcso);
+}
 
-       memcpy(nv50->vtxelt, ve, sizeof(*ve) * count);
-       nv50->vtxelt_nr = count;
+static void
+nv50_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+       struct nv50_context *nv50 = nv50_context(pipe);
 
+       nv50->vtxelt = hwcso;
        nv50->dirty |= NV50_NEW_ARRAYS;
 }
 
@@ -778,7 +800,10 @@ nv50_init_state_functions(struct nv50_context *nv50)
        nv50->pipe.set_scissor_state = nv50_set_scissor_state;
        nv50->pipe.set_viewport_state = nv50_set_viewport_state;
 
+       nv50->pipe.create_vertex_elements_state = nv50_vtxelts_state_create;
+       nv50->pipe.delete_vertex_elements_state = nv50_vtxelts_state_delete;
+       nv50->pipe.bind_vertex_elements_state = nv50_vtxelts_state_bind;
+
        nv50->pipe.set_vertex_buffers = nv50_set_vertex_buffers;
-       nv50->pipe.set_vertex_elements = nv50_set_vertex_elements;
 }
 
index c974cc92dcc7d875f857b3f2775694a2ac4ea438..2c8e7ca7982cbd273c6fdd49d94e638fbab437b2 100644 (file)
@@ -25,8 +25,8 @@
 #include "nv50_context.h"
 #include "nouveau/nouveau_stateobj.h"
 
-static void
-nv50_state_validate_fb(struct nv50_context *nv50)
+static struct nouveau_stateobj *
+validate_fb(struct nv50_context *nv50)
 {
        struct nouveau_grobj *tesla = nv50->screen->tesla;
        struct nouveau_stateobj *so = so_new(32, 79, 18);
@@ -167,12 +167,7 @@ nv50_state_validate_fb(struct nv50_context *nv50)
        so_data  (so, w << 16);
        so_data  (so, h << 16);
 
-       /* we set scissors to framebuffer size when they're 'turned off' */
-       nv50->dirty |= NV50_NEW_SCISSOR;
-       so_ref(NULL, &nv50->state.scissor);
-
-       so_ref(so, &nv50->state.fb);
-       so_ref(NULL, &so);
+       return so;
 }
 
 static void
@@ -199,263 +194,256 @@ nv50_validate_samplers(struct nv50_context *nv50, struct nouveau_stateobj *so,
        }
 }
 
-static void
-nv50_state_emit(struct nv50_context *nv50)
+static struct nouveau_stateobj *
+validate_blend(struct nv50_context *nv50)
 {
-       struct nv50_screen *screen = nv50->screen;
-       struct nouveau_channel *chan = screen->base.channel;
+       struct nouveau_stateobj *so = NULL;
+       so_ref(nv50->blend->so, &so);
+       return so;
+}
 
-       /* XXX: this is racy for multiple contexts active on separate
-        * threads.
-        */
-       if (screen->cur_ctx != nv50) {
-               if (nv50->state.fb)
-                       nv50->state.dirty |= NV50_NEW_FRAMEBUFFER;
-               if (nv50->state.blend)
-                       nv50->state.dirty |= NV50_NEW_BLEND;
-               if (nv50->state.zsa)
-                       nv50->state.dirty |= NV50_NEW_ZSA;
-               if (nv50->state.vertprog)
-                       nv50->state.dirty |= NV50_NEW_VERTPROG;
-               if (nv50->state.fragprog)
-                       nv50->state.dirty |= NV50_NEW_FRAGPROG;
-               if (nv50->state.geomprog)
-                       nv50->state.dirty |= NV50_NEW_GEOMPROG;
-               if (nv50->state.rast)
-                       nv50->state.dirty |= NV50_NEW_RASTERIZER;
-               if (nv50->state.blend_colour)
-                       nv50->state.dirty |= NV50_NEW_BLEND_COLOUR;
-               if (nv50->state.stencil_ref)
-                       nv50->state.dirty |= NV50_NEW_STENCIL_REF;
-               if (nv50->state.stipple)
-                       nv50->state.dirty |= NV50_NEW_STIPPLE;
-               if (nv50->state.scissor)
-                       nv50->state.dirty |= NV50_NEW_SCISSOR;
-               if (nv50->state.viewport)
-                       nv50->state.dirty |= NV50_NEW_VIEWPORT;
-               if (nv50->state.tsc_upload)
-                       nv50->state.dirty |= NV50_NEW_SAMPLER;
-               if (nv50->state.tic_upload)
-                       nv50->state.dirty |= NV50_NEW_TEXTURE;
-               if (nv50->state.vtxfmt && nv50->state.vtxbuf)
-                       nv50->state.dirty |= NV50_NEW_ARRAYS;
-               screen->cur_ctx = nv50;
-       }
+static struct nouveau_stateobj *
+validate_zsa(struct nv50_context *nv50)
+{
+       struct nouveau_stateobj *so = NULL;
+       so_ref(nv50->zsa->so, &so);
+       return so;
+}
 
-       if (nv50->state.dirty & NV50_NEW_FRAMEBUFFER)
-               so_emit(chan, nv50->state.fb);
-       if (nv50->state.dirty & NV50_NEW_BLEND)
-               so_emit(chan, nv50->state.blend);
-       if (nv50->state.dirty & NV50_NEW_ZSA)
-               so_emit(chan, nv50->state.zsa);
-       if (nv50->state.dirty & NV50_NEW_VERTPROG)
-               so_emit(chan, nv50->state.vertprog);
-       if (nv50->state.dirty & NV50_NEW_FRAGPROG)
-               so_emit(chan, nv50->state.fragprog);
-       if (nv50->state.dirty & NV50_NEW_GEOMPROG && nv50->state.geomprog)
-               so_emit(chan, nv50->state.geomprog);
-       if (nv50->state.dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG |
-                                NV50_NEW_GEOMPROG | NV50_NEW_RASTERIZER))
-               so_emit(chan, nv50->state.fp_linkage);
-       if ((nv50->state.dirty & (NV50_NEW_VERTPROG | NV50_NEW_GEOMPROG))
-           && nv50->state.gp_linkage)
-               so_emit(chan, nv50->state.gp_linkage);
-       if (nv50->state.dirty & NV50_NEW_RASTERIZER)
-               so_emit(chan, nv50->state.rast);
-       if (nv50->state.dirty & NV50_NEW_BLEND_COLOUR)
-               so_emit(chan, nv50->state.blend_colour);
-       if (nv50->state.dirty & NV50_NEW_STENCIL_REF)
-               so_emit(chan, nv50->state.stencil_ref);
-       if (nv50->state.dirty & NV50_NEW_STIPPLE)
-               so_emit(chan, nv50->state.stipple);
-       if (nv50->state.dirty & NV50_NEW_SCISSOR)
-               so_emit(chan, nv50->state.scissor);
-       if (nv50->state.dirty & NV50_NEW_VIEWPORT)
-               so_emit(chan, nv50->state.viewport);
-       if (nv50->state.dirty & NV50_NEW_SAMPLER)
-               so_emit(chan, nv50->state.tsc_upload);
-       if (nv50->state.dirty & NV50_NEW_TEXTURE)
-               so_emit(chan, nv50->state.tic_upload);
-       if (nv50->state.dirty & NV50_NEW_ARRAYS) {
-               so_emit(chan, nv50->state.vtxfmt);
-               so_emit(chan, nv50->state.vtxbuf);
-               if (nv50->state.vtxattr)
-                       so_emit(chan, nv50->state.vtxattr);
-       }
-       nv50->state.dirty = 0;
+static struct nouveau_stateobj *
+validate_rast(struct nv50_context *nv50)
+{
+       struct nouveau_stateobj *so = NULL;
+       so_ref(nv50->rasterizer->so, &so);
+       return so;
 }
 
-void
-nv50_state_flush_notify(struct nouveau_channel *chan)
+static struct nouveau_stateobj *
+validate_blend_colour(struct nv50_context *nv50)
 {
-       struct nv50_context *nv50 = chan->user_private;
+       struct nouveau_grobj *tesla = nv50->screen->tesla;
+       struct nouveau_stateobj *so = so_new(1, 4, 0);
+
+       so_method(so, tesla, NV50TCL_BLEND_COLOR(0), 4);
+       so_data  (so, fui(nv50->blend_colour.color[0]));
+       so_data  (so, fui(nv50->blend_colour.color[1]));
+       so_data  (so, fui(nv50->blend_colour.color[2]));
+       so_data  (so, fui(nv50->blend_colour.color[3]));
+       return so;
+}
 
-       if (nv50->state.tic_upload && !(nv50->dirty & NV50_NEW_TEXTURE))
-               so_emit(chan, nv50->state.tic_upload);
+static struct nouveau_stateobj *
+validate_stencil_ref(struct nv50_context *nv50)
+{
+       struct nouveau_grobj *tesla = nv50->screen->tesla;
+       struct nouveau_stateobj *so = so = so_new(2, 2, 0);
 
-       so_emit_reloc_markers(chan, nv50->state.fb);
-       so_emit_reloc_markers(chan, nv50->state.vertprog);
-       so_emit_reloc_markers(chan, nv50->state.fragprog);
-       so_emit_reloc_markers(chan, nv50->state.vtxbuf);
-       so_emit_reloc_markers(chan, nv50->screen->static_init);
+       so_method(so, tesla, NV50TCL_STENCIL_FRONT_FUNC_REF, 1);
+       so_data  (so, nv50->stencil_ref.ref_value[0]);
+       so_method(so, tesla, NV50TCL_STENCIL_BACK_FUNC_REF, 1);
+       so_data  (so, nv50->stencil_ref.ref_value[1]);
+       return so;
+}
 
-       if (nv50->state.instbuf)
-               so_emit_reloc_markers(chan, nv50->state.instbuf);
+static struct nouveau_stateobj *
+validate_stipple(struct nv50_context *nv50)
+{
+       struct nouveau_grobj *tesla = nv50->screen->tesla;
+       struct nouveau_stateobj *so = so_new(1, 32, 0);
+       int i;
+
+       so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_PATTERN(0), 32);
+       for (i = 0; i < 32; i++)
+               so_data(so, util_bswap32(nv50->stipple.stipple[i]));
+       return so;
 }
 
-boolean
-nv50_state_validate(struct nv50_context *nv50)
+static struct nouveau_stateobj *
+validate_scissor(struct nv50_context *nv50)
 {
        struct nouveau_grobj *tesla = nv50->screen->tesla;
+        struct pipe_scissor_state *s = &nv50->scissor;
        struct nouveau_stateobj *so;
-       unsigned i;
 
-       if (nv50->dirty & NV50_NEW_FRAMEBUFFER)
-               nv50_state_validate_fb(nv50);
+       so = so_new(1, 2, 0);
+       so_method(so, tesla, NV50TCL_SCISSOR_HORIZ(0), 2);
+       so_data  (so, (s->maxx << 16) | s->minx);
+       so_data  (so, (s->maxy << 16) | s->miny);
+       return so;
+}
+
+static struct nouveau_stateobj *
+validate_viewport(struct nv50_context *nv50)
+{
+       struct nouveau_grobj *tesla = nv50->screen->tesla;
+       struct nouveau_stateobj *so = so_new(5, 9, 0);
+
+       so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE_X(0), 3);
+       so_data  (so, fui(nv50->viewport.translate[0]));
+       so_data  (so, fui(nv50->viewport.translate[1]));
+       so_data  (so, fui(nv50->viewport.translate[2]));
+       so_method(so, tesla, NV50TCL_VIEWPORT_SCALE_X(0), 3);
+       so_data  (so, fui(nv50->viewport.scale[0]));
+       so_data  (so, fui(nv50->viewport.scale[1]));
+       so_data  (so, fui(nv50->viewport.scale[2]));
+
+       so_method(so, tesla, NV50TCL_VIEWPORT_TRANSFORM_EN, 1);
+       so_data  (so, 1);
+       /* 0x0000 = remove whole primitive only (xyz)
+        * 0x1018 = remove whole primitive only (xy), clamp z
+        * 0x1080 = clip primitive (xyz)
+        * 0x1098 = clip primitive (xy), clamp z
+        */
+       so_method(so, tesla, NV50TCL_VIEW_VOLUME_CLIP_CTRL, 1);
+       so_data  (so, 0x1080);
+       /* no idea what 0f90 does */
+       so_method(so, tesla, 0x0f90, 1);
+       so_data  (so, 0);
+
+       return so;
+}
 
-       if (nv50->dirty & NV50_NEW_BLEND)
-               so_ref(nv50->blend->so, &nv50->state.blend);
+static struct nouveau_stateobj *
+validate_sampler(struct nv50_context *nv50)
+{
+       struct nouveau_grobj *tesla = nv50->screen->tesla;
+       struct nouveau_stateobj *so;
+       unsigned nr = 0, i;
 
-       if (nv50->dirty & NV50_NEW_ZSA)
-               so_ref(nv50->zsa->so, &nv50->state.zsa);
+       for (i = 0; i < PIPE_SHADER_TYPES; ++i)
+               nr += nv50->sampler_nr[i];
 
-       if (nv50->dirty & (NV50_NEW_VERTPROG | NV50_NEW_VERTPROG_CB))
-               nv50_vertprog_validate(nv50);
+       so = so_new(1 + 5 * PIPE_SHADER_TYPES,
+                   1 + 19 * PIPE_SHADER_TYPES + nr * 8,
+                   PIPE_SHADER_TYPES * 2);
 
-       if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB))
-               nv50_fragprog_validate(nv50);
+       nv50_validate_samplers(nv50, so, PIPE_SHADER_VERTEX);
+       nv50_validate_samplers(nv50, so, PIPE_SHADER_FRAGMENT);
 
-       if (nv50->dirty & (NV50_NEW_GEOMPROG | NV50_NEW_GEOMPROG_CB))
-               nv50_geomprog_validate(nv50);
+       so_method(so, tesla, 0x1334, 1); /* flush TSC */
+       so_data  (so, 0);
 
-       if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG |
-                          NV50_NEW_GEOMPROG | NV50_NEW_RASTERIZER))
-               nv50_fp_linkage_validate(nv50);
+       return so;
+}
 
-       if (nv50->dirty & (NV50_NEW_GEOMPROG | NV50_NEW_VERTPROG))
-               nv50_gp_linkage_validate(nv50);
+static struct nouveau_stateobj *
+validate_vtxbuf(struct nv50_context *nv50)
+{
+       struct nouveau_stateobj *so = NULL;
+       so_ref(nv50->state.vtxbuf, &so);
+       return so;
+}
 
-       if (nv50->dirty & NV50_NEW_RASTERIZER)
-               so_ref(nv50->rasterizer->so, &nv50->state.rast);
+static struct nouveau_stateobj *
+validate_vtxattr(struct nv50_context *nv50)
+{
+       struct nouveau_stateobj *so = NULL;
+       so_ref(nv50->state.vtxattr, &so);
+       return so;
+}
 
-       if (nv50->dirty & NV50_NEW_BLEND_COLOUR) {
-               so = so_new(1, 4, 0);
-               so_method(so, tesla, NV50TCL_BLEND_COLOR(0), 4);
-               so_data  (so, fui(nv50->blend_colour.color[0]));
-               so_data  (so, fui(nv50->blend_colour.color[1]));
-               so_data  (so, fui(nv50->blend_colour.color[2]));
-               so_data  (so, fui(nv50->blend_colour.color[3]));
-               so_ref(so, &nv50->state.blend_colour);
-               so_ref(NULL, &so);
-       }
+struct state_validate {
+       struct nouveau_stateobj *(*func)(struct nv50_context *nv50);
+       unsigned states;
+} validate_list[] = {
+       { validate_fb             , NV50_NEW_FRAMEBUFFER                      },
+       { validate_blend          , NV50_NEW_BLEND                            },
+       { validate_zsa            , NV50_NEW_ZSA                              },
+       { nv50_vertprog_validate  , NV50_NEW_VERTPROG | NV50_NEW_VERTPROG_CB  },
+       { nv50_fragprog_validate  , NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB  },
+       { nv50_geomprog_validate  , NV50_NEW_GEOMPROG | NV50_NEW_GEOMPROG_CB  },
+       { nv50_fp_linkage_validate, NV50_NEW_VERTPROG | NV50_NEW_GEOMPROG |
+                                   NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER   },
+       { nv50_gp_linkage_validate, NV50_NEW_VERTPROG | NV50_NEW_GEOMPROG     },
+       { validate_rast           , NV50_NEW_RASTERIZER                       },
+       { validate_blend_colour   , NV50_NEW_BLEND_COLOUR                     },
+       { validate_stencil_ref    , NV50_NEW_STENCIL_REF                      },
+       { validate_stipple        , NV50_NEW_STIPPLE                          },
+       { validate_scissor        , NV50_NEW_SCISSOR                          },
+       { validate_viewport       , NV50_NEW_VIEWPORT                         },
+       { validate_sampler        , NV50_NEW_SAMPLER                          },
+       { nv50_tex_validate       , NV50_NEW_TEXTURE | NV50_NEW_SAMPLER       },
+       { nv50_vbo_validate       , NV50_NEW_ARRAYS                           },
+       { validate_vtxbuf         , NV50_NEW_ARRAYS                           },
+       { validate_vtxattr        , NV50_NEW_ARRAYS                           },
+       {}
+};
+#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
 
-       if (nv50->dirty & NV50_NEW_STENCIL_REF) {
-               so = so_new(2, 2, 0);
-               so_method(so, tesla, NV50TCL_STENCIL_FRONT_FUNC_REF, 1);
-               so_data  (so, nv50->stencil_ref.ref_value[0]);
-               so_method(so, tesla, NV50TCL_STENCIL_BACK_FUNC_REF, 1);
-               so_data  (so, nv50->stencil_ref.ref_value[1]);
-               so_ref(so, &nv50->state.stencil_ref);
-               so_ref(NULL, &so);
-       }
+boolean
+nv50_state_validate(struct nv50_context *nv50, unsigned wait_dwords)
+{
+       struct nouveau_channel *chan = nv50->screen->base.channel;
+       struct nouveau_grobj *tesla = nv50->screen->tesla;
+       unsigned nr_relocs = 128, nr_dwords = wait_dwords + 128 + 4;
+       int ret, i;
 
-       if (nv50->dirty & NV50_NEW_STIPPLE) {
-               so = so_new(1, 32, 0);
-               so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_PATTERN(0), 32);
-               for (i = 0; i < 32; i++)
-                       so_data(so, util_bswap32(nv50->stipple.stipple[i]));
-               so_ref(so, &nv50->state.stipple);
-               so_ref(NULL, &so);
-       }
+       for (i = 0; i < validate_list_len; i++) {
+               struct state_validate *validate = &validate_list[i];
+               struct nouveau_stateobj *so;
 
-       if (nv50->dirty & (NV50_NEW_SCISSOR | NV50_NEW_RASTERIZER)) {
-               struct pipe_rasterizer_state *rast = &nv50->rasterizer->pipe;
-               struct pipe_scissor_state *s = &nv50->scissor;
+               if (!(nv50->dirty & validate->states))
+                       continue;
 
-               if (nv50->state.scissor &&
-                   (rast->scissor == 0 && nv50->state.scissor_enabled == 0))
-                       goto scissor_uptodate;
-               nv50->state.scissor_enabled = rast->scissor;
+               so = validate->func(nv50);
+               if (!so)
+                       continue;
 
-               so = so_new(1, 2, 0);
-               so_method(so, tesla, NV50TCL_SCISSOR_HORIZ(0), 2);
-               if (nv50->state.scissor_enabled) {
-                       so_data(so, (s->maxx << 16) | s->minx);
-                       so_data(so, (s->maxy << 16) | s->miny);
-               } else {
-                       so_data(so, (nv50->framebuffer.width << 16));
-                       so_data(so, (nv50->framebuffer.height << 16));
-               }
-               so_ref(so, &nv50->state.scissor);
-               so_ref(NULL, &so);
-               nv50->state.dirty |= NV50_NEW_SCISSOR;
-       }
-scissor_uptodate:
-
-       if (nv50->dirty & (NV50_NEW_VIEWPORT | NV50_NEW_RASTERIZER)) {
-               if (nv50->state.viewport &&
-                   !(nv50->dirty & NV50_NEW_VIEWPORT))
-                       goto viewport_uptodate;
-
-               so = so_new(5, 9, 0);
-               so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE_X(0), 3);
-               so_data  (so, fui(nv50->viewport.translate[0]));
-               so_data  (so, fui(nv50->viewport.translate[1]));
-               so_data  (so, fui(nv50->viewport.translate[2]));
-               so_method(so, tesla, NV50TCL_VIEWPORT_SCALE_X(0), 3);
-               so_data  (so, fui(nv50->viewport.scale[0]));
-               so_data  (so, fui(nv50->viewport.scale[1]));
-               so_data  (so, fui(nv50->viewport.scale[2]));
-
-               so_method(so, tesla, NV50TCL_VIEWPORT_TRANSFORM_EN, 1);
-               so_data  (so, 1);
-               /* 0x0000 = remove whole primitive only (xyz)
-                * 0x1018 = remove whole primitive only (xy), clamp z
-                * 0x1080 = clip primitive (xyz)
-                * 0x1098 = clip primitive (xy), clamp z
-                */
-               so_method(so, tesla, NV50TCL_VIEW_VOLUME_CLIP_CTRL, 1);
-               so_data  (so, 0x1080);
-               /* no idea what 0f90 does */
-               so_method(so, tesla, 0x0f90, 1);
-               so_data  (so, 0);
+               nr_dwords += (so->total + so->cur);
+               nr_relocs += so->cur_reloc;
 
-               so_ref(so, &nv50->state.viewport);
+               so_ref(so, &nv50->state.hw[i]);
                so_ref(NULL, &so);
-               nv50->state.dirty |= NV50_NEW_VIEWPORT;
+               nv50->state.hw_dirty |= (1 << i);
        }
-viewport_uptodate:
-
-       if (nv50->dirty & NV50_NEW_SAMPLER) {
-               unsigned nr = 0;
-
-               for (i = 0; i < PIPE_SHADER_TYPES; ++i)
-                       nr += nv50->sampler_nr[i];
+       nv50->dirty = 0;
 
-               so = so_new(1 + 5 * PIPE_SHADER_TYPES,
-                           1 + 19 * PIPE_SHADER_TYPES + nr * 8,
-                           PIPE_SHADER_TYPES * 2);
+       if (nv50->screen->cur_ctx != nv50) {
+               for (i = 0; i < validate_list_len; i++) {
+                       if (!nv50->state.hw[i] ||
+                           (nv50->state.hw_dirty & (1 << i)))
+                               continue;
 
-               nv50_validate_samplers(nv50, so, PIPE_SHADER_VERTEX);
-               nv50_validate_samplers(nv50, so, PIPE_SHADER_FRAGMENT);
+                       nr_dwords += (nv50->state.hw[i]->total +
+                                     nv50->state.hw[i]->cur);
+                       nr_relocs += nv50->state.hw[i]->cur_reloc;
+                       nv50->state.hw_dirty |= (1 << i);
+               }
 
-               so_method(so, tesla, 0x1334, 1); /* flush TSC */
-               so_data  (so, 0);
+               nv50->screen->cur_ctx = nv50;
+       }
 
-               so_ref(so, &nv50->state.tsc_upload);
-               so_ref(NULL, &so);
+       ret = MARK_RING(chan, nr_dwords, nr_relocs);
+       if (ret) {
+               debug_printf("MARK_RING(%d, %d) failed: %d\n",
+                            nr_dwords, nr_relocs, ret);
+               return FALSE;
        }
 
-       if (nv50->dirty & (NV50_NEW_TEXTURE | NV50_NEW_SAMPLER))
-               nv50_tex_validate(nv50);
+       while (nv50->state.hw_dirty) {
+               i = ffs(nv50->state.hw_dirty) - 1;
+               nv50->state.hw_dirty &= ~(1 << i);
 
-       if (nv50->dirty & NV50_NEW_ARRAYS)
-               nv50_vbo_validate(nv50);
+               so_emit(chan, nv50->state.hw[i]);
+       }
 
-       nv50->state.dirty |= nv50->dirty;
-       nv50->dirty = 0;
-       nv50_state_emit(nv50);
+       /* Yes, really, we need to do this.  If a buffer that is referenced
+        * on the hardware isn't part of changed state above, without doing
+        * this the kernel is given no clue that the buffer is being used
+        * still.  This can cause all sorts of fun issues.
+        */
+       nv50_tex_relocs(nv50);
+       so_emit_reloc_markers(chan, nv50->state.hw[0]); /* fb */
+       so_emit_reloc_markers(chan, nv50->state.hw[3]); /* vp */
+       so_emit_reloc_markers(chan, nv50->state.hw[4]); /* fp */
+       so_emit_reloc_markers(chan, nv50->state.hw[17]); /* vb */
+       so_emit_reloc_markers(chan, nv50->screen->static_init);
 
+       /* No idea.. */
+       BEGIN_RING(chan, tesla, 0x142c, 1);
+       OUT_RING  (chan, 0);
+       BEGIN_RING(chan, tesla, 0x142c, 1);
+       OUT_RING  (chan, 0);
        return TRUE;
 }
 
index de0560e20cd07b4f44cf2716e958d0c7da37b296..4c48b12cd87b377b24a061b47c74276fcbf3c558 100644 (file)
@@ -24,6 +24,7 @@
 #include "nv50_texture.h"
 
 #include "nouveau/nouveau_stateobj.h"
+#include "nouveau/nouveau_reloc.h"
 
 #include "util/u_format.h"
 
@@ -195,6 +196,35 @@ nv50_validate_textures(struct nv50_context *nv50, struct nouveau_stateobj *so,
 }
 
 void
+nv50_tex_relocs(struct nv50_context *nv50)
+{
+       struct nouveau_channel *chan = nv50->screen->tesla->channel;
+       int p, unit;
+
+       p = PIPE_SHADER_FRAGMENT;
+       for (unit = 0; unit < nv50->miptree_nr[p]; unit++) {
+               if (!nv50->miptree[p][unit])
+                       continue;
+               nouveau_reloc_emit(chan, nv50->screen->tic,
+                                  ((p * 32) + unit) * 32, NULL,
+                                  nv50->miptree[p][unit]->base.bo, 0, 0,
+                                  NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW |
+                                  NOUVEAU_BO_RD, 0, 0);
+       }
+
+       p = PIPE_SHADER_VERTEX;
+       for (unit = 0; unit < nv50->miptree_nr[p]; unit++) {
+               if (!nv50->miptree[p][unit])
+                       continue;
+               nouveau_reloc_emit(chan, nv50->screen->tic,
+                                  ((p * 32) + unit) * 32, NULL,
+                                  nv50->miptree[p][unit]->base.bo, 0, 0,
+                                  NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW |
+                                  NOUVEAU_BO_RD, 0, 0);
+       }
+}
+
+struct nouveau_stateobj *
 nv50_tex_validate(struct nv50_context *nv50)
 {
        struct nouveau_stateobj *so;
@@ -217,12 +247,11 @@ nv50_tex_validate(struct nv50_context *nv50)
                so_ref(NULL, &so);
 
                NOUVEAU_ERR("failed tex validate\n");
-               return;
+               return NULL;
        }
 
        so_method(so, tesla, 0x1330, 1); /* flush TIC */
        so_data  (so, 0);
 
-       so_ref(so, &nv50->state.tic_upload);
-       so_ref(NULL, &so);
+       return so;
 }
index 1c8ee0b9adf83364c5720ef7f3a4c8d293c3e91f..6b9c1ee231e40e47d8ee94866f24d21a7f25d40f 100644 (file)
 #include "util/u_inlines.h"
 #include "util/u_format.h"
 
+#include "nouveau/nouveau_util.h"
 #include "nv50_context.h"
 
-static boolean
-nv50_push_elements_u08(struct nv50_context *, uint8_t *, unsigned);
-
-static boolean
-nv50_push_elements_u16(struct nv50_context *, uint16_t *, unsigned);
-
-static boolean
-nv50_push_elements_u32(struct nv50_context *, uint32_t *, unsigned);
-
-static boolean
-nv50_push_arrays(struct nv50_context *, unsigned, unsigned);
-
-#define NV50_USING_LOATHED_EDGEFLAG(ctx) ((ctx)->vertprog->cfg.edgeflag_in < 16)
-
-static INLINE unsigned
-nv50_prim(unsigned mode)
-{
-       switch (mode) {
-       case PIPE_PRIM_POINTS: return NV50TCL_VERTEX_BEGIN_POINTS;
-       case PIPE_PRIM_LINES: return NV50TCL_VERTEX_BEGIN_LINES;
-       case PIPE_PRIM_LINE_LOOP: return NV50TCL_VERTEX_BEGIN_LINE_LOOP;
-       case PIPE_PRIM_LINE_STRIP: return NV50TCL_VERTEX_BEGIN_LINE_STRIP;
-       case PIPE_PRIM_TRIANGLES: return NV50TCL_VERTEX_BEGIN_TRIANGLES;
-       case PIPE_PRIM_TRIANGLE_STRIP:
-               return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP;
-       case PIPE_PRIM_TRIANGLE_FAN: return NV50TCL_VERTEX_BEGIN_TRIANGLE_FAN;
-       case PIPE_PRIM_QUADS: return NV50TCL_VERTEX_BEGIN_QUADS;
-       case PIPE_PRIM_QUAD_STRIP: return NV50TCL_VERTEX_BEGIN_QUAD_STRIP;
-       case PIPE_PRIM_POLYGON: return NV50TCL_VERTEX_BEGIN_POLYGON;
-       case PIPE_PRIM_LINES_ADJACENCY:
-               return NV50TCL_VERTEX_BEGIN_LINES_ADJACENCY;
-       case PIPE_PRIM_LINE_STRIP_ADJACENCY:
-               return NV50TCL_VERTEX_BEGIN_LINE_STRIP_ADJACENCY;
-       case PIPE_PRIM_TRIANGLES_ADJACENCY:
-               return NV50TCL_VERTEX_BEGIN_TRIANGLES_ADJACENCY;
-       case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
-               return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP_ADJACENCY;
-       default:
-               break;
-       }
-
-       NOUVEAU_ERR("invalid primitive type %d\n", mode);
-       return NV50TCL_VERTEX_BEGIN_POINTS;
-}
-
 static INLINE uint32_t
 nv50_vbo_type_to_hw(enum pipe_format format)
 {
@@ -139,15 +95,16 @@ nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element *ve)
        uint32_t hw_type, hw_size;
        enum pipe_format pf = ve->src_format;
        const struct util_format_description *desc;
-       unsigned size;
+       unsigned size, nr_components;
 
        desc = util_format_description(pf);
        assert(desc);
 
        size = util_format_get_component_bits(pf, UTIL_FORMAT_COLORSPACE_RGB, 0);
+       nr_components = util_format_get_nr_components(pf);
 
        hw_type = nv50_vbo_type_to_hw(pf);
-       hw_size = nv50_vbo_size_to_hw(size, ve->nr_components);
+       hw_size = nv50_vbo_size_to_hw(size, nr_components);
 
        if (!hw_type || !hw_size) {
                NOUVEAU_ERR("unsupported vbo format: %s\n", util_format_name(pf));
@@ -161,250 +118,58 @@ nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element *ve)
        return (hw_type | hw_size);
 }
 
-/* For instanced drawing from user buffers, hitting the FIFO repeatedly
- * with the same vertex data is probably worse than uploading all data.
- */
-static boolean
-nv50_upload_vtxbuf(struct nv50_context *nv50, unsigned i)
-{
-       struct nv50_screen *nscreen = nv50->screen;
-       struct pipe_screen *pscreen = &nscreen->base.base;
-       struct pipe_buffer *buf = nscreen->strm_vbuf[i];
-       struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i];
-       uint8_t *src;
-       unsigned size = align(vb->buffer->size, 4096);
-
-       if (buf && buf->size < size)
-               pipe_buffer_reference(&nscreen->strm_vbuf[i], NULL);
-
-       if (!nscreen->strm_vbuf[i]) {
-               nscreen->strm_vbuf[i] = pipe_buffer_create(
-                       pscreen, 0, PIPE_BUFFER_USAGE_VERTEX, size);
-               buf = nscreen->strm_vbuf[i];
-       }
-
-       src = pipe_buffer_map(pscreen, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ);
-       if (!src)
-               return FALSE;
-       src += vb->buffer_offset;
-
-       size = (vb->max_index + 1) * vb->stride + 16; /* + 16 is for stride 0 */
-       if (vb->buffer_offset + size > vb->buffer->size)
-               size = vb->buffer->size - vb->buffer_offset;
-
-       pipe_buffer_write(pscreen, buf, vb->buffer_offset, size, src);
-       pipe_buffer_unmap(pscreen, vb->buffer);
-
-       vb->buffer = buf; /* don't pipe_reference, this is a private copy */
-       return TRUE;
-}
-
-static void
-nv50_upload_user_vbufs(struct nv50_context *nv50)
-{
-       unsigned i;
-
-       if (nv50->vbo_fifo)
-               nv50->dirty |= NV50_NEW_ARRAYS;
-       if (!(nv50->dirty & NV50_NEW_ARRAYS))
-               return;
-
-       for (i = 0; i < nv50->vtxbuf_nr; ++i) {
-               if (nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX)
-                       continue;
-               nv50_upload_vtxbuf(nv50, i);
-       }
-}
-
-static void
-nv50_set_static_vtxattr(struct nv50_context *nv50, unsigned i, void *data)
-{
-       struct nouveau_grobj *tesla = nv50->screen->tesla;
-       struct nouveau_channel *chan = tesla->channel;
-       float v[4];
-
-       util_format_read_4f(nv50->vtxelt[i].src_format,
-                           v, 0, data, 0, 0, 0, 1, 1);
-
-       switch (nv50->vtxelt[i].nr_components) {
-       case 4:
-               BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_4F_X(i), 4);
-               OUT_RINGf (chan, v[0]);
-               OUT_RINGf (chan, v[1]);
-               OUT_RINGf (chan, v[2]);
-               OUT_RINGf (chan, v[3]);
-               break;
-       case 3:
-               BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_3F_X(i), 3);
-               OUT_RINGf (chan, v[0]);
-               OUT_RINGf (chan, v[1]);
-               OUT_RINGf (chan, v[2]);
-               break;
-       case 2:
-               BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_2F_X(i), 2);
-               OUT_RINGf (chan, v[0]);
-               OUT_RINGf (chan, v[1]);
-               break;
-       case 1:
-               BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_1F(i), 1);
-               OUT_RINGf (chan, v[0]);
-               break;
-       default:
-               assert(0);
-               break;
-       }
-}
-
-static unsigned
-init_per_instance_arrays_immd(struct nv50_context *nv50,
-                             unsigned startInstance,
-                             unsigned pos[16], unsigned step[16])
-{
-       struct nouveau_bo *bo;
-       unsigned i, b, count = 0;
-
-       for (i = 0; i < nv50->vtxelt_nr; ++i) {
-               if (!nv50->vtxelt[i].instance_divisor)
-                       continue;
-               ++count;
-               b = nv50->vtxelt[i].vertex_buffer_index;
-
-               pos[i] = nv50->vtxelt[i].src_offset +
-                       nv50->vtxbuf[b].buffer_offset +
-                       startInstance * nv50->vtxbuf[b].stride;
-               step[i] = startInstance % nv50->vtxelt[i].instance_divisor;
-
-               bo = nouveau_bo(nv50->vtxbuf[b].buffer);
-               if (!bo->map)
-                       nouveau_bo_map(bo, NOUVEAU_BO_RD);
-
-               nv50_set_static_vtxattr(nv50, i, (uint8_t *)bo->map + pos[i]);
-       }
-
-       return count;
-}
-
-static unsigned
-init_per_instance_arrays(struct nv50_context *nv50,
-                        unsigned startInstance,
-                        unsigned pos[16], unsigned step[16])
-{
-       struct nouveau_grobj *tesla = nv50->screen->tesla;
-       struct nouveau_channel *chan = tesla->channel;
+struct instance {
        struct nouveau_bo *bo;
-       struct nouveau_stateobj *so;
-       unsigned i, b, count = 0;
-       const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
-
-       if (nv50->vbo_fifo)
-               return init_per_instance_arrays_immd(nv50, startInstance,
-                                                    pos, step);
-
-       so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 2, nv50->vtxelt_nr * 2);
-
-       for (i = 0; i < nv50->vtxelt_nr; ++i) {
-               if (!nv50->vtxelt[i].instance_divisor)
-                       continue;
-               ++count;
-               b = nv50->vtxelt[i].vertex_buffer_index;
-
-               pos[i] = nv50->vtxelt[i].src_offset +
-                       nv50->vtxbuf[b].buffer_offset +
-                       startInstance * nv50->vtxbuf[b].stride;
-
-               if (!startInstance) {
-                       step[i] = 0;
-                       continue;
-               }
-               step[i] = startInstance % nv50->vtxelt[i].instance_divisor;
-
-               bo = nouveau_bo(nv50->vtxbuf[b].buffer);
-
-               so_method(so, tesla, NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2);
-               so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_HIGH, 0, 0);
-               so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0);
-       }
-
-       if (count && startInstance) {
-               so_ref (so, &nv50->state.instbuf); /* for flush notify */
-               so_emit(chan, nv50->state.instbuf);
-       }
-       so_ref (NULL, &so);
-
-       return count;
-}
+       unsigned delta;
+       unsigned stride;
+       unsigned step;
+       unsigned divisor;
+};
 
 static void
-step_per_instance_arrays_immd(struct nv50_context *nv50,
-                             unsigned pos[16], unsigned step[16])
+instance_init(struct nv50_context *nv50, struct instance *a, unsigned first)
 {
-       struct nouveau_bo *bo;
-       unsigned i, b;
+       int i;
 
-       for (i = 0; i < nv50->vtxelt_nr; ++i) {
-               if (!nv50->vtxelt[i].instance_divisor)
-                       continue;
-               if (++step[i] != nv50->vtxelt[i].instance_divisor)
-                       continue;
-               b = nv50->vtxelt[i].vertex_buffer_index;
-               bo = nouveau_bo(nv50->vtxbuf[b].buffer);
+       for (i = 0; i < nv50->vtxelt->num_elements; i++) {
+               struct pipe_vertex_element *ve = &nv50->vtxelt->pipe[i];
+               struct pipe_vertex_buffer *vb;
 
-               step[i] = 0;
-               pos[i] += nv50->vtxbuf[b].stride;
+               a[i].divisor = ve->instance_divisor;
+               if (a[i].divisor) {
+                       vb = &nv50->vtxbuf[ve->vertex_buffer_index];
 
-               nv50_set_static_vtxattr(nv50, i, (uint8_t *)bo->map + pos[i]);
+                       a[i].bo = nouveau_bo(vb->buffer);
+                       a[i].stride = vb->stride;
+                       a[i].step = first % a[i].divisor;
+                       a[i].delta = vb->buffer_offset + ve->src_offset +
+                                    (first * a[i].stride);
+               }
        }
 }
 
 static void
-step_per_instance_arrays(struct nv50_context *nv50,
-                        unsigned pos[16], unsigned step[16])
+instance_step(struct nv50_context *nv50, struct instance *a)
 {
+       struct nouveau_channel *chan = nv50->screen->tesla->channel;
        struct nouveau_grobj *tesla = nv50->screen->tesla;
-       struct nouveau_channel *chan = tesla->channel;
-       struct nouveau_bo *bo;
-       struct nouveau_stateobj *so;
-       unsigned i, b;
-       const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
-
-       if (nv50->vbo_fifo) {
-               step_per_instance_arrays_immd(nv50, pos, step);
-               return;
-       }
-
-       so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 2, nv50->vtxelt_nr * 2);
+       int i;
 
-       for (i = 0; i < nv50->vtxelt_nr; ++i) {
-               if (!nv50->vtxelt[i].instance_divisor)
+       for (i = 0; i < nv50->vtxelt->num_elements; i++) {
+               if (!a[i].divisor)
                        continue;
-               b = nv50->vtxelt[i].vertex_buffer_index;
 
-               if (++step[i] == nv50->vtxelt[i].instance_divisor) {
-                       step[i] = 0;
-                       pos[i] += nv50->vtxbuf[b].stride;
+               BEGIN_RING(chan, tesla,
+                          NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2);
+               OUT_RELOCh(chan, a[i].bo, a[i].delta, NOUVEAU_BO_RD |
+                          NOUVEAU_BO_VRAM | NOUVEAU_BO_GART);
+               OUT_RELOCl(chan, a[i].bo, a[i].delta, NOUVEAU_BO_RD |
+                          NOUVEAU_BO_VRAM | NOUVEAU_BO_GART);
+               if (++a[i].step == a[i].divisor) {
+                       a[i].step = 0;
+                       a[i].delta += a[i].stride;
                }
-
-               bo = nouveau_bo(nv50->vtxbuf[b].buffer);
-
-               so_method(so, tesla, NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2);
-               so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_HIGH, 0, 0);
-               so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0);
        }
-
-       so_ref (so, &nv50->state.instbuf); /* for flush notify */
-       so_ref (NULL, &so);
-
-       so_emit(chan, nv50->state.instbuf);
-}
-
-static INLINE void
-nv50_unmap_vbufs(struct nv50_context *nv50)
-{
-        unsigned i;
-
-        for (i = 0; i < nv50->vtxbuf_nr; ++i)
-                if (nouveau_bo(nv50->vtxbuf[i].buffer)->map)
-                        nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer));
 }
 
 void
@@ -415,198 +180,207 @@ nv50_draw_arrays_instanced(struct pipe_context *pipe,
        struct nv50_context *nv50 = nv50_context(pipe);
        struct nouveau_channel *chan = nv50->screen->tesla->channel;
        struct nouveau_grobj *tesla = nv50->screen->tesla;
-       unsigned i, nz_divisors;
-       unsigned step[16], pos[16];
-
-       if (!NV50_USING_LOATHED_EDGEFLAG(nv50))
-               nv50_upload_user_vbufs(nv50);
+       struct instance a[16];
+       unsigned prim = nv50_prim(mode);
 
-       nv50_state_validate(nv50);
+       instance_init(nv50, a, startInstance);
+       if (!nv50_state_validate(nv50, 10 + 16*3))
+               return;
 
-       nz_divisors = init_per_instance_arrays(nv50, startInstance, pos, step);
+       if (nv50->vbo_fifo) {
+               nv50_push_elements_instanced(pipe, NULL, 0, mode, start,
+                                            count, startInstance,
+                                            instanceCount);
+               return;
+       }
 
        BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
        OUT_RING  (chan, NV50_CB_AUX | (24 << 8));
        OUT_RING  (chan, startInstance);
+       while (instanceCount--) {
+               if (AVAIL_RING(chan) < (7 + 16*3)) {
+                       FIRE_RING(chan);
+                       if (!nv50_state_validate(nv50, 7 + 16*3)) {
+                               assert(0);
+                               return;
+                       }
+               }
+               instance_step(nv50, a);
 
-       BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
-       OUT_RING  (chan, nv50_prim(mode));
-
-       if (nv50->vbo_fifo)
-               nv50_push_arrays(nv50, start, count);
-       else {
+               BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
+               OUT_RING  (chan, prim);
                BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
                OUT_RING  (chan, start);
                OUT_RING  (chan, count);
-       }
-       BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
-       OUT_RING  (chan, 0);
-
-       for (i = 1; i < instanceCount; i++) {
-               if (nz_divisors) /* any non-zero array divisors ? */
-                       step_per_instance_arrays(nv50, pos, step);
-
-               BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
-               OUT_RING  (chan, nv50_prim(mode) | (1 << 28));
-
-               if (nv50->vbo_fifo)
-                       nv50_push_arrays(nv50, start, count);
-               else {
-                       BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
-                       OUT_RING  (chan, start);
-                       OUT_RING  (chan, count);
-               }
                BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
                OUT_RING  (chan, 0);
-       }
-       nv50_unmap_vbufs(nv50);
 
-       so_ref(NULL, &nv50->state.instbuf);
+               prim |= (1 << 28);
+       }
 }
 
 void
 nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
                 unsigned count)
 {
-       struct nv50_context *nv50 = nv50_context(pipe);
-       struct nouveau_channel *chan = nv50->screen->tesla->channel;
-       struct nouveau_grobj *tesla = nv50->screen->tesla;
-       boolean ret;
-
-       nv50_state_validate(nv50);
-
-       BEGIN_RING(chan, tesla, 0x142c, 1);
-       OUT_RING  (chan, 0);
-       BEGIN_RING(chan, tesla, 0x142c, 1);
-       OUT_RING  (chan, 0);
-
-       BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
-       OUT_RING  (chan, nv50_prim(mode));
-
-       if (nv50->vbo_fifo)
-               ret = nv50_push_arrays(nv50, start, count);
-       else {
-               BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
-               OUT_RING  (chan, start);
-               OUT_RING  (chan, count);
-               ret = TRUE;
-       }
-       BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
-       OUT_RING  (chan, 0);
-
-       nv50_unmap_vbufs(nv50);
-
-        /* XXX: not sure what to do if ret != TRUE: flush and retry?
-         */
-        assert(ret);
+       nv50_draw_arrays_instanced(pipe, mode, start, count, 0, 1);
 }
 
-static INLINE boolean
-nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map,
-                             unsigned start, unsigned count)
-{
-       struct nouveau_channel *chan = nv50->screen->tesla->channel;
-       struct nouveau_grobj *tesla = nv50->screen->tesla;
-
-       map += start;
+struct inline_ctx {
+       struct nv50_context *nv50;
+       void *map;
+};
 
-       if (nv50->vbo_fifo)
-               return nv50_push_elements_u08(nv50, map, count);
+static void
+inline_elt08(void *priv, unsigned start, unsigned count)
+{
+       struct inline_ctx *ctx = priv;
+       struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
+       struct nouveau_channel *chan = tesla->channel;
+       uint8_t *map = (uint8_t *)ctx->map + start;
 
        if (count & 1) {
                BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1);
                OUT_RING  (chan, map[0]);
                map++;
-               count--;
+               count &= ~1;
        }
 
-       while (count) {
-               unsigned nr = count > 2046 ? 2046 : count;
-               int i;
-
-               BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, nr >> 1);
-               for (i = 0; i < nr; i += 2)
-                       OUT_RING  (chan, (map[i + 1] << 16) | map[i]);
+       count >>= 1;
+       if (!count)
+               return;
 
-               count -= nr;
-               map += nr;
+       BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, count);
+       while (count--) {
+               OUT_RING(chan, (map[1] << 16) | map[0]);
+               map += 2;
        }
-       return TRUE;
 }
 
-static INLINE boolean
-nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map,
-                             unsigned start, unsigned count)
+static void
+inline_elt16(void *priv, unsigned start, unsigned count)
 {
-       struct nouveau_channel *chan = nv50->screen->tesla->channel;
-       struct nouveau_grobj *tesla = nv50->screen->tesla;
-
-       map += start;
-
-       if (nv50->vbo_fifo)
-               return nv50_push_elements_u16(nv50, map, count);
+       struct inline_ctx *ctx = priv;
+       struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
+       struct nouveau_channel *chan = tesla->channel;
+       uint16_t *map = (uint16_t *)ctx->map + start;
 
        if (count & 1) {
                BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1);
                OUT_RING  (chan, map[0]);
+               count &= ~1;
                map++;
-               count--;
        }
 
-       while (count) {
-               unsigned nr = count > 2046 ? 2046 : count;
-               int i;
-
-               BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, nr >> 1);
-               for (i = 0; i < nr; i += 2)
-                       OUT_RING  (chan, (map[i + 1] << 16) | map[i]);
+       count >>= 1;
+       if (!count)
+               return;
 
-               count -= nr;
-               map += nr;
+       BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, count);
+       while (count--) {
+               OUT_RING(chan, (map[1] << 16) | map[0]);
+               map += 2;
        }
-       return TRUE;
 }
 
-static INLINE boolean
-nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map,
-                             unsigned start, unsigned count)
+static void
+inline_elt32(void *priv, unsigned start, unsigned count)
+{
+       struct inline_ctx *ctx = priv;
+       struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
+       struct nouveau_channel *chan = tesla->channel;
+
+       BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U32, count);
+       OUT_RINGp    (chan, (uint32_t *)ctx->map + start, count);
+}
+
+static void
+inline_edgeflag(void *priv, boolean enabled)
 {
+       struct inline_ctx *ctx = priv;
+       struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
+       struct nouveau_channel *chan = tesla->channel;
+
+       BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
+       OUT_RING  (chan, enabled ? 1 : 0);
+}
+
+static void
+nv50_draw_elements_inline(struct pipe_context *pipe,
+                         struct pipe_buffer *indexBuffer, unsigned indexSize,
+                         unsigned mode, unsigned start, unsigned count,
+                         unsigned startInstance, unsigned instanceCount)
+{
+       struct pipe_screen *pscreen = pipe->screen;
+       struct nv50_context *nv50 = nv50_context(pipe);
        struct nouveau_channel *chan = nv50->screen->tesla->channel;
        struct nouveau_grobj *tesla = nv50->screen->tesla;
+       struct instance a[16];
+       struct inline_ctx ctx;
+       struct u_split_prim s;
+       boolean nzi = FALSE;
+       unsigned overhead;
+
+       overhead = 16*3; /* potential instance adjustments */
+       overhead += 4; /* Begin()/End() */
+       overhead += 4; /* potential edgeflag disable/reenable */
+       overhead += 3; /* potentially 3 VTX_ELT_U16/U32 packet headers */
+
+       s.priv = &ctx;
+       if (indexSize == 1)
+               s.emit = inline_elt08;
+       else
+       if (indexSize == 2)
+               s.emit = inline_elt16;
+       else
+               s.emit = inline_elt32;
+       s.edge = inline_edgeflag;
+
+       ctx.nv50 = nv50;
+       ctx.map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ);
+       assert(ctx.map);
+       if (!ctx.map)
+               return;
 
-       map += start;
+       instance_init(nv50, a, startInstance);
+       if (!nv50_state_validate(nv50, overhead + 6 + 3))
+               return;
 
-       if (nv50->vbo_fifo)
-               return nv50_push_elements_u32(nv50, map, count);
+       BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
+       OUT_RING  (chan, NV50_CB_AUX | (24 << 8));
+       OUT_RING  (chan, startInstance);
+       while (instanceCount--) {
+               unsigned max_verts;
+               boolean done;
+
+               u_split_prim_init(&s, mode, start, count);
+               do {
+                       if (AVAIL_RING(chan) < (overhead + 6)) {
+                               FIRE_RING(chan);
+                               if (!nv50_state_validate(nv50, (overhead + 6))) {
+                                       assert(0);
+                                       return;
+                               }
+                       }
 
-       while (count) {
-               unsigned nr = count > 2047 ? 2047 : count;
+                       max_verts = AVAIL_RING(chan) - overhead;
+                       if (max_verts > 2047)
+                               max_verts = 2047;
+                       if (indexSize != 4)
+                               max_verts <<= 1;
+                       instance_step(nv50, a);
 
-               BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U32, nr);
-               OUT_RINGp (chan, map, nr);
+                       BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
+                       OUT_RING  (chan, nv50_prim(s.mode) | (nzi ? (1<<28) : 0));
+                       done = u_split_prim_next(&s, max_verts);
+                       BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
+                       OUT_RING  (chan, 0);
+               } while (!done);
 
-               count -= nr;
-               map += nr;
+               nzi = TRUE;
        }
-       return TRUE;
-}
 
-static INLINE void
-nv50_draw_elements_inline(struct nv50_context *nv50,
-                         void *map, unsigned indexSize,
-                         unsigned start, unsigned count)
-{
-       switch (indexSize) {
-       case 1:
-               nv50_draw_elements_inline_u08(nv50, map, start, count);
-               break;
-       case 2:
-               nv50_draw_elements_inline_u16(nv50, map, start, count);
-               break;
-       case 4:
-               nv50_draw_elements_inline_u32(nv50, map, start, count);
-               break;
-       }
+       pipe_buffer_unmap(pscreen, indexBuffer);
 }
 
 void
@@ -617,49 +391,68 @@ nv50_draw_elements_instanced(struct pipe_context *pipe,
                             unsigned startInstance, unsigned instanceCount)
 {
        struct nv50_context *nv50 = nv50_context(pipe);
+       struct nouveau_channel *chan = nv50->screen->tesla->channel;
        struct nouveau_grobj *tesla = nv50->screen->tesla;
-       struct nouveau_channel *chan = tesla->channel;
-       struct pipe_screen *pscreen = pipe->screen;
-       void *map;
-       unsigned i, nz_divisors;
-       unsigned step[16], pos[16];
+       struct instance a[16];
+       unsigned prim = nv50_prim(mode);
 
-       map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ);
-
-       if (!NV50_USING_LOATHED_EDGEFLAG(nv50))
-               nv50_upload_user_vbufs(nv50);
-
-       nv50_state_validate(nv50);
+       instance_init(nv50, a, startInstance);
+       if (!nv50_state_validate(nv50, 13 + 16*3))
+               return;
 
-       nz_divisors = init_per_instance_arrays(nv50, startInstance, pos, step);
+       if (nv50->vbo_fifo) {
+               nv50_push_elements_instanced(pipe, indexBuffer, indexSize,
+                                            mode, start, count, startInstance,
+                                            instanceCount);
+               return;
+       } else
+       if (!(indexBuffer->usage & PIPE_BUFFER_USAGE_INDEX) || indexSize == 1) {
+               nv50_draw_elements_inline(pipe, indexBuffer, indexSize,
+                                         mode, start, count, startInstance,
+                                         instanceCount);
+               return;
+       }
 
        BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
        OUT_RING  (chan, NV50_CB_AUX | (24 << 8));
        OUT_RING  (chan, startInstance);
-
-       BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
-       OUT_RING  (chan, nv50_prim(mode));
-
-       nv50_draw_elements_inline(nv50, map, indexSize, start, count);
-
-       BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
-       OUT_RING  (chan, 0);
-
-       for (i = 1; i < instanceCount; ++i) {
-               if (nz_divisors) /* any non-zero array divisors ? */
-                       step_per_instance_arrays(nv50, pos, step);
+       while (instanceCount--) {
+               if (AVAIL_RING(chan) < (7 + 16*3)) {
+                       FIRE_RING(chan);
+                       if (!nv50_state_validate(nv50, 10 + 16*3)) {
+                               assert(0);
+                               return;
+                       }
+               }
+               instance_step(nv50, a);
 
                BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
-               OUT_RING  (chan, nv50_prim(mode) | (1 << 28));
-
-               nv50_draw_elements_inline(nv50, map, indexSize, start, count);
-
+               OUT_RING  (chan, prim);
+               if (indexSize == 4) {
+                       BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32 | 0x30000, 0);
+                       OUT_RING  (chan, count);
+                       nouveau_pushbuf_submit(chan, nouveau_bo(indexBuffer),
+                                              start << 2, count << 2);
+               } else
+               if (indexSize == 2) {
+                       unsigned vb_start = (start & ~1);
+                       unsigned vb_end = (start + count + 1) & ~1;
+                       unsigned dwords = (vb_end - vb_start) >> 1;
+
+                       BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16_SETUP, 1);
+                       OUT_RING  (chan, ((start & 1) << 31) | count);
+                       BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x30000, 0);
+                       OUT_RING  (chan, dwords);
+                       nouveau_pushbuf_submit(chan, nouveau_bo(indexBuffer),
+                                              vb_start << 1, dwords << 2);
+                       BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16_SETUP, 1);
+                       OUT_RING  (chan, 0);
+               }
                BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
                OUT_RING  (chan, 0);
-       }
-       nv50_unmap_vbufs(nv50);
 
-       so_ref(NULL, &nv50->state.instbuf);
+               prim |= (1 << 28);
+       }
 }
 
 void
@@ -667,51 +460,8 @@ nv50_draw_elements(struct pipe_context *pipe,
                   struct pipe_buffer *indexBuffer, unsigned indexSize,
                   unsigned mode, unsigned start, unsigned count)
 {
-       struct nv50_context *nv50 = nv50_context(pipe);
-       struct nouveau_channel *chan = nv50->screen->tesla->channel;
-       struct nouveau_grobj *tesla = nv50->screen->tesla;
-       struct pipe_screen *pscreen = pipe->screen;
-       void *map;
-       
-       nv50_state_validate(nv50);
-
-       BEGIN_RING(chan, tesla, 0x142c, 1);
-       OUT_RING  (chan, 0);
-       BEGIN_RING(chan, tesla, 0x142c, 1);
-       OUT_RING  (chan, 0);
-
-       BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
-       OUT_RING  (chan, nv50_prim(mode));
-
-       if (!nv50->vbo_fifo && indexSize == 4) {
-               BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32 | 0x30000, 0);
-               OUT_RING  (chan, count);
-               nouveau_pushbuf_submit(chan, nouveau_bo(indexBuffer),
-                                      start << 2, count << 2);
-       } else
-       if (!nv50->vbo_fifo && indexSize == 2) {
-               unsigned vb_start = (start & ~1);
-               unsigned vb_end = (start + count + 1) & ~1;
-               unsigned dwords = (vb_end - vb_start) >> 1;
-
-               BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16_SETUP, 1);
-               OUT_RING  (chan, ((start & 1) << 31) | count);
-               BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x30000, 0);
-               OUT_RING  (chan, dwords);
-               nouveau_pushbuf_submit(chan, nouveau_bo(indexBuffer),
-                                      vb_start << 1, dwords << 2);
-               BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16_SETUP, 1);
-               OUT_RING  (chan, 0);
-       } else {
-               map = pipe_buffer_map(pscreen, indexBuffer,
-                                     PIPE_BUFFER_USAGE_CPU_READ);
-               nv50_draw_elements_inline(nv50, map, indexSize, start, count);
-               nv50_unmap_vbufs(nv50);
-               pipe_buffer_unmap(pscreen, indexBuffer);
-       }
-
-       BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
-       OUT_RING  (chan, 0);
+       nv50_draw_elements_instanced(pipe, indexBuffer, indexSize,
+                                    mode, start, count, 0, 1);
 }
 
 static INLINE boolean
@@ -726,6 +476,7 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib,
        struct nouveau_bo *bo = nouveau_bo(vb->buffer);
        float v[4];
        int ret;
+       unsigned nr_components = util_format_get_nr_components(ve->src_format);
 
        ret = nouveau_bo_map(bo, NOUVEAU_BO_RD);
        if (ret)
@@ -736,9 +487,10 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib,
                            0, 0, 1, 1);
        so = *pso;
        if (!so)
-               *pso = so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 4, 0);
+               *pso = so = so_new(nv50->vtxelt->num_elements,
+                                  nv50->vtxelt->num_elements * 4, 0);
 
-       switch (ve->nr_components) {
+       switch (nr_components) {
        case 4:
                so_method(so, tesla, NV50TCL_VTX_ATTR_4F_X(attrib), 4);
                so_data  (so, fui(v[0]));
@@ -775,6 +527,18 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib,
 }
 
 void
+nv50_vtxelt_construct(struct nv50_vtxelt_stateobj *cso)
+{
+       unsigned i;
+
+       for (i = 0; i < cso->num_elements; ++i) {
+               struct pipe_vertex_element *ve = &cso->pipe[i];
+
+               cso->hw[i] = nv50_vbo_vtxelt_to_hw(ve);
+       }
+}
+
+struct nouveau_stateobj *
 nv50_vbo_validate(struct nv50_context *nv50)
 {
        struct nouveau_grobj *tesla = nv50->screen->tesla;
@@ -783,30 +547,31 @@ nv50_vbo_validate(struct nv50_context *nv50)
 
        /* don't validate if Gallium took away our buffers */
        if (nv50->vtxbuf_nr == 0)
-               return;
-       nv50->vbo_fifo = 0;
+               return NULL;
+
+       if (nv50->screen->force_push ||
+           nv50->vertprog->cfg.edgeflag_in < 16)
+               nv50->vbo_fifo = 0xffff;
 
-       for (i = 0; i < nv50->vtxbuf_nr; ++i)
+       for (i = 0; i < nv50->vtxbuf_nr; i++) {
                if (nv50->vtxbuf[i].stride &&
                    !(nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX))
                        nv50->vbo_fifo = 0xffff;
+       }
 
-       if (NV50_USING_LOATHED_EDGEFLAG(nv50))
-               nv50->vbo_fifo = 0xffff; /* vertprog can't set edgeflag */
-
-       n_ve = MAX2(nv50->vtxelt_nr, nv50->state.vtxelt_nr);
+       n_ve = MAX2(nv50->vtxelt->num_elements, nv50->state.vtxelt_nr);
 
        vtxattr = NULL;
-       vtxbuf = so_new(n_ve * 2, n_ve * 5, nv50->vtxelt_nr * 4);
+       vtxbuf = so_new(n_ve * 2, n_ve * 5, nv50->vtxelt->num_elements * 4);
        vtxfmt = so_new(1, n_ve, 0);
        so_method(vtxfmt, tesla, NV50TCL_VERTEX_ARRAY_ATTRIB(0), n_ve);
 
-       for (i = 0; i < nv50->vtxelt_nr; i++) {
-               struct pipe_vertex_element *ve = &nv50->vtxelt[i];
+       for (i = 0; i < nv50->vtxelt->num_elements; i++) {
+               struct pipe_vertex_element *ve = &nv50->vtxelt->pipe[i];
                struct pipe_vertex_buffer *vb =
                        &nv50->vtxbuf[ve->vertex_buffer_index];
                struct nouveau_bo *bo = nouveau_bo(vb->buffer);
-               uint32_t hw = nv50_vbo_vtxelt_to_hw(ve);
+               uint32_t hw = nv50->vtxelt->hw[i];
 
                if (!vb->stride &&
                    nv50_vbo_static_attrib(nv50, i, &vtxattr, ve, vb)) {
@@ -821,13 +586,13 @@ nv50_vbo_validate(struct nv50_context *nv50)
                }
 
                if (nv50->vbo_fifo) {
-                       so_data  (vtxfmt, hw |
-                                 (ve->instance_divisor ? (1 << 4) : i));
+                       so_data  (vtxfmt, hw | (ve->instance_divisor ? (1 << 4) : i));
                        so_method(vtxbuf, tesla,
                                  NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
                        so_data  (vtxbuf, 0);
                        continue;
                }
+
                so_data(vtxfmt, hw | i);
 
                so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 3);
@@ -855,355 +620,13 @@ nv50_vbo_validate(struct nv50_context *nv50)
                so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
                so_data  (vtxbuf, 0);
        }
-       nv50->state.vtxelt_nr = nv50->vtxelt_nr;
+       nv50->state.vtxelt_nr = nv50->vtxelt->num_elements;
 
-       so_ref (vtxfmt, &nv50->state.vtxfmt);
        so_ref (vtxbuf, &nv50->state.vtxbuf);
        so_ref (vtxattr, &nv50->state.vtxattr);
        so_ref (NULL, &vtxbuf);
-       so_ref (NULL, &vtxfmt);
        so_ref (NULL, &vtxattr);
+       return vtxfmt;
 }
 
-typedef void (*pfn_push)(struct nouveau_channel *, void *);
-
-struct nv50_vbo_emitctx
-{
-       pfn_push push[16];
-       uint8_t *map[16];
-       unsigned stride[16];
-       unsigned nr_ve;
-       unsigned vtx_dwords;
-       unsigned vtx_max;
-
-       float edgeflag;
-       unsigned ve_edgeflag;
-};
-
-static INLINE void
-emit_vtx_next(struct nouveau_channel *chan, struct nv50_vbo_emitctx *emit)
-{
-       unsigned i;
-
-       for (i = 0; i < emit->nr_ve; ++i) {
-               emit->push[i](chan, emit->map[i]);
-               emit->map[i] += emit->stride[i];
-       }
-}
-
-static INLINE void
-emit_vtx(struct nouveau_channel *chan, struct nv50_vbo_emitctx *emit,
-        uint32_t vi)
-{
-       unsigned i;
-
-       for (i = 0; i < emit->nr_ve; ++i)
-               emit->push[i](chan, emit->map[i] + emit->stride[i] * vi);
-}
-
-static INLINE boolean
-nv50_map_vbufs(struct nv50_context *nv50)
-{
-       int i;
-
-       for (i = 0; i < nv50->vtxbuf_nr; ++i) {
-               struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i];
-               unsigned size = vb->stride * (vb->max_index + 1) + 16;
-
-               if (nouveau_bo(vb->buffer)->map)
-                       continue;
-
-               size = vb->stride * (vb->max_index + 1) + 16;
-               size = MIN2(size, vb->buffer->size);
-               if (!size)
-                       size = vb->buffer->size;
-
-               if (nouveau_bo_map_range(nouveau_bo(vb->buffer),
-                                        0, size, NOUVEAU_BO_RD))
-                       break;
-       }
-
-       if (i == nv50->vtxbuf_nr)
-               return TRUE;
-       for (; i >= 0; --i)
-               nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer));
-       return FALSE;
-}
-
-static void
-emit_b32_1(struct nouveau_channel *chan, void *data)
-{
-       uint32_t *v = data;
-
-       OUT_RING(chan, v[0]);
-}
-
-static void
-emit_b32_2(struct nouveau_channel *chan, void *data)
-{
-       uint32_t *v = data;
-
-       OUT_RING(chan, v[0]);
-       OUT_RING(chan, v[1]);
-}
-
-static void
-emit_b32_3(struct nouveau_channel *chan, void *data)
-{
-       uint32_t *v = data;
-
-       OUT_RING(chan, v[0]);
-       OUT_RING(chan, v[1]);
-       OUT_RING(chan, v[2]);
-}
-
-static void
-emit_b32_4(struct nouveau_channel *chan, void *data)
-{
-       uint32_t *v = data;
-
-       OUT_RING(chan, v[0]);
-       OUT_RING(chan, v[1]);
-       OUT_RING(chan, v[2]);
-       OUT_RING(chan, v[3]);
-}
-
-static void
-emit_b16_1(struct nouveau_channel *chan, void *data)
-{
-       uint16_t *v = data;
-
-       OUT_RING(chan, v[0]);
-}
-
-static void
-emit_b16_3(struct nouveau_channel *chan, void *data)
-{
-       uint16_t *v = data;
-
-       OUT_RING(chan, (v[1] << 16) | v[0]);
-       OUT_RING(chan, v[2]);
-}
-
-static void
-emit_b08_1(struct nouveau_channel *chan, void *data)
-{
-       uint8_t *v = data;
-
-       OUT_RING(chan, v[0]);
-}
-
-static void
-emit_b08_3(struct nouveau_channel *chan, void *data)
-{
-       uint8_t *v = data;
-
-       OUT_RING(chan, (v[2] << 16) | (v[1] << 8) | v[0]);
-}
-
-static boolean
-emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit,
-            unsigned start)
-{
-       unsigned i;
-
-       if (nv50_map_vbufs(nv50) == FALSE)
-               return FALSE;
-
-       emit->ve_edgeflag = nv50->vertprog->cfg.edgeflag_in;
-
-       emit->edgeflag = 0.5f;
-       emit->nr_ve = 0;
-       emit->vtx_dwords = 0;
-
-       for (i = 0; i < nv50->vtxelt_nr; ++i) {
-               struct pipe_vertex_element *ve;
-               struct pipe_vertex_buffer *vb;
-               unsigned n, size;
-               const struct util_format_description *desc;
-
-               ve = &nv50->vtxelt[i];
-               vb = &nv50->vtxbuf[ve->vertex_buffer_index];
-               if (!(nv50->vbo_fifo & (1 << i)) || ve->instance_divisor)
-                       continue;
-               n = emit->nr_ve++;
-
-               emit->stride[n] = vb->stride;
-               emit->map[n] = (uint8_t *)nouveau_bo(vb->buffer)->map +
-                       vb->buffer_offset +
-                       (start * vb->stride + ve->src_offset);
-
-               desc = util_format_description(ve->src_format);
-               assert(desc);
-
-               size = util_format_get_component_bits(
-                       ve->src_format, UTIL_FORMAT_COLORSPACE_RGB, 0);
-
-               assert(ve->nr_components > 0 && ve->nr_components <= 4);
-
-               /* It shouldn't be necessary to push the implicit 1s
-                * for case 3 and size 8 cases 1, 2, 3.
-                */
-               switch (size) {
-               default:
-                       NOUVEAU_ERR("unsupported vtxelt size: %u\n", size);
-                       return FALSE;
-               case 32:
-                       switch (ve->nr_components) {
-                       case 1: emit->push[n] = emit_b32_1; break;
-                       case 2: emit->push[n] = emit_b32_2; break;
-                       case 3: emit->push[n] = emit_b32_3; break;
-                       case 4: emit->push[n] = emit_b32_4; break;
-                       }
-                       emit->vtx_dwords += ve->nr_components;
-                       break;
-               case 16:
-                       switch (ve->nr_components) {
-                       case 1: emit->push[n] = emit_b16_1; break;
-                       case 2: emit->push[n] = emit_b32_1; break;
-                       case 3: emit->push[n] = emit_b16_3; break;
-                       case 4: emit->push[n] = emit_b32_2; break;
-                       }
-                       emit->vtx_dwords += (ve->nr_components + 1) >> 1;
-                       break;
-               case 8:
-                       switch (ve->nr_components) {
-                       case 1: emit->push[n] = emit_b08_1; break;
-                       case 2: emit->push[n] = emit_b16_1; break;
-                       case 3: emit->push[n] = emit_b08_3; break;
-                       case 4: emit->push[n] = emit_b32_1; break;
-                       }
-                       emit->vtx_dwords += 1;
-                       break;
-               }
-       }
-
-       emit->vtx_max = 512 / emit->vtx_dwords;
-       if (emit->ve_edgeflag < 16)
-               emit->vtx_max = 1;
-
-       return TRUE;
-}
-
-static INLINE void
-set_edgeflag(struct nouveau_channel *chan,
-            struct nouveau_grobj *tesla,
-            struct nv50_vbo_emitctx *emit, uint32_t index)
-{
-       unsigned i = emit->ve_edgeflag;
-
-       if (i < 16) {
-               float f = *((float *)(emit->map[i] + index * emit->stride[i]));
-
-               if (emit->edgeflag != f) {
-                       emit->edgeflag = f;
-
-                       BEGIN_RING(chan, tesla, 0x15e4, 1);
-                       OUT_RING  (chan, f ? 1 : 0);
-               }
-       }
-}
-
-static boolean
-nv50_push_arrays(struct nv50_context *nv50, unsigned start, unsigned count)
-{
-       struct nouveau_channel *chan = nv50->screen->base.channel;
-       struct nouveau_grobj *tesla = nv50->screen->tesla;
-       struct nv50_vbo_emitctx emit;
 
-       if (emit_prepare(nv50, &emit, start) == FALSE)
-               return FALSE;
-
-       while (count) {
-               unsigned i, dw, nr = MIN2(count, emit.vtx_max);
-               dw = nr * emit.vtx_dwords;
-
-               set_edgeflag(chan, tesla, &emit, 0); /* nr will be 1 */
-
-               BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw);
-               for (i = 0; i < nr; ++i)
-                       emit_vtx_next(chan, &emit);
-
-               count -= nr;
-       }
-
-       return TRUE;
-}
-
-static boolean
-nv50_push_elements_u32(struct nv50_context *nv50, uint32_t *map, unsigned count)
-{
-       struct nouveau_channel *chan = nv50->screen->base.channel;
-       struct nouveau_grobj *tesla = nv50->screen->tesla;
-       struct nv50_vbo_emitctx emit;
-
-       if (emit_prepare(nv50, &emit, 0) == FALSE)
-               return FALSE;
-
-       while (count) {
-               unsigned i, dw, nr = MIN2(count, emit.vtx_max);
-               dw = nr * emit.vtx_dwords;
-
-               set_edgeflag(chan, tesla, &emit, *map);
-
-               BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw);
-               for (i = 0; i < nr; ++i)
-                       emit_vtx(chan, &emit, *map++);
-
-               count -= nr;
-       }
-
-       return TRUE;
-}
-
-static boolean
-nv50_push_elements_u16(struct nv50_context *nv50, uint16_t *map, unsigned count)
-{
-       struct nouveau_channel *chan = nv50->screen->base.channel;
-       struct nouveau_grobj *tesla = nv50->screen->tesla;
-       struct nv50_vbo_emitctx emit;
-
-       if (emit_prepare(nv50, &emit, 0) == FALSE)
-               return FALSE;
-
-       while (count) {
-               unsigned i, dw, nr = MIN2(count, emit.vtx_max);
-               dw = nr * emit.vtx_dwords;
-
-               set_edgeflag(chan, tesla, &emit, *map);
-
-               BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw);
-               for (i = 0; i < nr; ++i)
-                       emit_vtx(chan, &emit, *map++);
-
-               count -= nr;
-       }
-
-       return TRUE;
-}
-
-static boolean
-nv50_push_elements_u08(struct nv50_context *nv50, uint8_t *map, unsigned count)
-{
-       struct nouveau_channel *chan = nv50->screen->base.channel;
-       struct nouveau_grobj *tesla = nv50->screen->tesla;
-       struct nv50_vbo_emitctx emit;
-
-       if (emit_prepare(nv50, &emit, 0) == FALSE)
-               return FALSE;
-
-       while (count) {
-               unsigned i, dw, nr = MIN2(count, emit.vtx_max);
-               dw = nr * emit.vtx_dwords;
-
-               set_edgeflag(chan, tesla, &emit, *map);
-
-               BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw);
-               for (i = 0; i < nr; ++i)
-                       emit_vtx(chan, &emit, *map++);
-
-               count -= nr;
-       }
-
-       return TRUE;
-}
index 513cc0f5d44e433d9051ac85232fc65a3f776cb1..b7ad6b2020608169d2939ef541537aa1945e3585 100644 (file)
@@ -36,6 +36,7 @@ static void r300_blitter_save_states(struct r300_context* r300)
     util_blitter_save_vertex_shader(r300->blitter, r300->vs_state.state);
     util_blitter_save_viewport(r300->blitter, &r300->viewport);
     util_blitter_save_clip(r300->blitter, &r300->clip);
+    util_blitter_save_vertex_elements(r300->blitter, r300->velems);
 }
 
 /* Clear currently bound buffers. */
index e0a553232737d3a90eefc09188798a18b932984a..923e1e541ff490e74983e7d21ea44c4e99a34dcf 100644 (file)
@@ -60,7 +60,6 @@ static void r300_destroy_context(struct pipe_context* context)
     FREE(r300->rs_block_state.state);
     FREE(r300->scissor_state.state);
     FREE(r300->textures_state.state);
-    FREE(r300->vertex_stream_state.state);
     FREE(r300->vap_output_state.state);
     FREE(r300->viewport_state.state);
     FREE(r300->ztop_state.state);
@@ -147,7 +146,6 @@ static void r300_setup_atoms(struct r300_context* r300)
     r300->rs_block_state.state = CALLOC_STRUCT(r300_rs_block);
     r300->scissor_state.state = CALLOC_STRUCT(pipe_scissor_state);
     r300->textures_state.state = CALLOC_STRUCT(r300_textures_state);
-    r300->vertex_stream_state.state = CALLOC_STRUCT(r300_vertex_stream_state);
     r300->vap_output_state.state = CALLOC_STRUCT(r300_vap_output_state);
     r300->viewport_state.state = CALLOC_STRUCT(r300_viewport_state);
     r300->ztop_state.state = CALLOC_STRUCT(r300_ztop_state);
index c2825d5f2676923349d0de20758fa71ff8714421..985e33911263f073c5430e2d48935047f32c149c 100644 (file)
@@ -278,6 +278,23 @@ struct r300_texture {
     enum r300_buffer_tiling microtile, macrotile;
 };
 
+struct r300_vertex_info {
+    /* Parent class */
+    struct vertex_info vinfo;
+
+    /* R300_VAP_PROG_STREAK_CNTL_[0-7] */
+    uint32_t vap_prog_stream_cntl[8];
+    /* R300_VAP_PROG_STREAK_CNTL_EXT_[0-7] */
+    uint32_t vap_prog_stream_cntl_ext[8];
+};
+
+struct r300_vertex_element_state {
+    unsigned count;
+    struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
+
+    struct r300_vertex_stream_state vertex_stream;
+};
+
 extern struct pipe_viewport_state r300_viewport_identity;
 
 struct r300_context {
@@ -350,8 +367,7 @@ struct r300_context {
     int vertex_buffer_count;
     int vertex_buffer_max_index;
     /* Vertex elements for Gallium. */
-    struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
-    int vertex_element_count;
+    struct r300_vertex_element_state *velems;
 
     /* Vertex info for Draw. */
     struct vertex_info vertex_info;
index 51fc590e5d927615c8226056e38db0ca824d6252..55e9217fd323cc24f92dc09b0426b2817d2caeb8 100644 (file)
@@ -757,9 +757,9 @@ void r300_emit_textures_state(struct r300_context *r300,
 void r300_emit_aos(struct r300_context* r300, unsigned offset)
 {
     struct pipe_vertex_buffer *vb1, *vb2, *vbuf = r300->vertex_buffer;
-    struct pipe_vertex_element *velem = r300->vertex_element;
+    struct pipe_vertex_element *velem = r300->velems->velem;
     int i;
-    unsigned size1, size2, aos_count = r300->vertex_element_count;
+    unsigned size1, size2, aos_count = r300->velems->count;
     unsigned packet_size = (aos_count * 3 + 1) / 2;
     CS_LOCALS(r300);
 
@@ -1004,7 +1004,7 @@ void r300_emit_buffer_validate(struct r300_context *r300,
         (struct r300_textures_state*)r300->textures_state.state;
     struct r300_texture* tex;
     struct pipe_vertex_buffer *vbuf = r300->vertex_buffer;
-    struct pipe_vertex_element *velem = r300->vertex_element;
+    struct pipe_vertex_element *velem = r300->velems->velem;
     struct pipe_buffer *pbuf;
     unsigned i;
     boolean invalid = FALSE;
@@ -1062,7 +1062,7 @@ validate:
     }
     /* ...vertex buffers for HWTCL path... */
     if (do_validate_vertex_buffers) {
-        for (i = 0; i < r300->vertex_element_count; i++) {
+        for (i = 0; i < r300->velems->count; i++) {
             pbuf = vbuf[velem[i].vertex_buffer_index].buffer;
 
             if (!r300->winsys->add_buffer(r300->winsys, pbuf,
index c8420bcdd5be0fb28b9e3f565187a777eb2abd09..9c001ae186de24cea10f958837ff928994bdb011 100644 (file)
@@ -143,7 +143,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
 {
     struct pipe_vertex_element* velem;
     struct pipe_vertex_buffer* vbuf;
-    unsigned vertex_element_count = r300->vertex_element_count;
+    unsigned vertex_element_count = r300->velems->count;
     unsigned i, v, vbi, dw, elem_offset, dwords;
 
     /* Size of the vertex, in dwords. */
@@ -166,7 +166,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
 
     /* Calculate the vertex size, offsets, strides etc. and map the buffers. */
     for (i = 0; i < vertex_element_count; i++) {
-        velem = &r300->vertex_element[i];
+        velem = &r300->velems->velem[i];
         offset[i] = velem->src_offset / 4;
         size[i] = util_format_get_blocksize(velem->src_format) / 4;
         vertex_size += size[i];
@@ -183,7 +183,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
         }
     }
 
-    dwords = 10 + count * vertex_size;
+    dwords = 9 + count * vertex_size;
 
     r300_reserve_cs_space(r300, r300_get_num_dirty_dwords(r300) + dwords);
     r300_emit_buffer_validate(r300, FALSE, NULL);
@@ -193,8 +193,9 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
     OUT_CS_REG(R300_GA_COLOR_CONTROL,
             r300_provoking_vertex_fixes(r300, mode));
     OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size);
-    OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0);
-    OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count - 1);
+    OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
+    OUT_CS(count - 1);
+    OUT_CS(0);
     OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size);
     OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) |
             r300_translate_primitive(mode));
@@ -202,7 +203,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
     /* Emit vertices. */
     for (v = 0; v < count; v++) {
         for (i = 0; i < vertex_element_count; i++) {
-            velem = &r300->vertex_element[i];
+            velem = &r300->velems->velem[i];
             vbi = velem->vertex_buffer_index;
             elem_offset = offset[i] + stride[vbi] * (v + start);
 
@@ -215,7 +216,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
 
     /* Unmap buffers. */
     for (i = 0; i < vertex_element_count; i++) {
-        vbi = r300->vertex_element[i].vertex_buffer_index;
+        vbi = r300->velems->velem[i].vertex_buffer_index;
 
         if (map[vbi]) {
             vbuf = &r300->vertex_buffer[vbi];
@@ -238,15 +239,16 @@ static void r300_emit_draw_arrays(struct r300_context *r300,
 
     if (alt_num_verts) {
         assert(count < (1 << 24));
-        BEGIN_CS(10);
+        BEGIN_CS(9);
         OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count);
     } else {
-        BEGIN_CS(8);
+        BEGIN_CS(7);
     }
     OUT_CS_REG(R300_GA_COLOR_CONTROL,
             r300_provoking_vertex_fixes(r300, mode));
-    OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0);
-    OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count - 1);
+    OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
+    OUT_CS(count - 1);
+    OUT_CS(0);
     OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
     OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) |
            r300_translate_primitive(mode) |
@@ -281,15 +283,16 @@ static void r300_emit_draw_elements(struct r300_context *r300,
     maxIndex = MIN2(maxIndex, r300->vertex_buffer_max_index);
 
     if (alt_num_verts) {
-        BEGIN_CS(16);
+        BEGIN_CS(15);
         OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count);
     } else {
-        BEGIN_CS(14);
+        BEGIN_CS(13);
     }
     OUT_CS_REG(R300_GA_COLOR_CONTROL,
             r300_provoking_vertex_fixes(r300, mode));
-    OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, minIndex);
-    OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, maxIndex);
+    OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
+    OUT_CS(maxIndex);
+    OUT_CS(minIndex);
     OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0);
     if (indexSize == 4) {
         count_dwords = count;
index 8c9f6046228810a0da00dc405ab61e355b394b8f..bd4c2766cb1426859c0fe11a538ef78ee8759a01 100644 (file)
@@ -1046,19 +1046,17 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe,
     if (r300->draw) {
         draw_flush(r300->draw);
         draw_set_vertex_buffers(r300->draw, count, buffers);
-    } else {
-        r300->vertex_stream_state.dirty = TRUE;
     }
 }
 
 static boolean r300_validate_aos(struct r300_context *r300)
 {
     struct pipe_vertex_buffer *vbuf = r300->vertex_buffer;
-    struct pipe_vertex_element *velem = r300->vertex_element;
+    struct pipe_vertex_element *velem = r300->velems->velem;
     int i;
 
     /* Check if formats and strides are aligned to the size of DWORD. */
-    for (i = 0; i < r300->vertex_element_count; i++) {
+    for (i = 0; i < r300->velems->count; i++) {
         if (vbuf[velem[i].vertex_buffer_index].stride % 4 != 0 ||
             util_format_get_blocksize(velem[i].src_format) % 4 != 0) {
             return FALSE;
@@ -1067,20 +1065,209 @@ static boolean r300_validate_aos(struct r300_context *r300)
     return TRUE;
 }
 
-static void r300_set_vertex_elements(struct pipe_context* pipe,
-                                    unsigned count,
-                                    const struct pipe_vertex_element* elements)
+static void r300_draw_emit_attrib(struct r300_context* r300,
+                                  enum attrib_emit emit,
+                                  enum interp_mode interp,
+                                  int index)
 {
-    struct r300_context* r300 = r300_context(pipe);
+    struct r300_vertex_shader* vs = r300->vs_state.state;
+    struct tgsi_shader_info* info = &vs->info;
+    int output;
+
+    output = draw_find_shader_output(r300->draw,
+                                     info->output_semantic_name[index],
+                                     info->output_semantic_index[index]);
+    draw_emit_vertex_attr(&r300->vertex_info, emit, interp, output);
+}
+
+static void r300_draw_emit_all_attribs(struct r300_context* r300)
+{
+    struct r300_vertex_shader* vs = r300->vs_state.state;
+    struct r300_shader_semantics* vs_outputs = &vs->outputs;
+    int i, gen_count;
+
+    /* Position. */
+    if (vs_outputs->pos != ATTR_UNUSED) {
+        r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
+                              vs_outputs->pos);
+    } else {
+        assert(0);
+    }
+
+    /* Point size. */
+    if (vs_outputs->psize != ATTR_UNUSED) {
+        r300_draw_emit_attrib(r300, EMIT_1F_PSIZE, INTERP_POS,
+                              vs_outputs->psize);
+    }
+
+    /* Colors. */
+    for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+        if (vs_outputs->color[i] != ATTR_UNUSED) {
+            r300_draw_emit_attrib(r300, EMIT_4F, INTERP_LINEAR,
+                                  vs_outputs->color[i]);
+        }
+    }
+
+    /* XXX Back-face colors. */
+
+    /* Texture coordinates. */
+    gen_count = 0;
+    for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
+        if (vs_outputs->generic[i] != ATTR_UNUSED) {
+            r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
+                                  vs_outputs->generic[i]);
+            gen_count++;
+        }
+    }
+
+    /* Fog coordinates. */
+    if (vs_outputs->fog != ATTR_UNUSED) {
+        r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
+                              vs_outputs->fog);
+        gen_count++;
+    }
+
+    /* XXX magic */
+    assert(gen_count <= 8);
+}
+
+/* Update the PSC tables. */
+static void r300_vertex_psc(struct r300_vertex_element_state *velems)
+{
+    struct r300_vertex_stream_state *vstream = &velems->vertex_stream;
+    uint16_t type, swizzle;
+    enum pipe_format format;
+    unsigned i;
+
+    assert(velems->count <= 16);
+
+    /* Vertex shaders have no semantics on their inputs,
+     * so PSC should just route stuff based on the vertex elements,
+     * and not on attrib information. */
+    for (i = 0; i < velems->count; i++) {
+        format = velems->velem[i].src_format;
+
+        type = r300_translate_vertex_data_type(format) |
+            (i << R300_DST_VEC_LOC_SHIFT);
+        swizzle = r300_translate_vertex_data_swizzle(format);
+
+        if (i & 1) {
+            vstream->vap_prog_stream_cntl[i >> 1] |= type << 16;
+            vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16;
+        } else {
+            vstream->vap_prog_stream_cntl[i >> 1] |= type;
+            vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle;
+        }
+    }
 
-    memcpy(r300->vertex_element,
-           elements,
-           sizeof(struct pipe_vertex_element) * count);
-    r300->vertex_element_count = count;
+    /* Set the last vector in the PSC. */
+    if (i) {
+        i -= 1;
+    }
+    vstream->vap_prog_stream_cntl[i >> 1] |=
+        (R300_LAST_VEC << (i & 1 ? 16 : 0));
+
+    vstream->count = (i >> 1) + 1;
+}
+
+/* Update the PSC tables for SW TCL, using Draw. */
+static void r300_swtcl_vertex_psc(struct r300_context *r300,
+                                  struct r300_vertex_element_state *velems)
+{
+    struct r300_vertex_stream_state *vstream = &velems->vertex_stream;
+    struct r300_vertex_shader* vs = r300->vs_state.state;
+    struct vertex_info* vinfo = &r300->vertex_info;
+    uint16_t type, swizzle;
+    enum pipe_format format;
+    unsigned i, attrib_count;
+    int* vs_output_tab = vs->stream_loc_notcl;
+
+    /* For each Draw attribute, route it to the fragment shader according
+     * to the vs_output_tab. */
+    attrib_count = vinfo->num_attribs;
+    DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count);
+    for (i = 0; i < attrib_count; i++) {
+        DBG(r300, DBG_DRAW, "r300: attrib: offset %d, interp %d, size %d,"
+               " vs_output_tab %d\n", vinfo->attrib[i].src_index,
+               vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit,
+               vs_output_tab[i]);
+    }
+
+    for (i = 0; i < attrib_count; i++) {
+        /* Make sure we have a proper destination for our attribute. */
+        assert(vs_output_tab[i] != -1);
+
+        format = draw_translate_vinfo_format(vinfo->attrib[i].emit);
+
+        /* Obtain the type of data in this attribute. */
+        type = r300_translate_vertex_data_type(format) |
+            vs_output_tab[i] << R300_DST_VEC_LOC_SHIFT;
+
+        /* Obtain the swizzle for this attribute. Note that the default
+         * swizzle in the hardware is not XYZW! */
+        swizzle = r300_translate_vertex_data_swizzle(format);
+
+        /* Add the attribute to the PSC table. */
+        if (i & 1) {
+            vstream->vap_prog_stream_cntl[i >> 1] |= type << 16;
+            vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16;
+        } else {
+            vstream->vap_prog_stream_cntl[i >> 1] |= type;
+            vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle;
+        }
+    }
+
+    /* Set the last vector in the PSC. */
+    if (i) {
+        i -= 1;
+    }
+    vstream->vap_prog_stream_cntl[i >> 1] |=
+        (R300_LAST_VEC << (i & 1 ? 16 : 0));
+
+    vstream->count = (i >> 1) + 1;
+}
+
+static void* r300_create_vertex_elements_state(struct pipe_context* pipe,
+                                               unsigned count,
+                                               const struct pipe_vertex_element* attribs)
+{
+    struct r300_context *r300 = r300_context(pipe);
+    struct r300_screen* r300screen = r300_screen(pipe->screen);
+    struct r300_vertex_element_state *velems;
+
+    assert(count <= PIPE_MAX_ATTRIBS);
+    velems = CALLOC_STRUCT(r300_vertex_element_state);
+    if (velems != NULL) {
+        velems->count = count;
+        memcpy(velems->velem, attribs, sizeof(struct pipe_vertex_element) * count);
+
+        if (r300screen->caps->has_tcl) {
+            r300_vertex_psc(velems);
+        } else {
+            memset(&r300->vertex_info, 0, sizeof(struct vertex_info));
+            r300_draw_emit_all_attribs(r300);
+            draw_compute_vertex_size(&r300->vertex_info);
+            r300_swtcl_vertex_psc(r300, velems);
+        }
+    }
+    return velems;
+}
+
+static void r300_bind_vertex_elements_state(struct pipe_context *pipe,
+                                            void *state)
+{
+    struct r300_context *r300 = r300_context(pipe);
+    struct r300_vertex_element_state *velems = state;
+
+    if (velems == NULL) {
+        return;
+    }
+
+    r300->velems = velems;
 
     if (r300->draw) {
         draw_flush(r300->draw);
-        draw_set_vertex_elements(r300->draw, count, elements);
+        draw_set_vertex_elements(r300->draw, velems->count, velems->velem);
     }
 
     if (!r300_validate_aos(r300)) {
@@ -1088,6 +1275,14 @@ static void r300_set_vertex_elements(struct pipe_context* pipe,
         assert(0);
         abort();
     }
+
+    UPDATE_STATE(&velems->vertex_stream, r300->vertex_stream_state);
+    r300->vertex_stream_state.size = (1 + velems->vertex_stream.count) * 2;
+}
+
+static void r300_delete_vertex_elements_state(struct pipe_context *pipe, void *state)
+{
+   FREE(state);
 }
 
 static void* r300_create_vs_state(struct pipe_context* pipe,
@@ -1262,7 +1457,10 @@ void r300_init_state_functions(struct r300_context* r300)
     r300->context.set_viewport_state = r300_set_viewport_state;
 
     r300->context.set_vertex_buffers = r300_set_vertex_buffers;
-    r300->context.set_vertex_elements = r300_set_vertex_elements;
+
+    r300->context.create_vertex_elements_state = r300_create_vertex_elements_state;
+    r300->context.bind_vertex_elements_state = r300_bind_vertex_elements_state;
+    r300->context.delete_vertex_elements_state = r300_delete_vertex_elements_state;
 
     r300->context.create_vs_state = r300_create_vs_state;
     r300->context.bind_vs_state = r300_bind_vs_state;
index e9e40747ef1f5f9d8f939ca857e0411ac65c08a1..6b9f61acd7bcfc95279bb23b328c101c6ca1080b 100644 (file)
 /* r300_state_derived: Various bits of state which are dependent upon
  * currently bound CSO data. */
 
-static void r300_draw_emit_attrib(struct r300_context* r300,
-                                  enum attrib_emit emit,
-                                  enum interp_mode interp,
-                                  int index)
-{
-    struct r300_vertex_shader* vs = r300->vs_state.state;
-    struct tgsi_shader_info* info = &vs->info;
-    int output;
-
-    output = draw_find_shader_output(r300->draw,
-                                     info->output_semantic_name[index],
-                                     info->output_semantic_index[index]);
-    draw_emit_vertex_attr(&r300->vertex_info, emit, interp, output);
-}
-
-static void r300_draw_emit_all_attribs(struct r300_context* r300)
-{
-    struct r300_vertex_shader* vs = r300->vs_state.state;
-    struct r300_shader_semantics* vs_outputs = &vs->outputs;
-    int i, gen_count;
-
-    /* Position. */
-    if (vs_outputs->pos != ATTR_UNUSED) {
-        r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
-                              vs_outputs->pos);
-    } else {
-        assert(0);
-    }
-
-    /* Point size. */
-    if (vs_outputs->psize != ATTR_UNUSED) {
-        r300_draw_emit_attrib(r300, EMIT_1F_PSIZE, INTERP_POS,
-                              vs_outputs->psize);
-    }
-
-    /* Colors. */
-    for (i = 0; i < ATTR_COLOR_COUNT; i++) {
-        if (vs_outputs->color[i] != ATTR_UNUSED) {
-            r300_draw_emit_attrib(r300, EMIT_4F, INTERP_LINEAR,
-                                  vs_outputs->color[i]);
-        }
-    }
-
-    /* XXX Back-face colors. */
-
-    /* Texture coordinates. */
-    gen_count = 0;
-    for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
-        if (vs_outputs->generic[i] != ATTR_UNUSED) {
-            r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
-                                  vs_outputs->generic[i]);
-            gen_count++;
-        }
-    }
-
-    /* Fog coordinates. */
-    if (vs_outputs->fog != ATTR_UNUSED) {
-        r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
-                              vs_outputs->fog);
-        gen_count++;
-    }
-
-    /* XXX magic */
-    assert(gen_count <= 8);
-}
-
-/* Update the PSC tables. */
-/* XXX move this function into r300_state.c after TCL-bypass gets removed
- * XXX because this one is dependent only on vertex elements. */
-static void r300_vertex_psc(struct r300_context* r300)
-{
-    struct r300_vertex_shader* vs = r300->vs_state.state;
-    struct r300_vertex_stream_state *vformat =
-        (struct r300_vertex_stream_state*)r300->vertex_stream_state.state;
-    uint16_t type, swizzle;
-    enum pipe_format format;
-    unsigned i;
-    int identity[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
-    int* stream_tab;
-
-    memset(vformat, 0, sizeof(struct r300_vertex_stream_state));
-
-    stream_tab = identity;
-
-    /* Vertex shaders have no semantics on their inputs,
-     * so PSC should just route stuff based on the vertex elements,
-     * a