Merge commit 'origin/master' into gallium-sw-api-2
authorKeith Whitwell <keithw@vmware.com>
Wed, 10 Mar 2010 08:29:27 +0000 (08:29 +0000)
committerKeith Whitwell <keithw@vmware.com>
Wed, 10 Mar 2010 08:29:27 +0000 (08:29 +0000)
138 files changed:
progs/gallium/python/retrace/interpreter.py
progs/tests/fbotest1.c
progs/tests/fbotest2.c
progs/tests/fbotest3.c
src/gallium/auxiliary/cso_cache/cso_cache.c
src/gallium/auxiliary/cso_cache/cso_cache.h
src/gallium/auxiliary/cso_cache/cso_context.c
src/gallium/auxiliary/cso_cache/cso_context.h
src/gallium/auxiliary/draw/draw_pt.c
src/gallium/auxiliary/gallivm/lp_bld_arit.c
src/gallium/auxiliary/gallivm/lp_bld_const.c
src/gallium/auxiliary/gallivm/lp_bld_logic.c
src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
src/gallium/auxiliary/gallivm/lp_bld_type.h
src/gallium/auxiliary/util/u_blit.c
src/gallium/auxiliary/util/u_blitter.c
src/gallium/auxiliary/util/u_blitter.h
src/gallium/auxiliary/util/u_draw_quad.c
src/gallium/auxiliary/util/u_dump_state.c
src/gallium/auxiliary/util/u_format.h
src/gallium/auxiliary/util/u_format_pack.py
src/gallium/auxiliary/util/u_gen_mipmap.c
src/gallium/auxiliary/vl/vl_compositor.c
src/gallium/auxiliary/vl/vl_compositor.h
src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
src/gallium/docs/source/context.rst
src/gallium/docs/source/cso/velems.rst [new file with mode: 0644]
src/gallium/drivers/cell/ppu/cell_context.h
src/gallium/drivers/cell/ppu/cell_state_vertex.c
src/gallium/drivers/failover/fo_context.h
src/gallium/drivers/failover/fo_state.c
src/gallium/drivers/failover/fo_state_emit.c
src/gallium/drivers/i915/i915_context.h
src/gallium/drivers/i915/i915_state.c
src/gallium/drivers/i965/brw_context.h
src/gallium/drivers/i965/brw_draw_upload.c
src/gallium/drivers/i965/brw_pipe_vertex.c
src/gallium/drivers/i965/brw_structs.h
src/gallium/drivers/identity/id_context.c
src/gallium/drivers/llvmpipe/lp_context.c
src/gallium/drivers/llvmpipe/lp_context.h
src/gallium/drivers/llvmpipe/lp_state.h
src/gallium/drivers/llvmpipe/lp_state_vertex.c
src/gallium/drivers/nouveau/nouveau_screen.c
src/gallium/drivers/nouveau/nouveau_util.h
src/gallium/drivers/nv30/nv30_context.h
src/gallium/drivers/nv30/nv30_miptree.c
src/gallium/drivers/nv30/nv30_state.c
src/gallium/drivers/nv30/nv30_vbo.c
src/gallium/drivers/nv40/nv40_context.h
src/gallium/drivers/nv40/nv40_state.c
src/gallium/drivers/nv40/nv40_state_emit.c
src/gallium/drivers/nv40/nv40_vbo.c
src/gallium/drivers/nv50/Makefile
src/gallium/drivers/nv50/nv50_clear.c
src/gallium/drivers/nv50/nv50_context.c
src/gallium/drivers/nv50/nv50_context.h
src/gallium/drivers/nv50/nv50_program.c
src/gallium/drivers/nv50/nv50_push.c [new file with mode: 0644]
src/gallium/drivers/nv50/nv50_screen.c
src/gallium/drivers/nv50/nv50_screen.h
src/gallium/drivers/nv50/nv50_state.c
src/gallium/drivers/nv50/nv50_state_validate.c
src/gallium/drivers/nv50/nv50_tex.c
src/gallium/drivers/nv50/nv50_vbo.c
src/gallium/drivers/r300/r300_blit.c
src/gallium/drivers/r300/r300_context.c
src/gallium/drivers/r300/r300_context.h
src/gallium/drivers/r300/r300_emit.c
src/gallium/drivers/r300/r300_render.c
src/gallium/drivers/r300/r300_state.c
src/gallium/drivers/r300/r300_state_derived.c
src/gallium/drivers/r300/r300_state_inlines.h
src/gallium/drivers/softpipe/sp_context.c
src/gallium/drivers/softpipe/sp_context.h
src/gallium/drivers/softpipe/sp_state.h
src/gallium/drivers/softpipe/sp_state_vertex.c
src/gallium/drivers/svga/svga_context.h
src/gallium/drivers/svga/svga_pipe_vertex.c
src/gallium/drivers/svga/svga_state_need_swtnl.c
src/gallium/drivers/svga/svga_state_rss.c
src/gallium/drivers/svga/svga_state_vdecl.c
src/gallium/drivers/svga/svga_state_vs.c
src/gallium/drivers/svga/svga_swtnl_state.c
src/gallium/drivers/trace/tr_context.c
src/gallium/drivers/trace/tr_dump_state.c
src/gallium/include/pipe/p_compiler.h
src/gallium/include/pipe/p_context.h
src/gallium/include/pipe/p_state.h
src/gallium/state_trackers/python/p_context.i
src/gallium/state_trackers/vega/api_masks.c
src/gallium/state_trackers/vega/polygon.c
src/gallium/state_trackers/vega/renderer.c
src/gallium/state_trackers/vega/vg_context.c
src/gallium/state_trackers/vega/vg_context.h
src/gallium/state_trackers/xorg/xorg_renderer.c
src/gallium/state_trackers/xorg/xorg_renderer.h
src/mesa/SConscript
src/mesa/drivers/dri/i965/brw_disasm.c
src/mesa/drivers/dri/i965/brw_program.c
src/mesa/drivers/dri/i965/brw_vs_emit.c
src/mesa/drivers/dri/i965/brw_wm_glsl.c
src/mesa/drivers/dri/r200/Makefile
src/mesa/drivers/dri/r200/r200_context.c
src/mesa/drivers/dri/r200/radeon_pixel_read.c [new symlink]
src/mesa/drivers/dri/r300/Makefile
src/mesa/drivers/dri/r300/r300_cmdbuf.c
src/mesa/drivers/dri/r300/r300_context.c
src/mesa/drivers/dri/r300/r300_state.c
src/mesa/drivers/dri/r300/r300_tex.c
src/mesa/drivers/dri/r300/r300_tex.h
src/mesa/drivers/dri/r300/radeon_pixel_read.c [new symlink]
src/mesa/drivers/dri/r600/Makefile
src/mesa/drivers/dri/r600/r600_context.c
src/mesa/drivers/dri/r600/radeon_pixel_read.c [new symlink]
src/mesa/drivers/dri/radeon/Makefile
src/mesa/drivers/dri/radeon/radeon_common.h
src/mesa/drivers/dri/radeon/radeon_common_context.h
src/mesa/drivers/dri/radeon/radeon_context.c
src/mesa/drivers/dri/radeon/radeon_fbo.c
src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
src/mesa/drivers/dri/radeon/radeon_pixel_read.c [new file with mode: 0644]
src/mesa/drivers/dri/radeon/radeon_texture.c
src/mesa/drivers/dri/radeon/radeon_texture.h
src/mesa/glapi/glapi.c
src/mesa/glapi/glapi.h
src/mesa/glapi/glapi_entrypoint.c [new file with mode: 0644]
src/mesa/glapi/glapi_getproc.c
src/mesa/glapi/glapi_priv.h [new file with mode: 0644]
src/mesa/sources.mak
src/mesa/state_tracker/st_cb_bitmap.c
src/mesa/state_tracker/st_cb_clear.c
src/mesa/state_tracker/st_cb_drawpixels.c
src/mesa/state_tracker/st_context.c
src/mesa/state_tracker/st_context.h
src/mesa/state_tracker/st_draw.c
src/mesa/state_tracker/st_draw_feedback.c

index b30469dfaeebc75b8c670bd5c790f77b8595784d..1a9618125535768250f054924229e3d7719d1f05 100755 (executable)
@@ -551,7 +551,6 @@ class Context(Object):
                 data = vbuf.buffer.read()
                 values = unpack_from(format, data, offset)
                 sys.stdout.write('\t\t{' + ', '.join(map(str, values)) + '},\n')
-                assert len(values) == velem.nr_components
             sys.stdout.write('\t},\n')
         sys.stdout.flush()
 
index 0cd7f95c355c616755777adf861b0e9c78b61e8f..a95fdff74c35b32ac74ca125a460b51a25de5142 100644 (file)
@@ -36,8 +36,8 @@ Display( void )
 
    /* draw to user framebuffer */
    glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, MyFB);
-   glDrawBuffer(GL_COLOR_ATTACHMENT1_EXT);
-   glReadBuffer(GL_COLOR_ATTACHMENT1_EXT);
+   glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
+   glReadBuffer(GL_COLOR_ATTACHMENT0_EXT);
 
    status = glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT);
    if (status != GL_FRAMEBUFFER_COMPLETE_EXT) {
@@ -161,7 +161,7 @@ Init( void )
    assert(i == MyFB);
 
    CheckError(__LINE__);
-   glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT1_EXT,
+   glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT,
                                 GL_RENDERBUFFER_EXT, MyRB);
 
    glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_RGB, Width, Height);
index f9c506193f64196e27c5fd391b10905fd43f2939..872b46279e6acf802e9cfce186bbfdc05c445e2b 100644 (file)
@@ -40,8 +40,8 @@ Display( void )
 
    /* draw to user framebuffer */
    glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, MyFB);
-   glDrawBuffer(GL_COLOR_ATTACHMENT1_EXT);
-   glReadBuffer(GL_COLOR_ATTACHMENT1_EXT);
+   glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
+   glReadBuffer(GL_COLOR_ATTACHMENT0_EXT);
 
    status = glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT);
    if (status != GL_FRAMEBUFFER_COMPLETE_EXT) {
@@ -163,7 +163,7 @@ Init( void )
    glGenRenderbuffersEXT(1, &ColorRb);
    glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, ColorRb);
    assert(glIsRenderbufferEXT(ColorRb));
-   glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT1_EXT,
+   glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT,
                                 GL_RENDERBUFFER_EXT, ColorRb);
    glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_RGB, Width, Height);
 
index 8e288b38b8323b4a7f7b0220da851904c2625dd8..c176f82d2ba9661f4a18d91a7215121f17e74f7a 100644 (file)
@@ -50,8 +50,8 @@ Display( void )
 
    /* draw to user framebuffer */
    glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, MyFB);
-   glDrawBuffer(GL_COLOR_ATTACHMENT1_EXT);
-   glReadBuffer(GL_COLOR_ATTACHMENT1_EXT);
+   glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
+   glReadBuffer(GL_COLOR_ATTACHMENT0_EXT);
 
    status = glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT);
    if (status != GL_FRAMEBUFFER_COMPLETE_EXT) {
@@ -189,7 +189,7 @@ Init( void )
    glGenRenderbuffersEXT(1, &ColorRb);
    glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, ColorRb);
    assert(glIsRenderbufferEXT(ColorRb));
-   glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT1_EXT,
+   glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT,
                                 GL_RENDERBUFFER_EXT, ColorRb);
    glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_RGB, Width, Height);
 
index a6a07e72c2f985bdad571aa95bb0a64f07e09747..900c64df4b93279ac951a780a63bfb15a0b1b045 100644 (file)
@@ -43,6 +43,7 @@ struct cso_cache {
    struct cso_hash *vs_hash;
    struct cso_hash *rasterizer_hash;
    struct cso_hash *sampler_hash;
+   struct cso_hash *velements_hash;
    int    max_size;
 
    cso_sanitize_callback sanitize_cb;
@@ -108,6 +109,9 @@ static struct cso_hash *_cso_hash_for_type(struct cso_cache *sc, enum cso_cache_
    case CSO_VERTEX_SHADER:
       hash = sc->vs_hash;
       break;
+   case CSO_VELEMENTS:
+      hash = sc->velements_hash;
+      break;
    }
 
    return hash;
@@ -161,6 +165,13 @@ static void delete_vs_state(void *state, void *data)
    FREE(state);
 }
 
+static void delete_velements(void *state, void *data)
+{
+   struct cso_velements *cso = (struct cso_velements *)state;
+   if (cso->delete_state)
+      cso->delete_state(cso->context, cso->data);
+   FREE(state);
+}
 
 static INLINE void delete_cso(void *state, enum cso_cache_type type)
 {
@@ -183,6 +194,9 @@ static INLINE void delete_cso(void *state, enum cso_cache_type type)
    case CSO_VERTEX_SHADER:
       delete_vs_state(state, 0);
       break;
+   case CSO_VELEMENTS:
+      delete_velements(state, 0);
+      break;
    default:
       assert(0);
       FREE(state);
@@ -294,6 +308,7 @@ struct cso_cache *cso_cache_create(void)
    sc->rasterizer_hash    = cso_hash_create();
    sc->fs_hash            = cso_hash_create();
    sc->vs_hash            = cso_hash_create();
+   sc->velements_hash     = cso_hash_create();
    sc->sanitize_cb        = sanitize_cb;
    sc->sanitize_data      = 0;
 
@@ -325,6 +340,9 @@ void cso_for_each_state(struct cso_cache *sc, enum cso_cache_type type,
    case CSO_VERTEX_SHADER:
       hash = sc->vs_hash;
       break;
+   case CSO_VELEMENTS:
+      hash = sc->velements_hash;
+      break;
    }
 
    iter = cso_hash_first_node(hash);
@@ -351,6 +369,7 @@ void cso_cache_delete(struct cso_cache *sc)
    cso_for_each_state(sc, CSO_VERTEX_SHADER, delete_vs_state, 0);
    cso_for_each_state(sc, CSO_RASTERIZER, delete_rasterizer_state, 0);
    cso_for_each_state(sc, CSO_SAMPLER, delete_sampler_state, 0);
+   cso_for_each_state(sc, CSO_VELEMENTS, delete_velements, 0);
 
    cso_hash_delete(sc->blend_hash);
    cso_hash_delete(sc->sampler_hash);
@@ -358,6 +377,7 @@ void cso_cache_delete(struct cso_cache *sc)
    cso_hash_delete(sc->rasterizer_hash);
    cso_hash_delete(sc->fs_hash);
    cso_hash_delete(sc->vs_hash);
+   cso_hash_delete(sc->velements_hash);
    FREE(sc);
 }
 
@@ -372,6 +392,7 @@ void cso_set_maximum_cache_size(struct cso_cache *sc, int number)
    sanitize_hash(sc, sc->vs_hash, CSO_VERTEX_SHADER, sc->max_size);
    sanitize_hash(sc, sc->rasterizer_hash, CSO_RASTERIZER, sc->max_size);
    sanitize_hash(sc, sc->sampler_hash, CSO_SAMPLER, sc->max_size);
+   sanitize_hash(sc, sc->velements_hash, CSO_VELEMENTS, sc->max_size);
 }
 
 int cso_maximum_cache_size(const struct cso_cache *sc)
index eea60b940bb8663f5037a804f1f0cec4068bbfc6..fb09b83c623ef61be9ca4f540540667126dc8d80 100644 (file)
@@ -53,6 +53,7 @@
   * - rasterizer (old setup)
   * - sampler
   * - vertex shader
+  * - vertex elements
   *
   * Things that are not constant state objects include:
   * - blend_color
@@ -90,7 +91,8 @@ enum cso_cache_type {
    CSO_DEPTH_STENCIL_ALPHA,
    CSO_RASTERIZER,
    CSO_FRAGMENT_SHADER,
-   CSO_VERTEX_SHADER
+   CSO_VERTEX_SHADER,
+   CSO_VELEMENTS
 };
 
 typedef void (*cso_state_callback)(void *ctx, void *obj);
@@ -144,6 +146,18 @@ struct cso_sampler {
    struct pipe_context *context;
 };
 
+struct cso_velems_state {
+   unsigned count;
+   struct pipe_vertex_element velems[PIPE_MAX_ATTRIBS];
+};
+
+struct cso_velements {
+   struct cso_velems_state state;
+   void *data;
+   cso_state_callback delete_state;
+   struct pipe_context *context;
+};
+
 unsigned cso_construct_key(void *item, int item_size);
 
 struct cso_cache *cso_cache_create(void);
index a7335c340ca8d930235aa30605fcfacef3414293..6500891a10cb6cae31ca45bcfb1c2edfb702e97c 100644 (file)
@@ -89,6 +89,7 @@ struct cso_context {
    void *rasterizer, *rasterizer_saved;
    void *fragment_shader, *fragment_shader_saved, *geometry_shader;
    void *vertex_shader, *vertex_shader_saved, *geometry_shader_saved;
+   void *velements, *velements_saved;
 
    struct pipe_clip_state clip;
    struct pipe_clip_state clip_saved;
@@ -174,6 +175,20 @@ static boolean delete_vs_state(struct cso_context *ctx, void *state)
    return FALSE;
 }
 
+static boolean delete_vertex_elements(struct cso_context *ctx,
+                                      void *state)
+{
+   struct cso_velements *cso = (struct cso_velements *)state;
+
+   if (ctx->velements == cso->data)
+      return FALSE;
+
+   if (cso->delete_state)
+      cso->delete_state(cso->context, cso->data);
+   FREE(state);
+   return TRUE;
+}
+
 
 static INLINE boolean delete_cso(struct cso_context *ctx,
                                  void *state, enum cso_cache_type type)
@@ -197,6 +212,9 @@ static INLINE boolean delete_cso(struct cso_context *ctx,
    case CSO_VERTEX_SHADER:
       return delete_vs_state(ctx, state);
       break;
+   case CSO_VELEMENTS:
+      return delete_vertex_elements(ctx, state);
+      break;
    default:
       assert(0);
       FREE(state);
@@ -271,6 +289,7 @@ void cso_release_all( struct cso_context *ctx )
       ctx->pipe->bind_depth_stencil_alpha_state( ctx->pipe, NULL );
       ctx->pipe->bind_fs_state( ctx->pipe, NULL );
       ctx->pipe->bind_vs_state( ctx->pipe, NULL );
+      ctx->pipe->bind_vertex_elements_state( ctx->pipe, NULL );
    }
 
    for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
@@ -1130,7 +1149,6 @@ void cso_restore_geometry_shader(struct cso_context *ctx)
    ctx->geometry_shader_saved = NULL;
 }
 
-
 /* clip state */
 
 static INLINE void
@@ -1180,3 +1198,66 @@ cso_restore_clip(struct cso_context *ctx)
       ctx->pipe->set_clip_state(ctx->pipe, &ctx->clip_saved);
    }
 }
+
+enum pipe_error cso_set_vertex_elements(struct cso_context *ctx,
+                                        unsigned count,
+                                        const struct pipe_vertex_element *states)
+{
+   unsigned key_size, hash_key;
+   struct cso_hash_iter iter;
+   void *handle;
+   struct cso_velems_state velems_state;
+
+   /* need to include the count into the stored state data too.
+      Otherwise first few count pipe_vertex_elements could be identical even if count
+      is different, and there's no guarantee the hash would be different in that
+      case neither */
+   key_size = sizeof(struct pipe_vertex_element) * count + sizeof(unsigned);
+   velems_state.count = count;
+   memcpy(velems_state.velems, states, sizeof(struct pipe_vertex_element) * count);
+   hash_key = cso_construct_key((void*)&velems_state, key_size);
+   iter = cso_find_state_template(ctx->cache, hash_key, CSO_VELEMENTS, (void*)&velems_state, key_size);
+
+   if (cso_hash_iter_is_null(iter)) {
+      struct cso_velements *cso = MALLOC(sizeof(struct cso_velements));
+      if (!cso)
+         return PIPE_ERROR_OUT_OF_MEMORY;
+
+      memcpy(&cso->state, &velems_state, key_size);
+      cso->data = ctx->pipe->create_vertex_elements_state(ctx->pipe, count, &cso->state.velems[0]);
+      cso->delete_state = (cso_state_callback)ctx->pipe->delete_vertex_elements_state;
+      cso->context = ctx->pipe;
+
+      iter = cso_insert_state(ctx->cache, hash_key, CSO_VELEMENTS, cso);
+      if (cso_hash_iter_is_null(iter)) {
+         FREE(cso);
+         return PIPE_ERROR_OUT_OF_MEMORY;
+      }
+
+      handle = cso->data;
+   }
+   else {
+      handle = ((struct cso_velements *)cso_hash_iter_data(iter))->data;
+   }
+
+   if (ctx->velements != handle) {
+      ctx->velements = handle;
+      ctx->pipe->bind_vertex_elements_state(ctx->pipe, handle);
+   }
+   return PIPE_OK;
+}
+
+void cso_save_vertex_elements(struct cso_context *ctx)
+{
+   assert(!ctx->velements_saved);
+   ctx->velements_saved = ctx->velements;
+}
+
+void cso_restore_vertex_elements(struct cso_context *ctx)
+{
+   if (ctx->velements != ctx->velements_saved) {
+      ctx->velements = ctx->velements_saved;
+      ctx->pipe->bind_vertex_elements_state(ctx->pipe, ctx->velements_saved);
+   }
+   ctx->velements_saved = NULL;
+}
index 251a9a644f85a1b46f928d9048ff984339b6a04c..9c16abd28dd483738ff6d424e625dd12a6dce334 100644 (file)
@@ -122,6 +122,12 @@ void
 cso_restore_vertex_sampler_textures(struct cso_context *cso);
 
 
+enum pipe_error cso_set_vertex_elements(struct cso_context *ctx,
+                                        unsigned count,
+                                        const struct pipe_vertex_element *states);
+void cso_save_vertex_elements(struct cso_context *ctx);
+void cso_restore_vertex_elements(struct cso_context *ctx);
+
 
 /* These aren't really sensible -- most of the time the api provides
  * object semantics for shaders anyway, and the cases where it doesn't
@@ -157,7 +163,6 @@ void cso_save_geometry_shader(struct cso_context *cso);
 void cso_restore_geometry_shader(struct cso_context *cso);
 
 
-
 enum pipe_error cso_set_framebuffer(struct cso_context *cso,
                                     const struct pipe_framebuffer_state *fb);
 void cso_save_framebuffer(struct cso_context *cso);
index 6d90a6c42fde3be6f3faadaede06e2c3de66f19b..a8cdc57ad962012fc91ee8707e051195a7f4b20d 100644 (file)
@@ -307,9 +307,8 @@ draw_arrays_instanced(struct draw_context *draw,
       tgsi_dump(draw->vs.vertex_shader->state.tokens, 0);
       debug_printf("Elements:\n");
       for (i = 0; i < draw->pt.nr_vertex_elements; i++) {
-         debug_printf("  format=%s comps=%u\n",
-                      util_format_name(draw->pt.vertex_element[i].src_format),
-                      draw->pt.vertex_element[i].nr_components);
+         debug_printf("  format=%s\n",
+                      util_format_name(draw->pt.vertex_element[i].src_format));
       }
       debug_printf("Buffers:\n");
       for (i = 0; i < draw->pt.nr_vertex_buffers; i++) {
index 32f9e5201c54fa566c0e02eb67f450290594d0f2..e2c67883972e032f4c75f551237242ce9fb169d3 100644 (file)
@@ -644,13 +644,26 @@ lp_build_abs(struct lp_build_context *bld,
 
    if(type.floating) {
       /* Mask out the sign bit */
-      LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
-      unsigned long long absMask = ~(1ULL << (type.width - 1));
-      LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long) absMask));
-      a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
-      a = LLVMBuildAnd(bld->builder, a, mask, "");
-      a = LLVMBuildBitCast(bld->builder, a, vec_type, "");
-      return a;
+      if (type.length == 1) {
+         LLVMTypeRef int_type = LLVMIntType(type.width);
+         LLVMTypeRef float_type = LLVMFloatType();
+         unsigned long long absMask = ~(1ULL << (type.width - 1));
+         LLVMValueRef mask = LLVMConstInt(int_type, absMask, 0);
+         a = LLVMBuildBitCast(bld->builder, a, int_type, "");
+         a = LLVMBuildAnd(bld->builder, a, mask, "");
+         a = LLVMBuildBitCast(bld->builder, a, float_type, "");
+         return a;
+      }
+      else {
+         /* vector of floats */
+         LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+         unsigned long long absMask = ~(1ULL << (type.width - 1));
+         LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long) absMask));
+         a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+         a = LLVMBuildAnd(bld->builder, a, mask, "");
+         a = LLVMBuildBitCast(bld->builder, a, vec_type, "");
+         return a;
+      }
    }
 
    if(type.width*type.length == 128 && util_cpu_caps.has_ssse3) {
@@ -753,7 +766,7 @@ lp_build_set_sign(struct lp_build_context *bld,
 
 
 /**
- * Convert vector of int to vector of float.
+ * Convert vector of (or scalar) int to vector of (or scalar) float.
  */
 LLVMValueRef
 lp_build_int_to_float(struct lp_build_context *bld,
@@ -764,7 +777,11 @@ lp_build_int_to_float(struct lp_build_context *bld,
    assert(type.floating);
    /*assert(lp_check_value(type, a));*/
 
-   {
+   if (type.length == 1) {
+      LLVMTypeRef float_type = LLVMFloatType();
+      return LLVMBuildSIToFP(bld->builder, a, float_type, "");
+   }
+   else {
       LLVMTypeRef vec_type = lp_build_vec_type(type);
       /*LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);*/
       LLVMValueRef res;
@@ -921,12 +938,18 @@ lp_build_itrunc(struct lp_build_context *bld,
                 LLVMValueRef a)
 {
    const struct lp_type type = bld->type;
-   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
 
    assert(type.floating);
-   assert(lp_check_value(type, a));
 
-   return LLVMBuildFPToSI(bld->builder, a, int_vec_type, "");
+   if (type.length == 1) {
+      LLVMTypeRef int_type = LLVMIntType(type.width);
+      return LLVMBuildFPTrunc(bld->builder, a, int_type, "");
+   }
+   else {
+      LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+      assert(lp_check_value(type, a));
+      return LLVMBuildFPToSI(bld->builder, a, int_vec_type, "");
+   }
 }
 
 
@@ -939,6 +962,15 @@ lp_build_iround(struct lp_build_context *bld,
    LLVMValueRef res;
 
    assert(type.floating);
+
+   if (type.length == 1) {
+      /* scalar float to int */
+      LLVMTypeRef int_type = LLVMIntType(type.width);
+      /* XXX we want rounding here! */
+      res = LLVMBuildFPToSI(bld->builder, a, int_type, "");
+      return res;
+   }
+
    assert(lp_check_value(type, a));
 
    if(util_cpu_caps.has_sse4_1) {
@@ -1207,6 +1239,7 @@ lp_build_polynomial(struct lp_build_context *bld,
                     unsigned num_coeffs)
 {
    const struct lp_type type = bld->type;
+   LLVMTypeRef float_type = LLVMFloatType();
    LLVMValueRef res = NULL;
    unsigned i;
 
@@ -1216,7 +1249,13 @@ lp_build_polynomial(struct lp_build_context *bld,
                    __FUNCTION__);
 
    for (i = num_coeffs; i--; ) {
-      LLVMValueRef coeff = lp_build_const_scalar(type, coeffs[i]);
+      LLVMValueRef coeff;
+
+      if (type.length == 1)
+         coeff = LLVMConstReal(float_type, coeffs[i]);
+      else
+         coeff = lp_build_const_scalar(type, coeffs[i]);
+
       if(res)
          res = lp_build_add(bld, coeff, lp_build_mul(bld, x, res));
       else
@@ -1410,11 +1449,87 @@ lp_build_log2_approx(struct lp_build_context *bld,
 }
 
 
+/** scalar version of above function */
+static void
+lp_build_float_log2_approx(struct lp_build_context *bld,
+                           LLVMValueRef x,
+                           LLVMValueRef *p_exp,
+                           LLVMValueRef *p_floor_log2,
+                           LLVMValueRef *p_log2)
+{
+   const struct lp_type type = bld->type;
+   LLVMTypeRef float_type = LLVMFloatType();
+   LLVMTypeRef int_type = LLVMIntType(type.width);
+
+   LLVMValueRef expmask = LLVMConstInt(int_type, 0x7f800000, 0);
+   LLVMValueRef mantmask = LLVMConstInt(int_type, 0x007fffff, 0);
+   LLVMValueRef one = LLVMConstBitCast(bld->one, int_type);
+
+   LLVMValueRef i = NULL;
+   LLVMValueRef exp = NULL;
+   LLVMValueRef mant = NULL;
+   LLVMValueRef logexp = NULL;
+   LLVMValueRef logmant = NULL;
+   LLVMValueRef res = NULL;
+
+   if(p_exp || p_floor_log2 || p_log2) {
+      /* TODO: optimize the constant case */
+      if(LLVMIsConstant(x))
+         debug_printf("%s: inefficient/imprecise constant arithmetic\n",
+                      __FUNCTION__);
+
+      assert(type.floating && type.width == 32);
+
+      i = LLVMBuildBitCast(bld->builder, x, int_type, "");
+
+      /* exp = (float) exponent(x) */
+      exp = LLVMBuildAnd(bld->builder, i, expmask, "");
+   }
+
+   if(p_floor_log2 || p_log2) {
+      LLVMValueRef c23 = LLVMConstInt(int_type, 23, 0);
+      LLVMValueRef c127 = LLVMConstInt(int_type, 127, 0);
+      logexp = LLVMBuildLShr(bld->builder, exp, c23, "");
+      logexp = LLVMBuildSub(bld->builder, logexp, c127, "");
+      logexp = LLVMBuildSIToFP(bld->builder, logexp, float_type, "");
+   }
+
+   if(p_log2) {
+      /* mant = (float) mantissa(x) */
+      mant = LLVMBuildAnd(bld->builder, i, mantmask, "");
+      mant = LLVMBuildOr(bld->builder, mant, one, "");
+      mant = LLVMBuildBitCast(bld->builder, mant, float_type, "");
+
+      logmant = lp_build_polynomial(bld, mant, lp_build_log2_polynomial,
+                                    Elements(lp_build_log2_polynomial));
+
+      /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/
+      logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildSub(bld->builder, mant, bld->one, ""), "");
+
+      res = LLVMBuildAdd(bld->builder, logmant, logexp, "");
+   }
+
+   if(p_exp)
+      *p_exp = exp;
+
+   if(p_floor_log2)
+      *p_floor_log2 = logexp;
+
+   if(p_log2)
+      *p_log2 = res;
+}
+
+
 LLVMValueRef
 lp_build_log2(struct lp_build_context *bld,
               LLVMValueRef x)
 {
    LLVMValueRef res;
-   lp_build_log2_approx(bld, x, NULL, NULL, &res);
+   if (bld->type.length == 1) {
+      lp_build_float_log2_approx(bld, x, NULL, NULL, &res);
+   }
+   else {
+      lp_build_log2_approx(bld, x, NULL, NULL, &res);
+   }
    return res;
 }
index c8eaa8c394068225080b7bd1a4190013569b4d29..53447757e8e23ba6f24a0c089af4761983f52cc1 100644 (file)
@@ -264,10 +264,16 @@ lp_build_one(struct lp_type type)
    for(i = 1; i < type.length; ++i)
       elems[i] = elems[0];
 
-   return LLVMConstVector(elems, type.length);
+   if (type.length == 1)
+      return elems[0];
+   else
+      return LLVMConstVector(elems, type.length);
 }
                
 
+/**
+ * Build constant-valued vector from a scalar value.
+ */
 LLVMValueRef
 lp_build_const_scalar(struct lp_type type,
                       double val)
index 2726747eaea5698934c03eefa6f2b64d6e949a52..7c585fda7885c08b36ec75cf28beed28f98d6739 100644 (file)
@@ -198,7 +198,7 @@ lp_build_compare(LLVMBuilderRef builder,
 
          return res;
       }
-   }
+   } /* if (type.width * type.length == 128) */
 #endif
 
    if(type.floating) {
@@ -238,20 +238,25 @@ lp_build_compare(LLVMBuilderRef builder,
       cond = LLVMBuildFCmp(builder, op, a, b, "");
       res = LLVMBuildSelect(builder, cond, ones, zeros, "");
 #else
-      debug_printf("%s: warning: using slow element-wise vector comparison\n",
-                   __FUNCTION__);
       res = LLVMGetUndef(int_vec_type);
-      for(i = 0; i < type.length; ++i) {
-         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
-         cond = LLVMBuildFCmp(builder, op,
-                              LLVMBuildExtractElement(builder, a, index, ""),
-                              LLVMBuildExtractElement(builder, b, index, ""),
-                              "");
-         cond = LLVMBuildSelect(builder, cond,
-                                LLVMConstExtractElement(ones, index),
-                                LLVMConstExtractElement(zeros, index),
-                                "");
-         res = LLVMBuildInsertElement(builder, res, cond, index, "");
+      if (type.length == 1) {
+         res = LLVMBuildFCmp(builder, op, a, b, "");
+      }
+      else {
+         debug_printf("%s: warning: using slow element-wise float"
+                      " vector comparison\n", __FUNCTION__);
+         for (i = 0; i < type.length; ++i) {
+            LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+            cond = LLVMBuildFCmp(builder, op,
+                                 LLVMBuildExtractElement(builder, a, index, ""),
+                                 LLVMBuildExtractElement(builder, b, index, ""),
+                                 "");
+            cond = LLVMBuildSelect(builder, cond,
+                                   LLVMConstExtractElement(ones, index),
+                                   LLVMConstExtractElement(zeros, index),
+                                   "");
+            res = LLVMBuildInsertElement(builder, res, cond, index, "");
+         }
       }
 #endif
    }
@@ -286,20 +291,26 @@ lp_build_compare(LLVMBuilderRef builder,
       cond = LLVMBuildICmp(builder, op, a, b, "");
       res = LLVMBuildSelect(builder, cond, ones, zeros, "");
 #else
-      debug_printf("%s: warning: using slow element-wise int vector comparison\n",
-                   __FUNCTION__);
       res = LLVMGetUndef(int_vec_type);
-      for(i = 0; i < type.length; ++i) {
-         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
-         cond = LLVMBuildICmp(builder, op,
-                              LLVMBuildExtractElement(builder, a, index, ""),
-                              LLVMBuildExtractElement(builder, b, index, ""),
-                              "");
-         cond = LLVMBuildSelect(builder, cond,
-                                LLVMConstExtractElement(ones, index),
-                                LLVMConstExtractElement(zeros, index),
-                                "");
-         res = LLVMBuildInsertElement(builder, res, cond, index, "");
+      if (type.length == 1) {
+         res = LLVMBuildICmp(builder, op, a, b, "");
+      }
+      else {
+         debug_printf("%s: warning: using slow element-wise int"
+                      " vector comparison\n", __FUNCTION__);
+
+         for(i = 0; i < type.length; ++i) {
+            LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+            cond = LLVMBuildICmp(builder, op,
+                                 LLVMBuildExtractElement(builder, a, index, ""),
+                                 LLVMBuildExtractElement(builder, b, index, ""),
+                                 "");
+            cond = LLVMBuildSelect(builder, cond,
+                                   LLVMConstExtractElement(ones, index),
+                                   LLVMConstExtractElement(zeros, index),
+                                   "");
+            res = LLVMBuildInsertElement(builder, res, cond, index, "");
+         }
       }
 #endif
    }
@@ -339,26 +350,31 @@ lp_build_select(struct lp_build_context *bld,
    if(a == b)
       return a;
 
-   if(type.floating) {
-      LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
-      a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
-      b = LLVMBuildBitCast(bld->builder, b, int_vec_type, "");
+   if (type.length == 1) {
+      res = LLVMBuildSelect(bld->builder, mask, a, b, "");
    }
+   else {
+      if(type.floating) {
+         LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+         a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+         b = LLVMBuildBitCast(bld->builder, b, int_vec_type, "");
+      }
 
-   a = LLVMBuildAnd(bld->builder, a, mask, "");
+      a = LLVMBuildAnd(bld->builder, a, mask, "");
 
-   /* This often gets translated to PANDN, but sometimes the NOT is
-    * pre-computed and stored in another constant. The best strategy depends
-    * on available registers, so it is not a big deal -- hopefully LLVM does
-    * the right decision attending the rest of the program.
-    */
-   b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), "");
+      /* This often gets translated to PANDN, but sometimes the NOT is
+       * pre-computed and stored in another constant. The best strategy depends
+       * on available registers, so it is not a big deal -- hopefully LLVM does
+       * the right decision attending the rest of the program.
+       */
+      b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), "");
 
-   res = LLVMBuildOr(bld->builder, a, b, "");
+      res = LLVMBuildOr(bld->builder, a, b, "");
 
-   if(type.floating) {
-      LLVMTypeRef vec_type = lp_build_vec_type(type);
-      res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
+      if(type.floating) {
+         LLVMTypeRef vec_type = lp_build_vec_type(type);
+         res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
+      }
    }
 
    return res;
index 1dca29cdd5894559c2a90e05c294a314f03fe61e..a965d394f44b7292d8bf6ddcbe3a02ed0d6a2514 100644 (file)
@@ -65,6 +65,14 @@ struct lp_build_sample_context
 
    const struct util_format_description *format_desc;
 
+   /** regular scalar float type */
+   struct lp_type float_type;
+   struct lp_build_context float_bld;
+
+   /** regular scalar float type */
+   struct lp_type int_type;
+   struct lp_build_context int_bld;
+
    /** Incoming coordinates type and build context */
    struct lp_type coord_type;
    struct lp_build_context coord_bld;
@@ -108,6 +116,27 @@ wrap_mode_uses_border_color(unsigned mode)
 }
 
 
+static LLVMValueRef
+lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
+                          LLVMValueRef data_array, LLVMValueRef level)
+{
+   LLVMValueRef indexes[2], data_ptr;
+   indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
+   indexes[1] = level;
+   data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, "");
+   data_ptr = LLVMBuildLoad(bld->builder, data_ptr, "");
+   return data_ptr;
+}
+
+
+static LLVMValueRef
+lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
+                                LLVMValueRef data_array, int level)
+{
+   LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
+   return lp_build_get_mipmap_level(bld, data_array, lvl);
+}
+
 
 /**
  * Gen code to fetch a texel from a texture at int coords (x, y).
@@ -124,14 +153,13 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
                           LLVMValueRef x,
                           LLVMValueRef y,
                           LLVMValueRef y_stride,
-                          LLVMValueRef data_array,
+                          LLVMValueRef data_ptr,
                           LLVMValueRef *texel)
 {
    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
    LLVMValueRef offset;
    LLVMValueRef packed;
    LLVMValueRef use_border = NULL;
-   LLVMValueRef data_ptr;
 
    /* use_border = x < 0 || x >= width || y < 0 || y >= height */
    if (wrap_mode_uses_border_color(bld->static_state->wrap_s)) {
@@ -154,16 +182,6 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
       }
    }
 
-   /* XXX always use mipmap level 0 for now */
-   {
-      const int level = 0;
-      LLVMValueRef indexes[2];
-      indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
-      indexes[1] = LLVMConstInt(LLVMInt32Type(), level, 0);
-      data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, "");
-      data_ptr = LLVMBuildLoad(bld->builder, data_ptr, "");
-   }
-
    /*
     * Note: if we find an app which frequently samples the texture border
     * we might want to implement a true conditional here to avoid sampling
@@ -233,17 +251,8 @@ lp_build_sample_packed(struct lp_build_sample_context *bld,
    assert(bld->format_desc->block.height == 1);
    assert(bld->format_desc->block.bits <= bld->texel_type.width);
 
-   /* XXX always use mipmap level 0 for now */
-   {
-      const int level = 0;
-      LLVMValueRef indexes[2];
-      /* get data_ptr[level] */
-      indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
-      indexes[1] = LLVMConstInt(LLVMInt32Type(), level, 0);
-      data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, "");
-      /* load texture base address */
-      data_ptr = LLVMBuildLoad(bld->builder, data_ptr, "");
-   }
+   /* get pointer to mipmap level 0 data */
+   data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
 
    return lp_build_gather(bld->builder,
                           bld->texel_type.length,
@@ -733,7 +742,210 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
 
 
 /**
- * Sample 2D texture with nearest filtering.
+ * Codegen equivalent for u_minify().
+ * Return max(1, base_size >> level);
+ */
+static LLVMValueRef
+lp_build_minify(struct lp_build_sample_context *bld,
+                LLVMValueRef base_size,
+                LLVMValueRef level)
+{
+   LLVMValueRef size = LLVMBuildAShr(bld->builder, base_size, level, "minify");
+   size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one);
+   return size;
+}
+
+
+static int
+texture_dims(enum pipe_texture_target tex)
+{
+   switch (tex) {
+   case PIPE_TEXTURE_1D:
+      return 1;
+   case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_CUBE:
+      return 2;
+   case PIPE_TEXTURE_3D:
+      return 3;
+   default:
+      assert(0 && "bad texture target in texture_dims()");
+      return 2;
+   }
+}
+
+
+/**
+ * Generate code to compute texture level of detail (lambda).
+ * \param s  vector of texcoord s values
+ * \param t  vector of texcoord t values
+ * \param r  vector of texcoord r values
+ * \param width  scalar int texture width
+ * \param height  scalar int texture height
+ * \param depth  scalar int texture depth
+ */
+static LLVMValueRef
+lp_build_lod_selector(struct lp_build_sample_context *bld,
+                      LLVMValueRef s,
+                      LLVMValueRef t,
+                      LLVMValueRef r,
+                      LLVMValueRef width,
+                      LLVMValueRef height,
+                      LLVMValueRef depth)
+
+{
+   const int dims = texture_dims(bld->static_state->target);
+   struct lp_build_context *coord_bld = &bld->coord_bld;
+   struct lp_build_context *float_bld = &bld->float_bld;
+   LLVMValueRef lod_bias = LLVMConstReal(LLVMFloatType(), bld->static_state->lod_bias);
+   LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod);
+   LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(), bld->static_state->max_lod);
+
+   LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
+   LLVMValueRef index1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
+   LLVMValueRef index2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
+
+   LLVMValueRef s0, s1, s2;
+   LLVMValueRef t0, t1, t2;
+   LLVMValueRef r0, r1, r2;
+   LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy;
+   LLVMValueRef rho, lod;
+
+   /*
+    * dsdx = abs(s[1] - s[0]);
+    * dsdy = abs(s[2] - s[0]);
+    * dtdx = abs(t[1] - t[0]);
+    * dtdy = abs(t[2] - t[0]);
+    * drdx = abs(r[1] - r[0]);
+    * drdy = abs(r[2] - r[0]);
+    * XXX we're assuming a four-element quad in 2x2 layout here.
+    */
+   s0 = LLVMBuildExtractElement(bld->builder, s, index0, "s0");
+   s1 = LLVMBuildExtractElement(bld->builder, s, index1, "s1");
+   s2 = LLVMBuildExtractElement(bld->builder, s, index2, "s2");
+   dsdx = LLVMBuildSub(bld->builder, s1, s0, "");
+   dsdx = lp_build_abs(float_bld, dsdx);
+   dsdy = LLVMBuildSub(bld->builder, s2, s0, "");
+   dsdy = lp_build_abs(float_bld, dsdy);
+   if (dims > 1) {
+      t0 = LLVMBuildExtractElement(bld->builder, t, index0, "t0");
+      t1 = LLVMBuildExtractElement(bld->builder, t, index1, "t1");
+      t2 = LLVMBuildExtractElement(bld->builder, t, index2, "t2");
+      dtdx = LLVMBuildSub(bld->builder, t1, t0, "");
+      dtdx = lp_build_abs(float_bld, dtdx);
+      dtdy = LLVMBuildSub(bld->builder, t2, t0, "");
+      dtdy = lp_build_abs(float_bld, dtdy);
+      if (dims > 2) {
+         r0 = LLVMBuildExtractElement(bld->builder, r, index0, "r0");
+         r1 = LLVMBuildExtractElement(bld->builder, r, index1, "r1");
+         r2 = LLVMBuildExtractElement(bld->builder, r, index2, "r2");
+         drdx = LLVMBuildSub(bld->builder, r1, r0, "");
+         drdx = lp_build_abs(float_bld, drdx);
+         drdy = LLVMBuildSub(bld->builder, r2, r0, "");
+         drdy = lp_build_abs(float_bld, drdy);
+      }
+   }
+
+   /* Compute rho = max of all partial derivatives scaled by texture size.
+    * XXX this could be vectorized somewhat
+    */
+   rho = LLVMBuildMul(bld->builder,
+                      lp_build_max(float_bld, dsdx, dsdy),
+                      lp_build_int_to_float(float_bld, width), "");
+   if (dims > 1) {
+      LLVMValueRef max;
+      max = LLVMBuildMul(bld->builder,
+                         lp_build_max(float_bld, dtdx, dtdy),
+                         lp_build_int_to_float(float_bld, height), "");
+      rho = lp_build_max(float_bld, rho, max);
+      if (dims > 2) {
+         max = LLVMBuildMul(bld->builder,
+                            lp_build_max(float_bld, drdx, drdy),
+                            lp_build_int_to_float(float_bld, depth), "");
+         rho = lp_build_max(float_bld, rho, max);
+      }
+   }
+
+   /* compute lod = log2(rho) */
+   lod = lp_build_log2(float_bld, rho);
+
+   /* add lod bias */
+   lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "LOD bias");
+
+   /* clamp lod */
+   lod = lp_build_clamp(float_bld, lod, min_lod, max_lod);
+
+   return lod;
+}
+
+
+/**
+ * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer
+ * mipmap level index.
+ * Note: this is all scalar code.
+ * \param lod  scalar float texture level of detail
+ * \param level_out  returns integer 
+ */
+static void
+lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
+                           unsigned unit,
+                           LLVMValueRef lod,
+                           LLVMValueRef *level_out)
+{
+   struct lp_build_context *float_bld = &bld->float_bld;
+   struct lp_build_context *int_bld = &bld->int_bld;
+   LLVMValueRef last_level, level;
+
+   LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0);
+
+   last_level = bld->dynamic_state->last_level(bld->dynamic_state,
+                                               bld->builder, unit);
+
+   /* convert float lod to integer */
+   level = lp_build_iround(float_bld, lod);
+
+   /* clamp level to legal range of levels */
+   *level_out = lp_build_clamp(int_bld, level, zero, last_level);
+}
+
+
+/**
+ * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to integer to
+ * two (adjacent) mipmap level indexes.  Later, we'll sample from those
+ * two mipmap levels and interpolate between them.
+ */
+static void
+lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
+                           unsigned unit,
+                           LLVMValueRef lod,
+                           LLVMValueRef *level0_out,
+                           LLVMValueRef *level1_out,
+                           LLVMValueRef *weight_out)
+{
+   struct lp_build_context *coord_bld = &bld->coord_bld;
+   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
+   LLVMValueRef last_level, level;
+
+   last_level = bld->dynamic_state->last_level(bld->dynamic_state,
+                                               bld->builder, unit);
+
+   /* convert float lod to integer */
+   level = lp_build_ifloor(coord_bld, lod);
+
+   /* compute level 0 and clamp to legal range of levels */
+   *level0_out = lp_build_clamp(int_coord_bld, level,
+                                int_coord_bld->zero,
+                                last_level);
+   /* compute level 1 and clamp to legal range of levels */
+   *level1_out = lp_build_add(int_coord_bld, *level0_out, int_coord_bld->one);
+   *level1_out = lp_build_min(int_coord_bld, *level1_out, int_coord_bld->zero);
+
+   *weight_out = lp_build_fract(coord_bld, lod);
+}
+
+
+
+/**
+ * Sample 2D texture with nearest filtering, no mipmapping.
  */
 static void
 lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld,
@@ -746,6 +958,7 @@ lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld,
                                LLVMValueRef *texel)
 {
    LLVMValueRef x, y;
+   LLVMValueRef data_ptr;
 
    x = lp_build_sample_wrap_nearest(bld, s, width,
                                     bld->static_state->pot_width,
@@ -757,7 +970,63 @@ lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld,
    lp_build_name(x, "tex.x.wrapped");
    lp_build_name(y, "tex.y.wrapped");
 
-   lp_build_sample_texel_soa(bld, width, height, x, y, stride, data_array, texel);
+   /* get pointer to mipmap level 0 data */
+   data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
+
+   lp_build_sample_texel_soa(bld, width, height, x, y, stride, data_ptr, texel);
+}
+
+
+/**
+ * Sample 2D texture with nearest filtering, nearest mipmap.
+ */
+static void
+lp_build_sample_2d_nearest_mip_nearest_soa(struct lp_build_sample_context *bld,
+                                           unsigned unit,
+                                           LLVMValueRef s,
+                                           LLVMValueRef t,
+                                           LLVMValueRef width,
+                                           LLVMValueRef height,
+                                           LLVMValueRef width_vec,
+                                           LLVMValueRef height_vec,
+                                           LLVMValueRef stride,
+                                           LLVMValueRef data_array,
+                                           LLVMValueRef *texel)
+{
+   LLVMValueRef x, y;
+   LLVMValueRef lod, ilevel, ilevel_vec;
+   LLVMValueRef data_ptr;
+
+   /* compute float LOD */
+   lod = lp_build_lod_selector(bld, s, t, NULL, width, height, NULL);
+
+   /* convert LOD to int */
+   lp_build_nearest_mip_level(bld, unit, lod, &ilevel);
+
+   ilevel_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel);
+
+   /* compute width_vec, height at mipmap level 'ilevel' */
+   width_vec = lp_build_minify(bld, width_vec, ilevel_vec);
+   height_vec = lp_build_minify(bld, height_vec, ilevel_vec);
+   stride = lp_build_minify(bld, stride, ilevel_vec);
+
+   x = lp_build_sample_wrap_nearest(bld, s, width_vec,
+                                    bld->static_state->pot_width,
+                                    bld->static_state->wrap_s);
+   y = lp_build_sample_wrap_nearest(bld, t, height_vec,
+                                    bld->static_state->pot_height,
+                                    bld->static_state->wrap_t);
+
+   lp_build_name(x, "tex.x.wrapped");
+   lp_build_name(y, "tex.y.wrapped");
+
+   /* get pointer to mipmap level [ilevel] data */
+   if (0)
+      data_ptr = lp_build_get_mipmap_level(bld, data_array, ilevel);
+   else
+      data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
+
+   lp_build_sample_texel_soa(bld, width_vec, height_vec, x, y, stride, data_ptr, texel);
 }
 
 
@@ -779,6 +1048,7 @@ lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld,
    LLVMValueRef x0, x1;
    LLVMValueRef y0, y1;
    LLVMValueRef neighbors[2][2][4];
+   LLVMValueRef data_ptr;
    unsigned chan;
 
    lp_build_sample_wrap_linear(bld, s, width, bld->static_state->pot_width,
@@ -786,10 +1056,13 @@ lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld,
    lp_build_sample_wrap_linear(bld, t, height, bld->static_state->pot_height,
                                bld->static_state->wrap_t, &y0, &y1, &t_fpart);
 
-   lp_build_sample_texel_soa(bld, width, height, x0, y0, stride, data_array, neighbors[0][0]);
-   lp_build_sample_texel_soa(bld, width, height, x1, y0, stride, data_array, neighbors[0][1]);
-   lp_build_sample_texel_soa(bld, width, height, x0, y1, stride, data_array, neighbors[1][0]);
-   lp_build_sample_texel_soa(bld, width, height, x1, y1, stride, data_array, neighbors[1][1]);
+   /* get pointer to mipmap level 0 data */
+   data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
+
+   lp_build_sample_texel_soa(bld, width, height, x0, y0, stride, data_ptr, neighbors[0][0]);
+   lp_build_sample_texel_soa(bld, width, height, x1, y0, stride, data_ptr, neighbors[0][1]);
+   lp_build_sample_texel_soa(bld, width, height, x0, y1, stride, data_ptr, neighbors[1][0]);
+   lp_build_sample_texel_soa(bld, width, height, x1, y1, stride, data_ptr, neighbors[1][1]);
 
    /* TODO: Don't interpolate missing channels */
    for(chan = 0; chan < 4; ++chan) {
@@ -857,7 +1130,7 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
    LLVMValueRef packed, packed_lo, packed_hi;
    LLVMValueRef unswizzled[4];
 
-   lp_build_context_init(&i32, builder, lp_type_int(32));
+   lp_build_context_init(&i32, builder, lp_type_int_vec(32));
    lp_build_context_init(&h16, builder, lp_type_ufixed(16));
    lp_build_context_init(&u8n, builder, lp_type_unorm(8));
 
@@ -1066,194 +1339,11 @@ lp_build_sample_compare(struct lp_build_sample_context *bld,
 }
 
 
-static int
-texture_dims(enum pipe_texture_target tex)
-{
-   switch (tex) {
-   case PIPE_TEXTURE_1D:
-      return 1;
-   case PIPE_TEXTURE_2D:
-   case PIPE_TEXTURE_CUBE:
-      return 2;
-   case PIPE_TEXTURE_3D:
-      return 3;
-   default:
-      assert(0 && "bad texture target in texture_dims()");
-      return 2;
-   }
-}
-
-
-/**
- * Generate code to compute texture level of detail (lambda).
- * \param s  vector of texcoord s values
- * \param t  vector of texcoord t values
- * \param r  vector of texcoord r values
- * \param width  scalar int texture width
- * \param height  scalar int texture height
- * \param depth  scalar int texture depth
- */
-static LLVMValueRef
-lp_build_lod_selector(struct lp_build_sample_context *bld,
-                      LLVMValueRef s,
-                      LLVMValueRef t,
-                      LLVMValueRef r,
-                      LLVMValueRef width,
-                      LLVMValueRef height,
-                      LLVMValueRef depth)
-
-{
-   const int dims = texture_dims(bld->static_state->target);
-   struct lp_build_context *coord_bld = &bld->coord_bld;
-
-   LLVMValueRef lod_bias = lp_build_const_scalar(bld->coord_bld.type,
-                                                 bld->static_state->lod_bias);
-   LLVMValueRef min_lod = lp_build_const_scalar(bld->coord_bld.type,
-                                                bld->static_state->min_lod);
-   LLVMValueRef max_lod = lp_build_const_scalar(bld->coord_bld.type,
-                                                bld->static_state->max_lod);
-
-   LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
-   LLVMValueRef index1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
-   LLVMValueRef index2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
-
-   LLVMValueRef s0, s1, s2;
-   LLVMValueRef t0, t1, t2;
-   LLVMValueRef r0, r1, r2;
-   LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy;
-   LLVMValueRef rho, lod;
-
-   /*
-    * dsdx = abs(s[1] - s[0]);
-    * dsdy = abs(s[2] - s[0]);
-    * dtdx = abs(t[1] - t[0]);
-    * dtdy = abs(t[2] - t[0]);
-    * drdx = abs(r[1] - r[0]);
-    * drdy = abs(r[2] - r[0]);
-    * XXX we're assuming a four-element quad in 2x2 layout here.
-    */
-   s0 = LLVMBuildExtractElement(bld->builder, s, index0, "s0");
-   s1 = LLVMBuildExtractElement(bld->builder, s, index1, "s1");
-   s2 = LLVMBuildExtractElement(bld->builder, s, index2, "s2");
-   dsdx = lp_build_abs(coord_bld, lp_build_sub(coord_bld, s1, s0));
-   dsdy = lp_build_abs(coord_bld, lp_build_sub(coord_bld, s2, s0));
-   if (dims > 1) {
-      t0 = LLVMBuildExtractElement(bld->builder, t, index0, "t0");
-      t1 = LLVMBuildExtractElement(bld->builder, t, index1, "t1");
-      t2 = LLVMBuildExtractElement(bld->builder, t, index2, "t2");
-      dtdx = lp_build_abs(coord_bld, lp_build_sub(coord_bld, t1, t0));
-      dtdy = lp_build_abs(coord_bld, lp_build_sub(coord_bld, t2, t0));
-      if (dims > 2) {
-         r0 = LLVMBuildExtractElement(bld->builder, r, index0, "r0");
-         r1 = LLVMBuildExtractElement(bld->builder, r, index1, "r1");
-         r2 = LLVMBuildExtractElement(bld->builder, r, index2, "r2");
-         drdx = lp_build_abs(coord_bld, lp_build_sub(coord_bld, r1, r0));
-         drdy = lp_build_abs(coord_bld, lp_build_sub(coord_bld, r2, r0));
-      }
-   }
-
-   /* Compute rho = max of all partial derivatives scaled by texture size.
-    * XXX this can be vectorized somewhat
-    */
-   rho = lp_build_mul(coord_bld,
-                       lp_build_max(coord_bld, dsdx, dsdy),
-                       lp_build_int_to_float(coord_bld, width));
-   if (dims > 1) {
-      LLVMValueRef max;
-      max = lp_build_mul(coord_bld,
-                         lp_build_max(coord_bld, dtdx, dtdy),
-                         lp_build_int_to_float(coord_bld, height));
-      rho = lp_build_max(coord_bld, rho, max);
-      if (dims > 2) {
-         max = lp_build_mul(coord_bld,
-                            lp_build_max(coord_bld, drdx, drdy),
-                            lp_build_int_to_float(coord_bld, depth));
-         rho = lp_build_max(coord_bld, rho, max);
-      }
-   }
-
-   /* compute lod = log2(rho) */
-   lod = lp_build_log2(coord_bld, rho);
-
-   /* add lod bias */
-   lod = lp_build_add(coord_bld, lod, lod_bias);
-
-   /* clamp lod */
-   lod = lp_build_clamp(coord_bld, lod, min_lod, max_lod);
-
-   return lod;
-}
-
-
-/**
- * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer
- * mipmap level index.
- * \param lod  scalar float texture level of detail
- * \param level_out  returns integer 
- */
-static void
-lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
-                           unsigned unit,
-                           LLVMValueRef lod,
-                           LLVMValueRef *level_out)
-{
-   struct lp_build_context *coord_bld = &bld->coord_bld;
-   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
-   LLVMValueRef last_level, level;
-
-   last_level = bld->dynamic_state->last_level(bld->dynamic_state,
-                                               bld->builder, unit);
-
-   /* convert float lod to integer */
-   level = lp_build_iround(coord_bld, lod);
-
-   /* clamp level to legal range of levels */
-   *level_out = lp_build_clamp(int_coord_bld, level,
-                               int_coord_bld->zero,
-                               last_level);
-}
-
-
-/**
- * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to integer to
- * two (adjacent) mipmap level indexes.  Later, we'll sample from those
- * two mipmap levels and interpolate between them.
- */
-static void
-lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
-                           unsigned unit,
-                           LLVMValueRef lod,
-                           LLVMValueRef *level0_out,
-                           LLVMValueRef *level1_out,
-                           LLVMValueRef *weight_out)
-{
-   struct lp_build_context *coord_bld = &bld->coord_bld;
-   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
-   LLVMValueRef last_level, level;
-
-   last_level = bld->dynamic_state->last_level(bld->dynamic_state,
-                                               bld->builder, unit);
-
-   /* convert float lod to integer */
-   level = lp_build_ifloor(coord_bld, lod);
-
-   /* compute level 0 and clamp to legal range of levels */
-   *level0_out = lp_build_clamp(int_coord_bld, level,
-                                int_coord_bld->zero,
-                                last_level);
-   /* compute level 1 and clamp to legal range of levels */
-   *level1_out = lp_build_add(int_coord_bld, *level0_out, int_coord_bld->one);
-   *level1_out = lp_build_min(int_coord_bld, *level1_out, int_coord_bld->zero);
-
-   *weight_out = lp_build_fract(coord_bld, lod);
-}
-
-
-
 /**
  * Build texture sampling code.
  * 'texel' will return a vector of four LLVMValueRefs corresponding to
  * R, G, B, A.
+ * \param type  vector float type to use for coords, etc.
  */
 void
 lp_build_sample_soa(LLVMBuilderRef builder,
@@ -1267,17 +1357,19 @@ lp_build_sample_soa(LLVMBuilderRef builder,
                     LLVMValueRef *texel)
 {
    struct lp_build_sample_context bld;
-   LLVMValueRef width;
-   LLVMValueRef height;
-   LLVMValueRef stride;
+   LLVMValueRef width, width_vec;
+   LLVMValueRef height, height_vec;
+   LLVMValueRef stride, stride_vec;
    LLVMValueRef data_array;
    LLVMValueRef s;
    LLVMValueRef t;
    LLVMValueRef r;
+   boolean done = FALSE;
 
    (void) lp_build_lod_selector;   /* temporary to silence warning */
    (void) lp_build_nearest_mip_level;
    (void) lp_build_linear_mip_levels;
+   (void) lp_build_minify;
 
    /* Setup our build context */
    memset(&bld, 0, sizeof bld);
@@ -1285,10 +1377,16 @@ lp_build_sample_soa(LLVMBuilderRef builder,
    bld.static_state = static_state;
    bld.dynamic_state = dynamic_state;
    bld.format_desc = util_format_description(static_state->format);
+
+   bld.float_type = lp_type_float(32);
+   bld.int_type = lp_type_int(32);
    bld.coord_type = type;
    bld.uint_coord_type = lp_uint_type(type);
    bld.int_coord_type = lp_int_type(type);
    bld.texel_type = type;
+
+   lp_build_context_init(&bld.float_bld, builder, bld.float_type);
+   lp_build_context_init(&bld.int_bld, builder, bld.int_type);
    lp_build_context_init(&bld.coord_bld, builder, bld.coord_type);
    lp_build_context_init(&bld.uint_coord_bld, builder, bld.uint_coord_type);
    lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type);
@@ -1305,30 +1403,56 @@ lp_build_sample_soa(LLVMBuilderRef builder,
    t = coords[1];
    r = coords[2];
 
-   width = lp_build_broadcast_scalar(&bld.uint_coord_bld, width);
-   height = lp_build_broadcast_scalar(&bld.uint_coord_bld, height);
-   stride = lp_build_broadcast_scalar(&bld.uint_coord_bld, stride);
+   width_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, width);
+   height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height);
+   stride_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, stride);
 
    if(static_state->target == PIPE_TEXTURE_1D)
       t = bld.coord_bld.zero;
 
-   switch (static_state->min_img_filter) {
-   case PIPE_TEX_FILTER_NEAREST:
-      lp_build_sample_2d_nearest_soa(&bld, s, t, width, height,
-                                     stride, data_array, texel);
+   switch (static_state->min_mip_filter) {
+   case PIPE_TEX_MIPFILTER_NONE:
       break;
-   case PIPE_TEX_FILTER_LINEAR:
-      if(lp_format_is_rgba8(bld.format_desc) &&
-         is_simple_wrap_mode(static_state->wrap_s) &&
-         is_simple_wrap_mode(static_state->wrap_t))
-         lp_build_sample_2d_linear_aos(&bld, s, t, width, height,
-                                       stride, data_array, texel);
-      else
-         lp_build_sample_2d_linear_soa(&bld, s, t, width, height,
-                                       stride, data_array, texel);
+   case PIPE_TEX_MIPFILTER_NEAREST:
+
+      switch (static_state->min_img_filter) {
+      case PIPE_TEX_FILTER_NEAREST:
+         lp_build_sample_2d_nearest_mip_nearest_soa(&bld, unit,
+                                                    s, t,
+                                                    width, height,
+                                                    width_vec, height_vec,
+                                                    stride_vec,
+                                                    data_array, texel);
+         done = TRUE;
+         break;
+      }
+
+      break;
+   case PIPE_TEX_MIPFILTER_LINEAR:
       break;
    default:
-      assert(0);
+      assert(0 && "invalid mip filter");
+   }
+
+   if (!done) {
+      switch (static_state->min_img_filter) {
+      case PIPE_TEX_FILTER_NEAREST:
+         lp_build_sample_2d_nearest_soa(&bld, s, t, width_vec, height_vec,
+                                        stride_vec, data_array, texel);
+         break;
+      case PIPE_TEX_FILTER_LINEAR:
+         if(lp_format_is_rgba8(bld.format_desc) &&
+            is_simple_wrap_mode(static_state->wrap_s) &&
+            is_simple_wrap_mode(static_state->wrap_t))
+            lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec,
+                                          stride_vec, data_array, texel);
+         else
+            lp_build_sample_2d_linear_soa(&bld, s, t, width_vec, height_vec,
+                                          stride_vec, data_array, texel);
+         break;
+      default:
+         assert(0);
+      }
    }
 
    /* FIXME: respect static_state->min_mip_filter */;
index 16946cc28a2f9aa63fa4664cfa99aa5f62107003..4daa904e633c8e73457236e190c48c1964c5a6e9 100644 (file)
@@ -103,7 +103,7 @@ struct lp_type {
    unsigned width:14;
 
    /**
-    * Vector length.
+    * Vector length.  If length==1, this is a scalar (float/int) type.
     *
     * width*length should be a power of two greater or equal to eight.
     *
@@ -139,11 +139,28 @@ struct lp_build_context
 };
 
 
+/** Create scalar float type */
 static INLINE struct lp_type
 lp_type_float(unsigned width)
 {
    struct lp_type res_type;
 
+   memset(&res_type, 0, sizeof res_type);
+   res_type.floating = TRUE;
+   res_type.sign = TRUE;
+   res_type.width = width;
+   res_type.length = 1;
+
+   return res_type;
+}
+
+
+/** Create vector of float type */
+static INLINE struct lp_type
+lp_type_float_vec(unsigned width)
+{
+   struct lp_type res_type;
+
    memset(&res_type, 0, sizeof res_type);
    res_type.floating = TRUE;
    res_type.sign = TRUE;
@@ -154,11 +171,27 @@ lp_type_float(unsigned width)
 }
 
 
+/** Create scalar int type */
 static INLINE struct lp_type
 lp_type_int(unsigned width)
 {
    struct lp_type res_type;
 
+   memset(&res_type, 0, sizeof res_type);
+   res_type.sign = TRUE;
+   res_type.width = width;
+   res_type.length = 1;
+
+   return res_type;
+}
+
+
+/** Create vector int type */
+static INLINE struct lp_type
+lp_type_int_vec(unsigned width)
+{
+   struct lp_type res_type;
+
    memset(&res_type, 0, sizeof res_type);
    res_type.sign = TRUE;
    res_type.width = width;
@@ -168,11 +201,26 @@ lp_type_int(unsigned width)
 }
 
 
+/** Create scalar uint type */
 static INLINE struct lp_type
 lp_type_uint(unsigned width)
 {
    struct lp_type res_type;
 
+   memset(&res_type, 0, sizeof res_type);
+   res_type.width = width;
+   res_type.length = 1;
+
+   return res_type;
+}
+
+
+/** Create vector uint type */
+static INLINE struct lp_type
+lp_type_uint_vec(unsigned width)
+{
+   struct lp_type res_type;
+
    memset(&res_type, 0, sizeof res_type);
    res_type.width = width;
    res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
index 0b263a9db5ca31148421b003307eb41220d217bd..4d0737ccd3d03ed0fea6b2ca22b739b4d672c6ab 100644 (file)
@@ -63,6 +63,7 @@ struct blit_state
    struct pipe_sampler_state sampler;
    struct pipe_viewport_state viewport;
    struct pipe_clip_state clip;
+   struct pipe_vertex_element velem[2];
 
    void *vs;
    void *fs[TGSI_WRITEMASK_XYZW + 1];
@@ -114,6 +115,15 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso)
    ctx->sampler.mag_img_filter = 0; /* set later */
    ctx->sampler.normalized_coords = 1;
 
+   /* vertex elements state */
+   memset(&ctx->velem[0], 0, sizeof(ctx->velem[0]) * 2);
+   for (i = 0; i < 2; i++) {
+      ctx->velem[i].src_offset = i * 4 * sizeof(float);
+      ctx->velem[i].instance_divisor = 0;
+      ctx->velem[i].vertex_buffer_index = 0;
+      ctx->velem[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   }
+
    /* vertex shader - still required to provide the linkage between
     * fragment shader input semantics and vertex_element/buffers.
     */
@@ -410,12 +420,14 @@ util_blit_pixels_writemask(struct blit_state *ctx,
    cso_save_fragment_shader(ctx->cso);
    cso_save_vertex_shader(ctx->cso);
    cso_save_clip(ctx->cso);
+   cso_save_vertex_elements(ctx->cso);
 
    /* set misc state we care about */
    cso_set_blend(ctx->cso, &ctx->blend);
    cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil);
    cso_set_rasterizer(ctx->cso, &ctx->rasterizer);
    cso_set_clip(ctx->cso, &ctx->clip);
+   cso_set_vertex_elements(ctx->cso, 2, ctx->velem);
 
    /* sampler */
    ctx->sampler.min_img_filter = filter;
@@ -480,6 +492,7 @@ util_blit_pixels_writemask(struct blit_state *ctx,
    cso_restore_fragment_shader(ctx->cso);
    cso_restore_vertex_shader(ctx->cso);
    cso_restore_clip(ctx->cso);
+   cso_restore_vertex_elements(ctx->cso);
 
    pipe_texture_reference(&tex, NULL);
 }
@@ -564,12 +577,14 @@ util_blit_pixels_tex(struct blit_state *ctx,
    cso_save_fragment_shader(ctx->cso);
    cso_save_vertex_shader(ctx->cso);
    cso_save_clip(ctx->cso);
+   cso_save_vertex_elements(ctx->cso);
 
    /* set misc state we care about */
    cso_set_blend(ctx->cso, &ctx->blend);
    cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil);
    cso_set_rasterizer(ctx->cso, &ctx->rasterizer);
    cso_set_clip(ctx->cso, &ctx->clip);
+   cso_set_vertex_elements(ctx->cso, 2, ctx->velem);
 
    /* sampler */
    ctx->sampler.min_img_filter = filter;
@@ -628,4 +643,5 @@ util_blit_pixels_tex(struct blit_state *ctx,
    cso_restore_fragment_shader(ctx->cso);
    cso_restore_vertex_shader(ctx->cso);
    cso_restore_clip(ctx->cso);
+   cso_restore_vertex_elements(ctx->cso);
 }
index 0ba09d33bfcf6aad9a5f4f2f87db1a15ff3d092f..33d09085f0bf743e2418192a41930815d7f6bff7 100644 (file)
@@ -88,6 +88,8 @@ struct blitter_context_priv
    void *dsa_write_depth_keep_stencil;
    void *dsa_keep_depth_stencil;
 
+   void *velem_state;
+
    /* Sampler state for clamping to a miplevel. */
    void *sampler_state[PIPE_MAX_TEXTURE_LEVELS];
 
@@ -108,6 +110,7 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
    struct pipe_depth_stencil_alpha_state dsa = { { 0 } };
    struct pipe_rasterizer_state rs_state = { 0 };
    struct pipe_sampler_state *sampler_state;
+   struct pipe_vertex_element velem[2];
    unsigned i;
 
    ctx = CALLOC_STRUCT(blitter_context_priv);
@@ -122,6 +125,7 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
    ctx->blitter.saved_rs_state = INVALID_PTR;
    ctx->blitter.saved_fs = INVALID_PTR;
    ctx->blitter.saved_vs = INVALID_PTR;
+   ctx->blitter.saved_velem_state = INVALID_PTR;
    ctx->blitter.saved_fb_state.nr_cbufs = ~0;
    ctx->blitter.saved_num_textures = ~0;
    ctx->blitter.saved_num_sampler_states = ~0;
@@ -170,6 +174,16 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
    rs_state.flatshade = 1;
    ctx->rs_state = pipe->create_rasterizer_state(pipe, &rs_state);
 
+   /* vertex elements state */
+   memset(&velem[0], 0, sizeof(velem[0]) * 2);
+   for (i = 0; i < 2; i++) {
+      velem[i].src_offset = i * 4 * sizeof(float);
+      velem[i].instance_divisor = 0;
+      velem[i].vertex_buffer_index = 0;
+      velem[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   }
+   ctx->velem_state = pipe->create_vertex_elements_state(pipe, 2, &velem[0]);
+
    /* fragment shaders are created on-demand */
 
    /* vertex shaders */
@@ -219,6 +233,7 @@ void util_blitter_destroy(struct blitter_context *blitter)
    pipe->delete_rasterizer_state(pipe, ctx->rs_state);
    pipe->delete_vs_state(pipe, ctx->vs_col);
    pipe->delete_vs_state(pipe, ctx->vs_tex);
+   pipe->delete_vertex_elements_state(pipe, ctx->velem_state);
 
    for (i = 0; i < PIPE_MAX_TEXTURE_TYPES; i++) {
       if (ctx->fs_texfetch_col[i])
@@ -246,7 +261,8 @@ static void blitter_check_saved_CSOs(struct blitter_context_priv *ctx)
           ctx->blitter.saved_dsa_state != INVALID_PTR &&
           ctx->blitter.saved_rs_state != INVALID_PTR &&
           ctx->blitter.saved_fs != INVALID_PTR &&
-          ctx->blitter.saved_vs != INVALID_PTR);
+          ctx->blitter.saved_vs != INVALID_PTR &&
+          ctx->blitter.saved_velem_state != INVALID_PTR);
 }
 
 static void blitter_restore_CSOs(struct blitter_context_priv *ctx)
@@ -259,12 +275,14 @@ static void blitter_restore_CSOs(struct blitter_context_priv *ctx)
    pipe->bind_rasterizer_state(pipe, ctx->blitter.saved_rs_state);
    pipe->bind_fs_state(pipe, ctx->blitter.saved_fs);
    pipe->bind_vs_state(pipe, ctx->blitter.saved_vs);
+   pipe->bind_vertex_elements_state(pipe, ctx->blitter.saved_velem_state);
 
    ctx->blitter.saved_blend_state = INVALID_PTR;
    ctx->blitter.saved_dsa_state = INVALID_PTR;
    ctx->blitter.saved_rs_state = INVALID_PTR;
    ctx->blitter.saved_fs = INVALID_PTR;
    ctx->blitter.saved_vs = INVALID_PTR;
+   ctx->blitter.saved_velem_state = INVALID_PTR;
 
    pipe->set_stencil_ref(pipe, &ctx->blitter.saved_stencil_ref);
 
@@ -569,6 +587,7 @@ void util_blitter_clear(struct blitter_context *blitter,
       pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
 
    pipe->bind_rasterizer_state(pipe, ctx->rs_state);
+   pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
    pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, num_cbufs));
    pipe->bind_vs_state(pipe, ctx->vs_col);
 
@@ -634,6 +653,7 @@ static void util_blitter_do_copy(struct blitter_context *blitter,
    pipe->bind_vs_state(pipe, ctx->vs_tex);
    pipe->bind_fragment_sampler_states(pipe, 1,
       blitter_get_sampler_state(ctx, src->level));
+   pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
    pipe->set_fragment_sampler_textures(pipe, 1, &src->texture);
    pipe->set_framebuffer_state(pipe, &fb_state);
 
@@ -807,6 +827,7 @@ void util_blitter_fill(struct blitter_context *blitter,
    pipe->bind_rasterizer_state(pipe, ctx->rs_state);
    pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, 1));
    pipe->bind_vs_state(pipe, ctx->vs_col);
+   pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
 
    /* set a framebuffer state */
    fb_state.width = dst->width;
index 92008fce9925e438fba8d11ca1dc2cb205138731..ecafdabafae79ffa3431ca9d78d3bf79c350c8e0 100644 (file)
@@ -43,6 +43,7 @@ struct blitter_context
    /* Private members, really. */
    void *saved_blend_state;   /**< blend state */
    void *saved_dsa_state;     /**< depth stencil alpha state */
+   void *saved_velem_state;   /**< vertex elements state */
    void *saved_rs_state;      /**< rasterizer state */
    void *saved_fs, *saved_vs; /**< fragment shader, vertex shader */
 
@@ -172,6 +173,13 @@ void util_blitter_save_depth_stencil_alpha(struct blitter_context *blitter,
    blitter->saved_dsa_state = state;
 }
 
+static INLINE
+void util_blitter_save_vertex_elements(struct blitter_context *blitter,
+                                       void *state)
+{
+   blitter->saved_velem_state = state;
+}
+
 static INLINE
 void util_blitter_save_stencil_ref(struct blitter_context *blitter,
                                    const struct pipe_stencil_ref *state)
index 14506e8451968b238373eba26675b8d238cff26a..8c194102bfca2cf632d5e0bce4c98ec99a5e917a 100644 (file)
@@ -45,8 +45,6 @@ util_draw_vertex_buffer(struct pipe_context *pipe,
                         uint num_attribs)
 {
    struct pipe_vertex_buffer vbuffer;
-   struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS];
-   uint i;
 
    assert(num_attribs <= PIPE_MAX_ATTRIBS);
 
@@ -58,15 +56,7 @@ util_draw_vertex_buffer(struct pipe_context *pipe,
    vbuffer.max_index = num_verts - 1;
    pipe->set_vertex_buffers(pipe, 1, &vbuffer);
 
-   /* tell pipe about the vertex attributes */
-   for (i = 0; i < num_attribs; i++) {
-      velements[i].src_offset = i * 4 * sizeof(float);
-      velements[i].instance_divisor = 0;
-      velements[i].vertex_buffer_index = 0;
-      velements[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
-      velements[i].nr_components = 4;
-   }
-   pipe->set_vertex_elements(pipe, num_attribs, velements);
+   /* note: vertex elements already set by caller */
 
    /* draw */
    pipe->draw_arrays(pipe, prim_type, 0, num_verts);
index ae7afd7311e608ddebd67b8c8505aec6d9fc6469..52cf3ef4ce048139300044c39c481adda830034f 100644 (file)
@@ -700,7 +700,6 @@ util_dump_vertex_element(struct os_stream *stream, const struct pipe_vertex_elem
    util_dump_member(stream, uint, state, src_offset);
 
    util_dump_member(stream, uint, state, vertex_buffer_index);
-   util_dump_member(stream, uint, state, nr_components);
 
    util_dump_member(stream, format, state, src_format);
 
index b2aa5bfb188cba57ab3b78a75a937b1b97964d81..c08fdcafcc8deca38f5013e5570924687f15c070 100644 (file)
@@ -415,6 +415,16 @@ util_format_has_alpha(enum pipe_format format)
    }
 }
 
+/**
+ * Return the number of components stored.
+ * Formats with block size != 1x1 will always have 1 component (the block).
+ */
+static INLINE unsigned
+util_format_get_nr_components(enum pipe_format format)
+{
+   const struct util_format_description *desc = util_format_description(format);
+   return desc->nr_channels;
+}
 
 /*
  * Format access functions.
index b49039db39b03990c7b98fc3c8c9bd479675681f..409d024c63726cb88dd986302fcbcae492638b4e 100644 (file)
@@ -418,31 +418,70 @@ def generate_format_pack(format, src_channel, src_native_type, src_suffix):
 
     dst_native_type = native_type(format)
 
+    assert format.layout == PLAIN
+
+    inv_swizzle = format.inv_swizzles()
+    
     print 'static INLINE void'
     print 'util_format_%s_pack_%s(void *dst, %s r, %s g, %s b, %s a)' % (name, src_suffix, src_native_type, src_native_type, src_native_type, src_native_type)
     print '{'
-    print '   union util_format_%s pixel;' % format.short_name()
-
-    assert format.layout == PLAIN
+    
+    if format.is_bitmask():
+        depth = format.block_size()
+        print '   uint%u_t value = 0;' % depth 
 
-    inv_swizzle = format.inv_swizzles()
+        shift = 0
+        for i in range(4):
+            dst_channel = format.channels[i]
+            if inv_swizzle[i] is not None:
+                value = 'rgba'[inv_swizzle[i]]
+                value = conversion_expr(src_channel, dst_channel, dst_native_type, value)
+                if format.colorspace == ZS:
+                    if i == 3:
+                        value = get_one(dst_channel)
+                    elif i >= 1:
+                        value = '0'
+                if dst_channel.type in (UNSIGNED, SIGNED):
+                    if shift + dst_channel.size < depth:
+                        value = '(%s) & 0x%x' % (value, (1 << dst_channel.size) - 1)
+                    if shift:
+                        value = '(%s) << %u' % (value, shift)
+                    if dst_channel.type == SIGNED:
+                        # Cast to unsigned
+                        value = '(uint%u_t)(%s) ' % (depth, value)
+                else:
+                    value = None
+                if value is not None:
+                    print '   value |= %s;' % (value)
+                
+            shift += dst_channel.size
 
-    for i in range(4):
-        dst_channel = format.channels[i]
-        width = dst_channel.size
-        if inv_swizzle[i] is None:
-            continue
-        value = 'rgba'[inv_swizzle[i]]
-        value = conversion_expr(src_channel, dst_channel, dst_native_type, value)
-        if format.colorspace == ZS:
-            if i == 3:
-                value = get_one(dst_channel)
-            elif i >= 1:
-                value = '0'
-        print '   pixel.chan.%s = %s;' % (dst_channel.name, value)
+        print '#ifdef PIPE_ARCH_BIG_ENDIAN'
+        print '   value = util_bswap%u(value);' % depth
+        print '#endif'
+        
+        print '   *(uint%u_t *)dst = value;' % depth 
 
-    bswap_format(format)
-    print '   memcpy(dst, &pixel, sizeof pixel);'
+    else:
+        print '   union util_format_%s pixel;' % format.short_name()
+    
+        for i in range(4):
+            dst_channel = format.channels[i]
+            width = dst_channel.size
+            if inv_swizzle[i] is None:
+                continue
+            value = 'rgba'[inv_swizzle[i]]
+            value = conversion_expr(src_channel, dst_channel, dst_native_type, value)
+            if format.colorspace == ZS:
+                if i == 3:
+                    value = get_one(dst_channel)
+                elif i >= 1:
+                    value = '0'
+            print '   pixel.chan.%s = %s;' % (dst_channel.name, value)
+    
+        bswap_format(format)
+        print '   memcpy(dst, &pixel, sizeof pixel);'
+        
     print '}'
     print
     
index fc027e48e4ebb7e8b1cffdb6686ecd55da38a390..d421bee8efe1e26311274996048313fca543d1d2 100644 (file)
@@ -62,6 +62,7 @@ struct gen_mipmap_state
    struct pipe_rasterizer_state rasterizer;
    struct pipe_sampler_state sampler;
    struct pipe_clip_state clip;
+   struct pipe_vertex_element velem[2];
 
    void *vs;
    void *fs2d, *fsCube;
@@ -1307,6 +1308,15 @@ util_create_gen_mipmap(struct pipe_context *pipe,
    ctx->sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST;
    ctx->sampler.normalized_coords = 1;
 
+   /* vertex elements state */
+   memset(&ctx->velem[0], 0, sizeof(ctx->velem[0]) * 2);
+   for (i = 0; i < 2; i++) {
+      ctx->velem[i].src_offset = i * 4 * sizeof(float);
+      ctx->velem[i].instance_divisor = 0;
+      ctx->velem[i].vertex_buffer_index = 0;
+      ctx->velem[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   }
+
    /* vertex shader - still needed to specify mapping from fragment
     * shader input semantics to vertex elements 
     */
@@ -1501,12 +1511,14 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
    cso_save_vertex_shader(ctx->cso);
    cso_save_viewport(ctx->cso);
    cso_save_clip(ctx->cso);
+   cso_save_vertex_elements(ctx->cso);
 
    /* bind our state */
    cso_set_blend(ctx->cso, &ctx->blend);
    cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil);
    cso_set_rasterizer(ctx->cso, &ctx->rasterizer);
    cso_set_clip(ctx->cso, &ctx->clip);
+   cso_set_vertex_elements(ctx->cso, 2, ctx->velem);
 
    cso_set_fragment_shader_handle(ctx->cso, fs);
    cso_set_vertex_shader_handle(ctx->cso, ctx->vs);
@@ -1593,4 +1605,5 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
    cso_restore_vertex_shader(ctx->cso);
    cso_restore_viewport(ctx->cso);
    cso_restore_clip(ctx->cso);
+   cso_restore_vertex_elements(ctx->cso);
 }
index ba23435f698240257bf29fe798937e66dab6dbed..6d461cb88001e7db34972dcf6f95be52a193ea75 100644 (file)
@@ -230,6 +230,7 @@ static bool
 init_pipe_state(struct vl_compositor *c)
 {
    struct pipe_sampler_state sampler;
+   struct pipe_vertex_element vertex_elems[2];
 
    assert(c);
 
@@ -251,15 +252,27 @@ init_pipe_state(struct vl_compositor *c)
    /*sampler.border_color[i] = ;*/
    /*sampler.max_anisotropy = ;*/
    c->sampler = c->pipe->create_sampler_state(c->pipe, &sampler);
-       
+
+   vertex_elems[0].src_offset = 0;
+   vertex_elems[0].instance_divisor = 0;
+   vertex_elems[0].vertex_buffer_index = 0;
+   vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
+   vertex_elems[1].src_offset = 0;
+   vertex_elems[1].instance_divisor = 0;
+   vertex_elems[1].vertex_buffer_index = 1;
+   vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
+   c->vertex_elems = c->pipe->create_vertex_elements_state(c->pipe, 2, vertex_elems);
+
+
    return true;
 }
 
 static void cleanup_pipe_state(struct vl_compositor *c)
 {
    assert(c);
-       
+
    c->pipe->delete_sampler_state(c->pipe, c->sampler);
+   c->pipe->delete_vertex_elements_state(c->pipe, c->vertex_elems);
 }
 
 static bool
@@ -314,12 +327,6 @@ init_buffers(struct vl_compositor *c)
 
    pipe_buffer_unmap(c->pipe->screen, c->vertex_bufs[0].buffer);
 
-   c->vertex_elems[0].src_offset = 0;
-   c->vertex_elems[0].instance_divisor = 0;
-   c->vertex_elems[0].vertex_buffer_index = 0;
-   c->vertex_elems[0].nr_components = 2;
-   c->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
    /*
     * Create our texcoord buffer and texcoord buffer element
     * Texcoord buffer contains the TCs for mapping the rendered surface to the 4 vertices
@@ -344,12 +351,6 @@ init_buffers(struct vl_compositor *c)
 
    pipe_buffer_unmap(c->pipe->screen, c->vertex_bufs[1].buffer);
 
-   c->vertex_elems[1].src_offset = 0;
-   c->vertex_elems[1].instance_divisor = 0;
-   c->vertex_elems[1].vertex_buffer_index = 1;
-   c->vertex_elems[1].nr_components = 2;
-   c->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
    /*
     * Create our vertex shader's constant buffer
     * Const buffer contains scaling and translation vectors
@@ -483,7 +484,7 @@ void vl_compositor_render(struct vl_compositor          *compositor,
    compositor->pipe->bind_vs_state(compositor->pipe, compositor->vertex_shader);
    compositor->pipe->bind_fs_state(compositor->pipe, compositor->fragment_shader);
    compositor->pipe->set_vertex_buffers(compositor->pipe, 2, compositor->vertex_bufs);
-   compositor->pipe->set_vertex_elements(compositor->pipe, 2, compositor->vertex_elems);
+   compositor->pipe->bind_vertex_elements_state(compositor->pipe, compositor->vertex_elems);
    compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_VERTEX, 0, compositor->vs_const_buf);
    compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_FRAGMENT, 0, compositor->fs_const_buf);
 
index 6a9a3fd7af111dc327f98fe416abe6b34e984b3f..51755554da1b9098b8cf4dd2c885df07fd6cb36c 100644 (file)
@@ -43,10 +43,10 @@ struct vl_compositor
    void *sampler;
    void *vertex_shader;
    void *fragment_shader;
+   void *vertex_elems;
    struct pipe_viewport_state viewport;
    struct pipe_scissor_state scissor;
    struct pipe_vertex_buffer vertex_bufs[2];
-   struct pipe_vertex_element vertex_elems[2];
    struct pipe_buffer *vs_const_buf, *fs_const_buf;
 };
 
index f323de0ea55d0bfe2ac84ddcdd3c10af52a29d25..0763b5bb0e46763361e2c6cc995edab01d6554d8 100644 (file)
@@ -708,6 +708,7 @@ static bool
 init_pipe_state(struct vl_mpeg12_mc_renderer *r)
 {
    struct pipe_sampler_state sampler;
+   struct pipe_vertex_element vertex_elems[8];
    unsigned filters[5];
    unsigned i;
 
@@ -771,6 +772,59 @@ init_pipe_state(struct vl_mpeg12_mc_renderer *r)
       r->samplers.all[i] = r->pipe->create_sampler_state(r->pipe, &sampler);
    }
 
+   /* Position element */
+   vertex_elems[0].src_offset = 0;
+   vertex_elems[0].instance_divisor = 0;
+   vertex_elems[0].vertex_buffer_index = 0;
+   vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* Luma, texcoord element */
+   vertex_elems[1].src_offset = sizeof(struct vertex2f);
+   vertex_elems[1].instance_divisor = 0;
+   vertex_elems[1].vertex_buffer_index = 0;
+   vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* Chroma Cr texcoord element */
+   vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2;
+   vertex_elems[2].instance_divisor = 0;
+   vertex_elems[2].vertex_buffer_index = 0;
+   vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* Chroma Cb texcoord element */
+   vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3;
+   vertex_elems[3].instance_divisor = 0;
+   vertex_elems[3].vertex_buffer_index = 0;
+   vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* First ref surface top field texcoord element */
+   vertex_elems[4].src_offset = 0;
+   vertex_elems[4].instance_divisor = 0;
+   vertex_elems[4].vertex_buffer_index = 1;
+   vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* First ref surface bottom field texcoord element */
+   vertex_elems[5].src_offset = sizeof(struct vertex2f);
+   vertex_elems[5].instance_divisor = 0;
+   vertex_elems[5].vertex_buffer_index = 1;
+   vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* Second ref surface top field texcoord element */
+   vertex_elems[6].src_offset = 0;
+   vertex_elems[6].instance_divisor = 0;
+   vertex_elems[6].vertex_buffer_index = 2;
+   vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* Second ref surface bottom field texcoord element */
+   vertex_elems[7].src_offset = sizeof(struct vertex2f);
+   vertex_elems[7].instance_divisor = 0;
+   vertex_elems[7].vertex_buffer_index = 2;
+   vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* need versions with 4,6 and 8 vertex elems */
+   r->vertex_elems[0] = r->pipe->create_vertex_elements_state(r->pipe, 4, vertex_elems);
+   r->vertex_elems[1] = r->pipe->create_vertex_elements_state(r->pipe, 6, vertex_elems);
+   r->vertex_elems[2] = r->pipe->create_vertex_elements_state(r->pipe, 8, vertex_elems);
+
    return true;
 }
 
@@ -783,6 +837,8 @@ cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r)
 
    for (i = 0; i < 5; ++i)
       r->pipe->delete_sampler_state(r->pipe, r->samplers.all[i]);
+   for (i = 0; i < 3; i++)
+      r->pipe->delete_vertex_elements_state(r->pipe, r->vertex_elems[i]);
 }
 
 static bool
@@ -888,62 +944,6 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
       );
    }
 
-   /* Position element */
-   r->vertex_elems[0].src_offset = 0;
-   r->vertex_elems[0].instance_divisor = 0;
-   r->vertex_elems[0].vertex_buffer_index = 0;
-   r->vertex_elems[0].nr_components = 2;
-   r->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
-   /* Luma, texcoord element */
-   r->vertex_elems[1].src_offset = sizeof(struct vertex2f);
-   r->vertex_elems[1].instance_divisor = 0;
-   r->vertex_elems[1].vertex_buffer_index = 0;
-   r->vertex_elems[1].nr_components = 2;
-   r->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
-   /* Chroma Cr texcoord element */
-   r->vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2;
-   r->vertex_elems[2].instance_divisor = 0;
-   r->vertex_elems[2].vertex_buffer_index = 0;
-   r->vertex_elems[2].nr_components = 2;
-   r->vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
-   /* Chroma Cb texcoord element */
-   r->vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3;
-   r->vertex_elems[3].instance_divisor = 0;
-   r->vertex_elems[3].vertex_buffer_index = 0;
-   r->vertex_elems[3].nr_components = 2;
-   r->vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
-   /* First ref surface top field texcoord element */
-   r->vertex_elems[4].src_offset = 0;
-   r->vertex_elems[4].instance_divisor = 0;
-   r->vertex_elems[4].vertex_buffer_index = 1;
-   r->vertex_elems[4].nr_components = 2;
-   r->vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
-   /* First ref surface bottom field texcoord element */
-   r->vertex_elems[5].src_offset = sizeof(struct vertex2f);
-   r->vertex_elems[5].instance_divisor = 0;
-   r->vertex_elems[5].vertex_buffer_index = 1;
-   r->vertex_elems[5].nr_components = 2;
-   r->vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
-   /* Second ref surface top field texcoord element */
-   r->vertex_elems[6].src_offset = 0;
-   r->vertex_elems[6].instance_divisor = 0;
-   r->vertex_elems[6].vertex_buffer_index = 2;
-   r->vertex_elems[6].nr_components = 2;
-   r->vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
-   /* Second ref surface bottom field texcoord element */
-   r->vertex_elems[7].src_offset = sizeof(struct vertex2f);
-   r->vertex_elems[7].instance_divisor = 0;
-   r->vertex_elems[7].vertex_buffer_index = 2;
-   r->vertex_elems[7].nr_components = 2;
-   r->vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
    r->vs_const_buf = pipe_buffer_create
    (
       r->pipe->screen,
@@ -1307,7 +1307,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
 
    if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) {
       r->pipe->set_vertex_buffers(r->pipe, 1, r->vertex_bufs.all);
-      r->pipe->set_vertex_elements(r->pipe, 4, r->vertex_elems);
+      r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[0]);
       r->pipe->set_fragment_sampler_textures(r->pipe, 3, r->textures.all);
       r->pipe->bind_fragment_sampler_states(r->pipe, 3, r->samplers.all);
       r->pipe->bind_vs_state(r->pipe, r->i_vs);
@@ -1320,7 +1320,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
 
    if (num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0) {
       r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
-      r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
+      r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[1]);
       r->textures.individual.ref[0] = r->past;
       r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
       r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
@@ -1334,7 +1334,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
 
    if (false /*num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0 */ ) {
       r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
-      r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
+      r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[1]);
       r->textures.individual.ref[0] = r->past;
       r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
       r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
@@ -1348,7 +1348,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
 
    if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0) {
       r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
-      r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
+      r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[1]);
       r->textures.individual.ref[0] = r->future;
       r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
       r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
@@ -1362,7 +1362,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
 
    if (false /*num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0 */ ) {
       r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
-      r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
+      r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[1]);
       r->textures.individual.ref[0] = r->future;
       r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
       r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
@@ -1376,7 +1376,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
 
    if (num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] > 0) {
       r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
-      r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems);
+      r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[2]);
       r->textures.individual.ref[0] = r->past;
       r->textures.individual.ref[1] = r->future;
       r->pipe->set_fragment_sampler_textures(r->pipe, 5, r->textures.all);
@@ -1391,7 +1391,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
 
    if (false /*num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0 */ ) {
       r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
-      r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems);
+      r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[2]);
       r->textures.individual.ref[0] = r->past;
       r->textures.individual.ref[1] = r->future;
       r->pipe->set_fragment_sampler_textures(r->pipe, 5, r->textures.all);
index f00b8c7b8b12e71c746598c2482be66c5f0c1a67..a11a3e7307b4e3dd80a2ce610a2ad1c03644035d 100644 (file)
@@ -66,8 +66,8 @@ struct vl_mpeg12_mc_renderer
    struct pipe_buffer *vs_const_buf;
    struct pipe_buffer *fs_const_buf;
    struct pipe_framebuffer_state fb_state;
-   struct pipe_vertex_element vertex_elems[8];
-       
+   void *vertex_elems[3];
+
    union
    {
       void *all[5];
index 9080addba444f94aa0439a8ee7bf37d8d8f7839b..4608e97adbb18ee0241bcfab37769bbb6c7c7db7 100644 (file)
@@ -24,6 +24,7 @@ CSO objects handled by the context object:
 * :ref:`Depth, Stencil, & Alpha`: ``*_depth_stencil_alpha_state``
 * :ref:`Shader`: These have two sets of methods. ``*_fs_state`` is for
   fragment shaders, and ``*_vs_state`` is for vertex shaders.
+* :ref:`Vertex Elements`: ``*_vertex_elements_state``
 
 
 Resource Binding State
@@ -60,7 +61,6 @@ objects. They all follow simple, one-method binding calls, e.g.
   not have the scissor test enabled, then the scissor bounds never need to
   be set since they will not be used.
 * ``set_viewport_state``
-* ``set_vertex_elements``
 
 
 Clearing
diff --git a/src/gallium/docs/source/cso/velems.rst b/src/gallium/docs/source/cso/velems.rst
new file mode 100644 (file)
index 0000000..8e758fa
--- /dev/null
@@ -0,0 +1,24 @@
+.. _vertex,elements
+
+Vertex Elements
+===============
+
+This state controls format etc. of the input attributes contained
+in the pipe_vertex_buffer(s). There's one pipe_vertex_element array member
+for each input attribute.
+
+Members
+-------
+
+src_offset
+    The byte offset of the attribute in the buffer given by
+    vertex_buffer_index for the first vertex.
+instance_divisor
+    The instance data rate divisor, used for instancing.
+    0 means this is per-vertex data, n means per-instance data used for
+    n consecutive instances (n > 0).
+vertex_buffer_index
+    The vertex buffer this attribute lives in. Several attributes may
+    live in the same vertex buffer.
+src_format
+    The format of the attribute data. One of the PIPE_FORMAT tokens.
index 233b91dec6b301021a500c23b8511e6b359ad9e8..28f80b82cd590d3336ff5274066e9c780c5a093f 100644 (file)
@@ -93,6 +93,11 @@ struct cell_buffer_list
    struct cell_buffer_node *head;
 };
 
+struct cell_velems_state
+{
+   unsigned count;
+   struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
+}
 
 /**
  * Per-context state, subclass of pipe_context.
@@ -110,6 +115,7 @@ struct cell_context
    const struct pipe_rasterizer_state *rasterizer;
    const struct cell_vertex_shader_state *vs;
    const struct cell_fragment_shader_state *fs;
+   const struct cell_velems_state *velems;
 
    struct spe_function logic_op;
 
@@ -125,8 +131,6 @@ struct cell_context
    struct pipe_viewport_state viewport;
    struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
    uint num_vertex_buffers;
-   struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
-   uint num_vertex_elements;
 
    ubyte *cbuf_map[PIPE_MAX_COLOR_BUFS];
    ubyte *zsbuf_map;
index fbe55c84721497b572fb8e970eff6e0e4584cc17..d3efb8ecea21fce5f8e2add2a1e1d37ce26b0ed8 100644 (file)
 #include "cell_context.h"
 #include "cell_state.h"
 
+#include "util/u_memory.h"
 #include "draw/draw_context.h"
 
 
-static void
-cell_set_vertex_elements(struct pipe_context *pipe,
-                         unsigned count,
-                         const struct pipe_vertex_element *elements)
+void *
+cell_create_vertex_elements_state(struct pipe_context *pipe,
+                                  unsigned count,
+                                  const struct pipe_vertex_element *attribs)
 {
-   struct cell_context *cell = cell_context(pipe);
-
+   struct cell_velems_state *velems;
    assert(count <= PIPE_MAX_ATTRIBS);
+   velems = (struct cell_velems_state *) MALLOC(sizeof(struct cell_velems_state));
+   if (velems) {
+      velems->count = count;
+      memcpy(velems->velem, attribs, sizeof(*attribs) * count);
+   }
+   return velems;
+}
+
+void
+cell_bind_vertex_elements_state(struct pipe_context *pipe,
+                                void *velems)
+{
+   struct cell_context *cell = cell_context(pipe);
+   struct cell_velems_state *cell_velems = (struct cell_velems_state *) velems;
 
-   memcpy(cell->vertex_element, elements, count * sizeof(elements[0]));
-   cell->num_vertex_elements = count;
+   cell->velems = cell_velems;
 
    cell->dirty |= CELL_NEW_VERTEX;
 
-   draw_set_vertex_elements(cell->draw, count, elements);
+   draw_set_vertex_elements(cell->draw, cell_velems->count, cell_velems->velem);
+}
+
+void
+cell_delete_vertex_elements_state(struct pipe_context *pipe, void *velems)
+{
+   FREE( velems );
 }
 
 
@@ -75,5 +94,7 @@ void
 cell_init_vertex_functions(struct cell_context *cell)
 {
    cell->pipe.set_vertex_buffers = cell_set_vertex_buffers;
-   cell->pipe.set_vertex_elements = cell_set_vertex_elements;
+   cell->pipe.create_vertex_elements_state = cell_create_vertex_elements_state;
+   cell->pipe.bind_vertex_elements_state = cell_bind_vertex_elements_state;
+   cell->pipe.delete_vertex_elements_state = cell_delete_vertex_elements_state;
 }
index bb1a168ea7aeb4d46c051ea0e1b28b461ca15731..4a754465bbe9f88e8930e072cafde99a49ce8a1c 100644 (file)
@@ -78,6 +78,7 @@ struct failover_context {
    const struct fo_state     *rasterizer;
    const struct fo_state     *fragment_shader;
    const struct fo_state     *vertex_shader;
+   const struct fo_state     *vertex_elements;
 
    struct pipe_blend_color blend_color;
    struct pipe_stencil_ref stencil_ref;
@@ -89,10 +90,8 @@ struct failover_context {
    struct pipe_texture *vertex_textures[PIPE_MAX_VERTEX_SAMPLERS];
    struct pipe_viewport_state viewport;
    struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];
-   struct pipe_vertex_element vertex_elements[PIPE_MAX_ATTRIBS];
 
    uint num_vertex_buffers;
-   uint num_vertex_elements;
 
    void *sw_sampler_state[PIPE_MAX_SAMPLERS];
    void *hw_sampler_state[PIPE_MAX_SAMPLERS];
index 970606a3f50a0cd3561e2e14c7fcaab1fa0b8d6e..0247fb803b297baa6d12b8d9f8ca894b72cd5c1d 100644 (file)
@@ -255,9 +255,52 @@ failover_delete_vs_state(struct pipe_context *pipe,
    free(state);
 }
 
+
+
+static void *
+failover_create_vertex_elements_state( struct pipe_context *pipe,
+                                       unsigned count,
+                                       const struct pipe_vertex_element *velems )
+{
+   struct fo_state *state = malloc(sizeof(struct fo_state));
+   struct failover_context *failover = failover_context(pipe);
+
+   state->sw_state = failover->sw->create_vertex_elements_state(failover->sw, count, velems);
+   state->hw_state = failover->hw->create_vertex_elements_state(failover->hw, count, velems);
+
+   return state;
+}
+
+static void
+failover_bind_vertex_elements_state(struct pipe_context *pipe,
+                                    void *velems )
+{
+   struct failover_context *failover = failover_context(pipe);
+   struct fo_state *state = (struct fo_state*)velems;
+
+   failover->vertex_elements = state;
+   failover->dirty |= FO_NEW_VERTEX_ELEMENT;
+   failover->sw->bind_vertex_elements_state( failover->sw, velems );
+   failover->hw->bind_vertex_elements_state( failover->hw, velems );
+}
+
+static void
+failover_delete_vertex_elements_state( struct pipe_context *pipe,
+                                       void *velems )
+{
+   struct fo_state *state = (struct fo_state*)velems;
+   struct failover_context *failover = failover_context(pipe);
+
+   failover->sw->delete_vertex_elements_state(failover->sw, state->sw_state);
+   failover->hw->delete_vertex_elements_state(failover->hw, state->hw_state);
+   state->sw_state = 0;
+   state->hw_state = 0;
+   free(state);
+}
+
 static void 
 failover_set_polygon_stipple( struct pipe_context *pipe,
-                             const struct pipe_poly_stipple *stipple )
+                              const struct pipe_poly_stipple *stipple )
 {
    struct failover_context *failover = failover_context(pipe);
 
@@ -490,22 +533,6 @@ failover_set_vertex_buffers(struct pipe_context *pipe,
 }
 
 
-static void
-failover_set_vertex_elements(struct pipe_context *pipe,
-                             unsigned count,
-                             const struct pipe_vertex_element *vertex_elements)
-{
-   struct failover_context *failover = failover_context(pipe);
-
-   memcpy(failover->vertex_elements, vertex_elements,
-          count * sizeof(vertex_elements[0]));
-
-   failover->dirty |= FO_NEW_VERTEX_ELEMENT;
-   failover->num_vertex_elements = count;
-   failover->sw->set_vertex_elements( failover->sw, count, vertex_elements );
-   failover->hw->set_vertex_elements( failover->hw, count, vertex_elements );
-}
-
 void
 failover_set_constant_buffer(struct pipe_context *pipe,
                              uint shader, uint index,
@@ -543,6 +570,9 @@ failover_init_state_functions( struct failover_context *failover )
    failover->pipe.create_vs_state = failover_create_vs_state;
    failover->pipe.bind_vs_state   = failover_bind_vs_state;
    failover->pipe.delete_vs_state = failover_delete_vs_state;
+   failover->pipe.create_vertex_elements_state = failover_create_vertex_elements_state;
+   failover->pipe.bind_vertex_elements_state = failover_bind_vertex_elements_state;
+   failover->pipe.delete_vertex_elements_state = failover_delete_vertex_elements_state;
 
    failover->pipe.set_blend_color = failover_set_blend_color;
    failover->pipe.set_stencil_ref = failover_set_stencil_ref;
@@ -554,6 +584,5 @@ failover_init_state_functions( struct failover_context *failover )
    failover->pipe.set_vertex_sampler_textures = failover_set_vertex_sampler_textures;
    failover->pipe.set_viewport_state = failover_set_viewport_state;
    failover->pipe.set_vertex_buffers = failover_set_vertex_buffers;
-   failover->pipe.set_vertex_elements = failover_set_vertex_elements;
    failover->pipe.set_constant_buffer = failover_set_constant_buffer;
 }
index 5c000808425cdeedfd11271e47e73c541eab57f7..09ca19449713229a997072a7e82636b4fcd5b950 100644 (file)
@@ -81,6 +81,10 @@ failover_state_emit( struct failover_context *failover )
       failover->sw->bind_vs_state( failover->sw,
                                    failover->vertex_shader->sw_state );
 
+   if (failover->dirty & FO_NEW_VERTEX_ELEMENT)
+      failover->sw->bind_vertex_elements_state( failover->sw,
+                                                failover->vertex_elements->sw_state );
+
    if (failover->dirty & FO_NEW_STIPPLE)
       failover->sw->set_polygon_stipple( failover->sw, &failover->poly_stipple );
 
@@ -116,11 +120,5 @@ failover_state_emit( struct failover_context *failover )
                                         failover->vertex_buffers );
    }
 
-   if (failover->dirty & FO_NEW_VERTEX_ELEMENT) {
-      failover->sw->set_vertex_elements( failover->sw,
-                                         failover->num_vertex_elements,
-                                         failover->vertex_elements );
-   }
-
    failover->dirty = 0;
 }
index 499a727314d9746d347305c07a8d17257f4d4d25..49945376837cee71929670930c2f70ef29d0aa30 100644 (file)
@@ -148,7 +148,7 @@ struct i915_state
 
    /** Describes the current hardware vertex layout */
    struct vertex_info vertex_info;
-   
+
    unsigned id;                        /* track lost context events */
 };
 
@@ -187,6 +187,11 @@ struct i915_sampler_state {
    unsigned maxlod;
 };
 
+struct i915_velems_state {
+   unsigned count;
+   struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
+};
+
 #define I915_MAX_TEXTURE_2D_LEVELS 11  /* max 1024x1024 */
 #define I915_MAX_TEXTURE_3D_LEVELS  8  /* max 128x128x128 */
 
@@ -250,7 +255,6 @@ struct i915_context
 
    unsigned num_samplers;
    unsigned num_textures;
-   unsigned num_vertex_elements;
    unsigned num_vertex_buffers;
 
    struct intel_batchbuffer *batch;
index 62169918e2b715bedf8decb306ecde8ffaba4c20..8927dfc33d474177aeae2080b704399fefd119d5 100644 (file)
@@ -742,21 +742,42 @@ static void i915_set_vertex_buffers(struct pipe_context *pipe,
    draw_set_vertex_buffers(i915->draw, count, buffers);
 }
 
-static void i915_set_vertex_elements(struct pipe_context *pipe,
-                                     unsigned count,
-                                     const struct pipe_vertex_element *elements)
+static void *
+i915_create_vertex_elements_state(struct pipe_context *pipe,
+                                  unsigned count,
+                                  const struct pipe_vertex_element *attribs)
+{
+   struct i915_velems_state *velems;
+   assert(count <= PIPE_MAX_ATTRIBS);
+   velems = (struct i915_velems_state *) MALLOC(sizeof(struct i915_velems_state));
+   if (velems) {
+      velems->count = count;
+      memcpy(velems->velem, attribs, sizeof(*attribs) * count);
+   }
+   return velems;
+}
+
+static void
+i915_bind_vertex_elements_state(struct pipe_context *pipe,
+                                void *velems)
 {
    struct i915_context *i915 = i915_context(pipe);
+   struct i915_velems_state *i915_velems = (struct i915_velems_state *) velems;
+
    /* Because we change state before the draw_set_vertex_buffers call
     * we need a flush here, just to be sure.
     */
    draw_flush(i915->draw);
 
-   i915->num_vertex_elements = count;
    /* pass-through to draw module */
-   draw_set_vertex_elements(i915->draw, count, elements);
+   draw_set_vertex_elements(i915->draw, i915_velems->count, i915_velems->velem);
 }
 
+static void
+i915_delete_vertex_elements_state(struct pipe_context *pipe, void *velems)
+{
+   FREE( velems );
+}
 
 void
 i915_init_state_functions( struct i915_context *i915 )
@@ -782,6 +803,9 @@ i915_init_state_functions( struct i915_context *i915 )
    i915->base.create_vs_state = i915_create_vs_state;
    i915->base.bind_vs_state = i915_bind_vs_state;
    i915->base.delete_vs_state = i915_delete_vs_state;
+   i915->base.create_vertex_elements_state = i915_create_vertex_elements_state;
+   i915->base.bind_vertex_elements_state = i915_bind_vertex_elements_state;
+   i915->base.delete_vertex_elements_state = i915_delete_vertex_elements_state;
 
    i915->base.set_blend_color = i915_set_blend_color;
    i915->base.set_stencil_ref = i915_set_stencil_ref;
@@ -794,5 +818,4 @@ i915_init_state_functions( struct i915_context *i915 )
    i915->base.set_fragment_sampler_textures = i915_set_sampler_textures;
    i915->base.set_viewport_state = i915_set_viewport_state;
    i915->base.set_vertex_buffers = i915_set_vertex_buffers;
-   i915->base.set_vertex_elements = i915_set_vertex_elements;
 }
index 12cfa7b049c2276de81f82f807ad42be0fb9bfc0..f5b1a06576bd1dda50f535e6dd8173bc0832c8f2 100644 (file)
@@ -351,7 +351,7 @@ struct brw_vs_prog_data {
 
 /* Size == 0 if output either not written, or always [0,0,0,1]
  */
-struct brw_vs_ouput_sizes {
+struct brw_vs_output_sizes {
    GLubyte output_size[PIPE_MAX_SHADER_OUTPUTS];
 };
 
@@ -546,14 +546,13 @@ struct brw_context
       const struct brw_blend_state *blend;
       const struct brw_rasterizer_state *rast;
       const struct brw_depth_stencil_state *zstencil;
+      const struct brw_vertex_element_packet *velems;
 
       const struct brw_sampler *sampler[PIPE_MAX_SAMPLERS];
       unsigned num_samplers;
 
       struct pipe_texture *texture[PIPE_MAX_SAMPLERS];
       struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
-      struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
-      unsigned num_vertex_elements;
       unsigned num_textures;
       unsigned num_vertex_buffers;
 
index 9f136eec71cf1ef9384f619829da854c2fd70ffb..0820ba20a08fc7c4fa61d7b5e5ac70b7787f0e37 100644 (file)
 
 
 
-static unsigned brw_translate_surface_format( unsigned id )
-{
-   switch (id) {
-   case PIPE_FORMAT_R64_FLOAT:
-      return BRW_SURFACEFORMAT_R64_FLOAT;
-   case PIPE_FORMAT_R64G64_FLOAT:
-      return BRW_SURFACEFORMAT_R64G64_FLOAT;
-   case PIPE_FORMAT_R64G64B64_FLOAT:
-      return BRW_SURFACEFORMAT_R64G64B64_FLOAT;
-   case PIPE_FORMAT_R64G64B64A64_FLOAT:
-      return BRW_SURFACEFORMAT_R64G64B64A64_FLOAT;
-
-   case PIPE_FORMAT_R32_FLOAT:
-      return BRW_SURFACEFORMAT_R32_FLOAT;
-   case PIPE_FORMAT_R32G32_FLOAT:
-      return BRW_SURFACEFORMAT_R32G32_FLOAT;
-   case PIPE_FORMAT_R32G32B32_FLOAT:
-      return BRW_SURFACEFORMAT_R32G32B32_FLOAT;
-   case PIPE_FORMAT_R32G32B32A32_FLOAT:
-      return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
-
-   case PIPE_FORMAT_R32_UNORM:
-      return BRW_SURFACEFORMAT_R32_UNORM;
-   case PIPE_FORMAT_R32G32_UNORM:
-      return BRW_SURFACEFORMAT_R32G32_UNORM;
-   case PIPE_FORMAT_R32G32B32_UNORM:
-      return BRW_SURFACEFORMAT_R32G32B32_UNORM;
-   case PIPE_FORMAT_R32G32B32A32_UNORM:
-      return BRW_SURFACEFORMAT_R32G32B32A32_UNORM;
-
-   case PIPE_FORMAT_R32_USCALED:
-      return BRW_SURFACEFORMAT_R32_USCALED;
-   case PIPE_FORMAT_R32G32_USCALED:
-      return BRW_SURFACEFORMAT_R32G32_USCALED;
-   case PIPE_FORMAT_R32G32B32_USCALED:
-      return BRW_SURFACEFORMAT_R32G32B32_USCALED;
-   case PIPE_FORMAT_R32G32B32A32_USCALED:
-      return BRW_SURFACEFORMAT_R32G32B32A32_USCALED;
-
-   case PIPE_FORMAT_R32_SNORM:
-      return BRW_SURFACEFORMAT_R32_SNORM;
-   case PIPE_FORMAT_R32G32_SNORM:
-      return BRW_SURFACEFORMAT_R32G32_SNORM;
-   case PIPE_FORMAT_R32G32B32_SNORM:
-      return BRW_SURFACEFORMAT_R32G32B32_SNORM;
-   case PIPE_FORMAT_R32G32B32A32_SNORM:
-      return BRW_SURFACEFORMAT_R32G32B32A32_SNORM;
-
-   case PIPE_FORMAT_R32_SSCALED:
-      return BRW_SURFACEFORMAT_R32_SSCALED;
-   case PIPE_FORMAT_R32G32_SSCALED:
-      return BRW_SURFACEFORMAT_R32G32_SSCALED;
-   case PIPE_FORMAT_R32G32B32_SSCALED:
-      return BRW_SURFACEFORMAT_R32G32B32_SSCALED;
-   case PIPE_FORMAT_R32G32B32A32_SSCALED:
-      return BRW_SURFACEFORMAT_R32G32B32A32_SSCALED;
-
-   case PIPE_FORMAT_R16_UNORM:
-      return BRW_SURFACEFORMAT_R16_UNORM;
-   case PIPE_FORMAT_R16G16_UNORM:
-      return BRW_SURFACEFORMAT_R16G16_UNORM;
-   case PIPE_FORMAT_R16G16B16_UNORM:
-      return BRW_SURFACEFORMAT_R16G16B16_UNORM;
-   case PIPE_FORMAT_R16G16B16A16_UNORM:
-      return BRW_SURFACEFORMAT_R16G16B16A16_UNORM;
-
-   case PIPE_FORMAT_R16_USCALED:
-      return BRW_SURFACEFORMAT_R16_USCALED;
-   case PIPE_FORMAT_R16G16_USCALED:
-      return BRW_SURFACEFORMAT_R16G16_USCALED;
-   case PIPE_FORMAT_R16G16B16_USCALED:
-      return BRW_SURFACEFORMAT_R16G16B16_USCALED;
-   case PIPE_FORMAT_R16G16B16A16_USCALED:
-      return BRW_SURFACEFORMAT_R16G16B16A16_USCALED;
-
-   case PIPE_FORMAT_R16_SNORM:
-      return BRW_SURFACEFORMAT_R16_SNORM;
-   case PIPE_FORMAT_R16G16_SNORM:
-      return BRW_SURFACEFORMAT_R16G16_SNORM;
-   case PIPE_FORMAT_R16G16B16_SNORM:
-      return BRW_SURFACEFORMAT_R16G16B16_SNORM;
-   case PIPE_FORMAT_R16G16B16A16_SNORM:
-      return BRW_SURFACEFORMAT_R16G16B16A16_SNORM;
-
-   case PIPE_FORMAT_R16_SSCALED:
-      return BRW_SURFACEFORMAT_R16_SSCALED;
-   case PIPE_FORMAT_R16G16_SSCALED:
-      return BRW_SURFACEFORMAT_R16G16_SSCALED;
-   case PIPE_FORMAT_R16G16B16_SSCALED:
-      return BRW_SURFACEFORMAT_R16G16B16_SSCALED;
-   case PIPE_FORMAT_R16G16B16A16_SSCALED:
-      return BRW_SURFACEFORMAT_R16G16B16A16_SSCALED;
-
-   case PIPE_FORMAT_R8_UNORM:
-      return BRW_SURFACEFORMAT_R8_UNORM;
-   case PIPE_FORMAT_R8G8_UNORM:
-      return BRW_SURFACEFORMAT_R8G8_UNORM;
-   case PIPE_FORMAT_R8G8B8_UNORM:
-      return BRW_SURFACEFORMAT_R8G8B8_UNORM;
-   case PIPE_FORMAT_R8G8B8A8_UNORM:
-      return BRW_SURFACEFORMAT_R8G8B8A8_UNORM;
-
-   case PIPE_FORMAT_R8_USCALED:
-      return BRW_SURFACEFORMAT_R8_USCALED;
-   case PIPE_FORMAT_R8G8_USCALED:
-      return BRW_SURFACEFORMAT_R8G8_USCALED;
-   case PIPE_FORMAT_R8G8B8_USCALED:
-      return BRW_SURFACEFORMAT_R8G8B8_USCALED;
-   case PIPE_FORMAT_R8G8B8A8_USCALED:
-      return BRW_SURFACEFORMAT_R8G8B8A8_USCALED;
-
-   case PIPE_FORMAT_R8_SNORM:
-      return BRW_SURFACEFORMAT_R8_SNORM;
-   case PIPE_FORMAT_R8G8_SNORM:
-      return BRW_SURFACEFORMAT_R8G8_SNORM;
-   case PIPE_FORMAT_R8G8B8_SNORM:
-      return BRW_SURFACEFORMAT_R8G8B8_SNORM;
-   case PIPE_FORMAT_R8G8B8A8_SNORM:
-      return BRW_SURFACEFORMAT_R8G8B8A8_SNORM;
-
-   case PIPE_FORMAT_R8_SSCALED:
-      return BRW_SURFACEFORMAT_R8_SSCALED;
-   case PIPE_FORMAT_R8G8_SSCALED:
-      return BRW_SURFACEFORMAT_R8G8_SSCALED;
-   case PIPE_FORMAT_R8G8B8_SSCALED:
-      return BRW_SURFACEFORMAT_R8G8B8_SSCALED;
-   case PIPE_FORMAT_R8G8B8A8_SSCALED:
-      return BRW_SURFACEFORMAT_R8G8B8A8_SSCALED;
-
-   default:
-      assert(0);
-      return 0;
-   }
-}
-
 static unsigned get_index_type(int type)
 {
    switch (type) {
@@ -315,75 +180,16 @@ static int brw_emit_vertex_buffers( struct brw_context *brw )
 
 
 
-
 static int brw_emit_vertex_elements(struct brw_context *brw)
 {
-   GLuint nr = brw->curr.num_vertex_elements;
-   GLuint i;
+   const struct brw_vertex_element_packet *brw_velems = brw->curr.velems;
+   unsigned size = brw_velems->header.length + 2;
 
+   /* why is this here */
    brw_emit_query_begin(brw);
 
-   /* If the VS doesn't read any inputs (calculating vertex position from
-    * a state variable for some reason, for example), emit a single pad
-    * VERTEX_ELEMENT struct and bail.
-    *
-    * The stale VB state stays in place, but they don't do anything unless
-    * a VE loads from them.
-    */
-   if (nr == 0) {
-      BEGIN_BATCH(3, IGNORE_CLIPRECTS);
-      OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1);
-      OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) |
-               BRW_VE0_VALID |
-               (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
-               (0 << BRW_VE0_SRC_OFFSET_SHIFT));
-      OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) |
-               (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
-               (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
-               (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT));
-      ADVANCE_BATCH();
-      return 0;
-   }
-
-   /* Now emit vertex element (VEP) state packets.
-    *
-    */
-   BEGIN_BATCH(1 + nr * 2, IGNORE_CLIPRECTS);
-   OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + nr * 2) - 2));
-   for (i = 0; i < nr; i++) {
-      const struct pipe_vertex_element *input = &brw->curr.vertex_element[i];
-      uint32_t format = brw_translate_surface_format( input->src_format );
-      uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC;
-      uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC;
-      uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC;
-      uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC;
-
-      switch (input->nr_components) {
-      case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
-      case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
-      case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
-      case 3: comp3 = BRW_VE1_COMPONENT_STORE_1_FLT;
-        break;
-      }
-
-      OUT_BATCH((input->vertex_buffer_index << BRW_VE0_INDEX_SHIFT) |
-               BRW_VE0_VALID |
-               (format << BRW_VE0_FORMAT_SHIFT) |
-               (input->src_offset << BRW_VE0_SRC_OFFSET_SHIFT));
+   brw_batchbuffer_data(brw->batch, brw_velems, size * 4, IGNORE_CLIPRECTS);
 
-      if (BRW_IS_IGDNG(brw))
-          OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
-                    (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
-                    (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
-                    (comp3 << BRW_VE1_COMPONENT_3_SHIFT));
-      else
-          OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
-                    (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
-                    (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
-                    (comp3 << BRW_VE1_COMPONENT_3_SHIFT) |
-                    ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
-   }
-   ADVANCE_BATCH();
    return 0;
 }
 
@@ -396,10 +202,11 @@ static int brw_emit_vertices( struct brw_context *brw )
    if (ret)
       return ret;
 
+   /* XXX should separate this? */
    ret = brw_emit_vertex_elements( brw );
    if (ret)
       return ret;
-   
+
    return 0;
 }
 
@@ -407,7 +214,8 @@ static int brw_emit_vertices( struct brw_context *brw )
 const struct brw_tracked_state brw_vertices = {
    .dirty = {
       .mesa = (PIPE_NEW_INDEX_RANGE |
-               PIPE_NEW_VERTEX_BUFFER),
+               PIPE_NEW_VERTEX_BUFFER |
+               PIPE_NEW_VERTEX_ELEMENT),
       .brw = BRW_NEW_BATCH,
       .cache = 0,
    },
index e3c48e3149357d88dc11fd51653324280998898a..d6a840857ec0fd7b20e853847ccc683e624b210c 100644 (file)
 #include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_structs.h"
 
+#include "util/u_memory.h"
+#include "util/u_format.h"
 
-static void brw_set_vertex_elements( struct pipe_context *pipe,
-                                    unsigned count,
-                                    const struct pipe_vertex_element *elements )
+
+static unsigned brw_translate_surface_format( unsigned id )
+{
+   switch (id) {
+   case PIPE_FORMAT_R64_FLOAT:
+      return BRW_SURFACEFORMAT_R64_FLOAT;
+   case PIPE_FORMAT_R64G64_FLOAT:
+      return BRW_SURFACEFORMAT_R64G64_FLOAT;
+   case PIPE_FORMAT_R64G64B64_FLOAT:
+      return BRW_SURFACEFORMAT_R64G64B64_FLOAT;
+   case PIPE_FORMAT_R64G64B64A64_FLOAT:
+      return BRW_SURFACEFORMAT_R64G64B64A64_FLOAT;
+
+   case PIPE_FORMAT_R32_FLOAT:
+      return BRW_SURFACEFORMAT_R32_FLOAT;
+   case PIPE_FORMAT_R32G32_FLOAT:
+      return BRW_SURFACEFORMAT_R32G32_FLOAT;
+   case PIPE_FORMAT_R32G32B32_FLOAT:
+      return BRW_SURFACEFORMAT_R32G32B32_FLOAT;
+   case PIPE_FORMAT_R32G32B32A32_FLOAT:
+      return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
+
+   case PIPE_FORMAT_R32_UNORM:
+      return BRW_SURFACEFORMAT_R32_UNORM;
+   case PIPE_FORMAT_R32G32_UNORM:
+      return BRW_SURFACEFORMAT_R32G32_UNORM;
+   case PIPE_FORMAT_R32G32B32_UNORM:
+      return BRW_SURFACEFORMAT_R32G32B32_UNORM;
+   case PIPE_FORMAT_R32G32B32A32_UNORM:
+      return BRW_SURFACEFORMAT_R32G32B32A32_UNORM;
+
+   case PIPE_FORMAT_R32_USCALED:
+      return BRW_SURFACEFORMAT_R32_USCALED;
+   case PIPE_FORMAT_R32G32_USCALED:
+      return BRW_SURFACEFORMAT_R32G32_USCALED;
+   case PIPE_FORMAT_R32G32B32_USCALED:
+      return BRW_SURFACEFORMAT_R32G32B32_USCALED;
+   case PIPE_FORMAT_R32G32B32A32_USCALED:
+      return BRW_SURFACEFORMAT_R32G32B32A32_USCALED;
+
+   case PIPE_FORMAT_R32_SNORM:
+      return BRW_SURFACEFORMAT_R32_SNORM;
+   case PIPE_FORMAT_R32G32_SNORM:
+      return BRW_SURFACEFORMAT_R32G32_SNORM;
+   case PIPE_FORMAT_R32G32B32_SNORM:
+      return BRW_SURFACEFORMAT_R32G32B32_SNORM;
+   case PIPE_FORMAT_R32G32B32A32_SNORM:
+      return BRW_SURFACEFORMAT_R32G32B32A32_SNORM;
+
+   case PIPE_FORMAT_R32_SSCALED:
+      return BRW_SURFACEFORMAT_R32_SSCALED;
+   case PIPE_FORMAT_R32G32_SSCALED:
+      return BRW_SURFACEFORMAT_R32G32_SSCALED;
+   case PIPE_FORMAT_R32G32B32_SSCALED:
+      return BRW_SURFACEFORMAT_R32G32B32_SSCALED;
+   case PIPE_FORMAT_R32G32B32A32_SSCALED:
+      return BRW_SURFACEFORMAT_R32G32B32A32_SSCALED;
+
+   case PIPE_FORMAT_R16_UNORM:
+      return BRW_SURFACEFORMAT_R16_UNORM;
+   case PIPE_FORMAT_R16G16_UNORM:
+      return BRW_SURFACEFORMAT_R16G16_UNORM;
+   case PIPE_FORMAT_R16G16B16_UNORM:
+      return BRW_SURFACEFORMAT_R16G16B16_UNORM;
+   case PIPE_FORMAT_R16G16B16A16_UNORM:
+      return BRW_SURFACEFORMAT_R16G16B16A16_UNORM;
+
+   case PIPE_FORMAT_R16_USCALED:
+      return BRW_SURFACEFORMAT_R16_USCALED;
+   case PIPE_FORMAT_R16G16_USCALED:
+      return BRW_SURFACEFORMAT_R16G16_USCALED;
+   case PIPE_FORMAT_R16G16B16_USCALED:
+      return BRW_SURFACEFORMAT_R16G16B16_USCALED;
+   case PIPE_FORMAT_R16G16B16A16_USCALED:
+      return BRW_SURFACEFORMAT_R16G16B16A16_USCALED;
+
+   case PIPE_FORMAT_R16_SNORM:
+      return BRW_SURFACEFORMAT_R16_SNORM;
+   case PIPE_FORMAT_R16G16_SNORM:
+      return BRW_SURFACEFORMAT_R16G16_SNORM;
+   case PIPE_FORMAT_R16G16B16_SNORM:
+      return BRW_SURFACEFORMAT_R16G16B16_SNORM;
+   case PIPE_FORMAT_R16G16B16A16_SNORM:
+      return BRW_SURFACEFORMAT_R16G16B16A16_SNORM;
+
+   case PIPE_FORMAT_R16_SSCALED:
+      return BRW_SURFACEFORMAT_R16_SSCALED;
+   case PIPE_FORMAT_R16G16_SSCALED:
+      return BRW_SURFACEFORMAT_R16G16_SSCALED;
+   case PIPE_FORMAT_R16G16B16_SSCALED:
+      return BRW_SURFACEFORMAT_R16G16B16_SSCALED;
+   case PIPE_FORMAT_R16G16B16A16_SSCALED:
+      return BRW_SURFACEFORMAT_R16G16B16A16_SSCALED;
+
+   case PIPE_FORMAT_R8_UNORM:
+      return BRW_SURFACEFORMAT_R8_UNORM;
+   case PIPE_FORMAT_R8G8_UNORM:
+      return BRW_SURFACEFORMAT_R8G8_UNORM;
+   case PIPE_FORMAT_R8G8B8_UNORM:
+      return BRW_SURFACEFORMAT_R8G8B8_UNORM;
+   case PIPE_FORMAT_R8G8B8A8_UNORM:
+      return BRW_SURFACEFORMAT_R8G8B8A8_UNORM;
+
+   case PIPE_FORMAT_R8_USCALED:
+      return BRW_SURFACEFORMAT_R8_USCALED;
+   case PIPE_FORMAT_R8G8_USCALED:
+      return BRW_SURFACEFORMAT_R8G8_USCALED;
+   case PIPE_FORMAT_R8G8B8_USCALED:
+      return BRW_SURFACEFORMAT_R8G8B8_USCALED;
+   case PIPE_FORMAT_R8G8B8A8_USCALED:
+      return BRW_SURFACEFORMAT_R8G8B8A8_USCALED;
+
+   case PIPE_FORMAT_R8_SNORM:
+      return BRW_SURFACEFORMAT_R8_SNORM;
+   case PIPE_FORMAT_R8G8_SNORM:
+      return BRW_SURFACEFORMAT_R8G8_SNORM;
+   case PIPE_FORMAT_R8G8B8_SNORM:
+      return BRW_SURFACEFORMAT_R8G8B8_SNORM;
+   case PIPE_FORMAT_R8G8B8A8_SNORM:
+      return BRW_SURFACEFORMAT_R8G8B8A8_SNORM;
+
+   case PIPE_FORMAT_R8_SSCALED:
+      return BRW_SURFACEFORMAT_R8_SSCALED;
+   case PIPE_FORMAT_R8G8_SSCALED:
+      return BRW_SURFACEFORMAT_R8G8_SSCALED;
+   case PIPE_FORMAT_R8G8B8_SSCALED:
+      return BRW_SURFACEFORMAT_R8G8B8_SSCALED;
+   case PIPE_FORMAT_R8G8B8A8_SSCALED:
+      return BRW_SURFACEFORMAT_R8G8B8A8_SSCALED;
+
+   default:
+      assert(0);
+      return 0;
+   }
+}
+
+static void brw_translate_vertex_elements(struct brw_context *brw,
+                                          struct brw_vertex_element_packet *brw_velems,
+                                          const struct pipe_vertex_element *attribs,
+                                          unsigned count)
+{
+   unsigned i;
+
+   /* If the VS doesn't read any inputs (calculating vertex position from
+    * a state variable for some reason, for example), emit a single pad
+    * VERTEX_ELEMENT struct and bail.
+    *
+    * The stale VB state stays in place, but they don't do anything unless
+    * a VE loads from them.
+    */
+   brw_velems->header.opcode = CMD_VERTEX_ELEMENT;
+
+   if (count == 0) {
+      brw_velems->header.length = 1;
+      brw_velems->ve[0].ve0.src_offset = 0;
+      brw_velems->ve[0].ve0.src_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
+      brw_velems->ve[0].ve0.valid = 1;
+      brw_velems->ve[0].ve0.vertex_buffer_index = 0;
+      brw_velems->ve[0].ve1.dst_offset = 0;
+      brw_velems->ve[0].ve1.vfcomponent0 = BRW_VE1_COMPONENT_STORE_0;
+      brw_velems->ve[0].ve1.vfcomponent1 = BRW_VE1_COMPONENT_STORE_0;
+      brw_velems->ve[0].ve1.vfcomponent2 = BRW_VE1_COMPONENT_STORE_0;
+      brw_velems->ve[0].ve1.vfcomponent3 = BRW_VE1_COMPONENT_STORE_1_FLT;
+      return;
+   }
+
+
+   /* Now emit vertex element (VEP) state packets.
+    *
+    */
+   brw_velems->header.length = (1 + count * 2) - 2;
+   for (i = 0; i < count; i++) {
+      const struct pipe_vertex_element *input = &attribs[i];
+      unsigned nr_components = util_format_get_nr_components(input->src_format);
+
+      uint32_t format = brw_translate_surface_format( input->src_format );
+      uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC;
+      uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC;
+      uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC;
+      uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC;
+
+      switch (nr_components) {
+      case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
+      case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
+      case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
+      case 3: comp3 = BRW_VE1_COMPONENT_STORE_1_FLT;
+         break;
+      }
+
+      brw_velems->ve[i].ve0.src_offset = input->src_offset;
+      brw_velems->ve[i].ve0.src_format = format;
+      brw_velems->ve[i].ve0.valid = 1;
+      brw_velems->ve[i].ve0.vertex_buffer_index = input->vertex_buffer_index;
+      brw_velems->ve[i].ve1.vfcomponent0 = comp0;
+      brw_velems->ve[i].ve1.vfcomponent1 = comp1;
+      brw_velems->ve[i].ve1.vfcomponent2 = comp2;
+      brw_velems->ve[i].ve1.vfcomponent3 = comp3;
+
+      if (BRW_IS_IGDNG(brw))
+         brw_velems->ve[i].ve1.dst_offset = 0;
+      else
+         brw_velems->ve[i].ve1.dst_offset = i * 4;
+   }
+}
+
+static void* brw_create_vertex_elements_state( struct pipe_context *pipe,
+                                               unsigned count,
+                                               const struct pipe_vertex_element *attribs )
 {
+   /* note: for the brw_swtnl.c code (if ever we need draw fallback) we'd also need
+      to store the original data */
    struct brw_context *brw = brw_context(pipe);
+   struct brw_vertex_element_packet *velems;
+   assert(count <= BRW_VEP_MAX);
+   velems = (struct brw_vertex_element_packet *) MALLOC(sizeof(struct brw_vertex_element_packet));
+   if (velems) {
+      brw_translate_vertex_elements(brw, velems, attribs, count);
+   }
+   return velems;
+}
 
-   memcpy(brw->curr.vertex_element, elements, count * sizeof(elements[0]));
-   brw->curr.num_vertex_elements = count;
+static void brw_bind_vertex_elements_state(struct pipe_context *pipe,
+                                           void *velems)
+{
+   struct brw_context *brw = brw_context(pipe);
+   struct brw_vertex_element_packet *brw_velems = (struct brw_vertex_element_packet *) velems;
+
+   brw->curr.velems = brw_velems;
 
    brw->state.dirty.mesa |= PIPE_NEW_VERTEX_ELEMENT;
 }
 
+static void brw_delete_vertex_elements_state(struct pipe_context *pipe, void *velems)
+{
+   FREE( velems );
+}
+
 
 static void brw_set_vertex_buffers(struct pipe_context *pipe,
-                                  unsigned count,
-                                  const struct pipe_vertex_buffer *buffers)
+                                   unsigned count,
+                                   const struct pipe_vertex_buffer *buffers)
 {
    struct brw_context *brw = brw_context(pipe);
    unsigned i;
@@ -49,7 +278,9 @@ void
 brw_pipe_vertex_init( struct brw_context *brw )
 {
    brw->base.set_vertex_buffers = brw_set_vertex_buffers;
-   brw->base.set_vertex_elements = brw_set_vertex_elements;
+   brw->base.create_vertex_elements_state = brw_create_vertex_elements_state;
+   brw->base.bind_vertex_elements_state = brw_bind_vertex_elements_state;
+   brw->base.delete_vertex_elements_state = brw_delete_vertex_elements_state;
 }
 
 
index bf10bc04de75ebdf84c61a7abba2f092b96b4bd9..e97ddeb5e1cf729759b39afccc7e71c2e817b55a 100644 (file)
@@ -28,7 +28,7 @@
   * Authors:
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
-        
+
 
 #ifndef BRW_STRUCTS_H
 #define BRW_STRUCTS_H
@@ -1149,7 +1149,7 @@ struct brw_vertex_element_state
       GLuint valid:1; 
       GLuint vertex_buffer_index:5; 
    } ve0;
-   
+
    struct
    {
       GLuint dst_offset:8; 
index 8248b2a4132f9f8497d4c052e4202f7ab38ef5da..baf0ae440166762c525b966a0dbd2729a243a6f2 100644 (file)
@@ -377,6 +377,42 @@ identity_delete_vs_state(struct pipe_context *_pipe,
                          vs);
 }
 
+
+static void *
+identity_create_vertex_elements_state(struct pipe_context *_pipe,
+                                      unsigned num_elements,
+                                      const struct pipe_vertex_element *vertex_elements)
+{
+   struct identity_context *id_pipe = identity_context(_pipe);
+   struct pipe_context *pipe = id_pipe->pipe;
+
+   return pipe->create_vertex_elements_state(pipe,
+                                             num_elements,
+                                             vertex_elements);
+}
+
+static void
+identity_bind_vertex_elements_state(struct pipe_context *_pipe,
+                                    void *velems)
+{
+   struct identity_context *id_pipe = identity_context(_pipe);
+   struct pipe_context *pipe = id_pipe->pipe;
+
+   pipe->bind_vertex_elements_state(pipe,
+                                    velems);
+}
+
+static void
+identity_delete_vertex_elements_state(struct pipe_context *_pipe,
+                                      void *velems)
+{
+   struct identity_context *id_pipe = identity_context(_pipe);
+   struct pipe_context *pipe = id_pipe->pipe;
+
+   pipe->delete_vertex_elements_state(pipe,
+                                      velems);
+}
+
 static void
 identity_set_blend_color(struct pipe_context *_pipe,
                          const struct pipe_blend_color *blend_color)
@@ -563,20 +599,6 @@ identity_set_vertex_buffers(struct pipe_context *_pipe,
                             num_buffers,
                             buffers);
 }
-
-static void
-identity_set_vertex_elements(struct pipe_context *_pipe,
-                             unsigned num_elements,
-                             const struct pipe_vertex_element *vertex_elements)
-{
-   struct identity_context *id_pipe = identity_context(_pipe);
-   struct pipe_context *pipe = id_pipe->pipe;
-
-   pipe->set_vertex_elements(pipe,
-                             num_elements,
-                             vertex_elements);
-}
-
 static void
 identity_surface_copy(struct pipe_context *_pipe,
                       struct pipe_surface *_dst,
@@ -733,6 +755,9 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
    id_pipe->base.create_vs_state = identity_create_vs_state;
    id_pipe->base.bind_vs_state = identity_bind_vs_state;
    id_pipe->base.delete_vs_state = identity_delete_vs_state;
+   id_pipe->base.create_vertex_elements_state = identity_create_vertex_elements_state;
+   id_pipe->base.bind_vertex_elements_state = identity_bind_vertex_elements_state;
+   id_pipe->base.delete_vertex_elements_state = identity_delete_vertex_elements_state;
    id_pipe->base.set_blend_color = identity_set_blend_color;
    id_pipe->base.set_stencil_ref = identity_set_stencil_ref;
    id_pipe->base.set_clip_state = identity_set_clip_state;
@@ -744,7 +769,6 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
    id_pipe->base.set_fragment_sampler_textures = identity_set_fragment_sampler_textures;
    id_pipe->base.set_vertex_sampler_textures = identity_set_vertex_sampler_textures;
    id_pipe->base.set_vertex_buffers = identity_set_vertex_buffers;
-   id_pipe->base.set_vertex_elements = identity_set_vertex_elements;
    id_pipe->base.surface_copy = identity_surface_copy;
    id_pipe->base.surface_fill = identity_surface_fill;
    id_pipe->base.clear = identity_clear;
index e31ae6a3fc167cda917dfe44ce4b3c28fdcaed4f..d94efec16a45a1b6092c8a61f04f24433c1eabf1 100644 (file)
@@ -145,6 +145,10 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv )
    llvmpipe->pipe.bind_vs_state   = llvmpipe_bind_vs_state;
    llvmpipe->pipe.delete_vs_state = llvmpipe_delete_vs_state;
 
+   llvmpipe->pipe.create_vertex_elements_state = llvmpipe_create_vertex_elements_state;
+   llvmpipe->pipe.bind_vertex_elements_state = llvmpipe_bind_vertex_elements_state;
+   llvmpipe->pipe.delete_vertex_elements_state = llvmpipe_delete_vertex_elements_state;
+
    llvmpipe->pipe.set_blend_color = llvmpipe_set_blend_color;
    llvmpipe->pipe.set_stencil_ref = llvmpipe_set_stencil_ref;
    llvmpipe->pipe.set_clip_state = llvmpipe_set_clip_state;
@@ -157,7 +161,6 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv )
    llvmpipe->pipe.set_viewport_state = llvmpipe_set_viewport_state;
 
    llvmpipe->pipe.set_vertex_buffers = llvmpipe_set_vertex_buffers;
-   llvmpipe->pipe.set_vertex_elements = llvmpipe_set_vertex_elements;
 
    llvmpipe->pipe.draw_arrays = llvmpipe_draw_arrays;
    llvmpipe->pipe.draw_elements = llvmpipe_draw_elements;
index 955c7eb8e0e97c0ce4e0289a00e14296a8615a67..217ec59b68847eb7e6dc7d55d4b8dfe845aca58b 100644 (file)
@@ -46,6 +46,7 @@ struct lp_fragment_shader;
 struct lp_vertex_shader;
 struct lp_blend_state;
 struct setup_context;
+struct lp_velems_state;
 
 struct llvmpipe_context {
    struct pipe_context pipe;  /**< base class */
@@ -58,6 +59,7 @@ struct llvmpipe_context {
    const struct pipe_rasterizer_state *rasterizer;
    struct lp_fragment_shader *fs;
    const struct lp_vertex_shader *vs;
+   const struct lp_velems_state *velems;
 
    /** Other rendering state */
    struct pipe_blend_color blend_color;
@@ -71,13 +73,11 @@ struct llvmpipe_context {
    struct pipe_texture *vertex_textures[PIPE_MAX_VERTEX_SAMPLERS];
    struct pipe_viewport_state viewport;
    struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
-   struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
 
    unsigned num_samplers;
    unsigned num_textures;
    unsigned num_vertex_samplers;
    unsigned num_vertex_textures;
-   unsigned num_vertex_elements;
    unsigned num_vertex_buffers;
 
    unsigned dirty; /**< Mask of LP_NEW_x flags */
index 9beba32271f5022f805d45fc3176b5b5c1d65260..6dbdc195bfc3065f10ba3f95c94b6181d25c4fe5 100644 (file)
@@ -119,6 +119,10 @@ struct lp_vertex_shader {
    struct draw_vertex_shader *draw_data;
 };
 
+struct lp_velems_state {
+   unsigned count;
+   struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
+};
 
 
 void *
@@ -176,8 +180,14 @@ void *llvmpipe_create_vs_state(struct pipe_context *,
 void llvmpipe_bind_vs_state(struct pipe_context *, void *);
 void llvmpipe_delete_vs_state(struct pipe_context *, void *);
 
+void *llvmpipe_create_vertex_elements_state(struct pipe_context *,
+                                            unsigned count,
+                                            const struct pipe_vertex_element *);
+void llvmpipe_bind_vertex_elements_state(struct pipe_context *, void *);
+void llvmpipe_delete_vertex_elements_state(struct pipe_context *, void *);
+
 void llvmpipe_set_polygon_stipple( struct pipe_context *,
-                                 const struct pipe_poly_stipple * );
+                                   const struct pipe_poly_stipple * );
 
 void llvmpipe_set_scissor_state( struct pipe_context *,
                                  const struct pipe_scissor_state * );
@@ -194,10 +204,6 @@ llvmpipe_set_vertex_sampler_textures(struct pipe_context *,
 void llvmpipe_set_viewport_state( struct pipe_context *,
                                   const struct pipe_viewport_state * );
 
-void llvmpipe_set_vertex_elements(struct pipe_context *,
-                                  unsigned count,
-                                  const struct pipe_vertex_element *);
-
 void llvmpipe_set_vertex_buffers(struct pipe_context *,
                                  unsigned count,
                                  const struct pipe_vertex_buffer *);
index 57ac25ea0cba07d27deaaa1920b088096ece5bbf..f6427aa908e2c84c8acddf3738a9bccc672901d0 100644 (file)
 #include "draw/draw_context.h"
 
 
+void *
+llvmpipe_create_vertex_elements_state(struct pipe_context *pipe,
+                                      unsigned count,
+                                      const struct pipe_vertex_element *attribs)
+{
+   struct lp_velems_state *velems;
+   assert(count <= PIPE_MAX_ATTRIBS);
+   velems = (struct lp_velems_state *) MALLOC(sizeof(struct lp_velems_state));
+   if (velems) {
+      velems->count = count;
+      memcpy(velems->velem, attribs, sizeof(*attribs) * count);
+   }
+   return velems;
+}
+
 void
-llvmpipe_set_vertex_elements(struct pipe_context *pipe,
-                             unsigned count,
-                             const struct pipe_vertex_element *attribs)
+llvmpipe_bind_vertex_elements_state(struct pipe_context *pipe,
+                                    void *velems)
 {
    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+   struct lp_velems_state *lp_velems = (struct lp_velems_state *) velems;
 
-   assert(count <= PIPE_MAX_ATTRIBS);
-
-   memcpy(llvmpipe->vertex_element, attribs,
-          count * sizeof(struct pipe_vertex_element));
-   llvmpipe->num_vertex_elements = count;
+   llvmpipe->velems = lp_velems;
 
    llvmpipe->dirty |= LP_NEW_VERTEX;
 
-   draw_set_vertex_elements(llvmpipe->draw, count, attribs);
+   if (velems)
+      draw_set_vertex_elements(llvmpipe->draw, lp_velems->count, lp_velems->velem);
 }
 
+void
+llvmpipe_delete_vertex_elements_state(struct pipe_context *pipe, void *velems)
+{
+   FREE( velems );
+}
 
 void
 llvmpipe_set_vertex_buffers(struct pipe_context *pipe,
index f7d10a591f62324230ec4a9654f495d482e66150..b1ad686022a0905f838cca62dfb3804f3e2046c8 100644 (file)
@@ -124,7 +124,7 @@ nouveau_screen_map_flags(unsigned pipe)
        if (pipe & PIPE_BUFFER_USAGE_DONTBLOCK)
                flags |= NOUVEAU_BO_NOWAIT;
        else
-       if (pipe & 0 /*PIPE_BUFFER_USAGE_UNSYNCHRONIZED*/)
+       if (pipe & PIPE_BUFFER_USAGE_UNSYNCHRONIZED)
                flags |= NOUVEAU_BO_NOSYNC;
 
        return flags;
index a10114beab98d9ffb9204a448d6ed0d0692cbcc3..7f16e31c3f0838b706edb0e4f8216a2ae2ba2309 100644 (file)
@@ -88,4 +88,104 @@ static INLINE unsigned log2i(unsigned i)
        return r;
 }
 
+struct u_split_prim {
+   void *priv;
+   void (*emit)(void *priv, unsigned start, unsigned count);
+   void (*edge)(void *priv, boolean enabled);
+
+   unsigned mode;
+   unsigned start;
+   unsigned p_start;
+   unsigned p_end;
+
+   int repeat_first:1;
+   int close_first:1;
+   int edgeflag_off:1;
+};
+
+static inline void
+u_split_prim_init(struct u_split_prim *s,
+                  unsigned mode, unsigned start, unsigned count)
+{
+   if (mode == PIPE_PRIM_LINE_LOOP) {
+      s->mode = PIPE_PRIM_LINE_STRIP;
+      s->close_first = 1;
+   } else {
+      s->mode = mode;
+      s->close_first = 0;
+   }
+   s->start = start;
+   s->p_start = start;
+   s->p_end = start + count;
+   s->edgeflag_off = 0;
+   s->repeat_first = 0;
+}
+
+static INLINE boolean
+u_split_prim_next(struct u_split_prim *s, unsigned max_verts)
+{
+   int repeat = 0;
+
+   if (s->repeat_first) {
+      s->emit(s->priv, s->start, 1);
+      max_verts--;
+      if (s->edgeflag_off) {
+         s->edge(s->priv, TRUE);
+         s->edgeflag_off = FALSE;
+      }
+   }
+
+   if (s->p_start + s->close_first + max_verts >= s->p_end) {
+      s->emit(s->priv, s->p_start, s->p_end - s->p_start);
+      if (s->close_first)
+         s->emit(s->priv, s->start, 1);
+      return TRUE;
+   }
+
+   switch (s->mode) {
+   case PIPE_PRIM_LINES:
+      max_verts &= ~1;
+      break;
+   case PIPE_PRIM_LINE_STRIP:
+      repeat = 1;
+      break;
+   case PIPE_PRIM_POLYGON:
+      max_verts--;
+      s->emit(s->priv, s->p_start, max_verts);
+      s->edge(s->priv, FALSE);
+      s->emit(s->priv, s->p_start + max_verts, 1);
+      s->p_start += max_verts;
+      s->repeat_first = TRUE;
+      s->edgeflag_off = TRUE;
+      return FALSE;
+   case PIPE_PRIM_TRIANGLES:
+      max_verts = max_verts - (max_verts % 3);
+      break;
+   case PIPE_PRIM_TRIANGLE_STRIP:
+      /* to ensure winding stays correct, always split
+       * on an even number of generated triangles
+       */
+      max_verts = max_verts & ~1;
+      repeat = 2;
+      break;
+   case PIPE_PRIM_TRIANGLE_FAN:
+      s->repeat_first = TRUE;
+      repeat = 1;
+      break;
+   case PIPE_PRIM_QUADS:
+      max_verts &= ~3;
+      break;
+   case PIPE_PRIM_QUAD_STRIP:
+      max_verts &= ~1;
+      repeat = 2;
+      break;
+   default:
+      break;
+   }
+
+   s->emit (s->priv, s->p_start, max_verts);
+   s->p_start += (max_verts - repeat);
+   return FALSE;
+}
+
 #endif
index ea259aadf359f28222d68a5f089098c2afab467e..1786460aec10b9a4ad1274bfb39048c5834e170c 100644 (file)
@@ -107,6 +107,11 @@ struct nv30_state {
        struct nouveau_stateobj *hw[NV30_STATE_MAX];
 };
 
+struct nv30_vtxelt_state {
+       struct pipe_vertex_element pipe[16];
+       unsigned num_elements;
+};
+
 struct nv30_context {
        struct pipe_context pipe;
 
@@ -142,8 +147,7 @@ struct nv30_context {
        unsigned dirty_samplers;
        struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
        unsigned vtxbuf_nr;
-       struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
-       unsigned vtxelt_nr;
+       struct nv30_vtxelt_state *vtxelt;
 };
 
 static INLINE struct nv30_context *
index 5ef74a832dcea3835f74e344626ceca0a1410a35..bfa27b632f70e24a5f759ddcaff10a2d6a46ecac 100644 (file)
@@ -236,5 +236,5 @@ nv30_screen_init_miptree_functions(struct pipe_screen *pscreen)
        pscreen->get_tex_surface = nv30_miptree_surface_new;
        pscreen->tex_surface_destroy = nv30_miptree_surface_del;
 
-       nouveau_screen(pscreen)->texture_blanket = nv50_miptree_blanket;
+       nouveau_screen(pscreen)->texture_blanket = nv30_miptree_blanket;
 }
index d911c80707404656a07a15090912432f03159a13..24b15a63ac4a5956ebf3bfb81511f3af988346da 100644 (file)
@@ -669,15 +669,34 @@ nv30_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
        /*nv30->draw_dirty |= NV30_NEW_ARRAYS;*/
 }
 
+static void *
+nv30_vtxelts_state_create(struct pipe_context *pipe,
+                         unsigned num_elements,
+                         const struct pipe_vertex_element *elements)
+{
+       struct nv30_vtxelt_state *cso = CALLOC_STRUCT(nv30_vtxelt_state);
+
+       assert(num_elements < 16); /* not doing fallbacks yet */
+       cso->num_elements = num_elements;
+       memcpy(cso->pipe, elements, num_elements * sizeof(*elements));
+
+/*     nv30_vtxelt_construct(cso);*/
+
+       return (void *)cso;
+}
+
 static void
-nv30_set_vertex_elements(struct pipe_context *pipe, unsigned count,
-                        const struct pipe_vertex_element *ve)
+nv30_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso)
 {
-       struct nv30_context *nv30 = nv30_context(pipe);
+       FREE(hwcso);
+}
 
-       memcpy(nv30->vtxelt, ve, sizeof(*ve) * count);
-       nv30->vtxelt_nr = count;
+static void
+nv30_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+       struct nv30_context *nv30 = nv30_context(pipe);
 
+       nv30->vtxelt = hwcso;
        nv30->dirty |= NV30_NEW_ARRAYS;
        /*nv30->draw_dirty |= NV30_NEW_ARRAYS;*/
 }
@@ -722,7 +741,10 @@ nv30_init_state_functions(struct nv30_context *nv30)
        nv30->pipe.set_scissor_state = nv30_set_scissor_state;
        nv30->pipe.set_viewport_state = nv30_set_viewport_state;
 
+       nv30->pipe.create_vertex_elements_state = nv30_vtxelts_state_create;
+       nv30->pipe.delete_vertex_elements_state = nv30_vtxelts_state_delete;
+       nv30->pipe.bind_vertex_elements_state = nv30_vtxelts_state_bind;
+
        nv30->pipe.set_vertex_buffers = nv30_set_vertex_buffers;
-       nv30->pipe.set_vertex_elements = nv30_set_vertex_elements;
 }
 
index e48823a91381657a21ba43708db7fb4eac30a51f..f3856bb5a5ebe7773f4ec525c09ca45dcf1b10a4 100644 (file)
@@ -492,16 +492,16 @@ nv30_vbo_validate(struct nv30_context *nv30)
        int hw;
 
        vtxbuf = so_new(3, 17, 18);
-       so_method(vtxbuf, rankine, NV34TCL_VTXBUF_ADDRESS(0), nv30->vtxelt_nr);
+       so_method(vtxbuf, rankine, NV34TCL_VTXBUF_ADDRESS(0), nv30->vtxelt->num_elements);
        vtxfmt = so_new(1, 16, 0);
-       so_method(vtxfmt, rankine, NV34TCL_VTXFMT(0), nv30->vtxelt_nr);
+       so_method(vtxfmt, rankine, NV34TCL_VTXFMT(0), nv30->vtxelt->num_elements);
 
-       for (hw = 0; hw < nv30->vtxelt_nr; hw++) {
+       for (hw = 0; hw < nv30->vtxelt->num_elements; hw++) {
                struct pipe_vertex_element *ve;
                struct pipe_vertex_buffer *vb;
                unsigned type, ncomp;
 
-               ve = &nv30->vtxelt[hw];
+               ve = &nv30->vtxelt->pipe[hw];
                vb = &nv30->vtxbuf[ve->vertex_buffer_index];
 
                if (!vb->stride) {
index 97fb6a2ef947b4c1495979273e981efcfe07fe26..2550ec654b31d4aec4b91a6b3901afa8d7c1ef9e 100644 (file)
@@ -107,6 +107,12 @@ struct nv40_state {
        struct nouveau_stateobj *hw[NV40_STATE_MAX];
 };
 
+
+struct nv40_vtxelt_state {
+       struct pipe_vertex_element pipe[16];
+       unsigned num_elements;
+};
+
 struct nv40_context {
        struct pipe_context pipe;
 
@@ -157,8 +163,7 @@ struct nv40_context {
        unsigned dirty_samplers;
        struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
        unsigned vtxbuf_nr;
-       struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
-       unsigned vtxelt_nr;
+       struct nv40_vtxelt_state *vtxelt;
 };
 
 static INLINE struct nv40_context *
index 4f28675e64c5f9b8d6bf305a958c91fe59a9bad8..ee471e6ce4c2c94c650ddbfd81a1a2dae3968d42 100644 (file)
@@ -684,15 +684,34 @@ nv40_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
        nv40->draw_dirty |= NV40_NEW_ARRAYS;
 }
 
+static void *
+nv40_vtxelts_state_create(struct pipe_context *pipe,
+                         unsigned num_elements,
+                         const struct pipe_vertex_element *elements)
+{
+       struct nv40_vtxelt_state *cso = CALLOC_STRUCT(nv40_vtxelt_state);
+
+       assert(num_elements < 16); /* not doing fallbacks yet */
+       cso->num_elements = num_elements;
+       memcpy(cso->pipe, elements, num_elements * sizeof(*elements));
+
+/*     nv40_vtxelt_construct(cso);*/
+
+       return (void *)cso;
+}
+
 static void
-nv40_set_vertex_elements(struct pipe_context *pipe, unsigned count,
-                        const struct pipe_vertex_element *ve)
+nv40_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso)
 {
-       struct nv40_context *nv40 = nv40_context(pipe);
+       FREE(hwcso);
+}
 
-       memcpy(nv40->vtxelt, ve, sizeof(*ve) * count);
-       nv40->vtxelt_nr = count;
+static void
+nv40_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+       struct nv40_context *nv40 = nv40_context(pipe);
 
+       nv40->vtxelt = hwcso;
        nv40->dirty |= NV40_NEW_ARRAYS;
        nv40->draw_dirty |= NV40_NEW_ARRAYS;
 }
@@ -737,7 +756,10 @@ nv40_init_state_functions(struct nv40_context *nv40)
        nv40->pipe.set_scissor_state = nv40_set_scissor_state;
        nv40->pipe.set_viewport_state = nv40_set_viewport_state;
 
+       nv40->pipe.create_vertex_elements_state = nv40_vtxelts_state_create;
+       nv40->pipe.delete_vertex_elements_state = nv40_vtxelts_state_delete;
+       nv40->pipe.bind_vertex_elements_state = nv40_vtxelts_state_bind;
+
        nv40->pipe.set_vertex_buffers = nv40_set_vertex_buffers;
-       nv40->pipe.set_vertex_elements = nv40_set_vertex_elements;
 }
 
index 8990f303ce4e300d7465d5da6fe38571d2768270..297d71f4fac67eb6fd53a97e3994c7b87ee8a474 100644 (file)
@@ -174,7 +174,7 @@ nv40_state_validate_swtnl(struct nv40_context *nv40)
 
        if (nv40->draw_dirty & NV40_NEW_ARRAYS) {
                draw_set_vertex_buffers(draw, nv40->vtxbuf_nr, nv40->vtxbuf);
-               draw_set_vertex_elements(draw, nv40->vtxelt_nr, nv40->vtxelt);  
+               draw_set_vertex_elements(draw, nv40->vtxelt->num_elements, nv40->vtxelt->pipe); 
        }
 
        nv40_state_do_validate(nv40, swtnl_states);
index 7812460d2ed2ad1e26f239c9a9c2bd93c13b6641..fabdf4bf23b95b35e8f56580924cebc93ca34b56 100644 (file)
@@ -493,16 +493,16 @@ nv40_vbo_validate(struct nv40_context *nv40)
        int hw;
 
        vtxbuf = so_new(3, 17, 18);
-       so_method(vtxbuf, curie, NV40TCL_VTXBUF_ADDRESS(0), nv40->vtxelt_nr);
+       so_method(vtxbuf, curie, NV40TCL_VTXBUF_ADDRESS(0), nv40->vtxelt->num_elements);
        vtxfmt = so_new(1, 16, 0);
-       so_method(vtxfmt, curie, NV40TCL_VTXFMT(0), nv40->vtxelt_nr);
+       so_method(vtxfmt, curie, NV40TCL_VTXFMT(0), nv40->vtxelt->num_elements);
 
-       for (hw = 0; hw < nv40->vtxelt_nr; hw++) {
+       for (hw = 0; hw < nv40->vtxelt->num_elements; hw++) {
                struct pipe_vertex_element *ve;
                struct pipe_vertex_buffer *vb;
                unsigned type, ncomp;
 
-               ve = &nv40->vtxelt[hw];
+               ve = &nv40->vtxelt->pipe[hw];
                vb = &nv40->vtxbuf[ve->vertex_buffer_index];
 
                if (!vb->stride) {
index 612aea28a34b5c001752bf398f26386cd84dabbf..5d622e1c13cdb60a94f95b703704f79163b67ed3 100644 (file)
@@ -16,6 +16,7 @@ C_SOURCES = \
        nv50_surface.c \
        nv50_tex.c \
        nv50_transfer.c \
-       nv50_vbo.c
+       nv50_vbo.c \
+       nv50_push.c
 
 include ../../Makefile.template
index e0b2d2880b00b1e42bfe7d02a3ba121ea06c208b..8afc95c9fc6b7942ce9e5bd974244f240861fffb 100644 (file)
@@ -36,7 +36,7 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers,
        struct pipe_framebuffer_state *fb = &nv50->framebuffer;
        unsigned mode = 0, i;
 
-       if (!nv50_state_validate(nv50))
+       if (!nv50_state_validate(nv50, 64))
                return;
 
        if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) {
index 7be12fcdef466a27681a66cda1141522f47299bf..0eb42f323ffac218b71d1edd65a27f8f73a24ff7 100644 (file)
@@ -46,43 +46,13 @@ static void
 nv50_destroy(struct pipe_context *pipe)
 {
        struct nv50_context *nv50 = nv50_context(pipe);
+       int i;
 
-        if (nv50->state.fb)
-               so_ref(NULL, &nv50->state.fb);
-       if (nv50->state.blend)
-               so_ref(NULL, &nv50->state.blend);
-       if (nv50->state.blend_colour)
-               so_ref(NULL, &nv50->state.blend_colour);
-       if (nv50->state.zsa)
-               so_ref(NULL, &nv50->state.zsa);
-       if (nv50->state.rast)
-               so_ref(NULL, &nv50->state.rast);
-       if (nv50->state.stipple)
-               so_ref(NULL, &nv50->state.stipple);
-       if (nv50->state.scissor)
-               so_ref(NULL, &nv50->state.scissor);
-       if (nv50->state.viewport)
-               so_ref(NULL, &nv50->state.viewport);
-       if (nv50->state.tsc_upload)
-               so_ref(NULL, &nv50->state.tsc_upload);
-       if (nv50->state.tic_upload)
-               so_ref(NULL, &nv50->state.tic_upload);
-       if (nv50->state.vertprog)
-               so_ref(NULL, &nv50->state.vertprog);
-       if (nv50->state.fragprog)
-               so_ref(NULL, &nv50->state.fragprog);
-       if (nv50->state.geomprog)
-               so_ref(NULL, &nv50->state.geomprog);
-       if (nv50->state.fp_linkage)
-               so_ref(NULL, &nv50->state.fp_linkage);
-       if (nv50->state.gp_linkage)
-               so_ref(NULL, &nv50->state.gp_linkage);
-       if (nv50->state.vtxfmt)
-               so_ref(NULL, &nv50->state.vtxfmt);
-       if (nv50->state.vtxbuf)
-               so_ref(NULL, &nv50->state.vtxbuf);
-       if (nv50->state.vtxattr)
-               so_ref(NULL, &nv50->state.vtxattr);
+       for (i = 0; i < 64; i++) {
+               if (!nv50->state.hw[i])
+                       continue;
+               so_ref(NULL, &nv50->state.hw[i]);
+       }
 
        draw_destroy(nv50->draw);
 
@@ -123,7 +93,6 @@ nv50_create(struct pipe_screen *pscreen, void *priv)
        nv50->pipe.is_buffer_referenced = nouveau_is_buffer_referenced;
 
        screen->base.channel->user_private = nv50;
-       screen->base.channel->flush_notify = nv50_state_flush_notify;
 
        nv50_init_surface_functions(nv50);
        nv50_init_state_functions(nv50);
index 044437e75fa290f3c9e704f8b3ea168359ab1fc5..8793c2aac5d340b7915c2e9c7ce6271cc0ed6d41 100644 (file)
@@ -72,6 +72,12 @@ struct nv50_sampler_stateobj {
        unsigned tsc[8];
 };
 
+struct nv50_vtxelt_stateobj {
+       struct pipe_vertex_element pipe[16];
+       unsigned num_elements;
+       uint32_t hw[16];
+};
+
 static INLINE unsigned
 get_tile_height(uint32_t tile_mode)
 {
@@ -117,30 +123,12 @@ nv50_surface(struct pipe_surface *pt)
 }
 
 struct nv50_state {
-       unsigned dirty;
+       struct nouveau_stateobj *hw[64];
+       uint64_t hw_dirty;
 
-       struct nouveau_stateobj *fb;
-       struct nouveau_stateobj *blend;
-       struct nouveau_stateobj *blend_colour;
-       struct nouveau_stateobj *zsa;
-       struct nouveau_stateobj *stencil_ref;
-       struct nouveau_stateobj *rast;
-       struct nouveau_stateobj *stipple;
-       struct nouveau_stateobj *scissor;
-       unsigned scissor_enabled;
-       struct nouveau_stateobj *viewport;
-       struct nouveau_stateobj *tsc_upload;
-       struct nouveau_stateobj *tic_upload;
        unsigned miptree_nr[PIPE_SHADER_TYPES];
-       struct nouveau_stateobj *vertprog;
-       struct nouveau_stateobj *fragprog;
-       struct nouveau_stateobj *geomprog;
-       struct nouveau_stateobj *fp_linkage;
-       struct nouveau_stateobj *gp_linkage;
-       struct nouveau_stateobj *vtxfmt;
        struct nouveau_stateobj *vtxbuf;
        struct nouveau_stateobj *vtxattr;
-       struct nouveau_stateobj *instbuf;
        unsigned vtxelt_nr;
 };
 
@@ -169,14 +157,13 @@ struct nv50_context {
        struct pipe_buffer *constbuf[PIPE_SHADER_TYPES];
        struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
        unsigned vtxbuf_nr;
-       struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
-       unsigned vtxelt_nr;
+       struct nv50_vtxelt_stateobj *vtxelt;
        struct nv50_sampler_stateobj *sampler[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
        unsigned sampler_nr[PIPE_SHADER_TYPES];
        struct nv50_miptree *miptree[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
        unsigned miptree_nr[PIPE_SHADER_TYPES];
 
-       uint16_t vbo_fifo;
+       unsigned vbo_fifo;
 };
 
 static INLINE struct nv50_context *
@@ -218,24 +205,36 @@ extern void nv50_draw_elements_instanced(struct pipe_context *pipe,
                                         unsigned count,
                                         unsigned startInstance,
                                         unsigned instanceCount);
-extern void nv50_vbo_validate(struct nv50_context *nv50);
+extern void nv50_vtxelt_construct(struct nv50_vtxelt_stateobj *cso);
+extern struct nouveau_stateobj *nv50_vbo_validate(struct nv50_context *nv50);
+
+/* nv50_push.c */
+extern void
+nv50_push_elements_instanced(struct pipe_context *, struct pipe_buffer *,
+                            unsigned idxsize, unsigned mode, unsigned start,
+                            unsigned count, unsigned i_start,
+                            unsigned i_count);
 
 /* nv50_clear.c */
 extern void nv50_clear(struct pipe_context *pipe, unsigned buffers,
                       const float *rgba, double depth, unsigned stencil);
 
 /* nv50_program.c */
-extern void nv50_vertprog_validate(struct nv50_context *nv50);
-extern void nv50_fragprog_validate(struct nv50_context *nv50);
-extern void nv50_geomprog_validate(struct nv50_context *nv50);
-extern void nv50_fp_linkage_validate(struct nv50_context *nv50);
-extern void nv50_gp_linkage_validate(struct nv50_context *nv50);
+extern struct nouveau_stateobj *
+nv50_vertprog_validate(struct nv50_context *nv50);
+extern struct nouveau_stateobj *
+nv50_fragprog_validate(struct nv50_context *nv50);
+extern struct nouveau_stateobj *
+nv50_geomprog_validate(struct nv50_context *nv50);
+extern struct nouveau_stateobj *
+nv50_fp_linkage_validate(struct nv50_context *nv50);
+extern struct nouveau_stateobj *
+nv50_gp_linkage_validate(struct nv50_context *nv50);
 extern void nv50_program_destroy(struct nv50_context *nv50,
                                 struct nv50_program *p);
 
 /* nv50_state_validate.c */
-extern boolean nv50_state_validate(struct nv50_context *nv50);
-extern void nv50_state_flush_notify(struct nouveau_channel *chan);
+extern boolean nv50_state_validate(struct nv50_context *nv50, unsigned dwords);
 
 extern void nv50_so_init_sifc(struct nv50_context *nv50,
                              struct nouveau_stateobj *so,
@@ -243,7 +242,8 @@ extern void nv50_so_init_sifc(struct nv50_context *nv50,
                              unsigned offset, unsigned size);
 
 /* nv50_tex.c */
-extern void nv50_tex_validate(struct nv50_context *);
+extern void nv50_tex_relocs(struct nv50_context *);
+extern struct nouveau_stateobj *nv50_tex_validate(struct nv50_context *);
 
 /* nv50_transfer.c */
 extern void
@@ -257,4 +257,35 @@ nv50_upload_sifc(struct nv50_context *nv50,
 struct pipe_context *
 nv50_create(struct pipe_screen *pscreen, void *priv);
 
+static INLINE unsigned
+nv50_prim(unsigned mode)
+{
+       switch (mode) {
+       case PIPE_PRIM_POINTS: return NV50TCL_VERTEX_BEGIN_POINTS;
+       case PIPE_PRIM_LINES: return NV50TCL_VERTEX_BEGIN_LINES;
+       case PIPE_PRIM_LINE_LOOP: return NV50TCL_VERTEX_BEGIN_LINE_LOOP;
+       case PIPE_PRIM_LINE_STRIP: return NV50TCL_VERTEX_BEGIN_LINE_STRIP;
+       case PIPE_PRIM_TRIANGLES: return NV50TCL_VERTEX_BEGIN_TRIANGLES;
+       case PIPE_PRIM_TRIANGLE_STRIP:
+               return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP;
+       case PIPE_PRIM_TRIANGLE_FAN: return NV50TCL_VERTEX_BEGIN_TRIANGLE_FAN;
+       case PIPE_PRIM_QUADS: return NV50TCL_VERTEX_BEGIN_QUADS;
+       case PIPE_PRIM_QUAD_STRIP: return NV50TCL_VERTEX_BEGIN_QUAD_STRIP;
+       case PIPE_PRIM_POLYGON: return NV50TCL_VERTEX_BEGIN_POLYGON;
+       case PIPE_PRIM_LINES_ADJACENCY:
+               return NV50TCL_VERTEX_BEGIN_LINES_ADJACENCY;
+       case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+               return NV50TCL_VERTEX_BEGIN_LINE_STRIP_ADJACENCY;
+       case PIPE_PRIM_TRIANGLES_ADJACENCY:
+               return NV50TCL_VERTEX_BEGIN_TRIANGLES_ADJACENCY;
+       case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
+               return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP_ADJACENCY;
+       default:
+               break;
+       }
+
+       NOUVEAU_ERR("invalid primitive type %d\n", mode);
+       return NV50TCL_VERTEX_BEGIN_POINTS;
+}
+
 #endif
index 2372cbbef69e17eb2b603f663b154057d0cc2246..c857816b31a70a2cad7063aba3a1a3c78fa3d89c 100644 (file)
@@ -4270,7 +4270,7 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
        FREE(up);
 }
 
-void
+struct nouveau_stateobj *
 nv50_vertprog_validate(struct nv50_context *nv50)
 {
        struct nouveau_grobj *tesla = nv50->screen->tesla;
@@ -4286,6 +4286,9 @@ nv50_vertprog_validate(struct nv50_context *nv50)
        nv50_program_validate_data(nv50, p);
        nv50_program_validate_code(nv50, p);
 
+       if (!(nv50->dirty & NV50_NEW_VERTPROG))
+               return NULL;
+
        so = so_new(5, 7, 2);
        so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
        so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
@@ -4301,11 +4304,10 @@ nv50_vertprog_validate(struct nv50_context *nv50)
        so_data  (so, p->cfg.high_temp);
        so_method(so, tesla, NV50TCL_VP_START_ID, 1);
        so_data  (so, 0); /* program start offset */
-       so_ref(so, &nv50->state.vertprog);
-       so_ref(NULL, &so);
+       return so;
 }
 
-void
+struct nouveau_stateobj *
 nv50_fragprog_validate(struct nv50_context *nv50)
 {
        struct nouveau_grobj *tesla = nv50->screen->tesla;
@@ -4321,6 +4323,9 @@ nv50_fragprog_validate(struct nv50_context *nv50)
        nv50_program_validate_data(nv50, p);
        nv50_program_validate_code(nv50, p);
 
+       if (!(nv50->dirty & NV50_NEW_FRAGPROG))
+               return NULL;
+
        so = so_new(6, 7, 2);
        so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
        so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
@@ -4337,11 +4342,10 @@ nv50_fragprog_validate(struct nv50_context *nv50)
        so_data  (so, p->cfg.regs[3]);
        so_method(so, tesla, NV50TCL_FP_START_ID, 1);
        so_data  (so, 0); /* program start offset */
-       so_ref(so, &nv50->state.fragprog);
-       so_ref(NULL, &so);
+       return so;
 }
 
-void
+struct nouveau_stateobj *
 nv50_geomprog_validate(struct nv50_context *nv50)
 {
        struct nouveau_grobj *tesla = nv50->screen->tesla;
@@ -4357,6 +4361,9 @@ nv50_geomprog_validate(struct nv50_context *nv50)
        nv50_program_validate_data(nv50, p);
        nv50_program_validate_code(nv50, p);
 
+       if (!(nv50->dirty & NV50_NEW_GEOMPROG))
+               return NULL;
+
        so = so_new(6, 7, 2);
        so_method(so, tesla, NV50TCL_GP_ADDRESS_HIGH, 2);
        so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
@@ -4373,8 +4380,7 @@ nv50_geomprog_validate(struct nv50_context *nv50)
        so_data  (so, p->cfg.vert_count);
        so_method(so, tesla, NV50TCL_GP_START_ID, 1);
        so_data  (so, 0);
-       so_ref(so, &nv50->state.geomprog);
-       so_ref(NULL, &so);
+       return so;
 }
 
 static uint32_t
@@ -4454,7 +4460,7 @@ nv50_vec4_map(uint32_t *map32, int mid, uint8_t zval, uint32_t lin[4],
        return mid;
 }
 
-void
+struct nouveau_stateobj *
 nv50_fp_linkage_validate(struct nv50_context *nv50)
 {
        struct nouveau_grobj *tesla = nv50->screen->tesla;
@@ -4580,8 +4586,7 @@ nv50_fp_linkage_validate(struct nv50_context *nv50)
        so_method(so, tesla, NV50TCL_GP_ENABLE, 1);
        so_data  (so, (vp->type == PIPE_SHADER_GEOMETRY) ? 1 : 0);
 
-       so_ref(so, &nv50->state.fp_linkage);
-       so_ref(NULL, &so);
+       return so;
 }
 
 static int
@@ -4615,7 +4620,7 @@ construct_vp_gp_mapping(uint32_t *map32, int m,
        return m;
 }
 
-void
+struct nouveau_stateobj *
 nv50_gp_linkage_validate(struct nv50_context *nv50)
 {
        struct nouveau_grobj *tesla = nv50->screen->tesla;
@@ -4625,10 +4630,8 @@ nv50_gp_linkage_validate(struct nv50_context *nv50)
        uint32_t map[16];
        int m = 0;
 
-       if (!gp) {
-               so_ref(NULL, &nv50->state.gp_linkage);
-               return;
-       }
+       if (!gp)
+               return NULL;
        memset(map, 0, sizeof(map));
 
        m = construct_vp_gp_mapping(map, m, vp, gp);
@@ -4646,8 +4649,7 @@ nv50_gp_linkage_validate(struct nv50_context *nv50)
        so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), m);
        so_datap (so, map, m);
 
-       so_ref(so, &nv50->state.gp_linkage);
-       so_ref(NULL, &so);
+       return so;
 }
 
 void
diff --git a/src/gallium/drivers/nv50/nv50_push.c b/src/gallium/drivers/nv50/nv50_push.c
new file mode 100644 (file)
index 0000000..96a1f32
--- /dev/null
@@ -0,0 +1,326 @@
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+
+#include "nouveau/nouveau_util.h"
+#include "nv50_context.h"
+
+struct push_context {
+   struct nv50_context *nv50;
+
+   unsigned vtx_size;
+
+   void *idxbuf;
+   unsigned idxsize;
+
+   float edgeflag;
+   int edgeflag_attr;
+
+   struct {
+      void *map;
+      unsigned stride;
+      unsigned divisor;
+      unsigned step;
+      void (*push)(struct nouveau_channel *, void *);
+   } attr[16];
+   unsigned attr_nr;
+};
+
+static void
+emit_b32_1(struct nouveau_channel *chan, void *data)
+{
+   uint32_t *v = data;
+
+   OUT_RING(chan, v[0]);
+}
+
+static void
+emit_b32_2(struct nouveau_channel *chan, void *data)
+{
+   uint32_t *v = data;
+
+   OUT_RING(chan, v[0]);
+   OUT_RING(chan, v[1]);
+}
+
+static void
+emit_b32_3(struct nouveau_channel *chan, void *data)
+{
+   uint32_t *v = data;
+
+   OUT_RING(chan, v[0]);
+   OUT_RING(chan, v[1]);
+   OUT_RING(chan, v[2]);
+}
+
+static void
+emit_b32_4(struct nouveau_channel *chan, void *data)
+{
+   uint32_t *v = data;
+
+   OUT_RING(chan, v[0]);
+   OUT_RING(chan, v[1]);
+   OUT_RING(chan, v[2]);
+   OUT_RING(chan, v[3]);
+}
+
+static void
+emit_b16_1(struct nouveau_channel *chan, void *data)
+{
+   uint16_t *v = data;
+
+   OUT_RING(chan, v[0]);
+}
+
+static void
+emit_b16_3(struct nouveau_channel *chan, void *data)
+{
+   uint16_t *v = data;
+
+   OUT_RING(chan, (v[1] << 16) | v[0]);
+   OUT_RING(chan, v[2]);
+}
+
+static void
+emit_b08_1(struct nouveau_channel *chan, void *data)
+{
+   uint8_t *v = data;
+
+   OUT_RING(chan, v[0]);
+}
+
+static void
+emit_b08_3(struct nouveau_channel *chan, void *data)
+{
+   uint8_t *v = data;
+
+   OUT_RING(chan, (v[2] << 16) | (v[1] << 8) | v[0]);
+}
+
+static INLINE void
+emit_vertex(struct push_context *ctx, unsigned n)
+{
+   struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
+   struct nouveau_channel *chan = tesla->channel;
+   int i;
+
+   if (ctx->edgeflag_attr < 16) {
+      float *edgeflag = ctx->attr[ctx->edgeflag_attr].map +
+                        ctx->attr[ctx->edgeflag_attr].stride * n;
+
+      if (*edgeflag != ctx->edgeflag) {
+         BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
+         OUT_RING  (chan, *edgeflag ? 1 : 0);
+         ctx->edgeflag = *edgeflag;
+      }
+   }
+
+   BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, ctx->vtx_size);
+   for (i = 0; i < ctx->attr_nr; i++)
+      ctx->attr[i].push(chan, ctx->attr[i].map + ctx->attr[i].stride * n);
+}
+
+static void
+emit_edgeflag(void *priv, boolean enabled)
+{
+   struct push_context *ctx = priv;
+   struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
+   struct nouveau_channel *chan = tesla->channel;
+
+   BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
+   OUT_RING  (chan, enabled ? 1 : 0);
+}
+
+static void
+emit_elt08(void *priv, unsigned start, unsigned count)
+{
+   struct push_context *ctx = priv;
+   uint8_t *idxbuf = ctx->idxbuf;
+
+   while (count--)
+      emit_vertex(ctx, idxbuf[start++]);
+}
+
+static void
+emit_elt16(void *priv, unsigned start, unsigned count)
+{
+   struct push_context *ctx = priv;
+   uint16_t *idxbuf = ctx->idxbuf;
+
+   while (count--)
+      emit_vertex(ctx, idxbuf[start++]);
+}
+
+static void
+emit_elt32(void *priv, unsigned start, unsigned count)
+{
+   struct push_context *ctx = priv;
+   uint32_t *idxbuf = ctx->idxbuf;
+
+   while (count--)
+      emit_vertex(ctx, idxbuf[start++]);
+}
+
+static void
+emit_verts(void *priv, unsigned start, unsigned count)
+{
+   while (count--)
+      emit_vertex(priv, start++);
+}
+
+void
+nv50_push_elements_instanced(struct pipe_context *pipe,
+                             struct pipe_buffer *idxbuf, unsigned idxsize,
+                             unsigned mode, unsigned start, unsigned count,
+                             unsigned i_start, unsigned i_count)
+{
+   struct nv50_context *nv50 = nv50_context(pipe);
+   struct nouveau_grobj *tesla = nv50->screen->tesla;
+   struct nouveau_channel *chan = tesla->channel;
+   struct push_context ctx;
+   const unsigned p_overhead = 4 + /* begin/end */
+                               4; /* potential edgeflag enable/disable */
+   const unsigned v_overhead = 1 + /* VERTEX_DATA packet header */
+                               2; /* potential edgeflag modification */
+   struct u_split_prim s;
+   unsigned vtx_size;
+   boolean nzi = FALSE;
+   int i;
+
+   ctx.nv50 = nv50;
+   ctx.attr_nr = 0;
+   ctx.idxbuf = NULL;
+   ctx.vtx_size = 0;
+   ctx.edgeflag = 0.5f;
+   ctx.edgeflag_attr = nv50->vertprog->cfg.edgeflag_in;
+
+   /* map vertex buffers, determine vertex size */
+   for (i = 0; i < nv50->vtxelt->num_elements; i++) {
+      struct pipe_vertex_element *ve = &nv50->vtxelt->pipe[i];
+      struct pipe_vertex_buffer *vb = &nv50->vtxbuf[ve->vertex_buffer_index];
+      struct nouveau_bo *bo = nouveau_bo(vb->buffer);
+      unsigned size, nr_components, n;
+
+      if (!(nv50->vbo_fifo & (1 << i)))
+         continue;
+      n = ctx.attr_nr++;
+
+      if (nouveau_bo_map(bo, NOUVEAU_BO_RD)) {
+         assert(bo->map);
+         return;
+      }
+      ctx.attr[n].map = bo->map + vb->buffer_offset + ve->src_offset;
+      nouveau_bo_unmap(bo);
+
+      ctx.attr[n].stride = vb->stride;
+      ctx.attr[n].divisor = ve->instance_divisor;
+      if (ctx.attr[n].divisor) {
+         ctx.attr[n].step = i_start % ve->instance_divisor;
+         ctx.attr[n].map += i_start * vb->stride;
+      }
+
+      size = util_format_get_component_bits(ve->src_format,
+                                            UTIL_FORMAT_COLORSPACE_RGB, 0);
+      nr_components = util_format_get_nr_components(ve->src_format);
+      switch (size) {
+      case 8:
+         switch (nr_components) {
+         case 1: ctx.attr[n].push = emit_b08_1; break;
+         case 2: ctx.attr[n].push = emit_b16_1; break;
+         case 3: ctx.attr[n].push = emit_b08_3; break;
+         case 4: ctx.attr[n].push = emit_b32_1; break;
+         }
+         ctx.vtx_size++;
+         break;
+      case 16:
+         switch (nr_components) {
+         case 1: ctx.attr[n].push = emit_b16_1; break;
+         case 2: ctx.attr[n].push = emit_b32_1; break;
+         case 3: ctx.attr[n].push = emit_b16_3; break;
+         case 4: ctx.attr[n].push = emit_b32_2; break;
+         }
+         ctx.vtx_size += (nr_components + 1) >> 1;
+         break;
+      case 32:
+         switch (nr_components) {
+         case 1: ctx.attr[n].push = emit_b32_1; break;
+         case 2: ctx.attr[n].push = emit_b32_2; break;
+         case 3: ctx.attr[n].push = emit_b32_3; break;
+         case 4: ctx.attr[n].push = emit_b32_4; break;
+         }
+         ctx.vtx_size += nr_components;
+         break;
+      default:
+         assert(0);
+         return;
+      }
+   }
+   vtx_size = ctx.vtx_size + v_overhead;
+
+   /* map index buffer, if present */
+   if (idxbuf) {
+      struct nouveau_bo *bo = nouveau_bo(idxbuf);
+
+      if (nouveau_bo_map(bo, NOUVEAU_BO_RD)) {
+         assert(bo->map);
+         return;
+      }
+      ctx.idxbuf = bo->map;
+      ctx.idxsize = idxsize;
+      nouveau_bo_unmap(bo);
+   }
+
+   s.priv = &ctx;
+   s.edge = emit_edgeflag;
+   if (idxbuf) {
+      if (idxsize == 1)
+         s.emit = emit_elt08;
+      else
+      if (idxsize == 2)
+         s.emit = emit_elt16;
+      else
+         s.emit = emit_elt32;
+   } else
+      s.emit = emit_verts;
+
+   /* per-instance loop */
+   BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
+   OUT_RING  (chan, NV50_CB_AUX | (24 << 8));
+   OUT_RING  (chan, i_start);
+   while (i_count--) {
+      unsigned max_verts;
+      boolean done;
+
+      for (i = 0; i < ctx.attr_nr; i++) {
+         if (!ctx.attr[i].divisor ||
+              ctx.attr[i].divisor != ++ctx.attr[i].step)
+            continue;
+         ctx.attr[i].step = 0;
+         ctx.attr[i].map += ctx.attr[i].stride;
+      }
+
+      u_split_prim_init(&s, mode, start, count);
+      do {
+         if (AVAIL_RING(chan) < p_overhead + (6 * vtx_size)) {
+            FIRE_RING(chan);
+            if (!nv50_state_validate(nv50, p_overhead + (6 * vtx_size))) {
+               assert(0);
+               return;
+            }
+         }
+
+         max_verts  = AVAIL_RING(chan);
+         max_verts -= p_overhead;
+         max_verts /= vtx_size;
+
+         BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
+         OUT_RING  (chan, nv50_prim(s.mode) | (nzi ? (1 << 28) : 0));
+         done = u_split_prim_next(&s, max_verts);
+         BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
+         OUT_RING  (chan, 0);
+      } while (!done);
+
+      nzi = TRUE;
+   }
+}
index eed6031eafab715247115bad6992eb2e4e6ca530..7e2e8aa336e7baa58b46c5f1144a3c5478de9a93 100644 (file)
@@ -95,6 +95,8 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen,
 static int
 nv50_screen_get_param(struct pipe_screen *pscreen, int param)
 {
+       struct nv50_screen *screen = nv50_screen(pscreen);
+
        switch (param) {
        case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
                return 32;
@@ -132,9 +134,9 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param)
        case PIPE_CAP_BLEND_EQUATION_SEPARATE:
                return 1;
        case NOUVEAU_CAP_HW_VTXBUF:
-               return 1;
+               return screen->force_push ? 0 : 1;
        case NOUVEAU_CAP_HW_IDXBUF:
-               return 1;
+               return screen->force_push ? 0 : 1;
        case PIPE_CAP_INDEP_BLEND_ENABLE:
                return 1;
        case PIPE_CAP_INDEP_BLEND_FUNC:
@@ -202,28 +204,6 @@ nv50_screen_destroy(struct pipe_screen *pscreen)
        FREE(screen);
 }
 
-static int
-nv50_pre_pipebuffer_map(struct pipe_screen *pscreen, struct pipe_buffer *pb,
-       unsigned usage)
-{
-       struct nv50_screen *screen = nv50_screen(pscreen);
-       struct nv50_context *ctx = screen->cur_ctx;
-
-       if (!(pb->usage & PIPE_BUFFER_USAGE_VERTEX))
-               return 0;
-
-       /* Our vtxbuf got mapped, it can no longer be considered part of current
-        * state, remove it to avoid emitting reloc markers.
-        */
-       if (ctx && ctx->state.vtxbuf && so_bo_is_reloc(ctx->state.vtxbuf,
-                       nouveau_bo(pb))) {
-               so_ref(NULL, &ctx->state.vtxbuf);
-               ctx->dirty |= NV50_NEW_ARRAYS;
-       }
-
-       return 0;
-}
-
 struct pipe_screen *
 nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 {
@@ -252,7 +232,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
        pscreen->get_paramf = nv50_screen_get_paramf;
        pscreen->is_format_supported = nv50_screen_is_format_supported;
        pscreen->context_create = nv50_create;
-       screen->base.pre_pipebuffer_map_callback = nv50_pre_pipebuffer_map;
 
        nv50_screen_init_miptree_functions(pscreen);
        nv50_transfer_init_screen_functions(pscreen);
@@ -508,10 +487,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
        so_method(so, screen->tesla, NV50TCL_LINKED_TSC, 1);
        so_data  (so, 1);
 
-       /* activate first scissor rectangle */
-       so_method(so, screen->tesla, NV50TCL_SCISSOR_ENABLE(0), 1);
-       so_data  (so, 1);
-
        so_method(so, screen->tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
        so_data  (so, 1); /* default edgeflag to TRUE */
 
@@ -520,6 +495,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
        so_ref (NULL, &so);
        nouveau_pushbuf_flush(chan, 0);
 
+       screen->force_push = debug_get_bool_option("NV50_ALWAYS_PUSH", FALSE);
        return pscreen;
 }
 
index 2687b72127797a208edfa8db5996a59f8b37b6f1..d1bc80cb9edfcf51667e1ce8d3a486af0a9303ac 100644 (file)
@@ -28,6 +28,8 @@ struct nv50_screen {
        struct nouveau_bo *tsc;
 
        struct nouveau_stateobj *static_init;
+
+       boolean force_push;
 };
 
 static INLINE struct nv50_screen *
index 7d304907b652240da70bc57878c56511e7deef34..b0e5552eff45957a838f7c47c2dfa4f2a25cc8af 100644 (file)
@@ -302,7 +302,7 @@ static void *
 nv50_rasterizer_state_create(struct pipe_context *pipe,
                             const struct pipe_rasterizer_state *cso)
 {
-       struct nouveau_stateobj *so = so_new(15, 21, 0);
+       struct nouveau_stateobj *so = so_new(16, 22, 0);
        struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla;
        struct nv50_rasterizer_stateobj *rso =
                CALLOC_STRUCT(nv50_rasterizer_stateobj);
@@ -314,6 +314,9 @@ nv50_rasterizer_state_create(struct pipe_context *pipe,
         *      - point_sprite / sprite_coord_mode
         */
 
+       so_method(so, tesla, NV50TCL_SCISSOR_ENABLE(0), 1);
+       so_data  (so, cso->scissor);
+
        so_method(so, tesla, NV50TCL_SHADE_MODEL, 1);
        so_data  (so, cso->flatshade ? NV50TCL_SHADE_MODEL_FLAT :
                                       NV50TCL_SHADE_MODEL_SMOOTH);
@@ -720,15 +723,34 @@ nv50_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
        nv50->dirty |= NV50_NEW_ARRAYS;
 }
 
+static void *
+nv50_vtxelts_state_create(struct pipe_context *pipe,
+                         unsigned num_elements,
+                         const struct pipe_vertex_element *elements)
+{
+       struct nv50_vtxelt_stateobj *cso = CALLOC_STRUCT(nv50_vtxelt_stateobj);
+
+       assert(num_elements < 16); /* not doing fallbacks yet */
+       cso->num_elements = num_elements;
+       memcpy(cso->pipe, elements, num_elements * sizeof(*elements));
+
+       nv50_vtxelt_construct(cso);
+
+       return (void *)cso;
+}
+
 static void
-nv50_set_vertex_elements(struct pipe_context *pipe, unsigned count,
-                        const struct pipe_vertex_element *ve)
+nv50_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso)
 {
-       struct nv50_context *nv50 = nv50_context(pipe);
+       FREE(hwcso);
+}
 
-       memcpy(nv50->vtxelt, ve, sizeof(*ve) * count);
-       nv50->vtxelt_nr = count;
+static void
+nv50_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+       struct nv50_context *nv50 = nv50_context(pipe);
 
+       nv50->vtxelt = hwcso;
        nv50->dirty |= NV50_NEW_ARRAYS;
 }
 
@@ -778,7 +800,10 @@ nv50_init_state_functions(struct nv50_context *nv50)
        nv50->pipe.set_scissor_state = nv50_set_scissor_state;
        nv50->pipe.set_viewport_state = nv50_set_viewport_state;
 
+       nv50->pipe.create_vertex_elements_state = nv50_vtxelts_state_create;
+       nv50->pipe.delete_vertex_elements_state = nv50_vtxelts_state_delete;
+       nv50->pipe.bind_vertex_elements_state = nv50_vtxelts_state_bind;
+
        nv50->pipe.set_vertex_buffers = nv50_set_vertex_buffers;
-       nv50->pipe.set_vertex_elements = nv50_set_vertex_elements;
 }
 
index c974cc92dcc7d875f857b3f2775694a2ac4ea438..2c8e7ca7982cbd273c6fdd49d94e638fbab437b2 100644 (file)
@@ -25,8 +25,8 @@
 #include "nv50_context.h"
 #include "nouveau/nouveau_stateobj.h"
 
-static void
-nv50_state_validate_fb(struct nv50_context *nv50)
+static struct nouveau_stateobj *
+validate_fb(struct nv50_context *nv50)
 {
        struct nouveau_grobj *tesla = nv50->screen->tesla;
        struct nouveau_stateobj *so = so_new(32, 79, 18);
@@ -167,12 +167,7 @@ nv50_state_validate_fb(struct nv50_context *nv50)
        so_data  (so, w << 16);
        so_data  (so, h << 16);
 
-       /* we set scissors to framebuffer size when they're 'turned off' */
-       nv50->dirty |= NV50_NEW_SCISSOR;
-       so_ref(NULL, &nv50->state.scissor);
-
-       so_ref(so, &nv50->state.fb);
-       so_ref(NULL, &so);
+       return so;
 }
 
 static void
@@ -199,263 +194,256 @@ nv50_validate_samplers(struct nv50_context *nv50, struct nouveau_stateobj *so,
        }
 }
 
-static void
-nv50_state_emit(struct nv50_context *nv50)
+static struct nouveau_stateobj *
+validate_blend(struct nv50_context *nv50)
 {
-       struct nv50_screen *screen = nv50->screen;
-       struct nouveau_channel *chan = screen->base.channel;
+       struct nouveau_stateobj *so = NULL;
+       so_ref(nv50->blend->so, &so);
+       return so;
+}
 
-       /* XXX: this is racy for multiple contexts active on separate
-        * threads.
-        */
-       if (screen->cur_ctx != nv50) {
-               if (nv50->state.fb)
-                       nv50->state.dirty |= NV50_NEW_FRAMEBUFFER;
-               if (nv50->state.blend)
-                       nv50->state.dirty |= NV50_NEW_BLEND;
-               if (nv50->state.zsa)
-                       nv50->state.dirty |= NV50_NEW_ZSA;
-               if (nv50->state.vertprog)
-                       nv50->state.dirty |= NV50_NEW_VERTPROG;
-               if (nv50->state.fragprog)
-                       nv50->state.dirty |= NV50_NEW_FRAGPROG;
-               if (nv50->state.geomprog)
-                       nv50->state.dirty |= NV50_NEW_GEOMPROG;
-               if (nv50->state.rast)
-                       nv50->state.dirty |= NV50_NEW_RASTERIZER;
-               if (nv50->state.blend_colour)
-                       nv50->state.dirty |= NV50_NEW_BLEND_COLOUR;
-               if (nv50->state.stencil_ref)
-                       nv50->state.dirty |= NV50_NEW_STENCIL_REF;
-               if (nv50->state.stipple)
-                       nv50->state.dirty |= NV50_NEW_STIPPLE;
-               if (nv50->state.scissor)
-                       nv50->state.dirty |= NV50_NEW_SCISSOR;
-               if (nv50->state.viewport)
-                       nv50->state.dirty |= NV50_NEW_VIEWPORT;
-               if (nv50->state.tsc_upload)
-                       nv50->state.dirty |= NV50_NEW_SAMPLER;
-               if (nv50->state.tic_upload)
-                       nv50->state.dirty |= NV50_NEW_TEXTURE;
-               if (nv50->state.vtxfmt && nv50->state.vtxbuf)
-                       nv50->state.dirty |= NV50_NEW_ARRAYS;
-               screen->cur_ctx = nv50;
-       }
+static struct nouveau_stateobj *
+validate_zsa(struct nv50_context *nv50)
+{
+       struct nouveau_stateobj *so = NULL;
+       so_ref(nv50->zsa->so, &so);
+       return so;
+}
 
-       if (nv50->state.dirty & NV50_NEW_FRAMEBUFFER)
-               so_emit(chan, nv50->state.fb);
-       if (nv50->state.dirty & NV50_NEW_BLEND)
-               so_emit(chan, nv50->state.blend);
-       if (nv50->state.dirty & NV50_NEW_ZSA)
-               so_emit(chan, nv50->state.zsa);
-       if (nv50->state.dirty & NV50_NEW_VERTPROG)
-               so_emit(chan, nv50->state.vertprog);
-       if (nv50->state.dirty & NV50_NEW_FRAGPROG)
-               so_emit(chan, nv50->state.fragprog);
-       if (nv50->state.dirty & NV50_NEW_GEOMPROG && nv50->state.geomprog)
-               so_emit(chan, nv50->state.geomprog);
-       if (nv50->state.dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG |
-                                NV50_NEW_GEOMPROG | NV50_NEW_RASTERIZER))
-               so_emit(chan, nv50->state.fp_linkage);
-       if ((nv50->state.dirty & (NV50_NEW_VERTPROG | NV50_NEW_GEOMPROG))
-           && nv50->state.gp_linkage)
-               so_emit(chan, nv50->state.gp_linkage);
-       if (nv50->state.dirty & NV50_NEW_RASTERIZER)
-               so_emit(chan, nv50->state.rast);
-       if (nv50->state.dirty & NV50_NEW_BLEND_COLOUR)
-               so_emit(chan, nv50->state.blend_colour);
-       if (nv50->state.dirty & NV50_NEW_STENCIL_REF)
-               so_emit(chan, nv50->state.stencil_ref);
-       if (nv50->state.dirty & NV50_NEW_STIPPLE)
-               so_emit(chan, nv50->state.stipple);
-       if (nv50->state.dirty & NV50_NEW_SCISSOR)
-               so_emit(chan, nv50->state.scissor);
-       if (nv50->state.dirty & NV50_NEW_VIEWPORT)
-               so_emit(chan, nv50->state.viewport);
-       if (nv50->state.dirty & NV50_NEW_SAMPLER)
-               so_emit(chan, nv50->state.tsc_upload);
-       if (nv50->state.dirty & NV50_NEW_TEXTURE)
-               so_emit(chan, nv50->state.tic_upload);
-       if (nv50->state.dirty & NV50_NEW_ARRAYS) {
-               so_emit(chan, nv50->state.vtxfmt);
-               so_emit(chan, nv50->state.vtxbuf);
-               if (nv50->state.vtxattr)
-                       so_emit(chan, nv50->state.vtxattr);
-       }
-       nv50->state.dirty = 0;
+static struct nouveau_stateobj *
+validate_rast(struct nv50_context *nv50)
+{
+       struct nouveau_stateobj *so = NULL;
+       so_ref(nv50->rasterizer->so, &so);
+       return so;
 }
 
-void
-nv50_state_flush_notify(struct nouveau_channel *chan)
+static struct nouveau_stateobj *
+validate_blend_colour(struct nv50_context *nv50)
 {
-       struct nv50_context *nv50 = chan->user_private;
+       struct nouveau_grobj *tesla = nv50->screen->tesla;
+       struct nouveau_stateobj *so = so_new(1, 4, 0);
+
+       so_method(so, tesla, NV50TCL_BLEND_COLOR(0), 4);
+       so_data  (so, fui(nv50->blend_colour.color[0]));
+       so_data  (so, fui(nv50->blend_colour.color[1]));
+       so_data  (so, fui(nv50->blend_colour.color[2]));
+       so_data  (so, fui(nv50->blend_colour.color[3]));
+       return so;
+}
 
-       if (nv50->state.tic_upload && !(nv50->dirty & NV50_NEW_TEXTURE))
-               so_emit(chan, nv50->state.tic_upload);
+static struct nouveau_stateobj *
+validate_stencil_ref(struct nv50_context *nv50)
+{
+       struct nouveau_grobj *tesla = nv50->screen->tesla;
+       struct nouveau_stateobj *so = so = so_new(2, 2, 0);
 
-       so_emit_reloc_markers(chan, nv50->state.fb);
-       so_emit_reloc_markers(chan, nv50->state.vertprog);
-       so_emit_reloc_markers(chan, nv50->state.fragprog);
-       so_emit_reloc_markers(chan, nv50->state.vtxbuf);
-       so_emit_reloc_markers(chan, nv50->screen->static_init);
+       so_method(so, tesla, NV50TCL_STENCIL_FRONT_FUNC_REF, 1);
+       so_data  (so, nv50->stencil_ref.ref_value[0]);
+       so_method(so, tesla, NV50TCL_STENCIL_BACK_FUNC_REF, 1);
+       so_data  (so, nv50->stencil_ref.ref_value[1]);
+       return so;
+}
 
-       if (nv50->state.instbuf)
-               so_emit_reloc_markers(chan, nv50->state.instbuf);
+static struct nouveau_stateobj *
+validate_stipple(struct nv50_context *nv50)
+{
+       struct nouveau_grobj *tesla = nv50->screen->tesla;
+       struct nouveau_stateobj *so = so_new(1, 32, 0);
+       int i;
+
+       so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_PATTERN(0), 32);
+       for (i = 0; i < 32; i++)
+               so_data(so, util_bswap32(nv50->stipple.stipple[i]));
+       return so;
 }
 
-boolean
-nv50_state_validate(struct nv50_context *nv50)
+static struct nouveau_stateobj *
+validate_scissor(struct nv50_context *nv50)
 {
        struct nouveau_grobj *tesla = nv50->screen->tesla;
+        struct pipe_scissor_state *s = &nv50->scissor;
        struct nouveau_stateobj *so;
-       unsigned i;
 
-       if (nv50->dirty & NV50_NEW_FRAMEBUFFER)
-               nv50_state_validate_fb(nv50);
+       so = so_new(1, 2, 0);
+       so_method(so, tesla, NV50TCL_SCISSOR_HORIZ(0), 2);
+       so_data  (so, (s->maxx << 16) | s->minx);
+       so_data  (so, (s->maxy << 16) | s->miny);
+       return so;
+}
+
+static struct nouveau_stateobj *
+validate_viewport(struct nv50_context *nv50)
+{
+       struct nouveau_grobj *tesla = nv50->screen->tesla;
+       struct nouveau_stateobj *so = so_new(5, 9, 0);
+
+       so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE_X(0), 3);
+       so_data  (so, fui(nv50->viewport.translate[0]));
+       so_data  (so, fui(nv50->viewport.translate[1]));
+       so_data  (so, fui(nv50->viewport.translate[2]));
+       so_method(so, tesla, NV50TCL_VIEWPORT_SCALE_X(0), 3);
+       so_data  (so, fui(nv50->viewport.scale[0]));
+       so_data  (so, fui(nv50->viewport.scale[1]));
+       so_data  (so, fui(nv50->viewport.scale[2]));
+
+       so_method(so, tesla, NV50TCL_VIEWPORT_TRANSFORM_EN, 1);
+       so_data  (so, 1);
+       /* 0x0000 = remove whole primitive only (xyz)
+        * 0x1018 = remove whole primitive only (xy), clamp z
+        * 0x1080 = clip primitive (xyz)
+        * 0x1098 = clip primitive (xy), clamp z
+        */
+       so_method(so, tesla, NV50TCL_VIEW_VOLUME_CLIP_CTRL, 1);
+       so_data  (so, 0x1080);
+       /* no idea what 0f90 does */
+       so_method(so, tesla, 0x0f90, 1);
+       so_data  (so, 0);
+
+       return so;
+}
 
-       if (nv50->dirty & NV50_NEW_BLEND)
-               so_ref(nv50->blend->so, &nv50->state.blend);
+static struct nouveau_stateobj *
+validate_sampler(struct nv50_context *nv50)
+{
+       struct nouveau_grobj *tesla = nv50->screen->tesla;
+       struct nouveau_stateobj *so;
+       unsigned nr = 0, i;
 
-       if (nv50->dirty & NV50_NEW_ZSA)
-               so_ref(nv50->zsa->so, &nv50->state.zsa);
+       for (i = 0; i < PIPE_SHADER_TYPES; ++i)
+               nr += nv50->sampler_nr[i];
 
-       if (nv50->dirty & (NV50_NEW_VERTPROG | NV50_NEW_VERTPROG_CB))
-               nv50_vertprog_validate(nv50);
+       so = so_new(1 + 5 * PIPE_SHADER_TYPES,
+                   1 + 19 * PIPE_SHADER_TYPES + nr * 8,
+                   PIPE_SHADER_TYPES * 2);
 
-       if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB))
-               nv50_fragprog_validate(nv50);
+       nv50_validate_samplers(nv50, so, PIPE_SHADER_VERTEX);
+       nv50_validate_samplers(nv50, so, PIPE_SHADER_FRAGMENT);
 
-       if (nv50->dirty & (NV50_NEW_GEOMPROG | NV50_NEW_GEOMPROG_CB))
-               nv50_geomprog_validate(nv50);
+       so_method(so, tesla, 0x1334, 1); /* flush TSC */
+       so_data  (so, 0);
 
-       if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG |
-                          NV50_NEW_GEOMPROG | NV50_NEW_RASTERIZER))
-               nv50_fp_linkage_validate(nv50);
+       return so;
+}
 
-       if (nv50->dirty & (NV50_NEW_GEOMPROG | NV50_NEW_VERTPROG))
-               nv50_gp_linkage_validate(nv50);
+static struct nouveau_stateobj *
+validate_vtxbuf(struct nv50_context *nv50)
+{
+       struct nouveau_stateobj *so = NULL;
+       so_ref(nv50->state.vtxbuf, &so);
+       return so;
+}
 
-       if (nv50->dirty & NV50_NEW_RASTERIZER)
-               so_ref(nv50->rasterizer->so, &nv50->state.rast);
+static struct nouveau_stateobj *
+validate_vtxattr(struct nv50_context *nv50)
+{
+       struct nouveau_stateobj *so = NULL;
+       so_ref(nv50->state.vtxattr, &so);
+       return so;
+}
 
-       if (nv50->dirty & NV50_NEW_BLEND_COLOUR) {
-               so = so_new(1, 4, 0);
-               so_method(so, tesla, NV50TCL_BLEND_COLOR(0), 4);
-               so_data  (so, fui(nv50->blend_colour.color[0]));
-               so_data  (so, fui(nv50->blend_colour.color[1]));
-               so_data  (so, fui(nv50->blend_colour.color[2]));
-               so_data  (so, fui(nv50->blend_colour.color[3]));
-               so_ref(so, &nv50->state.blend_colour);
-               so_ref(NULL, &so);
-       }
+struct state_validate {
+       struct nouveau_stateobj *(*func)(struct nv50_context *nv50);
+       unsigned states;
+} validate_list[] = {
+       { validate_fb             , NV50_NEW_FRAMEBUFFER                      },
+       { validate_blend          , NV50_NEW_BLEND                            },
+       { validate_zsa            , NV50_NEW_ZSA                              },
+       { nv50_vertprog_validate  , NV50_NEW_VERTPROG | NV50_NEW_VERTPROG_CB  },
+       { nv50_fragprog_validate  , NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB  },
+       { nv50_geomprog_validate  , NV50_NEW_GEOMPROG | NV50_NEW_GEOMPROG_CB  },
+       { nv50_fp_linkage_validate, NV50_NEW_VERTPROG | NV50_NEW_GEOMPROG |
+                                   NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER   },
+       { nv50_gp_linkage_validate, NV50_NEW_VERTPROG | NV50_NEW_GEOMPROG     },
+       { validate_rast           , NV50_NEW_RASTERIZER                       },
+       { validate_blend_colour   , NV50_NEW_BLEND_COLOUR                     },
+       { validate_stencil_ref    , NV50_NEW_STENCIL_REF                      },
+       { validate_stipple        , NV50_NEW_STIPPLE                          },
+       { validate_scissor        , NV50_NEW_SCISSOR                          },
+       { validate_viewport       , NV50_NEW_VIEWPORT                         },
+       { validate_sampler        , NV50_NEW_SAMPLER                          },
+       { nv50_tex_validate       , NV50_NEW_TEXTURE | NV50_NEW_SAMPLER       },
+       { nv50_vbo_validate       , NV50_NEW_ARRAYS                           },
+       { validate_vtxbuf         , NV50_NEW_ARRAYS                           },
+       { validate_vtxattr        , NV50_NEW_ARRAYS                           },
+       {}
+};
+#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
 
-       if (nv50->dirty & NV50_NEW_STENCIL_REF) {
-               so = so_new(2, 2, 0);
-               so_method(so, tesla, NV50TCL_STENCIL_FRONT_FUNC_REF, 1);
-               so_data  (so, nv50->stencil_ref.ref_value[0]);
-               so_method(so, tesla, NV50TCL_STENCIL_BACK_FUNC_REF, 1);
-               so_data  (so, nv50->stencil_ref.ref_value[1]);
-               so_ref(so, &nv50->state.stencil_ref);
-               so_ref(NULL, &so);
-       }
+boolean
+nv50_state_validate(struct nv50_context *nv50, unsigned wait_dwords)
+{
+       struct nouveau_channel *chan = nv50->screen->base.channel;
+       struct nouveau_grobj *tesla = nv50->screen->tesla;
+       unsigned nr_relocs = 128, nr_dwords = wait_dwords + 128 + 4;
+       int ret, i;
 
-       if (nv50->dirty & NV50_NEW_STIPPLE) {
-               so = so_new(1, 32, 0);
-               so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_PATTERN(0), 32);
-               for (i = 0; i < 32; i++)
-                       so_data(so, util_bswap32(nv50->stipple.stipple[i]));
-               so_ref(so, &nv50->state.stipple);
-               so_ref(NULL, &so);
-       }
+       for (i = 0; i < validate_list_len; i++) {
+               struct state_validate *validate = &validate_list[i];
+               struct nouveau_stateobj *so;
 
-       if (nv50->dirty & (NV50_NEW_SCISSOR | NV50_NEW_RASTERIZER)) {
-               struct pipe_rasterizer_state *rast = &nv50->rasterizer->pipe;
-               struct pipe_scissor_state *s = &nv50->scissor;
+               if (!(nv50->dirty & validate->states))
+                       continue;
 
-               if (nv50->state.scissor &&
-                   (rast->scissor == 0 && nv50->state.scissor_enabled == 0))
-                       goto scissor_uptodate;
-               nv50->state.scissor_enabled = rast->scissor;
+               so = validate->func(nv50);
+               if (!so)
+                       continue;
 
-               so = so_new(1, 2, 0);
-               so_method(so, tesla, NV50TCL_SCISSOR_HORIZ(0), 2);
-               if (nv50->state.scissor_enabled) {
-                       so_data(so, (s->maxx << 16) | s->minx);
-                       so_data(so, (s->maxy << 16) | s->miny);
-               } else {
-                       so_data(so, (nv50->framebuffer.width << 16));
-                       so_data(so, (nv50->framebuffer.height << 16));
-               }
-               so_ref(so, &nv50->state.scissor);
-               so_ref(NULL, &so);
-               nv50->state.dirty |= NV50_NEW_SCISSOR;
-       }
-scissor_uptodate:
-
-       if (nv50->dirty & (NV50_NEW_VIEWPORT | NV50_NEW_RASTERIZER)) {
-               if (nv50->state.viewport &&
-                   !(nv50->dirty & NV50_NEW_VIEWPORT))
-                       goto viewport_uptodate;
-
-               so = so_new(5, 9, 0);
-               so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE_X(0), 3);
-               so_data  (so, fui(nv50->viewport.translate[0]));
-               so_data  (so, fui(nv50->viewport.translate[1]));
-               so_data  (so, fui(nv50->viewport.translate[2]));
-               so_method(so, tesla, NV50TCL_VIEWPORT_SCALE_X(0), 3);
-               so_data  (so, fui(nv50->viewport.scale[0]));
-               so_data  (so, fui(nv50->viewport.scale[1]));
-               so_data  (so, fui(nv50->viewport.scale[2]));
-
-               so_method(so, tesla, NV50TCL_VIEWPORT_TRANSFORM_EN, 1);
-               so_data  (so, 1);
-               /* 0x0000 = remove whole primitive only (xyz)
-                * 0x1018 = remove whole primitive only (xy), clamp z
-                * 0x1080 = clip primitive (xyz)
-                * 0x1098 = clip primitive (xy), clamp z
-                */
-               so_method(so, tesla, NV50TCL_VIEW_VOLUME_CLIP_CTRL, 1);
-               so_data  (so, 0x1080);
-               /* no idea what 0f90 does */
-               so_method(so, tesla, 0x0f90, 1);
-               so_data  (so, 0);
+               nr_dwords += (so->total + so->cur);
+               nr_relocs += so->cur_reloc;
 
-               so_ref(so, &nv50->state.viewport);
+               so_ref(so, &nv50->state.hw[i]);
                so_ref(NULL, &so);
-               nv50->state.dirty |= NV50_NEW_VIEWPORT;
+               nv50->state.hw_dirty |= (1 << i);
        }
-viewport_uptodate:
-
-       if (nv50->dirty & NV50_NEW_SAMPLER) {
-               unsigned nr = 0;
-
-               for (i = 0; i < PIPE_SHADER_TYPES; ++i)
-                       nr += nv50->sampler_nr[i];
+       nv50->dirty = 0;
 
-               so = so_new(1 + 5 * PIPE_SHADER_TYPES,
-                           1 + 19 * PIPE_SHADER_TYPES + nr * 8,
-                           PIPE_SHADER_TYPES * 2);
+       if (nv50->screen->cur_ctx != nv50) {
+               for (i = 0; i < validate_list_len; i++) {
+                       if (!nv50->state.hw[i] ||
+                           (nv50->state.hw_dirty & (1 << i)))
+                               continue;
 
-               nv50_validate_samplers(nv50, so, PIPE_SHADER_VERTEX);
-               nv50_validate_samplers(nv50, so, PIPE_SHADER_FRAGMENT);
+                       nr_dwords += (nv50->state.hw[i]->total +
+                                     nv50->state.hw[i]->cur);
+                       nr_relocs += nv50->state.hw[i]->cur_reloc;
+                       nv50->state.hw_dirty |= (1 << i);
+               }
 
-               so_method(so, tesla, 0x1334, 1); /* flush TSC */
-               so_data  (so, 0);
+               nv50->screen->cur_ctx = nv50;
+       }
 
-               so_ref(so, &nv50->state.tsc_upload);
-               so_ref(NULL, &so);
+       ret = MARK_RING(chan, nr_dwords, nr_relocs);
+       if (ret) {
+               debug_printf("MARK_RING(%d, %d) failed: %d\n",
+                            nr_dwords, nr_relocs, ret);
+               return FALSE;
        }
 
-       if (nv50->dirty & (NV50_NEW_TEXTURE | NV50_NEW_SAMPLER))
-               nv50_tex_validate(nv50);
+       while (nv50->state.hw_dirty) {
+               i = ffs(nv50->state.hw_dirty) - 1;
+               nv50->state.hw_dirty &= ~(1 << i);
 
-       if (nv50->dirty & NV50_NEW_ARRAYS)
-               nv50_vbo_validate(nv50);
+               so_emit(chan, nv50->state.hw[i]);
+       }
 
-       nv50->state.dirty |= nv50->dirty;
-       nv50->dirty = 0;
-       nv50_state_emit(nv50);
+       /* Yes, really, we need to do this.  If a buffer that is referenced
+        * on the hardware isn't part of changed state above, without doing
+        * this the kernel is given no clue that the buffer is being used
+        * still.  This can cause all sorts of fun issues.
+        */
+       nv50_tex_relocs(nv50);
+       so_emit_reloc_markers(chan, nv50->state.hw[0]); /* fb */
+       so_emit_reloc_markers(chan, nv50->state.hw[3]); /* vp */
+       so_emit_reloc_markers(chan, nv50->state.hw[4]); /* fp */
+       so_emit_reloc_markers(chan, nv50->state.hw[17]); /* vb */
+       so_emit_reloc_markers(chan, nv50->screen->static_init);
 
+       /* No idea.. */
+       BEGIN_RING(chan, tesla, 0x142c, 1);
+       OUT_RING  (chan, 0);
+       BEGIN_RING(chan, tesla, 0x142c, 1);
+       OUT_RING  (chan, 0);
        return TRUE;
 }
 
index de0560e20cd07b4f44cf2716e958d0c7da37b296..4c48b12cd87b377b24a061b47c74276fcbf3c558 100644 (file)
@@ -24,6 +24,7 @@
 #include "nv50_texture.h"
 
 #include "nouveau/nouveau_stateobj.h"
+#include "nouveau/nouveau_reloc.h"
 
 #include "util/u_format.h"
 
@@ -195,6 +196,35 @@ nv50_validate_textures(struct nv50_context *nv50, struct nouveau_stateobj *so,
 }
 
 void
+nv50_tex_relocs(struct nv50_context *nv50)
+{
+       struct nouveau_channel *chan = nv50->screen->tesla->channel;
+       int p, unit;
+
+       p = PIPE_SHADER_FRAGMENT;
+       for (unit = 0; unit < nv50->miptree_nr[p]; unit++) {
+               if (!nv50->miptree[p][unit])
+                       continue;
+               nouveau_reloc_emit(chan, nv50->screen->tic,
+                                  ((p * 32) + unit) * 32, NULL,
+                                  nv50->miptree[p][unit]->base.bo, 0, 0,
+                                  NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW |
+                                  NOUVEAU_BO_RD, 0, 0);
+       }
+
+       p = PIPE_SHADER_VERTEX;
+       for (unit = 0; unit < nv50->miptree_nr[p]; unit++) {
+               if (!nv50->miptree[p][unit])
+                       continue;
+               nouveau_reloc_emit(chan, nv50->screen->tic,
+                                  ((p * 32) + unit) * 32, NULL,
+                                  nv50->miptree[p][unit]->base.bo, 0, 0,
+                                  NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW |
+                                  NOUVEAU_BO_RD, 0, 0);
+       }
+}
+
+struct nouveau_stateobj *
 nv50_tex_validate(struct nv50_context *nv50)
 {
        struct nouveau_stateobj *so;
@@ -217,12 +247,11 @@ nv50_tex_validate(struct nv50_context *nv50)
                so_ref(NULL, &so);
 
                NOUVEAU_ERR("failed tex validate\n");
-               return;
+               return NULL;
        }
 
        so_method(so, tesla, 0x1330, 1); /* flush TIC */
        so_data  (so, 0);
 
-       so_ref(so, &nv50->state.tic_upload);
-       so_ref(NULL, &so);
+       return so;
 }
index 1c8ee0b9adf83364c5720ef7f3a4c8d293c3e91f..6b9c1ee231e40e47d8ee94866f24d21a7f25d40f 100644 (file)
 #include "util/u_inlines.h"
 #include "util/u_format.h"
 
+#include "nouveau/nouveau_util.h"
 #include "nv50_context.h"
 
-static boolean
-nv50_push_elements_u08(struct nv50_context *, uint8_t *, unsigned);
-
-static boolean
-nv50_push_elements_u16(struct nv50_context *, uint16_t *, unsigned);
-
-static boolean
-nv50_push_elements_u32(struct nv50_context *, uint32_t *, unsigned);
-
-static boolean
-nv50_push_arrays(struct nv50_context *, unsigned, unsigned);
-
-#define NV50_USING_LOATHED_EDGEFLAG(ctx) ((ctx)->vertprog->cfg.edgeflag_in < 16)
-
-static INLINE unsigned
-nv50_prim(unsigned mode)
-{
-       switch (mode) {
-       case PIPE_PRIM_POINTS: return NV50TCL_VERTEX_BEGIN_POINTS;
-       case PIPE_PRIM_LINES: return NV50TCL_VERTEX_BEGIN_LINES;
-       case PIPE_PRIM_LINE_LOOP: return NV50TCL_VERTEX_BEGIN_LINE_LOOP;
-       case PIPE_PRIM_LINE_STRIP: return NV50TCL_VERTEX_BEGIN_LINE_STRIP;
-       case PIPE_PRIM_TRIANGLES: return NV50TCL_VERTEX_BEGIN_TRIANGLES;
-       case PIPE_PRIM_TRIANGLE_STRIP:
-               return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP;
-       case PIPE_PRIM_TRIANGLE_FAN: return NV50TCL_VERTEX_BEGIN_TRIANGLE_FAN;
-       case PIPE_PRIM_QUADS: return NV50TCL_VERTEX_BEGIN_QUADS;
-       case PIPE_PRIM_QUAD_STRIP: return NV50TCL_VERTEX_BEGIN_QUAD_STRIP;
-       case PIPE_PRIM_POLYGON: return NV50TCL_VERTEX_BEGIN_POLYGON;
-       case PIPE_PRIM_LINES_ADJACENCY:
-               return NV50TCL_VERTEX_BEGIN_LINES_ADJACENCY;
-       case PIPE_PRIM_LINE_STRIP_ADJACENCY:
-               return NV50TCL_VERTEX_BEGIN_LINE_STRIP_ADJACENCY;
-       case PIPE_PRIM_TRIANGLES_ADJACENCY:
-               return NV50TCL_VERTEX_BEGIN_TRIANGLES_ADJACENCY;
-       case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
-               return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP_ADJACENCY;
-       default:
-               break;
-       }
-
-       NOUVEAU_ERR("invalid primitive type %d\n", mode);
-       return NV50TCL_VERTEX_BEGIN_POINTS;
-}
-
 static INLINE uint32_t
 nv50_vbo_type_to_hw(enum pipe_format format)
 {
@@ -139,15 +95,16 @@ nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element *ve)
        uint32_t hw_type, hw_size;
        enum pipe_format pf = ve->src_format;
        const struct util_format_description *desc;
-       unsigned size;
+       unsigned size, nr_components;
 
        desc = util_format_description(pf);
        assert(desc);
 
        size = util_format_get_component_bits(pf, UTIL_FORMAT_COLORSPACE_RGB, 0);
+       nr_components = util_format_get_nr_components(pf);
 
        hw_type = nv50_vbo_type_to_hw(pf);
-       hw_size = nv50_vbo_size_to_hw(size, ve->nr_components);
+       hw_size = nv50_vbo_size_to_hw(size, nr_components);
 
        if (!hw_type || !hw_size) {
                NOUVEAU_ERR("unsupported vbo format: %s\n", util_format_name(pf));
@@ -161,250 +118,58 @@ nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element *ve)
        return (hw_type | hw_size);
 }
 
-/* For instanced drawing from user buffers, hitting the FIFO repeatedly
- * with the same vertex data is probably worse than uploading all data.
- */
-static boolean
-nv50_upload_vtxbuf(struct nv50_context *nv50, unsigned i)
-{
-       struct nv50_screen *nscreen = nv50->screen;
-       struct pipe_screen *pscreen = &nscreen->base.base;
-       struct pipe_buffer *buf = nscreen->strm_vbuf[i];
-       struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i];
-       uint8_t *src;
-       unsigned size = align(vb->buffer->size, 4096);
-
-       if (buf && buf->size < size)
-               pipe_buffer_reference(&nscreen->strm_vbuf[i], NULL);
-
-       if (!nscreen->strm_vbuf[i]) {
-               nscreen->strm_vbuf[i] = pipe_buffer_create(
-                       pscreen, 0, PIPE_BUFFER_USAGE_VERTEX, size);
-               buf = nscreen->strm_vbuf[i];
-       }
-
-       src = pipe_buffer_map(pscreen, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ);
-       if (!src)
-               return FALSE;
-       src += vb->buffer_offset;
-
-       size = (vb->max_index + 1) * vb->stride + 16; /* + 16 is for stride 0 */
-       if (vb->buffer_offset + size > vb->buffer->size)
-               size = vb->buffer->size - vb->buffer_offset;
-
-       pipe_buffer_write(pscreen, buf, vb->buffer_offset, size, src);
-       pipe_buffer_unmap(pscreen, vb->buffer);
-
-       vb->buffer = buf; /* don't pipe_reference, this is a private copy */
-       return TRUE;
-}
-
-static void
-nv50_upload_user_vbufs(struct nv50_context *nv50)
-{
-       unsigned i;
-
-       if (nv50->vbo_fifo)
-               nv50->dirty |= NV50_NEW_ARRAYS;
-       if (!(nv50->dirty & NV50_NEW_ARRAYS))
-               return;
-
-       for (i = 0; i < nv50->vtxbuf_nr; ++i) {
-               if (nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX)
-                       continue;
-               nv50_upload_vtxbuf(nv50, i);
-       }
-}
-
-static void
-nv50_set_static_vtxattr(struct nv50_context *nv50, unsigned i, void *data)
-{
-       struct nouveau_grobj *tesla = nv50->screen->tesla;
-       struct nouveau_channel *chan = tesla->channel;
-       float v[4];
-
-       util_format_read_4f(nv50->vtxelt[i].src_format,
-                           v, 0, data, 0, 0, 0, 1, 1);
-
-       switch (nv50->vtxelt[i].nr_components) {
-       case 4:
-               BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_4F_X(i), 4);
-               OUT_RINGf (chan, v[0]);
-               OUT_RINGf (chan, v[1]);
-               OUT_RINGf (chan, v[2]);
-               OUT_RINGf (chan, v[3]);
-               break;
-       case 3:
-               BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_3F_X(i), 3);
-               OUT_RINGf (chan, v[0]);
-               OUT_RINGf (chan, v[1]);
-               OUT_RINGf (chan, v[2]);
-               break;
-       case 2:
-               BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_2F_X(i), 2);
-               OUT_RINGf (chan, v[0]);
-               OUT_RINGf (chan, v[1]);
-               break;
-       case 1:
-               BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_1F(i), 1);
-               OUT_RINGf (chan, v[0]);
-               break;
-       default:
-               assert(0);
-               break;
-       }
-}
-
-static unsigned
-init_per_instance_arrays_immd(struct nv50_context *nv50,
-                             unsigned startInstance,
-                             unsigned pos[16], unsigned step[16])
-{
-       struct nouveau_bo *bo;
-       unsigned i, b, count = 0;
-
-       for (i = 0; i < nv50->vtxelt_nr; ++i) {
-               if (!nv50->vtxelt[i].instance_divisor)
-                       continue;
-               ++count;
-               b = nv50->vtxelt[i].vertex_buffer_index;
-
-               pos[i] = nv50->vtxelt[i].src_offset +
-                       nv50->vtxbuf[b].buffer_offset +
-                       startInstance * nv50->vtxbuf[b].stride;
-               step[i] = startInstance % nv50->vtxelt[i].instance_divisor;
-
-               bo = nouveau_bo(nv50->vtxbuf[b].buffer);
-               if (!bo->map)
-                       nouveau_bo_map(bo, NOUVEAU_BO_RD);
-
-               nv50_set_static_vtxattr(nv50, i, (uint8_t *)bo->map + pos[i]);
-       }
-
-       return count;
-}
-
-static unsigned
-init_per_instance_arrays(struct nv50_context *nv50,
-                        unsigned startInstance,
-                        unsigned pos[16], unsigned step[16])
-{
-       struct nouveau_grobj *tesla = nv50->screen->tesla;
-       struct nouveau_channel *chan = tesla->channel;
+struct instance {
        struct nouveau_bo *bo;
-       struct nouveau_stateobj *so;
-       unsigned i, b, count = 0;
-       const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
-
-       if (nv50->vbo_fifo)
-               return init_per_instance_arrays_immd(nv50, startInstance,
-                                                    pos, step);
-
-       so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 2, nv50->vtxelt_nr * 2);
-
-       for (i = 0; i < nv50->vtxelt_nr; ++i) {
-               if (!nv50->vtxelt[i].instance_divisor)
-                       continue;
-               ++count;
-               b = nv50->vtxelt[i].vertex_buffer_index;
-
-               pos[i] = nv50->vtxelt[i].src_offset +
-                       nv50->vtxbuf[b].buffer_offset +
-                       startInstance * nv50->vtxbuf[b].stride;
-
-               if (!startInstance) {
-                       step[i] = 0;
-                       continue;
-               }
-               step[i] = startInstance % nv50->vtxelt[i].instance_divisor;
-
-               bo = nouveau_bo(nv50->vtxbuf[b].buffer);
-
-               so_method(so, tesla, NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2);
-               so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_HIGH, 0, 0);
-               so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0);
-       }
-
-       if (count && startInstance) {
-               so_ref (so, &nv50->state.instbuf); /* for flush notify */
-               so_emit(chan, nv50->state.instbuf);
-       }
-       so_ref (NULL, &so);
-
-       return count;
-}
+       unsigned delta;
+       unsigned stride;
+       unsigned step;
+       unsigned divisor;
+};
 
 static void
-step_per_instance_arrays_immd(struct nv50_context *nv50,
-                             unsigned pos[16], unsigned step[16])
+instance_init(struct nv50_context *nv50, struct instance *a, unsigned first)
 {
-       struct nouveau_bo *bo;
-       unsigned i, b;
+       int i;
 
-       for (i = 0; i < nv50->vtxelt_nr; ++i) {
-               if (!nv50->vtxelt[i].instance_divisor)
-                       continue;
-               if (++step[i] != nv50->vtxelt[i].instance_divisor)
-                       continue;
-               b = nv50->vtxelt[i].vertex_buffer_index;
-               bo = nouveau_bo(nv50->vtxbuf[b].buffer);
+       for (i = 0; i < nv50->vtxelt->num_elements; i++) {
+               struct pipe_vertex_element *ve = &nv50->vtxelt->pipe[i];
+               struct pipe_vertex_buffer *vb;
 
-               step[i] = 0;
-               pos[i] += nv50->vtxbuf[b].stride;
+               a[i].divisor = ve->instance_divisor;
+               if (a[i].divisor) {
+                       vb = &nv50->vtxbuf[ve->vertex_buffer_index];
 
-               nv50_set_static_vtxattr(nv50, i, (uint8_t *)bo->map + pos[i]);
+                       a[i].bo = nouveau_bo(vb->buffer);
+                       a[i].stride = vb->stride;
+                       a[i].step = first % a[i].divisor;
+                       a[i].delta = vb->buffer_offset + ve->src_offset +
+                                    (first * a[i].stride);
+               }
        }
 }
 
 static void
-step_per_instance_arrays(struct nv50_context *nv50,
-                        unsigned pos[16], unsigned step[16])
+instance_step(struct nv50_context *nv50, struct instance *a)
 {
+       struct nouveau_channel *chan = nv50->screen->tesla->channel;
        struct nouveau_grobj *tesla = nv50->screen->tesla;
-       struct nouveau_channel *chan = tesla->channel;
-       struct nouveau_bo *bo;
-       struct nouveau_stateobj *so;
-       unsigned i, b;
-       const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
-
-       if (nv50->vbo_fifo) {
-               step_per_instance_arrays_immd(nv50, pos, step);
-               return;
-       }
-
-       so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 2, nv50->vtxelt_nr * 2);
+       int i;
 
-       for (i = 0; i < nv50->vtxelt_nr; ++i) {
-               if (!nv50->vtxelt[i].instance_divisor)
+       for (i = 0; i < nv50->vtxelt->num_elements; i++) {
+               if (!a[i].divisor)
                        continue;
-               b = nv50->vtxelt[i].vertex_buffer_index;
 
-               if (++step[i] == nv50->vtxelt[i].instance_divisor) {
-                       step[i] = 0;
-                       pos[i] += nv50->vtxbuf[b].stride;
+               BEGIN_RING(chan, tesla,
+                          NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2);
+               OUT_RELOCh(chan, a[i].bo, a[i].delta, NOUVEAU_BO_RD |
+                          NOUVEAU_BO_VRAM | NOUVEAU_BO_GART);
+               OUT_RELOCl(chan, a[i].bo, a[i].delta, NOUVEAU_BO_RD |
+                          NOUVEAU_BO_VRAM | NOUVEAU_BO_GART);
+               if (++a[i].step == a[i].divisor) {
+                       a[i].step = 0;
+                       a[i].delta += a[i].stride;
                }
-
-               bo = nouveau_bo(nv50->vtxbuf[b].buffer);
-
-               so_method(so, tesla, NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2);
-               so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_HIGH, 0, 0);
-               so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0);
        }
-
-       so_ref (so, &nv50->state.instbuf); /* for flush notify */
-       so_ref (NULL, &so);
-
-       so_emit(chan, nv50->state.instbuf);
-}
-
-static INLINE void
-nv50_unmap_vbufs(struct nv50_context *nv50)
-{
-        unsigned i;
-
-        for (i = 0; i < nv50->vtxbuf_nr; ++i)
-                if (nouveau_bo(nv50->vtxbuf[i].buffer)->map)
-                        nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer));
 }
 
 void
@@ -415,198 +180,207 @@ nv50_draw_arrays_instanced(struct pipe_context *pipe,
        struct nv50_context *nv50 = nv50_context(pipe);
        struct nouveau_channel *chan = nv50->screen->tesla->channel;
        struct nouveau_grobj *tesla = nv50->screen->tesla;
-       unsigned i, nz_divisors;
-       unsigned step[16], pos[16];
-
-       if (!NV50_USING_LOATHED_EDGEFLAG(nv50))
-               nv50_upload_user_vbufs(nv50);
+       struct instance a[16];
+       unsigned prim = nv50_prim(mode);
 
-       nv50_state_validate(nv50);
+       instance_init(nv50, a, startInstance);
+       if (!nv50_state_validate(nv50, 10 + 16*3))
+               return;
 
-       nz_divisors = init_per_instance_arrays(nv50, startInstance, pos, step);
+       if (nv50->vbo_fifo) {
+               nv50_push_elements_instanced(pipe, NULL, 0, mode, start,
+                                            count, startInstance,
+                                            instanceCount);
+               return;
+       }
 
        BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
        OUT_RING  (chan, NV50_CB_AUX | (24 << 8));
        OUT_RING  (chan, startInstance);
+       while (instanceCount--) {
+               if (AVAIL_RING(chan) < (7 + 16*3)) {
+                       FIRE_RING(chan);
+                       if (!nv50_state_validate(nv50, 7 + 16*3)) {
+                               assert(0);
+                               return;
+                       }
+               }
+               instance_step(nv50, a);
 
-       BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
-       OUT_RING  (chan, nv50_prim(mode));
-
-       if (nv50->vbo_fifo)
-               nv50_push_arrays(nv50, start, count);
-       else {
+               BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
+               OUT_RING  (chan, prim);
                BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
                OUT_RING  (chan, start);
                OUT_RING  (chan, count);
-       }
-       BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
-       OUT_RING  (chan, 0);
-
-       for (i = 1; i < instanceCount; i++) {
-               if (nz_divisors) /* any non-zero array divisors ? */
-                       step_per_instance_arrays(nv50, pos, step);
-
-               BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
-               OUT_RING  (chan, nv50_prim(mode) | (1 << 28));
-
-               if (nv50->vbo_fifo)
-                       nv50_push_arrays(nv50, start, count);
-               else {
-                       BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
-                       OUT_RING  (chan, start);
-                       OUT_RING  (chan, count);
-               }
                BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
                OUT_RING  (chan, 0);
-       }
-       nv50_unmap_vbufs(nv50);
 
-       so_ref(NULL, &nv50->state.instbuf);
+               prim |= (1 << 28);
+       }
 }
 
 void
 nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
                 unsigned count)
 {
-       struct nv50_context *nv50 = nv50_context(pipe);
-       struct nouveau_channel *chan = nv50->screen->tesla->channel;
-       struct nouveau_grobj *tesla = nv50->screen->tesla;
-       boolean ret;
-
-       nv50_state_validate(nv50);
-
-       BEGIN_RING(chan, tesla, 0x142c, 1);
-       OUT_RING  (chan, 0);
-       BEGIN_RING(chan, tesla, 0x142c, 1);
-       OUT_RING  (chan, 0);
-
-       BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
-       OUT_RING  (chan, nv50_prim(mode));
-
-       if (nv50->vbo_fifo)
-               ret = nv50_push_arrays(nv50, start, count);
-       else {
-               BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
-               OUT_RING  (chan, start);
-               OUT_RING  (chan, count);
-               ret = TRUE;
-       }
-       BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
-       OUT_RING  (chan, 0);
-
-       nv50_unmap_vbufs(nv50);
-
-        /* XXX: not sure what to do if ret != TRUE: flush and retry?
-         */
-        assert(ret);
+       nv50_draw_arrays_instanced(pipe, mode, start, count, 0, 1);
 }
 
-static INLINE boolean
-nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map,
-                             unsigned start, unsigned count)
-{
-       struct nouveau_channel *chan = nv50->screen->tesla->channel;
-       struct nouveau_grobj *tesla = nv50->screen->tesla;
-
-       map += start;
+struct inline_ctx {
+       struct nv50_context *nv50;
+       void *map;
+};
 
-       if (nv50->vbo_fifo)
-               return nv50_push_elements_u08(nv50, map, count);
+static void
+inline_elt08(void *priv, unsigned start, unsigned count)
+{
+       struct inline_ctx *ctx = priv;
+       struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
+       struct nouveau_channel *chan = tesla->channel;
+       uint8_t *map = (uint8_t *)ctx->map + start;
 
        if (count & 1) {
                BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1);
                OUT_RING  (chan, map[0]);
                map++;
-               count--;
+               count &= ~1;
        }
 
-       while (count) {
-               unsigned nr = count > 2046 ? 2046 : count;
-               int i;
-
-               BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, nr >> 1);
-               for (i = 0; i < nr; i += 2)
-                       OUT_RING  (chan, (map[i + 1] << 16) | map[i]);
+       count >>= 1;
+       if (!count)
+               return;
 
-               count -= nr;
-               map += nr;
+       BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, count);
+       while (count--) {
+               OUT_RING(chan, (map[1] << 16) | map[0]);
+               map += 2;
        }
-       return TRUE;
 }
 
-static INLINE boolean
-nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map,
-                             unsigned start, unsigned count)
+static void
+inline_elt16(void *priv, unsigned start, unsigned count)
 {
-       struct nouveau_channel *chan = nv50->screen->tesla->channel;
-       struct nouveau_grobj *tesla = nv50->screen->tesla;
-
-       map += start;
-
-       if (nv50->vbo_fifo)
-               return nv50_push_elements_u16(nv50, map, count);
+       struct inline_ctx *ctx = priv;
+       struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
+       struct nouveau_channel *chan = tesla->channel;
+       uint16_t *map = (uint16_t *)ctx->map + start;
 
        if (count & 1) {
                BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1);
                OUT_RING  (chan, map[0]);
+               count &= ~1;
                map++;
-               count--;
        }
 
-       while (count) {
-               unsigned nr = count > 2046 ? 2046 : count;
-               int i;
-
-               BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, nr >> 1);
-               for (i = 0; i < nr; i += 2)
-                       OUT_RING  (chan, (map[i + 1] << 16) | map[i]);
+       count >>= 1;
+       if (!count)
+               return;
 
-               count -= nr;
-               map += nr;
+       BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, count);
+       while (count--) {
+               OUT_RING(chan, (map[1] << 16) | map[0]);
+               map += 2;
        }
-       return TRUE;
 }
 
-static INLINE boolean
-nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map,
-                             unsigned start, unsigned count)
+static void
+inline_elt32(void *priv, unsigned start, unsigned count)
+{
+       struct inline_ctx *ctx = priv;
+       struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
+       struct nouveau_channel *chan = tesla->channel;
+
+       BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U32, count);
+       OUT_RINGp    (chan, (uint32_t *)ctx->map + start, count);
+}
+
+static void
+inline_edgeflag(void *priv, boolean enabled)
 {
+       struct inline_ctx *ctx = priv;
+       struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
+       struct nouveau_channel *chan = tesla->channel;
+
+       BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
+       OUT_RING  (chan, enabled ? 1 : 0);
+}
+
+static void
+nv50_draw_elements_inline(struct pipe_context *pipe,
+                         struct pipe_buffer *indexBuffer, unsigned indexSize,
+                         unsigned mode, unsigned start, unsigned count,
+                         unsigned startInstance, unsigned instanceCount)
+{
+       struct pipe_screen *pscreen = pipe->screen;
+       struct nv50_context *nv50 = nv50_context(pipe);
        struct nouveau_channel *chan = nv50->screen->tesla->channel;
        struct nouveau_grobj *tesla = nv50->screen->tesla;
+       struct instance a[16];
+       struct inline_ctx ctx;
+       struct u_split_prim s;
+       boolean nzi = FALSE;
+       unsigned overhead;
+
+       overhead = 16*3; /* potential instance adjustments */
+       overhead += 4; /* Begin()/End() */
+       overhead += 4; /* potential edgeflag disable/reenable */
+       overhead += 3; /* potentially 3 VTX_ELT_U16/U32 packet headers */
+
+       s.priv = &ctx;
+       if (indexSize == 1)
+               s.emit = inline_elt08;
+       else
+       if (indexSize == 2)
+               s.emit = inline_elt16;
+       else
+               s.emit = inline_elt32;
+       s.edge = inline_edgeflag;
+
+       ctx.nv50 = nv50;
+       ctx.map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ);
+       assert(ctx.map);
+       if (!ctx.map)
+               return;
 
-       map += start;
+       instance_init(nv50, a, startInstance);
+       if (!nv50_state_validate(nv50, overhead + 6 + 3))
+               return;
 
-       if (nv50->vbo_fifo)
-               return nv50_push_elements_u32(nv50, map, count);
+       BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
+       OUT_RING  (chan, NV50_CB_AUX | (24 << 8));
+       OUT_RING  (chan, startInstance);
+       while (instanceCount--) {
+               unsigned max_verts;
+               boolean done;
+
+               u_split_prim_init(&s, mode, start, count);
+               do {
+                       if (AVAIL_RING(chan) < (overhead + 6)) {
+                               FIRE_RING(chan);
+                               if (!nv50_state_validate(nv50, (overhead + 6))) {
+                                       assert(0);
+                                       return;
+                               }
+                       }
 
-       while (count) {
-               unsigned nr = count > 2047 ? 2047 : count;
+                       max_verts = AVAIL_RING(chan) - overhead;
+                       if (max_verts > 2047)
+                               max_verts = 2047;
+                       if (indexSize != 4)
+                               max_verts <<= 1;
+                       instance_step(nv50, a);
 
-               BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U32, nr);
-               OUT_RINGp (chan, map, nr);
+                       BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
+                       OUT_RING  (chan, nv50_prim(s.mode) | (nzi ? (1<<28) : 0));
+                       done = u_split_prim_next(&s, max_verts);
+                       BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
+                       OUT_RING  (chan, 0);
+               } while (!done);
 
-               count -= nr;
-               map += nr;
+               nzi = TRUE;
        }
-       return TRUE;
-}
 
-static INLINE void
-nv50_draw_elements_inline(struct nv50_context *nv50,
-                         void *map, unsigned indexSize,
-                         unsigned start, unsigned count)
-{
-       switch (indexSize) {
-       case 1:
-               nv50_draw_elements_inline_u08(nv50, map, start, count);
-               break;
-       case 2:
-               nv50_draw_elements_inline_u16(nv50, map, start, count);
-               break;
-       case 4:
-               nv50_draw_elements_inline_u32(nv50, map, start, count);
-               break;
-       }
+       pipe_buffer_unmap(pscreen, indexBuffer);
 }
 
 void
@@ -617,49 +391,68 @@ nv50_draw_elements_instanced(struct pipe_context *pipe,
                             unsigned startInstance, unsigned instanceCount)
 {
        struct nv50_context *nv50 = nv50_context(pipe);
+       struct nouveau_channel *chan = nv50->screen->tesla->channel;
        struct nouveau_grobj *tesla = nv50->screen->tesla;
-       struct nouveau_channel *chan = tesla->channel;
-       struct pipe_screen *pscreen = pipe->screen;
-       void *map;
-       unsigned i, nz_divisors;
-       unsigned step[16], pos[16];
+       struct instance a[16];
+       unsigned prim = nv50_prim(mode);
 
-       map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ);
-
-       if (!NV50_USING_LOATHED_EDGEFLAG(nv50))
-               nv50_upload_user_vbufs(nv50);
-
-       nv50_state_validate(nv50);
+       instance_init(nv50, a, startInstance);
+       if (!nv50_state_validate(nv50, 13 + 16*3))
+               return;
 
-       nz_divisors = init_per_instance_arrays(nv50, startInstance, pos, step);
+       if (nv50->vbo_fifo) {
+               nv50_push_elements_instanced(pipe, indexBuffer, indexSize,
+                                            mode, start, count, startInstance,
+                                            instanceCount);
+               return;
+       } else
+       if (!(indexBuffer->usage & PIPE_BUFFER_USAGE_INDEX) || indexSize == 1) {
+               nv50_draw_elements_inline(pipe, indexBuffer, indexSize,
+                                         mode, start, count, startInstance,
+                                         instanceCount);
+               return;
+       }
 
        BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
        OUT_RING  (chan, NV50_CB_AUX | (24 << 8));
        OUT_RING  (chan, startInstance);
-
-       BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
-       OUT_RING  (chan, nv50_prim(mode));
-
-       nv50_draw_elements_inline(nv50, map, indexSize, start, count);
-
-       BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
-       OUT_RING  (chan, 0);
-
-       for (i = 1; i < instanceCount; ++i) {
-               if (nz_divisors) /* any non-zero array divisors ? */
-                       step_per_instance_arrays(nv50, pos, step);
+       while (instanceCount--) {
+               if (AVAIL_RING(chan) < (7 + 16*3)) {
+                       FIRE_RING(chan);
+                       if (!nv50_state_validate(nv50, 10 + 16*3)) {
+                               assert(0);
+                               return;
+                       }
+               }
+               instance_step(nv50, a);
 
                BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
-               OUT_RING  (chan, nv50_prim(mode) | (1 << 28));
-
-               nv50_draw_elements_inline(nv50, map, indexSize, start, count);
-
+               OUT_RING  (chan, prim);
+               if (indexSize == 4) {
+                       BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32 | 0x30000, 0);
+                       OUT_RING  (chan, count);
+                       nouveau_pushbuf_submit(chan, nouveau_bo(indexBuffer),
+                                              start << 2, count << 2);
+               } else
+               if (indexSize == 2) {
+                       unsigned vb_start = (start & ~1);
+                       unsigned vb_end = (start + count + 1) & ~1;
+                       unsigned dwords = (vb_end - vb_start) >> 1;
+
+                       BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16_SETUP, 1);
+                       OUT_RING  (chan, ((start & 1) << 31) | count);
+                       BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x30000, 0);
+                       OUT_RING  (chan, dwords);
+                       nouveau_pushbuf_submit(chan, nouveau_bo(indexBuffer),
+                                              vb_start << 1, dwords << 2);
+                       BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16_SETUP, 1);
+                       OUT_RING  (chan, 0);
+               }
                BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
                OUT_RING  (chan, 0);
-       }
-       nv50_unmap_vbufs(nv50);
 
-       so_ref(NULL, &nv50->state.instbuf);
+               prim |= (1 << 28);
+       }
 }
 
 void
@@ -667,51 +460,8 @@ nv50_draw_elements(struct pipe_context *pipe,
                   struct pipe_buffer *indexBuffer, unsigned indexSize,
                   unsigned mode, unsigned start, unsigned count)
 {
-       struct nv50_context *nv50 = nv50_context(pipe);
-       struct nouveau_channel *chan = nv50->screen->tesla->channel;
-       struct nouveau_grobj *tesla = nv50->screen->tesla;
-       struct pipe_screen *pscreen = pipe->screen;
-       void *map;
-       
-       nv50_state_validate(nv50);
-
-       BEGIN_RING(chan, tesla, 0x142c, 1);
-       OUT_RING  (chan, 0);
-       BEGIN_RING(chan, tesla, 0x142c, 1);
-       OUT_RING  (chan, 0);
-
-       BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
-       OUT_RING  (chan, nv50_prim(mode));
-
-       if (!nv50->vbo_fifo && indexSize == 4) {
-               BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32 | 0x30000, 0);
-               OUT_RING  (chan, count);
-               nouveau_pushbuf_submit(chan, nouveau_bo(indexBuffer),
-                                      start << 2, count << 2);
-       } else
-       if (!nv50->vbo_fifo && indexSize == 2) {
-               unsigned vb_start = (start & ~1);
-               unsigned vb_end = (start + count + 1) & ~1;
-               unsigned dwords = (vb_end - vb_start) >> 1;
-
-               BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16_SETUP, 1);
-               OUT_RING  (chan, ((start & 1) << 31) | count);
-               BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x30000, 0);
-               OUT_RING  (chan, dwords);
-               nouveau_pushbuf_submit(chan, nouveau_bo(indexBuffer),
-                                      vb_start << 1, dwords << 2);
-               BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16_SETUP, 1);
-               OUT_RING  (chan, 0);
-       } else {
-               map = pipe_buffer_map(pscreen, indexBuffer,
-                                     PIPE_BUFFER_USAGE_CPU_READ);
-               nv50_draw_elements_inline(nv50, map, indexSize, start, count);
-               nv50_unmap_vbufs(nv50);
-               pipe_buffer_unmap(pscreen, indexBuffer);
-       }
-
-       BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
-       OUT_RING  (chan, 0);
+       nv50_draw_elements_instanced(pipe, indexBuffer, indexSize,
+                                    mode, start, count, 0, 1);
 }
 
 static INLINE boolean
@@ -726,6 +476,7 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib,
        struct nouveau_bo *bo = nouveau_bo(vb->buffer);
        float v[4];
        int ret;
+       unsigned nr_components = util_format_get_nr_components(ve->src_format);
 
        ret = nouveau_bo_map(bo, NOUVEAU_BO_RD);
        if (ret)
@@ -736,9 +487,10 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib,
                            0, 0, 1, 1);
        so = *pso;
        if (!so)
-               *pso = so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 4, 0);
+               *pso = so = so_new(nv50->vtxelt->num_elements,
+                                  nv50->vtxelt->num_elements * 4, 0);
 
-       switch (ve->nr_components) {
+       switch (nr_components) {
        case 4:
                so_method(so, tesla, NV50TCL_VTX_ATTR_4F_X(attrib), 4);
                so_data  (so, fui(v[0]));
@@ -775,6 +527,18 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib,
 }
 
 void
+nv50_vtxelt_construct(struct nv50_vtxelt_stateobj *cso)
+{
+       unsigned i;
+
+       for (i = 0; i < cso->num_elements; ++i) {
+               struct pipe_vertex_element *ve = &cso->pipe[i];
+
+               cso->hw[i] = nv50_vbo_vtxelt_to_hw(ve);
+       }
+}
+
+struct nouveau_stateobj *
 nv50_vbo_validate(struct nv50_context *nv50)
 {
        struct nouveau_grobj *tesla = nv50->screen->tesla;
@@ -783,30 +547,31 @@ nv50_vbo_validate(struct nv50_context *nv50)
 
        /* don't validate if Gallium took away our buffers */
        if (nv50->vtxbuf_nr == 0)
-               return;
-       nv50->vbo_fifo = 0;
+               return NULL;
+
+       if (nv50->screen->force_push ||
+           nv50->vertprog->cfg.edgeflag_in < 16)
+               nv50->vbo_fifo = 0xffff;
 
-       for (i = 0; i < nv50->vtxbuf_nr; ++i)
+       for (i = 0; i < nv50->vtxbuf_nr; i++) {
                if (nv50->vtxbuf[i].stride &&
                    !(nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX))
                        nv50->vbo_fifo = 0xffff;
+       }
 
-       if (NV50_USING_LOATHED_EDGEFLAG(nv50))
-               nv50->vbo_fifo = 0xffff; /* vertprog can't set edgeflag */
-
-       n_ve = MAX2(nv50->vtxelt_nr, nv50->state.vtxelt_nr);
+       n_ve = MAX2(nv50->vtxelt->num_elements, nv50->state.vtxelt_nr);
 
        vtxattr = NULL;
-       vtxbuf = so_new(n_ve * 2, n_ve * 5, nv50->vtxelt_nr * 4);
+       vtxbuf = so_new(n_ve * 2, n_ve * 5, nv50->vtxelt->num_elements * 4);
        vtxfmt = so_new(1, n_ve, 0);
        so_method(vtxfmt, tesla, NV50TCL_VERTEX_ARRAY_ATTRIB(0), n_ve);
 
-       for (i = 0; i < nv50->vtxelt_nr; i++) {
-               struct pipe_vertex_element *ve = &nv50->vtxelt[i];
+       for (i = 0; i < nv50->vtxelt->num_elements; i++) {
+               struct pipe_vertex_element *ve = &nv50->vtxelt->pipe[i];
                struct pipe_vertex_buffer *vb =
                        &nv50->vtxbuf[ve->vertex_buffer_index];
                struct nouveau_bo *bo = nouveau_bo(vb->buffer);
-               uint32_t hw = nv50_vbo_vtxelt_to_hw(ve);
+               uint32_t hw = nv50->vtxelt->hw[i];
 
                if (!vb->stride &&
                    nv50_vbo_static_attrib(nv50, i, &vtxattr, ve, vb)) {
@@ -821,13 +586,13 @@ nv50_vbo_validate(struct nv50_context *nv50)
                }
 
                if (nv50->vbo_fifo) {
-                       so_data  (vtxfmt, hw |
-                                 (ve->instance_divisor ? (1 << 4) : i));
+                       so_data  (vtxfmt, hw | (ve->instance_divisor ? (1 << 4) : i));
                        so_method(vtxbuf, tesla,
                                  NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
                        so_data  (vtxbuf, 0);
                        continue;
                }
+
                so_data(vtxfmt, hw | i);
 
                so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 3);
@@ -855,355 +620,13 @@ nv50_vbo_validate(struct nv50_context *nv50)
                so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
                so_data  (vtxbuf, 0);
        }
-       nv50->state.vtxelt_nr = nv50->vtxelt_nr;
+       nv50->state.vtxelt_nr = nv50->vtxelt->num_elements;
 
-       so_ref (vtxfmt, &nv50->state.vtxfmt);
        so_ref (vtxbuf, &nv50->state.vtxbuf);
        so_ref (vtxattr, &nv50->state.vtxattr);
        so_ref (NULL, &vtxbuf);
-       so_ref (NULL, &vtxfmt);
        so_ref (NULL, &vtxattr);
+       return vtxfmt;
 }
 
-typedef void (*pfn_push)(struct nouveau_channel *, void *);
-
-struct nv50_vbo_emitctx
-{
-       pfn_push push[16];
-       uint8_t *map[16];
-       unsigned stride[16];
-       unsigned nr_ve;
-       unsigned vtx_dwords;
-       unsigned vtx_max;
-
-       float edgeflag;
-       unsigned ve_edgeflag;
-};
-
-static INLINE void
-emit_vtx_next(struct nouveau_channel *chan, struct nv50_vbo_emitctx *emit)
-{
-       unsigned i;
-
-       for (i = 0; i < emit->nr_ve; ++i) {
-               emit->push[i](chan, emit->map[i]);
-               emit->map[i] += emit->stride[i];
-       }
-}
-
-static INLINE void
-emit_vtx(struct nouveau_channel *chan, struct nv50_vbo_emitctx *emit,
-        uint32_t vi)
-{
-       unsigned i;
-
-       for (i = 0; i < emit->nr_ve; ++i)
-               emit->push[i](chan, emit->map[i] + emit->stride[i] * vi);
-}
-
-static INLINE boolean
-nv50_map_vbufs(struct nv50_context *nv50)
-{
-       int i;
-
-       for (i = 0; i < nv50->vtxbuf_nr; ++i) {
-               struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i];
-               unsigned size = vb->stride * (vb->max_index + 1) + 16;
-
-               if (nouveau_bo(vb->buffer)->map)
-                       continue;
-
-               size = vb->stride * (vb->max_index + 1) + 16;
-               size = MIN2(size, vb->buffer->size);
-               if (!size)
-                       size = vb->buffer->size;
-
-               if (nouveau_bo_map_range(nouveau_bo(vb->buffer),
-                                        0, size, NOUVEAU_BO_RD))
-                       break;
-       }
-
-       if (i == nv50->vtxbuf_nr)
-               return TRUE;
-       for (; i >= 0; --i)
-               nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer));
-       return FALSE;
-}
-
-static void
-emit_b32_1(struct nouveau_channel *chan, void *data)
-{
-       uint32_t *v = data;
-
-       OUT_RING(chan, v[0]);
-}
-
-static void
-emit_b32_2(struct nouveau_channel *chan, void *data)
-{
-       uint32_t *v = data;
-
-       OUT_RING(chan, v[0]);
-       OUT_RING(chan, v[1]);
-}
-
-static void
-emit_b32_3(struct nouveau_channel *chan, void *data)
-{
-       uint32_t *v = data;
-
-       OUT_RING(chan, v[0]);
-       OUT_RING(chan, v[1]);
-       OUT_RING(chan, v[2]);
-}
-
-static void
-emit_b32_4(struct nouveau_channel *chan, void *data)
-{
-       uint32_t *v = data;
-
-       OUT_RING(chan, v[0]);
-       OUT_RING(chan, v[1]);
-       OUT_RING(chan, v[2]);
-       OUT_RING(chan, v[3]);
-}
-
-static void
-emit_b16_1(struct nouveau_channel *chan, void *data)
-{
-       uint16_t *v = data;
-
-       OUT_RING(chan, v[0]);
-}
-
-static void
-emit_b16_3(struct nouveau_channel *chan, void *data)
-{
-       uint16_t *v = data;
-
-       OUT_RING(chan, (v[1] << 16) | v[0]);
-       OUT_RING(chan, v[2]);
-}
-
-static void
-emit_b08_1(struct nouveau_channel *chan, void *data)
-{
-       uint8_t *v = data;
-
-       OUT_RING(chan, v[0]);
-}
-
-static void
-emit_b08_3(struct nouveau_channel *chan, void *data)
-{
-       uint8_t *v = data;
-
-       OUT_RING(chan, (v[2] << 16) | (v[1] << 8) | v[0]);
-}
-
-static boolean
-emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit,
-            unsigned start)
-{
-       unsigned i;
-
-       if (nv50_map_vbufs(nv50) == FALSE)
-               return FALSE;
-
-       emit->ve_edgeflag = nv50->vertprog->cfg.edgeflag_in;
-
-       emit->edgeflag = 0.5f;
-       emit->nr_ve = 0;
-       emit->vtx_dwords = 0;
-
-       for (i = 0; i < nv50->vtxelt_nr; ++i) {
-               struct pipe_vertex_element *ve;
-               struct pipe_vertex_buffer *vb;
-               unsigned n, size;
-               const struct util_format_description *desc;
-
-               ve = &nv50->vtxelt[i];
-               vb = &nv50->vtxbuf[ve->vertex_buffer_index];
-               if (!(nv50->vbo_fifo & (1 << i)) || ve->instance_divisor)
-                       continue;
-               n = emit->nr_ve++;
-
-               emit->stride[n] = vb->stride;
-               emit->map[n] = (uint8_t *)nouveau_bo(vb->buffer)->map +
-                       vb->buffer_offset +
-                       (start * vb->stride + ve->src_offset);
-
-               desc = util_format_description(ve->src_format);
-               assert(desc);
-
-               size = util_format_get_component_bits(
-                       ve->src_format, UTIL_FORMAT_COLORSPACE_RGB, 0);
-
-               assert(ve->nr_components > 0 && ve->nr_components <= 4);
-
-               /* It shouldn't be necessary to push the implicit 1s
-                * for case 3 and size 8 cases 1, 2, 3.
-                */
-               switch (size) {
-               default:
-                       NOUVEAU_ERR("unsupported vtxelt size: %u\n", size);
-                       return FALSE;
-               case 32:
-                       switch (ve->nr_components) {
-                       case 1: emit->push[n] = emit_b32_1; break;
-                       case 2: emit->push[n] = emit_b32_2; break;
-                       case 3: emit->push[n] = emit_b32_3; break;
-                       case 4: emit->push[n] = emit_b32_4; break;
-                       }
-                       emit->vtx_dwords += ve->nr_components;
-                       break;
-               case 16:
-                       switch (ve->nr_components) {
-                       case 1: emit->push[n] = emit_b16_1; break;
-                       case 2: emit->push[n] = emit_b32_1; break;
-                       case 3: emit->push[n] = emit_b16_3; break;
-                       case 4: emit->push[n] = emit_b32_2; break;
-                       }
-                       emit->vtx_dwords += (ve->nr_components + 1) >> 1;
-                       break;
-               case 8:
-                       switch (ve->nr_components) {
-                       case 1: emit->push[n] = emit_b08_1; break;
-                       case 2: emit->push[n] = emit_b16_1; break;
-                       case 3: emit->push[n] = emit_b08_3; break;
-                       case 4: emit->push[n] = emit_b32_1; break;
-                       }
-                       emit->vtx_dwords += 1;
-                       break;
-               }
-       }
-
-       emit->vtx_max = 512 / emit->vtx_dwords;
-       if (emit->ve_edgeflag < 16)
-               emit->vtx_max = 1;
-
-       return TRUE;
-}
-
-static INLINE void
-set_edgeflag(struct nouveau_channel *chan,
-            struct nouveau_grobj *tesla,
-            struct nv50_vbo_emitctx *emit, uint32_t index)
-{
-       unsigned i = emit->ve_edgeflag;
-
-       if (i < 16) {
-               float f = *((float *)(emit->map[i] + index * emit->stride[i]));
-
-               if (emit->edgeflag != f) {
-                       emit->edgeflag = f;
-
-                       BEGIN_RING(chan, tesla, 0x15e4, 1);
-                       OUT_RING  (chan, f ? 1 : 0);
-               }
-       }
-}
-
-static boolean
-nv50_push_arrays(struct nv50_context *nv50, unsigned start, unsigned count)
-{
-       struct nouveau_channel *chan = nv50->screen->base.channel;
-       struct nouveau_grobj *tesla = nv50->screen->tesla;
-       struct nv50_vbo_emitctx emit;
 
-       if (emit_prepare(nv50, &emit, start) == FALSE)
-               return FALSE;
-
-       while (count) {
-               unsigned i, dw, nr = MIN2(count, emit.vtx_max);
-               dw = nr * emit.vtx_dwords;
-
-               set_edgeflag(chan, tesla, &emit, 0); /* nr will be 1 */
-
-               BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw);
-               for (i = 0; i < nr; ++i)
-                       emit_vtx_next(chan, &emit);
-
-               count -= nr;
-       }
-
-       return TRUE;
-}
-
-static boolean
-nv50_push_elements_u32(struct nv50_context *nv50, uint32_t *map, unsigned count)
-{
-       struct nouveau_channel *chan = nv50->screen->base.channel;
-       struct nouveau_grobj *tesla = nv50->screen->tesla;
-       struct nv50_vbo_emitctx emit;
-
-       if (emit_prepare(nv50, &emit, 0) == FALSE)
-               return FALSE;
-
-       while (count) {
-               unsigned i, dw, nr = MIN2(count, emit.vtx_max);
-               dw = nr * emit.vtx_dwords;
-
-               set_edgeflag(chan, tesla, &emit, *map);
-
-               BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw);
-               for (i = 0; i < nr; ++i)
-                       emit_vtx(chan, &emit, *map++);
-
-               count -= nr;
-       }
-
-       return TRUE;
-}
-
-static boolean
-nv50_push_elements_u16(struct nv50_context *nv50, uint16_t *map, unsigned count)
-{
-       struct nouveau_channel *chan = nv50->screen->base.channel;
-       struct nouveau_grobj *tesla = nv50->screen->tesla;
-       struct nv50_vbo_emitctx emit;
-
-       if (emit_prepare(nv50, &emit, 0) == FALSE)
-               return FALSE;
-
-       while (count) {
-               unsigned i, dw, nr = MIN2(count, emit.vtx_max);
-               dw = nr * emit.vtx_dwords;
-
-               set_edgeflag(chan, tesla, &emit, *map);
-
-               BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw);
-               for (i = 0; i < nr; ++i)
-                       emit_vtx(chan, &emit, *map++);
-
-               count -= nr;
-       }
-
-       return TRUE;
-}
-
-static boolean
-nv50_push_elements_u08(struct nv50_context *nv50, uint8_t *map, unsigned count)
-{
-       struct nouveau_channel *chan = nv50->screen->base.channel;
-       struct nouveau_grobj *tesla = nv50->screen->tesla;
-       struct nv50_vbo_emitctx emit;
-
-       if (emit_prepare(nv50, &emit, 0) == FALSE)
-               return FALSE;
-
-       while (count) {
-               unsigned i, dw, nr = MIN2(count, emit.vtx_max);
-               dw = nr * emit.vtx_dwords;
-
-               set_edgeflag(chan, tesla, &emit, *map);
-
-               BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw);
-               for (i = 0; i < nr; ++i)
-                       emit_vtx(chan, &emit, *map++);
-
-               count -= nr;
-       }
-
-       return TRUE;
-}
index 513cc0f5d44e433d9051ac85232fc65a3f776cb1..b7ad6b2020608169d2939ef541537aa1945e3585 100644 (file)
@@ -36,6 +36,7 @@ static void r300_blitter_save_states(struct r300_context* r300)
     util_blitter_save_vertex_shader(r300->blitter, r300->vs_state.state);
     util_blitter_save_viewport(r300->blitter, &r300->viewport);
     util_blitter_save_clip(r300->blitter, &r300->clip);
+    util_blitter_save_vertex_elements(r300->blitter, r300->velems);
 }
 
 /* Clear currently bound buffers. */
index e0a553232737d3a90eefc09188798a18b932984a..923e1e541ff490e74983e7d21ea44c4e99a34dcf 100644 (file)
@@ -60,7 +60,6 @@ static void r300_destroy_context(struct pipe_context* context)
     FREE(r300->rs_block_state.state);
     FREE(r300->scissor_state.state);
     FREE(r300->textures_state.state);
-    FREE(r300->vertex_stream_state.state);
     FREE(r300->vap_output_state.state);
     FREE(r300->viewport_state.state);
     FREE(r300->ztop_state.state);
@@ -147,7 +146,6 @@ static void r300_setup_atoms(struct r300_context* r300)
     r300->rs_block_state.state = CALLOC_STRUCT(r300_rs_block);
     r300->scissor_state.state = CALLOC_STRUCT(pipe_scissor_state);
     r300->textures_state.state = CALLOC_STRUCT(r300_textures_state);
-    r300->vertex_stream_state.state = CALLOC_STRUCT(r300_vertex_stream_state);
     r300->vap_output_state.state = CALLOC_STRUCT(r300_vap_output_state);
     r300->viewport_state.state = CALLOC_STRUCT(r300_viewport_state);
     r300->ztop_state.state = CALLOC_STRUCT(r300_ztop_state);
index c2825d5f2676923349d0de20758fa71ff8714421..985e33911263f073c5430e2d48935047f32c149c 100644 (file)
@@ -278,6 +278,23 @@ struct r300_texture {
     enum r300_buffer_tiling microtile, macrotile;
 };
 
+struct r300_vertex_info {
+    /* Parent class */
+    struct vertex_info vinfo;
+
+    /* R300_VAP_PROG_STREAK_CNTL_[0-7] */
+    uint32_t vap_prog_stream_cntl[8];
+    /* R300_VAP_PROG_STREAK_CNTL_EXT_[0-7] */
+    uint32_t vap_prog_stream_cntl_ext[8];
+};
+
+struct r300_vertex_element_state {
+    unsigned count;
+    struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
+
+    struct r300_vertex_stream_state vertex_stream;
+};
+
 extern struct pipe_viewport_state r300_viewport_identity;
 
 struct r300_context {
@@ -350,8 +367,7 @@ struct r300_context {
     int vertex_buffer_count;
     int vertex_buffer_max_index;
     /* Vertex elements for Gallium. */
-    struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
-    int vertex_element_count;
+    struct r300_vertex_element_state *velems;
 
     /* Vertex info for Draw. */
     struct vertex_info vertex_info;
index 51fc590e5d927615c8226056e38db0ca824d6252..55e9217fd323cc24f92dc09b0426b2817d2caeb8 100644 (file)
@@ -757,9 +757,9 @@ void r300_emit_textures_state(struct r300_context *r300,
 void r300_emit_aos(struct r300_context* r300, unsigned offset)
 {
     struct pipe_vertex_buffer *vb1, *vb2, *vbuf = r300->vertex_buffer;
-    struct pipe_vertex_element *velem = r300->vertex_element;
+    struct pipe_vertex_element *velem = r300->velems->velem;
     int i;
-    unsigned size1, size2, aos_count = r300->vertex_element_count;
+    unsigned size1, size2, aos_count = r300->velems->count;
     unsigned packet_size = (aos_count * 3 + 1) / 2;
     CS_LOCALS(r300);
 
@@ -1004,7 +1004,7 @@ void r300_emit_buffer_validate(struct r300_context *r300,
         (struct r300_textures_state*)r300->textures_state.state;
     struct r300_texture* tex;
     struct pipe_vertex_buffer *vbuf = r300->vertex_buffer;
-    struct pipe_vertex_element *velem = r300->vertex_element;
+    struct pipe_vertex_element *velem = r300->velems->velem;
     struct pipe_buffer *pbuf;
     unsigned i;
     boolean invalid = FALSE;
@@ -1062,7 +1062,7 @@ validate:
     }
     /* ...vertex buffers for HWTCL path... */
     if (do_validate_vertex_buffers) {
-        for (i = 0; i < r300->vertex_element_count; i++) {
+        for (i = 0; i < r300->velems->count; i++) {
             pbuf = vbuf[velem[i].vertex_buffer_index].buffer;
 
             if (!r300->winsys->add_buffer(r300->winsys, pbuf,
index c8420bcdd5be0fb28b9e3f565187a777eb2abd09..9c001ae186de24cea10f958837ff928994bdb011 100644 (file)
@@ -143,7 +143,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
 {
     struct pipe_vertex_element* velem;
     struct pipe_vertex_buffer* vbuf;
-    unsigned vertex_element_count = r300->vertex_element_count;
+    unsigned vertex_element_count = r300->velems->count;
     unsigned i, v, vbi, dw, elem_offset, dwords;
 
     /* Size of the vertex, in dwords. */
@@ -166,7 +166,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
 
     /* Calculate the vertex size, offsets, strides etc. and map the buffers. */
     for (i = 0; i < vertex_element_count; i++) {
-        velem = &r300->vertex_element[i];
+        velem = &r300->velems->velem[i];
         offset[i] = velem->src_offset / 4;
         size[i] = util_format_get_blocksize(velem->src_format) / 4;
         vertex_size += size[i];
@@ -183,7 +183,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
         }
     }
 
-    dwords = 10 + count * vertex_size;
+    dwords = 9 + count * vertex_size;
 
     r300_reserve_cs_space(r300, r300_get_num_dirty_dwords(r300) + dwords);
     r300_emit_buffer_validate(r300, FALSE, NULL);
@@ -193,8 +193,9 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
     OUT_CS_REG(R300_GA_COLOR_CONTROL,
             r300_provoking_vertex_fixes(r300, mode));
     OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size);
-    OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0);
-    OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count - 1);
+    OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
+    OUT_CS(count - 1);
+    OUT_CS(0);
     OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size);
     OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) |
             r300_translate_primitive(mode));
@@ -202,7 +203,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
     /* Emit vertices. */
     for (v = 0; v < count; v++) {
         for (i = 0; i < vertex_element_count; i++) {
-            velem = &r300->vertex_element[i];
+            velem = &r300->velems->velem[i];
             vbi = velem->vertex_buffer_index;
             elem_offset = offset[i] + stride[vbi] * (v + start);
 
@@ -215,7 +216,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
 
     /* Unmap buffers. */
     for (i = 0; i < vertex_element_count; i++) {
-        vbi = r300->vertex_element[i].vertex_buffer_index;
+        vbi = r300->velems->velem[i].vertex_buffer_index;
 
         if (map[vbi]) {
             vbuf = &r300->vertex_buffer[vbi];
@@ -238,15 +239,16 @@ static void r300_emit_draw_arrays(struct r300_context *r300,
 
     if (alt_num_verts) {
         assert(count < (1 << 24));
-        BEGIN_CS(10);
+        BEGIN_CS(9);
         OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count);
     } else {
-        BEGIN_CS(8);
+        BEGIN_CS(7);
     }
     OUT_CS_REG(R300_GA_COLOR_CONTROL,
             r300_provoking_vertex_fixes(r300, mode));
-    OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0);
-    OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count - 1);
+    OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
+    OUT_CS(count - 1);
+    OUT_CS(0);
     OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
     OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) |
            r300_translate_primitive(mode) |
@@ -281,15 +283,16 @@ static void r300_emit_draw_elements(struct r300_context *r300,
     maxIndex = MIN2(maxIndex, r300->vertex_buffer_max_index);
 
     if (alt_num_verts) {
-        BEGIN_CS(16);
+        BEGIN_CS(15);
         OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count);
     } else {
-        BEGIN_CS(14);
+        BEGIN_CS(13);
     }
     OUT_CS_REG(R300_GA_COLOR_CONTROL,
             r300_provoking_vertex_fixes(r300, mode));
-    OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, minIndex);
-    OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, maxIndex);
+    OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
+    OUT_CS(maxIndex);
+    OUT_CS(minIndex);
     OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0);
     if (indexSize == 4) {
         count_dwords = count;
index 8c9f6046228810a0da00dc405ab61e355b394b8f..bd4c2766cb1426859c0fe11a538ef78ee8759a01 100644 (file)
@@ -1046,19 +1046,17 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe,
     if (r300->draw) {
         draw_flush(r300->draw);
         draw_set_vertex_buffers(r300->draw, count, buffers);
-    } else {
-        r300->vertex_stream_state.dirty = TRUE;
     }
 }
 
 static boolean r300_validate_aos(struct r300_context *r300)
 {
     struct pipe_vertex_buffer *vbuf = r300->vertex_buffer;
-    struct pipe_vertex_element *velem = r300->vertex_element;
+    struct pipe_vertex_element *velem = r300->velems->velem;
     int i;
 
     /* Check if formats and strides are aligned to the size of DWORD. */
-    for (i = 0; i < r300->vertex_element_count; i++) {
+    for (i = 0; i < r300->velems->count; i++) {
         if (vbuf[velem[i].vertex_buffer_index].stride % 4 != 0 ||
             util_format_get_blocksize(velem[i].src_format) % 4 != 0) {
             return FALSE;
@@ -1067,20 +1065,209 @@ static boolean r300_validate_aos(struct r300_context *r300)
     return TRUE;
 }
 
-static void r300_set_vertex_elements(struct pipe_context* pipe,
-                                    unsigned count,
-                                    const struct pipe_vertex_element* elements)
+static void r300_draw_emit_attrib(struct r300_context* r300,
+                                  enum attrib_emit emit,
+                                  enum interp_mode interp,
+                                  int index)
 {
-    struct r300_context* r300 = r300_context(pipe);
+    struct r300_vertex_shader* vs = r300->vs_state.state;
+    struct tgsi_shader_info* info = &vs->info;
+    int output;
+
+    output = draw_find_shader_output(r300->draw,
+                                     info->output_semantic_name[index],
+                                     info->output_semantic_index[index]);
+    draw_emit_vertex_attr(&r300->vertex_info, emit, interp, output);
+}
+
+static void r300_draw_emit_all_attribs(struct r300_context* r300)
+{
+    struct r300_vertex_shader* vs = r300->vs_state.state;
+    struct r300_shader_semantics* vs_outputs = &vs->outputs;
+    int i, gen_count;
+
+    /* Position. */
+    if (vs_outputs->pos != ATTR_UNUSED) {
+        r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
+                              vs_outputs->pos);
+    } else {
+        assert(0);
+    }
+
+    /* Point size. */
+    if (vs_outputs->psize != ATTR_UNUSED) {
+        r300_draw_emit_attrib(r300, EMIT_1F_PSIZE, INTERP_POS,
+                              vs_outputs->psize);
+    }
+
+    /* Colors. */
+    for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+        if (vs_outputs->color[i] != ATTR_UNUSED) {
+            r300_draw_emit_attrib(r300, EMIT_4F, INTERP_LINEAR,
+                                  vs_outputs->color[i]);
+        }
+    }
+
+    /* XXX Back-face colors. */
+
+    /* Texture coordinates. */
+    gen_count = 0;
+    for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
+        if (vs_outputs->generic[i] != ATTR_UNUSED) {
+            r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
+                                  vs_outputs->generic[i]);
+            gen_count++;
+        }
+    }
+
+    /* Fog coordinates. */
+    if (vs_outputs->fog != ATTR_UNUSED) {
+        r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
+                              vs_outputs->fog);
+        gen_count++;
+    }
+
+    /* XXX magic */
+    assert(gen_count <= 8);
+}
+
+/* Update the PSC tables. */
+static void r300_vertex_psc(struct r300_vertex_element_state *velems)
+{
+    struct r300_vertex_stream_state *vstream = &velems->vertex_stream;
+    uint16_t type, swizzle;
+    enum pipe_format format;
+    unsigned i;
+
+    assert(velems->count <= 16);
+
+    /* Vertex shaders have no semantics on their inputs,
+     * so PSC should just route stuff based on the vertex elements,
+     * and not on attrib information. */
+    for (i = 0; i < velems->count; i++) {
+        format = velems->velem[i].src_format;
+
+        type = r300_translate_vertex_data_type(format) |
+            (i << R300_DST_VEC_LOC_SHIFT);
+        swizzle = r300_translate_vertex_data_swizzle(format);
+
+        if (i & 1) {
+            vstream->vap_prog_stream_cntl[i >> 1] |= type << 16;
+            vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16;
+        } else {
+            vstream->vap_prog_stream_cntl[i >> 1] |= type;
+            vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle;
+        }
+    }
 
-    memcpy(r300->vertex_element,
-           elements,
-           sizeof(struct pipe_vertex_element) * count);
-    r300->vertex_element_count = count;
+    /* Set the last vector in the PSC. */
+    if (i) {
+        i -= 1;
+    }
+    vstream->vap_prog_stream_cntl[i >> 1] |=
+        (R300_LAST_VEC << (i & 1 ? 16 : 0));
+
+    vstream->count = (i >> 1) + 1;
+}
+
+/* Update the PSC tables for SW TCL, using Draw. */
+static void r300_swtcl_vertex_psc(struct r300_context *r300,
+                                  struct r300_vertex_element_state *velems)
+{
+    struct r300_vertex_stream_state *vstream = &velems->vertex_stream;
+    struct r300_vertex_shader* vs = r300->vs_state.state;
+    struct vertex_info* vinfo = &r300->vertex_info;
+    uint16_t type, swizzle;
+    enum pipe_format format;
+    unsigned i, attrib_count;
+    int* vs_output_tab = vs->stream_loc_notcl;
+
+    /* For each Draw attribute, route it to the fragment shader according
+     * to the vs_output_tab. */
+    attrib_count = vinfo->num_attribs;
+    DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count);
+    for (i = 0; i < attrib_count; i++) {
+        DBG(r300, DBG_DRAW, "r300: attrib: offset %d, interp %d, size %d,"
+               " vs_output_tab %d\n", vinfo->attrib[i].src_index,
+               vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit,
+               vs_output_tab[i]);
+    }
+
+    for (i = 0; i < attrib_count; i++) {
+        /* Make sure we have a proper destination for our attribute. */
+        assert(vs_output_tab[i] != -1);
+
+        format = draw_translate_vinfo_format(vinfo->attrib[i].emit);
+
+        /* Obtain the type of data in this attribute. */
+        type = r300_translate_vertex_data_type(format) |
+            vs_output_tab[i] << R300_DST_VEC_LOC_SHIFT;
+
+        /* Obtain the swizzle for this attribute. Note that the default
+         * swizzle in the hardware is not XYZW! */
+        swizzle = r300_translate_vertex_data_swizzle(format);
+
+        /* Add the attribute to the PSC table. */
+        if (i & 1) {
+            vstream->vap_prog_stream_cntl[i >> 1] |= type << 16;
+            vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16;
+        } else {
+            vstream->vap_prog_stream_cntl[i >> 1] |= type;
+            vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle;
+        }
+    }
+
+    /* Set the last vector in the PSC. */
+    if (i) {
+        i -= 1;
+    }
+    vstream->vap_prog_stream_cntl[i >> 1] |=
+        (R300_LAST_VEC << (i & 1 ? 16 : 0));
+
+    vstream->count = (i >> 1) + 1;
+}
+
+static void* r300_create_vertex_elements_state(struct pipe_context* pipe,
+                                               unsigned count,
+                                               const struct pipe_vertex_element* attribs)
+{
+    struct r300_context *r300 = r300_context(pipe);
+    struct r300_screen* r300screen = r300_screen(pipe->screen);
+    struct r300_vertex_element_state *velems;
+
+    assert(count <= PIPE_MAX_ATTRIBS);
+    velems = CALLOC_STRUCT(r300_vertex_element_state);
+    if (velems != NULL) {
+        velems->count = count;
+        memcpy(velems->velem, attribs, sizeof(struct pipe_vertex_element) * count);
+
+        if (r300screen->caps->has_tcl) {
+            r300_vertex_psc(velems);
+        } else {
+            memset(&r300->vertex_info, 0, sizeof(struct vertex_info));
+            r300_draw_emit_all_attribs(r300);
+            draw_compute_vertex_size(&r300->vertex_info);
+            r300_swtcl_vertex_psc(r300, velems);
+        }
+    }
+    return velems;
+}
+
+static void r300_bind_vertex_elements_state(struct pipe_context *pipe,
+                                            void *state)
+{
+    struct r300_context *r300 = r300_context(pipe);
+    struct r300_vertex_element_state *velems = state;
+
+    if (velems == NULL) {
+        return;
+    }
+
+    r300->velems = velems;
 
     if (r300->draw) {
         draw_flush(r300->draw);
-        draw_set_vertex_elements(r300->draw, count, elements);
+        draw_set_vertex_elements(r300->draw, velems->count, velems->velem);
     }
 
     if (!r300_validate_aos(r300)) {
@@ -1088,6 +1275,14 @@ static void r300_set_vertex_elements(struct pipe_context* pipe,
         assert(0);
         abort();
     }
+
+    UPDATE_STATE(&velems->vertex_stream, r300->vertex_stream_state);
+    r300->vertex_stream_state.size = (1 + velems->vertex_stream.count) * 2;
+}
+
+static void r300_delete_vertex_elements_state(struct pipe_context *pipe, void *state)
+{
+   FREE(state);
 }
 
 static void* r300_create_vs_state(struct pipe_context* pipe,
@@ -1262,7 +1457,10 @@ void r300_init_state_functions(struct r300_context* r300)
     r300->context.set_viewport_state = r300_set_viewport_state;
 
     r300->context.set_vertex_buffers = r300_set_vertex_buffers;
-    r300->context.set_vertex_elements = r300_set_vertex_elements;
+
+    r300->context.create_vertex_elements_state = r300_create_vertex_elements_state;
+    r300->context.bind_vertex_elements_state = r300_bind_vertex_elements_state;
+    r300->context.delete_vertex_elements_state = r300_delete_vertex_elements_state;
 
     r300->context.create_vs_state = r300_create_vs_state;
     r300->context.bind_vs_state = r300_bind_vs_state;
index e9e40747ef1f5f9d8f939ca857e0411ac65c08a1..6b9f61acd7bcfc95279bb23b328c101c6ca1080b 100644 (file)
 /* r300_state_derived: Various bits of state which are dependent upon
  * currently bound CSO data. */
 
-static void r300_draw_emit_attrib(struct r300_context* r300,
-                                  enum attrib_emit emit,
-                                  enum interp_mode interp,
-                                  int index)
-{
-    struct r300_vertex_shader* vs = r300->vs_state.state;
-    struct tgsi_shader_info* info = &vs->info;
-    int output;
-
-    output = draw_find_shader_output(r300->draw,
-                                     info->output_semantic_name[index],
-                                     info->output_semantic_index[index]);
-    draw_emit_vertex_attr(&r300->vertex_info, emit, interp, output);
-}
-
-static void r300_draw_emit_all_attribs(struct r300_context* r300)
-{
-    struct r300_vertex_shader* vs = r300->vs_state.state;
-    struct r300_shader_semantics* vs_outputs = &vs->outputs;
-    int i, gen_count;
-
-    /* Position. */
-    if (vs_outputs->pos != ATTR_UNUSED) {
-        r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
-                              vs_outputs->pos);
-    } else {
-        assert(0);
-    }
-
-    /* Point size. */
-    if (vs_outputs->psize != ATTR_UNUSED) {
-        r300_draw_emit_attrib(r300, EMIT_1F_PSIZE, INTERP_POS,
-                              vs_outputs->psize);
-    }
-
-    /* Colors. */
-    for (i = 0; i < ATTR_COLOR_COUNT; i++) {
-        if (vs_outputs->color[i] != ATTR_UNUSED) {
-            r300_draw_emit_attrib(r300, EMIT_4F, INTERP_LINEAR,
-                                  vs_outputs->color[i]);
-        }
-    }
-
-    /* XXX Back-face colors. */
-
-    /* Texture coordinates. */
-    gen_count = 0;
-    for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
-        if (vs_outputs->generic[i] != ATTR_UNUSED) {
-            r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
-                                  vs_outputs->generic[i]);
-            gen_count++;
-        }
-    }
-
-    /* Fog coordinates. */
-    if (vs_outputs->fog != ATTR_UNUSED) {
-        r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
-                              vs_outputs->fog);
-        gen_count++;
-    }
-
-    /* XXX magic */
-    assert(gen_count <= 8);
-}
-
-/* Update the PSC tables. */
-/* XXX move this function into r300_state.c after TCL-bypass gets removed
- * XXX because this one is dependent only on vertex elements. */
-static void r300_vertex_psc(struct r300_context* r300)
-{
-    struct r300_vertex_shader* vs = r300->vs_state.state;
-    struct r300_vertex_stream_state *vformat =
-        (struct r300_vertex_stream_state*)r300->vertex_stream_state.state;
-    uint16_t type, swizzle;
-    enum pipe_format format;
-    unsigned i;
-    int identity[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
-    int* stream_tab;
-
-    memset(vformat, 0, sizeof(struct r300_vertex_stream_state));
-
-    stream_tab = identity;
-
-    /* Vertex shaders have no semantics on their inputs,
-     * so PSC should just route stuff based on the vertex elements,
-     * and not on attrib information. */
-    DBG(r300, DBG_DRAW, "r300: vs expects %d attribs, routing %d elements"
-            " in psc\n",
-            vs->info.num_inputs,
-            r300->vertex_element_count);
-
-    for (i = 0; i < r300->vertex_element_count; i++) {
-        format = r300->vertex_element[i].src_format;
-
-        type = r300_translate_vertex_data_type(format) |
-            (stream_tab[i] << R300_DST_VEC_LOC_SHIFT);
-        swizzle = r300_translate_vertex_data_swizzle(format);
-
-        if (i & 1) {
-            vformat->vap_prog_stream_cntl[i >> 1] |= type << 16;
-            vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16;
-        } else {
-            vformat->vap_prog_stream_cntl[i >> 1] |= type;
-            vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle;
-        }
-    }
-
-    assert(i <= 15);
-
-    /* Set the last vector in the PSC. */
-    if (i) {
-        i -= 1;
-    }
-    vformat->vap_prog_stream_cntl[i >> 1] |=
-        (R300_LAST_VEC << (i & 1 ? 16 : 0));
-
-    vformat->count = (i >> 1) + 1;
-    r300->vertex_stream_state.size = (1 + vformat->count) * 2;
-}
-
-/* Update the PSC tables for SW TCL, using Draw. */
-static void r300_swtcl_vertex_psc(struct r300_context* r300)
-{
-    struct r300_vertex_shader* vs = r300->vs_state.state;
-    struct r300_vertex_stream_state *vformat =
-        (struct r300_vertex_stream_state*)r300->vertex_stream_state.state;
-    struct vertex_info* vinfo = &r300->vertex_info;
-    uint16_t type, swizzle;
-    enum pipe_format format;
-    unsigned i, attrib_count;
-    int* vs_output_tab = vs->stream_loc_notcl;
-
-    memset(vformat, 0, sizeof(struct r300_vertex_stream_state));
-
-    /* For each Draw attribute, route it to the fragment shader according
-     * to the vs_output_tab. */
-    attrib_count = vinfo->num_attribs;
-    DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count);
-    for (i = 0; i < attrib_count; i++) {
-        DBG(r300, DBG_DRAW, "r300: attrib: offset %d, interp %d, size %d,"
-               " vs_output_tab %d\n", vinfo->attrib[i].src_index,
-               vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit,
-               vs_output_tab[i]);
-    }
-
-    for (i = 0; i < attrib_count; i++) {
-        /* Make sure we have a proper destination for our attribute. */
-        assert(vs_output_tab[i] != -1);
-
-        format = draw_translate_vinfo_format(vinfo->attrib[i].emit);
-
-        /* Obtain the type of data in this attribute. */
-        type = r300_translate_vertex_data_type(format) |
-            vs_output_tab[i] << R300_DST_VEC_LOC_SHIFT;
-
-        /* Obtain the swizzle for this attribute. Note that the default
-         * swizzle in the hardware is not XYZW! */
-        swizzle = r300_translate_vertex_data_swizzle(format);
-
-        /* Add the attribute to the PSC table. */
-        if (i & 1) {
-            vformat->vap_prog_stream_cntl[i >> 1] |= type << 16;
-            vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16;
-        } else {
-            vformat->vap_prog_stream_cntl[i >> 1] |= type;
-            vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle;
-        }
-    }
-
-    /* Set the last vector in the PSC. */
-    if (i) {
-        i -= 1;
-    }
-    vformat->vap_prog_stream_cntl[i >> 1] |=
-        (R300_LAST_VEC << (i & 1 ? 16 : 0));
-
-    vformat->count = (i >> 1) + 1;
-    r300->vertex_stream_state.size = (1 + vformat->count) * 2;
-}
-
 static void r300_rs_col(struct r300_rs_block* rs, int id, int ptr,
                         boolean swizzle_0001)
 {
@@ -432,18 +251,8 @@ static void r300_update_rs_block(struct r300_context* r300,
 static void r300_update_derived_shader_state(struct r300_context* r300)
 {
     struct r300_vertex_shader* vs = r300->vs_state.state;
-    struct r300_screen* r300screen = r300_screen(r300->context.screen);
 
     r300_update_rs_block(r300, &vs->outputs, &r300->fs->inputs);
-
-    if (r300screen->caps->has_tcl) {
-        r300_vertex_psc(r300);
-    } else {
-        memset(&r300->vertex_info, 0, sizeof(struct vertex_info));
-        r300_draw_emit_all_attribs(r300);
-        draw_compute_vertex_size(&r300->vertex_info);
-        r300_swtcl_vertex_psc(r300);
-    }
 }
 
 static boolean r300_dsa_writes_depth_stencil(struct r300_dsa_state* dsa)
@@ -576,8 +385,7 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300)
 
 void r300_update_derived_state(struct r300_context* r300)
 {
-    if (r300->rs_block_state.dirty ||
-        r300->vertex_stream_state.dirty) { /* XXX put updating PSC out of this file */
+    if (r300->rs_block_state.dirty) {
         r300_update_derived_shader_state(r300);
     }
 
index af7827820cca7549b083743054d4e57069806198..a32924ed0a33ac926bfc6a062dea55f1c722847f 100644 (file)
@@ -348,39 +348,12 @@ static INLINE uint32_t r300_translate_gb_pipes(int pipe_count)
     return 0;
 }
 
-/* Utility function to count the number of components in RGBAZS formats.
- * XXX should go to util or p_format.h */
-static INLINE unsigned pf_component_count(enum pipe_format format) {
-    unsigned count = 0;
-
-    if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0)) {
-        count++;
-    }
-    if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 1)) {
-        count++;
-    }
-    if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 2)) {
-        count++;
-    }
-    if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 3)) {
-        count++;
-    }
-    if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_ZS, 0)) {
-        count++;
-    }
-    if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_ZS, 1)) {
-        count++;
-    }
-
-    return count;
-}
-
 /* Translate pipe_formats into PSC vertex types. */
 static INLINE uint16_t
 r300_translate_vertex_data_type(enum pipe_format format) {
     uint32_t result = 0;
     const struct util_format_description *desc;
-    unsigned components = pf_component_count(format);
+    unsigned components = util_format_get_nr_components(format);
 
     desc = util_format_description(format);
 
index 53eecd1cbf389604143ec708ed80a181ee458f32..30494719f7b9fa817e1dd3d241874eed834b4c4c 100644 (file)
@@ -245,6 +245,10 @@ softpipe_create_context( struct pipe_screen *screen,
    softpipe->pipe.bind_gs_state   = softpipe_bind_gs_state;
    softpipe->pipe.delete_gs_state = softpipe_delete_gs_state;
 
+   softpipe->pipe.create_vertex_elements_state = softpipe_create_vertex_elements_state;
+   softpipe->pipe.bind_vertex_elements_state = softpipe_bind_vertex_elements_state;
+   softpipe->pipe.delete_vertex_elements_state = softpipe_delete_vertex_elements_state;
+
    softpipe->pipe.set_blend_color = softpipe_set_blend_color;
    softpipe->pipe.set_stencil_ref = softpipe_set_stencil_ref;
    softpipe->pipe.set_clip_state = softpipe_set_clip_state;
@@ -257,7 +261,6 @@ softpipe_create_context( struct pipe_screen *screen,
    softpipe->pipe.set_viewport_state = softpipe_set_viewport_state;
 
    softpipe->pipe.set_vertex_buffers = softpipe_set_vertex_buffers;
-   softpipe->pipe.set_vertex_elements = softpipe_set_vertex_elements;
 
    softpipe->pipe.draw_arrays = softpipe_draw_arrays;
    softpipe->pipe.draw_elements = softpipe_draw_elements;
index 3d69cfdb114aa896dca6b15973179ec22508b6eb..9a8158e6a22978f47bdcbe213cbc8ee25f7efd4c 100644 (file)
@@ -45,6 +45,7 @@ struct softpipe_tile_cache;
 struct softpipe_tex_tile_cache;
 struct sp_fragment_shader;
 struct sp_vertex_shader;
+struct sp_velems_state;
 
 
 struct softpipe_context {
@@ -59,6 +60,7 @@ struct softpipe_context {
    struct sp_fragment_shader *fs;
    struct sp_vertex_shader *vs;
    struct sp_geometry_shader *gs;
+   struct sp_velems_state *velems;
 
    /** Other rendering state */
    struct pipe_blend_color blend_color;
@@ -72,13 +74,11 @@ struct softpipe_context {
    struct pipe_texture *vertex_textures[PIPE_MAX_VERTEX_SAMPLERS];
    struct pipe_viewport_state viewport;
    struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
-   struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
 
    unsigned num_samplers;
    unsigned num_textures;
    unsigned num_vertex_samplers;
    unsigned num_vertex_textures;
-   unsigned num_vertex_elements;
    unsigned num_vertex_buffers;
 
    unsigned dirty; /**< Mask of SP_NEW_x flags */
index 4370bbeaee2713de4aa78ed921419faeffe7a56a..6b01c0f4d722b13a80b724d245a896850d8f175a 100644 (file)
@@ -100,6 +100,11 @@ struct sp_geometry_shader {
    struct draw_geometry_shader *draw_data;
 };
 
+struct sp_velems_state {
+   unsigned count;
+   struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
+};
+
 
 void *
 softpipe_create_blend_state(struct pipe_context *,
@@ -160,8 +165,14 @@ void *softpipe_create_gs_state(struct pipe_context *,
 void softpipe_bind_gs_state(struct pipe_context *, void *);
 void softpipe_delete_gs_state(struct pipe_context *, void *);
 
+void *softpipe_create_vertex_elements_state(struct pipe_context *,
+                                            unsigned count,
+                                            const struct pipe_vertex_element *);
+void softpipe_bind_vertex_elements_state(struct pipe_context *, void *);
+void softpipe_delete_vertex_elements_state(struct pipe_context *, void *);
+
 void softpipe_set_polygon_stipple( struct pipe_context *,
-                                 const struct pipe_poly_stipple * );
+                                   const struct pipe_poly_stipple * );
 
 void softpipe_set_scissor_state( struct pipe_context *,
                                  const struct pipe_scissor_state * );
@@ -178,10 +189,6 @@ softpipe_set_vertex_sampler_textures(struct pipe_context *,
 void softpipe_set_viewport_state( struct pipe_context *,
                                   const struct pipe_viewport_state * );
 
-void softpipe_set_vertex_elements(struct pipe_context *,
-                                  unsigned count,
-                                  const struct pipe_vertex_element *);
-
 void softpipe_set_vertex_buffers(struct pipe_context *,
                                  unsigned count,
                                  const struct pipe_vertex_buffer *);
index b491d92ed154612dfe8f4d182d3226e45c1dc46f..a151758ddcad96c79f6144952e13f50401313f8b 100644 (file)
 #include "sp_context.h"
 #include "sp_state.h"
 
+#include "util/u_memory.h"
 #include "draw/draw_context.h"
 
 
+void *
+softpipe_create_vertex_elements_state(struct pipe_context *pipe,
+                                      unsigned count,
+                                      const struct pipe_vertex_element *attribs)
+{
+   struct sp_velems_state *velems;
+   assert(count <= PIPE_MAX_ATTRIBS);
+   velems = (struct sp_velems_state *) MALLOC(sizeof(struct sp_velems_state));
+   if (velems) {
+      velems->count = count;
+      memcpy(velems->velem, attribs, sizeof(*attribs) * count);
+   }
+   return velems;
+}
+
 void
-softpipe_set_vertex_elements(struct pipe_context *pipe,
-                             unsigned count,
-                             const struct pipe_vertex_element *attribs)
+softpipe_bind_vertex_elements_state(struct pipe_context *pipe,
+                                    void *velems)
 {
    struct softpipe_context *softpipe = softpipe_context(pipe);
+   struct sp_velems_state *sp_velems = (struct sp_velems_state *) velems;
 
-   assert(count <= PIPE_MAX_ATTRIBS);
-
-   memcpy(softpipe->vertex_element, attribs,
-          count * sizeof(struct pipe_vertex_element));
-   softpipe->num_vertex_elements = count;
+   softpipe->velems = sp_velems;
 
    softpipe->dirty |= SP_NEW_VERTEX;
 
-   draw_set_vertex_elements(softpipe->draw, count, attribs);
+   draw_set_vertex_elements(softpipe->draw, sp_velems->count, sp_velems->velem);
 }
 
+void
+softpipe_delete_vertex_elements_state(struct pipe_context *pipe, void *velems)
+{
+   FREE( velems );
+}
 
 void
 softpipe_set_vertex_buffers(struct pipe_context *pipe,
index 03302e2a6ec12175b84d3fd6d6b996d7a626b494..791d30edc0ec85b30c84e52f9c51655f17e1a454 100644 (file)
@@ -169,6 +169,11 @@ struct svga_sampler_state {
    unsigned view_max_lod;
 };
 
+struct svga_velems_state {
+   unsigned count;
+   struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
+};
+
 /* Use to calculate differences between state emitted to hardware and
  * current driver-calculated state.  
  */
@@ -178,13 +183,13 @@ struct svga_state
    const struct svga_depth_stencil_state *depth;
    const struct svga_rasterizer_state *rast;
    const struct svga_sampler_state *sampler[PIPE_MAX_SAMPLERS];
+   const struct svga_velems_state *velems;
 
    struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; /* or texture ID's? */
    struct svga_fragment_shader *fs;
    struct svga_vertex_shader *vs;
 
    struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS];
-   struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS];
    struct pipe_buffer *cb[PIPE_SHADER_TYPES];
 
    struct pipe_framebuffer_state framebuffer;
@@ -204,7 +209,6 @@ struct svga_state
 
    unsigned num_samplers;
    unsigned num_textures;
-   unsigned num_vertex_elements;
    unsigned num_vertex_buffers;
    unsigned reduced_prim;
 
index 836b8441da219f4c56ac9d3ba21e4276d9dca6dd..1715a47fc6298b0b988bacbf0695382eb7daa3a2 100644 (file)
@@ -26,6 +26,7 @@
 #include "util/u_inlines.h"
 #include "pipe/p_defines.h"
 #include "util/u_math.h"
+#include "util/u_memory.h"
 #include "tgsi/tgsi_parse.h"
 
 #include "svga_screen.h"
@@ -64,20 +65,37 @@ static void svga_set_vertex_buffers(struct pipe_context *pipe,
    svga->dirty |= SVGA_NEW_VBUFFER;
 }
 
-static void svga_set_vertex_elements(struct pipe_context *pipe,
-                                     unsigned count,
-                                     const struct pipe_vertex_element *elements)
+
+static void *
+svga_create_vertex_elements_state(struct pipe_context *pipe,
+                                  unsigned count,
+                                  const struct pipe_vertex_element *attribs)
 {
-   struct svga_context *svga = svga_context(pipe);
-   unsigned i;
+   struct svga_velems_state *velems;
+   assert(count <= PIPE_MAX_ATTRIBS);
+   velems = (struct svga_velems_state *) MALLOC(sizeof(struct svga_velems_state));
+   if (velems) {
+      velems->count = count;
+      memcpy(velems->velem, attribs, sizeof(*attribs) * count);
+   }
+   return velems;
+}
 
-   for (i = 0; i < count; i++)
-      svga->curr.ve[i] = elements[i];
+static void svga_bind_vertex_elements_state(struct pipe_context *pipe,
+                                            void *velems)
+{
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_velems_state *svga_velems = (struct svga_velems_state *) velems;
 
-   svga->curr.num_vertex_elements = count;
+   svga->curr.velems = svga_velems;
    svga->dirty |= SVGA_NEW_VELEMENT;
 }
 
+static void svga_delete_vertex_elements_state(struct pipe_context *pipe,
+                                              void *velems)
+{
+   FREE(velems);
+}
 
 void svga_cleanup_vertex_state( struct svga_context *svga )
 {
@@ -91,7 +109,9 @@ void svga_cleanup_vertex_state( struct svga_context *svga )
 void svga_init_vertex_functions( struct svga_context *svga )
 {
    svga->pipe.set_vertex_buffers = svga_set_vertex_buffers;
-   svga->pipe.set_vertex_elements = svga_set_vertex_elements;
+   svga->pipe.create_vertex_elements_state = svga_create_vertex_elements_state;
+   svga->pipe.bind_vertex_elements_state = svga_bind_vertex_elements_state;
+   svga->pipe.delete_vertex_elements_state = svga_delete_vertex_elements_state;
 }
 
 
index d774e3e504dac6166bca6d3f23e3e74900652e73..10d473584d1241e2e6c20cf4c8e64b905e6afcf6 100644 (file)
@@ -76,8 +76,8 @@ static int update_need_swvfetch( struct svga_context *svga,
    unsigned i;
    boolean need_swvfetch = FALSE;
 
-   for (i = 0; i < svga->curr.num_vertex_elements; i++) {
-      svga->state.sw.ve_format[i] = svga_translate_vertex_format(svga->curr.ve[i].src_format);
+   for (i = 0; i < svga->curr.velems->count; i++) {
+      svga->state.sw.ve_format[i] = svga_translate_vertex_format(svga->curr.velems->velem[i].src_format);
       if (svga->state.sw.ve_format[i] == SVGA3D_DECLTYPE_MAX) {
          need_swvfetch = TRUE;
          break;
index 107cc403b4def714bf7f2b7d266d23d519d28c4d..b7195d246bc87c0b41b27aeb73b6948f964d69de 100644 (file)
@@ -191,15 +191,24 @@ static int emit_rss( struct svga_context *svga,
       EMIT_RS( svga, svga->curr.stencil_ref.ref_value[0], STENCILREF, fail );
    }
 
-   if (dirty & SVGA_NEW_RAST)
+   if (dirty & (SVGA_NEW_RAST | SVGA_NEW_NEED_PIPELINE))
    {
       const struct svga_rasterizer_state *curr = svga->curr.rast; 
+      unsigned cullmode = curr->cullmode;
 
       /* Shademode: still need to rearrange index list to move
        * flat-shading PV first vertex.
        */
       EMIT_RS( svga, curr->shademode, SHADEMODE, fail );
-      EMIT_RS( svga, curr->cullmode, CULLMODE, fail );
+
+      /* Don't do culling while the software pipeline is active.  It
+       * does it for us, and additionally introduces potentially
+       * back-facing triangles.
+       */
+      if (svga->state.sw.need_pipeline)
+         cullmode = SVGA3D_FACE_NONE;
+
+      EMIT_RS( svga, cullmode, CULLMODE, fail );
       EMIT_RS( svga, curr->scissortestenable, SCISSORTESTENABLE, fail );
       EMIT_RS( svga, curr->multisampleantialias, MULTISAMPLEANTIALIAS, fail );
       EMIT_RS( svga, curr->lastpixel, LASTPIXEL, fail );
index ded903170b5c25df77c5cd55fe95e302917f4139..f531e22304874c2d0312337e6c99e0a18d553ea7 100644 (file)
@@ -95,17 +95,17 @@ upload_user_buffers( struct svga_context *svga )
 static int emit_hw_vs_vdecl( struct svga_context *svga,
                              unsigned dirty )
 {
-   const struct pipe_vertex_element *ve = svga->curr.ve;
+   const struct pipe_vertex_element *ve = svga->curr.velems->velem;
    SVGA3dVertexDecl decl;
    unsigned i;
 
-   assert(svga->curr.num_vertex_elements >=
+   assert(svga->curr.velems->count >=
           svga->curr.vs->base.info.file_count[TGSI_FILE_INPUT]);
 
    svga_hwtnl_reset_vdecl( svga->hwtnl, 
-                           svga->curr.num_vertex_elements );
+                           svga->curr.velems->count );
 
-   for (i = 0; i < svga->curr.num_vertex_elements; i++) {
+   for (i = 0; i < svga->curr.velems->count; i++) {
       const struct pipe_vertex_buffer *vb = &svga->curr.vb[ve[i].vertex_buffer_index];
       unsigned usage, index;
 
index d7999fe53d2e01e74a89375acde8796c65a69ed2..781f7bf533909b18334fbd4506cb1fdfbca80d16 100644 (file)
@@ -186,8 +186,8 @@ static int update_zero_stride( struct svga_context *svga,
    svga->curr.zero_stride_vertex_elements = 0;
    svga->curr.num_zero_stride_vertex_elements = 0;
 
-   for (i = 0; i < svga->curr.num_vertex_elements; i++) {
-      const struct pipe_vertex_element *vel = &svga->curr.ve[i];
+   for (i = 0; i < svga->curr.velems->count; i++) {
+      const struct pipe_vertex_element *vel = &svga->curr.velems->velem[i];
       const struct pipe_vertex_buffer *vbuffer = &svga->curr.vb[
          vel->vertex_buffer_index];
       if (vbuffer->stride == 0) {
index 35f36a828fdffd99b04023002995aea217836998..246d34e649efe1bba1d4a21de27663c0073e47ad 100644 (file)
@@ -99,8 +99,8 @@ static int update_swtnl_draw( struct svga_context *svga,
 
    if (dirty & SVGA_NEW_VELEMENT)
       draw_set_vertex_elements(svga->swtnl.draw, 
-                               svga->curr.num_vertex_elements
-                               svga->curr.ve );
+                               svga->curr.velems->count
+                               svga->curr.velems->velem );
 
    if (dirty & SVGA_NEW_CLIP)
       draw_set_clip_state(svga->swtnl.draw, 
index df40fbade6c124ecc18a505dd09018b12d393c86..133521f45e2bce9fcd2081dd576825039f23d334 100644 (file)
@@ -773,6 +773,70 @@ trace_context_delete_vs_state(struct pipe_context *_pipe,
 }
 
 
+static INLINE void *
+trace_context_create_vertex_elements_state(struct pipe_context *_pipe,
+                                           unsigned num_elements,
+                                           const struct  pipe_vertex_element *elements)
+{
+   struct trace_context *tr_ctx = trace_context(_pipe);
+   struct pipe_context *pipe = tr_ctx->pipe;
+   void * result;
+
+   trace_dump_call_begin("pipe_context", "create_vertex_elements_state");
+
+   trace_dump_arg(ptr, pipe);
+   trace_dump_arg(uint, num_elements);
+
+   trace_dump_arg_begin("elements");
+   trace_dump_struct_array(vertex_element, elements, num_elements);
+   trace_dump_arg_end();
+
+   result = pipe->create_vertex_elements_state(pipe, num_elements, elements);
+
+   trace_dump_ret(ptr, result);
+
+   trace_dump_call_end();
+
+   return result;
+}
+
+
+static INLINE void
+trace_context_bind_vertex_elements_state(struct pipe_context *_pipe,
+                                         void *state)
+{
+   struct trace_context *tr_ctx = trace_context(_pipe);
+   struct pipe_context *pipe = tr_ctx->pipe;
+
+   trace_dump_call_begin("pipe_context", "bind_vertex_elements_state");
+
+   trace_dump_arg(ptr, pipe);
+   trace_dump_arg(ptr, state);
+
+   pipe->bind_vertex_elements_state(pipe, state);
+
+   trace_dump_call_end();
+}
+
+
+static INLINE void
+trace_context_delete_vertex_elements_state(struct pipe_context *_pipe,
+                                           void *state)
+{
+   struct trace_context *tr_ctx = trace_context(_pipe);
+   struct pipe_context *pipe = tr_ctx->pipe;
+
+   trace_dump_call_begin("pipe_context", "delete_verte_elements_state");
+
+   trace_dump_arg(ptr, pipe);
+   trace_dump_arg(ptr, state);
+
+   pipe->delete_vertex_elements_state(pipe, state);
+
+   trace_dump_call_end();
+}
+
+
 static INLINE void
 trace_context_set_blend_color(struct pipe_context *_pipe,
                               const struct pipe_blend_color *state)
@@ -1047,29 +1111,6 @@ trace_context_set_vertex_buffers(struct pipe_context *_pipe,
 }
 
 
-static INLINE void
-trace_context_set_vertex_elements(struct pipe_context *_pipe,
-                                  unsigned num_elements,
-                                  const struct pipe_vertex_element *elements)
-{
-   struct trace_context *tr_ctx = trace_context(_pipe);
-   struct pipe_context *pipe = tr_ctx->pipe;
-
-   trace_dump_call_begin("pipe_context", "set_vertex_elements");
-
-   trace_dump_arg(ptr, pipe);
-   trace_dump_arg(uint, num_elements);
-
-   trace_dump_arg_begin("elements");
-   trace_dump_struct_array(vertex_element, elements, num_elements);
-   trace_dump_arg_end();
-
-   pipe->set_vertex_elements(pipe, num_elements, elements);
-
-   trace_dump_call_end();
-}
-
-
 static INLINE void
 trace_context_surface_copy(struct pipe_context *_pipe,
                            struct pipe_surface *dest,
@@ -1303,6 +1344,9 @@ trace_context_create(struct trace_screen *tr_scr,
    tr_ctx->base.create_vs_state = trace_context_create_vs_state;
    tr_ctx->base.bind_vs_state = trace_context_bind_vs_state;
    tr_ctx->base.delete_vs_state = trace_context_delete_vs_state;
+   tr_ctx->base.create_vertex_elements_state = trace_context_create_vertex_elements_state;
+   tr_ctx->base.bind_vertex_elements_state = trace_context_bind_vertex_elements_state;
+   tr_ctx->base.delete_vertex_elements_state = trace_context_delete_vertex_elements_state;
    tr_ctx->base.set_blend_color = trace_context_set_blend_color;
    tr_ctx->base.set_stencil_ref = trace_context_set_stencil_ref;
    tr_ctx->base.set_clip_state = trace_context_set_clip_state;
@@ -1314,7 +1358,6 @@ trace_context_create(struct trace_screen *tr_scr,
    tr_ctx->base.set_fragment_sampler_textures = trace_context_set_fragment_sampler_textures;
    tr_ctx->base.set_vertex_sampler_textures = trace_context_set_vertex_sampler_textures;
    tr_ctx->base.set_vertex_buffers = trace_context_set_vertex_buffers;
-   tr_ctx->base.set_vertex_elements = trace_context_set_vertex_elements;
    if (pipe->surface_copy)
       tr_ctx->base.surface_copy = trace_context_surface_copy;
    if (pipe->surface_fill)
index f97d963dba6975864fc679a686e55797c649182e..f82dd01c697385d709195bfcfffca2b73df27365 100644 (file)
@@ -479,7 +479,6 @@ void trace_dump_vertex_element(const struct pipe_vertex_element *state)
    trace_dump_member(uint, state, src_offset);
 
    trace_dump_member(uint, state, vertex_buffer_index);
-   trace_dump_member(uint, state, nr_components);
 
    trace_dump_member(format, state, src_format);
 
index b93b38310ac93c18ad3dd9fb73b89d104fabaa80..e2766d15cd17292c68c8daad2b3b7e4569d0564e 100644 (file)
 
 #include "p_config.h"
 
-#ifndef XFree86Server
 #include <stdlib.h>
 #include <string.h>
-#else
-#include "xf86_ansic.h"
-#include "xf86_libc.h"
-#endif
 #include <stddef.h>
 #include <stdarg.h>
 
index f82b77903e93674a316a1a1142de34e2407a3685..376b01aa69645ad778b681d2401677f0dd0aff76 100644 (file)
@@ -177,6 +177,12 @@ struct pipe_context {
    void   (*bind_gs_state)(struct pipe_context *, void *);
    void   (*delete_gs_state)(struct pipe_context *, void *);
 
+   void * (*create_vertex_elements_state)(struct pipe_context *,
+                                          unsigned num_elements,
+                                          const struct pipe_vertex_element *);
+   void   (*bind_vertex_elements_state)(struct pipe_context *, void *);
+   void   (*delete_vertex_elements_state)(struct pipe_context *, void *);
+
    /*@}*/
 
    /**
@@ -220,9 +226,6 @@ struct pipe_context {
                                unsigned num_buffers,
                                const struct pipe_vertex_buffer * );
 
-   void (*set_vertex_elements)( struct pipe_context *,
-                                unsigned num_elements,
-                                const struct pipe_vertex_element * );
    /*@}*/
 
 
index 72635d1031bdfe73c668cfa41806eb367ae64ea1..3a97d888ce63fbc1b441aeb9175d689a3016d86b 100644 (file)
@@ -373,7 +373,6 @@ struct pipe_vertex_element
     * this attribute live in?
     */
    unsigned vertex_buffer_index:8;
-   unsigned nr_components:8;
  
    enum pipe_format src_format;
 };
index 3f36ccb6217a5918b373112af78803d23cb98a8e..5c44462e80fc42553bbbebdfa1ae5af34be7431b 100644 (file)
@@ -51,7 +51,7 @@ struct st_context {
    void set_blend( const struct pipe_blend_state *state ) {
       cso_set_blend($self->cso, state);
    }
-   
+
    void set_fragment_sampler( unsigned index, const struct pipe_sampler_state *state ) {
       cso_single_sampler($self->cso, index, state);
       cso_single_sampler_done($self->cso);
@@ -222,9 +222,9 @@ struct st_context {
    void set_vertex_elements(unsigned num) 
    {
       $self->num_vertex_elements = num;
-      $self->pipe->set_vertex_elements($self->pipe, 
-                                       $self->num_vertex_elements, 
-                                       $self->vertex_elements);
+      cso_set_vertex_elements($self->cso,
+                              $self->num_vertex_elements, 
+                              $self->vertex_elements);
    }
 
    /*
index 9c123a4cf955bcb9ea443df4afb31b4de6ee375c..7eb5ea1f07829eefcd7b6acdfbf6fa93c3012bc2 100644 (file)
@@ -86,6 +86,8 @@ draw_clear_quad(struct vg_context *st,
 
    /* draw */
    if (buf) {
+      cso_set_vertex_elements(st->cso_context, 2, st->velems);
+
       util_draw_vertex_buffer(pipe, buf, 0,
                               PIPE_PRIM_TRIANGLE_FAN,
                               4,  /* verts */
index c06dbf52069be78348a35eef7c6e794bee5dd524..eef2c1eb8769f0329d5e7ba91ddf739cd8d5e2dc 100644 (file)
@@ -292,12 +292,12 @@ static void draw_polygon(struct vg_context *ctx,
    pipe->set_vertex_buffers(pipe, 1, &vbuffer);
 
    /* tell pipe about the vertex attributes */
+   memset(&velement, 0, sizeof(velement));
    velement.src_offset = 0;
    velement.instance_divisor = 0;
    velement.vertex_buffer_index = 0;
    velement.src_format = PIPE_FORMAT_R32G32_FLOAT;
-   velement.nr_components = COMPONENTS;
-   pipe->set_vertex_elements(pipe, 1, &velement);
+   cso_set_vertex_elements(ctx->cso_context, 1, &velement);
 
    /* draw */
    pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLE_FAN, 
index 05620efa9c0d38f5464fd95f6ede265390dbab2d..47e8b470a11c94f881c11a1de257061170157234 100644 (file)
@@ -210,6 +210,7 @@ void renderer_draw_quad(struct renderer *r,
    buf = setup_vertex_data(r, x1, y1, x2, y2, depth);
 
    if (buf) {
+      cso_set_vertex_elements(r->cso, 2, r->owner->velems);
       util_draw_vertex_buffer(r->pipe, buf, 0,
                               PIPE_PRIM_TRIANGLE_FAN,
                               4,  /* verts */
@@ -248,6 +249,7 @@ void renderer_draw_texture(struct renderer *r,
                                s0, t0, s1, t1, 0.0f);
 
    if (buf) {
+      cso_set_vertex_elements(r->cso, 2, r->owner->velems);
       util_draw_vertex_buffer(pipe, buf, 0,
                            PIPE_PRIM_TRIANGLE_FAN,
                            4,  /* verts */
@@ -370,6 +372,7 @@ void renderer_copy_texture(struct renderer *ctx,
                          0.0f);
 
    if (buf) {
+      cso_set_vertex_elements(ctx->cso, 2, ctx->owner->velems);
       util_draw_vertex_buffer(ctx->pipe, buf, 0,
                               PIPE_PRIM_TRIANGLE_FAN,
                               4,  /* verts */
@@ -535,6 +538,7 @@ void renderer_copy_surface(struct renderer *ctx,
                            (float) dstX1, (float) dstY1, z);
 
    if (buf) {
+      cso_set_vertex_elements(ctx->cso, 2, ctx->owner->velems);
       util_draw_vertex_buffer(ctx->pipe, buf, 0,
                               PIPE_PRIM_TRIANGLE_FAN,
                               4,  /* verts */
@@ -587,6 +591,7 @@ void renderer_texture_quad(struct renderer *r,
                           s0, t0, s1, t1, 0.0f);
 
    if (buf) {
+      cso_set_vertex_elements(r->cso, 2, r->owner->velems);
       util_draw_vertex_buffer(pipe, buf, 0,
                               PIPE_PRIM_TRIANGLE_FAN,
                               4,  /* verts */
index 426bf9bc62b091a26b23ea8b4a5df9c038ba1c72..170391ec03117d5e9f1e59ca809ea81acc226775 100644 (file)
@@ -72,6 +72,7 @@ struct vg_context * vg_create_context(struct pipe_context *pipe,
                                       struct vg_context *share)
 {
    struct vg_context *ctx;
+   unsigned i;
 
    ctx = CALLOC_STRUCT(vg_context);
 
@@ -103,6 +104,13 @@ struct vg_context * vg_create_context(struct pipe_context *pipe,
    ctx->blend_sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
    ctx->blend_sampler.normalized_coords = 0;
 
+   for (i = 0; i < 2; i++) {
+      ctx->velems[i].src_offset = i * 4 * sizeof(float);
+      ctx->velems[i].instance_divisor = 0;
+      ctx->velems[i].vertex_buffer_index = 0;
+      ctx->velems[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   }
+
    vg_set_error(ctx, VG_NO_ERROR);
 
    ctx->owned_objects[VG_OBJECT_PAINT] = cso_hash_create();
index bc88c8d139dca9b992b8bb3cfc5cde71f3af2eb3..804e9e76d77e2799a33732f348cc7dd3eefb07ed 100644 (file)
@@ -146,6 +146,7 @@ struct vg_context
    struct vg_shader *clear_vs;
    struct vg_shader *texture_vs;
    struct pipe_buffer *vs_const_buffer;
+   struct pipe_vertex_element velems[2];
 };
 
 struct vg_object {
index 83b0d31e38d8156b80517563ab6aac602a19f93e..1eb926360b9d0182bca36b4787f004bfb8de0913 100644 (file)
@@ -68,6 +68,8 @@ renderer_draw(struct xorg_renderer *r)
 
 
    if (buf) {
+      cso_set_vertex_elements(r->cso, r->attrs_per_vertex, r->velems);
+
       util_draw_vertex_buffer(pipe, buf, 0,
                               PIPE_PRIM_QUADS,
                               num_verts,  /* verts */
@@ -92,6 +94,7 @@ renderer_init_state(struct xorg_renderer *r)
 {
    struct pipe_depth_stencil_alpha_state dsa;
    struct pipe_rasterizer_state raster;
+   unsigned i;
 
    /* set common initial clip state */
    memset(&dsa, 0, sizeof(struct pipe_depth_stencil_alpha_state));
@@ -103,6 +106,14 @@ renderer_init_state(struct xorg_renderer *r)
    raster.gl_rasterization_rules = 1;
    cso_set_rasterizer(r->cso, &raster);
 
+   /* vertex elements state */
+   memset(&r->velems[0], 0, sizeof(r->velems[0]) * 3);
+   for (i = 0; i < 3; i++) {
+      r->velems[i].src_offset = i * 4 * sizeof(float);
+      r->velems[i].instance_divisor = 0;
+      r->velems[i].vertex_buffer_index = 0;
+      r->velems[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   }
 }
 
 
@@ -600,6 +611,8 @@ void renderer_draw_yuv(struct xorg_renderer *r,
    if (buf) {
       const int num_attribs = 2; /*pos + tex coord*/
 
+      cso_set_vertex_elements(r->cso, num_attribs, r->velems);
+
       util_draw_vertex_buffer(pipe, buf, 0,
                               PIPE_PRIM_QUADS,
                               4,  /* verts */
index af6aa0567d61f61c8a21e466214cce3649a9d72c..3d006287199a0828ab42939f2f431e9e3ba5257f 100644 (file)
@@ -28,6 +28,7 @@ struct xorg_renderer {
 
    float buffer[BUF_SIZE];
    int buffer_size;
+   struct pipe_vertex_element velems[3];
 
    /* number of attributes per vertex for the current
     * draw operation */
index 0a25dccde56ca4cbae76f06a3910662d3d3666be..e80ec5ee8807d22caeaad12cd377c03abd3572f5 100644 (file)
@@ -249,6 +249,7 @@ if env['platform'] != 'winddk':
        glapi_sources = [
                'glapi/glapi.c',
                'glapi/glapi_dispatch.c',
+               'glapi/glapi_entrypoint.c',
                'glapi/glapi_getproc.c',
                'glapi/glapi_nop.c',
                'glapi/glthread.c',
index a8f6b993ac3d07a92b49f725036aca5ad1ee7c75..54699cf8d3406060becb53f28799e58e70f30ff4 100644 (file)
@@ -74,9 +74,9 @@ struct {
     [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 },
     [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 },
     [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 1, .ndst = 01 },
-    [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 1, .ndst = 0 },
+    [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 },
     [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 },
-    [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 1, .ndst = 0 },
+    [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 },
     [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 },
     [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 },
     [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 },
index c78f7b38aeedcce2a7deb7c1820a879b9587f387..1fd957b3ad66ad55b6475f54e7ae170de4c9a120 100644 (file)
@@ -95,9 +95,17 @@ static void brwDeleteProgram( GLcontext *ctx,
                              struct gl_program *prog )
 {
    if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
-      struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
-      struct brw_fragment_program *brw_fprog = brw_fragment_program(fprog);
-      dri_bo_unreference(brw_fprog->const_buffer);
+      struct gl_fragment_program *fp = (struct gl_fragment_program *) prog;
+      struct brw_fragment_program *brw_fp = brw_fragment_program(fp);
+
+      dri_bo_unreference(brw_fp->const_buffer);
+   }
+
+   if (prog->Target == GL_VERTEX_PROGRAM_ARB) {
+      struct gl_vertex_program *vp = (struct gl_vertex_program *) prog;
+      struct brw_vertex_program *brw_vp = brw_vertex_program(vp);
+
+      dri_bo_unreference(brw_vp->const_buffer);
    }
 
    _mesa_delete_program( ctx, prog );
index a7c4b589727b97e6fb3df7b2d8992e9d44c8920f..a48804a660fb93d2735af5a6c45fdba3b7028bc1 100644 (file)
@@ -1717,11 +1717,13 @@ void brw_vs_emit(struct brw_vs_compile *c )
             /* patch all the BREAK/CONT instructions from last BEGINLOOP */
             while (inst0 > loop_inst[loop_depth]) {
                inst0--;
-               if (inst0->header.opcode == BRW_OPCODE_BREAK) {
+               if (inst0->header.opcode == BRW_OPCODE_BREAK &&
+                  inst0->bits3.if_else.jump_count == 0) {
                   inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
                   inst0->bits3.if_else.pop_count = 0;
                }
-               else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
+               else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
+                       inst0->bits3.if_else.jump_count == 0) {
                   inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
                   inst0->bits3.if_else.pop_count = 0;
                }
index 562608e2ecdfb6d8833f36a1415109c48e47e968..ea3c2405af9bd5c6cfc31591c65e15c58c2226d0 100644 (file)
@@ -614,112 +614,6 @@ static void invoke_subroutine( struct brw_wm_compile *c,
     }
 }
 
-/* Workaround for using brw_wm_emit.c's emit functions, which expect
- * destination regs to be uniquely written.  Moves arguments out to
- * temporaries as necessary for instructions which use their destination as
- * a temporary.
- */
-static void
-unalias3(struct brw_wm_compile *c,
-        void (*func)(struct brw_compile *c,
-                     const struct brw_reg *dst,
-                     GLuint mask,
-                     const struct brw_reg *arg0,
-                     const struct brw_reg *arg1,
-                     const struct brw_reg *arg2),
-        const struct brw_reg *dst,
-        GLuint mask,
-        const struct brw_reg *arg0,
-        const struct brw_reg *arg1,
-        const struct brw_reg *arg2)
-{
-    struct brw_compile *p = &c->func;
-    struct brw_reg tmp_arg0[4], tmp_arg1[4], tmp_arg2[4];
-    int i, j;
-    int mark = mark_tmps(c);
-
-    for (j = 0; j < 4; j++) {
-       tmp_arg0[j] = arg0[j];
-       tmp_arg1[j] = arg1[j];
-       tmp_arg2[j] = arg2[j];
-    }
-
-    for (i = 0; i < 4; i++) {
-       if (mask & (1<<i)) {
-           for (j = 0; j < 4; j++) {
-               if (arg0[j].file == dst[i].file &&
-                   dst[i].nr == arg0[j].nr) {
-                   tmp_arg0[j] = alloc_tmp(c);
-                   brw_MOV(p, tmp_arg0[j], arg0[j]);
-               }
-               if (arg1[j].file == dst[i].file &&
-                   dst[i].nr == arg1[j].nr) {
-                   tmp_arg1[j] = alloc_tmp(c);
-                   brw_MOV(p, tmp_arg1[j], arg1[j]);
-               }
-               if (arg2[j].file == dst[i].file &&
-                   dst[i].nr == arg2[j].nr) {
-                   tmp_arg2[j] = alloc_tmp(c);
-                   brw_MOV(p, tmp_arg2[j], arg2[j]);
-               }
-           }
-       }
-    }
-
-    func(p, dst, mask, tmp_arg0, tmp_arg1, tmp_arg2);
-
-    release_tmps(c, mark);
-}
-
-/* Workaround for using brw_wm_emit.c's emit functions, which expect
- * destination regs to be uniquely written.  Moves arguments out to
- * temporaries as necessary for instructions which use their destination as
- * a temporary.
- */
-static void
-unalias2(struct brw_wm_compile *c,
-        void (*func)(struct brw_compile *c,
-                     const struct brw_reg *dst,
-                     GLuint mask,
-                     const struct brw_reg *arg0,
-                     const struct brw_reg *arg1),
-        const struct brw_reg *dst,
-        GLuint mask,
-        const struct brw_reg *arg0,
-        const struct brw_reg *arg1)
-{
-    struct brw_compile *p = &c->func;
-    struct brw_reg tmp_arg0[4], tmp_arg1[4];
-    int i, j;
-    int mark = mark_tmps(c);
-
-    for (j = 0; j < 4; j++) {
-       tmp_arg0[j] = arg0[j];
-       tmp_arg1[j] = arg1[j];
-    }
-
-    for (i = 0; i < 4; i++) {
-       if (mask & (1<<i)) {
-           for (j = 0; j < 4; j++) {
-               if (arg0[j].file == dst[i].file &&
-                   dst[i].nr == arg0[j].nr) {
-                   tmp_arg0[j] = alloc_tmp(c);
-                   brw_MOV(p, tmp_arg0[j], arg0[j]);
-               }
-               if (arg1[j].file == dst[i].file &&
-                   dst[i].nr == arg1[j].nr) {
-                   tmp_arg1[j] = alloc_tmp(c);
-                   brw_MOV(p, tmp_arg1[j], arg1[j]);
-               }
-           }
-       }
-    }
-
-    func(p, dst, mask, tmp_arg0, tmp_arg1);
-
-    release_tmps(c, mark);
-}
-
 static void emit_arl(struct brw_wm_compile *c,
                      const struct prog_instruction *inst)
 {
@@ -1813,14 +1707,29 @@ static void
 get_argument_regs(struct brw_wm_compile *c,
                  const struct prog_instruction *inst,
                  int index,
+                 struct brw_reg *dst,
                  struct brw_reg *regs,
                  int mask)
 {
-    int i;
+    struct brw_compile *p = &c->func;
+    int i, j;
 
     for (i = 0; i < 4; i++) {
-       if (mask & (1 << i))
+       if (mask & (1 << i)) {
            regs[i] = get_src_reg(c, inst, index, i);
+
+           /* Unalias destination registers from our sources. */
+           if (regs[i].file == BRW_GENERAL_REGISTER_FILE) {
+              for (j = 0; j < 4; j++) {
+                  if (memcmp(&regs[i], &dst[j], sizeof(regs[0])) == 0) {
+                      struct brw_reg tmp = alloc_tmp(c);
+                      brw_MOV(p, tmp, regs[i]);
+                      regs[i] = tmp;
+                      break;
+                  }
+              }
+           }
+       }
     }
 }
 
@@ -1845,6 +1754,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
        int dst_flags;
        struct brw_reg args[3][4], dst[4];
        int j;
+       int mark = mark_tmps( c );
 
         c->cur_inst = i;
 
@@ -1866,7 +1776,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
           }
        }
        for (j = 0; j < brw_wm_nr_args(inst->Opcode); j++)
-           get_argument_regs(c, inst, j, args[j], WRITEMASK_XYZW);
+           get_argument_regs(c, inst, j, dst, args[j], WRITEMASK_XYZW);
 
        dst_flags = inst->DstReg.WriteMask;
        if (inst->SaturateMode == SATURATE_ZERO_ONE)
@@ -1920,8 +1830,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
                emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
                break;
            case OPCODE_LRP:
-               unalias3(c, emit_lrp,
-                        dst, dst_flags, args[0], args[1], args[2]);
+               emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
                break;
            case OPCODE_TRUNC:
                emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]);
@@ -1961,10 +1870,10 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
                emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
                break;
            case OPCODE_MIN:    
-               unalias2(c, emit_min, dst, dst_flags, args[0], args[1]);
+               emit_min(p, dst, dst_flags, args[0], args[1]);
                break;
            case OPCODE_MAX:    
-               unalias2(c, emit_max, dst, dst_flags, args[0], args[1]);
+               emit_max(p, dst, dst_flags, args[0], args[1]);
                break;
            case OPCODE_DDX:
            case OPCODE_DDY:
@@ -2103,11 +2012,13 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
                   /* patch all the BREAK/CONT instructions from last BGNLOOP */
                   while (inst0 > loop_inst[loop_depth]) {
                      inst0--;
-                     if (inst0->header.opcode == BRW_OPCODE_BREAK) {
+                     if (inst0->header.opcode == BRW_OPCODE_BREAK &&
+                        inst0->bits3.if_else.jump_count == 0) {
                        inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
                        inst0->bits3.if_else.pop_count = 0;
                      }
-                     else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
+                     else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
+                             inst0->bits3.if_else.jump_count == 0) {
                         inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
                         inst0->bits3.if_else.pop_count = 0;
                      }
@@ -2119,6 +2030,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
                        inst->Opcode);
        }
 
+       /* Release temporaries containing any unaliased source regs. */
+       release_tmps( c, mark );
+
        if (inst->CondUpdate)
            brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
        else
index 3f8710057059611a0667107178dcafa692faf7cf..9ea81fd50590d9469dc0970cdb539beb18731049 100644 (file)
@@ -21,6 +21,7 @@ RADEON_COMMON_SOURCES = \
        radeon_fbo.c \
        radeon_lock.c \
        radeon_mipmap_tree.c \
+       radeon_pixel_read.c \
        radeon_queryobj.c \
        radeon_span.c \
        radeon_texture.c \
index dad2580e08b28b58d4be876873e8666c62f6c18c..4f1a56658ccb1843a5d0d970037e1decb0df38ae 100644 (file)
@@ -266,6 +266,7 @@ static void r200_init_vtbl(radeonContextPtr radeon)
    radeon->vtbl.emit_query_finish = r200_emit_query_finish;
    radeon->vtbl.check_blit = r200_check_blit;
    radeon->vtbl.blit = r200_blit;
+   radeon->vtbl.is_format_renderable = radeonIsFormatRenderable;
 }
 
 
diff --git a/src/mesa/drivers/dri/r200/radeon_pixel_read.c b/src/mesa/drivers/dri/r200/radeon_pixel_read.c
new file mode 120000 (symlink)
index 0000000..3b03803
--- /dev/null
@@ -0,0 +1 @@
+../radeon/radeon_pixel_read.c
\ No newline at end of file
index 4257a32b89f56ad2aab07ab818b646ebb498f4c7..2245998c952332faea7b1b1c2bc1f0e9c1ba4a4a 100644 (file)
@@ -31,8 +31,9 @@ RADEON_COMMON_SOURCES = \
        radeon_fbo.c \
        radeon_lock.c \
        radeon_mipmap_tree.c \
-       radeon_span.c \
+       radeon_pixel_read.c \
        radeon_queryobj.c \
+       radeon_span.c \
        radeon_texture.c \
        radeon_tex_copy.c \
        radeon_tex_getimage.c \
index 6cfa5686f4a5a759a0d1b54b08b35d34be2e757e..e2dbb1dbf400aab51613dbfdfb3df691f1ec61e6 100644 (file)
@@ -332,36 +332,37 @@ void r300_emit_cb_setup(struct r300_context *r300,
     assert(offset % 32 == 0);
 
     switch (format) {
-        case MESA_FORMAT_RGB565:
-            assert(_mesa_little_endian());
-            cbpitch |= R300_COLOR_FORMAT_RGB565;
+        case MESA_FORMAT_SL8:
+        case MESA_FORMAT_A8:
+        case MESA_FORMAT_L8:
+        case MESA_FORMAT_I8:
+            cbpitch |= R300_COLOR_FORMAT_I8;
             break;
+        case MESA_FORMAT_RGB565:
         case MESA_FORMAT_RGB565_REV:
-            assert(!_mesa_little_endian());
             cbpitch |= R300_COLOR_FORMAT_RGB565;
             break;
         case MESA_FORMAT_ARGB4444:
-            assert(_mesa_little_endian());
-            cbpitch |= R300_COLOR_FORMAT_ARGB4444;
-            break;
         case MESA_FORMAT_ARGB4444_REV:
-            assert(!_mesa_little_endian());
             cbpitch |= R300_COLOR_FORMAT_ARGB4444;
             break;
+        case MESA_FORMAT_RGBA5551:
         case MESA_FORMAT_ARGB1555:
-            assert(_mesa_little_endian());
-            cbpitch |= R300_COLOR_FORMAT_ARGB1555;
-            break;
         case MESA_FORMAT_ARGB1555_REV:
-            assert(!_mesa_little_endian());
             cbpitch |= R300_COLOR_FORMAT_ARGB1555;
             break;
+        case MESA_FORMAT_RGBA8888:
+        case MESA_FORMAT_RGBA8888_REV:
+        case MESA_FORMAT_XRGB8888:
+        case MESA_FORMAT_ARGB8888:
+        case MESA_FORMAT_XRGB8888_REV:
+        case MESA_FORMAT_ARGB8888_REV:
+        case MESA_FORMAT_SRGBA8:
+        case MESA_FORMAT_SARGB8:
+            cbpitch |= R300_COLOR_FORMAT_ARGB8888;
+            break;
         default:
-            if (cpp == 4) {
-                cbpitch |= R300_COLOR_FORMAT_ARGB8888;
-            } else {
-                _mesa_problem(r300->radeon.glCtx, "unexpected format in emit_cb_offset()");;
-            }
+            _mesa_problem(r300->radeon.glCtx, "unexpected format in emit_cb_offset()");
             break;
     }
 
index ff35cd52753cb139d8d66bf3372456481e6cdf02..364e0ba6b6147f07e7c36b716a87b196f7a6b41e 100644 (file)
@@ -321,6 +321,12 @@ static void r300_init_vtbl(radeonContextPtr radeon)
 
        radeon->vtbl.check_blit = r300_check_blit;
        radeon->vtbl.blit = r300_blit;
+
+       if (radeon->radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
+               radeon->vtbl.is_format_renderable = r500IsFormatRenderable;
+       } else {
+               radeon->vtbl.is_format_renderable = r300IsFormatRenderable;
+       }
 }
 
 static void r300InitConstValues(GLcontext *ctx, radeonScreenPtr screen)
index 5979dedac4fa558f73ef99b2aa7eaeb6b27a3132..874894124195c416ce0faf994622a56c69648a80 100644 (file)
@@ -46,6 +46,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "main/simple_list.h"
 #include "main/api_arrayelt.h"
 
+#include "drivers/common/meta.h"
 #include "swrast/swrast.h"
 #include "swrast_setup/swrast_setup.h"
 #include "shader/prog_parameter.h"
@@ -2237,6 +2238,68 @@ void r300UpdateShaderStates(r300ContextPtr rmesa)
        }
 }
 
+#define EASY_US_OUT_FMT(comps, c0, c1, c2, c3) \
+       (R500_OUT_FMT_##comps | R500_C0_SEL_##c0 | R500_C1_SEL_##c1 | \
+        R500_C2_SEL_##c2 | R500_C3_SEL_##c3)
+static void r300SetupUsOutputFormat(GLcontext *ctx)
+{
+       r300ContextPtr rmesa = R300_CONTEXT(ctx);
+       uint32_t hw_format;
+       struct radeon_renderbuffer *rrb = radeon_get_colorbuffer(&rmesa->radeon);
+
+       if (!rrb) {
+               return;
+       }
+       
+       switch (rrb->base.Format)
+       {
+               case MESA_FORMAT_RGBA5551:
+               case MESA_FORMAT_RGBA8888:
+                       hw_format = EASY_US_OUT_FMT(C4_8, A, B, G, R);
+                       break;
+               case MESA_FORMAT_RGB565_REV:
+               case MESA_FORMAT_RGBA8888_REV:
+                       hw_format = EASY_US_OUT_FMT(C4_8, R, G, B, A);
+                       break;
+               case MESA_FORMAT_RGB565:
+               case MESA_FORMAT_ARGB4444:
+               case MESA_FORMAT_ARGB1555:
+               case MESA_FORMAT_XRGB8888:
+               case MESA_FORMAT_ARGB8888:
+                       hw_format = EASY_US_OUT_FMT(C4_8, B, G, R, A);
+                       break;
+               case MESA_FORMAT_ARGB4444_REV:
+               case MESA_FORMAT_ARGB1555_REV:
+               case MESA_FORMAT_XRGB8888_REV:
+               case MESA_FORMAT_ARGB8888_REV:
+                       hw_format = EASY_US_OUT_FMT(C4_8, A, R, G, B);
+                       break;
+               case MESA_FORMAT_SRGBA8:
+                       hw_format = EASY_US_OUT_FMT(C4_10_GAMMA, A, B, G, R);
+                       break;
+               case MESA_FORMAT_SARGB8:
+                       hw_format = EASY_US_OUT_FMT(C4_10_GAMMA, B, G, R, A);
+                       break;
+               case MESA_FORMAT_SL8:
+                       hw_format = EASY_US_OUT_FMT(C4_10_GAMMA, A, A, R, A);
+                       break;
+               case MESA_FORMAT_A8:
+                       hw_format = EASY_US_OUT_FMT(C4_8, A, A, A, A);
+                       break;
+               case MESA_FORMAT_L8:
+               case MESA_FORMAT_I8:
+                       hw_format = EASY_US_OUT_FMT(C4_8, A, A, R, A);
+                       break;
+               default:
+                       assert(!"Unsupported format");
+                       break;
+       }
+
+       R300_STATECHANGE(rmesa, us_out_fmt);
+       rmesa->hw.us_out_fmt.cmd[1] = hw_format;
+}
+#undef EASY_US_OUT_FMT
+
 /**
  * Called by Mesa after an internal state update.
  */
@@ -2266,6 +2329,10 @@ static void r300InvalidateState(GLcontext * ctx, GLuint new_state)
                        r300->hw.shade2.cmd[1] &= ~R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST;
        }
 
+       if (new_state & _NEW_BUFFERS) {
+               r300SetupUsOutputFormat(ctx);
+       }
+
        r300->radeon.NewGLState |= new_state;
 }
 
@@ -2326,8 +2393,12 @@ void r300InitStateFuncs(struct dd_function_table *functions)
        functions->ClipPlane = r300ClipPlane;
        functions->Scissor = radeonScissor;
 
-       functions->DrawBuffer           = radeonDrawBuffer;
-       functions->ReadBuffer           = radeonReadBuffer;
+       functions->DrawBuffer = radeonDrawBuffer;
+       functions->ReadBuffer = radeonReadBuffer;
+
+       functions->CopyPixels = _mesa_meta_CopyPixels;
+       functions->DrawPixels = _mesa_meta_DrawPixels;
+       functions->ReadPixels = radeonReadPixels;
 }
 
 void r300InitShaderFunctions(r300ContextPtr r300)
index 8dd8507395488b07ff074ac642b85c7a9758a7d7..baef206bc26e256563a4a27a3393cb97c77781e0 100644 (file)
@@ -308,6 +308,45 @@ static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx,
        return &t->base;
 }
 
+unsigned r300IsFormatRenderable(gl_format mesa_format)
+{
+       switch (mesa_format)
+       {
+               case MESA_FORMAT_RGB565:
+               case MESA_FORMAT_RGBA5551:
+               case MESA_FORMAT_RGBA8888:
+               case MESA_FORMAT_RGB565_REV:
+               case MESA_FORMAT_RGBA8888_REV:
+               case MESA_FORMAT_ARGB4444:
+               case MESA_FORMAT_ARGB1555:
+               case MESA_FORMAT_XRGB8888:
+               case MESA_FORMAT_ARGB8888:
+               case MESA_FORMAT_ARGB4444_REV:
+               case MESA_FORMAT_ARGB1555_REV:
+               case MESA_FORMAT_XRGB8888_REV:
+               case MESA_FORMAT_ARGB8888_REV:
+               case MESA_FORMAT_SRGBA8:
+               case MESA_FORMAT_SARGB8:
+               case MESA_FORMAT_SL8:
+               case MESA_FORMAT_A8:
+               case MESA_FORMAT_L8:
+               case MESA_FORMAT_I8:
+               case MESA_FORMAT_Z16:
+                       return 1;
+               default:
+                       return 0;
+       }
+}
+
+unsigned r500IsFormatRenderable(gl_format mesa_format)
+{
+       if (mesa_format == MESA_FORMAT_S8_Z24) {
+               return 1;
+       } else {
+               return r300IsFormatRenderable(mesa_format);
+       }
+}
+
 void r300InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *functions)
 {
        /* Note: we only plug in the functions we implement in the driver
index 9694e703b83480c41d366e671b9e01106d2dccfd..aca44cd76699244d91b9fafffaeb2a4e56801a38 100644 (file)
@@ -53,4 +53,7 @@ extern void r300InitTextureFuncs(radeonContextPtr radeon, struct dd_function_tab
 
 int32_t r300TranslateTexFormat(gl_format mesaFormat);
 
+unsigned r300IsFormatRenderable(gl_format mesaFormat);
+unsigned r500IsFormatRenderable(gl_format mesaFormat);
+
 #endif                         /* __r300_TEX_H__ */
diff --git a/src/mesa/drivers/dri/r300/radeon_pixel_read.c b/src/mesa/drivers/dri/r300/radeon_pixel_read.c
new file mode 120000 (symlink)
index 0000000..3b03803
--- /dev/null
@@ -0,0 +1 @@
+../radeon/radeon_pixel_read.c
\ No newline at end of file
index f76859d11e24add4019276afae8a044332a6d07d..17915621ee4d1bc6907c77686b1db64f49c10108 100644 (file)
@@ -31,9 +31,10 @@ RADEON_COMMON_SOURCES = \
        radeon_fbo.c \
        radeon_lock.c \
        radeon_mipmap_tree.c \
+       radeon_pixel_read.c \
+       radeon_queryobj.c \
        radeon_span.c \
        radeon_texture.c \
-       radeon_queryobj.c \
        radeon_tex_copy.c \
        radeon_tex_getimage.c \
        radeon_tile.c
index 134e97e7c3321c907b08bf5086f14673118a87c8..76d5027649e30a1c238dd91b5f81f0b003c59496 100644 (file)
@@ -239,6 +239,7 @@ static void r600_init_vtbl(radeonContextPtr radeon)
        radeon->vtbl.emit_query_finish = r600_emit_query_finish;
        radeon->vtbl.check_blit = r600_check_blit;
        radeon->vtbl.blit = r600_blit;
+       radeon->vtbl.is_format_renderable = radeonIsFormatRenderable;
 }
 
 static void r600InitConstValues(GLcontext *ctx, radeonScreenPtr screen)
diff --git a/src/mesa/drivers/dri/r600/radeon_pixel_read.c b/src/mesa/drivers/dri/r600/radeon_pixel_read.c
new file mode 120000 (symlink)
index 0000000..3b03803
--- /dev/null
@@ -0,0 +1 @@
+../radeon/radeon_pixel_read.c
\ No newline at end of file
index 6904ebbee3bbbae99093a7f393c89d58f83b8d3d..19df62742ecc8e6ff1a7c99f4d83e0ba6574105d 100644 (file)
@@ -22,6 +22,7 @@ RADEON_COMMON_SOURCES = \
        radeon_fbo.c \
        radeon_lock.c \
        radeon_mipmap_tree.c \
+       radeon_pixel_read.c \
        radeon_queryobj.c \
        radeon_span.c \
        radeon_texture.c \
index cd01c9984e31f3fcdeac1890fd7a3b2b7c4a8f37..35b3f08fff90b4b43a12a7c89c89f295fa9443db 100644 (file)
@@ -44,6 +44,12 @@ radeon_renderbuffer_set_bo(struct radeon_renderbuffer *rb,
 struct radeon_renderbuffer *
 radeon_create_renderbuffer(gl_format format, __DRIdrawable *driDrawPriv);
 
+void
+radeonReadPixels(GLcontext * ctx,
+                               GLint x, GLint y, GLsizei width, GLsizei height,
+                               GLenum format, GLenum type,
+                               const struct gl_pixelstore_attrib *pack, GLvoid * pixels);
+
 void radeon_check_front_buffer_rendering(GLcontext *ctx);
 static inline struct radeon_renderbuffer *radeon_renderbuffer(struct gl_renderbuffer *rb)
 {
index d1a24e265f2e14ba1561fe211ea9df3186c073eb..5156c5d0d0a8bcf5db3e2c9ded72205ef407b82f 100644 (file)
@@ -539,6 +539,7 @@ struct radeon_context {
                         unsigned reg_width,
                         unsigned reg_height,
                         unsigned flip_y);
+          unsigned (*is_format_renderable)(gl_format mesa_format);
    } vtbl;
 };
 
index 878a453bd539cd9b84abc97fadadb5bc80ebeb43..56aba16e9e097f10ed2c146f7d1cc446dc9be3b1 100644 (file)
@@ -200,6 +200,7 @@ static void r100_init_vtbl(radeonContextPtr radeon)
    radeon->vtbl.emit_query_finish = r100_emit_query_finish;
    radeon->vtbl.check_blit = r100_check_blit;
    radeon->vtbl.blit = r100_blit;
+   radeon->vtbl.is_format_renderable = radeonIsFormatRenderable;
 }
 
 /* Create the device specific context.
index 46664a175569f8fd826bbe2911dd7aab2d2269ef..63986058356a79d8efd03ddd0f1e1c9620e11915 100644 (file)
@@ -409,82 +409,51 @@ radeon_framebuffer_renderbuffer(GLcontext * ctx,
    radeon_draw_buffer(ctx, fb);
 }
 
-
-/* TODO: According to EXT_fbo spec internal format of texture image
- * once set during glTexImage call, should be preserved when
- * attaching image to renderbuffer. When HW doesn't support
- * rendering to format of attached image, set framebuffer
- * completeness accordingly in radeon_validate_framebuffer (issue #79).
- */
 static GLboolean
 radeon_update_wrapper(GLcontext *ctx, struct radeon_renderbuffer *rrb, 
                     struct gl_texture_image *texImage)
 {
-       int retry = 0;
-       gl_format texFormat;
-
        radeon_print(RADEON_TEXTURE, RADEON_TRACE,
-               "%s(%p, rrb %p, texImage %p) \n",
-               __func__, ctx, rrb, texImage);
-
-restart:
-       if (texImage->TexFormat == _dri_texformat_argb8888) {
-               rrb->base.DataType = GL_UNSIGNED_BYTE;
-               DBG("Render to RGBA8 texture OK\n");
+               "%s(%p, rrb %p, texImage %p, texFormat %s) \n",
+               __func__, ctx, rrb, texImage, _mesa_get_format_name(texImage->TexFormat));
+
+       switch (texImage->TexFormat) {
+               case MESA_FORMAT_RGBA8888:
+               case MESA_FORMAT_RGBA8888_REV:
+               case MESA_FORMAT_ARGB8888:
+               case MESA_FORMAT_ARGB8888_REV:
+               case MESA_FORMAT_XRGB8888:
+               case MESA_FORMAT_XRGB8888_REV:
+               case MESA_FORMAT_RGB565:
+               case MESA_FORMAT_RGB565_REV:
+               case MESA_FORMAT_RGBA5551:
+               case MESA_FORMAT_ARGB1555:
+               case MESA_FORMAT_ARGB1555_REV:
+               case MESA_FORMAT_ARGB4444:
+               case MESA_FORMAT_ARGB4444_REV:
+                       rrb->base.DataType = GL_UNSIGNED_BYTE;
+                       break;
+               case MESA_FORMAT_Z16:
+                       rrb->base.DataType = GL_UNSIGNED_SHORT;
+                       break;
+               case MESA_FORMAT_X8_Z24:
+                       rrb->base.DataType = GL_UNSIGNED_INT;
+                       break;
+               case MESA_FORMAT_S8_Z24:
+                       rrb->base.DataType = GL_UNSIGNED_INT_24_8_EXT;
+                       break;
        }
-       else if (texImage->TexFormat == _dri_texformat_rgb565) {
-               rrb->base.DataType = GL_UNSIGNED_BYTE;
-               DBG("Render to RGB5 texture OK\n");
-       }
-       else if (texImage->TexFormat == _dri_texformat_argb1555) {
-               rrb->base.DataType = GL_UNSIGNED_BYTE;
-               DBG("Render to ARGB1555 texture OK\n");
-       }
-       else if (texImage->TexFormat == _dri_texformat_argb4444) {
-               rrb->base.DataType = GL_UNSIGNED_BYTE;
-               DBG("Render to ARGB4444 texture OK\n");
-       }
-       else if (texImage->TexFormat == MESA_FORMAT_Z16) {
-               rrb->base.DataType = GL_UNSIGNED_SHORT;
-               DBG("Render to DEPTH16 texture OK\n");
-       }
-       else if (texImage->TexFormat == MESA_FORMAT_S8_Z24) {
-               rrb->base.DataType = GL_UNSIGNED_INT_24_8_EXT;
-               DBG("Render to DEPTH_STENCIL texture OK\n");
-       }
-       else {
-               /* try redoing the FBO */
-               if (retry == 1) {
-                       DBG("Render to texture BAD FORMAT %d\n",
-                           texImage->TexFormat);
-                       return GL_FALSE;
-               }
-                /* XXX why is the tex format being set here?
-                 * I think this can be removed.
-                 */
-               texImage->TexFormat = radeonChooseTextureFormat(ctx, texImage->InternalFormat, 0,
-                                                               _mesa_get_format_datatype(texImage->TexFormat),
-                                                               1);
-
-               retry++;
-               goto restart;
-       }
-       
-       texFormat = texImage->TexFormat;
-
-       rrb->base.Format = texFormat;
-
-        rrb->cpp = _mesa_get_format_bytes(texFormat);
+               
+       rrb->cpp = _mesa_get_format_bytes(texImage->TexFormat);
        rrb->pitch = texImage->Width * rrb->cpp;
+       rrb->base.Format = texImage->TexFormat;
        rrb->base.InternalFormat = texImage->InternalFormat;
-        rrb->base._BaseFormat = _mesa_base_fbo_format(ctx, rrb->base.InternalFormat);
-
+       rrb->base._BaseFormat = _mesa_base_fbo_format(ctx, rrb->base.InternalFormat);
        rrb->base.Width = texImage->Width;
        rrb->base.Height = texImage->Height;
-       
        rrb->base.Delete = radeon_delete_renderbuffer;
        rrb->base.AllocStorage = radeon_nop_alloc_storage;
-       
+
        return GL_TRUE;
 }
 
@@ -607,6 +576,35 @@ radeon_finish_render_texture(GLcontext * ctx,
 static void
 radeon_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb)
 {
+       radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+       gl_format mesa_format;
+       int i;
+
+       for (i = -2; i < (GLint) ctx->Const.MaxColorAttachments; i++) {
+               struct gl_renderbuffer_attachment *att;
+               if (i == -2) {
+                       att = &fb->Attachment[BUFFER_DEPTH];
+               } else if (i == -1) {
+                       att = &fb->Attachment[BUFFER_STENCIL];
+               } else {
+                       att = &fb->Attachment[BUFFER_COLOR0 + i];
+               }
+
+               if (att->Type == GL_TEXTURE) {
+                       mesa_format = att->Texture->Image[att->CubeMapFace][att->TextureLevel]->TexFormat;
+               } else {
+                       /* All renderbuffer formats are renderable, but not sampable */
+                       continue;
+               }
+
+               if (!radeon->vtbl.is_format_renderable(mesa_format)){
+                       fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED;
+                       radeon_print(RADEON_TEXTURE, RADEON_TRACE,
+                                               "%s: HW doesn't support format %s as output format of attachment %d\n",
+                                               __FUNCTION__, _mesa_get_format_name(mesa_format), i);
+                       return;
+               }
+       }
 }
 
 void radeon_fbo_init(struct radeon_context *radeon)
index c6cc417dd6c4b08d71710cf094796f55f1ffcc15..78c5f5dd572e2a7e6e8b2fb59923c8bb817834b0 100644 (file)
@@ -468,12 +468,9 @@ static void migrate_image_to_miptree(radeon_mipmap_tree *mt,
 
                radeon_mipmap_level *srclvl = &image->mt->levels[image->mtlevel];
 
-               /* TODO: bring back these assertions once the FBOs are fixed */
-#if 0
                assert(image->mtlevel == level);
                assert(srclvl->size == dstlvl->size);
                assert(srclvl->rowstride == dstlvl->rowstride);
-#endif
 
                radeon_bo_map(image->mt->bo, GL_FALSE);
 
diff --git a/src/mesa/drivers/dri/radeon/radeon_pixel_read.c b/src/mesa/drivers/dri/radeon/radeon_pixel_read.c
new file mode 100644 (file)
index 0000000..2784193
--- /dev/null
@@ -0,0 +1,188 @@
+/*
+ * Copyright (C) 2010 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "stdint.h"
+#include "main/bufferobj.h"
+#include "main/enums.h"
+#include "main/image.h"
+#include "main/state.h"
+#include "swrast/swrast.h"
+
+#include "radeon_common_context.h"
+#include "radeon_debug.h"
+#include "radeon_mipmap_tree.h"
+
+static gl_format gl_format_and_type_to_mesa_format(GLenum format, GLenum type)
+{
+    switch (format)
+    {
+        case GL_RGB:
+            switch (type) {
+                case GL_UNSIGNED_SHORT_5_6_5:
+                    return MESA_FORMAT_RGB565;
+                case GL_UNSIGNED_SHORT_5_6_5_REV:
+                    return MESA_FORMAT_RGB565_REV;
+            }
+            break;
+        case GL_RGBA:
+            switch (type) {
+                case GL_UNSIGNED_BYTE:
+                    return MESA_FORMAT_RGBA8888_REV;
+                case GL_FLOAT:
+                    return MESA_FORMAT_RGBA_FLOAT32;
+                case GL_UNSIGNED_SHORT_4_4_4_4:
+                    return MESA_FORMAT_ARGB4444;
+                case GL_UNSIGNED_SHORT_4_4_4_4_REV:
+                    return MESA_FORMAT_ARGB4444;
+                case GL_UNSIGNED_SHORT_5_5_5_1:
+                    return MESA_FORMAT_RGBA5551;
+                case GL_UNSIGNED_SHORT_1_5_5_5_REV:
+                    return MESA_FORMAT_ARGB1555_REV;
+                case GL_UNSIGNED_INT_8_8_8_8:
+                    return MESA_FORMAT_ARGB8888;
+                case GL_UNSIGNED_INT_8_8_8_8_REV:
+                    return MESA_FORMAT_ARGB8888_REV;
+            }
+            break;
+    }
+
+    return MESA_FORMAT_NONE;
+}
+
+static GLboolean
+do_blit_readpixels(GLcontext * ctx,
+                   GLint x, GLint y, GLsizei width, GLsizei height,
+                   GLenum format, GLenum type,
+                   const struct gl_pixelstore_attrib *pack, GLvoid * pixels)
+{
+    radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+    const struct radeon_renderbuffer *rrb = radeon_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer);
+    const gl_format dst_format = gl_format_and_type_to_mesa_format(format, type);
+    unsigned dst_rowstride, dst_imagesize, aligned_rowstride, flip_y;
+    struct radeon_bo *dst_buffer;
+    GLint dst_x = 0, dst_y = 0;
+
+    /* It's not worth if number of pixels to copy is really small */
+    if (width * height < 100) {
+        return GL_FALSE;
+    }
+
+    if (dst_format == MESA_FORMAT_NONE ||
+        !radeon->vtbl.check_blit(dst_format) || !radeon->vtbl.blit) {
+        return GL_FALSE;
+    }
+
+    if (ctx->_ImageTransferState) {
+        return GL_FALSE;
+    }
+
+    if (pack->SwapBytes || pack->LsbFirst) {
+        return GL_FALSE;
+    }
+
+    if (pack->RowLength > 0) {
+        dst_rowstride = pack->RowLength;
+    } else {
+        dst_rowstride = width;
+    }
+
+    if (!_mesa_clip_copytexsubimage(ctx, &dst_x, &dst_y, &x, &y, &width, &height)) {
+        return GL_TRUE;
+    }
+    assert(x >= 0 && y >= 0);
+
+    aligned_rowstride = get_texture_image_row_stride(radeon, dst_format, dst_rowstride, 0);
+    dst_imagesize = get_texture_image_size(dst_format,
+                                           aligned_rowstride,
+                                           height, 1, 0);
+    dst_buffer = radeon_bo_open(radeon->radeonScreen->bom, 0, dst_imagesize, 1024, RADEON_GEM_DOMAIN_GTT, 0);
+
+    /* Disable source Y flipping for FBOs */
+    flip_y = (ctx->ReadBuffer->Name == 0);
+    if (pack->Invert) {
+        y = rrb->base.Height - height - y;
+        flip_y = !flip_y;
+    }
+
+    if (radeon->vtbl.blit(ctx,
+                          rrb->bo,
+                          rrb->draw_offset,
+                          rrb->base.Format,
+                          rrb->pitch / rrb->cpp,
+                          rrb->base.Width,
+                          rrb->base.Height,
+                          x,
+                          y,
+                          dst_buffer,
+                          0, /* dst_offset */
+                          dst_format,
+                          aligned_rowstride / _mesa_get_format_bytes(dst_format),
+                          width,
+                          height,
+                          0, /* dst_x */
+                          0, /* dst_y */
+                          width,
+                          height,
+                          flip_y))
+    {
+        radeon_bo_map(dst_buffer, 0);
+        dst_rowstride *= _mesa_get_format_bytes(dst_format);
+        copy_rows(pixels, dst_rowstride, dst_buffer->ptr,
+                  aligned_rowstride, height, dst_rowstride);
+        radeon_bo_unmap(dst_buffer);
+        radeon_bo_unref(dst_buffer);
+        return GL_TRUE;
+    } else {
+        radeon_bo_unref(dst_buffer);
+        return GL_FALSE;
+    }
+}
+
+void
+radeonReadPixels(GLcontext * ctx,
+                 GLint x, GLint y, GLsizei width, GLsizei height,
+                 GLenum format, GLenum type,
+                 const struct gl_pixelstore_attrib *pack, GLvoid * pixels)
+{
+    if (do_blit_readpixels(ctx, x, y, width, height, format, type, pack, pixels))
+        return;
+
+    /* Update Mesa state before calling down into _swrast_ReadPixels, as
+     * the spans code requires the computed buffer states to be up to date,
+     * but _swrast_ReadPixels only updates Mesa state after setting up
+     * the spans code.
+     */
+
+    radeon_print(RADEON_FALLBACKS, RADEON_NORMAL,
+                 "Falling back to sw for ReadPixels (format %s, type %s)\n",
+                 _mesa_lookup_enum_by_nr(format), _mesa_lookup_enum_by_nr(type));
+
+    if (ctx->NewState)
+        _mesa_update_state(ctx);
+
+    _swrast_ReadPixels(ctx, x, y, width, height, format, type, pack, pixels);
+}
index 3ccc711253bb725fa08e39ab578d67a561532315..2b655fbd953f8ebcc73c57d0fe18b7845f48fa36 100644 (file)
@@ -1006,3 +1006,19 @@ void radeonTexSubImage3D(GLcontext * ctx, GLenum target, GLint level,
        radeon_texsubimage(ctx, 3, target, level, xoffset, yoffset, zoffset, width, height, depth, 0,
                format, type, pixels, packing, texObj, texImage, 0);
 }
+
+unsigned radeonIsFormatRenderable(gl_format mesa_format)
+{
+       if (mesa_format == _dri_texformat_argb8888 || mesa_format == _dri_texformat_rgb565 ||
+               mesa_format == _dri_texformat_argb1555 || mesa_format == _dri_texformat_argb4444)
+               return 1;
+
+       switch (mesa_format)
+       {
+               case MESA_FORMAT_Z16:
+               case MESA_FORMAT_S8_Z24:
+                       return 1;
+               default:
+                       return 0;
+       }
+}
index f09dd65214229665aaf303e46bd2b4ebd1a33490..4ce639ea34eb5a3ed4e78138fc73291f2e582e68 100644 (file)
@@ -135,4 +135,6 @@ void radeonCopyTexSubImage2D(GLcontext *ctx, GLenum target, GLint level,
                        GLint x, GLint y,
                        GLsizei width, GLsizei height);
 
+unsigned radeonIsFormatRenderable(gl_format mesa_format);
+
 #endif
index 13de594aafbe8e5c90fca2887fa67609384c8091..ce85cf6a87bb85b855fa74f0ec2eb6760bbd484c 100644 (file)
@@ -59,7 +59,7 @@
 #endif
 
 #include "glapi/glapi.h"
-#include "glapi/glapitable.h"
+#include "glapi/glapi_priv.h"
 
 extern _glapi_proc __glapi_noop_table[];
 
@@ -291,45 +291,3 @@ _glapi_get_dispatch(void)
    return _glapi_Dispatch;
 #endif
 }
-
-
-
-
-/*
- * The dispatch table size (number of entries) is the size of the
- * _glapi_table struct plus the number of dynamic entries we can add.
- * The extra slots can be filled in by DRI drivers that register new extension
- * functions.
- */
-#define DISPATCH_TABLE_SIZE (sizeof(struct _glapi_table) / sizeof(void *) + MAX_EXTENSION_FUNCS)
-
-
-/**
- * Return size of dispatch table struct as number of functions (or
- * slots).
- */
-PUBLIC GLuint
-_glapi_get_dispatch_table_size(void)
-{
-   return DISPATCH_TABLE_SIZE;
-}
-
-
-/**
- * Make sure there are no NULL pointers in the given dispatch table.
- * Intended for debugging purposes.
- */
-void
-_glapi_check_table_not_null(const struct _glapi_table *table)
-{
-#if 0 /* enable this for extra DEBUG */
-   const GLuint entries = _glapi_get_dispatch_table_size();
-   const void **tab = (const void **) table;
-   GLuint i;
-   for (i = 1; i < entries; i++) {
-      assert(tab[i]);
-   }
-#else
-   (void) table;
-#endif
-}
index 1ca2e4beff12ae4ee7ff0cbf7c14135f0a9213e7..7dcf2e8910b2fe0201b6664ebb8e3e6f5e9d320c 100644 (file)
@@ -165,29 +165,8 @@ extern _glapi_proc
 _glapi_get_proc_address(const char *funcName);
 
 
-/**
- * GL API local functions and defines
- */
-
-extern void
-init_glapi_relocs_once(void);
-
-extern void
-_glapi_check_table_not_null(const struct _glapi_table *table);
-
-
-extern void
-_glapi_check_table(const struct _glapi_table *table);
-
-
 extern const char *
 _glapi_get_proc_name(unsigned int offset);
 
 
-/*
- * Number of extension functions which we can dynamically add at runtime.
- */
-#define MAX_EXTENSION_FUNCS 300
-
-
 #endif
diff --git a/src/mesa/glapi/glapi_entrypoint.c b/src/mesa/glapi/glapi_entrypoint.c
new file mode 100644 (file)
index 0000000..5e6e599
--- /dev/null
@@ -0,0 +1,331 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  7.1
+ *
+ * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file glapi_entrypoint.c
+ *
+ * Arch-specific code for manipulating GL API entrypoints (dispatch stubs).
+ */
+
+
+#ifdef HAVE_DIX_CONFIG_H
+#include <dix-config.h>
+#include "glapi/mesa.h"
+#else
+#include "main/glheader.h"
+#include "main/compiler.h"
+#endif
+
+#include "glapi/glapi.h"
+#include "glapi/glapi_priv.h"
+
+
+#ifdef USE_X86_ASM
+
+#if defined( GLX_USE_TLS )
+extern       GLubyte gl_dispatch_functions_start[];
+extern       GLubyte gl_dispatch_functions_end[];
+#else
+extern const GLubyte gl_dispatch_functions_start[];
+#endif
+
+#endif /* USE_X86_ASM */
+
+
+#if defined(DISPATCH_FUNCTION_SIZE)
+
+_glapi_proc
+get_entrypoint_address(GLuint functionOffset)
+{
+   return (_glapi_proc) (gl_dispatch_functions_start
+                         + (DISPATCH_FUNCTION_SIZE * functionOffset));
+}
+
+#endif
+
+
+#if defined(PTHREADS) || defined(GLX_USE_TLS)
+
+/**
+ * Perform platform-specific GL API entry-point fixups.
+ */
+static void
+init_glapi_relocs( void )
+{
+#if defined(USE_X86_ASM) && defined(GLX_USE_TLS) && !defined(GLX_X86_READONLY_TEXT)
+    extern unsigned long _x86_get_dispatch(void);
+    char run_time_patch[] = {
+       0x65, 0xa1, 0, 0, 0, 0 /* movl %gs:0,%eax */
+    };
+    GLuint *offset = (GLuint *) &run_time_patch[2]; /* 32-bits for x86/32 */
+    const GLubyte * const get_disp = (const GLubyte *) run_time_patch;
+    GLubyte * curr_func = (GLubyte *) gl_dispatch_functions_start;
+
+    *offset = _x86_get_dispatch();
+    while ( curr_func != (GLubyte *) gl_dispatch_functions_end ) {
+       (void) memcpy( curr_func, get_disp, sizeof(run_time_patch));
+       curr_func += DISPATCH_FUNCTION_SIZE;
+    }
+#endif
+#ifdef USE_SPARC_ASM
+    extern void __glapi_sparc_icache_flush(unsigned int *);
+    static const unsigned int template[] = {
+#ifdef GLX_USE_TLS
+       0x05000000, /* sethi %hi(_glapi_tls_Dispatch), %g2 */
+       0x8730e00a, /* srl %g3, 10, %g3 */
+       0x8410a000, /* or %g2, %lo(_glapi_tls_Dispatch), %g2 */
+#ifdef __arch64__
+       0xc259c002, /* ldx [%g7 + %g2], %g1 */
+       0xc2584003, /* ldx [%g1 + %g3], %g1 */
+#else
+       0xc201c002, /* ld [%g7 + %g2], %g1 */
+       0xc2004003, /* ld [%g1 + %g3], %g1 */
+#endif
+       0x81c04000, /* jmp %g1 */
+       0x01000000, /* nop  */
+#else
+#ifdef __arch64__
+       0x03000000, /* 64-bit 0x00 --> sethi %hh(_glapi_Dispatch), %g1 */
+       0x05000000, /* 64-bit 0x04 --> sethi %lm(_glapi_Dispatch), %g2 */
+       0x82106000, /* 64-bit 0x08 --> or %g1, %hm(_glapi_Dispatch), %g1 */
+       0x8730e00a, /* 64-bit 0x0c --> srl %g3, 10, %g3 */
+       0x83287020, /* 64-bit 0x10 --> sllx %g1, 32, %g1 */
+       0x82004002, /* 64-bit 0x14 --> add %g1, %g2, %g1 */
+       0xc2586000, /* 64-bit 0x18 --> ldx [%g1 + %lo(_glapi_Dispatch)], %g1 */
+#else
+       0x03000000, /* 32-bit 0x00 --> sethi %hi(_glapi_Dispatch), %g1 */
+       0x8730e00a, /* 32-bit 0x04 --> srl %g3, 10, %g3 */
+       0xc2006000, /* 32-bit 0x08 --> ld [%g1 + %lo(_glapi_Dispatch)], %g1 */
+#endif
+       0x80a06000, /*             --> cmp %g1, 0 */
+       0x02800005, /*             --> be +4*5 */
+       0x01000000, /*             -->  nop  */
+#ifdef __arch64__
+       0xc2584003, /* 64-bit      --> ldx [%g1 + %g3], %g1 */
+#else
+       0xc2004003, /* 32-bit      --> ld [%g1 + %g3], %g1 */
+#endif
+       0x81c04000, /*             --> jmp %g1 */
+       0x01000000, /*             --> nop  */
+#ifdef __arch64__
+       0x9de3bf80, /* 64-bit      --> save  %sp, -128, %sp */
+#else
+       0x9de3bfc0, /* 32-bit      --> save  %sp, -64, %sp */
+#endif
+       0xa0100003, /*             --> mov  %g3, %l0 */
+       0x40000000, /*             --> call _glapi_get_dispatch */
+       0x01000000, /*             -->  nop */
+       0x82100008, /*             --> mov %o0, %g1 */
+       0x86100010, /*             --> mov %l0, %g3 */
+       0x10bffff7, /*             --> ba -4*9 */
+       0x81e80000, /*             -->  restore  */
+#endif
+    };
+#ifdef GLX_USE_TLS
+    extern unsigned int __glapi_sparc_tls_stub;
+    extern unsigned long __glapi_sparc_get_dispatch(void);
+    unsigned int *code = &__glapi_sparc_tls_stub;
+    unsigned long dispatch = __glapi_sparc_get_dispatch();
+#else
+    extern unsigned int __glapi_sparc_pthread_stub;
+    unsigned int *code = &__glapi_sparc_pthread_stub;
+    unsigned long dispatch = (unsigned long) &_glapi_Dispatch;
+    unsigned long call_dest = (unsigned long ) &_glapi_get_dispatch;
+    int idx;
+#endif
+
+#if defined(GLX_USE_TLS)
+    code[0] = template[0] | (dispatch >> 10);
+    code[1] = template[1];
+    __glapi_sparc_icache_flush(&code[0]);
+    code[2] = template[2] | (dispatch & 0x3ff);
+    code[3] = template[3];
+    __glapi_sparc_icache_flush(&code[2]);
+    code[4] = template[4];
+    code[5] = template[5];
+    __glapi_sparc_icache_flush(&code[4]);
+    code[6] = template[6];
+    __glapi_sparc_icache_flush(&code[6]);
+#else
+#if defined(__arch64__)
+    code[0] = template[0] | (dispatch >> (32 + 10));
+    code[1] = template[1] | ((dispatch & 0xffffffff) >> 10);
+    __glapi_sparc_icache_flush(&code[0]);
+    code[2] = template[2] | ((dispatch >> 32) & 0x3ff);
+    code[3] = template[3];
+    __glapi_sparc_icache_flush(&code[2]);
+    code[4] = template[4];
+    code[5] = template[5];
+    __glapi_sparc_icache_flush(&code[4]);
+    code[6] = template[6] | (dispatch & 0x3ff);
+    idx = 7;
+#else
+    code[0] = template[0] | (dispatch >> 10);
+    code[1] = template[1];
+    __glapi_sparc_icache_flush(&code[0]);
+    code[2] = template[2] | (dispatch & 0x3ff);
+    idx = 3;
+#endif
+    code[idx + 0] = template[idx + 0];
+    __glapi_sparc_icache_flush(&code[idx - 1]);
+    code[idx + 1] = template[idx + 1];
+    code[idx + 2] = template[idx + 2];
+    __glapi_sparc_icache_flush(&code[idx + 1]);
+    code[idx + 3] = template[idx + 3];
+    code[idx + 4] = template[idx + 4];
+    __glapi_sparc_icache_flush(&code[idx + 3]);
+    code[idx + 5] = template[idx + 5];
+    code[idx + 6] = template[idx + 6];
+    __glapi_sparc_icache_flush(&code[idx + 5]);
+    code[idx + 7] = template[idx + 7];
+    code[idx + 8] = template[idx + 8] |
+           (((call_dest - ((unsigned long) &code[idx + 8]))
+             >> 2) & 0x3fffffff);
+    __glapi_sparc_icache_flush(&code[idx + 7]);
+    code[idx + 9] = template[idx + 9];
+    code[idx + 10] = template[idx + 10];
+    __glapi_sparc_icache_flush(&code[idx + 9]);
+    code[idx + 11] = template[idx + 11];
+    code[idx + 12] = template[idx + 12];
+    __glapi_sparc_icache_flush(&code[idx + 11]);
+    code[idx + 13] = template[idx + 13];
+    __glapi_sparc_icache_flush(&code[idx + 13]);
+#endif
+#endif
+}
+
+void
+init_glapi_relocs_once( void )
+{
+   static pthread_once_t once_control = PTHREAD_ONCE_INIT;
+   pthread_once( & once_control, init_glapi_relocs );
+}
+
+#else
+
+void
+init_glapi_relocs_once( void ) { }
+
+#endif /* defined(PTHREADS) || defined(GLX_USE_TLS) */
+
+
+#ifdef USE_SPARC_ASM
+extern void __glapi_sparc_icache_flush(unsigned int *);
+#endif
+
+/**
+ * Generate a dispatch function (entrypoint) which jumps through
+ * the given slot number (offset) in the current dispatch table.
+ * We need assembly language in order to accomplish this.
+ */
+_glapi_proc
+generate_entrypoint(GLuint functionOffset)
+{
+#if defined(USE_X86_ASM)
+   /* 32 is chosen as something of a magic offset.  For x86, the dispatch
+    * at offset 32 is the first one where the offset in the
+    * "jmp OFFSET*4(%eax)" can't be encoded in a single byte.
+    */
+   const GLubyte * const template_func = gl_dispatch_functions_start 
+     + (DISPATCH_FUNCTION_SIZE * 32);
+   GLubyte * const code = (GLubyte *) malloc(DISPATCH_FUNCTION_SIZE);
+
+
+   if ( code != NULL ) {
+      (void) memcpy(code, template_func, DISPATCH_FUNCTION_SIZE);
+      fill_in_entrypoint_offset( (_glapi_proc) code, functionOffset );
+   }
+
+   return (_glapi_proc) code;
+#elif defined(USE_SPARC_ASM)
+
+#if defined(PTHREADS) || defined(GLX_USE_TLS)
+   static const unsigned int template[] = {
+      0x07000000, /* sethi %hi(0), %g3 */
+      0x8210000f, /* mov  %o7, %g1 */
+      0x40000000, /* call */
+      0x9e100001, /* mov  %g1, %o7 */
+   };
+#ifdef GLX_USE_TLS
+   extern unsigned int __glapi_sparc_tls_stub;
+   unsigned long call_dest = (unsigned long ) &__glapi_sparc_tls_stub;
+#else
+   extern unsigned int __glapi_sparc_pthread_stub;
+   unsigned long call_dest = (unsigned long ) &__glapi_sparc_pthread_stub;
+#endif
+   unsigned int *code = (unsigned int *) malloc(sizeof(template));
+   if (code) {
+      code[0] = template[0] | (functionOffset & 0x3fffff);
+      code[1] = template[1];
+      __glapi_sparc_icache_flush(&code[0]);
+      code[2] = template[2] |
+         (((call_dest - ((unsigned long) &code[2]))
+          >> 2) & 0x3fffffff);
+      code[3] = template[3];
+      __glapi_sparc_icache_flush(&code[2]);
+   }
+   return (_glapi_proc) code;
+#endif
+
+#else
+   (void) functionOffset;
+   return NULL;
+#endif /* USE_*_ASM */
+}
+
+
+/**
+ * This function inserts a new dispatch offset into the assembly language
+ * stub that was generated with the preceeding function.
+ */
+void
+fill_in_entrypoint_offset(_glapi_proc entrypoint, GLuint offset)
+{
+#if defined(USE_X86_ASM)
+   GLubyte * const code = (GLubyte *) entrypoint;
+
+#if DISPATCH_FUNCTION_SIZE == 32
+   *((unsigned int *)(code + 11)) = 4 * offset;
+   *((unsigned int *)(code + 22)) = 4 * offset;
+#elif DISPATCH_FUNCTION_SIZE == 16 && defined( GLX_USE_TLS )
+   *((unsigned int *)(code +  8)) = 4 * offset;
+#elif DISPATCH_FUNCTION_SIZE == 16
+   *((unsigned int *)(code +  7)) = 4 * offset;
+#else
+# error Invalid DISPATCH_FUNCTION_SIZE!
+#endif
+
+#elif defined(USE_SPARC_ASM)
+   unsigned int *code = (unsigned int *) entrypoint;
+   code[0] &= ~0x3fffff;
+   code[0] |= (offset * sizeof(void *)) & 0x3fffff;
+   __glapi_sparc_icache_flush(&code[0]);
+#else
+
+   /* an unimplemented architecture */
+   (void) entrypoint;
+   (void) offset;
+
+#endif /* USE_*_ASM */
+}
index a6dbf173e82d98228ef4008df5fdbdf30eb2500b..46b466920b6b96723f68aa580b431614084ac81d 100644 (file)
 #endif
 
 #include "glapi/glapi.h"
-#include "glapi/glapioffsets.h"
+#include "glapi/glapi_priv.h"
 #include "glapi/glapitable.h"
+#include "glapi/glapioffsets.h"
 
 
-#if defined(USE_X64_64_ASM) && defined(GLX_USE_TLS)
-# define DISPATCH_FUNCTION_SIZE  16
-#elif defined(USE_X86_ASM)
-# if defined(THREADS) && !defined(GLX_USE_TLS)
-#  define DISPATCH_FUNCTION_SIZE  32
-# else
-#  define DISPATCH_FUNCTION_SIZE  16
-# endif
-#endif
-
-#if !defined(DISPATCH_FUNCTION_SIZE) && !defined(XFree86Server) && !defined(XGLServer)
+#if !defined(DISPATCH_FUNCTION_SIZE) && !defined(XFree86Server)
 # define NEED_FUNCTION_POINTER
 #endif
 
@@ -72,7 +63,7 @@ find_entry( const char * n )
    for (i = 0; static_functions[i].Name_offset >= 0; i++) {
       const char *testName = gl_string_table + static_functions[i].Name_offset;
 #ifdef MANGLE
-      /* skip the "m" prefix on the name */
+      /* skip the prefix on the name */
       if (strcmp(testName, n + 1) == 0)
 #else
       if (strcmp(testName, n) == 0)
@@ -100,19 +91,7 @@ get_static_proc_offset(const char *funcName)
 }
 
 
-#ifdef USE_X86_ASM
-
-#if defined( GLX_USE_TLS )
-extern       GLubyte gl_dispatch_functions_start[];
-extern       GLubyte gl_dispatch_functions_end[];
-#else
-extern const GLubyte gl_dispatch_functions_start[];
-#endif
-
-#endif /* USE_X86_ASM */
-
-
-#if !defined(XFree86Server) && !defined(XGLServer)
+#if !defined(XFree86Server)
 
 /**
  * Return dispatch function address for the named static (built-in) function.
@@ -125,12 +104,10 @@ get_static_proc_address(const char *funcName)
    if (f) {
 #if defined(DISPATCH_FUNCTION_SIZE) && defined(GLX_INDIRECT_RENDERING)
       return (f->Address == NULL)
-        ? (_glapi_proc) (gl_dispatch_functions_start
-                         + (DISPATCH_FUNCTION_SIZE * f->Offset))
+        ? get_entrypoint_address(f->Offset)
          : f->Address;
 #elif defined(DISPATCH_FUNCTION_SIZE)
-      return (_glapi_proc) (gl_dispatch_functions_start 
-                            + (DISPATCH_FUNCTION_SIZE * f->Offset));
+      return get_entrypoint_address(f->Offset);
 #else
       return f->Address;
 #endif
@@ -140,7 +117,7 @@ get_static_proc_address(const char *funcName)
    }
 }
 
-#endif /* !defined(XFree86Server) && !defined(XGLServer) */
+#endif /* !defined(XFree86Server) */
 
 
 
@@ -162,172 +139,6 @@ get_static_proc_name( GLuint offset )
 
 
 
-#if defined(PTHREADS) || defined(GLX_USE_TLS)
-
-/**
- * Perform platform-specific GL API entry-point fixups.
- */
-static void
-init_glapi_relocs( void )
-{
-#if defined(USE_X86_ASM) && defined(GLX_USE_TLS) && !defined(GLX_X86_READONLY_TEXT)
-    extern unsigned long _x86_get_dispatch(void);
-    char run_time_patch[] = {
-       0x65, 0xa1, 0, 0, 0, 0 /* movl %gs:0,%eax */
-    };
-    GLuint *offset = (GLuint *) &run_time_patch[2]; /* 32-bits for x86/32 */
-    const GLubyte * const get_disp = (const GLubyte *) run_time_patch;
-    GLubyte * curr_func = (GLubyte *) gl_dispatch_functions_start;
-
-    *offset = _x86_get_dispatch();
-    while ( curr_func != (GLubyte *) gl_dispatch_functions_end ) {
-       (void) memcpy( curr_func, get_disp, sizeof(run_time_patch));
-       curr_func += DISPATCH_FUNCTION_SIZE;
-    }
-#endif
-#ifdef USE_SPARC_ASM
-    extern void __glapi_sparc_icache_flush(unsigned int *);
-    static const unsigned int template[] = {
-#ifdef GLX_USE_TLS
-       0x05000000, /* sethi %hi(_glapi_tls_Dispatch), %g2 */
-       0x8730e00a, /* srl %g3, 10, %g3 */
-       0x8410a000, /* or %g2, %lo(_glapi_tls_Dispatch), %g2 */
-#ifdef __arch64__
-       0xc259c002, /* ldx [%g7 + %g2], %g1 */
-       0xc2584003, /* ldx [%g1 + %g3], %g1 */
-#else
-       0xc201c002, /* ld [%g7 + %g2], %g1 */
-       0xc2004003, /* ld [%g1 + %g3], %g1 */
-#endif
-       0x81c04000, /* jmp %g1 */
-       0x01000000, /* nop  */
-#else
-#ifdef __arch64__
-       0x03000000, /* 64-bit 0x00 --> sethi %hh(_glapi_Dispatch), %g1 */
-       0x05000000, /* 64-bit 0x04 --> sethi %lm(_glapi_Dispatch), %g2 */
-       0x82106000, /* 64-bit 0x08 --> or %g1, %hm(_glapi_Dispatch), %g1 */
-       0x8730e00a, /* 64-bit 0x0c --> srl %g3, 10, %g3 */
-       0x83287020, /* 64-bit 0x10 --> sllx %g1, 32, %g1 */
-       0x82004002, /* 64-bit 0x14 --> add %g1, %g2, %g1 */
-       0xc2586000, /* 64-bit 0x18 --> ldx [%g1 + %lo(_glapi_Dispatch)], %g1 */
-#else
-       0x03000000, /* 32-bit 0x00 --> sethi %hi(_glapi_Dispatch), %g1 */
-       0x8730e00a, /* 32-bit 0x04 --> srl %g3, 10, %g3 */
-       0xc2006000, /* 32-bit 0x08 --> ld [%g1 + %lo(_glapi_Dispatch)], %g1 */
-#endif
-       0x80a06000, /*             --> cmp %g1, 0 */
-       0x02800005, /*             --> be +4*5 */
-       0x01000000, /*             -->  nop  */
-#ifdef __arch64__
-       0xc2584003, /* 64-bit      --> ldx [%g1 + %g3], %g1 */
-#else
-       0xc2004003, /* 32-bit      --> ld [%g1 + %g3], %g1 */
-#endif
-       0x81c04000, /*             --> jmp %g1 */
-       0x01000000, /*             --> nop  */
-#ifdef __arch64__
-       0x9de3bf80, /* 64-bit      --> save  %sp, -128, %sp */
-#else
-       0x9de3bfc0, /* 32-bit      --> save  %sp, -64, %sp */
-#endif
-       0xa0100003, /*             --> mov  %g3, %l0 */
-       0x40000000, /*             --> call _glapi_get_dispatch */
-       0x01000000, /*             -->  nop */
-       0x82100008, /*             --> mov %o0, %g1 */
-       0x86100010, /*             --> mov %l0, %g3 */
-       0x10bffff7, /*             --> ba -4*9 */
-       0x81e80000, /*             -->  restore  */
-#endif
-    };
-#ifdef GLX_USE_TLS
-    extern unsigned int __glapi_sparc_tls_stub;
-    extern unsigned long __glapi_sparc_get_dispatch(void);
-    unsigned int *code = &__glapi_sparc_tls_stub;
-    unsigned long dispatch = __glapi_sparc_get_dispatch();
-#else
-    extern unsigned int __glapi_sparc_pthread_stub;
-    unsigned int *code = &__glapi_sparc_pthread_stub;
-    unsigned long dispatch = (unsigned long) &_glapi_Dispatch;
-    unsigned long call_dest = (unsigned long ) &_glapi_get_dispatch;
-    int idx;
-#endif
-
-#if defined(GLX_USE_TLS)
-    code[0] = template[0] | (dispatch >> 10);
-    code[1] = template[1];
-    __glapi_sparc_icache_flush(&code[0]);
-    code[2] = template[2] | (dispatch & 0x3ff);
-    code[3] = template[3];
-    __glapi_sparc_icache_flush(&code[2]);
-    code[4] = template[4];
-    code[5] = template[5];
-    __glapi_sparc_icache_flush(&code[4]);
-    code[6] = template[6];
-    __glapi_sparc_icache_flush(&code[6]);
-#else
-#if defined(__arch64__)
-    code[0] = template[0] | (dispatch >> (32 + 10));
-    code[1] = template[1] | ((dispatch & 0xffffffff) >> 10);
-    __glapi_sparc_icache_flush(&code[0]);
-    code[2] = template[2] | ((dispatch >> 32) & 0x3ff);
-    code[3] = template[3];
-    __glapi_sparc_icache_flush(&code[2]);
-    code[4] = template[4];
-    code[5] = template[5];
-    __glapi_sparc_icache_flush(&code[4]);
-    code[6] = template[6] | (dispatch & 0x3ff);
-    idx = 7;
-#else
-    code[0] = template[0] | (dispatch >> 10);
-    code[1] = template[1];
-    __glapi_sparc_icache_flush(&code[0]);
-    code[2] = template[2] | (dispatch & 0x3ff);
-    idx = 3;
-#endif
-    code[idx + 0] = template[idx + 0];
-    __glapi_sparc_icache_flush(&code[idx - 1]);
-    code[idx + 1] = template[idx + 1];
-    code[idx + 2] = template[idx + 2];
-    __glapi_sparc_icache_flush(&code[idx + 1]);
-    code[idx + 3] = template[idx + 3];
-    code[idx + 4] = template[idx + 4];
-    __glapi_sparc_icache_flush(&code[idx + 3]);
-    code[idx + 5] = template[idx + 5];
-    code[idx + 6] = template[idx + 6];
-    __glapi_sparc_icache_flush(&code[idx + 5]);
-    code[idx + 7] = template[idx + 7];
-    code[idx + 8] = template[idx + 8] |
-           (((call_dest - ((unsigned long) &code[idx + 8]))
-             >> 2) & 0x3fffffff);
-    __glapi_sparc_icache_flush(&code[idx + 7]);
-    code[idx + 9] = template[idx + 9];
-    code[idx + 10] = template[idx + 10];
-    __glapi_sparc_icache_flush(&code[idx + 9]);
-    code[idx + 11] = template[idx + 11];
-    code[idx + 12] = template[idx + 12];
-    __glapi_sparc_icache_flush(&code[idx + 11]);
-    code[idx + 13] = template[idx + 13];
-    __glapi_sparc_icache_flush(&code[idx + 13]);
-#endif
-#endif
-}
-
-void
-init_glapi_relocs_once( void )
-{
-   static pthread_once_t once_control = PTHREAD_ONCE_INIT;
-   pthread_once( & once_control, init_glapi_relocs );
-}
-
-#else
-
-void
-init_glapi_relocs_once( void ) { }
-
-#endif /* defined(PTHREADS) || defined(GLX_USE_TLS) */
-
-
-
 /**********************************************************************
  * Extension function management.
  */
@@ -378,111 +189,14 @@ struct _glapi_function {
 };
 
 
-static struct _glapi_function ExtEntryTable[MAX_EXTENSION_FUNCS];
-static GLuint NumExtEntryPoints = 0;
-
-#ifdef USE_SPARC_ASM
-extern void __glapi_sparc_icache_flush(unsigned int *);
-#endif
-
-static void
-fill_in_entrypoint_offset(_glapi_proc entrypoint, GLuint offset);
-
-/**
- * Generate a dispatch function (entrypoint) which jumps through
- * the given slot number (offset) in the current dispatch table.
- * We need assembly language in order to accomplish this.
- */
-static _glapi_proc
-generate_entrypoint(GLuint functionOffset)
-{
-#if defined(USE_X86_ASM)
-   /* 32 is chosen as something of a magic offset.  For x86, the dispatch
-    * at offset 32 is the first one where the offset in the
-    * "jmp OFFSET*4(%eax)" can't be encoded in a single byte.
-    */
-   const GLubyte * const template_func = gl_dispatch_functions_start 
-     + (DISPATCH_FUNCTION_SIZE * 32);
-   GLubyte * const code = (GLubyte *) malloc(DISPATCH_FUNCTION_SIZE);
-
-
-   if ( code != NULL ) {
-      (void) memcpy(code, template_func, DISPATCH_FUNCTION_SIZE);
-      fill_in_entrypoint_offset( (_glapi_proc) code, functionOffset );
-   }
-
-   return (_glapi_proc) code;
-#elif defined(USE_SPARC_ASM)
-
-#if defined(PTHREADS) || defined(GLX_USE_TLS)
-   static const unsigned int template[] = {
-      0x07000000, /* sethi %hi(0), %g3 */
-      0x8210000f, /* mov  %o7, %g1 */
-      0x40000000, /* call */
-      0x9e100001, /* mov  %g1, %o7 */
-   };
-#ifdef GLX_USE_TLS
-   extern unsigned int __glapi_sparc_tls_stub;
-   unsigned long call_dest = (unsigned long ) &__glapi_sparc_tls_stub;
-#else
-   extern unsigned int __glapi_sparc_pthread_stub;
-   unsigned long call_dest = (unsigned long ) &__glapi_sparc_pthread_stub;
-#endif
-   unsigned int *code = (unsigned int *) malloc(sizeof(template));
-   if (code) {
-      code[0] = template[0] | (functionOffset & 0x3fffff);
-      code[1] = template[1];
-      __glapi_sparc_icache_flush(&code[0]);
-      code[2] = template[2] |
-         (((call_dest - ((unsigned long) &code[2]))
-          >> 2) & 0x3fffffff);
-      code[3] = template[3];
-      __glapi_sparc_icache_flush(&code[2]);
-   }
-   return (_glapi_proc) code;
-#endif
-
-#else
-   (void) functionOffset;
-   return NULL;
-#endif /* USE_*_ASM */
-}
-
-
-/**
- * This function inserts a new dispatch offset into the assembly language
- * stub that was generated with the preceeding function.
+/*
+ * Number of extension functions which we can dynamically add at runtime.
  */
-static void
-fill_in_entrypoint_offset(_glapi_proc entrypoint, GLuint offset)
-{
-#if defined(USE_X86_ASM)
-   GLubyte * const code = (GLubyte *) entrypoint;
-
-#if DISPATCH_FUNCTION_SIZE == 32
-   *((unsigned int *)(code + 11)) = 4 * offset;
-   *((unsigned int *)(code + 22)) = 4 * offset;
-#elif DISPATCH_FUNCTION_SIZE == 16 && defined( GLX_USE_TLS )
-   *((unsigned int *)(code +  8)) = 4 * offset;
-#elif DISPATCH_FUNCTION_SIZE == 16
-   *((unsigned int *)(code +  7)) = 4 * offset;
-#else
-# error Invalid DISPATCH_FUNCTION_SIZE!
-#endif
-
-#elif defined(USE_SPARC_ASM)
-   unsigned int *code = (unsigned int *) entrypoint;
-   code[0] &= ~0x3fffff;
-   code[0] |= (offset * sizeof(void *)) & 0x3fffff;
-   __glapi_sparc_icache_flush(&code[0]);
-#else
+#define MAX_EXTENSION_FUNCS 300
 
-   /* an unimplemented architecture */
-   (void) entrypoint;
-   (void) offset;
 
-#endif /* USE_*_ASM */
-}
+static struct _glapi_function ExtEntryTable[MAX_EXTENSION_FUNCS];
+static GLuint NumExtEntryPoints = 0;
 
 
 /**
@@ -710,7 +424,8 @@ _glapi_get_proc_address(const char *funcName)
    GLuint i;
 
 #ifdef MANGLE
-   if (funcName[0] != 'm' || funcName[1] != 'g' || funcName[2] != 'l')
+   /* skip the prefix on the name */
+   if (funcName[1] != 'g' || funcName[2] != 'l')
       return NULL;
 #else
    if (funcName[0] != 'g' || funcName[1] != 'l')
@@ -724,7 +439,7 @@ _glapi_get_proc_address(const char *funcName)
       }
    }
 
-#if !defined( XFree86Server ) && !defined( XGLServer )
+#if !defined( XFree86Server )
    /* search static functions */
    {
       const _glapi_proc func = get_static_proc_address(funcName);
@@ -766,6 +481,51 @@ _glapi_get_proc_name(GLuint offset)
 
 
 
+/**********************************************************************
+ * GL API table functions.
+ */
+
+
+/*
+ * The dispatch table size (number of entries) is the size of the
+ * _glapi_table struct plus the number of dynamic entries we can add.
+ * The extra slots can be filled in by DRI drivers that register new extension
+ * functions.
+ */
+#define DISPATCH_TABLE_SIZE (sizeof(struct _glapi_table) / sizeof(void *) + MAX_EXTENSION_FUNCS)
+
+
+/**
+ * Return size of dispatch table struct as number of functions (or
+ * slots).
+ */
+PUBLIC GLuint
+_glapi_get_dispatch_table_size(void)
+{
+   return DISPATCH_TABLE_SIZE;
+}
+
+
+/**
+ * Make sure there are no NULL pointers in the given dispatch table.
+ * Intended for debugging purposes.
+ */
+void
+_glapi_check_table_not_null(const struct _glapi_table *table)
+{
+#ifdef EXTRA_DEBUG /* set to DEBUG for extra DEBUG */
+   const GLuint entries = _glapi_get_dispatch_table_size();
+   const void **tab = (const void **) table;
+   GLuint i;
+   for (i = 1; i < entries; i++) {
+      assert(tab[i]);
+   }
+#else
+   (void) table;
+#endif
+}
+
+
 /**
  * Do some spot checks to be sure that the dispatch table
  * slots are assigned correctly. For debugging only.
@@ -773,7 +533,7 @@ _glapi_get_proc_name(GLuint offset)
 void
 _glapi_check_table(const struct _glapi_table *table)
 {
-#if 0 /* enable this for extra DEBUG */
+#ifdef EXTRA_DEBUG /* set to DEBUG for extra DEBUG */
    {
       GLuint BeginOffset = _glapi_get_proc_offset("glBegin");
       char *BeginFunc = (char*) &table->Begin;
diff --git a/src/mesa/glapi/glapi_priv.h b/src/mesa/glapi/glapi_priv.h
new file mode 100644 (file)
index 0000000..7cd81ee
--- /dev/null
@@ -0,0 +1,66 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  7.1
+ *
+ * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef _GLAPI_PRIV_H
+#define _GLAPI_PRIV_H
+
+#include "glthread.h"
+
+extern void
+_glapi_check_table_not_null(const struct _glapi_table *table);
+
+
+extern void
+_glapi_check_table(const struct _glapi_table *table);
+
+
+extern void
+init_glapi_relocs_once(void);
+
+
+extern _glapi_proc
+generate_entrypoint(GLuint functionOffset);
+
+
+extern void
+fill_in_entrypoint_offset(_glapi_proc entrypoint, GLuint offset);
+
+
+extern _glapi_proc
+get_entrypoint_address(GLuint functionOffset);
+
+
+#if defined(USE_X64_64_ASM) && defined(GLX_USE_TLS)
+# define DISPATCH_FUNCTION_SIZE  16
+#elif defined(USE_X86_ASM)
+# if defined(THREADS) && !defined(GLX_USE_TLS)
+#  define DISPATCH_FUNCTION_SIZE  32
+# else
+#  define DISPATCH_FUNCTION_SIZE  16
+# endif
+#endif
+
+
+#endif
index 9f2e4e51575b72e29dc2be3ce9953646a559d4af..74885548e5a2abfd8b5539919621c2506241beaf 100644 (file)
@@ -88,6 +88,7 @@ MAIN_SOURCES = \
 GLAPI_SOURCES = \
        glapi/glapi.c \
        glapi/glapi_dispatch.c \
+       glapi/glapi_entrypoint.c \
        glapi/glapi_getproc.c \
        glapi/glapi_nop.c \
        glapi/glthread.c
index 0332d4dbdfeba5fa88cacb027098605ebcf1303c..f326601c3beaa7fd99fb64681b8d5695fef81244 100644 (file)
@@ -440,6 +440,7 @@ draw_bitmap_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z,
    cso_save_viewport(cso);
    cso_save_fragment_shader(cso);
    cso_save_vertex_shader(cso);
+   cso_save_vertex_elements(cso);
 
    /* rasterizer state: just scissor */
    st->bitmap.rasterizer.scissor = ctx->Scissor.Enabled;
@@ -490,6 +491,8 @@ draw_bitmap_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z,
       cso_set_viewport(cso, &vp);
    }
 
+   cso_set_vertex_elements(cso, 3, st->velems_util_draw);
+
    /* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */
    z = z * 2.0 - 1.0;
 
@@ -509,6 +512,7 @@ draw_bitmap_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z,
    cso_restore_viewport(cso);
    cso_restore_fragment_shader(cso);
    cso_restore_vertex_shader(cso);
+   cso_restore_vertex_elements(cso);
 }
 
 
index 9e66eed36348cae45fc5c96d6dd4b6998b938981..de86062fc404ed601d34d02c0b4eb887368db191 100644 (file)
@@ -213,6 +213,7 @@ clear_with_quad(GLcontext *ctx,
    cso_save_clip(st->cso_context);
    cso_save_fragment_shader(st->cso_context);
    cso_save_vertex_shader(st->cso_context);
+   cso_save_vertex_elements(st->cso_context);
 
    /* blend state: RGBA masking */
    {
@@ -264,6 +265,8 @@ clear_with_quad(GLcontext *ctx,
       cso_set_depth_stencil_alpha(st->cso_context, &depth_stencil);
    }
 
+   cso_set_vertex_elements(st->cso_context, 2, st->velems_util_draw);
+
    cso_set_rasterizer(st->cso_context, &st->clear.raster);
 
    /* viewport state: viewport matching window dims */
@@ -297,6 +300,8 @@ clear_with_quad(GLcontext *ctx,
    cso_restore_clip(st->cso_context);
    cso_restore_fragment_shader(st->cso_context);
    cso_restore_vertex_shader(st->cso_context);
+   cso_restore_vertex_elements(st->cso_context);
+
 }
 
 
index 98453321f86adea05e38e15a981879e0e6185132..c609435a158a1600d6f3a35fd69b3f6730d5bd57 100644 (file)
@@ -529,6 +529,7 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z,
    cso_save_sampler_textures(cso);
    cso_save_fragment_shader(cso);
    cso_save_vertex_shader(cso);
+   cso_save_vertex_elements(cso);
 
    /* rasterizer state: just scissor */
    {
@@ -581,6 +582,8 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z,
       cso_set_viewport(cso, &vp);
    }
 
+   cso_set_vertex_elements(cso, 3, st->velems_util_draw);
+
    /* texture state: */
    if (st->pixel_xfer.pixelmap_enabled) {
       struct pipe_texture *textures[2];
@@ -615,6 +618,7 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z,
    cso_restore_sampler_textures(cso);
    cso_restore_fragment_shader(cso);
    cso_restore_vertex_shader(cso);
+   cso_restore_vertex_elements(cso);
 }
 
 
index de8beaf5e25ec65e4df791aeeb3be0dcdafcc43c..0358a70726123082e7d4b3f48622daff910443f5 100644 (file)
@@ -141,6 +141,14 @@ st_create_context_priv( GLcontext *ctx, struct pipe_context *pipe )
    for (i = 0; i < PIPE_MAX_SAMPLERS; i++)
       st->state.sampler_list[i] = &st->state.samplers[i];
 
+   for (i = 0; i < 3; i++) {
+      memset(&st->velems_util_draw[i], 0, sizeof(struct pipe_vertex_element));
+      st->velems_util_draw[i].src_offset = i * 4 * sizeof(float);
+      st->velems_util_draw[i].instance_divisor = 0;
+      st->velems_util_draw[i].vertex_buffer_index = 0;
+      st->velems_util_draw[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   }
+
    /* we want all vertex data to be placed in buffer objects */
    vbo_use_buffer_objects(ctx);
 
index 045c029c3055394d711817beb5ad9c8454ae80b1..6622361a7e3e56c3270ebd8807eb380b1073b9bc 100644 (file)
@@ -174,6 +174,9 @@ struct st_context
       unsigned vbuf_slot;
    } clear;
 
+   /** used for anything using util_draw_vertex_buffer */
+   struct pipe_vertex_element velems_util_draw[3];
+
    void *passthrough_fs;  /**< simple pass-through frag shader */
 
    struct gen_mipmap_state *gen_mipmap;
index 32b9a473cfc693bd2551f6dcdb3f7faeadc272f8..8a6e1ed4662a4a8df2469c9be96cdb160ca5413e 100644 (file)
@@ -57,6 +57,7 @@
 #include "pipe/p_defines.h"
 #include "util/u_inlines.h"
 #include "util/u_format.h"
+#include "cso_cache/cso_context.h"
 
 
 static GLuint double_types[4] = {
@@ -368,7 +369,6 @@ setup_interleaved_attribs(GLcontext *ctx,
          (unsigned) (arrays[mesaAttr]->Ptr - offset0);
       velements[attr].instance_divisor = 0;
       velements[attr].vertex_buffer_index = 0;
-      velements[attr].nr_components = arrays[mesaAttr]->Size;
       velements[attr].src_format =
          st_pipe_vertex_format(arrays[mesaAttr]->Type,
                                arrays[mesaAttr]->Size,
@@ -458,7 +458,6 @@ setup_non_interleaved_attribs(GLcontext *ctx,
       vbuffer[attr].max_index = max_index;
       velements[attr].instance_divisor = 0;
       velements[attr].vertex_buffer_index = attr;
-      velements[attr].nr_components = arrays[mesaAttr]->Size;
       velements[attr].src_format
          = st_pipe_vertex_format(arrays[mesaAttr]->Type,
                                  arrays[mesaAttr]->Size,
@@ -564,6 +563,7 @@ st_draw_vbo(GLcontext *ctx,
    (void) check_uniforms;
 #endif
 
+   memset(velements, 0, sizeof(struct pipe_vertex_element) * vpv->num_inputs);
    /*
     * Setup the vbuffer[] and velements[] arrays.
     */
@@ -596,14 +596,13 @@ st_draw_vbo(GLcontext *ctx,
       for (i = 0; i < num_velements; i++) {
          printf("vlements[%d].vbuffer_index = %u\n", i, velements[i].vertex_buffer_index);
          printf("vlements[%d].src_offset = %u\n", i, velements[i].src_offset);
-         printf("vlements[%d].nr_comps = %u\n", i, velements[i].nr_components);
          printf("vlements[%d].format = %s\n", i, util_format_name(velements[i].src_format));
       }
    }
 #endif
 
    pipe->set_vertex_buffers(pipe, num_vbuffers, vbuffer);
-   pipe->set_vertex_elements(pipe, num_velements, velements);
+   cso_set_vertex_elements(ctx->st->cso_context, num_velements, velements);
 
    if (num_vbuffers == 0 || num_velements == 0)
       return;
index 087f2f22bbf70da581bf688a299bae4eae1b412a..26a5b3fcd637d102e566379add00ca18216729db 100644 (file)
@@ -178,7 +178,6 @@ st_feedback_draw_vbo(GLcontext *ctx,
       vbuffers[attr].max_index = max_index;
       velements[attr].instance_divisor = 0;
       velements[attr].vertex_buffer_index = attr;
-      velements[attr].nr_components = arrays[mesaAttr]->Size;
       velements[attr].src_format = 
          st_pipe_vertex_format(arrays[mesaAttr]->Type,
                                arrays[mesaAttr]->Size,