i915: Made vertex submission even faster
author Jakob Bornecrantz <jakob@tungstengraphics.com>
Tue, 27 May 2008 17:00:16 +0000 (19:00 +0200)
committer Jakob Bornecrantz <jakob@tungstengraphics.com>
Wed, 28 May 2008 11:44:36 +0000 (13:44 +0200)
src/gallium/drivers/i915simple/i915_context.h
src/gallium/drivers/i915simple/i915_flush.c
src/gallium/drivers/i915simple/i915_prim_vbuf.c
src/gallium/drivers/i915simple/i915_state_immediate.c
src/gallium/winsys/dri/intel/intel_winsys_pipe.c

index 53fc5ed0795c4144c6ed988db1bbdf9fdaeb5e28..2da90ae49d9fa03942e62b8691795e779ee09c11 100644 (file)
@@ -245,6 +245,8 @@ struct i915_context
 
    /** Vertex buffer */
    struct pipe_buffer *vbo;
+   size_t vbo_offset;
+   unsigned vbo_flushed;
 
    struct i915_state current;
    unsigned hardware_dirty;
index 7d23e6b6b90572cc1b82de5518d1bce8db8a4241..4c4718d68e12bcc725017f4dbdb64af808036080 100644 (file)
@@ -68,6 +68,7 @@ static void i915_flush( struct pipe_context *pipe,
    /* If there are no flags, just flush pending commands to hardware:
     */
    FLUSH_BATCH(fence);
+   i915->vbo_flushed = 1;
 }
 
 
index 81293d0d1f86d1a55f5c1cce403f9ba737cc5b15..4f36c2a22af53175a2762db0d26704e26608ca7a 100644 (file)
@@ -70,6 +70,13 @@ struct i915_vbuf_render {
 
    /** Genereate a vertex list */
    unsigned fallback;
+
+   /* Stuff for the vbo */
+   struct pipe_buffer *vbo;
+   size_t vbo_size;
+   size_t vbo_offset;
+   void *vbo_ptr;
+   size_t vbo_alloc_size;
 };
 
 
@@ -111,14 +118,31 @@ i915_vbuf_render_allocate_vertices( struct vbuf_render *render,
 
    /* FIXME: handle failure */
    assert(!i915->vbo);
-   i915->vbo = winsys->buffer_create(winsys, 64, I915_BUFFER_USAGE_LIT_VERTEX,
-                                     size);
 
+   if (i915_render->vbo_size > size + i915_render->vbo_offset && !i915->vbo_flushed) {
+   } else {
+      i915->vbo_flushed = 0;
+      pipe_buffer_reference(winsys, &i915_render->vbo, NULL);
+   }
+
+   if (!i915_render->vbo) {
+      i915_render->vbo_size = MAX2(size, i915_render->vbo_alloc_size);
+      i915_render->vbo_offset = 0;
+      i915_render->vbo = winsys->buffer_create(winsys,
+                                              64,
+                                              I915_BUFFER_USAGE_LIT_VERTEX,
+                                              i915_render->vbo_size);
+      i915_render->vbo_ptr = winsys->buffer_map(winsys,
+                                               i915_render->vbo,
+                                               PIPE_BUFFER_USAGE_CPU_WRITE);
+      winsys->buffer_unmap(winsys, i915_render->vbo);
+   }
+
+   i915->vbo = i915_render->vbo;
+   i915->vbo_offset = i915_render->vbo_offset;
    i915->dirty |= I915_NEW_VBO;
 
-   return winsys->buffer_map(winsys, 
-                             i915->vbo, 
-                             PIPE_BUFFER_USAGE_CPU_WRITE);
+   return i915_render->vbo_ptr + i915->vbo_offset;
 }
 
 
@@ -231,7 +255,6 @@ draw_arrays_fallback( struct vbuf_render *render,
    struct pipe_winsys *winsys = i915->pipe.winsys;
    unsigned nr_indices;
 
-   winsys->buffer_unmap( winsys, i915->vbo );
    if (i915->dirty)
       i915_update_derived( i915 );
 
@@ -247,6 +270,7 @@ draw_arrays_fallback( struct vbuf_render *render,
        */
       i915_update_derived( i915 );
       i915_emit_hardware_state( i915 );
+      i915->vbo_flushed = 1;
 
       if (!BEGIN_BATCH( 1 + (nr_indices + 1)/2, 1 )) {
         assert(0);
@@ -260,8 +284,9 @@ draw_arrays_fallback( struct vbuf_render *render,
              nr_indices );
 
    draw_arrays_generate_indices( render, start, nr, i915_render->fallback );
+
 out:
-   winsys->buffer_map( winsys, i915->vbo, PIPE_BUFFER_USAGE_CPU_WRITE );
+   return;
 }
 
 static void
@@ -353,7 +378,6 @@ i915_vbuf_render_draw( struct vbuf_render *render,
    nr_indices = draw_calc_nr_indices( nr_indices, i915_render->fallback );
 
    assert(nr_indices);
-   winsys->buffer_unmap( winsys, i915->vbo );
 
    if (i915->dirty)
       i915_update_derived( i915 );
@@ -368,6 +392,7 @@ i915_vbuf_render_draw( struct vbuf_render *render,
        */
       i915_update_derived( i915 );
       i915_emit_hardware_state( i915 );
+      i915->vbo_flushed = 1;
 
       if (!BEGIN_BATCH( 1 + (nr_indices + 1)/2, 1 )) {
         assert(0);
@@ -386,7 +411,6 @@ i915_vbuf_render_draw( struct vbuf_render *render,
                          i915_render->fallback );
 
 out:
-   winsys->buffer_map( winsys, i915->vbo, PIPE_BUFFER_USAGE_CPU_WRITE );
    return;
 }
 
@@ -400,10 +424,13 @@ i915_vbuf_render_release_vertices( struct vbuf_render *render,
    struct i915_vbuf_render *i915_render = i915_vbuf_render(render);
    struct i915_context *i915 = i915_render->i915;
    struct pipe_winsys *winsys = i915->pipe.winsys;
+   size_t size = (size_t)vertex_size * (size_t)vertices_used;
 
    assert(i915->vbo);
-   winsys->buffer_unmap(winsys, i915->vbo);
-   pipe_buffer_reference(winsys, &i915->vbo, NULL);
+
+   i915_render->vbo_offset += size;
+   i915->vbo = NULL;
+   i915->dirty |= I915_NEW_VBO;
 }
 
 
@@ -422,6 +449,7 @@ static struct vbuf_render *
 i915_vbuf_render_create( struct i915_context *i915 )
 {
    struct i915_vbuf_render *i915_render = CALLOC_STRUCT(i915_vbuf_render);
+   struct pipe_winsys *winsys = i915->pipe.winsys;
 
    i915_render->i915 = i915;
    
@@ -431,7 +459,7 @@ i915_vbuf_render_create( struct i915_context *i915 )
     * batch buffer.
     */
    i915_render->base.max_indices = 16*1024;
-   
+
    i915_render->base.get_vertex_info = i915_vbuf_render_get_vertex_info;
    i915_render->base.allocate_vertices = i915_vbuf_render_allocate_vertices;
    i915_render->base.set_primitive = i915_vbuf_render_set_primitive;
@@ -439,7 +467,19 @@ i915_vbuf_render_create( struct i915_context *i915 )
    i915_render->base.draw_arrays = i915_vbuf_render_draw_arrays;
    i915_render->base.release_vertices = i915_vbuf_render_release_vertices;
    i915_render->base.destroy = i915_vbuf_render_destroy;
-   
+
+   i915_render->vbo_alloc_size = 128 * 4096;
+   i915_render->vbo_size = i915_render->vbo_alloc_size;
+   i915_render->vbo_offset = 0;
+   i915_render->vbo = winsys->buffer_create(winsys,
+                                           64,
+                                           I915_BUFFER_USAGE_LIT_VERTEX,
+                                           i915_render->vbo_size);
+   i915_render->vbo_ptr = winsys->buffer_map(winsys,
+                                            i915_render->vbo,
+                                            PIPE_BUFFER_USAGE_CPU_WRITE);
+   winsys->buffer_unmap(winsys, i915_render->vbo);
+
    return &i915_render->base;
 }
 
index dfbbcab624a1b45ea5bd82d3fcc85134e08a324b..704ea4d838c0ce292151e45156e2ea227ca6c9e3 100644 (file)
@@ -54,7 +54,7 @@ static void upload_S0S1(struct i915_context *i915)
 
    /* INTEL_NEW_VBO */
    /* TODO: re-use vertex buffers here? */
-   LIS0 = 0;
+   LIS0 = i915->vbo_offset;
 
    /* INTEL_NEW_VERTEX_SIZE -- do this where the vertex size is calculated! 
     */
index 059b16be3b141595e3ac6f2cb2e5f9f178601d8d..fb8f44c8457c6b343e708bf8faac686e82913a2e 100644 (file)
@@ -224,7 +224,6 @@ intel_i915_surface_alloc_storage(struct pipe_winsys *winsys,
                                  unsigned tex_usage)
 {
    const unsigned alignment = 64;
-   //int ret;
 
    surf->width = width;
    surf->height = height;
@@ -235,7 +234,8 @@ intel_i915_surface_alloc_storage(struct pipe_winsys *winsys,
    assert(!surf->buffer);
    surf->buffer = winsys->buffer_create(winsys, alignment,
                                         PIPE_BUFFER_USAGE_PIXEL,
-                                        surf->pitch * surf->cpp * height);
+                                        surf->pitch * surf->cpp * surf->height);
+
    if(!surf->buffer)
       return -1;
 
@@ -328,8 +328,8 @@ intel_create_pipe_winsys( int fd, struct _DriFreeSlabManager *fMan )
                                        DRM_BO_FLAG_READ |
                                        DRM_BO_FLAG_WRITE |
                                        DRM_BO_FLAG_MEM_TT,
-                                       128,
-                                       6, 120, 32 * 4096, 0,
+                                       128 * 4096,
+                                       1, 120, 128 * 4096 * 4, 0,
                                        fMan);
    }