r300g: optimize the immediate mode emission path a bit
author Marek Olšák <maraeo@gmail.com>
Tue, 22 Jun 2010 04:41:15 +0000 (06:41 +0200)
committer Marek Olšák <maraeo@gmail.com>
Tue, 22 Jun 2010 04:50:23 +0000 (06:50 +0200)
src/gallium/drivers/r300/r300_cb.h
src/gallium/drivers/r300/r300_context.h
src/gallium/drivers/r300/r300_render.c
src/gallium/drivers/r300/r300_state.c
src/gallium/drivers/r300/r300_winsys.h
src/gallium/winsys/radeon/drm/radeon_r300.c

index 9d3d4fc1b19843a2028d404f1d3843a66d439cd0..69874712442401f5ff914369b00da7c979c8c6b7 100644 (file)
@@ -89,6 +89,11 @@
     CB_DEBUG(cs_count = size;) \
 } while (0)
 
+/* Begin a CB block that writes straight into the winsys command stream:
+ * get_cs_pointer() returns the first free CS dword and accounts for "size". */
+#define BEGIN_CS_AS_CB(r300, size) \
+    BEGIN_CB((r300)->rws->get_cs_pointer((r300)->rws, (size)), (size))
+
 #define END_CB do { \
     CB_DEBUG(if (cs_count != 0) \
         debug_printf("r300: Warning: cs_count off by %d at (%s, %s:%i)\n", \
index fdbdb4b192d4e7662dd347c386859e59f97b7c92..8d0b4bb3d376d98ba92c98ac1e67ff75b8f39128 100644 (file)
@@ -363,6 +363,9 @@ struct r300_vertex_element_state {
     enum pipe_format hw_format[PIPE_MAX_ATTRIBS];
     unsigned hw_format_size[PIPE_MAX_ATTRIBS];
 
+    /* The size of the vertex, in dwords. */
+    unsigned vertex_size_dwords;
+
     /* This might mean two things:
      * - src_format != hw_format, as discussed above.
      * - src_offset % 4 != 0. */
index 655819001c96b7aeffc62affd45eab5f3d6313f2..4afd124c0eb1fb72d4e67b5af1592a386f66d8a0 100644 (file)
@@ -35,6 +35,7 @@
 #include "util/u_prim.h"
 
 #include "r300_cs.h"
+#include "r300_cb.h"
 #include "r300_context.h"
 #include "r300_screen_buffer.h"
 #include "r300_emit.h"
@@ -43,6 +44,8 @@
 
 #include <limits.h>
 
+#define IMMD_DWORDS 32
+
 static uint32_t r300_translate_primitive(unsigned prim)
 {
     switch (prim) {
@@ -269,7 +272,7 @@ static boolean immd_is_good_idea(struct r300_context *r300,
         return FALSE;
     }
 
-    if (count > 10) {
+    if (count * r300->velems->vertex_size_dwords > IMMD_DWORDS) {
         return FALSE;
     }
 
@@ -308,10 +311,10 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
     struct pipe_vertex_element* velem;
     struct pipe_vertex_buffer* vbuf;
     unsigned vertex_element_count = r300->velems->count;
-    unsigned i, v, vbi, dw, elem_offset, dwords;
+    unsigned i, v, vbi, dwords;
 
     /* Size of the vertex, in dwords. */
-    unsigned vertex_size = 0;
+    unsigned vertex_size = r300->velems->vertex_size_dwords;
 
     /* Offsets of the attribute, in dwords, from the start of the vertex. */
     unsigned offset[PIPE_MAX_ATTRIBS];
@@ -327,14 +330,13 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
     uint32_t* map[PIPE_MAX_ATTRIBS] = {0};
     struct pipe_transfer* transfer[PIPE_MAX_ATTRIBS] = {NULL};
 
-    CS_LOCALS(r300);
+    CB_LOCALS;
 
     /* Calculate the vertex size, offsets, strides etc. and map the buffers. */
     for (i = 0; i < vertex_element_count; i++) {
         velem = &r300->velems->velem[i];
         offset[i] = velem->src_offset / 4;
         size[i] = r300->velems->hw_format_size[i] / 4;
-        vertex_size += size[i];
         vbi = velem->vertex_buffer_index;
 
         /* Map the buffer. */
@@ -344,8 +346,8 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
                                                   vbuf->buffer,
                                                   PIPE_TRANSFER_READ,
                                                  &transfer[vbi]);
-            map[vbi] += vbuf->buffer_offset / 4;
             stride[vbi] = vbuf->stride / 4;
+            map[vbi] += vbuf->buffer_offset / 4 + stride[vbi] * start;
         }
     }
 
@@ -353,30 +355,26 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
 
     r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0, NULL);
 
-    BEGIN_CS(dwords);
-    OUT_CS_REG(R300_GA_COLOR_CONTROL,
+    BEGIN_CS_AS_CB(r300, dwords);
+    OUT_CB_REG(R300_GA_COLOR_CONTROL,
             r300_provoking_vertex_fixes(r300, mode));
-    OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size);
-    OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
-    OUT_CS(count - 1);
-    OUT_CS(0);
-    OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size);
-    OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) |
+    OUT_CB_REG(R300_VAP_VTX_SIZE, vertex_size);
+    OUT_CB_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
+    OUT_CB(count - 1);
+    OUT_CB(0);
+    OUT_CB_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size);
+    OUT_CB(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) |
             r300_translate_primitive(mode));
 
     /* Emit vertices. */
     for (v = 0; v < count; v++) {
         for (i = 0; i < vertex_element_count; i++) {
-            velem = &r300->velems->velem[i];
-            vbi = velem->vertex_buffer_index;
-            elem_offset = offset[i] + stride[vbi] * (v + start);
+            vbi = r300->velems->velem[i].vertex_buffer_index;
 
-            for (dw = 0; dw < size[i]; dw++) {
-                OUT_CS(map[vbi][elem_offset + dw]);
-            }
+            OUT_CB_TABLE(&map[vbi][offset[i] + stride[vbi] * v], size[i]);
         }
     }
-    END_CS;
+    END_CB;
 
     /* Unmap buffers. */
     for (i = 0; i < vertex_element_count; i++) {
index 3f0acd994894009fba9afa07514ca90e3e3beb97..bc2b62ba54177041c3815ca6f29d5690244f5c20 100644 (file)
@@ -1500,11 +1500,13 @@ static void* r300_create_vertex_elements_state(struct pipe_context* pipe,
 
             /* Align the formats to the size of DWORD.
              * We only care about the blocksizes of the formats since
-             * swizzles are already set up. */
+             * swizzles are already set up.
+             * Also compute the vertex size. */
             for (i = 0; i < count; i++) {
                 /* This is OK because we check for aligned strides too. */
                 velems->hw_format_size[i] =
                     align(util_format_get_blocksize(velems->hw_format[i]), 4);
+                velems->vertex_size_dwords += velems->hw_format_size[i] / 4;
             }
         }
     }
index 334ec9fa84a7eb91272612d8e51058f3920901c9..77c1c13ef9ae55419828033e2e60e2c492c53763 100644 (file)
@@ -105,6 +105,11 @@ struct r300_winsys_screen {
     /* Return the number of free dwords in CS. */
     unsigned (*get_cs_free_dwords)(struct r300_winsys_screen *winsys);
 
+    /* Return the pointer to the first free dword in CS and assume a pipe
+     * driver wants to fill "count" dwords. */
+    uint32_t *(*get_cs_pointer)(struct r300_winsys_screen *winsys,
+                                unsigned count);
+
     /* Write a dword to the command buffer. */
     void (*write_cs_dword)(struct r300_winsys_screen* winsys, uint32_t dword);
 
index 237fdc8bac4c29078591de7b20cc1737e579c012..d2d317dc209e1bbd0bd0e2993fbf4fc85c4ce210 100644 (file)
@@ -192,6 +192,17 @@ static unsigned radeon_get_cs_free_dwords(struct r300_winsys_screen *rws)
     return cs->ndw - cs->cdw;
 }
 
+static uint32_t *radeon_get_cs_pointer(struct r300_winsys_screen *rws,
+                                       unsigned count)
+{
+    /* Return the current write position, then skip "count" dwords. */
+    struct radeon_cs *cs = radeon_winsys_screen(rws)->cs;
+    uint32_t *first_free = &cs->packets[cs->cdw];
+
+    cs->cdw += count;
+    return first_free;
+}
+
 static void radeon_write_cs_dword(struct r300_winsys_screen *rws,
                                   uint32_t dword)
 {
@@ -316,6 +327,7 @@ radeon_setup_winsys(int fd, struct radeon_libdrm_winsys* ws)
     ws->base.validate = radeon_validate;
     ws->base.destroy = radeon_winsys_destroy;
     ws->base.get_cs_free_dwords = radeon_get_cs_free_dwords;
+    ws->base.get_cs_pointer = radeon_get_cs_pointer;
     ws->base.write_cs_dword = radeon_write_cs_dword;
     ws->base.write_cs_table = radeon_write_cs_table;
     ws->base.write_cs_reloc = radeon_write_cs_reloc;