r300g: implement draw_instanced for HWTCL
author Marek Olšák <maraeo@gmail.com>
Sat, 5 Mar 2011 14:54:27 +0000 (15:54 +0100)
committer Marek Olšák <maraeo@gmail.com>
Mon, 25 Apr 2011 12:04:51 +0000 (14:04 +0200)
src/gallium/drivers/r300/r300_context.c
src/gallium/drivers/r300/r300_context.h
src/gallium/drivers/r300/r300_emit.c
src/gallium/drivers/r300/r300_render.c
src/gallium/drivers/r300/r300_state.c
src/gallium/drivers/r300/r300_state_derived.c
src/gallium/drivers/r300/r300_tgsi_to_rc.c
src/gallium/drivers/r300/r300_tgsi_to_rc.h
src/gallium/drivers/r300/r300_vs.c

index 15d1278c3bbd0614593f2f5e9e6ea1f52148cae5..934871f655381e623e8bdb31034c031939601c6f 100644 (file)
@@ -80,6 +80,7 @@ static void r300_release_referenced_objects(struct r300_context *r300)
     /* Manually-created vertex buffers. */
     pipe_resource_reference(&r300->dummy_vb, NULL);
     pipe_resource_reference(&r300->vbo, NULL);
+    pipe_resource_reference((struct pipe_resource**)&r300->vb_instanceid, NULL);
 
     /* If there are any queries pending or not destroyed, remove them now. */
     foreach_s(query, temp, &r300->query_list) {
@@ -493,6 +494,31 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
         r300->dummy_vb = screen->resource_create(screen, &vb);
     }
 
+    {
+        int i, num = 128000;
+        struct pipe_resource vb, *r;
+        struct pipe_transfer *transfer;
+        float *buf;
+
+        memset(&vb, 0, sizeof(vb));
+        vb.target = PIPE_BUFFER;
+        vb.format = PIPE_FORMAT_R8_UNORM;
+        vb.bind = PIPE_BIND_VERTEX_BUFFER;
+        vb.usage = PIPE_USAGE_IMMUTABLE;
+        vb.width0 = 4 * num;
+        vb.height0 = 1;
+        vb.depth0 = 1;
+
+        r = screen->resource_create(screen, &vb);
+
+        buf = pipe_buffer_map(&r300->context, r, PIPE_TRANSFER_WRITE, &transfer);
+        for (i = 0; i < num; i++)
+            buf[i] = i;
+        pipe_buffer_unmap(&r300->context, transfer);
+
+        r300->vb_instanceid = r300_resource(r);
+    }
+
     {
         struct pipe_depth_stencil_alpha_state dsa;
         memset(&dsa, 0, sizeof(dsa));
index 8a0a54cf1e97d8f58e8a2d2a9e5173b7a4401325..8f42431f8f720ff658bae4e109398b320802e4aa 100644 (file)
@@ -431,6 +431,7 @@ struct r300_vertex_element_state {
     unsigned vertex_size_dwords;
 
     struct r300_vertex_stream_state vertex_stream;
+    struct r300_vertex_stream_state vertex_stream_instanced;
 };
 
 enum r300_hiz_func {
@@ -490,6 +491,8 @@ struct r300_context {
     /* When no vertex buffer is set, this one is used instead to prevent
      * hardlocks. */
     struct pipe_resource *dummy_vb;
+    /* Vertex buffer for InstanceID. */
+    struct r300_resource *vb_instanceid;
 
     /* The currently active query. */
     struct r300_query *query_current;
index 62435c5e2e29d40ce682f31bc74f10b86b164773..173fd5dd80f3e7e4121df2d2c300695f1ae832f1 100644 (file)
@@ -816,15 +816,17 @@ void r300_emit_vertex_arrays(struct r300_context* r300, int offset,
     struct r300_resource *buf;
     int i;
     unsigned vertex_array_count = r300->velems->count;
-    unsigned packet_size = (vertex_array_count * 3 + 1) / 2;
+    unsigned real_vertex_array_count = vertex_array_count +
+                (vertex_array_count == 16 || instance_id == -1 ? 0 : 1);
+    unsigned packet_size = (real_vertex_array_count * 3 + 1) / 2;
     struct pipe_vertex_buffer *vb1, *vb2;
     unsigned *hw_format_size = r300->velems->format_size;
     unsigned size1, size2, offset1, offset2, stride1, stride2;
     CS_LOCALS(r300);
 
-    BEGIN_CS(2 + packet_size + vertex_array_count * 2);
+    BEGIN_CS(2 + packet_size + real_vertex_array_count * 2);
     OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size);
-    OUT_CS(vertex_array_count | (!indexed ? R300_VC_FORCE_PREFETCH : 0));
+    OUT_CS(real_vertex_array_count | (!indexed ? R300_VC_FORCE_PREFETCH : 0));
 
     if (instance_id == -1) {
         /* Non-instanced arrays. This ignores instance_divisor and instance_id. */
@@ -896,14 +898,28 @@ void r300_emit_vertex_arrays(struct r300_context* r300, int offset,
                 offset1 = vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride;
             }
 
-            OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(stride1));
-            OUT_CS(offset1);
+            /* Insert vertex buffer containing InstanceID. */
+            if (vertex_array_count < 16) {
+                OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(stride1) |
+                       R300_VBPNTR_SIZE1(4));
+                OUT_CS(offset1);
+                OUT_CS(4 * instance_id);
+            } else {
+                OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(stride1));
+                OUT_CS(offset1);
+            }
+        } else if (vertex_array_count < 16) {
+            /* Insert vertex buffer containing InstanceID. */
+            OUT_CS(R300_VBPNTR_SIZE0(4));
+            OUT_CS(4 * instance_id);
         }
 
         for (i = 0; i < vertex_array_count; i++) {
             buf = r300_resource(valid_vbuf[velem[i].vertex_buffer_index]);
             OUT_CS_RELOC(buf);
         }
+        if (vertex_array_count < 16)
+            OUT_CS_RELOC(r300->vb_instanceid);
     }
     END_CS;
 }
@@ -936,11 +952,18 @@ void r300_emit_vertex_arrays_swtcl(struct r300_context *r300, boolean indexed)
 void r300_emit_vertex_stream_state(struct r300_context* r300,
                                    unsigned size, void* state)
 {
-    struct r300_vertex_stream_state *streams =
-        (struct r300_vertex_stream_state*)state;
+    struct r300_vertex_element_state *velems =
+        (struct r300_vertex_element_state*)state;
+    struct r300_vertex_stream_state *streams;
     unsigned i;
     CS_LOCALS(r300);
 
+    if (r300->screen->caps.has_tcl && r300->instancing_enabled) {
+        streams = &velems->vertex_stream_instanced;
+    } else {
+        streams = &velems->vertex_stream;
+    }
+
     if (DBG_ON(r300, DBG_PSC)) {
         fprintf(stderr, "r300: PSC emit:\n");
 
@@ -955,7 +978,7 @@ void r300_emit_vertex_stream_state(struct r300_context* r300,
         }
     }
 
-    BEGIN_CS(size);
+    BEGIN_CS((1 + streams->count) * 2);
     OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, streams->count);
     OUT_CS_TABLE(streams->vap_prog_stream_cntl, streams->count);
     OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_EXT_0, streams->count);
@@ -1219,6 +1242,10 @@ validate:
             r300->rws->cs_add_reloc(r300->cs, r300_resource(*buf)->cs_buf,
                                     r300_resource(*buf)->domain, 0);
         }
+        if (r300->instancing_enabled) {
+            r300->rws->cs_add_reloc(r300->cs, r300->vb_instanceid->cs_buf,
+                                    r300->vb_instanceid->domain, 0);
+        }
     }
     /* ...and index buffer for HWTCL path. */
     if (index_buffer)
index 429b85545f70a6dfd04972c7ae40db14044fa806..3674edc975f158e1b24181b7185087fd71cf9190 100644 (file)
@@ -305,6 +305,18 @@ static boolean r300_prepare_for_rendering(struct r300_context *r300,
                                           int index_bias,
                                           int instance_id)
 {
+    /* Update vertex elements for InstanceID here. */
+    boolean instancing_enabled = instance_id != -1;
+
+    if (r300->screen->caps.has_tcl &&
+        (flags & PREP_EMIT_AOS) &&
+        instancing_enabled != r300->instancing_enabled) {
+        r300->instancing_enabled = instancing_enabled;
+        r300_mark_atom_dirty(r300, &r300->vertex_stream_state);
+        r300->vertex_arrays_dirty = TRUE;
+        flags |= PREP_EMIT_STATES;
+    }
+
     /* Make sure there is enough space in the command stream and emit states. */
     if (r300_reserve_cs_dwords(r300, flags, cs_dwords))
         flags |= PREP_EMIT_STATES;
index 24b41d5085d039061a1c783a78899b2daa1f0225..da444f7c326416b00e485baae89391a07579d95f 100644 (file)
@@ -1605,9 +1605,10 @@ static void r300_set_index_buffer(struct pipe_context* pipe,
 }
 
 /* Initialize the PSC tables. */
-static void r300_vertex_psc(struct r300_vertex_element_state *velems)
+static void r300_vertex_psc(struct r300_vertex_element_state *velems,
+                            struct r300_vertex_stream_state *vstream,
+                            boolean insert_instance_id_attrib)
 {
-    struct r300_vertex_stream_state *vstream = &velems->vertex_stream;
     uint16_t type, swizzle;
     enum pipe_format format;
     unsigned i;
@@ -1638,6 +1639,27 @@ static void r300_vertex_psc(struct r300_vertex_element_state *velems)
         }
     }
 
+    /* Insert attrib emulating InstanceID. */
+    if (i < 15 && insert_instance_id_attrib) {
+        format = PIPE_FORMAT_R32_FLOAT;
+
+        type = r300_translate_vertex_data_type(format);
+        assert(type != R300_INVALID_FORMAT);
+
+        type |= i << R300_DST_VEC_LOC_SHIFT;
+        swizzle = r300_translate_vertex_data_swizzle(format);
+
+        if (i & 1) {
+            vstream->vap_prog_stream_cntl[i >> 1] |= type << 16;
+            vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16;
+        } else {
+            vstream->vap_prog_stream_cntl[i >> 1] |= type;
+            vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle;
+        }
+
+        i++;
+    }
+
     /* Set the last vector in the PSC. */
     if (i) {
         i -= 1;
@@ -1680,7 +1702,8 @@ static void* r300_create_vertex_elements_state(struct pipe_context* pipe,
     if (r300_screen(pipe->screen)->caps.has_tcl) {
         /* Setup PSC.
          * The unused components will be replaced by (..., 0, 1). */
-        r300_vertex_psc(velems);
+        r300_vertex_psc(velems, &velems->vertex_stream, FALSE);
+        r300_vertex_psc(velems, &velems->vertex_stream_instanced, TRUE);
 
         for (i = 0; i < count; i++) {
             velems->format_size[i] =
@@ -1711,8 +1734,8 @@ static void r300_bind_vertex_elements_state(struct pipe_context *pipe,
         return;
     }
 
-    UPDATE_STATE(&velems->vertex_stream, r300->vertex_stream_state);
-    r300->vertex_stream_state.size = (1 + velems->vertex_stream.count) * 2;
+    UPDATE_STATE(velems, r300->vertex_stream_state);
+    r300->vertex_stream_state.size = (1 + velems->vertex_stream_instanced.count) * 2;
     r300->vertex_arrays_dirty = TRUE;
 }
 
index afc1451183d63e5e494de0533e710af857bda8fb..a1dfd7d0c80817e1b9003c3cc2a7dff6e6939a98 100644 (file)
@@ -133,7 +133,9 @@ static void r300_draw_emit_all_attribs(struct r300_context* r300)
 /* Update the PSC tables for SW TCL, using Draw. */
 static void r300_swtcl_vertex_psc(struct r300_context *r300)
 {
-    struct r300_vertex_stream_state *vstream = r300->vertex_stream_state.state;
+    struct r300_vertex_element_state *velems =
+            (struct r300_vertex_element_state*)r300->vertex_stream_state.state;
+    struct r300_vertex_stream_state *vstream = &velems->vertex_stream;
     struct vertex_info *vinfo = &r300->vertex_info;
     uint16_t type, swizzle;
     enum pipe_format format;
index 6a000cfe2c6d435fd366cf30e6ed7d87d8b5da44..2ac52906d13af7018e1e3c5ab581bcfa23382fed 100644 (file)
@@ -25,6 +25,7 @@
 #include "radeon_compiler.h"
 #include "radeon_program.h"
 
+#include "util/u_math.h"
 #include "tgsi/tgsi_info.h"
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_scan.h"
@@ -168,6 +169,7 @@ static unsigned translate_register_file(unsigned file)
             /* fall-through */
         case TGSI_FILE_TEMPORARY: return RC_FILE_TEMPORARY;
         case TGSI_FILE_ADDRESS: return RC_FILE_ADDRESS;
+        case TGSI_FILE_SYSTEM_VALUE: return RC_FILE_INPUT;
     }
 }
 
@@ -179,6 +181,17 @@ static int translate_register_index(
     if (file == TGSI_FILE_IMMEDIATE)
         return ttr->immediate_offset + index;
 
+    if (file == TGSI_FILE_SYSTEM_VALUE) {
+        if (index == ttr->instance_id) {
+            return ttr->num_inputs;
+        } else {
+            fprintf(stderr, "Unknown system value semantic index: %i\n",
+                    index);
+            ttr->error = TRUE;
+            return 0;
+        }
+    }
+
     return index;
 }
 
@@ -268,7 +281,8 @@ static void transform_texture(struct rc_instruction * dst, struct tgsi_instructi
     }
 }
 
-static void transform_instruction(struct tgsi_to_rc * ttr, struct tgsi_full_instruction * src)
+static void transform_instruction(struct tgsi_to_rc * ttr,
+                                  struct tgsi_full_instruction * src)
 {
     struct rc_instruction * dst;
     int i;
@@ -328,6 +342,27 @@ static void handle_immediate(struct tgsi_to_rc * ttr,
     }
 }
 
+/* Record declaration info needed to emulate InstanceID as an extra input. */
+static void handle_declaration(struct tgsi_to_rc *ttr,
+                               struct tgsi_full_declaration *decl)
+{
+    switch (decl->Declaration.File) {
+    case TGSI_FILE_INPUT:
+        /* A declaration may cover a range; count up to its last index. */
+        ttr->num_inputs = MAX2(ttr->num_inputs, decl->Range.Last + 1);
+        break;
+    case TGSI_FILE_SYSTEM_VALUE:
+        if (decl->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
+            ttr->instance_id = decl->Range.First;
+        } else {
+            fprintf(stderr, "Unknown system value semantic: %i.\n",
+                    decl->Semantic.Name);
+            ttr->error = TRUE;
+        }
+        break;
+    }
+}
+
 void r300_tgsi_to_rc(struct tgsi_to_rc * ttr,
                      const struct tgsi_token * tokens)
 {
@@ -336,6 +371,8 @@ void r300_tgsi_to_rc(struct tgsi_to_rc * ttr,
     unsigned imm_index = 0;
     int i;
 
+    ttr->num_inputs = 0;
+    ttr->instance_id = -1;
     ttr->error = FALSE;
 
     /* Allocate constants placeholders.
@@ -362,21 +399,29 @@ void r300_tgsi_to_rc(struct tgsi_to_rc * ttr,
 
         switch (parser.FullToken.Token.Type) {
             case TGSI_TOKEN_TYPE_DECLARATION:
+                handle_declaration(ttr, &parser.FullToken.FullDeclaration);
+                if (ttr->error)
+                    goto end_while;
                 break;
+
             case TGSI_TOKEN_TYPE_IMMEDIATE:
                 handle_immediate(ttr, &parser.FullToken.FullImmediate, imm_index);
                 imm_index++;
                 break;
+
             case TGSI_TOKEN_TYPE_INSTRUCTION:
                 inst = &parser.FullToken.FullInstruction;
                 if (inst->Instruction.Opcode == TGSI_OPCODE_END) {
-                    break;
+                    goto end_while;
                 }
 
                 transform_instruction(ttr, inst);
+                if (ttr->error)
+                    goto end_while;
                 break;
         }
     }
+end_while:
 
     tgsi_parse_free(&parser);
 
index adb044cfe5698582c40bf2432ff6d2ea40720067..c9bd6277266a2f75979503bd9371c5c936048f41 100644 (file)
@@ -44,6 +44,9 @@ struct tgsi_to_rc {
     struct swizzled_imms * imms_to_swizzle;
     unsigned imms_to_swizzle_count;
 
+    int num_inputs;
+    int instance_id;
+
     /* Vertex shaders have no half swizzles, and no way to handle them, so
      * until rc grows proper support, indicate if they're safe to use. */
     boolean use_half_swizzles;
index b319890157fe970ea4a134612bf722c23922f635..90eba5a8f45c5b07be02ad82af0b7cd312f33868 100644 (file)
@@ -103,7 +103,7 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c)
                               outputs->bcolor[1] != ATTR_UNUSED;
 
     /* Fill in the input mapping */
-    for (i = 0; i < info->num_inputs; i++)
+    for (i = 0; i < info->num_inputs+1; i++)
         c->code->inputs[i] = i;
 
     /* Position. */