From 596f09aa7bafd769912b1c0efe97434dff4c3f0b Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 5 Mar 2011 15:54:27 +0100 Subject: [PATCH] r300g: implement draw_instanced for HWTCL --- src/gallium/drivers/r300/r300_context.c | 26 ++++++++++ src/gallium/drivers/r300/r300_context.h | 3 ++ src/gallium/drivers/r300/r300_emit.c | 43 +++++++++++++--- src/gallium/drivers/r300/r300_render.c | 12 +++++ src/gallium/drivers/r300/r300_state.c | 33 +++++++++++-- src/gallium/drivers/r300/r300_state_derived.c | 4 +- src/gallium/drivers/r300/r300_tgsi_to_rc.c | 49 ++++++++++++++++++- src/gallium/drivers/r300/r300_tgsi_to_rc.h | 3 ++ src/gallium/drivers/r300/r300_vs.c | 2 +- 9 files changed, 158 insertions(+), 17 deletions(-) diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 15d1278c3bb..934871f6553 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -80,6 +80,7 @@ static void r300_release_referenced_objects(struct r300_context *r300) /* Manually-created vertex buffers. */ pipe_resource_reference(&r300->dummy_vb, NULL); pipe_resource_reference(&r300->vbo, NULL); + pipe_resource_reference((struct pipe_resource**)&r300->vb_instanceid, NULL); /* If there are any queries pending or not destroyed, remove them now. 
*/ foreach_s(query, temp, &r300->query_list) { @@ -493,6 +494,31 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->dummy_vb = screen->resource_create(screen, &vb); } + { + int i, num = 128000; + struct pipe_resource vb, *r; + struct pipe_transfer *transfer; + float *buf; + + memset(&vb, 0, sizeof(vb)); + vb.target = PIPE_BUFFER; + vb.format = PIPE_FORMAT_R8_UNORM; + vb.bind = PIPE_BIND_VERTEX_BUFFER; + vb.usage = PIPE_USAGE_IMMUTABLE; + vb.width0 = 4 * num; + vb.height0 = 1; + vb.depth0 = 1; + + r = screen->resource_create(screen, &vb); + + buf = pipe_buffer_map(&r300->context, r, PIPE_TRANSFER_WRITE, &transfer); + for (i = 0; i < num; i++) + buf[i] = i; + pipe_buffer_unmap(&r300->context, transfer); + + r300->vb_instanceid = r300_resource(r); + } + { struct pipe_depth_stencil_alpha_state dsa; memset(&dsa, 0, sizeof(dsa)); diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 8a0a54cf1e9..8f42431f8f7 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -431,6 +431,7 @@ struct r300_vertex_element_state { unsigned vertex_size_dwords; struct r300_vertex_stream_state vertex_stream; + struct r300_vertex_stream_state vertex_stream_instanced; }; enum r300_hiz_func { @@ -490,6 +491,8 @@ struct r300_context { /* When no vertex buffer is set, this one is used instead to prevent * hardlocks. */ struct pipe_resource *dummy_vb; + /* Vertex buffer for InstanceID. */ + struct r300_resource *vb_instanceid; /* The currently active query. 
*/ struct r300_query *query_current; diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 62435c5e2e2..173fd5dd80f 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -816,15 +816,17 @@ void r300_emit_vertex_arrays(struct r300_context* r300, int offset, struct r300_resource *buf; int i; unsigned vertex_array_count = r300->velems->count; - unsigned packet_size = (vertex_array_count * 3 + 1) / 2; + unsigned real_vertex_array_count = vertex_array_count + + (vertex_array_count == 16 || instance_id == -1 ? 0 : 1); + unsigned packet_size = (real_vertex_array_count * 3 + 1) / 2; struct pipe_vertex_buffer *vb1, *vb2; unsigned *hw_format_size = r300->velems->format_size; unsigned size1, size2, offset1, offset2, stride1, stride2; CS_LOCALS(r300); - BEGIN_CS(2 + packet_size + vertex_array_count * 2); + BEGIN_CS(2 + packet_size + real_vertex_array_count * 2); OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size); - OUT_CS(vertex_array_count | (!indexed ? R300_VC_FORCE_PREFETCH : 0)); + OUT_CS(real_vertex_array_count | (!indexed ? R300_VC_FORCE_PREFETCH : 0)); if (instance_id == -1) { /* Non-instanced arrays. This ignores instance_divisor and instance_id. */ @@ -896,14 +898,28 @@ void r300_emit_vertex_arrays(struct r300_context* r300, int offset, offset1 = vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride; } - OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(stride1)); - OUT_CS(offset1); + /* Insert vertex buffer containing InstanceID. */ + if (vertex_array_count < 16) { + OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(stride1) | + R300_VBPNTR_SIZE1(4)); + OUT_CS(offset1); + OUT_CS(4 * instance_id); + } else { + OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(stride1)); + OUT_CS(offset1); + } + } else if (vertex_array_count < 16) { + /* Insert vertex buffer containing InstanceID. 
*/ + OUT_CS(R300_VBPNTR_SIZE0(4)); + OUT_CS(4 * instance_id); } for (i = 0; i < vertex_array_count; i++) { buf = r300_resource(valid_vbuf[velem[i].vertex_buffer_index]); OUT_CS_RELOC(buf); } + if (vertex_array_count < 16) + OUT_CS_RELOC(r300->vb_instanceid); } END_CS; } @@ -936,11 +952,18 @@ void r300_emit_vertex_arrays_swtcl(struct r300_context *r300, boolean indexed) void r300_emit_vertex_stream_state(struct r300_context* r300, unsigned size, void* state) { - struct r300_vertex_stream_state *streams = - (struct r300_vertex_stream_state*)state; + struct r300_vertex_element_state *velems = + (struct r300_vertex_element_state*)state; + struct r300_vertex_stream_state *streams; unsigned i; CS_LOCALS(r300); + if (r300->screen->caps.has_tcl && r300->instancing_enabled) { + streams = &velems->vertex_stream_instanced; + } else { + streams = &velems->vertex_stream; + } + if (DBG_ON(r300, DBG_PSC)) { fprintf(stderr, "r300: PSC emit:\n"); @@ -955,7 +978,7 @@ void r300_emit_vertex_stream_state(struct r300_context* r300, } } - BEGIN_CS(size); + BEGIN_CS((1 + streams->count) * 2); OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, streams->count); OUT_CS_TABLE(streams->vap_prog_stream_cntl, streams->count); OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_EXT_0, streams->count); @@ -1219,6 +1242,10 @@ validate: r300->rws->cs_add_reloc(r300->cs, r300_resource(*buf)->cs_buf, r300_resource(*buf)->domain, 0); } + if (r300->instancing_enabled) { + r300->rws->cs_add_reloc(r300->cs, r300->vb_instanceid->cs_buf, + r300->vb_instanceid->domain, 0); + } } /* ...and index buffer for HWTCL path. */ if (index_buffer) diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 429b85545f7..3674edc975f 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -305,6 +305,18 @@ static boolean r300_prepare_for_rendering(struct r300_context *r300, int index_bias, int instance_id) { + /* Update vertex elements for InstanceID here. 
*/ + boolean instancing_enabled = instance_id != -1; + + if (r300->screen->caps.has_tcl && + (flags & PREP_EMIT_AOS) && + instancing_enabled != r300->instancing_enabled) { + r300->instancing_enabled = instancing_enabled; + r300_mark_atom_dirty(r300, &r300->vertex_stream_state); + r300->vertex_arrays_dirty = TRUE; + flags |= PREP_EMIT_STATES; + } + /* Make sure there is enough space in the command stream and emit states. */ if (r300_reserve_cs_dwords(r300, flags, cs_dwords)) flags |= PREP_EMIT_STATES; diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 24b41d5085d..da444f7c326 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -1605,9 +1605,10 @@ static void r300_set_index_buffer(struct pipe_context* pipe, } /* Initialize the PSC tables. */ -static void r300_vertex_psc(struct r300_vertex_element_state *velems) +static void r300_vertex_psc(struct r300_vertex_element_state *velems, + struct r300_vertex_stream_state *vstream, + boolean insert_instance_id_attrib) { - struct r300_vertex_stream_state *vstream = &velems->vertex_stream; uint16_t type, swizzle; enum pipe_format format; unsigned i; @@ -1638,6 +1639,27 @@ static void r300_vertex_psc(struct r300_vertex_element_state *velems) } } + /* Insert attrib emulating InstanceID. */ + if (i < 15 && insert_instance_id_attrib) { + format = PIPE_FORMAT_R32_FLOAT; + + type = r300_translate_vertex_data_type(format); + assert(type != R300_INVALID_FORMAT); + + type |= i << R300_DST_VEC_LOC_SHIFT; + swizzle = r300_translate_vertex_data_swizzle(format); + + if (i & 1) { + vstream->vap_prog_stream_cntl[i >> 1] |= type << 16; + vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16; + } else { + vstream->vap_prog_stream_cntl[i >> 1] |= type; + vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle; + } + + i++; + } + /* Set the last vector in the PSC. 
*/ if (i) { i -= 1; @@ -1680,7 +1702,8 @@ static void* r300_create_vertex_elements_state(struct pipe_context* pipe, if (r300_screen(pipe->screen)->caps.has_tcl) { /* Setup PSC. * The unused components will be replaced by (..., 0, 1). */ - r300_vertex_psc(velems); + r300_vertex_psc(velems, &velems->vertex_stream, FALSE); + r300_vertex_psc(velems, &velems->vertex_stream_instanced, TRUE); for (i = 0; i < count; i++) { velems->format_size[i] = @@ -1711,8 +1734,8 @@ static void r300_bind_vertex_elements_state(struct pipe_context *pipe, return; } - UPDATE_STATE(&velems->vertex_stream, r300->vertex_stream_state); - r300->vertex_stream_state.size = (1 + velems->vertex_stream.count) * 2; + UPDATE_STATE(velems, r300->vertex_stream_state); + r300->vertex_stream_state.size = (1 + velems->vertex_stream_instanced.count) * 2; r300->vertex_arrays_dirty = TRUE; } diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index afc1451183d..a1dfd7d0c80 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -133,7 +133,9 @@ static void r300_draw_emit_all_attribs(struct r300_context* r300) /* Update the PSC tables for SW TCL, using Draw. 
*/ static void r300_swtcl_vertex_psc(struct r300_context *r300) { - struct r300_vertex_stream_state *vstream = r300->vertex_stream_state.state; + struct r300_vertex_element_state *velems = + (struct r300_vertex_element_state*)r300->vertex_stream_state.state; + struct r300_vertex_stream_state *vstream = &velems->vertex_stream; struct vertex_info *vinfo = &r300->vertex_info; uint16_t type, swizzle; enum pipe_format format; diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 6a000cfe2c6..2ac52906d13 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -25,6 +25,7 @@ #include "radeon_compiler.h" #include "radeon_program.h" +#include "util/u_math.h" #include "tgsi/tgsi_info.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_scan.h" @@ -168,6 +169,7 @@ static unsigned translate_register_file(unsigned file) /* fall-through */ case TGSI_FILE_TEMPORARY: return RC_FILE_TEMPORARY; case TGSI_FILE_ADDRESS: return RC_FILE_ADDRESS; + case TGSI_FILE_SYSTEM_VALUE: return RC_FILE_INPUT; } } @@ -179,6 +181,17 @@ static int translate_register_index( if (file == TGSI_FILE_IMMEDIATE) return ttr->immediate_offset + index; + if (file == TGSI_FILE_SYSTEM_VALUE) { + if (index == ttr->instance_id) { + return ttr->num_inputs; + } else { + fprintf(stderr, "Unknown system value semantic index: %i\n", + index); + ttr->error = TRUE; + return 0; + } + } + return index; } @@ -268,7 +281,8 @@ static void transform_texture(struct rc_instruction * dst, struct tgsi_instructi } } -static void transform_instruction(struct tgsi_to_rc * ttr, struct tgsi_full_instruction * src) +static void transform_instruction(struct tgsi_to_rc * ttr, + struct tgsi_full_instruction * src) { struct rc_instruction * dst; int i; @@ -328,6 +342,27 @@ static void handle_immediate(struct tgsi_to_rc * ttr, } } +/* Scan one declaration: raise ttr->num_inputs to Range.First + 1 for inputs, and remember the register index of the InstanceID system value; any other system value is reported on stderr and sets ttr->error. */ +static void handle_declaration(struct tgsi_to_rc *ttr, + struct tgsi_full_declaration *decl) +{ + switch 
(decl->Declaration.File) { + case TGSI_FILE_INPUT: + ttr->num_inputs = MAX2(ttr->num_inputs, decl->Range.First + 1); + break; + + case TGSI_FILE_SYSTEM_VALUE: + if (decl->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) { + ttr->instance_id = decl->Range.First; + } else { + fprintf(stderr, "Unknown system value semantic: %i.\n", + decl->Semantic.Name); + ttr->error = TRUE; + } + break; + } +} + void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, const struct tgsi_token * tokens) { @@ -336,6 +371,8 @@ void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, unsigned imm_index = 0; int i; + ttr->num_inputs = 0; + ttr->instance_id = -1; ttr->error = FALSE; /* Allocate constants placeholders. @@ -362,21 +399,29 @@ void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, switch (parser.FullToken.Token.Type) { case TGSI_TOKEN_TYPE_DECLARATION: + handle_declaration(ttr, &parser.FullToken.FullDeclaration); + if (ttr->error) + goto end_while; break; + case TGSI_TOKEN_TYPE_IMMEDIATE: handle_immediate(ttr, &parser.FullToken.FullImmediate, imm_index); imm_index++; break; + case TGSI_TOKEN_TYPE_INSTRUCTION: inst = &parser.FullToken.FullInstruction; if (inst->Instruction.Opcode == TGSI_OPCODE_END) { - break; + goto end_while; } transform_instruction(ttr, inst); + if (ttr->error) + goto end_while; break; } } +end_while: tgsi_parse_free(&parser); diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.h b/src/gallium/drivers/r300/r300_tgsi_to_rc.h index adb044cfe56..c9bd6277266 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.h +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.h @@ -44,6 +44,9 @@ struct tgsi_to_rc { struct swizzled_imms * imms_to_swizzle; unsigned imms_to_swizzle_count; + int num_inputs; + int instance_id; + /* Vertex shaders have no half swizzles, and no way to handle them, so * until rc grows proper support, indicate if they're safe to use. 
*/ boolean use_half_swizzles; diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index b319890157f..90eba5a8f45 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -103,7 +103,7 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) outputs->bcolor[1] != ATTR_UNUSED; /* Fill in the input mapping */ - for (i = 0; i < info->num_inputs; i++) + for (i = 0; i < info->num_inputs+1; i++) c->code->inputs[i] = i; /* Position. */ -- 2.30.2