From: Dave Airlie Date: Fri, 24 Sep 2010 03:11:17 +0000 (+1000) Subject: r600g: add initial vertex translate support. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=914b669b082258fc05d0fec047b69949d88585c4;p=mesa.git r600g: add initial vertex translate support. --- diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c index 091751e93a9..cff15c57d28 100644 --- a/src/gallium/drivers/r600/r600_context.c +++ b/src/gallium/drivers/r600/r600_context.c @@ -62,6 +62,9 @@ static void r600_destroy_context(struct pipe_context *context) u_upload_destroy(rctx->upload_vb); u_upload_destroy(rctx->upload_ib); + if (rctx->tran.translate_cache) + translate_cache_destroy(rctx->tran.translate_cache); + radeon_ctx_fini(rctx->ctx); FREE(rctx); } @@ -153,6 +156,10 @@ struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) goto out_free; } + rctx->tran.translate_cache = translate_cache_create(); + if (rctx->tran.translate_cache == NULL) + goto out_free; + rctx->vs_constant = (struct radeon_state *)calloc(R600_MAX_CONSTANT, sizeof(struct radeon_state)); if (!rctx->vs_constant) { goto out_free; diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h index d104531d365..ae32194318c 100644 --- a/src/gallium/drivers/r600/r600_context.h +++ b/src/gallium/drivers/r600/r600_context.h @@ -31,6 +31,7 @@ #include #include #include +#include "translate/translate_cache.h" #include "radeon.h" #include "r600_shader.h" @@ -115,7 +116,11 @@ struct r600_vertex_element { unsigned refcount; unsigned count; - struct pipe_vertex_element elements[32]; + struct pipe_vertex_element elements[PIPE_MAX_ATTRIBS]; + + enum pipe_format hw_format[PIPE_MAX_ATTRIBS]; + unsigned hw_format_size[PIPE_MAX_ATTRIBS]; + boolean incompatible_layout; }; struct r600_draw { @@ -132,6 +137,18 @@ struct r600_draw { unsigned index_bias; }; +struct r600_translate_context { + /* Translate cache for incompatible vertex offset/stride/format fallback. */ + struct translate_cache *translate_cache; + + /* The vertex buffer slot containing the translated buffer. */ + unsigned vb_slot; + + /* Saved and new vertex element state. */ + void *saved_velems, *new_velems; +}; + + struct r600_context_hw_states { struct radeon_state rasterizer; struct radeon_state scissor; @@ -247,6 +264,10 @@ struct r600_context { struct u_upload_mgr *upload_vb; struct u_upload_mgr *upload_ib; bool any_user_vbs; + unsigned vb_max_index; + + /* For translating vertex buffers having incompatible vertex layout. */ + struct r600_translate_context tran; }; /* Convenience cast wrapper. */ diff --git a/src/gallium/drivers/r600/r600_draw.c b/src/gallium/drivers/r600/r600_draw.c index c41156f15fd..ed1b20f7b9d 100644 --- a/src/gallium/drivers/r600/r600_draw.c +++ b/src/gallium/drivers/r600/r600_draw.c @@ -32,12 +32,173 @@ #include #include #include +#include "translate/translate_cache.h" +#include "translate/translate.h" #include "radeon.h" #include "r600_screen.h" #include "r600_context.h" #include "r600_resource.h" #include "r600_state_inlines.h" +static void r600_begin_vertex_translate(struct r600_context *rctx) +{ + struct pipe_context *pipe = &rctx->context; + struct translate_key key = {0}; + struct translate_element *te; + unsigned tr_elem_index[PIPE_MAX_ATTRIBS] = {0}; + struct translate *tr; + struct r600_vertex_element *ve = rctx->vertex_elements; + boolean vb_translated[PIPE_MAX_ATTRIBS] = {0}; + void *vb_map[PIPE_MAX_ATTRIBS] = {0}, *out_map; + struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}, *out_transfer; + struct pipe_resource *out_buffer; + unsigned i, num_verts; + + /* Initialize the translate key, i.e. the recipe how vertices should be + * translated. */ + for (i = 0; i < ve->count; i++) { + struct pipe_vertex_buffer *vb = + &rctx->vertex_buffer[ve->elements[i].vertex_buffer_index]; + enum pipe_format output_format = ve->hw_format[i]; + unsigned output_format_size = ve->hw_format_size[i]; + + /* Check for support. */ + if (ve->elements[i].src_format == ve->hw_format[i] && + (vb->buffer_offset + ve->elements[i].src_offset) % 4 == 0 && + vb->stride % 4 == 0) { + continue; + } + + /* Workaround for translate: output floats instead of halfs. */ + switch (output_format) { + case PIPE_FORMAT_R16_FLOAT: + output_format = PIPE_FORMAT_R32_FLOAT; + output_format_size = 4; + break; + case PIPE_FORMAT_R16G16_FLOAT: + output_format = PIPE_FORMAT_R32G32_FLOAT; + output_format_size = 8; + break; + case PIPE_FORMAT_R16G16B16_FLOAT: + output_format = PIPE_FORMAT_R32G32B32_FLOAT; + output_format_size = 12; + break; + case PIPE_FORMAT_R16G16B16A16_FLOAT: + output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + output_format_size = 16; + break; + default:; + } + + /* Add this vertex element. */ + te = &key.element[key.nr_elements]; + /*te->type; + te->instance_divisor;*/ + te->input_buffer = ve->elements[i].vertex_buffer_index; + te->input_format = ve->elements[i].src_format; + te->input_offset = vb->buffer_offset + ve->elements[i].src_offset; + te->output_format = output_format; + te->output_offset = key.output_stride; + + key.output_stride += output_format_size; + vb_translated[ve->elements[i].vertex_buffer_index] = TRUE; + tr_elem_index[i] = key.nr_elements; + key.nr_elements++; + } + + /* Get a translate object. */ + tr = translate_cache_find(rctx->tran.translate_cache, &key); + + /* Map buffers we want to translate. */ + for (i = 0; i < rctx->nvertex_buffer; i++) { + if (vb_translated[i]) { + struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i]; + + vb_map[i] = pipe_buffer_map(pipe, vb->buffer, + PIPE_TRANSFER_READ, &vb_transfer[i]); + + tr->set_buffer(tr, i, vb_map[i], vb->stride, vb->max_index); + } + } + + /* Create and map the output buffer. */ + num_verts = rctx->vb_max_index + 1; + + out_buffer = pipe_buffer_create(&rctx->screen->screen, + PIPE_BIND_VERTEX_BUFFER, + key.output_stride * num_verts); + + out_map = pipe_buffer_map(pipe, out_buffer, PIPE_TRANSFER_WRITE, + &out_transfer); + + /* Translate. */ + tr->run(tr, 0, num_verts, 0, out_map); + + /* Unmap all buffers. */ + for (i = 0; i < rctx->nvertex_buffer; i++) { + if (vb_translated[i]) { + pipe_buffer_unmap(pipe, rctx->vertex_buffer[i].buffer, + vb_transfer[i]); + } + } + + pipe_buffer_unmap(pipe, out_buffer, out_transfer); + + /* Setup the new vertex buffer in the first free slot. */ + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { + struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i]; + + if (!vb->buffer) { + pipe_resource_reference(&vb->buffer, out_buffer); + vb->buffer_offset = 0; + vb->max_index = num_verts - 1; + vb->stride = key.output_stride; + rctx->tran.vb_slot = i; + break; + } + } + + /* Save and replace vertex elements. */ + { + struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS]; + + rctx->tran.saved_velems = rctx->vertex_elements; + + for (i = 0; i < ve->count; i++) { + if (vb_translated[ve->elements[i].vertex_buffer_index]) { + te = &key.element[tr_elem_index[i]]; + new_velems[i].instance_divisor = ve->elements[i].instance_divisor; + new_velems[i].src_format = te->output_format; + new_velems[i].src_offset = te->output_offset; + new_velems[i].vertex_buffer_index = rctx->tran.vb_slot; + } else { + memcpy(&new_velems[i], &ve->elements[i], + sizeof(struct pipe_vertex_element)); + } + } + + rctx->tran.new_velems = + pipe->create_vertex_elements_state(pipe, ve->count, new_velems); + pipe->bind_vertex_elements_state(pipe, rctx->tran.new_velems); + } + + pipe_resource_reference(&out_buffer, NULL); +} + +static void r600_end_vertex_translate(struct r600_context *rctx) +{ + struct pipe_context *pipe = &rctx->context; + + /* Restore vertex elements. */ + pipe->bind_vertex_elements_state(pipe, rctx->tran.saved_velems); + pipe->delete_vertex_elements_state(pipe, rctx->tran.new_velems); + + /* Delete the now-unused VBO. */ + pipe_resource_reference(&rctx->vertex_buffer[rctx->tran.vb_slot].buffer, + NULL); +} + + static void r600_translate_index_buffer(struct r600_context *r600, struct pipe_resource **index_buffer, unsigned *index_size, @@ -117,7 +278,7 @@ static int r600_draw_common(struct r600_draw *draw) rbuffer = (struct r600_resource*)vertex_buffer->buffer; offset = rctx->vertex_elements->elements[i].src_offset + vertex_buffer->buffer_offset; - rctx->vtbl->vs_resource(rctx, i, rbuffer, offset, vertex_buffer->stride, rctx->vertex_elements->elements[i].src_format); + rctx->vtbl->vs_resource(rctx, i, rbuffer, offset, vertex_buffer->stride, rctx->vertex_elements->hw_format[i]); radeon_draw_bind(&rctx->draw, vs_resource); } rctx->vs_nresource = rctx->vertex_elements->count; @@ -144,9 +305,14 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) struct r600_context *rctx = r600_context(ctx); struct r600_draw draw; int r; + boolean translate = FALSE; memset(&draw, 0, sizeof(draw)); + if (rctx->vertex_elements->incompatible_layout) { + r600_begin_vertex_translate(rctx); + translate = TRUE; + } if (rctx->any_user_vbs) { r600_upload_user_buffers(rctx); rctx->any_user_vbs = FALSE; @@ -186,5 +352,8 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) if (r) fprintf(stderr,"draw common failed %d\n", r); + if (translate) { + r600_end_vertex_translate(rctx); + } pipe_resource_reference(&draw.index_buffer, NULL); } diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 86f9825b526..1cadeea2fdd 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -204,15 +204,41 @@ static void *r600_create_shader_state(struct pipe_context *ctx, return rstate; } +#define FORMAT_REPLACE(what, withwhat) \ + case PIPE_FORMAT_##what: *format = PIPE_FORMAT_##withwhat; break + static void *r600_create_vertex_elements(struct pipe_context *ctx, unsigned count, const struct pipe_vertex_element *elements) { struct r600_vertex_element *v = CALLOC_STRUCT(r600_vertex_element); + unsigned i; + enum pipe_format *format; assert(count < 32); v->count = count; memcpy(v->elements, elements, count * sizeof(struct pipe_vertex_element)); + + for (i = 0; i < count; i++) { + v->hw_format[i] = v->elements[i].src_format; + format = &v->hw_format[i]; + + switch (*format) { + FORMAT_REPLACE(R64_FLOAT, R32_FLOAT); + FORMAT_REPLACE(R64G64_FLOAT, R32G32_FLOAT); + FORMAT_REPLACE(R64G64B64_FLOAT, R32G32B32_FLOAT); + FORMAT_REPLACE(R64G64B64A64_FLOAT, R32G32B32A32_FLOAT); + default:; + } + v->incompatible_layout = + v->incompatible_layout || + v->elements[i].src_format != v->hw_format[i] || + v->elements[i].src_offset % 4 != 0; + + v->hw_format_size[i] = + align(util_format_get_blocksize(v->hw_format[i]), 4); + } + v->refcount = 1; return v; } @@ -455,19 +481,32 @@ static void r600_set_vertex_buffers(struct pipe_context *ctx, struct r600_context *rctx = r600_context(ctx); unsigned i; boolean any_user_buffers = FALSE; - + unsigned max_index, vbo_max_index; + const struct pipe_vertex_buffer *vbo; for (i = 0; i < rctx->nvertex_buffer; i++) { pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL); } memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count); + max_index = 0; for (i = 0; i < count; i++) { + vbo = &buffers[i]; rctx->vertex_buffer[i].buffer = NULL; if (r600_buffer_is_user_buffer(buffers[i].buffer)) any_user_buffers = TRUE; pipe_resource_reference(&rctx->vertex_buffer[i].buffer, buffers[i].buffer); + if (vbo->max_index == ~0) { + if (!vbo->stride) + vbo_max_index = 1; + else + vbo_max_index = (vbo->buffer->width0 - vbo->buffer_offset) / vbo->stride; + } else + vbo_max_index = vbo->max_index; + + max_index = MIN2(vbo_max_index, max_index); } rctx->any_user_vbs = any_user_buffers; rctx->nvertex_buffer = count; + rctx->vb_max_index = max_index; } static void r600_set_index_buffer(struct pipe_context *ctx,