From: Dave Airlie Date: Thu, 21 Oct 2010 07:34:25 +0000 (+1000) Subject: r600g: initial translate state support X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=e68c83a5a01a8a659857310cfcc785c7e028d3f0;p=mesa.git r600g: initial translate state support --- diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index dd8fa4fcd77..bea7ef5df84 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -85,6 +85,9 @@ static void r600_destroy_context(struct pipe_context *context) u_upload_destroy(rctx->upload_vb); u_upload_destroy(rctx->upload_ib); + if (rctx->tran.translate_cache) + translate_cache_destroy(rctx->tran.translate_cache); + FREE(rctx->ps_resource); FREE(rctx->vs_resource); FREE(rctx); @@ -173,6 +176,12 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void return NULL; } + rctx->tran.translate_cache = translate_cache_create(); + if (rctx->tran.translate_cache == NULL) { + FREE(rctx); + return NULL; + } + rctx->vs_resource = CALLOC(R600_RESOURCE_ARRAY_SIZE, sizeof(struct r600_pipe_state)); if (!rctx->vs_resource) { FREE(rctx); diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index e7c4b60d00c..7fb47b608e4 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -30,6 +30,7 @@ #include #include #include +#include "translate/translate_cache.h" #include "r600.h" #include "r600_public.h" #include "r600_shader.h" @@ -83,7 +84,10 @@ struct r600_vertex_element { unsigned count; unsigned refcount; - struct pipe_vertex_element elements[32]; + struct pipe_vertex_element elements[PIPE_MAX_ATTRIBS]; + enum pipe_format hw_format[PIPE_MAX_ATTRIBS]; + unsigned hw_format_size[PIPE_MAX_ATTRIBS]; + boolean incompatible_layout; }; struct r600_pipe_shader { @@ -103,6 +107,16 @@ struct r600_textures_info { unsigned n_samplers; }; +struct r600_translate_context { + /* Translate cache for incompatible vertex offset/stride/format fallback. */ + struct translate_cache *translate_cache; + + /* The vertex buffer slot containing the translated buffer. */ + unsigned vb_slot; + /* Saved and new vertex element state. */ + void *saved_velems, *new_velems; +}; + #define R600_CONSTANT_ARRAY_SIZE 256 #define R600_RESOURCE_ARRAY_SIZE 160 @@ -142,6 +156,9 @@ struct r600_pipe_context { unsigned any_user_vbs; struct r600_textures_info ps_samplers; + unsigned vb_max_index; + struct r600_translate_context tran; + }; struct r600_drawl { diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index f98f05512a1..0dd416c0d83 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -270,7 +270,7 @@ static int r600_shader_update(struct pipe_context *ctx, struct r600_pipe_shader } rshader->vertex_elements = *rctx->vertex_elements; for (i = 0; i < rctx->vertex_elements->count; i++) { - resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format; + resource_format[nresources++] = rctx->vertex_elements->hw_format[i]; } r600_bo_reference(rctx->radeon, &rshader->bo, NULL); LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index bab3f224d7d..5cc4b3044b4 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -39,6 +39,8 @@ #include #include #include +#include "translate/translate_cache.h" +#include "translate/translate.h" #include #include "r600.h" #include "r600d.h" @@ -99,7 +101,7 @@ static void r600_draw_common(struct r600_drawl *draw) vertex_buffer->buffer_offset + r600_bo_offset(rbuffer->bo); - format = r600_translate_vertex_data_type(rctx->vertex_elements->elements[i].src_format); + format = r600_translate_vertex_data_type(rctx->vertex_elements->hw_format[i]); word2 = format | S_038008_STRIDE(vertex_buffer->stride); @@ -182,6 +184,171 @@ static void r600_draw_common(struct r600_drawl *draw) r600_context_draw(&rctx->ctx, &rdraw); } +static void r600_begin_vertex_translate(struct r600_pipe_context *rctx) +{ + struct pipe_context *pipe = &rctx->context; + struct translate_key key = {0}; + struct translate_element *te; + unsigned tr_elem_index[PIPE_MAX_ATTRIBS] = {0}; + struct translate *tr; + struct r600_vertex_element *ve = rctx->vertex_elements; + boolean vb_translated[PIPE_MAX_ATTRIBS] = {0}; + void *vb_map[PIPE_MAX_ATTRIBS] = {0}, *out_map; + struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}, *out_transfer; + struct pipe_resource *out_buffer; + unsigned i, num_verts; + + /* Initialize the translate key, i.e. the recipe how vertices should be + * translated. */ + for (i = 0; i < ve->count; i++) { + struct pipe_vertex_buffer *vb = + &rctx->vertex_buffer[ve->elements[i].vertex_buffer_index]; + enum pipe_format output_format = ve->hw_format[i]; + unsigned output_format_size = ve->hw_format_size[i]; + + /* Check for support. */ + if (ve->elements[i].src_format == ve->hw_format[i] && + (vb->buffer_offset + ve->elements[i].src_offset) % 4 == 0 && + vb->stride % 4 == 0) { + continue; + } + + /* Workaround for translate: output floats instead of halfs. */ + switch (output_format) { + case PIPE_FORMAT_R16_FLOAT: + output_format = PIPE_FORMAT_R32_FLOAT; + output_format_size = 4; + break; + case PIPE_FORMAT_R16G16_FLOAT: + output_format = PIPE_FORMAT_R32G32_FLOAT; + output_format_size = 8; + break; + case PIPE_FORMAT_R16G16B16_FLOAT: + output_format = PIPE_FORMAT_R32G32B32_FLOAT; + output_format_size = 12; + break; + case PIPE_FORMAT_R16G16B16A16_FLOAT: + output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + output_format_size = 16; + break; + default:; + } + + /* Add this vertex element. */ + te = &key.element[key.nr_elements]; + /*te->type; + te->instance_divisor;*/ + te->input_buffer = ve->elements[i].vertex_buffer_index; + te->input_format = ve->elements[i].src_format; + te->input_offset = vb->buffer_offset + ve->elements[i].src_offset; + te->output_format = output_format; + te->output_offset = key.output_stride; + + key.output_stride += output_format_size; + vb_translated[ve->elements[i].vertex_buffer_index] = TRUE; + tr_elem_index[i] = key.nr_elements; + key.nr_elements++; + } + + /* Get a translate object. */ + tr = translate_cache_find(rctx->tran.translate_cache, &key); + + /* Map buffers we want to translate. */ + for (i = 0; i < rctx->nvertex_buffer; i++) { + if (vb_translated[i]) { + struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i]; + + vb_map[i] = pipe_buffer_map(pipe, vb->buffer, + PIPE_TRANSFER_READ, &vb_transfer[i]); + + tr->set_buffer(tr, i, vb_map[i], vb->stride, vb->max_index); + } + } + + /* Create and map the output buffer. */ + num_verts = rctx->vb_max_index + 1; + + out_buffer = pipe_buffer_create(&rctx->screen->screen, + PIPE_BIND_VERTEX_BUFFER, + key.output_stride * num_verts); + + out_map = pipe_buffer_map(pipe, out_buffer, PIPE_TRANSFER_WRITE, + &out_transfer); + + /* Translate. */ + tr->run(tr, 0, num_verts, 0, out_map); + + /* Unmap all buffers. */ + for (i = 0; i < rctx->nvertex_buffer; i++) { + if (vb_translated[i]) { + pipe_buffer_unmap(pipe, rctx->vertex_buffer[i].buffer, + vb_transfer[i]); + } + } + + { + float *flt = out_map; + fprintf(stderr,"num verts is %d\n", num_verts); + for (i = 0; i < num_verts; i++) { + fprintf(stderr,"%f %f\n", flt[i*2], flt[i*2+1]); + } + } + pipe_buffer_unmap(pipe, out_buffer, out_transfer); + + /* Setup the new vertex buffer in the first free slot. */ + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { + struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i]; + + if (!vb->buffer) { + pipe_resource_reference(&vb->buffer, out_buffer); + vb->buffer_offset = 0; + vb->max_index = num_verts - 1; + vb->stride = key.output_stride; + rctx->tran.vb_slot = i; + break; + } + } + + /* Save and replace vertex elements. */ + { + struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS]; + + rctx->tran.saved_velems = rctx->vertex_elements; + + for (i = 0; i < ve->count; i++) { + if (vb_translated[ve->elements[i].vertex_buffer_index]) { + te = &key.element[tr_elem_index[i]]; + new_velems[i].instance_divisor = ve->elements[i].instance_divisor; + new_velems[i].src_format = te->output_format; + new_velems[i].src_offset = te->output_offset; + new_velems[i].vertex_buffer_index = rctx->tran.vb_slot; + } else { + memcpy(&new_velems[i], &ve->elements[i], + sizeof(struct pipe_vertex_element)); + } + } + + rctx->tran.new_velems = + pipe->create_vertex_elements_state(pipe, ve->count, new_velems); + pipe->bind_vertex_elements_state(pipe, rctx->tran.new_velems); + } + + pipe_resource_reference(&out_buffer, NULL); +} + +static void r600_end_vertex_translate(struct r600_pipe_context *rctx) +{ + struct pipe_context *pipe = &rctx->context; + + /* Restore vertex elements. */ + pipe->bind_vertex_elements_state(pipe, rctx->tran.saved_velems); + pipe->delete_vertex_elements_state(pipe, rctx->tran.new_velems); + + /* Delete the now-unused VBO. */ + pipe_resource_reference(&rctx->vertex_buffer[rctx->tran.vb_slot].buffer, + NULL); +} + void r600_translate_index_buffer(struct r600_pipe_context *r600, struct pipe_resource **index_buffer, unsigned *index_size, @@ -210,12 +377,17 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_drawl draw; + boolean translate = FALSE; + + if (rctx->vertex_elements->incompatible_layout) { + r600_begin_vertex_translate(rctx); + translate = TRUE; + } if (rctx->any_user_vbs) { r600_upload_user_buffers(rctx); rctx->any_user_vbs = FALSE; } - memset(&draw, 0, sizeof(struct r600_drawl)); draw.ctx = ctx; draw.mode = info->mode; @@ -246,6 +418,9 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) } r600_draw_common(&draw); + if (translate) + r600_end_vertex_translate(rctx); + pipe_resource_reference(&draw.index_buffer, NULL); } @@ -582,16 +757,45 @@ static void *r600_create_sampler_state(struct pipe_context *ctx, return rstate; } +#define FORMAT_REPLACE(what, withwhat) \ + case PIPE_FORMAT_##what: *format = PIPE_FORMAT_##withwhat; break + static void *r600_create_vertex_elements(struct pipe_context *ctx, unsigned count, const struct pipe_vertex_element *elements) { struct r600_vertex_element *v = CALLOC_STRUCT(r600_vertex_element); + int i; + enum pipe_format *format; assert(count < 32); + if (!v) + return NULL; + v->count = count; - v->refcount = 1; memcpy(v->elements, elements, count * sizeof(struct pipe_vertex_element)); + + for (i = 0; i < count; i++) { + v->hw_format[i] = v->elements[i].src_format; + format = &v->hw_format[i]; + + switch (*format) { + FORMAT_REPLACE(R64_FLOAT, R32_FLOAT); + FORMAT_REPLACE(R64G64_FLOAT, R32G32_FLOAT); + FORMAT_REPLACE(R64G64B64_FLOAT, R32G32B32_FLOAT); + FORMAT_REPLACE(R64G64B64A64_FLOAT, R32G32B32A32_FLOAT); + default:; + } + v->incompatible_layout = + v->incompatible_layout || + v->elements[i].src_format != v->hw_format[i] || + v->elements[i].src_offset % 4 != 0; + + v->hw_format_size[i] = + align(util_format_get_blocksize(v->hw_format[i]), 4); + } + + v->refcount = 1; return v; } @@ -1164,18 +1368,33 @@ static void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, const struct pipe_vertex_buffer *buffers) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct pipe_vertex_buffer *vbo; + unsigned max_index = (unsigned)-1, vbo_max_index; for (int i = 0; i < rctx->nvertex_buffer; i++) { pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL); } memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count); + for (int i = 0; i < count; i++) { + vbo = (struct pipe_vertex_buffer*)&buffers[i]; + rctx->vertex_buffer[i].buffer = NULL; if (r600_buffer_is_user_buffer(buffers[i].buffer)) rctx->any_user_vbs = TRUE; pipe_resource_reference(&rctx->vertex_buffer[i].buffer, buffers[i].buffer); + + if (vbo->max_index == ~0) { + if (!vbo->stride) + vbo->max_index = 1; + else + vbo->max_index = (vbo->buffer->width0 - vbo->buffer_offset) / vbo->stride; + } + + max_index = MIN2(vbo->max_index, max_index); } rctx->nvertex_buffer = count; + rctx->vb_max_index = max_index; } static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,