From 112239e9a66a155d36fe2ad0ab130e6f26eff298 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 18 Jan 2010 00:15:52 +0100 Subject: [PATCH] r300g,radeong: finish and enable the immediate mode Nearly 100% performance increase in glxgears. --- src/gallium/drivers/r300/r300_emit.c | 22 --- src/gallium/drivers/r300/r300_render.c | 141 ++++++++++++++---- src/gallium/drivers/r300/r300_state.c | 22 +++ .../winsys/drm/radeon/core/radeon_buffer.c | 83 +++++++++-- .../winsys/drm/radeon/core/radeon_buffer.h | 2 + .../winsys/drm/radeon/core/radeon_drm.h | 2 +- .../winsys/drm/radeon/core/radeon_r300.c | 8 +- .../winsys/drm/radeon/core/radeon_winsys.h | 6 + 8 files changed, 219 insertions(+), 67 deletions(-) diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 36d2c64b587..badbf3715c7 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -772,22 +772,6 @@ void r300_emit_texture(struct r300_context* r300, END_CS; } -static boolean r300_validate_aos(struct r300_context *r300) -{ - struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; - struct pipe_vertex_element *velem = r300->vertex_element; - int i; - - /* Check if formats and strides are aligned to the size of DWORD. */ - for (i = 0; i < r300->vertex_element_count; i++) { - if (vbuf[velem[i].vertex_buffer_index].stride % 4 != 0 || - util_format_get_blocksize(velem[i].src_format) % 4 != 0) { - return FALSE; - } - } - return TRUE; -} - void r300_emit_aos(struct r300_context* r300, unsigned offset) { struct pipe_vertex_buffer *vb1, *vb2, *vbuf = r300->vertex_buffer; @@ -797,12 +781,6 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset) unsigned packet_size = (aos_count * 3 + 1) / 2; CS_LOCALS(r300); - /* XXX Move this checking to a more approriate place. */ - if (!r300_validate_aos(r300)) { - /* XXX We should fallback using Draw. */ - assert(0); - } - BEGIN_CS(2 + packet_size + aos_count * 2); OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size); OUT_CS(aos_count); diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 677031ef04e..7f095bffe7c 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -28,6 +28,7 @@ #include "pipe/p_inlines.h" +#include "util/u_format.h" #include "util/u_memory.h" #include "util/u_prim.h" @@ -114,20 +115,53 @@ static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300, return color_control; } -static void r300_emit_draw_immediate(struct r300_context *r300, - unsigned mode, - unsigned start, - unsigned count) + +static void r300_emit_draw_arrays_immediate(struct r300_context *r300, + unsigned mode, + unsigned start, + unsigned count) { - struct pipe_buffer* vbo = r300->vertex_buffer[0].buffer; - unsigned vertex_size = r300->vertex_buffer[0].stride / sizeof(float); - unsigned i; - uint32_t* map; + struct pipe_vertex_element* velem; + struct pipe_vertex_buffer* vbuf; + unsigned vertex_element_count = r300->vertex_element_count; + unsigned i, v, vbi, dw, elem_offset; + + /* Size of the vertex, in dwords. */ + unsigned vertex_size = 0; + + /* Offsets of the attribute, in dwords, from the start of the vertex. */ + unsigned offset[PIPE_MAX_ATTRIBS]; + + /* Size of the vertex element, in dwords. */ + unsigned size[PIPE_MAX_ATTRIBS]; + + /* Stride to the same attrib in the next vertex in the vertex buffer, + * in dwords. */ + unsigned stride[PIPE_MAX_ATTRIBS]; + + /* Mapped vertex buffers. */ + uint32_t* map[PIPE_MAX_ATTRIBS] = {0}; + CS_LOCALS(r300); - map = (uint32_t*)pipe_buffer_map_range(r300->context.screen, vbo, - start * vertex_size, count * vertex_size, - PIPE_BUFFER_USAGE_CPU_READ); + /* Calculate the vertex size, offsets, strides etc. and map the buffers. */ + for (i = 0; i < vertex_element_count; i++) { + velem = &r300->vertex_element[i]; + offset[i] = velem->src_offset >> 2; + size[i] = util_format_get_blocksize(velem->src_format) >> 2; + vertex_size += size[i]; + vbi = velem->vertex_buffer_index; + + /* Map the buffer. */ + if (!map[vbi]) { + vbuf = &r300->vertex_buffer[vbi]; + map[vbi] = (uint32_t*)pipe_buffer_map(r300->context.screen, + vbuf->buffer, + PIPE_BUFFER_USAGE_CPU_READ); + map[vbi] += vbuf->buffer_offset >> 2; + stride[vbi] = vbuf->stride >> 2; + } + } BEGIN_CS(10 + count * vertex_size); OUT_CS_REG(R300_GA_COLOR_CONTROL, @@ -138,18 +172,31 @@ static void r300_emit_draw_immediate(struct r300_context *r300, OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) | r300_translate_primitive(mode)); - //debug_printf("r300: Immd %d verts, %d attrs\n", count, vertex_size); - for (i = 0; i < count * vertex_size; i++) { - if (i % vertex_size == 0) { - //debug_printf("r300: -- vert --\n"); + + /* Emit vertices. */ + for (v = 0; v < count; v++) { + for (i = 0; i < vertex_element_count; i++) { + velem = &r300->vertex_element[i]; + vbi = velem->vertex_buffer_index; + elem_offset = offset[i] + stride[vbi] * (v + start); + + for (dw = 0; dw < size[i]; dw++) { + OUT_CS(map[vbi][elem_offset + dw]); + } } - //debug_printf("r300: 0x%08x\n", *map); - OUT_CS(*map); - map++; } END_CS; - pipe_buffer_unmap(r300->context.screen, vbo); + /* Unmap buffers. */ + for (i = 0; i < vertex_element_count; i++) { + vbi = r300->vertex_element[i].vertex_buffer_index; + + if (map[vbi]) { + vbuf = &r300->vertex_buffer[vbi]; + pipe_buffer_unmap(r300->context.screen, vbuf->buffer); + map[vbi] = 0; + } + } } static void r300_emit_draw_arrays(struct r300_context *r300, @@ -222,16 +269,49 @@ static void r300_emit_draw_elements(struct r300_context *r300, } +static boolean r300_setup_local_vertex_buffers(struct r300_context *r300) +{ + struct pipe_vertex_buffer *vb; + boolean found_local_bo = FALSE, found_managed_bo = FALSE; + unsigned i; + + /* See what buffers we got. */ + for (i = 0; i < r300->vertex_element_count; i++) { + vb = &r300->vertex_buffer[r300->vertex_element[i].vertex_buffer_index]; + if (r300->winsys->buffer_is_local(r300->winsys, vb->buffer)) { + found_local_bo = TRUE; + } else { + found_managed_bo = TRUE; + } + } + + /* If we found both local and managed buffers, make local buffers managed + * because we shouldn't use the immediate mode in case a managed buffer is + * present, due to performance reasons. */ + if (found_local_bo && found_managed_bo) { + for (i = 0; i < r300->vertex_element_count; i++) { + vb = &r300->vertex_buffer[r300->vertex_element[i].vertex_buffer_index]; + if (r300->winsys->buffer_is_local(r300->winsys, vb->buffer)) { + r300->winsys->buffer_make_managed(r300->winsys, vb->buffer); + } + } + } + + return !found_managed_bo; +} + static boolean r300_setup_vertex_buffers(struct r300_context *r300) { struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; struct pipe_vertex_element *velem = r300->vertex_element; + struct pipe_buffer *pbuf; validate: for (int i = 0; i < r300->vertex_element_count; i++) { - if (!r300->winsys->add_buffer(r300->winsys, - vbuf[velem[i].vertex_buffer_index].buffer, - RADEON_GEM_DOMAIN_GTT, 0)) { + pbuf = vbuf[velem[i].vertex_buffer_index].buffer; + + if (!r300->winsys->add_buffer(r300->winsys, pbuf, + RADEON_GEM_DOMAIN_GTT, 0)) { r300->context.flush(&r300->context, 0, NULL); goto validate; } @@ -245,6 +325,7 @@ validate: return TRUE; } + static void r300_shorten_ubyte_elts(struct r300_context* r300, struct pipe_buffer** elts, unsigned count) @@ -365,15 +446,15 @@ void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, r300_emit_buffer_validate(r300); - if (!r300_setup_vertex_buffers(r300)) { - return; - } - - r300_emit_dirty_state(r300); - - if (FALSE && count <= 4 && r300->vertex_buffer_count == 1) { - r300_emit_draw_immediate(r300, mode, start, count); + if (r300_setup_local_vertex_buffers(r300)) { + r300_emit_dirty_state(r300); + r300_emit_draw_arrays_immediate(r300, mode, start, count); } else { + if (!r300_setup_vertex_buffers(r300)) { + return; + } + + r300_emit_dirty_state(r300); r300_emit_aos(r300, start); r300_emit_draw_arrays(r300, mode, count); } diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index e2ec0bc5bd2..641e95e7fca 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -924,6 +924,22 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, r300->dirty_state |= R300_NEW_VERTEX_FORMAT; } +static boolean r300_validate_aos(struct r300_context *r300) +{ + struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; + struct pipe_vertex_element *velem = r300->vertex_element; + int i; + + /* Check if formats and strides are aligned to the size of DWORD. */ + for (i = 0; i < r300->vertex_element_count; i++) { + if (vbuf[velem[i].vertex_buffer_index].stride % 4 != 0 || + util_format_get_blocksize(velem[i].src_format) % 4 != 0) { + return FALSE; + } + } + return TRUE; +} + static void r300_set_vertex_elements(struct pipe_context* pipe, unsigned count, const struct pipe_vertex_element* elements) @@ -939,6 +955,12 @@ static void r300_set_vertex_elements(struct pipe_context* pipe, draw_flush(r300->draw); draw_set_vertex_elements(r300->draw, count, elements); } + + if (!r300_validate_aos(r300)) { + /* XXX We should fallback using draw. */ + assert(0); + abort(); + } } static void* r300_create_vs_state(struct pipe_context* pipe, diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c index 25e1cdcdb6d..5214b6d8bcb 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c @@ -51,6 +51,23 @@ static const char *radeon_get_name(struct pipe_winsys *ws) return "Radeon/GEM+KMS"; } +uint32_t radeon_domain_from_usage(unsigned usage) +{ + uint32_t domain = 0; + + if (usage & PIPE_BUFFER_USAGE_PIXEL) { + domain |= RADEON_GEM_DOMAIN_VRAM; + } + if (usage & PIPE_BUFFER_USAGE_VERTEX) { + domain |= RADEON_GEM_DOMAIN_GTT; + } + if (usage & PIPE_BUFFER_USAGE_INDEX) { + domain |= RADEON_GEM_DOMAIN_GTT; + } + + return domain; +} + static struct pipe_buffer *radeon_buffer_create(struct pipe_winsys *ws, unsigned alignment, unsigned usage, @@ -71,25 +88,17 @@ static struct pipe_buffer *radeon_buffer_create(struct pipe_winsys *ws, radeon_buffer->base.usage = usage; radeon_buffer->base.size = size; - if (usage == PIPE_BUFFER_USAGE_CONSTANT && is_r3xx(radeon_ws->pci_id)) { + if ((usage == PIPE_BUFFER_USAGE_CONSTANT && is_r3xx(radeon_ws->pci_id)) || + (usage == PIPE_BUFFER_USAGE_VERTEX && size < 512)) { /* Don't bother allocating a BO, as it'll never get to the card. */ + /* Also, create small vertex buffers in RAM. */ desc.alignment = alignment; desc.usage = usage; radeon_buffer->pb = pb_malloc_buffer_create(size, &desc); return &radeon_buffer->base; } - domain = 0; - - if (usage & PIPE_BUFFER_USAGE_PIXEL) { - domain |= RADEON_GEM_DOMAIN_VRAM; - } - if (usage & PIPE_BUFFER_USAGE_VERTEX) { - domain |= RADEON_GEM_DOMAIN_GTT; - } - if (usage & PIPE_BUFFER_USAGE_INDEX) { - domain |= RADEON_GEM_DOMAIN_GTT; - } + domain = radeon_domain_from_usage(usage); radeon_buffer->bo = radeon_bo_open(radeon_ws->priv->bom, 0, size, alignment, domain, 0); @@ -222,6 +231,54 @@ static void radeon_buffer_set_tiling(struct radeon_winsys *ws, radeon_bo_set_tiling(radeon_buffer->bo, flags, pitch); } +static boolean radeon_buffer_is_local(struct radeon_winsys *ws, + struct pipe_buffer *buffer) +{ + struct radeon_pipe_buffer *radeon_buffer = + (struct radeon_pipe_buffer*)buffer; + + return radeon_buffer->pb != NULL; +} + +static void radeon_buffer_make_managed(struct radeon_winsys *ws, + struct pipe_buffer *buffer) +{ + struct radeon_pipe_buffer* radeon_buffer = + (struct radeon_pipe_buffer*)buffer; + uint32_t domain; + void *map; + + if (radeon_buffer->pb) { + domain = radeon_domain_from_usage(buffer->usage); + + /* Create a managed buffer. */ + radeon_buffer->bo = radeon_bo_open(ws->priv->bom, 0, + buffer->size, buffer->alignment, + domain, 0); + if (radeon_buffer->bo == NULL) { + /* XXX What now? */ + fprintf(stderr, "radeon: cannot create a buffer in function %s\n", + __FUNCTION__); + assert(0); + abort(); + } + + /* Move data. */ + radeon_bo_map(radeon_buffer->bo, 1); + map = pb_map(radeon_buffer->pb, PIPE_BUFFER_USAGE_CPU_READ); + + memcpy(radeon_buffer->bo->ptr, map, buffer->size); + + pb_unmap(radeon_buffer->pb); + radeon_bo_unmap(radeon_buffer->bo); + + /* Release the locally-created buffer. */ + pipe_reference_init(&radeon_buffer->pb->base.reference, 0); + pb_destroy(radeon_buffer->pb); + radeon_buffer->pb = 0; + } +} + static void radeon_fence_reference(struct pipe_winsys *ws, struct pipe_fence_handle **ptr, struct pipe_fence_handle *pfence) @@ -325,6 +382,8 @@ struct radeon_winsys* radeon_pipe_winsys(int fd) radeon_ws->base.get_name = radeon_get_name; radeon_ws->buffer_set_tiling = radeon_buffer_set_tiling; + radeon_ws->buffer_is_local = radeon_buffer_is_local; + radeon_ws->buffer_make_managed = radeon_buffer_make_managed; return radeon_ws; } diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.h b/src/gallium/winsys/drm/radeon/core/radeon_buffer.h index de71cb2f42d..c46abff793e 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.h +++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.h @@ -77,6 +77,8 @@ struct radeon_winsys_priv { void *flush_data; }; +uint32_t radeon_domain_from_usage(unsigned usage); + struct radeon_winsys* radeon_pipe_winsys(int fb); #if 0 struct pipe_surface *radeon_surface_from_handle(struct radeon_context *radeon_context, diff --git a/src/gallium/winsys/drm/radeon/core/radeon_drm.h b/src/gallium/winsys/drm/radeon/core/radeon_drm.h index ddd7983824a..077388ee028 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_drm.h +++ b/src/gallium/winsys/drm/radeon/core/radeon_drm.h @@ -81,7 +81,7 @@ void radeon_destroy_drm_api(struct drm_api* api); /* Guess at whether this chipset should use r300g. * * I believe that this check is valid, but I haven't been exhaustive. */ -static boolean is_r3xx(int pciid) +static INLINE boolean is_r3xx(int pciid) { return (pciid > 0x3150) && (pciid < 0x796f); } diff --git a/src/gallium/winsys/drm/radeon/core/radeon_r300.c b/src/gallium/winsys/drm/radeon/core/radeon_r300.c index 0253bc2527e..d759beaba13 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_r300.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_r300.c @@ -81,9 +81,13 @@ static void radeon_write_cs_reloc(struct radeon_winsys* winsys, uint32_t flags) { int retval = 0; + struct radeon_pipe_buffer* radeon_buffer = + (struct radeon_pipe_buffer*)pbuffer; - retval = radeon_cs_write_reloc(winsys->priv->cs, - ((struct radeon_pipe_buffer*)pbuffer)->bo, rd, wd, flags); + assert(!radeon_buffer->pb); + + retval = radeon_cs_write_reloc(winsys->priv->cs, radeon_buffer->bo, + rd, wd, flags); if (retval) { debug_printf("radeon: Relocation of %p (%d, %d, %d) failed!\n", diff --git a/src/gallium/winsys/drm/radeon/core/radeon_winsys.h b/src/gallium/winsys/drm/radeon/core/radeon_winsys.h index 864082b99b3..462fba844ef 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_winsys.h +++ b/src/gallium/winsys/drm/radeon/core/radeon_winsys.h @@ -106,6 +106,12 @@ struct radeon_winsys { uint32_t pitch, boolean microtiled, boolean macrotiled); + + boolean (*buffer_is_local)(struct radeon_winsys* winsys, + struct pipe_buffer* buffer); + + void (*buffer_make_managed)(struct radeon_winsys* winsys, + struct pipe_buffer* buffer); }; #endif -- 2.30.2