}
+void svga_hwtnl_set_index_bias( struct svga_hwtnl *hwtnl,
+ int index_bias)
+{
+ hwtnl->index_bias = index_bias;
+}
unsigned size = vb ? vb->width0 : 0;
unsigned offset = hwtnl->cmd.vdecl[i].array.offset;
unsigned stride = hwtnl->cmd.vdecl[i].array.stride;
- unsigned index_bias = range->indexBias;
+ int index_bias = (int) range->indexBias + hwtnl->index_bias;
unsigned width;
assert(vb);
assert(size);
assert(offset < size);
- assert(index_bias >= 0);
assert(min_index <= max_index);
- assert(offset + index_bias*stride < size);
+ if (index_bias >= 0) {
+ assert(offset + index_bias*stride < size);
+ }
if (min_index != ~0) {
assert(offset + (index_bias + min_index) * stride < size);
}
hwtnl->cmd.max_index[hwtnl->cmd.prim_count] = max_index;
hwtnl->cmd.prim[hwtnl->cmd.prim_count] = *range;
+ hwtnl->cmd.prim[hwtnl->cmd.prim_count].indexBias += hwtnl->index_bias;
pipe_resource_reference(&hwtnl->cmd.prim_ib[hwtnl->cmd.prim_count], ib);
hwtnl->cmd.prim_count++;
enum pipe_error
svga_hwtnl_flush( struct svga_hwtnl *hwtnl );
+void svga_hwtnl_set_index_bias( struct svga_hwtnl *hwtnl,
+ int index_bias);
+
#endif /* SVGA_DRAW_H_ */
struct svga_hwtnl {
struct svga_context *svga;
struct u_upload_mgr *upload_ib;
+
+ /* Additional negative index bias due to partial buffer uploads.
+ * This is compensated for in the offset associated with all
+ * vertex buffers.
+ */
+
+ int index_bias;
/* Flatshade information:
*/
#include "svga_state.h"
#include "svga_swtnl.h"
#include "svga_debug.h"
+#include "svga_resource_buffer.h"
+#include "util/u_upload_mgr.h"
+
+/**
+ * svga_upload_user_buffers - upload parts of user buffers
+ *
+ * This function streams a part of each bound user buffer to the hardware
+ * and sets svga_buffer::source_offset to the offset of the first byte
+ * uploaded. After a successful upload, svga_buffer::uploaded::buffer is
+ * also non-NULL.
+ */
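+
+/*
+ * Sketch of the ranges computed below, with illustrative values only
+ * (not taken from this patch): for a per-vertex stride of 16 bytes and
+ * a draw with start = 10, count = 5, the streamed range is
+ * first = 16 * 10 = 160 and size = 16 * 5 = 80, so only those 80 bytes
+ * of the user buffer are copied to hardware.
+ */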
+
+static int
+svga_upload_user_buffers(struct svga_context *svga,
+ unsigned start,
+ unsigned count,
+ unsigned instance_count)
+{
+ const struct pipe_vertex_element *ve = svga->curr.velems->velem;
+ unsigned i;
+ int ret;
+
+ for (i=0; i < svga->curr.velems->count; i++) {
+ struct pipe_vertex_buffer *vb =
+ &svga->curr.vb[ve[i].vertex_buffer_index];
+
+ if (vb->buffer && svga_buffer_is_user_buffer(vb->buffer)) {
+ struct svga_buffer *buffer = svga_buffer(vb->buffer);
+ unsigned first, size;
+ boolean flushed;
+ unsigned instance_div = ve[i].instance_divisor;
+
+ svga->dirty |= SVGA_NEW_VBUFFER;
+
+ if (instance_div) {
+ first = 0;
+ size = vb->stride *
+ (instance_count + instance_div - 1) / instance_div;
+ } else if (vb->stride) {
+ first = vb->stride * start;
+ size = vb->stride * count;
+ } else {
+ /* Only a single vertex!
+ * Upload with the largest vertex size the hw supports,
+ * if possible.
+ */
+ first = 0;
+ size = MIN2(16, vb->buffer->width0);
+ }
+
+ ret = u_upload_buffer( svga->upload_vb,
+ 0, first, size,
+ &buffer->b.b,
+ &buffer->uploaded.offset,
+ &buffer->uploaded.buffer,
+ &flushed);
+
+ if (ret)
+ return ret;
+
+ if (0)
+ debug_printf("%s: %d: orig buf %p upl buf %p ofs %d sofs %d"
+ " sz %d\n",
+ __FUNCTION__,
+ i,
+ buffer,
+ buffer->uploaded.buffer,
+ buffer->uploaded.offset,
+ first,
+ size);
+
+ vb->buffer_offset = buffer->uploaded.offset;
+ buffer->source_offset = first;
+ }
+ }
+
+ return PIPE_OK;
+}
+
+/**
+ * svga_release_user_upl_buffers - release uploaded parts of user buffers
+ *
+ * This function releases the hardware copy of the uploaded fraction of
+ * each user buffer. It's important to do this as soon as all draw calls
+ * affecting the uploaded fraction have been issued, as this allows for
+ * efficient reuse of the hardware surface backing the uploaded fraction.
+ *
+ * svga_buffer::source_offset is reset to 0, and svga_buffer::uploaded::buffer
+ * is released (set to NULL).
+ */
+
+static void
+svga_release_user_upl_buffers(struct svga_context *svga)
+{
+ unsigned i;
+ unsigned nr;
+
+ nr = svga->curr.num_vertex_buffers;
+
+ for (i = 0; i < nr; ++i) {
+ struct pipe_vertex_buffer *vb = &svga->curr.vb[i];
+
+ if (vb->buffer && svga_buffer_is_user_buffer(vb->buffer)) {
+ struct svga_buffer *buffer = svga_buffer(vb->buffer);
+
+ buffer->source_offset = 0;
+ if (buffer->uploaded.buffer)
+ pipe_resource_reference(&buffer->uploaded.buffer, NULL);
+ }
+ }
+}
unsigned prim,
unsigned start,
unsigned count,
+ unsigned instance_count,
boolean do_retry )
{
enum pipe_error ret = 0;
svga->curr.rast->templ.flatshade,
svga->curr.rast->templ.flatshade_first );
+ ret = svga_upload_user_buffers( svga, min_index + index_bias,
+ max_index - min_index + 1, instance_count );
+ if (ret != PIPE_OK)
+ goto retry;
ret = svga_update_state( svga, SVGA_STATE_HW_DRAW );
if (ret)
index_buffer, index_size, index_bias,
min_index, max_index,
prim, start, count,
- FALSE );
+ instance_count, FALSE );
}
return ret;
unsigned prim,
unsigned start,
unsigned count,
+ unsigned instance_count,
boolean do_retry )
{
enum pipe_error ret;
svga->curr.rast->templ.flatshade,
svga->curr.rast->templ.flatshade_first );
+ ret = svga_upload_user_buffers( svga, start, count, instance_count );
+
+ if (ret != PIPE_OK)
+ goto retry;
+
ret = svga_update_state( svga, SVGA_STATE_HW_DRAW );
if (ret)
goto retry;
prim,
start,
count,
+ instance_count,
FALSE );
}
svga_context_flush(svga, NULL);
}
+ /* Avoid leaking the previous hwtnl bias to swtnl */
+ svga_hwtnl_set_index_bias( svga->hwtnl, 0 );
ret = svga_swtnl_draw_vbo( svga, info );
}
else {
info->mode,
info->start + offset,
info->count,
+ info->instance_count,
TRUE );
}
else {
info->mode,
info->start,
info->count,
+ info->instance_count,
TRUE );
}
}
+ svga_release_user_upl_buffers( svga );
+
if (SVGA_DEBUG & DEBUG_FLUSH) {
svga_hwtnl_flush_retry( svga );
svga_context_flush(svga, NULL);
unsigned offset;
} uploaded;
+ /**
+ * For user buffers, this is the offset to the data about to be
+ * referenced by the next draw command, and hence the data that needs
+ * to be uploaded.
+ */
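+ /* When uploaded::buffer is non-NULL, pipe_vertex_buffer::buffer_offset
+ * has been redirected to uploaded::offset within the uploaded copy,
+ * while source_offset remembers where in the original user buffer the
+ * uploaded data started, so vertex declarations can compensate for the
+ * difference.
+ */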
+ unsigned source_offset;
+
/**
* DMA'ble memory.
*
#include "svga_hw_reg.h"
-static int
-upload_user_buffers( struct svga_context *svga )
-{
- enum pipe_error ret = PIPE_OK;
- int i;
- int nr;
-
- if (0)
- debug_printf("%s: %d\n", __FUNCTION__, svga->curr.num_vertex_buffers);
-
- nr = svga->curr.num_vertex_buffers;
-
- for (i = 0; i < nr; i++)
- {
- if (svga_buffer_is_user_buffer(svga->curr.vb[i].buffer))
- {
- struct svga_buffer *buffer = svga_buffer(svga->curr.vb[i].buffer);
-
- if (!buffer->uploaded.buffer) {
- boolean flushed;
- ret = u_upload_buffer( svga->upload_vb,
- 0, 0,
- buffer->b.b.width0,
- &buffer->b.b,
- &buffer->uploaded.offset,
- &buffer->uploaded.buffer,
- &flushed);
- if (ret)
- return ret;
-
- if (0)
- debug_printf("%s: %d: orig buf %p upl buf %p ofs %d sz %d\n",
- __FUNCTION__,
- i,
- buffer,
- buffer->uploaded.buffer,
- buffer->uploaded.offset,
- buffer->b.b.width0);
- }
-
- svga->curr.vb[i].buffer_offset = buffer->uploaded.offset;
- }
- }
-
- if (0)
- debug_printf("%s: DONE\n", __FUNCTION__);
-
- return ret;
-}
-
-
/***********************************************************************
*/
const struct pipe_vertex_element *ve = svga->curr.velems->velem;
SVGA3dVertexDecl decl;
unsigned i;
+ unsigned neg_bias = 0;
assert(svga->curr.velems->count >=
svga->curr.vs->base.info.file_count[TGSI_FILE_INPUT]);
svga_hwtnl_reset_vdecl( svga->hwtnl,
svga->curr.velems->count );
+ /**
+ * We can't set the VDECL offset to something negative, so we
+ * must calculate a common negative additional index bias, and modify
+ * the VDECL offsets accordingly so they *all* end up positive.
+ *
+ * Note that the exact value of the negative index bias is not that
+ * important, since we compensate for it when we calculate the vertex
+ * buffer offset below. The important thing is that all vertex buffer
+ * offsets remain positive.
+ *
+ * Note that we use a negative bias variable in order to make the
+ * rounding maths easier to follow, and to avoid int / unsigned
+ * confusion.
+ */
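+
+ /* Worked example with hypothetical numbers, for illustration only:
+ * if a user buffer was uploaded starting at source_offset = 100 and an
+ * element reads it at buffer_offset + src_offset = 40 with stride = 16,
+ * then tmp_neg_bias = (100 - 40 + 15) / 16 = 4. With neg_bias = 4 the
+ * VDECL offset below becomes 40 + 4 * 16 - 100 = 4, which is no longer
+ * negative, and svga_hwtnl_set_index_bias( hwtnl, -4 ) compensates for
+ * the four-vertex shift.
+ */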
+
for (i = 0; i < svga->curr.velems->count; i++) {
- const struct pipe_vertex_buffer *vb = &svga->curr.vb[ve[i].vertex_buffer_index];
+ const struct pipe_vertex_buffer *vb =
+ &svga->curr.vb[ve[i].vertex_buffer_index];
+ struct svga_buffer *buffer;
+ unsigned int offset = vb->buffer_offset + ve[i].src_offset;
+ unsigned tmp_neg_bias = 0;
+
+ if (!vb->buffer)
+ continue;
+
+ buffer = svga_buffer(vb->buffer);
+ if (buffer->source_offset > offset) {
+ tmp_neg_bias = buffer->source_offset - offset;
+ if (vb->stride)
+ tmp_neg_bias = (tmp_neg_bias + vb->stride - 1) / vb->stride;
+ neg_bias = MAX2(neg_bias, tmp_neg_bias);
+ }
+ }
+
+ for (i = 0; i < svga->curr.velems->count; i++) {
+ const struct pipe_vertex_buffer *vb =
+ &svga->curr.vb[ve[i].vertex_buffer_index];
unsigned usage, index;
- struct svga_buffer *buffer = svga_buffer(vb->buffer);
+ struct svga_buffer *buffer;
+ if (!vb->buffer)
+ continue;
+ buffer = svga_buffer(vb->buffer);
svga_generate_vdecl_semantics( i, &usage, &index );
/* SVGA_NEW_VELEMENT
decl.identity.usage = usage;
decl.identity.usageIndex = index;
decl.array.stride = vb->stride;
- decl.array.offset = (vb->buffer_offset +
- ve[i].src_offset);
+
+ /* Compensate for partially uploaded vbo, and
+ * for the negative index bias.
+ */
+ decl.array.offset = (vb->buffer_offset
+ + ve[i].src_offset
+ + neg_bias * vb->stride
+ - buffer->source_offset);
+
+ assert(vb->buffer_offset + ve[i].src_offset
+ + neg_bias * vb->stride >= buffer->source_offset);
svga_hwtnl_vdecl( svga->hwtnl,
i,
vb->buffer );
}
+ svga_hwtnl_set_index_bias( svga->hwtnl, -(int) neg_bias );
return 0;
}
static int emit_hw_vdecl( struct svga_context *svga,
unsigned dirty )
{
- int ret = 0;
-
/* SVGA_NEW_NEED_SWTNL
*/
if (svga->state.sw.need_swtnl)
return 0; /* Do not emit during swtnl */
- /* If we get to here, we know that we're going to draw. Upload
- * userbuffers now and try to combine multiple userbuffers from
- * multiple draw calls into a single host buffer for performance.
- */
- if (svga->curr.any_user_vertex_buffers) {
- ret = upload_user_buffers( svga );
- if (ret)
- return ret;
- }
-
return emit_hw_vs_vdecl( svga, dirty );
}