+
+ svga->state.hw_draw.topology = range->primType;
+ }
+
+ if (ib) {
+ /* indexed drawing */
+ if (indirect) {
+ ret = SVGA3D_sm5_DrawIndexedInstancedIndirect(svga->swc,
+ indirect_handle,
+ indirect->offset);
+ }
+ else if (instance_count > 1) {
+ ret = SVGA3D_vgpu10_DrawIndexedInstanced(svga->swc,
+ vcount,
+ instance_count,
+ 0, /* startIndexLocation */
+ range->indexBias,
+ start_instance);
+ }
+ else {
+ /* non-instanced drawing */
+ ret = SVGA3D_vgpu10_DrawIndexed(svga->swc,
+ vcount,
+ 0, /* startIndexLocation */
+ range->indexBias);
+ }
+ if (ret != PIPE_OK) {
+ return ret;
+ }
+ }
+ else {
+ /* non-indexed drawing */
+ if (svga->state.hw_draw.ib_format != SVGA3D_FORMAT_INVALID ||
+ svga->state.hw_draw.ib != NULL) {
+ /* Unbind previously bound index buffer */
+ ret = SVGA3D_vgpu10_SetIndexBuffer(svga->swc, NULL,
+ SVGA3D_FORMAT_INVALID, 0);
+ if (ret != PIPE_OK)
+ return ret;
+ pipe_resource_reference(&svga->state.hw_draw.ib, NULL);
+ svga->state.hw_draw.ib_format = SVGA3D_FORMAT_INVALID;
+ }
+
+ assert(svga->state.hw_draw.ib == NULL);
+
+ if (so_vertex_count) {
+ /* Stream-output drawing */
+ ret = SVGA3D_vgpu10_DrawAuto(svga->swc);
+ }
+ else if (indirect) {
+ ret = SVGA3D_sm5_DrawInstancedIndirect(svga->swc,
+ indirect_handle,
+ indirect->offset);
+ }
+ else if (instance_count > 1) {
+ ret = SVGA3D_vgpu10_DrawInstanced(svga->swc,
+ vcount,
+ instance_count,
+ range->indexBias,
+ start_instance);
+ }
+ else {
+ /* non-instanced */
+ ret = SVGA3D_vgpu10_Draw(svga->swc,
+ vcount,
+ range->indexBias);
+ }
+ if (ret != PIPE_OK) {
+ return ret;
+ }
+ }
+
+ hwtnl->cmd.prim_count = 0;
+
+ return PIPE_OK;
+}
+
+
+
+/**
+ * Emit any pending drawing commands to the command buffer.
+ * When we receive VGPU9 drawing commands we accumulate them and don't
+ * immediately emit them into the command buffer.
+ * This function needs to be called before we change state that could
+ * affect those pending draws.
+ *
+ * Nothing is emitted on a VGPU10 device or when no primitives are queued
+ * (hwtnl->cmd.prim_count == 0).
+ *
+ * \param hwtnl  the hwtnl object whose queued primitives are flushed
+ * \return PIPE_OK if there was nothing to emit, otherwise the result of
+ *         draw_vgpu9()
+ */
+enum pipe_error
+svga_hwtnl_flush(struct svga_hwtnl *hwtnl)
+{
+ enum pipe_error ret = PIPE_OK;
+
+ SVGA_STATS_TIME_PUSH(svga_sws(hwtnl->svga), SVGA_STATS_TIME_HWTNLFLUSH);
+
+ if (!svga_have_vgpu10(hwtnl->svga) && hwtnl->cmd.prim_count) {
+ /* we only queue up primitives for VGPU9 */
+ ret = draw_vgpu9(hwtnl);
+ }
+
+ SVGA_STATS_TIME_POP(svga_screen(hwtnl->svga->pipe.screen)->sws);
+ return ret;
+}
+
+/**
+ * Record an index bias in the hwtnl object (hwtnl->index_bias).
+ * The stored value is read back, for example, by check_draw_params(),
+ * which adds it to the range's indexBias for its debug bounds check.
+ *
+ * \param hwtnl       the hwtnl object to update
+ * \param index_bias  the bias value to store
+ */
+void
+svga_hwtnl_set_index_bias(struct svga_hwtnl *hwtnl, int index_bias)
+{
+ hwtnl->index_bias = index_bias;
+}
+
+
+
+/***********************************************************************
+ * Internal functions:
+ */
+
+/**
+ * For debugging only.
+ *
+ * Sanity-check the current vertex declarations and, when one is given,
+ * the index buffer against the sizes of the buffers they reference.
+ * Every check is an assert(), so nothing is verified in builds where
+ * assert() is compiled out.  Must only be called on a non-VGPU10 device.
+ *
+ * \param hwtnl      hwtnl object holding the vertex declarations and
+ *                   vertex buffers to check
+ * \param range      the primitive range to validate
+ * \param min_index  lower index bound; only a conservative guess
+ * \param max_index  upper index bound; only a conservative guess
+ * \param ib         index buffer, or NULL if there is none
+ */
+static void
+check_draw_params(struct svga_hwtnl *hwtnl,
+                  const SVGA3dPrimitiveRange *range,
+                  unsigned min_index, unsigned max_index,
+                  struct pipe_resource *ib)
+{
+   unsigned i;
+
+   assert(!svga_have_vgpu10(hwtnl->svga));
+
+   for (i = 0; i < hwtnl->cmd.vdecl_count; i++) {
+      unsigned j = hwtnl->cmd.vdecl_buffer_index[i];
+      const struct pipe_vertex_buffer *vb = &hwtnl->cmd.vbufs[j];
+      unsigned size = vb->buffer.resource ? vb->buffer.resource->width0 : 0;
+      /*
+       * offset, stride and width are only read inside assert().  They are
+       * marked ASSERTED, like the index buffer locals further down, rather
+       * than being cast to void: the old "(void) width;" read the variable
+       * before the switch below had assigned it.
+       */
+      ASSERTED unsigned offset = hwtnl->cmd.vdecl[i].array.offset;
+      ASSERTED unsigned stride = hwtnl->cmd.vdecl[i].array.stride;
+      int index_bias = (int) range->indexBias + hwtnl->index_bias;
+      ASSERTED unsigned width;
+
+      /* Nothing to check when there is no resource or it has zero width. */
+      if (size == 0)
+         continue;
+
+      assert(vb);
+      assert(size);
+      assert(offset < size);
+      assert(min_index <= max_index);
+
+      /* Size in bytes of one element of this declaration type. */
+      switch (hwtnl->cmd.vdecl[i].identity.type) {
+      case SVGA3D_DECLTYPE_FLOAT1:
+         width = 4;
+         break;
+      case SVGA3D_DECLTYPE_FLOAT2:
+         width = 4 * 2;
+         break;
+      case SVGA3D_DECLTYPE_FLOAT3:
+         width = 4 * 3;
+         break;
+      case SVGA3D_DECLTYPE_FLOAT4:
+         width = 4 * 4;
+         break;
+      case SVGA3D_DECLTYPE_D3DCOLOR:
+         width = 4;
+         break;
+      case SVGA3D_DECLTYPE_UBYTE4:
+         width = 1 * 4;
+         break;
+      case SVGA3D_DECLTYPE_SHORT2:
+         width = 2 * 2;
+         break;
+      case SVGA3D_DECLTYPE_SHORT4:
+         width = 2 * 4;
+         break;
+      case SVGA3D_DECLTYPE_UBYTE4N:
+         width = 1 * 4;
+         break;
+      case SVGA3D_DECLTYPE_SHORT2N:
+         width = 2 * 2;
+         break;
+      case SVGA3D_DECLTYPE_SHORT4N:
+         width = 2 * 4;
+         break;
+      case SVGA3D_DECLTYPE_USHORT2N:
+         width = 2 * 2;
+         break;
+      case SVGA3D_DECLTYPE_USHORT4N:
+         width = 2 * 4;
+         break;
+      case SVGA3D_DECLTYPE_UDEC3:
+         width = 4;
+         break;
+      case SVGA3D_DECLTYPE_DEC3N:
+         width = 4;
+         break;
+      case SVGA3D_DECLTYPE_FLOAT16_2:
+         width = 2 * 2;
+         break;
+      case SVGA3D_DECLTYPE_FLOAT16_4:
+         width = 2 * 4;
+         break;
+      default:
+         assert(0);
+         width = 0;
+         break;
+      }
+
+      /*
+       * Only checked for a non-negative bias: the element at index
+       * index_bias must end inside the vertex buffer.
+       */
+      if (index_bias >= 0) {
+         assert(offset + index_bias * stride + width <= size);
+      }
+
+      /*
+       * min_index/max_index are merely conservative guesses, so we can't
+       * make buffer overflow detection based on their values.
+       */
+   }
+
+   assert(range->indexWidth == range->indexArray.stride);
+
+   if (ib) {
+      ASSERTED unsigned size = ib->width0;
+      ASSERTED unsigned offset = range->indexArray.offset;
+      ASSERTED unsigned stride = range->indexArray.stride;
+      ASSERTED unsigned count;
+
+      assert(size);
+      assert(offset < size);
+      assert(stride);
+
+      /* Number of indices consumed by primitiveCount primitives. */
+      switch (range->primType) {
+      case SVGA3D_PRIMITIVE_POINTLIST:
+         count = range->primitiveCount;
+         break;
+      case SVGA3D_PRIMITIVE_LINELIST:
+         count = range->primitiveCount * 2;
+         break;
+      case SVGA3D_PRIMITIVE_LINESTRIP:
+         count = range->primitiveCount + 1;
+         break;
+      case SVGA3D_PRIMITIVE_TRIANGLELIST:
+         count = range->primitiveCount * 3;
+         break;
+      case SVGA3D_PRIMITIVE_TRIANGLESTRIP:
+         count = range->primitiveCount + 2;
+         break;
+      case SVGA3D_PRIMITIVE_TRIANGLEFAN:
+         count = range->primitiveCount + 2;
+         break;
+      default:
+         assert(0);
+         count = 0;
+         break;
+      }
+
+      /* All indices of the range must fit inside the index buffer. */
+      assert(offset + count * stride <= size);
+   }
+}
+
+
+/**
+ * All drawing filters down into this function, either directly
+ * on the hardware path or after doing software vertex processing.
+ * \param indirect if non-null, get the vertex count, first vertex, etc.
+ * from a buffer.
+ * \param so_vertex_count if non-null, get the vertex count from a
+ * stream-output target.
+ */
+enum pipe_error
+svga_hwtnl_prim(struct svga_hwtnl *hwtnl,
+ const SVGA3dPrimitiveRange *range,
+ unsigned vcount,
+ unsigned min_index, unsigned max_index,
+ struct pipe_resource *ib,
+ unsigned start_instance, unsigned instance_count,
+ const struct pipe_draw_indirect_info *indirect,
+ const struct pipe_stream_output_target *so_vertex_count)
+{
+ enum pipe_error ret = PIPE_OK;
+
+ SVGA_STATS_TIME_PUSH(svga_sws(hwtnl->svga), SVGA_STATS_TIME_HWTNLPRIM);
+
+ if (svga_have_vgpu10(hwtnl->svga)) {
+ /* draw immediately */
+ SVGA_RETRY(hwtnl->svga, draw_vgpu10(hwtnl, range, vcount, min_index,
+ max_index, ib, start_instance,
+ instance_count, indirect,
+ so_vertex_count));