svga: Performance fixes
[mesa.git] / src / gallium / drivers / svga / svga_swtnl_backend.c
1 /**********************************************************
2 * Copyright 2008-2009 VMware, Inc. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person
5 * obtaining a copy of this software and associated documentation
6 * files (the "Software"), to deal in the Software without
7 * restriction, including without limitation the rights to use, copy,
8 * modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **********************************************************/
25
26 #include "draw/draw_vbuf.h"
27 #include "draw/draw_context.h"
28 #include "draw/draw_vertex.h"
29
30 #include "util/u_debug.h"
31 #include "util/u_inlines.h"
32 #include "util/u_math.h"
33 #include "util/u_memory.h"
34
35 #include "svga_context.h"
36 #include "svga_state.h"
37 #include "svga_swtnl.h"
38
39 #include "svga_types.h"
40 #include "svga_reg.h"
41 #include "svga3d_reg.h"
42 #include "svga_draw.h"
43 #include "svga_shader.h"
44 #include "svga_swtnl_private.h"
45
46
47 static const struct vertex_info *
48 svga_vbuf_render_get_vertex_info(struct vbuf_render *render)
49 {
50 struct svga_vbuf_render *svga_render = svga_vbuf_render(render);
51 struct svga_context *svga = svga_render->svga;
52
53 svga_swtnl_update_vdecl(svga);
54
55 return &svga_render->vertex_info;
56 }
57
58
59 static boolean
60 svga_vbuf_render_allocate_vertices(struct vbuf_render *render,
61 ushort vertex_size,
62 ushort nr_vertices)
63 {
64 struct svga_vbuf_render *svga_render = svga_vbuf_render(render);
65 struct svga_context *svga = svga_render->svga;
66 struct pipe_screen *screen = svga->pipe.screen;
67 size_t size = (size_t)nr_vertices * (size_t)vertex_size;
68 boolean new_vbuf = FALSE;
69 boolean new_ibuf = FALSE;
70
71 SVGA_STATS_TIME_PUSH(svga_sws(svga),
72 SVGA_STATS_TIME_VBUFRENDERALLOCVERT);
73
74 if (svga_render->vertex_size != vertex_size)
75 svga->swtnl.new_vdecl = TRUE;
76 svga_render->vertex_size = (size_t)vertex_size;
77
78 if (svga->swtnl.new_vbuf)
79 new_ibuf = new_vbuf = TRUE;
80 svga->swtnl.new_vbuf = FALSE;
81
82 if (svga_render->vbuf_size
83 < svga_render->vbuf_offset + svga_render->vbuf_used + size)
84 new_vbuf = TRUE;
85
86 if (new_vbuf)
87 pipe_resource_reference(&svga_render->vbuf, NULL);
88 if (new_ibuf)
89 pipe_resource_reference(&svga_render->ibuf, NULL);
90
91 if (!svga_render->vbuf) {
92 svga_render->vbuf_size = MAX2(size, svga_render->vbuf_alloc_size);
93 svga_render->vbuf = SVGA_TRY_PTR(pipe_buffer_create
94 (screen, PIPE_BIND_VERTEX_BUFFER,
95 PIPE_USAGE_STREAM,
96 svga_render->vbuf_size));
97 if (!svga_render->vbuf) {
98 svga_retry_enter(svga);
99 svga_context_flush(svga, NULL);
100 assert(!svga_render->vbuf);
101 svga_render->vbuf = pipe_buffer_create(screen,
102 PIPE_BIND_VERTEX_BUFFER,
103 PIPE_USAGE_STREAM,
104 svga_render->vbuf_size);
105 /* The buffer allocation may fail if we run out of memory.
106 * The draw module's vbuf code should handle that without crashing.
107 */
108 svga_retry_exit(svga);
109 }
110
111 svga->swtnl.new_vdecl = TRUE;
112 svga_render->vbuf_offset = 0;
113 } else {
114 svga_render->vbuf_offset += svga_render->vbuf_used;
115 }
116
117 svga_render->vbuf_used = 0;
118
119 if (svga->swtnl.new_vdecl)
120 svga_render->vdecl_offset = svga_render->vbuf_offset;
121
122 SVGA_STATS_TIME_POP(svga_sws(svga));
123
124 return TRUE;
125 }
126
127
128 static void *
129 svga_vbuf_render_map_vertices(struct vbuf_render *render)
130 {
131 struct svga_vbuf_render *svga_render = svga_vbuf_render(render);
132 struct svga_context *svga = svga_render->svga;
133 void * retPtr = NULL;
134
135 SVGA_STATS_TIME_PUSH(svga_sws(svga),
136 SVGA_STATS_TIME_VBUFRENDERMAPVERT);
137
138 if (svga_render->vbuf) {
139 char *ptr = (char*)pipe_buffer_map(&svga->pipe,
140 svga_render->vbuf,
141 PIPE_TRANSFER_WRITE |
142 PIPE_TRANSFER_FLUSH_EXPLICIT |
143 PIPE_TRANSFER_DISCARD_RANGE |
144 PIPE_TRANSFER_UNSYNCHRONIZED,
145 &svga_render->vbuf_transfer);
146 if (ptr) {
147 svga_render->vbuf_ptr = ptr;
148 retPtr = ptr + svga_render->vbuf_offset;
149 }
150 else {
151 svga_render->vbuf_ptr = NULL;
152 svga_render->vbuf_transfer = NULL;
153 retPtr = NULL;
154 }
155 }
156 else {
157 /* we probably ran out of memory when allocating the vertex buffer */
158 retPtr = NULL;
159 }
160
161 SVGA_STATS_TIME_POP(svga_sws(svga));
162 return retPtr;
163 }
164
165
166 static void
167 svga_vbuf_render_unmap_vertices(struct vbuf_render *render,
168 ushort min_index,
169 ushort max_index)
170 {
171 struct svga_vbuf_render *svga_render = svga_vbuf_render(render);
172 struct svga_context *svga = svga_render->svga;
173 unsigned offset, length;
174 size_t used = svga_render->vertex_size * ((size_t)max_index + 1);
175
176 SVGA_STATS_TIME_PUSH(svga_sws(svga),
177 SVGA_STATS_TIME_VBUFRENDERUNMAPVERT);
178
179 offset = svga_render->vbuf_offset + svga_render->vertex_size * min_index;
180 length = svga_render->vertex_size * (max_index + 1 - min_index);
181
182 if (0) {
183 /* dump vertex data */
184 const float *f = (const float *) ((char *) svga_render->vbuf_ptr +
185 svga_render->vbuf_offset);
186 unsigned i;
187 debug_printf("swtnl vertex data:\n");
188 for (i = 0; i < length / 4; i += 4) {
189 debug_printf("%u: %f %f %f %f\n", i, f[i], f[i+1], f[i+2], f[i+3]);
190 }
191 }
192
193 pipe_buffer_flush_mapped_range(&svga->pipe,
194 svga_render->vbuf_transfer,
195 offset, length);
196 pipe_buffer_unmap(&svga->pipe, svga_render->vbuf_transfer);
197 svga_render->min_index = min_index;
198 svga_render->max_index = max_index;
199 svga_render->vbuf_used = MAX2(svga_render->vbuf_used, used);
200
201 SVGA_STATS_TIME_POP(svga_sws(svga));
202 }
203
204
205 static void
206 svga_vbuf_render_set_primitive(struct vbuf_render *render,
207 enum pipe_prim_type prim)
208 {
209 struct svga_vbuf_render *svga_render = svga_vbuf_render(render);
210 svga_render->prim = prim;
211 }
212
213
214 static void
215 svga_vbuf_submit_state(struct svga_vbuf_render *svga_render)
216 {
217 struct svga_context *svga = svga_render->svga;
218 SVGA3dVertexDecl vdecl[PIPE_MAX_ATTRIBS];
219 unsigned i;
220 static const unsigned zero[PIPE_MAX_ATTRIBS] = {0};
221 boolean retried;
222
223 /* if the vdecl or vbuf hasn't changed do nothing */
224 if (!svga->swtnl.new_vdecl)
225 return;
226
227 SVGA_STATS_TIME_PUSH(svga_sws(svga),
228 SVGA_STATS_TIME_VBUFSUBMITSTATE);
229
230 memcpy(vdecl, svga_render->vdecl, sizeof(vdecl));
231
232 /* flush the hw state */
233 SVGA_RETRY_CHECK(svga, svga_hwtnl_flush(svga->hwtnl), retried);
234 if (retried) {
235 /* if we hit this path we might become synced with hw */
236 svga->swtnl.new_vbuf = TRUE;
237 }
238
239 for (i = 0; i < svga_render->vdecl_count; i++) {
240 vdecl[i].array.offset += svga_render->vdecl_offset;
241 }
242
243 svga_hwtnl_vertex_decls(svga->hwtnl,
244 svga_render->vdecl_count,
245 vdecl,
246 zero,
247 svga_render->layout_id);
248
249 /* Specify the vertex buffer (there's only ever one) */
250 {
251 struct pipe_vertex_buffer vb;
252 vb.is_user_buffer = false;
253 vb.buffer.resource = svga_render->vbuf;
254 vb.buffer_offset = svga_render->vdecl_offset;
255 vb.stride = vdecl[0].array.stride;
256 svga_hwtnl_vertex_buffers(svga->hwtnl, 1, &vb);
257 }
258
259 /* We have already taken care of flatshading, so let the hwtnl
260 * module use whatever is most convenient:
261 */
262 if (svga->state.sw.need_pipeline) {
263 svga_hwtnl_set_flatshade(svga->hwtnl, FALSE, FALSE);
264 svga_hwtnl_set_fillmode(svga->hwtnl, PIPE_POLYGON_MODE_FILL);
265 }
266 else {
267 svga_hwtnl_set_flatshade(svga->hwtnl,
268 svga->curr.rast->templ.flatshade ||
269 svga_is_using_flat_shading(svga),
270 svga->curr.rast->templ.flatshade_first);
271
272 svga_hwtnl_set_fillmode(svga->hwtnl, svga->curr.rast->hw_fillmode);
273 }
274
275 svga->swtnl.new_vdecl = FALSE;
276 SVGA_STATS_TIME_POP(svga_sws(svga));
277 }
278
279
280 static void
281 svga_vbuf_render_draw_arrays(struct vbuf_render *render,
282 unsigned start, uint nr)
283 {
284 struct svga_vbuf_render *svga_render = svga_vbuf_render(render);
285 struct svga_context *svga = svga_render->svga;
286 unsigned bias = (svga_render->vbuf_offset - svga_render->vdecl_offset)
287 / svga_render->vertex_size;
288 /* instancing will already have been resolved at this point by 'draw' */
289 const unsigned start_instance = 0;
290 const unsigned instance_count = 1;
291 boolean retried;
292
293 SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_VBUFDRAWARRAYS);
294
295 /* off to hardware */
296 svga_vbuf_submit_state(svga_render);
297
298 /* Need to call update_state() again as the draw module may have
299 * altered some of our state behind our backs. Testcase:
300 * redbook/polys.c
301 */
302 svga_update_state_retry(svga, SVGA_STATE_HW_DRAW);
303 SVGA_RETRY_CHECK(svga, svga_hwtnl_draw_arrays
304 (svga->hwtnl, svga_render->prim, start + bias,
305 nr, start_instance, instance_count, 0), retried);
306 if (retried) {
307 svga->swtnl.new_vbuf = TRUE;
308 }
309
310 SVGA_STATS_TIME_POP(svga_sws(svga));
311 }
312
313
314 static void
315 svga_vbuf_render_draw_elements(struct vbuf_render *render,
316 const ushort *indices,
317 uint nr_indices)
318 {
319 struct svga_vbuf_render *svga_render = svga_vbuf_render(render);
320 struct svga_context *svga = svga_render->svga;
321 int bias = (svga_render->vbuf_offset - svga_render->vdecl_offset)
322 / svga_render->vertex_size;
323 boolean retried;
324 /* instancing will already have been resolved at this point by 'draw' */
325 const struct pipe_draw_info info = {
326 .index_size = 2,
327 .mode = svga_render->prim,
328 .has_user_indices = 1,
329 .index.user = indices,
330 .start_instance = 0,
331 .instance_count = 1,
332 .index_bias = bias,
333 .min_index = svga_render->min_index,
334 .max_index = svga_render->max_index,
335 .start = 0,
336 .count = nr_indices
337 };
338
339 assert((svga_render->vbuf_offset - svga_render->vdecl_offset)
340 % svga_render->vertex_size == 0);
341
342 SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_VBUFDRAWELEMENTS);
343
344 /* off to hardware */
345 svga_vbuf_submit_state(svga_render);
346
347 /* Need to call update_state() again as the draw module may have
348 * altered some of our state behind our backs. Testcase:
349 * redbook/polys.c
350 */
351 svga_update_state_retry(svga, SVGA_STATE_HW_DRAW);
352 SVGA_RETRY_CHECK(svga, svga_hwtnl_draw_range_elements(svga->hwtnl, &info,
353 nr_indices), retried);
354 if (retried) {
355 svga->swtnl.new_vbuf = TRUE;
356 }
357
358 SVGA_STATS_TIME_POP(svga_sws(svga));
359 }
360
361
/**
 * release_vertices callback of the vbuf_render interface.
 *
 * Intentionally a no-op: nothing is released here.  The vertex buffer
 * reference is dropped in svga_vbuf_render_allocate_vertices() and
 * svga_vbuf_render_destroy().
 */
static void
svga_vbuf_render_release_vertices(struct vbuf_render *render)
{
   (void) render;
}
367
368
369 static void
370 svga_vbuf_render_destroy(struct vbuf_render *render)
371 {
372 struct svga_vbuf_render *svga_render = svga_vbuf_render(render);
373
374 pipe_resource_reference(&svga_render->vbuf, NULL);
375 pipe_resource_reference(&svga_render->ibuf, NULL);
376 FREE(svga_render);
377 }
378
379
380 /**
381 * Create a new primitive render.
382 */
383 struct vbuf_render *
384 svga_vbuf_render_create(struct svga_context *svga)
385 {
386 struct svga_vbuf_render *svga_render = CALLOC_STRUCT(svga_vbuf_render);
387
388 svga_render->svga = svga;
389 svga_render->ibuf_size = 0;
390 svga_render->vbuf_size = 0;
391 svga_render->ibuf_alloc_size = 4*1024;
392 svga_render->vbuf_alloc_size = 64*1024;
393 svga_render->layout_id = SVGA3D_INVALID_ID;
394 svga_render->base.max_vertex_buffer_bytes = 64*1024/10;
395 svga_render->base.max_indices = 65536;
396 svga_render->base.get_vertex_info = svga_vbuf_render_get_vertex_info;
397 svga_render->base.allocate_vertices = svga_vbuf_render_allocate_vertices;
398 svga_render->base.map_vertices = svga_vbuf_render_map_vertices;
399 svga_render->base.unmap_vertices = svga_vbuf_render_unmap_vertices;
400 svga_render->base.set_primitive = svga_vbuf_render_set_primitive;
401 svga_render->base.draw_elements = svga_vbuf_render_draw_elements;
402 svga_render->base.draw_arrays = svga_vbuf_render_draw_arrays;
403 svga_render->base.release_vertices = svga_vbuf_render_release_vertices;
404 svga_render->base.destroy = svga_vbuf_render_destroy;
405
406 return &svga_render->base;
407 }