nv50: properly set the PRIMITIVE_ID enable flag when it is a gp input.
[mesa.git] / src / gallium / drivers / nouveau / nouveau_buffer.c

#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_surface.h"

#include "nouveau_screen.h"
#include "nouveau_context.h"
#include "nouveau_winsys.h"
#include "nouveau_fence.h"
#include "nouveau_buffer.h"
#include "nouveau_mm.h"

#define NOUVEAU_TRANSFER_PUSHBUF_THRESHOLD 192

struct nouveau_transfer {
   struct pipe_transfer base;

   uint8_t *map;
   struct nouveau_bo *bo;
   struct nouveau_mm_allocation *mm;
   uint32_t offset;
};

static INLINE struct nouveau_transfer *
nouveau_transfer(struct pipe_transfer *transfer)
{
   return (struct nouveau_transfer *)transfer;
}

static INLINE boolean
nouveau_buffer_malloc(struct nv04_resource *buf)
{
   if (!buf->data)
      buf->data = align_malloc(buf->base.width0, NOUVEAU_MIN_BUFFER_MAP_ALIGN);
   return !!buf->data;
}

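/* Allocate backing storage for the buffer in the requested domain: VRAM
 * (with a transparent fallback to GART if the VRAM allocation fails), GART,
 * or, for domain == 0, plain malloc'd system memory. GPU storage is
 * sub-allocated from the screen's mm slab allocators.
 */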
static INLINE boolean
nouveau_buffer_allocate(struct nouveau_screen *screen,
                        struct nv04_resource *buf, unsigned domain)
{
   uint32_t size = buf->base.width0;

   if (buf->base.bind & (PIPE_BIND_CONSTANT_BUFFER |
                         PIPE_BIND_COMPUTE_RESOURCE |
                         PIPE_BIND_SHADER_RESOURCE))
      size = align(size, 0x100);

   if (domain == NOUVEAU_BO_VRAM) {
      buf->mm = nouveau_mm_allocate(screen->mm_VRAM, size,
                                    &buf->bo, &buf->offset);
      if (!buf->bo)
         return nouveau_buffer_allocate(screen, buf, NOUVEAU_BO_GART);
      NOUVEAU_DRV_STAT(screen, buf_obj_current_bytes_vid, buf->base.width0);
   } else
   if (domain == NOUVEAU_BO_GART) {
      buf->mm = nouveau_mm_allocate(screen->mm_GART, size,
                                    &buf->bo, &buf->offset);
      if (!buf->bo)
         return FALSE;
      NOUVEAU_DRV_STAT(screen, buf_obj_current_bytes_sys, buf->base.width0);
   } else {
      assert(domain == 0);
      if (!nouveau_buffer_malloc(buf))
         return FALSE;
   }
   buf->domain = domain;
   if (buf->bo)
      buf->address = buf->bo->offset + buf->offset;

   return TRUE;
}

static INLINE void
release_allocation(struct nouveau_mm_allocation **mm,
                   struct nouveau_fence *fence)
{
   nouveau_fence_work(fence, nouveau_mm_free_work, *mm);
   (*mm) = NULL;
}

INLINE void
nouveau_buffer_release_gpu_storage(struct nv04_resource *buf)
{
   nouveau_bo_ref(NULL, &buf->bo);

   if (buf->mm)
      release_allocation(&buf->mm, buf->fence);

   if (buf->domain == NOUVEAU_BO_VRAM)
      NOUVEAU_DRV_STAT_RES(buf, buf_obj_current_bytes_vid, -(uint64_t)buf->base.width0);
   if (buf->domain == NOUVEAU_BO_GART)
      NOUVEAU_DRV_STAT_RES(buf, buf_obj_current_bytes_sys, -(uint64_t)buf->base.width0);

   buf->domain = 0;
}

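/* Drop the current backing storage and fences and allocate fresh storage in
 * the given domain. Status bits not covered by REALLOC_MASK are cleared.
 */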
static INLINE boolean
nouveau_buffer_reallocate(struct nouveau_screen *screen,
                          struct nv04_resource *buf, unsigned domain)
{
   nouveau_buffer_release_gpu_storage(buf);

   nouveau_fence_ref(NULL, &buf->fence);
   nouveau_fence_ref(NULL, &buf->fence_wr);

   buf->status &= NOUVEAU_BUFFER_STATUS_REALLOC_MASK;

   return nouveau_buffer_allocate(screen, buf, domain);
}

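/* Releases everything associated with the buffer: its GPU storage, the
 * system memory copy (unless it is user-owned memory), and the fences.
 */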
static void
nouveau_buffer_destroy(struct pipe_screen *pscreen,
                       struct pipe_resource *presource)
{
   struct nv04_resource *res = nv04_resource(presource);

   nouveau_buffer_release_gpu_storage(res);

   if (res->data && !(res->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY))
      align_free(res->data);

   nouveau_fence_ref(NULL, &res->fence);
   nouveau_fence_ref(NULL, &res->fence_wr);

   FREE(res);

   NOUVEAU_DRV_STAT(nouveau_screen(pscreen), buf_obj_current_count, -1);
}

/* Set up a staging area for the transfer. This is either done in "regular"
 * system memory if the driver supports push_data (nv50+) and the data is
 * small enough (and permit_pb == true), or in GART memory.
 */
static uint8_t *
nouveau_transfer_staging(struct nouveau_context *nv,
                         struct nouveau_transfer *tx, boolean permit_pb)
{
   const unsigned adj = tx->base.box.x & NOUVEAU_MIN_BUFFER_MAP_ALIGN_MASK;
   const unsigned size = align(tx->base.box.width, 4) + adj;

   if (!nv->push_data)
      permit_pb = FALSE;

   if ((size <= NOUVEAU_TRANSFER_PUSHBUF_THRESHOLD) && permit_pb) {
      tx->map = align_malloc(size, NOUVEAU_MIN_BUFFER_MAP_ALIGN);
      if (tx->map)
         tx->map += adj;
   } else {
      tx->mm =
         nouveau_mm_allocate(nv->screen->mm_GART, size, &tx->bo, &tx->offset);
      if (tx->bo) {
         tx->offset += adj;
         if (!nouveau_bo_map(tx->bo, 0, NULL))
            tx->map = (uint8_t *)tx->bo->map + tx->offset;
      }
   }
   return tx->map;
}

/* Copies data from the resource into the transfer's temporary GART
 * buffer. Also updates buf->data if present.
 *
 * Maybe just migrate to GART right away if we actually need to do this. */
static boolean
nouveau_transfer_read(struct nouveau_context *nv, struct nouveau_transfer *tx)
{
   struct nv04_resource *buf = nv04_resource(tx->base.resource);
   const unsigned base = tx->base.box.x;
   const unsigned size = tx->base.box.width;

   NOUVEAU_DRV_STAT(nv->screen, buf_read_bytes_staging_vid, size);

   nv->copy_data(nv, tx->bo, tx->offset, NOUVEAU_BO_GART,
                 buf->bo, buf->offset + base, buf->domain, size);

   if (nouveau_bo_wait(tx->bo, NOUVEAU_BO_RD, nv->client))
      return FALSE;

   if (buf->data)
      memcpy(buf->data + base, tx->map, size);

   return TRUE;
}

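/* Writes the transferred range back to the buffer. If a cached system memory
 * copy exists, the staging data is refreshed from it first (the map handed
 * to the user may have pointed at buf->data); otherwise the buffer is marked
 * dirty. The upload itself is done with a GPU copy from the staging bo, or,
 * when the data was staged in malloc'd memory, by pushing it through the
 * command stream (push_cb for dword-aligned constant buffer updates,
 * push_data otherwise). Both fences are updated to the current fence.
 */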
static void
nouveau_transfer_write(struct nouveau_context *nv, struct nouveau_transfer *tx,
                       unsigned offset, unsigned size)
{
   struct nv04_resource *buf = nv04_resource(tx->base.resource);
   uint8_t *data = tx->map + offset;
   const unsigned base = tx->base.box.x + offset;
   const boolean can_cb = !((base | size) & 3);

   if (buf->data)
      memcpy(data, buf->data + base, size);
   else
      buf->status |= NOUVEAU_BUFFER_STATUS_DIRTY;

   if (buf->domain == NOUVEAU_BO_VRAM)
      NOUVEAU_DRV_STAT(nv->screen, buf_write_bytes_staging_vid, size);
   if (buf->domain == NOUVEAU_BO_GART)
      NOUVEAU_DRV_STAT(nv->screen, buf_write_bytes_staging_sys, size);

   if (tx->bo)
      nv->copy_data(nv, buf->bo, buf->offset + base, buf->domain,
                    tx->bo, tx->offset + offset, NOUVEAU_BO_GART, size);
   else
   if ((buf->base.bind & PIPE_BIND_CONSTANT_BUFFER) && nv->push_cb && can_cb)
      nv->push_cb(nv, buf->bo, buf->domain, buf->offset, buf->base.width0,
                  base, size / 4, (const uint32_t *)data);
   else
      nv->push_data(nv, buf->bo, buf->offset + base, buf->domain, size, data);

   nouveau_fence_ref(nv->screen->fence.current, &buf->fence);
   nouveau_fence_ref(nv->screen->fence.current, &buf->fence_wr);
}

/* Does a CPU wait for the buffer's backing data to become reliably accessible
 * for write/read by waiting on the buffer's relevant fences.
 */
static INLINE boolean
nouveau_buffer_sync(struct nv04_resource *buf, unsigned rw)
{
   if (rw == PIPE_TRANSFER_READ) {
      if (!buf->fence_wr)
         return TRUE;
      NOUVEAU_DRV_STAT_RES(buf, buf_non_kernel_fence_sync_count,
                           !nouveau_fence_signalled(buf->fence_wr));
      if (!nouveau_fence_wait(buf->fence_wr))
         return FALSE;
   } else {
      if (!buf->fence)
         return TRUE;
      NOUVEAU_DRV_STAT_RES(buf, buf_non_kernel_fence_sync_count,
                           !nouveau_fence_signalled(buf->fence));
      if (!nouveau_fence_wait(buf->fence))
         return FALSE;

      nouveau_fence_ref(NULL, &buf->fence);
   }
   nouveau_fence_ref(NULL, &buf->fence_wr);

   return TRUE;
}

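/* Checks, without blocking, whether the GPU still has conflicting work
 * pending: reads only conflict with an unsignalled write fence, writes
 * conflict with any unsignalled fence.
 */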
static INLINE boolean
nouveau_buffer_busy(struct nv04_resource *buf, unsigned rw)
{
   if (rw == PIPE_TRANSFER_READ)
      return (buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr));
   else
      return (buf->fence && !nouveau_fence_signalled(buf->fence));
}

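/* Initializes the pipe_transfer part of a buffer transfer for the given box
 * and usage. Buffers are treated as 1D, so height/depth are 1 and the
 * strides are 0; the staging bo/map are set up later if needed.
 */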
static INLINE void
nouveau_buffer_transfer_init(struct nouveau_transfer *tx,
                             struct pipe_resource *resource,
                             const struct pipe_box *box,
                             unsigned usage)
{
   tx->base.resource = resource;
   tx->base.level = 0;
   tx->base.usage = usage;
   tx->base.box.x = box->x;
   tx->base.box.y = 0;
   tx->base.box.z = 0;
   tx->base.box.width = box->width;
   tx->base.box.height = 1;
   tx->base.box.depth = 1;
   tx->base.stride = 0;
   tx->base.layer_stride = 0;

   tx->bo = NULL;
   tx->map = NULL;
}

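/* Releases the staging area of a transfer: bo-backed staging is unreferenced
 * and its sub-allocation is freed once the current fence signals, while
 * malloc'd staging is freed immediately.
 */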
static INLINE void
nouveau_buffer_transfer_del(struct nouveau_context *nv,
                            struct nouveau_transfer *tx)
{
   if (tx->map) {
      if (likely(tx->bo)) {
         nouveau_bo_ref(NULL, &tx->bo);
         if (tx->mm)
            release_allocation(&tx->mm, nv->screen->fence.current);
      } else {
         align_free(tx->map -
                    (tx->base.box.x & NOUVEAU_MIN_BUFFER_MAP_ALIGN_MASK));
      }
   }
}

/* Creates a cache in system memory of the buffer data. */
static boolean
nouveau_buffer_cache(struct nouveau_context *nv, struct nv04_resource *buf)
{
   struct nouveau_transfer tx;
   boolean ret;
   tx.base.resource = &buf->base;
   tx.base.box.x = 0;
   tx.base.box.width = buf->base.width0;
   tx.bo = NULL;
   tx.map = NULL;

   if (!buf->data)
      if (!nouveau_buffer_malloc(buf))
         return FALSE;
   if (!(buf->status & NOUVEAU_BUFFER_STATUS_DIRTY))
      return TRUE;
   nv->stats.buf_cache_count++;

   if (!nouveau_transfer_staging(nv, &tx, FALSE))
      return FALSE;

   ret = nouveau_transfer_read(nv, &tx);
   if (ret) {
      buf->status &= ~NOUVEAU_BUFFER_STATUS_DIRTY;
      memcpy(buf->data, tx.map, buf->base.width0);
   }
   nouveau_buffer_transfer_del(nv, &tx);
   return ret;
}


#define NOUVEAU_TRANSFER_DISCARD \
   (PIPE_TRANSFER_DISCARD_RANGE | PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)

/* Checks whether it is possible to completely discard the memory backing this
 * resource. This can be useful if we would otherwise have to wait for a read
 * operation to complete on this data.
 */
static INLINE boolean
nouveau_buffer_should_discard(struct nv04_resource *buf, unsigned usage)
{
   if (!(usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE))
      return FALSE;
   if (unlikely(buf->base.bind & PIPE_BIND_SHARED))
      return FALSE;
   return buf->mm && nouveau_buffer_busy(buf, PIPE_TRANSFER_WRITE);
}

/* Returns a pointer to a memory area representing a window into the
 * resource's data.
 *
 * This may or may not be the _actual_ memory area of the resource. However
 * when calling nouveau_buffer_transfer_unmap, if it wasn't the actual memory
 * area, the contents of the returned map are copied over to the resource.
 *
 * The usage indicates what the caller plans to do with the map:
 *
 *   WRITE means that the user plans to write to it
 *
 *   READ means that the user plans on reading from it
 *
 *   DISCARD_WHOLE_RESOURCE means that the whole resource is going to be
 *   potentially overwritten, and even if it isn't, the parts that aren't
 *   overwritten don't need to be preserved.
 *
 *   DISCARD_RANGE means that all the data in the specified range is going to
 *   be overwritten.
 *
 * The strategy for determining what kind of memory area to return is complex,
 * see comments inside of the function.
 */
static void *
nouveau_buffer_transfer_map(struct pipe_context *pipe,
                            struct pipe_resource *resource,
                            unsigned level, unsigned usage,
                            const struct pipe_box *box,
                            struct pipe_transfer **ptransfer)
{
   struct nouveau_context *nv = nouveau_context(pipe);
   struct nv04_resource *buf = nv04_resource(resource);
   struct nouveau_transfer *tx = MALLOC_STRUCT(nouveau_transfer);
   uint8_t *map;
   int ret;

   if (!tx)
      return NULL;
   nouveau_buffer_transfer_init(tx, resource, box, usage);
   *ptransfer = &tx->base;

   if (usage & PIPE_TRANSFER_READ)
      NOUVEAU_DRV_STAT(nv->screen, buf_transfers_rd, 1);
   if (usage & PIPE_TRANSFER_WRITE)
      NOUVEAU_DRV_STAT(nv->screen, buf_transfers_wr, 1);

   if (buf->domain == NOUVEAU_BO_VRAM) {
      if (usage & NOUVEAU_TRANSFER_DISCARD) {
         /* Set up a staging area for the user to write to. It will be copied
          * back into VRAM on unmap. */
         if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)
            buf->status &= NOUVEAU_BUFFER_STATUS_REALLOC_MASK;
         nouveau_transfer_staging(nv, tx, TRUE);
      } else {
         if (buf->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
            /* The GPU is currently writing to this buffer. Copy its current
             * contents to a staging area in the GART. This is necessary since
             * not the whole area being mapped is being discarded.
             */
            if (buf->data) {
               align_free(buf->data);
               buf->data = NULL;
            }
            nouveau_transfer_staging(nv, tx, FALSE);
            nouveau_transfer_read(nv, tx);
         } else {
            /* The buffer is currently idle. Create a staging area for writes,
             * and make sure that the cached data is up-to-date. */
            if (usage & PIPE_TRANSFER_WRITE)
               nouveau_transfer_staging(nv, tx, TRUE);
            if (!buf->data)
               nouveau_buffer_cache(nv, buf);
         }
      }
      return buf->data ? (buf->data + box->x) : tx->map;
   } else
   if (unlikely(buf->domain == 0)) {
      return buf->data + box->x;
   }

   /* At this point, buf->domain == GART */

   if (nouveau_buffer_should_discard(buf, usage)) {
      int ref = buf->base.reference.count - 1;
      nouveau_buffer_reallocate(nv->screen, buf, buf->domain);
      if (ref > 0) /* any references inside context possible ? */
         nv->invalidate_resource_storage(nv, &buf->base, ref);
   }

   /* Note that nouveau_bo_map ends up doing a nouveau_bo_wait with the
    * relevant flags. If buf->mm is set, that means this resource is part of a
    * larger slab bo that holds multiple resources. So in that case, don't
    * wait on the whole slab and instead use the logic below to return a
    * reasonable buffer for that case.
    */
   ret = nouveau_bo_map(buf->bo,
                        buf->mm ? 0 : nouveau_screen_transfer_flags(usage),
                        nv->client);
   if (ret) {
      FREE(tx);
      return NULL;
   }
   map = (uint8_t *)buf->bo->map + buf->offset + box->x;

   /* using kernel fences only if !buf->mm */
   if ((usage & PIPE_TRANSFER_UNSYNCHRONIZED) || !buf->mm)
      return map;

   /* If the GPU is currently reading/writing this buffer, we shouldn't
    * interfere with its progress. So instead we either wait for the GPU to
    * complete its operation, or set up a staging area to perform our work in.
    */
   if (nouveau_buffer_busy(buf, usage & PIPE_TRANSFER_READ_WRITE)) {
      if (unlikely(usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)) {
         /* Discarding was not possible, must sync because
          * subsequent transfers might use UNSYNCHRONIZED. */
         nouveau_buffer_sync(buf, usage & PIPE_TRANSFER_READ_WRITE);
      } else
      if (usage & PIPE_TRANSFER_DISCARD_RANGE) {
         /* The whole range is being discarded, so it doesn't matter what was
          * there before. No need to copy anything over. */
         nouveau_transfer_staging(nv, tx, TRUE);
         map = tx->map;
      } else
      if (nouveau_buffer_busy(buf, PIPE_TRANSFER_READ)) {
         if (usage & PIPE_TRANSFER_DONTBLOCK)
            map = NULL;
         else
            nouveau_buffer_sync(buf, usage & PIPE_TRANSFER_READ_WRITE);
      } else {
         /* It is expected that the returned buffer be a representation of the
          * data in question, so we must copy it over from the buffer. */
         nouveau_transfer_staging(nv, tx, TRUE);
         if (tx->map)
            memcpy(tx->map, map, box->width);
         map = tx->map;
      }
   }
   if (!map)
      FREE(tx);
   return map;
}


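/* Writes back the given subrange of a mapped transfer; used for maps done
 * with PIPE_TRANSFER_FLUSH_EXPLICIT, where only explicitly flushed ranges
 * need to reach the buffer.
 */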
static void
nouveau_buffer_transfer_flush_region(struct pipe_context *pipe,
                                     struct pipe_transfer *transfer,
                                     const struct pipe_box *box)
{
   struct nouveau_transfer *tx = nouveau_transfer(transfer);
   if (tx->map)
      nouveau_transfer_write(nouveau_context(pipe), tx, box->x, box->width);
}

/* Unmap stage of the transfer. If it was a WRITE transfer and the map that
 * was returned was not the real resource's data, this needs to transfer the
 * data back to the resource.
 *
 * Also marks vbo/cb dirty based on the buffer's bindings.
 */
static void
nouveau_buffer_transfer_unmap(struct pipe_context *pipe,
                              struct pipe_transfer *transfer)
{
   struct nouveau_context *nv = nouveau_context(pipe);
   struct nouveau_transfer *tx = nouveau_transfer(transfer);
   struct nv04_resource *buf = nv04_resource(transfer->resource);

   if (tx->base.usage & PIPE_TRANSFER_WRITE) {
      if (!(tx->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT) && tx->map)
         nouveau_transfer_write(nv, tx, 0, tx->base.box.width);

      if (likely(buf->domain)) {
         const uint8_t bind = buf->base.bind;
         /* make sure we invalidate dedicated caches */
         if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER))
            nv->vbo_dirty = TRUE;
         if (bind & (PIPE_BIND_CONSTANT_BUFFER))
            nv->cb_dirty = TRUE;
      }
   }

   if (!tx->bo && (tx->base.usage & PIPE_TRANSFER_WRITE))
      NOUVEAU_DRV_STAT(nv->screen, buf_write_bytes_direct, tx->base.box.width);

   nouveau_buffer_transfer_del(nv, tx);
   FREE(tx);
}


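/* Copies a range between two buffers. If both have GPU storage the copy is
 * done on the GPU and the buffers are fenced accordingly; otherwise it falls
 * back to a CPU copy through util_resource_copy_region.
 */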
void
nouveau_copy_buffer(struct nouveau_context *nv,
                    struct nv04_resource *dst, unsigned dstx,
                    struct nv04_resource *src, unsigned srcx, unsigned size)
{
   assert(dst->base.target == PIPE_BUFFER && src->base.target == PIPE_BUFFER);

   if (likely(dst->domain) && likely(src->domain)) {
      nv->copy_data(nv,
                    dst->bo, dst->offset + dstx, dst->domain,
                    src->bo, src->offset + srcx, src->domain, size);

      dst->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
      nouveau_fence_ref(nv->screen->fence.current, &dst->fence);
      nouveau_fence_ref(nv->screen->fence.current, &dst->fence_wr);

      src->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
      nouveau_fence_ref(nv->screen->fence.current, &src->fence);
   } else {
      struct pipe_box src_box;
      src_box.x = srcx;
      src_box.y = 0;
      src_box.z = 0;
      src_box.width = size;
      src_box.height = 1;
      src_box.depth = 1;
      util_resource_copy_region(&nv->pipe,
                                &dst->base, 0, dstx, 0, 0,
                                &src->base, 0, &src_box);
   }
}


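/* Returns a CPU pointer to the resource data at the given offset, for use by
 * fallback paths. VRAM resources are read back into the system memory cache
 * and accessed there; GART resources are mapped directly, syncing against
 * our own fences for sub-allocated (slab) buffers.
 */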
void *
nouveau_resource_map_offset(struct nouveau_context *nv,
                            struct nv04_resource *res, uint32_t offset,
                            uint32_t flags)
{
   if (unlikely(res->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY))
      return res->data + offset;

   if (res->domain == NOUVEAU_BO_VRAM) {
      if (!res->data || (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING))
         nouveau_buffer_cache(nv, res);
   }
   if (res->domain != NOUVEAU_BO_GART)
      return res->data + offset;

   if (res->mm) {
      unsigned rw;
      rw = (flags & NOUVEAU_BO_WR) ? PIPE_TRANSFER_WRITE : PIPE_TRANSFER_READ;
      nouveau_buffer_sync(res, rw);
      if (nouveau_bo_map(res->bo, 0, NULL))
         return NULL;
   } else {
      if (nouveau_bo_map(res->bo, flags, nv->client))
         return NULL;
   }
   return (uint8_t *)res->bo->map + res->offset + offset;
}


const struct u_resource_vtbl nouveau_buffer_vtbl =
{
   u_default_resource_get_handle,        /* get_handle */
   nouveau_buffer_destroy,               /* resource_destroy */
   nouveau_buffer_transfer_map,          /* transfer_map */
   nouveau_buffer_transfer_flush_region, /* transfer_flush_region */
   nouveau_buffer_transfer_unmap,        /* transfer_unmap */
   u_default_transfer_inline_write       /* transfer_inline_write */
};

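/* Creates a new buffer resource. The placement (VRAM vs. GART) is chosen
 * from the bind flags and the expected usage; when the screen requests it, a
 * system memory copy of VRAM buffers is kept around as well.
 */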
struct pipe_resource *
nouveau_buffer_create(struct pipe_screen *pscreen,
                      const struct pipe_resource *templ)
{
   struct nouveau_screen *screen = nouveau_screen(pscreen);
   struct nv04_resource *buffer;
   boolean ret;

   buffer = CALLOC_STRUCT(nv04_resource);
   if (!buffer)
      return NULL;

   buffer->base = *templ;
   buffer->vtbl = &nouveau_buffer_vtbl;
   pipe_reference_init(&buffer->base.reference, 1);
   buffer->base.screen = pscreen;

   if (buffer->base.bind &
       (screen->vidmem_bindings & screen->sysmem_bindings)) {
      switch (buffer->base.usage) {
      case PIPE_USAGE_DEFAULT:
      case PIPE_USAGE_IMMUTABLE:
      case PIPE_USAGE_STATIC:
         buffer->domain = NOUVEAU_BO_VRAM;
         break;
      case PIPE_USAGE_DYNAMIC:
         /* For most apps, we'd have to do staging transfers to avoid sync
          * with this usage, and GART -> GART copies would be suboptimal.
          */
         buffer->domain = NOUVEAU_BO_VRAM;
         break;
      case PIPE_USAGE_STAGING:
      case PIPE_USAGE_STREAM:
         buffer->domain = NOUVEAU_BO_GART;
         break;
      default:
         assert(0);
         break;
      }
   } else {
      if (buffer->base.bind & screen->vidmem_bindings)
         buffer->domain = NOUVEAU_BO_VRAM;
      else
      if (buffer->base.bind & screen->sysmem_bindings)
         buffer->domain = NOUVEAU_BO_GART;
   }
   ret = nouveau_buffer_allocate(screen, buffer, buffer->domain);

   if (ret == FALSE)
      goto fail;

   if (buffer->domain == NOUVEAU_BO_VRAM && screen->hint_buf_keep_sysmem_copy)
      nouveau_buffer_cache(NULL, buffer);

   NOUVEAU_DRV_STAT(screen, buf_obj_current_count, 1);

   return &buffer->base;

fail:
   FREE(buffer);
   return NULL;
}


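/* Wraps a user-provided memory pointer in a pipe_resource. No GPU storage is
 * allocated here; the data is uploaded later, e.g. by
 * nouveau_user_buffer_upload or nouveau_buffer_migrate.
 */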
struct pipe_resource *
nouveau_user_buffer_create(struct pipe_screen *pscreen, void *ptr,
                           unsigned bytes, unsigned bind)
{
   struct nv04_resource *buffer;

   buffer = CALLOC_STRUCT(nv04_resource);
   if (!buffer)
      return NULL;

   pipe_reference_init(&buffer->base.reference, 1);
   buffer->vtbl = &nouveau_buffer_vtbl;
   buffer->base.screen = pscreen;
   buffer->base.format = PIPE_FORMAT_R8_UNORM;
   buffer->base.usage = PIPE_USAGE_IMMUTABLE;
   buffer->base.bind = bind;
   buffer->base.width0 = bytes;
   buffer->base.height0 = 1;
   buffer->base.depth0 = 1;

   buffer->data = ptr;
   buffer->status = NOUVEAU_BUFFER_STATUS_USER_MEMORY;

   return &buffer->base;
}

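/* Reads the contents of a bo back into the buffer's system memory copy,
 * allocating that copy if necessary.
 */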
static INLINE boolean
nouveau_buffer_data_fetch(struct nouveau_context *nv, struct nv04_resource *buf,
                          struct nouveau_bo *bo, unsigned offset, unsigned size)
{
   if (!nouveau_buffer_malloc(buf))
      return FALSE;
   if (nouveau_bo_map(bo, NOUVEAU_BO_RD, nv->client))
      return FALSE;
   memcpy(buf->data, (uint8_t *)bo->map + offset, size);
   return TRUE;
}

/* Migrate a linear buffer (vertex, index, constants) USER -> GART -> VRAM. */
boolean
nouveau_buffer_migrate(struct nouveau_context *nv,
                       struct nv04_resource *buf, const unsigned new_domain)
{
   struct nouveau_screen *screen = nv->screen;
   struct nouveau_bo *bo;
   const unsigned old_domain = buf->domain;
   unsigned size = buf->base.width0;
   unsigned offset;
   int ret;

   assert(new_domain != old_domain);

   if (new_domain == NOUVEAU_BO_GART && old_domain == 0) {
      if (!nouveau_buffer_allocate(screen, buf, new_domain))
         return FALSE;
      ret = nouveau_bo_map(buf->bo, 0, nv->client);
      if (ret)
         return FALSE;
      memcpy((uint8_t *)buf->bo->map + buf->offset, buf->data, size);
      align_free(buf->data);
   } else
   if (old_domain != 0 && new_domain != 0) {
      struct nouveau_mm_allocation *mm = buf->mm;

      if (new_domain == NOUVEAU_BO_VRAM) {
         /* keep a system memory copy of our data in case we hit a fallback */
         if (!nouveau_buffer_data_fetch(nv, buf, buf->bo, buf->offset, size))
            return FALSE;
         if (nouveau_mesa_debug)
            debug_printf("migrating %u KiB to VRAM\n", size / 1024);
      }

      offset = buf->offset;
      bo = buf->bo;
      buf->bo = NULL;
      buf->mm = NULL;
      nouveau_buffer_allocate(screen, buf, new_domain);

      nv->copy_data(nv, buf->bo, buf->offset, new_domain,
                    bo, offset, old_domain, buf->base.width0);

      nouveau_bo_ref(NULL, &bo);
      if (mm)
         release_allocation(&mm, screen->fence.current);
   } else
   if (new_domain == NOUVEAU_BO_VRAM && old_domain == 0) {
      struct nouveau_transfer tx;
      if (!nouveau_buffer_allocate(screen, buf, NOUVEAU_BO_VRAM))
         return FALSE;
      tx.base.resource = &buf->base;
      tx.base.box.x = 0;
      tx.base.box.width = buf->base.width0;
      tx.bo = NULL;
      tx.map = NULL;
      if (!nouveau_transfer_staging(nv, &tx, FALSE))
         return FALSE;
      nouveau_transfer_write(nv, &tx, 0, tx.base.box.width);
      nouveau_buffer_transfer_del(nv, &tx);
   } else
      return FALSE;

   assert(buf->domain == new_domain);
   return TRUE;
}

/* Migrate data from glVertexAttribPointer(non-VBO) user buffers to GART.
 * We'd like to only allocate @size bytes here, but then we'd have to rebase
 * the vertex indices ...
 */
boolean
nouveau_user_buffer_upload(struct nouveau_context *nv,
                           struct nv04_resource *buf,
                           unsigned base, unsigned size)
{
   struct nouveau_screen *screen = nouveau_screen(buf->base.screen);
   int ret;

   assert(buf->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY);

   buf->base.width0 = base + size;
   if (!nouveau_buffer_reallocate(screen, buf, NOUVEAU_BO_GART))
      return FALSE;

   ret = nouveau_bo_map(buf->bo, 0, nv->client);
   if (ret)
      return FALSE;
   memcpy((uint8_t *)buf->bo->map + buf->offset + base, buf->data + base, size);

   return TRUE;
}


/* Scratch data allocation. */

static INLINE int
nouveau_scratch_bo_alloc(struct nouveau_context *nv, struct nouveau_bo **pbo,
                         unsigned size)
{
   return nouveau_bo_new(nv->screen->device, NOUVEAU_BO_GART | NOUVEAU_BO_MAP,
                         4096, size, NULL, pbo);
}

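/* Releases all extra "runout" scratch bos that were allocated when the
 * regular scratch buffers ran out of space.
 */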
void
nouveau_scratch_runout_release(struct nouveau_context *nv)
{
   if (!nv->scratch.nr_runout)
      return;
   do {
      --nv->scratch.nr_runout;
      nouveau_bo_ref(NULL, &nv->scratch.runout[nv->scratch.nr_runout]);
   } while (nv->scratch.nr_runout);

   FREE(nv->scratch.runout);
   nv->scratch.end = 0;
   nv->scratch.runout = NULL;
}

/* Allocate an extra bo if we can't fit everything we need simultaneously.
 * (Could happen for very large user arrays.)
 */
static INLINE boolean
nouveau_scratch_runout(struct nouveau_context *nv, unsigned size)
{
   int ret;
   const unsigned n = nv->scratch.nr_runout++;

   nv->scratch.runout = REALLOC(nv->scratch.runout,
                                (n + 0) * sizeof(*nv->scratch.runout),
                                (n + 1) * sizeof(*nv->scratch.runout));
   nv->scratch.runout[n] = NULL;

   ret = nouveau_scratch_bo_alloc(nv, &nv->scratch.runout[n], size);
   if (!ret) {
      ret = nouveau_bo_map(nv->scratch.runout[n], 0, NULL);
      if (ret)
         nouveau_bo_ref(NULL, &nv->scratch.runout[--nv->scratch.nr_runout]);
   }
   if (!ret) {
      nv->scratch.current = nv->scratch.runout[n];
      nv->scratch.offset = 0;
      nv->scratch.end = size;
      nv->scratch.map = nv->scratch.current->map;
   }
   return !ret;
}

/* Continue to next scratch buffer, if available (no wrapping, large enough).
 * Allocate it if it has not yet been created.
 */
static INLINE boolean
nouveau_scratch_next(struct nouveau_context *nv, unsigned size)
{
   struct nouveau_bo *bo;
   int ret;
   const unsigned i = (nv->scratch.id + 1) % NOUVEAU_MAX_SCRATCH_BUFS;

   if ((size > nv->scratch.bo_size) || (i == nv->scratch.wrap))
      return FALSE;
   nv->scratch.id = i;

   bo = nv->scratch.bo[i];
   if (!bo) {
      ret = nouveau_scratch_bo_alloc(nv, &bo, nv->scratch.bo_size);
      if (ret)
         return FALSE;
      nv->scratch.bo[i] = bo;
   }
   nv->scratch.current = bo;
   nv->scratch.offset = 0;
   nv->scratch.end = nv->scratch.bo_size;

   ret = nouveau_bo_map(bo, NOUVEAU_BO_WR, nv->client);
   if (!ret)
      nv->scratch.map = bo->map;
   return !ret;
}

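/* Makes room for at least min_size bytes of scratch space, preferring the
 * next regular scratch buffer and falling back to a dedicated runout bo.
 */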
static boolean
nouveau_scratch_more(struct nouveau_context *nv, unsigned min_size)
{
   boolean ret;

   ret = nouveau_scratch_next(nv, min_size);
   if (!ret)
      ret = nouveau_scratch_runout(nv, min_size);
   return ret;
}


/* Copy data to a scratch buffer and return the GPU address & bo the data
 * resides in. */
uint64_t
nouveau_scratch_data(struct nouveau_context *nv,
                     const void *data, unsigned base, unsigned size,
                     struct nouveau_bo **bo)
{
   unsigned bgn = MAX2(base, nv->scratch.offset);
   unsigned end = bgn + size;

   if (end >= nv->scratch.end) {
      end = base + size;
      if (!nouveau_scratch_more(nv, end))
         return 0;
      bgn = base;
   }
   nv->scratch.offset = align(end, 4);

   memcpy(nv->scratch.map + bgn, (const uint8_t *)data + base, size);

   *bo = nv->scratch.current;
   return (*bo)->offset + (bgn - base);
}

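/* Reserves size bytes of scratch space without copying anything into it;
 * returns a CPU pointer to the reserved area and its bo / GPU address.
 */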
void *
nouveau_scratch_get(struct nouveau_context *nv,
                    unsigned size, uint64_t *gpu_addr, struct nouveau_bo **pbo)
{
   unsigned bgn = nv->scratch.offset;
   unsigned end = nv->scratch.offset + size;

   if (end >= nv->scratch.end) {
      end = size;
      if (!nouveau_scratch_more(nv, end))
         return NULL;
      bgn = 0;
   }
   nv->scratch.offset = align(end, 4);

   *pbo = nv->scratch.current;
   *gpu_addr = nv->scratch.current->offset + bgn;
   return nv->scratch.map + bgn;
}