-#if defined(USE_X86_ASM)
-#define COPY_DWORDS( dst, src, nr ) \
-do { \
- int __tmp; \
- __asm__ __volatile__( "rep ; movsl" \
- : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \
- : "0" (nr), \
- "D" ((long)dst), \
- "S" ((long)src) ); \
-} while (0)
-#else
-#define COPY_DWORDS( dst, src, nr ) \
-do { \
- int j; \
- for ( j = 0 ; j < nr ; j++ ) \
- dst[j] = ((int *)src)[j]; \
- dst += nr; \
-} while (0)
-#endif
-
-static void r300EmitVec4(GLcontext * ctx, struct r300_dma_region *rvb,
- GLvoid * data, int stride, int count)
-{
- int i;
- int *out = (int *)(rvb->address + rvb->start);
-
- if (RADEON_DEBUG & DEBUG_VERTS)
- fprintf(stderr, "%s count %d stride %d out %p data %p\n",
- __FUNCTION__, count, stride, (void *)out, (void *)data);
-
- if (stride == 4)
- COPY_DWORDS(out, data, count);
- else
- for (i = 0; i < count; i++) {
- out[0] = *(int *)data;
- out++;
- data += stride;
- }
-}
-
-static void r300EmitVec8(GLcontext * ctx, struct r300_dma_region *rvb,
- GLvoid * data, int stride, int count)
-{
- int i;
- int *out = (int *)(rvb->address + rvb->start);
-
- if (RADEON_DEBUG & DEBUG_VERTS)
- fprintf(stderr, "%s count %d stride %d out %p data %p\n",
- __FUNCTION__, count, stride, (void *)out, (void *)data);
-
- if (stride == 8)
- COPY_DWORDS(out, data, count * 2);
- else
- for (i = 0; i < count; i++) {
- out[0] = *(int *)data;
- out[1] = *(int *)(data + 4);
- out += 2;
- data += stride;
- }
-}
-
-static void r300EmitVec12(GLcontext * ctx, struct r300_dma_region *rvb,
- GLvoid * data, int stride, int count)
-{
- int i;
- int *out = (int *)(rvb->address + rvb->start);
-
- if (RADEON_DEBUG & DEBUG_VERTS)
- fprintf(stderr, "%s count %d stride %d out %p data %p\n",
- __FUNCTION__, count, stride, (void *)out, (void *)data);
-
- if (stride == 12)
- COPY_DWORDS(out, data, count * 3);
- else
- for (i = 0; i < count; i++) {
- out[0] = *(int *)data;
- out[1] = *(int *)(data + 4);
- out[2] = *(int *)(data + 8);
- out += 3;
- data += stride;
- }
-}
-
-static void r300EmitVec16(GLcontext * ctx, struct r300_dma_region *rvb,
- GLvoid * data, int stride, int count)
-{
- int i;
- int *out = (int *)(rvb->address + rvb->start);
-
- if (RADEON_DEBUG & DEBUG_VERTS)
- fprintf(stderr, "%s count %d stride %d out %p data %p\n",
- __FUNCTION__, count, stride, (void *)out, (void *)data);
-
- if (stride == 16)
- COPY_DWORDS(out, data, count * 4);
- else
- for (i = 0; i < count; i++) {
- out[0] = *(int *)data;
- out[1] = *(int *)(data + 4);
- out[2] = *(int *)(data + 8);
- out[3] = *(int *)(data + 12);
- out += 4;
- data += stride;
- }
-}
-
-static void r300EmitVec(GLcontext * ctx, struct r300_dma_region *rvb,
- GLvoid * data, int size, int stride, int count)
-{
- r300ContextPtr rmesa = R300_CONTEXT(ctx);
-
- if (stride == 0) {
- r300AllocDmaRegion(rmesa, rvb, size * 4, 4);
- count = 1;
- rvb->aos_offset = GET_START(rvb);
- rvb->aos_stride = 0;
- } else {
- r300AllocDmaRegion(rmesa, rvb, size * count * 4, 4);
- rvb->aos_offset = GET_START(rvb);
- rvb->aos_stride = size;
- }
-
- switch (size) {
- case 1:
- r300EmitVec4(ctx, rvb, data, stride, count);
- break;
- case 2:
- r300EmitVec8(ctx, rvb, data, stride, count);
- break;
- case 3:
- r300EmitVec12(ctx, rvb, data, stride, count);
- break;
- case 4:
- r300EmitVec16(ctx, rvb, data, stride, count);
- break;
- default:
- assert(0);
- break;
- }
-}
-