/* r200: fix glean pixelFormats regression
 * [mesa.git] / src / mesa / drivers / dri / radeon / radeon_dma.c
 */
1 /**************************************************************************
2
3 Copyright (C) 2004 Nicolai Haehnle.
4 Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
5
6 The Weather Channel (TM) funded Tungsten Graphics to develop the
7 initial release of the Radeon 8500 driver under the XFree86 license.
8 This notice must be preserved.
9
10 All Rights Reserved.
11
12 Permission is hereby granted, free of charge, to any person obtaining a
13 copy of this software and associated documentation files (the "Software"),
14 to deal in the Software without restriction, including without limitation
15 on the rights to use, copy, modify, merge, publish, distribute, sub
16 license, and/or sell copies of the Software, and to permit persons to whom
17 the Software is furnished to do so, subject to the following conditions:
18
19 The above copyright notice and this permission notice (including the next
20 paragraph) shall be included in all copies or substantial portions of the
21 Software.
22
23 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
26 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
27 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
28 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
29 USE OR OTHER DEALINGS IN THE SOFTWARE.
30
31 **************************************************************************/
32
#include "radeon_common.h"

/* COPY_DWORDS(dst, src, nr): copy `nr` 32-bit words from `src` to `dst`,
 * advancing `dst` by `nr` dwords as a side effect.  The x86 variant uses
 * `rep movsl`; binding the destination register as an output ("=D" (dst))
 * leaves `dst` pointing one past the copied data, matching the explicit
 * `dst += nr` in the portable fallback below.
 */
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int __tmp;							\
	__asm__ __volatile__( "rep ; movsl"				\
			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \
			      : "0" (nr),				\
				"D" ((long)dst),			\
				"S" ((long)src) );			\
} while (0)
#else
/* Portable fallback: plain dword copy loop (also advances dst). */
#define COPY_DWORDS( dst, src, nr )		\
do {						\
	int j;					\
	for ( j = 0 ; j < nr ; j++ )		\
		dst[j] = ((int *)src)[j];	\
	dst += nr;				\
} while (0)
#endif
54
55 static void radeonEmitVec4(uint32_t *out, GLvoid * data, int stride, int count)
56 {
57 int i;
58
59 if (RADEON_DEBUG & DEBUG_VERTS)
60 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
61 __FUNCTION__, count, stride, (void *)out, (void *)data);
62
63 if (stride == 4)
64 COPY_DWORDS(out, data, count);
65 else
66 for (i = 0; i < count; i++) {
67 out[0] = *(int *)data;
68 out++;
69 data += stride;
70 }
71 }
72
73 void radeonEmitVec8(uint32_t *out, GLvoid * data, int stride, int count)
74 {
75 int i;
76
77 if (RADEON_DEBUG & DEBUG_VERTS)
78 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
79 __FUNCTION__, count, stride, (void *)out, (void *)data);
80
81 if (stride == 8)
82 COPY_DWORDS(out, data, count * 2);
83 else
84 for (i = 0; i < count; i++) {
85 out[0] = *(int *)data;
86 out[1] = *(int *)(data + 4);
87 out += 2;
88 data += stride;
89 }
90 }
91
92 void radeonEmitVec12(uint32_t *out, GLvoid * data, int stride, int count)
93 {
94 int i;
95
96 if (RADEON_DEBUG & DEBUG_VERTS)
97 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
98 __FUNCTION__, count, stride, (void *)out, (void *)data);
99
100 if (stride == 12) {
101 COPY_DWORDS(out, data, count * 3);
102 }
103 else
104 for (i = 0; i < count; i++) {
105 out[0] = *(int *)data;
106 out[1] = *(int *)(data + 4);
107 out[2] = *(int *)(data + 8);
108 out += 3;
109 data += stride;
110 }
111 }
112
113 static void radeonEmitVec16(uint32_t *out, GLvoid * data, int stride, int count)
114 {
115 int i;
116
117 if (RADEON_DEBUG & DEBUG_VERTS)
118 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
119 __FUNCTION__, count, stride, (void *)out, (void *)data);
120
121 if (stride == 16)
122 COPY_DWORDS(out, data, count * 4);
123 else
124 for (i = 0; i < count; i++) {
125 out[0] = *(int *)data;
126 out[1] = *(int *)(data + 4);
127 out[2] = *(int *)(data + 8);
128 out[3] = *(int *)(data + 12);
129 out += 4;
130 data += stride;
131 }
132 }
133
134 void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos,
135 GLvoid * data, int size, int stride, int count)
136 {
137 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
138 uint32_t *out;
139
140 if (stride == 0) {
141 radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
142 count = 1;
143 aos->stride = 0;
144 } else {
145 radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
146 aos->stride = size;
147 }
148
149 aos->components = size;
150 aos->count = count;
151
152 out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
153 switch (size) {
154 case 1: radeonEmitVec4(out, data, stride, count); break;
155 case 2: radeonEmitVec8(out, data, stride, count); break;
156 case 3: radeonEmitVec12(out, data, stride, count); break;
157 case 4: radeonEmitVec16(out, data, stride, count); break;
158 default:
159 assert(0);
160 break;
161 }
162 }
163
164 void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
165 {
166 struct radeon_cs_space_check bos[1];
167 int flushed = 0, ret;
168
169 size = MAX2(size, MAX_DMA_BUF_SZ * 16);
170
171 if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
172 fprintf(stderr, "%s\n", __FUNCTION__);
173
174 if (rmesa->dma.flush) {
175 rmesa->dma.flush(rmesa->glCtx);
176 }
177
178 if (rmesa->dma.nr_released_bufs > 4) {
179 rcommonFlushCmdBuf(rmesa, __FUNCTION__);
180 rmesa->dma.nr_released_bufs = 0;
181 }
182
183 if (rmesa->dma.current) {
184 radeon_bo_unmap(rmesa->dma.current);
185 radeon_bo_unref(rmesa->dma.current);
186 rmesa->dma.current = 0;
187 }
188
189 again_alloc:
190 rmesa->dma.current = radeon_bo_open(rmesa->radeonScreen->bom,
191 0, size, 4, RADEON_GEM_DOMAIN_GTT,
192 0);
193
194 if (!rmesa->dma.current) {
195 rcommonFlushCmdBuf(rmesa, __FUNCTION__);
196 rmesa->dma.nr_released_bufs = 0;
197 goto again_alloc;
198 }
199
200 rmesa->dma.current_used = 0;
201 rmesa->dma.current_vertexptr = 0;
202
203 bos[0].bo = rmesa->dma.current;
204 bos[0].read_domains = RADEON_GEM_DOMAIN_GTT;
205 bos[0].write_domain =0 ;
206 bos[0].new_accounted = 0;
207
208 ret = radeon_cs_space_check(rmesa->cmdbuf.cs, bos, 1);
209 if (ret == RADEON_CS_SPACE_OP_TO_BIG) {
210 fprintf(stderr,"Got OPEARTION TO BIG ILLEGAL - this cannot happen");
211 assert(0);
212 } else if (ret == RADEON_CS_SPACE_FLUSH) {
213 rcommonFlushCmdBuf(rmesa, __FUNCTION__);
214 if (flushed) {
215 fprintf(stderr,"flushed but still no space\n");
216 assert(0);
217 }
218 flushed = 1;
219 goto again_alloc;
220 }
221 radeon_bo_map(rmesa->dma.current, 1);
222 }
223
224 /* Allocates a region from rmesa->dma.current. If there isn't enough
225 * space in current, grab a new buffer (and discard what was left of current)
226 */
227 void radeonAllocDmaRegion(radeonContextPtr rmesa,
228 struct radeon_bo **pbo, int *poffset,
229 int bytes, int alignment)
230 {
231 if (RADEON_DEBUG & DEBUG_IOCTL)
232 fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
233
234 if (rmesa->dma.flush)
235 rmesa->dma.flush(rmesa->glCtx);
236
237 assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);
238
239 alignment--;
240 rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;
241
242 if (!rmesa->dma.current || rmesa->dma.current_used + bytes > rmesa->dma.current->size)
243 radeonRefillCurrentDmaRegion(rmesa, (bytes + 15) & ~15);
244
245 *poffset = rmesa->dma.current_used;
246 *pbo = rmesa->dma.current;
247 radeon_bo_ref(*pbo);
248
249 /* Always align to at least 16 bytes */
250 rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
251 rmesa->dma.current_vertexptr = rmesa->dma.current_used;
252
253 assert(rmesa->dma.current_used <= rmesa->dma.current->size);
254 }
255
256 void radeonReleaseDmaRegion(radeonContextPtr rmesa)
257 {
258 if (RADEON_DEBUG & DEBUG_IOCTL)
259 fprintf(stderr, "%s %p\n", __FUNCTION__, rmesa->dma.current);
260 if (rmesa->dma.current) {
261 rmesa->dma.nr_released_bufs++;
262 radeon_bo_unmap(rmesa->dma.current);
263 radeon_bo_unref(rmesa->dma.current);
264 }
265 rmesa->dma.current = NULL;
266 }
267
268
269 /* Flush vertices in the current dma region.
270 */
271 void rcommon_flush_last_swtcl_prim( GLcontext *ctx )
272 {
273 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
274 struct radeon_dma *dma = &rmesa->dma;
275
276
277 if (RADEON_DEBUG & DEBUG_IOCTL)
278 fprintf(stderr, "%s %p\n", __FUNCTION__, dma->current);
279 dma->flush = NULL;
280
281 if (dma->current) {
282 GLuint current_offset = dma->current_used;
283
284 assert (dma->current_used +
285 rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
286 dma->current_vertexptr);
287
288 if (dma->current_used != dma->current_vertexptr) {
289 dma->current_used = dma->current_vertexptr;
290
291 rmesa->vtbl.swtcl_flush(ctx, current_offset);
292 }
293 rmesa->swtcl.numverts = 0;
294 }
295 }
/* Alloc space in the current dma region for `nverts` software-TCL vertices
 * of `vsize` bytes each.  Returns a CPU pointer into the mapped region;
 * the caller writes vertices there directly.  Installs
 * rcommon_flush_last_swtcl_prim as the pending-flush callback so the
 * vertices are emitted before the region is recycled.
 */
void *
rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
{
	GLuint bytes = vsize * nverts;
	void *head;
restart:
	/* Not enough room left in the current region: open a fresh one. */
	if (!rmesa->dma.current || rmesa->dma.current_vertexptr + bytes > rmesa->dma.current->size) {
		radeonRefillCurrentDmaRegion(rmesa, bytes);
	}

	if (!rmesa->dma.flush) {
		/* make sure we have enough space to use this in cmdbuf */
		rcommonEnsureCmdBufSpace(rmesa,
			      rmesa->hw.max_state_size + (12*sizeof(int)),
			      __FUNCTION__);
		/* if cmdbuf flushed DMA restart */
		if (!rmesa->dma.current)
			goto restart;
		/* First vertices in this region: arm the deferred flush so
		 * they are emitted before the region is released.
		 */
		rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
		rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
	}

	/* Sanity: vertex size matches swtcl state, our flush hook is armed,
	 * and the vertex pointer accounts for every vertex written so far.
	 */
	ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
	ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim );
	ASSERT( rmesa->dma.current_used +
		rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
		rmesa->dma.current_vertexptr );

	/* Hand out the next free slot and advance the bookkeeping. */
	head = (rmesa->dma.current->ptr + rmesa->dma.current_vertexptr);
	rmesa->dma.current_vertexptr += bytes;
	rmesa->swtcl.numverts += nverts;
	return head;
}