/**************************************************************************

Copyright (C) 2004 Nicolai Haehnle.
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

#include <errno.h>
#include "radeon_common.h"
#include "main/simple_list.h"

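/* COPY_DWORDS copies `nr` 32-bit words from `src` to `dst` and advances
 * `dst` past the copied data; the x86 variant is a single `rep movsl`.
 */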
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )                                     \
do {                                                                    \
    int __tmp;                                                          \
    __asm__ __volatile__( "rep ; movsl"                                 \
                          : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)     \
                          : "0" (nr),                                   \
                            "D" ((long)dst),                            \
                            "S" ((long)src) );                          \
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )                                     \
do {                                                                    \
    int j;                                                              \
    for ( j = 0 ; j < nr ; j++ )                                        \
        dst[j] = ((int *)src)[j];                                       \
    dst += nr;                                                          \
} while (0)
#endif

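/* radeonEmitVec{4,8,12,16} gather `count` elements of 4/8/12/16 bytes each
 * from `data`, which may use an arbitrary byte `stride`, into the tightly
 * packed `out` buffer; a matching stride degenerates to a straight dword copy.
 */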
void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count)
{
    int i;

    if (RADEON_DEBUG & RADEON_VERTS)
        fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                __FUNCTION__, count, stride, (void *)out, (void *)data);

    if (stride == 4)
        COPY_DWORDS(out, data, count);
    else
        for (i = 0; i < count; i++) {
            out[0] = *(int *)data;
            out++;
            data += stride;
        }
}

void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count)
{
    int i;

    if (RADEON_DEBUG & RADEON_VERTS)
        fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                __FUNCTION__, count, stride, (void *)out, (void *)data);

    if (stride == 8)
        COPY_DWORDS(out, data, count * 2);
    else
        for (i = 0; i < count; i++) {
            out[0] = *(int *)data;
            out[1] = *(int *)(data + 4);
            out += 2;
            data += stride;
        }
}

void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count)
{
    int i;

    if (RADEON_DEBUG & RADEON_VERTS)
        fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                __FUNCTION__, count, stride, (void *)out, (void *)data);

    if (stride == 12)
        COPY_DWORDS(out, data, count * 3);
    else
        for (i = 0; i < count; i++) {
            out[0] = *(int *)data;
            out[1] = *(int *)(data + 4);
            out[2] = *(int *)(data + 8);
            out += 3;
            data += stride;
        }
}

void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count)
{
    int i;

    if (RADEON_DEBUG & RADEON_VERTS)
        fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                __FUNCTION__, count, stride, (void *)out, (void *)data);

    if (stride == 16)
        COPY_DWORDS(out, data, count * 4);
    else
        for (i = 0; i < count; i++) {
            out[0] = *(int *)data;
            out[1] = *(int *)(data + 4);
            out[2] = *(int *)(data + 8);
            out[3] = *(int *)(data + 12);
            out += 4;
            data += stride;
        }
}

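/* Upload a (possibly strided) client vertex array into a freshly allocated
 * DMA region and fill in the radeon_aos descriptor for it.  `size` is the
 * number of dwords per element; a stride of 0 uploads a single element.
 */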
void rcommon_emit_vector(struct gl_context * ctx, struct radeon_aos *aos,
                         const GLvoid * data, int size, int stride, int count)
{
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    uint32_t *out;

    if (stride == 0) {
        radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
        count = 1;
        aos->stride = 0;
    } else {
        radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
        aos->stride = size;
    }

    aos->components = size;
    aos->count = count;

    radeon_bo_map(aos->bo, 1);
    out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
    switch (size) {
    case 1: radeonEmitVec4(out, data, stride, count); break;
    case 2: radeonEmitVec8(out, data, stride, count); break;
    case 3: radeonEmitVec12(out, data, stride, count); break;
    case 4: radeonEmitVec16(out, data, stride, count); break;
    default:
        assert(0);
        break;
    }
    radeon_bo_unmap(aos->bo);
}

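/* Initialize the three DMA buffer-object lists (free, wait, reserved) and
 * the default minimum allocation size.
 */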
void radeon_init_dma(radeonContextPtr rmesa)
{
    make_empty_list(&rmesa->dma.free);
    make_empty_list(&rmesa->dma.wait);
    make_empty_list(&rmesa->dma.reserved);
    rmesa->dma.minimum_size = MAX_DMA_BUF_SZ;
}

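/* Make a buffer of at least `size` bytes current: reuse the last buffer on
 * the free list when it is large enough, otherwise allocate a new BO in the
 * GTT domain (flushing the command buffer and retrying if allocation fails).
 * The chosen buffer is moved to the head of the reserved list and mapped.
 */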
void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
{
    struct radeon_dma_bo *dma_bo = NULL;
    /* Raise the minimum size to at least the requested size,
       rounded up to the next multiple of 16 bytes. */
    if (size > rmesa->dma.minimum_size)
        rmesa->dma.minimum_size = (size + 15) & (~15);

    radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %zu\n",
                 __FUNCTION__, size, rmesa->dma.minimum_size);

    if (is_empty_list(&rmesa->dma.free)
        || last_elem(&rmesa->dma.free)->bo->size < size) {
        dma_bo = CALLOC_STRUCT(radeon_dma_bo);
        assert(dma_bo);

again_alloc:
        dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
                                    0, rmesa->dma.minimum_size, 4,
                                    RADEON_GEM_DOMAIN_GTT, 0);

        if (!dma_bo->bo) {
            rcommonFlushCmdBuf(rmesa, __FUNCTION__);
            goto again_alloc;
        }
        insert_at_head(&rmesa->dma.reserved, dma_bo);
    } else {
        /* Buffers are pushed and popped at the end of the list, so
           unused buffers accumulate at the beginning, where they can
           later be freed. */
        dma_bo = last_elem(&rmesa->dma.free);
        remove_from_list(dma_bo);
        insert_at_head(&rmesa->dma.reserved, dma_bo);
    }

    rmesa->dma.current_used = 0;
    rmesa->dma.current_vertexptr = 0;

    if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
                                      first_elem(&rmesa->dma.reserved)->bo,
                                      RADEON_GEM_DOMAIN_GTT, 0))
        fprintf(stderr, "failure to revalidate BOs - badness\n");

    if (is_empty_list(&rmesa->dma.reserved)) {
        /* The command buffer was flushed in radeon_revalidate_bos. */
        goto again_alloc;
    }
    radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1);
}

/* Allocates a region from rmesa->dma.current.  If there isn't enough
 * space in current, grab a new buffer (and discard what was left of current).
 */
void radeonAllocDmaRegion(radeonContextPtr rmesa,
                          struct radeon_bo **pbo, int *poffset,
                          int bytes, int alignment)
{
    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);

    if (rmesa->dma.flush)
        rmesa->dma.flush(rmesa->glCtx);

    assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);

    alignment--;
    rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;

    if (is_empty_list(&rmesa->dma.reserved)
        || rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size)
        radeonRefillCurrentDmaRegion(rmesa, bytes);

    *poffset = rmesa->dma.current_used;
    *pbo = first_elem(&rmesa->dma.reserved)->bo;
    radeon_bo_ref(*pbo);

    /* Always align to at least 16 bytes */
    rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
    rmesa->dma.current_vertexptr = rmesa->dma.current_used;

    assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size);
}

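/* Unreference and free every buffer on the free, wait and reserved lists,
 * e.g. at context teardown.
 */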
void radeonFreeDmaRegions(radeonContextPtr rmesa)
{
    struct radeon_dma_bo *dma_bo;
    struct radeon_dma_bo *temp;
    if (RADEON_DEBUG & RADEON_DMA)
        fprintf(stderr, "%s\n", __FUNCTION__);

    foreach_s(dma_bo, temp, &rmesa->dma.free) {
        remove_from_list(dma_bo);
        radeon_bo_unref(dma_bo->bo);
        FREE(dma_bo);
    }

    foreach_s(dma_bo, temp, &rmesa->dma.wait) {
        remove_from_list(dma_bo);
        radeon_bo_unref(dma_bo->bo);
        FREE(dma_bo);
    }

    foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
        remove_from_list(dma_bo);
        radeon_bo_unref(dma_bo->bo);
        FREE(dma_bo);
    }
}

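/* Give back the last `return_bytes` bytes of the current reserved buffer
 * so they can be handed out again by the next allocation.
 */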
void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes)
{
    if (is_empty_list(&rmesa->dma.reserved))
        return;

    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s %d\n", __FUNCTION__, return_bytes);
    rmesa->dma.current_used -= return_bytes;
    rmesa->dma.current_vertexptr = rmesa->dma.current_used;
}

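/* Returns nonzero when the GPU is no longer using the buffer object.
 * If the kernel/libdrm lacks the busy query, the BO is treated as idle.
 */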
static int radeon_bo_is_idle(struct radeon_bo* bo)
{
    uint32_t domain;
    int ret = radeon_bo_is_busy(bo, &domain);
    if (ret == -EINVAL) {
        WARN_ONCE("Your libdrm or kernel doesn't support the busy query.\n"
                  "This may cause a small performance drop for you.\n");
    }
    return ret != -EBUSY;
}

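/* Age the DMA buffer lists: reserved buffers are unmapped and moved to the
 * wait list, waiting buffers whose GPU work has finished move to the free
 * list, and free buffers that have expired (or are too small for the
 * current minimum size) are destroyed.
 */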
void radeonReleaseDmaRegions(radeonContextPtr rmesa)
{
    struct radeon_dma_bo *dma_bo;
    struct radeon_dma_bo *temp;
    const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME;
    const int time = rmesa->dma.free.expire_counter;

    if (RADEON_DEBUG & RADEON_DMA) {
        size_t free = 0,
               wait = 0,
               reserved = 0;
        foreach(dma_bo, &rmesa->dma.free)
            ++free;

        foreach(dma_bo, &rmesa->dma.wait)
            ++wait;

        foreach(dma_bo, &rmesa->dma.reserved)
            ++reserved;

        fprintf(stderr, "%s: free %zu, wait %zu, reserved %zu, minimum_size: %zu\n",
                __FUNCTION__, free, wait, reserved, rmesa->dma.minimum_size);
    }

    if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
        /* Request updated cs processing information from the kernel. */
        legacy_track_pending(rmesa->radeonScreen->bom, 0);
    }

    /* Move waiting BOs to the free list.  The wait list gives the GPU
       time to finish with the data before the buffer is reused. */
    foreach_s(dma_bo, temp, &rmesa->dma.wait) {
        if (dma_bo->expire_counter == time) {
            WARN_ONCE("Leaking dma buffer object!\n");
            radeon_bo_unref(dma_bo->bo);
            remove_from_list(dma_bo);
            FREE(dma_bo);
            continue;
        }
        /* Free objects that are too small to be reused after a large request. */
        if (dma_bo->bo->size < rmesa->dma.minimum_size) {
            radeon_bo_unref(dma_bo->bo);
            remove_from_list(dma_bo);
            FREE(dma_bo);
            continue;
        }
        if (!radeon_bo_is_idle(dma_bo->bo)) {
            if (rmesa->radeonScreen->driScreen->dri2.enabled)
                break;
            continue;
        }
        remove_from_list(dma_bo);
        dma_bo->expire_counter = expire_at;
        insert_at_tail(&rmesa->dma.free, dma_bo);
    }

    /* Move reserved buffers to the wait list. */
    foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
        radeon_bo_unmap(dma_bo->bo);
        /* Free objects that are too small to be reused after a large request. */
        if (dma_bo->bo->size < rmesa->dma.minimum_size) {
            radeon_bo_unref(dma_bo->bo);
            remove_from_list(dma_bo);
            FREE(dma_bo);
            continue;
        }
        remove_from_list(dma_bo);
        dma_bo->expire_counter = expire_at;
        insert_at_tail(&rmesa->dma.wait, dma_bo);
    }

    /* Free buffers that have been unused for some time. */
    foreach_s(dma_bo, temp, &rmesa->dma.free) {
        if (dma_bo->expire_counter != time)
            break;
        remove_from_list(dma_bo);
        radeon_bo_unref(dma_bo->bo);
        FREE(dma_bo);
    }
}

/* Flush vertices in the current dma region.
 */
void rcommon_flush_last_swtcl_prim( struct gl_context *ctx )
{
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    struct radeon_dma *dma = &rmesa->dma;

    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s\n", __FUNCTION__);
    dma->flush = NULL;

    radeon_bo_unmap(rmesa->swtcl.bo);

    if (!is_empty_list(&dma->reserved)) {
        GLuint current_offset = dma->current_used;

        assert(dma->current_used +
               rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
               dma->current_vertexptr);

        if (dma->current_used != dma->current_vertexptr) {
            dma->current_used = dma->current_vertexptr;

            rmesa->vtbl.swtcl_flush(ctx, current_offset);
        }
        rmesa->swtcl.numverts = 0;
    }
    radeon_bo_unref(rmesa->swtcl.bo);
    rmesa->swtcl.bo = NULL;
}

/* Alloc space in the current dma region.
 */
void *
rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
{
    GLuint bytes = vsize * nverts;
    void *head;
    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s\n", __FUNCTION__);

    if (is_empty_list(&rmesa->dma.reserved)
        || rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) {
        if (rmesa->dma.flush) {
            rmesa->dma.flush(rmesa->glCtx);
        }

        radeonRefillCurrentDmaRegion(rmesa, bytes);

        return NULL;
    }

    if (!rmesa->dma.flush) {
        /* Make sure the accumulated vertices are flushed along with the
           command buffer, and restart DMA accumulation afterwards. */
        rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
        rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
    }

    ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
    ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim );
    ASSERT( rmesa->dma.current_used +
            rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
            rmesa->dma.current_vertexptr );

    if (!rmesa->swtcl.bo) {
        rmesa->swtcl.bo = first_elem(&rmesa->dma.reserved)->bo;
        radeon_bo_ref(rmesa->swtcl.bo);
        radeon_bo_map(rmesa->swtcl.bo, 1);
    }

    head = (rmesa->swtcl.bo->ptr + rmesa->dma.current_vertexptr);
    rmesa->dma.current_vertexptr += bytes;
    rmesa->swtcl.numverts += nverts;
    return head;
}

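/* Flush any pending software-TCL vertices and drop the references to the
 * array-of-structures BOs set up by rcommon_emit_vector.
 */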
void radeonReleaseArrays( struct gl_context *ctx, GLuint newinputs )
{
    radeonContextPtr radeon = RADEON_CONTEXT( ctx );
    int i;
    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s\n", __FUNCTION__);

    if (radeon->dma.flush) {
        radeon->dma.flush(radeon->glCtx);
    }
    for (i = 0; i < radeon->tcl.aos_count; i++) {
        if (radeon->tcl.aos[i].bo) {
            radeon_bo_unref(radeon->tcl.aos[i].bo);
            radeon->tcl.aos[i].bo = NULL;
        }
    }
}