/* src/mesa/drivers/dri/radeon/radeon_dma.c */
/**************************************************************************

Copyright (C) 2004 Nicolai Haehnle.
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

#include <errno.h>
#include "radeon_common.h"
#include "main/simple_list.h"

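/* Copy `nr' 32-bit words from `src' to `dst', advancing `dst' past the
 * copied words.  On x86 this collapses to a single "rep movsl"; elsewhere
 * it is a plain loop.
 */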
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int __tmp;							\
	__asm__ __volatile__( "rep ; movsl"				\
			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \
			      : "0" (nr),				\
				"D" ((long)dst),			\
				"S" ((long)src) );			\
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int j;								\
	for ( j = 0 ; j < nr ; j++ )					\
		dst[j] = ((int *)src)[j];				\
	dst += nr;							\
} while (0)
#endif

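/* radeonEmitVec{4,8,12,16}: gather `count' vertex elements of one, two,
 * three or four dwords from `data', which may have an arbitrary byte
 * `stride', into the tightly packed buffer at `out'.  When the input is
 * already packed (stride equals the element size) the copy takes the
 * COPY_DWORDS fast path.
 */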
void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 4)
		COPY_DWORDS(out, data, count);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out++;
			data += stride;
		}
}

void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 8)
		COPY_DWORDS(out, data, count * 2);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out += 2;
			data += stride;
		}
}

void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 12) {
		COPY_DWORDS(out, data, count * 3);
	}
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out[2] = *(int *)(data + 8);
			out += 3;
			data += stride;
		}
}

void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 16)
		COPY_DWORDS(out, data, count * 4);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out[2] = *(int *)(data + 8);
			out[3] = *(int *)(data + 12);
			out += 4;
			data += stride;
		}
}

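/* Upload one vertex attribute array into a freshly allocated DMA region
 * and describe it in `aos'.  A stride of 0 means a constant attribute:
 * only a single element is copied and aos->stride stays 0.
 *
 * A minimal usage sketch (hypothetical caller; the `pos' array and the
 * choice of aos slot are illustrative, not code from this driver):
 *
 *   GLfloat pos[6] = { 0.0f, 0.0f, 0.0f,  1.0f, 0.0f, 0.0f };
 *   rcommon_emit_vector(ctx, &rmesa->tcl.aos[0], pos,
 *                       3, 3 * sizeof(GLfloat), 2);
 */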
void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos,
			 const GLvoid * data, int size, int stride, int count)
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	uint32_t *out;

	if (stride == 0) {
		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
		count = 1;
		aos->stride = 0;
	} else {
		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
		aos->stride = size;
	}

	aos->components = size;
	aos->count = count;

	out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
	switch (size) {
	case 1: radeonEmitVec4(out, data, stride, count); break;
	case 2: radeonEmitVec8(out, data, stride, count); break;
	case 3: radeonEmitVec12(out, data, stride, count); break;
	case 4: radeonEmitVec16(out, data, stride, count); break;
	default:
		assert(0);
		break;
	}
}

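/* Initialize the three DMA buffer-object lists (free, wait, reserved)
 * and the default minimum buffer size.
 */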
void radeon_init_dma(radeonContextPtr rmesa)
{
	make_empty_list(&rmesa->dma.free);
	make_empty_list(&rmesa->dma.wait);
	make_empty_list(&rmesa->dma.reserved);
	rmesa->dma.minimum_size = MAX_DMA_BUF_SZ;
}

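/* Make a mapped buffer of at least `size' bytes the current reserved
 * buffer: unmap the old one, then either recycle a large-enough BO from
 * the free list or open a new one in GTT, retrying after a command-buffer
 * flush if the allocation fails.
 */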
void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
{
	struct radeon_dma_bo *dma_bo = NULL;

	/* Raise the minimum size to at least the requested size,
	   aligned up to the next 16 bytes. */
	if (size > rmesa->dma.minimum_size)
		rmesa->dma.minimum_size = (size + 15) & (~15);

	radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %d\n",
		     __FUNCTION__, size, rmesa->dma.minimum_size);

	/* unmap old reserved bo */
	if (!is_empty_list(&rmesa->dma.reserved))
		radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo);

	if (is_empty_list(&rmesa->dma.free)
	    || last_elem(&rmesa->dma.free)->bo->size < size) {
		dma_bo = CALLOC_STRUCT(radeon_dma_bo);
		assert(dma_bo);

again_alloc:
		dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
					    0, rmesa->dma.minimum_size, 4,
					    RADEON_GEM_DOMAIN_GTT, 0);

		if (!dma_bo->bo) {
			rcommonFlushCmdBuf(rmesa, __FUNCTION__);
			goto again_alloc;
		}
		insert_at_head(&rmesa->dma.reserved, dma_bo);
	} else {
		/* We push and pop buffers from the end of the list so we
		   can keep a counter of unused buffers and later free them
		   from the beginning of the list. */
		dma_bo = last_elem(&rmesa->dma.free);
		assert(dma_bo->bo->cref == 1);
		remove_from_list(dma_bo);
		insert_at_head(&rmesa->dma.reserved, dma_bo);
	}

	rmesa->dma.current_used = 0;
	rmesa->dma.current_vertexptr = 0;

	if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
					  first_elem(&rmesa->dma.reserved)->bo,
					  RADEON_GEM_DOMAIN_GTT, 0))
		fprintf(stderr, "failure to revalidate BOs - badness\n");

	if (is_empty_list(&rmesa->dma.reserved)) {
		/* The command buffer was flushed while revalidating;
		   allocate a fresh buffer. */
		goto again_alloc;
	}

	radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1);
}

/* Allocate a region from the current reserved DMA buffer.  If there isn't
 * enough space left, grab a new buffer (discarding whatever was left of
 * the current one).  See rcommon_emit_vector above for a typical caller.
 */
void radeonAllocDmaRegion(radeonContextPtr rmesa,
			  struct radeon_bo **pbo, int *poffset,
			  int bytes, int alignment)
{
	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);

	if (rmesa->dma.flush)
		rmesa->dma.flush(rmesa->glCtx);

	assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);

	alignment--;
	rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;

	if (is_empty_list(&rmesa->dma.reserved)
	    || rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size)
		radeonRefillCurrentDmaRegion(rmesa, bytes);

	*poffset = rmesa->dma.current_used;
	*pbo = first_elem(&rmesa->dma.reserved)->bo;
	radeon_bo_ref(*pbo);

	/* Always align to at least 16 bytes */
	rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
	rmesa->dma.current_vertexptr = rmesa->dma.current_used;

	assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size);
}

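/* Release every DMA buffer object on the free, wait and reserved lists,
 * unmapping the reserved one; typically called at context teardown.
 */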
void radeonFreeDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma_bo *dma_bo;
	struct radeon_dma_bo *temp;

	if (RADEON_DEBUG & RADEON_DMA)
		fprintf(stderr, "%s\n", __FUNCTION__);

	foreach_s(dma_bo, temp, &rmesa->dma.free) {
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}

	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}

	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
		remove_from_list(dma_bo);
		radeon_bo_unmap(dma_bo->bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}
}

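/* Give back the last `return_bytes' of the current reserved buffer,
 * rewinding both the used and vertex pointers.
 */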
void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes)
{
	if (is_empty_list(&rmesa->dma.reserved))
		return;

	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s %d\n", __FUNCTION__, return_bytes);
	rmesa->dma.current_used -= return_bytes;
	rmesa->dma.current_vertexptr = rmesa->dma.current_used;
}

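/* Returns nonzero once the GPU is no longer using `bo'.  A kernel or
 * libdrm without busy-query support (-EINVAL) is treated as idle, at the
 * cost of possibly reusing a buffer slightly too early.
 */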
static int radeon_bo_is_idle(struct radeon_bo* bo)
{
	uint32_t domain;
	int ret = radeon_bo_is_busy(bo, &domain);
	if (ret == -EINVAL) {
		WARN_ONCE("Your libdrm or kernel doesn't have support for busy query.\n"
			  "This may cause a small performance drop.\n");
	}
	return ret != -EBUSY;
}

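/* Age the DMA buffer lists: reserved buffers are unmapped and moved to
 * the wait list, waiting buffers whose GPU work has finished move to the
 * free list, and free buffers that have sat unused for DMA_BO_FREE_TIME
 * cycles (or are now smaller than the current minimum size) are freed.
 */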
void radeonReleaseDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma_bo *dma_bo;
	struct radeon_dma_bo *temp;
	const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME;
	const int time = rmesa->dma.free.expire_counter;

	if (RADEON_DEBUG & RADEON_DMA) {
		size_t free = 0,
		       wait = 0,
		       reserved = 0;
		foreach(dma_bo, &rmesa->dma.free)
			++free;

		foreach(dma_bo, &rmesa->dma.wait)
			++wait;

		foreach(dma_bo, &rmesa->dma.reserved)
			++reserved;

		fprintf(stderr, "%s: free %zu, wait %zu, reserved %zu, minimum_size: %zu\n",
			__FUNCTION__, free, wait, reserved, rmesa->dma.minimum_size);
	}

	if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
		/* request updated cs processing information from kernel */
		legacy_track_pending(rmesa->radeonScreen->bom, 0);
	}

	/* Move idle bos from the wait list to the free list; the wait list
	   gives the GPU time to finish with the data before reuse. */
	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
		if (dma_bo->expire_counter == time) {
			WARN_ONCE("Leaking dma buffer object!\n");
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		/* Free objects that are too small to satisfy the current
		   (large) minimum request size. */
		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		if (!radeon_bo_is_idle(dma_bo->bo))
			continue;
		remove_from_list(dma_bo);
		dma_bo->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.free, dma_bo);
	}

	/* unmap the last dma region */
	if (!is_empty_list(&rmesa->dma.reserved))
		radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo);

	/* move reserved to wait list */
	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
		/* Free objects that are too small to satisfy the current
		   (large) minimum request size. */
		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		remove_from_list(dma_bo);
		dma_bo->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.wait, dma_bo);
	}

	/* free bos that have been unused for some time */
	foreach_s(dma_bo, temp, &rmesa->dma.free) {
		if (dma_bo->expire_counter != time)
			break;
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}
}

/* Flush the software-TCL vertices accumulated in the current dma region.
 */
void rcommon_flush_last_swtcl_prim( GLcontext *ctx )
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	struct radeon_dma *dma = &rmesa->dma;

	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);
	dma->flush = NULL;

	if (!is_empty_list(&dma->reserved)) {
		GLuint current_offset = dma->current_used;

		assert (dma->current_used +
			rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
			dma->current_vertexptr);

		if (dma->current_used != dma->current_vertexptr) {
			dma->current_used = dma->current_vertexptr;

			rmesa->vtbl.swtcl_flush(ctx, current_offset);
		}
		rmesa->swtcl.numverts = 0;
	}
}

/* Allocate space for `nverts' software-TCL vertices of `vsize' bytes each
 * in the current dma region, refilling the region first if necessary.
 */
void *
rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
{
	GLuint bytes = vsize * nverts;
	void *head;

	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (is_empty_list(&rmesa->dma.reserved)
	    || rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) {
		if (rmesa->dma.flush) {
			rmesa->dma.flush(rmesa->glCtx);
		}

		radeonRefillCurrentDmaRegion(rmesa, bytes);

		return NULL;
	}

	if (!rmesa->dma.flush) {
		/* Arrange for the stored vertices to be flushed if the
		   command buffer is flushed (which restarts the DMA region). */
		rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
		rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
	}

	ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
	ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim );
	ASSERT( rmesa->dma.current_used +
		rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
		rmesa->dma.current_vertexptr );

	head = (first_elem(&rmesa->dma.reserved)->bo->ptr + rmesa->dma.current_vertexptr);
	rmesa->dma.current_vertexptr += bytes;
	rmesa->swtcl.numverts += nverts;
	return head;
}

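/* Flush any pending software-TCL vertices and drop the references on the
 * vertex AOS buffer objects.  The `newinputs' mask is not consulted in
 * this common implementation.
 */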
void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
{
	radeonContextPtr radeon = RADEON_CONTEXT( ctx );
	int i;

	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (radeon->dma.flush) {
		radeon->dma.flush(radeon->glCtx);
	}
	for (i = 0; i < radeon->tcl.aos_count; i++) {
		if (radeon->tcl.aos[i].bo) {
			radeon_bo_unref(radeon->tcl.aos[i].bo);
			radeon->tcl.aos[i].bo = NULL;
		}
	}
}