radeon: Fix legacy bo to not reuse DMA buffers until their refcount is 1.
src/mesa/drivers/dri/radeon/radeon_dma.c
/**************************************************************************

Copyright (C) 2004 Nicolai Haehnle.
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

#include <errno.h>
#include "radeon_common.h"
#include "main/simple_list.h"

/* COPY_DWORDS( dst, src, nr ): copy nr 32-bit words from src to dst,
 * leaving dst advanced past the copied data. */
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )                                     \
do {                                                                    \
    int __tmp;                                                          \
    __asm__ __volatile__( "rep ; movsl"                                 \
                          : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)     \
                          : "0" (nr),                                   \
                            "D" ((long)dst),                            \
                            "S" ((long)src) );                          \
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )     \
do {                                    \
    int j;                              \
    for ( j = 0 ; j < nr ; j++ )        \
        dst[j] = ((int *)src)[j];       \
    dst += nr;                          \
} while (0)
#endif
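
/*
 * Illustrative note (added for clarity, not from the original source):
 * because both variants advance `dst`, a caller can stream consecutive
 * attributes through one destination pointer, e.g.
 *
 *     uint32_t *dst = ...;        // some mapped DMA destination
 *     COPY_DWORDS(dst, pos, 3);   // dst now points just past the 3 words
 *     COPY_DWORDS(dst, uv, 2);
 */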

void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count)
{
    int i;

    if (RADEON_DEBUG & RADEON_VERTS)
        fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                __FUNCTION__, count, stride, (void *)out, (void *)data);

    if (stride == 4)
        COPY_DWORDS(out, data, count);
    else
        for (i = 0; i < count; i++) {
            out[0] = *(int *)data;
            out++;
            data += stride;
        }
}

void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count)
{
    int i;

    if (RADEON_DEBUG & RADEON_VERTS)
        fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                __FUNCTION__, count, stride, (void *)out, (void *)data);

    if (stride == 8)
        COPY_DWORDS(out, data, count * 2);
    else
        for (i = 0; i < count; i++) {
            out[0] = *(int *)data;
            out[1] = *(int *)(data + 4);
            out += 2;
            data += stride;
        }
}

void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count)
{
    int i;

    if (RADEON_DEBUG & RADEON_VERTS)
        fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                __FUNCTION__, count, stride, (void *)out, (void *)data);

    if (stride == 12) {
        COPY_DWORDS(out, data, count * 3);
    }
    else
        for (i = 0; i < count; i++) {
            out[0] = *(int *)data;
            out[1] = *(int *)(data + 4);
            out[2] = *(int *)(data + 8);
            out += 3;
            data += stride;
        }
}

void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count)
{
    int i;

    if (RADEON_DEBUG & RADEON_VERTS)
        fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                __FUNCTION__, count, stride, (void *)out, (void *)data);

    if (stride == 16)
        COPY_DWORDS(out, data, count * 4);
    else
        for (i = 0; i < count; i++) {
            out[0] = *(int *)data;
            out[1] = *(int *)(data + 4);
            out[2] = *(int *)(data + 8);
            out[3] = *(int *)(data + 12);
            out += 4;
            data += stride;
        }
}
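
/*
 * Usage sketch for the radeonEmitVec* helpers (hypothetical, for
 * illustration only): pulling vec3 positions out of an interleaved client
 * array.  With stride == 12 the fast COPY_DWORDS path is taken; any other
 * stride falls back to the gather loop.
 *
 *     struct vert { float pos[3]; float other[5]; };  // stride == 32
 *     struct vert verts[64];
 *     radeonEmitVec12(out, verts[0].pos, sizeof(struct vert), 64);
 */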

void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos,
                         const GLvoid * data, int size, int stride, int count)
{
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    uint32_t *out;

    if (stride == 0) {
        radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
        count = 1;
        aos->stride = 0;
    } else {
        radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
        aos->stride = size;
    }

    aos->components = size;
    aos->count = count;

    out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
    switch (size) {
    case 1: radeonEmitVec4(out, data, stride, count); break;
    case 2: radeonEmitVec8(out, data, stride, count); break;
    case 3: radeonEmitVec12(out, data, stride, count); break;
    case 4: radeonEmitVec16(out, data, stride, count); break;
    default:
        assert(0);
        break;
    }
}
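
/*
 * Caller sketch (hypothetical, mirroring how a TCL setup path might use
 * this): one array-of-structs stream is emitted per enabled vertex
 * attribute, each getting its own DMA region and holding a reference on
 * its bo until radeonReleaseArrays() below drops it.
 *
 *     rcommon_emit_vector(ctx, &rmesa->tcl.aos[0],
 *                         VB->AttribPtr[_TNL_ATTRIB_POS]->data,
 *                         4, VB->AttribPtr[_TNL_ATTRIB_POS]->stride,
 *                         VB->Count);
 */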

void radeon_init_dma(radeonContextPtr rmesa)
{
    make_empty_list(&rmesa->dma.free);
    make_empty_list(&rmesa->dma.wait);
    make_empty_list(&rmesa->dma.reserved);
    rmesa->dma.minimum_size = MAX_DMA_BUF_SZ;
}
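
/*
 * Summary of the buffer life cycle implemented below (a restatement for
 * clarity, not new behaviour):
 *
 *     reserved -- the bo currently mapped and being filled;
 *     wait     -- recently flushed bos, parked so the GPU has time to
 *                 consume their data before they are reused;
 *     free     -- idle bos ready for reuse; freed for good once their
 *                 expire_counter lags DMA_BO_FREE_TIME release cycles.
 */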

void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
{
    struct radeon_dma_bo *dma_bo = NULL;

    /* Raise the minimum size to at least the requested size,
     * aligned up to the next 16 bytes. */
    if (size > rmesa->dma.minimum_size)
        rmesa->dma.minimum_size = (size + 15) & (~15);

    radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %d\n",
                 __FUNCTION__, size, rmesa->dma.minimum_size);

    /* unmap the old reserved bo */
    if (!is_empty_list(&rmesa->dma.reserved))
        radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo);

    if (is_empty_list(&rmesa->dma.free)
        || last_elem(&rmesa->dma.free)->bo->size < size) {
        dma_bo = CALLOC_STRUCT(radeon_dma_bo);
        assert(dma_bo);

again_alloc:
        dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
                                    0, rmesa->dma.minimum_size, 4,
                                    RADEON_GEM_DOMAIN_GTT, 0);

        if (!dma_bo->bo) {
            rcommonFlushCmdBuf(rmesa, __FUNCTION__);
            goto again_alloc;
        }
        insert_at_head(&rmesa->dma.reserved, dma_bo);
    } else {
        /* Buffers are pushed and popped at the end of the list, so the
         * expire counter on unused buffers stays meaningful and they can
         * later be freed from the beginning of the list. */
        dma_bo = last_elem(&rmesa->dma.free);
        remove_from_list(dma_bo);
        insert_at_head(&rmesa->dma.reserved, dma_bo);
    }

    rmesa->dma.current_used = 0;
    rmesa->dma.current_vertexptr = 0;

    if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
                                      first_elem(&rmesa->dma.reserved)->bo,
                                      RADEON_GEM_DOMAIN_GTT, 0))
        fprintf(stderr, "failure to revalidate BOs - badness\n");

    if (is_empty_list(&rmesa->dma.reserved)) {
        /* The command buffer was flushed in radeon_revalidate_bos,
         * which released the reserved bo; allocate a fresh one. */
        goto again_alloc;
    }

    radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1);
}
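
/*
 * Note on the again_alloc path above (summary added for clarity): if
 * radeon_bo_open() fails, flushing the command buffer lets the kernel
 * release bo memory before the retry; radeon_cs_space_check_with_bo() can
 * likewise trigger a flush, which migrates the reserved bo to the wait
 * list and leaves the reserved list empty, hence the second goto.
 */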

/* Allocate a region from the current reserved DMA buffer.  If there isn't
 * enough space left, grab a new buffer (and discard what was left of the
 * current one).
 */
void radeonAllocDmaRegion(radeonContextPtr rmesa,
                          struct radeon_bo **pbo, int *poffset,
                          int bytes, int alignment)
{
    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);

    if (rmesa->dma.flush)
        rmesa->dma.flush(rmesa->glCtx);

    assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);

    alignment--;
    rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;

    if (is_empty_list(&rmesa->dma.reserved)
        || rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size)
        radeonRefillCurrentDmaRegion(rmesa, bytes);

    *poffset = rmesa->dma.current_used;
    *pbo = first_elem(&rmesa->dma.reserved)->bo;
    radeon_bo_ref(*pbo);

    /* Always align the next allocation to at least 16 bytes */
    rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
    rmesa->dma.current_vertexptr = rmesa->dma.current_used;

    assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size);
}
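
/*
 * Usage sketch (illustrative; `src` and the relocation step are
 * placeholders): upload 16 dwords of data through the DMA pool.
 *
 *     struct radeon_bo *bo;
 *     int offset;
 *     radeonAllocDmaRegion(rmesa, &bo, &offset, 16 * 4, 32);
 *     memcpy((char *)bo->ptr + offset, src, 16 * 4);
 *     ...emit a relocation for bo at offset, then...
 *     radeon_bo_unref(bo);
 *
 * The bo comes back referenced on the caller's behalf (and the reserved
 * bo is already mapped, so bo->ptr is valid), which is why callers must
 * unref it when they are done.
 */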

void radeonFreeDmaRegions(radeonContextPtr rmesa)
{
    struct radeon_dma_bo *dma_bo;
    struct radeon_dma_bo *temp;

    if (RADEON_DEBUG & RADEON_DMA)
        fprintf(stderr, "%s\n", __FUNCTION__);

    foreach_s(dma_bo, temp, &rmesa->dma.free) {
        remove_from_list(dma_bo);
        radeon_bo_unref(dma_bo->bo);
        FREE(dma_bo);
    }

    foreach_s(dma_bo, temp, &rmesa->dma.wait) {
        remove_from_list(dma_bo);
        radeon_bo_unref(dma_bo->bo);
        FREE(dma_bo);
    }

    foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
        remove_from_list(dma_bo);
        radeon_bo_unmap(dma_bo->bo);
        radeon_bo_unref(dma_bo->bo);
        FREE(dma_bo);
    }
}

void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes)
{
    if (is_empty_list(&rmesa->dma.reserved))
        return;

    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s %d\n", __FUNCTION__, return_bytes);

    rmesa->dma.current_used -= return_bytes;
    rmesa->dma.current_vertexptr = rmesa->dma.current_used;
}

static int radeon_bo_is_idle(struct radeon_bo* bo)
{
    uint32_t domain;
    int ret = radeon_bo_is_busy(bo, &domain);
    if (ret == -EINVAL) {
        WARN_ONCE("Your libdrm or kernel doesn't have support for busy query.\n"
                  "This may cause a small performance drop.\n");
    }
    /* Protect against a bug in legacy bo handling that can leave bos
     * referenced even after they should have been freed: never report a
     * bo as idle while someone else still holds a reference to it. */
    if (bo->cref != 1)
        return 0;
    return ret != -EBUSY;
}

void radeonReleaseDmaRegions(radeonContextPtr rmesa)
{
    struct radeon_dma_bo *dma_bo;
    struct radeon_dma_bo *temp;
    const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME;
    const int time = rmesa->dma.free.expire_counter;

    if (RADEON_DEBUG & RADEON_DMA) {
        size_t free = 0,
               wait = 0,
               reserved = 0;
        foreach(dma_bo, &rmesa->dma.free)
            ++free;

        foreach(dma_bo, &rmesa->dma.wait)
            ++wait;

        foreach(dma_bo, &rmesa->dma.reserved)
            ++reserved;

        fprintf(stderr, "%s: free %zu, wait %zu, reserved %zu, minimum_size: %zu\n",
                __FUNCTION__, free, wait, reserved, rmesa->dma.minimum_size);
    }

    if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
        /* request updated cs processing information from the kernel */
        legacy_track_pending(rmesa->radeonScreen->bom, 0);
    }

    /* Move waiting bos to the free list.  The wait list gives the GPU
     * time to finish with the data before the buffer is reused. */
    foreach_s(dma_bo, temp, &rmesa->dma.wait) {
        if (dma_bo->expire_counter == time) {
            WARN_ONCE("Leaking dma buffer object!\n");
            /* Force the buffer free so we don't really start
             * leaking memory now. */
            while ((dma_bo->bo = radeon_bo_unref(dma_bo->bo))) {}
            remove_from_list(dma_bo);
            FREE(dma_bo);
            continue;
        }
        /* free objects that are too small to be used because of a large request */
        if (dma_bo->bo->size < rmesa->dma.minimum_size) {
            radeon_bo_unref(dma_bo->bo);
            remove_from_list(dma_bo);
            FREE(dma_bo);
            continue;
        }
        if (!radeon_bo_is_idle(dma_bo->bo))
            continue;
        remove_from_list(dma_bo);
        dma_bo->expire_counter = expire_at;
        insert_at_tail(&rmesa->dma.free, dma_bo);
    }

    /* unmap the last dma region */
    if (!is_empty_list(&rmesa->dma.reserved))
        radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo);

    /* move reserved bos to the wait list */
    foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
        /* free objects that are too small to be used because of a large request */
        if (dma_bo->bo->size < rmesa->dma.minimum_size) {
            radeon_bo_unref(dma_bo->bo);
            remove_from_list(dma_bo);
            FREE(dma_bo);
            continue;
        }
        remove_from_list(dma_bo);
        dma_bo->expire_counter = expire_at;
        insert_at_tail(&rmesa->dma.wait, dma_bo);
    }

    /* free bos that have been unused for some time */
    foreach_s(dma_bo, temp, &rmesa->dma.free) {
        if (dma_bo->expire_counter != time)
            break;
        remove_from_list(dma_bo);
        radeon_bo_unref(dma_bo->bo);
        FREE(dma_bo);
    }
}

/* Flush the vertices in the current DMA region.
 */
void rcommon_flush_last_swtcl_prim( GLcontext *ctx )
{
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    struct radeon_dma *dma = &rmesa->dma;

    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s\n", __FUNCTION__);
    dma->flush = NULL;

    if (!is_empty_list(&dma->reserved)) {
        GLuint current_offset = dma->current_used;

        assert (dma->current_used +
                rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
                dma->current_vertexptr);

        if (dma->current_used != dma->current_vertexptr) {
            dma->current_used = dma->current_vertexptr;

            rmesa->vtbl.swtcl_flush(ctx, current_offset);
        }
        rmesa->swtcl.numverts = 0;
    }
}

/* Allocate space in the current DMA region.
 */
void *
rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
{
    GLuint bytes = vsize * nverts;
    void *head;

    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s\n", __FUNCTION__);

    if (is_empty_list(&rmesa->dma.reserved)
        || rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) {
        if (rmesa->dma.flush) {
            rmesa->dma.flush(rmesa->glCtx);
        }

        radeonRefillCurrentDmaRegion(rmesa, bytes);

        return NULL;
    }

    if (!rmesa->dma.flush) {
        /* make sure pending vertices are flushed if the cmdbuf is
         * flushed and DMA restarts */
        rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
        rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
    }

    ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
    ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim );
    ASSERT( rmesa->dma.current_used +
            rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
            rmesa->dma.current_vertexptr );

    head = (first_elem(&rmesa->dma.reserved)->bo->ptr + rmesa->dma.current_vertexptr);
    rmesa->dma.current_vertexptr += bytes;
    rmesa->swtcl.numverts += nverts;
    return head;
}
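
/*
 * Caller sketch for rcommonAllocDmaLowVerts (hypothetical): a NULL return
 * means the region was flushed and refilled, not that allocation failed,
 * so a retry is expected to succeed.
 *
 *     GLfloat *v = rcommonAllocDmaLowVerts(rmesa, nr, rmesa->swtcl.vertex_size * 4);
 *     if (!v)
 *         v = rcommonAllocDmaLowVerts(rmesa, nr, rmesa->swtcl.vertex_size * 4);
 */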

void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
{
    radeonContextPtr radeon = RADEON_CONTEXT( ctx );
    int i;

    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s\n", __FUNCTION__);

    if (radeon->dma.flush) {
        radeon->dma.flush(radeon->glCtx);
    }
    for (i = 0; i < radeon->tcl.aos_count; i++) {
        if (radeon->tcl.aos[i].bo) {
            radeon_bo_unref(radeon->tcl.aos[i].bo);
            radeon->tcl.aos[i].bo = NULL;
        }
    }
}