/**************************************************************************

Copyright (C) 2004 Nicolai Haehnle.
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

#include "radeon_common.h"
#include "main/simple_list.h"

#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int __tmp;							\
	__asm__ __volatile__( "rep ; movsl"				\
			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
			      : "0" (nr),				\
			        "D" ((long)dst),			\
			        "S" ((long)src) );			\
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )		\
do {						\
	int j;					\
	for ( j = 0 ; j < nr ; j++ )		\
		dst[j] = ((int *)src)[j];	\
	dst += nr;				\
} while (0)
#endif

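/* Both variants copy 'nr' 32-bit words from 'src' to 'dst' and leave
 * 'dst' advanced past the copied data: "rep ; movsl" increments
 * %edi/%esi as it copies and the final %edi is written back into dst,
 * while the C fallback does an explicit dst += nr.
 */
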
void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 4)
		COPY_DWORDS(out, data, count);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out++;
			data += stride;
		}
}

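/* The Vec8/Vec12/Vec16 emitters below follow the same pattern for 2, 3
 * and 4 dwords per element: a straight COPY_DWORDS when the input is
 * packed (stride equals the element size), otherwise a strided gather
 * loop.
 */
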
void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 8)
		COPY_DWORDS(out, data, count * 2);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out += 2;
			data += stride;
		}
}

void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 12)
		COPY_DWORDS(out, data, count * 3);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out[2] = *(int *)(data + 8);
			out += 3;
			data += stride;
		}
}

void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 16)
		COPY_DWORDS(out, data, count * 4);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out[2] = *(int *)(data + 8);
			out[3] = *(int *)(data + 12);
			out += 4;
			data += stride;
		}
}

void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos,
			 const GLvoid * data, int size, int stride, int count)
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	uint32_t *out;

	if (stride == 0) {
		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
		count = 1;
		aos->stride = 0;
	} else {
		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
		aos->stride = size;
	}

	aos->components = size;
	aos->count = count;

	out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
	switch (size) {
	case 1: radeonEmitVec4(out, data, stride, count); break;
	case 2: radeonEmitVec8(out, data, stride, count); break;
	case 3: radeonEmitVec12(out, data, stride, count); break;
	case 4: radeonEmitVec16(out, data, stride, count); break;
	default:
		assert(0);
		break;
	}
}

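/* A hypothetical call site, to illustrate the contract (the names here
 * are illustrative, not from this file): emitting a packed vec3
 * position array for 'count' vertices would look like
 *
 *	rcommon_emit_vector(ctx, &rmesa->tcl.aos[0],
 *			    position_data, 3, 12, count);
 *
 * i.e. 'size' is in dwords per element and 'stride' in bytes, while
 * stride == 0 uploads a single element used for every vertex.
 */
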
void radeon_init_dma(radeonContextPtr rmesa)
{
	make_empty_list(&rmesa->dma.free);
	make_empty_list(&rmesa->dma.wait);
	make_empty_list(&rmesa->dma.reserved);
	rmesa->dma.minimum_size = MAX_DMA_BUF_SZ;
}

void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
{
	struct radeon_dma_bo *dma_bo = NULL;

	/* Raise the minimum size to at least the requested size,
	   aligned to the next 16 bytes (e.g. 100 -> 112). */
	if (size > rmesa->dma.minimum_size)
		rmesa->dma.minimum_size = (size + 15) & (~15);

	radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %d\n",
		     __FUNCTION__, size, rmesa->dma.minimum_size);

	/* unmap old reserved bo */
	if (!is_empty_list(&rmesa->dma.reserved))
		radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo);

	if (is_empty_list(&rmesa->dma.free)
	    || last_elem(&rmesa->dma.free)->bo->size < size) {
		dma_bo = CALLOC_STRUCT(radeon_dma_bo);
		assert(dma_bo);

again_alloc:
		dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
					    0, rmesa->dma.minimum_size, 4,
					    RADEON_GEM_DOMAIN_GTT, 0);

		if (!dma_bo->bo) {
			/* allocation failed: flush pending commands and retry */
			rcommonFlushCmdBuf(rmesa, __FUNCTION__);
			goto again_alloc;
		}
		insert_at_head(&rmesa->dma.reserved, dma_bo);
	} else {
		/* We push and pop buffers from end of list so we can keep
		   counter on unused buffers for later freeing them from
		   begin of list */
		dma_bo = last_elem(&rmesa->dma.free);
		remove_from_list(dma_bo);
		insert_at_head(&rmesa->dma.reserved, dma_bo);
	}

	rmesa->dma.current_used = 0;
	rmesa->dma.current_vertexptr = 0;

	if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
					  first_elem(&rmesa->dma.reserved)->bo,
					  RADEON_GEM_DOMAIN_GTT, 0))
		fprintf(stderr, "failure to revalidate BOs - badness\n");

	if (is_empty_list(&rmesa->dma.reserved)) {
		/* Cmd buf has been flushed in radeon_revalidate_bos */
		goto again_alloc;
	}

	radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1);
}

/* Allocates a region from rmesa->dma.current.  If there isn't enough
 * space in current, grab a new buffer (and discard what was left of current)
 */
void radeonAllocDmaRegion(radeonContextPtr rmesa,
			  struct radeon_bo **pbo, int *poffset,
			  int bytes, int alignment)
{
	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);

	if (rmesa->dma.flush)
		rmesa->dma.flush(rmesa->glCtx);

	assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);

	/* round current_used up to the requested (power of two) alignment */
	alignment--;
	rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;

	if (is_empty_list(&rmesa->dma.reserved)
	    || rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size)
		radeonRefillCurrentDmaRegion(rmesa, bytes);

	*poffset = rmesa->dma.current_used;
	*pbo = first_elem(&rmesa->dma.reserved)->bo;
	radeon_bo_ref(*pbo);

	/* Always align to at least 16 bytes */
	rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
	rmesa->dma.current_vertexptr = rmesa->dma.current_used;

	assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size);
}

void radeonFreeDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma_bo *dma_bo;
	struct radeon_dma_bo *temp;

	if (RADEON_DEBUG & RADEON_DMA)
		fprintf(stderr, "%s\n", __FUNCTION__);

	foreach_s(dma_bo, temp, &rmesa->dma.free) {
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}

	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}

	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
		remove_from_list(dma_bo);
		radeon_bo_unmap(dma_bo->bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}
}

void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes)
{
	if (is_empty_list(&rmesa->dma.reserved))
		return;

	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s %d\n", __FUNCTION__, return_bytes);
	rmesa->dma.current_used -= return_bytes;
	rmesa->dma.current_vertexptr = rmesa->dma.current_used;
}

static int radeon_bo_is_idle(struct radeon_bo *bo)
{
	uint32_t domain;
	int ret = radeon_bo_is_busy(bo, &domain);
	if (ret == -EINVAL) {
		WARN_ONCE("Your libdrm or kernel doesn't have support for busy query.\n"
			  "This may cause a small performance drop for you.\n");
	}
	return ret != -EBUSY;
}

void radeonReleaseDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma_bo *dma_bo;
	struct radeon_dma_bo *temp;
	const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME;
	const int time = rmesa->dma.free.expire_counter;

	if (RADEON_DEBUG & RADEON_DMA) {
		size_t free = 0, wait = 0, reserved = 0;

		foreach(dma_bo, &rmesa->dma.free)
			++free;

		foreach(dma_bo, &rmesa->dma.wait)
			++wait;

		foreach(dma_bo, &rmesa->dma.reserved)
			++reserved;

		fprintf(stderr, "%s: free %zu, wait %zu, reserved %zu, minimum_size: %zu\n",
			__FUNCTION__, free, wait, reserved, rmesa->dma.minimum_size);
	}

	if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
		/* request updated cs processing information from kernel */
		legacy_track_pending(rmesa->radeonScreen->bom, 0);
	}

	/* move waiting bos to free list.
	   wait list provides gpu time to handle data before reuse */
	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
		if (dma_bo->expire_counter == time) {
			WARN_ONCE("Leaking dma buffer object!\n");
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		/* free objects that are too small to be used because of large request */
		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}

		if (!radeon_bo_is_idle(dma_bo->bo))
			continue;

		remove_from_list(dma_bo);
		dma_bo->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.free, dma_bo);
	}

	/* unmap the last dma region */
	if (!is_empty_list(&rmesa->dma.reserved))
		radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo);

	/* move reserved to wait list */
	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
		/* free objects that are too small to be used because of large request */
		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}

		remove_from_list(dma_bo);
		dma_bo->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.wait, dma_bo);
	}

	/* free bos that have been unused for some time */
	foreach_s(dma_bo, temp, &rmesa->dma.free) {
		if (dma_bo->expire_counter != time)
			break;
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}
}

/* Flush vertices in the current dma region.
 */
void rcommon_flush_last_swtcl_prim(GLcontext *ctx)
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	struct radeon_dma *dma = &rmesa->dma;

	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);
	dma->flush = NULL;

	if (!is_empty_list(&dma->reserved)) {
		GLuint current_offset = dma->current_used;

		assert(dma->current_used +
		       rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
		       dma->current_vertexptr);

		if (dma->current_used != dma->current_vertexptr) {
			dma->current_used = dma->current_vertexptr;

			rmesa->vtbl.swtcl_flush(ctx, current_offset);
		}
		rmesa->swtcl.numverts = 0;
	}
}

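/* dma.flush is the deferred-flush hook: rcommonAllocDmaLowVerts installs
 * rcommon_flush_last_swtcl_prim here, and radeonAllocDmaRegion /
 * radeonReleaseArrays invoke it before touching the region, so software
 * TCL vertices buffered in the current bo are emitted first.
 */
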
/* Alloc space in the current dma region.
 */
void *
rcommonAllocDmaLowVerts(radeonContextPtr rmesa, int nverts, int vsize)
{
	GLuint bytes = vsize * nverts;
	void *head;

	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (is_empty_list(&rmesa->dma.reserved)
	    || rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) {
		if (rmesa->dma.flush) {
			rmesa->dma.flush(rmesa->glCtx);
		}

		radeonRefillCurrentDmaRegion(rmesa, bytes);

		return NULL;
	}

	if (!rmesa->dma.flush) {
		/* if cmdbuf flushed DMA restart */
		rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
		rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
	}

	ASSERT(vsize == rmesa->swtcl.vertex_size * 4);
	ASSERT(rmesa->dma.flush == rcommon_flush_last_swtcl_prim);
	ASSERT(rmesa->dma.current_used +
	       rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
	       rmesa->dma.current_vertexptr);

	head = (first_elem(&rmesa->dma.reserved)->bo->ptr + rmesa->dma.current_vertexptr);
	rmesa->dma.current_vertexptr += bytes;
	rmesa->swtcl.numverts += nverts;
	return head;
}

void radeonReleaseArrays(GLcontext *ctx, GLuint newinputs)
{
	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
	int i;

	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (radeon->dma.flush) {
		radeon->dma.flush(radeon->glCtx);
	}
	for (i = 0; i < radeon->tcl.aos_count; i++) {
		if (radeon->tcl.aos[i].bo) {
			radeon_bo_unref(radeon->tcl.aos[i].bo);
			radeon->tcl.aos[i].bo = NULL;
		}
	}
}