src/mesa/drivers/dri/radeon/radeon_dma.c

   1 /**************************************************************************
   2
   3 Copyright (C) 2004 Nicolai Haehnle.
   4 Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
   5
   6 The Weather Channel (TM) funded Tungsten Graphics to develop the
   7 initial release of the Radeon 8500 driver under the XFree86 license.
   8 This notice must be preserved.
   9
  10 All Rights Reserved.
  11
  12 Permission is hereby granted, free of charge, to any person obtaining a
  13 copy of this software and associated documentation files (the "Software"),
  14 to deal in the Software without restriction, including without limitation
  15 on the rights to use, copy, modify, merge, publish, distribute, sub
  16 license, and/or sell copies of the Software, and to permit persons to whom
  17 the Software is furnished to do so, subject to the following conditions:
  18
  19 The above copyright notice and this permission notice (including the next
  20 paragraph) shall be included in all copies or substantial portions of the
  21 Software.
  22
  23 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  24 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  25 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  26 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
  27 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  28 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  29 USE OR OTHER DEALINGS IN THE SOFTWARE.
  30
  31 **************************************************************************/
  32
  33 #include <errno.h>
  34 #include "radeon_common.h"
  35 #include "main/simple_list.h"
  36
  37 #if defined(USE_X86_ASM)
  38 #define COPY_DWORDS( dst, src, nr )                                     \
  39 do {                                                                    \
  40         int __tmp;                                                      \
  41         __asm__ __volatile__( "rep ; movsl"                             \
  42                               : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \
  43                               : "0" (nr),                               \
  44                                 "D" ((long)dst),                        \
  45                                 "S" ((long)src) );                      \
  46 } while (0)
  47 #else
  48 #define COPY_DWORDS( dst, src, nr )             \
  49 do {                                            \
  50    int j;                                       \
  51    for ( j = 0 ; j < nr ; j++ )                 \
  52       dst[j] = ((int *)src)[j];                 \
  53    dst += nr;                                   \
  54 } while (0)
  55 #endif
  56
  57 void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count)
  58 {
  59         int i;
  60
  61         if (RADEON_DEBUG & RADEON_VERTS)
  62                 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
  63                         __FUNCTION__, count, stride, (void *)out, (void *)data);
  64
  65         if (stride == 4)
  66                 COPY_DWORDS(out, data, count);
  67         else
  68                 for (i = 0; i < count; i++) {
  69                         out[0] = *(int *)data;
  70                         out++;
  71                         data += stride;
  72                 }
  73 }
  74
  75 void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count)
  76 {
  77         int i;
  78
  79         if (RADEON_DEBUG & RADEON_VERTS)
  80                 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
  81                         __FUNCTION__, count, stride, (void *)out, (void *)data);
  82
  83         if (stride == 8)
  84                 COPY_DWORDS(out, data, count * 2);
  85         else
  86                 for (i = 0; i < count; i++) {
  87                         out[0] = *(int *)data;
  88                         out[1] = *(int *)(data + 4);
  89                         out += 2;
  90                         data += stride;
  91                 }
  92 }
  93
  94 void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count)
  95 {
  96         int i;
  97
  98         if (RADEON_DEBUG & RADEON_VERTS)
  99                 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
 100                         __FUNCTION__, count, stride, (void *)out, (void *)data);
 101
 102         if (stride == 12) {
 103                 COPY_DWORDS(out, data, count * 3);
 104     }
 105         else
 106                 for (i = 0; i < count; i++) {
 107                         out[0] = *(int *)data;
 108                         out[1] = *(int *)(data + 4);
 109                         out[2] = *(int *)(data + 8);
 110                         out += 3;
 111                         data += stride;
 112                 }
 113 }
 114
 115 void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count)
 116 {
 117         int i;
 118
 119         if (RADEON_DEBUG & RADEON_VERTS)
 120                 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
 121                         __FUNCTION__, count, stride, (void *)out, (void *)data);
 122
 123         if (stride == 16)
 124                 COPY_DWORDS(out, data, count * 4);
 125         else
 126                 for (i = 0; i < count; i++) {
 127                         out[0] = *(int *)data;
 128                         out[1] = *(int *)(data + 4);
 129                         out[2] = *(int *)(data + 8);
 130                         out[3] = *(int *)(data + 12);
 131                         out += 4;
 132                         data += stride;
 133                 }
 134 }
 135
 136 void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos,
 137                          const GLvoid * data, int size, int stride, int count)
 138 {
 139         radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 140         uint32_t *out;
 141
 142         if (stride == 0) {
 143                 radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
 144                 count = 1;
 145                 aos->stride = 0;
 146         } else {
 147                 radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
 148                 aos->stride = size;
 149         }
 150
 151         aos->components = size;
 152         aos->count = count;
 153
 154         radeon_bo_map(aos->bo, 1);
 155         out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
 156         switch (size) {
 157         case 1: radeonEmitVec4(out, data, stride, count); break;
 158         case 2: radeonEmitVec8(out, data, stride, count); break;
 159         case 3: radeonEmitVec12(out, data, stride, count); break;
 160         case 4: radeonEmitVec16(out, data, stride, count); break;
 161         default:
 162                 assert(0);
 163                 break;
 164         }
 165         radeon_bo_unmap(aos->bo);
 166 }
 167
 168 void radeon_init_dma(radeonContextPtr rmesa)
 169 {
 170         make_empty_list(&rmesa->dma.free);
 171         make_empty_list(&rmesa->dma.wait);
 172         make_empty_list(&rmesa->dma.reserved);
 173         rmesa->dma.minimum_size = MAX_DMA_BUF_SZ;
 174 }
 175
 176 void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
 177 {
 178         struct radeon_dma_bo *dma_bo = NULL;
 179         /* we set minimum sizes to at least requested size
 180            aligned to next 16 bytes. */
 181         if (size > rmesa->dma.minimum_size)
 182                 rmesa->dma.minimum_size = (size + 15) & (~15);
 183
 184         radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %d\n",
 185                         __FUNCTION__, size, rmesa->dma.minimum_size);
 186
 187         if (!is_empty_list(&rmesa->dma.reserved))
 188                 radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo);
 189
 190         if (is_empty_list(&rmesa->dma.free)
 191               || last_elem(&rmesa->dma.free)->bo->size < size) {
 192                 dma_bo = CALLOC_STRUCT(radeon_dma_bo);
 193                 assert(dma_bo);
 194
 195 again_alloc:
 196                 dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
 197                                             0, rmesa->dma.minimum_size, 4,
 198                                             RADEON_GEM_DOMAIN_GTT, 0);
 199
 200                 if (!dma_bo->bo) {
 201                         rcommonFlushCmdBuf(rmesa, __FUNCTION__);
 202                         goto again_alloc;
 203                 }
 204                 insert_at_head(&rmesa->dma.reserved, dma_bo);
 205         } else {
 206                 /* We push and pop buffers from end of list so we can keep
 207                    counter on unused buffers for later freeing them from
 208                    begin of list */
 209                 dma_bo = last_elem(&rmesa->dma.free);
 210                 remove_from_list(dma_bo);
 211                 insert_at_head(&rmesa->dma.reserved, dma_bo);
 212         }
 213
 214         rmesa->dma.current_used = 0;
 215         rmesa->dma.current_vertexptr = 0;
 216
 217         if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
 218                                           first_elem(&rmesa->dma.reserved)->bo,
 219                                           RADEON_GEM_DOMAIN_GTT, 0))
 220                 fprintf(stderr,"failure to revalidate BOs - badness\n");
 221
 222         if (is_empty_list(&rmesa->dma.reserved)) {
 223         /* Cmd buff have been flushed in radeon_revalidate_bos */
 224                 goto again_alloc;
 225         }
 226         radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1);
 227 }
 228
 229 /* Allocates a region from rmesa->dma.current.  If there isn't enough
 230  * space in current, grab a new buffer (and discard what was left of current)
 231  */
 232 void radeonAllocDmaRegion(radeonContextPtr rmesa,
 233                           struct radeon_bo **pbo, int *poffset,
 234                           int bytes, int alignment)
 235 {
 236         if (RADEON_DEBUG & RADEON_IOCTL)
 237                 fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
 238
 239         if (rmesa->dma.flush)
 240                 rmesa->dma.flush(rmesa->glCtx);
 241
 242         assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);
 243
 244         alignment--;
 245         rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;
 246
 247         if (is_empty_list(&rmesa->dma.reserved)
 248                 || rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size)
 249                 radeonRefillCurrentDmaRegion(rmesa, bytes);
 250
 251         *poffset = rmesa->dma.current_used;
 252         *pbo = first_elem(&rmesa->dma.reserved)->bo;
 253         radeon_bo_ref(*pbo);
 254
 255         /* Always align to at least 16 bytes */
 256         rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
 257         rmesa->dma.current_vertexptr = rmesa->dma.current_used;
 258
 259         assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size);
 260 }
 261
 262 void radeonFreeDmaRegions(radeonContextPtr rmesa)
 263 {
 264         struct radeon_dma_bo *dma_bo;
 265         struct radeon_dma_bo *temp;
 266         if (RADEON_DEBUG & RADEON_DMA)
 267                 fprintf(stderr, "%s\n", __FUNCTION__);
 268
 269         foreach_s(dma_bo, temp, &rmesa->dma.free) {
 270                 remove_from_list(dma_bo);
 271                 radeon_bo_unref(dma_bo->bo);
 272                 FREE(dma_bo);
 273         }
 274
 275         foreach_s(dma_bo, temp, &rmesa->dma.wait) {
 276                 remove_from_list(dma_bo);
 277                 radeon_bo_unref(dma_bo->bo);
 278                 FREE(dma_bo);
 279         }
 280
 281         foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
 282                 remove_from_list(dma_bo);
 283                 radeon_bo_unref(dma_bo->bo);
 284                 FREE(dma_bo);
 285         }
 286 }
 287
 288 void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes)
 289 {
 290         if (is_empty_list(&rmesa->dma.reserved))
 291                 return;
 292
 293         if (RADEON_DEBUG & RADEON_IOCTL)
 294                 fprintf(stderr, "%s %d\n", __FUNCTION__, return_bytes);
 295         rmesa->dma.current_used -= return_bytes;
 296         rmesa->dma.current_vertexptr = rmesa->dma.current_used;
 297 }
 298
 299 static int radeon_bo_is_idle(struct radeon_bo* bo)
 300 {
 301         uint32_t domain;
 302         int ret = radeon_bo_is_busy(bo, &domain);
 303         if (ret == -EINVAL) {
 304                 WARN_ONCE("Your libdrm or kernel doesn't have support for busy query.\n"
 305                         "This may cause small performance drop for you.\n");
 306         }
 307         return ret != -EBUSY;
 308 }
 309
 310 void radeonReleaseDmaRegions(radeonContextPtr rmesa)
 311 {
 312         struct radeon_dma_bo *dma_bo;
 313         struct radeon_dma_bo *temp;
 314         const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME;
 315         const int time = rmesa->dma.free.expire_counter;
 316
 317         if (RADEON_DEBUG & RADEON_DMA) {
 318                 size_t free = 0,
 319                        wait = 0,
 320                        reserved = 0;
 321                 foreach(dma_bo, &rmesa->dma.free)
 322                         ++free;
 323
 324                 foreach(dma_bo, &rmesa->dma.wait)
 325                         ++wait;
 326
 327                 foreach(dma_bo, &rmesa->dma.reserved)
 328                         ++reserved;
 329
 330                 fprintf(stderr, "%s: free %zu, wait %zu, reserved %zu, minimum_size: %zu\n",
 331                       __FUNCTION__, free, wait, reserved, rmesa->dma.minimum_size);
 332         }
 333
 334         if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
 335                 /* request updated cs processing information from kernel */
 336                 legacy_track_pending(rmesa->radeonScreen->bom, 0);
 337         }
 338
 339         if (!is_empty_list(&rmesa->dma.reserved))
 340                 radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo);
 341
 342         /* move waiting bos to free list.
 343            wait list provides gpu time to handle data before reuse */
 344         foreach_s(dma_bo, temp, &rmesa->dma.wait) {
 345                 if (dma_bo->expire_counter == time) {
 346                         WARN_ONCE("Leaking dma buffer object!\n");
 347                         radeon_bo_unref(dma_bo->bo);
 348                         remove_from_list(dma_bo);
 349                         FREE(dma_bo);
 350                         continue;
 351                 }
 352                 /* free objects that are too small to be used because of large request */
 353                 if (dma_bo->bo->size < rmesa->dma.minimum_size) {
 354                    radeon_bo_unref(dma_bo->bo);
 355                    remove_from_list(dma_bo);
 356                    FREE(dma_bo);
 357                    continue;
 358                 }
 359                 if (!radeon_bo_is_idle(dma_bo->bo)) {
 360                         if (rmesa->radeonScreen->driScreen->dri2.enabled)
 361                                 break;
 362                         continue;
 363                 }
 364                 remove_from_list(dma_bo);
 365                 dma_bo->expire_counter = expire_at;
 366                 insert_at_tail(&rmesa->dma.free, dma_bo);
 367         }
 368
 369         /* move reserved to wait list */
 370         foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
 371                 /* free objects that are too small to be used because of large request */
 372                 if (dma_bo->bo->size < rmesa->dma.minimum_size) {
 373                    radeon_bo_unref(dma_bo->bo);
 374                    remove_from_list(dma_bo);
 375                    FREE(dma_bo);
 376                    continue;
 377                 }
 378                 remove_from_list(dma_bo);
 379                 dma_bo->expire_counter = expire_at;
 380                 insert_at_tail(&rmesa->dma.wait, dma_bo);
 381         }
 382
 383         /* free bos that have been unused for some time */
 384         foreach_s(dma_bo, temp, &rmesa->dma.free) {
 385                 if (dma_bo->expire_counter != time)
 386                         break;
 387                 remove_from_list(dma_bo);
 388                 radeon_bo_unref(dma_bo->bo);
 389                 FREE(dma_bo);
 390         }
 391
 392 }
 393
 394
 395 /* Flush vertices in the current dma region.
 396  */
 397 void rcommon_flush_last_swtcl_prim( GLcontext *ctx  )
 398 {
 399         radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 400         struct radeon_dma *dma = &rmesa->dma;
 401
 402         if (RADEON_DEBUG & RADEON_IOCTL)
 403                 fprintf(stderr, "%s\n", __FUNCTION__);
 404         dma->flush = NULL;
 405
 406         radeon_bo_unmap(rmesa->swtcl.bo);
 407
 408         if (!is_empty_list(&dma->reserved)) {
 409             GLuint current_offset = dma->current_used;
 410
 411             assert (dma->current_used +
 412                     rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
 413                     dma->current_vertexptr);
 414
 415             if (dma->current_used != dma->current_vertexptr) {
 416                     dma->current_used = dma->current_vertexptr;
 417
 418                     rmesa->vtbl.swtcl_flush(ctx, current_offset);
 419             }
 420             rmesa->swtcl.numverts = 0;
 421         }
 422         radeon_bo_unref(rmesa->swtcl.bo);
 423         rmesa->swtcl.bo = NULL;
 424 }
 425 /* Alloc space in the current dma region.
 426  */
 427 void *
 428 rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
 429 {
 430         GLuint bytes = vsize * nverts;
 431         void *head;
 432         if (RADEON_DEBUG & RADEON_IOCTL)
 433                 fprintf(stderr, "%s\n", __FUNCTION__);
 434
 435         if(is_empty_list(&rmesa->dma.reserved)
 436               ||rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) {
 437                 if (rmesa->dma.flush) {
 438                         rmesa->dma.flush(rmesa->glCtx);
 439                 }
 440
 441                 radeonRefillCurrentDmaRegion(rmesa, bytes);
 442
 443                 return NULL;
 444         }
 445
 446         if (!rmesa->dma.flush) {
 447                 /* if cmdbuf flushed DMA restart */
 448                 rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
 449                 rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
 450         }
 451
 452         ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
 453         ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim );
 454         ASSERT( rmesa->dma.current_used +
 455                 rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
 456                 rmesa->dma.current_vertexptr );
 457
 458         if (!rmesa->swtcl.bo) {
 459                 rmesa->swtcl.bo = first_elem(&rmesa->dma.reserved)->bo;
 460                 radeon_bo_ref(rmesa->swtcl.bo);
 461                 radeon_bo_map(rmesa->swtcl.bo, 1);
 462         }
 463
 464         head = (rmesa->swtcl.bo->ptr + rmesa->dma.current_vertexptr);
 465         rmesa->dma.current_vertexptr += bytes;
 466         rmesa->swtcl.numverts += nverts;
 467         return head;
 468 }
 469
 470 void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
 471 {
 472    radeonContextPtr radeon = RADEON_CONTEXT( ctx );
 473    int i;
 474         if (RADEON_DEBUG & RADEON_IOCTL)
 475                 fprintf(stderr, "%s\n", __FUNCTION__);
 476
 477    if (radeon->dma.flush) {
 478        radeon->dma.flush(radeon->glCtx);
 479    }
 480    for (i = 0; i < radeon->tcl.aos_count; i++) {
 481       if (radeon->tcl.aos[i].bo) {
 482          radeon_bo_unref(radeon->tcl.aos[i].bo);
 483          radeon->tcl.aos[i].bo = NULL;
 484
 485       }
 486    }
 487 }