/**************************************************************************

Copyright (C) 2004 Nicolai Haehnle.
Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/
#include "radeon_common.h"
#include "radeon_fog.h"
#include "util/simple_list.h"
#include "util/u_memory.h"
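
/* Copy 'nr' 32-bit words from src to dst and leave dst pointing past the
 * copied data.  The x86 variant uses "rep ; movsl", which advances the
 * destination register as a side effect; the portable fallback advances
 * dst explicitly to match.
 */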
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int __tmp;							\
	__asm__ __volatile__( "rep ; movsl"				\
			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
			      : "0" (nr),				\
				"D" ((long)dst),			\
				"S" ((long)src) );			\
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )		\
do {						\
	int j;					\
	for ( j = 0 ; j < nr ; j++ )		\
		dst[j] = ((int *)src)[j];	\
	dst += nr;				\
} while (0)
#endif
void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__func__, count, stride, (void *)out, (void *)data);

	if (stride == 4)
		COPY_DWORDS(out, data, count);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out++;
			data += stride;
		}
}
void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__func__, count, stride, (void *)out, (void *)data);

	if (stride == 8)
		COPY_DWORDS(out, data, count * 2);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out += 2;
			data += stride;
		}
}
void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__func__, count, stride, (void *)out, (void *)data);

	if (stride == 12)
		COPY_DWORDS(out, data, count * 3);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out[2] = *(int *)(data + 8);
			out += 3;
			data += stride;
		}
}
void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__func__, count, stride, (void *)out, (void *)data);

	if (stride == 16)
		COPY_DWORDS(out, data, count * 4);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out[2] = *(int *)(data + 8);
			out[3] = *(int *)(data + 12);
			out += 4;
			data += stride;
		}
}
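
/* Upload one vertex attribute array into a freshly allocated DMA region
 * and record it in 'aos'.  A stride of zero means a constant attribute:
 * a single element is emitted and aos->stride is left at 0.
 */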
void rcommon_emit_vector(struct gl_context * ctx, struct radeon_aos *aos,
			 const GLvoid * data, int size, int stride, int count)
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	uint32_t *out;

	if (stride == 0) {
		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
		count = 1;
		aos->stride = 0;
	} else {
		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
		aos->stride = size;
	}

	aos->components = size;
	aos->count = count;

	radeon_bo_map(aos->bo, 1);
	out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
	switch (size) {
	case 1: radeonEmitVec4(out, data, stride, count); break;
	case 2: radeonEmitVec8(out, data, stride, count); break;
	case 3: radeonEmitVec12(out, data, stride, count); break;
	case 4: radeonEmitVec16(out, data, stride, count); break;
	default:
		assert(0);
		break;
	}
	radeon_bo_unmap(aos->bo);
}
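
/* Like rcommon_emit_vector() for the single-component fog coordinate, but
 * each input value is converted to a fog blend factor as it is copied.
 */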
void rcommon_emit_vecfog(struct gl_context *ctx, struct radeon_aos *aos,
			 GLvoid *data, int stride, int count)
{
	float *out;
	int i;
	int size = 1;
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d\n",
			__func__, count, stride);

	if (stride == 0) {
		radeonAllocDmaRegion( rmesa, &aos->bo, &aos->offset, size * 4, 32 );
		count = 1;
		aos->stride = 0;
	} else {
		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
		aos->stride = size;
	}

	aos->components = size;
	aos->count = count;

	/* Emit the data */
	radeon_bo_map(aos->bo, 1);
	out = (float*)((char*)aos->bo->ptr + aos->offset);
	for (i = 0; i < count; i++) {
		out[0] = radeonComputeFogBlendFactor( ctx, *(GLfloat *)data );
		out++;
		data += stride;
	}
	radeon_bo_unmap(aos->bo);
}
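
/* DMA buffer objects cycle through three lists: 'reserved' holds the
 * buffer currently being filled, released buffers sit on 'wait' until the
 * GPU is done with them, and idle buffers move to 'free' for reuse before
 * eventually being destroyed (see radeonReleaseDmaRegions()).
 */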
void radeon_init_dma(radeonContextPtr rmesa)
{
	make_empty_list(&rmesa->dma.free);
	make_empty_list(&rmesa->dma.wait);
	make_empty_list(&rmesa->dma.reserved);
	rmesa->dma.minimum_size = MAX_DMA_BUF_SZ;
}
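
/* Make a buffer of at least 'size' bytes the current reserved region,
 * reusing a buffer from the free list when one is big enough and
 * allocating a new one otherwise.  Retries via 'again_alloc' after
 * flushing the command buffer if the allocation fails.
 */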
void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
{
	struct radeon_dma_bo *dma_bo = NULL;

	/* Raise the minimum size to at least the requested size,
	   aligned to the next 16 bytes. */
	if (size > rmesa->dma.minimum_size)
		rmesa->dma.minimum_size = (size + 15) & (~15);

	radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %zi\n",
		     __func__, size, rmesa->dma.minimum_size);

	if (is_empty_list(&rmesa->dma.free)
	    || last_elem(&rmesa->dma.free)->bo->size < size) {
		dma_bo = CALLOC_STRUCT(radeon_dma_bo);
		assert(dma_bo);

again_alloc:
		dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
					    0, rmesa->dma.minimum_size, 4,
					    RADEON_GEM_DOMAIN_GTT, 0);
		if (!dma_bo->bo) {
			rcommonFlushCmdBuf(rmesa, __func__);
			goto again_alloc;
		}
		insert_at_head(&rmesa->dma.reserved, dma_bo);
	} else {
		/* We push and pop buffers from the end of the list so we can
		   keep a counter on unused buffers and later free them from
		   the beginning of the list. */
		dma_bo = last_elem(&rmesa->dma.free);
		remove_from_list(dma_bo);
		insert_at_head(&rmesa->dma.reserved, dma_bo);
	}

	rmesa->dma.current_used = 0;
	rmesa->dma.current_vertexptr = 0;

	if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
					  first_elem(&rmesa->dma.reserved)->bo,
					  RADEON_GEM_DOMAIN_GTT, 0))
		fprintf(stderr, "failure to revalidate BOs - badness\n");

	if (is_empty_list(&rmesa->dma.reserved)) {
		/* The command buffer has been flushed in radeon_revalidate_bos. */
		goto again_alloc;
	}
	radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1);
}
/* Allocates a region from rmesa->dma.current.  If there isn't enough
 * space in current, grab a new buffer (and discard what was left of current).
 */
void radeonAllocDmaRegion(radeonContextPtr rmesa,
			  struct radeon_bo **pbo, int *poffset,
			  int bytes, int alignment)
{
	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s %d\n", __func__, bytes);

	if (rmesa->dma.flush)
		rmesa->dma.flush(&rmesa->glCtx);

	assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);

	alignment--;
	rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;

	if (is_empty_list(&rmesa->dma.reserved)
	    || rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size)
		radeonRefillCurrentDmaRegion(rmesa, bytes);

	*poffset = rmesa->dma.current_used;
	*pbo = first_elem(&rmesa->dma.reserved)->bo;
	radeon_bo_ref(*pbo);

	/* Always align to at least 16 bytes */
	rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
	rmesa->dma.current_vertexptr = rmesa->dma.current_used;

	assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size);
}
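
/* Release every DMA buffer object on the free, wait and reserved lists. */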
void radeonFreeDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma_bo *dma_bo;
	struct radeon_dma_bo *temp;

	if (RADEON_DEBUG & RADEON_DMA)
		fprintf(stderr, "%s\n", __func__);

	foreach_s(dma_bo, temp, &rmesa->dma.free) {
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		free(dma_bo);
	}

	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		free(dma_bo);
	}

	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		free(dma_bo);
	}
}
void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes)
{
	if (is_empty_list(&rmesa->dma.reserved))
		return;

	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s %d\n", __func__, return_bytes);
	rmesa->dma.current_used -= return_bytes;
	rmesa->dma.current_vertexptr = rmesa->dma.current_used;
}
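
/* Returns nonzero when the GPU is no longer using the buffer.  A kernel or
 * libdrm without busy-query support reports -EINVAL, which is treated as
 * idle after warning once.
 */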
static int radeon_bo_is_idle(struct radeon_bo *bo)
{
	uint32_t domain;
	int ret = radeon_bo_is_busy(bo, &domain);
	if (ret == -EINVAL) {
		WARN_ONCE("Your libdrm or kernel doesn't have support for busy query.\n"
			  "This may cause a small performance drop for you.\n");
	}
	return ret != -EBUSY;
}
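
/* Age the DMA buffer lists: waiting buffers that have gone idle move to
 * the free list, reserved buffers are unmapped and queued on the wait
 * list, and free buffers untouched for DMA_BO_FREE_TIME release cycles are
 * destroyed.  Buffers smaller than the current minimum_size are freed
 * outright.
 */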
void radeonReleaseDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma_bo *dma_bo;
	struct radeon_dma_bo *temp;
	const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME;
	const int time = rmesa->dma.free.expire_counter;

	if (RADEON_DEBUG & RADEON_DMA) {
		size_t free = 0, wait = 0, reserved = 0;

		foreach(dma_bo, &rmesa->dma.free)
			++free;
		foreach(dma_bo, &rmesa->dma.wait)
			++wait;
		foreach(dma_bo, &rmesa->dma.reserved)
			++reserved;

		fprintf(stderr, "%s: free %zu, wait %zu, reserved %zu, minimum_size: %zu\n",
			__func__, free, wait, reserved, rmesa->dma.minimum_size);
	}

	/* Move waiting bos to the free list.  The wait list gives the GPU
	   time to finish with the data before it is reused. */
	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
		if (dma_bo->expire_counter == time) {
			WARN_ONCE("Leaking dma buffer object!\n");
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			free(dma_bo);
			continue;
		}
		/* free objects that are too small to be used because of large request */
		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			free(dma_bo);
			continue;
		}

		if (!radeon_bo_is_idle(dma_bo->bo)) {
			break;
		}

		remove_from_list(dma_bo);
		dma_bo->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.free, dma_bo);
	}

	/* move reserved to wait list */
	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
		radeon_bo_unmap(dma_bo->bo);
		/* free objects that are too small to be used because of large request */
		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			free(dma_bo);
			continue;
		}

		remove_from_list(dma_bo);
		dma_bo->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.wait, dma_bo);
	}

	/* free bos that have been unused for some time */
	foreach_s(dma_bo, temp, &rmesa->dma.free) {
		if (dma_bo->expire_counter != time)
			break;
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		free(dma_bo);
	}
}
/* Flush vertices in the current dma region.
 */
void rcommon_flush_last_swtcl_prim(struct gl_context *ctx)
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	struct radeon_dma *dma = &rmesa->dma;

	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s\n", __func__);
	dma->flush = NULL;

	radeon_bo_unmap(rmesa->swtcl.bo);

	if (!is_empty_list(&dma->reserved)) {
		GLuint current_offset = dma->current_used;

		assert(dma->current_used +
		       rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
		       dma->current_vertexptr);

		if (dma->current_used != dma->current_vertexptr) {
			dma->current_used = dma->current_vertexptr;

			rmesa->vtbl.swtcl_flush(ctx, current_offset);
		}
		rmesa->swtcl.numverts = 0;
	}
	radeon_bo_unref(rmesa->swtcl.bo);
	rmesa->swtcl.bo = NULL;
}
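
/* rcommon_flush_last_swtcl_prim() above is installed lazily as the
 * dma.flush callback by rcommonAllocDmaLowVerts() below.
 */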
/* Alloc space in the current dma region.
 */
void *
rcommonAllocDmaLowVerts(radeonContextPtr rmesa, int nverts, int vsize)
{
	GLuint bytes = vsize * nverts;
	void *head;

	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s\n", __func__);

	if (is_empty_list(&rmesa->dma.reserved)
	    || rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) {
		if (rmesa->dma.flush) {
			rmesa->dma.flush(&rmesa->glCtx);
		}

		radeonRefillCurrentDmaRegion(rmesa, bytes);

		return NULL;
	}

	if (!rmesa->dma.flush) {
		/* if cmdbuf flushed DMA restart */
		rmesa->glCtx.Driver.NeedFlush |= FLUSH_STORED_VERTICES;
		rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
	}

	assert( vsize == rmesa->swtcl.vertex_size * 4 );
	assert( rmesa->dma.flush == rcommon_flush_last_swtcl_prim );
	assert( rmesa->dma.current_used +
		rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
		rmesa->dma.current_vertexptr );

	if (!rmesa->swtcl.bo) {
		rmesa->swtcl.bo = first_elem(&rmesa->dma.reserved)->bo;
		radeon_bo_ref(rmesa->swtcl.bo);
		radeon_bo_map(rmesa->swtcl.bo, 1);
	}

	head = (rmesa->swtcl.bo->ptr + rmesa->dma.current_vertexptr);
	rmesa->dma.current_vertexptr += bytes;
	rmesa->swtcl.numverts += nverts;
	return head;
}
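
/* Release the array-of-structures buffer objects referenced by the TCL
 * state, flushing any pending software-TCL vertices first.
 */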
void radeonReleaseArrays( struct gl_context *ctx, GLuint newinputs )
{
	radeonContextPtr radeon = RADEON_CONTEXT( ctx );
	int i;

	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s\n", __func__);

	if (radeon->dma.flush) {
		radeon->dma.flush(&radeon->glCtx);
	}
	for (i = 0; i < radeon->tcl.aos_count; i++) {
		if (radeon->tcl.aos[i].bo) {
			radeon_bo_unref(radeon->tcl.aos[i].bo);
			radeon->tcl.aos[i].bo = NULL;
		}
	}
}