/**************************************************************************

Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
Tungsten Graphics Inc., Cedar Park, Texas.

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keith@tungstengraphics.com>
 */
35 #include "main/glheader.h"
36 #include "main/imports.h"
37 #include "main/mtypes.h"
38 #include "main/macros.h"
40 #include "swrast_setup/swrast_setup.h"
41 #include "math/m_translate.h"
43 #include "tnl/tcontext.h"
45 #include "radeon_context.h"
46 #include "radeon_ioctl.h"
47 #include "radeon_state.h"
48 #include "radeon_swtcl.h"
49 #include "radeon_maos.h"
50 #include "radeon_tcl.h"
/*
 * - from radeon_tcl_render
 * - call radeonEmitArrays to ensure uptodate arrays in dma
 * - emit primitives (new type?) which reference the data
 * -- need to use elts for lineloop, quads, quadstrip/flat
 * -- other primitives are all well-formed (need tristrip-1,fake-poly)
 */
61 static void emit_ubyte_rgba3( GLcontext
*ctx
,
62 struct radeon_dma_region
*rvb
,
68 radeon_color_t
*out
= (radeon_color_t
*)(rvb
->start
+ rvb
->address
);
70 if (RADEON_DEBUG
& DEBUG_VERTS
)
71 fprintf(stderr
, "%s count %d stride %d out %p\n",
72 __FUNCTION__
, count
, stride
, (void *)out
);
74 for (i
= 0; i
< count
; i
++) {
76 out
->green
= *(data
+1);
77 out
->blue
= *(data
+2);
84 static void emit_ubyte_rgba4( GLcontext
*ctx
,
85 struct radeon_dma_region
*rvb
,
91 int *out
= (int *)(rvb
->address
+ rvb
->start
);
93 if (RADEON_DEBUG
& DEBUG_VERTS
)
94 fprintf(stderr
, "%s count %d stride %d\n",
95 __FUNCTION__
, count
, stride
);
98 COPY_DWORDS( out
, data
, count
);
100 for (i
= 0; i
< count
; i
++) {
101 *out
++ = LE32_TO_CPU(*(int *)data
);
107 static void emit_ubyte_rgba( GLcontext
*ctx
,
108 struct radeon_dma_region
*rvb
,
114 radeonContextPtr rmesa
= RADEON_CONTEXT(ctx
);
116 if (RADEON_DEBUG
& DEBUG_VERTS
)
117 fprintf(stderr
, "%s %d/%d\n", __FUNCTION__
, count
, size
);
122 radeonAllocDmaRegion( rmesa
, rvb
, 4, 4 );
124 rvb
->aos_start
= GET_START(rvb
);
129 radeonAllocDmaRegion( rmesa
, rvb
, 4 * count
, 4 ); /* alignment? */
130 rvb
->aos_start
= GET_START(rvb
);
139 emit_ubyte_rgba3( ctx
, rvb
, data
, stride
, count
);
142 emit_ubyte_rgba4( ctx
, rvb
, data
, stride
, count
);
152 #if defined(USE_X86_ASM)
153 #define COPY_DWORDS( dst, src, nr ) \
156 __asm__ __volatile__( "rep ; movsl" \
157 : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \
163 #define COPY_DWORDS( dst, src, nr ) \
166 for ( j = 0 ; j < nr ; j++ ) \
167 dst[j] = ((int *)src)[j]; \
172 static void emit_vecfog( GLcontext
*ctx
,
173 struct radeon_dma_region
*rvb
,
181 radeonContextPtr rmesa
= RADEON_CONTEXT(ctx
);
183 if (RADEON_DEBUG
& DEBUG_VERTS
)
184 fprintf(stderr
, "%s count %d stride %d\n",
185 __FUNCTION__
, count
, stride
);
190 radeonAllocDmaRegion( rmesa
, rvb
, 4, 4 );
192 rvb
->aos_start
= GET_START(rvb
);
197 radeonAllocDmaRegion( rmesa
, rvb
, count
* 4, 4 ); /* alignment? */
198 rvb
->aos_start
= GET_START(rvb
);
205 out
= (GLfloat
*)(rvb
->address
+ rvb
->start
);
206 for (i
= 0; i
< count
; i
++) {
207 out
[0] = radeonComputeFogBlendFactor( ctx
, *(GLfloat
*)data
);
213 static void emit_vec4( GLcontext
*ctx
,
214 struct radeon_dma_region
*rvb
,
220 int *out
= (int *)(rvb
->address
+ rvb
->start
);
222 if (RADEON_DEBUG
& DEBUG_VERTS
)
223 fprintf(stderr
, "%s count %d stride %d\n",
224 __FUNCTION__
, count
, stride
);
227 COPY_DWORDS( out
, data
, count
);
229 for (i
= 0; i
< count
; i
++) {
230 out
[0] = *(int *)data
;
237 static void emit_vec8( GLcontext
*ctx
,
238 struct radeon_dma_region
*rvb
,
244 int *out
= (int *)(rvb
->address
+ rvb
->start
);
246 if (RADEON_DEBUG
& DEBUG_VERTS
)
247 fprintf(stderr
, "%s count %d stride %d\n",
248 __FUNCTION__
, count
, stride
);
251 COPY_DWORDS( out
, data
, count
*2 );
253 for (i
= 0; i
< count
; i
++) {
254 out
[0] = *(int *)data
;
255 out
[1] = *(int *)(data
+4);
261 static void emit_vec12( GLcontext
*ctx
,
262 struct radeon_dma_region
*rvb
,
268 int *out
= (int *)(rvb
->address
+ rvb
->start
);
270 if (RADEON_DEBUG
& DEBUG_VERTS
)
271 fprintf(stderr
, "%s count %d stride %d out %p data %p\n",
272 __FUNCTION__
, count
, stride
, (void *)out
, (void *)data
);
275 COPY_DWORDS( out
, data
, count
*3 );
277 for (i
= 0; i
< count
; i
++) {
278 out
[0] = *(int *)data
;
279 out
[1] = *(int *)(data
+4);
280 out
[2] = *(int *)(data
+8);
286 static void emit_vec16( GLcontext
*ctx
,
287 struct radeon_dma_region
*rvb
,
293 int *out
= (int *)(rvb
->address
+ rvb
->start
);
295 if (RADEON_DEBUG
& DEBUG_VERTS
)
296 fprintf(stderr
, "%s count %d stride %d\n",
297 __FUNCTION__
, count
, stride
);
300 COPY_DWORDS( out
, data
, count
*4 );
302 for (i
= 0; i
< count
; i
++) {
303 out
[0] = *(int *)data
;
304 out
[1] = *(int *)(data
+4);
305 out
[2] = *(int *)(data
+8);
306 out
[3] = *(int *)(data
+12);
313 static void emit_vector( GLcontext
*ctx
,
314 struct radeon_dma_region
*rvb
,
320 radeonContextPtr rmesa
= RADEON_CONTEXT(ctx
);
322 if (RADEON_DEBUG
& DEBUG_VERTS
)
323 fprintf(stderr
, "%s count %d size %d stride %d\n",
324 __FUNCTION__
, count
, size
, stride
);
329 radeonAllocDmaRegion( rmesa
, rvb
, size
* 4, 4 );
331 rvb
->aos_start
= GET_START(rvb
);
333 rvb
->aos_size
= size
;
336 radeonAllocDmaRegion( rmesa
, rvb
, size
* count
* 4, 4 ); /* alignment? */
337 rvb
->aos_start
= GET_START(rvb
);
338 rvb
->aos_stride
= size
;
339 rvb
->aos_size
= size
;
346 emit_vec4( ctx
, rvb
, data
, stride
, count
);
349 emit_vec8( ctx
, rvb
, data
, stride
, count
);
352 emit_vec12( ctx
, rvb
, data
, stride
, count
);
355 emit_vec16( ctx
, rvb
, data
, stride
, count
);
367 static void emit_s0_vec( GLcontext
*ctx
,
368 struct radeon_dma_region
*rvb
,
374 int *out
= (int *)(rvb
->address
+ rvb
->start
);
376 if (RADEON_DEBUG
& DEBUG_VERTS
)
377 fprintf(stderr
, "%s count %d stride %d\n",
378 __FUNCTION__
, count
, stride
);
380 for (i
= 0; i
< count
; i
++) {
381 out
[0] = *(int *)data
;
388 static void emit_stq_vec( GLcontext
*ctx
,
389 struct radeon_dma_region
*rvb
,
395 int *out
= (int *)(rvb
->address
+ rvb
->start
);
397 if (RADEON_DEBUG
& DEBUG_VERTS
)
398 fprintf(stderr
, "%s count %d stride %d\n",
399 __FUNCTION__
, count
, stride
);
401 for (i
= 0; i
< count
; i
++) {
402 out
[0] = *(int *)data
;
403 out
[1] = *(int *)(data
+4);
404 out
[2] = *(int *)(data
+12);
413 static void emit_tex_vector( GLcontext
*ctx
,
414 struct radeon_dma_region
*rvb
,
420 radeonContextPtr rmesa
= RADEON_CONTEXT(ctx
);
423 if (RADEON_DEBUG
& DEBUG_VERTS
)
424 fprintf(stderr
, "%s %d/%d\n", __FUNCTION__
, count
, size
);
429 case 4: emitsize
= 3; break;
430 case 3: emitsize
= 3; break;
431 default: emitsize
= 2; break;
436 radeonAllocDmaRegion( rmesa
, rvb
, 4 * emitsize
, 4 );
438 rvb
->aos_start
= GET_START(rvb
);
440 rvb
->aos_size
= emitsize
;
443 radeonAllocDmaRegion( rmesa
, rvb
, 4 * emitsize
* count
, 4 );
444 rvb
->aos_start
= GET_START(rvb
);
445 rvb
->aos_stride
= emitsize
;
446 rvb
->aos_size
= emitsize
;
454 emit_s0_vec( ctx
, rvb
, data
, stride
, count
);
457 emit_vec8( ctx
, rvb
, data
, stride
, count
);
460 emit_vec12( ctx
, rvb
, data
, stride
, count
);
463 emit_stq_vec( ctx
, rvb
, data
, stride
, count
);
475 /* Emit any changed arrays to new GART memory, re-emit a packet to
478 void radeonEmitArrays( GLcontext
*ctx
, GLuint inputs
)
480 radeonContextPtr rmesa
= RADEON_CONTEXT( ctx
);
481 struct vertex_buffer
*VB
= &TNL_CONTEXT( ctx
)->vb
;
482 struct radeon_dma_region
**component
= rmesa
->tcl
.aos_components
;
485 GLuint count
= VB
->Count
;
489 if (RADEON_DEBUG
& DEBUG_VERTS
)
490 _tnl_print_vert_flags( __FUNCTION__
, inputs
);
494 if (!rmesa
->tcl
.obj
.buf
)
497 (char *)VB
->ObjPtr
->data
,
502 switch( VB
->ObjPtr
->size
) {
503 case 4: vfmt
|= RADEON_CP_VC_FRMT_W0
;
504 case 3: vfmt
|= RADEON_CP_VC_FRMT_Z
;
505 case 2: vfmt
|= RADEON_CP_VC_FRMT_XY
;
509 component
[nr
++] = &rmesa
->tcl
.obj
;
513 if (inputs
& VERT_BIT_NORMAL
) {
514 if (!rmesa
->tcl
.norm
.buf
)
517 (char *)VB
->NormalPtr
->data
,
519 VB
->NormalPtr
->stride
,
522 vfmt
|= RADEON_CP_VC_FRMT_N0
;
523 component
[nr
++] = &rmesa
->tcl
.norm
;
526 if (inputs
& VERT_BIT_COLOR0
) {
528 if (VB
->ColorPtr
[0]->size
== 4 &&
529 (VB
->ColorPtr
[0]->stride
!= 0 ||
530 VB
->ColorPtr
[0]->data
[0][3] != 1.0)) {
531 vfmt
|= RADEON_CP_VC_FRMT_FPCOLOR
| RADEON_CP_VC_FRMT_FPALPHA
;
536 vfmt
|= RADEON_CP_VC_FRMT_FPCOLOR
;
540 if (!rmesa
->tcl
.rgba
.buf
)
543 (char *)VB
->ColorPtr
[0]->data
,
545 VB
->ColorPtr
[0]->stride
,
549 component
[nr
++] = &rmesa
->tcl
.rgba
;
553 if (inputs
& VERT_BIT_COLOR1
) {
554 if (!rmesa
->tcl
.spec
.buf
) {
558 (char *)VB
->SecondaryColorPtr
[0]->data
,
560 VB
->SecondaryColorPtr
[0]->stride
,
564 vfmt
|= RADEON_CP_VC_FRMT_FPSPEC
;
565 component
[nr
++] = &rmesa
->tcl
.spec
;
568 /* FIXME: not sure if this is correct. May need to stitch this together with
569 secondary color. It seems odd that for primary color color and alpha values
570 are emitted together but for secondary color not. */
571 if (inputs
& VERT_BIT_FOG
) {
572 if (!rmesa
->tcl
.fog
.buf
)
575 (char *)VB
->FogCoordPtr
->data
,
576 VB
->FogCoordPtr
->stride
,
579 vfmt
|= RADEON_CP_VC_FRMT_FPFOG
;
580 component
[nr
++] = &rmesa
->tcl
.fog
;
584 vtx
= (rmesa
->hw
.tcl
.cmd
[TCL_OUTPUT_VTXFMT
] &
585 ~(RADEON_TCL_VTX_Q0
|RADEON_TCL_VTX_Q1
|RADEON_TCL_VTX_Q2
));
587 for (unit
= 0; unit
< ctx
->Const
.MaxTextureUnits
; unit
++) {
588 if (inputs
& VERT_BIT_TEX(unit
)) {
589 if (!rmesa
->tcl
.tex
[unit
].buf
)
590 emit_tex_vector( ctx
,
591 &(rmesa
->tcl
.tex
[unit
]),
592 (char *)VB
->TexCoordPtr
[unit
]->data
,
593 VB
->TexCoordPtr
[unit
]->size
,
594 VB
->TexCoordPtr
[unit
]->stride
,
597 vfmt
|= RADEON_ST_BIT(unit
);
598 /* assume we need the 3rd coord if texgen is active for r/q OR at least
599 3 coords are submitted. This may not be 100% correct */
600 if (VB
->TexCoordPtr
[unit
]->size
>= 3) {
601 vtx
|= RADEON_Q_BIT(unit
);
602 vfmt
|= RADEON_Q_BIT(unit
);
604 if ( (ctx
->Texture
.Unit
[unit
].TexGenEnabled
& (R_BIT
| Q_BIT
)) )
605 vtx
|= RADEON_Q_BIT(unit
);
606 else if ((VB
->TexCoordPtr
[unit
]->size
>= 3) &&
607 ((ctx
->Texture
.Unit
[unit
]._ReallyEnabled
& (TEXTURE_CUBE_BIT
)) == 0)) {
608 GLuint swaptexmatcol
= (VB
->TexCoordPtr
[unit
]->size
- 3);
609 if (((rmesa
->NeedTexMatrix
>> unit
) & 1) &&
610 (swaptexmatcol
!= ((rmesa
->TexMatColSwap
>> unit
) & 1)))
611 radeonUploadTexMatrix( rmesa
, unit
, swaptexmatcol
) ;
613 component
[nr
++] = &rmesa
->tcl
.tex
[unit
];
617 if (vtx
!= rmesa
->hw
.tcl
.cmd
[TCL_OUTPUT_VTXFMT
]) {
618 RADEON_STATECHANGE( rmesa
, tcl
);
619 rmesa
->hw
.tcl
.cmd
[TCL_OUTPUT_VTXFMT
] = vtx
;
622 rmesa
->tcl
.nr_aos_components
= nr
;
623 rmesa
->tcl
.vertex_format
= vfmt
;
627 void radeonReleaseArrays( GLcontext
*ctx
, GLuint newinputs
)
629 radeonContextPtr rmesa
= RADEON_CONTEXT( ctx
);
633 if (RADEON_DEBUG
& DEBUG_VERTS
)
634 _tnl_print_vert_flags( __FUNCTION__
, newinputs
);
637 if (newinputs
& VERT_BIT_POS
)
638 radeonReleaseDmaRegion( rmesa
, &rmesa
->tcl
.obj
, __FUNCTION__
);
640 if (newinputs
& VERT_BIT_NORMAL
)
641 radeonReleaseDmaRegion( rmesa
, &rmesa
->tcl
.norm
, __FUNCTION__
);
643 if (newinputs
& VERT_BIT_COLOR0
)
644 radeonReleaseDmaRegion( rmesa
, &rmesa
->tcl
.rgba
, __FUNCTION__
);
646 if (newinputs
& VERT_BIT_COLOR1
)
647 radeonReleaseDmaRegion( rmesa
, &rmesa
->tcl
.spec
, __FUNCTION__
);
649 if (newinputs
& VERT_BIT_FOG
)
650 radeonReleaseDmaRegion( rmesa
, &rmesa
->tcl
.fog
, __FUNCTION__
);
652 for (unit
= 0 ; unit
< ctx
->Const
.MaxTextureUnits
; unit
++) {
653 if (newinputs
& VERT_BIT_TEX(unit
))
654 radeonReleaseDmaRegion( rmesa
, &rmesa
->tcl
.tex
[unit
], __FUNCTION__
);