src/mesa/drivers/dri/radeon/radeon_maos_arrays.c

   1 /**************************************************************************
   2
   3 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
   4                      Tungsten Graphics Inc., Cedar Park, Texas.
   5
   6 All Rights Reserved.
   7
   8 Permission is hereby granted, free of charge, to any person obtaining
   9 a copy of this software and associated documentation files (the
  10 "Software"), to deal in the Software without restriction, including
  11 without limitation the rights to use, copy, modify, merge, publish,
  12 distribute, sublicense, and/or sell copies of the Software, and to
  13 permit persons to whom the Software is furnished to do so, subject to
  14 the following conditions:
  15
  16 The above copyright notice and this permission notice (including the
  17 next paragraph) shall be included in all copies or substantial
  18 portions of the Software.
  19
  20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  21 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  22 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  23 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  24 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  25 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  26 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  27
  28 **************************************************************************/
  29
  30 /*
  31  * Authors:
  32  *   Keith Whitwell <keith@tungstengraphics.com>
  33  */
  34
  35 #include "main/glheader.h"
  36 #include "main/imports.h"
  37 #include "main/mtypes.h"
  38 #include "main/macros.h"
  39
  40 #include "swrast_setup/swrast_setup.h"
  41 #include "math/m_translate.h"
  42 #include "tnl/tnl.h"
  43
  44 #include "radeon_context.h"
  45 #include "radeon_ioctl.h"
  46 #include "radeon_state.h"
  47 #include "radeon_swtcl.h"
  48 #include "radeon_maos.h"
  49 #include "radeon_tcl.h"
  50
  51 #if 0
  52 /* Usage:
  53  *   - from radeon_tcl_render
  54  *   - call radeonEmitArrays to ensure uptodate arrays in dma
  55  *   - emit primitives (new type?) which reference the data
  56  *       -- need to use elts for lineloop, quads, quadstrip/flat
  57  *       -- other primitives are all well-formed (need tristrip-1,fake-poly)
  58  *
  59  */
  60 static void emit_ubyte_rgba3( GLcontext *ctx,
  61                        struct radeon_dma_region *rvb,
  62                        char *data,
  63                        int stride,
  64                        int count )
  65 {
  66    int i;
  67    radeon_color_t *out = (radeon_color_t *)(rvb->start + rvb->address);
  68
  69    if (RADEON_DEBUG & DEBUG_VERTS)
  70       fprintf(stderr, "%s count %d stride %d out %p\n",
  71               __FUNCTION__, count, stride, (void *)out);
  72
  73    for (i = 0; i < count; i++) {
  74       out->red   = *data;
  75       out->green = *(data+1);
  76       out->blue  = *(data+2);
  77       out->alpha = 0xFF;
  78       out++;
  79       data += stride;
  80    }
  81 }
  82
  83 static void emit_ubyte_rgba4( GLcontext *ctx,
  84                               struct radeon_dma_region *rvb,
  85                               char *data,
  86                               int stride,
  87                               int count )
  88 {
  89    int i;
  90    int *out = (int *)(rvb->address + rvb->start);
  91
  92    if (RADEON_DEBUG & DEBUG_VERTS)
  93       fprintf(stderr, "%s count %d stride %d\n",
  94               __FUNCTION__, count, stride);
  95
  96    if (stride == 4)
  97        COPY_DWORDS( out, data, count );
  98    else
  99       for (i = 0; i < count; i++) {
 100          *out++ = LE32_TO_CPU(*(int *)data);
 101          data += stride;
 102       }
 103 }
 104
 105
 106 static void emit_ubyte_rgba( GLcontext *ctx,
 107                              struct radeon_dma_region *rvb,
 108                              char *data,
 109                              int size,
 110                              int stride,
 111                              int count )
 112 {
 113    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 114
 115    if (RADEON_DEBUG & DEBUG_VERTS)
 116       fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
 117
 118    assert (!rvb->buf);
 119
 120    if (stride == 0) {
 121       radeonAllocDmaRegion( rmesa, rvb, 4, 4 );
 122       count = 1;
 123       rvb->aos_start = GET_START(rvb);
 124       rvb->aos_stride = 0;
 125       rvb->aos_size = 1;
 126    }
 127    else {
 128       radeonAllocDmaRegion( rmesa, rvb, 4 * count, 4 ); /* alignment? */
 129       rvb->aos_start = GET_START(rvb);
 130       rvb->aos_stride = 1;
 131       rvb->aos_size = 1;
 132    }
 133
 134    /* Emit the data
 135     */
 136    switch (size) {
 137    case 3:
 138       emit_ubyte_rgba3( ctx, rvb, data, stride, count );
 139       break;
 140    case 4:
 141       emit_ubyte_rgba4( ctx, rvb, data, stride, count );
 142       break;
 143    default:
 144       assert(0);
 145       exit(1);
 146       break;
 147    }
 148 }
 149 #endif
 150
 151 #if defined(USE_X86_ASM)
 152 #define COPY_DWORDS( dst, src, nr )                                     \
 153 do {                                                                    \
 154         int __tmp;                                                      \
 155         __asm__ __volatile__( "rep ; movsl"                             \
 156                               : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \
 157                               : "0" (nr),                               \
 158                                 "D" ((long)dst),                        \
 159                                 "S" ((long)src) );                      \
 160 } while (0)
 161 #else
 162 #define COPY_DWORDS( dst, src, nr )             \
 163 do {                                            \
 164    int j;                                       \
 165    for ( j = 0 ; j < nr ; j++ )                 \
 166       dst[j] = ((int *)src)[j];                 \
 167    dst += nr;                                   \
 168 } while (0)
 169 #endif
 170
 171 static void emit_vecfog( GLcontext *ctx,
 172                          struct radeon_dma_region *rvb,
 173                          char *data,
 174                          int stride,
 175                          int count )
 176 {
 177    int i;
 178    GLfloat *out;
 179
 180    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 181
 182    if (RADEON_DEBUG & DEBUG_VERTS)
 183       fprintf(stderr, "%s count %d stride %d\n",
 184               __FUNCTION__, count, stride);
 185
 186    assert (!rvb->buf);
 187
 188    if (stride == 0) {
 189       radeonAllocDmaRegion( rmesa, rvb, 4, 4 );
 190       count = 1;
 191       rvb->aos_start = GET_START(rvb);
 192       rvb->aos_stride = 0;
 193       rvb->aos_size = 1;
 194    }
 195    else {
 196       radeonAllocDmaRegion( rmesa, rvb, count * 4, 4 ); /* alignment? */
 197       rvb->aos_start = GET_START(rvb);
 198       rvb->aos_stride = 1;
 199       rvb->aos_size = 1;
 200    }
 201
 202    /* Emit the data
 203     */
 204    out = (GLfloat *)(rvb->address + rvb->start);
 205    for (i = 0; i < count; i++) {
 206       out[0] = radeonComputeFogBlendFactor( ctx, *(GLfloat *)data );
 207       out++;
 208       data += stride;
 209    }
 210 }
 211
 212 static void emit_vec4( GLcontext *ctx,
 213                        struct radeon_dma_region *rvb,
 214                        char *data,
 215                        int stride,
 216                        int count )
 217 {
 218    int i;
 219    int *out = (int *)(rvb->address + rvb->start);
 220
 221    if (RADEON_DEBUG & DEBUG_VERTS)
 222       fprintf(stderr, "%s count %d stride %d\n",
 223               __FUNCTION__, count, stride);
 224
 225    if (stride == 4)
 226       COPY_DWORDS( out, data, count );
 227    else
 228       for (i = 0; i < count; i++) {
 229          out[0] = *(int *)data;
 230          out++;
 231          data += stride;
 232       }
 233 }
 234
 235
 236 static void emit_vec8( GLcontext *ctx,
 237                        struct radeon_dma_region *rvb,
 238                        char *data,
 239                        int stride,
 240                        int count )
 241 {
 242    int i;
 243    int *out = (int *)(rvb->address + rvb->start);
 244
 245    if (RADEON_DEBUG & DEBUG_VERTS)
 246       fprintf(stderr, "%s count %d stride %d\n",
 247               __FUNCTION__, count, stride);
 248
 249    if (stride == 8)
 250       COPY_DWORDS( out, data, count*2 );
 251    else
 252       for (i = 0; i < count; i++) {
 253          out[0] = *(int *)data;
 254          out[1] = *(int *)(data+4);
 255          out += 2;
 256          data += stride;
 257       }
 258 }
 259
 260 static void emit_vec12( GLcontext *ctx,
 261                        struct radeon_dma_region *rvb,
 262                        char *data,
 263                        int stride,
 264                        int count )
 265 {
 266    int i;
 267    int *out = (int *)(rvb->address + rvb->start);
 268
 269    if (RADEON_DEBUG & DEBUG_VERTS)
 270       fprintf(stderr, "%s count %d stride %d out %p data %p\n",
 271               __FUNCTION__, count, stride, (void *)out, (void *)data);
 272
 273    if (stride == 12)
 274       COPY_DWORDS( out, data, count*3 );
 275    else
 276       for (i = 0; i < count; i++) {
 277          out[0] = *(int *)data;
 278          out[1] = *(int *)(data+4);
 279          out[2] = *(int *)(data+8);
 280          out += 3;
 281          data += stride;
 282       }
 283 }
 284
 285 static void emit_vec16( GLcontext *ctx,
 286                         struct radeon_dma_region *rvb,
 287                         char *data,
 288                         int stride,
 289                         int count )
 290 {
 291    int i;
 292    int *out = (int *)(rvb->address + rvb->start);
 293
 294    if (RADEON_DEBUG & DEBUG_VERTS)
 295       fprintf(stderr, "%s count %d stride %d\n",
 296               __FUNCTION__, count, stride);
 297
 298    if (stride == 16)
 299       COPY_DWORDS( out, data, count*4 );
 300    else
 301       for (i = 0; i < count; i++) {
 302          out[0] = *(int *)data;
 303          out[1] = *(int *)(data+4);
 304          out[2] = *(int *)(data+8);
 305          out[3] = *(int *)(data+12);
 306          out += 4;
 307          data += stride;
 308       }
 309 }
 310
 311
 312 static void emit_vector( GLcontext *ctx,
 313                          struct radeon_dma_region *rvb,
 314                          char *data,
 315                          int size,
 316                          int stride,
 317                          int count )
 318 {
 319    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 320
 321    if (RADEON_DEBUG & DEBUG_VERTS)
 322       fprintf(stderr, "%s count %d size %d stride %d\n",
 323               __FUNCTION__, count, size, stride);
 324
 325    assert (!rvb->buf);
 326
 327    if (stride == 0) {
 328       radeonAllocDmaRegion( rmesa, rvb, size * 4, 4 );
 329       count = 1;
 330       rvb->aos_start = GET_START(rvb);
 331       rvb->aos_stride = 0;
 332       rvb->aos_size = size;
 333    }
 334    else {
 335       radeonAllocDmaRegion( rmesa, rvb, size * count * 4, 4 );  /* alignment? */
 336       rvb->aos_start = GET_START(rvb);
 337       rvb->aos_stride = size;
 338       rvb->aos_size = size;
 339    }
 340
 341    /* Emit the data
 342     */
 343    switch (size) {
 344    case 1:
 345       emit_vec4( ctx, rvb, data, stride, count );
 346       break;
 347    case 2:
 348       emit_vec8( ctx, rvb, data, stride, count );
 349       break;
 350    case 3:
 351       emit_vec12( ctx, rvb, data, stride, count );
 352       break;
 353    case 4:
 354       emit_vec16( ctx, rvb, data, stride, count );
 355       break;
 356    default:
 357       assert(0);
 358       exit(1);
 359       break;
 360    }
 361
 362 }
 363
 364
 365
 366 static void emit_s0_vec( GLcontext *ctx,
 367                          struct radeon_dma_region *rvb,
 368                          char *data,
 369                          int stride,
 370                          int count )
 371 {
 372    int i;
 373    int *out = (int *)(rvb->address + rvb->start);
 374
 375    if (RADEON_DEBUG & DEBUG_VERTS)
 376       fprintf(stderr, "%s count %d stride %d\n",
 377               __FUNCTION__, count, stride);
 378
 379    for (i = 0; i < count; i++) {
 380       out[0] = *(int *)data;
 381       out[1] = 0;
 382       out += 2;
 383       data += stride;
 384    }
 385 }
 386
 387 static void emit_stq_vec( GLcontext *ctx,
 388                          struct radeon_dma_region *rvb,
 389                          char *data,
 390                          int stride,
 391                          int count )
 392 {
 393    int i;
 394    int *out = (int *)(rvb->address + rvb->start);
 395
 396    if (RADEON_DEBUG & DEBUG_VERTS)
 397       fprintf(stderr, "%s count %d stride %d\n",
 398               __FUNCTION__, count, stride);
 399
 400    for (i = 0; i < count; i++) {
 401       out[0] = *(int *)data;
 402       out[1] = *(int *)(data+4);
 403       out[2] = *(int *)(data+12);
 404       out += 3;
 405       data += stride;
 406    }
 407 }
 408
 409
 410
 411
 412 static void emit_tex_vector( GLcontext *ctx,
 413                              struct radeon_dma_region *rvb,
 414                              char *data,
 415                              int size,
 416                              int stride,
 417                              int count )
 418 {
 419    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 420    int emitsize;
 421
 422    if (RADEON_DEBUG & DEBUG_VERTS)
 423       fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
 424
 425    assert (!rvb->buf);
 426
 427    switch (size) {
 428    case 4: emitsize = 3; break;
 429    case 3: emitsize = 3; break;
 430    default: emitsize = 2; break;
 431    }
 432
 433
 434    if (stride == 0) {
 435       radeonAllocDmaRegion( rmesa, rvb, 4 * emitsize, 4 );
 436       count = 1;
 437       rvb->aos_start = GET_START(rvb);
 438       rvb->aos_stride = 0;
 439       rvb->aos_size = emitsize;
 440    }
 441    else {
 442       radeonAllocDmaRegion( rmesa, rvb, 4 * emitsize * count, 4 );
 443       rvb->aos_start = GET_START(rvb);
 444       rvb->aos_stride = emitsize;
 445       rvb->aos_size = emitsize;
 446    }
 447
 448
 449    /* Emit the data
 450     */
 451    switch (size) {
 452    case 1:
 453       emit_s0_vec( ctx, rvb, data, stride, count );
 454       break;
 455    case 2:
 456       emit_vec8( ctx, rvb, data, stride, count );
 457       break;
 458    case 3:
 459       emit_vec12( ctx, rvb, data, stride, count );
 460       break;
 461    case 4:
 462       emit_stq_vec( ctx, rvb, data, stride, count );
 463       break;
 464    default:
 465       assert(0);
 466       exit(1);
 467       break;
 468    }
 469 }
 470
 471
 472
 473
 474 /* Emit any changed arrays to new GART memory, re-emit a packet to
 475  * update the arrays.
 476  */
 477 void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
 478 {
 479    radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
 480    struct vertex_buffer *VB = &TNL_CONTEXT( ctx )->vb;
 481    struct radeon_dma_region **component = rmesa->tcl.aos_components;
 482    GLuint nr = 0;
 483    GLuint vfmt = 0;
 484    GLuint count = VB->Count;
 485    GLuint vtx, unit;
 486
 487 #if 0
 488    if (RADEON_DEBUG & DEBUG_VERTS)
 489       _tnl_print_vert_flags( __FUNCTION__, inputs );
 490 #endif
 491
 492    if (1) {
 493       if (!rmesa->tcl.obj.buf)
 494          emit_vector( ctx,
 495                       &rmesa->tcl.obj,
 496                       (char *)VB->ObjPtr->data,
 497                       VB->ObjPtr->size,
 498                       VB->ObjPtr->stride,
 499                       count);
 500
 501       switch( VB->ObjPtr->size ) {
 502       case 4: vfmt |= RADEON_CP_VC_FRMT_W0;
 503       case 3: vfmt |= RADEON_CP_VC_FRMT_Z;
 504       case 2: vfmt |= RADEON_CP_VC_FRMT_XY;
 505       default:
 506          break;
 507       }
 508       component[nr++] = &rmesa->tcl.obj;
 509    }
 510
 511
 512    if (inputs & VERT_BIT_NORMAL) {
 513       if (!rmesa->tcl.norm.buf)
 514          emit_vector( ctx,
 515                       &(rmesa->tcl.norm),
 516                       (char *)VB->NormalPtr->data,
 517                       3,
 518                       VB->NormalPtr->stride,
 519                       count);
 520
 521       vfmt |= RADEON_CP_VC_FRMT_N0;
 522       component[nr++] = &rmesa->tcl.norm;
 523    }
 524
 525    if (inputs & VERT_BIT_COLOR0) {
 526       int emitsize;
 527       if (VB->ColorPtr[0]->size == 4 &&
 528           (VB->ColorPtr[0]->stride != 0 ||
 529            VB->ColorPtr[0]->data[0][3] != 1.0)) {
 530          vfmt |= RADEON_CP_VC_FRMT_FPCOLOR | RADEON_CP_VC_FRMT_FPALPHA;
 531          emitsize = 4;
 532       }
 533
 534       else {
 535          vfmt |= RADEON_CP_VC_FRMT_FPCOLOR;
 536          emitsize = 3;
 537       }
 538
 539       if (!rmesa->tcl.rgba.buf)
 540          emit_vector( ctx,
 541                       &(rmesa->tcl.rgba),
 542                       (char *)VB->ColorPtr[0]->data,
 543                       emitsize,
 544                       VB->ColorPtr[0]->stride,
 545                       count);
 546
 547
 548       component[nr++] = &rmesa->tcl.rgba;
 549    }
 550
 551
 552    if (inputs & VERT_BIT_COLOR1) {
 553       if (!rmesa->tcl.spec.buf) {
 554
 555          emit_vector( ctx,
 556                       &rmesa->tcl.spec,
 557                       (char *)VB->SecondaryColorPtr[0]->data,
 558                       3,
 559                       VB->SecondaryColorPtr[0]->stride,
 560                       count);
 561       }
 562
 563       vfmt |= RADEON_CP_VC_FRMT_FPSPEC;
 564       component[nr++] = &rmesa->tcl.spec;
 565    }
 566
 567 /* FIXME: not sure if this is correct. May need to stitch this together with
 568    secondary color. It seems odd that for primary color color and alpha values
 569    are emitted together but for secondary color not. */
 570    if (inputs & VERT_BIT_FOG) {
 571       if (!rmesa->tcl.fog.buf)
 572          emit_vecfog( ctx,
 573                       &(rmesa->tcl.fog),
 574                       (char *)VB->FogCoordPtr->data,
 575                       VB->FogCoordPtr->stride,
 576                       count);
 577
 578       vfmt |= RADEON_CP_VC_FRMT_FPFOG;
 579       component[nr++] = &rmesa->tcl.fog;
 580    }
 581
 582
 583    vtx = (rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &
 584           ~(RADEON_TCL_VTX_Q0|RADEON_TCL_VTX_Q1|RADEON_TCL_VTX_Q2));
 585
 586    for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
 587       if (inputs & VERT_BIT_TEX(unit)) {
 588          if (!rmesa->tcl.tex[unit].buf)
 589             emit_tex_vector( ctx,
 590                              &(rmesa->tcl.tex[unit]),
 591                              (char *)VB->TexCoordPtr[unit]->data,
 592                              VB->TexCoordPtr[unit]->size,
 593                              VB->TexCoordPtr[unit]->stride,
 594                              count );
 595
 596          vfmt |= RADEON_ST_BIT(unit);
 597          /* assume we need the 3rd coord if texgen is active for r/q OR at least
 598             3 coords are submitted. This may not be 100% correct */
 599          if (VB->TexCoordPtr[unit]->size >= 3) {
 600             vtx |= RADEON_Q_BIT(unit);
 601             vfmt |= RADEON_Q_BIT(unit);
 602          }
 603          if ( (ctx->Texture.Unit[unit].TexGenEnabled & (R_BIT | Q_BIT)) )
 604             vtx |= RADEON_Q_BIT(unit);
 605          else if ((VB->TexCoordPtr[unit]->size >= 3) &&
 606                   ((ctx->Texture.Unit[unit]._ReallyEnabled & (TEXTURE_CUBE_BIT)) == 0)) {
 607             GLuint swaptexmatcol = (VB->TexCoordPtr[unit]->size - 3);
 608             if (((rmesa->NeedTexMatrix >> unit) & 1) &&
 609                  (swaptexmatcol != ((rmesa->TexMatColSwap >> unit) & 1)))
 610                radeonUploadTexMatrix( rmesa, unit, swaptexmatcol ) ;
 611          }
 612          component[nr++] = &rmesa->tcl.tex[unit];
 613       }
 614    }
 615
 616    if (vtx != rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT]) {
 617       RADEON_STATECHANGE( rmesa, tcl );
 618       rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] = vtx;
 619    }
 620
 621    rmesa->tcl.nr_aos_components = nr;
 622    rmesa->tcl.vertex_format = vfmt;
 623 }
 624
 625
 626 void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
 627 {
 628    radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
 629    GLuint unit;
 630
 631 #if 0
 632    if (RADEON_DEBUG & DEBUG_VERTS)
 633       _tnl_print_vert_flags( __FUNCTION__, newinputs );
 634 #endif
 635
 636    if (newinputs & VERT_BIT_POS)
 637      radeonReleaseDmaRegion( rmesa, &rmesa->tcl.obj, __FUNCTION__ );
 638
 639    if (newinputs & VERT_BIT_NORMAL)
 640       radeonReleaseDmaRegion( rmesa, &rmesa->tcl.norm, __FUNCTION__ );
 641
 642    if (newinputs & VERT_BIT_COLOR0)
 643       radeonReleaseDmaRegion( rmesa, &rmesa->tcl.rgba, __FUNCTION__ );
 644
 645    if (newinputs & VERT_BIT_COLOR1)
 646       radeonReleaseDmaRegion( rmesa, &rmesa->tcl.spec, __FUNCTION__ );
 647
 648    if (newinputs & VERT_BIT_FOG)
 649       radeonReleaseDmaRegion( rmesa, &rmesa->tcl.fog, __FUNCTION__ );
 650
 651    for (unit = 0 ; unit < ctx->Const.MaxTextureUnits; unit++) {
 652       if (newinputs & VERT_BIT_TEX(unit))
 653          radeonReleaseDmaRegion( rmesa, &rmesa->tcl.tex[unit], __FUNCTION__ );
 654    }
 655 }