src/mesa/drivers/dri/radeon/radeon_maos_arrays.c

   1 /* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_maos_arrays.c,v 1.1 2002/10/30 12:51:55 alanh Exp $ */
   2 /**************************************************************************
   3
   4 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
   5                      Tungsten Graphics Inc., Cedar Park, Texas.
   6
   7 All Rights Reserved.
   8
   9 Permission is hereby granted, free of charge, to any person obtaining
  10 a copy of this software and associated documentation files (the
  11 "Software"), to deal in the Software without restriction, including
  12 without limitation the rights to use, copy, modify, merge, publish,
  13 distribute, sublicense, and/or sell copies of the Software, and to
  14 permit persons to whom the Software is furnished to do so, subject to
  15 the following conditions:
  16
  17 The above copyright notice and this permission notice (including the
  18 next paragraph) shall be included in all copies or substantial
  19 portions of the Software.
  20
  21 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  22 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  23 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  24 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  25 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  26 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  27 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  28
  29 **************************************************************************/
  30
  31 /*
  32  * Authors:
  33  *   Keith Whitwell <keith@tungstengraphics.com>
  34  */
  35
  36 #include "glheader.h"
  37 #include "imports.h"
  38 #include "mtypes.h"
  39 #include "macros.h"
  40
  41 #include "swrast_setup/swrast_setup.h"
  42 #include "math/m_translate.h"
  43 #include "tnl/tnl.h"
  44 #include "tnl/t_context.h"
  45
  46 #include "radeon_context.h"
  47 #include "radeon_ioctl.h"
  48 #include "radeon_state.h"
  49 #include "radeon_swtcl.h"
  50 #include "radeon_maos.h"
  51
  52 #if 0
  53 /* Usage:
  54  *   - from radeon_tcl_render
  55  *   - call radeonEmitArrays to ensure uptodate arrays in dma
  56  *   - emit primitives (new type?) which reference the data
  57  *       -- need to use elts for lineloop, quads, quadstrip/flat
  58  *       -- other primitives are all well-formed (need tristrip-1,fake-poly)
  59  *
  60  */
  61 static void emit_ubyte_rgba3( GLcontext *ctx,
  62                        struct radeon_dma_region *rvb,
  63                        char *data,
  64                        int stride,
  65                        int count )
  66 {
  67    int i;
  68    radeon_color_t *out = (radeon_color_t *)(rvb->start + rvb->address);
  69
  70    if (RADEON_DEBUG & DEBUG_VERTS)
  71       fprintf(stderr, "%s count %d stride %d out %p\n",
  72               __FUNCTION__, count, stride, (void *)out);
  73
  74    for (i = 0; i < count; i++) {
  75       out->red   = *data;
  76       out->green = *(data+1);
  77       out->blue  = *(data+2);
  78       out->alpha = 0xFF;
  79       out++;
  80       data += stride;
  81    }
  82 }
  83
  84 static void emit_ubyte_rgba4( GLcontext *ctx,
  85                               struct radeon_dma_region *rvb,
  86                               char *data,
  87                               int stride,
  88                               int count )
  89 {
  90    int i;
  91    int *out = (int *)(rvb->address + rvb->start);
  92
  93    if (RADEON_DEBUG & DEBUG_VERTS)
  94       fprintf(stderr, "%s count %d stride %d\n",
  95               __FUNCTION__, count, stride);
  96
  97    if (stride == 4)
  98        COPY_DWORDS( out, data, count );
  99    else
 100       for (i = 0; i < count; i++) {
 101          *out++ = LE32_TO_CPU(*(int *)data);
 102          data += stride;
 103       }
 104 }
 105
 106
 107 static void emit_ubyte_rgba( GLcontext *ctx,
 108                              struct radeon_dma_region *rvb,
 109                              char *data,
 110                              int size,
 111                              int stride,
 112                              int count )
 113 {
 114    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 115
 116    if (RADEON_DEBUG & DEBUG_VERTS)
 117       fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
 118
 119    assert (!rvb->buf);
 120
 121    if (stride == 0) {
 122       radeonAllocDmaRegion( rmesa, rvb, 4, 4 );
 123       count = 1;
 124       rvb->aos_start = GET_START(rvb);
 125       rvb->aos_stride = 0;
 126       rvb->aos_size = 1;
 127    }
 128    else {
 129       radeonAllocDmaRegion( rmesa, rvb, 4 * count, 4 ); /* alignment? */
 130       rvb->aos_start = GET_START(rvb);
 131       rvb->aos_stride = 1;
 132       rvb->aos_size = 1;
 133    }
 134
 135    /* Emit the data
 136     */
 137    switch (size) {
 138    case 3:
 139       emit_ubyte_rgba3( ctx, rvb, data, stride, count );
 140       break;
 141    case 4:
 142       emit_ubyte_rgba4( ctx, rvb, data, stride, count );
 143       break;
 144    default:
 145       assert(0);
 146       exit(1);
 147       break;
 148    }
 149 }
 150 #endif
 151
 152 #if defined(USE_X86_ASM)
 153 #define COPY_DWORDS( dst, src, nr )                                     \
 154 do {                                                                    \
 155         int __tmp;                                                      \
 156         __asm__ __volatile__( "rep ; movsl"                             \
 157                               : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \
 158                               : "0" (nr),                               \
 159                                 "D" ((long)dst),                        \
 160                                 "S" ((long)src) );                      \
 161 } while (0)
 162 #else
 163 #define COPY_DWORDS( dst, src, nr )             \
 164 do {                                            \
 165    int j;                                       \
 166    for ( j = 0 ; j < nr ; j++ )                 \
 167       dst[j] = ((int *)src)[j];                 \
 168    dst += nr;                                   \
 169 } while (0)
 170 #endif
 171
 172
 173 static void emit_vec4( GLcontext *ctx,
 174                        struct radeon_dma_region *rvb,
 175                        char *data,
 176                        int stride,
 177                        int count )
 178 {
 179    int i;
 180    int *out = (int *)(rvb->address + rvb->start);
 181
 182    if (RADEON_DEBUG & DEBUG_VERTS)
 183       fprintf(stderr, "%s count %d stride %d\n",
 184               __FUNCTION__, count, stride);
 185
 186    if (stride == 4)
 187       COPY_DWORDS( out, data, count );
 188    else
 189       for (i = 0; i < count; i++) {
 190          out[0] = *(int *)data;
 191          out++;
 192          data += stride;
 193       }
 194 }
 195
 196
 197 static void emit_vec8( GLcontext *ctx,
 198                        struct radeon_dma_region *rvb,
 199                        char *data,
 200                        int stride,
 201                        int count )
 202 {
 203    int i;
 204    int *out = (int *)(rvb->address + rvb->start);
 205
 206    if (RADEON_DEBUG & DEBUG_VERTS)
 207       fprintf(stderr, "%s count %d stride %d\n",
 208               __FUNCTION__, count, stride);
 209
 210    if (stride == 8)
 211       COPY_DWORDS( out, data, count*2 );
 212    else
 213       for (i = 0; i < count; i++) {
 214          out[0] = *(int *)data;
 215          out[1] = *(int *)(data+4);
 216          out += 2;
 217          data += stride;
 218       }
 219 }
 220
 221 static void emit_vec12( GLcontext *ctx,
 222                        struct radeon_dma_region *rvb,
 223                        char *data,
 224                        int stride,
 225                        int count )
 226 {
 227    int i;
 228    int *out = (int *)(rvb->address + rvb->start);
 229
 230    if (RADEON_DEBUG & DEBUG_VERTS)
 231       fprintf(stderr, "%s count %d stride %d out %p data %p\n",
 232               __FUNCTION__, count, stride, (void *)out, (void *)data);
 233
 234    if (stride == 12)
 235       COPY_DWORDS( out, data, count*3 );
 236    else
 237       for (i = 0; i < count; i++) {
 238          out[0] = *(int *)data;
 239          out[1] = *(int *)(data+4);
 240          out[2] = *(int *)(data+8);
 241          out += 3;
 242          data += stride;
 243       }
 244 }
 245
 246 static void emit_vec16( GLcontext *ctx,
 247                         struct radeon_dma_region *rvb,
 248                         char *data,
 249                         int stride,
 250                         int count )
 251 {
 252    int i;
 253    int *out = (int *)(rvb->address + rvb->start);
 254
 255    if (RADEON_DEBUG & DEBUG_VERTS)
 256       fprintf(stderr, "%s count %d stride %d\n",
 257               __FUNCTION__, count, stride);
 258
 259    if (stride == 16)
 260       COPY_DWORDS( out, data, count*4 );
 261    else
 262       for (i = 0; i < count; i++) {
 263          out[0] = *(int *)data;
 264          out[1] = *(int *)(data+4);
 265          out[2] = *(int *)(data+8);
 266          out[3] = *(int *)(data+12);
 267          out += 4;
 268          data += stride;
 269       }
 270 }
 271
 272
 273 static void emit_vector( GLcontext *ctx,
 274                          struct radeon_dma_region *rvb,
 275                          char *data,
 276                          int size,
 277                          int stride,
 278                          int count )
 279 {
 280    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 281
 282    if (RADEON_DEBUG & DEBUG_VERTS)
 283       fprintf(stderr, "%s count %d size %d stride %d\n",
 284               __FUNCTION__, count, size, stride);
 285
 286    assert (!rvb->buf);
 287
 288    if (stride == 0) {
 289       radeonAllocDmaRegion( rmesa, rvb, size * 4, 4 );
 290       count = 1;
 291       rvb->aos_start = GET_START(rvb);
 292       rvb->aos_stride = 0;
 293       rvb->aos_size = size;
 294    }
 295    else {
 296       radeonAllocDmaRegion( rmesa, rvb, size * count * 4, 4 );  /* alignment? */
 297       rvb->aos_start = GET_START(rvb);
 298       rvb->aos_stride = size;
 299       rvb->aos_size = size;
 300    }
 301
 302    /* Emit the data
 303     */
 304    switch (size) {
 305    case 1:
 306       emit_vec4( ctx, rvb, data, stride, count );
 307       break;
 308    case 2:
 309       emit_vec8( ctx, rvb, data, stride, count );
 310       break;
 311    case 3:
 312       emit_vec12( ctx, rvb, data, stride, count );
 313       break;
 314    case 4:
 315       emit_vec16( ctx, rvb, data, stride, count );
 316       break;
 317    default:
 318       assert(0);
 319       exit(1);
 320       break;
 321    }
 322
 323 }
 324
 325
 326
 327 static void emit_s0_vec( GLcontext *ctx,
 328                          struct radeon_dma_region *rvb,
 329                          char *data,
 330                          int stride,
 331                          int count )
 332 {
 333    int i;
 334    int *out = (int *)(rvb->address + rvb->start);
 335
 336    if (RADEON_DEBUG & DEBUG_VERTS)
 337       fprintf(stderr, "%s count %d stride %d\n",
 338               __FUNCTION__, count, stride);
 339
 340    for (i = 0; i < count; i++) {
 341       out[0] = *(int *)data;
 342       out[1] = 0;
 343       out += 2;
 344       data += stride;
 345    }
 346 }
 347
 348 static void emit_stq_vec( GLcontext *ctx,
 349                          struct radeon_dma_region *rvb,
 350                          char *data,
 351                          int stride,
 352                          int count )
 353 {
 354    int i;
 355    int *out = (int *)(rvb->address + rvb->start);
 356
 357    if (RADEON_DEBUG & DEBUG_VERTS)
 358       fprintf(stderr, "%s count %d stride %d\n",
 359               __FUNCTION__, count, stride);
 360
 361    for (i = 0; i < count; i++) {
 362       out[0] = *(int *)data;
 363       out[1] = *(int *)(data+4);
 364       out[2] = *(int *)(data+12);
 365       out += 3;
 366       data += stride;
 367    }
 368 }
 369
 370
 371
 372
 373 static void emit_tex_vector( GLcontext *ctx,
 374                              struct radeon_dma_region *rvb,
 375                              char *data,
 376                              int size,
 377                              int stride,
 378                              int count )
 379 {
 380    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 381    int emitsize;
 382
 383    if (RADEON_DEBUG & DEBUG_VERTS)
 384       fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
 385
 386    assert (!rvb->buf);
 387
 388    switch (size) {
 389    case 4: emitsize = 3; break;
 390    case 3: emitsize = 3; break;
 391    default: emitsize = 2; break;
 392    }
 393
 394
 395    if (stride == 0) {
 396       radeonAllocDmaRegion( rmesa, rvb, 4 * emitsize, 4 );
 397       count = 1;
 398       rvb->aos_start = GET_START(rvb);
 399       rvb->aos_stride = 0;
 400       rvb->aos_size = emitsize;
 401    }
 402    else {
 403       radeonAllocDmaRegion( rmesa, rvb, 4 * emitsize * count, 4 );
 404       rvb->aos_start = GET_START(rvb);
 405       rvb->aos_stride = emitsize;
 406       rvb->aos_size = emitsize;
 407    }
 408
 409
 410    /* Emit the data
 411     */
 412    switch (size) {
 413    case 1:
 414       emit_s0_vec( ctx, rvb, data, stride, count );
 415       break;
 416    case 2:
 417       emit_vec8( ctx, rvb, data, stride, count );
 418       break;
 419    case 3:
 420       emit_vec12( ctx, rvb, data, stride, count );
 421       break;
 422    case 4:
 423       emit_stq_vec( ctx, rvb, data, stride, count );
 424       break;
 425    default:
 426       assert(0);
 427       exit(1);
 428       break;
 429    }
 430 }
 431
 432
 433
 434
 435 /* Emit any changed arrays to new GART memory, re-emit a packet to
 436  * update the arrays.
 437  */
 438 void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
 439 {
 440    radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
 441    struct vertex_buffer *VB = &TNL_CONTEXT( ctx )->vb;
 442    struct radeon_dma_region **component = rmesa->tcl.aos_components;
 443    GLuint nr = 0;
 444    GLuint vfmt = 0;
 445    GLuint count = VB->Count;
 446    GLuint vtx;
 447
 448 #if 0
 449    if (RADEON_DEBUG & DEBUG_VERTS)
 450       _tnl_print_vert_flags( __FUNCTION__, inputs );
 451 #endif
 452
 453    if (1) {
 454       if (!rmesa->tcl.obj.buf)
 455          emit_vector( ctx,
 456                       &rmesa->tcl.obj,
 457                       (char *)VB->ObjPtr->data,
 458                       VB->ObjPtr->size,
 459                       VB->ObjPtr->stride,
 460                       count);
 461
 462       switch( VB->ObjPtr->size ) {
 463       case 4: vfmt |= RADEON_CP_VC_FRMT_W0;
 464       case 3: vfmt |= RADEON_CP_VC_FRMT_Z;
 465       case 2: vfmt |= RADEON_CP_VC_FRMT_XY;
 466       default:
 467          break;
 468       }
 469       component[nr++] = &rmesa->tcl.obj;
 470    }
 471
 472
 473    if (inputs & VERT_BIT_NORMAL) {
 474       if (!rmesa->tcl.norm.buf)
 475          emit_vector( ctx,
 476                       &(rmesa->tcl.norm),
 477                       (char *)VB->NormalPtr->data,
 478                       3,
 479                       VB->NormalPtr->stride,
 480                       count);
 481
 482       vfmt |= RADEON_CP_VC_FRMT_N0;
 483       component[nr++] = &rmesa->tcl.norm;
 484    }
 485
 486    if (inputs & VERT_BIT_COLOR0) {
 487       int emitsize;
 488       if (VB->ColorPtr[0]->size == 4 &&
 489           (VB->ColorPtr[0]->stride != 0 ||
 490            VB->ColorPtr[0]->data[0][3] != 1.0)) {
 491          vfmt |= RADEON_CP_VC_FRMT_FPCOLOR | RADEON_CP_VC_FRMT_FPALPHA;
 492          emitsize = 4;
 493       }
 494
 495       else {
 496          vfmt |= RADEON_CP_VC_FRMT_FPCOLOR;
 497          emitsize = 3;
 498       }
 499
 500       if (!rmesa->tcl.rgba.buf)
 501          emit_vector( ctx,
 502                       &(rmesa->tcl.rgba),
 503                       (char *)VB->ColorPtr[0]->data,
 504                       emitsize,
 505                       VB->ColorPtr[0]->stride,
 506                       count);
 507
 508
 509       component[nr++] = &rmesa->tcl.rgba;
 510    }
 511
 512
 513    if (inputs & VERT_BIT_COLOR1) {
 514       if (!rmesa->tcl.spec.buf) {
 515
 516          emit_vector( ctx,
 517                       &rmesa->tcl.spec,
 518                       (char *)VB->SecondaryColorPtr[0]->data,
 519                       3,
 520                       VB->SecondaryColorPtr[0]->stride,
 521                       count);
 522       }
 523
 524       vfmt |= RADEON_CP_VC_FRMT_FPSPEC;
 525       component[nr++] = &rmesa->tcl.spec;
 526    }
 527
 528    vtx = (rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &
 529           ~(RADEON_TCL_VTX_Q0|RADEON_TCL_VTX_Q1));
 530
 531    if (inputs & VERT_BIT_TEX0) {
 532       if (!rmesa->tcl.tex[0].buf)
 533          emit_tex_vector( ctx,
 534                           &(rmesa->tcl.tex[0]),
 535                           (char *)VB->TexCoordPtr[0]->data,
 536                           VB->TexCoordPtr[0]->size,
 537                           VB->TexCoordPtr[0]->stride,
 538                           count );
 539
 540       vfmt |= RADEON_CP_VC_FRMT_ST0;
 541       /* assume we need the 3rd coord if texgen is active for r/q OR at least 3
 542          coords are submitted. This may not be 100% correct */
 543       if ( (VB->TexCoordPtr[0]->size >= 3) {
 544          vtx |= RADEON_TCL_VTX_Q0;
 545          vfmt |= RADEON_CP_VC_FRMT_Q0;
 546       }
 547       if ( (ctx->Texture.Unit[0].TexGenEnabled & (R_BIT | Q_BIT)) )
 548          vtx |= RADEON_TCL_VTX_Q0;
 549       else if (VB->TexCoordPtr[0]->size >= 3) {
 550          GLuint swaptexmatcol = (VB->TexCoordPtr[0]->size - 3);
 551          if ((rmesa->NeedTexMatrix & 1) &&
 552                 (swaptexmatcol != (rmesa->TexMatColSwap & 1)))
 553             radeonUploadTexMatrix( rmesa, rmesa->tmpmat[0].m, 0, swaptexmatcol ) ;
 554       }
 555       component[nr++] = &rmesa->tcl.tex[0];
 556    }
 557
 558    if (inputs & VERT_BIT_TEX1) {
 559       if (!rmesa->tcl.tex[1].buf)
 560          emit_tex_vector( ctx,
 561                           &(rmesa->tcl.tex[1]),
 562                           (char *)VB->TexCoordPtr[1]->data,
 563                           VB->TexCoordPtr[1]->size,
 564                           VB->TexCoordPtr[1]->stride,
 565                           count );
 566
 567       vfmt |= RADEON_CP_VC_FRMT_ST1;
 568       if ( (VB->TexCoordPtr[1]->size >= 3) {
 569          vtx |= RADEON_TCL_VTX_Q1;
 570          vfmt |= RADEON_CP_VC_FRMT_Q1;
 571       }
 572       if ( (ctx->Texture.Unit[1].TexGenEnabled & (R_BIT | Q_BIT)) )
 573          vtx |= RADEON_TCL_VTX_Q1;
 574       else if (VB->TexCoordPtr[1]->size >= 3) {
 575          GLuint swaptexmatcol = (VB->TexCoordPtr[1]->size - 3);
 576          if (((rmesa->NeedTexMatrix >> 1) & 1) &&
 577                 (swaptexmatcol != ((rmesa->TexMatColSwap >> 1) & 1)))
 578             radeonUploadTexMatrix( rmesa, rmesa->tmpmat[1].m, 1, swaptexmatcol ) ;
 579       }
 580       component[nr++] = &rmesa->tcl.tex[1];
 581    }
 582
 583    if (vtx != rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT]) {
 584       RADEON_STATECHANGE( rmesa, tcl );
 585       rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] = vtx;
 586    }
 587
 588    rmesa->tcl.nr_aos_components = nr;
 589    rmesa->tcl.vertex_format = vfmt;
 590 }
 591
 592
 593 void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
 594 {
 595    radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
 596
 597 #if 0
 598    if (RADEON_DEBUG & DEBUG_VERTS)
 599       _tnl_print_vert_flags( __FUNCTION__, newinputs );
 600 #endif
 601
 602    if (newinputs & VERT_BIT_POS)
 603      radeonReleaseDmaRegion( rmesa, &rmesa->tcl.obj, __FUNCTION__ );
 604
 605    if (newinputs & VERT_BIT_NORMAL)
 606       radeonReleaseDmaRegion( rmesa, &rmesa->tcl.norm, __FUNCTION__ );
 607
 608    if (newinputs & VERT_BIT_COLOR0)
 609       radeonReleaseDmaRegion( rmesa, &rmesa->tcl.rgba, __FUNCTION__ );
 610
 611    if (newinputs & VERT_BIT_COLOR1)
 612       radeonReleaseDmaRegion( rmesa, &rmesa->tcl.spec, __FUNCTION__ );
 613
 614    if (newinputs & VERT_BIT_TEX0)
 615       radeonReleaseDmaRegion( rmesa, &rmesa->tcl.tex[0], __FUNCTION__ );
 616
 617    if (newinputs & VERT_BIT_TEX1)
 618       radeonReleaseDmaRegion( rmesa, &rmesa->tcl.tex[1], __FUNCTION__ );
 619 }