src/mesa/drivers/dri/r200/r200_cmdbuf.c

   1 /*
   2 Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
   3
   4 The Weather Channel (TM) funded Tungsten Graphics to develop the
   5 initial release of the Radeon 8500 driver under the XFree86 license.
   6 This notice must be preserved.
   7
   8 Permission is hereby granted, free of charge, to any person obtaining
   9 a copy of this software and associated documentation files (the
  10 "Software"), to deal in the Software without restriction, including
  11 without limitation the rights to use, copy, modify, merge, publish,
  12 distribute, sublicense, and/or sell copies of the Software, and to
  13 permit persons to whom the Software is furnished to do so, subject to
  14 the following conditions:
  15
  16 The above copyright notice and this permission notice (including the
  17 next paragraph) shall be included in all copies or substantial
  18 portions of the Software.
  19
  20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  21 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  22 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  23 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  24 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  25 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  26 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  27 */
  28
  29 /*
  30  * Authors:
  31  *   Keith Whitwell <keith@tungstengraphics.com>
  32  */
  33
  34 #include "main/glheader.h"
  35 #include "main/imports.h"
  36 #include "main/macros.h"
  37 #include "main/context.h"
  38 #include "swrast/swrast.h"
  39 #include "main/simple_list.h"
  40
  41 #include "radeon_cs.h"
  42 #include "r200_context.h"
  43 #include "common_cmdbuf.h"
  44 #include "r200_state.h"
  45 #include "r200_ioctl.h"
  46 #include "r200_tcl.h"
  47 #include "r200_sanity.h"
  48 #include "radeon_reg.h"
  49
/* Compile-time switch for the extra per-atom tracing in r200EmitAtoms().
 * The tracing additionally requires DEBUG_STATE to be set in RADEON_DEBUG.
 */
#define DEBUG_CMDBUF         0
  51
  52 /* The state atoms will be emitted in the order they appear in the atom list,
  53  * so this step is important.
  54  */
  55 void r200SetUpAtomList( r200ContextPtr rmesa )
  56 {
  57    int i, mtu;
  58
  59    mtu = rmesa->radeon.glCtx->Const.MaxTextureUnits;
  60
  61    make_empty_list(&rmesa->hw.atomlist);
  62    rmesa->hw.atomlist.name = "atom-list";
  63
  64    insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.ctx );
  65    insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.set );
  66    insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.lin );
  67    insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.msk );
  68    insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpt );
  69    insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vtx );
  70    insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vap );
  71    insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vte );
  72    insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.msc );
  73    insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.cst );
  74    insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.zbs );
  75    insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tcl );
  76    insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.msl );
  77    insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tcg );
  78    insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.grd );
  79    insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.fog );
  80    insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tam );
  81    insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tf );
  82    insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.atf );
  83    for (i = 0; i < mtu; ++i)
  84        insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tex[i] );
  85    for (i = 0; i < mtu; ++i)
  86        insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.cube[i] );
  87    for (i = 0; i < 6; ++i)
  88        insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.pix[i] );
  89    insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.afs[0] );
  90    insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.afs[1] );
  91    for (i = 0; i < 8; ++i)
  92        insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.lit[i] );
  93    for (i = 0; i < 3 + mtu; ++i)
  94        insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.mat[i] );
  95    insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.eye );
  96    insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.glt );
  97    for (i = 0; i < 2; ++i)
  98       insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.mtl[i] );
  99    for (i = 0; i < 6; ++i)
 100        insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.ucp[i] );
 101    insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.spr );
 102    insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.ptp );
 103    insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.prf );
 104    insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.pvs );
 105    insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpp[0] );
 106    insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpp[1] );
 107    insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpi[0] );
 108    insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpi[1] );
 109 }
 110
 111 static void r200SaveHwState( r200ContextPtr rmesa )
 112 {
 113    struct radeon_state_atom *atom;
 114    char * dest = rmesa->backup_store.cmd_buf;
 115    uint32_t dwords;
 116    if (R200_DEBUG & DEBUG_STATE)
 117       fprintf(stderr, "%s\n", __FUNCTION__);
 118
 119    rmesa->backup_store.cmd_used = 0;
 120
 121    foreach( atom, &rmesa->hw.atomlist ) {
 122       dwords = atom->check( rmesa->radeon.glCtx, atom );
 123       if ( dwords ) {
 124          int size = atom->cmd_size * 4;
 125
 126          if (atom->emit) {
 127            (*atom->emit)(rmesa->radeon.glCtx, atom);
 128          } else {
 129            memcpy( dest, atom->cmd, size);
 130            dest += size;
 131            rmesa->backup_store.cmd_used += size;
 132          }
 133          if (R200_DEBUG & DEBUG_STATE)
 134             radeon_print_state_atom( atom );
 135       }
 136    }
 137
 138    assert( rmesa->backup_store.cmd_used <= R200_CMD_BUF_SZ );
 139    if (R200_DEBUG & DEBUG_STATE)
 140       fprintf(stderr, "Returning to r200EmitState\n");
 141 }
 142
 143 static INLINE void r200EmitAtoms(r200ContextPtr r200, GLboolean dirty)
 144 {
 145    BATCH_LOCALS(&r200->radeon);
 146    struct radeon_state_atom *atom;
 147    int dwords;
 148
 149    /* Emit actual atoms */
 150    foreach(atom, &r200->hw.atomlist) {
 151      if ((atom->dirty || r200->hw.all_dirty) == dirty) {
 152        dwords = (*atom->check) (r200->radeon.glCtx, atom);
 153        if (dwords) {
 154           if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) {
 155              radeon_print_state_atom(atom);
 156           }
 157          if (atom->emit) {
 158            (*atom->emit)(r200->radeon.glCtx, atom);
 159          } else {
 160            BEGIN_BATCH_NO_AUTOSTATE(dwords);
 161            OUT_BATCH_TABLE(atom->cmd, dwords);
 162            END_BATCH();
 163          }
 164          atom->dirty = GL_FALSE;
 165        } else {
 166           if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) {
 167              fprintf(stderr, "  skip state %s\n",
 168                      atom->name);
 169           }
 170        }
 171      }
 172    }
 173
 174    COMMIT_BATCH();
 175 }
 176
 177 void r200EmitState( r200ContextPtr rmesa )
 178 {
 179    char *dest;
 180    int mtu;
 181    struct radeon_state_atom *atom;
 182    uint32_t dwords;
 183
 184    if (R200_DEBUG & (DEBUG_STATE|DEBUG_PRIMS))
 185       fprintf(stderr, "%s\n", __FUNCTION__);
 186
 187    if (rmesa->save_on_next_emit) {
 188       r200SaveHwState(rmesa);
 189       rmesa->save_on_next_emit = GL_FALSE;
 190    }
 191
 192    if (rmesa->radeon.cmdbuf.cs->cdw && !rmesa->hw.is_dirty && !rmesa->hw.all_dirty)
 193        return;
 194
 195    mtu = rmesa->radeon.glCtx->Const.MaxTextureUnits;
 196
 197    /* To avoid going across the entire set of states multiple times, just check
 198     * for enough space for the case of emitting all state, and inline the
 199     * r200AllocCmdBuf code here without all the checks.
 200     */
 201    rcommonEnsureCmdBufSpace(&rmesa->radeon, rmesa->hw.max_state_size, __FUNCTION__);
 202
 203    if (!rmesa->radeon.cmdbuf.cs->cdw) {
 204      if (RADEON_DEBUG & DEBUG_STATE)
 205        fprintf(stderr, "Begin reemit state\n");
 206
 207      r200EmitAtoms(rmesa, GL_FALSE);
 208    }
 209
 210    if (RADEON_DEBUG & DEBUG_STATE)
 211      fprintf(stderr, "Begin dirty state\n");
 212
 213    r200EmitAtoms(rmesa, GL_TRUE);
 214    rmesa->hw.is_dirty = GL_FALSE;
 215    rmesa->hw.all_dirty = GL_FALSE;
 216 }
 217
 218 /* Fire a section of the retained (indexed_verts) buffer as a regular
 219  * primtive.
 220  */
 221 void r200EmitVbufPrim( r200ContextPtr rmesa,
 222                        GLuint primitive,
 223                        GLuint vertex_nr )
 224 {
 225    drm_radeon_cmd_header_t *cmd;
 226    BATCH_LOCALS(&rmesa->radeon);
 227
 228    assert(!(primitive & R200_VF_PRIM_WALK_IND));
 229
 230    r200EmitState( rmesa );
 231
 232    if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_PRIMS))
 233       fprintf(stderr, "%s cmd_used/4: %d prim %x nr %d\n", __FUNCTION__,
 234               rmesa->store.cmd_used/4, primitive, vertex_nr);
 235
 236    BEGIN_BATCH(3);
 237    OUT_BATCH_PACKET3_CLIP(R200_CP_CMD_3D_DRAW_VBUF_2, 0);
 238    OUT_BATCH(primitive | R200_VF_PRIM_WALK_LIST | R200_VF_COLOR_ORDER_RGBA |
 239              (vertex_nr << R200_VF_VERTEX_NUMBER_SHIFT));
 240    END_BATCH();
 241 }
 242
 243 static void r200FireEB(r200ContextPtr rmesa, int vertex_count, int type)
 244 {
 245         BATCH_LOCALS(&rmesa->radeon);
 246
 247         if (vertex_count > 0) {
 248                 BEGIN_BATCH(8);
 249                 OUT_BATCH_PACKET3(R200_CP_CMD_3D_DRAW_INDX_2, 0);
 250                 OUT_BATCH(R200_VF_PRIM_WALK_IND |
 251                           ((vertex_count + 0) << 16) |
 252                           type);
 253
 254                 if (!rmesa->radeon.radeonScreen->kernel_mm) {
 255                         OUT_BATCH_PACKET3(R200_CP_CMD_INDX_BUFFER, 2);
 256                         OUT_BATCH((0x80 << 24) | (0 << 16) | 0x810);
 257                         OUT_BATCH_RELOC(rmesa->tcl.elt_dma_offset,
 258                                         rmesa->tcl.elt_dma_bo,
 259                                         rmesa->tcl.elt_dma_offset,
 260                                         RADEON_GEM_DOMAIN_GTT, 0, 0);
 261                         OUT_BATCH(vertex_count/2);
 262                 } else {
 263                         OUT_BATCH_PACKET3(R200_CP_CMD_INDX_BUFFER, 2);
 264                         OUT_BATCH((0x80 << 24) | (0 << 16) | 0x810);
 265                         OUT_BATCH(rmesa->tcl.elt_dma_offset);
 266                         OUT_BATCH(vertex_count/2);
 267                         radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
 268                                               rmesa->tcl.elt_dma_bo,
 269                                               RADEON_GEM_DOMAIN_GTT, 0, 0);
 270                 }
 271                 END_BATCH();
 272         }
 273 }
 274
 275 void r200FlushElts(GLcontext *ctx)
 276 {
 277   r200ContextPtr rmesa = R200_CONTEXT(ctx);
 278    int dwords;
 279    int nr, elt_used = rmesa->tcl.elt_used;
 280
 281    if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_PRIMS))
 282      fprintf(stderr, "%s %x %d\n", __FUNCTION__, rmesa->tcl.hw_primitive, elt_used);
 283
 284    assert( rmesa->radeon.dma.flush == r200FlushElts );
 285    rmesa->radeon.dma.flush = NULL;
 286
 287    elt_used = (elt_used + 2) & ~2;
 288
 289    nr = elt_used / 2;
 290
 291    radeon_bo_unmap(rmesa->tcl.elt_dma_bo);
 292
 293    r200FireEB(rmesa, nr, rmesa->tcl.hw_primitive);
 294
 295    radeon_bo_unref(rmesa->tcl.elt_dma_bo);
 296    rmesa->tcl.elt_dma_bo = NULL;
 297
 298    if (R200_DEBUG & DEBUG_SYNC) {
 299       fprintf(stderr, "%s: Syncing\n", __FUNCTION__);
 300       r200Finish( rmesa->radeon.glCtx );
 301    }
 302 }
 303
 304
 305 GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa,
 306                                     GLuint primitive,
 307                                     GLuint min_nr )
 308 {
 309    GLushort *retval;
 310
 311    if (R200_DEBUG & DEBUG_IOCTL)
 312       fprintf(stderr, "%s %d prim %x\n", __FUNCTION__, min_nr, primitive);
 313
 314    assert((primitive & R200_VF_PRIM_WALK_IND));
 315
 316    r200EmitState( rmesa );
 317
 318    rmesa->tcl.elt_dma_bo = radeon_bo_open(rmesa->radeon.radeonScreen->bom,
 319                                           0, R200_ELT_BUF_SZ, 4,
 320                                           RADEON_GEM_DOMAIN_GTT, 0);
 321    rmesa->tcl.elt_dma_offset = 0;
 322    rmesa->tcl.elt_used = min_nr * 2;
 323
 324    radeon_bo_map(rmesa->tcl.elt_dma_bo, 1);
 325    retval = rmesa->tcl.elt_dma_bo->ptr + rmesa->tcl.elt_dma_offset;
 326
 327
 328    if (R200_DEBUG & DEBUG_PRIMS)
 329       fprintf(stderr, "%s: header prim %x \n",
 330               __FUNCTION__, primitive);
 331
 332    assert(!rmesa->radeon.dma.flush);
 333    rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
 334    rmesa->radeon.dma.flush = r200FlushElts;
 335
 336    return retval;
 337 }
 338
 339
 340
 341 void r200EmitVertexAOS( r200ContextPtr rmesa,
 342                         GLuint vertex_size,
 343                         struct radeon_bo *bo,
 344                         GLuint offset )
 345 {
 346    BATCH_LOCALS(&rmesa->radeon);
 347
 348    if (R200_DEBUG & (DEBUG_PRIMS|DEBUG_IOCTL))
 349       fprintf(stderr, "%s:  vertex_size 0x%x offset 0x%x \n",
 350               __FUNCTION__, vertex_size, offset);
 351
 352
 353    BEGIN_BATCH(5);
 354    OUT_BATCH_PACKET3(R200_CP_CMD_3D_LOAD_VBPNTR, 2);
 355    OUT_BATCH(1);
 356    OUT_BATCH(vertex_size | (vertex_size << 8));
 357    OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
 358    END_BATCH();
 359 }
 360
 361 void r200EmitAOS(r200ContextPtr rmesa, GLuint nr, GLuint offset)
 362 {
 363    BATCH_LOCALS(&rmesa->radeon);
 364    uint32_t voffset;
 365    int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
 366    int i;
 367
 368    if (RADEON_DEBUG & DEBUG_VERTS)
 369       fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
 370               offset);
 371
 372    BEGIN_BATCH(sz+2);
 373    OUT_BATCH_PACKET3(R200_CP_CMD_3D_LOAD_VBPNTR, sz - 1);
 374    OUT_BATCH(nr);
 375
 376
 377    if (!rmesa->radeon.radeonScreen->kernel_mm) {
 378       for (i = 0; i + 1 < nr; i += 2) {
 379          OUT_BATCH((rmesa->tcl.aos[i].components << 0) |
 380                    (rmesa->tcl.aos[i].stride << 8) |
 381                    (rmesa->tcl.aos[i + 1].components << 16) |
 382                    (rmesa->tcl.aos[i + 1].stride << 24));
 383
 384          voffset =  rmesa->tcl.aos[i + 0].offset +
 385             offset * 4 * rmesa->tcl.aos[i + 0].stride;
 386          OUT_BATCH_RELOC(voffset,
 387                          rmesa->tcl.aos[i].bo,
 388                          voffset,
 389                          RADEON_GEM_DOMAIN_GTT,
 390                          0, 0);
 391          voffset =  rmesa->tcl.aos[i + 1].offset +
 392             offset * 4 * rmesa->tcl.aos[i + 1].stride;
 393          OUT_BATCH_RELOC(voffset,
 394                          rmesa->tcl.aos[i+1].bo,
 395                          voffset,
 396                          RADEON_GEM_DOMAIN_GTT,
 397                          0, 0);
 398       }
 399
 400       if (nr & 1) {
 401          OUT_BATCH((rmesa->tcl.aos[nr - 1].components << 0) |
 402                    (rmesa->tcl.aos[nr - 1].stride << 8));
 403          voffset =  rmesa->tcl.aos[nr - 1].offset +
 404             offset * 4 * rmesa->tcl.aos[nr - 1].stride;
 405          OUT_BATCH_RELOC(voffset,
 406                          rmesa->tcl.aos[nr - 1].bo,
 407                          voffset,
 408                          RADEON_GEM_DOMAIN_GTT,
 409                          0, 0);
 410       }
 411    } else {
 412       for (i = 0; i + 1 < nr; i += 2) {
 413          OUT_BATCH((rmesa->tcl.aos[i].components << 0) |
 414                    (rmesa->tcl.aos[i].stride << 8) |
 415                    (rmesa->tcl.aos[i + 1].components << 16) |
 416                    (rmesa->tcl.aos[i + 1].stride << 24));
 417
 418          voffset =  rmesa->tcl.aos[i + 0].offset +
 419             offset * 4 * rmesa->tcl.aos[i + 0].stride;
 420          OUT_BATCH(voffset);
 421          voffset =  rmesa->tcl.aos[i + 1].offset +
 422             offset * 4 * rmesa->tcl.aos[i + 1].stride;
 423          OUT_BATCH(voffset);
 424       }
 425
 426       if (nr & 1) {
 427          OUT_BATCH((rmesa->tcl.aos[nr - 1].components << 0) |
 428                    (rmesa->tcl.aos[nr - 1].stride << 8));
 429          voffset =  rmesa->tcl.aos[nr - 1].offset +
 430             offset * 4 * rmesa->tcl.aos[nr - 1].stride;
 431          OUT_BATCH(voffset);
 432       }
 433       for (i = 0; i + 1 < nr; i += 2) {
 434          voffset =  rmesa->tcl.aos[i + 0].offset +
 435             offset * 4 * rmesa->tcl.aos[i + 0].stride;
 436          radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
 437                                rmesa->tcl.aos[i+0].bo,
 438                                RADEON_GEM_DOMAIN_GTT,
 439                                0, 0);
 440          voffset =  rmesa->tcl.aos[i + 1].offset +
 441             offset * 4 * rmesa->tcl.aos[i + 1].stride;
 442          radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
 443                                rmesa->tcl.aos[i+1].bo,
 444                                RADEON_GEM_DOMAIN_GTT,
 445                                0, 0);
 446       }
 447       if (nr & 1) {
 448          voffset =  rmesa->tcl.aos[nr - 1].offset +
 449             offset * 4 * rmesa->tcl.aos[nr - 1].stride;
 450          radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
 451                                rmesa->tcl.aos[nr-1].bo,
 452                                RADEON_GEM_DOMAIN_GTT,
 453                                0, 0);
 454       }
 455    }
 456    END_BATCH();
 457 }
 458
 459 void r200FireAOS(r200ContextPtr rmesa, int vertex_count, int type)
 460 {
 461         BATCH_LOCALS(&rmesa->radeon);
 462
 463         BEGIN_BATCH(3);
 464         OUT_BATCH_PACKET3(R200_CP_CMD_3D_DRAW_VBUF_2, 0);
 465         OUT_BATCH(R200_VF_PRIM_WALK_LIST | (vertex_count << 16) | type);
 466         END_BATCH();
 467 }
 468