src/gallium/drivers/svga/svga_tgsi_decl_sm30.c

   1 /**********************************************************
   2  * Copyright 2008-2009 VMware, Inc.  All rights reserved.
   3  *
   4  * Permission is hereby granted, free of charge, to any person
   5  * obtaining a copy of this software and associated documentation
   6  * files (the "Software"), to deal in the Software without
   7  * restriction, including without limitation the rights to use, copy,
   8  * modify, merge, publish, distribute, sublicense, and/or sell copies
   9  * of the Software, and to permit persons to whom the Software is
  10  * furnished to do so, subject to the following conditions:
  11  *
  12  * The above copyright notice and this permission notice shall be
  13  * included in all copies or substantial portions of the Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  16  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  17  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  18  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  19  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  20  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  21  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22  * SOFTWARE.
  23  *
  24  **********************************************************/
  25
  26
  27 #include "pipe/p_shader_tokens.h"
  28 #include "tgsi/tgsi_parse.h"
  29 #include "util/u_memory.h"
  30
  31 #include "svga_tgsi_emit.h"
  32
  33
  34 /**
  35  * Translate TGSI semantic info into SVGA3d semantic info.
  36  * This is called for VS outputs and PS inputs only.
  37  */
  38 static boolean
  39 translate_vs_ps_semantic(struct svga_shader_emitter *emit,
  40                          struct tgsi_declaration_semantic semantic,
  41                          unsigned *usage,
  42                          unsigned *idx)
  43 {
  44    switch (semantic.Name) {
  45    case TGSI_SEMANTIC_POSITION:
  46       *idx = semantic.Index;
  47       *usage = SVGA3D_DECLUSAGE_POSITION;
  48       break;
  49    case TGSI_SEMANTIC_COLOR:
  50       *idx = semantic.Index;
  51       *usage = SVGA3D_DECLUSAGE_COLOR;
  52       break;
  53    case TGSI_SEMANTIC_BCOLOR:
  54       *idx = semantic.Index + 2; /* sharing with COLOR */
  55       *usage = SVGA3D_DECLUSAGE_COLOR;
  56       break;
  57    case TGSI_SEMANTIC_FOG:
  58       *idx = 0;
  59       assert(semantic.Index == 0);
  60       *usage = SVGA3D_DECLUSAGE_TEXCOORD;
  61       break;
  62    case TGSI_SEMANTIC_PSIZE:
  63       *idx = semantic.Index;
  64       *usage = SVGA3D_DECLUSAGE_PSIZE;
  65       break;
  66    case TGSI_SEMANTIC_GENERIC:
  67       *idx = svga_remap_generic_index(emit->key.generic_remap_table,
  68                                       semantic.Index);
  69       *usage = SVGA3D_DECLUSAGE_TEXCOORD;
  70       break;
  71    case TGSI_SEMANTIC_NORMAL:
  72       *idx = semantic.Index;
  73       *usage = SVGA3D_DECLUSAGE_NORMAL;
  74       break;
  75    case TGSI_SEMANTIC_CLIPDIST:
  76    case TGSI_SEMANTIC_CLIPVERTEX:
  77       /* XXX at this time we don't support clip distance or clip vertices */
  78       debug_warn_once("unsupported clip distance/vertex attribute\n");
  79       *usage = SVGA3D_DECLUSAGE_TEXCOORD;
  80       *idx = 0;
  81       return TRUE;
  82    default:
  83       assert(0);
  84       *usage = SVGA3D_DECLUSAGE_TEXCOORD;
  85       *idx = 0;
  86       return FALSE;
  87    }
  88
  89    return TRUE;
  90 }
  91
  92
  93 /**
  94  * Emit a PS input (or VS depth/fog output) register declaration.
  95  * For example, if usage = SVGA3D_DECLUSAGE_TEXCOORD, reg.num = 1, and
  96  * index = 3, we'll emit "dcl_texcoord3 v1".
  97  */
  98 static boolean
  99 emit_decl(struct svga_shader_emitter *emit,
 100           SVGA3dShaderDestToken reg,
 101           unsigned usage,
 102           unsigned index)
 103 {
 104    SVGA3DOpDclArgs dcl;
 105    SVGA3dShaderInstToken opcode;
 106
 107    /* check values against bitfield sizes */
 108    assert(index < 16);
 109    assert(usage <= SVGA3D_DECLUSAGE_MAX);
 110
 111    opcode = inst_token(SVGA3DOP_DCL);
 112    dcl.values[0] = 0;
 113    dcl.values[1] = 0;
 114
 115    dcl.dst = reg;
 116    dcl.usage = usage;
 117    dcl.index = index;
 118    dcl.values[0] |= 1<<31;
 119
 120    return (emit_instruction(emit, opcode) &&
 121            svga_shader_emit_dwords(emit, dcl.values, ARRAY_SIZE(dcl.values)));
 122 }
 123
 124
 125 /**
 126  * Emit declaration for PS front/back-face input register.
 127  */
 128 static boolean
 129 emit_vface_decl(struct svga_shader_emitter *emit)
 130 {
 131    if (!emit->emitted_vface) {
 132       SVGA3dShaderDestToken reg =
 133          dst_register(SVGA3DREG_MISCTYPE, SVGA3DMISCREG_FACE);
 134
 135       if (!emit_decl(emit, reg, 0, 0))
 136          return FALSE;
 137
 138       emit->emitted_vface = TRUE;
 139    }
 140    return TRUE;
 141 }
 142
 143
 144 /**
 145  * Emit PS input register to pass depth/fog coordinates.
 146  * Note that this always goes into texcoord[0].
 147  */
 148 static boolean
 149 ps30_input_emit_depth_fog(struct svga_shader_emitter *emit,
 150                           struct src_register *out)
 151 {
 152    struct src_register reg;
 153
 154    if (emit->emitted_depth_fog) {
 155       *out = emit->ps_depth_fog;
 156       return TRUE;
 157    }
 158
 159    if (emit->ps30_input_count >= SVGA3D_INPUTREG_MAX)
 160       return FALSE;
 161
 162    reg = src_register(SVGA3DREG_INPUT,
 163                        emit->ps30_input_count++);
 164
 165    *out = emit->ps_depth_fog = reg;
 166
 167    emit->emitted_depth_fog = TRUE;
 168
 169    return emit_decl(emit, dst(reg), SVGA3D_DECLUSAGE_TEXCOORD, 0);
 170 }
 171
 172
 173 /**
 174  * Process a PS input declaration.
 175  * We'll emit a declaration like "dcl_texcoord1 v2"
 176  */
 177 static boolean
 178 ps30_input(struct svga_shader_emitter *emit,
 179            struct tgsi_declaration_semantic semantic,
 180            unsigned idx)
 181 {
 182    unsigned usage, index;
 183    SVGA3dShaderDestToken reg;
 184
 185    if (semantic.Name == TGSI_SEMANTIC_POSITION) {
 186
 187       emit->ps_true_pos = src_register(SVGA3DREG_MISCTYPE,
 188                                         SVGA3DMISCREG_POSITION);
 189       emit->ps_true_pos.base.swizzle = TRANSLATE_SWIZZLE(TGSI_SWIZZLE_X,
 190                                                           TGSI_SWIZZLE_Y,
 191                                                           TGSI_SWIZZLE_Y,
 192                                                           TGSI_SWIZZLE_Y);
 193       reg = writemask(dst(emit->ps_true_pos),
 194                        TGSI_WRITEMASK_XY);
 195       emit->ps_reads_pos = TRUE;
 196
 197       if (emit->info.reads_z) {
 198          emit->ps_temp_pos = dst_register(SVGA3DREG_TEMP,
 199                                            emit->nr_hw_temp);
 200
 201          emit->input_map[idx] = src_register(SVGA3DREG_TEMP,
 202                                               emit->nr_hw_temp);
 203          emit->nr_hw_temp++;
 204
 205          if (!ps30_input_emit_depth_fog(emit, &emit->ps_depth_pos))
 206             return FALSE;
 207
 208          emit->ps_depth_pos.base.swizzle = TRANSLATE_SWIZZLE(TGSI_SWIZZLE_Z,
 209                                                               TGSI_SWIZZLE_Z,
 210                                                               TGSI_SWIZZLE_Z,
 211                                                               TGSI_SWIZZLE_W);
 212       }
 213       else {
 214          emit->input_map[idx] = emit->ps_true_pos;
 215       }
 216
 217       return emit_decl(emit, reg, 0, 0);
 218    }
 219    else if (emit->key.fs.light_twoside &&
 220             (semantic.Name == TGSI_SEMANTIC_COLOR)) {
 221
 222       if (!translate_vs_ps_semantic(emit, semantic, &usage, &index))
 223          return FALSE;
 224
 225       emit->internal_color_idx[emit->internal_color_count] = idx;
 226       emit->input_map[idx] =
 227          src_register(SVGA3DREG_INPUT, emit->ps30_input_count);
 228       emit->ps30_input_count++;
 229       emit->internal_color_count++;
 230
 231       reg = dst(emit->input_map[idx]);
 232
 233       if (!emit_decl(emit, reg, usage, index))
 234          return FALSE;
 235
 236       semantic.Name = TGSI_SEMANTIC_BCOLOR;
 237       if (!translate_vs_ps_semantic(emit, semantic, &usage, &index))
 238          return FALSE;
 239
 240       if (emit->ps30_input_count >= SVGA3D_INPUTREG_MAX)
 241          return FALSE;
 242
 243       reg = dst_register(SVGA3DREG_INPUT, emit->ps30_input_count++);
 244
 245       if (!emit_decl(emit, reg, usage, index))
 246          return FALSE;
 247
 248       if (!emit_vface_decl(emit))
 249          return FALSE;
 250
 251       return TRUE;
 252    }
 253    else if (semantic.Name == TGSI_SEMANTIC_FACE) {
 254       if (!emit_vface_decl(emit))
 255          return FALSE;
 256       emit->emit_frontface = TRUE;
 257       emit->internal_frontface_idx = idx;
 258       return TRUE;
 259    }
 260    else if (semantic.Name == TGSI_SEMANTIC_FOG) {
 261
 262       assert(semantic.Index == 0);
 263
 264       if (!ps30_input_emit_depth_fog(emit, &emit->input_map[idx]))
 265          return FALSE;
 266
 267       emit->input_map[idx].base.swizzle = TRANSLATE_SWIZZLE(TGSI_SWIZZLE_X,
 268                                                              TGSI_SWIZZLE_X,
 269                                                              TGSI_SWIZZLE_X,
 270                                                              TGSI_SWIZZLE_X);
 271       return TRUE;
 272    }
 273    else {
 274
 275       if (!translate_vs_ps_semantic(emit, semantic, &usage, &index))
 276          return FALSE;
 277
 278       if (emit->ps30_input_count >= SVGA3D_INPUTREG_MAX)
 279          return FALSE;
 280
 281       emit->input_map[idx] =
 282          src_register(SVGA3DREG_INPUT, emit->ps30_input_count++);
 283
 284       reg = dst(emit->input_map[idx]);
 285
 286       if (!emit_decl(emit, reg, usage, index))
 287          return FALSE;
 288
 289       if (semantic.Name == TGSI_SEMANTIC_GENERIC &&
 290           emit->key.sprite_origin_lower_left &&
 291           index >= 1 &&
 292           emit->key.sprite_coord_enable & (1 << semantic.Index)) {
 293          /* This is a sprite texture coord with lower-left origin.
 294           * We need to invert the texture T coordinate since the SVGA3D
 295           * device only supports an upper-left origin.
 296           */
 297          unsigned unit = index - 1;
 298
 299          emit->inverted_texcoords |= (1 << unit);
 300
 301          /* save original texcoord reg */
 302          emit->ps_true_texcoord[unit] = emit->input_map[idx];
 303
 304          /* this temp register will be the results of the MAD instruction */
 305          emit->ps_inverted_texcoord[unit] =
 306             src_register(SVGA3DREG_TEMP, emit->nr_hw_temp);
 307          emit->nr_hw_temp++;
 308
 309          emit->ps_inverted_texcoord_input[unit] = idx;
 310
 311          /* replace input_map entry with the temp register */
 312          emit->input_map[idx] = emit->ps_inverted_texcoord[unit];
 313       }
 314
 315       return TRUE;
 316    }
 317
 318 }
 319
 320
 321 /**
 322  * Process a PS output declaration.
 323  * Note that we don't actually emit a SVGA3DOpDcl for PS outputs.
 324  * \idx  register index, such as OUT[2] (not semantic index)
 325  */
 326 static boolean
 327 ps30_output(struct svga_shader_emitter *emit,
 328             struct tgsi_declaration_semantic semantic,
 329             unsigned idx)
 330 {
 331    switch (semantic.Name) {
 332    case TGSI_SEMANTIC_COLOR:
 333       if (emit->unit == PIPE_SHADER_FRAGMENT) {
 334          if (emit->key.fs.white_fragments) {
 335             /* Used for XOR logicop mode */
 336             emit->output_map[idx] = dst_register(SVGA3DREG_TEMP,
 337                                                   emit->nr_hw_temp++);
 338             emit->temp_color_output[idx] = emit->output_map[idx];
 339             emit->true_color_output[idx] = dst_register(SVGA3DREG_COLOROUT,
 340                                                         semantic.Index);
 341          }
 342          else if (emit->key.fs.write_color0_to_n_cbufs) {
 343             /* We'll write color output [0] to all render targets.
 344              * Prepare all the output registers here, but only when the
 345              * semantic.Index == 0 so we don't do this more than once.
 346              */
 347             if (semantic.Index == 0) {
 348                unsigned i;
 349                for (i = 0; i < emit->key.fs.write_color0_to_n_cbufs; i++) {
 350                   emit->output_map[idx+i] = dst_register(SVGA3DREG_TEMP,
 351                                                      emit->nr_hw_temp++);
 352                   emit->temp_color_output[i] = emit->output_map[idx+i];
 353                   emit->true_color_output[i] = dst_register(SVGA3DREG_COLOROUT,
 354                                                             i);
 355                }
 356             }
 357          }
 358          else {
 359             emit->output_map[idx] =
 360                dst_register(SVGA3DREG_COLOROUT, semantic.Index);
 361          }
 362       }
 363       else {
 364          emit->output_map[idx] = dst_register(SVGA3DREG_COLOROUT,
 365                                                semantic.Index);
 366       }
 367       break;
 368    case TGSI_SEMANTIC_POSITION:
 369       emit->output_map[idx] = dst_register(SVGA3DREG_TEMP,
 370                                             emit->nr_hw_temp++);
 371       emit->temp_pos = emit->output_map[idx];
 372       emit->true_pos = dst_register(SVGA3DREG_DEPTHOUT,
 373                                      semantic.Index);
 374       break;
 375    default:
 376       assert(0);
 377       /* A wild stab in the dark. */
 378       emit->output_map[idx] = dst_register(SVGA3DREG_COLOROUT, 0);
 379       break;
 380    }
 381
 382    return TRUE;
 383 }
 384
 385
 386 /**
 387  * Declare a VS input register.
 388  * We still make up the input semantics the same as in 2.0
 389  */
 390 static boolean
 391 vs30_input(struct svga_shader_emitter *emit,
 392            struct tgsi_declaration_semantic semantic,
 393            unsigned idx)
 394 {
 395    SVGA3DOpDclArgs dcl;
 396    SVGA3dShaderInstToken opcode;
 397    unsigned usage, index;
 398
 399    opcode = inst_token(SVGA3DOP_DCL);
 400    dcl.values[0] = 0;
 401    dcl.values[1] = 0;
 402
 403    emit->input_map[idx] = src_register(SVGA3DREG_INPUT, idx);
 404    dcl.dst = dst_register(SVGA3DREG_INPUT, idx);
 405
 406    assert(dcl.dst.reserved0);
 407
 408    svga_generate_vdecl_semantics(idx, &usage, &index);
 409
 410    dcl.usage = usage;
 411    dcl.index = index;
 412    dcl.values[0] |= 1<<31;
 413
 414    return (emit_instruction(emit, opcode) &&
 415            svga_shader_emit_dwords(emit, dcl.values, ARRAY_SIZE(dcl.values)));
 416 }
 417
 418
 419 /**
 420  * Declare VS output for holding depth/fog.
 421  */
 422 static boolean
 423 vs30_output_emit_depth_fog(struct svga_shader_emitter *emit,
 424                            SVGA3dShaderDestToken *out)
 425 {
 426    SVGA3dShaderDestToken reg;
 427
 428    if (emit->emitted_depth_fog) {
 429       *out = emit->vs_depth_fog;
 430       return TRUE;
 431    }
 432
 433    reg = dst_register(SVGA3DREG_OUTPUT, emit->vs30_output_count++);
 434
 435    *out = emit->vs_depth_fog = reg;
 436
 437    emit->emitted_depth_fog = TRUE;
 438
 439    return emit_decl(emit, reg, SVGA3D_DECLUSAGE_TEXCOORD, 0);
 440 }
 441
 442
 443 /**
 444  * Declare a VS output.
 445  * VS3.0 outputs have proper declarations and semantic info for
 446  * matching against PS inputs.
 447  */
 448 static boolean
 449 vs30_output(struct svga_shader_emitter *emit,
 450             struct tgsi_declaration_semantic semantic,
 451             unsigned idx)
 452 {
 453    SVGA3DOpDclArgs dcl;
 454    SVGA3dShaderInstToken opcode;
 455    unsigned usage, index;
 456
 457    opcode = inst_token(SVGA3DOP_DCL);
 458    dcl.values[0] = 0;
 459    dcl.values[1] = 0;
 460
 461    if (!translate_vs_ps_semantic(emit, semantic, &usage, &index))
 462       return FALSE;
 463
 464    if (emit->vs30_output_count >= SVGA3D_OUTPUTREG_MAX)
 465       return FALSE;
 466
 467    dcl.dst = dst_register(SVGA3DREG_OUTPUT, emit->vs30_output_count++);
 468    dcl.usage = usage;
 469    dcl.index = index;
 470    dcl.values[0] |= 1<<31;
 471
 472    if (semantic.Name == TGSI_SEMANTIC_POSITION) {
 473       assert(idx == 0);
 474       emit->output_map[idx] = dst_register(SVGA3DREG_TEMP,
 475                                             emit->nr_hw_temp++);
 476       emit->temp_pos = emit->output_map[idx];
 477       emit->true_pos = dcl.dst;
 478
 479       /* Grab an extra output for the depth output */
 480       if (!vs30_output_emit_depth_fog(emit, &emit->depth_pos))
 481          return FALSE;
 482
 483    }
 484    else if (semantic.Name == TGSI_SEMANTIC_PSIZE) {
 485       emit->output_map[idx] = dst_register(SVGA3DREG_TEMP,
 486                                             emit->nr_hw_temp++);
 487       emit->temp_psiz = emit->output_map[idx];
 488
 489       /* This has the effect of not declaring psiz (below) and not
 490        * emitting the final MOV to true_psiz in the postamble.
 491        */
 492       if (!emit->key.vs.allow_psiz)
 493          return TRUE;
 494
 495       emit->true_psiz = dcl.dst;
 496    }
 497    else if (semantic.Name == TGSI_SEMANTIC_FOG) {
 498       /*
 499        * Fog is shared with depth.
 500        * So we need to decrement out_count since emit_depth_fog will increment it.
 501        */
 502       emit->vs30_output_count--;
 503
 504       if (!vs30_output_emit_depth_fog(emit, &emit->output_map[idx]))
 505          return FALSE;
 506
 507       return TRUE;
 508    }
 509    else {
 510       emit->output_map[idx] = dcl.dst;
 511    }
 512
 513    return (emit_instruction(emit, opcode) &&
 514            svga_shader_emit_dwords(emit, dcl.values, ARRAY_SIZE(dcl.values)));
 515 }
 516
 517
 518 /** Translate PIPE_TEXTURE_x to SVGA3DSAMP_x */
 519 static ubyte
 520 svga_tgsi_sampler_type(const struct svga_shader_emitter *emit, int idx)
 521 {
 522    switch (emit->sampler_target[idx]) {
 523    case TGSI_TEXTURE_1D:
 524       return SVGA3DSAMP_2D;
 525    case TGSI_TEXTURE_2D:
 526    case TGSI_TEXTURE_RECT:
 527       return SVGA3DSAMP_2D;
 528    case TGSI_TEXTURE_SHADOW2D:
 529       return SVGA3DSAMP_2D_SHADOW;
 530    case TGSI_TEXTURE_3D:
 531       return SVGA3DSAMP_VOLUME;
 532    case TGSI_TEXTURE_CUBE:
 533       return SVGA3DSAMP_CUBE;
 534    }
 535
 536    return SVGA3DSAMP_UNKNOWN;
 537 }
 538
 539
 540 static boolean
 541 ps30_sampler(struct svga_shader_emitter *emit,
 542               unsigned idx)
 543 {
 544    SVGA3DOpDclArgs dcl;
 545    SVGA3dShaderInstToken opcode;
 546
 547    opcode = inst_token(SVGA3DOP_DCL);
 548    dcl.values[0] = 0;
 549    dcl.values[1] = 0;
 550
 551    dcl.dst = dst_register(SVGA3DREG_SAMPLER, idx);
 552    dcl.type = svga_tgsi_sampler_type(emit, idx);
 553    dcl.values[0] |= 1<<31;
 554
 555    return (emit_instruction(emit, opcode) &&
 556            svga_shader_emit_dwords(emit, dcl.values, ARRAY_SIZE(dcl.values)));
 557 }
 558
 559
 560 boolean
 561 svga_shader_emit_samplers_decl(struct svga_shader_emitter *emit)
 562 {
 563    unsigned i;
 564
 565    for (i = 0; i < emit->num_samplers; i++) {
 566       if (!ps30_sampler(emit, i))
 567          return FALSE;
 568    }
 569    return TRUE;
 570 }
 571
 572
 573 boolean
 574 svga_translate_decl_sm30(struct svga_shader_emitter *emit,
 575                          const struct tgsi_full_declaration *decl)
 576 {
 577    unsigned first = decl->Range.First;
 578    unsigned last = decl->Range.Last;
 579    unsigned idx;
 580
 581    for (idx = first; idx <= last; idx++) {
 582       boolean ok = TRUE;
 583
 584       switch (decl->Declaration.File) {
 585       case TGSI_FILE_SAMPLER:
 586          assert (emit->unit == PIPE_SHADER_FRAGMENT);
 587          /* just keep track of the number of samplers here.
 588           * Will emit the declaration in the helpers function.
 589           */
 590          emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1);
 591          break;
 592
 593       case TGSI_FILE_INPUT:
 594          if (emit->unit == PIPE_SHADER_VERTEX)
 595             ok = vs30_input(emit, decl->Semantic, idx);
 596          else
 597             ok = ps30_input(emit, decl->Semantic, idx);
 598          break;
 599
 600       case TGSI_FILE_OUTPUT:
 601          if (emit->unit == PIPE_SHADER_VERTEX)
 602             ok = vs30_output(emit, decl->Semantic, idx);
 603          else
 604             ok = ps30_output(emit, decl->Semantic, idx);
 605          break;
 606
 607       case TGSI_FILE_SAMPLER_VIEW:
 608          {
 609             unsigned unit = decl->Range.First;
 610             assert(decl->Range.First == decl->Range.Last);
 611             emit->sampler_target[unit] = decl->SamplerView.Resource;
 612          }
 613          break;
 614
 615       default:
 616          /* don't need to declare other vars */
 617          ok = TRUE;
 618       }
 619
 620       if (!ok)
 621          return FALSE;
 622    }
 623
 624    return TRUE;
 625 }