src/gallium/drivers/svga/svga_shader.c

   1 /**********************************************************
   2  * Copyright 2008-2012 VMware, Inc.  All rights reserved.
   3  *
   4  * Permission is hereby granted, free of charge, to any person
   5  * obtaining a copy of this software and associated documentation
   6  * files (the "Software"), to deal in the Software without
   7  * restriction, including without limitation the rights to use, copy,
   8  * modify, merge, publish, distribute, sublicense, and/or sell copies
   9  * of the Software, and to permit persons to whom the Software is
  10  * furnished to do so, subject to the following conditions:
  11  *
  12  * The above copyright notice and this permission notice shall be
  13  * included in all copies or substantial portions of the Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  16  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  17  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  18  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  19  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  20  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  21  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22  * SOFTWARE.
  23  *
  24  **********************************************************/
  25
  26 #include "util/u_bitmask.h"
  27 #include "util/u_memory.h"
  28 #include "util/u_format.h"
  29 #include "svga_context.h"
  30 #include "svga_cmd.h"
  31 #include "svga_format.h"
  32 #include "svga_shader.h"
  33 #include "svga_resource_texture.h"
  34
  35
  36 /**
  37  * This bit isn't really used anywhere.  It only serves to help
  38  * generate a unique "signature" for the vertex shader output bitmask.
  39  * Shader input/output signatures are used to resolve shader linking
  40  * issues.
  41  */
  42 #define FOG_GENERIC_BIT (((uint64_t) 1) << 63)
  43
  44
  45 /**
  46  * Use the shader info to generate a bitmask indicating which generic
  47  * inputs are used by the shader.  A set bit indicates that GENERIC[i]
  48  * is used.
  49  */
  50 uint64_t
  51 svga_get_generic_inputs_mask(const struct tgsi_shader_info *info)
  52 {
  53    unsigned i;
  54    uint64_t mask = 0x0;
  55
  56    for (i = 0; i < info->num_inputs; i++) {
  57       if (info->input_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
  58          unsigned j = info->input_semantic_index[i];
  59          assert(j < sizeof(mask) * 8);
  60          mask |= ((uint64_t) 1) << j;
  61       }
  62    }
  63
  64    return mask;
  65 }
  66
  67
  68 /**
  69  * Scan shader info to return a bitmask of written outputs.
  70  */
  71 uint64_t
  72 svga_get_generic_outputs_mask(const struct tgsi_shader_info *info)
  73 {
  74    unsigned i;
  75    uint64_t mask = 0x0;
  76
  77    for (i = 0; i < info->num_outputs; i++) {
  78       switch (info->output_semantic_name[i]) {
  79       case TGSI_SEMANTIC_GENERIC:
  80          {
  81             unsigned j = info->output_semantic_index[i];
  82             assert(j < sizeof(mask) * 8);
  83             mask |= ((uint64_t) 1) << j;
  84          }
  85          break;
  86       case TGSI_SEMANTIC_FOG:
  87          mask |= FOG_GENERIC_BIT;
  88          break;
  89       }
  90    }
  91
  92    return mask;
  93 }
  94
  95
  96
  97 /**
  98  * Given a mask of used generic variables (as returned by the above functions)
  99  * fill in a table which maps those indexes to small integers.
 100  * This table is used by the remap_generic_index() function in
 101  * svga_tgsi_decl_sm30.c
 102  * Example: if generics_mask = binary(1010) it means that GENERIC[1] and
 103  * GENERIC[3] are used.  The remap_table will contain:
 104  *   table[1] = 0;
 105  *   table[3] = 1;
 106  * The remaining table entries will be filled in with the next unused
 107  * generic index (in this example, 2).
 108  */
 109 void
 110 svga_remap_generics(uint64_t generics_mask,
 111                     int8_t remap_table[MAX_GENERIC_VARYING])
 112 {
 113    /* Note texcoord[0] is reserved so start at 1 */
 114    unsigned count = 1, i;
 115
 116    for (i = 0; i < MAX_GENERIC_VARYING; i++) {
 117       remap_table[i] = -1;
 118    }
 119
 120    /* for each bit set in generic_mask */
 121    while (generics_mask) {
 122       unsigned index = ffsll(generics_mask) - 1;
 123       remap_table[index] = count++;
 124       generics_mask &= ~((uint64_t) 1 << index);
 125    }
 126 }
 127
 128
 129 /**
 130  * Use the generic remap table to map a TGSI generic varying variable
 131  * index to a small integer.  If the remapping table doesn't have a
 132  * valid value for the given index (the table entry is -1) it means
 133  * the fragment shader doesn't use that VS output.  Just allocate
 134  * the next free value in that case.  Alternately, we could cull
 135  * VS instructions that write to register, or replace the register
 136  * with a dummy temp register.
 137  * XXX TODO: we should do one of the later as it would save precious
 138  * texcoord registers.
 139  */
 140 int
 141 svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING],
 142                          int generic_index)
 143 {
 144    assert(generic_index < MAX_GENERIC_VARYING);
 145
 146    if (generic_index >= MAX_GENERIC_VARYING) {
 147       /* just don't return a random/garbage value */
 148       generic_index = MAX_GENERIC_VARYING - 1;
 149    }
 150
 151    if (remap_table[generic_index] == -1) {
 152       /* This is a VS output that has no matching PS input.  Find a
 153        * free index.
 154        */
 155       int i, max = 0;
 156       for (i = 0; i < MAX_GENERIC_VARYING; i++) {
 157          max = MAX2(max, remap_table[i]);
 158       }
 159       remap_table[generic_index] = max + 1;
 160    }
 161
 162    return remap_table[generic_index];
 163 }
 164
 165 static const enum pipe_swizzle copy_alpha[PIPE_SWIZZLE_MAX] = {
 166    PIPE_SWIZZLE_X,
 167    PIPE_SWIZZLE_Y,
 168    PIPE_SWIZZLE_Z,
 169    PIPE_SWIZZLE_W,
 170    PIPE_SWIZZLE_0,
 171    PIPE_SWIZZLE_1,
 172    PIPE_SWIZZLE_NONE
 173 };
 174
 175 static const enum pipe_swizzle set_alpha[PIPE_SWIZZLE_MAX] = {
 176    PIPE_SWIZZLE_X,
 177    PIPE_SWIZZLE_Y,
 178    PIPE_SWIZZLE_Z,
 179    PIPE_SWIZZLE_1,
 180    PIPE_SWIZZLE_0,
 181    PIPE_SWIZZLE_1,
 182    PIPE_SWIZZLE_NONE
 183 };
 184
 185 /**
 186  * Initialize the shader-neutral fields of svga_compile_key from context
 187  * state.  This is basically the texture-related state.
 188  */
 189 void
 190 svga_init_shader_key_common(const struct svga_context *svga,
 191                             enum pipe_shader_type shader,
 192                             struct svga_compile_key *key)
 193 {
 194    unsigned i, idx = 0;
 195
 196    assert(shader < ARRAY_SIZE(svga->curr.num_sampler_views));
 197
 198    /* In case the number of samplers and sampler_views doesn't match,
 199     * loop over the lower of the two counts.
 200     */
 201    key->num_textures = MAX2(svga->curr.num_sampler_views[shader],
 202                             svga->curr.num_samplers[shader]);
 203
 204    for (i = 0; i < key->num_textures; i++) {
 205       struct pipe_sampler_view *view = svga->curr.sampler_views[shader][i];
 206       const struct svga_sampler_state *sampler = svga->curr.sampler[shader][i];
 207       if (view) {
 208          assert(view->texture);
 209          assert(view->texture->target < (1 << 4)); /* texture_target:4 */
 210
 211          /* 1D/2D array textures with one slice and cube map array textures
 212           * with one cube are treated as non-arrays by the SVGA3D device.
 213           * Set the is_array flag only if we know that we have more than 1
 214           * element.  This will be used to select shader instruction/resource
 215           * types during shader translation.
 216           */
 217          switch (view->texture->target) {
 218          case PIPE_TEXTURE_1D_ARRAY:
 219          case PIPE_TEXTURE_2D_ARRAY:
 220             key->tex[i].is_array = view->texture->array_size > 1;
 221             break;
 222          case PIPE_TEXTURE_CUBE_ARRAY:
 223             key->tex[i].is_array = view->texture->array_size > 6;
 224             break;
 225          default:
 226             ; /* nothing / silence compiler warning */
 227          }
 228
 229          assert(view->texture->nr_samples < (1 << 5)); /* 5-bit field */
 230          key->tex[i].num_samples = view->texture->nr_samples;
 231
 232          /* If we have a non-alpha view into an svga3d surface with an
 233           * alpha channel, then explicitly set the alpha channel to 1
 234           * when sampling. Note that we need to check the
 235           * actual device format to cover also imported surface cases.
 236           */
 237          const enum pipe_swizzle *swizzle_tab =
 238             (view->texture->target != PIPE_BUFFER &&
 239              !util_format_has_alpha(view->format) &&
 240              svga_texture_device_format_has_alpha(view->texture)) ?
 241             set_alpha : copy_alpha;
 242
 243          if (view->texture->format == PIPE_FORMAT_DXT1_RGB ||
 244              view->texture->format == PIPE_FORMAT_DXT1_SRGB)
 245             swizzle_tab = set_alpha;
 246
 247          key->tex[i].swizzle_r = swizzle_tab[view->swizzle_r];
 248          key->tex[i].swizzle_g = swizzle_tab[view->swizzle_g];
 249          key->tex[i].swizzle_b = swizzle_tab[view->swizzle_b];
 250          key->tex[i].swizzle_a = swizzle_tab[view->swizzle_a];
 251       }
 252
 253       if (sampler) {
 254          if (!sampler->normalized_coords) {
 255             assert(idx < (1 << 5));  /* width_height_idx:5 bitfield */
 256             key->tex[i].width_height_idx = idx++;
 257             key->tex[i].unnormalized = TRUE;
 258             ++key->num_unnormalized_coords;
 259
 260             if (sampler->magfilter == SVGA3D_TEX_FILTER_NEAREST ||
 261                 sampler->minfilter == SVGA3D_TEX_FILTER_NEAREST) {
 262                 key->tex[i].texel_bias = TRUE;
 263             }
 264          }
 265       }
 266    }
 267 }
 268
 269
 270 /** Search for a compiled shader variant with the same compile key */
 271 struct svga_shader_variant *
 272 svga_search_shader_key(const struct svga_shader *shader,
 273                        const struct svga_compile_key *key)
 274 {
 275    struct svga_shader_variant *variant = shader->variants;
 276
 277    assert(key);
 278
 279    for ( ; variant; variant = variant->next) {
 280       if (svga_compile_keys_equal(key, &variant->key))
 281          return variant;
 282    }
 283    return NULL;
 284 }
 285
 286 /** Search for a shader with the same token key */
 287 struct svga_shader *
 288 svga_search_shader_token_key(struct svga_shader *pshader,
 289                              const struct svga_token_key *key)
 290 {
 291    struct svga_shader *shader = pshader;
 292
 293    assert(key);
 294
 295    for ( ; shader; shader = shader->next) {
 296       if (memcmp(key, &shader->token_key, sizeof(struct svga_token_key)) == 0)
 297          return shader;
 298    }
 299    return NULL;
 300 }
 301
 302 /**
 303  * Helper function to define a gb shader for non-vgpu10 device
 304  */
 305 static enum pipe_error
 306 define_gb_shader_vgpu9(struct svga_context *svga,
 307                        SVGA3dShaderType type,
 308                        struct svga_shader_variant *variant,
 309                        unsigned codeLen)
 310 {
 311    struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
 312    enum pipe_error ret;
 313
 314    /**
 315     * Create gb memory for the shader and upload the shader code.
 316     * Kernel module will allocate an id for the shader and issue
 317     * the DefineGBShader command.
 318     */
 319    variant->gb_shader = sws->shader_create(sws, type,
 320                                            variant->tokens, codeLen);
 321
 322    if (!variant->gb_shader)
 323       return PIPE_ERROR_OUT_OF_MEMORY;
 324
 325    ret = SVGA3D_BindGBShader(svga->swc, variant->gb_shader);
 326
 327    return ret;
 328 }
 329
 330 /**
 331  * Helper function to define a gb shader for vgpu10 device
 332  */
 333 static enum pipe_error
 334 define_gb_shader_vgpu10(struct svga_context *svga,
 335                         SVGA3dShaderType type,
 336                         struct svga_shader_variant *variant,
 337                         unsigned codeLen)
 338 {
 339    struct svga_winsys_context *swc = svga->swc;
 340    enum pipe_error ret;
 341
 342    /**
 343     * Shaders in VGPU10 enabled device reside in the device COTable.
 344     * SVGA driver will allocate an integer ID for the shader and
 345     * issue DXDefineShader and DXBindShader commands.
 346     */
 347    variant->id = util_bitmask_add(svga->shader_id_bm);
 348    if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
 349       return PIPE_ERROR_OUT_OF_MEMORY;
 350    }
 351
 352    /* Create gb memory for the shader and upload the shader code */
 353    variant->gb_shader = swc->shader_create(swc,
 354                                            variant->id, type,
 355                                            variant->tokens, codeLen);
 356
 357    if (!variant->gb_shader) {
 358       /* Free the shader ID */
 359       assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
 360       goto fail_no_allocation;
 361    }
 362
 363    /**
 364     * Since we don't want to do any flush within state emission to avoid
 365     * partial state in a command buffer, it's important to make sure that
 366     * there is enough room to send both the DXDefineShader & DXBindShader
 367     * commands in the same command buffer. So let's send both
 368     * commands in one command reservation. If it fails, we'll undo
 369     * the shader creation and return an error.
 370     */
 371    ret = SVGA3D_vgpu10_DefineAndBindShader(swc, variant->gb_shader,
 372                                            variant->id, type, codeLen);
 373
 374    if (ret != PIPE_OK)
 375       goto fail;
 376
 377    return PIPE_OK;
 378
 379 fail:
 380    swc->shader_destroy(swc, variant->gb_shader);
 381    variant->gb_shader = NULL;
 382
 383 fail_no_allocation:
 384    util_bitmask_clear(svga->shader_id_bm, variant->id);
 385    variant->id = UTIL_BITMASK_INVALID_INDEX;
 386
 387    return PIPE_ERROR_OUT_OF_MEMORY;
 388 }
 389
 390 /**
 391  * Issue the SVGA3D commands to define a new shader.
 392  * \param variant  contains the shader tokens, etc.  The result->id field will
 393  *                 be set here.
 394  */
 395 enum pipe_error
 396 svga_define_shader(struct svga_context *svga,
 397                    SVGA3dShaderType type,
 398                    struct svga_shader_variant *variant)
 399 {
 400    unsigned codeLen = variant->nr_tokens * sizeof(variant->tokens[0]);
 401    enum pipe_error ret;
 402
 403    SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_DEFINESHADER);
 404
 405    variant->id = UTIL_BITMASK_INVALID_INDEX;
 406
 407    if (svga_have_gb_objects(svga)) {
 408       if (svga_have_vgpu10(svga))
 409          ret = define_gb_shader_vgpu10(svga, type, variant, codeLen);
 410       else
 411          ret = define_gb_shader_vgpu9(svga, type, variant, codeLen);
 412    }
 413    else {
 414       /* Allocate an integer ID for the shader */
 415       variant->id = util_bitmask_add(svga->shader_id_bm);
 416       if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
 417          ret = PIPE_ERROR_OUT_OF_MEMORY;
 418          goto done;
 419       }
 420
 421       /* Issue SVGA3D device command to define the shader */
 422       ret = SVGA3D_DefineShader(svga->swc,
 423                                 variant->id,
 424                                 type,
 425                                 variant->tokens,
 426                                 codeLen);
 427       if (ret != PIPE_OK) {
 428          /* free the ID */
 429          assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
 430          util_bitmask_clear(svga->shader_id_bm, variant->id);
 431          variant->id = UTIL_BITMASK_INVALID_INDEX;
 432       }
 433    }
 434
 435 done:
 436    SVGA_STATS_TIME_POP(svga_sws(svga));
 437    return ret;
 438 }
 439
 440
 441 /**
 442  * Issue the SVGA3D commands to set/bind a shader.
 443  * \param result  the shader to bind.
 444  */
 445 enum pipe_error
 446 svga_set_shader(struct svga_context *svga,
 447                 SVGA3dShaderType type,
 448                 struct svga_shader_variant *variant)
 449 {
 450    enum pipe_error ret;
 451    unsigned id = variant ? variant->id : SVGA3D_INVALID_ID;
 452
 453    assert(type == SVGA3D_SHADERTYPE_VS ||
 454           type == SVGA3D_SHADERTYPE_GS ||
 455           type == SVGA3D_SHADERTYPE_PS);
 456
 457    if (svga_have_gb_objects(svga)) {
 458       struct svga_winsys_gb_shader *gbshader =
 459          variant ? variant->gb_shader : NULL;
 460
 461       if (svga_have_vgpu10(svga))
 462          ret = SVGA3D_vgpu10_SetShader(svga->swc, type, gbshader, id);
 463       else
 464          ret = SVGA3D_SetGBShader(svga->swc, type, gbshader);
 465    }
 466    else {
 467       ret = SVGA3D_SetShader(svga->swc, type, id);
 468    }
 469
 470    return ret;
 471 }
 472
 473
 474 struct svga_shader_variant *
 475 svga_new_shader_variant(struct svga_context *svga)
 476 {
 477    svga->hud.num_shaders++;
 478    return CALLOC_STRUCT(svga_shader_variant);
 479 }
 480
 481
 482 enum pipe_error
 483 svga_destroy_shader_variant(struct svga_context *svga,
 484                             SVGA3dShaderType type,
 485                             struct svga_shader_variant *variant)
 486 {
 487    enum pipe_error ret = PIPE_OK;
 488
 489    if (svga_have_gb_objects(svga) && variant->gb_shader) {
 490       if (svga_have_vgpu10(svga)) {
 491          struct svga_winsys_context *swc = svga->swc;
 492          swc->shader_destroy(swc, variant->gb_shader);
 493          ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id);
 494          if (ret != PIPE_OK) {
 495             /* flush and try again */
 496             svga_context_flush(svga, NULL);
 497             ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id);
 498          }
 499          util_bitmask_clear(svga->shader_id_bm, variant->id);
 500       }
 501       else {
 502          struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
 503          sws->shader_destroy(sws, variant->gb_shader);
 504       }
 505       variant->gb_shader = NULL;
 506    }
 507    else {
 508       if (variant->id != UTIL_BITMASK_INVALID_INDEX) {
 509          ret = SVGA3D_DestroyShader(svga->swc, variant->id, type);
 510          if (ret != PIPE_OK) {
 511             /* flush and try again */
 512             svga_context_flush(svga, NULL);
 513             ret = SVGA3D_DestroyShader(svga->swc, variant->id, type);
 514             assert(ret == PIPE_OK);
 515          }
 516          util_bitmask_clear(svga->shader_id_bm, variant->id);
 517       }
 518    }
 519
 520    FREE((unsigned *)variant->tokens);
 521    FREE(variant);
 522
 523    svga->hud.num_shaders--;
 524
 525    return ret;
 526 }
 527
 528 /*
 529  * Rebind shaders.
 530  * Called at the beginning of every new command buffer to ensure that
 531  * shaders are properly paged-in. Instead of sending the SetShader
 532  * command, this function sends a private allocation command to
 533  * page in a shader. This avoids emitting redundant state to the device
 534  * just to page in a resource.
 535  */
 536 enum pipe_error
 537 svga_rebind_shaders(struct svga_context *svga)
 538 {
 539    struct svga_winsys_context *swc = svga->swc;
 540    struct svga_hw_draw_state *hw = &svga->state.hw_draw;
 541    enum pipe_error ret;
 542
 543    assert(svga_have_vgpu10(svga));
 544
 545    /**
 546     * If the underlying winsys layer does not need resource rebinding,
 547     * just clear the rebind flags and return.
 548     */
 549    if (swc->resource_rebind == NULL) {
 550       svga->rebind.flags.vs = 0;
 551       svga->rebind.flags.gs = 0;
 552       svga->rebind.flags.fs = 0;
 553
 554       return PIPE_OK;
 555    }
 556
 557    if (svga->rebind.flags.vs && hw->vs && hw->vs->gb_shader) {
 558       ret = swc->resource_rebind(swc, NULL, hw->vs->gb_shader, SVGA_RELOC_READ);
 559       if (ret != PIPE_OK)
 560          return ret;
 561    }
 562    svga->rebind.flags.vs = 0;
 563
 564    if (svga->rebind.flags.gs && hw->gs && hw->gs->gb_shader) {
 565       ret = swc->resource_rebind(swc, NULL, hw->gs->gb_shader, SVGA_RELOC_READ);
 566       if (ret != PIPE_OK)
 567          return ret;
 568    }
 569    svga->rebind.flags.gs = 0;
 570
 571    if (svga->rebind.flags.fs && hw->fs && hw->fs->gb_shader) {
 572       ret = swc->resource_rebind(swc, NULL, hw->fs->gb_shader, SVGA_RELOC_READ);
 573       if (ret != PIPE_OK)
 574          return ret;
 575    }
 576    svga->rebind.flags.fs = 0;
 577
 578    return PIPE_OK;
 579 }