src/gallium/drivers/svga/svga_state_fs.c

   1 /**********************************************************
   2  * Copyright 2008-2009 VMware, Inc.  All rights reserved.
   3  *
   4  * Permission is hereby granted, free of charge, to any person
   5  * obtaining a copy of this software and associated documentation
   6  * files (the "Software"), to deal in the Software without
   7  * restriction, including without limitation the rights to use, copy,
   8  * modify, merge, publish, distribute, sublicense, and/or sell copies
   9  * of the Software, and to permit persons to whom the Software is
  10  * furnished to do so, subject to the following conditions:
  11  *
  12  * The above copyright notice and this permission notice shall be
  13  * included in all copies or substantial portions of the Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  16  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  17  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  18  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  19  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  20  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  21  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22  * SOFTWARE.
  23  *
  24  **********************************************************/
  25
  26 #include "util/u_inlines.h"
  27 #include "pipe/p_defines.h"
  28 #include "util/u_math.h"
  29 #include "util/u_memory.h"
  30 #include "util/u_bitmask.h"
  31 #include "tgsi/tgsi_ureg.h"
  32
  33 #include "svga_context.h"
  34 #include "svga_state.h"
  35 #include "svga_cmd.h"
  36 #include "svga_shader.h"
  37 #include "svga_resource_texture.h"
  38 #include "svga_tgsi.h"
  39
  40 #include "svga_hw_reg.h"
  41
  42
  43
  44 static INLINE int
  45 compare_fs_keys(const struct svga_fs_compile_key *a,
  46                 const struct svga_fs_compile_key *b)
  47 {
  48    unsigned keysize_a = svga_fs_key_size( a );
  49    unsigned keysize_b = svga_fs_key_size( b );
  50
  51    if (keysize_a != keysize_b) {
  52       return (int)(keysize_a - keysize_b);
  53    }
  54    return memcmp( a, b, keysize_a );
  55 }
  56
  57
  58 /** Search for a fragment shader variant */
  59 static struct svga_shader_variant *
  60 search_fs_key(const struct svga_fragment_shader *fs,
  61               const struct svga_fs_compile_key *key)
  62 {
  63    struct svga_shader_variant *variant = fs->base.variants;
  64
  65    assert(key);
  66
  67    for ( ; variant; variant = variant->next) {
  68       if (compare_fs_keys( key, &variant->key.fkey ) == 0)
  69          return variant;
  70    }
  71
  72    return NULL;
  73 }
  74
  75
  76 /**
  77  * If we fail to compile a fragment shader (because it uses too many
  78  * registers, for example) we'll use a dummy/fallback shader that
  79  * simply emits a constant color (red for debug, black for release).
  80  * We hit this with the Unigine/Heaven demo when Shaders = High.
  81  * With black, the demo still looks good.
  82  */
  83 static const struct tgsi_token *
  84 get_dummy_fragment_shader(void)
  85 {
  86 #ifdef DEBUG
  87    static const float color[4] = { 1.0, 0.0, 0.0, 0.0 }; /* red */
  88 #else
  89    static const float color[4] = { 0.0, 0.0, 0.0, 0.0 }; /* black */
  90 #endif
  91    struct ureg_program *ureg;
  92    const struct tgsi_token *tokens;
  93    struct ureg_src src;
  94    struct ureg_dst dst;
  95    unsigned num_tokens;
  96
  97    ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
  98    if (!ureg)
  99       return NULL;
 100
 101    dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
 102    src = ureg_DECL_immediate(ureg, color, 4);
 103    ureg_MOV(ureg, dst, src);
 104    ureg_END(ureg);
 105
 106    tokens = ureg_get_tokens(ureg, &num_tokens);
 107
 108    ureg_destroy(ureg);
 109
 110    return tokens;
 111 }
 112
 113
 114 /**
 115  * Replace the given shader's instruction with a simple constant-color
 116  * shader.  We use this when normal shader translation fails.
 117  */
 118 static struct svga_shader_variant *
 119 get_compiled_dummy_shader(struct svga_fragment_shader *fs,
 120                           const struct svga_fs_compile_key *key)
 121 {
 122    const struct tgsi_token *dummy = get_dummy_fragment_shader();
 123    struct svga_shader_variant *variant;
 124
 125    if (!dummy) {
 126       return NULL;
 127    }
 128
 129    FREE((void *) fs->base.tokens);
 130    fs->base.tokens = dummy;
 131
 132    variant = svga_translate_fragment_program(fs, key);
 133    return variant;
 134 }
 135
 136
 137 /**
 138  * Translate TGSI shader into an svga shader variant.
 139  */
 140 static enum pipe_error
 141 compile_fs(struct svga_context *svga,
 142            struct svga_fragment_shader *fs,
 143            const struct svga_fs_compile_key *key,
 144            struct svga_shader_variant **out_variant)
 145 {
 146    struct svga_shader_variant *variant;
 147    enum pipe_error ret = PIPE_ERROR;
 148
 149    variant = svga_translate_fragment_program( fs, key );
 150    if (variant == NULL) {
 151       debug_printf("Failed to compile fragment shader,"
 152                    " using dummy shader instead.\n");
 153       variant = get_compiled_dummy_shader(fs, key);
 154       if (!variant) {
 155          ret = PIPE_ERROR;
 156          goto fail;
 157       }
 158    }
 159
 160    if (svga_shader_too_large(svga, variant)) {
 161       /* too big, use dummy shader */
 162       debug_printf("Shader too large (%lu bytes),"
 163                    " using dummy shader instead.\n",
 164                    (unsigned long ) variant->nr_tokens * sizeof(variant->tokens[0]));
 165       variant = get_compiled_dummy_shader(fs, key);
 166       if (!variant) {
 167          ret = PIPE_ERROR;
 168          goto fail;
 169       }
 170    }
 171
 172    ret = svga_define_shader(svga, SVGA3D_SHADERTYPE_PS, variant);
 173    if (ret != PIPE_OK)
 174       goto fail;
 175
 176    *out_variant = variant;
 177
 178    /* insert variants at head of linked list */
 179    variant->next = fs->base.variants;
 180    fs->base.variants = variant;
 181
 182    return PIPE_OK;
 183
 184 fail:
 185    if (variant) {
 186       svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_PS, variant);
 187    }
 188    return ret;
 189 }
 190
 191
 192 /* SVGA_NEW_TEXTURE_BINDING
 193  * SVGA_NEW_RAST
 194  * SVGA_NEW_NEED_SWTNL
 195  * SVGA_NEW_SAMPLER
 196  */
 197 static enum pipe_error
 198 make_fs_key(const struct svga_context *svga,
 199             struct svga_fragment_shader *fs,
 200             struct svga_fs_compile_key *key)
 201 {
 202    unsigned i;
 203    int idx = 0;
 204
 205    memset(key, 0, sizeof *key);
 206
 207    /* Only need fragment shader fixup for twoside lighting if doing
 208     * hwtnl.  Otherwise the draw module does the whole job for us.
 209     *
 210     * SVGA_NEW_SWTNL
 211     */
 212    if (!svga->state.sw.need_swtnl) {
 213       /* SVGA_NEW_RAST
 214        */
 215       key->light_twoside = svga->curr.rast->templ.light_twoside;
 216       key->front_ccw = svga->curr.rast->templ.front_ccw;
 217    }
 218
 219    /* The blend workaround for simulating logicop xor behaviour
 220     * requires that the incoming fragment color be white.  This change
 221     * achieves that by creating a variant of the current fragment
 222     * shader that overrides all output colors with 1,1,1,1
 223     *
 224     * This will work for most shaders, including those containing
 225     * TEXKIL and/or depth-write.  However, it will break on the
 226     * combination of xor-logicop plus alphatest.
 227     *
 228     * Ultimately, we could implement alphatest in the shader using
 229     * texkil prior to overriding the outgoing fragment color.
 230     *
 231     * SVGA_NEW_BLEND
 232     */
 233    if (svga->curr.blend->need_white_fragments) {
 234       key->white_fragments = 1;
 235    }
 236
 237 #ifdef DEBUG
 238    /*
 239     * We expect a consistent set of samplers and sampler views.
 240     * Do some debug checks/warnings here.
 241     */
 242    {
 243       static boolean warned = FALSE;
 244       unsigned i, n = MAX2(svga->curr.num_sampler_views,
 245                            svga->curr.num_samplers);
 246       /* Only warn once to prevent too much debug output */
 247       if (!warned) {
 248          if (svga->curr.num_sampler_views != svga->curr.num_samplers) {
 249             debug_printf("svga: mismatched number of sampler views (%u) "
 250                          "vs. samplers (%u)\n",
 251                          svga->curr.num_sampler_views,
 252                          svga->curr.num_samplers);
 253          }
 254          for (i = 0; i < n; i++) {
 255             if ((svga->curr.sampler_views[i] == NULL) !=
 256                 (svga->curr.sampler[i] == NULL))
 257                debug_printf("sampler_view[%u] = %p but sampler[%u] = %p\n",
 258                             i, svga->curr.sampler_views[i],
 259                             i, svga->curr.sampler[i]);
 260          }
 261          warned = TRUE;
 262       }
 263    }
 264 #endif
 265
 266    /* XXX: want to limit this to the textures that the shader actually
 267     * refers to.
 268     *
 269     * SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_SAMPLER
 270     */
 271    for (i = 0; i < svga->curr.num_sampler_views; i++) {
 272       if (svga->curr.sampler_views[i] && svga->curr.sampler[i]) {
 273          assert(svga->curr.sampler_views[i]->texture);
 274          key->tex[i].texture_target = svga->curr.sampler_views[i]->texture->target;
 275          if (!svga->curr.sampler[i]->normalized_coords) {
 276             key->tex[i].width_height_idx = idx++;
 277             key->tex[i].unnormalized = TRUE;
 278             ++key->num_unnormalized_coords;
 279          }
 280
 281          key->tex[i].swizzle_r = svga->curr.sampler_views[i]->swizzle_r;
 282          key->tex[i].swizzle_g = svga->curr.sampler_views[i]->swizzle_g;
 283          key->tex[i].swizzle_b = svga->curr.sampler_views[i]->swizzle_b;
 284          key->tex[i].swizzle_a = svga->curr.sampler_views[i]->swizzle_a;
 285       }
 286    }
 287    key->num_textures = svga->curr.num_sampler_views;
 288
 289    idx = 0;
 290    for (i = 0; i < svga->curr.num_samplers; ++i) {
 291       if (svga->curr.sampler_views[i] && svga->curr.sampler[i]) {
 292          struct pipe_resource *tex = svga->curr.sampler_views[i]->texture;
 293          struct svga_texture *stex = svga_texture(tex);
 294          SVGA3dSurfaceFormat format = stex->key.format;
 295
 296          if (format == SVGA3D_Z_D16 ||
 297              format == SVGA3D_Z_D24X8 ||
 298              format == SVGA3D_Z_D24S8) {
 299             /* If we're sampling from a SVGA3D_Z_D16, SVGA3D_Z_D24X8,
 300              * or SVGA3D_Z_D24S8 surface, we'll automatically get
 301              * shadow comparison.  But we only get LEQUAL mode.
 302              * Set TEX_COMPARE_NONE here so we don't emit the extra FS
 303              * code for shadow comparison.
 304              */
 305             key->tex[i].compare_mode = PIPE_TEX_COMPARE_NONE;
 306             key->tex[i].compare_func = PIPE_FUNC_NEVER;
 307             /* These depth formats _only_ support comparison mode and
 308              * not ordinary sampling so warn if the later is expected.
 309              */
 310             if (svga->curr.sampler[i]->compare_mode !=
 311                 PIPE_TEX_COMPARE_R_TO_TEXTURE) {
 312                debug_warn_once("Unsupported shadow compare mode");
 313             }
 314             /* The only supported comparison mode is LEQUAL */
 315             if (svga->curr.sampler[i]->compare_func != PIPE_FUNC_LEQUAL) {
 316                debug_warn_once("Unsupported shadow compare function");
 317             }
 318          }
 319          else {
 320             /* For other texture formats, just use the compare func/mode
 321              * as-is.  Should be no-ops for color textures.  For depth
 322              * textures, we do not get automatic depth compare.  We have
 323              * to do it ourselves in the shader.  And we don't get PCF.
 324              */
 325             key->tex[i].compare_mode = svga->curr.sampler[i]->compare_mode;
 326             key->tex[i].compare_func = svga->curr.sampler[i]->compare_func;
 327          }
 328       }
 329    }
 330
 331    /* sprite coord gen state */
 332    for (i = 0; i < svga->curr.num_samplers; ++i) {
 333       key->tex[i].sprite_texgen =
 334          svga->curr.rast->templ.sprite_coord_enable & (1 << i);
 335    }
 336
 337    key->sprite_origin_lower_left = (svga->curr.rast->templ.sprite_coord_mode
 338                                     == PIPE_SPRITE_COORD_LOWER_LEFT);
 339
 340    /* SVGA_NEW_FRAME_BUFFER */
 341    if (fs->base.info.color0_writes_all_cbufs) {
 342       /* Replicate color0 output to N colorbuffers */
 343       key->write_color0_to_n_cbufs = svga->curr.framebuffer.nr_cbufs;
 344    }
 345
 346    return PIPE_OK;
 347 }
 348
 349
 350 /**
 351  * svga_reemit_fs_bindings - Reemit the fragment shader bindings
 352  */
 353 enum pipe_error
 354 svga_reemit_fs_bindings(struct svga_context *svga)
 355 {
 356    enum pipe_error ret;
 357
 358    assert(svga->rebind.fs);
 359    assert(svga_have_gb_objects(svga));
 360
 361    if (!svga->state.hw_draw.fs)
 362       return PIPE_OK;
 363
 364    ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_PS,
 365                             svga->state.hw_draw.fs->gb_shader);
 366    if (ret != PIPE_OK)
 367       return ret;
 368
 369    svga->rebind.fs = FALSE;
 370    return PIPE_OK;
 371 }
 372
 373
 374
 375 static enum pipe_error
 376 emit_hw_fs(struct svga_context *svga, unsigned dirty)
 377 {
 378    struct svga_shader_variant *variant = NULL;
 379    enum pipe_error ret = PIPE_OK;
 380    struct svga_fragment_shader *fs = svga->curr.fs;
 381    struct svga_fs_compile_key key;
 382
 383    /* SVGA_NEW_BLEND
 384     * SVGA_NEW_TEXTURE_BINDING
 385     * SVGA_NEW_RAST
 386     * SVGA_NEW_NEED_SWTNL
 387     * SVGA_NEW_SAMPLER
 388     * SVGA_NEW_FRAME_BUFFER
 389     */
 390    ret = make_fs_key( svga, fs, &key );
 391    if (ret != PIPE_OK)
 392       return ret;
 393
 394    variant = search_fs_key( fs, &key );
 395    if (!variant) {
 396       ret = compile_fs( svga, fs, &key, &variant );
 397       if (ret != PIPE_OK)
 398          return ret;
 399    }
 400
 401    assert(variant);
 402
 403    if (variant != svga->state.hw_draw.fs) {
 404       if (svga_have_gb_objects(svga)) {
 405          /*
 406           * Bind is necessary here only because pipebuffer_fenced may move
 407           * the shader contents around....
 408           */
 409          ret = SVGA3D_BindGBShader(svga->swc, variant->gb_shader);
 410          if (ret != PIPE_OK)
 411             return ret;
 412
 413          ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_PS,
 414                                   variant->gb_shader);
 415          if (ret != PIPE_OK)
 416             return ret;
 417
 418          svga->rebind.fs = FALSE;
 419       }
 420       else {
 421          ret = SVGA3D_SetShader(svga->swc, SVGA3D_SHADERTYPE_PS, variant->id);
 422          if (ret != PIPE_OK)
 423             return ret;
 424       }
 425
 426       svga->dirty |= SVGA_NEW_FS_VARIANT;
 427       svga->state.hw_draw.fs = variant;
 428    }
 429
 430    return PIPE_OK;
 431 }
 432
 433 struct svga_tracked_state svga_hw_fs =
 434 {
 435    "fragment shader (hwtnl)",
 436    (SVGA_NEW_FS |
 437     SVGA_NEW_TEXTURE_BINDING |
 438     SVGA_NEW_NEED_SWTNL |
 439     SVGA_NEW_RAST |
 440     SVGA_NEW_SAMPLER |
 441     SVGA_NEW_FRAME_BUFFER |
 442     SVGA_NEW_BLEND),
 443    emit_hw_fs
 444 };
 445
 446
 447