src/vulkan/anv_compiler.cpp

   1 /*
   2  * Copyright © 2015 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  21  * DEALINGS IN THE SOFTWARE.
  22  */
  23
  24 #include <sys/stat.h>
  25 #include <unistd.h>
  26 #include <fcntl.h>
  27
  28 #include "anv_private.h"
  29
  30 #include <brw_context.h>
  31 #include <brw_wm.h> /* brw_new_shader_program is here */
  32 #include <brw_nir.h>
  33
  34 #include <brw_vs.h>
  35 #include <brw_gs.h>
  36 #include <brw_cs.h>
  37
  38 #include <mesa/main/shaderobj.h>
  39 #include <mesa/main/fbobject.h>
  40 #include <mesa/main/context.h>
  41 #include <mesa/program/program.h>
  42 #include <glsl/program.h>
  43
  44 /* XXX: We need this to keep symbols in nir.h from conflicting with the
  45  * generated GEN command packing headers.  We need to fix *both* to not
  46  * define something as generic as LOAD.
  47  */
  48 #undef LOAD
  49
  50 #include <glsl/nir/nir_spirv.h>
  51
  52 #define SPIR_V_MAGIC_NUMBER 0x07230203
  53
  54 static void
  55 fail_if(int cond, const char *format, ...)
  56 {
  57    va_list args;
  58
  59    if (!cond)
  60       return;
  61
  62    va_start(args, format);
  63    vfprintf(stderr, format, args);
  64    va_end(args);
  65
  66    exit(1);
  67 }
  68
  69 static VkResult
  70 set_binding_table_layout(struct brw_stage_prog_data *prog_data,
  71                          struct anv_pipeline *pipeline, uint32_t stage)
  72 {
  73    uint32_t bias, count, k, *map;
  74    struct anv_pipeline_layout *layout = pipeline->layout;
  75
  76    /* No layout is valid for shaders that don't bind any resources. */
  77    if (pipeline->layout == NULL)
  78       return VK_SUCCESS;
  79
  80    if (stage == VK_SHADER_STAGE_FRAGMENT)
  81       bias = MAX_RTS;
  82    else
  83       bias = 0;
  84
  85    count = layout->stage[stage].surface_count;
  86    prog_data->map_entries =
  87       (uint32_t *) malloc(count * sizeof(prog_data->map_entries[0]));
  88    if (prog_data->map_entries == NULL)
  89       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
  90
  91    k = bias;
  92    map = prog_data->map_entries;
  93    for (uint32_t i = 0; i < layout->num_sets; i++) {
  94       prog_data->bind_map[i].index = map;
  95       for (uint32_t j = 0; j < layout->set[i].layout->stage[stage].surface_count; j++)
  96          *map++ = k++;
  97
  98       prog_data->bind_map[i].index_count =
  99          layout->set[i].layout->stage[stage].surface_count;
 100    }
 101
 102    return VK_SUCCESS;
 103 }
 104
 105 static uint32_t
 106 upload_kernel(struct anv_pipeline *pipeline, const void *data, size_t size)
 107 {
 108    struct anv_state state =
 109       anv_state_stream_alloc(&pipeline->program_stream, size, 64);
 110
 111    assert(size < pipeline->program_stream.block_pool->block_size);
 112
 113    memcpy(state.map, data, size);
 114
 115    return state.offset;
 116 }
 117
 118 static void
 119 brw_vs_populate_key(struct brw_context *brw,
 120                     struct brw_vertex_program *vp,
 121                     struct brw_vs_prog_key *key)
 122 {
 123    struct gl_context *ctx = &brw->ctx;
 124    /* BRW_NEW_VERTEX_PROGRAM */
 125    struct gl_program *prog = (struct gl_program *) vp;
 126
 127    memset(key, 0, sizeof(*key));
 128
 129    /* Just upload the program verbatim for now.  Always send it all
 130     * the inputs it asks for, whether they are varying or not.
 131     */
 132    key->base.program_string_id = vp->id;
 133    brw_setup_vue_key_clip_info(brw, &key->base,
 134                                vp->program.Base.UsesClipDistanceOut);
 135
 136    /* _NEW_POLYGON */
 137    if (brw->gen < 6) {
 138       key->copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL ||
 139                            ctx->Polygon.BackMode != GL_FILL);
 140    }
 141
 142    if (prog->OutputsWritten & (VARYING_BIT_COL0 | VARYING_BIT_COL1 |
 143                                VARYING_BIT_BFC0 | VARYING_BIT_BFC1)) {
 144       /* _NEW_LIGHT | _NEW_BUFFERS */
 145       key->clamp_vertex_color = ctx->Light._ClampVertexColor;
 146    }
 147
 148    /* _NEW_POINT */
 149    if (brw->gen < 6 && ctx->Point.PointSprite) {
 150       for (int i = 0; i < 8; i++) {
 151          if (ctx->Point.CoordReplace[i])
 152             key->point_coord_replace |= (1 << i);
 153       }
 154    }
 155
 156    /* _NEW_TEXTURE */
 157    brw_populate_sampler_prog_key_data(ctx, prog, brw->vs.base.sampler_count,
 158                                       &key->base.tex);
 159 }
 160
 161 static bool
 162 really_do_vs_prog(struct brw_context *brw,
 163                   struct gl_shader_program *prog,
 164                   struct brw_vertex_program *vp,
 165                   struct brw_vs_prog_key *key, struct anv_pipeline *pipeline)
 166 {
 167    GLuint program_size;
 168    const GLuint *program;
 169    struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data;
 170    struct brw_stage_prog_data *stage_prog_data = &prog_data->base.base;
 171    void *mem_ctx;
 172    struct gl_shader *vs = NULL;
 173
 174    if (prog)
 175       vs = prog->_LinkedShaders[MESA_SHADER_VERTEX];
 176
 177    memset(prog_data, 0, sizeof(*prog_data));
 178
 179    mem_ctx = ralloc_context(NULL);
 180
 181    /* Allocate the references to the uniforms that will end up in the
 182     * prog_data associated with the compiled program, and which will be freed
 183     * by the state cache.
 184     */
 185    int param_count;
 186    if (vs) {
 187       /* We add padding around uniform values below vec4 size, with the worst
 188        * case being a float value that gets blown up to a vec4, so be
 189        * conservative here.
 190        */
 191       param_count = vs->num_uniform_components * 4;
 192
 193    } else {
 194       param_count = vp->program.Base.Parameters->NumParameters * 4;
 195    }
 196    /* vec4_visitor::setup_uniform_clipplane_values() also uploads user clip
 197     * planes as uniforms.
 198     */
 199    param_count += key->base.nr_userclip_plane_consts * 4;
 200
 201    /* Setting nr_params here NOT to the size of the param and pull_param
 202     * arrays, but to the number of uniform components vec4_visitor
 203     * needs. vec4_visitor::setup_uniforms() will set it back to a proper value.
 204     */
 205    stage_prog_data->nr_params = ALIGN(param_count, 4) / 4;
 206    if (vs) {
 207       stage_prog_data->nr_params += vs->num_samplers;
 208    }
 209
 210    GLbitfield64 outputs_written = vp->program.Base.OutputsWritten;
 211    prog_data->inputs_read = vp->program.Base.InputsRead;
 212
 213    if (key->copy_edgeflag) {
 214       outputs_written |= BITFIELD64_BIT(VARYING_SLOT_EDGE);
 215       prog_data->inputs_read |= VERT_BIT_EDGEFLAG;
 216    }
 217
 218    if (brw->gen < 6) {
 219       /* Put dummy slots into the VUE for the SF to put the replaced
 220        * point sprite coords in.  We shouldn't need these dummy slots,
 221        * which take up precious URB space, but it would mean that the SF
 222        * doesn't get nice aligned pairs of input coords into output
 223        * coords, which would be a pain to handle.
 224        */
 225       for (int i = 0; i < 8; i++) {
 226          if (key->point_coord_replace & (1 << i))
 227             outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i);
 228       }
 229
 230       /* if back colors are written, allocate slots for front colors too */
 231       if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC0))
 232          outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL0);
 233       if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC1))
 234          outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL1);
 235    }
 236
 237    /* In order for legacy clipping to work, we need to populate the clip
 238     * distance varying slots whenever clipping is enabled, even if the vertex
 239     * shader doesn't write to gl_ClipDistance.
 240     */
 241    if (key->base.userclip_active) {
 242       outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
 243       outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
 244    }
 245
 246    brw_compute_vue_map(brw->intelScreen->devinfo,
 247                        &prog_data->base.vue_map, outputs_written);
 248 \
 249    set_binding_table_layout(&prog_data->base.base, pipeline,
 250                             VK_SHADER_STAGE_VERTEX);
 251
 252    /* Emit GEN4 code.
 253     */
 254    program = brw_vs_emit(brw, mem_ctx, key, prog_data, &vp->program,
 255                          prog, &program_size);
 256    if (program == NULL) {
 257       ralloc_free(mem_ctx);
 258       return false;
 259    }
 260
 261    const uint32_t offset = upload_kernel(pipeline, program, program_size);
 262    if (prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) {
 263       pipeline->vs_simd8 = offset;
 264       pipeline->vs_vec4 = NO_KERNEL;
 265    } else {
 266       pipeline->vs_simd8 = NO_KERNEL;
 267       pipeline->vs_vec4 = offset;
 268    }
 269
 270    ralloc_free(mem_ctx);
 271
 272    return true;
 273 }
 274
 275 void brw_wm_populate_key(struct brw_context *brw,
 276                          struct brw_fragment_program *fp,
 277                          struct brw_wm_prog_key *key)
 278 {
 279    struct gl_context *ctx = &brw->ctx;
 280    struct gl_program *prog = (struct gl_program *) brw->fragment_program;
 281    GLuint lookup = 0;
 282    GLuint line_aa;
 283    bool program_uses_dfdy = fp->program.UsesDFdy;
 284    struct gl_framebuffer draw_buffer;
 285    bool multisample_fbo;
 286
 287    memset(key, 0, sizeof(*key));
 288
 289    for (int i = 0; i < MAX_SAMPLERS; i++) {
 290       /* Assume color sampler, no swizzling. */
 291       key->tex.swizzles[i] = SWIZZLE_XYZW;
 292    }
 293
 294    /* A non-zero framebuffer name indicates that the framebuffer was created by
 295     * the user rather than the window system. */
 296    draw_buffer.Name = 1;
 297    draw_buffer.Visual.samples = 1;
 298    draw_buffer._NumColorDrawBuffers = 1;
 299    draw_buffer._NumColorDrawBuffers = 1;
 300    draw_buffer.Width = 400;
 301    draw_buffer.Height = 400;
 302    ctx->DrawBuffer = &draw_buffer;
 303
 304    multisample_fbo = ctx->DrawBuffer->Visual.samples > 1;
 305
 306    /* Build the index for table lookup
 307     */
 308    if (brw->gen < 6) {
 309       /* _NEW_COLOR */
 310       if (fp->program.UsesKill || ctx->Color.AlphaEnabled)
 311          lookup |= IZ_PS_KILL_ALPHATEST_BIT;
 312
 313       if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
 314          lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
 315
 316       /* _NEW_DEPTH */
 317       if (ctx->Depth.Test)
 318          lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
 319
 320       if (ctx->Depth.Test && ctx->Depth.Mask) /* ?? */
 321          lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
 322
 323       /* _NEW_STENCIL | _NEW_BUFFERS */
 324       if (ctx->Stencil._Enabled) {
 325          lookup |= IZ_STENCIL_TEST_ENABLE_BIT;
 326
 327          if (ctx->Stencil.WriteMask[0] ||
 328              ctx->Stencil.WriteMask[ctx->Stencil._BackFace])
 329             lookup |= IZ_STENCIL_WRITE_ENABLE_BIT;
 330       }
 331       key->iz_lookup = lookup;
 332    }
 333
 334    line_aa = AA_NEVER;
 335
 336    /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */
 337    if (ctx->Line.SmoothFlag) {
 338       if (brw->reduced_primitive == GL_LINES) {
 339          line_aa = AA_ALWAYS;
 340       }
 341       else if (brw->reduced_primitive == GL_TRIANGLES) {
 342          if (ctx->Polygon.FrontMode == GL_LINE) {
 343             line_aa = AA_SOMETIMES;
 344
 345             if (ctx->Polygon.BackMode == GL_LINE ||
 346                 (ctx->Polygon.CullFlag &&
 347                  ctx->Polygon.CullFaceMode == GL_BACK))
 348                line_aa = AA_ALWAYS;
 349          }
 350          else if (ctx->Polygon.BackMode == GL_LINE) {
 351             line_aa = AA_SOMETIMES;
 352
 353             if ((ctx->Polygon.CullFlag &&
 354                  ctx->Polygon.CullFaceMode == GL_FRONT))
 355                line_aa = AA_ALWAYS;
 356          }
 357       }
 358    }
 359
 360    key->line_aa = line_aa;
 361
 362    /* _NEW_HINT */
 363    key->high_quality_derivatives =
 364       ctx->Hint.FragmentShaderDerivative == GL_NICEST;
 365
 366    if (brw->gen < 6)
 367       key->stats_wm = brw->stats_wm;
 368
 369    /* _NEW_LIGHT */
 370    key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT);
 371
 372    /* _NEW_FRAG_CLAMP | _NEW_BUFFERS */
 373    key->clamp_fragment_color = ctx->Color._ClampFragmentColor;
 374
 375    /* _NEW_TEXTURE */
 376    brw_populate_sampler_prog_key_data(ctx, prog, brw->wm.base.sampler_count,
 377                                       &key->tex);
 378
 379    /* _NEW_BUFFERS */
 380    /*
 381     * Include the draw buffer origin and height so that we can calculate
 382     * fragment position values relative to the bottom left of the drawable,
 383     * from the incoming screen origin relative position we get as part of our
 384     * payload.
 385     *
 386     * This is only needed for the WM_WPOSXY opcode when the fragment program
 387     * uses the gl_FragCoord input.
 388     *
 389     * We could avoid recompiling by including this as a constant referenced by
 390     * our program, but if we were to do that it would also be nice to handle
 391     * getting that constant updated at batchbuffer submit time (when we
 392     * hold the lock and know where the buffer really is) rather than at emit
 393     * time when we don't hold the lock and are just guessing.  We could also
 394     * just avoid using this as key data if the program doesn't use
 395     * fragment.position.
 396     *
 397     * For DRI2 the origin_x/y will always be (0,0) but we still need the
 398     * drawable height in order to invert the Y axis.
 399     */
 400    if (fp->program.Base.InputsRead & VARYING_BIT_POS) {
 401       key->drawable_height = ctx->DrawBuffer->Height;
 402    }
 403
 404    if ((fp->program.Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) {
 405       key->render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
 406    }
 407
 408    /* _NEW_BUFFERS */
 409    key->nr_color_regions = ctx->DrawBuffer->_NumColorDrawBuffers;
 410
 411    /* _NEW_MULTISAMPLE, _NEW_COLOR, _NEW_BUFFERS */
 412    key->replicate_alpha = ctx->DrawBuffer->_NumColorDrawBuffers > 1 &&
 413       (ctx->Multisample.SampleAlphaToCoverage || ctx->Color.AlphaEnabled);
 414
 415    /* _NEW_BUFFERS _NEW_MULTISAMPLE */
 416    /* Ignore sample qualifier while computing this flag. */
 417    key->persample_shading =
 418       _mesa_get_min_invocations_per_fragment(ctx, &fp->program, true) > 1;
 419    if (key->persample_shading)
 420       key->persample_2x = ctx->DrawBuffer->Visual.samples == 2;
 421
 422    key->compute_pos_offset =
 423       _mesa_get_min_invocations_per_fragment(ctx, &fp->program, false) > 1 &&
 424       fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_POS;
 425
 426    key->compute_sample_id =
 427       multisample_fbo &&
 428       ctx->Multisample.Enabled &&
 429       (fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_ID);
 430
 431    /* BRW_NEW_VUE_MAP_GEOM_OUT */
 432    if (brw->gen < 6 || _mesa_bitcount_64(fp->program.Base.InputsRead &
 433                                          BRW_FS_VARYING_INPUT_MASK) > 16)
 434       key->input_slots_valid = brw->vue_map_geom_out.slots_valid;
 435
 436
 437    /* _NEW_COLOR | _NEW_BUFFERS */
 438    /* Pre-gen6, the hardware alpha test always used each render
 439     * target's alpha to do alpha test, as opposed to render target 0's alpha
 440     * like GL requires.  Fix that by building the alpha test into the
 441     * shader, and we'll skip enabling the fixed function alpha test.
 442     */
 443    if (brw->gen < 6 && ctx->DrawBuffer->_NumColorDrawBuffers > 1 && ctx->Color.AlphaEnabled) {
 444       key->alpha_test_func = ctx->Color.AlphaFunc;
 445       key->alpha_test_ref = ctx->Color.AlphaRef;
 446    }
 447
 448    /* The unique fragment program ID */
 449    key->program_string_id = fp->id;
 450
 451    ctx->DrawBuffer = NULL;
 452 }
 453
 454 static uint8_t
 455 computed_depth_mode(struct gl_fragment_program *fp)
 456 {
 457    if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
 458       switch (fp->FragDepthLayout) {
 459       case FRAG_DEPTH_LAYOUT_NONE:
 460       case FRAG_DEPTH_LAYOUT_ANY:
 461          return BRW_PSCDEPTH_ON;
 462       case FRAG_DEPTH_LAYOUT_GREATER:
 463          return BRW_PSCDEPTH_ON_GE;
 464       case FRAG_DEPTH_LAYOUT_LESS:
 465          return BRW_PSCDEPTH_ON_LE;
 466       case FRAG_DEPTH_LAYOUT_UNCHANGED:
 467          return BRW_PSCDEPTH_OFF;
 468       }
 469    }
 470    return BRW_PSCDEPTH_OFF;
 471 }
 472
 473 static bool
 474 really_do_wm_prog(struct brw_context *brw,
 475                   struct gl_shader_program *prog,
 476                   struct brw_fragment_program *fp,
 477                   struct brw_wm_prog_key *key, struct anv_pipeline *pipeline)
 478 {
 479    struct gl_context *ctx = &brw->ctx;
 480    void *mem_ctx = ralloc_context(NULL);
 481    struct brw_wm_prog_data *prog_data = &pipeline->wm_prog_data;
 482    struct gl_shader *fs = NULL;
 483    unsigned int program_size;
 484    const uint32_t *program;
 485
 486    if (prog)
 487       fs = prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
 488
 489    memset(prog_data, 0, sizeof(*prog_data));
 490
 491    /* key->alpha_test_func means simulating alpha testing via discards,
 492     * so the shader definitely kills pixels.
 493     */
 494    prog_data->uses_kill = fp->program.UsesKill || key->alpha_test_func;
 495
 496    prog_data->computed_depth_mode = computed_depth_mode(&fp->program);
 497
 498    /* Allocate the references to the uniforms that will end up in the
 499     * prog_data associated with the compiled program, and which will be freed
 500     * by the state cache.
 501     */
 502    int param_count;
 503    if (fs) {
 504       param_count = fs->num_uniform_components;
 505    } else {
 506       param_count = fp->program.Base.Parameters->NumParameters * 4;
 507    }
 508    /* The backend also sometimes adds params for texture size. */
 509    param_count += 2 * ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits;
 510    prog_data->base.param =
 511       rzalloc_array(NULL, const gl_constant_value *, param_count);
 512    prog_data->base.pull_param =
 513       rzalloc_array(NULL, const gl_constant_value *, param_count);
 514    prog_data->base.nr_params = param_count;
 515
 516    prog_data->barycentric_interp_modes =
 517       brw_compute_barycentric_interp_modes(brw, key->flat_shade,
 518                                            key->persample_shading,
 519                                            &fp->program);
 520
 521    set_binding_table_layout(&prog_data->base, pipeline,
 522                             VK_SHADER_STAGE_FRAGMENT);
 523    /* This needs to come after shader time and pull constant entries, but we
 524     * don't have those set up now, so just put it after the layout entries.
 525     */
 526    prog_data->binding_table.render_target_start = 0;
 527
 528    program = brw_wm_fs_emit(brw, mem_ctx, key, prog_data,
 529                             &fp->program, prog, &program_size);
 530    if (program == NULL) {
 531       ralloc_free(mem_ctx);
 532       return false;
 533    }
 534
 535    uint32_t offset = upload_kernel(pipeline, program, program_size);
 536
 537    if (prog_data->no_8)
 538       pipeline->ps_simd8 = NO_KERNEL;
 539    else
 540       pipeline->ps_simd8 = offset;
 541
 542    if (prog_data->no_8 || prog_data->prog_offset_16) {
 543       pipeline->ps_simd16 = offset + prog_data->prog_offset_16;
 544    } else {
 545       pipeline->ps_simd16 = NO_KERNEL;
 546    }
 547
 548    ralloc_free(mem_ctx);
 549
 550    return true;
 551 }
 552
 553 static void
 554 brw_gs_populate_key(struct brw_context *brw,
 555                     struct anv_pipeline *pipeline,
 556                     struct brw_geometry_program *gp,
 557                     struct brw_gs_prog_key *key)
 558 {
 559    struct gl_context *ctx = &brw->ctx;
 560    struct brw_stage_state *stage_state = &brw->gs.base;
 561    struct gl_program *prog = &gp->program.Base;
 562
 563    memset(key, 0, sizeof(*key));
 564
 565    key->base.program_string_id = gp->id;
 566    brw_setup_vue_key_clip_info(brw, &key->base,
 567                                gp->program.Base.UsesClipDistanceOut);
 568
 569    /* _NEW_TEXTURE */
 570    brw_populate_sampler_prog_key_data(ctx, prog, stage_state->sampler_count,
 571                                       &key->base.tex);
 572
 573    struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data;
 574
 575    /* BRW_NEW_VUE_MAP_VS */
 576    key->input_varyings = prog_data->base.vue_map.slots_valid;
 577 }
 578
 579 static bool
 580 really_do_gs_prog(struct brw_context *brw,
 581                   struct gl_shader_program *prog,
 582                   struct brw_geometry_program *gp,
 583                   struct brw_gs_prog_key *key, struct anv_pipeline *pipeline)
 584 {
 585    struct brw_gs_compile_output output;
 586
 587    /* FIXME: We pass the bind map to the compile in the output struct. Need
 588     * something better. */
 589    set_binding_table_layout(&output.prog_data.base.base,
 590                             pipeline, VK_SHADER_STAGE_GEOMETRY);
 591
 592    brw_compile_gs_prog(brw, prog, gp, key, &output);
 593
 594    pipeline->gs_vec4 = upload_kernel(pipeline, output.program, output.program_size);
 595    pipeline->gs_vertex_count = gp->program.VerticesIn;
 596
 597    ralloc_free(output.mem_ctx);
 598
 599    return true;
 600 }
 601
 602 static bool
 603 brw_codegen_cs_prog(struct brw_context *brw,
 604                     struct gl_shader_program *prog,
 605                     struct brw_compute_program *cp,
 606                     struct brw_cs_prog_key *key, struct anv_pipeline *pipeline)
 607 {
 608    struct gl_context *ctx = &brw->ctx;
 609    const GLuint *program;
 610    void *mem_ctx = ralloc_context(NULL);
 611    GLuint program_size;
 612    struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
 613
 614    struct gl_shader *cs = prog->_LinkedShaders[MESA_SHADER_COMPUTE];
 615    assert (cs);
 616
 617    memset(prog_data, 0, sizeof(*prog_data));
 618
 619    set_binding_table_layout(&prog_data->base, pipeline, VK_SHADER_STAGE_COMPUTE);
 620
 621    /* Allocate the references to the uniforms that will end up in the
 622     * prog_data associated with the compiled program, and which will be freed
 623     * by the state cache.
 624     */
 625    int param_count = cs->num_uniform_components;
 626
 627    /* The backend also sometimes adds params for texture size. */
 628    param_count += 2 * ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;
 629    prog_data->base.param =
 630       rzalloc_array(NULL, const gl_constant_value *, param_count);
 631    prog_data->base.pull_param =
 632       rzalloc_array(NULL, const gl_constant_value *, param_count);
 633    prog_data->base.nr_params = param_count;
 634
 635    program = brw_cs_emit(brw, mem_ctx, key, prog_data,
 636                          &cp->program, prog, &program_size);
 637    if (program == NULL) {
 638       ralloc_free(mem_ctx);
 639       return false;
 640    }
 641
 642    if (unlikely(INTEL_DEBUG & DEBUG_CS))
 643       fprintf(stderr, "\n");
 644
 645    pipeline->cs_simd = upload_kernel(pipeline, program, program_size);
 646
 647    ralloc_free(mem_ctx);
 648
 649    return true;
 650 }
 651
 652 static void
 653 brw_cs_populate_key(struct brw_context *brw,
 654                     struct brw_compute_program *bcp, struct brw_cs_prog_key *key)
 655 {
 656    memset(key, 0, sizeof(*key));
 657
 658    /* The unique compute program ID */
 659    key->program_string_id = bcp->id;
 660 }
 661
 662 static void
 663 fail_on_compile_error(int status, const char *msg)
 664 {
 665    int source, line, column;
 666    char error[256];
 667
 668    if (status)
 669       return;
 670
 671    if (sscanf(msg, "%d:%d(%d): error: %255[^\n]", &source, &line, &column, error) == 4)
 672       fail_if(!status, "%d:%s\n", line, error);
 673    else
 674       fail_if(!status, "%s\n", msg);
 675 }
 676
 677 struct anv_compiler {
 678    struct anv_device *device;
 679    struct intel_screen *screen;
 680    struct brw_context *brw;
 681    struct gl_pipeline_object pipeline;
 682 };
 683
 684 extern "C" {
 685
 686 struct anv_compiler *
 687 anv_compiler_create(struct anv_device *device)
 688 {
 689    const struct brw_device_info *devinfo = &device->info;
 690    struct anv_compiler *compiler;
 691    struct gl_context *ctx;
 692
 693    compiler = rzalloc(NULL, struct anv_compiler);
 694    if (compiler == NULL)
 695       return NULL;
 696
 697    compiler->screen = rzalloc(compiler, struct intel_screen);
 698    if (compiler->screen == NULL)
 699       goto fail;
 700
 701    compiler->brw = rzalloc(compiler, struct brw_context);
 702    if (compiler->brw == NULL)
 703       goto fail;
 704
 705    compiler->device = device;
 706
 707    compiler->brw->optionCache.info = NULL;
 708    compiler->brw->bufmgr = NULL;
 709    compiler->brw->gen = devinfo->gen;
 710    compiler->brw->is_g4x = devinfo->is_g4x;
 711    compiler->brw->is_baytrail = devinfo->is_baytrail;
 712    compiler->brw->is_haswell = devinfo->is_haswell;
 713    compiler->brw->is_cherryview = devinfo->is_cherryview;
 714
 715    /* We need this at least for CS, which will check brw->max_cs_threads
 716     * against the work group size. */
 717    compiler->brw->max_vs_threads = devinfo->max_vs_threads;
 718    compiler->brw->max_hs_threads = devinfo->max_hs_threads;
 719    compiler->brw->max_ds_threads = devinfo->max_ds_threads;
 720    compiler->brw->max_gs_threads = devinfo->max_gs_threads;
 721    compiler->brw->max_wm_threads = devinfo->max_wm_threads;
 722    compiler->brw->max_cs_threads = devinfo->max_cs_threads;
 723    compiler->brw->urb.size = devinfo->urb.size;
 724    compiler->brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
 725    compiler->brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
 726    compiler->brw->urb.max_hs_entries = devinfo->urb.max_hs_entries;
 727    compiler->brw->urb.max_ds_entries = devinfo->urb.max_ds_entries;
 728    compiler->brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;
 729
 730    compiler->brw->intelScreen = compiler->screen;
 731    compiler->screen->devinfo = &device->info;
 732
 733    brw_process_intel_debug_variable(compiler->screen);
 734
 735    compiler->screen->compiler = brw_compiler_create(compiler, &device->info);
 736
 737    ctx = &compiler->brw->ctx;
 738    _mesa_init_shader_object_functions(&ctx->Driver);
 739
 740    _mesa_init_constants(&ctx->Const, API_OPENGL_CORE);
 741
 742    brw_initialize_context_constants(compiler->brw);
 743
 744    intelInitExtensions(ctx);
 745
 746    /* Set dd::NewShader */
 747    brwInitFragProgFuncs(&ctx->Driver);
 748
 749    ctx->_Shader = &compiler->pipeline;
 750
 751    compiler->brw->precompile = false;
 752
 753    return compiler;
 754
 755  fail:
 756    ralloc_free(compiler);
 757    return NULL;
 758 }
 759
 760 void
 761 anv_compiler_destroy(struct anv_compiler *compiler)
 762 {
 763    _mesa_free_errors_data(&compiler->brw->ctx);
 764    ralloc_free(compiler);
 765 }
 766
 767 /* From gen7_urb.c */
 768
 769 /* FIXME: Add to struct intel_device_info */
 770
 771 static const int gen8_push_size = 32 * 1024;
 772
 773 static void
 774 gen7_compute_urb_partition(struct anv_pipeline *pipeline)
 775 {
 776    const struct brw_device_info *devinfo = &pipeline->device->info;
 777    bool vs_present = pipeline->vs_simd8 != NO_KERNEL;
 778    unsigned vs_size = vs_present ? pipeline->vs_prog_data.base.urb_entry_size : 1;
 779    unsigned vs_entry_size_bytes = vs_size * 64;
 780    bool gs_present = pipeline->gs_vec4 != NO_KERNEL;
 781    unsigned gs_size = gs_present ? pipeline->gs_prog_data.base.urb_entry_size : 1;
 782    unsigned gs_entry_size_bytes = gs_size * 64;
 783
 784    /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS):
 785     *
 786     *     VS Number of URB Entries must be divisible by 8 if the VS URB Entry
 787     *     Allocation Size is less than 9 512-bit URB entries.
 788     *
 789     * Similar text exists for GS.
 790     */
 791    unsigned vs_granularity = (vs_size < 9) ? 8 : 1;
 792    unsigned gs_granularity = (gs_size < 9) ? 8 : 1;
 793
 794    /* URB allocations must be done in 8k chunks. */
 795    unsigned chunk_size_bytes = 8192;
 796
 797    /* Determine the size of the URB in chunks. */
 798    unsigned urb_chunks = devinfo->urb.size * 1024 / chunk_size_bytes;
 799
 800    /* Reserve space for push constants */
 801    unsigned push_constant_bytes = gen8_push_size;
 802    unsigned push_constant_chunks =
 803       push_constant_bytes / chunk_size_bytes;
 804
 805    /* Initially, assign each stage the minimum amount of URB space it needs,
 806     * and make a note of how much additional space it "wants" (the amount of
 807     * additional space it could actually make use of).
 808     */
 809
 810    /* VS has a lower limit on the number of URB entries */
 811    unsigned vs_chunks =
 812       ALIGN(devinfo->urb.min_vs_entries * vs_entry_size_bytes,
 813             chunk_size_bytes) / chunk_size_bytes;
 814    unsigned vs_wants =
 815       ALIGN(devinfo->urb.max_vs_entries * vs_entry_size_bytes,
 816             chunk_size_bytes) / chunk_size_bytes - vs_chunks;
 817
 818    unsigned gs_chunks = 0;
 819    unsigned gs_wants = 0;
 820    if (gs_present) {
 821       /* There are two constraints on the minimum amount of URB space we can
 822        * allocate:
 823        *
 824        * (1) We need room for at least 2 URB entries, since we always operate
 825        * the GS in DUAL_OBJECT mode.
 826        *
 827        * (2) We can't allocate less than nr_gs_entries_granularity.
 828        */
 829       gs_chunks = ALIGN(MAX2(gs_granularity, 2) * gs_entry_size_bytes,
 830                         chunk_size_bytes) / chunk_size_bytes;
 831       gs_wants =
 832          ALIGN(devinfo->urb.max_gs_entries * gs_entry_size_bytes,
 833                chunk_size_bytes) / chunk_size_bytes - gs_chunks;
 834    }
 835
 836    /* There should always be enough URB space to satisfy the minimum
 837     * requirements of each stage.
 838     */
 839    unsigned total_needs = push_constant_chunks + vs_chunks + gs_chunks;
 840    assert(total_needs <= urb_chunks);
 841
 842    /* Mete out remaining space (if any) in proportion to "wants". */
 843    unsigned total_wants = vs_wants + gs_wants;
 844    unsigned remaining_space = urb_chunks - total_needs;
 845    if (remaining_space > total_wants)
 846       remaining_space = total_wants;
 847    if (remaining_space > 0) {
 848       unsigned vs_additional = (unsigned)
 849          round(vs_wants * (((double) remaining_space) / total_wants));
 850       vs_chunks += vs_additional;
 851       remaining_space -= vs_additional;
 852       gs_chunks += remaining_space;
 853    }
 854
 855    /* Sanity check that we haven't over-allocated. */
 856    assert(push_constant_chunks + vs_chunks + gs_chunks <= urb_chunks);
 857
 858    /* Finally, compute the number of entries that can fit in the space
 859     * allocated to each stage.
 860     */
 861    unsigned nr_vs_entries = vs_chunks * chunk_size_bytes / vs_entry_size_bytes;
 862    unsigned nr_gs_entries = gs_chunks * chunk_size_bytes / gs_entry_size_bytes;
 863
 864    /* Since we rounded up when computing *_wants, this may be slightly more
 865     * than the maximum allowed amount, so correct for that.
 866     */
 867    nr_vs_entries = MIN2(nr_vs_entries, devinfo->urb.max_vs_entries);
 868    nr_gs_entries = MIN2(nr_gs_entries, devinfo->urb.max_gs_entries);
 869
 870    /* Ensure that we program a multiple of the granularity. */
 871    nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, vs_granularity);
 872    nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, gs_granularity);
 873
 874    /* Finally, sanity check to make sure we have at least the minimum number
 875     * of entries needed for each stage.
 876     */
 877    assert(nr_vs_entries >= devinfo->urb.min_vs_entries);
 878    if (gs_present)
 879       assert(nr_gs_entries >= 2);
 880
 881    /* Lay out the URB in the following order:
 882     * - push constants
 883     * - VS
 884     * - GS
 885     */
 886    pipeline->urb.vs_start = push_constant_chunks;
 887    pipeline->urb.vs_size = vs_size;
 888    pipeline->urb.nr_vs_entries = nr_vs_entries;
 889
 890    pipeline->urb.gs_start = push_constant_chunks + vs_chunks;
 891    pipeline->urb.gs_size = gs_size;
 892    pipeline->urb.nr_gs_entries = nr_gs_entries;
 893 }
 894
 895 static const struct {
 896    uint32_t token;
 897    gl_shader_stage stage;
 898    const char *name;
 899 } stage_info[] = {
 900    { GL_VERTEX_SHADER, MESA_SHADER_VERTEX, "vertex" },
 901    { GL_TESS_CONTROL_SHADER, (gl_shader_stage)-1,"tess control" },
 902    { GL_TESS_EVALUATION_SHADER, (gl_shader_stage)-1, "tess evaluation" },
 903    { GL_GEOMETRY_SHADER, MESA_SHADER_GEOMETRY, "geometry" },
 904    { GL_FRAGMENT_SHADER, MESA_SHADER_FRAGMENT, "fragment" },
 905    { GL_COMPUTE_SHADER, MESA_SHADER_COMPUTE, "compute" },
 906 };
 907
 908 struct spirv_header{
 909    uint32_t magic;
 910    uint32_t version;
 911    uint32_t gen_magic;
 912 };
 913
 914 static const char *
 915 src_as_glsl(const char *data)
 916 {
 917    const struct spirv_header *as_spirv = (const struct spirv_header *)data;
 918
 919    /* Check alignment */
 920    if ((intptr_t)data & 0x3) {
 921       return data;
 922    }
 923
 924    if (as_spirv->magic == SPIR_V_MAGIC_NUMBER) {
 925       /* LunarG back-door */
 926       if (as_spirv->version == 0)
 927          return data + 12;
 928       else
 929          return NULL;
 930    } else {
 931       return data;
 932    }
 933 }
 934
 935 static void
 936 anv_compile_shader_glsl(struct anv_compiler *compiler,
 937                    struct gl_shader_program *program,
 938                    struct anv_pipeline *pipeline, uint32_t stage)
 939 {
 940    struct brw_context *brw = compiler->brw;
 941    struct gl_shader *shader;
 942    int name = 0;
 943
 944    shader = brw_new_shader(&brw->ctx, name, stage_info[stage].token);
 945    fail_if(shader == NULL, "failed to create %s shader\n", stage_info[stage].name);
 946
 947    shader->Source = strdup(src_as_glsl(pipeline->shaders[stage]->module->data));
 948    _mesa_glsl_compile_shader(&brw->ctx, shader, false, false);
 949    fail_on_compile_error(shader->CompileStatus, shader->InfoLog);
 950
 951    program->Shaders[program->NumShaders] = shader;
 952    program->NumShaders++;
 953 }
 954
 955 static void
 956 setup_nir_io(struct gl_program *prog,
 957              nir_shader *shader)
 958 {
 959    foreach_list_typed(nir_variable, var, node, &shader->inputs) {
 960       prog->InputsRead |= BITFIELD64_BIT(var->data.location);
 961    }
 962
 963    foreach_list_typed(nir_variable, var, node, &shader->outputs) {
 964       prog->OutputsWritten |= BITFIELD64_BIT(var->data.location);
 965    }
 966 }
 967
 968 static void
 969 anv_compile_shader_spirv(struct anv_compiler *compiler,
 970                          struct gl_shader_program *program,
 971                          struct anv_pipeline *pipeline, uint32_t stage)
 972 {
 973    struct brw_context *brw = compiler->brw;
 974    struct anv_shader *shader = pipeline->shaders[stage];
 975    struct gl_shader *mesa_shader;
 976    int name = 0;
 977
 978    mesa_shader = brw_new_shader(&brw->ctx, name, stage_info[stage].token);
 979    fail_if(mesa_shader == NULL,
 980            "failed to create %s shader\n", stage_info[stage].name);
 981
 982    switch (stage) {
 983    case VK_SHADER_STAGE_VERTEX:
 984       mesa_shader->Program = &rzalloc(mesa_shader, struct brw_vertex_program)->program.Base;
 985       break;
 986    case VK_SHADER_STAGE_GEOMETRY:
 987       mesa_shader->Program = &rzalloc(mesa_shader, struct brw_geometry_program)->program.Base;
 988       break;
 989    case VK_SHADER_STAGE_FRAGMENT:
 990       mesa_shader->Program = &rzalloc(mesa_shader, struct brw_fragment_program)->program.Base;
 991       break;
 992    case VK_SHADER_STAGE_COMPUTE:
 993       mesa_shader->Program = &rzalloc(mesa_shader, struct brw_compute_program)->program.Base;
 994       break;
 995    }
 996
 997    mesa_shader->Program->Parameters =
 998       rzalloc(mesa_shader, struct gl_program_parameter_list);
 999
1000    mesa_shader->Type = stage_info[stage].token;
1001    mesa_shader->Stage = stage_info[stage].stage;
1002
1003    assert(shader->module->size % 4 == 0);
1004
1005    struct gl_shader_compiler_options *glsl_options =
1006       &compiler->screen->compiler->glsl_compiler_options[stage_info[stage].stage];
1007
1008    mesa_shader->Program->nir =
1009       spirv_to_nir((uint32_t *)shader->module->data, shader->module->size / 4,
1010                    stage_info[stage].stage, glsl_options->NirOptions);
1011    nir_validate_shader(mesa_shader->Program->nir);
1012
1013    brw_process_nir(mesa_shader->Program->nir,
1014                    compiler->screen->devinfo,
1015                    NULL, mesa_shader->Stage, false);
1016
1017    setup_nir_io(mesa_shader->Program, mesa_shader->Program->nir);
1018
1019    fail_if(mesa_shader->Program->nir == NULL,
1020            "failed to translate SPIR-V to NIR\n");
1021
1022    program->Shaders[program->NumShaders] = mesa_shader;
1023    program->NumShaders++;
1024 }
1025
1026 static void
1027 add_compiled_stage(struct anv_pipeline *pipeline, uint32_t stage,
1028                    struct brw_stage_prog_data *prog_data)
1029 {
1030    struct brw_device_info *devinfo = &pipeline->device->info;
1031    uint32_t max_threads[] = {
1032       [VK_SHADER_STAGE_VERTEX]                  = devinfo->max_vs_threads,
1033       [VK_SHADER_STAGE_TESS_CONTROL]            = 0,
1034       [VK_SHADER_STAGE_TESS_EVALUATION]         = 0,
1035       [VK_SHADER_STAGE_GEOMETRY]                = devinfo->max_gs_threads,
1036       [VK_SHADER_STAGE_FRAGMENT]                = devinfo->max_wm_threads,
1037       [VK_SHADER_STAGE_COMPUTE]                 = devinfo->max_cs_threads,
1038    };
1039
1040    pipeline->prog_data[stage] = prog_data;
1041    pipeline->active_stages |= 1 << stage;
1042    pipeline->scratch_start[stage] = pipeline->total_scratch;
1043    pipeline->total_scratch =
1044       align_u32(pipeline->total_scratch, 1024) +
1045       prog_data->total_scratch * max_threads[stage];
1046 }
1047
1048 int
1049 anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline)
1050 {
1051    struct gl_shader_program *program;
1052    int name = 0;
1053    struct brw_context *brw = compiler->brw;
1054
1055    pipeline->writes_point_size = false;
1056
1057    /* When we free the pipeline, we detect stages based on the NULL status
1058     * of various prog_data pointers.  Make them NULL by default.
1059     */
1060    memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
1061    memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start));
1062
1063    brw->use_rep_send = pipeline->use_repclear;
1064    brw->no_simd8 = pipeline->use_repclear;
1065
1066    program = brw->ctx.Driver.NewShaderProgram(name);
1067    program->Shaders = (struct gl_shader **)
1068       calloc(VK_SHADER_STAGE_NUM, sizeof(struct gl_shader *));
1069    fail_if(program == NULL || program->Shaders == NULL,
1070            "failed to create program\n");
1071
1072    bool all_spirv = true;
1073    for (unsigned i = 0; i < VK_SHADER_STAGE_NUM; i++) {
1074       if (pipeline->shaders[i] == NULL)
1075          continue;
1076
1077       /* You need at least this much for "void main() { }" anyway */
1078       assert(pipeline->shaders[i]->module->size >= 12);
1079
1080       if (src_as_glsl(pipeline->shaders[i]->module->data)) {
1081          all_spirv = false;
1082          break;
1083       }
1084
1085       assert(pipeline->shaders[i]->module->size % 4 == 0);
1086    }
1087
1088    if (all_spirv) {
1089       for (unsigned i = 0; i < VK_SHADER_STAGE_NUM; i++) {
1090          if (pipeline->shaders[i])
1091             anv_compile_shader_spirv(compiler, program, pipeline, i);
1092       }
1093
1094       for (unsigned i = 0; i < program->NumShaders; i++) {
1095          struct gl_shader *shader = program->Shaders[i];
1096          program->_LinkedShaders[shader->Stage] = shader;
1097       }
1098    } else {
1099       for (unsigned i = 0; i < VK_SHADER_STAGE_NUM; i++) {
1100          if (pipeline->shaders[i])
1101             anv_compile_shader_glsl(compiler, program, pipeline, i);
1102       }
1103
1104       _mesa_glsl_link_shader(&brw->ctx, program);
1105       fail_on_compile_error(program->LinkStatus,
1106                             program->InfoLog);
1107    }
1108
1109    bool success;
1110    pipeline->active_stages = 0;
1111    pipeline->total_scratch = 0;
1112
1113    if (pipeline->shaders[VK_SHADER_STAGE_VERTEX]) {
1114       struct brw_vs_prog_key vs_key;
1115       struct gl_vertex_program *vp = (struct gl_vertex_program *)
1116          program->_LinkedShaders[MESA_SHADER_VERTEX]->Program;
1117       struct brw_vertex_program *bvp = brw_vertex_program(vp);
1118
1119       brw_vs_populate_key(brw, bvp, &vs_key);
1120
1121       success = really_do_vs_prog(brw, program, bvp, &vs_key, pipeline);
1122       fail_if(!success, "do_wm_prog failed\n");
1123       add_compiled_stage(pipeline, VK_SHADER_STAGE_VERTEX,
1124                          &pipeline->vs_prog_data.base.base);
1125
1126       if (vp->Base.OutputsWritten & VARYING_SLOT_PSIZ)
1127          pipeline->writes_point_size = true;
1128    } else {
1129       memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data));
1130       pipeline->vs_simd8 = NO_KERNEL;
1131       pipeline->vs_vec4 = NO_KERNEL;
1132    }
1133
1134
1135    if (pipeline->shaders[VK_SHADER_STAGE_GEOMETRY]) {
1136       struct brw_gs_prog_key gs_key;
1137       struct gl_geometry_program *gp = (struct gl_geometry_program *)
1138          program->_LinkedShaders[MESA_SHADER_GEOMETRY]->Program;
1139       struct brw_geometry_program *bgp = brw_geometry_program(gp);
1140
1141       brw_gs_populate_key(brw, pipeline, bgp, &gs_key);
1142
1143       success = really_do_gs_prog(brw, program, bgp, &gs_key, pipeline);
1144       fail_if(!success, "do_gs_prog failed\n");
1145       add_compiled_stage(pipeline, VK_SHADER_STAGE_GEOMETRY,
1146                          &pipeline->gs_prog_data.base.base);
1147
1148       if (gp->Base.OutputsWritten & VARYING_SLOT_PSIZ)
1149          pipeline->writes_point_size = true;
1150    } else {
1151       pipeline->gs_vec4 = NO_KERNEL;
1152    }
1153
1154    if (pipeline->shaders[VK_SHADER_STAGE_FRAGMENT]) {
1155       struct brw_wm_prog_key wm_key;
1156       struct gl_fragment_program *fp = (struct gl_fragment_program *)
1157          program->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program;
1158       struct brw_fragment_program *bfp = brw_fragment_program(fp);
1159
1160       brw_wm_populate_key(brw, bfp, &wm_key);
1161
1162       success = really_do_wm_prog(brw, program, bfp, &wm_key, pipeline);
1163       fail_if(!success, "do_wm_prog failed\n");
1164       add_compiled_stage(pipeline, VK_SHADER_STAGE_FRAGMENT,
1165                          &pipeline->wm_prog_data.base);
1166    }
1167
1168    if (pipeline->shaders[VK_SHADER_STAGE_COMPUTE]) {
1169       struct brw_cs_prog_key cs_key;
1170       struct gl_compute_program *cp = (struct gl_compute_program *)
1171          program->_LinkedShaders[MESA_SHADER_COMPUTE]->Program;
1172       struct brw_compute_program *bcp = brw_compute_program(cp);
1173
1174       brw_cs_populate_key(brw, bcp, &cs_key);
1175
1176       success = brw_codegen_cs_prog(brw, program, bcp, &cs_key, pipeline);
1177       fail_if(!success, "brw_codegen_cs_prog failed\n");
1178       add_compiled_stage(pipeline, VK_SHADER_STAGE_COMPUTE,
1179                          &pipeline->cs_prog_data.base);
1180    }
1181
1182    /* XXX: Deleting the shader is broken with our current SPIR-V hacks.  We
1183     * need to fix this ASAP.
1184     */
1185    if (!all_spirv)
1186       brw->ctx.Driver.DeleteShaderProgram(&brw->ctx, program);
1187
1188    struct anv_device *device = compiler->device;
1189    while (device->scratch_block_pool.bo.size < pipeline->total_scratch)
1190       anv_block_pool_alloc(&device->scratch_block_pool);
1191
1192    gen7_compute_urb_partition(pipeline);
1193
1194    return 0;
1195 }
1196
1197 /* This badly named function frees the struct anv_pipeline data that the compiler
1198  * allocates.  Currently just the prog_data structs.
1199  */
1200 void
1201 anv_compiler_free(struct anv_pipeline *pipeline)
1202 {
1203    for (uint32_t stage = 0; stage < VK_SHADER_STAGE_NUM; stage++) {
1204       if (pipeline->prog_data[stage]) {
1205          free(pipeline->prog_data[stage]->map_entries);
1206          ralloc_free(pipeline->prog_data[stage]->param);
1207          ralloc_free(pipeline->prog_data[stage]->pull_param);
1208       }
1209    }
1210 }
1211
1212 }