vk: Add func anv_is_aligned()
[mesa.git] / src / vulkan / compiler.cpp
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <fcntl.h>
27
28 #include "private.h"
29
30 #include <brw_context.h>
31 #include <brw_wm.h> /* brw_new_shader_program is here */
32
33 #include <brw_vs.h>
34 #include <brw_gs.h>
35 #include <brw_cs.h>
36
37 #include <mesa/main/shaderobj.h>
38 #include <mesa/main/fbobject.h>
39 #include <mesa/main/context.h>
40 #include <mesa/program/program.h>
41 #include <glsl/program.h>
42
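/* 0x07230203 is the magic number in the first word of every SPIR-V binary;
 * src_as_glsl() below uses it to tell SPIR-V modules apart from GLSL source. */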
43 #define SPIR_V_MAGIC_NUMBER 0x07230203
44
45 static void
46 fail_if(int cond, const char *format, ...)
47 {
48 va_list args;
49
50 if (!cond)
51 return;
52
53 va_start(args, format);
54 vfprintf(stderr, format, args);
55 va_end(args);
56
57 exit(1);
58 }
59
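/* Build the flattened binding table index map for one shader stage: fragment
 * shaders reserve the first MAX_RTS slots for render targets, then every
 * surface of every descriptor set gets the next consecutive index, and
 * bind_map[set].index points at that set's slice of map_entries. As a
 * hypothetical example, a vertex stage with two sets of 3 and 2 surfaces
 * would map to indices {0, 1, 2} and {3, 4}. */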
60 static VkResult
61 set_binding_table_layout(struct brw_stage_prog_data *prog_data,
62 struct anv_pipeline *pipeline, uint32_t stage)
63 {
64 uint32_t bias, count, k, *map;
65 struct anv_pipeline_layout *layout = pipeline->layout;
66
67 /* Having no pipeline layout is valid for shaders that don't bind any resources. */
68 if (pipeline->layout == NULL)
69 return VK_SUCCESS;
70
71 if (stage == VK_SHADER_STAGE_FRAGMENT)
72 bias = MAX_RTS;
73 else
74 bias = 0;
75
76 count = layout->stage[stage].surface_count;
77 prog_data->map_entries =
78 (uint32_t *) malloc(count * sizeof(prog_data->map_entries[0]));
79 if (prog_data->map_entries == NULL)
80 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
81
82 k = bias;
83 map = prog_data->map_entries;
84 for (uint32_t i = 0; i < layout->num_sets; i++) {
85 prog_data->bind_map[i].index = map;
86 for (uint32_t j = 0; j < layout->set[i].layout->stage[stage].surface_count; j++)
87 *map++ = k++;
88
89 prog_data->bind_map[i].index_count =
90 layout->set[i].layout->stage[stage].surface_count;
91 }
92
93 return VK_SUCCESS;
94 }
95
96 static void
97 brw_vs_populate_key(struct brw_context *brw,
98 struct brw_vertex_program *vp,
99 struct brw_vs_prog_key *key)
100 {
101 struct gl_context *ctx = &brw->ctx;
102 /* BRW_NEW_VERTEX_PROGRAM */
103 struct gl_program *prog = (struct gl_program *) vp;
104
105 memset(key, 0, sizeof(*key));
106
107 /* Just upload the program verbatim for now. Always send it all
108 * the inputs it asks for, whether they are varying or not.
109 */
110 key->base.program_string_id = vp->id;
111 brw_setup_vue_key_clip_info(brw, &key->base,
112 vp->program.Base.UsesClipDistanceOut);
113
114 /* _NEW_POLYGON */
115 if (brw->gen < 6) {
116 key->copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL ||
117 ctx->Polygon.BackMode != GL_FILL);
118 }
119
120 if (prog->OutputsWritten & (VARYING_BIT_COL0 | VARYING_BIT_COL1 |
121 VARYING_BIT_BFC0 | VARYING_BIT_BFC1)) {
122 /* _NEW_LIGHT | _NEW_BUFFERS */
123 key->clamp_vertex_color = ctx->Light._ClampVertexColor;
124 }
125
126 /* _NEW_POINT */
127 if (brw->gen < 6 && ctx->Point.PointSprite) {
128 for (int i = 0; i < 8; i++) {
129 if (ctx->Point.CoordReplace[i])
130 key->point_coord_replace |= (1 << i);
131 }
132 }
133
134 /* _NEW_TEXTURE */
135 brw_populate_sampler_prog_key_data(ctx, prog, brw->vs.base.sampler_count,
136 &key->base.tex);
137 }
138
139 static bool
140 really_do_vs_prog(struct brw_context *brw,
141 struct gl_shader_program *prog,
142 struct brw_vertex_program *vp,
143 struct brw_vs_prog_key *key, struct anv_pipeline *pipeline)
144 {
145 GLuint program_size;
146 const GLuint *program;
147 struct brw_vs_compile c;
148 struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data;
149 struct brw_stage_prog_data *stage_prog_data = &prog_data->base.base;
150 void *mem_ctx;
151 struct gl_shader *vs = NULL;
152
153 if (prog)
154 vs = prog->_LinkedShaders[MESA_SHADER_VERTEX];
155
156 memset(&c, 0, sizeof(c));
157 memcpy(&c.key, key, sizeof(*key));
158 memset(prog_data, 0, sizeof(*prog_data));
159
160 mem_ctx = ralloc_context(NULL);
161
162 c.vp = vp;
163
164 /* Allocate the references to the uniforms that will end up in the
165 * prog_data associated with the compiled program, and which will be freed
166 * by the state cache.
167 */
168 int param_count;
169 if (vs) {
170 /* We add padding around uniform values below vec4 size, with the worst
171 * case being a float value that gets blown up to a vec4, so be
172 * conservative here.
173 */
174 param_count = vs->num_uniform_components * 4;
175
176 } else {
177 param_count = vp->program.Base.Parameters->NumParameters * 4;
178 }
179 /* vec4_visitor::setup_uniform_clipplane_values() also uploads user clip
180 * planes as uniforms.
181 */
182 param_count += c.key.base.nr_userclip_plane_consts * 4;
183
184 /* Setting nr_params here NOT to the size of the param and pull_param
185 * arrays, but to the number of uniform components vec4_visitor
186 * needs. vec4_visitor::setup_uniforms() will set it back to a proper value.
187 */
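/* ALIGN(param_count, 4) / 4 rounds the component count up to whole vec4s,
 * e.g. 7 components become 2 vec4 slots. */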
188 stage_prog_data->nr_params = ALIGN(param_count, 4) / 4;
189 if (vs) {
190 stage_prog_data->nr_params += vs->num_samplers;
191 }
192
193 GLbitfield64 outputs_written = vp->program.Base.OutputsWritten;
194 prog_data->inputs_read = vp->program.Base.InputsRead;
195
196 if (c.key.copy_edgeflag) {
197 outputs_written |= BITFIELD64_BIT(VARYING_SLOT_EDGE);
198 prog_data->inputs_read |= VERT_BIT_EDGEFLAG;
199 }
200
201 if (brw->gen < 6) {
202 /* Put dummy slots into the VUE for the SF to put the replaced
203 * point sprite coords in. We shouldn't need these dummy slots,
204 * which take up precious URB space, but it would mean that the SF
205 * doesn't get nice aligned pairs of input coords into output
206 * coords, which would be a pain to handle.
207 */
208 for (int i = 0; i < 8; i++) {
209 if (c.key.point_coord_replace & (1 << i))
210 outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i);
211 }
212
213 /* if back colors are written, allocate slots for front colors too */
214 if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC0))
215 outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL0);
216 if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC1))
217 outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL1);
218 }
219
220 /* In order for legacy clipping to work, we need to populate the clip
221 * distance varying slots whenever clipping is enabled, even if the vertex
222 * shader doesn't write to gl_ClipDistance.
223 */
224 if (c.key.base.userclip_active) {
225 outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
226 outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
227 }
228
229 brw_compute_vue_map(brw->intelScreen->devinfo,
230 &prog_data->base.vue_map, outputs_written);
231
232 set_binding_table_layout(&prog_data->base.base, pipeline,
233 VK_SHADER_STAGE_VERTEX);
234
235 /* Emit GEN4 code.
236 */
237 program = brw_vs_emit(brw, prog, &c, prog_data, mem_ctx, &program_size);
238 if (program == NULL) {
239 ralloc_free(mem_ctx);
240 return false;
241 }
242
243 struct anv_state vs_state = anv_state_stream_alloc(&pipeline->program_stream,
244 program_size, 64);
245 memcpy(vs_state.map, program, program_size);
246
247 pipeline->vs_simd8 = vs_state.offset;
248
249 ralloc_free(mem_ctx);
250
251 return true;
252 }
253
254 void brw_wm_populate_key(struct brw_context *brw,
255 struct brw_fragment_program *fp,
256 struct brw_wm_prog_key *key)
257 {
258 struct gl_context *ctx = &brw->ctx;
259 struct gl_program *prog = (struct gl_program *) brw->fragment_program;
260 GLuint lookup = 0;
261 GLuint line_aa;
262 bool program_uses_dfdy = fp->program.UsesDFdy;
263 struct gl_framebuffer draw_buffer;
264 bool multisample_fbo;
265
266 memset(key, 0, sizeof(*key));
267
268 for (int i = 0; i < MAX_SAMPLERS; i++) {
269 /* Assume color sampler, no swizzling. */
270 key->tex.swizzles[i] = SWIZZLE_XYZW;
271 }
272
273 /* A non-zero framebuffer name indicates that the framebuffer was created by
274 * the user rather than the window system. */
275 draw_buffer.Name = 1;
276 draw_buffer.Visual.samples = 1;
277 draw_buffer._NumColorDrawBuffers = 1;
279 draw_buffer.Width = 400;
280 draw_buffer.Height = 400;
281 ctx->DrawBuffer = &draw_buffer;
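/* This stack-allocated framebuffer is only a stand-in so the GL-oriented key
 * code below has a ctx->DrawBuffer to read; it is detached again at the end
 * of this function. The 400x400 size appears to be an arbitrary placeholder. */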
282
283 multisample_fbo = ctx->DrawBuffer->Visual.samples > 1;
284
285 /* Build the index for table lookup
286 */
287 if (brw->gen < 6) {
288 /* _NEW_COLOR */
289 if (fp->program.UsesKill || ctx->Color.AlphaEnabled)
290 lookup |= IZ_PS_KILL_ALPHATEST_BIT;
291
292 if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
293 lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
294
295 /* _NEW_DEPTH */
296 if (ctx->Depth.Test)
297 lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
298
299 if (ctx->Depth.Test && ctx->Depth.Mask) /* ?? */
300 lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
301
302 /* _NEW_STENCIL | _NEW_BUFFERS */
303 if (ctx->Stencil._Enabled) {
304 lookup |= IZ_STENCIL_TEST_ENABLE_BIT;
305
306 if (ctx->Stencil.WriteMask[0] ||
307 ctx->Stencil.WriteMask[ctx->Stencil._BackFace])
308 lookup |= IZ_STENCIL_WRITE_ENABLE_BIT;
309 }
310 key->iz_lookup = lookup;
311 }
312
313 line_aa = AA_NEVER;
314
315 /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */
316 if (ctx->Line.SmoothFlag) {
317 if (brw->reduced_primitive == GL_LINES) {
318 line_aa = AA_ALWAYS;
319 }
320 else if (brw->reduced_primitive == GL_TRIANGLES) {
321 if (ctx->Polygon.FrontMode == GL_LINE) {
322 line_aa = AA_SOMETIMES;
323
324 if (ctx->Polygon.BackMode == GL_LINE ||
325 (ctx->Polygon.CullFlag &&
326 ctx->Polygon.CullFaceMode == GL_BACK))
327 line_aa = AA_ALWAYS;
328 }
329 else if (ctx->Polygon.BackMode == GL_LINE) {
330 line_aa = AA_SOMETIMES;
331
332 if ((ctx->Polygon.CullFlag &&
333 ctx->Polygon.CullFaceMode == GL_FRONT))
334 line_aa = AA_ALWAYS;
335 }
336 }
337 }
338
339 key->line_aa = line_aa;
340
341 /* _NEW_HINT */
342 key->high_quality_derivatives =
343 ctx->Hint.FragmentShaderDerivative == GL_NICEST;
344
345 if (brw->gen < 6)
346 key->stats_wm = brw->stats_wm;
347
348 /* _NEW_LIGHT */
349 key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT);
350
351 /* _NEW_FRAG_CLAMP | _NEW_BUFFERS */
352 key->clamp_fragment_color = ctx->Color._ClampFragmentColor;
353
354 /* _NEW_TEXTURE */
355 brw_populate_sampler_prog_key_data(ctx, prog, brw->wm.base.sampler_count,
356 &key->tex);
357
358 /* _NEW_BUFFERS */
359 /*
360 * Include the draw buffer origin and height so that we can calculate
361 * fragment position values relative to the bottom left of the drawable,
362 * from the incoming screen origin relative position we get as part of our
363 * payload.
364 *
365 * This is only needed for the WM_WPOSXY opcode when the fragment program
366 * uses the gl_FragCoord input.
367 *
368 * We could avoid recompiling by including this as a constant referenced by
369 * our program, but if we were to do that it would also be nice to handle
370 * getting that constant updated at batchbuffer submit time (when we
371 * hold the lock and know where the buffer really is) rather than at emit
372 * time when we don't hold the lock and are just guessing. We could also
373 * just avoid using this as key data if the program doesn't use
374 * fragment.position.
375 *
376 * For DRI2 the origin_x/y will always be (0,0) but we still need the
377 * drawable height in order to invert the Y axis.
378 */
379 if (fp->program.Base.InputsRead & VARYING_BIT_POS) {
380 key->drawable_height = ctx->DrawBuffer->Height;
381 }
382
383 if ((fp->program.Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) {
384 key->render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
385 }
386
387 /* _NEW_BUFFERS */
388 key->nr_color_regions = ctx->DrawBuffer->_NumColorDrawBuffers;
389
390 /* _NEW_MULTISAMPLE, _NEW_COLOR, _NEW_BUFFERS */
391 key->replicate_alpha = ctx->DrawBuffer->_NumColorDrawBuffers > 1 &&
392 (ctx->Multisample.SampleAlphaToCoverage || ctx->Color.AlphaEnabled);
393
394 /* _NEW_BUFFERS _NEW_MULTISAMPLE */
395 /* Ignore sample qualifier while computing this flag. */
396 key->persample_shading =
397 _mesa_get_min_invocations_per_fragment(ctx, &fp->program, true) > 1;
398 if (key->persample_shading)
399 key->persample_2x = ctx->DrawBuffer->Visual.samples == 2;
400
401 key->compute_pos_offset =
402 _mesa_get_min_invocations_per_fragment(ctx, &fp->program, false) > 1 &&
403 fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_POS;
404
405 key->compute_sample_id =
406 multisample_fbo &&
407 ctx->Multisample.Enabled &&
408 (fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_ID);
409
410 /* BRW_NEW_VUE_MAP_GEOM_OUT */
411 if (brw->gen < 6 || _mesa_bitcount_64(fp->program.Base.InputsRead &
412 BRW_FS_VARYING_INPUT_MASK) > 16)
413 key->input_slots_valid = brw->vue_map_geom_out.slots_valid;
414
415
416 /* _NEW_COLOR | _NEW_BUFFERS */
417 /* Pre-gen6, the hardware alpha test always used each render
418 * target's alpha to do alpha test, as opposed to render target 0's alpha
419 * like GL requires. Fix that by building the alpha test into the
420 * shader, and we'll skip enabling the fixed function alpha test.
421 */
422 if (brw->gen < 6 && ctx->DrawBuffer->_NumColorDrawBuffers > 1 && ctx->Color.AlphaEnabled) {
423 key->alpha_test_func = ctx->Color.AlphaFunc;
424 key->alpha_test_ref = ctx->Color.AlphaRef;
425 }
426
427 /* The unique fragment program ID */
428 key->program_string_id = fp->id;
429
430 ctx->DrawBuffer = NULL;
431 }
432
433 static uint8_t
434 computed_depth_mode(struct gl_fragment_program *fp)
435 {
436 if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
437 switch (fp->FragDepthLayout) {
438 case FRAG_DEPTH_LAYOUT_NONE:
439 case FRAG_DEPTH_LAYOUT_ANY:
440 return BRW_PSCDEPTH_ON;
441 case FRAG_DEPTH_LAYOUT_GREATER:
442 return BRW_PSCDEPTH_ON_GE;
443 case FRAG_DEPTH_LAYOUT_LESS:
444 return BRW_PSCDEPTH_ON_LE;
445 case FRAG_DEPTH_LAYOUT_UNCHANGED:
446 return BRW_PSCDEPTH_OFF;
447 }
448 }
449 return BRW_PSCDEPTH_OFF;
450 }
451
452 static bool
453 really_do_wm_prog(struct brw_context *brw,
454 struct gl_shader_program *prog,
455 struct brw_fragment_program *fp,
456 struct brw_wm_prog_key *key, struct anv_pipeline *pipeline)
457 {
458 struct gl_context *ctx = &brw->ctx;
459 void *mem_ctx = ralloc_context(NULL);
460 struct brw_wm_prog_data *prog_data = &pipeline->wm_prog_data;
461 struct gl_shader *fs = NULL;
462 unsigned int program_size;
463 const uint32_t *program;
464
465 if (prog)
466 fs = prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
467
468 memset(prog_data, 0, sizeof(*prog_data));
469
470 /* key->alpha_test_func means simulating alpha testing via discards,
471 * so the shader definitely kills pixels.
472 */
473 prog_data->uses_kill = fp->program.UsesKill || key->alpha_test_func;
474
475 prog_data->computed_depth_mode = computed_depth_mode(&fp->program);
476
477 /* Allocate the references to the uniforms that will end up in the
478 * prog_data associated with the compiled program, and which will be freed
479 * by the state cache.
480 */
481 int param_count;
482 if (fs) {
483 param_count = fs->num_uniform_components;
484 } else {
485 param_count = fp->program.Base.Parameters->NumParameters * 4;
486 }
487 /* The backend also sometimes adds params for texture size. */
488 param_count += 2 * ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits;
489 prog_data->base.param =
490 rzalloc_array(NULL, const gl_constant_value *, param_count);
491 prog_data->base.pull_param =
492 rzalloc_array(NULL, const gl_constant_value *, param_count);
493 prog_data->base.nr_params = param_count;
494
495 prog_data->barycentric_interp_modes =
496 brw_compute_barycentric_interp_modes(brw, key->flat_shade,
497 key->persample_shading,
498 &fp->program);
499
500 set_binding_table_layout(&prog_data->base, pipeline,
501 VK_SHADER_STAGE_FRAGMENT);
502 /* This needs to come after shader time and pull constant entries, but we
503 * don't have those set up now, so just put it after the layout entries.
504 */
505 prog_data->binding_table.render_target_start = 0;
506
507 program = brw_wm_fs_emit(brw, mem_ctx, key, prog_data,
508 &fp->program, prog, &program_size);
509 if (program == NULL) {
510 ralloc_free(mem_ctx);
511 return false;
512 }
513
514 struct anv_state ps_state = anv_state_stream_alloc(&pipeline->program_stream,
515 program_size, 64);
516 memcpy(ps_state.map, program, program_size);
517
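/* brw_wm_fs_emit() can pack a SIMD8 and a SIMD16 kernel into one program:
 * no_8 means no SIMD8 variant was generated, and prog_offset_16 is the byte
 * offset of the SIMD16 code within the uploaded program. */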
518 if (prog_data->no_8)
519 pipeline->ps_simd8 = NO_KERNEL;
520 else
521 pipeline->ps_simd8 = ps_state.offset;
522
523 if (prog_data->no_8 || prog_data->prog_offset_16) {
524 pipeline->ps_simd16 = ps_state.offset + prog_data->prog_offset_16;
525 } else {
526 pipeline->ps_simd16 = NO_KERNEL;
527 }
528
529 ralloc_free(mem_ctx);
530
531 return true;
532 }
533
534 static void
535 brw_gs_populate_key(struct brw_context *brw,
536 struct anv_pipeline *pipeline,
537 struct brw_geometry_program *gp,
538 struct brw_gs_prog_key *key)
539 {
540 struct gl_context *ctx = &brw->ctx;
541 struct brw_stage_state *stage_state = &brw->gs.base;
542 struct gl_program *prog = &gp->program.Base;
543
544 memset(key, 0, sizeof(*key));
545
546 key->base.program_string_id = gp->id;
547 brw_setup_vue_key_clip_info(brw, &key->base,
548 gp->program.Base.UsesClipDistanceOut);
549
550 /* _NEW_TEXTURE */
551 brw_populate_sampler_prog_key_data(ctx, prog, stage_state->sampler_count,
552 &key->base.tex);
553
554 struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data;
555
556 /* BRW_NEW_VUE_MAP_VS */
557 key->input_varyings = prog_data->base.vue_map.slots_valid;
558 }
559
560 static bool
561 really_do_gs_prog(struct brw_context *brw,
562 struct gl_shader_program *prog,
563 struct brw_geometry_program *gp,
564 struct brw_gs_prog_key *key, struct anv_pipeline *pipeline)
565 {
566 struct brw_gs_compile_output output;
567
568 /* FIXME: We pass the bind map to the compile in the output struct. Need
569 * something better. */
570 set_binding_table_layout(&output.prog_data.base.base,
571 pipeline, VK_SHADER_STAGE_GEOMETRY);
572
573 brw_compile_gs_prog(brw, prog, gp, key, &output);
574
575 struct anv_state gs_state = anv_state_stream_alloc(&pipeline->program_stream,
576 output.program_size, 64);
577 memcpy(gs_state.map, output.program, output.program_size);
578
579 pipeline->gs_vec4 = gs_state.offset;
580 pipeline->gs_vertex_count = gp->program.VerticesIn;
581
582 ralloc_free(output.mem_ctx);
583
584 return true;
585 }
586
587 static bool
588 brw_codegen_cs_prog(struct brw_context *brw,
589 struct gl_shader_program *prog,
590 struct brw_compute_program *cp,
591 struct brw_cs_prog_key *key, struct anv_pipeline *pipeline)
592 {
593 struct gl_context *ctx = &brw->ctx;
594 const GLuint *program;
595 void *mem_ctx = ralloc_context(NULL);
596 GLuint program_size;
597 struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
598
599 struct gl_shader *cs = prog->_LinkedShaders[MESA_SHADER_COMPUTE];
600 assert(cs);
601
602 memset(prog_data, 0, sizeof(*prog_data));
603
604 set_binding_table_layout(&prog_data->base, pipeline, VK_SHADER_STAGE_COMPUTE);
605
606 /* Allocate the references to the uniforms that will end up in the
607 * prog_data associated with the compiled program, and which will be freed
608 * by the state cache.
609 */
610 int param_count = cs->num_uniform_components;
611
612 /* The backend also sometimes adds params for texture size. */
613 param_count += 2 * ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;
614 prog_data->base.param =
615 rzalloc_array(NULL, const gl_constant_value *, param_count);
616 prog_data->base.pull_param =
617 rzalloc_array(NULL, const gl_constant_value *, param_count);
618 prog_data->base.nr_params = param_count;
619
620 program = brw_cs_emit(brw, mem_ctx, key, prog_data,
621 &cp->program, prog, &program_size);
622 if (program == NULL) {
623 ralloc_free(mem_ctx);
624 return false;
625 }
626
627 if (unlikely(INTEL_DEBUG & DEBUG_CS))
628 fprintf(stderr, "\n");
629
630 struct anv_state cs_state = anv_state_stream_alloc(&pipeline->program_stream,
631 program_size, 64);
632 memcpy(cs_state.map, program, program_size);
633
634 pipeline->cs_simd = cs_state.offset;
635
636 ralloc_free(mem_ctx);
637
638 return true;
639 }
640
641 static void
642 brw_cs_populate_key(struct brw_context *brw,
643 struct brw_compute_program *bcp, struct brw_cs_prog_key *key)
644 {
645 memset(key, 0, sizeof(*key));
646
647 /* The unique compute program ID */
648 key->program_string_id = bcp->id;
649 }
650
651 static void
652 fail_on_compile_error(int status, const char *msg)
653 {
654 int source, line, column;
655 char error[256];
656
657 if (status)
658 return;
659
660 if (sscanf(msg, "%d:%d(%d): error: %255[^\n]", &source, &line, &column, error) == 4)
661 fail_if(!status, "%d:%s\n", line, error);
662 else
663 fail_if(!status, "%s\n", msg);
664 }
665
666 struct anv_compiler {
667 struct anv_device *device;
668 struct intel_screen *screen;
669 struct brw_context *brw;
670 struct gl_pipeline_object pipeline;
671 };
672
673 extern "C" {
674
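/* Build a minimal brw_context with no real GPU or winsys behind it: only the
 * device limits, constants and driver hooks that the GLSL front-end and the
 * brw_* backends consult during compilation are filled in. */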
675 struct anv_compiler *
676 anv_compiler_create(struct anv_device *device)
677 {
678 const struct brw_device_info *devinfo = &device->info;
679 struct anv_compiler *compiler;
680 struct gl_context *ctx;
681
682 compiler = rzalloc(NULL, struct anv_compiler);
683 if (compiler == NULL)
684 return NULL;
685
686 compiler->screen = rzalloc(compiler, struct intel_screen);
687 if (compiler->screen == NULL)
688 goto fail;
689
690 compiler->brw = rzalloc(compiler, struct brw_context);
691 if (compiler->brw == NULL)
692 goto fail;
693
694 compiler->device = device;
695
696 compiler->brw->optionCache.info = NULL;
697 compiler->brw->bufmgr = NULL;
698 compiler->brw->gen = devinfo->gen;
699 compiler->brw->is_g4x = devinfo->is_g4x;
700 compiler->brw->is_baytrail = devinfo->is_baytrail;
701 compiler->brw->is_haswell = devinfo->is_haswell;
702 compiler->brw->is_cherryview = devinfo->is_cherryview;
703
704 /* We need this at least for CS, which will check brw->max_cs_threads
705 * against the work group size. */
706 compiler->brw->max_vs_threads = devinfo->max_vs_threads;
707 compiler->brw->max_hs_threads = devinfo->max_hs_threads;
708 compiler->brw->max_ds_threads = devinfo->max_ds_threads;
709 compiler->brw->max_gs_threads = devinfo->max_gs_threads;
710 compiler->brw->max_wm_threads = devinfo->max_wm_threads;
711 compiler->brw->max_cs_threads = devinfo->max_cs_threads;
712 compiler->brw->urb.size = devinfo->urb.size;
713 compiler->brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
714 compiler->brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
715 compiler->brw->urb.max_hs_entries = devinfo->urb.max_hs_entries;
716 compiler->brw->urb.max_ds_entries = devinfo->urb.max_ds_entries;
717 compiler->brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;
718
719 compiler->brw->intelScreen = compiler->screen;
720 compiler->screen->devinfo = &device->info;
721
722 brw_process_intel_debug_variable(compiler->screen);
723
724 compiler->screen->compiler = brw_compiler_create(compiler, &device->info);
725
726 ctx = &compiler->brw->ctx;
727 _mesa_init_shader_object_functions(&ctx->Driver);
728
729 _mesa_init_constants(&ctx->Const, API_OPENGL_CORE);
730
731 brw_initialize_context_constants(compiler->brw);
732
733 intelInitExtensions(ctx);
734
735 /* Set dd::NewShader */
736 brwInitFragProgFuncs(&ctx->Driver);
737
738 ctx->_Shader = &compiler->pipeline;
739
740 compiler->brw->precompile = false;
741
742 return compiler;
743
744 fail:
745 ralloc_free(compiler);
746 return NULL;
747 }
748
749 void
750 anv_compiler_destroy(struct anv_compiler *compiler)
751 {
752 _mesa_free_errors_data(&compiler->brw->ctx);
753 ralloc_free(compiler);
754 }
755
756 /* From gen7_urb.c */
757
758 /* FIXME: Add to struct intel_device_info */
759
760 static const int gen8_push_size = 32 * 1024;
761
762 static void
763 gen7_compute_urb_partition(struct anv_pipeline *pipeline)
764 {
765 const struct brw_device_info *devinfo = &pipeline->device->info;
766 bool vs_present = pipeline->vs_simd8 != NO_KERNEL;
767 unsigned vs_size = vs_present ? pipeline->vs_prog_data.base.urb_entry_size : 1;
768 unsigned vs_entry_size_bytes = vs_size * 64;
769 bool gs_present = pipeline->gs_vec4 != NO_KERNEL;
770 unsigned gs_size = gs_present ? pipeline->gs_prog_data.base.urb_entry_size : 1;
771 unsigned gs_entry_size_bytes = gs_size * 64;
772
773 /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS):
774 *
775 * VS Number of URB Entries must be divisible by 8 if the VS URB Entry
776 * Allocation Size is less than 9 512-bit URB entries.
777 *
778 * Similar text exists for GS.
779 */
780 unsigned vs_granularity = (vs_size < 9) ? 8 : 1;
781 unsigned gs_granularity = (gs_size < 9) ? 8 : 1;
782
783 /* URB allocations must be done in 8k chunks. */
784 unsigned chunk_size_bytes = 8192;
785
786 /* Determine the size of the URB in chunks. */
787 unsigned urb_chunks = devinfo->urb.size * 1024 / chunk_size_bytes;
788
789 /* Reserve space for push constants */
790 unsigned push_constant_bytes = gen8_push_size;
791 unsigned push_constant_chunks =
792 push_constant_bytes / chunk_size_bytes;
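/* With the 32 kB push constant size above and 8 kB chunks, this reserves 4
 * chunks for push constants. */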
793
794 /* Initially, assign each stage the minimum amount of URB space it needs,
795 * and make a note of how much additional space it "wants" (the amount of
796 * additional space it could actually make use of).
797 */
798
799 /* VS has a lower limit on the number of URB entries */
800 unsigned vs_chunks =
801 ALIGN(devinfo->urb.min_vs_entries * vs_entry_size_bytes,
802 chunk_size_bytes) / chunk_size_bytes;
803 unsigned vs_wants =
804 ALIGN(devinfo->urb.max_vs_entries * vs_entry_size_bytes,
805 chunk_size_bytes) / chunk_size_bytes - vs_chunks;
806
807 unsigned gs_chunks = 0;
808 unsigned gs_wants = 0;
809 if (gs_present) {
810 /* There are two constraints on the minimum amount of URB space we can
811 * allocate:
812 *
813 * (1) We need room for at least 2 URB entries, since we always operate
814 * the GS in DUAL_OBJECT mode.
815 *
816 * (2) We can't allocate less than nr_gs_entries_granularity.
817 */
818 gs_chunks = ALIGN(MAX2(gs_granularity, 2) * gs_entry_size_bytes,
819 chunk_size_bytes) / chunk_size_bytes;
820 gs_wants =
821 ALIGN(devinfo->urb.max_gs_entries * gs_entry_size_bytes,
822 chunk_size_bytes) / chunk_size_bytes - gs_chunks;
823 }
824
825 /* There should always be enough URB space to satisfy the minimum
826 * requirements of each stage.
827 */
828 unsigned total_needs = push_constant_chunks + vs_chunks + gs_chunks;
829 assert(total_needs <= urb_chunks);
830
831 /* Mete out remaining space (if any) in proportion to "wants". */
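/* Hypothetical example: with vs_wants = 6, gs_wants = 2 and 4 chunks left,
 * the VS gets round(6 * 4 / 8) = 3 extra chunks and the GS gets the last one. */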
832 unsigned total_wants = vs_wants + gs_wants;
833 unsigned remaining_space = urb_chunks - total_needs;
834 if (remaining_space > total_wants)
835 remaining_space = total_wants;
836 if (remaining_space > 0) {
837 unsigned vs_additional = (unsigned)
838 round(vs_wants * (((double) remaining_space) / total_wants));
839 vs_chunks += vs_additional;
840 remaining_space -= vs_additional;
841 gs_chunks += remaining_space;
842 }
843
844 /* Sanity check that we haven't over-allocated. */
845 assert(push_constant_chunks + vs_chunks + gs_chunks <= urb_chunks);
846
847 /* Finally, compute the number of entries that can fit in the space
848 * allocated to each stage.
849 */
850 unsigned nr_vs_entries = vs_chunks * chunk_size_bytes / vs_entry_size_bytes;
851 unsigned nr_gs_entries = gs_chunks * chunk_size_bytes / gs_entry_size_bytes;
852
853 /* Since we rounded up when computing *_wants, this may be slightly more
854 * than the maximum allowed amount, so correct for that.
855 */
856 nr_vs_entries = MIN2(nr_vs_entries, devinfo->urb.max_vs_entries);
857 nr_gs_entries = MIN2(nr_gs_entries, devinfo->urb.max_gs_entries);
858
859 /* Ensure that we program a multiple of the granularity. */
860 nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, vs_granularity);
861 nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, gs_granularity);
862
863 /* Finally, sanity check to make sure we have at least the minimum number
864 * of entries needed for each stage.
865 */
866 assert(nr_vs_entries >= devinfo->urb.min_vs_entries);
867 if (gs_present)
868 assert(nr_gs_entries >= 2);
869
870 /* Lay out the URB in the following order:
871 * - push constants
872 * - VS
873 * - GS
874 */
875 pipeline->urb.vs_start = push_constant_chunks;
876 pipeline->urb.vs_size = vs_size;
877 pipeline->urb.nr_vs_entries = nr_vs_entries;
878
879 pipeline->urb.gs_start = push_constant_chunks + vs_chunks;
880 pipeline->urb.gs_size = gs_size;
881 pipeline->urb.nr_gs_entries = nr_gs_entries;
882 }
883
884 static const struct {
885 uint32_t token;
886 const char *name;
887 } stage_info[] = {
888 { GL_VERTEX_SHADER, "vertex" },
889 { GL_TESS_CONTROL_SHADER, "tess control" },
890 { GL_TESS_EVALUATION_SHADER, "tess evaluation" },
891 { GL_GEOMETRY_SHADER, "geometry" },
892 { GL_FRAGMENT_SHADER, "fragment" },
893 { GL_COMPUTE_SHADER, "compute" },
894 };
895
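/* Mirrors the first three 32-bit words of a SPIR-V module header: the magic
 * number, the version and the generator's magic number. */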
896 struct spirv_header {
897 uint32_t magic;
898 uint32_t version;
899 uint32_t gen_magic;
900 };
901
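/* Decide whether a shader blob is GLSL source or SPIR-V. SPIR-V words are
 * 32-bit, so an unaligned pointer cannot be SPIR-V and is treated as GLSL
 * text. For the LunarG version == 0 back-door the GLSL source follows the
 * three header words (12 bytes). Returns NULL for genuine SPIR-V. */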
902 static const char *
903 src_as_glsl(const char *data)
904 {
905 const struct spirv_header *as_spirv = (const struct spirv_header *)data;
906
907 /* Check alignment */
908 if ((intptr_t)data & 0x3) {
909 return data;
910 }
911
912 if (as_spirv->magic == SPIR_V_MAGIC_NUMBER) {
913 /* LunarG back-door */
914 if (as_spirv->version == 0)
915 return data + 12;
916 else
917 return NULL;
918 } else {
919 return data;
920 }
921 }
922
923 static void
924 anv_compile_shader_glsl(struct anv_compiler *compiler,
925 struct gl_shader_program *program,
926 struct anv_pipeline *pipeline, uint32_t stage)
927 {
928 struct brw_context *brw = compiler->brw;
929 struct gl_shader *shader;
930 int name = 0;
931
932 shader = brw_new_shader(&brw->ctx, name, stage_info[stage].token);
933 fail_if(shader == NULL, "failed to create %s shader\n", stage_info[stage].name);
934
935 shader->Source = strdup(src_as_glsl(pipeline->shaders[stage]->data));
936 _mesa_glsl_compile_shader(&brw->ctx, shader, false, false);
937 fail_on_compile_error(shader->CompileStatus, shader->InfoLog);
938
939 program->Shaders[program->NumShaders] = shader;
940 program->NumShaders++;
941 }
942
943 static void
944 anv_compile_shader_spirv(struct anv_compiler *compiler,
945 struct gl_shader_program *program,
946 struct anv_pipeline *pipeline, uint32_t stage)
947 {
948 unreachable("SPIR-V is not supported yet!");
949 }
950
951 static void
952 add_compiled_stage(struct anv_pipeline *pipeline, uint32_t stage,
953 struct brw_stage_prog_data *prog_data)
954 {
955 struct brw_device_info *devinfo = &pipeline->device->info;
956 uint32_t max_threads[] = {
957 [VK_SHADER_STAGE_VERTEX] = devinfo->max_vs_threads,
958 [VK_SHADER_STAGE_TESS_CONTROL] = 0,
959 [VK_SHADER_STAGE_TESS_EVALUATION] = 0,
960 [VK_SHADER_STAGE_GEOMETRY] = devinfo->max_gs_threads,
961 [VK_SHADER_STAGE_FRAGMENT] = devinfo->max_wm_threads,
962 [VK_SHADER_STAGE_COMPUTE] = devinfo->max_cs_threads,
963 };
964
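/* Record where this stage's scratch slice begins, then grow the running
 * total: the stage needs prog_data->total_scratch bytes per thread for up to
 * max_threads[stage] threads, and the total is re-aligned to 1 kB before the
 * new slice is added. */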
965 pipeline->prog_data[stage] = prog_data;
966 pipeline->active_stages |= 1 << stage;
967 pipeline->scratch_start[stage] = pipeline->total_scratch;
968 pipeline->total_scratch =
969 ALIGN_U32(pipeline->total_scratch, 1024) +
970 prog_data->total_scratch * max_threads[stage];
971 }
972
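/* Top-level entry point: wrap the pipeline's shaders in a gl_shader_program,
 * compile and link them with the GLSL front-end (SPIR-V is not wired up yet),
 * run each hardware backend, then size the scratch pool and the URB
 * partition. */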
973 int
974 anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline)
975 {
976 struct gl_shader_program *program;
977 int name = 0;
978 struct brw_context *brw = compiler->brw;
979
980 /* When we free the pipeline, we detect stages based on the NULL status
981 * of various prog_data pointers. Make them NULL by default.
982 */
983 memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
984 memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start));
985
986 brw->use_rep_send = pipeline->use_repclear;
987 brw->no_simd8 = pipeline->use_repclear;
988
989 program = brw->ctx.Driver.NewShaderProgram(name);
990 program->Shaders = (struct gl_shader **)
991 calloc(VK_NUM_SHADER_STAGE, sizeof(struct gl_shader *));
992 fail_if(program == NULL || program->Shaders == NULL,
993 "failed to create program\n");
994
995 bool all_spirv = true;
996 for (unsigned i = 0; i < VK_NUM_SHADER_STAGE; i++) {
997 if (pipeline->shaders[i] == NULL)
998 continue;
999
1000 /* You need at least this much for "void main() { }" anyway */
1001 assert(pipeline->shaders[i]->size >= 12);
1002
1003 if (src_as_glsl(pipeline->shaders[i]->data)) {
1004 all_spirv = false;
1005 break;
1006 }
1007
1008 assert(pipeline->shaders[i]->size % 4 == 0);
1009 }
1010
1011 if (all_spirv) {
1012 for (unsigned i = 0; i < VK_NUM_SHADER_STAGE; i++) {
1013 if (pipeline->shaders[i])
1014 anv_compile_shader_spirv(compiler, program, pipeline, i);
1015 }
1016
1017 /* TODO: nir_link_shader? */
1018 } else {
1019 for (unsigned i = 0; i < VK_NUM_SHADER_STAGE; i++) {
1020 if (pipeline->shaders[i])
1021 anv_compile_shader_glsl(compiler, program, pipeline, i);
1022 }
1023
1024 _mesa_glsl_link_shader(&brw->ctx, program);
1025 fail_on_compile_error(program->LinkStatus,
1026 program->InfoLog);
1027 }
1028
1029 bool success;
1030 pipeline->active_stages = 0;
1031 pipeline->total_scratch = 0;
1032
1033 if (pipeline->shaders[VK_SHADER_STAGE_VERTEX]) {
1034 struct brw_vs_prog_key vs_key;
1035 struct gl_vertex_program *vp = (struct gl_vertex_program *)
1036 program->_LinkedShaders[MESA_SHADER_VERTEX]->Program;
1037 struct brw_vertex_program *bvp = brw_vertex_program(vp);
1038
1039 brw_vs_populate_key(brw, bvp, &vs_key);
1040
1041 success = really_do_vs_prog(brw, program, bvp, &vs_key, pipeline);
1042 fail_if(!success, "do_vs_prog failed\n");
1043 add_compiled_stage(pipeline, VK_SHADER_STAGE_VERTEX,
1044 &pipeline->vs_prog_data.base.base);
1045 } else {
1046 memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data));
1047 pipeline->vs_simd8 = NO_KERNEL;
1048 }
1049
1050
1051 if (pipeline->shaders[VK_SHADER_STAGE_GEOMETRY]) {
1052 struct brw_gs_prog_key gs_key;
1053 struct gl_geometry_program *gp = (struct gl_geometry_program *)
1054 program->_LinkedShaders[MESA_SHADER_GEOMETRY]->Program;
1055 struct brw_geometry_program *bgp = brw_geometry_program(gp);
1056
1057 brw_gs_populate_key(brw, pipeline, bgp, &gs_key);
1058
1059 success = really_do_gs_prog(brw, program, bgp, &gs_key, pipeline);
1060 fail_if(!success, "do_gs_prog failed\n");
1061 add_compiled_stage(pipeline, VK_SHADER_STAGE_GEOMETRY,
1062 &pipeline->gs_prog_data.base.base);
1063 } else {
1064 pipeline->gs_vec4 = NO_KERNEL;
1065 }
1066
1067 if (pipeline->shaders[VK_SHADER_STAGE_FRAGMENT]) {
1068 struct brw_wm_prog_key wm_key;
1069 struct gl_fragment_program *fp = (struct gl_fragment_program *)
1070 program->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program;
1071 struct brw_fragment_program *bfp = brw_fragment_program(fp);
1072
1073 brw_wm_populate_key(brw, bfp, &wm_key);
1074
1075 success = really_do_wm_prog(brw, program, bfp, &wm_key, pipeline);
1076 fail_if(!success, "do_wm_prog failed\n");
1077 add_compiled_stage(pipeline, VK_SHADER_STAGE_FRAGMENT,
1078 &pipeline->wm_prog_data.base);
1079 }
1080
1081 if (pipeline->shaders[VK_SHADER_STAGE_COMPUTE]) {
1082 struct brw_cs_prog_key cs_key;
1083 struct gl_compute_program *cp = (struct gl_compute_program *)
1084 program->_LinkedShaders[MESA_SHADER_COMPUTE]->Program;
1085 struct brw_compute_program *bcp = brw_compute_program(cp);
1086
1087 brw_cs_populate_key(brw, bcp, &cs_key);
1088
1089 success = brw_codegen_cs_prog(brw, program, bcp, &cs_key, pipeline);
1090 fail_if(!success, "brw_codegen_cs_prog failed\n");
1091 add_compiled_stage(pipeline, VK_SHADER_STAGE_COMPUTE,
1092 &pipeline->cs_prog_data.base);
1093 }
1094
1095 brw->ctx.Driver.DeleteShaderProgram(&brw->ctx, program);
1096
1097 struct anv_device *device = compiler->device;
1098 while (device->scratch_block_pool.bo.size < pipeline->total_scratch)
1099 anv_block_pool_alloc(&device->scratch_block_pool);
1100
1101 gen7_compute_urb_partition(pipeline);
1102
1103 return 0;
1104 }
1105
1106 /* This badly named function frees the struct anv_pipeline data that the compiler
1107 * allocates. Currently just the prog_data structs.
1108 */
1109 void
1110 anv_compiler_free(struct anv_pipeline *pipeline)
1111 {
1112 for (uint32_t stage = 0; stage < VK_NUM_SHADER_STAGE; stage++) {
1113 if (pipeline->prog_data[stage]) {
1114 free(pipeline->prog_data[stage]->map_entries);
1115 ralloc_free(pipeline->prog_data[stage]->param);
1116 ralloc_free(pipeline->prog_data[stage]->pull_param);
1117 }
1118 }
1119 }
1120
1121 }