vk: Fix vkGetObjectInfo return values
[mesa.git] / src / vulkan / compiler.cpp
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <fcntl.h>
27
28 #include "private.h"
29
30 #include <brw_context.h>
31 #include <brw_wm.h> /* brw_new_shader_program is here */
32
33 #include <brw_vs.h>
34 #include <brw_gs.h>
35
36 #include <mesa/main/shaderobj.h>
37 #include <mesa/main/fbobject.h>
38 #include <mesa/program/program.h>
39 #include <glsl/program.h>
40
41 static void
42 fail_if(int cond, const char *format, ...)
43 {
44 va_list args;
45
46 if (!cond)
47 return;
48
49 va_start(args, format);
50 vfprintf(stderr, format, args);
51 va_end(args);
52
53 exit(1);
54 }
55
56 static VkResult
57 set_binding_table_layout(struct brw_stage_prog_data *prog_data,
58 struct anv_pipeline *pipeline, uint32_t stage)
59 {
60 uint32_t bias, count, k, *map;
61 struct anv_pipeline_layout *layout = pipeline->layout;
62
63 /* No layout is valid for shaders that don't bind any resources. */
64 if (pipeline->layout == NULL)
65 return VK_SUCCESS;
66
67 if (stage == VK_SHADER_STAGE_FRAGMENT)
68 bias = MAX_RTS;
69 else
70 bias = 0;
71
72 prog_data->binding_table.texture_start = bias;
73
74 count = layout->stage[stage].surface_count;
75 prog_data->map_entries =
76 (uint32_t *) malloc(count * sizeof(prog_data->map_entries[0]));
77 if (prog_data->map_entries == NULL)
78 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
79
80 k = bias;
81 map = prog_data->map_entries;
82 for (uint32_t i = 0; i < layout->num_sets; i++) {
83 prog_data->bind_map[i] = map;
84 for (uint32_t j = 0; j < layout->set[i].layout->stage[stage].surface_count; j++)
85 *map++ = k++;
86 }
87
88 return VK_SUCCESS;
89 }
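/* Illustrative sketch of the mapping built above (the numbers and the
 * MAX_RTS value are assumptions, not taken from this driver): with
 * MAX_RTS == 8 and a fragment-stage layout of two descriptor sets, where
 * set 0 exposes 3 surfaces and set 1 exposes 2, the flat binding table
 * starts after the render targets:
 *
 *   bind_map[0] -> { 8, 9, 10 }
 *   bind_map[1] -> { 11, 12 }
 *
 * i.e. surface j of set i gets index bias + (surfaces in earlier sets) + j.
 */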
90
91 static void
92 brw_vs_populate_key(struct brw_context *brw,
93 struct brw_vertex_program *vp,
94 struct brw_vs_prog_key *key)
95 {
96 struct gl_context *ctx = &brw->ctx;
97 /* BRW_NEW_VERTEX_PROGRAM */
98 struct gl_program *prog = (struct gl_program *) vp;
99
100 memset(key, 0, sizeof(*key));
101
102 /* Just upload the program verbatim for now. Always send it all
103 * the inputs it asks for, whether they are varying or not.
104 */
105 key->base.program_string_id = vp->id;
106 brw_setup_vue_key_clip_info(brw, &key->base,
107 vp->program.Base.UsesClipDistanceOut);
108
109 /* _NEW_POLYGON */
110 if (brw->gen < 6) {
111 key->copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL ||
112 ctx->Polygon.BackMode != GL_FILL);
113 }
114
115 if (prog->OutputsWritten & (VARYING_BIT_COL0 | VARYING_BIT_COL1 |
116 VARYING_BIT_BFC0 | VARYING_BIT_BFC1)) {
117 /* _NEW_LIGHT | _NEW_BUFFERS */
118 key->clamp_vertex_color = ctx->Light._ClampVertexColor;
119 }
120
121 /* _NEW_POINT */
122 if (brw->gen < 6 && ctx->Point.PointSprite) {
123 for (int i = 0; i < 8; i++) {
124 if (ctx->Point.CoordReplace[i])
125 key->point_coord_replace |= (1 << i);
126 }
127 }
128
129 /* _NEW_TEXTURE */
130 brw_populate_sampler_prog_key_data(ctx, prog, brw->vs.base.sampler_count,
131 &key->base.tex);
132 }
133
134 static bool
135 really_do_vs_prog(struct brw_context *brw,
136 struct gl_shader_program *prog,
137 struct brw_vertex_program *vp,
138 struct brw_vs_prog_key *key, struct anv_pipeline *pipeline)
139 {
140 GLuint program_size;
141 const GLuint *program;
142 struct brw_vs_compile c;
143 struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data;
144 struct brw_stage_prog_data *stage_prog_data = &prog_data->base.base;
145 void *mem_ctx;
146 struct gl_shader *vs = NULL;
147
148 if (prog)
149 vs = prog->_LinkedShaders[MESA_SHADER_VERTEX];
150
151 memset(&c, 0, sizeof(c));
152 memcpy(&c.key, key, sizeof(*key));
153 memset(prog_data, 0, sizeof(*prog_data));
154
155 mem_ctx = ralloc_context(NULL);
156
157 c.vp = vp;
158
159 /* Allocate the references to the uniforms that will end up in the
160 * prog_data associated with the compiled program, and which will be freed
161 * by the state cache.
162 */
163 int param_count;
164 if (vs) {
165 /* We add padding around uniform values below vec4 size, with the worst
166 * case being a float value that gets blown up to a vec4, so be
167 * conservative here.
168 */
169 param_count = vs->num_uniform_components * 4;
170
171 } else {
172 param_count = vp->program.Base.Parameters->NumParameters * 4;
173 }
174 /* vec4_visitor::setup_uniform_clipplane_values() also uploads user clip
175 * planes as uniforms.
176 */
177 param_count += c.key.base.nr_userclip_plane_consts * 4;
178
179 /* We set nr_params here NOT to the size of the param and pull_param
180 * arrays, but to the number of uniform components vec4_visitor
181 * needs. vec4_visitor::setup_uniforms() will set it back to a proper value.
182 */
183 stage_prog_data->nr_params = ALIGN(param_count, 4) / 4;
184 if (vs) {
185 stage_prog_data->nr_params += vs->num_samplers;
186 }
187
188 GLbitfield64 outputs_written = vp->program.Base.OutputsWritten;
189 prog_data->inputs_read = vp->program.Base.InputsRead;
190
191 if (c.key.copy_edgeflag) {
192 outputs_written |= BITFIELD64_BIT(VARYING_SLOT_EDGE);
193 prog_data->inputs_read |= VERT_BIT_EDGEFLAG;
194 }
195
196 if (brw->gen < 6) {
197 /* Put dummy slots into the VUE for the SF to put the replaced
198 * point sprite coords in. We shouldn't need these dummy slots,
199 * which take up precious URB space, but it would mean that the SF
200 * doesn't get nice aligned pairs of input coords into output
201 * coords, which would be a pain to handle.
202 */
203 for (int i = 0; i < 8; i++) {
204 if (c.key.point_coord_replace & (1 << i))
205 outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i);
206 }
207
208 /* if back colors are written, allocate slots for front colors too */
209 if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC0))
210 outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL0);
211 if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC1))
212 outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL1);
213 }
214
215 /* In order for legacy clipping to work, we need to populate the clip
216 * distance varying slots whenever clipping is enabled, even if the vertex
217 * shader doesn't write to gl_ClipDistance.
218 */
219 if (c.key.base.userclip_active) {
220 outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
221 outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
222 }
223
224 brw_compute_vue_map(brw->intelScreen->devinfo,
225 &prog_data->base.vue_map, outputs_written);
226
227 set_binding_table_layout(&prog_data->base.base, pipeline,
228 VK_SHADER_STAGE_VERTEX);
229
230 /* Emit GEN4 code.
231 */
232 program = brw_vs_emit(brw, prog, &c, prog_data, mem_ctx, &program_size);
233 if (program == NULL) {
234 ralloc_free(mem_ctx);
235 return false;
236 }
237
238 pipeline->vs_simd8 = pipeline->program_next;
239 memcpy((char *) pipeline->device->instruction_block_pool.map +
240 pipeline->vs_simd8, program, program_size);
241
242 pipeline->program_next = align(pipeline->program_next + program_size, 64);
243
244 ralloc_free(mem_ctx);
245
246 if (stage_prog_data->total_scratch > 0)
247 if (!anv_bo_init_new(&pipeline->vs_scratch_bo,
248 pipeline->device,
249 stage_prog_data->total_scratch))
250 return false;
251
252
253 return true;
254 }
255
256 void brw_wm_populate_key(struct brw_context *brw,
257 struct brw_fragment_program *fp,
258 struct brw_wm_prog_key *key)
259 {
260 struct gl_context *ctx = &brw->ctx;
261 struct gl_program *prog = (struct gl_program *) brw->fragment_program;
262 GLuint lookup = 0;
263 GLuint line_aa;
264 bool program_uses_dfdy = fp->program.UsesDFdy;
265 struct gl_framebuffer draw_buffer;
266 bool multisample_fbo;
267
268 memset(key, 0, sizeof(*key));
269
270 for (int i = 0; i < MAX_SAMPLERS; i++) {
271 /* Assume color sampler, no swizzling. */
272 key->tex.swizzles[i] = SWIZZLE_XYZW;
273 }
274
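/* brw_wm_populate_key() was written against GL context state; the Vulkan
 * path has no real drawbuffer, so what follows appears to fake a minimal
 * gl_framebuffer purely so the fields read further down have defined
 * values (the 400x400 size and single color buffer being placeholders
 * rather than real state).
 */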
275 /* A non-zero framebuffer name indicates that the framebuffer was created by
276 * the user rather than the window system. */
277 draw_buffer.Name = 1;
278 draw_buffer.Visual.samples = 1;
279 draw_buffer._NumColorDrawBuffers = 1;
280 draw_buffer._NumColorDrawBuffers = 1;
281 draw_buffer.Width = 400;
282 draw_buffer.Height = 400;
283 ctx->DrawBuffer = &draw_buffer;
284
285 multisample_fbo = ctx->DrawBuffer->Visual.samples > 1;
286
287 /* Build the index for table lookup
288 */
289 if (brw->gen < 6) {
290 /* _NEW_COLOR */
291 if (fp->program.UsesKill || ctx->Color.AlphaEnabled)
292 lookup |= IZ_PS_KILL_ALPHATEST_BIT;
293
294 if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
295 lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
296
297 /* _NEW_DEPTH */
298 if (ctx->Depth.Test)
299 lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
300
301 if (ctx->Depth.Test && ctx->Depth.Mask) /* ?? */
302 lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
303
304 /* _NEW_STENCIL | _NEW_BUFFERS */
305 if (ctx->Stencil._Enabled) {
306 lookup |= IZ_STENCIL_TEST_ENABLE_BIT;
307
308 if (ctx->Stencil.WriteMask[0] ||
309 ctx->Stencil.WriteMask[ctx->Stencil._BackFace])
310 lookup |= IZ_STENCIL_WRITE_ENABLE_BIT;
311 }
312 key->iz_lookup = lookup;
313 }
314
315 line_aa = AA_NEVER;
316
317 /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */
318 if (ctx->Line.SmoothFlag) {
319 if (brw->reduced_primitive == GL_LINES) {
320 line_aa = AA_ALWAYS;
321 }
322 else if (brw->reduced_primitive == GL_TRIANGLES) {
323 if (ctx->Polygon.FrontMode == GL_LINE) {
324 line_aa = AA_SOMETIMES;
325
326 if (ctx->Polygon.BackMode == GL_LINE ||
327 (ctx->Polygon.CullFlag &&
328 ctx->Polygon.CullFaceMode == GL_BACK))
329 line_aa = AA_ALWAYS;
330 }
331 else if (ctx->Polygon.BackMode == GL_LINE) {
332 line_aa = AA_SOMETIMES;
333
334 if ((ctx->Polygon.CullFlag &&
335 ctx->Polygon.CullFaceMode == GL_FRONT))
336 line_aa = AA_ALWAYS;
337 }
338 }
339 }
340
341 key->line_aa = line_aa;
342
343 /* _NEW_HINT */
344 key->high_quality_derivatives =
345 ctx->Hint.FragmentShaderDerivative == GL_NICEST;
346
347 if (brw->gen < 6)
348 key->stats_wm = brw->stats_wm;
349
350 /* _NEW_LIGHT */
351 key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT);
352
353 /* _NEW_FRAG_CLAMP | _NEW_BUFFERS */
354 key->clamp_fragment_color = ctx->Color._ClampFragmentColor;
355
356 /* _NEW_TEXTURE */
357 brw_populate_sampler_prog_key_data(ctx, prog, brw->wm.base.sampler_count,
358 &key->tex);
359
360 /* _NEW_BUFFERS */
361 /*
362 * Include the draw buffer origin and height so that we can calculate
363 * fragment position values relative to the bottom left of the drawable,
364 * from the incoming screen origin relative position we get as part of our
365 * payload.
366 *
367 * This is only needed for the WM_WPOSXY opcode when the fragment program
368 * uses the gl_FragCoord input.
369 *
370 * We could avoid recompiling by including this as a constant referenced by
371 * our program, but if we were to do that it would also be nice to handle
372 * getting that constant updated at batchbuffer submit time (when we
373 * hold the lock and know where the buffer really is) rather than at emit
374 * time when we don't hold the lock and are just guessing. We could also
375 * just avoid using this as key data if the program doesn't use
376 * fragment.position.
377 *
378 * For DRI2 the origin_x/y will always be (0,0) but we still need the
379 * drawable height in order to invert the Y axis.
380 */
381 if (fp->program.Base.InputsRead & VARYING_BIT_POS) {
382 key->drawable_height = ctx->DrawBuffer->Height;
383 }
384
385 if ((fp->program.Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) {
386 key->render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
387 }
388
389 /* _NEW_BUFFERS */
390 key->nr_color_regions = ctx->DrawBuffer->_NumColorDrawBuffers;
391
392 /* _NEW_MULTISAMPLE, _NEW_COLOR, _NEW_BUFFERS */
393 key->replicate_alpha = ctx->DrawBuffer->_NumColorDrawBuffers > 1 &&
394 (ctx->Multisample.SampleAlphaToCoverage || ctx->Color.AlphaEnabled);
395
396 /* _NEW_BUFFERS _NEW_MULTISAMPLE */
397 /* Ignore sample qualifier while computing this flag. */
398 key->persample_shading =
399 _mesa_get_min_invocations_per_fragment(ctx, &fp->program, true) > 1;
400 if (key->persample_shading)
401 key->persample_2x = ctx->DrawBuffer->Visual.samples == 2;
402
403 key->compute_pos_offset =
404 _mesa_get_min_invocations_per_fragment(ctx, &fp->program, false) > 1 &&
405 fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_POS;
406
407 key->compute_sample_id =
408 multisample_fbo &&
409 ctx->Multisample.Enabled &&
410 (fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_ID);
411
412 /* BRW_NEW_VUE_MAP_GEOM_OUT */
413 if (brw->gen < 6 || _mesa_bitcount_64(fp->program.Base.InputsRead &
414 BRW_FS_VARYING_INPUT_MASK) > 16)
415 key->input_slots_valid = brw->vue_map_geom_out.slots_valid;
416
417
418 /* _NEW_COLOR | _NEW_BUFFERS */
419 /* Pre-gen6, the hardware alpha test always used each render
420 * target's alpha to do alpha test, as opposed to render target 0's alpha
421 * like GL requires. Fix that by building the alpha test into the
422 * shader, and we'll skip enabling the fixed function alpha test.
423 */
424 if (brw->gen < 6 && ctx->DrawBuffer->_NumColorDrawBuffers > 1 && ctx->Color.AlphaEnabled) {
425 key->alpha_test_func = ctx->Color.AlphaFunc;
426 key->alpha_test_ref = ctx->Color.AlphaRef;
427 }
428
429 /* The unique fragment program ID */
430 key->program_string_id = fp->id;
431
432 ctx->DrawBuffer = NULL;
433 }
434
435 static uint8_t
436 computed_depth_mode(struct gl_fragment_program *fp)
437 {
438 if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
439 switch (fp->FragDepthLayout) {
440 case FRAG_DEPTH_LAYOUT_NONE:
441 case FRAG_DEPTH_LAYOUT_ANY:
442 return BRW_PSCDEPTH_ON;
443 case FRAG_DEPTH_LAYOUT_GREATER:
444 return BRW_PSCDEPTH_ON_GE;
445 case FRAG_DEPTH_LAYOUT_LESS:
446 return BRW_PSCDEPTH_ON_LE;
447 case FRAG_DEPTH_LAYOUT_UNCHANGED:
448 return BRW_PSCDEPTH_OFF;
449 }
450 }
451 return BRW_PSCDEPTH_OFF;
452 }
453
454 static bool
455 really_do_wm_prog(struct brw_context *brw,
456 struct gl_shader_program *prog,
457 struct brw_fragment_program *fp,
458 struct brw_wm_prog_key *key, struct anv_pipeline *pipeline)
459 {
460 struct gl_context *ctx = &brw->ctx;
461 void *mem_ctx = ralloc_context(NULL);
462 struct brw_wm_prog_data *prog_data = &pipeline->wm_prog_data;
463 struct gl_shader *fs = NULL;
464 unsigned int program_size;
465 const uint32_t *program;
466 uint32_t offset;
467
468 if (prog)
469 fs = prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
470
471 memset(prog_data, 0, sizeof(*prog_data));
472
473 /* key->alpha_test_func means simulating alpha testing via discards,
474 * so the shader definitely kills pixels.
475 */
476 prog_data->uses_kill = fp->program.UsesKill || key->alpha_test_func;
477
478 prog_data->computed_depth_mode = computed_depth_mode(&fp->program);
479
480 /* Allocate the references to the uniforms that will end up in the
481 * prog_data associated with the compiled program, and which will be freed
482 * by the state cache.
483 */
484 int param_count;
485 if (fs) {
486 param_count = fs->num_uniform_components;
487 } else {
488 param_count = fp->program.Base.Parameters->NumParameters * 4;
489 }
490 /* The backend also sometimes adds params for texture size. */
491 param_count += 2 * ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits;
492 prog_data->base.param =
493 rzalloc_array(NULL, const gl_constant_value *, param_count);
494 prog_data->base.pull_param =
495 rzalloc_array(NULL, const gl_constant_value *, param_count);
496 prog_data->base.nr_params = param_count;
497
498 prog_data->barycentric_interp_modes =
499 brw_compute_barycentric_interp_modes(brw, key->flat_shade,
500 key->persample_shading,
501 &fp->program);
502
503 set_binding_table_layout(&prog_data->base, pipeline,
504 VK_SHADER_STAGE_FRAGMENT);
505 /* This needs to come after shader time and pull constant entries, but we
506 * don't have those set up now, so just put it after the layout entries.
507 */
508 prog_data->binding_table.render_target_start = 0;
509
510 program = brw_wm_fs_emit(brw, mem_ctx, key, prog_data,
511 &fp->program, prog, &program_size);
512 if (program == NULL) {
513 ralloc_free(mem_ctx);
514 return false;
515 }
516
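/* As far as the brw prog_data fields go: the FS backend can emit a SIMD8
 * and a SIMD16 variant into the same program blob, prog_offset_16 is the
 * SIMD16 kernel's byte offset within that blob, and no_8 means only the
 * SIMD16 variant was generated.  The handles recorded below are offsets
 * into the pipeline's instruction block.
 */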
517 offset = pipeline->program_next;
518 pipeline->program_next = align(pipeline->program_next + program_size, 64);
519
520 if (prog_data->no_8)
521 pipeline->ps_simd8 = NO_KERNEL;
522 else
523 pipeline->ps_simd8 = offset;
524
525 if (prog_data->no_8 || prog_data->prog_offset_16)
526 pipeline->ps_simd16 = offset + prog_data->prog_offset_16;
527 else
528 pipeline->ps_simd16 = NO_KERNEL;
529
530 memcpy((char *) pipeline->device->instruction_block_pool.map +
531 offset, program, program_size);
532
533 ralloc_free(mem_ctx);
534
535 if (prog_data->base.total_scratch > 0)
536 if (!anv_bo_init_new(&pipeline->ps_scratch_bo,
537 pipeline->device,
538 prog_data->base.total_scratch))
539 return false;
540
541 return true;
542 }
543
544 static void
545 brw_gs_populate_key(struct brw_context *brw,
546 struct anv_pipeline *pipeline,
547 struct brw_geometry_program *gp,
548 struct brw_gs_prog_key *key)
549 {
550 struct gl_context *ctx = &brw->ctx;
551 struct brw_stage_state *stage_state = &brw->gs.base;
552 struct gl_program *prog = &gp->program.Base;
553
554 memset(key, 0, sizeof(*key));
555
556 key->base.program_string_id = gp->id;
557 brw_setup_vue_key_clip_info(brw, &key->base,
558 gp->program.Base.UsesClipDistanceOut);
559
560 /* _NEW_TEXTURE */
561 brw_populate_sampler_prog_key_data(ctx, prog, stage_state->sampler_count,
562 &key->base.tex);
563
564 struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data;
565
566 /* BRW_NEW_VUE_MAP_VS */
567 key->input_varyings = prog_data->base.vue_map.slots_valid;
568 }
569
570 static bool
571 really_do_gs_prog(struct brw_context *brw,
572 struct gl_shader_program *prog,
573 struct brw_geometry_program *gp,
574 struct brw_gs_prog_key *key, struct anv_pipeline *pipeline)
575 {
576 struct brw_gs_compile_output output;
577 uint32_t offset;
578
579 /* FIXME: We pass the bind map to the compile in the output struct. Need
580 * something better. */
581 set_binding_table_layout(&output.prog_data.base.base,
582 pipeline, VK_SHADER_STAGE_GEOMETRY);
583
584 brw_compile_gs_prog(brw, prog, gp, key, &output);
585
586 offset = pipeline->program_next;
587 pipeline->program_next = align(pipeline->program_next + output.program_size, 64);
588
589 pipeline->gs_vec4 = offset;
590 pipeline->gs_vertex_count = gp->program.VerticesIn;
591
592 memcpy((char *) pipeline->device->instruction_block_pool.map +
593 offset, output.program, output.program_size);
594
595 ralloc_free(output.mem_ctx);
596
597 if (output.prog_data.base.base.total_scratch) {
598 if (!anv_bo_init_new(&pipeline->gs_scratch_bo,
599 pipeline->device,
600 output.prog_data.base.base.total_scratch))
601 return false;
602 }
603
604 memcpy(&pipeline->gs_prog_data, &output.prog_data, sizeof pipeline->gs_prog_data);
605
606 return true;
607 }
608
609 static void
610 fail_on_compile_error(int status, const char *msg)
611 {
612 int source, line, column;
613 char error[256];
614
615 if (status)
616 return;
617
618 if (sscanf(msg, "%d:%d(%d): error: %255[^\n]", &source, &line, &column, error) == 4)
619 fail_if(!status, "%d:%s\n", line, error);
620 else
621 fail_if(!status, "%s\n", msg);
622 }
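/* Hypothetical example of the format parsed above: a GLSL error such as
 *
 *   0:12(7): error: `foo' undeclared
 *
 * matches the sscanf pattern and is reported as "12:`foo' undeclared";
 * anything that doesn't match is printed verbatim.
 */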
623
624 struct anv_compiler {
625 struct intel_screen *screen;
626 struct brw_context *brw;
627 };
628
629
630 extern "C" {
631
632 struct anv_compiler *
633 anv_compiler_create(int fd)
634 {
635 struct anv_compiler *compiler;
636
637 compiler = (struct anv_compiler *) malloc(sizeof *compiler);
638 if (compiler == NULL)
639 return NULL;
640
641 compiler->screen = intel_screen_create(fd);
642 if (compiler->screen == NULL) {
643 free(compiler);
644 return NULL;
645 }
646
647 compiler->brw = intel_context_create(compiler->screen);
648 if (compiler->brw == NULL) {
649 free(compiler);
650 return NULL;
651 }
652
653 compiler->brw->precompile = false;
654
655 return compiler;
656 }
657
658 void
659 anv_compiler_destroy(struct anv_compiler *compiler)
660 {
661 intel_context_destroy(compiler->brw);
662 intel_screen_destroy(compiler->screen);
663 free(compiler);
664 }
665
666 /* From gen7_urb.c */
667
668 /* FIXME: Add to struct intel_device_info */
669
670 static const int gen8_push_size = 32 * 1024;
671
672 static void
673 gen7_compute_urb_partition(struct anv_pipeline *pipeline)
674 {
675 const struct brw_device_info *devinfo = &pipeline->device->info;
676 bool vs_present = pipeline->vs_simd8 != NO_KERNEL;
677 unsigned vs_size = vs_present ? pipeline->vs_prog_data.base.urb_entry_size : 1;
678 unsigned vs_entry_size_bytes = vs_size * 64;
679 bool gs_present = pipeline->gs_vec4 != NO_KERNEL;
680 unsigned gs_size = gs_present ? pipeline->gs_prog_data.base.urb_entry_size : 1;
681 unsigned gs_entry_size_bytes = gs_size * 64;
682
683 /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS):
684 *
685 * VS Number of URB Entries must be divisible by 8 if the VS URB Entry
686 * Allocation Size is less than 9 512-bit URB entries.
687 *
688 * Similar text exists for GS.
689 */
690 unsigned vs_granularity = (vs_size < 9) ? 8 : 1;
691 unsigned gs_granularity = (gs_size < 9) ? 8 : 1;
692
693 /* URB allocations must be done in 8k chunks. */
694 unsigned chunk_size_bytes = 8192;
695
696 /* Determine the size of the URB in chunks. */
697 unsigned urb_chunks = devinfo->urb.size * 1024 / chunk_size_bytes;
698
699 /* Reserve space for push constants */
700 unsigned push_constant_bytes = gen8_push_size;
701 unsigned push_constant_chunks =
702 push_constant_bytes / chunk_size_bytes;
703
704 /* Initially, assign each stage the minimum amount of URB space it needs,
705 * and make a note of how much additional space it "wants" (the amount of
706 * additional space it could actually make use of).
707 */
708
709 /* VS has a lower limit on the number of URB entries */
710 unsigned vs_chunks =
711 ALIGN(devinfo->urb.min_vs_entries * vs_entry_size_bytes,
712 chunk_size_bytes) / chunk_size_bytes;
713 unsigned vs_wants =
714 ALIGN(devinfo->urb.max_vs_entries * vs_entry_size_bytes,
715 chunk_size_bytes) / chunk_size_bytes - vs_chunks;
716
717 unsigned gs_chunks = 0;
718 unsigned gs_wants = 0;
719 if (gs_present) {
720 /* There are two constraints on the minimum amount of URB space we can
721 * allocate:
722 *
723 * (1) We need room for at least 2 URB entries, since we always operate
724 * the GS in DUAL_OBJECT mode.
725 *
726 * (2) We can't allocate fewer entries than gs_granularity.
727 */
728 gs_chunks = ALIGN(MAX2(gs_granularity, 2) * gs_entry_size_bytes,
729 chunk_size_bytes) / chunk_size_bytes;
730 gs_wants =
731 ALIGN(devinfo->urb.max_gs_entries * gs_entry_size_bytes,
732 chunk_size_bytes) / chunk_size_bytes - gs_chunks;
733 }
734
735 /* There should always be enough URB space to satisfy the minimum
736 * requirements of each stage.
737 */
738 unsigned total_needs = push_constant_chunks + vs_chunks + gs_chunks;
739 assert(total_needs <= urb_chunks);
740
741 /* Mete out remaining space (if any) in proportion to "wants". */
742 unsigned total_wants = vs_wants + gs_wants;
743 unsigned remaining_space = urb_chunks - total_needs;
744 if (remaining_space > total_wants)
745 remaining_space = total_wants;
746 if (remaining_space > 0) {
747 unsigned vs_additional = (unsigned)
748 round(vs_wants * (((double) remaining_space) / total_wants));
749 vs_chunks += vs_additional;
750 remaining_space -= vs_additional;
751 gs_chunks += remaining_space;
752 }
753
754 /* Sanity check that we haven't over-allocated. */
755 assert(push_constant_chunks + vs_chunks + gs_chunks <= urb_chunks);
756
757 /* Finally, compute the number of entries that can fit in the space
758 * allocated to each stage.
759 */
760 unsigned nr_vs_entries = vs_chunks * chunk_size_bytes / vs_entry_size_bytes;
761 unsigned nr_gs_entries = gs_chunks * chunk_size_bytes / gs_entry_size_bytes;
762
763 /* Since we rounded up when computing *_wants, this may be slightly more
764 * than the maximum allowed amount, so correct for that.
765 */
766 nr_vs_entries = MIN2(nr_vs_entries, devinfo->urb.max_vs_entries);
767 nr_gs_entries = MIN2(nr_gs_entries, devinfo->urb.max_gs_entries);
768
769 /* Ensure that we program a multiple of the granularity. */
770 nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, vs_granularity);
771 nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, gs_granularity);
772
773 /* Finally, sanity check to make sure we have at least the minimum number
774 * of entries needed for each stage.
775 */
776 assert(nr_vs_entries >= devinfo->urb.min_vs_entries);
777 if (gs_present)
778 assert(nr_gs_entries >= 2);
779
780 /* Lay out the URB in the following order:
781 * - push constants
782 * - VS
783 * - GS
784 */
785 pipeline->urb.vs_start = push_constant_chunks;
786 pipeline->urb.vs_size = vs_size;
787 pipeline->urb.nr_vs_entries = nr_vs_entries;
788
789 pipeline->urb.gs_start = push_constant_chunks + vs_chunks;
790 pipeline->urb.gs_size = gs_size;
791 pipeline->urb.nr_gs_entries = nr_gs_entries;
792 }
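/* Worked example for the partition above, using made-up numbers rather
 * than any real devinfo: urb.size = 128 (KB) gives urb_chunks = 16 and
 * push_constant_chunks = 32768 / 8192 = 4.  With vs_size = 2 (128 bytes
 * per entry), min_vs_entries = 64 and max_vs_entries = 640, the VS needs
 * ALIGN(64 * 128, 8192) / 8192 = 1 chunk and wants 10 - 1 = 9 more.  With
 * no GS, total_needs = 4 + 1 = 5, the 11 leftover chunks are clamped to
 * the 9 wanted, so the VS ends up with 10 chunks = 640 entries, already a
 * multiple of the granularity of 8.
 */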
793
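/* stage_info appears to be indexed by the VK_SHADER_STAGE_* enum values
 * (anv_compile_shader() below does stage_info[stage]), so its order has to
 * match the Vulkan shader stage enum.
 */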
794 static const struct {
795 uint32_t token;
796 const char *name;
797 } stage_info[] = {
798 { GL_VERTEX_SHADER, "vertex" },
799 { GL_TESS_CONTROL_SHADER, "tess control" },
800 { GL_TESS_EVALUATION_SHADER, "tess evaluation" },
801 { GL_GEOMETRY_SHADER, "geometry" },
802 { GL_FRAGMENT_SHADER, "fragment" },
803 { GL_COMPUTE_SHADER, "compute" },
804 };
805
806 static void
807 anv_compile_shader(struct anv_compiler *compiler,
808 struct gl_shader_program *program,
809 struct anv_pipeline *pipeline, uint32_t stage)
810 {
811 struct brw_context *brw = compiler->brw;
812 struct gl_shader *shader;
813 int name = 0;
814
815 shader = brw_new_shader(&brw->ctx, name, stage_info[stage].token);
816 fail_if(shader == NULL, "failed to create %s shader\n", stage_info[stage].name);
817 shader->Source = strdup(pipeline->shaders[stage]->data);
818 _mesa_glsl_compile_shader(&brw->ctx, shader, false, false);
819 fail_on_compile_error(shader->CompileStatus, shader->InfoLog);
820
821 program->Shaders[program->NumShaders] = shader;
822 program->NumShaders++;
823 }
824
825 int
826 anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline)
827 {
828 struct gl_shader_program *program;
829 int name = 0;
830 struct brw_context *brw = compiler->brw;
831 struct anv_device *device = pipeline->device;
832
833 /* When we free the pipeline, we detect stages based on the NULL status
834 * of various prog_data pointers. Make them NULL by default.
835 */
836 memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
837
838 brw->use_rep_send = pipeline->use_repclear;
839 brw->no_simd8 = pipeline->use_repclear;
840
841 program = brw->ctx.Driver.NewShaderProgram(name);
842 program->Shaders = (struct gl_shader **)
843 calloc(VK_NUM_SHADER_STAGE, sizeof(struct gl_shader *));
844 fail_if(program == NULL || program->Shaders == NULL,
845 "failed to create program\n");
846
847 if (pipeline->shaders[VK_SHADER_STAGE_VERTEX])
848 anv_compile_shader(compiler, program, pipeline, VK_SHADER_STAGE_VERTEX);
849 anv_compile_shader(compiler, program, pipeline, VK_SHADER_STAGE_FRAGMENT);
850 if (pipeline->shaders[VK_SHADER_STAGE_GEOMETRY])
851 anv_compile_shader(compiler, program, pipeline, VK_SHADER_STAGE_GEOMETRY);
852
853 _mesa_glsl_link_shader(&brw->ctx, program);
854 fail_on_compile_error(program->LinkStatus,
855 program->InfoLog);
856
857 pipeline->program_block =
858 anv_block_pool_alloc(&device->instruction_block_pool);
859 pipeline->program_next = pipeline->program_block;
860
861
862 bool success;
863 struct brw_wm_prog_key wm_key;
864 struct gl_fragment_program *fp = (struct gl_fragment_program *)
865 program->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program;
866 struct brw_fragment_program *bfp = brw_fragment_program(fp);
867
868 brw_wm_populate_key(brw, bfp, &wm_key);
869
870 success = really_do_wm_prog(brw, program, bfp, &wm_key, pipeline);
871 fail_if(!success, "do_wm_prog failed\n");
872 pipeline->prog_data[VK_SHADER_STAGE_FRAGMENT] = &pipeline->wm_prog_data.base;
873 pipeline->active_stages = VK_SHADER_STAGE_FRAGMENT_BIT;
874
875
876 if (pipeline->shaders[VK_SHADER_STAGE_VERTEX]) {
877 struct brw_vs_prog_key vs_key;
878 struct gl_vertex_program *vp = (struct gl_vertex_program *)
879 program->_LinkedShaders[MESA_SHADER_VERTEX]->Program;
880 struct brw_vertex_program *bvp = brw_vertex_program(vp);
881
882 brw_vs_populate_key(brw, bvp, &vs_key);
883
884 success = really_do_vs_prog(brw, program, bvp, &vs_key, pipeline);
885 fail_if(!success, "do_vs_prog failed\n");
886 pipeline->prog_data[VK_SHADER_STAGE_VERTEX] = &pipeline->vs_prog_data.base.base;
887 pipeline->active_stages |= VK_SHADER_STAGE_VERTEX_BIT;
888 } else {
889 pipeline->vs_simd8 = NO_KERNEL;
890 }
891
892
893 if (pipeline->shaders[VK_SHADER_STAGE_GEOMETRY]) {
894 struct brw_gs_prog_key gs_key;
895 struct gl_geometry_program *gp = (struct gl_geometry_program *)
896 program->_LinkedShaders[MESA_SHADER_GEOMETRY]->Program;
897 struct brw_geometry_program *bgp = brw_geometry_program(gp);
898
899 brw_gs_populate_key(brw, pipeline, bgp, &gs_key);
900
901 success = really_do_gs_prog(brw, program, bgp, &gs_key, pipeline);
902 fail_if(!success, "do_gs_prog failed\n");
903 pipeline->active_stages |= VK_SHADER_STAGE_GEOMETRY_BIT;
904 pipeline->prog_data[VK_SHADER_STAGE_GEOMETRY] = &pipeline->gs_prog_data.base.base;
905 } else {
906 pipeline->gs_vec4 = NO_KERNEL;
907 }
908
909
910 /* FIXME: Allocate more blocks if we fill up this one and worst case,
911 allocate multiple contiguous blocks from end of pool to hold really big
912 * programs. */
913 assert(pipeline->program_next - pipeline->program_block < 8192);
914
915 brw->ctx.Driver.DeleteShaderProgram(&brw->ctx, program);
916
917 gen7_compute_urb_partition(pipeline);
918
919 return 0;
920 }
921
922 /* This badly named function frees the struct anv_pipeline data that the compiler
923 * allocates. Currently just the prog_data structs.
924 */
925 void
926 anv_compiler_free(struct anv_pipeline *pipeline)
927 {
928 struct anv_device *device = pipeline->device;
929
930 for (uint32_t stage = 0; stage < VK_NUM_SHADER_STAGE; stage++)
931 if (pipeline->prog_data[stage])
932 free(pipeline->prog_data[stage]->map_entries);
933
934 anv_block_pool_free(&device->instruction_block_pool,
935 pipeline->program_block);
936 }
937
938 }