2 * Copyright © 2015 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
28 #include "anv_private.h"
31 #include <brw_context.h>
32 #include <brw_wm.h> /* brw_new_shader_program is here */
39 #include <mesa/main/shaderobj.h>
40 #include <mesa/main/fbobject.h>
41 #include <mesa/main/context.h>
42 #include <mesa/program/program.h>
43 #include <glsl/program.h>
45 /* XXX: We need this to keep symbols in nir.h from conflicting with the
46 * generated GEN command packing headers. We need to fix *both* to not
47 * define something as generic as LOAD.
51 #include <glsl/nir/nir_spirv.h>
53 #define SPIR_V_MAGIC_NUMBER 0x07230203
/* Print a printf-style message to stderr and terminate the process when
 * `cond` is true; do nothing when it is false.  Used for unrecoverable
 * shader-compiler errors where no caller can meaningfully recover.
 */
static void
fail_if(int cond, const char *format, ...)
{
   va_list args;

   if (!cond)
      return;

   va_start(args, format);
   vfprintf(stderr, format, args);
   va_end(args);

   exit(1);
}
71 set_binding_table_layout(struct brw_stage_prog_data
*prog_data
,
72 struct anv_pipeline
*pipeline
, uint32_t stage
)
74 uint32_t bias
, count
, k
, *map
;
75 struct anv_pipeline_layout
*layout
= pipeline
->layout
;
77 /* No layout is valid for shaders that don't bind any resources. */
78 if (pipeline
->layout
== NULL
)
81 if (stage
== VK_SHADER_STAGE_FRAGMENT
)
86 count
= layout
->stage
[stage
].surface_count
;
87 prog_data
->map_entries
=
88 (uint32_t *) malloc(count
* sizeof(prog_data
->map_entries
[0]));
89 if (prog_data
->map_entries
== NULL
)
90 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY
);
93 map
= prog_data
->map_entries
;
94 for (uint32_t i
= 0; i
< layout
->num_sets
; i
++) {
95 prog_data
->bind_map
[i
].index
= map
;
96 for (uint32_t j
= 0; j
< layout
->set
[i
].layout
->stage
[stage
].surface_count
; j
++)
99 prog_data
->bind_map
[i
].index_count
=
100 layout
->set
[i
].layout
->stage
[stage
].surface_count
;
107 upload_kernel(struct anv_pipeline
*pipeline
, const void *data
, size_t size
)
109 struct anv_state state
=
110 anv_state_stream_alloc(&pipeline
->program_stream
, size
, 64);
112 assert(size
< pipeline
->program_stream
.block_pool
->block_size
);
114 memcpy(state
.map
, data
, size
);
120 create_params_array(struct anv_pipeline
*pipeline
,
121 struct gl_shader
*shader
,
122 struct brw_stage_prog_data
*prog_data
)
124 VkShaderStage stage
= anv_vk_shader_stage_for_mesa_stage(shader
->Stage
);
125 unsigned num_params
= 0;
127 if (shader
->num_uniform_components
) {
128 /* If the shader uses any push constants at all, we'll just give
129 * them the maximum possible number
131 num_params
+= MAX_PUSH_CONSTANTS_SIZE
/ sizeof(float);
134 if (pipeline
->layout
&& pipeline
->layout
->stage
[stage
].has_dynamic_offsets
)
135 num_params
+= MAX_DYNAMIC_BUFFERS
;
140 prog_data
->param
= (const gl_constant_value
**)
141 anv_device_alloc(pipeline
->device
,
142 num_params
* sizeof(gl_constant_value
*),
143 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL_SHADER
);
145 /* We now set the param values to be offsets into a
146 * anv_push_constant_data structure. Since the compiler doesn't
147 * actually dereference any of the gl_constant_value pointers in the
148 * params array, it doesn't really matter what we put here.
150 struct anv_push_constants
*null_data
= NULL
;
151 for (unsigned i
= 0; i
< num_params
; i
++)
152 prog_data
->param
[i
] =
153 (const gl_constant_value
*)&null_data
->client_data
[i
* sizeof(float)];
157 brw_vs_populate_key(struct brw_context
*brw
,
158 struct brw_vertex_program
*vp
,
159 struct brw_vs_prog_key
*key
)
161 struct gl_context
*ctx
= &brw
->ctx
;
162 /* BRW_NEW_VERTEX_PROGRAM */
163 struct gl_program
*prog
= (struct gl_program
*) vp
;
165 memset(key
, 0, sizeof(*key
));
167 /* Just upload the program verbatim for now. Always send it all
168 * the inputs it asks for, whether they are varying or not.
170 key
->base
.program_string_id
= vp
->id
;
171 brw_setup_vue_key_clip_info(brw
, &key
->base
,
172 vp
->program
.Base
.UsesClipDistanceOut
);
176 key
->copy_edgeflag
= (ctx
->Polygon
.FrontMode
!= GL_FILL
||
177 ctx
->Polygon
.BackMode
!= GL_FILL
);
180 if (prog
->OutputsWritten
& (VARYING_BIT_COL0
| VARYING_BIT_COL1
|
181 VARYING_BIT_BFC0
| VARYING_BIT_BFC1
)) {
182 /* _NEW_LIGHT | _NEW_BUFFERS */
183 key
->clamp_vertex_color
= ctx
->Light
._ClampVertexColor
;
187 if (brw
->gen
< 6 && ctx
->Point
.PointSprite
) {
188 for (int i
= 0; i
< 8; i
++) {
189 if (ctx
->Point
.CoordReplace
[i
])
190 key
->point_coord_replace
|= (1 << i
);
195 brw_populate_sampler_prog_key_data(ctx
, prog
, brw
->vs
.base
.sampler_count
,
200 really_do_vs_prog(struct brw_context
*brw
,
201 struct gl_shader_program
*prog
,
202 struct brw_vertex_program
*vp
,
203 struct brw_vs_prog_key
*key
, struct anv_pipeline
*pipeline
)
206 const GLuint
*program
;
207 struct brw_vs_prog_data
*prog_data
= &pipeline
->vs_prog_data
;
209 struct gl_shader
*vs
= NULL
;
212 vs
= prog
->_LinkedShaders
[MESA_SHADER_VERTEX
];
214 memset(prog_data
, 0, sizeof(*prog_data
));
216 mem_ctx
= ralloc_context(NULL
);
218 create_params_array(pipeline
, vs
, &prog_data
->base
.base
);
219 anv_nir_apply_dynamic_offsets(pipeline
, vs
->Program
->nir
,
220 &prog_data
->base
.base
);
222 GLbitfield64 outputs_written
= vp
->program
.Base
.OutputsWritten
;
223 prog_data
->inputs_read
= vp
->program
.Base
.InputsRead
;
225 if (key
->copy_edgeflag
) {
226 outputs_written
|= BITFIELD64_BIT(VARYING_SLOT_EDGE
);
227 prog_data
->inputs_read
|= VERT_BIT_EDGEFLAG
;
231 /* Put dummy slots into the VUE for the SF to put the replaced
232 * point sprite coords in. We shouldn't need these dummy slots,
233 * which take up precious URB space, but it would mean that the SF
234 * doesn't get nice aligned pairs of input coords into output
235 * coords, which would be a pain to handle.
237 for (int i
= 0; i
< 8; i
++) {
238 if (key
->point_coord_replace
& (1 << i
))
239 outputs_written
|= BITFIELD64_BIT(VARYING_SLOT_TEX0
+ i
);
242 /* if back colors are written, allocate slots for front colors too */
243 if (outputs_written
& BITFIELD64_BIT(VARYING_SLOT_BFC0
))
244 outputs_written
|= BITFIELD64_BIT(VARYING_SLOT_COL0
);
245 if (outputs_written
& BITFIELD64_BIT(VARYING_SLOT_BFC1
))
246 outputs_written
|= BITFIELD64_BIT(VARYING_SLOT_COL1
);
249 /* In order for legacy clipping to work, we need to populate the clip
250 * distance varying slots whenever clipping is enabled, even if the vertex
251 * shader doesn't write to gl_ClipDistance.
253 if (key
->base
.userclip_active
) {
254 outputs_written
|= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0
);
255 outputs_written
|= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1
);
258 brw_compute_vue_map(brw
->intelScreen
->devinfo
,
259 &prog_data
->base
.vue_map
, outputs_written
);
261 set_binding_table_layout(&prog_data
->base
.base
, pipeline
,
262 VK_SHADER_STAGE_VERTEX
);
266 program
= brw_vs_emit(brw
, mem_ctx
, key
, prog_data
, &vp
->program
,
267 prog
, &program_size
);
268 if (program
== NULL
) {
269 ralloc_free(mem_ctx
);
273 const uint32_t offset
= upload_kernel(pipeline
, program
, program_size
);
274 if (prog_data
->base
.dispatch_mode
== DISPATCH_MODE_SIMD8
) {
275 pipeline
->vs_simd8
= offset
;
276 pipeline
->vs_vec4
= NO_KERNEL
;
278 pipeline
->vs_simd8
= NO_KERNEL
;
279 pipeline
->vs_vec4
= offset
;
282 ralloc_free(mem_ctx
);
287 void brw_wm_populate_key(struct brw_context
*brw
,
288 struct brw_fragment_program
*fp
,
289 struct brw_wm_prog_key
*key
)
291 struct gl_context
*ctx
= &brw
->ctx
;
292 struct gl_program
*prog
= (struct gl_program
*) brw
->fragment_program
;
295 bool program_uses_dfdy
= fp
->program
.UsesDFdy
;
296 struct gl_framebuffer draw_buffer
;
297 bool multisample_fbo
;
299 memset(key
, 0, sizeof(*key
));
301 for (int i
= 0; i
< MAX_SAMPLERS
; i
++) {
302 /* Assume color sampler, no swizzling. */
303 key
->tex
.swizzles
[i
] = SWIZZLE_XYZW
;
306 /* A non-zero framebuffer name indicates that the framebuffer was created by
307 * the user rather than the window system. */
308 draw_buffer
.Name
= 1;
309 draw_buffer
.Visual
.samples
= 1;
310 draw_buffer
._NumColorDrawBuffers
= 1;
311 draw_buffer
._NumColorDrawBuffers
= 1;
312 draw_buffer
.Width
= 400;
313 draw_buffer
.Height
= 400;
314 ctx
->DrawBuffer
= &draw_buffer
;
316 multisample_fbo
= ctx
->DrawBuffer
->Visual
.samples
> 1;
318 /* Build the index for table lookup
322 if (fp
->program
.UsesKill
|| ctx
->Color
.AlphaEnabled
)
323 lookup
|= IZ_PS_KILL_ALPHATEST_BIT
;
325 if (fp
->program
.Base
.OutputsWritten
& BITFIELD64_BIT(FRAG_RESULT_DEPTH
))
326 lookup
|= IZ_PS_COMPUTES_DEPTH_BIT
;
330 lookup
|= IZ_DEPTH_TEST_ENABLE_BIT
;
332 if (ctx
->Depth
.Test
&& ctx
->Depth
.Mask
) /* ?? */
333 lookup
|= IZ_DEPTH_WRITE_ENABLE_BIT
;
335 /* _NEW_STENCIL | _NEW_BUFFERS */
336 if (ctx
->Stencil
._Enabled
) {
337 lookup
|= IZ_STENCIL_TEST_ENABLE_BIT
;
339 if (ctx
->Stencil
.WriteMask
[0] ||
340 ctx
->Stencil
.WriteMask
[ctx
->Stencil
._BackFace
])
341 lookup
|= IZ_STENCIL_WRITE_ENABLE_BIT
;
343 key
->iz_lookup
= lookup
;
348 /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */
349 if (ctx
->Line
.SmoothFlag
) {
350 if (brw
->reduced_primitive
== GL_LINES
) {
353 else if (brw
->reduced_primitive
== GL_TRIANGLES
) {
354 if (ctx
->Polygon
.FrontMode
== GL_LINE
) {
355 line_aa
= AA_SOMETIMES
;
357 if (ctx
->Polygon
.BackMode
== GL_LINE
||
358 (ctx
->Polygon
.CullFlag
&&
359 ctx
->Polygon
.CullFaceMode
== GL_BACK
))
362 else if (ctx
->Polygon
.BackMode
== GL_LINE
) {
363 line_aa
= AA_SOMETIMES
;
365 if ((ctx
->Polygon
.CullFlag
&&
366 ctx
->Polygon
.CullFaceMode
== GL_FRONT
))
372 key
->line_aa
= line_aa
;
375 key
->high_quality_derivatives
=
376 ctx
->Hint
.FragmentShaderDerivative
== GL_NICEST
;
379 key
->stats_wm
= brw
->stats_wm
;
382 key
->flat_shade
= (ctx
->Light
.ShadeModel
== GL_FLAT
);
384 /* _NEW_FRAG_CLAMP | _NEW_BUFFERS */
385 key
->clamp_fragment_color
= ctx
->Color
._ClampFragmentColor
;
388 brw_populate_sampler_prog_key_data(ctx
, prog
, brw
->wm
.base
.sampler_count
,
393 * Include the draw buffer origin and height so that we can calculate
394 * fragment position values relative to the bottom left of the drawable,
395 * from the incoming screen origin relative position we get as part of our
398 * This is only needed for the WM_WPOSXY opcode when the fragment program
399 * uses the gl_FragCoord input.
401 * We could avoid recompiling by including this as a constant referenced by
402 * our program, but if we were to do that it would also be nice to handle
403 * getting that constant updated at batchbuffer submit time (when we
404 * hold the lock and know where the buffer really is) rather than at emit
405 * time when we don't hold the lock and are just guessing. We could also
406 * just avoid using this as key data if the program doesn't use
409 * For DRI2 the origin_x/y will always be (0,0) but we still need the
410 * drawable height in order to invert the Y axis.
412 if (fp
->program
.Base
.InputsRead
& VARYING_BIT_POS
) {
413 key
->drawable_height
= ctx
->DrawBuffer
->Height
;
416 if ((fp
->program
.Base
.InputsRead
& VARYING_BIT_POS
) || program_uses_dfdy
) {
417 key
->render_to_fbo
= _mesa_is_user_fbo(ctx
->DrawBuffer
);
421 key
->nr_color_regions
= ctx
->DrawBuffer
->_NumColorDrawBuffers
;
423 /* _NEW_MULTISAMPLE, _NEW_COLOR, _NEW_BUFFERS */
424 key
->replicate_alpha
= ctx
->DrawBuffer
->_NumColorDrawBuffers
> 1 &&
425 (ctx
->Multisample
.SampleAlphaToCoverage
|| ctx
->Color
.AlphaEnabled
);
427 /* _NEW_BUFFERS _NEW_MULTISAMPLE */
428 /* Ignore sample qualifier while computing this flag. */
429 key
->persample_shading
=
430 _mesa_get_min_invocations_per_fragment(ctx
, &fp
->program
, true) > 1;
431 if (key
->persample_shading
)
432 key
->persample_2x
= ctx
->DrawBuffer
->Visual
.samples
== 2;
434 key
->compute_pos_offset
=
435 _mesa_get_min_invocations_per_fragment(ctx
, &fp
->program
, false) > 1 &&
436 fp
->program
.Base
.SystemValuesRead
& SYSTEM_BIT_SAMPLE_POS
;
438 key
->compute_sample_id
=
440 ctx
->Multisample
.Enabled
&&
441 (fp
->program
.Base
.SystemValuesRead
& SYSTEM_BIT_SAMPLE_ID
);
443 /* BRW_NEW_VUE_MAP_GEOM_OUT */
444 if (brw
->gen
< 6 || _mesa_bitcount_64(fp
->program
.Base
.InputsRead
&
445 BRW_FS_VARYING_INPUT_MASK
) > 16)
446 key
->input_slots_valid
= brw
->vue_map_geom_out
.slots_valid
;
449 /* _NEW_COLOR | _NEW_BUFFERS */
450 /* Pre-gen6, the hardware alpha test always used each render
451 * target's alpha to do alpha test, as opposed to render target 0's alpha
452 * like GL requires. Fix that by building the alpha test into the
453 * shader, and we'll skip enabling the fixed function alpha test.
455 if (brw
->gen
< 6 && ctx
->DrawBuffer
->_NumColorDrawBuffers
> 1 && ctx
->Color
.AlphaEnabled
) {
456 key
->alpha_test_func
= ctx
->Color
.AlphaFunc
;
457 key
->alpha_test_ref
= ctx
->Color
.AlphaRef
;
460 /* The unique fragment program ID */
461 key
->program_string_id
= fp
->id
;
463 ctx
->DrawBuffer
= NULL
;
467 computed_depth_mode(struct gl_fragment_program
*fp
)
469 if (fp
->Base
.OutputsWritten
& BITFIELD64_BIT(FRAG_RESULT_DEPTH
)) {
470 switch (fp
->FragDepthLayout
) {
471 case FRAG_DEPTH_LAYOUT_NONE
:
472 case FRAG_DEPTH_LAYOUT_ANY
:
473 return BRW_PSCDEPTH_ON
;
474 case FRAG_DEPTH_LAYOUT_GREATER
:
475 return BRW_PSCDEPTH_ON_GE
;
476 case FRAG_DEPTH_LAYOUT_LESS
:
477 return BRW_PSCDEPTH_ON_LE
;
478 case FRAG_DEPTH_LAYOUT_UNCHANGED
:
479 return BRW_PSCDEPTH_OFF
;
482 return BRW_PSCDEPTH_OFF
;
486 really_do_wm_prog(struct brw_context
*brw
,
487 struct gl_shader_program
*prog
,
488 struct brw_fragment_program
*fp
,
489 struct brw_wm_prog_key
*key
, struct anv_pipeline
*pipeline
)
491 void *mem_ctx
= ralloc_context(NULL
);
492 struct brw_wm_prog_data
*prog_data
= &pipeline
->wm_prog_data
;
493 struct gl_shader
*fs
= NULL
;
494 unsigned int program_size
;
495 const uint32_t *program
;
498 fs
= prog
->_LinkedShaders
[MESA_SHADER_FRAGMENT
];
500 memset(prog_data
, 0, sizeof(*prog_data
));
502 /* key->alpha_test_func means simulating alpha testing via discards,
503 * so the shader definitely kills pixels.
505 prog_data
->uses_kill
= fp
->program
.UsesKill
|| key
->alpha_test_func
;
507 prog_data
->computed_depth_mode
= computed_depth_mode(&fp
->program
);
509 create_params_array(pipeline
, fs
, &prog_data
->base
);
510 anv_nir_apply_dynamic_offsets(pipeline
, fs
->Program
->nir
, &prog_data
->base
);
512 prog_data
->barycentric_interp_modes
=
513 brw_compute_barycentric_interp_modes(brw
, key
->flat_shade
,
514 key
->persample_shading
,
517 set_binding_table_layout(&prog_data
->base
, pipeline
,
518 VK_SHADER_STAGE_FRAGMENT
);
519 /* This needs to come after shader time and pull constant entries, but we
520 * don't have those set up now, so just put it after the layout entries.
522 prog_data
->binding_table
.render_target_start
= 0;
524 program
= brw_wm_fs_emit(brw
, mem_ctx
, key
, prog_data
,
525 &fp
->program
, prog
, &program_size
);
526 if (program
== NULL
) {
527 ralloc_free(mem_ctx
);
531 uint32_t offset
= upload_kernel(pipeline
, program
, program_size
);
534 pipeline
->ps_simd8
= NO_KERNEL
;
536 pipeline
->ps_simd8
= offset
;
538 if (prog_data
->no_8
|| prog_data
->prog_offset_16
) {
539 pipeline
->ps_simd16
= offset
+ prog_data
->prog_offset_16
;
541 pipeline
->ps_simd16
= NO_KERNEL
;
544 ralloc_free(mem_ctx
);
550 brw_gs_populate_key(struct brw_context
*brw
,
551 struct anv_pipeline
*pipeline
,
552 struct brw_geometry_program
*gp
,
553 struct brw_gs_prog_key
*key
)
555 struct gl_context
*ctx
= &brw
->ctx
;
556 struct brw_stage_state
*stage_state
= &brw
->gs
.base
;
557 struct gl_program
*prog
= &gp
->program
.Base
;
559 memset(key
, 0, sizeof(*key
));
561 key
->base
.program_string_id
= gp
->id
;
562 brw_setup_vue_key_clip_info(brw
, &key
->base
,
563 gp
->program
.Base
.UsesClipDistanceOut
);
566 brw_populate_sampler_prog_key_data(ctx
, prog
, stage_state
->sampler_count
,
569 struct brw_vs_prog_data
*prog_data
= &pipeline
->vs_prog_data
;
571 /* BRW_NEW_VUE_MAP_VS */
572 key
->input_varyings
= prog_data
->base
.vue_map
.slots_valid
;
576 really_do_gs_prog(struct brw_context
*brw
,
577 struct gl_shader_program
*prog
,
578 struct brw_geometry_program
*gp
,
579 struct brw_gs_prog_key
*key
, struct anv_pipeline
*pipeline
)
581 struct brw_gs_compile_output output
;
583 /* FIXME: We pass the bind map to the compile in the output struct. Need
584 * something better. */
585 set_binding_table_layout(&output
.prog_data
.base
.base
,
586 pipeline
, VK_SHADER_STAGE_GEOMETRY
);
588 brw_compile_gs_prog(brw
, prog
, gp
, key
, &output
);
590 pipeline
->gs_vec4
= upload_kernel(pipeline
, output
.program
, output
.program_size
);
591 pipeline
->gs_vertex_count
= gp
->program
.VerticesIn
;
593 ralloc_free(output
.mem_ctx
);
599 brw_codegen_cs_prog(struct brw_context
*brw
,
600 struct gl_shader_program
*prog
,
601 struct brw_compute_program
*cp
,
602 struct brw_cs_prog_key
*key
, struct anv_pipeline
*pipeline
)
604 const GLuint
*program
;
605 void *mem_ctx
= ralloc_context(NULL
);
607 struct brw_cs_prog_data
*prog_data
= &pipeline
->cs_prog_data
;
609 struct gl_shader
*cs
= prog
->_LinkedShaders
[MESA_SHADER_COMPUTE
];
612 memset(prog_data
, 0, sizeof(*prog_data
));
614 set_binding_table_layout(&prog_data
->base
, pipeline
, VK_SHADER_STAGE_COMPUTE
);
616 create_params_array(pipeline
, cs
, &prog_data
->base
);
617 anv_nir_apply_dynamic_offsets(pipeline
, cs
->Program
->nir
, &prog_data
->base
);
619 program
= brw_cs_emit(brw
, mem_ctx
, key
, prog_data
,
620 &cp
->program
, prog
, &program_size
);
621 if (program
== NULL
) {
622 ralloc_free(mem_ctx
);
626 if (unlikely(INTEL_DEBUG
& DEBUG_CS
))
627 fprintf(stderr
, "\n");
629 pipeline
->cs_simd
= upload_kernel(pipeline
, program
, program_size
);
631 ralloc_free(mem_ctx
);
637 brw_cs_populate_key(struct brw_context
*brw
,
638 struct brw_compute_program
*bcp
, struct brw_cs_prog_key
*key
)
640 memset(key
, 0, sizeof(*key
));
642 /* The unique compute program ID */
643 key
->program_string_id
= bcp
->id
;
646 struct anv_compiler
{
647 struct anv_device
*device
;
648 struct intel_screen
*screen
;
649 struct brw_context
*brw
;
650 struct gl_pipeline_object pipeline
;
655 struct anv_compiler
*
656 anv_compiler_create(struct anv_device
*device
)
658 const struct brw_device_info
*devinfo
= &device
->info
;
659 struct anv_compiler
*compiler
;
660 struct gl_context
*ctx
;
662 compiler
= rzalloc(NULL
, struct anv_compiler
);
663 if (compiler
== NULL
)
666 compiler
->screen
= rzalloc(compiler
, struct intel_screen
);
667 if (compiler
->screen
== NULL
)
670 compiler
->brw
= rzalloc(compiler
, struct brw_context
);
671 if (compiler
->brw
== NULL
)
674 compiler
->device
= device
;
676 compiler
->brw
->gen
= devinfo
->gen
;
677 compiler
->brw
->is_g4x
= devinfo
->is_g4x
;
678 compiler
->brw
->is_baytrail
= devinfo
->is_baytrail
;
679 compiler
->brw
->is_haswell
= devinfo
->is_haswell
;
680 compiler
->brw
->is_cherryview
= devinfo
->is_cherryview
;
682 /* We need this at least for CS, which will check brw->max_cs_threads
683 * against the work group size. */
684 compiler
->brw
->max_vs_threads
= devinfo
->max_vs_threads
;
685 compiler
->brw
->max_hs_threads
= devinfo
->max_hs_threads
;
686 compiler
->brw
->max_ds_threads
= devinfo
->max_ds_threads
;
687 compiler
->brw
->max_gs_threads
= devinfo
->max_gs_threads
;
688 compiler
->brw
->max_wm_threads
= devinfo
->max_wm_threads
;
689 compiler
->brw
->max_cs_threads
= devinfo
->max_cs_threads
;
690 compiler
->brw
->urb
.size
= devinfo
->urb
.size
;
691 compiler
->brw
->urb
.min_vs_entries
= devinfo
->urb
.min_vs_entries
;
692 compiler
->brw
->urb
.max_vs_entries
= devinfo
->urb
.max_vs_entries
;
693 compiler
->brw
->urb
.max_hs_entries
= devinfo
->urb
.max_hs_entries
;
694 compiler
->brw
->urb
.max_ds_entries
= devinfo
->urb
.max_ds_entries
;
695 compiler
->brw
->urb
.max_gs_entries
= devinfo
->urb
.max_gs_entries
;
697 compiler
->brw
->intelScreen
= compiler
->screen
;
698 compiler
->screen
->devinfo
= &device
->info
;
700 brw_process_intel_debug_variable(compiler
->screen
);
702 compiler
->screen
->compiler
= brw_compiler_create(compiler
, &device
->info
);
704 ctx
= &compiler
->brw
->ctx
;
705 _mesa_init_shader_object_functions(&ctx
->Driver
);
707 /* brw_select_clip_planes() needs this for bogus reasons. */
708 ctx
->_Shader
= &compiler
->pipeline
;
713 ralloc_free(compiler
);
718 anv_compiler_destroy(struct anv_compiler
*compiler
)
720 _mesa_free_errors_data(&compiler
->brw
->ctx
);
721 ralloc_free(compiler
);
724 /* From gen7_urb.c */
726 /* FIXME: Add to struct intel_device_info */
728 static const int gen8_push_size
= 32 * 1024;
731 gen7_compute_urb_partition(struct anv_pipeline
*pipeline
)
733 const struct brw_device_info
*devinfo
= &pipeline
->device
->info
;
734 bool vs_present
= pipeline
->vs_simd8
!= NO_KERNEL
;
735 unsigned vs_size
= vs_present
? pipeline
->vs_prog_data
.base
.urb_entry_size
: 1;
736 unsigned vs_entry_size_bytes
= vs_size
* 64;
737 bool gs_present
= pipeline
->gs_vec4
!= NO_KERNEL
;
738 unsigned gs_size
= gs_present
? pipeline
->gs_prog_data
.base
.urb_entry_size
: 1;
739 unsigned gs_entry_size_bytes
= gs_size
* 64;
741 /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS):
743 * VS Number of URB Entries must be divisible by 8 if the VS URB Entry
744 * Allocation Size is less than 9 512-bit URB entries.
746 * Similar text exists for GS.
748 unsigned vs_granularity
= (vs_size
< 9) ? 8 : 1;
749 unsigned gs_granularity
= (gs_size
< 9) ? 8 : 1;
751 /* URB allocations must be done in 8k chunks. */
752 unsigned chunk_size_bytes
= 8192;
754 /* Determine the size of the URB in chunks. */
755 unsigned urb_chunks
= devinfo
->urb
.size
* 1024 / chunk_size_bytes
;
757 /* Reserve space for push constants */
758 unsigned push_constant_bytes
= gen8_push_size
;
759 unsigned push_constant_chunks
=
760 push_constant_bytes
/ chunk_size_bytes
;
762 /* Initially, assign each stage the minimum amount of URB space it needs,
763 * and make a note of how much additional space it "wants" (the amount of
764 * additional space it could actually make use of).
767 /* VS has a lower limit on the number of URB entries */
769 ALIGN(devinfo
->urb
.min_vs_entries
* vs_entry_size_bytes
,
770 chunk_size_bytes
) / chunk_size_bytes
;
772 ALIGN(devinfo
->urb
.max_vs_entries
* vs_entry_size_bytes
,
773 chunk_size_bytes
) / chunk_size_bytes
- vs_chunks
;
775 unsigned gs_chunks
= 0;
776 unsigned gs_wants
= 0;
778 /* There are two constraints on the minimum amount of URB space we can
781 * (1) We need room for at least 2 URB entries, since we always operate
782 * the GS in DUAL_OBJECT mode.
784 * (2) We can't allocate less than nr_gs_entries_granularity.
786 gs_chunks
= ALIGN(MAX2(gs_granularity
, 2) * gs_entry_size_bytes
,
787 chunk_size_bytes
) / chunk_size_bytes
;
789 ALIGN(devinfo
->urb
.max_gs_entries
* gs_entry_size_bytes
,
790 chunk_size_bytes
) / chunk_size_bytes
- gs_chunks
;
793 /* There should always be enough URB space to satisfy the minimum
794 * requirements of each stage.
796 unsigned total_needs
= push_constant_chunks
+ vs_chunks
+ gs_chunks
;
797 assert(total_needs
<= urb_chunks
);
799 /* Mete out remaining space (if any) in proportion to "wants". */
800 unsigned total_wants
= vs_wants
+ gs_wants
;
801 unsigned remaining_space
= urb_chunks
- total_needs
;
802 if (remaining_space
> total_wants
)
803 remaining_space
= total_wants
;
804 if (remaining_space
> 0) {
805 unsigned vs_additional
= (unsigned)
806 round(vs_wants
* (((double) remaining_space
) / total_wants
));
807 vs_chunks
+= vs_additional
;
808 remaining_space
-= vs_additional
;
809 gs_chunks
+= remaining_space
;
812 /* Sanity check that we haven't over-allocated. */
813 assert(push_constant_chunks
+ vs_chunks
+ gs_chunks
<= urb_chunks
);
815 /* Finally, compute the number of entries that can fit in the space
816 * allocated to each stage.
818 unsigned nr_vs_entries
= vs_chunks
* chunk_size_bytes
/ vs_entry_size_bytes
;
819 unsigned nr_gs_entries
= gs_chunks
* chunk_size_bytes
/ gs_entry_size_bytes
;
821 /* Since we rounded up when computing *_wants, this may be slightly more
822 * than the maximum allowed amount, so correct for that.
824 nr_vs_entries
= MIN2(nr_vs_entries
, devinfo
->urb
.max_vs_entries
);
825 nr_gs_entries
= MIN2(nr_gs_entries
, devinfo
->urb
.max_gs_entries
);
827 /* Ensure that we program a multiple of the granularity. */
828 nr_vs_entries
= ROUND_DOWN_TO(nr_vs_entries
, vs_granularity
);
829 nr_gs_entries
= ROUND_DOWN_TO(nr_gs_entries
, gs_granularity
);
831 /* Finally, sanity check to make sure we have at least the minimum number
832 * of entries needed for each stage.
834 assert(nr_vs_entries
>= devinfo
->urb
.min_vs_entries
);
836 assert(nr_gs_entries
>= 2);
838 /* Lay out the URB in the following order:
843 pipeline
->urb
.vs_start
= push_constant_chunks
;
844 pipeline
->urb
.vs_size
= vs_size
;
845 pipeline
->urb
.nr_vs_entries
= nr_vs_entries
;
847 pipeline
->urb
.gs_start
= push_constant_chunks
+ vs_chunks
;
848 pipeline
->urb
.gs_size
= gs_size
;
849 pipeline
->urb
.nr_gs_entries
= nr_gs_entries
;
852 static const struct {
854 gl_shader_stage stage
;
857 { GL_VERTEX_SHADER
, MESA_SHADER_VERTEX
, "vertex" },
858 { GL_TESS_CONTROL_SHADER
, (gl_shader_stage
)-1,"tess control" },
859 { GL_TESS_EVALUATION_SHADER
, (gl_shader_stage
)-1, "tess evaluation" },
860 { GL_GEOMETRY_SHADER
, MESA_SHADER_GEOMETRY
, "geometry" },
861 { GL_FRAGMENT_SHADER
, MESA_SHADER_FRAGMENT
, "fragment" },
862 { GL_COMPUTE_SHADER
, MESA_SHADER_COMPUTE
, "compute" },
872 setup_nir_io(struct gl_shader
*mesa_shader
,
875 struct gl_program
*prog
= mesa_shader
->Program
;
876 foreach_list_typed(nir_variable
, var
, node
, &shader
->inputs
) {
877 prog
->InputsRead
|= BITFIELD64_BIT(var
->data
.location
);
878 if (shader
->stage
== MESA_SHADER_FRAGMENT
) {
879 struct gl_fragment_program
*fprog
= (struct gl_fragment_program
*)prog
;
881 fprog
->InterpQualifier
[var
->data
.location
] =
882 (glsl_interp_qualifier
)var
->data
.interpolation
;
883 if (var
->data
.centroid
)
884 fprog
->IsCentroid
|= BITFIELD64_BIT(var
->data
.location
);
885 if (var
->data
.sample
)
886 fprog
->IsSample
|= BITFIELD64_BIT(var
->data
.location
);
890 foreach_list_typed(nir_variable
, var
, node
, &shader
->outputs
) {
891 prog
->OutputsWritten
|= BITFIELD64_BIT(var
->data
.location
);
894 mesa_shader
->num_uniform_components
= shader
->num_uniforms
;
898 anv_compile_shader_spirv(struct anv_compiler
*compiler
,
899 struct gl_shader_program
*program
,
900 struct anv_pipeline
*pipeline
, uint32_t stage
)
902 struct brw_context
*brw
= compiler
->brw
;
903 struct anv_shader
*shader
= pipeline
->shaders
[stage
];
904 struct gl_shader
*mesa_shader
;
908 mesa_shader
= brw_new_shader(&brw
->ctx
, name
, stage_info
[stage
].token
);
909 fail_if(mesa_shader
== NULL
,
910 "failed to create %s shader\n", stage_info
[stage
].name
);
912 #define CREATE_PROGRAM(stage) \
913 _mesa_init_##stage##_program(&brw->ctx, &ralloc(mesa_shader, struct brw_##stage##_program)->program, 0, 0)
916 struct gl_program
*prog
;
918 case VK_SHADER_STAGE_VERTEX
:
919 prog
= CREATE_PROGRAM(vertex
);
920 is_scalar
= compiler
->screen
->compiler
->scalar_vs
;
922 case VK_SHADER_STAGE_GEOMETRY
:
923 prog
= CREATE_PROGRAM(geometry
);
926 case VK_SHADER_STAGE_FRAGMENT
:
927 prog
= CREATE_PROGRAM(fragment
);
930 case VK_SHADER_STAGE_COMPUTE
:
931 prog
= CREATE_PROGRAM(compute
);
935 unreachable("Unsupported shader stage");
937 _mesa_reference_program(&brw
->ctx
, &mesa_shader
->Program
, prog
);
939 mesa_shader
->Program
->Parameters
=
940 rzalloc(mesa_shader
, struct gl_program_parameter_list
);
942 mesa_shader
->Type
= stage_info
[stage
].token
;
943 mesa_shader
->Stage
= stage_info
[stage
].stage
;
945 struct gl_shader_compiler_options
*glsl_options
=
946 &compiler
->screen
->compiler
->glsl_compiler_options
[stage_info
[stage
].stage
];
948 spirv
= (uint32_t *) shader
->module
->data
;
949 assert(spirv
[0] == SPIR_V_MAGIC_NUMBER
);
950 assert(shader
->module
->size
% 4 == 0);
952 mesa_shader
->Program
->nir
=
953 spirv_to_nir(spirv
, shader
->module
->size
/ 4,
954 stage_info
[stage
].stage
, glsl_options
->NirOptions
);
955 nir_validate_shader(mesa_shader
->Program
->nir
);
957 brw_process_nir(mesa_shader
->Program
->nir
,
958 compiler
->screen
->devinfo
,
959 NULL
, mesa_shader
->Stage
, is_scalar
);
961 setup_nir_io(mesa_shader
, mesa_shader
->Program
->nir
);
963 fail_if(mesa_shader
->Program
->nir
== NULL
,
964 "failed to translate SPIR-V to NIR\n");
966 _mesa_reference_shader(&brw
->ctx
, &program
->Shaders
[program
->NumShaders
],
968 program
->NumShaders
++;
972 add_compiled_stage(struct anv_pipeline
*pipeline
, uint32_t stage
,
973 struct brw_stage_prog_data
*prog_data
)
975 struct brw_device_info
*devinfo
= &pipeline
->device
->info
;
976 uint32_t max_threads
[] = {
977 [VK_SHADER_STAGE_VERTEX
] = devinfo
->max_vs_threads
,
978 [VK_SHADER_STAGE_TESS_CONTROL
] = 0,
979 [VK_SHADER_STAGE_TESS_EVALUATION
] = 0,
980 [VK_SHADER_STAGE_GEOMETRY
] = devinfo
->max_gs_threads
,
981 [VK_SHADER_STAGE_FRAGMENT
] = devinfo
->max_wm_threads
,
982 [VK_SHADER_STAGE_COMPUTE
] = devinfo
->max_cs_threads
,
985 pipeline
->prog_data
[stage
] = prog_data
;
986 pipeline
->active_stages
|= 1 << stage
;
987 pipeline
->scratch_start
[stage
] = pipeline
->total_scratch
;
988 pipeline
->total_scratch
=
989 align_u32(pipeline
->total_scratch
, 1024) +
990 prog_data
->total_scratch
* max_threads
[stage
];
994 anv_compiler_run(struct anv_compiler
*compiler
, struct anv_pipeline
*pipeline
)
996 struct gl_shader_program
*program
;
998 struct brw_context
*brw
= compiler
->brw
;
1000 pipeline
->writes_point_size
= false;
1002 /* When we free the pipeline, we detect stages based on the NULL status
1003 * of various prog_data pointers. Make them NULL by default.
1005 memset(pipeline
->prog_data
, 0, sizeof(pipeline
->prog_data
));
1006 memset(pipeline
->scratch_start
, 0, sizeof(pipeline
->scratch_start
));
1008 brw
->use_rep_send
= pipeline
->use_repclear
;
1009 brw
->no_simd8
= pipeline
->use_repclear
;
1011 program
= brw
->ctx
.Driver
.NewShaderProgram(name
);
1012 program
->Shaders
= (struct gl_shader
**)
1013 calloc(VK_SHADER_STAGE_NUM
, sizeof(struct gl_shader
*));
1014 fail_if(program
== NULL
|| program
->Shaders
== NULL
,
1015 "failed to create program\n");
1017 for (unsigned i
= 0; i
< VK_SHADER_STAGE_NUM
; i
++) {
1018 if (pipeline
->shaders
[i
])
1019 anv_compile_shader_spirv(compiler
, program
, pipeline
, i
);
1022 for (unsigned i
= 0; i
< program
->NumShaders
; i
++) {
1023 struct gl_shader
*shader
= program
->Shaders
[i
];
1024 program
->_LinkedShaders
[shader
->Stage
] = shader
;
1028 pipeline
->active_stages
= 0;
1029 pipeline
->total_scratch
= 0;
1031 if (pipeline
->shaders
[VK_SHADER_STAGE_VERTEX
]) {
1032 struct brw_vs_prog_key vs_key
;
1033 struct gl_vertex_program
*vp
= (struct gl_vertex_program
*)
1034 program
->_LinkedShaders
[MESA_SHADER_VERTEX
]->Program
;
1035 struct brw_vertex_program
*bvp
= brw_vertex_program(vp
);
1037 brw_vs_populate_key(brw
, bvp
, &vs_key
);
1039 success
= really_do_vs_prog(brw
, program
, bvp
, &vs_key
, pipeline
);
1040 fail_if(!success
, "do_wm_prog failed\n");
1041 add_compiled_stage(pipeline
, VK_SHADER_STAGE_VERTEX
,
1042 &pipeline
->vs_prog_data
.base
.base
);
1044 if (vp
->Base
.OutputsWritten
& VARYING_SLOT_PSIZ
)
1045 pipeline
->writes_point_size
= true;
1047 memset(&pipeline
->vs_prog_data
, 0, sizeof(pipeline
->vs_prog_data
));
1048 pipeline
->vs_simd8
= NO_KERNEL
;
1049 pipeline
->vs_vec4
= NO_KERNEL
;
1053 if (pipeline
->shaders
[VK_SHADER_STAGE_GEOMETRY
]) {
1054 struct brw_gs_prog_key gs_key
;
1055 struct gl_geometry_program
*gp
= (struct gl_geometry_program
*)
1056 program
->_LinkedShaders
[MESA_SHADER_GEOMETRY
]->Program
;
1057 struct brw_geometry_program
*bgp
= brw_geometry_program(gp
);
1059 brw_gs_populate_key(brw
, pipeline
, bgp
, &gs_key
);
1061 success
= really_do_gs_prog(brw
, program
, bgp
, &gs_key
, pipeline
);
1062 fail_if(!success
, "do_gs_prog failed\n");
1063 add_compiled_stage(pipeline
, VK_SHADER_STAGE_GEOMETRY
,
1064 &pipeline
->gs_prog_data
.base
.base
);
1066 if (gp
->Base
.OutputsWritten
& VARYING_SLOT_PSIZ
)
1067 pipeline
->writes_point_size
= true;
1069 pipeline
->gs_vec4
= NO_KERNEL
;
1072 if (pipeline
->shaders
[VK_SHADER_STAGE_FRAGMENT
]) {
1073 struct brw_wm_prog_key wm_key
;
1074 struct gl_fragment_program
*fp
= (struct gl_fragment_program
*)
1075 program
->_LinkedShaders
[MESA_SHADER_FRAGMENT
]->Program
;
1076 struct brw_fragment_program
*bfp
= brw_fragment_program(fp
);
1078 brw_wm_populate_key(brw
, bfp
, &wm_key
);
1080 success
= really_do_wm_prog(brw
, program
, bfp
, &wm_key
, pipeline
);
1081 fail_if(!success
, "do_wm_prog failed\n");
1082 add_compiled_stage(pipeline
, VK_SHADER_STAGE_FRAGMENT
,
1083 &pipeline
->wm_prog_data
.base
);
1086 if (pipeline
->shaders
[VK_SHADER_STAGE_COMPUTE
]) {
1087 struct brw_cs_prog_key cs_key
;
1088 struct gl_compute_program
*cp
= (struct gl_compute_program
*)
1089 program
->_LinkedShaders
[MESA_SHADER_COMPUTE
]->Program
;
1090 struct brw_compute_program
*bcp
= brw_compute_program(cp
);
1092 brw_cs_populate_key(brw
, bcp
, &cs_key
);
1094 success
= brw_codegen_cs_prog(brw
, program
, bcp
, &cs_key
, pipeline
);
1095 fail_if(!success
, "brw_codegen_cs_prog failed\n");
1096 add_compiled_stage(pipeline
, VK_SHADER_STAGE_COMPUTE
,
1097 &pipeline
->cs_prog_data
.base
);
1100 brw
->ctx
.Driver
.DeleteShaderProgram(&brw
->ctx
, program
);
1102 struct anv_device
*device
= compiler
->device
;
1103 while (device
->scratch_block_pool
.bo
.size
< pipeline
->total_scratch
)
1104 anv_block_pool_alloc(&device
->scratch_block_pool
);
1106 gen7_compute_urb_partition(pipeline
);
1111 /* This badly named function frees the struct anv_pipeline data that the compiler
1112 * allocates. Currently just the prog_data structs.
1115 anv_compiler_free(struct anv_pipeline
*pipeline
)
1117 for (uint32_t stage
= 0; stage
< VK_SHADER_STAGE_NUM
; stage
++) {
1118 if (pipeline
->prog_data
[stage
]) {
1119 free(pipeline
->prog_data
[stage
]->map_entries
);
1120 /* We only ever set up the params array because we don't do
1121 * non-UBO pull constants
1123 anv_device_free(pipeline
->device
, pipeline
->prog_data
[stage
]->param
);