2 * Copyright © 2015 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
30 #include <brw_context.h>
31 #include <brw_wm.h> /* brw_new_shader_program is here */
36 #include <mesa/main/shaderobj.h>
37 #include <mesa/main/fbobject.h>
38 #include <mesa/program/program.h>
39 #include <glsl/program.h>
/* fail_if: printf-style fatal-error helper. When `cond` is true it formats
 * `format` with the trailing varargs to stderr. NOTE(review): this extraction
 * is missing lines (the va_list declaration, the early return when !cond,
 * va_end, and the presumed exit/abort) — confirm against the original file. */
42 fail_if(int cond
, const char *format
, ...)
/* Forward the variadic arguments to vfprintf on stderr. */
49 va_start(args
, format
);
50 vfprintf(stderr
, format
, args
);
/* set_binding_table_layout: builds the per-stage surface binding-table map
 * for a pipeline. Allocates prog_data->map_entries (caller frees it later —
 * see anv_compiler_free) and points prog_data->bind_map[set] slices into it.
 * NOTE(review): several lines are missing from this extraction (the early
 * return for the no-layout case, the bias selection, loop bodies and the
 * final return) — verify against the original source. */
57 set_binding_table_layout(struct brw_stage_prog_data
*prog_data
,
58 struct anv_pipeline
*pipeline
, uint32_t stage
)
60 uint32_t bias
, count
, k
, *map
;
61 struct anv_pipeline_layout
*layout
= pipeline
->layout
;
63 /* No layout is valid for shaders that don't bind any resources. */
64 if (pipeline
->layout
== NULL
)
/* Fragment shaders presumably get a non-zero bias (render targets come
 * first in the binding table) — the assignment lines are missing here. */
67 if (stage
== VK_SHADER_STAGE_FRAGMENT
)
72 prog_data
->binding_table
.texture_start
= bias
;
74 count
= layout
->stage
[stage
].surface_count
;
/* Heap-allocate the flattened surface map; checked for NULL below. */
75 prog_data
->map_entries
=
76 (uint32_t *) malloc(count
* sizeof(prog_data
->map_entries
[0]));
77 if (prog_data
->map_entries
== NULL
)
78 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY
)
;
/* Walk every descriptor set and carve bind_map[i] out of the single
 * map_entries allocation, one slice per set. */
81 map
= prog_data
->map_entries
;
82 for (uint32_t i
= 0; i
< layout
->num_sets
; i
++) {
83 prog_data
->bind_map
[i
] = map
;
84 for (uint32_t j
= 0; j
< layout
->set
[i
].layout
->stage
[stage
].surface_count
; j
++)
/* brw_vs_populate_key: fills a brw_vs_prog_key from current GL context and
 * vertex program state (clip info, edge-flag copying, vertex-color clamping,
 * pre-gen6 point-sprite coord replacement, sampler key data). The key is
 * zeroed first so untouched fields compare equal for program caching. */
92 brw_vs_populate_key(struct brw_context
*brw
,
93 struct brw_vertex_program
*vp
,
94 struct brw_vs_prog_key
*key
)
96 struct gl_context
*ctx
= &brw
->ctx
;
97 /* BRW_NEW_VERTEX_PROGRAM */
98 struct gl_program
*prog
= (struct gl_program
*) vp
;
100 memset(key
, 0, sizeof(*key
));
102 /* Just upload the program verbatim for now. Always send it all
103 * the inputs it asks for, whether they are varying or not.
105 key
->base
.program_string_id
= vp
->id
;
106 brw_setup_vue_key_clip_info(brw
, &key
->base
,
107 vp
->program
.Base
.UsesClipDistanceOut
);
/* Edge flags must be copied through the VS whenever unfilled polygons are
 * drawn (front or back mode != GL_FILL). */
111 key
->copy_edgeflag
= (ctx
->Polygon
.FrontMode
!= GL_FILL
||
112 ctx
->Polygon
.BackMode
!= GL_FILL
);
115 if (prog
->OutputsWritten
& (VARYING_BIT_COL0
| VARYING_BIT_COL1
|
116 VARYING_BIT_BFC0
| VARYING_BIT_BFC1
)) {
117 /* _NEW_LIGHT | _NEW_BUFFERS */
118 key
->clamp_vertex_color
= ctx
->Light
._ClampVertexColor
;
/* Pre-gen6 hardware cannot replace point-sprite coords in the SF, so record
 * which of the 8 texcoord slots need replacement in the key. */
122 if (brw
->gen
< 6 && ctx
->Point
.PointSprite
) {
123 for (int i
= 0; i
< 8; i
++) {
124 if (ctx
->Point
.CoordReplace
[i
])
125 key
->point_coord_replace
|= (1 << i
);
/* Sampler swizzle/format workarounds for the VS sampler range. NOTE(review):
 * the trailing arguments of this call are on lines missing from this view. */
130 brw_populate_sampler_prog_key_data(ctx
, prog
, brw
->vs
.base
.sampler_count
,
/* really_do_vs_prog: compiles the vertex shader for a pipeline. Sets up
 * uniform counts, the VUE map, the binding table layout, runs brw_vs_emit,
 * and copies the kernel into the pipeline's instruction block pool.
 * NOTE(review): many lines are missing from this extraction (the ARB-program
 * branch heads, closing braces, the failure-path return after brw_vs_emit
 * fails, and the scratch-BO error handling) — verify before editing. */
135 really_do_vs_prog(struct brw_context
*brw
,
136 struct gl_shader_program
*prog
,
137 struct brw_vertex_program
*vp
,
138 struct brw_vs_prog_key
*key
, struct anv_pipeline
*pipeline
)
141 const GLuint
*program
;
142 struct brw_vs_compile c
;
143 struct brw_vs_prog_data
*prog_data
= &pipeline
->vs_prog_data
;
144 struct brw_stage_prog_data
*stage_prog_data
= &prog_data
->base
.base
;
146 struct gl_shader
*vs
= NULL
;
149 vs
= prog
->_LinkedShaders
[MESA_SHADER_VERTEX
];
151 memset(&c
, 0, sizeof(c
));
152 memcpy(&c
.key
, key
, sizeof(*key
));
153 memset(prog_data
, 0, sizeof(*prog_data
));
/* mem_ctx owns all compile-time allocations; freed on every exit path. */
155 mem_ctx
= ralloc_context(NULL
);
159 /* Allocate the references to the uniforms that will end up in the
160 * prog_data associated with the compiled program, and which will be freed
161 * by the state cache.
165 /* We add padding around uniform values below vec4 size, with the worst
166 * case being a float value that gets blown up to a vec4, so be
169 param_count
= vs
->num_uniform_components
* 4;
/* Fixed-function (non-GLSL) path: count from the parameter list instead. */
172 param_count
= vp
->program
.Base
.Parameters
->NumParameters
* 4;
174 /* vec4_visitor::setup_uniform_clipplane_values() also uploads user clip
175 * planes as uniforms.
177 param_count
+= c
.key
.base
.nr_userclip_plane_consts
* 4;
179 /* Setting nr_params here NOT to the size of the param and pull_param
180 * arrays, but to the number of uniform components vec4_visitor
181 * needs. vec4_visitor::setup_uniforms() will set it back to a proper value.
183 stage_prog_data
->nr_params
= ALIGN(param_count
, 4) / 4;
185 stage_prog_data
->nr_params
+= vs
->num_samplers
;
188 GLbitfield64 outputs_written
= vp
->program
.Base
.OutputsWritten
;
189 prog_data
->inputs_read
= vp
->program
.Base
.InputsRead
;
/* Copying the edge flag forces an extra VUE slot and an extra VS input. */
191 if (c
.key
.copy_edgeflag
) {
192 outputs_written
|= BITFIELD64_BIT(VARYING_SLOT_EDGE
);
193 prog_data
->inputs_read
|= VERT_BIT_EDGEFLAG
;
197 /* Put dummy slots into the VUE for the SF to put the replaced
198 * point sprite coords in. We shouldn't need these dummy slots,
199 * which take up precious URB space, but it would mean that the SF
200 * doesn't get nice aligned pairs of input coords into output
201 * coords, which would be a pain to handle.
203 for (int i
= 0; i
< 8; i
++) {
204 if (c
.key
.point_coord_replace
& (1 << i
))
205 outputs_written
|= BITFIELD64_BIT(VARYING_SLOT_TEX0
+ i
);
208 /* if back colors are written, allocate slots for front colors too */
209 if (outputs_written
& BITFIELD64_BIT(VARYING_SLOT_BFC0
))
210 outputs_written
|= BITFIELD64_BIT(VARYING_SLOT_COL0
);
211 if (outputs_written
& BITFIELD64_BIT(VARYING_SLOT_BFC1
))
212 outputs_written
|= BITFIELD64_BIT(VARYING_SLOT_COL1
);
215 /* In order for legacy clipping to work, we need to populate the clip
216 * distance varying slots whenever clipping is enabled, even if the vertex
217 * shader doesn't write to gl_ClipDistance.
219 if (c
.key
.base
.userclip_active
) {
220 outputs_written
|= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0
);
221 outputs_written
|= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1
);
224 brw_compute_vue_map(brw
->intelScreen
->devinfo
,
225 &prog_data
->base
.vue_map
, outputs_written
);
227 set_binding_table_layout(&prog_data
->base
.base
, pipeline
,
228 VK_SHADER_STAGE_VERTEX
);
/* Emit the actual VS kernel; on failure free mem_ctx (the `return false`
 * presumably follows on a line missing from this view). */
232 program
= brw_vs_emit(brw
, prog
, &c
, prog_data
, mem_ctx
, &program_size
);
233 if (program
== NULL
) {
234 ralloc_free(mem_ctx
);
/* Copy the kernel into the pipeline's instruction block and bump the
 * allocation cursor, keeping kernels 64-byte aligned. */
238 pipeline
->vs_simd8
= pipeline
->program_next
;
239 memcpy((char *) pipeline
->device
->instruction_block_pool
.map
+
240 pipeline
->vs_simd8
, program
, program_size
);
242 pipeline
->program_next
= align(pipeline
->program_next
+ program_size
, 64);
244 ralloc_free(mem_ctx
);
/* Allocate a scratch BO only when the compiled shader spilled. */
246 if (stage_prog_data
->total_scratch
> 0)
247 if (!anv_bo_init_new(&pipeline
->vs_scratch_bo
,
249 stage_prog_data
->total_scratch
))
/* brw_wm_populate_key: fills a brw_wm_prog_key from GL context and fragment
 * program state. Unlike the GL driver version, this fakes a user FBO
 * (`draw_buffer` on the stack) since Vulkan has no window-system drawable.
 * NOTE(review): ctx->DrawBuffer is pointed at a stack local for the duration
 * of this function and reset to NULL at the end — nothing may stash that
 * pointer. Several condition/brace lines are missing from this extraction. */
256 void brw_wm_populate_key(struct brw_context
*brw
,
257 struct brw_fragment_program
*fp
,
258 struct brw_wm_prog_key
*key
)
260 struct gl_context
*ctx
= &brw
->ctx
;
261 struct gl_program
*prog
= (struct gl_program
*) brw
->fragment_program
;
264 bool program_uses_dfdy
= fp
->program
.UsesDFdy
;
265 struct gl_framebuffer draw_buffer
;
266 bool multisample_fbo
;
268 memset(key
, 0, sizeof(*key
));
270 for (int i
= 0; i
< MAX_SAMPLERS
; i
++) {
271 /* Assume color sampler, no swizzling. */
272 key
->tex
.swizzles
[i
] = SWIZZLE_XYZW
;
275 /* A non-zero framebuffer name indicates that the framebuffer was created by
276 * the user rather than the window system. */
277 draw_buffer
.Name
= 1;
278 draw_buffer
.Visual
.samples
= 1;
279 draw_buffer
._NumColorDrawBuffers
= 1;
/* NOTE(review): the next line duplicates the assignment above (original
 * lines 279/280) — harmless, but one of them should be removed. */
280 draw_buffer
._NumColorDrawBuffers
= 1;
281 draw_buffer
.Width
= 400;
282 draw_buffer
.Height
= 400;
283 ctx
->DrawBuffer
= &draw_buffer
;
285 multisample_fbo
= ctx
->DrawBuffer
->Visual
.samples
> 1;
287 /* Build the index for table lookup
291 if (fp
->program
.UsesKill
|| ctx
->Color
.AlphaEnabled
)
292 lookup
|= IZ_PS_KILL_ALPHATEST_BIT
;
294 if (fp
->program
.Base
.OutputsWritten
& BITFIELD64_BIT(FRAG_RESULT_DEPTH
))
295 lookup
|= IZ_PS_COMPUTES_DEPTH_BIT
;
/* _NEW_DEPTH (guarding condition is on a line missing from this view). */
299 lookup
|= IZ_DEPTH_TEST_ENABLE_BIT
;
301 if (ctx
->Depth
.Test
&& ctx
->Depth
.Mask
) /* ?? */
302 lookup
|= IZ_DEPTH_WRITE_ENABLE_BIT
;
304 /* _NEW_STENCIL | _NEW_BUFFERS */
305 if (ctx
->Stencil
._Enabled
) {
306 lookup
|= IZ_STENCIL_TEST_ENABLE_BIT
;
308 if (ctx
->Stencil
.WriteMask
[0] ||
309 ctx
->Stencil
.WriteMask
[ctx
->Stencil
._BackFace
])
310 lookup
|= IZ_STENCIL_WRITE_ENABLE_BIT
;
312 key
->iz_lookup
= lookup
;
317 /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */
318 if (ctx
->Line
.SmoothFlag
) {
319 if (brw
->reduced_primitive
== GL_LINES
) {
322 else if (brw
->reduced_primitive
== GL_TRIANGLES
) {
323 if (ctx
->Polygon
.FrontMode
== GL_LINE
) {
324 line_aa
= AA_SOMETIMES
;
/* Upgrade to AA_ALWAYS when only line-drawn faces survive culling (the
 * assignment under this condition is on a line missing from this view). */
326 if (ctx
->Polygon
.BackMode
== GL_LINE
||
327 (ctx
->Polygon
.CullFlag
&&
328 ctx
->Polygon
.CullFaceMode
== GL_BACK
))
331 else if (ctx
->Polygon
.BackMode
== GL_LINE
) {
332 line_aa
= AA_SOMETIMES
;
334 if ((ctx
->Polygon
.CullFlag
&&
335 ctx
->Polygon
.CullFaceMode
== GL_FRONT
))
341 key
->line_aa
= line_aa
;
344 key
->high_quality_derivatives
=
345 ctx
->Hint
.FragmentShaderDerivative
== GL_NICEST
;
348 key
->stats_wm
= brw
->stats_wm
;
/* _NEW_LIGHT */
351 key
->flat_shade
= (ctx
->Light
.ShadeModel
== GL_FLAT
);
353 /* _NEW_FRAG_CLAMP | _NEW_BUFFERS */
354 key
->clamp_fragment_color
= ctx
->Color
._ClampFragmentColor
;
/* Sampler key data for the WM sampler range. NOTE(review): trailing
 * arguments are on lines missing from this view. */
357 brw_populate_sampler_prog_key_data(ctx
, prog
, brw
->wm
.base
.sampler_count
,
362 * Include the draw buffer origin and height so that we can calculate
363 * fragment position values relative to the bottom left of the drawable,
364 * from the incoming screen origin relative position we get as part of our
367 * This is only needed for the WM_WPOSXY opcode when the fragment program
368 * uses the gl_FragCoord input.
370 * We could avoid recompiling by including this as a constant referenced by
371 * our program, but if we were to do that it would also be nice to handle
372 * getting that constant updated at batchbuffer submit time (when we
373 * hold the lock and know where the buffer really is) rather than at emit
374 * time when we don't hold the lock and are just guessing. We could also
375 * just avoid using this as key data if the program doesn't use
378 * For DRI2 the origin_x/y will always be (0,0) but we still need the
379 * drawable height in order to invert the Y axis.
381 if (fp
->program
.Base
.InputsRead
& VARYING_BIT_POS
) {
382 key
->drawable_height
= ctx
->DrawBuffer
->Height
;
385 if ((fp
->program
.Base
.InputsRead
& VARYING_BIT_POS
) || program_uses_dfdy
) {
386 key
->render_to_fbo
= _mesa_is_user_fbo(ctx
->DrawBuffer
);
390 key
->nr_color_regions
= ctx
->DrawBuffer
->_NumColorDrawBuffers
;
392 /* _NEW_MULTISAMPLE, _NEW_COLOR, _NEW_BUFFERS */
393 key
->replicate_alpha
= ctx
->DrawBuffer
->_NumColorDrawBuffers
> 1 &&
394 (ctx
->Multisample
.SampleAlphaToCoverage
|| ctx
->Color
.AlphaEnabled
);
396 /* _NEW_BUFFERS _NEW_MULTISAMPLE */
397 /* Ignore sample qualifier while computing this flag. */
398 key
->persample_shading
=
399 _mesa_get_min_invocations_per_fragment(ctx
, &fp
->program
, true) > 1;
400 if (key
->persample_shading
)
401 key
->persample_2x
= ctx
->DrawBuffer
->Visual
.samples
== 2;
403 key
->compute_pos_offset
=
404 _mesa_get_min_invocations_per_fragment(ctx
, &fp
->program
, false) > 1 &&
405 fp
->program
.Base
.SystemValuesRead
& SYSTEM_BIT_SAMPLE_POS
;
407 key
->compute_sample_id
=
409 ctx
->Multisample
.Enabled
&&
410 (fp
->program
.Base
.SystemValuesRead
& SYSTEM_BIT_SAMPLE_ID
);
412 /* BRW_NEW_VUE_MAP_GEOM_OUT */
413 if (brw
->gen
< 6 || _mesa_bitcount_64(fp
->program
.Base
.InputsRead
&
414 BRW_FS_VARYING_INPUT_MASK
) > 16)
415 key
->input_slots_valid
= brw
->vue_map_geom_out
.slots_valid
;
418 /* _NEW_COLOR | _NEW_BUFFERS */
419 /* Pre-gen6, the hardware alpha test always used each render
420 * target's alpha to do alpha test, as opposed to render target 0's alpha
421 * like GL requires. Fix that by building the alpha test into the
422 * shader, and we'll skip enabling the fixed function alpha test.
424 if (brw
->gen
< 6 && ctx
->DrawBuffer
->_NumColorDrawBuffers
> 1 && ctx
->Color
.AlphaEnabled
) {
425 key
->alpha_test_func
= ctx
->Color
.AlphaFunc
;
426 key
->alpha_test_ref
= ctx
->Color
.AlphaRef
;
429 /* The unique fragment program ID */
430 key
->program_string_id
= fp
->id
;
/* Drop the dangling pointer to the stack-local fake framebuffer. */
432 ctx
->DrawBuffer
= NULL
;
/* computed_depth_mode: maps the fragment program's depth-layout qualifier to
 * the hardware PS-computed-depth mode. Returns BRW_PSCDEPTH_OFF when the
 * shader does not write gl_FragDepth (or declares it unchanged).
 * NOTE(review): the return-type line and a default case, if any, are on
 * lines missing from this extraction. */
436 computed_depth_mode(struct gl_fragment_program
*fp
)
438 if (fp
->Base
.OutputsWritten
& BITFIELD64_BIT(FRAG_RESULT_DEPTH
)) {
439 switch (fp
->FragDepthLayout
) {
440 case FRAG_DEPTH_LAYOUT_NONE
:
441 case FRAG_DEPTH_LAYOUT_ANY
:
442 return BRW_PSCDEPTH_ON
;
443 case FRAG_DEPTH_LAYOUT_GREATER
:
444 return BRW_PSCDEPTH_ON_GE
;
445 case FRAG_DEPTH_LAYOUT_LESS
:
446 return BRW_PSCDEPTH_ON_LE
;
447 case FRAG_DEPTH_LAYOUT_UNCHANGED
:
448 return BRW_PSCDEPTH_OFF
;
451 return BRW_PSCDEPTH_OFF
;
/* really_do_wm_prog: compiles the fragment shader for a pipeline. Mirrors
 * really_do_vs_prog: sets up uniform counts and the binding table, emits the
 * kernel via brw_wm_fs_emit, then copies SIMD8/SIMD16 variants into the
 * pipeline's instruction block pool. NOTE(review): branch heads, closing
 * braces, the failure return, and scratch-BO error handling are on lines
 * missing from this extraction. */
455 really_do_wm_prog(struct brw_context
*brw
,
456 struct gl_shader_program
*prog
,
457 struct brw_fragment_program
*fp
,
458 struct brw_wm_prog_key
*key
, struct anv_pipeline
*pipeline
)
460 struct gl_context
*ctx
= &brw
->ctx
;
461 void *mem_ctx
= ralloc_context(NULL
);
462 struct brw_wm_prog_data
*prog_data
= &pipeline
->wm_prog_data
;
463 struct gl_shader
*fs
= NULL
;
464 unsigned int program_size
;
465 const uint32_t *program
;
469 fs
= prog
->_LinkedShaders
[MESA_SHADER_FRAGMENT
];
471 memset(prog_data
, 0, sizeof(*prog_data
));
473 /* key->alpha_test_func means simulating alpha testing via discards,
474 * so the shader definitely kills pixels.
476 prog_data
->uses_kill
= fp
->program
.UsesKill
|| key
->alpha_test_func
;
478 prog_data
->computed_depth_mode
= computed_depth_mode(&fp
->program
);
480 /* Allocate the references to the uniforms that will end up in the
481 * prog_data associated with the compiled program, and which will be freed
482 * by the state cache.
486 param_count
= fs
->num_uniform_components
;
/* Fixed-function (non-GLSL) path: count from the parameter list instead. */
488 param_count
= fp
->program
.Base
.Parameters
->NumParameters
* 4;
490 /* The backend also sometimes adds params for texture size. */
491 param_count
+= 2 * ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxTextureImageUnits
;
/* NOTE(review): rzalloc_array on a NULL parent — ownership of param /
 * pull_param is presumably the state cache per the comment above; confirm
 * these are freed on pipeline teardown. */
492 prog_data
->base
.param
=
493 rzalloc_array(NULL
, const gl_constant_value
*, param_count
);
494 prog_data
->base
.pull_param
=
495 rzalloc_array(NULL
, const gl_constant_value
*, param_count
);
496 prog_data
->base
.nr_params
= param_count
;
498 prog_data
->barycentric_interp_modes
=
499 brw_compute_barycentric_interp_modes(brw
, key
->flat_shade
,
500 key
->persample_shading
,
503 set_binding_table_layout(&prog_data
->base
, pipeline
,
504 VK_SHADER_STAGE_FRAGMENT
);
505 /* This needs to come after shader time and pull constant entries, but we
506 * don't have those set up now, so just put it after the layout entries.
508 prog_data
->binding_table
.render_target_start
= 0;
510 program
= brw_wm_fs_emit(brw
, mem_ctx
, key
, prog_data
,
511 &fp
->program
, prog
, &program_size
);
512 if (program
== NULL
) {
513 ralloc_free(mem_ctx
);
/* Reserve 64-byte-aligned space in the instruction block for the kernel;
 * ps_simd8 / ps_simd16 record offsets of the two dispatch variants. */
517 offset
= pipeline
->program_next
;
518 pipeline
->program_next
= align(pipeline
->program_next
+ program_size
, 64);
521 pipeline
->ps_simd8
= NO_KERNEL
;
523 pipeline
->ps_simd8
= offset
;
525 if (prog_data
->no_8
|| prog_data
->prog_offset_16
)
526 pipeline
->ps_simd16
= offset
+ prog_data
->prog_offset_16
;
528 pipeline
->ps_simd16
= NO_KERNEL
;
530 memcpy((char *) pipeline
->device
->instruction_block_pool
.map
+
531 offset
, program
, program_size
);
533 ralloc_free(mem_ctx
);
535 if (prog_data
->base
.total_scratch
> 0)
536 if (!anv_bo_init_new(&pipeline
->ps_scratch_bo
,
538 prog_data
->base
.total_scratch
))
/* brw_gs_populate_key: fills a brw_gs_prog_key. Takes the pipeline (unlike
 * the VS/WM variants) because the GS input varyings come from the already-
 * compiled VS prog data's VUE map, not from brw state. */
545 brw_gs_populate_key(struct brw_context
*brw
,
546 struct anv_pipeline
*pipeline
,
547 struct brw_geometry_program
*gp
,
548 struct brw_gs_prog_key
*key
)
550 struct gl_context
*ctx
= &brw
->ctx
;
551 struct brw_stage_state
*stage_state
= &brw
->gs
.base
;
552 struct gl_program
*prog
= &gp
->program
.Base
;
554 memset(key
, 0, sizeof(*key
));
556 key
->base
.program_string_id
= gp
->id
;
557 brw_setup_vue_key_clip_info(brw
, &key
->base
,
558 gp
->program
.Base
.UsesClipDistanceOut
);
/* Sampler key data for the GS sampler range. NOTE(review): trailing
 * arguments are on lines missing from this view. */
561 brw_populate_sampler_prog_key_data(ctx
, prog
, stage_state
->sampler_count
,
564 struct brw_vs_prog_data
*prog_data
= &pipeline
->vs_prog_data
;
566 /* BRW_NEW_VUE_MAP_VS */
567 key
->input_varyings
= prog_data
->base
.vue_map
.slots_valid
;
/* really_do_gs_prog: compiles the geometry shader via the shared
 * brw_compile_gs_prog helper, copies the kernel into the instruction block
 * pool, and snapshots prog_data into the pipeline. NOTE(review): the return
 * value of brw_compile_gs_prog does not appear to be checked on the visible
 * lines; scratch-BO error handling is also on missing lines — confirm. */
571 really_do_gs_prog(struct brw_context
*brw
,
572 struct gl_shader_program
*prog
,
573 struct brw_geometry_program
*gp
,
574 struct brw_gs_prog_key
*key
, struct anv_pipeline
*pipeline
)
576 struct brw_gs_compile_output output
;
579 /* FIXME: We pass the bind map to the compile in the output struct. Need
580 * something better. */
581 set_binding_table_layout(&output
.prog_data
.base
.base
,
582 pipeline
, VK_SHADER_STAGE_GEOMETRY
);
584 brw_compile_gs_prog(brw
, prog
, gp
, key
, &output
);
/* Reserve 64-byte-aligned space and record the GS kernel offset. */
586 offset
= pipeline
->program_next
;
587 pipeline
->program_next
= align(pipeline
->program_next
+ output
.program_size
, 64);
589 pipeline
->gs_vec4
= offset
;
590 pipeline
->gs_vertex_count
= gp
->program
.VerticesIn
;
592 memcpy((char *) pipeline
->device
->instruction_block_pool
.map
+
593 offset
, output
.program
, output
.program_size
);
/* The compile output's ralloc context owns the kernel bytes; safe to free
 * now that they are copied into the block pool. */
595 ralloc_free(output
.mem_ctx
);
597 if (output
.prog_data
.base
.base
.total_scratch
) {
598 if (!anv_bo_init_new(&pipeline
->gs_scratch_bo
,
600 output
.prog_data
.base
.base
.total_scratch
))
/* Keep a pipeline-owned copy of the prog data (output is stack-local). */
604 memcpy(&pipeline
->gs_prog_data
, &output
.prog_data
, sizeof pipeline
->gs_prog_data
);
/* fail_on_compile_error: aborts (via fail_if) when a GLSL compile/link
 * failed, extracting "source:line(column): error: message" from the info log
 * when it matches. NOTE(review): the `error` buffer (must be at least 256
 * bytes to match %255[^\n]) is declared on a line missing from this view. */
610 fail_on_compile_error(int status
, const char *msg
)
612 int source
, line
, column
;
618 if (sscanf(msg
, "%d:%d(%d): error: %255[^\n]", &source
, &line
, &column
, error
) == 4)
619 fail_if(!status
, "%d:%s\n", line
, error
);
/* Fallback: report the raw info log when it doesn't match the pattern. */
621 fail_if(!status
, "%s\n", msg
);
/* anv_compiler: bundles the fake intel screen and brw context used to drive
 * the GL compiler backend from the Vulkan driver. NOTE(review): additional
 * members may exist on lines missing from this extraction. */
624 struct anv_compiler
{
625 struct intel_screen
*screen
;
626 struct brw_context
*brw
;
/* anv_compiler_create: allocates a compiler wrapper and stands up an intel
 * screen + brw context on the given DRM fd. Returns NULL-ish on failure
 * (the error-path cleanup/return lines are missing from this extraction —
 * verify the partially-constructed compiler is freed there). */
632 struct anv_compiler
*
633 anv_compiler_create(int fd
)
635 struct anv_compiler
*compiler
;
637 compiler
= (struct anv_compiler
*) malloc(sizeof *compiler
);
638 if (compiler
== NULL
)
641 compiler
->screen
= intel_screen_create(fd
);
642 if (compiler
->screen
== NULL
) {
647 compiler
->brw
= intel_context_create(compiler
->screen
);
648 if (compiler
->brw
== NULL
) {
/* Vulkan compiles everything up front; disable the GL precompile path. */
653 compiler
->brw
->precompile
= false;
/* anv_compiler_destroy: tears down the brw context then the screen — the
 * reverse of anv_compiler_create. The free() of the compiler struct itself
 * is presumably on a line missing from this view — confirm. */
659 anv_compiler_destroy(struct anv_compiler
*compiler
)
661 intel_context_destroy(compiler
->brw
);
662 intel_screen_destroy(compiler
->screen
);
666 /* From gen7_urb.c */
668 /* FIXME: Add to struct intel_device_info */
/* URB space reserved for push constants on gen8 (32 KiB). */
670 static const int gen8_push_size
= 32 * 1024;
/* gen7_compute_urb_partition: divides the URB between push constants, VS and
 * GS entries for this pipeline (adapted from gen7_urb.c). Each stage gets
 * its minimum need, remaining chunks are shared in proportion to "wants",
 * and the results are written into pipeline->urb. NOTE(review): several
 * assignment heads (vs_chunks/vs_wants/gs_wants) and closing braces are on
 * lines missing from this extraction. */
673 gen7_compute_urb_partition(struct anv_pipeline
*pipeline
)
675 const struct brw_device_info
*devinfo
= &pipeline
->device
->info
;
676 bool vs_present
= pipeline
->vs_simd8
!= NO_KERNEL
;
677 unsigned vs_size
= vs_present
? pipeline
->vs_prog_data
.base
.urb_entry_size
: 1;
678 unsigned vs_entry_size_bytes
= vs_size
* 64;
679 bool gs_present
= pipeline
->gs_vec4
!= NO_KERNEL
;
680 unsigned gs_size
= gs_present
? pipeline
->gs_prog_data
.base
.urb_entry_size
: 1;
681 unsigned gs_entry_size_bytes
= gs_size
* 64;
683 /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS):
685 * VS Number of URB Entries must be divisible by 8 if the VS URB Entry
686 * Allocation Size is less than 9 512-bit URB entries.
688 * Similar text exists for GS.
690 unsigned vs_granularity
= (vs_size
< 9) ? 8 : 1;
691 unsigned gs_granularity
= (gs_size
< 9) ? 8 : 1;
693 /* URB allocations must be done in 8k chunks. */
694 unsigned chunk_size_bytes
= 8192;
696 /* Determine the size of the URB in chunks. */
697 unsigned urb_chunks
= devinfo
->urb
.size
* 1024 / chunk_size_bytes
;
699 /* Reserve space for push constants */
700 unsigned push_constant_bytes
= gen8_push_size
;
701 unsigned push_constant_chunks
=
702 push_constant_bytes
/ chunk_size_bytes
;
704 /* Initially, assign each stage the minimum amount of URB space it needs,
705 * and make a note of how much additional space it "wants" (the amount of
706 * additional space it could actually make use of).
709 /* VS has a lower limit on the number of URB entries */
711 ALIGN(devinfo
->urb
.min_vs_entries
* vs_entry_size_bytes
,
712 chunk_size_bytes
) / chunk_size_bytes
;
714 ALIGN(devinfo
->urb
.max_vs_entries
* vs_entry_size_bytes
,
715 chunk_size_bytes
) / chunk_size_bytes
- vs_chunks
;
717 unsigned gs_chunks
= 0;
718 unsigned gs_wants
= 0;
720 /* There are two constraints on the minimum amount of URB space we can
723 * (1) We need room for at least 2 URB entries, since we always operate
724 * the GS in DUAL_OBJECT mode.
726 * (2) We can't allocate less than nr_gs_entries_granularity.
728 gs_chunks
= ALIGN(MAX2(gs_granularity
, 2) * gs_entry_size_bytes
,
729 chunk_size_bytes
) / chunk_size_bytes
;
731 ALIGN(devinfo
->urb
.max_gs_entries
* gs_entry_size_bytes
,
732 chunk_size_bytes
) / chunk_size_bytes
- gs_chunks
;
735 /* There should always be enough URB space to satisfy the minimum
736 * requirements of each stage.
738 unsigned total_needs
= push_constant_chunks
+ vs_chunks
+ gs_chunks
;
739 assert(total_needs
<= urb_chunks
);
741 /* Mete out remaining space (if any) in proportion to "wants". */
742 unsigned total_wants
= vs_wants
+ gs_wants
;
743 unsigned remaining_space
= urb_chunks
- total_needs
;
744 if (remaining_space
> total_wants
)
745 remaining_space
= total_wants
;
746 if (remaining_space
> 0) {
747 unsigned vs_additional
= (unsigned)
748 round(vs_wants
* (((double) remaining_space
) / total_wants
));
749 vs_chunks
+= vs_additional
;
750 remaining_space
-= vs_additional
;
751 gs_chunks
+= remaining_space
;
754 /* Sanity check that we haven't over-allocated. */
755 assert(push_constant_chunks
+ vs_chunks
+ gs_chunks
<= urb_chunks
);
757 /* Finally, compute the number of entries that can fit in the space
758 * allocated to each stage.
760 unsigned nr_vs_entries
= vs_chunks
* chunk_size_bytes
/ vs_entry_size_bytes
;
761 unsigned nr_gs_entries
= gs_chunks
* chunk_size_bytes
/ gs_entry_size_bytes
;
763 /* Since we rounded up when computing *_wants, this may be slightly more
764 * than the maximum allowed amount, so correct for that.
766 nr_vs_entries
= MIN2(nr_vs_entries
, devinfo
->urb
.max_vs_entries
);
767 nr_gs_entries
= MIN2(nr_gs_entries
, devinfo
->urb
.max_gs_entries
);
769 /* Ensure that we program a multiple of the granularity. */
770 nr_vs_entries
= ROUND_DOWN_TO(nr_vs_entries
, vs_granularity
);
771 nr_gs_entries
= ROUND_DOWN_TO(nr_gs_entries
, gs_granularity
);
773 /* Finally, sanity check to make sure we have at least the minimum number
774 * of entries needed for each stage.
776 assert(nr_vs_entries
>= devinfo
->urb
.min_vs_entries
);
778 assert(nr_gs_entries
>= 2);
780 /* Lay out the URB in the following order:
785 pipeline
->urb
.vs_start
= push_constant_chunks
;
786 pipeline
->urb
.vs_size
= vs_size
;
787 pipeline
->urb
.nr_vs_entries
= nr_vs_entries
;
789 pipeline
->urb
.gs_start
= push_constant_chunks
+ vs_chunks
;
790 pipeline
->urb
.gs_size
= gs_size
;
791 pipeline
->urb
.nr_gs_entries
= nr_gs_entries
;
/* Table mapping Vulkan shader-stage indices to the GL shader-stage token and
 * a human-readable name (used for error messages in anv_compile_shader).
 * NOTE(review): the member declarations and table name are on lines missing
 * from this extraction. Order must match the VK_SHADER_STAGE_* enum. */
794 static const struct {
798 { GL_VERTEX_SHADER
, "vertex" },
799 { GL_TESS_CONTROL_SHADER
, "tess control" },
800 { GL_TESS_EVALUATION_SHADER
, "tess evaluation" },
801 { GL_GEOMETRY_SHADER
, "geometry" },
802 { GL_FRAGMENT_SHADER
, "fragment" },
803 { GL_COMPUTE_SHADER
, "compute" },
/* anv_compile_shader: wraps one pipeline shader stage's GLSL source in a
 * gl_shader, compiles it, and appends it to the gl_shader_program. Aborts
 * via fail_if/fail_on_compile_error on any failure (no error return).
 * NOTE(review): strdup() of the source is not NULL-checked on the visible
 * lines, and the shader's ownership passes to `program` for later cleanup. */
807 anv_compile_shader(struct anv_compiler
*compiler
,
808 struct gl_shader_program
*program
,
809 struct anv_pipeline
*pipeline
, uint32_t stage
)
811 struct brw_context
*brw
= compiler
->brw
;
812 struct gl_shader
*shader
;
815 shader
= brw_new_shader(&brw
->ctx
, name
, stage_info
[stage
].token
);
816 fail_if(shader
== NULL
, "failed to create %s shader\n", stage_info
[stage
].name
);
817 shader
->Source
= strdup(pipeline
->shaders
[stage
]->data
);
818 _mesa_glsl_compile_shader(&brw
->ctx
, shader
, false, false);
819 fail_on_compile_error(shader
->CompileStatus
, shader
->InfoLog
);
821 program
->Shaders
[program
->NumShaders
] = shader
;
822 program
->NumShaders
++;
/* anv_compiler_run: compiles and links all of a pipeline's shader stages
 * through the GL compiler, then runs the backend (really_do_{wm,vs,gs}_prog)
 * for each present stage, filling pipeline->prog_data, the kernel offsets,
 * active_stages, and the URB partition. NOTE(review): error paths, braces,
 * and the final return are on lines missing from this extraction. */
826 anv_compiler_run(struct anv_compiler
*compiler
, struct anv_pipeline
*pipeline
)
828 struct gl_shader_program
*program
;
830 struct brw_context
*brw
= compiler
->brw
;
831 struct anv_device
*device
= pipeline
->device
;
833 /* When we free the pipeline, we detect stages based on the NULL status
834 * of various prog_data pointers. Make them NULL by default.
836 memset(pipeline
->prog_data
, 0, sizeof(pipeline
->prog_data
));
838 brw
->use_rep_send
= pipeline
->use_repclear
;
839 brw
->no_simd8
= pipeline
->use_repclear
;
841 program
= brw
->ctx
.Driver
.NewShaderProgram(name
);
842 program
->Shaders
= (struct gl_shader
**)
843 calloc(VK_NUM_SHADER_STAGE
, sizeof(struct gl_shader
*));
/* NOTE(review): `program` is dereferenced on the line above before this
 * NULL check — the check cannot catch a failed NewShaderProgram. */
844 fail_if(program
== NULL
|| program
->Shaders
== NULL
,
845 "failed to create program\n");
847 if (pipeline
->shaders
[VK_SHADER_STAGE_VERTEX
])
848 anv_compile_shader(compiler
, program
, pipeline
, VK_SHADER_STAGE_VERTEX
);
/* Fragment stage is compiled unconditionally — presumably required. */
849 anv_compile_shader(compiler
, program
, pipeline
, VK_SHADER_STAGE_FRAGMENT
);
850 if (pipeline
->shaders
[VK_SHADER_STAGE_GEOMETRY
])
851 anv_compile_shader(compiler
, program
, pipeline
, VK_SHADER_STAGE_GEOMETRY
);
853 _mesa_glsl_link_shader(&brw
->ctx
, program
);
854 fail_on_compile_error(program
->LinkStatus
,
/* Grab one instruction-pool block; all stage kernels are packed into it via
 * program_next (bounds-checked by the assert near the end). */
857 pipeline
->program_block
=
858 anv_block_pool_alloc(&device
->instruction_block_pool
);
859 pipeline
->program_next
= pipeline
->program_block
;
863 struct brw_wm_prog_key wm_key
;
864 struct gl_fragment_program
*fp
= (struct gl_fragment_program
*)
865 program
->_LinkedShaders
[MESA_SHADER_FRAGMENT
]->Program
;
866 struct brw_fragment_program
*bfp
= brw_fragment_program(fp
);
868 brw_wm_populate_key(brw
, bfp
, &wm_key
);
870 success
= really_do_wm_prog(brw
, program
, bfp
, &wm_key
, pipeline
);
871 fail_if(!success
, "do_wm_prog failed\n");
872 pipeline
->prog_data
[VK_SHADER_STAGE_FRAGMENT
] = &pipeline
->wm_prog_data
.base
;
873 pipeline
->active_stages
= VK_SHADER_STAGE_FRAGMENT_BIT
;
876 if (pipeline
->shaders
[VK_SHADER_STAGE_VERTEX
]) {
877 struct brw_vs_prog_key vs_key
;
878 struct gl_vertex_program
*vp
= (struct gl_vertex_program
*)
879 program
->_LinkedShaders
[MESA_SHADER_VERTEX
]->Program
;
880 struct brw_vertex_program
*bvp
= brw_vertex_program(vp
);
882 brw_vs_populate_key(brw
, bvp
, &vs_key
);
884 success
= really_do_vs_prog(brw
, program
, bvp
, &vs_key
, pipeline
);
/* NOTE(review): copy-pasted message — this is the VS path, so it should
 * read "do_vs_prog failed". */
885 fail_if(!success
, "do_wm_prog failed\n");
886 pipeline
->prog_data
[VK_SHADER_STAGE_VERTEX
] = &pipeline
->vs_prog_data
.base
.base
;
/* NOTE(review): stray double semicolon at the end of the next line. */
887 pipeline
->active_stages
|= VK_SHADER_STAGE_VERTEX_BIT
;;
889 pipeline
->vs_simd8
= NO_KERNEL
;
893 if (pipeline
->shaders
[VK_SHADER_STAGE_GEOMETRY
]) {
894 struct brw_gs_prog_key gs_key
;
895 struct gl_geometry_program
*gp
= (struct gl_geometry_program
*)
896 program
->_LinkedShaders
[MESA_SHADER_GEOMETRY
]->Program
;
897 struct brw_geometry_program
*bgp
= brw_geometry_program(gp
);
899 brw_gs_populate_key(brw
, pipeline
, bgp
, &gs_key
);
901 success
= really_do_gs_prog(brw
, program
, bgp
, &gs_key
, pipeline
);
902 fail_if(!success
, "do_gs_prog failed\n");
903 pipeline
->active_stages
|= VK_SHADER_STAGE_GEOMETRY_BIT
;
904 pipeline
->prog_data
[VK_SHADER_STAGE_GEOMETRY
] = &pipeline
->gs_prog_data
.base
.base
;
906 pipeline
->gs_vec4
= NO_KERNEL
;
910 /* FIXME: Allocate more blocks if we fill up this one and worst case,
911 * allocate multiple continuous blocks from end of pool to hold really big
913 assert(pipeline
->program_next
- pipeline
->program_block
< 8192);
915 brw
->ctx
.Driver
.DeleteShaderProgram(&brw
->ctx
, program
);
917 gen7_compute_urb_partition(pipeline
);
922 /* This badly named function frees the struct anv_pipeline data that the compiler
923 * allocates. Currently just the prog_data structs.
/* Frees the per-stage map_entries allocated in set_binding_table_layout and
 * returns the pipeline's instruction block to the pool. */
926 anv_compiler_free(struct anv_pipeline
*pipeline
)
928 struct anv_device
*device
= pipeline
->device
;
930 for (uint32_t stage
= 0; stage
< VK_NUM_SHADER_STAGE
; stage
++)
931 if (pipeline
->prog_data
[stage
])
932 free(pipeline
->prog_data
[stage
]->map_entries
);
934 anv_block_pool_free(&device
->instruction_block_pool
,
935 pipeline
->program_block
);