i965/urb: fixes division by zero
[mesa.git] / src / mesa / drivers / dri / i965 / brw_compiler.h
1 /*
2 * Copyright © 2010 - 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #pragma once
25
26 #include <stdio.h>
27 #include "brw_device_info.h"
28 #include "main/mtypes.h"
29
30 #ifdef __cplusplus
31 extern "C" {
32 #endif
33
34 struct ra_regs;
35 struct nir_shader;
36 struct brw_geometry_program;
37 union gl_constant_value;
38
/**
 * Compiler-wide state: device information, the register-allocation data
 * kept in vec4_reg_set and fs_reg_sets, logging callbacks and per-stage
 * compiler options.
 *
 * A pointer to this struct is the first argument of every brw_compile_*()
 * entry point declared in this header.
 */
39 struct brw_compiler {
/* NOTE(review): presumably the devinfo handed to brw_compiler_create() -- confirm. */
40 const struct brw_device_info *devinfo;
41
42 struct {
43 struct ra_regs *regs;
44
45 /**
46 * Array of the ra classes for the unaligned contiguous register
47 * block sizes used.
48 */
49 int *classes;
50
51 /**
52 * Mapping for register-allocated objects in *regs to the first
53 * GRF for that object.
54 */
55 uint8_t *ra_reg_to_grf;
56 } vec4_reg_set;
57
/*
 * Two independent register sets.
 * NOTE(review): what the index 0/1 selects is not visible in this header;
 * confirm against the code that fills fs_reg_sets.
 */
58 struct {
59 struct ra_regs *regs;
60
61 /**
62 * Array of the ra classes for the unaligned contiguous register
63 * block sizes used, indexed by register size.
64 */
65 int classes[16];
66
67 /**
68 * Mapping from classes to ra_reg ranges. Each of the per-size
69 * classes corresponds to a range of ra_reg nodes. This array stores
70 * those ranges in the form of first ra_reg in each class and the
71 * total number of ra_reg elements in the last array element. This
72 * way the range of the i'th class is given by:
73 * [ class_to_ra_reg_range[i], class_to_ra_reg_range[i+1] )
 *
 * The array has one more element than classes[] (17 vs. 16) so that
 * the last class also has an upper bound.
74 */
75 int class_to_ra_reg_range[17];
76
77 /**
78 * Mapping for register-allocated objects in *regs to the first
79 * GRF for that object.
80 */
81 uint8_t *ra_reg_to_grf;
82
83 /**
84 * ra class for the aligned pairs we use for PLN, which doesn't
85 * appear in *classes.
86 */
87 int aligned_pairs_class;
88 } fs_reg_sets[2];
89
/*
 * Logging callbacks. Each takes an opaque pointer followed by a
 * printf-style format string and its arguments (PRINTFLIKE(2, 3)).
 * NOTE(review): the opaque pointer is presumably the log_data argument
 * given to the brw_compile_*() functions -- confirm.
 */
90 void (*shader_debug_log)(void *, const char *str, ...) PRINTFLIKE(2, 3);
91 void (*shader_perf_log)(void *, const char *str, ...) PRINTFLIKE(2, 3);
92
/* Per-stage settings, one entry for each of the MESA_SHADER_STAGES stages. */
93 bool scalar_stage[MESA_SHADER_STAGES];
94 struct gl_shader_compiler_options glsl_compiler_options[MESA_SHADER_STAGES];
95
96 /**
97 * Apply workarounds for SIN and COS output range problems.
98 * This can negatively impact performance.
99 */
100 bool precise_trig;
101 };
102
103
104 /**
105 * Program key structures.
106 *
107 * When drawing, we look for the currently bound shaders in the program
108 * cache. This is essentially a hash table lookup, and these are the keys.
109 *
110 * Sometimes OpenGL features specified as state need to be simulated via
111 * shader code, due to a mismatch between the API and the hardware. This
112 * is often referred to as "non-orthogonal state" or "NOS". We store NOS
113 * in the program key so it's considered when searching for a program. If
114 * we haven't seen a particular combination before, we have to recompile a
115 * new specialized version.
116 *
117 * Shader compilation should not look up state in gl_context directly, but
118 * instead use the copy in the program key. This guarantees recompiles will
119 * happen correctly.
120 *
121 * @{
122 */
123
/**
 * Gather workaround flags, stored per sampler in
 * brw_sampler_prog_key_data::gen6_gather_wa.
 *
 * The enumerators are distinct single bits (1, 2, 4).
 */
124 enum PACKED gen6_gather_sampler_wa {
125 WA_SIGN = 1, /* whether we need to sign extend */
126 WA_8BIT = 2, /* if we have an 8bit format needing wa */
127 WA_16BIT = 4, /* if we have a 16bit format needing wa */
128 };
129
130 /**
131 * Sampler information needed by the program cache keys.
 *
 * Every program key struct in this header (VS, TCS, TES, GS, WM and CS)
 * embeds one of these as its \c tex member.
132 */
133 struct brw_sampler_prog_key_data {
134 /**
135 * EXT_texture_swizzle and DEPTH_TEXTURE_MODE swizzles.
 *
 * One entry per sampler (MAX_SAMPLERS entries).
136 */
137 uint16_t swizzles[MAX_SAMPLERS];
138
/* NOTE(review): presumably one mask per texture coordinate -- TODO confirm. */
139 uint32_t gl_clamp_mask[3];
140
141 /**
142 * For RG32F, gather4's channel select is broken.
143 */
144 uint32_t gather_channel_quirk_mask;
145
146 /**
147 * Whether this sampler uses the compressed multisample surface layout.
148 */
149 uint32_t compressed_multisample_layout_mask;
150
151 /**
152 * Whether this sampler is using 16x multisampling. If so fetching from
153 * this sampler will be handled with a different instruction, ld2dms_w
154 * instead of ld2dms.
 *
 * NOTE(review): stored as a uint32_t, so presumably a mask with one bit
 * per sampler like the fields above -- confirm.
155 */
156 uint32_t msaa_16;
157
158 /**
159 * For Sandybridge, which shader w/a we need for gather quirks.
 *
 * One entry per sampler (MAX_SAMPLERS entries).
160 */
161 enum gen6_gather_sampler_wa gen6_gather_wa[MAX_SAMPLERS];
162 };
163
164
165 /** The program key for Vertex Shaders; passed to brw_compile_vs(). */
166 struct brw_vs_prog_key {
167 unsigned program_string_id;
168
169 /*
170 * Per-attribute workaround flags
 *
 * One byte per vertex attribute (VERT_ATTRIB_MAX entries).
171 */
172 uint8_t gl_attrib_wa_flags[VERT_ATTRIB_MAX];
173
174 bool copy_edgeflag:1;
175
176 bool clamp_vertex_color:1;
177
178 /**
179 * How many user clipping planes are being uploaded to the vertex shader as
180 * push constants.
181 *
182 * These are used for lowering legacy gl_ClipVertex/gl_Position clipping to
183 * clip distances.
 *
 * This is a 4-bit field, so it can hold values 0..15.
184 */
185 unsigned nr_userclip_plane_consts:4;
186
187 /**
188 * For pre-Gen6 hardware, a bitfield indicating which texture coordinates
189 * are going to be replaced with point coordinates (as a consequence of a
190 * call to glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE)). Because
191 * our SF thread requires exact matching between VS outputs and FS inputs,
192 * these texture coordinates will need to be unconditionally included in
193 * the VUE, even if they aren't written by the vertex shader.
194 */
195 uint8_t point_coord_replace;
196
/* Sampler state that is part of the key. */
197 struct brw_sampler_prog_key_data tex;
198 };
199
200 /** The program key for Tessellation Control Shaders; passed to brw_compile_tcs(). */
201 struct brw_tcs_prog_key
202 {
203 unsigned program_string_id;
204
/* NOTE(review): presumably the primitive mode of the paired TES -- confirm. */
205 GLenum tes_primitive_mode;
206
207 unsigned input_vertices;
208
209 /** A bitfield of per-patch outputs written. */
210 uint32_t patch_outputs_written;
211
212 /** A bitfield of per-vertex outputs written. */
213 uint64_t outputs_written;
214
/* Sampler state that is part of the key. */
215 struct brw_sampler_prog_key_data tex;
216 };
217
218 /** The program key for Tessellation Evaluation Shaders; passed to brw_compile_tes(). */
219 struct brw_tes_prog_key
220 {
221 unsigned program_string_id;
222
223 /** A bitfield of per-patch inputs read. */
224 uint32_t patch_inputs_read;
225
226 /** A bitfield of per-vertex inputs read. */
227 uint64_t inputs_read;
228
/* Sampler state that is part of the key. */
229 struct brw_sampler_prog_key_data tex;
230 };
231
232 /** The program key for Geometry Shaders; passed to brw_compile_gs(). */
233 struct brw_gs_prog_key
234 {
235 unsigned program_string_id;
236
/* Sampler state that is part of the key. */
237 struct brw_sampler_prog_key_data tex;
238 };
239
240 /** The program key for Fragment/Pixel Shaders; passed to brw_compile_fs(). */
241 struct brw_wm_prog_key {
242 uint8_t iz_lookup;
/* Packed flags and small counters; the number after ':' is the width in bits. */
243 bool stats_wm:1;
244 bool flat_shade:1;
/* 5-bit field, so it can hold values 0..31. */
245 unsigned nr_color_regions:5;
246 bool replicate_alpha:1;
247 bool render_to_fbo:1;
248 bool clamp_fragment_color:1;
249 bool persample_interp:1;
250 bool multisample_fbo:1;
/* 2-bit field, so it can hold values 0..3. */
251 unsigned line_aa:2;
252 bool high_quality_derivatives:1;
253 bool force_dual_color_blend:1;
254
255 uint16_t drawable_height;
256 uint64_t input_slots_valid;
257 unsigned program_string_id;
258 GLenum alpha_test_func; /**< For Gen4/5 MRT alpha test */
259 float alpha_test_ref;
260
/* Sampler state that is part of the key. */
261 struct brw_sampler_prog_key_data tex;
262 };
263
/** The program key for Compute Shaders; passed to brw_compile_cs(). */
264 struct brw_cs_prog_key {
265 uint32_t program_string_id;
/* Sampler state that is part of the key. */
266 struct brw_sampler_prog_key_data tex;
267 };
268
269 /*
270 * Image metadata structure as laid out in the shader parameter
271 * buffer. Entries have to be 16B-aligned for the vec4 back-end to be
272 * able to use them. That's okay because the padding and any unused
273 * entries [most of them except when we're doing untyped surface
274 * access] will be removed by the uniform packing pass.
 *
 * There is one *_OFFSET constant per member of struct brw_image_param,
 * in declaration order, spaced 4 apart; BRW_IMAGE_PARAM_SIZE is the total.
 * NOTE(review): the unit is presumably one 32-bit parameter slot, so that
 * a step of 4 gives the 16B alignment mentioned above -- confirm.
275 */
276 #define BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET 0
277 #define BRW_IMAGE_PARAM_OFFSET_OFFSET 4
278 #define BRW_IMAGE_PARAM_SIZE_OFFSET 8
279 #define BRW_IMAGE_PARAM_STRIDE_OFFSET 12
280 #define BRW_IMAGE_PARAM_TILING_OFFSET 16
281 #define BRW_IMAGE_PARAM_SWIZZLING_OFFSET 20
282 #define BRW_IMAGE_PARAM_SIZE 24
283
/**
 * Per-image metadata. brw_stage_prog_data::image_param points at these,
 * and they are passed to the shader as uniforms.
 */
284 struct brw_image_param {
285 /** Surface binding table index. */
286 uint32_t surface_idx;
287
288 /** Offset applied to the X and Y surface coordinates. */
289 uint32_t offset[2];
290
291 /** Surface X, Y and Z dimensions. */
292 uint32_t size[3];
293
294 /** X-stride in bytes, Y-stride in pixels, horizontal slice stride in
295 * pixels, vertical slice stride in pixels.
296 */
297 uint32_t stride[4];
298
299 /** Log2 of the tiling modulus in the X, Y and Z dimension. */
300 uint32_t tiling[3];
301
302 /**
303 * Right shift to apply for bit 6 address swizzling. Two different
304 * swizzles can be specified and will be applied one after the other. The
305 * resulting address will be:
306 *
307 * addr' = addr ^ ((1 << 6) & ((addr >> swizzling[0]) ^
308 * (addr >> swizzling[1])))
309 *
310 * Use \c 0xff if any of the swizzles is not required.
311 */
312 uint32_t swizzling[2];
313 };
314
/**
 * Program data common to all shader stages.
 *
 * Embedded as the \c base member of brw_wm_prog_data, brw_cs_prog_data and
 * brw_vue_prog_data.
 */
315 struct brw_stage_prog_data {
316 struct {
317 /** size of our binding table. */
318 uint32_t size_bytes;
319
320 /** @{
321 * surface indices for the various groups of surfaces
322 */
323 uint32_t pull_constants_start;
324 uint32_t texture_start;
325 uint32_t gather_texture_start;
326 uint32_t ubo_start;
327 uint32_t ssbo_start;
328 uint32_t abo_start;
329 uint32_t image_start;
330 uint32_t shader_time_start;
331 /** @} */
332 } binding_table;
333
334 GLuint nr_params; /**< number of float params/constants */
/* NOTE(review): presumably the element counts of pull_param and image_param below -- confirm. */
335 GLuint nr_pull_params;
336 unsigned nr_image_params;
337
338 unsigned curb_read_length;
339 unsigned total_scratch;
340 unsigned total_shared;
341
342 /**
343 * Register where the thread expects to find input data from the URB
344 * (typically uniforms, followed by vertex or fragment attributes).
345 */
346 unsigned dispatch_grf_start_reg;
347
348 bool use_alt_mode; /**< Use ALT floating point mode? Otherwise, IEEE. */
349
350 /* Pointers to tracked values (only valid once
351 * _mesa_load_state_parameters has been called at runtime).
352 */
353 const union gl_constant_value **param;
354 const union gl_constant_value **pull_param;
355
356 /** Image metadata passed to the shader as uniforms. */
357 struct brw_image_param *image_param;
358 };
359
360 /* Data about a particular attempt to compile a program. Note that
361 * there can be many of these, each in a different GL state
362 * corresponding to a different brw_wm_prog_key struct, with different
363 * compiled programs.
 *
 * This is the fragment shader variant, filled in through the prog_data
 * argument of brw_compile_fs().
364 */
365 struct brw_wm_prog_data {
/* Stage-independent program data. */
366 struct brw_stage_prog_data base;
367
368 GLuint num_varying_inputs;
369
370 uint8_t reg_blocks_0;
371 uint8_t reg_blocks_2;
372
373 uint8_t dispatch_grf_start_reg_2;
374 uint32_t prog_offset_2;
375
376 struct {
377 /** @{
378 * surface indices for the WM-specific surfaces
379 */
380 uint32_t render_target_start;
381 /** @} */
382 } binding_table;
383
384 uint8_t computed_depth_mode;
385 bool computed_stencil;
386
387 bool early_fragment_tests;
388 bool dispatch_8;
389 bool dispatch_16;
390 bool dual_src_blend;
391 bool persample_dispatch;
392 bool uses_pos_offset;
393 bool uses_omask;
394 bool uses_kill;
395 bool uses_src_depth;
396 bool uses_src_w;
397 bool uses_sample_mask;
398 bool pulls_bary;
399
400 /**
401 * Mask of which interpolation modes are required by the fragment shader.
402 * Used in hardware setup on gen6+.
403 */
404 uint32_t barycentric_interp_modes;
405
406 /**
407 * Mask of which FS inputs are marked flat by the shader source. This is
408 * needed for setting up 3DSTATE_SF/SBE.
409 */
410 uint32_t flat_inputs;
411
412 /**
413 * Map from gl_varying_slot to the position within the FS setup data
414 * payload where the varying's attribute vertex deltas should be delivered.
415 * For varying slots that are not used by the FS, the value is -1.
 *
 * Indexed by gl_varying_slot; VARYING_SLOT_MAX entries.
416 */
417 int urb_setup[VARYING_SLOT_MAX];
418 };
419
/**
 * Compute shader program data, filled in through the prog_data argument of
 * brw_compile_cs() and read by brw_cs_fill_local_id_payload().
 */
420 struct brw_cs_prog_data {
/* Stage-independent program data. */
421 struct brw_stage_prog_data base;
422
423 GLuint dispatch_grf_start_reg_16;
/* NOTE(review): presumably the work group size in X, Y and Z -- confirm. */
424 unsigned local_size[3];
425 unsigned simd_size;
426 bool uses_barrier;
427 bool uses_num_work_groups;
428 unsigned local_invocation_id_regs;
429
430 struct {
431 /** @{
432 * surface indices for the CS-specific surfaces
433 */
434 uint32_t work_groups_start;
435 /** @} */
436 } binding_table;
437 };
438
439 /**
440 * Enum representing the i965-specific vertex results that don't correspond
441 * exactly to any element of gl_varying_slot. The values of this enum are
442 * assigned such that they don't conflict with gl_varying_slot.
443 */
444 typedef enum
445 {
/* The first value is VARYING_SLOT_MAX; the rest follow consecutively. */
446 BRW_VARYING_SLOT_NDC = VARYING_SLOT_MAX,
/* Value stored in brw_vue_map::slot_to_varying for slots that are not in use. */
447 BRW_VARYING_SLOT_PAD,
448 /**
449 * Technically this is not a varying but just a placeholder that
450 * compile_sf_prog() inserts into its VUE map to cause the gl_PointCoord
451 * builtin variable to be compiled correctly. see compile_sf_prog() for
452 * more info.
453 */
454 BRW_VARYING_SLOT_PNTC,
/* One past the last slot value defined in this enum. */
455 BRW_VARYING_SLOT_COUNT
456 } brw_varying_slot;
457
458 /**
459 * Data structure recording the relationship between the gl_varying_slot enum
460 * and "slots" within the vertex URB entry (VUE). A "slot" is defined as a
461 * single octaword within the VUE (128 bits).
462 *
463 * Note that each BRW register contains 256 bits (2 octawords), so when
464 * accessing the VUE in URB_NOSWIZZLE mode, each register corresponds to two
465 * consecutive VUE slots. When accessing the VUE in URB_INTERLEAVED mode (as
466 * in a vertex shader), each register corresponds to a single VUE slot, since
467 * it contains data for two separate vertices.
468 */
469 struct brw_vue_map {
470 /**
471 * Bitfield representing all varying slots that are (a) stored in this VUE
472 * map, and (b) actually written by the shader. Does not include any of
473 * the additional varying slots defined in brw_varying_slot.
474 */
475 GLbitfield64 slots_valid;
476
477 /**
478 * Is this VUE map for a separate shader pipeline?
479 *
480 * Separable programs (GL_ARB_separate_shader_objects) can be mixed and matched
481 * without the linker having a chance to dead code eliminate unused varyings.
482 *
483 * This means that we have to use a fixed slot layout, based on the output's
484 * location field, rather than assigning slots in a compact contiguous block.
485 */
486 bool separate;
487
488 /**
489 * Map from gl_varying_slot value to VUE slot. For gl_varying_slots that are
490 * not stored in a slot (because they are not written, or because
491 * additional processing is applied before storing them in the VUE), the
492 * value is -1.
 *
 * brw_varying_to_offset() reads this array without checking for -1.
493 */
494 signed char varying_to_slot[VARYING_SLOT_TESS_MAX];
495
496 /**
497 * Map from VUE slot to gl_varying_slot value. For slots that do not
498 * directly correspond to a gl_varying_slot, the value comes from
499 * brw_varying_slot.
500 *
501 * For slots that are not in use, the value is BRW_VARYING_SLOT_PAD.
502 */
503 signed char slot_to_varying[VARYING_SLOT_TESS_MAX];
504
505 /**
506 * Total number of VUE slots in use
507 */
508 int num_slots;
509
510 /**
511 * Number of per-patch VUE slots. Only valid for tessellation control
512 * shader outputs and tessellation evaluation shader inputs.
513 */
514 int num_per_patch_slots;
515
516 /**
517 * Number of per-vertex VUE slots. Only valid for tessellation control
518 * shader outputs and tessellation evaluation shader inputs.
519 */
520 int num_per_vertex_slots;
521 };
522
/**
 * Print a VUE map.
 *
 * NOTE(review): defined elsewhere; presumably writes a human-readable dump
 * of \p vue_map to the stream \p fp -- confirm against the implementation.
 */
523 void brw_print_vue_map(FILE *fp, const struct brw_vue_map *vue_map);
524
525 /**
526 * Convert a VUE slot number into a byte offset within the VUE.
527 */
528 static inline GLuint brw_vue_slot_to_offset(GLuint slot)
529 {
530 return 16*slot;
531 }
532
533 /**
534 * Convert a vertex output (brw_varying_slot) into a byte offset within the
535 * VUE.
536 */
537 static inline
538 GLuint brw_varying_to_offset(const struct brw_vue_map *vue_map, GLuint varying)
539 {
540 return brw_vue_slot_to_offset(vue_map->varying_to_slot[varying]);
541 }
542
/**
 * Compute a VUE map.
 *
 * NOTE(review): defined elsewhere; presumably fills \p vue_map for the
 * varyings in \p slots_valid, with \p separate_shader selecting the fixed
 * layout described at brw_vue_map::separate -- confirm.
 */
543 void brw_compute_vue_map(const struct brw_device_info *devinfo,
544 struct brw_vue_map *vue_map,
545 GLbitfield64 slots_valid,
546 bool separate_shader);
547
/**
 * Compute a VUE map for tessellation.
 *
 * NOTE(review): defined elsewhere; presumably fills \p vue_map including
 * its num_per_patch_slots / num_per_vertex_slots members, with
 * \p is_patch marking per-patch varyings -- confirm.
 */
548 void brw_compute_tess_vue_map(struct brw_vue_map *const vue_map,
549 const GLbitfield64 slots_valid,
550 const GLbitfield is_patch);
551
/** Dispatch mode of a shader; stored in brw_vue_prog_data::dispatch_mode. */
552 enum shader_dispatch_mode {
553 DISPATCH_MODE_4X1_SINGLE = 0,
554 DISPATCH_MODE_4X2_DUAL_INSTANCE = 1,
555 DISPATCH_MODE_4X2_DUAL_OBJECT = 2,
556 DISPATCH_MODE_SIMD8 = 3,
557 };
558
559 /**
560 * @defgroup Tessellator parameter enumerations.
561 *
562 * These correspond to the hardware values in 3DSTATE_TE, and are provided
563 * as part of the tessellation evaluation shader.
564 *
565 * @{
566 */
/** Tessellator partitioning; stored in brw_tes_prog_data::partitioning. */
567 enum brw_tess_partitioning {
568 BRW_TESS_PARTITIONING_INTEGER = 0,
569 BRW_TESS_PARTITIONING_ODD_FRACTIONAL = 1,
570 BRW_TESS_PARTITIONING_EVEN_FRACTIONAL = 2,
571 };
572
/** Tessellator output topology; stored in brw_tes_prog_data::output_topology. */
573 enum brw_tess_output_topology {
574 BRW_TESS_OUTPUT_TOPOLOGY_POINT = 0,
575 BRW_TESS_OUTPUT_TOPOLOGY_LINE = 1,
576 BRW_TESS_OUTPUT_TOPOLOGY_TRI_CW = 2,
577 BRW_TESS_OUTPUT_TOPOLOGY_TRI_CCW = 3,
578 };
579
/** Tessellator domain; stored in brw_tes_prog_data::domain. */
580 enum brw_tess_domain {
581 BRW_TESS_DOMAIN_QUAD = 0,
582 BRW_TESS_DOMAIN_TRI = 1,
583 BRW_TESS_DOMAIN_ISOLINE = 2,
584 };
585 /** @} */
586
/**
 * Program data shared by the stages that carry a VUE map.
 *
 * Embedded as the \c base member of brw_vs_prog_data, brw_tcs_prog_data,
 * brw_tes_prog_data and brw_gs_prog_data.
 */
587 struct brw_vue_prog_data {
/* Stage-independent program data. */
588 struct brw_stage_prog_data base;
589 struct brw_vue_map vue_map;
590
591 /** Should the hardware deliver input VUE handles for URB pull loads? */
592 bool include_vue_handles;
593
594 GLuint urb_read_length;
595 GLuint total_grf;
596
597 uint32_t cull_distance_mask;
598
599 /* Used for calculating urb partitions. In the VS, this is the size of the
600 * URB entry used for both input and output to the thread. In the GS, this
601 * is the size of the URB entry used for output.
602 */
603 GLuint urb_entry_size;
604
605 enum shader_dispatch_mode dispatch_mode;
606 };
607
/**
 * Vertex shader program data, filled in through the prog_data argument of
 * brw_compile_vs().
 */
608 struct brw_vs_prog_data {
/* Program data common to the VUE stages. */
609 struct brw_vue_prog_data base;
610
611 GLbitfield64 inputs_read;
612
613 unsigned nr_attributes;
614 unsigned nr_attribute_slots;
615
/* NOTE(review): presumably set when the shader reads the corresponding built-in -- confirm. */
616 bool uses_vertexid;
617 bool uses_instanceid;
618 bool uses_basevertex;
619 bool uses_baseinstance;
620 bool uses_drawid;
621 };
622
/**
 * Tessellation control shader program data, filled in through the
 * prog_data argument of brw_compile_tcs().
 */
623 struct brw_tcs_prog_data
624 {
/* Program data common to the VUE stages. */
625 struct brw_vue_prog_data base;
626
627 /** Number of vertices in output patch */
628 int instances;
629 };
630
631
/**
 * Tessellation evaluation shader program data, filled in through the
 * prog_data argument of brw_compile_tes().
 */
632 struct brw_tes_prog_data
633 {
/* Program data common to the VUE stages. */
634 struct brw_vue_prog_data base;
635
/* Tessellator parameters for this shader. */
636 enum brw_tess_partitioning partitioning;
637 enum brw_tess_output_topology output_topology;
638 enum brw_tess_domain domain;
639 };
640
/**
 * Geometry shader program data, filled in through the prog_data argument
 * of brw_compile_gs().
 */
641 struct brw_gs_prog_data
642 {
/* Program data common to the VUE stages. */
643 struct brw_vue_prog_data base;
644
645 unsigned vertices_in;
646
647 /**
648 * Size of an output vertex, measured in HWORDS (32 bytes).
649 */
650 unsigned output_vertex_size_hwords;
651
652 unsigned output_topology;
653
654 /**
655 * Size of the control data (cut bits or StreamID bits), in hwords (32
656 * bytes). 0 if there is no control data.
657 */
658 unsigned control_data_header_size_hwords;
659
660 /**
661 * Format of the control data (either GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID
662 * if the control data is StreamID bits, or
663 * GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT if the control data is cut bits).
664 * Ignored if control_data_header_size_hwords is 0.
665 */
666 unsigned control_data_format;
667
668 bool include_primitive_id;
669
670 /**
671 * The number of vertices emitted, if constant - otherwise -1.
672 */
673 int static_vertex_count;
674
675 int invocations;
676
677 /**
678 * Gen6 transform feedback enabled flag.
679 */
680 bool gen6_xfb_enabled;
681
682 /**
683 * Gen6: Provoking vertex convention for odd-numbered triangles
684 * in tristrips.
685 */
686 GLuint pv_first:1;
687
688 /**
689 * Gen6: Number of varyings that are output to transform feedback.
 *
 * This is a 7-bit field (0..127), which is wide enough for the 64-entry
 * binding arrays below.
690 */
691 GLuint num_transform_feedback_bindings:7; /* 0-BRW_MAX_SOL_BINDINGS */
692
693 /**
694 * Gen6: Map from the index of a transform feedback binding table entry to the
695 * gl_varying_slot that should be streamed out through that binding table
696 * entry.
697 */
698 unsigned char transform_feedback_bindings[64 /* BRW_MAX_SOL_BINDINGS */];
699
700 /**
701 * Gen6: Map from the index of a transform feedback binding table entry to the
702 * swizzles that should be used when streaming out data through that
703 * binding table entry.
704 */
705 unsigned char transform_feedback_swizzles[64 /* BRW_MAX_SOL_BINDINGS */];
706 };
707
708
709 /** @} */
710
/**
 * Create a brw_compiler for the device described by \p devinfo.
 *
 * NOTE(review): defined elsewhere; \p mem_ctx is presumably the allocation
 * context that owns the returned object -- confirm before freeing it.
 */
711 struct brw_compiler *
712 brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo);
713
714 /**
715 * Compile a vertex shader.
716 *
717 * Returns the final assembly and the program's size.
 *
 * NOTE(review): the size is presumably written to \p final_assembly_size
 * and a failure message to \p error_str; \p log_data is presumably the
 * opaque pointer handed to the brw_compiler logging callbacks -- confirm
 * against the implementation.
718 */
719 const unsigned *
720 brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
721 void *mem_ctx,
722 const struct brw_vs_prog_key *key,
723 struct brw_vs_prog_data *prog_data,
724 const struct nir_shader *shader,
725 gl_clip_plane *clip_planes,
726 bool use_legacy_snorm_formula,
727 int shader_time_index,
728 unsigned *final_assembly_size,
729 char **error_str);
730
731 /**
732 * Compile a tessellation control shader.
733 *
734 * Returns the final assembly and the program's size.
 *
 * NOTE(review): the size is presumably written to \p final_assembly_size
 * and a failure message to \p error_str -- confirm against the
 * implementation.
735 */
736 const unsigned *
737 brw_compile_tcs(const struct brw_compiler *compiler,
738 void *log_data,
739 void *mem_ctx,
740 const struct brw_tcs_prog_key *key,
741 struct brw_tcs_prog_data *prog_data,
742 const struct nir_shader *nir,
743 int shader_time_index,
744 unsigned *final_assembly_size,
745 char **error_str);
746
747 /**
748 * Compile a tessellation evaluation shader.
749 *
750 * Returns the final assembly and the program's size.
 *
 * NOTE(review): the size is presumably written to \p final_assembly_size
 * and a failure message to \p error_str -- confirm against the
 * implementation.
751 */
752 const unsigned *
753 brw_compile_tes(const struct brw_compiler *compiler, void *log_data,
754 void *mem_ctx,
755 const struct brw_tes_prog_key *key,
756 struct brw_tes_prog_data *prog_data,
757 const struct nir_shader *shader,
758 struct gl_shader_program *shader_prog,
759 int shader_time_index,
760 unsigned *final_assembly_size,
761 char **error_str);
762
763 /**
764 * Compile a geometry shader.
765 *
766 * Returns the final assembly and the program's size.
 *
 * NOTE(review): the size is presumably written to \p final_assembly_size
 * and a failure message to \p error_str -- confirm against the
 * implementation.
767 */
768 const unsigned *
769 brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
770 void *mem_ctx,
771 const struct brw_gs_prog_key *key,
772 struct brw_gs_prog_data *prog_data,
773 const struct nir_shader *shader,
774 struct gl_shader_program *shader_prog,
775 int shader_time_index,
776 unsigned *final_assembly_size,
777 char **error_str);
778
779 /**
780 * Compile a fragment shader.
781 *
782 * Returns the final assembly and the program's size.
 *
 * Unlike the other stages, this takes two shader-time indices
 * (\p shader_time_index8 and \p shader_time_index16).
 *
 * NOTE(review): the size is presumably written to \p final_assembly_size
 * and a failure message to \p error_str -- confirm against the
 * implementation.
783 */
784 const unsigned *
785 brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
786 void *mem_ctx,
787 const struct brw_wm_prog_key *key,
788 struct brw_wm_prog_data *prog_data,
789 const struct nir_shader *shader,
790 struct gl_program *prog,
791 int shader_time_index8,
792 int shader_time_index16,
793 bool allow_spilling,
794 bool use_rep_send,
795 unsigned *final_assembly_size,
796 char **error_str);
797
798 /**
799 * Compile a compute shader.
800 *
801 * Returns the final assembly and the program's size.
 *
 * NOTE(review): the size is presumably written to \p final_assembly_size
 * and a failure message to \p error_str -- confirm against the
 * implementation.
802 */
803 const unsigned *
804 brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
805 void *mem_ctx,
806 const struct brw_cs_prog_key *key,
807 struct brw_cs_prog_data *prog_data,
808 const struct nir_shader *shader,
809 int shader_time_index,
810 unsigned *final_assembly_size,
811 char **error_str);
812
813 /**
814 * Fill out local id payload for compute shader according to cs_prog_data.
 *
 * NOTE(review): defined elsewhere; \p buffer is presumably the destination,
 * \p threads the number of threads to fill in and \p stride the distance
 * between consecutive threads' payloads. The units and the required size
 * of \p buffer are not visible here -- confirm against the implementation.
815 */
816 void
817 brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *cs_prog_data,
818 void *buffer, uint32_t threads, uint32_t stride);
819
820 #ifdef __cplusplus
821 } /* extern "C" */
822 #endif