i965: Add an INTEL_PRECISE_TRIG=1 option to fix SIN/COS output range.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_compiler.h
1 /*
2 * Copyright © 2010 - 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #pragma once
25
26 #include <stdio.h>
27 #include "brw_device_info.h"
28 #include "main/mtypes.h"
29
30 #ifdef __cplusplus
31 extern "C" {
32 #endif
33
34 struct ra_regs;
35 struct nir_shader;
36 struct brw_geometry_program;
37 union gl_constant_value;
38
39 struct brw_compiler {
40 const struct brw_device_info *devinfo;
41
42 struct {
43 struct ra_regs *regs;
44
45 /**
46 * Array of the ra classes for the unaligned contiguous register
47 * block sizes used.
48 */
49 int *classes;
50
51 /**
52 * Mapping for register-allocated objects in *regs to the first
53 * GRF for that object.
54 */
55 uint8_t *ra_reg_to_grf;
56 } vec4_reg_set;
57
58 struct {
59 struct ra_regs *regs;
60
61 /**
62 * Array of the ra classes for the unaligned contiguous register
63 * block sizes used, indexed by register size.
64 */
65 int classes[16];
66
67 /**
68 * Mapping from classes to ra_reg ranges. Each of the per-size
69 * classes corresponds to a range of ra_reg nodes. This array stores
70 * those ranges in the form of first ra_reg in each class and the
71 * total number of ra_reg elements in the last array element. This
72 * way the range of the i'th class is given by:
73 * [ class_to_ra_reg_range[i], class_to_ra_reg_range[i+1] )
74 */
75 int class_to_ra_reg_range[17];
76
77 /**
78 * Mapping for register-allocated objects in *regs to the first
79 * GRF for that object.
80 */
81 uint8_t *ra_reg_to_grf;
82
83 /**
84 * ra class for the aligned pairs we use for PLN, which doesn't
85 * appear in *classes.
86 */
87 int aligned_pairs_class;
88 } fs_reg_sets[2];
89
90 void (*shader_debug_log)(void *, const char *str, ...) PRINTFLIKE(2, 3);
91 void (*shader_perf_log)(void *, const char *str, ...) PRINTFLIKE(2, 3);
92
93 bool scalar_stage[MESA_SHADER_STAGES];
94 struct gl_shader_compiler_options glsl_compiler_options[MESA_SHADER_STAGES];
95
96 /**
97 * Apply workarounds for SIN and COS output range problems.
98 * This can negatively impact performance.
99 */
100 bool precise_trig;
101 };
102
103
104 /**
105 * Program key structures.
106 *
107 * When drawing, we look for the currently bound shaders in the program
108 * cache. This is essentially a hash table lookup, and these are the keys.
109 *
110 * Sometimes OpenGL features specified as state need to be simulated via
111 * shader code, due to a mismatch between the API and the hardware. This
112 * is often referred to as "non-orthogonal state" or "NOS". We store NOS
113 * in the program key so it's considered when searching for a program. If
114 * we haven't seen a particular combination before, we have to recompile a
115 * new specialized version.
116 *
117 * Shader compilation should not look up state in gl_context directly, but
118 * instead use the copy in the program key. This guarantees recompiles will
119 * happen correctly.
120 *
121 * @{
122 */
123
124 enum PACKED gen6_gather_sampler_wa {
125 WA_SIGN = 1, /* whether we need to sign extend */
126 WA_8BIT = 2, /* if we have an 8bit format needing wa */
127 WA_16BIT = 4, /* if we have a 16bit format needing wa */
128 };
129
130 /**
131 * Sampler information needed by VS, WM, and GS program cache keys.
132 */
133 struct brw_sampler_prog_key_data {
134 /**
135 * EXT_texture_swizzle and DEPTH_TEXTURE_MODE swizzles.
136 */
137 uint16_t swizzles[MAX_SAMPLERS];
138
139 uint32_t gl_clamp_mask[3];
140
141 /**
142 * For RG32F, gather4's channel select is broken.
143 */
144 uint32_t gather_channel_quirk_mask;
145
146 /**
147 * Whether this sampler uses the compressed multisample surface layout.
148 */
149 uint32_t compressed_multisample_layout_mask;
150
151 /**
152 * Whether this sampler is using 16x multisampling. If so fetching from
153 * this sampler will be handled with a different instruction, ld2dms_w
154 * instead of ld2dms.
155 */
156 uint32_t msaa_16;
157
158 /**
159 * For Sandybridge, which shader w/a we need for gather quirks.
160 */
161 enum gen6_gather_sampler_wa gen6_gather_wa[MAX_SAMPLERS];
162 };
163
164
165 /** The program key for Vertex Shaders. */
166 struct brw_vs_prog_key {
167 unsigned program_string_id;
168
169 /*
170 * Per-attribute workaround flags
171 */
172 uint8_t gl_attrib_wa_flags[VERT_ATTRIB_MAX];
173
174 bool copy_edgeflag:1;
175
176 bool clamp_vertex_color:1;
177
178 /**
179 * How many user clipping planes are being uploaded to the vertex shader as
180 * push constants.
181 *
182 * These are used for lowering legacy gl_ClipVertex/gl_Position clipping to
183 * clip distances.
184 */
185 unsigned nr_userclip_plane_consts:4;
186
187 /**
188 * For pre-Gen6 hardware, a bitfield indicating which texture coordinates
189 * are going to be replaced with point coordinates (as a consequence of a
190 * call to glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE)). Because
191 * our SF thread requires exact matching between VS outputs and FS inputs,
192 * these texture coordinates will need to be unconditionally included in
193 * the VUE, even if they aren't written by the vertex shader.
194 */
195 uint8_t point_coord_replace;
196
197 struct brw_sampler_prog_key_data tex;
198 };
199
200 /** The program key for Tessellation Control Shaders. */
201 struct brw_tcs_prog_key
202 {
203 unsigned program_string_id;
204
205 GLenum tes_primitive_mode;
206
207 unsigned input_vertices;
208
209 /** A bitfield of per-patch outputs written. */
210 uint32_t patch_outputs_written;
211
212 /** A bitfield of per-vertex outputs written. */
213 uint64_t outputs_written;
214
215 struct brw_sampler_prog_key_data tex;
216 };
217
218 /** The program key for Tessellation Evaluation Shaders. */
219 struct brw_tes_prog_key
220 {
221 unsigned program_string_id;
222
223 /** A bitfield of per-patch inputs read. */
224 uint32_t patch_inputs_read;
225
226 /** A bitfield of per-vertex inputs read. */
227 uint64_t inputs_read;
228
229 struct brw_sampler_prog_key_data tex;
230 };
231
232 /** The program key for Geometry Shaders. */
233 struct brw_gs_prog_key
234 {
235 unsigned program_string_id;
236
237 struct brw_sampler_prog_key_data tex;
238 };
239
240 /** The program key for Fragment/Pixel Shaders. */
241 struct brw_wm_prog_key {
242 uint8_t iz_lookup;
243 bool stats_wm:1;
244 bool flat_shade:1;
245 bool persample_shading:1;
246 bool persample_2x:1;
247 unsigned nr_color_regions:5;
248 bool replicate_alpha:1;
249 bool render_to_fbo:1;
250 bool clamp_fragment_color:1;
251 bool compute_pos_offset:1;
252 bool compute_sample_id:1;
253 unsigned line_aa:2;
254 bool high_quality_derivatives:1;
255 bool force_dual_color_blend:1;
256
257 uint16_t drawable_height;
258 uint64_t input_slots_valid;
259 unsigned program_string_id;
260 GLenum alpha_test_func; /* < For Gen4/5 MRT alpha test */
261 float alpha_test_ref;
262
263 struct brw_sampler_prog_key_data tex;
264 };
265
266 struct brw_cs_prog_key {
267 uint32_t program_string_id;
268 struct brw_sampler_prog_key_data tex;
269 };
270
/*
 * Image metadata structure as laid out in the shader parameter
 * buffer.  Entries have to be 16B-aligned for the vec4 back-end to be
 * able to use them.  That's okay because the padding and any unused
 * entries [most of them except when we're doing untyped surface
 * access] will be removed by the uniform packing pass.
 *
 * Each *_OFFSET below is a multiple of 4 and BRW_IMAGE_PARAM_SIZE is the
 * total size, in the same units.
 */
#define BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET      0
#define BRW_IMAGE_PARAM_OFFSET_OFFSET           4
#define BRW_IMAGE_PARAM_SIZE_OFFSET             8
#define BRW_IMAGE_PARAM_STRIDE_OFFSET           12
#define BRW_IMAGE_PARAM_TILING_OFFSET           16
#define BRW_IMAGE_PARAM_SWIZZLING_OFFSET        20
#define BRW_IMAGE_PARAM_SIZE                    24
285
/** Image metadata passed to shaders (see brw_stage_prog_data::image_param). */
struct brw_image_param {
   /** Surface binding table index. */
   uint32_t surface_idx;

   /** Offset applied to the X and Y surface coordinates. */
   uint32_t offset[2];

   /** Surface X, Y and Z dimensions. */
   uint32_t size[3];

   /** X-stride in bytes, Y-stride in pixels, horizontal slice stride in
    * pixels, vertical slice stride in pixels.
    */
   uint32_t stride[4];

   /** Log2 of the tiling modulus in the X, Y and Z dimension. */
   uint32_t tiling[3];

   /**
    * Right shift to apply for bit 6 address swizzling.  Two different
    * swizzles can be specified and will be applied one after the other.  The
    * resulting address will be:
    *
    *  addr' = addr ^ ((1 << 6) & ((addr >> swizzling[0]) ^
    *                              (addr >> swizzling[1])))
    *
    * Use \c 0xff if any of the swizzles is not required.
    */
   uint32_t swizzling[2];
};
316
317 struct brw_stage_prog_data {
318 struct {
319 /** size of our binding table. */
320 uint32_t size_bytes;
321
322 /** @{
323 * surface indices for the various groups of surfaces
324 */
325 uint32_t pull_constants_start;
326 uint32_t texture_start;
327 uint32_t gather_texture_start;
328 uint32_t ubo_start;
329 uint32_t ssbo_start;
330 uint32_t abo_start;
331 uint32_t image_start;
332 uint32_t shader_time_start;
333 /** @} */
334 } binding_table;
335
336 GLuint nr_params; /**< number of float params/constants */
337 GLuint nr_pull_params;
338 unsigned nr_image_params;
339
340 unsigned curb_read_length;
341 unsigned total_scratch;
342 unsigned total_shared;
343
344 /**
345 * Register where the thread expects to find input data from the URB
346 * (typically uniforms, followed by vertex or fragment attributes).
347 */
348 unsigned dispatch_grf_start_reg;
349
350 bool use_alt_mode; /**< Use ALT floating point mode? Otherwise, IEEE. */
351
352 /* Pointers to tracked values (only valid once
353 * _mesa_load_state_parameters has been called at runtime).
354 */
355 const union gl_constant_value **param;
356 const union gl_constant_value **pull_param;
357
358 /** Image metadata passed to the shader as uniforms. */
359 struct brw_image_param *image_param;
360 };
361
362 /* Data about a particular attempt to compile a program. Note that
363 * there can be many of these, each in a different GL state
364 * corresponding to a different brw_wm_prog_key struct, with different
365 * compiled programs.
366 */
367 struct brw_wm_prog_data {
368 struct brw_stage_prog_data base;
369
370 GLuint num_varying_inputs;
371
372 GLuint dispatch_grf_start_reg_16;
373 GLuint reg_blocks;
374 GLuint reg_blocks_16;
375
376 struct {
377 /** @{
378 * surface indices the WM-specific surfaces
379 */
380 uint32_t render_target_start;
381 /** @} */
382 } binding_table;
383
384 uint8_t computed_depth_mode;
385 bool computed_stencil;
386
387 bool early_fragment_tests;
388 bool no_8;
389 bool dual_src_blend;
390 bool uses_pos_offset;
391 bool uses_omask;
392 bool uses_kill;
393 bool uses_src_depth;
394 bool uses_src_w;
395 bool uses_sample_mask;
396 bool pulls_bary;
397 uint32_t prog_offset_16;
398
399 /**
400 * Mask of which interpolation modes are required by the fragment shader.
401 * Used in hardware setup on gen6+.
402 */
403 uint32_t barycentric_interp_modes;
404
405 /**
406 * Map from gl_varying_slot to the position within the FS setup data
407 * payload where the varying's attribute vertex deltas should be delivered.
408 * For varying slots that are not used by the FS, the value is -1.
409 */
410 int urb_setup[VARYING_SLOT_MAX];
411 };
412
413 struct brw_cs_prog_data {
414 struct brw_stage_prog_data base;
415
416 GLuint dispatch_grf_start_reg_16;
417 unsigned local_size[3];
418 unsigned simd_size;
419 bool uses_barrier;
420 bool uses_num_work_groups;
421 unsigned local_invocation_id_regs;
422
423 struct {
424 /** @{
425 * surface indices the CS-specific surfaces
426 */
427 uint32_t work_groups_start;
428 /** @} */
429 } binding_table;
430 };
431
432 /**
433 * Enum representing the i965-specific vertex results that don't correspond
434 * exactly to any element of gl_varying_slot. The values of this enum are
435 * assigned such that they don't conflict with gl_varying_slot.
436 */
437 typedef enum
438 {
439 BRW_VARYING_SLOT_NDC = VARYING_SLOT_MAX,
440 BRW_VARYING_SLOT_PAD,
441 /**
442 * Technically this is not a varying but just a placeholder that
443 * compile_sf_prog() inserts into its VUE map to cause the gl_PointCoord
444 * builtin variable to be compiled correctly. see compile_sf_prog() for
445 * more info.
446 */
447 BRW_VARYING_SLOT_PNTC,
448 BRW_VARYING_SLOT_COUNT
449 } brw_varying_slot;
450
451 /**
452 * Data structure recording the relationship between the gl_varying_slot enum
453 * and "slots" within the vertex URB entry (VUE). A "slot" is defined as a
454 * single octaword within the VUE (128 bits).
455 *
456 * Note that each BRW register contains 256 bits (2 octawords), so when
457 * accessing the VUE in URB_NOSWIZZLE mode, each register corresponds to two
458 * consecutive VUE slots. When accessing the VUE in URB_INTERLEAVED mode (as
459 * in a vertex shader), each register corresponds to a single VUE slot, since
460 * it contains data for two separate vertices.
461 */
462 struct brw_vue_map {
463 /**
464 * Bitfield representing all varying slots that are (a) stored in this VUE
465 * map, and (b) actually written by the shader. Does not include any of
466 * the additional varying slots defined in brw_varying_slot.
467 */
468 GLbitfield64 slots_valid;
469
470 /**
471 * Is this VUE map for a separate shader pipeline?
472 *
473 * Separable programs (GL_ARB_separate_shader_objects) can be mixed and matched
474 * without the linker having a chance to dead code eliminate unused varyings.
475 *
476 * This means that we have to use a fixed slot layout, based on the output's
477 * location field, rather than assigning slots in a compact contiguous block.
478 */
479 bool separate;
480
481 /**
482 * Map from gl_varying_slot value to VUE slot. For gl_varying_slots that are
483 * not stored in a slot (because they are not written, or because
484 * additional processing is applied before storing them in the VUE), the
485 * value is -1.
486 */
487 signed char varying_to_slot[VARYING_SLOT_TESS_MAX];
488
489 /**
490 * Map from VUE slot to gl_varying_slot value. For slots that do not
491 * directly correspond to a gl_varying_slot, the value comes from
492 * brw_varying_slot.
493 *
494 * For slots that are not in use, the value is BRW_VARYING_SLOT_PAD.
495 */
496 signed char slot_to_varying[VARYING_SLOT_TESS_MAX];
497
498 /**
499 * Total number of VUE slots in use
500 */
501 int num_slots;
502
503 /**
504 * Number of per-patch VUE slots. Only valid for tessellation control
505 * shader outputs and tessellation evaluation shader inputs.
506 */
507 int num_per_patch_slots;
508
509 /**
510 * Number of per-vertex VUE slots. Only valid for tessellation control
511 * shader outputs and tessellation evaluation shader inputs.
512 */
513 int num_per_vertex_slots;
514 };
515
/* Defined elsewhere; judging by the name it prints \p vue_map to \p fp. */
void brw_print_vue_map(FILE *fp, const struct brw_vue_map *vue_map);
517
518 /**
519 * Convert a VUE slot number into a byte offset within the VUE.
520 */
521 static inline GLuint brw_vue_slot_to_offset(GLuint slot)
522 {
523 return 16*slot;
524 }
525
526 /**
527 * Convert a vertex output (brw_varying_slot) into a byte offset within the
528 * VUE.
529 */
530 static inline
531 GLuint brw_varying_to_offset(const struct brw_vue_map *vue_map, GLuint varying)
532 {
533 return brw_vue_slot_to_offset(vue_map->varying_to_slot[varying]);
534 }
535
536 void brw_compute_vue_map(const struct brw_device_info *devinfo,
537 struct brw_vue_map *vue_map,
538 GLbitfield64 slots_valid,
539 bool separate_shader);
540
541 void brw_compute_tess_vue_map(struct brw_vue_map *const vue_map,
542 const GLbitfield64 slots_valid,
543 const GLbitfield is_patch);
544
/** Thread dispatch mode recorded in brw_vue_prog_data::dispatch_mode. */
enum shader_dispatch_mode {
   DISPATCH_MODE_4X1_SINGLE = 0,
   DISPATCH_MODE_4X2_DUAL_INSTANCE = 1,
   DISPATCH_MODE_4X2_DUAL_OBJECT = 2,
   DISPATCH_MODE_SIMD8 = 3,
};
551
/**
 * @defgroup Tessellator parameter enumerations.
 *
 * These correspond to the hardware values in 3DSTATE_TE, and are provided
 * as part of the tessellation evaluation shader.
 *
 * @{
 */
enum brw_tess_partitioning {
   BRW_TESS_PARTITIONING_INTEGER         = 0,
   BRW_TESS_PARTITIONING_ODD_FRACTIONAL  = 1,
   BRW_TESS_PARTITIONING_EVEN_FRACTIONAL = 2,
};

enum brw_tess_output_topology {
   BRW_TESS_OUTPUT_TOPOLOGY_POINT   = 0,
   BRW_TESS_OUTPUT_TOPOLOGY_LINE    = 1,
   BRW_TESS_OUTPUT_TOPOLOGY_TRI_CW  = 2,
   BRW_TESS_OUTPUT_TOPOLOGY_TRI_CCW = 3,
};

enum brw_tess_domain {
   BRW_TESS_DOMAIN_QUAD    = 0,
   BRW_TESS_DOMAIN_TRI     = 1,
   BRW_TESS_DOMAIN_ISOLINE = 2,
};
/** @} */
579
580 struct brw_vue_prog_data {
581 struct brw_stage_prog_data base;
582 struct brw_vue_map vue_map;
583
584 /** Should the hardware deliver input VUE handles for URB pull loads? */
585 bool include_vue_handles;
586
587 GLuint urb_read_length;
588 GLuint total_grf;
589
590 /* Used for calculating urb partitions. In the VS, this is the size of the
591 * URB entry used for both input and output to the thread. In the GS, this
592 * is the size of the URB entry used for output.
593 */
594 GLuint urb_entry_size;
595
596 enum shader_dispatch_mode dispatch_mode;
597 };
598
599 struct brw_vs_prog_data {
600 struct brw_vue_prog_data base;
601
602 GLbitfield64 inputs_read;
603
604 unsigned nr_attributes;
605
606 bool uses_vertexid;
607 bool uses_instanceid;
608 bool uses_basevertex;
609 bool uses_baseinstance;
610 bool uses_drawid;
611 };
612
613 struct brw_tcs_prog_data
614 {
615 struct brw_vue_prog_data base;
616
617 /** Number vertices in output patch */
618 int instances;
619 };
620
621
622 struct brw_tes_prog_data
623 {
624 struct brw_vue_prog_data base;
625
626 enum brw_tess_partitioning partitioning;
627 enum brw_tess_output_topology output_topology;
628 enum brw_tess_domain domain;
629 };
630
631 struct brw_gs_prog_data
632 {
633 struct brw_vue_prog_data base;
634
635 unsigned vertices_in;
636
637 /**
638 * Size of an output vertex, measured in HWORDS (32 bytes).
639 */
640 unsigned output_vertex_size_hwords;
641
642 unsigned output_topology;
643
644 /**
645 * Size of the control data (cut bits or StreamID bits), in hwords (32
646 * bytes). 0 if there is no control data.
647 */
648 unsigned control_data_header_size_hwords;
649
650 /**
651 * Format of the control data (either GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID
652 * if the control data is StreamID bits, or
653 * GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT if the control data is cut bits).
654 * Ignored if control_data_header_size is 0.
655 */
656 unsigned control_data_format;
657
658 bool include_primitive_id;
659
660 /**
661 * The number of vertices emitted, if constant - otherwise -1.
662 */
663 int static_vertex_count;
664
665 int invocations;
666
667 /**
668 * Gen6 transform feedback enabled flag.
669 */
670 bool gen6_xfb_enabled;
671
672 /**
673 * Gen6: Provoking vertex convention for odd-numbered triangles
674 * in tristrips.
675 */
676 GLuint pv_first:1;
677
678 /**
679 * Gen6: Number of varyings that are output to transform feedback.
680 */
681 GLuint num_transform_feedback_bindings:7; /* 0-BRW_MAX_SOL_BINDINGS */
682
683 /**
684 * Gen6: Map from the index of a transform feedback binding table entry to the
685 * gl_varying_slot that should be streamed out through that binding table
686 * entry.
687 */
688 unsigned char transform_feedback_bindings[64 /* BRW_MAX_SOL_BINDINGS */];
689
690 /**
691 * Gen6: Map from the index of a transform feedback binding table entry to the
692 * swizzles that should be used when streaming out data through that
693 * binding table entry.
694 */
695 unsigned char transform_feedback_swizzles[64 /* BRW_MAX_SOL_BINDINGS */];
696 };
697
698
699 /** @} */
700
/* Defined elsewhere.  NOTE(review): presumably allocates the returned
 * brw_compiler out of \p mem_ctx; confirm against the implementation.
 */
struct brw_compiler *
brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo);
703
704 /**
705 * Compile a vertex shader.
706 *
707 * Returns the final assembly and the program's size.
708 */
709 const unsigned *
710 brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
711 void *mem_ctx,
712 const struct brw_vs_prog_key *key,
713 struct brw_vs_prog_data *prog_data,
714 const struct nir_shader *shader,
715 gl_clip_plane *clip_planes,
716 bool use_legacy_snorm_formula,
717 int shader_time_index,
718 unsigned *final_assembly_size,
719 char **error_str);
720
/**
 * Compile a tessellation control shader.
 *
 * Returns the final assembly and the program's size.
 */
const unsigned *
brw_compile_tcs(const struct brw_compiler *compiler,
                void *log_data,
                void *mem_ctx,
                const struct brw_tcs_prog_key *key,
                struct brw_tcs_prog_data *prog_data,
                const struct nir_shader *nir,
                int shader_time_index,
                unsigned *final_assembly_size,
                char **error_str);
736
/**
 * Compile a tessellation evaluation shader.
 *
 * Returns the final assembly and the program's size.
 */
const unsigned *
brw_compile_tes(const struct brw_compiler *compiler, void *log_data,
                void *mem_ctx,
                const struct brw_tes_prog_key *key,
                struct brw_tes_prog_data *prog_data,
                const struct nir_shader *shader,
                struct gl_shader_program *shader_prog,
                int shader_time_index,
                unsigned *final_assembly_size,
                char **error_str);
752
/**
 * Compile a geometry shader.
 *
 * Returns the final assembly and the program's size.
 */
const unsigned *
brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
               void *mem_ctx,
               const struct brw_gs_prog_key *key,
               struct brw_gs_prog_data *prog_data,
               const struct nir_shader *shader,
               struct gl_shader_program *shader_prog,
               int shader_time_index,
               unsigned *final_assembly_size,
               char **error_str);
768
/**
 * Compile a fragment shader.
 *
 * Returns the final assembly and the program's size.
 */
const unsigned *
brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
               void *mem_ctx,
               const struct brw_wm_prog_key *key,
               struct brw_wm_prog_data *prog_data,
               const struct nir_shader *shader,
               struct gl_program *prog,
               int shader_time_index8,
               int shader_time_index16,
               bool use_rep_send,
               unsigned *final_assembly_size,
               char **error_str);
786
/**
 * Compile a compute shader.
 *
 * Returns the final assembly and the program's size.
 */
const unsigned *
brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
               void *mem_ctx,
               const struct brw_cs_prog_key *key,
               struct brw_cs_prog_data *prog_data,
               const struct nir_shader *shader,
               int shader_time_index,
               unsigned *final_assembly_size,
               char **error_str);
801
/**
 * Fill out local id payload for compute shader according to cs_prog_data.
 */
void
brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *cs_prog_data,
                             void *buffer, uint32_t threads, uint32_t stride);
808
809 #ifdef __cplusplus
810 } /* extern "C" */
811 #endif