nir: Add bit_count to lower_int64 pass
[mesa.git] / src / compiler / shader_info.h
1 /*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25 #ifndef SHADER_INFO_H
26 #define SHADER_INFO_H
27
28 #include "shader_enums.h"
29 #include <stdint.h>
30
31 #ifdef __cplusplus
32 extern "C" {
33 #endif
34
/**
 * Table of boolean flags, one per SPIR-V capability or extension feature.
 *
 * Every member is a plain `bool`; the struct carries no other state.  The
 * amd_*-prefixed flags are grouped at the end of the list.
 *
 * NOTE(review): presumably a driver fills this in and the SPIR-V front end
 * consults it to decide which capabilities of a module to accept - confirm
 * against the callers, which are not visible from this header.
 *
 * NOTE(review): `bool` is used here but only <stdint.h> is included directly;
 * presumably <stdbool.h> arrives through "shader_enums.h" - confirm.
 */
35 struct spirv_supported_capabilities {
36 bool address;
37 bool atomic_storage;
38 bool demote_to_helper_invocation;
39 bool derivative_group;
40 bool descriptor_array_dynamic_indexing;
41 bool descriptor_array_non_uniform_indexing;
42 bool descriptor_indexing;
43 bool device_group;
44 bool draw_parameters;
45 bool float32_atomic_add;
46 bool float64;
47 bool float64_atomic_add;
48 bool fragment_shader_sample_interlock;
49 bool fragment_shader_pixel_interlock;
50 bool geometry_streams;
51 bool image_ms_array;
52 bool image_read_without_format;
53 bool image_write_without_format;
54 bool int8;
55 bool int16;
56 bool int64;
57 bool int64_atomics;
58 bool integer_functions2;
59 bool kernel;
60 bool min_lod;
61 bool multiview;
62 bool physical_storage_buffer_address;
63 bool post_depth_coverage;
64 bool runtime_descriptor_array;
65 bool float_controls;
66 bool shader_clock;
67 bool shader_viewport_index_layer;
68 bool stencil_export;
69 bool storage_8bit;
70 bool storage_16bit;
71 bool storage_image_ms;
72 bool subgroup_arithmetic;
73 bool subgroup_ballot;
74 bool subgroup_basic;
75 bool subgroup_quad;
76 bool subgroup_shuffle;
77 bool subgroup_vote;
78 bool tessellation;
79 bool transform_feedback;
80 bool variable_pointers;
81 bool vk_memory_model;
82 bool vk_memory_model_device_scope;
83 bool float16;
/* amd_*-prefixed flags (presumably AMD vendor extensions - confirm). */
84 bool amd_fragment_mask;
85 bool amd_gcn_shader;
86 bool amd_shader_ballot;
87 bool amd_trinary_minmax;
88 bool amd_image_read_write_lod;
89 bool amd_shader_explicit_vertex_parameter;
90 bool amd_image_gather_bias_lod;
91 };
92
/**
 * Summary information about a single shader.
 *
 * The leading fields are stage-independent: names, the stage itself, resource
 * counts, input/output bitmasks and a run of one-bit flags.  The trailing
 * anonymous union holds per-stage data (vs, gs, fs, cs, tess); its members
 * share storage, so only one of them is meaningful for a given shader.
 *
 * NOTE(review): presumably the active union member is the one matching
 * `stage` - confirm against the code that fills this struct in.
 *
 * Many members are bit-fields, so their declaration order and widths
 * determine the struct layout; do not reorder or resize them casually.
 */
93 typedef struct shader_info {
/* Name of the shader.
 * NOTE(review): nullability is not stated here (compare `label`) - confirm.
 */
94 const char *name;
95
96 /* Descriptive name provided by the client; may be NULL */
97 const char *label;
98
99 /** The shader stage, such as MESA_SHADER_VERTEX. */
100 gl_shader_stage stage:8;
101
102 /** The shader stage in a non SSO linked program that follows this stage,
103 * such as MESA_SHADER_FRAGMENT.
104 */
105 gl_shader_stage next_stage:8;
106
107 /* Number of textures used by this shader */
108 uint8_t num_textures;
109 /* Number of uniform buffers used by this shader */
110 uint8_t num_ubos;
111 /* Number of atomic buffers used by this shader */
112 uint8_t num_abos;
113 /* Number of shader storage buffers (max .driver_location + 1) used by this
114 * shader. In the case of nir_lower_atomics_to_ssbo being used, this will
115 * be the number of actual SSBOs in gl_program->info, and the lowered SSBOs
116 * and atomic counters in nir_shader->info.
117 */
118 uint8_t num_ssbos;
119 /* Number of images used by this shader */
120 uint8_t num_images;
121
122 /* Which inputs are actually read */
123 uint64_t inputs_read;
124 /* Which outputs are actually written */
125 uint64_t outputs_written;
126 /* Which outputs are actually read */
127 uint64_t outputs_read;
128 /* Which system values are actually read */
129 uint64_t system_values_read;
130
131 /* Which patch inputs are actually read */
132 uint32_t patch_inputs_read;
133 /* Which patch outputs are actually written */
134 uint32_t patch_outputs_written;
135 /* Which patch outputs are read */
136 uint32_t patch_outputs_read;
137
138 /* Which inputs are read indirectly (subset of inputs_read) */
139 uint64_t inputs_read_indirectly;
140 /* Which outputs are read or written indirectly */
141 uint64_t outputs_accessed_indirectly;
142 /* Which patch inputs are read indirectly (subset of patch_inputs_read) */
/* NOTE(review): the two patch "indirectly" masks below are 64 bits wide,
 * while patch_inputs_read / patch_outputs_written / patch_outputs_read are
 * only 32 bits; confirm whether the width difference is intentional.
 */
143 uint64_t patch_inputs_read_indirectly;
144 /* Which patch outputs are read or written indirectly */
145 uint64_t patch_outputs_accessed_indirectly;
146
147 /** Bitfield of which textures are used */
148 uint32_t textures_used;
149
150 /** Bitfield of which textures are used by texelFetch() */
151 uint32_t textures_used_by_txf;
152
153 /** Bitfield of which images are used */
154 uint32_t images_used;
155 /** Bitfield of which images are buffers. */
156 uint32_t image_buffers;
157 /** Bitfield of which images are MSAA. */
158 uint32_t msaa_images;
159
160 /* SPV_KHR_float_controls: execution mode for floating point ops */
161 uint16_t float_controls_execution_mode;
162
163 /* The size of the gl_ClipDistance[] array, if declared (4-bit field). */
164 uint8_t clip_distance_array_size:4;
165
166 /* The size of the gl_CullDistance[] array, if declared (4-bit field). */
167 uint8_t cull_distance_array_size:4;
168
169 /* Whether or not this shader ever uses textureGather() */
170 bool uses_texture_gather:1;
171
172 /**
173 * True if this shader uses the fddx/fddy opcodes.
174 *
175 * Note that this does not include the "fine" and "coarse" variants.
176 */
177 bool uses_fddx_fddy:1;
178
179 /**
180 * True if this shader uses 64-bit ALU operations
181 */
182 bool uses_64bit:1;
183
184 /* Whether the first UBO is the default uniform buffer, i.e. uniforms. */
185 bool first_ubo_is_default_ubo:1;
186
187 /* Whether or not separate shader objects were used */
188 bool separate_shader:1;
189
190 /** Was this shader linked with any transform feedback varyings? */
191 bool has_transform_feedback_varyings:1;
192
193 /* Whether flrp has been lowered. */
194 bool flrp_lowered:1;
195
196 /* Whether nir_lower_io has been called to lower derefs.
197 * nir_variables for inputs and outputs might not be present in the IR.
198 */
199 bool io_lowered:1;
200
201 /* Whether the shader writes memory, including transform feedback. */
202 bool writes_memory:1;
203
204 /* Whether gl_Layer is viewport-relative */
205 bool layer_viewport_relative:1;
206
/* Per-stage data.  The members below overlap in storage (anonymous union),
 * so writing one invalidates the others.
 */
207 union {
/* Vertex-shader data. */
208 struct {
209 /* Which inputs are doubles */
210 uint64_t double_inputs;
211
212 /* For AMD-specific driver-internal shaders. It replaces vertex
213 * buffer loads with code generating VS inputs from scalar registers.
214 *
215 * Valid values: SI_VS_BLIT_SGPRS_POS_*
216 */
217 uint8_t blit_sgprs_amd:4;
218
219 /* True if the shader writes position in window space coordinates pre-transform */
220 bool window_space_position:1;
221 } vs;
222
/* Geometry-shader data. */
223 struct {
224 /** The output primitive type (GL enum value) */
225 uint16_t output_primitive;
226
227 /** The input primitive type (GL enum value) */
228 uint16_t input_primitive;
229
230 /** The maximum number of vertices the geometry shader might write. */
231 uint16_t vertices_out;
232
233 /** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */
234 uint8_t invocations;
235
236 /** The number of vertices received per input primitive (max. 6) */
237 uint8_t vertices_in:3;
238
239 /** Whether or not this shader uses EndPrimitive */
240 bool uses_end_primitive:1;
241
242 /** The streams used in this shader (max. 4) */
243 uint8_t active_stream_mask:4;
244 } gs;
245
/* Fragment-shader data. */
246 struct {
/* NOTE(review): presumably set when the shader contains discard and
 * demote operations respectively - confirm where these are written.
 */
247 bool uses_discard:1;
248 bool uses_demote:1;
249
250 /**
251 * True if this fragment shader requires helper invocations. This
252 * can be caused by the use of ALU derivative ops, texture
253 * instructions which do implicit derivatives, and the use of quad
254 * subgroup operations.
255 */
256 bool needs_helper_invocations:1;
257
258 /**
259 * Whether any inputs are declared with the "sample" qualifier.
260 */
261 bool uses_sample_qualifier:1;
262
263 /**
264 * Whether early fragment tests are enabled as defined by
265 * ARB_shader_image_load_store.
266 */
267 bool early_fragment_tests:1;
268
269 /**
270 * Defined by INTEL_conservative_rasterization.
271 */
272 bool inner_coverage:1;
273
274 bool post_depth_coverage:1;
275
276 /**
277 * \name ARB_fragment_coord_conventions
278 * @{
279 */
280 bool pixel_center_integer:1;
281 bool origin_upper_left:1;
282 /*@}*/
283
/* Interlock mode flags, one bit per mode.
 * NOTE(review): presumably at most one of the four is set - confirm.
 */
284 bool pixel_interlock_ordered:1;
285 bool pixel_interlock_unordered:1;
286 bool sample_interlock_ordered:1;
287 bool sample_interlock_unordered:1;
288
289 /**
290 * Flags whether NIR's base types on the FS color outputs should be
291 * ignored.
292 *
293 * GLSL requires that fragment shader output base types match the
294 * render target's base types for the behavior to be defined. From
295 * the GL 4.6 spec:
296 *
297 * "If the values written by the fragment shader do not match the
298 * format(s) of the corresponding color buffer(s), the result is
299 * undefined."
300 *
301 * However, for NIR shaders translated from TGSI, we don't have the
302 * output types any more, so the driver will need to do whatever
303 * fixups are necessary to handle effectively untyped data being
304 * output from the FS.
305 */
306 bool untyped_color_outputs:1;
307
308 /** gl_FragDepth layout for ARB_conservative_depth. */
309 enum gl_frag_depth_layout depth_layout:3;
310
311 /**
312 * Interpolation qualifiers for drivers that lower color inputs
313 * to system values.
314 */
315 unsigned color0_interp:3; /* glsl_interp_mode */
316 bool color0_sample:1;
317 bool color0_centroid:1;
318 unsigned color1_interp:3; /* glsl_interp_mode */
319 bool color1_sample:1;
320 bool color1_centroid:1;
321 } fs;
322
/* Compute-shader data. */
323 struct {
/* NOTE(review): three 16-bit components, presumably the x/y/z local
 * workgroup size - confirm.
 */
324 uint16_t local_size[3];
325
326 bool local_size_variable:1;
327 uint8_t user_data_components_amd:3;
328
329 /*
330 * Arrangement of invocations used to calculate derivatives in a compute
331 * shader. From NV_compute_shader_derivatives.
332 */
333 enum gl_derivative_group derivative_group:2;
334
335 /**
336 * Size of shared variables accessed by the compute shader.
337 */
338 unsigned shared_size;
339
340 /**
341 * pointer size is:
342 * AddressingModelLogical: 0 (default)
343 * AddressingModelPhysical32: 32
344 * AddressingModelPhysical64: 64
345 */
346 unsigned ptr_size;
347 } cs;
348
349 /* Applies to both TCS and TES. */
350 struct {
351 uint16_t primitive_mode; /* GL_TRIANGLES, GL_QUADS or GL_ISOLINES */
352
353 /** The number of vertices in the TCS output patch. */
354 uint8_t tcs_vertices_out;
355 enum gl_tess_spacing spacing:2;
356
357 /** Is the vertex order counterclockwise? */
358 bool ccw:1;
359 bool point_mode:1;
360
361 /* Bit mask of TCS per-vertex inputs (VS outputs) that are used
362 * with a vertex index that is NOT the invocation id
363 */
364 uint64_t tcs_cross_invocation_inputs_read;
365
366 /* Bit mask of TCS per-vertex outputs that are used
367 * with a vertex index that is NOT the invocation id
368 */
369 uint64_t tcs_cross_invocation_outputs_read;
370 } tess;
371 };
372 } shader_info;
373
374 #ifdef __cplusplus
375 }
376 #endif
377
378 #endif /* SHADER_INFO_H */