2 * © Copyright 2018 Alyssa Rosenzweig
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29 #include "pan_context.h"
31 #include "panfrost-quirks.h"
33 #include "compiler/nir/nir.h"
34 #include "nir/tgsi_to_nir.h"
35 #include "midgard/midgard_compile.h"
36 #include "bifrost/bifrost_compile.h"
37 #include "util/u_dynarray.h"
39 #include "tgsi/tgsi_dump.h"
/* Packs the MIDGARD_PROPERTIES descriptor into state->properties using
 * values already recorded on the shader state: the UBO count, the uniform
 * count and the writes_global flag. suppress_inf_nan is always set to true.
 *
 * When stage is MESA_SHADER_FRAGMENT it additionally fills
 * stencil_from_shader, helper_invocation_enable and depth_source.
 *
 * NOTE(review): this listing has lost lines (the embedded original numbering
 * skips 44, 50 and 58, among others). In particular the line between the
 * depth_source assignment and the work_register_count assignment is missing,
 * so it cannot be determined from here whether work_register_count is
 * written for every stage or only on a non-fragment branch -- confirm
 * against the upstream file. */
42 pan_pack_midgard_props(struct panfrost_shader_state
*state
,
43 gl_shader_stage stage
)
45 pan_pack(&state
->properties
, MIDGARD_PROPERTIES
, cfg
) {
46 cfg
.uniform_buffer_count
= state
->ubo_count
;
47 cfg
.uniform_count
= state
->uniform_count
;
48 cfg
.writes_globals
= state
->writes_global
;
49 cfg
.suppress_inf_nan
= true; /* XXX */
/* Fields that are only filled in for fragment shaders. */
51 if (stage
== MESA_SHADER_FRAGMENT
) {
52 /* Work register count, early-z, reads at draw-time */
53 cfg
.stencil_from_shader
= state
->writes_stencil
;
54 cfg
.helper_invocation_enable
= state
->helper_invocations
;
/* Depth is sourced from the shader only when the shader writes depth;
 * otherwise the fixed-function source is selected. */
55 cfg
.depth_source
= state
->writes_depth
?
56 MALI_DEPTH_SOURCE_SHADER
:
57 MALI_DEPTH_SOURCE_FIXED_FUNCTION
;
59 cfg
.work_register_count
= state
->work_reg_count
;
/* Packs the Bifrost descriptors for a shader stage. Two cases are visible in
 * this listing, MESA_SHADER_VERTEX and MESA_SHADER_FRAGMENT. Each one packs:
 *
 *  - BIFROST_PROPERTIES into state->properties, holding a fixed `unknown`
 *    value and the UBO count;
 *  - a stage-specific preload descriptor (PRELOAD_VERTEX or
 *    PRELOAD_FRAGMENT) into state->preload, holding the uniform count.
 *
 * The vertex preload also sets instance_id; the fragment preload sets
 * fragment_position from state->reads_frag_coord.
 *
 * NOTE(review): the embedded original numbering skips lines 67-68, 77,
 * 79-81 and 93 onwards, so the statement that dispatches on `stage`, any
 * further preload fields, the ends of the cases and the handling of other
 * stages are not visible here -- confirm against the upstream file. */
65 pan_pack_bifrost_props(struct panfrost_shader_state
*state
,
66 gl_shader_stage stage
)
/* Vertex stage: properties descriptor followed by the vertex preload. */
69 case MESA_SHADER_VERTEX
:
70 pan_pack(&state
->properties
, BIFROST_PROPERTIES
, cfg
) {
71 cfg
.unknown
= 0x800000; /* XXX */
72 cfg
.uniform_buffer_count
= state
->ubo_count
;
75 pan_pack(&state
->preload
, PRELOAD_VERTEX
, cfg
) {
76 cfg
.uniform_count
= state
->uniform_count
;
78 cfg
.instance_id
= true;
/* Fragment stage: properties descriptor followed by the fragment preload. */
82 case MESA_SHADER_FRAGMENT
:
83 pan_pack(&state
->properties
, BIFROST_PROPERTIES
, cfg
) {
84 /* Early-Z set at draw-time */
86 cfg
.unknown
= 0x950020; /* XXX */
87 cfg
.uniform_buffer_count
= state
->ubo_count
;
90 pan_pack(&state
->preload
, PRELOAD_FRAGMENT
, cfg
) {
91 cfg
.uniform_count
= state
->uniform_count
;
92 cfg
.fragment_position
= state
->reads_frag_coord
;
/* Translates a NIR ALU base type into a Mali format constant. The returns
 * visible here are MALI_FORMAT_SINT, MALI_FORMAT_UINT and
 * MALI_CHANNEL_FLOAT; any other input reaches unreachable("Invalid base").
 *
 * NOTE(review): the labels that select each return (original lines 103-105,
 * 107-108 and 110) are missing from this listing, so which base types map to
 * which constant cannot be read off here. Presumably signed, unsigned and
 * float bases respectively -- confirm against the upstream file. */
102 pan_format_from_nir_base(nir_alu_type base
)
106 return MALI_FORMAT_SINT
;
109 return MALI_FORMAT_UINT
;
111 return MALI_CHANNEL_FLOAT
;
113 unreachable("Invalid base");
/* Picks the size-dependent part of a Mali format from a NIR base type and a
 * bit size.
 *
 * When base is nir_type_float: size 16 returns MALI_FORMAT_SINT and size 32
 * returns MALI_FORMAT_UNORM; other sizes reach
 * unreachable("Invalid float size for format").
 *
 * The second group of cases returns MALI_CHANNEL_8 / _16 / _32 for sizes
 * 8 / 16 / 32; other sizes reach unreachable("Invalid int size for format").
 *
 * NOTE(review): the SINT/UNORM names in the float branch look surprising;
 * presumably these constants double as size encodings for float formats --
 * confirm against the format definitions. Original lines 126-129 are missing
 * from this listing; presumably they open the non-float branch. */
118 pan_format_from_nir_size(nir_alu_type base
, unsigned size
)
120 if (base
== nir_type_float
) {
122 case 16: return MALI_FORMAT_SINT
;
123 case 32: return MALI_FORMAT_UNORM
;
125 unreachable("Invalid float size for format");
130 case 8: return MALI_CHANNEL_8
;
131 case 16: return MALI_CHANNEL_16
;
132 case 32: return MALI_CHANNEL_32
;
134 unreachable("Invalid int size for format");
/* Builds the mali_format describing a varying of the given GLSL type.
 *
 * type:      GLSL type of the variable; arrays and matrices are stripped to
 *            their column type before inspection.
 * precision: GLSL precision qualifier; GLSL_PRECISION_MEDIUM and
 *            GLSL_PRECISION_LOW both set is_16.
 * frac:      fractional (component) location of the variable.
 *
 * The return value ORs together pan_format_from_nir_base(base),
 * pan_format_from_nir_size(base, size) and MALI_NR_CHANNELS(chan).
 *
 * NOTE(review): original lines 151 and 167-169 are missing from this listing,
 * so how `frac` adjusts `chan` and what `size` becomes when
 * is_16 && base == nir_type_float is not visible. The surrounding comments
 * suggest chan is grown by frac and size is demoted to 16 -- confirm against
 * the upstream file. */
139 static enum mali_format
140 pan_format_from_glsl(const struct glsl_type
*type
, unsigned precision
, unsigned frac
)
/* Reduce the type to a single column, then derive its NIR ALU type and
 * component count. */
142 const struct glsl_type
*column
= glsl_without_array_or_matrix(type
);
143 enum glsl_base_type glsl_base
= glsl_get_base_type(column
);
144 nir_alu_type t
= nir_get_nir_type_for_glsl_base_type(glsl_base
);
145 unsigned chan
= glsl_get_components(column
);
147 /* If we have a fractional location added, we need to increase the size
 * so it will fit, i.e. a vec3 in YZW requires us to allocate a vec4.
 * We could do better but this is an edge case as it is, normally
 * packed varyings will be aligned. */
153 assert(chan
>= 1 && chan
<= 4);
/* Split the ALU type into its base type and bit size. */
155 unsigned base
= nir_alu_type_get_base_type(t
);
156 unsigned size
= nir_alu_type_get_type_size(t
);
158 /* Demote to fp16 where possible. int16 varyings are TODO as the hw
 * will saturate instead of wrap which is not conformant, so we need to
 * insert i2i16/u2u16 instructions before the st_vary_32i/32u to get
 * the intended behaviour */
163 bool is_16
= (precision
== GLSL_PRECISION_MEDIUM
)
164 || (precision
== GLSL_PRECISION_LOW
);
166 if (is_16
&& base
== nir_type_float
)
/* OR together the base-type format, the size format and the channel count. */
171 return pan_format_from_nir_base(base
) |
172 pan_format_from_nir_size(base
, size
) |
173 MALI_NR_CHANNELS(chan
);
/* Maps the NIR ALU type of a render target to the bifrost_shader_type used
 * for blending.
 *
 * Mappings visible in this listing:
 *   nir_type_float16 -> BIFROST_BLEND_F16
 *   nir_type_float32 -> BIFROST_BLEND_F32
 *   nir_type_uint32  -> BIFROST_BLEND_U32
 *   nir_type_uint16  -> BIFROST_BLEND_U16
 * A value of 0 means the render target is not in use. Any other type reaches
 * unreachable("Unsupported blend shader type for NIR alu type").
 *
 * NOTE(review): original lines 181, 186 and 190 are missing from this
 * listing: the statement executed for case 0 and the labels preceding the
 * BIFROST_BLEND_I32 and BIFROST_BLEND_I16 returns. Presumably the latter are
 * the signed 32- and 16-bit integer types -- confirm against the upstream
 * file. */
176 static enum bifrost_shader_type
177 bifrost_blend_type_from_nir(nir_alu_type nir_type
)
180 case 0: /* Render target not in use */
182 case nir_type_float16
:
183 return BIFROST_BLEND_F16
;
184 case nir_type_float32
:
185 return BIFROST_BLEND_F32
;
187 return BIFROST_BLEND_I32
;
188 case nir_type_uint32
:
189 return BIFROST_BLEND_U32
;
191 return BIFROST_BLEND_I16
;
192 case nir_type_uint16
:
193 return BIFROST_BLEND_U16
;
195 unreachable("Unsupported blend shader type for NIR alu type");
201 panfrost_shader_compile(struct panfrost_context
*ctx
,
202 enum pipe_shader_ir ir_type
,
204 gl_shader_stage stage
,
205 struct panfrost_shader_state
*state
,
206 uint64_t *outputs_written
)
208 struct panfrost_device
*dev
= pan_device(ctx
->base
.screen
);
212 if (ir_type
== PIPE_SHADER_IR_NIR
) {
213 s
= nir_shader_clone(NULL
, ir
);
215 assert (ir_type
== PIPE_SHADER_IR_TGSI
);
216 s
= tgsi_to_nir(ir
, ctx
->base
.screen
, false);
219 s
->info
.stage
= stage
;
221 /* Call out to Midgard compiler given the above NIR */
222 panfrost_program program
= {};
223 memcpy(program
.rt_formats
, state
->rt_formats
, sizeof(program
.rt_formats
));
225 if (dev
->quirks
& IS_BIFROST
) {
226 bifrost_compile_shader_nir(s
, &program
, dev
->gpu_id
);
228 midgard_compile_shader_nir(s
, &program
, false, 0, dev
->gpu_id
,
229 dev
->debug
& PAN_DBG_PRECOMPILE
, false);
232 /* Prepare the compiled binary for upload */
234 unsigned attribute_count
= 0, varying_count
= 0;
235 int size
= program
.compiled
.size
;
238 state
->bo
= panfrost_bo_create(dev
, size
, PAN_BO_EXECUTE
);
239 memcpy(state
->bo
->cpu
, program
.compiled
.data
, size
);
240 shader
= state
->bo
->gpu
;
243 /* Midgard needs the first tag on the bottom nibble */
245 if (!(dev
->quirks
& IS_BIFROST
)) {
246 /* If size = 0, we tag as "end-of-shader" */
249 shader
|= program
.first_tag
;
254 util_dynarray_fini(&program
.compiled
);
256 state
->sysval_count
= program
.sysval_count
;
257 memcpy(state
->sysval
, program
.sysvals
, sizeof(state
->sysval
[0]) * state
->sysval_count
);
259 bool vertex_id
= s
->info
.system_values_read
& (1 << SYSTEM_VALUE_VERTEX_ID
);
260 bool instance_id
= s
->info
.system_values_read
& (1 << SYSTEM_VALUE_INSTANCE_ID
);
262 /* On Bifrost it's a sysval, on Midgard it's a varying */
263 state
->reads_frag_coord
= s
->info
.system_values_read
& (1 << SYSTEM_VALUE_FRAG_COORD
);
265 state
->writes_global
= s
->info
.writes_memory
;
268 case MESA_SHADER_VERTEX
:
269 attribute_count
= util_bitcount64(s
->info
.inputs_read
);
270 varying_count
= util_bitcount64(s
->info
.outputs_written
);
273 attribute_count
= MAX2(attribute_count
, PAN_VERTEX_ID
+ 1);
276 attribute_count
= MAX2(attribute_count
, PAN_INSTANCE_ID
+ 1);
279 case MESA_SHADER_FRAGMENT
:
280 varying_count
= util_bitcount64(s
->info
.inputs_read
);
281 if (s
->info
.outputs_written
& BITFIELD64_BIT(FRAG_RESULT_DEPTH
))
282 state
->writes_depth
= true;
283 if (s
->info
.outputs_written
& BITFIELD64_BIT(FRAG_RESULT_STENCIL
))
284 state
->writes_stencil
= true;
286 uint64_t outputs_read
= s
->info
.outputs_read
;
287 if (outputs_read
& BITFIELD64_BIT(FRAG_RESULT_COLOR
))
288 outputs_read
|= BITFIELD64_BIT(FRAG_RESULT_DATA0
);
290 state
->outputs_read
= outputs_read
>> FRAG_RESULT_DATA0
;
292 /* List of reasons we need to execute frag shaders when things
296 s
->info
.writes_memory
||
297 s
->info
.fs
.uses_discard
||
298 s
->info
.fs
.uses_demote
;
300 case MESA_SHADER_COMPUTE
:
302 state
->shared_size
= s
->info
.cs
.shared_size
;
305 unreachable("Unknown shader state");
308 state
->can_discard
= s
->info
.fs
.uses_discard
;
309 state
->helper_invocations
= s
->info
.fs
.needs_helper_invocations
;
310 state
->stack_size
= program
.tls_size
;
312 state
->reads_frag_coord
= s
->info
.inputs_read
& (1 << VARYING_SLOT_POS
);
313 state
->reads_point_coord
= s
->info
.inputs_read
& (1 << VARYING_SLOT_PNTC
);
314 state
->reads_face
= s
->info
.inputs_read
& (1 << VARYING_SLOT_FACE
);
315 state
->writes_point_size
= s
->info
.outputs_written
& (1 << VARYING_SLOT_PSIZ
);
318 *outputs_written
= s
->info
.outputs_written
;
320 /* Separate as primary uniform count is truncated. Sysvals are prefix
322 state
->uniform_count
= MIN2(s
->num_uniforms
+ program
.sysval_count
, program
.uniform_cutoff
);
323 state
->work_reg_count
= program
.work_register_count
;
325 if (dev
->quirks
& IS_BIFROST
)
326 for (unsigned i
= 0; i
< BIFROST_MAX_RENDER_TARGET_COUNT
; i
++)
327 state
->blend_types
[i
] = bifrost_blend_type_from_nir(program
.blend_types
[i
]);
329 /* Record the varying mapping for the command stream's bookkeeping */
331 nir_variable_mode varying_mode
=
332 stage
== MESA_SHADER_VERTEX
? nir_var_shader_out
: nir_var_shader_in
;
334 nir_foreach_variable_with_modes(var
, s
, varying_mode
) {
335 unsigned loc
= var
->data
.driver_location
;
336 unsigned sz
= glsl_count_attribute_slots(var
->type
, FALSE
);
338 for (int c
= 0; c
< sz
; ++c
) {
339 state
->varyings_loc
[loc
+ c
] = var
->data
.location
+ c
;
340 state
->varyings
[loc
+ c
] = pan_format_from_glsl(var
->type
,
341 var
->data
.precision
, var
->data
.location_frac
);
345 /* Needed for linkage */
346 state
->attribute_count
= attribute_count
;
347 state
->varying_count
= varying_count
;
348 state
->ubo_count
= s
->info
.num_ubos
+ 1; /* off-by-one for uniforms */
350 /* Prepare the descriptors at compile-time */
351 pan_pack(&state
->shader
, SHADER
, cfg
) {
353 cfg
.attribute_count
= attribute_count
;
354 cfg
.varying_count
= varying_count
;
355 cfg
.texture_count
= s
->info
.num_textures
;
356 cfg
.sampler_count
= cfg
.texture_count
;
359 if (dev
->quirks
& IS_BIFROST
)
360 pan_pack_bifrost_props(state
, stage
);
362 pan_pack_midgard_props(state
, stage
);
364 /* In both clone and tgsi_to_nir paths, the shader is ralloc'd against