2 * © Copyright 2018 Alyssa Rosenzweig
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29 #include "pan_context.h"
31 #include "panfrost-quirks.h"
33 #include "compiler/nir/nir.h"
34 #include "nir/tgsi_to_nir.h"
35 #include "midgard/midgard_compile.h"
36 #include "bifrost/bifrost_compile.h"
37 #include "util/u_dynarray.h"
38 #include "util/u_upload_mgr.h"
40 #include "tgsi/tgsi_dump.h"
/*
 * Packs a MIDGARD_PROPERTIES descriptor into state->properties using values
 * already stored on the shader state: the UBO count, the uniform count and
 * the writes_global flag. suppress_inf_nan is always set to true.
 *
 * When stage is MESA_SHADER_FRAGMENT it additionally copies the
 * stencil-write and helper-invocation flags and selects the depth source
 * from state->writes_depth.
 *
 * NOTE(review): this listing has lost lines during extraction (the embedded
 * line numbers skip 45, 51, 59 and 61-65). The return type, the braces, and
 * whatever separates the fragment-only assignments from the
 * work_register_count assignment are not visible here; presumably line 59 is
 * an else branch -- confirm against the pristine file before editing.
 */
43 pan_pack_midgard_props(struct panfrost_shader_state
*state
,
44 gl_shader_stage stage
)
46 pan_pack(&state
->properties
, MIDGARD_PROPERTIES
, cfg
) {
/* Fields filled in for every stage */
47 cfg
.uniform_buffer_count
= state
->ubo_count
;
48 cfg
.uniform_count
= state
->uniform_count
;
49 cfg
.writes_globals
= state
->writes_global
;
50 cfg
.suppress_inf_nan
= true; /* XXX */
/* Fragment-only fields */
52 if (stage
== MESA_SHADER_FRAGMENT
) {
53 /* Work register count, early-z, reads at draw-time */
54 cfg
.stencil_from_shader
= state
->writes_stencil
;
55 cfg
.helper_invocation_enable
= state
->helper_invocations
;
/* Depth comes from the shader only if the shader writes depth */
56 cfg
.depth_source
= state
->writes_depth
?
57 MALI_DEPTH_SOURCE_SHADER
:
58 MALI_DEPTH_SOURCE_FIXED_FUNCTION
;
/* NOTE(review): the line between 58 and 60 is missing from this listing;
 * the comment at 53 suggests fragment shaders get their work register count
 * elsewhere, so this is presumably the non-fragment path -- TODO confirm. */
60 cfg
.work_register_count
= state
->work_reg_count
;
/*
 * Packs the Bifrost descriptors for a shader, per stage. For both the vertex
 * and fragment cases it packs:
 *   - BIFROST_PROPERTIES into state->properties (a hard-coded `unknown` word
 *     plus the UBO count), and
 *   - a stage-specific preload descriptor into state->preload
 *     (PRELOAD_VERTEX or PRELOAD_FRAGMENT) carrying the uniform count.
 *
 * NOTE(review): this listing has lost lines during extraction (embedded
 * numbers skip 68-69, 74-75, 78, 80-82, 86, 89-90 and 94-102). The return
 * type, the switch header, the break statements, any default case and the
 * statement at line 78 are not visible here -- confirm against the pristine
 * file before editing.
 */
66 pan_pack_bifrost_props(struct panfrost_shader_state
*state
,
67 gl_shader_stage stage
)
/* Vertex shaders */
70 case MESA_SHADER_VERTEX
:
71 pan_pack(&state
->properties
, BIFROST_PROPERTIES
, cfg
) {
/* Magic value; its meaning is not documented in this file */
72 cfg
.unknown
= 0x800000; /* XXX */
73 cfg
.uniform_buffer_count
= state
->ubo_count
;
76 pan_pack(&state
->preload
, PRELOAD_VERTEX
, cfg
) {
77 cfg
.uniform_count
= state
->uniform_count
;
/* NOTE(review): embedded line 78 is missing here; instance_id is set
 * unconditionally regardless of what the shader reads. */
79 cfg
.instance_id
= true;
/* Fragment shaders */
83 case MESA_SHADER_FRAGMENT
:
84 pan_pack(&state
->properties
, BIFROST_PROPERTIES
, cfg
) {
85 /* Early-Z set at draw-time */
/* Magic value; differs from the vertex case, meaning not documented here */
87 cfg
.unknown
= 0x950020; /* XXX */
88 cfg
.uniform_buffer_count
= state
->ubo_count
;
91 pan_pack(&state
->preload
, PRELOAD_FRAGMENT
, cfg
) {
92 cfg
.uniform_count
= state
->uniform_count
;
/* Preload the fragment position only when the shader state says it is read */
93 cfg
.fragment_position
= state
->reads_frag_coord
;
/*
 * Uploads a packed STATE_OPAQUE descriptor for the given shader state.
 *
 * Space is requested from ctx->state_uploader via u_upload_alloc; the
 * resulting offset and resource are stored in state->upload and the mapped
 * pointer is received in `out`. The descriptor is then packed in place from
 * state->shader and state->properties (plus state->preload when the device
 * quirks include IS_BIFROST), and the uploader is unmapped.
 *
 * NOTE(review): this listing has lost lines during extraction (embedded
 * numbers skip 105, 108, 111, 115, 118-119 and 121-123), so the return type
 * and braces are not visible here.
 */
103 pan_upload_shader_descriptor(struct panfrost_context
*ctx
,
104 struct panfrost_shader_state
*state
)
106 const struct panfrost_device
*dev
= pan_device(ctx
->base
.screen
);
107 struct mali_state_packed
*out
;
/* MALI_STATE_LENGTH is passed twice -- presumably as both the allocation
 * size and its alignment; verify against the u_upload_alloc prototype. */
109 u_upload_alloc(ctx
->state_uploader
, 0, MALI_STATE_LENGTH
, MALI_STATE_LENGTH
,
110 &state
->upload
.offset
, &state
->upload
.rsrc
, (void **) &out
);
/* NOTE(review): `out` is not checked before being handed to pan_pack;
 * whether the allocation can fail and whether pan_pack tolerates a NULL
 * destination is not visible in this file -- TODO confirm. */
112 pan_pack(out
, STATE_OPAQUE
, cfg
) {
113 cfg
.shader
= state
->shader
;
/* properties is copied bytewise from the pre-packed state */
114 memcpy(&cfg
.properties
, &state
->properties
, sizeof(state
->properties
));
/* The preload descriptor is only attached on Bifrost devices */
116 if (dev
->quirks
& IS_BIFROST
)
117 cfg
.preload
= state
->preload
;
120 u_upload_unmap(ctx
->state_uploader
);
/*
 * Maps a NIR ALU base type to the corresponding format bits. The visible
 * return values are MALI_FORMAT_SINT, MALI_FORMAT_UINT and
 * MALI_CHANNEL_FLOAT; any other base reaches unreachable("Invalid base").
 *
 * NOTE(review): every case label was lost during extraction (embedded
 * numbers skip 125-127, 129-130, 132 and 134), so which base type selects
 * which return value cannot be read from this listing. Presumably signed,
 * unsigned and float bases respectively -- confirm against the pristine
 * file before editing.
 */
124 pan_format_from_nir_base(nir_alu_type base
)
128 return MALI_FORMAT_SINT
;
131 return MALI_FORMAT_UINT
;
133 return MALI_CHANNEL_FLOAT
;
135 unreachable("Invalid base");
/*
 * Maps a (base type, bit size) pair to the size-related format bits.
 *
 * For base == nir_type_float: 16 yields MALI_FORMAT_SINT and 32 yields
 * MALI_FORMAT_UNORM; other sizes reach
 * unreachable("Invalid float size for format").
 * In the second group of cases: 8, 16 and 32 yield MALI_CHANNEL_8,
 * MALI_CHANNEL_16 and MALI_CHANNEL_32; other sizes reach
 * unreachable("Invalid int size for format").
 *
 * NOTE(review): the switch headers, default labels and the construct
 * separating the float cases from the integer cases were lost during
 * extraction (embedded numbers skip 141, 143, 146, 148-151 and 155);
 * presumably the second group is the else branch of the float test, and
 * line 151 may hold another case label -- confirm against the pristine file.
 */
140 pan_format_from_nir_size(nir_alu_type base
, unsigned size
)
142 if (base
== nir_type_float
) {
/* NOTE(review): the SINT/UNORM names for float sizes look surprising;
 * presumably the float size bits share an encoding with these format
 * values -- TODO confirm this is intentional. */
144 case 16: return MALI_FORMAT_SINT
;
145 case 32: return MALI_FORMAT_UNORM
;
147 unreachable("Invalid float size for format");
152 case 8: return MALI_CHANNEL_8
;
153 case 16: return MALI_CHANNEL_16
;
154 case 32: return MALI_CHANNEL_32
;
156 unreachable("Invalid int size for format");
/*
 * Builds the mali_format for a varying from its GLSL type, precision
 * qualifier and fractional location.
 *
 * The type is first stripped of array/matrix wrappers; its GLSL base type is
 * converted to a NIR ALU type, from which the base type and bit size are
 * taken. The component count must end up in [1, 4] (asserted). The result is
 * the OR of the base bits (pan_format_from_nir_base), the size bits
 * (pan_format_from_nir_size) and MALI_NR_CHANNELS(chan).
 *
 * NOTE(review): this listing has lost lines during extraction (embedded
 * numbers skip 163, 168, 173-174, 176, 179, 184, 187, 189-192 and 196-197).
 * The statement that applies `frac` to `chan` and the statements governed by
 * the is_16 test are not visible here; presumably the latter override `size`
 * -- confirm against the pristine file before editing.
 */
161 static enum mali_format
162 pan_format_from_glsl(const struct glsl_type
*type
, unsigned precision
, unsigned frac
)
/* Look through arrays and matrices to the underlying column/scalar type */
164 const struct glsl_type
*column
= glsl_without_array_or_matrix(type
);
165 enum glsl_base_type glsl_base
= glsl_get_base_type(column
);
166 nir_alu_type t
= nir_get_nir_type_for_glsl_base_type(glsl_base
);
167 unsigned chan
= glsl_get_components(column
);
169 /* If we have a fractional location added, we need to increase the size
170 * so it will fit, i.e. a vec3 in YZW requires us to allocate a vec4.
171 * We could do better but this is an edge case as it is, normally
172 * packed varyings will be aligned. */
/* NOTE(review): the statement this comment describes (embedded line 173) is
 * missing from this listing. */
175 assert(chan
>= 1 && chan
<= 4);
177 unsigned base
= nir_alu_type_get_base_type(t
);
178 unsigned size
= nir_alu_type_get_type_size(t
);
180 /* Demote to fp16 where possible. int16 varyings are TODO as the hw
181 * will saturate instead of wrap which is not conformant, so we need to
182 * insert i2i16/u2u16 instructions before the st_vary_32i/32u to get
183 * the intended behaviour */
/* Medium and low precision are both treated as 16-bit candidates */
185 bool is_16
= (precision
== GLSL_PRECISION_MEDIUM
)
186 || (precision
== GLSL_PRECISION_LOW
);
/* Only float varyings are demoted; the bodies of this test (embedded lines
 * 189-191) are missing from this listing. */
188 if (is_16
&& base
== nir_type_float
)
193 return pan_format_from_nir_base(base
) |
194 pan_format_from_nir_size(base
, size
) |
195 MALI_NR_CHANNELS(chan
);
/*
 * Translates a NIR ALU type into the bifrost_shader_type used for blending.
 *
 * Mappings visible in this listing: nir_type_float16 -> BIFROST_BLEND_F16,
 * nir_type_float32 -> BIFROST_BLEND_F32, nir_type_uint32 ->
 * BIFROST_BLEND_U32, nir_type_uint16 -> BIFROST_BLEND_U16. A value of 0
 * means the render target is not in use. Anything unhandled reaches
 * unreachable("Unsupported blend shader type for NIR alu type").
 *
 * NOTE(review): this listing has lost lines during extraction (embedded
 * numbers skip 200-201, 203, 208, 212, 216 and 218-222). The switch header,
 * the result returned for case 0, and the case labels preceding the
 * BIFROST_BLEND_I32 and BIFROST_BLEND_I16 returns are not visible;
 * presumably those are the signed 32- and 16-bit integer types -- confirm
 * against the pristine file before editing.
 */
198 static enum bifrost_shader_type
199 bifrost_blend_type_from_nir(nir_alu_type nir_type
)
202 case 0: /* Render target not in use */
204 case nir_type_float16
:
205 return BIFROST_BLEND_F16
;
206 case nir_type_float32
:
207 return BIFROST_BLEND_F32
;
/* NOTE(review): case label (embedded line 208) missing from this listing */
209 return BIFROST_BLEND_I32
;
210 case nir_type_uint32
:
211 return BIFROST_BLEND_U32
;
/* NOTE(review): case label (embedded line 212) missing from this listing */
213 return BIFROST_BLEND_I16
;
214 case nir_type_uint16
:
215 return BIFROST_BLEND_U16
;
217 unreachable("Unsupported blend shader type for NIR alu type");
223 panfrost_shader_compile(struct panfrost_context
*ctx
,
224 enum pipe_shader_ir ir_type
,
226 gl_shader_stage stage
,
227 struct panfrost_shader_state
*state
,
228 uint64_t *outputs_written
)
230 struct panfrost_device
*dev
= pan_device(ctx
->base
.screen
);
234 if (ir_type
== PIPE_SHADER_IR_NIR
) {
235 s
= nir_shader_clone(NULL
, ir
);
237 assert (ir_type
== PIPE_SHADER_IR_TGSI
);
238 s
= tgsi_to_nir(ir
, ctx
->base
.screen
, false);
241 s
->info
.stage
= stage
;
243 /* Call out to Midgard compiler given the above NIR */
244 panfrost_program program
= {0};
245 memcpy(program
.rt_formats
, state
->rt_formats
, sizeof(program
.rt_formats
));
247 if (dev
->quirks
& IS_BIFROST
) {
248 bifrost_compile_shader_nir(s
, &program
, dev
->gpu_id
);
250 midgard_compile_shader_nir(s
, &program
, false, 0, dev
->gpu_id
,
251 dev
->debug
& PAN_DBG_PRECOMPILE
, false);
254 /* Prepare the compiled binary for upload */
256 unsigned attribute_count
= 0, varying_count
= 0;
257 int size
= program
.compiled
.size
;
260 state
->bo
= panfrost_bo_create(dev
, size
, PAN_BO_EXECUTE
);
261 memcpy(state
->bo
->cpu
, program
.compiled
.data
, size
);
262 shader
= state
->bo
->gpu
;
265 /* Midgard needs the first tag on the bottom nibble */
267 if (!(dev
->quirks
& IS_BIFROST
)) {
268 /* If size = 0, we tag as "end-of-shader" */
271 shader
|= program
.first_tag
;
276 util_dynarray_fini(&program
.compiled
);
278 state
->sysval_count
= program
.sysval_count
;
279 memcpy(state
->sysval
, program
.sysvals
, sizeof(state
->sysval
[0]) * state
->sysval_count
);
281 bool vertex_id
= s
->info
.system_values_read
& (1 << SYSTEM_VALUE_VERTEX_ID
);
282 bool instance_id
= s
->info
.system_values_read
& (1 << SYSTEM_VALUE_INSTANCE_ID
);
284 /* On Bifrost it's a sysval, on Midgard it's a varying */
285 state
->reads_frag_coord
= s
->info
.system_values_read
& (1 << SYSTEM_VALUE_FRAG_COORD
);
287 state
->writes_global
= s
->info
.writes_memory
;
290 case MESA_SHADER_VERTEX
:
291 attribute_count
= util_bitcount64(s
->info
.inputs_read
);
292 varying_count
= util_bitcount64(s
->info
.outputs_written
);
295 attribute_count
= MAX2(attribute_count
, PAN_VERTEX_ID
+ 1);
298 attribute_count
= MAX2(attribute_count
, PAN_INSTANCE_ID
+ 1);
301 case MESA_SHADER_FRAGMENT
:
302 varying_count
= util_bitcount64(s
->info
.inputs_read
);
303 if (s
->info
.outputs_written
& BITFIELD64_BIT(FRAG_RESULT_DEPTH
))
304 state
->writes_depth
= true;
305 if (s
->info
.outputs_written
& BITFIELD64_BIT(FRAG_RESULT_STENCIL
))
306 state
->writes_stencil
= true;
308 uint64_t outputs_read
= s
->info
.outputs_read
;
309 if (outputs_read
& BITFIELD64_BIT(FRAG_RESULT_COLOR
))
310 outputs_read
|= BITFIELD64_BIT(FRAG_RESULT_DATA0
);
312 state
->outputs_read
= outputs_read
>> FRAG_RESULT_DATA0
;
314 /* List of reasons we need to execute frag shaders when things
318 s
->info
.writes_memory
||
319 s
->info
.fs
.uses_discard
||
320 s
->info
.fs
.uses_demote
;
322 case MESA_SHADER_COMPUTE
:
324 state
->shared_size
= s
->info
.cs
.shared_size
;
327 unreachable("Unknown shader state");
330 state
->can_discard
= s
->info
.fs
.uses_discard
;
331 state
->helper_invocations
= s
->info
.fs
.needs_helper_invocations
;
332 state
->stack_size
= program
.tls_size
;
334 state
->reads_frag_coord
= s
->info
.inputs_read
& (1 << VARYING_SLOT_POS
);
335 state
->reads_point_coord
= s
->info
.inputs_read
& (1 << VARYING_SLOT_PNTC
);
336 state
->reads_face
= s
->info
.inputs_read
& (1 << VARYING_SLOT_FACE
);
337 state
->writes_point_size
= s
->info
.outputs_written
& (1 << VARYING_SLOT_PSIZ
);
340 *outputs_written
= s
->info
.outputs_written
;
342 /* Separate as primary uniform count is truncated. Sysvals are prefix
344 state
->uniform_count
= MIN2(s
->num_uniforms
+ program
.sysval_count
, program
.uniform_cutoff
);
345 state
->work_reg_count
= program
.work_register_count
;
347 if (dev
->quirks
& IS_BIFROST
)
348 for (unsigned i
= 0; i
< BIFROST_MAX_RENDER_TARGET_COUNT
; i
++)
349 state
->blend_types
[i
] = bifrost_blend_type_from_nir(program
.blend_types
[i
]);
351 /* Record the varying mapping for the command stream's bookkeeping */
353 nir_variable_mode varying_mode
=
354 stage
== MESA_SHADER_VERTEX
? nir_var_shader_out
: nir_var_shader_in
;
356 nir_foreach_variable_with_modes(var
, s
, varying_mode
) {
357 unsigned loc
= var
->data
.driver_location
;
358 unsigned sz
= glsl_count_attribute_slots(var
->type
, FALSE
);
360 for (int c
= 0; c
< sz
; ++c
) {
361 state
->varyings_loc
[loc
+ c
] = var
->data
.location
+ c
;
362 state
->varyings
[loc
+ c
] = pan_format_from_glsl(var
->type
,
363 var
->data
.precision
, var
->data
.location_frac
);
367 /* Needed for linkage */
368 state
->attribute_count
= attribute_count
;
369 state
->varying_count
= varying_count
;
370 state
->ubo_count
= s
->info
.num_ubos
+ 1; /* off-by-one for uniforms */
372 /* Prepare the descriptors at compile-time */
373 pan_pack(&state
->shader
, SHADER
, cfg
) {
375 cfg
.attribute_count
= attribute_count
;
376 cfg
.varying_count
= varying_count
;
377 cfg
.texture_count
= s
->info
.num_textures
;
378 cfg
.sampler_count
= cfg
.texture_count
;
381 if (dev
->quirks
& IS_BIFROST
)
382 pan_pack_bifrost_props(state
, stage
);
384 pan_pack_midgard_props(state
, stage
);
386 if (stage
!= MESA_SHADER_FRAGMENT
)
387 pan_upload_shader_descriptor(ctx
, state
);
389 /* In both clone and tgsi_to_nir paths, the shader is ralloc'd against