2 * © Copyright 2018 Alyssa Rosenzweig
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 #include "pan_context.h"
31 #include "panfrost-quirks.h"
33 #include "compiler/nir/nir.h"
34 #include "nir/tgsi_to_nir.h"
35 #include "midgard/midgard_compile.h"
36 #include "bifrost/bifrost_compile.h"
37 #include "util/u_dynarray.h"
39 #include "tgsi/tgsi_dump.h"
42 pan_format_from_nir_base(nir_alu_type base
)
46 return MALI_FORMAT_SINT
;
49 return MALI_FORMAT_UINT
;
51 return MALI_CHANNEL_FLOAT
;
53 unreachable("Invalid base");
58 pan_format_from_nir_size(nir_alu_type base
, unsigned size
)
60 if (base
== nir_type_float
) {
62 case 16: return MALI_FORMAT_SINT
;
63 case 32: return MALI_FORMAT_UNORM
;
65 unreachable("Invalid float size for format");
70 case 8: return MALI_CHANNEL_8
;
71 case 16: return MALI_CHANNEL_16
;
72 case 32: return MALI_CHANNEL_32
;
74 unreachable("Invalid int size for format");
79 static enum mali_format
80 pan_format_from_glsl(const struct glsl_type
*type
, unsigned precision
, unsigned frac
)
82 const struct glsl_type
*column
= glsl_without_array_or_matrix(type
);
83 enum glsl_base_type glsl_base
= glsl_get_base_type(column
);
84 nir_alu_type t
= nir_get_nir_type_for_glsl_base_type(glsl_base
);
85 unsigned chan
= glsl_get_components(column
);
87 /* If we have a fractional location added, we need to increase the size
88 * so it will fit, i.e. a vec3 in YZW requires us to allocate a vec4.
89 * We could do better but this is an edge case as it is, normally
90 * packed varyings will be aligned. */
93 assert(chan
>= 1 && chan
<= 4);
95 unsigned base
= nir_alu_type_get_base_type(t
);
96 unsigned size
= nir_alu_type_get_type_size(t
);
98 /* Demote to fp16 where possible. int16 varyings are TODO as the hw
99 * will saturate instead of wrap which is not conformant, so we need to
100 * insert i2i16/u2u16 instructions before the st_vary_32i/32u to get
101 * the intended behaviour */
103 bool is_16
= (precision
== GLSL_PRECISION_MEDIUM
)
104 || (precision
== GLSL_PRECISION_LOW
);
106 if (is_16
&& base
== nir_type_float
)
111 return pan_format_from_nir_base(base
) |
112 pan_format_from_nir_size(base
, size
) |
113 MALI_NR_CHANNELS(chan
);
116 static enum bifrost_shader_type
117 bifrost_blend_type_from_nir(nir_alu_type nir_type
)
120 case 0: /* Render target not in use */
122 case nir_type_float16
:
123 return BIFROST_BLEND_F16
;
124 case nir_type_float32
:
125 return BIFROST_BLEND_F32
;
127 return BIFROST_BLEND_I32
;
128 case nir_type_uint32
:
129 return BIFROST_BLEND_U32
;
131 return BIFROST_BLEND_I16
;
132 case nir_type_uint16
:
133 return BIFROST_BLEND_U16
;
135 unreachable("Unsupported blend shader type for NIR alu type");
141 panfrost_shader_compile(struct panfrost_context
*ctx
,
142 enum pipe_shader_ir ir_type
,
144 gl_shader_stage stage
,
145 struct panfrost_shader_state
*state
,
146 uint64_t *outputs_written
)
148 struct panfrost_device
*dev
= pan_device(ctx
->base
.screen
);
153 if (ir_type
== PIPE_SHADER_IR_NIR
) {
154 s
= nir_shader_clone(NULL
, ir
);
156 assert (ir_type
== PIPE_SHADER_IR_TGSI
);
157 s
= tgsi_to_nir(ir
, ctx
->base
.screen
, false);
160 s
->info
.stage
= stage
;
162 /* Call out to Midgard compiler given the above NIR */
163 panfrost_program program
= {};
164 memcpy(program
.rt_formats
, state
->rt_formats
, sizeof(program
.rt_formats
));
166 if (dev
->quirks
& IS_BIFROST
) {
167 bifrost_compile_shader_nir(s
, &program
, dev
->gpu_id
);
169 midgard_compile_shader_nir(s
, &program
, false, 0, dev
->gpu_id
,
170 dev
->debug
& PAN_DBG_PRECOMPILE
, false);
173 /* Prepare the compiled binary for upload */
174 int size
= program
.compiled
.size
;
175 dst
= program
.compiled
.data
;
177 /* Upload the shader. The lookahead tag is ORed on as a tagged pointer.
178 * I bet someone just thought that would be a cute pun. At least,
179 * that's how I'd do it. */
182 state
->bo
= panfrost_bo_create(dev
, size
, PAN_BO_EXECUTE
);
183 memcpy(state
->bo
->cpu
, dst
, size
);
186 if (!(dev
->quirks
& IS_BIFROST
)) {
187 /* If size = 0, no shader. Use dummy tag to avoid
188 * INSTR_INVALID_ENC */
189 state
->first_tag
= size
? program
.first_tag
: 1;
192 util_dynarray_fini(&program
.compiled
);
194 state
->sysval_count
= program
.sysval_count
;
195 memcpy(state
->sysval
, program
.sysvals
, sizeof(state
->sysval
[0]) * state
->sysval_count
);
197 bool vertex_id
= s
->info
.system_values_read
& (1 << SYSTEM_VALUE_VERTEX_ID
);
198 bool instance_id
= s
->info
.system_values_read
& (1 << SYSTEM_VALUE_INSTANCE_ID
);
200 /* On Bifrost it's a sysval, on Midgard it's a varying */
201 state
->reads_frag_coord
= s
->info
.system_values_read
& (1 << SYSTEM_VALUE_FRAG_COORD
);
203 state
->writes_global
= s
->info
.writes_memory
;
206 case MESA_SHADER_VERTEX
:
207 state
->attribute_count
= util_bitcount64(s
->info
.inputs_read
);
208 state
->varying_count
= util_bitcount64(s
->info
.outputs_written
);
211 state
->attribute_count
= MAX2(state
->attribute_count
, PAN_VERTEX_ID
+ 1);
214 state
->attribute_count
= MAX2(state
->attribute_count
, PAN_INSTANCE_ID
+ 1);
217 case MESA_SHADER_FRAGMENT
:
218 state
->attribute_count
= 0;
219 state
->varying_count
= util_bitcount64(s
->info
.inputs_read
);
220 if (s
->info
.outputs_written
& BITFIELD64_BIT(FRAG_RESULT_DEPTH
))
221 state
->writes_depth
= true;
222 if (s
->info
.outputs_written
& BITFIELD64_BIT(FRAG_RESULT_STENCIL
))
223 state
->writes_stencil
= true;
225 uint64_t outputs_read
= s
->info
.outputs_read
;
226 if (outputs_read
& BITFIELD64_BIT(FRAG_RESULT_COLOR
))
227 outputs_read
|= BITFIELD64_BIT(FRAG_RESULT_DATA0
);
229 state
->outputs_read
= outputs_read
>> FRAG_RESULT_DATA0
;
231 /* List of reasons we need to execute frag shaders when things
235 s
->info
.writes_memory
||
236 s
->info
.fs
.uses_discard
||
237 s
->info
.fs
.uses_demote
;
239 case MESA_SHADER_COMPUTE
:
241 state
->attribute_count
= 0;
242 state
->varying_count
= 0;
243 state
->shared_size
= s
->info
.cs
.shared_size
;
246 unreachable("Unknown shader state");
249 state
->can_discard
= s
->info
.fs
.uses_discard
;
250 state
->helper_invocations
= s
->info
.fs
.needs_helper_invocations
;
251 state
->stack_size
= program
.tls_size
;
253 state
->reads_frag_coord
= s
->info
.inputs_read
& (1 << VARYING_SLOT_POS
);
254 state
->reads_point_coord
= s
->info
.inputs_read
& (1 << VARYING_SLOT_PNTC
);
255 state
->reads_face
= s
->info
.inputs_read
& (1 << VARYING_SLOT_FACE
);
256 state
->writes_point_size
= s
->info
.outputs_written
& (1 << VARYING_SLOT_PSIZ
);
259 *outputs_written
= s
->info
.outputs_written
;
261 /* Separate as primary uniform count is truncated. Sysvals are prefix
263 state
->uniform_count
= s
->num_uniforms
+ program
.sysval_count
;
264 state
->uniform_cutoff
= program
.uniform_cutoff
;
265 state
->work_reg_count
= program
.work_register_count
;
267 if (dev
->quirks
& IS_BIFROST
)
268 for (unsigned i
= 0; i
< BIFROST_MAX_RENDER_TARGET_COUNT
; i
++)
269 state
->blend_types
[i
] = bifrost_blend_type_from_nir(program
.blend_types
[i
]);
271 /* Record the varying mapping for the command stream's bookkeeping */
273 nir_variable_mode varying_mode
=
274 stage
== MESA_SHADER_VERTEX
? nir_var_shader_out
: nir_var_shader_in
;
276 nir_foreach_variable_with_modes(var
, s
, varying_mode
) {
277 unsigned loc
= var
->data
.driver_location
;
278 unsigned sz
= glsl_count_attribute_slots(var
->type
, FALSE
);
280 for (int c
= 0; c
< sz
; ++c
) {
281 state
->varyings_loc
[loc
+ c
] = var
->data
.location
+ c
;
282 state
->varyings
[loc
+ c
] = pan_format_from_glsl(var
->type
,
283 var
->data
.precision
, var
->data
.location_frac
);
287 /* In both clone and tgsi_to_nir paths, the shader is ralloc'd against