2 * Copyright (C) 2020 Collabora, Ltd.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
29 #include "pan_encoder.h"
31 #include "pan_scoreboard.h"
32 #include "pan_texture.h"
33 #include "panfrost-quirks.h"
34 #include "../midgard/midgard_compile.h"
35 #include "compiler/nir/nir_builder.h"
36 #include "util/u_math.h"
38 /* On Midgard, the native blit infrastructure (via MFBD preloads) is broken or
39 * missing in many cases. We instead use software paths as fallbacks to
40 * implement blits, which are done as TILER jobs. No vertex shader is
41 * necessary since we can supply screen-space coordinates directly.
43 * This is primarily designed as a fallback for preloads but could be extended
44 * for other clears/blits if needed in the future. */
/* Builds a fragment shader in NIR that fetches one texel from a texture and
 * stores it to the output variable bound to `loc`, then passes the shader to
 * midgard_compile_shader_nir() with `program` as the destination.
 *
 * program: receives the compiled result
 * gpu_id:  forwarded unchanged to midgard_compile_shader_nir()
 * loc:     fragment result slot the "out" variable is bound to; locations at
 *          or above FRAG_RESULT_DATA0 are treated as colour (four-component
 *          output), anything below gets a single-component output
 * T:       not referenced in the visible lines -- presumably the texture
 *          destination type; TODO confirm
 * ms:      selects the number of texture sources (3 vs 1); presumably also
 *          selects between the txf_ms and tex setups below
 *
 * NOTE(review): this listing appears to be missing lines (the return type,
 * braces, the declaration of `b`, and the conditionals choosing between the
 * two texture setups and the two stores). Confirm against the complete file
 * before relying on the statement order shown here. */
47 panfrost_build_blit_shader(panfrost_program
*program
, unsigned gpu_id
, gl_frag_result loc
, nir_alu_type T
, bool ms
)
49 bool is_colour
= loc
>= FRAG_RESULT_DATA0
;
51 nir_shader
*shader
= nir_shader_create(NULL
, MESA_SHADER_FRAGMENT
, &midgard_nir_options
, NULL
);
52 nir_function
*fn
= nir_function_create(shader
, "main");
53 nir_function_impl
*impl
= nir_function_impl_create(fn
);
/* Shader input "coord" is a two-component float vector; shader output "out"
 * has four components for colour and one otherwise */
55 nir_variable
*c_src
= nir_variable_create(shader
, nir_var_shader_in
, glsl_vector_type(GLSL_TYPE_FLOAT
, 2), "coord");
56 nir_variable
*c_out
= nir_variable_create(shader
, nir_var_shader_out
, glsl_vector_type(
57 GLSL_TYPE_FLOAT
, is_colour
? 4 : 1), "out");
59 c_src
->data
.location
= VARYING_SLOT_TEX0
;
60 c_out
->data
.location
= loc
;
64 nir_builder_init(b
, impl
);
65 b
->cursor
= nir_before_block(nir_start_block(impl
));
67 nir_ssa_def
*coord
= nir_load_var(b
, c_src
);
/* Three sources when multisampled (coordinate, sample index, LOD), a single
 * coordinate source otherwise */
69 nir_tex_instr
*tex
= nir_tex_instr_create(shader
, ms
? 3 : 1);
/* txf_ms setup: coordinate converted with nir_f2i32, the sample ID loaded
 * via nir_load_sample_id as the sample index, and an immediate LOD of 0 */
74 tex
->src
[0].src_type
= nir_tex_src_coord
;
75 tex
->src
[0].src
= nir_src_for_ssa(nir_f2i32(b
, coord
));
76 tex
->coord_components
= 2;
78 tex
->src
[1].src_type
= nir_tex_src_ms_index
;
79 tex
->src
[1].src
= nir_src_for_ssa(nir_load_sample_id(b
));
81 tex
->src
[2].src_type
= nir_tex_src_lod
;
82 tex
->src
[2].src
= nir_src_for_ssa(nir_imm_int(b
, 0));
83 tex
->sampler_dim
= GLSL_SAMPLER_DIM_MS
;
84 tex
->op
= nir_texop_txf_ms
;
/* tex setup: the loaded coordinate is used as-is with a 2D sampler */
86 tex
->op
= nir_texop_tex
;
88 tex
->src
[0].src_type
= nir_tex_src_coord
;
89 tex
->src
[0].src
= nir_src_for_ssa(coord
);
90 tex
->coord_components
= 2;
92 tex
->sampler_dim
= GLSL_SAMPLER_DIM_2D
;
95 nir_ssa_dest_init(&tex
->instr
, &tex
->dest
, 4, 32, NULL
);
96 nir_builder_instr_insert(b
, &tex
->instr
);
/* Two stores: the whole texel result, and channel 0 of it only.
 * NOTE(review): the conditional selecting between them is not visible in
 * this listing -- presumably keyed on is_colour; confirm. */
99 nir_store_var(b
, c_out
, &tex
->dest
.ssa
, 0xFF);
101 nir_store_var(b
, c_out
, nir_channel(b
, &tex
->dest
.ssa
, 0), 0xFF);
103 midgard_compile_shader_nir(shader
, program
, false, 0, gpu_id
, false, true);
107 /* Compile and upload all possible blit shaders ahead-of-time to reduce draw
108 * time overhead. There's only ~30 of them at the moment, so this is fine */
/* dev: device whose blit_shaders.bo is created here and whose
 *      blit_shaders.loads[loc][T][ms] table is filled with the GPU address
 *      of each uploaded shader ORed with that program's first_tag.
 *
 * NOTE(review): this listing appears to be missing lines (the return type,
 * braces, the field declarations of the descriptor struct, the nir_types
 * initialisers, the tail of the total_size expression, the declaration of
 * `offset`, the statement guarded by the type-mask test, and the trailing
 * arguments of the panfrost_build_blit_shader call). Confirm against the
 * complete file. */
111 panfrost_init_blit_shaders(struct panfrost_device
*dev
)
/* Table of outputs to build shaders for: a fragment result location paired
 * with a bitmask of blit types. Depth enables only PAN_BLIT_FLOAT, stencil
 * only PAN_BLIT_UINT, and DATA0..DATA7 enable every type (~0). */
113 static const struct {
117 { FRAG_RESULT_DEPTH
, 1 << PAN_BLIT_FLOAT
},
118 { FRAG_RESULT_STENCIL
, 1 << PAN_BLIT_UINT
},
119 { FRAG_RESULT_DATA0
, ~0 },
120 { FRAG_RESULT_DATA1
, ~0 },
121 { FRAG_RESULT_DATA2
, ~0 },
122 { FRAG_RESULT_DATA3
, ~0 },
123 { FRAG_RESULT_DATA4
, ~0 },
124 { FRAG_RESULT_DATA5
, ~0 },
125 { FRAG_RESULT_DATA6
, ~0 },
126 { FRAG_RESULT_DATA7
, ~0 }
129 nir_alu_type nir_types
[PAN_BLIT_NUM_TYPES
] = {
135 /* Total size = # of shaders * bytes per shader. There are
136 * shaders for each RT (so up to DATA7 -- overestimate is
137 * okay) and up to NUM_TYPES variants of each, * 2 for multisampling
138 * variants. These shaders are simple enough that they should be less
139 * than 8 quadwords each (again, overestimate is fine). */
142 unsigned total_size
= (FRAG_RESULT_DATA7
* PAN_BLIT_NUM_TYPES
)
/* A single buffer object, created with PAN_BO_EXECUTE, receives every
 * compiled shader */
145 dev
->blit_shaders
.bo
= panfrost_bo_create(dev
, total_size
, PAN_BO_EXECUTE
);
147 /* Don't bother generating multisampling variants if we don't actually
148 * support multisampling */
149 bool has_ms
= !(dev
->quirks
& MIDGARD_SFBD
);
/* ms takes the value 0, and additionally 1 only when has_ms is set */
151 for (unsigned ms
= 0; ms
<= has_ms
; ++ms
) {
152 for (unsigned i
= 0; i
< ARRAY_SIZE(shader_descs
); ++i
) {
153 unsigned loc
= shader_descs
[i
].loc
;
155 for (enum pan_blit_type T
= 0; T
< PAN_BLIT_NUM_TYPES
; ++T
) {
/* Tests whether type T is absent from this output's mask.
 * NOTE(review): the guarded statement is missing from this listing --
 * presumably it skips the variant; confirm. */
156 if (!(shader_descs
[i
].types
& (1 << T
)))
159 panfrost_program program
;
160 panfrost_build_blit_shader(&program
, dev
->gpu_id
, loc
,
/* Copy the compiled binary into the BO at the running offset, record its GPU
 * address (tagged with first_tag) in the lookup table, then advance the
 * offset by the binary size aligned to 64 via ALIGN_POT */
163 assert(offset
+ program
.compiled
.size
< total_size
);
164 memcpy(dev
->blit_shaders
.bo
->cpu
+ offset
, program
.compiled
.data
, program
.compiled
.size
);
166 dev
->blit_shaders
.loads
[loc
][T
][ms
] = (dev
->blit_shaders
.bo
->gpu
+ offset
) | program
.first_tag
;
167 offset
+= ALIGN_POT(program
.compiled
.size
, 64);
/* The binary has been copied out, so the program's dynarray is finalised */
168 util_dynarray_fini(&program
.compiled
);
/* NOTE(review): this listing appears to be missing lines for the definition
 * below: the function name and return type, some parameters (`loc` is used
 * but its declaration is not visible), braces, several call arguments, and
 * the end of the existing header comment. The notes here and further down
 * describe only what is visible; confirm against the complete file.
 *
 * Visible flow: allocate viewport, sampler, varying and varying-buffer
 * descriptors from `pool` and pack them; build the renderer state, taking
 * the shader pointer from blit_shaders.loads[loc][T][ms] and keying the
 * depth/stencil configuration on `loc`; build a 2D texture descriptor for
 * `image`; fill the eight blend slots that follow the state descriptor; and
 * finally emit a MALI_JOB_TYPE_TILER job through panfrost_new_job().
 *
 * pool:         allocator for every descriptor built here; pool->dev supplies
 *               the shader table and the quirks
 * scoreboard:   passed through to panfrost_new_job()
 * blend_shader: written into the blend descriptors
 * coordinates:  used both as the draw position pointer and as the varying
 *               buffer pointer (stride of 4 * sizeof(float))
 * vertex_count: sizes the varying buffer and sets the primitive index count
 * image:        source image; its format, dimensions, sample count, levels,
 *               first layer and backing BO are read */
174 /* Add a shader-based load on Midgard (draw-time for GL). Shaders are
179 struct pan_pool
*pool
,
180 struct pan_scoreboard
*scoreboard
,
181 mali_ptr blend_shader
,
183 mali_ptr coordinates
, unsigned vertex_count
,
184 struct pan_image
*image
,
187 bool srgb
= util_format_is_srgb(image
->format
);
188 unsigned width
= u_minify(image
->width0
, image
->first_level
);
189 unsigned height
= u_minify(image
->height0
, image
->first_level
);
191 struct panfrost_transfer viewport
= panfrost_pool_alloc(pool
, MALI_VIEWPORT_LENGTH
);
192 struct panfrost_transfer sampler
= panfrost_pool_alloc(pool
, MALI_MIDGARD_SAMPLER_LENGTH
);
193 struct panfrost_transfer varying
= panfrost_pool_alloc(pool
, MALI_ATTRIBUTE_LENGTH
);
194 struct panfrost_transfer varying_buffer
= panfrost_pool_alloc(pool
, MALI_ATTRIBUTE_BUFFER_LENGTH
);
196 pan_pack(viewport
.cpu
, VIEWPORT
, cfg
) {
197 cfg
.scissor_maximum_x
= width
- 1; /* Inclusive */
198 cfg
.scissor_maximum_y
= height
- 1;
/* The varying buffer points at the same coordinate array used for positions */
201 pan_pack(varying_buffer
.cpu
, ATTRIBUTE_BUFFER
, cfg
) {
202 cfg
.pointer
= coordinates
;
203 cfg
.stride
= 4 * sizeof(float);
204 cfg
.size
= cfg
.stride
* vertex_count
;
207 pan_pack(varying
.cpu
, ATTRIBUTE
, cfg
) {
208 cfg
.buffer_index
= 0;
209 cfg
.format
= (MALI_CHANNEL_R
<< 0) | (MALI_CHANNEL_G
<< 3) | (MALI_RGBA32F
<< 12);
/* Blend equation; the colour mask is cleared when loc is below
 * FRAG_RESULT_DATA0, i.e. for non-colour outputs */
212 struct mali_blend_equation_packed eq
;
214 pan_pack(&eq
, BLEND_EQUATION
, cfg
) {
215 cfg
.rgb_mode
= 0x122;
216 cfg
.alpha_mode
= 0x122;
218 if (loc
< FRAG_RESULT_DATA0
)
219 cfg
.color_mask
= 0x0;
222 union midgard_blend replace
= {
227 replace
.shader
= blend_shader
;
229 /* Determine the sampler type needed. Stencil is always sampled as
230 * UINT. Pure (U)INT is always (U)INT. Everything else is FLOAT. */
232 enum pan_blit_type T
=
233 (loc
== FRAG_RESULT_STENCIL
) ? PAN_BLIT_UINT
:
234 (util_format_is_pure_uint(image
->format
)) ? PAN_BLIT_UINT
:
235 (util_format_is_pure_sint(image
->format
)) ? PAN_BLIT_INT
:
/* The multisampled shader variant is used when the image has more than one
 * sample */
238 bool ms
= image
->nr_samples
> 1;
240 struct mali_midgard_properties_packed properties
;
/* One allocation holds the state descriptor followed by eight
 * midgard_blend_rt slots */
242 struct panfrost_transfer shader_meta_t
= panfrost_pool_alloc_aligned(
243 pool
, MALI_STATE_LENGTH
+ 8 * sizeof(struct midgard_blend_rt
), 128);
245 pan_pack(&properties
, MIDGARD_PROPERTIES
, cfg
) {
246 cfg
.work_register_count
= 4;
247 cfg
.early_z_enable
= (loc
>= FRAG_RESULT_DATA0
);
248 cfg
.stencil_from_shader
= (loc
== FRAG_RESULT_STENCIL
);
249 cfg
.depth_source
= (loc
== FRAG_RESULT_DEPTH
) ?
250 MALI_DEPTH_SOURCE_SHADER
:
251 MALI_DEPTH_SOURCE_FIXED_FUNCTION
;
254 pan_pack(shader_meta_t
.cpu
, STATE
, cfg
) {
255 cfg
.shader
.shader
= pool
->dev
->blit_shaders
.loads
[loc
][T
][ms
];
256 cfg
.shader
.varying_count
= 1;
257 cfg
.shader
.texture_count
= 1;
258 cfg
.shader
.sampler_count
= 1;
260 cfg
.properties
= properties
.opaque
[0];
262 cfg
.multisample_misc
.sample_mask
= 0xFFFF;
263 cfg
.multisample_misc
.multisample_enable
= ms
;
264 cfg
.multisample_misc
.evaluate_per_sample
= ms
;
265 cfg
.multisample_misc
.depth_write_mask
= (loc
== FRAG_RESULT_DEPTH
);
266 cfg
.multisample_misc
.depth_function
= MALI_FUNC_ALWAYS
;
268 cfg
.stencil_mask_misc
.stencil_enable
= (loc
== FRAG_RESULT_STENCIL
);
269 cfg
.stencil_mask_misc
.stencil_mask_front
= 0xFF;
270 cfg
.stencil_mask_misc
.stencil_mask_back
= 0xFF;
271 cfg
.stencil_mask_misc
.unknown_1
= 0x7;
273 cfg
.stencil_front
.compare_function
= MALI_FUNC_ALWAYS
;
274 cfg
.stencil_front
.stencil_fail
= MALI_STENCIL_OP_REPLACE
;
275 cfg
.stencil_front
.depth_fail
= MALI_STENCIL_OP_REPLACE
;
276 cfg
.stencil_front
.depth_pass
= MALI_STENCIL_OP_REPLACE
;
278 cfg
.stencil_back
= cfg
.stencil_front
;
/* On MIDGARD_SFBD devices the sfbd_* fields are set and the `replace` blend
 * union is copied into cfg.sfbd_blend; on other non-Bifrost devices only the
 * blend shader pointer is copied there */
280 if (pool
->dev
->quirks
& MIDGARD_SFBD
) {
281 cfg
.stencil_mask_misc
.sfbd_write_enable
= true;
282 cfg
.stencil_mask_misc
.sfbd_dither_disable
= true;
283 cfg
.stencil_mask_misc
.sfbd_srgb
= srgb
;
284 cfg
.multisample_misc
.sfbd_blend_shader
= blend_shader
;
285 memcpy(&cfg
.sfbd_blend
, &replace
, sizeof(replace
));
286 } else if (!(pool
->dev
->quirks
& IS_BIFROST
)) {
287 memcpy(&cfg
.sfbd_blend
, &blend_shader
, sizeof(blend_shader
));
/* The lookup table entry must have been populated for this loc/T/ms */
290 assert(cfg
.shader
.shader
);
293 /* Create the texture descriptor. We partially compute the base address
294 * ourselves to account for layer, such that the texture descriptor
295 * itself is for a 2D texture with array size 1 even for 3D/array
296 * textures, removing the need to separately key the blit shaders for
297 * 2D and 3D variants */
299 struct panfrost_transfer texture_t
= panfrost_pool_alloc_aligned(
300 pool
, MALI_MIDGARD_TEXTURE_LENGTH
+ sizeof(mali_ptr
) * 2 * MAX2(image
->nr_samples
, 1), 128);
302 panfrost_new_texture(texture_t
.cpu
,
303 image
->width0
, image
->height0
,
304 MAX2(image
->nr_samples
, 1), 1,
305 image
->format
, MALI_TEXTURE_DIMENSION_2D
,
307 image
->first_level
, image
->last_level
,
311 (MALI_CHANNEL_R
<< 0) | (MALI_CHANNEL_G
<< 3) | (MALI_CHANNEL_B
<< 6) | (MALI_CHANNEL_A
<< 9),
312 image
->bo
->gpu
+ image
->first_layer
*
313 panfrost_get_layer_stride(image
->slices
,
314 image
->dim
== MALI_TEXTURE_DIMENSION_3D
,
315 image
->cubemap_stride
, image
->first_level
),
318 pan_pack(sampler
.cpu
, MIDGARD_SAMPLER
, cfg
)
319 cfg
.normalized_coordinates
= false;
/* Fill the eight blend slots after the state descriptor: the slot whose
 * index matches loc receives the blend descriptor, and slots are zeroed
 * with memset. NOTE(review): the else separating the memcpy and memset
 * paths is not visible in this listing -- confirm. */
321 for (unsigned i
= 0; i
< 8; ++i
) {
322 void *dest
= shader_meta_t
.cpu
+ MALI_STATE_LENGTH
+ sizeof(struct midgard_blend_rt
) * i
;
324 if (loc
== (FRAG_RESULT_DATA0
+ i
)) {
325 struct midgard_blend_rt blend_rt
= {
330 pan_pack(&flags
, BLEND_FLAGS
, cfg
) {
331 cfg
.dither_disable
= true;
333 cfg
.midgard_blend_shader
= blend_shader
;
335 blend_rt
.flags
.opaque
[0] = flags
;
338 blend_rt
.blend
.shader
= blend_shader
;
340 memcpy(dest
, &blend_rt
, sizeof(struct midgard_blend_rt
));
342 memset(dest
, 0x0, sizeof(struct midgard_blend_rt
));
346 struct midgard_payload_vertex_tiler payload
= {};
347 struct mali_primitive_packed primitive
;
348 struct mali_draw_packed draw
;
349 struct mali_invocation_packed invocation
;
/* The draw descriptor ties together the descriptors built above; the
 * texture table is the single texture descriptor pointer uploaded into the
 * pool */
351 pan_pack(&draw
, DRAW
, cfg
) {
353 cfg
.position
= coordinates
;
354 cfg
.textures
= panfrost_pool_upload(pool
, &texture_t
.gpu
, sizeof(texture_t
.gpu
));
355 cfg
.samplers
= sampler
.gpu
;
356 cfg
.state
= shader_meta_t
.gpu
;
357 cfg
.varying_buffers
= varying_buffer
.gpu
;
358 cfg
.varyings
= varying
.gpu
;
359 cfg
.viewport
= viewport
.gpu
;
363 pan_pack(&primitive
, PRIMITIVE
, cfg
) {
364 cfg
.draw_mode
= MALI_DRAW_MODE_TRIANGLES
;
365 cfg
.index_count
= vertex_count
;
369 panfrost_pack_work_groups_compute(&invocation
, 1, vertex_count
, 1, 1, 1, 1, true);
371 payload
.prefix
.primitive
= primitive
;
372 memcpy(&payload
.postfix
, &draw
, MALI_DRAW_LENGTH
);
373 payload
.prefix
.invocation
= invocation
;
/* Queue the assembled payload as a tiler job */
375 panfrost_new_job(pool
, scoreboard
, MALI_JOB_TYPE_TILER
, false, 0, &payload
, sizeof(payload
), true);