2 * Copyright (C) 2020 Collabora, Ltd.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
29 #include "pan_encoder.h"
31 #include "pan_scoreboard.h"
32 #include "pan_texture.h"
33 #include "panfrost-quirks.h"
34 #include "../midgard/midgard_compile.h"
35 #include "compiler/nir/nir_builder.h"
36 #include "util/u_math.h"
38 /* On Midgard, the native blit infrastructure (via MFBD preloads) is broken or
39 * missing in many cases. We instead use software paths as fallbacks to
40 * implement blits, which are done as TILER jobs. No vertex shader is
41 * necessary since we can supply screen-space coordinates directly.
43 * This is primarily designed as a fallback for preloads but could be extended
44 * for other clears/blits if needed in the future. */
/* Builds a small NIR fragment shader for blits and compiles it for Midgard.
 *
 * The shader loads the "coord" input (VARYING_SLOT_TEX0), performs a single
 * texture operation with it and stores the result to an output variable
 * placed at `loc`. The finished shader is handed to
 * midgard_compile_shader_nir() together with `program` and `gpu_id`.
 *
 * program: passed to the Midgard compiler along with the shader
 * gpu_id:  forwarded unchanged to the Midgard compiler
 * loc:     output location; locations >= FRAG_RESULT_DATA0 count as colour
 *          and get a 4-component output, anything else a 1-component one
 * T:       NOTE(review): not referenced in the lines visible here -- confirm
 *          where the texture destination type is set
 * ms:      selects the texture instruction shape: 3 sources when set,
 *          1 source otherwise
 *
 * NOTE(review): this view is missing some original lines (the declaration of
 * `b`, and the keywords that select between the alternative statement groups
 * below), so the branch structure described in the comments is inferred. */
47 panfrost_build_blit_shader(panfrost_program
*program
, unsigned gpu_id
, gl_frag_result loc
, nir_alu_type T
, bool ms
)
49 bool is_colour
= loc
>= FRAG_RESULT_DATA0
;
52 nir_builder_init_simple_shader(&_b
, NULL
, MESA_SHADER_FRAGMENT
, &midgard_nir_options
);
54 nir_shader
*shader
= b
->shader
;
/* Input: 2-component float coordinate. Output: vec4 for colour, scalar
 * otherwise. */
56 nir_variable
*c_src
= nir_variable_create(shader
, nir_var_shader_in
, glsl_vector_type(GLSL_TYPE_FLOAT
, 2), "coord");
57 nir_variable
*c_out
= nir_variable_create(shader
, nir_var_shader_out
, glsl_vector_type(
58 GLSL_TYPE_FLOAT
, is_colour
? 4 : 1), "out");
60 c_src
->data
.location
= VARYING_SLOT_TEX0
;
61 c_out
->data
.location
= loc
;
63 nir_ssa_def
*coord
= nir_load_var(b
, c_src
);
65 nir_tex_instr
*tex
= nir_tex_instr_create(shader
, ms
? 3 : 1);
/* Three-source form (presumably the `ms` branch): the coordinate converted
 * to integer with nir_f2i32, the current sample ID as the sample index, and
 * an immediate LOD of 0, issued as nir_texop_txf_ms on a
 * GLSL_SAMPLER_DIM_MS sampler. */
70 tex
->src
[0].src_type
= nir_tex_src_coord
;
71 tex
->src
[0].src
= nir_src_for_ssa(nir_f2i32(b
, coord
));
72 tex
->coord_components
= 2;
74 tex
->src
[1].src_type
= nir_tex_src_ms_index
;
75 tex
->src
[1].src
= nir_src_for_ssa(nir_load_sample_id(b
));
77 tex
->src
[2].src_type
= nir_tex_src_lod
;
78 tex
->src
[2].src
= nir_src_for_ssa(nir_imm_int(b
, 0));
79 tex
->sampler_dim
= GLSL_SAMPLER_DIM_MS
;
80 tex
->op
= nir_texop_txf_ms
;
/* Single-source form (presumably the non-`ms` branch): nir_texop_tex on the
 * unconverted coordinate with a GLSL_SAMPLER_DIM_2D sampler. */
82 tex
->op
= nir_texop_tex
;
84 tex
->src
[0].src_type
= nir_tex_src_coord
;
85 tex
->src
[0].src
= nir_src_for_ssa(coord
);
86 tex
->coord_components
= 2;
88 tex
->sampler_dim
= GLSL_SAMPLER_DIM_2D
;
/* The texture result is initialised with 4 components of 32 bits each
 * before the instruction is inserted into the shader. */
91 nir_ssa_dest_init(&tex
->instr
, &tex
->dest
, 4, 32, NULL
);
92 nir_builder_instr_insert(b
, &tex
->instr
);
/* Two alternative stores: the whole texture result, or only channel 0 of
 * it. Presumably chosen by is_colour to match the output's component count;
 * the selecting keyword is not visible here. */
95 nir_store_var(b
, c_out
, &tex
->dest
.ssa
, 0xFF);
97 nir_store_var(b
, c_out
, nir_channel(b
, &tex
->dest
.ssa
, 0), 0xFF);
99 midgard_compile_shader_nir(shader
, program
, false, 0, gpu_id
, false, true);
103 /* Compile and upload all possible blit shaders ahead-of-time to reduce draw
104 * time overhead. There's only ~30 of them at the moment, so this is fine */
/* Builds every blit shader variant and packs the binaries into one BO.
 *
 * For each multisample setting, each entry of shader_descs and each blit
 * type enabled for that entry, the shader is built with
 * panfrost_build_blit_shader(), its compiled bytes are copied into
 * dev->blit_shaders.bo at the running `offset`, and the GPU address of that
 * copy OR'd with program.first_tag is stored in
 * dev->blit_shaders.loads[loc][T][ms].
 *
 * dev: device whose blit_shaders state is filled in; dev->quirks decides
 *      whether multisampled variants are built and dev->gpu_id is forwarded
 *      to the shader builder.
 *
 * NOTE(review): some original lines are missing from this view (the struct
 * field declarations, the nir_types initialisers, the declaration of
 * `offset` and the tail of the total_size expression). */
107 panfrost_init_blit_shaders(struct panfrost_device
*dev
)
/* Table of output locations to build shaders for. The second member is used
 * below as a bitmask indexed by enum pan_blit_type (`types & (1 << T)`), so
 * ~0 enables every type for that location. */
109 static const struct {
113 { FRAG_RESULT_DEPTH
, 1 << PAN_BLIT_FLOAT
},
114 { FRAG_RESULT_STENCIL
, 1 << PAN_BLIT_UINT
},
115 { FRAG_RESULT_DATA0
, ~0 },
116 { FRAG_RESULT_DATA1
, ~0 },
117 { FRAG_RESULT_DATA2
, ~0 },
118 { FRAG_RESULT_DATA3
, ~0 },
119 { FRAG_RESULT_DATA4
, ~0 },
120 { FRAG_RESULT_DATA5
, ~0 },
121 { FRAG_RESULT_DATA6
, ~0 },
122 { FRAG_RESULT_DATA7
, ~0 }
125 nir_alu_type nir_types
[PAN_BLIT_NUM_TYPES
] = {
131 /* Total size = # of shaders * bytes per shader. There are
132 * shaders for each RT (so up to DATA7 -- overestimate is
133 * okay) and up to NUM_TYPES variants of each, * 2 for multisampling
134 * variants. These shaders are simple enough that they should be less
135 * than 8 quadwords each (again, overestimate is fine). */
138 unsigned total_size
= (FRAG_RESULT_DATA7
* PAN_BLIT_NUM_TYPES
)
/* One BO, created with PAN_BO_EXECUTE, holds all of the shader binaries. */
141 dev
->blit_shaders
.bo
= panfrost_bo_create(dev
, total_size
, PAN_BO_EXECUTE
);
143 /* Don't bother generating multisampling variants if we don't actually
144 * support multisampling */
145 bool has_ms
= !(dev
->quirks
& MIDGARD_SFBD
);
/* `ms <= has_ms` iterates ms = 0 only when has_ms is false, and ms = 0, 1
 * when it is true. */
147 for (unsigned ms
= 0; ms
<= has_ms
; ++ms
) {
148 for (unsigned i
= 0; i
< ARRAY_SIZE(shader_descs
); ++i
) {
149 unsigned loc
= shader_descs
[i
].loc
;
151 for (enum pan_blit_type T
= 0; T
< PAN_BLIT_NUM_TYPES
; ++T
) {
/* Tests whether type T is absent from this location's mask.
 * NOTE(review): the statement controlled by this `if` is not visible in
 * this view. */
152 if (!(shader_descs
[i
].types
& (1 << T
)))
155 panfrost_program program
;
156 panfrost_build_blit_shader(&program
, dev
->gpu_id
, loc
,
/* The compiled shader must end before total_size, i.e. fit in the BO. */
159 assert(offset
+ program
.compiled
.size
< total_size
);
160 memcpy(dev
->blit_shaders
.bo
->cpu
+ offset
, program
.compiled
.data
, program
.compiled
.size
);
/* Record the shader's GPU address combined with its first_tag. */
162 dev
->blit_shaders
.loads
[loc
][T
][ms
] = (dev
->blit_shaders
.bo
->gpu
+ offset
) | program
.first_tag
;
/* Advance by the compiled size passed through ALIGN_POT(..., 64), then
 * release the compiler's dynarray now that its bytes have been copied. */
163 offset
+= ALIGN_POT(program
.compiled
.size
, 64);
164 util_dynarray_fini(&program
.compiled
);
170 /* Add a shader-based load on Midgard (draw-time for GL). Shaders are compiled and uploaded ahead of time. */
/* Emits a TILER job that draws `vertex_count` vertices from `coordinates`
 * while sampling `image` through one of the blit shaders stored in
 * pool->dev->blit_shaders.loads[loc][T][ms].
 *
 * The viewport, sampler, varying, varying-buffer, renderer-state and texture
 * descriptors are all allocated from `pool`, filled in below, referenced
 * from the draw descriptor and submitted with panfrost_new_job().
 *
 * pool:         allocator for every descriptor built here; pool->dev also
 *               supplies the quirks and the blit shader table
 * scoreboard:   passed to panfrost_new_job() when the job is queued
 * blend_shader: written into the blend state (replace.shader, the SFBD
 *               blend fields, the blend flags and blend_rt.blend.shader)
 * coordinates:  GPU address used both as the draw's position pointer and as
 *               the varying buffer pointer
 * vertex_count: sizes the varying buffer and sets the primitive index count
 * image:        source image; supplies format, dimensions, level/layer
 *               range, sample count and backing BO
 *
 * NOTE(review): the function's name/return-type line, the declaration of
 * `loc` and several argument and brace lines are missing from this view. */
175 struct pan_pool
*pool
,
176 struct pan_scoreboard
*scoreboard
,
177 mali_ptr blend_shader
,
179 mali_ptr coordinates
, unsigned vertex_count
,
180 struct pan_image
*image
,
183 bool srgb
= util_format_is_srgb(image
->format
);
/* Image dimensions passed through u_minify() for first_level. */
184 unsigned width
= u_minify(image
->width0
, image
->first_level
);
185 unsigned height
= u_minify(image
->height0
, image
->first_level
);
187 struct panfrost_transfer viewport
= panfrost_pool_alloc(pool
, MALI_VIEWPORT_LENGTH
);
188 struct panfrost_transfer sampler
= panfrost_pool_alloc(pool
, MALI_MIDGARD_SAMPLER_LENGTH
);
189 struct panfrost_transfer varying
= panfrost_pool_alloc(pool
, MALI_ATTRIBUTE_LENGTH
);
190 struct panfrost_transfer varying_buffer
= panfrost_pool_alloc(pool
, MALI_ATTRIBUTE_BUFFER_LENGTH
);
192 pan_pack(viewport
.cpu
, VIEWPORT
, cfg
) {
193 cfg
.scissor_maximum_x
= width
- 1; /* Inclusive */
194 cfg
.scissor_maximum_y
= height
- 1;
/* A single varying buffer over the caller's coordinates, with a stride of
 * four floats per vertex. */
197 pan_pack(varying_buffer
.cpu
, ATTRIBUTE_BUFFER
, cfg
) {
198 cfg
.pointer
= coordinates
;
199 cfg
.stride
= 4 * sizeof(float);
200 cfg
.size
= cfg
.stride
* vertex_count
;
203 pan_pack(varying
.cpu
, ATTRIBUTE
, cfg
) {
204 cfg
.buffer_index
= 0;
205 cfg
.format
= (MALI_CHANNEL_R
<< 0) | (MALI_CHANNEL_G
<< 3) | (MALI_RGBA32F
<< 12);
208 struct mali_blend_equation_packed eq
;
210 pan_pack(&eq
, BLEND_EQUATION
, cfg
) {
211 cfg
.rgb_mode
= 0x122;
212 cfg
.alpha_mode
= 0x122;
/* Locations below FRAG_RESULT_DATA0 get an empty colour mask. */
214 if (loc
< FRAG_RESULT_DATA0
)
215 cfg
.color_mask
= 0x0;
218 union midgard_blend replace
= {
223 replace
.shader
= blend_shader
;
225 /* Determine the sampler type needed. Stencil is always sampled as
226 * UINT. Pure (U)INT is always (U)INT. Everything else is FLOAT. */
228 enum pan_blit_type T
=
229 (loc
== FRAG_RESULT_STENCIL
) ? PAN_BLIT_UINT
:
230 (util_format_is_pure_uint(image
->format
)) ? PAN_BLIT_UINT
:
231 (util_format_is_pure_sint(image
->format
)) ? PAN_BLIT_INT
:
/* More than one sample selects the ms = 1 entry of the shader table and
 * enables the multisample bits in the state below. */
234 bool ms
= image
->nr_samples
> 1;
236 struct mali_midgard_properties_packed properties
;
/* The state allocation has room for 8 midgard_blend_rt entries after the
 * MALI_STATE_LENGTH bytes of state; they are filled in further down. */
238 struct panfrost_transfer shader_meta_t
= panfrost_pool_alloc_aligned(
239 pool
, MALI_STATE_LENGTH
+ 8 * sizeof(struct midgard_blend_rt
), 128);
241 pan_pack(&properties
, MIDGARD_PROPERTIES
, cfg
) {
242 cfg
.work_register_count
= 4;
243 cfg
.early_z_enable
= (loc
>= FRAG_RESULT_DATA0
);
244 cfg
.stencil_from_shader
= (loc
== FRAG_RESULT_STENCIL
);
245 cfg
.depth_source
= (loc
== FRAG_RESULT_DEPTH
) ?
246 MALI_DEPTH_SOURCE_SHADER
:
247 MALI_DEPTH_SOURCE_FIXED_FUNCTION
;
250 pan_pack(shader_meta_t
.cpu
, STATE
, cfg
) {
251 cfg
.shader
.shader
= pool
->dev
->blit_shaders
.loads
[loc
][T
][ms
];
252 cfg
.shader
.varying_count
= 1;
253 cfg
.shader
.texture_count
= 1;
254 cfg
.shader
.sampler_count
= 1;
256 cfg
.properties
= properties
.opaque
[0];
258 cfg
.multisample_misc
.sample_mask
= 0xFFFF;
259 cfg
.multisample_misc
.multisample_enable
= ms
;
260 cfg
.multisample_misc
.evaluate_per_sample
= ms
;
261 cfg
.multisample_misc
.depth_write_mask
= (loc
== FRAG_RESULT_DEPTH
);
262 cfg
.multisample_misc
.depth_function
= MALI_FUNC_ALWAYS
;
264 cfg
.stencil_mask_misc
.stencil_enable
= (loc
== FRAG_RESULT_STENCIL
);
265 cfg
.stencil_mask_misc
.stencil_mask_front
= 0xFF;
266 cfg
.stencil_mask_misc
.stencil_mask_back
= 0xFF;
267 cfg
.stencil_mask_misc
.unknown_1
= 0x7;
269 cfg
.stencil_front
.compare_function
= MALI_FUNC_ALWAYS
;
270 cfg
.stencil_front
.stencil_fail
= MALI_STENCIL_OP_REPLACE
;
271 cfg
.stencil_front
.depth_fail
= MALI_STENCIL_OP_REPLACE
;
272 cfg
.stencil_front
.depth_pass
= MALI_STENCIL_OP_REPLACE
;
274 cfg
.stencil_back
= cfg
.stencil_front
;
/* With the MIDGARD_SFBD quirk the write-enable, dither, sRGB and blend
 * settings are stored directly in this state descriptor; otherwise, on
 * non-Bifrost devices, only blend_shader is copied into sfbd_blend. */
276 if (pool
->dev
->quirks
& MIDGARD_SFBD
) {
277 cfg
.stencil_mask_misc
.sfbd_write_enable
= true;
278 cfg
.stencil_mask_misc
.sfbd_dither_disable
= true;
279 cfg
.stencil_mask_misc
.sfbd_srgb
= srgb
;
280 cfg
.multisample_misc
.sfbd_blend_shader
= blend_shader
;
281 memcpy(&cfg
.sfbd_blend
, &replace
, sizeof(replace
));
282 } else if (!(pool
->dev
->quirks
& IS_BIFROST
)) {
283 memcpy(&cfg
.sfbd_blend
, &blend_shader
, sizeof(blend_shader
));
/* The shader address looked up from the table must be non-zero. */
286 assert(cfg
.shader
.shader
);
289 /* Create the texture descriptor. We partially compute the base address
290 * ourselves to account for layer, such that the texture descriptor
291 * itself is for a 2D texture with array size 1 even for 3D/array
292 * textures, removing the need to separately key the blit shaders for
293 * 2D and 3D variants */
295 struct panfrost_transfer texture_t
= panfrost_pool_alloc_aligned(
296 pool
, MALI_MIDGARD_TEXTURE_LENGTH
+ sizeof(mali_ptr
) * 2 * MAX2(image
->nr_samples
, 1), 128);
298 panfrost_new_texture(texture_t
.cpu
,
299 image
->width0
, image
->height0
,
300 MAX2(image
->nr_samples
, 1), 1,
301 image
->format
, MALI_TEXTURE_DIMENSION_2D
,
303 image
->first_level
, image
->last_level
,
307 (MALI_CHANNEL_R
<< 0) | (MALI_CHANNEL_G
<< 3) | (MALI_CHANNEL_B
<< 6) | (MALI_CHANNEL_A
<< 9),
308 image
->bo
->gpu
+ image
->first_layer
*
309 panfrost_get_layer_stride(image
->slices
,
310 image
->dim
== MALI_TEXTURE_DIMENSION_3D
,
311 image
->cubemap_stride
, image
->first_level
),
314 pan_pack(sampler
.cpu
, MIDGARD_SAMPLER
, cfg
)
315 cfg
.normalized_coordinates
= false;
/* Fill the 8 blend entries that follow the state descriptor. The entry
 * whose index matches `loc - FRAG_RESULT_DATA0` receives blend_rt; the
 * memset zeroes an entry, presumably every non-matching one (the keyword
 * separating the two cases is not visible here). */
317 for (unsigned i
= 0; i
< 8; ++i
) {
318 void *dest
= shader_meta_t
.cpu
+ MALI_STATE_LENGTH
+ sizeof(struct midgard_blend_rt
) * i
;
320 if (loc
== (FRAG_RESULT_DATA0
+ i
)) {
321 struct midgard_blend_rt blend_rt
= {
326 pan_pack(&flags
, BLEND_FLAGS
, cfg
) {
327 cfg
.dither_disable
= true;
329 cfg
.midgard_blend_shader
= blend_shader
;
331 blend_rt
.flags
.opaque
[0] = flags
;
334 blend_rt
.blend
.shader
= blend_shader
;
336 memcpy(dest
, &blend_rt
, sizeof(struct midgard_blend_rt
));
338 memset(dest
, 0x0, sizeof(struct midgard_blend_rt
));
342 struct midgard_payload_vertex_tiler payload
= {};
343 struct mali_primitive_packed primitive
;
344 struct mali_draw_packed draw
;
345 struct mali_invocation_packed invocation
;
347 pan_pack(&draw
, DRAW
, cfg
) {
/* The same coordinate buffer that backs the varying is used as the
 * position pointer. */
349 cfg
.position
= coordinates
;
/* The value uploaded here is the texture descriptor's GPU address itself,
 * so cfg.textures presumably refers to a one-entry table of descriptor
 * pointers -- TODO confirm against panfrost_pool_upload(). */
350 cfg
.textures
= panfrost_pool_upload(pool
, &texture_t
.gpu
, sizeof(texture_t
.gpu
));
351 cfg
.samplers
= sampler
.gpu
;
352 cfg
.state
= shader_meta_t
.gpu
;
353 cfg
.varying_buffers
= varying_buffer
.gpu
;
354 cfg
.varyings
= varying
.gpu
;
355 cfg
.viewport
= viewport
.gpu
;
359 pan_pack(&primitive
, PRIMITIVE
, cfg
) {
360 cfg
.draw_mode
= MALI_DRAW_MODE_TRIANGLES
;
361 cfg
.index_count
= vertex_count
;
365 panfrost_pack_work_groups_compute(&invocation
, 1, vertex_count
, 1, 1, 1, 1, true);
367 payload
.prefix
.primitive
= primitive
;
368 memcpy(&payload
.postfix
, &draw
, MALI_DRAW_LENGTH
);
369 payload
.prefix
.invocation
= invocation
;
/* Queue the assembled payload as a TILER job. */
371 panfrost_new_job(pool
, scoreboard
, MALI_JOB_TYPE_TILER
, false, 0, &payload
, sizeof(payload
), true);