/*
 * Copyright (C) 2020 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
 */
29 #include "pan_encoder.h"
31 #include "pan_scoreboard.h"
32 #include "pan_texture.h"
33 #include "panfrost-quirks.h"
34 #include "../midgard/midgard_compile.h"
35 #include "compiler/nir/nir_builder.h"
36 #include "util/u_math.h"
38 /* On Midgard, the native blit infrastructure (via MFBD preloads) is broken or
39 * missing in many cases. We instead use software paths as fallbacks to
40 * implement blits, which are done as TILER jobs. No vertex shader is
41 * necessary since we can supply screen-space coordinates directly.
43 * This is primarily designed as a fallback for preloads but could be extended
44 * for other clears/blits if needed in the future. */
47 panfrost_build_blit_shader(panfrost_program
*program
, unsigned gpu_id
, gl_frag_result loc
, nir_alu_type T
, bool ms
)
49 bool is_colour
= loc
>= FRAG_RESULT_DATA0
;
51 nir_shader
*shader
= nir_shader_create(NULL
, MESA_SHADER_FRAGMENT
, &midgard_nir_options
, NULL
);
52 nir_function
*fn
= nir_function_create(shader
, "main");
53 nir_function_impl
*impl
= nir_function_impl_create(fn
);
55 nir_variable
*c_src
= nir_variable_create(shader
, nir_var_shader_in
, glsl_vector_type(GLSL_TYPE_FLOAT
, 2), "coord");
56 nir_variable
*c_out
= nir_variable_create(shader
, nir_var_shader_out
, glsl_vector_type(
57 GLSL_TYPE_FLOAT
, is_colour
? 4 : 1), "out");
59 c_src
->data
.location
= VARYING_SLOT_TEX0
;
60 c_out
->data
.location
= loc
;
64 nir_builder_init(b
, impl
);
65 b
->cursor
= nir_before_block(nir_start_block(impl
));
67 nir_ssa_def
*coord
= nir_load_var(b
, c_src
);
69 nir_tex_instr
*tex
= nir_tex_instr_create(shader
, ms
? 3 : 1);
74 tex
->src
[0].src_type
= nir_tex_src_coord
;
75 tex
->src
[0].src
= nir_src_for_ssa(nir_f2i32(b
, coord
));
76 tex
->coord_components
= 2;
78 tex
->src
[1].src_type
= nir_tex_src_ms_index
;
79 tex
->src
[1].src
= nir_src_for_ssa(nir_load_sample_id(b
));
81 tex
->src
[2].src_type
= nir_tex_src_lod
;
82 tex
->src
[2].src
= nir_src_for_ssa(nir_imm_int(b
, 0));
83 tex
->sampler_dim
= GLSL_SAMPLER_DIM_MS
;
84 tex
->op
= nir_texop_txf_ms
;
86 tex
->op
= nir_texop_tex
;
88 tex
->src
[0].src_type
= nir_tex_src_coord
;
89 tex
->src
[0].src
= nir_src_for_ssa(coord
);
90 tex
->coord_components
= 2;
92 tex
->sampler_dim
= GLSL_SAMPLER_DIM_2D
;
95 nir_ssa_dest_init(&tex
->instr
, &tex
->dest
, 4, 32, NULL
);
96 nir_builder_instr_insert(b
, &tex
->instr
);
99 nir_store_var(b
, c_out
, &tex
->dest
.ssa
, 0xFF);
101 nir_store_var(b
, c_out
, nir_channel(b
, &tex
->dest
.ssa
, 0), 0xFF);
103 midgard_compile_shader_nir(shader
, program
, false, 0, gpu_id
, false, true);
106 /* Compile and upload all possible blit shaders ahead-of-time to reduce draw
107 * time overhead. There's only ~30 of them at the moment, so this is fine */
110 panfrost_init_blit_shaders(struct panfrost_device
*dev
)
112 static const struct {
116 { FRAG_RESULT_DEPTH
, 1 << PAN_BLIT_FLOAT
},
117 { FRAG_RESULT_STENCIL
, 1 << PAN_BLIT_UINT
},
118 { FRAG_RESULT_DATA0
, ~0 },
119 { FRAG_RESULT_DATA1
, ~0 },
120 { FRAG_RESULT_DATA2
, ~0 },
121 { FRAG_RESULT_DATA3
, ~0 },
122 { FRAG_RESULT_DATA4
, ~0 },
123 { FRAG_RESULT_DATA5
, ~0 },
124 { FRAG_RESULT_DATA6
, ~0 },
125 { FRAG_RESULT_DATA7
, ~0 }
128 nir_alu_type nir_types
[PAN_BLIT_NUM_TYPES
] = {
134 /* Total size = # of shaders * bytes per shader. There are
135 * shaders for each RT (so up to DATA7 -- overestimate is
136 * okay) and up to NUM_TYPES variants of each, * 2 for multisampling
137 * variants. These shaders are simple enough that they should be less
138 * than 8 quadwords each (again, overestimate is fine). */
141 unsigned total_size
= (FRAG_RESULT_DATA7
* PAN_BLIT_NUM_TYPES
)
144 dev
->blit_shaders
.bo
= panfrost_bo_create(dev
, total_size
, PAN_BO_EXECUTE
);
146 /* Don't bother generating multisampling variants if we don't actually
147 * support multisampling */
148 bool has_ms
= !(dev
->quirks
& MIDGARD_SFBD
);
150 for (unsigned ms
= 0; ms
<= has_ms
; ++ms
) {
151 for (unsigned i
= 0; i
< ARRAY_SIZE(shader_descs
); ++i
) {
152 unsigned loc
= shader_descs
[i
].loc
;
154 for (enum pan_blit_type T
= 0; T
< PAN_BLIT_NUM_TYPES
; ++T
) {
155 if (!(shader_descs
[i
].types
& (1 << T
)))
158 panfrost_program program
;
159 panfrost_build_blit_shader(&program
, dev
->gpu_id
, loc
,
162 assert(offset
+ program
.compiled
.size
< total_size
);
163 memcpy(dev
->blit_shaders
.bo
->cpu
+ offset
, program
.compiled
.data
, program
.compiled
.size
);
165 dev
->blit_shaders
.loads
[loc
][T
][ms
] = (dev
->blit_shaders
.bo
->gpu
+ offset
) | program
.first_tag
;
166 offset
+= ALIGN_POT(program
.compiled
.size
, 64);
167 util_dynarray_fini(&program
.compiled
);
173 /* Add a shader-based load on Midgard (draw-time for GL). Shaders are
178 struct pan_pool
*pool
,
179 struct pan_scoreboard
*scoreboard
,
180 mali_ptr blend_shader
,
182 mali_ptr coordinates
, unsigned vertex_count
,
183 struct pan_image
*image
,
186 unsigned width
= u_minify(image
->width0
, image
->first_level
);
187 unsigned height
= u_minify(image
->height0
, image
->first_level
);
189 struct panfrost_transfer viewport
= panfrost_pool_alloc(pool
, MALI_VIEWPORT_LENGTH
);
190 struct panfrost_transfer sampler
= panfrost_pool_alloc(pool
, MALI_MIDGARD_SAMPLER_LENGTH
);
191 struct panfrost_transfer varying
= panfrost_pool_alloc(pool
, MALI_ATTRIBUTE_LENGTH
);
192 struct panfrost_transfer varying_buffer
= panfrost_pool_alloc(pool
, MALI_ATTRIBUTE_BUFFER_LENGTH
);
194 pan_pack(viewport
.cpu
, VIEWPORT
, cfg
) {
195 cfg
.scissor_maximum_x
= width
- 1; /* Inclusive */
196 cfg
.scissor_maximum_y
= height
- 1;
199 pan_pack(varying_buffer
.cpu
, ATTRIBUTE_BUFFER
, cfg
) {
200 cfg
.pointer
= coordinates
;
201 cfg
.stride
= 4 * sizeof(float);
202 cfg
.size
= cfg
.stride
* vertex_count
;
205 pan_pack(varying
.cpu
, ATTRIBUTE
, cfg
) {
206 cfg
.buffer_index
= 0;
207 cfg
.format
= (MALI_CHANNEL_R
<< 0) | (MALI_CHANNEL_G
<< 3) | (MALI_RGBA32F
<< 12);
210 struct mali_stencil_packed stencil
;
211 pan_pack(&stencil
, STENCIL
, cfg
) {
212 cfg
.compare_function
= MALI_FUNC_ALWAYS
;
213 cfg
.stencil_fail
= MALI_STENCIL_OP_REPLACE
;
214 cfg
.depth_fail
= MALI_STENCIL_OP_REPLACE
;
215 cfg
.depth_pass
= MALI_STENCIL_OP_REPLACE
;
218 union midgard_blend replace
= {
222 .color_mask
= MALI_MASK_R
| MALI_MASK_G
| MALI_MASK_B
| MALI_MASK_A
,
227 replace
.shader
= blend_shader
;
229 /* Determine the sampler type needed. Stencil is always sampled as
230 * UINT. Pure (U)INT is always (U)INT. Everything else is FLOAT. */
232 enum pan_blit_type T
=
233 (loc
== FRAG_RESULT_STENCIL
) ? PAN_BLIT_UINT
:
234 (util_format_is_pure_uint(image
->format
)) ? PAN_BLIT_UINT
:
235 (util_format_is_pure_sint(image
->format
)) ? PAN_BLIT_INT
:
238 bool ms
= image
->nr_samples
> 1;
240 struct mali_shader_meta shader_meta
= {
241 .shader
= pool
->dev
->blit_shaders
.loads
[loc
][T
][ms
],
250 .unknown2_3
= MALI_DEPTH_FUNC(MALI_FUNC_ALWAYS
) | 0x10,
252 .stencil_mask_front
= ~0,
253 .stencil_mask_back
= ~0,
254 .stencil_front
= stencil
,
255 .stencil_back
= stencil
,
257 .shader
= blend_shader
262 shader_meta
.unknown2_3
|= MALI_HAS_MSAA
| MALI_PER_SAMPLE
;
264 shader_meta
.unknown2_4
|= MALI_NO_MSAA
;
266 assert(shader_meta
.shader
);
268 if (pool
->dev
->quirks
& MIDGARD_SFBD
) {
269 shader_meta
.unknown2_4
|= (0x10 | MALI_NO_DITHER
);
270 shader_meta
.blend
= replace
;
272 if (loc
< FRAG_RESULT_DATA0
)
273 shader_meta
.blend
.equation
.color_mask
= 0x0;
276 if (loc
== FRAG_RESULT_DEPTH
) {
277 shader_meta
.midgard1
.flags_lo
|= MALI_WRITES_Z
;
278 shader_meta
.unknown2_3
|= MALI_DEPTH_WRITEMASK
;
279 } else if (loc
== FRAG_RESULT_STENCIL
) {
280 shader_meta
.midgard1
.flags_hi
|= MALI_WRITES_S
;
281 shader_meta
.unknown2_4
|= MALI_STENCIL_TEST
;
283 shader_meta
.midgard1
.flags_lo
|= MALI_EARLY_Z
;
286 /* Create the texture descriptor. We partially compute the base address
287 * ourselves to account for layer, such that the texture descriptor
288 * itself is for a 2D texture with array size 1 even for 3D/array
289 * textures, removing the need to separately key the blit shaders for
290 * 2D and 3D variants */
292 struct panfrost_transfer texture_t
= panfrost_pool_alloc(pool
, MALI_MIDGARD_TEXTURE_LENGTH
+ sizeof(mali_ptr
) * 2 * MAX2(image
->nr_samples
, 1));
294 panfrost_new_texture(texture_t
.cpu
,
295 image
->width0
, image
->height0
,
296 MAX2(image
->nr_samples
, 1), 1,
297 image
->format
, MALI_TEXTURE_DIMENSION_2D
,
299 image
->first_level
, image
->last_level
,
303 (MALI_CHANNEL_R
<< 0) | (MALI_CHANNEL_G
<< 3) | (MALI_CHANNEL_B
<< 6) | (MALI_CHANNEL_A
<< 9),
304 image
->bo
->gpu
+ image
->first_layer
*
305 panfrost_get_layer_stride(image
->slices
,
306 image
->dim
== MALI_TEXTURE_DIMENSION_3D
,
307 image
->cubemap_stride
, image
->first_level
),
310 pan_pack(sampler
.cpu
, MIDGARD_SAMPLER
, cfg
)
311 cfg
.normalized_coordinates
= false;
313 struct panfrost_transfer shader_meta_t
= panfrost_pool_alloc(pool
, sizeof(shader_meta
) + 8 * sizeof(struct midgard_blend_rt
));
314 memcpy(shader_meta_t
.cpu
, &shader_meta
, sizeof(shader_meta
));
316 for (unsigned i
= 0; i
< 8; ++i
) {
317 void *dest
= shader_meta_t
.cpu
+ sizeof(shader_meta
) + sizeof(struct midgard_blend_rt
) * i
;
319 if (loc
== (FRAG_RESULT_DATA0
+ i
)) {
320 struct midgard_blend_rt blend_rt
= {
321 .flags
= 0x200 | MALI_BLEND_NO_DITHER
,
325 if (util_format_is_srgb(image
->format
))
326 blend_rt
.flags
|= MALI_BLEND_SRGB
;
329 blend_rt
.flags
|= MALI_BLEND_MRT_SHADER
;
330 blend_rt
.blend
.shader
= blend_shader
;
333 memcpy(dest
, &blend_rt
, sizeof(struct midgard_blend_rt
));
335 memset(dest
, 0x0, sizeof(struct midgard_blend_rt
));
339 struct midgard_payload_vertex_tiler payload
= {
341 .draw_mode
= MALI_DRAW_MODE_TRIANGLES
,
342 .unknown_draw
= 0x3000,
343 .index_count
= MALI_POSITIVE(vertex_count
)
347 .position_varying
= coordinates
,
348 .textures
= panfrost_pool_upload(pool
, &texture_t
.gpu
, sizeof(texture_t
.gpu
)),
349 .sampler_descriptor
= sampler
.gpu
,
350 .shader
= shader_meta_t
.gpu
,
351 .varyings
= varying_buffer
.gpu
,
352 .varying_meta
= varying
.gpu
,
353 .viewport
= viewport
.gpu
,
358 panfrost_pack_work_groups_compute(&payload
.prefix
, 1, vertex_count
, 1, 1, 1, 1, true);
359 payload
.prefix
.workgroups_x_shift_3
= 6;
361 panfrost_new_job(pool
, scoreboard
, MALI_JOB_TYPE_TILER
, false, 0, &payload
, sizeof(payload
), true);