2 * Copyright (C) 2020 Collabora, Ltd.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
29 #include "pan_encoder.h"
31 #include "pan_scoreboard.h"
32 #include "pan_texture.h"
33 #include "panfrost-quirks.h"
34 #include "../midgard/midgard_compile.h"
35 #include "compiler/nir/nir_builder.h"
36 #include "util/u_math.h"
/* On Midgard, the native blit infrastructure (via MFBD preloads) is broken or
 * missing in many cases. We instead use software paths as fallbacks to
 * implement blits, which are done as TILER jobs. No vertex shader is
 * necessary since we can supply screen-space coordinates directly.
 *
 * This is primarily designed as a fallback for preloads but could be extended
 * for other clears/blits if needed in the future. */
47 panfrost_build_blit_shader(panfrost_program
*program
, unsigned gpu_id
, gl_frag_result loc
, nir_alu_type T
, bool ms
)
49 bool is_colour
= loc
>= FRAG_RESULT_DATA0
;
51 nir_shader
*shader
= nir_shader_create(NULL
, MESA_SHADER_FRAGMENT
, &midgard_nir_options
, NULL
);
52 nir_function
*fn
= nir_function_create(shader
, "main");
53 nir_function_impl
*impl
= nir_function_impl_create(fn
);
55 nir_variable
*c_src
= nir_variable_create(shader
, nir_var_shader_in
, glsl_vector_type(GLSL_TYPE_FLOAT
, 2), "coord");
56 nir_variable
*c_out
= nir_variable_create(shader
, nir_var_shader_out
, glsl_vector_type(
57 GLSL_TYPE_FLOAT
, is_colour
? 4 : 1), "out");
59 c_src
->data
.location
= VARYING_SLOT_TEX0
;
60 c_out
->data
.location
= loc
;
64 nir_builder_init(b
, impl
);
65 b
->cursor
= nir_before_block(nir_start_block(impl
));
67 nir_ssa_def
*coord
= nir_load_var(b
, c_src
);
69 nir_tex_instr
*tex
= nir_tex_instr_create(shader
, ms
? 3 : 1);
74 tex
->src
[0].src_type
= nir_tex_src_coord
;
75 tex
->src
[0].src
= nir_src_for_ssa(nir_f2i32(b
, coord
));
76 tex
->coord_components
= 2;
78 tex
->src
[1].src_type
= nir_tex_src_ms_index
;
79 tex
->src
[1].src
= nir_src_for_ssa(nir_load_sample_id(b
));
81 tex
->src
[2].src_type
= nir_tex_src_lod
;
82 tex
->src
[2].src
= nir_src_for_ssa(nir_imm_int(b
, 0));
83 tex
->sampler_dim
= GLSL_SAMPLER_DIM_MS
;
84 tex
->op
= nir_texop_txf_ms
;
86 tex
->op
= nir_texop_tex
;
88 tex
->src
[0].src_type
= nir_tex_src_coord
;
89 tex
->src
[0].src
= nir_src_for_ssa(coord
);
90 tex
->coord_components
= 2;
92 tex
->sampler_dim
= GLSL_SAMPLER_DIM_2D
;
95 nir_ssa_dest_init(&tex
->instr
, &tex
->dest
, 4, 32, NULL
);
96 nir_builder_instr_insert(b
, &tex
->instr
);
99 nir_store_var(b
, c_out
, &tex
->dest
.ssa
, 0xFF);
101 nir_store_var(b
, c_out
, nir_channel(b
, &tex
->dest
.ssa
, 0), 0xFF);
103 midgard_compile_shader_nir(shader
, program
, false, 0, gpu_id
, false, true);
107 /* Compile and upload all possible blit shaders ahead-of-time to reduce draw
108 * time overhead. There's only ~30 of them at the moment, so this is fine */
111 panfrost_init_blit_shaders(struct panfrost_device
*dev
)
113 static const struct {
117 { FRAG_RESULT_DEPTH
, 1 << PAN_BLIT_FLOAT
},
118 { FRAG_RESULT_STENCIL
, 1 << PAN_BLIT_UINT
},
119 { FRAG_RESULT_DATA0
, ~0 },
120 { FRAG_RESULT_DATA1
, ~0 },
121 { FRAG_RESULT_DATA2
, ~0 },
122 { FRAG_RESULT_DATA3
, ~0 },
123 { FRAG_RESULT_DATA4
, ~0 },
124 { FRAG_RESULT_DATA5
, ~0 },
125 { FRAG_RESULT_DATA6
, ~0 },
126 { FRAG_RESULT_DATA7
, ~0 }
129 nir_alu_type nir_types
[PAN_BLIT_NUM_TYPES
] = {
135 /* Total size = # of shaders * bytes per shader. There are
136 * shaders for each RT (so up to DATA7 -- overestimate is
137 * okay) and up to NUM_TYPES variants of each, * 2 for multisampling
138 * variants. These shaders are simple enough that they should be less
139 * than 8 quadwords each (again, overestimate is fine). */
142 unsigned total_size
= (FRAG_RESULT_DATA7
* PAN_BLIT_NUM_TYPES
)
145 dev
->blit_shaders
.bo
= panfrost_bo_create(dev
, total_size
, PAN_BO_EXECUTE
);
147 /* Don't bother generating multisampling variants if we don't actually
148 * support multisampling */
149 bool has_ms
= !(dev
->quirks
& MIDGARD_SFBD
);
151 for (unsigned ms
= 0; ms
<= has_ms
; ++ms
) {
152 for (unsigned i
= 0; i
< ARRAY_SIZE(shader_descs
); ++i
) {
153 unsigned loc
= shader_descs
[i
].loc
;
155 for (enum pan_blit_type T
= 0; T
< PAN_BLIT_NUM_TYPES
; ++T
) {
156 if (!(shader_descs
[i
].types
& (1 << T
)))
159 panfrost_program program
;
160 panfrost_build_blit_shader(&program
, dev
->gpu_id
, loc
,
163 assert(offset
+ program
.compiled
.size
< total_size
);
164 memcpy(dev
->blit_shaders
.bo
->cpu
+ offset
, program
.compiled
.data
, program
.compiled
.size
);
166 dev
->blit_shaders
.loads
[loc
][T
][ms
] = (dev
->blit_shaders
.bo
->gpu
+ offset
) | program
.first_tag
;
167 offset
+= ALIGN_POT(program
.compiled
.size
, 64);
168 util_dynarray_fini(&program
.compiled
);
174 /* Add a shader-based load on Midgard (draw-time for GL). Shaders are
179 struct pan_pool
*pool
,
180 struct pan_scoreboard
*scoreboard
,
181 mali_ptr blend_shader
,
183 mali_ptr coordinates
, unsigned vertex_count
,
184 struct pan_image
*image
,
187 unsigned width
= u_minify(image
->width0
, image
->first_level
);
188 unsigned height
= u_minify(image
->height0
, image
->first_level
);
190 struct panfrost_transfer viewport
= panfrost_pool_alloc(pool
, MALI_VIEWPORT_LENGTH
);
191 struct panfrost_transfer sampler
= panfrost_pool_alloc(pool
, MALI_MIDGARD_SAMPLER_LENGTH
);
192 struct panfrost_transfer varying
= panfrost_pool_alloc(pool
, MALI_ATTRIBUTE_LENGTH
);
193 struct panfrost_transfer varying_buffer
= panfrost_pool_alloc(pool
, MALI_ATTRIBUTE_BUFFER_LENGTH
);
195 pan_pack(viewport
.cpu
, VIEWPORT
, cfg
) {
196 cfg
.scissor_maximum_x
= width
- 1; /* Inclusive */
197 cfg
.scissor_maximum_y
= height
- 1;
200 pan_pack(varying_buffer
.cpu
, ATTRIBUTE_BUFFER
, cfg
) {
201 cfg
.pointer
= coordinates
;
202 cfg
.stride
= 4 * sizeof(float);
203 cfg
.size
= cfg
.stride
* vertex_count
;
206 pan_pack(varying
.cpu
, ATTRIBUTE
, cfg
) {
207 cfg
.buffer_index
= 0;
208 cfg
.format
= (MALI_CHANNEL_R
<< 0) | (MALI_CHANNEL_G
<< 3) | (MALI_RGBA32F
<< 12);
211 struct mali_stencil_packed stencil
;
212 pan_pack(&stencil
, STENCIL
, cfg
) {
213 cfg
.compare_function
= MALI_FUNC_ALWAYS
;
214 cfg
.stencil_fail
= MALI_STENCIL_OP_REPLACE
;
215 cfg
.depth_fail
= MALI_STENCIL_OP_REPLACE
;
216 cfg
.depth_pass
= MALI_STENCIL_OP_REPLACE
;
219 union midgard_blend replace
= {
223 .color_mask
= MALI_MASK_R
| MALI_MASK_G
| MALI_MASK_B
| MALI_MASK_A
,
228 replace
.shader
= blend_shader
;
230 /* Determine the sampler type needed. Stencil is always sampled as
231 * UINT. Pure (U)INT is always (U)INT. Everything else is FLOAT. */
233 enum pan_blit_type T
=
234 (loc
== FRAG_RESULT_STENCIL
) ? PAN_BLIT_UINT
:
235 (util_format_is_pure_uint(image
->format
)) ? PAN_BLIT_UINT
:
236 (util_format_is_pure_sint(image
->format
)) ? PAN_BLIT_INT
:
239 bool ms
= image
->nr_samples
> 1;
241 struct mali_shader_meta shader_meta
= {
242 .shader
= pool
->dev
->blit_shaders
.loads
[loc
][T
][ms
],
251 .unknown2_3
= MALI_DEPTH_FUNC(MALI_FUNC_ALWAYS
) | 0x10,
253 .stencil_mask_front
= ~0,
254 .stencil_mask_back
= ~0,
255 .stencil_front
= stencil
,
256 .stencil_back
= stencil
,
258 .shader
= blend_shader
263 shader_meta
.unknown2_3
|= MALI_HAS_MSAA
| MALI_PER_SAMPLE
;
265 shader_meta
.unknown2_4
|= MALI_NO_MSAA
;
267 assert(shader_meta
.shader
);
269 if (pool
->dev
->quirks
& MIDGARD_SFBD
) {
270 shader_meta
.unknown2_4
|= (0x10 | MALI_NO_DITHER
);
271 shader_meta
.blend
= replace
;
273 if (loc
< FRAG_RESULT_DATA0
)
274 shader_meta
.blend
.equation
.color_mask
= 0x0;
277 if (loc
== FRAG_RESULT_DEPTH
) {
278 shader_meta
.midgard1
.flags_lo
|= MALI_WRITES_Z
;
279 shader_meta
.unknown2_3
|= MALI_DEPTH_WRITEMASK
;
280 } else if (loc
== FRAG_RESULT_STENCIL
) {
281 shader_meta
.midgard1
.flags_hi
|= MALI_WRITES_S
;
282 shader_meta
.unknown2_4
|= MALI_STENCIL_TEST
;
284 shader_meta
.midgard1
.flags_lo
|= MALI_EARLY_Z
;
287 /* Create the texture descriptor. We partially compute the base address
288 * ourselves to account for layer, such that the texture descriptor
289 * itself is for a 2D texture with array size 1 even for 3D/array
290 * textures, removing the need to separately key the blit shaders for
291 * 2D and 3D variants */
293 struct panfrost_transfer texture_t
= panfrost_pool_alloc_aligned(
294 pool
, MALI_MIDGARD_TEXTURE_LENGTH
+ sizeof(mali_ptr
) * 2 * MAX2(image
->nr_samples
, 1), 128);
296 panfrost_new_texture(texture_t
.cpu
,
297 image
->width0
, image
->height0
,
298 MAX2(image
->nr_samples
, 1), 1,
299 image
->format
, MALI_TEXTURE_DIMENSION_2D
,
301 image
->first_level
, image
->last_level
,
305 (MALI_CHANNEL_R
<< 0) | (MALI_CHANNEL_G
<< 3) | (MALI_CHANNEL_B
<< 6) | (MALI_CHANNEL_A
<< 9),
306 image
->bo
->gpu
+ image
->first_layer
*
307 panfrost_get_layer_stride(image
->slices
,
308 image
->dim
== MALI_TEXTURE_DIMENSION_3D
,
309 image
->cubemap_stride
, image
->first_level
),
312 pan_pack(sampler
.cpu
, MIDGARD_SAMPLER
, cfg
)
313 cfg
.normalized_coordinates
= false;
315 struct panfrost_transfer shader_meta_t
= panfrost_pool_alloc_aligned(
316 pool
, sizeof(shader_meta
) + 8 * sizeof(struct midgard_blend_rt
), 128);
318 memcpy(shader_meta_t
.cpu
, &shader_meta
, sizeof(shader_meta
));
320 for (unsigned i
= 0; i
< 8; ++i
) {
321 void *dest
= shader_meta_t
.cpu
+ sizeof(shader_meta
) + sizeof(struct midgard_blend_rt
) * i
;
323 if (loc
== (FRAG_RESULT_DATA0
+ i
)) {
324 struct midgard_blend_rt blend_rt
= {
325 .flags
= 0x200 | MALI_BLEND_NO_DITHER
,
329 if (util_format_is_srgb(image
->format
))
330 blend_rt
.flags
|= MALI_BLEND_SRGB
;
333 blend_rt
.flags
|= MALI_BLEND_MRT_SHADER
;
334 blend_rt
.blend
.shader
= blend_shader
;
337 memcpy(dest
, &blend_rt
, sizeof(struct midgard_blend_rt
));
339 memset(dest
, 0x0, sizeof(struct midgard_blend_rt
));
343 struct midgard_payload_vertex_tiler payload
= {
345 .draw_mode
= MALI_DRAW_MODE_TRIANGLES
,
346 .unknown_draw
= 0x3000,
347 .index_count
= MALI_POSITIVE(vertex_count
)
351 .position_varying
= coordinates
,
352 .textures
= panfrost_pool_upload(pool
, &texture_t
.gpu
, sizeof(texture_t
.gpu
)),
353 .sampler_descriptor
= sampler
.gpu
,
354 .shader
= shader_meta_t
.gpu
,
355 .varyings
= varying_buffer
.gpu
,
356 .varying_meta
= varying
.gpu
,
357 .viewport
= viewport
.gpu
,
362 panfrost_pack_work_groups_compute(&payload
.prefix
, 1, vertex_count
, 1, 1, 1, 1, true);
363 payload
.prefix
.workgroups_x_shift_3
= 6;
365 panfrost_new_job(pool
, scoreboard
, MALI_JOB_TYPE_TILER
, false, 0, &payload
, sizeof(payload
), true);