2 * Copyright (C) 2019 Collabora, Ltd.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26 #include "pan_scoreboard.h"
27 #include "pan_device.h"
28 #include "panfrost-quirks.h"
31 * There are various types of Mali jobs:
33 * - WRITE_VALUE: generic write primitive, used to zero tiler field
34 * - VERTEX: runs a vertex shader
35 * - TILER: runs tiling and sets up a fragment shader
36 * - FRAGMENT: runs fragment shaders and writes out
37 * - COMPUTE: runs a compute shader
38 * - FUSED: vertex+tiler fused together, implicit intradependency (Bifrost)
39 * - GEOMETRY: runs a geometry shader (unimplemented)
40 * - CACHE_FLUSH: unseen in the wild, theoretically cache flush
42 * In between a full batch and a single Mali job is the "job chain", a series
43 * of Mali jobs together forming a linked list. Within the job chain, each Mali
44 * job can set (up to) two dependencies on other earlier jobs in the chain.
45 * This dependency graph forms a scoreboard. The general idea of a scoreboard
46 * applies: when there is a data dependency of job B on job A, job B sets one
47 * of its dependency indices to job A, ensuring that job B won't start until job A finishes.
50 * More specifically, here are a set of rules:
52 * - A write value job must appear if and only if there is at least one tiler
53 * job, and tiler jobs must depend on it.
55 * - Vertex jobs and tiler jobs are independent.
57 * - A tiler job must have a dependency on its data source. If it's getting
58 * data from a vertex job, it depends on the vertex job. If it's getting data
59 * from software, this is null.
61 * - Tiler jobs must depend on the write value job (chained or otherwise).
63 * - Tiler jobs must be strictly ordered. So each tiler job must depend on the
64 * previous job in the chain.
66 * - Linking jobs via next_job has no bearing on order of execution; rather, it
67 * just establishes the linked list of jobs, EXCEPT:
69 * - A job's dependencies must appear earlier in the linked list (job chain).
71 * Justification for each rule:
73 * - Write value jobs are used to write a zero into a magic tiling field, which
74 * enables tiling to work. If tiling occurs, they are needed; if it does not,
75 * we cannot emit them since then tiling partially occurs and it's bad.
77 * - The hardware has no notion of a "vertex/tiler job" (at least not our
78 * hardware -- other revs have fused jobs, but --- crap, this just got even
79 * more complicated). They are independent units that take in data, process
80 * it, and spit out data.
82 * - Any job must depend on its data source, in fact, or risk a
83 * read-before-write hazard. Tiler jobs get their data from vertex jobs, ergo
84 * tiler jobs depend on the corresponding vertex job (if it's there).
86 * - The tiler is not thread-safe; this dependency prevents race conditions
87 * between two different jobs trying to write to the tiler outputs at the same time.
90 * - Internally, jobs are scoreboarded; the next job fields just form a linked
91 * list to allow the jobs to be read in; the execution order is from
92 * resolving the dependency fields instead.
94 * - The hardware cannot set a dependency on a job it doesn't know about yet,
95 * and dependencies are processed in-order of the next job fields.
99 /* Generates, uploads, and queues a new job. All fields are written in order
100 * except for next_job accounting (TODO: Should we be clever and defer the
101 * upload of the header here until next job to keep the access pattern totally
102 * linear? Or is that just a micro op at this point?). Returns the generated
103 * index for dep management.
105 * Inject is used to inject a job at the front, for wallpapering. If you are
106 * not wallpapering and set this, dragons will eat you. */
110 struct pan_pool
*pool
,
111 struct pan_scoreboard
*scoreboard
,
112 enum mali_job_type type
,
115 void *payload
, size_t payload_size
,
118 unsigned global_dep
= 0;
120 if (type
== MALI_JOB_TYPE_TILER
) {
121 /* Tiler jobs must be chained, and on Midgard, the first tiler
122 * job must depend on the write value job, whose index we reserve now */
125 if (scoreboard
->tiler_dep
)
126 global_dep
= scoreboard
->tiler_dep
;
127 else if (!(pool
->dev
->quirks
& IS_BIFROST
)) {
128 scoreboard
->write_value_index
= ++scoreboard
->job_index
;
129 global_dep
= scoreboard
->write_value_index
;
133 /* Assign the index */
134 unsigned index
= ++scoreboard
->job_index
;
136 struct mali_job_descriptor_header job
= {
137 .job_descriptor_size
= 1,
139 .job_barrier
= barrier
,
141 .job_dependency_index_1
= local_dep
,
142 .job_dependency_index_2
= global_dep
,
146 job
.next_job
= scoreboard
->first_job
;
148 struct panfrost_transfer transfer
=
149 panfrost_pool_alloc_aligned(pool
, sizeof(job
) + payload_size
, 64);
150 memcpy(transfer
.cpu
, &job
, sizeof(job
));
151 memcpy(transfer
.cpu
+ sizeof(job
), payload
, payload_size
);
154 scoreboard
->first_job
= transfer
.gpu
;
159 if (type
== MALI_JOB_TYPE_TILER
)
160 scoreboard
->tiler_dep
= index
;
162 if (scoreboard
->prev_job
)
163 scoreboard
->prev_job
->next_job
= transfer
.gpu
;
165 scoreboard
->first_job
= transfer
.gpu
;
167 scoreboard
->prev_job
= (struct mali_job_descriptor_header
*) transfer
.cpu
;
171 /* Generates a write value job, used to initialize the tiler structures. Note
172 * this is called right before frame submission. */
175 panfrost_scoreboard_initialize_tiler(struct pan_pool
*pool
,
176 struct pan_scoreboard
*scoreboard
,
177 mali_ptr polygon_list
)
179 /* Check if we even need tiling */
180 if (pool
->dev
->quirks
& IS_BIFROST
|| !scoreboard
->tiler_dep
)
183 /* Okay, we do. Let's generate it. We'll need the job's polygon list
184 * regardless of size. */
186 struct mali_job_descriptor_header job
= {
187 .job_type
= MALI_JOB_TYPE_WRITE_VALUE
,
188 .job_index
= scoreboard
->write_value_index
,
189 .job_descriptor_size
= 1,
190 .next_job
= scoreboard
->first_job
193 struct mali_payload_write_value payload
= {
194 .address
= polygon_list
,
195 .value_descriptor
= MALI_WRITE_VALUE_ZERO
,
198 struct panfrost_transfer transfer
= panfrost_pool_alloc(pool
, sizeof(job
) + sizeof(payload
));
199 memcpy(transfer
.cpu
, &job
, sizeof(job
));
200 memcpy(transfer
.cpu
+ sizeof(job
), &payload
, sizeof(payload
));
202 scoreboard
->first_job
= transfer
.gpu
;