panfrost: Move pool routines to common code
[mesa.git] / src / gallium / drivers / panfrost / pan_scoreboard.c
1 /*
2 * Copyright (C) 2019 Collabora, Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 */
24
25 #include "pan_context.h"
26 #include "pan_job.h"
27 #include "pan_pool.h"
28 #include "panfrost-quirks.h"
29 #include "util/bitset.h"
30
31 /*
32 * Within a batch (panfrost_job), there are various types of Mali jobs:
33 *
34 * - WRITE_VALUE: generic write primitive, used to zero tiler field
35 * - VERTEX: runs a vertex shader
36 * - TILER: runs tiling and sets up a fragment shader
37 * - FRAGMENT: runs fragment shaders and writes out
38 * - COMPUTE: runs a compute shader
39 * - FUSED: vertex+tiler fused together, implicit intradependency (Bifrost)
40 * - GEOMETRY: runs a geometry shader (unimplemented)
41 * - CACHE_FLUSH: unseen in the wild, theoretically cache flush
42 *
43 * In between a full batch and a single Mali job is the "job chain", a series
44 * of Mali jobs together forming a linked list. Within the job chain, each Mali
45 * job can set (up to) two dependencies on other earlier jobs in the chain.
46 * This dependency graph forms a scoreboard. The general idea of a scoreboard
47 * applies: when there is a data dependency of job B on job A, job B sets one
48 * of its dependency indices to job A, ensuring that job B won't start until
49 * job A finishes.
50 *
51 * More specifically, here are a set of rules:
52 *
53 * - A write value job must appear if and only if there is at least one tiler
54 * job, and tiler jobs must depend on it.
55 *
56 * - Vertex jobs and tiler jobs are independent.
57 *
58 * - A tiler job must have a dependency on its data source. If it's getting
59 * data from a vertex job, it depends on the vertex job. If it's getting data
60 * from software, this is null.
61 *
62 * - Tiler jobs must depend on the write value job (chained or otherwise).
63 *
64 * - Tiler jobs must be strictly ordered. So each tiler job must depend on the
65 * previous job in the chain.
66 *
 67 * - Job linking via next_job has no bearing on order of execution; rather, it
68 * just establishes the linked list of jobs, EXCEPT:
69 *
70 * - A job's dependencies must appear earlier in the linked list (job chain).
71 *
72 * Justification for each rule:
73 *
74 * - Write value jobs are used to write a zero into a magic tiling field, which
75 * enables tiling to work. If tiling occurs, they are needed; if it does not,
76 * we cannot emit them since then tiling partially occurs and it's bad.
77 *
78 * - The hardware has no notion of a "vertex/tiler job" (at least not our
79 * hardware -- other revs have fused jobs, but --- crap, this just got even
80 * more complicated). They are independent units that take in data, process
81 * it, and spit out data.
82 *
83 * - Any job must depend on its data source, in fact, or risk a
84 * read-before-write hazard. Tiler jobs get their data from vertex jobs, ergo
85 * tiler jobs depend on the corresponding vertex job (if it's there).
86 *
87 * - The tiler is not thread-safe; this dependency prevents race conditions
88 * between two different jobs trying to write to the tiler outputs at the
89 * same time.
90 *
91 * - Internally, jobs are scoreboarded; the next job fields just form a linked
92 * list to allow the jobs to be read in; the execution order is from
93 * resolving the dependency fields instead.
94 *
95 * - The hardware cannot set a dependency on a job it doesn't know about yet,
96 * and dependencies are processed in-order of the next job fields.
97 *
98 */
99
 100 /* Generates, uploads, and queues a new job. All fields are written in order
101 * except for next_job accounting (TODO: Should we be clever and defer the
102 * upload of the header here until next job to keep the access pattern totally
103 * linear? Or is that just a micro op at this point?). Returns the generated
104 * index for dep management.
105 *
106 * Inject is used to inject a job at the front, for wallpapering. If you are
107 * not wallpapering and set this, dragons will eat you. */
108
109 unsigned
110 panfrost_new_job(
111 struct panfrost_batch *batch,
112 enum mali_job_type type,
113 bool barrier,
114 unsigned local_dep,
115 void *payload, size_t payload_size,
116 bool inject)
117 {
118 struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
119
120 unsigned global_dep = 0;
121
122 if (type == JOB_TYPE_TILER) {
123 /* Tiler jobs must be chained, and on Midgard, the first tiler
124 * job must depend on the write value job, whose index we
125 * reserve now */
126
127 if (batch->tiler_dep)
128 global_dep = batch->tiler_dep;
129 else if (!(dev->quirks & IS_BIFROST)) {
130 batch->write_value_index = ++batch->job_index;
131 global_dep = batch->write_value_index;
132 }
133 }
134
135 /* Assign the index */
136 unsigned index = ++batch->job_index;
137
138 struct mali_job_descriptor_header job = {
139 .job_descriptor_size = 1,
140 .job_type = type,
141 .job_barrier = barrier,
142 .job_index = index,
143 .job_dependency_index_1 = local_dep,
144 .job_dependency_index_2 = global_dep,
145 };
146
147 if (inject)
148 job.next_job = batch->first_job;
149
150 struct panfrost_transfer transfer = panfrost_pool_alloc(&batch->pool, sizeof(job) + payload_size);
151 memcpy(transfer.cpu, &job, sizeof(job));
152 memcpy(transfer.cpu + sizeof(job), payload, payload_size);
153
154 if (inject) {
155 batch->first_job = transfer.gpu;
156 return index;
157 }
158
159 /* Form a chain */
160 if (type == JOB_TYPE_TILER)
161 batch->tiler_dep = index;
162
163 if (batch->prev_job)
164 batch->prev_job->next_job = transfer.gpu;
165 else
166 batch->first_job = transfer.gpu;
167
168 batch->prev_job = (struct mali_job_descriptor_header *) transfer.cpu;
169 return index;
170 }
171
172 /* Generates a write value job, used to initialize the tiler structures. Note
173 * this is called right before frame submission. */
174
175 void
176 panfrost_scoreboard_initialize_tiler(struct panfrost_batch *batch)
177 {
178 struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
179
180 /* Check if we even need tiling */
181 if (dev->quirks & IS_BIFROST || !batch->tiler_dep)
182 return;
183
184 /* Okay, we do. Let's generate it. We'll need the job's polygon list
185 * regardless of size. */
186
187 mali_ptr polygon_list = panfrost_batch_get_polygon_list(batch,
188 MALI_TILER_MINIMUM_HEADER_SIZE);
189
190 struct mali_job_descriptor_header job = {
191 .job_type = JOB_TYPE_WRITE_VALUE,
192 .job_index = batch->write_value_index,
193 .job_descriptor_size = 1,
194 .next_job = batch->first_job
195 };
196
197 struct mali_payload_write_value payload = {
198 .address = polygon_list,
199 .value_descriptor = MALI_WRITE_VALUE_ZERO,
200 };
201
202 struct panfrost_transfer transfer = panfrost_pool_alloc(&batch->pool, sizeof(job) + sizeof(payload));
203 memcpy(transfer.cpu, &job, sizeof(job));
204 memcpy(transfer.cpu + sizeof(job), &payload, sizeof(payload));
205
206 batch->first_job = transfer.gpu;
207 }