vc4: Move job-submit skip cases to vc4_job_submit().
[mesa.git] / src / gallium / drivers / vc4 / vc4_job.c
1 /*
2 * Copyright © 2014-2015 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file vc4_job.c
25 *
26 * Functions for submitting VC4 render jobs to the kernel.
27 */
28
29 #include <xf86drm.h>
30 #include "vc4_context.h"
31
32 void
33 vc4_job_init(struct vc4_context *vc4)
34 {
35 vc4_init_cl(vc4, &vc4->bcl);
36 vc4_init_cl(vc4, &vc4->shader_rec);
37 vc4_init_cl(vc4, &vc4->uniforms);
38 vc4_init_cl(vc4, &vc4->bo_handles);
39 vc4_init_cl(vc4, &vc4->bo_pointers);
40 vc4_job_reset(vc4);
41 }
42
43 void
44 vc4_job_reset(struct vc4_context *vc4)
45 {
46 struct vc4_bo **referenced_bos = vc4->bo_pointers.base;
47 for (int i = 0; i < cl_offset(&vc4->bo_handles) / 4; i++) {
48 vc4_bo_unreference(&referenced_bos[i]);
49 }
50 vc4_reset_cl(&vc4->bcl);
51 vc4_reset_cl(&vc4->shader_rec);
52 vc4_reset_cl(&vc4->uniforms);
53 vc4_reset_cl(&vc4->bo_handles);
54 vc4_reset_cl(&vc4->bo_pointers);
55 vc4->shader_rec_count = 0;
56
57 vc4->needs_flush = false;
58 vc4->draw_calls_queued = 0;
59
60 /* We have no hardware context saved between our draw calls, so we
61 * need to flag the next draw as needing all state emitted. Emitting
62 * all state at the start of our draws is also what ensures that we
63 * return to the state we need after a previous tile has finished.
64 */
65 vc4->dirty = ~0;
66 vc4->resolve = 0;
67 vc4->cleared = 0;
68
69 vc4->draw_min_x = ~0;
70 vc4->draw_min_y = ~0;
71 vc4->draw_max_x = 0;
72 vc4->draw_max_y = 0;
73 }
74
75 static void
76 vc4_submit_setup_rcl_surface(struct vc4_context *vc4,
77 struct drm_vc4_submit_rcl_surface *submit_surf,
78 struct pipe_surface *psurf,
79 bool is_depth, bool is_write)
80 {
81 struct vc4_surface *surf = vc4_surface(psurf);
82
83 if (!surf) {
84 submit_surf->hindex = ~0;
85 return;
86 }
87
88 struct vc4_resource *rsc = vc4_resource(psurf->texture);
89 submit_surf->hindex = vc4_gem_hindex(vc4, rsc->bo);
90 submit_surf->offset = surf->offset;
91
92 if (psurf->texture->nr_samples <= 1) {
93 if (is_depth) {
94 submit_surf->bits =
95 VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_ZS,
96 VC4_LOADSTORE_TILE_BUFFER_BUFFER);
97
98 } else {
99 submit_surf->bits =
100 VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_COLOR,
101 VC4_LOADSTORE_TILE_BUFFER_BUFFER) |
102 VC4_SET_FIELD(vc4_rt_format_is_565(psurf->format) ?
103 VC4_LOADSTORE_TILE_BUFFER_BGR565 :
104 VC4_LOADSTORE_TILE_BUFFER_RGBA8888,
105 VC4_LOADSTORE_TILE_BUFFER_FORMAT);
106 }
107 submit_surf->bits |=
108 VC4_SET_FIELD(surf->tiling,
109 VC4_LOADSTORE_TILE_BUFFER_TILING);
110 } else {
111 assert(!is_write);
112 submit_surf->flags |= VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES;
113 }
114
115 if (is_write)
116 rsc->writes++;
117 }
118
119 static void
120 vc4_submit_setup_rcl_render_config_surface(struct vc4_context *vc4,
121 struct drm_vc4_submit_rcl_surface *submit_surf,
122 struct pipe_surface *psurf)
123 {
124 struct vc4_surface *surf = vc4_surface(psurf);
125
126 if (!surf) {
127 submit_surf->hindex = ~0;
128 return;
129 }
130
131 struct vc4_resource *rsc = vc4_resource(psurf->texture);
132 submit_surf->hindex = vc4_gem_hindex(vc4, rsc->bo);
133 submit_surf->offset = surf->offset;
134
135 if (psurf->texture->nr_samples <= 1) {
136 submit_surf->bits =
137 VC4_SET_FIELD(vc4_rt_format_is_565(surf->base.format) ?
138 VC4_RENDER_CONFIG_FORMAT_BGR565 :
139 VC4_RENDER_CONFIG_FORMAT_RGBA8888,
140 VC4_RENDER_CONFIG_FORMAT) |
141 VC4_SET_FIELD(surf->tiling,
142 VC4_RENDER_CONFIG_MEMORY_FORMAT);
143 }
144
145 rsc->writes++;
146 }
147
148 static void
149 vc4_submit_setup_rcl_msaa_surface(struct vc4_context *vc4,
150 struct drm_vc4_submit_rcl_surface *submit_surf,
151 struct pipe_surface *psurf)
152 {
153 struct vc4_surface *surf = vc4_surface(psurf);
154
155 if (!surf) {
156 submit_surf->hindex = ~0;
157 return;
158 }
159
160 struct vc4_resource *rsc = vc4_resource(psurf->texture);
161 submit_surf->hindex = vc4_gem_hindex(vc4, rsc->bo);
162 submit_surf->offset = surf->offset;
163 submit_surf->bits = 0;
164 rsc->writes++;
165 }
166
167 /**
168 * Submits the job to the kernel and then reinitializes it.
169 */
170 void
171 vc4_job_submit(struct vc4_context *vc4)
172 {
173 if (!vc4->needs_flush)
174 return;
175
176 /* The RCL setup would choke if the draw bounds cause no drawing, so
177 * just drop the drawing if that's the case.
178 */
179 if (vc4->draw_max_x <= vc4->draw_min_x ||
180 vc4->draw_max_y <= vc4->draw_min_y) {
181 vc4_job_reset(vc4);
182 return;
183 }
184
185 if (vc4_debug & VC4_DEBUG_CL) {
186 fprintf(stderr, "BCL:\n");
187 vc4_dump_cl(vc4->bcl.base, cl_offset(&vc4->bcl), false);
188 }
189
190 if (cl_offset(&vc4->bcl) > 0) {
191 /* Increment the semaphore indicating that binning is done and
192 * unblocking the render thread. Note that this doesn't act
193 * until the FLUSH completes.
194 */
195 cl_ensure_space(&vc4->bcl, 8);
196 struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
197 cl_u8(&bcl, VC4_PACKET_INCREMENT_SEMAPHORE);
198 /* The FLUSH caps all of our bin lists with a
199 * VC4_PACKET_RETURN.
200 */
201 cl_u8(&bcl, VC4_PACKET_FLUSH);
202 cl_end(&vc4->bcl, bcl);
203 }
204 struct drm_vc4_submit_cl submit;
205 memset(&submit, 0, sizeof(submit));
206
207 cl_ensure_space(&vc4->bo_handles, 6 * sizeof(uint32_t));
208 cl_ensure_space(&vc4->bo_pointers, 6 * sizeof(struct vc4_bo *));
209
210 vc4_submit_setup_rcl_surface(vc4, &submit.color_read,
211 vc4->color_read, false, false);
212 vc4_submit_setup_rcl_render_config_surface(vc4, &submit.color_write,
213 vc4->color_write);
214 vc4_submit_setup_rcl_surface(vc4, &submit.zs_read,
215 vc4->zs_read, true, false);
216 vc4_submit_setup_rcl_surface(vc4, &submit.zs_write,
217 vc4->zs_write, true, true);
218
219 vc4_submit_setup_rcl_msaa_surface(vc4, &submit.msaa_color_write,
220 vc4->msaa_color_write);
221 vc4_submit_setup_rcl_msaa_surface(vc4, &submit.msaa_zs_write,
222 vc4->msaa_zs_write);
223
224 if (vc4->msaa) {
225 /* This bit controls how many pixels the general
226 * (i.e. subsampled) loads/stores are iterating over
227 * (multisample loads replicate out to the other samples).
228 */
229 submit.color_write.bits |= VC4_RENDER_CONFIG_MS_MODE_4X;
230 /* Controls whether color_write's
231 * VC4_PACKET_STORE_MS_TILE_BUFFER does 4x decimation
232 */
233 submit.color_write.bits |= VC4_RENDER_CONFIG_DECIMATE_MODE_4X;
234 }
235
236 submit.bo_handles = (uintptr_t)vc4->bo_handles.base;
237 submit.bo_handle_count = cl_offset(&vc4->bo_handles) / 4;
238 submit.bin_cl = (uintptr_t)vc4->bcl.base;
239 submit.bin_cl_size = cl_offset(&vc4->bcl);
240 submit.shader_rec = (uintptr_t)vc4->shader_rec.base;
241 submit.shader_rec_size = cl_offset(&vc4->shader_rec);
242 submit.shader_rec_count = vc4->shader_rec_count;
243 submit.uniforms = (uintptr_t)vc4->uniforms.base;
244 submit.uniforms_size = cl_offset(&vc4->uniforms);
245
246 assert(vc4->draw_min_x != ~0 && vc4->draw_min_y != ~0);
247 submit.min_x_tile = vc4->draw_min_x / vc4->tile_width;
248 submit.min_y_tile = vc4->draw_min_y / vc4->tile_height;
249 submit.max_x_tile = (vc4->draw_max_x - 1) / vc4->tile_width;
250 submit.max_y_tile = (vc4->draw_max_y - 1) / vc4->tile_height;
251 submit.width = vc4->draw_width;
252 submit.height = vc4->draw_height;
253 if (vc4->cleared) {
254 submit.flags |= VC4_SUBMIT_CL_USE_CLEAR_COLOR;
255 submit.clear_color[0] = vc4->clear_color[0];
256 submit.clear_color[1] = vc4->clear_color[1];
257 submit.clear_z = vc4->clear_depth;
258 submit.clear_s = vc4->clear_stencil;
259 }
260
261 if (!(vc4_debug & VC4_DEBUG_NORAST)) {
262 int ret;
263
264 #ifndef USE_VC4_SIMULATOR
265 ret = drmIoctl(vc4->fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit);
266 #else
267 ret = vc4_simulator_flush(vc4, &submit);
268 #endif
269 static bool warned = false;
270 if (ret && !warned) {
271 fprintf(stderr, "Draw call returned %s. "
272 "Expect corruption.\n", strerror(errno));
273 warned = true;
274 } else if (!ret) {
275 vc4->last_emit_seqno = submit.seqno;
276 }
277 }
278
279 if (vc4->last_emit_seqno - vc4->screen->finished_seqno > 5) {
280 if (!vc4_wait_seqno(vc4->screen,
281 vc4->last_emit_seqno - 5,
282 PIPE_TIMEOUT_INFINITE,
283 "job throttling")) {
284 fprintf(stderr, "Job throttling failed\n");
285 }
286 }
287
288 if (vc4_debug & VC4_DEBUG_ALWAYS_SYNC) {
289 if (!vc4_wait_seqno(vc4->screen, vc4->last_emit_seqno,
290 PIPE_TIMEOUT_INFINITE, "sync")) {
291 fprintf(stderr, "Wait failed.\n");
292 abort();
293 }
294 }
295
296 vc4_job_reset(vc4);
297 }