panfrost: Print synced traces to stderr
[mesa.git] / src / gallium / drivers / panfrost / pan_varyings.c
1 /*
2 * Copyright (C) 2018-2019 Alyssa Rosenzweig
3 * Copyright (C) 2019 Collabora, Ltd.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 */
25
26 #include "pan_bo.h"
27 #include "pan_context.h"
28 #include "pan_format.h"
29 #include "util/u_prim.h"
30
31 static mali_ptr
32 panfrost_emit_varyings(
33 struct panfrost_context *ctx,
34 union mali_attr *slot,
35 unsigned stride,
36 unsigned count)
37 {
38 /* Fill out the descriptor */
39 slot->stride = stride;
40 slot->size = stride * count;
41 slot->shift = slot->extra_flags = 0;
42
43 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
44 struct panfrost_transfer transfer =
45 panfrost_allocate_transient(batch, slot->size);
46
47 slot->elements = transfer.gpu | MALI_ATTR_LINEAR;
48
49 return transfer.gpu;
50 }
51
52 static void
53 panfrost_emit_streamout(
54 struct panfrost_context *ctx,
55 union mali_attr *slot,
56 unsigned stride,
57 unsigned offset,
58 unsigned count,
59 struct pipe_stream_output_target *target)
60 {
61 /* Fill out the descriptor */
62 slot->stride = stride * 4;
63 slot->shift = slot->extra_flags = 0;
64
65 unsigned max_size = target->buffer_size;
66 unsigned expected_size = slot->stride * count;
67
68 slot->size = MIN2(max_size, expected_size);
69
70 /* Grab the BO and bind it to the batch */
71 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
72 struct panfrost_bo *bo = pan_resource(target->buffer)->bo;
73
74 /* Varyings are WRITE from the perspective of the VERTEX but READ from
75 * the perspective of the TILER and FRAGMENT.
76 */
77 panfrost_batch_add_bo(batch, bo,
78 PAN_BO_ACCESS_SHARED |
79 PAN_BO_ACCESS_RW |
80 PAN_BO_ACCESS_VERTEX_TILER |
81 PAN_BO_ACCESS_FRAGMENT);
82
83 mali_ptr addr = bo->gpu + target->buffer_offset + (offset * slot->stride);
84 slot->elements = addr;
85 }
86
87 /* Given a shader and buffer indices, link varying metadata together */
88
89 static bool
90 is_special_varying(gl_varying_slot loc)
91 {
92 switch (loc) {
93 case VARYING_SLOT_POS:
94 case VARYING_SLOT_PSIZ:
95 case VARYING_SLOT_PNTC:
96 case VARYING_SLOT_FACE:
97 return true;
98 default:
99 return false;
100 }
101 }
102
103 static void
104 panfrost_emit_varying_meta(
105 void *outptr, struct panfrost_shader_state *ss,
106 signed general, signed gl_Position,
107 signed gl_PointSize, signed gl_PointCoord,
108 signed gl_FrontFacing)
109 {
110 struct mali_attr_meta *out = (struct mali_attr_meta *) outptr;
111
112 for (unsigned i = 0; i < ss->tripipe->varying_count; ++i) {
113 gl_varying_slot location = ss->varyings_loc[i];
114 int index = -1;
115
116 switch (location) {
117 case VARYING_SLOT_POS:
118 index = gl_Position;
119 break;
120 case VARYING_SLOT_PSIZ:
121 index = gl_PointSize;
122 break;
123 case VARYING_SLOT_PNTC:
124 index = gl_PointCoord;
125 break;
126 case VARYING_SLOT_FACE:
127 index = gl_FrontFacing;
128 break;
129 default:
130 index = general;
131 break;
132 }
133
134 assert(index >= 0);
135 out[i].index = index;
136 }
137 }
138
139 static bool
140 has_point_coord(unsigned mask, gl_varying_slot loc)
141 {
142 if ((loc >= VARYING_SLOT_TEX0) && (loc <= VARYING_SLOT_TEX7))
143 return (mask & (1 << (loc - VARYING_SLOT_TEX0)));
144 else if (loc == VARYING_SLOT_PNTC)
145 return (mask & (1 << 8));
146 else
147 return false;
148 }
149
150 /* Helpers for manipulating stream out information so we can pack varyings
151 * accordingly. Compute the src_offset for a given captured varying */
152
153 static struct pipe_stream_output
154 pan_get_so(struct pipe_stream_output_info *info, gl_varying_slot loc)
155 {
156 for (unsigned i = 0; i < info->num_outputs; ++i) {
157 if (info->output[i].register_index == loc)
158 return info->output[i];
159 }
160
161 unreachable("Varying not captured");
162 }
163
164 /* TODO: Integers */
165 static enum mali_format
166 pan_xfb_format(unsigned nr_components)
167 {
168 switch (nr_components) {
169 case 1: return MALI_R32F;
170 case 2: return MALI_RG32F;
171 case 3: return MALI_RGB32F;
172 case 4: return MALI_RGBA32F;
173 default: unreachable("Invalid format");
174 }
175 }
176
/* Build the varying state for the currently bound vertex/fragment shader
 * pair and attach it to both payloads.
 *
 * Two things are produced:
 *
 *  - an array of per-varying metadata (struct mali_attr_meta), vertex
 *    shader entries first and fragment shader entries right after, placed in
 *    transient memory; and
 *
 *  - an array of buffer records (union mali_attr), laid out as
 *    [stream out buffers][general][gl_Position][optional special records],
 *    uploaded to transient memory at the end.
 *
 * ctx:          context whose active shader variants, stream output state and
 *               payloads are read and updated.
 * vertex_count: number of elements used to size the general, position and
 *               point size buffers.
 */
void
panfrost_emit_varying_descriptor(
        struct panfrost_context *ctx,
        unsigned vertex_count)
{
        /* Load the shaders */

        struct panfrost_shader_state *vs = &ctx->shader[PIPE_SHADER_VERTEX]->variants[ctx->shader[PIPE_SHADER_VERTEX]->active_variant];
        struct panfrost_shader_state *fs = &ctx->shader[PIPE_SHADER_FRAGMENT]->variants[ctx->shader[PIPE_SHADER_FRAGMENT]->active_variant];

        /* Number of 16-byte slots handed out in the general varying buffer */
        unsigned int num_gen_varyings = 0;

        /* Allocate the varying descriptor: vertex metadata followed
         * immediately by fragment metadata in a single allocation */

        size_t vs_size = sizeof(struct mali_attr_meta) * vs->tripipe->varying_count;
        size_t fs_size = sizeof(struct mali_attr_meta) * fs->tripipe->varying_count;

        struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
        struct panfrost_transfer trans = panfrost_allocate_transient(batch,
                        vs_size + fs_size);

        struct pipe_stream_output_info so = vs->stream_output;

        /* Check if this varying is linked by us. This is the case for
         * general-purpose, non-captured varyings. If it is, link it. If it's
         * not, use the provided stream out information to determine the
         * offset, since it was already linked for us. */

        for (unsigned i = 0; i < vs->tripipe->varying_count; i++) {
                gl_varying_slot loc = vs->varyings_loc[i];

                bool special = is_special_varying(loc);
                bool captured = ((vs->so_mask & (1ll << loc)) ? true : false);

                if (captured) {
                        /* For captured varyings the layout is defined by the
                         * stream out info, not us */
                        struct pipe_stream_output o = pan_get_so(&so, loc);

                        unsigned dst_offset = o.dst_offset * 4; /* dwords */
                        vs->varyings[i].src_offset = dst_offset;
                } else if (!special) {
                        /* Next free 16-byte slot of the general buffer */
                        vs->varyings[i].src_offset = 16 * (num_gen_varyings++);
                }
        }

        /* Link up with fragment varyings */
        bool reads_point_coord = fs->reads_point_coord;

        for (unsigned i = 0; i < fs->tripipe->varying_count; i++) {
                gl_varying_slot loc = fs->varyings_loc[i];
                signed vs_idx = -1;

                /* Link up: find the vertex varying at the same location */
                for (unsigned j = 0; j < vs->tripipe->varying_count; ++j) {
                        if (vs->varyings_loc[j] == loc) {
                                vs_idx = j;
                                break;
                        }
                }

                /* Either reuse the vertex shader's offset, or assign a fresh
                 * general slot when the vertex shader has no such varying */
                if (vs_idx >= 0)
                        fs->varyings[i].src_offset = vs->varyings[vs_idx].src_offset;
                else
                        fs->varyings[i].src_offset = 16 * (num_gen_varyings++);

                /* A varying replaced by the point sprite mask also needs the
                 * point coordinate record */
                if (has_point_coord(fs->point_sprite_mask, loc))
                        reads_point_coord = true;
        }

        /* Seed the transient metadata from the shaders' own copies; index,
         * format, swizzle and offsets are patched in place below */
        memcpy(trans.cpu, vs->varyings, vs_size);
        memcpy(trans.cpu + vs_size, fs->varyings, fs_size);

        union mali_attr varyings[PIPE_MAX_ATTRIBS];
        memset(varyings, 0, sizeof(varyings));

        /* Figure out how many streamout buffers could be bound: at least the
         * number of bound targets, and enough to cover every output buffer
         * referenced by a captured varying */
        unsigned so_count = ctx->streamout.num_targets;
        for (unsigned i = 0; i < vs->tripipe->varying_count; i++) {
                gl_varying_slot loc = vs->varyings_loc[i];

                bool captured = ((vs->so_mask & (1ll << loc)) ? true : false);
                if (!captured) continue;

                struct pipe_stream_output o = pan_get_so(&so, loc);
                so_count = MAX2(so_count, o.output_buffer + 1);
        }

        /* Hand out record indices after the stream out buffers. Optional
         * records get -1 when unused; the order of these statements fixes
         * the layout, and idx ends up as the total record count. */
        signed idx = so_count;
        signed general = idx++;
        signed gl_Position = idx++;
        signed gl_PointSize = vs->writes_point_size ? (idx++) : -1;
        signed gl_PointCoord = reads_point_coord ? (idx++) : -1;
        signed gl_FrontFacing = fs->reads_face ? (idx++) : -1;
        signed gl_FragCoord = fs->reads_frag_coord ? (idx++) : -1;

        /* Emit the stream out buffers */

        /* NOTE(review): this uses ctx->vertex_count rather than the
         * vertex_count parameter -- confirm that is intended */
        unsigned output_count = u_stream_outputs_for_vertices(
                        ctx->active_prim, ctx->vertex_count);

        for (unsigned i = 0; i < so_count; ++i) {
                struct pipe_stream_output_target *target =
                        (i < ctx->streamout.num_targets) ? ctx->streamout.targets[i] : NULL;

                if (target) {
                        panfrost_emit_streamout(ctx, &varyings[i], so.stride[i], ctx->streamout.offsets[i], output_count, target);
                } else {
                        /* Emit a dummy buffer */
                        panfrost_emit_varyings(ctx, &varyings[i], so.stride[i] * 4, output_count);

                        /* Clear the attribute type */
                        varyings[i].elements &= ~0xF;
                }
        }

        /* General-purpose buffer: 16 bytes per assigned slot per vertex */
        panfrost_emit_varyings(ctx, &varyings[general], num_gen_varyings * 16,
                               vertex_count);

        /* fp32 vec4 gl_Position */
        ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.position_varying =
                panfrost_emit_varyings(ctx, &varyings[gl_Position],
                                       sizeof(float) * 4, vertex_count);


        /* Point size buffer: 2 bytes per vertex (presumably fp16 -- TODO
         * confirm) */
        if (vs->writes_point_size)
                ctx->payloads[PIPE_SHADER_FRAGMENT].primitive_size.pointer =
                        panfrost_emit_varyings(ctx, &varyings[gl_PointSize],
                                               2, vertex_count);

        /* The remaining special records have no backing memory; their
         * elements field holds a special MALI_VARYING_* value instead */
        if (reads_point_coord)
                varyings[gl_PointCoord].elements = MALI_VARYING_POINT_COORD;

        if (fs->reads_face)
                varyings[gl_FrontFacing].elements = MALI_VARYING_FRONT_FACING;

        if (fs->reads_frag_coord)
                varyings[gl_FragCoord].elements = MALI_VARYING_FRAG_COORD;

        /* Let's go ahead and link varying meta to the buffer in question, now
         * that that information is available. VARYING_SLOT_POS is mapped to
         * gl_FragCoord for fragment shaders but gl_Position for vertex
         * shaders */

        panfrost_emit_varying_meta(trans.cpu, vs,
                                   general, gl_Position, gl_PointSize,
                                   gl_PointCoord, gl_FrontFacing);

        panfrost_emit_varying_meta(trans.cpu + vs_size, fs,
                                   general, gl_FragCoord, gl_PointSize,
                                   gl_PointCoord, gl_FrontFacing);

        /* Replace streamout: captured varyings are redirected from the
         * record chosen above to their stream out buffer */

        struct mali_attr_meta *ovs = (struct mali_attr_meta *) (trans.cpu);
        struct mali_attr_meta *ofs = (struct mali_attr_meta *) (trans.cpu + vs_size);

        for (unsigned i = 0; i < vs->tripipe->varying_count; i++) {
                gl_varying_slot loc = vs->varyings_loc[i];

                bool captured = ((vs->so_mask & (1ll << loc)) ? true : false);
                if (!captured) continue;

                struct pipe_stream_output o = pan_get_so(&so, loc);
                ovs[i].index = o.output_buffer;

                /* Set the type appropriately. TODO: Integer varyings XXX */
                assert(o.stream == 0);
                ovs[i].format = pan_xfb_format(o.num_components);
                ovs[i].swizzle = panfrost_get_default_swizzle(o.num_components);

                /* Link to the fragment */
                signed fs_idx = -1;

                /* Link up: find the fragment varying at the same location */
                for (unsigned j = 0; j < fs->tripipe->varying_count; ++j) {
                        if (fs->varyings_loc[j] == loc) {
                                fs_idx = j;
                                break;
                        }
                }

                /* Mirror index, format and swizzle into the matching
                 * fragment entry, if there is one */
                if (fs_idx >= 0) {
                        ofs[fs_idx].index = ovs[i].index;
                        ofs[fs_idx].format = ovs[i].format;
                        ofs[fs_idx].swizzle = ovs[i].swizzle;
                }
        }

        /* Replace point sprite */
        for (unsigned i = 0; i < fs->tripipe->varying_count; i++) {
                /* If we have a point sprite replacement, handle that here. We
                 * have to translate location first. TODO: Flip y in shader.
                 * We're already keying ... just time crunch .. */

                if (has_point_coord(fs->point_sprite_mask, fs->varyings_loc[i])) {
                        ofs[i].index = gl_PointCoord;

                        /* Swizzle out the z/w to 0/1 */
                        ofs[i].format = MALI_RG16F;
                        ofs[i].swizzle =
                                panfrost_get_default_swizzle(2);
                }
        }

        /* Fix up unaligned addresses */
        for (unsigned i = 0; i < so_count; ++i) {
                /* Skip records whose elements value is below
                 * MALI_RECORD_SPECIAL */
                if (varyings[i].elements < MALI_RECORD_SPECIAL)
                        continue;

                /* Misalignment of the address within a 64-byte unit */
                unsigned align = (varyings[i].elements & 63);

                /* While we're at it, the SO buffers are linear */

                if (!align) {
                        varyings[i].elements |= MALI_ATTR_LINEAR;
                        continue;
                }

                /* We need to adjust alignment: round the address down to a
                 * multiple of 64, grow the record by the bytes skipped, and
                 * shift every varying that reads this record by the same
                 * amount */
                varyings[i].elements &= ~63;
                varyings[i].elements |= MALI_ATTR_LINEAR;
                varyings[i].size += align;

                for (unsigned v = 0; v < vs->tripipe->varying_count; ++v) {
                        if (ovs[v].index == i)
                                ovs[v].src_offset = vs->varyings[v].src_offset + align;
                }

                for (unsigned f = 0; f < fs->tripipe->varying_count; ++f) {
                        if (ofs[f].index == i)
                                ofs[f].src_offset = fs->varyings[f].src_offset + align;
                }
        }

        /* Upload only the idx records actually assigned and share them
         * between the vertex and fragment payloads */
        mali_ptr varyings_p = panfrost_upload_transient(batch, &varyings, idx * sizeof(union mali_attr));
        ctx->payloads[PIPE_SHADER_VERTEX].postfix.varyings = varyings_p;
        ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.varyings = varyings_p;

        /* Fragment metadata starts right after the vertex metadata */
        ctx->payloads[PIPE_SHADER_VERTEX].postfix.varying_meta = trans.gpu;
        ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.varying_meta = trans.gpu + vs_size;
}