panfrost: Stop passing screen around for BO operations
[mesa.git] / src / gallium / drivers / panfrost / pan_varyings.c
1 /*
2 * Copyright (C) 2018-2019 Alyssa Rosenzweig
3 * Copyright (C) 2019 Collabora, Ltd.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 */
25
26 #include "pan_bo.h"
27 #include "pan_context.h"
28 #include "util/u_prim.h"
29
30 static mali_ptr
31 panfrost_emit_varyings(
32 struct panfrost_context *ctx,
33 union mali_attr *slot,
34 unsigned stride,
35 unsigned count)
36 {
37 /* Fill out the descriptor */
38 slot->stride = stride;
39 slot->size = stride * count;
40 slot->shift = slot->extra_flags = 0;
41
42 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
43 struct panfrost_transfer transfer =
44 panfrost_allocate_transient(batch, slot->size);
45
46 slot->elements = transfer.gpu | MALI_ATTR_LINEAR;
47
48 return transfer.gpu;
49 }
50
51 static void
52 panfrost_emit_streamout(
53 struct panfrost_context *ctx,
54 union mali_attr *slot,
55 unsigned stride,
56 unsigned offset,
57 unsigned count,
58 struct pipe_stream_output_target *target)
59 {
60 /* Fill out the descriptor */
61 slot->stride = stride * 4;
62 slot->shift = slot->extra_flags = 0;
63
64 unsigned max_size = target->buffer_size;
65 unsigned expected_size = slot->stride * count;
66
67 slot->size = MIN2(max_size, expected_size);
68
69 /* Grab the BO and bind it to the batch */
70 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
71 struct panfrost_bo *bo = pan_resource(target->buffer)->bo;
72 panfrost_batch_add_bo(batch, bo);
73
74 mali_ptr addr = bo->gpu + target->buffer_offset + (offset * slot->stride);
75 slot->elements = addr;
76 }
77
78 /* Given a shader and buffer indices, link varying metadata together */
79
80 static bool
81 is_special_varying(gl_varying_slot loc)
82 {
83 switch (loc) {
84 case VARYING_SLOT_POS:
85 case VARYING_SLOT_PSIZ:
86 case VARYING_SLOT_PNTC:
87 case VARYING_SLOT_FACE:
88 return true;
89 default:
90 return false;
91 }
92 }
93
94 static void
95 panfrost_emit_varying_meta(
96 void *outptr, struct panfrost_shader_state *ss,
97 signed general, signed gl_Position,
98 signed gl_PointSize, signed gl_PointCoord,
99 signed gl_FrontFacing)
100 {
101 struct mali_attr_meta *out = (struct mali_attr_meta *) outptr;
102
103 for (unsigned i = 0; i < ss->tripipe->varying_count; ++i) {
104 gl_varying_slot location = ss->varyings_loc[i];
105 int index = -1;
106
107 switch (location) {
108 case VARYING_SLOT_POS:
109 index = gl_Position;
110 break;
111 case VARYING_SLOT_PSIZ:
112 index = gl_PointSize;
113 break;
114 case VARYING_SLOT_PNTC:
115 index = gl_PointCoord;
116 break;
117 case VARYING_SLOT_FACE:
118 index = gl_FrontFacing;
119 break;
120 default:
121 index = general;
122 break;
123 }
124
125 assert(index >= 0);
126 out[i].index = index;
127 }
128 }
129
130 static bool
131 has_point_coord(unsigned mask, gl_varying_slot loc)
132 {
133 if ((loc >= VARYING_SLOT_TEX0) && (loc <= VARYING_SLOT_TEX7))
134 return (mask & (1 << (loc - VARYING_SLOT_TEX0)));
135 else if (loc == VARYING_SLOT_PNTC)
136 return (mask & (1 << 8));
137 else
138 return false;
139 }
140
141 /* Helpers for manipulating stream out information so we can pack varyings
142 * accordingly. Compute the src_offset for a given captured varying */
143
144 static struct pipe_stream_output
145 pan_get_so(struct pipe_stream_output_info *info, gl_varying_slot loc)
146 {
147 for (unsigned i = 0; i < info->num_outputs; ++i) {
148 if (info->output[i].register_index == loc)
149 return info->output[i];
150 }
151
152 unreachable("Varying not captured");
153 }
154
155 /* TODO: Integers */
156 static enum mali_format
157 pan_xfb_format(unsigned nr_components)
158 {
159 switch (nr_components) {
160 case 1: return MALI_R32F;
161 case 2: return MALI_RG32F;
162 case 3: return MALI_RGB32F;
163 case 4: return MALI_RGBA32F;
164 default: unreachable("Invalid format");
165 }
166 }
167
168 void
169 panfrost_emit_varying_descriptor(
170 struct panfrost_context *ctx,
171 unsigned vertex_count)
172 {
173 /* Load the shaders */
174
175 struct panfrost_shader_state *vs = &ctx->shader[PIPE_SHADER_VERTEX]->variants[ctx->shader[PIPE_SHADER_VERTEX]->active_variant];
176 struct panfrost_shader_state *fs = &ctx->shader[PIPE_SHADER_FRAGMENT]->variants[ctx->shader[PIPE_SHADER_FRAGMENT]->active_variant];
177 unsigned int num_gen_varyings = 0;
178
179 /* Allocate the varying descriptor */
180
181 size_t vs_size = sizeof(struct mali_attr_meta) * vs->tripipe->varying_count;
182 size_t fs_size = sizeof(struct mali_attr_meta) * fs->tripipe->varying_count;
183
184 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
185 struct panfrost_transfer trans = panfrost_allocate_transient(batch,
186 vs_size + fs_size);
187
188 struct pipe_stream_output_info so = vs->stream_output;
189
190 /* Check if this varying is linked by us. This is the case for
191 * general-purpose, non-captured varyings. If it is, link it. If it's
192 * not, use the provided stream out information to determine the
193 * offset, since it was already linked for us. */
194
195 for (unsigned i = 0; i < vs->tripipe->varying_count; i++) {
196 gl_varying_slot loc = vs->varyings_loc[i];
197
198 bool special = is_special_varying(loc);
199 bool captured = ((vs->so_mask & (1ll << loc)) ? true : false);
200
201 if (captured) {
202 struct pipe_stream_output o = pan_get_so(&so, loc);
203
204 unsigned dst_offset = o.dst_offset * 4; /* dwords */
205 vs->varyings[i].src_offset = dst_offset;
206 } else if (!special) {
207 vs->varyings[i].src_offset = 16 * (num_gen_varyings++);
208 }
209 }
210
211 /* Conversely, we need to set src_offset for the captured varyings.
212 * Here, the layout is defined by the stream out info, not us */
213
214 /* Link up with fragment varyings */
215 bool reads_point_coord = fs->reads_point_coord;
216
217 for (unsigned i = 0; i < fs->tripipe->varying_count; i++) {
218 gl_varying_slot loc = fs->varyings_loc[i];
219 signed vs_idx = -1;
220
221 /* Link up */
222 for (unsigned j = 0; j < vs->tripipe->varying_count; ++j) {
223 if (vs->varyings_loc[j] == loc) {
224 vs_idx = j;
225 break;
226 }
227 }
228
229 /* Either assign or reuse */
230 if (vs_idx >= 0)
231 fs->varyings[i].src_offset = vs->varyings[vs_idx].src_offset;
232 else
233 fs->varyings[i].src_offset = 16 * (num_gen_varyings++);
234
235 if (has_point_coord(fs->point_sprite_mask, loc))
236 reads_point_coord = true;
237 }
238
239 memcpy(trans.cpu, vs->varyings, vs_size);
240 memcpy(trans.cpu + vs_size, fs->varyings, fs_size);
241
242 union mali_attr varyings[PIPE_MAX_ATTRIBS];
243 memset(varyings, 0, sizeof(varyings));
244
245 /* Figure out how many streamout buffers could be bound */
246 unsigned so_count = ctx->streamout.num_targets;
247 for (unsigned i = 0; i < vs->tripipe->varying_count; i++) {
248 gl_varying_slot loc = vs->varyings_loc[i];
249
250 bool captured = ((vs->so_mask & (1ll << loc)) ? true : false);
251 if (!captured) continue;
252
253 struct pipe_stream_output o = pan_get_so(&so, loc);
254 so_count = MAX2(so_count, o.output_buffer + 1);
255 }
256
257 signed idx = so_count;
258 signed general = idx++;
259 signed gl_Position = idx++;
260 signed gl_PointSize = vs->writes_point_size ? (idx++) : -1;
261 signed gl_PointCoord = reads_point_coord ? (idx++) : -1;
262 signed gl_FrontFacing = fs->reads_face ? (idx++) : -1;
263 signed gl_FragCoord = fs->reads_frag_coord ? (idx++) : -1;
264
265 /* Emit the stream out buffers */
266
267 unsigned output_count = u_stream_outputs_for_vertices(
268 ctx->active_prim, ctx->vertex_count);
269
270 for (unsigned i = 0; i < so_count; ++i) {
271 struct pipe_stream_output_target *target =
272 (i < ctx->streamout.num_targets) ? ctx->streamout.targets[i] : NULL;
273
274 if (target) {
275 panfrost_emit_streamout(ctx, &varyings[i], so.stride[i], ctx->streamout.offsets[i], output_count, target);
276 } else {
277 /* Emit a dummy buffer */
278 panfrost_emit_varyings(ctx, &varyings[i], so.stride[i] * 4, output_count);
279
280 /* Clear the attribute type */
281 varyings[i].elements &= ~0xF;
282 }
283 }
284
285 panfrost_emit_varyings(ctx, &varyings[general], num_gen_varyings * 16,
286 vertex_count);
287
288 /* fp32 vec4 gl_Position */
289 ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.position_varying =
290 panfrost_emit_varyings(ctx, &varyings[gl_Position],
291 sizeof(float) * 4, vertex_count);
292
293
294 if (vs->writes_point_size)
295 ctx->payloads[PIPE_SHADER_FRAGMENT].primitive_size.pointer =
296 panfrost_emit_varyings(ctx, &varyings[gl_PointSize],
297 2, vertex_count);
298
299 if (reads_point_coord)
300 varyings[gl_PointCoord].elements = MALI_VARYING_POINT_COORD;
301
302 if (fs->reads_face)
303 varyings[gl_FrontFacing].elements = MALI_VARYING_FRONT_FACING;
304
305 if (fs->reads_frag_coord)
306 varyings[gl_FragCoord].elements = MALI_VARYING_FRAG_COORD;
307
308 /* Let's go ahead and link varying meta to the buffer in question, now
309 * that that information is available. VARYING_SLOT_POS is mapped to
310 * gl_FragCoord for fragment shaders but gl_Positionf or vertex shaders
311 * */
312
313 panfrost_emit_varying_meta(trans.cpu, vs,
314 general, gl_Position, gl_PointSize,
315 gl_PointCoord, gl_FrontFacing);
316
317 panfrost_emit_varying_meta(trans.cpu + vs_size, fs,
318 general, gl_FragCoord, gl_PointSize,
319 gl_PointCoord, gl_FrontFacing);
320
321 /* Replace streamout */
322
323 struct mali_attr_meta *ovs = (struct mali_attr_meta *) (trans.cpu);
324 struct mali_attr_meta *ofs = (struct mali_attr_meta *) (trans.cpu + vs_size);
325
326 for (unsigned i = 0; i < vs->tripipe->varying_count; i++) {
327 gl_varying_slot loc = vs->varyings_loc[i];
328
329 bool captured = ((vs->so_mask & (1ll << loc)) ? true : false);
330 if (!captured) continue;
331
332 struct pipe_stream_output o = pan_get_so(&so, loc);
333 ovs[i].index = o.output_buffer;
334
335 /* Set the type appropriately. TODO: Integer varyings XXX */
336 assert(o.stream == 0);
337 ovs[i].format = pan_xfb_format(o.num_components);
338 ovs[i].swizzle = panfrost_get_default_swizzle(o.num_components);
339
340 /* Link to the fragment */
341 signed fs_idx = -1;
342
343 /* Link up */
344 for (unsigned j = 0; j < fs->tripipe->varying_count; ++j) {
345 if (fs->varyings_loc[j] == loc) {
346 fs_idx = j;
347 break;
348 }
349 }
350
351 if (fs_idx >= 0) {
352 ofs[fs_idx].index = ovs[i].index;
353 ofs[fs_idx].format = ovs[i].format;
354 ofs[fs_idx].swizzle = ovs[i].swizzle;
355 }
356 }
357
358 /* Replace point sprite */
359 for (unsigned i = 0; i < fs->tripipe->varying_count; i++) {
360 /* If we have a point sprite replacement, handle that here. We
361 * have to translate location first. TODO: Flip y in shader.
362 * We're already keying ... just time crunch .. */
363
364 if (has_point_coord(fs->point_sprite_mask, fs->varyings_loc[i])) {
365 ofs[i].index = gl_PointCoord;
366
367 /* Swizzle out the z/w to 0/1 */
368 ofs[i].format = MALI_RG16F;
369 ofs[i].swizzle =
370 panfrost_get_default_swizzle(2);
371 }
372 }
373
374 /* Fix up unaligned addresses */
375 for (unsigned i = 0; i < so_count; ++i) {
376 if (varyings[i].elements < MALI_VARYING_SPECIAL)
377 continue;
378
379 unsigned align = (varyings[i].elements & 63);
380
381 /* While we're at it, the SO buffers are linear */
382
383 if (!align) {
384 varyings[i].elements |= MALI_ATTR_LINEAR;
385 continue;
386 }
387
388 /* We need to adjust alignment */
389 varyings[i].elements &= ~63;
390 varyings[i].elements |= MALI_ATTR_LINEAR;
391 varyings[i].size += align;
392
393 for (unsigned v = 0; v < vs->tripipe->varying_count; ++v) {
394 if (ovs[v].index == i)
395 ovs[v].src_offset = vs->varyings[v].src_offset + align;
396 }
397
398 for (unsigned f = 0; f < fs->tripipe->varying_count; ++f) {
399 if (ofs[f].index == i)
400 ofs[f].src_offset = fs->varyings[f].src_offset + align;
401 }
402 }
403
404 mali_ptr varyings_p = panfrost_upload_transient(batch, &varyings, idx * sizeof(union mali_attr));
405 ctx->payloads[PIPE_SHADER_VERTEX].postfix.varyings = varyings_p;
406 ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.varyings = varyings_p;
407
408 ctx->payloads[PIPE_SHADER_VERTEX].postfix.varying_meta = trans.gpu;
409 ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.varying_meta = trans.gpu + vs_size;
410 }