panfrost: Pass a batch to panfrost_{allocate,upload}_transient()
[mesa.git] / src / gallium / drivers / panfrost / pan_varyings.c
1 /*
2 * Copyright (C) 2018-2019 Alyssa Rosenzweig
3 * Copyright (C) 2019 Collabora, Ltd.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 */
25
26 #include "pan_context.h"
27 #include "util/u_prim.h"
28
29 static mali_ptr
30 panfrost_emit_varyings(
31 struct panfrost_context *ctx,
32 union mali_attr *slot,
33 unsigned stride,
34 unsigned count)
35 {
36 /* Fill out the descriptor */
37 slot->stride = stride;
38 slot->size = stride * count;
39 slot->shift = slot->extra_flags = 0;
40
41 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
42 struct panfrost_transfer transfer =
43 panfrost_allocate_transient(batch, slot->size);
44
45 slot->elements = transfer.gpu | MALI_ATTR_LINEAR;
46
47 return transfer.gpu;
48 }
49
50 static void
51 panfrost_emit_streamout(
52 struct panfrost_context *ctx,
53 union mali_attr *slot,
54 unsigned stride,
55 unsigned offset,
56 unsigned count,
57 struct pipe_stream_output_target *target)
58 {
59 /* Fill out the descriptor */
60 slot->stride = stride * 4;
61 slot->shift = slot->extra_flags = 0;
62
63 unsigned max_size = target->buffer_size;
64 unsigned expected_size = slot->stride * count;
65
66 slot->size = MIN2(max_size, expected_size);
67
68 /* Grab the BO and bind it to the batch */
69 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
70 struct panfrost_bo *bo = pan_resource(target->buffer)->bo;
71 panfrost_batch_add_bo(batch, bo);
72
73 mali_ptr addr = bo->gpu + target->buffer_offset + (offset * slot->stride);
74 slot->elements = addr;
75 }
76
77 /* Given a shader and buffer indices, link varying metadata together */
78
79 static bool
80 is_special_varying(gl_varying_slot loc)
81 {
82 switch (loc) {
83 case VARYING_SLOT_POS:
84 case VARYING_SLOT_PSIZ:
85 case VARYING_SLOT_PNTC:
86 case VARYING_SLOT_FACE:
87 return true;
88 default:
89 return false;
90 }
91 }
92
93 static void
94 panfrost_emit_varying_meta(
95 void *outptr, struct panfrost_shader_state *ss,
96 signed general, signed gl_Position,
97 signed gl_PointSize, signed gl_PointCoord,
98 signed gl_FrontFacing)
99 {
100 struct mali_attr_meta *out = (struct mali_attr_meta *) outptr;
101
102 for (unsigned i = 0; i < ss->tripipe->varying_count; ++i) {
103 gl_varying_slot location = ss->varyings_loc[i];
104 int index = -1;
105
106 switch (location) {
107 case VARYING_SLOT_POS:
108 index = gl_Position;
109 break;
110 case VARYING_SLOT_PSIZ:
111 index = gl_PointSize;
112 break;
113 case VARYING_SLOT_PNTC:
114 index = gl_PointCoord;
115 break;
116 case VARYING_SLOT_FACE:
117 index = gl_FrontFacing;
118 break;
119 default:
120 index = general;
121 break;
122 }
123
124 assert(index >= 0);
125 out[i].index = index;
126 }
127 }
128
129 static bool
130 has_point_coord(unsigned mask, gl_varying_slot loc)
131 {
132 if ((loc >= VARYING_SLOT_TEX0) && (loc <= VARYING_SLOT_TEX7))
133 return (mask & (1 << (loc - VARYING_SLOT_TEX0)));
134 else if (loc == VARYING_SLOT_PNTC)
135 return (mask & (1 << 8));
136 else
137 return false;
138 }
139
140 /* Helpers for manipulating stream out information so we can pack varyings
141 * accordingly. Compute the src_offset for a given captured varying */
142
143 static struct pipe_stream_output
144 pan_get_so(struct pipe_stream_output_info *info, gl_varying_slot loc)
145 {
146 for (unsigned i = 0; i < info->num_outputs; ++i) {
147 if (info->output[i].register_index == loc)
148 return info->output[i];
149 }
150
151 unreachable("Varying not captured");
152 }
153
154 /* TODO: Integers */
155 static enum mali_format
156 pan_xfb_format(unsigned nr_components)
157 {
158 switch (nr_components) {
159 case 1: return MALI_R32F;
160 case 2: return MALI_RG32F;
161 case 3: return MALI_RGB32F;
162 case 4: return MALI_RGBA32F;
163 default: unreachable("Invalid format");
164 }
165 }
166
167 void
168 panfrost_emit_varying_descriptor(
169 struct panfrost_context *ctx,
170 unsigned vertex_count)
171 {
172 /* Load the shaders */
173
174 struct panfrost_shader_state *vs = &ctx->shader[PIPE_SHADER_VERTEX]->variants[ctx->shader[PIPE_SHADER_VERTEX]->active_variant];
175 struct panfrost_shader_state *fs = &ctx->shader[PIPE_SHADER_FRAGMENT]->variants[ctx->shader[PIPE_SHADER_FRAGMENT]->active_variant];
176 unsigned int num_gen_varyings = 0;
177
178 /* Allocate the varying descriptor */
179
180 size_t vs_size = sizeof(struct mali_attr_meta) * vs->tripipe->varying_count;
181 size_t fs_size = sizeof(struct mali_attr_meta) * fs->tripipe->varying_count;
182
183 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
184 struct panfrost_transfer trans = panfrost_allocate_transient(batch,
185 vs_size + fs_size);
186
187 struct pipe_stream_output_info so = vs->stream_output;
188
189 /* Check if this varying is linked by us. This is the case for
190 * general-purpose, non-captured varyings. If it is, link it. If it's
191 * not, use the provided stream out information to determine the
192 * offset, since it was already linked for us. */
193
194 for (unsigned i = 0; i < vs->tripipe->varying_count; i++) {
195 gl_varying_slot loc = vs->varyings_loc[i];
196
197 bool special = is_special_varying(loc);
198 bool captured = ((vs->so_mask & (1ll << loc)) ? true : false);
199
200 if (captured) {
201 struct pipe_stream_output o = pan_get_so(&so, loc);
202
203 unsigned dst_offset = o.dst_offset * 4; /* dwords */
204 vs->varyings[i].src_offset = dst_offset;
205 } else if (!special) {
206 vs->varyings[i].src_offset = 16 * (num_gen_varyings++);
207 }
208 }
209
210 /* Conversely, we need to set src_offset for the captured varyings.
211 * Here, the layout is defined by the stream out info, not us */
212
213 /* Link up with fragment varyings */
214 bool reads_point_coord = fs->reads_point_coord;
215
216 for (unsigned i = 0; i < fs->tripipe->varying_count; i++) {
217 gl_varying_slot loc = fs->varyings_loc[i];
218 signed vs_idx = -1;
219
220 /* Link up */
221 for (unsigned j = 0; j < vs->tripipe->varying_count; ++j) {
222 if (vs->varyings_loc[j] == loc) {
223 vs_idx = j;
224 break;
225 }
226 }
227
228 /* Either assign or reuse */
229 if (vs_idx >= 0)
230 fs->varyings[i].src_offset = vs->varyings[vs_idx].src_offset;
231 else
232 fs->varyings[i].src_offset = 16 * (num_gen_varyings++);
233
234 if (has_point_coord(fs->point_sprite_mask, loc))
235 reads_point_coord = true;
236 }
237
238 memcpy(trans.cpu, vs->varyings, vs_size);
239 memcpy(trans.cpu + vs_size, fs->varyings, fs_size);
240
241 union mali_attr varyings[PIPE_MAX_ATTRIBS];
242 memset(varyings, 0, sizeof(varyings));
243
244 /* Figure out how many streamout buffers could be bound */
245 unsigned so_count = ctx->streamout.num_targets;
246 for (unsigned i = 0; i < vs->tripipe->varying_count; i++) {
247 gl_varying_slot loc = vs->varyings_loc[i];
248
249 bool captured = ((vs->so_mask & (1ll << loc)) ? true : false);
250 if (!captured) continue;
251
252 struct pipe_stream_output o = pan_get_so(&so, loc);
253 so_count = MAX2(so_count, o.output_buffer + 1);
254 }
255
256 signed idx = so_count;
257 signed general = idx++;
258 signed gl_Position = idx++;
259 signed gl_PointSize = vs->writes_point_size ? (idx++) : -1;
260 signed gl_PointCoord = reads_point_coord ? (idx++) : -1;
261 signed gl_FrontFacing = fs->reads_face ? (idx++) : -1;
262 signed gl_FragCoord = fs->reads_frag_coord ? (idx++) : -1;
263
264 /* Emit the stream out buffers */
265
266 unsigned output_count = u_stream_outputs_for_vertices(
267 ctx->active_prim, ctx->vertex_count);
268
269 for (unsigned i = 0; i < so_count; ++i) {
270 struct pipe_stream_output_target *target =
271 (i < ctx->streamout.num_targets) ? ctx->streamout.targets[i] : NULL;
272
273 if (target) {
274 panfrost_emit_streamout(ctx, &varyings[i], so.stride[i], ctx->streamout.offsets[i], output_count, target);
275 } else {
276 /* Emit a dummy buffer */
277 panfrost_emit_varyings(ctx, &varyings[i], so.stride[i] * 4, output_count);
278
279 /* Clear the attribute type */
280 varyings[i].elements &= ~0xF;
281 }
282 }
283
284 panfrost_emit_varyings(ctx, &varyings[general], num_gen_varyings * 16,
285 vertex_count);
286
287 /* fp32 vec4 gl_Position */
288 ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.position_varying =
289 panfrost_emit_varyings(ctx, &varyings[gl_Position],
290 sizeof(float) * 4, vertex_count);
291
292
293 if (vs->writes_point_size)
294 ctx->payloads[PIPE_SHADER_FRAGMENT].primitive_size.pointer =
295 panfrost_emit_varyings(ctx, &varyings[gl_PointSize],
296 2, vertex_count);
297
298 if (reads_point_coord)
299 varyings[gl_PointCoord].elements = MALI_VARYING_POINT_COORD;
300
301 if (fs->reads_face)
302 varyings[gl_FrontFacing].elements = MALI_VARYING_FRONT_FACING;
303
304 if (fs->reads_frag_coord)
305 varyings[gl_FragCoord].elements = MALI_VARYING_FRAG_COORD;
306
307 /* Let's go ahead and link varying meta to the buffer in question, now
308 * that that information is available. VARYING_SLOT_POS is mapped to
309 * gl_FragCoord for fragment shaders but gl_Positionf or vertex shaders
310 * */
311
312 panfrost_emit_varying_meta(trans.cpu, vs,
313 general, gl_Position, gl_PointSize,
314 gl_PointCoord, gl_FrontFacing);
315
316 panfrost_emit_varying_meta(trans.cpu + vs_size, fs,
317 general, gl_FragCoord, gl_PointSize,
318 gl_PointCoord, gl_FrontFacing);
319
320 /* Replace streamout */
321
322 struct mali_attr_meta *ovs = (struct mali_attr_meta *) (trans.cpu);
323 struct mali_attr_meta *ofs = (struct mali_attr_meta *) (trans.cpu + vs_size);
324
325 for (unsigned i = 0; i < vs->tripipe->varying_count; i++) {
326 gl_varying_slot loc = vs->varyings_loc[i];
327
328 bool captured = ((vs->so_mask & (1ll << loc)) ? true : false);
329 if (!captured) continue;
330
331 struct pipe_stream_output o = pan_get_so(&so, loc);
332 ovs[i].index = o.output_buffer;
333
334 /* Set the type appropriately. TODO: Integer varyings XXX */
335 assert(o.stream == 0);
336 ovs[i].format = pan_xfb_format(o.num_components);
337 ovs[i].swizzle = panfrost_get_default_swizzle(o.num_components);
338
339 /* Link to the fragment */
340 signed fs_idx = -1;
341
342 /* Link up */
343 for (unsigned j = 0; j < fs->tripipe->varying_count; ++j) {
344 if (fs->varyings_loc[j] == loc) {
345 fs_idx = j;
346 break;
347 }
348 }
349
350 if (fs_idx >= 0) {
351 ofs[fs_idx].index = ovs[i].index;
352 ofs[fs_idx].format = ovs[i].format;
353 ofs[fs_idx].swizzle = ovs[i].swizzle;
354 }
355 }
356
357 /* Replace point sprite */
358 for (unsigned i = 0; i < fs->tripipe->varying_count; i++) {
359 /* If we have a point sprite replacement, handle that here. We
360 * have to translate location first. TODO: Flip y in shader.
361 * We're already keying ... just time crunch .. */
362
363 if (has_point_coord(fs->point_sprite_mask, fs->varyings_loc[i])) {
364 ofs[i].index = gl_PointCoord;
365
366 /* Swizzle out the z/w to 0/1 */
367 ofs[i].format = MALI_RG16F;
368 ofs[i].swizzle =
369 panfrost_get_default_swizzle(2);
370 }
371 }
372
373 /* Fix up unaligned addresses */
374 for (unsigned i = 0; i < so_count; ++i) {
375 if (varyings[i].elements < MALI_VARYING_SPECIAL)
376 continue;
377
378 unsigned align = (varyings[i].elements & 63);
379
380 /* While we're at it, the SO buffers are linear */
381
382 if (!align) {
383 varyings[i].elements |= MALI_ATTR_LINEAR;
384 continue;
385 }
386
387 /* We need to adjust alignment */
388 varyings[i].elements &= ~63;
389 varyings[i].elements |= MALI_ATTR_LINEAR;
390 varyings[i].size += align;
391
392 for (unsigned v = 0; v < vs->tripipe->varying_count; ++v) {
393 if (ovs[v].index == i)
394 ovs[v].src_offset = vs->varyings[v].src_offset + align;
395 }
396
397 for (unsigned f = 0; f < fs->tripipe->varying_count; ++f) {
398 if (ofs[f].index == i)
399 ofs[f].src_offset = fs->varyings[f].src_offset + align;
400 }
401 }
402
403 mali_ptr varyings_p = panfrost_upload_transient(batch, &varyings, idx * sizeof(union mali_attr));
404 ctx->payloads[PIPE_SHADER_VERTEX].postfix.varyings = varyings_p;
405 ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.varyings = varyings_p;
406
407 ctx->payloads[PIPE_SHADER_VERTEX].postfix.varying_meta = trans.gpu;
408 ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.varying_meta = trans.gpu + vs_size;
409 }