llvmpipe: add grid launch
[mesa.git] / src / gallium / drivers / panfrost / pan_varyings.c
1 /*
2 * Copyright (C) 2018-2019 Alyssa Rosenzweig
3 * Copyright (C) 2019 Collabora, Ltd.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 */
25
26 #include "pan_context.h"
27 #include "util/u_prim.h"
28
29 static mali_ptr
30 panfrost_emit_varyings(
31 struct panfrost_context *ctx,
32 union mali_attr *slot,
33 unsigned stride,
34 unsigned count)
35 {
36 /* Fill out the descriptor */
37 slot->stride = stride;
38 slot->size = stride * count;
39 slot->shift = slot->extra_flags = 0;
40
41 struct panfrost_transfer transfer =
42 panfrost_allocate_transient(ctx, slot->size);
43
44 slot->elements = transfer.gpu | MALI_ATTR_LINEAR;
45
46 return transfer.gpu;
47 }
48
49 static void
50 panfrost_emit_streamout(
51 struct panfrost_context *ctx,
52 union mali_attr *slot,
53 unsigned stride,
54 unsigned offset,
55 unsigned count,
56 struct pipe_stream_output_target *target)
57 {
58 /* Fill out the descriptor */
59 slot->stride = stride * 4;
60 slot->shift = slot->extra_flags = 0;
61
62 unsigned max_size = target->buffer_size;
63 unsigned expected_size = slot->stride * count;
64
65 slot->size = MIN2(max_size, expected_size);
66
67 /* Grab the BO and bind it to the batch */
68 struct panfrost_job *batch = panfrost_get_job_for_fbo(ctx);
69 struct panfrost_bo *bo = pan_resource(target->buffer)->bo;
70 panfrost_job_add_bo(batch, bo);
71
72 mali_ptr addr = bo->gpu + target->buffer_offset + (offset * slot->stride);
73 slot->elements = addr;
74 }
75
76 /* Given a shader and buffer indices, link varying metadata together */
77
78 static bool
79 is_special_varying(gl_varying_slot loc)
80 {
81 switch (loc) {
82 case VARYING_SLOT_POS:
83 case VARYING_SLOT_PSIZ:
84 case VARYING_SLOT_PNTC:
85 case VARYING_SLOT_FACE:
86 return true;
87 default:
88 return false;
89 }
90 }
91
92 static void
93 panfrost_emit_varying_meta(
94 void *outptr, struct panfrost_shader_state *ss,
95 signed general, signed gl_Position,
96 signed gl_PointSize, signed gl_PointCoord,
97 signed gl_FrontFacing)
98 {
99 struct mali_attr_meta *out = (struct mali_attr_meta *) outptr;
100
101 for (unsigned i = 0; i < ss->tripipe->varying_count; ++i) {
102 gl_varying_slot location = ss->varyings_loc[i];
103 int index = -1;
104
105 switch (location) {
106 case VARYING_SLOT_POS:
107 index = gl_Position;
108 break;
109 case VARYING_SLOT_PSIZ:
110 index = gl_PointSize;
111 break;
112 case VARYING_SLOT_PNTC:
113 index = gl_PointCoord;
114 break;
115 case VARYING_SLOT_FACE:
116 index = gl_FrontFacing;
117 break;
118 default:
119 index = general;
120 break;
121 }
122
123 assert(index >= 0);
124 out[i].index = index;
125 }
126 }
127
128 static bool
129 has_point_coord(unsigned mask, gl_varying_slot loc)
130 {
131 if ((loc >= VARYING_SLOT_TEX0) && (loc <= VARYING_SLOT_TEX7))
132 return (mask & (1 << (loc - VARYING_SLOT_TEX0)));
133 else if (loc == VARYING_SLOT_PNTC)
134 return (mask & (1 << 8));
135 else
136 return false;
137 }
138
139 /* Helpers for manipulating stream out information so we can pack varyings
140 * accordingly. Compute the src_offset for a given captured varying */
141
142 static struct pipe_stream_output
143 pan_get_so(struct pipe_stream_output_info *info, gl_varying_slot loc)
144 {
145 for (unsigned i = 0; i < info->num_outputs; ++i) {
146 if (info->output[i].register_index == loc)
147 return info->output[i];
148 }
149
150 unreachable("Varying not captured");
151 }
152
153 /* TODO: Integers */
154 static enum mali_format
155 pan_xfb_format(unsigned nr_components)
156 {
157 switch (nr_components) {
158 case 1: return MALI_R32F;
159 case 2: return MALI_RG32F;
160 case 3: return MALI_RGB32F;
161 case 4: return MALI_RGBA32F;
162 default: unreachable("Invalid format");
163 }
164 }
165
166 void
167 panfrost_emit_varying_descriptor(
168 struct panfrost_context *ctx,
169 unsigned vertex_count)
170 {
171 /* Load the shaders */
172
173 struct panfrost_shader_state *vs = &ctx->shader[PIPE_SHADER_VERTEX]->variants[ctx->shader[PIPE_SHADER_VERTEX]->active_variant];
174 struct panfrost_shader_state *fs = &ctx->shader[PIPE_SHADER_FRAGMENT]->variants[ctx->shader[PIPE_SHADER_FRAGMENT]->active_variant];
175 unsigned int num_gen_varyings = 0;
176
177 /* Allocate the varying descriptor */
178
179 size_t vs_size = sizeof(struct mali_attr_meta) * vs->tripipe->varying_count;
180 size_t fs_size = sizeof(struct mali_attr_meta) * fs->tripipe->varying_count;
181
182 struct panfrost_transfer trans = panfrost_allocate_transient(ctx,
183 vs_size + fs_size);
184
185 struct pipe_stream_output_info so = vs->stream_output;
186
187 /* Check if this varying is linked by us. This is the case for
188 * general-purpose, non-captured varyings. If it is, link it. If it's
189 * not, use the provided stream out information to determine the
190 * offset, since it was already linked for us. */
191
192 for (unsigned i = 0; i < vs->tripipe->varying_count; i++) {
193 gl_varying_slot loc = vs->varyings_loc[i];
194
195 bool special = is_special_varying(loc);
196 bool captured = ((vs->so_mask & (1ll << loc)) ? true : false);
197
198 if (captured) {
199 struct pipe_stream_output o = pan_get_so(&so, loc);
200
201 unsigned dst_offset = o.dst_offset * 4; /* dwords */
202 vs->varyings[i].src_offset = dst_offset;
203 } else if (!special) {
204 vs->varyings[i].src_offset = 16 * (num_gen_varyings++);
205 }
206 }
207
208 /* Conversely, we need to set src_offset for the captured varyings.
209 * Here, the layout is defined by the stream out info, not us */
210
211 /* Link up with fragment varyings */
212 bool reads_point_coord = fs->reads_point_coord;
213
214 for (unsigned i = 0; i < fs->tripipe->varying_count; i++) {
215 gl_varying_slot loc = fs->varyings_loc[i];
216 signed vs_idx = -1;
217
218 /* Link up */
219 for (unsigned j = 0; j < vs->tripipe->varying_count; ++j) {
220 if (vs->varyings_loc[j] == loc) {
221 vs_idx = j;
222 break;
223 }
224 }
225
226 /* Either assign or reuse */
227 if (vs_idx >= 0)
228 fs->varyings[i].src_offset = vs->varyings[vs_idx].src_offset;
229 else
230 fs->varyings[i].src_offset = 16 * (num_gen_varyings++);
231
232 if (has_point_coord(fs->point_sprite_mask, loc))
233 reads_point_coord = true;
234 }
235
236 memcpy(trans.cpu, vs->varyings, vs_size);
237 memcpy(trans.cpu + vs_size, fs->varyings, fs_size);
238
239 union mali_attr varyings[PIPE_MAX_ATTRIBS];
240 memset(varyings, 0, sizeof(varyings));
241
242 /* Figure out how many streamout buffers could be bound */
243 unsigned so_count = ctx->streamout.num_targets;
244 for (unsigned i = 0; i < vs->tripipe->varying_count; i++) {
245 gl_varying_slot loc = vs->varyings_loc[i];
246
247 bool captured = ((vs->so_mask & (1ll << loc)) ? true : false);
248 if (!captured) continue;
249
250 struct pipe_stream_output o = pan_get_so(&so, loc);
251 so_count = MAX2(so_count, o.output_buffer + 1);
252 }
253
254 signed idx = so_count;
255 signed general = idx++;
256 signed gl_Position = idx++;
257 signed gl_PointSize = vs->writes_point_size ? (idx++) : -1;
258 signed gl_PointCoord = reads_point_coord ? (idx++) : -1;
259 signed gl_FrontFacing = fs->reads_face ? (idx++) : -1;
260 signed gl_FragCoord = fs->reads_frag_coord ? (idx++) : -1;
261
262 /* Emit the stream out buffers */
263
264 unsigned output_count = u_stream_outputs_for_vertices(
265 ctx->active_prim, ctx->vertex_count);
266
267 for (unsigned i = 0; i < so_count; ++i) {
268 struct pipe_stream_output_target *target =
269 (i < ctx->streamout.num_targets) ? ctx->streamout.targets[i] : NULL;
270
271 if (target) {
272 panfrost_emit_streamout(ctx, &varyings[i], so.stride[i], ctx->streamout.offsets[i], output_count, target);
273 } else {
274 /* Emit a dummy buffer */
275 panfrost_emit_varyings(ctx, &varyings[i], so.stride[i] * 4, output_count);
276
277 /* Clear the attribute type */
278 varyings[i].elements &= ~0xF;
279 }
280 }
281
282 panfrost_emit_varyings(ctx, &varyings[general], num_gen_varyings * 16,
283 vertex_count);
284
285 /* fp32 vec4 gl_Position */
286 ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.position_varying =
287 panfrost_emit_varyings(ctx, &varyings[gl_Position],
288 sizeof(float) * 4, vertex_count);
289
290
291 if (vs->writes_point_size)
292 ctx->payloads[PIPE_SHADER_FRAGMENT].primitive_size.pointer =
293 panfrost_emit_varyings(ctx, &varyings[gl_PointSize],
294 2, vertex_count);
295
296 if (reads_point_coord)
297 varyings[gl_PointCoord].elements = MALI_VARYING_POINT_COORD;
298
299 if (fs->reads_face)
300 varyings[gl_FrontFacing].elements = MALI_VARYING_FRONT_FACING;
301
302 if (fs->reads_frag_coord)
303 varyings[gl_FragCoord].elements = MALI_VARYING_FRAG_COORD;
304
305 /* Let's go ahead and link varying meta to the buffer in question, now
306 * that that information is available. VARYING_SLOT_POS is mapped to
307 * gl_FragCoord for fragment shaders but gl_Positionf or vertex shaders
308 * */
309
310 panfrost_emit_varying_meta(trans.cpu, vs,
311 general, gl_Position, gl_PointSize,
312 gl_PointCoord, gl_FrontFacing);
313
314 panfrost_emit_varying_meta(trans.cpu + vs_size, fs,
315 general, gl_FragCoord, gl_PointSize,
316 gl_PointCoord, gl_FrontFacing);
317
318 /* Replace streamout */
319
320 struct mali_attr_meta *ovs = (struct mali_attr_meta *) (trans.cpu);
321 struct mali_attr_meta *ofs = (struct mali_attr_meta *) (trans.cpu + vs_size);
322
323 for (unsigned i = 0; i < vs->tripipe->varying_count; i++) {
324 gl_varying_slot loc = vs->varyings_loc[i];
325
326 bool captured = ((vs->so_mask & (1ll << loc)) ? true : false);
327 if (!captured) continue;
328
329 struct pipe_stream_output o = pan_get_so(&so, loc);
330 ovs[i].index = o.output_buffer;
331
332 /* Set the type appropriately. TODO: Integer varyings XXX */
333 assert(o.stream == 0);
334 ovs[i].format = pan_xfb_format(o.num_components);
335 ovs[i].swizzle = panfrost_get_default_swizzle(o.num_components);
336
337 /* Link to the fragment */
338 signed fs_idx = -1;
339
340 /* Link up */
341 for (unsigned j = 0; j < fs->tripipe->varying_count; ++j) {
342 if (fs->varyings_loc[j] == loc) {
343 fs_idx = j;
344 break;
345 }
346 }
347
348 if (fs_idx >= 0) {
349 ofs[fs_idx].index = ovs[i].index;
350 ofs[fs_idx].format = ovs[i].format;
351 ofs[fs_idx].swizzle = ovs[i].swizzle;
352 }
353 }
354
355 /* Replace point sprite */
356 for (unsigned i = 0; i < fs->tripipe->varying_count; i++) {
357 /* If we have a point sprite replacement, handle that here. We
358 * have to translate location first. TODO: Flip y in shader.
359 * We're already keying ... just time crunch .. */
360
361 if (has_point_coord(fs->point_sprite_mask, fs->varyings_loc[i])) {
362 ofs[i].index = gl_PointCoord;
363
364 /* Swizzle out the z/w to 0/1 */
365 ofs[i].format = MALI_RG16F;
366 ofs[i].swizzle =
367 panfrost_get_default_swizzle(2);
368 }
369 }
370
371 /* Fix up unaligned addresses */
372 for (unsigned i = 0; i < so_count; ++i) {
373 if (varyings[i].elements < MALI_VARYING_SPECIAL)
374 continue;
375
376 unsigned align = (varyings[i].elements & 63);
377
378 /* While we're at it, the SO buffers are linear */
379
380 if (!align) {
381 varyings[i].elements |= MALI_ATTR_LINEAR;
382 continue;
383 }
384
385 /* We need to adjust alignment */
386 varyings[i].elements &= ~63;
387 varyings[i].elements |= MALI_ATTR_LINEAR;
388 varyings[i].size += align;
389
390 for (unsigned v = 0; v < vs->tripipe->varying_count; ++v) {
391 if (ovs[v].index == i)
392 ovs[v].src_offset = vs->varyings[v].src_offset + align;
393 }
394
395 for (unsigned f = 0; f < fs->tripipe->varying_count; ++f) {
396 if (ofs[f].index == i)
397 ofs[f].src_offset = fs->varyings[f].src_offset + align;
398 }
399 }
400
401 mali_ptr varyings_p = panfrost_upload_transient(ctx, &varyings, idx * sizeof(union mali_attr));
402 ctx->payloads[PIPE_SHADER_VERTEX].postfix.varyings = varyings_p;
403 ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.varyings = varyings_p;
404
405 ctx->payloads[PIPE_SHADER_VERTEX].postfix.varying_meta = trans.gpu;
406 ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.varying_meta = trans.gpu + vs_size;
407 }