pan/midgard: Use fp32 blend shaders
[mesa.git] / src / gallium / drivers / panfrost / pan_varyings.c
1 /*
2 * Copyright (C) 2018-2019 Alyssa Rosenzweig
3 * Copyright (C) 2019 Collabora, Ltd.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 */
25
26 #include "pan_bo.h"
27 #include "pan_context.h"
28 #include "util/u_prim.h"
29
30 static mali_ptr
31 panfrost_emit_varyings(
32 struct panfrost_context *ctx,
33 union mali_attr *slot,
34 unsigned stride,
35 unsigned count)
36 {
37 /* Fill out the descriptor */
38 slot->stride = stride;
39 slot->size = stride * count;
40 slot->shift = slot->extra_flags = 0;
41
42 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
43 struct panfrost_transfer transfer =
44 panfrost_allocate_transient(batch, slot->size);
45
46 slot->elements = transfer.gpu | MALI_ATTR_LINEAR;
47
48 return transfer.gpu;
49 }
50
51 static void
52 panfrost_emit_streamout(
53 struct panfrost_context *ctx,
54 union mali_attr *slot,
55 unsigned stride,
56 unsigned offset,
57 unsigned count,
58 struct pipe_stream_output_target *target)
59 {
60 /* Fill out the descriptor */
61 slot->stride = stride * 4;
62 slot->shift = slot->extra_flags = 0;
63
64 unsigned max_size = target->buffer_size;
65 unsigned expected_size = slot->stride * count;
66
67 slot->size = MIN2(max_size, expected_size);
68
69 /* Grab the BO and bind it to the batch */
70 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
71 struct panfrost_bo *bo = pan_resource(target->buffer)->bo;
72
73 /* Varyings are WRITE from the perspective of the VERTEX but READ from
74 * the perspective of the TILER and FRAGMENT.
75 */
76 panfrost_batch_add_bo(batch, bo,
77 PAN_BO_ACCESS_SHARED |
78 PAN_BO_ACCESS_RW |
79 PAN_BO_ACCESS_VERTEX_TILER |
80 PAN_BO_ACCESS_FRAGMENT);
81
82 mali_ptr addr = bo->gpu + target->buffer_offset + (offset * slot->stride);
83 slot->elements = addr;
84 }
85
86 /* Given a shader and buffer indices, link varying metadata together */
87
88 static bool
89 is_special_varying(gl_varying_slot loc)
90 {
91 switch (loc) {
92 case VARYING_SLOT_POS:
93 case VARYING_SLOT_PSIZ:
94 case VARYING_SLOT_PNTC:
95 case VARYING_SLOT_FACE:
96 return true;
97 default:
98 return false;
99 }
100 }
101
102 static void
103 panfrost_emit_varying_meta(
104 void *outptr, struct panfrost_shader_state *ss,
105 signed general, signed gl_Position,
106 signed gl_PointSize, signed gl_PointCoord,
107 signed gl_FrontFacing)
108 {
109 struct mali_attr_meta *out = (struct mali_attr_meta *) outptr;
110
111 for (unsigned i = 0; i < ss->tripipe->varying_count; ++i) {
112 gl_varying_slot location = ss->varyings_loc[i];
113 int index = -1;
114
115 switch (location) {
116 case VARYING_SLOT_POS:
117 index = gl_Position;
118 break;
119 case VARYING_SLOT_PSIZ:
120 index = gl_PointSize;
121 break;
122 case VARYING_SLOT_PNTC:
123 index = gl_PointCoord;
124 break;
125 case VARYING_SLOT_FACE:
126 index = gl_FrontFacing;
127 break;
128 default:
129 index = general;
130 break;
131 }
132
133 assert(index >= 0);
134 out[i].index = index;
135 }
136 }
137
138 static bool
139 has_point_coord(unsigned mask, gl_varying_slot loc)
140 {
141 if ((loc >= VARYING_SLOT_TEX0) && (loc <= VARYING_SLOT_TEX7))
142 return (mask & (1 << (loc - VARYING_SLOT_TEX0)));
143 else if (loc == VARYING_SLOT_PNTC)
144 return (mask & (1 << 8));
145 else
146 return false;
147 }
148
149 /* Helpers for manipulating stream out information so we can pack varyings
150 * accordingly. Compute the src_offset for a given captured varying */
151
152 static struct pipe_stream_output
153 pan_get_so(struct pipe_stream_output_info *info, gl_varying_slot loc)
154 {
155 for (unsigned i = 0; i < info->num_outputs; ++i) {
156 if (info->output[i].register_index == loc)
157 return info->output[i];
158 }
159
160 unreachable("Varying not captured");
161 }
162
163 /* TODO: Integers */
164 static enum mali_format
165 pan_xfb_format(unsigned nr_components)
166 {
167 switch (nr_components) {
168 case 1: return MALI_R32F;
169 case 2: return MALI_RG32F;
170 case 3: return MALI_RGB32F;
171 case 4: return MALI_RGBA32F;
172 default: unreachable("Invalid format");
173 }
174 }
175
176 void
177 panfrost_emit_varying_descriptor(
178 struct panfrost_context *ctx,
179 unsigned vertex_count)
180 {
181 /* Load the shaders */
182
183 struct panfrost_shader_state *vs = &ctx->shader[PIPE_SHADER_VERTEX]->variants[ctx->shader[PIPE_SHADER_VERTEX]->active_variant];
184 struct panfrost_shader_state *fs = &ctx->shader[PIPE_SHADER_FRAGMENT]->variants[ctx->shader[PIPE_SHADER_FRAGMENT]->active_variant];
185 unsigned int num_gen_varyings = 0;
186
187 /* Allocate the varying descriptor */
188
189 size_t vs_size = sizeof(struct mali_attr_meta) * vs->tripipe->varying_count;
190 size_t fs_size = sizeof(struct mali_attr_meta) * fs->tripipe->varying_count;
191
192 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
193 struct panfrost_transfer trans = panfrost_allocate_transient(batch,
194 vs_size + fs_size);
195
196 struct pipe_stream_output_info so = vs->stream_output;
197
198 /* Check if this varying is linked by us. This is the case for
199 * general-purpose, non-captured varyings. If it is, link it. If it's
200 * not, use the provided stream out information to determine the
201 * offset, since it was already linked for us. */
202
203 for (unsigned i = 0; i < vs->tripipe->varying_count; i++) {
204 gl_varying_slot loc = vs->varyings_loc[i];
205
206 bool special = is_special_varying(loc);
207 bool captured = ((vs->so_mask & (1ll << loc)) ? true : false);
208
209 if (captured) {
210 struct pipe_stream_output o = pan_get_so(&so, loc);
211
212 unsigned dst_offset = o.dst_offset * 4; /* dwords */
213 vs->varyings[i].src_offset = dst_offset;
214 } else if (!special) {
215 vs->varyings[i].src_offset = 16 * (num_gen_varyings++);
216 }
217 }
218
219 /* Conversely, we need to set src_offset for the captured varyings.
220 * Here, the layout is defined by the stream out info, not us */
221
222 /* Link up with fragment varyings */
223 bool reads_point_coord = fs->reads_point_coord;
224
225 for (unsigned i = 0; i < fs->tripipe->varying_count; i++) {
226 gl_varying_slot loc = fs->varyings_loc[i];
227 signed vs_idx = -1;
228
229 /* Link up */
230 for (unsigned j = 0; j < vs->tripipe->varying_count; ++j) {
231 if (vs->varyings_loc[j] == loc) {
232 vs_idx = j;
233 break;
234 }
235 }
236
237 /* Either assign or reuse */
238 if (vs_idx >= 0)
239 fs->varyings[i].src_offset = vs->varyings[vs_idx].src_offset;
240 else
241 fs->varyings[i].src_offset = 16 * (num_gen_varyings++);
242
243 if (has_point_coord(fs->point_sprite_mask, loc))
244 reads_point_coord = true;
245 }
246
247 memcpy(trans.cpu, vs->varyings, vs_size);
248 memcpy(trans.cpu + vs_size, fs->varyings, fs_size);
249
250 union mali_attr varyings[PIPE_MAX_ATTRIBS];
251 memset(varyings, 0, sizeof(varyings));
252
253 /* Figure out how many streamout buffers could be bound */
254 unsigned so_count = ctx->streamout.num_targets;
255 for (unsigned i = 0; i < vs->tripipe->varying_count; i++) {
256 gl_varying_slot loc = vs->varyings_loc[i];
257
258 bool captured = ((vs->so_mask & (1ll << loc)) ? true : false);
259 if (!captured) continue;
260
261 struct pipe_stream_output o = pan_get_so(&so, loc);
262 so_count = MAX2(so_count, o.output_buffer + 1);
263 }
264
265 signed idx = so_count;
266 signed general = idx++;
267 signed gl_Position = idx++;
268 signed gl_PointSize = vs->writes_point_size ? (idx++) : -1;
269 signed gl_PointCoord = reads_point_coord ? (idx++) : -1;
270 signed gl_FrontFacing = fs->reads_face ? (idx++) : -1;
271 signed gl_FragCoord = fs->reads_frag_coord ? (idx++) : -1;
272
273 /* Emit the stream out buffers */
274
275 unsigned output_count = u_stream_outputs_for_vertices(
276 ctx->active_prim, ctx->vertex_count);
277
278 for (unsigned i = 0; i < so_count; ++i) {
279 struct pipe_stream_output_target *target =
280 (i < ctx->streamout.num_targets) ? ctx->streamout.targets[i] : NULL;
281
282 if (target) {
283 panfrost_emit_streamout(ctx, &varyings[i], so.stride[i], ctx->streamout.offsets[i], output_count, target);
284 } else {
285 /* Emit a dummy buffer */
286 panfrost_emit_varyings(ctx, &varyings[i], so.stride[i] * 4, output_count);
287
288 /* Clear the attribute type */
289 varyings[i].elements &= ~0xF;
290 }
291 }
292
293 panfrost_emit_varyings(ctx, &varyings[general], num_gen_varyings * 16,
294 vertex_count);
295
296 /* fp32 vec4 gl_Position */
297 ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.position_varying =
298 panfrost_emit_varyings(ctx, &varyings[gl_Position],
299 sizeof(float) * 4, vertex_count);
300
301
302 if (vs->writes_point_size)
303 ctx->payloads[PIPE_SHADER_FRAGMENT].primitive_size.pointer =
304 panfrost_emit_varyings(ctx, &varyings[gl_PointSize],
305 2, vertex_count);
306
307 if (reads_point_coord)
308 varyings[gl_PointCoord].elements = MALI_VARYING_POINT_COORD;
309
310 if (fs->reads_face)
311 varyings[gl_FrontFacing].elements = MALI_VARYING_FRONT_FACING;
312
313 if (fs->reads_frag_coord)
314 varyings[gl_FragCoord].elements = MALI_VARYING_FRAG_COORD;
315
316 /* Let's go ahead and link varying meta to the buffer in question, now
317 * that that information is available. VARYING_SLOT_POS is mapped to
318 * gl_FragCoord for fragment shaders but gl_Positionf or vertex shaders
319 * */
320
321 panfrost_emit_varying_meta(trans.cpu, vs,
322 general, gl_Position, gl_PointSize,
323 gl_PointCoord, gl_FrontFacing);
324
325 panfrost_emit_varying_meta(trans.cpu + vs_size, fs,
326 general, gl_FragCoord, gl_PointSize,
327 gl_PointCoord, gl_FrontFacing);
328
329 /* Replace streamout */
330
331 struct mali_attr_meta *ovs = (struct mali_attr_meta *) (trans.cpu);
332 struct mali_attr_meta *ofs = (struct mali_attr_meta *) (trans.cpu + vs_size);
333
334 for (unsigned i = 0; i < vs->tripipe->varying_count; i++) {
335 gl_varying_slot loc = vs->varyings_loc[i];
336
337 bool captured = ((vs->so_mask & (1ll << loc)) ? true : false);
338 if (!captured) continue;
339
340 struct pipe_stream_output o = pan_get_so(&so, loc);
341 ovs[i].index = o.output_buffer;
342
343 /* Set the type appropriately. TODO: Integer varyings XXX */
344 assert(o.stream == 0);
345 ovs[i].format = pan_xfb_format(o.num_components);
346 ovs[i].swizzle = panfrost_get_default_swizzle(o.num_components);
347
348 /* Link to the fragment */
349 signed fs_idx = -1;
350
351 /* Link up */
352 for (unsigned j = 0; j < fs->tripipe->varying_count; ++j) {
353 if (fs->varyings_loc[j] == loc) {
354 fs_idx = j;
355 break;
356 }
357 }
358
359 if (fs_idx >= 0) {
360 ofs[fs_idx].index = ovs[i].index;
361 ofs[fs_idx].format = ovs[i].format;
362 ofs[fs_idx].swizzle = ovs[i].swizzle;
363 }
364 }
365
366 /* Replace point sprite */
367 for (unsigned i = 0; i < fs->tripipe->varying_count; i++) {
368 /* If we have a point sprite replacement, handle that here. We
369 * have to translate location first. TODO: Flip y in shader.
370 * We're already keying ... just time crunch .. */
371
372 if (has_point_coord(fs->point_sprite_mask, fs->varyings_loc[i])) {
373 ofs[i].index = gl_PointCoord;
374
375 /* Swizzle out the z/w to 0/1 */
376 ofs[i].format = MALI_RG16F;
377 ofs[i].swizzle =
378 panfrost_get_default_swizzle(2);
379 }
380 }
381
382 /* Fix up unaligned addresses */
383 for (unsigned i = 0; i < so_count; ++i) {
384 if (varyings[i].elements < MALI_VARYING_SPECIAL)
385 continue;
386
387 unsigned align = (varyings[i].elements & 63);
388
389 /* While we're at it, the SO buffers are linear */
390
391 if (!align) {
392 varyings[i].elements |= MALI_ATTR_LINEAR;
393 continue;
394 }
395
396 /* We need to adjust alignment */
397 varyings[i].elements &= ~63;
398 varyings[i].elements |= MALI_ATTR_LINEAR;
399 varyings[i].size += align;
400
401 for (unsigned v = 0; v < vs->tripipe->varying_count; ++v) {
402 if (ovs[v].index == i)
403 ovs[v].src_offset = vs->varyings[v].src_offset + align;
404 }
405
406 for (unsigned f = 0; f < fs->tripipe->varying_count; ++f) {
407 if (ofs[f].index == i)
408 ofs[f].src_offset = fs->varyings[f].src_offset + align;
409 }
410 }
411
412 mali_ptr varyings_p = panfrost_upload_transient(batch, &varyings, idx * sizeof(union mali_attr));
413 ctx->payloads[PIPE_SHADER_VERTEX].postfix.varyings = varyings_p;
414 ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.varyings = varyings_p;
415
416 ctx->payloads[PIPE_SHADER_VERTEX].postfix.varying_meta = trans.gpu;
417 ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.varying_meta = trans.gpu + vs_size;
418 }