panfrost: Don't check reads_point_coord
[mesa.git] / src / gallium / drivers / panfrost / pan_varyings.c
1 /*
2 * Copyright (C) 2018-2019 Alyssa Rosenzweig
3 * Copyright (C) 2019 Collabora, Ltd.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 */
25
26 #include "pan_context.h"
27 #include "util/u_prim.h"
28
29 static mali_ptr
30 panfrost_emit_varyings(
31 struct panfrost_context *ctx,
32 union mali_attr *slot,
33 unsigned stride,
34 unsigned count)
35 {
36 /* Fill out the descriptor */
37 slot->stride = stride;
38 slot->size = stride * count;
39 slot->shift = slot->extra_flags = 0;
40
41 struct panfrost_transfer transfer =
42 panfrost_allocate_transient(ctx, slot->size);
43
44 slot->elements = transfer.gpu | MALI_ATTR_LINEAR;
45
46 return transfer.gpu;
47 }
48
49 static void
50 panfrost_emit_streamout(
51 struct panfrost_context *ctx,
52 union mali_attr *slot,
53 unsigned stride,
54 unsigned offset,
55 unsigned count,
56 struct pipe_stream_output_target *target)
57 {
58 /* Fill out the descriptor */
59 slot->stride = stride * 4;
60 slot->shift = slot->extra_flags = 0;
61
62 unsigned max_size = target->buffer_size;
63 unsigned expected_size = slot->stride * count;
64
65 slot->size = MIN2(max_size, expected_size);
66
67 /* Grab the BO and bind it to the batch */
68 struct panfrost_job *batch = panfrost_get_job_for_fbo(ctx);
69 struct panfrost_bo *bo = pan_resource(target->buffer)->bo;
70 panfrost_job_add_bo(batch, bo);
71
72 mali_ptr addr = bo->gpu + target->buffer_offset + (offset * slot->stride);
73 slot->elements = addr;
74 }
75
76 static void
77 panfrost_emit_point_coord(union mali_attr *slot)
78 {
79 slot->elements = MALI_VARYING_POINT_COORD | MALI_ATTR_LINEAR;
80 slot->stride = slot->size = slot->shift = slot->extra_flags = 0;
81 }
82
83 static void
84 panfrost_emit_front_face(union mali_attr *slot)
85 {
86 slot->elements = MALI_VARYING_FRONT_FACING | MALI_ATTR_INTERNAL;
87 }
88
89 /* Given a shader and buffer indices, link varying metadata together */
90
91 static bool
92 is_special_varying(gl_varying_slot loc)
93 {
94 switch (loc) {
95 case VARYING_SLOT_POS:
96 case VARYING_SLOT_PSIZ:
97 case VARYING_SLOT_PNTC:
98 case VARYING_SLOT_FACE:
99 return true;
100 default:
101 return false;
102 }
103 }
104
105 static void
106 panfrost_emit_varying_meta(
107 void *outptr, struct panfrost_shader_state *ss,
108 signed general, signed gl_Position,
109 signed gl_PointSize, signed gl_PointCoord,
110 signed gl_FrontFacing)
111 {
112 struct mali_attr_meta *out = (struct mali_attr_meta *) outptr;
113
114 for (unsigned i = 0; i < ss->tripipe->varying_count; ++i) {
115 gl_varying_slot location = ss->varyings_loc[i];
116 int index = -1;
117
118 switch (location) {
119 case VARYING_SLOT_POS:
120 index = gl_Position;
121 break;
122 case VARYING_SLOT_PSIZ:
123 index = gl_PointSize;
124 break;
125 case VARYING_SLOT_PNTC:
126 index = gl_PointCoord;
127 break;
128 case VARYING_SLOT_FACE:
129 index = gl_FrontFacing;
130 break;
131 default:
132 index = general;
133 break;
134 }
135
136 assert(index >= 0);
137 out[i].index = index;
138 }
139 }
140
141 static bool
142 has_point_coord(unsigned mask, gl_varying_slot loc)
143 {
144 if ((loc >= VARYING_SLOT_TEX0) && (loc <= VARYING_SLOT_TEX7))
145 return (mask & (1 << (loc - VARYING_SLOT_TEX0)));
146 else if (loc == VARYING_SLOT_PNTC)
147 return (mask & (1 << 8));
148 else
149 return false;
150 }
151
152 /* Helpers for manipulating stream out information so we can pack varyings
153 * accordingly. Compute the src_offset for a given captured varying */
154
155 static struct pipe_stream_output
156 pan_get_so(struct pipe_stream_output_info info, gl_varying_slot loc)
157 {
158 for (unsigned i = 0; i < info.num_outputs; ++i) {
159 if (info.output[i].register_index == loc)
160 return info.output[i];
161 }
162
163 unreachable("Varying not captured");
164 }
165
166 /* TODO: Integers */
167 static enum mali_format
168 pan_xfb_format(unsigned nr_components)
169 {
170 switch (nr_components) {
171 case 1: return MALI_R32F;
172 case 2: return MALI_RG32F;
173 case 3: return MALI_RGB32F;
174 case 4: return MALI_RGBA32F;
175 default: unreachable("Invalid format");
176 }
177 }
178
179 void
180 panfrost_emit_varying_descriptor(
181 struct panfrost_context *ctx,
182 unsigned vertex_count)
183 {
184 /* Load the shaders */
185
186 struct panfrost_shader_state *vs = &ctx->shader[PIPE_SHADER_VERTEX]->variants[ctx->shader[PIPE_SHADER_VERTEX]->active_variant];
187 struct panfrost_shader_state *fs = &ctx->shader[PIPE_SHADER_FRAGMENT]->variants[ctx->shader[PIPE_SHADER_FRAGMENT]->active_variant];
188 unsigned int num_gen_varyings = 0;
189
190 /* Allocate the varying descriptor */
191
192 size_t vs_size = sizeof(struct mali_attr_meta) * vs->tripipe->varying_count;
193 size_t fs_size = sizeof(struct mali_attr_meta) * fs->tripipe->varying_count;
194
195 struct panfrost_transfer trans = panfrost_allocate_transient(ctx,
196 vs_size + fs_size);
197
198 struct pipe_stream_output_info so = vs->stream_output;
199
200 /* Check if this varying is linked by us. This is the case for
201 * general-purpose, non-captured varyings. If it is, link it. If it's
202 * not, use the provided stream out information to determine the
203 * offset, since it was already linked for us. */
204
205 for (unsigned i = 0; i < vs->tripipe->varying_count; i++) {
206 gl_varying_slot loc = vs->varyings_loc[i];
207
208 bool special = is_special_varying(loc);
209 bool captured = ((vs->so_mask & (1ll << loc)) ? true : false);
210
211 if (captured) {
212 struct pipe_stream_output o = pan_get_so(so, loc);
213
214 unsigned dst_offset = o.dst_offset * 4; /* dwords */
215 vs->varyings[i].src_offset = dst_offset;
216 } else if (!special) {
217 vs->varyings[i].src_offset = 16 * (num_gen_varyings++);
218 }
219 }
220
221 /* Conversely, we need to set src_offset for the captured varyings.
222 * Here, the layout is defined by the stream out info, not us */
223
224 /* Link up with fragment varyings */
225 bool reads_point_coord = fs->reads_point_coord;
226
227 for (unsigned i = 0; i < fs->tripipe->varying_count; i++) {
228 gl_varying_slot loc = fs->varyings_loc[i];
229 signed vs_idx = -1;
230
231 /* Link up */
232 for (unsigned j = 0; j < vs->tripipe->varying_count; ++j) {
233 if (vs->varyings_loc[j] == loc) {
234 vs_idx = j;
235 break;
236 }
237 }
238
239 /* Either assign or reuse */
240 if (vs_idx >= 0)
241 fs->varyings[i].src_offset = vs->varyings[vs_idx].src_offset;
242 else
243 fs->varyings[i].src_offset = 16 * (num_gen_varyings++);
244
245 if (has_point_coord(fs->point_sprite_mask, loc))
246 reads_point_coord = true;
247 }
248
249 memcpy(trans.cpu, vs->varyings, vs_size);
250 memcpy(trans.cpu + vs_size, fs->varyings, fs_size);
251
252 union mali_attr varyings[PIPE_MAX_ATTRIBS];
253
254 /* Figure out how many streamout buffers could be bound */
255 unsigned so_count = ctx->streamout.num_targets;
256 for (unsigned i = 0; i < vs->tripipe->varying_count; i++) {
257 gl_varying_slot loc = vs->varyings_loc[i];
258
259 bool captured = ((vs->so_mask & (1ll << loc)) ? true : false);
260 if (!captured) continue;
261
262 struct pipe_stream_output o = pan_get_so(so, loc);
263 so_count = MAX2(so_count, o.output_buffer + 1);
264 }
265
266 signed idx = so_count;
267 signed general = idx++;
268 signed gl_Position = idx++;
269 signed gl_PointSize = vs->writes_point_size ? (idx++) : -1;
270 signed gl_PointCoord = reads_point_coord ? (idx++) : -1;
271 signed gl_FrontFacing = fs->reads_face ? (idx++) : -1;
272
273 /* Emit the stream out buffers */
274
275 unsigned output_count = u_stream_outputs_for_vertices(
276 ctx->active_prim, ctx->vertex_count);
277
278 for (unsigned i = 0; i < so_count; ++i) {
279 struct pipe_stream_output_target *target =
280 (i < ctx->streamout.num_targets) ? ctx->streamout.targets[i] : NULL;
281
282 if (target) {
283 panfrost_emit_streamout(ctx, &varyings[i], so.stride[i], ctx->streamout.offsets[i], output_count, target);
284 } else {
285 /* Emit a dummy buffer */
286 panfrost_emit_varyings(ctx, &varyings[i], so.stride[i] * 4, output_count);
287
288 /* Clear the attribute type */
289 varyings[i].elements &= ~0xF;
290 }
291 }
292
293 panfrost_emit_varyings(ctx, &varyings[general], num_gen_varyings * 16,
294 vertex_count);
295
296 /* fp32 vec4 gl_Position */
297 ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.position_varying =
298 panfrost_emit_varyings(ctx, &varyings[gl_Position],
299 sizeof(float) * 4, vertex_count);
300
301
302 if (vs->writes_point_size)
303 ctx->payloads[PIPE_SHADER_FRAGMENT].primitive_size.pointer =
304 panfrost_emit_varyings(ctx, &varyings[gl_PointSize],
305 2, vertex_count);
306
307 if (reads_point_coord)
308 panfrost_emit_point_coord(&varyings[gl_PointCoord]);
309
310 if (fs->reads_face)
311 panfrost_emit_front_face(&varyings[gl_FrontFacing]);
312
313 /* Let's go ahead and link varying meta to the buffer in question, now
314 * that that information is available */
315
316 panfrost_emit_varying_meta(trans.cpu, vs,
317 general, gl_Position, gl_PointSize,
318 gl_PointCoord, gl_FrontFacing);
319
320 panfrost_emit_varying_meta(trans.cpu + vs_size, fs,
321 general, gl_Position, gl_PointSize,
322 gl_PointCoord, gl_FrontFacing);
323
324 /* Replace streamout */
325
326 struct mali_attr_meta *ovs = (struct mali_attr_meta *) (trans.cpu);
327 struct mali_attr_meta *ofs = (struct mali_attr_meta *) (trans.cpu + vs_size);
328
329 for (unsigned i = 0; i < vs->tripipe->varying_count; i++) {
330 gl_varying_slot loc = vs->varyings_loc[i];
331
332 bool captured = ((vs->so_mask & (1ll << loc)) ? true : false);
333 if (!captured) continue;
334
335 struct pipe_stream_output o = pan_get_so(so, loc);
336 ovs[i].index = o.output_buffer;
337
338 /* Set the type appropriately. TODO: Integer varyings XXX */
339 assert(o.stream == 0);
340 ovs[i].format = pan_xfb_format(o.num_components);
341 ovs[i].swizzle = panfrost_get_default_swizzle(o.num_components);
342
343 /* Link to the fragment */
344 signed fs_idx = -1;
345
346 /* Link up */
347 for (unsigned j = 0; j < fs->tripipe->varying_count; ++j) {
348 if (fs->varyings_loc[j] == loc) {
349 fs_idx = j;
350 break;
351 }
352 }
353
354 if (fs_idx >= 0) {
355 ofs[fs_idx].index = ovs[i].index;
356 ofs[fs_idx].format = ovs[i].format;
357 ofs[fs_idx].swizzle = ovs[i].swizzle;
358 }
359 }
360
361 /* Replace point sprite */
362 for (unsigned i = 0; i < fs->tripipe->varying_count; i++) {
363 /* If we have a point sprite replacement, handle that here. We
364 * have to translate location first. TODO: Flip y in shader.
365 * We're already keying ... just time crunch .. */
366
367 if (has_point_coord(fs->point_sprite_mask, fs->varyings_loc[i])) {
368 ofs[i].index = gl_PointCoord;
369
370 /* Swizzle out the z/w to 0/1 */
371 ofs[i].format = MALI_RG16F;
372 ofs[i].swizzle =
373 panfrost_get_default_swizzle(2);
374 }
375 }
376
377 /* Fix up unaligned addresses */
378 for (unsigned i = 0; i < so_count; ++i) {
379 unsigned align = (varyings[i].elements & 63);
380
381 /* While we're at it, the SO buffers are linear */
382
383 if (!align) {
384 varyings[i].elements |= MALI_ATTR_LINEAR;
385 continue;
386 }
387
388 /* We need to adjust alignment */
389 varyings[i].elements &= ~63;
390 varyings[i].elements |= MALI_ATTR_LINEAR;
391 varyings[i].size += align;
392
393 for (unsigned v = 0; v < vs->tripipe->varying_count; ++v) {
394 if (ovs[v].index == i)
395 ovs[v].src_offset = vs->varyings[v].src_offset + align;
396 }
397
398 for (unsigned f = 0; f < fs->tripipe->varying_count; ++f) {
399 if (ofs[f].index == i)
400 ofs[f].src_offset = fs->varyings[f].src_offset + align;
401 }
402 }
403
404 mali_ptr varyings_p = panfrost_upload_transient(ctx, &varyings, idx * sizeof(union mali_attr));
405 ctx->payloads[PIPE_SHADER_VERTEX].postfix.varyings = varyings_p;
406 ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.varyings = varyings_p;
407
408 ctx->payloads[PIPE_SHADER_VERTEX].postfix.varying_meta = trans.gpu;
409 ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.varying_meta = trans.gpu + vs_size;
410 }