2 * Copyright (C) 2018-2019 Alyssa Rosenzweig
3 * Copyright (C) 2019 Collabora, Ltd.
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27 #include "pan_context.h"
28 #include "pan_format.h"
29 #include "util/u_prim.h"
/* Describe a varying buffer backed by fresh transient memory.
 *
 * Writes stride, size (stride * count) and zeroed shift and extra_flags
 * into the slot, allocates slot->size from the batch returned by
 * panfrost_get_batch_for_fbo(ctx), and stores the GPU address of that
 * allocation OR-ed with MALI_ATTR_LINEAR in slot->elements.
 *
 * ctx:  context used to find the current batch
 * slot: descriptor to fill in
 * stride, count: used below; their declarations are not visible in this
 *       view of the file
 *
 * NOTE(review): the return type and return statement are not visible here.
 * Callers in this file assign the result to payload pointer fields, so it
 * presumably yields the GPU address of the allocation; confirm. */
32 panfrost_emit_varyings(
33 struct panfrost_context
*ctx
,
34 union mali_attr
*slot
,
38 /* Fill out the descriptor */
39 slot
->stride
= stride
;
40 slot
->size
= stride
* count
;
41 slot
->shift
= slot
->extra_flags
= 0;
/* Back the buffer with slot->size of transient memory from the current batch */
43 struct panfrost_batch
*batch
= panfrost_get_batch_for_fbo(ctx
);
44 struct panfrost_transfer transfer
=
45 panfrost_allocate_transient(batch
, slot
->size
);
/* Point the descriptor at the allocation, flagged with MALI_ATTR_LINEAR */
47 slot
->elements
= transfer
.gpu
| MALI_ATTR_LINEAR
;
/* Describe a varying buffer that writes into a bound stream output target.
 *
 * ctx:    context used to find the current batch
 * slot:   descriptor to fill in
 * target: stream output target; its buffer, buffer_offset and buffer_size
 *         are read
 * stride, offset, count: used below; their declarations are not visible in
 *         this view of the file
 *
 * The descriptor stride is stride * 4 (presumably a dword to byte
 * conversion; confirm), the size is capped at target->buffer_size, the
 * backing BO is added to the current batch, and slot->elements is set to
 * bo->gpu + target->buffer_offset + offset * slot->stride. */
53 panfrost_emit_streamout(
54 struct panfrost_context
*ctx
,
55 union mali_attr
*slot
,
59 struct pipe_stream_output_target
*target
)
61 /* Fill out the descriptor */
62 slot
->stride
= stride
* 4;
63 slot
->shift
= slot
->extra_flags
= 0;
/* Cap the size so the descriptor never extends past the target buffer size */
65 unsigned max_size
= target
->buffer_size
;
66 unsigned expected_size
= slot
->stride
* count
;
68 slot
->size
= MIN2(max_size
, expected_size
);
70 /* Grab the BO and bind it to the batch */
71 struct panfrost_batch
*batch
= panfrost_get_batch_for_fbo(ctx
);
72 struct panfrost_bo
*bo
= pan_resource(target
->buffer
)->bo
;
74 /* Varyings are WRITE from the perspective of the VERTEX but READ from
75 * the perspective of the TILER and FRAGMENT.
77 panfrost_batch_add_bo(batch
, bo
,
78 PAN_BO_ACCESS_SHARED
|
80 PAN_BO_ACCESS_VERTEX_TILER
|
81 PAN_BO_ACCESS_FRAGMENT
);
83 mali_ptr addr
= bo
->gpu
+ target
->buffer_offset
+ (offset
* slot
->stride
);
84 slot
->elements
= addr
;
87 /* Given a shader and buffer indices, link varying metadata together */
/* Classify a varying location. The position, point size, point coordinate
 * and front facing slots are the cases listed below. The caller in this
 * file stores the result in a bool named special and only hands a
 * general-purpose slot to varyings for which it is false.
 *
 * NOTE(review): the switch header and the return statements are not visible
 * in this view; presumably the listed slots yield true and every other
 * location false. Confirm against the full definition. */
90 is_special_varying(gl_varying_slot loc
)
93 case VARYING_SLOT_POS
:
94 case VARYING_SLOT_PSIZ
:
95 case VARYING_SLOT_PNTC
:
96 case VARYING_SLOT_FACE
:
/* Assign a varying buffer index to every varying record of one shader.
 *
 * outptr: start of an array of struct mali_attr_meta holding one record per
 *         varying of ss; only the index field is written here
 * ss:     shader state supplying tripipe->varying_count and varyings_loc
 * general, gl_Position, gl_PointSize, gl_PointCoord, gl_FrontFacing:
 *         buffer indices for each kind of varying. The caller in this file
 *         passes -1 for buffers it did not emit, and for the fragment
 *         shader it passes its gl_FragCoord buffer index as gl_Position.
 *
 * NOTE(review): the declaration of index, the switch header, the body of
 * the VARYING_SLOT_POS case and any default case are not visible in this
 * view. Presumably POS selects gl_Position and all other locations select
 * general; confirm against the full definition. */
104 panfrost_emit_varying_meta(
105 void *outptr
, struct panfrost_shader_state
*ss
,
106 signed general
, signed gl_Position
,
107 signed gl_PointSize
, signed gl_PointCoord
,
108 signed gl_FrontFacing
)
110 struct mali_attr_meta
*out
= (struct mali_attr_meta
*) outptr
;
/* One record per varying, in the same order as ss->varyings_loc */
112 for (unsigned i
= 0; i
< ss
->tripipe
->varying_count
; ++i
) {
113 gl_varying_slot location
= ss
->varyings_loc
[i
];
/* Pick the buffer that sources this location */
117 case VARYING_SLOT_POS
:
120 case VARYING_SLOT_PSIZ
:
121 index
= gl_PointSize
;
123 case VARYING_SLOT_PNTC
:
124 index
= gl_PointCoord
;
126 case VARYING_SLOT_FACE
:
127 index
= gl_FrontFacing
;
135 out
[i
].index
= index
;
/* Test whether a varying location is selected by a point sprite mask.
 *
 * mask: bit n (0 to 7) stands for VARYING_SLOT_TEX0 + n and bit 8 stands
 *       for VARYING_SLOT_PNTC
 * loc:  varying location to test
 *
 * Yields the masked bit (nonzero when selected) for those locations.
 *
 * NOTE(review): the return type and the result for any other location are
 * not visible in this view; callers use the result as a condition, so other
 * locations presumably yield false. Confirm. */
140 has_point_coord(unsigned mask
, gl_varying_slot loc
)
142 if ((loc
>= VARYING_SLOT_TEX0
) && (loc
<= VARYING_SLOT_TEX7
))
143 return (mask
& (1 << (loc
- VARYING_SLOT_TEX0
)));
144 else if (loc
== VARYING_SLOT_PNTC
)
145 return (mask
& (1 << 8));
150 /* Helpers for manipulating stream out information so we can pack varyings
151 * accordingly. Compute the src_offset for a given captured varying */
/* pan_get_so: return, by value, the first entry of info->output whose
 * register_index equals loc.
 *
 * info: stream output description to search (num_outputs entries)
 * loc:  varying location to look up
 *
 * If no entry matches, control reaches unreachable(), so this must only be
 * called for locations that are actually captured. The later loops in this
 * file skip locations whose so_mask bit is clear before calling it. */
153 static struct pipe_stream_output
154 pan_get_so(struct pipe_stream_output_info
*info
, gl_varying_slot loc
)
156 for (unsigned i
= 0; i
< info
->num_outputs
; ++i
) {
157 if (info
->output
[i
].register_index
== loc
)
158 return info
->output
[i
];
161 unreachable("Varying not captured");
/* Map the component count of a captured varying onto a hardware format:
 * 1 to MALI_R32F, 2 to MALI_RG32F, 3 to MALI_RGB32F, 4 to MALI_RGBA32F.
 *
 * nr_components: number of components, expected to be 1 through 4
 *
 * Any other count reaches unreachable(). Only the 32F formats are produced;
 * the caller in this file carries a TODO about integer varyings. */
165 static enum mali_format
166 pan_xfb_format(unsigned nr_components
)
168 switch (nr_components
) {
169 case 1: return MALI_R32F
;
170 case 2: return MALI_RG32F
;
171 case 3: return MALI_RGB32F
;
172 case 4: return MALI_RGBA32F
;
173 default: unreachable("Invalid format");
/* Build and publish the varying state for the current draw.
 *
 * Two tables are produced:
 *  - varying records (struct mali_attr_meta): the vertex shader records
 *    followed by the fragment shader records, in one transient allocation
 *  - varying buffer descriptors (union mali_attr): built in a local array
 *    and uploaded at the end
 * Their GPU addresses are stored in the vertex and fragment payloads.
 *
 * ctx:          context providing the bound shaders, the streamout state
 *               and the payloads
 * vertex_count: element count, used at least to size the gl_Position buffer
 *
 * Buffer indices are handed out in this order: stream output buffers
 * (0 to so_count - 1), the general-purpose buffer, gl_Position, and then
 * gl_PointSize, gl_PointCoord, gl_FrontFacing and gl_FragCoord, each only
 * when the shaders need it (-1 otherwise).
 *
 * NOTE(review): several conditions and continuation lines of this function
 * are not visible in this view of the file; comments below describe only
 * what the visible statements do. */
178 panfrost_emit_varying_descriptor(
179 struct panfrost_context
*ctx
,
180 unsigned vertex_count
)
182 /* Load the shaders */
184 struct panfrost_shader_state
*vs
= &ctx
->shader
[PIPE_SHADER_VERTEX
]->variants
[ctx
->shader
[PIPE_SHADER_VERTEX
]->active_variant
];
185 struct panfrost_shader_state
*fs
= &ctx
->shader
[PIPE_SHADER_FRAGMENT
]->variants
[ctx
->shader
[PIPE_SHADER_FRAGMENT
]->active_variant
];
/* Running count of slots handed out in the general-purpose buffer */
186 unsigned int num_gen_varyings
= 0;
188 /* Allocate the varying descriptor */
190 size_t vs_size
= sizeof(struct mali_attr_meta
) * vs
->tripipe
->varying_count
;
191 size_t fs_size
= sizeof(struct mali_attr_meta
) * fs
->tripipe
->varying_count
;
193 struct panfrost_batch
*batch
= panfrost_get_batch_for_fbo(ctx
);
194 struct panfrost_transfer trans
= panfrost_allocate_transient(batch
,
197 struct pipe_stream_output_info so
= vs
->stream_output
;
199 /* Check if this varying is linked by us. This is the case for
200 * general-purpose, non-captured varyings. If it is, link it. If it's
201 * not, use the provided stream out information to determine the
202 * offset, since it was already linked for us. */
204 for (unsigned i
= 0; i
< vs
->tripipe
->varying_count
; i
++) {
205 gl_varying_slot loc
= vs
->varyings_loc
[i
];
207 bool special
= is_special_varying(loc
);
/* captured: the bit for this location is set in the stream output mask */
208 bool captured
= ((vs
->so_mask
& (1ll << loc
)) ? true : false);
211 struct pipe_stream_output o
= pan_get_so(&so
, loc
);
213 unsigned dst_offset
= o
.dst_offset
* 4; /* dwords */
214 vs
->varyings
[i
].src_offset
= dst_offset
;
215 } else if (!special
) {
/* Next free slot in the general buffer; slots are spaced 16 apart
 * (presumably one fp32 vec4 each; confirm) */
216 vs
->varyings
[i
].src_offset
= 16 * (num_gen_varyings
++);
220 /* Conversely, we need to set src_offset for the captured varyings.
221 * Here, the layout is defined by the stream out info, not us */
223 /* Link up with fragment varyings */
224 bool reads_point_coord
= fs
->reads_point_coord
;
226 for (unsigned i
= 0; i
< fs
->tripipe
->varying_count
; i
++) {
227 gl_varying_slot loc
= fs
->varyings_loc
[i
];
/* Look for a vertex shader varying at the same location */
231 for (unsigned j
= 0; j
< vs
->tripipe
->varying_count
; ++j
) {
232 if (vs
->varyings_loc
[j
] == loc
) {
238 /* Either assign or reuse */
240 fs
->varyings
[i
].src_offset
= vs
->varyings
[vs_idx
].src_offset
;
242 fs
->varyings
[i
].src_offset
= 16 * (num_gen_varyings
++);
/* A varying replaced by a point sprite also needs the point coord buffer */
244 if (has_point_coord(fs
->point_sprite_mask
, loc
))
245 reads_point_coord
= true;
/* Seed the records from each shader varying table; buffer indices and
 * stream output fixups are patched into this copy further down */
248 memcpy(trans
.cpu
, vs
->varyings
, vs_size
);
249 memcpy(trans
.cpu
+ vs_size
, fs
->varyings
, fs_size
);
/* NOTE(review): this array is indexed up to idx - 1 below and no bound
 * check against PIPE_MAX_ATTRIBS is visible in this view; confirm. */
251 union mali_attr varyings
[PIPE_MAX_ATTRIBS
];
252 memset(varyings
, 0, sizeof(varyings
));
254 /* Figure out how many streamout buffers could be bound */
255 unsigned so_count
= ctx
->streamout
.num_targets
;
256 for (unsigned i
= 0; i
< vs
->tripipe
->varying_count
; i
++) {
257 gl_varying_slot loc
= vs
->varyings_loc
[i
];
259 bool captured
= ((vs
->so_mask
& (1ll << loc
)) ? true : false);
260 if (!captured
) continue;
262 struct pipe_stream_output o
= pan_get_so(&so
, loc
);
263 so_count
= MAX2(so_count
, o
.output_buffer
+ 1);
/* Hand out buffer indices after the stream output buffers; -1 marks a
 * buffer that is not emitted */
266 signed idx
= so_count
;
267 signed general
= idx
++;
268 signed gl_Position
= idx
++;
269 signed gl_PointSize
= vs
->writes_point_size
? (idx
++) : -1;
270 signed gl_PointCoord
= reads_point_coord
? (idx
++) : -1;
271 signed gl_FrontFacing
= fs
->reads_face
? (idx
++) : -1;
272 signed gl_FragCoord
= fs
->reads_frag_coord
? (idx
++) : -1;
274 /* Emit the stream out buffers */
/* NOTE(review): this reads ctx->vertex_count while the gl_Position buffer
 * below is sized with the vertex_count parameter; confirm the two are
 * meant to differ. */
276 unsigned output_count
= u_stream_outputs_for_vertices(
277 ctx
->active_prim
, ctx
->vertex_count
);
279 for (unsigned i
= 0; i
< so_count
; ++i
) {
/* Indices at or past num_targets have no bound target (NULL) */
280 struct pipe_stream_output_target
*target
=
281 (i
< ctx
->streamout
.num_targets
) ? ctx
->streamout
.targets
[i
] : NULL
;
284 panfrost_emit_streamout(ctx
, &varyings
[i
], so
.stride
[i
], ctx
->streamout
.offsets
[i
], output_count
, target
);
286 /* Emit a dummy buffer */
287 panfrost_emit_varyings(ctx
, &varyings
[i
], so
.stride
[i
] * 4, output_count
);
289 /* Clear the attribute type */
290 varyings
[i
].elements
&= ~0xF;
/* General-purpose buffer: stride of 16 per linked varying */
294 panfrost_emit_varyings(ctx
, &varyings
[general
], num_gen_varyings
* 16,
297 /* fp32 vec4 gl_Position */
298 ctx
->payloads
[PIPE_SHADER_FRAGMENT
].postfix
.position_varying
=
299 panfrost_emit_varyings(ctx
, &varyings
[gl_Position
],
300 sizeof(float) * 4, vertex_count
);
303 if (vs
->writes_point_size
)
304 ctx
->payloads
[PIPE_SHADER_FRAGMENT
].primitive_size
.pointer
=
305 panfrost_emit_varyings(ctx
, &varyings
[gl_PointSize
],
/* The remaining buffers get a MALI_VARYING_* constant as their elements
 * value instead of an allocated address */
308 if (reads_point_coord
)
309 varyings
[gl_PointCoord
].elements
= MALI_VARYING_POINT_COORD
;
312 varyings
[gl_FrontFacing
].elements
= MALI_VARYING_FRONT_FACING
;
314 if (fs
->reads_frag_coord
)
315 varyings
[gl_FragCoord
].elements
= MALI_VARYING_FRAG_COORD
;
317 /* Let's go ahead and link varying meta to the buffer in question, now
318 * that that information is available. VARYING_SLOT_POS is mapped to
319 * gl_FragCoord for fragment shaders but gl_Position for vertex shaders
322 panfrost_emit_varying_meta(trans
.cpu
, vs
,
323 general
, gl_Position
, gl_PointSize
,
324 gl_PointCoord
, gl_FrontFacing
);
326 panfrost_emit_varying_meta(trans
.cpu
+ vs_size
, fs
,
327 general
, gl_FragCoord
, gl_PointSize
,
328 gl_PointCoord
, gl_FrontFacing
);
330 /* Replace streamout */
/* Typed views of the vertex and fragment records inside the allocation */
332 struct mali_attr_meta
*ovs
= (struct mali_attr_meta
*) (trans
.cpu
);
333 struct mali_attr_meta
*ofs
= (struct mali_attr_meta
*) (trans
.cpu
+ vs_size
);
335 for (unsigned i
= 0; i
< vs
->tripipe
->varying_count
; i
++) {
336 gl_varying_slot loc
= vs
->varyings_loc
[i
];
338 bool captured
= ((vs
->so_mask
& (1ll << loc
)) ? true : false);
339 if (!captured
) continue;
/* Captured varyings source from their stream output buffer instead */
341 struct pipe_stream_output o
= pan_get_so(&so
, loc
);
342 ovs
[i
].index
= o
.output_buffer
;
344 /* Set the type appropriately. TODO: Integer varyings XXX */
345 assert(o
.stream
== 0);
346 ovs
[i
].format
= pan_xfb_format(o
.num_components
);
347 ovs
[i
].swizzle
= panfrost_get_default_swizzle(o
.num_components
);
349 /* Link to the fragment */
353 for (unsigned j
= 0; j
< fs
->tripipe
->varying_count
; ++j
) {
354 if (fs
->varyings_loc
[j
] == loc
) {
/* Mirror the captured layout into the matching fragment record */
361 ofs
[fs_idx
].index
= ovs
[i
].index
;
362 ofs
[fs_idx
].format
= ovs
[i
].format
;
363 ofs
[fs_idx
].swizzle
= ovs
[i
].swizzle
;
367 /* Replace point sprite */
368 for (unsigned i
= 0; i
< fs
->tripipe
->varying_count
; i
++) {
369 /* If we have a point sprite replacement, handle that here. We
370 * have to translate location first. TODO: Flip y in shader.
371 * We're already keying ... just time crunch .. */
373 if (has_point_coord(fs
->point_sprite_mask
, fs
->varyings_loc
[i
])) {
374 ofs
[i
].index
= gl_PointCoord
;
376 /* Swizzle out the z/w to 0/1 */
377 ofs
[i
].format
= MALI_RG16F
;
379 panfrost_get_default_swizzle(2);
383 /* Fix up unaligned addresses */
384 for (unsigned i
= 0; i
< so_count
; ++i
) {
385 if (varyings
[i
].elements
< MALI_RECORD_SPECIAL
)
/* align: the low 6 bits of the buffer address */
388 unsigned align
= (varyings
[i
].elements
& 63);
390 /* While we're at it, the SO buffers are linear */
393 varyings
[i
].elements
|= MALI_ATTR_LINEAR
;
397 /* We need to adjust alignment */
398 varyings
[i
].elements
&= ~63;
399 varyings
[i
].elements
|= MALI_ATTR_LINEAR
;
400 varyings
[i
].size
+= align
;
/* Compensate in every record that sources from this buffer: add the bits
 * masked off the address to its src_offset */
402 for (unsigned v
= 0; v
< vs
->tripipe
->varying_count
; ++v
) {
403 if (ovs
[v
].index
== i
)
404 ovs
[v
].src_offset
= vs
->varyings
[v
].src_offset
+ align
;
407 for (unsigned f
= 0; f
< fs
->tripipe
->varying_count
; ++f
) {
408 if (ofs
[f
].index
== i
)
409 ofs
[f
].src_offset
= fs
->varyings
[f
].src_offset
+ align
;
/* Upload the idx descriptors in use and publish both tables; the vertex
 * and fragment payloads share the descriptor array */
413 mali_ptr varyings_p
= panfrost_upload_transient(batch
, &varyings
, idx
* sizeof(union mali_attr
));
414 ctx
->payloads
[PIPE_SHADER_VERTEX
].postfix
.varyings
= varyings_p
;
415 ctx
->payloads
[PIPE_SHADER_FRAGMENT
].postfix
.varyings
= varyings_p
;
417 ctx
->payloads
[PIPE_SHADER_VERTEX
].postfix
.varying_meta
= trans
.gpu
;
418 ctx
->payloads
[PIPE_SHADER_FRAGMENT
].postfix
.varying_meta
= trans
.gpu
+ vs_size
;