/*
 * Copyright © 2019 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * Implements lowering for logical operations.
 *
 * V3D doesn't have any hardware support for logic ops.  Instead, you read the
 * current contents of the destination from the tile buffer, then do math using
 * your output color and that destination value, and update the output color
 * appropriately.
 */

33 #include "util/format/u_format.h"
34 #include "compiler/nir/nir_builder.h"
35 #include "compiler/nir/nir_format_convert.h"
36 #include "v3d_compiler.h"
39 typedef nir_ssa_def
*(*nir_pack_func
)(nir_builder
*b
, nir_ssa_def
*c
);
40 typedef nir_ssa_def
*(*nir_unpack_func
)(nir_builder
*b
, nir_ssa_def
*c
);
43 logicop_depends_on_dst_color(int logicop_func
)
45 switch (logicop_func
) {
46 case PIPE_LOGICOP_SET
:
47 case PIPE_LOGICOP_CLEAR
:
48 case PIPE_LOGICOP_COPY
:
49 case PIPE_LOGICOP_COPY_INVERTED
:
57 v3d_logicop(nir_builder
*b
, int logicop_func
,
58 nir_ssa_def
*src
, nir_ssa_def
*dst
)
60 switch (logicop_func
) {
61 case PIPE_LOGICOP_CLEAR
:
62 return nir_imm_int(b
, 0);
63 case PIPE_LOGICOP_NOR
:
64 return nir_inot(b
, nir_ior(b
, src
, dst
));
65 case PIPE_LOGICOP_AND_INVERTED
:
66 return nir_iand(b
, nir_inot(b
, src
), dst
);
67 case PIPE_LOGICOP_COPY_INVERTED
:
68 return nir_inot(b
, src
);
69 case PIPE_LOGICOP_AND_REVERSE
:
70 return nir_iand(b
, src
, nir_inot(b
, dst
));
71 case PIPE_LOGICOP_INVERT
:
72 return nir_inot(b
, dst
);
73 case PIPE_LOGICOP_XOR
:
74 return nir_ixor(b
, src
, dst
);
75 case PIPE_LOGICOP_NAND
:
76 return nir_inot(b
, nir_iand(b
, src
, dst
));
77 case PIPE_LOGICOP_AND
:
78 return nir_iand(b
, src
, dst
);
79 case PIPE_LOGICOP_EQUIV
:
80 return nir_inot(b
, nir_ixor(b
, src
, dst
));
81 case PIPE_LOGICOP_NOOP
:
83 case PIPE_LOGICOP_OR_INVERTED
:
84 return nir_ior(b
, nir_inot(b
, src
), dst
);
85 case PIPE_LOGICOP_OR_REVERSE
:
86 return nir_ior(b
, src
, nir_inot(b
, dst
));
88 return nir_ior(b
, src
, dst
);
89 case PIPE_LOGICOP_SET
:
90 return nir_imm_int(b
, ~0);
92 fprintf(stderr
, "Unknown logic op %d\n", logicop_func
);
94 case PIPE_LOGICOP_COPY
:
100 v3d_nir_get_swizzled_channel(nir_builder
*b
, nir_ssa_def
**srcs
, int swiz
)
104 case PIPE_SWIZZLE_NONE
:
105 fprintf(stderr
, "warning: unknown swizzle\n");
108 return nir_imm_float(b
, 0.0);
110 return nir_imm_float(b
, 1.0);
120 v3d_nir_swizzle_and_pack(nir_builder
*b
, nir_ssa_def
**chans
,
121 const uint8_t *swiz
, nir_pack_func pack_func
)
124 for (int i
= 0; i
< 4; i
++)
125 c
[i
] = v3d_nir_get_swizzled_channel(b
, chans
, swiz
[i
]);
127 return pack_func(b
, nir_vec4(b
, c
[0], c
[1], c
[2], c
[3]));
131 v3d_nir_unpack_and_swizzle(nir_builder
*b
, nir_ssa_def
*packed
,
132 const uint8_t *swiz
, nir_unpack_func unpack_func
)
134 nir_ssa_def
*unpacked
= unpack_func(b
, packed
);
136 nir_ssa_def
*unpacked_chans
[4];
137 for (int i
= 0; i
< 4; i
++)
138 unpacked_chans
[i
] = nir_channel(b
, unpacked
, i
);
141 for (int i
= 0; i
< 4; i
++)
142 c
[i
] = v3d_nir_get_swizzled_channel(b
, unpacked_chans
, swiz
[i
]);
144 return nir_vec4(b
, c
[0], c
[1], c
[2], c
[3]);
148 pack_unorm_rgb10a2(nir_builder
*b
, nir_ssa_def
*c
)
150 const unsigned bits
[4] = { 10, 10, 10, 2 };
151 nir_ssa_def
*unorm
= nir_format_float_to_unorm(b
, c
, bits
);
153 nir_ssa_def
*chans
[4];
154 for (int i
= 0; i
< 4; i
++)
155 chans
[i
] = nir_channel(b
, unorm
, i
);
157 nir_ssa_def
*result
= nir_mov(b
, chans
[0]);
158 int offset
= bits
[0];
159 for (int i
= 1; i
< 4; i
++) {
160 nir_ssa_def
*shifted_chan
=
161 nir_ishl(b
, chans
[i
], nir_imm_int(b
, offset
));
162 result
= nir_ior(b
, result
, shifted_chan
);
169 unpack_unorm_rgb10a2(nir_builder
*b
, nir_ssa_def
*c
)
171 const unsigned bits
[4] = { 10, 10, 10, 2 };
172 const unsigned masks
[4] = { BITFIELD_MASK(bits
[0]),
173 BITFIELD_MASK(bits
[1]),
174 BITFIELD_MASK(bits
[2]),
175 BITFIELD_MASK(bits
[3]) };
177 nir_ssa_def
*chans
[4];
178 for (int i
= 0; i
< 4; i
++) {
179 nir_ssa_def
*unorm
= nir_iand(b
, c
, nir_imm_int(b
, masks
[i
]));
180 chans
[i
] = nir_format_unorm_to_float(b
, unorm
, &bits
[i
]);
181 c
= nir_ushr(b
, c
, nir_imm_int(b
, bits
[i
]));
184 return nir_vec4(b
, chans
[0], chans
[1], chans
[2], chans
[3]);
187 static const uint8_t *
188 v3d_get_format_swizzle_for_rt(struct v3d_compile
*c
, int rt
)
190 static const uint8_t ident
[4] = { 0, 1, 2, 3 };
192 /* We will automatically swap R and B channels for BGRA formats
193 * on tile loads and stores (see 'swap_rb' field in v3d_resource) so
194 * we want to treat these surfaces as if they were regular RGBA formats.
196 if (c
->fs_key
->color_fmt
[rt
].swizzle
[0] == 2 &&
197 c
->fs_key
->color_fmt
[rt
].format
!= PIPE_FORMAT_B5G6R5_UNORM
) {
200 return c
->fs_key
->color_fmt
[rt
].swizzle
;
205 v3d_nir_get_tlb_color(nir_builder
*b
, int rt
, int sample
)
207 nir_ssa_def
*color
[4];
208 for (int i
= 0; i
< 4; i
++) {
209 nir_intrinsic_instr
*load
=
210 nir_intrinsic_instr_create(b
->shader
,
211 nir_intrinsic_load_tlb_color_v3d
);
212 load
->num_components
= 1;
213 nir_intrinsic_set_base(load
, sample
);
214 nir_intrinsic_set_component(load
, i
);
215 load
->src
[0] = nir_src_for_ssa(nir_imm_int(b
, rt
));
216 nir_ssa_dest_init(&load
->instr
, &load
->dest
, 1, 32, NULL
);
217 nir_builder_instr_insert(b
, &load
->instr
);
218 color
[i
] = &load
->dest
.ssa
;
221 return nir_vec4(b
, color
[0], color
[1], color
[2], color
[3]);
225 v3d_emit_logic_op_raw(struct v3d_compile
*c
, nir_builder
*b
,
226 nir_ssa_def
**src_chans
, nir_ssa_def
**dst_chans
,
229 const uint8_t *fmt_swz
= v3d_get_format_swizzle_for_rt(c
, rt
);
231 nir_ssa_def
*op_res
[4];
232 for (int i
= 0; i
< 4; i
++) {
233 nir_ssa_def
*src
= src_chans
[i
];
235 v3d_nir_get_swizzled_channel(b
, dst_chans
, fmt_swz
[i
]);
236 op_res
[i
] = v3d_logicop(b
, c
->fs_key
->logicop_func
, src
, dst
);
240 for (int i
= 0; i
< 4; i
++)
241 r
[i
] = v3d_nir_get_swizzled_channel(b
, op_res
, fmt_swz
[i
]);
243 return nir_vec4(b
, r
[0], r
[1], r
[2], r
[3]);
247 v3d_emit_logic_op_unorm(struct v3d_compile
*c
, nir_builder
*b
,
248 nir_ssa_def
**src_chans
, nir_ssa_def
**dst_chans
,
250 nir_pack_func pack_func
, nir_unpack_func unpack_func
)
252 const uint8_t src_swz
[4] = { 0, 1, 2, 3 };
253 nir_ssa_def
*packed_src
=
254 v3d_nir_swizzle_and_pack(b
, src_chans
, src_swz
, pack_func
);
256 const uint8_t *fmt_swz
= v3d_get_format_swizzle_for_rt(c
, rt
);
257 nir_ssa_def
*packed_dst
=
258 v3d_nir_swizzle_and_pack(b
, dst_chans
, fmt_swz
, pack_func
);
260 nir_ssa_def
*packed_result
=
261 v3d_logicop(b
, c
->fs_key
->logicop_func
, packed_src
, packed_dst
);
263 return v3d_nir_unpack_and_swizzle(b
, packed_result
, fmt_swz
, unpack_func
);
267 v3d_nir_emit_logic_op(struct v3d_compile
*c
, nir_builder
*b
,
268 nir_ssa_def
*src
, int rt
, int sample
)
270 nir_ssa_def
*dst
= v3d_nir_get_tlb_color(b
, rt
, sample
);
272 nir_ssa_def
*src_chans
[4], *dst_chans
[4];
273 for (unsigned i
= 0; i
< 4; i
++) {
274 src_chans
[i
] = nir_channel(b
, src
, i
);
275 dst_chans
[i
] = nir_channel(b
, dst
, i
);
278 if (c
->fs_key
->color_fmt
[rt
].format
== PIPE_FORMAT_R10G10B10A2_UNORM
) {
279 return v3d_emit_logic_op_unorm(
280 c
, b
, src_chans
, dst_chans
, rt
, 0,
281 pack_unorm_rgb10a2
, unpack_unorm_rgb10a2
);
284 if (util_format_is_unorm(c
->fs_key
->color_fmt
[rt
].format
)) {
285 return v3d_emit_logic_op_unorm(
286 c
, b
, src_chans
, dst_chans
, rt
, 0,
287 nir_pack_unorm_4x8
, nir_unpack_unorm_4x8
);
290 return v3d_emit_logic_op_raw(c
, b
, src_chans
, dst_chans
, rt
, 0);
294 v3d_emit_ms_output(struct v3d_compile
*c
, nir_builder
*b
,
295 nir_ssa_def
*color
, nir_src
*offset
,
296 nir_alu_type type
, int rt
, int sample
)
299 nir_intrinsic_instr
*store
=
300 nir_intrinsic_instr_create(b
->shader
,
301 nir_intrinsic_store_tlb_sample_color_v3d
);
302 store
->num_components
= 4;
303 nir_intrinsic_set_base(store
, sample
);
304 nir_intrinsic_set_component(store
, 0);
305 nir_intrinsic_set_type(store
, type
);
306 store
->src
[0] = nir_src_for_ssa(color
);
307 store
->src
[1] = nir_src_for_ssa(nir_imm_int(b
, rt
));
308 nir_builder_instr_insert(b
, &store
->instr
);
312 v3d_nir_lower_logic_op_instr(struct v3d_compile
*c
,
314 nir_intrinsic_instr
*intr
,
317 nir_ssa_def
*frag_color
= intr
->src
[0].ssa
;
320 const int logic_op
= c
->fs_key
->logicop_func
;
321 if (c
->fs_key
->msaa
&& logicop_depends_on_dst_color(logic_op
)) {
322 c
->msaa_per_sample_output
= true;
324 nir_src
*offset
= &intr
->src
[1];
325 nir_alu_type type
= nir_intrinsic_type(intr
);
326 for (int i
= 0; i
< V3D_MAX_SAMPLES
; i
++) {
327 nir_ssa_def
*sample
=
328 v3d_nir_emit_logic_op(c
, b
, frag_color
, rt
, i
);
330 v3d_emit_ms_output(c
, b
, sample
, offset
, type
, rt
, i
);
333 nir_instr_remove(&intr
->instr
);
335 nir_ssa_def
*result
=
336 v3d_nir_emit_logic_op(c
, b
, frag_color
, rt
, 0);
338 nir_instr_rewrite_src(&intr
->instr
, &intr
->src
[0],
339 nir_src_for_ssa(result
));
340 intr
->num_components
= result
->num_components
;
345 v3d_nir_lower_logic_ops_block(nir_block
*block
, struct v3d_compile
*c
)
347 nir_foreach_instr_safe(instr
, block
) {
348 if (instr
->type
!= nir_instr_type_intrinsic
)
351 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
352 if (intr
->intrinsic
!= nir_intrinsic_store_output
)
355 nir_foreach_shader_out_variable(var
, c
->s
) {
356 const int driver_loc
= var
->data
.driver_location
;
357 if (driver_loc
!= nir_intrinsic_base(intr
))
360 const int loc
= var
->data
.location
;
361 if (loc
!= FRAG_RESULT_COLOR
&&
362 (loc
< FRAG_RESULT_DATA0
||
363 loc
>= FRAG_RESULT_DATA0
+ V3D_MAX_DRAW_BUFFERS
)) {
367 /* Logic operations do not apply on floating point or
368 * sRGB enabled render targets.
370 const int rt
= driver_loc
;
371 assert(rt
< V3D_MAX_DRAW_BUFFERS
);
373 const enum pipe_format format
=
374 c
->fs_key
->color_fmt
[rt
].format
;
375 if (util_format_is_float(format
) ||
376 util_format_is_srgb(format
)) {
380 nir_function_impl
*impl
=
381 nir_cf_node_get_function(&block
->cf_node
);
383 nir_builder_init(&b
, impl
);
384 b
.cursor
= nir_before_instr(&intr
->instr
);
385 v3d_nir_lower_logic_op_instr(c
, &b
, intr
, rt
);
393 v3d_nir_lower_logic_ops(nir_shader
*s
, struct v3d_compile
*c
)
395 /* Nothing to do if logic op is 'copy src to dst' or if logic ops are
396 * disabled (we set the logic op to copy in that case).
398 if (c
->fs_key
->logicop_func
== PIPE_LOGICOP_COPY
)
401 nir_foreach_function(function
, s
) {
402 if (function
->impl
) {
403 nir_foreach_block(block
, function
->impl
)
404 v3d_nir_lower_logic_ops_block(block
, c
);
406 nir_metadata_preserve(function
->impl
,
407 nir_metadata_block_index
|
408 nir_metadata_dominance
);