/*
 * Copyright © 2019 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * Implements lowering for logical operations.
 *
 * V3D doesn't have any hardware support for logic ops.  Instead, you read the
 * current contents of the destination from the tile buffer, then do math using
 * your output color and that destination value, and update the output color
 * appropriately.
 */

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#include "util/u_format.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_format_convert.h"
#include "v3d_compiler.h"

39 typedef nir_ssa_def
*(*nir_pack_func
)(nir_builder
*b
, nir_ssa_def
*c
);
40 typedef nir_ssa_def
*(*nir_unpack_func
)(nir_builder
*b
, nir_ssa_def
*c
);
43 v3d_logicop(nir_builder
*b
, int logicop_func
,
44 nir_ssa_def
*src
, nir_ssa_def
*dst
)
46 switch (logicop_func
) {
47 case PIPE_LOGICOP_CLEAR
:
48 return nir_imm_int(b
, 0);
49 case PIPE_LOGICOP_NOR
:
50 return nir_inot(b
, nir_ior(b
, src
, dst
));
51 case PIPE_LOGICOP_AND_INVERTED
:
52 return nir_iand(b
, nir_inot(b
, src
), dst
);
53 case PIPE_LOGICOP_COPY_INVERTED
:
54 return nir_inot(b
, src
);
55 case PIPE_LOGICOP_AND_REVERSE
:
56 return nir_iand(b
, src
, nir_inot(b
, dst
));
57 case PIPE_LOGICOP_INVERT
:
58 return nir_inot(b
, dst
);
59 case PIPE_LOGICOP_XOR
:
60 return nir_ixor(b
, src
, dst
);
61 case PIPE_LOGICOP_NAND
:
62 return nir_inot(b
, nir_iand(b
, src
, dst
));
63 case PIPE_LOGICOP_AND
:
64 return nir_iand(b
, src
, dst
);
65 case PIPE_LOGICOP_EQUIV
:
66 return nir_inot(b
, nir_ixor(b
, src
, dst
));
67 case PIPE_LOGICOP_NOOP
:
69 case PIPE_LOGICOP_OR_INVERTED
:
70 return nir_ior(b
, nir_inot(b
, src
), dst
);
71 case PIPE_LOGICOP_OR_REVERSE
:
72 return nir_ior(b
, src
, nir_inot(b
, dst
));
74 return nir_ior(b
, src
, dst
);
75 case PIPE_LOGICOP_SET
:
76 return nir_imm_int(b
, ~0);
78 fprintf(stderr
, "Unknown logic op %d\n", logicop_func
);
80 case PIPE_LOGICOP_COPY
:
86 v3d_nir_get_swizzled_channel(nir_builder
*b
, nir_ssa_def
**srcs
, int swiz
)
90 case PIPE_SWIZZLE_NONE
:
91 fprintf(stderr
, "warning: unknown swizzle\n");
94 return nir_imm_float(b
, 0.0);
96 return nir_imm_float(b
, 1.0);
106 v3d_nir_swizzle_and_pack(nir_builder
*b
, nir_ssa_def
**chans
,
107 const uint8_t *swiz
, nir_pack_func pack_func
)
110 for (int i
= 0; i
< 4; i
++)
111 c
[i
] = v3d_nir_get_swizzled_channel(b
, chans
, swiz
[i
]);
113 return pack_func(b
, nir_vec4(b
, c
[0], c
[1], c
[2], c
[3]));
117 v3d_nir_unpack_and_swizzle(nir_builder
*b
, nir_ssa_def
*packed
,
118 const uint8_t *swiz
, nir_unpack_func unpack_func
)
120 nir_ssa_def
*unpacked
= unpack_func(b
, packed
);
122 nir_ssa_def
*unpacked_chans
[4];
123 for (int i
= 0; i
< 4; i
++)
124 unpacked_chans
[i
] = nir_channel(b
, unpacked
, i
);
127 for (int i
= 0; i
< 4; i
++)
128 c
[i
] = v3d_nir_get_swizzled_channel(b
, unpacked_chans
, swiz
[i
]);
130 return nir_vec4(b
, c
[0], c
[1], c
[2], c
[3]);
134 pack_unorm_rgb10a2(nir_builder
*b
, nir_ssa_def
*c
)
136 const unsigned bits
[4] = { 10, 10, 10, 2 };
137 nir_ssa_def
*unorm
= nir_format_float_to_unorm(b
, c
, bits
);
139 nir_ssa_def
*chans
[4];
140 for (int i
= 0; i
< 4; i
++)
141 chans
[i
] = nir_channel(b
, unorm
, i
);
143 nir_ssa_def
*result
= nir_mov(b
, chans
[0]);
144 int offset
= bits
[0];
145 for (int i
= 1; i
< 4; i
++) {
146 nir_ssa_def
*shifted_chan
=
147 nir_ishl(b
, chans
[i
], nir_imm_int(b
, offset
));
148 result
= nir_ior(b
, result
, shifted_chan
);
155 unpack_unorm_rgb10a2(nir_builder
*b
, nir_ssa_def
*c
)
157 const unsigned bits
[4] = { 10, 10, 10, 2 };
158 const unsigned masks
[4] = { BITFIELD_MASK(bits
[0]),
159 BITFIELD_MASK(bits
[1]),
160 BITFIELD_MASK(bits
[2]),
161 BITFIELD_MASK(bits
[3]) };
163 nir_ssa_def
*chans
[4];
164 for (int i
= 0; i
< 4; i
++) {
165 nir_ssa_def
*unorm
= nir_iand(b
, c
, nir_imm_int(b
, masks
[i
]));
166 chans
[i
] = nir_format_unorm_to_float(b
, unorm
, &bits
[i
]);
167 c
= nir_ushr(b
, c
, nir_imm_int(b
, bits
[i
]));
170 return nir_vec4(b
, chans
[0], chans
[1], chans
[2], chans
[3]);
173 static const uint8_t *
174 v3d_get_format_swizzle_for_rt(struct v3d_compile
*c
, int rt
)
176 static const uint8_t ident
[4] = { 0, 1, 2, 3 };
178 /* We will automatically swap R and B channels for BGRA formats
179 * on tile loads and stores (see 'swap_rb' field in v3d_resource) so
180 * we want to treat these surfaces as if they were regular RGBA formats.
182 if (c
->fs_key
->color_fmt
[rt
].swizzle
[0] == 2 &&
183 c
->fs_key
->color_fmt
[rt
].format
!= PIPE_FORMAT_B5G6R5_UNORM
) {
186 return c
->fs_key
->color_fmt
[rt
].swizzle
;
191 v3d_nir_get_tlb_color(nir_builder
*b
, int rt
, int sample
)
193 nir_ssa_def
*color
[4];
194 for (int i
= 0; i
< 4; i
++) {
195 nir_intrinsic_instr
*load
=
196 nir_intrinsic_instr_create(b
->shader
,
197 nir_intrinsic_load_tlb_color_v3d
);
198 load
->num_components
= 1;
199 nir_intrinsic_set_base(load
, sample
);
200 nir_intrinsic_set_component(load
, i
);
201 load
->src
[0] = nir_src_for_ssa(nir_imm_int(b
, rt
));
202 nir_ssa_dest_init(&load
->instr
, &load
->dest
, 1, 32, NULL
);
203 nir_builder_instr_insert(b
, &load
->instr
);
204 color
[i
] = &load
->dest
.ssa
;
207 return nir_vec4(b
, color
[0], color
[1], color
[2], color
[3]);
211 v3d_emit_logic_op_raw(struct v3d_compile
*c
, nir_builder
*b
,
212 nir_ssa_def
**src_chans
, nir_ssa_def
**dst_chans
,
215 const uint8_t *fmt_swz
= v3d_get_format_swizzle_for_rt(c
, rt
);
217 nir_ssa_def
*op_res
[4];
218 for (int i
= 0; i
< 4; i
++) {
219 nir_ssa_def
*src
= src_chans
[i
];
221 v3d_nir_get_swizzled_channel(b
, dst_chans
, fmt_swz
[i
]);
222 op_res
[i
] = v3d_logicop(b
, c
->fs_key
->logicop_func
, src
, dst
);
226 for (int i
= 0; i
< 4; i
++)
227 r
[i
] = v3d_nir_get_swizzled_channel(b
, op_res
, fmt_swz
[i
]);
229 return nir_vec4(b
, r
[0], r
[1], r
[2], r
[3]);
233 v3d_emit_logic_op_unorm(struct v3d_compile
*c
, nir_builder
*b
,
234 nir_ssa_def
**src_chans
, nir_ssa_def
**dst_chans
,
236 nir_pack_func pack_func
, nir_unpack_func unpack_func
)
238 const uint8_t src_swz
[4] = { 0, 1, 2, 3 };
239 nir_ssa_def
*packed_src
=
240 v3d_nir_swizzle_and_pack(b
, src_chans
, src_swz
, pack_func
);
242 const uint8_t *fmt_swz
= v3d_get_format_swizzle_for_rt(c
, rt
);
243 nir_ssa_def
*packed_dst
=
244 v3d_nir_swizzle_and_pack(b
, dst_chans
, fmt_swz
, pack_func
);
246 nir_ssa_def
*packed_result
=
247 v3d_logicop(b
, c
->fs_key
->logicop_func
, packed_src
, packed_dst
);
249 return v3d_nir_unpack_and_swizzle(b
, packed_result
, fmt_swz
, unpack_func
);
253 v3d_nir_emit_logic_op(struct v3d_compile
*c
, nir_builder
*b
,
254 nir_ssa_def
*src
, int rt
, int sample
)
256 nir_ssa_def
*dst
= v3d_nir_get_tlb_color(b
, rt
, sample
);
258 nir_ssa_def
*src_chans
[4], *dst_chans
[4];
259 for (unsigned i
= 0; i
< 4; i
++) {
260 src_chans
[i
] = nir_channel(b
, src
, i
);
261 dst_chans
[i
] = nir_channel(b
, dst
, i
);
264 if (c
->fs_key
->color_fmt
[rt
].format
== PIPE_FORMAT_R10G10B10A2_UNORM
) {
265 return v3d_emit_logic_op_unorm(
266 c
, b
, src_chans
, dst_chans
, rt
, 0,
267 pack_unorm_rgb10a2
, unpack_unorm_rgb10a2
);
270 if (util_format_is_unorm(c
->fs_key
->color_fmt
[rt
].format
)) {
271 return v3d_emit_logic_op_unorm(
272 c
, b
, src_chans
, dst_chans
, rt
, 0,
273 nir_pack_unorm_4x8
, nir_unpack_unorm_4x8
);
276 return v3d_emit_logic_op_raw(c
, b
, src_chans
, dst_chans
, rt
, 0);
280 v3d_nir_lower_logic_op_instr(struct v3d_compile
*c
,
282 nir_intrinsic_instr
*intr
,
285 nir_ssa_def
*frag_color
= intr
->src
[0].ssa
;
287 /* XXX: this is not correct for MSAA render targets */
288 nir_ssa_def
*result
= v3d_nir_emit_logic_op(c
, b
, frag_color
, rt
, 0);
290 nir_instr_rewrite_src(&intr
->instr
, &intr
->src
[0],
291 nir_src_for_ssa(result
));
292 intr
->num_components
= result
->num_components
;
296 v3d_nir_lower_logic_ops_block(nir_block
*block
, struct v3d_compile
*c
)
298 nir_foreach_instr_safe(instr
, block
) {
299 if (instr
->type
!= nir_instr_type_intrinsic
)
302 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
303 if (intr
->intrinsic
!= nir_intrinsic_store_output
)
306 nir_foreach_variable(var
, &c
->s
->outputs
) {
307 const int driver_loc
= var
->data
.driver_location
;
308 if (driver_loc
!= nir_intrinsic_base(intr
))
311 const int loc
= var
->data
.location
;
312 if (loc
!= FRAG_RESULT_COLOR
&&
313 (loc
< FRAG_RESULT_DATA0
||
314 loc
>= FRAG_RESULT_DATA0
+ V3D_MAX_DRAW_BUFFERS
)) {
318 /* Logic operations do not apply on floating point or
319 * sRGB enabled render targets.
321 const int rt
= driver_loc
;
322 assert(rt
< V3D_MAX_DRAW_BUFFERS
);
324 const enum pipe_format format
=
325 c
->fs_key
->color_fmt
[rt
].format
;
326 if (util_format_is_float(format
) ||
327 util_format_is_srgb(format
)) {
331 nir_function_impl
*impl
=
332 nir_cf_node_get_function(&block
->cf_node
);
334 nir_builder_init(&b
, impl
);
335 b
.cursor
= nir_before_instr(&intr
->instr
);
336 v3d_nir_lower_logic_op_instr(c
, &b
, intr
, rt
);
344 v3d_nir_lower_logic_ops(nir_shader
*s
, struct v3d_compile
*c
)
346 /* Nothing to do if logic op is 'copy src to dst' or if logic ops are
347 * disabled (we set the logic op to copy in that case).
349 if (c
->fs_key
->logicop_func
== PIPE_LOGICOP_COPY
)
352 nir_foreach_function(function
, s
) {
353 if (function
->impl
) {
354 nir_foreach_block(block
, function
->impl
)
355 v3d_nir_lower_logic_ops_block(block
, c
);
357 nir_metadata_preserve(function
->impl
,
358 nir_metadata_block_index
|
359 nir_metadata_dominance
);