2 * Copyright (C) 2020 Collabora Ltd.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * Authors (Collabora):
24 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
29 /* Bifrost requires special functions to be lowered in various machine specific
30 * ways. The routines in this file are used in codegen for this. */
32 /* New Bifrost has a FEXP2_FAST instruction but requires an auxiliary
 * parameter. */
/* Builds the exp2 lowering for new Bifrost from the NIR ALU instruction
 * `instr`, using `ctx` to allocate temporaries. Three instructions are
 * described here:
 *
 *   1. mscale: FMA_MSCALE of the source with the constant 1.0f and a shift
 *      of 24 (0x18), written to a fresh float32 temporary.
 *   2. f2i:    converts that temporary to int32 with round mode BIFROST_RTE.
 *   3. fexp:   BI_SPECIAL_EXP2_LOW taking the converted value and
 *      mscale.src[0], writing the NIR destination as float32.
 *
 * NOTE(review): this listing is missing lines (the return type, the braces,
 * the openers of some initializer fields and the bi_emit calls are not
 * visible), so only what is visible is documented. */
36 bi_emit_fexp2_new(bi_context
*ctx
, nir_alu_instr
*instr
)
38 /* FMA_MSCALE T, X, 1.0, 0, 0x18 */
/* Step 1: scale the input; the result lands in a new float32 temporary. */
40 bi_instruction mscale
= {
42 .op
= { .mscale
= true },
43 .dest
= bi_make_temp(ctx
),
44 .dest_type
= nir_type_float32
,
/* Source operand list: the NIR source first, then two constant operands.
 * NOTE(review): the values OR'd into BIR_INDEX_CONSTANT (0 and 32) look like
 * bit offsets into the 64-bit constant below (low word = 1.0f, high word =
 * shift) -- confirm against the BIR_INDEX_CONSTANT definition. */
46 pan_src_index(&instr
->src
[0].src
),
47 BIR_INDEX_CONSTANT
| 0,
49 BIR_INDEX_CONSTANT
| 32,
58 /* 0x3f800000 = 1.0f as fp32
59 * 24 = shift to multiply by 2^24 */
60 .u64
= (0x3f800000) | (24ull << 32)
/* Only component 0 of the NIR source swizzle is used. */
62 .swizzle
= { { instr
->src
[0].swizzle
[0] } }
/* Step 2: convert the scaled float32 temporary to int32, using the
 * BIFROST_RTE round mode. */
67 bi_instruction f2i
= {
69 .dest
= bi_make_temp(ctx
),
70 .dest_type
= nir_type_int32
,
71 .src
= { mscale
.dest
},
72 .src_types
= { nir_type_float32
},
73 .roundmode
= BIFROST_RTE
76 /* FEXP2_FAST T, T, X */
/* Step 3: the special-function op itself. Its first source is the int32
 * produced by f2i; its second reuses mscale.src[0] (the X of the comment
 * above, presumably the original NIR source -- the .src opener of mscale is
 * not visible in this listing). The result goes to the NIR destination. */
78 bi_instruction fexp
= {
80 .op
= { .special
= BI_SPECIAL_EXP2_LOW
},
81 .dest
= pan_dest_index(&instr
->dest
.dest
),
82 .dest_type
= nir_type_float32
,
83 .src
= { f2i
.dest
, mscale
.src
[0] },
84 .src_types
= { nir_type_int32
, nir_type_float32
},
/* First source gets an empty swizzle; the second uses component 0 of the
 * NIR source swizzle, matching mscale. */
85 .swizzle
= { {}, { instr
->src
[0].swizzle
[0] } }
93 /* Even on new Bifrost, there are a bunch of reductions to do */
/* Builds the log2 lowering for new Bifrost from the NIR ALU instruction
 * `instr`, using `ctx` to allocate temporaries. The visible pieces are:
 *
 *   - frexpe:    BI_FREXPE_LOG on the source, int32 result in a temporary.
 *   - i2f:       converts frexpe's result to float32 (round mode BIFROST_RTZ).
 *   - x_minus_1: BI_REDUCE_ADD_FREXPM with the constant -1.0, described by
 *                the in-line comment as producing (x-1).
 *   - help:      table lookup BI_TABLE_LOG2_U_OVER_U_1_LOW on the source,
 *                described by the in-line comment as log2(x)/(x-1).
 *   - fma:       final FMA writing the NIR destination as float32.
 *
 * NOTE(review): this listing is missing lines (the return type, the braces,
 * several initializer fields including fma's sources, and some bi_emit
 * calls are not visible), so only what is visible is documented. */
96 bi_emit_flog2_new(bi_context
*ctx
, nir_alu_instr
*instr
)
/* Exponent extraction: float32 source in, int32 temporary out. */
99 bi_instruction frexpe
= {
101 .op
= { .frexp
= BI_FREXPE_LOG
},
102 .dest
= bi_make_temp(ctx
),
103 .dest_type
= nir_type_int32
,
104 .src
= { pan_src_index(&instr
->src
[0].src
) },
105 .src_types
= { nir_type_float32
},
/* Only component 0 of the NIR source swizzle is used. */
106 .swizzle
= { { instr
->src
[0].swizzle
[0] } }
/* Turn the int32 result of frexpe into a float32 temporary, using the
 * BIFROST_RTZ round mode. */
110 bi_instruction i2f
= {
112 .dest
= bi_make_temp(ctx
),
113 .dest_type
= nir_type_float32
,
114 .src
= { frexpe
.dest
},
115 .src_types
= { nir_type_int32
},
116 .roundmode
= BIFROST_RTZ
119 /* ADD_FREXPM (x-1), -1.0, X */
120 bi_instruction x_minus_1
= {
121 .type
= BI_REDUCE_FMA
,
122 .op
= { .reduce
= BI_REDUCE_ADD_FREXPM
},
123 .dest
= bi_make_temp(ctx
),
124 .dest_type
= nir_type_float32
,
/* NOTE(review): the .src opener and any operand listed before this one are
 * not visible; per the comment above the operands are -1.0 and X. */
127 pan_src_index(&instr
->src
[0].src
),
129 .src_types
= { nir_type_float32
, nir_type_float32
},
/* 0xBF800000 is the fp32 bit pattern of -1.0. */
131 .u64
= 0xBF800000 /* -1.0 */
/* First source gets an empty swizzle; the second uses component 0 of the
 * NIR source swizzle. */
133 .swizzle
= { {}, { instr
->src
[0].swizzle
[0] } }
136 /* FLOG2_HELP log2(x)/(x-1), x */
/* Table lookup on the source, result in a float32 temporary. */
137 bi_instruction help
= {
139 .op
= { .table
= BI_TABLE_LOG2_U_OVER_U_1_LOW
},
140 .dest
= bi_make_temp(ctx
),
141 .dest_type
= nir_type_float32
,
142 .src
= { pan_src_index(&instr
->src
[0].src
) },
143 .src_types
= { nir_type_float32
},
144 .swizzle
= { { instr
->src
[0].swizzle
[0] } }
147 /* FMA log2(x)/(x - 1), (x - 1), M */
/* Final combine, written to the NIR destination.
 * NOTE(review): fma's sources are not visible here; presumably they are
 * help.dest, x_minus_1.dest and i2f.dest (the "M" above) -- confirm. */
148 bi_instruction fma
= {
150 .dest
= pan_dest_index(&instr
->dest
.dest
),
151 .dest_type
= nir_type_float32
,
/* Queue the built instructions. NOTE(review): only the emits for frexpe and
 * x_minus_1 are visible; the others are presumably on the missing lines. */
164 bi_emit(ctx
, frexpe
);
166 bi_emit(ctx
, x_minus_1
);
/* Entry point for lowering an exp2 NIR ALU instruction. In the visible code
 * it forwards `ctx` and `instr` unchanged to bi_emit_fexp2_new().
 * NOTE(review): lines around the call are missing from this listing, so any
 * selection between hardware variants is not visible here. */
172 bi_emit_fexp2(bi_context
*ctx
, nir_alu_instr
*instr
)
175 bi_emit_fexp2_new(ctx
, instr
);
/* Entry point for lowering a log2 NIR ALU instruction. In the visible code
 * it forwards `ctx` and `instr` unchanged to bi_emit_flog2_new().
 * NOTE(review): lines around the call are missing from this listing, so any
 * selection between hardware variants is not visible here. */
179 bi_emit_flog2(bi_context
*ctx
, nir_alu_instr
*instr
)
182 bi_emit_flog2_new(ctx
, instr
);