panfrost: Pre-allocate memory for pool
[mesa.git] / src / panfrost / bifrost / bi_special.c
1 /*
2 * Copyright (C) 2020 Collabora Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors (Collabora):
24 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25 */
26
27 #include "compiler.h"
28
29 /* Bifrost requires special functions to be lowered in various machine specific
30 * ways. The routines in this file are used in codegen for this. */
31
32 /* New Bifrost has a FEXP2_FAST instruction but requires an auxiliary
33 * parameter. */
34
35 static void
36 bi_emit_fexp2_new(bi_context *ctx, nir_alu_instr *instr)
37 {
38 /* FMA_MSCALE T, X, 1.0, 0, 0x18 */
39
40 bi_instruction mscale = {
41 .type = BI_FMA,
42 .op = { .mscale = true },
43 .dest = bi_make_temp(ctx),
44 .dest_type = nir_type_float32,
45 .src = {
46 pan_src_index(&instr->src[0].src),
47 BIR_INDEX_CONSTANT | 0,
48 BIR_INDEX_ZERO,
49 BIR_INDEX_CONSTANT | 32,
50 },
51 .src_types = {
52 nir_type_float32,
53 nir_type_float32,
54 nir_type_float32,
55 nir_type_int32,
56 },
57 .constant = {
58 /* 0x3f80000000 = 1.0f as fp32
59 * 24 = shift to multiply by 2^24 */
60 .u64 = (0x3f800000) | (24ull << 32)
61 },
62 .swizzle = { { instr->src[0].swizzle[0] } }
63 };
64
65 /* F2I_RTE T, T */
66
67 bi_instruction f2i = {
68 .type = BI_CONVERT,
69 .dest = bi_make_temp(ctx),
70 .dest_type = nir_type_int32,
71 .src = { mscale.dest },
72 .src_types = { nir_type_float32 },
73 .roundmode = BIFROST_RTE
74 };
75
76 /* FEXP2_FAST T, T, X */
77
78 bi_instruction fexp = {
79 .type = BI_SPECIAL,
80 .op = { .special = BI_SPECIAL_EXP2_LOW },
81 .dest = pan_dest_index(&instr->dest.dest),
82 .dest_type = nir_type_float32,
83 .src = { f2i.dest, mscale.src[0] },
84 .src_types = { nir_type_int32, nir_type_float32 },
85 .swizzle = { {}, { instr->src[0].swizzle[0] } }
86 };
87
88 bi_emit(ctx, mscale);
89 bi_emit(ctx, f2i);
90 bi_emit(ctx, fexp);
91 }
92
93 /* Even on new Bifrost, there are a bunch of reductions to do */
94
95 static void
96 bi_emit_flog2_new(bi_context *ctx, nir_alu_instr *instr)
97 {
98 /* LOG_FREXPE X */
99 bi_instruction frexpe = {
100 .type = BI_FREXP,
101 .op = { .frexp = BI_FREXPE_LOG },
102 .dest = bi_make_temp(ctx),
103 .dest_type = nir_type_int32,
104 .src = { pan_src_index(&instr->src[0].src) },
105 .src_types = { nir_type_float32 },
106 .swizzle = { { instr->src[0].swizzle[0] } }
107 };
108
109 /* I32_TO_F32 m */
110 bi_instruction i2f = {
111 .type = BI_CONVERT,
112 .dest = bi_make_temp(ctx),
113 .dest_type = nir_type_float32,
114 .src = { frexpe.dest },
115 .src_types = { nir_type_int32 },
116 .roundmode = BIFROST_RTZ
117 };
118
119 /* ADD_FREXPM (x-1), -1.0, X */
120 bi_instruction x_minus_1 = {
121 .type = BI_REDUCE_FMA,
122 .op = { .reduce = BI_REDUCE_ADD_FREXPM },
123 .dest = bi_make_temp(ctx),
124 .dest_type = nir_type_float32,
125 .src = {
126 BIR_INDEX_CONSTANT,
127 pan_src_index(&instr->src[0].src),
128 },
129 .src_types = { nir_type_float32, nir_type_float32 },
130 .constant = {
131 .u64 = 0xBF800000 /* -1.0 */
132 },
133 .swizzle = { {}, { instr->src[0].swizzle[0] } }
134 };
135
136 /* FLOG2_HELP log2(x)/(x-1), x */
137 bi_instruction help = {
138 .type = BI_TABLE,
139 .op = { .table = BI_TABLE_LOG2_U_OVER_U_1_LOW },
140 .dest = bi_make_temp(ctx),
141 .dest_type = nir_type_float32,
142 .src = { pan_src_index(&instr->src[0].src) },
143 .src_types = { nir_type_float32 },
144 .swizzle = { { instr->src[0].swizzle[0] } }
145 };
146
147 /* FMA log2(x)/(x - 1), (x - 1), M */
148 bi_instruction fma = {
149 .type = BI_FMA,
150 .dest = pan_dest_index(&instr->dest.dest),
151 .dest_type = nir_type_float32,
152 .src = {
153 help.dest,
154 x_minus_1.dest,
155 i2f.dest
156 },
157 .src_types = {
158 nir_type_float32,
159 nir_type_float32,
160 nir_type_float32
161 }
162 };
163
164 bi_emit(ctx, frexpe);
165 bi_emit(ctx, i2f);
166 bi_emit(ctx, x_minus_1);
167 bi_emit(ctx, help);
168 bi_emit(ctx, fma);
169 }
170
171 void
172 bi_emit_fexp2(bi_context *ctx, nir_alu_instr *instr)
173 {
174 /* TODO: G71 */
175 bi_emit_fexp2_new(ctx, instr);
176 }
177
178 void
179 bi_emit_flog2(bi_context *ctx, nir_alu_instr *instr)
180 {
181 /* TODO: G71 */
182 bi_emit_flog2_new(ctx, instr);
183 }