[mesa.git] src/gallium/drivers/vc4/vc4_nir_lower_io.c
/*
 * Copyright © 2015 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "vc4_qir.h"
#include "glsl/nir/nir_builder.h"

/**
 * Walks the NIR generated by TGSI-to-NIR to lower its I/O intrinsics into
 * something amenable to the VC4 architecture.
 *
 * Currently, it splits inputs, outputs, and uniforms into scalars, drops any
 * non-position outputs in coordinate shaders, and fixes up the addressing on
 * indirect uniform loads.
 */

static void
replace_intrinsic_with_vec4(nir_builder *b, nir_intrinsic_instr *intr,
                            nir_ssa_def **comps)
{
        /* Batch things back together into a vec4. This will get split by the
         * later ALU scalarization pass.
         */
        nir_ssa_def *vec = nir_vec4(b, comps[0], comps[1], comps[2], comps[3]);

        /* Replace the old intrinsic with a reference to our reconstructed
         * vec4.
         */
        nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(vec));
        nir_instr_remove(&intr->instr);
}

static void
vc4_nir_lower_input(struct vc4_compile *c, nir_builder *b,
                    nir_intrinsic_instr *intr)
{
        b->cursor = nir_before_instr(&intr->instr);

        if (c->stage == QSTAGE_FRAG && intr->const_index[0] ==
            VC4_NIR_TLB_COLOR_READ_INPUT) {
                /* This doesn't need any lowering. */
                return;
        }

        nir_variable *input_var = NULL;
        foreach_list_typed(nir_variable, var, node, &c->s->inputs) {
                if (var->data.driver_location == intr->const_index[0]) {
                        input_var = var;
                        break;
                }
        }
        assert(input_var);

        /* All TGSI-to-NIR inputs are vec4. */
        assert(intr->num_components == 4);

        /* Generate scalar loads equivalent to the original VEC4. */
        nir_ssa_def *dests[4];
        for (unsigned i = 0; i < intr->num_components; i++) {
                nir_intrinsic_instr *intr_comp =
                        nir_intrinsic_instr_create(c->s, nir_intrinsic_load_input);
                intr_comp->num_components = 1;
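                /* Inputs are now addressed per scalar component: each vec4
                 * driver_location becomes four consecutive scalar slots.
                 */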
                intr_comp->const_index[0] = intr->const_index[0] * 4 + i;
                nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL);
                nir_builder_instr_insert(b, &intr_comp->instr);

                dests[i] = &intr_comp->dest.ssa;
        }

        switch (c->stage) {
        case QSTAGE_FRAG:
                if (input_var->data.location == VARYING_SLOT_FACE) {
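                        /* The raw face input is a 0/1 integer; remap it to
                         * the +1.0 (front-facing) / -1.0 (back-facing)
                         * convention TGSI expects for the FACE input.
                         */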
                        dests[0] = nir_fsub(b,
                                            nir_imm_float(b, 1.0),
                                            nir_fmul(b,
                                                     nir_i2f(b, dests[0]),
                                                     nir_imm_float(b, 2.0)));
                        dests[1] = nir_imm_float(b, 0.0);
                        dests[2] = nir_imm_float(b, 0.0);
                        dests[3] = nir_imm_float(b, 1.0);
                } else if (input_var->data.location >= VARYING_SLOT_VAR0) {
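                        /* Varyings flagged in point_sprite_mask are replaced
                         * by the point sprite coordinate: .xy comes from the
                         * hardware when drawing points (zeroed otherwise), T
                         * is flipped for an upper-left origin, and .zw is
                         * fixed to (0.0, 1.0).
                         */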
                        if (c->fs_key->point_sprite_mask &
                            (1 << (input_var->data.location -
                                   VARYING_SLOT_VAR0))) {
                                if (!c->fs_key->is_points) {
                                        dests[0] = nir_imm_float(b, 0.0);
                                        dests[1] = nir_imm_float(b, 0.0);
                                }
                                if (c->fs_key->point_coord_upper_left) {
                                        dests[1] = nir_fsub(b,
                                                            nir_imm_float(b, 1.0),
                                                            dests[1]);
                                }
                                dests[2] = nir_imm_float(b, 0.0);
                                dests[3] = nir_imm_float(b, 1.0);
                        }
                }
                break;
        case QSTAGE_COORD:
        case QSTAGE_VERT:
                break;
        }

        replace_intrinsic_with_vec4(b, intr, dests);
}

static void
vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b,
                     nir_intrinsic_instr *intr)
{
        nir_variable *output_var = NULL;
        foreach_list_typed(nir_variable, var, node, &c->s->outputs) {
                if (var->data.driver_location == intr->const_index[0]) {
                        output_var = var;
                        break;
                }
        }
        assert(output_var);

        if (c->stage == QSTAGE_COORD &&
            output_var->data.location != VARYING_SLOT_POS &&
            output_var->data.location != VARYING_SLOT_PSIZ) {
                nir_instr_remove(&intr->instr);
                return;
        }

        /* Color output is lowered by vc4_nir_lower_blend(). */
        if (c->stage == QSTAGE_FRAG &&
            (output_var->data.location == FRAG_RESULT_COLOR ||
             output_var->data.location == FRAG_RESULT_DATA0)) {
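                /* Rescale to the component-based index space used for the
                 * other scalarized outputs, but leave the vec4 store intact
                 * for the blend lowering pass to handle.
                 */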
                intr->const_index[0] *= 4;
                return;
        }

        /* All TGSI-to-NIR outputs are VEC4. */
        assert(intr->num_components == 4);

        b->cursor = nir_before_instr(&intr->instr);

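        /* Emit one scalar store per component, using a swizzle to select the
         * component from the original vec4 source.
         */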
        for (unsigned i = 0; i < intr->num_components; i++) {
                nir_intrinsic_instr *intr_comp =
                        nir_intrinsic_instr_create(c->s, nir_intrinsic_store_output);
                intr_comp->num_components = 1;
                intr_comp->const_index[0] = intr->const_index[0] * 4 + i;

                assert(intr->src[0].is_ssa);
                intr_comp->src[0] = nir_src_for_ssa(nir_swizzle(b,
                                                                intr->src[0].ssa,
                                                                &i, 1, false));
                nir_builder_instr_insert(b, &intr_comp->instr);
        }

        nir_instr_remove(&intr->instr);
}

static void
vc4_nir_lower_uniform(struct vc4_compile *c, nir_builder *b,
                      nir_intrinsic_instr *intr)
{
        /* All TGSI-to-NIR uniform loads are vec4, but we may create dword
         * loads in our lowering passes.
         */
        if (intr->num_components == 1)
                return;
        assert(intr->num_components == 4);

        b->cursor = nir_before_instr(&intr->instr);

        /* Generate scalar loads equivalent to the original VEC4. */
        nir_ssa_def *dests[4];
        for (unsigned i = 0; i < intr->num_components; i++) {
                nir_intrinsic_instr *intr_comp =
                        nir_intrinsic_instr_create(c->s, intr->intrinsic);
                intr_comp->num_components = 1;
                nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL);

                if (intr->intrinsic == nir_intrinsic_load_uniform_indirect) {
                        /* Convert the variable TGSI register index to a byte
                         * offset.
                         */
                        intr_comp->src[0] =
                                nir_src_for_ssa(nir_ishl(b,
                                                         intr->src[0].ssa,
                                                         nir_imm_int(b, 4)));

                        /* Convert the offset to be a byte index, too. */
                        intr_comp->const_index[0] = (intr->const_index[0] * 16 +
                                                     i * 4);
                } else {
                        /* We want a dword index for non-indirect uniform
                         * loads.
                         */
                        intr_comp->const_index[0] = (intr->const_index[0] * 4 +
                                                     i);
                }

                dests[i] = &intr_comp->dest.ssa;

                nir_builder_instr_insert(b, &intr_comp->instr);
        }

        replace_intrinsic_with_vec4(b, intr, dests);
}

static void
vc4_nir_lower_io_instr(struct vc4_compile *c, nir_builder *b,
                       struct nir_instr *instr)
{
        if (instr->type != nir_instr_type_intrinsic)
                return;
        nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

        switch (intr->intrinsic) {
        case nir_intrinsic_load_input:
                vc4_nir_lower_input(c, b, intr);
                break;

        case nir_intrinsic_store_output:
                vc4_nir_lower_output(c, b, intr);
                break;

        case nir_intrinsic_load_uniform:
        case nir_intrinsic_load_uniform_indirect:
                vc4_nir_lower_uniform(c, b, intr);
                break;

        default:
                break;
        }
}

static bool
vc4_nir_lower_io_block(nir_block *block, void *arg)
{
        struct vc4_compile *c = arg;
        nir_function_impl *impl =
                nir_cf_node_get_function(&block->cf_node);

        nir_builder b;
        nir_builder_init(&b, impl);

        nir_foreach_instr_safe(block, instr)
                vc4_nir_lower_io_instr(c, &b, instr);

        return true;
}

static bool
vc4_nir_lower_io_impl(struct vc4_compile *c, nir_function_impl *impl)
{
        nir_foreach_block(impl, vc4_nir_lower_io_block, c);

        nir_metadata_preserve(impl, nir_metadata_block_index |
                              nir_metadata_dominance);

        return true;
}

void
vc4_nir_lower_io(struct vc4_compile *c)
{
        nir_foreach_overload(c->s, overload) {
                if (overload->impl)
                        vc4_nir_lower_io_impl(c, overload->impl);
        }
}