r600/sfn: Add a basic nir shader backend
[mesa.git] / src / gallium / drivers / r600 / sfn / sfn_emitaluinstruction.cpp
1 /* -*- mesa-c++ -*-
2 *
3 * Copyright (c) 2018 Collabora LTD
4 *
5 * Author: Gert Wollny <gert.wollny@collabora.com>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27
28 #include "sfn_emitaluinstruction.h"
29 #include "sfn_debug.h"
30
31 #include "gallium/drivers/r600/r600_shader.h"
32
33 namespace r600 {
34
35 using std::vector;
36
37 EmitAluInstruction::EmitAluInstruction(ShaderFromNirProcessor& processor):
38 EmitInstruction (processor)
39 {
40
41 }
42
43 bool EmitAluInstruction::do_emit(nir_instr* ir)
44 {
45 const nir_alu_instr& instr = *nir_instr_as_alu(ir);
46
47 r600::sfn_log << SfnLog::instr << "emit '"
48 << *ir
49 << " bitsize: " << static_cast<int>(instr.dest.dest.ssa.bit_size)
50 << "' (" << __func__ << ")\n";
51
52 split_constants(instr);
53
54 switch (instr.op) {
55 case nir_op_b2f32: return emit_alu_b2f(instr);
56 case nir_op_i2b1: return emit_alu_i2orf2_b1(instr, op2_setne_int);
57 case nir_op_f2b1: return emit_alu_i2orf2_b1(instr, op2_setne_dx10);
58 case nir_op_mov:return emit_alu_op1(instr, op1_mov);
59 case nir_op_ftrunc: return emit_alu_op1(instr, op1_trunc);
60 case nir_op_fabs: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_abs});
61 case nir_op_fneg: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_neg});
62 case nir_op_fsat: return emit_alu_op1(instr, op1_mov, {1 << alu_dst_clamp});
63 case nir_op_frcp: return emit_alu_trans_op1(instr, op1_recip_ieee);
64 case nir_op_frsq: return emit_alu_trans_op1(instr, op1_recipsqrt_ieee1);
65 case nir_op_fsin: return emit_alu_trig_op1(instr, op1_sin);
66 case nir_op_fcos: return emit_alu_trig_op1(instr, op1_cos);
67 case nir_op_fexp2: return emit_alu_trans_op1(instr, op1_exp_ieee);
68 case nir_op_flog2: return emit_alu_trans_op1(instr, op1_log_clamped);
69
70 case nir_op_fround_even: return emit_alu_op1(instr, op1_rndne);
71 case nir_op_fsqrt: return emit_alu_trans_op1(instr, op1_sqrt_ieee);
72 case nir_op_i2f32: return emit_alu_trans_op1(instr, op1_int_to_flt);
73 case nir_op_u2f32: return emit_alu_trans_op1(instr, op1_uint_to_flt);
74 case nir_op_f2i32: return emit_alu_f2i32_or_u32(instr, op1_flt_to_int);
75 case nir_op_f2u32: return emit_alu_f2i32_or_u32(instr, op1_flt_to_uint);
76
77 case nir_op_fceil: return emit_alu_op1(instr, op1_ceil);
78 case nir_op_ffract: return emit_alu_op1(instr, op1_fract);
79 case nir_op_ffloor: return emit_alu_op1(instr, op1_floor);
80
81 case nir_op_fsign: return emit_fsign(instr);
82 case nir_op_fdph: return emit_fdph(instr);
83
84 case nir_op_ibitfield_extract: return emit_bitfield_extract(instr, op3_bfe_int);
85 case nir_op_ubitfield_extract: return emit_bitfield_extract(instr, op3_bfe_uint);
86 case nir_op_bitfield_insert: return emit_bitfield_insert(instr);
87 case nir_op_bit_count: return emit_alu_op1(instr, op1_bcnt_int);
88 case nir_op_bitfield_reverse: return emit_alu_op1(instr, op1_bfrev_int);
89
90 case nir_op_ieq: return emit_alu_op2_int(instr, op2_sete_int);
91 case nir_op_ine: return emit_alu_op2_int(instr, op2_setne_int);
92 case nir_op_ige: return emit_alu_op2_int(instr, op2_setge_int);
93 case nir_op_ishl: return emit_alu_op2_int(instr, op2_lshl_int);
94 case nir_op_ishr: return emit_alu_op2_int(instr, op2_ashr_int);
95 case nir_op_ilt: return emit_alu_op2_int(instr, op2_setgt_int, op2_opt_reverse);
96 case nir_op_iand: return emit_alu_op2_int(instr, op2_and_int);
97 case nir_op_ixor: return emit_alu_op2_int(instr, op2_xor_int);
98 case nir_op_imin: return emit_alu_op2_int(instr, op2_min_int);
99 case nir_op_imax: return emit_alu_op2_int(instr, op2_max_int);
100 case nir_op_imul_high: return emit_alu_trans_op2(instr, op2_mulhi_int);
101 case nir_op_umul_high: return emit_alu_trans_op2(instr, op2_mulhi_uint);
102 case nir_op_umax: return emit_alu_op2_int(instr, op2_max_uint);
103 case nir_op_umin: return emit_alu_op2_int(instr, op2_min_uint);
104 case nir_op_ior: return emit_alu_op2_int(instr, op2_or_int);
105 case nir_op_inot: return emit_alu_op1(instr, op1_not_int);
106 case nir_op_iabs: return emit_alu_iabs(instr);
107 case nir_op_ineg: return emit_alu_ineg(instr);
108 case nir_op_idiv: return emit_alu_div_int(instr, true, false);
109 case nir_op_udiv: return emit_alu_div_int(instr, false, false);
110 case nir_op_umod: return emit_alu_div_int(instr, false, true);
111 case nir_op_isign: return emit_alu_isign(instr);
112
113 case nir_op_uge: return emit_alu_op2_int(instr, op2_setge_uint);
114 case nir_op_ult: return emit_alu_op2_int(instr, op2_setgt_uint, op2_opt_reverse);
115 case nir_op_ushr: return emit_alu_op2_int(instr, op2_lshr_int);
116
117 case nir_op_flt: return emit_alu_op2(instr, op2_setgt_dx10, op2_opt_reverse);
118
119 case nir_op_fge: return emit_alu_op2(instr, op2_setge_dx10);
120 case nir_op_fne: return emit_alu_op2(instr, op2_setne_dx10);
121 case nir_op_feq: return emit_alu_op2(instr, op2_sete_dx10);
122
123 case nir_op_fmin: return emit_alu_op2(instr, op2_min_dx10);
124 case nir_op_fmax: return emit_alu_op2(instr, op2_max_dx10);
125 case nir_op_fmul: return emit_alu_op2(instr, op2_mul_ieee);
126 case nir_op_imul: return emit_alu_trans_op2(instr, op2_mullo_int);
127 case nir_op_fadd: return emit_alu_op2(instr, op2_add);
128 case nir_op_fsub: return emit_alu_op2(instr, op2_add, op2_opt_neg_src1);
129 case nir_op_iadd: return emit_alu_op2_int(instr, op2_add_int);
130 case nir_op_isub: return emit_alu_op2_int(instr, op2_sub_int);
131 case nir_op_fdot2: return emit_dot(instr, 2);
132 case nir_op_fdot3: return emit_dot(instr, 3);
133 case nir_op_fdot4: return emit_dot(instr, 4);
134
135 case nir_op_bany_inequal2: return emit_any_all_icomp(instr, op2_setne_int, 2, false);
136 case nir_op_bany_inequal3: return emit_any_all_icomp(instr, op2_setne_int, 3, false);
137 case nir_op_bany_inequal4: return emit_any_all_icomp(instr, op2_setne_int, 4, false);
138
139 case nir_op_ball_iequal2: return emit_any_all_icomp(instr, op2_sete_int, 2, true);
140 case nir_op_ball_iequal3: return emit_any_all_icomp(instr, op2_sete_int, 3, true);
141 case nir_op_ball_iequal4: return emit_any_all_icomp(instr, op2_sete_int, 4, true);
142
143 case nir_op_bany_fnequal2: return emit_any_all_fcomp2(instr, op2_setne_dx10, false);
144 case nir_op_bany_fnequal3: return emit_any_all_fcomp(instr, op2_setne, 3, false);
145 case nir_op_bany_fnequal4: return emit_any_all_fcomp(instr, op2_setne, 4, false);
146
147 case nir_op_ball_fequal2: return emit_any_all_fcomp2(instr, op2_sete_dx10, true);
148 case nir_op_ball_fequal3: return emit_any_all_fcomp(instr, op2_sete, 3, true);
149 case nir_op_ball_fequal4: return emit_any_all_fcomp(instr, op2_sete, 4, true);
150
151
152 case nir_op_ffma: return emit_alu_op3(instr, op3_muladd_ieee);
153 case nir_op_bcsel: return emit_alu_op3(instr, op3_cnde_int, {0, 2, 1});
154 case nir_op_vec2: return emit_create_vec(instr, 2);
155 case nir_op_vec3: return emit_create_vec(instr, 3);
156 case nir_op_vec4: return emit_create_vec(instr, 4);
157
158 case nir_op_find_lsb: return emit_alu_op1(instr, op1_ffbl_int);
159 case nir_op_ufind_msb: return emit_find_msb(instr, false);
160 case nir_op_ifind_msb: return emit_find_msb(instr, true);
161 case nir_op_b2i32: return emit_b2i32(instr);
162 case nir_op_pack_64_2x32_split: return emit_pack_64_2x32_split(instr);
163 case nir_op_unpack_64_2x32_split_x: return emit_unpack_64_2x32_split(instr, 0);
164 case nir_op_unpack_64_2x32_split_y: return emit_unpack_64_2x32_split(instr, 1);
165 case nir_op_unpack_half_2x16_split_x: return emit_unpack_32_2x16_split_x(instr);
166 case nir_op_unpack_half_2x16_split_y: return emit_unpack_32_2x16_split_y(instr);
167 case nir_op_pack_half_2x16_split: return emit_pack_32_2x16_split(instr);
168
169
170 /* These are in the ALU instruction list, but they should be texture instructions */
171 case nir_op_fddx_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, true);
172 case nir_op_fddx_coarse:
173 case nir_op_fddx: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, false);
174
175 case nir_op_fddy_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_v, true);
176 case nir_op_fddy_coarse:
177 case nir_op_fddy: return emit_tex_fdd(instr,TexInstruction::get_gradient_v, false);
178
179 default:
180 return false;
181 }
182 }
183
184 void EmitAluInstruction::split_constants(const nir_alu_instr& instr)
185 {
186 const nir_op_info *op_info = &nir_op_infos[instr.op];
187 if (op_info->num_inputs < 2)
188 return;
189
190 int nconst = 0;
191 std::array<PValue,4> c;
192 std::array<int,4> idx;
193 for (unsigned i = 0; i < op_info->num_inputs; ++i) {
194 PValue src = from_nir(instr.src[i], 0);
195 assert(src);
196 if (src->type() == Value::kconst) {
197 c[nconst] = src;
198
199 idx[nconst++] = i;
200 }
201 }
202 if (nconst < 2)
203 return;
204
205 unsigned sel = c[0]->sel();
206 sfn_log << SfnLog::reg << "split " << nconst << " constants, sel[0] = " << sel; ;
207
208 for (int i = 1; i < nconst; ++i) {
209 sfn_log << "sel[" << i << "] = " << c[i]->sel() << "\n";
210 if (c[i]->sel() != sel) {
211 load_uniform(instr.src[idx[i]]);
212 }
213 }
214 }
215
216 bool EmitAluInstruction::emit_alu_inot(const nir_alu_instr& instr)
217 {
218 if (instr.src[0].negate || instr.src[0].abs) {
219 std::cerr << "source modifiers not supported with int ops\n";
220 return false;
221 }
222
223 AluInstruction *ir = nullptr;
224 for (int i = 0; i < 4 ; ++i) {
225 if (instr.dest.write_mask & (1 << i)){
226 ir = new AluInstruction(op1_not_int, from_nir(instr.dest, i),
227 from_nir(instr.src[0], i), write);
228 emit_instruction(ir);
229 }
230 }
231 if (ir)
232 ir->set_flag(alu_last_instr);
233 return true;
234 }
235
236 bool EmitAluInstruction::emit_alu_op1(const nir_alu_instr& instr, EAluOp opcode,
237 const AluOpFlags& flags)
238 {
239 AluInstruction *ir = nullptr;
240 for (int i = 0; i < 4 ; ++i) {
241 if (instr.dest.write_mask & (1 << i)){
242 ir = new AluInstruction(opcode, from_nir(instr.dest, i),
243 from_nir(instr.src[0], i), write);
244
245 if (flags.test(alu_src0_abs) || instr.src[0].abs)
246 ir->set_flag(alu_src0_abs);
247
248 if (instr.src[0].negate ^ flags.test(alu_src0_neg))
249 ir->set_flag(alu_src0_neg);
250
251 if (flags.test(alu_dst_clamp) || instr.dest.saturate)
252 ir->set_flag(alu_dst_clamp);
253
254 emit_instruction(ir);
255 }
256 }
257 make_last(ir);
258
259 return true;
260 }
261
262 bool EmitAluInstruction::emit_alu_trig_op1(const nir_alu_instr& instr, EAluOp opcode)
263 {
264 // normalize by dividing by 2*PI, shift by 0.5, take fraction, and
265 // then shift back
266
267 const float inv_2_pi = 0.15915494f;
268
269 PValue v[4]; // this might need some additional temp register creation
270 for (unsigned i = 0; i < 4 ; ++i)
271 v[i] = from_nir(instr.dest, i);
272
273 PValue inv_pihalf = PValue(new LiteralValue(inv_2_pi, 0));
274 AluInstruction *ir = nullptr;
275 for (unsigned i = 0; i < 4 ; ++i) {
276 if (!(instr.dest.write_mask & (1 << i)))
277 continue;
278 ir = new AluInstruction(op3_muladd_ieee, v[i],
279 {from_nir(instr.src[0],i), inv_pihalf, Value::zero_dot_5},
280 {alu_write});
281 if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
282 emit_instruction(ir);
283 }
284 make_last(ir);
285
286 for (unsigned i = 0; i < 4 ; ++i) {
287 if (!(instr.dest.write_mask & (1 << i)))
288 continue;
289 ir = new AluInstruction(op1_fract, v[i], v[i], {alu_write});
290 emit_instruction(ir);
291 }
292 make_last(ir);
293
294 for (unsigned i = 0; i < 4 ; ++i) {
295 if (!(instr.dest.write_mask & (1 << i)))
296 continue;
297 ir = new AluInstruction(op2_add, v[i], v[i], Value::zero_dot_5, write);
298 ir->set_flag(alu_src1_neg);
299 emit_instruction(ir);
300 }
301 make_last(ir);
302
303 for (unsigned i = 0; i < 4 ; ++i) {
304 if (!(instr.dest.write_mask & (1 << i)))
305 continue;
306
307 ir = new AluInstruction(opcode, v[i], v[i], last_write);
308 emit_instruction(ir);
309 }
310 return true;
311 }
312
313 bool EmitAluInstruction::emit_alu_trans_op1(const nir_alu_instr& instr, EAluOp opcode,
314 bool absolute)
315 {
316 AluInstruction *ir = nullptr;
317 std::set<int> src_idx;
318 for (int i = 0; i < 4 ; ++i) {
319 if (instr.dest.write_mask & (1 << i)){
320 ir = new AluInstruction(opcode, from_nir(instr.dest, i),
321 from_nir(instr.src[0], i), last_write);
322 if (absolute || instr.src[0].abs) ir->set_flag(alu_src0_abs);
323 if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
324 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
325 emit_instruction(ir);
326 }
327 }
328 return true;
329 }
330
331 bool EmitAluInstruction::emit_alu_f2i32_or_u32(const nir_alu_instr& instr, EAluOp op)
332 {
333 AluInstruction *ir = nullptr;
334 std::array<PValue, 4> v;
335
336 for (int i = 0; i < 4; ++i) {
337 if (!(instr.dest.write_mask & (1 << i)))
338 continue;
339 v[i] = from_nir(instr.dest, i);
340 ir = new AluInstruction(op1_trunc, v[i], from_nir(instr.src[0], i), {alu_write});
341 if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
342 if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
343 emit_instruction(ir);
344 }
345 make_last(ir);
346
347 for (int i = 0; i < 4; ++i) {
348 if (!(instr.dest.write_mask & (1 << i)))
349 continue;
350 ir = new AluInstruction(op, v[i], v[i], {alu_write});
351 emit_instruction(ir);
352 if (op == op1_flt_to_uint)
353 make_last(ir);
354 }
355 make_last(ir);
356
357 return true;
358 }
359
360 bool EmitAluInstruction::emit_find_msb(const nir_alu_instr& instr, bool sgn)
361 {
362 int sel_tmp = allocate_temp_register();
363 int sel_tmp2 = allocate_temp_register();
364 GPRVector tmp(sel_tmp, {0,1,2,3});
365 GPRVector tmp2(sel_tmp2, {0,1,2,3});
366 AluInstruction *ir = nullptr;
367 EAluOp opcode = sgn ? op1_ffbh_int : op1_ffbh_uint;
368 for (int i = 0; i < 4; ++i) {
369 if (!(instr.dest.write_mask & (1 << i)))
370 continue;
371
372 ir = new AluInstruction(opcode, tmp.reg_i(i), from_nir(instr.src[0], i), write);
373 emit_instruction(ir);
374 }
375 make_last(ir);
376
377 for (int i = 0; i < 4 ; ++i) {
378 if (!(instr.dest.write_mask & (1 << i)))
379 continue;
380
381 ir = new AluInstruction(op2_sub_int, tmp2.reg_i(i),
382 PValue(new LiteralValue(31u, 0)), tmp.reg_i(i), write);
383 emit_instruction(ir);
384 }
385 make_last(ir);
386
387 for (int i = 0; i < 4 ; ++i) {
388 if (!(instr.dest.write_mask & (1 << i)))
389 continue;
390
391 ir = new AluInstruction(op3_cndge_int, from_nir(instr.dest, i), tmp.reg_i(i),
392 tmp2.reg_i(i), tmp.reg_i(i), write);
393 emit_instruction(ir);
394 }
395 make_last(ir);
396
397 return true;
398 }
399
400 bool EmitAluInstruction::emit_b2i32(const nir_alu_instr& instr)
401 {
402 AluInstruction *ir = nullptr;
403 for (int i = 0; i < 4 ; ++i) {
404 if (!(instr.dest.write_mask & (1 << i)))
405 continue;
406
407 ir = new AluInstruction(op2_and_int, from_nir(instr.dest, i),
408 from_nir(instr.src[0], i), Value::one_i, write);
409 emit_instruction(ir);
410 }
411 make_last(ir);
412
413 return true;
414 }
415
416 bool EmitAluInstruction::emit_pack_64_2x32_split(const nir_alu_instr& instr)
417 {
418 AluInstruction *ir = nullptr;
419 for (unsigned i = 0; i < 2; ++i) {
420 if (!(instr.dest.write_mask & (1 << i)))
421 continue;
422 ir = new AluInstruction(op1_mov, from_nir(instr.dest, i),
423 from_nir(instr.src[0], i), write);
424 emit_instruction(ir);
425 }
426 ir->set_flag(alu_last_instr);
427 return true;
428 }
429
430 bool EmitAluInstruction::emit_unpack_64_2x32_split(const nir_alu_instr& instr, unsigned comp)
431 {
432 emit_instruction(new AluInstruction(op1_mov, from_nir(instr.dest, 0),
433 from_nir(instr.src[0], comp), last_write));
434 return true;
435 }
436
437 bool EmitAluInstruction::emit_create_vec(const nir_alu_instr& instr, unsigned nc)
438 {
439 AluInstruction *ir = nullptr;
440 std::set<int> src_slot;
441 for(unsigned i = 0; i < nc; ++i) {
442 if (instr.dest.write_mask & (1 << i)){
443 auto src = from_nir(instr.src[i], 0);
444 ir = new AluInstruction(op1_mov, from_nir(instr.dest, i), src, write);
445 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
446
447 // FIXME: This is a rather crude approach to fix the problem that
448 // r600 can't read from four different slots of the same component
449 // here we check only for the register index
450 if (src->type() == Value::gpr)
451 src_slot.insert(src->sel());
452 if (src_slot.size() >= 3) {
453 src_slot.clear();
454 ir->set_flag(alu_last_instr);
455 }
456 emit_instruction(ir);
457 }
458 }
459 if (ir)
460 ir->set_flag(alu_last_instr);
461 return true;
462 }
463
464 bool EmitAluInstruction::emit_dot(const nir_alu_instr& instr, int n)
465 {
466 const nir_alu_src& src0 = instr.src[0];
467 const nir_alu_src& src1 = instr.src[1];
468
469 AluInstruction *ir = nullptr;
470 for (int i = 0; i < n ; ++i) {
471 ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
472 from_nir(src0, i), from_nir(src1, i),
473 instr.dest.write_mask & (1 << i) ? write : empty);
474
475 if (src0.negate) ir->set_flag(alu_src0_neg);
476 if (src0.abs) ir->set_flag(alu_src0_abs);
477 if (src1.negate) ir->set_flag(alu_src1_neg);
478 if (src1.abs) ir->set_flag(alu_src1_abs);
479
480 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
481 emit_instruction(ir);
482 }
483 for (int i = n; i < 4 ; ++i) {
484 ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
485 Value::zero, Value::zero,
486 instr.dest.write_mask & (1 << i) ? write : empty);
487 emit_instruction(ir);
488 }
489
490 if (ir)
491 ir->set_flag(alu_last_instr);
492 return true;
493 }
494
495 bool EmitAluInstruction::emit_fdph(const nir_alu_instr& instr)
496 {
497 const nir_alu_src& src0 = instr.src[0];
498 const nir_alu_src& src1 = instr.src[1];
499
500 AluInstruction *ir = nullptr;
501 for (int i = 0; i < 3 ; ++i) {
502 ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
503 from_nir(src0, i), from_nir(src1, i),
504 instr.dest.write_mask & (1 << i) ? write : empty);
505 if (src0.negate) ir->set_flag(alu_src0_neg);
506 if (src0.abs) ir->set_flag(alu_src0_abs);
507 if (src1.negate) ir->set_flag(alu_src1_neg);
508 if (src1.abs) ir->set_flag(alu_src1_abs);
509 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
510 emit_instruction(ir);
511 }
512
513 ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, 3), Value::one_f,
514 from_nir(src1, 3), (instr.dest.write_mask) & (1 << 3) ? write : empty);
515 if (src1.negate) ir->set_flag(alu_src1_neg);
516 if (src1.abs) ir->set_flag(alu_src1_abs);
517 emit_instruction(ir);
518
519 ir->set_flag(alu_last_instr);
520 return true;
521
522 }
523
524 bool EmitAluInstruction::emit_alu_i2orf2_b1(const nir_alu_instr& instr, EAluOp op)
525 {
526 AluInstruction *ir = nullptr;
527 for (int i = 0; i < 4 ; ++i) {
528 if (instr.dest.write_mask & (1 << i)) {
529 ir = new AluInstruction(op, from_nir(instr.dest, i),
530 from_nir(instr.src[0], i), Value::zero,
531 write);
532 emit_instruction(ir);
533 }
534 }
535 if (ir)
536 ir->set_flag(alu_last_instr);
537 return true;
538 }
539
540 bool EmitAluInstruction::emit_alu_b2f(const nir_alu_instr& instr)
541 {
542 AluInstruction *ir = nullptr;
543 for (int i = 0; i < 4 ; ++i) {
544 if (instr.dest.write_mask & (1 << i)){
545 ir = new AluInstruction(op2_and_int, from_nir(instr.dest, i),
546 from_nir(instr.src[0], i), Value::one_f, write);
547 if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
548 if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
549 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
550 emit_instruction(ir);
551 }
552 }
553 if (ir)
554 ir->set_flag(alu_last_instr);
555 return true;
556 }
557
558 bool EmitAluInstruction::emit_any_all_icomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all)
559 {
560
561 AluInstruction *ir = nullptr;
562 PValue v[4]; // this might need some additional temp register creation
563 for (unsigned i = 0; i < 4 ; ++i)
564 v[i] = from_nir(instr.dest, i);
565
566 EAluOp combine = all ? op2_and_int : op2_or_int;
567
568 /* For integers we can not use the modifiers, so this needs some emulation */
569 /* Should actually be lowered with NIR */
570 if (instr.src[0].negate == instr.src[1].negate &&
571 instr.src[0].abs == instr.src[1].abs) {
572
573 for (unsigned i = 0; i < nc ; ++i) {
574 ir = new AluInstruction(op, v[i], from_nir(instr.src[0], i),
575 from_nir(instr.src[1], i), write);
576 emit_instruction(ir);
577 }
578 if (ir)
579 ir->set_flag(alu_last_instr);
580 } else {
581 std::cerr << "Negate in iequal/inequal not (yet) supported\n";
582 return false;
583 }
584
585 for (unsigned i = 0; i < nc/2 ; ++i) {
586 ir = new AluInstruction(combine, v[2 * i], v[2 * i], v[2 * i + 1], write);
587 emit_instruction(ir);
588 }
589 if (ir)
590 ir->set_flag(alu_last_instr);
591
592 if (nc > 2) {
593 ir = new AluInstruction(combine, v[0], v[0], v[2], last_write);
594 emit_instruction(ir);
595 }
596
597 return true;
598 }
599
600 bool EmitAluInstruction::emit_any_all_fcomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all)
601 {
602 AluInstruction *ir = nullptr;
603 PValue v[4]; // this might need some additional temp register creation
604 for (unsigned i = 0; i < 4 ; ++i)
605 v[i] = from_nir(instr.dest, i);
606
607 for (unsigned i = 0; i < nc ; ++i) {
608 ir = new AluInstruction(op, v[i], from_nir(instr.src[0],i),
609 from_nir(instr.src[1],i), write);
610
611 if (instr.src[0].abs)
612 ir->set_flag(alu_src0_abs);
613 if (instr.src[0].negate)
614 ir->set_flag(alu_src0_neg);
615
616 if (instr.src[1].abs)
617 ir->set_flag(alu_src1_abs);
618 if (instr.src[1].negate)
619 ir->set_flag(alu_src1_neg);
620
621 emit_instruction(ir);
622 }
623 if (ir)
624 ir->set_flag(alu_last_instr);
625
626 for (unsigned i = 0; i < nc ; ++i) {
627 ir = new AluInstruction(op1_max4, v[i], v[i], write);
628 if (all) ir->set_flag(alu_src0_neg);
629 emit_instruction(ir);
630 }
631
632 for (unsigned i = nc; i < 4 ; ++i) {
633 ir = new AluInstruction(op1_max4, v[i],
634 all ? Value::one_f : Value::zero, write);
635 if (all)
636 ir->set_flag(alu_src0_neg);
637
638 emit_instruction(ir);
639 }
640
641 ir->set_flag(alu_last_instr);
642
643 if (all)
644 op = (op == op2_sete) ? op2_sete_dx10: op2_setne_dx10;
645 else
646 op = (op == op2_sete) ? op2_setne_dx10: op2_sete_dx10;
647
648 ir = new AluInstruction(op, v[0], v[0], Value::one_f, last_write);
649 if (all)
650 ir->set_flag(alu_src1_neg);
651 emit_instruction(ir);
652
653 return true;
654 }
655
656 bool EmitAluInstruction::emit_any_all_fcomp2(const nir_alu_instr& instr, EAluOp op, bool all)
657 {
658 AluInstruction *ir = nullptr;
659 PValue v[4]; // this might need some additional temp register creation
660 for (unsigned i = 0; i < 4 ; ++i)
661 v[i] = from_nir(instr.dest, i);
662
663 for (unsigned i = 0; i < 2 ; ++i) {
664 ir = new AluInstruction(op, v[i], from_nir(instr.src[0],i),
665 from_nir(instr.src[1],i), write);
666 if (instr.src[0].abs)
667 ir->set_flag(alu_src0_abs);
668 if (instr.src[0].negate)
669 ir->set_flag(alu_src0_neg);
670
671 if (instr.src[1].abs)
672 ir->set_flag(alu_src1_abs);
673 if (instr.src[1].negate)
674 ir->set_flag(alu_src1_neg);
675
676 emit_instruction(ir);
677 }
678 if (ir)
679 ir->set_flag(alu_last_instr);
680
681 op = (op == op2_setne_dx10) ? op2_or_int: op2_and_int;
682 ir = new AluInstruction(op, v[0], v[0], v[1], last_write);
683 emit_instruction(ir);
684
685 return true;
686 }
687
688 bool EmitAluInstruction::emit_alu_trans_op2(const nir_alu_instr& instr, EAluOp opcode)
689 {
690 const nir_alu_src& src0 = instr.src[0];
691 const nir_alu_src& src1 = instr.src[1];
692
693 AluInstruction *ir = nullptr;
694 for (int i = 0; i < 4 ; ++i) {
695 if (instr.dest.write_mask & (1 << i)){
696 ir = new AluInstruction(opcode, from_nir(instr.dest, i), from_nir(src0, i), from_nir(src1, i), last_write);
697 if (src0.negate) ir->set_flag(alu_src0_neg);
698 if (src0.abs) ir->set_flag(alu_src0_abs);
699 if (src1.negate) ir->set_flag(alu_src1_neg);
700 if (src1.abs) ir->set_flag(alu_src1_abs);
701 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
702 emit_instruction(ir);
703 }
704 }
705 return true;
706 }
707
708 bool EmitAluInstruction::emit_alu_op2_int(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts opts)
709 {
710
711 const nir_alu_src& src0 = instr.src[0];
712 const nir_alu_src& src1 = instr.src[1];
713
714 if (src0.negate || src1.negate ||
715 src0.abs || src1.abs) {
716 std::cerr << "R600: don't support modifiers with integer operations";
717 return false;
718 }
719 return emit_alu_op2(instr, opcode, opts);
720 }
721
722 bool EmitAluInstruction::emit_alu_op2(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops)
723 {
724 const nir_alu_src *src0 = &instr.src[0];
725 const nir_alu_src *src1 = &instr.src[1];
726
727 if (ops & op2_opt_reverse)
728 std::swap(src0, src1);
729
730 bool src1_negate = (ops & op2_opt_neg_src1) ^ src1->negate;
731
732 AluInstruction *ir = nullptr;
733 for (int i = 0; i < 4 ; ++i) {
734 if (instr.dest.write_mask & (1 << i)){
735 ir = new AluInstruction(opcode, from_nir(instr.dest, i),
736 from_nir(*src0, i), from_nir(*src1, i), write);
737
738 if (src0->negate) ir->set_flag(alu_src0_neg);
739 if (src0->abs) ir->set_flag(alu_src0_abs);
740 if (src1_negate) ir->set_flag(alu_src1_neg);
741 if (src1->abs) ir->set_flag(alu_src1_abs);
742 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
743 emit_instruction(ir);
744 }
745 }
746 if (ir)
747 ir->set_flag(alu_last_instr);
748 return true;
749 }
750
751 bool EmitAluInstruction::emit_alu_op2_split_src_mods(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops)
752 {
753 const nir_alu_src *src0 = &instr.src[0];
754 const nir_alu_src *src1 = &instr.src[1];
755
756 if (ops & op2_opt_reverse)
757 std::swap(src0, src1);
758
759 GPRVector::Values v0;
760 for (int i = 0; i < 4 ; ++i)
761 v0[i] = from_nir(*src0, i);
762
763 GPRVector::Values v1;
764 for (int i = 0; i < 4 ; ++i)
765 v1[i] = from_nir(*src1, i);
766
767 if (src0->abs || src0->negate) {
768 int src0_tmp = allocate_temp_register();
769 GPRVector::Values v0_temp;
770 AluInstruction *ir = nullptr;
771 for (int i = 0; i < 4 ; ++i) {
772 if (instr.dest.write_mask & (1 << i)) {
773 v0_temp[i] = PValue(new GPRValue(src0_tmp, i));
774 ir = new AluInstruction(op1_mov, v0_temp[i], v0[i], write);
775 if (src0->abs) ir->set_flag(alu_src0_abs);
776 if (src0->negate) ir->set_flag(alu_src0_neg);
777 emit_instruction(ir);
778 v0[i] = v0_temp[i];
779 }
780 }
781 if (ir)
782 ir->set_flag(alu_last_instr);
783 }
784
785 if (src1->abs || src1->negate) {
786 int src1_tmp = allocate_temp_register();
787 GPRVector::Values v1_temp;
788 AluInstruction *ir = nullptr;
789 for (int i = 0; i < 4 ; ++i) {
790 if (instr.dest.write_mask & (1 << i)) {
791 v1_temp[i] = PValue(new GPRValue(src1_tmp, i));
792 ir = new AluInstruction(op1_mov, v1_temp[i], v1[i], {alu_write});
793 if (src1->abs) ir->set_flag(alu_src0_abs);
794 if (src1->negate) ir->set_flag(alu_src0_neg);
795 emit_instruction(ir);
796 v1[i] = v1_temp[i];
797 }
798 }
799 if (ir)
800 ir->set_flag(alu_last_instr);
801 }
802
803 AluInstruction *ir = nullptr;
804 for (int i = 0; i < 4 ; ++i) {
805 if (instr.dest.write_mask & (1 << i)){
806 ir = new AluInstruction(opcode, from_nir(instr.dest, i), {v0[i], v1[i]}, {alu_write});
807 emit_instruction(ir);
808 }
809 }
810 if (ir)
811 ir->set_flag(alu_last_instr);
812 return true;
813 }
814
815
816 bool EmitAluInstruction::emit_alu_isign(const nir_alu_instr& instr)
817 {
818 int sel_tmp = allocate_temp_register();
819 GPRVector tmp(sel_tmp, {0,1,2,3});
820
821 AluInstruction *ir = nullptr;
822 PValue help[4];
823
824 for (int i = 0; i < 4 ; ++i) {
825 if (instr.dest.write_mask & (1 << i)){
826 help[i] = from_nir(instr.dest, i);
827 auto s = from_nir(instr.src[0], i);
828 ir = new AluInstruction(op3_cndgt_int, help[i], s, Value::one_i, s, write);
829 emit_instruction(ir);
830 }
831 }
832 if (ir)
833 ir->set_flag(alu_last_instr);
834
835 for (int i = 0; i < 4 ; ++i) {
836 if (instr.dest.write_mask & (1 << i)){
837 ir = new AluInstruction(op2_sub_int, tmp.reg_i(i), Value::zero, help[i], write);
838 emit_instruction(ir);
839 }
840 }
841 if (ir)
842 ir->set_flag(alu_last_instr);
843
844 for (int i = 0; i < 4 ; ++i) {
845 if (instr.dest.write_mask & (1 << i)){
846
847 ir = new AluInstruction(op3_cndgt_int, help[i], tmp.reg_i(i),
848 PValue(new LiteralValue(-1,0)), help[i], write);
849 emit_instruction(ir);
850 }
851 }
852 if (ir)
853 ir->set_flag(alu_last_instr);
854 return true;
855 }
856
857 bool EmitAluInstruction::emit_fsign(const nir_alu_instr& instr)
858 {
859 PValue help[4];
860 PValue src[4];
861 AluInstruction *ir = nullptr;
862
863 for (int i = 0; i < 4 ; ++i) {
864 help[i] = from_nir(instr.dest, i);
865 src[i] = from_nir(instr.src[0], i);
866 }
867
868 if (instr.src[0].abs) {
869
870 for (int i = 0; i < 4 ; ++i) {
871 if (instr.dest.write_mask & (1 << i)){
872 ir = new AluInstruction(op2_setgt, help[i], src[i], Value::zero, write);
873 ir->set_flag(alu_src0_abs);
874 emit_instruction(ir);
875 }
876 }
877 if (ir)
878 ir->set_flag(alu_last_instr);
879
880 if (instr.src[0].negate) {
881 for (int i = 0; i < 4 ; ++i) {
882 if (instr.dest.write_mask & (1 << i)){
883 ir = new AluInstruction(op1_mov, help[i], help[i], write);
884 ir->set_flag(alu_src0_neg);
885 emit_instruction(ir);
886 }
887 }
888 if (ir)
889 ir->set_flag(alu_last_instr);
890 }
891
892 return true;
893 }
894
895 for (int i = 0; i < 4 ; ++i) {
896 if (instr.dest.write_mask & (1 << i)){
897 ir = new AluInstruction(op3_cndgt, help[i], src[i], Value::one_f, src[i], write);
898 if (instr.src[0].negate) {
899 ir->set_flag(alu_src0_neg);
900 ir->set_flag(alu_src2_neg);
901 }
902 emit_instruction(ir);
903 }
904 }
905
906 if (ir)
907 ir->set_flag(alu_last_instr);
908
909 for (int i = 0; i < 4 ; ++i) {
910 if (instr.dest.write_mask & (1 << i)){
911 ir = new AluInstruction(op3_cndgt, help[i], help[i], Value::one_f, help[i], write);
912 ir->set_flag(alu_src0_neg);
913 ir->set_flag(alu_src1_neg);
914 emit_instruction(ir);
915 }
916 }
917 if (ir)
918 ir->set_flag(alu_last_instr);
919 return true;
920 }
921
922 bool EmitAluInstruction::emit_alu_op3(const nir_alu_instr& instr, EAluOp opcode,
923 std::array<uint8_t, 3> reorder)
924 {
925 const nir_alu_src *src[3];
926 src[0] = &instr.src[reorder[0]];
927 src[1] = &instr.src[reorder[1]];
928 src[2] = &instr.src[reorder[2]];
929
930 AluInstruction *ir = nullptr;
931 for (int i = 0; i < 4 ; ++i) {
932 if (instr.dest.write_mask & (1 << i)){
933 ir = new AluInstruction(opcode, from_nir(instr.dest, i),
934 from_nir(*src[0], i), from_nir(*src[1], i),
935 from_nir(*src[2], i), write);
936
937 if (src[0]->negate) ir->set_flag(alu_src0_neg);
938 if (src[1]->negate) ir->set_flag(alu_src1_neg);
939 if (src[2]->negate) ir->set_flag(alu_src2_neg);
940
941 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
942 ir->set_flag(alu_write);
943 emit_instruction(ir);
944 }
945 }
946 if (ir)
947 ir->set_flag(alu_last_instr);
948 return true;
949 }
950
951 bool EmitAluInstruction::emit_alu_ineg(const nir_alu_instr& instr)
952 {
953 AluInstruction *ir = nullptr;
954 for (int i = 0; i < 4 ; ++i) {
955 if (instr.dest.write_mask & (1 << i)){
956 ir = new AluInstruction(op2_sub_int, from_nir(instr.dest, i), Value::zero,
957 from_nir(instr.src[0], i), write);
958 emit_instruction(ir);
959 }
960 }
961 if (ir)
962 ir->set_flag(alu_last_instr);
963
964 return true;
965 }
966
/* NOTE(review): looks like a table mapping swizzle selector indices to
 * printable characters (presumably 0-3 = components x..w, 4/5 = constants
 * 0/1, 7 = '_' for a masked channel) - confirm against its users. It is not
 * referenced in the part of the file that follows. */
967 static const char swz[] = "xyzw01?_";
968
969
970
971 bool EmitAluInstruction::emit_alu_iabs(const nir_alu_instr& instr)
972 {
973 int sel_tmp = allocate_temp_register();
974 GPRVector tmp(sel_tmp, {0,1,2,3});
975
976 std::array<PValue,4> src;
977 AluInstruction *ir = nullptr;
978 for (int i = 0; i < 4 ; ++i) {
979 if (instr.dest.write_mask & (1 << i)){
980 src[i] = from_nir(instr.src[0],i);
981 ir = new AluInstruction(op2_sub_int, tmp.reg_i(i), Value::zero, src[i], write);
982 emit_instruction(ir);
983 }
984 }
985 if (ir)
986 ir->set_flag(alu_last_instr);
987
988 for (int i = 0; i < 4 ; ++i) {
989 if (instr.dest.write_mask & (1 << i)){
990 ir = new AluInstruction(op3_cndge_int, from_nir(instr.dest, i), src[i],
991 src[i], tmp.reg_i(i), write);
992 emit_instruction(ir);
993 }
994 }
995 if (ir)
996 ir->set_flag(alu_last_instr);
997 return true;
998 }
999
1000 bool EmitAluInstruction::emit_alu_div_int(const nir_alu_instr& instr, bool use_signed, bool mod)
1001 {
1002
1003 int sel_tmp = allocate_temp_register();
1004 int sel_tmp0 = allocate_temp_register();
1005 int sel_tmp1 = allocate_temp_register();
1006
1007 PValue asrc1(new GPRValue(sel_tmp, 0));
1008 PValue asrc2(new GPRValue(sel_tmp, 1));
1009 PValue rsign(new GPRValue(sel_tmp, 2));
1010 PValue err(new GPRValue(sel_tmp, 3));
1011
1012 GPRVector tmp0(sel_tmp0, {0,1,2,3});
1013 GPRVector tmp1(sel_tmp1, {0,1,2,3});
1014
1015 std::array<PValue, 4> src0;
1016 std::array<PValue, 4> src1;
1017
1018 for (int i = 0; i < 4 ; ++i) {
1019 if (instr.dest.write_mask & (1 << i)) {
1020 src0[i] = from_nir(instr.src[0], i);
1021 src1[i] = from_nir(instr.src[1], i);
1022 }
1023 }
1024
1025
1026 for (int i = 3; i >= 0 ; --i) {
1027 if (!(instr.dest.write_mask & (1 << i)))
1028 continue;
1029 if (use_signed) {
1030 emit_instruction(op2_sub_int, asrc1, {Value::zero, src0[i]}, {alu_write});
1031 emit_instruction(op2_sub_int, asrc2, {Value::zero, src1[i]}, {alu_write});
1032 emit_instruction(op2_xor_int, rsign, {src0[i], src1[i]}, {alu_write, alu_last_instr});
1033
1034
1035 emit_instruction(op3_cndge_int, asrc1, {src0[i], src0[i], asrc1}, {alu_write});
1036 emit_instruction(op3_cndge_int, asrc2, {src1[i], src1[i], asrc2}, {alu_write, alu_last_instr});
1037 } else {
1038 asrc1 = src0[i];
1039 asrc2 = src1[i];
1040 }
1041
1042 emit_instruction(op1_recip_uint, tmp0.x(), {asrc2}, {alu_write, alu_last_instr});
1043
1044 emit_instruction(op2_mullo_uint, tmp0.z(), {tmp0.x(), asrc2}, {alu_write, alu_last_instr});
1045
1046 emit_instruction(op2_sub_int, tmp0.w(), {Value::zero, tmp0.z()}, {alu_write});
1047 emit_instruction(op2_mulhi_uint, tmp0.y(), {tmp0.x(), asrc2 }, {alu_write, alu_last_instr});
1048
1049 emit_instruction(op3_cnde_int, tmp0.z(), {tmp0.y(), tmp0.w(), tmp0.z()}, {alu_write, alu_last_instr});
1050
1051 emit_instruction(op2_mulhi_uint, err, {tmp0.z(), tmp0.x()}, {alu_write, alu_last_instr});
1052
1053 emit_instruction(op2_sub_int, tmp1.x(), {tmp0.x(), err}, {alu_write});
1054 emit_instruction(op2_add_int, tmp1.y(), {tmp0.x(), err}, {alu_write, alu_last_instr});
1055
1056 emit_instruction(op3_cnde_int, tmp0.x(), {tmp0.y(), tmp1.y(), tmp1.x()}, {alu_write, alu_last_instr});
1057
1058 emit_instruction(op2_mulhi_uint, tmp0.z(), {tmp0.x(), asrc1 }, {alu_write, alu_last_instr});
1059 emit_instruction(op2_mullo_uint, tmp0.y(), {tmp0.z(), asrc2 }, {alu_write, alu_last_instr});
1060
1061 emit_instruction(op2_sub_int, tmp0.w(), {asrc1, tmp0.y()}, {alu_write, alu_last_instr});
1062
1063
1064 emit_instruction(op2_setge_uint, tmp1.x(), {tmp0.w(), asrc2}, {alu_write});
1065 emit_instruction(op2_setge_uint, tmp1.y(), {asrc1, tmp0.y()}, {alu_write});
1066
1067 if (mod) {
1068 emit_instruction(op2_sub_int, tmp1.z(), {tmp0.w(), asrc2}, {alu_write});
1069 emit_instruction(op2_add_int, tmp1.w(), {tmp0.w(), asrc2}, {alu_write, alu_last_instr});
1070 } else {
1071 emit_instruction(op2_add_int, tmp1.z(), {tmp0.z(), Value::one_i}, {alu_write});
1072 emit_instruction(op2_sub_int, tmp1.w(), {tmp0.z(), Value::one_i}, {alu_write, alu_last_instr});
1073 }
1074
1075 emit_instruction(op2_and_int, tmp1.x(), {tmp1.x(), tmp1.y()}, {alu_write, alu_last_instr});
1076
1077 if (mod)
1078 emit_instruction(op3_cnde_int, tmp0.z(), {tmp1.x(), tmp0.w(), tmp1.z()}, {alu_write, alu_last_instr});
1079 else
1080 emit_instruction(op3_cnde_int, tmp0.z(), {tmp1.x(), tmp0.z(), tmp1.z()}, {alu_write, alu_last_instr});
1081
1082 if (use_signed) {
1083 emit_instruction(op3_cnde_int, tmp0.z(), {tmp1.y(), tmp1.w(), tmp0.z()}, {alu_write, alu_last_instr});
1084 emit_instruction(op2_sub_int, tmp0.y(), {Value::zero, tmp0.z()}, {alu_write, alu_last_instr});
1085
1086 if (mod)
1087 emit_instruction(op3_cndge_int, from_nir(instr.dest, i), {src0[i], tmp0.z(), tmp0.y()},
1088 {alu_write, alu_last_instr});
1089 else
1090 emit_instruction(op3_cndge_int, from_nir(instr.dest, i), {rsign, tmp0.z(), tmp0.y()},
1091 {alu_write, alu_last_instr});
1092 } else {
1093 emit_instruction(op3_cnde_int, from_nir(instr.dest, i), {tmp1.y(), tmp1.w(), tmp0.z()}, {alu_write, alu_last_instr});
1094 }
1095 }
1096 return true;
1097 }
1098
1099 void EmitAluInstruction::split_alu_modifiers(const nir_alu_src& src, GPRVector::Values& s,
1100 GPRVector::Values& v, int ncomp)
1101 {
1102
1103 AluInstruction *alu = nullptr;
1104 for (int i = 0; i < ncomp; ++i) {
1105 alu = new AluInstruction(op1_mov, v[i], s[i], {alu_write});
1106 if (src.abs)
1107 alu->set_flag(alu_src0_abs);
1108 if (src.negate)
1109 alu->set_flag(alu_src0_neg);
1110 emit_instruction(alu);
1111 }
1112 make_last(alu);
1113 }
1114
1115 bool EmitAluInstruction::emit_tex_fdd(const nir_alu_instr& instr, TexInstruction::Opcode op,
1116 bool fine)
1117 {
1118
1119 GPRVector::Values v;
1120 GPRVector::Values s;
1121 GPRVector::Values *source = &s;
1122 std::array<int, 4> writemask = {0,1,2,3};
1123
1124 int ncomp = instr.src[0].src.is_ssa ? instr.src[0].src.ssa->num_components :
1125 instr.src[0].src.reg.reg->num_components;
1126
1127 for (int i = 0; i < 4; ++i) {
1128 writemask[i] = (instr.dest.write_mask & (1 << i)) ? i : 7;
1129 v[i] = from_nir(instr.dest, (i < ncomp) ? i : 0);
1130 s[i] = from_nir(instr.src[0], (i < ncomp) ? i : 0);
1131 }
1132
1133 if (instr.src[0].abs || instr.src[0].negate) {
1134 split_alu_modifiers(instr.src[0], s, v, ncomp);
1135 source = &v;
1136 }
1137
1138 /* This is querying the dreivatives of the output fb, so we would either need
1139 * access to the neighboring pixels or to the framebuffer. Neither is currently
1140 * implemented */
1141 GPRVector dst(v);
1142 GPRVector src(*source);
1143
1144 auto tex = new TexInstruction(op, dst, src, 0, R600_MAX_CONST_BUFFERS, PValue());
1145 tex->set_dest_swizzle(writemask);
1146
1147 if (fine) {
1148 std::cerr << "Sewt fine flag\n";
1149 tex->set_flag(TexInstruction::grad_fine);
1150 }
1151
1152 emit_instruction(tex);
1153
1154 return true;
1155 }
1156
1157 bool EmitAluInstruction::emit_bitfield_extract(const nir_alu_instr& instr, EAluOp opcode)
1158 {
1159 int itmp = allocate_temp_register();
1160 std::array<PValue, 4> tmp;
1161 std::array<PValue, 4> dst;
1162 std::array<PValue, 4> src0;
1163 std::array<PValue, 4> shift;
1164
1165 PValue l32(new LiteralValue(32));
1166 unsigned write_mask = instr.dest.write_mask;
1167
1168 AluInstruction *ir = nullptr;
1169 for (int i = 0; i < 4; i++) {
1170 if (!(write_mask & (1<<i)))
1171 continue;
1172 dst[i] = from_nir(instr.dest, i);
1173 src0[i] = from_nir(instr.src[0], i);
1174 shift[i] = from_nir(instr.src[2], i);
1175
1176 ir = new AluInstruction(opcode, dst[i],
1177 {src0[i], from_nir(instr.src[1], i), shift[i]},
1178 {alu_write});
1179 emit_instruction(ir);
1180 }
1181 make_last(ir);
1182
1183 for (int i = 0; i < 4; i++) {
1184 if (!(write_mask & (1<<i)))
1185 continue;
1186 tmp[i] = PValue(new GPRValue(itmp, i));
1187 ir = new AluInstruction(op2_setge_int, tmp[i], {shift[i], l32},
1188 {alu_write});
1189 emit_instruction(ir);
1190 }
1191 make_last(ir);
1192
1193 for (int i = 0; i < 4; i++) {
1194 if (!(write_mask & (1<<i)))
1195 continue;
1196 ir = new AluInstruction(op3_cnde_int, dst[i], {tmp[i], dst[i], src0[i]},
1197 {alu_write});
1198 emit_instruction(ir);
1199 }
1200 make_last(ir);
1201
1202 return true;
1203 }
1204
1205 bool EmitAluInstruction::emit_bitfield_insert(const nir_alu_instr& instr)
1206 {
1207 auto t0 = get_temp_vec4();
1208 auto t1 = get_temp_vec4();
1209 auto t2 = get_temp_vec4();
1210
1211 PValue l32(new LiteralValue(32));
1212 unsigned write_mask = instr.dest.write_mask;
1213 if (!write_mask) return true;
1214
1215 AluInstruction *ir = nullptr;
1216 for (int i = 0; i < 4; i++) {
1217 if (!(write_mask & (1<<i)))
1218 continue;
1219
1220 ir = new AluInstruction(op2_setge_int, t0[i], {from_nir(instr.src[3], i), l32}, {alu_write});
1221 emit_instruction(ir);
1222 }
1223 make_last(ir);
1224
1225 for (int i = 0; i < 4; i++) {
1226 if (!(write_mask & (1<<i)))
1227 continue;
1228 ir = new AluInstruction(op2_bfm_int, t1[i], {from_nir(instr.src[3], i),
1229 from_nir(instr.src[2], i)}, {alu_write});
1230 emit_instruction(ir);
1231 }
1232 ir->set_flag(alu_last_instr);
1233
1234 for (int i = 0; i < 4; i++) {
1235 if (!(write_mask & (1<<i)))
1236 continue;
1237 ir = new AluInstruction(op2_lshl_int, t2[i], {from_nir(instr.src[1], i),
1238 from_nir(instr.src[2], i)}, {alu_write});
1239 emit_instruction(ir);
1240 }
1241 ir->set_flag(alu_last_instr);
1242
1243
1244 for (int i = 0; i < 4; i++) {
1245 if (!(write_mask & (1<<i)))
1246 continue;
1247 ir = new AluInstruction(op3_bfi_int, from_nir(instr.dest, i),
1248 {t1[i], t2[i], from_nir(instr.src[0], i)}, {alu_write});
1249 emit_instruction(ir);
1250 }
1251 ir->set_flag(alu_last_instr);
1252
1253 for (int i = 0; i < 4; i++) {
1254 if (!(write_mask & (1<<i)))
1255 continue;
1256 ir = new AluInstruction(op3_cnde_int, from_nir(instr.dest, i),
1257 {t0[i], from_nir(instr.dest, i),
1258 from_nir(instr.src[1], i)}, {alu_write});
1259 emit_instruction(ir);
1260 }
1261 ir->set_flag(alu_last_instr);
1262
1263 return true;
1264 }
1265
1266 bool EmitAluInstruction::emit_unpack_32_2x16_split_y(const nir_alu_instr& instr)
1267 {
1268 emit_instruction(op2_lshr_int, from_nir(instr.dest, 0),
1269 {from_nir(instr.src[0], 0), PValue(new LiteralValue(16))},
1270 {alu_write, alu_last_instr});
1271
1272 emit_instruction(op1_flt16_to_flt32, from_nir(instr.dest, 0),
1273 {from_nir(instr.dest, 0)},{alu_write, alu_last_instr});
1274
1275 return true;
1276 }
1277
1278 bool EmitAluInstruction::emit_unpack_32_2x16_split_x(const nir_alu_instr& instr)
1279 {
1280 emit_instruction(op1_flt16_to_flt32, from_nir(instr.dest, 0),
1281 {from_nir(instr.src[0], 0)},{alu_write, alu_last_instr});
1282 return true;
1283 }
1284
1285 bool EmitAluInstruction::emit_pack_32_2x16_split(const nir_alu_instr& instr)
1286 {
1287 int it0 = allocate_temp_register();
1288 PValue x(new GPRValue(it0, 0));
1289 PValue y(new GPRValue(it0, 1));
1290
1291 emit_instruction(op1_flt32_to_flt16, x,{from_nir(instr.src[0], 0)},{alu_write});
1292 emit_instruction(op1_flt32_to_flt16, y,{from_nir(instr.src[1], 0)},{alu_write, alu_last_instr});
1293
1294 emit_instruction(op2_lshl_int, y, {y, PValue(new LiteralValue(16))},{alu_write, alu_last_instr});
1295
1296 emit_instruction(op2_or_int, {from_nir(instr.dest, 0)} , {x, y},{alu_write, alu_last_instr});
1297
1298 return true;
1299 }
1300
1301 }