r600/sfn: Handle umul24 and umad24
[mesa.git] / src / gallium / drivers / r600 / sfn / sfn_emitaluinstruction.cpp
1 /* -*- mesa-c++ -*-
2 *
3 * Copyright (c) 2018 Collabora LTD
4 *
5 * Author: Gert Wollny <gert.wollny@collabora.com>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27
28 #include "sfn_emitaluinstruction.h"
29 #include "sfn_debug.h"
30
31 #include "gallium/drivers/r600/r600_shader.h"
32
33 namespace r600 {
34
35 using std::vector;
36
37 EmitAluInstruction::EmitAluInstruction(ShaderFromNirProcessor& processor):
38 EmitInstruction (processor)
39 {
40
41 }
42
/* Translate one NIR ALU instruction into r600 ALU (or gradient texture)
 * instructions.
 *
 * The instruction is logged, split_constants() is run on it, and then the
 * NIR opcode selects the emit helper together with the hardware opcode and
 * option flags to use.
 *
 * Returns the result of the selected helper, or false when the NIR opcode
 * has no case in the switch below. */
bool EmitAluInstruction::do_emit(nir_instr* ir)
{
   const nir_alu_instr& instr = *nir_instr_as_alu(ir);

   r600::sfn_log << SfnLog::instr << "emit '"
                 << *ir
                 << " bitsize: " << static_cast<int>(instr.dest.dest.ssa.bit_size)
                 << "' (" << __func__ << ")\n";

   /* Runs for every opcode, before the dispatch */
   split_constants(instr);

   switch (instr.op) {
   /* Conversions, moves and single-source float ops */
   case nir_op_b2f32: return emit_alu_b2f(instr);
   case nir_op_i2b1: return emit_alu_i2orf2_b1(instr, op2_setne_int);
   case nir_op_f2b1: return emit_alu_i2orf2_b1(instr, op2_setne_dx10);
   case nir_op_b2b1:
   case nir_op_mov:return emit_alu_op1(instr, op1_mov);
   case nir_op_ftrunc: return emit_alu_op1(instr, op1_trunc);
   /* fabs, fneg and fsat are emitted as a mov carrying the matching flag */
   case nir_op_fabs: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_abs});
   case nir_op_fneg: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_neg});
   case nir_op_fsat: return emit_alu_op1(instr, op1_mov, {1 << alu_dst_clamp});
   case nir_op_frcp: return emit_alu_trans_op1(instr, op1_recip_ieee);
   case nir_op_frsq: return emit_alu_trans_op1(instr, op1_recipsqrt_ieee1);
   case nir_op_fsin: return emit_alu_trig_op1(instr, op1_sin);
   case nir_op_fcos: return emit_alu_trig_op1(instr, op1_cos);
   case nir_op_fexp2: return emit_alu_trans_op1(instr, op1_exp_ieee);
   case nir_op_flog2: return emit_alu_trans_op1(instr, op1_log_clamped);

   case nir_op_fround_even: return emit_alu_op1(instr, op1_rndne);
   case nir_op_fsqrt: return emit_alu_trans_op1(instr, op1_sqrt_ieee);
   case nir_op_i2f32: return emit_alu_trans_op1(instr, op1_int_to_flt);
   case nir_op_u2f32: return emit_alu_trans_op1(instr, op1_uint_to_flt);
   case nir_op_f2i32: return emit_alu_f2i32_or_u32(instr, op1_flt_to_int);
   case nir_op_f2u32: return emit_alu_f2i32_or_u32(instr, op1_flt_to_uint);

   case nir_op_fceil: return emit_alu_op1(instr, op1_ceil);
   case nir_op_ffract: return emit_alu_op1(instr, op1_fract);
   case nir_op_ffloor: return emit_alu_op1(instr, op1_floor);

   case nir_op_fsign: return emit_fsign(instr);
   case nir_op_fdph: return emit_fdph(instr);

   /* Bit field operations */
   case nir_op_ibitfield_extract: return emit_bitfield_extract(instr, op3_bfe_int);
   case nir_op_ubitfield_extract: return emit_bitfield_extract(instr, op3_bfe_uint);
   case nir_op_bitfield_insert: return emit_bitfield_insert(instr);
   case nir_op_bit_count: return emit_alu_op1(instr, op1_bcnt_int);
   case nir_op_bitfield_reverse: return emit_alu_op1(instr, op1_bfrev_int);

   /* Integer comparison, logic and arithmetic; ilt/ult are emitted as
    * "greater than" with the sources reversed */
   case nir_op_ieq: return emit_alu_op2_int(instr, op2_sete_int);
   case nir_op_ine: return emit_alu_op2_int(instr, op2_setne_int);
   case nir_op_ige: return emit_alu_op2_int(instr, op2_setge_int);
   case nir_op_ishl: return emit_alu_op2_int(instr, op2_lshl_int);
   case nir_op_ishr: return emit_alu_op2_int(instr, op2_ashr_int);
   case nir_op_ilt: return emit_alu_op2_int(instr, op2_setgt_int, op2_opt_reverse);
   case nir_op_iand: return emit_alu_op2_int(instr, op2_and_int);
   case nir_op_ixor: return emit_alu_op2_int(instr, op2_xor_int);
   case nir_op_imin: return emit_alu_op2_int(instr, op2_min_int);
   case nir_op_imax: return emit_alu_op2_int(instr, op2_max_int);
   case nir_op_imul_high: return emit_alu_trans_op2(instr, op2_mulhi_int);
   case nir_op_umul_high: return emit_alu_trans_op2(instr, op2_mulhi_uint);
   case nir_op_umax: return emit_alu_op2_int(instr, op2_max_uint);
   case nir_op_umin: return emit_alu_op2_int(instr, op2_min_uint);
   case nir_op_ior: return emit_alu_op2_int(instr, op2_or_int);
   case nir_op_inot: return emit_alu_op1(instr, op1_not_int);
   case nir_op_iabs: return emit_alu_iabs(instr);
   case nir_op_ineg: return emit_alu_ineg(instr);
   case nir_op_idiv: return emit_alu_div_int(instr, true, false);
   case nir_op_udiv: return emit_alu_div_int(instr, false, false);
   case nir_op_umod: return emit_alu_div_int(instr, false, true);
   case nir_op_isign: return emit_alu_isign(instr);

   case nir_op_uge: return emit_alu_op2_int(instr, op2_setge_uint);
   case nir_op_ult: return emit_alu_op2_int(instr, op2_setgt_uint, op2_opt_reverse);
   case nir_op_ushr: return emit_alu_op2_int(instr, op2_lshr_int);

   /* Float comparison and arithmetic; flt is "greater than" with the
    * sources reversed */
   case nir_op_flt: return emit_alu_op2(instr, op2_setgt_dx10, op2_opt_reverse);

   case nir_op_fge: return emit_alu_op2(instr, op2_setge_dx10);
   case nir_op_fne: return emit_alu_op2(instr, op2_setne_dx10);
   case nir_op_feq: return emit_alu_op2(instr, op2_sete_dx10);

   case nir_op_fmin: return emit_alu_op2(instr, op2_min_dx10);
   case nir_op_fmax: return emit_alu_op2(instr, op2_max_dx10);
   case nir_op_fmul: return emit_alu_op2(instr, op2_mul_ieee);
   case nir_op_imul: return emit_alu_trans_op2(instr, op2_mullo_int);
   case nir_op_fadd: return emit_alu_op2(instr, op2_add);
   /* fsub is an add with the second source negated */
   case nir_op_fsub: return emit_alu_op2(instr, op2_add, op2_opt_neg_src1);
   case nir_op_iadd: return emit_alu_op2_int(instr, op2_add_int);
   case nir_op_isub: return emit_alu_op2_int(instr, op2_sub_int);
   case nir_op_fdot2: return emit_dot(instr, 2);
   case nir_op_fdot3: return emit_dot(instr, 3);
   case nir_op_fdot4: return emit_dot(instr, 4);

   /* Vector any/all comparisons */
   case nir_op_bany_inequal2: return emit_any_all_icomp(instr, op2_setne_int, 2, false);
   case nir_op_bany_inequal3: return emit_any_all_icomp(instr, op2_setne_int, 3, false);
   case nir_op_bany_inequal4: return emit_any_all_icomp(instr, op2_setne_int, 4, false);

   case nir_op_ball_iequal2: return emit_any_all_icomp(instr, op2_sete_int, 2, true);
   case nir_op_ball_iequal3: return emit_any_all_icomp(instr, op2_sete_int, 3, true);
   case nir_op_ball_iequal4: return emit_any_all_icomp(instr, op2_sete_int, 4, true);

   case nir_op_bany_fnequal2: return emit_any_all_fcomp2(instr, op2_setne_dx10, false);
   case nir_op_bany_fnequal3: return emit_any_all_fcomp(instr, op2_setne, 3, false);
   case nir_op_bany_fnequal4: return emit_any_all_fcomp(instr, op2_setne, 4, false);

   case nir_op_ball_fequal2: return emit_any_all_fcomp2(instr, op2_sete_dx10, true);
   case nir_op_ball_fequal3: return emit_any_all_fcomp(instr, op2_sete, 3, true);
   case nir_op_ball_fequal4: return emit_any_all_fcomp(instr, op2_sete, 4, true);


   /* Three-source ops; bcsel passes its NIR sources in the order 0, 2, 1 */
   case nir_op_ffma: return emit_alu_op3(instr, op3_muladd_ieee);
   case nir_op_bcsel: return emit_alu_op3(instr, op3_cnde_int, {0, 2, 1});
   case nir_op_vec2: return emit_create_vec(instr, 2);
   case nir_op_vec3: return emit_create_vec(instr, 3);
   case nir_op_vec4: return emit_create_vec(instr, 4);

   case nir_op_find_lsb: return emit_alu_op1(instr, op1_ffbl_int);
   case nir_op_ufind_msb: return emit_find_msb(instr, false);
   case nir_op_ifind_msb: return emit_find_msb(instr, true);
   case nir_op_b2i32: return emit_b2i32(instr);
   case nir_op_pack_64_2x32_split: return emit_pack_64_2x32_split(instr);
   case nir_op_unpack_64_2x32_split_x: return emit_unpack_64_2x32_split(instr, 0);
   case nir_op_unpack_64_2x32_split_y: return emit_unpack_64_2x32_split(instr, 1);
   case nir_op_unpack_half_2x16_split_x: return emit_unpack_32_2x16_split_x(instr);
   case nir_op_unpack_half_2x16_split_y: return emit_unpack_32_2x16_split_y(instr);
   case nir_op_pack_half_2x16_split: return emit_pack_32_2x16_split(instr);


   /* These are in the ALU instruction list, but they should be texture instructions */
   case nir_op_fddx_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, true);
   case nir_op_fddx_coarse:
   case nir_op_fddx: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, false);

   case nir_op_fddy_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_v, true);
   case nir_op_fddy_coarse:
   case nir_op_fddy: return emit_tex_fdd(instr,TexInstruction::get_gradient_v, false);

   /* 24 bit unsigned multiply and multiply-add */
   case nir_op_umad24: return emit_alu_op3(instr, op3_muladd_uint24, {0, 1, 2});
   case nir_op_umul24: return emit_alu_op2(instr, op2_mul_uint24);
   default:
      /* No case for this opcode */
      return false;
   }
}
186
187 void EmitAluInstruction::split_constants(const nir_alu_instr& instr)
188 {
189 const nir_op_info *op_info = &nir_op_infos[instr.op];
190 if (op_info->num_inputs < 2)
191 return;
192
193 int nconst = 0;
194 std::array<PValue,4> c;
195 std::array<int,4> idx;
196 for (unsigned i = 0; i < op_info->num_inputs; ++i) {
197 PValue src = from_nir(instr.src[i], 0);
198 assert(src);
199 if (src->type() == Value::kconst) {
200 c[nconst] = src;
201
202 idx[nconst++] = i;
203 }
204 }
205 if (nconst < 2)
206 return;
207
208 unsigned sel = c[0]->sel();
209 sfn_log << SfnLog::reg << "split " << nconst << " constants, sel[0] = " << sel; ;
210
211 for (int i = 1; i < nconst; ++i) {
212 sfn_log << "sel[" << i << "] = " << c[i]->sel() << "\n";
213 if (c[i]->sel() != sel) {
214 load_uniform(instr.src[idx[i]]);
215 }
216 }
217 }
218
219 bool EmitAluInstruction::emit_alu_inot(const nir_alu_instr& instr)
220 {
221 if (instr.src[0].negate || instr.src[0].abs) {
222 std::cerr << "source modifiers not supported with int ops\n";
223 return false;
224 }
225
226 AluInstruction *ir = nullptr;
227 for (int i = 0; i < 4 ; ++i) {
228 if (instr.dest.write_mask & (1 << i)){
229 ir = new AluInstruction(op1_not_int, from_nir(instr.dest, i),
230 from_nir(instr.src[0], i), write);
231 emit_instruction(ir);
232 }
233 }
234 if (ir)
235 ir->set_flag(alu_last_instr);
236 return true;
237 }
238
239 bool EmitAluInstruction::emit_alu_op1(const nir_alu_instr& instr, EAluOp opcode,
240 const AluOpFlags& flags)
241 {
242 AluInstruction *ir = nullptr;
243 for (int i = 0; i < 4 ; ++i) {
244 if (instr.dest.write_mask & (1 << i)){
245 ir = new AluInstruction(opcode, from_nir(instr.dest, i),
246 from_nir(instr.src[0], i), write);
247
248 if (flags.test(alu_src0_abs) || instr.src[0].abs)
249 ir->set_flag(alu_src0_abs);
250
251 if (instr.src[0].negate ^ flags.test(alu_src0_neg))
252 ir->set_flag(alu_src0_neg);
253
254 if (flags.test(alu_dst_clamp) || instr.dest.saturate)
255 ir->set_flag(alu_dst_clamp);
256
257 emit_instruction(ir);
258 }
259 }
260 make_last(ir);
261
262 return true;
263 }
264
265 bool EmitAluInstruction::emit_alu_trig_op1(const nir_alu_instr& instr, EAluOp opcode)
266 {
267 // normalize by dividing by 2*PI, shift by 0.5, take fraction, and
268 // then shift back
269
270 const float inv_2_pi = 0.15915494f;
271
272 PValue v[4]; // this might need some additional temp register creation
273 for (unsigned i = 0; i < 4 ; ++i)
274 v[i] = from_nir(instr.dest, i);
275
276 PValue inv_pihalf = PValue(new LiteralValue(inv_2_pi, 0));
277 AluInstruction *ir = nullptr;
278 for (unsigned i = 0; i < 4 ; ++i) {
279 if (!(instr.dest.write_mask & (1 << i)))
280 continue;
281 ir = new AluInstruction(op3_muladd_ieee, v[i],
282 {from_nir(instr.src[0],i), inv_pihalf, Value::zero_dot_5},
283 {alu_write});
284 if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
285 emit_instruction(ir);
286 }
287 make_last(ir);
288
289 for (unsigned i = 0; i < 4 ; ++i) {
290 if (!(instr.dest.write_mask & (1 << i)))
291 continue;
292 ir = new AluInstruction(op1_fract, v[i], v[i], {alu_write});
293 emit_instruction(ir);
294 }
295 make_last(ir);
296
297 for (unsigned i = 0; i < 4 ; ++i) {
298 if (!(instr.dest.write_mask & (1 << i)))
299 continue;
300 ir = new AluInstruction(op2_add, v[i], v[i], Value::zero_dot_5, write);
301 ir->set_flag(alu_src1_neg);
302 emit_instruction(ir);
303 }
304 make_last(ir);
305
306 for (unsigned i = 0; i < 4 ; ++i) {
307 if (!(instr.dest.write_mask & (1 << i)))
308 continue;
309
310 ir = new AluInstruction(opcode, v[i], v[i], last_write);
311 emit_instruction(ir);
312 }
313 return true;
314 }
315
316 bool EmitAluInstruction::emit_alu_trans_op1(const nir_alu_instr& instr, EAluOp opcode,
317 bool absolute)
318 {
319 AluInstruction *ir = nullptr;
320 std::set<int> src_idx;
321 for (int i = 0; i < 4 ; ++i) {
322 if (instr.dest.write_mask & (1 << i)){
323 ir = new AluInstruction(opcode, from_nir(instr.dest, i),
324 from_nir(instr.src[0], i), last_write);
325 if (absolute || instr.src[0].abs) ir->set_flag(alu_src0_abs);
326 if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
327 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
328 emit_instruction(ir);
329 }
330 }
331 return true;
332 }
333
334 bool EmitAluInstruction::emit_alu_f2i32_or_u32(const nir_alu_instr& instr, EAluOp op)
335 {
336 AluInstruction *ir = nullptr;
337 std::array<PValue, 4> v;
338
339 for (int i = 0; i < 4; ++i) {
340 if (!(instr.dest.write_mask & (1 << i)))
341 continue;
342 v[i] = from_nir(instr.dest, i);
343 ir = new AluInstruction(op1_trunc, v[i], from_nir(instr.src[0], i), {alu_write});
344 if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
345 if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
346 emit_instruction(ir);
347 }
348 make_last(ir);
349
350 for (int i = 0; i < 4; ++i) {
351 if (!(instr.dest.write_mask & (1 << i)))
352 continue;
353 ir = new AluInstruction(op, v[i], v[i], {alu_write});
354 emit_instruction(ir);
355 if (op == op1_flt_to_uint)
356 make_last(ir);
357 }
358 make_last(ir);
359
360 return true;
361 }
362
363 bool EmitAluInstruction::emit_find_msb(const nir_alu_instr& instr, bool sgn)
364 {
365 int sel_tmp = allocate_temp_register();
366 int sel_tmp2 = allocate_temp_register();
367 GPRVector tmp(sel_tmp, {0,1,2,3});
368 GPRVector tmp2(sel_tmp2, {0,1,2,3});
369 AluInstruction *ir = nullptr;
370 EAluOp opcode = sgn ? op1_ffbh_int : op1_ffbh_uint;
371 for (int i = 0; i < 4; ++i) {
372 if (!(instr.dest.write_mask & (1 << i)))
373 continue;
374
375 ir = new AluInstruction(opcode, tmp.reg_i(i), from_nir(instr.src[0], i), write);
376 emit_instruction(ir);
377 }
378 make_last(ir);
379
380 for (int i = 0; i < 4 ; ++i) {
381 if (!(instr.dest.write_mask & (1 << i)))
382 continue;
383
384 ir = new AluInstruction(op2_sub_int, tmp2.reg_i(i),
385 PValue(new LiteralValue(31u, 0)), tmp.reg_i(i), write);
386 emit_instruction(ir);
387 }
388 make_last(ir);
389
390 for (int i = 0; i < 4 ; ++i) {
391 if (!(instr.dest.write_mask & (1 << i)))
392 continue;
393
394 ir = new AluInstruction(op3_cndge_int, from_nir(instr.dest, i), tmp.reg_i(i),
395 tmp2.reg_i(i), tmp.reg_i(i), write);
396 emit_instruction(ir);
397 }
398 make_last(ir);
399
400 return true;
401 }
402
403 bool EmitAluInstruction::emit_b2i32(const nir_alu_instr& instr)
404 {
405 AluInstruction *ir = nullptr;
406 for (int i = 0; i < 4 ; ++i) {
407 if (!(instr.dest.write_mask & (1 << i)))
408 continue;
409
410 ir = new AluInstruction(op2_and_int, from_nir(instr.dest, i),
411 from_nir(instr.src[0], i), Value::one_i, write);
412 emit_instruction(ir);
413 }
414 make_last(ir);
415
416 return true;
417 }
418
419 bool EmitAluInstruction::emit_pack_64_2x32_split(const nir_alu_instr& instr)
420 {
421 AluInstruction *ir = nullptr;
422 for (unsigned i = 0; i < 2; ++i) {
423 if (!(instr.dest.write_mask & (1 << i)))
424 continue;
425 ir = new AluInstruction(op1_mov, from_nir(instr.dest, i),
426 from_nir(instr.src[0], i), write);
427 emit_instruction(ir);
428 }
429 ir->set_flag(alu_last_instr);
430 return true;
431 }
432
433 bool EmitAluInstruction::emit_unpack_64_2x32_split(const nir_alu_instr& instr, unsigned comp)
434 {
435 emit_instruction(new AluInstruction(op1_mov, from_nir(instr.dest, 0),
436 from_nir(instr.src[0], comp), last_write));
437 return true;
438 }
439
440 bool EmitAluInstruction::emit_create_vec(const nir_alu_instr& instr, unsigned nc)
441 {
442 AluInstruction *ir = nullptr;
443 std::set<int> src_slot;
444 for(unsigned i = 0; i < nc; ++i) {
445 if (instr.dest.write_mask & (1 << i)){
446 auto src = from_nir(instr.src[i], 0);
447 ir = new AluInstruction(op1_mov, from_nir(instr.dest, i), src, write);
448 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
449
450 // FIXME: This is a rather crude approach to fix the problem that
451 // r600 can't read from four different slots of the same component
452 // here we check only for the register index
453 if (src->type() == Value::gpr)
454 src_slot.insert(src->sel());
455 if (src_slot.size() >= 3) {
456 src_slot.clear();
457 ir->set_flag(alu_last_instr);
458 }
459 emit_instruction(ir);
460 }
461 }
462 if (ir)
463 ir->set_flag(alu_last_instr);
464 return true;
465 }
466
467 bool EmitAluInstruction::emit_dot(const nir_alu_instr& instr, int n)
468 {
469 const nir_alu_src& src0 = instr.src[0];
470 const nir_alu_src& src1 = instr.src[1];
471
472 AluInstruction *ir = nullptr;
473 for (int i = 0; i < n ; ++i) {
474 ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
475 from_nir(src0, i), from_nir(src1, i),
476 instr.dest.write_mask & (1 << i) ? write : empty);
477
478 if (src0.negate) ir->set_flag(alu_src0_neg);
479 if (src0.abs) ir->set_flag(alu_src0_abs);
480 if (src1.negate) ir->set_flag(alu_src1_neg);
481 if (src1.abs) ir->set_flag(alu_src1_abs);
482
483 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
484 emit_instruction(ir);
485 }
486 for (int i = n; i < 4 ; ++i) {
487 ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
488 Value::zero, Value::zero,
489 instr.dest.write_mask & (1 << i) ? write : empty);
490 emit_instruction(ir);
491 }
492
493 if (ir)
494 ir->set_flag(alu_last_instr);
495 return true;
496 }
497
498 bool EmitAluInstruction::emit_fdph(const nir_alu_instr& instr)
499 {
500 const nir_alu_src& src0 = instr.src[0];
501 const nir_alu_src& src1 = instr.src[1];
502
503 AluInstruction *ir = nullptr;
504 for (int i = 0; i < 3 ; ++i) {
505 ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
506 from_nir(src0, i), from_nir(src1, i),
507 instr.dest.write_mask & (1 << i) ? write : empty);
508 if (src0.negate) ir->set_flag(alu_src0_neg);
509 if (src0.abs) ir->set_flag(alu_src0_abs);
510 if (src1.negate) ir->set_flag(alu_src1_neg);
511 if (src1.abs) ir->set_flag(alu_src1_abs);
512 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
513 emit_instruction(ir);
514 }
515
516 ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, 3), Value::one_f,
517 from_nir(src1, 3), (instr.dest.write_mask) & (1 << 3) ? write : empty);
518 if (src1.negate) ir->set_flag(alu_src1_neg);
519 if (src1.abs) ir->set_flag(alu_src1_abs);
520 emit_instruction(ir);
521
522 ir->set_flag(alu_last_instr);
523 return true;
524
525 }
526
527 bool EmitAluInstruction::emit_alu_i2orf2_b1(const nir_alu_instr& instr, EAluOp op)
528 {
529 AluInstruction *ir = nullptr;
530 for (int i = 0; i < 4 ; ++i) {
531 if (instr.dest.write_mask & (1 << i)) {
532 ir = new AluInstruction(op, from_nir(instr.dest, i),
533 from_nir(instr.src[0], i), Value::zero,
534 write);
535 emit_instruction(ir);
536 }
537 }
538 if (ir)
539 ir->set_flag(alu_last_instr);
540 return true;
541 }
542
543 bool EmitAluInstruction::emit_alu_b2f(const nir_alu_instr& instr)
544 {
545 AluInstruction *ir = nullptr;
546 for (int i = 0; i < 4 ; ++i) {
547 if (instr.dest.write_mask & (1 << i)){
548 ir = new AluInstruction(op2_and_int, from_nir(instr.dest, i),
549 from_nir(instr.src[0], i), Value::one_f, write);
550 if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
551 if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
552 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
553 emit_instruction(ir);
554 }
555 }
556 if (ir)
557 ir->set_flag(alu_last_instr);
558 return true;
559 }
560
561 bool EmitAluInstruction::emit_any_all_icomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all)
562 {
563
564 AluInstruction *ir = nullptr;
565 PValue v[4]; // this might need some additional temp register creation
566 for (unsigned i = 0; i < 4 ; ++i)
567 v[i] = from_nir(instr.dest, i);
568
569 EAluOp combine = all ? op2_and_int : op2_or_int;
570
571 /* For integers we can not use the modifiers, so this needs some emulation */
572 /* Should actually be lowered with NIR */
573 if (instr.src[0].negate == instr.src[1].negate &&
574 instr.src[0].abs == instr.src[1].abs) {
575
576 for (unsigned i = 0; i < nc ; ++i) {
577 ir = new AluInstruction(op, v[i], from_nir(instr.src[0], i),
578 from_nir(instr.src[1], i), write);
579 emit_instruction(ir);
580 }
581 if (ir)
582 ir->set_flag(alu_last_instr);
583 } else {
584 std::cerr << "Negate in iequal/inequal not (yet) supported\n";
585 return false;
586 }
587
588 for (unsigned i = 0; i < nc/2 ; ++i) {
589 ir = new AluInstruction(combine, v[2 * i], v[2 * i], v[2 * i + 1], write);
590 emit_instruction(ir);
591 }
592 if (ir)
593 ir->set_flag(alu_last_instr);
594
595 if (nc > 2) {
596 ir = new AluInstruction(combine, v[0], v[0], v[2], last_write);
597 emit_instruction(ir);
598 }
599
600 return true;
601 }
602
603 bool EmitAluInstruction::emit_any_all_fcomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all)
604 {
605 AluInstruction *ir = nullptr;
606 PValue v[4]; // this might need some additional temp register creation
607 for (unsigned i = 0; i < 4 ; ++i)
608 v[i] = from_nir(instr.dest, i);
609
610 for (unsigned i = 0; i < nc ; ++i) {
611 ir = new AluInstruction(op, v[i], from_nir(instr.src[0],i),
612 from_nir(instr.src[1],i), write);
613
614 if (instr.src[0].abs)
615 ir->set_flag(alu_src0_abs);
616 if (instr.src[0].negate)
617 ir->set_flag(alu_src0_neg);
618
619 if (instr.src[1].abs)
620 ir->set_flag(alu_src1_abs);
621 if (instr.src[1].negate)
622 ir->set_flag(alu_src1_neg);
623
624 emit_instruction(ir);
625 }
626 if (ir)
627 ir->set_flag(alu_last_instr);
628
629 for (unsigned i = 0; i < nc ; ++i) {
630 ir = new AluInstruction(op1_max4, v[i], v[i], write);
631 if (all) ir->set_flag(alu_src0_neg);
632 emit_instruction(ir);
633 }
634
635 for (unsigned i = nc; i < 4 ; ++i) {
636 ir = new AluInstruction(op1_max4, v[i],
637 all ? Value::one_f : Value::zero, write);
638 if (all)
639 ir->set_flag(alu_src0_neg);
640
641 emit_instruction(ir);
642 }
643
644 ir->set_flag(alu_last_instr);
645
646 if (all)
647 op = (op == op2_sete) ? op2_sete_dx10: op2_setne_dx10;
648 else
649 op = (op == op2_sete) ? op2_setne_dx10: op2_sete_dx10;
650
651 ir = new AluInstruction(op, v[0], v[0], Value::one_f, last_write);
652 if (all)
653 ir->set_flag(alu_src1_neg);
654 emit_instruction(ir);
655
656 return true;
657 }
658
659 bool EmitAluInstruction::emit_any_all_fcomp2(const nir_alu_instr& instr, EAluOp op, bool all)
660 {
661 AluInstruction *ir = nullptr;
662 PValue v[4]; // this might need some additional temp register creation
663 for (unsigned i = 0; i < 4 ; ++i)
664 v[i] = from_nir(instr.dest, i);
665
666 for (unsigned i = 0; i < 2 ; ++i) {
667 ir = new AluInstruction(op, v[i], from_nir(instr.src[0],i),
668 from_nir(instr.src[1],i), write);
669 if (instr.src[0].abs)
670 ir->set_flag(alu_src0_abs);
671 if (instr.src[0].negate)
672 ir->set_flag(alu_src0_neg);
673
674 if (instr.src[1].abs)
675 ir->set_flag(alu_src1_abs);
676 if (instr.src[1].negate)
677 ir->set_flag(alu_src1_neg);
678
679 emit_instruction(ir);
680 }
681 if (ir)
682 ir->set_flag(alu_last_instr);
683
684 op = (op == op2_setne_dx10) ? op2_or_int: op2_and_int;
685 ir = new AluInstruction(op, v[0], v[0], v[1], last_write);
686 emit_instruction(ir);
687
688 return true;
689 }
690
691 bool EmitAluInstruction::emit_alu_trans_op2(const nir_alu_instr& instr, EAluOp opcode)
692 {
693 const nir_alu_src& src0 = instr.src[0];
694 const nir_alu_src& src1 = instr.src[1];
695
696 AluInstruction *ir = nullptr;
697 for (int i = 0; i < 4 ; ++i) {
698 if (instr.dest.write_mask & (1 << i)){
699 ir = new AluInstruction(opcode, from_nir(instr.dest, i), from_nir(src0, i), from_nir(src1, i), last_write);
700 if (src0.negate) ir->set_flag(alu_src0_neg);
701 if (src0.abs) ir->set_flag(alu_src0_abs);
702 if (src1.negate) ir->set_flag(alu_src1_neg);
703 if (src1.abs) ir->set_flag(alu_src1_abs);
704 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
705 emit_instruction(ir);
706 }
707 }
708 return true;
709 }
710
711 bool EmitAluInstruction::emit_alu_op2_int(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts opts)
712 {
713
714 const nir_alu_src& src0 = instr.src[0];
715 const nir_alu_src& src1 = instr.src[1];
716
717 if (src0.negate || src1.negate ||
718 src0.abs || src1.abs) {
719 std::cerr << "R600: don't support modifiers with integer operations";
720 return false;
721 }
722 return emit_alu_op2(instr, opcode, opts);
723 }
724
725 bool EmitAluInstruction::emit_alu_op2(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops)
726 {
727 const nir_alu_src *src0 = &instr.src[0];
728 const nir_alu_src *src1 = &instr.src[1];
729
730 if (ops & op2_opt_reverse)
731 std::swap(src0, src1);
732
733 bool src1_negate = (ops & op2_opt_neg_src1) ^ src1->negate;
734
735 AluInstruction *ir = nullptr;
736 for (int i = 0; i < 4 ; ++i) {
737 if (instr.dest.write_mask & (1 << i)){
738 ir = new AluInstruction(opcode, from_nir(instr.dest, i),
739 from_nir(*src0, i), from_nir(*src1, i), write);
740
741 if (src0->negate) ir->set_flag(alu_src0_neg);
742 if (src0->abs) ir->set_flag(alu_src0_abs);
743 if (src1_negate) ir->set_flag(alu_src1_neg);
744 if (src1->abs) ir->set_flag(alu_src1_abs);
745 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
746 emit_instruction(ir);
747 }
748 }
749 if (ir)
750 ir->set_flag(alu_last_instr);
751 return true;
752 }
753
754 bool EmitAluInstruction::emit_alu_op2_split_src_mods(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops)
755 {
756 const nir_alu_src *src0 = &instr.src[0];
757 const nir_alu_src *src1 = &instr.src[1];
758
759 if (ops & op2_opt_reverse)
760 std::swap(src0, src1);
761
762 GPRVector::Values v0;
763 for (int i = 0; i < 4 ; ++i)
764 v0[i] = from_nir(*src0, i);
765
766 GPRVector::Values v1;
767 for (int i = 0; i < 4 ; ++i)
768 v1[i] = from_nir(*src1, i);
769
770 if (src0->abs || src0->negate) {
771 int src0_tmp = allocate_temp_register();
772 GPRVector::Values v0_temp;
773 AluInstruction *ir = nullptr;
774 for (int i = 0; i < 4 ; ++i) {
775 if (instr.dest.write_mask & (1 << i)) {
776 v0_temp[i] = PValue(new GPRValue(src0_tmp, i));
777 ir = new AluInstruction(op1_mov, v0_temp[i], v0[i], write);
778 if (src0->abs) ir->set_flag(alu_src0_abs);
779 if (src0->negate) ir->set_flag(alu_src0_neg);
780 emit_instruction(ir);
781 v0[i] = v0_temp[i];
782 }
783 }
784 if (ir)
785 ir->set_flag(alu_last_instr);
786 }
787
788 if (src1->abs || src1->negate) {
789 int src1_tmp = allocate_temp_register();
790 GPRVector::Values v1_temp;
791 AluInstruction *ir = nullptr;
792 for (int i = 0; i < 4 ; ++i) {
793 if (instr.dest.write_mask & (1 << i)) {
794 v1_temp[i] = PValue(new GPRValue(src1_tmp, i));
795 ir = new AluInstruction(op1_mov, v1_temp[i], v1[i], {alu_write});
796 if (src1->abs) ir->set_flag(alu_src0_abs);
797 if (src1->negate) ir->set_flag(alu_src0_neg);
798 emit_instruction(ir);
799 v1[i] = v1_temp[i];
800 }
801 }
802 if (ir)
803 ir->set_flag(alu_last_instr);
804 }
805
806 AluInstruction *ir = nullptr;
807 for (int i = 0; i < 4 ; ++i) {
808 if (instr.dest.write_mask & (1 << i)){
809 ir = new AluInstruction(opcode, from_nir(instr.dest, i), {v0[i], v1[i]}, {alu_write});
810 emit_instruction(ir);
811 }
812 }
813 if (ir)
814 ir->set_flag(alu_last_instr);
815 return true;
816 }
817
818
819 bool EmitAluInstruction::emit_alu_isign(const nir_alu_instr& instr)
820 {
821 int sel_tmp = allocate_temp_register();
822 GPRVector tmp(sel_tmp, {0,1,2,3});
823
824 AluInstruction *ir = nullptr;
825 PValue help[4];
826
827 for (int i = 0; i < 4 ; ++i) {
828 if (instr.dest.write_mask & (1 << i)){
829 help[i] = from_nir(instr.dest, i);
830 auto s = from_nir(instr.src[0], i);
831 ir = new AluInstruction(op3_cndgt_int, help[i], s, Value::one_i, s, write);
832 emit_instruction(ir);
833 }
834 }
835 if (ir)
836 ir->set_flag(alu_last_instr);
837
838 for (int i = 0; i < 4 ; ++i) {
839 if (instr.dest.write_mask & (1 << i)){
840 ir = new AluInstruction(op2_sub_int, tmp.reg_i(i), Value::zero, help[i], write);
841 emit_instruction(ir);
842 }
843 }
844 if (ir)
845 ir->set_flag(alu_last_instr);
846
847 for (int i = 0; i < 4 ; ++i) {
848 if (instr.dest.write_mask & (1 << i)){
849
850 ir = new AluInstruction(op3_cndgt_int, help[i], tmp.reg_i(i),
851 PValue(new LiteralValue(-1,0)), help[i], write);
852 emit_instruction(ir);
853 }
854 }
855 if (ir)
856 ir->set_flag(alu_last_instr);
857 return true;
858 }
859
860 bool EmitAluInstruction::emit_fsign(const nir_alu_instr& instr)
861 {
862 PValue help[4];
863 PValue src[4];
864 AluInstruction *ir = nullptr;
865
866 for (int i = 0; i < 4 ; ++i) {
867 help[i] = from_nir(instr.dest, i);
868 src[i] = from_nir(instr.src[0], i);
869 }
870
871 if (instr.src[0].abs) {
872
873 for (int i = 0; i < 4 ; ++i) {
874 if (instr.dest.write_mask & (1 << i)){
875 ir = new AluInstruction(op2_setgt, help[i], src[i], Value::zero, write);
876 ir->set_flag(alu_src0_abs);
877 emit_instruction(ir);
878 }
879 }
880 if (ir)
881 ir->set_flag(alu_last_instr);
882
883 if (instr.src[0].negate) {
884 for (int i = 0; i < 4 ; ++i) {
885 if (instr.dest.write_mask & (1 << i)){
886 ir = new AluInstruction(op1_mov, help[i], help[i], write);
887 ir->set_flag(alu_src0_neg);
888 emit_instruction(ir);
889 }
890 }
891 if (ir)
892 ir->set_flag(alu_last_instr);
893 }
894
895 return true;
896 }
897
898 for (int i = 0; i < 4 ; ++i) {
899 if (instr.dest.write_mask & (1 << i)){
900 ir = new AluInstruction(op3_cndgt, help[i], src[i], Value::one_f, src[i], write);
901 if (instr.src[0].negate) {
902 ir->set_flag(alu_src0_neg);
903 ir->set_flag(alu_src2_neg);
904 }
905 emit_instruction(ir);
906 }
907 }
908
909 if (ir)
910 ir->set_flag(alu_last_instr);
911
912 for (int i = 0; i < 4 ; ++i) {
913 if (instr.dest.write_mask & (1 << i)){
914 ir = new AluInstruction(op3_cndgt, help[i], help[i], Value::one_f, help[i], write);
915 ir->set_flag(alu_src0_neg);
916 ir->set_flag(alu_src1_neg);
917 emit_instruction(ir);
918 }
919 }
920 if (ir)
921 ir->set_flag(alu_last_instr);
922 return true;
923 }
924
925 bool EmitAluInstruction::emit_alu_op3(const nir_alu_instr& instr, EAluOp opcode,
926 std::array<uint8_t, 3> reorder)
927 {
928 const nir_alu_src *src[3];
929 src[0] = &instr.src[reorder[0]];
930 src[1] = &instr.src[reorder[1]];
931 src[2] = &instr.src[reorder[2]];
932
933 AluInstruction *ir = nullptr;
934 for (int i = 0; i < 4 ; ++i) {
935 if (instr.dest.write_mask & (1 << i)){
936 ir = new AluInstruction(opcode, from_nir(instr.dest, i),
937 from_nir(*src[0], i), from_nir(*src[1], i),
938 from_nir(*src[2], i), write);
939
940 if (src[0]->negate) ir->set_flag(alu_src0_neg);
941 if (src[1]->negate) ir->set_flag(alu_src1_neg);
942 if (src[2]->negate) ir->set_flag(alu_src2_neg);
943
944 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
945 ir->set_flag(alu_write);
946 emit_instruction(ir);
947 }
948 }
949 if (ir)
950 ir->set_flag(alu_last_instr);
951 return true;
952 }
953
954 bool EmitAluInstruction::emit_alu_ineg(const nir_alu_instr& instr)
955 {
956 AluInstruction *ir = nullptr;
957 for (int i = 0; i < 4 ; ++i) {
958 if (instr.dest.write_mask & (1 << i)){
959 ir = new AluInstruction(op2_sub_int, from_nir(instr.dest, i), Value::zero,
960 from_nir(instr.src[0], i), write);
961 emit_instruction(ir);
962 }
963 }
964 if (ir)
965 ir->set_flag(alu_last_instr);
966
967 return true;
968 }
969
/* Eight one-character names, indexed 0..7.
 * NOTE(review): presumably printable names for swizzle selectors (x, y, z, w,
 * constant 0, constant 1, unknown, unused) - confirm against the users of
 * this table. */
static constexpr char swz[] = "xyzw01?_";
971
972
973
974 bool EmitAluInstruction::emit_alu_iabs(const nir_alu_instr& instr)
975 {
976 int sel_tmp = allocate_temp_register();
977 GPRVector tmp(sel_tmp, {0,1,2,3});
978
979 std::array<PValue,4> src;
980 AluInstruction *ir = nullptr;
981 for (int i = 0; i < 4 ; ++i) {
982 if (instr.dest.write_mask & (1 << i)){
983 src[i] = from_nir(instr.src[0],i);
984 ir = new AluInstruction(op2_sub_int, tmp.reg_i(i), Value::zero, src[i], write);
985 emit_instruction(ir);
986 }
987 }
988 if (ir)
989 ir->set_flag(alu_last_instr);
990
991 for (int i = 0; i < 4 ; ++i) {
992 if (instr.dest.write_mask & (1 << i)){
993 ir = new AluInstruction(op3_cndge_int, from_nir(instr.dest, i), src[i],
994 src[i], tmp.reg_i(i), write);
995 emit_instruction(ir);
996 }
997 }
998 if (ir)
999 ir->set_flag(alu_last_instr);
1000 return true;
1001 }
1002
1003 bool EmitAluInstruction::emit_alu_div_int(const nir_alu_instr& instr, bool use_signed, bool mod)
1004 {
1005
1006 int sel_tmp = allocate_temp_register();
1007 int sel_tmp0 = allocate_temp_register();
1008 int sel_tmp1 = allocate_temp_register();
1009
1010 PValue asrc1(new GPRValue(sel_tmp, 0));
1011 PValue asrc2(new GPRValue(sel_tmp, 1));
1012 PValue rsign(new GPRValue(sel_tmp, 2));
1013 PValue err(new GPRValue(sel_tmp, 3));
1014
1015 GPRVector tmp0(sel_tmp0, {0,1,2,3});
1016 GPRVector tmp1(sel_tmp1, {0,1,2,3});
1017
1018 std::array<PValue, 4> src0;
1019 std::array<PValue, 4> src1;
1020
1021 for (int i = 0; i < 4 ; ++i) {
1022 if (instr.dest.write_mask & (1 << i)) {
1023 src0[i] = from_nir(instr.src[0], i);
1024 src1[i] = from_nir(instr.src[1], i);
1025 }
1026 }
1027
1028
1029 for (int i = 3; i >= 0 ; --i) {
1030 if (!(instr.dest.write_mask & (1 << i)))
1031 continue;
1032 if (use_signed) {
1033 emit_instruction(op2_sub_int, asrc1, {Value::zero, src0[i]}, {alu_write});
1034 emit_instruction(op2_sub_int, asrc2, {Value::zero, src1[i]}, {alu_write});
1035 emit_instruction(op2_xor_int, rsign, {src0[i], src1[i]}, {alu_write, alu_last_instr});
1036
1037
1038 emit_instruction(op3_cndge_int, asrc1, {src0[i], src0[i], asrc1}, {alu_write});
1039 emit_instruction(op3_cndge_int, asrc2, {src1[i], src1[i], asrc2}, {alu_write, alu_last_instr});
1040 } else {
1041 asrc1 = src0[i];
1042 asrc2 = src1[i];
1043 }
1044
1045 emit_instruction(op1_recip_uint, tmp0.x(), {asrc2}, {alu_write, alu_last_instr});
1046
1047 emit_instruction(op2_mullo_uint, tmp0.z(), {tmp0.x(), asrc2}, {alu_write, alu_last_instr});
1048
1049 emit_instruction(op2_sub_int, tmp0.w(), {Value::zero, tmp0.z()}, {alu_write});
1050 emit_instruction(op2_mulhi_uint, tmp0.y(), {tmp0.x(), asrc2 }, {alu_write, alu_last_instr});
1051
1052 emit_instruction(op3_cnde_int, tmp0.z(), {tmp0.y(), tmp0.w(), tmp0.z()}, {alu_write, alu_last_instr});
1053
1054 emit_instruction(op2_mulhi_uint, err, {tmp0.z(), tmp0.x()}, {alu_write, alu_last_instr});
1055
1056 emit_instruction(op2_sub_int, tmp1.x(), {tmp0.x(), err}, {alu_write});
1057 emit_instruction(op2_add_int, tmp1.y(), {tmp0.x(), err}, {alu_write, alu_last_instr});
1058
1059 emit_instruction(op3_cnde_int, tmp0.x(), {tmp0.y(), tmp1.y(), tmp1.x()}, {alu_write, alu_last_instr});
1060
1061 emit_instruction(op2_mulhi_uint, tmp0.z(), {tmp0.x(), asrc1 }, {alu_write, alu_last_instr});
1062 emit_instruction(op2_mullo_uint, tmp0.y(), {tmp0.z(), asrc2 }, {alu_write, alu_last_instr});
1063
1064 emit_instruction(op2_sub_int, tmp0.w(), {asrc1, tmp0.y()}, {alu_write, alu_last_instr});
1065
1066
1067 emit_instruction(op2_setge_uint, tmp1.x(), {tmp0.w(), asrc2}, {alu_write});
1068 emit_instruction(op2_setge_uint, tmp1.y(), {asrc1, tmp0.y()}, {alu_write});
1069
1070 if (mod) {
1071 emit_instruction(op2_sub_int, tmp1.z(), {tmp0.w(), asrc2}, {alu_write});
1072 emit_instruction(op2_add_int, tmp1.w(), {tmp0.w(), asrc2}, {alu_write, alu_last_instr});
1073 } else {
1074 emit_instruction(op2_add_int, tmp1.z(), {tmp0.z(), Value::one_i}, {alu_write});
1075 emit_instruction(op2_sub_int, tmp1.w(), {tmp0.z(), Value::one_i}, {alu_write, alu_last_instr});
1076 }
1077
1078 emit_instruction(op2_and_int, tmp1.x(), {tmp1.x(), tmp1.y()}, {alu_write, alu_last_instr});
1079
1080 if (mod)
1081 emit_instruction(op3_cnde_int, tmp0.z(), {tmp1.x(), tmp0.w(), tmp1.z()}, {alu_write, alu_last_instr});
1082 else
1083 emit_instruction(op3_cnde_int, tmp0.z(), {tmp1.x(), tmp0.z(), tmp1.z()}, {alu_write, alu_last_instr});
1084
1085 if (use_signed) {
1086 emit_instruction(op3_cnde_int, tmp0.z(), {tmp1.y(), tmp1.w(), tmp0.z()}, {alu_write, alu_last_instr});
1087 emit_instruction(op2_sub_int, tmp0.y(), {Value::zero, tmp0.z()}, {alu_write, alu_last_instr});
1088
1089 if (mod)
1090 emit_instruction(op3_cndge_int, from_nir(instr.dest, i), {src0[i], tmp0.z(), tmp0.y()},
1091 {alu_write, alu_last_instr});
1092 else
1093 emit_instruction(op3_cndge_int, from_nir(instr.dest, i), {rsign, tmp0.z(), tmp0.y()},
1094 {alu_write, alu_last_instr});
1095 } else {
1096 emit_instruction(op3_cnde_int, from_nir(instr.dest, i), {tmp1.y(), tmp1.w(), tmp0.z()}, {alu_write, alu_last_instr});
1097 }
1098 }
1099 return true;
1100 }
1101
1102 void EmitAluInstruction::split_alu_modifiers(const nir_alu_src& src, GPRVector::Values& s,
1103 GPRVector::Values& v, int ncomp)
1104 {
1105
1106 AluInstruction *alu = nullptr;
1107 for (int i = 0; i < ncomp; ++i) {
1108 alu = new AluInstruction(op1_mov, v[i], s[i], {alu_write});
1109 if (src.abs)
1110 alu->set_flag(alu_src0_abs);
1111 if (src.negate)
1112 alu->set_flag(alu_src0_neg);
1113 emit_instruction(alu);
1114 }
1115 make_last(alu);
1116 }
1117
1118 bool EmitAluInstruction::emit_tex_fdd(const nir_alu_instr& instr, TexInstruction::Opcode op,
1119 bool fine)
1120 {
1121
1122 GPRVector::Values v;
1123 GPRVector::Values s;
1124 GPRVector::Values *source = &s;
1125 std::array<int, 4> writemask = {0,1,2,3};
1126
1127 int ncomp = instr.src[0].src.is_ssa ? instr.src[0].src.ssa->num_components :
1128 instr.src[0].src.reg.reg->num_components;
1129
1130 for (int i = 0; i < 4; ++i) {
1131 writemask[i] = (instr.dest.write_mask & (1 << i)) ? i : 7;
1132 v[i] = from_nir(instr.dest, (i < ncomp) ? i : 0);
1133 s[i] = from_nir(instr.src[0], (i < ncomp) ? i : 0);
1134 }
1135
1136 if (instr.src[0].abs || instr.src[0].negate) {
1137 split_alu_modifiers(instr.src[0], s, v, ncomp);
1138 source = &v;
1139 }
1140
1141 /* This is querying the dreivatives of the output fb, so we would either need
1142 * access to the neighboring pixels or to the framebuffer. Neither is currently
1143 * implemented */
1144 GPRVector dst(v);
1145 GPRVector src(*source);
1146
1147 auto tex = new TexInstruction(op, dst, src, 0, R600_MAX_CONST_BUFFERS, PValue());
1148 tex->set_dest_swizzle(writemask);
1149
1150 if (fine) {
1151 std::cerr << "Sewt fine flag\n";
1152 tex->set_flag(TexInstruction::grad_fine);
1153 }
1154
1155 emit_instruction(tex);
1156
1157 return true;
1158 }
1159
1160 bool EmitAluInstruction::emit_bitfield_extract(const nir_alu_instr& instr, EAluOp opcode)
1161 {
1162 int itmp = allocate_temp_register();
1163 std::array<PValue, 4> tmp;
1164 std::array<PValue, 4> dst;
1165 std::array<PValue, 4> src0;
1166 std::array<PValue, 4> shift;
1167
1168 PValue l32(new LiteralValue(32));
1169 unsigned write_mask = instr.dest.write_mask;
1170
1171 AluInstruction *ir = nullptr;
1172 for (int i = 0; i < 4; i++) {
1173 if (!(write_mask & (1<<i)))
1174 continue;
1175 dst[i] = from_nir(instr.dest, i);
1176 src0[i] = from_nir(instr.src[0], i);
1177 shift[i] = from_nir(instr.src[2], i);
1178
1179 ir = new AluInstruction(opcode, dst[i],
1180 {src0[i], from_nir(instr.src[1], i), shift[i]},
1181 {alu_write});
1182 emit_instruction(ir);
1183 }
1184 make_last(ir);
1185
1186 for (int i = 0; i < 4; i++) {
1187 if (!(write_mask & (1<<i)))
1188 continue;
1189 tmp[i] = PValue(new GPRValue(itmp, i));
1190 ir = new AluInstruction(op2_setge_int, tmp[i], {shift[i], l32},
1191 {alu_write});
1192 emit_instruction(ir);
1193 }
1194 make_last(ir);
1195
1196 for (int i = 0; i < 4; i++) {
1197 if (!(write_mask & (1<<i)))
1198 continue;
1199 ir = new AluInstruction(op3_cnde_int, dst[i], {tmp[i], dst[i], src0[i]},
1200 {alu_write});
1201 emit_instruction(ir);
1202 }
1203 make_last(ir);
1204
1205 return true;
1206 }
1207
1208 bool EmitAluInstruction::emit_bitfield_insert(const nir_alu_instr& instr)
1209 {
1210 auto t0 = get_temp_vec4();
1211 auto t1 = get_temp_vec4();
1212 auto t2 = get_temp_vec4();
1213
1214 PValue l32(new LiteralValue(32));
1215 unsigned write_mask = instr.dest.write_mask;
1216 if (!write_mask) return true;
1217
1218 AluInstruction *ir = nullptr;
1219 for (int i = 0; i < 4; i++) {
1220 if (!(write_mask & (1<<i)))
1221 continue;
1222
1223 ir = new AluInstruction(op2_setge_int, t0[i], {from_nir(instr.src[3], i), l32}, {alu_write});
1224 emit_instruction(ir);
1225 }
1226 make_last(ir);
1227
1228 for (int i = 0; i < 4; i++) {
1229 if (!(write_mask & (1<<i)))
1230 continue;
1231 ir = new AluInstruction(op2_bfm_int, t1[i], {from_nir(instr.src[3], i),
1232 from_nir(instr.src[2], i)}, {alu_write});
1233 emit_instruction(ir);
1234 }
1235 ir->set_flag(alu_last_instr);
1236
1237 for (int i = 0; i < 4; i++) {
1238 if (!(write_mask & (1<<i)))
1239 continue;
1240 ir = new AluInstruction(op2_lshl_int, t2[i], {from_nir(instr.src[1], i),
1241 from_nir(instr.src[2], i)}, {alu_write});
1242 emit_instruction(ir);
1243 }
1244 ir->set_flag(alu_last_instr);
1245
1246
1247 for (int i = 0; i < 4; i++) {
1248 if (!(write_mask & (1<<i)))
1249 continue;
1250 ir = new AluInstruction(op3_bfi_int, from_nir(instr.dest, i),
1251 {t1[i], t2[i], from_nir(instr.src[0], i)}, {alu_write});
1252 emit_instruction(ir);
1253 }
1254 ir->set_flag(alu_last_instr);
1255
1256 for (int i = 0; i < 4; i++) {
1257 if (!(write_mask & (1<<i)))
1258 continue;
1259 ir = new AluInstruction(op3_cnde_int, from_nir(instr.dest, i),
1260 {t0[i], from_nir(instr.dest, i),
1261 from_nir(instr.src[1], i)}, {alu_write});
1262 emit_instruction(ir);
1263 }
1264 ir->set_flag(alu_last_instr);
1265
1266 return true;
1267 }
1268
1269 bool EmitAluInstruction::emit_unpack_32_2x16_split_y(const nir_alu_instr& instr)
1270 {
1271 emit_instruction(op2_lshr_int, from_nir(instr.dest, 0),
1272 {from_nir(instr.src[0], 0), PValue(new LiteralValue(16))},
1273 {alu_write, alu_last_instr});
1274
1275 emit_instruction(op1_flt16_to_flt32, from_nir(instr.dest, 0),
1276 {from_nir(instr.dest, 0)},{alu_write, alu_last_instr});
1277
1278 return true;
1279 }
1280
1281 bool EmitAluInstruction::emit_unpack_32_2x16_split_x(const nir_alu_instr& instr)
1282 {
1283 emit_instruction(op1_flt16_to_flt32, from_nir(instr.dest, 0),
1284 {from_nir(instr.src[0], 0)},{alu_write, alu_last_instr});
1285 return true;
1286 }
1287
1288 bool EmitAluInstruction::emit_pack_32_2x16_split(const nir_alu_instr& instr)
1289 {
1290 int it0 = allocate_temp_register();
1291 PValue x(new GPRValue(it0, 0));
1292 PValue y(new GPRValue(it0, 1));
1293
1294 emit_instruction(op1_flt32_to_flt16, x,{from_nir(instr.src[0], 0)},{alu_write});
1295 emit_instruction(op1_flt32_to_flt16, y,{from_nir(instr.src[1], 0)},{alu_write, alu_last_instr});
1296
1297 emit_instruction(op2_lshl_int, y, {y, PValue(new LiteralValue(16))},{alu_write, alu_last_instr});
1298
1299 emit_instruction(op2_or_int, {from_nir(instr.dest, 0)} , {x, y},{alu_write, alu_last_instr});
1300
1301 return true;
1302 }
1303
1304 }