r600/sfn: Skip move instructions if they are only ssa and without modifiers
[mesa.git] / src / gallium / drivers / r600 / sfn / sfn_emitaluinstruction.cpp
1 /* -*- mesa-c++ -*-
2 *
3 * Copyright (c) 2018 Collabora LTD
4 *
5 * Author: Gert Wollny <gert.wollny@collabora.com>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27
28 #include "sfn_emitaluinstruction.h"
29 #include "sfn_debug.h"
30
31 #include "gallium/drivers/r600/r600_shader.h"
32
33 namespace r600 {
34
35 using std::vector;
36
37 EmitAluInstruction::EmitAluInstruction(ShaderFromNirProcessor& processor):
38 EmitInstruction (processor)
39 {
40
41 }
42
43 bool EmitAluInstruction::do_emit(nir_instr* ir)
44 {
45 const nir_alu_instr& instr = *nir_instr_as_alu(ir);
46
47 r600::sfn_log << SfnLog::instr << "emit '"
48 << *ir
49 << " bitsize: " << static_cast<int>(instr.dest.dest.ssa.bit_size)
50 << "' (" << __func__ << ")\n";
51
52 split_constants(instr);
53
54 switch (instr.op) {
55 case nir_op_b2f32: return emit_alu_b2f(instr);
56 case nir_op_i2b1: return emit_alu_i2orf2_b1(instr, op2_setne_int);
57 case nir_op_f2b1: return emit_alu_i2orf2_b1(instr, op2_setne_dx10);
58 case nir_op_b2b1:
59 case nir_op_mov:return emit_mov(instr);
60 case nir_op_ftrunc: return emit_alu_op1(instr, op1_trunc);
61 case nir_op_fabs: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_abs});
62 case nir_op_fneg: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_neg});
63 case nir_op_fsat: return emit_alu_op1(instr, op1_mov, {1 << alu_dst_clamp});
64 case nir_op_frcp: return emit_alu_trans_op1(instr, op1_recip_ieee);
65 case nir_op_frsq: return emit_alu_trans_op1(instr, op1_recipsqrt_ieee1);
66 case nir_op_fsin: return emit_alu_trig_op1(instr, op1_sin);
67 case nir_op_fcos: return emit_alu_trig_op1(instr, op1_cos);
68 case nir_op_fexp2: return emit_alu_trans_op1(instr, op1_exp_ieee);
69 case nir_op_flog2: return emit_alu_trans_op1(instr, op1_log_clamped);
70
71 case nir_op_fround_even: return emit_alu_op1(instr, op1_rndne);
72 case nir_op_fsqrt: return emit_alu_trans_op1(instr, op1_sqrt_ieee);
73 case nir_op_i2f32: return emit_alu_trans_op1(instr, op1_int_to_flt);
74 case nir_op_u2f32: return emit_alu_trans_op1(instr, op1_uint_to_flt);
75 case nir_op_f2i32: return emit_alu_f2i32_or_u32(instr, op1_flt_to_int);
76 case nir_op_f2u32: return emit_alu_f2i32_or_u32(instr, op1_flt_to_uint);
77
78 case nir_op_fceil: return emit_alu_op1(instr, op1_ceil);
79 case nir_op_ffract: return emit_alu_op1(instr, op1_fract);
80 case nir_op_ffloor: return emit_alu_op1(instr, op1_floor);
81
82 case nir_op_fsign: return emit_fsign(instr);
83 case nir_op_fdph: return emit_fdph(instr);
84
85 case nir_op_ibitfield_extract: return emit_bitfield_extract(instr, op3_bfe_int);
86 case nir_op_ubitfield_extract: return emit_bitfield_extract(instr, op3_bfe_uint);
87 case nir_op_bitfield_insert: return emit_bitfield_insert(instr);
88 case nir_op_bit_count: return emit_alu_op1(instr, op1_bcnt_int);
89 case nir_op_bitfield_reverse: return emit_alu_op1(instr, op1_bfrev_int);
90
91 case nir_op_ieq: return emit_alu_op2_int(instr, op2_sete_int);
92 case nir_op_ine: return emit_alu_op2_int(instr, op2_setne_int);
93 case nir_op_ige: return emit_alu_op2_int(instr, op2_setge_int);
94 case nir_op_ishl: return emit_alu_op2_int(instr, op2_lshl_int);
95 case nir_op_ishr: return emit_alu_op2_int(instr, op2_ashr_int);
96 case nir_op_ilt: return emit_alu_op2_int(instr, op2_setgt_int, op2_opt_reverse);
97 case nir_op_iand: return emit_alu_op2_int(instr, op2_and_int);
98 case nir_op_ixor: return emit_alu_op2_int(instr, op2_xor_int);
99 case nir_op_imin: return emit_alu_op2_int(instr, op2_min_int);
100 case nir_op_imax: return emit_alu_op2_int(instr, op2_max_int);
101 case nir_op_imul_high: return emit_alu_trans_op2(instr, op2_mulhi_int);
102 case nir_op_umul_high: return emit_alu_trans_op2(instr, op2_mulhi_uint);
103 case nir_op_umax: return emit_alu_op2_int(instr, op2_max_uint);
104 case nir_op_umin: return emit_alu_op2_int(instr, op2_min_uint);
105 case nir_op_ior: return emit_alu_op2_int(instr, op2_or_int);
106 case nir_op_inot: return emit_alu_op1(instr, op1_not_int);
107 case nir_op_iabs: return emit_alu_iabs(instr);
108 case nir_op_ineg: return emit_alu_ineg(instr);
109 case nir_op_idiv: return emit_alu_div_int(instr, true, false);
110 case nir_op_udiv: return emit_alu_div_int(instr, false, false);
111 case nir_op_umod: return emit_alu_div_int(instr, false, true);
112 case nir_op_isign: return emit_alu_isign(instr);
113
114 case nir_op_uge: return emit_alu_op2_int(instr, op2_setge_uint);
115 case nir_op_ult: return emit_alu_op2_int(instr, op2_setgt_uint, op2_opt_reverse);
116 case nir_op_ushr: return emit_alu_op2_int(instr, op2_lshr_int);
117
118 case nir_op_flt: return emit_alu_op2(instr, op2_setgt_dx10, op2_opt_reverse);
119
120 case nir_op_fge: return emit_alu_op2(instr, op2_setge_dx10);
121 case nir_op_fne: return emit_alu_op2(instr, op2_setne_dx10);
122 case nir_op_feq: return emit_alu_op2(instr, op2_sete_dx10);
123
124 case nir_op_fmin: return emit_alu_op2(instr, op2_min_dx10);
125 case nir_op_fmax: return emit_alu_op2(instr, op2_max_dx10);
126 case nir_op_fmul: return emit_alu_op2(instr, op2_mul_ieee);
127 case nir_op_imul: return emit_alu_trans_op2(instr, op2_mullo_int);
128 case nir_op_fadd: return emit_alu_op2(instr, op2_add);
129 case nir_op_fsub: return emit_alu_op2(instr, op2_add, op2_opt_neg_src1);
130 case nir_op_iadd: return emit_alu_op2_int(instr, op2_add_int);
131 case nir_op_isub: return emit_alu_op2_int(instr, op2_sub_int);
132 case nir_op_fdot2: return emit_dot(instr, 2);
133 case nir_op_fdot3: return emit_dot(instr, 3);
134 case nir_op_fdot4: return emit_dot(instr, 4);
135
136 case nir_op_bany_inequal2: return emit_any_all_icomp(instr, op2_setne_int, 2, false);
137 case nir_op_bany_inequal3: return emit_any_all_icomp(instr, op2_setne_int, 3, false);
138 case nir_op_bany_inequal4: return emit_any_all_icomp(instr, op2_setne_int, 4, false);
139
140 case nir_op_ball_iequal2: return emit_any_all_icomp(instr, op2_sete_int, 2, true);
141 case nir_op_ball_iequal3: return emit_any_all_icomp(instr, op2_sete_int, 3, true);
142 case nir_op_ball_iequal4: return emit_any_all_icomp(instr, op2_sete_int, 4, true);
143
144 case nir_op_bany_fnequal2: return emit_any_all_fcomp2(instr, op2_setne_dx10, false);
145 case nir_op_bany_fnequal3: return emit_any_all_fcomp(instr, op2_setne, 3, false);
146 case nir_op_bany_fnequal4: return emit_any_all_fcomp(instr, op2_setne, 4, false);
147
148 case nir_op_ball_fequal2: return emit_any_all_fcomp2(instr, op2_sete_dx10, true);
149 case nir_op_ball_fequal3: return emit_any_all_fcomp(instr, op2_sete, 3, true);
150 case nir_op_ball_fequal4: return emit_any_all_fcomp(instr, op2_sete, 4, true);
151
152
153 case nir_op_ffma: return emit_alu_op3(instr, op3_muladd_ieee);
154 case nir_op_bcsel: return emit_alu_op3(instr, op3_cnde_int, {0, 2, 1});
155 case nir_op_vec2: return emit_create_vec(instr, 2);
156 case nir_op_vec3: return emit_create_vec(instr, 3);
157 case nir_op_vec4: return emit_create_vec(instr, 4);
158
159 case nir_op_find_lsb: return emit_alu_op1(instr, op1_ffbl_int);
160 case nir_op_ufind_msb: return emit_find_msb(instr, false);
161 case nir_op_ifind_msb: return emit_find_msb(instr, true);
162 case nir_op_b2i32: return emit_b2i32(instr);
163 case nir_op_pack_64_2x32_split: return emit_pack_64_2x32_split(instr);
164 case nir_op_unpack_64_2x32_split_x: return emit_unpack_64_2x32_split(instr, 0);
165 case nir_op_unpack_64_2x32_split_y: return emit_unpack_64_2x32_split(instr, 1);
166 case nir_op_unpack_half_2x16_split_x: return emit_unpack_32_2x16_split_x(instr);
167 case nir_op_unpack_half_2x16_split_y: return emit_unpack_32_2x16_split_y(instr);
168 case nir_op_pack_half_2x16_split: return emit_pack_32_2x16_split(instr);
169
170
171 /* These are in the ALU instruction list, but they should be texture instructions */
172 case nir_op_fddx_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, true);
173 case nir_op_fddx_coarse:
174 case nir_op_fddx: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, false);
175
176 case nir_op_fddy_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_v, true);
177 case nir_op_fddy_coarse:
178 case nir_op_fddy: return emit_tex_fdd(instr,TexInstruction::get_gradient_v, false);
179
180 case nir_op_umad24: return emit_alu_op3(instr, op3_muladd_uint24, {0, 1, 2});
181 case nir_op_umul24: return emit_alu_op2(instr, op2_mul_uint24);
182 default:
183 return false;
184 }
185 }
186
187 void EmitAluInstruction::split_constants(const nir_alu_instr& instr)
188 {
189 const nir_op_info *op_info = &nir_op_infos[instr.op];
190 if (op_info->num_inputs < 2)
191 return;
192
193 int nconst = 0;
194 std::array<PValue,4> c;
195 std::array<int,4> idx;
196 for (unsigned i = 0; i < op_info->num_inputs; ++i) {
197 PValue src = from_nir(instr.src[i], 0);
198 assert(src);
199 if (src->type() == Value::kconst) {
200 c[nconst] = src;
201
202 idx[nconst++] = i;
203 }
204 }
205 if (nconst < 2)
206 return;
207
208 unsigned sel = c[0]->sel();
209 sfn_log << SfnLog::reg << "split " << nconst << " constants, sel[0] = " << sel; ;
210
211 for (int i = 1; i < nconst; ++i) {
212 sfn_log << "sel[" << i << "] = " << c[i]->sel() << "\n";
213 if (c[i]->sel() != sel) {
214 load_uniform(instr.src[idx[i]]);
215 }
216 }
217 }
218
219 bool EmitAluInstruction::emit_alu_inot(const nir_alu_instr& instr)
220 {
221 if (instr.src[0].negate || instr.src[0].abs) {
222 std::cerr << "source modifiers not supported with int ops\n";
223 return false;
224 }
225
226 AluInstruction *ir = nullptr;
227 for (int i = 0; i < 4 ; ++i) {
228 if (instr.dest.write_mask & (1 << i)){
229 ir = new AluInstruction(op1_not_int, from_nir(instr.dest, i),
230 from_nir(instr.src[0], i), write);
231 emit_instruction(ir);
232 }
233 }
234 if (ir)
235 ir->set_flag(alu_last_instr);
236 return true;
237 }
238
239 bool EmitAluInstruction::emit_alu_op1(const nir_alu_instr& instr, EAluOp opcode,
240 const AluOpFlags& flags)
241 {
242 AluInstruction *ir = nullptr;
243 for (int i = 0; i < 4 ; ++i) {
244 if (instr.dest.write_mask & (1 << i)){
245 ir = new AluInstruction(opcode, from_nir(instr.dest, i),
246 from_nir(instr.src[0], i), write);
247
248 if (flags.test(alu_src0_abs) || instr.src[0].abs)
249 ir->set_flag(alu_src0_abs);
250
251 if (instr.src[0].negate ^ flags.test(alu_src0_neg))
252 ir->set_flag(alu_src0_neg);
253
254 if (flags.test(alu_dst_clamp) || instr.dest.saturate)
255 ir->set_flag(alu_dst_clamp);
256
257 emit_instruction(ir);
258 }
259 }
260 make_last(ir);
261
262 return true;
263 }
264
265 bool EmitAluInstruction::emit_mov(const nir_alu_instr& instr)
266 {
267 /* If the op is a plain move beween SSA values we can just forward
268 * the register reference to the original register */
269 if (instr.dest.dest.is_ssa && instr.src[0].src.is_ssa &&
270 !instr.src[0].abs && !instr.src[0].negate && !instr.dest.saturate) {
271 bool result = true;
272 for (int i = 0; i < 4 ; ++i) {
273 if (instr.dest.write_mask & (1 << i)){
274 auto src = from_nir(instr.src[0], i);
275 result &= inject_register(instr.dest.dest.ssa.index, i,
276 src, true);
277
278 if (src->type() == Value::kconst) {
279 add_uniform((instr.dest.dest.ssa.index << 2) + i, src);
280 }
281 }
282 }
283 return result;
284 } else {
285 return emit_alu_op1(instr, op1_mov);
286 }
287 }
288
289 bool EmitAluInstruction::emit_alu_trig_op1(const nir_alu_instr& instr, EAluOp opcode)
290 {
291 // normalize by dividing by 2*PI, shift by 0.5, take fraction, and
292 // then shift back
293
294 const float inv_2_pi = 0.15915494f;
295
296 PValue v[4]; // this might need some additional temp register creation
297 for (unsigned i = 0; i < 4 ; ++i)
298 v[i] = from_nir(instr.dest, i);
299
300 PValue inv_pihalf = PValue(new LiteralValue(inv_2_pi, 0));
301 AluInstruction *ir = nullptr;
302 for (unsigned i = 0; i < 4 ; ++i) {
303 if (!(instr.dest.write_mask & (1 << i)))
304 continue;
305 ir = new AluInstruction(op3_muladd_ieee, v[i],
306 {from_nir(instr.src[0],i), inv_pihalf, Value::zero_dot_5},
307 {alu_write});
308 if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
309 emit_instruction(ir);
310 }
311 make_last(ir);
312
313 for (unsigned i = 0; i < 4 ; ++i) {
314 if (!(instr.dest.write_mask & (1 << i)))
315 continue;
316 ir = new AluInstruction(op1_fract, v[i], v[i], {alu_write});
317 emit_instruction(ir);
318 }
319 make_last(ir);
320
321 for (unsigned i = 0; i < 4 ; ++i) {
322 if (!(instr.dest.write_mask & (1 << i)))
323 continue;
324 ir = new AluInstruction(op2_add, v[i], v[i], Value::zero_dot_5, write);
325 ir->set_flag(alu_src1_neg);
326 emit_instruction(ir);
327 }
328 make_last(ir);
329
330 for (unsigned i = 0; i < 4 ; ++i) {
331 if (!(instr.dest.write_mask & (1 << i)))
332 continue;
333
334 ir = new AluInstruction(opcode, v[i], v[i], last_write);
335 emit_instruction(ir);
336 }
337 return true;
338 }
339
340 bool EmitAluInstruction::emit_alu_trans_op1(const nir_alu_instr& instr, EAluOp opcode,
341 bool absolute)
342 {
343 AluInstruction *ir = nullptr;
344 std::set<int> src_idx;
345
346 if (get_chip_class() == CAYMAN) {
347 int last_slot = (instr.dest.write_mask & 0x8) ? 4 : 3;
348 for (int i = 0; i < last_slot; ++i) {
349 ir = new AluInstruction(opcode, from_nir(instr.dest, i),
350 from_nir(instr.src[0], 0), instr.dest.write_mask & (1 << i) ? write : empty);
351 if (absolute || instr.src[0].abs) ir->set_flag(alu_src0_abs);
352 if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
353 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
354
355 if (i == (last_slot - 1)) ir->set_flag(alu_last_instr);
356
357 emit_instruction(ir);
358 }
359 } else {
360 for (int i = 0; i < 4 ; ++i) {
361 if (instr.dest.write_mask & (1 << i)){
362 ir = new AluInstruction(opcode, from_nir(instr.dest, i),
363 from_nir(instr.src[0], i), last_write);
364 if (absolute || instr.src[0].abs) ir->set_flag(alu_src0_abs);
365 if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
366 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
367 emit_instruction(ir);
368 }
369 }
370 }
371 return true;
372 }
373
374 bool EmitAluInstruction::emit_alu_f2i32_or_u32(const nir_alu_instr& instr, EAluOp op)
375 {
376 AluInstruction *ir = nullptr;
377 std::array<PValue, 4> v;
378
379 for (int i = 0; i < 4; ++i) {
380 if (!(instr.dest.write_mask & (1 << i)))
381 continue;
382 v[i] = from_nir(instr.dest, i);
383 ir = new AluInstruction(op1_trunc, v[i], from_nir(instr.src[0], i), {alu_write});
384 if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
385 if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
386 emit_instruction(ir);
387 }
388 make_last(ir);
389
390 for (int i = 0; i < 4; ++i) {
391 if (!(instr.dest.write_mask & (1 << i)))
392 continue;
393 ir = new AluInstruction(op, v[i], v[i], {alu_write});
394 emit_instruction(ir);
395 if (op == op1_flt_to_uint)
396 make_last(ir);
397 }
398 make_last(ir);
399
400 return true;
401 }
402
403 bool EmitAluInstruction::emit_find_msb(const nir_alu_instr& instr, bool sgn)
404 {
405 int sel_tmp = allocate_temp_register();
406 int sel_tmp2 = allocate_temp_register();
407 GPRVector tmp(sel_tmp, {0,1,2,3});
408 GPRVector tmp2(sel_tmp2, {0,1,2,3});
409 AluInstruction *ir = nullptr;
410 EAluOp opcode = sgn ? op1_ffbh_int : op1_ffbh_uint;
411 for (int i = 0; i < 4; ++i) {
412 if (!(instr.dest.write_mask & (1 << i)))
413 continue;
414
415 ir = new AluInstruction(opcode, tmp.reg_i(i), from_nir(instr.src[0], i), write);
416 emit_instruction(ir);
417 }
418 make_last(ir);
419
420 for (int i = 0; i < 4 ; ++i) {
421 if (!(instr.dest.write_mask & (1 << i)))
422 continue;
423
424 ir = new AluInstruction(op2_sub_int, tmp2.reg_i(i),
425 PValue(new LiteralValue(31u, 0)), tmp.reg_i(i), write);
426 emit_instruction(ir);
427 }
428 make_last(ir);
429
430 for (int i = 0; i < 4 ; ++i) {
431 if (!(instr.dest.write_mask & (1 << i)))
432 continue;
433
434 ir = new AluInstruction(op3_cndge_int, from_nir(instr.dest, i), tmp.reg_i(i),
435 tmp2.reg_i(i), tmp.reg_i(i), write);
436 emit_instruction(ir);
437 }
438 make_last(ir);
439
440 return true;
441 }
442
443 bool EmitAluInstruction::emit_b2i32(const nir_alu_instr& instr)
444 {
445 AluInstruction *ir = nullptr;
446 for (int i = 0; i < 4 ; ++i) {
447 if (!(instr.dest.write_mask & (1 << i)))
448 continue;
449
450 ir = new AluInstruction(op2_and_int, from_nir(instr.dest, i),
451 from_nir(instr.src[0], i), Value::one_i, write);
452 emit_instruction(ir);
453 }
454 make_last(ir);
455
456 return true;
457 }
458
459 bool EmitAluInstruction::emit_pack_64_2x32_split(const nir_alu_instr& instr)
460 {
461 AluInstruction *ir = nullptr;
462 for (unsigned i = 0; i < 2; ++i) {
463 if (!(instr.dest.write_mask & (1 << i)))
464 continue;
465 ir = new AluInstruction(op1_mov, from_nir(instr.dest, i),
466 from_nir(instr.src[0], i), write);
467 emit_instruction(ir);
468 }
469 ir->set_flag(alu_last_instr);
470 return true;
471 }
472
473 bool EmitAluInstruction::emit_unpack_64_2x32_split(const nir_alu_instr& instr, unsigned comp)
474 {
475 emit_instruction(new AluInstruction(op1_mov, from_nir(instr.dest, 0),
476 from_nir(instr.src[0], comp), last_write));
477 return true;
478 }
479
480 bool EmitAluInstruction::emit_create_vec(const nir_alu_instr& instr, unsigned nc)
481 {
482 AluInstruction *ir = nullptr;
483 std::set<int> src_slot;
484 for(unsigned i = 0; i < nc; ++i) {
485 if (instr.dest.write_mask & (1 << i)){
486 auto src = from_nir(instr.src[i], 0);
487 ir = new AluInstruction(op1_mov, from_nir(instr.dest, i), src, write);
488 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
489
490 // FIXME: This is a rather crude approach to fix the problem that
491 // r600 can't read from four different slots of the same component
492 // here we check only for the register index
493 if (src->type() == Value::gpr)
494 src_slot.insert(src->sel());
495 if (src_slot.size() >= 3) {
496 src_slot.clear();
497 ir->set_flag(alu_last_instr);
498 }
499 emit_instruction(ir);
500 }
501 }
502 if (ir)
503 ir->set_flag(alu_last_instr);
504 return true;
505 }
506
507 bool EmitAluInstruction::emit_dot(const nir_alu_instr& instr, int n)
508 {
509 const nir_alu_src& src0 = instr.src[0];
510 const nir_alu_src& src1 = instr.src[1];
511
512 AluInstruction *ir = nullptr;
513 for (int i = 0; i < n ; ++i) {
514 ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
515 from_nir(src0, i), from_nir(src1, i),
516 instr.dest.write_mask & (1 << i) ? write : empty);
517
518 if (src0.negate) ir->set_flag(alu_src0_neg);
519 if (src0.abs) ir->set_flag(alu_src0_abs);
520 if (src1.negate) ir->set_flag(alu_src1_neg);
521 if (src1.abs) ir->set_flag(alu_src1_abs);
522
523 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
524 emit_instruction(ir);
525 }
526 for (int i = n; i < 4 ; ++i) {
527 ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
528 Value::zero, Value::zero,
529 instr.dest.write_mask & (1 << i) ? write : empty);
530 emit_instruction(ir);
531 }
532
533 if (ir)
534 ir->set_flag(alu_last_instr);
535 return true;
536 }
537
538 bool EmitAluInstruction::emit_fdph(const nir_alu_instr& instr)
539 {
540 const nir_alu_src& src0 = instr.src[0];
541 const nir_alu_src& src1 = instr.src[1];
542
543 AluInstruction *ir = nullptr;
544 for (int i = 0; i < 3 ; ++i) {
545 ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
546 from_nir(src0, i), from_nir(src1, i),
547 instr.dest.write_mask & (1 << i) ? write : empty);
548 if (src0.negate) ir->set_flag(alu_src0_neg);
549 if (src0.abs) ir->set_flag(alu_src0_abs);
550 if (src1.negate) ir->set_flag(alu_src1_neg);
551 if (src1.abs) ir->set_flag(alu_src1_abs);
552 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
553 emit_instruction(ir);
554 }
555
556 ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, 3), Value::one_f,
557 from_nir(src1, 3), (instr.dest.write_mask) & (1 << 3) ? write : empty);
558 if (src1.negate) ir->set_flag(alu_src1_neg);
559 if (src1.abs) ir->set_flag(alu_src1_abs);
560 emit_instruction(ir);
561
562 ir->set_flag(alu_last_instr);
563 return true;
564
565 }
566
567 bool EmitAluInstruction::emit_alu_i2orf2_b1(const nir_alu_instr& instr, EAluOp op)
568 {
569 AluInstruction *ir = nullptr;
570 for (int i = 0; i < 4 ; ++i) {
571 if (instr.dest.write_mask & (1 << i)) {
572 ir = new AluInstruction(op, from_nir(instr.dest, i),
573 from_nir(instr.src[0], i), Value::zero,
574 write);
575 emit_instruction(ir);
576 }
577 }
578 if (ir)
579 ir->set_flag(alu_last_instr);
580 return true;
581 }
582
583 bool EmitAluInstruction::emit_alu_b2f(const nir_alu_instr& instr)
584 {
585 AluInstruction *ir = nullptr;
586 for (int i = 0; i < 4 ; ++i) {
587 if (instr.dest.write_mask & (1 << i)){
588 ir = new AluInstruction(op2_and_int, from_nir(instr.dest, i),
589 from_nir(instr.src[0], i), Value::one_f, write);
590 if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
591 if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
592 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
593 emit_instruction(ir);
594 }
595 }
596 if (ir)
597 ir->set_flag(alu_last_instr);
598 return true;
599 }
600
601 bool EmitAluInstruction::emit_any_all_icomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all)
602 {
603
604 AluInstruction *ir = nullptr;
605 PValue v[4]; // this might need some additional temp register creation
606 for (unsigned i = 0; i < 4 ; ++i)
607 v[i] = from_nir(instr.dest, i);
608
609 EAluOp combine = all ? op2_and_int : op2_or_int;
610
611 /* For integers we can not use the modifiers, so this needs some emulation */
612 /* Should actually be lowered with NIR */
613 if (instr.src[0].negate == instr.src[1].negate &&
614 instr.src[0].abs == instr.src[1].abs) {
615
616 for (unsigned i = 0; i < nc ; ++i) {
617 ir = new AluInstruction(op, v[i], from_nir(instr.src[0], i),
618 from_nir(instr.src[1], i), write);
619 emit_instruction(ir);
620 }
621 if (ir)
622 ir->set_flag(alu_last_instr);
623 } else {
624 std::cerr << "Negate in iequal/inequal not (yet) supported\n";
625 return false;
626 }
627
628 for (unsigned i = 0; i < nc/2 ; ++i) {
629 ir = new AluInstruction(combine, v[2 * i], v[2 * i], v[2 * i + 1], write);
630 emit_instruction(ir);
631 }
632 if (ir)
633 ir->set_flag(alu_last_instr);
634
635 if (nc > 2) {
636 ir = new AluInstruction(combine, v[0], v[0], v[2], last_write);
637 emit_instruction(ir);
638 }
639
640 return true;
641 }
642
643 bool EmitAluInstruction::emit_any_all_fcomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all)
644 {
645 AluInstruction *ir = nullptr;
646 PValue v[4]; // this might need some additional temp register creation
647 for (unsigned i = 0; i < 4 ; ++i)
648 v[i] = from_nir(instr.dest, i);
649
650 for (unsigned i = 0; i < nc ; ++i) {
651 ir = new AluInstruction(op, v[i], from_nir(instr.src[0],i),
652 from_nir(instr.src[1],i), write);
653
654 if (instr.src[0].abs)
655 ir->set_flag(alu_src0_abs);
656 if (instr.src[0].negate)
657 ir->set_flag(alu_src0_neg);
658
659 if (instr.src[1].abs)
660 ir->set_flag(alu_src1_abs);
661 if (instr.src[1].negate)
662 ir->set_flag(alu_src1_neg);
663
664 emit_instruction(ir);
665 }
666 if (ir)
667 ir->set_flag(alu_last_instr);
668
669 for (unsigned i = 0; i < nc ; ++i) {
670 ir = new AluInstruction(op1_max4, v[i], v[i], write);
671 if (all) ir->set_flag(alu_src0_neg);
672 emit_instruction(ir);
673 }
674
675 for (unsigned i = nc; i < 4 ; ++i) {
676 ir = new AluInstruction(op1_max4, v[i],
677 all ? Value::one_f : Value::zero, write);
678 if (all)
679 ir->set_flag(alu_src0_neg);
680
681 emit_instruction(ir);
682 }
683
684 ir->set_flag(alu_last_instr);
685
686 if (all)
687 op = (op == op2_sete) ? op2_sete_dx10: op2_setne_dx10;
688 else
689 op = (op == op2_sete) ? op2_setne_dx10: op2_sete_dx10;
690
691 ir = new AluInstruction(op, v[0], v[0], Value::one_f, last_write);
692 if (all)
693 ir->set_flag(alu_src1_neg);
694 emit_instruction(ir);
695
696 return true;
697 }
698
699 bool EmitAluInstruction::emit_any_all_fcomp2(const nir_alu_instr& instr, EAluOp op, bool all)
700 {
701 AluInstruction *ir = nullptr;
702 PValue v[4]; // this might need some additional temp register creation
703 for (unsigned i = 0; i < 4 ; ++i)
704 v[i] = from_nir(instr.dest, i);
705
706 for (unsigned i = 0; i < 2 ; ++i) {
707 ir = new AluInstruction(op, v[i], from_nir(instr.src[0],i),
708 from_nir(instr.src[1],i), write);
709 if (instr.src[0].abs)
710 ir->set_flag(alu_src0_abs);
711 if (instr.src[0].negate)
712 ir->set_flag(alu_src0_neg);
713
714 if (instr.src[1].abs)
715 ir->set_flag(alu_src1_abs);
716 if (instr.src[1].negate)
717 ir->set_flag(alu_src1_neg);
718
719 emit_instruction(ir);
720 }
721 if (ir)
722 ir->set_flag(alu_last_instr);
723
724 op = (op == op2_setne_dx10) ? op2_or_int: op2_and_int;
725 ir = new AluInstruction(op, v[0], v[0], v[1], last_write);
726 emit_instruction(ir);
727
728 return true;
729 }
730
731 bool EmitAluInstruction::emit_alu_trans_op2(const nir_alu_instr& instr, EAluOp opcode)
732 {
733 const nir_alu_src& src0 = instr.src[0];
734 const nir_alu_src& src1 = instr.src[1];
735
736 AluInstruction *ir = nullptr;
737
738 if (get_chip_class() == CAYMAN) {
739 int lasti = util_last_bit(instr.dest.write_mask);
740 for (int k = 0; k < lasti ; ++k) {
741 if (instr.dest.write_mask & (1 << k)) {
742
743 for (int i = 0; i < 4; i++) {
744 ir = new AluInstruction(opcode, from_nir(instr.dest, i), from_nir(src0, k), from_nir(src1, k), (i == k) ? write : empty);
745 if (src0.negate) ir->set_flag(alu_src0_neg);
746 if (src0.abs) ir->set_flag(alu_src0_abs);
747 if (src1.negate) ir->set_flag(alu_src1_neg);
748 if (src1.abs) ir->set_flag(alu_src1_abs);
749 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
750 if (i == 3) ir->set_flag(alu_last_instr);
751 emit_instruction(ir);
752 }
753 }
754 }
755 } else {
756 for (int i = 0; i < 4 ; ++i) {
757 if (instr.dest.write_mask & (1 << i)){
758 ir = new AluInstruction(opcode, from_nir(instr.dest, i), from_nir(src0, i), from_nir(src1, i), last_write);
759 if (src0.negate) ir->set_flag(alu_src0_neg);
760 if (src0.abs) ir->set_flag(alu_src0_abs);
761 if (src1.negate) ir->set_flag(alu_src1_neg);
762 if (src1.abs) ir->set_flag(alu_src1_abs);
763 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
764 emit_instruction(ir);
765 }
766 }
767 }
768 return true;
769 }
770
771 bool EmitAluInstruction::emit_alu_op2_int(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts opts)
772 {
773
774 const nir_alu_src& src0 = instr.src[0];
775 const nir_alu_src& src1 = instr.src[1];
776
777 if (src0.negate || src1.negate ||
778 src0.abs || src1.abs) {
779 std::cerr << "R600: don't support modifiers with integer operations";
780 return false;
781 }
782 return emit_alu_op2(instr, opcode, opts);
783 }
784
785 bool EmitAluInstruction::emit_alu_op2(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops)
786 {
787 const nir_alu_src *src0 = &instr.src[0];
788 const nir_alu_src *src1 = &instr.src[1];
789
790 if (ops & op2_opt_reverse)
791 std::swap(src0, src1);
792
793 bool src1_negate = (ops & op2_opt_neg_src1) ^ src1->negate;
794
795 AluInstruction *ir = nullptr;
796 for (int i = 0; i < 4 ; ++i) {
797 if (instr.dest.write_mask & (1 << i)){
798 ir = new AluInstruction(opcode, from_nir(instr.dest, i),
799 from_nir(*src0, i), from_nir(*src1, i), write);
800
801 if (src0->negate) ir->set_flag(alu_src0_neg);
802 if (src0->abs) ir->set_flag(alu_src0_abs);
803 if (src1_negate) ir->set_flag(alu_src1_neg);
804 if (src1->abs) ir->set_flag(alu_src1_abs);
805 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
806 emit_instruction(ir);
807 }
808 }
809 if (ir)
810 ir->set_flag(alu_last_instr);
811 return true;
812 }
813
814 bool EmitAluInstruction::emit_alu_op2_split_src_mods(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops)
815 {
816 const nir_alu_src *src0 = &instr.src[0];
817 const nir_alu_src *src1 = &instr.src[1];
818
819 if (ops & op2_opt_reverse)
820 std::swap(src0, src1);
821
822 GPRVector::Values v0;
823 for (int i = 0; i < 4 ; ++i)
824 v0[i] = from_nir(*src0, i);
825
826 GPRVector::Values v1;
827 for (int i = 0; i < 4 ; ++i)
828 v1[i] = from_nir(*src1, i);
829
830 if (src0->abs || src0->negate) {
831 int src0_tmp = allocate_temp_register();
832 GPRVector::Values v0_temp;
833 AluInstruction *ir = nullptr;
834 for (int i = 0; i < 4 ; ++i) {
835 if (instr.dest.write_mask & (1 << i)) {
836 v0_temp[i] = PValue(new GPRValue(src0_tmp, i));
837 ir = new AluInstruction(op1_mov, v0_temp[i], v0[i], write);
838 if (src0->abs) ir->set_flag(alu_src0_abs);
839 if (src0->negate) ir->set_flag(alu_src0_neg);
840 emit_instruction(ir);
841 v0[i] = v0_temp[i];
842 }
843 }
844 if (ir)
845 ir->set_flag(alu_last_instr);
846 }
847
848 if (src1->abs || src1->negate) {
849 int src1_tmp = allocate_temp_register();
850 GPRVector::Values v1_temp;
851 AluInstruction *ir = nullptr;
852 for (int i = 0; i < 4 ; ++i) {
853 if (instr.dest.write_mask & (1 << i)) {
854 v1_temp[i] = PValue(new GPRValue(src1_tmp, i));
855 ir = new AluInstruction(op1_mov, v1_temp[i], v1[i], {alu_write});
856 if (src1->abs) ir->set_flag(alu_src0_abs);
857 if (src1->negate) ir->set_flag(alu_src0_neg);
858 emit_instruction(ir);
859 v1[i] = v1_temp[i];
860 }
861 }
862 if (ir)
863 ir->set_flag(alu_last_instr);
864 }
865
866 AluInstruction *ir = nullptr;
867 for (int i = 0; i < 4 ; ++i) {
868 if (instr.dest.write_mask & (1 << i)){
869 ir = new AluInstruction(opcode, from_nir(instr.dest, i), {v0[i], v1[i]}, {alu_write});
870 emit_instruction(ir);
871 }
872 }
873 if (ir)
874 ir->set_flag(alu_last_instr);
875 return true;
876 }
877
878
879 bool EmitAluInstruction::emit_alu_isign(const nir_alu_instr& instr)
880 {
881 int sel_tmp = allocate_temp_register();
882 GPRVector tmp(sel_tmp, {0,1,2,3});
883
884 AluInstruction *ir = nullptr;
885 PValue help[4];
886
887 for (int i = 0; i < 4 ; ++i) {
888 if (instr.dest.write_mask & (1 << i)){
889 help[i] = from_nir(instr.dest, i);
890 auto s = from_nir(instr.src[0], i);
891 ir = new AluInstruction(op3_cndgt_int, help[i], s, Value::one_i, s, write);
892 emit_instruction(ir);
893 }
894 }
895 if (ir)
896 ir->set_flag(alu_last_instr);
897
898 for (int i = 0; i < 4 ; ++i) {
899 if (instr.dest.write_mask & (1 << i)){
900 ir = new AluInstruction(op2_sub_int, tmp.reg_i(i), Value::zero, help[i], write);
901 emit_instruction(ir);
902 }
903 }
904 if (ir)
905 ir->set_flag(alu_last_instr);
906
907 for (int i = 0; i < 4 ; ++i) {
908 if (instr.dest.write_mask & (1 << i)){
909
910 ir = new AluInstruction(op3_cndgt_int, help[i], tmp.reg_i(i),
911 PValue(new LiteralValue(-1,0)), help[i], write);
912 emit_instruction(ir);
913 }
914 }
915 if (ir)
916 ir->set_flag(alu_last_instr);
917 return true;
918 }
919
920 bool EmitAluInstruction::emit_fsign(const nir_alu_instr& instr)
921 {
922 PValue help[4];
923 PValue src[4];
924 AluInstruction *ir = nullptr;
925
926 for (int i = 0; i < 4 ; ++i) {
927 help[i] = from_nir(instr.dest, i);
928 src[i] = from_nir(instr.src[0], i);
929 }
930
931 if (instr.src[0].abs) {
932
933 for (int i = 0; i < 4 ; ++i) {
934 if (instr.dest.write_mask & (1 << i)){
935 ir = new AluInstruction(op2_setgt, help[i], src[i], Value::zero, write);
936 ir->set_flag(alu_src0_abs);
937 emit_instruction(ir);
938 }
939 }
940 if (ir)
941 ir->set_flag(alu_last_instr);
942
943 if (instr.src[0].negate) {
944 for (int i = 0; i < 4 ; ++i) {
945 if (instr.dest.write_mask & (1 << i)){
946 ir = new AluInstruction(op1_mov, help[i], help[i], write);
947 ir->set_flag(alu_src0_neg);
948 emit_instruction(ir);
949 }
950 }
951 if (ir)
952 ir->set_flag(alu_last_instr);
953 }
954
955 return true;
956 }
957
958 for (int i = 0; i < 4 ; ++i) {
959 if (instr.dest.write_mask & (1 << i)){
960 ir = new AluInstruction(op3_cndgt, help[i], src[i], Value::one_f, src[i], write);
961 if (instr.src[0].negate) {
962 ir->set_flag(alu_src0_neg);
963 ir->set_flag(alu_src2_neg);
964 }
965 emit_instruction(ir);
966 }
967 }
968
969 if (ir)
970 ir->set_flag(alu_last_instr);
971
972 for (int i = 0; i < 4 ; ++i) {
973 if (instr.dest.write_mask & (1 << i)){
974 ir = new AluInstruction(op3_cndgt, help[i], help[i], Value::one_f, help[i], write);
975 ir->set_flag(alu_src0_neg);
976 ir->set_flag(alu_src1_neg);
977 emit_instruction(ir);
978 }
979 }
980 if (ir)
981 ir->set_flag(alu_last_instr);
982 return true;
983 }
984
985 bool EmitAluInstruction::emit_alu_op3(const nir_alu_instr& instr, EAluOp opcode,
986 std::array<uint8_t, 3> reorder)
987 {
988 const nir_alu_src *src[3];
989 src[0] = &instr.src[reorder[0]];
990 src[1] = &instr.src[reorder[1]];
991 src[2] = &instr.src[reorder[2]];
992
993 AluInstruction *ir = nullptr;
994 for (int i = 0; i < 4 ; ++i) {
995 if (instr.dest.write_mask & (1 << i)){
996 ir = new AluInstruction(opcode, from_nir(instr.dest, i),
997 from_nir(*src[0], i), from_nir(*src[1], i),
998 from_nir(*src[2], i), write);
999
1000 if (src[0]->negate) ir->set_flag(alu_src0_neg);
1001 if (src[1]->negate) ir->set_flag(alu_src1_neg);
1002 if (src[2]->negate) ir->set_flag(alu_src2_neg);
1003
1004 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
1005 ir->set_flag(alu_write);
1006 emit_instruction(ir);
1007 }
1008 }
1009 if (ir)
1010 ir->set_flag(alu_last_instr);
1011 return true;
1012 }
1013
1014 bool EmitAluInstruction::emit_alu_ineg(const nir_alu_instr& instr)
1015 {
1016 AluInstruction *ir = nullptr;
1017 for (int i = 0; i < 4 ; ++i) {
1018 if (instr.dest.write_mask & (1 << i)){
1019 ir = new AluInstruction(op2_sub_int, from_nir(instr.dest, i), Value::zero,
1020 from_nir(instr.src[0], i), write);
1021 emit_instruction(ir);
1022 }
1023 }
1024 if (ir)
1025 ir->set_flag(alu_last_instr);
1026
1027 return true;
1028 }
1029
/* Eight single-character names; presumably a swizzle-id -> character table
 * (x, y, z, w, constant 0, constant 1, then '?' and '_').
 * NOTE(review): no user is visible in this part of the file -- confirm it is
 * still referenced. */
static const char swz[] = "xyzw01?_";
1031
1032
1033
1034 bool EmitAluInstruction::emit_alu_iabs(const nir_alu_instr& instr)
1035 {
1036 int sel_tmp = allocate_temp_register();
1037 GPRVector tmp(sel_tmp, {0,1,2,3});
1038
1039 std::array<PValue,4> src;
1040 AluInstruction *ir = nullptr;
1041 for (int i = 0; i < 4 ; ++i) {
1042 if (instr.dest.write_mask & (1 << i)){
1043 src[i] = from_nir(instr.src[0],i);
1044 ir = new AluInstruction(op2_sub_int, tmp.reg_i(i), Value::zero, src[i], write);
1045 emit_instruction(ir);
1046 }
1047 }
1048 if (ir)
1049 ir->set_flag(alu_last_instr);
1050
1051 for (int i = 0; i < 4 ; ++i) {
1052 if (instr.dest.write_mask & (1 << i)){
1053 ir = new AluInstruction(op3_cndge_int, from_nir(instr.dest, i), src[i],
1054 src[i], tmp.reg_i(i), write);
1055 emit_instruction(ir);
1056 }
1057 }
1058 if (ir)
1059 ir->set_flag(alu_last_instr);
1060 return true;
1061 }
1062
/* Emit an integer division or modulo as a sequence of ALU instructions built
 * around RECIP_UINT.
 *
 * instr      - NIR ALU instruction; src[0] is the numerator, src[1] the
 *              denominator
 * use_signed - emit the signed variant: operate on absolute values and fix
 *              up the sign of the result at the end
 * mod        - produce the remainder instead of the quotient
 *
 * The whole sequence is emitted once per written component, walking from
 * component 3 down to 0, and reuses the same three temporary registers in
 * every iteration.
 *
 * NOTE(review): the step comments below read each opcode by its name with
 * operands in the listed order (sub_int a,b as a - b; cnde_int c,a,b as
 * c == 0 ? a : b; cndge_int c,a,b as c >= 0 ? a : b) -- confirm against the
 * ISA reference.
 *
 * Always returns true. */
bool EmitAluInstruction::emit_alu_div_int(const nir_alu_instr& instr, bool use_signed, bool mod)
{

   int sel_tmp = allocate_temp_register();
   int sel_tmp0 = allocate_temp_register();
   int sel_tmp1 = allocate_temp_register();

   /* The four channels of the first temporary hold the operand magnitudes,
    * the combined sign and the reciprocal error term. */
   PValue asrc1(new GPRValue(sel_tmp, 0));
   PValue asrc2(new GPRValue(sel_tmp, 1));
   PValue rsign(new GPRValue(sel_tmp, 2));
   PValue err(new GPRValue(sel_tmp, 3));

   GPRVector tmp0(sel_tmp0, {0,1,2,3});
   GPRVector tmp1(sel_tmp1, {0,1,2,3});

   std::array<PValue, 4> src0;
   std::array<PValue, 4> src1;

   /* Resolve the sources of all written components before anything is emitted. */
   for (int i = 0; i < 4 ; ++i) {
      if (instr.dest.write_mask & (1 << i)) {
         src0[i] = from_nir(instr.src[0], i);
         src1[i] = from_nir(instr.src[1], i);
      }
   }


   for (int i = 3; i >= 0 ; --i) {
      if (!(instr.dest.write_mask & (1 << i)))
         continue;
      if (use_signed) {
         /* asrc1 = -src0, asrc2 = -src1, rsign = src0 ^ src1 */
         emit_instruction(op2_sub_int, asrc1, {Value::zero, src0[i]}, {alu_write});
         emit_instruction(op2_sub_int, asrc2, {Value::zero, src1[i]}, {alu_write});
         emit_instruction(op2_xor_int, rsign, {src0[i], src1[i]}, {alu_write, alu_last_instr});


         /* asrc1 = |src0|, asrc2 = |src1| */
         emit_instruction(op3_cndge_int, asrc1, {src0[i], src0[i], asrc1}, {alu_write});
         emit_instruction(op3_cndge_int, asrc2, {src1[i], src1[i], asrc2}, {alu_write, alu_last_instr});
      } else {
         /* Unsigned: use the sources directly instead of the temporaries. */
         asrc1 = src0[i];
         asrc2 = src1[i];
      }

      /* tmp0.x = recip_uint(asrc2): first reciprocal estimate */
      emit_instruction(op1_recip_uint, tmp0.x(), {asrc2}, {alu_write, alu_last_instr});

      /* tmp0.z = low 32 bits of tmp0.x * asrc2 */
      emit_instruction(op2_mullo_uint, tmp0.z(), {tmp0.x(), asrc2}, {alu_write, alu_last_instr});

      /* tmp0.w = -tmp0.z, tmp0.y = high 32 bits of tmp0.x * asrc2 */
      emit_instruction(op2_sub_int, tmp0.w(), {Value::zero, tmp0.z()}, {alu_write});
      emit_instruction(op2_mulhi_uint, tmp0.y(), {tmp0.x(), asrc2 }, {alu_write, alu_last_instr});

      /* tmp0.z = (tmp0.y == 0) ? tmp0.w : tmp0.z */
      emit_instruction(op3_cnde_int, tmp0.z(), {tmp0.y(), tmp0.w(), tmp0.z()}, {alu_write, alu_last_instr});

      /* err = high 32 bits of tmp0.z * tmp0.x */
      emit_instruction(op2_mulhi_uint, err, {tmp0.z(), tmp0.x()}, {alu_write, alu_last_instr});

      /* tmp1.x = tmp0.x - err, tmp1.y = tmp0.x + err */
      emit_instruction(op2_sub_int, tmp1.x(), {tmp0.x(), err}, {alu_write});
      emit_instruction(op2_add_int, tmp1.y(), {tmp0.x(), err}, {alu_write, alu_last_instr});

      /* tmp0.x = (tmp0.y == 0) ? tmp1.y : tmp1.x: corrected reciprocal */
      emit_instruction(op3_cnde_int, tmp0.x(), {tmp0.y(), tmp1.y(), tmp1.x()}, {alu_write, alu_last_instr});

      /* tmp0.z = high 32 bits of tmp0.x * asrc1: quotient estimate,
       * tmp0.y = low 32 bits of tmp0.z * asrc2 */
      emit_instruction(op2_mulhi_uint, tmp0.z(), {tmp0.x(), asrc1 }, {alu_write, alu_last_instr});
      emit_instruction(op2_mullo_uint, tmp0.y(), {tmp0.z(), asrc2 }, {alu_write, alu_last_instr});

      /* tmp0.w = asrc1 - tmp0.y: remainder estimate */
      emit_instruction(op2_sub_int, tmp0.w(), {asrc1, tmp0.y()}, {alu_write, alu_last_instr});


      /* tmp1.x = (tmp0.w >= asrc2), tmp1.y = (asrc1 >= tmp0.y); these two
       * are not flagged as last, the group is closed by the second
       * instruction of the mod / non-mod pair below. */
      emit_instruction(op2_setge_uint, tmp1.x(), {tmp0.w(), asrc2}, {alu_write});
      emit_instruction(op2_setge_uint, tmp1.y(), {asrc1, tmp0.y()}, {alu_write});

      if (mod) {
         /* Candidate remainders: tmp1.z = tmp0.w - asrc2, tmp1.w = tmp0.w + asrc2 */
         emit_instruction(op2_sub_int, tmp1.z(), {tmp0.w(), asrc2}, {alu_write});
         emit_instruction(op2_add_int, tmp1.w(), {tmp0.w(), asrc2}, {alu_write, alu_last_instr});
      } else {
         /* Candidate quotients: tmp1.z = tmp0.z + 1, tmp1.w = tmp0.z - 1 */
         emit_instruction(op2_add_int, tmp1.z(), {tmp0.z(), Value::one_i}, {alu_write});
         emit_instruction(op2_sub_int, tmp1.w(), {tmp0.z(), Value::one_i}, {alu_write, alu_last_instr});
      }

      /* tmp1.x = tmp1.x & tmp1.y */
      emit_instruction(op2_and_int, tmp1.x(), {tmp1.x(), tmp1.y()}, {alu_write, alu_last_instr});

      /* tmp0.z = (tmp1.x == 0) ? estimate : tmp1.z, where the estimate is
       * tmp0.w for mod and tmp0.z otherwise */
      if (mod)
         emit_instruction(op3_cnde_int, tmp0.z(), {tmp1.x(), tmp0.w(), tmp1.z()}, {alu_write, alu_last_instr});
      else
         emit_instruction(op3_cnde_int, tmp0.z(), {tmp1.x(), tmp0.z(), tmp1.z()}, {alu_write, alu_last_instr});

      if (use_signed) {
         /* tmp0.z = (tmp1.y == 0) ? tmp1.w : tmp0.z, tmp0.y = -tmp0.z */
         emit_instruction(op3_cnde_int, tmp0.z(), {tmp1.y(), tmp1.w(), tmp0.z()}, {alu_write, alu_last_instr});
         emit_instruction(op2_sub_int, tmp0.y(), {Value::zero, tmp0.z()}, {alu_write, alu_last_instr});

         /* Pick tmp0.z or its negation: the selector is the numerator for
          * mod and the xor of both operands for the quotient. */
         if (mod)
            emit_instruction(op3_cndge_int, from_nir(instr.dest, i), {src0[i], tmp0.z(), tmp0.y()},
                             {alu_write, alu_last_instr});
         else
            emit_instruction(op3_cndge_int, from_nir(instr.dest, i), {rsign, tmp0.z(), tmp0.y()},
                             {alu_write, alu_last_instr});
      } else {
         /* dest = (tmp1.y == 0) ? tmp1.w : tmp0.z */
         emit_instruction(op3_cnde_int, from_nir(instr.dest, i), {tmp1.y(), tmp1.w(), tmp0.z()}, {alu_write, alu_last_instr});
      }
   }
   return true;
}
1161
1162 void EmitAluInstruction::split_alu_modifiers(const nir_alu_src& src, GPRVector::Values& s,
1163 GPRVector::Values& v, int ncomp)
1164 {
1165
1166 AluInstruction *alu = nullptr;
1167 for (int i = 0; i < ncomp; ++i) {
1168 alu = new AluInstruction(op1_mov, v[i], s[i], {alu_write});
1169 if (src.abs)
1170 alu->set_flag(alu_src0_abs);
1171 if (src.negate)
1172 alu->set_flag(alu_src0_neg);
1173 emit_instruction(alu);
1174 }
1175 make_last(alu);
1176 }
1177
1178 bool EmitAluInstruction::emit_tex_fdd(const nir_alu_instr& instr, TexInstruction::Opcode op,
1179 bool fine)
1180 {
1181
1182 GPRVector::Values v;
1183 GPRVector::Values s;
1184 GPRVector::Values *source = &s;
1185 std::array<int, 4> writemask = {0,1,2,3};
1186
1187 int ncomp = instr.src[0].src.is_ssa ? instr.src[0].src.ssa->num_components :
1188 instr.src[0].src.reg.reg->num_components;
1189
1190 for (int i = 0; i < 4; ++i) {
1191 writemask[i] = (instr.dest.write_mask & (1 << i)) ? i : 7;
1192 v[i] = from_nir(instr.dest, (i < ncomp) ? i : 0);
1193 s[i] = from_nir(instr.src[0], (i < ncomp) ? i : 0);
1194 }
1195
1196 if (instr.src[0].abs || instr.src[0].negate) {
1197 split_alu_modifiers(instr.src[0], s, v, ncomp);
1198 source = &v;
1199 }
1200
1201 /* This is querying the dreivatives of the output fb, so we would either need
1202 * access to the neighboring pixels or to the framebuffer. Neither is currently
1203 * implemented */
1204 GPRVector dst(v);
1205 GPRVector src(*source);
1206
1207 auto tex = new TexInstruction(op, dst, src, 0, R600_MAX_CONST_BUFFERS, PValue());
1208 tex->set_dest_swizzle(writemask);
1209
1210 if (fine) {
1211 std::cerr << "Sewt fine flag\n";
1212 tex->set_flag(TexInstruction::grad_fine);
1213 }
1214
1215 emit_instruction(tex);
1216
1217 return true;
1218 }
1219
1220 bool EmitAluInstruction::emit_bitfield_extract(const nir_alu_instr& instr, EAluOp opcode)
1221 {
1222 int itmp = allocate_temp_register();
1223 std::array<PValue, 4> tmp;
1224 std::array<PValue, 4> dst;
1225 std::array<PValue, 4> src0;
1226 std::array<PValue, 4> shift;
1227
1228 PValue l32(new LiteralValue(32));
1229 unsigned write_mask = instr.dest.write_mask;
1230
1231 AluInstruction *ir = nullptr;
1232 for (int i = 0; i < 4; i++) {
1233 if (!(write_mask & (1<<i)))
1234 continue;
1235 dst[i] = from_nir(instr.dest, i);
1236 src0[i] = from_nir(instr.src[0], i);
1237 shift[i] = from_nir(instr.src[2], i);
1238
1239 ir = new AluInstruction(opcode, dst[i],
1240 {src0[i], from_nir(instr.src[1], i), shift[i]},
1241 {alu_write});
1242 emit_instruction(ir);
1243 }
1244 make_last(ir);
1245
1246 for (int i = 0; i < 4; i++) {
1247 if (!(write_mask & (1<<i)))
1248 continue;
1249 tmp[i] = PValue(new GPRValue(itmp, i));
1250 ir = new AluInstruction(op2_setge_int, tmp[i], {shift[i], l32},
1251 {alu_write});
1252 emit_instruction(ir);
1253 }
1254 make_last(ir);
1255
1256 for (int i = 0; i < 4; i++) {
1257 if (!(write_mask & (1<<i)))
1258 continue;
1259 ir = new AluInstruction(op3_cnde_int, dst[i], {tmp[i], dst[i], src0[i]},
1260 {alu_write});
1261 emit_instruction(ir);
1262 }
1263 make_last(ir);
1264
1265 return true;
1266 }
1267
1268 bool EmitAluInstruction::emit_bitfield_insert(const nir_alu_instr& instr)
1269 {
1270 auto t0 = get_temp_vec4();
1271 auto t1 = get_temp_vec4();
1272 auto t2 = get_temp_vec4();
1273
1274 PValue l32(new LiteralValue(32));
1275 unsigned write_mask = instr.dest.write_mask;
1276 if (!write_mask) return true;
1277
1278 AluInstruction *ir = nullptr;
1279 for (int i = 0; i < 4; i++) {
1280 if (!(write_mask & (1<<i)))
1281 continue;
1282
1283 ir = new AluInstruction(op2_setge_int, t0[i], {from_nir(instr.src[3], i), l32}, {alu_write});
1284 emit_instruction(ir);
1285 }
1286 make_last(ir);
1287
1288 for (int i = 0; i < 4; i++) {
1289 if (!(write_mask & (1<<i)))
1290 continue;
1291 ir = new AluInstruction(op2_bfm_int, t1[i], {from_nir(instr.src[3], i),
1292 from_nir(instr.src[2], i)}, {alu_write});
1293 emit_instruction(ir);
1294 }
1295 ir->set_flag(alu_last_instr);
1296
1297 for (int i = 0; i < 4; i++) {
1298 if (!(write_mask & (1<<i)))
1299 continue;
1300 ir = new AluInstruction(op2_lshl_int, t2[i], {from_nir(instr.src[1], i),
1301 from_nir(instr.src[2], i)}, {alu_write});
1302 emit_instruction(ir);
1303 }
1304 ir->set_flag(alu_last_instr);
1305
1306
1307 for (int i = 0; i < 4; i++) {
1308 if (!(write_mask & (1<<i)))
1309 continue;
1310 ir = new AluInstruction(op3_bfi_int, from_nir(instr.dest, i),
1311 {t1[i], t2[i], from_nir(instr.src[0], i)}, {alu_write});
1312 emit_instruction(ir);
1313 }
1314 ir->set_flag(alu_last_instr);
1315
1316 for (int i = 0; i < 4; i++) {
1317 if (!(write_mask & (1<<i)))
1318 continue;
1319 ir = new AluInstruction(op3_cnde_int, from_nir(instr.dest, i),
1320 {t0[i], from_nir(instr.dest, i),
1321 from_nir(instr.src[1], i)}, {alu_write});
1322 emit_instruction(ir);
1323 }
1324 ir->set_flag(alu_last_instr);
1325
1326 return true;
1327 }
1328
1329 bool EmitAluInstruction::emit_unpack_32_2x16_split_y(const nir_alu_instr& instr)
1330 {
1331 emit_instruction(op2_lshr_int, from_nir(instr.dest, 0),
1332 {from_nir(instr.src[0], 0), PValue(new LiteralValue(16))},
1333 {alu_write, alu_last_instr});
1334
1335 emit_instruction(op1_flt16_to_flt32, from_nir(instr.dest, 0),
1336 {from_nir(instr.dest, 0)},{alu_write, alu_last_instr});
1337
1338 return true;
1339 }
1340
1341 bool EmitAluInstruction::emit_unpack_32_2x16_split_x(const nir_alu_instr& instr)
1342 {
1343 emit_instruction(op1_flt16_to_flt32, from_nir(instr.dest, 0),
1344 {from_nir(instr.src[0], 0)},{alu_write, alu_last_instr});
1345 return true;
1346 }
1347
1348 bool EmitAluInstruction::emit_pack_32_2x16_split(const nir_alu_instr& instr)
1349 {
1350 int it0 = allocate_temp_register();
1351 PValue x(new GPRValue(it0, 0));
1352 PValue y(new GPRValue(it0, 1));
1353
1354 emit_instruction(op1_flt32_to_flt16, x,{from_nir(instr.src[0], 0)},{alu_write});
1355 emit_instruction(op1_flt32_to_flt16, y,{from_nir(instr.src[1], 0)},{alu_write, alu_last_instr});
1356
1357 emit_instruction(op2_lshl_int, y, {y, PValue(new LiteralValue(16))},{alu_write, alu_last_instr});
1358
1359 emit_instruction(op2_or_int, {from_nir(instr.dest, 0)} , {x, y},{alu_write, alu_last_instr});
1360
1361 return true;
1362 }
1363
1364 }