nir: rename nir_op_fne to nir_op_fneu
[mesa.git] / src / gallium / drivers / r600 / sfn / sfn_emitaluinstruction.cpp
1 /* -*- mesa-c++ -*-
2 *
3 * Copyright (c) 2018 Collabora LTD
4 *
5 * Author: Gert Wollny <gert.wollny@collabora.com>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27
28 #include "sfn_emitaluinstruction.h"
29 #include "sfn_debug.h"
30
31 #include "gallium/drivers/r600/r600_shader.h"
32
33 namespace r600 {
34
35 using std::vector;
36
37 EmitAluInstruction::EmitAluInstruction(ShaderFromNirProcessor& processor):
38 EmitInstruction (processor)
39 {
40
41 }
42
43 bool EmitAluInstruction::do_emit(nir_instr* ir)
44 {
45 const nir_alu_instr& instr = *nir_instr_as_alu(ir);
46
47 r600::sfn_log << SfnLog::instr << "emit '"
48 << *ir
49 << " bitsize: " << static_cast<int>(instr.dest.dest.ssa.bit_size)
50 << "' (" << __func__ << ")\n";
51
52 split_constants(instr);
53
54 switch (instr.op) {
55 case nir_op_b2f32: return emit_alu_b2f(instr);
56 case nir_op_i2b1: return emit_alu_i2orf2_b1(instr, op2_setne_int);
57 case nir_op_f2b1: return emit_alu_i2orf2_b1(instr, op2_setne_dx10);
58 case nir_op_b2b1:
59 case nir_op_mov:return emit_mov(instr);
60 case nir_op_ftrunc: return emit_alu_op1(instr, op1_trunc);
61 case nir_op_fabs: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_abs});
62 case nir_op_fneg: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_neg});
63 case nir_op_fsat: return emit_alu_op1(instr, op1_mov, {1 << alu_dst_clamp});
64 case nir_op_frcp: return emit_alu_trans_op1(instr, op1_recip_ieee);
65 case nir_op_frsq: return emit_alu_trans_op1(instr, op1_recipsqrt_ieee1);
66 case nir_op_fsin: return emit_alu_trig_op1(instr, op1_sin);
67 case nir_op_fcos: return emit_alu_trig_op1(instr, op1_cos);
68 case nir_op_fexp2: return emit_alu_trans_op1(instr, op1_exp_ieee);
69 case nir_op_flog2: return emit_alu_trans_op1(instr, op1_log_clamped);
70
71 case nir_op_fround_even: return emit_alu_op1(instr, op1_rndne);
72 case nir_op_fsqrt: return emit_alu_trans_op1(instr, op1_sqrt_ieee);
73 case nir_op_i2f32: return emit_alu_trans_op1(instr, op1_int_to_flt);
74 case nir_op_u2f32: return emit_alu_trans_op1(instr, op1_uint_to_flt);
75 case nir_op_f2i32: return emit_alu_f2i32_or_u32(instr, op1_flt_to_int);
76 case nir_op_f2u32: return emit_alu_f2i32_or_u32(instr, op1_flt_to_uint);
77
78 case nir_op_fceil: return emit_alu_op1(instr, op1_ceil);
79 case nir_op_ffract: return emit_alu_op1(instr, op1_fract);
80 case nir_op_ffloor: return emit_alu_op1(instr, op1_floor);
81
82 case nir_op_fsign: return emit_fsign(instr);
83 case nir_op_fdph: return emit_fdph(instr);
84
85 case nir_op_ibitfield_extract: return emit_bitfield_extract(instr, op3_bfe_int);
86 case nir_op_ubitfield_extract: return emit_bitfield_extract(instr, op3_bfe_uint);
87 case nir_op_bitfield_insert: return emit_bitfield_insert(instr);
88 case nir_op_bit_count: return emit_alu_op1(instr, op1_bcnt_int);
89 case nir_op_bitfield_reverse: return emit_alu_op1(instr, op1_bfrev_int);
90
91 case nir_op_ieq: return emit_alu_op2_int(instr, op2_sete_int);
92 case nir_op_ine: return emit_alu_op2_int(instr, op2_setne_int);
93 case nir_op_ige: return emit_alu_op2_int(instr, op2_setge_int);
94 case nir_op_ishl: return emit_alu_op2_int(instr, op2_lshl_int);
95 case nir_op_ishr: return emit_alu_op2_int(instr, op2_ashr_int);
96 case nir_op_ilt: return emit_alu_op2_int(instr, op2_setgt_int, op2_opt_reverse);
97 case nir_op_iand: return emit_alu_op2_int(instr, op2_and_int);
98 case nir_op_ixor: return emit_alu_op2_int(instr, op2_xor_int);
99 case nir_op_imin: return emit_alu_op2_int(instr, op2_min_int);
100 case nir_op_imax: return emit_alu_op2_int(instr, op2_max_int);
101 case nir_op_imul_high: return emit_alu_trans_op2(instr, op2_mulhi_int);
102 case nir_op_umul_high: return emit_alu_trans_op2(instr, op2_mulhi_uint);
103 case nir_op_umax: return emit_alu_op2_int(instr, op2_max_uint);
104 case nir_op_umin: return emit_alu_op2_int(instr, op2_min_uint);
105 case nir_op_ior: return emit_alu_op2_int(instr, op2_or_int);
106 case nir_op_inot: return emit_alu_op1(instr, op1_not_int);
107 case nir_op_iabs: return emit_alu_iabs(instr);
108 case nir_op_ineg: return emit_alu_ineg(instr);
109 case nir_op_idiv: return emit_alu_div_int(instr, true, false);
110 case nir_op_udiv: return emit_alu_div_int(instr, false, false);
111 case nir_op_umod: return emit_alu_div_int(instr, false, true);
112 case nir_op_isign: return emit_alu_isign(instr);
113
114 case nir_op_uge: return emit_alu_op2_int(instr, op2_setge_uint);
115 case nir_op_ult: return emit_alu_op2_int(instr, op2_setgt_uint, op2_opt_reverse);
116 case nir_op_ushr: return emit_alu_op2_int(instr, op2_lshr_int);
117
118 case nir_op_flt: return emit_alu_op2(instr, op2_setgt_dx10, op2_opt_reverse);
119
120 case nir_op_fge: return emit_alu_op2(instr, op2_setge_dx10);
121 case nir_op_fneu: return emit_alu_op2(instr, op2_setne_dx10);
122 case nir_op_feq: return emit_alu_op2(instr, op2_sete_dx10);
123
124 case nir_op_fmin: return emit_alu_op2(instr, op2_min_dx10);
125 case nir_op_fmax: return emit_alu_op2(instr, op2_max_dx10);
126 case nir_op_fmul: return emit_alu_op2(instr, op2_mul_ieee);
127 case nir_op_imul: return emit_alu_trans_op2(instr, op2_mullo_int);
128 case nir_op_fadd: return emit_alu_op2(instr, op2_add);
129 case nir_op_fsub: return emit_alu_op2(instr, op2_add, op2_opt_neg_src1);
130 case nir_op_iadd: return emit_alu_op2_int(instr, op2_add_int);
131 case nir_op_isub: return emit_alu_op2_int(instr, op2_sub_int);
132 case nir_op_fdot2: return emit_dot(instr, 2);
133 case nir_op_fdot3: return emit_dot(instr, 3);
134 case nir_op_fdot4: return emit_dot(instr, 4);
135
136 case nir_op_bany_inequal2: return emit_any_all_icomp(instr, op2_setne_int, 2, false);
137 case nir_op_bany_inequal3: return emit_any_all_icomp(instr, op2_setne_int, 3, false);
138 case nir_op_bany_inequal4: return emit_any_all_icomp(instr, op2_setne_int, 4, false);
139
140 case nir_op_ball_iequal2: return emit_any_all_icomp(instr, op2_sete_int, 2, true);
141 case nir_op_ball_iequal3: return emit_any_all_icomp(instr, op2_sete_int, 3, true);
142 case nir_op_ball_iequal4: return emit_any_all_icomp(instr, op2_sete_int, 4, true);
143
144 case nir_op_bany_fnequal2: return emit_any_all_fcomp2(instr, op2_setne_dx10, false);
145 case nir_op_bany_fnequal3: return emit_any_all_fcomp(instr, op2_setne, 3, false);
146 case nir_op_bany_fnequal4: return emit_any_all_fcomp(instr, op2_setne, 4, false);
147
148 case nir_op_ball_fequal2: return emit_any_all_fcomp2(instr, op2_sete_dx10, true);
149 case nir_op_ball_fequal3: return emit_any_all_fcomp(instr, op2_sete, 3, true);
150 case nir_op_ball_fequal4: return emit_any_all_fcomp(instr, op2_sete, 4, true);
151
152
153 case nir_op_ffma: return emit_alu_op3(instr, op3_muladd_ieee);
154 case nir_op_bcsel: return emit_alu_op3(instr, op3_cnde_int, {0, 2, 1});
155 case nir_op_vec2: return emit_create_vec(instr, 2);
156 case nir_op_vec3: return emit_create_vec(instr, 3);
157 case nir_op_vec4: return emit_create_vec(instr, 4);
158
159 case nir_op_find_lsb: return emit_alu_op1(instr, op1_ffbl_int);
160 case nir_op_ufind_msb: return emit_find_msb(instr, false);
161 case nir_op_ifind_msb: return emit_find_msb(instr, true);
162 case nir_op_b2i32: return emit_b2i32(instr);
163 case nir_op_pack_64_2x32_split: return emit_pack_64_2x32_split(instr);
164 case nir_op_unpack_64_2x32_split_x: return emit_unpack_64_2x32_split(instr, 0);
165 case nir_op_unpack_64_2x32_split_y: return emit_unpack_64_2x32_split(instr, 1);
166 case nir_op_unpack_half_2x16_split_x: return emit_unpack_32_2x16_split_x(instr);
167 case nir_op_unpack_half_2x16_split_y: return emit_unpack_32_2x16_split_y(instr);
168 case nir_op_pack_half_2x16_split: return emit_pack_32_2x16_split(instr);
169
170
171 /* These are in the ALU instruction list, but they should be texture instructions */
172 case nir_op_fddx_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, true);
173 case nir_op_fddx_coarse:
174 case nir_op_fddx: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, false);
175
176 case nir_op_fddy_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_v, true);
177 case nir_op_fddy_coarse:
178 case nir_op_fddy: return emit_tex_fdd(instr,TexInstruction::get_gradient_v, false);
179
180 case nir_op_umad24: return emit_alu_op3(instr, op3_muladd_uint24, {0, 1, 2});
181 case nir_op_umul24: return emit_alu_op2(instr, op2_mul_uint24);
182 default:
183 return false;
184 }
185 }
186
187 void EmitAluInstruction::split_constants(const nir_alu_instr& instr)
188 {
189 const nir_op_info *op_info = &nir_op_infos[instr.op];
190 if (op_info->num_inputs < 2)
191 return;
192
193 int nconst = 0;
194 std::array<const UniformValue *,4> c;
195 std::array<int,4> idx;
196 for (unsigned i = 0; i < op_info->num_inputs; ++i) {
197 PValue src = from_nir(instr.src[i], 0);
198 assert(src);
199 if (src->type() == Value::kconst) {
200 c[nconst] = static_cast<const UniformValue *>(src.get());
201
202 idx[nconst++] = i;
203 }
204 }
205 if (nconst < 2)
206 return;
207
208 unsigned sel = c[0]->sel();
209 unsigned kcache = c[0]->kcache_bank();
210 sfn_log << SfnLog::reg << "split " << nconst << " constants, sel[0] = " << sel; ;
211
212 for (int i = 1; i < nconst; ++i) {
213 sfn_log << "sel[" << i << "] = " << c[i]->sel() << "\n";
214 if (c[i]->sel() != sel || c[i]->kcache_bank() != kcache) {
215 load_uniform(instr.src[idx[i]]);
216 }
217 }
218 }
219
220 bool EmitAluInstruction::emit_alu_inot(const nir_alu_instr& instr)
221 {
222 if (instr.src[0].negate || instr.src[0].abs) {
223 std::cerr << "source modifiers not supported with int ops\n";
224 return false;
225 }
226
227 AluInstruction *ir = nullptr;
228 for (int i = 0; i < 4 ; ++i) {
229 if (instr.dest.write_mask & (1 << i)){
230 ir = new AluInstruction(op1_not_int, from_nir(instr.dest, i),
231 from_nir(instr.src[0], i), write);
232 emit_instruction(ir);
233 }
234 }
235 if (ir)
236 ir->set_flag(alu_last_instr);
237 return true;
238 }
239
240 bool EmitAluInstruction::emit_alu_op1(const nir_alu_instr& instr, EAluOp opcode,
241 const AluOpFlags& flags)
242 {
243 AluInstruction *ir = nullptr;
244 for (int i = 0; i < 4 ; ++i) {
245 if (instr.dest.write_mask & (1 << i)){
246 ir = new AluInstruction(opcode, from_nir(instr.dest, i),
247 from_nir(instr.src[0], i), write);
248
249 if (flags.test(alu_src0_abs) || instr.src[0].abs)
250 ir->set_flag(alu_src0_abs);
251
252 if (instr.src[0].negate ^ flags.test(alu_src0_neg))
253 ir->set_flag(alu_src0_neg);
254
255 if (flags.test(alu_dst_clamp) || instr.dest.saturate)
256 ir->set_flag(alu_dst_clamp);
257
258 emit_instruction(ir);
259 }
260 }
261 make_last(ir);
262
263 return true;
264 }
265
266 bool EmitAluInstruction::emit_mov(const nir_alu_instr& instr)
267 {
268 /* If the op is a plain move beween SSA values we can just forward
269 * the register reference to the original register */
270 if (instr.dest.dest.is_ssa && instr.src[0].src.is_ssa &&
271 !instr.src[0].abs && !instr.src[0].negate && !instr.dest.saturate) {
272 bool result = true;
273 for (int i = 0; i < 4 ; ++i) {
274 if (instr.dest.write_mask & (1 << i)){
275 auto src = from_nir(instr.src[0], i);
276 result &= inject_register(instr.dest.dest.ssa.index, i,
277 src, true);
278
279 if (src->type() == Value::kconst) {
280 add_uniform((instr.dest.dest.ssa.index << 2) + i, src);
281 }
282 }
283 }
284 return result;
285 } else {
286 return emit_alu_op1(instr, op1_mov);
287 }
288 }
289
290 bool EmitAluInstruction::emit_alu_trig_op1(const nir_alu_instr& instr, EAluOp opcode)
291 {
292 // normalize by dividing by 2*PI, shift by 0.5, take fraction, and
293 // then shift back
294
295 const float inv_2_pi = 0.15915494f;
296
297 PValue v[4]; // this might need some additional temp register creation
298 for (unsigned i = 0; i < 4 ; ++i)
299 v[i] = from_nir(instr.dest, i);
300
301 PValue inv_pihalf = PValue(new LiteralValue(inv_2_pi, 0));
302 AluInstruction *ir = nullptr;
303 for (unsigned i = 0; i < 4 ; ++i) {
304 if (!(instr.dest.write_mask & (1 << i)))
305 continue;
306 ir = new AluInstruction(op3_muladd_ieee, v[i],
307 {from_nir(instr.src[0],i), inv_pihalf, Value::zero_dot_5},
308 {alu_write});
309 if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
310 emit_instruction(ir);
311 }
312 make_last(ir);
313
314 for (unsigned i = 0; i < 4 ; ++i) {
315 if (!(instr.dest.write_mask & (1 << i)))
316 continue;
317 ir = new AluInstruction(op1_fract, v[i], v[i], {alu_write});
318 emit_instruction(ir);
319 }
320 make_last(ir);
321
322 for (unsigned i = 0; i < 4 ; ++i) {
323 if (!(instr.dest.write_mask & (1 << i)))
324 continue;
325 ir = new AluInstruction(op2_add, v[i], v[i], Value::zero_dot_5, write);
326 ir->set_flag(alu_src1_neg);
327 emit_instruction(ir);
328 }
329 make_last(ir);
330
331 for (unsigned i = 0; i < 4 ; ++i) {
332 if (!(instr.dest.write_mask & (1 << i)))
333 continue;
334
335 ir = new AluInstruction(opcode, v[i], v[i], last_write);
336 emit_instruction(ir);
337 }
338 return true;
339 }
340
341 bool EmitAluInstruction::emit_alu_trans_op1(const nir_alu_instr& instr, EAluOp opcode,
342 bool absolute)
343 {
344 AluInstruction *ir = nullptr;
345 std::set<int> src_idx;
346
347 if (get_chip_class() == CAYMAN) {
348 int last_slot = (instr.dest.write_mask & 0x8) ? 4 : 3;
349 for (int i = 0; i < last_slot; ++i) {
350 ir = new AluInstruction(opcode, from_nir(instr.dest, i),
351 from_nir(instr.src[0], 0), instr.dest.write_mask & (1 << i) ? write : empty);
352 if (absolute || instr.src[0].abs) ir->set_flag(alu_src0_abs);
353 if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
354 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
355
356 if (i == (last_slot - 1)) ir->set_flag(alu_last_instr);
357
358 emit_instruction(ir);
359 }
360 } else {
361 for (int i = 0; i < 4 ; ++i) {
362 if (instr.dest.write_mask & (1 << i)){
363 ir = new AluInstruction(opcode, from_nir(instr.dest, i),
364 from_nir(instr.src[0], i), last_write);
365 if (absolute || instr.src[0].abs) ir->set_flag(alu_src0_abs);
366 if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
367 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
368 emit_instruction(ir);
369 }
370 }
371 }
372 return true;
373 }
374
375 bool EmitAluInstruction::emit_alu_f2i32_or_u32(const nir_alu_instr& instr, EAluOp op)
376 {
377 AluInstruction *ir = nullptr;
378 std::array<PValue, 4> v;
379
380 for (int i = 0; i < 4; ++i) {
381 if (!(instr.dest.write_mask & (1 << i)))
382 continue;
383 v[i] = from_nir(instr.dest, i);
384 ir = new AluInstruction(op1_trunc, v[i], from_nir(instr.src[0], i), {alu_write});
385 if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
386 if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
387 emit_instruction(ir);
388 }
389 make_last(ir);
390
391 for (int i = 0; i < 4; ++i) {
392 if (!(instr.dest.write_mask & (1 << i)))
393 continue;
394 ir = new AluInstruction(op, v[i], v[i], {alu_write});
395 emit_instruction(ir);
396 if (op == op1_flt_to_uint)
397 make_last(ir);
398 }
399 make_last(ir);
400
401 return true;
402 }
403
404 bool EmitAluInstruction::emit_find_msb(const nir_alu_instr& instr, bool sgn)
405 {
406 int sel_tmp = allocate_temp_register();
407 int sel_tmp2 = allocate_temp_register();
408 GPRVector tmp(sel_tmp, {0,1,2,3});
409 GPRVector tmp2(sel_tmp2, {0,1,2,3});
410 AluInstruction *ir = nullptr;
411 EAluOp opcode = sgn ? op1_ffbh_int : op1_ffbh_uint;
412 for (int i = 0; i < 4; ++i) {
413 if (!(instr.dest.write_mask & (1 << i)))
414 continue;
415
416 ir = new AluInstruction(opcode, tmp.reg_i(i), from_nir(instr.src[0], i), write);
417 emit_instruction(ir);
418 }
419 make_last(ir);
420
421 for (int i = 0; i < 4 ; ++i) {
422 if (!(instr.dest.write_mask & (1 << i)))
423 continue;
424
425 ir = new AluInstruction(op2_sub_int, tmp2.reg_i(i),
426 PValue(new LiteralValue(31u, 0)), tmp.reg_i(i), write);
427 emit_instruction(ir);
428 }
429 make_last(ir);
430
431 for (int i = 0; i < 4 ; ++i) {
432 if (!(instr.dest.write_mask & (1 << i)))
433 continue;
434
435 ir = new AluInstruction(op3_cndge_int, from_nir(instr.dest, i), tmp.reg_i(i),
436 tmp2.reg_i(i), tmp.reg_i(i), write);
437 emit_instruction(ir);
438 }
439 make_last(ir);
440
441 return true;
442 }
443
444 bool EmitAluInstruction::emit_b2i32(const nir_alu_instr& instr)
445 {
446 AluInstruction *ir = nullptr;
447 for (int i = 0; i < 4 ; ++i) {
448 if (!(instr.dest.write_mask & (1 << i)))
449 continue;
450
451 ir = new AluInstruction(op2_and_int, from_nir(instr.dest, i),
452 from_nir(instr.src[0], i), Value::one_i, write);
453 emit_instruction(ir);
454 }
455 make_last(ir);
456
457 return true;
458 }
459
460 bool EmitAluInstruction::emit_pack_64_2x32_split(const nir_alu_instr& instr)
461 {
462 AluInstruction *ir = nullptr;
463 for (unsigned i = 0; i < 2; ++i) {
464 if (!(instr.dest.write_mask & (1 << i)))
465 continue;
466 ir = new AluInstruction(op1_mov, from_nir(instr.dest, i),
467 from_nir(instr.src[0], i), write);
468 emit_instruction(ir);
469 }
470 ir->set_flag(alu_last_instr);
471 return true;
472 }
473
474 bool EmitAluInstruction::emit_unpack_64_2x32_split(const nir_alu_instr& instr, unsigned comp)
475 {
476 emit_instruction(new AluInstruction(op1_mov, from_nir(instr.dest, 0),
477 from_nir(instr.src[0], comp), last_write));
478 return true;
479 }
480
481 bool EmitAluInstruction::emit_create_vec(const nir_alu_instr& instr, unsigned nc)
482 {
483 AluInstruction *ir = nullptr;
484 std::set<int> src_slot;
485 for(unsigned i = 0; i < nc; ++i) {
486 if (instr.dest.write_mask & (1 << i)){
487 auto src = from_nir(instr.src[i], 0);
488 ir = new AluInstruction(op1_mov, from_nir(instr.dest, i), src, write);
489 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
490
491 // FIXME: This is a rather crude approach to fix the problem that
492 // r600 can't read from four different slots of the same component
493 // here we check only for the register index
494 if (src->type() == Value::gpr)
495 src_slot.insert(src->sel());
496 if (src_slot.size() >= 3) {
497 src_slot.clear();
498 ir->set_flag(alu_last_instr);
499 }
500 emit_instruction(ir);
501 }
502 }
503 if (ir)
504 ir->set_flag(alu_last_instr);
505 return true;
506 }
507
508 bool EmitAluInstruction::emit_dot(const nir_alu_instr& instr, int n)
509 {
510 const nir_alu_src& src0 = instr.src[0];
511 const nir_alu_src& src1 = instr.src[1];
512
513 AluInstruction *ir = nullptr;
514 for (int i = 0; i < n ; ++i) {
515 ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
516 from_nir(src0, i), from_nir(src1, i),
517 instr.dest.write_mask & (1 << i) ? write : empty);
518
519 if (src0.negate) ir->set_flag(alu_src0_neg);
520 if (src0.abs) ir->set_flag(alu_src0_abs);
521 if (src1.negate) ir->set_flag(alu_src1_neg);
522 if (src1.abs) ir->set_flag(alu_src1_abs);
523
524 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
525 emit_instruction(ir);
526 }
527 for (int i = n; i < 4 ; ++i) {
528 ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
529 Value::zero, Value::zero,
530 instr.dest.write_mask & (1 << i) ? write : empty);
531 emit_instruction(ir);
532 }
533
534 if (ir)
535 ir->set_flag(alu_last_instr);
536 return true;
537 }
538
539 bool EmitAluInstruction::emit_fdph(const nir_alu_instr& instr)
540 {
541 const nir_alu_src& src0 = instr.src[0];
542 const nir_alu_src& src1 = instr.src[1];
543
544 AluInstruction *ir = nullptr;
545 for (int i = 0; i < 3 ; ++i) {
546 ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
547 from_nir(src0, i), from_nir(src1, i),
548 instr.dest.write_mask & (1 << i) ? write : empty);
549 if (src0.negate) ir->set_flag(alu_src0_neg);
550 if (src0.abs) ir->set_flag(alu_src0_abs);
551 if (src1.negate) ir->set_flag(alu_src1_neg);
552 if (src1.abs) ir->set_flag(alu_src1_abs);
553 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
554 emit_instruction(ir);
555 }
556
557 ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, 3), Value::one_f,
558 from_nir(src1, 3), (instr.dest.write_mask) & (1 << 3) ? write : empty);
559 if (src1.negate) ir->set_flag(alu_src1_neg);
560 if (src1.abs) ir->set_flag(alu_src1_abs);
561 emit_instruction(ir);
562
563 ir->set_flag(alu_last_instr);
564 return true;
565
566 }
567
568 bool EmitAluInstruction::emit_alu_i2orf2_b1(const nir_alu_instr& instr, EAluOp op)
569 {
570 AluInstruction *ir = nullptr;
571 for (int i = 0; i < 4 ; ++i) {
572 if (instr.dest.write_mask & (1 << i)) {
573 ir = new AluInstruction(op, from_nir(instr.dest, i),
574 from_nir(instr.src[0], i), Value::zero,
575 write);
576 emit_instruction(ir);
577 }
578 }
579 if (ir)
580 ir->set_flag(alu_last_instr);
581 return true;
582 }
583
584 bool EmitAluInstruction::emit_alu_b2f(const nir_alu_instr& instr)
585 {
586 AluInstruction *ir = nullptr;
587 for (int i = 0; i < 4 ; ++i) {
588 if (instr.dest.write_mask & (1 << i)){
589 ir = new AluInstruction(op2_and_int, from_nir(instr.dest, i),
590 from_nir(instr.src[0], i), Value::one_f, write);
591 if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
592 if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
593 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
594 emit_instruction(ir);
595 }
596 }
597 if (ir)
598 ir->set_flag(alu_last_instr);
599 return true;
600 }
601
602 bool EmitAluInstruction::emit_any_all_icomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all)
603 {
604
605 AluInstruction *ir = nullptr;
606 PValue v[4]; // this might need some additional temp register creation
607 for (unsigned i = 0; i < 4 ; ++i)
608 v[i] = from_nir(instr.dest, i);
609
610 EAluOp combine = all ? op2_and_int : op2_or_int;
611
612 /* For integers we can not use the modifiers, so this needs some emulation */
613 /* Should actually be lowered with NIR */
614 if (instr.src[0].negate == instr.src[1].negate &&
615 instr.src[0].abs == instr.src[1].abs) {
616
617 for (unsigned i = 0; i < nc ; ++i) {
618 ir = new AluInstruction(op, v[i], from_nir(instr.src[0], i),
619 from_nir(instr.src[1], i), write);
620 emit_instruction(ir);
621 }
622 if (ir)
623 ir->set_flag(alu_last_instr);
624 } else {
625 std::cerr << "Negate in iequal/inequal not (yet) supported\n";
626 return false;
627 }
628
629 for (unsigned i = 0; i < nc/2 ; ++i) {
630 ir = new AluInstruction(combine, v[2 * i], v[2 * i], v[2 * i + 1], write);
631 emit_instruction(ir);
632 }
633 if (ir)
634 ir->set_flag(alu_last_instr);
635
636 if (nc > 2) {
637 ir = new AluInstruction(combine, v[0], v[0], v[2], last_write);
638 emit_instruction(ir);
639 }
640
641 return true;
642 }
643
644 bool EmitAluInstruction::emit_any_all_fcomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all)
645 {
646 AluInstruction *ir = nullptr;
647 PValue v[4]; // this might need some additional temp register creation
648 for (unsigned i = 0; i < 4 ; ++i)
649 v[i] = from_nir(instr.dest, i);
650
651 for (unsigned i = 0; i < nc ; ++i) {
652 ir = new AluInstruction(op, v[i], from_nir(instr.src[0],i),
653 from_nir(instr.src[1],i), write);
654
655 if (instr.src[0].abs)
656 ir->set_flag(alu_src0_abs);
657 if (instr.src[0].negate)
658 ir->set_flag(alu_src0_neg);
659
660 if (instr.src[1].abs)
661 ir->set_flag(alu_src1_abs);
662 if (instr.src[1].negate)
663 ir->set_flag(alu_src1_neg);
664
665 emit_instruction(ir);
666 }
667 if (ir)
668 ir->set_flag(alu_last_instr);
669
670 for (unsigned i = 0; i < nc ; ++i) {
671 ir = new AluInstruction(op1_max4, v[i], v[i], write);
672 if (all) ir->set_flag(alu_src0_neg);
673 emit_instruction(ir);
674 }
675
676 for (unsigned i = nc; i < 4 ; ++i) {
677 ir = new AluInstruction(op1_max4, v[i],
678 all ? Value::one_f : Value::zero, write);
679 if (all)
680 ir->set_flag(alu_src0_neg);
681
682 emit_instruction(ir);
683 }
684
685 ir->set_flag(alu_last_instr);
686
687 if (all)
688 op = (op == op2_sete) ? op2_sete_dx10: op2_setne_dx10;
689 else
690 op = (op == op2_sete) ? op2_setne_dx10: op2_sete_dx10;
691
692 ir = new AluInstruction(op, v[0], v[0], Value::one_f, last_write);
693 if (all)
694 ir->set_flag(alu_src1_neg);
695 emit_instruction(ir);
696
697 return true;
698 }
699
700 bool EmitAluInstruction::emit_any_all_fcomp2(const nir_alu_instr& instr, EAluOp op, bool all)
701 {
702 AluInstruction *ir = nullptr;
703 PValue v[4]; // this might need some additional temp register creation
704 for (unsigned i = 0; i < 4 ; ++i)
705 v[i] = from_nir(instr.dest, i);
706
707 for (unsigned i = 0; i < 2 ; ++i) {
708 ir = new AluInstruction(op, v[i], from_nir(instr.src[0],i),
709 from_nir(instr.src[1],i), write);
710 if (instr.src[0].abs)
711 ir->set_flag(alu_src0_abs);
712 if (instr.src[0].negate)
713 ir->set_flag(alu_src0_neg);
714
715 if (instr.src[1].abs)
716 ir->set_flag(alu_src1_abs);
717 if (instr.src[1].negate)
718 ir->set_flag(alu_src1_neg);
719
720 emit_instruction(ir);
721 }
722 if (ir)
723 ir->set_flag(alu_last_instr);
724
725 op = (op == op2_setne_dx10) ? op2_or_int: op2_and_int;
726 ir = new AluInstruction(op, v[0], v[0], v[1], last_write);
727 emit_instruction(ir);
728
729 return true;
730 }
731
732 bool EmitAluInstruction::emit_alu_trans_op2(const nir_alu_instr& instr, EAluOp opcode)
733 {
734 const nir_alu_src& src0 = instr.src[0];
735 const nir_alu_src& src1 = instr.src[1];
736
737 AluInstruction *ir = nullptr;
738
739 if (get_chip_class() == CAYMAN) {
740 int lasti = util_last_bit(instr.dest.write_mask);
741 for (int k = 0; k < lasti ; ++k) {
742 if (instr.dest.write_mask & (1 << k)) {
743
744 for (int i = 0; i < 4; i++) {
745 ir = new AluInstruction(opcode, from_nir(instr.dest, i), from_nir(src0, k), from_nir(src1, k), (i == k) ? write : empty);
746 if (src0.negate) ir->set_flag(alu_src0_neg);
747 if (src0.abs) ir->set_flag(alu_src0_abs);
748 if (src1.negate) ir->set_flag(alu_src1_neg);
749 if (src1.abs) ir->set_flag(alu_src1_abs);
750 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
751 if (i == 3) ir->set_flag(alu_last_instr);
752 emit_instruction(ir);
753 }
754 }
755 }
756 } else {
757 for (int i = 0; i < 4 ; ++i) {
758 if (instr.dest.write_mask & (1 << i)){
759 ir = new AluInstruction(opcode, from_nir(instr.dest, i), from_nir(src0, i), from_nir(src1, i), last_write);
760 if (src0.negate) ir->set_flag(alu_src0_neg);
761 if (src0.abs) ir->set_flag(alu_src0_abs);
762 if (src1.negate) ir->set_flag(alu_src1_neg);
763 if (src1.abs) ir->set_flag(alu_src1_abs);
764 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
765 emit_instruction(ir);
766 }
767 }
768 }
769 return true;
770 }
771
772 bool EmitAluInstruction::emit_alu_op2_int(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts opts)
773 {
774
775 const nir_alu_src& src0 = instr.src[0];
776 const nir_alu_src& src1 = instr.src[1];
777
778 if (src0.negate || src1.negate ||
779 src0.abs || src1.abs) {
780 std::cerr << "R600: don't support modifiers with integer operations";
781 return false;
782 }
783 return emit_alu_op2(instr, opcode, opts);
784 }
785
786 bool EmitAluInstruction::emit_alu_op2(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops)
787 {
788 const nir_alu_src *src0 = &instr.src[0];
789 const nir_alu_src *src1 = &instr.src[1];
790
791 if (ops & op2_opt_reverse)
792 std::swap(src0, src1);
793
794 bool src1_negate = (ops & op2_opt_neg_src1) ^ src1->negate;
795
796 AluInstruction *ir = nullptr;
797 for (int i = 0; i < 4 ; ++i) {
798 if (instr.dest.write_mask & (1 << i)){
799 ir = new AluInstruction(opcode, from_nir(instr.dest, i),
800 from_nir(*src0, i), from_nir(*src1, i), write);
801
802 if (src0->negate) ir->set_flag(alu_src0_neg);
803 if (src0->abs) ir->set_flag(alu_src0_abs);
804 if (src1_negate) ir->set_flag(alu_src1_neg);
805 if (src1->abs) ir->set_flag(alu_src1_abs);
806 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
807 emit_instruction(ir);
808 }
809 }
810 if (ir)
811 ir->set_flag(alu_last_instr);
812 return true;
813 }
814
815 bool EmitAluInstruction::emit_alu_op2_split_src_mods(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops)
816 {
817 const nir_alu_src *src0 = &instr.src[0];
818 const nir_alu_src *src1 = &instr.src[1];
819
820 if (ops & op2_opt_reverse)
821 std::swap(src0, src1);
822
823 GPRVector::Values v0;
824 for (int i = 0; i < 4 ; ++i)
825 v0[i] = from_nir(*src0, i);
826
827 GPRVector::Values v1;
828 for (int i = 0; i < 4 ; ++i)
829 v1[i] = from_nir(*src1, i);
830
831 if (src0->abs || src0->negate) {
832 int src0_tmp = allocate_temp_register();
833 GPRVector::Values v0_temp;
834 AluInstruction *ir = nullptr;
835 for (int i = 0; i < 4 ; ++i) {
836 if (instr.dest.write_mask & (1 << i)) {
837 v0_temp[i] = PValue(new GPRValue(src0_tmp, i));
838 ir = new AluInstruction(op1_mov, v0_temp[i], v0[i], write);
839 if (src0->abs) ir->set_flag(alu_src0_abs);
840 if (src0->negate) ir->set_flag(alu_src0_neg);
841 emit_instruction(ir);
842 v0[i] = v0_temp[i];
843 }
844 }
845 if (ir)
846 ir->set_flag(alu_last_instr);
847 }
848
849 if (src1->abs || src1->negate) {
850 int src1_tmp = allocate_temp_register();
851 GPRVector::Values v1_temp;
852 AluInstruction *ir = nullptr;
853 for (int i = 0; i < 4 ; ++i) {
854 if (instr.dest.write_mask & (1 << i)) {
855 v1_temp[i] = PValue(new GPRValue(src1_tmp, i));
856 ir = new AluInstruction(op1_mov, v1_temp[i], v1[i], {alu_write});
857 if (src1->abs) ir->set_flag(alu_src0_abs);
858 if (src1->negate) ir->set_flag(alu_src0_neg);
859 emit_instruction(ir);
860 v1[i] = v1_temp[i];
861 }
862 }
863 if (ir)
864 ir->set_flag(alu_last_instr);
865 }
866
867 AluInstruction *ir = nullptr;
868 for (int i = 0; i < 4 ; ++i) {
869 if (instr.dest.write_mask & (1 << i)){
870 ir = new AluInstruction(opcode, from_nir(instr.dest, i), {v0[i], v1[i]}, {alu_write});
871 emit_instruction(ir);
872 }
873 }
874 if (ir)
875 ir->set_flag(alu_last_instr);
876 return true;
877 }
878
879
880 bool EmitAluInstruction::emit_alu_isign(const nir_alu_instr& instr)
881 {
882 int sel_tmp = allocate_temp_register();
883 GPRVector tmp(sel_tmp, {0,1,2,3});
884
885 AluInstruction *ir = nullptr;
886 PValue help[4];
887
888 for (int i = 0; i < 4 ; ++i) {
889 if (instr.dest.write_mask & (1 << i)){
890 help[i] = from_nir(instr.dest, i);
891 auto s = from_nir(instr.src[0], i);
892 ir = new AluInstruction(op3_cndgt_int, help[i], s, Value::one_i, s, write);
893 emit_instruction(ir);
894 }
895 }
896 if (ir)
897 ir->set_flag(alu_last_instr);
898
899 for (int i = 0; i < 4 ; ++i) {
900 if (instr.dest.write_mask & (1 << i)){
901 ir = new AluInstruction(op2_sub_int, tmp.reg_i(i), Value::zero, help[i], write);
902 emit_instruction(ir);
903 }
904 }
905 if (ir)
906 ir->set_flag(alu_last_instr);
907
908 for (int i = 0; i < 4 ; ++i) {
909 if (instr.dest.write_mask & (1 << i)){
910
911 ir = new AluInstruction(op3_cndgt_int, help[i], tmp.reg_i(i),
912 PValue(new LiteralValue(-1,0)), help[i], write);
913 emit_instruction(ir);
914 }
915 }
916 if (ir)
917 ir->set_flag(alu_last_instr);
918 return true;
919 }
920
921 bool EmitAluInstruction::emit_fsign(const nir_alu_instr& instr)
922 {
923 PValue help[4];
924 PValue src[4];
925 AluInstruction *ir = nullptr;
926
927 for (int i = 0; i < 4 ; ++i) {
928 help[i] = from_nir(instr.dest, i);
929 src[i] = from_nir(instr.src[0], i);
930 }
931
932 if (instr.src[0].abs) {
933
934 for (int i = 0; i < 4 ; ++i) {
935 if (instr.dest.write_mask & (1 << i)){
936 ir = new AluInstruction(op2_setgt, help[i], src[i], Value::zero, write);
937 ir->set_flag(alu_src0_abs);
938 emit_instruction(ir);
939 }
940 }
941 if (ir)
942 ir->set_flag(alu_last_instr);
943
944 if (instr.src[0].negate) {
945 for (int i = 0; i < 4 ; ++i) {
946 if (instr.dest.write_mask & (1 << i)){
947 ir = new AluInstruction(op1_mov, help[i], help[i], write);
948 ir->set_flag(alu_src0_neg);
949 emit_instruction(ir);
950 }
951 }
952 if (ir)
953 ir->set_flag(alu_last_instr);
954 }
955
956 return true;
957 }
958
959 for (int i = 0; i < 4 ; ++i) {
960 if (instr.dest.write_mask & (1 << i)){
961 ir = new AluInstruction(op3_cndgt, help[i], src[i], Value::one_f, src[i], write);
962 if (instr.src[0].negate) {
963 ir->set_flag(alu_src0_neg);
964 ir->set_flag(alu_src2_neg);
965 }
966 emit_instruction(ir);
967 }
968 }
969
970 if (ir)
971 ir->set_flag(alu_last_instr);
972
973 for (int i = 0; i < 4 ; ++i) {
974 if (instr.dest.write_mask & (1 << i)){
975 ir = new AluInstruction(op3_cndgt, help[i], help[i], Value::one_f, help[i], write);
976 ir->set_flag(alu_src0_neg);
977 ir->set_flag(alu_src1_neg);
978 emit_instruction(ir);
979 }
980 }
981 if (ir)
982 ir->set_flag(alu_last_instr);
983 return true;
984 }
985
986 bool EmitAluInstruction::emit_alu_op3(const nir_alu_instr& instr, EAluOp opcode,
987 std::array<uint8_t, 3> reorder)
988 {
989 const nir_alu_src *src[3];
990 src[0] = &instr.src[reorder[0]];
991 src[1] = &instr.src[reorder[1]];
992 src[2] = &instr.src[reorder[2]];
993
994 AluInstruction *ir = nullptr;
995 for (int i = 0; i < 4 ; ++i) {
996 if (instr.dest.write_mask & (1 << i)){
997 ir = new AluInstruction(opcode, from_nir(instr.dest, i),
998 from_nir(*src[0], i), from_nir(*src[1], i),
999 from_nir(*src[2], i), write);
1000
1001 if (src[0]->negate) ir->set_flag(alu_src0_neg);
1002 if (src[1]->negate) ir->set_flag(alu_src1_neg);
1003 if (src[2]->negate) ir->set_flag(alu_src2_neg);
1004
1005 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
1006 ir->set_flag(alu_write);
1007 emit_instruction(ir);
1008 }
1009 }
1010 if (ir)
1011 ir->set_flag(alu_last_instr);
1012 return true;
1013 }
1014
1015 bool EmitAluInstruction::emit_alu_ineg(const nir_alu_instr& instr)
1016 {
1017 AluInstruction *ir = nullptr;
1018 for (int i = 0; i < 4 ; ++i) {
1019 if (instr.dest.write_mask & (1 << i)){
1020 ir = new AluInstruction(op2_sub_int, from_nir(instr.dest, i), Value::zero,
1021 from_nir(instr.src[0], i), write);
1022 emit_instruction(ir);
1023 }
1024 }
1025 if (ir)
1026 ir->set_flag(alu_last_instr);
1027
1028 return true;
1029 }
1030
/* One character per swizzle selector value 0..7.
 * NOTE(review): presumably meant for printing swizzles in debug output; no
 * user is visible in this part of the file - confirm before removing. */
static const char swz[] = "xyzw01?_";
1032
1033
1034
1035 bool EmitAluInstruction::emit_alu_iabs(const nir_alu_instr& instr)
1036 {
1037 int sel_tmp = allocate_temp_register();
1038 GPRVector tmp(sel_tmp, {0,1,2,3});
1039
1040 std::array<PValue,4> src;
1041 AluInstruction *ir = nullptr;
1042 for (int i = 0; i < 4 ; ++i) {
1043 if (instr.dest.write_mask & (1 << i)){
1044 src[i] = from_nir(instr.src[0],i);
1045 ir = new AluInstruction(op2_sub_int, tmp.reg_i(i), Value::zero, src[i], write);
1046 emit_instruction(ir);
1047 }
1048 }
1049 if (ir)
1050 ir->set_flag(alu_last_instr);
1051
1052 for (int i = 0; i < 4 ; ++i) {
1053 if (instr.dest.write_mask & (1 << i)){
1054 ir = new AluInstruction(op3_cndge_int, from_nir(instr.dest, i), src[i],
1055 src[i], tmp.reg_i(i), write);
1056 emit_instruction(ir);
1057 }
1058 }
1059 if (ir)
1060 ir->set_flag(alu_last_instr);
1061 return true;
1062 }
1063
1064 bool EmitAluInstruction::emit_alu_div_int(const nir_alu_instr& instr, bool use_signed, bool mod)
1065 {
1066
1067 int sel_tmp = allocate_temp_register();
1068 int sel_tmp0 = allocate_temp_register();
1069 int sel_tmp1 = allocate_temp_register();
1070
1071 PValue asrc1(new GPRValue(sel_tmp, 0));
1072 PValue asrc2(new GPRValue(sel_tmp, 1));
1073 PValue rsign(new GPRValue(sel_tmp, 2));
1074 PValue err(new GPRValue(sel_tmp, 3));
1075
1076 GPRVector tmp0(sel_tmp0, {0,1,2,3});
1077 GPRVector tmp1(sel_tmp1, {0,1,2,3});
1078
1079 std::array<PValue, 4> src0;
1080 std::array<PValue, 4> src1;
1081
1082 for (int i = 0; i < 4 ; ++i) {
1083 if (instr.dest.write_mask & (1 << i)) {
1084 src0[i] = from_nir(instr.src[0], i);
1085 src1[i] = from_nir(instr.src[1], i);
1086 }
1087 }
1088
1089
1090 for (int i = 3; i >= 0 ; --i) {
1091 if (!(instr.dest.write_mask & (1 << i)))
1092 continue;
1093 if (use_signed) {
1094 emit_instruction(op2_sub_int, asrc1, {Value::zero, src0[i]}, {alu_write});
1095 emit_instruction(op2_sub_int, asrc2, {Value::zero, src1[i]}, {alu_write});
1096 emit_instruction(op2_xor_int, rsign, {src0[i], src1[i]}, {alu_write, alu_last_instr});
1097
1098
1099 emit_instruction(op3_cndge_int, asrc1, {src0[i], src0[i], asrc1}, {alu_write});
1100 emit_instruction(op3_cndge_int, asrc2, {src1[i], src1[i], asrc2}, {alu_write, alu_last_instr});
1101 } else {
1102 asrc1 = src0[i];
1103 asrc2 = src1[i];
1104 }
1105
1106 emit_instruction(op1_recip_uint, tmp0.x(), {asrc2}, {alu_write, alu_last_instr});
1107
1108 emit_instruction(op2_mullo_uint, tmp0.z(), {tmp0.x(), asrc2}, {alu_write, alu_last_instr});
1109
1110 emit_instruction(op2_sub_int, tmp0.w(), {Value::zero, tmp0.z()}, {alu_write});
1111 emit_instruction(op2_mulhi_uint, tmp0.y(), {tmp0.x(), asrc2 }, {alu_write, alu_last_instr});
1112
1113 emit_instruction(op3_cnde_int, tmp0.z(), {tmp0.y(), tmp0.w(), tmp0.z()}, {alu_write, alu_last_instr});
1114
1115 emit_instruction(op2_mulhi_uint, err, {tmp0.z(), tmp0.x()}, {alu_write, alu_last_instr});
1116
1117 emit_instruction(op2_sub_int, tmp1.x(), {tmp0.x(), err}, {alu_write});
1118 emit_instruction(op2_add_int, tmp1.y(), {tmp0.x(), err}, {alu_write, alu_last_instr});
1119
1120 emit_instruction(op3_cnde_int, tmp0.x(), {tmp0.y(), tmp1.y(), tmp1.x()}, {alu_write, alu_last_instr});
1121
1122 emit_instruction(op2_mulhi_uint, tmp0.z(), {tmp0.x(), asrc1 }, {alu_write, alu_last_instr});
1123 emit_instruction(op2_mullo_uint, tmp0.y(), {tmp0.z(), asrc2 }, {alu_write, alu_last_instr});
1124
1125 emit_instruction(op2_sub_int, tmp0.w(), {asrc1, tmp0.y()}, {alu_write, alu_last_instr});
1126
1127
1128 emit_instruction(op2_setge_uint, tmp1.x(), {tmp0.w(), asrc2}, {alu_write});
1129 emit_instruction(op2_setge_uint, tmp1.y(), {asrc1, tmp0.y()}, {alu_write});
1130
1131 if (mod) {
1132 emit_instruction(op2_sub_int, tmp1.z(), {tmp0.w(), asrc2}, {alu_write});
1133 emit_instruction(op2_add_int, tmp1.w(), {tmp0.w(), asrc2}, {alu_write, alu_last_instr});
1134 } else {
1135 emit_instruction(op2_add_int, tmp1.z(), {tmp0.z(), Value::one_i}, {alu_write});
1136 emit_instruction(op2_sub_int, tmp1.w(), {tmp0.z(), Value::one_i}, {alu_write, alu_last_instr});
1137 }
1138
1139 emit_instruction(op2_and_int, tmp1.x(), {tmp1.x(), tmp1.y()}, {alu_write, alu_last_instr});
1140
1141 if (mod)
1142 emit_instruction(op3_cnde_int, tmp0.z(), {tmp1.x(), tmp0.w(), tmp1.z()}, {alu_write, alu_last_instr});
1143 else
1144 emit_instruction(op3_cnde_int, tmp0.z(), {tmp1.x(), tmp0.z(), tmp1.z()}, {alu_write, alu_last_instr});
1145
1146 if (use_signed) {
1147 emit_instruction(op3_cnde_int, tmp0.z(), {tmp1.y(), tmp1.w(), tmp0.z()}, {alu_write, alu_last_instr});
1148 emit_instruction(op2_sub_int, tmp0.y(), {Value::zero, tmp0.z()}, {alu_write, alu_last_instr});
1149
1150 if (mod)
1151 emit_instruction(op3_cndge_int, from_nir(instr.dest, i), {src0[i], tmp0.z(), tmp0.y()},
1152 {alu_write, alu_last_instr});
1153 else
1154 emit_instruction(op3_cndge_int, from_nir(instr.dest, i), {rsign, tmp0.z(), tmp0.y()},
1155 {alu_write, alu_last_instr});
1156 } else {
1157 emit_instruction(op3_cnde_int, from_nir(instr.dest, i), {tmp1.y(), tmp1.w(), tmp0.z()}, {alu_write, alu_last_instr});
1158 }
1159 }
1160 return true;
1161 }
1162
1163 void EmitAluInstruction::split_alu_modifiers(const nir_alu_src& src, GPRVector::Values& s,
1164 GPRVector::Values& v, int ncomp)
1165 {
1166
1167 AluInstruction *alu = nullptr;
1168 for (int i = 0; i < ncomp; ++i) {
1169 alu = new AluInstruction(op1_mov, v[i], s[i], {alu_write});
1170 if (src.abs)
1171 alu->set_flag(alu_src0_abs);
1172 if (src.negate)
1173 alu->set_flag(alu_src0_neg);
1174 emit_instruction(alu);
1175 }
1176 make_last(alu);
1177 }
1178
1179 bool EmitAluInstruction::emit_tex_fdd(const nir_alu_instr& instr, TexInstruction::Opcode op,
1180 bool fine)
1181 {
1182
1183 GPRVector::Values v;
1184 GPRVector::Values s;
1185 GPRVector::Values *source = &s;
1186 std::array<int, 4> writemask = {0,1,2,3};
1187
1188 int ncomp = instr.src[0].src.is_ssa ? instr.src[0].src.ssa->num_components :
1189 instr.src[0].src.reg.reg->num_components;
1190
1191 for (int i = 0; i < 4; ++i) {
1192 writemask[i] = (instr.dest.write_mask & (1 << i)) ? i : 7;
1193 v[i] = from_nir(instr.dest, (i < ncomp) ? i : 0);
1194 s[i] = from_nir(instr.src[0], (i < ncomp) ? i : 0);
1195 }
1196
1197 if (instr.src[0].abs || instr.src[0].negate) {
1198 split_alu_modifiers(instr.src[0], s, v, ncomp);
1199 source = &v;
1200 }
1201
1202 /* This is querying the dreivatives of the output fb, so we would either need
1203 * access to the neighboring pixels or to the framebuffer. Neither is currently
1204 * implemented */
1205 GPRVector dst(v);
1206 GPRVector src(*source);
1207
1208 auto tex = new TexInstruction(op, dst, src, 0, R600_MAX_CONST_BUFFERS, PValue());
1209 tex->set_dest_swizzle(writemask);
1210
1211 if (fine) {
1212 std::cerr << "Sewt fine flag\n";
1213 tex->set_flag(TexInstruction::grad_fine);
1214 }
1215
1216 emit_instruction(tex);
1217
1218 return true;
1219 }
1220
1221 bool EmitAluInstruction::emit_bitfield_extract(const nir_alu_instr& instr, EAluOp opcode)
1222 {
1223 int itmp = allocate_temp_register();
1224 std::array<PValue, 4> tmp;
1225 std::array<PValue, 4> dst;
1226 std::array<PValue, 4> src0;
1227 std::array<PValue, 4> shift;
1228
1229 PValue l32(new LiteralValue(32));
1230 unsigned write_mask = instr.dest.write_mask;
1231
1232 AluInstruction *ir = nullptr;
1233 for (int i = 0; i < 4; i++) {
1234 if (!(write_mask & (1<<i)))
1235 continue;
1236 dst[i] = from_nir(instr.dest, i);
1237 src0[i] = from_nir(instr.src[0], i);
1238 shift[i] = from_nir(instr.src[2], i);
1239
1240 ir = new AluInstruction(opcode, dst[i],
1241 {src0[i], from_nir(instr.src[1], i), shift[i]},
1242 {alu_write});
1243 emit_instruction(ir);
1244 }
1245 make_last(ir);
1246
1247 for (int i = 0; i < 4; i++) {
1248 if (!(write_mask & (1<<i)))
1249 continue;
1250 tmp[i] = PValue(new GPRValue(itmp, i));
1251 ir = new AluInstruction(op2_setge_int, tmp[i], {shift[i], l32},
1252 {alu_write});
1253 emit_instruction(ir);
1254 }
1255 make_last(ir);
1256
1257 for (int i = 0; i < 4; i++) {
1258 if (!(write_mask & (1<<i)))
1259 continue;
1260 ir = new AluInstruction(op3_cnde_int, dst[i], {tmp[i], dst[i], src0[i]},
1261 {alu_write});
1262 emit_instruction(ir);
1263 }
1264 make_last(ir);
1265
1266 return true;
1267 }
1268
1269 bool EmitAluInstruction::emit_bitfield_insert(const nir_alu_instr& instr)
1270 {
1271 auto t0 = get_temp_vec4();
1272 auto t1 = get_temp_vec4();
1273 auto t2 = get_temp_vec4();
1274
1275 PValue l32(new LiteralValue(32));
1276 unsigned write_mask = instr.dest.write_mask;
1277 if (!write_mask) return true;
1278
1279 AluInstruction *ir = nullptr;
1280 for (int i = 0; i < 4; i++) {
1281 if (!(write_mask & (1<<i)))
1282 continue;
1283
1284 ir = new AluInstruction(op2_setge_int, t0[i], {from_nir(instr.src[3], i), l32}, {alu_write});
1285 emit_instruction(ir);
1286 }
1287 make_last(ir);
1288
1289 for (int i = 0; i < 4; i++) {
1290 if (!(write_mask & (1<<i)))
1291 continue;
1292 ir = new AluInstruction(op2_bfm_int, t1[i], {from_nir(instr.src[3], i),
1293 from_nir(instr.src[2], i)}, {alu_write});
1294 emit_instruction(ir);
1295 }
1296 ir->set_flag(alu_last_instr);
1297
1298 for (int i = 0; i < 4; i++) {
1299 if (!(write_mask & (1<<i)))
1300 continue;
1301 ir = new AluInstruction(op2_lshl_int, t2[i], {from_nir(instr.src[1], i),
1302 from_nir(instr.src[2], i)}, {alu_write});
1303 emit_instruction(ir);
1304 }
1305 ir->set_flag(alu_last_instr);
1306
1307
1308 for (int i = 0; i < 4; i++) {
1309 if (!(write_mask & (1<<i)))
1310 continue;
1311 ir = new AluInstruction(op3_bfi_int, from_nir(instr.dest, i),
1312 {t1[i], t2[i], from_nir(instr.src[0], i)}, {alu_write});
1313 emit_instruction(ir);
1314 }
1315 ir->set_flag(alu_last_instr);
1316
1317 for (int i = 0; i < 4; i++) {
1318 if (!(write_mask & (1<<i)))
1319 continue;
1320 ir = new AluInstruction(op3_cnde_int, from_nir(instr.dest, i),
1321 {t0[i], from_nir(instr.dest, i),
1322 from_nir(instr.src[1], i)}, {alu_write});
1323 emit_instruction(ir);
1324 }
1325 ir->set_flag(alu_last_instr);
1326
1327 return true;
1328 }
1329
1330 bool EmitAluInstruction::emit_unpack_32_2x16_split_y(const nir_alu_instr& instr)
1331 {
1332 emit_instruction(op2_lshr_int, from_nir(instr.dest, 0),
1333 {from_nir(instr.src[0], 0), PValue(new LiteralValue(16))},
1334 {alu_write, alu_last_instr});
1335
1336 emit_instruction(op1_flt16_to_flt32, from_nir(instr.dest, 0),
1337 {from_nir(instr.dest, 0)},{alu_write, alu_last_instr});
1338
1339 return true;
1340 }
1341
1342 bool EmitAluInstruction::emit_unpack_32_2x16_split_x(const nir_alu_instr& instr)
1343 {
1344 emit_instruction(op1_flt16_to_flt32, from_nir(instr.dest, 0),
1345 {from_nir(instr.src[0], 0)},{alu_write, alu_last_instr});
1346 return true;
1347 }
1348
1349 bool EmitAluInstruction::emit_pack_32_2x16_split(const nir_alu_instr& instr)
1350 {
1351 int it0 = allocate_temp_register();
1352 PValue x(new GPRValue(it0, 0));
1353 PValue y(new GPRValue(it0, 1));
1354
1355 emit_instruction(op1_flt32_to_flt16, x,{from_nir(instr.src[0], 0)},{alu_write});
1356 emit_instruction(op1_flt32_to_flt16, y,{from_nir(instr.src[1], 0)},{alu_write, alu_last_instr});
1357
1358 emit_instruction(op2_lshl_int, y, {y, PValue(new LiteralValue(16))},{alu_write, alu_last_instr});
1359
1360 emit_instruction(op2_or_int, {from_nir(instr.dest, 0)} , {x, y},{alu_write, alu_last_instr});
1361
1362 return true;
1363 }
1364
1365 }