3 * Copyright (c) 2018 Collabora LTD
5 * Author: Gert Wollny <gert.wollny@collabora.com>
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
28 #include "sfn_emitaluinstruction.h"
29 #include "sfn_debug.h"
31 #include "gallium/drivers/r600/r600_shader.h"
37 EmitAluInstruction::EmitAluInstruction(ShaderFromNirProcessor
& processor
):
38 EmitInstruction (processor
)
43 bool EmitAluInstruction::do_emit(nir_instr
* ir
)
45 const nir_alu_instr
& instr
= *nir_instr_as_alu(ir
);
47 r600::sfn_log
<< SfnLog::instr
<< "emit '"
49 << " bitsize: " << static_cast<int>(instr
.dest
.dest
.ssa
.bit_size
)
50 << "' (" << __func__
<< ")\n";
52 split_constants(instr
);
55 case nir_op_b2f32
: return emit_alu_b2f(instr
);
56 case nir_op_i2b1
: return emit_alu_i2orf2_b1(instr
, op2_setne_int
);
57 case nir_op_f2b1
: return emit_alu_i2orf2_b1(instr
, op2_setne_dx10
);
59 case nir_op_mov
:return emit_mov(instr
);
60 case nir_op_ftrunc
: return emit_alu_op1(instr
, op1_trunc
);
61 case nir_op_fabs
: return emit_alu_op1(instr
, op1_mov
, {1 << alu_src0_abs
});
62 case nir_op_fneg
: return emit_alu_op1(instr
, op1_mov
, {1 << alu_src0_neg
});
63 case nir_op_fsat
: return emit_alu_op1(instr
, op1_mov
, {1 << alu_dst_clamp
});
64 case nir_op_frcp
: return emit_alu_trans_op1(instr
, op1_recip_ieee
);
65 case nir_op_frsq
: return emit_alu_trans_op1(instr
, op1_recipsqrt_ieee1
);
66 case nir_op_fsin
: return emit_alu_trig_op1(instr
, op1_sin
);
67 case nir_op_fcos
: return emit_alu_trig_op1(instr
, op1_cos
);
68 case nir_op_fexp2
: return emit_alu_trans_op1(instr
, op1_exp_ieee
);
69 case nir_op_flog2
: return emit_alu_trans_op1(instr
, op1_log_clamped
);
71 case nir_op_fround_even
: return emit_alu_op1(instr
, op1_rndne
);
72 case nir_op_fsqrt
: return emit_alu_trans_op1(instr
, op1_sqrt_ieee
);
73 case nir_op_i2f32
: return emit_alu_trans_op1(instr
, op1_int_to_flt
);
74 case nir_op_u2f32
: return emit_alu_trans_op1(instr
, op1_uint_to_flt
);
75 case nir_op_f2i32
: return emit_alu_f2i32_or_u32(instr
, op1_flt_to_int
);
76 case nir_op_f2u32
: return emit_alu_f2i32_or_u32(instr
, op1_flt_to_uint
);
78 case nir_op_fceil
: return emit_alu_op1(instr
, op1_ceil
);
79 case nir_op_ffract
: return emit_alu_op1(instr
, op1_fract
);
80 case nir_op_ffloor
: return emit_alu_op1(instr
, op1_floor
);
82 case nir_op_fsign
: return emit_fsign(instr
);
83 case nir_op_fdph
: return emit_fdph(instr
);
85 case nir_op_ibitfield_extract
: return emit_bitfield_extract(instr
, op3_bfe_int
);
86 case nir_op_ubitfield_extract
: return emit_bitfield_extract(instr
, op3_bfe_uint
);
87 case nir_op_bitfield_insert
: return emit_bitfield_insert(instr
);
88 case nir_op_bit_count
: return emit_alu_op1(instr
, op1_bcnt_int
);
89 case nir_op_bitfield_reverse
: return emit_alu_op1(instr
, op1_bfrev_int
);
91 case nir_op_ieq
: return emit_alu_op2_int(instr
, op2_sete_int
);
92 case nir_op_ine
: return emit_alu_op2_int(instr
, op2_setne_int
);
93 case nir_op_ige
: return emit_alu_op2_int(instr
, op2_setge_int
);
94 case nir_op_ishl
: return emit_alu_op2_int(instr
, op2_lshl_int
);
95 case nir_op_ishr
: return emit_alu_op2_int(instr
, op2_ashr_int
);
96 case nir_op_ilt
: return emit_alu_op2_int(instr
, op2_setgt_int
, op2_opt_reverse
);
97 case nir_op_iand
: return emit_alu_op2_int(instr
, op2_and_int
);
98 case nir_op_ixor
: return emit_alu_op2_int(instr
, op2_xor_int
);
99 case nir_op_imin
: return emit_alu_op2_int(instr
, op2_min_int
);
100 case nir_op_imax
: return emit_alu_op2_int(instr
, op2_max_int
);
101 case nir_op_imul_high
: return emit_alu_trans_op2(instr
, op2_mulhi_int
);
102 case nir_op_umul_high
: return emit_alu_trans_op2(instr
, op2_mulhi_uint
);
103 case nir_op_umax
: return emit_alu_op2_int(instr
, op2_max_uint
);
104 case nir_op_umin
: return emit_alu_op2_int(instr
, op2_min_uint
);
105 case nir_op_ior
: return emit_alu_op2_int(instr
, op2_or_int
);
106 case nir_op_inot
: return emit_alu_op1(instr
, op1_not_int
);
107 case nir_op_iabs
: return emit_alu_iabs(instr
);
108 case nir_op_ineg
: return emit_alu_ineg(instr
);
109 case nir_op_idiv
: return emit_alu_div_int(instr
, true, false);
110 case nir_op_udiv
: return emit_alu_div_int(instr
, false, false);
111 case nir_op_umod
: return emit_alu_div_int(instr
, false, true);
112 case nir_op_isign
: return emit_alu_isign(instr
);
114 case nir_op_uge
: return emit_alu_op2_int(instr
, op2_setge_uint
);
115 case nir_op_ult
: return emit_alu_op2_int(instr
, op2_setgt_uint
, op2_opt_reverse
);
116 case nir_op_ushr
: return emit_alu_op2_int(instr
, op2_lshr_int
);
118 case nir_op_flt
: return emit_alu_op2(instr
, op2_setgt_dx10
, op2_opt_reverse
);
120 case nir_op_fge
: return emit_alu_op2(instr
, op2_setge_dx10
);
121 case nir_op_fne
: return emit_alu_op2(instr
, op2_setne_dx10
);
122 case nir_op_feq
: return emit_alu_op2(instr
, op2_sete_dx10
);
124 case nir_op_fmin
: return emit_alu_op2(instr
, op2_min_dx10
);
125 case nir_op_fmax
: return emit_alu_op2(instr
, op2_max_dx10
);
126 case nir_op_fmul
: return emit_alu_op2(instr
, op2_mul_ieee
);
127 case nir_op_imul
: return emit_alu_trans_op2(instr
, op2_mullo_int
);
128 case nir_op_fadd
: return emit_alu_op2(instr
, op2_add
);
129 case nir_op_fsub
: return emit_alu_op2(instr
, op2_add
, op2_opt_neg_src1
);
130 case nir_op_iadd
: return emit_alu_op2_int(instr
, op2_add_int
);
131 case nir_op_isub
: return emit_alu_op2_int(instr
, op2_sub_int
);
132 case nir_op_fdot2
: return emit_dot(instr
, 2);
133 case nir_op_fdot3
: return emit_dot(instr
, 3);
134 case nir_op_fdot4
: return emit_dot(instr
, 4);
136 case nir_op_bany_inequal2
: return emit_any_all_icomp(instr
, op2_setne_int
, 2, false);
137 case nir_op_bany_inequal3
: return emit_any_all_icomp(instr
, op2_setne_int
, 3, false);
138 case nir_op_bany_inequal4
: return emit_any_all_icomp(instr
, op2_setne_int
, 4, false);
140 case nir_op_ball_iequal2
: return emit_any_all_icomp(instr
, op2_sete_int
, 2, true);
141 case nir_op_ball_iequal3
: return emit_any_all_icomp(instr
, op2_sete_int
, 3, true);
142 case nir_op_ball_iequal4
: return emit_any_all_icomp(instr
, op2_sete_int
, 4, true);
144 case nir_op_bany_fnequal2
: return emit_any_all_fcomp2(instr
, op2_setne_dx10
, false);
145 case nir_op_bany_fnequal3
: return emit_any_all_fcomp(instr
, op2_setne
, 3, false);
146 case nir_op_bany_fnequal4
: return emit_any_all_fcomp(instr
, op2_setne
, 4, false);
148 case nir_op_ball_fequal2
: return emit_any_all_fcomp2(instr
, op2_sete_dx10
, true);
149 case nir_op_ball_fequal3
: return emit_any_all_fcomp(instr
, op2_sete
, 3, true);
150 case nir_op_ball_fequal4
: return emit_any_all_fcomp(instr
, op2_sete
, 4, true);
153 case nir_op_ffma
: return emit_alu_op3(instr
, op3_muladd_ieee
);
154 case nir_op_bcsel
: return emit_alu_op3(instr
, op3_cnde_int
, {0, 2, 1});
155 case nir_op_vec2
: return emit_create_vec(instr
, 2);
156 case nir_op_vec3
: return emit_create_vec(instr
, 3);
157 case nir_op_vec4
: return emit_create_vec(instr
, 4);
159 case nir_op_find_lsb
: return emit_alu_op1(instr
, op1_ffbl_int
);
160 case nir_op_ufind_msb
: return emit_find_msb(instr
, false);
161 case nir_op_ifind_msb
: return emit_find_msb(instr
, true);
162 case nir_op_b2i32
: return emit_b2i32(instr
);
163 case nir_op_pack_64_2x32_split
: return emit_pack_64_2x32_split(instr
);
164 case nir_op_unpack_64_2x32_split_x
: return emit_unpack_64_2x32_split(instr
, 0);
165 case nir_op_unpack_64_2x32_split_y
: return emit_unpack_64_2x32_split(instr
, 1);
166 case nir_op_unpack_half_2x16_split_x
: return emit_unpack_32_2x16_split_x(instr
);
167 case nir_op_unpack_half_2x16_split_y
: return emit_unpack_32_2x16_split_y(instr
);
168 case nir_op_pack_half_2x16_split
: return emit_pack_32_2x16_split(instr
);
171 /* These are in the ALU instruction list, but they should be texture instructions */
172 case nir_op_fddx_fine
: return emit_tex_fdd(instr
, TexInstruction::get_gradient_h
, true);
173 case nir_op_fddx_coarse
:
174 case nir_op_fddx
: return emit_tex_fdd(instr
, TexInstruction::get_gradient_h
, false);
176 case nir_op_fddy_fine
: return emit_tex_fdd(instr
, TexInstruction::get_gradient_v
, true);
177 case nir_op_fddy_coarse
:
178 case nir_op_fddy
: return emit_tex_fdd(instr
,TexInstruction::get_gradient_v
, false);
180 case nir_op_umad24
: return emit_alu_op3(instr
, op3_muladd_uint24
, {0, 1, 2});
181 case nir_op_umul24
: return emit_alu_op2(instr
, op2_mul_uint24
);
187 void EmitAluInstruction::split_constants(const nir_alu_instr
& instr
)
189 const nir_op_info
*op_info
= &nir_op_infos
[instr
.op
];
190 if (op_info
->num_inputs
< 2)
194 std::array
<PValue
,4> c
;
195 std::array
<int,4> idx
;
196 for (unsigned i
= 0; i
< op_info
->num_inputs
; ++i
) {
197 PValue src
= from_nir(instr
.src
[i
], 0);
199 if (src
->type() == Value::kconst
) {
208 unsigned sel
= c
[0]->sel();
209 sfn_log
<< SfnLog::reg
<< "split " << nconst
<< " constants, sel[0] = " << sel
; ;
211 for (int i
= 1; i
< nconst
; ++i
) {
212 sfn_log
<< "sel[" << i
<< "] = " << c
[i
]->sel() << "\n";
213 if (c
[i
]->sel() != sel
) {
214 load_uniform(instr
.src
[idx
[i
]]);
219 bool EmitAluInstruction::emit_alu_inot(const nir_alu_instr
& instr
)
221 if (instr
.src
[0].negate
|| instr
.src
[0].abs
) {
222 std::cerr
<< "source modifiers not supported with int ops\n";
226 AluInstruction
*ir
= nullptr;
227 for (int i
= 0; i
< 4 ; ++i
) {
228 if (instr
.dest
.write_mask
& (1 << i
)){
229 ir
= new AluInstruction(op1_not_int
, from_nir(instr
.dest
, i
),
230 from_nir(instr
.src
[0], i
), write
);
231 emit_instruction(ir
);
235 ir
->set_flag(alu_last_instr
);
239 bool EmitAluInstruction::emit_alu_op1(const nir_alu_instr
& instr
, EAluOp opcode
,
240 const AluOpFlags
& flags
)
242 AluInstruction
*ir
= nullptr;
243 for (int i
= 0; i
< 4 ; ++i
) {
244 if (instr
.dest
.write_mask
& (1 << i
)){
245 ir
= new AluInstruction(opcode
, from_nir(instr
.dest
, i
),
246 from_nir(instr
.src
[0], i
), write
);
248 if (flags
.test(alu_src0_abs
) || instr
.src
[0].abs
)
249 ir
->set_flag(alu_src0_abs
);
251 if (instr
.src
[0].negate
^ flags
.test(alu_src0_neg
))
252 ir
->set_flag(alu_src0_neg
);
254 if (flags
.test(alu_dst_clamp
) || instr
.dest
.saturate
)
255 ir
->set_flag(alu_dst_clamp
);
257 emit_instruction(ir
);
265 bool EmitAluInstruction::emit_mov(const nir_alu_instr
& instr
)
267 /* If the op is a plain move beween SSA values we can just forward
268 * the register reference to the original register */
269 if (instr
.dest
.dest
.is_ssa
&& instr
.src
[0].src
.is_ssa
&&
270 !instr
.src
[0].abs
&& !instr
.src
[0].negate
&& !instr
.dest
.saturate
) {
272 for (int i
= 0; i
< 4 ; ++i
) {
273 if (instr
.dest
.write_mask
& (1 << i
)){
274 auto src
= from_nir(instr
.src
[0], i
);
275 result
&= inject_register(instr
.dest
.dest
.ssa
.index
, i
,
278 if (src
->type() == Value::kconst
) {
279 add_uniform((instr
.dest
.dest
.ssa
.index
<< 2) + i
, src
);
285 return emit_alu_op1(instr
, op1_mov
);
289 bool EmitAluInstruction::emit_alu_trig_op1(const nir_alu_instr
& instr
, EAluOp opcode
)
291 // normalize by dividing by 2*PI, shift by 0.5, take fraction, and
294 const float inv_2_pi
= 0.15915494f
;
296 PValue v
[4]; // this might need some additional temp register creation
297 for (unsigned i
= 0; i
< 4 ; ++i
)
298 v
[i
] = from_nir(instr
.dest
, i
);
300 PValue inv_pihalf
= PValue(new LiteralValue(inv_2_pi
, 0));
301 AluInstruction
*ir
= nullptr;
302 for (unsigned i
= 0; i
< 4 ; ++i
) {
303 if (!(instr
.dest
.write_mask
& (1 << i
)))
305 ir
= new AluInstruction(op3_muladd_ieee
, v
[i
],
306 {from_nir(instr
.src
[0],i
), inv_pihalf
, Value::zero_dot_5
},
308 if (instr
.src
[0].negate
) ir
->set_flag(alu_src0_neg
);
309 emit_instruction(ir
);
313 for (unsigned i
= 0; i
< 4 ; ++i
) {
314 if (!(instr
.dest
.write_mask
& (1 << i
)))
316 ir
= new AluInstruction(op1_fract
, v
[i
], v
[i
], {alu_write
});
317 emit_instruction(ir
);
321 for (unsigned i
= 0; i
< 4 ; ++i
) {
322 if (!(instr
.dest
.write_mask
& (1 << i
)))
324 ir
= new AluInstruction(op2_add
, v
[i
], v
[i
], Value::zero_dot_5
, write
);
325 ir
->set_flag(alu_src1_neg
);
326 emit_instruction(ir
);
330 for (unsigned i
= 0; i
< 4 ; ++i
) {
331 if (!(instr
.dest
.write_mask
& (1 << i
)))
334 ir
= new AluInstruction(opcode
, v
[i
], v
[i
], last_write
);
335 emit_instruction(ir
);
340 bool EmitAluInstruction::emit_alu_trans_op1(const nir_alu_instr
& instr
, EAluOp opcode
,
343 AluInstruction
*ir
= nullptr;
344 std::set
<int> src_idx
;
346 if (get_chip_class() == CAYMAN
) {
347 int last_slot
= (instr
.dest
.write_mask
& 0x8) ? 4 : 3;
348 for (int i
= 0; i
< last_slot
; ++i
) {
349 ir
= new AluInstruction(opcode
, from_nir(instr
.dest
, i
),
350 from_nir(instr
.src
[0], 0), instr
.dest
.write_mask
& (1 << i
) ? write
: empty
);
351 if (absolute
|| instr
.src
[0].abs
) ir
->set_flag(alu_src0_abs
);
352 if (instr
.src
[0].negate
) ir
->set_flag(alu_src0_neg
);
353 if (instr
.dest
.saturate
) ir
->set_flag(alu_dst_clamp
);
355 if (i
== (last_slot
- 1)) ir
->set_flag(alu_last_instr
);
357 emit_instruction(ir
);
360 for (int i
= 0; i
< 4 ; ++i
) {
361 if (instr
.dest
.write_mask
& (1 << i
)){
362 ir
= new AluInstruction(opcode
, from_nir(instr
.dest
, i
),
363 from_nir(instr
.src
[0], i
), last_write
);
364 if (absolute
|| instr
.src
[0].abs
) ir
->set_flag(alu_src0_abs
);
365 if (instr
.src
[0].negate
) ir
->set_flag(alu_src0_neg
);
366 if (instr
.dest
.saturate
) ir
->set_flag(alu_dst_clamp
);
367 emit_instruction(ir
);
374 bool EmitAluInstruction::emit_alu_f2i32_or_u32(const nir_alu_instr
& instr
, EAluOp op
)
376 AluInstruction
*ir
= nullptr;
377 std::array
<PValue
, 4> v
;
379 for (int i
= 0; i
< 4; ++i
) {
380 if (!(instr
.dest
.write_mask
& (1 << i
)))
382 v
[i
] = from_nir(instr
.dest
, i
);
383 ir
= new AluInstruction(op1_trunc
, v
[i
], from_nir(instr
.src
[0], i
), {alu_write
});
384 if (instr
.src
[0].abs
) ir
->set_flag(alu_src0_abs
);
385 if (instr
.src
[0].negate
) ir
->set_flag(alu_src0_neg
);
386 emit_instruction(ir
);
390 for (int i
= 0; i
< 4; ++i
) {
391 if (!(instr
.dest
.write_mask
& (1 << i
)))
393 ir
= new AluInstruction(op
, v
[i
], v
[i
], {alu_write
});
394 emit_instruction(ir
);
395 if (op
== op1_flt_to_uint
)
403 bool EmitAluInstruction::emit_find_msb(const nir_alu_instr
& instr
, bool sgn
)
405 int sel_tmp
= allocate_temp_register();
406 int sel_tmp2
= allocate_temp_register();
407 GPRVector
tmp(sel_tmp
, {0,1,2,3});
408 GPRVector
tmp2(sel_tmp2
, {0,1,2,3});
409 AluInstruction
*ir
= nullptr;
410 EAluOp opcode
= sgn
? op1_ffbh_int
: op1_ffbh_uint
;
411 for (int i
= 0; i
< 4; ++i
) {
412 if (!(instr
.dest
.write_mask
& (1 << i
)))
415 ir
= new AluInstruction(opcode
, tmp
.reg_i(i
), from_nir(instr
.src
[0], i
), write
);
416 emit_instruction(ir
);
420 for (int i
= 0; i
< 4 ; ++i
) {
421 if (!(instr
.dest
.write_mask
& (1 << i
)))
424 ir
= new AluInstruction(op2_sub_int
, tmp2
.reg_i(i
),
425 PValue(new LiteralValue(31u, 0)), tmp
.reg_i(i
), write
);
426 emit_instruction(ir
);
430 for (int i
= 0; i
< 4 ; ++i
) {
431 if (!(instr
.dest
.write_mask
& (1 << i
)))
434 ir
= new AluInstruction(op3_cndge_int
, from_nir(instr
.dest
, i
), tmp
.reg_i(i
),
435 tmp2
.reg_i(i
), tmp
.reg_i(i
), write
);
436 emit_instruction(ir
);
443 bool EmitAluInstruction::emit_b2i32(const nir_alu_instr
& instr
)
445 AluInstruction
*ir
= nullptr;
446 for (int i
= 0; i
< 4 ; ++i
) {
447 if (!(instr
.dest
.write_mask
& (1 << i
)))
450 ir
= new AluInstruction(op2_and_int
, from_nir(instr
.dest
, i
),
451 from_nir(instr
.src
[0], i
), Value::one_i
, write
);
452 emit_instruction(ir
);
459 bool EmitAluInstruction::emit_pack_64_2x32_split(const nir_alu_instr
& instr
)
461 AluInstruction
*ir
= nullptr;
462 for (unsigned i
= 0; i
< 2; ++i
) {
463 if (!(instr
.dest
.write_mask
& (1 << i
)))
465 ir
= new AluInstruction(op1_mov
, from_nir(instr
.dest
, i
),
466 from_nir(instr
.src
[0], i
), write
);
467 emit_instruction(ir
);
469 ir
->set_flag(alu_last_instr
);
473 bool EmitAluInstruction::emit_unpack_64_2x32_split(const nir_alu_instr
& instr
, unsigned comp
)
475 emit_instruction(new AluInstruction(op1_mov
, from_nir(instr
.dest
, 0),
476 from_nir(instr
.src
[0], comp
), last_write
));
480 bool EmitAluInstruction::emit_create_vec(const nir_alu_instr
& instr
, unsigned nc
)
482 AluInstruction
*ir
= nullptr;
483 std::set
<int> src_slot
;
484 for(unsigned i
= 0; i
< nc
; ++i
) {
485 if (instr
.dest
.write_mask
& (1 << i
)){
486 auto src
= from_nir(instr
.src
[i
], 0);
487 ir
= new AluInstruction(op1_mov
, from_nir(instr
.dest
, i
), src
, write
);
488 if (instr
.dest
.saturate
) ir
->set_flag(alu_dst_clamp
);
490 // FIXME: This is a rather crude approach to fix the problem that
491 // r600 can't read from four different slots of the same component
492 // here we check only for the register index
493 if (src
->type() == Value::gpr
)
494 src_slot
.insert(src
->sel());
495 if (src_slot
.size() >= 3) {
497 ir
->set_flag(alu_last_instr
);
499 emit_instruction(ir
);
503 ir
->set_flag(alu_last_instr
);
507 bool EmitAluInstruction::emit_dot(const nir_alu_instr
& instr
, int n
)
509 const nir_alu_src
& src0
= instr
.src
[0];
510 const nir_alu_src
& src1
= instr
.src
[1];
512 AluInstruction
*ir
= nullptr;
513 for (int i
= 0; i
< n
; ++i
) {
514 ir
= new AluInstruction(op2_dot4_ieee
, from_nir(instr
.dest
, i
),
515 from_nir(src0
, i
), from_nir(src1
, i
),
516 instr
.dest
.write_mask
& (1 << i
) ? write
: empty
);
518 if (src0
.negate
) ir
->set_flag(alu_src0_neg
);
519 if (src0
.abs
) ir
->set_flag(alu_src0_abs
);
520 if (src1
.negate
) ir
->set_flag(alu_src1_neg
);
521 if (src1
.abs
) ir
->set_flag(alu_src1_abs
);
523 if (instr
.dest
.saturate
) ir
->set_flag(alu_dst_clamp
);
524 emit_instruction(ir
);
526 for (int i
= n
; i
< 4 ; ++i
) {
527 ir
= new AluInstruction(op2_dot4_ieee
, from_nir(instr
.dest
, i
),
528 Value::zero
, Value::zero
,
529 instr
.dest
.write_mask
& (1 << i
) ? write
: empty
);
530 emit_instruction(ir
);
534 ir
->set_flag(alu_last_instr
);
538 bool EmitAluInstruction::emit_fdph(const nir_alu_instr
& instr
)
540 const nir_alu_src
& src0
= instr
.src
[0];
541 const nir_alu_src
& src1
= instr
.src
[1];
543 AluInstruction
*ir
= nullptr;
544 for (int i
= 0; i
< 3 ; ++i
) {
545 ir
= new AluInstruction(op2_dot4_ieee
, from_nir(instr
.dest
, i
),
546 from_nir(src0
, i
), from_nir(src1
, i
),
547 instr
.dest
.write_mask
& (1 << i
) ? write
: empty
);
548 if (src0
.negate
) ir
->set_flag(alu_src0_neg
);
549 if (src0
.abs
) ir
->set_flag(alu_src0_abs
);
550 if (src1
.negate
) ir
->set_flag(alu_src1_neg
);
551 if (src1
.abs
) ir
->set_flag(alu_src1_abs
);
552 if (instr
.dest
.saturate
) ir
->set_flag(alu_dst_clamp
);
553 emit_instruction(ir
);
556 ir
= new AluInstruction(op2_dot4_ieee
, from_nir(instr
.dest
, 3), Value::one_f
,
557 from_nir(src1
, 3), (instr
.dest
.write_mask
) & (1 << 3) ? write
: empty
);
558 if (src1
.negate
) ir
->set_flag(alu_src1_neg
);
559 if (src1
.abs
) ir
->set_flag(alu_src1_abs
);
560 emit_instruction(ir
);
562 ir
->set_flag(alu_last_instr
);
567 bool EmitAluInstruction::emit_alu_i2orf2_b1(const nir_alu_instr
& instr
, EAluOp op
)
569 AluInstruction
*ir
= nullptr;
570 for (int i
= 0; i
< 4 ; ++i
) {
571 if (instr
.dest
.write_mask
& (1 << i
)) {
572 ir
= new AluInstruction(op
, from_nir(instr
.dest
, i
),
573 from_nir(instr
.src
[0], i
), Value::zero
,
575 emit_instruction(ir
);
579 ir
->set_flag(alu_last_instr
);
583 bool EmitAluInstruction::emit_alu_b2f(const nir_alu_instr
& instr
)
585 AluInstruction
*ir
= nullptr;
586 for (int i
= 0; i
< 4 ; ++i
) {
587 if (instr
.dest
.write_mask
& (1 << i
)){
588 ir
= new AluInstruction(op2_and_int
, from_nir(instr
.dest
, i
),
589 from_nir(instr
.src
[0], i
), Value::one_f
, write
);
590 if (instr
.src
[0].negate
) ir
->set_flag(alu_src0_neg
);
591 if (instr
.src
[0].abs
) ir
->set_flag(alu_src0_abs
);
592 if (instr
.dest
.saturate
) ir
->set_flag(alu_dst_clamp
);
593 emit_instruction(ir
);
597 ir
->set_flag(alu_last_instr
);
601 bool EmitAluInstruction::emit_any_all_icomp(const nir_alu_instr
& instr
, EAluOp op
, unsigned nc
, bool all
)
604 AluInstruction
*ir
= nullptr;
605 PValue v
[4]; // this might need some additional temp register creation
606 for (unsigned i
= 0; i
< 4 ; ++i
)
607 v
[i
] = from_nir(instr
.dest
, i
);
609 EAluOp combine
= all
? op2_and_int
: op2_or_int
;
611 /* For integers we can not use the modifiers, so this needs some emulation */
612 /* Should actually be lowered with NIR */
613 if (instr
.src
[0].negate
== instr
.src
[1].negate
&&
614 instr
.src
[0].abs
== instr
.src
[1].abs
) {
616 for (unsigned i
= 0; i
< nc
; ++i
) {
617 ir
= new AluInstruction(op
, v
[i
], from_nir(instr
.src
[0], i
),
618 from_nir(instr
.src
[1], i
), write
);
619 emit_instruction(ir
);
622 ir
->set_flag(alu_last_instr
);
624 std::cerr
<< "Negate in iequal/inequal not (yet) supported\n";
628 for (unsigned i
= 0; i
< nc
/2 ; ++i
) {
629 ir
= new AluInstruction(combine
, v
[2 * i
], v
[2 * i
], v
[2 * i
+ 1], write
);
630 emit_instruction(ir
);
633 ir
->set_flag(alu_last_instr
);
636 ir
= new AluInstruction(combine
, v
[0], v
[0], v
[2], last_write
);
637 emit_instruction(ir
);
643 bool EmitAluInstruction::emit_any_all_fcomp(const nir_alu_instr
& instr
, EAluOp op
, unsigned nc
, bool all
)
645 AluInstruction
*ir
= nullptr;
646 PValue v
[4]; // this might need some additional temp register creation
647 for (unsigned i
= 0; i
< 4 ; ++i
)
648 v
[i
] = from_nir(instr
.dest
, i
);
650 for (unsigned i
= 0; i
< nc
; ++i
) {
651 ir
= new AluInstruction(op
, v
[i
], from_nir(instr
.src
[0],i
),
652 from_nir(instr
.src
[1],i
), write
);
654 if (instr
.src
[0].abs
)
655 ir
->set_flag(alu_src0_abs
);
656 if (instr
.src
[0].negate
)
657 ir
->set_flag(alu_src0_neg
);
659 if (instr
.src
[1].abs
)
660 ir
->set_flag(alu_src1_abs
);
661 if (instr
.src
[1].negate
)
662 ir
->set_flag(alu_src1_neg
);
664 emit_instruction(ir
);
667 ir
->set_flag(alu_last_instr
);
669 for (unsigned i
= 0; i
< nc
; ++i
) {
670 ir
= new AluInstruction(op1_max4
, v
[i
], v
[i
], write
);
671 if (all
) ir
->set_flag(alu_src0_neg
);
672 emit_instruction(ir
);
675 for (unsigned i
= nc
; i
< 4 ; ++i
) {
676 ir
= new AluInstruction(op1_max4
, v
[i
],
677 all
? Value::one_f
: Value::zero
, write
);
679 ir
->set_flag(alu_src0_neg
);
681 emit_instruction(ir
);
684 ir
->set_flag(alu_last_instr
);
687 op
= (op
== op2_sete
) ? op2_sete_dx10
: op2_setne_dx10
;
689 op
= (op
== op2_sete
) ? op2_setne_dx10
: op2_sete_dx10
;
691 ir
= new AluInstruction(op
, v
[0], v
[0], Value::one_f
, last_write
);
693 ir
->set_flag(alu_src1_neg
);
694 emit_instruction(ir
);
699 bool EmitAluInstruction::emit_any_all_fcomp2(const nir_alu_instr
& instr
, EAluOp op
, bool all
)
701 AluInstruction
*ir
= nullptr;
702 PValue v
[4]; // this might need some additional temp register creation
703 for (unsigned i
= 0; i
< 4 ; ++i
)
704 v
[i
] = from_nir(instr
.dest
, i
);
706 for (unsigned i
= 0; i
< 2 ; ++i
) {
707 ir
= new AluInstruction(op
, v
[i
], from_nir(instr
.src
[0],i
),
708 from_nir(instr
.src
[1],i
), write
);
709 if (instr
.src
[0].abs
)
710 ir
->set_flag(alu_src0_abs
);
711 if (instr
.src
[0].negate
)
712 ir
->set_flag(alu_src0_neg
);
714 if (instr
.src
[1].abs
)
715 ir
->set_flag(alu_src1_abs
);
716 if (instr
.src
[1].negate
)
717 ir
->set_flag(alu_src1_neg
);
719 emit_instruction(ir
);
722 ir
->set_flag(alu_last_instr
);
724 op
= (op
== op2_setne_dx10
) ? op2_or_int
: op2_and_int
;
725 ir
= new AluInstruction(op
, v
[0], v
[0], v
[1], last_write
);
726 emit_instruction(ir
);
731 bool EmitAluInstruction::emit_alu_trans_op2(const nir_alu_instr
& instr
, EAluOp opcode
)
733 const nir_alu_src
& src0
= instr
.src
[0];
734 const nir_alu_src
& src1
= instr
.src
[1];
736 AluInstruction
*ir
= nullptr;
738 if (get_chip_class() == CAYMAN
) {
739 int lasti
= util_last_bit(instr
.dest
.write_mask
);
740 for (int k
= 0; k
< lasti
; ++k
) {
741 if (instr
.dest
.write_mask
& (1 << k
)) {
743 for (int i
= 0; i
< 4; i
++) {
744 ir
= new AluInstruction(opcode
, from_nir(instr
.dest
, i
), from_nir(src0
, k
), from_nir(src1
, k
), (i
== k
) ? write
: empty
);
745 if (src0
.negate
) ir
->set_flag(alu_src0_neg
);
746 if (src0
.abs
) ir
->set_flag(alu_src0_abs
);
747 if (src1
.negate
) ir
->set_flag(alu_src1_neg
);
748 if (src1
.abs
) ir
->set_flag(alu_src1_abs
);
749 if (instr
.dest
.saturate
) ir
->set_flag(alu_dst_clamp
);
750 if (i
== 3) ir
->set_flag(alu_last_instr
);
751 emit_instruction(ir
);
756 for (int i
= 0; i
< 4 ; ++i
) {
757 if (instr
.dest
.write_mask
& (1 << i
)){
758 ir
= new AluInstruction(opcode
, from_nir(instr
.dest
, i
), from_nir(src0
, i
), from_nir(src1
, i
), last_write
);
759 if (src0
.negate
) ir
->set_flag(alu_src0_neg
);
760 if (src0
.abs
) ir
->set_flag(alu_src0_abs
);
761 if (src1
.negate
) ir
->set_flag(alu_src1_neg
);
762 if (src1
.abs
) ir
->set_flag(alu_src1_abs
);
763 if (instr
.dest
.saturate
) ir
->set_flag(alu_dst_clamp
);
764 emit_instruction(ir
);
771 bool EmitAluInstruction::emit_alu_op2_int(const nir_alu_instr
& instr
, EAluOp opcode
, AluOp2Opts opts
)
774 const nir_alu_src
& src0
= instr
.src
[0];
775 const nir_alu_src
& src1
= instr
.src
[1];
777 if (src0
.negate
|| src1
.negate
||
778 src0
.abs
|| src1
.abs
) {
779 std::cerr
<< "R600: don't support modifiers with integer operations";
782 return emit_alu_op2(instr
, opcode
, opts
);
785 bool EmitAluInstruction::emit_alu_op2(const nir_alu_instr
& instr
, EAluOp opcode
, AluOp2Opts ops
)
787 const nir_alu_src
*src0
= &instr
.src
[0];
788 const nir_alu_src
*src1
= &instr
.src
[1];
790 if (ops
& op2_opt_reverse
)
791 std::swap(src0
, src1
);
793 bool src1_negate
= (ops
& op2_opt_neg_src1
) ^ src1
->negate
;
795 AluInstruction
*ir
= nullptr;
796 for (int i
= 0; i
< 4 ; ++i
) {
797 if (instr
.dest
.write_mask
& (1 << i
)){
798 ir
= new AluInstruction(opcode
, from_nir(instr
.dest
, i
),
799 from_nir(*src0
, i
), from_nir(*src1
, i
), write
);
801 if (src0
->negate
) ir
->set_flag(alu_src0_neg
);
802 if (src0
->abs
) ir
->set_flag(alu_src0_abs
);
803 if (src1_negate
) ir
->set_flag(alu_src1_neg
);
804 if (src1
->abs
) ir
->set_flag(alu_src1_abs
);
805 if (instr
.dest
.saturate
) ir
->set_flag(alu_dst_clamp
);
806 emit_instruction(ir
);
810 ir
->set_flag(alu_last_instr
);
814 bool EmitAluInstruction::emit_alu_op2_split_src_mods(const nir_alu_instr
& instr
, EAluOp opcode
, AluOp2Opts ops
)
816 const nir_alu_src
*src0
= &instr
.src
[0];
817 const nir_alu_src
*src1
= &instr
.src
[1];
819 if (ops
& op2_opt_reverse
)
820 std::swap(src0
, src1
);
822 GPRVector::Values v0
;
823 for (int i
= 0; i
< 4 ; ++i
)
824 v0
[i
] = from_nir(*src0
, i
);
826 GPRVector::Values v1
;
827 for (int i
= 0; i
< 4 ; ++i
)
828 v1
[i
] = from_nir(*src1
, i
);
830 if (src0
->abs
|| src0
->negate
) {
831 int src0_tmp
= allocate_temp_register();
832 GPRVector::Values v0_temp
;
833 AluInstruction
*ir
= nullptr;
834 for (int i
= 0; i
< 4 ; ++i
) {
835 if (instr
.dest
.write_mask
& (1 << i
)) {
836 v0_temp
[i
] = PValue(new GPRValue(src0_tmp
, i
));
837 ir
= new AluInstruction(op1_mov
, v0_temp
[i
], v0
[i
], write
);
838 if (src0
->abs
) ir
->set_flag(alu_src0_abs
);
839 if (src0
->negate
) ir
->set_flag(alu_src0_neg
);
840 emit_instruction(ir
);
845 ir
->set_flag(alu_last_instr
);
848 if (src1
->abs
|| src1
->negate
) {
849 int src1_tmp
= allocate_temp_register();
850 GPRVector::Values v1_temp
;
851 AluInstruction
*ir
= nullptr;
852 for (int i
= 0; i
< 4 ; ++i
) {
853 if (instr
.dest
.write_mask
& (1 << i
)) {
854 v1_temp
[i
] = PValue(new GPRValue(src1_tmp
, i
));
855 ir
= new AluInstruction(op1_mov
, v1_temp
[i
], v1
[i
], {alu_write
});
856 if (src1
->abs
) ir
->set_flag(alu_src0_abs
);
857 if (src1
->negate
) ir
->set_flag(alu_src0_neg
);
858 emit_instruction(ir
);
863 ir
->set_flag(alu_last_instr
);
866 AluInstruction
*ir
= nullptr;
867 for (int i
= 0; i
< 4 ; ++i
) {
868 if (instr
.dest
.write_mask
& (1 << i
)){
869 ir
= new AluInstruction(opcode
, from_nir(instr
.dest
, i
), {v0
[i
], v1
[i
]}, {alu_write
});
870 emit_instruction(ir
);
874 ir
->set_flag(alu_last_instr
);
879 bool EmitAluInstruction::emit_alu_isign(const nir_alu_instr
& instr
)
881 int sel_tmp
= allocate_temp_register();
882 GPRVector
tmp(sel_tmp
, {0,1,2,3});
884 AluInstruction
*ir
= nullptr;
887 for (int i
= 0; i
< 4 ; ++i
) {
888 if (instr
.dest
.write_mask
& (1 << i
)){
889 help
[i
] = from_nir(instr
.dest
, i
);
890 auto s
= from_nir(instr
.src
[0], i
);
891 ir
= new AluInstruction(op3_cndgt_int
, help
[i
], s
, Value::one_i
, s
, write
);
892 emit_instruction(ir
);
896 ir
->set_flag(alu_last_instr
);
898 for (int i
= 0; i
< 4 ; ++i
) {
899 if (instr
.dest
.write_mask
& (1 << i
)){
900 ir
= new AluInstruction(op2_sub_int
, tmp
.reg_i(i
), Value::zero
, help
[i
], write
);
901 emit_instruction(ir
);
905 ir
->set_flag(alu_last_instr
);
907 for (int i
= 0; i
< 4 ; ++i
) {
908 if (instr
.dest
.write_mask
& (1 << i
)){
910 ir
= new AluInstruction(op3_cndgt_int
, help
[i
], tmp
.reg_i(i
),
911 PValue(new LiteralValue(-1,0)), help
[i
], write
);
912 emit_instruction(ir
);
916 ir
->set_flag(alu_last_instr
);
920 bool EmitAluInstruction::emit_fsign(const nir_alu_instr
& instr
)
924 AluInstruction
*ir
= nullptr;
926 for (int i
= 0; i
< 4 ; ++i
) {
927 help
[i
] = from_nir(instr
.dest
, i
);
928 src
[i
] = from_nir(instr
.src
[0], i
);
931 if (instr
.src
[0].abs
) {
933 for (int i
= 0; i
< 4 ; ++i
) {
934 if (instr
.dest
.write_mask
& (1 << i
)){
935 ir
= new AluInstruction(op2_setgt
, help
[i
], src
[i
], Value::zero
, write
);
936 ir
->set_flag(alu_src0_abs
);
937 emit_instruction(ir
);
941 ir
->set_flag(alu_last_instr
);
943 if (instr
.src
[0].negate
) {
944 for (int i
= 0; i
< 4 ; ++i
) {
945 if (instr
.dest
.write_mask
& (1 << i
)){
946 ir
= new AluInstruction(op1_mov
, help
[i
], help
[i
], write
);
947 ir
->set_flag(alu_src0_neg
);
948 emit_instruction(ir
);
952 ir
->set_flag(alu_last_instr
);
958 for (int i
= 0; i
< 4 ; ++i
) {
959 if (instr
.dest
.write_mask
& (1 << i
)){
960 ir
= new AluInstruction(op3_cndgt
, help
[i
], src
[i
], Value::one_f
, src
[i
], write
);
961 if (instr
.src
[0].negate
) {
962 ir
->set_flag(alu_src0_neg
);
963 ir
->set_flag(alu_src2_neg
);
965 emit_instruction(ir
);
970 ir
->set_flag(alu_last_instr
);
972 for (int i
= 0; i
< 4 ; ++i
) {
973 if (instr
.dest
.write_mask
& (1 << i
)){
974 ir
= new AluInstruction(op3_cndgt
, help
[i
], help
[i
], Value::one_f
, help
[i
], write
);
975 ir
->set_flag(alu_src0_neg
);
976 ir
->set_flag(alu_src1_neg
);
977 emit_instruction(ir
);
981 ir
->set_flag(alu_last_instr
);
// Emits the three-source ALU instruction given by opcode once for every
// component enabled in instr.dest.write_mask.
//   instr   - NIR ALU instruction providing destination and sources
//   opcode  - hardware ALU opcode to emit
//   reorder - operand permutation: hardware operand k reads
//             instr.src[reorder[k]]
985 bool EmitAluInstruction::emit_alu_op3(const nir_alu_instr
& instr
, EAluOp opcode
,
986 std::array
<uint8_t, 3> reorder
)
// Apply the permutation once so the loop below can address operands 0..2.
988 const nir_alu_src
*src
[3];
989 src
[0] = &instr
.src
[reorder
[0]];
990 src
[1] = &instr
.src
[reorder
[1]];
991 src
[2] = &instr
.src
[reorder
[2]];
993 AluInstruction
*ir
= nullptr;
994 for (int i
= 0; i
< 4 ; ++i
) {
995 if (instr
.dest
.write_mask
& (1 << i
)){
996 ir
= new AluInstruction(opcode
, from_nir(instr
.dest
, i
),
997 from_nir(*src
[0], i
), from_nir(*src
[1], i
),
998 from_nir(*src
[2], i
), write
);
// Negate modifiers follow the reordered operands, not the NIR source index.
1000 if (src
[0]->negate
) ir
->set_flag(alu_src0_neg
);
1001 if (src
[1]->negate
) ir
->set_flag(alu_src1_neg
);
1002 if (src
[2]->negate
) ir
->set_flag(alu_src2_neg
);
// Destination saturate is expressed through the alu_dst_clamp flag.
1004 if (instr
.dest
.saturate
) ir
->set_flag(alu_dst_clamp
);
1005 ir
->set_flag(alu_write
);
1006 emit_instruction(ir
);
// Tag the last emitted instruction with alu_last_instr.
1010 ir
->set_flag(alu_last_instr
);
// Integer negation: for every component enabled in instr.dest.write_mask an
// op2_sub_int with operands (Value::zero, src[0]) is emitted - presumably
// computing 0 - src. The last emitted instruction is tagged alu_last_instr.
1014 bool EmitAluInstruction::emit_alu_ineg(const nir_alu_instr
& instr
)
1016 AluInstruction
*ir
= nullptr;
1017 for (int i
= 0; i
< 4 ; ++i
) {
1018 if (instr
.dest
.write_mask
& (1 << i
)){
1019 ir
= new AluInstruction(op2_sub_int
, from_nir(instr
.dest
, i
), Value::zero
,
1020 from_nir(instr
.src
[0], i
), write
);
1021 emit_instruction(ir
);
1025 ir
->set_flag(alu_last_instr
);
// File-local table of swizzle characters. Presumably indexed by a swizzle
// selector (0-3 components, then constant 0, constant 1, undefined, unused)
// for debug output - TODO confirm, no user is visible next to this definition.
1030 static const char swz
[] = "xyzw01?_";
// Integer absolute value, emitted in two passes over the components enabled
// in instr.dest.write_mask:
//   pass 1: tmp[i]  = sub_int(zero, src[i])            (negated copy)
//   pass 2: dest[i] = cndge_int(src[i], src[i], tmp[i])
// Pass 2 presumably keeps src when it is >= 0 and takes the negated copy
// otherwise - confirm against the op3_cndge_int definition.
1034 bool EmitAluInstruction::emit_alu_iabs(const nir_alu_instr
& instr
)
// One freshly allocated temporary register holds the negated copies.
1036 int sel_tmp
= allocate_temp_register();
1037 GPRVector
tmp(sel_tmp
, {0,1,2,3});
1039 std::array
<PValue
,4> src
;
1040 AluInstruction
*ir
= nullptr;
1041 for (int i
= 0; i
< 4 ; ++i
) {
1042 if (instr
.dest
.write_mask
& (1 << i
)){
// The source value is cached because the second pass reads it again.
1043 src
[i
] = from_nir(instr
.src
[0],i
);
1044 ir
= new AluInstruction(op2_sub_int
, tmp
.reg_i(i
), Value::zero
, src
[i
], write
);
1045 emit_instruction(ir
);
1049 ir
->set_flag(alu_last_instr
);
1051 for (int i
= 0; i
< 4 ; ++i
) {
1052 if (instr
.dest
.write_mask
& (1 << i
)){
1053 ir
= new AluInstruction(op3_cndge_int
, from_nir(instr
.dest
, i
), src
[i
],
1054 src
[i
], tmp
.reg_i(i
), write
);
1055 emit_instruction(ir
);
1059 ir
->set_flag(alu_last_instr
);
// Emits an integer division or modulo sequence for every component enabled
// in instr.dest.write_mask, built around op1_recip_uint followed by
// correction steps.
//   instr      - NIR ALU instruction; src[0] is read as src0, src[1] as src1
//   use_signed - presumably selects the signed variant (sign handling via
//                asrc1, asrc2 and rsign) - TODO confirm
//   mod        - presumably selects remainder instead of quotient - TODO confirm
// NOTE(review): several instructions below write the same register and are
// alternatives of one another; which one runs depends on use_signed and mod.
1063 bool EmitAluInstruction::emit_alu_div_int(const nir_alu_instr
& instr
, bool use_signed
, bool mod
)
// Three temporary registers: sel_tmp is used component-wise for the four
// scalars below, sel_tmp0 and sel_tmp1 back the vec4 scratch values.
1066 int sel_tmp
= allocate_temp_register();
1067 int sel_tmp0
= allocate_temp_register();
1068 int sel_tmp1
= allocate_temp_register();
1070 PValue
asrc1(new GPRValue(sel_tmp
, 0));
1071 PValue
asrc2(new GPRValue(sel_tmp
, 1));
1072 PValue
rsign(new GPRValue(sel_tmp
, 2));
1073 PValue
err(new GPRValue(sel_tmp
, 3));
1075 GPRVector
tmp0(sel_tmp0
, {0,1,2,3});
1076 GPRVector
tmp1(sel_tmp1
, {0,1,2,3});
1078 std::array
<PValue
, 4> src0
;
1079 std::array
<PValue
, 4> src1
;
// Resolve both operands for the enabled components before emitting anything.
1081 for (int i
= 0; i
< 4 ; ++i
) {
1082 if (instr
.dest
.write_mask
& (1 << i
)) {
1083 src0
[i
] = from_nir(instr
.src
[0], i
);
1084 src1
[i
] = from_nir(instr
.src
[1], i
);
// Components are processed from 3 down to 0; the scratch registers are
// reused for each component.
1089 for (int i
= 3; i
>= 0 ; --i
) {
1090 if (!(instr
.dest
.write_mask
& (1 << i
)))
// asrc1 = 0 - src0, asrc2 = 0 - src1, rsign = src0 xor src1.
1093 emit_instruction(op2_sub_int
, asrc1
, {Value::zero
, src0
[i
]}, {alu_write
});
1094 emit_instruction(op2_sub_int
, asrc2
, {Value::zero
, src1
[i
]}, {alu_write
});
1095 emit_instruction(op2_xor_int
, rsign
, {src0
[i
], src1
[i
]}, {alu_write
, alu_last_instr
});
// cndge_int(src, src, negated src) - presumably the absolute values.
1098 emit_instruction(op3_cndge_int
, asrc1
, {src0
[i
], src0
[i
], asrc1
}, {alu_write
});
1099 emit_instruction(op3_cndge_int
, asrc2
, {src1
[i
], src1
[i
], asrc2
}, {alu_write
, alu_last_instr
});
// tmp0.x = recip_uint(asrc2)
1105 emit_instruction(op1_recip_uint
, tmp0
.x(), {asrc2
}, {alu_write
, alu_last_instr
});
// tmp0.z = mullo_uint(tmp0.x, asrc2)
1107 emit_instruction(op2_mullo_uint
, tmp0
.z(), {tmp0
.x(), asrc2
}, {alu_write
, alu_last_instr
});
// tmp0.w = 0 - tmp0.z ; tmp0.y = mulhi_uint(tmp0.x, asrc2)
1109 emit_instruction(op2_sub_int
, tmp0
.w(), {Value::zero
, tmp0
.z()}, {alu_write
});
1110 emit_instruction(op2_mulhi_uint
, tmp0
.y(), {tmp0
.x(), asrc2
}, {alu_write
, alu_last_instr
});
// tmp0.z = cnde_int(tmp0.y, tmp0.w, tmp0.z)
1112 emit_instruction(op3_cnde_int
, tmp0
.z(), {tmp0
.y(), tmp0
.w(), tmp0
.z()}, {alu_write
, alu_last_instr
});
// err = mulhi_uint(tmp0.z, tmp0.x)
1114 emit_instruction(op2_mulhi_uint
, err
, {tmp0
.z(), tmp0
.x()}, {alu_write
, alu_last_instr
});
// tmp1.x = tmp0.x - err ; tmp1.y = tmp0.x + err
1116 emit_instruction(op2_sub_int
, tmp1
.x(), {tmp0
.x(), err
}, {alu_write
});
1117 emit_instruction(op2_add_int
, tmp1
.y(), {tmp0
.x(), err
}, {alu_write
, alu_last_instr
});
// tmp0.x = cnde_int(tmp0.y, tmp1.y, tmp1.x) - presumably the corrected
// reciprocal estimate.
1119 emit_instruction(op3_cnde_int
, tmp0
.x(), {tmp0
.y(), tmp1
.y(), tmp1
.x()}, {alu_write
, alu_last_instr
});
// tmp0.z = mulhi_uint(tmp0.x, asrc1) ; tmp0.y = mullo_uint(tmp0.z, asrc2)
1121 emit_instruction(op2_mulhi_uint
, tmp0
.z(), {tmp0
.x(), asrc1
}, {alu_write
, alu_last_instr
});
1122 emit_instruction(op2_mullo_uint
, tmp0
.y(), {tmp0
.z(), asrc2
}, {alu_write
, alu_last_instr
});
// tmp0.w = asrc1 - tmp0.y
1124 emit_instruction(op2_sub_int
, tmp0
.w(), {asrc1
, tmp0
.y()}, {alu_write
, alu_last_instr
});
// tmp1.x = (tmp0.w >= asrc2) ; tmp1.y = (asrc1 >= tmp0.y), both unsigned.
1127 emit_instruction(op2_setge_uint
, tmp1
.x(), {tmp0
.w(), asrc2
}, {alu_write
});
1128 emit_instruction(op2_setge_uint
, tmp1
.y(), {asrc1
, tmp0
.y()}, {alu_write
});
// Candidate corrections derived from tmp0.w and asrc2 (presumably the
// remainder variant).
1131 emit_instruction(op2_sub_int
, tmp1
.z(), {tmp0
.w(), asrc2
}, {alu_write
});
1132 emit_instruction(op2_add_int
, tmp1
.w(), {tmp0
.w(), asrc2
}, {alu_write
, alu_last_instr
});
// Candidate corrections tmp0.z + 1 and tmp0.z - 1 (presumably the quotient
// variant).
1134 emit_instruction(op2_add_int
, tmp1
.z(), {tmp0
.z(), Value::one_i
}, {alu_write
});
1135 emit_instruction(op2_sub_int
, tmp1
.w(), {tmp0
.z(), Value::one_i
}, {alu_write
, alu_last_instr
});
// tmp1.x = tmp1.x and tmp1.y
1138 emit_instruction(op2_and_int
, tmp1
.x(), {tmp1
.x(), tmp1
.y()}, {alu_write
, alu_last_instr
});
// Selection of the corrected value into tmp0.z; the two instructions differ
// only in the second operand (tmp0.w versus tmp0.z).
1141 emit_instruction(op3_cnde_int
, tmp0
.z(), {tmp1
.x(), tmp0
.w(), tmp1
.z()}, {alu_write
, alu_last_instr
});
1143 emit_instruction(op3_cnde_int
, tmp0
.z(), {tmp1
.x(), tmp0
.z(), tmp1
.z()}, {alu_write
, alu_last_instr
});
// tmp0.z = cnde_int(tmp1.y, tmp1.w, tmp0.z) ; tmp0.y = 0 - tmp0.z
1146 emit_instruction(op3_cnde_int
, tmp0
.z(), {tmp1
.y(), tmp1
.w(), tmp0
.z()}, {alu_write
, alu_last_instr
});
1147 emit_instruction(op2_sub_int
, tmp0
.y(), {Value::zero
, tmp0
.z()}, {alu_write
, alu_last_instr
});
// Final write to the destination: choose between tmp0.z and its negation
// based on src0 or on rsign, or use the plain cnde_int selection.
1150 emit_instruction(op3_cndge_int
, from_nir(instr
.dest
, i
), {src0
[i
], tmp0
.z(), tmp0
.y()},
1151 {alu_write
, alu_last_instr
});
1153 emit_instruction(op3_cndge_int
, from_nir(instr
.dest
, i
), {rsign
, tmp0
.z(), tmp0
.y()},
1154 {alu_write
, alu_last_instr
});
1156 emit_instruction(op3_cnde_int
, from_nir(instr
.dest
, i
), {tmp1
.y(), tmp1
.w(), tmp0
.z()}, {alu_write
, alu_last_instr
});
// Copies the first ncomp values of s into v with op1_mov instructions so
// that source modifiers end up applied in v.
//   src   - NIR source whose modifiers are materialized
//   s     - values to read
//   v     - values to write
//   ncomp - number of components to copy
// The alu_src0_abs and alu_src0_neg flags are set on the mov - presumably
// only when src.abs or src.negate is set; confirm the guarding conditions.
1162 void EmitAluInstruction::split_alu_modifiers(const nir_alu_src
& src
, GPRVector::Values
& s
,
1163 GPRVector::Values
& v
, int ncomp
)
1166 AluInstruction
*alu
= nullptr;
1167 for (int i
= 0; i
< ncomp
; ++i
) {
1168 alu
= new AluInstruction(op1_mov
, v
[i
], s
[i
], {alu_write
});
1170 alu
->set_flag(alu_src0_abs
);
1172 alu
->set_flag(alu_src0_neg
);
1173 emit_instruction(alu
);
// Emits a TexInstruction with opcode op that reads src[0] of instr and
// writes the destination of instr (going by the name, a screen-space
// derivative - TODO confirm against the callers).
//   instr - NIR ALU instruction providing destination and source
//   op    - texture opcode to emit
1178 bool EmitAluInstruction::emit_tex_fdd(const nir_alu_instr
& instr
, TexInstruction::Opcode op
,
// v collects destination values, s collects source values; source points
// at the value set that is handed to the instruction as its source.
1182 GPRVector::Values v
;
1183 GPRVector::Values s
;
1184 GPRVector::Values
*source
= &s
;
1185 std::array
<int, 4> writemask
= {0,1,2,3};
// Component count of the source, taken from the SSA def or from the register.
1187 int ncomp
= instr
.src
[0].src
.is_ssa
? instr
.src
[0].src
.ssa
->num_components
:
1188 instr
.src
[0].src
.reg
.reg
->num_components
;
// Enabled components keep their own index in the destination swizzle, all
// others get 7. Components at or beyond ncomp reuse component 0 so that v
// and s are always fully populated.
1190 for (int i
= 0; i
< 4; ++i
) {
1191 writemask
[i
] = (instr
.dest
.write_mask
& (1 << i
)) ? i
: 7;
1192 v
[i
] = from_nir(instr
.dest
, (i
< ncomp
) ? i
: 0);
1193 s
[i
] = from_nir(instr
.src
[0], (i
< ncomp
) ? i
: 0);
// Source modifiers are materialized with moves from s into v first.
1196 if (instr
.src
[0].abs
|| instr
.src
[0].negate
) {
1197 split_alu_modifiers(instr
.src
[0], s
, v
, ncomp
);
1201 /* This is querying the dreivatives of the output fb, so we would either need
1202 * access to the neighboring pixels or to the framebuffer. Neither is currently
1205 GPRVector
src(*source
);
1207 auto tex
= new TexInstruction(op
, dst
, src
, 0, R600_MAX_CONST_BUFFERS
, PValue());
1208 tex
->set_dest_swizzle(writemask
);
// NOTE(review): the std::cerr line looks like leftover debug output with a
// misspelled message; consider removing it or routing it through sfn_log.
// The condition guarding the grad_fine flag should be confirmed as well.
1211 std::cerr
<< "Sewt fine flag\n";
1212 tex
->set_flag(TexInstruction::grad_fine
);
1215 emit_instruction(tex
);
// Emits a bitfield extract in three passes over the components enabled in
// instr.dest.write_mask:
//   pass 1: dst[i] = opcode(src[0], src[1], src[2])
//   pass 2: tmp[i] = setge_int(src[2], 32)
//   pass 3: dst[i] = cnde_int(tmp[i], dst[i], src0[i])
// Pass 3 presumably replaces the result with the unmodified src[0] when
// src[2] is 32 or more - confirm against the opcode definitions.
//   instr  - NIR ALU instruction with three sources
//   opcode - hardware extract opcode used in pass 1
1220 bool EmitAluInstruction::emit_bitfield_extract(const nir_alu_instr
& instr
, EAluOp opcode
)
// One temporary register holds the per-component comparison results.
1222 int itmp
= allocate_temp_register();
1223 std::array
<PValue
, 4> tmp
;
1224 std::array
<PValue
, 4> dst
;
1225 std::array
<PValue
, 4> src0
;
1226 std::array
<PValue
, 4> shift
;
1228 PValue
l32(new LiteralValue(32));
1229 unsigned write_mask
= instr
.dest
.write_mask
;
1231 AluInstruction
*ir
= nullptr;
// Pass 1: the extract itself; dst, src0 and shift are cached for later passes.
1232 for (int i
= 0; i
< 4; i
++) {
1233 if (!(write_mask
& (1<<i
)))
1235 dst
[i
] = from_nir(instr
.dest
, i
);
1236 src0
[i
] = from_nir(instr
.src
[0], i
);
1237 shift
[i
] = from_nir(instr
.src
[2], i
);
1239 ir
= new AluInstruction(opcode
, dst
[i
],
1240 {src0
[i
], from_nir(instr
.src
[1], i
), shift
[i
]},
1242 emit_instruction(ir
);
// Pass 2: compare src[2] against the literal 32.
1246 for (int i
= 0; i
< 4; i
++) {
1247 if (!(write_mask
& (1<<i
)))
1249 tmp
[i
] = PValue(new GPRValue(itmp
, i
));
1250 ir
= new AluInstruction(op2_setge_int
, tmp
[i
], {shift
[i
], l32
},
1252 emit_instruction(ir
);
// Pass 3: select between the extract result and the original src[0].
1256 for (int i
= 0; i
< 4; i
++) {
1257 if (!(write_mask
& (1<<i
)))
1259 ir
= new AluInstruction(op3_cnde_int
, dst
[i
], {tmp
[i
], dst
[i
], src0
[i
]},
1261 emit_instruction(ir
);
// Emits a bitfield insert in five passes over the components enabled in
// instr.dest.write_mask:
//   pass 1: t0   = setge_int(src[3], 32)
//   pass 2: t1   = bfm_int(src[3], src[2])
//   pass 3: t2   = lshl_int(src[1], src[2])
//   pass 4: dest = bfi_int(t1, t2, src[0])
//   pass 5: dest = cnde_int(t0, dest, src[1])
// Pass 5 presumably substitutes src[1] for the result when src[3] is 32 or
// more - confirm against the opcode definitions.
// Returns true without emitting anything when the write mask is empty.
1268 bool EmitAluInstruction::emit_bitfield_insert(const nir_alu_instr
& instr
)
1270 auto t0
= get_temp_vec4();
1271 auto t1
= get_temp_vec4();
1272 auto t2
= get_temp_vec4();
1274 PValue
l32(new LiteralValue(32));
1275 unsigned write_mask
= instr
.dest
.write_mask
;
// With an empty mask ir would stay null below, hence the early exit.
1276 if (!write_mask
) return true;
1278 AluInstruction
*ir
= nullptr;
// Pass 1: compare src[3] against the literal 32.
// NOTE(review): no alu_last_instr is visible after this pass, unlike the
// following ones - confirm that this is intended.
1279 for (int i
= 0; i
< 4; i
++) {
1280 if (!(write_mask
& (1<<i
)))
1283 ir
= new AluInstruction(op2_setge_int
, t0
[i
], {from_nir(instr
.src
[3], i
), l32
}, {alu_write
});
1284 emit_instruction(ir
);
// Pass 2: bfm_int of src[3] and src[2] (presumably the bit mask for the field).
1288 for (int i
= 0; i
< 4; i
++) {
1289 if (!(write_mask
& (1<<i
)))
1291 ir
= new AluInstruction(op2_bfm_int
, t1
[i
], {from_nir(instr
.src
[3], i
),
1292 from_nir(instr
.src
[2], i
)}, {alu_write
});
1293 emit_instruction(ir
);
1295 ir
->set_flag(alu_last_instr
);
// Pass 3: shift src[1] left by src[2].
1297 for (int i
= 0; i
< 4; i
++) {
1298 if (!(write_mask
& (1<<i
)))
1300 ir
= new AluInstruction(op2_lshl_int
, t2
[i
], {from_nir(instr
.src
[1], i
),
1301 from_nir(instr
.src
[2], i
)}, {alu_write
});
1302 emit_instruction(ir
);
1304 ir
->set_flag(alu_last_instr
);
// Pass 4: combine mask, shifted value and src[0] with bfi_int.
1307 for (int i
= 0; i
< 4; i
++) {
1308 if (!(write_mask
& (1<<i
)))
1310 ir
= new AluInstruction(op3_bfi_int
, from_nir(instr
.dest
, i
),
1311 {t1
[i
], t2
[i
], from_nir(instr
.src
[0], i
)}, {alu_write
});
1312 emit_instruction(ir
);
1314 ir
->set_flag(alu_last_instr
);
// Pass 5: select between the bfi_int result and src[1] based on t0.
1316 for (int i
= 0; i
< 4; i
++) {
1317 if (!(write_mask
& (1<<i
)))
1319 ir
= new AluInstruction(op3_cnde_int
, from_nir(instr
.dest
, i
),
1320 {t0
[i
], from_nir(instr
.dest
, i
),
1321 from_nir(instr
.src
[1], i
)}, {alu_write
});
1322 emit_instruction(ir
);
1324 ir
->set_flag(alu_last_instr
);
// Unpacks the upper 16 bits of component 0 of src[0]: the source is first
// shifted right by the literal 16 into the destination, then the
// destination is converted in place with op1_flt16_to_flt32.
// Only component 0 of source and destination is touched.
1329 bool EmitAluInstruction::emit_unpack_32_2x16_split_y(const nir_alu_instr
& instr
)
1331 emit_instruction(op2_lshr_int
, from_nir(instr
.dest
, 0),
1332 {from_nir(instr
.src
[0], 0), PValue(new LiteralValue(16))},
1333 {alu_write
, alu_last_instr
});
// The conversion reads the value that was just written to the destination.
1335 emit_instruction(op1_flt16_to_flt32
, from_nir(instr
.dest
, 0),
1336 {from_nir(instr
.dest
, 0)},{alu_write
, alu_last_instr
});
// Unpacks the lower half of component 0 of src[0] with a single
// op1_flt16_to_flt32 written to component 0 of the destination. No shift or
// mask is emitted first - presumably the conversion only looks at the low
// 16 bits; confirm against the ISA.
1341 bool EmitAluInstruction::emit_unpack_32_2x16_split_x(const nir_alu_instr
& instr
)
1343 emit_instruction(op1_flt16_to_flt32
, from_nir(instr
.dest
, 0),
1344 {from_nir(instr
.src
[0], 0)},{alu_write
, alu_last_instr
});
// Packs two values into one 32 bit destination (component 0):
//   x    = flt32_to_flt16(src[0])
//   y    = flt32_to_flt16(src[1])
//   y    = y << 16
//   dest = x or y
// x and y are components 0 and 1 of one freshly allocated temporary register.
1348 bool EmitAluInstruction::emit_pack_32_2x16_split(const nir_alu_instr
& instr
)
1350 int it0
= allocate_temp_register();
1351 PValue
x(new GPRValue(it0
, 0));
1352 PValue
y(new GPRValue(it0
, 1));
// Both conversions are emitted together; the second carries alu_last_instr.
1354 emit_instruction(op1_flt32_to_flt16
, x
,{from_nir(instr
.src
[0], 0)},{alu_write
});
1355 emit_instruction(op1_flt32_to_flt16
, y
,{from_nir(instr
.src
[1], 0)},{alu_write
, alu_last_instr
});
// Move the second half into the upper 16 bits.
1357 emit_instruction(op2_lshl_int
, y
, {y
, PValue(new LiteralValue(16))},{alu_write
, alu_last_instr
});
// Merge both halves into the destination.
1359 emit_instruction(op2_or_int
, {from_nir(instr
.dest
, 0)} , {x
, y
},{alu_write
, alu_last_instr
});