3 * Copyright (c) 2018 Collabora LTD
5 * Author: Gert Wollny <gert.wollny@collabora.com>
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
28 #include "sfn_emitaluinstruction.h"
29 #include "sfn_debug.h"
31 #include "gallium/drivers/r600/r600_shader.h"
// Constructor: hands `processor` straight to the EmitInstruction base-class
// initializer. No other initialization is visible in this listing.
37 EmitAluInstruction::EmitAluInstruction(ShaderFromNirProcessor
& processor
):
38 EmitInstruction (processor
)
// Translates one NIR ALU instruction into r600 ALU instruction(s).
//
// `ir` is converted with nir_instr_as_alu(); the destination bit size is
// written to the SfnLog::instr log channel, split_constants() is run on the
// instruction, and the NIR opcode is then dispatched: every `case` below
// returns the result of the emit_* helper that handles that opcode, passing
// the r600 opcode (op1_* / op2_* / op3_*) and any option flags it needs.
//
// NOTE(review): the switch header and whatever handles opcodes that have no
// case here are not visible in this listing.
43 bool EmitAluInstruction::do_emit(nir_instr
* ir
)
45 const nir_alu_instr
& instr
= *nir_instr_as_alu(ir
);
47 r600::sfn_log
<< SfnLog::instr
<< "emit '"
49 << " bitsize: " << static_cast<int>(instr
.dest
.dest
.ssa
.bit_size
)
50 << "' (" << __func__
<< ")\n";
52 split_constants(instr
);
55 case nir_op_b2f32
: return emit_alu_b2f(instr
);
// i2b1 / f2b1: emit_alu_i2orf2_b1() compares the source against Value::zero
// with the given "set not equal" opcode.
56 case nir_op_i2b1
: return emit_alu_i2orf2_b1(instr
, op2_setne_int
);
57 case nir_op_f2b1
: return emit_alu_i2orf2_b1(instr
, op2_setne_dx10
);
59 case nir_op_mov
:return emit_mov(instr
);
60 case nir_op_ftrunc
: return emit_alu_op1(instr
, op1_trunc
);
// fabs / fneg / fsat are emitted as op1_mov carrying the matching source or
// destination modifier flag.
61 case nir_op_fabs
: return emit_alu_op1(instr
, op1_mov
, {1 << alu_src0_abs
});
62 case nir_op_fneg
: return emit_alu_op1(instr
, op1_mov
, {1 << alu_src0_neg
});
63 case nir_op_fsat
: return emit_alu_op1(instr
, op1_mov
, {1 << alu_dst_clamp
});
64 case nir_op_frcp
: return emit_alu_trans_op1(instr
, op1_recip_ieee
);
65 case nir_op_frsq
: return emit_alu_trans_op1(instr
, op1_recipsqrt_ieee1
);
66 case nir_op_fsin
: return emit_alu_trig_op1(instr
, op1_sin
);
67 case nir_op_fcos
: return emit_alu_trig_op1(instr
, op1_cos
);
68 case nir_op_fexp2
: return emit_alu_trans_op1(instr
, op1_exp_ieee
);
69 case nir_op_flog2
: return emit_alu_trans_op1(instr
, op1_log_clamped
);
71 case nir_op_fround_even
: return emit_alu_op1(instr
, op1_rndne
);
72 case nir_op_fsqrt
: return emit_alu_trans_op1(instr
, op1_sqrt_ieee
);
73 case nir_op_i2f32
: return emit_alu_trans_op1(instr
, op1_int_to_flt
);
74 case nir_op_u2f32
: return emit_alu_trans_op1(instr
, op1_uint_to_flt
);
75 case nir_op_f2i32
: return emit_alu_f2i32_or_u32(instr
, op1_flt_to_int
);
76 case nir_op_f2u32
: return emit_alu_f2i32_or_u32(instr
, op1_flt_to_uint
);
78 case nir_op_fceil
: return emit_alu_op1(instr
, op1_ceil
);
79 case nir_op_ffract
: return emit_alu_op1(instr
, op1_fract
);
80 case nir_op_ffloor
: return emit_alu_op1(instr
, op1_floor
);
82 case nir_op_fsign
: return emit_fsign(instr
);
83 case nir_op_fdph
: return emit_fdph(instr
);
85 case nir_op_ibitfield_extract
: return emit_bitfield_extract(instr
, op3_bfe_int
);
86 case nir_op_ubitfield_extract
: return emit_bitfield_extract(instr
, op3_bfe_uint
);
87 case nir_op_bitfield_insert
: return emit_bitfield_insert(instr
);
88 case nir_op_bit_count
: return emit_alu_op1(instr
, op1_bcnt_int
);
89 case nir_op_bitfield_reverse
: return emit_alu_op1(instr
, op1_bfrev_int
);
91 case nir_op_ieq
: return emit_alu_op2_int(instr
, op2_sete_int
);
92 case nir_op_ine
: return emit_alu_op2_int(instr
, op2_setne_int
);
93 case nir_op_ige
: return emit_alu_op2_int(instr
, op2_setge_int
);
94 case nir_op_ishl
: return emit_alu_op2_int(instr
, op2_lshl_int
);
95 case nir_op_ishr
: return emit_alu_op2_int(instr
, op2_ashr_int
);
// "Less than" (ilt here, ult and flt further down) is emitted as "set greater
// than" with op2_opt_reverse, which makes emit_alu_op2() swap the two sources.
96 case nir_op_ilt
: return emit_alu_op2_int(instr
, op2_setgt_int
, op2_opt_reverse
);
97 case nir_op_iand
: return emit_alu_op2_int(instr
, op2_and_int
);
98 case nir_op_ixor
: return emit_alu_op2_int(instr
, op2_xor_int
);
99 case nir_op_imin
: return emit_alu_op2_int(instr
, op2_min_int
);
100 case nir_op_imax
: return emit_alu_op2_int(instr
, op2_max_int
);
101 case nir_op_imul_high
: return emit_alu_trans_op2(instr
, op2_mulhi_int
);
102 case nir_op_umul_high
: return emit_alu_trans_op2(instr
, op2_mulhi_uint
);
103 case nir_op_umax
: return emit_alu_op2_int(instr
, op2_max_uint
);
104 case nir_op_umin
: return emit_alu_op2_int(instr
, op2_min_uint
);
105 case nir_op_ior
: return emit_alu_op2_int(instr
, op2_or_int
);
106 case nir_op_inot
: return emit_alu_op1(instr
, op1_not_int
);
107 case nir_op_iabs
: return emit_alu_iabs(instr
);
108 case nir_op_ineg
: return emit_alu_ineg(instr
);
// emit_alu_div_int(instr, use_signed, mod)
109 case nir_op_idiv
: return emit_alu_div_int(instr
, true, false);
110 case nir_op_udiv
: return emit_alu_div_int(instr
, false, false);
111 case nir_op_umod
: return emit_alu_div_int(instr
, false, true);
112 case nir_op_isign
: return emit_alu_isign(instr
);
114 case nir_op_uge
: return emit_alu_op2_int(instr
, op2_setge_uint
);
115 case nir_op_ult
: return emit_alu_op2_int(instr
, op2_setgt_uint
, op2_opt_reverse
);
116 case nir_op_ushr
: return emit_alu_op2_int(instr
, op2_lshr_int
);
118 case nir_op_flt
: return emit_alu_op2(instr
, op2_setgt_dx10
, op2_opt_reverse
);
120 case nir_op_fge
: return emit_alu_op2(instr
, op2_setge_dx10
);
121 case nir_op_fne
: return emit_alu_op2(instr
, op2_setne_dx10
);
122 case nir_op_feq
: return emit_alu_op2(instr
, op2_sete_dx10
);
124 case nir_op_fmin
: return emit_alu_op2(instr
, op2_min_dx10
);
125 case nir_op_fmax
: return emit_alu_op2(instr
, op2_max_dx10
);
126 case nir_op_fmul
: return emit_alu_op2(instr
, op2_mul_ieee
);
127 case nir_op_imul
: return emit_alu_trans_op2(instr
, op2_mullo_int
);
128 case nir_op_fadd
: return emit_alu_op2(instr
, op2_add
);
// fsub is emitted as op2_add with op2_opt_neg_src1, i.e. the negate state of
// the second source is toggled in emit_alu_op2().
129 case nir_op_fsub
: return emit_alu_op2(instr
, op2_add
, op2_opt_neg_src1
);
130 case nir_op_iadd
: return emit_alu_op2_int(instr
, op2_add_int
);
131 case nir_op_isub
: return emit_alu_op2_int(instr
, op2_sub_int
);
132 case nir_op_fdot2
: return emit_dot(instr
, 2);
133 case nir_op_fdot3
: return emit_dot(instr
, 3);
134 case nir_op_fdot4
: return emit_dot(instr
, 4);
// Vector any/all comparisons: (instr, per-component compare opcode,
// component count, all).
136 case nir_op_bany_inequal2
: return emit_any_all_icomp(instr
, op2_setne_int
, 2, false);
137 case nir_op_bany_inequal3
: return emit_any_all_icomp(instr
, op2_setne_int
, 3, false);
138 case nir_op_bany_inequal4
: return emit_any_all_icomp(instr
, op2_setne_int
, 4, false);
140 case nir_op_ball_iequal2
: return emit_any_all_icomp(instr
, op2_sete_int
, 2, true);
141 case nir_op_ball_iequal3
: return emit_any_all_icomp(instr
, op2_sete_int
, 3, true);
142 case nir_op_ball_iequal4
: return emit_any_all_icomp(instr
, op2_sete_int
, 4, true);
// The two-component float variants use a dedicated helper and the *_dx10
// compare opcodes; the 3/4-component variants use op2_setne / op2_sete.
144 case nir_op_bany_fnequal2
: return emit_any_all_fcomp2(instr
, op2_setne_dx10
, false);
145 case nir_op_bany_fnequal3
: return emit_any_all_fcomp(instr
, op2_setne
, 3, false);
146 case nir_op_bany_fnequal4
: return emit_any_all_fcomp(instr
, op2_setne
, 4, false);
148 case nir_op_ball_fequal2
: return emit_any_all_fcomp2(instr
, op2_sete_dx10
, true);
149 case nir_op_ball_fequal3
: return emit_any_all_fcomp(instr
, op2_sete
, 3, true);
150 case nir_op_ball_fequal4
: return emit_any_all_fcomp(instr
, op2_sete
, 4, true);
153 case nir_op_ffma
: return emit_alu_op3(instr
, op3_muladd_ieee
);
// bcsel passes the reorder list {0, 2, 1}: emit_alu_op3() feeds NIR sources
// 0, 2 and 1 to the three op3 operands, in that order.
154 case nir_op_bcsel
: return emit_alu_op3(instr
, op3_cnde_int
, {0, 2, 1});
155 case nir_op_vec2
: return emit_create_vec(instr
, 2);
156 case nir_op_vec3
: return emit_create_vec(instr
, 3);
157 case nir_op_vec4
: return emit_create_vec(instr
, 4);
159 case nir_op_find_lsb
: return emit_alu_op1(instr
, op1_ffbl_int
);
// emit_find_msb(instr, sgn): false = unsigned, true = signed variant.
160 case nir_op_ufind_msb
: return emit_find_msb(instr
, false);
161 case nir_op_ifind_msb
: return emit_find_msb(instr
, true);
162 case nir_op_b2i32
: return emit_b2i32(instr
);
163 case nir_op_pack_64_2x32_split
: return emit_pack_64_2x32_split(instr
);
164 case nir_op_unpack_64_2x32_split_x
: return emit_unpack_64_2x32_split(instr
, 0);
165 case nir_op_unpack_64_2x32_split_y
: return emit_unpack_64_2x32_split(instr
, 1);
166 case nir_op_unpack_half_2x16_split_x
: return emit_unpack_32_2x16_split_x(instr
);
167 case nir_op_unpack_half_2x16_split_y
: return emit_unpack_32_2x16_split_y(instr
);
168 case nir_op_pack_half_2x16_split
: return emit_pack_32_2x16_split(instr
);
171 /* These are in the ALU instruction list, but they should be texture instructions */
172 case nir_op_fddx_fine
: return emit_tex_fdd(instr
, TexInstruction::get_gradient_h
, true);
// fddx_coarse has no statement of its own and shares the fddx case below.
173 case nir_op_fddx_coarse
:
174 case nir_op_fddx
: return emit_tex_fdd(instr
, TexInstruction::get_gradient_h
, false);
176 case nir_op_fddy_fine
: return emit_tex_fdd(instr
, TexInstruction::get_gradient_v
, true);
// fddy_coarse likewise shares the fddy case below.
177 case nir_op_fddy_coarse
:
178 case nir_op_fddy
: return emit_tex_fdd(instr
,TexInstruction::get_gradient_v
, false);
180 case nir_op_umad24
: return emit_alu_op3(instr
, op3_muladd_uint24
, {0, 1, 2});
181 case nir_op_umul24
: return emit_alu_op2(instr
, op2_mul_uint24
);
// Examines the sources of a multi-input ALU instruction for constant operands
// (values whose type() is Value::kconst).
//
// The input count is looked up in nir_op_infos and tested for "< 2" (the
// statement guarded by that test is not visible in this listing). Component 0
// of every source is fetched with from_nir(); each kconst source is recorded
// in `c`. Afterwards every recorded constant past the first is compared with
// the first one's sel() and kcache_bank(), and on a mismatch load_uniform()
// is called for the corresponding NIR source.
//
// NOTE(review): the declaration and update of `nconst` and the filling of
// `idx` are not visible here; presumably idx[k] is the NIR source index of
// the k-th recorded constant -- confirm against the full file.
187 void EmitAluInstruction::split_constants(const nir_alu_instr
& instr
)
189 const nir_op_info
*op_info
= &nir_op_infos
[instr
.op
];
190 if (op_info
->num_inputs
< 2)
194 std::array
<const UniformValue
*,4> c
;
195 std::array
<int,4> idx
;
196 for (unsigned i
= 0; i
< op_info
->num_inputs
; ++i
) {
197 PValue src
= from_nir(instr
.src
[i
], 0);
199 if (src
->type() == Value::kconst
) {
200 c
[nconst
] = static_cast<const UniformValue
*>(src
.get());
// Reference location: the first constant's selector and kcache bank.
208 unsigned sel
= c
[0]->sel();
209 unsigned kcache
= c
[0]->kcache_bank();
210 sfn_log
<< SfnLog::reg
<< "split " << nconst
<< " constants, sel[0] = " << sel
; ;
212 for (int i
= 1; i
< nconst
; ++i
) {
213 sfn_log
<< "sel[" << i
<< "] = " << c
[i
]->sel() << "\n";
// A constant with a different selector or bank than the first one is handed
// to load_uniform().
214 if (c
[i
]->sel() != sel
|| c
[i
]->kcache_bank() != kcache
) {
215 load_uniform(instr
.src
[idx
[i
]]);
// Emits op1_not_int for every destination component enabled in
// instr.dest.write_mask.
//
// A negate or abs modifier on the source is detected first and reported on
// std::cerr as unsupported for integer ops (what happens after the message is
// not visible in this listing). After the loop alu_last_instr is set on the
// most recently created instruction; any null guard is not visible here.
//
// NOTE(review): do_emit() dispatches nir_op_inot to emit_alu_op1(), so no
// caller of this function is visible in this chunk.
220 bool EmitAluInstruction::emit_alu_inot(const nir_alu_instr
& instr
)
222 if (instr
.src
[0].negate
|| instr
.src
[0].abs
) {
223 std::cerr
<< "source modifiers not supported with int ops\n";
227 AluInstruction
*ir
= nullptr;
228 for (int i
= 0; i
< 4 ; ++i
) {
229 if (instr
.dest
.write_mask
& (1 << i
)){
230 ir
= new AluInstruction(op1_not_int
, from_nir(instr
.dest
, i
),
231 from_nir(instr
.src
[0], i
), write
);
232 emit_instruction(ir
);
236 ir
->set_flag(alu_last_instr
);
// Emits the one-source ALU op `opcode` for each destination component enabled
// in instr.dest.write_mask, reading the same component of source 0.
//
// `flags` lets the caller force modifiers on top of those carried by the NIR
// instruction: abs and destination clamp are OR-ed with the NIR modifiers,
// while negate is XOR-ed, so a forced negate cancels a negate that is already
// present on the source.
//
// NOTE(review): the code after the loop (last-instruction marking, return
// value) is not visible in this listing.
240 bool EmitAluInstruction::emit_alu_op1(const nir_alu_instr
& instr
, EAluOp opcode
,
241 const AluOpFlags
& flags
)
243 AluInstruction
*ir
= nullptr;
244 for (int i
= 0; i
< 4 ; ++i
) {
245 if (instr
.dest
.write_mask
& (1 << i
)){
246 ir
= new AluInstruction(opcode
, from_nir(instr
.dest
, i
),
247 from_nir(instr
.src
[0], i
), write
);
249 if (flags
.test(alu_src0_abs
) || instr
.src
[0].abs
)
250 ir
->set_flag(alu_src0_abs
);
// XOR: exactly one of "NIR negate" and "forced negate" must be set.
252 if (instr
.src
[0].negate
^ flags
.test(alu_src0_neg
))
253 ir
->set_flag(alu_src0_neg
);
255 if (flags
.test(alu_dst_clamp
) || instr
.dest
.saturate
)
256 ir
->set_flag(alu_dst_clamp
);
258 emit_instruction(ir
);
// Handles nir_op_mov.
//
// Fast path: when destination and source are both SSA and no abs / negate /
// saturate modifier is involved, each written component's source value is
// fetched and inject_register() is called for the destination SSA index and
// component, its result being AND-ed into `result`. Sources of type
// Value::kconst are additionally recorded with add_uniform() under the key
// (ssa index << 2) + component.
// Otherwise the move is emitted as a regular op1_mov through emit_alu_op1().
//
// NOTE(review): the declaration of `result`, the remaining arguments of
// inject_register() and the return of the fast path are not visible here.
266 bool EmitAluInstruction::emit_mov(const nir_alu_instr
& instr
)
268 /* If the op is a plain move between SSA values we can just forward
269 * the register reference to the original register */
270 if (instr
.dest
.dest
.is_ssa
&& instr
.src
[0].src
.is_ssa
&&
271 !instr
.src
[0].abs
&& !instr
.src
[0].negate
&& !instr
.dest
.saturate
) {
273 for (int i
= 0; i
< 4 ; ++i
) {
274 if (instr
.dest
.write_mask
& (1 << i
)){
275 auto src
= from_nir(instr
.src
[0], i
);
276 result
&= inject_register(instr
.dest
.dest
.ssa
.index
, i
,
279 if (src
->type() == Value::kconst
) {
280 add_uniform((instr
.dest
.dest
.ssa
.index
<< 2) + i
, src
);
286 return emit_alu_op1(instr
, op1_mov
);
// Emits a trigonometric op (`opcode`; do_emit() passes op1_sin / op1_cos)
// with a range reduction of the argument. Four passes run over the
// destination components, each testing the write mask first (the statement
// guarded by that test -- presumably a `continue` -- is not visible here):
//   1. dst = src * inv_2_pi + 0.5        (op3_muladd_ieee)
//   2. dst = fract(dst)                  (op1_fract)
//   3. dst = dst + (-0.5)                (op2_add with alu_src1_neg)
//   4. dst = opcode(dst)                 (created with last_write)
// The destination values themselves are used as intermediates. Only the
// negate modifier of the source is applied, in pass 1.
//
// Note: `inv_pihalf` is built from inv_2_pi, i.e. it holds 1/(2*PI) despite
// its name.
290 bool EmitAluInstruction::emit_alu_trig_op1(const nir_alu_instr
& instr
, EAluOp opcode
)
292 // normalize by dividing by 2*PI, shift by 0.5, take fraction, and shift back by 0.5 before applying the opcode
295 const float inv_2_pi
= 0.15915494f
;
297 PValue v
[4]; // this might need some additional temp register creation
298 for (unsigned i
= 0; i
< 4 ; ++i
)
299 v
[i
] = from_nir(instr
.dest
, i
);
301 PValue inv_pihalf
= PValue(new LiteralValue(inv_2_pi
, 0));
302 AluInstruction
*ir
= nullptr;
// Pass 1: scale by 1/(2*PI) and add 0.5.
303 for (unsigned i
= 0; i
< 4 ; ++i
) {
304 if (!(instr
.dest
.write_mask
& (1 << i
)))
306 ir
= new AluInstruction(op3_muladd_ieee
, v
[i
],
307 {from_nir(instr
.src
[0],i
), inv_pihalf
, Value::zero_dot_5
},
309 if (instr
.src
[0].negate
) ir
->set_flag(alu_src0_neg
);
310 emit_instruction(ir
);
// Pass 2: keep only the fractional part.
314 for (unsigned i
= 0; i
< 4 ; ++i
) {
315 if (!(instr
.dest
.write_mask
& (1 << i
)))
317 ir
= new AluInstruction(op1_fract
, v
[i
], v
[i
], {alu_write
});
318 emit_instruction(ir
);
// Pass 3: subtract 0.5 (add with the second source negated).
322 for (unsigned i
= 0; i
< 4 ; ++i
) {
323 if (!(instr
.dest
.write_mask
& (1 << i
)))
325 ir
= new AluInstruction(op2_add
, v
[i
], v
[i
], Value::zero_dot_5
, write
);
326 ir
->set_flag(alu_src1_neg
);
327 emit_instruction(ir
);
// Pass 4: the actual trig opcode on the reduced argument.
331 for (unsigned i
= 0; i
< 4 ; ++i
) {
332 if (!(instr
.dest
.write_mask
& (1 << i
)))
335 ir
= new AluInstruction(opcode
, v
[i
], v
[i
], last_write
);
336 emit_instruction(ir
);
// Emits the one-source op `opcode` with a separate code path for CAYMAN.
//
// CAYMAN path: the op is issued in slots 0 .. last_slot-1, where last_slot is
// 4 if component 3 is in the write mask and 3 otherwise. Every slot reads
// component 0 of source 0; only slots whose component is in the write mask
// get the `write` flags, the others get `empty`. The final slot is marked
// alu_last_instr.
// Second loop (presumably the non-CAYMAN branch; the `else` is not visible in
// this listing): one instruction per written component, reading the matching
// source component, each created with last_write.
//
// Both paths apply abs (also when `absolute` is set), negate and clamp.
// NOTE(review): the end of the parameter list, where `absolute` is declared,
// is not visible here. `src_idx` is declared but not used in any visible line.
341 bool EmitAluInstruction::emit_alu_trans_op1(const nir_alu_instr
& instr
, EAluOp opcode
,
344 AluInstruction
*ir
= nullptr;
345 std::set
<int> src_idx
;
347 if (get_chip_class() == CAYMAN
) {
348 int last_slot
= (instr
.dest
.write_mask
& 0x8) ? 4 : 3;
349 for (int i
= 0; i
< last_slot
; ++i
) {
350 ir
= new AluInstruction(opcode
, from_nir(instr
.dest
, i
),
351 from_nir(instr
.src
[0], 0), instr
.dest
.write_mask
& (1 << i
) ? write
: empty
);
352 if (absolute
|| instr
.src
[0].abs
) ir
->set_flag(alu_src0_abs
);
353 if (instr
.src
[0].negate
) ir
->set_flag(alu_src0_neg
);
354 if (instr
.dest
.saturate
) ir
->set_flag(alu_dst_clamp
);
356 if (i
== (last_slot
- 1)) ir
->set_flag(alu_last_instr
);
358 emit_instruction(ir
);
361 for (int i
= 0; i
< 4 ; ++i
) {
362 if (instr
.dest
.write_mask
& (1 << i
)){
363 ir
= new AluInstruction(opcode
, from_nir(instr
.dest
, i
),
364 from_nir(instr
.src
[0], i
), last_write
);
365 if (absolute
|| instr
.src
[0].abs
) ir
->set_flag(alu_src0_abs
);
366 if (instr
.src
[0].negate
) ir
->set_flag(alu_src0_neg
);
367 if (instr
.dest
.saturate
) ir
->set_flag(alu_dst_clamp
);
368 emit_instruction(ir
);
// Float-to-integer conversion in two passes over the destination components
// (each pass tests the write mask first; the guarded statement -- presumably
// a `continue` -- is not visible in this listing):
//   1. dst = trunc(src)   (op1_trunc, with the source's abs / negate applied)
//   2. dst = op(dst)      (do_emit() passes op1_flt_to_int / op1_flt_to_uint)
//
// NOTE(review): the statement guarded by `op == op1_flt_to_uint` and the
// tail of the function are not visible here.
375 bool EmitAluInstruction::emit_alu_f2i32_or_u32(const nir_alu_instr
& instr
, EAluOp op
)
377 AluInstruction
*ir
= nullptr;
378 std::array
<PValue
, 4> v
;
// Pass 1: truncate into the destination register.
380 for (int i
= 0; i
< 4; ++i
) {
381 if (!(instr
.dest
.write_mask
& (1 << i
)))
383 v
[i
] = from_nir(instr
.dest
, i
);
384 ir
= new AluInstruction(op1_trunc
, v
[i
], from_nir(instr
.src
[0], i
), {alu_write
});
385 if (instr
.src
[0].abs
) ir
->set_flag(alu_src0_abs
);
386 if (instr
.src
[0].negate
) ir
->set_flag(alu_src0_neg
);
387 emit_instruction(ir
);
// Pass 2: convert the truncated value in place.
391 for (int i
= 0; i
< 4; ++i
) {
392 if (!(instr
.dest
.write_mask
& (1 << i
)))
394 ir
= new AluInstruction(op
, v
[i
], v
[i
], {alu_write
});
395 emit_instruction(ir
);
396 if (op
== op1_flt_to_uint
)
// Emits find-most-significant-bit using two temporary registers, in three
// passes over the destination components (each pass tests the write mask
// first; the guarded statement -- presumably a `continue` -- is not visible
// in this listing):
//   1. tmp  = ffbh(src)       (op1_ffbh_int if `sgn`, else op1_ffbh_uint)
//   2. tmp2 = 31 - tmp        (op2_sub_int with literal 31 as first operand)
//   3. dst  = op3_cndge_int(tmp, tmp2, tmp)
// Presumably pass 3 selects 31 - ffbh when the ffbh result is >= 0 and passes
// a negative "not found" result through unchanged -- confirm against the ISA
// documentation.
404 bool EmitAluInstruction::emit_find_msb(const nir_alu_instr
& instr
, bool sgn
)
406 int sel_tmp
= allocate_temp_register();
407 int sel_tmp2
= allocate_temp_register();
408 GPRVector
tmp(sel_tmp
, {0,1,2,3});
409 GPRVector
tmp2(sel_tmp2
, {0,1,2,3});
410 AluInstruction
*ir
= nullptr;
411 EAluOp opcode
= sgn
? op1_ffbh_int
: op1_ffbh_uint
;
412 for (int i
= 0; i
< 4; ++i
) {
413 if (!(instr
.dest
.write_mask
& (1 << i
)))
416 ir
= new AluInstruction(opcode
, tmp
.reg_i(i
), from_nir(instr
.src
[0], i
), write
);
417 emit_instruction(ir
);
421 for (int i
= 0; i
< 4 ; ++i
) {
422 if (!(instr
.dest
.write_mask
& (1 << i
)))
425 ir
= new AluInstruction(op2_sub_int
, tmp2
.reg_i(i
),
426 PValue(new LiteralValue(31u, 0)), tmp
.reg_i(i
), write
);
427 emit_instruction(ir
);
431 for (int i
= 0; i
< 4 ; ++i
) {
432 if (!(instr
.dest
.write_mask
& (1 << i
)))
435 ir
= new AluInstruction(op3_cndge_int
, from_nir(instr
.dest
, i
), tmp
.reg_i(i
),
436 tmp2
.reg_i(i
), tmp
.reg_i(i
), write
);
437 emit_instruction(ir
);
// Boolean-to-int32: for each destination component (the write mask is tested
// first; the guarded statement is not visible in this listing) emits
// dst = src AND Value::one_i using op2_and_int.
// Presumably relies on a true boolean having at least bit 0 set -- confirm
// against the boolean representation used by this backend.
444 bool EmitAluInstruction::emit_b2i32(const nir_alu_instr
& instr
)
446 AluInstruction
*ir
= nullptr;
447 for (int i
= 0; i
< 4 ; ++i
) {
448 if (!(instr
.dest
.write_mask
& (1 << i
)))
451 ir
= new AluInstruction(op2_and_int
, from_nir(instr
.dest
, i
),
452 from_nir(instr
.src
[0], i
), Value::one_i
, write
);
453 emit_instruction(ir
);
// Emits op1_mov for destination components 0 and 1 (the write mask is tested
// first; the guarded statement is not visible in this listing), copying
// component i of source 0 into destination component i. The instruction
// created last is marked alu_last_instr.
//
// NOTE(review): only instr.src[0] is read, for both components. If this NIR
// op carries its two 32-bit halves in two separate sources, src[1] is never
// used -- confirm that this is intended.
460 bool EmitAluInstruction::emit_pack_64_2x32_split(const nir_alu_instr
& instr
)
462 AluInstruction
*ir
= nullptr;
463 for (unsigned i
= 0; i
< 2; ++i
) {
464 if (!(instr
.dest
.write_mask
& (1 << i
)))
466 ir
= new AluInstruction(op1_mov
, from_nir(instr
.dest
, i
),
467 from_nir(instr
.src
[0], i
), write
);
468 emit_instruction(ir
);
470 ir
->set_flag(alu_last_instr
);
// Emits a single op1_mov (created with last_write) that copies component
// `comp` of source 0 into destination component 0. do_emit() passes comp = 0
// for the _x variant and comp = 1 for the _y variant.
474 bool EmitAluInstruction::emit_unpack_64_2x32_split(const nir_alu_instr
& instr
, unsigned comp
)
476 emit_instruction(new AluInstruction(op1_mov
, from_nir(instr
.dest
, 0),
477 from_nir(instr
.src
[0], comp
), last_write
));
// Builds a vector of `nc` components: for every written destination component
// i an op1_mov copies component 0 of NIR source i into it, applying the
// destination saturate modifier as alu_dst_clamp.
//
// The sel() of each GPR source is collected in `src_slot`; once three or more
// distinct selectors have accumulated the current instruction is marked
// alu_last_instr (what else happens in that branch, e.g. resetting the set,
// is not visible in this listing). alu_last_instr is also set on the
// instruction created last, after the loop.
481 bool EmitAluInstruction::emit_create_vec(const nir_alu_instr
& instr
, unsigned nc
)
483 AluInstruction
*ir
= nullptr;
484 std::set
<int> src_slot
;
485 for(unsigned i
= 0; i
< nc
; ++i
) {
486 if (instr
.dest
.write_mask
& (1 << i
)){
487 auto src
= from_nir(instr
.src
[i
], 0);
488 ir
= new AluInstruction(op1_mov
, from_nir(instr
.dest
, i
), src
, write
);
489 if (instr
.dest
.saturate
) ir
->set_flag(alu_dst_clamp
);
491 // FIXME: This is a rather crude approach to fix the problem that
492 // r600 can't read from four different slots of the same component
493 // here we check only for the register index
494 if (src
->type() == Value::gpr
)
495 src_slot
.insert(src
->sel());
496 if (src_slot
.size() >= 3) {
498 ir
->set_flag(alu_last_instr
);
500 emit_instruction(ir
);
504 ir
->set_flag(alu_last_instr
);
// Emits an `n`-component dot product with op2_dot4_ieee issued for all four
// components:
//   - components 0 .. n-1 multiply the matching components of source 0 and
//     source 1, with the sources' negate / abs modifiers and the destination
//     clamp applied;
//   - components n .. 3 are padded with Value::zero * Value::zero.
// A component gets the `write` flags only if it is in the write mask,
// otherwise `empty`. The instruction created last is marked alu_last_instr.
508 bool EmitAluInstruction::emit_dot(const nir_alu_instr
& instr
, int n
)
510 const nir_alu_src
& src0
= instr
.src
[0];
511 const nir_alu_src
& src1
= instr
.src
[1];
513 AluInstruction
*ir
= nullptr;
514 for (int i
= 0; i
< n
; ++i
) {
515 ir
= new AluInstruction(op2_dot4_ieee
, from_nir(instr
.dest
, i
),
516 from_nir(src0
, i
), from_nir(src1
, i
),
517 instr
.dest
.write_mask
& (1 << i
) ? write
: empty
);
519 if (src0
.negate
) ir
->set_flag(alu_src0_neg
);
520 if (src0
.abs
) ir
->set_flag(alu_src0_abs
);
521 if (src1
.negate
) ir
->set_flag(alu_src1_neg
);
522 if (src1
.abs
) ir
->set_flag(alu_src1_abs
);
524 if (instr
.dest
.saturate
) ir
->set_flag(alu_dst_clamp
);
525 emit_instruction(ir
);
// Pad the remaining components with 0 * 0.
527 for (int i
= n
; i
< 4 ; ++i
) {
528 ir
= new AluInstruction(op2_dot4_ieee
, from_nir(instr
.dest
, i
),
529 Value::zero
, Value::zero
,
530 instr
.dest
.write_mask
& (1 << i
) ? write
: empty
);
531 emit_instruction(ir
);
535 ir
->set_flag(alu_last_instr
);
// Emits nir_op_fdph with op2_dot4_ieee issued for all four components:
//   - components 0 .. 2 multiply the matching components of source 0 and
//     source 1, with source modifiers and destination clamp applied;
//   - component 3 multiplies Value::one_f by component 3 of source 1, with
//     only source 1's negate / abs applied.
// A component gets the `write` flags only if it is in the write mask,
// otherwise `empty`. The fourth instruction is marked alu_last_instr.
// NOTE(review): the destination clamp is not set on the fourth instruction in
// the visible lines -- confirm whether that matters for the result.
539 bool EmitAluInstruction::emit_fdph(const nir_alu_instr
& instr
)
541 const nir_alu_src
& src0
= instr
.src
[0];
542 const nir_alu_src
& src1
= instr
.src
[1];
544 AluInstruction
*ir
= nullptr;
545 for (int i
= 0; i
< 3 ; ++i
) {
546 ir
= new AluInstruction(op2_dot4_ieee
, from_nir(instr
.dest
, i
),
547 from_nir(src0
, i
), from_nir(src1
, i
),
548 instr
.dest
.write_mask
& (1 << i
) ? write
: empty
);
549 if (src0
.negate
) ir
->set_flag(alu_src0_neg
);
550 if (src0
.abs
) ir
->set_flag(alu_src0_abs
);
551 if (src1
.negate
) ir
->set_flag(alu_src1_neg
);
552 if (src1
.abs
) ir
->set_flag(alu_src1_abs
);
553 if (instr
.dest
.saturate
) ir
->set_flag(alu_dst_clamp
);
554 emit_instruction(ir
);
// Fourth term: 1.0 * src1.w
557 ir
= new AluInstruction(op2_dot4_ieee
, from_nir(instr
.dest
, 3), Value::one_f
,
558 from_nir(src1
, 3), (instr
.dest
.write_mask
) & (1 << 3) ? write
: empty
);
559 if (src1
.negate
) ir
->set_flag(alu_src1_neg
);
560 if (src1
.abs
) ir
->set_flag(alu_src1_abs
);
561 emit_instruction(ir
);
563 ir
->set_flag(alu_last_instr
);
// Int/float to boolean: for every written destination component emits
// dst = op(src, Value::zero). do_emit() passes op2_setne_int for i2b1 and
// op2_setne_dx10 for f2b1, i.e. a "not equal to zero" test.
// The instruction created last is marked alu_last_instr.
// NOTE(review): the final constructor argument (the flags) is on a line that
// is not visible in this listing.
568 bool EmitAluInstruction::emit_alu_i2orf2_b1(const nir_alu_instr
& instr
, EAluOp op
)
570 AluInstruction
*ir
= nullptr;
571 for (int i
= 0; i
< 4 ; ++i
) {
572 if (instr
.dest
.write_mask
& (1 << i
)) {
573 ir
= new AluInstruction(op
, from_nir(instr
.dest
, i
),
574 from_nir(instr
.src
[0], i
), Value::zero
,
576 emit_instruction(ir
);
580 ir
->set_flag(alu_last_instr
);
// Boolean-to-float: for every written destination component emits
// dst = src AND Value::one_f using the integer op2_and_int, applying the
// source's negate / abs and the destination clamp. The instruction created
// last is marked alu_last_instr.
// Presumably relies on a true boolean being all ones, so that the bitwise AND
// with the bit pattern of 1.0f yields 1.0f, and 0 otherwise -- confirm.
// NOTE(review): negate / abs source modifiers are set on an integer op here,
// whereas emit_alu_op2_int() reports modifiers on integer operations as
// unsupported -- confirm that these flags can never be set for this op.
584 bool EmitAluInstruction::emit_alu_b2f(const nir_alu_instr
& instr
)
586 AluInstruction
*ir
= nullptr;
587 for (int i
= 0; i
< 4 ; ++i
) {
588 if (instr
.dest
.write_mask
& (1 << i
)){
589 ir
= new AluInstruction(op2_and_int
, from_nir(instr
.dest
, i
),
590 from_nir(instr
.src
[0], i
), Value::one_f
, write
);
591 if (instr
.src
[0].negate
) ir
->set_flag(alu_src0_neg
);
592 if (instr
.src
[0].abs
) ir
->set_flag(alu_src0_abs
);
593 if (instr
.dest
.saturate
) ir
->set_flag(alu_dst_clamp
);
594 emit_instruction(ir
);
598 ir
->set_flag(alu_last_instr
);
// Integer "any not-equal" / "all equal" over `nc` components.
//
//   1. If both sources carry the same negate and the same abs setting, each
//      of the first `nc` components is compared with `op` into the matching
//      destination component (the modifiers themselves are not applied).
//      Otherwise a "not (yet) supported" message is written to std::cerr.
//   2. The per-component results are combined pairwise (nc/2 instructions)
//      with op2_and_int when `all` is set, op2_or_int otherwise.
//   3. A final combine of v[0] and v[2] is created with last_write.
//
// NOTE(review): the `else`, the conditions guarding the set_flag() calls and
// the final combine, and the return statements are not visible in this
// listing. Also, equal `abs` flags on both sources pass the check in step 1
// although |a| == |b| is not the same as a == b -- confirm this is acceptable.
602 bool EmitAluInstruction::emit_any_all_icomp(const nir_alu_instr
& instr
, EAluOp op
, unsigned nc
, bool all
)
605 AluInstruction
*ir
= nullptr;
606 PValue v
[4]; // this might need some additional temp register creation
607 for (unsigned i
= 0; i
< 4 ; ++i
)
608 v
[i
] = from_nir(instr
.dest
, i
);
610 EAluOp combine
= all
? op2_and_int
: op2_or_int
;
612 /* For integers we can not use the modifiers, so this needs some emulation */
613 /* Should actually be lowered with NIR */
614 if (instr
.src
[0].negate
== instr
.src
[1].negate
&&
615 instr
.src
[0].abs
== instr
.src
[1].abs
) {
617 for (unsigned i
= 0; i
< nc
; ++i
) {
618 ir
= new AluInstruction(op
, v
[i
], from_nir(instr
.src
[0], i
),
619 from_nir(instr
.src
[1], i
), write
);
620 emit_instruction(ir
);
623 ir
->set_flag(alu_last_instr
);
625 std::cerr
<< "Negate in iequal/inequal not (yet) supported\n";
// Pairwise reduction: v[0] = v[0] (op) v[1], and for nc == 4 also
// v[2] = v[2] (op) v[3].
629 for (unsigned i
= 0; i
< nc
/2 ; ++i
) {
630 ir
= new AluInstruction(combine
, v
[2 * i
], v
[2 * i
], v
[2 * i
+ 1], write
);
631 emit_instruction(ir
);
634 ir
->set_flag(alu_last_instr
);
637 ir
= new AluInstruction(combine
, v
[0], v
[0], v
[2], last_write
);
638 emit_instruction(ir
);
// Float "any not-equal" / "all equal" over `nc` components (do_emit() uses
// this for the 3- and 4-component variants with op2_setne / op2_sete).
//
//   1. Each of the first `nc` components is compared with `op` into the
//      matching destination component, with source abs / negate applied.
//   2. op1_max4 is issued on components 0 .. nc-1 (source negated when `all`
//      is set) and on the padding components nc .. 3, which read
//      Value::one_f when `all` is set and Value::zero otherwise.
//   3. `op` is replaced by a *_dx10 compare and a final instruction compares
//      v[0] against Value::one_f, created with last_write.
//
// NOTE(review): the conditions selecting between the two `op = ...`
// assignments and guarding the set_flag(alu_src0_neg / alu_src1_neg) calls
// in steps 2 and 3 are not visible in this listing; presumably they depend on
// `all` -- confirm against the full file.
644 bool EmitAluInstruction::emit_any_all_fcomp(const nir_alu_instr
& instr
, EAluOp op
, unsigned nc
, bool all
)
646 AluInstruction
*ir
= nullptr;
647 PValue v
[4]; // this might need some additional temp register creation
648 for (unsigned i
= 0; i
< 4 ; ++i
)
649 v
[i
] = from_nir(instr
.dest
, i
);
// Step 1: per-component comparison.
651 for (unsigned i
= 0; i
< nc
; ++i
) {
652 ir
= new AluInstruction(op
, v
[i
], from_nir(instr
.src
[0],i
),
653 from_nir(instr
.src
[1],i
), write
);
655 if (instr
.src
[0].abs
)
656 ir
->set_flag(alu_src0_abs
);
657 if (instr
.src
[0].negate
)
658 ir
->set_flag(alu_src0_neg
);
660 if (instr
.src
[1].abs
)
661 ir
->set_flag(alu_src1_abs
);
662 if (instr
.src
[1].negate
)
663 ir
->set_flag(alu_src1_neg
);
665 emit_instruction(ir
);
668 ir
->set_flag(alu_last_instr
);
// Step 2: max4 reduction over the comparison results ...
670 for (unsigned i
= 0; i
< nc
; ++i
) {
671 ir
= new AluInstruction(op1_max4
, v
[i
], v
[i
], write
);
672 if (all
) ir
->set_flag(alu_src0_neg
);
673 emit_instruction(ir
);
// ... with the unused components fed a constant.
676 for (unsigned i
= nc
; i
< 4 ; ++i
) {
677 ir
= new AluInstruction(op1_max4
, v
[i
],
678 all
? Value::one_f
: Value::zero
, write
);
680 ir
->set_flag(alu_src0_neg
);
682 emit_instruction(ir
);
685 ir
->set_flag(alu_last_instr
);
// Step 3: final compare of the reduced value against 1.0f.
688 op
= (op
== op2_sete
) ? op2_sete_dx10
: op2_setne_dx10
;
690 op
= (op
== op2_sete
) ? op2_setne_dx10
: op2_sete_dx10
;
692 ir
= new AluInstruction(op
, v
[0], v
[0], Value::one_f
, last_write
);
694 ir
->set_flag(alu_src1_neg
);
695 emit_instruction(ir
);
// Two-component float "any not-equal" / "all equal".
//
//   1. Components 0 and 1 are compared with `op` into destination components
//      0 and 1, with source abs / negate applied; the second comparison is
//      marked alu_last_instr.
//   2. `op` is then reused as the combining opcode: op2_or_int if it was
//      op2_setne_dx10, op2_and_int otherwise, and
//      v[0] = v[0] (op) v[1] is emitted with last_write.
//
// `all` is not read in any visible line.
700 bool EmitAluInstruction::emit_any_all_fcomp2(const nir_alu_instr
& instr
, EAluOp op
, bool all
)
702 AluInstruction
*ir
= nullptr;
703 PValue v
[4]; // this might need some additional temp register creation
704 for (unsigned i
= 0; i
< 4 ; ++i
)
705 v
[i
] = from_nir(instr
.dest
, i
);
707 for (unsigned i
= 0; i
< 2 ; ++i
) {
708 ir
= new AluInstruction(op
, v
[i
], from_nir(instr
.src
[0],i
),
709 from_nir(instr
.src
[1],i
), write
);
710 if (instr
.src
[0].abs
)
711 ir
->set_flag(alu_src0_abs
);
712 if (instr
.src
[0].negate
)
713 ir
->set_flag(alu_src0_neg
);
715 if (instr
.src
[1].abs
)
716 ir
->set_flag(alu_src1_abs
);
717 if (instr
.src
[1].negate
)
718 ir
->set_flag(alu_src1_neg
);
720 emit_instruction(ir
);
723 ir
->set_flag(alu_last_instr
);
// Combine: OR for "any not-equal", AND for "all equal".
725 op
= (op
== op2_setne_dx10
) ? op2_or_int
: op2_and_int
;
726 ir
= new AluInstruction(op
, v
[0], v
[0], v
[1], last_write
);
727 emit_instruction(ir
);
// Emits the two-source op `opcode` with a separate code path for CAYMAN
// (do_emit() uses this for imul, imul_high and umul_high).
//
// CAYMAN path: for every written component k (up to the highest bit set in
// the write mask) the op is issued in all four slots, each reading component
// k of both sources; only slot i == k gets the `write` flags, the others get
// `empty`, and slot 3 is marked alu_last_instr.
// Second loop (presumably the non-CAYMAN branch; the `else` is not visible in
// this listing): one instruction per written component, created with
// last_write.
//
// Both paths apply the sources' negate / abs and the destination clamp.
732 bool EmitAluInstruction::emit_alu_trans_op2(const nir_alu_instr
& instr
, EAluOp opcode
)
734 const nir_alu_src
& src0
= instr
.src
[0];
735 const nir_alu_src
& src1
= instr
.src
[1];
737 AluInstruction
*ir
= nullptr;
739 if (get_chip_class() == CAYMAN
) {
740 int lasti
= util_last_bit(instr
.dest
.write_mask
);
741 for (int k
= 0; k
< lasti
; ++k
) {
742 if (instr
.dest
.write_mask
& (1 << k
)) {
744 for (int i
= 0; i
< 4; i
++) {
745 ir
= new AluInstruction(opcode
, from_nir(instr
.dest
, i
), from_nir(src0
, k
), from_nir(src1
, k
), (i
== k
) ? write
: empty
);
746 if (src0
.negate
) ir
->set_flag(alu_src0_neg
);
747 if (src0
.abs
) ir
->set_flag(alu_src0_abs
);
748 if (src1
.negate
) ir
->set_flag(alu_src1_neg
);
749 if (src1
.abs
) ir
->set_flag(alu_src1_abs
);
750 if (instr
.dest
.saturate
) ir
->set_flag(alu_dst_clamp
);
751 if (i
== 3) ir
->set_flag(alu_last_instr
);
752 emit_instruction(ir
);
757 for (int i
= 0; i
< 4 ; ++i
) {
758 if (instr
.dest
.write_mask
& (1 << i
)){
759 ir
= new AluInstruction(opcode
, from_nir(instr
.dest
, i
), from_nir(src0
, i
), from_nir(src1
, i
), last_write
);
760 if (src0
.negate
) ir
->set_flag(alu_src0_neg
);
761 if (src0
.abs
) ir
->set_flag(alu_src0_abs
);
762 if (src1
.negate
) ir
->set_flag(alu_src1_neg
);
763 if (src1
.abs
) ir
->set_flag(alu_src1_abs
);
764 if (instr
.dest
.saturate
) ir
->set_flag(alu_dst_clamp
);
765 emit_instruction(ir
);
// Integer front end for emit_alu_op2(): if either source carries a negate or
// abs modifier a message is written to std::cerr (what follows the message,
// e.g. an early return, is not visible in this listing); the instruction is
// then forwarded unchanged to emit_alu_op2() with the same opcode and options.
772 bool EmitAluInstruction::emit_alu_op2_int(const nir_alu_instr
& instr
, EAluOp opcode
, AluOp2Opts opts
)
775 const nir_alu_src
& src0
= instr
.src
[0];
776 const nir_alu_src
& src1
= instr
.src
[1];
778 if (src0
.negate
|| src1
.negate
||
779 src0
.abs
|| src1
.abs
) {
780 std::cerr
<< "R600: don't support modifiers with integer operations";
783 return emit_alu_op2(instr
, opcode
, opts
);
// Emits the two-source op `opcode` for every written destination component.
//
// Options in `ops`:
//   - op2_opt_reverse:  the two sources are swapped before use (this is how
//     do_emit() expresses "less than" through "greater than");
//   - op2_opt_neg_src1: toggles the negate state of the (possibly swapped)
//     second source; it is XOR-ed with that source's own negate modifier.
// Source abs / negate and the destination clamp are applied per instruction,
// and the instruction created last is marked alu_last_instr.
786 bool EmitAluInstruction::emit_alu_op2(const nir_alu_instr
& instr
, EAluOp opcode
, AluOp2Opts ops
)
788 const nir_alu_src
*src0
= &instr
.src
[0];
789 const nir_alu_src
*src1
= &instr
.src
[1];
791 if (ops
& op2_opt_reverse
)
792 std::swap(src0
, src1
);
// Evaluated after the swap, so it refers to the effective second operand.
794 bool src1_negate
= (ops
& op2_opt_neg_src1
) ^ src1
->negate
;
796 AluInstruction
*ir
= nullptr;
797 for (int i
= 0; i
< 4 ; ++i
) {
798 if (instr
.dest
.write_mask
& (1 << i
)){
799 ir
= new AluInstruction(opcode
, from_nir(instr
.dest
, i
),
800 from_nir(*src0
, i
), from_nir(*src1
, i
), write
);
802 if (src0
->negate
) ir
->set_flag(alu_src0_neg
);
803 if (src0
->abs
) ir
->set_flag(alu_src0_abs
);
804 if (src1_negate
) ir
->set_flag(alu_src1_neg
);
805 if (src1
->abs
) ir
->set_flag(alu_src1_abs
);
806 if (instr
.dest
.saturate
) ir
->set_flag(alu_dst_clamp
);
807 emit_instruction(ir
);
811 ir
->set_flag(alu_last_instr
);
// Variant of the two-source emitter that resolves source modifiers with
// separate moves instead of setting them on the final instruction.
//
//   1. Sources are optionally swapped (op2_opt_reverse) and all four
//      components of each are fetched into v0 / v1.
//   2. For each source that has abs or negate set, a temporary register is
//      allocated and, per written component, an op1_mov with the modifier
//      flags copies the source component into it; the move created last is
//      marked alu_last_instr.
//   3. `opcode` is emitted per written component on v0[i] / v1[i] without any
//      modifier flags; the instruction created last is marked alu_last_instr.
//
// NOTE(review): in the visible lines step 3 reads v0 / v1, not v0_temp /
// v1_temp; the assignment of the temporaries back to v0 / v1 is presumably on
// lines that are not visible in this listing -- confirm. No caller of this
// function appears in this chunk.
815 bool EmitAluInstruction::emit_alu_op2_split_src_mods(const nir_alu_instr
& instr
, EAluOp opcode
, AluOp2Opts ops
)
817 const nir_alu_src
*src0
= &instr
.src
[0];
818 const nir_alu_src
*src1
= &instr
.src
[1];
820 if (ops
& op2_opt_reverse
)
821 std::swap(src0
, src1
);
823 GPRVector::Values v0
;
824 for (int i
= 0; i
< 4 ; ++i
)
825 v0
[i
] = from_nir(*src0
, i
);
827 GPRVector::Values v1
;
828 for (int i
= 0; i
< 4 ; ++i
)
829 v1
[i
] = from_nir(*src1
, i
);
// Resolve modifiers of the first operand into a temporary register.
831 if (src0
->abs
|| src0
->negate
) {
832 int src0_tmp
= allocate_temp_register();
833 GPRVector::Values v0_temp
;
834 AluInstruction
*ir
= nullptr;
835 for (int i
= 0; i
< 4 ; ++i
) {
836 if (instr
.dest
.write_mask
& (1 << i
)) {
837 v0_temp
[i
] = PValue(new GPRValue(src0_tmp
, i
));
838 ir
= new AluInstruction(op1_mov
, v0_temp
[i
], v0
[i
], write
);
839 if (src0
->abs
) ir
->set_flag(alu_src0_abs
);
840 if (src0
->negate
) ir
->set_flag(alu_src0_neg
);
841 emit_instruction(ir
);
846 ir
->set_flag(alu_last_instr
);
// Same for the second operand; the mov has a single source, hence the
// alu_src0_* flags.
849 if (src1
->abs
|| src1
->negate
) {
850 int src1_tmp
= allocate_temp_register();
851 GPRVector::Values v1_temp
;
852 AluInstruction
*ir
= nullptr;
853 for (int i
= 0; i
< 4 ; ++i
) {
854 if (instr
.dest
.write_mask
& (1 << i
)) {
855 v1_temp
[i
] = PValue(new GPRValue(src1_tmp
, i
));
856 ir
= new AluInstruction(op1_mov
, v1_temp
[i
], v1
[i
], {alu_write
});
857 if (src1
->abs
) ir
->set_flag(alu_src0_abs
);
858 if (src1
->negate
) ir
->set_flag(alu_src0_neg
);
859 emit_instruction(ir
);
864 ir
->set_flag(alu_last_instr
);
// The actual operation, without source modifiers.
867 AluInstruction
*ir
= nullptr;
868 for (int i
= 0; i
< 4 ; ++i
) {
869 if (instr
.dest
.write_mask
& (1 << i
)){
870 ir
= new AluInstruction(opcode
, from_nir(instr
.dest
, i
), {v0
[i
], v1
[i
]}, {alu_write
});
871 emit_instruction(ir
);
875 ir
->set_flag(alu_last_instr
);
// Integer sign in three passes over the written destination components,
// using one temporary register; each pass ends by marking its last
// instruction alu_last_instr:
//   1. help = op3_cndgt_int(s, Value::one_i, s)
//   2. tmp  = 0 - help                          (op2_sub_int)
//   3. help = op3_cndgt_int(tmp, literal -1, help)
// where `help[i]` is the destination component and `s` the source component.
// Presumably cndgt_int yields its second operand when the first is > 0 and
// its third operand otherwise, so pass 1 clamps positive values to 1 and
// pass 3 maps negative values to -1 -- confirm against the ISA documentation.
//
// NOTE(review): the declaration of `help` is not visible in this listing.
880 bool EmitAluInstruction::emit_alu_isign(const nir_alu_instr
& instr
)
882 int sel_tmp
= allocate_temp_register();
883 GPRVector
tmp(sel_tmp
, {0,1,2,3});
885 AluInstruction
*ir
= nullptr;
888 for (int i
= 0; i
< 4 ; ++i
) {
889 if (instr
.dest
.write_mask
& (1 << i
)){
890 help
[i
] = from_nir(instr
.dest
, i
);
891 auto s
= from_nir(instr
.src
[0], i
);
892 ir
= new AluInstruction(op3_cndgt_int
, help
[i
], s
, Value::one_i
, s
, write
);
893 emit_instruction(ir
);
897 ir
->set_flag(alu_last_instr
);
899 for (int i
= 0; i
< 4 ; ++i
) {
900 if (instr
.dest
.write_mask
& (1 << i
)){
901 ir
= new AluInstruction(op2_sub_int
, tmp
.reg_i(i
), Value::zero
, help
[i
], write
);
902 emit_instruction(ir
);
906 ir
->set_flag(alu_last_instr
);
908 for (int i
= 0; i
< 4 ; ++i
) {
909 if (instr
.dest
.write_mask
& (1 << i
)){
911 ir
= new AluInstruction(op3_cndgt_int
, help
[i
], tmp
.reg_i(i
),
912 PValue(new LiteralValue(-1,0)), help
[i
], write
);
913 emit_instruction(ir
);
917 ir
->set_flag(alu_last_instr
);
// Float sign. Destination (`help`) and source (`src`) components are fetched
// up front; the code then has two visible parts.
//
// Source has the abs modifier:
//   - help = op2_setgt(|src|, 0) per written component;
//   - if the source is also negated, help = -help via op1_mov with
//     alu_src0_neg.
// General sequence (two passes, each ending with alu_last_instr):
//   1. help = op3_cndgt(src, Value::one_f, src); with a negated source the
//      alu_src0_neg and alu_src2_neg flags are set;
//   2. help = op3_cndgt(help, Value::one_f, help) with alu_src0_neg and
//      alu_src1_neg set.
// Presumably pass 1 clamps positive inputs to 1.0 and pass 2 maps negative
// ones to -1.0 -- confirm against the ISA documentation.
//
// NOTE(review): the declarations of `help` and `src`, and how the abs branch
// ends (early return or fall-through into the general sequence), are not
// visible in this listing.
921 bool EmitAluInstruction::emit_fsign(const nir_alu_instr
& instr
)
925 AluInstruction
*ir
= nullptr;
927 for (int i
= 0; i
< 4 ; ++i
) {
928 help
[i
] = from_nir(instr
.dest
, i
);
929 src
[i
] = from_nir(instr
.src
[0], i
);
932 if (instr
.src
[0].abs
) {
934 for (int i
= 0; i
< 4 ; ++i
) {
935 if (instr
.dest
.write_mask
& (1 << i
)){
936 ir
= new AluInstruction(op2_setgt
, help
[i
], src
[i
], Value::zero
, write
);
937 ir
->set_flag(alu_src0_abs
);
938 emit_instruction(ir
);
942 ir
->set_flag(alu_last_instr
);
944 if (instr
.src
[0].negate
) {
945 for (int i
= 0; i
< 4 ; ++i
) {
946 if (instr
.dest
.write_mask
& (1 << i
)){
947 ir
= new AluInstruction(op1_mov
, help
[i
], help
[i
], write
);
948 ir
->set_flag(alu_src0_neg
);
949 emit_instruction(ir
);
953 ir
->set_flag(alu_last_instr
);
959 for (int i
= 0; i
< 4 ; ++i
) {
960 if (instr
.dest
.write_mask
& (1 << i
)){
961 ir
= new AluInstruction(op3_cndgt
, help
[i
], src
[i
], Value::one_f
, src
[i
], write
);
962 if (instr
.src
[0].negate
) {
963 ir
->set_flag(alu_src0_neg
);
964 ir
->set_flag(alu_src2_neg
);
966 emit_instruction(ir
);
971 ir
->set_flag(alu_last_instr
);
973 for (int i
= 0; i
< 4 ; ++i
) {
974 if (instr
.dest
.write_mask
& (1 << i
)){
975 ir
= new AluInstruction(op3_cndgt
, help
[i
], help
[i
], Value::one_f
, help
[i
], write
);
976 ir
->set_flag(alu_src0_neg
);
977 ir
->set_flag(alu_src1_neg
);
978 emit_instruction(ir
);
982 ir
->set_flag(alu_last_instr
);
// Emits the three-source op `opcode` for every written destination component.
//
// `reorder` maps hardware operand positions to NIR source indices: operand k
// reads instr.src[reorder[k]] (do_emit() passes {0, 2, 1} for bcsel and
// {0, 1, 2} for umad24). Only the negate modifier of each source and the
// destination clamp are applied; abs is not handled here. The instruction
// created last is marked alu_last_instr.
986 bool EmitAluInstruction::emit_alu_op3(const nir_alu_instr
& instr
, EAluOp opcode
,
987 std::array
<uint8_t, 3> reorder
)
989 const nir_alu_src
*src
[3];
990 src
[0] = &instr
.src
[reorder
[0]];
991 src
[1] = &instr
.src
[reorder
[1]];
992 src
[2] = &instr
.src
[reorder
[2]];
994 AluInstruction
*ir
= nullptr;
995 for (int i
= 0; i
< 4 ; ++i
) {
996 if (instr
.dest
.write_mask
& (1 << i
)){
997 ir
= new AluInstruction(opcode
, from_nir(instr
.dest
, i
),
998 from_nir(*src
[0], i
), from_nir(*src
[1], i
),
999 from_nir(*src
[2], i
), write
);
1001 if (src
[0]->negate
) ir
->set_flag(alu_src0_neg
);
1002 if (src
[1]->negate
) ir
->set_flag(alu_src1_neg
);
1003 if (src
[2]->negate
) ir
->set_flag(alu_src2_neg
);
1005 if (instr
.dest
.saturate
) ir
->set_flag(alu_dst_clamp
);
// Presumably redundant: the instruction was already created with `write`.
1006 ir
->set_flag(alu_write
);
1007 emit_instruction(ir
);
1011 ir
->set_flag(alu_last_instr
);
// Emits integer negation: one op2_sub_int per enabled destination component
// with the operands Value::zero and source 0 (presumably computing 0 - x;
// operand order semantics of op2_sub_int are not shown here - confirm).
// instr: NIR ALU instruction; destination write mask and source 0 are read.
// NOTE(review): braces and the return statement are not part of this extract.
1015 bool EmitAluInstruction::emit_alu_ineg(const nir_alu_instr
& instr
)
1017 AluInstruction
*ir
= nullptr;
1018 for (int i
= 0; i
< 4 ; ++i
) {
1019 if (instr
.dest
.write_mask
& (1 << i
)){
1020 ir
= new AluInstruction(op2_sub_int
, from_nir(instr
.dest
, i
), Value::zero
,
1021 from_nir(instr
.src
[0], i
), write
);
1022 emit_instruction(ir
);
// Flag the most recently created instruction as the end of the group.
1026 ir
->set_flag(alu_last_instr
);
// File-local character table holding the eight characters x y z w 0 1 ? _.
// NOTE(review): presumably swizzle selector names indexed by selector value;
// no use of this table is visible in this part of the file - confirm or remove.
1031 static const char swz
[] = "xyzw01?_";
// Emits an integer absolute value in two passes over the enabled components:
// first the negated source is written to a temporary register, then
// op3_cndge_int picks between the source and that temporary.
// instr: NIR ALU instruction; destination write mask and source 0 are read.
// NOTE(review): braces and the return statement are not part of this extract.
1035 bool EmitAluInstruction::emit_alu_iabs(const nir_alu_instr
& instr
)
// One freshly allocated temporary register, addressed as four components.
1037 int sel_tmp
= allocate_temp_register();
1038 GPRVector
tmp(sel_tmp
, {0,1,2,3});
1040 std::array
<PValue
,4> src
;
1041 AluInstruction
*ir
= nullptr;
// Pass 1: tmp.i = op2_sub_int(Value::zero, src.i). Source values are cached
// in src so the second pass reuses them.
1042 for (int i
= 0; i
< 4 ; ++i
) {
1043 if (instr
.dest
.write_mask
& (1 << i
)){
1044 src
[i
] = from_nir(instr
.src
[0],i
);
1045 ir
= new AluInstruction(op2_sub_int
, tmp
.reg_i(i
), Value::zero
, src
[i
], write
);
1046 emit_instruction(ir
);
1050 ir
->set_flag(alu_last_instr
);
// Pass 2: op3_cndge_int with operands (src, src, tmp); presumably yields src
// when src is at least zero and the negated copy otherwise - confirm against
// the ISA documentation.
1052 for (int i
= 0; i
< 4 ; ++i
) {
1053 if (instr
.dest
.write_mask
& (1 << i
)){
1054 ir
= new AluInstruction(op3_cndge_int
, from_nir(instr
.dest
, i
), src
[i
],
1055 src
[i
], tmp
.reg_i(i
), write
);
1056 emit_instruction(ir
);
1060 ir
->set_flag(alu_last_instr
);
// Emits the multi-instruction sequence for integer division or modulo, one
// destination component at a time.
// instr:      NIR ALU instruction; destination write mask, source 0 and
//             source 1 are read.
// use_signed: declared in the signature; not referenced on any line visible
//             in this extract.
// mod:        declared in the signature; not referenced on any line visible
//             in this extract.
// NOTE(review): this extract omits braces, branch conditions, else parts and
// the return statement. Presumably use_signed guards the sign handling block
// and mod together with use_signed picks one of the final destination writes
// - confirm against the complete file.
1064 bool EmitAluInstruction::emit_alu_div_int(const nir_alu_instr
& instr
, bool use_signed
, bool mod
)
// Three temporary registers: sel_tmp is used as four scalars below, sel_tmp0
// and sel_tmp1 as four-component vectors.
1067 int sel_tmp
= allocate_temp_register();
1068 int sel_tmp0
= allocate_temp_register();
1069 int sel_tmp1
= allocate_temp_register();
// Scalars living in components 0 to 3 of sel_tmp.
1071 PValue
asrc1(new GPRValue(sel_tmp
, 0));
1072 PValue
asrc2(new GPRValue(sel_tmp
, 1));
1073 PValue
rsign(new GPRValue(sel_tmp
, 2));
1074 PValue
err(new GPRValue(sel_tmp
, 3));
1076 GPRVector
tmp0(sel_tmp0
, {0,1,2,3});
1077 GPRVector
tmp1(sel_tmp1
, {0,1,2,3});
1079 std::array
<PValue
, 4> src0
;
1080 std::array
<PValue
, 4> src1
;
// Resolve both operands for every enabled component up front.
1082 for (int i
= 0; i
< 4 ; ++i
) {
1083 if (instr
.dest
.write_mask
& (1 << i
)) {
1084 src0
[i
] = from_nir(instr
.src
[0], i
);
1085 src1
[i
] = from_nir(instr
.src
[1], i
);
// Main loop walks components from 3 down to 0. The statement following the
// mask test below is not visible; presumably it skips disabled components.
1090 for (int i
= 3; i
>= 0 ; --i
) {
1091 if (!(instr
.dest
.write_mask
& (1 << i
)))
// Sign handling: negated copies of both operands and the XOR of the two
// operands (rsign) are written in one instruction group.
1094 emit_instruction(op2_sub_int
, asrc1
, {Value::zero
, src0
[i
]}, {alu_write
});
1095 emit_instruction(op2_sub_int
, asrc2
, {Value::zero
, src1
[i
]}, {alu_write
});
1096 emit_instruction(op2_xor_int
, rsign
, {src0
[i
], src1
[i
]}, {alu_write
, alu_last_instr
});
// op3_cndge_int chooses between each operand and its negated copy;
// presumably this leaves absolute values in asrc1 and asrc2 - confirm.
1099 emit_instruction(op3_cndge_int
, asrc1
, {src0
[i
], src0
[i
], asrc1
}, {alu_write
});
1100 emit_instruction(op3_cndge_int
, asrc2
, {src1
[i
], src1
[i
], asrc2
}, {alu_write
, alu_last_instr
});
// Unsigned reciprocal of asrc2 goes to tmp0.x. The mullo, mulhi, sub and cnde
// steps that follow derive err from it and store an adjusted value back into
// tmp0.x (presumably a corrected reciprocal estimate).
1106 emit_instruction(op1_recip_uint
, tmp0
.x(), {asrc2
}, {alu_write
, alu_last_instr
});
1108 emit_instruction(op2_mullo_uint
, tmp0
.z(), {tmp0
.x(), asrc2
}, {alu_write
, alu_last_instr
});
1110 emit_instruction(op2_sub_int
, tmp0
.w(), {Value::zero
, tmp0
.z()}, {alu_write
});
1111 emit_instruction(op2_mulhi_uint
, tmp0
.y(), {tmp0
.x(), asrc2
}, {alu_write
, alu_last_instr
});
1113 emit_instruction(op3_cnde_int
, tmp0
.z(), {tmp0
.y(), tmp0
.w(), tmp0
.z()}, {alu_write
, alu_last_instr
});
1115 emit_instruction(op2_mulhi_uint
, err
, {tmp0
.z(), tmp0
.x()}, {alu_write
, alu_last_instr
});
1117 emit_instruction(op2_sub_int
, tmp1
.x(), {tmp0
.x(), err
}, {alu_write
});
1118 emit_instruction(op2_add_int
, tmp1
.y(), {tmp0
.x(), err
}, {alu_write
, alu_last_instr
});
1120 emit_instruction(op3_cnde_int
, tmp0
.x(), {tmp0
.y(), tmp1
.y(), tmp1
.x()}, {alu_write
, alu_last_instr
});
// tmp0.z = mulhi(tmp0.x, asrc1), tmp0.y = mullo(tmp0.z, asrc2) and
// tmp0.w = asrc1 - tmp0.y; presumably quotient estimate, its product with
// the divisor, and the remaining difference.
1122 emit_instruction(op2_mulhi_uint
, tmp0
.z(), {tmp0
.x(), asrc1
}, {alu_write
, alu_last_instr
});
1123 emit_instruction(op2_mullo_uint
, tmp0
.y(), {tmp0
.z(), asrc2
}, {alu_write
, alu_last_instr
});
1125 emit_instruction(op2_sub_int
, tmp0
.w(), {asrc1
, tmp0
.y()}, {alu_write
, alu_last_instr
});
// Two unsigned comparisons written to tmp1.x and tmp1.y; they are combined
// with op2_and_int further down and drive the cnde selections.
1128 emit_instruction(op2_setge_uint
, tmp1
.x(), {tmp0
.w(), asrc2
}, {alu_write
});
1129 emit_instruction(op2_setge_uint
, tmp1
.y(), {asrc1
, tmp0
.y()}, {alu_write
});
// Two alternative pairs of writes to tmp1.z and tmp1.w follow: one pair adds
// and subtracts asrc2 from tmp0.w, the other adds and subtracts one from
// tmp0.z. The condition choosing between them is not visible in this extract.
1132 emit_instruction(op2_sub_int
, tmp1
.z(), {tmp0
.w(), asrc2
}, {alu_write
});
1133 emit_instruction(op2_add_int
, tmp1
.w(), {tmp0
.w(), asrc2
}, {alu_write
, alu_last_instr
});
1135 emit_instruction(op2_add_int
, tmp1
.z(), {tmp0
.z(), Value::one_i
}, {alu_write
});
1136 emit_instruction(op2_sub_int
, tmp1
.w(), {tmp0
.z(), Value::one_i
}, {alu_write
, alu_last_instr
});
1139 emit_instruction(op2_and_int
, tmp1
.x(), {tmp1
.x(), tmp1
.y()}, {alu_write
, alu_last_instr
});
// Two alternative selections into tmp0.z keyed on tmp1.x; they differ only in
// the second operand (tmp0.w versus tmp0.z). The choosing condition is not
// visible in this extract.
1142 emit_instruction(op3_cnde_int
, tmp0
.z(), {tmp1
.x(), tmp0
.w(), tmp1
.z()}, {alu_write
, alu_last_instr
});
1144 emit_instruction(op3_cnde_int
, tmp0
.z(), {tmp1
.x(), tmp0
.z(), tmp1
.z()}, {alu_write
, alu_last_instr
});
// tmp0.z after a second selection keyed on tmp1.y, and its negation in tmp0.y.
1147 emit_instruction(op3_cnde_int
, tmp0
.z(), {tmp1
.y(), tmp1
.w(), tmp0
.z()}, {alu_write
, alu_last_instr
});
1148 emit_instruction(op2_sub_int
, tmp0
.y(), {Value::zero
, tmp0
.z()}, {alu_write
, alu_last_instr
});
// Three alternative writes of the destination component. The first keys the
// choice between tmp0.z and its negation on src0, the second on rsign, the
// third writes the cnde selection directly. Which one executes depends on
// conditions not visible in this extract.
1151 emit_instruction(op3_cndge_int
, from_nir(instr
.dest
, i
), {src0
[i
], tmp0
.z(), tmp0
.y()},
1152 {alu_write
, alu_last_instr
});
1154 emit_instruction(op3_cndge_int
, from_nir(instr
.dest
, i
), {rsign
, tmp0
.z(), tmp0
.y()},
1155 {alu_write
, alu_last_instr
});
1157 emit_instruction(op3_cnde_int
, from_nir(instr
.dest
, i
), {tmp1
.y(), tmp1
.w(), tmp0
.z()}, {alu_write
, alu_last_instr
});
// Copies the first ncomp values of s into v with one op1_mov per component,
// so that source modifiers can be attached to the move.
// src:   NIR ALU source; not referenced on any line visible in this extract.
// s:     values read by the moves.
// v:     values written by the moves.
// ncomp: number of components to process.
// NOTE(review): the lines guarding the two set_flag calls are not visible;
// presumably alu_src0_abs follows src.abs and alu_src0_neg follows src.negate
// - confirm. The line that marks the last move of the group is not visible
// either.
1163 void EmitAluInstruction::split_alu_modifiers(const nir_alu_src
& src
, GPRVector::Values
& s
,
1164 GPRVector::Values
& v
, int ncomp
)
1167 AluInstruction
*alu
= nullptr;
1168 for (int i
= 0; i
< ncomp
; ++i
) {
1169 alu
= new AluInstruction(op1_mov
, v
[i
], s
[i
], {alu_write
});
1171 alu
->set_flag(alu_src0_abs
);
1173 alu
->set_flag(alu_src0_neg
);
1174 emit_instruction(alu
);
// Emits a derivative operation as a texture instruction.
// instr: NIR ALU instruction; destination write mask and source 0 are read.
// op:    texture opcode passed unchanged to TexInstruction.
// NOTE(review): the signature continues past the trailing comma on lines not
// part of this extract (presumably a flag requesting fine derivatives, judging
// by the grad_fine handling below - confirm). The declaration of dst, the
// condition around the grad_fine block, and the return statement are not
// visible either.
1179 bool EmitAluInstruction::emit_tex_fdd(const nir_alu_instr
& instr
, TexInstruction::Opcode op
,
1183 GPRVector::Values v
;
1184 GPRVector::Values s
;
// source starts out pointing at the unmodified source values.
1185 GPRVector::Values
*source
= &s
;
1186 std::array
<int, 4> writemask
= {0,1,2,3};
// Component count of source 0, taken from the SSA definition when the source
// is SSA and from the register otherwise.
1188 int ncomp
= instr
.src
[0].src
.is_ssa
? instr
.src
[0].src
.ssa
->num_components
:
1189 instr
.src
[0].src
.reg
.reg
->num_components
;
// Enabled lanes keep their own index in the destination swizzle, disabled
// lanes get selector 7. Lanes at or beyond ncomp reuse component 0 for both
// the destination and the source value.
1191 for (int i
= 0; i
< 4; ++i
) {
1192 writemask
[i
] = (instr
.dest
.write_mask
& (1 << i
)) ? i
: 7;
1193 v
[i
] = from_nir(instr
.dest
, (i
< ncomp
) ? i
: 0);
1194 s
[i
] = from_nir(instr
.src
[0], (i
< ncomp
) ? i
: 0);
// Source modifiers are resolved with explicit moves from s into v before the
// texture instruction is built.
1197 if (instr
.src
[0].abs
|| instr
.src
[0].negate
) {
1198 split_alu_modifiers(instr
.src
[0], s
, v
, ncomp
);
1202 /* This is querying the dreivatives of the output fb, so we would either need
1203 * access to the neighboring pixels or to the framebuffer. Neither is currently
1206 GPRVector
src(*source
);
1208 auto tex
= new TexInstruction(op
, dst
, src
, 0, R600_MAX_CONST_BUFFERS
, PValue());
1209 tex
->set_dest_swizzle(writemask
);
// NOTE(review): the next statement prints a diagnostic to std::cerr on the
// grad_fine path; it looks like leftover debug output rather than sfn_log
// usage - consider removing or routing it through the logger.
1212 std::cerr
<< "Sewt fine flag\n";
1213 tex
->set_flag(TexInstruction::grad_fine
);
1216 emit_instruction(tex
);
// Emits a bitfield extract in three passes over the enabled components:
// the extract opcode itself, a compare of source 2 against the literal 32,
// and a conditional select between the extract result and source 0.
// instr:  NIR ALU instruction; destination write mask and sources 0, 1 and 2
//         are read.
// opcode: hardware opcode used for the extract instruction.
// NOTE(review): braces, the statement following each mask test (presumably a
// skip of disabled components), the flag list of each AluInstruction, the
// group terminators and the return statement are not part of this extract.
1221 bool EmitAluInstruction::emit_bitfield_extract(const nir_alu_instr
& instr
, EAluOp opcode
)
1223 int itmp
= allocate_temp_register();
1224 std::array
<PValue
, 4> tmp
;
1225 std::array
<PValue
, 4> dst
;
1226 std::array
<PValue
, 4> src0
;
1227 std::array
<PValue
, 4> shift
;
1229 PValue
l32(new LiteralValue(32));
1230 unsigned write_mask
= instr
.dest
.write_mask
;
1232 AluInstruction
*ir
= nullptr;
// Pass 1: dst = opcode(src0, source 1, shift) where shift holds source 2.
1233 for (int i
= 0; i
< 4; i
++) {
1234 if (!(write_mask
& (1<<i
)))
1236 dst
[i
] = from_nir(instr
.dest
, i
);
1237 src0
[i
] = from_nir(instr
.src
[0], i
);
1238 shift
[i
] = from_nir(instr
.src
[2], i
);
1240 ir
= new AluInstruction(opcode
, dst
[i
],
1241 {src0
[i
], from_nir(instr
.src
[1], i
), shift
[i
]},
1243 emit_instruction(ir
);
// Pass 2: tmp = op2_setge_int(shift, 32), stored in component i of the
// temporary register itmp.
1247 for (int i
= 0; i
< 4; i
++) {
1248 if (!(write_mask
& (1<<i
)))
1250 tmp
[i
] = PValue(new GPRValue(itmp
, i
));
1251 ir
= new AluInstruction(op2_setge_int
, tmp
[i
], {shift
[i
], l32
},
1253 emit_instruction(ir
);
// Pass 3: op3_cnde_int with operands (tmp, dst, src0); presumably keeps the
// extract result when the compare is false and passes source 0 through
// unchanged when source 2 is 32 or more - confirm against the ISA docs.
1257 for (int i
= 0; i
< 4; i
++) {
1258 if (!(write_mask
& (1<<i
)))
1260 ir
= new AluInstruction(op3_cnde_int
, dst
[i
], {tmp
[i
], dst
[i
], src0
[i
]},
1262 emit_instruction(ir
);
// Emits a bitfield insert as five passes over the enabled components, using
// three temporary four-component vectors t0, t1 and t2.
// instr: NIR ALU instruction; destination write mask and sources 0 to 3 are
//        read.
// Returns true immediately when the write mask is empty. The final return
// statement is not part of this extract.
// NOTE(review): braces and the statement following each mask test (presumably
// a skip of disabled components) are not visible here.
1269 bool EmitAluInstruction::emit_bitfield_insert(const nir_alu_instr
& instr
)
1271 auto t0
= get_temp_vec4();
1272 auto t1
= get_temp_vec4();
1273 auto t2
= get_temp_vec4();
1275 PValue
l32(new LiteralValue(32));
1276 unsigned write_mask
= instr
.dest
.write_mask
;
// Nothing to emit for an empty mask; this also means ir is assigned at least
// once in each pass below.
1277 if (!write_mask
) return true;
1279 AluInstruction
*ir
= nullptr;
// Pass 1: t0 = op2_setge_int(source 3, 32). No group terminator for this
// pass is visible in the extract.
1280 for (int i
= 0; i
< 4; i
++) {
1281 if (!(write_mask
& (1<<i
)))
1284 ir
= new AluInstruction(op2_setge_int
, t0
[i
], {from_nir(instr
.src
[3], i
), l32
}, {alu_write
});
1285 emit_instruction(ir
);
// Pass 2: t1 = op2_bfm_int(source 3, source 2); presumably the bit mask built
// from width and offset - confirm against the ISA docs.
1289 for (int i
= 0; i
< 4; i
++) {
1290 if (!(write_mask
& (1<<i
)))
1292 ir
= new AluInstruction(op2_bfm_int
, t1
[i
], {from_nir(instr
.src
[3], i
),
1293 from_nir(instr
.src
[2], i
)}, {alu_write
});
1294 emit_instruction(ir
);
1296 ir
->set_flag(alu_last_instr
);
// Pass 3: t2 = op2_lshl_int(source 1, source 2).
1298 for (int i
= 0; i
< 4; i
++) {
1299 if (!(write_mask
& (1<<i
)))
1301 ir
= new AluInstruction(op2_lshl_int
, t2
[i
], {from_nir(instr
.src
[1], i
),
1302 from_nir(instr
.src
[2], i
)}, {alu_write
});
1303 emit_instruction(ir
);
1305 ir
->set_flag(alu_last_instr
);
// Pass 4: destination = op3_bfi_int(t1, t2, source 0).
1308 for (int i
= 0; i
< 4; i
++) {
1309 if (!(write_mask
& (1<<i
)))
1311 ir
= new AluInstruction(op3_bfi_int
, from_nir(instr
.dest
, i
),
1312 {t1
[i
], t2
[i
], from_nir(instr
.src
[0], i
)}, {alu_write
});
1313 emit_instruction(ir
);
1315 ir
->set_flag(alu_last_instr
);
// Pass 5: op3_cnde_int with operands (t0, destination, source 1); presumably
// replaces the result with source 1 when source 3 is 32 or more - confirm.
1317 for (int i
= 0; i
< 4; i
++) {
1318 if (!(write_mask
& (1<<i
)))
1320 ir
= new AluInstruction(op3_cnde_int
, from_nir(instr
.dest
, i
),
1321 {t0
[i
], from_nir(instr
.dest
, i
),
1322 from_nir(instr
.src
[1], i
)}, {alu_write
});
1323 emit_instruction(ir
);
1325 ir
->set_flag(alu_last_instr
);
// Emits two single-instruction groups on component 0 only: the source is
// shifted right by the literal 16 into the destination, then the destination
// is converted in place with op1_flt16_to_flt32.
// instr: NIR ALU instruction; component 0 of source 0 and of the destination
//        are used.
// NOTE(review): braces and the return statement are not part of this extract.
1330 bool EmitAluInstruction::emit_unpack_32_2x16_split_y(const nir_alu_instr
& instr
)
1332 emit_instruction(op2_lshr_int
, from_nir(instr
.dest
, 0),
1333 {from_nir(instr
.src
[0], 0), PValue(new LiteralValue(16))},
1334 {alu_write
, alu_last_instr
});
// The conversion reads the value just written to the destination.
1336 emit_instruction(op1_flt16_to_flt32
, from_nir(instr
.dest
, 0),
1337 {from_nir(instr
.dest
, 0)},{alu_write
, alu_last_instr
});
// Emits a single op1_flt16_to_flt32 from component 0 of source 0 to
// component 0 of the destination, closing its own instruction group.
// instr: NIR ALU instruction supplying source 0 and the destination.
// NOTE(review): no masking of the upper 16 bits is emitted here; presumably
// the conversion opcode only looks at the low half - confirm against the ISA
// docs. Braces and the return statement are not part of this extract.
1342 bool EmitAluInstruction::emit_unpack_32_2x16_split_x(const nir_alu_instr
& instr
)
1344 emit_instruction(op1_flt16_to_flt32
, from_nir(instr
.dest
, 0),
1345 {from_nir(instr
.src
[0], 0)},{alu_write
, alu_last_instr
});
// Packs two values into one 32 bit result on component 0: both sources are
// converted with op1_flt32_to_flt16, the second is shifted left by the
// literal 16, and the two are combined with op2_or_int into the destination.
// instr: NIR ALU instruction; component 0 of sources 0 and 1 and of the
//        destination are used.
// NOTE(review): braces and the return statement are not part of this extract.
1349 bool EmitAluInstruction::emit_pack_32_2x16_split(const nir_alu_instr
& instr
)
// x and y are components 0 and 1 of one freshly allocated temporary register.
1351 int it0
= allocate_temp_register();
1352 PValue
x(new GPRValue(it0
, 0));
1353 PValue
y(new GPRValue(it0
, 1));
// Both conversions share one instruction group; only the second carries
// alu_last_instr.
1355 emit_instruction(op1_flt32_to_flt16
, x
,{from_nir(instr
.src
[0], 0)},{alu_write
});
1356 emit_instruction(op1_flt32_to_flt16
, y
,{from_nir(instr
.src
[1], 0)},{alu_write
, alu_last_instr
});
// y is shifted in place before being merged with x.
1358 emit_instruction(op2_lshl_int
, y
, {y
, PValue(new LiteralValue(16))},{alu_write
, alu_last_instr
});
1360 emit_instruction(op2_or_int
, {from_nir(instr
.dest
, 0)} , {x
, y
},{alu_write
, alu_last_instr
});