/*
 * Copyright (c) 2018 Collabora LTD
 *
 * Author: Gert Wollny <gert.wollny@collabora.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
28 #include "sfn_emitaluinstruction.h"
29 #include "sfn_debug.h"
31 #include "gallium/drivers/r600/r600_shader.h"
37 EmitAluInstruction::EmitAluInstruction(ShaderFromNirProcessor
& processor
):
38 EmitInstruction (processor
)
// Entry point for one NIR ALU instruction.
//
// The visible code casts `ir` to a nir_alu_instr, writes a log line with the
// destination bit size, calls split_constants() on the instruction and then,
// per the case labels below, forwards to the emit_* helper that belongs to
// the NIR opcode and returns that helper's result.
//
// NOTE(review): this listing is incomplete - the switch header, the default
// branch, the closing braces and any other short lines between the case
// groups are not visible here. Compare against the upstream file before
// changing the dispatch table.
43 bool EmitAluInstruction::do_emit(nir_instr
* ir
)
45 const nir_alu_instr
& instr
= *nir_instr_as_alu(ir
);
47 r600::sfn_log
<< SfnLog::instr
<< "emit '"
49 << " bitsize: " << static_cast<int>(instr
.dest
.dest
.ssa
.bit_size
)
50 << "' (" << __func__
<< ")\n";
52 split_constants(instr
);
55 case nir_op_b2f32
: return emit_alu_b2f(instr
);
56 case nir_op_i2b1
: return emit_alu_i2orf2_b1(instr
, op2_setne_int
);
57 case nir_op_f2b1
: return emit_alu_i2orf2_b1(instr
, op2_setne_dx10
);
59 case nir_op_mov
:return emit_alu_op1(instr
, op1_mov
);
60 case nir_op_ftrunc
: return emit_alu_op1(instr
, op1_trunc
);
61 case nir_op_fabs
: return emit_alu_op1(instr
, op1_mov
, {1 << alu_src0_abs
});
62 case nir_op_fneg
: return emit_alu_op1(instr
, op1_mov
, {1 << alu_src0_neg
});
63 case nir_op_fsat
: return emit_alu_op1(instr
, op1_mov
, {1 << alu_dst_clamp
});
64 case nir_op_frcp
: return emit_alu_trans_op1(instr
, op1_recip_ieee
);
65 case nir_op_frsq
: return emit_alu_trans_op1(instr
, op1_recipsqrt_ieee1
);
66 case nir_op_fsin
: return emit_alu_trig_op1(instr
, op1_sin
);
67 case nir_op_fcos
: return emit_alu_trig_op1(instr
, op1_cos
);
68 case nir_op_fexp2
: return emit_alu_trans_op1(instr
, op1_exp_ieee
);
69 case nir_op_flog2
: return emit_alu_trans_op1(instr
, op1_log_clamped
);
71 case nir_op_fround_even
: return emit_alu_op1(instr
, op1_rndne
);
72 case nir_op_fsqrt
: return emit_alu_trans_op1(instr
, op1_sqrt_ieee
);
73 case nir_op_i2f32
: return emit_alu_trans_op1(instr
, op1_int_to_flt
);
74 case nir_op_u2f32
: return emit_alu_trans_op1(instr
, op1_uint_to_flt
);
75 case nir_op_f2i32
: return emit_alu_f2i32_or_u32(instr
, op1_flt_to_int
);
76 case nir_op_f2u32
: return emit_alu_f2i32_or_u32(instr
, op1_flt_to_uint
);
78 case nir_op_fceil
: return emit_alu_op1(instr
, op1_ceil
);
79 case nir_op_ffract
: return emit_alu_op1(instr
, op1_fract
);
80 case nir_op_ffloor
: return emit_alu_op1(instr
, op1_floor
);
82 case nir_op_fsign
: return emit_fsign(instr
);
83 case nir_op_fdph
: return emit_fdph(instr
);
85 case nir_op_ibitfield_extract
: return emit_bitfield_extract(instr
, op3_bfe_int
);
86 case nir_op_ubitfield_extract
: return emit_bitfield_extract(instr
, op3_bfe_uint
);
87 case nir_op_bitfield_insert
: return emit_bitfield_insert(instr
);
88 case nir_op_bit_count
: return emit_alu_op1(instr
, op1_bcnt_int
);
89 case nir_op_bitfield_reverse
: return emit_alu_op1(instr
, op1_bfrev_int
);
91 case nir_op_ieq
: return emit_alu_op2_int(instr
, op2_sete_int
);
92 case nir_op_ine
: return emit_alu_op2_int(instr
, op2_setne_int
);
93 case nir_op_ige
: return emit_alu_op2_int(instr
, op2_setge_int
);
94 case nir_op_ishl
: return emit_alu_op2_int(instr
, op2_lshl_int
);
95 case nir_op_ishr
: return emit_alu_op2_int(instr
, op2_ashr_int
);
96 case nir_op_ilt
: return emit_alu_op2_int(instr
, op2_setgt_int
, op2_opt_reverse
);
97 case nir_op_iand
: return emit_alu_op2_int(instr
, op2_and_int
);
98 case nir_op_ixor
: return emit_alu_op2_int(instr
, op2_xor_int
);
99 case nir_op_imin
: return emit_alu_op2_int(instr
, op2_min_int
);
100 case nir_op_imax
: return emit_alu_op2_int(instr
, op2_max_int
);
101 case nir_op_imul_high
: return emit_alu_trans_op2(instr
, op2_mulhi_int
);
102 case nir_op_umul_high
: return emit_alu_trans_op2(instr
, op2_mulhi_uint
);
103 case nir_op_umax
: return emit_alu_op2_int(instr
, op2_max_uint
);
104 case nir_op_umin
: return emit_alu_op2_int(instr
, op2_min_uint
);
105 case nir_op_ior
: return emit_alu_op2_int(instr
, op2_or_int
);
106 case nir_op_inot
: return emit_alu_op1(instr
, op1_not_int
);
107 case nir_op_iabs
: return emit_alu_iabs(instr
);
108 case nir_op_ineg
: return emit_alu_ineg(instr
);
109 case nir_op_idiv
: return emit_alu_div_int(instr
, true, false);
110 case nir_op_udiv
: return emit_alu_div_int(instr
, false, false);
111 case nir_op_umod
: return emit_alu_div_int(instr
, false, true);
112 case nir_op_isign
: return emit_alu_isign(instr
);
114 case nir_op_uge
: return emit_alu_op2_int(instr
, op2_setge_uint
);
115 case nir_op_ult
: return emit_alu_op2_int(instr
, op2_setgt_uint
, op2_opt_reverse
);
116 case nir_op_ushr
: return emit_alu_op2_int(instr
, op2_lshr_int
);
118 case nir_op_flt
: return emit_alu_op2(instr
, op2_setgt_dx10
, op2_opt_reverse
);
120 case nir_op_fge
: return emit_alu_op2(instr
, op2_setge_dx10
);
121 case nir_op_fne
: return emit_alu_op2(instr
, op2_setne_dx10
);
122 case nir_op_feq
: return emit_alu_op2(instr
, op2_sete_dx10
);
124 case nir_op_fmin
: return emit_alu_op2(instr
, op2_min_dx10
);
125 case nir_op_fmax
: return emit_alu_op2(instr
, op2_max_dx10
);
126 case nir_op_fmul
: return emit_alu_op2(instr
, op2_mul_ieee
);
127 case nir_op_imul
: return emit_alu_trans_op2(instr
, op2_mullo_int
);
128 case nir_op_fadd
: return emit_alu_op2(instr
, op2_add
);
129 case nir_op_fsub
: return emit_alu_op2(instr
, op2_add
, op2_opt_neg_src1
);
130 case nir_op_iadd
: return emit_alu_op2_int(instr
, op2_add_int
);
131 case nir_op_isub
: return emit_alu_op2_int(instr
, op2_sub_int
);
132 case nir_op_fdot2
: return emit_dot(instr
, 2);
133 case nir_op_fdot3
: return emit_dot(instr
, 3);
134 case nir_op_fdot4
: return emit_dot(instr
, 4);
136 case nir_op_bany_inequal2
: return emit_any_all_icomp(instr
, op2_setne_int
, 2, false);
137 case nir_op_bany_inequal3
: return emit_any_all_icomp(instr
, op2_setne_int
, 3, false);
138 case nir_op_bany_inequal4
: return emit_any_all_icomp(instr
, op2_setne_int
, 4, false);
140 case nir_op_ball_iequal2
: return emit_any_all_icomp(instr
, op2_sete_int
, 2, true);
141 case nir_op_ball_iequal3
: return emit_any_all_icomp(instr
, op2_sete_int
, 3, true);
142 case nir_op_ball_iequal4
: return emit_any_all_icomp(instr
, op2_sete_int
, 4, true);
144 case nir_op_bany_fnequal2
: return emit_any_all_fcomp2(instr
, op2_setne_dx10
, false);
145 case nir_op_bany_fnequal3
: return emit_any_all_fcomp(instr
, op2_setne
, 3, false);
146 case nir_op_bany_fnequal4
: return emit_any_all_fcomp(instr
, op2_setne
, 4, false);
148 case nir_op_ball_fequal2
: return emit_any_all_fcomp2(instr
, op2_sete_dx10
, true);
149 case nir_op_ball_fequal3
: return emit_any_all_fcomp(instr
, op2_sete
, 3, true);
150 case nir_op_ball_fequal4
: return emit_any_all_fcomp(instr
, op2_sete
, 4, true);
153 case nir_op_ffma
: return emit_alu_op3(instr
, op3_muladd_ieee
);
154 case nir_op_bcsel
: return emit_alu_op3(instr
, op3_cnde_int
, {0, 2, 1});
155 case nir_op_vec2
: return emit_create_vec(instr
, 2);
156 case nir_op_vec3
: return emit_create_vec(instr
, 3);
157 case nir_op_vec4
: return emit_create_vec(instr
, 4);
159 case nir_op_find_lsb
: return emit_alu_op1(instr
, op1_ffbl_int
);
160 case nir_op_ufind_msb
: return emit_find_msb(instr
, false);
161 case nir_op_ifind_msb
: return emit_find_msb(instr
, true);
162 case nir_op_b2i32
: return emit_b2i32(instr
);
163 case nir_op_pack_64_2x32_split
: return emit_pack_64_2x32_split(instr
);
164 case nir_op_unpack_64_2x32_split_x
: return emit_unpack_64_2x32_split(instr
, 0);
165 case nir_op_unpack_64_2x32_split_y
: return emit_unpack_64_2x32_split(instr
, 1);
166 case nir_op_unpack_half_2x16_split_x
: return emit_unpack_32_2x16_split_x(instr
);
167 case nir_op_unpack_half_2x16_split_y
: return emit_unpack_32_2x16_split_y(instr
);
168 case nir_op_pack_half_2x16_split
: return emit_pack_32_2x16_split(instr
);
171 /* These are in the ALU instruction list, but they should be texture instructions */
172 case nir_op_fddx_fine
: return emit_tex_fdd(instr
, TexInstruction::get_gradient_h
, true);
173 case nir_op_fddx_coarse
:
174 case nir_op_fddx
: return emit_tex_fdd(instr
, TexInstruction::get_gradient_h
, false);
176 case nir_op_fddy_fine
: return emit_tex_fdd(instr
, TexInstruction::get_gradient_v
, true);
177 case nir_op_fddy_coarse
:
178 case nir_op_fddy
: return emit_tex_fdd(instr
,TexInstruction::get_gradient_v
, false);
180 case nir_op_umad24
: return emit_alu_op3(instr
, op3_muladd_uint24
, {0, 1, 2});
181 case nir_op_umul24
: return emit_alu_op2(instr
, op2_mul_uint24
);
187 void EmitAluInstruction::split_constants(const nir_alu_instr
& instr
)
189 const nir_op_info
*op_info
= &nir_op_infos
[instr
.op
];
190 if (op_info
->num_inputs
< 2)
194 std::array
<PValue
,4> c
;
195 std::array
<int,4> idx
;
196 for (unsigned i
= 0; i
< op_info
->num_inputs
; ++i
) {
197 PValue src
= from_nir(instr
.src
[i
], 0);
199 if (src
->type() == Value::kconst
) {
208 unsigned sel
= c
[0]->sel();
209 sfn_log
<< SfnLog::reg
<< "split " << nconst
<< " constants, sel[0] = " << sel
; ;
211 for (int i
= 1; i
< nconst
; ++i
) {
212 sfn_log
<< "sel[" << i
<< "] = " << c
[i
]->sel() << "\n";
213 if (c
[i
]->sel() != sel
) {
214 load_uniform(instr
.src
[idx
[i
]]);
219 bool EmitAluInstruction::emit_alu_inot(const nir_alu_instr
& instr
)
221 if (instr
.src
[0].negate
|| instr
.src
[0].abs
) {
222 std::cerr
<< "source modifiers not supported with int ops\n";
226 AluInstruction
*ir
= nullptr;
227 for (int i
= 0; i
< 4 ; ++i
) {
228 if (instr
.dest
.write_mask
& (1 << i
)){
229 ir
= new AluInstruction(op1_not_int
, from_nir(instr
.dest
, i
),
230 from_nir(instr
.src
[0], i
), write
);
231 emit_instruction(ir
);
235 ir
->set_flag(alu_last_instr
);
239 bool EmitAluInstruction::emit_alu_op1(const nir_alu_instr
& instr
, EAluOp opcode
,
240 const AluOpFlags
& flags
)
242 AluInstruction
*ir
= nullptr;
243 for (int i
= 0; i
< 4 ; ++i
) {
244 if (instr
.dest
.write_mask
& (1 << i
)){
245 ir
= new AluInstruction(opcode
, from_nir(instr
.dest
, i
),
246 from_nir(instr
.src
[0], i
), write
);
248 if (flags
.test(alu_src0_abs
) || instr
.src
[0].abs
)
249 ir
->set_flag(alu_src0_abs
);
251 if (instr
.src
[0].negate
^ flags
.test(alu_src0_neg
))
252 ir
->set_flag(alu_src0_neg
);
254 if (flags
.test(alu_dst_clamp
) || instr
.dest
.saturate
)
255 ir
->set_flag(alu_dst_clamp
);
257 emit_instruction(ir
);
265 bool EmitAluInstruction::emit_alu_trig_op1(const nir_alu_instr
& instr
, EAluOp opcode
)
267 // normalize by dividing by 2*PI, shift by 0.5, take fraction, and
270 const float inv_2_pi
= 0.15915494f
;
272 PValue v
[4]; // this might need some additional temp register creation
273 for (unsigned i
= 0; i
< 4 ; ++i
)
274 v
[i
] = from_nir(instr
.dest
, i
);
276 PValue inv_pihalf
= PValue(new LiteralValue(inv_2_pi
, 0));
277 AluInstruction
*ir
= nullptr;
278 for (unsigned i
= 0; i
< 4 ; ++i
) {
279 if (!(instr
.dest
.write_mask
& (1 << i
)))
281 ir
= new AluInstruction(op3_muladd_ieee
, v
[i
],
282 {from_nir(instr
.src
[0],i
), inv_pihalf
, Value::zero_dot_5
},
284 if (instr
.src
[0].negate
) ir
->set_flag(alu_src0_neg
);
285 emit_instruction(ir
);
289 for (unsigned i
= 0; i
< 4 ; ++i
) {
290 if (!(instr
.dest
.write_mask
& (1 << i
)))
292 ir
= new AluInstruction(op1_fract
, v
[i
], v
[i
], {alu_write
});
293 emit_instruction(ir
);
297 for (unsigned i
= 0; i
< 4 ; ++i
) {
298 if (!(instr
.dest
.write_mask
& (1 << i
)))
300 ir
= new AluInstruction(op2_add
, v
[i
], v
[i
], Value::zero_dot_5
, write
);
301 ir
->set_flag(alu_src1_neg
);
302 emit_instruction(ir
);
306 for (unsigned i
= 0; i
< 4 ; ++i
) {
307 if (!(instr
.dest
.write_mask
& (1 << i
)))
310 ir
= new AluInstruction(opcode
, v
[i
], v
[i
], last_write
);
311 emit_instruction(ir
);
316 bool EmitAluInstruction::emit_alu_trans_op1(const nir_alu_instr
& instr
, EAluOp opcode
,
319 AluInstruction
*ir
= nullptr;
320 std::set
<int> src_idx
;
321 for (int i
= 0; i
< 4 ; ++i
) {
322 if (instr
.dest
.write_mask
& (1 << i
)){
323 ir
= new AluInstruction(opcode
, from_nir(instr
.dest
, i
),
324 from_nir(instr
.src
[0], i
), last_write
);
325 if (absolute
|| instr
.src
[0].abs
) ir
->set_flag(alu_src0_abs
);
326 if (instr
.src
[0].negate
) ir
->set_flag(alu_src0_neg
);
327 if (instr
.dest
.saturate
) ir
->set_flag(alu_dst_clamp
);
328 emit_instruction(ir
);
334 bool EmitAluInstruction::emit_alu_f2i32_or_u32(const nir_alu_instr
& instr
, EAluOp op
)
336 AluInstruction
*ir
= nullptr;
337 std::array
<PValue
, 4> v
;
339 for (int i
= 0; i
< 4; ++i
) {
340 if (!(instr
.dest
.write_mask
& (1 << i
)))
342 v
[i
] = from_nir(instr
.dest
, i
);
343 ir
= new AluInstruction(op1_trunc
, v
[i
], from_nir(instr
.src
[0], i
), {alu_write
});
344 if (instr
.src
[0].abs
) ir
->set_flag(alu_src0_abs
);
345 if (instr
.src
[0].negate
) ir
->set_flag(alu_src0_neg
);
346 emit_instruction(ir
);
350 for (int i
= 0; i
< 4; ++i
) {
351 if (!(instr
.dest
.write_mask
& (1 << i
)))
353 ir
= new AluInstruction(op
, v
[i
], v
[i
], {alu_write
});
354 emit_instruction(ir
);
355 if (op
== op1_flt_to_uint
)
363 bool EmitAluInstruction::emit_find_msb(const nir_alu_instr
& instr
, bool sgn
)
365 int sel_tmp
= allocate_temp_register();
366 int sel_tmp2
= allocate_temp_register();
367 GPRVector
tmp(sel_tmp
, {0,1,2,3});
368 GPRVector
tmp2(sel_tmp2
, {0,1,2,3});
369 AluInstruction
*ir
= nullptr;
370 EAluOp opcode
= sgn
? op1_ffbh_int
: op1_ffbh_uint
;
371 for (int i
= 0; i
< 4; ++i
) {
372 if (!(instr
.dest
.write_mask
& (1 << i
)))
375 ir
= new AluInstruction(opcode
, tmp
.reg_i(i
), from_nir(instr
.src
[0], i
), write
);
376 emit_instruction(ir
);
380 for (int i
= 0; i
< 4 ; ++i
) {
381 if (!(instr
.dest
.write_mask
& (1 << i
)))
384 ir
= new AluInstruction(op2_sub_int
, tmp2
.reg_i(i
),
385 PValue(new LiteralValue(31u, 0)), tmp
.reg_i(i
), write
);
386 emit_instruction(ir
);
390 for (int i
= 0; i
< 4 ; ++i
) {
391 if (!(instr
.dest
.write_mask
& (1 << i
)))
394 ir
= new AluInstruction(op3_cndge_int
, from_nir(instr
.dest
, i
), tmp
.reg_i(i
),
395 tmp2
.reg_i(i
), tmp
.reg_i(i
), write
);
396 emit_instruction(ir
);
403 bool EmitAluInstruction::emit_b2i32(const nir_alu_instr
& instr
)
405 AluInstruction
*ir
= nullptr;
406 for (int i
= 0; i
< 4 ; ++i
) {
407 if (!(instr
.dest
.write_mask
& (1 << i
)))
410 ir
= new AluInstruction(op2_and_int
, from_nir(instr
.dest
, i
),
411 from_nir(instr
.src
[0], i
), Value::one_i
, write
);
412 emit_instruction(ir
);
419 bool EmitAluInstruction::emit_pack_64_2x32_split(const nir_alu_instr
& instr
)
421 AluInstruction
*ir
= nullptr;
422 for (unsigned i
= 0; i
< 2; ++i
) {
423 if (!(instr
.dest
.write_mask
& (1 << i
)))
425 ir
= new AluInstruction(op1_mov
, from_nir(instr
.dest
, i
),
426 from_nir(instr
.src
[0], i
), write
);
427 emit_instruction(ir
);
429 ir
->set_flag(alu_last_instr
);
433 bool EmitAluInstruction::emit_unpack_64_2x32_split(const nir_alu_instr
& instr
, unsigned comp
)
435 emit_instruction(new AluInstruction(op1_mov
, from_nir(instr
.dest
, 0),
436 from_nir(instr
.src
[0], comp
), last_write
));
440 bool EmitAluInstruction::emit_create_vec(const nir_alu_instr
& instr
, unsigned nc
)
442 AluInstruction
*ir
= nullptr;
443 std::set
<int> src_slot
;
444 for(unsigned i
= 0; i
< nc
; ++i
) {
445 if (instr
.dest
.write_mask
& (1 << i
)){
446 auto src
= from_nir(instr
.src
[i
], 0);
447 ir
= new AluInstruction(op1_mov
, from_nir(instr
.dest
, i
), src
, write
);
448 if (instr
.dest
.saturate
) ir
->set_flag(alu_dst_clamp
);
450 // FIXME: This is a rather crude approach to fix the problem that
451 // r600 can't read from four different slots of the same component
452 // here we check only for the register index
453 if (src
->type() == Value::gpr
)
454 src_slot
.insert(src
->sel());
455 if (src_slot
.size() >= 3) {
457 ir
->set_flag(alu_last_instr
);
459 emit_instruction(ir
);
463 ir
->set_flag(alu_last_instr
);
467 bool EmitAluInstruction::emit_dot(const nir_alu_instr
& instr
, int n
)
469 const nir_alu_src
& src0
= instr
.src
[0];
470 const nir_alu_src
& src1
= instr
.src
[1];
472 AluInstruction
*ir
= nullptr;
473 for (int i
= 0; i
< n
; ++i
) {
474 ir
= new AluInstruction(op2_dot4_ieee
, from_nir(instr
.dest
, i
),
475 from_nir(src0
, i
), from_nir(src1
, i
),
476 instr
.dest
.write_mask
& (1 << i
) ? write
: empty
);
478 if (src0
.negate
) ir
->set_flag(alu_src0_neg
);
479 if (src0
.abs
) ir
->set_flag(alu_src0_abs
);
480 if (src1
.negate
) ir
->set_flag(alu_src1_neg
);
481 if (src1
.abs
) ir
->set_flag(alu_src1_abs
);
483 if (instr
.dest
.saturate
) ir
->set_flag(alu_dst_clamp
);
484 emit_instruction(ir
);
486 for (int i
= n
; i
< 4 ; ++i
) {
487 ir
= new AluInstruction(op2_dot4_ieee
, from_nir(instr
.dest
, i
),
488 Value::zero
, Value::zero
,
489 instr
.dest
.write_mask
& (1 << i
) ? write
: empty
);
490 emit_instruction(ir
);
494 ir
->set_flag(alu_last_instr
);
498 bool EmitAluInstruction::emit_fdph(const nir_alu_instr
& instr
)
500 const nir_alu_src
& src0
= instr
.src
[0];
501 const nir_alu_src
& src1
= instr
.src
[1];
503 AluInstruction
*ir
= nullptr;
504 for (int i
= 0; i
< 3 ; ++i
) {
505 ir
= new AluInstruction(op2_dot4_ieee
, from_nir(instr
.dest
, i
),
506 from_nir(src0
, i
), from_nir(src1
, i
),
507 instr
.dest
.write_mask
& (1 << i
) ? write
: empty
);
508 if (src0
.negate
) ir
->set_flag(alu_src0_neg
);
509 if (src0
.abs
) ir
->set_flag(alu_src0_abs
);
510 if (src1
.negate
) ir
->set_flag(alu_src1_neg
);
511 if (src1
.abs
) ir
->set_flag(alu_src1_abs
);
512 if (instr
.dest
.saturate
) ir
->set_flag(alu_dst_clamp
);
513 emit_instruction(ir
);
516 ir
= new AluInstruction(op2_dot4_ieee
, from_nir(instr
.dest
, 3), Value::one_f
,
517 from_nir(src1
, 3), (instr
.dest
.write_mask
) & (1 << 3) ? write
: empty
);
518 if (src1
.negate
) ir
->set_flag(alu_src1_neg
);
519 if (src1
.abs
) ir
->set_flag(alu_src1_abs
);
520 emit_instruction(ir
);
522 ir
->set_flag(alu_last_instr
);
527 bool EmitAluInstruction::emit_alu_i2orf2_b1(const nir_alu_instr
& instr
, EAluOp op
)
529 AluInstruction
*ir
= nullptr;
530 for (int i
= 0; i
< 4 ; ++i
) {
531 if (instr
.dest
.write_mask
& (1 << i
)) {
532 ir
= new AluInstruction(op
, from_nir(instr
.dest
, i
),
533 from_nir(instr
.src
[0], i
), Value::zero
,
535 emit_instruction(ir
);
539 ir
->set_flag(alu_last_instr
);
543 bool EmitAluInstruction::emit_alu_b2f(const nir_alu_instr
& instr
)
545 AluInstruction
*ir
= nullptr;
546 for (int i
= 0; i
< 4 ; ++i
) {
547 if (instr
.dest
.write_mask
& (1 << i
)){
548 ir
= new AluInstruction(op2_and_int
, from_nir(instr
.dest
, i
),
549 from_nir(instr
.src
[0], i
), Value::one_f
, write
);
550 if (instr
.src
[0].negate
) ir
->set_flag(alu_src0_neg
);
551 if (instr
.src
[0].abs
) ir
->set_flag(alu_src0_abs
);
552 if (instr
.dest
.saturate
) ir
->set_flag(alu_dst_clamp
);
553 emit_instruction(ir
);
557 ir
->set_flag(alu_last_instr
);
561 bool EmitAluInstruction::emit_any_all_icomp(const nir_alu_instr
& instr
, EAluOp op
, unsigned nc
, bool all
)
564 AluInstruction
*ir
= nullptr;
565 PValue v
[4]; // this might need some additional temp register creation
566 for (unsigned i
= 0; i
< 4 ; ++i
)
567 v
[i
] = from_nir(instr
.dest
, i
);
569 EAluOp combine
= all
? op2_and_int
: op2_or_int
;
571 /* For integers we can not use the modifiers, so this needs some emulation */
572 /* Should actually be lowered with NIR */
573 if (instr
.src
[0].negate
== instr
.src
[1].negate
&&
574 instr
.src
[0].abs
== instr
.src
[1].abs
) {
576 for (unsigned i
= 0; i
< nc
; ++i
) {
577 ir
= new AluInstruction(op
, v
[i
], from_nir(instr
.src
[0], i
),
578 from_nir(instr
.src
[1], i
), write
);
579 emit_instruction(ir
);
582 ir
->set_flag(alu_last_instr
);
584 std::cerr
<< "Negate in iequal/inequal not (yet) supported\n";
588 for (unsigned i
= 0; i
< nc
/2 ; ++i
) {
589 ir
= new AluInstruction(combine
, v
[2 * i
], v
[2 * i
], v
[2 * i
+ 1], write
);
590 emit_instruction(ir
);
593 ir
->set_flag(alu_last_instr
);
596 ir
= new AluInstruction(combine
, v
[0], v
[0], v
[2], last_write
);
597 emit_instruction(ir
);
603 bool EmitAluInstruction::emit_any_all_fcomp(const nir_alu_instr
& instr
, EAluOp op
, unsigned nc
, bool all
)
605 AluInstruction
*ir
= nullptr;
606 PValue v
[4]; // this might need some additional temp register creation
607 for (unsigned i
= 0; i
< 4 ; ++i
)
608 v
[i
] = from_nir(instr
.dest
, i
);
610 for (unsigned i
= 0; i
< nc
; ++i
) {
611 ir
= new AluInstruction(op
, v
[i
], from_nir(instr
.src
[0],i
),
612 from_nir(instr
.src
[1],i
), write
);
614 if (instr
.src
[0].abs
)
615 ir
->set_flag(alu_src0_abs
);
616 if (instr
.src
[0].negate
)
617 ir
->set_flag(alu_src0_neg
);
619 if (instr
.src
[1].abs
)
620 ir
->set_flag(alu_src1_abs
);
621 if (instr
.src
[1].negate
)
622 ir
->set_flag(alu_src1_neg
);
624 emit_instruction(ir
);
627 ir
->set_flag(alu_last_instr
);
629 for (unsigned i
= 0; i
< nc
; ++i
) {
630 ir
= new AluInstruction(op1_max4
, v
[i
], v
[i
], write
);
631 if (all
) ir
->set_flag(alu_src0_neg
);
632 emit_instruction(ir
);
635 for (unsigned i
= nc
; i
< 4 ; ++i
) {
636 ir
= new AluInstruction(op1_max4
, v
[i
],
637 all
? Value::one_f
: Value::zero
, write
);
639 ir
->set_flag(alu_src0_neg
);
641 emit_instruction(ir
);
644 ir
->set_flag(alu_last_instr
);
647 op
= (op
== op2_sete
) ? op2_sete_dx10
: op2_setne_dx10
;
649 op
= (op
== op2_sete
) ? op2_setne_dx10
: op2_sete_dx10
;
651 ir
= new AluInstruction(op
, v
[0], v
[0], Value::one_f
, last_write
);
653 ir
->set_flag(alu_src1_neg
);
654 emit_instruction(ir
);
659 bool EmitAluInstruction::emit_any_all_fcomp2(const nir_alu_instr
& instr
, EAluOp op
, bool all
)
661 AluInstruction
*ir
= nullptr;
662 PValue v
[4]; // this might need some additional temp register creation
663 for (unsigned i
= 0; i
< 4 ; ++i
)
664 v
[i
] = from_nir(instr
.dest
, i
);
666 for (unsigned i
= 0; i
< 2 ; ++i
) {
667 ir
= new AluInstruction(op
, v
[i
], from_nir(instr
.src
[0],i
),
668 from_nir(instr
.src
[1],i
), write
);
669 if (instr
.src
[0].abs
)
670 ir
->set_flag(alu_src0_abs
);
671 if (instr
.src
[0].negate
)
672 ir
->set_flag(alu_src0_neg
);
674 if (instr
.src
[1].abs
)
675 ir
->set_flag(alu_src1_abs
);
676 if (instr
.src
[1].negate
)
677 ir
->set_flag(alu_src1_neg
);
679 emit_instruction(ir
);
682 ir
->set_flag(alu_last_instr
);
684 op
= (op
== op2_setne_dx10
) ? op2_or_int
: op2_and_int
;
685 ir
= new AluInstruction(op
, v
[0], v
[0], v
[1], last_write
);
686 emit_instruction(ir
);
691 bool EmitAluInstruction::emit_alu_trans_op2(const nir_alu_instr
& instr
, EAluOp opcode
)
693 const nir_alu_src
& src0
= instr
.src
[0];
694 const nir_alu_src
& src1
= instr
.src
[1];
696 AluInstruction
*ir
= nullptr;
697 for (int i
= 0; i
< 4 ; ++i
) {
698 if (instr
.dest
.write_mask
& (1 << i
)){
699 ir
= new AluInstruction(opcode
, from_nir(instr
.dest
, i
), from_nir(src0
, i
), from_nir(src1
, i
), last_write
);
700 if (src0
.negate
) ir
->set_flag(alu_src0_neg
);
701 if (src0
.abs
) ir
->set_flag(alu_src0_abs
);
702 if (src1
.negate
) ir
->set_flag(alu_src1_neg
);
703 if (src1
.abs
) ir
->set_flag(alu_src1_abs
);
704 if (instr
.dest
.saturate
) ir
->set_flag(alu_dst_clamp
);
705 emit_instruction(ir
);
711 bool EmitAluInstruction::emit_alu_op2_int(const nir_alu_instr
& instr
, EAluOp opcode
, AluOp2Opts opts
)
714 const nir_alu_src
& src0
= instr
.src
[0];
715 const nir_alu_src
& src1
= instr
.src
[1];
717 if (src0
.negate
|| src1
.negate
||
718 src0
.abs
|| src1
.abs
) {
719 std::cerr
<< "R600: don't support modifiers with integer operations";
722 return emit_alu_op2(instr
, opcode
, opts
);
725 bool EmitAluInstruction::emit_alu_op2(const nir_alu_instr
& instr
, EAluOp opcode
, AluOp2Opts ops
)
727 const nir_alu_src
*src0
= &instr
.src
[0];
728 const nir_alu_src
*src1
= &instr
.src
[1];
730 if (ops
& op2_opt_reverse
)
731 std::swap(src0
, src1
);
733 bool src1_negate
= (ops
& op2_opt_neg_src1
) ^ src1
->negate
;
735 AluInstruction
*ir
= nullptr;
736 for (int i
= 0; i
< 4 ; ++i
) {
737 if (instr
.dest
.write_mask
& (1 << i
)){
738 ir
= new AluInstruction(opcode
, from_nir(instr
.dest
, i
),
739 from_nir(*src0
, i
), from_nir(*src1
, i
), write
);
741 if (src0
->negate
) ir
->set_flag(alu_src0_neg
);
742 if (src0
->abs
) ir
->set_flag(alu_src0_abs
);
743 if (src1_negate
) ir
->set_flag(alu_src1_neg
);
744 if (src1
->abs
) ir
->set_flag(alu_src1_abs
);
745 if (instr
.dest
.saturate
) ir
->set_flag(alu_dst_clamp
);
746 emit_instruction(ir
);
750 ir
->set_flag(alu_last_instr
);
754 bool EmitAluInstruction::emit_alu_op2_split_src_mods(const nir_alu_instr
& instr
, EAluOp opcode
, AluOp2Opts ops
)
756 const nir_alu_src
*src0
= &instr
.src
[0];
757 const nir_alu_src
*src1
= &instr
.src
[1];
759 if (ops
& op2_opt_reverse
)
760 std::swap(src0
, src1
);
762 GPRVector::Values v0
;
763 for (int i
= 0; i
< 4 ; ++i
)
764 v0
[i
] = from_nir(*src0
, i
);
766 GPRVector::Values v1
;
767 for (int i
= 0; i
< 4 ; ++i
)
768 v1
[i
] = from_nir(*src1
, i
);
770 if (src0
->abs
|| src0
->negate
) {
771 int src0_tmp
= allocate_temp_register();
772 GPRVector::Values v0_temp
;
773 AluInstruction
*ir
= nullptr;
774 for (int i
= 0; i
< 4 ; ++i
) {
775 if (instr
.dest
.write_mask
& (1 << i
)) {
776 v0_temp
[i
] = PValue(new GPRValue(src0_tmp
, i
));
777 ir
= new AluInstruction(op1_mov
, v0_temp
[i
], v0
[i
], write
);
778 if (src0
->abs
) ir
->set_flag(alu_src0_abs
);
779 if (src0
->negate
) ir
->set_flag(alu_src0_neg
);
780 emit_instruction(ir
);
785 ir
->set_flag(alu_last_instr
);
788 if (src1
->abs
|| src1
->negate
) {
789 int src1_tmp
= allocate_temp_register();
790 GPRVector::Values v1_temp
;
791 AluInstruction
*ir
= nullptr;
792 for (int i
= 0; i
< 4 ; ++i
) {
793 if (instr
.dest
.write_mask
& (1 << i
)) {
794 v1_temp
[i
] = PValue(new GPRValue(src1_tmp
, i
));
795 ir
= new AluInstruction(op1_mov
, v1_temp
[i
], v1
[i
], {alu_write
});
796 if (src1
->abs
) ir
->set_flag(alu_src0_abs
);
797 if (src1
->negate
) ir
->set_flag(alu_src0_neg
);
798 emit_instruction(ir
);
803 ir
->set_flag(alu_last_instr
);
806 AluInstruction
*ir
= nullptr;
807 for (int i
= 0; i
< 4 ; ++i
) {
808 if (instr
.dest
.write_mask
& (1 << i
)){
809 ir
= new AluInstruction(opcode
, from_nir(instr
.dest
, i
), {v0
[i
], v1
[i
]}, {alu_write
});
810 emit_instruction(ir
);
814 ir
->set_flag(alu_last_instr
);
819 bool EmitAluInstruction::emit_alu_isign(const nir_alu_instr
& instr
)
821 int sel_tmp
= allocate_temp_register();
822 GPRVector
tmp(sel_tmp
, {0,1,2,3});
824 AluInstruction
*ir
= nullptr;
827 for (int i
= 0; i
< 4 ; ++i
) {
828 if (instr
.dest
.write_mask
& (1 << i
)){
829 help
[i
] = from_nir(instr
.dest
, i
);
830 auto s
= from_nir(instr
.src
[0], i
);
831 ir
= new AluInstruction(op3_cndgt_int
, help
[i
], s
, Value::one_i
, s
, write
);
832 emit_instruction(ir
);
836 ir
->set_flag(alu_last_instr
);
838 for (int i
= 0; i
< 4 ; ++i
) {
839 if (instr
.dest
.write_mask
& (1 << i
)){
840 ir
= new AluInstruction(op2_sub_int
, tmp
.reg_i(i
), Value::zero
, help
[i
], write
);
841 emit_instruction(ir
);
845 ir
->set_flag(alu_last_instr
);
847 for (int i
= 0; i
< 4 ; ++i
) {
848 if (instr
.dest
.write_mask
& (1 << i
)){
850 ir
= new AluInstruction(op3_cndgt_int
, help
[i
], tmp
.reg_i(i
),
851 PValue(new LiteralValue(-1,0)), help
[i
], write
);
852 emit_instruction(ir
);
856 ir
->set_flag(alu_last_instr
);
860 bool EmitAluInstruction::emit_fsign(const nir_alu_instr
& instr
)
864 AluInstruction
*ir
= nullptr;
866 for (int i
= 0; i
< 4 ; ++i
) {
867 help
[i
] = from_nir(instr
.dest
, i
);
868 src
[i
] = from_nir(instr
.src
[0], i
);
871 if (instr
.src
[0].abs
) {
873 for (int i
= 0; i
< 4 ; ++i
) {
874 if (instr
.dest
.write_mask
& (1 << i
)){
875 ir
= new AluInstruction(op2_setgt
, help
[i
], src
[i
], Value::zero
, write
);
876 ir
->set_flag(alu_src0_abs
);
877 emit_instruction(ir
);
881 ir
->set_flag(alu_last_instr
);
883 if (instr
.src
[0].negate
) {
884 for (int i
= 0; i
< 4 ; ++i
) {
885 if (instr
.dest
.write_mask
& (1 << i
)){
886 ir
= new AluInstruction(op1_mov
, help
[i
], help
[i
], write
);
887 ir
->set_flag(alu_src0_neg
);
888 emit_instruction(ir
);
892 ir
->set_flag(alu_last_instr
);
898 for (int i
= 0; i
< 4 ; ++i
) {
899 if (instr
.dest
.write_mask
& (1 << i
)){
900 ir
= new AluInstruction(op3_cndgt
, help
[i
], src
[i
], Value::one_f
, src
[i
], write
);
901 if (instr
.src
[0].negate
) {
902 ir
->set_flag(alu_src0_neg
);
903 ir
->set_flag(alu_src2_neg
);
905 emit_instruction(ir
);
910 ir
->set_flag(alu_last_instr
);
912 for (int i
= 0; i
< 4 ; ++i
) {
913 if (instr
.dest
.write_mask
& (1 << i
)){
914 ir
= new AluInstruction(op3_cndgt
, help
[i
], help
[i
], Value::one_f
, help
[i
], write
);
915 ir
->set_flag(alu_src0_neg
);
916 ir
->set_flag(alu_src1_neg
);
917 emit_instruction(ir
);
921 ir
->set_flag(alu_last_instr
);
925 bool EmitAluInstruction::emit_alu_op3(const nir_alu_instr
& instr
, EAluOp opcode
,
926 std::array
<uint8_t, 3> reorder
)
928 const nir_alu_src
*src
[3];
929 src
[0] = &instr
.src
[reorder
[0]];
930 src
[1] = &instr
.src
[reorder
[1]];
931 src
[2] = &instr
.src
[reorder
[2]];
933 AluInstruction
*ir
= nullptr;
934 for (int i
= 0; i
< 4 ; ++i
) {
935 if (instr
.dest
.write_mask
& (1 << i
)){
936 ir
= new AluInstruction(opcode
, from_nir(instr
.dest
, i
),
937 from_nir(*src
[0], i
), from_nir(*src
[1], i
),
938 from_nir(*src
[2], i
), write
);
940 if (src
[0]->negate
) ir
->set_flag(alu_src0_neg
);
941 if (src
[1]->negate
) ir
->set_flag(alu_src1_neg
);
942 if (src
[2]->negate
) ir
->set_flag(alu_src2_neg
);
944 if (instr
.dest
.saturate
) ir
->set_flag(alu_dst_clamp
);
945 ir
->set_flag(alu_write
);
946 emit_instruction(ir
);
950 ir
->set_flag(alu_last_instr
);
954 bool EmitAluInstruction::emit_alu_ineg(const nir_alu_instr
& instr
)
956 AluInstruction
*ir
= nullptr;
957 for (int i
= 0; i
< 4 ; ++i
) {
958 if (instr
.dest
.write_mask
& (1 << i
)){
959 ir
= new AluInstruction(op2_sub_int
, from_nir(instr
.dest
, i
), Value::zero
,
960 from_nir(instr
.src
[0], i
), write
);
961 emit_instruction(ir
);
965 ir
->set_flag(alu_last_instr
);
// NOTE(review): file-local character table; the characters look like swizzle
// names (x, y, z, w, 0, 1 and two placeholders). It is not referenced in the
// part of the file visible here - confirm whether it is still used.
970 static const char swz
[] = "xyzw01?_";
974 bool EmitAluInstruction::emit_alu_iabs(const nir_alu_instr
& instr
)
976 int sel_tmp
= allocate_temp_register();
977 GPRVector
tmp(sel_tmp
, {0,1,2,3});
979 std::array
<PValue
,4> src
;
980 AluInstruction
*ir
= nullptr;
981 for (int i
= 0; i
< 4 ; ++i
) {
982 if (instr
.dest
.write_mask
& (1 << i
)){
983 src
[i
] = from_nir(instr
.src
[0],i
);
984 ir
= new AluInstruction(op2_sub_int
, tmp
.reg_i(i
), Value::zero
, src
[i
], write
);
985 emit_instruction(ir
);
989 ir
->set_flag(alu_last_instr
);
991 for (int i
= 0; i
< 4 ; ++i
) {
992 if (instr
.dest
.write_mask
& (1 << i
)){
993 ir
= new AluInstruction(op3_cndge_int
, from_nir(instr
.dest
, i
), src
[i
],
994 src
[i
], tmp
.reg_i(i
), write
);
995 emit_instruction(ir
);
999 ir
->set_flag(alu_last_instr
);
// Emits the ALU instruction sequence for an integer division, one component
// at a time, for every component enabled in instr.dest.write_mask.
//
// instr       NIR ALU instruction; src[0] and src[1] are the two operands.
// use_signed  NOTE(review): not read on any visible line; presumably selects
//             the sign handling (negate / xor / cndge) around the unsigned
//             core - confirm.
// mod         NOTE(review): not read on any visible line; presumably selects
//             remainder instead of quotient - confirm.
//
// NOTE(review): several alternative instructions below write the same target
// back to back (tmp1.z / tmp1.w, tmp0.z, the destination). The conditionals
// that choose between them appear to be missing from this copy of the code;
// verify the branch structure before changing anything here.
1003 bool EmitAluInstruction::emit_alu_div_int(const nir_alu_instr
& instr
, bool use_signed
, bool mod
)
// Three temporary registers: sel_tmp provides four scalar scratch values
// (asrc1, asrc2, rsign, err, created with second argument 0..3), sel_tmp0 and
// sel_tmp1 back the four-component scratch vectors tmp0 and tmp1.
1006 int sel_tmp
= allocate_temp_register();
1007 int sel_tmp0
= allocate_temp_register();
1008 int sel_tmp1
= allocate_temp_register();
1010 PValue
asrc1(new GPRValue(sel_tmp
, 0));
1011 PValue
asrc2(new GPRValue(sel_tmp
, 1));
1012 PValue
rsign(new GPRValue(sel_tmp
, 2));
1013 PValue
err(new GPRValue(sel_tmp
, 3));
1015 GPRVector
tmp0(sel_tmp0
, {0,1,2,3});
1016 GPRVector
tmp1(sel_tmp1
, {0,1,2,3});
1018 std::array
<PValue
, 4> src0
;
1019 std::array
<PValue
, 4> src1
;
// Fetch both operands of every written component up front.
1021 for (int i
= 0; i
< 4 ; ++i
) {
1022 if (instr
.dest
.write_mask
& (1 << i
)) {
1023 src0
[i
] = from_nir(instr
.src
[0], i
);
1024 src1
[i
] = from_nir(instr
.src
[1], i
);
// Main loop: components are processed from 3 down to 0, each with its own
// complete instruction sequence.
1029 for (int i
= 3; i
>= 0 ; --i
) {
1030 if (!(instr
.dest
.write_mask
& (1 << i
)))
// Sign preparation: asrc1 / asrc2 = 0 - operand, rsign = src0 xor src1.
1033 emit_instruction(op2_sub_int
, asrc1
, {Value::zero
, src0
[i
]}, {alu_write
});
1034 emit_instruction(op2_sub_int
, asrc2
, {Value::zero
, src1
[i
]}, {alu_write
});
1035 emit_instruction(op2_xor_int
, rsign
, {src0
[i
], src1
[i
]}, {alu_write
, alu_last_instr
});
// cndge_int over (operand, operand, negated operand): presumably picks the
// non-negative of the two, i.e. the absolute value - confirm against the
// opcode definition.
1038 emit_instruction(op3_cndge_int
, asrc1
, {src0
[i
], src0
[i
], asrc1
}, {alu_write
});
1039 emit_instruction(op3_cndge_int
, asrc2
, {src1
[i
], src1
[i
], asrc2
}, {alu_write
, alu_last_instr
});
// Unsigned core: recip_uint of asrc2 lands in tmp0.x and is then adjusted
// using mullo / mulhi products of that estimate with asrc2 (error term in
// err, candidates in tmp1.x / tmp1.y).
1045 emit_instruction(op1_recip_uint
, tmp0
.x(), {asrc2
}, {alu_write
, alu_last_instr
});
1047 emit_instruction(op2_mullo_uint
, tmp0
.z(), {tmp0
.x(), asrc2
}, {alu_write
, alu_last_instr
});
1049 emit_instruction(op2_sub_int
, tmp0
.w(), {Value::zero
, tmp0
.z()}, {alu_write
});
1050 emit_instruction(op2_mulhi_uint
, tmp0
.y(), {tmp0
.x(), asrc2
}, {alu_write
, alu_last_instr
});
1052 emit_instruction(op3_cnde_int
, tmp0
.z(), {tmp0
.y(), tmp0
.w(), tmp0
.z()}, {alu_write
, alu_last_instr
});
1054 emit_instruction(op2_mulhi_uint
, err
, {tmp0
.z(), tmp0
.x()}, {alu_write
, alu_last_instr
});
1056 emit_instruction(op2_sub_int
, tmp1
.x(), {tmp0
.x(), err
}, {alu_write
});
1057 emit_instruction(op2_add_int
, tmp1
.y(), {tmp0
.x(), err
}, {alu_write
, alu_last_instr
});
1059 emit_instruction(op3_cnde_int
, tmp0
.x(), {tmp0
.y(), tmp1
.y(), tmp1
.x()}, {alu_write
, alu_last_instr
});
// tmp0.z = mulhi(tmp0.x, asrc1), tmp0.y = tmp0.z * asrc2,
// tmp0.w = asrc1 - tmp0.y; presumably quotient estimate and the matching
// remainder - confirm.
1061 emit_instruction(op2_mulhi_uint
, tmp0
.z(), {tmp0
.x(), asrc1
}, {alu_write
, alu_last_instr
});
1062 emit_instruction(op2_mullo_uint
, tmp0
.y(), {tmp0
.z(), asrc2
}, {alu_write
, alu_last_instr
});
1064 emit_instruction(op2_sub_int
, tmp0
.w(), {asrc1
, tmp0
.y()}, {alu_write
, alu_last_instr
});
// Range checks used by the selects below:
// tmp1.x = (tmp0.w >= asrc2), tmp1.y = (asrc1 >= tmp0.y), both unsigned.
1067 emit_instruction(op2_setge_uint
, tmp1
.x(), {tmp0
.w(), asrc2
}, {alu_write
});
1068 emit_instruction(op2_setge_uint
, tmp1
.y(), {asrc1
, tmp0
.y()}, {alu_write
});
// Candidates in tmp1.z / tmp1.w: tmp0.w -/+ asrc2 ...
1071 emit_instruction(op2_sub_int
, tmp1
.z(), {tmp0
.w(), asrc2
}, {alu_write
});
1072 emit_instruction(op2_add_int
, tmp1
.w(), {tmp0
.w(), asrc2
}, {alu_write
, alu_last_instr
});
// ... or tmp0.z +/- 1 (same targets as the pair above, see the note in the
// function header).
1074 emit_instruction(op2_add_int
, tmp1
.z(), {tmp0
.z(), Value::one_i
}, {alu_write
});
1075 emit_instruction(op2_sub_int
, tmp1
.w(), {tmp0
.z(), Value::one_i
}, {alu_write
, alu_last_instr
});
// Combine both range checks, then select the value kept in tmp0.z.
1078 emit_instruction(op2_and_int
, tmp1
.x(), {tmp1
.x(), tmp1
.y()}, {alu_write
, alu_last_instr
});
1081 emit_instruction(op3_cnde_int
, tmp0
.z(), {tmp1
.x(), tmp0
.w(), tmp1
.z()}, {alu_write
, alu_last_instr
});
1083 emit_instruction(op3_cnde_int
, tmp0
.z(), {tmp1
.x(), tmp0
.z(), tmp1
.z()}, {alu_write
, alu_last_instr
});
// Second select on tmp1.y, then tmp0.y = 0 - tmp0.z as the negated result.
1086 emit_instruction(op3_cnde_int
, tmp0
.z(), {tmp1
.y(), tmp1
.w(), tmp0
.z()}, {alu_write
, alu_last_instr
});
1087 emit_instruction(op2_sub_int
, tmp0
.y(), {Value::zero
, tmp0
.z()}, {alu_write
, alu_last_instr
});
// Destination writes: cndge_int keyed on src0 or on rsign chooses between
// tmp0.z and its negation tmp0.y; the cnde_int form writes the select on
// tmp1.y straight to the destination.
1090 emit_instruction(op3_cndge_int
, from_nir(instr
.dest
, i
), {src0
[i
], tmp0
.z(), tmp0
.y()},
1091 {alu_write
, alu_last_instr
});
1093 emit_instruction(op3_cndge_int
, from_nir(instr
.dest
, i
), {rsign
, tmp0
.z(), tmp0
.y()},
1094 {alu_write
, alu_last_instr
});
1096 emit_instruction(op3_cnde_int
, from_nir(instr
.dest
, i
), {tmp1
.y(), tmp1
.w(), tmp0
.z()}, {alu_write
, alu_last_instr
});
// Emits one op1_mov per component that copies s[i] into v[i], with source
// modifier flags set on the mov.
//
// src    NIR ALU source. NOTE(review): not read on any visible line;
//        presumably its abs / negate fields decide which of the two flags
//        below is set - confirm.
// s      values the movs read from.
// v      values the movs write to.
// ncomp  number of components to process.
1102 void EmitAluInstruction::split_alu_modifiers(const nir_alu_src
& src
, GPRVector::Values
& s
,
1103 GPRVector::Values
& v
, int ncomp
)
1106 AluInstruction
*alu
= nullptr;
1107 for (int i
= 0; i
< ncomp
; ++i
) {
1108 alu
= new AluInstruction(op1_mov
, v
[i
], s
[i
], {alu_write
});
// Absolute-value modifier on the mov's source operand.
1110 alu
->set_flag(alu_src0_abs
);
// Negate modifier on the mov's source operand.
1112 alu
->set_flag(alu_src0_neg
);
1113 emit_instruction(alu
);
// Builds and emits a TexInstruction with opcode op for an ALU instruction
// (the in-body comment refers to derivatives).
//
// instr  NIR ALU instruction; source 0 provides the input vector and the
//        destination write mask drives the destination swizzle.
// op     texture opcode to emit.
//
// NOTE(review): the parameter list is cut off after op in this copy, and dst
// is used further down without a visible declaration - verify both against
// the class declaration before editing.
1118 bool EmitAluInstruction::emit_tex_fdd(const nir_alu_instr
& instr
, TexInstruction::Opcode op
,
// v collects the destination values, s the source values. source starts out
// pointing at s and is not reassigned on any visible line.
1122 GPRVector::Values v
;
1123 GPRVector::Values s
;
1124 GPRVector::Values
*source
= &s
;
1125 std::array
<int, 4> writemask
= {0,1,2,3};
// Component count of source 0, taken from the SSA definition or from the
// register depending on is_ssa.
1127 int ncomp
= instr
.src
[0].src
.is_ssa
? instr
.src
[0].src
.ssa
->num_components
:
1128 instr
.src
[0].src
.reg
.reg
->num_components
;
// Components that are not written get swizzle value 7; slots at or beyond
// ncomp reuse component 0 for both destination and source.
1130 for (int i
= 0; i
< 4; ++i
) {
1131 writemask
[i
] = (instr
.dest
.write_mask
& (1 << i
)) ? i
: 7;
1132 v
[i
] = from_nir(instr
.dest
, (i
< ncomp
) ? i
: 0);
1133 s
[i
] = from_nir(instr
.src
[0], (i
< ncomp
) ? i
: 0);
// Source modifiers are resolved by separate movs from s into v first.
1136 if (instr
.src
[0].abs
|| instr
.src
[0].negate
) {
1137 split_alu_modifiers(instr
.src
[0], s
, v
, ncomp
);
1141 /* This is querying the dreivatives of the output fb, so we would either need
1142 * access to the neighboring pixels or to the framebuffer. Neither is currently
1145 GPRVector
src(*source
);
// The texture instruction is created with sampler argument 0 and
// R600_MAX_CONST_BUFFERS as resource argument, and an empty PValue.
1147 auto tex
= new TexInstruction(op
, dst
, src
, 0, R600_MAX_CONST_BUFFERS
, PValue());
1148 tex
->set_dest_swizzle(writemask
);
// Debug trace on stderr, followed by the fine-gradient flag.
1151 std::cerr
<< "Sewt fine flag\n";
1152 tex
->set_flag(TexInstruction::grad_fine
);
1155 emit_instruction(tex
);
// Emits a bit-field extract in three passes over the components enabled in
// the destination write mask.
//
// instr   NIR ALU instruction with three sources.
// opcode  ALU opcode used for the extract itself in the first pass.
//
// NOTE(review): the last constructor argument of each AluInstruction below
// is not present in this copy of the code - verify before editing.
1160 bool EmitAluInstruction::emit_bitfield_extract(const nir_alu_instr
& instr
, EAluOp opcode
)
// One temporary register (itmp) backs the per-component flags in tmp.
1162 int itmp
= allocate_temp_register();
1163 std::array
<PValue
, 4> tmp
;
1164 std::array
<PValue
, 4> dst
;
1165 std::array
<PValue
, 4> src0
;
1166 std::array
<PValue
, 4> shift
;
1168 PValue
l32(new LiteralValue(32));
1169 unsigned write_mask
= instr
.dest
.write_mask
;
1171 AluInstruction
*ir
= nullptr;
// Pass 1: dst = opcode(src0, src1, src2); src[2] is kept in shift for the
// comparison in pass 2.
1172 for (int i
= 0; i
< 4; i
++) {
1173 if (!(write_mask
& (1<<i
)))
1175 dst
[i
] = from_nir(instr
.dest
, i
);
1176 src0
[i
] = from_nir(instr
.src
[0], i
);
1177 shift
[i
] = from_nir(instr
.src
[2], i
);
1179 ir
= new AluInstruction(opcode
, dst
[i
],
1180 {src0
[i
], from_nir(instr
.src
[1], i
), shift
[i
]},
1182 emit_instruction(ir
);
// Pass 2: tmp = (shift >= 32), signed compare against the literal 32.
1186 for (int i
= 0; i
< 4; i
++) {
1187 if (!(write_mask
& (1<<i
)))
1189 tmp
[i
] = PValue(new GPRValue(itmp
, i
));
1190 ir
= new AluInstruction(op2_setge_int
, tmp
[i
], {shift
[i
], l32
},
1192 emit_instruction(ir
);
// Pass 3: dst = cnde_int(tmp, dst, src0); presumably keeps the extract
// result while tmp is zero and falls back to the unmodified src0 otherwise -
// confirm against the opcode definition.
1196 for (int i
= 0; i
< 4; i
++) {
1197 if (!(write_mask
& (1<<i
)))
1199 ir
= new AluInstruction(op3_cnde_int
, dst
[i
], {tmp
[i
], dst
[i
], src0
[i
]},
1201 emit_instruction(ir
);
// Emits a bit-field insert in five passes over the components enabled in
// the destination write mask, using three temporary vectors t0, t1, t2.
//
// instr  NIR ALU instruction with four sources (src[0] .. src[3]).
//
// Returns true right away when the write mask is empty.
1208 bool EmitAluInstruction::emit_bitfield_insert(const nir_alu_instr
& instr
)
1210 auto t0
= get_temp_vec4();
1211 auto t1
= get_temp_vec4();
1212 auto t2
= get_temp_vec4();
1214 PValue
l32(new LiteralValue(32));
1215 unsigned write_mask
= instr
.dest
.write_mask
;
// With at least one component written, ir is non-null after every loop
// below, which the unconditional set_flag calls rely on.
1216 if (!write_mask
) return true;
1218 AluInstruction
*ir
= nullptr;
// Pass 1: t0 = (src3 >= 32), signed compare against the literal 32.
1219 for (int i
= 0; i
< 4; i
++) {
1220 if (!(write_mask
& (1<<i
)))
1223 ir
= new AluInstruction(op2_setge_int
, t0
[i
], {from_nir(instr
.src
[3], i
), l32
}, {alu_write
});
1224 emit_instruction(ir
);
// Pass 2: t1 = bfm_int(src3, src2).
1228 for (int i
= 0; i
< 4; i
++) {
1229 if (!(write_mask
& (1<<i
)))
1231 ir
= new AluInstruction(op2_bfm_int
, t1
[i
], {from_nir(instr
.src
[3], i
),
1232 from_nir(instr
.src
[2], i
)}, {alu_write
});
1233 emit_instruction(ir
);
1235 ir
->set_flag(alu_last_instr
);
// Pass 3: t2 = src1 << src2.
1237 for (int i
= 0; i
< 4; i
++) {
1238 if (!(write_mask
& (1<<i
)))
1240 ir
= new AluInstruction(op2_lshl_int
, t2
[i
], {from_nir(instr
.src
[1], i
),
1241 from_nir(instr
.src
[2], i
)}, {alu_write
});
1242 emit_instruction(ir
);
1244 ir
->set_flag(alu_last_instr
);
// Pass 4: dest = bfi_int(t1, t2, src0).
1247 for (int i
= 0; i
< 4; i
++) {
1248 if (!(write_mask
& (1<<i
)))
1250 ir
= new AluInstruction(op3_bfi_int
, from_nir(instr
.dest
, i
),
1251 {t1
[i
], t2
[i
], from_nir(instr
.src
[0], i
)}, {alu_write
});
1252 emit_instruction(ir
);
1254 ir
->set_flag(alu_last_instr
);
// Pass 5: dest = cnde_int(t0, dest, src1); presumably keeps the bfi result
// while t0 is zero and takes src1 directly otherwise - confirm against the
// opcode definition.
1256 for (int i
= 0; i
< 4; i
++) {
1257 if (!(write_mask
& (1<<i
)))
1259 ir
= new AluInstruction(op3_cnde_int
, from_nir(instr
.dest
, i
),
1260 {t0
[i
], from_nir(instr
.dest
, i
),
1261 from_nir(instr
.src
[1], i
)}, {alu_write
});
1262 emit_instruction(ir
);
1264 ir
->set_flag(alu_last_instr
);
// Unpacks the upper 16 bits of component 0 of source 0 as a half float and
// writes the 32-bit float to component 0 of the destination.
1269 bool EmitAluInstruction::emit_unpack_32_2x16_split_y(const nir_alu_instr
& instr
)
// dest = src0 >> 16 (logical shift), moving the upper half to the low bits.
1271 emit_instruction(op2_lshr_int
, from_nir(instr
.dest
, 0),
1272 {from_nir(instr
.src
[0], 0), PValue(new LiteralValue(16))},
1273 {alu_write
, alu_last_instr
});
// dest = flt16_to_flt32(dest), converting in place.
1275 emit_instruction(op1_flt16_to_flt32
, from_nir(instr
.dest
, 0),
1276 {from_nir(instr
.dest
, 0)},{alu_write
, alu_last_instr
});
// Converts component 0 of source 0 with flt16_to_flt32 and writes the result
// to component 0 of the destination. No shift is applied first.
// NOTE(review): presumably the conversion only looks at the low 16 bits of
// its operand - confirm against the opcode definition.
1281 bool EmitAluInstruction::emit_unpack_32_2x16_split_x(const nir_alu_instr
& instr
)
1283 emit_instruction(op1_flt16_to_flt32
, from_nir(instr
.dest
, 0),
1284 {from_nir(instr
.src
[0], 0)},{alu_write
, alu_last_instr
});
// Packs two 32-bit floats (component 0 of source 0 and of source 1) as half
// floats into component 0 of the destination: source 0 ends up in the low
// bits, source 1 is shifted left by 16.
1288 bool EmitAluInstruction::emit_pack_32_2x16_split(const nir_alu_instr
& instr
)
// x and y are two scratch values of one temporary register.
1290 int it0
= allocate_temp_register();
1291 PValue
x(new GPRValue(it0
, 0));
1292 PValue
y(new GPRValue(it0
, 1));
// Convert both inputs from 32-bit float to 16-bit float.
1294 emit_instruction(op1_flt32_to_flt16
, x
,{from_nir(instr
.src
[0], 0)},{alu_write
});
1295 emit_instruction(op1_flt32_to_flt16
, y
,{from_nir(instr
.src
[1], 0)},{alu_write
, alu_last_instr
});
// y <<= 16 so it occupies the upper half of the packed word.
1297 emit_instruction(op2_lshl_int
, y
, {y
, PValue(new LiteralValue(16))},{alu_write
, alu_last_instr
});
// dest = x | y
1299 emit_instruction(op2_or_int
, {from_nir(instr
.dest
, 0)} , {x
, y
},{alu_write
, alu_last_instr
});