3 * Copyright (c) 2018 Collabora LTD
5 * Author: Gert Wollny <gert.wollny@collabora.com>
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
28 #include "sfn_emitaluinstruction.h"
29 #include "sfn_debug.h"
31 #include "gallium/drivers/r600/r600_shader.h"
37 EmitAluInstruction::EmitAluInstruction(ShaderFromNirProcessor
& processor
):
38 EmitInstruction (processor
)
43 bool EmitAluInstruction::do_emit(nir_instr
* ir
)
45 const nir_alu_instr
& instr
= *nir_instr_as_alu(ir
);
47 r600::sfn_log
<< SfnLog::instr
<< "emit '"
49 << " bitsize: " << static_cast<int>(instr
.dest
.dest
.ssa
.bit_size
)
50 << "' (" << __func__
<< ")\n";
52 split_constants(instr
);
55 case nir_op_b2f32
: return emit_alu_b2f(instr
);
56 case nir_op_i2b1
: return emit_alu_i2orf2_b1(instr
, op2_setne_int
);
57 case nir_op_f2b1
: return emit_alu_i2orf2_b1(instr
, op2_setne_dx10
);
58 case nir_op_mov
:return emit_alu_op1(instr
, op1_mov
);
59 case nir_op_ftrunc
: return emit_alu_op1(instr
, op1_trunc
);
60 case nir_op_fabs
: return emit_alu_op1(instr
, op1_mov
, {1 << alu_src0_abs
});
61 case nir_op_fneg
: return emit_alu_op1(instr
, op1_mov
, {1 << alu_src0_neg
});
62 case nir_op_fsat
: return emit_alu_op1(instr
, op1_mov
, {1 << alu_dst_clamp
});
63 case nir_op_frcp
: return emit_alu_trans_op1(instr
, op1_recip_ieee
);
64 case nir_op_frsq
: return emit_alu_trans_op1(instr
, op1_recipsqrt_ieee1
);
65 case nir_op_fsin
: return emit_alu_trig_op1(instr
, op1_sin
);
66 case nir_op_fcos
: return emit_alu_trig_op1(instr
, op1_cos
);
67 case nir_op_fexp2
: return emit_alu_trans_op1(instr
, op1_exp_ieee
);
68 case nir_op_flog2
: return emit_alu_trans_op1(instr
, op1_log_clamped
);
70 case nir_op_fround_even
: return emit_alu_op1(instr
, op1_rndne
);
71 case nir_op_fsqrt
: return emit_alu_trans_op1(instr
, op1_sqrt_ieee
);
72 case nir_op_i2f32
: return emit_alu_trans_op1(instr
, op1_int_to_flt
);
73 case nir_op_u2f32
: return emit_alu_trans_op1(instr
, op1_uint_to_flt
);
74 case nir_op_f2i32
: return emit_alu_f2i32_or_u32(instr
, op1_flt_to_int
);
75 case nir_op_f2u32
: return emit_alu_f2i32_or_u32(instr
, op1_flt_to_uint
);
77 case nir_op_fceil
: return emit_alu_op1(instr
, op1_ceil
);
78 case nir_op_ffract
: return emit_alu_op1(instr
, op1_fract
);
79 case nir_op_ffloor
: return emit_alu_op1(instr
, op1_floor
);
81 case nir_op_fsign
: return emit_fsign(instr
);
82 case nir_op_fdph
: return emit_fdph(instr
);
84 case nir_op_ibitfield_extract
: return emit_bitfield_extract(instr
, op3_bfe_int
);
85 case nir_op_ubitfield_extract
: return emit_bitfield_extract(instr
, op3_bfe_uint
);
86 case nir_op_bitfield_insert
: return emit_bitfield_insert(instr
);
87 case nir_op_bit_count
: return emit_alu_op1(instr
, op1_bcnt_int
);
88 case nir_op_bitfield_reverse
: return emit_alu_op1(instr
, op1_bfrev_int
);
90 case nir_op_ieq
: return emit_alu_op2_int(instr
, op2_sete_int
);
91 case nir_op_ine
: return emit_alu_op2_int(instr
, op2_setne_int
);
92 case nir_op_ige
: return emit_alu_op2_int(instr
, op2_setge_int
);
93 case nir_op_ishl
: return emit_alu_op2_int(instr
, op2_lshl_int
);
94 case nir_op_ishr
: return emit_alu_op2_int(instr
, op2_ashr_int
);
95 case nir_op_ilt
: return emit_alu_op2_int(instr
, op2_setgt_int
, op2_opt_reverse
);
96 case nir_op_iand
: return emit_alu_op2_int(instr
, op2_and_int
);
97 case nir_op_ixor
: return emit_alu_op2_int(instr
, op2_xor_int
);
98 case nir_op_imin
: return emit_alu_op2_int(instr
, op2_min_int
);
99 case nir_op_imax
: return emit_alu_op2_int(instr
, op2_max_int
);
100 case nir_op_imul_high
: return emit_alu_trans_op2(instr
, op2_mulhi_int
);
101 case nir_op_umul_high
: return emit_alu_trans_op2(instr
, op2_mulhi_uint
);
102 case nir_op_umax
: return emit_alu_op2_int(instr
, op2_max_uint
);
103 case nir_op_umin
: return emit_alu_op2_int(instr
, op2_min_uint
);
104 case nir_op_ior
: return emit_alu_op2_int(instr
, op2_or_int
);
105 case nir_op_inot
: return emit_alu_op1(instr
, op1_not_int
);
106 case nir_op_iabs
: return emit_alu_iabs(instr
);
107 case nir_op_ineg
: return emit_alu_ineg(instr
);
108 case nir_op_idiv
: return emit_alu_div_int(instr
, true, false);
109 case nir_op_udiv
: return emit_alu_div_int(instr
, false, false);
110 case nir_op_umod
: return emit_alu_div_int(instr
, false, true);
111 case nir_op_isign
: return emit_alu_isign(instr
);
113 case nir_op_uge
: return emit_alu_op2_int(instr
, op2_setge_uint
);
114 case nir_op_ult
: return emit_alu_op2_int(instr
, op2_setgt_uint
, op2_opt_reverse
);
115 case nir_op_ushr
: return emit_alu_op2_int(instr
, op2_lshr_int
);
117 case nir_op_flt
: return emit_alu_op2(instr
, op2_setgt_dx10
, op2_opt_reverse
);
119 case nir_op_fge
: return emit_alu_op2(instr
, op2_setge_dx10
);
120 case nir_op_fne
: return emit_alu_op2(instr
, op2_setne_dx10
);
121 case nir_op_feq
: return emit_alu_op2(instr
, op2_sete_dx10
);
123 case nir_op_fmin
: return emit_alu_op2(instr
, op2_min_dx10
);
124 case nir_op_fmax
: return emit_alu_op2(instr
, op2_max_dx10
);
125 case nir_op_fmul
: return emit_alu_op2(instr
, op2_mul_ieee
);
126 case nir_op_imul
: return emit_alu_trans_op2(instr
, op2_mullo_int
);
127 case nir_op_fadd
: return emit_alu_op2(instr
, op2_add
);
128 case nir_op_fsub
: return emit_alu_op2(instr
, op2_add
, op2_opt_neg_src1
);
129 case nir_op_iadd
: return emit_alu_op2_int(instr
, op2_add_int
);
130 case nir_op_isub
: return emit_alu_op2_int(instr
, op2_sub_int
);
131 case nir_op_fdot2
: return emit_dot(instr
, 2);
132 case nir_op_fdot3
: return emit_dot(instr
, 3);
133 case nir_op_fdot4
: return emit_dot(instr
, 4);
135 case nir_op_bany_inequal2
: return emit_any_all_icomp(instr
, op2_setne_int
, 2, false);
136 case nir_op_bany_inequal3
: return emit_any_all_icomp(instr
, op2_setne_int
, 3, false);
137 case nir_op_bany_inequal4
: return emit_any_all_icomp(instr
, op2_setne_int
, 4, false);
139 case nir_op_ball_iequal2
: return emit_any_all_icomp(instr
, op2_sete_int
, 2, true);
140 case nir_op_ball_iequal3
: return emit_any_all_icomp(instr
, op2_sete_int
, 3, true);
141 case nir_op_ball_iequal4
: return emit_any_all_icomp(instr
, op2_sete_int
, 4, true);
143 case nir_op_bany_fnequal2
: return emit_any_all_fcomp2(instr
, op2_setne_dx10
, false);
144 case nir_op_bany_fnequal3
: return emit_any_all_fcomp(instr
, op2_setne
, 3, false);
145 case nir_op_bany_fnequal4
: return emit_any_all_fcomp(instr
, op2_setne
, 4, false);
147 case nir_op_ball_fequal2
: return emit_any_all_fcomp2(instr
, op2_sete_dx10
, true);
148 case nir_op_ball_fequal3
: return emit_any_all_fcomp(instr
, op2_sete
, 3, true);
149 case nir_op_ball_fequal4
: return emit_any_all_fcomp(instr
, op2_sete
, 4, true);
152 case nir_op_ffma
: return emit_alu_op3(instr
, op3_muladd_ieee
);
153 case nir_op_bcsel
: return emit_alu_op3(instr
, op3_cnde_int
, {0, 2, 1});
154 case nir_op_vec2
: return emit_create_vec(instr
, 2);
155 case nir_op_vec3
: return emit_create_vec(instr
, 3);
156 case nir_op_vec4
: return emit_create_vec(instr
, 4);
158 case nir_op_find_lsb
: return emit_alu_op1(instr
, op1_ffbl_int
);
159 case nir_op_ufind_msb
: return emit_find_msb(instr
, false);
160 case nir_op_ifind_msb
: return emit_find_msb(instr
, true);
161 case nir_op_b2i32
: return emit_b2i32(instr
);
162 case nir_op_pack_64_2x32_split
: return emit_pack_64_2x32_split(instr
);
163 case nir_op_unpack_64_2x32_split_x
: return emit_unpack_64_2x32_split(instr
, 0);
164 case nir_op_unpack_64_2x32_split_y
: return emit_unpack_64_2x32_split(instr
, 1);
165 case nir_op_unpack_half_2x16_split_x
: return emit_unpack_32_2x16_split_x(instr
);
166 case nir_op_unpack_half_2x16_split_y
: return emit_unpack_32_2x16_split_y(instr
);
167 case nir_op_pack_half_2x16_split
: return emit_pack_32_2x16_split(instr
);
170 /* These are in the ALU instruction list, but they should be texture instructions */
171 case nir_op_fddx_fine
: return emit_tex_fdd(instr
, TexInstruction::get_gradient_h
, true);
172 case nir_op_fddx_coarse
:
173 case nir_op_fddx
: return emit_tex_fdd(instr
, TexInstruction::get_gradient_h
, false);
175 case nir_op_fddy_fine
: return emit_tex_fdd(instr
, TexInstruction::get_gradient_v
, true);
176 case nir_op_fddy_coarse
:
177 case nir_op_fddy
: return emit_tex_fdd(instr
,TexInstruction::get_gradient_v
, false);
184 void EmitAluInstruction::split_constants(const nir_alu_instr
& instr
)
186 const nir_op_info
*op_info
= &nir_op_infos
[instr
.op
];
187 if (op_info
->num_inputs
< 2)
191 std::array
<PValue
,4> c
;
192 std::array
<int,4> idx
;
193 for (unsigned i
= 0; i
< op_info
->num_inputs
; ++i
) {
194 PValue src
= from_nir(instr
.src
[i
], 0);
196 if (src
->type() == Value::kconst
) {
205 unsigned sel
= c
[0]->sel();
206 sfn_log
<< SfnLog::reg
<< "split " << nconst
<< " constants, sel[0] = " << sel
; ;
208 for (int i
= 1; i
< nconst
; ++i
) {
209 sfn_log
<< "sel[" << i
<< "] = " << c
[i
]->sel() << "\n";
210 if (c
[i
]->sel() != sel
) {
211 load_uniform(instr
.src
[idx
[i
]]);
216 bool EmitAluInstruction::emit_alu_inot(const nir_alu_instr
& instr
)
218 if (instr
.src
[0].negate
|| instr
.src
[0].abs
) {
219 std::cerr
<< "source modifiers not supported with int ops\n";
223 AluInstruction
*ir
= nullptr;
224 for (int i
= 0; i
< 4 ; ++i
) {
225 if (instr
.dest
.write_mask
& (1 << i
)){
226 ir
= new AluInstruction(op1_not_int
, from_nir(instr
.dest
, i
),
227 from_nir(instr
.src
[0], i
), write
);
228 emit_instruction(ir
);
232 ir
->set_flag(alu_last_instr
);
236 bool EmitAluInstruction::emit_alu_op1(const nir_alu_instr
& instr
, EAluOp opcode
,
237 const AluOpFlags
& flags
)
239 AluInstruction
*ir
= nullptr;
240 for (int i
= 0; i
< 4 ; ++i
) {
241 if (instr
.dest
.write_mask
& (1 << i
)){
242 ir
= new AluInstruction(opcode
, from_nir(instr
.dest
, i
),
243 from_nir(instr
.src
[0], i
), write
);
245 if (flags
.test(alu_src0_abs
) || instr
.src
[0].abs
)
246 ir
->set_flag(alu_src0_abs
);
248 if (instr
.src
[0].negate
^ flags
.test(alu_src0_neg
))
249 ir
->set_flag(alu_src0_neg
);
251 if (flags
.test(alu_dst_clamp
) || instr
.dest
.saturate
)
252 ir
->set_flag(alu_dst_clamp
);
254 emit_instruction(ir
);
262 bool EmitAluInstruction::emit_alu_trig_op1(const nir_alu_instr
& instr
, EAluOp opcode
)
264 // normalize by dividing by 2*PI, shift by 0.5, take fraction, and
267 const float inv_2_pi
= 0.15915494f
;
269 PValue v
[4]; // this might need some additional temp register creation
270 for (unsigned i
= 0; i
< 4 ; ++i
)
271 v
[i
] = from_nir(instr
.dest
, i
);
273 PValue inv_pihalf
= PValue(new LiteralValue(inv_2_pi
, 0));
274 AluInstruction
*ir
= nullptr;
275 for (unsigned i
= 0; i
< 4 ; ++i
) {
276 if (!(instr
.dest
.write_mask
& (1 << i
)))
278 ir
= new AluInstruction(op3_muladd_ieee
, v
[i
],
279 {from_nir(instr
.src
[0],i
), inv_pihalf
, Value::zero_dot_5
},
281 if (instr
.src
[0].negate
) ir
->set_flag(alu_src0_neg
);
282 emit_instruction(ir
);
286 for (unsigned i
= 0; i
< 4 ; ++i
) {
287 if (!(instr
.dest
.write_mask
& (1 << i
)))
289 ir
= new AluInstruction(op1_fract
, v
[i
], v
[i
], {alu_write
});
290 emit_instruction(ir
);
294 for (unsigned i
= 0; i
< 4 ; ++i
) {
295 if (!(instr
.dest
.write_mask
& (1 << i
)))
297 ir
= new AluInstruction(op2_add
, v
[i
], v
[i
], Value::zero_dot_5
, write
);
298 ir
->set_flag(alu_src1_neg
);
299 emit_instruction(ir
);
303 for (unsigned i
= 0; i
< 4 ; ++i
) {
304 if (!(instr
.dest
.write_mask
& (1 << i
)))
307 ir
= new AluInstruction(opcode
, v
[i
], v
[i
], last_write
);
308 emit_instruction(ir
);
313 bool EmitAluInstruction::emit_alu_trans_op1(const nir_alu_instr
& instr
, EAluOp opcode
,
316 AluInstruction
*ir
= nullptr;
317 std::set
<int> src_idx
;
318 for (int i
= 0; i
< 4 ; ++i
) {
319 if (instr
.dest
.write_mask
& (1 << i
)){
320 ir
= new AluInstruction(opcode
, from_nir(instr
.dest
, i
),
321 from_nir(instr
.src
[0], i
), last_write
);
322 if (absolute
|| instr
.src
[0].abs
) ir
->set_flag(alu_src0_abs
);
323 if (instr
.src
[0].negate
) ir
->set_flag(alu_src0_neg
);
324 if (instr
.dest
.saturate
) ir
->set_flag(alu_dst_clamp
);
325 emit_instruction(ir
);
331 bool EmitAluInstruction::emit_alu_f2i32_or_u32(const nir_alu_instr
& instr
, EAluOp op
)
333 AluInstruction
*ir
= nullptr;
334 std::array
<PValue
, 4> v
;
336 for (int i
= 0; i
< 4; ++i
) {
337 if (!(instr
.dest
.write_mask
& (1 << i
)))
339 v
[i
] = from_nir(instr
.dest
, i
);
340 ir
= new AluInstruction(op1_trunc
, v
[i
], from_nir(instr
.src
[0], i
), {alu_write
});
341 if (instr
.src
[0].abs
) ir
->set_flag(alu_src0_abs
);
342 if (instr
.src
[0].negate
) ir
->set_flag(alu_src0_neg
);
343 emit_instruction(ir
);
347 for (int i
= 0; i
< 4; ++i
) {
348 if (!(instr
.dest
.write_mask
& (1 << i
)))
350 ir
= new AluInstruction(op
, v
[i
], v
[i
], {alu_write
});
351 emit_instruction(ir
);
352 if (op
== op1_flt_to_uint
)
360 bool EmitAluInstruction::emit_find_msb(const nir_alu_instr
& instr
, bool sgn
)
362 int sel_tmp
= allocate_temp_register();
363 int sel_tmp2
= allocate_temp_register();
364 GPRVector
tmp(sel_tmp
, {0,1,2,3});
365 GPRVector
tmp2(sel_tmp2
, {0,1,2,3});
366 AluInstruction
*ir
= nullptr;
367 EAluOp opcode
= sgn
? op1_ffbh_int
: op1_ffbh_uint
;
368 for (int i
= 0; i
< 4; ++i
) {
369 if (!(instr
.dest
.write_mask
& (1 << i
)))
372 ir
= new AluInstruction(opcode
, tmp
.reg_i(i
), from_nir(instr
.src
[0], i
), write
);
373 emit_instruction(ir
);
377 for (int i
= 0; i
< 4 ; ++i
) {
378 if (!(instr
.dest
.write_mask
& (1 << i
)))
381 ir
= new AluInstruction(op2_sub_int
, tmp2
.reg_i(i
),
382 PValue(new LiteralValue(31u, 0)), tmp
.reg_i(i
), write
);
383 emit_instruction(ir
);
387 for (int i
= 0; i
< 4 ; ++i
) {
388 if (!(instr
.dest
.write_mask
& (1 << i
)))
391 ir
= new AluInstruction(op3_cndge_int
, from_nir(instr
.dest
, i
), tmp
.reg_i(i
),
392 tmp2
.reg_i(i
), tmp
.reg_i(i
), write
);
393 emit_instruction(ir
);
400 bool EmitAluInstruction::emit_b2i32(const nir_alu_instr
& instr
)
402 AluInstruction
*ir
= nullptr;
403 for (int i
= 0; i
< 4 ; ++i
) {
404 if (!(instr
.dest
.write_mask
& (1 << i
)))
407 ir
= new AluInstruction(op2_and_int
, from_nir(instr
.dest
, i
),
408 from_nir(instr
.src
[0], i
), Value::one_i
, write
);
409 emit_instruction(ir
);
416 bool EmitAluInstruction::emit_pack_64_2x32_split(const nir_alu_instr
& instr
)
418 AluInstruction
*ir
= nullptr;
419 for (unsigned i
= 0; i
< 2; ++i
) {
420 if (!(instr
.dest
.write_mask
& (1 << i
)))
422 ir
= new AluInstruction(op1_mov
, from_nir(instr
.dest
, i
),
423 from_nir(instr
.src
[0], i
), write
);
424 emit_instruction(ir
);
426 ir
->set_flag(alu_last_instr
);
430 bool EmitAluInstruction::emit_unpack_64_2x32_split(const nir_alu_instr
& instr
, unsigned comp
)
432 emit_instruction(new AluInstruction(op1_mov
, from_nir(instr
.dest
, 0),
433 from_nir(instr
.src
[0], comp
), last_write
));
437 bool EmitAluInstruction::emit_create_vec(const nir_alu_instr
& instr
, unsigned nc
)
439 AluInstruction
*ir
= nullptr;
440 std::set
<int> src_slot
;
441 for(unsigned i
= 0; i
< nc
; ++i
) {
442 if (instr
.dest
.write_mask
& (1 << i
)){
443 auto src
= from_nir(instr
.src
[i
], 0);
444 ir
= new AluInstruction(op1_mov
, from_nir(instr
.dest
, i
), src
, write
);
445 if (instr
.dest
.saturate
) ir
->set_flag(alu_dst_clamp
);
447 // FIXME: This is a rather crude approach to fix the problem that
448 // r600 can't read from four different slots of the same component
449 // here we check only for the register index
450 if (src
->type() == Value::gpr
)
451 src_slot
.insert(src
->sel());
452 if (src_slot
.size() >= 3) {
454 ir
->set_flag(alu_last_instr
);
456 emit_instruction(ir
);
460 ir
->set_flag(alu_last_instr
);
464 bool EmitAluInstruction::emit_dot(const nir_alu_instr
& instr
, int n
)
466 const nir_alu_src
& src0
= instr
.src
[0];
467 const nir_alu_src
& src1
= instr
.src
[1];
469 AluInstruction
*ir
= nullptr;
470 for (int i
= 0; i
< n
; ++i
) {
471 ir
= new AluInstruction(op2_dot4_ieee
, from_nir(instr
.dest
, i
),
472 from_nir(src0
, i
), from_nir(src1
, i
),
473 instr
.dest
.write_mask
& (1 << i
) ? write
: empty
);
475 if (src0
.negate
) ir
->set_flag(alu_src0_neg
);
476 if (src0
.abs
) ir
->set_flag(alu_src0_abs
);
477 if (src1
.negate
) ir
->set_flag(alu_src1_neg
);
478 if (src1
.abs
) ir
->set_flag(alu_src1_abs
);
480 if (instr
.dest
.saturate
) ir
->set_flag(alu_dst_clamp
);
481 emit_instruction(ir
);
483 for (int i
= n
; i
< 4 ; ++i
) {
484 ir
= new AluInstruction(op2_dot4_ieee
, from_nir(instr
.dest
, i
),
485 Value::zero
, Value::zero
,
486 instr
.dest
.write_mask
& (1 << i
) ? write
: empty
);
487 emit_instruction(ir
);
491 ir
->set_flag(alu_last_instr
);
495 bool EmitAluInstruction::emit_fdph(const nir_alu_instr
& instr
)
497 const nir_alu_src
& src0
= instr
.src
[0];
498 const nir_alu_src
& src1
= instr
.src
[1];
500 AluInstruction
*ir
= nullptr;
501 for (int i
= 0; i
< 3 ; ++i
) {
502 ir
= new AluInstruction(op2_dot4_ieee
, from_nir(instr
.dest
, i
),
503 from_nir(src0
, i
), from_nir(src1
, i
),
504 instr
.dest
.write_mask
& (1 << i
) ? write
: empty
);
505 if (src0
.negate
) ir
->set_flag(alu_src0_neg
);
506 if (src0
.abs
) ir
->set_flag(alu_src0_abs
);
507 if (src1
.negate
) ir
->set_flag(alu_src1_neg
);
508 if (src1
.abs
) ir
->set_flag(alu_src1_abs
);
509 if (instr
.dest
.saturate
) ir
->set_flag(alu_dst_clamp
);
510 emit_instruction(ir
);
513 ir
= new AluInstruction(op2_dot4_ieee
, from_nir(instr
.dest
, 3), Value::one_f
,
514 from_nir(src1
, 3), (instr
.dest
.write_mask
) & (1 << 3) ? write
: empty
);
515 if (src1
.negate
) ir
->set_flag(alu_src1_neg
);
516 if (src1
.abs
) ir
->set_flag(alu_src1_abs
);
517 emit_instruction(ir
);
519 ir
->set_flag(alu_last_instr
);
524 bool EmitAluInstruction::emit_alu_i2orf2_b1(const nir_alu_instr
& instr
, EAluOp op
)
526 AluInstruction
*ir
= nullptr;
527 for (int i
= 0; i
< 4 ; ++i
) {
528 if (instr
.dest
.write_mask
& (1 << i
)) {
529 ir
= new AluInstruction(op
, from_nir(instr
.dest
, i
),
530 from_nir(instr
.src
[0], i
), Value::zero
,
532 emit_instruction(ir
);
536 ir
->set_flag(alu_last_instr
);
540 bool EmitAluInstruction::emit_alu_b2f(const nir_alu_instr
& instr
)
542 AluInstruction
*ir
= nullptr;
543 for (int i
= 0; i
< 4 ; ++i
) {
544 if (instr
.dest
.write_mask
& (1 << i
)){
545 ir
= new AluInstruction(op2_and_int
, from_nir(instr
.dest
, i
),
546 from_nir(instr
.src
[0], i
), Value::one_f
, write
);
547 if (instr
.src
[0].negate
) ir
->set_flag(alu_src0_neg
);
548 if (instr
.src
[0].abs
) ir
->set_flag(alu_src0_abs
);
549 if (instr
.dest
.saturate
) ir
->set_flag(alu_dst_clamp
);
550 emit_instruction(ir
);
554 ir
->set_flag(alu_last_instr
);
558 bool EmitAluInstruction::emit_any_all_icomp(const nir_alu_instr
& instr
, EAluOp op
, unsigned nc
, bool all
)
561 AluInstruction
*ir
= nullptr;
562 PValue v
[4]; // this might need some additional temp register creation
563 for (unsigned i
= 0; i
< 4 ; ++i
)
564 v
[i
] = from_nir(instr
.dest
, i
);
566 EAluOp combine
= all
? op2_and_int
: op2_or_int
;
568 /* For integers we can not use the modifiers, so this needs some emulation */
569 /* Should actually be lowered with NIR */
570 if (instr
.src
[0].negate
== instr
.src
[1].negate
&&
571 instr
.src
[0].abs
== instr
.src
[1].abs
) {
573 for (unsigned i
= 0; i
< nc
; ++i
) {
574 ir
= new AluInstruction(op
, v
[i
], from_nir(instr
.src
[0], i
),
575 from_nir(instr
.src
[1], i
), write
);
576 emit_instruction(ir
);
579 ir
->set_flag(alu_last_instr
);
581 std::cerr
<< "Negate in iequal/inequal not (yet) supported\n";
585 for (unsigned i
= 0; i
< nc
/2 ; ++i
) {
586 ir
= new AluInstruction(combine
, v
[2 * i
], v
[2 * i
], v
[2 * i
+ 1], write
);
587 emit_instruction(ir
);
590 ir
->set_flag(alu_last_instr
);
593 ir
= new AluInstruction(combine
, v
[0], v
[0], v
[2], last_write
);
594 emit_instruction(ir
);
600 bool EmitAluInstruction::emit_any_all_fcomp(const nir_alu_instr
& instr
, EAluOp op
, unsigned nc
, bool all
)
602 AluInstruction
*ir
= nullptr;
603 PValue v
[4]; // this might need some additional temp register creation
604 for (unsigned i
= 0; i
< 4 ; ++i
)
605 v
[i
] = from_nir(instr
.dest
, i
);
607 for (unsigned i
= 0; i
< nc
; ++i
) {
608 ir
= new AluInstruction(op
, v
[i
], from_nir(instr
.src
[0],i
),
609 from_nir(instr
.src
[1],i
), write
);
611 if (instr
.src
[0].abs
)
612 ir
->set_flag(alu_src0_abs
);
613 if (instr
.src
[0].negate
)
614 ir
->set_flag(alu_src0_neg
);
616 if (instr
.src
[1].abs
)
617 ir
->set_flag(alu_src1_abs
);
618 if (instr
.src
[1].negate
)
619 ir
->set_flag(alu_src1_neg
);
621 emit_instruction(ir
);
624 ir
->set_flag(alu_last_instr
);
626 for (unsigned i
= 0; i
< nc
; ++i
) {
627 ir
= new AluInstruction(op1_max4
, v
[i
], v
[i
], write
);
628 if (all
) ir
->set_flag(alu_src0_neg
);
629 emit_instruction(ir
);
632 for (unsigned i
= nc
; i
< 4 ; ++i
) {
633 ir
= new AluInstruction(op1_max4
, v
[i
],
634 all
? Value::one_f
: Value::zero
, write
);
636 ir
->set_flag(alu_src0_neg
);
638 emit_instruction(ir
);
641 ir
->set_flag(alu_last_instr
);
644 op
= (op
== op2_sete
) ? op2_sete_dx10
: op2_setne_dx10
;
646 op
= (op
== op2_sete
) ? op2_setne_dx10
: op2_sete_dx10
;
648 ir
= new AluInstruction(op
, v
[0], v
[0], Value::one_f
, last_write
);
650 ir
->set_flag(alu_src1_neg
);
651 emit_instruction(ir
);
656 bool EmitAluInstruction::emit_any_all_fcomp2(const nir_alu_instr
& instr
, EAluOp op
, bool all
)
658 AluInstruction
*ir
= nullptr;
659 PValue v
[4]; // this might need some additional temp register creation
660 for (unsigned i
= 0; i
< 4 ; ++i
)
661 v
[i
] = from_nir(instr
.dest
, i
);
663 for (unsigned i
= 0; i
< 2 ; ++i
) {
664 ir
= new AluInstruction(op
, v
[i
], from_nir(instr
.src
[0],i
),
665 from_nir(instr
.src
[1],i
), write
);
666 if (instr
.src
[0].abs
)
667 ir
->set_flag(alu_src0_abs
);
668 if (instr
.src
[0].negate
)
669 ir
->set_flag(alu_src0_neg
);
671 if (instr
.src
[1].abs
)
672 ir
->set_flag(alu_src1_abs
);
673 if (instr
.src
[1].negate
)
674 ir
->set_flag(alu_src1_neg
);
676 emit_instruction(ir
);
679 ir
->set_flag(alu_last_instr
);
681 op
= (op
== op2_setne_dx10
) ? op2_or_int
: op2_and_int
;
682 ir
= new AluInstruction(op
, v
[0], v
[0], v
[1], last_write
);
683 emit_instruction(ir
);
688 bool EmitAluInstruction::emit_alu_trans_op2(const nir_alu_instr
& instr
, EAluOp opcode
)
690 const nir_alu_src
& src0
= instr
.src
[0];
691 const nir_alu_src
& src1
= instr
.src
[1];
693 AluInstruction
*ir
= nullptr;
694 for (int i
= 0; i
< 4 ; ++i
) {
695 if (instr
.dest
.write_mask
& (1 << i
)){
696 ir
= new AluInstruction(opcode
, from_nir(instr
.dest
, i
), from_nir(src0
, i
), from_nir(src1
, i
), last_write
);
697 if (src0
.negate
) ir
->set_flag(alu_src0_neg
);
698 if (src0
.abs
) ir
->set_flag(alu_src0_abs
);
699 if (src1
.negate
) ir
->set_flag(alu_src1_neg
);
700 if (src1
.abs
) ir
->set_flag(alu_src1_abs
);
701 if (instr
.dest
.saturate
) ir
->set_flag(alu_dst_clamp
);
702 emit_instruction(ir
);
708 bool EmitAluInstruction::emit_alu_op2_int(const nir_alu_instr
& instr
, EAluOp opcode
, AluOp2Opts opts
)
711 const nir_alu_src
& src0
= instr
.src
[0];
712 const nir_alu_src
& src1
= instr
.src
[1];
714 if (src0
.negate
|| src1
.negate
||
715 src0
.abs
|| src1
.abs
) {
716 std::cerr
<< "R600: don't support modifiers with integer operations";
719 return emit_alu_op2(instr
, opcode
, opts
);
722 bool EmitAluInstruction::emit_alu_op2(const nir_alu_instr
& instr
, EAluOp opcode
, AluOp2Opts ops
)
724 const nir_alu_src
*src0
= &instr
.src
[0];
725 const nir_alu_src
*src1
= &instr
.src
[1];
727 if (ops
& op2_opt_reverse
)
728 std::swap(src0
, src1
);
730 bool src1_negate
= (ops
& op2_opt_neg_src1
) ^ src1
->negate
;
732 AluInstruction
*ir
= nullptr;
733 for (int i
= 0; i
< 4 ; ++i
) {
734 if (instr
.dest
.write_mask
& (1 << i
)){
735 ir
= new AluInstruction(opcode
, from_nir(instr
.dest
, i
),
736 from_nir(*src0
, i
), from_nir(*src1
, i
), write
);
738 if (src0
->negate
) ir
->set_flag(alu_src0_neg
);
739 if (src0
->abs
) ir
->set_flag(alu_src0_abs
);
740 if (src1_negate
) ir
->set_flag(alu_src1_neg
);
741 if (src1
->abs
) ir
->set_flag(alu_src1_abs
);
742 if (instr
.dest
.saturate
) ir
->set_flag(alu_dst_clamp
);
743 emit_instruction(ir
);
747 ir
->set_flag(alu_last_instr
);
751 bool EmitAluInstruction::emit_alu_op2_split_src_mods(const nir_alu_instr
& instr
, EAluOp opcode
, AluOp2Opts ops
)
753 const nir_alu_src
*src0
= &instr
.src
[0];
754 const nir_alu_src
*src1
= &instr
.src
[1];
756 if (ops
& op2_opt_reverse
)
757 std::swap(src0
, src1
);
759 GPRVector::Values v0
;
760 for (int i
= 0; i
< 4 ; ++i
)
761 v0
[i
] = from_nir(*src0
, i
);
763 GPRVector::Values v1
;
764 for (int i
= 0; i
< 4 ; ++i
)
765 v1
[i
] = from_nir(*src1
, i
);
767 if (src0
->abs
|| src0
->negate
) {
768 int src0_tmp
= allocate_temp_register();
769 GPRVector::Values v0_temp
;
770 AluInstruction
*ir
= nullptr;
771 for (int i
= 0; i
< 4 ; ++i
) {
772 if (instr
.dest
.write_mask
& (1 << i
)) {
773 v0_temp
[i
] = PValue(new GPRValue(src0_tmp
, i
));
774 ir
= new AluInstruction(op1_mov
, v0_temp
[i
], v0
[i
], write
);
775 if (src0
->abs
) ir
->set_flag(alu_src0_abs
);
776 if (src0
->negate
) ir
->set_flag(alu_src0_neg
);
777 emit_instruction(ir
);
782 ir
->set_flag(alu_last_instr
);
785 if (src1
->abs
|| src1
->negate
) {
786 int src1_tmp
= allocate_temp_register();
787 GPRVector::Values v1_temp
;
788 AluInstruction
*ir
= nullptr;
789 for (int i
= 0; i
< 4 ; ++i
) {
790 if (instr
.dest
.write_mask
& (1 << i
)) {
791 v1_temp
[i
] = PValue(new GPRValue(src1_tmp
, i
));
792 ir
= new AluInstruction(op1_mov
, v1_temp
[i
], v1
[i
], {alu_write
});
793 if (src1
->abs
) ir
->set_flag(alu_src0_abs
);
794 if (src1
->negate
) ir
->set_flag(alu_src0_neg
);
795 emit_instruction(ir
);
800 ir
->set_flag(alu_last_instr
);
803 AluInstruction
*ir
= nullptr;
804 for (int i
= 0; i
< 4 ; ++i
) {
805 if (instr
.dest
.write_mask
& (1 << i
)){
806 ir
= new AluInstruction(opcode
, from_nir(instr
.dest
, i
), {v0
[i
], v1
[i
]}, {alu_write
});
807 emit_instruction(ir
);
811 ir
->set_flag(alu_last_instr
);
816 bool EmitAluInstruction::emit_alu_isign(const nir_alu_instr
& instr
)
818 int sel_tmp
= allocate_temp_register();
819 GPRVector
tmp(sel_tmp
, {0,1,2,3});
821 AluInstruction
*ir
= nullptr;
824 for (int i
= 0; i
< 4 ; ++i
) {
825 if (instr
.dest
.write_mask
& (1 << i
)){
826 help
[i
] = from_nir(instr
.dest
, i
);
827 auto s
= from_nir(instr
.src
[0], i
);
828 ir
= new AluInstruction(op3_cndgt_int
, help
[i
], s
, Value::one_i
, s
, write
);
829 emit_instruction(ir
);
833 ir
->set_flag(alu_last_instr
);
835 for (int i
= 0; i
< 4 ; ++i
) {
836 if (instr
.dest
.write_mask
& (1 << i
)){
837 ir
= new AluInstruction(op2_sub_int
, tmp
.reg_i(i
), Value::zero
, help
[i
], write
);
838 emit_instruction(ir
);
842 ir
->set_flag(alu_last_instr
);
844 for (int i
= 0; i
< 4 ; ++i
) {
845 if (instr
.dest
.write_mask
& (1 << i
)){
847 ir
= new AluInstruction(op3_cndgt_int
, help
[i
], tmp
.reg_i(i
),
848 PValue(new LiteralValue(-1,0)), help
[i
], write
);
849 emit_instruction(ir
);
853 ir
->set_flag(alu_last_instr
);
857 bool EmitAluInstruction::emit_fsign(const nir_alu_instr
& instr
)
861 AluInstruction
*ir
= nullptr;
863 for (int i
= 0; i
< 4 ; ++i
) {
864 help
[i
] = from_nir(instr
.dest
, i
);
865 src
[i
] = from_nir(instr
.src
[0], i
);
868 if (instr
.src
[0].abs
) {
870 for (int i
= 0; i
< 4 ; ++i
) {
871 if (instr
.dest
.write_mask
& (1 << i
)){
872 ir
= new AluInstruction(op2_setgt
, help
[i
], src
[i
], Value::zero
, write
);
873 ir
->set_flag(alu_src0_abs
);
874 emit_instruction(ir
);
878 ir
->set_flag(alu_last_instr
);
880 if (instr
.src
[0].negate
) {
881 for (int i
= 0; i
< 4 ; ++i
) {
882 if (instr
.dest
.write_mask
& (1 << i
)){
883 ir
= new AluInstruction(op1_mov
, help
[i
], help
[i
], write
);
884 ir
->set_flag(alu_src0_neg
);
885 emit_instruction(ir
);
889 ir
->set_flag(alu_last_instr
);
895 for (int i
= 0; i
< 4 ; ++i
) {
896 if (instr
.dest
.write_mask
& (1 << i
)){
897 ir
= new AluInstruction(op3_cndgt
, help
[i
], src
[i
], Value::one_f
, src
[i
], write
);
898 if (instr
.src
[0].negate
) {
899 ir
->set_flag(alu_src0_neg
);
900 ir
->set_flag(alu_src2_neg
);
902 emit_instruction(ir
);
907 ir
->set_flag(alu_last_instr
);
909 for (int i
= 0; i
< 4 ; ++i
) {
910 if (instr
.dest
.write_mask
& (1 << i
)){
911 ir
= new AluInstruction(op3_cndgt
, help
[i
], help
[i
], Value::one_f
, help
[i
], write
);
912 ir
->set_flag(alu_src0_neg
);
913 ir
->set_flag(alu_src1_neg
);
914 emit_instruction(ir
);
918 ir
->set_flag(alu_last_instr
);
922 bool EmitAluInstruction::emit_alu_op3(const nir_alu_instr
& instr
, EAluOp opcode
,
923 std::array
<uint8_t, 3> reorder
)
925 const nir_alu_src
*src
[3];
926 src
[0] = &instr
.src
[reorder
[0]];
927 src
[1] = &instr
.src
[reorder
[1]];
928 src
[2] = &instr
.src
[reorder
[2]];
930 AluInstruction
*ir
= nullptr;
931 for (int i
= 0; i
< 4 ; ++i
) {
932 if (instr
.dest
.write_mask
& (1 << i
)){
933 ir
= new AluInstruction(opcode
, from_nir(instr
.dest
, i
),
934 from_nir(*src
[0], i
), from_nir(*src
[1], i
),
935 from_nir(*src
[2], i
), write
);
937 if (src
[0]->negate
) ir
->set_flag(alu_src0_neg
);
938 if (src
[1]->negate
) ir
->set_flag(alu_src1_neg
);
939 if (src
[2]->negate
) ir
->set_flag(alu_src2_neg
);
941 if (instr
.dest
.saturate
) ir
->set_flag(alu_dst_clamp
);
942 ir
->set_flag(alu_write
);
943 emit_instruction(ir
);
947 ir
->set_flag(alu_last_instr
);
951 bool EmitAluInstruction::emit_alu_ineg(const nir_alu_instr
& instr
)
953 AluInstruction
*ir
= nullptr;
954 for (int i
= 0; i
< 4 ; ++i
) {
955 if (instr
.dest
.write_mask
& (1 << i
)){
956 ir
= new AluInstruction(op2_sub_int
, from_nir(instr
.dest
, i
), Value::zero
,
957 from_nir(instr
.src
[0], i
), write
);
958 emit_instruction(ir
);
962 ir
->set_flag(alu_last_instr
);
/* File-local table of eight single-character labels: 'x', 'y', 'z', 'w',
 * '0', '1', '?', '_' (plus the terminating NUL).
 * NOTE(review): presumably the printable names of swizzle selectors; no use
 * is visible in this part of the file -- confirm. */
967 static const char swz
[] = "xyzw01?_";
971 bool EmitAluInstruction::emit_alu_iabs(const nir_alu_instr
& instr
)
973 int sel_tmp
= allocate_temp_register();
974 GPRVector
tmp(sel_tmp
, {0,1,2,3});
976 std::array
<PValue
,4> src
;
977 AluInstruction
*ir
= nullptr;
978 for (int i
= 0; i
< 4 ; ++i
) {
979 if (instr
.dest
.write_mask
& (1 << i
)){
980 src
[i
] = from_nir(instr
.src
[0],i
);
981 ir
= new AluInstruction(op2_sub_int
, tmp
.reg_i(i
), Value::zero
, src
[i
], write
);
982 emit_instruction(ir
);
986 ir
->set_flag(alu_last_instr
);
988 for (int i
= 0; i
< 4 ; ++i
) {
989 if (instr
.dest
.write_mask
& (1 << i
)){
990 ir
= new AluInstruction(op3_cndge_int
, from_nir(instr
.dest
, i
), src
[i
],
991 src
[i
], tmp
.reg_i(i
), write
);
992 emit_instruction(ir
);
996 ir
->set_flag(alu_last_instr
);
// Emits the ALU instruction sequence for an integer division or modulo of
// instr.src[0] by instr.src[1], handling one destination component at a time.
//
// instr       NIR ALU instruction; src[0] and src[1] are read per component
//             and the result is written to instr.dest.
// use_signed  see note below.
// mod         see note below.
//
// NOTE(review): this listing omits original lines (braces, conditionals,
// continue/return statements), so the tests on use_signed and mod are not
// visible. Presumably use_signed enables the sign handling (original lines
// 1030-1036 and 1084-1091) and mod selects the remainder instead of the
// quotient (1068-1069 vs. 1071-1072, 1078 vs. 1080) - confirm against the
// complete file. The per-step comments give the visible operands only;
// "cnde"/"cndge" are presumably "first operand == 0" / ">= 0" selects as
// defined by the R600 ISA.
1000 bool EmitAluInstruction::emit_alu_div_int(const nir_alu_instr
& instr
, bool use_signed
, bool mod
)
// Three temporary registers: sel_tmp provides four scalar scratch values
// (asrc1, asrc2, rsign, err), sel_tmp0 and sel_tmp1 are used as vectors.
1003 int sel_tmp
= allocate_temp_register();
1004 int sel_tmp0
= allocate_temp_register();
1005 int sel_tmp1
= allocate_temp_register();
1007 PValue
asrc1(new GPRValue(sel_tmp
, 0));
1008 PValue
asrc2(new GPRValue(sel_tmp
, 1));
1009 PValue
rsign(new GPRValue(sel_tmp
, 2));
1010 PValue
err(new GPRValue(sel_tmp
, 3));
1012 GPRVector
tmp0(sel_tmp0
, {0,1,2,3});
1013 GPRVector
tmp1(sel_tmp1
, {0,1,2,3});
1015 std::array
<PValue
, 4> src0
;
1016 std::array
<PValue
, 4> src1
;
// Resolve both operands for all enabled components before emitting anything.
1018 for (int i
= 0; i
< 4 ; ++i
) {
1019 if (instr
.dest
.write_mask
& (1 << i
)) {
1020 src0
[i
] = from_nir(instr
.src
[0], i
);
1021 src1
[i
] = from_nir(instr
.src
[1], i
);
// Main loop: components are processed from 3 down to 0; the statement
// controlled by the write_mask test is not visible (presumably continue).
1026 for (int i
= 3; i
>= 0 ; --i
) {
1027 if (!(instr
.dest
.write_mask
& (1 << i
)))
// asrc1 = 0 - src0, asrc2 = 0 - src1, rsign = src0 ^ src1.
1030 emit_instruction(op2_sub_int
, asrc1
, {Value::zero
, src0
[i
]}, {alu_write
});
1031 emit_instruction(op2_sub_int
, asrc2
, {Value::zero
, src1
[i
]}, {alu_write
});
1032 emit_instruction(op2_xor_int
, rsign
, {src0
[i
], src1
[i
]}, {alu_write
, alu_last_instr
});
// asrc1 = cndge(src0, src0, -src0), asrc2 = cndge(src1, src1, -src1);
// presumably the absolute values of both operands.
1035 emit_instruction(op3_cndge_int
, asrc1
, {src0
[i
], src0
[i
], asrc1
}, {alu_write
});
1036 emit_instruction(op3_cndge_int
, asrc2
, {src1
[i
], src1
[i
], asrc2
}, {alu_write
, alu_last_instr
});
// tmp0.x = recip_uint(asrc2)
1042 emit_instruction(op1_recip_uint
, tmp0
.x(), {asrc2
}, {alu_write
, alu_last_instr
});
// tmp0.z = mullo_uint(tmp0.x, asrc2)
1044 emit_instruction(op2_mullo_uint
, tmp0
.z(), {tmp0
.x(), asrc2
}, {alu_write
, alu_last_instr
});
// tmp0.w = 0 - tmp0.z ; tmp0.y = mulhi_uint(tmp0.x, asrc2)
1046 emit_instruction(op2_sub_int
, tmp0
.w(), {Value::zero
, tmp0
.z()}, {alu_write
});
1047 emit_instruction(op2_mulhi_uint
, tmp0
.y(), {tmp0
.x(), asrc2
}, {alu_write
, alu_last_instr
});
// tmp0.z = cnde(tmp0.y, tmp0.w, tmp0.z)
1049 emit_instruction(op3_cnde_int
, tmp0
.z(), {tmp0
.y(), tmp0
.w(), tmp0
.z()}, {alu_write
, alu_last_instr
});
// err = mulhi_uint(tmp0.z, tmp0.x)
1051 emit_instruction(op2_mulhi_uint
, err
, {tmp0
.z(), tmp0
.x()}, {alu_write
, alu_last_instr
});
// tmp1.x = tmp0.x - err ; tmp1.y = tmp0.x + err
1053 emit_instruction(op2_sub_int
, tmp1
.x(), {tmp0
.x(), err
}, {alu_write
});
1054 emit_instruction(op2_add_int
, tmp1
.y(), {tmp0
.x(), err
}, {alu_write
, alu_last_instr
});
// tmp0.x = cnde(tmp0.y, tmp1.y, tmp1.x); presumably the corrected reciprocal.
1056 emit_instruction(op3_cnde_int
, tmp0
.x(), {tmp0
.y(), tmp1
.y(), tmp1
.x()}, {alu_write
, alu_last_instr
});
// tmp0.z = mulhi_uint(tmp0.x, asrc1)  (presumably the quotient estimate)
// tmp0.y = mullo_uint(tmp0.z, asrc2)
1058 emit_instruction(op2_mulhi_uint
, tmp0
.z(), {tmp0
.x(), asrc1
}, {alu_write
, alu_last_instr
});
1059 emit_instruction(op2_mullo_uint
, tmp0
.y(), {tmp0
.z(), asrc2
}, {alu_write
, alu_last_instr
});
// tmp0.w = asrc1 - tmp0.y  (presumably the remainder estimate)
1061 emit_instruction(op2_sub_int
, tmp0
.w(), {asrc1
, tmp0
.y()}, {alu_write
, alu_last_instr
});
// tmp1.x = (tmp0.w >= asrc2) ; tmp1.y = (asrc1 >= tmp0.y), unsigned compares.
// Neither instruction carries alu_last_instr in the visible code.
1064 emit_instruction(op2_setge_uint
, tmp1
.x(), {tmp0
.w(), asrc2
}, {alu_write
});
1065 emit_instruction(op2_setge_uint
, tmp1
.y(), {asrc1
, tmp0
.y()}, {alu_write
});
// Correction candidates built from tmp0.w and asrc2:
// tmp1.z = tmp0.w - asrc2 ; tmp1.w = tmp0.w + asrc2
1068 emit_instruction(op2_sub_int
, tmp1
.z(), {tmp0
.w(), asrc2
}, {alu_write
});
1069 emit_instruction(op2_add_int
, tmp1
.w(), {tmp0
.w(), asrc2
}, {alu_write
, alu_last_instr
});
// Correction candidates built from tmp0.z:
// tmp1.z = tmp0.z + 1 ; tmp1.w = tmp0.z - 1
// NOTE(review): writes the same registers as the pair above; presumably the
// two pairs sit in the two branches of a test on mod - confirm.
1071 emit_instruction(op2_add_int
, tmp1
.z(), {tmp0
.z(), Value::one_i
}, {alu_write
});
1072 emit_instruction(op2_sub_int
, tmp1
.w(), {tmp0
.z(), Value::one_i
}, {alu_write
, alu_last_instr
});
// tmp1.x = tmp1.x & tmp1.y
1075 emit_instruction(op2_and_int
, tmp1
.x(), {tmp1
.x(), tmp1
.y()}, {alu_write
, alu_last_instr
});
// tmp0.z = cnde(tmp1.x, tmp0.w, tmp1.z)
1078 emit_instruction(op3_cnde_int
, tmp0
.z(), {tmp1
.x(), tmp0
.w(), tmp1
.z()}, {alu_write
, alu_last_instr
});
// tmp0.z = cnde(tmp1.x, tmp0.z, tmp1.z)
// NOTE(review): same destination and condition as the previous instruction;
// presumably the alternative branch of the same test - confirm.
1080 emit_instruction(op3_cnde_int
, tmp0
.z(), {tmp1
.x(), tmp0
.z(), tmp1
.z()}, {alu_write
, alu_last_instr
});
// tmp0.z = cnde(tmp1.y, tmp1.w, tmp0.z) ; tmp0.y = 0 - tmp0.z
1083 emit_instruction(op3_cnde_int
, tmp0
.z(), {tmp1
.y(), tmp1
.w(), tmp0
.z()}, {alu_write
, alu_last_instr
});
1084 emit_instruction(op2_sub_int
, tmp0
.y(), {Value::zero
, tmp0
.z()}, {alu_write
, alu_last_instr
});
// Three alternative writes of the final result to dest[i]; the conditions
// selecting between them are not visible in this listing.
// dest = cndge(src0, tmp0.z, tmp0.y): sign taken from the first operand.
1087 emit_instruction(op3_cndge_int
, from_nir(instr
.dest
, i
), {src0
[i
], tmp0
.z(), tmp0
.y()},
1088 {alu_write
, alu_last_instr
});
// dest = cndge(rsign, tmp0.z, tmp0.y): sign taken from src0 ^ src1.
1090 emit_instruction(op3_cndge_int
, from_nir(instr
.dest
, i
), {rsign
, tmp0
.z(), tmp0
.y()},
1091 {alu_write
, alu_last_instr
});
// dest = cnde(tmp1.y, tmp1.w, tmp0.z): no sign fix-up.
1093 emit_instruction(op3_cnde_int
, from_nir(instr
.dest
, i
), {tmp1
.y(), tmp1
.w(), tmp0
.z()}, {alu_write
, alu_last_instr
});
// Emits one op1_mov per component that copies s[i] to v[i] for i in
// [0, ncomp), with source modifier flags set on the move.
//
// src    NIR ALU source; not referenced on the visible lines.
// s      values that are read by the moves.
// v      values that are written by the moves.
// ncomp  number of components to process.
//
// NOTE(review): the original lines 1106 and 1108 are missing from this
// listing; presumably they test src.abs and src.negate so that the two
// set_flag calls below are conditional - confirm. No alu_last_instr flag is
// set on any visible line.
1099 void EmitAluInstruction::split_alu_modifiers(const nir_alu_src
& src
, GPRVector::Values
& s
,
1100 GPRVector::Values
& v
, int ncomp
)
1103 AluInstruction
*alu
= nullptr;
1104 for (int i
= 0; i
< ncomp
; ++i
) {
1105 alu
= new AluInstruction(op1_mov
, v
[i
], s
[i
], {alu_write
});
// Absolute-value modifier on source 0 of the move.
1107 alu
->set_flag(alu_src0_abs
);
// Negate modifier on source 0 of the move.
1109 alu
->set_flag(alu_src0_neg
);
1110 emit_instruction(alu
);
// Emits a TexInstruction with the given opcode that takes instr.src[0] as
// its source and writes to the components of instr.dest enabled in the write
// mask.
//
// instr  NIR ALU instruction providing the source and the destination.
// op     texture opcode handed to the TexInstruction constructor.
//
// NOTE(review): the parameter list continues on a line that is missing from
// this listing (the visible part ends with a comma); presumably a boolean
// selecting fine derivatives, given the grad_fine flag below - confirm.
// Other lines are missing as well, among them the declaration of dst.
1115 bool EmitAluInstruction::emit_tex_fdd(const nir_alu_instr
& instr
, TexInstruction::Opcode op
,
1119 GPRVector::Values v
;
1120 GPRVector::Values s
;
// source initially points at the plain source values; NOTE(review):
// presumably redirected to v on a non-visible line when source modifiers had
// to be applied - confirm.
1121 GPRVector::Values
*source
= &s
;
1122 std::array
<int, 4> writemask
= {0,1,2,3};
// Component count of source 0, read from the SSA definition or from the
// register depending on is_ssa.
1124 int ncomp
= instr
.src
[0].src
.is_ssa
? instr
.src
[0].src
.ssa
->num_components
:
1125 instr
.src
[0].src
.reg
.reg
->num_components
;
// Enabled components keep their own index in the destination swizzle,
// disabled ones get 7 (presumably "masked" - confirm). Components at or
// beyond ncomp fall back to component 0 for both v and s.
1127 for (int i
= 0; i
< 4; ++i
) {
1128 writemask
[i
] = (instr
.dest
.write_mask
& (1 << i
)) ? i
: 7;
1129 v
[i
] = from_nir(instr
.dest
, (i
< ncomp
) ? i
: 0);
1130 s
[i
] = from_nir(instr
.src
[0], (i
< ncomp
) ? i
: 0);
// With abs/negate modifiers on the source, copy s into v through ALU moves
// (split_alu_modifiers) before the texture instruction is created.
1133 if (instr
.src
[0].abs
|| instr
.src
[0].negate
) {
1134 split_alu_modifiers(instr
.src
[0], s
, v
, ncomp
);
1138 /* This is querying the derivatives of the output fb, so we would either need
1139 * access to the neighboring pixels or to the framebuffer. Neither is currently
1142 GPRVector
src(*source
);
// The sampler argument is 0 and the resource argument is
// R600_MAX_CONST_BUFFERS; the last argument is an empty PValue.
1144 auto tex
= new TexInstruction(op
, dst
, src
, 0, R600_MAX_CONST_BUFFERS
, PValue());
1145 tex
->set_dest_swizzle(writemask
);
// NOTE(review): the next statement prints to std::cerr and looks like a
// leftover debug message with a typo; it is runtime output and therefore
// left unchanged here. Presumably it and the grad_fine flag are guarded by a
// condition on a non-visible line - confirm.
1148 std::cerr
<< "Sewt fine flag\n";
1149 tex
->set_flag(TexInstruction::grad_fine
);
1152 emit_instruction(tex
);
// Emits a bitfield extract using the given three-operand opcode, followed by
// a fix-up that depends on operand 2, in three passes over the components
// enabled in instr.dest.write_mask:
//   pass 1: dst[i] = opcode(src0[i], src1[i], src2[i])
//   pass 2: tmp[i] = op2_setge_int(src2[i], 32)
//   pass 3: dst[i] = op3_cnde_int(tmp[i], dst[i], src0[i])
// NOTE(review): presumably CNDE_INT selects its second operand when the
// first is zero, so that src0 is passed through unchanged whenever operand 2
// is >= 32 - confirm against the R600 ISA. This listing omits some original
// lines (the statement after each mask test, the flag lists of the
// instructions, closing braces, return).
1157 bool EmitAluInstruction::emit_bitfield_extract(const nir_alu_instr
& instr
, EAluOp opcode
)
1159 int itmp
= allocate_temp_register();
1160 std::array
<PValue
, 4> tmp
;
1161 std::array
<PValue
, 4> dst
;
1162 std::array
<PValue
, 4> src0
;
// Despite its name, shift holds operand 2 of the instruction.
1163 std::array
<PValue
, 4> shift
;
1165 PValue
l32(new LiteralValue(32));
1166 unsigned write_mask
= instr
.dest
.write_mask
;
1168 AluInstruction
*ir
= nullptr;
// Pass 1: the actual extract; dst, src0 and shift are cached for the
// following passes.
1169 for (int i
= 0; i
< 4; i
++) {
1170 if (!(write_mask
& (1<<i
)))
1172 dst
[i
] = from_nir(instr
.dest
, i
);
1173 src0
[i
] = from_nir(instr
.src
[0], i
);
1174 shift
[i
] = from_nir(instr
.src
[2], i
);
1176 ir
= new AluInstruction(opcode
, dst
[i
],
1177 {src0
[i
], from_nir(instr
.src
[1], i
), shift
[i
]},
1179 emit_instruction(ir
);
// Pass 2: compare operand 2 against the literal 32, one temporary component
// per destination component.
1183 for (int i
= 0; i
< 4; i
++) {
1184 if (!(write_mask
& (1<<i
)))
1186 tmp
[i
] = PValue(new GPRValue(itmp
, i
));
1187 ir
= new AluInstruction(op2_setge_int
, tmp
[i
], {shift
[i
], l32
},
1189 emit_instruction(ir
);
// Pass 3: select between the extract result and the unmodified source.
1193 for (int i
= 0; i
< 4; i
++) {
1194 if (!(write_mask
& (1<<i
)))
1196 ir
= new AluInstruction(op3_cnde_int
, dst
[i
], {tmp
[i
], dst
[i
], src0
[i
]},
1198 emit_instruction(ir
);
// Emits a bitfield insert in five passes over the components enabled in
// instr.dest.write_mask (srcN denotes instr.src[N]):
//   pass 1: t0[i]   = op2_setge_int(src3[i], 32)
//   pass 2: t1[i]   = op2_bfm_int(src3[i], src2[i])
//   pass 3: t2[i]   = op2_lshl_int(src1[i], src2[i])
//   pass 4: dest[i] = op3_bfi_int(t1[i], t2[i], src0[i])
//   pass 5: dest[i] = op3_cnde_int(t0[i], dest[i], src1[i])
// Returns true immediately, without emitting anything, when the write mask
// is empty.
// NOTE(review): presumably BFM_INT builds a mask from a width and an offset
// and CNDE_INT selects its second operand when the first is zero, so that
// src1 replaces the result whenever src3 is >= 32 - confirm against the R600
// ISA. This listing omits some original lines (the statement after each mask
// test, closing braces, return).
1205 bool EmitAluInstruction::emit_bitfield_insert(const nir_alu_instr
& instr
)
1207 auto t0
= get_temp_vec4();
1208 auto t1
= get_temp_vec4();
1209 auto t2
= get_temp_vec4();
1211 PValue
l32(new LiteralValue(32));
1212 unsigned write_mask
= instr
.dest
.write_mask
;
// With at least one bit set, ir is non-null after each of the loops below.
1213 if (!write_mask
) return true;
1215 AluInstruction
*ir
= nullptr;
// Pass 1: compare operand 3 against the literal 32.
// NOTE(review): unlike the later passes, no set_flag(alu_last_instr) is
// visible after this loop; it may be on a missing line - confirm.
1216 for (int i
= 0; i
< 4; i
++) {
1217 if (!(write_mask
& (1<<i
)))
1220 ir
= new AluInstruction(op2_setge_int
, t0
[i
], {from_nir(instr
.src
[3], i
), l32
}, {alu_write
});
1221 emit_instruction(ir
);
// Pass 2: bfm_int of operands 3 and 2 into t1.
1225 for (int i
= 0; i
< 4; i
++) {
1226 if (!(write_mask
& (1<<i
)))
1228 ir
= new AluInstruction(op2_bfm_int
, t1
[i
], {from_nir(instr
.src
[3], i
),
1229 from_nir(instr
.src
[2], i
)}, {alu_write
});
1230 emit_instruction(ir
);
1232 ir
->set_flag(alu_last_instr
);
// Pass 3: shift operand 1 left by operand 2 into t2.
1234 for (int i
= 0; i
< 4; i
++) {
1235 if (!(write_mask
& (1<<i
)))
1237 ir
= new AluInstruction(op2_lshl_int
, t2
[i
], {from_nir(instr
.src
[1], i
),
1238 from_nir(instr
.src
[2], i
)}, {alu_write
});
1239 emit_instruction(ir
);
1241 ir
->set_flag(alu_last_instr
);
// Pass 4: bfi_int of t1, t2 and operand 0 into the destination.
1244 for (int i
= 0; i
< 4; i
++) {
1245 if (!(write_mask
& (1<<i
)))
1247 ir
= new AluInstruction(op3_bfi_int
, from_nir(instr
.dest
, i
),
1248 {t1
[i
], t2
[i
], from_nir(instr
.src
[0], i
)}, {alu_write
});
1249 emit_instruction(ir
);
1251 ir
->set_flag(alu_last_instr
);
// Pass 5: select between the insert result and operand 1 based on t0.
1253 for (int i
= 0; i
< 4; i
++) {
1254 if (!(write_mask
& (1<<i
)))
1256 ir
= new AluInstruction(op3_cnde_int
, from_nir(instr
.dest
, i
),
1257 {t0
[i
], from_nir(instr
.dest
, i
),
1258 from_nir(instr
.src
[1], i
)}, {alu_write
});
1259 emit_instruction(ir
);
1261 ir
->set_flag(alu_last_instr
);
// Emits two instructions operating on component 0 only:
//   dest = src0 >> 16                 (op2_lshr_int with the literal 16)
//   dest = flt16_to_flt32(dest)
// i.e. the upper 16 bits of the source are moved down and then converted from
// a 16-bit to a 32-bit float in place. Each instruction carries
// alu_last_instr.
// NOTE(review): this listing omits some original lines (braces, return).
1266 bool EmitAluInstruction::emit_unpack_32_2x16_split_y(const nir_alu_instr
& instr
)
1268 emit_instruction(op2_lshr_int
, from_nir(instr
.dest
, 0),
1269 {from_nir(instr
.src
[0], 0), PValue(new LiteralValue(16))},
1270 {alu_write
, alu_last_instr
});
// The conversion reads the shifted value back from the destination.
1272 emit_instruction(op1_flt16_to_flt32
, from_nir(instr
.dest
, 0),
1273 {from_nir(instr
.dest
, 0)},{alu_write
, alu_last_instr
});
// Emits a single op1_flt16_to_flt32 from component 0 of instr.src[0] to
// component 0 of instr.dest. No shift or mask is applied to the source
// before the conversion.
// NOTE(review): this listing omits some original lines (braces, return).
1278 bool EmitAluInstruction::emit_unpack_32_2x16_split_x(const nir_alu_instr
& instr
)
1280 emit_instruction(op1_flt16_to_flt32
, from_nir(instr
.dest
, 0),
1281 {from_nir(instr
.src
[0], 0)},{alu_write
, alu_last_instr
});
// Emits the sequence that combines component 0 of instr.src[0] and of
// instr.src[1] into component 0 of instr.dest:
//   x    = flt32_to_flt16(src0)
//   y    = flt32_to_flt16(src1)
//   y    = y << 16
//   dest = x | y
// x and y are components 0 and 1 of a freshly allocated temporary register.
// NOTE(review): this listing omits some original lines (braces, return).
1285 bool EmitAluInstruction::emit_pack_32_2x16_split(const nir_alu_instr
& instr
)
1287 int it0
= allocate_temp_register();
1288 PValue
x(new GPRValue(it0
, 0));
1289 PValue
y(new GPRValue(it0
, 1));
// Both conversions are emitted together; only the second one carries
// alu_last_instr.
1291 emit_instruction(op1_flt32_to_flt16
, x
,{from_nir(instr
.src
[0], 0)},{alu_write
});
1292 emit_instruction(op1_flt32_to_flt16
, y
,{from_nir(instr
.src
[1], 0)},{alu_write
, alu_last_instr
});
// Move the second half into the upper 16 bits.
1294 emit_instruction(op2_lshl_int
, y
, {y
, PValue(new LiteralValue(16))},{alu_write
, alu_last_instr
});
// Merge both halves into the destination.
1296 emit_instruction(op2_or_int
, {from_nir(instr
.dest
, 0)} , {x
, y
},{alu_write
, alu_last_instr
});