re PR target/80846 (auto-vectorized AVX2 horizontal sum should narrow to 128b right...
[gcc.git] / gcc / config / mips / loongson.md
1 ;; Machine description for Loongson-specific patterns, such as
2 ;; ST Microelectronics Loongson-2E/2F etc.
3 ;; Copyright (C) 2008-2017 Free Software Foundation, Inc.
4 ;; Contributed by CodeSourcery.
5 ;;
6 ;; This file is part of GCC.
7 ;;
8 ;; GCC is free software; you can redistribute it and/or modify
9 ;; it under the terms of the GNU General Public License as published by
10 ;; the Free Software Foundation; either version 3, or (at your option)
11 ;; any later version.
12
13 ;; GCC is distributed in the hope that it will be useful,
14 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 ;; GNU General Public License for more details.
17
18 ;; You should have received a copy of the GNU General Public License
19 ;; along with GCC; see the file COPYING3. If not see
20 ;; <http://www.gnu.org/licenses/>.
21
;; Unspec codes referenced by the Loongson vector patterns.
;; Entries are kept in their existing order.
(define_c_enum "unspec"
  [UNSPEC_LOONGSON_PAVG
   UNSPEC_LOONGSON_PCMPEQ
   UNSPEC_LOONGSON_PCMPGT
   UNSPEC_LOONGSON_PEXTR
   UNSPEC_LOONGSON_PINSRH
   UNSPEC_LOONGSON_VINIT
   UNSPEC_LOONGSON_PMADD
   UNSPEC_LOONGSON_PMOVMSK
   UNSPEC_LOONGSON_PMULHU
   UNSPEC_LOONGSON_PMULH
   UNSPEC_LOONGSON_PMULU
   UNSPEC_LOONGSON_PASUBUB
   UNSPEC_LOONGSON_BIADD
   UNSPEC_LOONGSON_PSADBH
   UNSPEC_LOONGSON_PSHUFH
   UNSPEC_LOONGSON_PUNPCKH
   UNSPEC_LOONGSON_PUNPCKL
   UNSPEC_LOONGSON_PADDD
   UNSPEC_LOONGSON_PSUBD
   UNSPEC_LOONGSON_DSLL
   UNSPEC_LOONGSON_DSRL])
45
46 ;; Mode iterators and attributes.
47
;; Iterators over the 64-bit vector modes.  The letters in each name say
;; which element widths are covered: B = bytes (V8QI), H = halfwords
;; (V4HI), W = words (V2SI).  VWHBDI additionally covers DImode.
(define_mode_iterator VB     [V8QI])
(define_mode_iterator VH     [V4HI])
(define_mode_iterator VW     [V2SI])
(define_mode_iterator VHB    [V4HI V8QI])
(define_mode_iterator VWH    [V2SI V4HI])
(define_mode_iterator VWB    [V2SI V8QI])
(define_mode_iterator VWHB   [V2SI V4HI V8QI])
(define_mode_iterator VWHBDI [V2SI V4HI V8QI DI])
71
;; Instruction suffix for each mode of the VWHBDI iterator.
(define_mode_attr V_suffix
  [(V2SI "w")
   (V4HI "h")
   (V8QI "b")
   (DI "d")])

;; For vector mode T: the vector mode with the same element count as T
;; but half the total size.
(define_mode_attr V_squash
  [(V2SI "V2HI")
   (V4HI "V4QI")])

;; For vector mode T: the mode of the same total size as T with half
;; as many elements.
(define_mode_attr V_stretch_half
  [(V2SI "DI")
   (V4HI "V2SI")
   (V8QI "V4HI")])

;; Instruction suffix for the widening described by V_stretch_half.
(define_mode_attr V_stretch_half_suffix
  [(V2SI "wd")
   (V4HI "hw")
   (V8QI "bh")])

;; For vector mode T: the vector mode of the same total size as T with
;; twice as many elements.
(define_mode_attr V_squash_double
  [(V2SI "V4HI")
   (V4HI "V8QI")])

;; For vector mode T: the mode of a single element.
(define_mode_attr V_inner
  [(V8QI "QI")
   (V4HI "HI")
   (V2SI "SI")])

;; Instruction suffix for the narrowing described by V_squash_double.
(define_mode_attr V_squash_double_suffix
  [(V2SI "wh")
   (V4HI "hb")])
98
99 ;; Move patterns.
100
;; Vector move expander.  mips_legitimize_move gets the first chance to
;; emit the move; if it reports success nothing further is generated,
;; otherwise the plain SET below is emitted.
(define_expand "mov<mode>"
  [(set (match_operand:VWHB 0)
        (match_operand:VWHB 1))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
{
  bool handled = mips_legitimize_move (<MODE>mode, operands[0], operands[1]);
  if (handled)
    DONE;
})
110
;; Insn matching already-legitimized vector moves.  The seven
;; alternatives pair up, in order, with the seven "move_type" values;
;; the assembly text itself comes from mips_output_move.
(define_insn "mov<mode>_internal"
  [(set
     (match_operand:VWHB 0 "nonimmediate_operand" "=m,f,d,f, d, m, d")
     (match_operand:VWHB 1 "move_operand" "f,m,f,dYG,dYG,dYG,m"))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
{
  return mips_output_move (operands[0], operands[1]);
}
  [(set_attr "move_type" "fpstore,fpload,mfc,mtc,move,store,load")
   (set_attr "mode" "DI")])
119
120 ;; Initialization of a vector.
121
;; Vector initialization: operand 0 is the destination register and
;; operand 1 the initializer; all of the work is delegated to
;; mips_expand_vector_init.
(define_expand "vec_init<mode><unitmode>"
  [(set (match_operand:VWHB 0 "register_operand")
        (match_operand 1 ""))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
{
  rtx dest = operands[0];
  rtx init = operands[1];
  mips_expand_vector_init (dest, init);
  DONE;
})
130
;; vec_init helper: element 0 of the result is set from the DImode
;; input (a register or zero); the remaining elements are undefined.
(define_insn "loongson_vec_init1_<mode>"
  [(set (match_operand:VHB 0 "register_operand" "=f")
        (unspec:VHB
          [(truncate:<V_inner>
             (match_operand:DI 1 "reg_or_0_operand" "Jd"))]
          UNSPEC_LOONGSON_VINIT))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "dmtc1\t%z1,%0"
  [(set_attr "move_type" "mtc")
   (set_attr "mode" "DI")])
142
;; vec_initv2si helper: build a V2SI from two SImode values held in
;; FP registers.
(define_insn "*vec_concatv2si"
  [(set (match_operand:V2SI 0 "register_operand" "=f")
        (vec_concat:V2SI
          (match_operand:SI 1 "register_operand" "f")
          (match_operand:SI 2 "register_operand" "f")))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "punpcklwd\t%0,%1,%2"
  [(set_attr "type" "fcvt")])
152
153 ;; Instruction patterns for SIMD instructions.
154
;; Narrow two word/halfword vectors with signed saturation and
;; concatenate the narrowed halves (operand 1 first, then operand 2).
(define_insn "vec_pack_ssat_<mode>"
  [(set (match_operand:<V_squash_double> 0 "register_operand" "=f")
        (vec_concat:<V_squash_double>
          (ss_truncate:<V_squash>
            (match_operand:VWH 1 "register_operand" "f"))
          (ss_truncate:<V_squash>
            (match_operand:VWH 2 "register_operand" "f"))))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "packss<V_squash_double_suffix>\t%0,%1,%2"
  [(set_attr "type" "fmul")])
166
;; Narrow two halfword vectors with unsigned saturation and
;; concatenate the narrowed halves (operand 1 first, then operand 2).
(define_insn "vec_pack_usat_<mode>"
  [(set (match_operand:<V_squash_double> 0 "register_operand" "=f")
        (vec_concat:<V_squash_double>
          (us_truncate:<V_squash>
            (match_operand:VH 1 "register_operand" "f"))
          (us_truncate:<V_squash>
            (match_operand:VH 2 "register_operand" "f"))))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "packus<V_squash_double_suffix>\t%0,%1,%2"
  [(set_attr "type" "fmul")])
178
;; Element-wise vector addition; overflow wraps around.
(define_insn "add<mode>3"
  [(set (match_operand:VWHB 0 "register_operand" "=f")
        (plus:VWHB
          (match_operand:VWHB 1 "register_operand" "f")
          (match_operand:VWHB 2 "register_operand" "f")))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "padd<V_suffix>\t%0,%1,%2"
  [(set_attr "type" "fadd")])
187
;; Doubleword integer addition on values living in FP registers;
;; overflow wraps around.
;; This is deliberately an 'unspec' rather than a 'plus': a 'plus'
;; would clash with mips.md::add<mode>3, be recognized as adddi3, and
;; reload would then place the operands in GPRs instead of FPRs.
(define_insn "loongson_paddd"
  [(set (match_operand:DI 0 "register_operand" "=f")
        (unspec:DI
          [(match_operand:DI 1 "register_operand" "f")
           (match_operand:DI 2 "register_operand" "f")]
          UNSPEC_LOONGSON_PADDD))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "paddd\t%0,%1,%2"
  [(set_attr "type" "fadd")])
202
;; Element-wise addition of halfword/byte vectors with signed
;; saturation on overflow.
(define_insn "ssadd<mode>3"
  [(set (match_operand:VHB 0 "register_operand" "=f")
        (ss_plus:VHB
          (match_operand:VHB 1 "register_operand" "f")
          (match_operand:VHB 2 "register_operand" "f")))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "padds<V_suffix>\t%0,%1,%2"
  [(set_attr "type" "fadd")])
211
;; Element-wise addition of halfword/byte vectors with unsigned
;; saturation on overflow.
(define_insn "usadd<mode>3"
  [(set (match_operand:VHB 0 "register_operand" "=f")
        (us_plus:VHB
          (match_operand:VHB 1 "register_operand" "f")
          (match_operand:VHB 2 "register_operand" "f")))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "paddus<V_suffix>\t%0,%1,%2"
  [(set_attr "type" "fadd")])
220
;; Bitwise AND-NOT: (~operand 1) & operand 2, for the vector modes
;; and DImode.
(define_insn "loongson_pandn_<V_suffix>"
  [(set (match_operand:VWHBDI 0 "register_operand" "=f")
        (and:VWHBDI
          (not:VWHBDI
            (match_operand:VWHBDI 1 "register_operand" "f"))
          (match_operand:VWHBDI 2 "register_operand" "f")))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "pandn\t%0,%1,%2"
  [(set_attr "type" "fmul")])
230
;; Bitwise AND of two vectors.
(define_insn "and<mode>3"
  [(set (match_operand:VWHB 0 "register_operand" "=f")
        (and:VWHB
          (match_operand:VWHB 1 "register_operand" "f")
          (match_operand:VWHB 2 "register_operand" "f")))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "and\t%0,%1,%2"
  [(set_attr "type" "fmul")])
239
;; Bitwise inclusive OR of two vectors.
(define_insn "ior<mode>3"
  [(set (match_operand:VWHB 0 "register_operand" "=f")
        (ior:VWHB
          (match_operand:VWHB 1 "register_operand" "f")
          (match_operand:VWHB 2 "register_operand" "f")))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "or\t%0,%1,%2"
  [(set_attr "type" "fcvt")])
248
;; Bitwise exclusive OR of two vectors.
(define_insn "xor<mode>3"
  [(set (match_operand:VWHB 0 "register_operand" "=f")
        (xor:VWHB
          (match_operand:VWHB 1 "register_operand" "f")
          (match_operand:VWHB 2 "register_operand" "f")))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "xor\t%0,%1,%2"
  [(set_attr "type" "fmul")])
257
;; Bitwise NOR, written in RTL as (~operand 1) & (~operand 2).
(define_insn "*loongson_nor"
  [(set (match_operand:VWHB 0 "register_operand" "=f")
        (and:VWHB
          (not:VWHB
            (match_operand:VWHB 1 "register_operand" "f"))
          (not:VWHB
            (match_operand:VWHB 2 "register_operand" "f"))))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "nor\t%0,%1,%2"
  [(set_attr "type" "fmul")])
267
;; Bitwise complement of a vector, emitted as a NOR of the operand
;; with itself.
(define_insn "one_cmpl<mode>2"
  [(set (match_operand:VWHB 0 "register_operand" "=f")
        (not:VWHB
          (match_operand:VWHB 1 "register_operand" "f")))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "nor\t%0,%1,%1"
  [(set_attr "type" "fmul")])
275
;; Average of halfword/byte vectors, modelled as an opaque unspec.
(define_insn "loongson_pavg<V_suffix>"
  [(set (match_operand:VHB 0 "register_operand" "=f")
        (unspec:VHB
          [(match_operand:VHB 1 "register_operand" "f")
           (match_operand:VHB 2 "register_operand" "f")]
          UNSPEC_LOONGSON_PAVG))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "pavg<V_suffix>\t%0,%1,%2"
  [(set_attr "type" "fadd")])
285
;; Vector equality comparison, modelled as an opaque unspec.
(define_insn "loongson_pcmpeq<V_suffix>"
  [(set (match_operand:VWHB 0 "register_operand" "=f")
        (unspec:VWHB
          [(match_operand:VWHB 1 "register_operand" "f")
           (match_operand:VWHB 2 "register_operand" "f")]
          UNSPEC_LOONGSON_PCMPEQ))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "pcmpeq<V_suffix>\t%0,%1,%2"
  [(set_attr "type" "fadd")])
295
;; Vector greater-than comparison, modelled as an opaque unspec.
(define_insn "loongson_pcmpgt<V_suffix>"
  [(set (match_operand:VWHB 0 "register_operand" "=f")
        (unspec:VWHB
          [(match_operand:VWHB 1 "register_operand" "f")
           (match_operand:VWHB 2 "register_operand" "f")]
          UNSPEC_LOONGSON_PCMPGT))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "pcmpgt<V_suffix>\t%0,%1,%2"
  [(set_attr "type" "fadd")])
305
;; Halfword extraction from a V4HI.  Operand 2 is an SImode value in
;; an FP register; the operation is an opaque unspec.
(define_insn "loongson_pextrh"
  [(set (match_operand:V4HI 0 "register_operand" "=f")
        (unspec:V4HI
          [(match_operand:V4HI 1 "register_operand" "f")
           (match_operand:SI 2 "register_operand" "f")]
          UNSPEC_LOONGSON_PEXTR))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "pextrh\t%0,%1,%2"
  [(set_attr "type" "fcvt")])
315
;; Halfword insertion into lane 0: the result is operand 1 with lane 0
;; replaced by lane 0 of operand 2 (index 4 of the concatenation).
(define_insn "loongson_pinsrh_0"
  [(set (match_operand:V4HI 0 "register_operand" "=f")
        (vec_select:V4HI
          (vec_concat:V8HI
            (match_operand:V4HI 1 "register_operand" "f")
            (match_operand:V4HI 2 "register_operand" "f"))
          (parallel
            [(const_int 4) (const_int 1) (const_int 2) (const_int 3)])))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "pinsrh_0\t%0,%1,%2"
  [(set_attr "type" "fdiv")])
328
;; Halfword insertion into lane 1: the result is operand 1 with lane 1
;; replaced by lane 0 of operand 2 (index 4 of the concatenation).
(define_insn "loongson_pinsrh_1"
  [(set (match_operand:V4HI 0 "register_operand" "=f")
        (vec_select:V4HI
          (vec_concat:V8HI
            (match_operand:V4HI 1 "register_operand" "f")
            (match_operand:V4HI 2 "register_operand" "f"))
          (parallel
            [(const_int 0) (const_int 4) (const_int 2) (const_int 3)])))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "pinsrh_1\t%0,%1,%2"
  [(set_attr "type" "fdiv")])
340
;; Halfword insertion into lane 2: the result is operand 1 with lane 2
;; replaced by lane 0 of operand 2 (index 4 of the concatenation).
(define_insn "loongson_pinsrh_2"
  [(set (match_operand:V4HI 0 "register_operand" "=f")
        (vec_select:V4HI
          (vec_concat:V8HI
            (match_operand:V4HI 1 "register_operand" "f")
            (match_operand:V4HI 2 "register_operand" "f"))
          (parallel
            [(const_int 0) (const_int 1) (const_int 4) (const_int 3)])))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "pinsrh_2\t%0,%1,%2"
  [(set_attr "type" "fdiv")])
352
;; Halfword insertion into lane 3: the result is operand 1 with lane 3
;; replaced by lane 0 of operand 2 (index 4 of the concatenation).
(define_insn "loongson_pinsrh_3"
  [(set (match_operand:V4HI 0 "register_operand" "=f")
        (vec_select:V4HI
          (vec_concat:V8HI
            (match_operand:V4HI 1 "register_operand" "f")
            (match_operand:V4HI 2 "register_operand" "f"))
          (parallel
            [(const_int 0) (const_int 1) (const_int 2) (const_int 4)])))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "pinsrh_3\t%0,%1,%2"
  [(set_attr "type" "fdiv")])
364
;; Insn form of the V4HI element set: operand 1 is the source vector,
;; operand 2 an SImode value in an FP register, and operand 3 a
;; constant lane number 0..3 that is spliced into the mnemonic.
(define_insn "*vec_setv4hi"
  [(set (match_operand:V4HI 0 "register_operand" "=f")
        (unspec:V4HI
          [(match_operand:V4HI 1 "register_operand" "f")
           (match_operand:SI 2 "register_operand" "f")
           (match_operand:SI 3 "const_0_to_3_operand" "")]
          UNSPEC_LOONGSON_PINSRH))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "pinsrh_%3\t%0,%1,%2"
  [(set_attr "type" "fdiv")])
374
;; Expander for inserting a halfword into a V4HI vector.
;;   operand 0: V4HI result
;;   operand 1: V4HI source vector
;;   operand 2: HImode value to insert
;;   operand 3: constant lane number, 0..3
;; The matching insn (*vec_setv4hi) takes the inserted value as an
;; SImode register, so it is operand 2 -- the scalar, not the vector in
;; operand 1 -- that is re-expressed in SImode before the pattern is
;; emitted.  Converting operand 1 instead would leave an SImode value
;; in the vector slot and an HImode value in the scalar slot, which no
;; insn pattern accepts.
(define_expand "vec_setv4hi"
  [(set (match_operand:V4HI 0 "register_operand" "=f")
        (unspec:V4HI
          [(match_operand:V4HI 1 "register_operand" "f")
           (match_operand:HI 2 "register_operand" "f")
           (match_operand:SI 3 "const_0_to_3_operand" "")]
          UNSPEC_LOONGSON_PINSRH))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
{
  /* Copy the HImode element into a fresh SImode pseudo and use that
     as operand 2 of the emitted pattern.  */
  rtx ext = gen_reg_rtx (SImode);
  emit_move_insn (ext, gen_lowpart (SImode, operands[2]));
  operands[2] = ext;
})
387
;; Packed multiply-and-add: two V4HI inputs produce a V2SI result.
;; Modelled as an opaque unspec.
(define_insn "loongson_pmaddhw"
  [(set (match_operand:V2SI 0 "register_operand" "=f")
        (unspec:V2SI
          [(match_operand:V4HI 1 "register_operand" "f")
           (match_operand:V4HI 2 "register_operand" "f")]
          UNSPEC_LOONGSON_PMADD))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "pmaddhw\t%0,%1,%2"
  [(set_attr "type" "fmul")])
397
;; Signed dot product: operand 0 = pmaddhw (operand 1, operand 2)
;; + operand 3, computed through a temporary V2SI register.
(define_expand "sdot_prodv4hi"
  [(match_operand:V2SI 0 "register_operand")
   (match_operand:V4HI 1 "register_operand")
   (match_operand:V4HI 2 "register_operand")
   (match_operand:V2SI 3 "register_operand")]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
{
  rtx product = gen_reg_rtx (V2SImode);

  /* Multiply-add first, then fold in the accumulator.  */
  emit_insn (gen_loongson_pmaddhw (product, operands[1], operands[2]));
  emit_insn (gen_addv2si3 (operands[0], product, operands[3]));
  DONE;
})
410
;; Element-wise signed maximum of two V4HI vectors.
(define_insn "smaxv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=f")
        (smax:V4HI
          (match_operand:V4HI 1 "register_operand" "f")
          (match_operand:V4HI 2 "register_operand" "f")))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "pmaxsh\t%0,%1,%2"
  [(set_attr "type" "fadd")])
419
;; Signed maximum for word and byte vectors.  The expansion is done by
;; mips_expand_vec_minmax using the pcmpgt generator for the mode; the
;; final argument is false here (the smin expander passes true).
(define_expand "smax<mode>3"
  [(match_operand:VWB 0 "register_operand")
   (match_operand:VWB 1 "register_operand")
   (match_operand:VWB 2 "register_operand")]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
{
  mips_expand_vec_minmax (operands[0],
                          operands[1],
                          operands[2],
                          gen_loongson_pcmpgt<V_suffix>,
                          false);
  DONE;
})
430
;; Element-wise unsigned maximum of two V8QI vectors.
(define_insn "umaxv8qi3"
  [(set (match_operand:V8QI 0 "register_operand" "=f")
        (umax:V8QI
          (match_operand:V8QI 1 "register_operand" "f")
          (match_operand:V8QI 2 "register_operand" "f")))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "pmaxub\t%0,%1,%2"
  [(set_attr "type" "fadd")])
439
;; Element-wise signed minimum of two V4HI vectors.
(define_insn "sminv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=f")
        (smin:V4HI
          (match_operand:V4HI 1 "register_operand" "f")
          (match_operand:V4HI 2 "register_operand" "f")))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "pminsh\t%0,%1,%2"
  [(set_attr "type" "fadd")])
448
;; Signed minimum for word and byte vectors.  The expansion is done by
;; mips_expand_vec_minmax using the pcmpgt generator for the mode; the
;; final argument is true here (the smax expander passes false).
(define_expand "smin<mode>3"
  [(match_operand:VWB 0 "register_operand")
   (match_operand:VWB 1 "register_operand")
   (match_operand:VWB 2 "register_operand")]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
{
  mips_expand_vec_minmax (operands[0],
                          operands[1],
                          operands[2],
                          gen_loongson_pcmpgt<V_suffix>,
                          true);
  DONE;
})
459
;; Element-wise unsigned minimum of two V8QI vectors.
(define_insn "uminv8qi3"
  [(set (match_operand:V8QI 0 "register_operand" "=f")
        (umin:V8QI
          (match_operand:V8QI 1 "register_operand" "f")
          (match_operand:V8QI 2 "register_operand" "f")))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "pminub\t%0,%1,%2"
  [(set_attr "type" "fadd")])
468
;; Byte-mask move on a V8QI vector, modelled as an opaque unspec.
(define_insn "loongson_pmovmsk<V_suffix>"
  [(set (match_operand:VB 0 "register_operand" "=f")
        (unspec:VB
          [(match_operand:VB 1 "register_operand" "f")]
          UNSPEC_LOONGSON_PMOVMSK))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "pmovmsk<V_suffix>\t%0,%1"
  [(set_attr "type" "fabs")])
477
;; Unsigned halfword multiply keeping the high part of each product.
;; Modelled as an opaque unspec.
(define_insn "umul<mode>3_highpart"
  [(set (match_operand:VH 0 "register_operand" "=f")
        (unspec:VH
          [(match_operand:VH 1 "register_operand" "f")
           (match_operand:VH 2 "register_operand" "f")]
          UNSPEC_LOONGSON_PMULHU))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "pmulhu<V_suffix>\t%0,%1,%2"
  [(set_attr "type" "fmul")])
487
;; Signed halfword multiply keeping the high part of each product.
;; Modelled as an opaque unspec.
(define_insn "smul<mode>3_highpart"
  [(set (match_operand:VH 0 "register_operand" "=f")
        (unspec:VH
          [(match_operand:VH 1 "register_operand" "f")
           (match_operand:VH 2 "register_operand" "f")]
          UNSPEC_LOONGSON_PMULH))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "pmulh<V_suffix>\t%0,%1,%2"
  [(set_attr "type" "fmul")])
497
;; Element-wise halfword multiply keeping the low part of each
;; product.
(define_insn "mul<mode>3"
  [(set (match_operand:VH 0 "register_operand" "=f")
        (mult:VH
          (match_operand:VH 1 "register_operand" "f")
          (match_operand:VH 2 "register_operand" "f")))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "pmull<V_suffix>\t%0,%1,%2"
  [(set_attr "type" "fmul")])
506
;; Unsigned word multiply: two V2SI inputs give a DImode result.
;; Modelled as an opaque unspec.
(define_insn "loongson_pmulu<V_suffix>"
  [(set (match_operand:DI 0 "register_operand" "=f")
        (unspec:DI
          [(match_operand:VW 1 "register_operand" "f")
           (match_operand:VW 2 "register_operand" "f")]
          UNSPEC_LOONGSON_PMULU))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "pmulu<V_suffix>\t%0,%1,%2"
  [(set_attr "type" "fmul")])
516
;; Absolute difference of two V8QI vectors, modelled as an opaque
;; unspec.
(define_insn "loongson_pasubub"
  [(set (match_operand:VB 0 "register_operand" "=f")
        (unspec:VB
          [(match_operand:VB 1 "register_operand" "f")
           (match_operand:VB 2 "register_operand" "f")]
          UNSPEC_LOONGSON_PASUBUB))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "pasubub\t%0,%1,%2"
  [(set_attr "type" "fadd")])
526
;; Sum of the unsigned bytes of a V8QI vector; the result has the
;; V_stretch_half mode of the input (V4HI).  Opaque unspec.
(define_insn "loongson_biadd"
  [(set (match_operand:<V_stretch_half> 0 "register_operand" "=f")
        (unspec:<V_stretch_half>
          [(match_operand:VB 1 "register_operand" "f")]
          UNSPEC_LOONGSON_BIADD))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "biadd\t%0,%1"
  [(set_attr "type" "fabs")])
535
;; Unsigned plus-reduction of a V8QI vector.  Same unspec and
;; instruction as loongson_biadd, but with a V8QI-typed result.
(define_insn "reduc_uplus_v8qi"
  [(set (match_operand:V8QI 0 "register_operand" "=f")
        (unspec:V8QI
          [(match_operand:V8QI 1 "register_operand" "f")]
          UNSPEC_LOONGSON_BIADD))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "biadd\t%0,%1"
  [(set_attr "type" "fabs")])
543
;; Sum of absolute byte differences.  The template emits two
;; instructions: pasubub into operand 0, then biadd of operand 0 onto
;; itself.
(define_insn "loongson_psadbh"
  [(set (match_operand:<V_stretch_half> 0 "register_operand" "=f")
        (unspec:<V_stretch_half>
          [(match_operand:VB 1 "register_operand" "f")
           (match_operand:VB 2 "register_operand" "f")]
          UNSPEC_LOONGSON_PSADBH))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "pasubub\t%0,%1,%2;biadd\t%0,%0"
  [(set_attr "type" "fadd")])
553
;; Halfword shuffle of a V4HI vector; operand 2 is an SImode value in
;; an FP register.  Modelled as an opaque unspec.
(define_insn "loongson_pshufh"
  [(set (match_operand:VH 0 "register_operand" "=f")
        (unspec:VH
          [(match_operand:VH 1 "register_operand" "f")
           (match_operand:SI 2 "register_operand" "f")]
          UNSPEC_LOONGSON_PSHUFH))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "pshufh\t%0,%1,%2"
  [(set_attr "type" "fmul")])
563
;; Element-wise logical left shift of word/halfword vectors; the
;; shift count is an SImode value in an FP register.
(define_insn "ashl<mode>3"
  [(set (match_operand:VWH 0 "register_operand" "=f")
        (ashift:VWH
          (match_operand:VWH 1 "register_operand" "f")
          (match_operand:SI 2 "register_operand" "f")))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "psll<V_suffix>\t%0,%1,%2"
  [(set_attr "type" "fcvt")])
572
;; Element-wise arithmetic right shift of word/halfword vectors; the
;; shift count is an SImode value in an FP register.
(define_insn "ashr<mode>3"
  [(set (match_operand:VWH 0 "register_operand" "=f")
        (ashiftrt:VWH
          (match_operand:VWH 1 "register_operand" "f")
          (match_operand:SI 2 "register_operand" "f")))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "psra<V_suffix>\t%0,%1,%2"
  [(set_attr "type" "fcvt")])
581
;; Element-wise logical right shift of word/halfword vectors; the
;; shift count is an SImode value in an FP register.
(define_insn "lshr<mode>3"
  [(set (match_operand:VWH 0 "register_operand" "=f")
        (lshiftrt:VWH
          (match_operand:VWH 1 "register_operand" "f")
          (match_operand:SI 2 "register_operand" "f")))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "psrl<V_suffix>\t%0,%1,%2"
  [(set_attr "type" "fcvt")])
590
;; Element-wise vector subtraction; overflow wraps around.
(define_insn "sub<mode>3"
  [(set (match_operand:VWHB 0 "register_operand" "=f")
        (minus:VWHB
          (match_operand:VWHB 1 "register_operand" "f")
          (match_operand:VWHB 2 "register_operand" "f")))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "psub<V_suffix>\t%0,%1,%2"
  [(set_attr "type" "fadd")])
599
;; Doubleword integer subtraction on values living in FP registers;
;; overflow wraps around.
;; An 'unspec' is used instead of 'minus' for the same reason that
;; loongson_paddd uses 'unspec' instead of 'plus'.
(define_insn "loongson_psubd"
  [(set (match_operand:DI 0 "register_operand" "=f")
        (unspec:DI
          [(match_operand:DI 1 "register_operand" "f")
           (match_operand:DI 2 "register_operand" "f")]
          UNSPEC_LOONGSON_PSUBD))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "psubd\t%0,%1,%2"
  [(set_attr "type" "fadd")])
612
;; Element-wise subtraction of halfword/byte vectors with signed
;; saturation on overflow.
(define_insn "sssub<mode>3"
  [(set (match_operand:VHB 0 "register_operand" "=f")
        (ss_minus:VHB
          (match_operand:VHB 1 "register_operand" "f")
          (match_operand:VHB 2 "register_operand" "f")))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "psubs<V_suffix>\t%0,%1,%2"
  [(set_attr "type" "fadd")])
621
;; Element-wise subtraction of halfword/byte vectors with unsigned
;; saturation on overflow.
(define_insn "ussub<mode>3"
  [(set (match_operand:VHB 0 "register_operand" "=f")
        (us_minus:VHB
          (match_operand:VHB 1 "register_operand" "f")
          (match_operand:VHB 2 "register_operand" "f")))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "psubus<V_suffix>\t%0,%1,%2"
  [(set_attr "type" "fadd")])
630
;; Unpack high data, byte granularity: interleave bytes 4..7 of
;; operand 1 with bytes 4..7 of operand 2.  Element numbering follows
;; little-endian order; recall that Loongson only runs little-endian.
(define_insn "loongson_punpckhbh"
  [(set (match_operand:V8QI 0 "register_operand" "=f")
        (vec_select:V8QI
          (vec_concat:V16QI
            (match_operand:V8QI 1 "register_operand" "f")
            (match_operand:V8QI 2 "register_operand" "f"))
          (parallel
            [(const_int 4) (const_int 12) (const_int 5) (const_int 13)
             (const_int 6) (const_int 14) (const_int 7) (const_int 15)])))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "punpckhbh\t%0,%1,%2"
  [(set_attr "type" "fdiv")])
645
;; Unpack high data, halfword granularity: interleave halfwords 2..3
;; of operand 1 with halfwords 2..3 of operand 2.
(define_insn "loongson_punpckhhw"
  [(set (match_operand:V4HI 0 "register_operand" "=f")
        (vec_select:V4HI
          (vec_concat:V8HI
            (match_operand:V4HI 1 "register_operand" "f")
            (match_operand:V4HI 2 "register_operand" "f"))
          (parallel
            [(const_int 2) (const_int 6) (const_int 3) (const_int 7)])))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "punpckhhw\t%0,%1,%2"
  [(set_attr "type" "fdiv")])
657
;; The punpckhhw instruction expressed on V8QI operands: byte pairs
;; from the high halves of operands 1 and 2 are interleaved.
(define_insn "loongson_punpckhhw_qi"
  [(set (match_operand:V8QI 0 "register_operand" "=f")
        (vec_select:V8QI
          (vec_concat:V16QI
            (match_operand:V8QI 1 "register_operand" "f")
            (match_operand:V8QI 2 "register_operand" "f"))
          (parallel
            [(const_int 4) (const_int 5) (const_int 12) (const_int 13)
             (const_int 6) (const_int 7) (const_int 14) (const_int 15)])))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "punpckhhw\t%0,%1,%2"
  [(set_attr "type" "fdiv")])
671
;; Unpack high data, word granularity: the result is word 1 of
;; operand 1 followed by word 1 of operand 2.
(define_insn "loongson_punpckhwd"
  [(set (match_operand:V2SI 0 "register_operand" "=f")
        (vec_select:V2SI
          (vec_concat:V4SI
            (match_operand:V2SI 1 "register_operand" "f")
            (match_operand:V2SI 2 "register_operand" "f"))
          (parallel
            [(const_int 1) (const_int 3)])))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "punpckhwd\t%0,%1,%2"
  [(set_attr "type" "fcvt")])
682
;; The punpckhwd instruction expressed on V8QI operands: bytes 4..7
;; of operand 1 followed by bytes 4..7 of operand 2.
(define_insn "loongson_punpckhwd_qi"
  [(set (match_operand:V8QI 0 "register_operand" "=f")
        (vec_select:V8QI
          (vec_concat:V16QI
            (match_operand:V8QI 1 "register_operand" "f")
            (match_operand:V8QI 2 "register_operand" "f"))
          (parallel
            [(const_int 4) (const_int 5) (const_int 6) (const_int 7)
             (const_int 12) (const_int 13) (const_int 14) (const_int 15)])))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "punpckhwd\t%0,%1,%2"
  [(set_attr "type" "fcvt")])
696
;; The punpckhwd instruction expressed on V4HI operands: halfwords
;; 2..3 of operand 1 followed by halfwords 2..3 of operand 2.
(define_insn "loongson_punpckhwd_hi"
  [(set (match_operand:V4HI 0 "register_operand" "=f")
        (vec_select:V4HI
          (vec_concat:V8HI
            (match_operand:V4HI 1 "register_operand" "f")
            (match_operand:V4HI 2 "register_operand" "f"))
          (parallel
            [(const_int 2) (const_int 3) (const_int 6) (const_int 7)])))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "punpckhwd\t%0,%1,%2"
  [(set_attr "type" "fcvt")])
708
;; Unpack low data, byte granularity: interleave bytes 0..3 of
;; operand 1 with bytes 0..3 of operand 2.
(define_insn "loongson_punpcklbh"
  [(set (match_operand:V8QI 0 "register_operand" "=f")
        (vec_select:V8QI
          (vec_concat:V16QI
            (match_operand:V8QI 1 "register_operand" "f")
            (match_operand:V8QI 2 "register_operand" "f"))
          (parallel
            [(const_int 0) (const_int 8) (const_int 1) (const_int 9)
             (const_int 2) (const_int 10) (const_int 3) (const_int 11)])))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "punpcklbh\t%0,%1,%2"
  [(set_attr "type" "fdiv")])
723
;; Unpack low data, halfword granularity: interleave halfwords 0..1
;; of operand 1 with halfwords 0..1 of operand 2.
(define_insn "loongson_punpcklhw"
  [(set (match_operand:V4HI 0 "register_operand" "=f")
        (vec_select:V4HI
          (vec_concat:V8HI
            (match_operand:V4HI 1 "register_operand" "f")
            (match_operand:V4HI 2 "register_operand" "f"))
          (parallel
            [(const_int 0) (const_int 4) (const_int 1) (const_int 5)])))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "punpcklhw\t%0,%1,%2"
  [(set_attr "type" "fdiv")])
735
;; The punpcklhw instruction expressed on V8QI operands: byte pairs
;; from the low halves of operands 1 and 2 are interleaved.
(define_insn "*loongson_punpcklhw_qi"
  [(set (match_operand:V8QI 0 "register_operand" "=f")
        (vec_select:V8QI
          (vec_concat:V16QI
            (match_operand:V8QI 1 "register_operand" "f")
            (match_operand:V8QI 2 "register_operand" "f"))
          (parallel
            [(const_int 0) (const_int 1) (const_int 8) (const_int 9)
             (const_int 2) (const_int 3) (const_int 10) (const_int 11)])))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "punpcklhw\t%0,%1,%2"
  [(set_attr "type" "fdiv")])
749
;; Unpack low data, word granularity: the result is word 0 of
;; operand 1 followed by word 0 of operand 2.
(define_insn "loongson_punpcklwd"
  [(set (match_operand:V2SI 0 "register_operand" "=f")
        (vec_select:V2SI
          (vec_concat:V4SI
            (match_operand:V2SI 1 "register_operand" "f")
            (match_operand:V2SI 2 "register_operand" "f"))
          (parallel
            [(const_int 0) (const_int 2)])))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "punpcklwd\t%0,%1,%2"
  [(set_attr "type" "fcvt")])
760
;; The punpcklwd instruction expressed on V8QI operands: bytes 0..3
;; of operand 1 followed by bytes 0..3 of operand 2.
(define_insn "*loongson_punpcklwd_qi"
  [(set (match_operand:V8QI 0 "register_operand" "=f")
        (vec_select:V8QI
          (vec_concat:V16QI
            (match_operand:V8QI 1 "register_operand" "f")
            (match_operand:V8QI 2 "register_operand" "f"))
          (parallel
            [(const_int 0) (const_int 1) (const_int 2) (const_int 3)
             (const_int 8) (const_int 9) (const_int 10) (const_int 11)])))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "punpcklwd\t%0,%1,%2"
  [(set_attr "type" "fcvt")])
774
;; The punpcklwd instruction expressed on V4HI operands: halfwords
;; 0..1 of operand 1 followed by halfwords 0..1 of operand 2.
(define_insn "*loongson_punpcklwd_hi"
  [(set (match_operand:V4HI 0 "register_operand" "=f")
        (vec_select:V4HI
          (vec_concat:V8HI
            (match_operand:V4HI 1 "register_operand" "f")
            (match_operand:V4HI 2 "register_operand" "f"))
          (parallel
            [(const_int 0) (const_int 1) (const_int 4) (const_int 5)])))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "punpcklwd\t%0,%1,%2"
  [(set_attr "type" "fcvt")])
786
;; Constant vector permutation.  mips_expand_vec_perm_const does the
;; expansion; the expander FAILs whenever that helper returns false.
(define_expand "vec_perm_const<mode>"
  [(match_operand:VWHB 0 "register_operand")
   (match_operand:VWHB 1 "register_operand")
   (match_operand:VWHB 2 "register_operand")
   (match_operand:VWHB 3)]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
{
  if (!mips_expand_vec_perm_const (operands))
    FAIL;
  DONE;
})
799
;; vec_unpacks_lo for halfword/byte vectors, delegated to
;; mips_expand_vec_unpack with both flags false.
;; NOTE(review): the two flags presumably select unsigned extension
;; and the high half respectively -- confirm against the helper.
(define_expand "vec_unpacks_lo_<mode>"
  [(match_operand:<V_stretch_half> 0 "register_operand")
   (match_operand:VHB 1 "register_operand")]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
{
  mips_expand_vec_unpack (operands,
                          false,
                          false);
  DONE;
})
808
;; vec_unpacks_hi for halfword/byte vectors, delegated to
;; mips_expand_vec_unpack with flags (false, true).
;; NOTE(review): the two flags presumably select unsigned extension
;; and the high half respectively -- confirm against the helper.
(define_expand "vec_unpacks_hi_<mode>"
  [(match_operand:<V_stretch_half> 0 "register_operand")
   (match_operand:VHB 1 "register_operand")]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
{
  mips_expand_vec_unpack (operands,
                          false,
                          true);
  DONE;
})
817
;; vec_unpacku_lo for halfword/byte vectors, delegated to
;; mips_expand_vec_unpack with flags (true, false).
;; NOTE(review): the two flags presumably select unsigned extension
;; and the high half respectively -- confirm against the helper.
(define_expand "vec_unpacku_lo_<mode>"
  [(match_operand:<V_stretch_half> 0 "register_operand")
   (match_operand:VHB 1 "register_operand")]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
{
  mips_expand_vec_unpack (operands,
                          true,
                          false);
  DONE;
})
826
;; vec_unpacku_hi for halfword/byte vectors, delegated to
;; mips_expand_vec_unpack with both flags true.
;; NOTE(review): the two flags presumably select unsigned extension
;; and the high half respectively -- confirm against the helper.
(define_expand "vec_unpacku_hi_<mode>"
  [(match_operand:<V_stretch_half> 0 "register_operand")
   (match_operand:VHB 1 "register_operand")]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
{
  mips_expand_vec_unpack (operands,
                          true,
                          true);
  DONE;
})
835
;; Whole-vector left shift (used for reduction epilogues).  The shift
;; count is an SImode value in an FP register; the operation is an
;; opaque unspec emitted as dsll.
(define_insn "vec_shl_<mode>"
  [(set (match_operand:VWHBDI 0 "register_operand" "=f")
        (unspec:VWHBDI
          [(match_operand:VWHBDI 1 "register_operand" "f")
           (match_operand:SI 2 "register_operand" "f")]
          UNSPEC_LOONGSON_DSLL))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "dsll\t%0,%1,%2"
  [(set_attr "type" "fcvt")])
845
;; Whole-vector right shift.  The shift count is an SImode value in
;; an FP register; the operation is an opaque unspec emitted as dsrl.
(define_insn "vec_shr_<mode>"
  [(set (match_operand:VWHBDI 0 "register_operand" "=f")
        (unspec:VWHBDI
          [(match_operand:VWHBDI 1 "register_operand" "f")
           (match_operand:SI 2 "register_operand" "f")]
          UNSPEC_LOONGSON_DSRL))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "dsrl\t%0,%1,%2"
  [(set_attr "type" "fcvt")])
854
;; Extract element 0 of a vector held in an FP register into a
;; scalar register of the element mode.
(define_insn "vec_loongson_extract_lo_<mode>"
  [(set (match_operand:<V_inner> 0 "register_operand" "=r")
        (vec_select:<V_inner>
          (match_operand:VWHB 1 "register_operand" "f")
          (parallel
            [(const_int 0)])))]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
  "mfc1\t%0,%1"
  [(set_attr "type" "mfc")])
863
;; Scalar plus-reduction: reduce operand 1 into a temporary vector via
;; mips_expand_vec_reduc with the add generator, then extract element
;; 0 of that temporary into operand 0.
(define_expand "reduc_plus_scal_<mode>"
  [(match_operand:<V_inner> 0 "register_operand")
   (match_operand:VWHB 1 "register_operand")]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
{
  rtx reduced = gen_reg_rtx (GET_MODE (operands[1]));

  mips_expand_vec_reduc (reduced, operands[1], gen_add<mode>3);
  emit_insn (gen_vec_loongson_extract_lo_<mode> (operands[0], reduced));
  DONE;
})
874
;; Scalar signed-max reduction: reduce operand 1 into a temporary
;; vector via mips_expand_vec_reduc with the smax generator, then
;; extract element 0 of that temporary into operand 0.
(define_expand "reduc_smax_scal_<mode>"
  [(match_operand:<V_inner> 0 "register_operand")
   (match_operand:VWHB 1 "register_operand")]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
{
  rtx reduced = gen_reg_rtx (GET_MODE (operands[1]));

  mips_expand_vec_reduc (reduced, operands[1], gen_smax<mode>3);
  emit_insn (gen_vec_loongson_extract_lo_<mode> (operands[0], reduced));
  DONE;
})
885
;; Scalar signed-min reduction: reduce operand 1 into a temporary
;; vector via mips_expand_vec_reduc with the smin generator, then
;; extract element 0 of that temporary into operand 0.
(define_expand "reduc_smin_scal_<mode>"
  [(match_operand:<V_inner> 0 "register_operand")
   (match_operand:VWHB 1 "register_operand")]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
{
  rtx reduced = gen_reg_rtx (GET_MODE (operands[1]));

  mips_expand_vec_reduc (reduced, operands[1], gen_smin<mode>3);
  emit_insn (gen_vec_loongson_extract_lo_<mode> (operands[0], reduced));
  DONE;
})
896
;; Scalar unsigned-max reduction for byte vectors: reduce operand 1
;; into a temporary vector via mips_expand_vec_reduc with the umax
;; generator, then extract element 0 of that temporary into operand 0.
(define_expand "reduc_umax_scal_<mode>"
  [(match_operand:<V_inner> 0 "register_operand")
   (match_operand:VB 1 "register_operand")]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
{
  rtx reduced = gen_reg_rtx (GET_MODE (operands[1]));

  mips_expand_vec_reduc (reduced, operands[1], gen_umax<mode>3);
  emit_insn (gen_vec_loongson_extract_lo_<mode> (operands[0], reduced));
  DONE;
})
907
;; Scalar unsigned-min reduction for byte vectors: reduce operand 1
;; into a temporary vector via mips_expand_vec_reduc with the umin
;; generator, then extract element 0 of that temporary into operand 0.
(define_expand "reduc_umin_scal_<mode>"
  [(match_operand:<V_inner> 0 "register_operand")
   (match_operand:VB 1 "register_operand")]
  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
{
  rtx reduced = gen_reg_rtx (GET_MODE (operands[1]));

  mips_expand_vec_reduc (reduced, operands[1], gen_umin<mode>3);
  emit_insn (gen_vec_loongson_extract_lo_<mode> (operands[0], reduced));
  DONE;
})