sse.md ("*divv4sf3"): Rename to "sse_divv4sf3".
gcc/config/i386/i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "tm_p.h"
28 #include "regs.h"
29 #include "hard-reg-set.h"
30 #include "real.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "output.h"
34 #include "insn-codes.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "except.h"
38 #include "function.h"
39 #include "recog.h"
40 #include "expr.h"
41 #include "optabs.h"
42 #include "toplev.h"
43 #include "basic-block.h"
44 #include "ggc.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "langhooks.h"
48 #include "cgraph.h"
49 #include "tree-gimple.h"
50 #include "dwarf2.h"
51 #include "df.h"
52 #include "tm-constrs.h"
53 #include "params.h"
54
55 static int x86_builtin_vectorization_cost (bool);
56
57 #ifndef CHECK_STACK_LIMIT
58 #define CHECK_STACK_LIMIT (-1)
59 #endif
60
61 /* Return index of given mode in mult and division cost tables. */
62 #define MODE_INDEX(mode) \
63 ((mode) == QImode ? 0 \
64 : (mode) == HImode ? 1 \
65 : (mode) == SImode ? 2 \
66 : (mode) == DImode ? 3 \
67 : 4)
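/* Illustrative example of the mapping above (a sketch only; the array name
   mult_costs is a stand-in, not a real field): given one of the per-mode
   cost rows declared in processor_costs, a lookup is simply

     costs->mult_costs[MODE_INDEX (SImode)]    -- index 2, the SImode entry

   and any mode other than QI/HI/SI/DImode lands in the "other" slot,
   index 4.  */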
68
69 /* Processor costs (relative to an add) */
70 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
71 #define COSTS_N_BYTES(N) ((N) * 2)
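/* A quick sanity check on the scale (derived from the comment above, which
   assumes COSTS_N_INSNS (N) == (N) * 4 and a 2-byte add): COSTS_N_BYTES (2)
   evaluates to 4, i.e. a 2-byte instruction in the size tables costs the
   same as COSTS_N_INSNS (1) in the speed tables, so the size-tuned and
   speed-tuned tables stay on a comparable scale.  */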
72
73 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
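/* A reading aid for the memcpy/memset descriptors in the tables below (the
   real type lives elsewhere; this just spells out the convention): each
   descriptor is roughly

     {alg_for_unknown_size, {{max_size_1, alg_1}, ..., {-1, alg_last}}}

   i.e. an algorithm to use when the block size is not known at compile
   time, followed by {size, algorithm} pairs tried in order for known
   sizes, with -1 acting as the "everything larger" terminator.  So
   DUMMY_STRINGOP_ALGS simply says "always use a libcall".  */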
74
75 static const
76 struct processor_costs size_cost = { /* costs for tuning for size */
77 COSTS_N_BYTES (2), /* cost of an add instruction */
78 COSTS_N_BYTES (3), /* cost of a lea instruction */
79 COSTS_N_BYTES (2), /* variable shift costs */
80 COSTS_N_BYTES (3), /* constant shift costs */
81 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
82 COSTS_N_BYTES (3), /* HI */
83 COSTS_N_BYTES (3), /* SI */
84 COSTS_N_BYTES (3), /* DI */
85 COSTS_N_BYTES (5)}, /* other */
86 0, /* cost of multiply per each bit set */
87 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
88 COSTS_N_BYTES (3), /* HI */
89 COSTS_N_BYTES (3), /* SI */
90 COSTS_N_BYTES (3), /* DI */
91 COSTS_N_BYTES (5)}, /* other */
92 COSTS_N_BYTES (3), /* cost of movsx */
93 COSTS_N_BYTES (3), /* cost of movzx */
94 0, /* "large" insn */
95 2, /* MOVE_RATIO */
96 2, /* cost for loading QImode using movzbl */
97 {2, 2, 2}, /* cost of loading integer registers
98 in QImode, HImode and SImode.
99 Relative to reg-reg move (2). */
100 {2, 2, 2}, /* cost of storing integer registers */
101 2, /* cost of reg,reg fld/fst */
102 {2, 2, 2}, /* cost of loading fp registers
103 in SFmode, DFmode and XFmode */
104 {2, 2, 2}, /* cost of storing fp registers
105 in SFmode, DFmode and XFmode */
106 3, /* cost of moving MMX register */
107 {3, 3}, /* cost of loading MMX registers
108 in SImode and DImode */
109 {3, 3}, /* cost of storing MMX registers
110 in SImode and DImode */
111 3, /* cost of moving SSE register */
112 {3, 3, 3}, /* cost of loading SSE registers
113 in SImode, DImode and TImode */
114 {3, 3, 3}, /* cost of storing SSE registers
115 in SImode, DImode and TImode */
116 3, /* MMX or SSE register to integer */
117 0, /* size of l1 cache */
118 0, /* size of l2 cache */
119 0, /* size of prefetch block */
120 0, /* number of parallel prefetches */
121 2, /* Branch cost */
122 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
123 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
124 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
125 COSTS_N_BYTES (2), /* cost of FABS instruction. */
126 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
127 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
128 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
129 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
130 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
131 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
132 1, /* scalar_stmt_cost. */
133 1, /* scalar_load_cost. */
134 1, /* scalar_store_cost. */
135 1, /* vec_stmt_cost. */
136 1, /* vec_to_scalar_cost. */
137 1, /* scalar_to_vec_cost. */
138 1, /* vec_align_load_cost. */
139 1, /* vec_unalign_load_cost. */
140 1, /* vec_store_cost. */
141 1, /* cond_taken_branch_cost. */
142 1, /* cond_not_taken_branch_cost. */
143 };
144
145 /* Processor costs (relative to an add) */
146 static const
147 struct processor_costs i386_cost = { /* 386 specific costs */
148 COSTS_N_INSNS (1), /* cost of an add instruction */
149 COSTS_N_INSNS (1), /* cost of a lea instruction */
150 COSTS_N_INSNS (3), /* variable shift costs */
151 COSTS_N_INSNS (2), /* constant shift costs */
152 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
153 COSTS_N_INSNS (6), /* HI */
154 COSTS_N_INSNS (6), /* SI */
155 COSTS_N_INSNS (6), /* DI */
156 COSTS_N_INSNS (6)}, /* other */
157 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
158 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
159 COSTS_N_INSNS (23), /* HI */
160 COSTS_N_INSNS (23), /* SI */
161 COSTS_N_INSNS (23), /* DI */
162 COSTS_N_INSNS (23)}, /* other */
163 COSTS_N_INSNS (3), /* cost of movsx */
164 COSTS_N_INSNS (2), /* cost of movzx */
165 15, /* "large" insn */
166 3, /* MOVE_RATIO */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175 {8, 8, 8}, /* cost of storing fp registers
176 in SFmode, DFmode and XFmode */
177 2, /* cost of moving MMX register */
178 {4, 8}, /* cost of loading MMX registers
179 in SImode and DImode */
180 {4, 8}, /* cost of storing MMX registers
181 in SImode and DImode */
182 2, /* cost of moving SSE register */
183 {4, 8, 16}, /* cost of loading SSE registers
184 in SImode, DImode and TImode */
185 {4, 8, 16}, /* cost of storing SSE registers
186 in SImode, DImode and TImode */
187 3, /* MMX or SSE register to integer */
188 0, /* size of l1 cache */
189 0, /* size of l2 cache */
190 0, /* size of prefetch block */
191 0, /* number of parallel prefetches */
192 1, /* Branch cost */
193 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
194 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
195 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
196 COSTS_N_INSNS (22), /* cost of FABS instruction. */
197 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
198 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
199 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
200 DUMMY_STRINGOP_ALGS},
201 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
202 DUMMY_STRINGOP_ALGS},
203 1, /* scalar_stmt_cost. */
204 1, /* scalar_load_cost. */
205 1, /* scalar_store_cost. */
206 1, /* vec_stmt_cost. */
207 1, /* vec_to_scalar_cost. */
208 1, /* scalar_to_vec_cost. */
209 1, /* vec_align_load_cost. */
210 2, /* vec_unalign_load_cost. */
211 1, /* vec_store_cost. */
212 3, /* cond_taken_branch_cost. */
213 1, /* cond_not_taken_branch_cost. */
214 };
215
216 static const
217 struct processor_costs i486_cost = { /* 486 specific costs */
218 COSTS_N_INSNS (1), /* cost of an add instruction */
219 COSTS_N_INSNS (1), /* cost of a lea instruction */
220 COSTS_N_INSNS (3), /* variable shift costs */
221 COSTS_N_INSNS (2), /* constant shift costs */
222 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
223 COSTS_N_INSNS (12), /* HI */
224 COSTS_N_INSNS (12), /* SI */
225 COSTS_N_INSNS (12), /* DI */
226 COSTS_N_INSNS (12)}, /* other */
227 1, /* cost of multiply per each bit set */
228 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
229 COSTS_N_INSNS (40), /* HI */
230 COSTS_N_INSNS (40), /* SI */
231 COSTS_N_INSNS (40), /* DI */
232 COSTS_N_INSNS (40)}, /* other */
233 COSTS_N_INSNS (3), /* cost of movsx */
234 COSTS_N_INSNS (2), /* cost of movzx */
235 15, /* "large" insn */
236 3, /* MOVE_RATIO */
237 4, /* cost for loading QImode using movzbl */
238 {2, 4, 2}, /* cost of loading integer registers
239 in QImode, HImode and SImode.
240 Relative to reg-reg move (2). */
241 {2, 4, 2}, /* cost of storing integer registers */
242 2, /* cost of reg,reg fld/fst */
243 {8, 8, 8}, /* cost of loading fp registers
244 in SFmode, DFmode and XFmode */
245 {8, 8, 8}, /* cost of storing fp registers
246 in SFmode, DFmode and XFmode */
247 2, /* cost of moving MMX register */
248 {4, 8}, /* cost of loading MMX registers
249 in SImode and DImode */
250 {4, 8}, /* cost of storing MMX registers
251 in SImode and DImode */
252 2, /* cost of moving SSE register */
253 {4, 8, 16}, /* cost of loading SSE registers
254 in SImode, DImode and TImode */
255 {4, 8, 16}, /* cost of storing SSE registers
256 in SImode, DImode and TImode */
257 3, /* MMX or SSE register to integer */
258 4, /* size of l1 cache. 486 has 8kB cache
259 shared for code and data, so 4kB is
260 not really precise. */
261 4, /* size of l2 cache */
262 0, /* size of prefetch block */
263 0, /* number of parallel prefetches */
264 1, /* Branch cost */
265 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
266 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
267 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
268 COSTS_N_INSNS (3), /* cost of FABS instruction. */
269 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
270 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
271 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
272 DUMMY_STRINGOP_ALGS},
273 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
274 DUMMY_STRINGOP_ALGS},
275 1, /* scalar_stmt_cost. */
276 1, /* scalar_load_cost. */
277 1, /* scalar_store_cost. */
278 1, /* vec_stmt_cost. */
279 1, /* vec_to_scalar_cost. */
280 1, /* scalar_to_vec_cost. */
281 1, /* vec_align_load_cost. */
282 2, /* vec_unalign_load_cost. */
283 1, /* vec_store_cost. */
284 3, /* cond_taken_branch_cost. */
285 1, /* cond_not_taken_branch_cost. */
286 };
287
288 static const
289 struct processor_costs pentium_cost = {
290 COSTS_N_INSNS (1), /* cost of an add instruction */
291 COSTS_N_INSNS (1), /* cost of a lea instruction */
292 COSTS_N_INSNS (4), /* variable shift costs */
293 COSTS_N_INSNS (1), /* constant shift costs */
294 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
295 COSTS_N_INSNS (11), /* HI */
296 COSTS_N_INSNS (11), /* SI */
297 COSTS_N_INSNS (11), /* DI */
298 COSTS_N_INSNS (11)}, /* other */
299 0, /* cost of multiply per each bit set */
300 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
301 COSTS_N_INSNS (25), /* HI */
302 COSTS_N_INSNS (25), /* SI */
303 COSTS_N_INSNS (25), /* DI */
304 COSTS_N_INSNS (25)}, /* other */
305 COSTS_N_INSNS (3), /* cost of movsx */
306 COSTS_N_INSNS (2), /* cost of movzx */
307 8, /* "large" insn */
308 6, /* MOVE_RATIO */
309 6, /* cost for loading QImode using movzbl */
310 {2, 4, 2}, /* cost of loading integer registers
311 in QImode, HImode and SImode.
312 Relative to reg-reg move (2). */
313 {2, 4, 2}, /* cost of storing integer registers */
314 2, /* cost of reg,reg fld/fst */
315 {2, 2, 6}, /* cost of loading fp registers
316 in SFmode, DFmode and XFmode */
317 {4, 4, 6}, /* cost of storing fp registers
318 in SFmode, DFmode and XFmode */
319 8, /* cost of moving MMX register */
320 {8, 8}, /* cost of loading MMX registers
321 in SImode and DImode */
322 {8, 8}, /* cost of storing MMX registers
323 in SImode and DImode */
324 2, /* cost of moving SSE register */
325 {4, 8, 16}, /* cost of loading SSE registers
326 in SImode, DImode and TImode */
327 {4, 8, 16}, /* cost of storing SSE registers
328 in SImode, DImode and TImode */
329 3, /* MMX or SSE register to integer */
330 8, /* size of l1 cache. */
331 8, /* size of l2 cache */
332 0, /* size of prefetch block */
333 0, /* number of parallel prefetches */
334 2, /* Branch cost */
335 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
336 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
337 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
338 COSTS_N_INSNS (1), /* cost of FABS instruction. */
339 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
340 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
341 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
342 DUMMY_STRINGOP_ALGS},
343 {{libcall, {{-1, rep_prefix_4_byte}}},
344 DUMMY_STRINGOP_ALGS},
345 1, /* scalar_stmt_cost. */
346 1, /* scalar_load_cost. */
347 1, /* scalar_store_cost. */
348 1, /* vec_stmt_cost. */
349 1, /* vec_to_scalar_cost. */
350 1, /* scalar_to_vec_cost. */
351 1, /* vec_align_load_cost. */
352 2, /* vec_unalign_load_cost. */
353 1, /* vec_store_cost. */
354 3, /* cond_taken_branch_cost. */
355 1, /* cond_not_taken_branch_cost. */
356 };
357
358 static const
359 struct processor_costs pentiumpro_cost = {
360 COSTS_N_INSNS (1), /* cost of an add instruction */
361 COSTS_N_INSNS (1), /* cost of a lea instruction */
362 COSTS_N_INSNS (1), /* variable shift costs */
363 COSTS_N_INSNS (1), /* constant shift costs */
364 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
365 COSTS_N_INSNS (4), /* HI */
366 COSTS_N_INSNS (4), /* SI */
367 COSTS_N_INSNS (4), /* DI */
368 COSTS_N_INSNS (4)}, /* other */
369 0, /* cost of multiply per each bit set */
370 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
371 COSTS_N_INSNS (17), /* HI */
372 COSTS_N_INSNS (17), /* SI */
373 COSTS_N_INSNS (17), /* DI */
374 COSTS_N_INSNS (17)}, /* other */
375 COSTS_N_INSNS (1), /* cost of movsx */
376 COSTS_N_INSNS (1), /* cost of movzx */
377 8, /* "large" insn */
378 6, /* MOVE_RATIO */
379 2, /* cost for loading QImode using movzbl */
380 {4, 4, 4}, /* cost of loading integer registers
381 in QImode, HImode and SImode.
382 Relative to reg-reg move (2). */
383 {2, 2, 2}, /* cost of storing integer registers */
384 2, /* cost of reg,reg fld/fst */
385 {2, 2, 6}, /* cost of loading fp registers
386 in SFmode, DFmode and XFmode */
387 {4, 4, 6}, /* cost of storing fp registers
388 in SFmode, DFmode and XFmode */
389 2, /* cost of moving MMX register */
390 {2, 2}, /* cost of loading MMX registers
391 in SImode and DImode */
392 {2, 2}, /* cost of storing MMX registers
393 in SImode and DImode */
394 2, /* cost of moving SSE register */
395 {2, 2, 8}, /* cost of loading SSE registers
396 in SImode, DImode and TImode */
397 {2, 2, 8}, /* cost of storing SSE registers
398 in SImode, DImode and TImode */
399 3, /* MMX or SSE register to integer */
400 8, /* size of l1 cache. */
401 256, /* size of l2 cache */
402 32, /* size of prefetch block */
403 6, /* number of parallel prefetches */
404 2, /* Branch cost */
405 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
406 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
407 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
408 COSTS_N_INSNS (2), /* cost of FABS instruction. */
409 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
410 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
411 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
412 the alignment). For small blocks an inline loop is still a noticeable win; for bigger
413 blocks either rep movsl or rep movsb is the way to go. Rep movsb apparently has a
414 more expensive startup time in the CPU, but after 4K the difference is down in the
415 noise. */
416 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
417 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
418 DUMMY_STRINGOP_ALGS},
419 {{rep_prefix_4_byte, {{1024, unrolled_loop},
420 {8192, rep_prefix_4_byte}, {-1, libcall}}},
421 DUMMY_STRINGOP_ALGS},
422 1, /* scalar_stmt_cost. */
423 1, /* scalar_load_cost. */
424 1, /* scalar_store_cost. */
425 1, /* vec_stmt_cost. */
426 1, /* vec_to_scalar_cost. */
427 1, /* scalar_to_vec_cost. */
428 1, /* vec_align_load_cost. */
429 2, /* vec_unalign_load_cost. */
430 1, /* vec_store_cost. */
431 3, /* cond_taken_branch_cost. */
432 1, /* cond_not_taken_branch_cost. */
433 };
434
435 static const
436 struct processor_costs geode_cost = {
437 COSTS_N_INSNS (1), /* cost of an add instruction */
438 COSTS_N_INSNS (1), /* cost of a lea instruction */
439 COSTS_N_INSNS (2), /* variable shift costs */
440 COSTS_N_INSNS (1), /* constant shift costs */
441 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
442 COSTS_N_INSNS (4), /* HI */
443 COSTS_N_INSNS (7), /* SI */
444 COSTS_N_INSNS (7), /* DI */
445 COSTS_N_INSNS (7)}, /* other */
446 0, /* cost of multiply per each bit set */
447 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
448 COSTS_N_INSNS (23), /* HI */
449 COSTS_N_INSNS (39), /* SI */
450 COSTS_N_INSNS (39), /* DI */
451 COSTS_N_INSNS (39)}, /* other */
452 COSTS_N_INSNS (1), /* cost of movsx */
453 COSTS_N_INSNS (1), /* cost of movzx */
454 8, /* "large" insn */
455 4, /* MOVE_RATIO */
456 1, /* cost for loading QImode using movzbl */
457 {1, 1, 1}, /* cost of loading integer registers
458 in QImode, HImode and SImode.
459 Relative to reg-reg move (2). */
460 {1, 1, 1}, /* cost of storing integer registers */
461 1, /* cost of reg,reg fld/fst */
462 {1, 1, 1}, /* cost of loading fp registers
463 in SFmode, DFmode and XFmode */
464 {4, 6, 6}, /* cost of storing fp registers
465 in SFmode, DFmode and XFmode */
466
467 1, /* cost of moving MMX register */
468 {1, 1}, /* cost of loading MMX registers
469 in SImode and DImode */
470 {1, 1}, /* cost of storing MMX registers
471 in SImode and DImode */
472 1, /* cost of moving SSE register */
473 {1, 1, 1}, /* cost of loading SSE registers
474 in SImode, DImode and TImode */
475 {1, 1, 1}, /* cost of storing SSE registers
476 in SImode, DImode and TImode */
477 1, /* MMX or SSE register to integer */
478 64, /* size of l1 cache. */
479 128, /* size of l2 cache. */
480 32, /* size of prefetch block */
481 1, /* number of parallel prefetches */
482 1, /* Branch cost */
483 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
484 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
485 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
486 COSTS_N_INSNS (1), /* cost of FABS instruction. */
487 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
488 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
489 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
490 DUMMY_STRINGOP_ALGS},
491 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
492 DUMMY_STRINGOP_ALGS},
493 1, /* scalar_stmt_cost. */
494 1, /* scalar_load_cost. */
495 1, /* scalar_store_cost. */
496 1, /* vec_stmt_cost. */
497 1, /* vec_to_scalar_cost. */
498 1, /* scalar_to_vec_cost. */
499 1, /* vec_align_load_cost. */
500 2, /* vec_unalign_load_cost. */
501 1, /* vec_store_cost. */
502 3, /* cond_taken_branch_cost. */
503 1, /* cond_not_taken_branch_cost. */
504 };
505
506 static const
507 struct processor_costs k6_cost = {
508 COSTS_N_INSNS (1), /* cost of an add instruction */
509 COSTS_N_INSNS (2), /* cost of a lea instruction */
510 COSTS_N_INSNS (1), /* variable shift costs */
511 COSTS_N_INSNS (1), /* constant shift costs */
512 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
513 COSTS_N_INSNS (3), /* HI */
514 COSTS_N_INSNS (3), /* SI */
515 COSTS_N_INSNS (3), /* DI */
516 COSTS_N_INSNS (3)}, /* other */
517 0, /* cost of multiply per each bit set */
518 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
519 COSTS_N_INSNS (18), /* HI */
520 COSTS_N_INSNS (18), /* SI */
521 COSTS_N_INSNS (18), /* DI */
522 COSTS_N_INSNS (18)}, /* other */
523 COSTS_N_INSNS (2), /* cost of movsx */
524 COSTS_N_INSNS (2), /* cost of movzx */
525 8, /* "large" insn */
526 4, /* MOVE_RATIO */
527 3, /* cost for loading QImode using movzbl */
528 {4, 5, 4}, /* cost of loading integer registers
529 in QImode, HImode and SImode.
530 Relative to reg-reg move (2). */
531 {2, 3, 2}, /* cost of storing integer registers */
532 4, /* cost of reg,reg fld/fst */
533 {6, 6, 6}, /* cost of loading fp registers
534 in SFmode, DFmode and XFmode */
535 {4, 4, 4}, /* cost of storing fp registers
536 in SFmode, DFmode and XFmode */
537 2, /* cost of moving MMX register */
538 {2, 2}, /* cost of loading MMX registers
539 in SImode and DImode */
540 {2, 2}, /* cost of storing MMX registers
541 in SImode and DImode */
542 2, /* cost of moving SSE register */
543 {2, 2, 8}, /* cost of loading SSE registers
544 in SImode, DImode and TImode */
545 {2, 2, 8}, /* cost of storing SSE registers
546 in SImode, DImode and TImode */
547 6, /* MMX or SSE register to integer */
548 32, /* size of l1 cache. */
549 32, /* size of l2 cache. Some models
550 have integrated l2 cache, but
551 optimizing for k6 is not important
552 enough to worry about that. */
553 32, /* size of prefetch block */
554 1, /* number of parallel prefetches */
555 1, /* Branch cost */
556 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
557 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
558 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
559 COSTS_N_INSNS (2), /* cost of FABS instruction. */
560 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
561 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
562 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
563 DUMMY_STRINGOP_ALGS},
564 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
565 DUMMY_STRINGOP_ALGS},
566 1, /* scalar_stmt_cost. */
567 1, /* scalar_load_cost. */
568 1, /* scalar_store_cost. */
569 1, /* vec_stmt_cost. */
570 1, /* vec_to_scalar_cost. */
571 1, /* scalar_to_vec_cost. */
572 1, /* vec_align_load_cost. */
573 2, /* vec_unalign_load_cost. */
574 1, /* vec_store_cost. */
575 3, /* cond_taken_branch_cost. */
576 1, /* cond_not_taken_branch_cost. */
577 };
578
579 static const
580 struct processor_costs athlon_cost = {
581 COSTS_N_INSNS (1), /* cost of an add instruction */
582 COSTS_N_INSNS (2), /* cost of a lea instruction */
583 COSTS_N_INSNS (1), /* variable shift costs */
584 COSTS_N_INSNS (1), /* constant shift costs */
585 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
586 COSTS_N_INSNS (5), /* HI */
587 COSTS_N_INSNS (5), /* SI */
588 COSTS_N_INSNS (5), /* DI */
589 COSTS_N_INSNS (5)}, /* other */
590 0, /* cost of multiply per each bit set */
591 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
592 COSTS_N_INSNS (26), /* HI */
593 COSTS_N_INSNS (42), /* SI */
594 COSTS_N_INSNS (74), /* DI */
595 COSTS_N_INSNS (74)}, /* other */
596 COSTS_N_INSNS (1), /* cost of movsx */
597 COSTS_N_INSNS (1), /* cost of movzx */
598 8, /* "large" insn */
599 9, /* MOVE_RATIO */
600 4, /* cost for loading QImode using movzbl */
601 {3, 4, 3}, /* cost of loading integer registers
602 in QImode, HImode and SImode.
603 Relative to reg-reg move (2). */
604 {3, 4, 3}, /* cost of storing integer registers */
605 4, /* cost of reg,reg fld/fst */
606 {4, 4, 12}, /* cost of loading fp registers
607 in SFmode, DFmode and XFmode */
608 {6, 6, 8}, /* cost of storing fp registers
609 in SFmode, DFmode and XFmode */
610 2, /* cost of moving MMX register */
611 {4, 4}, /* cost of loading MMX registers
612 in SImode and DImode */
613 {4, 4}, /* cost of storing MMX registers
614 in SImode and DImode */
615 2, /* cost of moving SSE register */
616 {4, 4, 6}, /* cost of loading SSE registers
617 in SImode, DImode and TImode */
618 {4, 4, 5}, /* cost of storing SSE registers
619 in SImode, DImode and TImode */
620 5, /* MMX or SSE register to integer */
621 64, /* size of l1 cache. */
622 256, /* size of l2 cache. */
623 64, /* size of prefetch block */
624 6, /* number of parallel prefetches */
625 5, /* Branch cost */
626 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
627 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
628 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
629 COSTS_N_INSNS (2), /* cost of FABS instruction. */
630 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
631 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
632 /* For some reason, Athlon deals better with the REP prefix (relative to loops)
633 than K8 does. Alignment becomes important after 8 bytes for memcpy and
634 128 bytes for memset. */
635 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
636 DUMMY_STRINGOP_ALGS},
637 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
638 DUMMY_STRINGOP_ALGS},
639 1, /* scalar_stmt_cost. */
640 1, /* scalar_load_cost. */
641 1, /* scalar_store_cost. */
642 1, /* vec_stmt_cost. */
643 1, /* vec_to_scalar_cost. */
644 1, /* scalar_to_vec_cost. */
645 1, /* vec_align_load_cost. */
646 2, /* vec_unalign_load_cost. */
647 1, /* vec_store_cost. */
648 3, /* cond_taken_branch_cost. */
649 1, /* cond_not_taken_branch_cost. */
650 };
651
652 static const
653 struct processor_costs k8_cost = {
654 COSTS_N_INSNS (1), /* cost of an add instruction */
655 COSTS_N_INSNS (2), /* cost of a lea instruction */
656 COSTS_N_INSNS (1), /* variable shift costs */
657 COSTS_N_INSNS (1), /* constant shift costs */
658 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
659 COSTS_N_INSNS (4), /* HI */
660 COSTS_N_INSNS (3), /* SI */
661 COSTS_N_INSNS (4), /* DI */
662 COSTS_N_INSNS (5)}, /* other */
663 0, /* cost of multiply per each bit set */
664 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
665 COSTS_N_INSNS (26), /* HI */
666 COSTS_N_INSNS (42), /* SI */
667 COSTS_N_INSNS (74), /* DI */
668 COSTS_N_INSNS (74)}, /* other */
669 COSTS_N_INSNS (1), /* cost of movsx */
670 COSTS_N_INSNS (1), /* cost of movzx */
671 8, /* "large" insn */
672 9, /* MOVE_RATIO */
673 4, /* cost for loading QImode using movzbl */
674 {3, 4, 3}, /* cost of loading integer registers
675 in QImode, HImode and SImode.
676 Relative to reg-reg move (2). */
677 {3, 4, 3}, /* cost of storing integer registers */
678 4, /* cost of reg,reg fld/fst */
679 {4, 4, 12}, /* cost of loading fp registers
680 in SFmode, DFmode and XFmode */
681 {6, 6, 8}, /* cost of storing fp registers
682 in SFmode, DFmode and XFmode */
683 2, /* cost of moving MMX register */
684 {3, 3}, /* cost of loading MMX registers
685 in SImode and DImode */
686 {4, 4}, /* cost of storing MMX registers
687 in SImode and DImode */
688 2, /* cost of moving SSE register */
689 {4, 3, 6}, /* cost of loading SSE registers
690 in SImode, DImode and TImode */
691 {4, 4, 5}, /* cost of storing SSE registers
692 in SImode, DImode and TImode */
693 5, /* MMX or SSE register to integer */
694 64, /* size of l1 cache. */
695 512, /* size of l2 cache. */
696 64, /* size of prefetch block */
697 /* New AMD processors never drop prefetches; if they cannot be performed
698 immediately, they are queued. We set the number of simultaneous prefetches
699 to a large constant to reflect this (it is probably not a good idea to leave
700 the number of prefetches entirely unlimited, as their execution also takes
701 some time). */
702 100, /* number of parallel prefetches */
703 5, /* Branch cost */
704 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
705 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
706 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
707 COSTS_N_INSNS (2), /* cost of FABS instruction. */
708 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
709 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
710 /* K8 has an optimized REP instruction for medium-sized blocks, but for very small
711 blocks it is better to use a loop. For large blocks, a libcall can do
712 nontemporal accesses and beat inlined code considerably. */
713 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
714 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
715 {{libcall, {{8, loop}, {24, unrolled_loop},
716 {2048, rep_prefix_4_byte}, {-1, libcall}}},
717 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
718 4, /* scalar_stmt_cost. */
719 2, /* scalar_load_cost. */
720 2, /* scalar_store_cost. */
721 5, /* vec_stmt_cost. */
722 0, /* vec_to_scalar_cost. */
723 2, /* scalar_to_vec_cost. */
724 2, /* vec_align_load_cost. */
725 3, /* vec_unalign_load_cost. */
726 3, /* vec_store_cost. */
727 6, /* cond_taken_branch_cost. */
728 1, /* cond_not_taken_branch_cost. */
729 };
730
731 struct processor_costs amdfam10_cost = {
732 COSTS_N_INSNS (1), /* cost of an add instruction */
733 COSTS_N_INSNS (2), /* cost of a lea instruction */
734 COSTS_N_INSNS (1), /* variable shift costs */
735 COSTS_N_INSNS (1), /* constant shift costs */
736 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
737 COSTS_N_INSNS (4), /* HI */
738 COSTS_N_INSNS (3), /* SI */
739 COSTS_N_INSNS (4), /* DI */
740 COSTS_N_INSNS (5)}, /* other */
741 0, /* cost of multiply per each bit set */
742 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
743 COSTS_N_INSNS (35), /* HI */
744 COSTS_N_INSNS (51), /* SI */
745 COSTS_N_INSNS (83), /* DI */
746 COSTS_N_INSNS (83)}, /* other */
747 COSTS_N_INSNS (1), /* cost of movsx */
748 COSTS_N_INSNS (1), /* cost of movzx */
749 8, /* "large" insn */
750 9, /* MOVE_RATIO */
751 4, /* cost for loading QImode using movzbl */
752 {3, 4, 3}, /* cost of loading integer registers
753 in QImode, HImode and SImode.
754 Relative to reg-reg move (2). */
755 {3, 4, 3}, /* cost of storing integer registers */
756 4, /* cost of reg,reg fld/fst */
757 {4, 4, 12}, /* cost of loading fp registers
758 in SFmode, DFmode and XFmode */
759 {6, 6, 8}, /* cost of storing fp registers
760 in SFmode, DFmode and XFmode */
761 2, /* cost of moving MMX register */
762 {3, 3}, /* cost of loading MMX registers
763 in SImode and DImode */
764 {4, 4}, /* cost of storing MMX registers
765 in SImode and DImode */
766 2, /* cost of moving SSE register */
767 {4, 4, 3}, /* cost of loading SSE registers
768 in SImode, DImode and TImode */
769 {4, 4, 5}, /* cost of storing SSE registers
770 in SImode, DImode and TImode */
771 3, /* MMX or SSE register to integer */
772 /* On K8
773 MOVD reg64, xmmreg Double FSTORE 4
774 MOVD reg32, xmmreg Double FSTORE 4
775 On AMDFAM10
776 MOVD reg64, xmmreg Double FADD 3
777 1/1 1/1
778 MOVD reg32, xmmreg Double FADD 3
779 1/1 1/1 */
780 64, /* size of l1 cache. */
781 512, /* size of l2 cache. */
782 64, /* size of prefetch block */
783 /* New AMD processors never drop prefetches; if they cannot be performed
784 immediately, they are queued. We set the number of simultaneous prefetches
785 to a large constant to reflect this (it is probably not a good idea to leave
786 the number of prefetches entirely unlimited, as their execution also takes
787 some time). */
788 100, /* number of parallel prefetches */
789 5, /* Branch cost */
790 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
791 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
792 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
793 COSTS_N_INSNS (2), /* cost of FABS instruction. */
794 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
795 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
796
797 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
798 very small blocks it is better to use a loop. For large blocks, a libcall can
799 do nontemporal accesses and beat inlined code considerably. */
800 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
801 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
802 {{libcall, {{8, loop}, {24, unrolled_loop},
803 {2048, rep_prefix_4_byte}, {-1, libcall}}},
804 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
805 4, /* scalar_stmt_cost. */
806 2, /* scalar_load_cost. */
807 2, /* scalar_store_cost. */
808 6, /* vec_stmt_cost. */
809 0, /* vec_to_scalar_cost. */
810 2, /* scalar_to_vec_cost. */
811 2, /* vec_align_load_cost. */
812 2, /* vec_unalign_load_cost. */
813 2, /* vec_store_cost. */
814 6, /* cond_taken_branch_cost. */
815 1, /* cond_not_taken_branch_cost. */
816 };
817
818 static const
819 struct processor_costs pentium4_cost = {
820 COSTS_N_INSNS (1), /* cost of an add instruction */
821 COSTS_N_INSNS (3), /* cost of a lea instruction */
822 COSTS_N_INSNS (4), /* variable shift costs */
823 COSTS_N_INSNS (4), /* constant shift costs */
824 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
825 COSTS_N_INSNS (15), /* HI */
826 COSTS_N_INSNS (15), /* SI */
827 COSTS_N_INSNS (15), /* DI */
828 COSTS_N_INSNS (15)}, /* other */
829 0, /* cost of multiply per each bit set */
830 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
831 COSTS_N_INSNS (56), /* HI */
832 COSTS_N_INSNS (56), /* SI */
833 COSTS_N_INSNS (56), /* DI */
834 COSTS_N_INSNS (56)}, /* other */
835 COSTS_N_INSNS (1), /* cost of movsx */
836 COSTS_N_INSNS (1), /* cost of movzx */
837 16, /* "large" insn */
838 6, /* MOVE_RATIO */
839 2, /* cost for loading QImode using movzbl */
840 {4, 5, 4}, /* cost of loading integer registers
841 in QImode, HImode and SImode.
842 Relative to reg-reg move (2). */
843 {2, 3, 2}, /* cost of storing integer registers */
844 2, /* cost of reg,reg fld/fst */
845 {2, 2, 6}, /* cost of loading fp registers
846 in SFmode, DFmode and XFmode */
847 {4, 4, 6}, /* cost of storing fp registers
848 in SFmode, DFmode and XFmode */
849 2, /* cost of moving MMX register */
850 {2, 2}, /* cost of loading MMX registers
851 in SImode and DImode */
852 {2, 2}, /* cost of storing MMX registers
853 in SImode and DImode */
854 12, /* cost of moving SSE register */
855 {12, 12, 12}, /* cost of loading SSE registers
856 in SImode, DImode and TImode */
857 {2, 2, 8}, /* cost of storing SSE registers
858 in SImode, DImode and TImode */
859 10, /* MMX or SSE register to integer */
860 8, /* size of l1 cache. */
861 256, /* size of l2 cache. */
862 64, /* size of prefetch block */
863 6, /* number of parallel prefetches */
864 2, /* Branch cost */
865 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
866 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
867 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
868 COSTS_N_INSNS (2), /* cost of FABS instruction. */
869 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
870 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
871 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
872 DUMMY_STRINGOP_ALGS},
873 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
874 {-1, libcall}}},
875 DUMMY_STRINGOP_ALGS},
876 1, /* scalar_stmt_cost. */
877 1, /* scalar_load_cost. */
878 1, /* scalar_store_cost. */
879 1, /* vec_stmt_cost. */
880 1, /* vec_to_scalar_cost. */
881 1, /* scalar_to_vec_cost. */
882 1, /* vec_align_load_cost. */
883 2, /* vec_unalign_load_cost. */
884 1, /* vec_store_cost. */
885 3, /* cond_taken_branch_cost. */
886 1, /* cond_not_taken_branch_cost. */
887 };
888
889 static const
890 struct processor_costs nocona_cost = {
891 COSTS_N_INSNS (1), /* cost of an add instruction */
892 COSTS_N_INSNS (1), /* cost of a lea instruction */
893 COSTS_N_INSNS (1), /* variable shift costs */
894 COSTS_N_INSNS (1), /* constant shift costs */
895 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
896 COSTS_N_INSNS (10), /* HI */
897 COSTS_N_INSNS (10), /* SI */
898 COSTS_N_INSNS (10), /* DI */
899 COSTS_N_INSNS (10)}, /* other */
900 0, /* cost of multiply per each bit set */
901 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
902 COSTS_N_INSNS (66), /* HI */
903 COSTS_N_INSNS (66), /* SI */
904 COSTS_N_INSNS (66), /* DI */
905 COSTS_N_INSNS (66)}, /* other */
906 COSTS_N_INSNS (1), /* cost of movsx */
907 COSTS_N_INSNS (1), /* cost of movzx */
908 16, /* "large" insn */
909 17, /* MOVE_RATIO */
910 4, /* cost for loading QImode using movzbl */
911 {4, 4, 4}, /* cost of loading integer registers
912 in QImode, HImode and SImode.
913 Relative to reg-reg move (2). */
914 {4, 4, 4}, /* cost of storing integer registers */
915 3, /* cost of reg,reg fld/fst */
916 {12, 12, 12}, /* cost of loading fp registers
917 in SFmode, DFmode and XFmode */
918 {4, 4, 4}, /* cost of storing fp registers
919 in SFmode, DFmode and XFmode */
920 6, /* cost of moving MMX register */
921 {12, 12}, /* cost of loading MMX registers
922 in SImode and DImode */
923 {12, 12}, /* cost of storing MMX registers
924 in SImode and DImode */
925 6, /* cost of moving SSE register */
926 {12, 12, 12}, /* cost of loading SSE registers
927 in SImode, DImode and TImode */
928 {12, 12, 12}, /* cost of storing SSE registers
929 in SImode, DImode and TImode */
930 8, /* MMX or SSE register to integer */
931 8, /* size of l1 cache. */
932 1024, /* size of l2 cache. */
933 128, /* size of prefetch block */
934 8, /* number of parallel prefetches */
935 1, /* Branch cost */
936 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
937 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
938 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
939 COSTS_N_INSNS (3), /* cost of FABS instruction. */
940 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
941 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
942 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
943 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
944 {100000, unrolled_loop}, {-1, libcall}}}},
945 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
946 {-1, libcall}}},
947 {libcall, {{24, loop}, {64, unrolled_loop},
948 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
949 1, /* scalar_stmt_cost. */
950 1, /* scalar_load_cost. */
951 1, /* scalar_store_cost. */
952 1, /* vec_stmt_cost. */
953 1, /* vec_to_scalar_cost. */
954 1, /* scalar_to_vec_cost. */
955 1, /* vec_align_load_cost. */
956 2, /* vec_unalign_load_cost. */
957 1, /* vec_store_cost. */
958 3, /* cond_taken_branch_cost. */
959 1, /* cond_not_taken_branch_cost. */
960 };
961
962 static const
963 struct processor_costs core2_cost = {
964 COSTS_N_INSNS (1), /* cost of an add instruction */
965 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
966 COSTS_N_INSNS (1), /* variable shift costs */
967 COSTS_N_INSNS (1), /* constant shift costs */
968 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
969 COSTS_N_INSNS (3), /* HI */
970 COSTS_N_INSNS (3), /* SI */
971 COSTS_N_INSNS (3), /* DI */
972 COSTS_N_INSNS (3)}, /* other */
973 0, /* cost of multiply per each bit set */
974 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
975 COSTS_N_INSNS (22), /* HI */
976 COSTS_N_INSNS (22), /* SI */
977 COSTS_N_INSNS (22), /* DI */
978 COSTS_N_INSNS (22)}, /* other */
979 COSTS_N_INSNS (1), /* cost of movsx */
980 COSTS_N_INSNS (1), /* cost of movzx */
981 8, /* "large" insn */
982 16, /* MOVE_RATIO */
983 2, /* cost for loading QImode using movzbl */
984 {6, 6, 6}, /* cost of loading integer registers
985 in QImode, HImode and SImode.
986 Relative to reg-reg move (2). */
987 {4, 4, 4}, /* cost of storing integer registers */
988 2, /* cost of reg,reg fld/fst */
989 {6, 6, 6}, /* cost of loading fp registers
990 in SFmode, DFmode and XFmode */
991 {4, 4, 4}, /* cost of storing fp registers
 in SFmode, DFmode and XFmode */
992 2, /* cost of moving MMX register */
993 {6, 6}, /* cost of loading MMX registers
994 in SImode and DImode */
995 {4, 4}, /* cost of storing MMX registers
996 in SImode and DImode */
997 2, /* cost of moving SSE register */
998 {6, 6, 6}, /* cost of loading SSE registers
999 in SImode, DImode and TImode */
1000 {4, 4, 4}, /* cost of storing SSE registers
1001 in SImode, DImode and TImode */
1002 2, /* MMX or SSE register to integer */
1003 32, /* size of l1 cache. */
1004 2048, /* size of l2 cache. */
1005 128, /* size of prefetch block */
1006 8, /* number of parallel prefetches */
1007 3, /* Branch cost */
1008 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1009 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1010 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1011 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1012 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1013 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1014 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1015 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1016 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1017 {{libcall, {{8, loop}, {15, unrolled_loop},
1018 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1019 {libcall, {{24, loop}, {32, unrolled_loop},
1020 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1021 1, /* scalar_stmt_cost. */
1022 1, /* scalar_load_cost. */
1023 1, /* scalar_store_cost. */
1024 1, /* vec_stmt_cost. */
1025 1, /* vec_to_scalar_cost. */
1026 1, /* scalar_to_vec_cost. */
1027 1, /* vec_align_load_cost. */
1028 2, /* vec_unalign_load_cost. */
1029 1, /* vec_store_cost. */
1030 3, /* cond_taken_branch_cost. */
1031 1, /* cond_not_taken_branch_cost. */
1032 };
1033
1034 /* Generic64 should produce code tuned for Nocona and K8. */
1035 static const
1036 struct processor_costs generic64_cost = {
1037 COSTS_N_INSNS (1), /* cost of an add instruction */
1038 /* On all chips taken into consideration, lea is 2 cycles or more. With
1039 that cost, however, our current implementation of synth_mult results in
1040 the use of unnecessary temporary registers, causing a regression on several
1041 SPECfp benchmarks. */
1042 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1043 COSTS_N_INSNS (1), /* variable shift costs */
1044 COSTS_N_INSNS (1), /* constant shift costs */
1045 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1046 COSTS_N_INSNS (4), /* HI */
1047 COSTS_N_INSNS (3), /* SI */
1048 COSTS_N_INSNS (4), /* DI */
1049 COSTS_N_INSNS (2)}, /* other */
1050 0, /* cost of multiply per each bit set */
1051 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1052 COSTS_N_INSNS (26), /* HI */
1053 COSTS_N_INSNS (42), /* SI */
1054 COSTS_N_INSNS (74), /* DI */
1055 COSTS_N_INSNS (74)}, /* other */
1056 COSTS_N_INSNS (1), /* cost of movsx */
1057 COSTS_N_INSNS (1), /* cost of movzx */
1058 8, /* "large" insn */
1059 17, /* MOVE_RATIO */
1060 4, /* cost for loading QImode using movzbl */
1061 {4, 4, 4}, /* cost of loading integer registers
1062 in QImode, HImode and SImode.
1063 Relative to reg-reg move (2). */
1064 {4, 4, 4}, /* cost of storing integer registers */
1065 4, /* cost of reg,reg fld/fst */
1066 {12, 12, 12}, /* cost of loading fp registers
1067 in SFmode, DFmode and XFmode */
1068 {6, 6, 8}, /* cost of storing fp registers
1069 in SFmode, DFmode and XFmode */
1070 2, /* cost of moving MMX register */
1071 {8, 8}, /* cost of loading MMX registers
1072 in SImode and DImode */
1073 {8, 8}, /* cost of storing MMX registers
1074 in SImode and DImode */
1075 2, /* cost of moving SSE register */
1076 {8, 8, 8}, /* cost of loading SSE registers
1077 in SImode, DImode and TImode */
1078 {8, 8, 8}, /* cost of storing SSE registers
1079 in SImode, DImode and TImode */
1080 5, /* MMX or SSE register to integer */
1081 32, /* size of l1 cache. */
1082 512, /* size of l2 cache. */
1083 64, /* size of prefetch block */
1084 6, /* number of parallel prefetches */
1085 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
1086 is increased to the perhaps more appropriate value of 5. */
1087 3, /* Branch cost */
1088 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1089 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1090 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1091 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1092 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1093 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1094 {DUMMY_STRINGOP_ALGS,
1095 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1096 {DUMMY_STRINGOP_ALGS,
1097 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1098 1, /* scalar_stmt_cost. */
1099 1, /* scalar_load_cost. */
1100 1, /* scalar_store_cost. */
1101 1, /* vec_stmt_cost. */
1102 1, /* vec_to_scalar_cost. */
1103 1, /* scalar_to_vec_cost. */
1104 1, /* vec_align_load_cost. */
1105 2, /* vec_unalign_load_cost. */
1106 1, /* vec_store_cost. */
1107 3, /* cond_taken_branch_cost. */
1108 1, /* cond_not_taken_branch_cost. */
1109 };
1110
1111 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1112 static const
1113 struct processor_costs generic32_cost = {
1114 COSTS_N_INSNS (1), /* cost of an add instruction */
1115 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1116 COSTS_N_INSNS (1), /* variable shift costs */
1117 COSTS_N_INSNS (1), /* constant shift costs */
1118 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1119 COSTS_N_INSNS (4), /* HI */
1120 COSTS_N_INSNS (3), /* SI */
1121 COSTS_N_INSNS (4), /* DI */
1122 COSTS_N_INSNS (2)}, /* other */
1123 0, /* cost of multiply per each bit set */
1124 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1125 COSTS_N_INSNS (26), /* HI */
1126 COSTS_N_INSNS (42), /* SI */
1127 COSTS_N_INSNS (74), /* DI */
1128 COSTS_N_INSNS (74)}, /* other */
1129 COSTS_N_INSNS (1), /* cost of movsx */
1130 COSTS_N_INSNS (1), /* cost of movzx */
1131 8, /* "large" insn */
1132 17, /* MOVE_RATIO */
1133 4, /* cost for loading QImode using movzbl */
1134 {4, 4, 4}, /* cost of loading integer registers
1135 in QImode, HImode and SImode.
1136 Relative to reg-reg move (2). */
1137 {4, 4, 4}, /* cost of storing integer registers */
1138 4, /* cost of reg,reg fld/fst */
1139 {12, 12, 12}, /* cost of loading fp registers
1140 in SFmode, DFmode and XFmode */
1141 {6, 6, 8}, /* cost of storing fp registers
1142 in SFmode, DFmode and XFmode */
1143 2, /* cost of moving MMX register */
1144 {8, 8}, /* cost of loading MMX registers
1145 in SImode and DImode */
1146 {8, 8}, /* cost of storing MMX registers
1147 in SImode and DImode */
1148 2, /* cost of moving SSE register */
1149 {8, 8, 8}, /* cost of loading SSE registers
1150 in SImode, DImode and TImode */
1151 {8, 8, 8}, /* cost of storing SSE registers
1152 in SImode, DImode and TImode */
1153 5, /* MMX or SSE register to integer */
1154 32, /* size of l1 cache. */
1155 256, /* size of l2 cache. */
1156 64, /* size of prefetch block */
1157 6, /* number of parallel prefetches */
1158 3, /* Branch cost */
1159 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1160 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1161 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1162 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1163 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1164 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1165 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1166 DUMMY_STRINGOP_ALGS},
1167 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1168 DUMMY_STRINGOP_ALGS},
1169 1, /* scalar_stmt_cost. */
1170 1, /* scalar_load_cost. */
1171 1, /* scalar_store_cost. */
1172 1, /* vec_stmt_cost. */
1173 1, /* vec_to_scalar_cost. */
1174 1, /* scalar_to_vec_cost. */
1175 1, /* vec_align_load_cost. */
1176 2, /* vec_unalign_load_cost. */
1177 1, /* vec_store_cost. */
1178 3, /* cond_taken_branch_cost. */
1179 1, /* cond_not_taken_branch_cost. */
1180 };
1181
1182 const struct processor_costs *ix86_cost = &pentium_cost;
1183
1184 /* Processor feature/optimization bitmasks. */
1185 #define m_386 (1<<PROCESSOR_I386)
1186 #define m_486 (1<<PROCESSOR_I486)
1187 #define m_PENT (1<<PROCESSOR_PENTIUM)
1188 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1189 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1190 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1191 #define m_CORE2 (1<<PROCESSOR_CORE2)
1192
1193 #define m_GEODE (1<<PROCESSOR_GEODE)
1194 #define m_K6 (1<<PROCESSOR_K6)
1195 #define m_K6_GEODE (m_K6 | m_GEODE)
1196 #define m_K8 (1<<PROCESSOR_K8)
1197 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1198 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1199 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1200 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1201
1202 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1203 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1204
1205 /* Generic instruction choice should be the common subset of supported CPUs
1206 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1207 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
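/* Example of how these masks are meant to be read in the tables below
   (nothing new, just spelling out the convention): an entry such as

     m_AMD_MULTIPLE | m_CORE2 | m_GENERIC

   marks a tuning that applies to Athlon, K8, AMDFAM10, Core2 and both
   generic models, while a leading ~ inverts the set, so ~m_386 means
   "every processor except the 386".  */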
1208
1209 /* Feature tests against the various tunings. */
1210 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1211 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1212 negatively, so enabling it for Generic64 seems like a good code size
1213 tradeoff. We can't enable it for 32-bit generic because it does not
1214 work well with PPro-based chips. */
1215 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1216
1217 /* X86_TUNE_PUSH_MEMORY */
1218 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1219 | m_NOCONA | m_CORE2 | m_GENERIC,
1220
1221 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1222 m_486 | m_PENT,
1223
1224 /* X86_TUNE_USE_BIT_TEST */
1225 m_386,
1226
1227 /* X86_TUNE_UNROLL_STRLEN */
1228 m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
1229
1230 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1231 m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1232
1233 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put into P4 based
1234 on simulation results. But after P4 was made, no performance benefit
1235 was observed with branch hints, and they also increase code size.
1236 As a result, icc never generates branch hints. */
1237 0,
1238
1239 /* X86_TUNE_DOUBLE_WITH_ADD */
1240 ~m_386,
1241
1242 /* X86_TUNE_USE_SAHF */
1243 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1244 | m_NOCONA | m_CORE2 | m_GENERIC,
1245
1246 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1247 partial dependencies. */
1248 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
1249 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1250
1251 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1252 register stalls in the Generic32 compilation setting as well. However,
1253 in the current implementation the partial register stalls are not eliminated
1254 very well - they can be introduced via subregs synthesized by combine
1255 and can happen in caller/callee saving sequences. Because this option
1256 pays back little on PPro-based chips and conflicts with the partial reg
1257 dependencies used by Athlon/P4-based chips, it is better to leave it off
1258 for generic32 for now. */
1259 m_PPRO,
1260
1261 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1262 m_CORE2 | m_GENERIC,
1263
1264 /* X86_TUNE_USE_HIMODE_FIOP */
1265 m_386 | m_486 | m_K6_GEODE,
1266
1267 /* X86_TUNE_USE_SIMODE_FIOP */
1268 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
1269
1270 /* X86_TUNE_USE_MOV0 */
1271 m_K6,
1272
1273 /* X86_TUNE_USE_CLTD */
1274 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1275
1276 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1277 m_PENT4,
1278
1279 /* X86_TUNE_SPLIT_LONG_MOVES */
1280 m_PPRO,
1281
1282 /* X86_TUNE_READ_MODIFY_WRITE */
1283 ~m_PENT,
1284
1285 /* X86_TUNE_READ_MODIFY */
1286 ~(m_PENT | m_PPRO),
1287
1288 /* X86_TUNE_PROMOTE_QIMODE */
1289 m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
1290 | m_GENERIC /* | m_PENT4 ? */,
1291
1292 /* X86_TUNE_FAST_PREFIX */
1293 ~(m_PENT | m_486 | m_386),
1294
1295 /* X86_TUNE_SINGLE_STRINGOP */
1296 m_386 | m_PENT4 | m_NOCONA,
1297
1298 /* X86_TUNE_QIMODE_MATH */
1299 ~0,
1300
1301 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1302 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1303 might be considered for Generic32 if our scheme for avoiding partial
1304 stalls was more effective. */
1305 ~m_PPRO,
1306
1307 /* X86_TUNE_PROMOTE_QI_REGS */
1308 0,
1309
1310 /* X86_TUNE_PROMOTE_HI_REGS */
1311 m_PPRO,
1312
1313 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1314 m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1315
1316 /* X86_TUNE_ADD_ESP_8 */
1317 m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
1318 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1319
1320 /* X86_TUNE_SUB_ESP_4 */
1321 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1322
1323 /* X86_TUNE_SUB_ESP_8 */
1324 m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
1325 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1326
1327 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1328 for DFmode copies */
1329 ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1330 | m_GENERIC | m_GEODE),
1331
1332 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1333 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1334
1335 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1336 conflict here between PPro/Pentium4 based chips that treat 128bit
1337 SSE registers as single units and K8 based chips that divide SSE
1338 registers into two 64bit halves. This knob promotes all store destinations
1339 to be 128bit to allow register renaming on 128bit SSE units, but usually
1340 results in one extra microop on 64bit SSE units. Experimental results
1341 show that disabling this option on P4 brings over a 20% SPECfp regression,
1342 while enabling it on K8 brings roughly a 2.4% regression that can be partly
1343 masked by careful scheduling of moves. */
1344 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1345
1346 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1347 m_AMDFAM10,
1348
1349 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1350 are resolved on SSE register parts instead of whole registers, so we may
1351 maintain just the lower part of scalar values in the proper format, leaving
1352 the upper part undefined. */
1353 m_ATHLON_K8,
1354
1355 /* X86_TUNE_SSE_TYPELESS_STORES */
1356 m_AMD_MULTIPLE,
1357
1358 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1359 m_PPRO | m_PENT4 | m_NOCONA,
1360
1361 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1362 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1363
1364 /* X86_TUNE_PROLOGUE_USING_MOVE */
1365 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1366
1367 /* X86_TUNE_EPILOGUE_USING_MOVE */
1368 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1369
1370 /* X86_TUNE_SHIFT1 */
1371 ~m_486,
1372
1373 /* X86_TUNE_USE_FFREEP */
1374 m_AMD_MULTIPLE,
1375
1376 /* X86_TUNE_INTER_UNIT_MOVES */
1377 ~(m_AMD_MULTIPLE | m_GENERIC),
1378
1379 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1380 ~(m_AMDFAM10),
1381
1382 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1383 than 4 branch instructions in the 16 byte window. */
1384 m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1385
1386 /* X86_TUNE_SCHEDULE */
1387 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1388
1389 /* X86_TUNE_USE_BT */
1390 m_AMD_MULTIPLE,
1391
1392 /* X86_TUNE_USE_INCDEC */
1393 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1394
1395 /* X86_TUNE_PAD_RETURNS */
1396 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1397
1398 /* X86_TUNE_EXT_80387_CONSTANTS */
1399 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1400
1401 /* X86_TUNE_SHORTEN_X87_SSE */
1402 ~m_K8,
1403
1404 /* X86_TUNE_AVOID_VECTOR_DECODE */
1405 m_K8 | m_GENERIC64,
1406
1407 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
1408 and SImode multiply, but the 386 and 486 do HImode multiply faster. */
1409 ~(m_386 | m_486),
1410
1411 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of a 32-bit constant and memory takes
1412 the vector path on AMD machines. */
1413 m_K8 | m_GENERIC64 | m_AMDFAM10,
1414
1415 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of an 8-bit constant takes the vector path
1416 on AMD machines. */
1417 m_K8 | m_GENERIC64 | m_AMDFAM10,
1418
1419 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1420 than a MOV. */
1421 m_PENT,
1422
1423 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1424 but one byte longer. */
1425 m_PENT,
1426
1427 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
1428 operand that cannot be represented using a modRM byte. The XOR
1429 replacement is long decoded, so this split helps here as well. */
1430 m_K6,
1431
1432 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1433 from integer to FP. */
1434 m_AMDFAM10,
1435 };
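/* How the table above is consumed (a sketch of the convention only; the
   exact TARGET_* wrapper macros live in i386.h): each entry is a bitmask
   over the m_* processor bits defined earlier, and a tuning is considered
   active when its entry ANDed with the single bit for the processor being
   tuned for (1 << ix86_tune) is nonzero.  */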
1436
1437 /* Feature tests against the various architecture variations. */
1438 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1439 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1440 ~(m_386 | m_486 | m_PENT | m_K6),
1441
1442 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1443 ~m_386,
1444
1445 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1446 ~(m_386 | m_486),
1447
1448 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1449 ~m_386,
1450
1451 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1452 ~m_386,
1453 };
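/* Illustrative note (not in the original source): each entry above is a
   bitmask over processors (m_386, m_PENT, ...).  override_options below
   ANDs every entry with (1u << ix86_arch), so that afterwards a test like

     if (ix86_arch_features[X86_ARCH_CMOVE])

   is nonzero exactly when the CPU selected by -march supports the feature.  */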
1454
1455 static const unsigned int x86_accumulate_outgoing_args
1456 = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1457
1458 static const unsigned int x86_arch_always_fancy_math_387
1459 = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1460 | m_NOCONA | m_CORE2 | m_GENERIC;
1461
1462 static enum stringop_alg stringop_alg = no_stringop;
1463
1464 /* In case the average insn count for a single function invocation is
1465 lower than this constant, emit fast (but longer) prologue and
1466 epilogue code. */
1467 #define FAST_PROLOGUE_INSN_COUNT 20
1468
1469 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1470 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1471 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1472 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1473
1474 /* Array of the smallest class containing reg number REGNO, indexed by
1475 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1476
1477 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1478 {
1479 /* ax, dx, cx, bx */
1480 AREG, DREG, CREG, BREG,
1481 /* si, di, bp, sp */
1482 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1483 /* FP registers */
1484 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1485 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1486 /* arg pointer */
1487 NON_Q_REGS,
1488 /* flags, fpsr, fpcr, frame */
1489 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1490 /* SSE registers */
1491 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1492 SSE_REGS, SSE_REGS,
1493 /* MMX registers */
1494 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1495 MMX_REGS, MMX_REGS,
1496 /* REX registers */
1497 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1498 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1499 /* SSE REX registers */
1500 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1501 SSE_REGS, SSE_REGS,
1502 };
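/* Illustrative note (not in the original source): REGNO_REG_CLASS in i386.h
   simply indexes this table, so for instance

     REGNO_REG_CLASS (0) == AREG        (%eax)
     REGNO_REG_CLASS (7) == NON_Q_REGS  (%esp)

   following the ax, dx, cx, bx, si, di, bp, sp ordering noted above.  */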
1503
1504 /* The "default" register map used in 32bit mode. */
1505
1506 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1507 {
1508 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1509 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1510 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1511 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1512 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1513 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1514 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1515 };
1516
1517 static int const x86_64_int_parameter_registers[6] =
1518 {
1519 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1520 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1521 };
1522
1523 static int const x86_64_ms_abi_int_parameter_registers[4] =
1524 {
1525 2 /*RCX*/, 1 /*RDX*/,
1526 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1527 };
1528
1529 static int const x86_64_int_return_registers[4] =
1530 {
1531 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1532 };
1533
1534 /* The "default" register map used in 64bit mode. */
1535 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1536 {
1537 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1538 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1539 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1540 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1541 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1542 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
1543 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1544 };
1545
1546 /* Define the register numbers to be used in Dwarf debugging information.
1547 The SVR4 reference port C compiler uses the following register numbers
1548 in its Dwarf output code:
1549 0 for %eax (gcc regno = 0)
1550 1 for %ecx (gcc regno = 2)
1551 2 for %edx (gcc regno = 1)
1552 3 for %ebx (gcc regno = 3)
1553 4 for %esp (gcc regno = 7)
1554 5 for %ebp (gcc regno = 6)
1555 6 for %esi (gcc regno = 4)
1556 7 for %edi (gcc regno = 5)
1557 The following three DWARF register numbers are never generated by
1558 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1559 believes these numbers have these meanings.
1560 8 for %eip (no gcc equivalent)
1561 9 for %eflags (gcc regno = 17)
1562 10 for %trapno (no gcc equivalent)
1563 It is not at all clear how we should number the FP stack registers
1564 for the x86 architecture. If the version of SDB on x86/svr4 were
1565 a bit less brain dead with respect to floating-point then we would
1566 have a precedent to follow with respect to DWARF register numbers
1567 for x86 FP registers, but the SDB on x86/svr4 is so completely
1568 broken with respect to FP registers that it is hardly worth thinking
1569 of it as something to strive for compatibility with.
1570 The version of x86/svr4 SDB I have at the moment does (partially)
1571 seem to believe that DWARF register number 11 is associated with
1572 the x86 register %st(0), but that's about all. Higher DWARF
1573 register numbers don't seem to be associated with anything in
1574 particular, and even for DWARF regno 11, SDB only seems to under-
1575 stand that it should say that a variable lives in %st(0) (when
1576 asked via an `=' command) if we said it was in DWARF regno 11,
1577 but SDB still prints garbage when asked for the value of the
1578 variable in question (via a `/' command).
1579 (Also note that the labels SDB prints for various FP stack regs
1580 when doing an `x' command are all wrong.)
1581 Note that these problems generally don't affect the native SVR4
1582 C compiler because it doesn't allow the use of -O with -g and
1583 because when it is *not* optimizing, it allocates a memory
1584 location for each floating-point variable, and the memory
1585 location is what gets described in the DWARF AT_location
1586 attribute for the variable in question.
1587 Regardless of the severe mental illness of the x86/svr4 SDB, we
1588 do something sensible here and we use the following DWARF
1589 register numbers. Note that these are all stack-top-relative
1590 numbers.
1591 11 for %st(0) (gcc regno = 8)
1592 12 for %st(1) (gcc regno = 9)
1593 13 for %st(2) (gcc regno = 10)
1594 14 for %st(3) (gcc regno = 11)
1595 15 for %st(4) (gcc regno = 12)
1596 16 for %st(5) (gcc regno = 13)
1597 17 for %st(6) (gcc regno = 14)
1598 18 for %st(7) (gcc regno = 15)
1599 */
1600 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1601 {
1602 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1603 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1604 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1605 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1606 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1607 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1608 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1609 };
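/* Illustrative note (not in the original source): reading the table with the
   numbering described above, svr4_dbx_register_map[1] == 2 because gcc regno
   1 is %edx and the SVR4 DWARF numbering calls %edx register 2; likewise
   entry 17 (%eflags) maps to DWARF register 9.  */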
1610
1611 /* Test and compare insns in i386.md store the information needed to
1612 generate branch and scc insns here. */
1613
1614 rtx ix86_compare_op0 = NULL_RTX;
1615 rtx ix86_compare_op1 = NULL_RTX;
1616 rtx ix86_compare_emitted = NULL_RTX;
1617
1618 /* Size of the register save area. */
1619 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
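/* Illustrative note (not in the original source): with the 64-bit values
   from i386.h -- assuming REGPARM_MAX == 6, SSE_REGPARM_MAX == 8 and
   UNITS_PER_WORD == 8 -- this works out to 6*8 + 8*16 = 176 bytes, i.e.
   room for the six integer and eight SSE argument registers saved for
   va_arg.  */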
1620
1621 /* Define the structure for the machine field in struct function. */
1622
1623 struct stack_local_entry GTY(())
1624 {
1625 unsigned short mode;
1626 unsigned short n;
1627 rtx rtl;
1628 struct stack_local_entry *next;
1629 };
1630
1631 /* Structure describing stack frame layout.
1632 Stack grows downward:
1633
1634 [arguments]
1635 <- ARG_POINTER
1636 saved pc
1637
1638 saved frame pointer if frame_pointer_needed
1639 <- HARD_FRAME_POINTER
1640 [saved regs]
1641
1642 [padding1] \
1643 )
1644 [va_arg registers] (
1645 > to_allocate <- FRAME_POINTER
1646 [frame] (
1647 )
1648 [padding2] /
1649 */
1650 struct ix86_frame
1651 {
1652 int nregs;
1653 int padding1;
1654 int va_arg_size;
1655 HOST_WIDE_INT frame;
1656 int padding2;
1657 int outgoing_arguments_size;
1658 int red_zone_size;
1659
1660 HOST_WIDE_INT to_allocate;
1661 /* The offsets relative to ARG_POINTER. */
1662 HOST_WIDE_INT frame_pointer_offset;
1663 HOST_WIDE_INT hard_frame_pointer_offset;
1664 HOST_WIDE_INT stack_pointer_offset;
1665
1666 /* When save_regs_using_mov is set, emit prologue using
1667 move instead of push instructions. */
1668 bool save_regs_using_mov;
1669 };
1670
1671 /* Code model option. */
1672 enum cmodel ix86_cmodel;
1673 /* Asm dialect. */
1674 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1675 /* TLS dialects. */
1676 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1677
1678 /* Which unit we are generating floating point math for. */
1679 enum fpmath_unit ix86_fpmath;
1680
1681 /* Which CPU we are scheduling for. */
1682 enum processor_type ix86_tune;
1683
1684 /* Which instruction set architecture to use. */
1685 enum processor_type ix86_arch;
1686
1687 /* True if the SSE prefetch instruction is not a NOOP. */
1688 int x86_prefetch_sse;
1689
1690 /* ix86_regparm_string as a number */
1691 static int ix86_regparm;
1692
1693 /* -mstackrealign option */
1694 extern int ix86_force_align_arg_pointer;
1695 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1696
1697 /* Preferred alignment for stack boundary in bits. */
1698 unsigned int ix86_preferred_stack_boundary;
1699
1700 /* Values 1-5: see jump.c */
1701 int ix86_branch_cost;
1702
1703 /* Variables which are this size or smaller are put in the data/bss
1704 or ldata/lbss sections. */
1705
1706 int ix86_section_threshold = 65536;
1707
1708 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1709 char internal_label_prefix[16];
1710 int internal_label_prefix_len;
1711
1712 /* Fence to use after loop using movnt. */
1713 tree x86_mfence;
1714
1715 /* Register class used for passing a given 64-bit part of an argument.
1716 These represent the classes documented by the psABI, except that the
1717 SSESF and SSEDF classes are basically the SSE class; gcc just uses an
1718 SFmode or DFmode move instead of DImode to avoid reformatting penalties.
1719
1720 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1721 whenever possible (the upper half does contain padding). */
1722 enum x86_64_reg_class
1723 {
1724 X86_64_NO_CLASS,
1725 X86_64_INTEGER_CLASS,
1726 X86_64_INTEGERSI_CLASS,
1727 X86_64_SSE_CLASS,
1728 X86_64_SSESF_CLASS,
1729 X86_64_SSEDF_CLASS,
1730 X86_64_SSEUP_CLASS,
1731 X86_64_X87_CLASS,
1732 X86_64_X87UP_CLASS,
1733 X86_64_COMPLEX_X87_CLASS,
1734 X86_64_MEMORY_CLASS
1735 };
1736 static const char * const x86_64_reg_class_name[] =
1737 {
1738 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1739 "sseup", "x87", "x87up", "cplx87", "no"
1740 };
1741
1742 #define MAX_CLASSES 4
1743
1744 /* Table of constants used by fldpi, fldln2, etc.... */
1745 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1746 static bool ext_80387_constants_init = 0;
1747
1748 \f
1749 static struct machine_function * ix86_init_machine_status (void);
1750 static rtx ix86_function_value (const_tree, const_tree, bool);
1751 static int ix86_function_regparm (const_tree, const_tree);
1752 static void ix86_compute_frame_layout (struct ix86_frame *);
1753 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1754 rtx, rtx, int);
1755
1756 \f
1757 /* The svr4 ABI for the i386 says that records and unions are returned
1758 in memory. */
1759 #ifndef DEFAULT_PCC_STRUCT_RETURN
1760 #define DEFAULT_PCC_STRUCT_RETURN 1
1761 #endif
1762
1763 /* Bit flags that specify the ISA we are compiling for. */
1764 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1765
1766 /* A mask of ix86_isa_flags that includes bit X if X
1767 was set or cleared on the command line. */
1768 static int ix86_isa_flags_explicit;
1769
1770 /* Define a set of ISAs which aren't available for a given ISA. MMX
1771 and SSE ISAs are handled separately. */
1772
1773 #define OPTION_MASK_ISA_MMX_UNSET \
1774 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
1775 #define OPTION_MASK_ISA_3DNOW_UNSET OPTION_MASK_ISA_3DNOW_A
1776
1777 #define OPTION_MASK_ISA_SSE_UNSET \
1778 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE2_UNSET)
1779 #define OPTION_MASK_ISA_SSE2_UNSET \
1780 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE3_UNSET)
1781 #define OPTION_MASK_ISA_SSE3_UNSET \
1782 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSSE3_UNSET)
1783 #define OPTION_MASK_ISA_SSSE3_UNSET \
1784 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_1_UNSET)
1785 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1786 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_2_UNSET)
1787 #define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4A
1788
1789 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1790 as -msse4.1 -msse4.2. -mno-sse4 should be the same as -mno-sse4.1. */
1791 #define OPTION_MASK_ISA_SSE4 \
1792 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2)
1793 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1794
1795 #define OPTION_MASK_ISA_SSE4A_UNSET OPTION_MASK_ISA_SSE4
1796
1797 #define OPTION_MASK_ISA_SSE5_UNSET \
1798 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
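/* Illustrative note (not in the original source): the *_UNSET macros chain
   into each other, so disabling one ISA also disables everything that
   depends on it.  Expanding the chain, OPTION_MASK_ISA_SSE2_UNSET covers
   SSE3 | SSSE3 | SSE4_1 | SSE4_2 | SSE4A, which is why -mno-sse2 also turns
   off SSE3, SSSE3, SSE4.1, SSE4.2 and SSE4A (but not SSE itself).  */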
1799
1800 /* Vectorization library interface and handlers. */
1801 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
1802 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
1803
1804 /* Implement TARGET_HANDLE_OPTION. */
1805
1806 static bool
1807 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1808 {
1809 switch (code)
1810 {
1811 case OPT_mmmx:
1812 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX;
1813 if (!value)
1814 {
1815 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1816 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
1817 }
1818 return true;
1819
1820 case OPT_m3dnow:
1821 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW;
1822 if (!value)
1823 {
1824 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
1825 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
1826 }
1827 return true;
1828
1829 case OPT_m3dnowa:
1830 return false;
1831
1832 case OPT_msse:
1833 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE;
1834 if (!value)
1835 {
1836 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
1837 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
1838 }
1839 return true;
1840
1841 case OPT_msse2:
1842 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2;
1843 if (!value)
1844 {
1845 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
1846 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
1847 }
1848 return true;
1849
1850 case OPT_msse3:
1851 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3;
1852 if (!value)
1853 {
1854 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
1855 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
1856 }
1857 return true;
1858
1859 case OPT_mssse3:
1860 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3;
1861 if (!value)
1862 {
1863 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
1864 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
1865 }
1866 return true;
1867
1868 case OPT_msse4_1:
1869 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1;
1870 if (!value)
1871 {
1872 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
1873 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
1874 }
1875 return true;
1876
1877 case OPT_msse4_2:
1878 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2;
1879 if (!value)
1880 {
1881 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
1882 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
1883 }
1884 return true;
1885
1886 case OPT_msse4:
1887 ix86_isa_flags |= OPTION_MASK_ISA_SSE4;
1888 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4;
1889 return true;
1890
1891 case OPT_mno_sse4:
1892 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
1893 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
1894 return true;
1895
1896 case OPT_msse4a:
1897 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A;
1898 if (!value)
1899 {
1900 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
1901 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
1902 }
1903 return true;
1904
1905 case OPT_msse5:
1906 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5;
1907 if (!value)
1908 {
1909 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
1910 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
1911 }
1912 return true;
1913
1914 default:
1915 return true;
1916 }
1917 }
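/* Illustrative note (not in the original source): for example, -mno-sse2
   reaches the OPT_msse2 case above with value == 0.  The handler then
   records SSE2 and everything depending on it as explicitly chosen and
   clears the dependent ISAs (OPTION_MASK_ISA_SSE2_UNSET) from
   ix86_isa_flags; the SSE2 bit itself is presumably cleared by the generic
   option machinery through the option's definition in i386.opt rather than
   here.  */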
1918
1919 /* Sometimes certain combinations of command options do not make
1920 sense on a particular target machine. You can define a macro
1921 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1922 defined, is executed once just after all the command options have
1923 been parsed.
1924
1925 Don't use this macro to turn on various extra optimizations for
1926 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1927
1928 void
1929 override_options (void)
1930 {
1931 int i;
1932 int ix86_tune_defaulted = 0;
1933 int ix86_arch_specified = 0;
1934 unsigned int ix86_arch_mask, ix86_tune_mask;
1935
1936 /* Comes from final.c -- no real reason to change it. */
1937 #define MAX_CODE_ALIGN 16
1938
1939 static struct ptt
1940 {
1941 const struct processor_costs *cost; /* Processor costs */
1942 const int align_loop; /* Default alignments. */
1943 const int align_loop_max_skip;
1944 const int align_jump;
1945 const int align_jump_max_skip;
1946 const int align_func;
1947 }
1948 const processor_target_table[PROCESSOR_max] =
1949 {
1950 {&i386_cost, 4, 3, 4, 3, 4},
1951 {&i486_cost, 16, 15, 16, 15, 16},
1952 {&pentium_cost, 16, 7, 16, 7, 16},
1953 {&pentiumpro_cost, 16, 15, 16, 10, 16},
1954 {&geode_cost, 0, 0, 0, 0, 0},
1955 {&k6_cost, 32, 7, 32, 7, 32},
1956 {&athlon_cost, 16, 7, 16, 7, 16},
1957 {&pentium4_cost, 0, 0, 0, 0, 0},
1958 {&k8_cost, 16, 7, 16, 7, 16},
1959 {&nocona_cost, 0, 0, 0, 0, 0},
1960 {&core2_cost, 16, 10, 16, 10, 16},
1961 {&generic32_cost, 16, 7, 16, 7, 16},
1962 {&generic64_cost, 16, 10, 16, 10, 16},
1963 {&amdfam10_cost, 32, 24, 32, 7, 32}
1964 };
1965
1966 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
1967 {
1968 "generic",
1969 "i386",
1970 "i486",
1971 "pentium",
1972 "pentium-mmx",
1973 "pentiumpro",
1974 "pentium2",
1975 "pentium3",
1976 "pentium4",
1977 "pentium-m",
1978 "prescott",
1979 "nocona",
1980 "core2",
1981 "geode",
1982 "k6",
1983 "k6-2",
1984 "k6-3",
1985 "athlon",
1986 "athlon-4",
1987 "k8",
1988 "amdfam10"
1989 };
1990
1991 enum pta_flags
1992 {
1993 PTA_SSE = 1 << 0,
1994 PTA_SSE2 = 1 << 1,
1995 PTA_SSE3 = 1 << 2,
1996 PTA_MMX = 1 << 3,
1997 PTA_PREFETCH_SSE = 1 << 4,
1998 PTA_3DNOW = 1 << 5,
1999 PTA_3DNOW_A = 1 << 6,
2000 PTA_64BIT = 1 << 7,
2001 PTA_SSSE3 = 1 << 8,
2002 PTA_CX16 = 1 << 9,
2003 PTA_POPCNT = 1 << 10,
2004 PTA_ABM = 1 << 11,
2005 PTA_SSE4A = 1 << 12,
2006 PTA_NO_SAHF = 1 << 13,
2007 PTA_SSE4_1 = 1 << 14,
2008 PTA_SSE4_2 = 1 << 15,
2009 PTA_SSE5 = 1 << 16
2010 };
2011
2012 static struct pta
2013 {
2014 const char *const name; /* processor name or nickname. */
2015 const enum processor_type processor;
2016 const unsigned /*enum pta_flags*/ flags;
2017 }
2018 const processor_alias_table[] =
2019 {
2020 {"i386", PROCESSOR_I386, 0},
2021 {"i486", PROCESSOR_I486, 0},
2022 {"i586", PROCESSOR_PENTIUM, 0},
2023 {"pentium", PROCESSOR_PENTIUM, 0},
2024 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
2025 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
2026 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2027 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2028 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2029 {"i686", PROCESSOR_PENTIUMPRO, 0},
2030 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
2031 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
2032 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2033 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2034 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
2035 {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX |PTA_SSE | PTA_SSE2},
2036 {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
2037 {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2038 {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
2039 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2040 | PTA_CX16 | PTA_NO_SAHF)},
2041 {"core2", PROCESSOR_CORE2, (PTA_64BIT
2042 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2043 | PTA_SSSE3
2044 | PTA_CX16)},
2045 {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2046 | PTA_PREFETCH_SSE)},
2047 {"k6", PROCESSOR_K6, PTA_MMX},
2048 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2049 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2050 {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2051 | PTA_PREFETCH_SSE)},
2052 {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2053 | PTA_PREFETCH_SSE)},
2054 {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2055 | PTA_SSE)},
2056 {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2057 | PTA_SSE)},
2058 {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2059 | PTA_SSE)},
2060 {"x86-64", PROCESSOR_K8, (PTA_64BIT
2061 | PTA_MMX | PTA_SSE | PTA_SSE2
2062 | PTA_NO_SAHF)},
2063 {"k8", PROCESSOR_K8, (PTA_64BIT
2064 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2065 | PTA_SSE | PTA_SSE2
2066 | PTA_NO_SAHF)},
2067 {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
2068 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2069 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2070 | PTA_NO_SAHF)},
2071 {"opteron", PROCESSOR_K8, (PTA_64BIT
2072 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2073 | PTA_SSE | PTA_SSE2
2074 | PTA_NO_SAHF)},
2075 {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
2076 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2077 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2078 | PTA_NO_SAHF)},
2079 {"athlon64", PROCESSOR_K8, (PTA_64BIT
2080 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2081 | PTA_SSE | PTA_SSE2
2082 | PTA_NO_SAHF)},
2083 {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
2084 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2085 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2086 | PTA_NO_SAHF)},
2087 {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
2088 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2089 | PTA_SSE | PTA_SSE2
2090 | PTA_NO_SAHF)},
2091 {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
2092 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2093 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2094 | PTA_SSE4A
2095 | PTA_CX16 | PTA_ABM)},
2096 {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
2097 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2098 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2099 | PTA_SSE4A
2100 | PTA_CX16 | PTA_ABM)},
2101 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
2102 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
2103 };
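/* Illustrative note (not in the original source): the loops further down in
   override_options walk this table for -march= and -mtune=.  For instance
   -march=core2 selects PROCESSOR_CORE2 and, per the PTA_* flags in its
   entry above, turns on MMX, SSE, SSE2, SSE3 and SSSE3 (each only if not
   explicitly overridden on the command line) and records CMPXCHG16B
   support via x86_cmpxchg16b.  */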
2104
2105 int const pta_size = ARRAY_SIZE (processor_alias_table);
2106
2107 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2108 SUBTARGET_OVERRIDE_OPTIONS;
2109 #endif
2110
2111 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2112 SUBSUBTARGET_OVERRIDE_OPTIONS;
2113 #endif
2114
2115 /* -fPIC is the default for x86_64. */
2116 if (TARGET_MACHO && TARGET_64BIT)
2117 flag_pic = 2;
2118
2119 /* Set the default values for switches whose default depends on TARGET_64BIT
2120 in case they weren't overwritten by command line options. */
2121 if (TARGET_64BIT)
2122 {
2123 /* Mach-O doesn't support omitting the frame pointer for now. */
2124 if (flag_omit_frame_pointer == 2)
2125 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2126 if (flag_asynchronous_unwind_tables == 2)
2127 flag_asynchronous_unwind_tables = 1;
2128 if (flag_pcc_struct_return == 2)
2129 flag_pcc_struct_return = 0;
2130 }
2131 else
2132 {
2133 if (flag_omit_frame_pointer == 2)
2134 flag_omit_frame_pointer = 0;
2135 if (flag_asynchronous_unwind_tables == 2)
2136 flag_asynchronous_unwind_tables = 0;
2137 if (flag_pcc_struct_return == 2)
2138 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2139 }
2140
2141 /* Need to check -mtune=generic first. */
2142 if (ix86_tune_string)
2143 {
2144 if (!strcmp (ix86_tune_string, "generic")
2145 || !strcmp (ix86_tune_string, "i686")
2146 /* As special support for cross compilers we read -mtune=native
2147 as -mtune=generic. With native compilers we won't see the
2148 -mtune=native, as it was changed by the driver. */
2149 || !strcmp (ix86_tune_string, "native"))
2150 {
2151 if (TARGET_64BIT)
2152 ix86_tune_string = "generic64";
2153 else
2154 ix86_tune_string = "generic32";
2155 }
2156 else if (!strncmp (ix86_tune_string, "generic", 7))
2157 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2158 }
2159 else
2160 {
2161 if (ix86_arch_string)
2162 ix86_tune_string = ix86_arch_string;
2163 if (!ix86_tune_string)
2164 {
2165 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2166 ix86_tune_defaulted = 1;
2167 }
2168
2169 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2170 need to use a sensible tune option. */
2171 if (!strcmp (ix86_tune_string, "generic")
2172 || !strcmp (ix86_tune_string, "x86-64")
2173 || !strcmp (ix86_tune_string, "i686"))
2174 {
2175 if (TARGET_64BIT)
2176 ix86_tune_string = "generic64";
2177 else
2178 ix86_tune_string = "generic32";
2179 }
2180 }
2181 if (ix86_stringop_string)
2182 {
2183 if (!strcmp (ix86_stringop_string, "rep_byte"))
2184 stringop_alg = rep_prefix_1_byte;
2185 else if (!strcmp (ix86_stringop_string, "libcall"))
2186 stringop_alg = libcall;
2187 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2188 stringop_alg = rep_prefix_4_byte;
2189 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2190 stringop_alg = rep_prefix_8_byte;
2191 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2192 stringop_alg = loop_1_byte;
2193 else if (!strcmp (ix86_stringop_string, "loop"))
2194 stringop_alg = loop;
2195 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2196 stringop_alg = unrolled_loop;
2197 else
2198 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
2199 }
2200 if (!strcmp (ix86_tune_string, "x86-64"))
2201 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2202 "-mtune=generic instead as appropriate.");
2203
2204 if (!ix86_arch_string)
2205 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2206 else
2207 ix86_arch_specified = 1;
2208
2209 if (!strcmp (ix86_arch_string, "generic"))
2210 error ("generic CPU can be used only for -mtune= switch");
2211 if (!strncmp (ix86_arch_string, "generic", 7))
2212 error ("bad value (%s) for -march= switch", ix86_arch_string);
2213
2214 if (ix86_cmodel_string != 0)
2215 {
2216 if (!strcmp (ix86_cmodel_string, "small"))
2217 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2218 else if (!strcmp (ix86_cmodel_string, "medium"))
2219 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2220 else if (!strcmp (ix86_cmodel_string, "large"))
2221 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2222 else if (flag_pic)
2223 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2224 else if (!strcmp (ix86_cmodel_string, "32"))
2225 ix86_cmodel = CM_32;
2226 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2227 ix86_cmodel = CM_KERNEL;
2228 else
2229 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2230 }
2231 else
2232 {
2233 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
2234 use of rip-relative addressing. This eliminates fixups that
2235 would otherwise be needed if this object is to be placed in a
2236 DLL, and is essentially just as efficient as direct addressing. */
2237 if (TARGET_64BIT_MS_ABI)
2238 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2239 else if (TARGET_64BIT)
2240 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2241 else
2242 ix86_cmodel = CM_32;
2243 }
2244 if (ix86_asm_string != 0)
2245 {
2246 if (! TARGET_MACHO
2247 && !strcmp (ix86_asm_string, "intel"))
2248 ix86_asm_dialect = ASM_INTEL;
2249 else if (!strcmp (ix86_asm_string, "att"))
2250 ix86_asm_dialect = ASM_ATT;
2251 else
2252 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2253 }
2254 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2255 error ("code model %qs not supported in the %s bit mode",
2256 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2257 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2258 sorry ("%i-bit mode not compiled in",
2259 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2260
2261 for (i = 0; i < pta_size; i++)
2262 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2263 {
2264 ix86_arch = processor_alias_table[i].processor;
2265 /* Default cpu tuning to the architecture. */
2266 ix86_tune = ix86_arch;
2267
2268 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2269 error ("CPU you selected does not support x86-64 "
2270 "instruction set");
2271
2272 if (processor_alias_table[i].flags & PTA_MMX
2273 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2274 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2275 if (processor_alias_table[i].flags & PTA_3DNOW
2276 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2277 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2278 if (processor_alias_table[i].flags & PTA_3DNOW_A
2279 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2280 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2281 if (processor_alias_table[i].flags & PTA_SSE
2282 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2283 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2284 if (processor_alias_table[i].flags & PTA_SSE2
2285 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2286 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2287 if (processor_alias_table[i].flags & PTA_SSE3
2288 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2289 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2290 if (processor_alias_table[i].flags & PTA_SSSE3
2291 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2292 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2293 if (processor_alias_table[i].flags & PTA_SSE4_1
2294 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2295 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2296 if (processor_alias_table[i].flags & PTA_SSE4_2
2297 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2298 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2299 if (processor_alias_table[i].flags & PTA_SSE4A
2300 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2301 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2302 if (processor_alias_table[i].flags & PTA_SSE5
2303 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2304 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2305
2306 if (processor_alias_table[i].flags & PTA_ABM)
2307 x86_abm = true;
2308 if (processor_alias_table[i].flags & PTA_CX16)
2309 x86_cmpxchg16b = true;
2310 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM))
2311 x86_popcnt = true;
2312 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2313 x86_prefetch_sse = true;
2314 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
2315 x86_sahf = true;
2316
2317 break;
2318 }
2319
2320 if (i == pta_size)
2321 error ("bad value (%s) for -march= switch", ix86_arch_string);
2322
2323 ix86_arch_mask = 1u << ix86_arch;
2324 for (i = 0; i < X86_ARCH_LAST; ++i)
2325 ix86_arch_features[i] &= ix86_arch_mask;
2326
2327 for (i = 0; i < pta_size; i++)
2328 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2329 {
2330 ix86_tune = processor_alias_table[i].processor;
2331 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2332 {
2333 if (ix86_tune_defaulted)
2334 {
2335 ix86_tune_string = "x86-64";
2336 for (i = 0; i < pta_size; i++)
2337 if (! strcmp (ix86_tune_string,
2338 processor_alias_table[i].name))
2339 break;
2340 ix86_tune = processor_alias_table[i].processor;
2341 }
2342 else
2343 error ("CPU you selected does not support x86-64 "
2344 "instruction set");
2345 }
2346 /* Intel CPUs have always interpreted SSE prefetch instructions as
2347 NOPs; so, we can enable SSE prefetch instructions even when
2348 -mtune (rather than -march) points us to a processor that has them.
2349 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2350 higher processors. */
2351 if (TARGET_CMOVE
2352 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2353 x86_prefetch_sse = true;
2354 break;
2355 }
2356 if (i == pta_size)
2357 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2358
2359 ix86_tune_mask = 1u << ix86_tune;
2360 for (i = 0; i < X86_TUNE_LAST; ++i)
2361 ix86_tune_features[i] &= ix86_tune_mask;
2362
2363 if (optimize_size)
2364 ix86_cost = &size_cost;
2365 else
2366 ix86_cost = processor_target_table[ix86_tune].cost;
2367
2368 /* Arrange to set up i386_stack_locals for all functions. */
2369 init_machine_status = ix86_init_machine_status;
2370
2371 /* Validate -mregparm= value. */
2372 if (ix86_regparm_string)
2373 {
2374 if (TARGET_64BIT)
2375 warning (0, "-mregparm is ignored in 64-bit mode");
2376 i = atoi (ix86_regparm_string);
2377 if (i < 0 || i > REGPARM_MAX)
2378 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2379 else
2380 ix86_regparm = i;
2381 }
2382 if (TARGET_64BIT)
2383 ix86_regparm = REGPARM_MAX;
2384
2385 /* If the user has provided any of the -malign-* options,
2386 warn and use that value only if -falign-* is not set.
2387 Remove this code in GCC 3.2 or later. */
2388 if (ix86_align_loops_string)
2389 {
2390 warning (0, "-malign-loops is obsolete, use -falign-loops");
2391 if (align_loops == 0)
2392 {
2393 i = atoi (ix86_align_loops_string);
2394 if (i < 0 || i > MAX_CODE_ALIGN)
2395 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2396 else
2397 align_loops = 1 << i;
2398 }
2399 }
2400
2401 if (ix86_align_jumps_string)
2402 {
2403 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2404 if (align_jumps == 0)
2405 {
2406 i = atoi (ix86_align_jumps_string);
2407 if (i < 0 || i > MAX_CODE_ALIGN)
2408 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2409 else
2410 align_jumps = 1 << i;
2411 }
2412 }
2413
2414 if (ix86_align_funcs_string)
2415 {
2416 warning (0, "-malign-functions is obsolete, use -falign-functions");
2417 if (align_functions == 0)
2418 {
2419 i = atoi (ix86_align_funcs_string);
2420 if (i < 0 || i > MAX_CODE_ALIGN)
2421 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2422 else
2423 align_functions = 1 << i;
2424 }
2425 }
2426
2427 /* Default align_* from the processor table. */
2428 if (align_loops == 0)
2429 {
2430 align_loops = processor_target_table[ix86_tune].align_loop;
2431 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2432 }
2433 if (align_jumps == 0)
2434 {
2435 align_jumps = processor_target_table[ix86_tune].align_jump;
2436 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2437 }
2438 if (align_functions == 0)
2439 {
2440 align_functions = processor_target_table[ix86_tune].align_func;
2441 }
2442
2443 /* Validate -mbranch-cost= value, or provide default. */
2444 ix86_branch_cost = ix86_cost->branch_cost;
2445 if (ix86_branch_cost_string)
2446 {
2447 i = atoi (ix86_branch_cost_string);
2448 if (i < 0 || i > 5)
2449 error ("-mbranch-cost=%d is not between 0 and 5", i);
2450 else
2451 ix86_branch_cost = i;
2452 }
2453 if (ix86_section_threshold_string)
2454 {
2455 i = atoi (ix86_section_threshold_string);
2456 if (i < 0)
2457 error ("-mlarge-data-threshold=%d is negative", i);
2458 else
2459 ix86_section_threshold = i;
2460 }
2461
2462 if (ix86_tls_dialect_string)
2463 {
2464 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2465 ix86_tls_dialect = TLS_DIALECT_GNU;
2466 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2467 ix86_tls_dialect = TLS_DIALECT_GNU2;
2468 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2469 ix86_tls_dialect = TLS_DIALECT_SUN;
2470 else
2471 error ("bad value (%s) for -mtls-dialect= switch",
2472 ix86_tls_dialect_string);
2473 }
2474
2475 if (ix87_precision_string)
2476 {
2477 i = atoi (ix87_precision_string);
2478 if (i != 32 && i != 64 && i != 80)
2479 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
2480 }
2481
2482 if (TARGET_64BIT)
2483 {
2484 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
2485
2486 /* Enable by default the SSE and MMX builtins. Do allow the user to
2487 explicitly disable any of these. In particular, disabling SSE and
2488 MMX for kernel code is extremely useful. */
2489 if (!ix86_arch_specified)
2490 ix86_isa_flags
2491 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
2492 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
2493
2494 if (TARGET_RTD)
2495 warning (0, "-mrtd is ignored in 64bit mode");
2496 }
2497 else
2498 {
2499 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
2500
2501 if (!ix86_arch_specified)
2502 ix86_isa_flags
2503 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
2504
2505 /* The i386 ABI does not specify a red zone. It still makes sense to use it
2506 when the programmer takes care to keep the stack from being destroyed. */
2507 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2508 target_flags |= MASK_NO_RED_ZONE;
2509 }
2510
2511 /* Keep nonleaf frame pointers. */
2512 if (flag_omit_frame_pointer)
2513 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2514 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2515 flag_omit_frame_pointer = 1;
2516
2517 /* If we're doing fast math, we don't care about comparison order
2518 wrt NaNs. This lets us use a shorter comparison sequence. */
2519 if (flag_finite_math_only)
2520 target_flags &= ~MASK_IEEE_FP;
2521
2522 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2523 since the insns won't need emulation. */
2524 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2525 target_flags &= ~MASK_NO_FANCY_MATH_387;
2526
2527 /* Likewise, if the target doesn't have a 387, or we've specified
2528 software floating point, don't use 387 inline intrinsics. */
2529 if (!TARGET_80387)
2530 target_flags |= MASK_NO_FANCY_MATH_387;
2531
2532 /* Turn on SSE4A builtins for -msse5. */
2533 if (TARGET_SSE5)
2534 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2535
2536 /* Turn on SSE4.1 builtins for -msse4.2. */
2537 if (TARGET_SSE4_2)
2538 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2539
2540 /* Turn on SSSE3 builtins for -msse4.1. */
2541 if (TARGET_SSE4_1)
2542 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2543
2544 /* Turn on SSE3 builtins for -mssse3. */
2545 if (TARGET_SSSE3)
2546 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2547
2548 /* Turn on SSE3 builtins for -msse4a. */
2549 if (TARGET_SSE4A)
2550 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2551
2552 /* Turn on SSE2 builtins for -msse3. */
2553 if (TARGET_SSE3)
2554 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2555
2556 /* Turn on SSE builtins for -msse2. */
2557 if (TARGET_SSE2)
2558 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2559
2560 /* Turn on MMX builtins for -msse. */
2561 if (TARGET_SSE)
2562 {
2563 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
2564 x86_prefetch_sse = true;
2565 }
2566
2567 /* Turn on MMX builtins for 3Dnow. */
2568 if (TARGET_3DNOW)
2569 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2570
2571 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
2572 if (TARGET_SSE4_2 || TARGET_ABM)
2573 x86_popcnt = true;
2574
2575 /* Validate -mpreferred-stack-boundary= value, or provide default.
2576 The default of 128 bits is for Pentium III's SSE __m128. We can't
2577 change it because of optimize_size. Otherwise, we can't mix object
2578 files compiled with -Os and -On. */
2579 ix86_preferred_stack_boundary = 128;
2580 if (ix86_preferred_stack_boundary_string)
2581 {
2582 i = atoi (ix86_preferred_stack_boundary_string);
2583 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2584 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2585 TARGET_64BIT ? 4 : 2);
2586 else
2587 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2588 }
2589
2590 /* Accept -msseregparm only if at least SSE support is enabled. */
2591 if (TARGET_SSEREGPARM
2592 && ! TARGET_SSE)
2593 error ("-msseregparm used without SSE enabled");
2594
2595 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2596 if (ix86_fpmath_string != 0)
2597 {
2598 if (! strcmp (ix86_fpmath_string, "387"))
2599 ix86_fpmath = FPMATH_387;
2600 else if (! strcmp (ix86_fpmath_string, "sse"))
2601 {
2602 if (!TARGET_SSE)
2603 {
2604 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2605 ix86_fpmath = FPMATH_387;
2606 }
2607 else
2608 ix86_fpmath = FPMATH_SSE;
2609 }
2610 else if (! strcmp (ix86_fpmath_string, "387,sse")
2611 || ! strcmp (ix86_fpmath_string, "sse,387"))
2612 {
2613 if (!TARGET_SSE)
2614 {
2615 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2616 ix86_fpmath = FPMATH_387;
2617 }
2618 else if (!TARGET_80387)
2619 {
2620 warning (0, "387 instruction set disabled, using SSE arithmetics");
2621 ix86_fpmath = FPMATH_SSE;
2622 }
2623 else
2624 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
2625 }
2626 else
2627 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2628 }
2629
2630 /* If the i387 is disabled, then do not return values in it. */
2631 if (!TARGET_80387)
2632 target_flags &= ~MASK_FLOAT_RETURNS;
2633
2634 /* Use external vectorized library in vectorizing intrinsics. */
2635 if (ix86_veclibabi_string)
2636 {
2637 if (strcmp (ix86_veclibabi_string, "acml") == 0)
2638 ix86_veclib_handler = ix86_veclibabi_acml;
2639 else
2640 error ("unknown vectorization library ABI type (%s) for "
2641 "-mveclibabi= switch", ix86_veclibabi_string);
2642 }
2643
2644 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2645 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2646 && !optimize_size)
2647 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2648
2649 /* ??? Unwind info is not correct around the CFG unless either a frame
2650 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2651 unwind info generation to be aware of the CFG and propagating states
2652 around edges. */
2653 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2654 || flag_exceptions || flag_non_call_exceptions)
2655 && flag_omit_frame_pointer
2656 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2657 {
2658 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2659 warning (0, "unwind tables currently require either a frame pointer "
2660 "or -maccumulate-outgoing-args for correctness");
2661 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2662 }
2663
2664 /* For sane SSE instruction set generation we need the fcomi instruction.
2665 It is safe to enable all CMOVE instructions. */
2666 if (TARGET_SSE)
2667 TARGET_CMOVE = 1;
2668
2669 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2670 {
2671 char *p;
2672 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2673 p = strchr (internal_label_prefix, 'X');
2674 internal_label_prefix_len = p - internal_label_prefix;
2675 *p = '\0';
2676 }
2677
2678 /* When the scheduling description is not available, disable the scheduler
2679 pass so it won't slow down the compilation and make x87 code slower. */
2680 if (!TARGET_SCHEDULE)
2681 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2682
2683 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2684 set_param_value ("simultaneous-prefetches",
2685 ix86_cost->simultaneous_prefetches);
2686 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2687 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2688 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
2689 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
2690 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
2691 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
2692
2693 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
2694 can be optimized to ap = __builtin_next_arg (0). */
2695 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
2696 targetm.expand_builtin_va_start = NULL;
2697 }
2698 \f
2699 /* Return true if this goes in large data/bss. */
2700
2701 static bool
2702 ix86_in_large_data_p (tree exp)
2703 {
2704 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2705 return false;
2706
2707 /* Functions are never large data. */
2708 if (TREE_CODE (exp) == FUNCTION_DECL)
2709 return false;
2710
2711 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2712 {
2713 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2714 if (strcmp (section, ".ldata") == 0
2715 || strcmp (section, ".lbss") == 0)
2716 return true;
2717 return false;
2718 }
2719 else
2720 {
2721 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2722
2723 /* If this is an incomplete type with size 0, then we can't put it
2724 in data because it might be too big when completed. */
2725 if (!size || size > ix86_section_threshold)
2726 return true;
2727 }
2728
2729 return false;
2730 }
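/* Illustrative note (not in the original source): with -mcmodel=medium the
   function above sends, say, a 100000-byte array to the large data sections
   because it exceeds ix86_section_threshold (65536 by default, adjustable
   with -mlarge-data-threshold=), while functions and objects below the
   threshold stay in the normal data/bss sections.  */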
2731
2732 /* Switch to the appropriate section for output of DECL.
2733 DECL is either a `VAR_DECL' node or a constant of some sort.
2734 RELOC indicates whether forming the initial value of DECL requires
2735 link-time relocations. */
2736
2737 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2738 ATTRIBUTE_UNUSED;
2739
2740 static section *
2741 x86_64_elf_select_section (tree decl, int reloc,
2742 unsigned HOST_WIDE_INT align)
2743 {
2744 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2745 && ix86_in_large_data_p (decl))
2746 {
2747 const char *sname = NULL;
2748 unsigned int flags = SECTION_WRITE;
2749 switch (categorize_decl_for_section (decl, reloc))
2750 {
2751 case SECCAT_DATA:
2752 sname = ".ldata";
2753 break;
2754 case SECCAT_DATA_REL:
2755 sname = ".ldata.rel";
2756 break;
2757 case SECCAT_DATA_REL_LOCAL:
2758 sname = ".ldata.rel.local";
2759 break;
2760 case SECCAT_DATA_REL_RO:
2761 sname = ".ldata.rel.ro";
2762 break;
2763 case SECCAT_DATA_REL_RO_LOCAL:
2764 sname = ".ldata.rel.ro.local";
2765 break;
2766 case SECCAT_BSS:
2767 sname = ".lbss";
2768 flags |= SECTION_BSS;
2769 break;
2770 case SECCAT_RODATA:
2771 case SECCAT_RODATA_MERGE_STR:
2772 case SECCAT_RODATA_MERGE_STR_INIT:
2773 case SECCAT_RODATA_MERGE_CONST:
2774 sname = ".lrodata";
2775 flags = 0;
2776 break;
2777 case SECCAT_SRODATA:
2778 case SECCAT_SDATA:
2779 case SECCAT_SBSS:
2780 gcc_unreachable ();
2781 case SECCAT_TEXT:
2782 case SECCAT_TDATA:
2783 case SECCAT_TBSS:
2784 /* We don't split these for the medium model. Place them into
2785 default sections and hope for the best. */
2786 break;
2787 }
2788 if (sname)
2789 {
2790 /* We might get called with string constants, but get_named_section
2791 doesn't like them as they are not DECLs. Also, we need to set
2792 flags in that case. */
2793 if (!DECL_P (decl))
2794 return get_section (sname, flags, NULL);
2795 return get_named_section (decl, sname, reloc);
2796 }
2797 }
2798 return default_elf_select_section (decl, reloc, align);
2799 }
2800
2801 /* Build up a unique section name, expressed as a
2802 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2803 RELOC indicates whether the initial value of EXP requires
2804 link-time relocations. */
2805
2806 static void ATTRIBUTE_UNUSED
2807 x86_64_elf_unique_section (tree decl, int reloc)
2808 {
2809 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2810 && ix86_in_large_data_p (decl))
2811 {
2812 const char *prefix = NULL;
2813 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2814 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2815
2816 switch (categorize_decl_for_section (decl, reloc))
2817 {
2818 case SECCAT_DATA:
2819 case SECCAT_DATA_REL:
2820 case SECCAT_DATA_REL_LOCAL:
2821 case SECCAT_DATA_REL_RO:
2822 case SECCAT_DATA_REL_RO_LOCAL:
2823 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2824 break;
2825 case SECCAT_BSS:
2826 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2827 break;
2828 case SECCAT_RODATA:
2829 case SECCAT_RODATA_MERGE_STR:
2830 case SECCAT_RODATA_MERGE_STR_INIT:
2831 case SECCAT_RODATA_MERGE_CONST:
2832 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2833 break;
2834 case SECCAT_SRODATA:
2835 case SECCAT_SDATA:
2836 case SECCAT_SBSS:
2837 gcc_unreachable ();
2838 case SECCAT_TEXT:
2839 case SECCAT_TDATA:
2840 case SECCAT_TBSS:
2841 /* We don't split these for the medium model. Place them into
2842 default sections and hope for the best. */
2843 break;
2844 }
2845 if (prefix)
2846 {
2847 const char *name;
2848 size_t nlen, plen;
2849 char *string;
2850 plen = strlen (prefix);
2851
2852 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2853 name = targetm.strip_name_encoding (name);
2854 nlen = strlen (name);
2855
2856 string = (char *) alloca (nlen + plen + 1);
2857 memcpy (string, prefix, plen);
2858 memcpy (string + plen, name, nlen + 1);
2859
2860 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2861 return;
2862 }
2863 }
2864 default_unique_section (decl, reloc);
2865 }
2866
2867 #ifdef COMMON_ASM_OP
2868 /* This says how to output assembler code to declare an
2869 uninitialized external linkage data object.
2870
2871 For medium model x86-64 we need to use the .largecomm directive for
2872 large objects. */
2873 void
2874 x86_elf_aligned_common (FILE *file,
2875 const char *name, unsigned HOST_WIDE_INT size,
2876 int align)
2877 {
2878 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2879 && size > (unsigned int)ix86_section_threshold)
2880 fprintf (file, ".largecomm\t");
2881 else
2882 fprintf (file, "%s", COMMON_ASM_OP);
2883 assemble_name (file, name);
2884 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2885 size, align / BITS_PER_UNIT);
2886 }
2887 #endif
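/* Illustrative note (not in the original source): for a 100000-byte common
   symbol "foo" (a made-up name) aligned to 32 bytes under -mcmodel=medium,
   the routine above emits roughly

     .largecomm foo,100000,32

   while smaller objects go through the ordinary COMMON_ASM_OP (typically
   ".comm") path.  */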
2888
2889 /* Utility function for targets to use in implementing
2890 ASM_OUTPUT_ALIGNED_BSS. */
2891
2892 void
2893 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2894 const char *name, unsigned HOST_WIDE_INT size,
2895 int align)
2896 {
2897 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2898 && size > (unsigned int)ix86_section_threshold)
2899 switch_to_section (get_named_section (decl, ".lbss", 0));
2900 else
2901 switch_to_section (bss_section);
2902 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2903 #ifdef ASM_DECLARE_OBJECT_NAME
2904 last_assemble_variable_decl = decl;
2905 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2906 #else
2907 /* The standard thing is to just output a label for the object. */
2908 ASM_OUTPUT_LABEL (file, name);
2909 #endif /* ASM_DECLARE_OBJECT_NAME */
2910 ASM_OUTPUT_SKIP (file, size ? size : 1);
2911 }
2912 \f
2913 void
2914 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2915 {
2916 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2917 make the problem with not enough registers even worse. */
2918 #ifdef INSN_SCHEDULING
2919 if (level > 1)
2920 flag_schedule_insns = 0;
2921 #endif
2922
2923 if (TARGET_MACHO)
2924 /* The Darwin libraries never set errno, so we might as well
2925 avoid calling them when that's the only reason we would. */
2926 flag_errno_math = 0;
2927
2928 /* The default values of these switches depend on TARGET_64BIT,
2929 which is not known at this moment. Mark these values with 2 and
2930 let the user override them. In case there is no command line option
2931 specifying them, we will set the defaults in override_options. */
2932 if (optimize >= 1)
2933 flag_omit_frame_pointer = 2;
2934 flag_pcc_struct_return = 2;
2935 flag_asynchronous_unwind_tables = 2;
2936 flag_vect_cost_model = 1;
2937 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2938 SUBTARGET_OPTIMIZATION_OPTIONS;
2939 #endif
2940 }
2941 \f
2942 /* Decide whether we can make a sibling call to a function. DECL is the
2943 declaration of the function being targeted by the call and EXP is the
2944 CALL_EXPR representing the call. */
2945
2946 static bool
2947 ix86_function_ok_for_sibcall (tree decl, tree exp)
2948 {
2949 tree func;
2950 rtx a, b;
2951
2952 /* If we are generating position-independent code, we cannot sibcall
2953 optimize any indirect call, or a direct call to a global function,
2954 as the PLT requires %ebx be live. */
2955 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2956 return false;
2957
2958 if (decl)
2959 func = decl;
2960 else
2961 {
2962 func = TREE_TYPE (CALL_EXPR_FN (exp));
2963 if (POINTER_TYPE_P (func))
2964 func = TREE_TYPE (func);
2965 }
2966
2967 /* Check that the return value locations are the same. For example,
2968 if we are returning floats on the 80387 register stack, we cannot
2969 make a sibcall from a function that doesn't return a float to a
2970 function that does or, conversely, from a function that does return
2971 a float to a function that doesn't; the necessary stack adjustment
2972 would not be executed. This is also the place we notice
2973 differences in the return value ABI. Note that it is ok for one
2974 of the functions to have void return type as long as the return
2975 value of the other is passed in a register. */
2976 a = ix86_function_value (TREE_TYPE (exp), func, false);
2977 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2978 cfun->decl, false);
2979 if (STACK_REG_P (a) || STACK_REG_P (b))
2980 {
2981 if (!rtx_equal_p (a, b))
2982 return false;
2983 }
2984 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2985 ;
2986 else if (!rtx_equal_p (a, b))
2987 return false;
2988
2989 /* If this call is indirect, we'll need to be able to use a call-clobbered
2990 register for the address of the target function. Make sure that all
2991 such registers are not used for passing parameters. */
2992 if (!decl && !TARGET_64BIT)
2993 {
2994 tree type;
2995
2996 /* We're looking at the CALL_EXPR, we need the type of the function. */
2997 type = CALL_EXPR_FN (exp); /* pointer expression */
2998 type = TREE_TYPE (type); /* pointer type */
2999 type = TREE_TYPE (type); /* function type */
3000
3001 if (ix86_function_regparm (type, NULL) >= 3)
3002 {
3003 /* ??? Need to count the actual number of registers to be used,
3004 not the possible number of registers. Fix later. */
3005 return false;
3006 }
3007 }
3008
3009 /* Dllimport'd functions are also called indirectly. */
3010 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
3011 && decl && DECL_DLLIMPORT_P (decl)
3012 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
3013 return false;
3014
3015 /* If we force-aligned the stack, then sibcalling would unalign the
3016 stack, which may break the called function. */
3017 if (cfun->machine->force_align_arg_pointer)
3018 return false;
3019
3020 /* Otherwise okay. That also includes certain types of indirect calls. */
3021 return true;
3022 }
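/* Illustrative note (not in the original source): e.g. in 32-bit PIC code a
   tail call to an externally visible function is rejected by the first test
   above because the PLT call sequence requires %ebx to be live, whereas the
   same call in non-PIC or 64-bit code may still become a sibcall provided
   the remaining return-value and register checks pass.  */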
3023
3024 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
3025 calling convention attributes;
3026 arguments as in struct attribute_spec.handler. */
3027
3028 static tree
3029 ix86_handle_cconv_attribute (tree *node, tree name,
3030 tree args,
3031 int flags ATTRIBUTE_UNUSED,
3032 bool *no_add_attrs)
3033 {
3034 if (TREE_CODE (*node) != FUNCTION_TYPE
3035 && TREE_CODE (*node) != METHOD_TYPE
3036 && TREE_CODE (*node) != FIELD_DECL
3037 && TREE_CODE (*node) != TYPE_DECL)
3038 {
3039 warning (OPT_Wattributes, "%qs attribute only applies to functions",
3040 IDENTIFIER_POINTER (name));
3041 *no_add_attrs = true;
3042 return NULL_TREE;
3043 }
3044
3045 /* Can combine regparm with all attributes but fastcall. */
3046 if (is_attribute_p ("regparm", name))
3047 {
3048 tree cst;
3049
3050 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3051 {
3052 error ("fastcall and regparm attributes are not compatible");
3053 }
3054
3055 cst = TREE_VALUE (args);
3056 if (TREE_CODE (cst) != INTEGER_CST)
3057 {
3058 warning (OPT_Wattributes,
3059 "%qs attribute requires an integer constant argument",
3060 IDENTIFIER_POINTER (name));
3061 *no_add_attrs = true;
3062 }
3063 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
3064 {
3065 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
3066 IDENTIFIER_POINTER (name), REGPARM_MAX);
3067 *no_add_attrs = true;
3068 }
3069
3070 if (!TARGET_64BIT
3071 && lookup_attribute (ix86_force_align_arg_pointer_string,
3072 TYPE_ATTRIBUTES (*node))
3073 && compare_tree_int (cst, REGPARM_MAX-1))
3074 {
3075 error ("%s functions limited to %d register parameters",
3076 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
3077 }
3078
3079 return NULL_TREE;
3080 }
3081
3082 if (TARGET_64BIT)
3083 {
3084 /* Do not warn when emulating the MS ABI. */
3085 if (!TARGET_64BIT_MS_ABI)
3086 warning (OPT_Wattributes, "%qs attribute ignored",
3087 IDENTIFIER_POINTER (name));
3088 *no_add_attrs = true;
3089 return NULL_TREE;
3090 }
3091
3092 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
3093 if (is_attribute_p ("fastcall", name))
3094 {
3095 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3096 {
3097 error ("fastcall and cdecl attributes are not compatible");
3098 }
3099 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3100 {
3101 error ("fastcall and stdcall attributes are not compatible");
3102 }
3103 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
3104 {
3105 error ("fastcall and regparm attributes are not compatible");
3106 }
3107 }
3108
3109 /* Can combine stdcall with regparm and
3110 sseregparm. */
3111 else if (is_attribute_p ("stdcall", name))
3112 {
3113 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3114 {
3115 error ("stdcall and cdecl attributes are not compatible");
3116 }
3117 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3118 {
3119 error ("stdcall and fastcall attributes are not compatible");
3120 }
3121 }
3122
3123 /* Can combine cdecl with regparm and sseregparm. */
3124 else if (is_attribute_p ("cdecl", name))
3125 {
3126 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3127 {
3128 error ("stdcall and cdecl attributes are not compatible");
3129 }
3130 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3131 {
3132 error ("fastcall and cdecl attributes are not compatible");
3133 }
3134 }
3135
3136 /* Can combine sseregparm with all attributes. */
3137
3138 return NULL_TREE;
3139 }
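
/* A minimal sketch, assuming an ia32 target where these attributes are
   accepted, of how the calling-convention attributes handled above are
   spelled in user code, plus one combination the handler rejects:  */

void __attribute__ ((regparm (3)))         use_regparm (int a, int b, int c);
void __attribute__ ((fastcall))            use_fastcall (int a, int b);
void __attribute__ ((stdcall, sseregparm)) use_stdcall_sse (double x);

/* void __attribute__ ((fastcall, regparm (2))) rejected (int a);
   would be diagnosed with "fastcall and regparm attributes are not
   compatible".  */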
3140
3141 /* Return 0 if the attributes for two types are incompatible, 1 if they
3142 are compatible, and 2 if they are nearly compatible (which causes a
3143 warning to be generated). */
3144
3145 static int
3146 ix86_comp_type_attributes (const_tree type1, const_tree type2)
3147 {
3148 /* Check for mismatch of non-default calling convention. */
3149 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
3150
3151 if (TREE_CODE (type1) != FUNCTION_TYPE
3152 && TREE_CODE (type1) != METHOD_TYPE)
3153 return 1;
3154
3155 /* Check for mismatched fastcall/regparm types. */
3156 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
3157 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
3158 || (ix86_function_regparm (type1, NULL)
3159 != ix86_function_regparm (type2, NULL)))
3160 return 0;
3161
3162 /* Check for mismatched sseregparm types. */
3163 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
3164 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
3165 return 0;
3166
3167 /* Check for mismatched return types (cdecl vs stdcall). */
3168 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
3169 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
3170 return 0;
3171
3172 return 1;
3173 }
3174 \f
3175 /* Return the regparm value for a function with the indicated TYPE and DECL.
3176 DECL may be NULL when calling function indirectly
3177 or considering a libcall. */
3178
3179 static int
3180 ix86_function_regparm (const_tree type, const_tree decl)
3181 {
3182 tree attr;
3183 int regparm = ix86_regparm;
3184
3185 if (TARGET_64BIT)
3186 return regparm;
3187
3188 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
3189 if (attr)
3190 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
3191
3192 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
3193 return 2;
3194
3195 /* Use register calling convention for local functions when possible. */
3196 if (decl && TREE_CODE (decl) == FUNCTION_DECL
3197 && flag_unit_at_a_time && !profile_flag)
3198 {
3199 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3200 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3201 if (i && i->local)
3202 {
3203 int local_regparm, globals = 0, regno;
3204 struct function *f;
3205
3206 /* Make sure no regparm register is taken by a
3207 fixed register variable. */
3208 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
3209 if (fixed_regs[local_regparm])
3210 break;
3211
3212 /* We can't use regparm(3) for nested functions as these use the
3213 static chain pointer in the third argument. */
3214 if (local_regparm == 3
3215 && (decl_function_context (decl)
3216 || ix86_force_align_arg_pointer)
3217 && !DECL_NO_STATIC_CHAIN (decl))
3218 local_regparm = 2;
3219
3220 /* If the function realigns its stack pointer, the prologue will
3221 clobber %ecx. If we've already generated code for the callee,
3222 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
3223 scanning the attributes for the self-realigning property. */
3224 f = DECL_STRUCT_FUNCTION (decl);
3225 if (local_regparm == 3
3226 && (f ? !!f->machine->force_align_arg_pointer
3227 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
3228 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
3229 local_regparm = 2;
3230
3231 /* Each fixed register usage increases register pressure,
3232 so fewer registers should be used for argument passing.
3233 This functionality can be overridden by an explicit
3234 regparm value. */
3235 for (regno = 0; regno <= DI_REG; regno++)
3236 if (fixed_regs[regno])
3237 globals++;
3238
3239 local_regparm
3240 = globals < local_regparm ? local_regparm - globals : 0;
3241
3242 if (local_regparm > regparm)
3243 regparm = local_regparm;
3244 }
3245 }
3246
3247 return regparm;
3248 }
3249
3250 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3251 DFmode (2) arguments in SSE registers for a function with the
3252 indicated TYPE and DECL. DECL may be NULL when calling function
3253 indirectly or considering a libcall. Otherwise return 0. */
3254
3255 static int
3256 ix86_function_sseregparm (const_tree type, const_tree decl)
3257 {
3258 gcc_assert (!TARGET_64BIT);
3259
3260 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3261 by the sseregparm attribute. */
3262 if (TARGET_SSEREGPARM
3263 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
3264 {
3265 if (!TARGET_SSE)
3266 {
3267 if (decl)
3268 error ("Calling %qD with attribute sseregparm without "
3269 "SSE/SSE2 enabled", decl);
3270 else
3271 error ("Calling %qT with attribute sseregparm without "
3272 "SSE/SSE2 enabled", type);
3273 return 0;
3274 }
3275
3276 return 2;
3277 }
3278
3279 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3280 (and DFmode for SSE2) arguments in SSE registers. */
3281 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3282 {
3283 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3284 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3285 if (i && i->local)
3286 return TARGET_SSE2 ? 2 : 1;
3287 }
3288
3289 return 0;
3290 }
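
/* A sketch of the sseregparm effect, assuming -m32 -msse2:  */

double __attribute__ ((sseregparm)) scale (double x, double y);

/* x and y arrive in %xmm0 and %xmm1 rather than on the stack, and the
   DFmode result comes back in %xmm0 (see function_value_32 below).  */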
3291
3292 /* Return true if EAX is live at the start of the function. Used by
3293 ix86_expand_prologue to determine if we need special help before
3294 calling allocate_stack_worker. */
3295
3296 static bool
3297 ix86_eax_live_at_start_p (void)
3298 {
3299 /* Cheat. Don't bother working forward from ix86_function_regparm
3300 to the function type to whether an actual argument is located in
3301 eax. Instead just look at cfg info, which is still close enough
3302 to correct at this point. This gives false positives for broken
3303 functions that might use uninitialized data that happens to be
3304 allocated in eax, but who cares? */
3305 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
3306 }
3307
3308 /* Value is the number of bytes of arguments automatically
3309 popped when returning from a subroutine call.
3310 FUNDECL is the declaration node of the function (as a tree),
3311 FUNTYPE is the data type of the function (as a tree),
3312 or for a library call it is an identifier node for the subroutine name.
3313 SIZE is the number of bytes of arguments passed on the stack.
3314
3315 On the 80386, the RTD insn may be used to pop them if the number
3316 of args is fixed, but if the number is variable then the caller
3317 must pop them all. RTD can't be used for library calls now
3318 because the library is compiled with the Unix compiler.
3319 Use of RTD is a selectable option, since it is incompatible with
3320 standard Unix calling sequences. If the option is not selected,
3321 the caller must always pop the args.
3322
3323 The attribute stdcall is equivalent to RTD on a per module basis. */
3324
3325 int
3326 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3327 {
3328 int rtd;
3329
3330 /* None of the 64-bit ABIs pop arguments. */
3331 if (TARGET_64BIT)
3332 return 0;
3333
3334 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3335
3336 /* Cdecl functions override -mrtd, and never pop the stack. */
3337 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
3338 {
3339 /* Stdcall and fastcall functions will pop the stack if not
3340 variable args. */
3341 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3342 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3343 rtd = 1;
3344
3345 if (rtd && ! stdarg_p (funtype))
3346 return size;
3347 }
3348
3349 /* Lose any fake structure return argument if it is passed on the stack. */
3350 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3351 && !KEEP_AGGREGATE_RETURN_POINTER)
3352 {
3353 int nregs = ix86_function_regparm (funtype, fundecl);
3354 if (nregs == 0)
3355 return GET_MODE_SIZE (Pmode);
3356 }
3357
3358 return 0;
3359 }
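
/* A sketch of the callee-pops rule above, assuming -m32: a stdcall or
   fastcall function with a fixed argument list pops its own stack
   arguments, so the value computed above is SIZE; a cdecl function leaves
   them for the caller and 0 is returned.  */

void __attribute__ ((stdcall)) callee_pops (int a, int b);  /* ret $8 */
void __attribute__ ((cdecl))   caller_pops (int a, int b);  /* plain ret */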
3360 \f
3361 /* Argument support functions. */
3362
3363 /* Return true when register may be used to pass function parameters. */
3364 bool
3365 ix86_function_arg_regno_p (int regno)
3366 {
3367 int i;
3368 const int *parm_regs;
3369
3370 if (!TARGET_64BIT)
3371 {
3372 if (TARGET_MACHO)
3373 return (regno < REGPARM_MAX
3374 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3375 else
3376 return (regno < REGPARM_MAX
3377 || (TARGET_MMX && MMX_REGNO_P (regno)
3378 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3379 || (TARGET_SSE && SSE_REGNO_P (regno)
3380 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3381 }
3382
3383 if (TARGET_MACHO)
3384 {
3385 if (SSE_REGNO_P (regno) && TARGET_SSE)
3386 return true;
3387 }
3388 else
3389 {
3390 if (TARGET_SSE && SSE_REGNO_P (regno)
3391 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3392 return true;
3393 }
3394
3395 /* RAX is used as a hidden argument to varargs functions. */
3396 if (!TARGET_64BIT_MS_ABI && regno == AX_REG)
3397 return true;
3398
3399 if (TARGET_64BIT_MS_ABI)
3400 parm_regs = x86_64_ms_abi_int_parameter_registers;
3401 else
3402 parm_regs = x86_64_int_parameter_registers;
3403 for (i = 0; i < REGPARM_MAX; i++)
3404 if (regno == parm_regs[i])
3405 return true;
3406 return false;
3407 }
3408
3409 /* Return true if we do not know how to pass TYPE solely in registers. */
3410
3411 static bool
3412 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
3413 {
3414 if (must_pass_in_stack_var_size_or_pad (mode, type))
3415 return true;
3416
3417 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3418 The layout_type routine is crafty and tries to trick us into passing
3419 currently unsupported vector types on the stack by using TImode. */
3420 return (!TARGET_64BIT && mode == TImode
3421 && type && TREE_CODE (type) != VECTOR_TYPE);
3422 }
3423
3424 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3425 for a call to a function whose data type is FNTYPE.
3426 For a library call, FNTYPE is 0. */
3427
3428 void
3429 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3430 tree fntype, /* tree ptr for function decl */
3431 rtx libname, /* SYMBOL_REF of library name or 0 */
3432 tree fndecl)
3433 {
3434 memset (cum, 0, sizeof (*cum));
3435
3436 /* Set up the number of registers to use for passing arguments. */
3437 cum->nregs = ix86_regparm;
3438 if (TARGET_SSE)
3439 cum->sse_nregs = SSE_REGPARM_MAX;
3440 if (TARGET_MMX)
3441 cum->mmx_nregs = MMX_REGPARM_MAX;
3442 cum->warn_sse = true;
3443 cum->warn_mmx = true;
3444 cum->maybe_vaarg = (fntype
3445 ? (!prototype_p (fntype) || stdarg_p (fntype))
3446 : !libname);
3447
3448 if (!TARGET_64BIT)
3449 {
3450 /* If there are variable arguments, then we won't pass anything
3451 in registers in 32-bit mode. */
3452 if (cum->maybe_vaarg)
3453 {
3454 cum->nregs = 0;
3455 cum->sse_nregs = 0;
3456 cum->mmx_nregs = 0;
3457 cum->warn_sse = 0;
3458 cum->warn_mmx = 0;
3459 return;
3460 }
3461
3462 /* Use ecx and edx registers if function has fastcall attribute,
3463 else look for regparm information. */
3464 if (fntype)
3465 {
3466 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3467 {
3468 cum->nregs = 2;
3469 cum->fastcall = 1;
3470 }
3471 else
3472 cum->nregs = ix86_function_regparm (fntype, fndecl);
3473 }
3474
3475 /* Set up the number of SSE registers used for passing SFmode
3476 and DFmode arguments. Warn for mismatching ABI. */
3477 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3478 }
3479 }
3480
3481 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3482 But in the case of vector types, it is some vector mode.
3483
3484 When we have only some of our vector isa extensions enabled, then there
3485 are some modes for which vector_mode_supported_p is false. For these
3486 modes, the generic vector support in gcc will choose some non-vector mode
3487 in order to implement the type. By computing the natural mode, we'll
3488 select the proper ABI location for the operand and not depend on whatever
3489 the middle-end decides to do with these vector types. */
3490
3491 static enum machine_mode
3492 type_natural_mode (const_tree type)
3493 {
3494 enum machine_mode mode = TYPE_MODE (type);
3495
3496 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3497 {
3498 HOST_WIDE_INT size = int_size_in_bytes (type);
3499 if ((size == 8 || size == 16)
3500 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3501 && TYPE_VECTOR_SUBPARTS (type) > 1)
3502 {
3503 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3504
3505 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3506 mode = MIN_MODE_VECTOR_FLOAT;
3507 else
3508 mode = MIN_MODE_VECTOR_INT;
3509
3510 /* Get the mode which has this inner mode and number of units. */
3511 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3512 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3513 && GET_MODE_INNER (mode) == innermode)
3514 return mode;
3515
3516 gcc_unreachable ();
3517 }
3518 }
3519
3520 return mode;
3521 }
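
/* A sketch of why the natural mode matters, assuming a generic 16-byte
   vector type:  */

typedef int v4si __attribute__ ((vector_size (16)));

/* Even when SSE is disabled and the middle-end lowers v4si operations to
   scalar code, type_natural_mode still reports V4SImode for v4si, so an
   argument of this type keeps the ABI slot of a 128-bit vector.  */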
3522
3523 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3524 this may not agree with the mode that the type system has chosen for the
3525 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3526 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3527
3528 static rtx
3529 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3530 unsigned int regno)
3531 {
3532 rtx tmp;
3533
3534 if (orig_mode != BLKmode)
3535 tmp = gen_rtx_REG (orig_mode, regno);
3536 else
3537 {
3538 tmp = gen_rtx_REG (mode, regno);
3539 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3540 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3541 }
3542
3543 return tmp;
3544 }
3545
3546 /* x86-64 register passing implementation. See the x86-64 ABI for details. The
3547 goal of this code is to classify each 8 bytes of an incoming argument by
3548 register class and assign registers accordingly. */
3549
3550 /* Return the union class of CLASS1 and CLASS2.
3551 See the x86-64 PS ABI for details. */
3552
3553 static enum x86_64_reg_class
3554 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3555 {
3556 /* Rule #1: If both classes are equal, this is the resulting class. */
3557 if (class1 == class2)
3558 return class1;
3559
3560 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3561 the other class. */
3562 if (class1 == X86_64_NO_CLASS)
3563 return class2;
3564 if (class2 == X86_64_NO_CLASS)
3565 return class1;
3566
3567 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3568 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3569 return X86_64_MEMORY_CLASS;
3570
3571 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3572 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3573 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3574 return X86_64_INTEGERSI_CLASS;
3575 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3576 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3577 return X86_64_INTEGER_CLASS;
3578
3579 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3580 MEMORY is used. */
3581 if (class1 == X86_64_X87_CLASS
3582 || class1 == X86_64_X87UP_CLASS
3583 || class1 == X86_64_COMPLEX_X87_CLASS
3584 || class2 == X86_64_X87_CLASS
3585 || class2 == X86_64_X87UP_CLASS
3586 || class2 == X86_64_COMPLEX_X87_CLASS)
3587 return X86_64_MEMORY_CLASS;
3588
3589 /* Rule #6: Otherwise class SSE is used. */
3590 return X86_64_SSE_CLASS;
3591 }
3592
3593 /* Classify the argument of type TYPE and mode MODE.
3594 CLASSES will be filled by the register class used to pass each word
3595 of the operand. The number of words is returned. In case the parameter
3596 should be passed in memory, 0 is returned. As a special case for zero
3597 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3598
3599 BIT_OFFSET is used internally for handling records and specifies the
3600 offset in bits modulo 256, to avoid overflow cases.
3601
3602 See the x86-64 PS ABI for details.
3603 */
3604
3605 static int
3606 classify_argument (enum machine_mode mode, const_tree type,
3607 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3608 {
3609 HOST_WIDE_INT bytes =
3610 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3611 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3612
3613 /* Variable sized entities are always passed/returned in memory. */
3614 if (bytes < 0)
3615 return 0;
3616
3617 if (mode != VOIDmode
3618 && targetm.calls.must_pass_in_stack (mode, type))
3619 return 0;
3620
3621 if (type && AGGREGATE_TYPE_P (type))
3622 {
3623 int i;
3624 tree field;
3625 enum x86_64_reg_class subclasses[MAX_CLASSES];
3626
3627 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3628 if (bytes > 16)
3629 return 0;
3630
3631 for (i = 0; i < words; i++)
3632 classes[i] = X86_64_NO_CLASS;
3633
3634 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3635 signal the memory class, so handle it as a special case. */
3636 if (!words)
3637 {
3638 classes[0] = X86_64_NO_CLASS;
3639 return 1;
3640 }
3641
3642 /* Classify each field of record and merge classes. */
3643 switch (TREE_CODE (type))
3644 {
3645 case RECORD_TYPE:
3646 /* And now merge the fields of structure. */
3647 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3648 {
3649 if (TREE_CODE (field) == FIELD_DECL)
3650 {
3651 int num;
3652
3653 if (TREE_TYPE (field) == error_mark_node)
3654 continue;
3655
3656 /* Bitfields are always classified as integer. Handle them
3657 early, since later code would consider them to be
3658 misaligned integers. */
3659 if (DECL_BIT_FIELD (field))
3660 {
3661 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3662 i < ((int_bit_position (field) + (bit_offset % 64))
3663 + tree_low_cst (DECL_SIZE (field), 0)
3664 + 63) / 8 / 8; i++)
3665 classes[i] =
3666 merge_classes (X86_64_INTEGER_CLASS,
3667 classes[i]);
3668 }
3669 else
3670 {
3671 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3672 TREE_TYPE (field), subclasses,
3673 (int_bit_position (field)
3674 + bit_offset) % 256);
3675 if (!num)
3676 return 0;
3677 for (i = 0; i < num; i++)
3678 {
3679 int pos =
3680 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3681 classes[i + pos] =
3682 merge_classes (subclasses[i], classes[i + pos]);
3683 }
3684 }
3685 }
3686 }
3687 break;
3688
3689 case ARRAY_TYPE:
3690 /* Arrays are handled as small records. */
3691 {
3692 int num;
3693 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3694 TREE_TYPE (type), subclasses, bit_offset);
3695 if (!num)
3696 return 0;
3697
3698 /* The partial classes are now full classes. */
3699 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3700 subclasses[0] = X86_64_SSE_CLASS;
3701 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3702 subclasses[0] = X86_64_INTEGER_CLASS;
3703
3704 for (i = 0; i < words; i++)
3705 classes[i] = subclasses[i % num];
3706
3707 break;
3708 }
3709 case UNION_TYPE:
3710 case QUAL_UNION_TYPE:
3711 /* Unions are similar to RECORD_TYPE but offset is always 0.
3712 */
3713 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3714 {
3715 if (TREE_CODE (field) == FIELD_DECL)
3716 {
3717 int num;
3718
3719 if (TREE_TYPE (field) == error_mark_node)
3720 continue;
3721
3722 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3723 TREE_TYPE (field), subclasses,
3724 bit_offset);
3725 if (!num)
3726 return 0;
3727 for (i = 0; i < num; i++)
3728 classes[i] = merge_classes (subclasses[i], classes[i]);
3729 }
3730 }
3731 break;
3732
3733 default:
3734 gcc_unreachable ();
3735 }
3736
3737 /* Final merger cleanup. */
3738 for (i = 0; i < words; i++)
3739 {
3740 /* If one class is MEMORY, everything should be passed in
3741 memory. */
3742 if (classes[i] == X86_64_MEMORY_CLASS)
3743 return 0;
3744
3745 /* The X86_64_SSEUP_CLASS should be always preceded by
3746 X86_64_SSE_CLASS. */
3747 if (classes[i] == X86_64_SSEUP_CLASS
3748 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3749 classes[i] = X86_64_SSE_CLASS;
3750
3751 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3752 if (classes[i] == X86_64_X87UP_CLASS
3753 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3754 classes[i] = X86_64_SSE_CLASS;
3755 }
3756 return words;
3757 }
3758
3759 /* Compute alignment needed. We align all types to natural boundaries with
3760 the exception of XFmode, which is aligned to 128 bits. */
3761 if (mode != VOIDmode && mode != BLKmode)
3762 {
3763 int mode_alignment = GET_MODE_BITSIZE (mode);
3764
3765 if (mode == XFmode)
3766 mode_alignment = 128;
3767 else if (mode == XCmode)
3768 mode_alignment = 256;
3769 if (COMPLEX_MODE_P (mode))
3770 mode_alignment /= 2;
3771 /* Misaligned fields are always returned in memory. */
3772 if (bit_offset % mode_alignment)
3773 return 0;
3774 }
3775
3776 /* for V1xx modes, just use the base mode */
3777 if (VECTOR_MODE_P (mode)
3778 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3779 mode = GET_MODE_INNER (mode);
3780
3781 /* Classification of atomic types. */
3782 switch (mode)
3783 {
3784 case SDmode:
3785 case DDmode:
3786 classes[0] = X86_64_SSE_CLASS;
3787 return 1;
3788 case TDmode:
3789 classes[0] = X86_64_SSE_CLASS;
3790 classes[1] = X86_64_SSEUP_CLASS;
3791 return 2;
3792 case DImode:
3793 case SImode:
3794 case HImode:
3795 case QImode:
3796 case CSImode:
3797 case CHImode:
3798 case CQImode:
3799 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3800 classes[0] = X86_64_INTEGERSI_CLASS;
3801 else
3802 classes[0] = X86_64_INTEGER_CLASS;
3803 return 1;
3804 case CDImode:
3805 case TImode:
3806 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3807 return 2;
3808 case CTImode:
3809 return 0;
3810 case SFmode:
3811 if (!(bit_offset % 64))
3812 classes[0] = X86_64_SSESF_CLASS;
3813 else
3814 classes[0] = X86_64_SSE_CLASS;
3815 return 1;
3816 case DFmode:
3817 classes[0] = X86_64_SSEDF_CLASS;
3818 return 1;
3819 case XFmode:
3820 classes[0] = X86_64_X87_CLASS;
3821 classes[1] = X86_64_X87UP_CLASS;
3822 return 2;
3823 case TFmode:
3824 classes[0] = X86_64_SSE_CLASS;
3825 classes[1] = X86_64_SSEUP_CLASS;
3826 return 2;
3827 case SCmode:
3828 classes[0] = X86_64_SSE_CLASS;
3829 return 1;
3830 case DCmode:
3831 classes[0] = X86_64_SSEDF_CLASS;
3832 classes[1] = X86_64_SSEDF_CLASS;
3833 return 2;
3834 case XCmode:
3835 classes[0] = X86_64_COMPLEX_X87_CLASS;
3836 return 1;
3837 case TCmode:
3838 /* This mode is larger than 16 bytes. */
3839 return 0;
3840 case V4SFmode:
3841 case V4SImode:
3842 case V16QImode:
3843 case V8HImode:
3844 case V2DFmode:
3845 case V2DImode:
3846 classes[0] = X86_64_SSE_CLASS;
3847 classes[1] = X86_64_SSEUP_CLASS;
3848 return 2;
3849 case V2SFmode:
3850 case V2SImode:
3851 case V4HImode:
3852 case V8QImode:
3853 classes[0] = X86_64_SSE_CLASS;
3854 return 1;
3855 case BLKmode:
3856 case VOIDmode:
3857 return 0;
3858 default:
3859 gcc_assert (VECTOR_MODE_P (mode));
3860
3861 if (bytes > 16)
3862 return 0;
3863
3864 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3865
3866 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3867 classes[0] = X86_64_INTEGERSI_CLASS;
3868 else
3869 classes[0] = X86_64_INTEGER_CLASS;
3870 classes[1] = X86_64_INTEGER_CLASS;
3871 return 1 + (bytes > 8);
3872 }
3873 }
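
/* A worked sketch of the classification above for the x86-64 ABI:  */

struct classify_example { long l; double d; };  /* 16 bytes: two 8-byte words */

/* Word 0 (the long) classifies as X86_64_INTEGER_CLASS and word 1 (the
   double) as X86_64_SSEDF_CLASS, so classify_argument returns 2 and such
   an argument travels in one integer register and one SSE register.  */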
3874
3875 /* Examine the argument and set the number of registers required in each
3876 class. Return 0 iff the parameter should be passed in memory. */
3877 static int
3878 examine_argument (enum machine_mode mode, const_tree type, int in_return,
3879 int *int_nregs, int *sse_nregs)
3880 {
3881 enum x86_64_reg_class regclass[MAX_CLASSES];
3882 int n = classify_argument (mode, type, regclass, 0);
3883
3884 *int_nregs = 0;
3885 *sse_nregs = 0;
3886 if (!n)
3887 return 0;
3888 for (n--; n >= 0; n--)
3889 switch (regclass[n])
3890 {
3891 case X86_64_INTEGER_CLASS:
3892 case X86_64_INTEGERSI_CLASS:
3893 (*int_nregs)++;
3894 break;
3895 case X86_64_SSE_CLASS:
3896 case X86_64_SSESF_CLASS:
3897 case X86_64_SSEDF_CLASS:
3898 (*sse_nregs)++;
3899 break;
3900 case X86_64_NO_CLASS:
3901 case X86_64_SSEUP_CLASS:
3902 break;
3903 case X86_64_X87_CLASS:
3904 case X86_64_X87UP_CLASS:
3905 if (!in_return)
3906 return 0;
3907 break;
3908 case X86_64_COMPLEX_X87_CLASS:
3909 return in_return ? 2 : 0;
3910 case X86_64_MEMORY_CLASS:
3911 gcc_unreachable ();
3912 }
3913 return 1;
3914 }
3915
3916 /* Construct container for the argument used by GCC interface. See
3917 FUNCTION_ARG for the detailed description. */
3918
3919 static rtx
3920 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3921 const_tree type, int in_return, int nintregs, int nsseregs,
3922 const int *intreg, int sse_regno)
3923 {
3924 /* The following variables hold the static issued_error state. */
3925 static bool issued_sse_arg_error;
3926 static bool issued_sse_ret_error;
3927 static bool issued_x87_ret_error;
3928
3929 enum machine_mode tmpmode;
3930 int bytes =
3931 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3932 enum x86_64_reg_class regclass[MAX_CLASSES];
3933 int n;
3934 int i;
3935 int nexps = 0;
3936 int needed_sseregs, needed_intregs;
3937 rtx exp[MAX_CLASSES];
3938 rtx ret;
3939
3940 n = classify_argument (mode, type, regclass, 0);
3941 if (!n)
3942 return NULL;
3943 if (!examine_argument (mode, type, in_return, &needed_intregs,
3944 &needed_sseregs))
3945 return NULL;
3946 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3947 return NULL;
3948
3949 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3950 some less clueful developer tries to use floating-point anyway. */
3951 if (needed_sseregs && !TARGET_SSE)
3952 {
3953 if (in_return)
3954 {
3955 if (!issued_sse_ret_error)
3956 {
3957 error ("SSE register return with SSE disabled");
3958 issued_sse_ret_error = true;
3959 }
3960 }
3961 else if (!issued_sse_arg_error)
3962 {
3963 error ("SSE register argument with SSE disabled");
3964 issued_sse_arg_error = true;
3965 }
3966 return NULL;
3967 }
3968
3969 /* Likewise, error if the ABI requires us to return values in the
3970 x87 registers and the user specified -mno-80387. */
3971 if (!TARGET_80387 && in_return)
3972 for (i = 0; i < n; i++)
3973 if (regclass[i] == X86_64_X87_CLASS
3974 || regclass[i] == X86_64_X87UP_CLASS
3975 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
3976 {
3977 if (!issued_x87_ret_error)
3978 {
3979 error ("x87 register return with x87 disabled");
3980 issued_x87_ret_error = true;
3981 }
3982 return NULL;
3983 }
3984
3985 /* First construct simple cases. Avoid SCmode, since we want to use a
3986 single register to pass this type. */
3987 if (n == 1 && mode != SCmode)
3988 switch (regclass[0])
3989 {
3990 case X86_64_INTEGER_CLASS:
3991 case X86_64_INTEGERSI_CLASS:
3992 return gen_rtx_REG (mode, intreg[0]);
3993 case X86_64_SSE_CLASS:
3994 case X86_64_SSESF_CLASS:
3995 case X86_64_SSEDF_CLASS:
3996 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3997 case X86_64_X87_CLASS:
3998 case X86_64_COMPLEX_X87_CLASS:
3999 return gen_rtx_REG (mode, FIRST_STACK_REG);
4000 case X86_64_NO_CLASS:
4001 /* Zero sized array, struct or class. */
4002 return NULL;
4003 default:
4004 gcc_unreachable ();
4005 }
4006 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
4007 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
4008 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
4009
4010 if (n == 2
4011 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
4012 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
4013 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
4014 && regclass[1] == X86_64_INTEGER_CLASS
4015 && (mode == CDImode || mode == TImode || mode == TFmode)
4016 && intreg[0] + 1 == intreg[1])
4017 return gen_rtx_REG (mode, intreg[0]);
4018
4019 /* Otherwise figure out the entries of the PARALLEL. */
4020 for (i = 0; i < n; i++)
4021 {
4022 switch (regclass[i])
4023 {
4024 case X86_64_NO_CLASS:
4025 break;
4026 case X86_64_INTEGER_CLASS:
4027 case X86_64_INTEGERSI_CLASS:
4028 /* Merge TImodes on aligned occasions here too. */
4029 if (i * 8 + 8 > bytes)
4030 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
4031 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
4032 tmpmode = SImode;
4033 else
4034 tmpmode = DImode;
4035 /* We've requested a size for which we have no integer mode. Use DImode. */
4036 if (tmpmode == BLKmode)
4037 tmpmode = DImode;
4038 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4039 gen_rtx_REG (tmpmode, *intreg),
4040 GEN_INT (i*8));
4041 intreg++;
4042 break;
4043 case X86_64_SSESF_CLASS:
4044 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4045 gen_rtx_REG (SFmode,
4046 SSE_REGNO (sse_regno)),
4047 GEN_INT (i*8));
4048 sse_regno++;
4049 break;
4050 case X86_64_SSEDF_CLASS:
4051 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4052 gen_rtx_REG (DFmode,
4053 SSE_REGNO (sse_regno)),
4054 GEN_INT (i*8));
4055 sse_regno++;
4056 break;
4057 case X86_64_SSE_CLASS:
4058 if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
4059 tmpmode = TImode;
4060 else
4061 tmpmode = DImode;
4062 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4063 gen_rtx_REG (tmpmode,
4064 SSE_REGNO (sse_regno)),
4065 GEN_INT (i*8));
4066 if (tmpmode == TImode)
4067 i++;
4068 sse_regno++;
4069 break;
4070 default:
4071 gcc_unreachable ();
4072 }
4073 }
4074
4075 /* Empty aligned struct, union or class. */
4076 if (nexps == 0)
4077 return NULL;
4078
4079 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
4080 for (i = 0; i < nexps; i++)
4081 XVECEXP (ret, 0, i) = exp [i];
4082 return ret;
4083 }
4084
4085 /* Update the data in CUM to advance over an argument of mode MODE
4086 and data type TYPE. (TYPE is null for libcalls where that information
4087 may not be available.) */
4088
4089 static void
4090 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4091 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
4092 {
4093 switch (mode)
4094 {
4095 default:
4096 break;
4097
4098 case BLKmode:
4099 if (bytes < 0)
4100 break;
4101 /* FALLTHRU */
4102
4103 case DImode:
4104 case SImode:
4105 case HImode:
4106 case QImode:
4107 cum->words += words;
4108 cum->nregs -= words;
4109 cum->regno += words;
4110
4111 if (cum->nregs <= 0)
4112 {
4113 cum->nregs = 0;
4114 cum->regno = 0;
4115 }
4116 break;
4117
4118 case DFmode:
4119 if (cum->float_in_sse < 2)
4120 break;
4121 case SFmode:
4122 if (cum->float_in_sse < 1)
4123 break;
4124 /* FALLTHRU */
4125
4126 case TImode:
4127 case V16QImode:
4128 case V8HImode:
4129 case V4SImode:
4130 case V2DImode:
4131 case V4SFmode:
4132 case V2DFmode:
4133 if (!type || !AGGREGATE_TYPE_P (type))
4134 {
4135 cum->sse_words += words;
4136 cum->sse_nregs -= 1;
4137 cum->sse_regno += 1;
4138 if (cum->sse_nregs <= 0)
4139 {
4140 cum->sse_nregs = 0;
4141 cum->sse_regno = 0;
4142 }
4143 }
4144 break;
4145
4146 case V8QImode:
4147 case V4HImode:
4148 case V2SImode:
4149 case V2SFmode:
4150 if (!type || !AGGREGATE_TYPE_P (type))
4151 {
4152 cum->mmx_words += words;
4153 cum->mmx_nregs -= 1;
4154 cum->mmx_regno += 1;
4155 if (cum->mmx_nregs <= 0)
4156 {
4157 cum->mmx_nregs = 0;
4158 cum->mmx_regno = 0;
4159 }
4160 }
4161 break;
4162 }
4163 }
4164
4165 static void
4166 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4167 tree type, HOST_WIDE_INT words)
4168 {
4169 int int_nregs, sse_nregs;
4170
4171 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
4172 cum->words += words;
4173 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
4174 {
4175 cum->nregs -= int_nregs;
4176 cum->sse_nregs -= sse_nregs;
4177 cum->regno += int_nregs;
4178 cum->sse_regno += sse_nregs;
4179 }
4180 else
4181 cum->words += words;
4182 }
4183
4184 static void
4185 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
4186 HOST_WIDE_INT words)
4187 {
4188 /* Otherwise, this should be passed indirectly. */
4189 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
4190
4191 cum->words += words;
4192 if (cum->nregs > 0)
4193 {
4194 cum->nregs -= 1;
4195 cum->regno += 1;
4196 }
4197 }
4198
4199 void
4200 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4201 tree type, int named ATTRIBUTE_UNUSED)
4202 {
4203 HOST_WIDE_INT bytes, words;
4204
4205 if (mode == BLKmode)
4206 bytes = int_size_in_bytes (type);
4207 else
4208 bytes = GET_MODE_SIZE (mode);
4209 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4210
4211 if (type)
4212 mode = type_natural_mode (type);
4213
4214 if (TARGET_64BIT_MS_ABI)
4215 function_arg_advance_ms_64 (cum, bytes, words);
4216 else if (TARGET_64BIT)
4217 function_arg_advance_64 (cum, mode, type, words);
4218 else
4219 function_arg_advance_32 (cum, mode, type, bytes, words);
4220 }
4221
4222 /* Define where to put the arguments to a function.
4223 Value is zero to push the argument on the stack,
4224 or a hard register in which to store the argument.
4225
4226 MODE is the argument's machine mode.
4227 TYPE is the data type of the argument (as a tree).
4228 This is null for libcalls where that information may
4229 not be available.
4230 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4231 the preceding args and about the function being called.
4232 NAMED is nonzero if this argument is a named parameter
4233 (otherwise it is an extra parameter matching an ellipsis). */
4234
4235 static rtx
4236 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4237 enum machine_mode orig_mode, tree type,
4238 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
4239 {
4240 static bool warnedsse, warnedmmx;
4241
4242 /* Avoid the AL settings for the Unix64 ABI. */
4243 if (mode == VOIDmode)
4244 return constm1_rtx;
4245
4246 switch (mode)
4247 {
4248 default:
4249 break;
4250
4251 case BLKmode:
4252 if (bytes < 0)
4253 break;
4254 /* FALLTHRU */
4255 case DImode:
4256 case SImode:
4257 case HImode:
4258 case QImode:
4259 if (words <= cum->nregs)
4260 {
4261 int regno = cum->regno;
4262
4263 /* Fastcall allocates the first two DWORD (SImode) or
4264 smaller arguments to ECX and EDX if they aren't
4265 aggregate types. */
4266 if (cum->fastcall)
4267 {
4268 if (mode == BLKmode
4269 || mode == DImode
4270 || (type && AGGREGATE_TYPE_P (type)))
4271 break;
4272
4273 /* ECX, not EAX, is the first allocated register. */
4274 if (regno == AX_REG)
4275 regno = CX_REG;
4276 }
4277 return gen_rtx_REG (mode, regno);
4278 }
4279 break;
4280
4281 case DFmode:
4282 if (cum->float_in_sse < 2)
4283 break;
4284 case SFmode:
4285 if (cum->float_in_sse < 1)
4286 break;
4287 /* FALLTHRU */
4288 case TImode:
4289 case V16QImode:
4290 case V8HImode:
4291 case V4SImode:
4292 case V2DImode:
4293 case V4SFmode:
4294 case V2DFmode:
4295 if (!type || !AGGREGATE_TYPE_P (type))
4296 {
4297 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4298 {
4299 warnedsse = true;
4300 warning (0, "SSE vector argument without SSE enabled "
4301 "changes the ABI");
4302 }
4303 if (cum->sse_nregs)
4304 return gen_reg_or_parallel (mode, orig_mode,
4305 cum->sse_regno + FIRST_SSE_REG);
4306 }
4307 break;
4308
4309 case V8QImode:
4310 case V4HImode:
4311 case V2SImode:
4312 case V2SFmode:
4313 if (!type || !AGGREGATE_TYPE_P (type))
4314 {
4315 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4316 {
4317 warnedmmx = true;
4318 warning (0, "MMX vector argument without MMX enabled "
4319 "changes the ABI");
4320 }
4321 if (cum->mmx_nregs)
4322 return gen_reg_or_parallel (mode, orig_mode,
4323 cum->mmx_regno + FIRST_MMX_REG);
4324 }
4325 break;
4326 }
4327
4328 return NULL_RTX;
4329 }
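
/* A sketch of the 32-bit register assignments made above, assuming -m32:  */

void __attribute__ ((fastcall))    f_fast (int a, int b, int c);
/* a in %ecx, b in %edx, c pushed on the stack.  */

void __attribute__ ((regparm (3))) f_reg (int a, int b, int c);
/* a in %eax, b in %edx, c in %ecx.  */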
4330
4331 static rtx
4332 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4333 enum machine_mode orig_mode, tree type)
4334 {
4335 /* Handle a hidden AL argument containing number of registers
4336 for varargs x86-64 functions. */
4337 if (mode == VOIDmode)
4338 return GEN_INT (cum->maybe_vaarg
4339 ? (cum->sse_nregs < 0
4340 ? SSE_REGPARM_MAX
4341 : cum->sse_regno)
4342 : -1);
4343
4344 return construct_container (mode, orig_mode, type, 0, cum->nregs,
4345 cum->sse_nregs,
4346 &x86_64_int_parameter_registers [cum->regno],
4347 cum->sse_regno);
4348 }
4349
4350 static rtx
4351 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4352 enum machine_mode orig_mode, int named)
4353 {
4354 unsigned int regno;
4355
4356 /* Avoid the AL settings for the Unix64 ABI. */
4357 if (mode == VOIDmode)
4358 return constm1_rtx;
4359
4360 /* If we've run out of registers, it goes on the stack. */
4361 if (cum->nregs == 0)
4362 return NULL_RTX;
4363
4364 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
4365
4366 /* Only floating point modes are passed in anything but integer regs. */
4367 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
4368 {
4369 if (named)
4370 regno = cum->regno + FIRST_SSE_REG;
4371 else
4372 {
4373 rtx t1, t2;
4374
4375 /* Unnamed floating parameters are passed in both the
4376 SSE and integer registers. */
4377 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
4378 t2 = gen_rtx_REG (mode, regno);
4379 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
4380 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
4381 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
4382 }
4383 }
4384
4385 return gen_reg_or_parallel (mode, orig_mode, regno);
4386 }
4387
4388 rtx
4389 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
4390 tree type, int named)
4391 {
4392 enum machine_mode mode = omode;
4393 HOST_WIDE_INT bytes, words;
4394
4395 if (mode == BLKmode)
4396 bytes = int_size_in_bytes (type);
4397 else
4398 bytes = GET_MODE_SIZE (mode);
4399 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4400
4401 /* To simplify the code below, represent vector types with a vector mode
4402 even if MMX/SSE are not active. */
4403 if (type && TREE_CODE (type) == VECTOR_TYPE)
4404 mode = type_natural_mode (type);
4405
4406 if (TARGET_64BIT_MS_ABI)
4407 return function_arg_ms_64 (cum, mode, omode, named);
4408 else if (TARGET_64BIT)
4409 return function_arg_64 (cum, mode, omode, type);
4410 else
4411 return function_arg_32 (cum, mode, omode, type, bytes, words);
4412 }
4413
4414 /* A C expression that indicates when an argument must be passed by
4415 reference. If nonzero for an argument, a copy of that argument is
4416 made in memory and a pointer to the argument is passed instead of
4417 the argument itself. The pointer is passed in whatever way is
4418 appropriate for passing a pointer to that type. */
4419
4420 static bool
4421 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4422 enum machine_mode mode ATTRIBUTE_UNUSED,
4423 const_tree type, bool named ATTRIBUTE_UNUSED)
4424 {
4425 if (TARGET_64BIT_MS_ABI)
4426 {
4427 if (type)
4428 {
4429 /* Arrays are passed by reference. */
4430 if (TREE_CODE (type) == ARRAY_TYPE)
4431 return true;
4432
4433 if (AGGREGATE_TYPE_P (type))
4434 {
4435 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4436 are passed by reference. */
4437 int el2 = exact_log2 (int_size_in_bytes (type));
4438 return !(el2 >= 0 && el2 <= 3);
4439 }
4440 }
4441
4442 /* __m128 is passed by reference. */
4443 /* ??? How to handle complex? For now treat them as structs,
4444 and pass them by reference if they're too large. */
4445 if (GET_MODE_SIZE (mode) > 8)
4446 return true;
4447 }
4448 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
4449 return 1;
4450
4451 return 0;
4452 }
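
/* A sketch of the ms_abi rule above: aggregates whose size is not exactly
   1, 2, 4 or 8 bytes are replaced by a pointer to a caller-made copy.  */

struct ms_in_reg { int a, b; };      /*  8 bytes: passed by value in a register */
struct ms_by_ref { int a, b, c; };   /* 12 bytes: passed by reference */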
4453
4454 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
4455 passing ABI. Only called if TARGET_SSE. */
4456 static bool
4457 contains_128bit_aligned_vector_p (tree type)
4458 {
4459 enum machine_mode mode = TYPE_MODE (type);
4460 if (SSE_REG_MODE_P (mode)
4461 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4462 return true;
4463 if (TYPE_ALIGN (type) < 128)
4464 return false;
4465
4466 if (AGGREGATE_TYPE_P (type))
4467 {
4468 /* Walk the aggregates recursively. */
4469 switch (TREE_CODE (type))
4470 {
4471 case RECORD_TYPE:
4472 case UNION_TYPE:
4473 case QUAL_UNION_TYPE:
4474 {
4475 tree field;
4476
4477 /* Walk all the structure fields. */
4478 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4479 {
4480 if (TREE_CODE (field) == FIELD_DECL
4481 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4482 return true;
4483 }
4484 break;
4485 }
4486
4487 case ARRAY_TYPE:
4488 /* Just for use if some language passes arrays by value. */
4489 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4490 return true;
4491 break;
4492
4493 default:
4494 gcc_unreachable ();
4495 }
4496 }
4497 return false;
4498 }
4499
4500 /* Gives the alignment boundary, in bits, of an argument with the
4501 specified mode and type. */
4502
4503 int
4504 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4505 {
4506 int align;
4507 if (type)
4508 align = TYPE_ALIGN (type);
4509 else
4510 align = GET_MODE_ALIGNMENT (mode);
4511 if (align < PARM_BOUNDARY)
4512 align = PARM_BOUNDARY;
4513 if (!TARGET_64BIT)
4514 {
4515 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4516 make an exception for SSE modes since these require 128bit
4517 alignment.
4518
4519 The handling here differs from field_alignment. ICC aligns MMX
4520 arguments to 4 byte boundaries, while structure fields are aligned
4521 to 8 byte boundaries. */
4522 if (!TARGET_SSE)
4523 align = PARM_BOUNDARY;
4524 else if (!type)
4525 {
4526 if (!SSE_REG_MODE_P (mode))
4527 align = PARM_BOUNDARY;
4528 }
4529 else
4530 {
4531 if (!contains_128bit_aligned_vector_p (type))
4532 align = PARM_BOUNDARY;
4533 }
4534 }
4535 if (align > 128)
4536 align = 128;
4537 return align;
4538 }
4539
4540 /* Return true if REGNO is a possible register number of a function value. */
4541
4542 bool
4543 ix86_function_value_regno_p (int regno)
4544 {
4545 switch (regno)
4546 {
4547 case 0:
4548 return true;
4549
4550 case FIRST_FLOAT_REG:
4551 if (TARGET_64BIT_MS_ABI)
4552 return false;
4553 return TARGET_FLOAT_RETURNS_IN_80387;
4554
4555 case FIRST_SSE_REG:
4556 return TARGET_SSE;
4557
4558 case FIRST_MMX_REG:
4559 if (TARGET_MACHO || TARGET_64BIT)
4560 return false;
4561 return TARGET_MMX;
4562 }
4563
4564 return false;
4565 }
4566
4567 /* Define how to find the value returned by a function.
4568 VALTYPE is the data type of the value (as a tree).
4569 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4570 otherwise, FUNC is 0. */
4571
4572 static rtx
4573 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4574 const_tree fntype, const_tree fn)
4575 {
4576 unsigned int regno;
4577
4578 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4579 we normally prevent this case when mmx is not available. However
4580 some ABIs may require the result to be returned like DImode. */
4581 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4582 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4583
4584 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4585 we prevent this case when sse is not available. However some ABIs
4586 may require the result to be returned like integer TImode. */
4587 else if (mode == TImode
4588 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4589 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4590
4591 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4592 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4593 regno = FIRST_FLOAT_REG;
4594 else
4595 /* Most things go in %eax. */
4596 regno = AX_REG;
4597
4598 /* Override FP return register with %xmm0 for local functions when
4599 SSE math is enabled or for functions with sseregparm attribute. */
4600 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4601 {
4602 int sse_level = ix86_function_sseregparm (fntype, fn);
4603 if ((sse_level >= 1 && mode == SFmode)
4604 || (sse_level == 2 && mode == DFmode))
4605 regno = FIRST_SSE_REG;
4606 }
4607
4608 return gen_rtx_REG (orig_mode, regno);
4609 }
4610
4611 static rtx
4612 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4613 const_tree valtype)
4614 {
4615 rtx ret;
4616
4617 /* Handle libcalls, which don't provide a type node. */
4618 if (valtype == NULL)
4619 {
4620 switch (mode)
4621 {
4622 case SFmode:
4623 case SCmode:
4624 case DFmode:
4625 case DCmode:
4626 case TFmode:
4627 case SDmode:
4628 case DDmode:
4629 case TDmode:
4630 return gen_rtx_REG (mode, FIRST_SSE_REG);
4631 case XFmode:
4632 case XCmode:
4633 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4634 case TCmode:
4635 return NULL;
4636 default:
4637 return gen_rtx_REG (mode, AX_REG);
4638 }
4639 }
4640
4641 ret = construct_container (mode, orig_mode, valtype, 1,
4642 REGPARM_MAX, SSE_REGPARM_MAX,
4643 x86_64_int_return_registers, 0);
4644
4645 /* For zero sized structures, construct_container returns NULL, but we
4646 need to keep the rest of the compiler happy by returning a meaningful value. */
4647 if (!ret)
4648 ret = gen_rtx_REG (orig_mode, AX_REG);
4649
4650 return ret;
4651 }
4652
4653 static rtx
4654 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4655 {
4656 unsigned int regno = AX_REG;
4657
4658 if (TARGET_SSE)
4659 {
4660 if (mode == SFmode || mode == DFmode)
4661 regno = FIRST_SSE_REG;
4662 else if (VECTOR_MODE_P (mode) || GET_MODE_SIZE (mode) == 16)
4663 regno = FIRST_SSE_REG;
4664 }
4665
4666 return gen_rtx_REG (orig_mode, regno);
4667 }
4668
4669 static rtx
4670 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
4671 enum machine_mode orig_mode, enum machine_mode mode)
4672 {
4673 const_tree fn, fntype;
4674
4675 fn = NULL_TREE;
4676 if (fntype_or_decl && DECL_P (fntype_or_decl))
4677 fn = fntype_or_decl;
4678 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4679
4680 if (TARGET_64BIT_MS_ABI)
4681 return function_value_ms_64 (orig_mode, mode);
4682 else if (TARGET_64BIT)
4683 return function_value_64 (orig_mode, mode, valtype);
4684 else
4685 return function_value_32 (orig_mode, mode, fntype, fn);
4686 }
4687
4688 static rtx
4689 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
4690 bool outgoing ATTRIBUTE_UNUSED)
4691 {
4692 enum machine_mode mode, orig_mode;
4693
4694 orig_mode = TYPE_MODE (valtype);
4695 mode = type_natural_mode (valtype);
4696 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4697 }
4698
4699 rtx
4700 ix86_libcall_value (enum machine_mode mode)
4701 {
4702 return ix86_function_value_1 (NULL, NULL, mode, mode);
4703 }
4704
4705 /* Return true iff type is returned in memory. */
4706
4707 static int
4708 return_in_memory_32 (const_tree type, enum machine_mode mode)
4709 {
4710 HOST_WIDE_INT size;
4711
4712 if (mode == BLKmode)
4713 return 1;
4714
4715 size = int_size_in_bytes (type);
4716
4717 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4718 return 0;
4719
4720 if (VECTOR_MODE_P (mode) || mode == TImode)
4721 {
4722 /* User-created vectors small enough to fit in EAX. */
4723 if (size < 8)
4724 return 0;
4725
4726 /* MMX/3dNow values are returned in MM0,
4727 except when it doesn't exist. */
4728 if (size == 8)
4729 return (TARGET_MMX ? 0 : 1);
4730
4731 /* SSE values are returned in XMM0, except when it doesn't exist. */
4732 if (size == 16)
4733 return (TARGET_SSE ? 0 : 1);
4734 }
4735
4736 if (mode == XFmode)
4737 return 0;
4738
4739 if (mode == TDmode)
4740 return 1;
4741
4742 if (size > 12)
4743 return 1;
4744 return 0;
4745 }
4746
4747 static int
4748 return_in_memory_64 (const_tree type, enum machine_mode mode)
4749 {
4750 int needed_intregs, needed_sseregs;
4751 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4752 }
4753
4754 static int
4755 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
4756 {
4757 HOST_WIDE_INT size = int_size_in_bytes (type);
4758
4759 /* __m128 and friends are returned in xmm0. */
4760 if (!COMPLEX_MODE_P (mode) && size == 16 && VECTOR_MODE_P (mode))
4761 return 0;
4762
4763 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes, and never complex. */
4764 return (size != 1 && size != 2 && size != 4 && size != 8)
4765 || COMPLEX_MODE_P (mode);
4766 }
4767
4768 int
4769 ix86_return_in_memory (const_tree type)
4770 {
4771 const enum machine_mode mode = type_natural_mode (type);
4772
4773 if (TARGET_64BIT_MS_ABI)
4774 return return_in_memory_ms_64 (type, mode);
4775 else if (TARGET_64BIT)
4776 return return_in_memory_64 (type, mode);
4777 else
4778 return return_in_memory_32 (type, mode);
4779 }
4780
4781 /* Return false iff TYPE is returned in memory. This version is used
4782 on Solaris 10. It is similar to the generic ix86_return_in_memory,
4783 but differs notably in that when MMX is available, 8-byte vectors
4784 are returned in memory, rather than in MMX registers. */
4785
4786 int
4787 ix86_sol10_return_in_memory (const_tree type)
4788 {
4789 int size;
4790 enum machine_mode mode = type_natural_mode (type);
4791
4792 if (TARGET_64BIT)
4793 return return_in_memory_64 (type, mode);
4794
4795 if (mode == BLKmode)
4796 return 1;
4797
4798 size = int_size_in_bytes (type);
4799
4800 if (VECTOR_MODE_P (mode))
4801 {
4802 /* Return in memory only if MMX registers *are* available. This
4803 seems backwards, but it is consistent with the existing
4804 Solaris x86 ABI. */
4805 if (size == 8)
4806 return TARGET_MMX;
4807 if (size == 16)
4808 return !TARGET_SSE;
4809 }
4810 else if (mode == TImode)
4811 return !TARGET_SSE;
4812 else if (mode == XFmode)
4813 return 0;
4814
4815 return size > 12;
4816 }
4817
4818 /* When returning SSE vector types, we have a choice of either
4819 (1) being abi incompatible with a -march switch, or
4820 (2) generating an error.
4821 Given no good solution, I think the safest thing is one warning.
4822 The user won't be able to use -Werror, but....
4823
4824 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4825 called in response to actually generating a caller or callee that
4826 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4827 via aggregate_value_p for general type probing from tree-ssa. */
4828
4829 static rtx
4830 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4831 {
4832 static bool warnedsse, warnedmmx;
4833
4834 if (!TARGET_64BIT && type)
4835 {
4836 /* Look at the return type of the function, not the function type. */
4837 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4838
4839 if (!TARGET_SSE && !warnedsse)
4840 {
4841 if (mode == TImode
4842 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4843 {
4844 warnedsse = true;
4845 warning (0, "SSE vector return without SSE enabled "
4846 "changes the ABI");
4847 }
4848 }
4849
4850 if (!TARGET_MMX && !warnedmmx)
4851 {
4852 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4853 {
4854 warnedmmx = true;
4855 warning (0, "MMX vector return without MMX enabled "
4856 "changes the ABI");
4857 }
4858 }
4859 }
4860
4861 return NULL;
4862 }
4863
4864 \f
4865 /* Create the va_list data type. */
4866
4867 static tree
4868 ix86_build_builtin_va_list (void)
4869 {
4870 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4871
4872 /* For i386 we use a plain pointer to the argument area. */
4873 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4874 return build_pointer_type (char_type_node);
4875
4876 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4877 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4878
4879 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4880 unsigned_type_node);
4881 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4882 unsigned_type_node);
4883 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4884 ptr_type_node);
4885 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4886 ptr_type_node);
4887
4888 va_list_gpr_counter_field = f_gpr;
4889 va_list_fpr_counter_field = f_fpr;
4890
4891 DECL_FIELD_CONTEXT (f_gpr) = record;
4892 DECL_FIELD_CONTEXT (f_fpr) = record;
4893 DECL_FIELD_CONTEXT (f_ovf) = record;
4894 DECL_FIELD_CONTEXT (f_sav) = record;
4895
4896 TREE_CHAIN (record) = type_decl;
4897 TYPE_NAME (record) = type_decl;
4898 TYPE_FIELDS (record) = f_gpr;
4899 TREE_CHAIN (f_gpr) = f_fpr;
4900 TREE_CHAIN (f_fpr) = f_ovf;
4901 TREE_CHAIN (f_ovf) = f_sav;
4902
4903 layout_type (record);
4904
4905 /* The correct type is an array type of one element. */
4906 return build_array_type (record, build_index_type (size_zero_node));
4907 }
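
/* The record built above corresponds, roughly, to the va_list declaration
   given by the x86-64 psABI:  */

typedef struct {
  unsigned int gp_offset;
  unsigned int fp_offset;
  void *overflow_arg_area;
  void *reg_save_area;
} __va_list_tag;

typedef __va_list_tag __builtin_va_list_sketch[1];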
4908
4909 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4910
4911 static void
4912 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4913 {
4914 rtx save_area, mem;
4915 rtx label;
4916 rtx label_ref;
4917 rtx tmp_reg;
4918 rtx nsse_reg;
4919 alias_set_type set;
4920 int i;
4921
4922 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4923 return;
4924
4925 /* Indicate that we need to allocate stack space for the varargs save area. */
4926 ix86_save_varrargs_registers = 1;
4927 /* We need 16-byte stack alignment to save SSE registers. If the user
4928 asked for a lower preferred_stack_boundary, let's just hope that they
4929 know what they are doing and won't pass SSE values through varargs.
4930
4931 We may also end up assuming that only 64-bit values are stored in SSE
4932 registers, to let some floating point programs work. */
4933 if (ix86_preferred_stack_boundary >= 128)
4934 cfun->stack_alignment_needed = 128;
4935
4936 save_area = frame_pointer_rtx;
4937 set = get_varargs_alias_set ();
4938
4939 for (i = cum->regno;
4940 i < ix86_regparm
4941 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4942 i++)
4943 {
4944 mem = gen_rtx_MEM (Pmode,
4945 plus_constant (save_area, i * UNITS_PER_WORD));
4946 MEM_NOTRAP_P (mem) = 1;
4947 set_mem_alias_set (mem, set);
4948 emit_move_insn (mem, gen_rtx_REG (Pmode,
4949 x86_64_int_parameter_registers[i]));
4950 }
4951
4952 if (cum->sse_nregs && cfun->va_list_fpr_size)
4953 {
4954 /* Now emit code to save SSE registers. The AX parameter contains the
4955 number of SSE parameter registers used to call this function. We use
4956 the sse_prologue_save insn template that produces a computed jump across
4957 the SSE saves. We need some preparation work to get this working. */
4958
4959 label = gen_label_rtx ();
4960 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4961
4962 /* Compute the address to jump to:
4963 label - 4*eax + nnamed_sse_arguments*4 */
4964 tmp_reg = gen_reg_rtx (Pmode);
4965 nsse_reg = gen_reg_rtx (Pmode);
4966 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
4967 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4968 gen_rtx_MULT (Pmode, nsse_reg,
4969 GEN_INT (4))));
4970 if (cum->sse_regno)
4971 emit_move_insn
4972 (nsse_reg,
4973 gen_rtx_CONST (DImode,
4974 gen_rtx_PLUS (DImode,
4975 label_ref,
4976 GEN_INT (cum->sse_regno * 4))));
4977 else
4978 emit_move_insn (nsse_reg, label_ref);
4979 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4980
4981 /* Compute the address of the memory block we save into. We always use a
4982 pointer pointing 127 bytes after the first byte to store; this keeps the
4983 displacements within 8 bits and each save instruction limited to 4 bytes. */
4984 tmp_reg = gen_reg_rtx (Pmode);
4985 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4986 plus_constant (save_area,
4987 8 * REGPARM_MAX + 127)));
4988 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4989 MEM_NOTRAP_P (mem) = 1;
4990 set_mem_alias_set (mem, set);
4991 set_mem_align (mem, BITS_PER_WORD);
4992
4993 /* And finally do the dirty job! */
4994 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4995 GEN_INT (cum->sse_regno), label));
4996 }
4997 }
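/* A sketch of the resulting register save area layout (assuming the 64-bit
   values REGPARM_MAX == 6 and SSE_REGPARM_MAX == 8; offsets are relative to
   the save area, which the prologue places right above the stack frame):

       bytes   0 ..  47   %rdi, %rsi, %rdx, %rcx, %r8, %r9   (8 bytes each)
       bytes  48 .. 175   %xmm0 .. %xmm7                     (16 bytes each)

   ix86_va_start below initializes gp_offset and fp_offset to index into
   this block.  */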
4998
4999 static void
5000 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
5001 {
5002 alias_set_type set = get_varargs_alias_set ();
5003 int i;
5004
5005 for (i = cum->regno; i < REGPARM_MAX; i++)
5006 {
5007 rtx reg, mem;
5008
5009 mem = gen_rtx_MEM (Pmode,
5010 plus_constant (virtual_incoming_args_rtx,
5011 i * UNITS_PER_WORD));
5012 MEM_NOTRAP_P (mem) = 1;
5013 set_mem_alias_set (mem, set);
5014
5015 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
5016 emit_move_insn (mem, reg);
5017 }
5018 }
5019
5020 static void
5021 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5022 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5023 int no_rtl)
5024 {
5025 CUMULATIVE_ARGS next_cum;
5026 tree fntype;
5027
5028 /* This argument doesn't appear to be used anymore. Which is good,
5029 because the old code here didn't suppress rtl generation. */
5030 gcc_assert (!no_rtl);
5031
5032 if (!TARGET_64BIT)
5033 return;
5034
5035 fntype = TREE_TYPE (current_function_decl);
5036
5037 /* For varargs, we do not want to skip the dummy va_dcl argument.
5038 For stdargs, we do want to skip the last named argument. */
5039 next_cum = *cum;
5040 if (stdarg_p (fntype))
5041 function_arg_advance (&next_cum, mode, type, 1);
5042
5043 if (TARGET_64BIT_MS_ABI)
5044 setup_incoming_varargs_ms_64 (&next_cum);
5045 else
5046 setup_incoming_varargs_64 (&next_cum);
5047 }
5048
5049 /* Implement va_start. */
5050
5051 static void
5052 ix86_va_start (tree valist, rtx nextarg)
5053 {
5054 HOST_WIDE_INT words, n_gpr, n_fpr;
5055 tree f_gpr, f_fpr, f_ovf, f_sav;
5056 tree gpr, fpr, ovf, sav, t;
5057 tree type;
5058
5059 /* Only the 64-bit target needs something special. */
5060 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
5061 {
5062 std_expand_builtin_va_start (valist, nextarg);
5063 return;
5064 }
5065
5066 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5067 f_fpr = TREE_CHAIN (f_gpr);
5068 f_ovf = TREE_CHAIN (f_fpr);
5069 f_sav = TREE_CHAIN (f_ovf);
5070
5071 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
5072 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5073 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5074 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5075 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
5076
5077 /* Count number of gp and fp argument registers used. */
5078 words = current_function_args_info.words;
5079 n_gpr = current_function_args_info.regno;
5080 n_fpr = current_function_args_info.sse_regno;
5081
5082 if (cfun->va_list_gpr_size)
5083 {
5084 type = TREE_TYPE (gpr);
5085 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
5086 build_int_cst (type, n_gpr * 8));
5087 TREE_SIDE_EFFECTS (t) = 1;
5088 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5089 }
5090
5091 if (cfun->va_list_fpr_size)
5092 {
5093 type = TREE_TYPE (fpr);
5094 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
5095 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
5096 TREE_SIDE_EFFECTS (t) = 1;
5097 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5098 }
5099
5100 /* Find the overflow area. */
5101 type = TREE_TYPE (ovf);
5102 t = make_tree (type, virtual_incoming_args_rtx);
5103 if (words != 0)
5104 t = build2 (POINTER_PLUS_EXPR, type, t,
5105 size_int (words * UNITS_PER_WORD));
5106 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
5107 TREE_SIDE_EFFECTS (t) = 1;
5108 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5109
5110 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
5111 {
5112 /* Find the register save area.
5113 The function prologue saves it right above the stack frame. */
5114 type = TREE_TYPE (sav);
5115 t = make_tree (type, frame_pointer_rtx);
5116 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
5117 TREE_SIDE_EFFECTS (t) = 1;
5118 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5119 }
5120 }
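/* Example (a sketch): for a function such as

       void f (const char *fmt, ...);

   one named argument is passed in a GP register and none in SSE registers,
   so the code above initializes the va_list roughly as

       gp_offset         = 1 * 8  = 8
       fp_offset         = 0 * 16 + 8 * REGPARM_MAX = 48
       overflow_arg_area = address of the first stack-passed argument
       reg_save_area     = the register save area above the stack frame  */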
5121
5122 /* Implement va_arg. */
5123
5124 static tree
5125 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
5126 {
5127 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
5128 tree f_gpr, f_fpr, f_ovf, f_sav;
5129 tree gpr, fpr, ovf, sav, t;
5130 int size, rsize;
5131 tree lab_false, lab_over = NULL_TREE;
5132 tree addr, t2;
5133 rtx container;
5134 int indirect_p = 0;
5135 tree ptrtype;
5136 enum machine_mode nat_mode;
5137
5138 /* Only the 64-bit target needs something special. */
5139 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
5140 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5141
5142 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5143 f_fpr = TREE_CHAIN (f_gpr);
5144 f_ovf = TREE_CHAIN (f_fpr);
5145 f_sav = TREE_CHAIN (f_ovf);
5146
5147 valist = build_va_arg_indirect_ref (valist);
5148 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5149 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5150 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5151 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
5152
5153 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5154 if (indirect_p)
5155 type = build_pointer_type (type);
5156 size = int_size_in_bytes (type);
5157 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5158
5159 nat_mode = type_natural_mode (type);
5160 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
5161 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
5162
5163 /* Pull the value out of the saved registers. */
5164
5165 addr = create_tmp_var (ptr_type_node, "addr");
5166 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
5167
5168 if (container)
5169 {
5170 int needed_intregs, needed_sseregs;
5171 bool need_temp;
5172 tree int_addr, sse_addr;
5173
5174 lab_false = create_artificial_label ();
5175 lab_over = create_artificial_label ();
5176
5177 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
5178
5179 need_temp = (!REG_P (container)
5180 && ((needed_intregs && TYPE_ALIGN (type) > 64)
5181 || TYPE_ALIGN (type) > 128));
5182
5183 /* If we are passing a structure, verify that it occupies a consecutive
5184 block of the register save area. If not, we need to do moves. */
5185 if (!need_temp && !REG_P (container))
5186 {
5187 /* Verify that all registers are strictly consecutive */
5188 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
5189 {
5190 int i;
5191
5192 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5193 {
5194 rtx slot = XVECEXP (container, 0, i);
5195 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
5196 || INTVAL (XEXP (slot, 1)) != i * 16)
5197 need_temp = 1;
5198 }
5199 }
5200 else
5201 {
5202 int i;
5203
5204 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5205 {
5206 rtx slot = XVECEXP (container, 0, i);
5207 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
5208 || INTVAL (XEXP (slot, 1)) != i * 8)
5209 need_temp = 1;
5210 }
5211 }
5212 }
5213 if (!need_temp)
5214 {
5215 int_addr = addr;
5216 sse_addr = addr;
5217 }
5218 else
5219 {
5220 int_addr = create_tmp_var (ptr_type_node, "int_addr");
5221 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
5222 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
5223 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
5224 }
5225
5226 /* First ensure that we fit completely in registers. */
5227 if (needed_intregs)
5228 {
5229 t = build_int_cst (TREE_TYPE (gpr),
5230 (REGPARM_MAX - needed_intregs + 1) * 8);
5231 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
5232 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5233 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5234 gimplify_and_add (t, pre_p);
5235 }
5236 if (needed_sseregs)
5237 {
5238 t = build_int_cst (TREE_TYPE (fpr),
5239 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
5240 + REGPARM_MAX * 8);
5241 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
5242 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5243 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5244 gimplify_and_add (t, pre_p);
5245 }
5246
5247 /* Compute index to start of area used for integer regs. */
5248 if (needed_intregs)
5249 {
5250 /* int_addr = gpr + sav; */
5251 t = fold_convert (sizetype, gpr);
5252 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5253 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
5254 gimplify_and_add (t, pre_p);
5255 }
5256 if (needed_sseregs)
5257 {
5258 /* sse_addr = fpr + sav; */
5259 t = fold_convert (sizetype, fpr);
5260 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5261 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
5262 gimplify_and_add (t, pre_p);
5263 }
5264 if (need_temp)
5265 {
5266 int i;
5267 tree temp = create_tmp_var (type, "va_arg_tmp");
5268
5269 /* addr = &temp; */
5270 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
5271 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5272 gimplify_and_add (t, pre_p);
5273
5274 for (i = 0; i < XVECLEN (container, 0); i++)
5275 {
5276 rtx slot = XVECEXP (container, 0, i);
5277 rtx reg = XEXP (slot, 0);
5278 enum machine_mode mode = GET_MODE (reg);
5279 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
5280 tree addr_type = build_pointer_type (piece_type);
5281 tree src_addr, src;
5282 int src_offset;
5283 tree dest_addr, dest;
5284
5285 if (SSE_REGNO_P (REGNO (reg)))
5286 {
5287 src_addr = sse_addr;
5288 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
5289 }
5290 else
5291 {
5292 src_addr = int_addr;
5293 src_offset = REGNO (reg) * 8;
5294 }
5295 src_addr = fold_convert (addr_type, src_addr);
5296 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
5297 size_int (src_offset));
5298 src = build_va_arg_indirect_ref (src_addr);
5299
5300 dest_addr = fold_convert (addr_type, addr);
5301 dest_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, dest_addr,
5302 size_int (INTVAL (XEXP (slot, 1))));
5303 dest = build_va_arg_indirect_ref (dest_addr);
5304
5305 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
5306 gimplify_and_add (t, pre_p);
5307 }
5308 }
5309
5310 if (needed_intregs)
5311 {
5312 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
5313 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
5314 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
5315 gimplify_and_add (t, pre_p);
5316 }
5317 if (needed_sseregs)
5318 {
5319 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
5320 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
5321 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
5322 gimplify_and_add (t, pre_p);
5323 }
5324
5325 t = build1 (GOTO_EXPR, void_type_node, lab_over);
5326 gimplify_and_add (t, pre_p);
5327
5328 t = build1 (LABEL_EXPR, void_type_node, lab_false);
5329 append_to_statement_list (t, pre_p);
5330 }
5331
5332 /* ... otherwise out of the overflow area. */
5333
5334 /* Care for on-stack alignment if needed. */
5335 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
5336 || integer_zerop (TYPE_SIZE (type)))
5337 t = ovf;
5338 else
5339 {
5340 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
5341 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
5342 size_int (align - 1));
5343 t = fold_convert (sizetype, t);
5344 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5345 size_int (-align));
5346 t = fold_convert (TREE_TYPE (ovf), t);
5347 }
5348 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
5349
5350 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5351 gimplify_and_add (t2, pre_p);
5352
5353 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
5354 size_int (rsize * UNITS_PER_WORD));
5355 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
5356 gimplify_and_add (t, pre_p);
5357
5358 if (container)
5359 {
5360 t = build1 (LABEL_EXPR, void_type_node, lab_over);
5361 append_to_statement_list (t, pre_p);
5362 }
5363
5364 ptrtype = build_pointer_type (type);
5365 addr = fold_convert (ptrtype, addr);
5366
5367 if (indirect_p)
5368 addr = build_va_arg_indirect_ref (addr);
5369 return build_va_arg_indirect_ref (addr);
5370 }
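/* A sketch of the code gimplified above for a simple scalar, e.g.
   "int x = va_arg (ap, int)" (assuming REGPARM_MAX == 6, so the GP part of
   the save area ends at offset 48):

       if (ap->gp_offset >= 48) goto lab_false;
       addr = ap->reg_save_area + ap->gp_offset;
       ap->gp_offset += 8;
       goto lab_over;
     lab_false:
       addr = ap->overflow_arg_area;
       ap->overflow_arg_area += 8;
     lab_over:
       x = *(int *) addr;
*/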
5371 \f
5372 /* Return nonzero if OPNUM's MEM should be matched
5373 in movabs* patterns. */
5374
5375 int
5376 ix86_check_movabs (rtx insn, int opnum)
5377 {
5378 rtx set, mem;
5379
5380 set = PATTERN (insn);
5381 if (GET_CODE (set) == PARALLEL)
5382 set = XVECEXP (set, 0, 0);
5383 gcc_assert (GET_CODE (set) == SET);
5384 mem = XEXP (set, opnum);
5385 while (GET_CODE (mem) == SUBREG)
5386 mem = SUBREG_REG (mem);
5387 gcc_assert (MEM_P (mem));
5388 return (volatile_ok || !MEM_VOLATILE_P (mem));
5389 }
5390 \f
5391 /* Initialize the table of extra 80387 mathematical constants. */
5392
5393 static void
5394 init_ext_80387_constants (void)
5395 {
5396 static const char * cst[5] =
5397 {
5398 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5399 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5400 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5401 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5402 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5403 };
5404 int i;
5405
5406 for (i = 0; i < 5; i++)
5407 {
5408 real_from_string (&ext_80387_constants_table[i], cst[i]);
5409 /* Ensure each constant is rounded to XFmode precision. */
5410 real_convert (&ext_80387_constants_table[i],
5411 XFmode, &ext_80387_constants_table[i]);
5412 }
5413
5414 ext_80387_constants_init = 1;
5415 }
5416
5417 /* Return true if the constant is something that can be loaded with
5418 a special instruction. */
5419
5420 int
5421 standard_80387_constant_p (rtx x)
5422 {
5423 enum machine_mode mode = GET_MODE (x);
5424
5425 REAL_VALUE_TYPE r;
5426
5427 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
5428 return -1;
5429
5430 if (x == CONST0_RTX (mode))
5431 return 1;
5432 if (x == CONST1_RTX (mode))
5433 return 2;
5434
5435 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5436
5437 /* For XFmode constants, try to find a special 80387 instruction when
5438 optimizing for size or on those CPUs that benefit from them. */
5439 if (mode == XFmode
5440 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
5441 {
5442 int i;
5443
5444 if (! ext_80387_constants_init)
5445 init_ext_80387_constants ();
5446
5447 for (i = 0; i < 5; i++)
5448 if (real_identical (&r, &ext_80387_constants_table[i]))
5449 return i + 3;
5450 }
5451
5452 /* A load of the constant -0.0 or -1.0 will be split into an
5453 fldz;fchs or fld1;fchs sequence. */
5454 if (real_isnegzero (&r))
5455 return 8;
5456 if (real_identical (&r, &dconstm1))
5457 return 9;
5458
5459 return 0;
5460 }
5461
5462 /* Return the opcode of the special instruction to be used to load
5463 the constant X. */
5464
5465 const char *
5466 standard_80387_constant_opcode (rtx x)
5467 {
5468 switch (standard_80387_constant_p (x))
5469 {
5470 case 1:
5471 return "fldz";
5472 case 2:
5473 return "fld1";
5474 case 3:
5475 return "fldlg2";
5476 case 4:
5477 return "fldln2";
5478 case 5:
5479 return "fldl2e";
5480 case 6:
5481 return "fldl2t";
5482 case 7:
5483 return "fldpi";
5484 case 8:
5485 case 9:
5486 return "#";
5487 default:
5488 gcc_unreachable ();
5489 }
5490 }
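/* Usage sketch (hypothetical caller, not code from this file): an x87 move
   pattern can pick the single-instruction form when one exists, e.g.

       if (standard_80387_constant_p (operands[1]) > 0)
         return standard_80387_constant_opcode (operands[1]);   -- e.g. "fldpi"

   and fall back to loading the constant from memory otherwise.  */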
5491
5492 /* Return the CONST_DOUBLE representing the 80387 constant that is
5493 loaded by the specified special instruction. The argument IDX
5494 matches the return value from standard_80387_constant_p. */
5495
5496 rtx
5497 standard_80387_constant_rtx (int idx)
5498 {
5499 int i;
5500
5501 if (! ext_80387_constants_init)
5502 init_ext_80387_constants ();
5503
5504 switch (idx)
5505 {
5506 case 3:
5507 case 4:
5508 case 5:
5509 case 6:
5510 case 7:
5511 i = idx - 3;
5512 break;
5513
5514 default:
5515 gcc_unreachable ();
5516 }
5517
5518 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5519 XFmode);
5520 }
5521
5522 /* Return 1 if MODE is a valid mode for SSE. */
5523 static int
5524 standard_sse_mode_p (enum machine_mode mode)
5525 {
5526 switch (mode)
5527 {
5528 case V16QImode:
5529 case V8HImode:
5530 case V4SImode:
5531 case V2DImode:
5532 case V4SFmode:
5533 case V2DFmode:
5534 return 1;
5535
5536 default:
5537 return 0;
5538 }
5539 }
5540
5541 /* Return 1 if X is an FP constant we can load into an SSE register
5542 without using memory. */
5543 int
5544 standard_sse_constant_p (rtx x)
5545 {
5546 enum machine_mode mode = GET_MODE (x);
5547
5548 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5549 return 1;
5550 if (vector_all_ones_operand (x, mode)
5551 && standard_sse_mode_p (mode))
5552 return TARGET_SSE2 ? 2 : -1;
5553
5554 return 0;
5555 }
5556
5557 /* Return the opcode of the special instruction to be used to load
5558 the constant X. */
5559
5560 const char *
5561 standard_sse_constant_opcode (rtx insn, rtx x)
5562 {
5563 switch (standard_sse_constant_p (x))
5564 {
5565 case 1:
5566 if (get_attr_mode (insn) == MODE_V4SF)
5567 return "xorps\t%0, %0";
5568 else if (get_attr_mode (insn) == MODE_V2DF)
5569 return "xorpd\t%0, %0";
5570 else
5571 return "pxor\t%0, %0";
5572 case 2:
5573 return "pcmpeqd\t%0, %0";
5574 }
5575 gcc_unreachable ();
5576 }
5577
5578 /* Returns 1 if OP contains a symbol reference */
5579
5580 int
5581 symbolic_reference_mentioned_p (rtx op)
5582 {
5583 const char *fmt;
5584 int i;
5585
5586 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5587 return 1;
5588
5589 fmt = GET_RTX_FORMAT (GET_CODE (op));
5590 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5591 {
5592 if (fmt[i] == 'E')
5593 {
5594 int j;
5595
5596 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5597 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5598 return 1;
5599 }
5600
5601 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5602 return 1;
5603 }
5604
5605 return 0;
5606 }
5607
5608 /* Return 1 if it is appropriate to emit `ret' instructions in the
5609 body of a function. Do this only if the epilogue is simple, needing a
5610 couple of insns. Prior to reloading, we can't tell how many registers
5611 must be saved, so return 0 then. Return 0 if there is no frame
5612 marker to de-allocate. */
5613
5614 int
5615 ix86_can_use_return_insn_p (void)
5616 {
5617 struct ix86_frame frame;
5618
5619 if (! reload_completed || frame_pointer_needed)
5620 return 0;
5621
5622 /* Don't allow more than 32k of pop, since that's all we can do
5623 with one instruction. */
5624 if (current_function_pops_args
5625 && current_function_args_size >= 32768)
5626 return 0;
5627
5628 ix86_compute_frame_layout (&frame);
5629 return frame.to_allocate == 0 && frame.nregs == 0;
5630 }
5631 \f
5632 /* Value should be nonzero if functions must have frame pointers.
5633 Zero means the frame pointer need not be set up (and parms may
5634 be accessed via the stack pointer) in functions that seem suitable. */
5635
5636 int
5637 ix86_frame_pointer_required (void)
5638 {
5639 /* If we accessed previous frames, then the generated code expects
5640 to be able to access the saved ebp value in our frame. */
5641 if (cfun->machine->accesses_prev_frame)
5642 return 1;
5643
5644 /* Several x86 OSes need a frame pointer for other reasons,
5645 usually pertaining to setjmp. */
5646 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5647 return 1;
5648
5649 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5650 the frame pointer by default. Turn it back on now if we've not
5651 got a leaf function. */
5652 if (TARGET_OMIT_LEAF_FRAME_POINTER
5653 && (!current_function_is_leaf
5654 || ix86_current_function_calls_tls_descriptor))
5655 return 1;
5656
5657 if (current_function_profile)
5658 return 1;
5659
5660 return 0;
5661 }
5662
5663 /* Record that the current function accesses previous call frames. */
5664
5665 void
5666 ix86_setup_frame_addresses (void)
5667 {
5668 cfun->machine->accesses_prev_frame = 1;
5669 }
5670 \f
5671 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5672 # define USE_HIDDEN_LINKONCE 1
5673 #else
5674 # define USE_HIDDEN_LINKONCE 0
5675 #endif
5676
5677 static int pic_labels_used;
5678
5679 /* Fills in the label name that should be used for a pc thunk for
5680 the given register. */
5681
5682 static void
5683 get_pc_thunk_name (char name[32], unsigned int regno)
5684 {
5685 gcc_assert (!TARGET_64BIT);
5686
5687 if (USE_HIDDEN_LINKONCE)
5688 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5689 else
5690 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5691 }
5692
5693
5694 /* For each PIC register that was used, emit a tiny pc thunk that loads
5695 the register with the return address of the caller and then returns. */
5696
5697 void
5698 ix86_file_end (void)
5699 {
5700 rtx xops[2];
5701 int regno;
5702
5703 for (regno = 0; regno < 8; ++regno)
5704 {
5705 char name[32];
5706
5707 if (! ((pic_labels_used >> regno) & 1))
5708 continue;
5709
5710 get_pc_thunk_name (name, regno);
5711
5712 #if TARGET_MACHO
5713 if (TARGET_MACHO)
5714 {
5715 switch_to_section (darwin_sections[text_coal_section]);
5716 fputs ("\t.weak_definition\t", asm_out_file);
5717 assemble_name (asm_out_file, name);
5718 fputs ("\n\t.private_extern\t", asm_out_file);
5719 assemble_name (asm_out_file, name);
5720 fputs ("\n", asm_out_file);
5721 ASM_OUTPUT_LABEL (asm_out_file, name);
5722 }
5723 else
5724 #endif
5725 if (USE_HIDDEN_LINKONCE)
5726 {
5727 tree decl;
5728
5729 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5730 error_mark_node);
5731 TREE_PUBLIC (decl) = 1;
5732 TREE_STATIC (decl) = 1;
5733 DECL_ONE_ONLY (decl) = 1;
5734
5735 (*targetm.asm_out.unique_section) (decl, 0);
5736 switch_to_section (get_named_section (decl, NULL, 0));
5737
5738 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5739 fputs ("\t.hidden\t", asm_out_file);
5740 assemble_name (asm_out_file, name);
5741 fputc ('\n', asm_out_file);
5742 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5743 }
5744 else
5745 {
5746 switch_to_section (text_section);
5747 ASM_OUTPUT_LABEL (asm_out_file, name);
5748 }
5749
5750 xops[0] = gen_rtx_REG (SImode, regno);
5751 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5752 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5753 output_asm_insn ("ret", xops);
5754 }
5755
5756 if (NEED_INDICATE_EXEC_STACK)
5757 file_end_indicate_exec_stack ();
5758 }
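/* For example, if %ebx was used as the PIC register, the loop above emits
   (AT&T syntax, USE_HIDDEN_LINKONCE case) something like:

       __i686.get_pc_thunk.bx:
               movl    (%esp), %ebx
               ret
*/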
5759
5760 /* Emit code for the SET_GOT patterns. */
5761
5762 const char *
5763 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5764 {
5765 rtx xops[3];
5766
5767 xops[0] = dest;
5768
5769 if (TARGET_VXWORKS_RTP && flag_pic)
5770 {
5771 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5772 xops[2] = gen_rtx_MEM (Pmode,
5773 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5774 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5775
5776 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5777 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5778 an unadorned address. */
5779 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5780 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5781 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5782 return "";
5783 }
5784
5785 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5786
5787 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5788 {
5789 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5790
5791 if (!flag_pic)
5792 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5793 else
5794 output_asm_insn ("call\t%a2", xops);
5795
5796 #if TARGET_MACHO
5797 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5798 is what will be referenced by the Mach-O PIC subsystem. */
5799 if (!label)
5800 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5801 #endif
5802
5803 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5804 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5805
5806 if (flag_pic)
5807 output_asm_insn ("pop{l}\t%0", xops);
5808 }
5809 else
5810 {
5811 char name[32];
5812 get_pc_thunk_name (name, REGNO (dest));
5813 pic_labels_used |= 1 << REGNO (dest);
5814
5815 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5816 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5817 output_asm_insn ("call\t%X2", xops);
5818 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5819 is what will be referenced by the Mach-O PIC subsystem. */
5820 #if TARGET_MACHO
5821 if (!label)
5822 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5823 else
5824 targetm.asm_out.internal_label (asm_out_file, "L",
5825 CODE_LABEL_NUMBER (label));
5826 #endif
5827 }
5828
5829 if (TARGET_MACHO)
5830 return "";
5831
5832 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5833 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5834 else
5835 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5836
5837 return "";
5838 }
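/* For illustration, with %ebx as the destination the two main cases above
   produce roughly the following (AT&T syntax; GOT_SYMBOL_NAME is normally
   "_GLOBAL_OFFSET_TABLE_"):

   Without deep branch prediction:
           call    .L2
   .L2:    popl    %ebx
           addl    $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   With deep branch prediction:
           call    __i686.get_pc_thunk.bx
           addl    $_GLOBAL_OFFSET_TABLE_, %ebx
*/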
5839
5840 /* Generate a "push" pattern for input ARG. */
5841
5842 static rtx
5843 gen_push (rtx arg)
5844 {
5845 return gen_rtx_SET (VOIDmode,
5846 gen_rtx_MEM (Pmode,
5847 gen_rtx_PRE_DEC (Pmode,
5848 stack_pointer_rtx)),
5849 arg);
5850 }
5851
5852 /* Return the number of an unused call-clobbered register if one is
5853 available for the entire function, or INVALID_REGNUM otherwise. */
5854
5855 static unsigned int
5856 ix86_select_alt_pic_regnum (void)
5857 {
5858 if (current_function_is_leaf && !current_function_profile
5859 && !ix86_current_function_calls_tls_descriptor)
5860 {
5861 int i;
5862 for (i = 2; i >= 0; --i)
5863 if (!df_regs_ever_live_p (i))
5864 return i;
5865 }
5866
5867 return INVALID_REGNUM;
5868 }
5869
5870 /* Return 1 if we need to save REGNO. */
5871 static int
5872 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5873 {
5874 if (pic_offset_table_rtx
5875 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5876 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
5877 || current_function_profile
5878 || current_function_calls_eh_return
5879 || current_function_uses_const_pool))
5880 {
5881 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5882 return 0;
5883 return 1;
5884 }
5885
5886 if (current_function_calls_eh_return && maybe_eh_return)
5887 {
5888 unsigned i;
5889 for (i = 0; ; i++)
5890 {
5891 unsigned test = EH_RETURN_DATA_REGNO (i);
5892 if (test == INVALID_REGNUM)
5893 break;
5894 if (test == regno)
5895 return 1;
5896 }
5897 }
5898
5899 if (cfun->machine->force_align_arg_pointer
5900 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5901 return 1;
5902
5903 return (df_regs_ever_live_p (regno)
5904 && !call_used_regs[regno]
5905 && !fixed_regs[regno]
5906 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5907 }
5908
5909 /* Return number of registers to be saved on the stack. */
5910
5911 static int
5912 ix86_nsaved_regs (void)
5913 {
5914 int nregs = 0;
5915 int regno;
5916
5917 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5918 if (ix86_save_reg (regno, true))
5919 nregs++;
5920 return nregs;
5921 }
5922
5923 /* Return the offset between two registers, one to be eliminated, and the other
5924 its replacement, at the start of a routine. */
5925
5926 HOST_WIDE_INT
5927 ix86_initial_elimination_offset (int from, int to)
5928 {
5929 struct ix86_frame frame;
5930 ix86_compute_frame_layout (&frame);
5931
5932 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5933 return frame.hard_frame_pointer_offset;
5934 else if (from == FRAME_POINTER_REGNUM
5935 && to == HARD_FRAME_POINTER_REGNUM)
5936 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5937 else
5938 {
5939 gcc_assert (to == STACK_POINTER_REGNUM);
5940
5941 if (from == ARG_POINTER_REGNUM)
5942 return frame.stack_pointer_offset;
5943
5944 gcc_assert (from == FRAME_POINTER_REGNUM);
5945 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5946 }
5947 }
5948
5949 /* Fill structure ix86_frame about frame of currently computed function. */
5950
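/* A rough picture of the layout this function computes, from higher to
   lower addresses (a sketch only; padding and the va-arg area are present
   only when needed):

       incoming arguments
       return address
       saved %ebp (when frame_pointer_needed)   <- hard_frame_pointer_offset
       saved registers (nregs words)
       va-arg register save area
       padding1
                                                <- frame_pointer_offset
       local variables (get_frame_size ())
       outgoing argument area
       padding2
                                                <- stack_pointer_offset
       red zone (64-bit leaf functions only, below the stack pointer)  */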
5951 static void
5952 ix86_compute_frame_layout (struct ix86_frame *frame)
5953 {
5954 HOST_WIDE_INT total_size;
5955 unsigned int stack_alignment_needed;
5956 HOST_WIDE_INT offset;
5957 unsigned int preferred_alignment;
5958 HOST_WIDE_INT size = get_frame_size ();
5959
5960 frame->nregs = ix86_nsaved_regs ();
5961 total_size = size;
5962
5963 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5964 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5965
5966 /* During a reload iteration the number of registers saved can change.
5967 Recompute the value as needed. Do not recompute when the number of
5968 registers didn't change, as reload calls this function multiple times
5969 and does not expect the decision to change within a single iteration. */
5970 if (!optimize_size
5971 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5972 {
5973 int count = frame->nregs;
5974
5975 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5976 /* The fast prologue uses move instead of push to save registers. This
5977 is significantly longer, but also executes faster, as modern hardware
5978 can execute the moves in parallel but can't do that for push/pop.
5979
5980 Be careful about choosing which prologue to emit: when a function takes
5981 many instructions to execute we may use the slow version, and likewise
5982 when the function is known to be outside a hot spot (this is known only
5983 with profile feedback). Weight the size of the function by the number of
5984 registers to save, as it is cheap to use one or two push instructions
5985 but very slow to use many of them. */
5986 if (count)
5987 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5988 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5989 || (flag_branch_probabilities
5990 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5991 cfun->machine->use_fast_prologue_epilogue = false;
5992 else
5993 cfun->machine->use_fast_prologue_epilogue
5994 = !expensive_function_p (count);
5995 }
5996 if (TARGET_PROLOGUE_USING_MOVE
5997 && cfun->machine->use_fast_prologue_epilogue)
5998 frame->save_regs_using_mov = true;
5999 else
6000 frame->save_regs_using_mov = false;
6001
6002
6003 /* Skip return address and saved base pointer. */
6004 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
6005
6006 frame->hard_frame_pointer_offset = offset;
6007
6008 /* Do some sanity checking of stack_alignment_needed and
6009 preferred_alignment, since the i386 port is the only one using these
6010 features, which may break easily. */
6011
6012 gcc_assert (!size || stack_alignment_needed);
6013 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
6014 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
6015 gcc_assert (stack_alignment_needed
6016 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
6017
6018 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
6019 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
6020
6021 /* Register save area */
6022 offset += frame->nregs * UNITS_PER_WORD;
6023
6024 /* Va-arg area */
6025 if (ix86_save_varrargs_registers)
6026 {
6027 offset += X86_64_VARARGS_SIZE;
6028 frame->va_arg_size = X86_64_VARARGS_SIZE;
6029 }
6030 else
6031 frame->va_arg_size = 0;
6032
6033 /* Align start of frame for local function. */
6034 frame->padding1 = ((offset + stack_alignment_needed - 1)
6035 & -stack_alignment_needed) - offset;
6036
6037 offset += frame->padding1;
6038
6039 /* Frame pointer points here. */
6040 frame->frame_pointer_offset = offset;
6041
6042 offset += size;
6043
6044 /* Add the outgoing arguments area. It can be skipped if we eliminated
6045 all the function calls as dead code.
6046 Skipping is, however, impossible when the function calls alloca: the
6047 alloca expander assumes that the last current_function_outgoing_args_size
6048 bytes of the stack frame are unused. */
6049 if (ACCUMULATE_OUTGOING_ARGS
6050 && (!current_function_is_leaf || current_function_calls_alloca
6051 || ix86_current_function_calls_tls_descriptor))
6052 {
6053 offset += current_function_outgoing_args_size;
6054 frame->outgoing_arguments_size = current_function_outgoing_args_size;
6055 }
6056 else
6057 frame->outgoing_arguments_size = 0;
6058
6059 /* Align stack boundary. Only needed if we're calling another function
6060 or using alloca. */
6061 if (!current_function_is_leaf || current_function_calls_alloca
6062 || ix86_current_function_calls_tls_descriptor)
6063 frame->padding2 = ((offset + preferred_alignment - 1)
6064 & -preferred_alignment) - offset;
6065 else
6066 frame->padding2 = 0;
6067
6068 offset += frame->padding2;
6069
6070 /* We've reached end of stack frame. */
6071 frame->stack_pointer_offset = offset;
6072
6073 /* Size prologue needs to allocate. */
6074 frame->to_allocate =
6075 (size + frame->padding1 + frame->padding2
6076 + frame->outgoing_arguments_size + frame->va_arg_size);
6077
6078 if ((!frame->to_allocate && frame->nregs <= 1)
6079 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
6080 frame->save_regs_using_mov = false;
6081
6082 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
6083 && current_function_is_leaf
6084 && !ix86_current_function_calls_tls_descriptor)
6085 {
6086 frame->red_zone_size = frame->to_allocate;
6087 if (frame->save_regs_using_mov)
6088 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
6089 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
6090 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
6091 }
6092 else
6093 frame->red_zone_size = 0;
6094 frame->to_allocate -= frame->red_zone_size;
6095 frame->stack_pointer_offset -= frame->red_zone_size;
6096 #if 0
6097 fprintf (stderr, "\n");
6098 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
6099 fprintf (stderr, "size: %ld\n", (long)size);
6100 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
6101 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
6102 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
6103 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
6104 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
6105 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
6106 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
6107 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
6108 (long)frame->hard_frame_pointer_offset);
6109 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
6110 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
6111 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
6112 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
6113 #endif
6114 }
6115
6116 /* Emit code to save registers in the prologue. */
6117
6118 static void
6119 ix86_emit_save_regs (void)
6120 {
6121 unsigned int regno;
6122 rtx insn;
6123
6124 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
6125 if (ix86_save_reg (regno, true))
6126 {
6127 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
6128 RTX_FRAME_RELATED_P (insn) = 1;
6129 }
6130 }
6131
6132 /* Emit code to save registers using MOV insns. The first register
6133 is saved at POINTER + OFFSET. */
6134 static void
6135 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
6136 {
6137 unsigned int regno;
6138 rtx insn;
6139
6140 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6141 if (ix86_save_reg (regno, true))
6142 {
6143 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
6144 Pmode, offset),
6145 gen_rtx_REG (Pmode, regno));
6146 RTX_FRAME_RELATED_P (insn) = 1;
6147 offset += UNITS_PER_WORD;
6148 }
6149 }
6150
6151 /* Expand prologue or epilogue stack adjustment.
6152 The pattern exists to put a dependency on all ebp-based memory accesses.
6153 STYLE should be negative if the instructions should be marked as frame
6154 related, zero if the %r11 register is live and cannot be freely used,
6155 and positive otherwise. */
6156
6157 static void
6158 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
6159 {
6160 rtx insn;
6161
6162 if (! TARGET_64BIT)
6163 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
6164 else if (x86_64_immediate_operand (offset, DImode))
6165 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
6166 else
6167 {
6168 rtx r11;
6169 /* r11 is used by indirect sibcall return as well, set before the
6170 epilogue and used after the epilogue. ATM indirect sibcall
6171 shouldn't be used together with huge frame sizes in one
6172 function because of the frame_size check in sibcall.c. */
6173 gcc_assert (style);
6174 r11 = gen_rtx_REG (DImode, R11_REG);
6175 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
6176 if (style < 0)
6177 RTX_FRAME_RELATED_P (insn) = 1;
6178 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
6179 offset));
6180 }
6181 if (style < 0)
6182 RTX_FRAME_RELATED_P (insn) = 1;
6183 }
6184
6185 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
6186
6187 static rtx
6188 ix86_internal_arg_pointer (void)
6189 {
6190 bool has_force_align_arg_pointer =
6191 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
6192 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
6193 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
6194 && DECL_NAME (current_function_decl)
6195 && MAIN_NAME_P (DECL_NAME (current_function_decl))
6196 && DECL_FILE_SCOPE_P (current_function_decl))
6197 || ix86_force_align_arg_pointer
6198 || has_force_align_arg_pointer)
6199 {
6200 /* Nested functions can't realign the stack due to a register
6201 conflict. */
6202 if (DECL_CONTEXT (current_function_decl)
6203 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
6204 {
6205 if (ix86_force_align_arg_pointer)
6206 warning (0, "-mstackrealign ignored for nested functions");
6207 if (has_force_align_arg_pointer)
6208 error ("%s not supported for nested functions",
6209 ix86_force_align_arg_pointer_string);
6210 return virtual_incoming_args_rtx;
6211 }
6212 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, CX_REG);
6213 return copy_to_reg (cfun->machine->force_align_arg_pointer);
6214 }
6215 else
6216 return virtual_incoming_args_rtx;
6217 }
6218
6219 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
6220 This is called from dwarf2out.c to emit call frame instructions
6221 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
6222 static void
6223 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
6224 {
6225 rtx unspec = SET_SRC (pattern);
6226 gcc_assert (GET_CODE (unspec) == UNSPEC);
6227
6228 switch (index)
6229 {
6230 case UNSPEC_REG_SAVE:
6231 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
6232 SET_DEST (pattern));
6233 break;
6234 case UNSPEC_DEF_CFA:
6235 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
6236 INTVAL (XVECEXP (unspec, 0, 0)));
6237 break;
6238 default:
6239 gcc_unreachable ();
6240 }
6241 }
6242
6243 /* Expand the prologue into a bunch of separate insns. */
6244
6245 void
6246 ix86_expand_prologue (void)
6247 {
6248 rtx insn;
6249 bool pic_reg_used;
6250 struct ix86_frame frame;
6251 HOST_WIDE_INT allocate;
6252
6253 ix86_compute_frame_layout (&frame);
6254
6255 if (cfun->machine->force_align_arg_pointer)
6256 {
6257 rtx x, y;
6258
6259 /* Grab the argument pointer. */
6260 x = plus_constant (stack_pointer_rtx, 4);
6261 y = cfun->machine->force_align_arg_pointer;
6262 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
6263 RTX_FRAME_RELATED_P (insn) = 1;
6264
6265 /* The unwind info consists of two parts: install the fafp as the cfa,
6266 and record the fafp as the "save register" of the stack pointer.
6267 The latter is there so that the unwinder can see where it
6268 should restore the stack pointer across the and insn. */
6269 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
6270 x = gen_rtx_SET (VOIDmode, y, x);
6271 RTX_FRAME_RELATED_P (x) = 1;
6272 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
6273 UNSPEC_REG_SAVE);
6274 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
6275 RTX_FRAME_RELATED_P (y) = 1;
6276 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
6277 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6278 REG_NOTES (insn) = x;
6279
6280 /* Align the stack. */
6281 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
6282 GEN_INT (-16)));
6283
6284 /* And here we cheat like madmen with the unwind info. We force the
6285 cfa register back to sp+4, which is exactly what it was at the
6286 start of the function. Re-pushing the return address results in
6287 the return at the same spot relative to the cfa, and thus is
6288 correct wrt the unwind info. */
6289 x = cfun->machine->force_align_arg_pointer;
6290 x = gen_frame_mem (Pmode, plus_constant (x, -4));
6291 insn = emit_insn (gen_push (x));
6292 RTX_FRAME_RELATED_P (insn) = 1;
6293
6294 x = GEN_INT (4);
6295 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
6296 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
6297 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6298 REG_NOTES (insn) = x;
6299 }
6300
6301 /* Note: AT&T enter does NOT have reversed args. Enter is probably
6302 slower on all targets. Also sdb doesn't like it. */
6303
6304 if (frame_pointer_needed)
6305 {
6306 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
6307 RTX_FRAME_RELATED_P (insn) = 1;
6308
6309 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
6310 RTX_FRAME_RELATED_P (insn) = 1;
6311 }
6312
6313 allocate = frame.to_allocate;
6314
6315 if (!frame.save_regs_using_mov)
6316 ix86_emit_save_regs ();
6317 else
6318 allocate += frame.nregs * UNITS_PER_WORD;
6319
6320 /* When using the red zone we may start register saving before allocating
6321 the stack frame, saving one cycle of the prologue. */
6322 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
6323 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
6324 : stack_pointer_rtx,
6325 -frame.nregs * UNITS_PER_WORD);
6326
6327 if (allocate == 0)
6328 ;
6329 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
6330 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6331 GEN_INT (-allocate), -1);
6332 else
6333 {
6334 /* Only valid for Win32 and the 64-bit MS ABI. */
6335 rtx eax = gen_rtx_REG (Pmode, AX_REG);
6336 bool eax_live;
6337 rtx t;
6338
6339 gcc_assert (!TARGET_64BIT || TARGET_64BIT_MS_ABI);
6340
6341 if (TARGET_64BIT_MS_ABI)
6342 eax_live = false;
6343 else
6344 eax_live = ix86_eax_live_at_start_p ();
6345
6346 if (eax_live)
6347 {
6348 emit_insn (gen_push (eax));
6349 allocate -= UNITS_PER_WORD;
6350 }
6351
6352 emit_move_insn (eax, GEN_INT (allocate));
6353
6354 if (TARGET_64BIT)
6355 insn = gen_allocate_stack_worker_64 (eax);
6356 else
6357 insn = gen_allocate_stack_worker_32 (eax);
6358 insn = emit_insn (insn);
6359 RTX_FRAME_RELATED_P (insn) = 1;
6360 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
6361 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
6362 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
6363 t, REG_NOTES (insn));
6364
6365 if (eax_live)
6366 {
6367 if (frame_pointer_needed)
6368 t = plus_constant (hard_frame_pointer_rtx,
6369 allocate
6370 - frame.to_allocate
6371 - frame.nregs * UNITS_PER_WORD);
6372 else
6373 t = plus_constant (stack_pointer_rtx, allocate);
6374 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
6375 }
6376 }
6377
6378 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
6379 {
6380 if (!frame_pointer_needed || !frame.to_allocate)
6381 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
6382 else
6383 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
6384 -frame.nregs * UNITS_PER_WORD);
6385 }
6386
6387 pic_reg_used = false;
6388 if (pic_offset_table_rtx
6389 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6390 || current_function_profile))
6391 {
6392 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
6393
6394 if (alt_pic_reg_used != INVALID_REGNUM)
6395 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
6396
6397 pic_reg_used = true;
6398 }
6399
6400 if (pic_reg_used)
6401 {
6402 if (TARGET_64BIT)
6403 {
6404 if (ix86_cmodel == CM_LARGE_PIC)
6405 {
6406 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
6407 rtx label = gen_label_rtx ();
6408 emit_label (label);
6409 LABEL_PRESERVE_P (label) = 1;
6410 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6411 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6412 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6413 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6414 pic_offset_table_rtx, tmp_reg));
6415 }
6416 else
6417 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6418 }
6419 else
6420 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6421 }
6422
6423 /* Prevent function calls from being scheduled before the call to mcount.
6424 In the pic_reg_used case, make sure that the got load isn't deleted. */
6425 if (current_function_profile)
6426 {
6427 if (pic_reg_used)
6428 emit_insn (gen_prologue_use (pic_offset_table_rtx));
6429 emit_insn (gen_blockage ());
6430 }
6431 }
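/* For a typical 32-bit function with a frame pointer, a couple of
   callee-saved registers and N bytes of locals, the insns emitted above
   expand to roughly (a sketch: push-based saves, no PIC, no stack probing):

       pushl   %ebp
       movl    %esp, %ebp
       pushl   %ebx
       pushl   %esi
       subl    $N, %esp
*/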
6432
6433 /* Emit code to restore saved registers using MOV insns. First register
6434 is restored from POINTER + OFFSET. */
6435 static void
6436 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6437 int maybe_eh_return)
6438 {
6439 int regno;
6440 rtx base_address = gen_rtx_MEM (Pmode, pointer);
6441
6442 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6443 if (ix86_save_reg (regno, maybe_eh_return))
6444 {
6445 /* Ensure that adjust_address won't be forced to produce a pointer
6446 outside the range allowed by the x86-64 instruction set. */
6447 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6448 {
6449 rtx r11;
6450
6451 r11 = gen_rtx_REG (DImode, R11_REG);
6452 emit_move_insn (r11, GEN_INT (offset));
6453 emit_insn (gen_adddi3 (r11, r11, pointer));
6454 base_address = gen_rtx_MEM (Pmode, r11);
6455 offset = 0;
6456 }
6457 emit_move_insn (gen_rtx_REG (Pmode, regno),
6458 adjust_address (base_address, Pmode, offset));
6459 offset += UNITS_PER_WORD;
6460 }
6461 }
6462
6463 /* Restore function stack, frame, and registers. */
6464
6465 void
6466 ix86_expand_epilogue (int style)
6467 {
6468 int regno;
6469 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6470 struct ix86_frame frame;
6471 HOST_WIDE_INT offset;
6472
6473 ix86_compute_frame_layout (&frame);
6474
6475 /* Calculate start of saved registers relative to ebp. Special care
6476 must be taken for the normal return case of a function using
6477 eh_return: the eax and edx registers are marked as saved, but not
6478 restored along this path. */
6479 offset = frame.nregs;
6480 if (current_function_calls_eh_return && style != 2)
6481 offset -= 2;
6482 offset *= -UNITS_PER_WORD;
6483
6484 /* If we're only restoring one register and sp is not valid, then
6485 use a move instruction to restore the register, since it's
6486 less work than reloading sp and popping the register.
6487
6488 The default code results in a stack adjustment using an add/lea
6489 instruction, while this code results in a LEAVE instruction (or its
6490 discrete equivalent), so it is profitable in some other cases as well,
6491 especially when there are no registers to restore. We also use this code
6492 when TARGET_USE_LEAVE and there is exactly one register to pop. This
6493 heuristic may need some tuning in the future. */
6494 if ((!sp_valid && frame.nregs <= 1)
6495 || (TARGET_EPILOGUE_USING_MOVE
6496 && cfun->machine->use_fast_prologue_epilogue
6497 && (frame.nregs > 1 || frame.to_allocate))
6498 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6499 || (frame_pointer_needed && TARGET_USE_LEAVE
6500 && cfun->machine->use_fast_prologue_epilogue
6501 && frame.nregs == 1)
6502 || current_function_calls_eh_return)
6503 {
6504 /* Restore registers. We can use ebp or esp to address the memory
6505 locations. If both are available, default to ebp, since offsets
6506 are known to be small. The only exception is esp pointing directly to
6507 the end of the block of saved registers, where we may simplify the
6508 addressing mode. */
6509
6510 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6511 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6512 frame.to_allocate, style == 2);
6513 else
6514 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6515 offset, style == 2);
6516
6517 /* eh_return epilogues need %ecx added to the stack pointer. */
6518 if (style == 2)
6519 {
6520 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6521
6522 if (frame_pointer_needed)
6523 {
6524 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6525 tmp = plus_constant (tmp, UNITS_PER_WORD);
6526 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6527
6528 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6529 emit_move_insn (hard_frame_pointer_rtx, tmp);
6530
6531 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6532 const0_rtx, style);
6533 }
6534 else
6535 {
6536 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6537 tmp = plus_constant (tmp, (frame.to_allocate
6538 + frame.nregs * UNITS_PER_WORD));
6539 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6540 }
6541 }
6542 else if (!frame_pointer_needed)
6543 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6544 GEN_INT (frame.to_allocate
6545 + frame.nregs * UNITS_PER_WORD),
6546 style);
6547 /* If not an i386, mov & pop is faster than "leave". */
6548 else if (TARGET_USE_LEAVE || optimize_size
6549 || !cfun->machine->use_fast_prologue_epilogue)
6550 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6551 else
6552 {
6553 pro_epilogue_adjust_stack (stack_pointer_rtx,
6554 hard_frame_pointer_rtx,
6555 const0_rtx, style);
6556 if (TARGET_64BIT)
6557 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6558 else
6559 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6560 }
6561 }
6562 else
6563 {
6564 /* First step is to deallocate the stack frame so that we can
6565 pop the registers. */
6566 if (!sp_valid)
6567 {
6568 gcc_assert (frame_pointer_needed);
6569 pro_epilogue_adjust_stack (stack_pointer_rtx,
6570 hard_frame_pointer_rtx,
6571 GEN_INT (offset), style);
6572 }
6573 else if (frame.to_allocate)
6574 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6575 GEN_INT (frame.to_allocate), style);
6576
6577 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6578 if (ix86_save_reg (regno, false))
6579 {
6580 if (TARGET_64BIT)
6581 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6582 else
6583 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6584 }
6585 if (frame_pointer_needed)
6586 {
6587 /* Leave results in shorter dependency chains on CPUs that are
6588 able to grok it fast. */
6589 if (TARGET_USE_LEAVE)
6590 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6591 else if (TARGET_64BIT)
6592 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6593 else
6594 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6595 }
6596 }
6597
6598 if (cfun->machine->force_align_arg_pointer)
6599 {
6600 emit_insn (gen_addsi3 (stack_pointer_rtx,
6601 cfun->machine->force_align_arg_pointer,
6602 GEN_INT (-4)));
6603 }
6604
6605 /* Sibcall epilogues don't want a return instruction. */
6606 if (style == 0)
6607 return;
6608
6609 if (current_function_pops_args && current_function_args_size)
6610 {
6611 rtx popc = GEN_INT (current_function_pops_args);
6612
6613 /* i386 can only pop 64K bytes. If asked to pop more, pop
6614 return address, do explicit add, and jump indirectly to the
6615 caller. */
6616
6617 if (current_function_pops_args >= 65536)
6618 {
6619 rtx ecx = gen_rtx_REG (SImode, CX_REG);
6620
6621 /* There is no "pascal" calling convention in any 64bit ABI. */
6622 gcc_assert (!TARGET_64BIT);
6623
6624 emit_insn (gen_popsi1 (ecx));
6625 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6626 emit_jump_insn (gen_return_indirect_internal (ecx));
6627 }
6628 else
6629 emit_jump_insn (gen_return_pop_internal (popc));
6630 }
6631 else
6632 emit_jump_insn (gen_return_internal ());
6633 }
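/* A matching epilogue for a prologue like the one sketched after
   ix86_expand_prologue (frame pointer, two callee-saved pushes, N bytes of
   locals), using the pop-based path above, is roughly:

       addl    $N, %esp
       popl    %esi
       popl    %ebx
       popl    %ebp          -- or "leave" when TARGET_USE_LEAVE
       ret
*/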
6634
6635 /* Undo any state changes made while emitting the function's code. */
6636
6637 static void
6638 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6639 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6640 {
6641 if (pic_offset_table_rtx)
6642 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
6643 #if TARGET_MACHO
6644 /* Mach-O doesn't support labels at the end of objects, so if
6645 it looks like we might want one, insert a NOP. */
6646 {
6647 rtx insn = get_last_insn ();
6648 while (insn
6649 && NOTE_P (insn)
6650 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
6651 insn = PREV_INSN (insn);
6652 if (insn
6653 && (LABEL_P (insn)
6654 || (NOTE_P (insn)
6655 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
6656 fputs ("\tnop\n", file);
6657 }
6658 #endif
6659
6660 }
6661 \f
6662 /* Extract the parts of an RTL expression that is a valid memory address
6663 for an instruction. Return 0 if the structure of the address is
6664 grossly off. Return -1 if the address contains ASHIFT, so it is not
6665 strictly valid but is still used to compute the length of a lea insn. */
6666
6667 int
6668 ix86_decompose_address (rtx addr, struct ix86_address *out)
6669 {
6670 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6671 rtx base_reg, index_reg;
6672 HOST_WIDE_INT scale = 1;
6673 rtx scale_rtx = NULL_RTX;
6674 int retval = 1;
6675 enum ix86_address_seg seg = SEG_DEFAULT;
6676
6677 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6678 base = addr;
6679 else if (GET_CODE (addr) == PLUS)
6680 {
6681 rtx addends[4], op;
6682 int n = 0, i;
6683
6684 op = addr;
6685 do
6686 {
6687 if (n >= 4)
6688 return 0;
6689 addends[n++] = XEXP (op, 1);
6690 op = XEXP (op, 0);
6691 }
6692 while (GET_CODE (op) == PLUS);
6693 if (n >= 4)
6694 return 0;
6695 addends[n] = op;
6696
6697 for (i = n; i >= 0; --i)
6698 {
6699 op = addends[i];
6700 switch (GET_CODE (op))
6701 {
6702 case MULT:
6703 if (index)
6704 return 0;
6705 index = XEXP (op, 0);
6706 scale_rtx = XEXP (op, 1);
6707 break;
6708
6709 case UNSPEC:
6710 if (XINT (op, 1) == UNSPEC_TP
6711 && TARGET_TLS_DIRECT_SEG_REFS
6712 && seg == SEG_DEFAULT)
6713 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6714 else
6715 return 0;
6716 break;
6717
6718 case REG:
6719 case SUBREG:
6720 if (!base)
6721 base = op;
6722 else if (!index)
6723 index = op;
6724 else
6725 return 0;
6726 break;
6727
6728 case CONST:
6729 case CONST_INT:
6730 case SYMBOL_REF:
6731 case LABEL_REF:
6732 if (disp)
6733 return 0;
6734 disp = op;
6735 break;
6736
6737 default:
6738 return 0;
6739 }
6740 }
6741 }
6742 else if (GET_CODE (addr) == MULT)
6743 {
6744 index = XEXP (addr, 0); /* index*scale */
6745 scale_rtx = XEXP (addr, 1);
6746 }
6747 else if (GET_CODE (addr) == ASHIFT)
6748 {
6749 rtx tmp;
6750
6751 /* We're called for lea too, which implements ashift on occasion. */
6752 index = XEXP (addr, 0);
6753 tmp = XEXP (addr, 1);
6754 if (!CONST_INT_P (tmp))
6755 return 0;
6756 scale = INTVAL (tmp);
6757 if ((unsigned HOST_WIDE_INT) scale > 3)
6758 return 0;
6759 scale = 1 << scale;
6760 retval = -1;
6761 }
6762 else
6763 disp = addr; /* displacement */
6764
6765 /* Extract the integral value of scale. */
6766 if (scale_rtx)
6767 {
6768 if (!CONST_INT_P (scale_rtx))
6769 return 0;
6770 scale = INTVAL (scale_rtx);
6771 }
6772
6773 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6774 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6775
6776 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
6777 if (base_reg && index_reg && scale == 1
6778 && (index_reg == arg_pointer_rtx
6779 || index_reg == frame_pointer_rtx
6780 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6781 {
6782 rtx tmp;
6783 tmp = base, base = index, index = tmp;
6784 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6785 }
6786
6787 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6788 if ((base_reg == hard_frame_pointer_rtx
6789 || base_reg == frame_pointer_rtx
6790 || base_reg == arg_pointer_rtx) && !disp)
6791 disp = const0_rtx;
6792
6793 /* Special case: on the K6, [%esi] forces the instruction to be vector
6794 decoded. Avoid this by transforming it to [%esi+0]. */
6795 if (TARGET_K6 && !optimize_size
6796 && base_reg && !index_reg && !disp
6797 && REG_P (base_reg)
6798 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6799 disp = const0_rtx;
6800
6801 /* Special case: encode reg+reg instead of reg*2. */
6802 if (!base && index && scale && scale == 2)
6803 base = index, base_reg = index_reg, scale = 1;
6804
6805 /* Special case: scaling cannot be encoded without base or displacement. */
6806 if (!base && !disp && index && scale != 1)
6807 disp = const0_rtx;
6808
6809 out->base = base;
6810 out->index = index;
6811 out->disp = disp;
6812 out->scale = scale;
6813 out->seg = seg;
6814
6815 return retval;
6816 }
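
/* Illustrative sketch only (register names are invented): the canonical
   address (plus (plus (mult (reg B) (const_int 4)) (reg A)) (const_int 8)),
   i.e. 8(%A,%B,4) in AT&T syntax, decomposes into base = A, index = B,
   scale = 4, disp = 8, seg = SEG_DEFAULT, with a return value of 1.  */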
6817 \f
6818 /* Return cost of the memory address x.
6819 For i386, it is better to use a complex address than let gcc copy
6820 the address into a reg and make a new pseudo. But not if the address
6821 requires two regs - that would mean more pseudos with longer
6822 lifetimes. */
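
/* For example (a rough sketch, not normative): an address built from two
   distinct pseudo registers, such as (plus (reg P1) (reg P2)), costs 3 by
   the rules below, while the same form using only hard registers costs 1.  */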
6823 static int
6824 ix86_address_cost (rtx x)
6825 {
6826 struct ix86_address parts;
6827 int cost = 1;
6828 int ok = ix86_decompose_address (x, &parts);
6829
6830 gcc_assert (ok);
6831
6832 if (parts.base && GET_CODE (parts.base) == SUBREG)
6833 parts.base = SUBREG_REG (parts.base);
6834 if (parts.index && GET_CODE (parts.index) == SUBREG)
6835 parts.index = SUBREG_REG (parts.index);
6836
6837 /* Attempt to minimize number of registers in the address. */
6838 if ((parts.base
6839 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6840 || (parts.index
6841 && (!REG_P (parts.index)
6842 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6843 cost++;
6844
6845 if (parts.base
6846 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6847 && parts.index
6848 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6849 && parts.base != parts.index)
6850 cost++;
6851
6852 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
6853 since its predecode logic can't detect the length of instructions,
6854 and they degenerate to vector decoding. Increase the cost of such
6855 addresses here. The penalty is at least 2 cycles. It may be worthwhile
6856 to split such addresses or even to refuse them entirely.
6857
6858 The following addressing modes are affected:
6859 [base+scale*index]
6860 [scale*index+disp]
6861 [base+index]
6862
6863 The first and last cases may be avoidable by explicitly coding a zero
6864 displacement in the memory address, but I don't have an AMD-K6 machine
6865 handy to check this theory. */
6866
6867 if (TARGET_K6
6868 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6869 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6870 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6871 cost += 10;
6872
6873 return cost;
6874 }
6875 \f
6876 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6877 this is used to form addresses to local data when -fPIC is in
6878 use. */
6879
6880 static bool
6881 darwin_local_data_pic (rtx disp)
6882 {
6883 if (GET_CODE (disp) == MINUS)
6884 {
6885 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6886 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6887 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6888 {
6889 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6890 if (! strcmp (sym_name, "<pic base>"))
6891 return true;
6892 }
6893 }
6894
6895 return false;
6896 }
6897
6898 /* Determine if a given RTX is a valid constant. We already know this
6899 satisfies CONSTANT_P. */
6900
6901 bool
6902 legitimate_constant_p (rtx x)
6903 {
6904 switch (GET_CODE (x))
6905 {
6906 case CONST:
6907 x = XEXP (x, 0);
6908
6909 if (GET_CODE (x) == PLUS)
6910 {
6911 if (!CONST_INT_P (XEXP (x, 1)))
6912 return false;
6913 x = XEXP (x, 0);
6914 }
6915
6916 if (TARGET_MACHO && darwin_local_data_pic (x))
6917 return true;
6918
6919 /* Only some unspecs are valid as "constants". */
6920 if (GET_CODE (x) == UNSPEC)
6921 switch (XINT (x, 1))
6922 {
6923 case UNSPEC_GOT:
6924 case UNSPEC_GOTOFF:
6925 case UNSPEC_PLTOFF:
6926 return TARGET_64BIT;
6927 case UNSPEC_TPOFF:
6928 case UNSPEC_NTPOFF:
6929 x = XVECEXP (x, 0, 0);
6930 return (GET_CODE (x) == SYMBOL_REF
6931 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6932 case UNSPEC_DTPOFF:
6933 x = XVECEXP (x, 0, 0);
6934 return (GET_CODE (x) == SYMBOL_REF
6935 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6936 default:
6937 return false;
6938 }
6939
6940 /* We must have drilled down to a symbol. */
6941 if (GET_CODE (x) == LABEL_REF)
6942 return true;
6943 if (GET_CODE (x) != SYMBOL_REF)
6944 return false;
6945 /* FALLTHRU */
6946
6947 case SYMBOL_REF:
6948 /* TLS symbols are never valid. */
6949 if (SYMBOL_REF_TLS_MODEL (x))
6950 return false;
6951
6952 /* DLLIMPORT symbols are never valid. */
6953 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
6954 && SYMBOL_REF_DLLIMPORT_P (x))
6955 return false;
6956 break;
6957
6958 case CONST_DOUBLE:
6959 if (GET_MODE (x) == TImode
6960 && x != CONST0_RTX (TImode)
6961 && !TARGET_64BIT)
6962 return false;
6963 break;
6964
6965 case CONST_VECTOR:
6966 if (x == CONST0_RTX (GET_MODE (x)))
6967 return true;
6968 return false;
6969
6970 default:
6971 break;
6972 }
6973
6974 /* Otherwise we handle everything else in the move patterns. */
6975 return true;
6976 }
6977
6978 /* Determine if it's legal to put X into the constant pool. This
6979 is not possible for the address of thread-local symbols, which
6980 is checked above. */
6981
6982 static bool
6983 ix86_cannot_force_const_mem (rtx x)
6984 {
6985 /* We can always put integral constants and vectors in memory. */
6986 switch (GET_CODE (x))
6987 {
6988 case CONST_INT:
6989 case CONST_DOUBLE:
6990 case CONST_VECTOR:
6991 return false;
6992
6993 default:
6994 break;
6995 }
6996 return !legitimate_constant_p (x);
6997 }
6998
6999 /* Determine if a given RTX is a valid constant address. */
7000
7001 bool
7002 constant_address_p (rtx x)
7003 {
7004 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
7005 }
7006
7007 /* Nonzero if the constant value X is a legitimate general operand
7008 when generating PIC code. It is given that flag_pic is on and
7009 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
7010
7011 bool
7012 legitimate_pic_operand_p (rtx x)
7013 {
7014 rtx inner;
7015
7016 switch (GET_CODE (x))
7017 {
7018 case CONST:
7019 inner = XEXP (x, 0);
7020 if (GET_CODE (inner) == PLUS
7021 && CONST_INT_P (XEXP (inner, 1)))
7022 inner = XEXP (inner, 0);
7023
7024 /* Only some unspecs are valid as "constants". */
7025 if (GET_CODE (inner) == UNSPEC)
7026 switch (XINT (inner, 1))
7027 {
7028 case UNSPEC_GOT:
7029 case UNSPEC_GOTOFF:
7030 case UNSPEC_PLTOFF:
7031 return TARGET_64BIT;
7032 case UNSPEC_TPOFF:
7033 x = XVECEXP (inner, 0, 0);
7034 return (GET_CODE (x) == SYMBOL_REF
7035 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
7036 default:
7037 return false;
7038 }
7039 /* FALLTHRU */
7040
7041 case SYMBOL_REF:
7042 case LABEL_REF:
7043 return legitimate_pic_address_disp_p (x);
7044
7045 default:
7046 return true;
7047 }
7048 }
7049
7050 /* Determine if a given CONST RTX is a valid memory displacement
7051 in PIC mode. */
7052
7053 int
7054 legitimate_pic_address_disp_p (rtx disp)
7055 {
7056 bool saw_plus;
7057
7058 /* In 64bit mode we can allow direct addresses of symbols and labels
7059 when they are not dynamic symbols. */
7060 if (TARGET_64BIT)
7061 {
7062 rtx op0 = disp, op1;
7063
7064 switch (GET_CODE (disp))
7065 {
7066 case LABEL_REF:
7067 return true;
7068
7069 case CONST:
7070 if (GET_CODE (XEXP (disp, 0)) != PLUS)
7071 break;
7072 op0 = XEXP (XEXP (disp, 0), 0);
7073 op1 = XEXP (XEXP (disp, 0), 1);
7074 if (!CONST_INT_P (op1)
7075 || INTVAL (op1) >= 16*1024*1024
7076 || INTVAL (op1) < -16*1024*1024)
7077 break;
7078 if (GET_CODE (op0) == LABEL_REF)
7079 return true;
7080 if (GET_CODE (op0) != SYMBOL_REF)
7081 break;
7082 /* FALLTHRU */
7083
7084 case SYMBOL_REF:
7085 /* TLS references should always be enclosed in UNSPEC. */
7086 if (SYMBOL_REF_TLS_MODEL (op0))
7087 return false;
7088 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
7089 && ix86_cmodel != CM_LARGE_PIC)
7090 return true;
7091 break;
7092
7093 default:
7094 break;
7095 }
7096 }
7097 if (GET_CODE (disp) != CONST)
7098 return 0;
7099 disp = XEXP (disp, 0);
7100
7101 if (TARGET_64BIT)
7102 {
7103 /* It is not safe to allow PLUS expressions here; that would limit the
7104 allowed distance of GOT tables. We should not need these anyway. */
7105 if (GET_CODE (disp) != UNSPEC
7106 || (XINT (disp, 1) != UNSPEC_GOTPCREL
7107 && XINT (disp, 1) != UNSPEC_GOTOFF
7108 && XINT (disp, 1) != UNSPEC_PLTOFF))
7109 return 0;
7110
7111 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
7112 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
7113 return 0;
7114 return 1;
7115 }
7116
7117 saw_plus = false;
7118 if (GET_CODE (disp) == PLUS)
7119 {
7120 if (!CONST_INT_P (XEXP (disp, 1)))
7121 return 0;
7122 disp = XEXP (disp, 0);
7123 saw_plus = true;
7124 }
7125
7126 if (TARGET_MACHO && darwin_local_data_pic (disp))
7127 return 1;
7128
7129 if (GET_CODE (disp) != UNSPEC)
7130 return 0;
7131
7132 switch (XINT (disp, 1))
7133 {
7134 case UNSPEC_GOT:
7135 if (saw_plus)
7136 return false;
7137 /* We need to check for both symbols and labels because VxWorks loads
7138 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
7139 details. */
7140 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7141 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
7142 case UNSPEC_GOTOFF:
7143 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
7144 While the ABI also specifies a 32bit relocation, we don't produce it
7145 in the small PIC model at all. */
7146 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7147 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
7148 && !TARGET_64BIT)
7149 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
7150 return false;
7151 case UNSPEC_GOTTPOFF:
7152 case UNSPEC_GOTNTPOFF:
7153 case UNSPEC_INDNTPOFF:
7154 if (saw_plus)
7155 return false;
7156 disp = XVECEXP (disp, 0, 0);
7157 return (GET_CODE (disp) == SYMBOL_REF
7158 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
7159 case UNSPEC_NTPOFF:
7160 disp = XVECEXP (disp, 0, 0);
7161 return (GET_CODE (disp) == SYMBOL_REF
7162 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
7163 case UNSPEC_DTPOFF:
7164 disp = XVECEXP (disp, 0, 0);
7165 return (GET_CODE (disp) == SYMBOL_REF
7166 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
7167 }
7168
7169 return 0;
7170 }
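
/* Illustrative sketch (symbol name invented): in 32-bit PIC code a
   displacement of the form (const (unspec [(symbol_ref "foo")] UNSPEC_GOT))
   is accepted above; combined with the PIC register it yields the familiar
   foo@GOT(%ebx) style of memory reference.  */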
7171
7172 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
7173 memory address for an instruction. The MODE argument is the machine mode
7174 for the MEM expression that wants to use this address.
7175
7176 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
7177 convert common non-canonical forms to canonical form so that they will
7178 be recognized. */
7179
7180 int
7181 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
7182 rtx addr, int strict)
7183 {
7184 struct ix86_address parts;
7185 rtx base, index, disp;
7186 HOST_WIDE_INT scale;
7187 const char *reason = NULL;
7188 rtx reason_rtx = NULL_RTX;
7189
7190 if (ix86_decompose_address (addr, &parts) <= 0)
7191 {
7192 reason = "decomposition failed";
7193 goto report_error;
7194 }
7195
7196 base = parts.base;
7197 index = parts.index;
7198 disp = parts.disp;
7199 scale = parts.scale;
7200
7201 /* Validate base register.
7202
7203 Don't allow SUBREGs that span more than a word here. It can lead to spill
7204 failures when the base is one word out of a two word structure, which is
7205 represented internally as a DImode int. */
7206
7207 if (base)
7208 {
7209 rtx reg;
7210 reason_rtx = base;
7211
7212 if (REG_P (base))
7213 reg = base;
7214 else if (GET_CODE (base) == SUBREG
7215 && REG_P (SUBREG_REG (base))
7216 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
7217 <= UNITS_PER_WORD)
7218 reg = SUBREG_REG (base);
7219 else
7220 {
7221 reason = "base is not a register";
7222 goto report_error;
7223 }
7224
7225 if (GET_MODE (base) != Pmode)
7226 {
7227 reason = "base is not in Pmode";
7228 goto report_error;
7229 }
7230
7231 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
7232 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
7233 {
7234 reason = "base is not valid";
7235 goto report_error;
7236 }
7237 }
7238
7239 /* Validate index register.
7240
7241 Don't allow SUBREGs that span more than a word here -- same as above. */
7242
7243 if (index)
7244 {
7245 rtx reg;
7246 reason_rtx = index;
7247
7248 if (REG_P (index))
7249 reg = index;
7250 else if (GET_CODE (index) == SUBREG
7251 && REG_P (SUBREG_REG (index))
7252 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
7253 <= UNITS_PER_WORD)
7254 reg = SUBREG_REG (index);
7255 else
7256 {
7257 reason = "index is not a register";
7258 goto report_error;
7259 }
7260
7261 if (GET_MODE (index) != Pmode)
7262 {
7263 reason = "index is not in Pmode";
7264 goto report_error;
7265 }
7266
7267 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
7268 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
7269 {
7270 reason = "index is not valid";
7271 goto report_error;
7272 }
7273 }
7274
7275 /* Validate scale factor. */
7276 if (scale != 1)
7277 {
7278 reason_rtx = GEN_INT (scale);
7279 if (!index)
7280 {
7281 reason = "scale without index";
7282 goto report_error;
7283 }
7284
7285 if (scale != 2 && scale != 4 && scale != 8)
7286 {
7287 reason = "scale is not a valid multiplier";
7288 goto report_error;
7289 }
7290 }
7291
7292 /* Validate displacement. */
7293 if (disp)
7294 {
7295 reason_rtx = disp;
7296
7297 if (GET_CODE (disp) == CONST
7298 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
7299 switch (XINT (XEXP (disp, 0), 1))
7300 {
7301 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
7302 used. While the ABI also specifies 32bit relocations, we don't produce
7303 them at all and use IP-relative addressing instead. */
7304 case UNSPEC_GOT:
7305 case UNSPEC_GOTOFF:
7306 gcc_assert (flag_pic);
7307 if (!TARGET_64BIT)
7308 goto is_legitimate_pic;
7309 reason = "64bit address unspec";
7310 goto report_error;
7311
7312 case UNSPEC_GOTPCREL:
7313 gcc_assert (flag_pic);
7314 goto is_legitimate_pic;
7315
7316 case UNSPEC_GOTTPOFF:
7317 case UNSPEC_GOTNTPOFF:
7318 case UNSPEC_INDNTPOFF:
7319 case UNSPEC_NTPOFF:
7320 case UNSPEC_DTPOFF:
7321 break;
7322
7323 default:
7324 reason = "invalid address unspec";
7325 goto report_error;
7326 }
7327
7328 else if (SYMBOLIC_CONST (disp)
7329 && (flag_pic
7330 || (TARGET_MACHO
7331 #if TARGET_MACHO
7332 && MACHOPIC_INDIRECT
7333 && !machopic_operand_p (disp)
7334 #endif
7335 )))
7336 {
7337
7338 is_legitimate_pic:
7339 if (TARGET_64BIT && (index || base))
7340 {
7341 /* foo@dtpoff(%rX) is ok. */
7342 if (GET_CODE (disp) != CONST
7343 || GET_CODE (XEXP (disp, 0)) != PLUS
7344 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
7345 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
7346 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
7347 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
7348 {
7349 reason = "non-constant pic memory reference";
7350 goto report_error;
7351 }
7352 }
7353 else if (! legitimate_pic_address_disp_p (disp))
7354 {
7355 reason = "displacement is an invalid pic construct";
7356 goto report_error;
7357 }
7358
7359 /* This code used to verify that a symbolic pic displacement
7360 includes the pic_offset_table_rtx register.
7361
7362 While this is a good idea, unfortunately these constructs may
7363 be created by the "adds using lea" optimization for incorrect
7364 code like:
7365
7366 int a;
7367 int foo(int i)
7368 {
7369 return *(&a+i);
7370 }
7371
7372 This code is nonsensical, but results in addressing the
7373 GOT table with pic_offset_table_rtx as the base. We can't
7374 easily refuse it, since it gets matched by the
7375 "addsi3" pattern, which later gets split to lea when the
7376 output register differs from the input. While this
7377 could be handled by a separate addsi pattern for this case
7378 that never results in lea, disabling this test seems to be
7379 the easier and correct fix for the crash. */
7380 }
7381 else if (GET_CODE (disp) != LABEL_REF
7382 && !CONST_INT_P (disp)
7383 && (GET_CODE (disp) != CONST
7384 || !legitimate_constant_p (disp))
7385 && (GET_CODE (disp) != SYMBOL_REF
7386 || !legitimate_constant_p (disp)))
7387 {
7388 reason = "displacement is not constant";
7389 goto report_error;
7390 }
7391 else if (TARGET_64BIT
7392 && !x86_64_immediate_operand (disp, VOIDmode))
7393 {
7394 reason = "displacement is out of range";
7395 goto report_error;
7396 }
7397 }
7398
7399 /* Everything looks valid. */
7400 return TRUE;
7401
7402 report_error:
7403 return FALSE;
7404 }
7405 \f
7406 /* Return a unique alias set for the GOT. */
7407
7408 static alias_set_type
7409 ix86_GOT_alias_set (void)
7410 {
7411 static alias_set_type set = -1;
7412 if (set == -1)
7413 set = new_alias_set ();
7414 return set;
7415 }
7416
7417 /* Return a legitimate reference for ORIG (an address) using the
7418 register REG. If REG is 0, a new pseudo is generated.
7419
7420 There are two types of references that must be handled:
7421
7422 1. Global data references must load the address from the GOT, via
7423 the PIC reg. An insn is emitted to do this load, and the reg is
7424 returned.
7425
7426 2. Static data references, constant pool addresses, and code labels
7427 compute the address as an offset from the GOT, whose base is in
7428 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7429 differentiate them from global data objects. The returned
7430 address is the PIC reg + an unspec constant.
7431
7432 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7433 reg also appears in the address. */
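
/* A rough illustration (symbol name invented, 32-bit case only): for a
   global symbol the result is a load of the form
     (mem (plus pic_offset_table_rtx
                (const (unspec [(symbol_ref "sym")] UNSPEC_GOT))))
   while for local data it is simply
     (plus pic_offset_table_rtx
           (const (unspec [(symbol_ref "sym")] UNSPEC_GOTOFF))).  */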
7434
7435 static rtx
7436 legitimize_pic_address (rtx orig, rtx reg)
7437 {
7438 rtx addr = orig;
7439 rtx new_rtx = orig;
7440 rtx base;
7441
7442 #if TARGET_MACHO
7443 if (TARGET_MACHO && !TARGET_64BIT)
7444 {
7445 if (reg == 0)
7446 reg = gen_reg_rtx (Pmode);
7447 /* Use the generic Mach-O PIC machinery. */
7448 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7449 }
7450 #endif
7451
7452 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7453 new_rtx = addr;
7454 else if (TARGET_64BIT
7455 && ix86_cmodel != CM_SMALL_PIC
7456 && gotoff_operand (addr, Pmode))
7457 {
7458 rtx tmpreg;
7459 /* This symbol may be referenced via a displacement from the PIC
7460 base address (@GOTOFF). */
7461
7462 if (reload_in_progress)
7463 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7464 if (GET_CODE (addr) == CONST)
7465 addr = XEXP (addr, 0);
7466 if (GET_CODE (addr) == PLUS)
7467 {
7468 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7469 UNSPEC_GOTOFF);
7470 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7471 }
7472 else
7473 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7474 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7475 if (!reg)
7476 tmpreg = gen_reg_rtx (Pmode);
7477 else
7478 tmpreg = reg;
7479 emit_move_insn (tmpreg, new_rtx);
7480
7481 if (reg != 0)
7482 {
7483 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7484 tmpreg, 1, OPTAB_DIRECT);
7485 new_rtx = reg;
7486 }
7487 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7488 }
7489 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7490 {
7491 /* This symbol may be referenced via a displacement from the PIC
7492 base address (@GOTOFF). */
7493
7494 if (reload_in_progress)
7495 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7496 if (GET_CODE (addr) == CONST)
7497 addr = XEXP (addr, 0);
7498 if (GET_CODE (addr) == PLUS)
7499 {
7500 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7501 UNSPEC_GOTOFF);
7502 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7503 }
7504 else
7505 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7506 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7507 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7508
7509 if (reg != 0)
7510 {
7511 emit_move_insn (reg, new_rtx);
7512 new_rtx = reg;
7513 }
7514 }
7515 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7516 /* We can't use @GOTOFF for text labels on VxWorks;
7517 see gotoff_operand. */
7518 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7519 {
7520 /* Given that we've already handled dllimport variables separately
7521 in legitimize_address, and all other variables should satisfy
7522 legitimate_pic_address_disp_p, we should never arrive here. */
7523 gcc_assert (!TARGET_64BIT_MS_ABI);
7524
7525 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7526 {
7527 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7528 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7529 new_rtx = gen_const_mem (Pmode, new_rtx);
7530 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7531
7532 if (reg == 0)
7533 reg = gen_reg_rtx (Pmode);
7534 /* Use gen_movsi directly; otherwise the address is loaded
7535 into a register for CSE. We don't want to CSE these addresses;
7536 instead we CSE addresses from the GOT table, so skip this. */
7537 emit_insn (gen_movsi (reg, new_rtx));
7538 new_rtx = reg;
7539 }
7540 else
7541 {
7542 /* This symbol must be referenced via a load from the
7543 Global Offset Table (@GOT). */
7544
7545 if (reload_in_progress)
7546 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7547 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7548 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7549 if (TARGET_64BIT)
7550 new_rtx = force_reg (Pmode, new_rtx);
7551 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7552 new_rtx = gen_const_mem (Pmode, new_rtx);
7553 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7554
7555 if (reg == 0)
7556 reg = gen_reg_rtx (Pmode);
7557 emit_move_insn (reg, new_rtx);
7558 new_rtx = reg;
7559 }
7560 }
7561 else
7562 {
7563 if (CONST_INT_P (addr)
7564 && !x86_64_immediate_operand (addr, VOIDmode))
7565 {
7566 if (reg)
7567 {
7568 emit_move_insn (reg, addr);
7569 new_rtx = reg;
7570 }
7571 else
7572 new_rtx = force_reg (Pmode, addr);
7573 }
7574 else if (GET_CODE (addr) == CONST)
7575 {
7576 addr = XEXP (addr, 0);
7577
7578 /* We must match the forms we generated earlier. Assume the only
7579 unspecs that can get here are ours. Not that we could do
7580 anything with them anyway.... */
7581 if (GET_CODE (addr) == UNSPEC
7582 || (GET_CODE (addr) == PLUS
7583 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7584 return orig;
7585 gcc_assert (GET_CODE (addr) == PLUS);
7586 }
7587 if (GET_CODE (addr) == PLUS)
7588 {
7589 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7590
7591 /* Check first to see if this is a constant offset from a @GOTOFF
7592 symbol reference. */
7593 if (gotoff_operand (op0, Pmode)
7594 && CONST_INT_P (op1))
7595 {
7596 if (!TARGET_64BIT)
7597 {
7598 if (reload_in_progress)
7599 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7600 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7601 UNSPEC_GOTOFF);
7602 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
7603 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7604 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7605
7606 if (reg != 0)
7607 {
7608 emit_move_insn (reg, new_rtx);
7609 new_rtx = reg;
7610 }
7611 }
7612 else
7613 {
7614 if (INTVAL (op1) < -16*1024*1024
7615 || INTVAL (op1) >= 16*1024*1024)
7616 {
7617 if (!x86_64_immediate_operand (op1, Pmode))
7618 op1 = force_reg (Pmode, op1);
7619 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7620 }
7621 }
7622 }
7623 else
7624 {
7625 base = legitimize_pic_address (XEXP (addr, 0), reg);
7626 new_rtx = legitimize_pic_address (XEXP (addr, 1),
7627 base == reg ? NULL_RTX : reg);
7628
7629 if (CONST_INT_P (new_rtx))
7630 new_rtx = plus_constant (base, INTVAL (new_rtx));
7631 else
7632 {
7633 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
7634 {
7635 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
7636 new_rtx = XEXP (new_rtx, 1);
7637 }
7638 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
7639 }
7640 }
7641 }
7642 }
7643 return new_rtx;
7644 }
7645 \f
7646 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7647
7648 static rtx
7649 get_thread_pointer (int to_reg)
7650 {
7651 rtx tp, reg, insn;
7652
7653 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7654 if (!to_reg)
7655 return tp;
7656
7657 reg = gen_reg_rtx (Pmode);
7658 insn = gen_rtx_SET (VOIDmode, reg, tp);
7659 insn = emit_insn (insn);
7660
7661 return reg;
7662 }
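
/* Note that the bare (unspec [(const_int 0)] UNSPEC_TP) form returned above,
   once it appears as a term of a PLUS address, is what ix86_decompose_address
   recognizes as an %fs/%gs segment override when TARGET_TLS_DIRECT_SEG_REFS
   is enabled.  */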
7663
7664 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7665 false if we expect this to be used for a memory address and true if
7666 we expect to load the address into a register. */
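
/* For instance (a sketch; the variable name is invented and the exact
   sequence depends on target flags): with TARGET_TLS_DIRECT_SEG_REFS a
   local-exec access to "__thread int t" ends up as a segment-relative
   address, roughly movl %fs:t@tpoff, %eax on x86-64 or
   movl %gs:t@ntpoff, %eax on ia32 with GNU TLS.  */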
7667
7668 static rtx
7669 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7670 {
7671 rtx dest, base, off, pic, tp;
7672 int type;
7673
7674 switch (model)
7675 {
7676 case TLS_MODEL_GLOBAL_DYNAMIC:
7677 dest = gen_reg_rtx (Pmode);
7678 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7679
7680 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7681 {
7682 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
7683
7684 start_sequence ();
7685 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7686 insns = get_insns ();
7687 end_sequence ();
7688
7689 CONST_OR_PURE_CALL_P (insns) = 1;
7690 emit_libcall_block (insns, dest, rax, x);
7691 }
7692 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7693 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7694 else
7695 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7696
7697 if (TARGET_GNU2_TLS)
7698 {
7699 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7700
7701 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7702 }
7703 break;
7704
7705 case TLS_MODEL_LOCAL_DYNAMIC:
7706 base = gen_reg_rtx (Pmode);
7707 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7708
7709 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7710 {
7711 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
7712
7713 start_sequence ();
7714 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7715 insns = get_insns ();
7716 end_sequence ();
7717
7718 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7719 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7720 CONST_OR_PURE_CALL_P (insns) = 1;
7721 emit_libcall_block (insns, base, rax, note);
7722 }
7723 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7724 emit_insn (gen_tls_local_dynamic_base_64 (base));
7725 else
7726 emit_insn (gen_tls_local_dynamic_base_32 (base));
7727
7728 if (TARGET_GNU2_TLS)
7729 {
7730 rtx x = ix86_tls_module_base ();
7731
7732 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7733 gen_rtx_MINUS (Pmode, x, tp));
7734 }
7735
7736 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7737 off = gen_rtx_CONST (Pmode, off);
7738
7739 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7740
7741 if (TARGET_GNU2_TLS)
7742 {
7743 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7744
7745 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7746 }
7747
7748 break;
7749
7750 case TLS_MODEL_INITIAL_EXEC:
7751 if (TARGET_64BIT)
7752 {
7753 pic = NULL;
7754 type = UNSPEC_GOTNTPOFF;
7755 }
7756 else if (flag_pic)
7757 {
7758 if (reload_in_progress)
7759 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7760 pic = pic_offset_table_rtx;
7761 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7762 }
7763 else if (!TARGET_ANY_GNU_TLS)
7764 {
7765 pic = gen_reg_rtx (Pmode);
7766 emit_insn (gen_set_got (pic));
7767 type = UNSPEC_GOTTPOFF;
7768 }
7769 else
7770 {
7771 pic = NULL;
7772 type = UNSPEC_INDNTPOFF;
7773 }
7774
7775 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7776 off = gen_rtx_CONST (Pmode, off);
7777 if (pic)
7778 off = gen_rtx_PLUS (Pmode, pic, off);
7779 off = gen_const_mem (Pmode, off);
7780 set_mem_alias_set (off, ix86_GOT_alias_set ());
7781
7782 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7783 {
7784 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7785 off = force_reg (Pmode, off);
7786 return gen_rtx_PLUS (Pmode, base, off);
7787 }
7788 else
7789 {
7790 base = get_thread_pointer (true);
7791 dest = gen_reg_rtx (Pmode);
7792 emit_insn (gen_subsi3 (dest, base, off));
7793 }
7794 break;
7795
7796 case TLS_MODEL_LOCAL_EXEC:
7797 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7798 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7799 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7800 off = gen_rtx_CONST (Pmode, off);
7801
7802 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7803 {
7804 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7805 return gen_rtx_PLUS (Pmode, base, off);
7806 }
7807 else
7808 {
7809 base = get_thread_pointer (true);
7810 dest = gen_reg_rtx (Pmode);
7811 emit_insn (gen_subsi3 (dest, base, off));
7812 }
7813 break;
7814
7815 default:
7816 gcc_unreachable ();
7817 }
7818
7819 return dest;
7820 }
7821
7822 /* Create or return the unique __imp_DECL dllimport symbol corresponding
7823 to symbol DECL. */
7824
7825 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
7826 htab_t dllimport_map;
7827
7828 static tree
7829 get_dllimport_decl (tree decl)
7830 {
7831 struct tree_map *h, in;
7832 void **loc;
7833 const char *name;
7834 const char *prefix;
7835 size_t namelen, prefixlen;
7836 char *imp_name;
7837 tree to;
7838 rtx rtl;
7839
7840 if (!dllimport_map)
7841 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
7842
7843 in.hash = htab_hash_pointer (decl);
7844 in.base.from = decl;
7845 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
7846 h = (struct tree_map *) *loc;
7847 if (h)
7848 return h->to;
7849
7850 *loc = h = GGC_NEW (struct tree_map);
7851 h->hash = in.hash;
7852 h->base.from = decl;
7853 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
7854 DECL_ARTIFICIAL (to) = 1;
7855 DECL_IGNORED_P (to) = 1;
7856 DECL_EXTERNAL (to) = 1;
7857 TREE_READONLY (to) = 1;
7858
7859 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
7860 name = targetm.strip_name_encoding (name);
7861 prefix = name[0] == FASTCALL_PREFIX ? "*__imp_": "*__imp__";
7862 namelen = strlen (name);
7863 prefixlen = strlen (prefix);
7864 imp_name = (char *) alloca (namelen + prefixlen + 1);
7865 memcpy (imp_name, prefix, prefixlen);
7866 memcpy (imp_name + prefixlen, name, namelen + 1);
7867
7868 name = ggc_alloc_string (imp_name, namelen + prefixlen);
7869 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
7870 SET_SYMBOL_REF_DECL (rtl, to);
7871 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
7872
7873 rtl = gen_const_mem (Pmode, rtl);
7874 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
7875
7876 SET_DECL_RTL (to, rtl);
7877 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
7878
7879 return to;
7880 }
7881
7882 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
7883 true if we require the result be a register. */
7884
7885 static rtx
7886 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
7887 {
7888 tree imp_decl;
7889 rtx x;
7890
7891 gcc_assert (SYMBOL_REF_DECL (symbol));
7892 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
7893
7894 x = DECL_RTL (imp_decl);
7895 if (want_reg)
7896 x = force_reg (Pmode, x);
7897 return x;
7898 }
7899
7900 /* Try machine-dependent ways of modifying an illegitimate address
7901 to be legitimate. If we find one, return the new, valid address.
7902 This macro is used in only one place: `memory_address' in explow.c.
7903
7904 OLDX is the address as it was before break_out_memory_refs was called.
7905 In some cases it is useful to look at this to decide what needs to be done.
7906
7907 MODE and WIN are passed so that this macro can use
7908 GO_IF_LEGITIMATE_ADDRESS.
7909
7910 It is always safe for this macro to do nothing. It exists to recognize
7911 opportunities to optimize the output.
7912
7913 For the 80386, we handle X+REG by loading X into a register R and
7914 using R+REG. R will go in a general reg and indexing will be used.
7915 However, if REG is a broken-out memory address or multiplication,
7916 nothing needs to be done because REG can certainly go in a general reg.
7917
7918 When -fpic is used, special handling is needed for symbolic references.
7919 See comments by legitimize_pic_address in i386.c for details. */
7920
7921 rtx
7922 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7923 {
7924 int changed = 0;
7925 unsigned log;
7926
7927 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7928 if (log)
7929 return legitimize_tls_address (x, (enum tls_model) log, false);
7930 if (GET_CODE (x) == CONST
7931 && GET_CODE (XEXP (x, 0)) == PLUS
7932 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7933 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7934 {
7935 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
7936 (enum tls_model) log, false);
7937 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7938 }
7939
7940 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7941 {
7942 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
7943 return legitimize_dllimport_symbol (x, true);
7944 if (GET_CODE (x) == CONST
7945 && GET_CODE (XEXP (x, 0)) == PLUS
7946 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7947 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
7948 {
7949 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
7950 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7951 }
7952 }
7953
7954 if (flag_pic && SYMBOLIC_CONST (x))
7955 return legitimize_pic_address (x, 0);
7956
7957 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7958 if (GET_CODE (x) == ASHIFT
7959 && CONST_INT_P (XEXP (x, 1))
7960 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7961 {
7962 changed = 1;
7963 log = INTVAL (XEXP (x, 1));
7964 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7965 GEN_INT (1 << log));
7966 }
7967
7968 if (GET_CODE (x) == PLUS)
7969 {
7970 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7971
7972 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7973 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7974 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7975 {
7976 changed = 1;
7977 log = INTVAL (XEXP (XEXP (x, 0), 1));
7978 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7979 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7980 GEN_INT (1 << log));
7981 }
7982
7983 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7984 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7985 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7986 {
7987 changed = 1;
7988 log = INTVAL (XEXP (XEXP (x, 1), 1));
7989 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7990 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7991 GEN_INT (1 << log));
7992 }
7993
7994 /* Put multiply first if it isn't already. */
7995 if (GET_CODE (XEXP (x, 1)) == MULT)
7996 {
7997 rtx tmp = XEXP (x, 0);
7998 XEXP (x, 0) = XEXP (x, 1);
7999 XEXP (x, 1) = tmp;
8000 changed = 1;
8001 }
8002
8003 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
8004 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
8005 created by virtual register instantiation, register elimination, and
8006 similar optimizations. */
8007 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
8008 {
8009 changed = 1;
8010 x = gen_rtx_PLUS (Pmode,
8011 gen_rtx_PLUS (Pmode, XEXP (x, 0),
8012 XEXP (XEXP (x, 1), 0)),
8013 XEXP (XEXP (x, 1), 1));
8014 }
8015
8016 /* Canonicalize
8017 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
8018 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
8019 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
8020 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8021 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
8022 && CONSTANT_P (XEXP (x, 1)))
8023 {
8024 rtx constant;
8025 rtx other = NULL_RTX;
8026
8027 if (CONST_INT_P (XEXP (x, 1)))
8028 {
8029 constant = XEXP (x, 1);
8030 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
8031 }
8032 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
8033 {
8034 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
8035 other = XEXP (x, 1);
8036 }
8037 else
8038 constant = 0;
8039
8040 if (constant)
8041 {
8042 changed = 1;
8043 x = gen_rtx_PLUS (Pmode,
8044 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
8045 XEXP (XEXP (XEXP (x, 0), 1), 0)),
8046 plus_constant (other, INTVAL (constant)));
8047 }
8048 }
8049
8050 if (changed && legitimate_address_p (mode, x, FALSE))
8051 return x;
8052
8053 if (GET_CODE (XEXP (x, 0)) == MULT)
8054 {
8055 changed = 1;
8056 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
8057 }
8058
8059 if (GET_CODE (XEXP (x, 1)) == MULT)
8060 {
8061 changed = 1;
8062 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
8063 }
8064
8065 if (changed
8066 && REG_P (XEXP (x, 1))
8067 && REG_P (XEXP (x, 0)))
8068 return x;
8069
8070 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
8071 {
8072 changed = 1;
8073 x = legitimize_pic_address (x, 0);
8074 }
8075
8076 if (changed && legitimate_address_p (mode, x, FALSE))
8077 return x;
8078
8079 if (REG_P (XEXP (x, 0)))
8080 {
8081 rtx temp = gen_reg_rtx (Pmode);
8082 rtx val = force_operand (XEXP (x, 1), temp);
8083 if (val != temp)
8084 emit_move_insn (temp, val);
8085
8086 XEXP (x, 1) = temp;
8087 return x;
8088 }
8089
8090 else if (REG_P (XEXP (x, 1)))
8091 {
8092 rtx temp = gen_reg_rtx (Pmode);
8093 rtx val = force_operand (XEXP (x, 0), temp);
8094 if (val != temp)
8095 emit_move_insn (temp, val);
8096
8097 XEXP (x, 0) = temp;
8098 return x;
8099 }
8100 }
8101
8102 return x;
8103 }
8104 \f
8105 /* Print an integer constant expression in assembler syntax. Addition
8106 and subtraction are the only arithmetic that may appear in these
8107 expressions. FILE is the stdio stream to write to, X is the rtx, and
8108 CODE is the operand print code from the output string. */
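
/* For example (symbol name invented), the rtx
   (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))
   is printed by this routine as "foo@GOTOFF".  */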
8109
8110 static void
8111 output_pic_addr_const (FILE *file, rtx x, int code)
8112 {
8113 char buf[256];
8114
8115 switch (GET_CODE (x))
8116 {
8117 case PC:
8118 gcc_assert (flag_pic);
8119 putc ('.', file);
8120 break;
8121
8122 case SYMBOL_REF:
8123 if (! TARGET_MACHO || TARGET_64BIT)
8124 output_addr_const (file, x);
8125 else
8126 {
8127 const char *name = XSTR (x, 0);
8128
8129 /* Mark the decl as referenced so that cgraph will
8130 output the function. */
8131 if (SYMBOL_REF_DECL (x))
8132 mark_decl_referenced (SYMBOL_REF_DECL (x));
8133
8134 #if TARGET_MACHO
8135 if (MACHOPIC_INDIRECT
8136 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
8137 name = machopic_indirection_name (x, /*stub_p=*/true);
8138 #endif
8139 assemble_name (file, name);
8140 }
8141 if (!TARGET_MACHO && !TARGET_64BIT_MS_ABI
8142 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
8143 fputs ("@PLT", file);
8144 break;
8145
8146 case LABEL_REF:
8147 x = XEXP (x, 0);
8148 /* FALLTHRU */
8149 case CODE_LABEL:
8150 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
8151 assemble_name (asm_out_file, buf);
8152 break;
8153
8154 case CONST_INT:
8155 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8156 break;
8157
8158 case CONST:
8159 /* This used to output parentheses around the expression,
8160 but that does not work on the 386 (either ATT or BSD assembler). */
8161 output_pic_addr_const (file, XEXP (x, 0), code);
8162 break;
8163
8164 case CONST_DOUBLE:
8165 if (GET_MODE (x) == VOIDmode)
8166 {
8167 /* We can use %d if the number is <32 bits and positive. */
8168 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
8169 fprintf (file, "0x%lx%08lx",
8170 (unsigned long) CONST_DOUBLE_HIGH (x),
8171 (unsigned long) CONST_DOUBLE_LOW (x));
8172 else
8173 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
8174 }
8175 else
8176 /* We can't handle floating point constants;
8177 PRINT_OPERAND must handle them. */
8178 output_operand_lossage ("floating constant misused");
8179 break;
8180
8181 case PLUS:
8182 /* Some assemblers need integer constants to appear first. */
8183 if (CONST_INT_P (XEXP (x, 0)))
8184 {
8185 output_pic_addr_const (file, XEXP (x, 0), code);
8186 putc ('+', file);
8187 output_pic_addr_const (file, XEXP (x, 1), code);
8188 }
8189 else
8190 {
8191 gcc_assert (CONST_INT_P (XEXP (x, 1)));
8192 output_pic_addr_const (file, XEXP (x, 1), code);
8193 putc ('+', file);
8194 output_pic_addr_const (file, XEXP (x, 0), code);
8195 }
8196 break;
8197
8198 case MINUS:
8199 if (!TARGET_MACHO)
8200 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
8201 output_pic_addr_const (file, XEXP (x, 0), code);
8202 putc ('-', file);
8203 output_pic_addr_const (file, XEXP (x, 1), code);
8204 if (!TARGET_MACHO)
8205 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
8206 break;
8207
8208 case UNSPEC:
8209 gcc_assert (XVECLEN (x, 0) == 1);
8210 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
8211 switch (XINT (x, 1))
8212 {
8213 case UNSPEC_GOT:
8214 fputs ("@GOT", file);
8215 break;
8216 case UNSPEC_GOTOFF:
8217 fputs ("@GOTOFF", file);
8218 break;
8219 case UNSPEC_PLTOFF:
8220 fputs ("@PLTOFF", file);
8221 break;
8222 case UNSPEC_GOTPCREL:
8223 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
8224 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
8225 break;
8226 case UNSPEC_GOTTPOFF:
8227 /* FIXME: This might be @TPOFF in Sun ld too. */
8228 fputs ("@GOTTPOFF", file);
8229 break;
8230 case UNSPEC_TPOFF:
8231 fputs ("@TPOFF", file);
8232 break;
8233 case UNSPEC_NTPOFF:
8234 if (TARGET_64BIT)
8235 fputs ("@TPOFF", file);
8236 else
8237 fputs ("@NTPOFF", file);
8238 break;
8239 case UNSPEC_DTPOFF:
8240 fputs ("@DTPOFF", file);
8241 break;
8242 case UNSPEC_GOTNTPOFF:
8243 if (TARGET_64BIT)
8244 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
8245 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
8246 else
8247 fputs ("@GOTNTPOFF", file);
8248 break;
8249 case UNSPEC_INDNTPOFF:
8250 fputs ("@INDNTPOFF", file);
8251 break;
8252 default:
8253 output_operand_lossage ("invalid UNSPEC as operand");
8254 break;
8255 }
8256 break;
8257
8258 default:
8259 output_operand_lossage ("invalid expression as operand");
8260 }
8261 }
8262
8263 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8264 We need to emit DTP-relative relocations. */
8265
8266 static void ATTRIBUTE_UNUSED
8267 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
8268 {
8269 fputs (ASM_LONG, file);
8270 output_addr_const (file, x);
8271 fputs ("@DTPOFF", file);
8272 switch (size)
8273 {
8274 case 4:
8275 break;
8276 case 8:
8277 fputs (", 0", file);
8278 break;
8279 default:
8280 gcc_unreachable ();
8281 }
8282 }
8283
8284 /* In the name of slightly smaller debug output, and to cater to
8285 general assembler lossage, recognize PIC+GOTOFF and turn it back
8286 into a direct symbol reference.
8287
8288 On Darwin, this is necessary to avoid a crash, because Darwin
8289 has a different PIC label for each routine but the DWARF debugging
8290 information is not associated with any particular routine, so it's
8291 necessary to remove references to the PIC label from RTL stored by
8292 the DWARF output code. */
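
/* E.g. (a sketch, symbol name invented): on ia32 the PIC form
     (plus pic_offset_table_rtx
           (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))
   when not wrapped in a MEM is turned back into plain (symbol_ref "foo")
   here.  */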
8293
8294 static rtx
8295 ix86_delegitimize_address (rtx orig_x)
8296 {
8297 rtx x = orig_x;
8298 /* reg_addend is NULL or a multiple of some register. */
8299 rtx reg_addend = NULL_RTX;
8300 /* const_addend is NULL or a const_int. */
8301 rtx const_addend = NULL_RTX;
8302 /* This is the result, or NULL. */
8303 rtx result = NULL_RTX;
8304
8305 if (MEM_P (x))
8306 x = XEXP (x, 0);
8307
8308 if (TARGET_64BIT)
8309 {
8310 if (GET_CODE (x) != CONST
8311 || GET_CODE (XEXP (x, 0)) != UNSPEC
8312 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
8313 || !MEM_P (orig_x))
8314 return orig_x;
8315 return XVECEXP (XEXP (x, 0), 0, 0);
8316 }
8317
8318 if (GET_CODE (x) != PLUS
8319 || GET_CODE (XEXP (x, 1)) != CONST)
8320 return orig_x;
8321
8322 if (REG_P (XEXP (x, 0))
8323 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
8324 /* %ebx + GOT/GOTOFF */
8325 ;
8326 else if (GET_CODE (XEXP (x, 0)) == PLUS)
8327 {
8328 /* %ebx + %reg * scale + GOT/GOTOFF */
8329 reg_addend = XEXP (x, 0);
8330 if (REG_P (XEXP (reg_addend, 0))
8331 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
8332 reg_addend = XEXP (reg_addend, 1);
8333 else if (REG_P (XEXP (reg_addend, 1))
8334 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
8335 reg_addend = XEXP (reg_addend, 0);
8336 else
8337 return orig_x;
8338 if (!REG_P (reg_addend)
8339 && GET_CODE (reg_addend) != MULT
8340 && GET_CODE (reg_addend) != ASHIFT)
8341 return orig_x;
8342 }
8343 else
8344 return orig_x;
8345
8346 x = XEXP (XEXP (x, 1), 0);
8347 if (GET_CODE (x) == PLUS
8348 && CONST_INT_P (XEXP (x, 1)))
8349 {
8350 const_addend = XEXP (x, 1);
8351 x = XEXP (x, 0);
8352 }
8353
8354 if (GET_CODE (x) == UNSPEC
8355 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
8356 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
8357 result = XVECEXP (x, 0, 0);
8358
8359 if (TARGET_MACHO && darwin_local_data_pic (x)
8360 && !MEM_P (orig_x))
8361 result = XEXP (x, 0);
8362
8363 if (! result)
8364 return orig_x;
8365
8366 if (const_addend)
8367 result = gen_rtx_PLUS (Pmode, result, const_addend);
8368 if (reg_addend)
8369 result = gen_rtx_PLUS (Pmode, reg_addend, result);
8370 return result;
8371 }
8372
8373 /* If X is a machine specific address (i.e. a symbol or label being
8374 referenced as a displacement from the GOT implemented using an
8375 UNSPEC), then return the base term. Otherwise return X. */
8376
8377 rtx
8378 ix86_find_base_term (rtx x)
8379 {
8380 rtx term;
8381
8382 if (TARGET_64BIT)
8383 {
8384 if (GET_CODE (x) != CONST)
8385 return x;
8386 term = XEXP (x, 0);
8387 if (GET_CODE (term) == PLUS
8388 && (CONST_INT_P (XEXP (term, 1))
8389 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
8390 term = XEXP (term, 0);
8391 if (GET_CODE (term) != UNSPEC
8392 || XINT (term, 1) != UNSPEC_GOTPCREL)
8393 return x;
8394
8395 term = XVECEXP (term, 0, 0);
8396
8397 if (GET_CODE (term) != SYMBOL_REF
8398 && GET_CODE (term) != LABEL_REF)
8399 return x;
8400
8401 return term;
8402 }
8403
8404 term = ix86_delegitimize_address (x);
8405
8406 if (GET_CODE (term) != SYMBOL_REF
8407 && GET_CODE (term) != LABEL_REF)
8408 return x;
8409
8410 return term;
8411 }
8412 \f
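/* Output to FILE the condition-code suffix (e.g. "e", "ne", "l", "nbe")
   corresponding to comparison CODE in flags mode MODE.  If REVERSE is
   nonzero, print the suffix for the reversed condition.  FP selects the
   spellings used for fcmov-style instructions where they differ.  */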
8413 static void
8414 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
8415 int fp, FILE *file)
8416 {
8417 const char *suffix;
8418
8419 if (mode == CCFPmode || mode == CCFPUmode)
8420 {
8421 enum rtx_code second_code, bypass_code;
8422 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
8423 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
8424 code = ix86_fp_compare_code_to_integer (code);
8425 mode = CCmode;
8426 }
8427 if (reverse)
8428 code = reverse_condition (code);
8429
8430 switch (code)
8431 {
8432 case EQ:
8433 switch (mode)
8434 {
8435 case CCAmode:
8436 suffix = "a";
8437 break;
8438
8439 case CCCmode:
8440 suffix = "c";
8441 break;
8442
8443 case CCOmode:
8444 suffix = "o";
8445 break;
8446
8447 case CCSmode:
8448 suffix = "s";
8449 break;
8450
8451 default:
8452 suffix = "e";
8453 }
8454 break;
8455 case NE:
8456 switch (mode)
8457 {
8458 case CCAmode:
8459 suffix = "na";
8460 break;
8461
8462 case CCCmode:
8463 suffix = "nc";
8464 break;
8465
8466 case CCOmode:
8467 suffix = "no";
8468 break;
8469
8470 case CCSmode:
8471 suffix = "ns";
8472 break;
8473
8474 default:
8475 suffix = "ne";
8476 }
8477 break;
8478 case GT:
8479 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
8480 suffix = "g";
8481 break;
8482 case GTU:
8483 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8484 Those same assemblers have the same but opposite lossage on cmov. */
8485 if (mode == CCmode)
8486 suffix = fp ? "nbe" : "a";
8487 else if (mode == CCCmode)
8488 suffix = "b";
8489 else
8490 gcc_unreachable ();
8491 break;
8492 case LT:
8493 switch (mode)
8494 {
8495 case CCNOmode:
8496 case CCGOCmode:
8497 suffix = "s";
8498 break;
8499
8500 case CCmode:
8501 case CCGCmode:
8502 suffix = "l";
8503 break;
8504
8505 default:
8506 gcc_unreachable ();
8507 }
8508 break;
8509 case LTU:
8510 gcc_assert (mode == CCmode || mode == CCCmode);
8511 suffix = "b";
8512 break;
8513 case GE:
8514 switch (mode)
8515 {
8516 case CCNOmode:
8517 case CCGOCmode:
8518 suffix = "ns";
8519 break;
8520
8521 case CCmode:
8522 case CCGCmode:
8523 suffix = "ge";
8524 break;
8525
8526 default:
8527 gcc_unreachable ();
8528 }
8529 break;
8530 case GEU:
8531 /* ??? As above. */
8532 gcc_assert (mode == CCmode || mode == CCCmode);
8533 suffix = fp ? "nb" : "ae";
8534 break;
8535 case LE:
8536 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8537 suffix = "le";
8538 break;
8539 case LEU:
8540 /* ??? As above. */
8541 if (mode == CCmode)
8542 suffix = "be";
8543 else if (mode == CCCmode)
8544 suffix = fp ? "nb" : "ae";
8545 else
8546 gcc_unreachable ();
8547 break;
8548 case UNORDERED:
8549 suffix = fp ? "u" : "p";
8550 break;
8551 case ORDERED:
8552 suffix = fp ? "nu" : "np";
8553 break;
8554 default:
8555 gcc_unreachable ();
8556 }
8557 fputs (suffix, file);
8558 }
8559
8560 /* Print the name of register X to FILE based on its machine mode and number.
8561 If CODE is 'w', pretend the mode is HImode.
8562 If CODE is 'b', pretend the mode is QImode.
8563 If CODE is 'k', pretend the mode is SImode.
8564 If CODE is 'q', pretend the mode is DImode.
8565 If CODE is 'h', pretend the reg is the 'high' byte register.
8566 If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op. */
8567
8568 void
8569 print_reg (rtx x, int code, FILE *file)
8570 {
8571 gcc_assert (x == pc_rtx
8572 || (REGNO (x) != ARG_POINTER_REGNUM
8573 && REGNO (x) != FRAME_POINTER_REGNUM
8574 && REGNO (x) != FLAGS_REG
8575 && REGNO (x) != FPSR_REG
8576 && REGNO (x) != FPCR_REG));
8577
8578 if (ASSEMBLER_DIALECT == ASM_ATT)
8579 putc ('%', file);
8580
8581 if (x == pc_rtx)
8582 {
8583 gcc_assert (TARGET_64BIT);
8584 fputs ("rip", file);
8585 return;
8586 }
8587
8588 if (code == 'w' || MMX_REG_P (x))
8589 code = 2;
8590 else if (code == 'b')
8591 code = 1;
8592 else if (code == 'k')
8593 code = 4;
8594 else if (code == 'q')
8595 code = 8;
8596 else if (code == 'y')
8597 code = 3;
8598 else if (code == 'h')
8599 code = 0;
8600 else
8601 code = GET_MODE_SIZE (GET_MODE (x));
8602
8603 /* Irritatingly, the AMD extended registers use a different naming
8604 convention from the normal registers. */
8605 if (REX_INT_REG_P (x))
8606 {
8607 gcc_assert (TARGET_64BIT);
8608 switch (code)
8609 {
8610 case 0:
8611 error ("extended registers have no high halves");
8612 break;
8613 case 1:
8614 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8615 break;
8616 case 2:
8617 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8618 break;
8619 case 4:
8620 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8621 break;
8622 case 8:
8623 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8624 break;
8625 default:
8626 error ("unsupported operand size for extended register");
8627 break;
8628 }
8629 return;
8630 }
8631 switch (code)
8632 {
8633 case 3:
8634 if (STACK_TOP_P (x))
8635 {
8636 fputs ("st(0)", file);
8637 break;
8638 }
8639 /* FALLTHRU */
8640 case 8:
8641 case 4:
8642 case 12:
8643 if (! ANY_FP_REG_P (x))
8644 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8645 /* FALLTHRU */
8646 case 16:
8647 case 2:
8648 normal:
8649 fputs (hi_reg_name[REGNO (x)], file);
8650 break;
8651 case 1:
8652 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8653 goto normal;
8654 fputs (qi_reg_name[REGNO (x)], file);
8655 break;
8656 case 0:
8657 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8658 goto normal;
8659 fputs (qi_high_reg_name[REGNO (x)], file);
8660 break;
8661 default:
8662 gcc_unreachable ();
8663 }
8664 }
8665
8666 /* Locate some local-dynamic symbol still in use by this function
8667 so that we can print its name in some tls_local_dynamic_base
8668 pattern. */
8669
8670 static int
8671 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8672 {
8673 rtx x = *px;
8674
8675 if (GET_CODE (x) == SYMBOL_REF
8676 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8677 {
8678 cfun->machine->some_ld_name = XSTR (x, 0);
8679 return 1;
8680 }
8681
8682 return 0;
8683 }
8684
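/* Return the name of some local-dynamic TLS symbol referenced by the
   current function, scanning its insns on the first call and caching the
   result in cfun->machine->some_ld_name.  */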
8685 static const char *
8686 get_some_local_dynamic_name (void)
8687 {
8688 rtx insn;
8689
8690 if (cfun->machine->some_ld_name)
8691 return cfun->machine->some_ld_name;
8692
8693 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8694 if (INSN_P (insn)
8695 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8696 return cfun->machine->some_ld_name;
8697
8698 gcc_unreachable ();
8699 }
8700
8701 /* Meaning of CODE:
8702 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8703 C -- print opcode suffix for set/cmov insn.
8704 c -- like C, but print reversed condition
8705 F,f -- likewise, but for floating-point.
8706 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8707 otherwise nothing
8708 R -- print the prefix for register names.
8709 z -- print the opcode suffix for the size of the current operand.
8710 * -- print a star (in certain assembler syntax)
8711 A -- print an absolute memory reference.
8712 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8713 s -- print a shift double count, followed by the assembler's argument
8714 delimiter.
8715 b -- print the QImode name of the register for the indicated operand.
8716 %b0 would print %al if operands[0] is reg 0.
8717 w -- likewise, print the HImode name of the register.
8718 k -- likewise, print the SImode name of the register.
8719 q -- likewise, print the DImode name of the register.
8720 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8721 y -- print "st(0)" instead of "st" as a register.
8722 D -- print condition for SSE cmp instruction.
8723 P -- if PIC, print an @PLT suffix.
8724 X -- don't print any sort of PIC '@' suffix for a symbol.
8725 & -- print some in-use local-dynamic symbol name.
8726 H -- print a memory address offset by 8; used for sse high-parts
8727 Y -- print condition for SSE5 com* instruction.
8728 + -- print a branch hint as 'cs' or 'ds' prefix
8729 ; -- print a semicolon (after prefixes due to bug in older gas).
8730 */
8731
8732 void
8733 print_operand (FILE *file, rtx x, int code)
8734 {
8735 if (code)
8736 {
8737 switch (code)
8738 {
8739 case '*':
8740 if (ASSEMBLER_DIALECT == ASM_ATT)
8741 putc ('*', file);
8742 return;
8743
8744 case '&':
8745 assemble_name (file, get_some_local_dynamic_name ());
8746 return;
8747
8748 case 'A':
8749 switch (ASSEMBLER_DIALECT)
8750 {
8751 case ASM_ATT:
8752 putc ('*', file);
8753 break;
8754
8755 case ASM_INTEL:
8756 /* Intel syntax. For absolute addresses, registers should not
8757 be surrounded by brackets. */
8758 if (!REG_P (x))
8759 {
8760 putc ('[', file);
8761 PRINT_OPERAND (file, x, 0);
8762 putc (']', file);
8763 return;
8764 }
8765 break;
8766
8767 default:
8768 gcc_unreachable ();
8769 }
8770
8771 PRINT_OPERAND (file, x, 0);
8772 return;
8773
8774
8775 case 'L':
8776 if (ASSEMBLER_DIALECT == ASM_ATT)
8777 putc ('l', file);
8778 return;
8779
8780 case 'W':
8781 if (ASSEMBLER_DIALECT == ASM_ATT)
8782 putc ('w', file);
8783 return;
8784
8785 case 'B':
8786 if (ASSEMBLER_DIALECT == ASM_ATT)
8787 putc ('b', file);
8788 return;
8789
8790 case 'Q':
8791 if (ASSEMBLER_DIALECT == ASM_ATT)
8792 putc ('l', file);
8793 return;
8794
8795 case 'S':
8796 if (ASSEMBLER_DIALECT == ASM_ATT)
8797 putc ('s', file);
8798 return;
8799
8800 case 'T':
8801 if (ASSEMBLER_DIALECT == ASM_ATT)
8802 putc ('t', file);
8803 return;
8804
8805 case 'z':
8806 /* 387 opcodes don't get size suffixes if the operands are
8807 registers. */
8808 if (STACK_REG_P (x))
8809 return;
8810
8811 /* Likewise if using Intel opcodes. */
8812 if (ASSEMBLER_DIALECT == ASM_INTEL)
8813 return;
8814
8815 /* Derive the size suffix of the op from the size of the operand. */
8816 switch (GET_MODE_SIZE (GET_MODE (x)))
8817 {
8818 case 1:
8819 putc ('b', file);
8820 return;
8821
8822 case 2:
8823 if (MEM_P (x))
8824 {
8825 #ifdef HAVE_GAS_FILDS_FISTS
8826 putc ('s', file);
8827 #endif
8828 return;
8829 }
8830 else
8831 putc ('w', file);
8832 return;
8833
8834 case 4:
8835 if (GET_MODE (x) == SFmode)
8836 {
8837 putc ('s', file);
8838 return;
8839 }
8840 else
8841 putc ('l', file);
8842 return;
8843
8844 case 12:
8845 case 16:
8846 putc ('t', file);
8847 return;
8848
8849 case 8:
8850 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8851 {
8852 #ifdef GAS_MNEMONICS
8853 putc ('q', file);
8854 #else
8855 putc ('l', file);
8856 putc ('l', file);
8857 #endif
8858 }
8859 else
8860 putc ('l', file);
8861 return;
8862
8863 default:
8864 gcc_unreachable ();
8865 }
8866
8867 case 'b':
8868 case 'w':
8869 case 'k':
8870 case 'q':
8871 case 'h':
8872 case 'y':
8873 case 'X':
8874 case 'P':
8875 break;
8876
8877 case 's':
8878 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8879 {
8880 PRINT_OPERAND (file, x, 0);
8881 putc (',', file);
8882 }
8883 return;
8884
8885 case 'D':
8886 /* A little bit of braindamage here. The SSE compare instructions
8887 use completely different names for the comparisons than the
8888 fp conditional moves do. */
8889 switch (GET_CODE (x))
8890 {
8891 case EQ:
8892 case UNEQ:
8893 fputs ("eq", file);
8894 break;
8895 case LT:
8896 case UNLT:
8897 fputs ("lt", file);
8898 break;
8899 case LE:
8900 case UNLE:
8901 fputs ("le", file);
8902 break;
8903 case UNORDERED:
8904 fputs ("unord", file);
8905 break;
8906 case NE:
8907 case LTGT:
8908 fputs ("neq", file);
8909 break;
8910 case UNGE:
8911 case GE:
8912 fputs ("nlt", file);
8913 break;
8914 case UNGT:
8915 case GT:
8916 fputs ("nle", file);
8917 break;
8918 case ORDERED:
8919 fputs ("ord", file);
8920 break;
8921 default:
8922 gcc_unreachable ();
8923 }
8924 return;
8925 case 'O':
8926 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8927 if (ASSEMBLER_DIALECT == ASM_ATT)
8928 {
8929 switch (GET_MODE (x))
8930 {
8931 case HImode: putc ('w', file); break;
8932 case SImode:
8933 case SFmode: putc ('l', file); break;
8934 case DImode:
8935 case DFmode: putc ('q', file); break;
8936 default: gcc_unreachable ();
8937 }
8938 putc ('.', file);
8939 }
8940 #endif
8941 return;
8942 case 'C':
8943 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8944 return;
8945 case 'F':
8946 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8947 if (ASSEMBLER_DIALECT == ASM_ATT)
8948 putc ('.', file);
8949 #endif
8950 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8951 return;
8952
8953 /* Like above, but reverse condition */
8954 case 'c':
8955 /* Check to see if argument to %c is really a constant
8956 and not a condition code which needs to be reversed. */
8957 if (!COMPARISON_P (x))
8958 {
8959 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8960 return;
8961 }
8962 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8963 return;
8964 case 'f':
8965 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8966 if (ASSEMBLER_DIALECT == ASM_ATT)
8967 putc ('.', file);
8968 #endif
8969 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8970 return;
8971
8972 case 'H':
8973 /* It doesn't actually matter what mode we use here, as we're
8974 only going to use this for printing. */
8975 x = adjust_address_nv (x, DImode, 8);
8976 break;
8977
8978 case '+':
8979 {
8980 rtx x;
8981
8982 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8983 return;
8984
8985 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8986 if (x)
8987 {
8988 int pred_val = INTVAL (XEXP (x, 0));
8989
8990 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8991 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8992 {
8993 int taken = pred_val > REG_BR_PROB_BASE / 2;
8994 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8995
8996 /* Emit hints only where the default branch prediction
8997 heuristics would fail. */
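/* Static prediction treats backward branches as taken and forward
   branches as not taken; cputaken above encodes that default, so a
   prefix is emitted only when the profile disagrees with it. */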
8998 if (taken != cputaken)
8999 {
9000 /* We use 3e (DS) prefix for taken branches and
9001 2e (CS) prefix for not taken branches. */
9002 if (taken)
9003 fputs ("ds ; ", file);
9004 else
9005 fputs ("cs ; ", file);
9006 }
9007 }
9008 }
9009 return;
9010 }
9011
9012 case 'Y':
9013 switch (GET_CODE (x))
9014 {
9015 case NE:
9016 fputs ("neq", file);
9017 break;
9018 case EQ:
9019 fputs ("eq", file);
9020 break;
9021 case GE:
9022 case GEU:
9023 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
9024 break;
9025 case GT:
9026 case GTU:
9027 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
9028 break;
9029 case LE:
9030 case LEU:
9031 fputs ("le", file);
9032 break;
9033 case LT:
9034 case LTU:
9035 fputs ("lt", file);
9036 break;
9037 case UNORDERED:
9038 fputs ("unord", file);
9039 break;
9040 case ORDERED:
9041 fputs ("ord", file);
9042 break;
9043 case UNEQ:
9044 fputs ("ueq", file);
9045 break;
9046 case UNGE:
9047 fputs ("nlt", file);
9048 break;
9049 case UNGT:
9050 fputs ("nle", file);
9051 break;
9052 case UNLE:
9053 fputs ("ule", file);
9054 break;
9055 case UNLT:
9056 fputs ("ult", file);
9057 break;
9058 case LTGT:
9059 fputs ("une", file);
9060 break;
9061 default:
9062 gcc_unreachable ();
9063 }
9064 return;
9065
9066 case ';':
9067 #if TARGET_MACHO
9068 fputs (" ; ", file);
9069 #else
9070 fputc (' ', file);
9071 #endif
9072 return;
9073
9074 default:
9075 output_operand_lossage ("invalid operand code '%c'", code);
9076 }
9077 }
9078
9079 if (REG_P (x))
9080 print_reg (x, code, file);
9081
9082 else if (MEM_P (x))
9083 {
9084 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
9085 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
9086 && GET_MODE (x) != BLKmode)
9087 {
9088 const char * size;
9089 switch (GET_MODE_SIZE (GET_MODE (x)))
9090 {
9091 case 1: size = "BYTE"; break;
9092 case 2: size = "WORD"; break;
9093 case 4: size = "DWORD"; break;
9094 case 8: size = "QWORD"; break;
9095 case 12: size = "XWORD"; break;
9096 case 16:
9097 if (GET_MODE (x) == XFmode)
9098 size = "XWORD";
9099 else
9100 size = "XMMWORD";
9101 break;
9102 default:
9103 gcc_unreachable ();
9104 }
9105
9106 /* Check for explicit size override (codes 'b', 'w' and 'k') */
9107 if (code == 'b')
9108 size = "BYTE";
9109 else if (code == 'w')
9110 size = "WORD";
9111 else if (code == 'k')
9112 size = "DWORD";
9113
9114 fputs (size, file);
9115 fputs (" PTR ", file);
9116 }
9117
9118 x = XEXP (x, 0);
9119 /* Avoid (%rip) for call operands. */
9120 if (CONSTANT_ADDRESS_P (x) && code == 'P'
9121 && !CONST_INT_P (x))
9122 output_addr_const (file, x);
9123 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
9124 output_operand_lossage ("invalid constraints for operand");
9125 else
9126 output_address (x);
9127 }
9128
9129 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
9130 {
9131 REAL_VALUE_TYPE r;
9132 long l;
9133
9134 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9135 REAL_VALUE_TO_TARGET_SINGLE (r, l);
9136
9137 if (ASSEMBLER_DIALECT == ASM_ATT)
9138 putc ('$', file);
9139 fprintf (file, "0x%08lx", l);
9140 }
9141
9142 /* These float cases don't actually occur as immediate operands. */
9143 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
9144 {
9145 char dstr[30];
9146
9147 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9148 fprintf (file, "%s", dstr);
9149 }
9150
9151 else if (GET_CODE (x) == CONST_DOUBLE
9152 && GET_MODE (x) == XFmode)
9153 {
9154 char dstr[30];
9155
9156 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9157 fprintf (file, "%s", dstr);
9158 }
9159
9160 else
9161 {
9162 /* We have patterns that allow zero sets of memory, for instance.
9163 In 64-bit mode, we should probably support all 8-byte vectors,
9164 since we can in fact encode that into an immediate. */
9165 if (GET_CODE (x) == CONST_VECTOR)
9166 {
9167 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
9168 x = const0_rtx;
9169 }
9170
9171 if (code != 'P')
9172 {
9173 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
9174 {
9175 if (ASSEMBLER_DIALECT == ASM_ATT)
9176 putc ('$', file);
9177 }
9178 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
9179 || GET_CODE (x) == LABEL_REF)
9180 {
9181 if (ASSEMBLER_DIALECT == ASM_ATT)
9182 putc ('$', file);
9183 else
9184 fputs ("OFFSET FLAT:", file);
9185 }
9186 }
9187 if (CONST_INT_P (x))
9188 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
9189 else if (flag_pic)
9190 output_pic_addr_const (file, x, code);
9191 else
9192 output_addr_const (file, x);
9193 }
9194 }
9195 \f
9196 /* Print a memory operand whose address is ADDR. */
9197
9198 void
9199 print_operand_address (FILE *file, rtx addr)
9200 {
9201 struct ix86_address parts;
9202 rtx base, index, disp;
9203 int scale;
9204 int ok = ix86_decompose_address (addr, &parts);
9205
9206 gcc_assert (ok);
9207
9208 base = parts.base;
9209 index = parts.index;
9210 disp = parts.disp;
9211 scale = parts.scale;
9212
9213 switch (parts.seg)
9214 {
9215 case SEG_DEFAULT:
9216 break;
9217 case SEG_FS:
9218 case SEG_GS:
9219 if (ASSEMBLER_DIALECT == ASM_ATT)
9220 putc ('%', file);
9221 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
9222 break;
9223 default:
9224 gcc_unreachable ();
9225 }
9226
9227 /* Use the one byte shorter RIP-relative addressing in 64-bit mode. */
9228 if (TARGET_64BIT && !base && !index)
9229 {
9230 rtx symbol = disp;
9231
9232 if (GET_CODE (disp) == CONST
9233 && GET_CODE (XEXP (disp, 0)) == PLUS
9234 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9235 symbol = XEXP (XEXP (disp, 0), 0);
9236
9237 if (GET_CODE (symbol) == LABEL_REF
9238 || (GET_CODE (symbol) == SYMBOL_REF
9239 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
9240 base = pc_rtx;
9241 }
9242 if (!base && !index)
9243 {
9244 /* A displacement-only address requires special attention. */
9245
9246 if (CONST_INT_P (disp))
9247 {
9248 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
9249 fputs ("ds:", file);
9250 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
9251 }
9252 else if (flag_pic)
9253 output_pic_addr_const (file, disp, 0);
9254 else
9255 output_addr_const (file, disp);
9256 }
9257 else
9258 {
9259 if (ASSEMBLER_DIALECT == ASM_ATT)
9260 {
9261 if (disp)
9262 {
9263 if (flag_pic)
9264 output_pic_addr_const (file, disp, 0);
9265 else if (GET_CODE (disp) == LABEL_REF)
9266 output_asm_label (disp);
9267 else
9268 output_addr_const (file, disp);
9269 }
9270
9271 putc ('(', file);
9272 if (base)
9273 print_reg (base, 0, file);
9274 if (index)
9275 {
9276 putc (',', file);
9277 print_reg (index, 0, file);
9278 if (scale != 1)
9279 fprintf (file, ",%d", scale);
9280 }
9281 putc (')', file);
9282 }
9283 else
9284 {
9285 rtx offset = NULL_RTX;
9286
9287 if (disp)
9288 {
9289 /* Pull out the offset of a symbol; print any symbol itself. */
9290 if (GET_CODE (disp) == CONST
9291 && GET_CODE (XEXP (disp, 0)) == PLUS
9292 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9293 {
9294 offset = XEXP (XEXP (disp, 0), 1);
9295 disp = gen_rtx_CONST (VOIDmode,
9296 XEXP (XEXP (disp, 0), 0));
9297 }
9298
9299 if (flag_pic)
9300 output_pic_addr_const (file, disp, 0);
9301 else if (GET_CODE (disp) == LABEL_REF)
9302 output_asm_label (disp);
9303 else if (CONST_INT_P (disp))
9304 offset = disp;
9305 else
9306 output_addr_const (file, disp);
9307 }
9308
9309 putc ('[', file);
9310 if (base)
9311 {
9312 print_reg (base, 0, file);
9313 if (offset)
9314 {
9315 if (INTVAL (offset) >= 0)
9316 putc ('+', file);
9317 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9318 }
9319 }
9320 else if (offset)
9321 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9322 else
9323 putc ('0', file);
9324
9325 if (index)
9326 {
9327 putc ('+', file);
9328 print_reg (index, 0, file);
9329 if (scale != 1)
9330 fprintf (file, "*%d", scale);
9331 }
9332 putc (']', file);
9333 }
9334 }
9335 }
9336
9337 bool
9338 output_addr_const_extra (FILE *file, rtx x)
9339 {
9340 rtx op;
9341
9342 if (GET_CODE (x) != UNSPEC)
9343 return false;
9344
9345 op = XVECEXP (x, 0, 0);
9346 switch (XINT (x, 1))
9347 {
9348 case UNSPEC_GOTTPOFF:
9349 output_addr_const (file, op);
9350 /* FIXME: This might be @TPOFF in Sun ld. */
9351 fputs ("@GOTTPOFF", file);
9352 break;
9353 case UNSPEC_TPOFF:
9354 output_addr_const (file, op);
9355 fputs ("@TPOFF", file);
9356 break;
9357 case UNSPEC_NTPOFF:
9358 output_addr_const (file, op);
9359 if (TARGET_64BIT)
9360 fputs ("@TPOFF", file);
9361 else
9362 fputs ("@NTPOFF", file);
9363 break;
9364 case UNSPEC_DTPOFF:
9365 output_addr_const (file, op);
9366 fputs ("@DTPOFF", file);
9367 break;
9368 case UNSPEC_GOTNTPOFF:
9369 output_addr_const (file, op);
9370 if (TARGET_64BIT)
9371 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
9372 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
9373 else
9374 fputs ("@GOTNTPOFF", file);
9375 break;
9376 case UNSPEC_INDNTPOFF:
9377 output_addr_const (file, op);
9378 fputs ("@INDNTPOFF", file);
9379 break;
9380
9381 default:
9382 return false;
9383 }
9384
9385 return true;
9386 }
9387 \f
9388 /* Split one or more DImode RTL references into pairs of SImode
9389 references. The RTL can be REG, offsettable MEM, integer constant, or
9390 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9391 split and "num" is its length. lo_half and hi_half are output arrays
9392 that parallel "operands". */
9393
9394 void
9395 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9396 {
9397 while (num--)
9398 {
9399 rtx op = operands[num];
9400
9401 /* simplify_subreg refuses to split volatile memory addresses,
9402 but we still have to handle them. */
9403 if (MEM_P (op))
9404 {
9405 lo_half[num] = adjust_address (op, SImode, 0);
9406 hi_half[num] = adjust_address (op, SImode, 4);
9407 }
9408 else
9409 {
9410 lo_half[num] = simplify_gen_subreg (SImode, op,
9411 GET_MODE (op) == VOIDmode
9412 ? DImode : GET_MODE (op), 0);
9413 hi_half[num] = simplify_gen_subreg (SImode, op,
9414 GET_MODE (op) == VOIDmode
9415 ? DImode : GET_MODE (op), 4);
9416 }
9417 }
9418 }
9419 /* Split one or more TImode RTL references into pairs of DImode
9420 references. The RTL can be REG, offsettable MEM, integer constant, or
9421 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9422 split and "num" is its length. lo_half and hi_half are output arrays
9423 that parallel "operands". */
9424
9425 void
9426 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9427 {
9428 while (num--)
9429 {
9430 rtx op = operands[num];
9431
9432 /* simplify_subreg refuses to split volatile memory addresses, but we
9433 still have to handle them. */
9434 if (MEM_P (op))
9435 {
9436 lo_half[num] = adjust_address (op, DImode, 0);
9437 hi_half[num] = adjust_address (op, DImode, 8);
9438 }
9439 else
9440 {
9441 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
9442 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
9443 }
9444 }
9445 }
9446 \f
9447 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
9448 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
9449 is the expression of the binary operation. The output may either be
9450 emitted here, or returned to the caller, like all output_* functions.
9451
9452 There is no guarantee that the operands are the same mode, as they
9453 might be within FLOAT or FLOAT_EXTEND expressions. */
9454
9455 #ifndef SYSV386_COMPAT
9456 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
9457 wants to fix the assemblers because that causes incompatibility
9458 with gcc. No-one wants to fix gcc because that causes
9459 incompatibility with assemblers... You can use the option of
9460 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
9461 #define SYSV386_COMPAT 1
9462 #endif
9463
9464 const char *
9465 output_387_binary_op (rtx insn, rtx *operands)
9466 {
9467 static char buf[30];
9468 const char *p;
9469 const char *ssep;
9470 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
9471
9472 #ifdef ENABLE_CHECKING
9473 /* Even if we do not want to check the inputs, this documents the input
9474 constraints, which helps in understanding the following code. */
9475 if (STACK_REG_P (operands[0])
9476 && ((REG_P (operands[1])
9477 && REGNO (operands[0]) == REGNO (operands[1])
9478 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
9479 || (REG_P (operands[2])
9480 && REGNO (operands[0]) == REGNO (operands[2])
9481 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
9482 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
9483 ; /* ok */
9484 else
9485 gcc_assert (is_sse);
9486 #endif
9487
9488 switch (GET_CODE (operands[3]))
9489 {
9490 case PLUS:
9491 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9492 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9493 p = "fiadd";
9494 else
9495 p = "fadd";
9496 ssep = "add";
9497 break;
9498
9499 case MINUS:
9500 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9501 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9502 p = "fisub";
9503 else
9504 p = "fsub";
9505 ssep = "sub";
9506 break;
9507
9508 case MULT:
9509 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9510 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9511 p = "fimul";
9512 else
9513 p = "fmul";
9514 ssep = "mul";
9515 break;
9516
9517 case DIV:
9518 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9519 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9520 p = "fidiv";
9521 else
9522 p = "fdiv";
9523 ssep = "div";
9524 break;
9525
9526 default:
9527 gcc_unreachable ();
9528 }
9529
9530 if (is_sse)
9531 {
9532 strcpy (buf, ssep);
9533 if (GET_MODE (operands[0]) == SFmode)
9534 strcat (buf, "ss\t{%2, %0|%0, %2}");
9535 else
9536 strcat (buf, "sd\t{%2, %0|%0, %2}");
9537 return buf;
9538 }
9539 strcpy (buf, p);
9540
9541 switch (GET_CODE (operands[3]))
9542 {
9543 case MULT:
9544 case PLUS:
9545 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
9546 {
9547 rtx temp = operands[2];
9548 operands[2] = operands[1];
9549 operands[1] = temp;
9550 }
9551
9552 /* Now we know that operands[0] == operands[1]. */
9553
9554 if (MEM_P (operands[2]))
9555 {
9556 p = "%z2\t%2";
9557 break;
9558 }
9559
9560 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9561 {
9562 if (STACK_TOP_P (operands[0]))
9563 /* How is it that we are storing to a dead operand[2]?
9564 Well, presumably operands[1] is dead too. We can't
9565 store the result to st(0) as st(0) gets popped on this
9566 instruction. Instead store to operands[2] (which I
9567 think has to be st(1)). st(1) will be popped later.
9568 gcc <= 2.8.1 didn't have this check and generated
9569 assembly code that the Unixware assembler rejected. */
9570 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9571 else
9572 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9573 break;
9574 }
9575
9576 if (STACK_TOP_P (operands[0]))
9577 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9578 else
9579 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9580 break;
9581
9582 case MINUS:
9583 case DIV:
9584 if (MEM_P (operands[1]))
9585 {
9586 p = "r%z1\t%1";
9587 break;
9588 }
9589
9590 if (MEM_P (operands[2]))
9591 {
9592 p = "%z2\t%2";
9593 break;
9594 }
9595
9596 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9597 {
9598 #if SYSV386_COMPAT
9599 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9600 derived assemblers, confusingly reverse the direction of
9601 the operation for fsub{r} and fdiv{r} when the
9602 destination register is not st(0). The Intel assembler
9603 doesn't have this brain damage. Read !SYSV386_COMPAT to
9604 figure out what the hardware really does. */
9605 if (STACK_TOP_P (operands[0]))
9606 p = "{p\t%0, %2|rp\t%2, %0}";
9607 else
9608 p = "{rp\t%2, %0|p\t%0, %2}";
9609 #else
9610 if (STACK_TOP_P (operands[0]))
9611 /* As above for fmul/fadd, we can't store to st(0). */
9612 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9613 else
9614 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9615 #endif
9616 break;
9617 }
9618
9619 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9620 {
9621 #if SYSV386_COMPAT
9622 if (STACK_TOP_P (operands[0]))
9623 p = "{rp\t%0, %1|p\t%1, %0}";
9624 else
9625 p = "{p\t%1, %0|rp\t%0, %1}";
9626 #else
9627 if (STACK_TOP_P (operands[0]))
9628 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9629 else
9630 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9631 #endif
9632 break;
9633 }
9634
9635 if (STACK_TOP_P (operands[0]))
9636 {
9637 if (STACK_TOP_P (operands[1]))
9638 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9639 else
9640 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9641 break;
9642 }
9643 else if (STACK_TOP_P (operands[1]))
9644 {
9645 #if SYSV386_COMPAT
9646 p = "{\t%1, %0|r\t%0, %1}";
9647 #else
9648 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9649 #endif
9650 }
9651 else
9652 {
9653 #if SYSV386_COMPAT
9654 p = "{r\t%2, %0|\t%0, %2}";
9655 #else
9656 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9657 #endif
9658 }
9659 break;
9660
9661 default:
9662 gcc_unreachable ();
9663 }
9664
9665 strcat (buf, p);
9666 return buf;
9667 }
9668
9669 /* Return needed mode for entity in optimize_mode_switching pass. */
9670
9671 int
9672 ix86_mode_needed (int entity, rtx insn)
9673 {
9674 enum attr_i387_cw mode;
9675
9676 /* The mode UNINITIALIZED is used to store the control word after a
9677 function call or ASM pattern. The mode ANY specifies that the
9678 function has no requirements on the control word and makes no
9679 changes to the bits we are interested in. */
9680
9681 if (CALL_P (insn)
9682 || (NONJUMP_INSN_P (insn)
9683 && (asm_noperands (PATTERN (insn)) >= 0
9684 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9685 return I387_CW_UNINITIALIZED;
9686
9687 if (recog_memoized (insn) < 0)
9688 return I387_CW_ANY;
9689
9690 mode = get_attr_i387_cw (insn);
9691
9692 switch (entity)
9693 {
9694 case I387_TRUNC:
9695 if (mode == I387_CW_TRUNC)
9696 return mode;
9697 break;
9698
9699 case I387_FLOOR:
9700 if (mode == I387_CW_FLOOR)
9701 return mode;
9702 break;
9703
9704 case I387_CEIL:
9705 if (mode == I387_CW_CEIL)
9706 return mode;
9707 break;
9708
9709 case I387_MASK_PM:
9710 if (mode == I387_CW_MASK_PM)
9711 return mode;
9712 break;
9713
9714 default:
9715 gcc_unreachable ();
9716 }
9717
9718 return I387_CW_ANY;
9719 }
9720
9721 /* Output code to initialize control word copies used by trunc?f?i and
9722 rounding patterns. CURRENT_MODE is set to current control word,
9723 while NEW_MODE is set to new control word. */
9724
9725 void
9726 emit_i387_cw_initialization (int mode)
9727 {
9728 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9729 rtx new_mode;
9730
9731 enum ix86_stack_slot slot;
9732
9733 rtx reg = gen_reg_rtx (HImode);
9734
9735 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9736 emit_move_insn (reg, copy_rtx (stored_mode));
9737
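/* Bits 10-11 of the x87 control word select the rounding mode
   (00 = nearest, 01 = down, 10 = up, 11 = truncate) and bit 5 masks
   the precision exception, hence the 0x0c00, 0x0400, 0x0800 and
   0x0020 constants below. */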
9738 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9739 {
9740 switch (mode)
9741 {
9742 case I387_CW_TRUNC:
9743 /* round toward zero (truncate) */
9744 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9745 slot = SLOT_CW_TRUNC;
9746 break;
9747
9748 case I387_CW_FLOOR:
9749 /* round down toward -oo */
9750 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9751 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9752 slot = SLOT_CW_FLOOR;
9753 break;
9754
9755 case I387_CW_CEIL:
9756 /* round up toward +oo */
9757 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9758 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9759 slot = SLOT_CW_CEIL;
9760 break;
9761
9762 case I387_CW_MASK_PM:
9763 /* mask precision exception for nearbyint() */
9764 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9765 slot = SLOT_CW_MASK_PM;
9766 break;
9767
9768 default:
9769 gcc_unreachable ();
9770 }
9771 }
9772 else
9773 {
9774 switch (mode)
9775 {
9776 case I387_CW_TRUNC:
9777 /* round toward zero (truncate) */
9778 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9779 slot = SLOT_CW_TRUNC;
9780 break;
9781
9782 case I387_CW_FLOOR:
9783 /* round down toward -oo */
9784 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9785 slot = SLOT_CW_FLOOR;
9786 break;
9787
9788 case I387_CW_CEIL:
9789 /* round up toward +oo */
9790 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9791 slot = SLOT_CW_CEIL;
9792 break;
9793
9794 case I387_CW_MASK_PM:
9795 /* mask precision exception for nearbyint() */
9796 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9797 slot = SLOT_CW_MASK_PM;
9798 break;
9799
9800 default:
9801 gcc_unreachable ();
9802 }
9803 }
9804
9805 gcc_assert (slot < MAX_386_STACK_LOCALS);
9806
9807 new_mode = assign_386_stack_local (HImode, slot);
9808 emit_move_insn (new_mode, reg);
9809 }
9810
9811 /* Output code for INSN to convert a float to a signed int. OPERANDS
9812 are the insn operands. The output may be [HSD]Imode and the input
9813 operand may be [SDX]Fmode. */
9814
9815 const char *
9816 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9817 {
9818 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9819 int dimode_p = GET_MODE (operands[0]) == DImode;
9820 int round_mode = get_attr_i387_cw (insn);
9821
9822 /* Jump through a hoop or two for DImode, since the hardware has no
9823 non-popping instruction. We used to do this a different way, but
9824 that was somewhat fragile and broke with post-reload splitters. */
9825 if ((dimode_p || fisttp) && !stack_top_dies)
9826 output_asm_insn ("fld\t%y1", operands);
9827
9828 gcc_assert (STACK_TOP_P (operands[1]));
9829 gcc_assert (MEM_P (operands[0]));
9830 gcc_assert (GET_MODE (operands[1]) != TFmode);
9831
9832 if (fisttp)
9833 output_asm_insn ("fisttp%z0\t%0", operands);
9834 else
9835 {
9836 if (round_mode != I387_CW_ANY)
9837 output_asm_insn ("fldcw\t%3", operands);
9838 if (stack_top_dies || dimode_p)
9839 output_asm_insn ("fistp%z0\t%0", operands);
9840 else
9841 output_asm_insn ("fist%z0\t%0", operands);
9842 if (round_mode != I387_CW_ANY)
9843 output_asm_insn ("fldcw\t%2", operands);
9844 }
9845
9846 return "";
9847 }
9848
9849 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9850 have the values zero or one, indicates the ffreep insn's operand
9851 from the OPERANDS array. */
9852
9853 static const char *
9854 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9855 {
9856 if (TARGET_USE_FFREEP)
9857 #if HAVE_AS_IX86_FFREEP
9858 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9859 #else
9860 {
9861 static char retval[] = ".word\t0xc_df";
9862 int regno = REGNO (operands[opno]);
9863
9864 gcc_assert (FP_REGNO_P (regno));
9865
9866 retval[9] = '0' + (regno - FIRST_STACK_REG);
9867 return retval;
9868 }
9869 #endif
9870
9871 return opno ? "fstp\t%y1" : "fstp\t%y0";
9872 }
9873
9874
9875 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9876 should be used. UNORDERED_P is true when fucom should be used. */
9877
9878 const char *
9879 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9880 {
9881 int stack_top_dies;
9882 rtx cmp_op0, cmp_op1;
9883 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9884
9885 if (eflags_p)
9886 {
9887 cmp_op0 = operands[0];
9888 cmp_op1 = operands[1];
9889 }
9890 else
9891 {
9892 cmp_op0 = operands[1];
9893 cmp_op1 = operands[2];
9894 }
9895
9896 if (is_sse)
9897 {
9898 if (GET_MODE (operands[0]) == SFmode)
9899 if (unordered_p)
9900 return "ucomiss\t{%1, %0|%0, %1}";
9901 else
9902 return "comiss\t{%1, %0|%0, %1}";
9903 else
9904 if (unordered_p)
9905 return "ucomisd\t{%1, %0|%0, %1}";
9906 else
9907 return "comisd\t{%1, %0|%0, %1}";
9908 }
9909
9910 gcc_assert (STACK_TOP_P (cmp_op0));
9911
9912 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9913
9914 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9915 {
9916 if (stack_top_dies)
9917 {
9918 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9919 return output_387_ffreep (operands, 1);
9920 }
9921 else
9922 return "ftst\n\tfnstsw\t%0";
9923 }
9924
9925 if (STACK_REG_P (cmp_op1)
9926 && stack_top_dies
9927 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9928 && REGNO (cmp_op1) != FIRST_STACK_REG)
9929 {
9930 /* If the top of the 387 stack dies, and the other operand is
9931 also a stack register that dies, then this must be a
9932 `fcompp' float compare. */
9933
9934 if (eflags_p)
9935 {
9936 /* There is no double popping fcomi variant. Fortunately,
9937 eflags is immune from the fstp's cc clobbering. */
9938 if (unordered_p)
9939 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9940 else
9941 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9942 return output_387_ffreep (operands, 0);
9943 }
9944 else
9945 {
9946 if (unordered_p)
9947 return "fucompp\n\tfnstsw\t%0";
9948 else
9949 return "fcompp\n\tfnstsw\t%0";
9950 }
9951 }
9952 else
9953 {
9954 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
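/* For example, mask 8 (eflags_p set, everything else clear) selects
   the plain "fcomi" form, while mask 3 (unordered compare through
   fnstsw with a dying stack top) selects "fucomp". */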
9955
9956 static const char * const alt[16] =
9957 {
9958 "fcom%z2\t%y2\n\tfnstsw\t%0",
9959 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9960 "fucom%z2\t%y2\n\tfnstsw\t%0",
9961 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9962
9963 "ficom%z2\t%y2\n\tfnstsw\t%0",
9964 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9965 NULL,
9966 NULL,
9967
9968 "fcomi\t{%y1, %0|%0, %y1}",
9969 "fcomip\t{%y1, %0|%0, %y1}",
9970 "fucomi\t{%y1, %0|%0, %y1}",
9971 "fucomip\t{%y1, %0|%0, %y1}",
9972
9973 NULL,
9974 NULL,
9975 NULL,
9976 NULL
9977 };
9978
9979 int mask;
9980 const char *ret;
9981
9982 mask = eflags_p << 3;
9983 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9984 mask |= unordered_p << 1;
9985 mask |= stack_top_dies;
9986
9987 gcc_assert (mask < 16);
9988 ret = alt[mask];
9989 gcc_assert (ret);
9990
9991 return ret;
9992 }
9993 }
9994
9995 void
9996 ix86_output_addr_vec_elt (FILE *file, int value)
9997 {
9998 const char *directive = ASM_LONG;
9999
10000 #ifdef ASM_QUAD
10001 if (TARGET_64BIT)
10002 directive = ASM_QUAD;
10003 #else
10004 gcc_assert (!TARGET_64BIT);
10005 #endif
10006
10007 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
10008 }
10009
10010 void
10011 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
10012 {
10013 const char *directive = ASM_LONG;
10014
10015 #ifdef ASM_QUAD
10016 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
10017 directive = ASM_QUAD;
10018 #else
10019 gcc_assert (!TARGET_64BIT);
10020 #endif
10021 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
10022 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
10023 fprintf (file, "%s%s%d-%s%d\n",
10024 directive, LPREFIX, value, LPREFIX, rel);
10025 else if (HAVE_AS_GOTOFF_IN_DATA)
10026 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
10027 #if TARGET_MACHO
10028 else if (TARGET_MACHO)
10029 {
10030 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
10031 machopic_output_function_base_name (file);
10032 fprintf(file, "\n");
10033 }
10034 #endif
10035 else
10036 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
10037 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
10038 }
10039 \f
10040 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
10041 for the target. */
10042
10043 void
10044 ix86_expand_clear (rtx dest)
10045 {
10046 rtx tmp;
10047
10048 /* We play register width games, which are only valid after reload. */
10049 gcc_assert (reload_completed);
10050
10051 /* Avoid HImode and its attendant prefix byte. */
10052 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
10053 dest = gen_rtx_REG (SImode, REGNO (dest));
10054 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
10055
10056 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
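/* The xor form is shorter than "mov $0, reg" but clobbers the flags,
   so the parallel below makes that clobber explicit. */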
10057 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
10058 {
10059 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10060 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
10061 }
10062
10063 emit_insn (tmp);
10064 }
10065
10066 /* X is an unchanging MEM. If it is a constant pool reference, return
10067 the constant pool rtx, else NULL. */
10068
10069 rtx
10070 maybe_get_pool_constant (rtx x)
10071 {
10072 x = ix86_delegitimize_address (XEXP (x, 0));
10073
10074 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
10075 return get_pool_constant (x);
10076
10077 return NULL_RTX;
10078 }
10079
10080 void
10081 ix86_expand_move (enum machine_mode mode, rtx operands[])
10082 {
10083 rtx op0, op1;
10084 enum tls_model model;
10085
10086 op0 = operands[0];
10087 op1 = operands[1];
10088
10089 if (GET_CODE (op1) == SYMBOL_REF)
10090 {
10091 model = SYMBOL_REF_TLS_MODEL (op1);
10092 if (model)
10093 {
10094 op1 = legitimize_tls_address (op1, model, true);
10095 op1 = force_operand (op1, op0);
10096 if (op1 == op0)
10097 return;
10098 }
10099 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10100 && SYMBOL_REF_DLLIMPORT_P (op1))
10101 op1 = legitimize_dllimport_symbol (op1, false);
10102 }
10103 else if (GET_CODE (op1) == CONST
10104 && GET_CODE (XEXP (op1, 0)) == PLUS
10105 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
10106 {
10107 rtx addend = XEXP (XEXP (op1, 0), 1);
10108 rtx symbol = XEXP (XEXP (op1, 0), 0);
10109 rtx tmp = NULL;
10110
10111 model = SYMBOL_REF_TLS_MODEL (symbol);
10112 if (model)
10113 tmp = legitimize_tls_address (symbol, model, true);
10114 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10115 && SYMBOL_REF_DLLIMPORT_P (symbol))
10116 tmp = legitimize_dllimport_symbol (symbol, true);
10117
10118 if (tmp)
10119 {
10120 tmp = force_operand (tmp, NULL);
10121 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
10122 op0, 1, OPTAB_DIRECT);
10123 if (tmp == op0)
10124 return;
10125 }
10126 }
10127
10128 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
10129 {
10130 if (TARGET_MACHO && !TARGET_64BIT)
10131 {
10132 #if TARGET_MACHO
10133 if (MACHOPIC_PURE)
10134 {
10135 rtx temp = ((reload_in_progress
10136 || ((op0 && REG_P (op0))
10137 && mode == Pmode))
10138 ? op0 : gen_reg_rtx (Pmode));
10139 op1 = machopic_indirect_data_reference (op1, temp);
10140 op1 = machopic_legitimize_pic_address (op1, mode,
10141 temp == op1 ? 0 : temp);
10142 }
10143 else if (MACHOPIC_INDIRECT)
10144 op1 = machopic_indirect_data_reference (op1, 0);
10145 if (op0 == op1)
10146 return;
10147 #endif
10148 }
10149 else
10150 {
10151 if (MEM_P (op0))
10152 op1 = force_reg (Pmode, op1);
10153 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
10154 {
10155 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
10156 op1 = legitimize_pic_address (op1, reg);
10157 if (op0 == op1)
10158 return;
10159 }
10160 }
10161 }
10162 else
10163 {
10164 if (MEM_P (op0)
10165 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
10166 || !push_operand (op0, mode))
10167 && MEM_P (op1))
10168 op1 = force_reg (mode, op1);
10169
10170 if (push_operand (op0, mode)
10171 && ! general_no_elim_operand (op1, mode))
10172 op1 = copy_to_mode_reg (mode, op1);
10173
10174 /* Force large constants in 64-bit compilation into a register
10175 to get them CSEed. */
10176 if (can_create_pseudo_p ()
10177 && (mode == DImode) && TARGET_64BIT
10178 && immediate_operand (op1, mode)
10179 && !x86_64_zext_immediate_operand (op1, VOIDmode)
10180 && !register_operand (op0, mode)
10181 && optimize)
10182 op1 = copy_to_mode_reg (mode, op1);
10183
10184 if (can_create_pseudo_p ()
10185 && FLOAT_MODE_P (mode)
10186 && GET_CODE (op1) == CONST_DOUBLE)
10187 {
10188 /* If we are loading a floating point constant to a register,
10189 force the value to memory now, since we'll get better code
10190 out the back end. */
10191
10192 op1 = validize_mem (force_const_mem (mode, op1));
10193 if (!register_operand (op0, mode))
10194 {
10195 rtx temp = gen_reg_rtx (mode);
10196 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
10197 emit_move_insn (op0, temp);
10198 return;
10199 }
10200 }
10201 }
10202
10203 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
10204 }
10205
10206 void
10207 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
10208 {
10209 rtx op0 = operands[0], op1 = operands[1];
10210 unsigned int align = GET_MODE_ALIGNMENT (mode);
10211
10212 /* Force constants other than zero into memory. We do not know how
10213 the instructions used to build constants modify the upper 64 bits
10214 of the register; once we have that information we may be able
10215 to handle some of them more efficiently. */
10216 if (can_create_pseudo_p ()
10217 && register_operand (op0, mode)
10218 && (CONSTANT_P (op1)
10219 || (GET_CODE (op1) == SUBREG
10220 && CONSTANT_P (SUBREG_REG (op1))))
10221 && standard_sse_constant_p (op1) <= 0)
10222 op1 = validize_mem (force_const_mem (mode, op1));
10223
10224 /* TDmode values are passed as TImode on the stack. TImode values
10225 are moved via xmm registers, and moving them to the stack can result
10226 in unaligned memory access. Use ix86_expand_vector_move_misalign()
10227 if the memory operand is not aligned correctly. */
10228 if (can_create_pseudo_p ()
10229 && (mode == TImode) && !TARGET_64BIT
10230 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
10231 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
10232 {
10233 rtx tmp[2];
10234
10235 /* ix86_expand_vector_move_misalign() does not like constants ... */
10236 if (CONSTANT_P (op1)
10237 || (GET_CODE (op1) == SUBREG
10238 && CONSTANT_P (SUBREG_REG (op1))))
10239 op1 = validize_mem (force_const_mem (mode, op1));
10240
10241 /* ... nor both arguments in memory. */
10242 if (!register_operand (op0, mode)
10243 && !register_operand (op1, mode))
10244 op1 = force_reg (mode, op1);
10245
10246 tmp[0] = op0; tmp[1] = op1;
10247 ix86_expand_vector_move_misalign (mode, tmp);
10248 return;
10249 }
10250
10251 /* If neither operand is already a register, force operand1 into one. */
10252 if (can_create_pseudo_p ()
10253 && !register_operand (op0, mode)
10254 && !register_operand (op1, mode))
10255 {
10256 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
10257 return;
10258 }
10259
10260 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
10261 }
10262
10263 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
10264 straight to ix86_expand_vector_move. */
10265 /* Code generation for scalar reg-reg moves of single and double precision data:
10266 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
10267 movaps reg, reg
10268 else
10269 movss reg, reg
10270 if (x86_sse_partial_reg_dependency == true)
10271 movapd reg, reg
10272 else
10273 movsd reg, reg
10274
10275 Code generation for scalar loads of double precision data:
10276 if (x86_sse_split_regs == true)
10277 movlpd mem, reg (gas syntax)
10278 else
10279 movsd mem, reg
10280
10281 Code generation for unaligned packed loads of single precision data
10282 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
10283 if (x86_sse_unaligned_move_optimal)
10284 movups mem, reg
10285
10286 if (x86_sse_partial_reg_dependency == true)
10287 {
10288 xorps reg, reg
10289 movlps mem, reg
10290 movhps mem+8, reg
10291 }
10292 else
10293 {
10294 movlps mem, reg
10295 movhps mem+8, reg
10296 }
10297
10298 Code generation for unaligned packed loads of double precision data
10299 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
10300 if (x86_sse_unaligned_move_optimal)
10301 movupd mem, reg
10302
10303 if (x86_sse_split_regs == true)
10304 {
10305 movlpd mem, reg
10306 movhpd mem+8, reg
10307 }
10308 else
10309 {
10310 movsd mem, reg
10311 movhpd mem+8, reg
10312 }
10313 */
10314
10315 void
10316 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
10317 {
10318 rtx op0, op1, m;
10319
10320 op0 = operands[0];
10321 op1 = operands[1];
10322
10323 if (MEM_P (op1))
10324 {
10325 /* If we're optimizing for size, movups is the smallest. */
10326 if (optimize_size)
10327 {
10328 op0 = gen_lowpart (V4SFmode, op0);
10329 op1 = gen_lowpart (V4SFmode, op1);
10330 emit_insn (gen_sse_movups (op0, op1));
10331 return;
10332 }
10333
10334 /* ??? If we have typed data, then it would appear that using
10335 movdqu is the only way to get unaligned data loaded with
10336 integer type. */
10337 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10338 {
10339 op0 = gen_lowpart (V16QImode, op0);
10340 op1 = gen_lowpart (V16QImode, op1);
10341 emit_insn (gen_sse2_movdqu (op0, op1));
10342 return;
10343 }
10344
10345 if (TARGET_SSE2 && mode == V2DFmode)
10346 {
10347 rtx zero;
10348
10349 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10350 {
10351 op0 = gen_lowpart (V2DFmode, op0);
10352 op1 = gen_lowpart (V2DFmode, op1);
10353 emit_insn (gen_sse2_movupd (op0, op1));
10354 return;
10355 }
10356
10357 /* When SSE registers are split into halves, we can avoid
10358 writing to the top half twice. */
10359 if (TARGET_SSE_SPLIT_REGS)
10360 {
10361 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10362 zero = op0;
10363 }
10364 else
10365 {
10366 /* ??? Not sure about the best option for the Intel chips.
10367 The following would seem to satisfy; the register is
10368 entirely cleared, breaking the dependency chain. We
10369 then store to the upper half, with a dependency depth
10370 of one. A rumor has it that Intel recommends two movsd
10371 followed by an unpacklpd, but this is unconfirmed. And
10372 given that the dependency depth of the unpacklpd would
10373 still be one, I'm not sure why this would be better. */
10374 zero = CONST0_RTX (V2DFmode);
10375 }
10376
10377 m = adjust_address (op1, DFmode, 0);
10378 emit_insn (gen_sse2_loadlpd (op0, zero, m));
10379 m = adjust_address (op1, DFmode, 8);
10380 emit_insn (gen_sse2_loadhpd (op0, op0, m));
10381 }
10382 else
10383 {
10384 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10385 {
10386 op0 = gen_lowpart (V4SFmode, op0);
10387 op1 = gen_lowpart (V4SFmode, op1);
10388 emit_insn (gen_sse_movups (op0, op1));
10389 return;
10390 }
10391
10392 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
10393 emit_move_insn (op0, CONST0_RTX (mode));
10394 else
10395 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10396
10397 if (mode != V4SFmode)
10398 op0 = gen_lowpart (V4SFmode, op0);
10399 m = adjust_address (op1, V2SFmode, 0);
10400 emit_insn (gen_sse_loadlps (op0, op0, m));
10401 m = adjust_address (op1, V2SFmode, 8);
10402 emit_insn (gen_sse_loadhps (op0, op0, m));
10403 }
10404 }
10405 else if (MEM_P (op0))
10406 {
10407 /* If we're optimizing for size, movups is the smallest. */
10408 if (optimize_size)
10409 {
10410 op0 = gen_lowpart (V4SFmode, op0);
10411 op1 = gen_lowpart (V4SFmode, op1);
10412 emit_insn (gen_sse_movups (op0, op1));
10413 return;
10414 }
10415
10416 /* ??? Similar to above, only less clear because of quote
10417 typeless stores unquote. */
10418 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
10419 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10420 {
10421 op0 = gen_lowpart (V16QImode, op0);
10422 op1 = gen_lowpart (V16QImode, op1);
10423 emit_insn (gen_sse2_movdqu (op0, op1));
10424 return;
10425 }
10426
10427 if (TARGET_SSE2 && mode == V2DFmode)
10428 {
10429 m = adjust_address (op0, DFmode, 0);
10430 emit_insn (gen_sse2_storelpd (m, op1));
10431 m = adjust_address (op0, DFmode, 8);
10432 emit_insn (gen_sse2_storehpd (m, op1));
10433 }
10434 else
10435 {
10436 if (mode != V4SFmode)
10437 op1 = gen_lowpart (V4SFmode, op1);
10438 m = adjust_address (op0, V2SFmode, 0);
10439 emit_insn (gen_sse_storelps (m, op1));
10440 m = adjust_address (op0, V2SFmode, 8);
10441 emit_insn (gen_sse_storehps (m, op1));
10442 }
10443 }
10444 else
10445 gcc_unreachable ();
10446 }
10447
10448 /* Expand a push in MODE. This is some mode for which we do not support
10449 proper push instructions, at least from the registers that we expect
10450 the value to live in. */
10451
10452 void
10453 ix86_expand_push (enum machine_mode mode, rtx x)
10454 {
10455 rtx tmp;
10456
10457 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
10458 GEN_INT (-GET_MODE_SIZE (mode)),
10459 stack_pointer_rtx, 1, OPTAB_DIRECT);
10460 if (tmp != stack_pointer_rtx)
10461 emit_move_insn (stack_pointer_rtx, tmp);
10462
10463 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
10464 emit_move_insn (tmp, x);
10465 }
10466
10467 /* Helper function of ix86_fixup_binary_operands to canonicalize
10468 operand order. Returns true if the operands should be swapped. */
10469
10470 static bool
10471 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
10472 rtx operands[])
10473 {
10474 rtx dst = operands[0];
10475 rtx src1 = operands[1];
10476 rtx src2 = operands[2];
10477
10478 /* If the operation is not commutative, we can't do anything. */
10479 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
10480 return false;
10481
10482 /* Highest priority is that src1 should match dst. */
10483 if (rtx_equal_p (dst, src1))
10484 return false;
10485 if (rtx_equal_p (dst, src2))
10486 return true;
10487
10488 /* Next highest priority is that immediate constants come second. */
10489 if (immediate_operand (src2, mode))
10490 return false;
10491 if (immediate_operand (src1, mode))
10492 return true;
10493
10494 /* Lowest priority is that memory references should come second. */
10495 if (MEM_P (src2))
10496 return false;
10497 if (MEM_P (src1))
10498 return true;
10499
10500 return false;
10501 }
10502
10503
10504 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
10505 destination to use for the operation. If different from the true
10506 destination in operands[0], a copy operation will be required. */
10507
10508 rtx
10509 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
10510 rtx operands[])
10511 {
10512 rtx dst = operands[0];
10513 rtx src1 = operands[1];
10514 rtx src2 = operands[2];
10515
10516 /* Canonicalize operand order. */
10517 if (ix86_swap_binary_operands_p (code, mode, operands))
10518 {
10519 rtx temp = src1;
10520 src1 = src2;
10521 src2 = temp;
10522 }
10523
10524 /* Both source operands cannot be in memory. */
10525 if (MEM_P (src1) && MEM_P (src2))
10526 {
10527 /* Optimization: Only read from memory once. */
10528 if (rtx_equal_p (src1, src2))
10529 {
10530 src2 = force_reg (mode, src2);
10531 src1 = src2;
10532 }
10533 else
10534 src2 = force_reg (mode, src2);
10535 }
10536
10537 /* If the destination is memory, and we do not have matching source
10538 operands, do things in registers. */
10539 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10540 dst = gen_reg_rtx (mode);
10541
10542 /* Source 1 cannot be a constant. */
10543 if (CONSTANT_P (src1))
10544 src1 = force_reg (mode, src1);
10545
10546 /* Source 1 cannot be a non-matching memory. */
10547 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10548 src1 = force_reg (mode, src1);
10549
10550 operands[1] = src1;
10551 operands[2] = src2;
10552 return dst;
10553 }
10554
10555 /* Similarly, but assume that the destination has already been
10556 set up properly. */
10557
10558 void
10559 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
10560 enum machine_mode mode, rtx operands[])
10561 {
10562 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
10563 gcc_assert (dst == operands[0]);
10564 }
10565
10566 /* Attempt to expand a binary operator. Make the expansion closer to the
10567 actual machine than just general_operand, which would allow 3 separate
10568 memory references (one output, two input) in a single insn. */
10569
10570 void
10571 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
10572 rtx operands[])
10573 {
10574 rtx src1, src2, dst, op, clob;
10575
10576 dst = ix86_fixup_binary_operands (code, mode, operands);
10577 src1 = operands[1];
10578 src2 = operands[2];
10579
10580 /* Emit the instruction. */
10581
10582 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
10583 if (reload_in_progress)
10584 {
10585 /* Reload doesn't know about the flags register, and doesn't know that
10586 it doesn't want to clobber it. We can only do this with PLUS. */
10587 gcc_assert (code == PLUS);
10588 emit_insn (op);
10589 }
10590 else
10591 {
10592 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10593 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10594 }
10595
10596 /* Fix up the destination if needed. */
10597 if (dst != operands[0])
10598 emit_move_insn (operands[0], dst);
10599 }
10600
10601 /* Return TRUE or FALSE depending on whether the binary operator meets the
10602 appropriate constraints. */
10603
10604 int
10605 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
10606 rtx operands[3])
10607 {
10608 rtx dst = operands[0];
10609 rtx src1 = operands[1];
10610 rtx src2 = operands[2];
10611
10612 /* Both source operands cannot be in memory. */
10613 if (MEM_P (src1) && MEM_P (src2))
10614 return 0;
10615
10616 /* Canonicalize operand order for commutative operators. */
10617 if (ix86_swap_binary_operands_p (code, mode, operands))
10618 {
10619 rtx temp = src1;
10620 src1 = src2;
10621 src2 = temp;
10622 }
10623
10624 /* If the destination is memory, we must have a matching source operand. */
10625 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10626 return 0;
10627
10628 /* Source 1 cannot be a constant. */
10629 if (CONSTANT_P (src1))
10630 return 0;
10631
10632 /* Source 1 cannot be a non-matching memory. */
10633 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10634 return 0;
10635
10636 return 1;
10637 }
10638
10639 /* Attempt to expand a unary operator. Make the expansion closer to the
10640 actual machine than just general_operand, which would allow 2 separate
10641 memory references (one output, one input) in a single insn. */
10642
10643 void
10644 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10645 rtx operands[])
10646 {
10647 int matching_memory;
10648 rtx src, dst, op, clob;
10649
10650 dst = operands[0];
10651 src = operands[1];
10652
10653 /* If the destination is memory, and we do not have matching source
10654 operands, do things in registers. */
10655 matching_memory = 0;
10656 if (MEM_P (dst))
10657 {
10658 if (rtx_equal_p (dst, src))
10659 matching_memory = 1;
10660 else
10661 dst = gen_reg_rtx (mode);
10662 }
10663
10664 /* When source operand is memory, destination must match. */
10665 if (MEM_P (src) && !matching_memory)
10666 src = force_reg (mode, src);
10667
10668 /* Emit the instruction. */
10669
10670 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
10671 if (reload_in_progress || code == NOT)
10672 {
10673 /* Reload doesn't know about the flags register, and doesn't know that
10674 it doesn't want to clobber it. */
10675 gcc_assert (code == NOT);
10676 emit_insn (op);
10677 }
10678 else
10679 {
10680 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10681 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10682 }
10683
10684 /* Fix up the destination if needed. */
10685 if (dst != operands[0])
10686 emit_move_insn (operands[0], dst);
10687 }
10688
10689 /* Return TRUE or FALSE depending on whether the unary operator meets the
10690 appropriate constraints. */
10691
10692 int
10693 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
10694 enum machine_mode mode ATTRIBUTE_UNUSED,
10695 rtx operands[2] ATTRIBUTE_UNUSED)
10696 {
10697 /* If one of operands is memory, source and destination must match. */
10698 if ((MEM_P (operands[0])
10699 || MEM_P (operands[1]))
10700 && ! rtx_equal_p (operands[0], operands[1]))
10701 return FALSE;
10702 return TRUE;
10703 }
10704
10705 /* Post-reload splitter for converting an SF or DFmode value in an
10706 SSE register into an unsigned SImode. */
10707
10708 void
10709 ix86_split_convert_uns_si_sse (rtx operands[])
10710 {
10711 enum machine_mode vecmode;
10712 rtx value, large, zero_or_two31, input, two31, x;
10713
10714 large = operands[1];
10715 zero_or_two31 = operands[2];
10716 input = operands[3];
10717 two31 = operands[4];
10718 vecmode = GET_MODE (large);
10719 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
10720
10721 /* Load up the value into the low element. We must ensure that the other
10722 elements are valid floats -- zero is the easiest such value. */
10723 if (MEM_P (input))
10724 {
10725 if (vecmode == V4SFmode)
10726 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
10727 else
10728 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
10729 }
10730 else
10731 {
10732 input = gen_rtx_REG (vecmode, REGNO (input));
10733 emit_move_insn (value, CONST0_RTX (vecmode));
10734 if (vecmode == V4SFmode)
10735 emit_insn (gen_sse_movss (value, value, input));
10736 else
10737 emit_insn (gen_sse2_movsd (value, value, input));
10738 }
10739
10740 emit_move_insn (large, two31);
10741 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
10742
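/* Inputs >= 2^31 do not fit a signed conversion, so build a mask of
   the large values, conditionally subtract 2^31 before the cvtt
   conversion, and xor the integer result with that mask shifted into
   the sign bit to add 2^31 back. */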
10743 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
10744 emit_insn (gen_rtx_SET (VOIDmode, large, x));
10745
10746 x = gen_rtx_AND (vecmode, zero_or_two31, large);
10747 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
10748
10749 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
10750 emit_insn (gen_rtx_SET (VOIDmode, value, x));
10751
10752 large = gen_rtx_REG (V4SImode, REGNO (large));
10753 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
10754
10755 x = gen_rtx_REG (V4SImode, REGNO (value));
10756 if (vecmode == V4SFmode)
10757 emit_insn (gen_sse2_cvttps2dq (x, value));
10758 else
10759 emit_insn (gen_sse2_cvttpd2dq (x, value));
10760 value = x;
10761
10762 emit_insn (gen_xorv4si3 (value, value, large));
10763 }
10764
10765 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10766 Expects the 64-bit DImode to be supplied in a pair of integral
10767 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10768 -mfpmath=sse, !optimize_size only. */
10769
10770 void
10771 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
10772 {
10773 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
10774 rtx int_xmm, fp_xmm;
10775 rtx biases, exponents;
10776 rtx x;
10777
10778 int_xmm = gen_reg_rtx (V4SImode);
10779 if (TARGET_INTER_UNIT_MOVES)
10780 emit_insn (gen_movdi_to_sse (int_xmm, input));
10781 else if (TARGET_SSE_SPLIT_REGS)
10782 {
10783 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
10784 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10785 }
10786 else
10787 {
10788 x = gen_reg_rtx (V2DImode);
10789 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10790 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
10791 }
10792
10793 x = gen_rtx_CONST_VECTOR (V4SImode,
10794 gen_rtvec (4, GEN_INT (0x43300000UL),
10795 GEN_INT (0x45300000UL),
10796 const0_rtx, const0_rtx));
10797 exponents = validize_mem (force_const_mem (V4SImode, x));
10798
10799 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10800 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10801
10802 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10803 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10804 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10805 (0x1.0p84 + double(fp_value_hi_xmm)).
10806 Note these exponents differ by 32. */
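/* For example, for input = 2^32 + 5 the low half packs to
   0x1.0p52 + 5 and the high half to 0x1.0p84 + 2^32; subtracting the
   two biases and adding the halves recovers 2^32 + 5 exactly. */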
10807
10808 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
10809
10810 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10811 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10812 real_ldexp (&bias_lo_rvt, &dconst1, 52);
10813 real_ldexp (&bias_hi_rvt, &dconst1, 84);
10814 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
10815 x = const_double_from_real_value (bias_hi_rvt, DFmode);
10816 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
10817 biases = validize_mem (force_const_mem (V2DFmode, biases));
10818 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
10819
10820 /* Add the upper and lower DFmode values together. */
10821 if (TARGET_SSE3)
10822 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
10823 else
10824 {
10825 x = copy_to_mode_reg (V2DFmode, fp_xmm);
10826 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
10827 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
10828 }
10829
10830 ix86_expand_vector_extract (false, target, fp_xmm, 0);
10831 }
10832
10833 /* Convert an unsigned SImode value into a DFmode. Only currently used
10834 for SSE, but applicable anywhere. */
10835
10836 void
10837 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10838 {
10839 REAL_VALUE_TYPE TWO31r;
10840 rtx x, fp;
10841
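/* Bias the input by -2^31 so that it fits in signed SImode, convert
   with the signed path, and then add 2^31.0 back as a DFmode constant;
   the result is exact because DFmode has a 53-bit mantissa. */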
10842 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10843 NULL, 1, OPTAB_DIRECT);
10844
10845 fp = gen_reg_rtx (DFmode);
10846 emit_insn (gen_floatsidf2 (fp, x));
10847
10848 real_ldexp (&TWO31r, &dconst1, 31);
10849 x = const_double_from_real_value (TWO31r, DFmode);
10850
10851 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
10852 if (x != target)
10853 emit_move_insn (target, x);
10854 }
10855
10856 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10857 32-bit mode; otherwise we have a direct convert instruction. */
10858
10859 void
10860 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10861 {
10862 REAL_VALUE_TYPE TWO32r;
10863 rtx fp_lo, fp_hi, x;
10864
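/* Convert the signed high word and the unsigned low word separately,
   then combine them as fp_hi * 2^32 + fp_lo; the high word carries
   the sign of the full value. */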
10865 fp_lo = gen_reg_rtx (DFmode);
10866 fp_hi = gen_reg_rtx (DFmode);
10867
10868 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10869
10870 real_ldexp (&TWO32r, &dconst1, 32);
10871 x = const_double_from_real_value (TWO32r, DFmode);
10872 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
10873
10874 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10875
10876 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
10877 0, OPTAB_DIRECT);
10878 if (x != target)
10879 emit_move_insn (target, x);
10880 }
10881
10882 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10883 For x86_32, -mfpmath=sse, !optimize_size only. */
10884 void
10885 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10886 {
10887 REAL_VALUE_TYPE ONE16r;
10888 rtx fp_hi, fp_lo, int_hi, int_lo, x;
10889
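/* Split the value into its low and high 16-bit halves, convert each
   half exactly to SFmode, and recombine as fp_hi * 2^16 + fp_lo; each
   half fits SFmode's 24-bit mantissa, though the final add may round. */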
10890 real_ldexp (&ONE16r, &dconst1, 16);
10891 x = const_double_from_real_value (ONE16r, SFmode);
10892 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10893 NULL, 0, OPTAB_DIRECT);
10894 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10895 NULL, 0, OPTAB_DIRECT);
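/* Split the input into 16-bit halves and compute
   (float) hi * 2**16 + (float) lo.  Both halves convert to SFmode exactly,
   so the single rounding in the final addition gives the same result a
   direct unsigned conversion would produce.  */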
10896 fp_hi = gen_reg_rtx (SFmode);
10897 fp_lo = gen_reg_rtx (SFmode);
10898 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10899 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10900 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10901 0, OPTAB_DIRECT);
10902 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10903 0, OPTAB_DIRECT);
10904 if (!rtx_equal_p (target, fp_hi))
10905 emit_move_insn (target, fp_hi);
10906 }
10907
10908 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
10909 then replicate the value for all elements of the vector
10910 register. */

10911
10912 rtx
10913 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10914 {
10915 rtvec v;
10916 switch (mode)
10917 {
10918 case SImode:
10919 gcc_assert (vect);
10920 v = gen_rtvec (4, value, value, value, value);
10921 return gen_rtx_CONST_VECTOR (V4SImode, v);
10922
10923 case DImode:
10924 gcc_assert (vect);
10925 v = gen_rtvec (2, value, value);
10926 return gen_rtx_CONST_VECTOR (V2DImode, v);
10927
10928 case SFmode:
10929 if (vect)
10930 v = gen_rtvec (4, value, value, value, value);
10931 else
10932 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10933 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10934 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10935
10936 case DFmode:
10937 if (vect)
10938 v = gen_rtvec (2, value, value);
10939 else
10940 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10941 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10942
10943 default:
10944 gcc_unreachable ();
10945 }
10946 }
10947
10948 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
10949 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
10950 for an SSE register. If VECT is true, then replicate the mask for
10951 all elements of the vector register. If INVERT is true, then create
10952 a mask excluding the sign bit. */
10953
10954 rtx
10955 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10956 {
10957 enum machine_mode vec_mode, imode;
10958 HOST_WIDE_INT hi, lo;
10959 int shift = 63;
10960 rtx v;
10961 rtx mask;
10962
10963 /* Find the sign bit, sign extended to 2*HWI. */
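/* LO and HI together form the sign-bit constant as a pair of
   HOST_WIDE_INTs, so that immed_double_const below can build it even when
   the sign bit does not fit in a single HOST_WIDE_INT (e.g. a DImode mask
   on a 32-bit host).  */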
10964 switch (mode)
10965 {
10966 case SImode:
10967 case SFmode:
10968 imode = SImode;
10969 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
10970 lo = 0x80000000, hi = lo < 0;
10971 break;
10972
10973 case DImode:
10974 case DFmode:
10975 imode = DImode;
10976 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
10977 if (HOST_BITS_PER_WIDE_INT >= 64)
10978 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10979 else
10980 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
10981 break;
10982
10983 case TImode:
10984 case TFmode:
10985 imode = TImode;
10986 vec_mode = VOIDmode;
10987 gcc_assert (HOST_BITS_PER_WIDE_INT >= 64);
10988 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
10989 break;
10990
10991 default:
10992 gcc_unreachable ();
10993 }
10994
10995 if (invert)
10996 lo = ~lo, hi = ~hi;
10997
10998 /* Force this value into the low part of a fp vector constant. */
10999 mask = immed_double_const (lo, hi, imode);
11000 mask = gen_lowpart (mode, mask);
11001
11002 if (vec_mode == VOIDmode)
11003 return force_reg (mode, mask);
11004
11005 v = ix86_build_const_vector (mode, vect, mask);
11006 return force_reg (vec_mode, v);
11007 }
11008
11009 /* Generate code for floating point ABS or NEG. */
11010
11011 void
11012 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
11013 rtx operands[])
11014 {
11015 rtx mask, set, use, clob, dst, src;
11016 bool matching_memory;
11017 bool use_sse = false;
11018 bool vector_mode = VECTOR_MODE_P (mode);
11019 enum machine_mode elt_mode = mode;
11020
11021 if (vector_mode)
11022 {
11023 elt_mode = GET_MODE_INNER (mode);
11024 use_sse = true;
11025 }
11026 else if (mode == TFmode)
11027 use_sse = true;
11028 else if (TARGET_SSE_MATH)
11029 use_sse = SSE_FLOAT_MODE_P (mode);
11030
11031 /* NEG and ABS performed with SSE use bitwise mask operations.
11032 Create the appropriate mask now. */
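/* For NEG the mask contains only the sign bit and is XORed in; for ABS
   the mask is inverted (all bits except the sign bit) and ANDed in, which
   is why INVERT is passed as (code == ABS) below.  */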
11033 if (use_sse)
11034 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
11035 else
11036 mask = NULL_RTX;
11037
11038 dst = operands[0];
11039 src = operands[1];
11040
11041 /* If the destination is memory, and we don't have matching source
11042 operands or we're using the x87, do things in registers. */
11043 matching_memory = false;
11044 if (MEM_P (dst))
11045 {
11046 if (use_sse && rtx_equal_p (dst, src))
11047 matching_memory = true;
11048 else
11049 dst = gen_reg_rtx (mode);
11050 }
11051 if (MEM_P (src) && !matching_memory)
11052 src = force_reg (mode, src);
11053
11054 if (vector_mode)
11055 {
11056 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
11057 set = gen_rtx_SET (VOIDmode, dst, set);
11058 emit_insn (set);
11059 }
11060 else
11061 {
11062 set = gen_rtx_fmt_e (code, mode, src);
11063 set = gen_rtx_SET (VOIDmode, dst, set);
11064 if (mask)
11065 {
11066 use = gen_rtx_USE (VOIDmode, mask);
11067 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
11068 emit_insn (gen_rtx_PARALLEL (VOIDmode,
11069 gen_rtvec (3, set, use, clob)));
11070 }
11071 else
11072 emit_insn (set);
11073 }
11074
11075 if (dst != operands[0])
11076 emit_move_insn (operands[0], dst);
11077 }
11078
11079 /* Expand a copysign operation. Special case operand 0 being a constant. */
11080
11081 void
11082 ix86_expand_copysign (rtx operands[])
11083 {
11084 enum machine_mode mode, vmode;
11085 rtx dest, op0, op1, mask, nmask;
11086
11087 dest = operands[0];
11088 op0 = operands[1];
11089 op1 = operands[2];
11090
11091 mode = GET_MODE (dest);
11092 vmode = mode == SFmode ? V4SFmode : V2DFmode;
11093
11094 if (GET_CODE (op0) == CONST_DOUBLE)
11095 {
11096 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
11097
11098 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
11099 op0 = simplify_unary_operation (ABS, mode, op0, mode);
11100
11101 if (mode == SFmode || mode == DFmode)
11102 {
11103 if (op0 == CONST0_RTX (mode))
11104 op0 = CONST0_RTX (vmode);
11105 else
11106 {
11107 rtvec v;
11108
11109 if (mode == SFmode)
11110 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
11111 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
11112 else
11113 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
11114 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
11115 }
11116 }
11117
11118 mask = ix86_build_signbit_mask (mode, 0, 0);
11119
11120 if (mode == SFmode)
11121 copysign_insn = gen_copysignsf3_const;
11122 else if (mode == DFmode)
11123 copysign_insn = gen_copysigndf3_const;
11124 else
11125 copysign_insn = gen_copysigntf3_const;
11126
11127 emit_insn (copysign_insn (dest, op0, op1, mask));
11128 }
11129 else
11130 {
11131 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
11132
11133 nmask = ix86_build_signbit_mask (mode, 0, 1);
11134 mask = ix86_build_signbit_mask (mode, 0, 0);
11135
11136 if (mode == SFmode)
11137 copysign_insn = gen_copysignsf3_var;
11138 else if (mode == DFmode)
11139 copysign_insn = gen_copysigndf3_var;
11140 else
11141 copysign_insn = gen_copysigntf3_var;
11142
11143 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
11144 }
11145 }
11146
11147 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
11148 be a constant, and so has already been expanded into a vector constant. */
11149
11150 void
11151 ix86_split_copysign_const (rtx operands[])
11152 {
11153 enum machine_mode mode, vmode;
11154 rtx dest, op0, op1, mask, x;
11155
11156 dest = operands[0];
11157 op0 = operands[1];
11158 op1 = operands[2];
11159 mask = operands[3];
11160
11161 mode = GET_MODE (dest);
11162 vmode = GET_MODE (mask);
11163
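/* The matching copysign<mode>3_const pattern is expected to tie OP1 (the
   value supplying the sign) to DEST, so ANDing DEST with the sign-bit MASK
   keeps just that sign, and ORing in the constant OP0 (already expanded
   with its sign bit cleared) completes the copysign.  */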
11164 dest = simplify_gen_subreg (vmode, dest, mode, 0);
11165 x = gen_rtx_AND (vmode, dest, mask);
11166 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11167
11168 if (op0 != CONST0_RTX (vmode))
11169 {
11170 x = gen_rtx_IOR (vmode, dest, op0);
11171 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11172 }
11173 }
11174
11175 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
11176 so we have to do two masks. */
11177
11178 void
11179 ix86_split_copysign_var (rtx operands[])
11180 {
11181 enum machine_mode mode, vmode;
11182 rtx dest, scratch, op0, op1, mask, nmask, x;
11183
11184 dest = operands[0];
11185 scratch = operands[1];
11186 op0 = operands[2];
11187 op1 = operands[3];
11188 nmask = operands[4];
11189 mask = operands[5];
11190
11191 mode = GET_MODE (dest);
11192 vmode = GET_MODE (mask);
11193
11194 if (rtx_equal_p (op0, op1))
11195 {
11196 /* Shouldn't happen often (it's useless, obviously), but when it does
11197 we'd generate incorrect code if we continue below. */
11198 emit_move_insn (dest, op0);
11199 return;
11200 }
11201
11202 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
11203 {
11204 gcc_assert (REGNO (op1) == REGNO (scratch));
11205
11206 x = gen_rtx_AND (vmode, scratch, mask);
11207 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
11208
11209 dest = mask;
11210 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11211 x = gen_rtx_NOT (vmode, dest);
11212 x = gen_rtx_AND (vmode, x, op0);
11213 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11214 }
11215 else
11216 {
11217 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
11218 {
11219 x = gen_rtx_AND (vmode, scratch, mask);
11220 }
11221 else /* alternative 2,4 */
11222 {
11223 gcc_assert (REGNO (mask) == REGNO (scratch));
11224 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
11225 x = gen_rtx_AND (vmode, scratch, op1);
11226 }
11227 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
11228
11229 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
11230 {
11231 dest = simplify_gen_subreg (vmode, op0, mode, 0);
11232 x = gen_rtx_AND (vmode, dest, nmask);
11233 }
11234 else /* alternative 3,4 */
11235 {
11236 gcc_assert (REGNO (nmask) == REGNO (dest));
11237 dest = nmask;
11238 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11239 x = gen_rtx_AND (vmode, dest, op0);
11240 }
11241 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11242 }
11243
11244 x = gen_rtx_IOR (vmode, dest, scratch);
11245 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11246 }
11247
11248 /* Return TRUE or FALSE depending on whether the first SET in INSN
11249 has source and destination with matching CC modes, and whether the
11250 CC mode is at least as constrained as REQ_MODE. */
11251
11252 int
11253 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
11254 {
11255 rtx set;
11256 enum machine_mode set_mode;
11257
11258 set = PATTERN (insn);
11259 if (GET_CODE (set) == PARALLEL)
11260 set = XVECEXP (set, 0, 0);
11261 gcc_assert (GET_CODE (set) == SET);
11262 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
11263
11264 set_mode = GET_MODE (SET_DEST (set));
11265 switch (set_mode)
11266 {
11267 case CCNOmode:
11268 if (req_mode != CCNOmode
11269 && (req_mode != CCmode
11270 || XEXP (SET_SRC (set), 1) != const0_rtx))
11271 return 0;
11272 break;
11273 case CCmode:
11274 if (req_mode == CCGCmode)
11275 return 0;
11276 /* FALLTHRU */
11277 case CCGCmode:
11278 if (req_mode == CCGOCmode || req_mode == CCNOmode)
11279 return 0;
11280 /* FALLTHRU */
11281 case CCGOCmode:
11282 if (req_mode == CCZmode)
11283 return 0;
11284 /* FALLTHRU */
11285 case CCZmode:
11286 break;
11287
11288 default:
11289 gcc_unreachable ();
11290 }
11291
11292 return (GET_MODE (SET_SRC (set)) == set_mode);
11293 }
11294
11295 /* Generate insn patterns to do an integer compare of OPERANDS. */
11296
11297 static rtx
11298 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
11299 {
11300 enum machine_mode cmpmode;
11301 rtx tmp, flags;
11302
11303 cmpmode = SELECT_CC_MODE (code, op0, op1);
11304 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
11305
11306 /* This is very simple, but making the interface the same as in the
11307 FP case makes the rest of the code easier. */
11308 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
11309 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
11310
11311 /* Return the test that should be put into the flags user, i.e.
11312 the bcc, scc, or cmov instruction. */
11313 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
11314 }
11315
11316 /* Figure out whether to use ordered or unordered fp comparisons.
11317 Return the appropriate mode to use. */
11318
11319 enum machine_mode
11320 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
11321 {
11322 /* ??? In order to make all comparisons reversible, we do all comparisons
11323 non-trapping when compiling for IEEE. Once gcc is able to distinguish
11324 all forms of trapping and nontrapping comparisons, we can make inequality
11325 comparisons trapping again, since it results in better code when using
11326 FCOM based compares. */
11327 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
11328 }
11329
11330 enum machine_mode
11331 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
11332 {
11333 enum machine_mode mode = GET_MODE (op0);
11334
11335 if (SCALAR_FLOAT_MODE_P (mode))
11336 {
11337 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11338 return ix86_fp_compare_mode (code);
11339 }
11340
11341 switch (code)
11342 {
11343 /* Only zero flag is needed. */
11344 case EQ: /* ZF=0 */
11345 case NE: /* ZF!=0 */
11346 return CCZmode;
11347 /* Codes needing carry flag. */
11348 case GEU: /* CF=0 */
11349 case LTU: /* CF=1 */
11350 /* Detect overflow checks. They need just the carry flag. */
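/* E.g. GEU/LTU on (PLUS a b) compared against a is the canonical form
   of an unsigned "a + b overflows" test, which the carry flag alone
   answers.  */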
11351 if (GET_CODE (op0) == PLUS
11352 && rtx_equal_p (op1, XEXP (op0, 0)))
11353 return CCCmode;
11354 else
11355 return CCmode;
11356 case GTU: /* CF=0 & ZF=0 */
11357 case LEU: /* CF=1 | ZF=1 */
11358 /* Detect overflow checks. They need just the carry flag. */
11359 if (GET_CODE (op0) == MINUS
11360 && rtx_equal_p (op1, XEXP (op0, 0)))
11361 return CCCmode;
11362 else
11363 return CCmode;
11364 /* Codes possibly doable only with sign flag when
11365 comparing against zero. */
11366 case GE: /* SF=OF or SF=0 */
11367 case LT: /* SF<>OF or SF=1 */
11368 if (op1 == const0_rtx)
11369 return CCGOCmode;
11370 else
11371 /* For other cases the carry flag is not required. */
11372 return CCGCmode;
11373 /* Codes doable only with the sign flag when comparing
11374 against zero, but there is no jump instruction for it,
11375 so we need to use relational tests against overflow,
11376 which thus needs to be zero. */
11377 case GT: /* ZF=0 & SF=OF */
11378 case LE: /* ZF=1 | SF<>OF */
11379 if (op1 == const0_rtx)
11380 return CCNOmode;
11381 else
11382 return CCGCmode;
11383 /* The strcmp pattern does (use flags) and combine may ask us for the
11384 proper mode. */
11385 case USE:
11386 return CCmode;
11387 default:
11388 gcc_unreachable ();
11389 }
11390 }
11391
11392 /* Return the fixed registers used for condition codes. */
11393
11394 static bool
11395 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
11396 {
11397 *p1 = FLAGS_REG;
11398 *p2 = FPSR_REG;
11399 return true;
11400 }
11401
11402 /* If two condition code modes are compatible, return a condition code
11403 mode which is compatible with both. Otherwise, return
11404 VOIDmode. */
11405
11406 static enum machine_mode
11407 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
11408 {
11409 if (m1 == m2)
11410 return m1;
11411
11412 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
11413 return VOIDmode;
11414
11415 if ((m1 == CCGCmode && m2 == CCGOCmode)
11416 || (m1 == CCGOCmode && m2 == CCGCmode))
11417 return CCGCmode;
11418
11419 switch (m1)
11420 {
11421 default:
11422 gcc_unreachable ();
11423
11424 case CCmode:
11425 case CCGCmode:
11426 case CCGOCmode:
11427 case CCNOmode:
11428 case CCAmode:
11429 case CCCmode:
11430 case CCOmode:
11431 case CCSmode:
11432 case CCZmode:
11433 switch (m2)
11434 {
11435 default:
11436 return VOIDmode;
11437
11438 case CCmode:
11439 case CCGCmode:
11440 case CCGOCmode:
11441 case CCNOmode:
11442 case CCAmode:
11443 case CCCmode:
11444 case CCOmode:
11445 case CCSmode:
11446 case CCZmode:
11447 return CCmode;
11448 }
11449
11450 case CCFPmode:
11451 case CCFPUmode:
11452 /* These are only compatible with themselves, which we already
11453 checked above. */
11454 return VOIDmode;
11455 }
11456 }
11457
11458 /* Split comparison code CODE into comparisons we can do using branch
11459 instructions. BYPASS_CODE is the comparison code for the branch that
11460 will branch around FIRST_CODE and SECOND_CODE. If one of the branches
11461 is not required, its value is set to UNKNOWN.
11462 We never require more than two branches. */
11463
11464 void
11465 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
11466 enum rtx_code *first_code,
11467 enum rtx_code *second_code)
11468 {
11469 *first_code = code;
11470 *bypass_code = UNKNOWN;
11471 *second_code = UNKNOWN;
11472
11473 /* The fcomi comparison sets flags as follows:
11474
11475 cmp ZF PF CF
11476 > 0 0 0
11477 < 0 0 1
11478 = 1 0 0
11479 un 1 1 1 */
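/* For example, LT becomes UNLT guarded by an UNORDERED bypass branch, so
   a NaN operand skips the test instead of satisfying it; NE instead needs
   a second UNORDERED branch, because an unordered result must also count
   as "not equal".  */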
11480
11481 switch (code)
11482 {
11483 case GT: /* GTU - CF=0 & ZF=0 */
11484 case GE: /* GEU - CF=0 */
11485 case ORDERED: /* PF=0 */
11486 case UNORDERED: /* PF=1 */
11487 case UNEQ: /* EQ - ZF=1 */
11488 case UNLT: /* LTU - CF=1 */
11489 case UNLE: /* LEU - CF=1 | ZF=1 */
11490 case LTGT: /* EQ - ZF=0 */
11491 break;
11492 case LT: /* LTU - CF=1 - fails on unordered */
11493 *first_code = UNLT;
11494 *bypass_code = UNORDERED;
11495 break;
11496 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
11497 *first_code = UNLE;
11498 *bypass_code = UNORDERED;
11499 break;
11500 case EQ: /* EQ - ZF=1 - fails on unordered */
11501 *first_code = UNEQ;
11502 *bypass_code = UNORDERED;
11503 break;
11504 case NE: /* NE - ZF=0 - fails on unordered */
11505 *first_code = LTGT;
11506 *second_code = UNORDERED;
11507 break;
11508 case UNGE: /* GEU - CF=0 - fails on unordered */
11509 *first_code = GE;
11510 *second_code = UNORDERED;
11511 break;
11512 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
11513 *first_code = GT;
11514 *second_code = UNORDERED;
11515 break;
11516 default:
11517 gcc_unreachable ();
11518 }
11519 if (!TARGET_IEEE_FP)
11520 {
11521 *second_code = UNKNOWN;
11522 *bypass_code = UNKNOWN;
11523 }
11524 }
11525
11526 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
11527 All of the following functions use the number of instructions as the cost metric.
11528 In the future this should be tweaked to compute bytes for optimize_size and
11529 to take into account the performance of various instructions on various CPUs. */
11530 static int
11531 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
11532 {
11533 if (!TARGET_IEEE_FP)
11534 return 4;
11535 /* The cost of code output by ix86_expand_fp_compare. */
11536 switch (code)
11537 {
11538 case UNLE:
11539 case UNLT:
11540 case LTGT:
11541 case GT:
11542 case GE:
11543 case UNORDERED:
11544 case ORDERED:
11545 case UNEQ:
11546 return 4;
11547 break;
11548 case LT:
11549 case NE:
11550 case EQ:
11551 case UNGE:
11552 return 5;
11553 break;
11554 case LE:
11555 case UNGT:
11556 return 6;
11557 break;
11558 default:
11559 gcc_unreachable ();
11560 }
11561 }
11562
11563 /* Return cost of comparison done using fcomi operation.
11564 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11565 static int
11566 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
11567 {
11568 enum rtx_code bypass_code, first_code, second_code;
11569 /* Return an arbitrarily high cost when the instruction is not supported;
11570 this prevents gcc from using it. */
11571 if (!TARGET_CMOVE)
11572 return 1024;
11573 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11574 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
11575 }
11576
11577 /* Return cost of comparison done using sahf operation.
11578 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11579 static int
11580 ix86_fp_comparison_sahf_cost (enum rtx_code code)
11581 {
11582 enum rtx_code bypass_code, first_code, second_code;
11583 /* Return an arbitrarily high cost when the instruction is not preferred;
11584 this prevents gcc from using it. */
11585 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
11586 return 1024;
11587 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11588 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
11589 }
11590
11591 /* Compute cost of the comparison done using any method.
11592 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11593 static int
11594 ix86_fp_comparison_cost (enum rtx_code code)
11595 {
11596 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
11597 int min;
11598
11599 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
11600 sahf_cost = ix86_fp_comparison_sahf_cost (code);
11601
11602 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
11603 if (min > sahf_cost)
11604 min = sahf_cost;
11605 if (min > fcomi_cost)
11606 min = fcomi_cost;
11607 return min;
11608 }
11609
11610 /* Return true if we should use an FCOMI instruction for this
11611 fp comparison. */
11612
11613 int
11614 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
11615 {
11616 enum rtx_code swapped_code = swap_condition (code);
11617
11618 return ((ix86_fp_comparison_cost (code)
11619 == ix86_fp_comparison_fcomi_cost (code))
11620 || (ix86_fp_comparison_cost (swapped_code)
11621 == ix86_fp_comparison_fcomi_cost (swapped_code)));
11622 }
11623
11624 /* Swap, force into registers, or otherwise massage the two operands
11625 to a fp comparison. The operands are updated in place; the new
11626 comparison code is returned. */
11627
11628 static enum rtx_code
11629 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
11630 {
11631 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
11632 rtx op0 = *pop0, op1 = *pop1;
11633 enum machine_mode op_mode = GET_MODE (op0);
11634 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
11635
11636 /* All of the unordered compare instructions only work on registers.
11637 The same is true of the fcomi compare instructions. The XFmode
11638 compare instructions require registers except when comparing
11639 against zero or when converting operand 1 from fixed point to
11640 floating point. */
11641
11642 if (!is_sse
11643 && (fpcmp_mode == CCFPUmode
11644 || (op_mode == XFmode
11645 && ! (standard_80387_constant_p (op0) == 1
11646 || standard_80387_constant_p (op1) == 1)
11647 && GET_CODE (op1) != FLOAT)
11648 || ix86_use_fcomi_compare (code)))
11649 {
11650 op0 = force_reg (op_mode, op0);
11651 op1 = force_reg (op_mode, op1);
11652 }
11653 else
11654 {
11655 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
11656 things around if they appear profitable, otherwise force op0
11657 into a register. */
11658
11659 if (standard_80387_constant_p (op0) == 0
11660 || (MEM_P (op0)
11661 && ! (standard_80387_constant_p (op1) == 0
11662 || MEM_P (op1))))
11663 {
11664 rtx tmp;
11665 tmp = op0, op0 = op1, op1 = tmp;
11666 code = swap_condition (code);
11667 }
11668
11669 if (!REG_P (op0))
11670 op0 = force_reg (op_mode, op0);
11671
11672 if (CONSTANT_P (op1))
11673 {
11674 int tmp = standard_80387_constant_p (op1);
11675 if (tmp == 0)
11676 op1 = validize_mem (force_const_mem (op_mode, op1));
11677 else if (tmp == 1)
11678 {
11679 if (TARGET_CMOVE)
11680 op1 = force_reg (op_mode, op1);
11681 }
11682 else
11683 op1 = force_reg (op_mode, op1);
11684 }
11685 }
11686
11687 /* Try to rearrange the comparison to make it cheaper. */
11688 if (ix86_fp_comparison_cost (code)
11689 > ix86_fp_comparison_cost (swap_condition (code))
11690 && (REG_P (op1) || can_create_pseudo_p ()))
11691 {
11692 rtx tmp;
11693 tmp = op0, op0 = op1, op1 = tmp;
11694 code = swap_condition (code);
11695 if (!REG_P (op0))
11696 op0 = force_reg (op_mode, op0);
11697 }
11698
11699 *pop0 = op0;
11700 *pop1 = op1;
11701 return code;
11702 }
11703
11704 /* Convert the comparison codes we use to represent FP comparisons to the
11705 integer code that will result in a proper branch. Return UNKNOWN if no
11706 such code is available. */
11707
11708 enum rtx_code
11709 ix86_fp_compare_code_to_integer (enum rtx_code code)
11710 {
11711 switch (code)
11712 {
11713 case GT:
11714 return GTU;
11715 case GE:
11716 return GEU;
11717 case ORDERED:
11718 case UNORDERED:
11719 return code;
11720 break;
11721 case UNEQ:
11722 return EQ;
11723 break;
11724 case UNLT:
11725 return LTU;
11726 break;
11727 case UNLE:
11728 return LEU;
11729 break;
11730 case LTGT:
11731 return NE;
11732 break;
11733 default:
11734 return UNKNOWN;
11735 }
11736 }
11737
11738 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11739
11740 static rtx
11741 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
11742 rtx *second_test, rtx *bypass_test)
11743 {
11744 enum machine_mode fpcmp_mode, intcmp_mode;
11745 rtx tmp, tmp2;
11746 int cost = ix86_fp_comparison_cost (code);
11747 enum rtx_code bypass_code, first_code, second_code;
11748
11749 fpcmp_mode = ix86_fp_compare_mode (code);
11750 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
11751
11752 if (second_test)
11753 *second_test = NULL_RTX;
11754 if (bypass_test)
11755 *bypass_test = NULL_RTX;
11756
11757 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11758
11759 /* Do fcomi/sahf based test when profitable. */
11760 if (ix86_fp_comparison_arithmetics_cost (code) > cost
11761 && (bypass_code == UNKNOWN || bypass_test)
11762 && (second_code == UNKNOWN || second_test))
11763 {
11764 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11765 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
11766 tmp);
11767 if (TARGET_CMOVE)
11768 emit_insn (tmp);
11769 else
11770 {
11771 gcc_assert (TARGET_SAHF);
11772
11773 if (!scratch)
11774 scratch = gen_reg_rtx (HImode);
11775 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
11776
11777 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
11778 }
11779
11780 /* The FP codes work out to act like unsigned. */
11781 intcmp_mode = fpcmp_mode;
11782 code = first_code;
11783 if (bypass_code != UNKNOWN)
11784 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11785 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11786 const0_rtx);
11787 if (second_code != UNKNOWN)
11788 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11789 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11790 const0_rtx);
11791 }
11792 else
11793 {
11794 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11795 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11796 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11797 if (!scratch)
11798 scratch = gen_reg_rtx (HImode);
11799 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11800
11801 /* In the unordered case, we have to check C2 for NaNs, which
11802 doesn't happen to work out to anything nice combination-wise.
11803 So do some bit twiddling on the value we've got in AH to come
11804 up with an appropriate set of condition codes. */
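/* After the fnstsw, AH holds the FPU condition bits: C0 is 0x01, C2
   (set on unordered) is 0x04 and C3 is 0x40; these map to CF, PF and ZF
   in the fcomi table above.  Thus 0x45 tests C3|C2|C0 and 0x05 tests
   C2|C0 in the masks below.  */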
11805
11806 intcmp_mode = CCNOmode;
11807 switch (code)
11808 {
11809 case GT:
11810 case UNGT:
11811 if (code == GT || !TARGET_IEEE_FP)
11812 {
11813 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11814 code = EQ;
11815 }
11816 else
11817 {
11818 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11819 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11820 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
11821 intcmp_mode = CCmode;
11822 code = GEU;
11823 }
11824 break;
11825 case LT:
11826 case UNLT:
11827 if (code == LT && TARGET_IEEE_FP)
11828 {
11829 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11830 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
11831 intcmp_mode = CCmode;
11832 code = EQ;
11833 }
11834 else
11835 {
11836 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
11837 code = NE;
11838 }
11839 break;
11840 case GE:
11841 case UNGE:
11842 if (code == GE || !TARGET_IEEE_FP)
11843 {
11844 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
11845 code = EQ;
11846 }
11847 else
11848 {
11849 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11850 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11851 GEN_INT (0x01)));
11852 code = NE;
11853 }
11854 break;
11855 case LE:
11856 case UNLE:
11857 if (code == LE && TARGET_IEEE_FP)
11858 {
11859 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11860 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11861 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11862 intcmp_mode = CCmode;
11863 code = LTU;
11864 }
11865 else
11866 {
11867 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11868 code = NE;
11869 }
11870 break;
11871 case EQ:
11872 case UNEQ:
11873 if (code == EQ && TARGET_IEEE_FP)
11874 {
11875 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11876 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11877 intcmp_mode = CCmode;
11878 code = EQ;
11879 }
11880 else
11881 {
11882 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11883 code = NE;
11885 }
11886 break;
11887 case NE:
11888 case LTGT:
11889 if (code == NE && TARGET_IEEE_FP)
11890 {
11891 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11892 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11893 GEN_INT (0x40)));
11894 code = NE;
11895 }
11896 else
11897 {
11898 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11899 code = EQ;
11900 }
11901 break;
11902
11903 case UNORDERED:
11904 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11905 code = NE;
11906 break;
11907 case ORDERED:
11908 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11909 code = EQ;
11910 break;
11911
11912 default:
11913 gcc_unreachable ();
11914 }
11915 }
11916
11917 /* Return the test that should be put into the flags user, i.e.
11918 the bcc, scc, or cmov instruction. */
11919 return gen_rtx_fmt_ee (code, VOIDmode,
11920 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11921 const0_rtx);
11922 }
11923
11924 rtx
11925 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11926 {
11927 rtx op0, op1, ret;
11928 op0 = ix86_compare_op0;
11929 op1 = ix86_compare_op1;
11930
11931 if (second_test)
11932 *second_test = NULL_RTX;
11933 if (bypass_test)
11934 *bypass_test = NULL_RTX;
11935
11936 if (ix86_compare_emitted)
11937 {
11938 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11939 ix86_compare_emitted = NULL_RTX;
11940 }
11941 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
11942 {
11943 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
11944 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11945 second_test, bypass_test);
11946 }
11947 else
11948 ret = ix86_expand_int_compare (code, op0, op1);
11949
11950 return ret;
11951 }
11952
11953 /* Return true if the CODE will result in nontrivial jump sequence. */
11954 bool
11955 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11956 {
11957 enum rtx_code bypass_code, first_code, second_code;
11958 if (!TARGET_CMOVE)
11959 return true;
11960 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11961 return bypass_code != UNKNOWN || second_code != UNKNOWN;
11962 }
11963
11964 void
11965 ix86_expand_branch (enum rtx_code code, rtx label)
11966 {
11967 rtx tmp;
11968
11969 /* If we have emitted a compare insn, go straight to simple.
11970 ix86_expand_compare won't emit anything if ix86_compare_emitted
11971 is non-NULL. */
11972 if (ix86_compare_emitted)
11973 goto simple;
11974
11975 switch (GET_MODE (ix86_compare_op0))
11976 {
11977 case QImode:
11978 case HImode:
11979 case SImode:
11980 simple:
11981 tmp = ix86_expand_compare (code, NULL, NULL);
11982 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11983 gen_rtx_LABEL_REF (VOIDmode, label),
11984 pc_rtx);
11985 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11986 return;
11987
11988 case SFmode:
11989 case DFmode:
11990 case XFmode:
11991 {
11992 rtvec vec;
11993 int use_fcomi;
11994 enum rtx_code bypass_code, first_code, second_code;
11995
11996 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
11997 &ix86_compare_op1);
11998
11999 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12000
12001 /* Check whether we will use the natural sequence with one jump. If
12002 so, we can expand the jump early. Otherwise delay expansion by
12003 creating a compound insn so as not to confuse the optimizers. */
12004 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
12005 {
12006 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
12007 gen_rtx_LABEL_REF (VOIDmode, label),
12008 pc_rtx, NULL_RTX, NULL_RTX);
12009 }
12010 else
12011 {
12012 tmp = gen_rtx_fmt_ee (code, VOIDmode,
12013 ix86_compare_op0, ix86_compare_op1);
12014 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12015 gen_rtx_LABEL_REF (VOIDmode, label),
12016 pc_rtx);
12017 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
12018
12019 use_fcomi = ix86_use_fcomi_compare (code);
12020 vec = rtvec_alloc (3 + !use_fcomi);
12021 RTVEC_ELT (vec, 0) = tmp;
12022 RTVEC_ELT (vec, 1)
12023 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
12024 RTVEC_ELT (vec, 2)
12025 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
12026 if (! use_fcomi)
12027 RTVEC_ELT (vec, 3)
12028 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
12029
12030 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
12031 }
12032 return;
12033 }
12034
12035 case DImode:
12036 if (TARGET_64BIT)
12037 goto simple;
      /* FALLTHRU */
12038 case TImode:
12039 /* Expand DImode and TImode branches into multiple compare+branch. */
12040 {
12041 rtx lo[2], hi[2], label2;
12042 enum rtx_code code1, code2, code3;
12043 enum machine_mode submode;
12044
12045 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
12046 {
12047 tmp = ix86_compare_op0;
12048 ix86_compare_op0 = ix86_compare_op1;
12049 ix86_compare_op1 = tmp;
12050 code = swap_condition (code);
12051 }
12052 if (GET_MODE (ix86_compare_op0) == DImode)
12053 {
12054 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
12055 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
12056 submode = SImode;
12057 }
12058 else
12059 {
12060 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
12061 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
12062 submode = DImode;
12063 }
12064
12065 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
12066 avoid two branches. This costs one extra insn, so disable when
12067 optimizing for size. */
12068
12069 if ((code == EQ || code == NE)
12070 && (!optimize_size
12071 || hi[1] == const0_rtx || lo[1] == const0_rtx))
12072 {
12073 rtx xor0, xor1;
12074
12075 xor1 = hi[0];
12076 if (hi[1] != const0_rtx)
12077 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
12078 NULL_RTX, 0, OPTAB_WIDEN);
12079
12080 xor0 = lo[0];
12081 if (lo[1] != const0_rtx)
12082 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
12083 NULL_RTX, 0, OPTAB_WIDEN);
12084
12085 tmp = expand_binop (submode, ior_optab, xor1, xor0,
12086 NULL_RTX, 0, OPTAB_WIDEN);
12087
12088 ix86_compare_op0 = tmp;
12089 ix86_compare_op1 = const0_rtx;
12090 ix86_expand_branch (code, label);
12091 return;
12092 }
12093
12094 /* Otherwise, if we are doing less-than or greater-or-equal-than,
12095 op1 is a constant, and the low word is zero, then we can just
12096 examine the high word. Similarly for a low word of -1 and
12097 less-or-equal-than or greater-than. */
12098
12099 if (CONST_INT_P (hi[1]))
12100 switch (code)
12101 {
12102 case LT: case LTU: case GE: case GEU:
12103 if (lo[1] == const0_rtx)
12104 {
12105 ix86_compare_op0 = hi[0];
12106 ix86_compare_op1 = hi[1];
12107 ix86_expand_branch (code, label);
12108 return;
12109 }
12110 case LE: case LEU: case GT: case GTU:
12111 if (lo[1] == constm1_rtx)
12112 {
12113 ix86_compare_op0 = hi[0];
12114 ix86_compare_op1 = hi[1];
12115 ix86_expand_branch (code, label);
12116 return;
12117 }
12118 default:
12119 break;
12120 }
12121
12122 /* Otherwise, we need two or three jumps. */
12123
12124 label2 = gen_label_rtx ();
12125
12126 code1 = code;
12127 code2 = swap_condition (code);
12128 code3 = unsigned_condition (code);
12129
12130 switch (code)
12131 {
12132 case LT: case GT: case LTU: case GTU:
12133 break;
12134
12135 case LE: code1 = LT; code2 = GT; break;
12136 case GE: code1 = GT; code2 = LT; break;
12137 case LEU: code1 = LTU; code2 = GTU; break;
12138 case GEU: code1 = GTU; code2 = LTU; break;
12139
12140 case EQ: code1 = UNKNOWN; code2 = NE; break;
12141 case NE: code2 = UNKNOWN; break;
12142
12143 default:
12144 gcc_unreachable ();
12145 }
12146
12147 /*
12148 * a < b =>
12149 * if (hi(a) < hi(b)) goto true;
12150 * if (hi(a) > hi(b)) goto false;
12151 * if (lo(a) < lo(b)) goto true;
12152 * false:
12153 */
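/* The low-word comparison (CODE3) is always the unsigned variant: once
   the high words compare equal, the overall ordering is decided by the
   low words taken as unsigned quantities.  */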
12154
12155 ix86_compare_op0 = hi[0];
12156 ix86_compare_op1 = hi[1];
12157
12158 if (code1 != UNKNOWN)
12159 ix86_expand_branch (code1, label);
12160 if (code2 != UNKNOWN)
12161 ix86_expand_branch (code2, label2);
12162
12163 ix86_compare_op0 = lo[0];
12164 ix86_compare_op1 = lo[1];
12165 ix86_expand_branch (code3, label);
12166
12167 if (code2 != UNKNOWN)
12168 emit_label (label2);
12169 return;
12170 }
12171
12172 default:
12173 gcc_unreachable ();
12174 }
12175 }
12176
12177 /* Split branch based on floating point condition. */
12178 void
12179 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
12180 rtx target1, rtx target2, rtx tmp, rtx pushed)
12181 {
12182 rtx second, bypass;
12183 rtx label = NULL_RTX;
12184 rtx condition;
12185 int bypass_probability = -1, second_probability = -1, probability = -1;
12186 rtx i;
12187
12188 if (target2 != pc_rtx)
12189 {
12190 rtx tmp = target2;
12191 code = reverse_condition_maybe_unordered (code);
12192 target2 = target1;
12193 target1 = tmp;
12194 }
12195
12196 condition = ix86_expand_fp_compare (code, op1, op2,
12197 tmp, &second, &bypass);
12198
12199 /* Remove pushed operand from stack. */
12200 if (pushed)
12201 ix86_free_from_memory (GET_MODE (pushed));
12202
12203 if (split_branch_probability >= 0)
12204 {
12205 /* Distribute the probabilities across the jumps.
12206 Assume that the BYPASS and SECOND tests are always
12207 for UNORDERED. */
12208 probability = split_branch_probability;
12209
12210 /* A value of 1 is low enough that there is no need to update the
12211 probability. Later we may run some experiments and see
12212 if unordered values are more frequent in practice. */
12213 if (bypass)
12214 bypass_probability = 1;
12215 if (second)
12216 second_probability = 1;
12217 }
12218 if (bypass != NULL_RTX)
12219 {
12220 label = gen_label_rtx ();
12221 i = emit_jump_insn (gen_rtx_SET
12222 (VOIDmode, pc_rtx,
12223 gen_rtx_IF_THEN_ELSE (VOIDmode,
12224 bypass,
12225 gen_rtx_LABEL_REF (VOIDmode,
12226 label),
12227 pc_rtx)));
12228 if (bypass_probability >= 0)
12229 REG_NOTES (i)
12230 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12231 GEN_INT (bypass_probability),
12232 REG_NOTES (i));
12233 }
12234 i = emit_jump_insn (gen_rtx_SET
12235 (VOIDmode, pc_rtx,
12236 gen_rtx_IF_THEN_ELSE (VOIDmode,
12237 condition, target1, target2)));
12238 if (probability >= 0)
12239 REG_NOTES (i)
12240 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12241 GEN_INT (probability),
12242 REG_NOTES (i));
12243 if (second != NULL_RTX)
12244 {
12245 i = emit_jump_insn (gen_rtx_SET
12246 (VOIDmode, pc_rtx,
12247 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
12248 target2)));
12249 if (second_probability >= 0)
12250 REG_NOTES (i)
12251 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12252 GEN_INT (second_probability),
12253 REG_NOTES (i));
12254 }
12255 if (label != NULL_RTX)
12256 emit_label (label);
12257 }
12258
12259 int
12260 ix86_expand_setcc (enum rtx_code code, rtx dest)
12261 {
12262 rtx ret, tmp, tmpreg, equiv;
12263 rtx second_test, bypass_test;
12264
12265 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
12266 return 0; /* FAIL */
12267
12268 gcc_assert (GET_MODE (dest) == QImode);
12269
12270 ret = ix86_expand_compare (code, &second_test, &bypass_test);
12271 PUT_MODE (ret, QImode);
12272
12273 tmp = dest;
12274 tmpreg = dest;
12275
12276 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
12277 if (bypass_test || second_test)
12278 {
12279 rtx test = second_test;
12280 int bypass = 0;
12281 rtx tmp2 = gen_reg_rtx (QImode);
12282 if (bypass_test)
12283 {
12284 gcc_assert (!second_test);
12285 test = bypass_test;
12286 bypass = 1;
12287 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
12288 }
12289 PUT_MODE (test, QImode);
12290 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
12291
12292 if (bypass)
12293 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
12294 else
12295 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
12296 }
12297
12298 /* Attach a REG_EQUAL note describing the comparison result. */
12299 if (ix86_compare_op0 && ix86_compare_op1)
12300 {
12301 equiv = simplify_gen_relational (code, QImode,
12302 GET_MODE (ix86_compare_op0),
12303 ix86_compare_op0, ix86_compare_op1);
12304 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
12305 }
12306
12307 return 1; /* DONE */
12308 }
12309
12310 /* Expand a comparison setting or clearing the carry flag. Return true
12311 when successful, and set *POP to the comparison for the operation. */
12312 static bool
12313 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
12314 {
12315 enum machine_mode mode =
12316 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
12317
12318 /* Do not handle DImode compares that go through the special path. */
12319 if (mode == (TARGET_64BIT ? TImode : DImode))
12320 return false;
12321
12322 if (SCALAR_FLOAT_MODE_P (mode))
12323 {
12324 rtx second_test = NULL, bypass_test = NULL;
12325 rtx compare_op, compare_seq;
12326
12327 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
12328
12329 /* Shortcut: the following common codes never translate
12330 into carry flag compares. */
12331 if (code == EQ || code == NE || code == UNEQ || code == LTGT
12332 || code == ORDERED || code == UNORDERED)
12333 return false;
12334
12335 /* These comparisons require zero flag; swap operands so they won't. */
12336 if ((code == GT || code == UNLE || code == LE || code == UNGT)
12337 && !TARGET_IEEE_FP)
12338 {
12339 rtx tmp = op0;
12340 op0 = op1;
12341 op1 = tmp;
12342 code = swap_condition (code);
12343 }
12344
12345 /* Try to expand the comparison and verify that we end up with a
12346 carry flag based comparison. This fails only when we decide to
12347 expand the comparison using arithmetic, which is not a common
12348 scenario. */
12349 start_sequence ();
12350 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
12351 &second_test, &bypass_test);
12352 compare_seq = get_insns ();
12353 end_sequence ();
12354
12355 if (second_test || bypass_test)
12356 return false;
12357
12358 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12359 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12360 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
12361 else
12362 code = GET_CODE (compare_op);
12363
12364 if (code != LTU && code != GEU)
12365 return false;
12366
12367 emit_insn (compare_seq);
12368 *pop = compare_op;
12369 return true;
12370 }
12371
12372 if (!INTEGRAL_MODE_P (mode))
12373 return false;
12374
12375 switch (code)
12376 {
12377 case LTU:
12378 case GEU:
12379 break;
12380
12381 /* Convert a==0 into (unsigned)a<1. */
12382 case EQ:
12383 case NE:
12384 if (op1 != const0_rtx)
12385 return false;
12386 op1 = const1_rtx;
12387 code = (code == EQ ? LTU : GEU);
12388 break;
12389
12390 /* Convert a>b into b<a or a>=b-1. */
12391 case GTU:
12392 case LEU:
12393 if (CONST_INT_P (op1))
12394 {
12395 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
12396 /* Bail out on overflow. We could still swap the operands, but that
12397 would force loading the constant into a register. */
12398 if (op1 == const0_rtx
12399 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
12400 return false;
12401 code = (code == GTU ? GEU : LTU);
12402 }
12403 else
12404 {
12405 rtx tmp = op1;
12406 op1 = op0;
12407 op0 = tmp;
12408 code = (code == GTU ? LTU : GEU);
12409 }
12410 break;
12411
12412 /* Convert a>=0 into (unsigned)a<0x80000000. */
12413 case LT:
12414 case GE:
12415 if (mode == DImode || op1 != const0_rtx)
12416 return false;
12417 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12418 code = (code == LT ? GEU : LTU);
12419 break;
12420 case LE:
12421 case GT:
12422 if (mode == DImode || op1 != constm1_rtx)
12423 return false;
12424 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12425 code = (code == LE ? GEU : LTU);
12426 break;
12427
12428 default:
12429 return false;
12430 }
12431 /* Swapping operands may cause a constant to appear as the first operand. */
12432 if (!nonimmediate_operand (op0, VOIDmode))
12433 {
12434 if (!can_create_pseudo_p ())
12435 return false;
12436 op0 = force_reg (mode, op0);
12437 }
12438 ix86_compare_op0 = op0;
12439 ix86_compare_op1 = op1;
12440 *pop = ix86_expand_compare (code, NULL, NULL);
12441 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
12442 return true;
12443 }
12444
12445 int
12446 ix86_expand_int_movcc (rtx operands[])
12447 {
12448 enum rtx_code code = GET_CODE (operands[1]), compare_code;
12449 rtx compare_seq, compare_op;
12450 rtx second_test, bypass_test;
12451 enum machine_mode mode = GET_MODE (operands[0]);
12452 bool sign_bit_compare_p = false;
12453
12454 start_sequence ();
12455 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12456 compare_seq = get_insns ();
12457 end_sequence ();
12458
12459 compare_code = GET_CODE (compare_op);
12460
12461 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
12462 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
12463 sign_bit_compare_p = true;
12464
12465 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
12466 HImode insns, we'd be swallowed in word prefix ops. */
12467
12468 if ((mode != HImode || TARGET_FAST_PREFIX)
12469 && (mode != (TARGET_64BIT ? TImode : DImode))
12470 && CONST_INT_P (operands[2])
12471 && CONST_INT_P (operands[3]))
12472 {
12473 rtx out = operands[0];
12474 HOST_WIDE_INT ct = INTVAL (operands[2]);
12475 HOST_WIDE_INT cf = INTVAL (operands[3]);
12476 HOST_WIDE_INT diff;
12477
12478 diff = ct - cf;
12479 /* Sign bit compares are better done using shifts than by using
12480 sbb. */
12481 if (sign_bit_compare_p
12482 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12483 ix86_compare_op1, &compare_op))
12484 {
12485 /* Detect overlap between destination and compare sources. */
12486 rtx tmp = out;
12487
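/* The x86_mov*cc_0_m1 patterns used below emit "sbb reg,reg", which
   yields all-ones when the carry flag is set and zero otherwise; the
   arithmetic on CT/CF/DIFF that follows maps this -1/0 value onto the
   two requested constants.  */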
12488 if (!sign_bit_compare_p)
12489 {
12490 bool fpcmp = false;
12491
12492 compare_code = GET_CODE (compare_op);
12493
12494 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12495 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12496 {
12497 fpcmp = true;
12498 compare_code = ix86_fp_compare_code_to_integer (compare_code);
12499 }
12500
12501 /* To simplify the rest of the code, restrict to the GEU case. */
12502 if (compare_code == LTU)
12503 {
12504 HOST_WIDE_INT tmp = ct;
12505 ct = cf;
12506 cf = tmp;
12507 compare_code = reverse_condition (compare_code);
12508 code = reverse_condition (code);
12509 }
12510 else
12511 {
12512 if (fpcmp)
12513 PUT_CODE (compare_op,
12514 reverse_condition_maybe_unordered
12515 (GET_CODE (compare_op)));
12516 else
12517 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12518 }
12519 diff = ct - cf;
12520
12521 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
12522 || reg_overlap_mentioned_p (out, ix86_compare_op1))
12523 tmp = gen_reg_rtx (mode);
12524
12525 if (mode == DImode)
12526 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
12527 else
12528 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
12529 }
12530 else
12531 {
12532 if (code == GT || code == GE)
12533 code = reverse_condition (code);
12534 else
12535 {
12536 HOST_WIDE_INT tmp = ct;
12537 ct = cf;
12538 cf = tmp;
12539 diff = ct - cf;
12540 }
12541 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
12542 ix86_compare_op1, VOIDmode, 0, -1);
12543 }
12544
12545 if (diff == 1)
12546 {
12547 /*
12548 * cmpl op0,op1
12549 * sbbl dest,dest
12550 * [addl dest, ct]
12551 *
12552 * Size 5 - 8.
12553 */
12554 if (ct)
12555 tmp = expand_simple_binop (mode, PLUS,
12556 tmp, GEN_INT (ct),
12557 copy_rtx (tmp), 1, OPTAB_DIRECT);
12558 }
12559 else if (cf == -1)
12560 {
12561 /*
12562 * cmpl op0,op1
12563 * sbbl dest,dest
12564 * orl $ct, dest
12565 *
12566 * Size 8.
12567 */
12568 tmp = expand_simple_binop (mode, IOR,
12569 tmp, GEN_INT (ct),
12570 copy_rtx (tmp), 1, OPTAB_DIRECT);
12571 }
12572 else if (diff == -1 && ct)
12573 {
12574 /*
12575 * cmpl op0,op1
12576 * sbbl dest,dest
12577 * notl dest
12578 * [addl dest, cf]
12579 *
12580 * Size 8 - 11.
12581 */
12582 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12583 if (cf)
12584 tmp = expand_simple_binop (mode, PLUS,
12585 copy_rtx (tmp), GEN_INT (cf),
12586 copy_rtx (tmp), 1, OPTAB_DIRECT);
12587 }
12588 else
12589 {
12590 /*
12591 * cmpl op0,op1
12592 * sbbl dest,dest
12593 * [notl dest]
12594 * andl cf - ct, dest
12595 * [addl dest, ct]
12596 *
12597 * Size 8 - 11.
12598 */
12599
12600 if (cf == 0)
12601 {
12602 cf = ct;
12603 ct = 0;
12604 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12605 }
12606
12607 tmp = expand_simple_binop (mode, AND,
12608 copy_rtx (tmp),
12609 gen_int_mode (cf - ct, mode),
12610 copy_rtx (tmp), 1, OPTAB_DIRECT);
12611 if (ct)
12612 tmp = expand_simple_binop (mode, PLUS,
12613 copy_rtx (tmp), GEN_INT (ct),
12614 copy_rtx (tmp), 1, OPTAB_DIRECT);
12615 }
12616
12617 if (!rtx_equal_p (tmp, out))
12618 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
12619
12620 return 1; /* DONE */
12621 }
12622
12623 if (diff < 0)
12624 {
12625 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12626
12627 HOST_WIDE_INT tmp;
12628 tmp = ct, ct = cf, cf = tmp;
12629 diff = -diff;
12630
12631 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12632 {
12633 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12634
12635 /* We may be reversing unordered compare to normal compare, that
12636 is not valid in general (we may convert non-trapping condition
12637 to trapping one), however on i386 we currently emit all
12638 comparisons unordered. */
12639 compare_code = reverse_condition_maybe_unordered (compare_code);
12640 code = reverse_condition_maybe_unordered (code);
12641 }
12642 else
12643 {
12644 compare_code = reverse_condition (compare_code);
12645 code = reverse_condition (code);
12646 }
12647 }
12648
12649 compare_code = UNKNOWN;
12650 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
12651 && CONST_INT_P (ix86_compare_op1))
12652 {
12653 if (ix86_compare_op1 == const0_rtx
12654 && (code == LT || code == GE))
12655 compare_code = code;
12656 else if (ix86_compare_op1 == constm1_rtx)
12657 {
12658 if (code == LE)
12659 compare_code = LT;
12660 else if (code == GT)
12661 compare_code = GE;
12662 }
12663 }
12664
12665 /* Optimize dest = (op0 < 0) ? -1 : cf. */
12666 if (compare_code != UNKNOWN
12667 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
12668 && (cf == -1 || ct == -1))
12669 {
12670 /* If lea code below could be used, only optimize
12671 if it results in a 2 insn sequence. */
12672
12673 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
12674 || diff == 3 || diff == 5 || diff == 9)
12675 || (compare_code == LT && ct == -1)
12676 || (compare_code == GE && cf == -1))
12677 {
12678 /*
12679 * notl op1 (if necessary)
12680 * sarl $31, op1
12681 * orl cf, op1
12682 */
12683 if (ct != -1)
12684 {
12685 cf = ct;
12686 ct = -1;
12687 code = reverse_condition (code);
12688 }
12689
12690 out = emit_store_flag (out, code, ix86_compare_op0,
12691 ix86_compare_op1, VOIDmode, 0, -1);
12692
12693 out = expand_simple_binop (mode, IOR,
12694 out, GEN_INT (cf),
12695 out, 1, OPTAB_DIRECT);
12696 if (out != operands[0])
12697 emit_move_insn (operands[0], out);
12698
12699 return 1; /* DONE */
12700 }
12701 }
12702
12703
12704 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
12705 || diff == 3 || diff == 5 || diff == 9)
12706 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
12707 && (mode != DImode
12708 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
12709 {
12710 /*
12711 * xorl dest,dest
12712 * cmpl op1,op2
12713 * setcc dest
12714 * lea cf(dest*(ct-cf)),dest
12715 *
12716 * Size 14.
12717 *
12718 * This also catches the degenerate setcc-only case.
12719 */
12720
12721 rtx tmp;
12722 int nops;
12723
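/* OUT is the 0/1 setcc result.  The rtx built below has the form
   out * diff + cf with diff restricted to 1, 2, 3, 4, 5, 8 or 9, i.e.
   index * {1,2,4,8} optionally plus the same register, so force_operand
   can usually emit it as a single lea.  */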
12724 out = emit_store_flag (out, code, ix86_compare_op0,
12725 ix86_compare_op1, VOIDmode, 0, 1);
12726
12727 nops = 0;
12728 /* On x86_64 the lea instruction operates on Pmode, so we need
12729 to get the arithmetic done in the proper mode to match. */
12730 if (diff == 1)
12731 tmp = copy_rtx (out);
12732 else
12733 {
12734 rtx out1;
12735 out1 = copy_rtx (out);
12736 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
12737 nops++;
12738 if (diff & 1)
12739 {
12740 tmp = gen_rtx_PLUS (mode, tmp, out1);
12741 nops++;
12742 }
12743 }
12744 if (cf != 0)
12745 {
12746 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
12747 nops++;
12748 }
12749 if (!rtx_equal_p (tmp, out))
12750 {
12751 if (nops == 1)
12752 out = force_operand (tmp, copy_rtx (out));
12753 else
12754 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
12755 }
12756 if (!rtx_equal_p (out, operands[0]))
12757 emit_move_insn (operands[0], copy_rtx (out));
12758
12759 return 1; /* DONE */
12760 }
12761
12762 /*
12763 * General case: Jumpful:
12764 * xorl dest,dest cmpl op1, op2
12765 * cmpl op1, op2 movl ct, dest
12766 * setcc dest jcc 1f
12767 * decl dest movl cf, dest
12768 * andl (cf-ct),dest 1:
12769 * addl ct,dest
12770 *
12771 * Size 20. Size 14.
12772 *
12773 * This is reasonably steep, but branch mispredict costs are
12774 * high on modern cpus, so consider failing only if optimizing
12775 * for space.
12776 */
12777
12778 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12779 && BRANCH_COST >= 2)
12780 {
12781 if (cf == 0)
12782 {
12783 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12784
12785 cf = ct;
12786 ct = 0;
12787
12788 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12789 {
12790 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12791
12792 /* We may be reversing unordered compare to normal compare,
12793 that is not valid in general (we may convert non-trapping
12794 condition to trapping one), however on i386 we currently
12795 emit all comparisons unordered. */
12796 code = reverse_condition_maybe_unordered (code);
12797 }
12798 else
12799 {
12800 code = reverse_condition (code);
12801 if (compare_code != UNKNOWN)
12802 compare_code = reverse_condition (compare_code);
12803 }
12804 }
12805
12806 if (compare_code != UNKNOWN)
12807 {
12808 /* notl op1 (if needed)
12809 sarl $31, op1
12810 andl (cf-ct), op1
12811 addl ct, op1
12812
12813 For x < 0 (resp. x <= -1) there will be no notl,
12814 so if possible swap the constants to get rid of the
12815 complement.
12816 True/false will be -1/0 while code below (store flag
12817 followed by decrement) is 0/-1, so the constants need
12818 to be exchanged once more. */
12819
12820 if (compare_code == GE || !cf)
12821 {
12822 code = reverse_condition (code);
12823 compare_code = LT;
12824 }
12825 else
12826 {
12827 HOST_WIDE_INT tmp = cf;
12828 cf = ct;
12829 ct = tmp;
12830 }
12831
12832 out = emit_store_flag (out, code, ix86_compare_op0,
12833 ix86_compare_op1, VOIDmode, 0, -1);
12834 }
12835 else
12836 {
12837 out = emit_store_flag (out, code, ix86_compare_op0,
12838 ix86_compare_op1, VOIDmode, 0, 1);
12839
12840 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
12841 copy_rtx (out), 1, OPTAB_DIRECT);
12842 }
12843
12844 out = expand_simple_binop (mode, AND, copy_rtx (out),
12845 gen_int_mode (cf - ct, mode),
12846 copy_rtx (out), 1, OPTAB_DIRECT);
12847 if (ct)
12848 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
12849 copy_rtx (out), 1, OPTAB_DIRECT);
12850 if (!rtx_equal_p (out, operands[0]))
12851 emit_move_insn (operands[0], copy_rtx (out));
12852
12853 return 1; /* DONE */
12854 }
12855 }
12856
12857 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12858 {
12859 /* Try a few things more with specific constants and a variable. */
12860
12861 optab op;
12862 rtx var, orig_out, out, tmp;
12863
12864 if (BRANCH_COST <= 2)
12865 return 0; /* FAIL */
12866
12867 /* If one of the two operands is an interesting constant, load a
12868 constant with the above and mask it in with a logical operation. */
12869
12870 if (CONST_INT_P (operands[2]))
12871 {
12872 var = operands[3];
12873 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
12874 operands[3] = constm1_rtx, op = and_optab;
12875 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
12876 operands[3] = const0_rtx, op = ior_optab;
12877 else
12878 return 0; /* FAIL */
12879 }
12880 else if (CONST_INT_P (operands[3]))
12881 {
12882 var = operands[2];
12883 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
12884 operands[2] = constm1_rtx, op = and_optab;
12885 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
12886 operands[2] = const0_rtx, op = ior_optab;
12887 else
12888 return 0; /* FAIL */
12889 }
12890 else
12891 return 0; /* FAIL */
12892
12893 orig_out = operands[0];
12894 tmp = gen_reg_rtx (mode);
12895 operands[0] = tmp;
12896
12897 /* Recurse to get the constant loaded. */
12898 if (ix86_expand_int_movcc (operands) == 0)
12899 return 0; /* FAIL */
12900
12901 /* Mask in the interesting variable. */
12902 out = expand_binop (mode, op, var, tmp, orig_out, 0,
12903 OPTAB_WIDEN);
12904 if (!rtx_equal_p (out, orig_out))
12905 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
12906
12907 return 1; /* DONE */
12908 }
12909
12910 /*
12911 * For comparison with above,
12912 *
12913 * movl cf,dest
12914 * movl ct,tmp
12915 * cmpl op1,op2
12916 * cmovcc tmp,dest
12917 *
12918 * Size 15.
12919 */
12920
12921 if (! nonimmediate_operand (operands[2], mode))
12922 operands[2] = force_reg (mode, operands[2]);
12923 if (! nonimmediate_operand (operands[3], mode))
12924 operands[3] = force_reg (mode, operands[3]);
12925
12926 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12927 {
12928 rtx tmp = gen_reg_rtx (mode);
12929 emit_move_insn (tmp, operands[3]);
12930 operands[3] = tmp;
12931 }
12932 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12933 {
12934 rtx tmp = gen_reg_rtx (mode);
12935 emit_move_insn (tmp, operands[2]);
12936 operands[2] = tmp;
12937 }
12938
12939 if (! register_operand (operands[2], VOIDmode)
12940 && (mode == QImode
12941 || ! register_operand (operands[3], VOIDmode)))
12942 operands[2] = force_reg (mode, operands[2]);
12943
12944 if (mode == QImode
12945 && ! register_operand (operands[3], VOIDmode))
12946 operands[3] = force_reg (mode, operands[3]);
12947
12948 emit_insn (compare_seq);
12949 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12950 gen_rtx_IF_THEN_ELSE (mode,
12951 compare_op, operands[2],
12952 operands[3])));
12953 if (bypass_test)
12954 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12955 gen_rtx_IF_THEN_ELSE (mode,
12956 bypass_test,
12957 copy_rtx (operands[3]),
12958 copy_rtx (operands[0]))));
12959 if (second_test)
12960 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12961 gen_rtx_IF_THEN_ELSE (mode,
12962 second_test,
12963 copy_rtx (operands[2]),
12964 copy_rtx (operands[0]))));
12965
12966 return 1; /* DONE */
12967 }
12968
12969 /* Swap, force into registers, or otherwise massage the two operands
12970 to an sse comparison with a mask result. Thus we differ a bit from
12971 ix86_prepare_fp_compare_args which expects to produce a flags result.
12972
12973 The DEST operand exists to help determine whether to commute commutative
12974 operators. The POP0/POP1 operands are updated in place. The new
12975 comparison code is returned, or UNKNOWN if not implementable. */
12976
12977 static enum rtx_code
12978 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12979 rtx *pop0, rtx *pop1)
12980 {
12981 rtx tmp;
12982
12983 switch (code)
12984 {
12985 case LTGT:
12986 case UNEQ:
12987 /* We have no LTGT as an operator. We could implement it with
12988 NE & ORDERED, but this requires an extra temporary. It's
12989 not clear that it's worth it. */
12990 return UNKNOWN;
12991
12992 case LT:
12993 case LE:
12994 case UNGT:
12995 case UNGE:
12996 /* These are supported directly. */
12997 break;
12998
12999 case EQ:
13000 case NE:
13001 case UNORDERED:
13002 case ORDERED:
13003 /* For commutative operators, try to canonicalize the destination
13004 operand to be first in the comparison - this helps reload to
13005 avoid extra moves. */
13006 if (!dest || !rtx_equal_p (dest, *pop1))
13007 break;
13008 /* FALLTHRU */
13009
13010 case GE:
13011 case GT:
13012 case UNLE:
13013 case UNLT:
13014 /* These are not supported directly. Swap the comparison operands
13015 to transform into something that is supported. */
13016 tmp = *pop0;
13017 *pop0 = *pop1;
13018 *pop1 = tmp;
13019 code = swap_condition (code);
13020 break;
13021
13022 default:
13023 gcc_unreachable ();
13024 }
13025
13026 return code;
13027 }
13028
13029 /* Detect conditional moves that exactly match min/max operational
13030 semantics. Note that this is IEEE safe, as long as we don't
13031 interchange the operands.
13032
13033 Returns FALSE if this conditional move doesn't match a MIN/MAX,
13034 and TRUE if the operation is successful and instructions are emitted. */
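
/* Two shapes are recognized: "a < b ? a : b" (a min) and "a < b ? b : a"
   (a max), together with the equivalent UNGE forms obtained by swapping
   the arms.  */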
13035
13036 static bool
13037 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
13038 rtx cmp_op1, rtx if_true, rtx if_false)
13039 {
13040 enum machine_mode mode;
13041 bool is_min;
13042 rtx tmp;
13043
13044 if (code == LT)
13045 ;
13046 else if (code == UNGE)
13047 {
13048 tmp = if_true;
13049 if_true = if_false;
13050 if_false = tmp;
13051 }
13052 else
13053 return false;
13054
13055 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
13056 is_min = true;
13057 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
13058 is_min = false;
13059 else
13060 return false;
13061
13062 mode = GET_MODE (dest);
13063
13064 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
13065 but MODE may be a vector mode and thus not appropriate. */
13066 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
13067 {
13068 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
13069 rtvec v;
13070
13071 if_true = force_reg (mode, if_true);
13072 v = gen_rtvec (2, if_true, if_false);
13073 tmp = gen_rtx_UNSPEC (mode, v, u);
13074 }
13075 else
13076 {
13077 code = is_min ? SMIN : SMAX;
13078 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
13079 }
13080
13081 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
13082 return true;
13083 }
13084
13085 /* Expand an sse vector comparison. Return the register with the result. */
13086
13087 static rtx
13088 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
13089 rtx op_true, rtx op_false)
13090 {
13091 enum machine_mode mode = GET_MODE (dest);
13092 rtx x;
13093
13094 cmp_op0 = force_reg (mode, cmp_op0);
13095 if (!nonimmediate_operand (cmp_op1, mode))
13096 cmp_op1 = force_reg (mode, cmp_op1);
13097
13098 if (optimize
13099 || reg_overlap_mentioned_p (dest, op_true)
13100 || reg_overlap_mentioned_p (dest, op_false))
13101 dest = gen_reg_rtx (mode);
13102
13103 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
13104 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13105
13106 return dest;
13107 }
13108
13109 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
13110 operations. This is used for both scalar and vector conditional moves. */
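
/* In the general case this computes
     dest = (cmp & op_true) | (~cmp & op_false),
   with single-AND shortcuts when one arm is zero and a plain pcmov
   conditional move when SSE5 is available.  */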
13111
13112 static void
13113 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
13114 {
13115 enum machine_mode mode = GET_MODE (dest);
13116 rtx t2, t3, x;
13117
13118 if (TARGET_SSE5)
13119 {
13120 rtx pcmov = gen_rtx_SET (mode, dest,
13121 gen_rtx_IF_THEN_ELSE (mode, cmp,
13122 op_true,
13123 op_false));
13124 emit_insn (pcmov);
13125 }
13126 else if (op_false == CONST0_RTX (mode))
13127 {
13128 op_true = force_reg (mode, op_true);
13129 x = gen_rtx_AND (mode, cmp, op_true);
13130 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13131 }
13132 else if (op_true == CONST0_RTX (mode))
13133 {
13134 op_false = force_reg (mode, op_false);
13135 x = gen_rtx_NOT (mode, cmp);
13136 x = gen_rtx_AND (mode, x, op_false);
13137 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13138 }
13139 else
13140 {
13141 op_true = force_reg (mode, op_true);
13142 op_false = force_reg (mode, op_false);
13143
13144 t2 = gen_reg_rtx (mode);
13145 if (optimize)
13146 t3 = gen_reg_rtx (mode);
13147 else
13148 t3 = dest;
13149
13150 x = gen_rtx_AND (mode, op_true, cmp);
13151 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
13152
13153 x = gen_rtx_NOT (mode, cmp);
13154 x = gen_rtx_AND (mode, x, op_false);
13155 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
13156
13157 x = gen_rtx_IOR (mode, t3, t2);
13158 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13159 }
13160 }
13161
13162 /* Expand a floating-point conditional move. Return true if successful. */
13163
13164 int
13165 ix86_expand_fp_movcc (rtx operands[])
13166 {
13167 enum machine_mode mode = GET_MODE (operands[0]);
13168 enum rtx_code code = GET_CODE (operands[1]);
13169 rtx tmp, compare_op, second_test, bypass_test;
13170
13171 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
13172 {
13173 enum machine_mode cmode;
13174
13175 /* Since we've no cmove for sse registers, don't force bad register
13176 allocation just to gain access to it. Deny movcc when the
13177 comparison mode doesn't match the move mode. */
13178 cmode = GET_MODE (ix86_compare_op0);
13179 if (cmode == VOIDmode)
13180 cmode = GET_MODE (ix86_compare_op1);
13181 if (cmode != mode)
13182 return 0;
13183
13184 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13185 &ix86_compare_op0,
13186 &ix86_compare_op1);
13187 if (code == UNKNOWN)
13188 return 0;
13189
13190 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
13191 ix86_compare_op1, operands[2],
13192 operands[3]))
13193 return 1;
13194
13195 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
13196 ix86_compare_op1, operands[2], operands[3]);
13197 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
13198 return 1;
13199 }
13200
13201 /* The floating point conditional move instructions don't directly
13202 support conditions resulting from a signed integer comparison. */
13203
13204 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13208
13209 if (!fcmov_comparison_operator (compare_op, VOIDmode))
13210 {
13211 gcc_assert (!second_test && !bypass_test);
13212 tmp = gen_reg_rtx (QImode);
13213 ix86_expand_setcc (code, tmp);
13214 code = NE;
13215 ix86_compare_op0 = tmp;
13216 ix86_compare_op1 = const0_rtx;
13217 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13218 }
13219 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
13220 {
13221 tmp = gen_reg_rtx (mode);
13222 emit_move_insn (tmp, operands[3]);
13223 operands[3] = tmp;
13224 }
13225 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
13226 {
13227 tmp = gen_reg_rtx (mode);
13228 emit_move_insn (tmp, operands[2]);
13229 operands[2] = tmp;
13230 }
13231
13232 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13233 gen_rtx_IF_THEN_ELSE (mode, compare_op,
13234 operands[2], operands[3])));
13235 if (bypass_test)
13236 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13237 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
13238 operands[3], operands[0])));
13239 if (second_test)
13240 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13241 gen_rtx_IF_THEN_ELSE (mode, second_test,
13242 operands[2], operands[0])));
13243
13244 return 1;
13245 }
13246
13247 /* Expand a floating-point vector conditional move; a vcond operation
13248 rather than a movcc operation. */
13249
13250 bool
13251 ix86_expand_fp_vcond (rtx operands[])
13252 {
13253 enum rtx_code code = GET_CODE (operands[3]);
13254 rtx cmp;
13255
13256 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13257 &operands[4], &operands[5]);
13258 if (code == UNKNOWN)
13259 return false;
13260
13261 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
13262 operands[5], operands[1], operands[2]))
13263 return true;
13264
13265 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
13266 operands[1], operands[2]);
13267 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
13268 return true;
13269 }
13270
13271 /* Expand a signed/unsigned integral vector conditional move. */
13272
13273 bool
13274 ix86_expand_int_vcond (rtx operands[])
13275 {
13276 enum machine_mode mode = GET_MODE (operands[0]);
13277 enum rtx_code code = GET_CODE (operands[3]);
13278 bool negate = false;
13279 rtx x, cop0, cop1;
13280
13281 cop0 = operands[4];
13282 cop1 = operands[5];
13283
13284 /* Canonicalize the comparison to EQ, GT, GTU. */
13285 switch (code)
13286 {
13287 case EQ:
13288 case GT:
13289 case GTU:
13290 break;
13291
13292 case NE:
13293 case LE:
13294 case LEU:
13295 code = reverse_condition (code);
13296 negate = true;
13297 break;
13298
13299 case GE:
13300 case GEU:
13301 code = reverse_condition (code);
13302 negate = true;
13303 /* FALLTHRU */
13304
13305 case LT:
13306 case LTU:
13307 code = swap_condition (code);
13308 x = cop0, cop0 = cop1, cop1 = x;
13309 break;
13310
13311 default:
13312 gcc_unreachable ();
13313 }
13314
13315 /* Only SSE4.1/SSE4.2 supports V2DImode. */
13316 if (mode == V2DImode)
13317 {
13318 switch (code)
13319 {
13320 case EQ:
13321 /* SSE4.1 supports EQ. */
13322 if (!TARGET_SSE4_1)
13323 return false;
13324 break;
13325
13326 case GT:
13327 case GTU:
13328 /* SSE4.2 supports GT/GTU. */
13329 if (!TARGET_SSE4_2)
13330 return false;
13331 break;
13332
13333 default:
13334 gcc_unreachable ();
13335 }
13336 }
13337
13338 /* Unsigned parallel compare is not supported by the hardware. Play some
13339 tricks to turn this into a signed comparison against 0. */
13340 if (code == GTU)
13341 {
13342 cop0 = force_reg (mode, cop0);
13343
13344 switch (mode)
13345 {
13346 case V4SImode:
13347 case V2DImode:
13348 {
13349 rtx t1, t2, mask;
13350
13351 /* Perform a parallel modulo subtraction. */
13352 t1 = gen_reg_rtx (mode);
13353 emit_insn ((mode == V4SImode
13354 ? gen_subv4si3
13355 : gen_subv2di3) (t1, cop0, cop1));
13356
13357 /* Extract the original sign bit of op0. */
13358 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
13359 true, false);
13360 t2 = gen_reg_rtx (mode);
13361 emit_insn ((mode == V4SImode
13362 ? gen_andv4si3
13363 : gen_andv2di3) (t2, cop0, mask));
13364
13365 /* XOR it back into the result of the subtraction. This results
13366 in the sign bit set iff we saw unsigned underflow. */
13367 x = gen_reg_rtx (mode);
13368 emit_insn ((mode == V4SImode
13369 ? gen_xorv4si3
13370 : gen_xorv2di3) (x, t1, t2));
13371
13372 code = GT;
13373 }
13374 break;
13375
13376 case V16QImode:
13377 case V8HImode:
13378 /* Perform a parallel unsigned saturating subtraction. */
13379 x = gen_reg_rtx (mode);
13380 emit_insn (gen_rtx_SET (VOIDmode, x,
13381 gen_rtx_US_MINUS (mode, cop0, cop1)));
13382
13383 code = EQ;
13384 negate = !negate;
13385 break;
13386
13387 default:
13388 gcc_unreachable ();
13389 }
13390
13391 cop0 = x;
13392 cop1 = CONST0_RTX (mode);
13393 }
13394
13395 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
13396 operands[1+negate], operands[2-negate]);
13397
13398 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
13399 operands[2-negate]);
13400 return true;
13401 }
13402
13403 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
13404 true if we should do zero extension, else sign extension. HIGH_P is
13405 true if we want the N/2 high elements, else the low elements. */
13406
13407 void
13408 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13409 {
13410 enum machine_mode imode = GET_MODE (operands[1]);
13411 rtx (*unpack)(rtx, rtx, rtx);
13412 rtx se, dest;
13413
13414 switch (imode)
13415 {
13416 case V16QImode:
13417 if (high_p)
13418 unpack = gen_vec_interleave_highv16qi;
13419 else
13420 unpack = gen_vec_interleave_lowv16qi;
13421 break;
13422 case V8HImode:
13423 if (high_p)
13424 unpack = gen_vec_interleave_highv8hi;
13425 else
13426 unpack = gen_vec_interleave_lowv8hi;
13427 break;
13428 case V4SImode:
13429 if (high_p)
13430 unpack = gen_vec_interleave_highv4si;
13431 else
13432 unpack = gen_vec_interleave_lowv4si;
13433 break;
13434 default:
13435 gcc_unreachable ();
13436 }
13437
13438 dest = gen_lowpart (imode, operands[0]);
13439
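  /* SE supplies the bytes interleaved with the source: zero for zero
     extension, or the mask of (0 > operands[1]) - all-ones exactly in the
     negative lanes - which yields the sign extension.  */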
13440 if (unsigned_p)
13441 se = force_reg (imode, CONST0_RTX (imode));
13442 else
13443 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
13444 operands[1], pc_rtx, pc_rtx);
13445
13446 emit_insn (unpack (dest, operands[1], se));
13447 }
13448
13449 /* This function performs the same task as ix86_expand_sse_unpack,
13450 but with SSE4.1 instructions. */
13451
13452 void
13453 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13454 {
13455 enum machine_mode imode = GET_MODE (operands[1]);
13456 rtx (*unpack)(rtx, rtx);
13457 rtx src, dest;
13458
13459 switch (imode)
13460 {
13461 case V16QImode:
13462 if (unsigned_p)
13463 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
13464 else
13465 unpack = gen_sse4_1_extendv8qiv8hi2;
13466 break;
13467 case V8HImode:
13468 if (unsigned_p)
13469 unpack = gen_sse4_1_zero_extendv4hiv4si2;
13470 else
13471 unpack = gen_sse4_1_extendv4hiv4si2;
13472 break;
13473 case V4SImode:
13474 if (unsigned_p)
13475 unpack = gen_sse4_1_zero_extendv2siv2di2;
13476 else
13477 unpack = gen_sse4_1_extendv2siv2di2;
13478 break;
13479 default:
13480 gcc_unreachable ();
13481 }
13482
13483 dest = operands[0];
13484 if (high_p)
13485 {
13486 /* Shift higher 8 bytes to lower 8 bytes. */
13487 src = gen_reg_rtx (imode);
13488 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
13489 gen_lowpart (TImode, operands[1]),
13490 GEN_INT (64)));
13491 }
13492 else
13493 src = operands[1];
13494
13495 emit_insn (unpack (dest, src));
13496 }
13497
13498 /* This function performs the same task as ix86_expand_sse_unpack,
13499 but with SSE5 instructions. */
13500
13501 #define PPERM_SRC 0x00 /* copy source */
13502 #define PPERM_INVERT 0x20 /* invert source */
13503 #define PPERM_REVERSE 0x40 /* bit reverse source */
13504 #define PPERM_REV_INV 0x60 /* bit reverse & invert src */
13505 #define PPERM_ZERO 0x80 /* all 0's */
13506 #define PPERM_ONES 0xa0 /* all 1's */
13507 #define PPERM_SIGN 0xc0 /* propagate sign bit */
13508 #define PPERM_INV_SIGN 0xe0 /* invert & propagate sign */
13509
13510 #define PPERM_SRC1 0x00 /* use first source byte */
13511 #define PPERM_SRC2 0x10 /* use second source byte */
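
/* Each selector byte used below is formed by OR-ing one of the operation
   codes above with a source select (PPERM_SRC1/PPERM_SRC2) and, in the low
   four bits, a byte index within that source.  */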
13512
13513 void
13514 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13515 {
13516 enum machine_mode imode = GET_MODE (operands[1]);
13517 int pperm_bytes[16];
13518 int i;
13519 int h = (high_p) ? 8 : 0;
13520 int h2;
13521 int sign_extend;
13522 rtvec v = rtvec_alloc (16);
13523 rtvec vs;
13524 rtx x, p;
13525 rtx op0 = operands[0], op1 = operands[1];
13526
13527 switch (imode)
13528 {
13529 case V16QImode:
13530 vs = rtvec_alloc (8);
13531 h2 = (high_p) ? 8 : 0;
13532 for (i = 0; i < 8; i++)
13533 {
13534 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
13535 pperm_bytes[2*i+1] = ((unsigned_p)
13536 ? PPERM_ZERO
13537 : PPERM_SIGN | PPERM_SRC2 | i | h);
13538 }
13539
13540 for (i = 0; i < 16; i++)
13541 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13542
13543 for (i = 0; i < 8; i++)
13544 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13545
13546 p = gen_rtx_PARALLEL (VOIDmode, vs);
13547 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13548 if (unsigned_p)
13549 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
13550 else
13551 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
13552 break;
13553
13554 case V8HImode:
13555 vs = rtvec_alloc (4);
13556 h2 = (high_p) ? 4 : 0;
13557 for (i = 0; i < 4; i++)
13558 {
13559 sign_extend = ((unsigned_p)
13560 ? PPERM_ZERO
13561 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
13562 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
13563 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
13564 pperm_bytes[4*i+2] = sign_extend;
13565 pperm_bytes[4*i+3] = sign_extend;
13566 }
13567
13568 for (i = 0; i < 16; i++)
13569 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13570
13571 for (i = 0; i < 4; i++)
13572 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13573
13574 p = gen_rtx_PARALLEL (VOIDmode, vs);
13575 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13576 if (unsigned_p)
13577 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
13578 else
13579 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
13580 break;
13581
13582 case V4SImode:
13583 vs = rtvec_alloc (2);
13584 h2 = (high_p) ? 2 : 0;
13585 for (i = 0; i < 2; i++)
13586 {
13587 sign_extend = ((unsigned_p)
13588 ? PPERM_ZERO
13589 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
13590 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
13591 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
13592 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
13593 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
13594 pperm_bytes[8*i+4] = sign_extend;
13595 pperm_bytes[8*i+5] = sign_extend;
13596 pperm_bytes[8*i+6] = sign_extend;
13597 pperm_bytes[8*i+7] = sign_extend;
13598 }
13599
13600 for (i = 0; i < 16; i++)
13601 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13602
13603 for (i = 0; i < 2; i++)
13604 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13605
13606 p = gen_rtx_PARALLEL (VOIDmode, vs);
13607 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13608 if (unsigned_p)
13609 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
13610 else
13611 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
13612 break;
13613
13614 default:
13615 gcc_unreachable ();
13616 }
13617
13618 return;
13619 }
13620
13621 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
13622 next narrower integer vector type */
13623 void
13624 ix86_expand_sse5_pack (rtx operands[3])
13625 {
13626 enum machine_mode imode = GET_MODE (operands[0]);
13627 int pperm_bytes[16];
13628 int i;
13629 rtvec v = rtvec_alloc (16);
13630 rtx x;
13631 rtx op0 = operands[0];
13632 rtx op1 = operands[1];
13633 rtx op2 = operands[2];
13634
13635 switch (imode)
13636 {
13637 case V16QImode:
13638 for (i = 0; i < 8; i++)
13639 {
13640 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
13641 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
13642 }
13643
13644 for (i = 0; i < 16; i++)
13645 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13646
13647 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13648 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
13649 break;
13650
13651 case V8HImode:
13652 for (i = 0; i < 4; i++)
13653 {
13654 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
13655 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
13656 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
13657 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
13658 }
13659
13660 for (i = 0; i < 16; i++)
13661 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13662
13663 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13664 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
13665 break;
13666
13667 case V4SImode:
13668 for (i = 0; i < 2; i++)
13669 {
13670 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
13671 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
13672 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
13673 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
13674 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
13675 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
13676 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
13677 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
13678 }
13679
13680 for (i = 0; i < 16; i++)
13681 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13682
13683 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13684 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
13685 break;
13686
13687 default:
13688 gcc_unreachable ();
13689 }
13690
13691 return;
13692 }
13693
13694 /* Expand conditional increment or decrement using adc/sbb instructions.
13695 The default case using setcc followed by a conditional move can be
13696 done by generic code. */
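
/* For example, "x = y + (a < b)" can be emitted as a comparison that leaves
   the condition in the carry flag followed by an adc of 0 into x, with no
   setcc or cmov; the decrement case uses sbb in the same way.  */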
13697 int
13698 ix86_expand_int_addcc (rtx operands[])
13699 {
13700 enum rtx_code code = GET_CODE (operands[1]);
13701 rtx compare_op;
13702 rtx val = const0_rtx;
13703 bool fpcmp = false;
13704 enum machine_mode mode = GET_MODE (operands[0]);
13705
13706 if (operands[3] != const1_rtx
13707 && operands[3] != constm1_rtx)
13708 return 0;
13709 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
13710 ix86_compare_op1, &compare_op))
13711 return 0;
13712 code = GET_CODE (compare_op);
13713
13714 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
13715 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
13716 {
13717 fpcmp = true;
13718 code = ix86_fp_compare_code_to_integer (code);
13719 }
13720
13721 if (code != LTU)
13722 {
13723 val = constm1_rtx;
13724 if (fpcmp)
13725 PUT_CODE (compare_op,
13726 reverse_condition_maybe_unordered
13727 (GET_CODE (compare_op)));
13728 else
13729 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
13730 }
13731 PUT_MODE (compare_op, mode);
13732
13733 /* Construct either adc or sbb insn. */
13734 if ((code == LTU) == (operands[3] == constm1_rtx))
13735 {
13736 switch (GET_MODE (operands[0]))
13737 {
13738 case QImode:
13739 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
13740 break;
13741 case HImode:
13742 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
13743 break;
13744 case SImode:
13745 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
13746 break;
13747 case DImode:
13748 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13749 break;
13750 default:
13751 gcc_unreachable ();
13752 }
13753 }
13754 else
13755 {
13756 switch (GET_MODE (operands[0]))
13757 {
13758 case QImode:
13759 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
13760 break;
13761 case HImode:
13762 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
13763 break;
13764 case SImode:
13765 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
13766 break;
13767 case DImode:
13768 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13769 break;
13770 default:
13771 gcc_unreachable ();
13772 }
13773 }
13774 return 1; /* DONE */
13775 }
13776
13777
13778 /* Split OPERAND into SImode parts (DImode parts on 64-bit targets).
13779 Similar to split_di, but works for floating point parameters and
13780 non-offsettable memories. For pushes, it returns just stack offsets;
13781 the values will be saved in the right order. Maximally three parts are generated. */
13782
13783 static int
13784 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
13785 {
13786 int size;
13787
13788 if (!TARGET_64BIT)
13789 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
13790 else
13791 size = (GET_MODE_SIZE (mode) + 4) / 8;
13792
13793 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
13794 gcc_assert (size >= 2 && size <= 3);
13795
13796 /* Optimize constant pool reference to immediates. This is used by fp
13797 moves, that force all constants to memory to allow combining. */
13798 if (MEM_P (operand) && MEM_READONLY_P (operand))
13799 {
13800 rtx tmp = maybe_get_pool_constant (operand);
13801 if (tmp)
13802 operand = tmp;
13803 }
13804
13805 if (MEM_P (operand) && !offsettable_memref_p (operand))
13806 {
13807 /* The only non-offsettable memories we handle are pushes. */
13808 int ok = push_operand (operand, VOIDmode);
13809
13810 gcc_assert (ok);
13811
13812 operand = copy_rtx (operand);
13813 PUT_MODE (operand, Pmode);
13814 parts[0] = parts[1] = parts[2] = operand;
13815 return size;
13816 }
13817
13818 if (GET_CODE (operand) == CONST_VECTOR)
13819 {
13820 enum machine_mode imode = int_mode_for_mode (mode);
13821 /* Caution: if we looked through a constant pool memory above,
13822 the operand may actually have a different mode now. That's
13823 ok, since we want to pun this all the way back to an integer. */
13824 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
13825 gcc_assert (operand != NULL);
13826 mode = imode;
13827 }
13828
13829 if (!TARGET_64BIT)
13830 {
13831 if (mode == DImode)
13832 split_di (&operand, 1, &parts[0], &parts[1]);
13833 else
13834 {
13835 if (REG_P (operand))
13836 {
13837 gcc_assert (reload_completed);
13838 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
13839 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
13840 if (size == 3)
13841 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
13842 }
13843 else if (offsettable_memref_p (operand))
13844 {
13845 operand = adjust_address (operand, SImode, 0);
13846 parts[0] = operand;
13847 parts[1] = adjust_address (operand, SImode, 4);
13848 if (size == 3)
13849 parts[2] = adjust_address (operand, SImode, 8);
13850 }
13851 else if (GET_CODE (operand) == CONST_DOUBLE)
13852 {
13853 REAL_VALUE_TYPE r;
13854 long l[4];
13855
13856 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13857 switch (mode)
13858 {
13859 case XFmode:
13860 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
13861 parts[2] = gen_int_mode (l[2], SImode);
13862 break;
13863 case DFmode:
13864 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
13865 break;
13866 default:
13867 gcc_unreachable ();
13868 }
13869 parts[1] = gen_int_mode (l[1], SImode);
13870 parts[0] = gen_int_mode (l[0], SImode);
13871 }
13872 else
13873 gcc_unreachable ();
13874 }
13875 }
13876 else
13877 {
13878 if (mode == TImode)
13879 split_ti (&operand, 1, &parts[0], &parts[1]);
13880 if (mode == XFmode || mode == TFmode)
13881 {
13882 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
13883 if (REG_P (operand))
13884 {
13885 gcc_assert (reload_completed);
13886 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
13887 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
13888 }
13889 else if (offsettable_memref_p (operand))
13890 {
13891 operand = adjust_address (operand, DImode, 0);
13892 parts[0] = operand;
13893 parts[1] = adjust_address (operand, upper_mode, 8);
13894 }
13895 else if (GET_CODE (operand) == CONST_DOUBLE)
13896 {
13897 REAL_VALUE_TYPE r;
13898 long l[4];
13899
13900 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13901 real_to_target (l, &r, mode);
13902
13903 /* Do not use shift by 32 to avoid warning on 32bit systems. */
13904 if (HOST_BITS_PER_WIDE_INT >= 64)
13905 parts[0]
13906 = gen_int_mode
13907 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
13908 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
13909 DImode);
13910 else
13911 parts[0] = immed_double_const (l[0], l[1], DImode);
13912
13913 if (upper_mode == SImode)
13914 parts[1] = gen_int_mode (l[2], SImode);
13915 else if (HOST_BITS_PER_WIDE_INT >= 64)
13916 parts[1]
13917 = gen_int_mode
13918 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
13919 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
13920 DImode);
13921 else
13922 parts[1] = immed_double_const (l[2], l[3], DImode);
13923 }
13924 else
13925 gcc_unreachable ();
13926 }
13927 }
13928
13929 return size;
13930 }
13931
13932 /* Emit insns to perform a move or push of DI, DF, and XF values.
13933 All required insns are emitted directly; nothing is returned to the
13934 caller. Operands 2-4 are used internally for the destination parts
13935 and operands 5-7 for the corresponding source parts. */
13936
13937 void
13938 ix86_split_long_move (rtx operands[])
13939 {
13940 rtx part[2][3];
13941 int nparts;
13942 int push = 0;
13943 int collisions = 0;
13944 enum machine_mode mode = GET_MODE (operands[0]);
13945
13946 /* The DFmode expanders may ask us to move a double.
13947 For 64-bit targets this is a single move, and by hiding that
13948 fact here we simplify the i386.md splitters. */
13949 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
13950 {
13951 /* Optimize constant pool reference to immediates. This is used by
13952 fp moves, that force all constants to memory to allow combining. */
13953
13954 if (MEM_P (operands[1])
13955 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
13956 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
13957 operands[1] = get_pool_constant (XEXP (operands[1], 0));
13958 if (push_operand (operands[0], VOIDmode))
13959 {
13960 operands[0] = copy_rtx (operands[0]);
13961 PUT_MODE (operands[0], Pmode);
13962 }
13963 else
13964 operands[0] = gen_lowpart (DImode, operands[0]);
13965 operands[1] = gen_lowpart (DImode, operands[1]);
13966 emit_move_insn (operands[0], operands[1]);
13967 return;
13968 }
13969
13970 /* The only non-offsettable memory we handle is push. */
13971 if (push_operand (operands[0], VOIDmode))
13972 push = 1;
13973 else
13974 gcc_assert (!MEM_P (operands[0])
13975 || offsettable_memref_p (operands[0]));
13976
13977 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
13978 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
13979
13980 /* When emitting push, take care for source operands on the stack. */
13981 if (push && MEM_P (operands[1])
13982 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
13983 {
13984 if (nparts == 3)
13985 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
13986 XEXP (part[1][2], 0));
13987 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
13988 XEXP (part[1][1], 0));
13989 }
13990
13991 /* We need to do the copy in the right order in case an address register
13992 of the source overlaps the destination. */
13993 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
13994 {
13995 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
13996 collisions++;
13997 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13998 collisions++;
13999 if (nparts == 3
14000 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
14001 collisions++;
14002
14003 /* Collision in the middle part can be handled by reordering. */
14004 if (collisions == 1 && nparts == 3
14005 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
14006 {
14007 rtx tmp;
14008 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
14009 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
14010 }
14011
14012 /* If there are more collisions, we can't handle it by reordering.
14013 Do an lea to the last part and use only one colliding move. */
14014 else if (collisions > 1)
14015 {
14016 rtx base;
14017
14018 collisions = 1;
14019
14020 base = part[0][nparts - 1];
14021
14022 /* Handle the case when the last part isn't valid for lea.
14023 Happens in 64-bit mode storing the 12-byte XFmode. */
14024 if (GET_MODE (base) != Pmode)
14025 base = gen_rtx_REG (Pmode, REGNO (base));
14026
14027 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
14028 part[1][0] = replace_equiv_address (part[1][0], base);
14029 part[1][1] = replace_equiv_address (part[1][1],
14030 plus_constant (base, UNITS_PER_WORD));
14031 if (nparts == 3)
14032 part[1][2] = replace_equiv_address (part[1][2],
14033 plus_constant (base, 8));
14034 }
14035 }
14036
14037 if (push)
14038 {
14039 if (!TARGET_64BIT)
14040 {
14041 if (nparts == 3)
14042 {
14043 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
14044 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
14045 emit_move_insn (part[0][2], part[1][2]);
14046 }
14047 }
14048 else
14049 {
14050 /* In 64-bit mode we don't have a 32-bit push available. If the part
14051 is a register, that is OK - we will just use the larger counterpart.
14052 We also retype memory - this comes from an attempt to avoid a REX
14053 prefix when moving the second half of a TFmode value. */
14054 if (GET_MODE (part[1][1]) == SImode)
14055 {
14056 switch (GET_CODE (part[1][1]))
14057 {
14058 case MEM:
14059 part[1][1] = adjust_address (part[1][1], DImode, 0);
14060 break;
14061
14062 case REG:
14063 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
14064 break;
14065
14066 default:
14067 gcc_unreachable ();
14068 }
14069
14070 if (GET_MODE (part[1][0]) == SImode)
14071 part[1][0] = part[1][1];
14072 }
14073 }
14074 emit_move_insn (part[0][1], part[1][1]);
14075 emit_move_insn (part[0][0], part[1][0]);
14076 return;
14077 }
14078
14079 /* Choose correct order to not overwrite the source before it is copied. */
14080 if ((REG_P (part[0][0])
14081 && REG_P (part[1][1])
14082 && (REGNO (part[0][0]) == REGNO (part[1][1])
14083 || (nparts == 3
14084 && REGNO (part[0][0]) == REGNO (part[1][2]))))
14085 || (collisions > 0
14086 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
14087 {
14088 if (nparts == 3)
14089 {
14090 operands[2] = part[0][2];
14091 operands[3] = part[0][1];
14092 operands[4] = part[0][0];
14093 operands[5] = part[1][2];
14094 operands[6] = part[1][1];
14095 operands[7] = part[1][0];
14096 }
14097 else
14098 {
14099 operands[2] = part[0][1];
14100 operands[3] = part[0][0];
14101 operands[5] = part[1][1];
14102 operands[6] = part[1][0];
14103 }
14104 }
14105 else
14106 {
14107 if (nparts == 3)
14108 {
14109 operands[2] = part[0][0];
14110 operands[3] = part[0][1];
14111 operands[4] = part[0][2];
14112 operands[5] = part[1][0];
14113 operands[6] = part[1][1];
14114 operands[7] = part[1][2];
14115 }
14116 else
14117 {
14118 operands[2] = part[0][0];
14119 operands[3] = part[0][1];
14120 operands[5] = part[1][0];
14121 operands[6] = part[1][1];
14122 }
14123 }
14124
14125 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
14126 if (optimize_size)
14127 {
14128 if (CONST_INT_P (operands[5])
14129 && operands[5] != const0_rtx
14130 && REG_P (operands[2]))
14131 {
14132 if (CONST_INT_P (operands[6])
14133 && INTVAL (operands[6]) == INTVAL (operands[5]))
14134 operands[6] = operands[2];
14135
14136 if (nparts == 3
14137 && CONST_INT_P (operands[7])
14138 && INTVAL (operands[7]) == INTVAL (operands[5]))
14139 operands[7] = operands[2];
14140 }
14141
14142 if (nparts == 3
14143 && CONST_INT_P (operands[6])
14144 && operands[6] != const0_rtx
14145 && REG_P (operands[3])
14146 && CONST_INT_P (operands[7])
14147 && INTVAL (operands[7]) == INTVAL (operands[6]))
14148 operands[7] = operands[3];
14149 }
14150
14151 emit_move_insn (operands[2], operands[5]);
14152 emit_move_insn (operands[3], operands[6]);
14153 if (nparts == 3)
14154 emit_move_insn (operands[4], operands[7]);
14155
14156 return;
14157 }
14158
14159 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
14160 left shift by a constant, either using a single shift or
14161 a sequence of add instructions. */
14162
14163 static void
14164 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
14165 {
14166 if (count == 1)
14167 {
14168 emit_insn ((mode == DImode
14169 ? gen_addsi3
14170 : gen_adddi3) (operand, operand, operand));
14171 }
14172 else if (!optimize_size
14173 && count * ix86_cost->add <= ix86_cost->shift_const)
14174 {
14175 int i;
14176 for (i=0; i<count; i++)
14177 {
14178 emit_insn ((mode == DImode
14179 ? gen_addsi3
14180 : gen_adddi3) (operand, operand, operand));
14181 }
14182 }
14183 else
14184 emit_insn ((mode == DImode
14185 ? gen_ashlsi3
14186 : gen_ashldi3) (operand, operand, GEN_INT (count)));
14187 }
14188
14189 void
14190 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
14191 {
14192 rtx low[2], high[2];
14193 int count;
14194 const int single_width = mode == DImode ? 32 : 64;
14195
14196 if (CONST_INT_P (operands[2]))
14197 {
14198 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14199 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14200
14201 if (count >= single_width)
14202 {
14203 emit_move_insn (high[0], low[1]);
14204 emit_move_insn (low[0], const0_rtx);
14205
14206 if (count > single_width)
14207 ix86_expand_ashl_const (high[0], count - single_width, mode);
14208 }
14209 else
14210 {
14211 if (!rtx_equal_p (operands[0], operands[1]))
14212 emit_move_insn (operands[0], operands[1]);
14213 emit_insn ((mode == DImode
14214 ? gen_x86_shld_1
14215 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
14216 ix86_expand_ashl_const (low[0], count, mode);
14217 }
14218 return;
14219 }
14220
14221 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14222
14223 if (operands[1] == const1_rtx)
14224 {
14225 /* Assuming we've chosen QImode-capable registers, 1 << N
14226 can be done with two 32/64-bit shifts, no branches, no cmoves. */
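/* Concretely, for a DImode 1 << N: clear both halves, test bit 5 of the
   count (bit 6 for TImode), let sete/setne drop the single 1 bit into the
   proper half, and shift both halves by the count; the hardware masks the
   shift count to the half width, so no explicit masking is needed.  */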
14227 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
14228 {
14229 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
14230
14231 ix86_expand_clear (low[0]);
14232 ix86_expand_clear (high[0]);
14233 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
14234
14235 d = gen_lowpart (QImode, low[0]);
14236 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
14237 s = gen_rtx_EQ (QImode, flags, const0_rtx);
14238 emit_insn (gen_rtx_SET (VOIDmode, d, s));
14239
14240 d = gen_lowpart (QImode, high[0]);
14241 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
14242 s = gen_rtx_NE (QImode, flags, const0_rtx);
14243 emit_insn (gen_rtx_SET (VOIDmode, d, s));
14244 }
14245
14246 /* Otherwise, we can get the same results by manually performing
14247 a bit extract operation on bit 5/6, and then performing the two
14248 shifts. The two methods of getting 0/1 into low/high are exactly
14249 the same size. Avoiding the shift in the bit extract case helps
14250 pentium4 a bit; no one else seems to care much either way. */
14251 else
14252 {
14253 rtx x;
14254
14255 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
14256 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
14257 else
14258 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
14259 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
14260
14261 emit_insn ((mode == DImode
14262 ? gen_lshrsi3
14263 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
14264 emit_insn ((mode == DImode
14265 ? gen_andsi3
14266 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
14267 emit_move_insn (low[0], high[0]);
14268 emit_insn ((mode == DImode
14269 ? gen_xorsi3
14270 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
14271 }
14272
14273 emit_insn ((mode == DImode
14274 ? gen_ashlsi3
14275 : gen_ashldi3) (low[0], low[0], operands[2]));
14276 emit_insn ((mode == DImode
14277 ? gen_ashlsi3
14278 : gen_ashldi3) (high[0], high[0], operands[2]));
14279 return;
14280 }
14281
14282 if (operands[1] == constm1_rtx)
14283 {
14284 /* For -1 << N, we can avoid the shld instruction, because we
14285 know that we're shifting 0...31/63 ones into a -1. */
14286 emit_move_insn (low[0], constm1_rtx);
14287 if (optimize_size)
14288 emit_move_insn (high[0], low[0]);
14289 else
14290 emit_move_insn (high[0], constm1_rtx);
14291 }
14292 else
14293 {
14294 if (!rtx_equal_p (operands[0], operands[1]))
14295 emit_move_insn (operands[0], operands[1]);
14296
14297 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14298 emit_insn ((mode == DImode
14299 ? gen_x86_shld_1
14300 : gen_x86_64_shld) (high[0], low[0], operands[2]));
14301 }
14302
14303 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
14304
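  /* Fix up the result for shift counts of single_width or more: with cmove,
     the x86_shift_adj_1 / x86_64_shift_adj patterns conditionally move the
     low half into the high half and the cleared scratch into the low half;
     without cmove, x86_shift_adj_2 does the equivalent with a short branch.  */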
14305 if (TARGET_CMOVE && scratch)
14306 {
14307 ix86_expand_clear (scratch);
14308 emit_insn ((mode == DImode
14309 ? gen_x86_shift_adj_1
14310 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
14311 }
14312 else
14313 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
14314 }
14315
14316 void
14317 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
14318 {
14319 rtx low[2], high[2];
14320 int count;
14321 const int single_width = mode == DImode ? 32 : 64;
14322
14323 if (CONST_INT_P (operands[2]))
14324 {
14325 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14326 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14327
14328 if (count == single_width * 2 - 1)
14329 {
14330 emit_move_insn (high[0], high[1]);
14331 emit_insn ((mode == DImode
14332 ? gen_ashrsi3
14333 : gen_ashrdi3) (high[0], high[0],
14334 GEN_INT (single_width - 1)));
14335 emit_move_insn (low[0], high[0]);
14336
14337 }
14338 else if (count >= single_width)
14339 {
14340 emit_move_insn (low[0], high[1]);
14341 emit_move_insn (high[0], low[0]);
14342 emit_insn ((mode == DImode
14343 ? gen_ashrsi3
14344 : gen_ashrdi3) (high[0], high[0],
14345 GEN_INT (single_width - 1)));
14346 if (count > single_width)
14347 emit_insn ((mode == DImode
14348 ? gen_ashrsi3
14349 : gen_ashrdi3) (low[0], low[0],
14350 GEN_INT (count - single_width)));
14351 }
14352 else
14353 {
14354 if (!rtx_equal_p (operands[0], operands[1]))
14355 emit_move_insn (operands[0], operands[1]);
14356 emit_insn ((mode == DImode
14357 ? gen_x86_shrd_1
14358 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14359 emit_insn ((mode == DImode
14360 ? gen_ashrsi3
14361 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
14362 }
14363 }
14364 else
14365 {
14366 if (!rtx_equal_p (operands[0], operands[1]))
14367 emit_move_insn (operands[0], operands[1]);
14368
14369 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14370
14371 emit_insn ((mode == DImode
14372 ? gen_x86_shrd_1
14373 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14374 emit_insn ((mode == DImode
14375 ? gen_ashrsi3
14376 : gen_ashrdi3) (high[0], high[0], operands[2]));
14377
14378 if (TARGET_CMOVE && scratch)
14379 {
14380 emit_move_insn (scratch, high[0]);
14381 emit_insn ((mode == DImode
14382 ? gen_ashrsi3
14383 : gen_ashrdi3) (scratch, scratch,
14384 GEN_INT (single_width - 1)));
14385 emit_insn ((mode == DImode
14386 ? gen_x86_shift_adj_1
14387 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
14388 scratch));
14389 }
14390 else
14391 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
14392 }
14393 }
14394
14395 void
14396 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
14397 {
14398 rtx low[2], high[2];
14399 int count;
14400 const int single_width = mode == DImode ? 32 : 64;
14401
14402 if (CONST_INT_P (operands[2]))
14403 {
14404 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14405 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14406
14407 if (count >= single_width)
14408 {
14409 emit_move_insn (low[0], high[1]);
14410 ix86_expand_clear (high[0]);
14411
14412 if (count > single_width)
14413 emit_insn ((mode == DImode
14414 ? gen_lshrsi3
14415 : gen_lshrdi3) (low[0], low[0],
14416 GEN_INT (count - single_width)));
14417 }
14418 else
14419 {
14420 if (!rtx_equal_p (operands[0], operands[1]))
14421 emit_move_insn (operands[0], operands[1]);
14422 emit_insn ((mode == DImode
14423 ? gen_x86_shrd_1
14424 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14425 emit_insn ((mode == DImode
14426 ? gen_lshrsi3
14427 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
14428 }
14429 }
14430 else
14431 {
14432 if (!rtx_equal_p (operands[0], operands[1]))
14433 emit_move_insn (operands[0], operands[1]);
14434
14435 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14436
14437 emit_insn ((mode == DImode
14438 ? gen_x86_shrd_1
14439 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14440 emit_insn ((mode == DImode
14441 ? gen_lshrsi3
14442 : gen_lshrdi3) (high[0], high[0], operands[2]));
14443
14444 /* Heh. By reversing the arguments, we can reuse this pattern. */
14445 if (TARGET_CMOVE && scratch)
14446 {
14447 ix86_expand_clear (scratch);
14448 emit_insn ((mode == DImode
14449 ? gen_x86_shift_adj_1
14450 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
14451 scratch));
14452 }
14453 else
14454 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
14455 }
14456 }
14457
14458 /* Predict the just-emitted jump instruction to be taken with probability PROB, expressed in units of REG_BR_PROB_BASE. */
14459 static void
14460 predict_jump (int prob)
14461 {
14462 rtx insn = get_last_insn ();
14463 gcc_assert (JUMP_P (insn));
14464 REG_NOTES (insn)
14465 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14466 GEN_INT (prob),
14467 REG_NOTES (insn));
14468 }
14469
14470 /* Helper function for the string operations below. Test VARIABLE for the
14471 bits given by VALUE; if (VARIABLE & VALUE) is zero, jump to the label that is returned. */
14472 static rtx
14473 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
14474 {
14475 rtx label = gen_label_rtx ();
14476 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
14477 if (GET_MODE (variable) == DImode)
14478 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
14479 else
14480 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
14481 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
14482 1, label);
14483 if (epilogue)
14484 predict_jump (REG_BR_PROB_BASE * 50 / 100);
14485 else
14486 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14487 return label;
14488 }
14489
14490 /* Decrease COUNTREG by VALUE. */
14491 static void
14492 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
14493 {
14494 if (GET_MODE (countreg) == DImode)
14495 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
14496 else
14497 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
14498 }
14499
14500 /* Zero-extend EXP, which may be in SImode, into a Pmode register. */
14501 rtx
14502 ix86_zero_extend_to_Pmode (rtx exp)
14503 {
14504 rtx r;
14505 if (GET_MODE (exp) == VOIDmode)
14506 return force_reg (Pmode, exp);
14507 if (GET_MODE (exp) == Pmode)
14508 return copy_to_mode_reg (Pmode, exp);
14509 r = gen_reg_rtx (Pmode);
14510 emit_insn (gen_zero_extendsidi2 (r, exp));
14511 return r;
14512 }
14513
14514 /* Divide COUNTREG by SCALE. */
14515 static rtx
14516 scale_counter (rtx countreg, int scale)
14517 {
14518 rtx sc;
14520
14521 if (scale == 1)
14522 return countreg;
14523 if (CONST_INT_P (countreg))
14524 return GEN_INT (INTVAL (countreg) / scale);
14525 gcc_assert (REG_P (countreg));
14526
14528 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
14529 GEN_INT (exact_log2 (scale)),
14530 NULL, 1, OPTAB_DIRECT);
14531 return sc;
14532 }
14533
14534 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
14535 DImode for constant loop counts. */
14536
14537 static enum machine_mode
14538 counter_mode (rtx count_exp)
14539 {
14540 if (GET_MODE (count_exp) != VOIDmode)
14541 return GET_MODE (count_exp);
14542 if (GET_CODE (count_exp) != CONST_INT)
14543 return Pmode;
14544 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
14545 return DImode;
14546 return SImode;
14547 }
14548
14549 /* When SRCPTR is non-NULL, output a simple loop that moves the memory
14550 pointed to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
14551 the overall size is COUNT, specified in bytes. When SRCPTR is NULL, output
14552 the equivalent loop that sets memory to VALUE (expected to be in MODE).
14553
14554 The size is rounded down to a whole number of chunks moved at once.
14555 SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info. */
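
/* In outline, the emitted code looks roughly like:

       iter = 0;
       if (size == 0) goto out;    (only when the chunk size is one byte)
     top:
       copy or store UNROLL chunks at destptr + iter (and srcptr + iter);
       iter += chunk size * UNROLL;
       if (iter < size) goto top;
       destptr += iter;  srcptr += iter;
     out:  */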
14556
14557
14558 static void
14559 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
14560 rtx destptr, rtx srcptr, rtx value,
14561 rtx count, enum machine_mode mode, int unroll,
14562 int expected_size)
14563 {
14564 rtx out_label, top_label, iter, tmp;
14565 enum machine_mode iter_mode = counter_mode (count);
14566 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
14567 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
14568 rtx size;
14569 rtx x_addr;
14570 rtx y_addr;
14571 int i;
14572
14573 top_label = gen_label_rtx ();
14574 out_label = gen_label_rtx ();
14575 iter = gen_reg_rtx (iter_mode);
14576
14577 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
14578 NULL, 1, OPTAB_DIRECT);
14579 /* Those two should combine. */
14580 if (piece_size == const1_rtx)
14581 {
14582 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
14583 true, out_label);
14584 predict_jump (REG_BR_PROB_BASE * 10 / 100);
14585 }
14586 emit_move_insn (iter, const0_rtx);
14587
14588 emit_label (top_label);
14589
14590 tmp = convert_modes (Pmode, iter_mode, iter, true);
14591 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
14592 destmem = change_address (destmem, mode, x_addr);
14593
14594 if (srcmem)
14595 {
14596 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
14597 srcmem = change_address (srcmem, mode, y_addr);
14598
14599 /* When unrolling for chips that reorder memory reads and writes,
14600 we can save registers by using a single temporary.
14601 Using 4 temporaries is also overkill in 32-bit mode. */
14602 if (!TARGET_64BIT && 0)
14603 {
14604 for (i = 0; i < unroll; i++)
14605 {
14606 if (i)
14607 {
14608 destmem =
14609 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14610 srcmem =
14611 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14612 }
14613 emit_move_insn (destmem, srcmem);
14614 }
14615 }
14616 else
14617 {
14618 rtx tmpreg[4];
14619 gcc_assert (unroll <= 4);
14620 for (i = 0; i < unroll; i++)
14621 {
14622 tmpreg[i] = gen_reg_rtx (mode);
14623 if (i)
14624 {
14625 srcmem =
14626 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14627 }
14628 emit_move_insn (tmpreg[i], srcmem);
14629 }
14630 for (i = 0; i < unroll; i++)
14631 {
14632 if (i)
14633 {
14634 destmem =
14635 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14636 }
14637 emit_move_insn (destmem, tmpreg[i]);
14638 }
14639 }
14640 }
14641 else
14642 for (i = 0; i < unroll; i++)
14643 {
14644 if (i)
14645 destmem =
14646 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14647 emit_move_insn (destmem, value);
14648 }
14649
14650 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
14651 true, OPTAB_LIB_WIDEN);
14652 if (tmp != iter)
14653 emit_move_insn (iter, tmp);
14654
14655 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
14656 true, top_label);
14657 if (expected_size != -1)
14658 {
14659 expected_size /= GET_MODE_SIZE (mode) * unroll;
14660 if (expected_size == 0)
14661 predict_jump (0);
14662 else if (expected_size > REG_BR_PROB_BASE)
14663 predict_jump (REG_BR_PROB_BASE - 1);
14664 else
14665 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
14666 }
14667 else
14668 predict_jump (REG_BR_PROB_BASE * 80 / 100);
14669 iter = ix86_zero_extend_to_Pmode (iter);
14670 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
14671 true, OPTAB_LIB_WIDEN);
14672 if (tmp != destptr)
14673 emit_move_insn (destptr, tmp);
14674 if (srcptr)
14675 {
14676 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
14677 true, OPTAB_LIB_WIDEN);
14678 if (tmp != srcptr)
14679 emit_move_insn (srcptr, tmp);
14680 }
14681 emit_label (out_label);
14682 }
14683
14684 /* Output a "rep; mov" instruction.
14685 Arguments have the same meaning as for the previous function. */
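
/* The DESTEXP and SRCEXP expressions built below describe the pointer
   values after the copy: the original pointer plus the full byte count
   (COUNTREG scaled back up by the chunk size).  They are passed to
   gen_rep_mov together with the scaled count.  */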
14686 static void
14687 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
14688 rtx destptr, rtx srcptr,
14689 rtx count,
14690 enum machine_mode mode)
14691 {
14692 rtx destexp;
14693 rtx srcexp;
14694 rtx countreg;
14695
14696 /* If the size is known and a multiple of 4, it is shorter to use dword rather than byte "rep; movs". */
14697 if (mode == QImode && CONST_INT_P (count)
14698 && !(INTVAL (count) & 3))
14699 mode = SImode;
14700
14701 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14702 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14703 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
14704 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
14705 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14706 if (mode != QImode)
14707 {
14708 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14709 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14710 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14711 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
14712 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14713 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
14714 }
14715 else
14716 {
14717 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14718 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
14719 }
14720 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
14721 destexp, srcexp));
14722 }
14723
14724 /* Output a "rep; stos" instruction.
14725 Arguments have the same meaning as for the previous function. */
14726 static void
14727 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
14728 rtx count,
14729 enum machine_mode mode)
14730 {
14731 rtx destexp;
14732 rtx countreg;
14733
14734 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14735 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14736 value = force_reg (mode, gen_lowpart (mode, value));
14737 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14738 if (mode != QImode)
14739 {
14740 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14741 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14742 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14743 }
14744 else
14745 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14746 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
14747 }
14748
14749 static void
14750 emit_strmov (rtx destmem, rtx srcmem,
14751 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
14752 {
14753 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
14754 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
14755 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14756 }
14757
14758 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
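/* For a constant count the tail is copied by a fixed sequence of moves:
   e.g. a remaining count of 7 with max_size 16 emits one SImode, one HImode
   and one QImode move at offsets 0, 4 and 6.  */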
14759 static void
14760 expand_movmem_epilogue (rtx destmem, rtx srcmem,
14761 rtx destptr, rtx srcptr, rtx count, int max_size)
14762 {
14763 rtx src, dest;
14764 if (CONST_INT_P (count))
14765 {
14766 HOST_WIDE_INT countval = INTVAL (count);
14767 int offset = 0;
14768
14769 if ((countval & 0x10) && max_size > 16)
14770 {
14771 if (TARGET_64BIT)
14772 {
14773 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14774 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
14775 }
14776 else
14777 gcc_unreachable ();
14778 offset += 16;
14779 }
14780 if ((countval & 0x08) && max_size > 8)
14781 {
14782 if (TARGET_64BIT)
14783 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14784 else
14785 {
14786 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14787 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
14788 }
14789 offset += 8;
14790 }
14791 if ((countval & 0x04) && max_size > 4)
14792 {
14793 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14794 offset += 4;
14795 }
14796 if ((countval & 0x02) && max_size > 2)
14797 {
14798 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
14799 offset += 2;
14800 }
14801 if ((countval & 0x01) && max_size > 1)
14802 {
14803 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
14804 offset += 1;
14805 }
14806 return;
14807 }
14808 if (max_size > 8)
14809 {
14810 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
14811 count, 1, OPTAB_DIRECT);
14812 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
14813 count, QImode, 1, 4);
14814 return;
14815 }
14816
14817 /* When single stringop instructions (movs) are cheap, we simply advance the dest
14818 and src pointers. Otherwise we save code size by maintaining an offset (zero is
14819 readily available from the preceding rep operation) and using x86 addressing modes.
14820 */
14821 if (TARGET_SINGLE_STRINGOP)
14822 {
14823 if (max_size > 4)
14824 {
14825 rtx label = ix86_expand_aligntest (count, 4, true);
14826 src = change_address (srcmem, SImode, srcptr);
14827 dest = change_address (destmem, SImode, destptr);
14828 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14829 emit_label (label);
14830 LABEL_NUSES (label) = 1;
14831 }
14832 if (max_size > 2)
14833 {
14834 rtx label = ix86_expand_aligntest (count, 2, true);
14835 src = change_address (srcmem, HImode, srcptr);
14836 dest = change_address (destmem, HImode, destptr);
14837 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14838 emit_label (label);
14839 LABEL_NUSES (label) = 1;
14840 }
14841 if (max_size > 1)
14842 {
14843 rtx label = ix86_expand_aligntest (count, 1, true);
14844 src = change_address (srcmem, QImode, srcptr);
14845 dest = change_address (destmem, QImode, destptr);
14846 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14847 emit_label (label);
14848 LABEL_NUSES (label) = 1;
14849 }
14850 }
14851 else
14852 {
14853 rtx offset = force_reg (Pmode, const0_rtx);
14854 rtx tmp;
14855
14856 if (max_size > 4)
14857 {
14858 rtx label = ix86_expand_aligntest (count, 4, true);
14859 src = change_address (srcmem, SImode, srcptr);
14860 dest = change_address (destmem, SImode, destptr);
14861 emit_move_insn (dest, src);
14862 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
14863 true, OPTAB_LIB_WIDEN);
14864 if (tmp != offset)
14865 emit_move_insn (offset, tmp);
14866 emit_label (label);
14867 LABEL_NUSES (label) = 1;
14868 }
14869 if (max_size > 2)
14870 {
14871 rtx label = ix86_expand_aligntest (count, 2, true);
14872 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14873 src = change_address (srcmem, HImode, tmp);
14874 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14875 dest = change_address (destmem, HImode, tmp);
14876 emit_move_insn (dest, src);
14877 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
14878 true, OPTAB_LIB_WIDEN);
14879 if (tmp != offset)
14880 emit_move_insn (offset, tmp);
14881 emit_label (label);
14882 LABEL_NUSES (label) = 1;
14883 }
14884 if (max_size > 1)
14885 {
14886 rtx label = ix86_expand_aligntest (count, 1, true);
14887 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14888 src = change_address (srcmem, QImode, tmp);
14889 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14890 dest = change_address (destmem, QImode, tmp);
14891 emit_move_insn (dest, src);
14892 emit_label (label);
14893 LABEL_NUSES (label) = 1;
14894 }
14895 }
14896 }
14897
14898 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
14899 static void
14900 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
14901 rtx count, int max_size)
14902 {
14903 count =
14904 expand_simple_binop (counter_mode (count), AND, count,
14905 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
14906 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
14907 gen_lowpart (QImode, value), count, QImode,
14908 1, max_size / 2);
14909 }
14910
14911 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
14912 static void
14913 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
14914 {
14915 rtx dest;
14916
14917 if (CONST_INT_P (count))
14918 {
14919 HOST_WIDE_INT countval = INTVAL (count);
14920 int offset = 0;
14921
14922 if ((countval & 0x10) && max_size > 16)
14923 {
14924 if (TARGET_64BIT)
14925 {
14926 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14927 emit_insn (gen_strset (destptr, dest, value));
14928 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
14929 emit_insn (gen_strset (destptr, dest, value));
14930 }
14931 else
14932 gcc_unreachable ();
14933 offset += 16;
14934 }
14935 if ((countval & 0x08) && max_size > 8)
14936 {
14937 if (TARGET_64BIT)
14938 {
14939 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14940 emit_insn (gen_strset (destptr, dest, value));
14941 }
14942 else
14943 {
14944 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14945 emit_insn (gen_strset (destptr, dest, value));
14946 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
14947 emit_insn (gen_strset (destptr, dest, value));
14948 }
14949 offset += 8;
14950 }
14951 if ((countval & 0x04) && max_size > 4)
14952 {
14953 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14954 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14955 offset += 4;
14956 }
14957 if ((countval & 0x02) && max_size > 2)
14958 {
14959 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
14960 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14961 offset += 2;
14962 }
14963 if ((countval & 0x01) && max_size > 1)
14964 {
14965 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
14966 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14967 offset += 1;
14968 }
14969 return;
14970 }
14971 if (max_size > 32)
14972 {
14973 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
14974 return;
14975 }
14976 if (max_size > 16)
14977 {
14978 rtx label = ix86_expand_aligntest (count, 16, true);
14979 if (TARGET_64BIT)
14980 {
14981 dest = change_address (destmem, DImode, destptr);
14982 emit_insn (gen_strset (destptr, dest, value));
14983 emit_insn (gen_strset (destptr, dest, value));
14984 }
14985 else
14986 {
14987 dest = change_address (destmem, SImode, destptr);
14988 emit_insn (gen_strset (destptr, dest, value));
14989 emit_insn (gen_strset (destptr, dest, value));
14990 emit_insn (gen_strset (destptr, dest, value));
14991 emit_insn (gen_strset (destptr, dest, value));
14992 }
14993 emit_label (label);
14994 LABEL_NUSES (label) = 1;
14995 }
14996 if (max_size > 8)
14997 {
14998 rtx label = ix86_expand_aligntest (count, 8, true);
14999 if (TARGET_64BIT)
15000 {
15001 dest = change_address (destmem, DImode, destptr);
15002 emit_insn (gen_strset (destptr, dest, value));
15003 }
15004 else
15005 {
15006 dest = change_address (destmem, SImode, destptr);
15007 emit_insn (gen_strset (destptr, dest, value));
15008 emit_insn (gen_strset (destptr, dest, value));
15009 }
15010 emit_label (label);
15011 LABEL_NUSES (label) = 1;
15012 }
15013 if (max_size > 4)
15014 {
15015 rtx label = ix86_expand_aligntest (count, 4, true);
15016 dest = change_address (destmem, SImode, destptr);
15017 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
15018 emit_label (label);
15019 LABEL_NUSES (label) = 1;
15020 }
15021 if (max_size > 2)
15022 {
15023 rtx label = ix86_expand_aligntest (count, 2, true);
15024 dest = change_address (destmem, HImode, destptr);
15025 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
15026 emit_label (label);
15027 LABEL_NUSES (label) = 1;
15028 }
15029 if (max_size > 1)
15030 {
15031 rtx label = ix86_expand_aligntest (count, 1, true);
15032 dest = change_address (destmem, QImode, destptr);
15033 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
15034 emit_label (label);
15035 LABEL_NUSES (label) = 1;
15036 }
15037 }
15038
15039 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN, to
15040 DESIRED_ALIGNMENT. */
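/* For example, with ALIGN == 1 and DESIRED_ALIGNMENT == 8 this emits a
   QImode, a HImode and an SImode copy, each guarded by a runtime test of
   the low bits of DESTPTR, and adjusts COUNT accordingly.  */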
15041 static void
15042 expand_movmem_prologue (rtx destmem, rtx srcmem,
15043 rtx destptr, rtx srcptr, rtx count,
15044 int align, int desired_alignment)
15045 {
15046 if (align <= 1 && desired_alignment > 1)
15047 {
15048 rtx label = ix86_expand_aligntest (destptr, 1, false);
15049 srcmem = change_address (srcmem, QImode, srcptr);
15050 destmem = change_address (destmem, QImode, destptr);
15051 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15052 ix86_adjust_counter (count, 1);
15053 emit_label (label);
15054 LABEL_NUSES (label) = 1;
15055 }
15056 if (align <= 2 && desired_alignment > 2)
15057 {
15058 rtx label = ix86_expand_aligntest (destptr, 2, false);
15059 srcmem = change_address (srcmem, HImode, srcptr);
15060 destmem = change_address (destmem, HImode, destptr);
15061 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15062 ix86_adjust_counter (count, 2);
15063 emit_label (label);
15064 LABEL_NUSES (label) = 1;
15065 }
15066 if (align <= 4 && desired_alignment > 4)
15067 {
15068 rtx label = ix86_expand_aligntest (destptr, 4, false);
15069 srcmem = change_address (srcmem, SImode, srcptr);
15070 destmem = change_address (destmem, SImode, destptr);
15071 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15072 ix86_adjust_counter (count, 4);
15073 emit_label (label);
15074 LABEL_NUSES (label) = 1;
15075 }
15076 gcc_assert (desired_alignment <= 8);
15077 }
15078
15079 /* Store enough at DEST to align DEST, known to be aligned by ALIGN, to
15080 DESIRED_ALIGNMENT. */
15081 static void
15082 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
15083 int align, int desired_alignment)
15084 {
15085 if (align <= 1 && desired_alignment > 1)
15086 {
15087 rtx label = ix86_expand_aligntest (destptr, 1, false);
15088 destmem = change_address (destmem, QImode, destptr);
15089 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
15090 ix86_adjust_counter (count, 1);
15091 emit_label (label);
15092 LABEL_NUSES (label) = 1;
15093 }
15094 if (align <= 2 && desired_alignment > 2)
15095 {
15096 rtx label = ix86_expand_aligntest (destptr, 2, false);
15097 destmem = change_address (destmem, HImode, destptr);
15098 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
15099 ix86_adjust_counter (count, 2);
15100 emit_label (label);
15101 LABEL_NUSES (label) = 1;
15102 }
15103 if (align <= 4 && desired_alignment > 4)
15104 {
15105 rtx label = ix86_expand_aligntest (destptr, 4, false);
15106 destmem = change_address (destmem, SImode, destptr);
15107 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
15108 ix86_adjust_counter (count, 4);
15109 emit_label (label);
15110 LABEL_NUSES (label) = 1;
15111 }
15112 gcc_assert (desired_alignment <= 8);
15113 }
15114
15115 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
15116 static enum stringop_alg
15117 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
15118 int *dynamic_check)
15119 {
15120 const struct stringop_algs * algs;
15121 /* Algorithms using the rep prefix want at least edi and ecx;
15122 additionally, memset wants eax and memcpy wants esi. Don't
15123 consider such algorithms if the user has appropriated those
15124 registers for their own purposes. */
15125 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
15126 || (memset
15127 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
15128
15129 #define ALG_USABLE_P(alg) (rep_prefix_usable \
15130 || (alg != rep_prefix_1_byte \
15131 && alg != rep_prefix_4_byte \
15132 && alg != rep_prefix_8_byte))
15133
15134 *dynamic_check = -1;
15135 if (memset)
15136 algs = &ix86_cost->memset[TARGET_64BIT != 0];
15137 else
15138 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
15139 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
15140 return stringop_alg;
15141 /* rep; movq or rep; movl is the smallest variant. */
15142 else if (optimize_size)
15143 {
15144 if (!count || (count & 3))
15145 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
15146 else
15147 return rep_prefix_usable ? rep_prefix_4_byte : loop;
15148 }
15149 /* Very tiny blocks are best handled via the loop; REP is expensive to set up.
15150 */
15151 else if (expected_size != -1 && expected_size < 4)
15152 return loop_1_byte;
15153 else if (expected_size != -1)
15154 {
15155 unsigned int i;
15156 enum stringop_alg alg = libcall;
15157 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
15158 {
15159 /* We get here if the algorithms that were not libcall-based
15160 were rep-prefix based and we are unable to use rep prefixes
15161 based on global register usage. Break out of the loop and
15162 use the heuristic below. */
15163 if (algs->size[i].max == 0)
15164 break;
15165 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
15166 {
15167 enum stringop_alg candidate = algs->size[i].alg;
15168
15169 if (candidate != libcall && ALG_USABLE_P (candidate))
15170 alg = candidate;
15171 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
15172 last non-libcall inline algorithm. */
15173 if (TARGET_INLINE_ALL_STRINGOPS)
15174 {
15175 /* When the current size is best copied by a libcall,
15176 but we are still forced to inline, run the heuristic below
15177 that will pick code for medium sized blocks. */
15178 if (alg != libcall)
15179 return alg;
15180 break;
15181 }
15182 else if (ALG_USABLE_P (candidate))
15183 return candidate;
15184 }
15185 }
15186 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
15187 }
15188 /* When asked to inline the call anyway, try to pick a meaningful choice.
15189 We look for the maximal size of a block that is faster to copy by hand and
15190 take blocks of at most that size, guessing that the average size will
15191 be roughly half of the block.
15192
15193 If this turns out to be bad, we might simply specify the preferred
15194 choice in ix86_costs. */
15195 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15196 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
15197 {
15198 int max = -1;
15199 enum stringop_alg alg;
15200 int i;
15201 bool any_alg_usable_p = true;
15202
15203 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
15204 {
15205 enum stringop_alg candidate = algs->size[i].alg;
15206 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
15207
15208 if (candidate != libcall && candidate
15209 && ALG_USABLE_P (candidate))
15210 max = algs->size[i].max;
15211 }
15212 /* If there aren't any usable algorithms, then recursing on
15213 smaller sizes isn't going to find anything. Just return the
15214 simple byte-at-a-time copy loop. */
15215 if (!any_alg_usable_p)
15216 {
15217 /* Pick something reasonable. */
15218 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15219 *dynamic_check = 128;
15220 return loop_1_byte;
15221 }
15222 if (max == -1)
15223 max = 4096;
15224 alg = decide_alg (count, max / 2, memset, dynamic_check);
15225 gcc_assert (*dynamic_check == -1);
15226 gcc_assert (alg != libcall);
15227 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15228 *dynamic_check = max;
15229 return alg;
15230 }
15231 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
15232 #undef ALG_USABLE_P
15233 }
15234
15235 /* Decide on alignment. We know that the operand is already aligned to ALIGN
15236 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
15237 static int
15238 decide_alignment (int align,
15239 enum stringop_alg alg,
15240 int expected_size)
15241 {
15242 int desired_align = 0;
15243 switch (alg)
15244 {
15245 case no_stringop:
15246 gcc_unreachable ();
15247 case loop:
15248 case unrolled_loop:
15249 desired_align = GET_MODE_SIZE (Pmode);
15250 break;
15251 case rep_prefix_8_byte:
15252 desired_align = 8;
15253 break;
15254 case rep_prefix_4_byte:
15255 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
15256 copying a whole cacheline at once. */
15257 if (TARGET_PENTIUMPRO)
15258 desired_align = 8;
15259 else
15260 desired_align = 4;
15261 break;
15262 case rep_prefix_1_byte:
15263 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
15264 copying a whole cacheline at once. */
15265 if (TARGET_PENTIUMPRO)
15266 desired_align = 8;
15267 else
15268 desired_align = 1;
15269 break;
15270 case loop_1_byte:
15271 desired_align = 1;
15272 break;
15273 case libcall:
15274 return 0;
15275 }
15276
15277 if (optimize_size)
15278 desired_align = 1;
15279 if (desired_align < align)
15280 desired_align = align;
15281 if (expected_size != -1 && expected_size < 4)
15282 desired_align = align;
15283 return desired_align;
15284 }
15285
15286 /* Return the smallest power of 2 greater than VAL. */
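/* Note that the result is strictly greater than VAL: both VAL == 4 and
   VAL == 7 yield 8.  */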
15287 static int
15288 smallest_pow2_greater_than (int val)
15289 {
15290 int ret = 1;
15291 while (ret <= val)
15292 ret <<= 1;
15293 return ret;
15294 }
15295
15296 /* Expand string move (memcpy) operation. Use i386 string operations when
15297 profitable. expand_setmem contains similar code. The code depends upon
15298 architecture, block size and alignment, but always has the same
15299 overall structure:
15300
15301 1) Prologue guard: Conditional that jumps up to epilogues for small
15302 blocks that can be handled by epilogue alone. This is faster but
15303 also needed for correctness, since the prologue assumes the block is larger
15304 than the desired alignment.
15305
15306 Optional dynamic check for size and libcall for large
15307 blocks is emitted here too, with -minline-stringops-dynamically.
15308
15309 2) Prologue: copy first few bytes in order to get destination aligned
15310 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
15311 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
15312 We emit either a jump tree on power of two sized blocks, or a byte loop.
15313
15314 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
15315 with specified algorithm.
15316
15317 4) Epilogue: code copying tail of the block that is too small to be
15318 handled by main body (or up to size guarded by prologue guard). */
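/* Schematically, for a variable count the emitted code has the shape
   (an illustrative sketch; details depend on the algorithm chosen):

       if (count < epilogue_size_needed) goto epilogue;      1) guard
       copy 1/2/4 bytes until dest reaches desired_align      2) prologue
     loop:
       copy size_needed bytes and advance pointers            3) main body
     epilogue:
       copy count & (epilogue_size_needed - 1) tail bytes     4) epilogue  */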
15319
15320 int
15321 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
15322 rtx expected_align_exp, rtx expected_size_exp)
15323 {
15324 rtx destreg;
15325 rtx srcreg;
15326 rtx label = NULL;
15327 rtx tmp;
15328 rtx jump_around_label = NULL;
15329 HOST_WIDE_INT align = 1;
15330 unsigned HOST_WIDE_INT count = 0;
15331 HOST_WIDE_INT expected_size = -1;
15332 int size_needed = 0, epilogue_size_needed;
15333 int desired_align = 0;
15334 enum stringop_alg alg;
15335 int dynamic_check;
15336
15337 if (CONST_INT_P (align_exp))
15338 align = INTVAL (align_exp);
15339 /* i386 can do misaligned access at a reasonably increased cost. */
15340 if (CONST_INT_P (expected_align_exp)
15341 && INTVAL (expected_align_exp) > align)
15342 align = INTVAL (expected_align_exp);
15343 if (CONST_INT_P (count_exp))
15344 count = expected_size = INTVAL (count_exp);
15345 if (CONST_INT_P (expected_size_exp) && count == 0)
15346 expected_size = INTVAL (expected_size_exp);
15347
15348 /* Make sure we don't need to care about overflow later on. */
15349 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
15350 return 0;
15351
15352 /* Step 0: Decide on preferred algorithm, desired alignment and
15353 size of chunks to be copied by main loop. */
15354
15355 alg = decide_alg (count, expected_size, false, &dynamic_check);
15356 desired_align = decide_alignment (align, alg, expected_size);
15357
15358 if (!TARGET_ALIGN_STRINGOPS)
15359 align = desired_align;
15360
15361 if (alg == libcall)
15362 return 0;
15363 gcc_assert (alg != no_stringop);
15364 if (!count)
15365 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
15366 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15367 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
15368 switch (alg)
15369 {
15370 case libcall:
15371 case no_stringop:
15372 gcc_unreachable ();
15373 case loop:
15374 size_needed = GET_MODE_SIZE (Pmode);
15375 break;
15376 case unrolled_loop:
15377 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
15378 break;
15379 case rep_prefix_8_byte:
15380 size_needed = 8;
15381 break;
15382 case rep_prefix_4_byte:
15383 size_needed = 4;
15384 break;
15385 case rep_prefix_1_byte:
15386 case loop_1_byte:
15387 size_needed = 1;
15388 break;
15389 }
15390
15391 epilogue_size_needed = size_needed;
15392
15393 /* Step 1: Prologue guard. */
15394
15395 /* Alignment code needs count to be in register. */
15396 if (CONST_INT_P (count_exp) && desired_align > align)
15397 count_exp = force_reg (counter_mode (count_exp), count_exp);
15398 gcc_assert (desired_align >= 1 && align >= 1);
15399
15400 /* Ensure that alignment prologue won't copy past end of block. */
15401 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15402 {
15403 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15404 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
15405 Make sure it is a power of 2. */
15406 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15407
15408 if (CONST_INT_P (count_exp))
15409 {
15410 if (UINTVAL (count_exp) < (unsigned HOST_WIDE_INT)epilogue_size_needed)
15411 goto epilogue;
15412 }
15413 else
15414 {
15415 label = gen_label_rtx ();
15416 emit_cmp_and_jump_insns (count_exp,
15417 GEN_INT (epilogue_size_needed),
15418 LTU, 0, counter_mode (count_exp), 1, label);
15419 if (expected_size == -1 || expected_size < epilogue_size_needed)
15420 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15421 else
15422 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15423 }
15424 }
15425
15426 /* Emit code to decide at runtime whether a library call or inline code should be
15427 used. */
15428 if (dynamic_check != -1)
15429 {
15430 if (CONST_INT_P (count_exp))
15431 {
15432 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
15433 {
15434 emit_block_move_via_libcall (dst, src, count_exp, false);
15435 count_exp = const0_rtx;
15436 goto epilogue;
15437 }
15438 }
15439 else
15440 {
15441 rtx hot_label = gen_label_rtx ();
15442 jump_around_label = gen_label_rtx ();
15443 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15444 LEU, 0, GET_MODE (count_exp), 1, hot_label);
15445 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15446 emit_block_move_via_libcall (dst, src, count_exp, false);
15447 emit_jump (jump_around_label);
15448 emit_label (hot_label);
15449 }
15450 }
15451
15452 /* Step 2: Alignment prologue. */
15453
15454 if (desired_align > align)
15455 {
15456 /* Except for the first move in the epilogue, we no longer know
15457 the constant offset in the aliasing info. It does not seem worth
15458 the pain to maintain it for the first move, so throw away
15459 the info early. */
15460 src = change_address (src, BLKmode, srcreg);
15461 dst = change_address (dst, BLKmode, destreg);
15462 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
15463 desired_align);
15464 }
15465 if (label && size_needed == 1)
15466 {
15467 emit_label (label);
15468 LABEL_NUSES (label) = 1;
15469 label = NULL;
15470 }
15471
15472 /* Step 3: Main loop. */
15473
15474 switch (alg)
15475 {
15476 case libcall:
15477 case no_stringop:
15478 gcc_unreachable ();
15479 case loop_1_byte:
15480 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15481 count_exp, QImode, 1, expected_size);
15482 break;
15483 case loop:
15484 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15485 count_exp, Pmode, 1, expected_size);
15486 break;
15487 case unrolled_loop:
15488 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
15489 registers for 4 temporaries anyway. */
15490 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15491 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
15492 expected_size);
15493 break;
15494 case rep_prefix_8_byte:
15495 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15496 DImode);
15497 break;
15498 case rep_prefix_4_byte:
15499 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15500 SImode);
15501 break;
15502 case rep_prefix_1_byte:
15503 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15504 QImode);
15505 break;
15506 }
15507 /* Properly adjust the offset of src and dest memory for aliasing. */
15508 if (CONST_INT_P (count_exp))
15509 {
15510 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
15511 (count / size_needed) * size_needed);
15512 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15513 (count / size_needed) * size_needed);
15514 }
15515 else
15516 {
15517 src = change_address (src, BLKmode, srcreg);
15518 dst = change_address (dst, BLKmode, destreg);
15519 }
15520
15521 /* Step 4: Epilogue to copy the remaining bytes. */
15522 epilogue:
15523 if (label)
15524 {
15525 /* When the main loop is done, COUNT_EXP might hold the original count,
15526 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
15527 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
15528 bytes. Compensate if needed. */
15529
15530 if (size_needed < epilogue_size_needed)
15531 {
15532 tmp =
15533 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15534 GEN_INT (size_needed - 1), count_exp, 1,
15535 OPTAB_DIRECT);
15536 if (tmp != count_exp)
15537 emit_move_insn (count_exp, tmp);
15538 }
15539 emit_label (label);
15540 LABEL_NUSES (label) = 1;
15541 }
15542
15543 if (count_exp != const0_rtx && epilogue_size_needed > 1)
15544 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
15545 epilogue_size_needed);
15546 if (jump_around_label)
15547 emit_label (jump_around_label);
15548 return 1;
15549 }
15550
15551 /* Helper function for memset. For the QImode value 0xXY, produce
15552 0xXYXYXYXY of the width specified by MODE. This is essentially
15553 a multiplication by 0x01010101, but we can do slightly better than
15554 synth_mult by unwinding the sequence by hand on CPUs with
15555 slow multiply. */
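/* For example, duplicating the byte 0xAB into SImode yields 0xABABABAB.
   For a constant VAL this is folded at compile time; otherwise the code
   below chooses between a multiplication by 0x01010101 and a shift/or
   sequence based on the cost tables.  */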
15556 static rtx
15557 promote_duplicated_reg (enum machine_mode mode, rtx val)
15558 {
15559 enum machine_mode valmode = GET_MODE (val);
15560 rtx tmp;
15561 int nops = mode == DImode ? 3 : 2;
15562
15563 gcc_assert (mode == SImode || mode == DImode);
15564 if (val == const0_rtx)
15565 return copy_to_mode_reg (mode, const0_rtx);
15566 if (CONST_INT_P (val))
15567 {
15568 HOST_WIDE_INT v = INTVAL (val) & 255;
15569
15570 v |= v << 8;
15571 v |= v << 16;
15572 if (mode == DImode)
15573 v |= (v << 16) << 16;
15574 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
15575 }
15576
15577 if (valmode == VOIDmode)
15578 valmode = QImode;
15579 if (valmode != QImode)
15580 val = gen_lowpart (QImode, val);
15581 if (mode == QImode)
15582 return val;
15583 if (!TARGET_PARTIAL_REG_STALL)
15584 nops--;
15585 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
15586 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
15587 <= (ix86_cost->shift_const + ix86_cost->add) * nops
15588 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
15589 {
15590 rtx reg = convert_modes (mode, QImode, val, true);
15591 tmp = promote_duplicated_reg (mode, const1_rtx);
15592 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
15593 OPTAB_DIRECT);
15594 }
15595 else
15596 {
15597 rtx reg = convert_modes (mode, QImode, val, true);
15598
15599 if (!TARGET_PARTIAL_REG_STALL)
15600 if (mode == SImode)
15601 emit_insn (gen_movsi_insv_1 (reg, reg));
15602 else
15603 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
15604 else
15605 {
15606 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
15607 NULL, 1, OPTAB_DIRECT);
15608 reg =
15609 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15610 }
15611 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
15612 NULL, 1, OPTAB_DIRECT);
15613 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15614 if (mode == SImode)
15615 return reg;
15616 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
15617 NULL, 1, OPTAB_DIRECT);
15618 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15619 return reg;
15620 }
15621 }
15622
15623 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that will
15624 be needed by the main loop copying SIZE_NEEDED chunks and by the prologue getting
15625 alignment from ALIGN to DESIRED_ALIGN. */
15626 static rtx
15627 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
15628 {
15629 rtx promoted_val;
15630
15631 if (TARGET_64BIT
15632 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
15633 promoted_val = promote_duplicated_reg (DImode, val);
15634 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
15635 promoted_val = promote_duplicated_reg (SImode, val);
15636 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
15637 promoted_val = promote_duplicated_reg (HImode, val);
15638 else
15639 promoted_val = val;
15640
15641 return promoted_val;
15642 }
15643
15644 /* Expand string set operation (memset). Use i386 string operations when
15645 profitable. See expand_movmem comment for explanation of individual
15646 steps performed. */
15647 int
15648 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
15649 rtx expected_align_exp, rtx expected_size_exp)
15650 {
15651 rtx destreg;
15652 rtx label = NULL;
15653 rtx tmp;
15654 rtx jump_around_label = NULL;
15655 HOST_WIDE_INT align = 1;
15656 unsigned HOST_WIDE_INT count = 0;
15657 HOST_WIDE_INT expected_size = -1;
15658 int size_needed = 0, epilogue_size_needed;
15659 int desired_align = 0;
15660 enum stringop_alg alg;
15661 rtx promoted_val = NULL;
15662 bool force_loopy_epilogue = false;
15663 int dynamic_check;
15664
15665 if (CONST_INT_P (align_exp))
15666 align = INTVAL (align_exp);
15667 /* i386 can do misaligned access at a reasonably increased cost. */
15668 if (CONST_INT_P (expected_align_exp)
15669 && INTVAL (expected_align_exp) > align)
15670 align = INTVAL (expected_align_exp);
15671 if (CONST_INT_P (count_exp))
15672 count = expected_size = INTVAL (count_exp);
15673 if (CONST_INT_P (expected_size_exp) && count == 0)
15674 expected_size = INTVAL (expected_size_exp);
15675
15676 /* Make sure we don't need to care about overflow later on. */
15677 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
15678 return 0;
15679
15680 /* Step 0: Decide on preferred algorithm, desired alignment and
15681 size of chunks to be copied by main loop. */
15682
15683 alg = decide_alg (count, expected_size, true, &dynamic_check);
15684 desired_align = decide_alignment (align, alg, expected_size);
15685
15686 if (!TARGET_ALIGN_STRINGOPS)
15687 align = desired_align;
15688
15689 if (alg == libcall)
15690 return 0;
15691 gcc_assert (alg != no_stringop);
15692 if (!count)
15693 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
15694 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15695 switch (alg)
15696 {
15697 case libcall:
15698 case no_stringop:
15699 gcc_unreachable ();
15700 case loop:
15701 size_needed = GET_MODE_SIZE (Pmode);
15702 break;
15703 case unrolled_loop:
15704 size_needed = GET_MODE_SIZE (Pmode) * 4;
15705 break;
15706 case rep_prefix_8_byte:
15707 size_needed = 8;
15708 break;
15709 case rep_prefix_4_byte:
15710 size_needed = 4;
15711 break;
15712 case rep_prefix_1_byte:
15713 case loop_1_byte:
15714 size_needed = 1;
15715 break;
15716 }
15717 epilogue_size_needed = size_needed;
15718
15719 /* Step 1: Prologue guard. */
15720
15721 /* Alignment code needs count to be in register. */
15722 if (CONST_INT_P (count_exp) && desired_align > align)
15723 {
15724 enum machine_mode mode = SImode;
15725 if (TARGET_64BIT && (count & ~0xffffffff))
15726 mode = DImode;
15727 count_exp = force_reg (mode, count_exp);
15728 }
15729 /* Do the cheap promotion to allow better CSE across the
15730 main loop and epilogue (i.e. one load of the big constant in
15731 front of all the code). */
15732 if (CONST_INT_P (val_exp))
15733 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15734 desired_align, align);
15735 /* Ensure that alignment prologue won't copy past end of block. */
15736 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15737 {
15738 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15739 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
15740 Make sure it is a power of 2. */
15741 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15742
15743 /* To improve performance of small blocks, we jump around the VAL
15744 promoting code. This means that if the promoted VAL is not a constant,
15745 we might not use it in the epilogue and have to use the byte
15746 loop variant. */
15747 if (epilogue_size_needed > 2 && !promoted_val)
15748 force_loopy_epilogue = true;
15749 label = gen_label_rtx ();
15750 emit_cmp_and_jump_insns (count_exp,
15751 GEN_INT (epilogue_size_needed),
15752 LTU, 0, counter_mode (count_exp), 1, label);
15753 if (GET_CODE (count_exp) == CONST_INT)
15754 ;
15755 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
15756 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15757 else
15758 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15759 }
15760 if (dynamic_check != -1)
15761 {
15762 rtx hot_label = gen_label_rtx ();
15763 jump_around_label = gen_label_rtx ();
15764 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15765 LEU, 0, counter_mode (count_exp), 1, hot_label);
15766 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15767 set_storage_via_libcall (dst, count_exp, val_exp, false);
15768 emit_jump (jump_around_label);
15769 emit_label (hot_label);
15770 }
15771
15772 /* Step 2: Alignment prologue. */
15773
15774 /* Do the expensive promotion once we branched off the small blocks. */
15775 if (!promoted_val)
15776 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15777 desired_align, align);
15778 gcc_assert (desired_align >= 1 && align >= 1);
15779
15780 if (desired_align > align)
15781 {
15782 /* Except for the first move in the epilogue, we no longer know
15783 the constant offset in the aliasing info. It does not seem worth
15784 the pain to maintain it for the first move, so throw away
15785 the info early. */
15786 dst = change_address (dst, BLKmode, destreg);
15787 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
15788 desired_align);
15789 }
15790 if (label && size_needed == 1)
15791 {
15792 emit_label (label);
15793 LABEL_NUSES (label) = 1;
15794 label = NULL;
15795 }
15796
15797 /* Step 3: Main loop. */
15798
15799 switch (alg)
15800 {
15801 case libcall:
15802 case no_stringop:
15803 gcc_unreachable ();
15804 case loop_1_byte:
15805 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15806 count_exp, QImode, 1, expected_size);
15807 break;
15808 case loop:
15809 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15810 count_exp, Pmode, 1, expected_size);
15811 break;
15812 case unrolled_loop:
15813 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15814 count_exp, Pmode, 4, expected_size);
15815 break;
15816 case rep_prefix_8_byte:
15817 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15818 DImode);
15819 break;
15820 case rep_prefix_4_byte:
15821 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15822 SImode);
15823 break;
15824 case rep_prefix_1_byte:
15825 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15826 QImode);
15827 break;
15828 }
15829 /* Properly adjust the offset of the dest memory for aliasing. */
15830 if (CONST_INT_P (count_exp))
15831 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15832 (count / size_needed) * size_needed);
15833 else
15834 dst = change_address (dst, BLKmode, destreg);
15835
15836 /* Step 4: Epilogue to copy the remaining bytes. */
15837
15838 if (label)
15839 {
15840 /* When the main loop is done, COUNT_EXP might hold the original count,
15841 while we want to set only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
15842 Epilogue code will actually set COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
15843 bytes. Compensate if needed. */
15844
15845 if (size_needed < desired_align - align)
15846 {
15847 tmp =
15848 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15849 GEN_INT (size_needed - 1), count_exp, 1,
15850 OPTAB_DIRECT);
15851 size_needed = desired_align - align + 1;
15852 if (tmp != count_exp)
15853 emit_move_insn (count_exp, tmp);
15854 }
15855 emit_label (label);
15856 LABEL_NUSES (label) = 1;
15857 }
15858 if (count_exp != const0_rtx && epilogue_size_needed > 1)
15859 {
15860 if (force_loopy_epilogue)
15861 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
15862 size_needed);
15863 else
15864 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
15865 size_needed);
15866 }
15867 if (jump_around_label)
15868 emit_label (jump_around_label);
15869 return 1;
15870 }
15871
15872 /* Expand the appropriate insns for doing strlen if not just doing
15873 repnz; scasb
15874
15875 out = result, initialized with the start address
15876 align_rtx = alignment of the address.
15877 scratch = scratch register, initialized with the start address when
15878 not aligned, otherwise undefined
15879
15880 This is just the body. It needs the initializations mentioned above and
15881 some address computing at the end. These things are done in i386.md. */
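/* Roughly, the generated code first advances OUT one byte at a time until it
   is 4-byte aligned, checking each byte for the terminating zero, and then
   scans a word at a time using the zero-byte test
   (word - 0x01010101) & ~word & 0x80808080, fixing up the final position
   from the pattern of high bits found.  */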
15882
15883 static void
15884 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
15885 {
15886 int align;
15887 rtx tmp;
15888 rtx align_2_label = NULL_RTX;
15889 rtx align_3_label = NULL_RTX;
15890 rtx align_4_label = gen_label_rtx ();
15891 rtx end_0_label = gen_label_rtx ();
15892 rtx mem;
15893 rtx tmpreg = gen_reg_rtx (SImode);
15894 rtx scratch = gen_reg_rtx (SImode);
15895 rtx cmp;
15896
15897 align = 0;
15898 if (CONST_INT_P (align_rtx))
15899 align = INTVAL (align_rtx);
15900
15901 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
15902
15903 /* Is there a known alignment and is it less than 4? */
15904 if (align < 4)
15905 {
15906 rtx scratch1 = gen_reg_rtx (Pmode);
15907 emit_move_insn (scratch1, out);
15908 /* Is there a known alignment and is it not 2? */
15909 if (align != 2)
15910 {
15911 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
15912 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
15913
15914 /* Leave just the 3 lower bits. */
15915 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
15916 NULL_RTX, 0, OPTAB_WIDEN);
15917
15918 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15919 Pmode, 1, align_4_label);
15920 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
15921 Pmode, 1, align_2_label);
15922 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
15923 Pmode, 1, align_3_label);
15924 }
15925 else
15926 {
15927 /* Since the alignment is 2, we have to check 2 or 0 bytes;
15928 check if it is aligned to 4 bytes. */
15929
15930 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
15931 NULL_RTX, 0, OPTAB_WIDEN);
15932
15933 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15934 Pmode, 1, align_4_label);
15935 }
15936
15937 mem = change_address (src, QImode, out);
15938
15939 /* Now compare the bytes. */
15940
15941 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
15942 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
15943 QImode, 1, end_0_label);
15944
15945 /* Increment the address. */
15946 if (TARGET_64BIT)
15947 emit_insn (gen_adddi3 (out, out, const1_rtx));
15948 else
15949 emit_insn (gen_addsi3 (out, out, const1_rtx));
15950
15951 /* Not needed with an alignment of 2 */
15952 if (align != 2)
15953 {
15954 emit_label (align_2_label);
15955
15956 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15957 end_0_label);
15958
15959 if (TARGET_64BIT)
15960 emit_insn (gen_adddi3 (out, out, const1_rtx));
15961 else
15962 emit_insn (gen_addsi3 (out, out, const1_rtx));
15963
15964 emit_label (align_3_label);
15965 }
15966
15967 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15968 end_0_label);
15969
15970 if (TARGET_64BIT)
15971 emit_insn (gen_adddi3 (out, out, const1_rtx));
15972 else
15973 emit_insn (gen_addsi3 (out, out, const1_rtx));
15974 }
15975
15976 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
15977 align this loop; it only makes programs larger and does not help
15978 to speed them up. */
15979 emit_label (align_4_label);
15980
15981 mem = change_address (src, SImode, out);
15982 emit_move_insn (scratch, mem);
15983 if (TARGET_64BIT)
15984 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
15985 else
15986 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
15987
15988 /* This formula yields a nonzero result iff one of the bytes is zero.
15989 This saves three branches inside the loop and many cycles. */
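/* For instance, scratch == 0x2A2A002A gives
   (0x2A2A002A - 0x01010101) & ~0x2A2A002A & 0x80808080 == 0x00008000,
   i.e. the high bit is set exactly in the position of the zero byte.  */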
15990
15991 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
15992 emit_insn (gen_one_cmplsi2 (scratch, scratch));
15993 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
15994 emit_insn (gen_andsi3 (tmpreg, tmpreg,
15995 gen_int_mode (0x80808080, SImode)));
15996 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
15997 align_4_label);
15998
15999 if (TARGET_CMOVE)
16000 {
16001 rtx reg = gen_reg_rtx (SImode);
16002 rtx reg2 = gen_reg_rtx (Pmode);
16003 emit_move_insn (reg, tmpreg);
16004 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
16005
16006 /* If zero is not in the first two bytes, move two bytes forward. */
16007 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
16008 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16009 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
16010 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
16011 gen_rtx_IF_THEN_ELSE (SImode, tmp,
16012 reg,
16013 tmpreg)));
16014 /* Emit lea manually to avoid clobbering of flags. */
16015 emit_insn (gen_rtx_SET (SImode, reg2,
16016 gen_rtx_PLUS (Pmode, out, const2_rtx)));
16017
16018 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16019 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
16020 emit_insn (gen_rtx_SET (VOIDmode, out,
16021 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
16022 reg2,
16023 out)));
16024
16025 }
16026 else
16027 {
16028 rtx end_2_label = gen_label_rtx ();
16029 /* Is zero in the first two bytes? */
16030
16031 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
16032 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16033 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
16034 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
16035 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
16036 pc_rtx);
16037 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
16038 JUMP_LABEL (tmp) = end_2_label;
16039
16040 /* Not in the first two. Move two bytes forward. */
16041 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
16042 if (TARGET_64BIT)
16043 emit_insn (gen_adddi3 (out, out, const2_rtx));
16044 else
16045 emit_insn (gen_addsi3 (out, out, const2_rtx));
16046
16047 emit_label (end_2_label);
16048
16049 }
16050
16051 /* Avoid branch in fixing the byte. */
16052 tmpreg = gen_lowpart (QImode, tmpreg);
16053 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
16054 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
16055 if (TARGET_64BIT)
16056 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
16057 else
16058 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
16059
16060 emit_label (end_0_label);
16061 }
16062
16063 /* Expand strlen. */
16064
16065 int
16066 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
16067 {
16068 rtx addr, scratch1, scratch2, scratch3, scratch4;
16069
16070 /* The generic case of the strlen expander is long. Avoid expanding
16071 it unless TARGET_INLINE_ALL_STRINGOPS. */
16072
16073 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
16074 && !TARGET_INLINE_ALL_STRINGOPS
16075 && !optimize_size
16076 && (!CONST_INT_P (align) || INTVAL (align) < 4))
16077 return 0;
16078
16079 addr = force_reg (Pmode, XEXP (src, 0));
16080 scratch1 = gen_reg_rtx (Pmode);
16081
16082 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
16083 && !optimize_size)
16084 {
16085 /* Well it seems that some optimizer does not combine a call like
16086 foo(strlen(bar), strlen(bar));
16087 when the move and the subtraction are done here. It does calculate
16088 the length just once when these instructions are done inside of
16089 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
16090 often used and I use one fewer register for the lifetime of
16091 output_strlen_unroll() this is better. */
16092
16093 emit_move_insn (out, addr);
16094
16095 ix86_expand_strlensi_unroll_1 (out, src, align);
16096
16097 /* strlensi_unroll_1 returns the address of the zero at the end of
16098 the string, like memchr(), so compute the length by subtracting
16099 the start address. */
16100 if (TARGET_64BIT)
16101 emit_insn (gen_subdi3 (out, out, addr));
16102 else
16103 emit_insn (gen_subsi3 (out, out, addr));
16104 }
16105 else
16106 {
16107 rtx unspec;
16108
16109 /* Can't use this if the user has appropriated eax, ecx, or edi. */
16110 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
16111 return false;
16112
16113 scratch2 = gen_reg_rtx (Pmode);
16114 scratch3 = gen_reg_rtx (Pmode);
16115 scratch4 = force_reg (Pmode, constm1_rtx);
16116
16117 emit_move_insn (scratch3, addr);
16118 eoschar = force_reg (QImode, eoschar);
16119
16120 src = replace_equiv_address_nv (src, scratch3);
16121
16122 /* If .md starts supporting :P, this can be done in .md. */
16123 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
16124 scratch4), UNSPEC_SCAS);
16125 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
16126 if (TARGET_64BIT)
16127 {
16128 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
16129 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
16130 }
16131 else
16132 {
16133 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
16134 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
16135 }
16136 }
16137 return 1;
16138 }
16139
16140 /* For a given symbol (function), construct code to compute the address of its
16141 PLT entry in the large x86-64 PIC model. */
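/* The address is computed as symbol@PLTOFF plus the PIC register, i.e.
   roughly "movabs $symbol@PLTOFF, tmp; add pic_register, tmp".  */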
16142 rtx
16143 construct_plt_address (rtx symbol)
16144 {
16145 rtx tmp = gen_reg_rtx (Pmode);
16146 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
16147
16148 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
16149 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
16150
16151 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
16152 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
16153 return tmp;
16154 }
16155
16156 void
16157 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
16158 rtx callarg2 ATTRIBUTE_UNUSED,
16159 rtx pop, int sibcall)
16160 {
16161 rtx use = NULL, call;
16162
16163 if (pop == const0_rtx)
16164 pop = NULL;
16165 gcc_assert (!TARGET_64BIT || !pop);
16166
16167 if (TARGET_MACHO && !TARGET_64BIT)
16168 {
16169 #if TARGET_MACHO
16170 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
16171 fnaddr = machopic_indirect_call_target (fnaddr);
16172 #endif
16173 }
16174 else
16175 {
16176 /* Static functions and indirect calls don't need the pic register. */
16177 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
16178 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16179 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
16180 use_reg (&use, pic_offset_table_rtx);
16181 }
16182
16183 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
16184 {
16185 rtx al = gen_rtx_REG (QImode, AX_REG);
16186 emit_move_insn (al, callarg2);
16187 use_reg (&use, al);
16188 }
16189
16190 if (ix86_cmodel == CM_LARGE_PIC
16191 && GET_CODE (fnaddr) == MEM
16192 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16193 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
16194 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
16195 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
16196 {
16197 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16198 fnaddr = gen_rtx_MEM (QImode, fnaddr);
16199 }
16200 if (sibcall && TARGET_64BIT
16201 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
16202 {
16203 rtx addr;
16204 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16205 fnaddr = gen_rtx_REG (Pmode, R11_REG);
16206 emit_move_insn (fnaddr, addr);
16207 fnaddr = gen_rtx_MEM (QImode, fnaddr);
16208 }
16209
16210 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
16211 if (retval)
16212 call = gen_rtx_SET (VOIDmode, retval, call);
16213 if (pop)
16214 {
16215 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
16216 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
16217 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
16218 }
16219
16220 call = emit_call_insn (call);
16221 if (use)
16222 CALL_INSN_FUNCTION_USAGE (call) = use;
16223 }
16224
16225 \f
16226 /* Clear stack slot assignments remembered from previous functions.
16227 This is called from INIT_EXPANDERS once before RTL is emitted for each
16228 function. */
16229
16230 static struct machine_function *
16231 ix86_init_machine_status (void)
16232 {
16233 struct machine_function *f;
16234
16235 f = GGC_CNEW (struct machine_function);
16236 f->use_fast_prologue_epilogue_nregs = -1;
16237 f->tls_descriptor_call_expanded_p = 0;
16238
16239 return f;
16240 }
16241
16242 /* Return a MEM corresponding to a stack slot with mode MODE.
16243 Allocate a new slot if necessary.
16244
16245 The RTL for a function can have several slots available: N is
16246 which slot to use. */
16247
16248 rtx
16249 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
16250 {
16251 struct stack_local_entry *s;
16252
16253 gcc_assert (n < MAX_386_STACK_LOCALS);
16254
16255 /* Virtual slot is valid only before vregs are instantiated. */
16256 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
16257
16258 for (s = ix86_stack_locals; s; s = s->next)
16259 if (s->mode == mode && s->n == n)
16260 return copy_rtx (s->rtl);
16261
16262 s = (struct stack_local_entry *)
16263 ggc_alloc (sizeof (struct stack_local_entry));
16264 s->n = n;
16265 s->mode = mode;
16266 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
16267
16268 s->next = ix86_stack_locals;
16269 ix86_stack_locals = s;
16270 return s->rtl;
16271 }
16272
16273 /* Construct the SYMBOL_REF for the tls_get_addr function. */
16274
16275 static GTY(()) rtx ix86_tls_symbol;
16276 rtx
16277 ix86_tls_get_addr (void)
16278 {
16279
16280 if (!ix86_tls_symbol)
16281 {
16282 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
16283 (TARGET_ANY_GNU_TLS
16284 && !TARGET_64BIT)
16285 ? "___tls_get_addr"
16286 : "__tls_get_addr");
16287 }
16288
16289 return ix86_tls_symbol;
16290 }
16291
16292 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
16293
16294 static GTY(()) rtx ix86_tls_module_base_symbol;
16295 rtx
16296 ix86_tls_module_base (void)
16297 {
16298
16299 if (!ix86_tls_module_base_symbol)
16300 {
16301 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
16302 "_TLS_MODULE_BASE_");
16303 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
16304 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
16305 }
16306
16307 return ix86_tls_module_base_symbol;
16308 }
16309 \f
16310 /* Calculate the length of the memory address in the instruction
16311 encoding. Does not include the one-byte modrm, opcode, or prefix. */
16312
16313 int
16314 memory_address_length (rtx addr)
16315 {
16316 struct ix86_address parts;
16317 rtx base, index, disp;
16318 int len;
16319 int ok;
16320
16321 if (GET_CODE (addr) == PRE_DEC
16322 || GET_CODE (addr) == POST_INC
16323 || GET_CODE (addr) == PRE_MODIFY
16324 || GET_CODE (addr) == POST_MODIFY)
16325 return 0;
16326
16327 ok = ix86_decompose_address (addr, &parts);
16328 gcc_assert (ok);
16329
16330 if (parts.base && GET_CODE (parts.base) == SUBREG)
16331 parts.base = SUBREG_REG (parts.base);
16332 if (parts.index && GET_CODE (parts.index) == SUBREG)
16333 parts.index = SUBREG_REG (parts.index);
16334
16335 base = parts.base;
16336 index = parts.index;
16337 disp = parts.disp;
16338 len = 0;
16339
16340 /* Rule of thumb:
16341 - esp as the base always wants an index,
16342 - ebp as the base always wants a displacement. */
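/* In encoding terms: esp as a base forces a SIB byte because esp's register
   number is the SIB escape code in the modrm byte, and ebp as a base with
   mod == 00 is not encodable, so a zero displacement byte is emitted for it.  */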
16343
16344 /* Register Indirect. */
16345 if (base && !index && !disp)
16346 {
16347 /* esp (for its index) and ebp (for its displacement) need
16348 the two-byte modrm form. */
16349 if (addr == stack_pointer_rtx
16350 || addr == arg_pointer_rtx
16351 || addr == frame_pointer_rtx
16352 || addr == hard_frame_pointer_rtx)
16353 len = 1;
16354 }
16355
16356 /* Direct Addressing. */
16357 else if (disp && !base && !index)
16358 len = 4;
16359
16360 else
16361 {
16362 /* Find the length of the displacement constant. */
16363 if (disp)
16364 {
16365 if (base && satisfies_constraint_K (disp))
16366 len = 1;
16367 else
16368 len = 4;
16369 }
16370 /* ebp always wants a displacement. */
16371 else if (base == hard_frame_pointer_rtx)
16372 len = 1;
16373
16374 /* An index requires the two-byte modrm form.... */
16375 if (index
16376 /* ...like esp, which always wants an index. */
16377 || base == stack_pointer_rtx
16378 || base == arg_pointer_rtx
16379 || base == frame_pointer_rtx)
16380 len += 1;
16381 }
16382
16383 return len;
16384 }
16385
16386 /* Compute default value for "length_immediate" attribute. When SHORTFORM
16387 is set, expect that the insn has an 8-bit immediate alternative. */
16388 int
16389 ix86_attr_length_immediate_default (rtx insn, int shortform)
16390 {
16391 int len = 0;
16392 int i;
16393 extract_insn_cached (insn);
16394 for (i = recog_data.n_operands - 1; i >= 0; --i)
16395 if (CONSTANT_P (recog_data.operand[i]))
16396 {
16397 gcc_assert (!len);
16398 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
16399 len = 1;
16400 else
16401 {
16402 switch (get_attr_mode (insn))
16403 {
16404 case MODE_QI:
16405 len+=1;
16406 break;
16407 case MODE_HI:
16408 len+=2;
16409 break;
16410 case MODE_SI:
16411 len+=4;
16412 break;
16413 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
16414 case MODE_DI:
16415 len+=4;
16416 break;
16417 default:
16418 fatal_insn ("unknown insn mode", insn);
16419 }
16420 }
16421 }
16422 return len;
16423 }
16424 /* Compute default value for "length_address" attribute. */
16425 int
16426 ix86_attr_length_address_default (rtx insn)
16427 {
16428 int i;
16429
16430 if (get_attr_type (insn) == TYPE_LEA)
16431 {
16432 rtx set = PATTERN (insn);
16433
16434 if (GET_CODE (set) == PARALLEL)
16435 set = XVECEXP (set, 0, 0);
16436
16437 gcc_assert (GET_CODE (set) == SET);
16438
16439 return memory_address_length (SET_SRC (set));
16440 }
16441
16442 extract_insn_cached (insn);
16443 for (i = recog_data.n_operands - 1; i >= 0; --i)
16444 if (MEM_P (recog_data.operand[i]))
16445 {
16446 return memory_address_length (XEXP (recog_data.operand[i], 0));
16448 }
16449 return 0;
16450 }
16451 \f
16452 /* Return the maximum number of instructions a cpu can issue. */
16453
16454 static int
16455 ix86_issue_rate (void)
16456 {
16457 switch (ix86_tune)
16458 {
16459 case PROCESSOR_PENTIUM:
16460 case PROCESSOR_K6:
16461 return 2;
16462
16463 case PROCESSOR_PENTIUMPRO:
16464 case PROCESSOR_PENTIUM4:
16465 case PROCESSOR_ATHLON:
16466 case PROCESSOR_K8:
16467 case PROCESSOR_AMDFAM10:
16468 case PROCESSOR_NOCONA:
16469 case PROCESSOR_GENERIC32:
16470 case PROCESSOR_GENERIC64:
16471 return 3;
16472
16473 case PROCESSOR_CORE2:
16474 return 4;
16475
16476 default:
16477 return 1;
16478 }
16479 }
16480
16481 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
16482 by DEP_INSN and nothing else set by DEP_INSN. */
16483
16484 static int
16485 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
16486 {
16487 rtx set, set2;
16488
16489 /* Simplify the test for uninteresting insns. */
16490 if (insn_type != TYPE_SETCC
16491 && insn_type != TYPE_ICMOV
16492 && insn_type != TYPE_FCMOV
16493 && insn_type != TYPE_IBR)
16494 return 0;
16495
16496 if ((set = single_set (dep_insn)) != 0)
16497 {
16498 set = SET_DEST (set);
16499 set2 = NULL_RTX;
16500 }
16501 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
16502 && XVECLEN (PATTERN (dep_insn), 0) == 2
16503 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
16504 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
16505 {
16506 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
16507 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
16508 }
16509 else
16510 return 0;
16511
16512 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
16513 return 0;
16514
16515 /* This test is true if the dependent insn reads the flags but
16516 not any other potentially set register. */
16517 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
16518 return 0;
16519
16520 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
16521 return 0;
16522
16523 return 1;
16524 }
16525
16526 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
16527 address with operands set by DEP_INSN. */
16528
16529 static int
16530 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
16531 {
16532 rtx addr;
16533
16534 if (insn_type == TYPE_LEA
16535 && TARGET_PENTIUM)
16536 {
16537 addr = PATTERN (insn);
16538
16539 if (GET_CODE (addr) == PARALLEL)
16540 addr = XVECEXP (addr, 0, 0);
16541
16542 gcc_assert (GET_CODE (addr) == SET);
16543
16544 addr = SET_SRC (addr);
16545 }
16546 else
16547 {
16548 int i;
16549 extract_insn_cached (insn);
16550 for (i = recog_data.n_operands - 1; i >= 0; --i)
16551 if (MEM_P (recog_data.operand[i]))
16552 {
16553 addr = XEXP (recog_data.operand[i], 0);
16554 goto found;
16555 }
16556 return 0;
16557 found:;
16558 }
16559
16560 return modified_in_p (addr, dep_insn);
16561 }
16562
16563 static int
16564 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
16565 {
16566 enum attr_type insn_type, dep_insn_type;
16567 enum attr_memory memory;
16568 rtx set, set2;
16569 int dep_insn_code_number;
16570
16571 /* Anti and output dependencies have zero cost on all CPUs. */
16572 if (REG_NOTE_KIND (link) != 0)
16573 return 0;
16574
16575 dep_insn_code_number = recog_memoized (dep_insn);
16576
16577 /* If we can't recognize the insns, we can't really do anything. */
16578 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
16579 return cost;
16580
16581 insn_type = get_attr_type (insn);
16582 dep_insn_type = get_attr_type (dep_insn);
16583
16584 switch (ix86_tune)
16585 {
16586 case PROCESSOR_PENTIUM:
16587 /* Address Generation Interlock adds a cycle of latency. */
16588 if (ix86_agi_dependent (insn, dep_insn, insn_type))
16589 cost += 1;
16590
16591 /* ??? Compares pair with jump/setcc. */
16592 if (ix86_flags_dependent (insn, dep_insn, insn_type))
16593 cost = 0;
16594
16595 /* Floating point stores require the value to be ready one cycle earlier. */
16596 if (insn_type == TYPE_FMOV
16597 && get_attr_memory (insn) == MEMORY_STORE
16598 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16599 cost += 1;
16600 break;
16601
16602 case PROCESSOR_PENTIUMPRO:
16603 memory = get_attr_memory (insn);
16604
16605 /* INT->FP conversion is expensive. */
16606 if (get_attr_fp_int_src (dep_insn))
16607 cost += 5;
16608
16609 /* There is one cycle extra latency between an FP op and a store. */
16610 if (insn_type == TYPE_FMOV
16611 && (set = single_set (dep_insn)) != NULL_RTX
16612 && (set2 = single_set (insn)) != NULL_RTX
16613 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
16614 && MEM_P (SET_DEST (set2)))
16615 cost += 1;
16616
16617 /* Show the ability of the reorder buffer to hide the latency of a load
16618 by executing it in parallel with the previous instruction, when the
16619 previous instruction is not needed to compute the address. */
16620 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16621 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16622 {
16623 /* Claim that moves take one cycle, as the core can issue one load
16624 at a time and the next load can start a cycle later. */
16625 if (dep_insn_type == TYPE_IMOV
16626 || dep_insn_type == TYPE_FMOV)
16627 cost = 1;
16628 else if (cost > 1)
16629 cost--;
16630 }
16631 break;
16632
16633 case PROCESSOR_K6:
16634 memory = get_attr_memory (insn);
16635
16636 /* The esp dependency is resolved before the instruction is really
16637 finished. */
16638 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
16639 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
16640 return 1;
16641
16642 /* INT->FP conversion is expensive. */
16643 if (get_attr_fp_int_src (dep_insn))
16644 cost += 5;
16645
16646 /* Show the ability of the reorder buffer to hide the latency of a load
16647 by executing it in parallel with the previous instruction, when the
16648 previous instruction is not needed to compute the address. */
16649 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16650 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16651 {
16652 /* Claim that moves take one cycle, as the core can issue one load
16653 at a time and the next load can start a cycle later. */
16654 if (dep_insn_type == TYPE_IMOV
16655 || dep_insn_type == TYPE_FMOV)
16656 cost = 1;
16657 else if (cost > 2)
16658 cost -= 2;
16659 else
16660 cost = 1;
16661 }
16662 break;
16663
16664 case PROCESSOR_ATHLON:
16665 case PROCESSOR_K8:
16666 case PROCESSOR_AMDFAM10:
16667 case PROCESSOR_GENERIC32:
16668 case PROCESSOR_GENERIC64:
16669 memory = get_attr_memory (insn);
16670
16671 /* Show the ability of the reorder buffer to hide the latency of a load
16672 by executing it in parallel with the previous instruction, when the
16673 previous instruction is not needed to compute the address. */
16674 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16675 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16676 {
16677 enum attr_unit unit = get_attr_unit (insn);
16678 int loadcost = 3;
16679
16680 /* Because of the difference between the length of integer and
16681 floating unit pipeline preparation stages, the memory operands
16682 for floating point are cheaper.
16683
16684 ??? For Athlon the difference is most probably 2. */
16685 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
16686 loadcost = 3;
16687 else
16688 loadcost = TARGET_ATHLON ? 2 : 0;
16689
16690 if (cost >= loadcost)
16691 cost -= loadcost;
16692 else
16693 cost = 0;
16694 }
16695
16696 default:
16697 break;
16698 }
16699
16700 return cost;
16701 }
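
/* Illustration only, guarded out of the build: the shape of the Pentium
   adjustment above with the RTL analysis replaced by hypothetical booleans.
   In "movl %eax, %ebx; movl (%ebx), %ecx" the load's address depends on the
   previous insn, so its cost goes up by one cycle (AGI), while a compare
   followed by the conditional jump that consumes its flags is modelled as
   pairing for free.  */
#if 0
static int
example_pentium_adjust_cost (int cost, int agi_dependent, int flags_pairing)
{
  if (agi_dependent)
    cost += 1;			/* Address generation interlock.  */
  if (flags_pairing)
    cost = 0;			/* Compare pairs with jump/setcc.  */
  return cost;
}
#endif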
16702
16703 /* How many alternative schedules to try. This should be as wide as the
16704 scheduling freedom in the DFA, but no wider. Making this value too
16705 large results in extra work for the scheduler. */
16706
16707 static int
16708 ia32_multipass_dfa_lookahead (void)
16709 {
16710 switch (ix86_tune)
16711 {
16712 case PROCESSOR_PENTIUM:
16713 return 2;
16714
16715 case PROCESSOR_PENTIUMPRO:
16716 case PROCESSOR_K6:
16717 return 1;
16718
16719 default:
16720 return 0;
16721 }
16722 }
16723
16724 \f
16725 /* Compute the alignment given to a constant that is being placed in memory.
16726 EXP is the constant and ALIGN is the alignment that the object would
16727 ordinarily have.
16728 The value of this function is used instead of that alignment to align
16729 the object. */
16730
16731 int
16732 ix86_constant_alignment (tree exp, int align)
16733 {
16734 if (TREE_CODE (exp) == REAL_CST)
16735 {
16736 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
16737 return 64;
16738 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
16739 return 128;
16740 }
16741 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
16742 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
16743 return BITS_PER_WORD;
16744
16745 return align;
16746 }
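
/* Illustration only, guarded out of the build: the practical effect of the
   rule above for constants forced to memory, written as a hypothetical
   helper.  A DFmode literal such as 3.14159 is given an 8-byte slot, and a
   long string constant is word aligned unless optimizing for size (the
   32-bit word size is hard-coded here for simplicity).  */
#if 0
static int
example_constant_alignment (int is_double_constant, int string_length,
			    int align)
{
  if (is_double_constant && align < 64)
    return 64;
  if (string_length >= 31 && align < 32)
    return 32;
  return align;
}
#endif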
16747
16748 /* Compute the alignment for a static variable.
16749 TYPE is the data type, and ALIGN is the alignment that
16750 the object would ordinarily have. The value of this function is used
16751 instead of that alignment to align the object. */
16752
16753 int
16754 ix86_data_alignment (tree type, int align)
16755 {
16756 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
16757
16758 if (AGGREGATE_TYPE_P (type)
16759 && TYPE_SIZE (type)
16760 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16761 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
16762 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
16763 && align < max_align)
16764 align = max_align;
16765
16766 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
16767 to a 16-byte boundary. */
16768 if (TARGET_64BIT)
16769 {
16770 if (AGGREGATE_TYPE_P (type)
16771 && TYPE_SIZE (type)
16772 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16773 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
16774 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16775 return 128;
16776 }
16777
16778 if (TREE_CODE (type) == ARRAY_TYPE)
16779 {
16780 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16781 return 64;
16782 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16783 return 128;
16784 }
16785 else if (TREE_CODE (type) == COMPLEX_TYPE)
16786 {
16788 if (TYPE_MODE (type) == DCmode && align < 64)
16789 return 64;
16790 if (TYPE_MODE (type) == XCmode && align < 128)
16791 return 128;
16792 }
16793 else if ((TREE_CODE (type) == RECORD_TYPE
16794 || TREE_CODE (type) == UNION_TYPE
16795 || TREE_CODE (type) == QUAL_UNION_TYPE)
16796 && TYPE_FIELDS (type))
16797 {
16798 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16799 return 64;
16800 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16801 return 128;
16802 }
16803 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16804 || TREE_CODE (type) == INTEGER_TYPE)
16805 {
16806 if (TYPE_MODE (type) == DFmode && align < 64)
16807 return 64;
16808 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16809 return 128;
16810 }
16811
16812 return align;
16813 }
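
/* Illustration only, guarded out of the build: a condensed sketch of two of
   the rules above for a hypothetical size/alignment query.  Sizes are in
   bits, as in TYPE_SIZE.  */
#if 0
static int
example_data_alignment_x86_64 (unsigned long aggregate_bits,
			       int element_is_double, int align)
{
  /* Aggregates of 16 bytes (128 bits) or more get 128-bit alignment.  */
  if (aggregate_bits >= 128 && align < 128)
    return 128;
  /* Arrays of doubles get at least 64-bit alignment.  */
  if (element_is_double && align < 64)
    return 64;
  return align;
}
#endif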
16814
16815 /* Compute the alignment for a local variable.
16816 TYPE is the data type, and ALIGN is the alignment that
16817 the object would ordinarily have. The value of this macro is used
16818 instead of that alignment to align the object. */
16819
16820 int
16821 ix86_local_alignment (tree type, int align)
16822 {
16823 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16824 to a 16-byte boundary. */
16825 if (TARGET_64BIT)
16826 {
16827 if (AGGREGATE_TYPE_P (type)
16828 && TYPE_SIZE (type)
16829 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16830 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
16831 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16832 return 128;
16833 }
16834 if (TREE_CODE (type) == ARRAY_TYPE)
16835 {
16836 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16837 return 64;
16838 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16839 return 128;
16840 }
16841 else if (TREE_CODE (type) == COMPLEX_TYPE)
16842 {
16843 if (TYPE_MODE (type) == DCmode && align < 64)
16844 return 64;
16845 if (TYPE_MODE (type) == XCmode && align < 128)
16846 return 128;
16847 }
16848 else if ((TREE_CODE (type) == RECORD_TYPE
16849 || TREE_CODE (type) == UNION_TYPE
16850 || TREE_CODE (type) == QUAL_UNION_TYPE)
16851 && TYPE_FIELDS (type))
16852 {
16853 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16854 return 64;
16855 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16856 return 128;
16857 }
16858 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16859 || TREE_CODE (type) == INTEGER_TYPE)
16860 {
16862 if (TYPE_MODE (type) == DFmode && align < 64)
16863 return 64;
16864 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16865 return 128;
16866 }
16867 return align;
16868 }
16869 \f
16870 /* Emit RTL insns to initialize the variable parts of a trampoline.
16871 FNADDR is an RTX for the address of the function's pure code.
16872 CXT is an RTX for the static chain value for the function. */
16873 void
16874 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
16875 {
16876 if (!TARGET_64BIT)
16877 {
16878 /* Compute offset from the end of the jmp to the target function. */
16879 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
16880 plus_constant (tramp, 10),
16881 NULL_RTX, 1, OPTAB_DIRECT);
16882 emit_move_insn (gen_rtx_MEM (QImode, tramp),
16883 gen_int_mode (0xb9, QImode));
16884 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
16885 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
16886 gen_int_mode (0xe9, QImode));
16887 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
16888 }
16889 else
16890 {
16891 int offset = 0;
16892 /* Try to load the address using the shorter movl instead of movabs.
16893 We may want to support movq for kernel mode, but the kernel does not
16894 use trampolines at the moment. */
16895 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
16896 {
16897 fnaddr = copy_to_mode_reg (DImode, fnaddr);
16898 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16899 gen_int_mode (0xbb41, HImode));
16900 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
16901 gen_lowpart (SImode, fnaddr));
16902 offset += 6;
16903 }
16904 else
16905 {
16906 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16907 gen_int_mode (0xbb49, HImode));
16908 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16909 fnaddr);
16910 offset += 10;
16911 }
16912 /* Load static chain using movabs to r10. */
16913 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16914 gen_int_mode (0xba49, HImode));
16915 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16916 cxt);
16917 offset += 10;
16918 /* Jump to r11. */
16919 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16920 gen_int_mode (0xff49, HImode));
16921 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
16922 gen_int_mode (0xe3, QImode));
16923 offset += 3;
16924 gcc_assert (offset <= TRAMPOLINE_SIZE);
16925 }
16926
16927 #ifdef ENABLE_EXECUTE_STACK
16928 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
16929 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
16930 #endif
16931 }
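
/* Illustration only, guarded out of the build: the byte layout the code
   above produces.  The zero bytes are placeholders patched at run time.  */
#if 0
static const unsigned char example_tramp_ia32[10] = {
  0xb9, 0, 0, 0, 0,		/* movl   $CXT, %ecx  (CXT at offset 1)        */
  0xe9, 0, 0, 0, 0		/* jmp    rel32       (FNADDR - (tramp + 10))  */
};

static const unsigned char example_tramp_x86_64[23] = {
  0x49, 0xbb, 0, 0, 0, 0, 0, 0, 0, 0,	/* movabs $FNADDR, %r11  */
  0x49, 0xba, 0, 0, 0, 0, 0, 0, 0, 0,	/* movabs $CXT, %r10     */
  0x49, 0xff, 0xe3			/* jmp    *%r11          */
};
#endif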
16932 \f
16933 /* Codes for all the SSE/MMX builtins. */
16934 enum ix86_builtins
16935 {
16936 IX86_BUILTIN_ADDPS,
16937 IX86_BUILTIN_ADDSS,
16938 IX86_BUILTIN_DIVPS,
16939 IX86_BUILTIN_DIVSS,
16940 IX86_BUILTIN_MULPS,
16941 IX86_BUILTIN_MULSS,
16942 IX86_BUILTIN_SUBPS,
16943 IX86_BUILTIN_SUBSS,
16944
16945 IX86_BUILTIN_CMPEQPS,
16946 IX86_BUILTIN_CMPLTPS,
16947 IX86_BUILTIN_CMPLEPS,
16948 IX86_BUILTIN_CMPGTPS,
16949 IX86_BUILTIN_CMPGEPS,
16950 IX86_BUILTIN_CMPNEQPS,
16951 IX86_BUILTIN_CMPNLTPS,
16952 IX86_BUILTIN_CMPNLEPS,
16953 IX86_BUILTIN_CMPNGTPS,
16954 IX86_BUILTIN_CMPNGEPS,
16955 IX86_BUILTIN_CMPORDPS,
16956 IX86_BUILTIN_CMPUNORDPS,
16957 IX86_BUILTIN_CMPEQSS,
16958 IX86_BUILTIN_CMPLTSS,
16959 IX86_BUILTIN_CMPLESS,
16960 IX86_BUILTIN_CMPNEQSS,
16961 IX86_BUILTIN_CMPNLTSS,
16962 IX86_BUILTIN_CMPNLESS,
16963 IX86_BUILTIN_CMPNGTSS,
16964 IX86_BUILTIN_CMPNGESS,
16965 IX86_BUILTIN_CMPORDSS,
16966 IX86_BUILTIN_CMPUNORDSS,
16967
16968 IX86_BUILTIN_COMIEQSS,
16969 IX86_BUILTIN_COMILTSS,
16970 IX86_BUILTIN_COMILESS,
16971 IX86_BUILTIN_COMIGTSS,
16972 IX86_BUILTIN_COMIGESS,
16973 IX86_BUILTIN_COMINEQSS,
16974 IX86_BUILTIN_UCOMIEQSS,
16975 IX86_BUILTIN_UCOMILTSS,
16976 IX86_BUILTIN_UCOMILESS,
16977 IX86_BUILTIN_UCOMIGTSS,
16978 IX86_BUILTIN_UCOMIGESS,
16979 IX86_BUILTIN_UCOMINEQSS,
16980
16981 IX86_BUILTIN_CVTPI2PS,
16982 IX86_BUILTIN_CVTPS2PI,
16983 IX86_BUILTIN_CVTSI2SS,
16984 IX86_BUILTIN_CVTSI642SS,
16985 IX86_BUILTIN_CVTSS2SI,
16986 IX86_BUILTIN_CVTSS2SI64,
16987 IX86_BUILTIN_CVTTPS2PI,
16988 IX86_BUILTIN_CVTTSS2SI,
16989 IX86_BUILTIN_CVTTSS2SI64,
16990
16991 IX86_BUILTIN_MAXPS,
16992 IX86_BUILTIN_MAXSS,
16993 IX86_BUILTIN_MINPS,
16994 IX86_BUILTIN_MINSS,
16995
16996 IX86_BUILTIN_LOADUPS,
16997 IX86_BUILTIN_STOREUPS,
16998 IX86_BUILTIN_MOVSS,
16999
17000 IX86_BUILTIN_MOVHLPS,
17001 IX86_BUILTIN_MOVLHPS,
17002 IX86_BUILTIN_LOADHPS,
17003 IX86_BUILTIN_LOADLPS,
17004 IX86_BUILTIN_STOREHPS,
17005 IX86_BUILTIN_STORELPS,
17006
17007 IX86_BUILTIN_MASKMOVQ,
17008 IX86_BUILTIN_MOVMSKPS,
17009 IX86_BUILTIN_PMOVMSKB,
17010
17011 IX86_BUILTIN_MOVNTPS,
17012 IX86_BUILTIN_MOVNTQ,
17013
17014 IX86_BUILTIN_LOADDQU,
17015 IX86_BUILTIN_STOREDQU,
17016
17017 IX86_BUILTIN_PACKSSWB,
17018 IX86_BUILTIN_PACKSSDW,
17019 IX86_BUILTIN_PACKUSWB,
17020
17021 IX86_BUILTIN_PADDB,
17022 IX86_BUILTIN_PADDW,
17023 IX86_BUILTIN_PADDD,
17024 IX86_BUILTIN_PADDQ,
17025 IX86_BUILTIN_PADDSB,
17026 IX86_BUILTIN_PADDSW,
17027 IX86_BUILTIN_PADDUSB,
17028 IX86_BUILTIN_PADDUSW,
17029 IX86_BUILTIN_PSUBB,
17030 IX86_BUILTIN_PSUBW,
17031 IX86_BUILTIN_PSUBD,
17032 IX86_BUILTIN_PSUBQ,
17033 IX86_BUILTIN_PSUBSB,
17034 IX86_BUILTIN_PSUBSW,
17035 IX86_BUILTIN_PSUBUSB,
17036 IX86_BUILTIN_PSUBUSW,
17037
17038 IX86_BUILTIN_PAND,
17039 IX86_BUILTIN_PANDN,
17040 IX86_BUILTIN_POR,
17041 IX86_BUILTIN_PXOR,
17042
17043 IX86_BUILTIN_PAVGB,
17044 IX86_BUILTIN_PAVGW,
17045
17046 IX86_BUILTIN_PCMPEQB,
17047 IX86_BUILTIN_PCMPEQW,
17048 IX86_BUILTIN_PCMPEQD,
17049 IX86_BUILTIN_PCMPGTB,
17050 IX86_BUILTIN_PCMPGTW,
17051 IX86_BUILTIN_PCMPGTD,
17052
17053 IX86_BUILTIN_PMADDWD,
17054
17055 IX86_BUILTIN_PMAXSW,
17056 IX86_BUILTIN_PMAXUB,
17057 IX86_BUILTIN_PMINSW,
17058 IX86_BUILTIN_PMINUB,
17059
17060 IX86_BUILTIN_PMULHUW,
17061 IX86_BUILTIN_PMULHW,
17062 IX86_BUILTIN_PMULLW,
17063
17064 IX86_BUILTIN_PSADBW,
17065 IX86_BUILTIN_PSHUFW,
17066
17067 IX86_BUILTIN_PSLLW,
17068 IX86_BUILTIN_PSLLD,
17069 IX86_BUILTIN_PSLLQ,
17070 IX86_BUILTIN_PSRAW,
17071 IX86_BUILTIN_PSRAD,
17072 IX86_BUILTIN_PSRLW,
17073 IX86_BUILTIN_PSRLD,
17074 IX86_BUILTIN_PSRLQ,
17075 IX86_BUILTIN_PSLLWI,
17076 IX86_BUILTIN_PSLLDI,
17077 IX86_BUILTIN_PSLLQI,
17078 IX86_BUILTIN_PSRAWI,
17079 IX86_BUILTIN_PSRADI,
17080 IX86_BUILTIN_PSRLWI,
17081 IX86_BUILTIN_PSRLDI,
17082 IX86_BUILTIN_PSRLQI,
17083
17084 IX86_BUILTIN_PUNPCKHBW,
17085 IX86_BUILTIN_PUNPCKHWD,
17086 IX86_BUILTIN_PUNPCKHDQ,
17087 IX86_BUILTIN_PUNPCKLBW,
17088 IX86_BUILTIN_PUNPCKLWD,
17089 IX86_BUILTIN_PUNPCKLDQ,
17090
17091 IX86_BUILTIN_SHUFPS,
17092
17093 IX86_BUILTIN_RCPPS,
17094 IX86_BUILTIN_RCPSS,
17095 IX86_BUILTIN_RSQRTPS,
17096 IX86_BUILTIN_RSQRTPS_NR,
17097 IX86_BUILTIN_RSQRTSS,
17098 IX86_BUILTIN_RSQRTF,
17099 IX86_BUILTIN_SQRTPS,
17100 IX86_BUILTIN_SQRTPS_NR,
17101 IX86_BUILTIN_SQRTSS,
17102
17103 IX86_BUILTIN_UNPCKHPS,
17104 IX86_BUILTIN_UNPCKLPS,
17105
17106 IX86_BUILTIN_ANDPS,
17107 IX86_BUILTIN_ANDNPS,
17108 IX86_BUILTIN_ORPS,
17109 IX86_BUILTIN_XORPS,
17110
17111 IX86_BUILTIN_EMMS,
17112 IX86_BUILTIN_LDMXCSR,
17113 IX86_BUILTIN_STMXCSR,
17114 IX86_BUILTIN_SFENCE,
17115
17116 /* 3DNow! Original */
17117 IX86_BUILTIN_FEMMS,
17118 IX86_BUILTIN_PAVGUSB,
17119 IX86_BUILTIN_PF2ID,
17120 IX86_BUILTIN_PFACC,
17121 IX86_BUILTIN_PFADD,
17122 IX86_BUILTIN_PFCMPEQ,
17123 IX86_BUILTIN_PFCMPGE,
17124 IX86_BUILTIN_PFCMPGT,
17125 IX86_BUILTIN_PFMAX,
17126 IX86_BUILTIN_PFMIN,
17127 IX86_BUILTIN_PFMUL,
17128 IX86_BUILTIN_PFRCP,
17129 IX86_BUILTIN_PFRCPIT1,
17130 IX86_BUILTIN_PFRCPIT2,
17131 IX86_BUILTIN_PFRSQIT1,
17132 IX86_BUILTIN_PFRSQRT,
17133 IX86_BUILTIN_PFSUB,
17134 IX86_BUILTIN_PFSUBR,
17135 IX86_BUILTIN_PI2FD,
17136 IX86_BUILTIN_PMULHRW,
17137
17138 /* 3DNow! Athlon Extensions */
17139 IX86_BUILTIN_PF2IW,
17140 IX86_BUILTIN_PFNACC,
17141 IX86_BUILTIN_PFPNACC,
17142 IX86_BUILTIN_PI2FW,
17143 IX86_BUILTIN_PSWAPDSI,
17144 IX86_BUILTIN_PSWAPDSF,
17145
17146 /* SSE2 */
17147 IX86_BUILTIN_ADDPD,
17148 IX86_BUILTIN_ADDSD,
17149 IX86_BUILTIN_DIVPD,
17150 IX86_BUILTIN_DIVSD,
17151 IX86_BUILTIN_MULPD,
17152 IX86_BUILTIN_MULSD,
17153 IX86_BUILTIN_SUBPD,
17154 IX86_BUILTIN_SUBSD,
17155
17156 IX86_BUILTIN_CMPEQPD,
17157 IX86_BUILTIN_CMPLTPD,
17158 IX86_BUILTIN_CMPLEPD,
17159 IX86_BUILTIN_CMPGTPD,
17160 IX86_BUILTIN_CMPGEPD,
17161 IX86_BUILTIN_CMPNEQPD,
17162 IX86_BUILTIN_CMPNLTPD,
17163 IX86_BUILTIN_CMPNLEPD,
17164 IX86_BUILTIN_CMPNGTPD,
17165 IX86_BUILTIN_CMPNGEPD,
17166 IX86_BUILTIN_CMPORDPD,
17167 IX86_BUILTIN_CMPUNORDPD,
17168 IX86_BUILTIN_CMPEQSD,
17169 IX86_BUILTIN_CMPLTSD,
17170 IX86_BUILTIN_CMPLESD,
17171 IX86_BUILTIN_CMPNEQSD,
17172 IX86_BUILTIN_CMPNLTSD,
17173 IX86_BUILTIN_CMPNLESD,
17174 IX86_BUILTIN_CMPORDSD,
17175 IX86_BUILTIN_CMPUNORDSD,
17176
17177 IX86_BUILTIN_COMIEQSD,
17178 IX86_BUILTIN_COMILTSD,
17179 IX86_BUILTIN_COMILESD,
17180 IX86_BUILTIN_COMIGTSD,
17181 IX86_BUILTIN_COMIGESD,
17182 IX86_BUILTIN_COMINEQSD,
17183 IX86_BUILTIN_UCOMIEQSD,
17184 IX86_BUILTIN_UCOMILTSD,
17185 IX86_BUILTIN_UCOMILESD,
17186 IX86_BUILTIN_UCOMIGTSD,
17187 IX86_BUILTIN_UCOMIGESD,
17188 IX86_BUILTIN_UCOMINEQSD,
17189
17190 IX86_BUILTIN_MAXPD,
17191 IX86_BUILTIN_MAXSD,
17192 IX86_BUILTIN_MINPD,
17193 IX86_BUILTIN_MINSD,
17194
17195 IX86_BUILTIN_ANDPD,
17196 IX86_BUILTIN_ANDNPD,
17197 IX86_BUILTIN_ORPD,
17198 IX86_BUILTIN_XORPD,
17199
17200 IX86_BUILTIN_SQRTPD,
17201 IX86_BUILTIN_SQRTSD,
17202
17203 IX86_BUILTIN_UNPCKHPD,
17204 IX86_BUILTIN_UNPCKLPD,
17205
17206 IX86_BUILTIN_SHUFPD,
17207
17208 IX86_BUILTIN_LOADUPD,
17209 IX86_BUILTIN_STOREUPD,
17210 IX86_BUILTIN_MOVSD,
17211
17212 IX86_BUILTIN_LOADHPD,
17213 IX86_BUILTIN_LOADLPD,
17214
17215 IX86_BUILTIN_CVTDQ2PD,
17216 IX86_BUILTIN_CVTDQ2PS,
17217
17218 IX86_BUILTIN_CVTPD2DQ,
17219 IX86_BUILTIN_CVTPD2PI,
17220 IX86_BUILTIN_CVTPD2PS,
17221 IX86_BUILTIN_CVTTPD2DQ,
17222 IX86_BUILTIN_CVTTPD2PI,
17223
17224 IX86_BUILTIN_CVTPI2PD,
17225 IX86_BUILTIN_CVTSI2SD,
17226 IX86_BUILTIN_CVTSI642SD,
17227
17228 IX86_BUILTIN_CVTSD2SI,
17229 IX86_BUILTIN_CVTSD2SI64,
17230 IX86_BUILTIN_CVTSD2SS,
17231 IX86_BUILTIN_CVTSS2SD,
17232 IX86_BUILTIN_CVTTSD2SI,
17233 IX86_BUILTIN_CVTTSD2SI64,
17234
17235 IX86_BUILTIN_CVTPS2DQ,
17236 IX86_BUILTIN_CVTPS2PD,
17237 IX86_BUILTIN_CVTTPS2DQ,
17238
17239 IX86_BUILTIN_MOVNTI,
17240 IX86_BUILTIN_MOVNTPD,
17241 IX86_BUILTIN_MOVNTDQ,
17242
17243 /* SSE2 MMX */
17244 IX86_BUILTIN_MASKMOVDQU,
17245 IX86_BUILTIN_MOVMSKPD,
17246 IX86_BUILTIN_PMOVMSKB128,
17247
17248 IX86_BUILTIN_PACKSSWB128,
17249 IX86_BUILTIN_PACKSSDW128,
17250 IX86_BUILTIN_PACKUSWB128,
17251
17252 IX86_BUILTIN_PADDB128,
17253 IX86_BUILTIN_PADDW128,
17254 IX86_BUILTIN_PADDD128,
17255 IX86_BUILTIN_PADDQ128,
17256 IX86_BUILTIN_PADDSB128,
17257 IX86_BUILTIN_PADDSW128,
17258 IX86_BUILTIN_PADDUSB128,
17259 IX86_BUILTIN_PADDUSW128,
17260 IX86_BUILTIN_PSUBB128,
17261 IX86_BUILTIN_PSUBW128,
17262 IX86_BUILTIN_PSUBD128,
17263 IX86_BUILTIN_PSUBQ128,
17264 IX86_BUILTIN_PSUBSB128,
17265 IX86_BUILTIN_PSUBSW128,
17266 IX86_BUILTIN_PSUBUSB128,
17267 IX86_BUILTIN_PSUBUSW128,
17268
17269 IX86_BUILTIN_PAND128,
17270 IX86_BUILTIN_PANDN128,
17271 IX86_BUILTIN_POR128,
17272 IX86_BUILTIN_PXOR128,
17273
17274 IX86_BUILTIN_PAVGB128,
17275 IX86_BUILTIN_PAVGW128,
17276
17277 IX86_BUILTIN_PCMPEQB128,
17278 IX86_BUILTIN_PCMPEQW128,
17279 IX86_BUILTIN_PCMPEQD128,
17280 IX86_BUILTIN_PCMPGTB128,
17281 IX86_BUILTIN_PCMPGTW128,
17282 IX86_BUILTIN_PCMPGTD128,
17283
17284 IX86_BUILTIN_PMADDWD128,
17285
17286 IX86_BUILTIN_PMAXSW128,
17287 IX86_BUILTIN_PMAXUB128,
17288 IX86_BUILTIN_PMINSW128,
17289 IX86_BUILTIN_PMINUB128,
17290
17291 IX86_BUILTIN_PMULUDQ,
17292 IX86_BUILTIN_PMULUDQ128,
17293 IX86_BUILTIN_PMULHUW128,
17294 IX86_BUILTIN_PMULHW128,
17295 IX86_BUILTIN_PMULLW128,
17296
17297 IX86_BUILTIN_PSADBW128,
17298 IX86_BUILTIN_PSHUFHW,
17299 IX86_BUILTIN_PSHUFLW,
17300 IX86_BUILTIN_PSHUFD,
17301
17302 IX86_BUILTIN_PSLLDQI128,
17303 IX86_BUILTIN_PSLLWI128,
17304 IX86_BUILTIN_PSLLDI128,
17305 IX86_BUILTIN_PSLLQI128,
17306 IX86_BUILTIN_PSRAWI128,
17307 IX86_BUILTIN_PSRADI128,
17308 IX86_BUILTIN_PSRLDQI128,
17309 IX86_BUILTIN_PSRLWI128,
17310 IX86_BUILTIN_PSRLDI128,
17311 IX86_BUILTIN_PSRLQI128,
17312
17313 IX86_BUILTIN_PSLLDQ128,
17314 IX86_BUILTIN_PSLLW128,
17315 IX86_BUILTIN_PSLLD128,
17316 IX86_BUILTIN_PSLLQ128,
17317 IX86_BUILTIN_PSRAW128,
17318 IX86_BUILTIN_PSRAD128,
17319 IX86_BUILTIN_PSRLW128,
17320 IX86_BUILTIN_PSRLD128,
17321 IX86_BUILTIN_PSRLQ128,
17322
17323 IX86_BUILTIN_PUNPCKHBW128,
17324 IX86_BUILTIN_PUNPCKHWD128,
17325 IX86_BUILTIN_PUNPCKHDQ128,
17326 IX86_BUILTIN_PUNPCKHQDQ128,
17327 IX86_BUILTIN_PUNPCKLBW128,
17328 IX86_BUILTIN_PUNPCKLWD128,
17329 IX86_BUILTIN_PUNPCKLDQ128,
17330 IX86_BUILTIN_PUNPCKLQDQ128,
17331
17332 IX86_BUILTIN_CLFLUSH,
17333 IX86_BUILTIN_MFENCE,
17334 IX86_BUILTIN_LFENCE,
17335
17336 /* Prescott New Instructions. */
17337 IX86_BUILTIN_ADDSUBPS,
17338 IX86_BUILTIN_HADDPS,
17339 IX86_BUILTIN_HSUBPS,
17340 IX86_BUILTIN_MOVSHDUP,
17341 IX86_BUILTIN_MOVSLDUP,
17342 IX86_BUILTIN_ADDSUBPD,
17343 IX86_BUILTIN_HADDPD,
17344 IX86_BUILTIN_HSUBPD,
17345 IX86_BUILTIN_LDDQU,
17346
17347 IX86_BUILTIN_MONITOR,
17348 IX86_BUILTIN_MWAIT,
17349
17350 /* SSSE3. */
17351 IX86_BUILTIN_PHADDW,
17352 IX86_BUILTIN_PHADDD,
17353 IX86_BUILTIN_PHADDSW,
17354 IX86_BUILTIN_PHSUBW,
17355 IX86_BUILTIN_PHSUBD,
17356 IX86_BUILTIN_PHSUBSW,
17357 IX86_BUILTIN_PMADDUBSW,
17358 IX86_BUILTIN_PMULHRSW,
17359 IX86_BUILTIN_PSHUFB,
17360 IX86_BUILTIN_PSIGNB,
17361 IX86_BUILTIN_PSIGNW,
17362 IX86_BUILTIN_PSIGND,
17363 IX86_BUILTIN_PALIGNR,
17364 IX86_BUILTIN_PABSB,
17365 IX86_BUILTIN_PABSW,
17366 IX86_BUILTIN_PABSD,
17367
17368 IX86_BUILTIN_PHADDW128,
17369 IX86_BUILTIN_PHADDD128,
17370 IX86_BUILTIN_PHADDSW128,
17371 IX86_BUILTIN_PHSUBW128,
17372 IX86_BUILTIN_PHSUBD128,
17373 IX86_BUILTIN_PHSUBSW128,
17374 IX86_BUILTIN_PMADDUBSW128,
17375 IX86_BUILTIN_PMULHRSW128,
17376 IX86_BUILTIN_PSHUFB128,
17377 IX86_BUILTIN_PSIGNB128,
17378 IX86_BUILTIN_PSIGNW128,
17379 IX86_BUILTIN_PSIGND128,
17380 IX86_BUILTIN_PALIGNR128,
17381 IX86_BUILTIN_PABSB128,
17382 IX86_BUILTIN_PABSW128,
17383 IX86_BUILTIN_PABSD128,
17384
17385 /* AMDFAM10 - SSE4A New Instructions. */
17386 IX86_BUILTIN_MOVNTSD,
17387 IX86_BUILTIN_MOVNTSS,
17388 IX86_BUILTIN_EXTRQI,
17389 IX86_BUILTIN_EXTRQ,
17390 IX86_BUILTIN_INSERTQI,
17391 IX86_BUILTIN_INSERTQ,
17392
17393 /* SSE4.1. */
17394 IX86_BUILTIN_BLENDPD,
17395 IX86_BUILTIN_BLENDPS,
17396 IX86_BUILTIN_BLENDVPD,
17397 IX86_BUILTIN_BLENDVPS,
17398 IX86_BUILTIN_PBLENDVB128,
17399 IX86_BUILTIN_PBLENDW128,
17400
17401 IX86_BUILTIN_DPPD,
17402 IX86_BUILTIN_DPPS,
17403
17404 IX86_BUILTIN_INSERTPS128,
17405
17406 IX86_BUILTIN_MOVNTDQA,
17407 IX86_BUILTIN_MPSADBW128,
17408 IX86_BUILTIN_PACKUSDW128,
17409 IX86_BUILTIN_PCMPEQQ,
17410 IX86_BUILTIN_PHMINPOSUW128,
17411
17412 IX86_BUILTIN_PMAXSB128,
17413 IX86_BUILTIN_PMAXSD128,
17414 IX86_BUILTIN_PMAXUD128,
17415 IX86_BUILTIN_PMAXUW128,
17416
17417 IX86_BUILTIN_PMINSB128,
17418 IX86_BUILTIN_PMINSD128,
17419 IX86_BUILTIN_PMINUD128,
17420 IX86_BUILTIN_PMINUW128,
17421
17422 IX86_BUILTIN_PMOVSXBW128,
17423 IX86_BUILTIN_PMOVSXBD128,
17424 IX86_BUILTIN_PMOVSXBQ128,
17425 IX86_BUILTIN_PMOVSXWD128,
17426 IX86_BUILTIN_PMOVSXWQ128,
17427 IX86_BUILTIN_PMOVSXDQ128,
17428
17429 IX86_BUILTIN_PMOVZXBW128,
17430 IX86_BUILTIN_PMOVZXBD128,
17431 IX86_BUILTIN_PMOVZXBQ128,
17432 IX86_BUILTIN_PMOVZXWD128,
17433 IX86_BUILTIN_PMOVZXWQ128,
17434 IX86_BUILTIN_PMOVZXDQ128,
17435
17436 IX86_BUILTIN_PMULDQ128,
17437 IX86_BUILTIN_PMULLD128,
17438
17439 IX86_BUILTIN_ROUNDPD,
17440 IX86_BUILTIN_ROUNDPS,
17441 IX86_BUILTIN_ROUNDSD,
17442 IX86_BUILTIN_ROUNDSS,
17443
17444 IX86_BUILTIN_PTESTZ,
17445 IX86_BUILTIN_PTESTC,
17446 IX86_BUILTIN_PTESTNZC,
17447
17448 IX86_BUILTIN_VEC_INIT_V2SI,
17449 IX86_BUILTIN_VEC_INIT_V4HI,
17450 IX86_BUILTIN_VEC_INIT_V8QI,
17451 IX86_BUILTIN_VEC_EXT_V2DF,
17452 IX86_BUILTIN_VEC_EXT_V2DI,
17453 IX86_BUILTIN_VEC_EXT_V4SF,
17454 IX86_BUILTIN_VEC_EXT_V4SI,
17455 IX86_BUILTIN_VEC_EXT_V8HI,
17456 IX86_BUILTIN_VEC_EXT_V2SI,
17457 IX86_BUILTIN_VEC_EXT_V4HI,
17458 IX86_BUILTIN_VEC_EXT_V16QI,
17459 IX86_BUILTIN_VEC_SET_V2DI,
17460 IX86_BUILTIN_VEC_SET_V4SF,
17461 IX86_BUILTIN_VEC_SET_V4SI,
17462 IX86_BUILTIN_VEC_SET_V8HI,
17463 IX86_BUILTIN_VEC_SET_V4HI,
17464 IX86_BUILTIN_VEC_SET_V16QI,
17465
17466 IX86_BUILTIN_VEC_PACK_SFIX,
17467
17468 /* SSE4.2. */
17469 IX86_BUILTIN_CRC32QI,
17470 IX86_BUILTIN_CRC32HI,
17471 IX86_BUILTIN_CRC32SI,
17472 IX86_BUILTIN_CRC32DI,
17473
17474 IX86_BUILTIN_PCMPESTRI128,
17475 IX86_BUILTIN_PCMPESTRM128,
17476 IX86_BUILTIN_PCMPESTRA128,
17477 IX86_BUILTIN_PCMPESTRC128,
17478 IX86_BUILTIN_PCMPESTRO128,
17479 IX86_BUILTIN_PCMPESTRS128,
17480 IX86_BUILTIN_PCMPESTRZ128,
17481 IX86_BUILTIN_PCMPISTRI128,
17482 IX86_BUILTIN_PCMPISTRM128,
17483 IX86_BUILTIN_PCMPISTRA128,
17484 IX86_BUILTIN_PCMPISTRC128,
17485 IX86_BUILTIN_PCMPISTRO128,
17486 IX86_BUILTIN_PCMPISTRS128,
17487 IX86_BUILTIN_PCMPISTRZ128,
17488
17489 IX86_BUILTIN_PCMPGTQ,
17490
17491 /* TFmode support builtins. */
17492 IX86_BUILTIN_INFQ,
17493 IX86_BUILTIN_FABSQ,
17494 IX86_BUILTIN_COPYSIGNQ,
17495
17496 /* SSE5 instructions */
17497 IX86_BUILTIN_FMADDSS,
17498 IX86_BUILTIN_FMADDSD,
17499 IX86_BUILTIN_FMADDPS,
17500 IX86_BUILTIN_FMADDPD,
17501 IX86_BUILTIN_FMSUBSS,
17502 IX86_BUILTIN_FMSUBSD,
17503 IX86_BUILTIN_FMSUBPS,
17504 IX86_BUILTIN_FMSUBPD,
17505 IX86_BUILTIN_FNMADDSS,
17506 IX86_BUILTIN_FNMADDSD,
17507 IX86_BUILTIN_FNMADDPS,
17508 IX86_BUILTIN_FNMADDPD,
17509 IX86_BUILTIN_FNMSUBSS,
17510 IX86_BUILTIN_FNMSUBSD,
17511 IX86_BUILTIN_FNMSUBPS,
17512 IX86_BUILTIN_FNMSUBPD,
17513 IX86_BUILTIN_PCMOV_V2DI,
17514 IX86_BUILTIN_PCMOV_V4SI,
17515 IX86_BUILTIN_PCMOV_V8HI,
17516 IX86_BUILTIN_PCMOV_V16QI,
17517 IX86_BUILTIN_PCMOV_V4SF,
17518 IX86_BUILTIN_PCMOV_V2DF,
17519 IX86_BUILTIN_PPERM,
17520 IX86_BUILTIN_PERMPS,
17521 IX86_BUILTIN_PERMPD,
17522 IX86_BUILTIN_PMACSSWW,
17523 IX86_BUILTIN_PMACSWW,
17524 IX86_BUILTIN_PMACSSWD,
17525 IX86_BUILTIN_PMACSWD,
17526 IX86_BUILTIN_PMACSSDD,
17527 IX86_BUILTIN_PMACSDD,
17528 IX86_BUILTIN_PMACSSDQL,
17529 IX86_BUILTIN_PMACSSDQH,
17530 IX86_BUILTIN_PMACSDQL,
17531 IX86_BUILTIN_PMACSDQH,
17532 IX86_BUILTIN_PMADCSSWD,
17533 IX86_BUILTIN_PMADCSWD,
17534 IX86_BUILTIN_PHADDBW,
17535 IX86_BUILTIN_PHADDBD,
17536 IX86_BUILTIN_PHADDBQ,
17537 IX86_BUILTIN_PHADDWD,
17538 IX86_BUILTIN_PHADDWQ,
17539 IX86_BUILTIN_PHADDDQ,
17540 IX86_BUILTIN_PHADDUBW,
17541 IX86_BUILTIN_PHADDUBD,
17542 IX86_BUILTIN_PHADDUBQ,
17543 IX86_BUILTIN_PHADDUWD,
17544 IX86_BUILTIN_PHADDUWQ,
17545 IX86_BUILTIN_PHADDUDQ,
17546 IX86_BUILTIN_PHSUBBW,
17547 IX86_BUILTIN_PHSUBWD,
17548 IX86_BUILTIN_PHSUBDQ,
17549 IX86_BUILTIN_PROTB,
17550 IX86_BUILTIN_PROTW,
17551 IX86_BUILTIN_PROTD,
17552 IX86_BUILTIN_PROTQ,
17553 IX86_BUILTIN_PROTB_IMM,
17554 IX86_BUILTIN_PROTW_IMM,
17555 IX86_BUILTIN_PROTD_IMM,
17556 IX86_BUILTIN_PROTQ_IMM,
17557 IX86_BUILTIN_PSHLB,
17558 IX86_BUILTIN_PSHLW,
17559 IX86_BUILTIN_PSHLD,
17560 IX86_BUILTIN_PSHLQ,
17561 IX86_BUILTIN_PSHAB,
17562 IX86_BUILTIN_PSHAW,
17563 IX86_BUILTIN_PSHAD,
17564 IX86_BUILTIN_PSHAQ,
17565 IX86_BUILTIN_FRCZSS,
17566 IX86_BUILTIN_FRCZSD,
17567 IX86_BUILTIN_FRCZPS,
17568 IX86_BUILTIN_FRCZPD,
17569 IX86_BUILTIN_CVTPH2PS,
17570 IX86_BUILTIN_CVTPS2PH,
17571
17572 IX86_BUILTIN_COMEQSS,
17573 IX86_BUILTIN_COMNESS,
17574 IX86_BUILTIN_COMLTSS,
17575 IX86_BUILTIN_COMLESS,
17576 IX86_BUILTIN_COMGTSS,
17577 IX86_BUILTIN_COMGESS,
17578 IX86_BUILTIN_COMUEQSS,
17579 IX86_BUILTIN_COMUNESS,
17580 IX86_BUILTIN_COMULTSS,
17581 IX86_BUILTIN_COMULESS,
17582 IX86_BUILTIN_COMUGTSS,
17583 IX86_BUILTIN_COMUGESS,
17584 IX86_BUILTIN_COMORDSS,
17585 IX86_BUILTIN_COMUNORDSS,
17586 IX86_BUILTIN_COMFALSESS,
17587 IX86_BUILTIN_COMTRUESS,
17588
17589 IX86_BUILTIN_COMEQSD,
17590 IX86_BUILTIN_COMNESD,
17591 IX86_BUILTIN_COMLTSD,
17592 IX86_BUILTIN_COMLESD,
17593 IX86_BUILTIN_COMGTSD,
17594 IX86_BUILTIN_COMGESD,
17595 IX86_BUILTIN_COMUEQSD,
17596 IX86_BUILTIN_COMUNESD,
17597 IX86_BUILTIN_COMULTSD,
17598 IX86_BUILTIN_COMULESD,
17599 IX86_BUILTIN_COMUGTSD,
17600 IX86_BUILTIN_COMUGESD,
17601 IX86_BUILTIN_COMORDSD,
17602 IX86_BUILTIN_COMUNORDSD,
17603 IX86_BUILTIN_COMFALSESD,
17604 IX86_BUILTIN_COMTRUESD,
17605
17606 IX86_BUILTIN_COMEQPS,
17607 IX86_BUILTIN_COMNEPS,
17608 IX86_BUILTIN_COMLTPS,
17609 IX86_BUILTIN_COMLEPS,
17610 IX86_BUILTIN_COMGTPS,
17611 IX86_BUILTIN_COMGEPS,
17612 IX86_BUILTIN_COMUEQPS,
17613 IX86_BUILTIN_COMUNEPS,
17614 IX86_BUILTIN_COMULTPS,
17615 IX86_BUILTIN_COMULEPS,
17616 IX86_BUILTIN_COMUGTPS,
17617 IX86_BUILTIN_COMUGEPS,
17618 IX86_BUILTIN_COMORDPS,
17619 IX86_BUILTIN_COMUNORDPS,
17620 IX86_BUILTIN_COMFALSEPS,
17621 IX86_BUILTIN_COMTRUEPS,
17622
17623 IX86_BUILTIN_COMEQPD,
17624 IX86_BUILTIN_COMNEPD,
17625 IX86_BUILTIN_COMLTPD,
17626 IX86_BUILTIN_COMLEPD,
17627 IX86_BUILTIN_COMGTPD,
17628 IX86_BUILTIN_COMGEPD,
17629 IX86_BUILTIN_COMUEQPD,
17630 IX86_BUILTIN_COMUNEPD,
17631 IX86_BUILTIN_COMULTPD,
17632 IX86_BUILTIN_COMULEPD,
17633 IX86_BUILTIN_COMUGTPD,
17634 IX86_BUILTIN_COMUGEPD,
17635 IX86_BUILTIN_COMORDPD,
17636 IX86_BUILTIN_COMUNORDPD,
17637 IX86_BUILTIN_COMFALSEPD,
17638 IX86_BUILTIN_COMTRUEPD,
17639
17640 IX86_BUILTIN_PCOMEQUB,
17641 IX86_BUILTIN_PCOMNEUB,
17642 IX86_BUILTIN_PCOMLTUB,
17643 IX86_BUILTIN_PCOMLEUB,
17644 IX86_BUILTIN_PCOMGTUB,
17645 IX86_BUILTIN_PCOMGEUB,
17646 IX86_BUILTIN_PCOMFALSEUB,
17647 IX86_BUILTIN_PCOMTRUEUB,
17648 IX86_BUILTIN_PCOMEQUW,
17649 IX86_BUILTIN_PCOMNEUW,
17650 IX86_BUILTIN_PCOMLTUW,
17651 IX86_BUILTIN_PCOMLEUW,
17652 IX86_BUILTIN_PCOMGTUW,
17653 IX86_BUILTIN_PCOMGEUW,
17654 IX86_BUILTIN_PCOMFALSEUW,
17655 IX86_BUILTIN_PCOMTRUEUW,
17656 IX86_BUILTIN_PCOMEQUD,
17657 IX86_BUILTIN_PCOMNEUD,
17658 IX86_BUILTIN_PCOMLTUD,
17659 IX86_BUILTIN_PCOMLEUD,
17660 IX86_BUILTIN_PCOMGTUD,
17661 IX86_BUILTIN_PCOMGEUD,
17662 IX86_BUILTIN_PCOMFALSEUD,
17663 IX86_BUILTIN_PCOMTRUEUD,
17664 IX86_BUILTIN_PCOMEQUQ,
17665 IX86_BUILTIN_PCOMNEUQ,
17666 IX86_BUILTIN_PCOMLTUQ,
17667 IX86_BUILTIN_PCOMLEUQ,
17668 IX86_BUILTIN_PCOMGTUQ,
17669 IX86_BUILTIN_PCOMGEUQ,
17670 IX86_BUILTIN_PCOMFALSEUQ,
17671 IX86_BUILTIN_PCOMTRUEUQ,
17672
17673 IX86_BUILTIN_PCOMEQB,
17674 IX86_BUILTIN_PCOMNEB,
17675 IX86_BUILTIN_PCOMLTB,
17676 IX86_BUILTIN_PCOMLEB,
17677 IX86_BUILTIN_PCOMGTB,
17678 IX86_BUILTIN_PCOMGEB,
17679 IX86_BUILTIN_PCOMFALSEB,
17680 IX86_BUILTIN_PCOMTRUEB,
17681 IX86_BUILTIN_PCOMEQW,
17682 IX86_BUILTIN_PCOMNEW,
17683 IX86_BUILTIN_PCOMLTW,
17684 IX86_BUILTIN_PCOMLEW,
17685 IX86_BUILTIN_PCOMGTW,
17686 IX86_BUILTIN_PCOMGEW,
17687 IX86_BUILTIN_PCOMFALSEW,
17688 IX86_BUILTIN_PCOMTRUEW,
17689 IX86_BUILTIN_PCOMEQD,
17690 IX86_BUILTIN_PCOMNED,
17691 IX86_BUILTIN_PCOMLTD,
17692 IX86_BUILTIN_PCOMLED,
17693 IX86_BUILTIN_PCOMGTD,
17694 IX86_BUILTIN_PCOMGED,
17695 IX86_BUILTIN_PCOMFALSED,
17696 IX86_BUILTIN_PCOMTRUED,
17697 IX86_BUILTIN_PCOMEQQ,
17698 IX86_BUILTIN_PCOMNEQ,
17699 IX86_BUILTIN_PCOMLTQ,
17700 IX86_BUILTIN_PCOMLEQ,
17701 IX86_BUILTIN_PCOMGTQ,
17702 IX86_BUILTIN_PCOMGEQ,
17703 IX86_BUILTIN_PCOMFALSEQ,
17704 IX86_BUILTIN_PCOMTRUEQ,
17705
17706 IX86_BUILTIN_MAX
17707 };
17708
17709 /* Table for the ix86 builtin decls. */
17710 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
17711
17712 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so
17713 * only if MASK matches the current ix86_isa_flags. Stores the function
17714 * decl in the ix86_builtins array.
17715 * Returns the function decl, or NULL_TREE if the builtin was not added. */
17716
17717 static inline tree
17718 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
17719 {
17720 tree decl = NULL_TREE;
17721
17722 if (mask & ix86_isa_flags
17723 && (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT))
17724 {
17725 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
17726 NULL, NULL_TREE);
17727 ix86_builtins[(int) code] = decl;
17728 }
17729
17730 return decl;
17731 }
17732
17733 /* Like def_builtin, but also marks the function decl "const". */
17734
17735 static inline tree
17736 def_builtin_const (int mask, const char *name, tree type,
17737 enum ix86_builtins code)
17738 {
17739 tree decl = def_builtin (mask, name, type, code);
17740 if (decl)
17741 TREE_READONLY (decl) = 1;
17742 return decl;
17743 }
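
/* Illustration only, guarded out of the build: how a builtin could be
   registered directly with the helpers above (most entries are in fact
   driven by the description tables below).  The type-node name here is
   hypothetical; the real FUNCTION_TYPE nodes are constructed elsewhere in
   this file before the registration calls are made.  */
#if 0
  tree example_v4sf_ftype_v4sf_v4sf;	/* stands for V4SF (V4SF, V4SF) */
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_addps",
		     example_v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);
#endif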
17744
17745 /* Bits for builtin_description.flag. */
17746
17747 /* Set when we don't support the comparison natively, and should
17748 swap_comparison in order to support it. */
17749 #define BUILTIN_DESC_SWAP_OPERANDS 1
17750
17751 struct builtin_description
17752 {
17753 const unsigned int mask;
17754 const enum insn_code icode;
17755 const char *const name;
17756 const enum ix86_builtins code;
17757 const enum rtx_code comparison;
17758 const int flag;
17759 };
17760
17761 static const struct builtin_description bdesc_comi[] =
17762 {
17763 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
17764 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
17765 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
17766 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
17767 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
17768 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
17769 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
17770 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
17771 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
17772 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
17773 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
17774 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
17775 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
17776 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
17777 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
17778 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
17779 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
17780 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
17781 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
17782 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
17783 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
17784 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
17785 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
17786 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
17787 };
17788
17789 static const struct builtin_description bdesc_ptest[] =
17790 {
17791 /* SSE4.1 */
17792 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 },
17793 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 },
17794 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
17795 };
17796
17797 static const struct builtin_description bdesc_pcmpestr[] =
17798 {
17799 /* SSE4.2 */
17800 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
17801 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
17802 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
17803 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
17804 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
17805 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
17806 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
17807 };
17808
17809 static const struct builtin_description bdesc_pcmpistr[] =
17810 {
17811 /* SSE4.2 */
17812 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
17813 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
17814 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
17815 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
17816 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
17817 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
17818 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
17819 };
17820
17821 static const struct builtin_description bdesc_crc32[] =
17822 {
17823 /* SSE4.2 */
17824 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32qi, 0, IX86_BUILTIN_CRC32QI, UNKNOWN, 0 },
17825 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, 0, IX86_BUILTIN_CRC32HI, UNKNOWN, 0 },
17826 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, 0, IX86_BUILTIN_CRC32SI, UNKNOWN, 0 },
17827 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32di, 0, IX86_BUILTIN_CRC32DI, UNKNOWN, 0 },
17828 };
17829
17830 /* SSE builtins with 3 arguments, where the last argument must be an immediate or xmm0. */
17831 static const struct builtin_description bdesc_sse_3arg[] =
17832 {
17833 /* SSE4.1 */
17834 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, 0 },
17835 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, 0 },
17836 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, 0 },
17837 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, 0 },
17838 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, 0 },
17839 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, 0 },
17840 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, 0 },
17841 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, 0 },
17842 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, 0 },
17843 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, 0 },
17844 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, UNKNOWN, 0 },
17845 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, UNKNOWN, 0 },
17846 };
17847
17848 static const struct builtin_description bdesc_2arg[] =
17849 {
17850 /* SSE */
17851 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, 0 },
17852 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, 0 },
17853 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, 0 },
17854 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, 0 },
17855 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, 0 },
17856 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, 0 },
17857 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, 0 },
17858 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, 0 },
17859
17860 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
17861 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
17862 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
17863 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, BUILTIN_DESC_SWAP_OPERANDS },
17864 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, BUILTIN_DESC_SWAP_OPERANDS },
17865 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
17866 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
17867 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
17868 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
17869 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17870 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17871 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
17872 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
17873 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
17874 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
17875 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
17876 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
17877 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
17878 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
17879 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17880 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17881 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
17882
17883 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, 0 },
17884 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, 0 },
17885 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, 0 },
17886 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, 0 },
17887
17888 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, 0 },
17889 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, 0 },
17890 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, 0 },
17891 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, 0 },
17892
17893 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, 0 },
17894 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, 0 },
17895 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, 0 },
17896 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, 0 },
17897 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, 0 },
17898
17899 /* MMX */
17900 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, 0 },
17901 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, 0 },
17902 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, 0 },
17903 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, 0 },
17904 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, 0 },
17905 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, 0 },
17906 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, 0 },
17907 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, 0 },
17908
17909 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, 0 },
17910 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, 0 },
17911 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, 0 },
17912 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, 0 },
17913 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, 0 },
17914 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, 0 },
17915 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, 0 },
17916 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, 0 },
17917
17918 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, 0 },
17919 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, 0 },
17920 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, 0 },
17921
17922 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, 0 },
17923 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, 0 },
17924 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, 0 },
17925 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, 0 },
17926
17927 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, 0 },
17928 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, 0 },
17929
17930 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, 0 },
17931 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, 0 },
17932 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, 0 },
17933 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, 0 },
17934 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, 0 },
17935 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, 0 },
17936
17937 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, 0 },
17938 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, 0 },
17939 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, 0 },
17940 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, 0 },
17941
17942 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, 0 },
17943 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, 0 },
17944 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, 0 },
17945 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, 0 },
17946 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, 0 },
17947 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, 0 },
17948
17949 /* Special. */
17950 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, UNKNOWN, 0 },
17951 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, UNKNOWN, 0 },
17952 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, UNKNOWN, 0 },
17953
17954 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, UNKNOWN, 0 },
17955 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, UNKNOWN, 0 },
17956 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, UNKNOWN, 0 },
17957
17958 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, UNKNOWN, 0 },
17959 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, UNKNOWN, 0 },
17960 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, UNKNOWN, 0 },
17961 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, UNKNOWN, 0 },
17962 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, UNKNOWN, 0 },
17963 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, UNKNOWN, 0 },
17964
17965 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, UNKNOWN, 0 },
17966 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, UNKNOWN, 0 },
17967 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, UNKNOWN, 0 },
17968 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, UNKNOWN, 0 },
17969 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, UNKNOWN, 0 },
17970 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, UNKNOWN, 0 },
17971
17972 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, UNKNOWN, 0 },
17973 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, UNKNOWN, 0 },
17974 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, UNKNOWN, 0 },
17975 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, UNKNOWN, 0 },
17976
17977 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, UNKNOWN, 0 },
17978 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, UNKNOWN, 0 },
17979
17980 /* SSE2 */
17981 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, 0 },
17982 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, 0 },
17983 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, 0 },
17984 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, 0 },
17985 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, 0 },
17986 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, 0 },
17987 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, 0 },
17988 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, 0 },
17989
17990 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
17991 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
17992 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
17993 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, BUILTIN_DESC_SWAP_OPERANDS },
17994 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, BUILTIN_DESC_SWAP_OPERANDS },
17995 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
17996 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
17997 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
17998 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
17999 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
18000 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
18001 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
18002 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
18003 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
18004 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
18005 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
18006 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
18007 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
18008 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
18009 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
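  /* A note on BUILTIN_DESC_SWAP_OPERANDS in the comparison rows above: the
     CMPPD/CMPSD immediate encodes only EQ/LT/LE/UNORD and their negated
     forms, so the "greater" builtins reuse the LT/LE rows and exchange the
     two vector operands when the call is expanded.  A rough user-level
     sketch (illustrative only; __m128d comes from <emmintrin.h>):

	__m128d a, b;
	__m128d gt   = __builtin_ia32_cmpgtpd (a, b);
	__m128d same = __builtin_ia32_cmpltpd (b, a);

     Both statements produce the same per-element mask.  */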
18010
18011 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, 0 },
18012 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, 0 },
18013 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, 0 },
18014 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, 0 },
18015
18016 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, 0 },
18017 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, 0 },
18018 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, 0 },
18019 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, 0 },
18020
18021 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, 0 },
18022 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, 0 },
18023 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, 0 },
18024
18025 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, 0 },
18026
18027 /* SSE2 MMX */
18028 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, 0 },
18029 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, 0 },
18030 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, 0 },
18031 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, 0 },
18032 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, 0 },
18033 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, 0 },
18034 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, 0 },
18035 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, 0 },
18036
18037 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, 0 },
18038 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, 0 },
18039 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, 0 },
18040 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, 0 },
18041 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, 0 },
18042 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, 0 },
18043 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, 0 },
18044 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, 0 },
18045
18046 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, 0 },
18047 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, 0 },
18048
18049 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, 0 },
18050 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, 0 },
18051 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, 0 },
18052 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, 0 },
18053
18054 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, 0 },
18055 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, 0 },
18056
18057 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, 0 },
18058 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, 0 },
18059 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, 0 },
18060 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, 0 },
18061 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, 0 },
18062 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, 0 },
18063
18064 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, 0 },
18065 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, 0 },
18066 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, 0 },
18067 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, 0 },
18068
18069 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, 0 },
18070 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, 0 },
18071 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, 0 },
18072 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, 0 },
18073 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, 0 },
18074 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, 0 },
18075 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, 0 },
18076 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, 0 },
18077
18078 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, 0 },
18079 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, 0 },
18080 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, 0 },
18081
18082 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, 0 },
18083 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, UNKNOWN, 0 },
18084
18085 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, UNKNOWN, 0 },
18086 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, UNKNOWN, 0 },
18087
18088 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, UNKNOWN, 0 },
18089 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, UNKNOWN, 0 },
18090 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, UNKNOWN, 0 },
18091
18092 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, UNKNOWN, 0 },
18093 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, UNKNOWN, 0 },
18094 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, UNKNOWN, 0 },
18095
18096 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, UNKNOWN, 0 },
18097 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, UNKNOWN, 0 },
18098
18099 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, UNKNOWN, 0 },
18100
18101 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, UNKNOWN, 0 },
18102 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, UNKNOWN, 0 },
18103 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, UNKNOWN, 0 },
18104 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, UNKNOWN, 0 },
18105
18106 /* SSE3 */
18107 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, 0 },
18108 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, 0 },
18109 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, 0 },
18110 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, 0 },
18111 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, 0 },
18112 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, 0 },
18113
18114 /* SSSE3 */
18115 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, 0 },
18116 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, 0 },
18117 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, 0 },
18118 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, 0 },
18119 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, 0 },
18120 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, 0 },
18121 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, 0 },
18122 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, 0 },
18123 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, 0 },
18124 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, 0 },
18125 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, 0 },
18126 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, 0 },
18127 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, 0 },
18128 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, 0 },
18129 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, 0 },
18130 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, 0 },
18131 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, 0 },
18132 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, 0 },
18133 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, 0 },
18134 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, 0 },
18135 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, 0 },
18136 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, 0 },
18137 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, 0 },
18138 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, 0 },
18139
18140 /* SSE4.1 */
18141 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, 0 },
18142 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, 0 },
18143 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, 0 },
18144 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, 0 },
18145 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, 0 },
18146 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, 0 },
18147 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, 0 },
18148 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, 0 },
18149 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, 0 },
18150 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, 0 },
18151 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, 0, IX86_BUILTIN_PMULDQ128, UNKNOWN, 0 },
18152 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, 0 },
18153
18154 /* SSE4.2 */
18155 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, 0 },
18156 };
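/* Each row above packs, in order, { ISA option mask, insn code, builtin
   name, IX86_BUILTIN_* enumerator, comparison code, flags }.  Rows whose
   name field is 0 appear to be registered individually later in
   ix86_init_mmx_sse_builtins, where their argument types are spelled out by
   hand; the named rows are registered in a loop roughly like the following
   sketch (type_for_icode is a hypothetical stand-in for the mode-based type
   selection done in that function):

	for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
	  if (d->name)
	    def_builtin_const (d->mask, d->name,
			       type_for_icode (d->icode), d->code);
*/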
18157
18158 static const struct builtin_description bdesc_1arg[] =
18159 {
18160 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, UNKNOWN, 0 },
18161 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, UNKNOWN, 0 },
18162
18163 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, UNKNOWN, 0 },
18164 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS_NR, UNKNOWN, 0 },
18165 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, UNKNOWN, 0 },
18166 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, 0 },
18167 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, UNKNOWN, 0 },
18168
18169 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, UNKNOWN, 0 },
18170 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, UNKNOWN, 0 },
18171 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, UNKNOWN, 0 },
18172 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, UNKNOWN, 0 },
18173 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, UNKNOWN, 0 },
18174 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, 0 },
18175
18176 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, UNKNOWN, 0 },
18177 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, UNKNOWN, 0 },
18178
18179 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, UNKNOWN, 0 },
18180
18181 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, UNKNOWN, 0 },
18182 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, UNKNOWN, 0 },
18183
18184 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, UNKNOWN, 0 },
18185 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, UNKNOWN, 0 },
18186 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, UNKNOWN, 0 },
18187 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, 0 },
18188 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, UNKNOWN, 0 },
18189
18190 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, UNKNOWN, 0 },
18191
18192 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, UNKNOWN, 0 },
18193 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, UNKNOWN, 0 },
18194 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, UNKNOWN, 0 },
18195 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, 0 },
18196
18197 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, UNKNOWN, 0 },
18198 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, UNKNOWN, 0 },
18199 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, 0 },
18200
18201 /* SSE3 */
18202 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, 0 },
18203 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, 0 },
18204
18205 /* SSSE3 */
18206 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, 0 },
18207 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, 0 },
18208 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, 0 },
18209 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, 0 },
18210 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, 0 },
18211 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, 0 },
18212
18213 /* SSE4.1 */
18214 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVSXBW128, UNKNOWN, 0 },
18215 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVSXBD128, UNKNOWN, 0 },
18216 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, 0 },
18217 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVSXWD128, UNKNOWN, 0 },
18218 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, 0 },
18219 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, 0, IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, 0 },
18220 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVZXBW128, UNKNOWN, 0 },
18221 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVZXBD128, UNKNOWN, 0 },
18222 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, 0 },
18223 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVZXWD128, UNKNOWN, 0 },
18224 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, 0 },
18225 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, 0, IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, 0 },
18226 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, 0 },
18227
18228 /* Fake 1-arg builtins that take a constant smaller than 8 bits as the second argument; see the note after this table. */
18229 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundpd, 0, IX86_BUILTIN_ROUNDPD, UNKNOWN, 0 },
18230 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundps, 0, IX86_BUILTIN_ROUNDPS, UNKNOWN, 0 },
18231 };
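/* As noted above, the ROUNDPD/ROUNDPS rows are "fake" one-argument entries:
   at the call site they still take an 8-bit rounding-control immediate as a
   second argument.  Illustrative use, with immediates per the SSE4.1 ROUNDPD
   encoding (__m128d comes from <emmintrin.h>):

	__m128d x;
	__m128d near = __builtin_ia32_roundpd (x, 0x00);   round to nearest
	__m128d down = __builtin_ia32_roundpd (x, 0x01);   round toward -inf
*/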
18232
18233 /* SSE5 */
18234 enum multi_arg_type {
18235 MULTI_ARG_UNKNOWN,
18236 MULTI_ARG_3_SF,
18237 MULTI_ARG_3_DF,
18238 MULTI_ARG_3_DI,
18239 MULTI_ARG_3_SI,
18240 MULTI_ARG_3_SI_DI,
18241 MULTI_ARG_3_HI,
18242 MULTI_ARG_3_HI_SI,
18243 MULTI_ARG_3_QI,
18244 MULTI_ARG_3_PERMPS,
18245 MULTI_ARG_3_PERMPD,
18246 MULTI_ARG_2_SF,
18247 MULTI_ARG_2_DF,
18248 MULTI_ARG_2_DI,
18249 MULTI_ARG_2_SI,
18250 MULTI_ARG_2_HI,
18251 MULTI_ARG_2_QI,
18252 MULTI_ARG_2_DI_IMM,
18253 MULTI_ARG_2_SI_IMM,
18254 MULTI_ARG_2_HI_IMM,
18255 MULTI_ARG_2_QI_IMM,
18256 MULTI_ARG_2_SF_CMP,
18257 MULTI_ARG_2_DF_CMP,
18258 MULTI_ARG_2_DI_CMP,
18259 MULTI_ARG_2_SI_CMP,
18260 MULTI_ARG_2_HI_CMP,
18261 MULTI_ARG_2_QI_CMP,
18262 MULTI_ARG_2_DI_TF,
18263 MULTI_ARG_2_SI_TF,
18264 MULTI_ARG_2_HI_TF,
18265 MULTI_ARG_2_QI_TF,
18266 MULTI_ARG_2_SF_TF,
18267 MULTI_ARG_2_DF_TF,
18268 MULTI_ARG_1_SF,
18269 MULTI_ARG_1_DF,
18270 MULTI_ARG_1_DI,
18271 MULTI_ARG_1_SI,
18272 MULTI_ARG_1_HI,
18273 MULTI_ARG_1_QI,
18274 MULTI_ARG_1_SI_DI,
18275 MULTI_ARG_1_HI_DI,
18276 MULTI_ARG_1_HI_SI,
18277 MULTI_ARG_1_QI_DI,
18278 MULTI_ARG_1_QI_SI,
18279 MULTI_ARG_1_QI_HI,
18280 MULTI_ARG_1_PH2PS,
18281 MULTI_ARG_1_PS2PH
18282 };
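/* The MULTI_ARG_* codes above follow a consistent naming scheme: the digit
   gives the operand count, the first mode suffix names the element mode of
   the 128-bit vector operands (SF, DF, DI, SI, HI, QI), an optional second
   mode names a widened result, and the trailing tags mark special argument
   handling (_CMP takes an RTL comparison code, _TF a true/false test code,
   _IMM an immediate count).  For example, MULTI_ARG_3_HI_SI covers the
   pmacsswd-style rows below: three operands, V8HImode multiplicands
   accumulated into a V4SImode result.  */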
18283
18284 static const struct builtin_description bdesc_multi_arg[] =
18285 {
18286 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
18287 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
18288 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
18289 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
18290 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
18291 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
18292 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
18293 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
18294 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
18295 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
18296 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
18297 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
18298 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
18299 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
18300 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
18301 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
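  /* Element-wise semantics of the fused multiply-add rows above, written as
     a sketch in scalar notation (the vm* forms operate on element 0 only):

	fmadd  (a, b, c)[i] =   a[i] * b[i] + c[i]
	fmsub  (a, b, c)[i] =   a[i] * b[i] - c[i]
	fnmadd (a, b, c)[i] = -(a[i] * b[i]) + c[i]
	fnmsub (a, b, c)[i] = -(a[i] * b[i]) - c[i]  */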
18302 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
18303 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
18304 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
18305 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
18306 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI },
18307 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
18308 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
18309 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
18310 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
18311 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
18312 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
18313 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
18314 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
18315 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
18316 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
18317 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
18318 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
18319 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
18320 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
18321 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
18322 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
18323 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
18324 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
18325 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
18326 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
18327 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
18328 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
18329 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
18330 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
18331 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
18332 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
18333 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
18334 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
18335 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
18336 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
18337 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
18338 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
18339 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
18340 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
18341 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
18342 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
18343 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
18344 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
18345 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
18346 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
18347 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
18348 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
18349 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
18350 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
18351 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
18352 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
18353 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
18354 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
18355 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
18356 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
18357 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
18358 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
18359 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
18360 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
18361
18362 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
18363 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
18364 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
18365 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
18366 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
18367 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
18368 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
18369 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
18370 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18371 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18372 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
18373 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
18374 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
18375 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
18376 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
18377 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
18378
18379 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
18380 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
18381 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
18382 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
18383 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
18384 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
18385 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
18386 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
18387 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18388 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18389 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
18390 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
18391 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
18392 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
18393 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
18394 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
18395
18396 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
18397 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
18398 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
18399 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
18400 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
18401 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
18402 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
18403 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
18404 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18405 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18406 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
18407 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
18408 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
18409 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
18410 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
18411 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
18412
18413 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
18414 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
18415 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
18416 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
18417 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
18418 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
18419 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
18420 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
18421 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18422 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18423 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
18424 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
18425 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
18426 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
18427 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
18428 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
18429
18430 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
18431 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
18432 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
18433 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
18434 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
18435 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
18436 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
18437
18438 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
18439 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
18440 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
18441 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
18442 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
18443 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
18444 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
18445
18446 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
18447 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
18448 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
18449 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
18450 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
18451 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
18452 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
18453
18454 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
18455 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
18456 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
18457 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
18458 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
18459 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
18460 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
18461
18462 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
18463 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
18464 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
18465 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
18466 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
18467 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
18468 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
18469
18470 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
18471 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
18472 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
18473 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
18474 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
18475 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
18476 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
18477
18478 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
18479 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
18480 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
18481 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
18482 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
18483 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
18484 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
18485
18486 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
18487 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
18488 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
18489 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
18490 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
18491 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
18492 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
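  /* The pcom* rows above yield per-element masks: each result element is all
     ones where the predicate holds and zero otherwise, and the *u* variants
     apply the unsigned comparison codes (LTU, LEU, GTU, GEU) to the same
     maskcmp patterns.  Illustrative use (sketch only; __m128i comes from
     <emmintrin.h>):

	__m128i a, b;
	__m128i lt = __builtin_ia32_pcomltub (a, b);   0xff where (unsigned) a[i] < b[i]
  */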
18493
18494 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
18495 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
18496 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
18497 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
18498 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
18499 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
18500 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
18501 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
18502
18503 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
18504 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
18505 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
18506 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
18507 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
18508 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
18509 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
18510 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
18511
18512 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
18513 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
18514 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
18515 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
18516 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
18517 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
18518 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
18519 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
18520 };
18521
18522 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
18523 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
18524 builtins. */
18525 static void
18526 ix86_init_mmx_sse_builtins (void)
18527 {
18528 const struct builtin_description * d;
18529 size_t i;
18530
18531 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
18532 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
18533 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
18534 tree V2DI_type_node
18535 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
18536 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
18537 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
18538 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
18539 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
18540 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
18541 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
18542
18543 tree pchar_type_node = build_pointer_type (char_type_node);
18544 tree pcchar_type_node = build_pointer_type (
18545 build_type_variant (char_type_node, 1, 0));
18546 tree pfloat_type_node = build_pointer_type (float_type_node);
18547 tree pcfloat_type_node = build_pointer_type (
18548 build_type_variant (float_type_node, 1, 0));
18549 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
18550 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
18551 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
18552
18553 /* Comparisons. */
18554 tree int_ftype_v4sf_v4sf
18555 = build_function_type_list (integer_type_node,
18556 V4SF_type_node, V4SF_type_node, NULL_TREE);
18557 tree v4si_ftype_v4sf_v4sf
18558 = build_function_type_list (V4SI_type_node,
18559 V4SF_type_node, V4SF_type_node, NULL_TREE);
18560 /* MMX/SSE/integer conversions. */
18561 tree int_ftype_v4sf
18562 = build_function_type_list (integer_type_node,
18563 V4SF_type_node, NULL_TREE);
18564 tree int64_ftype_v4sf
18565 = build_function_type_list (long_long_integer_type_node,
18566 V4SF_type_node, NULL_TREE);
18567 tree int_ftype_v8qi
18568 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
18569 tree v4sf_ftype_v4sf_int
18570 = build_function_type_list (V4SF_type_node,
18571 V4SF_type_node, integer_type_node, NULL_TREE);
18572 tree v4sf_ftype_v4sf_int64
18573 = build_function_type_list (V4SF_type_node,
18574 V4SF_type_node, long_long_integer_type_node,
18575 NULL_TREE);
18576 tree v4sf_ftype_v4sf_v2si
18577 = build_function_type_list (V4SF_type_node,
18578 V4SF_type_node, V2SI_type_node, NULL_TREE);
18579
18580 /* Miscellaneous. */
18581 tree v8qi_ftype_v4hi_v4hi
18582 = build_function_type_list (V8QI_type_node,
18583 V4HI_type_node, V4HI_type_node, NULL_TREE);
18584 tree v4hi_ftype_v2si_v2si
18585 = build_function_type_list (V4HI_type_node,
18586 V2SI_type_node, V2SI_type_node, NULL_TREE);
18587 tree v4sf_ftype_v4sf_v4sf_int
18588 = build_function_type_list (V4SF_type_node,
18589 V4SF_type_node, V4SF_type_node,
18590 integer_type_node, NULL_TREE);
18591 tree v2si_ftype_v4hi_v4hi
18592 = build_function_type_list (V2SI_type_node,
18593 V4HI_type_node, V4HI_type_node, NULL_TREE);
18594 tree v4hi_ftype_v4hi_int
18595 = build_function_type_list (V4HI_type_node,
18596 V4HI_type_node, integer_type_node, NULL_TREE);
18597 tree v4hi_ftype_v4hi_di
18598 = build_function_type_list (V4HI_type_node,
18599 V4HI_type_node, long_long_unsigned_type_node,
18600 NULL_TREE);
18601 tree v2si_ftype_v2si_di
18602 = build_function_type_list (V2SI_type_node,
18603 V2SI_type_node, long_long_unsigned_type_node,
18604 NULL_TREE);
18605 tree void_ftype_void
18606 = build_function_type (void_type_node, void_list_node);
18607 tree void_ftype_unsigned
18608 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
18609 tree void_ftype_unsigned_unsigned
18610 = build_function_type_list (void_type_node, unsigned_type_node,
18611 unsigned_type_node, NULL_TREE);
18612 tree void_ftype_pcvoid_unsigned_unsigned
18613 = build_function_type_list (void_type_node, const_ptr_type_node,
18614 unsigned_type_node, unsigned_type_node,
18615 NULL_TREE);
18616 tree unsigned_ftype_void
18617 = build_function_type (unsigned_type_node, void_list_node);
18618 tree v2si_ftype_v4sf
18619 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
18620 /* Loads/stores. */
18621 tree void_ftype_v8qi_v8qi_pchar
18622 = build_function_type_list (void_type_node,
18623 V8QI_type_node, V8QI_type_node,
18624 pchar_type_node, NULL_TREE);
18625 tree v4sf_ftype_pcfloat
18626 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
18627 /* @@@ the type is bogus */
18628 tree v4sf_ftype_v4sf_pv2si
18629 = build_function_type_list (V4SF_type_node,
18630 V4SF_type_node, pv2si_type_node, NULL_TREE);
18631 tree void_ftype_pv2si_v4sf
18632 = build_function_type_list (void_type_node,
18633 pv2si_type_node, V4SF_type_node, NULL_TREE);
18634 tree void_ftype_pfloat_v4sf
18635 = build_function_type_list (void_type_node,
18636 pfloat_type_node, V4SF_type_node, NULL_TREE);
18637 tree void_ftype_pdi_di
18638 = build_function_type_list (void_type_node,
18639 pdi_type_node, long_long_unsigned_type_node,
18640 NULL_TREE);
18641 tree void_ftype_pv2di_v2di
18642 = build_function_type_list (void_type_node,
18643 pv2di_type_node, V2DI_type_node, NULL_TREE);
18644 /* Normal vector unops. */
18645 tree v4sf_ftype_v4sf
18646 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
18647 tree v16qi_ftype_v16qi
18648 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
18649 tree v8hi_ftype_v8hi
18650 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
18651 tree v4si_ftype_v4si
18652 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
18653 tree v8qi_ftype_v8qi
18654 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
18655 tree v4hi_ftype_v4hi
18656 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
18657
18658 /* Normal vector binops. */
18659 tree v4sf_ftype_v4sf_v4sf
18660 = build_function_type_list (V4SF_type_node,
18661 V4SF_type_node, V4SF_type_node, NULL_TREE);
18662 tree v8qi_ftype_v8qi_v8qi
18663 = build_function_type_list (V8QI_type_node,
18664 V8QI_type_node, V8QI_type_node, NULL_TREE);
18665 tree v4hi_ftype_v4hi_v4hi
18666 = build_function_type_list (V4HI_type_node,
18667 V4HI_type_node, V4HI_type_node, NULL_TREE);
18668 tree v2si_ftype_v2si_v2si
18669 = build_function_type_list (V2SI_type_node,
18670 V2SI_type_node, V2SI_type_node, NULL_TREE);
18671 tree di_ftype_di_di
18672 = build_function_type_list (long_long_unsigned_type_node,
18673 long_long_unsigned_type_node,
18674 long_long_unsigned_type_node, NULL_TREE);
18675
18676 tree di_ftype_di_di_int
18677 = build_function_type_list (long_long_unsigned_type_node,
18678 long_long_unsigned_type_node,
18679 long_long_unsigned_type_node,
18680 integer_type_node, NULL_TREE);
18681
18682 tree v2si_ftype_v2sf
18683 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
18684 tree v2sf_ftype_v2si
18685 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
18686 tree v2si_ftype_v2si
18687 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
18688 tree v2sf_ftype_v2sf
18689 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
18690 tree v2sf_ftype_v2sf_v2sf
18691 = build_function_type_list (V2SF_type_node,
18692 V2SF_type_node, V2SF_type_node, NULL_TREE);
18693 tree v2si_ftype_v2sf_v2sf
18694 = build_function_type_list (V2SI_type_node,
18695 V2SF_type_node, V2SF_type_node, NULL_TREE);
18696 tree pint_type_node = build_pointer_type (integer_type_node);
18697 tree pdouble_type_node = build_pointer_type (double_type_node);
18698 tree pcdouble_type_node = build_pointer_type (
18699 build_type_variant (double_type_node, 1, 0));
18700 tree int_ftype_v2df_v2df
18701 = build_function_type_list (integer_type_node,
18702 V2DF_type_node, V2DF_type_node, NULL_TREE);
18703
18704 tree void_ftype_pcvoid
18705 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
18706 tree v4sf_ftype_v4si
18707 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
18708 tree v4si_ftype_v4sf
18709 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
18710 tree v2df_ftype_v4si
18711 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
18712 tree v4si_ftype_v2df
18713 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
18714 tree v4si_ftype_v2df_v2df
18715 = build_function_type_list (V4SI_type_node,
18716 V2DF_type_node, V2DF_type_node, NULL_TREE);
18717 tree v2si_ftype_v2df
18718 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
18719 tree v4sf_ftype_v2df
18720 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
18721 tree v2df_ftype_v2si
18722 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
18723 tree v2df_ftype_v4sf
18724 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
18725 tree int_ftype_v2df
18726 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
18727 tree int64_ftype_v2df
18728 = build_function_type_list (long_long_integer_type_node,
18729 V2DF_type_node, NULL_TREE);
18730 tree v2df_ftype_v2df_int
18731 = build_function_type_list (V2DF_type_node,
18732 V2DF_type_node, integer_type_node, NULL_TREE);
18733 tree v2df_ftype_v2df_int64
18734 = build_function_type_list (V2DF_type_node,
18735 V2DF_type_node, long_long_integer_type_node,
18736 NULL_TREE);
18737 tree v4sf_ftype_v4sf_v2df
18738 = build_function_type_list (V4SF_type_node,
18739 V4SF_type_node, V2DF_type_node, NULL_TREE);
18740 tree v2df_ftype_v2df_v4sf
18741 = build_function_type_list (V2DF_type_node,
18742 V2DF_type_node, V4SF_type_node, NULL_TREE);
18743 tree v2df_ftype_v2df_v2df_int
18744 = build_function_type_list (V2DF_type_node,
18745 V2DF_type_node, V2DF_type_node,
18746 integer_type_node,
18747 NULL_TREE);
18748 tree v2df_ftype_v2df_pcdouble
18749 = build_function_type_list (V2DF_type_node,
18750 V2DF_type_node, pcdouble_type_node, NULL_TREE);
18751 tree void_ftype_pdouble_v2df
18752 = build_function_type_list (void_type_node,
18753 pdouble_type_node, V2DF_type_node, NULL_TREE);
18754 tree void_ftype_pint_int
18755 = build_function_type_list (void_type_node,
18756 pint_type_node, integer_type_node, NULL_TREE);
18757 tree void_ftype_v16qi_v16qi_pchar
18758 = build_function_type_list (void_type_node,
18759 V16QI_type_node, V16QI_type_node,
18760 pchar_type_node, NULL_TREE);
18761 tree v2df_ftype_pcdouble
18762 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
18763 tree v2df_ftype_v2df_v2df
18764 = build_function_type_list (V2DF_type_node,
18765 V2DF_type_node, V2DF_type_node, NULL_TREE);
18766 tree v16qi_ftype_v16qi_v16qi
18767 = build_function_type_list (V16QI_type_node,
18768 V16QI_type_node, V16QI_type_node, NULL_TREE);
18769 tree v8hi_ftype_v8hi_v8hi
18770 = build_function_type_list (V8HI_type_node,
18771 V8HI_type_node, V8HI_type_node, NULL_TREE);
18772 tree v4si_ftype_v4si_v4si
18773 = build_function_type_list (V4SI_type_node,
18774 V4SI_type_node, V4SI_type_node, NULL_TREE);
18775 tree v2di_ftype_v2di_v2di
18776 = build_function_type_list (V2DI_type_node,
18777 V2DI_type_node, V2DI_type_node, NULL_TREE);
18778 tree v2di_ftype_v2df_v2df
18779 = build_function_type_list (V2DI_type_node,
18780 V2DF_type_node, V2DF_type_node, NULL_TREE);
18781 tree v2df_ftype_v2df
18782 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
18783 tree v2di_ftype_v2di_int
18784 = build_function_type_list (V2DI_type_node,
18785 V2DI_type_node, integer_type_node, NULL_TREE);
18786 tree v2di_ftype_v2di_v2di_int
18787 = build_function_type_list (V2DI_type_node, V2DI_type_node,
18788 V2DI_type_node, integer_type_node, NULL_TREE);
18789 tree v4si_ftype_v4si_int
18790 = build_function_type_list (V4SI_type_node,
18791 V4SI_type_node, integer_type_node, NULL_TREE);
18792 tree v8hi_ftype_v8hi_int
18793 = build_function_type_list (V8HI_type_node,
18794 V8HI_type_node, integer_type_node, NULL_TREE);
18795 tree v4si_ftype_v8hi_v8hi
18796 = build_function_type_list (V4SI_type_node,
18797 V8HI_type_node, V8HI_type_node, NULL_TREE);
18798 tree di_ftype_v8qi_v8qi
18799 = build_function_type_list (long_long_unsigned_type_node,
18800 V8QI_type_node, V8QI_type_node, NULL_TREE);
18801 tree di_ftype_v2si_v2si
18802 = build_function_type_list (long_long_unsigned_type_node,
18803 V2SI_type_node, V2SI_type_node, NULL_TREE);
18804 tree v2di_ftype_v16qi_v16qi
18805 = build_function_type_list (V2DI_type_node,
18806 V16QI_type_node, V16QI_type_node, NULL_TREE);
18807 tree v2di_ftype_v4si_v4si
18808 = build_function_type_list (V2DI_type_node,
18809 V4SI_type_node, V4SI_type_node, NULL_TREE);
18810 tree int_ftype_v16qi
18811 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
18812 tree v16qi_ftype_pcchar
18813 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
18814 tree void_ftype_pchar_v16qi
18815 = build_function_type_list (void_type_node,
18816 pchar_type_node, V16QI_type_node, NULL_TREE);
18817
18818 tree v2di_ftype_v2di_unsigned_unsigned
18819 = build_function_type_list (V2DI_type_node, V2DI_type_node,
18820 unsigned_type_node, unsigned_type_node,
18821 NULL_TREE);
18822 tree v2di_ftype_v2di_v2di_unsigned_unsigned
18823 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
18824 unsigned_type_node, unsigned_type_node,
18825 NULL_TREE);
18826 tree v2di_ftype_v2di_v16qi
18827 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
18828 NULL_TREE);
18829 tree v2df_ftype_v2df_v2df_v2df
18830 = build_function_type_list (V2DF_type_node,
18831 V2DF_type_node, V2DF_type_node,
18832 V2DF_type_node, NULL_TREE);
18833 tree v4sf_ftype_v4sf_v4sf_v4sf
18834 = build_function_type_list (V4SF_type_node,
18835 V4SF_type_node, V4SF_type_node,
18836 V4SF_type_node, NULL_TREE);
18837 tree v8hi_ftype_v16qi
18838 = build_function_type_list (V8HI_type_node, V16QI_type_node,
18839 NULL_TREE);
18840 tree v4si_ftype_v16qi
18841 = build_function_type_list (V4SI_type_node, V16QI_type_node,
18842 NULL_TREE);
18843 tree v2di_ftype_v16qi
18844 = build_function_type_list (V2DI_type_node, V16QI_type_node,
18845 NULL_TREE);
18846 tree v4si_ftype_v8hi
18847 = build_function_type_list (V4SI_type_node, V8HI_type_node,
18848 NULL_TREE);
18849 tree v2di_ftype_v8hi
18850 = build_function_type_list (V2DI_type_node, V8HI_type_node,
18851 NULL_TREE);
18852 tree v2di_ftype_v4si
18853 = build_function_type_list (V2DI_type_node, V4SI_type_node,
18854 NULL_TREE);
18855 tree v2di_ftype_pv2di
18856 = build_function_type_list (V2DI_type_node, pv2di_type_node,
18857 NULL_TREE);
18858 tree v16qi_ftype_v16qi_v16qi_int
18859 = build_function_type_list (V16QI_type_node, V16QI_type_node,
18860 V16QI_type_node, integer_type_node,
18861 NULL_TREE);
18862 tree v16qi_ftype_v16qi_v16qi_v16qi
18863 = build_function_type_list (V16QI_type_node, V16QI_type_node,
18864 V16QI_type_node, V16QI_type_node,
18865 NULL_TREE);
18866 tree v8hi_ftype_v8hi_v8hi_int
18867 = build_function_type_list (V8HI_type_node, V8HI_type_node,
18868 V8HI_type_node, integer_type_node,
18869 NULL_TREE);
18870 tree v4si_ftype_v4si_v4si_int
18871 = build_function_type_list (V4SI_type_node, V4SI_type_node,
18872 V4SI_type_node, integer_type_node,
18873 NULL_TREE);
18874 tree int_ftype_v2di_v2di
18875 = build_function_type_list (integer_type_node,
18876 V2DI_type_node, V2DI_type_node,
18877 NULL_TREE);
18878 tree int_ftype_v16qi_int_v16qi_int_int
18879 = build_function_type_list (integer_type_node,
18880 V16QI_type_node,
18881 integer_type_node,
18882 V16QI_type_node,
18883 integer_type_node,
18884 integer_type_node,
18885 NULL_TREE);
18886 tree v16qi_ftype_v16qi_int_v16qi_int_int
18887 = build_function_type_list (V16QI_type_node,
18888 V16QI_type_node,
18889 integer_type_node,
18890 V16QI_type_node,
18891 integer_type_node,
18892 integer_type_node,
18893 NULL_TREE);
18894 tree int_ftype_v16qi_v16qi_int
18895 = build_function_type_list (integer_type_node,
18896 V16QI_type_node,
18897 V16QI_type_node,
18898 integer_type_node,
18899 NULL_TREE);
18900
18901 /* SSE5 instructions */
18902 tree v2di_ftype_v2di_v2di_v2di
18903 = build_function_type_list (V2DI_type_node,
18904 V2DI_type_node,
18905 V2DI_type_node,
18906 V2DI_type_node,
18907 NULL_TREE);
18908
18909 tree v4si_ftype_v4si_v4si_v4si
18910 = build_function_type_list (V4SI_type_node,
18911 V4SI_type_node,
18912 V4SI_type_node,
18913 V4SI_type_node,
18914 NULL_TREE);
18915
18916 tree v4si_ftype_v4si_v4si_v2di
18917 = build_function_type_list (V4SI_type_node,
18918 V4SI_type_node,
18919 V4SI_type_node,
18920 V2DI_type_node,
18921 NULL_TREE);
18922
18923 tree v8hi_ftype_v8hi_v8hi_v8hi
18924 = build_function_type_list (V8HI_type_node,
18925 V8HI_type_node,
18926 V8HI_type_node,
18927 V8HI_type_node,
18928 NULL_TREE);
18929
18930 tree v8hi_ftype_v8hi_v8hi_v4si
18931 = build_function_type_list (V8HI_type_node,
18932 V8HI_type_node,
18933 V8HI_type_node,
18934 V4SI_type_node,
18935 NULL_TREE);
18936
18937 tree v2df_ftype_v2df_v2df_v16qi
18938 = build_function_type_list (V2DF_type_node,
18939 V2DF_type_node,
18940 V2DF_type_node,
18941 V16QI_type_node,
18942 NULL_TREE);
18943
18944 tree v4sf_ftype_v4sf_v4sf_v16qi
18945 = build_function_type_list (V4SF_type_node,
18946 V4SF_type_node,
18947 V4SF_type_node,
18948 V16QI_type_node,
18949 NULL_TREE);
18950
18951 tree v2di_ftype_v2di_si
18952 = build_function_type_list (V2DI_type_node,
18953 V2DI_type_node,
18954 integer_type_node,
18955 NULL_TREE);
18956
18957 tree v4si_ftype_v4si_si
18958 = build_function_type_list (V4SI_type_node,
18959 V4SI_type_node,
18960 integer_type_node,
18961 NULL_TREE);
18962
18963 tree v8hi_ftype_v8hi_si
18964 = build_function_type_list (V8HI_type_node,
18965 V8HI_type_node,
18966 integer_type_node,
18967 NULL_TREE);
18968
18969 tree v16qi_ftype_v16qi_si
18970 = build_function_type_list (V16QI_type_node,
18971 V16QI_type_node,
18972 integer_type_node,
18973 NULL_TREE);
18974 tree v4sf_ftype_v4hi
18975 = build_function_type_list (V4SF_type_node,
18976 V4HI_type_node,
18977 NULL_TREE);
18978
18979 tree v4hi_ftype_v4sf
18980 = build_function_type_list (V4HI_type_node,
18981 V4SF_type_node,
18982 NULL_TREE);
18983
18984 tree v2di_ftype_v2di
18985 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
18986
18987 tree ftype;
18988
18989 /* The __float80 type. */
18990 if (TYPE_MODE (long_double_type_node) == XFmode)
18991 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
18992 "__float80");
18993 else
18994 {
18995 /* The __float80 type. */
18996 tree float80_type_node = make_node (REAL_TYPE);
18997
18998 TYPE_PRECISION (float80_type_node) = 80;
18999 layout_type (float80_type_node);
19000 (*lang_hooks.types.register_builtin_type) (float80_type_node,
19001 "__float80");
19002 }
19003
19004 if (TARGET_64BIT)
19005 {
19006 tree float128_type_node = make_node (REAL_TYPE);
19007
19008 TYPE_PRECISION (float128_type_node) = 128;
19009 layout_type (float128_type_node);
19010 (*lang_hooks.types.register_builtin_type) (float128_type_node,
19011 "__float128");
19012
19013 /* TFmode support builtins. */
19014 ftype = build_function_type (float128_type_node,
19015 void_list_node);
19016 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_infq", ftype, IX86_BUILTIN_INFQ);
19017
19018 ftype = build_function_type_list (float128_type_node,
19019 float128_type_node,
19020 NULL_TREE);
19021 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ);
19022
19023 ftype = build_function_type_list (float128_type_node,
19024 float128_type_node,
19025 float128_type_node,
19026 NULL_TREE);
19027 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_copysignq", ftype, IX86_BUILTIN_COPYSIGNQ);
19028 }
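/* Illustrative use of the TFmode builtins defined above, with hypothetical
   variables, valid only for TARGET_64BIT:

   __float128 x = __builtin_infq ();
   __float128 y = __builtin_copysignq (__builtin_fabsq (x), x); */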
19029
19030 /* Add all SSE builtins that are more or less simple operations on
19031 three operands. */
19032 for (i = 0, d = bdesc_sse_3arg;
19033 i < ARRAY_SIZE (bdesc_sse_3arg);
19034 i++, d++)
19035 {
19036 /* Use one of the operands; the target can have a different mode for
19037 mask-generating compares. */
19038 enum machine_mode mode;
19039 tree type;
19040
19041 if (d->name == 0)
19042 continue;
19043 mode = insn_data[d->icode].operand[1].mode;
19044
19045 switch (mode)
19046 {
19047 case V16QImode:
19048 type = v16qi_ftype_v16qi_v16qi_int;
19049 break;
19050 case V8HImode:
19051 type = v8hi_ftype_v8hi_v8hi_int;
19052 break;
19053 case V4SImode:
19054 type = v4si_ftype_v4si_v4si_int;
19055 break;
19056 case V2DImode:
19057 type = v2di_ftype_v2di_v2di_int;
19058 break;
19059 case V2DFmode:
19060 type = v2df_ftype_v2df_v2df_int;
19061 break;
19062 case V4SFmode:
19063 type = v4sf_ftype_v4sf_v4sf_int;
19064 break;
19065 default:
19066 gcc_unreachable ();
19067 }
19068
19069 /* Override for variable blends. */
19070 switch (d->icode)
19071 {
19072 case CODE_FOR_sse4_1_blendvpd:
19073 type = v2df_ftype_v2df_v2df_v2df;
19074 break;
19075 case CODE_FOR_sse4_1_blendvps:
19076 type = v4sf_ftype_v4sf_v4sf_v4sf;
19077 break;
19078 case CODE_FOR_sse4_1_pblendvb:
19079 type = v16qi_ftype_v16qi_v16qi_v16qi;
19080 break;
19081 default:
19082 break;
19083 }
19084
19085 def_builtin_const (d->mask, d->name, type, d->code);
19086 }
19087
19088 /* Add all builtins that are more or less simple operations on two
19089 operands. */
19090 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19091 {
19092 /* Use one of the operands; the target can have a different mode for
19093 mask-generating compares. */
19094 enum machine_mode mode;
19095 tree type;
19096
19097 if (d->name == 0)
19098 continue;
19099 mode = insn_data[d->icode].operand[1].mode;
19100
19101 switch (mode)
19102 {
19103 case V16QImode:
19104 type = v16qi_ftype_v16qi_v16qi;
19105 break;
19106 case V8HImode:
19107 type = v8hi_ftype_v8hi_v8hi;
19108 break;
19109 case V4SImode:
19110 type = v4si_ftype_v4si_v4si;
19111 break;
19112 case V2DImode:
19113 type = v2di_ftype_v2di_v2di;
19114 break;
19115 case V2DFmode:
19116 type = v2df_ftype_v2df_v2df;
19117 break;
19118 case V4SFmode:
19119 type = v4sf_ftype_v4sf_v4sf;
19120 break;
19121 case V8QImode:
19122 type = v8qi_ftype_v8qi_v8qi;
19123 break;
19124 case V4HImode:
19125 type = v4hi_ftype_v4hi_v4hi;
19126 break;
19127 case V2SImode:
19128 type = v2si_ftype_v2si_v2si;
19129 break;
19130 case DImode:
19131 type = di_ftype_di_di;
19132 break;
19133
19134 default:
19135 gcc_unreachable ();
19136 }
19137
19138 /* Override for comparisons. */
19139 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
19140 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
19141 type = v4si_ftype_v4sf_v4sf;
19142
19143 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
19144 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
19145 type = v2di_ftype_v2df_v2df;
19146
19147 if (d->icode == CODE_FOR_vec_pack_sfix_v2df)
19148 type = v4si_ftype_v2df_v2df;
19149
19150 def_builtin_const (d->mask, d->name, type, d->code);
19151 }
19152
19153 /* Add all builtins that are more or less simple operations on one operand. */
19154 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19155 {
19156 enum machine_mode mode;
19157 tree type;
19158
19159 if (d->name == 0)
19160 continue;
19161 mode = insn_data[d->icode].operand[1].mode;
19162
19163 switch (mode)
19164 {
19165 case V16QImode:
19166 type = v16qi_ftype_v16qi;
19167 break;
19168 case V8HImode:
19169 type = v8hi_ftype_v8hi;
19170 break;
19171 case V4SImode:
19172 type = v4si_ftype_v4si;
19173 break;
19174 case V2DFmode:
19175 type = v2df_ftype_v2df;
19176 break;
19177 case V4SFmode:
19178 type = v4sf_ftype_v4sf;
19179 break;
19180 case V8QImode:
19181 type = v8qi_ftype_v8qi;
19182 break;
19183 case V4HImode:
19184 type = v4hi_ftype_v4hi;
19185 break;
19186 case V2SImode:
19187 type = v2si_ftype_v2si;
19188 break;
19189
19190 default:
19191 gcc_unreachable ();
19192 }
19193
19194 def_builtin_const (d->mask, d->name, type, d->code);
19195 }
19196
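/* The string-compare builtins below come in explicit-length (pcmpestr*)
   and implicit-length (pcmpistr*) forms; the *M128 variants return the
   v16qi mask while the others return an int, which is why each loop
   chooses between two function types. */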
19197 /* pcmpestr[im] insns. */
19198 for (i = 0, d = bdesc_pcmpestr;
19199 i < ARRAY_SIZE (bdesc_pcmpestr);
19200 i++, d++)
19201 {
19202 if (d->code == IX86_BUILTIN_PCMPESTRM128)
19203 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
19204 else
19205 ftype = int_ftype_v16qi_int_v16qi_int_int;
19206 def_builtin_const (d->mask, d->name, ftype, d->code);
19207 }
19208
19209 /* pcmpistr[im] insns. */
19210 for (i = 0, d = bdesc_pcmpistr;
19211 i < ARRAY_SIZE (bdesc_pcmpistr);
19212 i++, d++)
19213 {
19214 if (d->code == IX86_BUILTIN_PCMPISTRM128)
19215 ftype = v16qi_ftype_v16qi_v16qi_int;
19216 else
19217 ftype = int_ftype_v16qi_v16qi_int;
19218 def_builtin_const (d->mask, d->name, ftype, d->code);
19219 }
19220
19221 /* Add the remaining MMX insns with somewhat more complicated types. */
19222 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
19223 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
19224 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
19225 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
19226
19227 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
19228 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
19229 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
19230
19231 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
19232 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
19233
19234 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
19235 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
19236
19237 /* comi/ucomi insns. */
19238 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
19239 if (d->mask == OPTION_MASK_ISA_SSE2)
19240 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
19241 else
19242 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
19243
19244 /* ptest insns. */
19245 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
19246 def_builtin_const (d->mask, d->name, int_ftype_v2di_v2di, d->code);
19247
19248 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
19249 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
19250 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
19251
19252 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
19253 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
19254 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
19255 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
19256 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
19257 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
19258 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
19259 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
19260 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
19261 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
19262 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
19263
19264 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
19265
19266 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
19267 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
19268
19269 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
19270 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
19271 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
19272 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
19273
19274 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
19275 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
19276 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
19277 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
19278
19279 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
19280
19281 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
19282
19283 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
19284 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
19285 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
19286 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtps_nr", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS_NR);
19287 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
19288 ftype = build_function_type_list (float_type_node,
19289 float_type_node,
19290 NULL_TREE);
19291 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtf", ftype, IX86_BUILTIN_RSQRTF);
19292 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
19293 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps_nr", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS_NR);
19294 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
19295
19296 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
19297
19298 /* Original 3DNow! */
19299 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
19300 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
19301 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
19302 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
19303 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
19304 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
19305 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
19306 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
19307 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
19308 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
19309 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
19310 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
19311 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
19312 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
19313 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
19314 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
19315 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
19316 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
19317 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
19318 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
19319
19320 /* 3DNow! extension as used in the Athlon CPU. */
19321 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
19322 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
19323 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
19324 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
19325 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
19326 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
19327
19328 /* SSE2 */
19329 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
19330
19331 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
19332 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
19333
19334 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
19335 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
19336
19337 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
19338 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
19339 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
19340 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
19341 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
19342
19343 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
19344 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
19345 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
19346 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
19347
19348 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
19349 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
19350
19351 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
19352
19353 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
19354 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
19355
19356 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
19357 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
19358 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
19359 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
19360 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
19361
19362 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
19363
19364 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
19365 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
19366 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
19367 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
19368
19369 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
19370 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
19371 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
19372
19373 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
19374 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
19375 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
19376 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
19377
19378 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
19379 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
19380 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
19381
19382 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
19383 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
19384
19385 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
19386 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
19387
19388 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
19389 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
19390 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
19391 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
19392 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
19393 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
19394 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
19395
19396 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
19397 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
19398 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
19399 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
19400 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
19401 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
19402 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
19403
19404 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
19405 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
19406 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
19407 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
19408
19409 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
19410
19411 /* Prescott New Instructions. */
19412 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
19413 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
19414 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_lddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
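/* Illustrative calls matching the monitor/mwait prototypes defined above,
   with hypothetical arguments:

   __builtin_ia32_monitor (addr, 0, 0);
   __builtin_ia32_mwait (0, 0); */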
19415
19416 /* SSSE3. */
19417 def_builtin_const (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
19418 def_builtin_const (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int, IX86_BUILTIN_PALIGNR);
19419
19420 /* SSE4.1. */
19421 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_movntdqa", v2di_ftype_pv2di, IX86_BUILTIN_MOVNTDQA);
19422 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVSXBW128);
19423 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVSXBD128);
19424 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVSXBQ128);
19425 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVSXWD128);
19426 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVSXWQ128);
19427 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVSXDQ128);
19428 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVZXBW128);
19429 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVZXBD128);
19430 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVZXBQ128);
19431 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVZXWD128);
19432 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVZXWQ128);
19433 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVZXDQ128);
19434 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmuldq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULDQ128);
19435
19436 /* SSE4.1 and SSE5 */
19437 def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundpd", v2df_ftype_v2df_int, IX86_BUILTIN_ROUNDPD);
19438 def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundps", v4sf_ftype_v4sf_int, IX86_BUILTIN_ROUNDPS);
19439 def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundsd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_ROUNDSD);
19440 def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundss", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_ROUNDSS);
19441
19442 /* SSE4.2. */
19443 ftype = build_function_type_list (unsigned_type_node,
19444 unsigned_type_node,
19445 unsigned_char_type_node,
19446 NULL_TREE);
19447 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32qi", ftype, IX86_BUILTIN_CRC32QI);
19448 ftype = build_function_type_list (unsigned_type_node,
19449 unsigned_type_node,
19450 short_unsigned_type_node,
19451 NULL_TREE);
19452 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32hi", ftype, IX86_BUILTIN_CRC32HI);
19453 ftype = build_function_type_list (unsigned_type_node,
19454 unsigned_type_node,
19455 unsigned_type_node,
19456 NULL_TREE);
19457 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32si", ftype, IX86_BUILTIN_CRC32SI);
19458 ftype = build_function_type_list (long_long_unsigned_type_node,
19459 long_long_unsigned_type_node,
19460 long_long_unsigned_type_node,
19461 NULL_TREE);
19462 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32di", ftype, IX86_BUILTIN_CRC32DI);
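/* Illustrative CRC accumulation using the builtins defined above, with
   hypothetical variables:

   unsigned crc = 0;
   crc = __builtin_ia32_crc32qi (crc, byte);
   crc = __builtin_ia32_crc32si (crc, word); */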
19463
19464 /* AMDFAM10 SSE4A new built-ins. */
19465 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
19466 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
19467 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
19468 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
19469 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
19470 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
19471
19472 /* Access to the vec_init patterns. */
19473 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
19474 integer_type_node, NULL_TREE);
19475 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
19476
19477 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
19478 short_integer_type_node,
19479 short_integer_type_node,
19480 short_integer_type_node, NULL_TREE);
19481 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
19482
19483 ftype = build_function_type_list (V8QI_type_node, char_type_node,
19484 char_type_node, char_type_node,
19485 char_type_node, char_type_node,
19486 char_type_node, char_type_node,
19487 char_type_node, NULL_TREE);
19488 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
19489
19490 /* Access to the vec_extract patterns. */
19491 ftype = build_function_type_list (double_type_node, V2DF_type_node,
19492 integer_type_node, NULL_TREE);
19493 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
19494
19495 ftype = build_function_type_list (long_long_integer_type_node,
19496 V2DI_type_node, integer_type_node,
19497 NULL_TREE);
19498 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
19499
19500 ftype = build_function_type_list (float_type_node, V4SF_type_node,
19501 integer_type_node, NULL_TREE);
19502 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
19503
19504 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
19505 integer_type_node, NULL_TREE);
19506 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
19507
19508 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
19509 integer_type_node, NULL_TREE);
19510 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
19511
19512 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
19513 integer_type_node, NULL_TREE);
19514 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
19515
19516 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
19517 integer_type_node, NULL_TREE);
19518 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
19519
19520 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
19521 integer_type_node, NULL_TREE);
19522 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
19523
19524 /* Access to the vec_set patterns. */
19525 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
19526 intDI_type_node,
19527 integer_type_node, NULL_TREE);
19528 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
19529
19530 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
19531 float_type_node,
19532 integer_type_node, NULL_TREE);
19533 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
19534
19535 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
19536 intSI_type_node,
19537 integer_type_node, NULL_TREE);
19538 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
19539
19540 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
19541 intHI_type_node,
19542 integer_type_node, NULL_TREE);
19543 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
19544
19545 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
19546 intHI_type_node,
19547 integer_type_node, NULL_TREE);
19548 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
19549
19550 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
19551 intQI_type_node,
19552 integer_type_node, NULL_TREE);
19553 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
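/* The vec_ext/vec_set builtins above underlie vector element access;
   illustratively, __builtin_ia32_vec_ext_v4sf (v, 0) yields element 0 of
   a V4SF value and __builtin_ia32_vec_set_v4si (v, x, 2) returns v with
   element 2 replaced by x, matching the function types built above. */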
19554
19555 /* Add SSE5 multi-argument instructions. */
19556 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
19557 {
19558 tree mtype = NULL_TREE;
19559
19560 if (d->name == 0)
19561 continue;
19562
19563 switch ((enum multi_arg_type)d->flag)
19564 {
19565 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
19566 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
19567 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
19568 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
19569 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
19570 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
19571 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
19572 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
19573 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
19574 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
19575 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
19576 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
19577 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
19578 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
19579 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
19580 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
19581 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
19582 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
19583 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
19584 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
19585 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
19586 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
19587 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
19588 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
19589 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
19590 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
19591 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
19592 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
19593 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
19594 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
19595 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
19596 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
19597 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
19598 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
19599 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
19600 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
19601 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
19602 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
19603 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
19604 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
19605 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
19606 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
19607 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
19608 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
19609 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
19610 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
19611 case MULTI_ARG_UNKNOWN:
19612 default:
19613 gcc_unreachable ();
19614 }
19615
19616 if (mtype)
19617 def_builtin_const (d->mask, d->name, mtype, d->code);
19618 }
19619 }
19620
19621 static void
19622 ix86_init_builtins (void)
19623 {
19624 if (TARGET_MMX)
19625 ix86_init_mmx_sse_builtins ();
19626 }
19627
19628 /* Errors in the source file can cause expand_expr to return const0_rtx
19629 where we expect a vector. To avoid crashing, substitute an all-zeros
19630 vector constant (CONST0_RTX) of the expected mode. */
19631 static rtx
19632 safe_vector_operand (rtx x, enum machine_mode mode)
19633 {
19634 if (x == const0_rtx)
19635 x = CONST0_RTX (mode);
19636 return x;
19637 }
19638
19639 /* Subroutine of ix86_expand_builtin to take care of SSE insns with
19640 4 operands. The third argument must be an immediate that fits in 8 bits
19641 (4 bits for roundss/roundsd), or xmm0 for the variable blend insns. */
19642
19643 static rtx
19644 ix86_expand_sse_4_operands_builtin (enum insn_code icode, tree exp,
19645 rtx target)
19646 {
19647 rtx pat;
19648 tree arg0 = CALL_EXPR_ARG (exp, 0);
19649 tree arg1 = CALL_EXPR_ARG (exp, 1);
19650 tree arg2 = CALL_EXPR_ARG (exp, 2);
19651 rtx op0 = expand_normal (arg0);
19652 rtx op1 = expand_normal (arg1);
19653 rtx op2 = expand_normal (arg2);
19654 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19655 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
19656 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
19657 enum machine_mode mode3 = insn_data[icode].operand[3].mode;
19658
19659 if (VECTOR_MODE_P (mode1))
19660 op0 = safe_vector_operand (op0, mode1);
19661 if (VECTOR_MODE_P (mode2))
19662 op1 = safe_vector_operand (op1, mode2);
19663 if (VECTOR_MODE_P (mode3))
19664 op2 = safe_vector_operand (op2, mode3);
19665
19666 if (optimize
19667 || target == 0
19668 || GET_MODE (target) != tmode
19669 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19670 target = gen_reg_rtx (tmode);
19671
19672 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19673 op0 = copy_to_mode_reg (mode1, op0);
19674 if ((optimize && !register_operand (op1, mode2))
19675 || !(*insn_data[icode].operand[2].predicate) (op1, mode2))
19676 op1 = copy_to_mode_reg (mode2, op1);
19677
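/* For the variable blend insns the third operand is the xmm0 register, so
   it is simply forced into a register; every other insn handled here
   requires an immediate, 4-bit for roundss/roundsd and 8-bit otherwise. */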
19678 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19679 switch (icode)
19680 {
19681 case CODE_FOR_sse4_1_blendvpd:
19682 case CODE_FOR_sse4_1_blendvps:
19683 case CODE_FOR_sse4_1_pblendvb:
19684 op2 = copy_to_mode_reg (mode3, op2);
19685 break;
19686
19687 case CODE_FOR_sse4_1_roundsd:
19688 case CODE_FOR_sse4_1_roundss:
19689 error ("the third argument must be a 4-bit immediate");
19690 return const0_rtx;
19691
19692 default:
19693 error ("the third argument must be an 8-bit immediate");
19694 return const0_rtx;
19695 }
19696
19697 pat = GEN_FCN (icode) (target, op0, op1, op2);
19698 if (! pat)
19699 return 0;
19700 emit_insn (pat);
19701 return target;
19702 }
19703
19704 /* Subroutine of ix86_expand_builtin to take care of crc32 insns. */
19705
19706 static rtx
19707 ix86_expand_crc32 (enum insn_code icode, tree exp, rtx target)
19708 {
19709 rtx pat;
19710 tree arg0 = CALL_EXPR_ARG (exp, 0);
19711 tree arg1 = CALL_EXPR_ARG (exp, 1);
19712 rtx op0 = expand_normal (arg0);
19713 rtx op1 = expand_normal (arg1);
19714 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19715 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19716 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
19717
19718 if (optimize
19719 || !target
19720 || GET_MODE (target) != tmode
19721 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19722 target = gen_reg_rtx (tmode);
19723
19724 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
19725 op0 = copy_to_mode_reg (mode0, op0);
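/* The crc32qi/crc32hi patterns want a QImode/HImode second operand; if the
   expanded argument does not satisfy the predicate, copy it to a register
   and refer to its low part through a subreg of the required mode. */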
19726 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
19727 {
19728 op1 = copy_to_reg (op1);
19729 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
19730 }
19731
19732 pat = GEN_FCN (icode) (target, op0, op1);
19733 if (! pat)
19734 return 0;
19735 emit_insn (pat);
19736 return target;
19737 }
19738
19739 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
19740
19741 static rtx
19742 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
19743 {
19744 rtx pat, xops[3];
19745 tree arg0 = CALL_EXPR_ARG (exp, 0);
19746 tree arg1 = CALL_EXPR_ARG (exp, 1);
19747 rtx op0 = expand_normal (arg0);
19748 rtx op1 = expand_normal (arg1);
19749 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19750 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19751 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
19752
19753 if (VECTOR_MODE_P (mode0))
19754 op0 = safe_vector_operand (op0, mode0);
19755 if (VECTOR_MODE_P (mode1))
19756 op1 = safe_vector_operand (op1, mode1);
19757
19758 if (optimize || !target
19759 || GET_MODE (target) != tmode
19760 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19761 target = gen_reg_rtx (tmode);
19762
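/* A scalar SImode argument may feed a TImode vector operand; widen it
   by loading it into a V4SImode register and taking the TImode lowpart.  */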
19763 if (GET_MODE (op1) == SImode && mode1 == TImode)
19764 {
19765 rtx x = gen_reg_rtx (V4SImode);
19766 emit_insn (gen_sse2_loadd (x, op1));
19767 op1 = gen_lowpart (TImode, x);
19768 }
19769
19770 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
19771 op0 = copy_to_mode_reg (mode0, op0);
19772 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
19773 op1 = copy_to_mode_reg (mode1, op1);
19774
19775 /* ??? Using ix86_fixup_binary_operands is problematic when
19776 we've got mismatched modes. Fake it. */
19777
19778 xops[0] = target;
19779 xops[1] = op0;
19780 xops[2] = op1;
19781
19782 if (tmode == mode0 && tmode == mode1)
19783 {
19784 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
19785 op0 = xops[1];
19786 op1 = xops[2];
19787 }
19788 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
19789 {
19790 op0 = force_reg (mode0, op0);
19791 op1 = force_reg (mode1, op1);
19792 target = gen_reg_rtx (tmode);
19793 }
19794
19795 pat = GEN_FCN (icode) (target, op0, op1);
19796 if (! pat)
19797 return 0;
19798 emit_insn (pat);
19799 return target;
19800 }
19801
19802 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
19803
19804 static rtx
19805 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
19806 enum multi_arg_type m_type,
19807 enum insn_code sub_code)
19808 {
19809 rtx pat;
19810 int i;
19811 int nargs;
19812 bool comparison_p = false;
19813 bool tf_p = false;
19814 bool last_arg_constant = false;
19815 int num_memory = 0;
19816 struct {
19817 rtx op;
19818 enum machine_mode mode;
19819 } args[4];
19820
19821 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19822
19823 switch (m_type)
19824 {
19825 case MULTI_ARG_3_SF:
19826 case MULTI_ARG_3_DF:
19827 case MULTI_ARG_3_DI:
19828 case MULTI_ARG_3_SI:
19829 case MULTI_ARG_3_SI_DI:
19830 case MULTI_ARG_3_HI:
19831 case MULTI_ARG_3_HI_SI:
19832 case MULTI_ARG_3_QI:
19833 case MULTI_ARG_3_PERMPS:
19834 case MULTI_ARG_3_PERMPD:
19835 nargs = 3;
19836 break;
19837
19838 case MULTI_ARG_2_SF:
19839 case MULTI_ARG_2_DF:
19840 case MULTI_ARG_2_DI:
19841 case MULTI_ARG_2_SI:
19842 case MULTI_ARG_2_HI:
19843 case MULTI_ARG_2_QI:
19844 nargs = 2;
19845 break;
19846
19847 case MULTI_ARG_2_DI_IMM:
19848 case MULTI_ARG_2_SI_IMM:
19849 case MULTI_ARG_2_HI_IMM:
19850 case MULTI_ARG_2_QI_IMM:
19851 nargs = 2;
19852 last_arg_constant = true;
19853 break;
19854
19855 case MULTI_ARG_1_SF:
19856 case MULTI_ARG_1_DF:
19857 case MULTI_ARG_1_DI:
19858 case MULTI_ARG_1_SI:
19859 case MULTI_ARG_1_HI:
19860 case MULTI_ARG_1_QI:
19861 case MULTI_ARG_1_SI_DI:
19862 case MULTI_ARG_1_HI_DI:
19863 case MULTI_ARG_1_HI_SI:
19864 case MULTI_ARG_1_QI_DI:
19865 case MULTI_ARG_1_QI_SI:
19866 case MULTI_ARG_1_QI_HI:
19867 case MULTI_ARG_1_PH2PS:
19868 case MULTI_ARG_1_PS2PH:
19869 nargs = 1;
19870 break;
19871
19872 case MULTI_ARG_2_SF_CMP:
19873 case MULTI_ARG_2_DF_CMP:
19874 case MULTI_ARG_2_DI_CMP:
19875 case MULTI_ARG_2_SI_CMP:
19876 case MULTI_ARG_2_HI_CMP:
19877 case MULTI_ARG_2_QI_CMP:
19878 nargs = 2;
19879 comparison_p = true;
19880 break;
19881
19882 case MULTI_ARG_2_SF_TF:
19883 case MULTI_ARG_2_DF_TF:
19884 case MULTI_ARG_2_DI_TF:
19885 case MULTI_ARG_2_SI_TF:
19886 case MULTI_ARG_2_HI_TF:
19887 case MULTI_ARG_2_QI_TF:
19888 nargs = 2;
19889 tf_p = true;
19890 break;
19891
19892 case MULTI_ARG_UNKNOWN:
19893 default:
19894 gcc_unreachable ();
19895 }
19896
19897 if (optimize || !target
19898 || GET_MODE (target) != tmode
19899 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19900 target = gen_reg_rtx (tmode);
19901
19902 gcc_assert (nargs <= 4);
19903
19904 for (i = 0; i < nargs; i++)
19905 {
19906 tree arg = CALL_EXPR_ARG (exp, i);
19907 rtx op = expand_normal (arg);
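/* For the comparison forms, operand 1 of the pattern is the comparison
   rtx itself, so the expanded arguments start one operand later.  */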
19908 int adjust = (comparison_p) ? 1 : 0;
19909 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
19910
19911 if (last_arg_constant && i == nargs-1)
19912 {
19913 if (GET_CODE (op) != CONST_INT)
19914 {
19915 error ("last argument must be an immediate");
19916 return gen_reg_rtx (tmode);
19917 }
19918 }
19919 else
19920 {
19921 if (VECTOR_MODE_P (mode))
19922 op = safe_vector_operand (op, mode);
19923
19924 /* If we aren't optimizing, only allow one memory operand to be
19925 generated. */
19926 if (memory_operand (op, mode))
19927 num_memory++;
19928
19929 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
19930
19931 if (optimize
19932 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
19933 || num_memory > 1)
19934 op = force_reg (mode, op);
19935 }
19936
19937 args[i].op = op;
19938 args[i].mode = mode;
19939 }
19940
19941 switch (nargs)
19942 {
19943 case 1:
19944 pat = GEN_FCN (icode) (target, args[0].op);
19945 break;
19946
19947 case 2:
19948 if (tf_p)
19949 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
19950 GEN_INT ((int)sub_code));
19951 else if (! comparison_p)
19952 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
19953 else
19954 {
19955 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
19956 args[0].op,
19957 args[1].op);
19958
19959 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
19960 }
19961 break;
19962
19963 case 3:
19964 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
19965 break;
19966
19967 default:
19968 gcc_unreachable ();
19969 }
19970
19971 if (! pat)
19972 return 0;
19973
19974 emit_insn (pat);
19975 return target;
19976 }
19977
19978 /* Subroutine of ix86_expand_builtin to take care of stores. */
19979
19980 static rtx
19981 ix86_expand_store_builtin (enum insn_code icode, tree exp)
19982 {
19983 rtx pat;
19984 tree arg0 = CALL_EXPR_ARG (exp, 0);
19985 tree arg1 = CALL_EXPR_ARG (exp, 1);
19986 rtx op0 = expand_normal (arg0);
19987 rtx op1 = expand_normal (arg1);
19988 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
19989 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
19990
19991 if (VECTOR_MODE_P (mode1))
19992 op1 = safe_vector_operand (op1, mode1);
19993
19994 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
19995 op1 = copy_to_mode_reg (mode1, op1);
19996
19997 pat = GEN_FCN (icode) (op0, op1);
19998 if (pat)
19999 emit_insn (pat);
20000 return 0;
20001 }
20002
20003 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
20004
20005 static rtx
20006 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
20007 rtx target, int do_load)
20008 {
20009 rtx pat;
20010 tree arg0 = CALL_EXPR_ARG (exp, 0);
20011 rtx op0 = expand_normal (arg0);
20012 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20013 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20014
20015 if (optimize || !target
20016 || GET_MODE (target) != tmode
20017 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20018 target = gen_reg_rtx (tmode);
20019 if (do_load)
20020 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
20021 else
20022 {
20023 if (VECTOR_MODE_P (mode0))
20024 op0 = safe_vector_operand (op0, mode0);
20025
20026 if ((optimize && !register_operand (op0, mode0))
20027 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20028 op0 = copy_to_mode_reg (mode0, op0);
20029 }
20030
20031 switch (icode)
20032 {
20033 case CODE_FOR_sse4_1_roundpd:
20034 case CODE_FOR_sse4_1_roundps:
20035 {
20036 tree arg1 = CALL_EXPR_ARG (exp, 1);
20037 rtx op1 = expand_normal (arg1);
20038 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
20039
20040 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20041 {
20042 error ("the second argument must be a 4-bit immediate");
20043 return const0_rtx;
20044 }
20045 pat = GEN_FCN (icode) (target, op0, op1);
20046 }
20047 break;
20048 default:
20049 pat = GEN_FCN (icode) (target, op0);
20050 break;
20051 }
20052
20053 if (! pat)
20054 return 0;
20055 emit_insn (pat);
20056 return target;
20057 }
20058
20059 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
20060 sqrtss, rsqrtss, rcpss. */
20061
20062 static rtx
20063 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
20064 {
20065 rtx pat;
20066 tree arg0 = CALL_EXPR_ARG (exp, 0);
20067 rtx op1, op0 = expand_normal (arg0);
20068 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20069 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20070
20071 if (optimize || !target
20072 || GET_MODE (target) != tmode
20073 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20074 target = gen_reg_rtx (tmode);
20075
20076 if (VECTOR_MODE_P (mode0))
20077 op0 = safe_vector_operand (op0, mode0);
20078
20079 if ((optimize && !register_operand (op0, mode0))
20080 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20081 op0 = copy_to_mode_reg (mode0, op0);
20082
20083 op1 = op0;
20084 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
20085 op1 = copy_to_mode_reg (mode0, op1);
20086
20087 pat = GEN_FCN (icode) (target, op0, op1);
20088 if (! pat)
20089 return 0;
20090 emit_insn (pat);
20091 return target;
20092 }
20093
20094 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
20095
20096 static rtx
20097 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
20098 rtx target)
20099 {
20100 rtx pat;
20101 tree arg0 = CALL_EXPR_ARG (exp, 0);
20102 tree arg1 = CALL_EXPR_ARG (exp, 1);
20103 rtx op0 = expand_normal (arg0);
20104 rtx op1 = expand_normal (arg1);
20105 rtx op2;
20106 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
20107 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
20108 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
20109 enum rtx_code comparison = d->comparison;
20110
20111 if (VECTOR_MODE_P (mode0))
20112 op0 = safe_vector_operand (op0, mode0);
20113 if (VECTOR_MODE_P (mode1))
20114 op1 = safe_vector_operand (op1, mode1);
20115
20116 /* Swap operands if we have a comparison that isn't available in
20117 hardware. */
20118 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
20119 {
20120 rtx tmp = gen_reg_rtx (mode1);
20121 emit_move_insn (tmp, op1);
20122 op1 = op0;
20123 op0 = tmp;
20124 }
20125
20126 if (optimize || !target
20127 || GET_MODE (target) != tmode
20128 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
20129 target = gen_reg_rtx (tmode);
20130
20131 if ((optimize && !register_operand (op0, mode0))
20132 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
20133 op0 = copy_to_mode_reg (mode0, op0);
20134 if ((optimize && !register_operand (op1, mode1))
20135 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
20136 op1 = copy_to_mode_reg (mode1, op1);
20137
20138 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
20139 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
20140 if (! pat)
20141 return 0;
20142 emit_insn (pat);
20143 return target;
20144 }
20145
20146 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
20147
20148 static rtx
20149 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
20150 rtx target)
20151 {
20152 rtx pat;
20153 tree arg0 = CALL_EXPR_ARG (exp, 0);
20154 tree arg1 = CALL_EXPR_ARG (exp, 1);
20155 rtx op0 = expand_normal (arg0);
20156 rtx op1 = expand_normal (arg1);
20157 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20158 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20159 enum rtx_code comparison = d->comparison;
20160
20161 if (VECTOR_MODE_P (mode0))
20162 op0 = safe_vector_operand (op0, mode0);
20163 if (VECTOR_MODE_P (mode1))
20164 op1 = safe_vector_operand (op1, mode1);
20165
20166 /* Swap operands if we have a comparison that isn't available in
20167 hardware. */
20168 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
20169 {
20170 rtx tmp = op1;
20171 op1 = op0;
20172 op0 = tmp;
20173 }
20174
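/* Materialize the boolean result: zero an SImode pseudo, then set its
   low QImode part from the flags produced by the comi pattern below.  */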
20175 target = gen_reg_rtx (SImode);
20176 emit_move_insn (target, const0_rtx);
20177 target = gen_rtx_SUBREG (QImode, target, 0);
20178
20179 if ((optimize && !register_operand (op0, mode0))
20180 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20181 op0 = copy_to_mode_reg (mode0, op0);
20182 if ((optimize && !register_operand (op1, mode1))
20183 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20184 op1 = copy_to_mode_reg (mode1, op1);
20185
20186 pat = GEN_FCN (d->icode) (op0, op1);
20187 if (! pat)
20188 return 0;
20189 emit_insn (pat);
20190 emit_insn (gen_rtx_SET (VOIDmode,
20191 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20192 gen_rtx_fmt_ee (comparison, QImode,
20193 SET_DEST (pat),
20194 const0_rtx)));
20195
20196 return SUBREG_REG (target);
20197 }
20198
20199 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
20200
20201 static rtx
20202 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
20203 rtx target)
20204 {
20205 rtx pat;
20206 tree arg0 = CALL_EXPR_ARG (exp, 0);
20207 tree arg1 = CALL_EXPR_ARG (exp, 1);
20208 rtx op0 = expand_normal (arg0);
20209 rtx op1 = expand_normal (arg1);
20210 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20211 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20212 enum rtx_code comparison = d->comparison;
20213
20214 if (VECTOR_MODE_P (mode0))
20215 op0 = safe_vector_operand (op0, mode0);
20216 if (VECTOR_MODE_P (mode1))
20217 op1 = safe_vector_operand (op1, mode1);
20218
20219 target = gen_reg_rtx (SImode);
20220 emit_move_insn (target, const0_rtx);
20221 target = gen_rtx_SUBREG (QImode, target, 0);
20222
20223 if ((optimize && !register_operand (op0, mode0))
20224 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20225 op0 = copy_to_mode_reg (mode0, op0);
20226 if ((optimize && !register_operand (op1, mode1))
20227 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20228 op1 = copy_to_mode_reg (mode1, op1);
20229
20230 pat = GEN_FCN (d->icode) (op0, op1);
20231 if (! pat)
20232 return 0;
20233 emit_insn (pat);
20234 emit_insn (gen_rtx_SET (VOIDmode,
20235 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20236 gen_rtx_fmt_ee (comparison, QImode,
20237 SET_DEST (pat),
20238 const0_rtx)));
20239
20240 return SUBREG_REG (target);
20241 }
20242
20243 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
20244
20245 static rtx
20246 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
20247 tree exp, rtx target)
20248 {
20249 rtx pat;
20250 tree arg0 = CALL_EXPR_ARG (exp, 0);
20251 tree arg1 = CALL_EXPR_ARG (exp, 1);
20252 tree arg2 = CALL_EXPR_ARG (exp, 2);
20253 tree arg3 = CALL_EXPR_ARG (exp, 3);
20254 tree arg4 = CALL_EXPR_ARG (exp, 4);
20255 rtx scratch0, scratch1;
20256 rtx op0 = expand_normal (arg0);
20257 rtx op1 = expand_normal (arg1);
20258 rtx op2 = expand_normal (arg2);
20259 rtx op3 = expand_normal (arg3);
20260 rtx op4 = expand_normal (arg4);
20261 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
20262
20263 tmode0 = insn_data[d->icode].operand[0].mode;
20264 tmode1 = insn_data[d->icode].operand[1].mode;
20265 modev2 = insn_data[d->icode].operand[2].mode;
20266 modei3 = insn_data[d->icode].operand[3].mode;
20267 modev4 = insn_data[d->icode].operand[4].mode;
20268 modei5 = insn_data[d->icode].operand[5].mode;
20269 modeimm = insn_data[d->icode].operand[6].mode;
20270
20271 if (VECTOR_MODE_P (modev2))
20272 op0 = safe_vector_operand (op0, modev2);
20273 if (VECTOR_MODE_P (modev4))
20274 op2 = safe_vector_operand (op2, modev4);
20275
20276 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
20277 op0 = copy_to_mode_reg (modev2, op0);
20278 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
20279 op1 = copy_to_mode_reg (modei3, op1);
20280 if ((optimize && !register_operand (op2, modev4))
20281 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
20282 op2 = copy_to_mode_reg (modev4, op2);
20283 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
20284 op3 = copy_to_mode_reg (modei5, op3);
20285
20286 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
20287 {
20288 error ("the fifth argument must be a 8-bit immediate");
20289 return const0_rtx;
20290 }
20291
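/* The pcmpestr patterns produce both an index (TMODE0) and a mask
   (TMODE1); depending on the builtin we return one of them or, for the
   flag-extracting variants handled at the end, a bit tested from the
   flags register.  */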
20292 if (d->code == IX86_BUILTIN_PCMPESTRI128)
20293 {
20294 if (optimize || !target
20295 || GET_MODE (target) != tmode0
20296 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
20297 target = gen_reg_rtx (tmode0);
20298
20299 scratch1 = gen_reg_rtx (tmode1);
20300
20301 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
20302 }
20303 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
20304 {
20305 if (optimize || !target
20306 || GET_MODE (target) != tmode1
20307 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
20308 target = gen_reg_rtx (tmode1);
20309
20310 scratch0 = gen_reg_rtx (tmode0);
20311
20312 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
20313 }
20314 else
20315 {
20316 gcc_assert (d->flag);
20317
20318 scratch0 = gen_reg_rtx (tmode0);
20319 scratch1 = gen_reg_rtx (tmode1);
20320
20321 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
20322 }
20323
20324 if (! pat)
20325 return 0;
20326
20327 emit_insn (pat);
20328
20329 if (d->flag)
20330 {
20331 target = gen_reg_rtx (SImode);
20332 emit_move_insn (target, const0_rtx);
20333 target = gen_rtx_SUBREG (QImode, target, 0);
20334
20335 emit_insn
20336 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20337 gen_rtx_fmt_ee (EQ, QImode,
20338 gen_rtx_REG ((enum machine_mode) d->flag,
20339 FLAGS_REG),
20340 const0_rtx)));
20341 return SUBREG_REG (target);
20342 }
20343 else
20344 return target;
20345 }
20346
20347
20348 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
20349
20350 static rtx
20351 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
20352 tree exp, rtx target)
20353 {
20354 rtx pat;
20355 tree arg0 = CALL_EXPR_ARG (exp, 0);
20356 tree arg1 = CALL_EXPR_ARG (exp, 1);
20357 tree arg2 = CALL_EXPR_ARG (exp, 2);
20358 rtx scratch0, scratch1;
20359 rtx op0 = expand_normal (arg0);
20360 rtx op1 = expand_normal (arg1);
20361 rtx op2 = expand_normal (arg2);
20362 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
20363
20364 tmode0 = insn_data[d->icode].operand[0].mode;
20365 tmode1 = insn_data[d->icode].operand[1].mode;
20366 modev2 = insn_data[d->icode].operand[2].mode;
20367 modev3 = insn_data[d->icode].operand[3].mode;
20368 modeimm = insn_data[d->icode].operand[4].mode;
20369
20370 if (VECTOR_MODE_P (modev2))
20371 op0 = safe_vector_operand (op0, modev2);
20372 if (VECTOR_MODE_P (modev3))
20373 op1 = safe_vector_operand (op1, modev3);
20374
20375 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
20376 op0 = copy_to_mode_reg (modev2, op0);
20377 if ((optimize && !register_operand (op1, modev3))
20378 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
20379 op1 = copy_to_mode_reg (modev3, op1);
20380
20381 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
20382 {
20383 error ("the third argument must be a 8-bit immediate");
20384 return const0_rtx;
20385 }
20386
20387 if (d->code == IX86_BUILTIN_PCMPISTRI128)
20388 {
20389 if (optimize || !target
20390 || GET_MODE (target) != tmode0
20391 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
20392 target = gen_reg_rtx (tmode0);
20393
20394 scratch1 = gen_reg_rtx (tmode1);
20395
20396 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
20397 }
20398 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
20399 {
20400 if (optimize || !target
20401 || GET_MODE (target) != tmode1
20402 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
20403 target = gen_reg_rtx (tmode1);
20404
20405 scratch0 = gen_reg_rtx (tmode0);
20406
20407 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
20408 }
20409 else
20410 {
20411 gcc_assert (d->flag);
20412
20413 scratch0 = gen_reg_rtx (tmode0);
20414 scratch1 = gen_reg_rtx (tmode1);
20415
20416 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
20417 }
20418
20419 if (! pat)
20420 return 0;
20421
20422 emit_insn (pat);
20423
20424 if (d->flag)
20425 {
20426 target = gen_reg_rtx (SImode);
20427 emit_move_insn (target, const0_rtx);
20428 target = gen_rtx_SUBREG (QImode, target, 0);
20429
20430 emit_insn
20431 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20432 gen_rtx_fmt_ee (EQ, QImode,
20433 gen_rtx_REG ((enum machine_mode) d->flag,
20434 FLAGS_REG),
20435 const0_rtx)));
20436 return SUBREG_REG (target);
20437 }
20438 else
20439 return target;
20440 }
20441
20442 /* Return the integer constant in ARG. Constrain it to be in the range
20443 of the subparts of VEC_TYPE; issue an error if not. */
20444
20445 static int
20446 get_element_number (tree vec_type, tree arg)
20447 {
20448 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
20449
20450 if (!host_integerp (arg, 1)
20451 || (elt = tree_low_cst (arg, 1), elt > max))
20452 {
20453 error ("selector must be an integer constant in the range 0..%wi", max);
20454 return 0;
20455 }
20456
20457 return elt;
20458 }
20459
20460 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
20461 ix86_expand_vector_init. We DO have language-level syntax for this, in
20462 the form of (type){ init-list }. Except that since we can't place emms
20463 instructions from inside the compiler, we can't allow the use of MMX
20464 registers unless the user explicitly asks for it. So we do *not* define
20465 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
20466 we have builtins invoked by mmintrin.h that give us license to emit
20467 these sorts of instructions. */
20468
20469 static rtx
20470 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
20471 {
20472 enum machine_mode tmode = TYPE_MODE (type);
20473 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
20474 int i, n_elt = GET_MODE_NUNITS (tmode);
20475 rtvec v = rtvec_alloc (n_elt);
20476
20477 gcc_assert (VECTOR_MODE_P (tmode));
20478 gcc_assert (call_expr_nargs (exp) == n_elt);
20479
20480 for (i = 0; i < n_elt; ++i)
20481 {
20482 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
20483 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
20484 }
20485
20486 if (!target || !register_operand (target, tmode))
20487 target = gen_reg_rtx (tmode);
20488
20489 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
20490 return target;
20491 }
20492
20493 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
20494 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
20495 had a language-level syntax for referencing vector elements. */
20496
20497 static rtx
20498 ix86_expand_vec_ext_builtin (tree exp, rtx target)
20499 {
20500 enum machine_mode tmode, mode0;
20501 tree arg0, arg1;
20502 int elt;
20503 rtx op0;
20504
20505 arg0 = CALL_EXPR_ARG (exp, 0);
20506 arg1 = CALL_EXPR_ARG (exp, 1);
20507
20508 op0 = expand_normal (arg0);
20509 elt = get_element_number (TREE_TYPE (arg0), arg1);
20510
20511 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
20512 mode0 = TYPE_MODE (TREE_TYPE (arg0));
20513 gcc_assert (VECTOR_MODE_P (mode0));
20514
20515 op0 = force_reg (mode0, op0);
20516
20517 if (optimize || !target || !register_operand (target, tmode))
20518 target = gen_reg_rtx (tmode);
20519
20520 ix86_expand_vector_extract (true, target, op0, elt);
20521
20522 return target;
20523 }
20524
20525 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
20526 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
20527 a language-level syntax for referencing vector elements. */
20528
20529 static rtx
20530 ix86_expand_vec_set_builtin (tree exp)
20531 {
20532 enum machine_mode tmode, mode1;
20533 tree arg0, arg1, arg2;
20534 int elt;
20535 rtx op0, op1, target;
20536
20537 arg0 = CALL_EXPR_ARG (exp, 0);
20538 arg1 = CALL_EXPR_ARG (exp, 1);
20539 arg2 = CALL_EXPR_ARG (exp, 2);
20540
20541 tmode = TYPE_MODE (TREE_TYPE (arg0));
20542 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
20543 gcc_assert (VECTOR_MODE_P (tmode));
20544
20545 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
20546 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
20547 elt = get_element_number (TREE_TYPE (arg0), arg2);
20548
20549 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
20550 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
20551
20552 op0 = force_reg (tmode, op0);
20553 op1 = force_reg (mode1, op1);
20554
20555 /* OP0 is the source of these builtin functions and shouldn't be
20556 modified. Create a copy, use it and return it as target. */
20557 target = gen_reg_rtx (tmode);
20558 emit_move_insn (target, op0);
20559 ix86_expand_vector_set (true, target, op1, elt);
20560
20561 return target;
20562 }
20563
20564 /* Expand an expression EXP that calls a built-in function,
20565 with result going to TARGET if that's convenient
20566 (and in mode MODE if that's convenient).
20567 SUBTARGET may be used as the target for computing one of EXP's operands.
20568 IGNORE is nonzero if the value is to be ignored. */
20569
20570 static rtx
20571 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
20572 enum machine_mode mode ATTRIBUTE_UNUSED,
20573 int ignore ATTRIBUTE_UNUSED)
20574 {
20575 const struct builtin_description *d;
20576 size_t i;
20577 enum insn_code icode;
20578 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
20579 tree arg0, arg1, arg2, arg3;
20580 rtx op0, op1, op2, op3, pat;
20581 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
20582 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
20583
20584 switch (fcode)
20585 {
20586 case IX86_BUILTIN_EMMS:
20587 emit_insn (gen_mmx_emms ());
20588 return 0;
20589
20590 case IX86_BUILTIN_SFENCE:
20591 emit_insn (gen_sse_sfence ());
20592 return 0;
20593
20594 case IX86_BUILTIN_MASKMOVQ:
20595 case IX86_BUILTIN_MASKMOVDQU:
20596 icode = (fcode == IX86_BUILTIN_MASKMOVQ
20597 ? CODE_FOR_mmx_maskmovq
20598 : CODE_FOR_sse2_maskmovdqu);
20599 /* Note the arg order is different from the operand order. */
20600 arg1 = CALL_EXPR_ARG (exp, 0);
20601 arg2 = CALL_EXPR_ARG (exp, 1);
20602 arg0 = CALL_EXPR_ARG (exp, 2);
20603 op0 = expand_normal (arg0);
20604 op1 = expand_normal (arg1);
20605 op2 = expand_normal (arg2);
20606 mode0 = insn_data[icode].operand[0].mode;
20607 mode1 = insn_data[icode].operand[1].mode;
20608 mode2 = insn_data[icode].operand[2].mode;
20609
20610 op0 = force_reg (Pmode, op0);
20611 op0 = gen_rtx_MEM (mode1, op0);
20612
20613 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
20614 op0 = copy_to_mode_reg (mode0, op0);
20615 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
20616 op1 = copy_to_mode_reg (mode1, op1);
20617 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
20618 op2 = copy_to_mode_reg (mode2, op2);
20619 pat = GEN_FCN (icode) (op0, op1, op2);
20620 if (! pat)
20621 return 0;
20622 emit_insn (pat);
20623 return 0;
20624
20625 case IX86_BUILTIN_RSQRTF:
20626 return ix86_expand_unop1_builtin (CODE_FOR_rsqrtsf2, exp, target);
20627
20628 case IX86_BUILTIN_SQRTSS:
20629 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
20630 case IX86_BUILTIN_RSQRTSS:
20631 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
20632 case IX86_BUILTIN_RCPSS:
20633 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
20634
20635 case IX86_BUILTIN_LOADUPS:
20636 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
20637
20638 case IX86_BUILTIN_STOREUPS:
20639 return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
20640
20641 case IX86_BUILTIN_LOADHPS:
20642 case IX86_BUILTIN_LOADLPS:
20643 case IX86_BUILTIN_LOADHPD:
20644 case IX86_BUILTIN_LOADLPD:
20645 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
20646 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
20647 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
20648 : CODE_FOR_sse2_loadlpd);
20649 arg0 = CALL_EXPR_ARG (exp, 0);
20650 arg1 = CALL_EXPR_ARG (exp, 1);
20651 op0 = expand_normal (arg0);
20652 op1 = expand_normal (arg1);
20653 tmode = insn_data[icode].operand[0].mode;
20654 mode0 = insn_data[icode].operand[1].mode;
20655 mode1 = insn_data[icode].operand[2].mode;
20656
20657 op0 = force_reg (mode0, op0);
20658 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
20659 if (optimize || target == 0
20660 || GET_MODE (target) != tmode
20661 || !register_operand (target, tmode))
20662 target = gen_reg_rtx (tmode);
20663 pat = GEN_FCN (icode) (target, op0, op1);
20664 if (! pat)
20665 return 0;
20666 emit_insn (pat);
20667 return target;
20668
20669 case IX86_BUILTIN_STOREHPS:
20670 case IX86_BUILTIN_STORELPS:
20671 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
20672 : CODE_FOR_sse_storelps);
20673 arg0 = CALL_EXPR_ARG (exp, 0);
20674 arg1 = CALL_EXPR_ARG (exp, 1);
20675 op0 = expand_normal (arg0);
20676 op1 = expand_normal (arg1);
20677 mode0 = insn_data[icode].operand[0].mode;
20678 mode1 = insn_data[icode].operand[1].mode;
20679
20680 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
20681 op1 = force_reg (mode1, op1);
20682
20683 pat = GEN_FCN (icode) (op0, op1);
20684 if (! pat)
20685 return 0;
20686 emit_insn (pat);
20687 return const0_rtx;
20688
20689 case IX86_BUILTIN_MOVNTPS:
20690 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
20691 case IX86_BUILTIN_MOVNTQ:
20692 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
20693
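/* ldmxcsr and stmxcsr only accept a memory operand, so go through a
   virtual stack slot.  */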
20694 case IX86_BUILTIN_LDMXCSR:
20695 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
20696 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
20697 emit_move_insn (target, op0);
20698 emit_insn (gen_sse_ldmxcsr (target));
20699 return 0;
20700
20701 case IX86_BUILTIN_STMXCSR:
20702 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
20703 emit_insn (gen_sse_stmxcsr (target));
20704 return copy_to_mode_reg (SImode, target);
20705
20706 case IX86_BUILTIN_SHUFPS:
20707 case IX86_BUILTIN_SHUFPD:
20708 icode = (fcode == IX86_BUILTIN_SHUFPS
20709 ? CODE_FOR_sse_shufps
20710 : CODE_FOR_sse2_shufpd);
20711 arg0 = CALL_EXPR_ARG (exp, 0);
20712 arg1 = CALL_EXPR_ARG (exp, 1);
20713 arg2 = CALL_EXPR_ARG (exp, 2);
20714 op0 = expand_normal (arg0);
20715 op1 = expand_normal (arg1);
20716 op2 = expand_normal (arg2);
20717 tmode = insn_data[icode].operand[0].mode;
20718 mode0 = insn_data[icode].operand[1].mode;
20719 mode1 = insn_data[icode].operand[2].mode;
20720 mode2 = insn_data[icode].operand[3].mode;
20721
20722 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20723 op0 = copy_to_mode_reg (mode0, op0);
20724 if ((optimize && !register_operand (op1, mode1))
20725 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
20726 op1 = copy_to_mode_reg (mode1, op1);
20727 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
20728 {
20729 /* @@@ better error message */
20730 error ("mask must be an immediate");
20731 return gen_reg_rtx (tmode);
20732 }
20733 if (optimize || target == 0
20734 || GET_MODE (target) != tmode
20735 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20736 target = gen_reg_rtx (tmode);
20737 pat = GEN_FCN (icode) (target, op0, op1, op2);
20738 if (! pat)
20739 return 0;
20740 emit_insn (pat);
20741 return target;
20742
20743 case IX86_BUILTIN_PSHUFW:
20744 case IX86_BUILTIN_PSHUFD:
20745 case IX86_BUILTIN_PSHUFHW:
20746 case IX86_BUILTIN_PSHUFLW:
20747 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
20748 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
20749 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
20750 : CODE_FOR_mmx_pshufw);
20751 arg0 = CALL_EXPR_ARG (exp, 0);
20752 arg1 = CALL_EXPR_ARG (exp, 1);
20753 op0 = expand_normal (arg0);
20754 op1 = expand_normal (arg1);
20755 tmode = insn_data[icode].operand[0].mode;
20756 mode1 = insn_data[icode].operand[1].mode;
20757 mode2 = insn_data[icode].operand[2].mode;
20758
20759 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20760 op0 = copy_to_mode_reg (mode1, op0);
20761 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
20762 {
20763 /* @@@ better error message */
20764 error ("mask must be an immediate");
20765 return const0_rtx;
20766 }
20767 if (target == 0
20768 || GET_MODE (target) != tmode
20769 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20770 target = gen_reg_rtx (tmode);
20771 pat = GEN_FCN (icode) (target, op0, op1);
20772 if (! pat)
20773 return 0;
20774 emit_insn (pat);
20775 return target;
20776
20777 case IX86_BUILTIN_PSLLW128:
20778 case IX86_BUILTIN_PSLLWI128:
20779 icode = CODE_FOR_ashlv8hi3;
20780 goto do_pshift;
20781 case IX86_BUILTIN_PSLLD128:
20782 case IX86_BUILTIN_PSLLDI128:
20783 icode = CODE_FOR_ashlv4si3;
20784 goto do_pshift;
20785 case IX86_BUILTIN_PSLLQ128:
20786 case IX86_BUILTIN_PSLLQI128:
20787 icode = CODE_FOR_ashlv2di3;
20788 goto do_pshift;
20789 case IX86_BUILTIN_PSRAW128:
20790 case IX86_BUILTIN_PSRAWI128:
20791 icode = CODE_FOR_ashrv8hi3;
20792 goto do_pshift;
20793 case IX86_BUILTIN_PSRAD128:
20794 case IX86_BUILTIN_PSRADI128:
20795 icode = CODE_FOR_ashrv4si3;
20796 goto do_pshift;
20797 case IX86_BUILTIN_PSRLW128:
20798 case IX86_BUILTIN_PSRLWI128:
20799 icode = CODE_FOR_lshrv8hi3;
20800 goto do_pshift;
20801 case IX86_BUILTIN_PSRLD128:
20802 case IX86_BUILTIN_PSRLDI128:
20803 icode = CODE_FOR_lshrv4si3;
20804 goto do_pshift;
20805 case IX86_BUILTIN_PSRLQ128:
20806 case IX86_BUILTIN_PSRLQI128:
20807 icode = CODE_FOR_lshrv2di3;
20808
20809 do_pshift:
20810 arg0 = CALL_EXPR_ARG (exp, 0);
20811 arg1 = CALL_EXPR_ARG (exp, 1);
20812 op0 = expand_normal (arg0);
20813 op1 = expand_normal (arg1);
20814
20815 tmode = insn_data[icode].operand[0].mode;
20816 mode1 = insn_data[icode].operand[1].mode;
20817
20818 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20819 op0 = copy_to_reg (op0);
20820
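/* A variable shift count is taken in SImode; reduce a wider count to
   its SImode lowpart before checking the operand predicate.  */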
20821 if (!CONST_INT_P (op1))
20822 op1 = simplify_gen_subreg (SImode, op1, GET_MODE (op1), 0);
20823
20824 if (! (*insn_data[icode].operand[2].predicate) (op1, SImode))
20825 op1 = copy_to_reg (op1);
20826
20827 target = gen_reg_rtx (tmode);
20828 pat = GEN_FCN (icode) (target, op0, op1);
20829 if (!pat)
20830 return 0;
20831 emit_insn (pat);
20832 return target;
20833
20834 case IX86_BUILTIN_PSLLDQI128:
20835 case IX86_BUILTIN_PSRLDQI128:
20836 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
20837 : CODE_FOR_sse2_lshrti3);
20838 arg0 = CALL_EXPR_ARG (exp, 0);
20839 arg1 = CALL_EXPR_ARG (exp, 1);
20840 op0 = expand_normal (arg0);
20841 op1 = expand_normal (arg1);
20842 tmode = insn_data[icode].operand[0].mode;
20843 mode1 = insn_data[icode].operand[1].mode;
20844 mode2 = insn_data[icode].operand[2].mode;
20845
20846 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20847 {
20848 op0 = copy_to_reg (op0);
20849 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
20850 }
20851 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
20852 {
20853 error ("shift must be an immediate");
20854 return const0_rtx;
20855 }
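/* The ashlti3/lshrti3 patterns operate on TImode; allocate the result
   in V2DImode and hand the pattern a TImode view of it via a subreg.  */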
20856 target = gen_reg_rtx (V2DImode);
20857 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
20858 op0, op1);
20859 if (! pat)
20860 return 0;
20861 emit_insn (pat);
20862 return target;
20863
20864 case IX86_BUILTIN_FEMMS:
20865 emit_insn (gen_mmx_femms ());
20866 return NULL_RTX;
20867
20868 case IX86_BUILTIN_PAVGUSB:
20869 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
20870
20871 case IX86_BUILTIN_PF2ID:
20872 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
20873
20874 case IX86_BUILTIN_PFACC:
20875 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
20876
20877 case IX86_BUILTIN_PFADD:
20878 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
20879
20880 case IX86_BUILTIN_PFCMPEQ:
20881 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
20882
20883 case IX86_BUILTIN_PFCMPGE:
20884 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
20885
20886 case IX86_BUILTIN_PFCMPGT:
20887 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
20888
20889 case IX86_BUILTIN_PFMAX:
20890 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
20891
20892 case IX86_BUILTIN_PFMIN:
20893 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
20894
20895 case IX86_BUILTIN_PFMUL:
20896 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
20897
20898 case IX86_BUILTIN_PFRCP:
20899 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
20900
20901 case IX86_BUILTIN_PFRCPIT1:
20902 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
20903
20904 case IX86_BUILTIN_PFRCPIT2:
20905 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
20906
20907 case IX86_BUILTIN_PFRSQIT1:
20908 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
20909
20910 case IX86_BUILTIN_PFRSQRT:
20911 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
20912
20913 case IX86_BUILTIN_PFSUB:
20914 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
20915
20916 case IX86_BUILTIN_PFSUBR:
20917 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
20918
20919 case IX86_BUILTIN_PI2FD:
20920 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
20921
20922 case IX86_BUILTIN_PMULHRW:
20923 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
20924
20925 case IX86_BUILTIN_PF2IW:
20926 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
20927
20928 case IX86_BUILTIN_PFNACC:
20929 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
20930
20931 case IX86_BUILTIN_PFPNACC:
20932 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
20933
20934 case IX86_BUILTIN_PI2FW:
20935 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
20936
20937 case IX86_BUILTIN_PSWAPDSI:
20938 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
20939
20940 case IX86_BUILTIN_PSWAPDSF:
20941 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
20942
20943 case IX86_BUILTIN_SQRTSD:
20944 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
20945 case IX86_BUILTIN_LOADUPD:
20946 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
20947 case IX86_BUILTIN_STOREUPD:
20948 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
20949
20950 case IX86_BUILTIN_MFENCE:
20951 emit_insn (gen_sse2_mfence ());
20952 return 0;
20953 case IX86_BUILTIN_LFENCE:
20954 emit_insn (gen_sse2_lfence ());
20955 return 0;
20956
20957 case IX86_BUILTIN_CLFLUSH:
20958 arg0 = CALL_EXPR_ARG (exp, 0);
20959 op0 = expand_normal (arg0);
20960 icode = CODE_FOR_sse2_clflush;
20961 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
20962 op0 = copy_to_mode_reg (Pmode, op0);
20963
20964 emit_insn (gen_sse2_clflush (op0));
20965 return 0;
20966
20967 case IX86_BUILTIN_MOVNTPD:
20968 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
20969 case IX86_BUILTIN_MOVNTDQ:
20970 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
20971 case IX86_BUILTIN_MOVNTI:
20972 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
20973
20974 case IX86_BUILTIN_LOADDQU:
20975 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
20976 case IX86_BUILTIN_STOREDQU:
20977 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
20978
20979 case IX86_BUILTIN_MONITOR:
20980 arg0 = CALL_EXPR_ARG (exp, 0);
20981 arg1 = CALL_EXPR_ARG (exp, 1);
20982 arg2 = CALL_EXPR_ARG (exp, 2);
20983 op0 = expand_normal (arg0);
20984 op1 = expand_normal (arg1);
20985 op2 = expand_normal (arg2);
20986 if (!REG_P (op0))
20987 op0 = copy_to_mode_reg (Pmode, op0);
20988 if (!REG_P (op1))
20989 op1 = copy_to_mode_reg (SImode, op1);
20990 if (!REG_P (op2))
20991 op2 = copy_to_mode_reg (SImode, op2);
20992 if (!TARGET_64BIT)
20993 emit_insn (gen_sse3_monitor (op0, op1, op2));
20994 else
20995 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
20996 return 0;
20997
20998 case IX86_BUILTIN_MWAIT:
20999 arg0 = CALL_EXPR_ARG (exp, 0);
21000 arg1 = CALL_EXPR_ARG (exp, 1);
21001 op0 = expand_normal (arg0);
21002 op1 = expand_normal (arg1);
21003 if (!REG_P (op0))
21004 op0 = copy_to_mode_reg (SImode, op0);
21005 if (!REG_P (op1))
21006 op1 = copy_to_mode_reg (SImode, op1);
21007 emit_insn (gen_sse3_mwait (op0, op1));
21008 return 0;
21009
21010 case IX86_BUILTIN_LDDQU:
21011 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
21012 target, 1);
21013
21014 case IX86_BUILTIN_PALIGNR:
21015 case IX86_BUILTIN_PALIGNR128:
21016 if (fcode == IX86_BUILTIN_PALIGNR)
21017 {
21018 icode = CODE_FOR_ssse3_palignrdi;
21019 mode = DImode;
21020 }
21021 else
21022 {
21023 icode = CODE_FOR_ssse3_palignrti;
21024 mode = V2DImode;
21025 }
21026 arg0 = CALL_EXPR_ARG (exp, 0);
21027 arg1 = CALL_EXPR_ARG (exp, 1);
21028 arg2 = CALL_EXPR_ARG (exp, 2);
21029 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
21030 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
21031 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, EXPAND_NORMAL);
21032 tmode = insn_data[icode].operand[0].mode;
21033 mode1 = insn_data[icode].operand[1].mode;
21034 mode2 = insn_data[icode].operand[2].mode;
21035 mode3 = insn_data[icode].operand[3].mode;
21036
21037 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21038 {
21039 op0 = copy_to_reg (op0);
21040 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
21041 }
21042 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21043 {
21044 op1 = copy_to_reg (op1);
21045 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
21046 }
21047 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
21048 {
21049 error ("shift must be an immediate");
21050 return const0_rtx;
21051 }
21052 target = gen_reg_rtx (mode);
21053 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
21054 op0, op1, op2);
21055 if (! pat)
21056 return 0;
21057 emit_insn (pat);
21058 return target;
21059
21060 case IX86_BUILTIN_MOVNTDQA:
21061 return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa, exp,
21062 target, 1);
21063
21064 case IX86_BUILTIN_MOVNTSD:
21065 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
21066
21067 case IX86_BUILTIN_MOVNTSS:
21068 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
21069
21070 case IX86_BUILTIN_INSERTQ:
21071 case IX86_BUILTIN_EXTRQ:
21072 icode = (fcode == IX86_BUILTIN_EXTRQ
21073 ? CODE_FOR_sse4a_extrq
21074 : CODE_FOR_sse4a_insertq);
21075 arg0 = CALL_EXPR_ARG (exp, 0);
21076 arg1 = CALL_EXPR_ARG (exp, 1);
21077 op0 = expand_normal (arg0);
21078 op1 = expand_normal (arg1);
21079 tmode = insn_data[icode].operand[0].mode;
21080 mode1 = insn_data[icode].operand[1].mode;
21081 mode2 = insn_data[icode].operand[2].mode;
21082 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21083 op0 = copy_to_mode_reg (mode1, op0);
21084 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21085 op1 = copy_to_mode_reg (mode2, op1);
21086 if (optimize || target == 0
21087 || GET_MODE (target) != tmode
21088 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21089 target = gen_reg_rtx (tmode);
21090 pat = GEN_FCN (icode) (target, op0, op1);
21091 if (! pat)
21092 return NULL_RTX;
21093 emit_insn (pat);
21094 return target;
21095
21096 case IX86_BUILTIN_EXTRQI:
21097 icode = CODE_FOR_sse4a_extrqi;
21098 arg0 = CALL_EXPR_ARG (exp, 0);
21099 arg1 = CALL_EXPR_ARG (exp, 1);
21100 arg2 = CALL_EXPR_ARG (exp, 2);
21101 op0 = expand_normal (arg0);
21102 op1 = expand_normal (arg1);
21103 op2 = expand_normal (arg2);
21104 tmode = insn_data[icode].operand[0].mode;
21105 mode1 = insn_data[icode].operand[1].mode;
21106 mode2 = insn_data[icode].operand[2].mode;
21107 mode3 = insn_data[icode].operand[3].mode;
21108 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21109 op0 = copy_to_mode_reg (mode1, op0);
21110 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21111 {
21112 error ("index mask must be an immediate");
21113 return gen_reg_rtx (tmode);
21114 }
21115 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
21116 {
21117 error ("length mask must be an immediate");
21118 return gen_reg_rtx (tmode);
21119 }
21120 if (optimize || target == 0
21121 || GET_MODE (target) != tmode
21122 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21123 target = gen_reg_rtx (tmode);
21124 pat = GEN_FCN (icode) (target, op0, op1, op2);
21125 if (! pat)
21126 return NULL_RTX;
21127 emit_insn (pat);
21128 return target;
21129
21130 case IX86_BUILTIN_INSERTQI:
21131 icode = CODE_FOR_sse4a_insertqi;
21132 arg0 = CALL_EXPR_ARG (exp, 0);
21133 arg1 = CALL_EXPR_ARG (exp, 1);
21134 arg2 = CALL_EXPR_ARG (exp, 2);
21135 arg3 = CALL_EXPR_ARG (exp, 3);
21136 op0 = expand_normal (arg0);
21137 op1 = expand_normal (arg1);
21138 op2 = expand_normal (arg2);
21139 op3 = expand_normal (arg3);
21140 tmode = insn_data[icode].operand[0].mode;
21141 mode1 = insn_data[icode].operand[1].mode;
21142 mode2 = insn_data[icode].operand[2].mode;
21143 mode3 = insn_data[icode].operand[3].mode;
21144 mode4 = insn_data[icode].operand[4].mode;
21145
21146 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21147 op0 = copy_to_mode_reg (mode1, op0);
21148
21149 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21150 op1 = copy_to_mode_reg (mode2, op1);
21151
21152 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
21153 {
21154 error ("index mask must be an immediate");
21155 return gen_reg_rtx (tmode);
21156 }
21157 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
21158 {
21159 error ("length mask must be an immediate");
21160 return gen_reg_rtx (tmode);
21161 }
21162 if (optimize || target == 0
21163 || GET_MODE (target) != tmode
21164 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21165 target = gen_reg_rtx (tmode);
21166 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
21167 if (! pat)
21168 return NULL_RTX;
21169 emit_insn (pat);
21170 return target;
21171
21172 case IX86_BUILTIN_VEC_INIT_V2SI:
21173 case IX86_BUILTIN_VEC_INIT_V4HI:
21174 case IX86_BUILTIN_VEC_INIT_V8QI:
21175 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
21176
21177 case IX86_BUILTIN_VEC_EXT_V2DF:
21178 case IX86_BUILTIN_VEC_EXT_V2DI:
21179 case IX86_BUILTIN_VEC_EXT_V4SF:
21180 case IX86_BUILTIN_VEC_EXT_V4SI:
21181 case IX86_BUILTIN_VEC_EXT_V8HI:
21182 case IX86_BUILTIN_VEC_EXT_V2SI:
21183 case IX86_BUILTIN_VEC_EXT_V4HI:
21184 case IX86_BUILTIN_VEC_EXT_V16QI:
21185 return ix86_expand_vec_ext_builtin (exp, target);
21186
21187 case IX86_BUILTIN_VEC_SET_V2DI:
21188 case IX86_BUILTIN_VEC_SET_V4SF:
21189 case IX86_BUILTIN_VEC_SET_V4SI:
21190 case IX86_BUILTIN_VEC_SET_V8HI:
21191 case IX86_BUILTIN_VEC_SET_V4HI:
21192 case IX86_BUILTIN_VEC_SET_V16QI:
21193 return ix86_expand_vec_set_builtin (exp);
21194
21195 case IX86_BUILTIN_INFQ:
21196 {
21197 REAL_VALUE_TYPE inf;
21198 rtx tmp;
21199
21200 real_inf (&inf);
21201 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
21202
21203 tmp = validize_mem (force_const_mem (mode, tmp));
21204
21205 if (target == 0)
21206 target = gen_reg_rtx (mode);
21207
21208 emit_move_insn (target, tmp);
21209 return target;
21210 }
21211
21212 case IX86_BUILTIN_FABSQ:
21213 return ix86_expand_unop_builtin (CODE_FOR_abstf2, exp, target, 0);
21214
21215 case IX86_BUILTIN_COPYSIGNQ:
21216 return ix86_expand_binop_builtin (CODE_FOR_copysigntf3, exp, target);
21217
21218 default:
21219 break;
21220 }
21221
21222 for (i = 0, d = bdesc_sse_3arg;
21223 i < ARRAY_SIZE (bdesc_sse_3arg);
21224 i++, d++)
21225 if (d->code == fcode)
21226 return ix86_expand_sse_4_operands_builtin (d->icode, exp,
21227 target);
21228
21229 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
21230 if (d->code == fcode)
21231 {
21232 /* Compares are treated specially. */
21233 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
21234 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
21235 || d->icode == CODE_FOR_sse2_maskcmpv2df3
21236 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
21237 return ix86_expand_sse_compare (d, exp, target);
21238
21239 return ix86_expand_binop_builtin (d->icode, exp, target);
21240 }
21241
21242 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
21243 if (d->code == fcode)
21244 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
21245
21246 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
21247 if (d->code == fcode)
21248 return ix86_expand_sse_comi (d, exp, target);
21249
21250 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
21251 if (d->code == fcode)
21252 return ix86_expand_sse_ptest (d, exp, target);
21253
21254 for (i = 0, d = bdesc_crc32; i < ARRAY_SIZE (bdesc_crc32); i++, d++)
21255 if (d->code == fcode)
21256 return ix86_expand_crc32 (d->icode, exp, target);
21257
21258 for (i = 0, d = bdesc_pcmpestr;
21259 i < ARRAY_SIZE (bdesc_pcmpestr);
21260 i++, d++)
21261 if (d->code == fcode)
21262 return ix86_expand_sse_pcmpestr (d, exp, target);
21263
21264 for (i = 0, d = bdesc_pcmpistr;
21265 i < ARRAY_SIZE (bdesc_pcmpistr);
21266 i++, d++)
21267 if (d->code == fcode)
21268 return ix86_expand_sse_pcmpistr (d, exp, target);
21269
21270 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
21271 if (d->code == fcode)
21272 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
21273 (enum multi_arg_type)d->flag,
21274 d->comparison);
21275
21276 gcc_unreachable ();
21277 }
21278
21279 /* Returns a function decl for a vectorized version of the builtin function
21280 with builtin function code FN, result vector type TYPE_OUT and argument
21281 vector type TYPE_IN, or NULL_TREE if it is not available. */
21282
21283 static tree
21284 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
21285 tree type_in)
21286 {
21287 enum machine_mode in_mode, out_mode;
21288 int in_n, out_n;
21289
21290 if (TREE_CODE (type_out) != VECTOR_TYPE
21291 || TREE_CODE (type_in) != VECTOR_TYPE)
21292 return NULL_TREE;
21293
21294 out_mode = TYPE_MODE (TREE_TYPE (type_out));
21295 out_n = TYPE_VECTOR_SUBPARTS (type_out);
21296 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21297 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21298
21299 switch (fn)
21300 {
21301 case BUILT_IN_SQRT:
21302 if (out_mode == DFmode && out_n == 2
21303 && in_mode == DFmode && in_n == 2)
21304 return ix86_builtins[IX86_BUILTIN_SQRTPD];
21305 break;
21306
21307 case BUILT_IN_SQRTF:
21308 if (out_mode == SFmode && out_n == 4
21309 && in_mode == SFmode && in_n == 4)
21310 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
21311 break;
21312
21313 case BUILT_IN_LRINT:
21314 if (out_mode == SImode && out_n == 4
21315 && in_mode == DFmode && in_n == 2)
21316 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
21317 break;
21318
21319 case BUILT_IN_LRINTF:
21320 if (out_mode == SImode && out_n == 4
21321 && in_mode == SFmode && in_n == 4)
21322 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
21323 break;
21324
21325 default:
21326 ;
21327 }
21328
21329 /* Dispatch to a handler for a vectorization library. */
21330 if (ix86_veclib_handler)
21331 return (*ix86_veclib_handler)(fn, type_out, type_in);
21332
21333 return NULL_TREE;
21334 }
21335
21336 /* Handler for an ACML-style interface to a library with vectorized
21337 intrinsics. */
21338
21339 static tree
21340 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
21341 {
21342 char name[20] = "__vr.._";
21343 tree fntype, new_fndecl, args;
21344 unsigned arity;
21345 const char *bname;
21346 enum machine_mode el_mode, in_mode;
21347 int n, in_n;
21348
21349 /* The ACML is 64-bit only and suitable only for unsafe math, as it
21350 does not correctly support parts of IEEE arithmetic, such as
21351 denormals, with the required precision. */
21352 if (!TARGET_64BIT
21353 || !flag_unsafe_math_optimizations)
21354 return NULL_TREE;
21355
21356 el_mode = TYPE_MODE (TREE_TYPE (type_out));
21357 n = TYPE_VECTOR_SUBPARTS (type_out);
21358 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21359 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21360 if (el_mode != in_mode
21361 || n != in_n)
21362 return NULL_TREE;
21363
21364 switch (fn)
21365 {
21366 case BUILT_IN_SIN:
21367 case BUILT_IN_COS:
21368 case BUILT_IN_EXP:
21369 case BUILT_IN_LOG:
21370 case BUILT_IN_LOG2:
21371 case BUILT_IN_LOG10:
21372 name[4] = 'd';
21373 name[5] = '2';
21374 if (el_mode != DFmode
21375 || n != 2)
21376 return NULL_TREE;
21377 break;
21378
21379 case BUILT_IN_SINF:
21380 case BUILT_IN_COSF:
21381 case BUILT_IN_EXPF:
21382 case BUILT_IN_POWF:
21383 case BUILT_IN_LOGF:
21384 case BUILT_IN_LOG2F:
21385 case BUILT_IN_LOG10F:
21386 name[4] = 's';
21387 name[5] = '4';
21388 if (el_mode != SFmode
21389 || n != 4)
21390 return NULL_TREE;
21391 break;
21392
21393 default:
21394 return NULL_TREE;
21395 }
21396
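/* Assemble the ACML entry point name, e.g. "__vrd2_sin" or "__vrs4_sinf":
   the element type and width were filled in above, and the scalar
   builtin's name is appended with its "__builtin_" prefix stripped.  */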
21397 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
21398 sprintf (name + 7, "%s", bname+10);
21399
21400 arity = 0;
21401 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
21402 args = TREE_CHAIN (args))
21403 arity++;
21404
21405 if (arity == 1)
21406 fntype = build_function_type_list (type_out, type_in, NULL);
21407 else
21408 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
21409
21410 /* Build a function declaration for the vectorized function. */
21411 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
21412 TREE_PUBLIC (new_fndecl) = 1;
21413 DECL_EXTERNAL (new_fndecl) = 1;
21414 DECL_IS_NOVOPS (new_fndecl) = 1;
21415 TREE_READONLY (new_fndecl) = 1;
21416
21417 return new_fndecl;
21418 }
21419
21420
21421 /* Returns a decl of a function that implements conversion of the
21422 input vector of type TYPE, or NULL_TREE if it is not available. */
21423
21424 static tree
21425 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
21426 {
21427 if (TREE_CODE (type) != VECTOR_TYPE)
21428 return NULL_TREE;
21429
21430 switch (code)
21431 {
21432 case FLOAT_EXPR:
21433 switch (TYPE_MODE (type))
21434 {
21435 case V4SImode:
21436 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
21437 default:
21438 return NULL_TREE;
21439 }
21440
21441 case FIX_TRUNC_EXPR:
21442 switch (TYPE_MODE (type))
21443 {
21444 case V4SFmode:
21445 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
21446 default:
21447 return NULL_TREE;
21448 }
21449 default:
21450 return NULL_TREE;
21451
21452 }
21453 }
21454
21455 /* Returns a decl of a target-specific builtin that implements the
21456 reciprocal of the function FN, or NULL_TREE if it is not available. */
21457
21458 static tree
21459 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
21460 bool sqrt ATTRIBUTE_UNUSED)
21461 {
21462 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
21463 && flag_finite_math_only && !flag_trapping_math
21464 && flag_unsafe_math_optimizations))
21465 return NULL_TREE;
21466
21467 if (md_fn)
21468 /* Machine dependent builtins. */
21469 switch (fn)
21470 {
21471 /* Vectorized version of sqrt to rsqrt conversion. */
21472 case IX86_BUILTIN_SQRTPS_NR:
21473 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
21474
21475 default:
21476 return NULL_TREE;
21477 }
21478 else
21479 /* Normal builtins. */
21480 switch (fn)
21481 {
21482 /* Sqrt to rsqrt conversion. */
21483 case BUILT_IN_SQRTF:
21484 return ix86_builtins[IX86_BUILTIN_RSQRTF];
21485
21486 default:
21487 return NULL_TREE;
21488 }
21489 }
21490
21491 /* Store OPERAND to memory after reload is completed. This means
21492 that we can't easily use assign_stack_local. */
21493 rtx
21494 ix86_force_to_memory (enum machine_mode mode, rtx operand)
21495 {
21496 rtx result;
21497
21498 gcc_assert (reload_completed);
21499 if (TARGET_RED_ZONE)
21500 {
21501 result = gen_rtx_MEM (mode,
21502 gen_rtx_PLUS (Pmode,
21503 stack_pointer_rtx,
21504 GEN_INT (-RED_ZONE_SIZE)));
21505 emit_move_insn (result, operand);
21506 }
21507 else if (!TARGET_RED_ZONE && TARGET_64BIT)
21508 {
21509 switch (mode)
21510 {
21511 case HImode:
21512 case SImode:
21513 operand = gen_lowpart (DImode, operand);
21514 /* FALLTHRU */
21515 case DImode:
21516 emit_insn (
21517 gen_rtx_SET (VOIDmode,
21518 gen_rtx_MEM (DImode,
21519 gen_rtx_PRE_DEC (DImode,
21520 stack_pointer_rtx)),
21521 operand));
21522 break;
21523 default:
21524 gcc_unreachable ();
21525 }
21526 result = gen_rtx_MEM (mode, stack_pointer_rtx);
21527 }
21528 else
21529 {
21530 switch (mode)
21531 {
21532 case DImode:
21533 {
21534 rtx operands[2];
21535 split_di (&operand, 1, operands, operands + 1);
21536 emit_insn (
21537 gen_rtx_SET (VOIDmode,
21538 gen_rtx_MEM (SImode,
21539 gen_rtx_PRE_DEC (Pmode,
21540 stack_pointer_rtx)),
21541 operands[1]));
21542 emit_insn (
21543 gen_rtx_SET (VOIDmode,
21544 gen_rtx_MEM (SImode,
21545 gen_rtx_PRE_DEC (Pmode,
21546 stack_pointer_rtx)),
21547 operands[0]));
21548 }
21549 break;
21550 case HImode:
21551 /* Store HImodes as SImodes. */
21552 operand = gen_lowpart (SImode, operand);
21553 /* FALLTHRU */
21554 case SImode:
21555 emit_insn (
21556 gen_rtx_SET (VOIDmode,
21557 gen_rtx_MEM (GET_MODE (operand),
21558 gen_rtx_PRE_DEC (SImode,
21559 stack_pointer_rtx)),
21560 operand));
21561 break;
21562 default:
21563 gcc_unreachable ();
21564 }
21565 result = gen_rtx_MEM (mode, stack_pointer_rtx);
21566 }
21567 return result;
21568 }
21569
21570 /* Free operand from the memory. */
21571 void
21572 ix86_free_from_memory (enum machine_mode mode)
21573 {
21574 if (!TARGET_RED_ZONE)
21575 {
21576 int size;
21577
21578 if (mode == DImode || TARGET_64BIT)
21579 size = 8;
21580 else
21581 size = 4;
21582 /* Use LEA to deallocate stack space. In peephole2 it will be converted
21583 to pop or add instruction if registers are available. */
21584 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21585 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
21586 GEN_INT (size))));
21587 }
21588 }
21589
21590 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
21591 QImode must go into class Q_REGS.
21592 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
21593 movdf to do mem-to-mem moves through integer regs. */
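/* For example, a (nonzero) SFmode constant being reloaded into an SSE
   class is forced to memory (NO_REGS is returned), because there is no
   instruction that loads an immediate directly into an XMM register;
   see the CONSTANT_P check below.  */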
21594 enum reg_class
21595 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
21596 {
21597 enum machine_mode mode = GET_MODE (x);
21598
21599 /* We're only allowed to return a subclass of CLASS. Many of the
21600 following checks fail for NO_REGS, so eliminate that early. */
21601 if (regclass == NO_REGS)
21602 return NO_REGS;
21603
21604 /* All classes can load zeros. */
21605 if (x == CONST0_RTX (mode))
21606 return regclass;
21607
21608 /* Force constants into memory if we are loading a (nonzero) constant into
21609 an MMX or SSE register. This is because there are no MMX/SSE instructions
21610 to load from a constant. */
21611 if (CONSTANT_P (x)
21612 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
21613 return NO_REGS;
21614
21615 /* Prefer SSE regs only, if we can use them for math. */
21616 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
21617 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
21618
21619 /* Floating-point constants need more complex checks. */
21620 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
21621 {
21622 /* General regs can load everything. */
21623 if (reg_class_subset_p (regclass, GENERAL_REGS))
21624 return regclass;
21625
21626 /* Floats can load 0 and 1 plus some others. Note that we eliminated
21627 zero above. We only want to wind up preferring 80387 registers if
21628 we plan on doing computation with them. */
21629 if (TARGET_80387
21630 && standard_80387_constant_p (x))
21631 {
21632 /* Limit class to non-sse. */
21633 if (regclass == FLOAT_SSE_REGS)
21634 return FLOAT_REGS;
21635 if (regclass == FP_TOP_SSE_REGS)
21636 return FP_TOP_REG;
21637 if (regclass == FP_SECOND_SSE_REGS)
21638 return FP_SECOND_REG;
21639 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
21640 return regclass;
21641 }
21642
21643 return NO_REGS;
21644 }
21645
 21646 /* Generally when we see PLUS here, it's the function invariant
 21647 (plus soft-fp const_int), which can only be computed into general
 21648 regs. */
21649 if (GET_CODE (x) == PLUS)
21650 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
21651
21652 /* QImode constants are easy to load, but non-constant QImode data
21653 must go into Q_REGS. */
21654 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
21655 {
21656 if (reg_class_subset_p (regclass, Q_REGS))
21657 return regclass;
21658 if (reg_class_subset_p (Q_REGS, regclass))
21659 return Q_REGS;
21660 return NO_REGS;
21661 }
21662
21663 return regclass;
21664 }
21665
21666 /* Discourage putting floating-point values in SSE registers unless
21667 SSE math is being used, and likewise for the 387 registers. */
21668 enum reg_class
21669 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
21670 {
21671 enum machine_mode mode = GET_MODE (x);
21672
21673 /* Restrict the output reload class to the register bank that we are doing
21674 math on. If we would like not to return a subset of CLASS, reject this
21675 alternative: if reload cannot do this, it will still use its choice. */
21676 mode = GET_MODE (x);
21677 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21678 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
21679
21680 if (X87_FLOAT_MODE_P (mode))
21681 {
21682 if (regclass == FP_TOP_SSE_REGS)
21683 return FP_TOP_REG;
21684 else if (regclass == FP_SECOND_SSE_REGS)
21685 return FP_SECOND_REG;
21686 else
21687 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
21688 }
21689
21690 return regclass;
21691 }
21692
21693 /* If we are copying between general and FP registers, we need a memory
21694 location. The same is true for SSE and MMX registers.
21695
21696 To optimize register_move_cost performance, allow inline variant.
21697
 21698 The macro can't work reliably when one of the CLASSES is a class containing
 21699 registers from multiple units (SSE, MMX, integer). We avoid this by never
 21700 combining those units in a single alternative in the machine description.
21701 Ensure that this constraint holds to avoid unexpected surprises.
21702
21703 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
21704 enforce these sanity checks. */
21705
21706 static inline int
21707 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
21708 enum machine_mode mode, int strict)
21709 {
21710 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
21711 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
21712 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
21713 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
21714 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
21715 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
21716 {
21717 gcc_assert (!strict);
21718 return true;
21719 }
21720
21721 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
21722 return true;
21723
21724 /* ??? This is a lie. We do have moves between mmx/general, and for
21725 mmx/sse2. But by saying we need secondary memory we discourage the
21726 register allocator from using the mmx registers unless needed. */
21727 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
21728 return true;
21729
21730 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
21731 {
21732 /* SSE1 doesn't have any direct moves from other classes. */
21733 if (!TARGET_SSE2)
21734 return true;
21735
21736 /* If the target says that inter-unit moves are more expensive
21737 than moving through memory, then don't generate them. */
21738 if (!TARGET_INTER_UNIT_MOVES)
21739 return true;
21740
21741 /* Between SSE and general, we have moves no larger than word size. */
21742 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21743 return true;
21744 }
21745
21746 return false;
21747 }
21748
21749 int
21750 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
21751 enum machine_mode mode, int strict)
21752 {
21753 return inline_secondary_memory_needed (class1, class2, mode, strict);
21754 }
21755
21756 /* Return true if the registers in CLASS cannot represent the change from
21757 modes FROM to TO. */
21758
21759 bool
21760 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
21761 enum reg_class regclass)
21762 {
21763 if (from == to)
21764 return false;
21765
21766 /* x87 registers can't do subreg at all, as all values are reformatted
21767 to extended precision. */
21768 if (MAYBE_FLOAT_CLASS_P (regclass))
21769 return true;
21770
21771 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
21772 {
21773 /* Vector registers do not support QI or HImode loads. If we don't
21774 disallow a change to these modes, reload will assume it's ok to
21775 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
21776 the vec_dupv4hi pattern. */
21777 if (GET_MODE_SIZE (from) < 4)
21778 return true;
21779
21780 /* Vector registers do not support subreg with nonzero offsets, which
21781 are otherwise valid for integer registers. Since we can't see
21782 whether we have a nonzero offset from here, prohibit all
21783 nonparadoxical subregs changing size. */
21784 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
21785 return true;
21786 }
21787
21788 return false;
21789 }
21790
21791 /* Return the cost of moving data of mode M between a
21792 register and memory. A value of 2 is the default; this cost is
21793 relative to those in `REGISTER_MOVE_COST'.
21794
 21795 This function is used extensively by register_move_cost, which is used to
 21796 build tables at startup. Make it inline for that reason.
21797 When IN is 2, return maximum of in and out move cost.
21798
21799 If moving between registers and memory is more expensive than
21800 between two registers, you should define this macro to express the
21801 relative cost.
21802
 21803 Also model the increased cost of moving QImode registers in classes
 21804 other than Q_REGS.
21805 */
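/* A small worked illustration of the IN convention (the actual numbers come
   from whichever cost table is active): for a DFmode value in FLOAT_REGS the
   table index is 1, so IN == 1 returns fp_load[1], IN == 0 returns
   fp_store[1], and IN == 2 returns MAX (fp_load[1], fp_store[1]).  */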
21806 static inline int
21807 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
21808 int in)
21809 {
21810 int cost;
21811 if (FLOAT_CLASS_P (regclass))
21812 {
21813 int index;
21814 switch (mode)
21815 {
21816 case SFmode:
21817 index = 0;
21818 break;
21819 case DFmode:
21820 index = 1;
21821 break;
21822 case XFmode:
21823 index = 2;
21824 break;
21825 default:
21826 return 100;
21827 }
21828 if (in == 2)
21829 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
21830 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
21831 }
21832 if (SSE_CLASS_P (regclass))
21833 {
21834 int index;
21835 switch (GET_MODE_SIZE (mode))
21836 {
21837 case 4:
21838 index = 0;
21839 break;
21840 case 8:
21841 index = 1;
21842 break;
21843 case 16:
21844 index = 2;
21845 break;
21846 default:
21847 return 100;
21848 }
21849 if (in == 2)
21850 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
21851 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
21852 }
21853 if (MMX_CLASS_P (regclass))
21854 {
21855 int index;
21856 switch (GET_MODE_SIZE (mode))
21857 {
21858 case 4:
21859 index = 0;
21860 break;
21861 case 8:
21862 index = 1;
21863 break;
21864 default:
21865 return 100;
21866 }
 21867 if (in == 2)
21868 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
21869 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
21870 }
21871 switch (GET_MODE_SIZE (mode))
21872 {
21873 case 1:
21874 if (Q_CLASS_P (regclass) || TARGET_64BIT)
21875 {
21876 if (!in)
21877 return ix86_cost->int_store[0];
21878 if (TARGET_PARTIAL_REG_DEPENDENCY && !optimize_size)
21879 cost = ix86_cost->movzbl_load;
21880 else
21881 cost = ix86_cost->int_load[0];
21882 if (in == 2)
21883 return MAX (cost, ix86_cost->int_store[0]);
21884 return cost;
21885 }
21886 else
21887 {
21888 if (in == 2)
21889 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
21890 if (in)
21891 return ix86_cost->movzbl_load;
21892 else
21893 return ix86_cost->int_store[0] + 4;
21894 }
21895 break;
21896 case 2:
21897 if (in == 2)
21898 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
21899 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
21900 default:
21901 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
21902 if (mode == TFmode)
21903 mode = XFmode;
21904 if (in == 2)
21905 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
21906 else if (in)
21907 cost = ix86_cost->int_load[2];
21908 else
21909 cost = ix86_cost->int_store[2];
21910 return (cost * (((int) GET_MODE_SIZE (mode)
21911 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
21912 }
21913 }
21914
21915 int
21916 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
21917 {
21918 return inline_memory_move_cost (mode, regclass, in);
21919 }
21920
21921
21922 /* Return the cost of moving data from a register in class CLASS1 to
21923 one in class CLASS2.
21924
21925 It is not required that the cost always equal 2 when FROM is the same as TO;
21926 on some machines it is expensive to move between registers if they are not
21927 general registers. */
21928
21929 int
21930 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
21931 enum reg_class class2)
21932 {
 21933 /* In case we require secondary memory, compute the cost of the store followed
 21934 by a load. In order to avoid bad register allocation choices, we need
 21935 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
21936
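  /* E.g. moving a value between an SSE class and a general register class,
     when inline_secondary_memory_needed says a stack slot is required, is
     costed as 1 plus the in/out memory move cost of each class, plus the
     extra penalties applied below.  */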
21937 if (inline_secondary_memory_needed (class1, class2, mode, 0))
21938 {
21939 int cost = 1;
21940
21941 cost += inline_memory_move_cost (mode, class1, 2);
21942 cost += inline_memory_move_cost (mode, class2, 2);
21943
 21944 /* In the case of copying from a general purpose register we may emit multiple
 21945 stores followed by a single load, causing a memory-size-mismatch stall.
 21946 Count this as an arbitrarily high cost of 20. */
21947 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
21948 cost += 20;
21949
21950 /* In the case of FP/MMX moves, the registers actually overlap, and we
21951 have to switch modes in order to treat them differently. */
21952 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
21953 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
21954 cost += 20;
21955
21956 return cost;
21957 }
21958
21959 /* Moves between SSE/MMX and integer unit are expensive. */
21960 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
21961 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
21962
 21963 /* ??? By keeping the returned value relatively high, we limit the number
 21964 of moves between integer and MMX/SSE registers for all targets.
 21965 Additionally, a high value prevents a problem with x86_modes_tieable_p (),
21966 where integer modes in MMX/SSE registers are not tieable
21967 because of missing QImode and HImode moves to, from or between
21968 MMX/SSE registers. */
21969 return MAX (ix86_cost->mmxsse_to_integer, 8);
21970
21971 if (MAYBE_FLOAT_CLASS_P (class1))
21972 return ix86_cost->fp_move;
21973 if (MAYBE_SSE_CLASS_P (class1))
21974 return ix86_cost->sse_move;
21975 if (MAYBE_MMX_CLASS_P (class1))
21976 return ix86_cost->mmx_move;
21977 return 2;
21978 }
21979
21980 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
21981
21982 bool
21983 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
21984 {
21985 /* Flags and only flags can only hold CCmode values. */
21986 if (CC_REGNO_P (regno))
21987 return GET_MODE_CLASS (mode) == MODE_CC;
21988 if (GET_MODE_CLASS (mode) == MODE_CC
21989 || GET_MODE_CLASS (mode) == MODE_RANDOM
21990 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
21991 return 0;
21992 if (FP_REGNO_P (regno))
21993 return VALID_FP_MODE_P (mode);
21994 if (SSE_REGNO_P (regno))
21995 {
21996 /* We implement the move patterns for all vector modes into and
21997 out of SSE registers, even when no operation instructions
21998 are available. */
21999 return (VALID_SSE_REG_MODE (mode)
22000 || VALID_SSE2_REG_MODE (mode)
22001 || VALID_MMX_REG_MODE (mode)
22002 || VALID_MMX_REG_MODE_3DNOW (mode));
22003 }
22004 if (MMX_REGNO_P (regno))
22005 {
22006 /* We implement the move patterns for 3DNOW modes even in MMX mode,
22007 so if the register is available at all, then we can move data of
22008 the given mode into or out of it. */
22009 return (VALID_MMX_REG_MODE (mode)
22010 || VALID_MMX_REG_MODE_3DNOW (mode));
22011 }
22012
22013 if (mode == QImode)
22014 {
 22015 /* Take care with QImode values - they can be in non-QI regs,
 22016 but then they do cause partial register stalls. */
22017 if (regno < 4 || TARGET_64BIT)
22018 return 1;
22019 if (!TARGET_PARTIAL_REG_STALL)
22020 return 1;
22021 return reload_in_progress || reload_completed;
22022 }
22023 /* We handle both integer and floats in the general purpose registers. */
22024 else if (VALID_INT_MODE_P (mode))
22025 return 1;
22026 else if (VALID_FP_MODE_P (mode))
22027 return 1;
22028 else if (VALID_DFP_MODE_P (mode))
22029 return 1;
22030 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
22031 on to use that value in smaller contexts, this can easily force a
22032 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
22033 supporting DImode, allow it. */
22034 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
22035 return 1;
22036
22037 return 0;
22038 }
22039
22040 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
22041 tieable integer mode. */
22042
22043 static bool
22044 ix86_tieable_integer_mode_p (enum machine_mode mode)
22045 {
22046 switch (mode)
22047 {
22048 case HImode:
22049 case SImode:
22050 return true;
22051
22052 case QImode:
22053 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
22054
22055 case DImode:
22056 return TARGET_64BIT;
22057
22058 default:
22059 return false;
22060 }
22061 }
22062
22063 /* Return true if MODE1 is accessible in a register that can hold MODE2
22064 without copying. That is, all register classes that can hold MODE2
22065 can also hold MODE1. */
22066
22067 bool
22068 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
22069 {
22070 if (mode1 == mode2)
22071 return true;
22072
22073 if (ix86_tieable_integer_mode_p (mode1)
22074 && ix86_tieable_integer_mode_p (mode2))
22075 return true;
22076
22077 /* MODE2 being XFmode implies fp stack or general regs, which means we
22078 can tie any smaller floating point modes to it. Note that we do not
22079 tie this with TFmode. */
22080 if (mode2 == XFmode)
22081 return mode1 == SFmode || mode1 == DFmode;
22082
22083 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
22084 that we can tie it with SFmode. */
22085 if (mode2 == DFmode)
22086 return mode1 == SFmode;
22087
22088 /* If MODE2 is only appropriate for an SSE register, then tie with
22089 any other mode acceptable to SSE registers. */
22090 if (GET_MODE_SIZE (mode2) == 16
22091 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
22092 return (GET_MODE_SIZE (mode1) == 16
22093 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
22094
22095 /* If MODE2 is appropriate for an MMX register, then tie
22096 with any other mode acceptable to MMX registers. */
22097 if (GET_MODE_SIZE (mode2) == 8
22098 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
22099 return (GET_MODE_SIZE (mode1) == 8
22100 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
22101
22102 return false;
22103 }
22104
22105 /* Compute a (partial) cost for rtx X. Return true if the complete
22106 cost has been computed, and false if subexpressions should be
22107 scanned. In either case, *TOTAL contains the cost result. */
22108
22109 static bool
22110 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
22111 {
22112 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
22113 enum machine_mode mode = GET_MODE (x);
22114
22115 switch (code)
22116 {
22117 case CONST_INT:
22118 case CONST:
22119 case LABEL_REF:
22120 case SYMBOL_REF:
22121 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
22122 *total = 3;
22123 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
22124 *total = 2;
22125 else if (flag_pic && SYMBOLIC_CONST (x)
22126 && (!TARGET_64BIT
 22127 || (GET_CODE (x) != LABEL_REF
22128 && (GET_CODE (x) != SYMBOL_REF
22129 || !SYMBOL_REF_LOCAL_P (x)))))
22130 *total = 1;
22131 else
22132 *total = 0;
22133 return true;
22134
22135 case CONST_DOUBLE:
22136 if (mode == VOIDmode)
22137 *total = 0;
22138 else
22139 switch (standard_80387_constant_p (x))
22140 {
22141 case 1: /* 0.0 */
22142 *total = 1;
22143 break;
22144 default: /* Other constants */
22145 *total = 2;
22146 break;
22147 case 0:
22148 case -1:
22149 /* Start with (MEM (SYMBOL_REF)), since that's where
22150 it'll probably end up. Add a penalty for size. */
22151 *total = (COSTS_N_INSNS (1)
22152 + (flag_pic != 0 && !TARGET_64BIT)
22153 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
22154 break;
22155 }
22156 return true;
22157
22158 case ZERO_EXTEND:
 22159 /* The zero extension is often completely free on x86_64, so make
22160 it as cheap as possible. */
22161 if (TARGET_64BIT && mode == DImode
22162 && GET_MODE (XEXP (x, 0)) == SImode)
22163 *total = 1;
22164 else if (TARGET_ZERO_EXTEND_WITH_AND)
22165 *total = ix86_cost->add;
22166 else
22167 *total = ix86_cost->movzx;
22168 return false;
22169
22170 case SIGN_EXTEND:
22171 *total = ix86_cost->movsx;
22172 return false;
22173
22174 case ASHIFT:
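      /* Small illustrative note on the checks below: a left shift by 1 is
	 costed like an add, and shifts by 2 or 3 can be done with lea
	 (scale factors 4 and 8, e.g. (ashift:SI (reg) (const_int 3)) as a
	 lea with index scaled by 8), provided lea is no more expensive
	 than a constant shift.  */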
22175 if (CONST_INT_P (XEXP (x, 1))
22176 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
22177 {
22178 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22179 if (value == 1)
22180 {
22181 *total = ix86_cost->add;
22182 return false;
22183 }
22184 if ((value == 2 || value == 3)
22185 && ix86_cost->lea <= ix86_cost->shift_const)
22186 {
22187 *total = ix86_cost->lea;
22188 return false;
22189 }
22190 }
22191 /* FALLTHRU */
22192
22193 case ROTATE:
22194 case ASHIFTRT:
22195 case LSHIFTRT:
22196 case ROTATERT:
22197 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
22198 {
22199 if (CONST_INT_P (XEXP (x, 1)))
22200 {
22201 if (INTVAL (XEXP (x, 1)) > 32)
22202 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
22203 else
22204 *total = ix86_cost->shift_const * 2;
22205 }
22206 else
22207 {
22208 if (GET_CODE (XEXP (x, 1)) == AND)
22209 *total = ix86_cost->shift_var * 2;
22210 else
22211 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
22212 }
22213 }
22214 else
22215 {
22216 if (CONST_INT_P (XEXP (x, 1)))
22217 *total = ix86_cost->shift_const;
22218 else
22219 *total = ix86_cost->shift_var;
22220 }
22221 return false;
22222
22223 case MULT:
22224 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22225 {
22226 /* ??? SSE scalar cost should be used here. */
22227 *total = ix86_cost->fmul;
22228 return false;
22229 }
22230 else if (X87_FLOAT_MODE_P (mode))
22231 {
22232 *total = ix86_cost->fmul;
22233 return false;
22234 }
22235 else if (FLOAT_MODE_P (mode))
22236 {
22237 /* ??? SSE vector cost should be used here. */
22238 *total = ix86_cost->fmul;
22239 return false;
22240 }
22241 else
22242 {
22243 rtx op0 = XEXP (x, 0);
22244 rtx op1 = XEXP (x, 1);
22245 int nbits;
22246 if (CONST_INT_P (XEXP (x, 1)))
22247 {
22248 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22249 for (nbits = 0; value != 0; value &= value - 1)
22250 nbits++;
22251 }
22252 else
22253 /* This is arbitrary. */
22254 nbits = 7;
22255
22256 /* Compute costs correctly for widening multiplication. */
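	  /* E.g. (mult:DI (sign_extend:DI (reg:SI)) (sign_extend:DI (reg:SI)))
	     is a single widening multiply, so it is costed in the narrower
	     SImode rather than in DImode.  */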
22257 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
22258 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
22259 == GET_MODE_SIZE (mode))
22260 {
22261 int is_mulwiden = 0;
22262 enum machine_mode inner_mode = GET_MODE (op0);
22263
22264 if (GET_CODE (op0) == GET_CODE (op1))
22265 is_mulwiden = 1, op1 = XEXP (op1, 0);
22266 else if (CONST_INT_P (op1))
22267 {
22268 if (GET_CODE (op0) == SIGN_EXTEND)
22269 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
22270 == INTVAL (op1);
22271 else
22272 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
22273 }
22274
22275 if (is_mulwiden)
22276 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
22277 }
22278
22279 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
22280 + nbits * ix86_cost->mult_bit
22281 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
22282
22283 return true;
22284 }
22285
22286 case DIV:
22287 case UDIV:
22288 case MOD:
22289 case UMOD:
22290 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22291 /* ??? SSE cost should be used here. */
22292 *total = ix86_cost->fdiv;
22293 else if (X87_FLOAT_MODE_P (mode))
22294 *total = ix86_cost->fdiv;
22295 else if (FLOAT_MODE_P (mode))
22296 /* ??? SSE vector cost should be used here. */
22297 *total = ix86_cost->fdiv;
22298 else
22299 *total = ix86_cost->divide[MODE_INDEX (mode)];
22300 return false;
22301
22302 case PLUS:
22303 if (GET_MODE_CLASS (mode) == MODE_INT
22304 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
22305 {
22306 if (GET_CODE (XEXP (x, 0)) == PLUS
22307 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
22308 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
22309 && CONSTANT_P (XEXP (x, 1)))
22310 {
22311 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
22312 if (val == 2 || val == 4 || val == 8)
22313 {
22314 *total = ix86_cost->lea;
22315 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22316 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
22317 outer_code);
22318 *total += rtx_cost (XEXP (x, 1), outer_code);
22319 return true;
22320 }
22321 }
22322 else if (GET_CODE (XEXP (x, 0)) == MULT
22323 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
22324 {
22325 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
22326 if (val == 2 || val == 4 || val == 8)
22327 {
22328 *total = ix86_cost->lea;
22329 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22330 *total += rtx_cost (XEXP (x, 1), outer_code);
22331 return true;
22332 }
22333 }
22334 else if (GET_CODE (XEXP (x, 0)) == PLUS)
22335 {
22336 *total = ix86_cost->lea;
22337 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22338 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22339 *total += rtx_cost (XEXP (x, 1), outer_code);
22340 return true;
22341 }
22342 }
22343 /* FALLTHRU */
22344
22345 case MINUS:
22346 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22347 {
22348 /* ??? SSE cost should be used here. */
22349 *total = ix86_cost->fadd;
22350 return false;
22351 }
22352 else if (X87_FLOAT_MODE_P (mode))
22353 {
22354 *total = ix86_cost->fadd;
22355 return false;
22356 }
22357 else if (FLOAT_MODE_P (mode))
22358 {
22359 /* ??? SSE vector cost should be used here. */
22360 *total = ix86_cost->fadd;
22361 return false;
22362 }
22363 /* FALLTHRU */
22364
22365 case AND:
22366 case IOR:
22367 case XOR:
22368 if (!TARGET_64BIT && mode == DImode)
22369 {
22370 *total = (ix86_cost->add * 2
22371 + (rtx_cost (XEXP (x, 0), outer_code)
22372 << (GET_MODE (XEXP (x, 0)) != DImode))
22373 + (rtx_cost (XEXP (x, 1), outer_code)
22374 << (GET_MODE (XEXP (x, 1)) != DImode)));
22375 return true;
22376 }
22377 /* FALLTHRU */
22378
22379 case NEG:
22380 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22381 {
22382 /* ??? SSE cost should be used here. */
22383 *total = ix86_cost->fchs;
22384 return false;
22385 }
22386 else if (X87_FLOAT_MODE_P (mode))
22387 {
22388 *total = ix86_cost->fchs;
22389 return false;
22390 }
22391 else if (FLOAT_MODE_P (mode))
22392 {
22393 /* ??? SSE vector cost should be used here. */
22394 *total = ix86_cost->fchs;
22395 return false;
22396 }
22397 /* FALLTHRU */
22398
22399 case NOT:
22400 if (!TARGET_64BIT && mode == DImode)
22401 *total = ix86_cost->add * 2;
22402 else
22403 *total = ix86_cost->add;
22404 return false;
22405
22406 case COMPARE:
22407 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
22408 && XEXP (XEXP (x, 0), 1) == const1_rtx
22409 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
22410 && XEXP (x, 1) == const0_rtx)
22411 {
22412 /* This kind of construct is implemented using test[bwl].
22413 Treat it as if we had an AND. */
22414 *total = (ix86_cost->add
22415 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
22416 + rtx_cost (const1_rtx, outer_code));
22417 return true;
22418 }
22419 return false;
22420
22421 case FLOAT_EXTEND:
22422 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
22423 *total = 0;
22424 return false;
22425
22426 case ABS:
22427 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22428 /* ??? SSE cost should be used here. */
22429 *total = ix86_cost->fabs;
22430 else if (X87_FLOAT_MODE_P (mode))
22431 *total = ix86_cost->fabs;
22432 else if (FLOAT_MODE_P (mode))
22433 /* ??? SSE vector cost should be used here. */
22434 *total = ix86_cost->fabs;
22435 return false;
22436
22437 case SQRT:
22438 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22439 /* ??? SSE cost should be used here. */
22440 *total = ix86_cost->fsqrt;
22441 else if (X87_FLOAT_MODE_P (mode))
22442 *total = ix86_cost->fsqrt;
22443 else if (FLOAT_MODE_P (mode))
22444 /* ??? SSE vector cost should be used here. */
22445 *total = ix86_cost->fsqrt;
22446 return false;
22447
22448 case UNSPEC:
22449 if (XINT (x, 1) == UNSPEC_TP)
22450 *total = 0;
22451 return false;
22452
22453 default:
22454 return false;
22455 }
22456 }
22457
22458 #if TARGET_MACHO
22459
22460 static int current_machopic_label_num;
22461
22462 /* Given a symbol name and its associated stub, write out the
22463 definition of the stub. */
22464
22465 void
22466 machopic_output_stub (FILE *file, const char *symb, const char *stub)
22467 {
22468 unsigned int length;
22469 char *binder_name, *symbol_name, lazy_ptr_name[32];
22470 int label = ++current_machopic_label_num;
22471
22472 /* For 64-bit we shouldn't get here. */
22473 gcc_assert (!TARGET_64BIT);
22474
22475 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
22476 symb = (*targetm.strip_name_encoding) (symb);
22477
22478 length = strlen (stub);
22479 binder_name = alloca (length + 32);
22480 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
22481
22482 length = strlen (symb);
22483 symbol_name = alloca (length + 32);
22484 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
22485
22486 sprintf (lazy_ptr_name, "L%d$lz", label);
22487
22488 if (MACHOPIC_PURE)
22489 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
22490 else
22491 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
22492
22493 fprintf (file, "%s:\n", stub);
22494 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
22495
22496 if (MACHOPIC_PURE)
22497 {
22498 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
22499 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
22500 fprintf (file, "\tjmp\t*%%edx\n");
22501 }
22502 else
22503 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
22504
22505 fprintf (file, "%s:\n", binder_name);
22506
22507 if (MACHOPIC_PURE)
22508 {
22509 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
22510 fprintf (file, "\tpushl\t%%eax\n");
22511 }
22512 else
22513 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
22514
22515 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
22516
22517 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
22518 fprintf (file, "%s:\n", lazy_ptr_name);
22519 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
22520 fprintf (file, "\t.long %s\n", binder_name);
22521 }
22522
22523 void
22524 darwin_x86_file_end (void)
22525 {
22526 darwin_file_end ();
22527 ix86_file_end ();
22528 }
22529 #endif /* TARGET_MACHO */
22530
22531 /* Order the registers for register allocator. */
22532
22533 void
22534 x86_order_regs_for_local_alloc (void)
22535 {
22536 int pos = 0;
22537 int i;
22538
22539 /* First allocate the local general purpose registers. */
22540 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
22541 if (GENERAL_REGNO_P (i) && call_used_regs[i])
22542 reg_alloc_order [pos++] = i;
22543
22544 /* Global general purpose registers. */
22545 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
22546 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
22547 reg_alloc_order [pos++] = i;
22548
22549 /* x87 registers come first in case we are doing FP math
22550 using them. */
22551 if (!TARGET_SSE_MATH)
22552 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
22553 reg_alloc_order [pos++] = i;
22554
22555 /* SSE registers. */
22556 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
22557 reg_alloc_order [pos++] = i;
22558 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
22559 reg_alloc_order [pos++] = i;
22560
22561 /* x87 registers. */
22562 if (TARGET_SSE_MATH)
22563 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
22564 reg_alloc_order [pos++] = i;
22565
22566 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
22567 reg_alloc_order [pos++] = i;
22568
 22569 /* Initialize the rest of the array, as we do not allocate some registers
 22570 at all. */
22571 while (pos < FIRST_PSEUDO_REGISTER)
22572 reg_alloc_order [pos++] = 0;
22573 }
22574
22575 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
22576 struct attribute_spec.handler. */
22577 static tree
22578 ix86_handle_struct_attribute (tree *node, tree name,
22579 tree args ATTRIBUTE_UNUSED,
22580 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
22581 {
22582 tree *type = NULL;
22583 if (DECL_P (*node))
22584 {
22585 if (TREE_CODE (*node) == TYPE_DECL)
22586 type = &TREE_TYPE (*node);
22587 }
22588 else
22589 type = node;
22590
22591 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
22592 || TREE_CODE (*type) == UNION_TYPE)))
22593 {
22594 warning (OPT_Wattributes, "%qs attribute ignored",
22595 IDENTIFIER_POINTER (name));
22596 *no_add_attrs = true;
22597 }
22598
22599 else if ((is_attribute_p ("ms_struct", name)
22600 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
22601 || ((is_attribute_p ("gcc_struct", name)
22602 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
22603 {
22604 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
22605 IDENTIFIER_POINTER (name));
22606 *no_add_attrs = true;
22607 }
22608
22609 return NULL_TREE;
22610 }
22611
22612 static bool
22613 ix86_ms_bitfield_layout_p (const_tree record_type)
22614 {
22615 return (TARGET_MS_BITFIELD_LAYOUT &&
22616 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
22617 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
22618 }
22619
22620 /* Returns an expression indicating where the this parameter is
22621 located on entry to the FUNCTION. */
22622
22623 static rtx
22624 x86_this_parameter (tree function)
22625 {
22626 tree type = TREE_TYPE (function);
22627 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
22628
22629 if (TARGET_64BIT)
22630 {
22631 const int *parm_regs;
22632
22633 if (TARGET_64BIT_MS_ABI)
22634 parm_regs = x86_64_ms_abi_int_parameter_registers;
22635 else
22636 parm_regs = x86_64_int_parameter_registers;
22637 return gen_rtx_REG (DImode, parm_regs[aggr]);
22638 }
22639
22640 if (ix86_function_regparm (type, function) > 0 && !stdarg_p (type))
22641 {
22642 int regno = AX_REG;
22643 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
22644 regno = CX_REG;
22645 return gen_rtx_REG (SImode, regno);
22646 }
22647
22648 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
22649 }
22650
22651 /* Determine whether x86_output_mi_thunk can succeed. */
22652
22653 static bool
22654 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
22655 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
22656 HOST_WIDE_INT vcall_offset, const_tree function)
22657 {
22658 /* 64-bit can handle anything. */
22659 if (TARGET_64BIT)
22660 return true;
22661
22662 /* For 32-bit, everything's fine if we have one free register. */
22663 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
22664 return true;
22665
22666 /* Need a free register for vcall_offset. */
22667 if (vcall_offset)
22668 return false;
22669
22670 /* Need a free register for GOT references. */
22671 if (flag_pic && !(*targetm.binds_local_p) (function))
22672 return false;
22673
22674 /* Otherwise ok. */
22675 return true;
22676 }
22677
22678 /* Output the assembler code for a thunk function. THUNK_DECL is the
22679 declaration for the thunk function itself, FUNCTION is the decl for
22680 the target function. DELTA is an immediate constant offset to be
22681 added to THIS. If VCALL_OFFSET is nonzero, the word at
22682 *(*this + vcall_offset) should be added to THIS. */
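/* For the common 32-bit case with THIS passed on the stack, a small DELTA
   and no VCALL_OFFSET, the emitted thunk boils down to something like
	addl	$DELTA, 4(%esp)
	jmp	FUNCTION
   (illustrative only; the exact operands depend on the regparm/fastcall
   and PIC checks below).  */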
22683
22684 static void
22685 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
22686 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
22687 HOST_WIDE_INT vcall_offset, tree function)
22688 {
22689 rtx xops[3];
22690 rtx this_param = x86_this_parameter (function);
22691 rtx this_reg, tmp;
22692
22693 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
22694 pull it in now and let DELTA benefit. */
22695 if (REG_P (this_param))
22696 this_reg = this_param;
22697 else if (vcall_offset)
22698 {
22699 /* Put the this parameter into %eax. */
22700 xops[0] = this_param;
22701 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
22702 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
22703 }
22704 else
22705 this_reg = NULL_RTX;
22706
22707 /* Adjust the this parameter by a fixed constant. */
22708 if (delta)
22709 {
22710 xops[0] = GEN_INT (delta);
22711 xops[1] = this_reg ? this_reg : this_param;
22712 if (TARGET_64BIT)
22713 {
22714 if (!x86_64_general_operand (xops[0], DImode))
22715 {
22716 tmp = gen_rtx_REG (DImode, R10_REG);
22717 xops[1] = tmp;
22718 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
22719 xops[0] = tmp;
22720 xops[1] = this_param;
22721 }
22722 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
22723 }
22724 else
22725 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
22726 }
22727
22728 /* Adjust the this parameter by a value stored in the vtable. */
22729 if (vcall_offset)
22730 {
22731 if (TARGET_64BIT)
22732 tmp = gen_rtx_REG (DImode, R10_REG);
22733 else
22734 {
22735 int tmp_regno = CX_REG;
22736 if (lookup_attribute ("fastcall",
22737 TYPE_ATTRIBUTES (TREE_TYPE (function))))
22738 tmp_regno = AX_REG;
22739 tmp = gen_rtx_REG (SImode, tmp_regno);
22740 }
22741
22742 xops[0] = gen_rtx_MEM (Pmode, this_reg);
22743 xops[1] = tmp;
22744 if (TARGET_64BIT)
22745 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
22746 else
22747 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
22748
22749 /* Adjust the this parameter. */
22750 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
22751 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
22752 {
22753 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
22754 xops[0] = GEN_INT (vcall_offset);
22755 xops[1] = tmp2;
22756 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
22757 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
22758 }
22759 xops[1] = this_reg;
22760 if (TARGET_64BIT)
22761 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
22762 else
22763 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
22764 }
22765
22766 /* If necessary, drop THIS back to its stack slot. */
22767 if (this_reg && this_reg != this_param)
22768 {
22769 xops[0] = this_reg;
22770 xops[1] = this_param;
22771 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
22772 }
22773
22774 xops[0] = XEXP (DECL_RTL (function), 0);
22775 if (TARGET_64BIT)
22776 {
22777 if (!flag_pic || (*targetm.binds_local_p) (function))
22778 output_asm_insn ("jmp\t%P0", xops);
22779 /* All thunks should be in the same object as their target,
22780 and thus binds_local_p should be true. */
22781 else if (TARGET_64BIT_MS_ABI)
22782 gcc_unreachable ();
22783 else
22784 {
22785 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
22786 tmp = gen_rtx_CONST (Pmode, tmp);
22787 tmp = gen_rtx_MEM (QImode, tmp);
22788 xops[0] = tmp;
22789 output_asm_insn ("jmp\t%A0", xops);
22790 }
22791 }
22792 else
22793 {
22794 if (!flag_pic || (*targetm.binds_local_p) (function))
22795 output_asm_insn ("jmp\t%P0", xops);
22796 else
22797 #if TARGET_MACHO
22798 if (TARGET_MACHO)
22799 {
22800 rtx sym_ref = XEXP (DECL_RTL (function), 0);
22801 tmp = (gen_rtx_SYMBOL_REF
22802 (Pmode,
22803 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
22804 tmp = gen_rtx_MEM (QImode, tmp);
22805 xops[0] = tmp;
22806 output_asm_insn ("jmp\t%0", xops);
22807 }
22808 else
22809 #endif /* TARGET_MACHO */
22810 {
22811 tmp = gen_rtx_REG (SImode, CX_REG);
22812 output_set_got (tmp, NULL_RTX);
22813
22814 xops[1] = tmp;
22815 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
22816 output_asm_insn ("jmp\t{*}%1", xops);
22817 }
22818 }
22819 }
22820
22821 static void
22822 x86_file_start (void)
22823 {
22824 default_file_start ();
22825 #if TARGET_MACHO
22826 darwin_file_start ();
22827 #endif
22828 if (X86_FILE_START_VERSION_DIRECTIVE)
22829 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
22830 if (X86_FILE_START_FLTUSED)
22831 fputs ("\t.global\t__fltused\n", asm_out_file);
22832 if (ix86_asm_dialect == ASM_INTEL)
22833 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
22834 }
22835
22836 int
22837 x86_field_alignment (tree field, int computed)
22838 {
22839 enum machine_mode mode;
22840 tree type = TREE_TYPE (field);
22841
22842 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
22843 return computed;
22844 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
22845 ? get_inner_array_type (type) : type);
22846 if (mode == DFmode || mode == DCmode
22847 || GET_MODE_CLASS (mode) == MODE_INT
22848 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
22849 return MIN (32, computed);
22850 return computed;
22851 }
22852
22853 /* Output assembler code to FILE to increment profiler label # LABELNO
22854 for profiling a function entry. */
22855 void
22856 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
22857 {
22858 if (TARGET_64BIT)
22859 {
22860 #ifndef NO_PROFILE_COUNTERS
22861 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
22862 #endif
22863
22864 if (!TARGET_64BIT_MS_ABI && flag_pic)
22865 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
22866 else
22867 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
22868 }
22869 else if (flag_pic)
22870 {
22871 #ifndef NO_PROFILE_COUNTERS
22872 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
22873 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
22874 #endif
22875 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
22876 }
22877 else
22878 {
22879 #ifndef NO_PROFILE_COUNTERS
22880 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
22881 PROFILE_COUNT_REGISTER);
22882 #endif
22883 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
22884 }
22885 }
22886
22887 /* We don't have exact information about the insn sizes, but we may assume
22888 quite safely that we are informed about all 1 byte insns and memory
22889 address sizes. This is enough to eliminate unnecessary padding in
22890 99% of cases. */
22891
22892 static int
22893 min_insn_size (rtx insn)
22894 {
22895 int l = 0;
22896
22897 if (!INSN_P (insn) || !active_insn_p (insn))
22898 return 0;
22899
 22900 /* Discard alignments we've emitted and jump instructions. */
22901 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
22902 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
22903 return 0;
22904 if (JUMP_P (insn)
22905 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
22906 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
22907 return 0;
22908
22909 /* Important case - calls are always 5 bytes.
 22910 It is common to have many calls in a row. */
22911 if (CALL_P (insn)
22912 && symbolic_reference_mentioned_p (PATTERN (insn))
22913 && !SIBLING_CALL_P (insn))
22914 return 5;
22915 if (get_attr_length (insn) <= 1)
22916 return 1;
22917
22918 /* For normal instructions we may rely on the sizes of addresses
 22919 and the presence of a symbol to require 4 bytes of encoding.
 22920 This is not the case for jumps, where references are PC relative. */
22921 if (!JUMP_P (insn))
22922 {
22923 l = get_attr_length_address (insn);
22924 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
22925 l = 4;
22926 }
22927 if (l)
22928 return 1+l;
22929 else
22930 return 2;
22931 }
22932
 22933 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
 22934 16-byte window. */
22935
22936 static void
22937 ix86_avoid_jump_misspredicts (void)
22938 {
22939 rtx insn, start = get_insns ();
22940 int nbytes = 0, njumps = 0;
22941 int isjump = 0;
22942
22943 /* Look for all minimal intervals of instructions containing 4 jumps.
22944 The intervals are bounded by START and INSN. NBYTES is the total
22945 size of instructions in the interval including INSN and not including
 22946 START. When NBYTES is smaller than 16 bytes, it is possible
 22947 that the end of START and the end of INSN end up in the same 16-byte window.
 22948
 22949 The smallest offset in the window at which INSN can start is the case where
 22950 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
 22951 We add a p2align to the 16-byte window with maxskip 17 - NBYTES + sizeof (INSN).
22952 */
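  /* Worked example of the padding computed below: with NBYTES == 12 and a
     5-byte INSN, 15 - 12 + 5 = 8 bytes of padding are emitted before INSN,
     which is intended to push INSN out of the 16-byte window that would
     otherwise contain four branches.  */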
22953 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
22954 {
22955
22956 nbytes += min_insn_size (insn);
22957 if (dump_file)
22958 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
22959 INSN_UID (insn), min_insn_size (insn));
22960 if ((JUMP_P (insn)
22961 && GET_CODE (PATTERN (insn)) != ADDR_VEC
22962 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
22963 || CALL_P (insn))
22964 njumps++;
22965 else
22966 continue;
22967
22968 while (njumps > 3)
22969 {
22970 start = NEXT_INSN (start);
22971 if ((JUMP_P (start)
22972 && GET_CODE (PATTERN (start)) != ADDR_VEC
22973 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
22974 || CALL_P (start))
22975 njumps--, isjump = 1;
22976 else
22977 isjump = 0;
22978 nbytes -= min_insn_size (start);
22979 }
22980 gcc_assert (njumps >= 0);
22981 if (dump_file)
22982 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
22983 INSN_UID (start), INSN_UID (insn), nbytes);
22984
22985 if (njumps == 3 && isjump && nbytes < 16)
22986 {
22987 int padsize = 15 - nbytes + min_insn_size (insn);
22988
22989 if (dump_file)
22990 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
22991 INSN_UID (insn), padsize);
22992 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
22993 }
22994 }
22995 }
22996
22997 /* AMD Athlon works faster
 22998 when RET is not the destination of a conditional jump or directly preceded
 22999 by another jump instruction. We avoid the penalty by inserting a NOP just
 23000 before the RET instruction in such cases. */
23001 static void
23002 ix86_pad_returns (void)
23003 {
23004 edge e;
23005 edge_iterator ei;
23006
23007 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
23008 {
23009 basic_block bb = e->src;
23010 rtx ret = BB_END (bb);
23011 rtx prev;
23012 bool replace = false;
23013
23014 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
23015 || !maybe_hot_bb_p (bb))
23016 continue;
23017 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
23018 if (active_insn_p (prev) || LABEL_P (prev))
23019 break;
23020 if (prev && LABEL_P (prev))
23021 {
23022 edge e;
23023 edge_iterator ei;
23024
23025 FOR_EACH_EDGE (e, ei, bb->preds)
23026 if (EDGE_FREQUENCY (e) && e->src->index >= 0
23027 && !(e->flags & EDGE_FALLTHRU))
23028 replace = true;
23029 }
23030 if (!replace)
23031 {
23032 prev = prev_active_insn (ret);
23033 if (prev
23034 && ((JUMP_P (prev) && any_condjump_p (prev))
23035 || CALL_P (prev)))
23036 replace = true;
 23037 /* Empty functions get a branch mispredict even when the jump destination
23038 is not visible to us. */
23039 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
23040 replace = true;
23041 }
23042 if (replace)
23043 {
23044 emit_insn_before (gen_return_internal_long (), ret);
23045 delete_insn (ret);
23046 }
23047 }
23048 }
23049
23050 /* Implement machine specific optimizations. We implement padding of returns
23051 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
23052 static void
23053 ix86_reorg (void)
23054 {
23055 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
23056 ix86_pad_returns ();
23057 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
23058 ix86_avoid_jump_misspredicts ();
23059 }
23060
 23061 /* Return nonzero when a QImode register that must be represented via a REX
 23062 prefix is used. */
23063 bool
23064 x86_extended_QIreg_mentioned_p (rtx insn)
23065 {
23066 int i;
23067 extract_insn_cached (insn);
23068 for (i = 0; i < recog_data.n_operands; i++)
23069 if (REG_P (recog_data.operand[i])
23070 && REGNO (recog_data.operand[i]) >= 4)
23071 return true;
23072 return false;
23073 }
23074
 23075 /* Return nonzero when P points to a register encoded via a REX prefix.
23076 Called via for_each_rtx. */
23077 static int
23078 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
23079 {
23080 unsigned int regno;
23081 if (!REG_P (*p))
23082 return 0;
23083 regno = REGNO (*p);
23084 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
23085 }
23086
23087 /* Return true when INSN mentions register that must be encoded using REX
23088 prefix. */
23089 bool
23090 x86_extended_reg_mentioned_p (rtx insn)
23091 {
23092 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
23093 }
23094
23095 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
23096 optabs would emit if we didn't have TFmode patterns. */
23097
23098 void
23099 x86_emit_floatuns (rtx operands[2])
23100 {
23101 rtx neglab, donelab, i0, i1, f0, in, out;
23102 enum machine_mode mode, inmode;
23103
23104 inmode = GET_MODE (operands[1]);
23105 gcc_assert (inmode == SImode || inmode == DImode);
23106
23107 out = operands[0];
23108 in = force_reg (inmode, operands[1]);
23109 mode = GET_MODE (out);
23110 neglab = gen_label_rtx ();
23111 donelab = gen_label_rtx ();
23112 f0 = gen_reg_rtx (mode);
23113
23114 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
23115
23116 expand_float (out, in, 0);
23117
23118 emit_jump_insn (gen_jump (donelab));
23119 emit_barrier ();
23120
23121 emit_label (neglab);
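  /* The input has its high bit set, so a signed conversion would see it as
     negative.  Halve it, OR the lost low bit back in (a round-to-odd step
     so the final rounding is unbiased), convert, and double the result;
     roughly OUT = 2.0 * (FP) ((IN >> 1) | (IN & 1)).  */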
23122
23123 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
23124 1, OPTAB_DIRECT);
23125 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
23126 1, OPTAB_DIRECT);
23127 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
23128
23129 expand_float (f0, i0, 0);
23130
23131 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
23132
23133 emit_label (donelab);
23134 }
23135 \f
23136 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23137 with all elements equal to VAR. Return true if successful. */
23138
23139 static bool
23140 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
23141 rtx target, rtx val)
23142 {
23143 enum machine_mode smode, wsmode, wvmode;
23144 rtx x;
23145
23146 switch (mode)
23147 {
23148 case V2SImode:
23149 case V2SFmode:
23150 if (!mmx_ok)
23151 return false;
23152 /* FALLTHRU */
23153
23154 case V2DFmode:
23155 case V2DImode:
23156 case V4SFmode:
23157 case V4SImode:
23158 val = force_reg (GET_MODE_INNER (mode), val);
23159 x = gen_rtx_VEC_DUPLICATE (mode, val);
23160 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23161 return true;
23162
23163 case V4HImode:
23164 if (!mmx_ok)
23165 return false;
23166 if (TARGET_SSE || TARGET_3DNOW_A)
23167 {
23168 val = gen_lowpart (SImode, val);
23169 x = gen_rtx_TRUNCATE (HImode, val);
23170 x = gen_rtx_VEC_DUPLICATE (mode, x);
23171 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23172 return true;
23173 }
23174 else
23175 {
23176 smode = HImode;
23177 wsmode = SImode;
23178 wvmode = V2SImode;
23179 goto widen;
23180 }
23181
23182 case V8QImode:
23183 if (!mmx_ok)
23184 return false;
23185 smode = QImode;
23186 wsmode = HImode;
23187 wvmode = V4HImode;
23188 goto widen;
23189 case V8HImode:
23190 if (TARGET_SSE2)
23191 {
23192 rtx tmp1, tmp2;
23193 /* Extend HImode to SImode using a paradoxical SUBREG. */
23194 tmp1 = gen_reg_rtx (SImode);
23195 emit_move_insn (tmp1, gen_lowpart (SImode, val));
23196 /* Insert the SImode value as low element of V4SImode vector. */
23197 tmp2 = gen_reg_rtx (V4SImode);
23198 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
23199 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
23200 CONST0_RTX (V4SImode),
23201 const1_rtx);
23202 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
23203 /* Cast the V4SImode vector back to a V8HImode vector. */
23204 tmp1 = gen_reg_rtx (V8HImode);
23205 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
23206 /* Duplicate the low short through the whole low SImode word. */
23207 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
23208 /* Cast the V8HImode vector back to a V4SImode vector. */
23209 tmp2 = gen_reg_rtx (V4SImode);
23210 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
23211 /* Replicate the low element of the V4SImode vector. */
23212 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
 23213 /* Cast the V4SImode vector back to V8HImode, and store in target. */
23214 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
23215 return true;
23216 }
23217 smode = HImode;
23218 wsmode = SImode;
23219 wvmode = V4SImode;
23220 goto widen;
23221 case V16QImode:
23222 if (TARGET_SSE2)
23223 {
23224 rtx tmp1, tmp2;
23225 /* Extend QImode to SImode using a paradoxical SUBREG. */
23226 tmp1 = gen_reg_rtx (SImode);
23227 emit_move_insn (tmp1, gen_lowpart (SImode, val));
23228 /* Insert the SImode value as low element of V4SImode vector. */
23229 tmp2 = gen_reg_rtx (V4SImode);
23230 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
23231 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
23232 CONST0_RTX (V4SImode),
23233 const1_rtx);
23234 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
23235 /* Cast the V4SImode vector back to a V16QImode vector. */
23236 tmp1 = gen_reg_rtx (V16QImode);
23237 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
23238 /* Duplicate the low byte through the whole low SImode word. */
23239 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
23240 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
23241 /* Cast the V16QImode vector back to a V4SImode vector. */
23242 tmp2 = gen_reg_rtx (V4SImode);
23243 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
23244 /* Replicate the low element of the V4SImode vector. */
23245 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
 23246 /* Cast the V4SImode vector back to V16QImode, and store in target. */
23247 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
23248 return true;
23249 }
23250 smode = QImode;
23251 wsmode = HImode;
23252 wvmode = V8HImode;
23253 goto widen;
23254 widen:
23255 /* Replicate the value once into the next wider mode and recurse. */
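      /* E.g. for V8QImode the QImode value is widened to HImode as
	 (val << 8) | val, and the recursive call then duplicates that
	 HImode value across a V4HImode vector, which is finally viewed as
	 the requested V8QImode vector via gen_lowpart.  */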
23256 val = convert_modes (wsmode, smode, val, true);
23257 x = expand_simple_binop (wsmode, ASHIFT, val,
23258 GEN_INT (GET_MODE_BITSIZE (smode)),
23259 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23260 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
23261
23262 x = gen_reg_rtx (wvmode);
23263 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
23264 gcc_unreachable ();
23265 emit_move_insn (target, gen_lowpart (mode, x));
23266 return true;
23267
23268 default:
23269 return false;
23270 }
23271 }
23272
23273 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23274 whose ONE_VAR element is VAR, and other elements are zero. Return true
23275 if successful. */
23276
23277 static bool
23278 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
23279 rtx target, rtx var, int one_var)
23280 {
23281 enum machine_mode vsimode;
23282 rtx new_target;
23283 rtx x, tmp;
23284
23285 switch (mode)
23286 {
23287 case V2SFmode:
23288 case V2SImode:
23289 if (!mmx_ok)
23290 return false;
23291 /* FALLTHRU */
23292
23293 case V2DFmode:
23294 case V2DImode:
23295 if (one_var != 0)
23296 return false;
23297 var = force_reg (GET_MODE_INNER (mode), var);
23298 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
23299 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23300 return true;
23301
23302 case V4SFmode:
23303 case V4SImode:
23304 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
23305 new_target = gen_reg_rtx (mode);
23306 else
23307 new_target = target;
23308 var = force_reg (GET_MODE_INNER (mode), var);
23309 x = gen_rtx_VEC_DUPLICATE (mode, var);
23310 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
23311 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
23312 if (one_var != 0)
23313 {
23314 /* We need to shuffle the value to the correct position, so
23315 create a new pseudo to store the intermediate result. */
23316
23317 /* With SSE2, we can use the integer shuffle insns. */
23318 if (mode != V4SFmode && TARGET_SSE2)
23319 {
23320 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
23321 GEN_INT (1),
23322 GEN_INT (one_var == 1 ? 0 : 1),
23323 GEN_INT (one_var == 2 ? 0 : 1),
23324 GEN_INT (one_var == 3 ? 0 : 1)));
23325 if (target != new_target)
23326 emit_move_insn (target, new_target);
23327 return true;
23328 }
23329
23330 /* Otherwise convert the intermediate result to V4SFmode and
23331 use the SSE1 shuffle instructions. */
23332 if (mode != V4SFmode)
23333 {
23334 tmp = gen_reg_rtx (V4SFmode);
23335 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
23336 }
23337 else
23338 tmp = new_target;
23339
23340 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
23341 GEN_INT (1),
23342 GEN_INT (one_var == 1 ? 0 : 1),
23343 GEN_INT (one_var == 2 ? 0+4 : 1+4),
23344 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
23345
23346 if (mode != V4SFmode)
23347 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
23348 else if (tmp != target)
23349 emit_move_insn (target, tmp);
23350 }
23351 else if (target != new_target)
23352 emit_move_insn (target, new_target);
23353 return true;
23354
23355 case V8HImode:
23356 case V16QImode:
23357 vsimode = V4SImode;
23358 goto widen;
23359 case V4HImode:
23360 case V8QImode:
23361 if (!mmx_ok)
23362 return false;
23363 vsimode = V2SImode;
23364 goto widen;
23365 widen:
23366 if (one_var != 0)
23367 return false;
23368
23369 /* Zero extend the variable element to SImode and recurse. */
23370 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
23371
23372 x = gen_reg_rtx (vsimode);
23373 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
23374 var, one_var))
23375 gcc_unreachable ();
23376
23377 emit_move_insn (target, gen_lowpart (mode, x));
23378 return true;
23379
23380 default:
23381 return false;
23382 }
23383 }
23384
23385 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23386 consisting of the values in VALS. It is known that all elements
23387 except ONE_VAR are constants. Return true if successful. */
23388
23389 static bool
23390 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
23391 rtx target, rtx vals, int one_var)
23392 {
23393 rtx var = XVECEXP (vals, 0, one_var);
23394 enum machine_mode wmode;
23395 rtx const_vec, x;
23396
23397 const_vec = copy_rtx (vals);
23398 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
23399 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
23400
23401 switch (mode)
23402 {
23403 case V2DFmode:
23404 case V2DImode:
23405 case V2SFmode:
23406 case V2SImode:
23407 /* For the two element vectors, it's just as easy to use
23408 the general case. */
23409 return false;
23410
23411 case V4SFmode:
23412 case V4SImode:
23413 case V8HImode:
23414 case V4HImode:
23415 break;
23416
23417 case V16QImode:
23418 wmode = V8HImode;
23419 goto widen;
23420 case V8QImode:
23421 wmode = V4HImode;
23422 goto widen;
23423 widen:
23424 /* There's no way to set one QImode entry easily. Combine
23425 the variable value with its adjacent constant value, and
23426 promote to an HImode set. */
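/* For instance, with V16QImode and one_var == 5, the variable byte and the
   constant byte at index 4 are combined into a single HImode value (the
   variable lands in the high byte, since element 5 is the odd half of the
   pair on a little-endian target) and stored as element 2, i.e.
   one_var >> 1, of the V8HImode vector. */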
23427 x = XVECEXP (vals, 0, one_var ^ 1);
23428 if (one_var & 1)
23429 {
23430 var = convert_modes (HImode, QImode, var, true);
23431 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
23432 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23433 x = GEN_INT (INTVAL (x) & 0xff);
23434 }
23435 else
23436 {
23437 var = convert_modes (HImode, QImode, var, true);
23438 x = gen_int_mode (INTVAL (x) << 8, HImode);
23439 }
23440 if (x != const0_rtx)
23441 var = expand_simple_binop (HImode, IOR, var, x, var,
23442 1, OPTAB_LIB_WIDEN);
23443
23444 x = gen_reg_rtx (wmode);
23445 emit_move_insn (x, gen_lowpart (wmode, const_vec));
23446 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
23447
23448 emit_move_insn (target, gen_lowpart (mode, x));
23449 return true;
23450
23451 default:
23452 return false;
23453 }
23454
23455 emit_move_insn (target, const_vec);
23456 ix86_expand_vector_set (mmx_ok, target, var, one_var);
23457 return true;
23458 }
23459
23460 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
23461 all values variable, and none identical. */
23462
23463 static void
23464 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
23465 rtx target, rtx vals)
23466 {
23467 enum machine_mode half_mode = GET_MODE_INNER (mode);
23468 rtx op0 = NULL, op1 = NULL;
23469 bool use_vec_concat = false;
23470
23471 switch (mode)
23472 {
23473 case V2SFmode:
23474 case V2SImode:
23475 if (!mmx_ok && !TARGET_SSE)
23476 break;
23477 /* FALLTHRU */
23478
23479 case V2DFmode:
23480 case V2DImode:
23481 /* For the two element vectors, we always implement VEC_CONCAT. */
23482 op0 = XVECEXP (vals, 0, 0);
23483 op1 = XVECEXP (vals, 0, 1);
23484 use_vec_concat = true;
23485 break;
23486
23487 case V4SFmode:
23488 half_mode = V2SFmode;
23489 goto half;
23490 case V4SImode:
23491 half_mode = V2SImode;
23492 goto half;
23493 half:
23494 {
23495 rtvec v;
23496
23497 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
23498 Recurse to load the two halves. */
23499
23500 op0 = gen_reg_rtx (half_mode);
23501 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
23502 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
23503
23504 op1 = gen_reg_rtx (half_mode);
23505 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
23506 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
23507
23508 use_vec_concat = true;
23509 }
23510 break;
23511
23512 case V8HImode:
23513 case V16QImode:
23514 case V4HImode:
23515 case V8QImode:
23516 break;
23517
23518 default:
23519 gcc_unreachable ();
23520 }
23521
23522 if (use_vec_concat)
23523 {
23524 if (!register_operand (op0, half_mode))
23525 op0 = force_reg (half_mode, op0);
23526 if (!register_operand (op1, half_mode))
23527 op1 = force_reg (half_mode, op1);
23528
23529 emit_insn (gen_rtx_SET (VOIDmode, target,
23530 gen_rtx_VEC_CONCAT (mode, op0, op1)));
23531 }
23532 else
23533 {
23534 int i, j, n_elts, n_words, n_elt_per_word;
23535 enum machine_mode inner_mode;
23536 rtx words[4], shift;
23537
23538 inner_mode = GET_MODE_INNER (mode);
23539 n_elts = GET_MODE_NUNITS (mode);
23540 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
23541 n_elt_per_word = n_elts / n_words;
23542 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
23543
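/* Build each word from its elements. Within word I the inner loop walks
   the element indices from high to low, shifting the partial word left
   each time, so the lowest-indexed element ends up in the least
   significant bits, matching the little-endian element layout. */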
23544 for (i = 0; i < n_words; ++i)
23545 {
23546 rtx word = NULL_RTX;
23547
23548 for (j = 0; j < n_elt_per_word; ++j)
23549 {
23550 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
23551 elt = convert_modes (word_mode, inner_mode, elt, true);
23552
23553 if (j == 0)
23554 word = elt;
23555 else
23556 {
23557 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
23558 word, 1, OPTAB_LIB_WIDEN);
23559 word = expand_simple_binop (word_mode, IOR, word, elt,
23560 word, 1, OPTAB_LIB_WIDEN);
23561 }
23562 }
23563
23564 words[i] = word;
23565 }
23566
23567 if (n_words == 1)
23568 emit_move_insn (target, gen_lowpart (mode, words[0]));
23569 else if (n_words == 2)
23570 {
23571 rtx tmp = gen_reg_rtx (mode);
23572 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
23573 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
23574 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
23575 emit_move_insn (target, tmp);
23576 }
23577 else if (n_words == 4)
23578 {
23579 rtx tmp = gen_reg_rtx (V4SImode);
23580 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
23581 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
23582 emit_move_insn (target, gen_lowpart (mode, tmp));
23583 }
23584 else
23585 gcc_unreachable ();
23586 }
23587 }
23588
23589 /* Initialize vector TARGET via VALS. Suppress the use of MMX
23590 instructions unless MMX_OK is true. */
23591
23592 void
23593 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
23594 {
23595 enum machine_mode mode = GET_MODE (target);
23596 enum machine_mode inner_mode = GET_MODE_INNER (mode);
23597 int n_elts = GET_MODE_NUNITS (mode);
23598 int n_var = 0, one_var = -1;
23599 bool all_same = true, all_const_zero = true;
23600 int i;
23601 rtx x;
23602
23603 for (i = 0; i < n_elts; ++i)
23604 {
23605 x = XVECEXP (vals, 0, i);
23606 if (!CONSTANT_P (x))
23607 n_var++, one_var = i;
23608 else if (x != CONST0_RTX (inner_mode))
23609 all_const_zero = false;
23610 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
23611 all_same = false;
23612 }
23613
23614 /* Constants are best loaded from the constant pool. */
23615 if (n_var == 0)
23616 {
23617 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
23618 return;
23619 }
23620
23621 /* If all values are identical, broadcast the value. */
23622 if (all_same
23623 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
23624 XVECEXP (vals, 0, 0)))
23625 return;
23626
23627 /* Values where only one field is non-constant are best loaded from
23628 the pool and overwritten via move later. */
23629 if (n_var == 1)
23630 {
23631 if (all_const_zero
23632 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
23633 XVECEXP (vals, 0, one_var),
23634 one_var))
23635 return;
23636
23637 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
23638 return;
23639 }
23640
23641 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
23642 }
23643
23644 void
23645 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
23646 {
23647 enum machine_mode mode = GET_MODE (target);
23648 enum machine_mode inner_mode = GET_MODE_INNER (mode);
23649 bool use_vec_merge = false;
23650 rtx tmp;
23651
23652 switch (mode)
23653 {
23654 case V2SFmode:
23655 case V2SImode:
23656 if (mmx_ok)
23657 {
23658 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
23659 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
23660 if (elt == 0)
23661 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
23662 else
23663 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
23664 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
23665 return;
23666 }
23667 break;
23668
23669 case V2DImode:
23670 use_vec_merge = TARGET_SSE4_1;
23671 if (use_vec_merge)
23672 break;
23673
23674 case V2DFmode:
23675 {
23676 rtx op0, op1;
23677
23678 /* For the two element vectors, we implement a VEC_CONCAT with
23679 the extraction of the other element. */
23680
23681 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
23682 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
23683
23684 if (elt == 0)
23685 op0 = val, op1 = tmp;
23686 else
23687 op0 = tmp, op1 = val;
23688
23689 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
23690 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
23691 }
23692 return;
23693
23694 case V4SFmode:
23695 use_vec_merge = TARGET_SSE4_1;
23696 if (use_vec_merge)
23697 break;
23698
23699 switch (elt)
23700 {
23701 case 0:
23702 use_vec_merge = true;
23703 break;
23704
23705 case 1:
23706 /* tmp = target = A B C D */
23707 tmp = copy_to_reg (target);
23708 /* target = A A B B */
23709 emit_insn (gen_sse_unpcklps (target, target, target));
23710 /* target = X A B B */
23711 ix86_expand_vector_set (false, target, val, 0);
23712 /* target = A X C D */
23713 emit_insn (gen_sse_shufps_1 (target, target, tmp,
23714 GEN_INT (1), GEN_INT (0),
23715 GEN_INT (2+4), GEN_INT (3+4)));
23716 return;
23717
23718 case 2:
23719 /* tmp = target = A B C D */
23720 tmp = copy_to_reg (target);
23721 /* tmp = X B C D */
23722 ix86_expand_vector_set (false, tmp, val, 0);
23723 /* target = A B X D */
23724 emit_insn (gen_sse_shufps_1 (target, target, tmp,
23725 GEN_INT (0), GEN_INT (1),
23726 GEN_INT (0+4), GEN_INT (3+4)));
23727 return;
23728
23729 case 3:
23730 /* tmp = target = A B C D */
23731 tmp = copy_to_reg (target);
23732 /* tmp = X B C D */
23733 ix86_expand_vector_set (false, tmp, val, 0);
23734 /* target = A B C X */
23735 emit_insn (gen_sse_shufps_1 (target, target, tmp,
23736 GEN_INT (0), GEN_INT (1),
23737 GEN_INT (2+4), GEN_INT (0+4)));
23738 return;
23739
23740 default:
23741 gcc_unreachable ();
23742 }
23743 break;
23744
23745 case V4SImode:
23746 use_vec_merge = TARGET_SSE4_1;
23747 if (use_vec_merge)
23748 break;
23749
23750 /* Element 0 handled by vec_merge below. */
23751 if (elt == 0)
23752 {
23753 use_vec_merge = true;
23754 break;
23755 }
23756
23757 if (TARGET_SSE2)
23758 {
23759 /* With SSE2, use integer shuffles to swap element 0 and ELT,
23760 store into element 0, then shuffle them back. */
23761
23762 rtx order[4];
23763
23764 order[0] = GEN_INT (elt);
23765 order[1] = const1_rtx;
23766 order[2] = const2_rtx;
23767 order[3] = GEN_INT (3);
23768 order[elt] = const0_rtx;
23769
23770 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
23771 order[1], order[2], order[3]));
23772
23773 ix86_expand_vector_set (false, target, val, 0);
23774
23775 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
23776 order[1], order[2], order[3]));
23777 }
23778 else
23779 {
23780 /* For SSE1, we have to reuse the V4SF code. */
23781 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
23782 gen_lowpart (SFmode, val), elt);
23783 }
23784 return;
23785
23786 case V8HImode:
23787 use_vec_merge = TARGET_SSE2;
23788 break;
23789 case V4HImode:
23790 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
23791 break;
23792
23793 case V16QImode:
23794 use_vec_merge = TARGET_SSE4_1;
23795 break;
23796
23797 case V8QImode:
23798 default:
23799 break;
23800 }
23801
23802 if (use_vec_merge)
23803 {
23804 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
23805 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
23806 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
23807 }
23808 else
23809 {
23810 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
23811
23812 emit_move_insn (mem, target);
23813
23814 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
23815 emit_move_insn (tmp, val);
23816
23817 emit_move_insn (target, mem);
23818 }
23819 }
23820
23821 void
23822 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
23823 {
23824 enum machine_mode mode = GET_MODE (vec);
23825 enum machine_mode inner_mode = GET_MODE_INNER (mode);
23826 bool use_vec_extr = false;
23827 rtx tmp;
23828
23829 switch (mode)
23830 {
23831 case V2SImode:
23832 case V2SFmode:
23833 if (!mmx_ok)
23834 break;
23835 /* FALLTHRU */
23836
23837 case V2DFmode:
23838 case V2DImode:
23839 use_vec_extr = true;
23840 break;
23841
23842 case V4SFmode:
23843 use_vec_extr = TARGET_SSE4_1;
23844 if (use_vec_extr)
23845 break;
23846
23847 switch (elt)
23848 {
23849 case 0:
23850 tmp = vec;
23851 break;
23852
23853 case 1:
23854 case 3:
23855 tmp = gen_reg_rtx (mode);
23856 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
23857 GEN_INT (elt), GEN_INT (elt),
23858 GEN_INT (elt+4), GEN_INT (elt+4)));
23859 break;
23860
23861 case 2:
23862 tmp = gen_reg_rtx (mode);
23863 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
23864 break;
23865
23866 default:
23867 gcc_unreachable ();
23868 }
23869 vec = tmp;
23870 use_vec_extr = true;
23871 elt = 0;
23872 break;
23873
23874 case V4SImode:
23875 use_vec_extr = TARGET_SSE4_1;
23876 if (use_vec_extr)
23877 break;
23878
23879 if (TARGET_SSE2)
23880 {
23881 switch (elt)
23882 {
23883 case 0:
23884 tmp = vec;
23885 break;
23886
23887 case 1:
23888 case 3:
23889 tmp = gen_reg_rtx (mode);
23890 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
23891 GEN_INT (elt), GEN_INT (elt),
23892 GEN_INT (elt), GEN_INT (elt)));
23893 break;
23894
23895 case 2:
23896 tmp = gen_reg_rtx (mode);
23897 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
23898 break;
23899
23900 default:
23901 gcc_unreachable ();
23902 }
23903 vec = tmp;
23904 use_vec_extr = true;
23905 elt = 0;
23906 }
23907 else
23908 {
23909 /* For SSE1, we have to reuse the V4SF code. */
23910 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
23911 gen_lowpart (V4SFmode, vec), elt);
23912 return;
23913 }
23914 break;
23915
23916 case V8HImode:
23917 use_vec_extr = TARGET_SSE2;
23918 break;
23919 case V4HImode:
23920 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
23921 break;
23922
23923 case V16QImode:
23924 use_vec_extr = TARGET_SSE4_1;
23925 break;
23926
23927 case V8QImode:
23928 /* ??? Could extract the appropriate HImode element and shift. */
23929 default:
23930 break;
23931 }
23932
23933 if (use_vec_extr)
23934 {
23935 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
23936 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
23937
23938 /* Let the rtl optimizers know about the zero extension performed. */
23939 if (inner_mode == QImode || inner_mode == HImode)
23940 {
23941 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
23942 target = gen_lowpart (SImode, target);
23943 }
23944
23945 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
23946 }
23947 else
23948 {
23949 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
23950
23951 emit_move_insn (mem, vec);
23952
23953 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
23954 emit_move_insn (target, tmp);
23955 }
23956 }
23957
23958 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
23959 pattern to reduce; DEST is the destination; IN is the input vector. */
23960
23961 void
23962 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
23963 {
23964 rtx tmp1, tmp2, tmp3;
23965
23966 tmp1 = gen_reg_rtx (V4SFmode);
23967 tmp2 = gen_reg_rtx (V4SFmode);
23968 tmp3 = gen_reg_rtx (V4SFmode);
23969
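/* The sequence below folds all four elements of IN into element 0 of DEST:
   movhlps copies elements 2 and 3 into the low half of tmp1, the first FN
   combines them with elements 0 and 1, the shufps replicates element 1 of
   that partial result, and the final FN combines elements 0 and 1 of the
   partial result. */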
23970 emit_insn (gen_sse_movhlps (tmp1, in, in));
23971 emit_insn (fn (tmp2, tmp1, in));
23972
23973 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
23974 GEN_INT (1), GEN_INT (1),
23975 GEN_INT (1+4), GEN_INT (1+4)));
23976 emit_insn (fn (dest, tmp2, tmp3));
23977 }
23978 \f
23979 /* Target hook for scalar_mode_supported_p. */
23980 static bool
23981 ix86_scalar_mode_supported_p (enum machine_mode mode)
23982 {
23983 if (DECIMAL_FLOAT_MODE_P (mode))
23984 return true;
23985 else if (mode == TFmode)
23986 return TARGET_64BIT;
23987 else
23988 return default_scalar_mode_supported_p (mode);
23989 }
23990
23991 /* Implements target hook vector_mode_supported_p. */
23992 static bool
23993 ix86_vector_mode_supported_p (enum machine_mode mode)
23994 {
23995 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
23996 return true;
23997 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
23998 return true;
23999 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
24000 return true;
24001 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
24002 return true;
24003 return false;
24004 }
24005
24006 /* Target hook for c_mode_for_suffix. */
24007 static enum machine_mode
24008 ix86_c_mode_for_suffix (char suffix)
24009 {
24010 if (TARGET_64BIT && suffix == 'q')
24011 return TFmode;
24012 if (TARGET_MMX && suffix == 'w')
24013 return XFmode;
24014
24015 return VOIDmode;
24016 }
24017
24018 /* Worker function for TARGET_MD_ASM_CLOBBERS.
24019
24020 We do this in the new i386 backend to maintain source compatibility
24021 with the old cc0-based compiler. */
24022
24023 static tree
24024 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
24025 tree inputs ATTRIBUTE_UNUSED,
24026 tree clobbers)
24027 {
24028 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
24029 clobbers);
24030 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
24031 clobbers);
24032 return clobbers;
24033 }
24034
24035 /* Implements target vector targetm.asm.encode_section_info. This
24036 is not used by NetWare. */
24037
24038 static void ATTRIBUTE_UNUSED
24039 ix86_encode_section_info (tree decl, rtx rtl, int first)
24040 {
24041 default_encode_section_info (decl, rtl, first);
24042
24043 if (TREE_CODE (decl) == VAR_DECL
24044 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
24045 && ix86_in_large_data_p (decl))
24046 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
24047 }
24048
24049 /* Worker function for REVERSE_CONDITION. */
24050
24051 enum rtx_code
24052 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
24053 {
24054 return (mode != CCFPmode && mode != CCFPUmode
24055 ? reverse_condition (code)
24056 : reverse_condition_maybe_unordered (code));
24057 }
24058
24059 /* Output code to perform an x87 FP register move, from OPERANDS[1]
24060 to OPERANDS[0]. */
24061
24062 const char *
24063 output_387_reg_move (rtx insn, rtx *operands)
24064 {
24065 if (REG_P (operands[0]))
24066 {
24067 if (REG_P (operands[1])
24068 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24069 {
24070 if (REGNO (operands[0]) == FIRST_STACK_REG)
24071 return output_387_ffreep (operands, 0);
24072 return "fstp\t%y0";
24073 }
24074 if (STACK_TOP_P (operands[0]))
24075 return "fld%z1\t%y1";
24076 return "fst\t%y0";
24077 }
24078 else if (MEM_P (operands[0]))
24079 {
24080 gcc_assert (REG_P (operands[1]));
24081 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24082 return "fstp%z0\t%y0";
24083 else
24084 {
24085 /* There is no non-popping store to memory for XFmode.
24086 So if we need one, follow the store with a load. */
24087 if (GET_MODE (operands[0]) == XFmode)
24088 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
24089 else
24090 return "fst%z0\t%y0";
24091 }
24092 }
24093 else
24094 gcc_unreachable ();
24095 }
24096
24097 /* Output code to perform a conditional jump to LABEL, if C2 flag in
24098 FP status register is set. */
24099
24100 void
24101 ix86_emit_fp_unordered_jump (rtx label)
24102 {
24103 rtx reg = gen_reg_rtx (HImode);
24104 rtx temp;
24105
24106 emit_insn (gen_x86_fnstsw_1 (reg));
24107
24108 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
24109 {
24110 emit_insn (gen_x86_sahf_1 (reg));
24111
24112 temp = gen_rtx_REG (CCmode, FLAGS_REG);
24113 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
24114 }
24115 else
24116 {
24117 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
24118
24119 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24120 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
24121 }
24122
24123 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
24124 gen_rtx_LABEL_REF (VOIDmode, label),
24125 pc_rtx);
24126 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
24127
24128 emit_jump_insn (temp);
24129 predict_jump (REG_BR_PROB_BASE * 10 / 100);
24130 }
24131
24132 /* Output code to perform a log1p XFmode calculation. */
24133
24134 void ix86_emit_i387_log1p (rtx op0, rtx op1)
24135 {
24136 rtx label1 = gen_label_rtx ();
24137 rtx label2 = gen_label_rtx ();
24138
24139 rtx tmp = gen_reg_rtx (XFmode);
24140 rtx tmp2 = gen_reg_rtx (XFmode);
24141
24142 emit_insn (gen_absxf2 (tmp, op1));
24143 emit_insn (gen_cmpxf (tmp,
24144 CONST_DOUBLE_FROM_REAL_VALUE (
24145 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
24146 XFmode)));
24147 emit_jump_insn (gen_bge (label1));
24148
24149 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
24150 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
24151 emit_jump (label2);
24152
24153 emit_label (label1);
24154 emit_move_insn (tmp, CONST1_RTX (XFmode));
24155 emit_insn (gen_addxf3 (tmp, op1, tmp));
24156 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
24157 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
24158
24159 emit_label (label2);
24160 }
24161
24162 /* Output code to perform a Newton-Raphson approximation of a single precision
24163 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
24164
24165 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
24166 {
24167 rtx x0, x1, e0, e1, two;
24168
24169 x0 = gen_reg_rtx (mode);
24170 e0 = gen_reg_rtx (mode);
24171 e1 = gen_reg_rtx (mode);
24172 x1 = gen_reg_rtx (mode);
24173
24174 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
24175
24176 if (VECTOR_MODE_P (mode))
24177 two = ix86_build_const_vector (SFmode, true, two);
24178
24179 two = force_reg (mode, two);
24180
24181 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
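/* This is one Newton-Raphson step for f (x) = 1/x - b: starting from the
   hardware estimate x0 = rcp (b), x1 = x0 * (2.0 - b * x0) roughly doubles
   the number of correct bits, and a * x1 then approximates a / b. */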
24182
24183 /* x0 = 1./b estimate */
24184 emit_insn (gen_rtx_SET (VOIDmode, x0,
24185 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
24186 UNSPEC_RCP)));
24187 /* e0 = x0 * b */
24188 emit_insn (gen_rtx_SET (VOIDmode, e0,
24189 gen_rtx_MULT (mode, x0, b)));
24190 /* e1 = 2. - e0 */
24191 emit_insn (gen_rtx_SET (VOIDmode, e1,
24192 gen_rtx_MINUS (mode, two, e0)));
24193 /* x1 = x0 * e1 */
24194 emit_insn (gen_rtx_SET (VOIDmode, x1,
24195 gen_rtx_MULT (mode, x0, e1)));
24196 /* res = a * x1 */
24197 emit_insn (gen_rtx_SET (VOIDmode, res,
24198 gen_rtx_MULT (mode, a, x1)));
24199 }
24200
24201 /* Output code to perform a Newton-Raphson approximation of a
24202 single precision floating point [reciprocal] square root. */
24203
24204 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
24205 bool recip)
24206 {
24207 rtx x0, e0, e1, e2, e3, three, half, zero, mask;
24208
24209 x0 = gen_reg_rtx (mode);
24210 e0 = gen_reg_rtx (mode);
24211 e1 = gen_reg_rtx (mode);
24212 e2 = gen_reg_rtx (mode);
24213 e3 = gen_reg_rtx (mode);
24214
24215 three = CONST_DOUBLE_FROM_REAL_VALUE (dconst3, SFmode);
24216 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, SFmode);
24217
24218 mask = gen_reg_rtx (mode);
24219
24220 if (VECTOR_MODE_P (mode))
24221 {
24222 three = ix86_build_const_vector (SFmode, true, three);
24223 half = ix86_build_const_vector (SFmode, true, half);
24224 }
24225
24226 three = force_reg (mode, three);
24227 half = force_reg (mode, half);
24228
24229 zero = force_reg (mode, CONST0_RTX (mode));
24230
24231 /* sqrt(a) = 0.5 * a * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a))
24232 1.0 / sqrt(a) = 0.5 * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a)) */
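/* This is one Newton-Raphson step for f (x) = 1/(x*x) - a: starting from
   the hardware estimate x0 = rsqrtss (a), x1 = 0.5 * x0 * (3.0 - a * x0 * x0)
   refines 1/sqrt(a); multiplying by a gives sqrt(a) itself. */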
24233
24234 /* Compare a to zero. */
24235 emit_insn (gen_rtx_SET (VOIDmode, mask,
24236 gen_rtx_NE (mode, a, zero)));
24237
24238 /* x0 = 1./sqrt(a) estimate */
24239 emit_insn (gen_rtx_SET (VOIDmode, x0,
24240 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
24241 UNSPEC_RSQRT)));
24242 /* Filter out infinity. */
24243 if (VECTOR_MODE_P (mode))
24244 emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (V4SFmode, x0),
24245 gen_rtx_AND (mode,
24246 gen_lowpart (V4SFmode, x0),
24247 gen_lowpart (V4SFmode, mask))));
24248 else
24249 emit_insn (gen_rtx_SET (VOIDmode, x0,
24250 gen_rtx_AND (mode, x0, mask)));
24251
24252 /* e0 = x0 * a */
24253 emit_insn (gen_rtx_SET (VOIDmode, e0,
24254 gen_rtx_MULT (mode, x0, a)));
24255 /* e1 = e0 * x0 */
24256 emit_insn (gen_rtx_SET (VOIDmode, e1,
24257 gen_rtx_MULT (mode, e0, x0)));
24258 /* e2 = 3. - e1 */
24259 emit_insn (gen_rtx_SET (VOIDmode, e2,
24260 gen_rtx_MINUS (mode, three, e1)));
24261 if (recip)
24262 /* e3 = .5 * x0 */
24263 emit_insn (gen_rtx_SET (VOIDmode, e3,
24264 gen_rtx_MULT (mode, half, x0)));
24265 else
24266 /* e3 = .5 * e0 */
24267 emit_insn (gen_rtx_SET (VOIDmode, e3,
24268 gen_rtx_MULT (mode, half, e0)));
24269 /* ret = e2 * e3 */
24270 emit_insn (gen_rtx_SET (VOIDmode, res,
24271 gen_rtx_MULT (mode, e2, e3)));
24272 }
24273
24274 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
24275
24276 static void ATTRIBUTE_UNUSED
24277 i386_solaris_elf_named_section (const char *name, unsigned int flags,
24278 tree decl)
24279 {
24280 /* With Binutils 2.15, the "@unwind" marker must be specified on
24281 every occurrence of the ".eh_frame" section, not just the first
24282 one. */
24283 if (TARGET_64BIT
24284 && strcmp (name, ".eh_frame") == 0)
24285 {
24286 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
24287 flags & SECTION_WRITE ? "aw" : "a");
24288 return;
24289 }
24290 default_elf_asm_named_section (name, flags, decl);
24291 }
24292
24293 /* Return the mangling of TYPE if it is an extended fundamental type. */
24294
24295 static const char *
24296 ix86_mangle_type (const_tree type)
24297 {
24298 type = TYPE_MAIN_VARIANT (type);
24299
24300 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
24301 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
24302 return NULL;
24303
24304 switch (TYPE_MODE (type))
24305 {
24306 case TFmode:
24307 /* __float128 is "g". */
24308 return "g";
24309 case XFmode:
24310 /* "long double" or __float80 is "e". */
24311 return "e";
24312 default:
24313 return NULL;
24314 }
24315 }
24316
24317 /* For 32-bit code we can save PIC register setup by using
24318 __stack_chk_fail_local hidden function instead of calling
24319 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
24320 register, so it is better to call __stack_chk_fail directly. */
24321
24322 static tree
24323 ix86_stack_protect_fail (void)
24324 {
24325 return TARGET_64BIT
24326 ? default_external_stack_protect_fail ()
24327 : default_hidden_stack_protect_fail ();
24328 }
24329
24330 /* Select a format to encode pointers in exception handling data. CODE
24331 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
24332 true if the symbol may be affected by dynamic relocations.
24333
24334 ??? All x86 object file formats are capable of representing this.
24335 After all, the relocation needed is the same as for the call insn.
24336 Whether or not a particular assembler allows us to enter such, I
24337 guess we'll have to see. */
24338 int
24339 asm_preferred_eh_data_format (int code, int global)
24340 {
24341 if (flag_pic)
24342 {
24343 int type = DW_EH_PE_sdata8;
24344 if (!TARGET_64BIT
24345 || ix86_cmodel == CM_SMALL_PIC
24346 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
24347 type = DW_EH_PE_sdata4;
24348 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
24349 }
24350 if (ix86_cmodel == CM_SMALL
24351 || (ix86_cmodel == CM_MEDIUM && code))
24352 return DW_EH_PE_udata4;
24353 return DW_EH_PE_absptr;
24354 }
24355 \f
24356 /* Expand copysign from SIGN to the positive value ABS_VALUE
24357 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
24358 the sign-bit. */
24359 static void
24360 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
24361 {
24362 enum machine_mode mode = GET_MODE (sign);
24363 rtx sgn = gen_reg_rtx (mode);
24364 if (mask == NULL_RTX)
24365 {
24366 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
24367 if (!VECTOR_MODE_P (mode))
24368 {
24369 /* We need to generate a scalar mode mask in this case. */
24370 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
24371 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
24372 mask = gen_reg_rtx (mode);
24373 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
24374 }
24375 }
24376 else
24377 mask = gen_rtx_NOT (mode, mask);
24378 emit_insn (gen_rtx_SET (VOIDmode, sgn,
24379 gen_rtx_AND (mode, mask, sign)));
24380 emit_insn (gen_rtx_SET (VOIDmode, result,
24381 gen_rtx_IOR (mode, abs_value, sgn)));
24382 }
24383
24384 /* Expand fabs (OP0) and return a new rtx that holds the result. The
24385 mask for masking out the sign-bit is stored in *SMASK, if that is
24386 non-null. */
24387 static rtx
24388 ix86_expand_sse_fabs (rtx op0, rtx *smask)
24389 {
24390 enum machine_mode mode = GET_MODE (op0);
24391 rtx xa, mask;
24392
24393 xa = gen_reg_rtx (mode);
24394 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
24395 if (!VECTOR_MODE_P (mode))
24396 {
24397 /* We need to generate a scalar mode mask in this case. */
24398 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
24399 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
24400 mask = gen_reg_rtx (mode);
24401 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
24402 }
24403 emit_insn (gen_rtx_SET (VOIDmode, xa,
24404 gen_rtx_AND (mode, op0, mask)));
24405
24406 if (smask)
24407 *smask = mask;
24408
24409 return xa;
24410 }
24411
24412 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
24413 swapping the operands if SWAP_OPERANDS is true. The expanded
24414 code is a forward jump to a newly created label in case the
24415 comparison is true. The generated label rtx is returned. */
24416 static rtx
24417 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
24418 bool swap_operands)
24419 {
24420 rtx label, tmp;
24421
24422 if (swap_operands)
24423 {
24424 tmp = op0;
24425 op0 = op1;
24426 op1 = tmp;
24427 }
24428
24429 label = gen_label_rtx ();
24430 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
24431 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24432 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
24433 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
24434 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24435 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
24436 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24437 JUMP_LABEL (tmp) = label;
24438
24439 return label;
24440 }
24441
24442 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
24443 using comparison code CODE. Operands are swapped for the comparison if
24444 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
24445 static rtx
24446 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
24447 bool swap_operands)
24448 {
24449 enum machine_mode mode = GET_MODE (op0);
24450 rtx mask = gen_reg_rtx (mode);
24451
24452 if (swap_operands)
24453 {
24454 rtx tmp = op0;
24455 op0 = op1;
24456 op1 = tmp;
24457 }
24458
24459 if (mode == DFmode)
24460 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
24461 gen_rtx_fmt_ee (code, mode, op0, op1)));
24462 else
24463 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
24464 gen_rtx_fmt_ee (code, mode, op0, op1)));
24465
24466 return mask;
24467 }
24468
24469 /* Generate and return an rtx of mode MODE for 2**n where n is the number
24470 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
24471 static rtx
24472 ix86_gen_TWO52 (enum machine_mode mode)
24473 {
24474 REAL_VALUE_TYPE TWO52r;
24475 rtx TWO52;
24476
24477 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
24478 TWO52 = const_double_from_real_value (TWO52r, mode);
24479 TWO52 = force_reg (mode, TWO52);
24480
24481 return TWO52;
24482 }
24483
24484 /* Expand SSE sequence for computing lround from OP1 storing
24485 into OP0. */
24486 void
24487 ix86_expand_lround (rtx op0, rtx op1)
24488 {
24489 /* C code for the stuff we're doing below:
24490 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
24491 return (long)tmp;
24492 */
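/* Using nextafter (0.5, 0.0) instead of 0.5 avoids rounding up arguments
   that are just below 0.5: for the largest value smaller than 0.5, adding
   exactly 0.5 would round the sum up to 1.0 and give the wrong result. */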
24493 enum machine_mode mode = GET_MODE (op1);
24494 const struct real_format *fmt;
24495 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
24496 rtx adj;
24497
24498 /* load nextafter (0.5, 0.0) */
24499 fmt = REAL_MODE_FORMAT (mode);
24500 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
24501 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
24502
24503 /* adj = copysign (0.5, op1) */
24504 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
24505 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
24506
24507 /* adj = op1 + adj */
24508 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
24509
24510 /* op0 = (imode)adj */
24511 expand_fix (op0, adj, 0);
24512 }
24513
24514 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
24515 into OPERAND0. */
24516 void
24517 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
24518 {
24519 /* C code for the stuff we're doing below (for do_floor):
24520 xi = (long)op1;
24521 xi -= (double)xi > op1 ? 1 : 0;
24522 return xi;
24523 */
24524 enum machine_mode fmode = GET_MODE (op1);
24525 enum machine_mode imode = GET_MODE (op0);
24526 rtx ireg, freg, label, tmp;
24527
24528 /* reg = (long)op1 */
24529 ireg = gen_reg_rtx (imode);
24530 expand_fix (ireg, op1, 0);
24531
24532 /* freg = (double)reg */
24533 freg = gen_reg_rtx (fmode);
24534 expand_float (freg, ireg, 0);
24535
24536 /* ireg = (freg > op1) ? ireg - 1 : ireg */
24537 label = ix86_expand_sse_compare_and_jump (UNLE,
24538 freg, op1, !do_floor);
24539 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
24540 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
24541 emit_move_insn (ireg, tmp);
24542
24543 emit_label (label);
24544 LABEL_NUSES (label) = 1;
24545
24546 emit_move_insn (op0, ireg);
24547 }
24548
24549 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
24550 result in OPERAND0. */
24551 void
24552 ix86_expand_rint (rtx operand0, rtx operand1)
24553 {
24554 /* C code for the stuff we're doing below:
24555 xa = fabs (operand1);
24556 if (!isless (xa, 2**52))
24557 return operand1;
24558 xa = xa + 2**52 - 2**52;
24559 return copysign (xa, operand1);
24560 */
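/* Adding and then subtracting TWO52 (2**52 for DFmode, 2**23 for SFmode)
   rounds XA to an integer: once xa + TWO52 is at least TWO52 the significand
   has no fraction bits left, so the addition rounds xa to the nearest
   integer in the current rounding mode, and the subtraction recovers that
   integer exactly. */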
24561 enum machine_mode mode = GET_MODE (operand0);
24562 rtx res, xa, label, TWO52, mask;
24563
24564 res = gen_reg_rtx (mode);
24565 emit_move_insn (res, operand1);
24566
24567 /* xa = abs (operand1) */
24568 xa = ix86_expand_sse_fabs (res, &mask);
24569
24570 /* if (!isless (xa, TWO52)) goto label; */
24571 TWO52 = ix86_gen_TWO52 (mode);
24572 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24573
24574 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24575 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
24576
24577 ix86_sse_copysign_to_positive (res, xa, res, mask);
24578
24579 emit_label (label);
24580 LABEL_NUSES (label) = 1;
24581
24582 emit_move_insn (operand0, res);
24583 }
24584
24585 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
24586 into OPERAND0. */
24587 void
24588 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
24589 {
24590 /* C code for the stuff we expand below.
24591 double xa = fabs (x), x2;
24592 if (!isless (xa, TWO52))
24593 return x;
24594 xa = xa + TWO52 - TWO52;
24595 x2 = copysign (xa, x);
24596 Compensate. Floor:
24597 if (x2 > x)
24598 x2 -= 1;
24599 Compensate. Ceil:
24600 if (x2 < x)
24601 x2 -= -1;
24602 return x2;
24603 */
24604 enum machine_mode mode = GET_MODE (operand0);
24605 rtx xa, TWO52, tmp, label, one, res, mask;
24606
24607 TWO52 = ix86_gen_TWO52 (mode);
24608
24609 /* Temporary for holding the result, initialized to the input
24610 operand to ease control flow. */
24611 res = gen_reg_rtx (mode);
24612 emit_move_insn (res, operand1);
24613
24614 /* xa = abs (operand1) */
24615 xa = ix86_expand_sse_fabs (res, &mask);
24616
24617 /* if (!isless (xa, TWO52)) goto label; */
24618 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24619
24620 /* xa = xa + TWO52 - TWO52; */
24621 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24622 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
24623
24624 /* xa = copysign (xa, operand1) */
24625 ix86_sse_copysign_to_positive (xa, xa, res, mask);
24626
24627 /* generate 1.0 or -1.0 */
24628 one = force_reg (mode,
24629 const_double_from_real_value (do_floor
24630 ? dconst1 : dconstm1, mode));
24631
24632 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
24633 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
24634 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24635 gen_rtx_AND (mode, one, tmp)));
24636 /* We always need to subtract here to preserve signed zero. */
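/* (-0.0) - (+0.0) is -0.0 while (-0.0) + (+0.0) is +0.0, so subtracting
   keeps a negative zero intact when no compensation is needed; ONE is
   negated for the ceil case so the subtraction still adjusts in the right
   direction. */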
24637 tmp = expand_simple_binop (mode, MINUS,
24638 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
24639 emit_move_insn (res, tmp);
24640
24641 emit_label (label);
24642 LABEL_NUSES (label) = 1;
24643
24644 emit_move_insn (operand0, res);
24645 }
24646
24647 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
24648 into OPERAND0. */
24649 void
24650 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
24651 {
24652 /* C code for the stuff we expand below.
24653 double xa = fabs (x), x2;
24654 if (!isless (xa, TWO52))
24655 return x;
24656 x2 = (double)(long)x;
24657 Compensate. Floor:
24658 if (x2 > x)
24659 x2 -= 1;
24660 Compensate. Ceil:
24661 if (x2 < x)
24662 x2 += 1;
24663 if (HONOR_SIGNED_ZEROS (mode))
24664 return copysign (x2, x);
24665 return x2;
24666 */
24667 enum machine_mode mode = GET_MODE (operand0);
24668 rtx xa, xi, TWO52, tmp, label, one, res, mask;
24669
24670 TWO52 = ix86_gen_TWO52 (mode);
24671
24672 /* Temporary for holding the result, initialized to the input
24673 operand to ease control flow. */
24674 res = gen_reg_rtx (mode);
24675 emit_move_insn (res, operand1);
24676
24677 /* xa = abs (operand1) */
24678 xa = ix86_expand_sse_fabs (res, &mask);
24679
24680 /* if (!isless (xa, TWO52)) goto label; */
24681 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24682
24683 /* xa = (double)(long)x */
24684 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
24685 expand_fix (xi, res, 0);
24686 expand_float (xa, xi, 0);
24687
24688 /* generate 1.0 */
24689 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
24690
24691 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
24692 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
24693 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24694 gen_rtx_AND (mode, one, tmp)));
24695 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
24696 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
24697 emit_move_insn (res, tmp);
24698
24699 if (HONOR_SIGNED_ZEROS (mode))
24700 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
24701
24702 emit_label (label);
24703 LABEL_NUSES (label) = 1;
24704
24705 emit_move_insn (operand0, res);
24706 }
24707
24708 /* Expand SSE sequence for computing round from OPERAND1 storing
24709 into OPERAND0. This sequence works without relying on DImode truncation
24710 via cvttsd2siq, which is only available on 64-bit targets. */
24711 void
24712 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
24713 {
24714 /* C code for the stuff we expand below.
24715 double xa = fabs (x), xa2, x2;
24716 if (!isless (xa, TWO52))
24717 return x;
24718 Using the absolute value and copying back sign makes
24719 -0.0 -> -0.0 correct.
24720 xa2 = xa + TWO52 - TWO52;
24721 Compensate.
24722 dxa = xa2 - xa;
24723 if (dxa <= -0.5)
24724 xa2 += 1;
24725 else if (dxa > 0.5)
24726 xa2 -= 1;
24727 x2 = copysign (xa2, x);
24728 return x2;
24729 */
24730 enum machine_mode mode = GET_MODE (operand0);
24731 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
24732
24733 TWO52 = ix86_gen_TWO52 (mode);
24734
24735 /* Temporary for holding the result, initialized to the input
24736 operand to ease control flow. */
24737 res = gen_reg_rtx (mode);
24738 emit_move_insn (res, operand1);
24739
24740 /* xa = abs (operand1) */
24741 xa = ix86_expand_sse_fabs (res, &mask);
24742
24743 /* if (!isless (xa, TWO52)) goto label; */
24744 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24745
24746 /* xa2 = xa + TWO52 - TWO52; */
24747 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24748 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
24749
24750 /* dxa = xa2 - xa; */
24751 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
24752
24753 /* generate 0.5, 1.0 and -0.5 */
24754 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
24755 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
24756 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
24757 0, OPTAB_DIRECT);
24758
24759 /* Compensate. */
24760 tmp = gen_reg_rtx (mode);
24761 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
24762 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
24763 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24764 gen_rtx_AND (mode, one, tmp)));
24765 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
24766 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
24767 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
24768 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24769 gen_rtx_AND (mode, one, tmp)));
24770 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
24771
24772 /* res = copysign (xa2, operand1) */
24773 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
24774
24775 emit_label (label);
24776 LABEL_NUSES (label) = 1;
24777
24778 emit_move_insn (operand0, res);
24779 }
24780
24781 /* Expand SSE sequence for computing trunc from OPERAND1 storing
24782 into OPERAND0. */
24783 void
24784 ix86_expand_trunc (rtx operand0, rtx operand1)
24785 {
24786 /* C code for SSE variant we expand below.
24787 double xa = fabs (x), x2;
24788 if (!isless (xa, TWO52))
24789 return x;
24790 x2 = (double)(long)x;
24791 if (HONOR_SIGNED_ZEROS (mode))
24792 return copysign (x2, x);
24793 return x2;
24794 */
24795 enum machine_mode mode = GET_MODE (operand0);
24796 rtx xa, xi, TWO52, label, res, mask;
24797
24798 TWO52 = ix86_gen_TWO52 (mode);
24799
24800 /* Temporary for holding the result, initialized to the input
24801 operand to ease control flow. */
24802 res = gen_reg_rtx (mode);
24803 emit_move_insn (res, operand1);
24804
24805 /* xa = abs (operand1) */
24806 xa = ix86_expand_sse_fabs (res, &mask);
24807
24808 /* if (!isless (xa, TWO52)) goto label; */
24809 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24810
24811 /* x = (double)(long)x */
24812 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
24813 expand_fix (xi, res, 0);
24814 expand_float (res, xi, 0);
24815
24816 if (HONOR_SIGNED_ZEROS (mode))
24817 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
24818
24819 emit_label (label);
24820 LABEL_NUSES (label) = 1;
24821
24822 emit_move_insn (operand0, res);
24823 }
24824
24825 /* Expand SSE sequence for computing trunc from OPERAND1 storing
24826 into OPERAND0. */
24827 void
24828 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
24829 {
24830 enum machine_mode mode = GET_MODE (operand0);
24831 rtx xa, mask, TWO52, label, one, res, smask, tmp;
24832
24833 /* C code for SSE variant we expand below.
24834 double xa = fabs (x), x2;
24835 if (!isless (xa, TWO52))
24836 return x;
24837 xa2 = xa + TWO52 - TWO52;
24838 Compensate:
24839 if (xa2 > xa)
24840 xa2 -= 1.0;
24841 x2 = copysign (xa2, x);
24842 return x2;
24843 */
24844
24845 TWO52 = ix86_gen_TWO52 (mode);
24846
24847 /* Temporary for holding the result, initialized to the input
24848 operand to ease control flow. */
24849 res = gen_reg_rtx (mode);
24850 emit_move_insn (res, operand1);
24851
24852 /* xa = abs (operand1) */
24853 xa = ix86_expand_sse_fabs (res, &smask);
24854
24855 /* if (!isless (xa, TWO52)) goto label; */
24856 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24857
24858 /* res = xa + TWO52 - TWO52; */
24859 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24860 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
24861 emit_move_insn (res, tmp);
24862
24863 /* generate 1.0 */
24864 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
24865
24866 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
24867 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
24868 emit_insn (gen_rtx_SET (VOIDmode, mask,
24869 gen_rtx_AND (mode, mask, one)));
24870 tmp = expand_simple_binop (mode, MINUS,
24871 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
24872 emit_move_insn (res, tmp);
24873
24874 /* res = copysign (res, operand1) */
24875 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
24876
24877 emit_label (label);
24878 LABEL_NUSES (label) = 1;
24879
24880 emit_move_insn (operand0, res);
24881 }
24882
24883 /* Expand SSE sequence for computing round from OPERAND1 storing
24884 into OPERAND0. */
24885 void
24886 ix86_expand_round (rtx operand0, rtx operand1)
24887 {
24888 /* C code for the stuff we're doing below:
24889 double xa = fabs (x);
24890 if (!isless (xa, TWO52))
24891 return x;
24892 xa = (double)(long)(xa + nextafter (0.5, 0.0));
24893 return copysign (xa, x);
24894 */
24895 enum machine_mode mode = GET_MODE (operand0);
24896 rtx res, TWO52, xa, label, xi, half, mask;
24897 const struct real_format *fmt;
24898 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
24899
24900 /* Temporary for holding the result, initialized to the input
24901 operand to ease control flow. */
24902 res = gen_reg_rtx (mode);
24903 emit_move_insn (res, operand1);
24904
24905 TWO52 = ix86_gen_TWO52 (mode);
24906 xa = ix86_expand_sse_fabs (res, &mask);
24907 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24908
24909 /* load nextafter (0.5, 0.0) */
24910 fmt = REAL_MODE_FORMAT (mode);
24911 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
24912 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
24913
24914 /* xa = xa + 0.5 */
24915 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
24916 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
24917
24918 /* xa = (double)(int64_t)xa */
24919 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
24920 expand_fix (xi, xa, 0);
24921 expand_float (xa, xi, 0);
24922
24923 /* res = copysign (xa, operand1) */
24924 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
24925
24926 emit_label (label);
24927 LABEL_NUSES (label) = 1;
24928
24929 emit_move_insn (operand0, res);
24930 }
24931
24932 \f
24933 /* Check whether an SSE5 instruction is valid or not.
24934 OPERANDS is the array of operands.
24935 NUM is the number of operands.
24936 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
24937 NUM_MEMORY is the maximum number of memory operands to accept. */
24938 bool
24939 ix86_sse5_valid_op_p (rtx operands[], rtx insn, int num, bool uses_oc0, int num_memory)
24940 {
24941 int mem_mask;
24942 int mem_count;
24943 int i;
24944
24945 /* Count the number of memory arguments */
24946 mem_mask = 0;
24947 mem_count = 0;
24948 for (i = 0; i < num; i++)
24949 {
24950 enum machine_mode mode = GET_MODE (operands[i]);
24951 if (register_operand (operands[i], mode))
24952 ;
24953
24954 else if (memory_operand (operands[i], mode))
24955 {
24956 mem_mask |= (1 << i);
24957 mem_count++;
24958 }
24959
24960 else
24961 {
24962 rtx pattern = PATTERN (insn);
24963
24964 /* allow 0 for pcmov */
24965 if (GET_CODE (pattern) != SET
24966 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
24967 || i < 2
24968 || operands[i] != CONST0_RTX (mode))
24969 return false;
24970 }
24971 }
24972
24973 /* If there were no memory operations, allow the insn */
24974 if (mem_mask == 0)
24975 return true;
24976
24977 /* Do not allow the destination register to be a memory operand. */
24978 else if (mem_mask & (1 << 0))
24979 return false;
24980
24981 /* If there are too many memory operations, disallow the instruction. While
24982 the hardware only allows one memory reference, before register allocation
24983 we sometimes allow two memory operations for some insns so that code
24984 like the following can be optimized:
24985
24986 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
24987
24988 or similar cases that are vectorized into using the fmaddss
24989 instruction. */
24990 else if (mem_count > num_memory)
24991 return false;
24992
24993 /* Don't allow more than one memory operation if not optimizing. */
24994 else if (mem_count > 1 && !optimize)
24995 return false;
24996
24997 else if (num == 4 && mem_count == 1)
24998 {
24999 /* formats (destination is the first argument), example fmaddss:
25000 xmm1, xmm1, xmm2, xmm3/mem
25001 xmm1, xmm1, xmm2/mem, xmm3
25002 xmm1, xmm2, xmm3/mem, xmm1
25003 xmm1, xmm2/mem, xmm3, xmm1 */
25004 if (uses_oc0)
25005 return ((mem_mask == (1 << 1))
25006 || (mem_mask == (1 << 2))
25007 || (mem_mask == (1 << 3)));
25008
25009 /* format, example pmacsdd:
25010 xmm1, xmm2, xmm3/mem, xmm1 */
25011 else
25012 return (mem_mask == (1 << 2));
25013 }
25014
25015 else if (num == 4 && num_memory == 2)
25016 {
25017 /* If there are two memory operations, we can load one of the memory ops
25018 into the destination register. This is for optimizing the
25019 multiply/add ops, for which the combiner has given both the multiply
25020 and the add insns a memory operand. We have to be careful that the
25021 destination doesn't overlap with the inputs. */
25022 rtx op0 = operands[0];
25023
25024 if (reg_mentioned_p (op0, operands[1])
25025 || reg_mentioned_p (op0, operands[2])
25026 || reg_mentioned_p (op0, operands[3]))
25027 return false;
25028
25029 /* formats (destination is the first argument), example fmaddss:
25030 xmm1, xmm1, xmm2, xmm3/mem
25031 xmm1, xmm1, xmm2/mem, xmm3
25032 xmm1, xmm2, xmm3/mem, xmm1
25033 xmm1, xmm2/mem, xmm3, xmm1
25034
25035 For the oc0 case, we will load either operands[1] or operands[3] into
25036 operands[0], so any combination of 2 memory operands is ok. */
25037 if (uses_oc0)
25038 return true;
25039
25040 /* format, example pmacsdd:
25041 xmm1, xmm2, xmm3/mem, xmm1
25042
25043 For the integer multiply/add instructions be more restrictive and
25044 require operands[2] and operands[3] to be the memory operands. */
25045 else
25046 return (mem_mask == ((1 << 2) | (1 << 3)));
25047 }
25048
25049 else if (num == 3 && num_memory == 1)
25050 {
25051 /* formats, example protb:
25052 xmm1, xmm2, xmm3/mem
25053 xmm1, xmm2/mem, xmm3 */
25054 if (uses_oc0)
25055 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
25056
25057 /* format, example comeq:
25058 xmm1, xmm2, xmm3/mem */
25059 else
25060 return (mem_mask == (1 << 2));
25061 }
25062
25063 else
25064 gcc_unreachable ();
25065
25066 return false;
25067 }
25068
25069 \f
25070 /* Fix up an SSE5 instruction that has 2 memory input references into a form the
25071 hardware will allow by using the destination register to load one of the
25072 memory operations. Presently this is used by the multiply/add routines to
25073 allow 2 memory references. */
25074
25075 void
25076 ix86_expand_sse5_multiple_memory (rtx operands[],
25077 int num,
25078 enum machine_mode mode)
25079 {
25080 rtx op0 = operands[0];
25081 if (num != 4
25082 || memory_operand (op0, mode)
25083 || reg_mentioned_p (op0, operands[1])
25084 || reg_mentioned_p (op0, operands[2])
25085 || reg_mentioned_p (op0, operands[3]))
25086 gcc_unreachable ();
25087
25088 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
25089 the destination register. */
25090 if (memory_operand (operands[1], mode))
25091 {
25092 emit_move_insn (op0, operands[1]);
25093 operands[1] = op0;
25094 }
25095 else if (memory_operand (operands[3], mode))
25096 {
25097 emit_move_insn (op0, operands[3]);
25098 operands[3] = op0;
25099 }
25100 else
25101 gcc_unreachable ();
25102
25103 return;
25104 }
25105
25106 \f
25107 /* Table of valid machine attributes. */
25108 static const struct attribute_spec ix86_attribute_table[] =
25109 {
25110 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
25111 /* Stdcall attribute says callee is responsible for popping arguments
25112 if they are not variable. */
25113 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25114 /* Fastcall attribute says callee is responsible for popping arguments
25115 if they are not variable. */
25116 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25117 /* Cdecl attribute says the callee is a normal C declaration */
25118 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25119 /* Regparm attribute specifies how many integer arguments are to be
25120 passed in registers. */
25121 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
25122 /* Sseregparm attribute says we are using x86_64 calling conventions
25123 for FP arguments. */
25124 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25125 /* force_align_arg_pointer says this function realigns the stack at entry. */
25126 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
25127 false, true, true, ix86_handle_cconv_attribute },
25128 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25129 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
25130 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
25131 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
25132 #endif
25133 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
25134 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
25135 #ifdef SUBTARGET_ATTRIBUTE_TABLE
25136 SUBTARGET_ATTRIBUTE_TABLE,
25137 #endif
25138 { NULL, 0, 0, false, false, false, NULL }
25139 };
25140
25141 /* Implement targetm.vectorize.builtin_vectorization_cost. */
25142 static int
25143 x86_builtin_vectorization_cost (bool runtime_test)
25144 {
25145 /* If the branch of the runtime test is taken, i.e. the vectorized
25146 version is skipped, this incurs a misprediction cost (because the
25147 vectorized version is expected to be the fall-through). So we subtract
25148 the latency of a mispredicted branch from the costs that are incurred
25149 when the vectorized version is executed.
25150
25151 TODO: The values in individual target tables have to be tuned or new
25152 fields may be needed. For eg. on K8, the default branch path is the
25153 not-taken path. If the taken path is predicted correctly, the minimum
25154 penalty of going down the taken-path is 1 cycle. If the taken-path is
25155 not predicted correctly, then the minimum penalty is 10 cycles. */
25156
25157 if (runtime_test)
25158 {
25159 return -ix86_cost->cond_taken_branch_cost;
25160 }
25161 else
25162 return 0;
25163 }
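/* Illustrative usage (hypothetical caller, not from this file): the
   vectorizer adds the value returned by this hook when it costs the guard
   branch of a runtime test, roughly

     cost += targetm.vectorize.builtin_vectorization_cost (true);

   so with a cond_taken_branch_cost of, say, COSTS_N_INSNS (10), the
   estimated cost of the vectorized path drops by that amount.  */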
25164
25165 /* Initialize the GCC target structure. */
25166 #undef TARGET_ATTRIBUTE_TABLE
25167 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
25168 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25169 # undef TARGET_MERGE_DECL_ATTRIBUTES
25170 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
25171 #endif
25172
25173 #undef TARGET_COMP_TYPE_ATTRIBUTES
25174 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
25175
25176 #undef TARGET_INIT_BUILTINS
25177 #define TARGET_INIT_BUILTINS ix86_init_builtins
25178 #undef TARGET_EXPAND_BUILTIN
25179 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
25180
25181 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
25182 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
25183 ix86_builtin_vectorized_function
25184
25185 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
25186 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
25187
25188 #undef TARGET_BUILTIN_RECIPROCAL
25189 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
25190
25191 #undef TARGET_ASM_FUNCTION_EPILOGUE
25192 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
25193
25194 #undef TARGET_ENCODE_SECTION_INFO
25195 #ifndef SUBTARGET_ENCODE_SECTION_INFO
25196 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
25197 #else
25198 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
25199 #endif
25200
25201 #undef TARGET_ASM_OPEN_PAREN
25202 #define TARGET_ASM_OPEN_PAREN ""
25203 #undef TARGET_ASM_CLOSE_PAREN
25204 #define TARGET_ASM_CLOSE_PAREN ""
25205
25206 #undef TARGET_ASM_ALIGNED_HI_OP
25207 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
25208 #undef TARGET_ASM_ALIGNED_SI_OP
25209 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
25210 #ifdef ASM_QUAD
25211 #undef TARGET_ASM_ALIGNED_DI_OP
25212 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
25213 #endif
25214
25215 #undef TARGET_ASM_UNALIGNED_HI_OP
25216 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
25217 #undef TARGET_ASM_UNALIGNED_SI_OP
25218 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
25219 #undef TARGET_ASM_UNALIGNED_DI_OP
25220 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
25221
25222 #undef TARGET_SCHED_ADJUST_COST
25223 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
25224 #undef TARGET_SCHED_ISSUE_RATE
25225 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
25226 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
25227 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
25228 ia32_multipass_dfa_lookahead
25229
25230 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
25231 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
25232
25233 #ifdef HAVE_AS_TLS
25234 #undef TARGET_HAVE_TLS
25235 #define TARGET_HAVE_TLS true
25236 #endif
25237 #undef TARGET_CANNOT_FORCE_CONST_MEM
25238 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
25239 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
25240 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
25241
25242 #undef TARGET_DELEGITIMIZE_ADDRESS
25243 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
25244
25245 #undef TARGET_MS_BITFIELD_LAYOUT_P
25246 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
25247
25248 #if TARGET_MACHO
25249 #undef TARGET_BINDS_LOCAL_P
25250 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
25251 #endif
25252 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25253 #undef TARGET_BINDS_LOCAL_P
25254 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
25255 #endif
25256
25257 #undef TARGET_ASM_OUTPUT_MI_THUNK
25258 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
25259 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
25260 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
25261
25262 #undef TARGET_ASM_FILE_START
25263 #define TARGET_ASM_FILE_START x86_file_start
25264
25265 #undef TARGET_DEFAULT_TARGET_FLAGS
25266 #define TARGET_DEFAULT_TARGET_FLAGS \
25267 (TARGET_DEFAULT \
25268 | TARGET_SUBTARGET_DEFAULT \
25269 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
25270
25271 #undef TARGET_HANDLE_OPTION
25272 #define TARGET_HANDLE_OPTION ix86_handle_option
25273
25274 #undef TARGET_RTX_COSTS
25275 #define TARGET_RTX_COSTS ix86_rtx_costs
25276 #undef TARGET_ADDRESS_COST
25277 #define TARGET_ADDRESS_COST ix86_address_cost
25278
25279 #undef TARGET_FIXED_CONDITION_CODE_REGS
25280 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
25281 #undef TARGET_CC_MODES_COMPATIBLE
25282 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
25283
25284 #undef TARGET_MACHINE_DEPENDENT_REORG
25285 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
25286
25287 #undef TARGET_BUILD_BUILTIN_VA_LIST
25288 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
25289
25290 #undef TARGET_EXPAND_BUILTIN_VA_START
25291 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
25292
25293 #undef TARGET_MD_ASM_CLOBBERS
25294 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
25295
25296 #undef TARGET_PROMOTE_PROTOTYPES
25297 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
25298 #undef TARGET_STRUCT_VALUE_RTX
25299 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
25300 #undef TARGET_SETUP_INCOMING_VARARGS
25301 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
25302 #undef TARGET_MUST_PASS_IN_STACK
25303 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
25304 #undef TARGET_PASS_BY_REFERENCE
25305 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
25306 #undef TARGET_INTERNAL_ARG_POINTER
25307 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
25308 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
25309 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
25310 #undef TARGET_STRICT_ARGUMENT_NAMING
25311 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
25312
25313 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
25314 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
25315
25316 #undef TARGET_SCALAR_MODE_SUPPORTED_P
25317 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
25318
25319 #undef TARGET_VECTOR_MODE_SUPPORTED_P
25320 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
25321
25322 #undef TARGET_C_MODE_FOR_SUFFIX
25323 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
25324
25325 #ifdef HAVE_AS_TLS
25326 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
25327 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
25328 #endif
25329
25330 #ifdef SUBTARGET_INSERT_ATTRIBUTES
25331 #undef TARGET_INSERT_ATTRIBUTES
25332 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
25333 #endif
25334
25335 #undef TARGET_MANGLE_TYPE
25336 #define TARGET_MANGLE_TYPE ix86_mangle_type
25337
25338 #undef TARGET_STACK_PROTECT_FAIL
25339 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
25340
25341 #undef TARGET_FUNCTION_VALUE
25342 #define TARGET_FUNCTION_VALUE ix86_function_value
25343
25344 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
25345 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
25346
25347 struct gcc_target targetm = TARGET_INITIALIZER;
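/* Note (added for illustration): TARGET_INITIALIZER, provided by
   target-def.h, expands to a full struct gcc_target aggregate built from
   the TARGET_* macros defined above, falling back to the documented
   defaults for any hook this file does not override; targetm is thus the
   i386 back end's single table of target hooks used throughout the
   compiler.  */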
25348 \f
25349 #include "gt-i386.h"