1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008
4 Free Software Foundation, Inc.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "c-common.h"
39 #include "except.h"
40 #include "function.h"
41 #include "recog.h"
42 #include "expr.h"
43 #include "optabs.h"
44 #include "toplev.h"
45 #include "basic-block.h"
46 #include "ggc.h"
47 #include "target.h"
48 #include "target-def.h"
49 #include "langhooks.h"
50 #include "cgraph.h"
51 #include "gimple.h"
52 #include "dwarf2.h"
53 #include "df.h"
54 #include "tm-constrs.h"
55 #include "params.h"
56
57 static int x86_builtin_vectorization_cost (bool);
58 static rtx legitimize_dllimport_symbol (rtx, bool);
59
60 #ifndef CHECK_STACK_LIMIT
61 #define CHECK_STACK_LIMIT (-1)
62 #endif
63
64 /* Return index of given mode in mult and division cost tables. */
65 #define MODE_INDEX(mode) \
66 ((mode) == QImode ? 0 \
67 : (mode) == HImode ? 1 \
68 : (mode) == SImode ? 2 \
69 : (mode) == DImode ? 3 \
70 : 4)
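/* A minimal illustrative sketch (not a definitive use): MODE_INDEX is meant
   to index the per-mode cost arrays in struct processor_costs, such as the
   multiply and divide tables below.  The field names mult_init/divide are
   assumed from their use elsewhere in this file; the block is kept under
   #if 0 because it is an example only.  */
#if 0
static int
example_mult_start_cost (enum machine_mode mode)
{
  /* QImode -> 0, HImode -> 1, SImode -> 2, DImode -> 3, anything else -> 4.  */
  return ix86_cost->mult_init[MODE_INDEX (mode)];
}
#endif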
71
72 /* Processor costs (relative to an add) */
73 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
74 #define COSTS_N_BYTES(N) ((N) * 2)
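/* Worked example of the scale: with COSTS_N_INSNS (N) == (N) * 4 and an add
   being 2 bytes, COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so a 2-byte add
   scores the same in the size table as a 1-insn add does in the speed tables,
   keeping the two cost scales comparable.  */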
75
76 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
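/* A minimal sketch of how the memcpy/memset descriptors below are meant to be
   read, assuming the stringop_algs layout from i386.h (an algorithm for
   unknown sizes followed by {max_size, alg} pairs terminated by
   max_size == -1).  The real selection logic lives much further down in this
   file, so this block is illustrative only.  */
#if 0
static enum stringop_alg
example_pick_stringop_alg (const struct stringop_algs *algs,
			   HOST_WIDE_INT size)
{
  unsigned int i;

  if (size < 0)
    return algs->unknown_size;
  for (i = 0; i < MAX_STRINGOP_ALGS; i++)
    if (algs->size[i].max == -1 || size <= algs->size[i].max)
      return algs->size[i].alg;
  return libcall;
}
#endif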
77
78 const
79 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
80 COSTS_N_BYTES (2), /* cost of an add instruction */
81 COSTS_N_BYTES (3), /* cost of a lea instruction */
82 COSTS_N_BYTES (2), /* variable shift costs */
83 COSTS_N_BYTES (3), /* constant shift costs */
84 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
85 COSTS_N_BYTES (3), /* HI */
86 COSTS_N_BYTES (3), /* SI */
87 COSTS_N_BYTES (3), /* DI */
88 COSTS_N_BYTES (5)}, /* other */
89 0, /* cost of multiply per each bit set */
90 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
91 COSTS_N_BYTES (3), /* HI */
92 COSTS_N_BYTES (3), /* SI */
93 COSTS_N_BYTES (3), /* DI */
94 COSTS_N_BYTES (5)}, /* other */
95 COSTS_N_BYTES (3), /* cost of movsx */
96 COSTS_N_BYTES (3), /* cost of movzx */
97 0, /* "large" insn */
98 2, /* MOVE_RATIO */
99 2, /* cost for loading QImode using movzbl */
100 {2, 2, 2}, /* cost of loading integer registers
101 in QImode, HImode and SImode.
102 Relative to reg-reg move (2). */
103 {2, 2, 2}, /* cost of storing integer registers */
104 2, /* cost of reg,reg fld/fst */
105 {2, 2, 2}, /* cost of loading fp registers
106 in SFmode, DFmode and XFmode */
107 {2, 2, 2}, /* cost of storing fp registers
108 in SFmode, DFmode and XFmode */
109 3, /* cost of moving MMX register */
110 {3, 3}, /* cost of loading MMX registers
111 in SImode and DImode */
112 {3, 3}, /* cost of storing MMX registers
113 in SImode and DImode */
114 3, /* cost of moving SSE register */
115 {3, 3, 3}, /* cost of loading SSE registers
116 in SImode, DImode and TImode */
117 {3, 3, 3}, /* cost of storing SSE registers
118 in SImode, DImode and TImode */
119 3, /* MMX or SSE register to integer */
120 0, /* size of l1 cache */
121 0, /* size of l2 cache */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
124 2, /* Branch cost */
125 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
126 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
127 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
128 COSTS_N_BYTES (2), /* cost of FABS instruction. */
129 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
130 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
131 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
132 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
133 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
134 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
135 1, /* scalar_stmt_cost. */
136 1, /* scalar load_cost. */
137 1, /* scalar_store_cost. */
138 1, /* vec_stmt_cost. */
139 1, /* vec_to_scalar_cost. */
140 1, /* scalar_to_vec_cost. */
141 1, /* vec_align_load_cost. */
142 1, /* vec_unalign_load_cost. */
143 1, /* vec_store_cost. */
144 1, /* cond_taken_branch_cost. */
145 1, /* cond_not_taken_branch_cost. */
146 };
147
148 /* Processor costs (relative to an add) */
149 static const
150 struct processor_costs i386_cost = { /* 386 specific costs */
151 COSTS_N_INSNS (1), /* cost of an add instruction */
152 COSTS_N_INSNS (1), /* cost of a lea instruction */
153 COSTS_N_INSNS (3), /* variable shift costs */
154 COSTS_N_INSNS (2), /* constant shift costs */
155 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
156 COSTS_N_INSNS (6), /* HI */
157 COSTS_N_INSNS (6), /* SI */
158 COSTS_N_INSNS (6), /* DI */
159 COSTS_N_INSNS (6)}, /* other */
160 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
161 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
162 COSTS_N_INSNS (23), /* HI */
163 COSTS_N_INSNS (23), /* SI */
164 COSTS_N_INSNS (23), /* DI */
165 COSTS_N_INSNS (23)}, /* other */
166 COSTS_N_INSNS (3), /* cost of movsx */
167 COSTS_N_INSNS (2), /* cost of movzx */
168 15, /* "large" insn */
169 3, /* MOVE_RATIO */
170 4, /* cost for loading QImode using movzbl */
171 {2, 4, 2}, /* cost of loading integer registers
172 in QImode, HImode and SImode.
173 Relative to reg-reg move (2). */
174 {2, 4, 2}, /* cost of storing integer registers */
175 2, /* cost of reg,reg fld/fst */
176 {8, 8, 8}, /* cost of loading fp registers
177 in SFmode, DFmode and XFmode */
178 {8, 8, 8}, /* cost of storing fp registers
179 in SFmode, DFmode and XFmode */
180 2, /* cost of moving MMX register */
181 {4, 8}, /* cost of loading MMX registers
182 in SImode and DImode */
183 {4, 8}, /* cost of storing MMX registers
184 in SImode and DImode */
185 2, /* cost of moving SSE register */
186 {4, 8, 16}, /* cost of loading SSE registers
187 in SImode, DImode and TImode */
188 {4, 8, 16}, /* cost of storing SSE registers
189 in SImode, DImode and TImode */
190 3, /* MMX or SSE register to integer */
191 0, /* size of l1 cache */
192 0, /* size of l2 cache */
193 0, /* size of prefetch block */
194 0, /* number of parallel prefetches */
195 1, /* Branch cost */
196 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
197 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
198 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
199 COSTS_N_INSNS (22), /* cost of FABS instruction. */
200 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
201 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
202 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
203 DUMMY_STRINGOP_ALGS},
204 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
205 DUMMY_STRINGOP_ALGS},
206 1, /* scalar_stmt_cost. */
207 1, /* scalar load_cost. */
208 1, /* scalar_store_cost. */
209 1, /* vec_stmt_cost. */
210 1, /* vec_to_scalar_cost. */
211 1, /* scalar_to_vec_cost. */
212 1, /* vec_align_load_cost. */
213 2, /* vec_unalign_load_cost. */
214 1, /* vec_store_cost. */
215 3, /* cond_taken_branch_cost. */
216 1, /* cond_not_taken_branch_cost. */
217 };
218
219 static const
220 struct processor_costs i486_cost = { /* 486 specific costs */
221 COSTS_N_INSNS (1), /* cost of an add instruction */
222 COSTS_N_INSNS (1), /* cost of a lea instruction */
223 COSTS_N_INSNS (3), /* variable shift costs */
224 COSTS_N_INSNS (2), /* constant shift costs */
225 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
226 COSTS_N_INSNS (12), /* HI */
227 COSTS_N_INSNS (12), /* SI */
228 COSTS_N_INSNS (12), /* DI */
229 COSTS_N_INSNS (12)}, /* other */
230 1, /* cost of multiply per each bit set */
231 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
232 COSTS_N_INSNS (40), /* HI */
233 COSTS_N_INSNS (40), /* SI */
234 COSTS_N_INSNS (40), /* DI */
235 COSTS_N_INSNS (40)}, /* other */
236 COSTS_N_INSNS (3), /* cost of movsx */
237 COSTS_N_INSNS (2), /* cost of movzx */
238 15, /* "large" insn */
239 3, /* MOVE_RATIO */
240 4, /* cost for loading QImode using movzbl */
241 {2, 4, 2}, /* cost of loading integer registers
242 in QImode, HImode and SImode.
243 Relative to reg-reg move (2). */
244 {2, 4, 2}, /* cost of storing integer registers */
245 2, /* cost of reg,reg fld/fst */
246 {8, 8, 8}, /* cost of loading fp registers
247 in SFmode, DFmode and XFmode */
248 {8, 8, 8}, /* cost of storing fp registers
249 in SFmode, DFmode and XFmode */
250 2, /* cost of moving MMX register */
251 {4, 8}, /* cost of loading MMX registers
252 in SImode and DImode */
253 {4, 8}, /* cost of storing MMX registers
254 in SImode and DImode */
255 2, /* cost of moving SSE register */
256 {4, 8, 16}, /* cost of loading SSE registers
257 in SImode, DImode and TImode */
258 {4, 8, 16}, /* cost of storing SSE registers
259 in SImode, DImode and TImode */
260 3, /* MMX or SSE register to integer */
261 4, /* size of l1 cache. 486 has 8kB cache
262 shared for code and data, so 4kB is
263 not really precise. */
264 4, /* size of l2 cache */
265 0, /* size of prefetch block */
266 0, /* number of parallel prefetches */
267 1, /* Branch cost */
268 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
269 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
270 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
271 COSTS_N_INSNS (3), /* cost of FABS instruction. */
272 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
273 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
274 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
275 DUMMY_STRINGOP_ALGS},
276 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
277 DUMMY_STRINGOP_ALGS},
278 1, /* scalar_stmt_cost. */
279 1, /* scalar load_cost. */
280 1, /* scalar_store_cost. */
281 1, /* vec_stmt_cost. */
282 1, /* vec_to_scalar_cost. */
283 1, /* scalar_to_vec_cost. */
284 1, /* vec_align_load_cost. */
285 2, /* vec_unalign_load_cost. */
286 1, /* vec_store_cost. */
287 3, /* cond_taken_branch_cost. */
288 1, /* cond_not_taken_branch_cost. */
289 };
290
291 static const
292 struct processor_costs pentium_cost = {
293 COSTS_N_INSNS (1), /* cost of an add instruction */
294 COSTS_N_INSNS (1), /* cost of a lea instruction */
295 COSTS_N_INSNS (4), /* variable shift costs */
296 COSTS_N_INSNS (1), /* constant shift costs */
297 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
298 COSTS_N_INSNS (11), /* HI */
299 COSTS_N_INSNS (11), /* SI */
300 COSTS_N_INSNS (11), /* DI */
301 COSTS_N_INSNS (11)}, /* other */
302 0, /* cost of multiply per each bit set */
303 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
304 COSTS_N_INSNS (25), /* HI */
305 COSTS_N_INSNS (25), /* SI */
306 COSTS_N_INSNS (25), /* DI */
307 COSTS_N_INSNS (25)}, /* other */
308 COSTS_N_INSNS (3), /* cost of movsx */
309 COSTS_N_INSNS (2), /* cost of movzx */
310 8, /* "large" insn */
311 6, /* MOVE_RATIO */
312 6, /* cost for loading QImode using movzbl */
313 {2, 4, 2}, /* cost of loading integer registers
314 in QImode, HImode and SImode.
315 Relative to reg-reg move (2). */
316 {2, 4, 2}, /* cost of storing integer registers */
317 2, /* cost of reg,reg fld/fst */
318 {2, 2, 6}, /* cost of loading fp registers
319 in SFmode, DFmode and XFmode */
320 {4, 4, 6}, /* cost of storing fp registers
321 in SFmode, DFmode and XFmode */
322 8, /* cost of moving MMX register */
323 {8, 8}, /* cost of loading MMX registers
324 in SImode and DImode */
325 {8, 8}, /* cost of storing MMX registers
326 in SImode and DImode */
327 2, /* cost of moving SSE register */
328 {4, 8, 16}, /* cost of loading SSE registers
329 in SImode, DImode and TImode */
330 {4, 8, 16}, /* cost of storing SSE registers
331 in SImode, DImode and TImode */
332 3, /* MMX or SSE register to integer */
333 8, /* size of l1 cache. */
334 8, /* size of l2 cache */
335 0, /* size of prefetch block */
336 0, /* number of parallel prefetches */
337 2, /* Branch cost */
338 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
339 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
340 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
341 COSTS_N_INSNS (1), /* cost of FABS instruction. */
342 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
343 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
344 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
345 DUMMY_STRINGOP_ALGS},
346 {{libcall, {{-1, rep_prefix_4_byte}}},
347 DUMMY_STRINGOP_ALGS},
348 1, /* scalar_stmt_cost. */
349 1, /* scalar load_cost. */
350 1, /* scalar_store_cost. */
351 1, /* vec_stmt_cost. */
352 1, /* vec_to_scalar_cost. */
353 1, /* scalar_to_vec_cost. */
354 1, /* vec_align_load_cost. */
355 2, /* vec_unalign_load_cost. */
356 1, /* vec_store_cost. */
357 3, /* cond_taken_branch_cost. */
358 1, /* cond_not_taken_branch_cost. */
359 };
360
361 static const
362 struct processor_costs pentiumpro_cost = {
363 COSTS_N_INSNS (1), /* cost of an add instruction */
364 COSTS_N_INSNS (1), /* cost of a lea instruction */
365 COSTS_N_INSNS (1), /* variable shift costs */
366 COSTS_N_INSNS (1), /* constant shift costs */
367 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
368 COSTS_N_INSNS (4), /* HI */
369 COSTS_N_INSNS (4), /* SI */
370 COSTS_N_INSNS (4), /* DI */
371 COSTS_N_INSNS (4)}, /* other */
372 0, /* cost of multiply per each bit set */
373 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
374 COSTS_N_INSNS (17), /* HI */
375 COSTS_N_INSNS (17), /* SI */
376 COSTS_N_INSNS (17), /* DI */
377 COSTS_N_INSNS (17)}, /* other */
378 COSTS_N_INSNS (1), /* cost of movsx */
379 COSTS_N_INSNS (1), /* cost of movzx */
380 8, /* "large" insn */
381 6, /* MOVE_RATIO */
382 2, /* cost for loading QImode using movzbl */
383 {4, 4, 4}, /* cost of loading integer registers
384 in QImode, HImode and SImode.
385 Relative to reg-reg move (2). */
386 {2, 2, 2}, /* cost of storing integer registers */
387 2, /* cost of reg,reg fld/fst */
388 {2, 2, 6}, /* cost of loading fp registers
389 in SFmode, DFmode and XFmode */
390 {4, 4, 6}, /* cost of storing fp registers
391 in SFmode, DFmode and XFmode */
392 2, /* cost of moving MMX register */
393 {2, 2}, /* cost of loading MMX registers
394 in SImode and DImode */
395 {2, 2}, /* cost of storing MMX registers
396 in SImode and DImode */
397 2, /* cost of moving SSE register */
398 {2, 2, 8}, /* cost of loading SSE registers
399 in SImode, DImode and TImode */
400 {2, 2, 8}, /* cost of storing SSE registers
401 in SImode, DImode and TImode */
402 3, /* MMX or SSE register to integer */
403 8, /* size of l1 cache. */
404 256, /* size of l2 cache */
405 32, /* size of prefetch block */
406 6, /* number of parallel prefetches */
407 2, /* Branch cost */
408 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
409 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
410 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
411 COSTS_N_INSNS (2), /* cost of FABS instruction. */
412 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
413 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
414 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we
415    ensure the alignment).  For small blocks an inline loop is still a noticeable
416    win; for bigger blocks either rep movsl or rep movsb is the way to go.  Rep
417    movsb apparently has a more expensive startup time in the CPU, but after 4K
418    the difference is down in the noise.  */
419 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
420 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
421 DUMMY_STRINGOP_ALGS},
422 {{rep_prefix_4_byte, {{1024, unrolled_loop},
423 {8192, rep_prefix_4_byte}, {-1, libcall}}},
424 DUMMY_STRINGOP_ALGS},
425 1, /* scalar_stmt_cost. */
426 1, /* scalar load_cost. */
427 1, /* scalar_store_cost. */
428 1, /* vec_stmt_cost. */
429 1, /* vec_to_scalar_cost. */
430 1, /* scalar_to_vec_cost. */
431 1, /* vec_align_load_cost. */
432 2, /* vec_unalign_load_cost. */
433 1, /* vec_store_cost. */
434 3, /* cond_taken_branch_cost. */
435 1, /* cond_not_taken_branch_cost. */
436 };
437
438 static const
439 struct processor_costs geode_cost = {
440 COSTS_N_INSNS (1), /* cost of an add instruction */
441 COSTS_N_INSNS (1), /* cost of a lea instruction */
442 COSTS_N_INSNS (2), /* variable shift costs */
443 COSTS_N_INSNS (1), /* constant shift costs */
444 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
445 COSTS_N_INSNS (4), /* HI */
446 COSTS_N_INSNS (7), /* SI */
447 COSTS_N_INSNS (7), /* DI */
448 COSTS_N_INSNS (7)}, /* other */
449 0, /* cost of multiply per each bit set */
450 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
451 COSTS_N_INSNS (23), /* HI */
452 COSTS_N_INSNS (39), /* SI */
453 COSTS_N_INSNS (39), /* DI */
454 COSTS_N_INSNS (39)}, /* other */
455 COSTS_N_INSNS (1), /* cost of movsx */
456 COSTS_N_INSNS (1), /* cost of movzx */
457 8, /* "large" insn */
458 4, /* MOVE_RATIO */
459 1, /* cost for loading QImode using movzbl */
460 {1, 1, 1}, /* cost of loading integer registers
461 in QImode, HImode and SImode.
462 Relative to reg-reg move (2). */
463 {1, 1, 1}, /* cost of storing integer registers */
464 1, /* cost of reg,reg fld/fst */
465 {1, 1, 1}, /* cost of loading fp registers
466 in SFmode, DFmode and XFmode */
467 {4, 6, 6}, /* cost of storing fp registers
468 in SFmode, DFmode and XFmode */
469
470 1, /* cost of moving MMX register */
471 {1, 1}, /* cost of loading MMX registers
472 in SImode and DImode */
473 {1, 1}, /* cost of storing MMX registers
474 in SImode and DImode */
475 1, /* cost of moving SSE register */
476 {1, 1, 1}, /* cost of loading SSE registers
477 in SImode, DImode and TImode */
478 {1, 1, 1}, /* cost of storing SSE registers
479 in SImode, DImode and TImode */
480 1, /* MMX or SSE register to integer */
481 64, /* size of l1 cache. */
482 128, /* size of l2 cache. */
483 32, /* size of prefetch block */
484 1, /* number of parallel prefetches */
485 1, /* Branch cost */
486 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
487 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
488 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
489 COSTS_N_INSNS (1), /* cost of FABS instruction. */
490 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
491 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
492 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
493 DUMMY_STRINGOP_ALGS},
494 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
495 DUMMY_STRINGOP_ALGS},
496 1, /* scalar_stmt_cost. */
497 1, /* scalar load_cost. */
498 1, /* scalar_store_cost. */
499 1, /* vec_stmt_cost. */
500 1, /* vec_to_scalar_cost. */
501 1, /* scalar_to_vec_cost. */
502 1, /* vec_align_load_cost. */
503 2, /* vec_unalign_load_cost. */
504 1, /* vec_store_cost. */
505 3, /* cond_taken_branch_cost. */
506 1, /* cond_not_taken_branch_cost. */
507 };
508
509 static const
510 struct processor_costs k6_cost = {
511 COSTS_N_INSNS (1), /* cost of an add instruction */
512 COSTS_N_INSNS (2), /* cost of a lea instruction */
513 COSTS_N_INSNS (1), /* variable shift costs */
514 COSTS_N_INSNS (1), /* constant shift costs */
515 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
516 COSTS_N_INSNS (3), /* HI */
517 COSTS_N_INSNS (3), /* SI */
518 COSTS_N_INSNS (3), /* DI */
519 COSTS_N_INSNS (3)}, /* other */
520 0, /* cost of multiply per each bit set */
521 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
522 COSTS_N_INSNS (18), /* HI */
523 COSTS_N_INSNS (18), /* SI */
524 COSTS_N_INSNS (18), /* DI */
525 COSTS_N_INSNS (18)}, /* other */
526 COSTS_N_INSNS (2), /* cost of movsx */
527 COSTS_N_INSNS (2), /* cost of movzx */
528 8, /* "large" insn */
529 4, /* MOVE_RATIO */
530 3, /* cost for loading QImode using movzbl */
531 {4, 5, 4}, /* cost of loading integer registers
532 in QImode, HImode and SImode.
533 Relative to reg-reg move (2). */
534 {2, 3, 2}, /* cost of storing integer registers */
535 4, /* cost of reg,reg fld/fst */
536 {6, 6, 6}, /* cost of loading fp registers
537 in SFmode, DFmode and XFmode */
538 {4, 4, 4}, /* cost of storing fp registers
539 in SFmode, DFmode and XFmode */
540 2, /* cost of moving MMX register */
541 {2, 2}, /* cost of loading MMX registers
542 in SImode and DImode */
543 {2, 2}, /* cost of storing MMX registers
544 in SImode and DImode */
545 2, /* cost of moving SSE register */
546 {2, 2, 8}, /* cost of loading SSE registers
547 in SImode, DImode and TImode */
548 {2, 2, 8}, /* cost of storing SSE registers
549 in SImode, DImode and TImode */
550 6, /* MMX or SSE register to integer */
551 32, /* size of l1 cache. */
552 32, /* size of l2 cache. Some models
553 have integrated l2 cache, but
554 optimizing for k6 is not important
555 enough to worry about that. */
556 32, /* size of prefetch block */
557 1, /* number of parallel prefetches */
558 1, /* Branch cost */
559 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
560 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
561 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
562 COSTS_N_INSNS (2), /* cost of FABS instruction. */
563 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
564 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
565 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
566 DUMMY_STRINGOP_ALGS},
567 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
568 DUMMY_STRINGOP_ALGS},
569 1, /* scalar_stmt_cost. */
570 1, /* scalar load_cost. */
571 1, /* scalar_store_cost. */
572 1, /* vec_stmt_cost. */
573 1, /* vec_to_scalar_cost. */
574 1, /* scalar_to_vec_cost. */
575 1, /* vec_align_load_cost. */
576 2, /* vec_unalign_load_cost. */
577 1, /* vec_store_cost. */
578 3, /* cond_taken_branch_cost. */
579 1, /* cond_not_taken_branch_cost. */
580 };
581
582 static const
583 struct processor_costs athlon_cost = {
584 COSTS_N_INSNS (1), /* cost of an add instruction */
585 COSTS_N_INSNS (2), /* cost of a lea instruction */
586 COSTS_N_INSNS (1), /* variable shift costs */
587 COSTS_N_INSNS (1), /* constant shift costs */
588 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
589 COSTS_N_INSNS (5), /* HI */
590 COSTS_N_INSNS (5), /* SI */
591 COSTS_N_INSNS (5), /* DI */
592 COSTS_N_INSNS (5)}, /* other */
593 0, /* cost of multiply per each bit set */
594 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
595 COSTS_N_INSNS (26), /* HI */
596 COSTS_N_INSNS (42), /* SI */
597 COSTS_N_INSNS (74), /* DI */
598 COSTS_N_INSNS (74)}, /* other */
599 COSTS_N_INSNS (1), /* cost of movsx */
600 COSTS_N_INSNS (1), /* cost of movzx */
601 8, /* "large" insn */
602 9, /* MOVE_RATIO */
603 4, /* cost for loading QImode using movzbl */
604 {3, 4, 3}, /* cost of loading integer registers
605 in QImode, HImode and SImode.
606 Relative to reg-reg move (2). */
607 {3, 4, 3}, /* cost of storing integer registers */
608 4, /* cost of reg,reg fld/fst */
609 {4, 4, 12}, /* cost of loading fp registers
610 in SFmode, DFmode and XFmode */
611 {6, 6, 8}, /* cost of storing fp registers
612 in SFmode, DFmode and XFmode */
613 2, /* cost of moving MMX register */
614 {4, 4}, /* cost of loading MMX registers
615 in SImode and DImode */
616 {4, 4}, /* cost of storing MMX registers
617 in SImode and DImode */
618 2, /* cost of moving SSE register */
619 {4, 4, 6}, /* cost of loading SSE registers
620 in SImode, DImode and TImode */
621 {4, 4, 5}, /* cost of storing SSE registers
622 in SImode, DImode and TImode */
623 5, /* MMX or SSE register to integer */
624 64, /* size of l1 cache. */
625 256, /* size of l2 cache. */
626 64, /* size of prefetch block */
627 6, /* number of parallel prefetches */
628 5, /* Branch cost */
629 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
630 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
631 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
632 COSTS_N_INSNS (2), /* cost of FABS instruction. */
633 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
634 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
635 /* For some reason, Athlon deals better with the REP prefix (relative to loops)
636    than K8 does.  Alignment becomes important after 8 bytes for memcpy and
637    128 bytes for memset.  */
638 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
639 DUMMY_STRINGOP_ALGS},
640 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
641 DUMMY_STRINGOP_ALGS},
642 1, /* scalar_stmt_cost. */
643 1, /* scalar load_cost. */
644 1, /* scalar_store_cost. */
645 1, /* vec_stmt_cost. */
646 1, /* vec_to_scalar_cost. */
647 1, /* scalar_to_vec_cost. */
648 1, /* vec_align_load_cost. */
649 2, /* vec_unalign_load_cost. */
650 1, /* vec_store_cost. */
651 3, /* cond_taken_branch_cost. */
652 1, /* cond_not_taken_branch_cost. */
653 };
654
655 static const
656 struct processor_costs k8_cost = {
657 COSTS_N_INSNS (1), /* cost of an add instruction */
658 COSTS_N_INSNS (2), /* cost of a lea instruction */
659 COSTS_N_INSNS (1), /* variable shift costs */
660 COSTS_N_INSNS (1), /* constant shift costs */
661 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
662 COSTS_N_INSNS (4), /* HI */
663 COSTS_N_INSNS (3), /* SI */
664 COSTS_N_INSNS (4), /* DI */
665 COSTS_N_INSNS (5)}, /* other */
666 0, /* cost of multiply per each bit set */
667 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
668 COSTS_N_INSNS (26), /* HI */
669 COSTS_N_INSNS (42), /* SI */
670 COSTS_N_INSNS (74), /* DI */
671 COSTS_N_INSNS (74)}, /* other */
672 COSTS_N_INSNS (1), /* cost of movsx */
673 COSTS_N_INSNS (1), /* cost of movzx */
674 8, /* "large" insn */
675 9, /* MOVE_RATIO */
676 4, /* cost for loading QImode using movzbl */
677 {3, 4, 3}, /* cost of loading integer registers
678 in QImode, HImode and SImode.
679 Relative to reg-reg move (2). */
680 {3, 4, 3}, /* cost of storing integer registers */
681 4, /* cost of reg,reg fld/fst */
682 {4, 4, 12}, /* cost of loading fp registers
683 in SFmode, DFmode and XFmode */
684 {6, 6, 8}, /* cost of storing fp registers
685 in SFmode, DFmode and XFmode */
686 2, /* cost of moving MMX register */
687 {3, 3}, /* cost of loading MMX registers
688 in SImode and DImode */
689 {4, 4}, /* cost of storing MMX registers
690 in SImode and DImode */
691 2, /* cost of moving SSE register */
692 {4, 3, 6}, /* cost of loading SSE registers
693 in SImode, DImode and TImode */
694 {4, 4, 5}, /* cost of storing SSE registers
695 in SImode, DImode and TImode */
696 5, /* MMX or SSE register to integer */
697 64, /* size of l1 cache. */
698 512, /* size of l2 cache. */
699 64, /* size of prefetch block */
700 /* New AMD processors never drop prefetches; if they cannot be performed
701    immediately, they are queued.  We set the number of simultaneous prefetches
702    to a large constant to reflect this (it probably is not a good idea not
703    to limit the number of prefetches at all, as their execution also takes
704    some time).  */
705 100, /* number of parallel prefetches */
706 3, /* Branch cost */
707 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
708 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
709 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
710 COSTS_N_INSNS (2), /* cost of FABS instruction. */
711 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
712 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
713 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
714    small blocks it is better to use a loop.  For large blocks, a libcall can do
715    nontemporal accesses and beat the inline code considerably.  */
716 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
717 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
718 {{libcall, {{8, loop}, {24, unrolled_loop},
719 {2048, rep_prefix_4_byte}, {-1, libcall}}},
720 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
721 4, /* scalar_stmt_cost. */
722 2, /* scalar load_cost. */
723 2, /* scalar_store_cost. */
724 5, /* vec_stmt_cost. */
725 0, /* vec_to_scalar_cost. */
726 2, /* scalar_to_vec_cost. */
727 2, /* vec_align_load_cost. */
728 3, /* vec_unalign_load_cost. */
729 3, /* vec_store_cost. */
730 3, /* cond_taken_branch_cost. */
731 2, /* cond_not_taken_branch_cost. */
732 };
733
734 struct processor_costs amdfam10_cost = {
735 COSTS_N_INSNS (1), /* cost of an add instruction */
736 COSTS_N_INSNS (2), /* cost of a lea instruction */
737 COSTS_N_INSNS (1), /* variable shift costs */
738 COSTS_N_INSNS (1), /* constant shift costs */
739 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
740 COSTS_N_INSNS (4), /* HI */
741 COSTS_N_INSNS (3), /* SI */
742 COSTS_N_INSNS (4), /* DI */
743 COSTS_N_INSNS (5)}, /* other */
744 0, /* cost of multiply per each bit set */
745 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
746 COSTS_N_INSNS (35), /* HI */
747 COSTS_N_INSNS (51), /* SI */
748 COSTS_N_INSNS (83), /* DI */
749 COSTS_N_INSNS (83)}, /* other */
750 COSTS_N_INSNS (1), /* cost of movsx */
751 COSTS_N_INSNS (1), /* cost of movzx */
752 8, /* "large" insn */
753 9, /* MOVE_RATIO */
754 4, /* cost for loading QImode using movzbl */
755 {3, 4, 3}, /* cost of loading integer registers
756 in QImode, HImode and SImode.
757 Relative to reg-reg move (2). */
758 {3, 4, 3}, /* cost of storing integer registers */
759 4, /* cost of reg,reg fld/fst */
760 {4, 4, 12}, /* cost of loading fp registers
761 in SFmode, DFmode and XFmode */
762 {6, 6, 8}, /* cost of storing fp registers
763 in SFmode, DFmode and XFmode */
764 2, /* cost of moving MMX register */
765 {3, 3}, /* cost of loading MMX registers
766 in SImode and DImode */
767 {4, 4}, /* cost of storing MMX registers
768 in SImode and DImode */
769 2, /* cost of moving SSE register */
770 {4, 4, 3}, /* cost of loading SSE registers
771 in SImode, DImode and TImode */
772 {4, 4, 5}, /* cost of storing SSE registers
773 in SImode, DImode and TImode */
774 3, /* MMX or SSE register to integer */
775 /* On K8:
776     MOVD reg64, xmmreg   Double   FSTORE   4
777     MOVD reg32, xmmreg   Double   FSTORE   4
778    On AMDFAM10:
779     MOVD reg64, xmmreg   Double   FADD     3
780                                            1/1   1/1
781     MOVD reg32, xmmreg   Double   FADD     3
782                                            1/1   1/1  */
783 64, /* size of l1 cache. */
784 512, /* size of l2 cache. */
785 64, /* size of prefetch block */
786 /* New AMD processors never drop prefetches; if they cannot be performed
787    immediately, they are queued.  We set the number of simultaneous prefetches
788    to a large constant to reflect this (it probably is not a good idea not
789    to limit the number of prefetches at all, as their execution also takes
790    some time).  */
791 100, /* number of parallel prefetches */
792 2, /* Branch cost */
793 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
794 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
795 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
796 COSTS_N_INSNS (2), /* cost of FABS instruction. */
797 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
798 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
799
800 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
801    very small blocks it is better to use a loop.  For large blocks, a libcall
802    can do nontemporal accesses and beat the inline code considerably.  */
803 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
804 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
805 {{libcall, {{8, loop}, {24, unrolled_loop},
806 {2048, rep_prefix_4_byte}, {-1, libcall}}},
807 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
808 4, /* scalar_stmt_cost. */
809 2, /* scalar load_cost. */
810 2, /* scalar_store_cost. */
811 6, /* vec_stmt_cost. */
812 0, /* vec_to_scalar_cost. */
813 2, /* scalar_to_vec_cost. */
814 2, /* vec_align_load_cost. */
815 2, /* vec_unalign_load_cost. */
816 2, /* vec_store_cost. */
817 2, /* cond_taken_branch_cost. */
818 1, /* cond_not_taken_branch_cost. */
819 };
820
821 static const
822 struct processor_costs pentium4_cost = {
823 COSTS_N_INSNS (1), /* cost of an add instruction */
824 COSTS_N_INSNS (3), /* cost of a lea instruction */
825 COSTS_N_INSNS (4), /* variable shift costs */
826 COSTS_N_INSNS (4), /* constant shift costs */
827 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
828 COSTS_N_INSNS (15), /* HI */
829 COSTS_N_INSNS (15), /* SI */
830 COSTS_N_INSNS (15), /* DI */
831 COSTS_N_INSNS (15)}, /* other */
832 0, /* cost of multiply per each bit set */
833 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
834 COSTS_N_INSNS (56), /* HI */
835 COSTS_N_INSNS (56), /* SI */
836 COSTS_N_INSNS (56), /* DI */
837 COSTS_N_INSNS (56)}, /* other */
838 COSTS_N_INSNS (1), /* cost of movsx */
839 COSTS_N_INSNS (1), /* cost of movzx */
840 16, /* "large" insn */
841 6, /* MOVE_RATIO */
842 2, /* cost for loading QImode using movzbl */
843 {4, 5, 4}, /* cost of loading integer registers
844 in QImode, HImode and SImode.
845 Relative to reg-reg move (2). */
846 {2, 3, 2}, /* cost of storing integer registers */
847 2, /* cost of reg,reg fld/fst */
848 {2, 2, 6}, /* cost of loading fp registers
849 in SFmode, DFmode and XFmode */
850 {4, 4, 6}, /* cost of storing fp registers
851 in SFmode, DFmode and XFmode */
852 2, /* cost of moving MMX register */
853 {2, 2}, /* cost of loading MMX registers
854 in SImode and DImode */
855 {2, 2}, /* cost of storing MMX registers
856 in SImode and DImode */
857 12, /* cost of moving SSE register */
858 {12, 12, 12}, /* cost of loading SSE registers
859 in SImode, DImode and TImode */
860 {2, 2, 8}, /* cost of storing SSE registers
861 in SImode, DImode and TImode */
862 10, /* MMX or SSE register to integer */
863 8, /* size of l1 cache. */
864 256, /* size of l2 cache. */
865 64, /* size of prefetch block */
866 6, /* number of parallel prefetches */
867 2, /* Branch cost */
868 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
869 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
870 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
871 COSTS_N_INSNS (2), /* cost of FABS instruction. */
872 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
873 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
874 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
875 DUMMY_STRINGOP_ALGS},
876 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
877 {-1, libcall}}},
878 DUMMY_STRINGOP_ALGS},
879 1, /* scalar_stmt_cost. */
880 1, /* scalar load_cost. */
881 1, /* scalar_store_cost. */
882 1, /* vec_stmt_cost. */
883 1, /* vec_to_scalar_cost. */
884 1, /* scalar_to_vec_cost. */
885 1, /* vec_align_load_cost. */
886 2, /* vec_unalign_load_cost. */
887 1, /* vec_store_cost. */
888 3, /* cond_taken_branch_cost. */
889 1, /* cond_not_taken_branch_cost. */
890 };
891
892 static const
893 struct processor_costs nocona_cost = {
894 COSTS_N_INSNS (1), /* cost of an add instruction */
895 COSTS_N_INSNS (1), /* cost of a lea instruction */
896 COSTS_N_INSNS (1), /* variable shift costs */
897 COSTS_N_INSNS (1), /* constant shift costs */
898 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
899 COSTS_N_INSNS (10), /* HI */
900 COSTS_N_INSNS (10), /* SI */
901 COSTS_N_INSNS (10), /* DI */
902 COSTS_N_INSNS (10)}, /* other */
903 0, /* cost of multiply per each bit set */
904 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
905 COSTS_N_INSNS (66), /* HI */
906 COSTS_N_INSNS (66), /* SI */
907 COSTS_N_INSNS (66), /* DI */
908 COSTS_N_INSNS (66)}, /* other */
909 COSTS_N_INSNS (1), /* cost of movsx */
910 COSTS_N_INSNS (1), /* cost of movzx */
911 16, /* "large" insn */
912 17, /* MOVE_RATIO */
913 4, /* cost for loading QImode using movzbl */
914 {4, 4, 4}, /* cost of loading integer registers
915 in QImode, HImode and SImode.
916 Relative to reg-reg move (2). */
917 {4, 4, 4}, /* cost of storing integer registers */
918 3, /* cost of reg,reg fld/fst */
919 {12, 12, 12}, /* cost of loading fp registers
920 in SFmode, DFmode and XFmode */
921 {4, 4, 4}, /* cost of storing fp registers
922 in SFmode, DFmode and XFmode */
923 6, /* cost of moving MMX register */
924 {12, 12}, /* cost of loading MMX registers
925 in SImode and DImode */
926 {12, 12}, /* cost of storing MMX registers
927 in SImode and DImode */
928 6, /* cost of moving SSE register */
929 {12, 12, 12}, /* cost of loading SSE registers
930 in SImode, DImode and TImode */
931 {12, 12, 12}, /* cost of storing SSE registers
932 in SImode, DImode and TImode */
933 8, /* MMX or SSE register to integer */
934 8, /* size of l1 cache. */
935 1024, /* size of l2 cache. */
936 128, /* size of prefetch block */
937 8, /* number of parallel prefetches */
938 1, /* Branch cost */
939 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
940 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
941 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
942 COSTS_N_INSNS (3), /* cost of FABS instruction. */
943 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
944 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
945 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
946 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
947 {100000, unrolled_loop}, {-1, libcall}}}},
948 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
949 {-1, libcall}}},
950 {libcall, {{24, loop}, {64, unrolled_loop},
951 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
952 1, /* scalar_stmt_cost. */
953 1, /* scalar load_cost. */
954 1, /* scalar_store_cost. */
955 1, /* vec_stmt_cost. */
956 1, /* vec_to_scalar_cost. */
957 1, /* scalar_to_vec_cost. */
958 1, /* vec_align_load_cost. */
959 2, /* vec_unalign_load_cost. */
960 1, /* vec_store_cost. */
961 3, /* cond_taken_branch_cost. */
962 1, /* cond_not_taken_branch_cost. */
963 };
964
965 static const
966 struct processor_costs core2_cost = {
967 COSTS_N_INSNS (1), /* cost of an add instruction */
968 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
969 COSTS_N_INSNS (1), /* variable shift costs */
970 COSTS_N_INSNS (1), /* constant shift costs */
971 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
972 COSTS_N_INSNS (3), /* HI */
973 COSTS_N_INSNS (3), /* SI */
974 COSTS_N_INSNS (3), /* DI */
975 COSTS_N_INSNS (3)}, /* other */
976 0, /* cost of multiply per each bit set */
977 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
978 COSTS_N_INSNS (22), /* HI */
979 COSTS_N_INSNS (22), /* SI */
980 COSTS_N_INSNS (22), /* DI */
981 COSTS_N_INSNS (22)}, /* other */
982 COSTS_N_INSNS (1), /* cost of movsx */
983 COSTS_N_INSNS (1), /* cost of movzx */
984 8, /* "large" insn */
985 16, /* MOVE_RATIO */
986 2, /* cost for loading QImode using movzbl */
987 {6, 6, 6}, /* cost of loading integer registers
988 in QImode, HImode and SImode.
989 Relative to reg-reg move (2). */
990 {4, 4, 4}, /* cost of storing integer registers */
991 2, /* cost of reg,reg fld/fst */
992 {6, 6, 6}, /* cost of loading fp registers
993 in SFmode, DFmode and XFmode */
994   {4, 4, 4},				/* cost of storing fp registers in SFmode, DFmode and XFmode */
995 2, /* cost of moving MMX register */
996 {6, 6}, /* cost of loading MMX registers
997 in SImode and DImode */
998 {4, 4}, /* cost of storing MMX registers
999 in SImode and DImode */
1000 2, /* cost of moving SSE register */
1001 {6, 6, 6}, /* cost of loading SSE registers
1002 in SImode, DImode and TImode */
1003 {4, 4, 4}, /* cost of storing SSE registers
1004 in SImode, DImode and TImode */
1005 2, /* MMX or SSE register to integer */
1006 32, /* size of l1 cache. */
1007 2048, /* size of l2 cache. */
1008 128, /* size of prefetch block */
1009 8, /* number of parallel prefetches */
1010 3, /* Branch cost */
1011 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1012 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1013 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1014 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1015 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1016 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1017 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1018 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1019 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1020 {{libcall, {{8, loop}, {15, unrolled_loop},
1021 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1022 {libcall, {{24, loop}, {32, unrolled_loop},
1023 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1024 1, /* scalar_stmt_cost. */
1025 1, /* scalar load_cost. */
1026 1, /* scalar_store_cost. */
1027 1, /* vec_stmt_cost. */
1028 1, /* vec_to_scalar_cost. */
1029 1, /* scalar_to_vec_cost. */
1030 1, /* vec_align_load_cost. */
1031 2, /* vec_unalign_load_cost. */
1032 1, /* vec_store_cost. */
1033 3, /* cond_taken_branch_cost. */
1034 1, /* cond_not_taken_branch_cost. */
1035 };
1036
1037 /* Generic64 should produce code tuned for Nocona and K8. */
1038 static const
1039 struct processor_costs generic64_cost = {
1040 COSTS_N_INSNS (1), /* cost of an add instruction */
1041 /* On all chips taken into consideration lea takes 2 cycles or more.  With
1042    that cost, however, our current implementation of synth_mult results in
1043    the use of unnecessary temporary registers, causing regressions on several
1044    SPECfp benchmarks.  */
1045 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1046 COSTS_N_INSNS (1), /* variable shift costs */
1047 COSTS_N_INSNS (1), /* constant shift costs */
1048 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1049 COSTS_N_INSNS (4), /* HI */
1050 COSTS_N_INSNS (3), /* SI */
1051 COSTS_N_INSNS (4), /* DI */
1052 COSTS_N_INSNS (2)}, /* other */
1053 0, /* cost of multiply per each bit set */
1054 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1055 COSTS_N_INSNS (26), /* HI */
1056 COSTS_N_INSNS (42), /* SI */
1057 COSTS_N_INSNS (74), /* DI */
1058 COSTS_N_INSNS (74)}, /* other */
1059 COSTS_N_INSNS (1), /* cost of movsx */
1060 COSTS_N_INSNS (1), /* cost of movzx */
1061 8, /* "large" insn */
1062 17, /* MOVE_RATIO */
1063 4, /* cost for loading QImode using movzbl */
1064 {4, 4, 4}, /* cost of loading integer registers
1065 in QImode, HImode and SImode.
1066 Relative to reg-reg move (2). */
1067 {4, 4, 4}, /* cost of storing integer registers */
1068 4, /* cost of reg,reg fld/fst */
1069 {12, 12, 12}, /* cost of loading fp registers
1070 in SFmode, DFmode and XFmode */
1071 {6, 6, 8}, /* cost of storing fp registers
1072 in SFmode, DFmode and XFmode */
1073 2, /* cost of moving MMX register */
1074 {8, 8}, /* cost of loading MMX registers
1075 in SImode and DImode */
1076 {8, 8}, /* cost of storing MMX registers
1077 in SImode and DImode */
1078 2, /* cost of moving SSE register */
1079 {8, 8, 8}, /* cost of loading SSE registers
1080 in SImode, DImode and TImode */
1081 {8, 8, 8}, /* cost of storing SSE registers
1082 in SImode, DImode and TImode */
1083 5, /* MMX or SSE register to integer */
1084 32, /* size of l1 cache. */
1085 512, /* size of l2 cache. */
1086 64, /* size of prefetch block */
1087 6, /* number of parallel prefetches */
1088 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1089    value is increased to the perhaps more appropriate value of 5.  */
1090 3, /* Branch cost */
1091 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1092 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1093 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1094 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1095 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1096 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1097 {DUMMY_STRINGOP_ALGS,
1098 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1099 {DUMMY_STRINGOP_ALGS,
1100 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1101 1, /* scalar_stmt_cost. */
1102 1, /* scalar load_cost. */
1103 1, /* scalar_store_cost. */
1104 1, /* vec_stmt_cost. */
1105 1, /* vec_to_scalar_cost. */
1106 1, /* scalar_to_vec_cost. */
1107 1, /* vec_align_load_cost. */
1108 2, /* vec_unalign_load_cost. */
1109 1, /* vec_store_cost. */
1110 3, /* cond_taken_branch_cost. */
1111 1, /* cond_not_taken_branch_cost. */
1112 };
1113
1114 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1115 static const
1116 struct processor_costs generic32_cost = {
1117 COSTS_N_INSNS (1), /* cost of an add instruction */
1118 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1119 COSTS_N_INSNS (1), /* variable shift costs */
1120 COSTS_N_INSNS (1), /* constant shift costs */
1121 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1122 COSTS_N_INSNS (4), /* HI */
1123 COSTS_N_INSNS (3), /* SI */
1124 COSTS_N_INSNS (4), /* DI */
1125 COSTS_N_INSNS (2)}, /* other */
1126 0, /* cost of multiply per each bit set */
1127 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1128 COSTS_N_INSNS (26), /* HI */
1129 COSTS_N_INSNS (42), /* SI */
1130 COSTS_N_INSNS (74), /* DI */
1131 COSTS_N_INSNS (74)}, /* other */
1132 COSTS_N_INSNS (1), /* cost of movsx */
1133 COSTS_N_INSNS (1), /* cost of movzx */
1134 8, /* "large" insn */
1135 17, /* MOVE_RATIO */
1136 4, /* cost for loading QImode using movzbl */
1137 {4, 4, 4}, /* cost of loading integer registers
1138 in QImode, HImode and SImode.
1139 Relative to reg-reg move (2). */
1140 {4, 4, 4}, /* cost of storing integer registers */
1141 4, /* cost of reg,reg fld/fst */
1142 {12, 12, 12}, /* cost of loading fp registers
1143 in SFmode, DFmode and XFmode */
1144 {6, 6, 8}, /* cost of storing fp registers
1145 in SFmode, DFmode and XFmode */
1146 2, /* cost of moving MMX register */
1147 {8, 8}, /* cost of loading MMX registers
1148 in SImode and DImode */
1149 {8, 8}, /* cost of storing MMX registers
1150 in SImode and DImode */
1151 2, /* cost of moving SSE register */
1152 {8, 8, 8}, /* cost of loading SSE registers
1153 in SImode, DImode and TImode */
1154 {8, 8, 8}, /* cost of storing SSE registers
1155 in SImode, DImode and TImode */
1156 5, /* MMX or SSE register to integer */
1157 32, /* size of l1 cache. */
1158 256, /* size of l2 cache. */
1159 64, /* size of prefetch block */
1160 6, /* number of parallel prefetches */
1161 3, /* Branch cost */
1162 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1163 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1164 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1165 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1166 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1167 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1168 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1169 DUMMY_STRINGOP_ALGS},
1170 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1171 DUMMY_STRINGOP_ALGS},
1172 1, /* scalar_stmt_cost. */
1173 1, /* scalar load_cost. */
1174 1, /* scalar_store_cost. */
1175 1, /* vec_stmt_cost. */
1176 1, /* vec_to_scalar_cost. */
1177 1, /* scalar_to_vec_cost. */
1178 1, /* vec_align_load_cost. */
1179 2, /* vec_unalign_load_cost. */
1180 1, /* vec_store_cost. */
1181 3, /* cond_taken_branch_cost. */
1182 1, /* cond_not_taken_branch_cost. */
1183 };
1184
1185 const struct processor_costs *ix86_cost = &pentium_cost;
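/* A minimal sketch of how the tables above are consumed, assuming the
   selection done by the option-processing code later in this file (which
   points ix86_cost at the table matching the -mtune / -Os setting) and the
   field names used throughout; illustrative only, hence #if 0.  */
#if 0
static void
example_query_costs (void)
{
  int add_cost, div_cost;

  if (optimize_size)
    ix86_cost = &ix86_size_cost;	/* byte counts rather than latencies */

  add_cost = ix86_cost->add;				/* relative cost of an add */
  div_cost = ix86_cost->divide[MODE_INDEX (SImode)];	/* SImode divide */
  (void) add_cost;
  (void) div_cost;
}
#endif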
1186
1187 /* Processor feature/optimization bitmasks. */
1188 #define m_386 (1<<PROCESSOR_I386)
1189 #define m_486 (1<<PROCESSOR_I486)
1190 #define m_PENT (1<<PROCESSOR_PENTIUM)
1191 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1192 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1193 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1194 #define m_CORE2 (1<<PROCESSOR_CORE2)
1195
1196 #define m_GEODE (1<<PROCESSOR_GEODE)
1197 #define m_K6 (1<<PROCESSOR_K6)
1198 #define m_K6_GEODE (m_K6 | m_GEODE)
1199 #define m_K8 (1<<PROCESSOR_K8)
1200 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1201 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1202 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1203 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1204
1205 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1206 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1207
1208 /* Generic instruction choice should be a common subset of the supported CPUs
1209    (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
1210 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1211
1212 /* Feature tests against the various tunings. */
1213 unsigned char ix86_tune_features[X86_TUNE_LAST];
1214
1215 /* Feature tests against the various tunings used to create ix86_tune_features
1216 based on the processor mask. */
1217 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1218   /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1219      negatively, so enabling it for Generic64 seems like a good code-size
1220      tradeoff.  We can't enable it for 32-bit generic because it does not
1221      work well with PPro-based chips.  */
1222 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1223
1224 /* X86_TUNE_PUSH_MEMORY */
1225 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1226 | m_NOCONA | m_CORE2 | m_GENERIC,
1227
1228 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1229 m_486 | m_PENT,
1230
1231 /* X86_TUNE_USE_BIT_TEST */
1232 m_386,
1233
1234 /* X86_TUNE_UNROLL_STRLEN */
1235 m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
1236
1237 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1238 m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1239
1240   /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1241      on simulation results, but after P4 was made no performance benefit
1242      was observed with branch hints.  They also increase the code size.
1243      As a result, icc never generates branch hints.  */
1244 0,
1245
1246 /* X86_TUNE_DOUBLE_WITH_ADD */
1247 ~m_386,
1248
1249 /* X86_TUNE_USE_SAHF */
1250 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1251 | m_NOCONA | m_CORE2 | m_GENERIC,
1252
1253 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1254 partial dependencies. */
1255 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
1256 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1257
1258   /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1259      register stalls on the Generic32 compilation setting as well.  However,
1260      in the current implementation partial register stalls are not eliminated
1261      very well - they can be introduced via subregs synthesized by combine
1262      and can happen in caller/callee saving sequences.  Because this option
1263      pays back little on PPro-based chips and conflicts with the partial
1264      register dependencies used by Athlon/P4-based chips, it is better to
1265      leave it off for generic32 for now.  */
1266 m_PPRO,
1267
1268 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1269 m_CORE2 | m_GENERIC,
1270
1271 /* X86_TUNE_USE_HIMODE_FIOP */
1272 m_386 | m_486 | m_K6_GEODE,
1273
1274 /* X86_TUNE_USE_SIMODE_FIOP */
1275 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
1276
1277 /* X86_TUNE_USE_MOV0 */
1278 m_K6,
1279
1280 /* X86_TUNE_USE_CLTD */
1281 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1282
1283 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1284 m_PENT4,
1285
1286 /* X86_TUNE_SPLIT_LONG_MOVES */
1287 m_PPRO,
1288
1289 /* X86_TUNE_READ_MODIFY_WRITE */
1290 ~m_PENT,
1291
1292 /* X86_TUNE_READ_MODIFY */
1293 ~(m_PENT | m_PPRO),
1294
1295 /* X86_TUNE_PROMOTE_QIMODE */
1296 m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
1297 | m_GENERIC /* | m_PENT4 ? */,
1298
1299 /* X86_TUNE_FAST_PREFIX */
1300 ~(m_PENT | m_486 | m_386),
1301
1302 /* X86_TUNE_SINGLE_STRINGOP */
1303 m_386 | m_PENT4 | m_NOCONA,
1304
1305 /* X86_TUNE_QIMODE_MATH */
1306 ~0,
1307
1308 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1309 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1310 might be considered for Generic32 if our scheme for avoiding partial
1311 stalls was more effective. */
1312 ~m_PPRO,
1313
1314 /* X86_TUNE_PROMOTE_QI_REGS */
1315 0,
1316
1317 /* X86_TUNE_PROMOTE_HI_REGS */
1318 m_PPRO,
1319
1320 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1321 m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1322
1323 /* X86_TUNE_ADD_ESP_8 */
1324 m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
1325 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1326
1327 /* X86_TUNE_SUB_ESP_4 */
1328 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1329
1330 /* X86_TUNE_SUB_ESP_8 */
1331 m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
1332 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1333
1334 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1335 for DFmode copies */
1336 ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1337 | m_GENERIC | m_GEODE),
1338
1339 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1340 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1341
1342   /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1343      conflict here between PPro/Pentium4-based chips that treat 128-bit
1344      SSE registers as single units and K8-based chips that divide SSE
1345      registers into two 64-bit halves.  This knob promotes all store
1346      destinations to be 128-bit to allow register renaming on 128-bit SSE
1347      units, but usually results in one extra microop on 64-bit SSE units.
1348      Experimental results show that disabling this option on P4 brings over
1349      a 20% SPECfp regression, while enabling it on K8 brings roughly a 2.4%
1350      regression that can be partly masked by careful scheduling of moves.  */
1351 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1352
1353 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1354 m_AMDFAM10,
1355
1356   /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1357      are resolved on SSE register parts instead of whole registers, so we may
1358      maintain just the lower part of scalar values in the proper format, leaving
1359      the upper part undefined.  */
1360 m_ATHLON_K8,
1361
1362 /* X86_TUNE_SSE_TYPELESS_STORES */
1363 m_AMD_MULTIPLE,
1364
1365 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1366 m_PPRO | m_PENT4 | m_NOCONA,
1367
1368 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1369 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1370
1371 /* X86_TUNE_PROLOGUE_USING_MOVE */
1372 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1373
1374 /* X86_TUNE_EPILOGUE_USING_MOVE */
1375 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1376
1377 /* X86_TUNE_SHIFT1 */
1378 ~m_486,
1379
1380 /* X86_TUNE_USE_FFREEP */
1381 m_AMD_MULTIPLE,
1382
1383 /* X86_TUNE_INTER_UNIT_MOVES */
1384 ~(m_AMD_MULTIPLE | m_GENERIC),
1385
1386 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1387 ~(m_AMDFAM10),
1388
1389 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1390 than 4 branch instructions in the 16 byte window. */
1391 m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1392
1393 /* X86_TUNE_SCHEDULE */
1394 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1395
1396 /* X86_TUNE_USE_BT */
1397 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1398
1399 /* X86_TUNE_USE_INCDEC */
1400 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1401
1402 /* X86_TUNE_PAD_RETURNS */
1403 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1404
1405 /* X86_TUNE_EXT_80387_CONSTANTS */
1406 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1407
1408 /* X86_TUNE_SHORTEN_X87_SSE */
1409 ~m_K8,
1410
1411 /* X86_TUNE_AVOID_VECTOR_DECODE */
1412 m_K8 | m_GENERIC64,
1413
1414   /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
1415      and SImode multiplies, but the 386 and 486 do HImode multiplies faster.  */
1416 ~(m_386 | m_486),
1417
1418 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1419 vector path on AMD machines. */
1420 m_K8 | m_GENERIC64 | m_AMDFAM10,
1421
1422 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1423 machines. */
1424 m_K8 | m_GENERIC64 | m_AMDFAM10,
1425
1426 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1427 than a MOV. */
1428 m_PENT,
1429
1430 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1431 but one byte longer. */
1432 m_PENT,
1433
1434 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1435 operand that cannot be represented using a modRM byte. The XOR
1436 replacement is long decoded, so this split helps here as well. */
1437 m_K6,
1438
1439 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1440 from integer to FP. */
1441 m_AMDFAM10,
1442
1443 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1444 with a subsequent conditional jump instruction into a single
1445 compare-and-branch uop. */
1446 m_CORE2,
1447 };
1448
1449 /* Feature tests against the various architecture variations. */
1450 unsigned char ix86_arch_features[X86_ARCH_LAST];
1451
1452 /* Feature tests against the various architecture variations, used to create
1453 ix86_arch_features based on the processor mask. */
1454 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1455 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1456 ~(m_386 | m_486 | m_PENT | m_K6),
1457
1458 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1459 ~m_386,
1460
1461 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1462 ~(m_386 | m_486),
1463
1464 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1465 ~m_386,
1466
1467 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1468 ~m_386,
1469 };
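/* Illustrative note (not part of the original source): ix86_arch_features is
   derived later in override_options by ANDing each mask above with the single
   bit for the selected -march CPU, e.g.

     ix86_arch_mask = 1u << ix86_arch;
     ix86_arch_features[X86_ARCH_CMOVE]
       = !!(~(m_386 | m_486 | m_PENT | m_K6) & ix86_arch_mask);

   so -march=i686 yields a nonzero CMOVE feature while -march=i486 does not.  */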
1470
1471 static const unsigned int x86_accumulate_outgoing_args
1472 = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1473
1474 static const unsigned int x86_arch_always_fancy_math_387
1475 = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1476 | m_NOCONA | m_CORE2 | m_GENERIC;
1477
1478 static enum stringop_alg stringop_alg = no_stringop;
1479
1480 /* In case the average insn count for a single function invocation is
1481 lower than this constant, emit fast (but longer) prologue and
1482 epilogue code. */
1483 #define FAST_PROLOGUE_INSN_COUNT 20
1484
1485 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
1486 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1487 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1488 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1489
1490 /* Array of the smallest class containing reg number REGNO, indexed by
1491 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1492
1493 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1494 {
1495 /* ax, dx, cx, bx */
1496 AREG, DREG, CREG, BREG,
1497 /* si, di, bp, sp */
1498 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1499 /* FP registers */
1500 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1501 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1502 /* arg pointer */
1503 NON_Q_REGS,
1504 /* flags, fpsr, fpcr, frame */
1505 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1506 /* SSE registers */
1507 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1508 SSE_REGS, SSE_REGS,
1509 /* MMX registers */
1510 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1511 MMX_REGS, MMX_REGS,
1512 /* REX registers */
1513 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1514 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1515 /* SSE REX registers */
1516 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1517 SSE_REGS, SSE_REGS,
1518 };
1519
1520 /* The "default" register map used in 32bit mode. */
1521
1522 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1523 {
1524 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1525 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1526 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1527 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1528 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1529 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1530 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1531 };
1532
1533 static int const x86_64_int_parameter_registers[6] =
1534 {
1535 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1536 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1537 };
1538
1539 static int const x86_64_ms_abi_int_parameter_registers[4] =
1540 {
1541 2 /*RCX*/, 1 /*RDX*/,
1542 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1543 };
1544
1545 static int const x86_64_int_return_registers[4] =
1546 {
1547 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1548 };
1549
1550 /* The "default" register map used in 64bit mode. */
1551 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1552 {
1553 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1554 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1555 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1556 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1557 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1558 8,9,10,11,12,13,14,15, /* extended integer registers */
1559 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1560 };
1561
1562 /* Define the register numbers to be used in Dwarf debugging information.
1563 The SVR4 reference port C compiler uses the following register numbers
1564 in its Dwarf output code:
1565 0 for %eax (gcc regno = 0)
1566 1 for %ecx (gcc regno = 2)
1567 2 for %edx (gcc regno = 1)
1568 3 for %ebx (gcc regno = 3)
1569 4 for %esp (gcc regno = 7)
1570 5 for %ebp (gcc regno = 6)
1571 6 for %esi (gcc regno = 4)
1572 7 for %edi (gcc regno = 5)
1573 The following three DWARF register numbers are never generated by
1574 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1575 believes these numbers have these meanings.
1576 8 for %eip (no gcc equivalent)
1577 9 for %eflags (gcc regno = 17)
1578 10 for %trapno (no gcc equivalent)
1579 It is not at all clear how we should number the FP stack registers
1580 for the x86 architecture. If the version of SDB on x86/svr4 were
1581 a bit less brain dead with respect to floating-point then we would
1582 have a precedent to follow with respect to DWARF register numbers
1583 for x86 FP registers, but the SDB on x86/svr4 is so completely
1584 broken with respect to FP registers that it is hardly worth thinking
1585 of it as something to strive for compatibility with.
1586 The version of x86/svr4 SDB I have at the moment does (partially)
1587 seem to believe that DWARF register number 11 is associated with
1588 the x86 register %st(0), but that's about all. Higher DWARF
1589 register numbers don't seem to be associated with anything in
1590 particular, and even for DWARF regno 11, SDB only seems to under-
1591 stand that it should say that a variable lives in %st(0) (when
1592 asked via an `=' command) if we said it was in DWARF regno 11,
1593 but SDB still prints garbage when asked for the value of the
1594 variable in question (via a `/' command).
1595 (Also note that the labels SDB prints for various FP stack regs
1596 when doing an `x' command are all wrong.)
1597 Note that these problems generally don't affect the native SVR4
1598 C compiler because it doesn't allow the use of -O with -g and
1599 because when it is *not* optimizing, it allocates a memory
1600 location for each floating-point variable, and the memory
1601 location is what gets described in the DWARF AT_location
1602 attribute for the variable in question.
1603 Regardless of the severe mental illness of the x86/svr4 SDB, we
1604 do something sensible here and we use the following DWARF
1605 register numbers. Note that these are all stack-top-relative
1606 numbers.
1607 11 for %st(0) (gcc regno = 8)
1608 12 for %st(1) (gcc regno = 9)
1609 13 for %st(2) (gcc regno = 10)
1610 14 for %st(3) (gcc regno = 11)
1611 15 for %st(4) (gcc regno = 12)
1612 16 for %st(5) (gcc regno = 13)
1613 17 for %st(6) (gcc regno = 14)
1614 18 for %st(7) (gcc regno = 15)
1615 */
1616 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1617 {
1618 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1619 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1620 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1621 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1622 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1623 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1624 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1625 };
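/* Worked example (illustrative only): %ecx is gcc regno 2, and
   svr4_dbx_register_map[2] == 1, so its location is emitted as DWARF
   register 1; likewise %st(0) is gcc regno 8 and maps to DWARF register 11,
   matching the numbering described in the comment above.  */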
1626
1627 /* Test and compare insns in i386.md store the information needed to
1628 generate branch and scc insns here. */
1629
1630 rtx ix86_compare_op0 = NULL_RTX;
1631 rtx ix86_compare_op1 = NULL_RTX;
1632 rtx ix86_compare_emitted = NULL_RTX;
1633
1634 /* Size of the register save area. */
1635 #define X86_64_VARARGS_SIZE (X86_64_REGPARM_MAX * UNITS_PER_WORD + X86_64_SSE_REGPARM_MAX * 16)
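/* For illustration (assuming the usual SysV x86-64 values of
   X86_64_REGPARM_MAX == 6, X86_64_SSE_REGPARM_MAX == 8 and
   UNITS_PER_WORD == 8), the register save area is
   6 * 8 + 8 * 16 = 176 bytes.  */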
1636
1637 /* Define the structure for the machine field in struct function. */
1638
1639 struct stack_local_entry GTY(())
1640 {
1641 unsigned short mode;
1642 unsigned short n;
1643 rtx rtl;
1644 struct stack_local_entry *next;
1645 };
1646
1647 /* Structure describing stack frame layout.
1648 Stack grows downward:
1649
1650 [arguments]
1651 <- ARG_POINTER
1652 saved pc
1653
1654 saved frame pointer if frame_pointer_needed
1655 <- HARD_FRAME_POINTER
1656 [saved regs]
1657
1658 [padding1] \
1659 )
1660 [va_arg registers] (
1661 > to_allocate <- FRAME_POINTER
1662 [frame] (
1663 )
1664 [padding2] /
1665 */
1666 struct ix86_frame
1667 {
1668 int nregs;
1669 int padding1;
1670 int va_arg_size;
1671 HOST_WIDE_INT frame;
1672 int padding2;
1673 int outgoing_arguments_size;
1674 int red_zone_size;
1675
1676 HOST_WIDE_INT to_allocate;
1677 /* The offsets relative to ARG_POINTER. */
1678 HOST_WIDE_INT frame_pointer_offset;
1679 HOST_WIDE_INT hard_frame_pointer_offset;
1680 HOST_WIDE_INT stack_pointer_offset;
1681
1682 /* When save_regs_using_mov is set, emit prologue using
1683 move instead of push instructions. */
1684 bool save_regs_using_mov;
1685 };
1686
1687 /* Code model option. */
1688 enum cmodel ix86_cmodel;
1689 /* Asm dialect. */
1690 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1691 /* TLS dialects. */
1692 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1693
1694 /* Which unit we are generating floating point math for. */
1695 enum fpmath_unit ix86_fpmath;
1696
1697 /* Which cpu are we scheduling for. */
1698 enum processor_type ix86_tune;
1699
1700 /* Which instruction set architecture to use. */
1701 enum processor_type ix86_arch;
1702
1703 /* True if the SSE prefetch instruction is not a NOP. */
1704 int x86_prefetch_sse;
1705
1706 /* ix86_regparm_string as a number */
1707 static int ix86_regparm;
1708
1709 /* -mstackrealign option */
1710 extern int ix86_force_align_arg_pointer;
1711 static const char ix86_force_align_arg_pointer_string[]
1712 = "force_align_arg_pointer";
1713
1714 static rtx (*ix86_gen_leave) (void);
1715 static rtx (*ix86_gen_pop1) (rtx);
1716 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1717 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1718 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
1719 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1720 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1721 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1722
1723 /* Preferred alignment for stack boundary in bits. */
1724 unsigned int ix86_preferred_stack_boundary;
1725
1726 /* Alignment for incoming stack boundary in bits specified at
1727 command line. */
1728 static unsigned int ix86_user_incoming_stack_boundary;
1729
1730 /* Default alignment for incoming stack boundary in bits. */
1731 static unsigned int ix86_default_incoming_stack_boundary;
1732
1733 /* Alignment for incoming stack boundary in bits. */
1734 unsigned int ix86_incoming_stack_boundary;
1735
1736 /* Values 1-5: see jump.c */
1737 int ix86_branch_cost;
1738
1739 /* Calling-ABI-specific va_list type nodes. */
1740 static GTY(()) tree sysv_va_list_type_node;
1741 static GTY(()) tree ms_va_list_type_node;
1742
1743 /* Variables which are this size or smaller are put in the data/bss
1744 or ldata/lbss sections. */
1745
1746 int ix86_section_threshold = 65536;
1747
1748 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1749 char internal_label_prefix[16];
1750 int internal_label_prefix_len;
1751
1752 /* Fence to use after loop using movnt. */
1753 tree x86_mfence;
1754
1755 /* Register class used for passing a given 64bit part of the argument.
1756 These represent classes as documented by the psABI, with the exception of
1757 the SSESF and SSEDF classes, which are basically the SSE class, except that
1758 gcc will use SFmode or DFmode moves instead of DImode to avoid
1759 reformatting penalties.  Similarly we play games with INTEGERSI_CLASS
1760 to use cheaper SImode moves whenever possible (the upper half then
1761 contains only padding). */
1762 enum x86_64_reg_class
1763 {
1764 X86_64_NO_CLASS,
1765 X86_64_INTEGER_CLASS,
1766 X86_64_INTEGERSI_CLASS,
1767 X86_64_SSE_CLASS,
1768 X86_64_SSESF_CLASS,
1769 X86_64_SSEDF_CLASS,
1770 X86_64_SSEUP_CLASS,
1771 X86_64_X87_CLASS,
1772 X86_64_X87UP_CLASS,
1773 X86_64_COMPLEX_X87_CLASS,
1774 X86_64_MEMORY_CLASS
1775 };
1776 static const char * const x86_64_reg_class_name[] =
1777 {
1778 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1779 "sseup", "x87", "x87up", "cplx87", "no"
1780 };
1781
1782 #define MAX_CLASSES 4
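/* Rough classification example (per the psABI, not taken from this file):
   a 16-byte struct { double d; long l; } is split into two eightbytes; the
   first classifies roughly as X86_64_SSEDF_CLASS ("sseDF") and the second as
   X86_64_INTEGER_CLASS ("integer"), so the struct is passed in one SSE and
   one integer register.  */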
1783
1784 /* Table of constants used by fldpi, fldln2, etc.... */
1785 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1786 static bool ext_80387_constants_init = 0;
1787
1788 \f
1789 static struct machine_function * ix86_init_machine_status (void);
1790 static rtx ix86_function_value (const_tree, const_tree, bool);
1791 static int ix86_function_regparm (const_tree, const_tree);
1792 static void ix86_compute_frame_layout (struct ix86_frame *);
1793 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1794 rtx, rtx, int);
1795
1796 enum ix86_function_specific_strings
1797 {
1798 IX86_FUNCTION_SPECIFIC_ARCH,
1799 IX86_FUNCTION_SPECIFIC_TUNE,
1800 IX86_FUNCTION_SPECIFIC_FPMATH,
1801 IX86_FUNCTION_SPECIFIC_MAX
1802 };
1803
1804 static char *ix86_target_string (int, int, const char *, const char *,
1805 const char *, bool);
1806 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1807 static void ix86_function_specific_save (struct cl_target_option *);
1808 static void ix86_function_specific_restore (struct cl_target_option *);
1809 static void ix86_function_specific_print (FILE *, int,
1810 struct cl_target_option *);
1811 static bool ix86_valid_option_attribute_p (tree, tree, tree, int);
1812 static bool ix86_valid_option_attribute_inner_p (tree, char *[]);
1813 static bool ix86_can_inline_p (tree, tree);
1814 static void ix86_set_current_function (tree);
1815
1816 \f
1817 /* The svr4 ABI for the i386 says that records and unions are returned
1818 in memory. */
1819 #ifndef DEFAULT_PCC_STRUCT_RETURN
1820 #define DEFAULT_PCC_STRUCT_RETURN 1
1821 #endif
1822
1823 /* Whether -mtune= or -march= were specified */
1824 static int ix86_tune_defaulted;
1825 static int ix86_arch_specified;
1826
1827 /* Bit flags that specify the ISA we are compiling for. */
1828 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1829
1830 /* A mask of ix86_isa_flags that includes bit X if X
1831 was set or cleared on the command line. */
1832 static int ix86_isa_flags_explicit;
1833
1834 /* Define a set of ISAs which are available when a given ISA is
1835 enabled. MMX and SSE ISAs are handled separately. */
1836
1837 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1838 #define OPTION_MASK_ISA_3DNOW_SET \
1839 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1840
1841 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1842 #define OPTION_MASK_ISA_SSE2_SET \
1843 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1844 #define OPTION_MASK_ISA_SSE3_SET \
1845 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1846 #define OPTION_MASK_ISA_SSSE3_SET \
1847 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1848 #define OPTION_MASK_ISA_SSE4_1_SET \
1849 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1850 #define OPTION_MASK_ISA_SSE4_2_SET \
1851 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1852
1853 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1854 as -msse4.2. */
1855 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1856
1857 #define OPTION_MASK_ISA_SSE4A_SET \
1858 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1859 #define OPTION_MASK_ISA_SSE5_SET \
1860 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1861
1862 /* AES and PCLMUL need SSE2 because they use xmm registers */
1863 #define OPTION_MASK_ISA_AES_SET \
1864 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
1865 #define OPTION_MASK_ISA_PCLMUL_SET \
1866 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
1867
1868 #define OPTION_MASK_ISA_ABM_SET \
1869 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
1870 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
1871 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
1872 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
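/* Expansion example (derived from the definitions above): enabling SSE4.1
   pulls in the whole chain below it, i.e.

     OPTION_MASK_ISA_SSE4_1_SET
       == (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3
           | OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2
           | OPTION_MASK_ISA_SSE).  */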
1873
1874 /* Define a set of ISAs which aren't available when a given ISA is
1875 disabled. MMX and SSE ISAs are handled separately. */
1876
1877 #define OPTION_MASK_ISA_MMX_UNSET \
1878 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1879 #define OPTION_MASK_ISA_3DNOW_UNSET \
1880 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1881 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1882
1883 #define OPTION_MASK_ISA_SSE_UNSET \
1884 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1885 #define OPTION_MASK_ISA_SSE2_UNSET \
1886 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1887 #define OPTION_MASK_ISA_SSE3_UNSET \
1888 (OPTION_MASK_ISA_SSE3 \
1889 | OPTION_MASK_ISA_SSSE3_UNSET \
1890 | OPTION_MASK_ISA_SSE4A_UNSET )
1891 #define OPTION_MASK_ISA_SSSE3_UNSET \
1892 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1893 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1894 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1895 #define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4_2
1896
1897 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
1898 as -mno-sse4.1. */
1899 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1900
1901 #define OPTION_MASK_ISA_SSE4A_UNSET \
1902 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
1903 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
1904 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
1905 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
1906 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
1907 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
1908 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
1909 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
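/* Expansion example (derived from the definitions above): -mno-sse3 must
   also turn off everything that depends on SSE3, i.e.

     OPTION_MASK_ISA_SSE3_UNSET
       == (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSSE3
           | OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2
           | OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5).  */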
1910
1911 /* Vectorization library interface and handlers. */
1912 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
1913 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
1914 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
1915
1916 /* Processor target table, indexed by processor number */
1917 struct ptt
1918 {
1919 const struct processor_costs *cost; /* Processor costs */
1920 const int align_loop; /* Default alignments. */
1921 const int align_loop_max_skip;
1922 const int align_jump;
1923 const int align_jump_max_skip;
1924 const int align_func;
1925 };
1926
1927 static const struct ptt processor_target_table[PROCESSOR_max] =
1928 {
1929 {&i386_cost, 4, 3, 4, 3, 4},
1930 {&i486_cost, 16, 15, 16, 15, 16},
1931 {&pentium_cost, 16, 7, 16, 7, 16},
1932 {&pentiumpro_cost, 16, 15, 16, 10, 16},
1933 {&geode_cost, 0, 0, 0, 0, 0},
1934 {&k6_cost, 32, 7, 32, 7, 32},
1935 {&athlon_cost, 16, 7, 16, 7, 16},
1936 {&pentium4_cost, 0, 0, 0, 0, 0},
1937 {&k8_cost, 16, 7, 16, 7, 16},
1938 {&nocona_cost, 0, 0, 0, 0, 0},
1939 {&core2_cost, 16, 10, 16, 10, 16},
1940 {&generic32_cost, 16, 7, 16, 7, 16},
1941 {&generic64_cost, 16, 10, 16, 10, 16},
1942 {&amdfam10_cost, 32, 24, 32, 7, 32}
1943 };
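/* Usage example (illustrative): for -mtune=core2 the entry above gives
   align_loop == 16 with align_loop_max_skip == 10; override_options copies
   these into align_loops/align_loops_max_skip further below, but only when
   align_loops is still zero, i.e. the user did not set -falign-loops.  */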
1944
1945 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
1946 {
1947 "generic",
1948 "i386",
1949 "i486",
1950 "pentium",
1951 "pentium-mmx",
1952 "pentiumpro",
1953 "pentium2",
1954 "pentium3",
1955 "pentium4",
1956 "pentium-m",
1957 "prescott",
1958 "nocona",
1959 "core2",
1960 "geode",
1961 "k6",
1962 "k6-2",
1963 "k6-3",
1964 "athlon",
1965 "athlon-4",
1966 "k8",
1967 "amdfam10"
1968 };
1969 \f
1970 /* Implement TARGET_HANDLE_OPTION. */
1971
1972 static bool
1973 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1974 {
1975 switch (code)
1976 {
1977 case OPT_mmmx:
1978 if (value)
1979 {
1980 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
1981 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
1982 }
1983 else
1984 {
1985 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1986 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
1987 }
1988 return true;
1989
1990 case OPT_m3dnow:
1991 if (value)
1992 {
1993 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
1994 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
1995 }
1996 else
1997 {
1998 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
1999 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2000 }
2001 return true;
2002
2003 case OPT_m3dnowa:
2004 return false;
2005
2006 case OPT_msse:
2007 if (value)
2008 {
2009 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2010 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2011 }
2012 else
2013 {
2014 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2015 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2016 }
2017 return true;
2018
2019 case OPT_msse2:
2020 if (value)
2021 {
2022 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2023 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2024 }
2025 else
2026 {
2027 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2028 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2029 }
2030 return true;
2031
2032 case OPT_msse3:
2033 if (value)
2034 {
2035 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2036 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2037 }
2038 else
2039 {
2040 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2041 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2042 }
2043 return true;
2044
2045 case OPT_mssse3:
2046 if (value)
2047 {
2048 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2049 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2050 }
2051 else
2052 {
2053 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2054 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2055 }
2056 return true;
2057
2058 case OPT_msse4_1:
2059 if (value)
2060 {
2061 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2062 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2063 }
2064 else
2065 {
2066 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2067 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2068 }
2069 return true;
2070
2071 case OPT_msse4_2:
2072 if (value)
2073 {
2074 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2075 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2076 }
2077 else
2078 {
2079 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2080 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2081 }
2082 return true;
2083
2084 case OPT_msse4:
2085 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2086 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2087 return true;
2088
2089 case OPT_mno_sse4:
2090 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2091 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2092 return true;
2093
2094 case OPT_msse4a:
2095 if (value)
2096 {
2097 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2098 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2099 }
2100 else
2101 {
2102 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2103 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2104 }
2105 return true;
2106
2107 case OPT_msse5:
2108 if (value)
2109 {
2110 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
2111 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
2112 }
2113 else
2114 {
2115 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
2116 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
2117 }
2118 return true;
2119
2120 case OPT_mabm:
2121 if (value)
2122 {
2123 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2124 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2125 }
2126 else
2127 {
2128 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2129 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2130 }
2131 return true;
2132
2133 case OPT_mpopcnt:
2134 if (value)
2135 {
2136 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2137 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2138 }
2139 else
2140 {
2141 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2142 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2143 }
2144 return true;
2145
2146 case OPT_msahf:
2147 if (value)
2148 {
2149 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2150 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2151 }
2152 else
2153 {
2154 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2155 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2156 }
2157 return true;
2158
2159 case OPT_mcx16:
2160 if (value)
2161 {
2162 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2163 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2164 }
2165 else
2166 {
2167 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2168 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2169 }
2170 return true;
2171
2172 case OPT_maes:
2173 if (value)
2174 {
2175 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2176 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2177 }
2178 else
2179 {
2180 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2181 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2182 }
2183 return true;
2184
2185 case OPT_mpclmul:
2186 if (value)
2187 {
2188 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2189 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2190 }
2191 else
2192 {
2193 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2194 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2195 }
2196 return true;
2197
2198 default:
2199 return true;
2200 }
2201 }
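/* Flow example (illustrative): for "-mssse3 -mno-sse4.1" this handler first
   ORs OPTION_MASK_ISA_SSSE3_SET (SSSE3 plus SSE3/SSE2/SSE) into
   ix86_isa_flags, then clears OPTION_MASK_ISA_SSE4_1_UNSET (SSE4.1 and
   SSE4.2).  Both masks are also recorded in ix86_isa_flags_explicit, so
   override_options will not silently re-enable them from an -march
   default.  */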
2202 \f
2203 /* Return a string that documents the current -m options. The caller is
2204 responsible for freeing the string. */
2205
2206 static char *
2207 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2208 const char *fpmath, bool add_nl_p)
2209 {
2210 struct ix86_target_opts
2211 {
2212 const char *option; /* option string */
2213 int mask; /* isa mask options */
2214 };
2215
2216 /* This table is ordered so that options like -msse5 or -msse4.2, which
2217 imply the preceding options, are matched first. */
2218 static struct ix86_target_opts isa_opts[] =
2219 {
2220 { "-m64", OPTION_MASK_ISA_64BIT },
2221 { "-msse5", OPTION_MASK_ISA_SSE5 },
2222 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2223 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2224 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2225 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2226 { "-msse3", OPTION_MASK_ISA_SSE3 },
2227 { "-msse2", OPTION_MASK_ISA_SSE2 },
2228 { "-msse", OPTION_MASK_ISA_SSE },
2229 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2230 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2231 { "-mmmx", OPTION_MASK_ISA_MMX },
2232 { "-mabm", OPTION_MASK_ISA_ABM },
2233 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2234 { "-maes", OPTION_MASK_ISA_AES },
2235 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2236 };
2237
2238 /* Flag options. */
2239 static struct ix86_target_opts flag_opts[] =
2240 {
2241 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2242 { "-m80387", MASK_80387 },
2243 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2244 { "-malign-double", MASK_ALIGN_DOUBLE },
2245 { "-mcld", MASK_CLD },
2246 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2247 { "-mieee-fp", MASK_IEEE_FP },
2248 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2249 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2250 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2251 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2252 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2253 { "-mno-fused-madd", MASK_NO_FUSED_MADD },
2254 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2255 { "-mno-red-zone", MASK_NO_RED_ZONE },
2256 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2257 { "-mrecip", MASK_RECIP },
2258 { "-mrtd", MASK_RTD },
2259 { "-msseregparm", MASK_SSEREGPARM },
2260 { "-mstack-arg-probe", MASK_STACK_PROBE },
2261 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2262 };
2263
2264 const char *opts[ (sizeof (isa_opts) / sizeof (isa_opts[0])
2265 + sizeof (flag_opts) / sizeof (flag_opts[0])
2266 + 6)][2];
2267
2268 char isa_other[40];
2269 char target_other[40];
2270 unsigned num = 0;
2271 unsigned i, j;
2272 char *ret;
2273 char *ptr;
2274 size_t len;
2275 size_t line_len;
2276 size_t sep_len;
2277
2278 memset (opts, '\0', sizeof (opts));
2279
2280 /* Add -march= option. */
2281 if (arch)
2282 {
2283 opts[num][0] = "-march=";
2284 opts[num++][1] = arch;
2285 }
2286
2287 /* Add -mtune= option. */
2288 if (tune)
2289 {
2290 opts[num][0] = "-mtune=";
2291 opts[num++][1] = tune;
2292 }
2293
2294 /* Pick out the options in isa options. */
2295 for (i = 0; i < sizeof (isa_opts) / sizeof (isa_opts[0]); i++)
2296 {
2297 if ((isa & isa_opts[i].mask) != 0)
2298 {
2299 opts[num++][0] = isa_opts[i].option;
2300 isa &= ~ isa_opts[i].mask;
2301 }
2302 }
2303
2304 if (isa && add_nl_p)
2305 {
2306 opts[num++][0] = isa_other;
2307 sprintf (isa_other, "(other isa: 0x%x)", isa);
2308 }
2309
2310 /* Add flag options. */
2311 for (i = 0; i < sizeof (flag_opts) / sizeof (flag_opts[0]); i++)
2312 {
2313 if ((flags & flag_opts[i].mask) != 0)
2314 {
2315 opts[num++][0] = flag_opts[i].option;
2316 flags &= ~ flag_opts[i].mask;
2317 }
2318 }
2319
2320 if (flags && add_nl_p)
2321 {
2322 opts[num++][0] = target_other;
2323 sprintf (target_other, "(other flags: 0x%x)", flags);
2324 }
2325
2326 /* Add -fpmath= option. */
2327 if (fpmath)
2328 {
2329 opts[num][0] = "-mfpmath=";
2330 opts[num++][1] = fpmath;
2331 }
2332
2333 /* Any options? */
2334 if (num == 0)
2335 return NULL;
2336
2337 gcc_assert (num < sizeof (opts) / sizeof (opts[0]));
2338
2339 /* Size the string. */
2340 len = 0;
2341 sep_len = (add_nl_p) ? 3 : 1;
2342 for (i = 0; i < num; i++)
2343 {
2344 len += sep_len;
2345 for (j = 0; j < 2; j++)
2346 if (opts[i][j])
2347 len += strlen (opts[i][j]);
2348 }
2349
2350 /* Build the string. */
2351 ret = ptr = (char *) xmalloc (len);
2352 line_len = 0;
2353
2354 for (i = 0; i < num; i++)
2355 {
2356 size_t len2[2];
2357
2358 for (j = 0; j < 2; j++)
2359 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2360
2361 if (i != 0)
2362 {
2363 *ptr++ = ' ';
2364 line_len++;
2365
2366 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2367 {
2368 *ptr++ = '\\';
2369 *ptr++ = '\n';
2370 line_len = 0;
2371 }
2372 }
2373
2374 for (j = 0; j < 2; j++)
2375 if (opts[i][j])
2376 {
2377 memcpy (ptr, opts[i][j], len2[j]);
2378 ptr += len2[j];
2379 line_len += len2[j];
2380 }
2381 }
2382
2383 *ptr = '\0';
2384 gcc_assert (ret + len >= ptr);
2385
2386 return ret;
2387 }
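/* Example output (illustrative; exact flags depend on the target defaults):
   for arch == "core2", tune == "generic", fpmath == "sse" and an isa mask
   containing the 64BIT/SSSE3/SSE3/SSE2/SSE/MMX bits, the returned string is
   roughly
     "-march=core2 -mtune=generic -m64 -mssse3 -msse3 -msse2 -msse -mmmx -mfpmath=sse"
   with a backslash-newline break inserted whenever add_nl_p is set and a
   line would exceed 70 columns.  */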
2388
2389 /* Function that is callable from the debugger to print the current
2390 options. */
2391 void
2392 ix86_debug_options (void)
2393 {
2394 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2395 ix86_arch_string, ix86_tune_string,
2396 ix86_fpmath_string, true);
2397
2398 if (opts)
2399 {
2400 fprintf (stderr, "%s\n\n", opts);
2401 free (opts);
2402 }
2403 else
2404 fprintf (stderr, "<no options>\n\n");
2405
2406 return;
2407 }
2408 \f
2409 /* Sometimes certain combinations of command options do not make
2410 sense on a particular target machine. You can define a macro
2411 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2412 defined, is executed once just after all the command options have
2413 been parsed.
2414
2415 Don't use this macro to turn on various extra optimizations for
2416 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2417
2418 void
2419 override_options (bool main_args_p)
2420 {
2421 int i;
2422 unsigned int ix86_arch_mask, ix86_tune_mask;
2423 const char *prefix;
2424 const char *suffix;
2425 const char *sw;
2426
2427 /* Comes from final.c -- no real reason to change it. */
2428 #define MAX_CODE_ALIGN 16
2429
2430 enum pta_flags
2431 {
2432 PTA_SSE = 1 << 0,
2433 PTA_SSE2 = 1 << 1,
2434 PTA_SSE3 = 1 << 2,
2435 PTA_MMX = 1 << 3,
2436 PTA_PREFETCH_SSE = 1 << 4,
2437 PTA_3DNOW = 1 << 5,
2438 PTA_3DNOW_A = 1 << 6,
2439 PTA_64BIT = 1 << 7,
2440 PTA_SSSE3 = 1 << 8,
2441 PTA_CX16 = 1 << 9,
2442 PTA_POPCNT = 1 << 10,
2443 PTA_ABM = 1 << 11,
2444 PTA_SSE4A = 1 << 12,
2445 PTA_NO_SAHF = 1 << 13,
2446 PTA_SSE4_1 = 1 << 14,
2447 PTA_SSE4_2 = 1 << 15,
2448 PTA_SSE5 = 1 << 16,
2449 PTA_AES = 1 << 17,
2450 PTA_PCLMUL = 1 << 18
2451 };
2452
2453 static struct pta
2454 {
2455 const char *const name; /* processor name or nickname. */
2456 const enum processor_type processor;
2457 const unsigned /*enum pta_flags*/ flags;
2458 }
2459 const processor_alias_table[] =
2460 {
2461 {"i386", PROCESSOR_I386, 0},
2462 {"i486", PROCESSOR_I486, 0},
2463 {"i586", PROCESSOR_PENTIUM, 0},
2464 {"pentium", PROCESSOR_PENTIUM, 0},
2465 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
2466 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
2467 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2468 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2469 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2470 {"i686", PROCESSOR_PENTIUMPRO, 0},
2471 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
2472 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
2473 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2474 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2475 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
2476 {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX |PTA_SSE | PTA_SSE2},
2477 {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
2478 {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2479 {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
2480 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2481 | PTA_CX16 | PTA_NO_SAHF)},
2482 {"core2", PROCESSOR_CORE2, (PTA_64BIT
2483 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2484 | PTA_SSSE3
2485 | PTA_CX16)},
2486 {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2487 |PTA_PREFETCH_SSE)},
2488 {"k6", PROCESSOR_K6, PTA_MMX},
2489 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2490 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2491 {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2492 | PTA_PREFETCH_SSE)},
2493 {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2494 | PTA_PREFETCH_SSE)},
2495 {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2496 | PTA_SSE)},
2497 {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2498 | PTA_SSE)},
2499 {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2500 | PTA_SSE)},
2501 {"x86-64", PROCESSOR_K8, (PTA_64BIT
2502 | PTA_MMX | PTA_SSE | PTA_SSE2
2503 | PTA_NO_SAHF)},
2504 {"k8", PROCESSOR_K8, (PTA_64BIT
2505 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2506 | PTA_SSE | PTA_SSE2
2507 | PTA_NO_SAHF)},
2508 {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
2509 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2510 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2511 | PTA_NO_SAHF)},
2512 {"opteron", PROCESSOR_K8, (PTA_64BIT
2513 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2514 | PTA_SSE | PTA_SSE2
2515 | PTA_NO_SAHF)},
2516 {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
2517 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2518 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2519 | PTA_NO_SAHF)},
2520 {"athlon64", PROCESSOR_K8, (PTA_64BIT
2521 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2522 | PTA_SSE | PTA_SSE2
2523 | PTA_NO_SAHF)},
2524 {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
2525 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2526 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2527 | PTA_NO_SAHF)},
2528 {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
2529 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2530 | PTA_SSE | PTA_SSE2
2531 | PTA_NO_SAHF)},
2532 {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
2533 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2534 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2535 | PTA_SSE4A
2536 | PTA_CX16 | PTA_ABM)},
2537 {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
2538 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2539 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2540 | PTA_SSE4A
2541 | PTA_CX16 | PTA_ABM)},
2542 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
2543 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
2544 };
2545
2546 int const pta_size = ARRAY_SIZE (processor_alias_table);
2547
2548 /* Set up prefix/suffix so the error messages refer to either the command
2549 line argument, or the attribute(option). */
2550 if (main_args_p)
2551 {
2552 prefix = "-m";
2553 suffix = "";
2554 sw = "switch";
2555 }
2556 else
2557 {
2558 prefix = "option(\"";
2559 suffix = "\")";
2560 sw = "attribute";
2561 }
2562
2563 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2564 SUBTARGET_OVERRIDE_OPTIONS;
2565 #endif
2566
2567 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2568 SUBSUBTARGET_OVERRIDE_OPTIONS;
2569 #endif
2570
2571 /* -fPIC is the default for x86_64. */
2572 if (TARGET_MACHO && TARGET_64BIT)
2573 flag_pic = 2;
2574
2575 /* Set the default values for switches whose default depends on TARGET_64BIT
2576 in case they weren't overwritten by command line options. */
2577 if (TARGET_64BIT)
2578 {
2579 /* Mach-O doesn't support omitting the frame pointer for now. */
2580 if (flag_omit_frame_pointer == 2)
2581 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2582 if (flag_asynchronous_unwind_tables == 2)
2583 flag_asynchronous_unwind_tables = 1;
2584 if (flag_pcc_struct_return == 2)
2585 flag_pcc_struct_return = 0;
2586 }
2587 else
2588 {
2589 if (flag_omit_frame_pointer == 2)
2590 flag_omit_frame_pointer = 0;
2591 if (flag_asynchronous_unwind_tables == 2)
2592 flag_asynchronous_unwind_tables = 0;
2593 if (flag_pcc_struct_return == 2)
2594 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2595 }
2596
2597 /* Need to check -mtune=generic first. */
2598 if (ix86_tune_string)
2599 {
2600 if (!strcmp (ix86_tune_string, "generic")
2601 || !strcmp (ix86_tune_string, "i686")
2602 /* As special support for cross compilers we read -mtune=native
2603 as -mtune=generic. With native compilers we won't see the
2604 -mtune=native, as it was changed by the driver. */
2605 || !strcmp (ix86_tune_string, "native"))
2606 {
2607 if (TARGET_64BIT)
2608 ix86_tune_string = "generic64";
2609 else
2610 ix86_tune_string = "generic32";
2611 }
2612 /* If this call is for setting the option attribute, allow the
2613 generic32/generic64 that was previously set. */
2614 else if (!main_args_p
2615 && (!strcmp (ix86_tune_string, "generic32")
2616 || !strcmp (ix86_tune_string, "generic64")))
2617 ;
2618 else if (!strncmp (ix86_tune_string, "generic", 7))
2619 error ("bad value (%s) for %stune=%s %s",
2620 ix86_tune_string, prefix, suffix, sw);
2621 }
2622 else
2623 {
2624 if (ix86_arch_string)
2625 ix86_tune_string = ix86_arch_string;
2626 if (!ix86_tune_string)
2627 {
2628 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2629 ix86_tune_defaulted = 1;
2630 }
2631
2632 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2633 need to use a sensible tune option. */
2634 if (!strcmp (ix86_tune_string, "generic")
2635 || !strcmp (ix86_tune_string, "x86-64")
2636 || !strcmp (ix86_tune_string, "i686"))
2637 {
2638 if (TARGET_64BIT)
2639 ix86_tune_string = "generic64";
2640 else
2641 ix86_tune_string = "generic32";
2642 }
2643 }
2644 if (ix86_stringop_string)
2645 {
2646 if (!strcmp (ix86_stringop_string, "rep_byte"))
2647 stringop_alg = rep_prefix_1_byte;
2648 else if (!strcmp (ix86_stringop_string, "libcall"))
2649 stringop_alg = libcall;
2650 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2651 stringop_alg = rep_prefix_4_byte;
2652 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2653 stringop_alg = rep_prefix_8_byte;
2654 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2655 stringop_alg = loop_1_byte;
2656 else if (!strcmp (ix86_stringop_string, "loop"))
2657 stringop_alg = loop;
2658 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2659 stringop_alg = unrolled_loop;
2660 else
2661 error ("bad value (%s) for %sstringop-strategy=%s %s",
2662 ix86_stringop_string, prefix, suffix, sw);
2663 }
2664 if (!strcmp (ix86_tune_string, "x86-64"))
2665 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
2666 "%stune=k8%s or %stune=generic%s instead as appropriate.",
2667 prefix, suffix, prefix, suffix, prefix, suffix);
2668
2669 if (!ix86_arch_string)
2670 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2671 else
2672 ix86_arch_specified = 1;
2673
2674 if (!strcmp (ix86_arch_string, "generic"))
2675 error ("generic CPU can be used only for %stune=%s %s",
2676 prefix, suffix, sw);
2677 if (!strncmp (ix86_arch_string, "generic", 7))
2678 error ("bad value (%s) for %sarch=%s %s",
2679 ix86_arch_string, prefix, suffix, sw);
2680
2681 if (ix86_cmodel_string != 0)
2682 {
2683 if (!strcmp (ix86_cmodel_string, "small"))
2684 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2685 else if (!strcmp (ix86_cmodel_string, "medium"))
2686 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2687 else if (!strcmp (ix86_cmodel_string, "large"))
2688 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2689 else if (flag_pic)
2690 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2691 else if (!strcmp (ix86_cmodel_string, "32"))
2692 ix86_cmodel = CM_32;
2693 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2694 ix86_cmodel = CM_KERNEL;
2695 else
2696 error ("bad value (%s) for %scmodel=%s %s",
2697 ix86_cmodel_string, prefix, suffix, sw);
2698 }
2699 else
2700 {
2701 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2702 use of rip-relative addressing. This eliminates fixups that
2703 would otherwise be needed if this object is to be placed in a
2704 DLL, and is essentially just as efficient as direct addressing. */
2705 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2706 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2707 else if (TARGET_64BIT)
2708 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2709 else
2710 ix86_cmodel = CM_32;
2711 }
2712 if (ix86_asm_string != 0)
2713 {
2714 if (! TARGET_MACHO
2715 && !strcmp (ix86_asm_string, "intel"))
2716 ix86_asm_dialect = ASM_INTEL;
2717 else if (!strcmp (ix86_asm_string, "att"))
2718 ix86_asm_dialect = ASM_ATT;
2719 else
2720 error ("bad value (%s) for %sasm=%s %s",
2721 ix86_asm_string, prefix, suffix, sw);
2722 }
2723 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2724 error ("code model %qs not supported in the %s bit mode",
2725 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2726 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2727 sorry ("%i-bit mode not compiled in",
2728 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2729
2730 for (i = 0; i < pta_size; i++)
2731 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2732 {
2733 ix86_arch = processor_alias_table[i].processor;
2734 /* Default cpu tuning to the architecture. */
2735 ix86_tune = ix86_arch;
2736
2737 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2738 error ("CPU you selected does not support x86-64 "
2739 "instruction set");
2740
2741 if (processor_alias_table[i].flags & PTA_MMX
2742 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2743 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2744 if (processor_alias_table[i].flags & PTA_3DNOW
2745 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2746 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2747 if (processor_alias_table[i].flags & PTA_3DNOW_A
2748 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2749 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2750 if (processor_alias_table[i].flags & PTA_SSE
2751 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2752 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2753 if (processor_alias_table[i].flags & PTA_SSE2
2754 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2755 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2756 if (processor_alias_table[i].flags & PTA_SSE3
2757 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2758 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2759 if (processor_alias_table[i].flags & PTA_SSSE3
2760 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2761 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2762 if (processor_alias_table[i].flags & PTA_SSE4_1
2763 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2764 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2765 if (processor_alias_table[i].flags & PTA_SSE4_2
2766 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2767 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2768 if (processor_alias_table[i].flags & PTA_SSE4A
2769 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2770 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2771 if (processor_alias_table[i].flags & PTA_SSE5
2772 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2773 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2774 if (processor_alias_table[i].flags & PTA_ABM
2775 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
2776 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
2777 if (processor_alias_table[i].flags & PTA_CX16
2778 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
2779 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
2780 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
2781 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
2782 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
2783 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
2784 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
2785 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
2786 if (processor_alias_table[i].flags & PTA_AES
2787 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
2788 ix86_isa_flags |= OPTION_MASK_ISA_AES;
2789 if (processor_alias_table[i].flags & PTA_PCLMUL
2790 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
2791 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
2792 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2793 x86_prefetch_sse = true;
2794
2795 break;
2796 }
2797
2798 if (i == pta_size)
2799 error ("bad value (%s) for %sarch=%s %s",
2800 ix86_arch_string, prefix, suffix, sw);
2801
2802 ix86_arch_mask = 1u << ix86_arch;
2803 for (i = 0; i < X86_ARCH_LAST; ++i)
2804 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
2805
2806 for (i = 0; i < pta_size; i++)
2807 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2808 {
2809 ix86_tune = processor_alias_table[i].processor;
2810 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2811 {
2812 if (ix86_tune_defaulted)
2813 {
2814 ix86_tune_string = "x86-64";
2815 for (i = 0; i < pta_size; i++)
2816 if (! strcmp (ix86_tune_string,
2817 processor_alias_table[i].name))
2818 break;
2819 ix86_tune = processor_alias_table[i].processor;
2820 }
2821 else
2822 error ("CPU you selected does not support x86-64 "
2823 "instruction set");
2824 }
2825 /* Intel CPUs have always interpreted SSE prefetch instructions as
2826 NOPs; so, we can enable SSE prefetch instructions even when
2827 -mtune (rather than -march) points us to a processor that has them.
2828 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2829 higher processors. */
2830 if (TARGET_CMOVE
2831 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2832 x86_prefetch_sse = true;
2833 break;
2834 }
2835 if (i == pta_size)
2836 error ("bad value (%s) for %stune=%s %s",
2837 ix86_tune_string, prefix, suffix, sw);
2838
2839 ix86_tune_mask = 1u << ix86_tune;
2840 for (i = 0; i < X86_TUNE_LAST; ++i)
2841 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
2842
2843 if (optimize_size)
2844 ix86_cost = &ix86_size_cost;
2845 else
2846 ix86_cost = processor_target_table[ix86_tune].cost;
2847
2848 /* Arrange to set up i386_stack_locals for all functions. */
2849 init_machine_status = ix86_init_machine_status;
2850
2851 /* Validate -mregparm= value. */
2852 if (ix86_regparm_string)
2853 {
2854 if (TARGET_64BIT)
2855 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
2856 i = atoi (ix86_regparm_string);
2857 if (i < 0 || i > REGPARM_MAX)
2858 error ("%sregparm=%d%s is not between 0 and %d",
2859 prefix, i, suffix, REGPARM_MAX);
2860 else
2861 ix86_regparm = i;
2862 }
2863 if (TARGET_64BIT)
2864 ix86_regparm = REGPARM_MAX;
2865
2866 /* If the user has provided any of the -malign-* options,
2867 warn and use that value only if -falign-* is not set.
2868 Remove this code in GCC 3.2 or later. */
2869 if (ix86_align_loops_string)
2870 {
2871 warning (0, "%salign-loops%s is obsolete, use %salign-loops%s",
2872 prefix, suffix, prefix, suffix);
2873 if (align_loops == 0)
2874 {
2875 i = atoi (ix86_align_loops_string);
2876 if (i < 0 || i > MAX_CODE_ALIGN)
2877 error ("%salign-loops=%d%s is not between 0 and %d",
2878 prefix, i, suffix, MAX_CODE_ALIGN);
2879 else
2880 align_loops = 1 << i;
2881 }
2882 }
2883
2884 if (ix86_align_jumps_string)
2885 {
2886 warning (0, "%salign-jumps%s is obsolete, use %salign-jumps%s",
2887 prefix, suffix, prefix, suffix);
2888 if (align_jumps == 0)
2889 {
2890 i = atoi (ix86_align_jumps_string);
2891 if (i < 0 || i > MAX_CODE_ALIGN)
2892 error ("%salign-loops=%d%s is not between 0 and %d",
2893 prefix, i, suffix, MAX_CODE_ALIGN);
2894 else
2895 align_jumps = 1 << i;
2896 }
2897 }
2898
2899 if (ix86_align_funcs_string)
2900 {
2901 warning (0, "%salign-functions%s is obsolete, use %salign-functions%s",
2902 prefix, suffix, prefix, suffix);
2903 if (align_functions == 0)
2904 {
2905 i = atoi (ix86_align_funcs_string);
2906 if (i < 0 || i > MAX_CODE_ALIGN)
2907 error ("%salign-loops=%d%s is not between 0 and %d",
2908 prefix, i, suffix, MAX_CODE_ALIGN);
2909 else
2910 align_functions = 1 << i;
2911 }
2912 }
2913
2914 /* Default align_* from the processor table. */
2915 if (align_loops == 0)
2916 {
2917 align_loops = processor_target_table[ix86_tune].align_loop;
2918 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2919 }
2920 if (align_jumps == 0)
2921 {
2922 align_jumps = processor_target_table[ix86_tune].align_jump;
2923 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2924 }
2925 if (align_functions == 0)
2926 {
2927 align_functions = processor_target_table[ix86_tune].align_func;
2928 }
2929
2930 /* Validate -mbranch-cost= value, or provide default. */
2931 ix86_branch_cost = ix86_cost->branch_cost;
2932 if (ix86_branch_cost_string)
2933 {
2934 i = atoi (ix86_branch_cost_string);
2935 if (i < 0 || i > 5)
2936 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
2937 else
2938 ix86_branch_cost = i;
2939 }
2940 if (ix86_section_threshold_string)
2941 {
2942 i = atoi (ix86_section_threshold_string);
2943 if (i < 0)
2944 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
2945 else
2946 ix86_section_threshold = i;
2947 }
2948
2949 if (ix86_tls_dialect_string)
2950 {
2951 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2952 ix86_tls_dialect = TLS_DIALECT_GNU;
2953 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2954 ix86_tls_dialect = TLS_DIALECT_GNU2;
2955 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2956 ix86_tls_dialect = TLS_DIALECT_SUN;
2957 else
2958 error ("bad value (%s) for %stls-dialect=%s %s",
2959 ix86_tls_dialect_string, prefix, suffix, sw);
2960 }
2961
2962 if (ix87_precision_string)
2963 {
2964 i = atoi (ix87_precision_string);
2965 if (i != 32 && i != 64 && i != 80)
2966 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
2967 }
2968
2969 if (TARGET_64BIT)
2970 {
2971 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
2972
2973 /* Enable by default the SSE and MMX builtins. Do allow the user to
2974 explicitly disable any of these. In particular, disabling SSE and
2975 MMX for kernel code is extremely useful. */
2976 if (!ix86_arch_specified)
2977 ix86_isa_flags
2978 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
2979 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
2980
2981 if (TARGET_RTD)
2982 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
2983 }
2984 else
2985 {
2986 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
2987
2988 if (!ix86_arch_specified)
2989 ix86_isa_flags
2990 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
2991
2992 /* The i386 ABI does not specify a red zone. It still makes sense to use one
2993 when the programmer takes care to keep the stack from being destroyed. */
2994 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2995 target_flags |= MASK_NO_RED_ZONE;
2996 }
2997
2998 /* Keep nonleaf frame pointers. */
2999 if (flag_omit_frame_pointer)
3000 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3001 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3002 flag_omit_frame_pointer = 1;
3003
3004 /* If we're doing fast math, we don't care about comparison order
3005 wrt NaNs. This lets us use a shorter comparison sequence. */
3006 if (flag_finite_math_only)
3007 target_flags &= ~MASK_IEEE_FP;
3008
3009 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3010 since the insns won't need emulation. */
3011 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3012 target_flags &= ~MASK_NO_FANCY_MATH_387;
3013
3014 /* Likewise, if the target doesn't have a 387, or we've specified
3015 software floating point, don't use 387 inline intrinsics. */
3016 if (!TARGET_80387)
3017 target_flags |= MASK_NO_FANCY_MATH_387;
3018
3019 /* Turn on MMX builtins for -msse. */
3020 if (TARGET_SSE)
3021 {
3022 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3023 x86_prefetch_sse = true;
3024 }
3025
3026 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3027 if (TARGET_SSE4_2 || TARGET_ABM)
3028 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3029
3030 /* Validate -mpreferred-stack-boundary= value or default it to
3031 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3032 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3033 if (ix86_preferred_stack_boundary_string)
3034 {
3035 i = atoi (ix86_preferred_stack_boundary_string);
3036 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3037 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3038 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3039 else
3040 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3041 }
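/* Worked example (illustrative): -mpreferred-stack-boundary=4 gives
   (1 << 4) * BITS_PER_UNIT == 128 bits, i.e. the usual 16-byte stack
   alignment commonly needed for SSE spills.  */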
3042
3043 /* Set the default value for -mstackrealign. */
3044 if (ix86_force_align_arg_pointer == -1)
3045 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3046
3047 /* Validate -mincoming-stack-boundary= value or default it to
3048 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3049 if (ix86_force_align_arg_pointer)
3050 ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY;
3051 else
3052 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3053 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3054 if (ix86_incoming_stack_boundary_string)
3055 {
3056 i = atoi (ix86_incoming_stack_boundary_string);
3057 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3058 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3059 i, TARGET_64BIT ? 4 : 2);
3060 else
3061 {
3062 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3063 ix86_incoming_stack_boundary
3064 = ix86_user_incoming_stack_boundary;
3065 }
3066 }
3067
3068 /* Accept -msseregparm only if at least SSE support is enabled. */
3069 if (TARGET_SSEREGPARM
3070 && ! TARGET_SSE)
3071 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3072
3073 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3074 if (ix86_fpmath_string != 0)
3075 {
3076 if (! strcmp (ix86_fpmath_string, "387"))
3077 ix86_fpmath = FPMATH_387;
3078 else if (! strcmp (ix86_fpmath_string, "sse"))
3079 {
3080 if (!TARGET_SSE)
3081 {
3082 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3083 ix86_fpmath = FPMATH_387;
3084 }
3085 else
3086 ix86_fpmath = FPMATH_SSE;
3087 }
3088 else if (! strcmp (ix86_fpmath_string, "387,sse")
3089 || ! strcmp (ix86_fpmath_string, "387+sse")
3090 || ! strcmp (ix86_fpmath_string, "sse,387")
3091 || ! strcmp (ix86_fpmath_string, "sse+387")
3092 || ! strcmp (ix86_fpmath_string, "both"))
3093 {
3094 if (!TARGET_SSE)
3095 {
3096 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3097 ix86_fpmath = FPMATH_387;
3098 }
3099 else if (!TARGET_80387)
3100 {
3101 warning (0, "387 instruction set disabled, using SSE arithmetics");
3102 ix86_fpmath = FPMATH_SSE;
3103 }
3104 else
3105 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3106 }
3107 else
3108 error ("bad value (%s) for %sfpmath=%s %s",
3109 ix86_fpmath_string, prefix, suffix, sw);
3110 }
3111
3112 /* If the i387 is disabled, then do not return values in it. */
3113 if (!TARGET_80387)
3114 target_flags &= ~MASK_FLOAT_RETURNS;
3115
3116 /* Use external vectorized library in vectorizing intrinsics. */
3117 if (ix86_veclibabi_string)
3118 {
3119 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3120 ix86_veclib_handler = ix86_veclibabi_svml;
3121 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3122 ix86_veclib_handler = ix86_veclibabi_acml;
3123 else
3124 error ("unknown vectorization library ABI type (%s) for "
3125 "%sveclibabi=%s %s", ix86_veclibabi_string,
3126 prefix, suffix, sw);
3127 }
3128
3129 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3130 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3131 && !optimize_size)
3132 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3133
3134 /* ??? Unwind info is not correct around the CFG unless either a frame
3135 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3136 unwind info generation to be aware of the CFG and propagating states
3137 around edges. */
3138 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3139 || flag_exceptions || flag_non_call_exceptions)
3140 && flag_omit_frame_pointer
3141 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3142 {
3143 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3144 warning (0, "unwind tables currently require either a frame pointer "
3145 "or %saccumulate-outgoing-args%s for correctness",
3146 prefix, suffix);
3147 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3148 }
3149
3150 /* If stack probes are required, the space used for large function
3151 arguments on the stack must also be probed, so enable
3152 -maccumulate-outgoing-args so this happens in the prologue. */
3153 if (TARGET_STACK_PROBE
3154 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3155 {
3156 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3157 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3158 "for correctness", prefix, suffix);
3159 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3160 }
3161
3162 /* For sane SSE instruction set generation we need the fcomi instruction.
3163 It is safe to enable all CMOVE instructions. */
3164 if (TARGET_SSE)
3165 TARGET_CMOVE = 1;
3166
3167 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3168 {
3169 char *p;
3170 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3171 p = strchr (internal_label_prefix, 'X');
3172 internal_label_prefix_len = p - internal_label_prefix;
3173 *p = '\0';
3174 }
3175
3176 /* When the scheduling description is not available, disable the scheduler pass
3177 so that it won't slow down the compilation and make x87 code slower. */
3178 if (!TARGET_SCHEDULE)
3179 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3180
3181 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3182 set_param_value ("simultaneous-prefetches",
3183 ix86_cost->simultaneous_prefetches);
3184 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3185 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3186 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3187 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3188 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3189 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3190
3191 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3192 can be optimized to ap = __builtin_next_arg (0). */
3193 if (!TARGET_64BIT)
3194 targetm.expand_builtin_va_start = NULL;
3195
3196 if (TARGET_64BIT)
3197 {
3198 ix86_gen_leave = gen_leave_rex64;
3199 ix86_gen_pop1 = gen_popdi1;
3200 ix86_gen_add3 = gen_adddi3;
3201 ix86_gen_sub3 = gen_subdi3;
3202 ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
3203 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3204 ix86_gen_monitor = gen_sse3_monitor64;
3205 ix86_gen_andsp = gen_anddi3;
3206 }
3207 else
3208 {
3209 ix86_gen_leave = gen_leave;
3210 ix86_gen_pop1 = gen_popsi1;
3211 ix86_gen_add3 = gen_addsi3;
3212 ix86_gen_sub3 = gen_subsi3;
3213 ix86_gen_sub3_carry = gen_subsi3_carry;
3214 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3215 ix86_gen_monitor = gen_sse3_monitor;
3216 ix86_gen_andsp = gen_andsi3;
3217 }
3218
3219 #ifdef USE_IX86_CLD
3220 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3221 if (!TARGET_64BIT)
3222 target_flags |= MASK_CLD & ~target_flags_explicit;
3223 #endif
3224
3225 /* Save the initial options in case the user uses function specific options. */
3226 if (main_args_p)
3227 target_option_default_node = target_option_current_node
3228 = build_target_option_node ();
3229 }
3230 \f
3231 /* Save the current options */
3232
3233 static void
3234 ix86_function_specific_save (struct cl_target_option *ptr)
3235 {
3236 gcc_assert (IN_RANGE (ix86_arch, 0, 255));
3237 gcc_assert (IN_RANGE (ix86_tune, 0, 255));
3238 gcc_assert (IN_RANGE (ix86_fpmath, 0, 255));
3239 gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255));
3240
3241 ptr->arch = ix86_arch;
3242 ptr->tune = ix86_tune;
3243 ptr->fpmath = ix86_fpmath;
3244 ptr->branch_cost = ix86_branch_cost;
3245 ptr->tune_defaulted = ix86_tune_defaulted;
3246 ptr->arch_specified = ix86_arch_specified;
3247 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3248 ptr->target_flags_explicit = target_flags_explicit;
3249 }
3250
3251 /* Restore the current options */
3252
3253 static void
3254 ix86_function_specific_restore (struct cl_target_option *ptr)
3255 {
3256 enum processor_type old_tune = ix86_tune;
3257 enum processor_type old_arch = ix86_arch;
3258 unsigned int ix86_arch_mask, ix86_tune_mask;
3259 int i;
3260
3261 ix86_arch = ptr->arch;
3262 ix86_tune = ptr->tune;
3263 ix86_fpmath = ptr->fpmath;
3264 ix86_branch_cost = ptr->branch_cost;
3265 ix86_tune_defaulted = ptr->tune_defaulted;
3266 ix86_arch_specified = ptr->arch_specified;
3267 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3268 target_flags_explicit = ptr->target_flags_explicit;
3269
3270 /* Recreate the arch feature tests if the arch changed */
3271 if (old_arch != ix86_arch)
3272 {
3273 ix86_arch_mask = 1u << ix86_arch;
3274 for (i = 0; i < X86_ARCH_LAST; ++i)
3275 ix86_arch_features[i]
3276 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3277 }
3278
3279 /* Recreate the tune optimization tests */
3280 if (old_tune != ix86_tune)
3281 {
3282 ix86_tune_mask = 1u << ix86_tune;
3283 for (i = 0; i < X86_TUNE_LAST; ++i)
3284 ix86_tune_features[i]
3285 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3286 }
3287 }
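/* Illustrative note (a sketch of how the save/restore pair is used, not part
   of the original source): ix86_function_specific_save copies the current
   ix86_arch/ix86_tune/ix86_fpmath values into a cl_target_option, and
   ix86_function_specific_restore puts them back and recomputes the feature
   masks, e.g. ix86_tune_mask = 1u << ix86_tune, so that per-function
   attribute((option(...))) switches can be undone cheaply.  */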
3288
3289 /* Print the current options */
3290
3291 static void
3292 ix86_function_specific_print (FILE *file, int indent,
3293 struct cl_target_option *ptr)
3294 {
3295 char *target_string
3296 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3297 NULL, NULL, NULL, false);
3298
3299 fprintf (file, "%*sarch = %d (%s)\n",
3300 indent, "",
3301 ptr->arch,
3302 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3303 ? cpu_names[ptr->arch]
3304 : "<unknown>"));
3305
3306 fprintf (file, "%*stune = %d (%s)\n",
3307 indent, "",
3308 ptr->tune,
3309 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3310 ? cpu_names[ptr->tune]
3311 : "<unknown>"));
3312
3313 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3314 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3315 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3316 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3317
3318 if (target_string)
3319 {
3320 fprintf (file, "%*s%s\n", indent, "", target_string);
3321 free (target_string);
3322 }
3323 }
3324
3325 \f
3326 /* Inner function to process the attribute((option(...))), take an argument and
3327 set the current options from the argument. If we have a list, recursively go
3328 over the list. */
3329
3330 static bool
3331 ix86_valid_option_attribute_inner_p (tree args, char *p_strings[])
3332 {
3333 char *next_optstr;
3334 bool ret = true;
3335
3336 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3337 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3338 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3339 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3340
3341 enum ix86_opt_type
3342 {
3343 ix86_opt_unknown,
3344 ix86_opt_yes,
3345 ix86_opt_no,
3346 ix86_opt_str,
3347 ix86_opt_isa
3348 };
3349
3350 static const struct
3351 {
3352 const char *string;
3353 size_t len;
3354 enum ix86_opt_type type;
3355 int opt;
3356 int mask;
3357 } attrs[] = {
3358 /* isa options */
3359 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3360 IX86_ATTR_ISA ("abm", OPT_mabm),
3361 IX86_ATTR_ISA ("aes", OPT_maes),
3362 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3363 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3364 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3365 IX86_ATTR_ISA ("sse", OPT_msse),
3366 IX86_ATTR_ISA ("sse2", OPT_msse2),
3367 IX86_ATTR_ISA ("sse3", OPT_msse3),
3368 IX86_ATTR_ISA ("sse4", OPT_msse4),
3369 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3370 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3371 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3372 IX86_ATTR_ISA ("sse5", OPT_msse5),
3373 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3374
3375 /* string options */
3376 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3377 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3378 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
3379
3380 /* flag options */
3381 IX86_ATTR_YES ("cld",
3382 OPT_mcld,
3383 MASK_CLD),
3384
3385 IX86_ATTR_NO ("fancy-math-387",
3386 OPT_mfancy_math_387,
3387 MASK_NO_FANCY_MATH_387),
3388
3389 IX86_ATTR_NO ("fused-madd",
3390 OPT_mfused_madd,
3391 MASK_NO_FUSED_MADD),
3392
3393 IX86_ATTR_YES ("ieee-fp",
3394 OPT_mieee_fp,
3395 MASK_IEEE_FP),
3396
3397 IX86_ATTR_YES ("inline-all-stringops",
3398 OPT_minline_all_stringops,
3399 MASK_INLINE_ALL_STRINGOPS),
3400
3401 IX86_ATTR_YES ("inline-stringops-dynamically",
3402 OPT_minline_stringops_dynamically,
3403 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3404
3405 IX86_ATTR_NO ("align-stringops",
3406 OPT_mno_align_stringops,
3407 MASK_NO_ALIGN_STRINGOPS),
3408
3409 IX86_ATTR_YES ("recip",
3410 OPT_mrecip,
3411 MASK_RECIP),
3412
3413 };
3414
3415 /* If this is a list, recurse to get the options. */
3416 if (TREE_CODE (args) == TREE_LIST)
3417 {
3418 bool ret = true;
3419
3420 for (; args; args = TREE_CHAIN (args))
3421 if (TREE_VALUE (args)
3422 && !ix86_valid_option_attribute_inner_p (TREE_VALUE (args), p_strings))
3423 ret = false;
3424
3425 return ret;
3426 }
3427
3428 else if (TREE_CODE (args) != STRING_CST)
3429 gcc_unreachable ();
3430
3431 /* Handle multiple arguments separated by commas. */
3432 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3433
3434 while (next_optstr && *next_optstr != '\0')
3435 {
3436 char *p = next_optstr;
3437 char *orig_p = p;
3438 char *comma = strchr (next_optstr, ',');
3439 const char *opt_string;
3440 size_t len, opt_len;
3441 int opt;
3442 bool opt_set_p;
3443 char ch;
3444 unsigned i;
3445 enum ix86_opt_type type = ix86_opt_unknown;
3446 int mask = 0;
3447
3448 if (comma)
3449 {
3450 *comma = '\0';
3451 len = comma - next_optstr;
3452 next_optstr = comma + 1;
3453 }
3454 else
3455 {
3456 len = strlen (p);
3457 next_optstr = NULL;
3458 }
3459
3460 /* Recognize no-xxx. */
3461 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3462 {
3463 opt_set_p = false;
3464 p += 3;
3465 len -= 3;
3466 }
3467 else
3468 opt_set_p = true;
3469
3470 /* Find the option. */
3471 ch = *p;
3472 opt = N_OPTS;
3473 for (i = 0; i < sizeof (attrs) / sizeof (attrs[0]); i++)
3474 {
3475 type = attrs[i].type;
3476 opt_len = attrs[i].len;
3477 if (ch == attrs[i].string[0]
3478 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
3479 && memcmp (p, attrs[i].string, opt_len) == 0)
3480 {
3481 opt = attrs[i].opt;
3482 mask = attrs[i].mask;
3483 opt_string = attrs[i].string;
3484 break;
3485 }
3486 }
3487
3488 /* Process the option. */
3489 if (opt == N_OPTS)
3490 {
3491 error ("attribute(option(\"%s\")) is unknown", orig_p);
3492 ret = false;
3493 }
3494
3495 else if (type == ix86_opt_isa)
3496 ix86_handle_option (opt, p, opt_set_p);
3497
3498 else if (type == ix86_opt_yes || type == ix86_opt_no)
3499 {
3500 if (type == ix86_opt_no)
3501 opt_set_p = !opt_set_p;
3502
3503 if (opt_set_p)
3504 target_flags |= mask;
3505 else
3506 target_flags &= ~mask;
3507 }
3508
3509 else if (type == ix86_opt_str)
3510 {
3511 if (p_strings[opt])
3512 {
3513 error ("option(\"%s\") was already specified", opt_string);
3514 ret = false;
3515 }
3516 else
3517 p_strings[opt] = xstrdup (p + opt_len);
3518 }
3519
3520 else
3521 gcc_unreachable ();
3522 }
3523
3524 return ret;
3525 }
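/* Illustrative example (hypothetical user code, not part of this file): the
   parser above accepts comma-separated ISA and flag names, e.g.

     int foo (void) __attribute__ ((option ("sse4.2,no-fancy-math-387")));

   "sse4.2" is handled through ix86_handle_option as an ISA option, while the
   "no-" prefix on a flag option inverts opt_set_p before the mask is applied
   to target_flags.  */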
3526
3527 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
3528
3529 tree
3530 ix86_valid_option_attribute_tree (tree args)
3531 {
3532 const char *orig_arch_string = ix86_arch_string;
3533 const char *orig_tune_string = ix86_tune_string;
3534 const char *orig_fpmath_string = ix86_fpmath_string;
3535 int orig_tune_defaulted = ix86_tune_defaulted;
3536 int orig_arch_specified = ix86_arch_specified;
3537 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
3538 tree t = NULL_TREE;
3539 int i;
3540 struct cl_target_option *def
3541 = TREE_TARGET_OPTION (target_option_default_node);
3542
3543 /* Process each of the options on the chain. */
3544 if (! ix86_valid_option_attribute_inner_p (args, option_strings))
3545 return NULL_TREE;
3546
3547 /* If the changed options are different from the default, rerun override_options,
3548 and then save the options away. The string options are attribute options,
3549 and will be undone when we copy the save structure. */
3550 if (ix86_isa_flags != def->ix86_isa_flags
3551 || target_flags != def->target_flags
3552 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
3553 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
3554 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3555 {
3556 /* If we are using the default tune= or arch=, undo the string assigned,
3557 and use the default. */
3558 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
3559 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
3560 else if (!orig_arch_specified)
3561 ix86_arch_string = NULL;
3562
3563 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
3564 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
3565 else if (orig_tune_defaulted)
3566 ix86_tune_string = NULL;
3567
3568 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
3569 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3570 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
3571 else if (!TARGET_64BIT && TARGET_SSE)
3572 ix86_fpmath_string = "sse,387";
3573
3574 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
3575 override_options (false);
3576
3577 /* Save the current options unless we are validating options for
3578 #pragma. */
3579 t = build_target_option_node ();
3580
3581 ix86_arch_string = orig_arch_string;
3582 ix86_tune_string = orig_tune_string;
3583 ix86_fpmath_string = orig_fpmath_string;
3584
3585 /* Free up memory allocated to hold the strings */
3586 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
3587 if (option_strings[i])
3588 free (option_strings[i]);
3589 }
3590
3591 return t;
3592 }
3593
3594 /* Hook to validate attribute((option("string"))). */
3595
3596 static bool
3597 ix86_valid_option_attribute_p (tree fndecl,
3598 tree ARG_UNUSED (name),
3599 tree args,
3600 int ARG_UNUSED (flags))
3601 {
3602 struct cl_target_option cur_opts;
3603 bool ret = true;
3604 tree new_opts;
3605
3606 cl_target_option_save (&cur_opts);
3607 new_opts = ix86_valid_option_attribute_tree (args);
3608 if (!new_opts)
3609 ret = false;
3610
3611 else if (fndecl)
3612 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_opts;
3613
3614 cl_target_option_restore (&cur_opts);
3615 return ret;
3616 }
3617
3618 \f
3619 /* Hook to determine if one function can safely inline another. */
3620
3621 static bool
3622 ix86_can_inline_p (tree caller, tree callee)
3623 {
3624 bool ret = false;
3625 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
3626 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
3627
3628 /* If callee has no option attributes, then it is ok to inline. */
3629 if (!callee_tree)
3630 ret = true;
3631
3632 /* If caller has no option attributes, but callee does then it is not ok to
3633 inline. */
3634 else if (!caller_tree)
3635 ret = false;
3636
3637 else
3638 {
3639 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
3640 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
3641
3642 /* Callee's isa options should be a subset of the caller's, i.e. an SSE5 function
3643 can inline an SSE2 function but an SSE2 function can't inline an SSE5
3644 function. */
3645 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
3646 != callee_opts->ix86_isa_flags)
3647 ret = false;
3648
3649 /* See if we have the same non-isa options. */
3650 else if (caller_opts->target_flags != callee_opts->target_flags)
3651 ret = false;
3652
3653 /* See if arch, tune, etc. are the same. */
3654 else if (caller_opts->arch != callee_opts->arch)
3655 ret = false;
3656
3657 else if (caller_opts->tune != callee_opts->tune)
3658 ret = false;
3659
3660 else if (caller_opts->fpmath != callee_opts->fpmath)
3661 ret = false;
3662
3663 else if (caller_opts->branch_cost != callee_opts->branch_cost)
3664 ret = false;
3665
3666 else
3667 ret = true;
3668 }
3669
3670 return ret;
3671 }
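/* Illustrative example (a sketch assuming the subset rule above): a caller
   compiled with -msse4.2 may inline a callee marked
   __attribute__ ((option ("sse2"))), because the callee's ISA bits are a
   subset of the caller's; the reverse combination is rejected, as are any
   mismatches in target_flags, arch, tune, fpmath or branch_cost.  */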
3672
3673 \f
3674 /* Remember the last target of ix86_set_current_function. */
3675 static GTY(()) tree ix86_previous_fndecl;
3676
3677 /* Establish appropriate back-end context for processing the function
3678 FNDECL. The argument might be NULL to indicate processing at top
3679 level, outside of any function scope. */
3680 static void
3681 ix86_set_current_function (tree fndecl)
3682 {
3683 /* Only change the context if the function changes. This hook is called
3684 several times in the course of compiling a function, and we don't want to
3685 slow things down too much or call target_reinit when it isn't safe. */
3686 if (fndecl && fndecl != ix86_previous_fndecl)
3687 {
3688 tree old_tree = (ix86_previous_fndecl
3689 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
3690 : NULL_TREE);
3691
3692 tree new_tree = (fndecl
3693 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
3694 : NULL_TREE);
3695
3696 ix86_previous_fndecl = fndecl;
3697 if (old_tree == new_tree)
3698 ;
3699
3700 else if (new_tree)
3701 {
3702 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
3703 target_reinit ();
3704 }
3705
3706 else if (old_tree)
3707 {
3708 struct cl_target_option *def
3709 = TREE_TARGET_OPTION (target_option_current_node);
3710
3711 cl_target_option_restore (def);
3712 target_reinit ();
3713 }
3714 }
3715 }
3716
3717 \f
3718 /* Return true if this goes in large data/bss. */
3719
3720 static bool
3721 ix86_in_large_data_p (tree exp)
3722 {
3723 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
3724 return false;
3725
3726 /* Functions are never large data. */
3727 if (TREE_CODE (exp) == FUNCTION_DECL)
3728 return false;
3729
3730 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
3731 {
3732 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
3733 if (strcmp (section, ".ldata") == 0
3734 || strcmp (section, ".lbss") == 0)
3735 return true;
3736 return false;
3737 }
3738 else
3739 {
3740 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
3741
3742 /* If this is an incomplete type with size 0, then we can't put it
3743 in data because it might be too big when completed. */
3744 if (!size || size > ix86_section_threshold)
3745 return true;
3746 }
3747
3748 return false;
3749 }
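/* Illustrative example (hypothetical, assuming -mcmodel=medium and the
   default large-data threshold): a definition such as

     static char big_buffer[1 << 20];

   exceeds ix86_section_threshold and is therefore treated as large data, so
   it ends up in .lbss/.ldata rather than the ordinary .bss/.data.  */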
3750
3751 /* Switch to the appropriate section for output of DECL.
3752 DECL is either a `VAR_DECL' node or a constant of some sort.
3753 RELOC indicates whether forming the initial value of DECL requires
3754 link-time relocations. */
3755
3756 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
3757 ATTRIBUTE_UNUSED;
3758
3759 static section *
3760 x86_64_elf_select_section (tree decl, int reloc,
3761 unsigned HOST_WIDE_INT align)
3762 {
3763 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3764 && ix86_in_large_data_p (decl))
3765 {
3766 const char *sname = NULL;
3767 unsigned int flags = SECTION_WRITE;
3768 switch (categorize_decl_for_section (decl, reloc))
3769 {
3770 case SECCAT_DATA:
3771 sname = ".ldata";
3772 break;
3773 case SECCAT_DATA_REL:
3774 sname = ".ldata.rel";
3775 break;
3776 case SECCAT_DATA_REL_LOCAL:
3777 sname = ".ldata.rel.local";
3778 break;
3779 case SECCAT_DATA_REL_RO:
3780 sname = ".ldata.rel.ro";
3781 break;
3782 case SECCAT_DATA_REL_RO_LOCAL:
3783 sname = ".ldata.rel.ro.local";
3784 break;
3785 case SECCAT_BSS:
3786 sname = ".lbss";
3787 flags |= SECTION_BSS;
3788 break;
3789 case SECCAT_RODATA:
3790 case SECCAT_RODATA_MERGE_STR:
3791 case SECCAT_RODATA_MERGE_STR_INIT:
3792 case SECCAT_RODATA_MERGE_CONST:
3793 sname = ".lrodata";
3794 flags = 0;
3795 break;
3796 case SECCAT_SRODATA:
3797 case SECCAT_SDATA:
3798 case SECCAT_SBSS:
3799 gcc_unreachable ();
3800 case SECCAT_TEXT:
3801 case SECCAT_TDATA:
3802 case SECCAT_TBSS:
3803 /* We don't split these for the medium model. Place them into
3804 default sections and hope for the best. */
3805 break;
3806 case SECCAT_EMUTLS_VAR:
3807 case SECCAT_EMUTLS_TMPL:
3808 gcc_unreachable ();
3809 }
3810 if (sname)
3811 {
3812 /* We might get called with string constants, but get_named_section
3813 doesn't like them as they are not DECLs. Also, we need to set
3814 flags in that case. */
3815 if (!DECL_P (decl))
3816 return get_section (sname, flags, NULL);
3817 return get_named_section (decl, sname, reloc);
3818 }
3819 }
3820 return default_elf_select_section (decl, reloc, align);
3821 }
3822
3823 /* Build up a unique section name, expressed as a
3824 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
3825 RELOC indicates whether the initial value of EXP requires
3826 link-time relocations. */
3827
3828 static void ATTRIBUTE_UNUSED
3829 x86_64_elf_unique_section (tree decl, int reloc)
3830 {
3831 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3832 && ix86_in_large_data_p (decl))
3833 {
3834 const char *prefix = NULL;
3835 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
3836 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
3837
3838 switch (categorize_decl_for_section (decl, reloc))
3839 {
3840 case SECCAT_DATA:
3841 case SECCAT_DATA_REL:
3842 case SECCAT_DATA_REL_LOCAL:
3843 case SECCAT_DATA_REL_RO:
3844 case SECCAT_DATA_REL_RO_LOCAL:
3845 prefix = one_only ? ".ld" : ".ldata";
3846 break;
3847 case SECCAT_BSS:
3848 prefix = one_only ? ".lb" : ".lbss";
3849 break;
3850 case SECCAT_RODATA:
3851 case SECCAT_RODATA_MERGE_STR:
3852 case SECCAT_RODATA_MERGE_STR_INIT:
3853 case SECCAT_RODATA_MERGE_CONST:
3854 prefix = one_only ? ".lr" : ".lrodata";
3855 break;
3856 case SECCAT_SRODATA:
3857 case SECCAT_SDATA:
3858 case SECCAT_SBSS:
3859 gcc_unreachable ();
3860 case SECCAT_TEXT:
3861 case SECCAT_TDATA:
3862 case SECCAT_TBSS:
3863 /* We don't split these for the medium model. Place them into
3864 default sections and hope for the best. */
3865 break;
3866 case SECCAT_EMUTLS_VAR:
3867 prefix = targetm.emutls.var_section;
3868 break;
3869 case SECCAT_EMUTLS_TMPL:
3870 prefix = targetm.emutls.tmpl_section;
3871 break;
3872 }
3873 if (prefix)
3874 {
3875 const char *name, *linkonce;
3876 char *string;
3877
3878 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
3879 name = targetm.strip_name_encoding (name);
3880
3881 /* If we're using one_only, then there needs to be a .gnu.linkonce
3882 prefix to the section name. */
3883 linkonce = one_only ? ".gnu.linkonce" : "";
3884
3885 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
3886
3887 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
3888 return;
3889 }
3890 }
3891 default_unique_section (decl, reloc);
3892 }
3893
3894 #ifdef COMMON_ASM_OP
3895 /* This says how to output assembler code to declare an
3896 uninitialized external linkage data object.
3897
3898 For medium model x86-64 we need to use the .largecomm directive for
3899 large objects. */
3900 void
3901 x86_elf_aligned_common (FILE *file,
3902 const char *name, unsigned HOST_WIDE_INT size,
3903 int align)
3904 {
3905 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3906 && size > (unsigned int)ix86_section_threshold)
3907 fprintf (file, ".largecomm\t");
3908 else
3909 fprintf (file, "%s", COMMON_ASM_OP);
3910 assemble_name (file, name);
3911 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
3912 size, align / BITS_PER_UNIT);
3913 }
3914 #endif
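/* Illustrative note (assembly sketch, assuming a medium-model object larger
   than ix86_section_threshold): x86_elf_aligned_common then emits e.g.

     .largecomm	big_buffer,1048576,32

   whereas small objects keep the ordinary COMMON_ASM_OP (".comm") form.  */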
3915
3916 /* Utility function for targets to use in implementing
3917 ASM_OUTPUT_ALIGNED_BSS. */
3918
3919 void
3920 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
3921 const char *name, unsigned HOST_WIDE_INT size,
3922 int align)
3923 {
3924 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3925 && size > (unsigned int)ix86_section_threshold)
3926 switch_to_section (get_named_section (decl, ".lbss", 0));
3927 else
3928 switch_to_section (bss_section);
3929 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
3930 #ifdef ASM_DECLARE_OBJECT_NAME
3931 last_assemble_variable_decl = decl;
3932 ASM_DECLARE_OBJECT_NAME (file, name, decl);
3933 #else
3934 /* The standard thing is just to output a label for the object. */
3935 ASM_OUTPUT_LABEL (file, name);
3936 #endif /* ASM_DECLARE_OBJECT_NAME */
3937 ASM_OUTPUT_SKIP (file, size ? size : 1);
3938 }
3939 \f
3940 void
3941 optimization_options (int level, int size ATTRIBUTE_UNUSED)
3942 {
3943 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
3944 make the problem with not enough registers even worse. */
3945 #ifdef INSN_SCHEDULING
3946 if (level > 1)
3947 flag_schedule_insns = 0;
3948 #endif
3949
3950 if (TARGET_MACHO)
3951 /* The Darwin libraries never set errno, so we might as well
3952 avoid calling them when that's the only reason we would. */
3953 flag_errno_math = 0;
3954
3955 /* The default values of these switches depend on TARGET_64BIT,
3956 which is not known at this moment. Mark these values with 2 and
3957 let the user override them. In case there is no command line option
3958 specifying them, we will set the defaults in override_options. */
3959 if (optimize >= 1)
3960 flag_omit_frame_pointer = 2;
3961 flag_pcc_struct_return = 2;
3962 flag_asynchronous_unwind_tables = 2;
3963 flag_vect_cost_model = 1;
3964 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
3965 SUBTARGET_OPTIMIZATION_OPTIONS;
3966 #endif
3967 }
3968 \f
3969 /* Decide whether we can make a sibling call to a function. DECL is the
3970 declaration of the function being targeted by the call and EXP is the
3971 CALL_EXPR representing the call. */
3972
3973 static bool
3974 ix86_function_ok_for_sibcall (tree decl, tree exp)
3975 {
3976 tree func;
3977 rtx a, b;
3978
3979 /* If we are generating position-independent code, we cannot sibcall
3980 optimize any indirect call, or a direct call to a global function,
3981 as the PLT requires %ebx be live. */
3982 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
3983 return false;
3984
3985 if (decl)
3986 func = decl;
3987 else
3988 {
3989 func = TREE_TYPE (CALL_EXPR_FN (exp));
3990 if (POINTER_TYPE_P (func))
3991 func = TREE_TYPE (func);
3992 }
3993
3994 /* Check that the return value locations are the same. For example,
3995 if we are returning floats on the 80387 register stack, we cannot
3996 make a sibcall from a function that doesn't return a float to a
3997 function that does or, conversely, from a function that does return
3998 a float to a function that doesn't; the necessary stack adjustment
3999 would not be executed. This is also the place we notice
4000 differences in the return value ABI. Note that it is ok for one
4001 of the functions to have void return type as long as the return
4002 value of the other is passed in a register. */
4003 a = ix86_function_value (TREE_TYPE (exp), func, false);
4004 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4005 cfun->decl, false);
4006 if (STACK_REG_P (a) || STACK_REG_P (b))
4007 {
4008 if (!rtx_equal_p (a, b))
4009 return false;
4010 }
4011 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4012 ;
4013 else if (!rtx_equal_p (a, b))
4014 return false;
4015
4016 /* If this call is indirect, we'll need to be able to use a call-clobbered
4017 register for the address of the target function. Make sure that all
4018 such registers are not used for passing parameters. */
4019 if (!decl && !TARGET_64BIT)
4020 {
4021 tree type;
4022
4023 /* We're looking at the CALL_EXPR, we need the type of the function. */
4024 type = CALL_EXPR_FN (exp); /* pointer expression */
4025 type = TREE_TYPE (type); /* pointer type */
4026 type = TREE_TYPE (type); /* function type */
4027
4028 if (ix86_function_regparm (type, NULL) >= 3)
4029 {
4030 /* ??? Need to count the actual number of registers to be used,
4031 not the possible number of registers. Fix later. */
4032 return false;
4033 }
4034 }
4035
4036 /* Dllimport'd functions are also called indirectly. */
4037 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
4038 && decl && DECL_DLLIMPORT_P (decl)
4039 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
4040 return false;
4041
4042 /* Otherwise okay. That also includes certain types of indirect calls. */
4043 return true;
4044 }
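/* Illustrative example (hypothetical, 32-bit PIC): a tail call from
   int f (void) to a global int g (void) is rejected above because the PLT
   entry needs %ebx live; likewise a caller returning int cannot sibcall a
   callee returning float on the 80387 stack, since the return locations
   (x87 stack register vs. integer register) differ.  */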
4045
4046 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
4047 calling convention attributes;
4048 arguments as in struct attribute_spec.handler. */
4049
4050 static tree
4051 ix86_handle_cconv_attribute (tree *node, tree name,
4052 tree args,
4053 int flags ATTRIBUTE_UNUSED,
4054 bool *no_add_attrs)
4055 {
4056 if (TREE_CODE (*node) != FUNCTION_TYPE
4057 && TREE_CODE (*node) != METHOD_TYPE
4058 && TREE_CODE (*node) != FIELD_DECL
4059 && TREE_CODE (*node) != TYPE_DECL)
4060 {
4061 warning (OPT_Wattributes, "%qs attribute only applies to functions",
4062 IDENTIFIER_POINTER (name));
4063 *no_add_attrs = true;
4064 return NULL_TREE;
4065 }
4066
4067 /* Can combine regparm with all attributes but fastcall. */
4068 if (is_attribute_p ("regparm", name))
4069 {
4070 tree cst;
4071
4072 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4073 {
4074 error ("fastcall and regparm attributes are not compatible");
4075 }
4076
4077 cst = TREE_VALUE (args);
4078 if (TREE_CODE (cst) != INTEGER_CST)
4079 {
4080 warning (OPT_Wattributes,
4081 "%qs attribute requires an integer constant argument",
4082 IDENTIFIER_POINTER (name));
4083 *no_add_attrs = true;
4084 }
4085 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4086 {
4087 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
4088 IDENTIFIER_POINTER (name), REGPARM_MAX);
4089 *no_add_attrs = true;
4090 }
4091
4092 return NULL_TREE;
4093 }
4094
4095 if (TARGET_64BIT)
4096 {
4097 /* Do not warn when emulating the MS ABI. */
4098 if (TREE_CODE (*node) != FUNCTION_TYPE || ix86_function_type_abi (*node)!=MS_ABI)
4099 warning (OPT_Wattributes, "%qs attribute ignored",
4100 IDENTIFIER_POINTER (name));
4101 *no_add_attrs = true;
4102 return NULL_TREE;
4103 }
4104
4105 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4106 if (is_attribute_p ("fastcall", name))
4107 {
4108 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4109 {
4110 error ("fastcall and cdecl attributes are not compatible");
4111 }
4112 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4113 {
4114 error ("fastcall and stdcall attributes are not compatible");
4115 }
4116 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4117 {
4118 error ("fastcall and regparm attributes are not compatible");
4119 }
4120 }
4121
4122 /* Can combine stdcall with fastcall (redundant), regparm and
4123 sseregparm. */
4124 else if (is_attribute_p ("stdcall", name))
4125 {
4126 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4127 {
4128 error ("stdcall and cdecl attributes are not compatible");
4129 }
4130 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4131 {
4132 error ("stdcall and fastcall attributes are not compatible");
4133 }
4134 }
4135
4136 /* Can combine cdecl with regparm and sseregparm. */
4137 else if (is_attribute_p ("cdecl", name))
4138 {
4139 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4140 {
4141 error ("stdcall and cdecl attributes are not compatible");
4142 }
4143 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4144 {
4145 error ("fastcall and cdecl attributes are not compatible");
4146 }
4147 }
4148
4149 /* Can combine sseregparm with all attributes. */
4150
4151 return NULL_TREE;
4152 }
4153
4154 /* Return 0 if the attributes for two types are incompatible, 1 if they
4155 are compatible, and 2 if they are nearly compatible (which causes a
4156 warning to be generated). */
4157
4158 static int
4159 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4160 {
4161 /* Check for mismatch of non-default calling convention. */
4162 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4163
4164 if (TREE_CODE (type1) != FUNCTION_TYPE
4165 && TREE_CODE (type1) != METHOD_TYPE)
4166 return 1;
4167
4168 /* Check for mismatched fastcall/regparm types. */
4169 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4170 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4171 || (ix86_function_regparm (type1, NULL)
4172 != ix86_function_regparm (type2, NULL)))
4173 return 0;
4174
4175 /* Check for mismatched sseregparm types. */
4176 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4177 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4178 return 0;
4179
4180 /* Check for mismatched return types (cdecl vs stdcall). */
4181 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4182 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4183 return 0;
4184
4185 return 1;
4186 }
4187 \f
4188 /* Return the regparm value for a function with the indicated TYPE and DECL.
4189 DECL may be NULL when calling a function indirectly
4190 or considering a libcall. */
4191
4192 static int
4193 ix86_function_regparm (const_tree type, const_tree decl)
4194 {
4195 tree attr;
4196 int regparm = ix86_regparm;
4197
4198 static bool error_issued;
4199
4200 if (TARGET_64BIT)
4201 {
4202 if (ix86_function_type_abi (type) == DEFAULT_ABI)
4203 return regparm;
4204 return DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
4205 }
4206
4207 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4208 if (attr)
4209 {
4210 regparm
4211 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4212
4213 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
4214 {
4215 /* We can't use regparm(3) for nested functions because
4216 these pass the static chain pointer in the %ecx register. */
4217 if (!error_issued && regparm == 3
4218 && decl_function_context (decl)
4219 && !DECL_NO_STATIC_CHAIN (decl))
4220 {
4221 error ("nested functions are limited to 2 register parameters");
4222 error_issued = true;
4223 return 0;
4224 }
4225 }
4226
4227 return regparm;
4228 }
4229
4230 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4231 return 2;
4232
4233 /* Use register calling convention for local functions when possible. */
4234 if (decl && TREE_CODE (decl) == FUNCTION_DECL
4235 && !profile_flag)
4236 {
4237 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4238 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4239 if (i && i->local)
4240 {
4241 int local_regparm, globals = 0, regno;
4242 struct function *f;
4243
4244 /* Make sure no regparm register is taken by a
4245 fixed register variable. */
4246 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4247 if (fixed_regs[local_regparm])
4248 break;
4249
4250 /* We can't use regparm(3) for nested functions as these use
4251 the static chain pointer in the third argument. */
4252 if (local_regparm == 3
4253 && decl_function_context (decl)
4254 && !DECL_NO_STATIC_CHAIN (decl))
4255 local_regparm = 2;
4256
4257 /* If the function realigns its stack pointer, the prologue will
4258 clobber %ecx. If we've already generated code for the callee,
4259 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
4260 scanning the attributes for the self-realigning property. */
4261 f = DECL_STRUCT_FUNCTION (decl);
4262 /* Since the current internal arg pointer won't conflict with
4263 parameter passing regs, there is no need to change stack
4264 realignment or adjust the regparm number.
4265 
4266 Each fixed register usage increases register pressure,
4267 so fewer registers should be used for argument passing.
4268 This functionality can be overridden by an explicit
4269 regparm value. */
4270 for (regno = 0; regno <= DI_REG; regno++)
4271 if (fixed_regs[regno])
4272 globals++;
4273
4274 local_regparm
4275 = globals < local_regparm ? local_regparm - globals : 0;
4276
4277 if (local_regparm > regparm)
4278 regparm = local_regparm;
4279 }
4280 }
4281
4282 return regparm;
4283 }
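/* Illustrative example (hypothetical declaration): with

     int __attribute__ ((regparm (3))) add3 (int a, int b, int c);

   the first three integer arguments travel in %eax, %edx and %ecx instead of
   on the stack; for nested functions the value is capped at 2 because the
   static chain already occupies %ecx.  */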
4284
4285 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4286 DFmode (2) arguments in SSE registers for a function with the
4287 indicated TYPE and DECL. DECL may be NULL when calling function
4288 indirectly or considering a libcall. Otherwise return 0. */
4289
4290 static int
4291 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4292 {
4293 gcc_assert (!TARGET_64BIT);
4294
4295 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4296 by the sseregparm attribute. */
4297 if (TARGET_SSEREGPARM
4298 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4299 {
4300 if (!TARGET_SSE)
4301 {
4302 if (warn)
4303 {
4304 if (decl)
4305 error ("Calling %qD with attribute sseregparm without "
4306 "SSE/SSE2 enabled", decl);
4307 else
4308 error ("Calling %qT with attribute sseregparm without "
4309 "SSE/SSE2 enabled", type);
4310 }
4311 return 0;
4312 }
4313
4314 return 2;
4315 }
4316
4317 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4318 (and DFmode for SSE2) arguments in SSE registers. */
4319 if (decl && TARGET_SSE_MATH && !profile_flag)
4320 {
4321 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4322 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4323 if (i && i->local)
4324 return TARGET_SSE2 ? 2 : 1;
4325 }
4326
4327 return 0;
4328 }
4329
4330 /* Return true if EAX is live at the start of the function. Used by
4331 ix86_expand_prologue to determine if we need special help before
4332 calling allocate_stack_worker. */
4333
4334 static bool
4335 ix86_eax_live_at_start_p (void)
4336 {
4337 /* Cheat. Don't bother working forward from ix86_function_regparm
4338 to the function type to whether an actual argument is located in
4339 eax. Instead just look at cfg info, which is still close enough
4340 to correct at this point. This gives false positives for broken
4341 functions that might use uninitialized data that happens to be
4342 allocated in eax, but who cares? */
4343 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4344 }
4345
4346 /* Value is the number of bytes of arguments automatically
4347 popped when returning from a subroutine call.
4348 FUNDECL is the declaration node of the function (as a tree),
4349 FUNTYPE is the data type of the function (as a tree),
4350 or for a library call it is an identifier node for the subroutine name.
4351 SIZE is the number of bytes of arguments passed on the stack.
4352
4353 On the 80386, the RTD insn may be used to pop them if the number
4354 of args is fixed, but if the number is variable then the caller
4355 must pop them all. RTD can't be used for library calls now
4356 because the library is compiled with the Unix compiler.
4357 Use of RTD is a selectable option, since it is incompatible with
4358 standard Unix calling sequences. If the option is not selected,
4359 the caller must always pop the args.
4360
4361 The attribute stdcall is equivalent to RTD on a per module basis. */
4362
4363 int
4364 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4365 {
4366 int rtd;
4367
4368 /* None of the 64-bit ABIs pop arguments. */
4369 if (TARGET_64BIT)
4370 return 0;
4371
4372 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4373
4374 /* Cdecl functions override -mrtd, and never pop the stack. */
4375 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4376 {
4377 /* Stdcall and fastcall functions will pop the stack if not
4378 variable args. */
4379 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4380 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
4381 rtd = 1;
4382
4383 if (rtd && ! stdarg_p (funtype))
4384 return size;
4385 }
4386
4387 /* Lose any fake structure return argument if it is passed on the stack. */
4388 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4389 && !KEEP_AGGREGATE_RETURN_POINTER)
4390 {
4391 int nregs = ix86_function_regparm (funtype, fundecl);
4392 if (nregs == 0)
4393 return GET_MODE_SIZE (Pmode);
4394 }
4395
4396 return 0;
4397 }
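/* Illustrative example (hypothetical, 32-bit): a function declared

     void __attribute__ ((stdcall)) f (int a, int b);

   makes ix86_return_pops_args return 8, so the callee returns with "ret $8"
   and pops its own arguments; a cdecl or varargs function returns 0 and
   leaves the cleanup to the caller.  */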
4398 \f
4399 /* Argument support functions. */
4400
4401 /* Return true when a register may be used to pass function parameters. */
4402 bool
4403 ix86_function_arg_regno_p (int regno)
4404 {
4405 int i;
4406 const int *parm_regs;
4407
4408 if (!TARGET_64BIT)
4409 {
4410 if (TARGET_MACHO)
4411 return (regno < REGPARM_MAX
4412 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
4413 else
4414 return (regno < REGPARM_MAX
4415 || (TARGET_MMX && MMX_REGNO_P (regno)
4416 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
4417 || (TARGET_SSE && SSE_REGNO_P (regno)
4418 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
4419 }
4420
4421 if (TARGET_MACHO)
4422 {
4423 if (SSE_REGNO_P (regno) && TARGET_SSE)
4424 return true;
4425 }
4426 else
4427 {
4428 if (TARGET_SSE && SSE_REGNO_P (regno)
4429 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
4430 return true;
4431 }
4432
4433 /* TODO: The function should depend on current function ABI but
4434 builtins.c would need updating then. Therefore we use the
4435 default ABI. */
4436
4437 /* RAX is used as hidden argument to va_arg functions. */
4438 if (DEFAULT_ABI == SYSV_ABI && regno == AX_REG)
4439 return true;
4440
4441 if (DEFAULT_ABI == MS_ABI)
4442 parm_regs = x86_64_ms_abi_int_parameter_registers;
4443 else
4444 parm_regs = x86_64_int_parameter_registers;
4445 for (i = 0; i < (DEFAULT_ABI == MS_ABI ? X64_REGPARM_MAX
4446 : X86_64_REGPARM_MAX); i++)
4447 if (regno == parm_regs[i])
4448 return true;
4449 return false;
4450 }
4451
4452 /* Return true if we do not know how to pass TYPE solely in registers. */
4453
4454 static bool
4455 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
4456 {
4457 if (must_pass_in_stack_var_size_or_pad (mode, type))
4458 return true;
4459
4460 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
4461 The layout_type routine is crafty and tries to trick us into passing
4462 currently unsupported vector types on the stack by using TImode. */
4463 return (!TARGET_64BIT && mode == TImode
4464 && type && TREE_CODE (type) != VECTOR_TYPE);
4465 }
4466
4467 /* Return the size, in bytes, of the area reserved for arguments passed
4468 in registers for the function represented by FNDECL, depending on the
4469 ABI used. */
4470 int
4471 ix86_reg_parm_stack_space (const_tree fndecl)
4472 {
4473 int call_abi = 0;
4474 /* For libcalls it is possible that there is no fndecl at hand.
4475 Therefore assume for this case the default abi of the target. */
4476 if (!fndecl)
4477 call_abi = DEFAULT_ABI;
4478 else
4479 call_abi = ix86_function_abi (fndecl);
4480 if (call_abi == 1)
4481 return 32;
4482 return 0;
4483 }
4484
4485 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
4486 call ABI used. */
4487 int
4488 ix86_function_type_abi (const_tree fntype)
4489 {
4490 if (TARGET_64BIT && fntype != NULL)
4491 {
4492 int abi;
4493 if (DEFAULT_ABI == SYSV_ABI)
4494 abi = lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)) ? MS_ABI : SYSV_ABI;
4495 else
4496 abi = lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)) ? SYSV_ABI : MS_ABI;
4497
4498 return abi;
4499 }
4500 return DEFAULT_ABI;
4501 }
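/* Illustrative example (hypothetical, 64-bit target with the SysV default
   ABI): a prototype such as

     void wincall (int) __attribute__ ((ms_abi));

   makes ix86_function_type_abi return MS_ABI for that type, while plain
   declarations keep DEFAULT_ABI (SYSV_ABI here).  */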
4502
4503 int
4504 ix86_function_abi (const_tree fndecl)
4505 {
4506 if (! fndecl)
4507 return DEFAULT_ABI;
4508 return ix86_function_type_abi (TREE_TYPE (fndecl));
4509 }
4510
4511 /* Return SYSV_ABI or MS_ABI, depending on CFUN, specifying the
4512 call ABI used. */
4513 int
4514 ix86_cfun_abi (void)
4515 {
4516 if (! cfun || ! TARGET_64BIT)
4517 return DEFAULT_ABI;
4518 return cfun->machine->call_abi;
4519 }
4520
4521 /* regclass.c */
4522 extern void init_regs (void);
4523
4524 /* Implementation of the call ABI switching target hook. The call register
4525 sets specific to FNDECL are set up. See also CONDITIONAL_REGISTER_USAGE
4526 for more details.
4527 To prevent redundant calls of the costly function init_regs (), it checks
4528 that register usage is not reset for the default ABI. */
4529 void
4530 ix86_call_abi_override (const_tree fndecl)
4531 {
4532 if (fndecl == NULL_TREE)
4533 cfun->machine->call_abi = DEFAULT_ABI;
4534 else
4535 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
4536 if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
4537 {
4538 if (call_used_regs[4 /*RSI*/] != 0 || call_used_regs[5 /*RDI*/] != 0)
4539 {
4540 call_used_regs[4 /*RSI*/] = 0;
4541 call_used_regs[5 /*RDI*/] = 0;
4542 init_regs ();
4543 }
4544 }
4545 else if (TARGET_64BIT)
4546 {
4547 if (call_used_regs[4 /*RSI*/] != 1 || call_used_regs[5 /*RDI*/] != 1)
4548 {
4549 call_used_regs[4 /*RSI*/] = 1;
4550 call_used_regs[5 /*RDI*/] = 1;
4551 init_regs ();
4552 }
4553 }
4554 }
4555
4556 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4557 for a call to a function whose data type is FNTYPE.
4558 For a library call, FNTYPE is 0. */
4559
4560 void
4561 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
4562 tree fntype, /* tree ptr for function decl */
4563 rtx libname, /* SYMBOL_REF of library name or 0 */
4564 tree fndecl)
4565 {
4566 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
4567 memset (cum, 0, sizeof (*cum));
4568
4569 cum->call_abi = ix86_function_type_abi (fntype);
4570 /* Set up the number of registers to use for passing arguments. */
4571 cum->nregs = ix86_regparm;
4572 if (TARGET_64BIT)
4573 {
4574 if (cum->call_abi != DEFAULT_ABI)
4575 cum->nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX
4576 : X64_REGPARM_MAX;
4577 }
4578 if (TARGET_SSE)
4579 {
4580 cum->sse_nregs = SSE_REGPARM_MAX;
4581 if (TARGET_64BIT)
4582 {
4583 if (cum->call_abi != DEFAULT_ABI)
4584 cum->sse_nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
4585 : X64_SSE_REGPARM_MAX;
4586 }
4587 }
4588 if (TARGET_MMX)
4589 cum->mmx_nregs = MMX_REGPARM_MAX;
4590 cum->warn_sse = true;
4591 cum->warn_mmx = true;
4592
4593 /* Because the type might mismatch between caller and callee, we need to
4594 use the actual type of the function for local calls.
4595 FIXME: cgraph_analyze can be told to actually record if the function uses
4596 va_start, so for local functions maybe_vaarg can be made more aggressive,
4597 helping K&R code.
4598 FIXME: once the type system is fixed, we won't need this code anymore. */
4599 if (i && i->local)
4600 fntype = TREE_TYPE (fndecl);
4601 cum->maybe_vaarg = (fntype
4602 ? (!prototype_p (fntype) || stdarg_p (fntype))
4603 : !libname);
4604
4605 if (!TARGET_64BIT)
4606 {
4607 /* If there are variable arguments, then we won't pass anything
4608 in registers in 32-bit mode. */
4609 if (stdarg_p (fntype))
4610 {
4611 cum->nregs = 0;
4612 cum->sse_nregs = 0;
4613 cum->mmx_nregs = 0;
4614 cum->warn_sse = 0;
4615 cum->warn_mmx = 0;
4616 return;
4617 }
4618
4619 /* Use ecx and edx registers if function has fastcall attribute,
4620 else look for regparm information. */
4621 if (fntype)
4622 {
4623 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
4624 {
4625 cum->nregs = 2;
4626 cum->fastcall = 1;
4627 }
4628 else
4629 cum->nregs = ix86_function_regparm (fntype, fndecl);
4630 }
4631
4632 /* Set up the number of SSE registers used for passing SFmode
4633 and DFmode arguments. Warn for mismatching ABI. */
4634 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
4635 }
4636 }
4637
4638 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
4639 But in the case of vector types, it is some vector mode.
4640
4641 When we have only some of our vector isa extensions enabled, then there
4642 are some modes for which vector_mode_supported_p is false. For these
4643 modes, the generic vector support in gcc will choose some non-vector mode
4644 in order to implement the type. By computing the natural mode, we'll
4645 select the proper ABI location for the operand and not depend on whatever
4646 the middle-end decides to do with these vector types. */
4647
4648 static enum machine_mode
4649 type_natural_mode (const_tree type)
4650 {
4651 enum machine_mode mode = TYPE_MODE (type);
4652
4653 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
4654 {
4655 HOST_WIDE_INT size = int_size_in_bytes (type);
4656 if ((size == 8 || size == 16)
4657 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
4658 && TYPE_VECTOR_SUBPARTS (type) > 1)
4659 {
4660 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
4661
4662 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
4663 mode = MIN_MODE_VECTOR_FLOAT;
4664 else
4665 mode = MIN_MODE_VECTOR_INT;
4666
4667 /* Get the mode which has this inner mode and number of units. */
4668 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
4669 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
4670 && GET_MODE_INNER (mode) == innermode)
4671 return mode;
4672
4673 gcc_unreachable ();
4674 }
4675 }
4676
4677 return mode;
4678 }
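/* Illustrative example (a sketch, hypothetical type): for

     typedef float v4sf __attribute__ ((vector_size (16)));

   type_natural_mode returns V4SFmode even when only some vector ISA
   extensions are enabled and the middle-end would otherwise have fallen back
   to a non-vector mode for the 16-byte type, so the ABI location stays
   stable.  */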
4679
4680 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
4681 this may not agree with the mode that the type system has chosen for the
4682 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
4683 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
4684
4685 static rtx
4686 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
4687 unsigned int regno)
4688 {
4689 rtx tmp;
4690
4691 if (orig_mode != BLKmode)
4692 tmp = gen_rtx_REG (orig_mode, regno);
4693 else
4694 {
4695 tmp = gen_rtx_REG (mode, regno);
4696 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
4697 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
4698 }
4699
4700 return tmp;
4701 }
4702
4703 /* x86-64 register passing implementation. See the x86-64 ABI for details.
4704 The goal of this code is to classify each 8 bytes of the incoming argument
4705 by register class and assign registers accordingly. */
4706
4707 /* Return the union class of CLASS1 and CLASS2.
4708 See the x86-64 PS ABI for details. */
4709
4710 static enum x86_64_reg_class
4711 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
4712 {
4713 /* Rule #1: If both classes are equal, this is the resulting class. */
4714 if (class1 == class2)
4715 return class1;
4716
4717 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
4718 the other class. */
4719 if (class1 == X86_64_NO_CLASS)
4720 return class2;
4721 if (class2 == X86_64_NO_CLASS)
4722 return class1;
4723
4724 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
4725 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
4726 return X86_64_MEMORY_CLASS;
4727
4728 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
4729 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
4730 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
4731 return X86_64_INTEGERSI_CLASS;
4732 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
4733 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
4734 return X86_64_INTEGER_CLASS;
4735
4736 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
4737 MEMORY is used. */
4738 if (class1 == X86_64_X87_CLASS
4739 || class1 == X86_64_X87UP_CLASS
4740 || class1 == X86_64_COMPLEX_X87_CLASS
4741 || class2 == X86_64_X87_CLASS
4742 || class2 == X86_64_X87UP_CLASS
4743 || class2 == X86_64_COMPLEX_X87_CLASS)
4744 return X86_64_MEMORY_CLASS;
4745
4746 /* Rule #6: Otherwise class SSE is used. */
4747 return X86_64_SSE_CLASS;
4748 }
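/* Illustrative example (a sketch of the merge rules above): combining
   X86_64_INTEGERSI_CLASS with X86_64_SSESF_CLASS yields
   X86_64_INTEGERSI_CLASS (rule #4), while combining X86_64_SSE_CLASS with
   X86_64_X87_CLASS falls through to X86_64_MEMORY_CLASS (rule #5).  */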
4749
4750 /* Classify the argument of type TYPE and mode MODE.
4751 CLASSES will be filled by the register class used to pass each word
4752 of the operand. The number of words is returned. In case the parameter
4753 should be passed in memory, 0 is returned. As a special case for zero
4754 sized containers, classes[0] will be NO_CLASS and 1 is returned.
4755
4756 BIT_OFFSET is used internally for handling records and specifies the
4757 offset, in bits modulo 256, to avoid overflow cases.
4758
4759 See the x86-64 PS ABI for details.
4760 */
4761
4762 static int
4763 classify_argument (enum machine_mode mode, const_tree type,
4764 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
4765 {
4766 HOST_WIDE_INT bytes =
4767 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
4768 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4769
4770 /* Variable sized entities are always passed/returned in memory. */
4771 if (bytes < 0)
4772 return 0;
4773
4774 if (mode != VOIDmode
4775 && targetm.calls.must_pass_in_stack (mode, type))
4776 return 0;
4777
4778 if (type && AGGREGATE_TYPE_P (type))
4779 {
4780 int i;
4781 tree field;
4782 enum x86_64_reg_class subclasses[MAX_CLASSES];
4783
4784 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
4785 if (bytes > 16)
4786 return 0;
4787
4788 for (i = 0; i < words; i++)
4789 classes[i] = X86_64_NO_CLASS;
4790
4791 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
4792 signal the memory class, so handle them as a special case. */
4793 if (!words)
4794 {
4795 classes[0] = X86_64_NO_CLASS;
4796 return 1;
4797 }
4798
4799 /* Classify each field of record and merge classes. */
4800 switch (TREE_CODE (type))
4801 {
4802 case RECORD_TYPE:
4803 /* And now merge the fields of structure. */
4804 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4805 {
4806 if (TREE_CODE (field) == FIELD_DECL)
4807 {
4808 int num;
4809
4810 if (TREE_TYPE (field) == error_mark_node)
4811 continue;
4812
4813 /* Bitfields are always classified as integer. Handle them
4814 early, since later code would consider them to be
4815 misaligned integers. */
4816 if (DECL_BIT_FIELD (field))
4817 {
4818 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
4819 i < ((int_bit_position (field) + (bit_offset % 64))
4820 + tree_low_cst (DECL_SIZE (field), 0)
4821 + 63) / 8 / 8; i++)
4822 classes[i] =
4823 merge_classes (X86_64_INTEGER_CLASS,
4824 classes[i]);
4825 }
4826 else
4827 {
4828 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
4829 TREE_TYPE (field), subclasses,
4830 (int_bit_position (field)
4831 + bit_offset) % 256);
4832 if (!num)
4833 return 0;
4834 for (i = 0; i < num; i++)
4835 {
4836 int pos =
4837 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
4838 classes[i + pos] =
4839 merge_classes (subclasses[i], classes[i + pos]);
4840 }
4841 }
4842 }
4843 }
4844 break;
4845
4846 case ARRAY_TYPE:
4847 /* Arrays are handled as small records. */
4848 {
4849 int num;
4850 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
4851 TREE_TYPE (type), subclasses, bit_offset);
4852 if (!num)
4853 return 0;
4854
4855 /* The partial classes are now full classes. */
4856 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
4857 subclasses[0] = X86_64_SSE_CLASS;
4858 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
4859 subclasses[0] = X86_64_INTEGER_CLASS;
4860
4861 for (i = 0; i < words; i++)
4862 classes[i] = subclasses[i % num];
4863
4864 break;
4865 }
4866 case UNION_TYPE:
4867 case QUAL_UNION_TYPE:
4868 /* Unions are similar to RECORD_TYPE but offset is always 0.
4869 */
4870 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4871 {
4872 if (TREE_CODE (field) == FIELD_DECL)
4873 {
4874 int num;
4875
4876 if (TREE_TYPE (field) == error_mark_node)
4877 continue;
4878
4879 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
4880 TREE_TYPE (field), subclasses,
4881 bit_offset);
4882 if (!num)
4883 return 0;
4884 for (i = 0; i < num; i++)
4885 classes[i] = merge_classes (subclasses[i], classes[i]);
4886 }
4887 }
4888 break;
4889
4890 default:
4891 gcc_unreachable ();
4892 }
4893
4894 /* Final merger cleanup. */
4895 for (i = 0; i < words; i++)
4896 {
4897 /* If one class is MEMORY, everything should be passed in
4898 memory. */
4899 if (classes[i] == X86_64_MEMORY_CLASS)
4900 return 0;
4901
4902 /* X86_64_SSEUP_CLASS should always be preceded by
4903 X86_64_SSE_CLASS. */
4904 if (classes[i] == X86_64_SSEUP_CLASS
4905 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
4906 classes[i] = X86_64_SSE_CLASS;
4907
4908 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
4909 if (classes[i] == X86_64_X87UP_CLASS
4910 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
4911 classes[i] = X86_64_SSE_CLASS;
4912 }
4913 return words;
4914 }
4915
4916 /* Compute the alignment needed. We align all types to their natural
4917 boundaries, except XFmode, which the code below checks against 128 bits. */
4918 if (mode != VOIDmode && mode != BLKmode)
4919 {
4920 int mode_alignment = GET_MODE_BITSIZE (mode);
4921
4922 if (mode == XFmode)
4923 mode_alignment = 128;
4924 else if (mode == XCmode)
4925 mode_alignment = 256;
4926 if (COMPLEX_MODE_P (mode))
4927 mode_alignment /= 2;
4928 /* Misaligned fields are always returned in memory. */
4929 if (bit_offset % mode_alignment)
4930 return 0;
4931 }
4932
4933 /* for V1xx modes, just use the base mode */
4934 if (VECTOR_MODE_P (mode) && mode != V1DImode
4935 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
4936 mode = GET_MODE_INNER (mode);
4937
4938 /* Classification of atomic types. */
4939 switch (mode)
4940 {
4941 case SDmode:
4942 case DDmode:
4943 classes[0] = X86_64_SSE_CLASS;
4944 return 1;
4945 case TDmode:
4946 classes[0] = X86_64_SSE_CLASS;
4947 classes[1] = X86_64_SSEUP_CLASS;
4948 return 2;
4949 case DImode:
4950 case SImode:
4951 case HImode:
4952 case QImode:
4953 case CSImode:
4954 case CHImode:
4955 case CQImode:
4956 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
4957 classes[0] = X86_64_INTEGERSI_CLASS;
4958 else
4959 classes[0] = X86_64_INTEGER_CLASS;
4960 return 1;
4961 case CDImode:
4962 case TImode:
4963 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
4964 return 2;
4965 case CTImode:
4966 return 0;
4967 case SFmode:
4968 if (!(bit_offset % 64))
4969 classes[0] = X86_64_SSESF_CLASS;
4970 else
4971 classes[0] = X86_64_SSE_CLASS;
4972 return 1;
4973 case DFmode:
4974 classes[0] = X86_64_SSEDF_CLASS;
4975 return 1;
4976 case XFmode:
4977 classes[0] = X86_64_X87_CLASS;
4978 classes[1] = X86_64_X87UP_CLASS;
4979 return 2;
4980 case TFmode:
4981 classes[0] = X86_64_SSE_CLASS;
4982 classes[1] = X86_64_SSEUP_CLASS;
4983 return 2;
4984 case SCmode:
4985 classes[0] = X86_64_SSE_CLASS;
4986 return 1;
4987 case DCmode:
4988 classes[0] = X86_64_SSEDF_CLASS;
4989 classes[1] = X86_64_SSEDF_CLASS;
4990 return 2;
4991 case XCmode:
4992 classes[0] = X86_64_COMPLEX_X87_CLASS;
4993 return 1;
4994 case TCmode:
4995 /* This mode is larger than 16 bytes. */
4996 return 0;
4997 case V4SFmode:
4998 case V4SImode:
4999 case V16QImode:
5000 case V8HImode:
5001 case V2DFmode:
5002 case V2DImode:
5003 classes[0] = X86_64_SSE_CLASS;
5004 classes[1] = X86_64_SSEUP_CLASS;
5005 return 2;
5006 case V1DImode:
5007 case V2SFmode:
5008 case V2SImode:
5009 case V4HImode:
5010 case V8QImode:
5011 classes[0] = X86_64_SSE_CLASS;
5012 return 1;
5013 case BLKmode:
5014 case VOIDmode:
5015 return 0;
5016 default:
5017 gcc_assert (VECTOR_MODE_P (mode));
5018
5019 if (bytes > 16)
5020 return 0;
5021
5022 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5023
5024 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5025 classes[0] = X86_64_INTEGERSI_CLASS;
5026 else
5027 classes[0] = X86_64_INTEGER_CLASS;
5028 classes[1] = X86_64_INTEGER_CLASS;
5029 return 1 + (bytes > 8);
5030 }
5031 }
5032
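/* Illustrative note (a sketch of the classification above, not part of the
   original sources): a 16-byte aggregate such as

       struct s { double d; long l; };

   spans two eightbytes that classify as { SSE, INTEGER }, so it is passed in
   one SSE register plus one general-purpose register, whereas

       struct t { long a, b, c; };

   is larger than 16 bytes, classify_argument returns 0, and the value is
   passed in memory.  */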
5033 /* Examine the argument and set the number of registers required in each
5034 class. Return 0 iff the parameter should be passed in memory. */
5035 static int
5036 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5037 int *int_nregs, int *sse_nregs)
5038 {
5039 enum x86_64_reg_class regclass[MAX_CLASSES];
5040 int n = classify_argument (mode, type, regclass, 0);
5041
5042 *int_nregs = 0;
5043 *sse_nregs = 0;
5044 if (!n)
5045 return 0;
5046 for (n--; n >= 0; n--)
5047 switch (regclass[n])
5048 {
5049 case X86_64_INTEGER_CLASS:
5050 case X86_64_INTEGERSI_CLASS:
5051 (*int_nregs)++;
5052 break;
5053 case X86_64_SSE_CLASS:
5054 case X86_64_SSESF_CLASS:
5055 case X86_64_SSEDF_CLASS:
5056 (*sse_nregs)++;
5057 break;
5058 case X86_64_NO_CLASS:
5059 case X86_64_SSEUP_CLASS:
5060 break;
5061 case X86_64_X87_CLASS:
5062 case X86_64_X87UP_CLASS:
5063 if (!in_return)
5064 return 0;
5065 break;
5066 case X86_64_COMPLEX_X87_CLASS:
5067 return in_return ? 2 : 0;
5068 case X86_64_MEMORY_CLASS:
5069 gcc_unreachable ();
5070 }
5071 return 1;
5072 }
5073
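/* Illustrative note: for the { SSE, INTEGER } layout sketched above,
   examine_argument sets *sse_nregs = 1 and *int_nregs = 1.  The X87, X87UP
   and COMPLEX_X87 classes force arguments (but not return values) into
   memory, since the x87 stack is only used for returns.  */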
5074 /* Construct container for the argument used by GCC interface. See
5075 FUNCTION_ARG for the detailed description. */
5076
5077 static rtx
5078 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5079 const_tree type, int in_return, int nintregs, int nsseregs,
5080 const int *intreg, int sse_regno)
5081 {
5082 /* The following variables hold the static issued_error state. */
5083 static bool issued_sse_arg_error;
5084 static bool issued_sse_ret_error;
5085 static bool issued_x87_ret_error;
5086
5087 enum machine_mode tmpmode;
5088 int bytes =
5089 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5090 enum x86_64_reg_class regclass[MAX_CLASSES];
5091 int n;
5092 int i;
5093 int nexps = 0;
5094 int needed_sseregs, needed_intregs;
5095 rtx exp[MAX_CLASSES];
5096 rtx ret;
5097
5098 n = classify_argument (mode, type, regclass, 0);
5099 if (!n)
5100 return NULL;
5101 if (!examine_argument (mode, type, in_return, &needed_intregs,
5102 &needed_sseregs))
5103 return NULL;
5104 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5105 return NULL;
5106
5107 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5108 some less clueful developer tries to use floating-point anyway. */
5109 if (needed_sseregs && !TARGET_SSE)
5110 {
5111 if (in_return)
5112 {
5113 if (!issued_sse_ret_error)
5114 {
5115 error ("SSE register return with SSE disabled");
5116 issued_sse_ret_error = true;
5117 }
5118 }
5119 else if (!issued_sse_arg_error)
5120 {
5121 error ("SSE register argument with SSE disabled");
5122 issued_sse_arg_error = true;
5123 }
5124 return NULL;
5125 }
5126
5127 /* Likewise, error if the ABI requires us to return values in the
5128 x87 registers and the user specified -mno-80387. */
5129 if (!TARGET_80387 && in_return)
5130 for (i = 0; i < n; i++)
5131 if (regclass[i] == X86_64_X87_CLASS
5132 || regclass[i] == X86_64_X87UP_CLASS
5133 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5134 {
5135 if (!issued_x87_ret_error)
5136 {
5137 error ("x87 register return with x87 disabled");
5138 issued_x87_ret_error = true;
5139 }
5140 return NULL;
5141 }
5142
5143 /* First construct simple cases. Avoid SCmode, since we want to use
5144 single register to pass this type. */
5145 if (n == 1 && mode != SCmode)
5146 switch (regclass[0])
5147 {
5148 case X86_64_INTEGER_CLASS:
5149 case X86_64_INTEGERSI_CLASS:
5150 return gen_rtx_REG (mode, intreg[0]);
5151 case X86_64_SSE_CLASS:
5152 case X86_64_SSESF_CLASS:
5153 case X86_64_SSEDF_CLASS:
5154 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
5155 case X86_64_X87_CLASS:
5156 case X86_64_COMPLEX_X87_CLASS:
5157 return gen_rtx_REG (mode, FIRST_STACK_REG);
5158 case X86_64_NO_CLASS:
5159 /* Zero sized array, struct or class. */
5160 return NULL;
5161 default:
5162 gcc_unreachable ();
5163 }
5164 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5165 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5166 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5167
5168 if (n == 2
5169 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5170 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
5171 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5172 && regclass[1] == X86_64_INTEGER_CLASS
5173 && (mode == CDImode || mode == TImode || mode == TFmode)
5174 && intreg[0] + 1 == intreg[1])
5175 return gen_rtx_REG (mode, intreg[0]);
5176
5177 /* Otherwise figure out the entries of the PARALLEL. */
5178 for (i = 0; i < n; i++)
5179 {
5180 switch (regclass[i])
5181 {
5182 case X86_64_NO_CLASS:
5183 break;
5184 case X86_64_INTEGER_CLASS:
5185 case X86_64_INTEGERSI_CLASS:
5186 /* Merge TImodes on aligned occasions here too. */
5187 if (i * 8 + 8 > bytes)
5188 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5189 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5190 tmpmode = SImode;
5191 else
5192 tmpmode = DImode;
5193 /* If there is no integer mode of the requested size, fall back to DImode. */
5194 if (tmpmode == BLKmode)
5195 tmpmode = DImode;
5196 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5197 gen_rtx_REG (tmpmode, *intreg),
5198 GEN_INT (i*8));
5199 intreg++;
5200 break;
5201 case X86_64_SSESF_CLASS:
5202 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5203 gen_rtx_REG (SFmode,
5204 SSE_REGNO (sse_regno)),
5205 GEN_INT (i*8));
5206 sse_regno++;
5207 break;
5208 case X86_64_SSEDF_CLASS:
5209 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5210 gen_rtx_REG (DFmode,
5211 SSE_REGNO (sse_regno)),
5212 GEN_INT (i*8));
5213 sse_regno++;
5214 break;
5215 case X86_64_SSE_CLASS:
5216 if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
5217 tmpmode = TImode;
5218 else
5219 tmpmode = DImode;
5220 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5221 gen_rtx_REG (tmpmode,
5222 SSE_REGNO (sse_regno)),
5223 GEN_INT (i*8));
5224 if (tmpmode == TImode)
5225 i++;
5226 sse_regno++;
5227 break;
5228 default:
5229 gcc_unreachable ();
5230 }
5231 }
5232
5233 /* Empty aligned struct, union or class. */
5234 if (nexps == 0)
5235 return NULL;
5236
5237 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
5238 for (i = 0; i < nexps; i++)
5239 XVECEXP (ret, 0, i) = exp [i];
5240 return ret;
5241 }
5242
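/* Illustrative note: for the struct s example above, construct_container
   builds a PARALLEL of two EXPR_LISTs, roughly

       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                  (expr_list (reg:DI di)   (const_int 8))])

   i.e. the first eightbyte travels in an SSE register and the second in a
   general-purpose register; the exact registers depend on INTREG and
   SSE_REGNO at the call site.  */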
5243 /* Update the data in CUM to advance over an argument of mode MODE
5244 and data type TYPE. (TYPE is null for libcalls where that information
5245 may not be available.) */
5246
5247 static void
5248 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5249 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
5250 {
5251 switch (mode)
5252 {
5253 default:
5254 break;
5255
5256 case BLKmode:
5257 if (bytes < 0)
5258 break;
5259 /* FALLTHRU */
5260
5261 case DImode:
5262 case SImode:
5263 case HImode:
5264 case QImode:
5265 cum->words += words;
5266 cum->nregs -= words;
5267 cum->regno += words;
5268
5269 if (cum->nregs <= 0)
5270 {
5271 cum->nregs = 0;
5272 cum->regno = 0;
5273 }
5274 break;
5275
5276 case DFmode:
5277 if (cum->float_in_sse < 2)
5278 break;
5279 case SFmode:
5280 if (cum->float_in_sse < 1)
5281 break;
5282 /* FALLTHRU */
5283
5284 case TImode:
5285 case V16QImode:
5286 case V8HImode:
5287 case V4SImode:
5288 case V2DImode:
5289 case V4SFmode:
5290 case V2DFmode:
5291 if (!type || !AGGREGATE_TYPE_P (type))
5292 {
5293 cum->sse_words += words;
5294 cum->sse_nregs -= 1;
5295 cum->sse_regno += 1;
5296 if (cum->sse_nregs <= 0)
5297 {
5298 cum->sse_nregs = 0;
5299 cum->sse_regno = 0;
5300 }
5301 }
5302 break;
5303
5304 case V8QImode:
5305 case V4HImode:
5306 case V2SImode:
5307 case V2SFmode:
5308 case V1DImode:
5309 if (!type || !AGGREGATE_TYPE_P (type))
5310 {
5311 cum->mmx_words += words;
5312 cum->mmx_nregs -= 1;
5313 cum->mmx_regno += 1;
5314 if (cum->mmx_nregs <= 0)
5315 {
5316 cum->mmx_nregs = 0;
5317 cum->mmx_regno = 0;
5318 }
5319 }
5320 break;
5321 }
5322 }
5323
5324 static void
5325 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5326 tree type, HOST_WIDE_INT words)
5327 {
5328 int int_nregs, sse_nregs;
5329
5330 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
5331 cum->words += words;
5332 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
5333 {
5334 cum->nregs -= int_nregs;
5335 cum->sse_nregs -= sse_nregs;
5336 cum->regno += int_nregs;
5337 cum->sse_regno += sse_nregs;
5338 }
5339 else
5340 cum->words += words;
5341 }
5342
5343 static void
5344 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
5345 HOST_WIDE_INT words)
5346 {
5347 /* Otherwise, this should be passed indirect. */
5348 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
5349
5350 cum->words += words;
5351 if (cum->nregs > 0)
5352 {
5353 cum->nregs -= 1;
5354 cum->regno += 1;
5355 }
5356 }
5357
5358 void
5359 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5360 tree type, int named ATTRIBUTE_UNUSED)
5361 {
5362 HOST_WIDE_INT bytes, words;
5363
5364 if (mode == BLKmode)
5365 bytes = int_size_in_bytes (type);
5366 else
5367 bytes = GET_MODE_SIZE (mode);
5368 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5369
5370 if (type)
5371 mode = type_natural_mode (type);
5372
5373 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5374 function_arg_advance_ms_64 (cum, bytes, words);
5375 else if (TARGET_64BIT)
5376 function_arg_advance_64 (cum, mode, type, words);
5377 else
5378 function_arg_advance_32 (cum, mode, type, bytes, words);
5379 }
5380
5381 /* Define where to put the arguments to a function.
5382 Value is zero to push the argument on the stack,
5383 or a hard register in which to store the argument.
5384
5385 MODE is the argument's machine mode.
5386 TYPE is the data type of the argument (as a tree).
5387 This is null for libcalls where that information may
5388 not be available.
5389 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5390 the preceding args and about the function being called.
5391 NAMED is nonzero if this argument is a named parameter
5392 (otherwise it is an extra parameter matching an ellipsis). */
5393
5394 static rtx
5395 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5396 enum machine_mode orig_mode, tree type,
5397 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
5398 {
5399 static bool warnedsse, warnedmmx;
5400
5401 /* Avoid the AL settings for the Unix64 ABI. */
5402 if (mode == VOIDmode)
5403 return constm1_rtx;
5404
5405 switch (mode)
5406 {
5407 default:
5408 break;
5409
5410 case BLKmode:
5411 if (bytes < 0)
5412 break;
5413 /* FALLTHRU */
5414 case DImode:
5415 case SImode:
5416 case HImode:
5417 case QImode:
5418 if (words <= cum->nregs)
5419 {
5420 int regno = cum->regno;
5421
5422 /* Fastcall allocates the first two DWORD-sized (SImode) or
5423 smaller arguments to ECX and EDX, provided they are not
5424 aggregate types. */
5425 if (cum->fastcall)
5426 {
5427 if (mode == BLKmode
5428 || mode == DImode
5429 || (type && AGGREGATE_TYPE_P (type)))
5430 break;
5431
5432 /* ECX not EAX is the first allocated register. */
5433 if (regno == AX_REG)
5434 regno = CX_REG;
5435 }
5436 return gen_rtx_REG (mode, regno);
5437 }
5438 break;
5439
5440 case DFmode:
5441 if (cum->float_in_sse < 2)
5442 break;
5443 case SFmode:
5444 if (cum->float_in_sse < 1)
5445 break;
5446 /* FALLTHRU */
5447 case TImode:
5448 case V16QImode:
5449 case V8HImode:
5450 case V4SImode:
5451 case V2DImode:
5452 case V4SFmode:
5453 case V2DFmode:
5454 if (!type || !AGGREGATE_TYPE_P (type))
5455 {
5456 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
5457 {
5458 warnedsse = true;
5459 warning (0, "SSE vector argument without SSE enabled "
5460 "changes the ABI");
5461 }
5462 if (cum->sse_nregs)
5463 return gen_reg_or_parallel (mode, orig_mode,
5464 cum->sse_regno + FIRST_SSE_REG);
5465 }
5466 break;
5467
5468 case V8QImode:
5469 case V4HImode:
5470 case V2SImode:
5471 case V2SFmode:
5472 case V1DImode:
5473 if (!type || !AGGREGATE_TYPE_P (type))
5474 {
5475 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
5476 {
5477 warnedmmx = true;
5478 warning (0, "MMX vector argument without MMX enabled "
5479 "changes the ABI");
5480 }
5481 if (cum->mmx_nregs)
5482 return gen_reg_or_parallel (mode, orig_mode,
5483 cum->mmx_regno + FIRST_MMX_REG);
5484 }
5485 break;
5486 }
5487
5488 return NULL_RTX;
5489 }
5490
5491 static rtx
5492 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5493 enum machine_mode orig_mode, tree type)
5494 {
5495 /* Handle a hidden AL argument containing number of registers
5496 for varargs x86-64 functions. */
5497 if (mode == VOIDmode)
5498 return GEN_INT (cum->maybe_vaarg
5499 ? (cum->sse_nregs < 0
5500 ? (cum->call_abi == DEFAULT_ABI
5501 ? SSE_REGPARM_MAX
5502 : (DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
5503 : X64_SSE_REGPARM_MAX))
5504 : cum->sse_regno)
5505 : -1);
5506
5507 return construct_container (mode, orig_mode, type, 0, cum->nregs,
5508 cum->sse_nregs,
5509 &x86_64_int_parameter_registers [cum->regno],
5510 cum->sse_regno);
5511 }
5512
5513 static rtx
5514 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5515 enum machine_mode orig_mode, int named,
5516 HOST_WIDE_INT bytes)
5517 {
5518 unsigned int regno;
5519
5520 /* Avoid the AL settings for the Unix64 ABI. */
5521 if (mode == VOIDmode)
5522 return constm1_rtx;
5523
5524 /* If we've run out of registers, it goes on the stack. */
5525 if (cum->nregs == 0)
5526 return NULL_RTX;
5527
5528 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
5529
5530 /* Only floating point modes are passed in anything but integer regs. */
5531 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
5532 {
5533 if (named)
5534 regno = cum->regno + FIRST_SSE_REG;
5535 else
5536 {
5537 rtx t1, t2;
5538
5539 /* Unnamed floating parameters are passed in both the
5540 SSE and integer registers. */
5541 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
5542 t2 = gen_rtx_REG (mode, regno);
5543 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
5544 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
5545 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
5546 }
5547 }
5548 /* Handle aggregate types passed in registers. */
5549 if (orig_mode == BLKmode)
5550 {
5551 if (bytes > 0 && bytes <= 8)
5552 mode = (bytes > 4 ? DImode : SImode);
5553 if (mode == BLKmode)
5554 mode = DImode;
5555 }
5556
5557 return gen_reg_or_parallel (mode, orig_mode, regno);
5558 }
5559
5560 rtx
5561 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
5562 tree type, int named)
5563 {
5564 enum machine_mode mode = omode;
5565 HOST_WIDE_INT bytes, words;
5566
5567 if (mode == BLKmode)
5568 bytes = int_size_in_bytes (type);
5569 else
5570 bytes = GET_MODE_SIZE (mode);
5571 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5572
5573 /* To simplify the code below, represent vector types with a vector mode
5574 even if MMX/SSE are not active. */
5575 if (type && TREE_CODE (type) == VECTOR_TYPE)
5576 mode = type_natural_mode (type);
5577
5578 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5579 return function_arg_ms_64 (cum, mode, omode, named, bytes);
5580 else if (TARGET_64BIT)
5581 return function_arg_64 (cum, mode, omode, type);
5582 else
5583 return function_arg_32 (cum, mode, omode, type, bytes, words);
5584 }
5585
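/* Illustrative note: for a SysV x86-64 call such as void f (int i, double d),
   function_arg_64 hands back the first free integer register for I and the
   first free SSE register for D, while on ia32 both normally come back as
   NULL_RTX and are pushed on the stack (unless regparm, fastcall or
   sseregparm conventions apply).  */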
5586 /* A C expression that indicates when an argument must be passed by
5587 reference. If nonzero for an argument, a copy of that argument is
5588 made in memory and a pointer to the argument is passed instead of
5589 the argument itself. The pointer is passed in whatever way is
5590 appropriate for passing a pointer to that type. */
5591
5592 static bool
5593 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
5594 enum machine_mode mode ATTRIBUTE_UNUSED,
5595 const_tree type, bool named ATTRIBUTE_UNUSED)
5596 {
5597 /* See Windows x64 Software Convention. */
5598 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5599 {
5600 int msize = (int) GET_MODE_SIZE (mode);
5601 if (type)
5602 {
5603 /* Arrays are passed by reference. */
5604 if (TREE_CODE (type) == ARRAY_TYPE)
5605 return true;
5606
5607 if (AGGREGATE_TYPE_P (type))
5608 {
5609 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
5610 are passed by reference. */
5611 msize = int_size_in_bytes (type);
5612 }
5613 }
5614
5615 /* __m128 is passed by reference. */
5616 switch (msize) {
5617 case 1: case 2: case 4: case 8:
5618 break;
5619 default:
5620 return true;
5621 }
5622 }
5623 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
5624 return 1;
5625
5626 return 0;
5627 }
5628
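/* Illustrative note: under the Windows x64 convention handled above, an
   aggregate of 3 or 16 bytes (including __m128) is passed by reference,
   while an 8-byte aggregate is passed by value; on SysV x86-64 only
   variable-sized (incomplete) types take the pass-by-reference path here.  */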
5629 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
5630 ABI. */
5631 static bool
5632 contains_aligned_value_p (tree type)
5633 {
5634 enum machine_mode mode = TYPE_MODE (type);
5635 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
5636 || mode == TDmode
5637 || mode == TFmode
5638 || mode == TCmode)
5639 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
5640 return true;
5641 if (TYPE_ALIGN (type) < 128)
5642 return false;
5643
5644 if (AGGREGATE_TYPE_P (type))
5645 {
5646 /* Walk the aggregates recursively. */
5647 switch (TREE_CODE (type))
5648 {
5649 case RECORD_TYPE:
5650 case UNION_TYPE:
5651 case QUAL_UNION_TYPE:
5652 {
5653 tree field;
5654
5655 /* Walk all the structure fields. */
5656 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5657 {
5658 if (TREE_CODE (field) == FIELD_DECL
5659 && contains_aligned_value_p (TREE_TYPE (field)))
5660 return true;
5661 }
5662 break;
5663 }
5664
5665 case ARRAY_TYPE:
5666 /* Only needed in case some language passes arrays by value. */
5667 if (contains_aligned_value_p (TREE_TYPE (type)))
5668 return true;
5669 break;
5670
5671 default:
5672 gcc_unreachable ();
5673 }
5674 }
5675 return false;
5676 }
5677
5678 /* Gives the alignment boundary, in bits, of an argument with the
5679 specified mode and type. */
5680
5681 int
5682 ix86_function_arg_boundary (enum machine_mode mode, tree type)
5683 {
5684 int align;
5685 if (type)
5686 {
5687 /* Since the canonical type is used for the call, convert the type
5688 to its canonical form if needed. */
5689 if (!TYPE_STRUCTURAL_EQUALITY_P (type))
5690 type = TYPE_CANONICAL (type);
5691 align = TYPE_ALIGN (type);
5692 }
5693 else
5694 align = GET_MODE_ALIGNMENT (mode);
5695 if (align < PARM_BOUNDARY)
5696 align = PARM_BOUNDARY;
5697 /* In 32bit, only _Decimal128 and __float128 are aligned to their
5698 natural boundaries. */
5699 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
5700 {
5701 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
5702 make an exception for SSE modes since these require 128bit
5703 alignment.
5704
5705 The handling here differs from field_alignment. ICC aligns MMX
5706 arguments to 4 byte boundaries, while structure fields are aligned
5707 to 8 byte boundaries. */
5708 if (!type)
5709 {
5710 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
5711 align = PARM_BOUNDARY;
5712 }
5713 else
5714 {
5715 if (!contains_aligned_value_p (type))
5716 align = PARM_BOUNDARY;
5717 }
5718 }
5719 if (align > BIGGEST_ALIGNMENT)
5720 align = BIGGEST_ALIGNMENT;
5721 return align;
5722 }
5723
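/* Illustrative note: on ia32 an int argument is kept at the 4-byte
   PARM_BOUNDARY, an __m128 argument keeps its 128-bit alignment when SSE is
   enabled, and __float128 (TFmode) and _Decimal128 (TDmode) always keep
   their natural 128-bit boundary.  */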
5724 /* Return true if N is a possible register number of function value. */
5725
5726 bool
5727 ix86_function_value_regno_p (int regno)
5728 {
5729 switch (regno)
5730 {
5731 case 0:
5732 return true;
5733
5734 case FIRST_FLOAT_REG:
5735 /* TODO: The function should depend on current function ABI but
5736 builtins.c would need updating then. Therefore we use the
5737 default ABI. */
5738 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
5739 return false;
5740 return TARGET_FLOAT_RETURNS_IN_80387;
5741
5742 case FIRST_SSE_REG:
5743 return TARGET_SSE;
5744
5745 case FIRST_MMX_REG:
5746 if (TARGET_MACHO || TARGET_64BIT)
5747 return false;
5748 return TARGET_MMX;
5749 }
5750
5751 return false;
5752 }
5753
5754 /* Define how to find the value returned by a function.
5755 VALTYPE is the data type of the value (as a tree).
5756 If the precise function being called is known, FUNC is its FUNCTION_DECL;
5757 otherwise, FUNC is 0. */
5758
5759 static rtx
5760 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
5761 const_tree fntype, const_tree fn)
5762 {
5763 unsigned int regno;
5764
5765 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
5766 we normally prevent this case when mmx is not available. However
5767 some ABIs may require the result to be returned like DImode. */
5768 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
5769 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
5770
5771 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
5772 we prevent this case when sse is not available. However some ABIs
5773 may require the result to be returned like integer TImode. */
5774 else if (mode == TImode
5775 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
5776 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
5777
5778 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
5779 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
5780 regno = FIRST_FLOAT_REG;
5781 else
5782 /* Most things go in %eax. */
5783 regno = AX_REG;
5784
5785 /* Override FP return register with %xmm0 for local functions when
5786 SSE math is enabled or for functions with sseregparm attribute. */
5787 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
5788 {
5789 int sse_level = ix86_function_sseregparm (fntype, fn, false);
5790 if ((sse_level >= 1 && mode == SFmode)
5791 || (sse_level == 2 && mode == DFmode))
5792 regno = FIRST_SSE_REG;
5793 }
5794
5795 return gen_rtx_REG (orig_mode, regno);
5796 }
5797
5798 static rtx
5799 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
5800 const_tree valtype)
5801 {
5802 rtx ret;
5803
5804 /* Handle libcalls, which don't provide a type node. */
5805 if (valtype == NULL)
5806 {
5807 switch (mode)
5808 {
5809 case SFmode:
5810 case SCmode:
5811 case DFmode:
5812 case DCmode:
5813 case TFmode:
5814 case SDmode:
5815 case DDmode:
5816 case TDmode:
5817 return gen_rtx_REG (mode, FIRST_SSE_REG);
5818 case XFmode:
5819 case XCmode:
5820 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
5821 case TCmode:
5822 return NULL;
5823 default:
5824 return gen_rtx_REG (mode, AX_REG);
5825 }
5826 }
5827
5828 ret = construct_container (mode, orig_mode, valtype, 1,
5829 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
5830 x86_64_int_return_registers, 0);
5831
5832 /* For zero-sized structures, construct_container returns NULL, but we
5833 need to keep the rest of the compiler happy by returning a meaningful value. */
5834 if (!ret)
5835 ret = gen_rtx_REG (orig_mode, AX_REG);
5836
5837 return ret;
5838 }
5839
5840 static rtx
5841 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
5842 {
5843 unsigned int regno = AX_REG;
5844
5845 if (TARGET_SSE)
5846 {
5847 switch (GET_MODE_SIZE (mode))
5848 {
5849 case 16:
5850 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
5851 && !COMPLEX_MODE_P (mode))
5852 regno = FIRST_SSE_REG;
5853 break;
5854 case 8:
5855 case 4:
5856 if (mode == SFmode || mode == DFmode)
5857 regno = FIRST_SSE_REG;
5858 break;
5859 default:
5860 break;
5861 }
5862 }
5863 return gen_rtx_REG (orig_mode, regno);
5864 }
5865
5866 static rtx
5867 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
5868 enum machine_mode orig_mode, enum machine_mode mode)
5869 {
5870 const_tree fn, fntype;
5871
5872 fn = NULL_TREE;
5873 if (fntype_or_decl && DECL_P (fntype_or_decl))
5874 fn = fntype_or_decl;
5875 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
5876
5877 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
5878 return function_value_ms_64 (orig_mode, mode);
5879 else if (TARGET_64BIT)
5880 return function_value_64 (orig_mode, mode, valtype);
5881 else
5882 return function_value_32 (orig_mode, mode, fntype, fn);
5883 }
5884
5885 static rtx
5886 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
5887 bool outgoing ATTRIBUTE_UNUSED)
5888 {
5889 enum machine_mode mode, orig_mode;
5890
5891 orig_mode = TYPE_MODE (valtype);
5892 mode = type_natural_mode (valtype);
5893 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
5894 }
5895
5896 rtx
5897 ix86_libcall_value (enum machine_mode mode)
5898 {
5899 return ix86_function_value_1 (NULL, NULL, mode, mode);
5900 }
5901
5902 /* Return true iff type is returned in memory. */
5903
5904 static int ATTRIBUTE_UNUSED
5905 return_in_memory_32 (const_tree type, enum machine_mode mode)
5906 {
5907 HOST_WIDE_INT size;
5908
5909 if (mode == BLKmode)
5910 return 1;
5911
5912 size = int_size_in_bytes (type);
5913
5914 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
5915 return 0;
5916
5917 if (VECTOR_MODE_P (mode) || mode == TImode)
5918 {
5919 /* User-created vectors small enough to fit in EAX. */
5920 if (size < 8)
5921 return 0;
5922
5923 /* MMX/3dNow values are returned in MM0,
5924 except when it doesn't exist. */
5925 if (size == 8)
5926 return (TARGET_MMX ? 0 : 1);
5927
5928 /* SSE values are returned in XMM0, except when it doesn't exist. */
5929 if (size == 16)
5930 return (TARGET_SSE ? 0 : 1);
5931 }
5932
5933 if (mode == XFmode)
5934 return 0;
5935
5936 if (size > 12)
5937 return 1;
5938 return 0;
5939 }
5940
5941 static int ATTRIBUTE_UNUSED
5942 return_in_memory_64 (const_tree type, enum machine_mode mode)
5943 {
5944 int needed_intregs, needed_sseregs;
5945 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
5946 }
5947
5948 static int ATTRIBUTE_UNUSED
5949 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
5950 {
5951 HOST_WIDE_INT size = int_size_in_bytes (type);
5952
5953 /* __m128 is returned in xmm0. */
5954 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
5955 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
5956 return 0;
5957
5958 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
5959 return (size != 1 && size != 2 && size != 4 && size != 8);
5960 }
5961
5962 static bool
5963 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5964 {
5965 #ifdef SUBTARGET_RETURN_IN_MEMORY
5966 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
5967 #else
5968 const enum machine_mode mode = type_natural_mode (type);
5969
5970 if (TARGET_64BIT_MS_ABI)
5971 return return_in_memory_ms_64 (type, mode);
5972 else if (TARGET_64BIT)
5973 return return_in_memory_64 (type, mode);
5974 else
5975 return return_in_memory_32 (type, mode);
5976 #endif
5977 }
5978
5979 /* Return false iff TYPE is returned in memory. This version is used
5980 on Solaris 10. It is similar to the generic ix86_return_in_memory,
5981 but differs notably in that when MMX is available, 8-byte vectors
5982 are returned in memory, rather than in MMX registers. */
5983
5984 bool
5985 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5986 {
5987 int size;
5988 enum machine_mode mode = type_natural_mode (type);
5989
5990 if (TARGET_64BIT)
5991 return return_in_memory_64 (type, mode);
5992
5993 if (mode == BLKmode)
5994 return 1;
5995
5996 size = int_size_in_bytes (type);
5997
5998 if (VECTOR_MODE_P (mode))
5999 {
6000 /* Return in memory only if MMX registers *are* available. This
6001 seems backwards, but it is consistent with the existing
6002 Solaris x86 ABI. */
6003 if (size == 8)
6004 return TARGET_MMX;
6005 if (size == 16)
6006 return !TARGET_SSE;
6007 }
6008 else if (mode == TImode)
6009 return !TARGET_SSE;
6010 else if (mode == XFmode)
6011 return 0;
6012
6013 return size > 12;
6014 }
6015
6016 /* When returning SSE vector types, we have a choice of either
6017 (1) being abi incompatible with a -march switch, or
6018 (2) generating an error.
6019 Given no good solution, I think the safest thing is one warning.
6020 The user won't be able to use -Werror, but....
6021
6022 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6023 called in response to actually generating a caller or callee that
6024 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6025 via aggregate_value_p for general type probing from tree-ssa. */
6026
6027 static rtx
6028 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6029 {
6030 static bool warnedsse, warnedmmx;
6031
6032 if (!TARGET_64BIT && type)
6033 {
6034 /* Look at the return type of the function, not the function type. */
6035 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6036
6037 if (!TARGET_SSE && !warnedsse)
6038 {
6039 if (mode == TImode
6040 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6041 {
6042 warnedsse = true;
6043 warning (0, "SSE vector return without SSE enabled "
6044 "changes the ABI");
6045 }
6046 }
6047
6048 if (!TARGET_MMX && !warnedmmx)
6049 {
6050 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6051 {
6052 warnedmmx = true;
6053 warning (0, "MMX vector return without MMX enabled "
6054 "changes the ABI");
6055 }
6056 }
6057 }
6058
6059 return NULL;
6060 }
6061
6062 \f
6063 /* Create the va_list data type. */
6064
6065 /* Returns the calling-convention-specific va_list data type.
6066 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
6067
6068 static tree
6069 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6070 {
6071 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6072
6073 /* For i386 we use a plain pointer to the argument area. */
6074 if (!TARGET_64BIT || abi == MS_ABI)
6075 return build_pointer_type (char_type_node);
6076
6077 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6078 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
6079
6080 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
6081 unsigned_type_node);
6082 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
6083 unsigned_type_node);
6084 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
6085 ptr_type_node);
6086 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
6087 ptr_type_node);
6088
6089 va_list_gpr_counter_field = f_gpr;
6090 va_list_fpr_counter_field = f_fpr;
6091
6092 DECL_FIELD_CONTEXT (f_gpr) = record;
6093 DECL_FIELD_CONTEXT (f_fpr) = record;
6094 DECL_FIELD_CONTEXT (f_ovf) = record;
6095 DECL_FIELD_CONTEXT (f_sav) = record;
6096
6097 TREE_CHAIN (record) = type_decl;
6098 TYPE_NAME (record) = type_decl;
6099 TYPE_FIELDS (record) = f_gpr;
6100 TREE_CHAIN (f_gpr) = f_fpr;
6101 TREE_CHAIN (f_fpr) = f_ovf;
6102 TREE_CHAIN (f_ovf) = f_sav;
6103
6104 layout_type (record);
6105
6106 /* The correct type is an array type of one element. */
6107 return build_array_type (record, build_index_type (size_zero_node));
6108 }
6109
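/* Illustrative note: the record built above corresponds to the familiar
   SysV x86-64 declaration

       typedef struct __va_list_tag {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __builtin_va_list[1];

   i.e. an array type of one element, as returned below.  */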
6110 /* Set up the builtin va_list data type and, for 64-bit, the additional
6111 calling-convention-specific va_list data types. */
6112
6113 static tree
6114 ix86_build_builtin_va_list (void)
6115 {
6116 tree ret = ix86_build_builtin_va_list_abi (DEFAULT_ABI);
6117
6118 /* Initialize abi specific va_list builtin types. */
6119 if (TARGET_64BIT)
6120 {
6121 tree t;
6122 if (DEFAULT_ABI == MS_ABI)
6123 {
6124 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
6125 if (TREE_CODE (t) != RECORD_TYPE)
6126 t = build_variant_type_copy (t);
6127 sysv_va_list_type_node = t;
6128 }
6129 else
6130 {
6131 t = ret;
6132 if (TREE_CODE (t) != RECORD_TYPE)
6133 t = build_variant_type_copy (t);
6134 sysv_va_list_type_node = t;
6135 }
6136 if (DEFAULT_ABI != MS_ABI)
6137 {
6138 t = ix86_build_builtin_va_list_abi (MS_ABI);
6139 if (TREE_CODE (t) != RECORD_TYPE)
6140 t = build_variant_type_copy (t);
6141 ms_va_list_type_node = t;
6142 }
6143 else
6144 {
6145 t = ret;
6146 if (TREE_CODE (t) != RECORD_TYPE)
6147 t = build_variant_type_copy (t);
6148 ms_va_list_type_node = t;
6149 }
6150 }
6151
6152 return ret;
6153 }
6154
6155 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
6156
6157 static void
6158 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
6159 {
6160 rtx save_area, mem;
6161 rtx label;
6162 rtx label_ref;
6163 rtx tmp_reg;
6164 rtx nsse_reg;
6165 alias_set_type set;
6166 int i;
6167 int regparm = ix86_regparm;
6168
6169 if((cum ? cum->call_abi : ix86_cfun_abi ()) != DEFAULT_ABI)
6170 regparm = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
6171
6172 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
6173 return;
6174
6175 /* Indicate that we need to allocate space on the stack for the varargs save area. */
6176 ix86_save_varrargs_registers = 1;
6177
6178 save_area = frame_pointer_rtx;
6179 set = get_varargs_alias_set ();
6180
6181 for (i = cum->regno;
6182 i < regparm
6183 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
6184 i++)
6185 {
6186 mem = gen_rtx_MEM (Pmode,
6187 plus_constant (save_area, i * UNITS_PER_WORD));
6188 MEM_NOTRAP_P (mem) = 1;
6189 set_mem_alias_set (mem, set);
6190 emit_move_insn (mem, gen_rtx_REG (Pmode,
6191 x86_64_int_parameter_registers[i]));
6192 }
6193
6194 if (cum->sse_nregs && cfun->va_list_fpr_size)
6195 {
6196 /* Now emit code to save SSE registers. The AX parameter contains the
6197 number of SSE parameter registers used to call this function. We use
6198 the sse_prologue_save insn template, which produces a computed jump
6199 across the SSE saves. Some preparation work is needed to get this working. */
6200
6201 label = gen_label_rtx ();
6202 label_ref = gen_rtx_LABEL_REF (Pmode, label);
6203
6204 /* Compute the address to jump to:
6205 label - eax*4 + number_of_named_sse_arguments*4 */
6206 tmp_reg = gen_reg_rtx (Pmode);
6207 nsse_reg = gen_reg_rtx (Pmode);
6208 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
6209 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6210 gen_rtx_MULT (Pmode, nsse_reg,
6211 GEN_INT (4))));
6212 if (cum->sse_regno)
6213 emit_move_insn
6214 (nsse_reg,
6215 gen_rtx_CONST (DImode,
6216 gen_rtx_PLUS (DImode,
6217 label_ref,
6218 GEN_INT (cum->sse_regno * 4))));
6219 else
6220 emit_move_insn (nsse_reg, label_ref);
6221 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
6222
6223 /* Compute the address of the memory block we save into. We always use a
6224 pointer pointing 127 bytes after the first byte to store, so that the
6225 displacements fit in a signed byte and the save instructions stay short. */
6226 tmp_reg = gen_reg_rtx (Pmode);
6227 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6228 plus_constant (save_area,
6229 8 * X86_64_REGPARM_MAX + 127)));
6230 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
6231 MEM_NOTRAP_P (mem) = 1;
6232 set_mem_alias_set (mem, set);
6233 set_mem_align (mem, BITS_PER_WORD);
6234
6235 /* And finally do the dirty job! */
6236 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
6237 GEN_INT (cum->sse_regno), label));
6238 }
6239 }
6240
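/* Illustrative note: the register save area filled in above starts with one
   word per integer argument register (8 * X86_64_REGPARM_MAX bytes),
   followed by a 16-byte slot for each SSE argument register; gp_offset and
   fp_offset in the va_list index into this block.  */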
6241 static void
6242 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
6243 {
6244 alias_set_type set = get_varargs_alias_set ();
6245 int i;
6246
6247 for (i = cum->regno; i < X64_REGPARM_MAX; i++)
6248 {
6249 rtx reg, mem;
6250
6251 mem = gen_rtx_MEM (Pmode,
6252 plus_constant (virtual_incoming_args_rtx,
6253 i * UNITS_PER_WORD));
6254 MEM_NOTRAP_P (mem) = 1;
6255 set_mem_alias_set (mem, set);
6256
6257 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
6258 emit_move_insn (mem, reg);
6259 }
6260 }
6261
6262 static void
6263 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6264 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6265 int no_rtl)
6266 {
6267 CUMULATIVE_ARGS next_cum;
6268 tree fntype;
6269
6270 /* This argument doesn't appear to be used anymore. Which is good,
6271 because the old code here didn't suppress rtl generation. */
6272 gcc_assert (!no_rtl);
6273
6274 if (!TARGET_64BIT)
6275 return;
6276
6277 fntype = TREE_TYPE (current_function_decl);
6278
6279 /* For varargs, we do not want to skip the dummy va_dcl argument.
6280 For stdargs, we do want to skip the last named argument. */
6281 next_cum = *cum;
6282 if (stdarg_p (fntype))
6283 function_arg_advance (&next_cum, mode, type, 1);
6284
6285 if ((cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
6286 setup_incoming_varargs_ms_64 (&next_cum);
6287 else
6288 setup_incoming_varargs_64 (&next_cum);
6289 }
6290
6291 /* Check whether TYPE is a va_list that is really a plain char pointer. */
6292
6293 static bool
6294 is_va_list_char_pointer (tree type)
6295 {
6296 tree canonic;
6297
6298 /* For 32-bit it is always true. */
6299 if (!TARGET_64BIT)
6300 return true;
6301 canonic = ix86_canonical_va_list_type (type);
6302 return (canonic == ms_va_list_type_node
6303 || (DEFAULT_ABI == MS_ABI && canonic == va_list_type_node));
6304 }
6305
6306 /* Implement va_start. */
6307
6308 static void
6309 ix86_va_start (tree valist, rtx nextarg)
6310 {
6311 HOST_WIDE_INT words, n_gpr, n_fpr;
6312 tree f_gpr, f_fpr, f_ovf, f_sav;
6313 tree gpr, fpr, ovf, sav, t;
6314 tree type;
6315
6316 /* Only the 64-bit target needs anything special. */
6317 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6318 {
6319 std_expand_builtin_va_start (valist, nextarg);
6320 return;
6321 }
6322
6323 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6324 f_fpr = TREE_CHAIN (f_gpr);
6325 f_ovf = TREE_CHAIN (f_fpr);
6326 f_sav = TREE_CHAIN (f_ovf);
6327
6328 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
6329 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6330 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6331 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6332 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6333
6334 /* Count number of gp and fp argument registers used. */
6335 words = crtl->args.info.words;
6336 n_gpr = crtl->args.info.regno;
6337 n_fpr = crtl->args.info.sse_regno;
6338
6339 if (cfun->va_list_gpr_size)
6340 {
6341 type = TREE_TYPE (gpr);
6342 t = build2 (MODIFY_EXPR, type,
6343 gpr, build_int_cst (type, n_gpr * 8));
6344 TREE_SIDE_EFFECTS (t) = 1;
6345 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6346 }
6347
6348 if (cfun->va_list_fpr_size)
6349 {
6350 type = TREE_TYPE (fpr);
6351 t = build2 (MODIFY_EXPR, type, fpr,
6352 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
6353 TREE_SIDE_EFFECTS (t) = 1;
6354 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6355 }
6356
6357 /* Find the overflow area. */
6358 type = TREE_TYPE (ovf);
6359 t = make_tree (type, crtl->args.internal_arg_pointer);
6360 if (words != 0)
6361 t = build2 (POINTER_PLUS_EXPR, type, t,
6362 size_int (words * UNITS_PER_WORD));
6363 t = build2 (MODIFY_EXPR, type, ovf, t);
6364 TREE_SIDE_EFFECTS (t) = 1;
6365 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6366
6367 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
6368 {
6369 /* Find the register save area.
6370 The function prologue saves it right above the stack frame. */
6371 type = TREE_TYPE (sav);
6372 t = make_tree (type, frame_pointer_rtx);
6373 t = build2 (MODIFY_EXPR, type, sav, t);
6374 TREE_SIDE_EFFECTS (t) = 1;
6375 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6376 }
6377 }
6378
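/* Illustrative note: for a function such as int f (int a, double b, ...),
   one integer and one SSE register are consumed by the named arguments, so
   the code above initializes gp_offset to 1 * 8 and fp_offset to
   1 * 16 + 8 * X86_64_REGPARM_MAX, with overflow_arg_area pointing just past
   the named stack words and reg_save_area at the block saved by the
   prologue.  */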
6379 /* Implement va_arg. */
6380
6381 static tree
6382 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6383 gimple_seq *post_p)
6384 {
6385 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
6386 tree f_gpr, f_fpr, f_ovf, f_sav;
6387 tree gpr, fpr, ovf, sav, t;
6388 int size, rsize;
6389 tree lab_false, lab_over = NULL_TREE;
6390 tree addr, t2;
6391 rtx container;
6392 int indirect_p = 0;
6393 tree ptrtype;
6394 enum machine_mode nat_mode;
6395 int arg_boundary;
6396
6397 /* Only the 64-bit target needs anything special. */
6398 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6399 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6400
6401 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6402 f_fpr = TREE_CHAIN (f_gpr);
6403 f_ovf = TREE_CHAIN (f_fpr);
6404 f_sav = TREE_CHAIN (f_ovf);
6405
6406 valist = build_va_arg_indirect_ref (valist);
6407 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6408 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6409 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6410 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6411
6412 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6413 if (indirect_p)
6414 type = build_pointer_type (type);
6415 size = int_size_in_bytes (type);
6416 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6417
6418 nat_mode = type_natural_mode (type);
6419 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
6420 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6421 intreg, 0);
6422
6423 /* Pull the value out of the saved registers. */
6424
6425 addr = create_tmp_var (ptr_type_node, "addr");
6426 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
6427
6428 if (container)
6429 {
6430 int needed_intregs, needed_sseregs;
6431 bool need_temp;
6432 tree int_addr, sse_addr;
6433
6434 lab_false = create_artificial_label ();
6435 lab_over = create_artificial_label ();
6436
6437 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
6438
6439 need_temp = (!REG_P (container)
6440 && ((needed_intregs && TYPE_ALIGN (type) > 64)
6441 || TYPE_ALIGN (type) > 128));
6442
6443 /* When passing a structure, verify that it occupies a consecutive block
6444 of the register save area. If not, we need to do moves. */
6445 if (!need_temp && !REG_P (container))
6446 {
6447 /* Verify that all registers are strictly consecutive */
6448 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
6449 {
6450 int i;
6451
6452 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6453 {
6454 rtx slot = XVECEXP (container, 0, i);
6455 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
6456 || INTVAL (XEXP (slot, 1)) != i * 16)
6457 need_temp = 1;
6458 }
6459 }
6460 else
6461 {
6462 int i;
6463
6464 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6465 {
6466 rtx slot = XVECEXP (container, 0, i);
6467 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
6468 || INTVAL (XEXP (slot, 1)) != i * 8)
6469 need_temp = 1;
6470 }
6471 }
6472 }
6473 if (!need_temp)
6474 {
6475 int_addr = addr;
6476 sse_addr = addr;
6477 }
6478 else
6479 {
6480 int_addr = create_tmp_var (ptr_type_node, "int_addr");
6481 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
6482 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
6483 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
6484 }
6485
6486 /* First ensure that we fit completely in registers. */
6487 if (needed_intregs)
6488 {
6489 t = build_int_cst (TREE_TYPE (gpr),
6490 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
6491 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
6492 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6493 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6494 gimplify_and_add (t, pre_p);
6495 }
6496 if (needed_sseregs)
6497 {
6498 t = build_int_cst (TREE_TYPE (fpr),
6499 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
6500 + X86_64_REGPARM_MAX * 8);
6501 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
6502 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6503 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6504 gimplify_and_add (t, pre_p);
6505 }
6506
6507 /* Compute index to start of area used for integer regs. */
6508 if (needed_intregs)
6509 {
6510 /* int_addr = gpr + sav; */
6511 t = fold_convert (sizetype, gpr);
6512 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6513 gimplify_assign (int_addr, t, pre_p);
6514 }
6515 if (needed_sseregs)
6516 {
6517 /* sse_addr = fpr + sav; */
6518 t = fold_convert (sizetype, fpr);
6519 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6520 gimplify_assign (sse_addr, t, pre_p);
6521 }
6522 if (need_temp)
6523 {
6524 int i;
6525 tree temp = create_tmp_var (type, "va_arg_tmp");
6526
6527 /* addr = &temp; */
6528 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
6529 gimplify_assign (addr, t, pre_p);
6530
6531 for (i = 0; i < XVECLEN (container, 0); i++)
6532 {
6533 rtx slot = XVECEXP (container, 0, i);
6534 rtx reg = XEXP (slot, 0);
6535 enum machine_mode mode = GET_MODE (reg);
6536 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
6537 tree addr_type = build_pointer_type (piece_type);
6538 tree src_addr, src;
6539 int src_offset;
6540 tree dest_addr, dest;
6541
6542 if (SSE_REGNO_P (REGNO (reg)))
6543 {
6544 src_addr = sse_addr;
6545 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
6546 }
6547 else
6548 {
6549 src_addr = int_addr;
6550 src_offset = REGNO (reg) * 8;
6551 }
6552 src_addr = fold_convert (addr_type, src_addr);
6553 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
6554 size_int (src_offset));
6555 src = build_va_arg_indirect_ref (src_addr);
6556
6557 dest_addr = fold_convert (addr_type, addr);
6558 dest_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, dest_addr,
6559 size_int (INTVAL (XEXP (slot, 1))));
6560 dest = build_va_arg_indirect_ref (dest_addr);
6561
6562 gimplify_assign (dest, src, pre_p);
6563 }
6564 }
6565
6566 if (needed_intregs)
6567 {
6568 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
6569 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
6570 gimplify_assign (gpr, t, pre_p);
6571 }
6572
6573 if (needed_sseregs)
6574 {
6575 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
6576 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
6577 gimplify_assign (fpr, t, pre_p);
6578 }
6579
6580 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
6581
6582 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
6583 }
6584
6585 /* ... otherwise out of the overflow area. */
6586
6587 /* When the caller aligns a parameter on the stack, any alignment beyond
6588 MAX_SUPPORTED_STACK_ALIGNMENT is clamped to
6589 MAX_SUPPORTED_STACK_ALIGNMENT. Here we make the callee match the
6590 caller. */
6591 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
6592 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
6593 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
6594
6595 /* Care for on-stack alignment if needed. */
6596 if (arg_boundary <= 64
6597 || integer_zerop (TYPE_SIZE (type)))
6598 t = ovf;
6599 else
6600 {
6601 HOST_WIDE_INT align = arg_boundary / 8;
6602 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
6603 size_int (align - 1));
6604 t = fold_convert (sizetype, t);
6605 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6606 size_int (-align));
6607 t = fold_convert (TREE_TYPE (ovf), t);
6608 }
6609 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
6610 gimplify_assign (addr, t, pre_p);
6611
6612 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
6613 size_int (rsize * UNITS_PER_WORD));
6614 gimplify_assign (unshare_expr (ovf), t, pre_p);
6615
6616 if (container)
6617 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
6618
6619 ptrtype = build_pointer_type (type);
6620 addr = fold_convert (ptrtype, addr);
6621
6622 if (indirect_p)
6623 addr = build_va_arg_indirect_ref (addr);
6624 return build_va_arg_indirect_ref (addr);
6625 }
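/* Illustrative note: for va_arg (ap, double) the code above tests fp_offset
   against the end of the SSE part of the register save area, loads the value
   from reg_save_area + fp_offset and bumps fp_offset by 16 when it fits, and
   otherwise falls through to the (suitably aligned) overflow_arg_area.  */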
6626 \f
6627 /* Return nonzero if OPNUM's MEM should be matched
6628 in movabs* patterns. */
6629
6630 int
6631 ix86_check_movabs (rtx insn, int opnum)
6632 {
6633 rtx set, mem;
6634
6635 set = PATTERN (insn);
6636 if (GET_CODE (set) == PARALLEL)
6637 set = XVECEXP (set, 0, 0);
6638 gcc_assert (GET_CODE (set) == SET);
6639 mem = XEXP (set, opnum);
6640 while (GET_CODE (mem) == SUBREG)
6641 mem = SUBREG_REG (mem);
6642 gcc_assert (MEM_P (mem));
6643 return (volatile_ok || !MEM_VOLATILE_P (mem));
6644 }
6645 \f
6646 /* Initialize the table of extra 80387 mathematical constants. */
6647
6648 static void
6649 init_ext_80387_constants (void)
6650 {
6651 static const char * cst[5] =
6652 {
6653 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
6654 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
6655 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
6656 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
6657 "3.1415926535897932385128089594061862044", /* 4: fldpi */
6658 };
6659 int i;
6660
6661 for (i = 0; i < 5; i++)
6662 {
6663 real_from_string (&ext_80387_constants_table[i], cst[i]);
6664 /* Ensure each constant is rounded to XFmode precision. */
6665 real_convert (&ext_80387_constants_table[i],
6666 XFmode, &ext_80387_constants_table[i]);
6667 }
6668
6669 ext_80387_constants_init = 1;
6670 }
6671
6672 /* Return true if the constant is something that can be loaded with
6673 a special instruction. */
6674
6675 int
6676 standard_80387_constant_p (rtx x)
6677 {
6678 enum machine_mode mode = GET_MODE (x);
6679
6680 REAL_VALUE_TYPE r;
6681
6682 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
6683 return -1;
6684
6685 if (x == CONST0_RTX (mode))
6686 return 1;
6687 if (x == CONST1_RTX (mode))
6688 return 2;
6689
6690 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6691
6692 /* For XFmode constants, try to find a special 80387 instruction when
6693 optimizing for size or on those CPUs that benefit from them. */
6694 if (mode == XFmode
6695 && (optimize_insn_for_size_p () || TARGET_EXT_80387_CONSTANTS))
6696 {
6697 int i;
6698
6699 if (! ext_80387_constants_init)
6700 init_ext_80387_constants ();
6701
6702 for (i = 0; i < 5; i++)
6703 if (real_identical (&r, &ext_80387_constants_table[i]))
6704 return i + 3;
6705 }
6706
6707 /* A load of the constant -0.0 or -1.0 will be split into an
6708 fldz;fchs or fld1;fchs sequence. */
6709 if (real_isnegzero (&r))
6710 return 8;
6711 if (real_identical (&r, &dconstm1))
6712 return 9;
6713
6714 return 0;
6715 }
6716
6717 /* Return the opcode of the special instruction to be used to load
6718 the constant X. */
6719
6720 const char *
6721 standard_80387_constant_opcode (rtx x)
6722 {
6723 switch (standard_80387_constant_p (x))
6724 {
6725 case 1:
6726 return "fldz";
6727 case 2:
6728 return "fld1";
6729 case 3:
6730 return "fldlg2";
6731 case 4:
6732 return "fldln2";
6733 case 5:
6734 return "fldl2e";
6735 case 6:
6736 return "fldl2t";
6737 case 7:
6738 return "fldpi";
6739 case 8:
6740 case 9:
6741 return "#";
6742 default:
6743 gcc_unreachable ();
6744 }
6745 }
6746
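/* Illustrative note: CONST1_RTX (XFmode) makes standard_80387_constant_p
   return 2, so the opcode above is "fld1"; the XFmode value of pi returns 7
   ("fldpi") when optimizing for size or on TARGET_EXT_80387_CONSTANTS CPUs;
   -0.0 and -1.0 return 8 and 9 and are later split into fldz;fchs and
   fld1;fchs.  */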
6747 /* Return the CONST_DOUBLE representing the 80387 constant that is
6748 loaded by the specified special instruction. The argument IDX
6749 matches the return value from standard_80387_constant_p. */
6750
6751 rtx
6752 standard_80387_constant_rtx (int idx)
6753 {
6754 int i;
6755
6756 if (! ext_80387_constants_init)
6757 init_ext_80387_constants ();
6758
6759 switch (idx)
6760 {
6761 case 3:
6762 case 4:
6763 case 5:
6764 case 6:
6765 case 7:
6766 i = idx - 3;
6767 break;
6768
6769 default:
6770 gcc_unreachable ();
6771 }
6772
6773 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
6774 XFmode);
6775 }
6776
6777 /* Return 1 if MODE is a valid vector mode for SSE. */
6778 static int
6779 standard_sse_mode_p (enum machine_mode mode)
6780 {
6781 switch (mode)
6782 {
6783 case V16QImode:
6784 case V8HImode:
6785 case V4SImode:
6786 case V2DImode:
6787 case V4SFmode:
6788 case V2DFmode:
6789 return 1;
6790
6791 default:
6792 return 0;
6793 }
6794 }
6795
6796 /* Return 1 if X is an FP constant we can load into an SSE register
6797 without using memory. */
6798 int
6799 standard_sse_constant_p (rtx x)
6800 {
6801 enum machine_mode mode = GET_MODE (x);
6802
6803 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
6804 return 1;
6805 if (vector_all_ones_operand (x, mode)
6806 && standard_sse_mode_p (mode))
6807 return TARGET_SSE2 ? 2 : -1;
6808
6809 return 0;
6810 }
6811
6812 /* Return the opcode of the special instruction to be used to load
6813 the constant X. */
6814
6815 const char *
6816 standard_sse_constant_opcode (rtx insn, rtx x)
6817 {
6818 switch (standard_sse_constant_p (x))
6819 {
6820 case 1:
6821 if (get_attr_mode (insn) == MODE_V4SF)
6822 return "xorps\t%0, %0";
6823 else if (get_attr_mode (insn) == MODE_V2DF)
6824 return "xorpd\t%0, %0";
6825 else
6826 return "pxor\t%0, %0";
6827 case 2:
6828 return "pcmpeqd\t%0, %0";
6829 }
6830 gcc_unreachable ();
6831 }
6832
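/* Illustrative note: an all-zero vector constant is emitted as an xorps,
   xorpd or pxor of the destination register with itself, depending on the
   insn's mode attribute, while the all-ones constant uses pcmpeqd and is
   only recognized when SSE2 is available.  */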
6833 /* Return 1 if OP contains a symbol reference. */
6834
6835 int
6836 symbolic_reference_mentioned_p (rtx op)
6837 {
6838 const char *fmt;
6839 int i;
6840
6841 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
6842 return 1;
6843
6844 fmt = GET_RTX_FORMAT (GET_CODE (op));
6845 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
6846 {
6847 if (fmt[i] == 'E')
6848 {
6849 int j;
6850
6851 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
6852 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
6853 return 1;
6854 }
6855
6856 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
6857 return 1;
6858 }
6859
6860 return 0;
6861 }
6862
6863 /* Return 1 if it is appropriate to emit `ret' instructions in the
6864 body of a function. Do this only if the epilogue is simple, needing a
6865 couple of insns. Prior to reloading, we can't tell how many registers
6866 must be saved, so return 0 then. Return 0 if there is no frame
6867 marker to de-allocate. */
6868
6869 int
6870 ix86_can_use_return_insn_p (void)
6871 {
6872 struct ix86_frame frame;
6873
6874 if (! reload_completed || frame_pointer_needed)
6875 return 0;
6876
6877 /* Don't allow popping 32768 or more bytes of arguments, since that's
6878 beyond what we handle with a single return instruction. */
6879 if (crtl->args.pops_args
6880 && crtl->args.size >= 32768)
6881 return 0;
6882
6883 ix86_compute_frame_layout (&frame);
6884 return frame.to_allocate == 0 && frame.nregs == 0;
6885 }
6886 \f
6887 /* Value should be nonzero if functions must have frame pointers.
6888 Zero means the frame pointer need not be set up (and parms may
6889 be accessed via the stack pointer) in functions that seem suitable. */
6890
6891 int
6892 ix86_frame_pointer_required (void)
6893 {
6894 /* If we accessed previous frames, then the generated code expects
6895 to be able to access the saved ebp value in our frame. */
6896 if (cfun->machine->accesses_prev_frame)
6897 return 1;
6898
6899 /* Several x86 OSes need a frame pointer for other reasons,
6900 usually pertaining to setjmp. */
6901 if (SUBTARGET_FRAME_POINTER_REQUIRED)
6902 return 1;
6903
6904 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
6905 the frame pointer by default. Turn it back on now if we've not
6906 got a leaf function. */
6907 if (TARGET_OMIT_LEAF_FRAME_POINTER
6908 && (!current_function_is_leaf
6909 || ix86_current_function_calls_tls_descriptor))
6910 return 1;
6911
6912 if (crtl->profile)
6913 return 1;
6914
6915 return 0;
6916 }
6917
6918 /* Record that the current function accesses previous call frames. */
6919
6920 void
6921 ix86_setup_frame_addresses (void)
6922 {
6923 cfun->machine->accesses_prev_frame = 1;
6924 }
6925 \f
6926 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
6927 # define USE_HIDDEN_LINKONCE 1
6928 #else
6929 # define USE_HIDDEN_LINKONCE 0
6930 #endif
6931
6932 static int pic_labels_used;
6933
6934 /* Fills in the label name that should be used for a pc thunk for
6935 the given register. */
6936
6937 static void
6938 get_pc_thunk_name (char name[32], unsigned int regno)
6939 {
6940 gcc_assert (!TARGET_64BIT);
6941
6942 if (USE_HIDDEN_LINKONCE)
6943 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
6944 else
6945 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
6946 }
6947
6948
6949 /* At the end of the file, emit the -fpic pc thunks: each loads its register
6950 with the return address of the caller and then returns. */
6951
6952 void
6953 ix86_file_end (void)
6954 {
6955 rtx xops[2];
6956 int regno;
6957
6958 for (regno = 0; regno < 8; ++regno)
6959 {
6960 char name[32];
6961
6962 if (! ((pic_labels_used >> regno) & 1))
6963 continue;
6964
6965 get_pc_thunk_name (name, regno);
6966
6967 #if TARGET_MACHO
6968 if (TARGET_MACHO)
6969 {
6970 switch_to_section (darwin_sections[text_coal_section]);
6971 fputs ("\t.weak_definition\t", asm_out_file);
6972 assemble_name (asm_out_file, name);
6973 fputs ("\n\t.private_extern\t", asm_out_file);
6974 assemble_name (asm_out_file, name);
6975 fputs ("\n", asm_out_file);
6976 ASM_OUTPUT_LABEL (asm_out_file, name);
6977 }
6978 else
6979 #endif
6980 if (USE_HIDDEN_LINKONCE)
6981 {
6982 tree decl;
6983
6984 decl = build_decl (FUNCTION_DECL, get_identifier (name),
6985 error_mark_node);
6986 TREE_PUBLIC (decl) = 1;
6987 TREE_STATIC (decl) = 1;
6988 DECL_ONE_ONLY (decl) = 1;
6989
6990 (*targetm.asm_out.unique_section) (decl, 0);
6991 switch_to_section (get_named_section (decl, NULL, 0));
6992
6993 (*targetm.asm_out.globalize_label) (asm_out_file, name);
6994 fputs ("\t.hidden\t", asm_out_file);
6995 assemble_name (asm_out_file, name);
6996 fputc ('\n', asm_out_file);
6997 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
6998 }
6999 else
7000 {
7001 switch_to_section (text_section);
7002 ASM_OUTPUT_LABEL (asm_out_file, name);
7003 }
7004
7005 xops[0] = gen_rtx_REG (Pmode, regno);
7006 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7007 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7008 output_asm_insn ("ret", xops);
7009 }
7010
7011 if (NEED_INDICATE_EXEC_STACK)
7012 file_end_indicate_exec_stack ();
7013 }
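/* As an illustration of the thunks emitted above (a sketch, not
   authoritative assembler output; the exact directives depend on
   USE_HIDDEN_LINKONCE and the object format), the %ebx thunk body is
   simply:

	__i686.get_pc_thunk.bx:
		movl	(%esp), %ebx
		ret

   i.e. it copies its own return address -- the address of the insn
   following the call -- into the requested register.  */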
7014
7015 /* Emit code for the SET_GOT patterns. */
7016
7017 const char *
7018 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
7019 {
7020 rtx xops[3];
7021
7022 xops[0] = dest;
7023
7024 if (TARGET_VXWORKS_RTP && flag_pic)
7025 {
7026 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
7027 xops[2] = gen_rtx_MEM (Pmode,
7028 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
7029 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
7030
7031 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
7032 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
7033 an unadorned address. */
7034 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7035 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
7036 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
7037 return "";
7038 }
7039
7040 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
7041
7042 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
7043 {
7044 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
7045
7046 if (!flag_pic)
7047 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
7048 else
7049 output_asm_insn ("call\t%a2", xops);
7050
7051 #if TARGET_MACHO
7052 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7053 is what will be referenced by the Mach-O PIC subsystem. */
7054 if (!label)
7055 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
7056 #endif
7057
7058 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7059 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
7060
7061 if (flag_pic)
7062 output_asm_insn ("pop%z0\t%0", xops);
7063 }
7064 else
7065 {
7066 char name[32];
7067 get_pc_thunk_name (name, REGNO (dest));
7068 pic_labels_used |= 1 << REGNO (dest);
7069
7070 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
7071 xops[2] = gen_rtx_MEM (QImode, xops[2]);
7072 output_asm_insn ("call\t%X2", xops);
7073 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7074 is what will be referenced by the Mach-O PIC subsystem. */
7075 #if TARGET_MACHO
7076 if (!label)
7077 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
7078 else
7079 targetm.asm_out.internal_label (asm_out_file, "L",
7080 CODE_LABEL_NUMBER (label));
7081 #endif
7082 }
7083
7084 if (TARGET_MACHO)
7085 return "";
7086
7087 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
7088 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
7089 else
7090 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
7091
7092 return "";
7093 }
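/* For reference, a sketch of the 32-bit sequences produced above for
   %ebx (assembler syntax and relocations are approximate and depend on
   the target):

     without deep branch prediction:
	call	.L2
     .L2:
	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

     with deep branch prediction (using the pc thunk emitted by
     ix86_file_end):
	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx  */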
7094
7095 /* Generate a "push" pattern for input ARG. */
7096
7097 static rtx
7098 gen_push (rtx arg)
7099 {
7100 return gen_rtx_SET (VOIDmode,
7101 gen_rtx_MEM (Pmode,
7102 gen_rtx_PRE_DEC (Pmode,
7103 stack_pointer_rtx)),
7104 arg);
7105 }
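/* For example, gen_push (gen_rtx_REG (Pmode, AX_REG)) on a 32-bit
   target yields RTL of the form (a sketch):

	(set (mem:SI (pre_dec:SI (reg:SI sp)))
	     (reg:SI ax))

   which the move patterns emit as "pushl %eax".  */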
7106
7107 /* Return >= 0 if there is an unused call-clobbered register available
7108 for the entire function. */
7109
7110 static unsigned int
7111 ix86_select_alt_pic_regnum (void)
7112 {
7113 if (current_function_is_leaf && !crtl->profile
7114 && !ix86_current_function_calls_tls_descriptor)
7115 {
7116 int i, drap;
7117 /* Can't use the same register for both PIC and DRAP. */
7118 if (crtl->drap_reg)
7119 drap = REGNO (crtl->drap_reg);
7120 else
7121 drap = -1;
7122 for (i = 2; i >= 0; --i)
7123 if (i != drap && !df_regs_ever_live_p (i))
7124 return i;
7125 }
7126
7127 return INVALID_REGNUM;
7128 }
7129
7130 /* Return 1 if we need to save REGNO. */
7131 static int
7132 ix86_save_reg (unsigned int regno, int maybe_eh_return)
7133 {
7134 if (pic_offset_table_rtx
7135 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
7136 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7137 || crtl->profile
7138 || crtl->calls_eh_return
7139 || crtl->uses_const_pool))
7140 {
7141 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
7142 return 0;
7143 return 1;
7144 }
7145
7146 if (crtl->calls_eh_return && maybe_eh_return)
7147 {
7148 unsigned i;
7149 for (i = 0; ; i++)
7150 {
7151 unsigned test = EH_RETURN_DATA_REGNO (i);
7152 if (test == INVALID_REGNUM)
7153 break;
7154 if (test == regno)
7155 return 1;
7156 }
7157 }
7158
7159 if (crtl->drap_reg
7160 && regno == REGNO (crtl->drap_reg))
7161 return 1;
7162
7163 return (df_regs_ever_live_p (regno)
7164 && !call_used_regs[regno]
7165 && !fixed_regs[regno]
7166 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
7167 }
7168
7169 /* Return number of registers to be saved on the stack. */
7170
7171 static int
7172 ix86_nsaved_regs (void)
7173 {
7174 int nregs = 0;
7175 int regno;
7176
7177 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
7178 if (ix86_save_reg (regno, true))
7179 nregs++;
7180 return nregs;
7181 }
7182
7183 /* Given FROM and TO register numbers, say whether this elimination is
7184 allowed. If stack alignment is needed, we can only replace argument
7185 pointer with hard frame pointer, or replace frame pointer with stack
7186 pointer. Otherwise, frame pointer elimination is automatically
7187 handled and all other eliminations are valid. */
7188
7189 int
7190 ix86_can_eliminate (int from, int to)
7191 {
7192 if (stack_realign_fp)
7193 return ((from == ARG_POINTER_REGNUM
7194 && to == HARD_FRAME_POINTER_REGNUM)
7195 || (from == FRAME_POINTER_REGNUM
7196 && to == STACK_POINTER_REGNUM));
7197 else
7198 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1;
7199 }
7200
7201 /* Return the offset between two registers, one to be eliminated, and the other
7202 its replacement, at the start of a routine. */
7203
7204 HOST_WIDE_INT
7205 ix86_initial_elimination_offset (int from, int to)
7206 {
7207 struct ix86_frame frame;
7208 ix86_compute_frame_layout (&frame);
7209
7210 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7211 return frame.hard_frame_pointer_offset;
7212 else if (from == FRAME_POINTER_REGNUM
7213 && to == HARD_FRAME_POINTER_REGNUM)
7214 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
7215 else
7216 {
7217 gcc_assert (to == STACK_POINTER_REGNUM);
7218
7219 if (from == ARG_POINTER_REGNUM)
7220 return frame.stack_pointer_offset;
7221
7222 gcc_assert (from == FRAME_POINTER_REGNUM);
7223 return frame.stack_pointer_offset - frame.frame_pointer_offset;
7224 }
7225 }
7226
7227 /* Fill the ix86_frame structure describing the frame of the currently computed function. */
7228
7229 static void
7230 ix86_compute_frame_layout (struct ix86_frame *frame)
7231 {
7232 HOST_WIDE_INT total_size;
7233 unsigned int stack_alignment_needed;
7234 HOST_WIDE_INT offset;
7235 unsigned int preferred_alignment;
7236 HOST_WIDE_INT size = get_frame_size ();
7237
7238 frame->nregs = ix86_nsaved_regs ();
7239 total_size = size;
7240
7241 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7242 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7243
7244 gcc_assert (!size || stack_alignment_needed);
7245 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7246 gcc_assert (preferred_alignment <= stack_alignment_needed);
7247
7248 /* During reload iteration the number of registers saved can change.
7249 Recompute the value as needed. Do not recompute when the number of registers
7250 didn't change, as reload does multiple calls to the function and does not
7251 expect the decision to change within a single iteration. */
7252 if (!optimize_size
7253 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
7254 {
7255 int count = frame->nregs;
7256
7257 cfun->machine->use_fast_prologue_epilogue_nregs = count;
7258 /* The fast prologue uses moves instead of pushes to save registers. This
7259 is significantly longer, but it also executes faster, as modern hardware
7260 can execute the moves in parallel but can't do that for push/pop.
7261 
7262 Be careful about choosing which prologue to emit: when the function takes
7263 many instructions to execute, we may use the slow version, as well as when
7264 the function is known to be outside a hot spot (this is known only with
7265 profile feedback). Weight the size of the function by the number of
7266 registers to save, as it is cheap to use one or two push instructions but
7267 very slow to use many of them. */
7268 if (count)
7269 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7270 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
7271 || (flag_branch_probabilities
7272 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
7273 cfun->machine->use_fast_prologue_epilogue = false;
7274 else
7275 cfun->machine->use_fast_prologue_epilogue
7276 = !expensive_function_p (count);
7277 }
7278 if (TARGET_PROLOGUE_USING_MOVE
7279 && cfun->machine->use_fast_prologue_epilogue)
7280 frame->save_regs_using_mov = true;
7281 else
7282 frame->save_regs_using_mov = false;
7283
7284
7285 /* Skip return address and saved base pointer. */
7286 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
7287
7288 frame->hard_frame_pointer_offset = offset;
7289
7290 /* Align the offset because the realigned frame starts from
7291 here. */
7292 if (stack_realign_fp)
7293 offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed;
7294
7295 /* Register save area */
7296 offset += frame->nregs * UNITS_PER_WORD;
7297
7298 /* Va-arg area */
7299 if (ix86_save_varrargs_registers)
7300 {
7301 offset += X86_64_VARARGS_SIZE;
7302 frame->va_arg_size = X86_64_VARARGS_SIZE;
7303 }
7304 else
7305 frame->va_arg_size = 0;
7306
7307 /* Align start of frame for local function. */
7308 frame->padding1 = ((offset + stack_alignment_needed - 1)
7309 & -stack_alignment_needed) - offset;
7310
7311 offset += frame->padding1;
7312
7313 /* Frame pointer points here. */
7314 frame->frame_pointer_offset = offset;
7315
7316 offset += size;
7317
7318 /* Add the outgoing arguments area. It can be skipped if we eliminated
7319 all the function calls as dead code.
7320 Skipping is however impossible when the function calls alloca. The alloca
7321 expander assumes that the last crtl->outgoing_args_size bytes
7322 of the stack frame are unused. */
7323 if (ACCUMULATE_OUTGOING_ARGS
7324 && (!current_function_is_leaf || cfun->calls_alloca
7325 || ix86_current_function_calls_tls_descriptor))
7326 {
7327 offset += crtl->outgoing_args_size;
7328 frame->outgoing_arguments_size = crtl->outgoing_args_size;
7329 }
7330 else
7331 frame->outgoing_arguments_size = 0;
7332
7333 /* Align stack boundary. Only needed if we're calling another function
7334 or using alloca. */
7335 if (!current_function_is_leaf || cfun->calls_alloca
7336 || ix86_current_function_calls_tls_descriptor)
7337 frame->padding2 = ((offset + preferred_alignment - 1)
7338 & -preferred_alignment) - offset;
7339 else
7340 frame->padding2 = 0;
7341
7342 offset += frame->padding2;
7343
7344 /* We've reached end of stack frame. */
7345 frame->stack_pointer_offset = offset;
7346
7347 /* Size prologue needs to allocate. */
7348 frame->to_allocate =
7349 (size + frame->padding1 + frame->padding2
7350 + frame->outgoing_arguments_size + frame->va_arg_size);
7351
7352 if ((!frame->to_allocate && frame->nregs <= 1)
7353 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
7354 frame->save_regs_using_mov = false;
7355
7356 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && current_function_sp_is_unchanging
7357 && current_function_is_leaf
7358 && !ix86_current_function_calls_tls_descriptor)
7359 {
7360 frame->red_zone_size = frame->to_allocate;
7361 if (frame->save_regs_using_mov)
7362 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
7363 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7364 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7365 }
7366 else
7367 frame->red_zone_size = 0;
7368 frame->to_allocate -= frame->red_zone_size;
7369 frame->stack_pointer_offset -= frame->red_zone_size;
7370 #if 0
7371 fprintf (stderr, "\n");
7372 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
7373 fprintf (stderr, "size: %ld\n", (long)size);
7374 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
7375 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
7376 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
7377 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
7378 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
7379 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
7380 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
7381 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
7382 (long)frame->hard_frame_pointer_offset);
7383 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
7384 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
7385 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
7386 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
7387 #endif
7388 }
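/* As a rough picture of the layout computed above for a 32-bit function
   with a frame pointer (a sketch; stack realignment, the red zone and
   the varargs area change the details), from higher to lower addresses:

	incoming arguments
	return address
	saved %ebp		<- %ebp after the prologue
	saved registers
	va-arg save area
	padding1
	local variables
	outgoing arguments
	padding2		<- %esp after the prologue  */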
7389
7390 /* Emit code to save registers in the prologue. */
7391
7392 static void
7393 ix86_emit_save_regs (void)
7394 {
7395 unsigned int regno;
7396 rtx insn;
7397
7398 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
7399 if (ix86_save_reg (regno, true))
7400 {
7401 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
7402 RTX_FRAME_RELATED_P (insn) = 1;
7403 }
7404 }
7405
7406 /* Emit code to save registers using MOV insns. The first register
7407 is stored at POINTER + OFFSET. */
7408 static void
7409 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7410 {
7411 unsigned int regno;
7412 rtx insn;
7413
7414 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7415 if (ix86_save_reg (regno, true))
7416 {
7417 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
7418 Pmode, offset),
7419 gen_rtx_REG (Pmode, regno));
7420 RTX_FRAME_RELATED_P (insn) = 1;
7421 offset += UNITS_PER_WORD;
7422 }
7423 }
7424
7425 /* Expand prologue or epilogue stack adjustment.
7426 The pattern exists to put a dependency on all ebp-based memory accesses.
7427 STYLE should be negative if instructions should be marked as frame related,
7428 zero if the %r11 register is live and cannot be freely used, and positive
7429 otherwise. */
7430
7431 static void
7432 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
7433 {
7434 rtx insn;
7435
7436 if (! TARGET_64BIT)
7437 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
7438 else if (x86_64_immediate_operand (offset, DImode))
7439 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
7440 else
7441 {
7442 rtx r11;
7443 /* r11 is used by indirect sibcall return as well, set before the
7444 epilogue and used after the epilogue. ATM indirect sibcall
7445 shouldn't be used together with huge frame sizes in one
7446 function because of the frame_size check in sibcall.c. */
7447 gcc_assert (style);
7448 r11 = gen_rtx_REG (DImode, R11_REG);
7449 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
7450 if (style < 0)
7451 RTX_FRAME_RELATED_P (insn) = 1;
7452 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
7453 offset));
7454 }
7455 if (style < 0)
7456 RTX_FRAME_RELATED_P (insn) = 1;
7457 }
7458
7459 /* Find an available register to be used as the dynamic realign argument
7460 pointer register. Such a register will be written in the prologue and
7461 used at the beginning of the body, so it must not be
7462 1. a parameter passing register.
7463 2. the GOT pointer.
7464 We reuse the static-chain register if it is available. Otherwise, we
7465 use DI for i386 and R13 for x86-64. We chose R13 since it has
7466 shorter encoding.
7467 
7468 Return: the regno of the chosen register. */
7469
7470 static unsigned int
7471 find_drap_reg (void)
7472 {
7473 tree decl = cfun->decl;
7474
7475 if (TARGET_64BIT)
7476 {
7477 /* Use R13 for a nested function or a function that needs a static
7478 chain. Since a function with a tail call may use any caller-saved
7479 register in the epilogue, DRAP must not use a caller-saved
7480 register in that case. */
7481 if ((decl_function_context (decl)
7482 && !DECL_NO_STATIC_CHAIN (decl))
7483 || crtl->tail_call_emit)
7484 return R13_REG;
7485
7486 return R10_REG;
7487 }
7488 else
7489 {
7490 /* Use DI for a nested function or a function that needs a static
7491 chain. Since a function with a tail call may use any caller-saved
7492 register in the epilogue, DRAP must not use a caller-saved
7493 register in that case. */
7494 if ((decl_function_context (decl)
7495 && !DECL_NO_STATIC_CHAIN (decl))
7496 || crtl->tail_call_emit)
7497 return DI_REG;
7498
7499 /* Reuse static chain register if it isn't used for parameter
7500 passing. */
7501 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
7502 && !lookup_attribute ("fastcall",
7503 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
7504 return CX_REG;
7505 else
7506 return DI_REG;
7507 }
7508 }
7509
7510 /* Update incoming stack boundary and estimated stack alignment. */
7511
7512 static void
7513 ix86_update_stack_boundary (void)
7514 {
7515 /* Prefer the value specified on the command line. */
7516 ix86_incoming_stack_boundary
7517 = (ix86_user_incoming_stack_boundary
7518 ? ix86_user_incoming_stack_boundary
7519 : ix86_default_incoming_stack_boundary);
7520
7521 /* Incoming stack alignment can be changed on individual functions
7522 via force_align_arg_pointer attribute. We use the smallest
7523 incoming stack boundary. */
7524 if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY
7525 && lookup_attribute (ix86_force_align_arg_pointer_string,
7526 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
7527 ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY;
7528
7529 /* The stack at the entry of main is aligned by the runtime. We use the
7530 smallest incoming stack boundary. */
7531 if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY
7532 && DECL_NAME (current_function_decl)
7533 && MAIN_NAME_P (DECL_NAME (current_function_decl))
7534 && DECL_FILE_SCOPE_P (current_function_decl))
7535 ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY;
7536
7537 /* x86_64 varargs need 16-byte stack alignment for the register save
7538 area. */
7539 if (TARGET_64BIT
7540 && cfun->stdarg
7541 && crtl->stack_alignment_estimated < 128)
7542 crtl->stack_alignment_estimated = 128;
7543 }
7544
7545 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
7546 needed or an rtx for DRAP otherwise. */
7547
7548 static rtx
7549 ix86_get_drap_rtx (void)
7550 {
7551 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
7552 crtl->need_drap = true;
7553
7554 if (stack_realign_drap)
7555 {
7556 /* Assign DRAP to vDRAP and return vDRAP. */
7557 unsigned int regno = find_drap_reg ();
7558 rtx drap_vreg;
7559 rtx arg_ptr;
7560 rtx seq, insn;
7561
7562 arg_ptr = gen_rtx_REG (Pmode, regno);
7563 crtl->drap_reg = arg_ptr;
7564
7565 start_sequence ();
7566 drap_vreg = copy_to_reg (arg_ptr);
7567 seq = get_insns ();
7568 end_sequence ();
7569
7570 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
7571 RTX_FRAME_RELATED_P (insn) = 1;
7572 return drap_vreg;
7573 }
7574 else
7575 return NULL;
7576 }
7577
7578 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
7579
7580 static rtx
7581 ix86_internal_arg_pointer (void)
7582 {
7583 return virtual_incoming_args_rtx;
7584 }
7585
7586 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
7587 This is called from dwarf2out.c to emit call frame instructions
7588 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
7589 static void
7590 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
7591 {
7592 rtx unspec = SET_SRC (pattern);
7593 gcc_assert (GET_CODE (unspec) == UNSPEC);
7594
7595 switch (index)
7596 {
7597 case UNSPEC_REG_SAVE:
7598 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
7599 SET_DEST (pattern));
7600 break;
7601 case UNSPEC_DEF_CFA:
7602 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
7603 INTVAL (XVECEXP (unspec, 0, 0)));
7604 break;
7605 default:
7606 gcc_unreachable ();
7607 }
7608 }
7609
7610 /* Finalize the stack_realign_needed flag, which guides the prologue/epilogue
7611 so that it is generated in the correct form. */
7612 static void
7613 ix86_finalize_stack_realign_flags (void)
7614 {
7615 /* Check whether stack realignment is really needed after reload, and
7616 store the result in cfun. */
7617 unsigned int incoming_stack_boundary
7618 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
7619 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
7620 unsigned int stack_realign = (incoming_stack_boundary
7621 < (current_function_is_leaf
7622 ? crtl->max_used_stack_slot_alignment
7623 : crtl->stack_alignment_needed));
7624
7625 if (crtl->stack_realign_finalized)
7626 {
7627 /* After stack_realign_needed is finalized, we can no longer
7628 change it. */
7629 gcc_assert (crtl->stack_realign_needed == stack_realign);
7630 }
7631 else
7632 {
7633 crtl->stack_realign_needed = stack_realign;
7634 crtl->stack_realign_finalized = true;
7635 }
7636 }
7637
7638 /* Expand the prologue into a bunch of separate insns. */
7639
7640 void
7641 ix86_expand_prologue (void)
7642 {
7643 rtx insn;
7644 bool pic_reg_used;
7645 struct ix86_frame frame;
7646 HOST_WIDE_INT allocate;
7647
7648 ix86_finalize_stack_realign_flags ();
7649
7650 /* DRAP should not coexist with stack_realign_fp */
7651 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
7652
7653 ix86_compute_frame_layout (&frame);
7654
7655 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
7656 DRAP is needed and stack realignment is really needed after reload. */
7657 if (crtl->drap_reg && crtl->stack_realign_needed)
7658 {
7659 rtx x, y;
7660 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
7661 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
7662 ? 0 : UNITS_PER_WORD);
7663
7664 gcc_assert (stack_realign_drap);
7665
7666 /* Grab the argument pointer. */
7667 x = plus_constant (stack_pointer_rtx,
7668 (UNITS_PER_WORD + param_ptr_offset));
7669 y = crtl->drap_reg;
7670
7671 /* Only need to push the parameter pointer reg if it is a
7672 caller-saved reg. */
7673 if (!call_used_regs[REGNO (crtl->drap_reg)])
7674 {
7675 /* Push arg pointer reg */
7676 insn = emit_insn (gen_push (y));
7677 RTX_FRAME_RELATED_P (insn) = 1;
7678 }
7679
7680 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
7681 RTX_FRAME_RELATED_P (insn) = 1;
7682
7683 /* Align the stack. */
7684 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
7685 stack_pointer_rtx,
7686 GEN_INT (-align_bytes)));
7687 RTX_FRAME_RELATED_P (insn) = 1;
7688
7689 /* Replicate the return address on the stack so that return
7690 address can be reached via (argp - 1) slot. This is needed
7691 to implement macro RETURN_ADDR_RTX and intrinsic function
7692 expand_builtin_return_addr etc. */
7693 x = crtl->drap_reg;
7694 x = gen_frame_mem (Pmode,
7695 plus_constant (x, -UNITS_PER_WORD));
7696 insn = emit_insn (gen_push (x));
7697 RTX_FRAME_RELATED_P (insn) = 1;
7698 }
7699
7700 /* Note: AT&T enter does NOT have reversed args. Enter is probably
7701 slower on all targets. Also sdb doesn't like it. */
7702
7703 if (frame_pointer_needed)
7704 {
7705 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
7706 RTX_FRAME_RELATED_P (insn) = 1;
7707
7708 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
7709 RTX_FRAME_RELATED_P (insn) = 1;
7710 }
7711
7712 if (stack_realign_fp)
7713 {
7714 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
7715 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
7716
7717 /* Align the stack. */
7718 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
7719 stack_pointer_rtx,
7720 GEN_INT (-align_bytes)));
7721 RTX_FRAME_RELATED_P (insn) = 1;
7722 }
7723
7724 allocate = frame.to_allocate;
7725
7726 if (!frame.save_regs_using_mov)
7727 ix86_emit_save_regs ();
7728 else
7729 allocate += frame.nregs * UNITS_PER_WORD;
7730
7731 /* When using the red zone we may start register saving before allocating
7732 the stack frame, saving one cycle of the prologue. However, we
7733 avoid doing this if we are going to have to probe the stack, since
7734 at least on x86_64 the stack probe can turn into a call that clobbers
7735 a red zone location. */
7736 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
7737 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
7738 ix86_emit_save_regs_using_mov ((frame_pointer_needed
7739 && !crtl->stack_realign_needed)
7740 ? hard_frame_pointer_rtx
7741 : stack_pointer_rtx,
7742 -frame.nregs * UNITS_PER_WORD);
7743
7744 if (allocate == 0)
7745 ;
7746 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
7747 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7748 GEN_INT (-allocate), -1);
7749 else
7750 {
7751 /* Only valid for Win32. */
7752 rtx eax = gen_rtx_REG (Pmode, AX_REG);
7753 bool eax_live;
7754 rtx t;
7755
7756 gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
7757
7758 if (cfun->machine->call_abi == MS_ABI)
7759 eax_live = false;
7760 else
7761 eax_live = ix86_eax_live_at_start_p ();
7762
7763 if (eax_live)
7764 {
7765 emit_insn (gen_push (eax));
7766 allocate -= UNITS_PER_WORD;
7767 }
7768
7769 emit_move_insn (eax, GEN_INT (allocate));
7770
7771 if (TARGET_64BIT)
7772 insn = gen_allocate_stack_worker_64 (eax);
7773 else
7774 insn = gen_allocate_stack_worker_32 (eax);
7775 insn = emit_insn (insn);
7776 RTX_FRAME_RELATED_P (insn) = 1;
7777 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
7778 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
7779 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
7780 t, REG_NOTES (insn));
7781
7782 if (eax_live)
7783 {
7784 if (frame_pointer_needed)
7785 t = plus_constant (hard_frame_pointer_rtx,
7786 allocate
7787 - frame.to_allocate
7788 - frame.nregs * UNITS_PER_WORD);
7789 else
7790 t = plus_constant (stack_pointer_rtx, allocate);
7791 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
7792 }
7793 }
7794
7795 if (frame.save_regs_using_mov
7796 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
7797 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
7798 {
7799 if (!frame_pointer_needed
7800 || !frame.to_allocate
7801 || crtl->stack_realign_needed)
7802 ix86_emit_save_regs_using_mov (stack_pointer_rtx,
7803 frame.to_allocate);
7804 else
7805 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
7806 -frame.nregs * UNITS_PER_WORD);
7807 }
7808
7809 pic_reg_used = false;
7810 if (pic_offset_table_rtx
7811 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7812 || crtl->profile))
7813 {
7814 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
7815
7816 if (alt_pic_reg_used != INVALID_REGNUM)
7817 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
7818
7819 pic_reg_used = true;
7820 }
7821
7822 if (pic_reg_used)
7823 {
7824 if (TARGET_64BIT)
7825 {
7826 if (ix86_cmodel == CM_LARGE_PIC)
7827 {
7828 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
7829 rtx label = gen_label_rtx ();
7830 emit_label (label);
7831 LABEL_PRESERVE_P (label) = 1;
7832 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
7833 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
7834 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
7835 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
7836 pic_offset_table_rtx, tmp_reg));
7837 }
7838 else
7839 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
7840 }
7841 else
7842 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
7843 }
7844
7845 /* Prevent function calls from being scheduled before the call to mcount.
7846 In the pic_reg_used case, make sure that the got load isn't deleted. */
7847 if (crtl->profile)
7848 {
7849 if (pic_reg_used)
7850 emit_insn (gen_prologue_use (pic_offset_table_rtx));
7851 emit_insn (gen_blockage ());
7852 }
7853
7854 if (crtl->drap_reg && !crtl->stack_realign_needed)
7855 {
7856 /* vDRAP is set up, but after reload it turns out stack realignment
7857 isn't necessary; here we emit prologue code to set up DRAP
7858 without the stack realignment adjustment. */
7859 int drap_bp_offset = UNITS_PER_WORD * 2;
7860 rtx x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
7861 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
7862 }
7863
7864 /* Emit cld instruction if stringops are used in the function. */
7865 if (TARGET_CLD && ix86_current_function_needs_cld)
7866 emit_insn (gen_cld ());
7867 }
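/* For a small 32-bit function with a frame pointer and one callee-saved
   register, the insns expanded above correspond roughly to (a sketch;
   with save_regs_using_mov the register is instead stored with a mov
   after the stack adjustment):

	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx
	subl	$to_allocate, %esp  */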
7868
7869 /* Emit code to restore saved registers using MOV insns. First register
7870 is restored from POINTER + OFFSET. */
7871 static void
7872 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
7873 int maybe_eh_return)
7874 {
7875 int regno;
7876 rtx base_address = gen_rtx_MEM (Pmode, pointer);
7877
7878 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7879 if (ix86_save_reg (regno, maybe_eh_return))
7880 {
7881 /* Ensure that adjust_address won't be forced to produce a pointer
7882 outside the range allowed by the x86-64 instruction set. */
7883 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
7884 {
7885 rtx r11;
7886
7887 r11 = gen_rtx_REG (DImode, R11_REG);
7888 emit_move_insn (r11, GEN_INT (offset));
7889 emit_insn (gen_adddi3 (r11, r11, pointer));
7890 base_address = gen_rtx_MEM (Pmode, r11);
7891 offset = 0;
7892 }
7893 emit_move_insn (gen_rtx_REG (Pmode, regno),
7894 adjust_address (base_address, Pmode, offset));
7895 offset += UNITS_PER_WORD;
7896 }
7897 }
7898
7899 /* Restore function stack, frame, and registers. */
7900
7901 void
7902 ix86_expand_epilogue (int style)
7903 {
7904 int regno;
7905 int sp_valid;
7906 struct ix86_frame frame;
7907 HOST_WIDE_INT offset;
7908
7909 ix86_finalize_stack_realign_flags ();
7910
7911 /* When stack is realigned, SP must be valid. */
7912 sp_valid = (!frame_pointer_needed
7913 || current_function_sp_is_unchanging
7914 || stack_realign_fp);
7915
7916 ix86_compute_frame_layout (&frame);
7917
7918 /* Calculate start of saved registers relative to ebp. Special care
7919 must be taken for the normal return case of a function using
7920 eh_return: the eax and edx registers are marked as saved, but not
7921 restored along this path. */
7922 offset = frame.nregs;
7923 if (crtl->calls_eh_return && style != 2)
7924 offset -= 2;
7925 offset *= -UNITS_PER_WORD;
7926
7927 /* If we're only restoring one register and sp is not valid, then
7928 use a move instruction to restore the register, since it's
7929 less work than reloading sp and popping the register.
7930 
7931 The default code results in a stack adjustment using an add/lea instruction,
7932 while this code results in a LEAVE instruction (or discrete equivalent),
7933 so it is profitable in some other cases as well, especially when there
7934 are no registers to restore. We also use this code when TARGET_USE_LEAVE
7935 and there is exactly one register to pop. This heuristic may need some
7936 tuning in the future. */
7937 if ((!sp_valid && frame.nregs <= 1)
7938 || (TARGET_EPILOGUE_USING_MOVE
7939 && cfun->machine->use_fast_prologue_epilogue
7940 && (frame.nregs > 1 || frame.to_allocate))
7941 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
7942 || (frame_pointer_needed && TARGET_USE_LEAVE
7943 && cfun->machine->use_fast_prologue_epilogue
7944 && frame.nregs == 1)
7945 || crtl->calls_eh_return)
7946 {
7947 /* Restore registers. We can use ebp or esp to address the memory
7948 locations. If both are available, default to ebp, since offsets
7949 are known to be small. The only exception is when esp points directly
7950 to the end of the block of saved registers, where we may simplify the
7951 addressing mode.
7952 
7953 If we are realigning the stack with bp and sp, the register restores can't
7954 be addressed via bp; sp must be used instead. */
7955
7956 if (!frame_pointer_needed
7957 || (sp_valid && !frame.to_allocate)
7958 || stack_realign_fp)
7959 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
7960 frame.to_allocate, style == 2);
7961 else
7962 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
7963 offset, style == 2);
7964
7965 /* eh_return epilogues need %ecx added to the stack pointer. */
7966 if (style == 2)
7967 {
7968 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
7969
7970 /* Stack align doesn't work with eh_return. */
7971 gcc_assert (!crtl->stack_realign_needed);
7972
7973 if (frame_pointer_needed)
7974 {
7975 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
7976 tmp = plus_constant (tmp, UNITS_PER_WORD);
7977 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
7978
7979 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
7980 emit_move_insn (hard_frame_pointer_rtx, tmp);
7981
7982 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
7983 const0_rtx, style);
7984 }
7985 else
7986 {
7987 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
7988 tmp = plus_constant (tmp, (frame.to_allocate
7989 + frame.nregs * UNITS_PER_WORD));
7990 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
7991 }
7992 }
7993 else if (!frame_pointer_needed)
7994 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7995 GEN_INT (frame.to_allocate
7996 + frame.nregs * UNITS_PER_WORD),
7997 style);
7998 /* If not an i386, mov & pop is faster than "leave". */
7999 else if (TARGET_USE_LEAVE || optimize_size
8000 || !cfun->machine->use_fast_prologue_epilogue)
8001 emit_insn ((*ix86_gen_leave) ());
8002 else
8003 {
8004 pro_epilogue_adjust_stack (stack_pointer_rtx,
8005 hard_frame_pointer_rtx,
8006 const0_rtx, style);
8007
8008 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
8009 }
8010 }
8011 else
8012 {
8013 /* The first step is to deallocate the stack frame so that we can
8014 pop the registers.
8015 
8016 If we realign the stack with the frame pointer, then the stack pointer
8017 can't be recovered via lea $offset(%bp), %sp, because
8018 there is a padding area between bp and sp for the realignment.
8019 "add $to_allocate, %sp" must be used instead. */
8020 if (!sp_valid)
8021 {
8022 gcc_assert (frame_pointer_needed);
8023 gcc_assert (!stack_realign_fp);
8024 pro_epilogue_adjust_stack (stack_pointer_rtx,
8025 hard_frame_pointer_rtx,
8026 GEN_INT (offset), style);
8027 }
8028 else if (frame.to_allocate)
8029 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8030 GEN_INT (frame.to_allocate), style);
8031
8032 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8033 if (ix86_save_reg (regno, false))
8034 emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
8035 if (frame_pointer_needed)
8036 {
8037 /* Leave results in shorter dependency chains on CPUs that are
8038 able to grok it fast. */
8039 if (TARGET_USE_LEAVE)
8040 emit_insn ((*ix86_gen_leave) ());
8041 else
8042 {
8043 /* When stack realignment really happens, restoring the stack
8044 pointer from the hard frame pointer is a must, if not using
8045 leave. */
8046 if (stack_realign_fp)
8047 pro_epilogue_adjust_stack (stack_pointer_rtx,
8048 hard_frame_pointer_rtx,
8049 const0_rtx, style);
8050 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
8051 }
8052 }
8053 }
8054
8055 if (crtl->drap_reg && crtl->stack_realign_needed)
8056 {
8057 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8058 ? 0 : UNITS_PER_WORD);
8059 gcc_assert (stack_realign_drap);
8060 emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
8061 crtl->drap_reg,
8062 GEN_INT (-(UNITS_PER_WORD
8063 + param_ptr_offset))));
8064 if (!call_used_regs[REGNO (crtl->drap_reg)])
8065 emit_insn ((*ix86_gen_pop1) (crtl->drap_reg));
8066
8067 }
8068
8069 /* Sibcall epilogues don't want a return instruction. */
8070 if (style == 0)
8071 return;
8072
8073 if (crtl->args.pops_args && crtl->args.size)
8074 {
8075 rtx popc = GEN_INT (crtl->args.pops_args);
8076
8077 /* i386 can only pop 64K bytes. If asked to pop more, pop the
8078 return address, do an explicit add, and jump indirectly to the
8079 caller. */
8080
8081 if (crtl->args.pops_args >= 65536)
8082 {
8083 rtx ecx = gen_rtx_REG (SImode, CX_REG);
8084
8085 /* There is no "pascal" calling convention in any 64bit ABI. */
8086 gcc_assert (!TARGET_64BIT);
8087
8088 emit_insn (gen_popsi1 (ecx));
8089 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
8090 emit_jump_insn (gen_return_indirect_internal (ecx));
8091 }
8092 else
8093 emit_jump_insn (gen_return_pop_internal (popc));
8094 }
8095 else
8096 emit_jump_insn (gen_return_internal ());
8097 }
8098
8099 /* Reset from the function's potential modifications. */
8100
8101 static void
8102 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8103 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8104 {
8105 if (pic_offset_table_rtx)
8106 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
8107 #if TARGET_MACHO
8108 /* Mach-O doesn't support labels at the end of objects, so if
8109 it looks like we might want one, insert a NOP. */
8110 {
8111 rtx insn = get_last_insn ();
8112 while (insn
8113 && NOTE_P (insn)
8114 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
8115 insn = PREV_INSN (insn);
8116 if (insn
8117 && (LABEL_P (insn)
8118 || (NOTE_P (insn)
8119 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
8120 fputs ("\tnop\n", file);
8121 }
8122 #endif
8123
8124 }
8125 \f
8126 /* Extract the parts of an RTL expression that is a valid memory address
8127 for an instruction. Return 0 if the structure of the address is
8128 grossly off. Return -1 if the address contains ASHIFT, so it is not
8129 strictly valid, but is still used for computing the length of a lea instruction. */
8130
8131 int
8132 ix86_decompose_address (rtx addr, struct ix86_address *out)
8133 {
8134 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
8135 rtx base_reg, index_reg;
8136 HOST_WIDE_INT scale = 1;
8137 rtx scale_rtx = NULL_RTX;
8138 int retval = 1;
8139 enum ix86_address_seg seg = SEG_DEFAULT;
8140
8141 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
8142 base = addr;
8143 else if (GET_CODE (addr) == PLUS)
8144 {
8145 rtx addends[4], op;
8146 int n = 0, i;
8147
8148 op = addr;
8149 do
8150 {
8151 if (n >= 4)
8152 return 0;
8153 addends[n++] = XEXP (op, 1);
8154 op = XEXP (op, 0);
8155 }
8156 while (GET_CODE (op) == PLUS);
8157 if (n >= 4)
8158 return 0;
8159 addends[n] = op;
8160
8161 for (i = n; i >= 0; --i)
8162 {
8163 op = addends[i];
8164 switch (GET_CODE (op))
8165 {
8166 case MULT:
8167 if (index)
8168 return 0;
8169 index = XEXP (op, 0);
8170 scale_rtx = XEXP (op, 1);
8171 break;
8172
8173 case UNSPEC:
8174 if (XINT (op, 1) == UNSPEC_TP
8175 && TARGET_TLS_DIRECT_SEG_REFS
8176 && seg == SEG_DEFAULT)
8177 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
8178 else
8179 return 0;
8180 break;
8181
8182 case REG:
8183 case SUBREG:
8184 if (!base)
8185 base = op;
8186 else if (!index)
8187 index = op;
8188 else
8189 return 0;
8190 break;
8191
8192 case CONST:
8193 case CONST_INT:
8194 case SYMBOL_REF:
8195 case LABEL_REF:
8196 if (disp)
8197 return 0;
8198 disp = op;
8199 break;
8200
8201 default:
8202 return 0;
8203 }
8204 }
8205 }
8206 else if (GET_CODE (addr) == MULT)
8207 {
8208 index = XEXP (addr, 0); /* index*scale */
8209 scale_rtx = XEXP (addr, 1);
8210 }
8211 else if (GET_CODE (addr) == ASHIFT)
8212 {
8213 rtx tmp;
8214
8215 /* We're called for lea too, which implements ashift on occasion. */
8216 index = XEXP (addr, 0);
8217 tmp = XEXP (addr, 1);
8218 if (!CONST_INT_P (tmp))
8219 return 0;
8220 scale = INTVAL (tmp);
8221 if ((unsigned HOST_WIDE_INT) scale > 3)
8222 return 0;
8223 scale = 1 << scale;
8224 retval = -1;
8225 }
8226 else
8227 disp = addr; /* displacement */
8228
8229 /* Extract the integral value of scale. */
8230 if (scale_rtx)
8231 {
8232 if (!CONST_INT_P (scale_rtx))
8233 return 0;
8234 scale = INTVAL (scale_rtx);
8235 }
8236
8237 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
8238 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
8239
8240 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
8241 if (base_reg && index_reg && scale == 1
8242 && (index_reg == arg_pointer_rtx
8243 || index_reg == frame_pointer_rtx
8244 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
8245 {
8246 rtx tmp;
8247 tmp = base, base = index, index = tmp;
8248 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
8249 }
8250
8251 /* Special case: %ebp cannot be encoded as a base without a displacement. */
8252 if ((base_reg == hard_frame_pointer_rtx
8253 || base_reg == frame_pointer_rtx
8254 || base_reg == arg_pointer_rtx) && !disp)
8255 disp = const0_rtx;
8256
8257 /* Special case: on K6, [%esi] makes the instruction vector decoded.
8258 Avoid this by transforming to [%esi+0]. */
8259 if (TARGET_K6 && !optimize_size
8260 && base_reg && !index_reg && !disp
8261 && REG_P (base_reg)
8262 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
8263 disp = const0_rtx;
8264
8265 /* Special case: encode reg+reg instead of reg*2. */
8266 if (!base && index && scale && scale == 2)
8267 base = index, base_reg = index_reg, scale = 1;
8268
8269 /* Special case: scaling cannot be encoded without base or displacement. */
8270 if (!base && !disp && index && scale != 1)
8271 disp = const0_rtx;
8272
8273 out->base = base;
8274 out->index = index;
8275 out->disp = disp;
8276 out->scale = scale;
8277 out->seg = seg;
8278
8279 return retval;
8280 }
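/* For example, the address in "movl 16(%ebx,%esi,4), %eax", i.e.

	(plus:SI (plus:SI (mult:SI (reg:SI si) (const_int 4))
			  (reg:SI bx))
		 (const_int 16))

   decomposes into base = %ebx, index = %esi, scale = 4 and disp = 16
   (a sketch; SUBREGs and segment UNSPECs are handled as above).  */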
8281 \f
8282 /* Return the cost of the memory address X.
8283 For i386, it is better to use a complex address than let gcc copy
8284 the address into a reg and make a new pseudo. But not if the address
8285 requires two regs - that would mean more pseudos with longer
8286 lifetimes. */
8287 static int
8288 ix86_address_cost (rtx x)
8289 {
8290 struct ix86_address parts;
8291 int cost = 1;
8292 int ok = ix86_decompose_address (x, &parts);
8293
8294 gcc_assert (ok);
8295
8296 if (parts.base && GET_CODE (parts.base) == SUBREG)
8297 parts.base = SUBREG_REG (parts.base);
8298 if (parts.index && GET_CODE (parts.index) == SUBREG)
8299 parts.index = SUBREG_REG (parts.index);
8300
8301 /* Attempt to minimize number of registers in the address. */
8302 if ((parts.base
8303 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
8304 || (parts.index
8305 && (!REG_P (parts.index)
8306 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
8307 cost++;
8308
8309 if (parts.base
8310 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
8311 && parts.index
8312 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
8313 && parts.base != parts.index)
8314 cost++;
8315
8316 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
8317 since its predecode logic can't detect the length of instructions
8318 and decoding degenerates to vector decoding. Increase the cost of such
8319 addresses here. The penalty is at least 2 cycles. It may be worthwhile
8320 to split such addresses or even to refuse such addresses at all.
8321 
8322 The following addressing modes are affected:
8323 [base+scale*index]
8324 [scale*index+disp]
8325 [base+index]
8326 
8327 The first and last case may be avoidable by explicitly coding the zero in
8328 the memory address, but I don't have an AMD K6 machine handy to check this
8329 theory. */
8330
8331 if (TARGET_K6
8332 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
8333 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
8334 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
8335 cost += 10;
8336
8337 return cost;
8338 }
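/* For instance, with hard registers the plain address (%ebx) costs 1,
   while on the K6 an address like (%ebx,%esi) picks up the extra
   penalty above and costs 11 (a sketch of the heuristic, not a precise
   cycle count).  */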
8339 \f
8340 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
8341 this is used to form addresses to local data when -fPIC is in
8342 use. */
8343
8344 static bool
8345 darwin_local_data_pic (rtx disp)
8346 {
8347 if (GET_CODE (disp) == MINUS)
8348 {
8349 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
8350 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
8351 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
8352 {
8353 const char *sym_name = XSTR (XEXP (disp, 1), 0);
8354 if (! strcmp (sym_name, "<pic base>"))
8355 return true;
8356 }
8357 }
8358
8359 return false;
8360 }
8361
8362 /* Determine if a given RTX is a valid constant. We already know this
8363 satisfies CONSTANT_P. */
8364
8365 bool
8366 legitimate_constant_p (rtx x)
8367 {
8368 switch (GET_CODE (x))
8369 {
8370 case CONST:
8371 x = XEXP (x, 0);
8372
8373 if (GET_CODE (x) == PLUS)
8374 {
8375 if (!CONST_INT_P (XEXP (x, 1)))
8376 return false;
8377 x = XEXP (x, 0);
8378 }
8379
8380 if (TARGET_MACHO && darwin_local_data_pic (x))
8381 return true;
8382
8383 /* Only some unspecs are valid as "constants". */
8384 if (GET_CODE (x) == UNSPEC)
8385 switch (XINT (x, 1))
8386 {
8387 case UNSPEC_GOT:
8388 case UNSPEC_GOTOFF:
8389 case UNSPEC_PLTOFF:
8390 return TARGET_64BIT;
8391 case UNSPEC_TPOFF:
8392 case UNSPEC_NTPOFF:
8393 x = XVECEXP (x, 0, 0);
8394 return (GET_CODE (x) == SYMBOL_REF
8395 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
8396 case UNSPEC_DTPOFF:
8397 x = XVECEXP (x, 0, 0);
8398 return (GET_CODE (x) == SYMBOL_REF
8399 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
8400 default:
8401 return false;
8402 }
8403
8404 /* We must have drilled down to a symbol. */
8405 if (GET_CODE (x) == LABEL_REF)
8406 return true;
8407 if (GET_CODE (x) != SYMBOL_REF)
8408 return false;
8409 /* FALLTHRU */
8410
8411 case SYMBOL_REF:
8412 /* TLS symbols are never valid. */
8413 if (SYMBOL_REF_TLS_MODEL (x))
8414 return false;
8415
8416 /* DLLIMPORT symbols are never valid. */
8417 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
8418 && SYMBOL_REF_DLLIMPORT_P (x))
8419 return false;
8420 break;
8421
8422 case CONST_DOUBLE:
8423 if (GET_MODE (x) == TImode
8424 && x != CONST0_RTX (TImode)
8425 && !TARGET_64BIT)
8426 return false;
8427 break;
8428
8429 case CONST_VECTOR:
8430 if (x == CONST0_RTX (GET_MODE (x)))
8431 return true;
8432 return false;
8433
8434 default:
8435 break;
8436 }
8437
8438 /* Otherwise we handle everything else in the move patterns. */
8439 return true;
8440 }
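/* For example, (const (plus (symbol_ref "x") (const_int 4))) is a
   legitimate constant, whereas a SYMBOL_REF with a TLS model, or a
   nonzero TImode CONST_DOUBLE on 32-bit targets, is not (a sketch of
   the rules implemented above).  */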
8441
8442 /* Determine if it's legal to put X into the constant pool. This
8443 is not possible for the address of thread-local symbols, which
8444 is checked above. */
8445
8446 static bool
8447 ix86_cannot_force_const_mem (rtx x)
8448 {
8449 /* We can always put integral constants and vectors in memory. */
8450 switch (GET_CODE (x))
8451 {
8452 case CONST_INT:
8453 case CONST_DOUBLE:
8454 case CONST_VECTOR:
8455 return false;
8456
8457 default:
8458 break;
8459 }
8460 return !legitimate_constant_p (x);
8461 }
8462
8463 /* Determine if a given RTX is a valid constant address. */
8464
8465 bool
8466 constant_address_p (rtx x)
8467 {
8468 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
8469 }
8470
8471 /* Nonzero if the constant value X is a legitimate general operand
8472 when generating PIC code. It is given that flag_pic is on and
8473 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
8474
8475 bool
8476 legitimate_pic_operand_p (rtx x)
8477 {
8478 rtx inner;
8479
8480 switch (GET_CODE (x))
8481 {
8482 case CONST:
8483 inner = XEXP (x, 0);
8484 if (GET_CODE (inner) == PLUS
8485 && CONST_INT_P (XEXP (inner, 1)))
8486 inner = XEXP (inner, 0);
8487
8488 /* Only some unspecs are valid as "constants". */
8489 if (GET_CODE (inner) == UNSPEC)
8490 switch (XINT (inner, 1))
8491 {
8492 case UNSPEC_GOT:
8493 case UNSPEC_GOTOFF:
8494 case UNSPEC_PLTOFF:
8495 return TARGET_64BIT;
8496 case UNSPEC_TPOFF:
8497 x = XVECEXP (inner, 0, 0);
8498 return (GET_CODE (x) == SYMBOL_REF
8499 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
8500 default:
8501 return false;
8502 }
8503 /* FALLTHRU */
8504
8505 case SYMBOL_REF:
8506 case LABEL_REF:
8507 return legitimate_pic_address_disp_p (x);
8508
8509 default:
8510 return true;
8511 }
8512 }
8513
8514 /* Determine if a given CONST RTX is a valid memory displacement
8515 in PIC mode. */
8516
8517 int
8518 legitimate_pic_address_disp_p (rtx disp)
8519 {
8520 bool saw_plus;
8521
8522 /* In 64bit mode we can allow direct addresses of symbols and labels
8523 when they are not dynamic symbols. */
8524 if (TARGET_64BIT)
8525 {
8526 rtx op0 = disp, op1;
8527
8528 switch (GET_CODE (disp))
8529 {
8530 case LABEL_REF:
8531 return true;
8532
8533 case CONST:
8534 if (GET_CODE (XEXP (disp, 0)) != PLUS)
8535 break;
8536 op0 = XEXP (XEXP (disp, 0), 0);
8537 op1 = XEXP (XEXP (disp, 0), 1);
8538 if (!CONST_INT_P (op1)
8539 || INTVAL (op1) >= 16*1024*1024
8540 || INTVAL (op1) < -16*1024*1024)
8541 break;
8542 if (GET_CODE (op0) == LABEL_REF)
8543 return true;
8544 if (GET_CODE (op0) != SYMBOL_REF)
8545 break;
8546 /* FALLTHRU */
8547
8548 case SYMBOL_REF:
8549 /* TLS references should always be enclosed in UNSPEC. */
8550 if (SYMBOL_REF_TLS_MODEL (op0))
8551 return false;
8552 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
8553 && ix86_cmodel != CM_LARGE_PIC)
8554 return true;
8555 break;
8556
8557 default:
8558 break;
8559 }
8560 }
8561 if (GET_CODE (disp) != CONST)
8562 return 0;
8563 disp = XEXP (disp, 0);
8564
8565 if (TARGET_64BIT)
8566 {
8567 /* It is unsafe to allow PLUS expressions; this would limit the allowed
8568 distance of GOT tables. We should not need these anyway. */
8569 if (GET_CODE (disp) != UNSPEC
8570 || (XINT (disp, 1) != UNSPEC_GOTPCREL
8571 && XINT (disp, 1) != UNSPEC_GOTOFF
8572 && XINT (disp, 1) != UNSPEC_PLTOFF))
8573 return 0;
8574
8575 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
8576 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
8577 return 0;
8578 return 1;
8579 }
8580
8581 saw_plus = false;
8582 if (GET_CODE (disp) == PLUS)
8583 {
8584 if (!CONST_INT_P (XEXP (disp, 1)))
8585 return 0;
8586 disp = XEXP (disp, 0);
8587 saw_plus = true;
8588 }
8589
8590 if (TARGET_MACHO && darwin_local_data_pic (disp))
8591 return 1;
8592
8593 if (GET_CODE (disp) != UNSPEC)
8594 return 0;
8595
8596 switch (XINT (disp, 1))
8597 {
8598 case UNSPEC_GOT:
8599 if (saw_plus)
8600 return false;
8601 /* We need to check for both symbols and labels because VxWorks loads
8602 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
8603 details. */
8604 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
8605 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
8606 case UNSPEC_GOTOFF:
8607 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
8608 While the ABI also specifies a 32bit relocation, we don't produce it in
8609 the small PIC model at all. */
8610 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
8611 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
8612 && !TARGET_64BIT)
8613 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
8614 return false;
8615 case UNSPEC_GOTTPOFF:
8616 case UNSPEC_GOTNTPOFF:
8617 case UNSPEC_INDNTPOFF:
8618 if (saw_plus)
8619 return false;
8620 disp = XVECEXP (disp, 0, 0);
8621 return (GET_CODE (disp) == SYMBOL_REF
8622 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
8623 case UNSPEC_NTPOFF:
8624 disp = XVECEXP (disp, 0, 0);
8625 return (GET_CODE (disp) == SYMBOL_REF
8626 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
8627 case UNSPEC_DTPOFF:
8628 disp = XVECEXP (disp, 0, 0);
8629 return (GET_CODE (disp) == SYMBOL_REF
8630 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
8631 }
8632
8633 return 0;
8634 }
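/* For instance, in 32-bit PIC code a typical legitimate displacement is

	(const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))

   optionally combined with a constant offset, which is printed as
   foo@GOTOFF in the assembler output (a sketch; the full set of
   accepted UNSPECs is listed above).  */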
8635
8636 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
8637 memory address for an instruction. The MODE argument is the machine mode
8638 for the MEM expression that wants to use this address.
8639
8640 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
8641 convert common non-canonical forms to canonical form so that they will
8642 be recognized. */
8643
8644 int
8645 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
8646 rtx addr, int strict)
8647 {
8648 struct ix86_address parts;
8649 rtx base, index, disp;
8650 HOST_WIDE_INT scale;
8651 const char *reason = NULL;
8652 rtx reason_rtx = NULL_RTX;
8653
8654 if (ix86_decompose_address (addr, &parts) <= 0)
8655 {
8656 reason = "decomposition failed";
8657 goto report_error;
8658 }
8659
8660 base = parts.base;
8661 index = parts.index;
8662 disp = parts.disp;
8663 scale = parts.scale;
8664
8665 /* Validate base register.
8666
8667 Don't allow SUBREG's that span more than a word here. It can lead to spill
8668 failures when the base is one word out of a two word structure, which is
8669 represented internally as a DImode int. */
8670
8671 if (base)
8672 {
8673 rtx reg;
8674 reason_rtx = base;
8675
8676 if (REG_P (base))
8677 reg = base;
8678 else if (GET_CODE (base) == SUBREG
8679 && REG_P (SUBREG_REG (base))
8680 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
8681 <= UNITS_PER_WORD)
8682 reg = SUBREG_REG (base);
8683 else
8684 {
8685 reason = "base is not a register";
8686 goto report_error;
8687 }
8688
8689 if (GET_MODE (base) != Pmode)
8690 {
8691 reason = "base is not in Pmode";
8692 goto report_error;
8693 }
8694
8695 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
8696 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
8697 {
8698 reason = "base is not valid";
8699 goto report_error;
8700 }
8701 }
8702
8703 /* Validate index register.
8704
8705 Don't allow SUBREG's that span more than a word here -- same as above. */
8706
8707 if (index)
8708 {
8709 rtx reg;
8710 reason_rtx = index;
8711
8712 if (REG_P (index))
8713 reg = index;
8714 else if (GET_CODE (index) == SUBREG
8715 && REG_P (SUBREG_REG (index))
8716 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
8717 <= UNITS_PER_WORD)
8718 reg = SUBREG_REG (index);
8719 else
8720 {
8721 reason = "index is not a register";
8722 goto report_error;
8723 }
8724
8725 if (GET_MODE (index) != Pmode)
8726 {
8727 reason = "index is not in Pmode";
8728 goto report_error;
8729 }
8730
8731 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
8732 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
8733 {
8734 reason = "index is not valid";
8735 goto report_error;
8736 }
8737 }
8738
8739 /* Validate scale factor. */
8740 if (scale != 1)
8741 {
8742 reason_rtx = GEN_INT (scale);
8743 if (!index)
8744 {
8745 reason = "scale without index";
8746 goto report_error;
8747 }
8748
8749 if (scale != 2 && scale != 4 && scale != 8)
8750 {
8751 reason = "scale is not a valid multiplier";
8752 goto report_error;
8753 }
8754 }
8755
8756 /* Validate displacement. */
8757 if (disp)
8758 {
8759 reason_rtx = disp;
8760
8761 if (GET_CODE (disp) == CONST
8762 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
8763 switch (XINT (XEXP (disp, 0), 1))
8764 {
8765 /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit when
8766 used. While the ABI also specifies 32bit relocations, we don't produce
8767 them at all and use IP relative addressing instead. */
8768 case UNSPEC_GOT:
8769 case UNSPEC_GOTOFF:
8770 gcc_assert (flag_pic);
8771 if (!TARGET_64BIT)
8772 goto is_legitimate_pic;
8773 reason = "64bit address unspec";
8774 goto report_error;
8775
8776 case UNSPEC_GOTPCREL:
8777 gcc_assert (flag_pic);
8778 goto is_legitimate_pic;
8779
8780 case UNSPEC_GOTTPOFF:
8781 case UNSPEC_GOTNTPOFF:
8782 case UNSPEC_INDNTPOFF:
8783 case UNSPEC_NTPOFF:
8784 case UNSPEC_DTPOFF:
8785 break;
8786
8787 default:
8788 reason = "invalid address unspec";
8789 goto report_error;
8790 }
8791
8792 else if (SYMBOLIC_CONST (disp)
8793 && (flag_pic
8794 || (TARGET_MACHO
8795 #if TARGET_MACHO
8796 && MACHOPIC_INDIRECT
8797 && !machopic_operand_p (disp)
8798 #endif
8799 )))
8800 {
8801
8802 is_legitimate_pic:
8803 if (TARGET_64BIT && (index || base))
8804 {
8805 /* foo@dtpoff(%rX) is ok. */
8806 if (GET_CODE (disp) != CONST
8807 || GET_CODE (XEXP (disp, 0)) != PLUS
8808 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
8809 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
8810 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
8811 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
8812 {
8813 reason = "non-constant pic memory reference";
8814 goto report_error;
8815 }
8816 }
8817 else if (! legitimate_pic_address_disp_p (disp))
8818 {
8819 reason = "displacement is an invalid pic construct";
8820 goto report_error;
8821 }
8822
8823 /* This code used to verify that a symbolic pic displacement
8824 includes the pic_offset_table_rtx register.
8825
8826 While this is a good idea, unfortunately these constructs may
8827 be created by the "adds using lea" optimization for incorrect
8828 code like:
8829
8830 int a;
8831 int foo(int i)
8832 {
8833 return *(&a+i);
8834 }
8835
8836 This code is nonsensical, but results in addressing the
8837 GOT table with pic_offset_table_rtx as the base. We can't
8838 just refuse it easily, since it gets matched by the
8839 "addsi3" pattern, which later gets split to an lea when the
8840 output register differs from the input. While this
8841 could be handled by a separate addsi pattern for this case
8842 that never results in an lea, disabling this test seems to be
8843 the easier and correct fix for the crash. */
8844 }
8845 else if (GET_CODE (disp) != LABEL_REF
8846 && !CONST_INT_P (disp)
8847 && (GET_CODE (disp) != CONST
8848 || !legitimate_constant_p (disp))
8849 && (GET_CODE (disp) != SYMBOL_REF
8850 || !legitimate_constant_p (disp)))
8851 {
8852 reason = "displacement is not constant";
8853 goto report_error;
8854 }
8855 else if (TARGET_64BIT
8856 && !x86_64_immediate_operand (disp, VOIDmode))
8857 {
8858 reason = "displacement is out of range";
8859 goto report_error;
8860 }
8861 }
8862
8863 /* Everything looks valid. */
8864 return TRUE;
8865
8866 report_error:
8867 return FALSE;
8868 }
8869 \f
8870 /* Return a unique alias set for the GOT. */
8871
8872 static alias_set_type
8873 ix86_GOT_alias_set (void)
8874 {
8875 static alias_set_type set = -1;
8876 if (set == -1)
8877 set = new_alias_set ();
8878 return set;
8879 }
8880
8881 /* Return a legitimate reference for ORIG (an address) using the
8882 register REG. If REG is 0, a new pseudo is generated.
8883
8884 There are two types of references that must be handled:
8885
8886 1. Global data references must load the address from the GOT, via
8887 the PIC reg. An insn is emitted to do this load, and the reg is
8888 returned.
8889
8890 2. Static data references, constant pool addresses, and code labels
8891 compute the address as an offset from the GOT, whose base is in
8892 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
8893 differentiate them from global data objects. The returned
8894 address is the PIC reg + an unspec constant.
8895
8896 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
8897 reg also appears in the address. */
8898
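/* For example, in the 32bit case with %ebx holding the PIC base:
   a global symbol FOO becomes (mem (plus pic_reg (const (unspec [FOO]
   UNSPEC_GOT)))), i.e. a load from FOO@GOT(%ebx), while a local symbol
   becomes (plus pic_reg (const (unspec [FOO] UNSPEC_GOTOFF))), i.e.
   FOO@GOTOFF(%ebx).  */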
8899 static rtx
8900 legitimize_pic_address (rtx orig, rtx reg)
8901 {
8902 rtx addr = orig;
8903 rtx new_rtx = orig;
8904 rtx base;
8905
8906 #if TARGET_MACHO
8907 if (TARGET_MACHO && !TARGET_64BIT)
8908 {
8909 if (reg == 0)
8910 reg = gen_reg_rtx (Pmode);
8911 /* Use the generic Mach-O PIC machinery. */
8912 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
8913 }
8914 #endif
8915
8916 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
8917 new_rtx = addr;
8918 else if (TARGET_64BIT
8919 && ix86_cmodel != CM_SMALL_PIC
8920 && gotoff_operand (addr, Pmode))
8921 {
8922 rtx tmpreg;
8923 /* This symbol may be referenced via a displacement from the PIC
8924 base address (@GOTOFF). */
8925
8926 if (reload_in_progress)
8927 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
8928 if (GET_CODE (addr) == CONST)
8929 addr = XEXP (addr, 0);
8930 if (GET_CODE (addr) == PLUS)
8931 {
8932 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
8933 UNSPEC_GOTOFF);
8934 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
8935 }
8936 else
8937 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
8938 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
8939 if (!reg)
8940 tmpreg = gen_reg_rtx (Pmode);
8941 else
8942 tmpreg = reg;
8943 emit_move_insn (tmpreg, new_rtx);
8944
8945 if (reg != 0)
8946 {
8947 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
8948 tmpreg, 1, OPTAB_DIRECT);
8949 new_rtx = reg;
8950 }
8951 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
8952 }
8953 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
8954 {
8955 /* This symbol may be referenced via a displacement from the PIC
8956 base address (@GOTOFF). */
8957
8958 if (reload_in_progress)
8959 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
8960 if (GET_CODE (addr) == CONST)
8961 addr = XEXP (addr, 0);
8962 if (GET_CODE (addr) == PLUS)
8963 {
8964 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
8965 UNSPEC_GOTOFF);
8966 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
8967 }
8968 else
8969 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
8970 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
8971 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
8972
8973 if (reg != 0)
8974 {
8975 emit_move_insn (reg, new_rtx);
8976 new_rtx = reg;
8977 }
8978 }
8979 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
8980 /* We can't use @GOTOFF for text labels on VxWorks;
8981 see gotoff_operand. */
8982 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
8983 {
8984 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
8985 {
8986 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
8987 return legitimize_dllimport_symbol (addr, true);
8988 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
8989 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
8990 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
8991 {
8992 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
8993 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
8994 }
8995 }
8996
8997 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
8998 {
8999 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
9000 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9001 new_rtx = gen_const_mem (Pmode, new_rtx);
9002 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9003
9004 if (reg == 0)
9005 reg = gen_reg_rtx (Pmode);
9006 /* Use gen_movsi directly, otherwise the address is loaded
9007 into a register for CSE. We don't want to CSE these addresses;
9008 instead we CSE the addresses loaded from the GOT table, so skip this. */
9009 emit_insn (gen_movsi (reg, new_rtx));
9010 new_rtx = reg;
9011 }
9012 else
9013 {
9014 /* This symbol must be referenced via a load from the
9015 Global Offset Table (@GOT). */
9016
9017 if (reload_in_progress)
9018 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9019 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
9020 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9021 if (TARGET_64BIT)
9022 new_rtx = force_reg (Pmode, new_rtx);
9023 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9024 new_rtx = gen_const_mem (Pmode, new_rtx);
9025 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9026
9027 if (reg == 0)
9028 reg = gen_reg_rtx (Pmode);
9029 emit_move_insn (reg, new_rtx);
9030 new_rtx = reg;
9031 }
9032 }
9033 else
9034 {
9035 if (CONST_INT_P (addr)
9036 && !x86_64_immediate_operand (addr, VOIDmode))
9037 {
9038 if (reg)
9039 {
9040 emit_move_insn (reg, addr);
9041 new_rtx = reg;
9042 }
9043 else
9044 new_rtx = force_reg (Pmode, addr);
9045 }
9046 else if (GET_CODE (addr) == CONST)
9047 {
9048 addr = XEXP (addr, 0);
9049
9050 /* We must match the stuff we generated before. Assume the only
9051 unspecs that can get here are ours. Not that we could do
9052 anything with them anyway... */
9053 if (GET_CODE (addr) == UNSPEC
9054 || (GET_CODE (addr) == PLUS
9055 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
9056 return orig;
9057 gcc_assert (GET_CODE (addr) == PLUS);
9058 }
9059 if (GET_CODE (addr) == PLUS)
9060 {
9061 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
9062
9063 /* Check first to see if this is a constant offset from a @GOTOFF
9064 symbol reference. */
9065 if (gotoff_operand (op0, Pmode)
9066 && CONST_INT_P (op1))
9067 {
9068 if (!TARGET_64BIT)
9069 {
9070 if (reload_in_progress)
9071 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9072 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
9073 UNSPEC_GOTOFF);
9074 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
9075 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9076 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9077
9078 if (reg != 0)
9079 {
9080 emit_move_insn (reg, new_rtx);
9081 new_rtx = reg;
9082 }
9083 }
9084 else
9085 {
9086 if (INTVAL (op1) < -16*1024*1024
9087 || INTVAL (op1) >= 16*1024*1024)
9088 {
9089 if (!x86_64_immediate_operand (op1, Pmode))
9090 op1 = force_reg (Pmode, op1);
9091 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
9092 }
9093 }
9094 }
9095 else
9096 {
9097 base = legitimize_pic_address (XEXP (addr, 0), reg);
9098 new_rtx = legitimize_pic_address (XEXP (addr, 1),
9099 base == reg ? NULL_RTX : reg);
9100
9101 if (CONST_INT_P (new_rtx))
9102 new_rtx = plus_constant (base, INTVAL (new_rtx));
9103 else
9104 {
9105 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
9106 {
9107 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
9108 new_rtx = XEXP (new_rtx, 1);
9109 }
9110 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
9111 }
9112 }
9113 }
9114 }
9115 return new_rtx;
9116 }
9117 \f
9118 /* Load the thread pointer. If TO_REG is true, force it into a register. */
9119
9120 static rtx
9121 get_thread_pointer (int to_reg)
9122 {
9123 rtx tp, reg, insn;
9124
9125 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
9126 if (!to_reg)
9127 return tp;
9128
9129 reg = gen_reg_rtx (Pmode);
9130 insn = gen_rtx_SET (VOIDmode, reg, tp);
9131 insn = emit_insn (insn);
9132
9133 return reg;
9134 }
9135
9136 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
9137 false if we expect this to be used for a memory address and true if
9138 we expect to load the address into a register. */
9139
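/* Roughly, the TLS models expand as follows: global and local dynamic
   call __tls_get_addr (local dynamic then adds a @DTPOFF offset to the
   returned module base); initial exec loads the symbol's offset from
   the GOT (@GOTTPOFF and friends) and combines it with the thread
   pointer; local exec applies a link-time constant @TPOFF/@NTPOFF
   offset to the thread pointer directly.  */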
9140 static rtx
9141 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
9142 {
9143 rtx dest, base, off, pic, tp;
9144 int type;
9145
9146 switch (model)
9147 {
9148 case TLS_MODEL_GLOBAL_DYNAMIC:
9149 dest = gen_reg_rtx (Pmode);
9150 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9151
9152 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9153 {
9154 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
9155
9156 start_sequence ();
9157 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
9158 insns = get_insns ();
9159 end_sequence ();
9160
9161 RTL_CONST_CALL_P (insns) = 1;
9162 emit_libcall_block (insns, dest, rax, x);
9163 }
9164 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9165 emit_insn (gen_tls_global_dynamic_64 (dest, x));
9166 else
9167 emit_insn (gen_tls_global_dynamic_32 (dest, x));
9168
9169 if (TARGET_GNU2_TLS)
9170 {
9171 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
9172
9173 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
9174 }
9175 break;
9176
9177 case TLS_MODEL_LOCAL_DYNAMIC:
9178 base = gen_reg_rtx (Pmode);
9179 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9180
9181 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9182 {
9183 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
9184
9185 start_sequence ();
9186 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
9187 insns = get_insns ();
9188 end_sequence ();
9189
9190 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
9191 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
9192 RTL_CONST_CALL_P (insns) = 1;
9193 emit_libcall_block (insns, base, rax, note);
9194 }
9195 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9196 emit_insn (gen_tls_local_dynamic_base_64 (base));
9197 else
9198 emit_insn (gen_tls_local_dynamic_base_32 (base));
9199
9200 if (TARGET_GNU2_TLS)
9201 {
9202 rtx x = ix86_tls_module_base ();
9203
9204 set_unique_reg_note (get_last_insn (), REG_EQUIV,
9205 gen_rtx_MINUS (Pmode, x, tp));
9206 }
9207
9208 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
9209 off = gen_rtx_CONST (Pmode, off);
9210
9211 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
9212
9213 if (TARGET_GNU2_TLS)
9214 {
9215 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
9216
9217 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
9218 }
9219
9220 break;
9221
9222 case TLS_MODEL_INITIAL_EXEC:
9223 if (TARGET_64BIT)
9224 {
9225 pic = NULL;
9226 type = UNSPEC_GOTNTPOFF;
9227 }
9228 else if (flag_pic)
9229 {
9230 if (reload_in_progress)
9231 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9232 pic = pic_offset_table_rtx;
9233 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
9234 }
9235 else if (!TARGET_ANY_GNU_TLS)
9236 {
9237 pic = gen_reg_rtx (Pmode);
9238 emit_insn (gen_set_got (pic));
9239 type = UNSPEC_GOTTPOFF;
9240 }
9241 else
9242 {
9243 pic = NULL;
9244 type = UNSPEC_INDNTPOFF;
9245 }
9246
9247 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
9248 off = gen_rtx_CONST (Pmode, off);
9249 if (pic)
9250 off = gen_rtx_PLUS (Pmode, pic, off);
9251 off = gen_const_mem (Pmode, off);
9252 set_mem_alias_set (off, ix86_GOT_alias_set ());
9253
9254 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9255 {
9256 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9257 off = force_reg (Pmode, off);
9258 return gen_rtx_PLUS (Pmode, base, off);
9259 }
9260 else
9261 {
9262 base = get_thread_pointer (true);
9263 dest = gen_reg_rtx (Pmode);
9264 emit_insn (gen_subsi3 (dest, base, off));
9265 }
9266 break;
9267
9268 case TLS_MODEL_LOCAL_EXEC:
9269 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
9270 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9271 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
9272 off = gen_rtx_CONST (Pmode, off);
9273
9274 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9275 {
9276 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9277 return gen_rtx_PLUS (Pmode, base, off);
9278 }
9279 else
9280 {
9281 base = get_thread_pointer (true);
9282 dest = gen_reg_rtx (Pmode);
9283 emit_insn (gen_subsi3 (dest, base, off));
9284 }
9285 break;
9286
9287 default:
9288 gcc_unreachable ();
9289 }
9290
9291 return dest;
9292 }
9293
9294 /* Create or return the unique __imp_DECL dllimport symbol corresponding
9295 to symbol DECL. */
9296
9297 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
9298 htab_t dllimport_map;
9299
9300 static tree
9301 get_dllimport_decl (tree decl)
9302 {
9303 struct tree_map *h, in;
9304 void **loc;
9305 const char *name;
9306 const char *prefix;
9307 size_t namelen, prefixlen;
9308 char *imp_name;
9309 tree to;
9310 rtx rtl;
9311
9312 if (!dllimport_map)
9313 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
9314
9315 in.hash = htab_hash_pointer (decl);
9316 in.base.from = decl;
9317 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
9318 h = (struct tree_map *) *loc;
9319 if (h)
9320 return h->to;
9321
9322 *loc = h = GGC_NEW (struct tree_map);
9323 h->hash = in.hash;
9324 h->base.from = decl;
9325 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
9326 DECL_ARTIFICIAL (to) = 1;
9327 DECL_IGNORED_P (to) = 1;
9328 DECL_EXTERNAL (to) = 1;
9329 TREE_READONLY (to) = 1;
9330
9331 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
9332 name = targetm.strip_name_encoding (name);
9333 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
9334 ? "*__imp_" : "*__imp__";
9335 namelen = strlen (name);
9336 prefixlen = strlen (prefix);
9337 imp_name = (char *) alloca (namelen + prefixlen + 1);
9338 memcpy (imp_name, prefix, prefixlen);
9339 memcpy (imp_name + prefixlen, name, namelen + 1);
9340
9341 name = ggc_alloc_string (imp_name, namelen + prefixlen);
9342 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
9343 SET_SYMBOL_REF_DECL (rtl, to);
9344 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
9345
9346 rtl = gen_const_mem (Pmode, rtl);
9347 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
9348
9349 SET_DECL_RTL (to, rtl);
9350 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
9351
9352 return to;
9353 }
9354
9355 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
9356 true if we require the result be a register. */
9357
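/* For example, a reference to a dllimported symbol FOO is rewritten
   into a load through the import pointer __imp_FOO (with an extra
   underscore when the target uses a user label prefix) built by
   get_dllimport_decl above.  */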
9358 static rtx
9359 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
9360 {
9361 tree imp_decl;
9362 rtx x;
9363
9364 gcc_assert (SYMBOL_REF_DECL (symbol));
9365 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
9366
9367 x = DECL_RTL (imp_decl);
9368 if (want_reg)
9369 x = force_reg (Pmode, x);
9370 return x;
9371 }
9372
9373 /* Try machine-dependent ways of modifying an illegitimate address
9374 to be legitimate. If we find one, return the new, valid address.
9375 This macro is used in only one place: `memory_address' in explow.c.
9376
9377 OLDX is the address as it was before break_out_memory_refs was called.
9378 In some cases it is useful to look at this to decide what needs to be done.
9379
9380 MODE and WIN are passed so that this macro can use
9381 GO_IF_LEGITIMATE_ADDRESS.
9382
9383 It is always safe for this macro to do nothing. It exists to recognize
9384 opportunities to optimize the output.
9385
9386 For the 80386, we handle X+REG by loading X into a register R and
9387 using R+REG. R will go in a general reg and indexing will be used.
9388 However, if REG is a broken-out memory address or multiplication,
9389 nothing needs to be done because REG can certainly go in a general reg.
9390
9391 When -fpic is used, special handling is needed for symbolic references.
9392 See comments by legitimize_pic_address in i386.c for details. */
9393
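/* For instance, one canonicalization performed below rewrites
   (plus (ashift (reg) (const_int 2)) (reg)) into
   (plus (mult (reg) (const_int 4)) (reg)), which matches the
   scaled-index addressing mode directly.  */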
9394 rtx
9395 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
9396 {
9397 int changed = 0;
9398 unsigned log;
9399
9400 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
9401 if (log)
9402 return legitimize_tls_address (x, (enum tls_model) log, false);
9403 if (GET_CODE (x) == CONST
9404 && GET_CODE (XEXP (x, 0)) == PLUS
9405 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9406 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
9407 {
9408 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
9409 (enum tls_model) log, false);
9410 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
9411 }
9412
9413 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
9414 {
9415 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
9416 return legitimize_dllimport_symbol (x, true);
9417 if (GET_CODE (x) == CONST
9418 && GET_CODE (XEXP (x, 0)) == PLUS
9419 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9420 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
9421 {
9422 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
9423 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
9424 }
9425 }
9426
9427 if (flag_pic && SYMBOLIC_CONST (x))
9428 return legitimize_pic_address (x, 0);
9429
9430 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
9431 if (GET_CODE (x) == ASHIFT
9432 && CONST_INT_P (XEXP (x, 1))
9433 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
9434 {
9435 changed = 1;
9436 log = INTVAL (XEXP (x, 1));
9437 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
9438 GEN_INT (1 << log));
9439 }
9440
9441 if (GET_CODE (x) == PLUS)
9442 {
9443 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
9444
9445 if (GET_CODE (XEXP (x, 0)) == ASHIFT
9446 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
9447 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
9448 {
9449 changed = 1;
9450 log = INTVAL (XEXP (XEXP (x, 0), 1));
9451 XEXP (x, 0) = gen_rtx_MULT (Pmode,
9452 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
9453 GEN_INT (1 << log));
9454 }
9455
9456 if (GET_CODE (XEXP (x, 1)) == ASHIFT
9457 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9458 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
9459 {
9460 changed = 1;
9461 log = INTVAL (XEXP (XEXP (x, 1), 1));
9462 XEXP (x, 1) = gen_rtx_MULT (Pmode,
9463 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
9464 GEN_INT (1 << log));
9465 }
9466
9467 /* Put multiply first if it isn't already. */
9468 if (GET_CODE (XEXP (x, 1)) == MULT)
9469 {
9470 rtx tmp = XEXP (x, 0);
9471 XEXP (x, 0) = XEXP (x, 1);
9472 XEXP (x, 1) = tmp;
9473 changed = 1;
9474 }
9475
9476 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
9477 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
9478 created by virtual register instantiation, register elimination, and
9479 similar optimizations. */
9480 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
9481 {
9482 changed = 1;
9483 x = gen_rtx_PLUS (Pmode,
9484 gen_rtx_PLUS (Pmode, XEXP (x, 0),
9485 XEXP (XEXP (x, 1), 0)),
9486 XEXP (XEXP (x, 1), 1));
9487 }
9488
9489 /* Canonicalize
9490 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
9491 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
9492 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
9493 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
9494 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
9495 && CONSTANT_P (XEXP (x, 1)))
9496 {
9497 rtx constant;
9498 rtx other = NULL_RTX;
9499
9500 if (CONST_INT_P (XEXP (x, 1)))
9501 {
9502 constant = XEXP (x, 1);
9503 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
9504 }
9505 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
9506 {
9507 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
9508 other = XEXP (x, 1);
9509 }
9510 else
9511 constant = 0;
9512
9513 if (constant)
9514 {
9515 changed = 1;
9516 x = gen_rtx_PLUS (Pmode,
9517 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
9518 XEXP (XEXP (XEXP (x, 0), 1), 0)),
9519 plus_constant (other, INTVAL (constant)));
9520 }
9521 }
9522
9523 if (changed && legitimate_address_p (mode, x, FALSE))
9524 return x;
9525
9526 if (GET_CODE (XEXP (x, 0)) == MULT)
9527 {
9528 changed = 1;
9529 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
9530 }
9531
9532 if (GET_CODE (XEXP (x, 1)) == MULT)
9533 {
9534 changed = 1;
9535 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
9536 }
9537
9538 if (changed
9539 && REG_P (XEXP (x, 1))
9540 && REG_P (XEXP (x, 0)))
9541 return x;
9542
9543 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
9544 {
9545 changed = 1;
9546 x = legitimize_pic_address (x, 0);
9547 }
9548
9549 if (changed && legitimate_address_p (mode, x, FALSE))
9550 return x;
9551
9552 if (REG_P (XEXP (x, 0)))
9553 {
9554 rtx temp = gen_reg_rtx (Pmode);
9555 rtx val = force_operand (XEXP (x, 1), temp);
9556 if (val != temp)
9557 emit_move_insn (temp, val);
9558
9559 XEXP (x, 1) = temp;
9560 return x;
9561 }
9562
9563 else if (REG_P (XEXP (x, 1)))
9564 {
9565 rtx temp = gen_reg_rtx (Pmode);
9566 rtx val = force_operand (XEXP (x, 0), temp);
9567 if (val != temp)
9568 emit_move_insn (temp, val);
9569
9570 XEXP (x, 0) = temp;
9571 return x;
9572 }
9573 }
9574
9575 return x;
9576 }
9577 \f
9578 /* Print an integer constant expression in assembler syntax. Addition
9579 and subtraction are the only arithmetic that may appear in these
9580 expressions. FILE is the stdio stream to write to, X is the rtx, and
9581 CODE is the operand print code from the output string. */
9582
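/* For example, (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)) is
   printed as "foo@GOTOFF", and with CODE 'P' a non-local SYMBOL_REF
   gets an "@PLT" suffix appended.  */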
9583 static void
9584 output_pic_addr_const (FILE *file, rtx x, int code)
9585 {
9586 char buf[256];
9587
9588 switch (GET_CODE (x))
9589 {
9590 case PC:
9591 gcc_assert (flag_pic);
9592 putc ('.', file);
9593 break;
9594
9595 case SYMBOL_REF:
9596 if (! TARGET_MACHO || TARGET_64BIT)
9597 output_addr_const (file, x);
9598 else
9599 {
9600 const char *name = XSTR (x, 0);
9601
9602 /* Mark the decl as referenced so that cgraph will
9603 output the function. */
9604 if (SYMBOL_REF_DECL (x))
9605 mark_decl_referenced (SYMBOL_REF_DECL (x));
9606
9607 #if TARGET_MACHO
9608 if (MACHOPIC_INDIRECT
9609 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
9610 name = machopic_indirection_name (x, /*stub_p=*/true);
9611 #endif
9612 assemble_name (file, name);
9613 }
9614 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
9615 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
9616 fputs ("@PLT", file);
9617 break;
9618
9619 case LABEL_REF:
9620 x = XEXP (x, 0);
9621 /* FALLTHRU */
9622 case CODE_LABEL:
9623 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
9624 assemble_name (asm_out_file, buf);
9625 break;
9626
9627 case CONST_INT:
9628 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
9629 break;
9630
9631 case CONST:
9632 /* This used to output parentheses around the expression,
9633 but that does not work on the 386 (either ATT or BSD assembler). */
9634 output_pic_addr_const (file, XEXP (x, 0), code);
9635 break;
9636
9637 case CONST_DOUBLE:
9638 if (GET_MODE (x) == VOIDmode)
9639 {
9640 /* We can use %d if the number is <32 bits and positive. */
9641 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
9642 fprintf (file, "0x%lx%08lx",
9643 (unsigned long) CONST_DOUBLE_HIGH (x),
9644 (unsigned long) CONST_DOUBLE_LOW (x));
9645 else
9646 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
9647 }
9648 else
9649 /* We can't handle floating point constants;
9650 PRINT_OPERAND must handle them. */
9651 output_operand_lossage ("floating constant misused");
9652 break;
9653
9654 case PLUS:
9655 /* Some assemblers need integer constants to appear first. */
9656 if (CONST_INT_P (XEXP (x, 0)))
9657 {
9658 output_pic_addr_const (file, XEXP (x, 0), code);
9659 putc ('+', file);
9660 output_pic_addr_const (file, XEXP (x, 1), code);
9661 }
9662 else
9663 {
9664 gcc_assert (CONST_INT_P (XEXP (x, 1)));
9665 output_pic_addr_const (file, XEXP (x, 1), code);
9666 putc ('+', file);
9667 output_pic_addr_const (file, XEXP (x, 0), code);
9668 }
9669 break;
9670
9671 case MINUS:
9672 if (!TARGET_MACHO)
9673 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
9674 output_pic_addr_const (file, XEXP (x, 0), code);
9675 putc ('-', file);
9676 output_pic_addr_const (file, XEXP (x, 1), code);
9677 if (!TARGET_MACHO)
9678 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
9679 break;
9680
9681 case UNSPEC:
9682 gcc_assert (XVECLEN (x, 0) == 1);
9683 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
9684 switch (XINT (x, 1))
9685 {
9686 case UNSPEC_GOT:
9687 fputs ("@GOT", file);
9688 break;
9689 case UNSPEC_GOTOFF:
9690 fputs ("@GOTOFF", file);
9691 break;
9692 case UNSPEC_PLTOFF:
9693 fputs ("@PLTOFF", file);
9694 break;
9695 case UNSPEC_GOTPCREL:
9696 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
9697 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
9698 break;
9699 case UNSPEC_GOTTPOFF:
9700 /* FIXME: This might be @TPOFF in Sun ld too. */
9701 fputs ("@GOTTPOFF", file);
9702 break;
9703 case UNSPEC_TPOFF:
9704 fputs ("@TPOFF", file);
9705 break;
9706 case UNSPEC_NTPOFF:
9707 if (TARGET_64BIT)
9708 fputs ("@TPOFF", file);
9709 else
9710 fputs ("@NTPOFF", file);
9711 break;
9712 case UNSPEC_DTPOFF:
9713 fputs ("@DTPOFF", file);
9714 break;
9715 case UNSPEC_GOTNTPOFF:
9716 if (TARGET_64BIT)
9717 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
9718 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
9719 else
9720 fputs ("@GOTNTPOFF", file);
9721 break;
9722 case UNSPEC_INDNTPOFF:
9723 fputs ("@INDNTPOFF", file);
9724 break;
9725 default:
9726 output_operand_lossage ("invalid UNSPEC as operand");
9727 break;
9728 }
9729 break;
9730
9731 default:
9732 output_operand_lossage ("invalid expression as operand");
9733 }
9734 }
9735
9736 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
9737 We need to emit DTP-relative relocations. */
9738
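/* For a symbol FOO this emits (typically via the .long directive)
   "FOO@DTPOFF" for SIZE 4, and "FOO@DTPOFF, 0" for SIZE 8 so that the
   upper half of the 8-byte field is zero.  */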
9739 static void ATTRIBUTE_UNUSED
9740 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
9741 {
9742 fputs (ASM_LONG, file);
9743 output_addr_const (file, x);
9744 fputs ("@DTPOFF", file);
9745 switch (size)
9746 {
9747 case 4:
9748 break;
9749 case 8:
9750 fputs (", 0", file);
9751 break;
9752 default:
9753 gcc_unreachable ();
9754 }
9755 }
9756
9757 /* In the name of slightly smaller debug output, and to cater to
9758 general assembler lossage, recognize PIC+GOTOFF and turn it back
9759 into a direct symbol reference.
9760
9761 On Darwin, this is necessary to avoid a crash, because Darwin
9762 has a different PIC label for each routine but the DWARF debugging
9763 information is not associated with any particular routine, so it's
9764 necessary to remove references to the PIC label from RTL stored by
9765 the DWARF output code. */
9766
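/* For example, (plus pic_reg (const (unspec [(symbol_ref "foo")]
   UNSPEC_GOTOFF))) is turned back into (symbol_ref "foo"), with any
   constant or register addend re-applied to the result.  */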
9767 static rtx
9768 ix86_delegitimize_address (rtx orig_x)
9769 {
9770 rtx x = orig_x;
9771 /* reg_addend is NULL or a multiple of some register. */
9772 rtx reg_addend = NULL_RTX;
9773 /* const_addend is NULL or a const_int. */
9774 rtx const_addend = NULL_RTX;
9775 /* This is the result, or NULL. */
9776 rtx result = NULL_RTX;
9777
9778 if (MEM_P (x))
9779 x = XEXP (x, 0);
9780
9781 if (TARGET_64BIT)
9782 {
9783 if (GET_CODE (x) != CONST
9784 || GET_CODE (XEXP (x, 0)) != UNSPEC
9785 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
9786 || !MEM_P (orig_x))
9787 return orig_x;
9788 return XVECEXP (XEXP (x, 0), 0, 0);
9789 }
9790
9791 if (GET_CODE (x) != PLUS
9792 || GET_CODE (XEXP (x, 1)) != CONST)
9793 return orig_x;
9794
9795 if (REG_P (XEXP (x, 0))
9796 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
9797 /* %ebx + GOT/GOTOFF */
9798 ;
9799 else if (GET_CODE (XEXP (x, 0)) == PLUS)
9800 {
9801 /* %ebx + %reg * scale + GOT/GOTOFF */
9802 reg_addend = XEXP (x, 0);
9803 if (REG_P (XEXP (reg_addend, 0))
9804 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
9805 reg_addend = XEXP (reg_addend, 1);
9806 else if (REG_P (XEXP (reg_addend, 1))
9807 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
9808 reg_addend = XEXP (reg_addend, 0);
9809 else
9810 return orig_x;
9811 if (!REG_P (reg_addend)
9812 && GET_CODE (reg_addend) != MULT
9813 && GET_CODE (reg_addend) != ASHIFT)
9814 return orig_x;
9815 }
9816 else
9817 return orig_x;
9818
9819 x = XEXP (XEXP (x, 1), 0);
9820 if (GET_CODE (x) == PLUS
9821 && CONST_INT_P (XEXP (x, 1)))
9822 {
9823 const_addend = XEXP (x, 1);
9824 x = XEXP (x, 0);
9825 }
9826
9827 if (GET_CODE (x) == UNSPEC
9828 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
9829 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
9830 result = XVECEXP (x, 0, 0);
9831
9832 if (TARGET_MACHO && darwin_local_data_pic (x)
9833 && !MEM_P (orig_x))
9834 result = XEXP (x, 0);
9835
9836 if (! result)
9837 return orig_x;
9838
9839 if (const_addend)
9840 result = gen_rtx_PLUS (Pmode, result, const_addend);
9841 if (reg_addend)
9842 result = gen_rtx_PLUS (Pmode, reg_addend, result);
9843 return result;
9844 }
9845
9846 /* If X is a machine specific address (i.e. a symbol or label being
9847 referenced as a displacement from the GOT implemented using an
9848 UNSPEC), then return the base term. Otherwise return X. */
9849
9850 rtx
9851 ix86_find_base_term (rtx x)
9852 {
9853 rtx term;
9854
9855 if (TARGET_64BIT)
9856 {
9857 if (GET_CODE (x) != CONST)
9858 return x;
9859 term = XEXP (x, 0);
9860 if (GET_CODE (term) == PLUS
9861 && (CONST_INT_P (XEXP (term, 1))
9862 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
9863 term = XEXP (term, 0);
9864 if (GET_CODE (term) != UNSPEC
9865 || XINT (term, 1) != UNSPEC_GOTPCREL)
9866 return x;
9867
9868 term = XVECEXP (term, 0, 0);
9869
9870 if (GET_CODE (term) != SYMBOL_REF
9871 && GET_CODE (term) != LABEL_REF)
9872 return x;
9873
9874 return term;
9875 }
9876
9877 term = ix86_delegitimize_address (x);
9878
9879 if (GET_CODE (term) != SYMBOL_REF
9880 && GET_CODE (term) != LABEL_REF)
9881 return x;
9882
9883 return term;
9884 }
9885 \f
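/* Print to FILE the condition suffix (e.g. "e", "ne", "g", "b")
   selected by CODE and the flags mode MODE.  If REVERSE, print the
   suffix for the reversed condition.  FP selects spellings suitable
   for fcmov on assemblers with the lossage noted below.  */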
9886 static void
9887 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
9888 int fp, FILE *file)
9889 {
9890 const char *suffix;
9891
9892 if (mode == CCFPmode || mode == CCFPUmode)
9893 {
9894 enum rtx_code second_code, bypass_code;
9895 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
9896 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
9897 code = ix86_fp_compare_code_to_integer (code);
9898 mode = CCmode;
9899 }
9900 if (reverse)
9901 code = reverse_condition (code);
9902
9903 switch (code)
9904 {
9905 case EQ:
9906 switch (mode)
9907 {
9908 case CCAmode:
9909 suffix = "a";
9910 break;
9911
9912 case CCCmode:
9913 suffix = "c";
9914 break;
9915
9916 case CCOmode:
9917 suffix = "o";
9918 break;
9919
9920 case CCSmode:
9921 suffix = "s";
9922 break;
9923
9924 default:
9925 suffix = "e";
9926 }
9927 break;
9928 case NE:
9929 switch (mode)
9930 {
9931 case CCAmode:
9932 suffix = "na";
9933 break;
9934
9935 case CCCmode:
9936 suffix = "nc";
9937 break;
9938
9939 case CCOmode:
9940 suffix = "no";
9941 break;
9942
9943 case CCSmode:
9944 suffix = "ns";
9945 break;
9946
9947 default:
9948 suffix = "ne";
9949 }
9950 break;
9951 case GT:
9952 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
9953 suffix = "g";
9954 break;
9955 case GTU:
9956 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
9957 Those same assemblers have the same but opposite lossage on cmov. */
9958 if (mode == CCmode)
9959 suffix = fp ? "nbe" : "a";
9960 else if (mode == CCCmode)
9961 suffix = "b";
9962 else
9963 gcc_unreachable ();
9964 break;
9965 case LT:
9966 switch (mode)
9967 {
9968 case CCNOmode:
9969 case CCGOCmode:
9970 suffix = "s";
9971 break;
9972
9973 case CCmode:
9974 case CCGCmode:
9975 suffix = "l";
9976 break;
9977
9978 default:
9979 gcc_unreachable ();
9980 }
9981 break;
9982 case LTU:
9983 gcc_assert (mode == CCmode || mode == CCCmode);
9984 suffix = "b";
9985 break;
9986 case GE:
9987 switch (mode)
9988 {
9989 case CCNOmode:
9990 case CCGOCmode:
9991 suffix = "ns";
9992 break;
9993
9994 case CCmode:
9995 case CCGCmode:
9996 suffix = "ge";
9997 break;
9998
9999 default:
10000 gcc_unreachable ();
10001 }
10002 break;
10003 case GEU:
10004 /* ??? As above. */
10005 gcc_assert (mode == CCmode || mode == CCCmode);
10006 suffix = fp ? "nb" : "ae";
10007 break;
10008 case LE:
10009 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
10010 suffix = "le";
10011 break;
10012 case LEU:
10013 /* ??? As above. */
10014 if (mode == CCmode)
10015 suffix = "be";
10016 else if (mode == CCCmode)
10017 suffix = fp ? "nb" : "ae";
10018 else
10019 gcc_unreachable ();
10020 break;
10021 case UNORDERED:
10022 suffix = fp ? "u" : "p";
10023 break;
10024 case ORDERED:
10025 suffix = fp ? "nu" : "np";
10026 break;
10027 default:
10028 gcc_unreachable ();
10029 }
10030 fputs (suffix, file);
10031 }
10032
10033 /* Print the name of register X to FILE based on its machine mode and number.
10034 If CODE is 'w', pretend the mode is HImode.
10035 If CODE is 'b', pretend the mode is QImode.
10036 If CODE is 'k', pretend the mode is SImode.
10037 If CODE is 'q', pretend the mode is DImode.
10038 If CODE is 'h', pretend the reg is the 'high' byte register.
10039 If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack op. */
10040
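/* For example, register number 0 prints as "%al" with CODE 'b' and as
   "%eax" with CODE 'k' in AT&T syntax (without the '%' in Intel
   syntax).  */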
10041 void
10042 print_reg (rtx x, int code, FILE *file)
10043 {
10044 gcc_assert (x == pc_rtx
10045 || (REGNO (x) != ARG_POINTER_REGNUM
10046 && REGNO (x) != FRAME_POINTER_REGNUM
10047 && REGNO (x) != FLAGS_REG
10048 && REGNO (x) != FPSR_REG
10049 && REGNO (x) != FPCR_REG));
10050
10051 if (ASSEMBLER_DIALECT == ASM_ATT)
10052 putc ('%', file);
10053
10054 if (x == pc_rtx)
10055 {
10056 gcc_assert (TARGET_64BIT);
10057 fputs ("rip", file);
10058 return;
10059 }
10060
10061 if (code == 'w' || MMX_REG_P (x))
10062 code = 2;
10063 else if (code == 'b')
10064 code = 1;
10065 else if (code == 'k')
10066 code = 4;
10067 else if (code == 'q')
10068 code = 8;
10069 else if (code == 'y')
10070 code = 3;
10071 else if (code == 'h')
10072 code = 0;
10073 else
10074 code = GET_MODE_SIZE (GET_MODE (x));
10075
10076 /* Irritatingly, AMD extended registers use a different naming convention
10077 from the normal registers. */
10078 if (REX_INT_REG_P (x))
10079 {
10080 gcc_assert (TARGET_64BIT);
10081 switch (code)
10082 {
10083 case 0:
10084 error ("extended registers have no high halves");
10085 break;
10086 case 1:
10087 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
10088 break;
10089 case 2:
10090 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
10091 break;
10092 case 4:
10093 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
10094 break;
10095 case 8:
10096 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
10097 break;
10098 default:
10099 error ("unsupported operand size for extended register");
10100 break;
10101 }
10102 return;
10103 }
10104 switch (code)
10105 {
10106 case 3:
10107 if (STACK_TOP_P (x))
10108 {
10109 fputs ("st(0)", file);
10110 break;
10111 }
10112 /* FALLTHRU */
10113 case 8:
10114 case 4:
10115 case 12:
10116 if (! ANY_FP_REG_P (x))
10117 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
10118 /* FALLTHRU */
10119 case 16:
10120 case 2:
10121 normal:
10122 fputs (hi_reg_name[REGNO (x)], file);
10123 break;
10124 case 1:
10125 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
10126 goto normal;
10127 fputs (qi_reg_name[REGNO (x)], file);
10128 break;
10129 case 0:
10130 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
10131 goto normal;
10132 fputs (qi_high_reg_name[REGNO (x)], file);
10133 break;
10134 default:
10135 gcc_unreachable ();
10136 }
10137 }
10138
10139 /* Locate some local-dynamic symbol still in use by this function
10140 so that we can print its name in some tls_local_dynamic_base
10141 pattern. */
10142
10143 static int
10144 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
10145 {
10146 rtx x = *px;
10147
10148 if (GET_CODE (x) == SYMBOL_REF
10149 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
10150 {
10151 cfun->machine->some_ld_name = XSTR (x, 0);
10152 return 1;
10153 }
10154
10155 return 0;
10156 }
10157
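/* Return the name of some local-dynamic TLS symbol referenced by the
   current function, scanning its insns on the first call and caching
   the result in cfun->machine->some_ld_name.  */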
10158 static const char *
10159 get_some_local_dynamic_name (void)
10160 {
10161 rtx insn;
10162
10163 if (cfun->machine->some_ld_name)
10164 return cfun->machine->some_ld_name;
10165
10166 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
10167 if (INSN_P (insn)
10168 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
10169 return cfun->machine->some_ld_name;
10170
10171 gcc_unreachable ();
10172 }
10173
10174 /* Meaning of CODE:
10175 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
10176 C -- print opcode suffix for set/cmov insn.
10177 c -- like C, but print reversed condition
10178 E,e -- likewise, but for compare-and-branch fused insn.
10179 F,f -- likewise, but for floating-point.
10180 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
10181 otherwise nothing
10182 R -- print the prefix for register names.
10183 z -- print the opcode suffix for the size of the current operand.
10184 * -- print a star (in certain assembler syntax)
10185 A -- print an absolute memory reference.
10186 w -- print the operand as if it's a "word" (HImode) even if it isn't.
10187 s -- print a shift double count, followed by the assembler's argument
10188 delimiter.
10189 b -- print the QImode name of the register for the indicated operand.
10190 %b0 would print %al if operands[0] is reg 0.
10191 w -- likewise, print the HImode name of the register.
10192 k -- likewise, print the SImode name of the register.
10193 q -- likewise, print the DImode name of the register.
10194 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
10195 y -- print "st(0)" instead of "st" as a register.
10196 D -- print condition for SSE cmp instruction.
10197 P -- if PIC, print an @PLT suffix.
10198 X -- don't print any sort of PIC '@' suffix for a symbol.
10199 & -- print some in-use local-dynamic symbol name.
10200 H -- print a memory address offset by 8; used for sse high-parts
10201 Y -- print condition for SSE5 com* instruction.
10202 + -- print a branch hint as 'cs' or 'ds' prefix
10203 ; -- print a semicolon (after prefixes due to bug in older gas).
10204 */
10205
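/* For instance, if operands[0] is (reg:SI 0), "%k0" prints "%eax" and
   "%b0" prints "%al" in AT&T syntax, while "%z0" on a 4-byte integer
   operand prints the "l" mnemonic suffix.  */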
10206 void
10207 print_operand (FILE *file, rtx x, int code)
10208 {
10209 if (code)
10210 {
10211 switch (code)
10212 {
10213 case '*':
10214 if (ASSEMBLER_DIALECT == ASM_ATT)
10215 putc ('*', file);
10216 return;
10217
10218 case '&':
10219 assemble_name (file, get_some_local_dynamic_name ());
10220 return;
10221
10222 case 'A':
10223 switch (ASSEMBLER_DIALECT)
10224 {
10225 case ASM_ATT:
10226 putc ('*', file);
10227 break;
10228
10229 case ASM_INTEL:
10230 /* Intel syntax. For absolute addresses, registers should not
10231 be surrounded by brackets. */
10232 if (!REG_P (x))
10233 {
10234 putc ('[', file);
10235 PRINT_OPERAND (file, x, 0);
10236 putc (']', file);
10237 return;
10238 }
10239 break;
10240
10241 default:
10242 gcc_unreachable ();
10243 }
10244
10245 PRINT_OPERAND (file, x, 0);
10246 return;
10247
10248
10249 case 'L':
10250 if (ASSEMBLER_DIALECT == ASM_ATT)
10251 putc ('l', file);
10252 return;
10253
10254 case 'W':
10255 if (ASSEMBLER_DIALECT == ASM_ATT)
10256 putc ('w', file);
10257 return;
10258
10259 case 'B':
10260 if (ASSEMBLER_DIALECT == ASM_ATT)
10261 putc ('b', file);
10262 return;
10263
10264 case 'Q':
10265 if (ASSEMBLER_DIALECT == ASM_ATT)
10266 putc ('l', file);
10267 return;
10268
10269 case 'S':
10270 if (ASSEMBLER_DIALECT == ASM_ATT)
10271 putc ('s', file);
10272 return;
10273
10274 case 'T':
10275 if (ASSEMBLER_DIALECT == ASM_ATT)
10276 putc ('t', file);
10277 return;
10278
10279 case 'z':
10280 /* 387 opcodes don't get size suffixes if the operands are
10281 registers. */
10282 if (STACK_REG_P (x))
10283 return;
10284
10285 /* Likewise if using Intel opcodes. */
10286 if (ASSEMBLER_DIALECT == ASM_INTEL)
10287 return;
10288
10289 /* This is the size of op from size of operand. */
10290 switch (GET_MODE_SIZE (GET_MODE (x)))
10291 {
10292 case 1:
10293 putc ('b', file);
10294 return;
10295
10296 case 2:
10297 if (MEM_P (x))
10298 {
10299 #ifdef HAVE_GAS_FILDS_FISTS
10300 putc ('s', file);
10301 #endif
10302 return;
10303 }
10304 else
10305 putc ('w', file);
10306 return;
10307
10308 case 4:
10309 if (GET_MODE (x) == SFmode)
10310 {
10311 putc ('s', file);
10312 return;
10313 }
10314 else
10315 putc ('l', file);
10316 return;
10317
10318 case 12:
10319 case 16:
10320 putc ('t', file);
10321 return;
10322
10323 case 8:
10324 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
10325 {
10326 if (MEM_P (x))
10327 {
10328 #ifdef GAS_MNEMONICS
10329 putc ('q', file);
10330 #else
10331 putc ('l', file);
10332 putc ('l', file);
10333 #endif
10334 }
10335 else
10336 putc ('q', file);
10337 }
10338 else
10339 putc ('l', file);
10340 return;
10341
10342 default:
10343 gcc_unreachable ();
10344 }
10345
10346 case 'b':
10347 case 'w':
10348 case 'k':
10349 case 'q':
10350 case 'h':
10351 case 'y':
10352 case 'X':
10353 case 'P':
10354 break;
10355
10356 case 's':
10357 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
10358 {
10359 PRINT_OPERAND (file, x, 0);
10360 fputs (", ", file);
10361 }
10362 return;
10363
10364 case 'D':
10365 /* Little bit of braindamage here. The SSE compare instructions
10366 use completely different names for the comparisons than the
10367 fp conditional moves do. */
10368 switch (GET_CODE (x))
10369 {
10370 case EQ:
10371 case UNEQ:
10372 fputs ("eq", file);
10373 break;
10374 case LT:
10375 case UNLT:
10376 fputs ("lt", file);
10377 break;
10378 case LE:
10379 case UNLE:
10380 fputs ("le", file);
10381 break;
10382 case UNORDERED:
10383 fputs ("unord", file);
10384 break;
10385 case NE:
10386 case LTGT:
10387 fputs ("neq", file);
10388 break;
10389 case UNGE:
10390 case GE:
10391 fputs ("nlt", file);
10392 break;
10393 case UNGT:
10394 case GT:
10395 fputs ("nle", file);
10396 break;
10397 case ORDERED:
10398 fputs ("ord", file);
10399 break;
10400 default:
10401 gcc_unreachable ();
10402 }
10403 return;
10404 case 'O':
10405 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
10406 if (ASSEMBLER_DIALECT == ASM_ATT)
10407 {
10408 switch (GET_MODE (x))
10409 {
10410 case HImode: putc ('w', file); break;
10411 case SImode:
10412 case SFmode: putc ('l', file); break;
10413 case DImode:
10414 case DFmode: putc ('q', file); break;
10415 default: gcc_unreachable ();
10416 }
10417 putc ('.', file);
10418 }
10419 #endif
10420 return;
10421 case 'C':
10422 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
10423 return;
10424 case 'F':
10425 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
10426 if (ASSEMBLER_DIALECT == ASM_ATT)
10427 putc ('.', file);
10428 #endif
10429 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
10430 return;
10431
10432 /* Like above, but reverse condition */
10433 case 'c':
10434 /* Check to see if argument to %c is really a constant
10435 and not a condition code which needs to be reversed. */
10436 if (!COMPARISON_P (x))
10437 {
10438 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
10439 return;
10440 }
10441 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
10442 return;
10443 case 'f':
10444 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
10445 if (ASSEMBLER_DIALECT == ASM_ATT)
10446 putc ('.', file);
10447 #endif
10448 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
10449 return;
10450
10451 case 'E':
10452 put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
10453 return;
10454
10455 case 'e':
10456 put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
10457 return;
10458
10459 case 'H':
10460 /* It doesn't actually matter what mode we use here, as we're
10461 only going to use this for printing. */
10462 x = adjust_address_nv (x, DImode, 8);
10463 break;
10464
10465 case '+':
10466 {
10467 rtx x;
10468
10469 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
10470 return;
10471
10472 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
10473 if (x)
10474 {
10475 int pred_val = INTVAL (XEXP (x, 0));
10476
10477 if (pred_val < REG_BR_PROB_BASE * 45 / 100
10478 || pred_val > REG_BR_PROB_BASE * 55 / 100)
10479 {
10480 int taken = pred_val > REG_BR_PROB_BASE / 2;
10481 int cputaken = final_forward_branch_p (current_output_insn) == 0;
10482
10483 /* Emit hints only in the case the default branch prediction
10484 heuristics would fail. */
10485 if (taken != cputaken)
10486 {
10487 /* We use 3e (DS) prefix for taken branches and
10488 2e (CS) prefix for not taken branches. */
10489 if (taken)
10490 fputs ("ds ; ", file);
10491 else
10492 fputs ("cs ; ", file);
10493 }
10494 }
10495 }
10496 return;
10497 }
10498
10499 case 'Y':
10500 switch (GET_CODE (x))
10501 {
10502 case NE:
10503 fputs ("neq", file);
10504 break;
10505 case EQ:
10506 fputs ("eq", file);
10507 break;
10508 case GE:
10509 case GEU:
10510 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
10511 break;
10512 case GT:
10513 case GTU:
10514 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
10515 break;
10516 case LE:
10517 case LEU:
10518 fputs ("le", file);
10519 break;
10520 case LT:
10521 case LTU:
10522 fputs ("lt", file);
10523 break;
10524 case UNORDERED:
10525 fputs ("unord", file);
10526 break;
10527 case ORDERED:
10528 fputs ("ord", file);
10529 break;
10530 case UNEQ:
10531 fputs ("ueq", file);
10532 break;
10533 case UNGE:
10534 fputs ("nlt", file);
10535 break;
10536 case UNGT:
10537 fputs ("nle", file);
10538 break;
10539 case UNLE:
10540 fputs ("ule", file);
10541 break;
10542 case UNLT:
10543 fputs ("ult", file);
10544 break;
10545 case LTGT:
10546 fputs ("une", file);
10547 break;
10548 default:
10549 gcc_unreachable ();
10550 }
10551 return;
10552
10553 case ';':
10554 #if TARGET_MACHO
10555 fputs (" ; ", file);
10556 #else
10557 fputc (' ', file);
10558 #endif
10559 return;
10560
10561 default:
10562 output_operand_lossage ("invalid operand code '%c'", code);
10563 }
10564 }
10565
10566 if (REG_P (x))
10567 print_reg (x, code, file);
10568
10569 else if (MEM_P (x))
10570 {
10571 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
10572 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
10573 && GET_MODE (x) != BLKmode)
10574 {
10575 const char * size;
10576 switch (GET_MODE_SIZE (GET_MODE (x)))
10577 {
10578 case 1: size = "BYTE"; break;
10579 case 2: size = "WORD"; break;
10580 case 4: size = "DWORD"; break;
10581 case 8: size = "QWORD"; break;
10582 case 12: size = "XWORD"; break;
10583 case 16:
10584 if (GET_MODE (x) == XFmode)
10585 size = "XWORD";
10586 else
10587 size = "XMMWORD";
10588 break;
10589 default:
10590 gcc_unreachable ();
10591 }
10592
10593 /* Check for explicit size override (codes 'b', 'w' and 'k') */
10594 if (code == 'b')
10595 size = "BYTE";
10596 else if (code == 'w')
10597 size = "WORD";
10598 else if (code == 'k')
10599 size = "DWORD";
10600
10601 fputs (size, file);
10602 fputs (" PTR ", file);
10603 }
10604
10605 x = XEXP (x, 0);
10606 /* Avoid (%rip) for call operands. */
10607 if (CONSTANT_ADDRESS_P (x) && code == 'P'
10608 && !CONST_INT_P (x))
10609 output_addr_const (file, x);
10610 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
10611 output_operand_lossage ("invalid constraints for operand");
10612 else
10613 output_address (x);
10614 }
10615
10616 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
10617 {
10618 REAL_VALUE_TYPE r;
10619 long l;
10620
10621 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
10622 REAL_VALUE_TO_TARGET_SINGLE (r, l);
10623
10624 if (ASSEMBLER_DIALECT == ASM_ATT)
10625 putc ('$', file);
10626 fprintf (file, "0x%08lx", (long unsigned int) l);
10627 }
10628
10629 /* These float cases don't actually occur as immediate operands. */
10630 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
10631 {
10632 char dstr[30];
10633
10634 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
10635 fprintf (file, "%s", dstr);
10636 }
10637
10638 else if (GET_CODE (x) == CONST_DOUBLE
10639 && GET_MODE (x) == XFmode)
10640 {
10641 char dstr[30];
10642
10643 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
10644 fprintf (file, "%s", dstr);
10645 }
10646
10647 else
10648 {
10649 /* We have patterns that allow zero sets of memory, for instance.
10650 In 64-bit mode, we should probably support all 8-byte vectors,
10651 since we can in fact encode that into an immediate. */
10652 if (GET_CODE (x) == CONST_VECTOR)
10653 {
10654 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
10655 x = const0_rtx;
10656 }
10657
10658 if (code != 'P')
10659 {
10660 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
10661 {
10662 if (ASSEMBLER_DIALECT == ASM_ATT)
10663 putc ('$', file);
10664 }
10665 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
10666 || GET_CODE (x) == LABEL_REF)
10667 {
10668 if (ASSEMBLER_DIALECT == ASM_ATT)
10669 putc ('$', file);
10670 else
10671 fputs ("OFFSET FLAT:", file);
10672 }
10673 }
10674 if (CONST_INT_P (x))
10675 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
10676 else if (flag_pic)
10677 output_pic_addr_const (file, x, code);
10678 else
10679 output_addr_const (file, x);
10680 }
10681 }
10682 \f
10683 /* Print a memory operand whose address is ADDR. */
10684
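/* For example, an address with base %eax, index %ebx, scale 4 and
   displacement 8 is printed as "8(%eax,%ebx,4)" in AT&T syntax and as
   "[eax+8+ebx*4]" in Intel syntax.  */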
10685 void
10686 print_operand_address (FILE *file, rtx addr)
10687 {
10688 struct ix86_address parts;
10689 rtx base, index, disp;
10690 int scale;
10691 int ok = ix86_decompose_address (addr, &parts);
10692
10693 gcc_assert (ok);
10694
10695 base = parts.base;
10696 index = parts.index;
10697 disp = parts.disp;
10698 scale = parts.scale;
10699
10700 switch (parts.seg)
10701 {
10702 case SEG_DEFAULT:
10703 break;
10704 case SEG_FS:
10705 case SEG_GS:
10706 if (ASSEMBLER_DIALECT == ASM_ATT)
10707 putc ('%', file);
10708 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
10709 break;
10710 default:
10711 gcc_unreachable ();
10712 }
10713
10714 /* Use one byte shorter RIP relative addressing for 64bit mode. */
10715 if (TARGET_64BIT && !base && !index)
10716 {
10717 rtx symbol = disp;
10718
10719 if (GET_CODE (disp) == CONST
10720 && GET_CODE (XEXP (disp, 0)) == PLUS
10721 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
10722 symbol = XEXP (XEXP (disp, 0), 0);
10723
10724 if (GET_CODE (symbol) == LABEL_REF
10725 || (GET_CODE (symbol) == SYMBOL_REF
10726 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
10727 base = pc_rtx;
10728 }
10729 if (!base && !index)
10730 {
10731 /* Displacement only requires special attention. */
10732
10733 if (CONST_INT_P (disp))
10734 {
10735 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
10736 fputs ("ds:", file);
10737 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
10738 }
10739 else if (flag_pic)
10740 output_pic_addr_const (file, disp, 0);
10741 else
10742 output_addr_const (file, disp);
10743 }
10744 else
10745 {
10746 if (ASSEMBLER_DIALECT == ASM_ATT)
10747 {
10748 if (disp)
10749 {
10750 if (flag_pic)
10751 output_pic_addr_const (file, disp, 0);
10752 else if (GET_CODE (disp) == LABEL_REF)
10753 output_asm_label (disp);
10754 else
10755 output_addr_const (file, disp);
10756 }
10757
10758 putc ('(', file);
10759 if (base)
10760 print_reg (base, 0, file);
10761 if (index)
10762 {
10763 putc (',', file);
10764 print_reg (index, 0, file);
10765 if (scale != 1)
10766 fprintf (file, ",%d", scale);
10767 }
10768 putc (')', file);
10769 }
10770 else
10771 {
10772 rtx offset = NULL_RTX;
10773
10774 if (disp)
10775 {
10776 /* Pull out the offset of a symbol; print any symbol itself. */
10777 if (GET_CODE (disp) == CONST
10778 && GET_CODE (XEXP (disp, 0)) == PLUS
10779 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
10780 {
10781 offset = XEXP (XEXP (disp, 0), 1);
10782 disp = gen_rtx_CONST (VOIDmode,
10783 XEXP (XEXP (disp, 0), 0));
10784 }
10785
10786 if (flag_pic)
10787 output_pic_addr_const (file, disp, 0);
10788 else if (GET_CODE (disp) == LABEL_REF)
10789 output_asm_label (disp);
10790 else if (CONST_INT_P (disp))
10791 offset = disp;
10792 else
10793 output_addr_const (file, disp);
10794 }
10795
10796 putc ('[', file);
10797 if (base)
10798 {
10799 print_reg (base, 0, file);
10800 if (offset)
10801 {
10802 if (INTVAL (offset) >= 0)
10803 putc ('+', file);
10804 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
10805 }
10806 }
10807 else if (offset)
10808 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
10809 else
10810 putc ('0', file);
10811
10812 if (index)
10813 {
10814 putc ('+', file);
10815 print_reg (index, 0, file);
10816 if (scale != 1)
10817 fprintf (file, "*%d", scale);
10818 }
10819 putc (']', file);
10820 }
10821 }
10822 }
10823
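/* Output to FILE the @-suffixed relocation for a TLS-related UNSPEC
   wrapped in the address constant X (e.g. "foo@NTPOFF"); return false
   if X is not an UNSPEC handled here.  */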
10824 bool
10825 output_addr_const_extra (FILE *file, rtx x)
10826 {
10827 rtx op;
10828
10829 if (GET_CODE (x) != UNSPEC)
10830 return false;
10831
10832 op = XVECEXP (x, 0, 0);
10833 switch (XINT (x, 1))
10834 {
10835 case UNSPEC_GOTTPOFF:
10836 output_addr_const (file, op);
10837 /* FIXME: This might be @TPOFF in Sun ld. */
10838 fputs ("@GOTTPOFF", file);
10839 break;
10840 case UNSPEC_TPOFF:
10841 output_addr_const (file, op);
10842 fputs ("@TPOFF", file);
10843 break;
10844 case UNSPEC_NTPOFF:
10845 output_addr_const (file, op);
10846 if (TARGET_64BIT)
10847 fputs ("@TPOFF", file);
10848 else
10849 fputs ("@NTPOFF", file);
10850 break;
10851 case UNSPEC_DTPOFF:
10852 output_addr_const (file, op);
10853 fputs ("@DTPOFF", file);
10854 break;
10855 case UNSPEC_GOTNTPOFF:
10856 output_addr_const (file, op);
10857 if (TARGET_64BIT)
10858 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10859 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
10860 else
10861 fputs ("@GOTNTPOFF", file);
10862 break;
10863 case UNSPEC_INDNTPOFF:
10864 output_addr_const (file, op);
10865 fputs ("@INDNTPOFF", file);
10866 break;
10867
10868 default:
10869 return false;
10870 }
10871
10872 return true;
10873 }
10874 \f
10875 /* Split one or more DImode RTL references into pairs of SImode
10876 references. The RTL can be REG, offsettable MEM, integer constant, or
10877 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
10878 split and "num" is its length. lo_half and hi_half are output arrays
10879 that parallel "operands". */
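/* For example, a DImode register is split into its low SImode word
   (subreg byte offset 0) and its high word (offset 4); an offsettable
   MEM at address A yields SImode MEMs at A and A+4, matching the
   little-endian layout assumed here.  */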
10880
10881 void
10882 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
10883 {
10884 while (num--)
10885 {
10886 rtx op = operands[num];
10887
10888 /* simplify_subreg refuses to split volatile memory addresses,
10889 but we still have to handle them. */
10890 if (MEM_P (op))
10891 {
10892 lo_half[num] = adjust_address (op, SImode, 0);
10893 hi_half[num] = adjust_address (op, SImode, 4);
10894 }
10895 else
10896 {
10897 lo_half[num] = simplify_gen_subreg (SImode, op,
10898 GET_MODE (op) == VOIDmode
10899 ? DImode : GET_MODE (op), 0);
10900 hi_half[num] = simplify_gen_subreg (SImode, op,
10901 GET_MODE (op) == VOIDmode
10902 ? DImode : GET_MODE (op), 4);
10903 }
10904 }
10905 }
10906 /* Split one or more TImode RTL references into pairs of DImode
10907 references. The RTL can be REG, offsettable MEM, integer constant, or
10908 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
10909 split and "num" is its length. lo_half and hi_half are output arrays
10910 that parallel "operands". */
10911
10912 void
10913 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
10914 {
10915 while (num--)
10916 {
10917 rtx op = operands[num];
10918
10919 /* simplify_subreg refuses to split volatile memory addresses, but we
10920 still have to handle them. */
10921 if (MEM_P (op))
10922 {
10923 lo_half[num] = adjust_address (op, DImode, 0);
10924 hi_half[num] = adjust_address (op, DImode, 8);
10925 }
10926 else
10927 {
10928 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
10929 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
10930 }
10931 }
10932 }
10933 \f
10934 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
10935 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
10936 is the expression of the binary operation. The output may either be
10937 emitted here, or returned to the caller, like all output_* functions.
10938
10939 There is no guarantee that the operands are the same mode, as they
10940 might be within FLOAT or FLOAT_EXTEND expressions. */
10941
10942 #ifndef SYSV386_COMPAT
10943 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
10944 wants to fix the assemblers because that causes incompatibility
10945 with gcc. No-one wants to fix gcc because that causes
10946 incompatibility with assemblers... You can use the option of
10947 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
10948 #define SYSV386_COMPAT 1
10949 #endif
10950
10951 const char *
10952 output_387_binary_op (rtx insn, rtx *operands)
10953 {
10954 static char buf[30];
10955 const char *p;
10956 const char *ssep;
10957 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
10958
10959 #ifdef ENABLE_CHECKING
10960 /* Even if we do not want to check the inputs, this documents input
10961 constraints, which helps in understanding the following code. */
10962 if (STACK_REG_P (operands[0])
10963 && ((REG_P (operands[1])
10964 && REGNO (operands[0]) == REGNO (operands[1])
10965 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
10966 || (REG_P (operands[2])
10967 && REGNO (operands[0]) == REGNO (operands[2])
10968 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
10969 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
10970 ; /* ok */
10971 else
10972 gcc_assert (is_sse);
10973 #endif
10974
10975 switch (GET_CODE (operands[3]))
10976 {
10977 case PLUS:
10978 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
10979 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
10980 p = "fiadd";
10981 else
10982 p = "fadd";
10983 ssep = "add";
10984 break;
10985
10986 case MINUS:
10987 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
10988 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
10989 p = "fisub";
10990 else
10991 p = "fsub";
10992 ssep = "sub";
10993 break;
10994
10995 case MULT:
10996 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
10997 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
10998 p = "fimul";
10999 else
11000 p = "fmul";
11001 ssep = "mul";
11002 break;
11003
11004 case DIV:
11005 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11006 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11007 p = "fidiv";
11008 else
11009 p = "fdiv";
11010 ssep = "div";
11011 break;
11012
11013 default:
11014 gcc_unreachable ();
11015 }
11016
11017 if (is_sse)
11018 {
11019 strcpy (buf, ssep);
11020 if (GET_MODE (operands[0]) == SFmode)
11021 strcat (buf, "ss\t{%2, %0|%0, %2}");
11022 else
11023 strcat (buf, "sd\t{%2, %0|%0, %2}");
11024 return buf;
11025 }
11026 strcpy (buf, p);
11027
11028 switch (GET_CODE (operands[3]))
11029 {
11030 case MULT:
11031 case PLUS:
11032 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
11033 {
11034 rtx temp = operands[2];
11035 operands[2] = operands[1];
11036 operands[1] = temp;
11037 }
11038
11039 /* We know operands[0] == operands[1]. */
11040
11041 if (MEM_P (operands[2]))
11042 {
11043 p = "%z2\t%2";
11044 break;
11045 }
11046
11047 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11048 {
11049 if (STACK_TOP_P (operands[0]))
11050 /* How is it that we are storing to a dead operand[2]?
11051 Well, presumably operands[1] is dead too. We can't
11052 store the result to st(0) as st(0) gets popped on this
11053 instruction. Instead store to operands[2] (which I
11054 think has to be st(1)). st(1) will be popped later.
11055 gcc <= 2.8.1 didn't have this check and generated
11056 assembly code that the Unixware assembler rejected. */
11057 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11058 else
11059 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11060 break;
11061 }
11062
11063 if (STACK_TOP_P (operands[0]))
11064 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11065 else
11066 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
11067 break;
11068
11069 case MINUS:
11070 case DIV:
11071 if (MEM_P (operands[1]))
11072 {
11073 p = "r%z1\t%1";
11074 break;
11075 }
11076
11077 if (MEM_P (operands[2]))
11078 {
11079 p = "%z2\t%2";
11080 break;
11081 }
11082
11083 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11084 {
11085 #if SYSV386_COMPAT
11086 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
11087 derived assemblers, confusingly reverse the direction of
11088 the operation for fsub{r} and fdiv{r} when the
11089 destination register is not st(0). The Intel assembler
11090 doesn't have this brain damage. Read !SYSV386_COMPAT to
11091 figure out what the hardware really does. */
11092 if (STACK_TOP_P (operands[0]))
11093 p = "{p\t%0, %2|rp\t%2, %0}";
11094 else
11095 p = "{rp\t%2, %0|p\t%0, %2}";
11096 #else
11097 if (STACK_TOP_P (operands[0]))
11098 /* As above for fmul/fadd, we can't store to st(0). */
11099 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11100 else
11101 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11102 #endif
11103 break;
11104 }
11105
11106 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
11107 {
11108 #if SYSV386_COMPAT
11109 if (STACK_TOP_P (operands[0]))
11110 p = "{rp\t%0, %1|p\t%1, %0}";
11111 else
11112 p = "{p\t%1, %0|rp\t%0, %1}";
11113 #else
11114 if (STACK_TOP_P (operands[0]))
11115 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
11116 else
11117 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
11118 #endif
11119 break;
11120 }
11121
11122 if (STACK_TOP_P (operands[0]))
11123 {
11124 if (STACK_TOP_P (operands[1]))
11125 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11126 else
11127 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
11128 break;
11129 }
11130 else if (STACK_TOP_P (operands[1]))
11131 {
11132 #if SYSV386_COMPAT
11133 p = "{\t%1, %0|r\t%0, %1}";
11134 #else
11135 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
11136 #endif
11137 }
11138 else
11139 {
11140 #if SYSV386_COMPAT
11141 p = "{r\t%2, %0|\t%0, %2}";
11142 #else
11143 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
11144 #endif
11145 }
11146 break;
11147
11148 default:
11149 gcc_unreachable ();
11150 }
11151
11152 strcat (buf, p);
11153 return buf;
11154 }
11155
11156 /* Return needed mode for entity in optimize_mode_switching pass. */
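/* ENTITY is one of I387_TRUNC, I387_FLOOR, I387_CEIL or I387_MASK_PM.
   A call or asm leaves the control word in an unknown state, hence
   I387_CW_UNINITIALIZED; otherwise the insn's i387_cw attribute says
   whether it needs this entity's setting, and I387_CW_ANY means the
   insn places no requirement on the control word.  */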
11157
11158 int
11159 ix86_mode_needed (int entity, rtx insn)
11160 {
11161 enum attr_i387_cw mode;
11162
11163 /* The mode UNINITIALIZED is used to store the control word after a
11164 function call or ASM pattern. The mode ANY specifies that the function
11165 has no requirements on the control word and makes no changes in the
11166 bits we are interested in. */
11167
11168 if (CALL_P (insn)
11169 || (NONJUMP_INSN_P (insn)
11170 && (asm_noperands (PATTERN (insn)) >= 0
11171 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
11172 return I387_CW_UNINITIALIZED;
11173
11174 if (recog_memoized (insn) < 0)
11175 return I387_CW_ANY;
11176
11177 mode = get_attr_i387_cw (insn);
11178
11179 switch (entity)
11180 {
11181 case I387_TRUNC:
11182 if (mode == I387_CW_TRUNC)
11183 return mode;
11184 break;
11185
11186 case I387_FLOOR:
11187 if (mode == I387_CW_FLOOR)
11188 return mode;
11189 break;
11190
11191 case I387_CEIL:
11192 if (mode == I387_CW_CEIL)
11193 return mode;
11194 break;
11195
11196 case I387_MASK_PM:
11197 if (mode == I387_CW_MASK_PM)
11198 return mode;
11199 break;
11200
11201 default:
11202 gcc_unreachable ();
11203 }
11204
11205 return I387_CW_ANY;
11206 }
11207
11208 /* Output code to initialize control word copies used by trunc?f?i and
11209 rounding patterns. The original control word is saved in SLOT_CW_STORED;
11210 the copy modified for rounding mode MODE goes into that mode's slot. */
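/* For reference: bits 10-11 of the x87 control word form the rounding
   control field (00 = to nearest, 01 = toward -inf, 10 = toward +inf,
   11 = truncate toward zero) and bit 5 is the precision exception mask,
   which is what the 0x0400, 0x0800, 0x0c00 and 0x0020 constants below
   manipulate.  */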
11211
11212 void
11213 emit_i387_cw_initialization (int mode)
11214 {
11215 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
11216 rtx new_mode;
11217
11218 enum ix86_stack_slot slot;
11219
11220 rtx reg = gen_reg_rtx (HImode);
11221
11222 emit_insn (gen_x86_fnstcw_1 (stored_mode));
11223 emit_move_insn (reg, copy_rtx (stored_mode));
11224
11225 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
11226 {
11227 switch (mode)
11228 {
11229 case I387_CW_TRUNC:
11230 /* round toward zero (truncate) */
11231 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
11232 slot = SLOT_CW_TRUNC;
11233 break;
11234
11235 case I387_CW_FLOOR:
11236 /* round down toward -oo */
11237 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
11238 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
11239 slot = SLOT_CW_FLOOR;
11240 break;
11241
11242 case I387_CW_CEIL:
11243 /* round up toward +oo */
11244 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
11245 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
11246 slot = SLOT_CW_CEIL;
11247 break;
11248
11249 case I387_CW_MASK_PM:
11250 /* mask precision exception for nearbyint() */
11251 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
11252 slot = SLOT_CW_MASK_PM;
11253 break;
11254
11255 default:
11256 gcc_unreachable ();
11257 }
11258 }
11259 else
11260 {
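      /* gen_movsi_insv_1 rewrites bits 8-15 of the control word copy in
         a single insn, so the 0xc, 0x4 and 0x8 immediates below stand in
         for the 0x0c00, 0x0400 and 0x0800 masks used in the branch above.  */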
11261 switch (mode)
11262 {
11263 case I387_CW_TRUNC:
11264 /* round toward zero (truncate) */
11265 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
11266 slot = SLOT_CW_TRUNC;
11267 break;
11268
11269 case I387_CW_FLOOR:
11270 /* round down toward -oo */
11271 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
11272 slot = SLOT_CW_FLOOR;
11273 break;
11274
11275 case I387_CW_CEIL:
11276 /* round up toward +oo */
11277 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
11278 slot = SLOT_CW_CEIL;
11279 break;
11280
11281 case I387_CW_MASK_PM:
11282 /* mask precision exception for nearbyint() */
11283 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
11284 slot = SLOT_CW_MASK_PM;
11285 break;
11286
11287 default:
11288 gcc_unreachable ();
11289 }
11290 }
11291
11292 gcc_assert (slot < MAX_386_STACK_LOCALS);
11293
11294 new_mode = assign_386_stack_local (HImode, slot);
11295 emit_move_insn (new_mode, reg);
11296 }
11297
11298 /* Output code for INSN to convert a float to a signed int. OPERANDS
11299 are the insn operands. The output may be [HSD]Imode and the input
11300 operand may be [SDX]Fmode. */
11301
11302 const char *
11303 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
11304 {
11305 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
11306 int dimode_p = GET_MODE (operands[0]) == DImode;
11307 int round_mode = get_attr_i387_cw (insn);
11308
11309 /* Jump through a hoop or two for DImode, since the hardware has no
11310 non-popping instruction. We used to do this a different way, but
11311 that was somewhat fragile and broke with post-reload splitters. */
11312 if ((dimode_p || fisttp) && !stack_top_dies)
11313 output_asm_insn ("fld\t%y1", operands);
11314
11315 gcc_assert (STACK_TOP_P (operands[1]));
11316 gcc_assert (MEM_P (operands[0]));
11317 gcc_assert (GET_MODE (operands[1]) != TFmode);
11318
11319 if (fisttp)
11320 output_asm_insn ("fisttp%z0\t%0", operands);
11321 else
11322 {
11323 if (round_mode != I387_CW_ANY)
11324 output_asm_insn ("fldcw\t%3", operands);
11325 if (stack_top_dies || dimode_p)
11326 output_asm_insn ("fistp%z0\t%0", operands);
11327 else
11328 output_asm_insn ("fist%z0\t%0", operands);
11329 if (round_mode != I387_CW_ANY)
11330 output_asm_insn ("fldcw\t%2", operands);
11331 }
11332
11333 return "";
11334 }
11335
11336 /* Output code for x87 ffreep insn. The OPNO argument, which may only
11337 have the values zero or one, indicates the ffreep insn's operand
11338 from the OPERANDS array. */
11339
11340 static const char *
11341 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
11342 {
11343 if (TARGET_USE_FFREEP)
11344 #if HAVE_AS_IX86_FFREEP
11345 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
11346 #else
11347 {
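      /* Without assembler support, emit the raw encoding: ffreep %st(i)
         is the two-byte opcode DF C0+i, which the little-endian .word
         below produces.  */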
11348 static char retval[] = ".word\t0xc_df";
11349 int regno = REGNO (operands[opno]);
11350
11351 gcc_assert (FP_REGNO_P (regno));
11352
11353 retval[9] = '0' + (regno - FIRST_STACK_REG);
11354 return retval;
11355 }
11356 #endif
11357
11358 return opno ? "fstp\t%y1" : "fstp\t%y0";
11359 }
11360
11361
11362 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
11363 should be used. UNORDERED_P is true when fucom should be used. */
11364
11365 const char *
11366 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
11367 {
11368 int stack_top_dies;
11369 rtx cmp_op0, cmp_op1;
11370 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
11371
11372 if (eflags_p)
11373 {
11374 cmp_op0 = operands[0];
11375 cmp_op1 = operands[1];
11376 }
11377 else
11378 {
11379 cmp_op0 = operands[1];
11380 cmp_op1 = operands[2];
11381 }
11382
11383 if (is_sse)
11384 {
11385 if (GET_MODE (operands[0]) == SFmode)
11386 if (unordered_p)
11387 return "ucomiss\t{%1, %0|%0, %1}";
11388 else
11389 return "comiss\t{%1, %0|%0, %1}";
11390 else
11391 if (unordered_p)
11392 return "ucomisd\t{%1, %0|%0, %1}";
11393 else
11394 return "comisd\t{%1, %0|%0, %1}";
11395 }
11396
11397 gcc_assert (STACK_TOP_P (cmp_op0));
11398
11399 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
11400
11401 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
11402 {
11403 if (stack_top_dies)
11404 {
11405 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
11406 return output_387_ffreep (operands, 1);
11407 }
11408 else
11409 return "ftst\n\tfnstsw\t%0";
11410 }
11411
11412 if (STACK_REG_P (cmp_op1)
11413 && stack_top_dies
11414 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
11415 && REGNO (cmp_op1) != FIRST_STACK_REG)
11416 {
11417 /* If the top of the 387 stack dies, and the other operand is also
11418 a stack register that dies, then this must be a `fcompp' float
11419 compare. */
11420
11421 if (eflags_p)
11422 {
11423 /* There is no double-popping fcomi variant. Fortunately, eflags
11424 is immune to the fstp's cc clobbering. */
11425 if (unordered_p)
11426 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
11427 else
11428 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
11429 return output_387_ffreep (operands, 0);
11430 }
11431 else
11432 {
11433 if (unordered_p)
11434 return "fucompp\n\tfnstsw\t%0";
11435 else
11436 return "fcompp\n\tfnstsw\t%0";
11437 }
11438 }
11439 else
11440 {
11441 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
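      /* For example, mask 9 (eflags_p and stack_top_dies set) selects
         "fcomip", and mask 2 (only unordered_p set) selects the "fucom"
         form.  */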
11442
11443 static const char * const alt[16] =
11444 {
11445 "fcom%z2\t%y2\n\tfnstsw\t%0",
11446 "fcomp%z2\t%y2\n\tfnstsw\t%0",
11447 "fucom%z2\t%y2\n\tfnstsw\t%0",
11448 "fucomp%z2\t%y2\n\tfnstsw\t%0",
11449
11450 "ficom%z2\t%y2\n\tfnstsw\t%0",
11451 "ficomp%z2\t%y2\n\tfnstsw\t%0",
11452 NULL,
11453 NULL,
11454
11455 "fcomi\t{%y1, %0|%0, %y1}",
11456 "fcomip\t{%y1, %0|%0, %y1}",
11457 "fucomi\t{%y1, %0|%0, %y1}",
11458 "fucomip\t{%y1, %0|%0, %y1}",
11459
11460 NULL,
11461 NULL,
11462 NULL,
11463 NULL
11464 };
11465
11466 int mask;
11467 const char *ret;
11468
11469 mask = eflags_p << 3;
11470 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
11471 mask |= unordered_p << 1;
11472 mask |= stack_top_dies;
11473
11474 gcc_assert (mask < 16);
11475 ret = alt[mask];
11476 gcc_assert (ret);
11477
11478 return ret;
11479 }
11480 }
11481
11482 void
11483 ix86_output_addr_vec_elt (FILE *file, int value)
11484 {
11485 const char *directive = ASM_LONG;
11486
11487 #ifdef ASM_QUAD
11488 if (TARGET_64BIT)
11489 directive = ASM_QUAD;
11490 #else
11491 gcc_assert (!TARGET_64BIT);
11492 #endif
11493
11494 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
11495 }
11496
11497 void
11498 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
11499 {
11500 const char *directive = ASM_LONG;
11501
11502 #ifdef ASM_QUAD
11503 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
11504 directive = ASM_QUAD;
11505 #else
11506 gcc_assert (!TARGET_64BIT);
11507 #endif
11508 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
11509 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
11510 fprintf (file, "%s%s%d-%s%d\n",
11511 directive, LPREFIX, value, LPREFIX, rel);
11512 else if (HAVE_AS_GOTOFF_IN_DATA)
11513 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
11514 #if TARGET_MACHO
11515 else if (TARGET_MACHO)
11516 {
11517 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
11518 machopic_output_function_base_name (file);
11519 fprintf(file, "\n");
11520 }
11521 #endif
11522 else
11523 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
11524 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
11525 }
11526 \f
11527 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
11528 for the target. */
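/* The xor form is shorter but sets the flags, which is why a CLOBBER of
   FLAGS_REG is attached below; the mov $0 form leaves the flags alone.  */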
11529
11530 void
11531 ix86_expand_clear (rtx dest)
11532 {
11533 rtx tmp;
11534
11535 /* We play register width games, which are only valid after reload. */
11536 gcc_assert (reload_completed);
11537
11538 /* Avoid HImode and its attendant prefix byte. */
11539 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
11540 dest = gen_rtx_REG (SImode, REGNO (dest));
11541 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
11542
11543 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
11544 if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ()))
11545 {
11546 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
11547 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
11548 }
11549
11550 emit_insn (tmp);
11551 }
11552
11553 /* X is an unchanging MEM. If it is a constant pool reference, return
11554 the constant pool rtx, else NULL. */
11555
11556 rtx
11557 maybe_get_pool_constant (rtx x)
11558 {
11559 x = ix86_delegitimize_address (XEXP (x, 0));
11560
11561 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
11562 return get_pool_constant (x);
11563
11564 return NULL_RTX;
11565 }
11566
11567 void
11568 ix86_expand_move (enum machine_mode mode, rtx operands[])
11569 {
11570 rtx op0, op1;
11571 enum tls_model model;
11572
11573 op0 = operands[0];
11574 op1 = operands[1];
11575
11576 if (GET_CODE (op1) == SYMBOL_REF)
11577 {
11578 model = SYMBOL_REF_TLS_MODEL (op1);
11579 if (model)
11580 {
11581 op1 = legitimize_tls_address (op1, model, true);
11582 op1 = force_operand (op1, op0);
11583 if (op1 == op0)
11584 return;
11585 }
11586 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11587 && SYMBOL_REF_DLLIMPORT_P (op1))
11588 op1 = legitimize_dllimport_symbol (op1, false);
11589 }
11590 else if (GET_CODE (op1) == CONST
11591 && GET_CODE (XEXP (op1, 0)) == PLUS
11592 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
11593 {
11594 rtx addend = XEXP (XEXP (op1, 0), 1);
11595 rtx symbol = XEXP (XEXP (op1, 0), 0);
11596 rtx tmp = NULL;
11597
11598 model = SYMBOL_REF_TLS_MODEL (symbol);
11599 if (model)
11600 tmp = legitimize_tls_address (symbol, model, true);
11601 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11602 && SYMBOL_REF_DLLIMPORT_P (symbol))
11603 tmp = legitimize_dllimport_symbol (symbol, true);
11604
11605 if (tmp)
11606 {
11607 tmp = force_operand (tmp, NULL);
11608 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
11609 op0, 1, OPTAB_DIRECT);
11610 if (tmp == op0)
11611 return;
11612 }
11613 }
11614
11615 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
11616 {
11617 if (TARGET_MACHO && !TARGET_64BIT)
11618 {
11619 #if TARGET_MACHO
11620 if (MACHOPIC_PURE)
11621 {
11622 rtx temp = ((reload_in_progress
11623 || ((op0 && REG_P (op0))
11624 && mode == Pmode))
11625 ? op0 : gen_reg_rtx (Pmode));
11626 op1 = machopic_indirect_data_reference (op1, temp);
11627 op1 = machopic_legitimize_pic_address (op1, mode,
11628 temp == op1 ? 0 : temp);
11629 }
11630 else if (MACHOPIC_INDIRECT)
11631 op1 = machopic_indirect_data_reference (op1, 0);
11632 if (op0 == op1)
11633 return;
11634 #endif
11635 }
11636 else
11637 {
11638 if (MEM_P (op0))
11639 op1 = force_reg (Pmode, op1);
11640 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
11641 {
11642 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
11643 op1 = legitimize_pic_address (op1, reg);
11644 if (op0 == op1)
11645 return;
11646 }
11647 }
11648 }
11649 else
11650 {
11651 if (MEM_P (op0)
11652 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
11653 || !push_operand (op0, mode))
11654 && MEM_P (op1))
11655 op1 = force_reg (mode, op1);
11656
11657 if (push_operand (op0, mode)
11658 && ! general_no_elim_operand (op1, mode))
11659 op1 = copy_to_mode_reg (mode, op1);
11660
11661 /* Force large constants in 64-bit compilation into a register
11662 so that they can be CSEed. */
11663 if (can_create_pseudo_p ()
11664 && (mode == DImode) && TARGET_64BIT
11665 && immediate_operand (op1, mode)
11666 && !x86_64_zext_immediate_operand (op1, VOIDmode)
11667 && !register_operand (op0, mode)
11668 && optimize)
11669 op1 = copy_to_mode_reg (mode, op1);
11670
11671 if (can_create_pseudo_p ()
11672 && FLOAT_MODE_P (mode)
11673 && GET_CODE (op1) == CONST_DOUBLE)
11674 {
11675 /* If we are loading a floating point constant to a register,
11676 force the value to memory now, since we'll get better code
11677 out of the back end. */
11678
11679 op1 = validize_mem (force_const_mem (mode, op1));
11680 if (!register_operand (op0, mode))
11681 {
11682 rtx temp = gen_reg_rtx (mode);
11683 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
11684 emit_move_insn (op0, temp);
11685 return;
11686 }
11687 }
11688 }
11689
11690 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
11691 }
11692
11693 void
11694 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
11695 {
11696 rtx op0 = operands[0], op1 = operands[1];
11697 unsigned int align = GET_MODE_ALIGNMENT (mode);
11698
11699 /* Force constants other than zero into memory. We do not know how
11700 the instructions used to build constants modify the upper 64 bits
11701 of the register; once we have that information we may be able
11702 to handle some of them more efficiently. */
11703 if (can_create_pseudo_p ()
11704 && register_operand (op0, mode)
11705 && (CONSTANT_P (op1)
11706 || (GET_CODE (op1) == SUBREG
11707 && CONSTANT_P (SUBREG_REG (op1))))
11708 && standard_sse_constant_p (op1) <= 0)
11709 op1 = validize_mem (force_const_mem (mode, op1));
11710
11711 /* We need to check memory alignment for SSE mode since an attribute
11712 can make operands unaligned. */
11713 if (can_create_pseudo_p ()
11714 && SSE_REG_MODE_P (mode)
11715 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
11716 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
11717 {
11718 rtx tmp[2];
11719
11720 /* ix86_expand_vector_move_misalign() does not like constants ... */
11721 if (CONSTANT_P (op1)
11722 || (GET_CODE (op1) == SUBREG
11723 && CONSTANT_P (SUBREG_REG (op1))))
11724 op1 = validize_mem (force_const_mem (mode, op1));
11725
11726 /* ... nor both arguments in memory. */
11727 if (!register_operand (op0, mode)
11728 && !register_operand (op1, mode))
11729 op1 = force_reg (mode, op1);
11730
11731 tmp[0] = op0; tmp[1] = op1;
11732 ix86_expand_vector_move_misalign (mode, tmp);
11733 return;
11734 }
11735
11736 /* Make operand1 a register if it isn't already. */
11737 if (can_create_pseudo_p ()
11738 && !register_operand (op0, mode)
11739 && !register_operand (op1, mode))
11740 {
11741 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
11742 return;
11743 }
11744
11745 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
11746 }
11747
11748 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
11749 straight to ix86_expand_vector_move. */
11750 /* Code generation for scalar reg-reg moves of single and double precision data:
11751 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
11752 movaps reg, reg
11753 else
11754 movss reg, reg
11755 if (x86_sse_partial_reg_dependency == true)
11756 movapd reg, reg
11757 else
11758 movsd reg, reg
11759
11760 Code generation for scalar loads of double precision data:
11761 if (x86_sse_split_regs == true)
11762 movlpd mem, reg (gas syntax)
11763 else
11764 movsd mem, reg
11765
11766 Code generation for unaligned packed loads of single precision data
11767 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
11768 if (x86_sse_unaligned_move_optimal)
11769 movups mem, reg
11770
11771 if (x86_sse_partial_reg_dependency == true)
11772 {
11773 xorps reg, reg
11774 movlps mem, reg
11775 movhps mem+8, reg
11776 }
11777 else
11778 {
11779 movlps mem, reg
11780 movhps mem+8, reg
11781 }
11782
11783 Code generation for unaligned packed loads of double precision data
11784 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
11785 if (x86_sse_unaligned_move_optimal)
11786 movupd mem, reg
11787
11788 if (x86_sse_split_regs == true)
11789 {
11790 movlpd mem, reg
11791 movhpd mem+8, reg
11792 }
11793 else
11794 {
11795 movsd mem, reg
11796 movhpd mem+8, reg
11797 }
11798 */
11799
11800 void
11801 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
11802 {
11803 rtx op0, op1, m;
11804
11805 op0 = operands[0];
11806 op1 = operands[1];
11807
11808 if (MEM_P (op1))
11809 {
11810 /* If we're optimizing for size, movups is the smallest. */
11811 if (optimize_insn_for_size_p ())
11812 {
11813 op0 = gen_lowpart (V4SFmode, op0);
11814 op1 = gen_lowpart (V4SFmode, op1);
11815 emit_insn (gen_sse_movups (op0, op1));
11816 return;
11817 }
11818
11819 /* ??? If we have typed data, then it would appear that using
11820 movdqu is the only way to get unaligned data loaded with
11821 integer type. */
11822 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
11823 {
11824 op0 = gen_lowpart (V16QImode, op0);
11825 op1 = gen_lowpart (V16QImode, op1);
11826 emit_insn (gen_sse2_movdqu (op0, op1));
11827 return;
11828 }
11829
11830 if (TARGET_SSE2 && mode == V2DFmode)
11831 {
11832 rtx zero;
11833
11834 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
11835 {
11836 op0 = gen_lowpart (V2DFmode, op0);
11837 op1 = gen_lowpart (V2DFmode, op1);
11838 emit_insn (gen_sse2_movupd (op0, op1));
11839 return;
11840 }
11841
11842 /* When SSE registers are split into halves, we can avoid
11843 writing to the top half twice. */
11844 if (TARGET_SSE_SPLIT_REGS)
11845 {
11846 emit_clobber (op0);
11847 zero = op0;
11848 }
11849 else
11850 {
11851 /* ??? Not sure about the best option for the Intel chips.
11852 The following would seem to satisfy; the register is
11853 entirely cleared, breaking the dependency chain. We
11854 then store to the upper half, with a dependency depth
11855 of one. A rumor has it that Intel recommends two movsd
11856 followed by an unpacklpd, but this is unconfirmed. And
11857 given that the dependency depth of the unpacklpd would
11858 still be one, I'm not sure why this would be better. */
11859 zero = CONST0_RTX (V2DFmode);
11860 }
11861
11862 m = adjust_address (op1, DFmode, 0);
11863 emit_insn (gen_sse2_loadlpd (op0, zero, m));
11864 m = adjust_address (op1, DFmode, 8);
11865 emit_insn (gen_sse2_loadhpd (op0, op0, m));
11866 }
11867 else
11868 {
11869 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
11870 {
11871 op0 = gen_lowpart (V4SFmode, op0);
11872 op1 = gen_lowpart (V4SFmode, op1);
11873 emit_insn (gen_sse_movups (op0, op1));
11874 return;
11875 }
11876
11877 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
11878 emit_move_insn (op0, CONST0_RTX (mode));
11879 else
11880 emit_clobber (op0);
11881
11882 if (mode != V4SFmode)
11883 op0 = gen_lowpart (V4SFmode, op0);
11884 m = adjust_address (op1, V2SFmode, 0);
11885 emit_insn (gen_sse_loadlps (op0, op0, m));
11886 m = adjust_address (op1, V2SFmode, 8);
11887 emit_insn (gen_sse_loadhps (op0, op0, m));
11888 }
11889 }
11890 else if (MEM_P (op0))
11891 {
11892 /* If we're optimizing for size, movups is the smallest. */
11893 if (optimize_insn_for_size_p ())
11894 {
11895 op0 = gen_lowpart (V4SFmode, op0);
11896 op1 = gen_lowpart (V4SFmode, op1);
11897 emit_insn (gen_sse_movups (op0, op1));
11898 return;
11899 }
11900
11901 /* ??? Similar to above, only less clear because of quote
11902 typeless stores unquote. */
11903 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
11904 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
11905 {
11906 op0 = gen_lowpart (V16QImode, op0);
11907 op1 = gen_lowpart (V16QImode, op1);
11908 emit_insn (gen_sse2_movdqu (op0, op1));
11909 return;
11910 }
11911
11912 if (TARGET_SSE2 && mode == V2DFmode)
11913 {
11914 m = adjust_address (op0, DFmode, 0);
11915 emit_insn (gen_sse2_storelpd (m, op1));
11916 m = adjust_address (op0, DFmode, 8);
11917 emit_insn (gen_sse2_storehpd (m, op1));
11918 }
11919 else
11920 {
11921 if (mode != V4SFmode)
11922 op1 = gen_lowpart (V4SFmode, op1);
11923 m = adjust_address (op0, V2SFmode, 0);
11924 emit_insn (gen_sse_storelps (m, op1));
11925 m = adjust_address (op0, V2SFmode, 8);
11926 emit_insn (gen_sse_storehps (m, op1));
11927 }
11928 }
11929 else
11930 gcc_unreachable ();
11931 }
11932
11933 /* Expand a push in MODE. This is some mode for which we do not support
11934 proper push instructions, at least from the registers that we expect
11935 the value to live in. */
11936
11937 void
11938 ix86_expand_push (enum machine_mode mode, rtx x)
11939 {
11940 rtx tmp;
11941
11942 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
11943 GEN_INT (-GET_MODE_SIZE (mode)),
11944 stack_pointer_rtx, 1, OPTAB_DIRECT);
11945 if (tmp != stack_pointer_rtx)
11946 emit_move_insn (stack_pointer_rtx, tmp);
11947
11948 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
11949 emit_move_insn (tmp, x);
11950 }
11951
11952 /* Helper function of ix86_fixup_binary_operands to canonicalize
11953 operand order. Returns true if the operands should be swapped. */
11954
11955 static bool
11956 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
11957 rtx operands[])
11958 {
11959 rtx dst = operands[0];
11960 rtx src1 = operands[1];
11961 rtx src2 = operands[2];
11962
11963 /* If the operation is not commutative, we can't do anything. */
11964 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
11965 return false;
11966
11967 /* Highest priority is that src1 should match dst. */
11968 if (rtx_equal_p (dst, src1))
11969 return false;
11970 if (rtx_equal_p (dst, src2))
11971 return true;
11972
11973 /* Next highest priority is that immediate constants come second. */
11974 if (immediate_operand (src2, mode))
11975 return false;
11976 if (immediate_operand (src1, mode))
11977 return true;
11978
11979 /* Lowest priority is that memory references should come second. */
11980 if (MEM_P (src2))
11981 return false;
11982 if (MEM_P (src1))
11983 return true;
11984
11985 return false;
11986 }
11987
11988
11989 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
11990 destination to use for the operation. If different from the true
11991 destination in operands[0], a copy operation will be required. */
11992
11993 rtx
11994 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
11995 rtx operands[])
11996 {
11997 rtx dst = operands[0];
11998 rtx src1 = operands[1];
11999 rtx src2 = operands[2];
12000
12001 /* Canonicalize operand order. */
12002 if (ix86_swap_binary_operands_p (code, mode, operands))
12003 {
12004 rtx temp;
12005
12006 /* It is invalid to swap operands of different modes. */
12007 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
12008
12009 temp = src1;
12010 src1 = src2;
12011 src2 = temp;
12012 }
12013
12014 /* Both source operands cannot be in memory. */
12015 if (MEM_P (src1) && MEM_P (src2))
12016 {
12017 /* Optimization: Only read from memory once. */
12018 if (rtx_equal_p (src1, src2))
12019 {
12020 src2 = force_reg (mode, src2);
12021 src1 = src2;
12022 }
12023 else
12024 src2 = force_reg (mode, src2);
12025 }
12026
12027 /* If the destination is memory, and we do not have matching source
12028 operands, do things in registers. */
12029 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12030 dst = gen_reg_rtx (mode);
12031
12032 /* Source 1 cannot be a constant. */
12033 if (CONSTANT_P (src1))
12034 src1 = force_reg (mode, src1);
12035
12036 /* Source 1 cannot be a non-matching memory. */
12037 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
12038 src1 = force_reg (mode, src1);
12039
12040 operands[1] = src1;
12041 operands[2] = src2;
12042 return dst;
12043 }
12044
12045 /* Similarly, but assume that the destination has already been
12046 set up properly. */
12047
12048 void
12049 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
12050 enum machine_mode mode, rtx operands[])
12051 {
12052 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
12053 gcc_assert (dst == operands[0]);
12054 }
12055
12056 /* Attempt to expand a binary operator. Make the expansion closer to the
12057 actual machine than just general_operand, which would allow 3 separate
12058 memory references (one output, two input) in a single insn. */
12059
12060 void
12061 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
12062 rtx operands[])
12063 {
12064 rtx src1, src2, dst, op, clob;
12065
12066 dst = ix86_fixup_binary_operands (code, mode, operands);
12067 src1 = operands[1];
12068 src2 = operands[2];
12069
12070 /* Emit the instruction. */
12071
12072 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
12073 if (reload_in_progress)
12074 {
12075 /* Reload doesn't know about the flags register, and doesn't know that
12076 it doesn't want to clobber it. We can only do this with PLUS. */
12077 gcc_assert (code == PLUS);
12078 emit_insn (op);
12079 }
12080 else
12081 {
12082 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12083 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
12084 }
12085
12086 /* Fix up the destination if needed. */
12087 if (dst != operands[0])
12088 emit_move_insn (operands[0], dst);
12089 }
12090
12091 /* Return TRUE or FALSE depending on whether the binary operator meets the
12092 appropriate constraints. */
12093
12094 int
12095 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
12096 rtx operands[3])
12097 {
12098 rtx dst = operands[0];
12099 rtx src1 = operands[1];
12100 rtx src2 = operands[2];
12101
12102 /* Both source operands cannot be in memory. */
12103 if (MEM_P (src1) && MEM_P (src2))
12104 return 0;
12105
12106 /* Canonicalize operand order for commutative operators. */
12107 if (ix86_swap_binary_operands_p (code, mode, operands))
12108 {
12109 rtx temp = src1;
12110 src1 = src2;
12111 src2 = temp;
12112 }
12113
12114 /* If the destination is memory, we must have a matching source operand. */
12115 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12116 return 0;
12117
12118 /* Source 1 cannot be a constant. */
12119 if (CONSTANT_P (src1))
12120 return 0;
12121
12122 /* Source 1 cannot be a non-matching memory. */
12123 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
12124 return 0;
12125
12126 return 1;
12127 }
12128
12129 /* Attempt to expand a unary operator. Make the expansion closer to the
12130 actual machine than just general_operand, which would allow 2 separate
12131 memory references (one output, one input) in a single insn. */
12132
12133 void
12134 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
12135 rtx operands[])
12136 {
12137 int matching_memory;
12138 rtx src, dst, op, clob;
12139
12140 dst = operands[0];
12141 src = operands[1];
12142
12143 /* If the destination is memory, and we do not have matching source
12144 operands, do things in registers. */
12145 matching_memory = 0;
12146 if (MEM_P (dst))
12147 {
12148 if (rtx_equal_p (dst, src))
12149 matching_memory = 1;
12150 else
12151 dst = gen_reg_rtx (mode);
12152 }
12153
12154 /* When source operand is memory, destination must match. */
12155 if (MEM_P (src) && !matching_memory)
12156 src = force_reg (mode, src);
12157
12158 /* Emit the instruction. */
12159
12160 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
12161 if (reload_in_progress || code == NOT)
12162 {
12163 /* Reload doesn't know about the flags register, and doesn't know that
12164 it doesn't want to clobber it. */
12165 gcc_assert (code == NOT);
12166 emit_insn (op);
12167 }
12168 else
12169 {
12170 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12171 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
12172 }
12173
12174 /* Fix up the destination if needed. */
12175 if (dst != operands[0])
12176 emit_move_insn (operands[0], dst);
12177 }
12178
12179 /* Return TRUE or FALSE depending on whether the unary operator meets the
12180 appropriate constraints. */
12181
12182 int
12183 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
12184 enum machine_mode mode ATTRIBUTE_UNUSED,
12185 rtx operands[2] ATTRIBUTE_UNUSED)
12186 {
12187 /* If one of the operands is memory, source and destination must match. */
12188 if ((MEM_P (operands[0])
12189 || MEM_P (operands[1]))
12190 && ! rtx_equal_p (operands[0], operands[1]))
12191 return FALSE;
12192 return TRUE;
12193 }
12194
12195 /* Post-reload splitter for converting an SF or DFmode value in an
12196 SSE register into an unsigned SImode. */
12197
12198 void
12199 ix86_split_convert_uns_si_sse (rtx operands[])
12200 {
12201 enum machine_mode vecmode;
12202 rtx value, large, zero_or_two31, input, two31, x;
12203
12204 large = operands[1];
12205 zero_or_two31 = operands[2];
12206 input = operands[3];
12207 two31 = operands[4];
12208 vecmode = GET_MODE (large);
12209 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
12210
12211 /* Load up the value into the low element. We must ensure that the other
12212 elements are valid floats -- zero is the easiest such value. */
12213 if (MEM_P (input))
12214 {
12215 if (vecmode == V4SFmode)
12216 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
12217 else
12218 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
12219 }
12220 else
12221 {
12222 input = gen_rtx_REG (vecmode, REGNO (input));
12223 emit_move_insn (value, CONST0_RTX (vecmode));
12224 if (vecmode == V4SFmode)
12225 emit_insn (gen_sse_movss (value, value, input));
12226 else
12227 emit_insn (gen_sse2_movsd (value, value, input));
12228 }
12229
12230 emit_move_insn (large, two31);
12231 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
12232
12233 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
12234 emit_insn (gen_rtx_SET (VOIDmode, large, x));
12235
12236 x = gen_rtx_AND (vecmode, zero_or_two31, large);
12237 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
12238
12239 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
12240 emit_insn (gen_rtx_SET (VOIDmode, value, x));
12241
12242 large = gen_rtx_REG (V4SImode, REGNO (large));
12243 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
12244
12245 x = gen_rtx_REG (V4SImode, REGNO (value));
12246 if (vecmode == V4SFmode)
12247 emit_insn (gen_sse2_cvttps2dq (x, value));
12248 else
12249 emit_insn (gen_sse2_cvttpd2dq (x, value));
12250 value = x;
12251
12252 emit_insn (gen_xorv4si3 (value, value, large));
12253 }
12254
12255 /* Convert an unsigned DImode value into a DFmode, using only SSE.
12256 Expects the 64-bit DImode to be supplied in a pair of integral
12257 registers. Requires SSE2; will use SSE3 if available. For x86_32,
12258 -mfpmath=sse, !optimize_size only. */
12259
12260 void
12261 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
12262 {
12263 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
12264 rtx int_xmm, fp_xmm;
12265 rtx biases, exponents;
12266 rtx x;
12267
12268 int_xmm = gen_reg_rtx (V4SImode);
12269 if (TARGET_INTER_UNIT_MOVES)
12270 emit_insn (gen_movdi_to_sse (int_xmm, input));
12271 else if (TARGET_SSE_SPLIT_REGS)
12272 {
12273 emit_clobber (int_xmm);
12274 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
12275 }
12276 else
12277 {
12278 x = gen_reg_rtx (V2DImode);
12279 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
12280 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
12281 }
12282
12283 x = gen_rtx_CONST_VECTOR (V4SImode,
12284 gen_rtvec (4, GEN_INT (0x43300000UL),
12285 GEN_INT (0x45300000UL),
12286 const0_rtx, const0_rtx));
12287 exponents = validize_mem (force_const_mem (V4SImode, x));
12288
12289 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
12290 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
12291
12292 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
12293 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
12294 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
12295 (0x1.0p84 + double(fp_value_hi_xmm)).
12296 Note these exponents differ by 32. */
12297
12298 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
12299
12300 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
12301 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
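  /* That is, the two lanes become exactly lo and hi*2^32, and the final
     addition below reconstructs the 64-bit value; e.g. for input 2^33 + 5,
     hi = 2 and lo = 5, giving 2*2^32 + 5.0.  */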
12302 real_ldexp (&bias_lo_rvt, &dconst1, 52);
12303 real_ldexp (&bias_hi_rvt, &dconst1, 84);
12304 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
12305 x = const_double_from_real_value (bias_hi_rvt, DFmode);
12306 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
12307 biases = validize_mem (force_const_mem (V2DFmode, biases));
12308 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
12309
12310 /* Add the upper and lower DFmode values together. */
12311 if (TARGET_SSE3)
12312 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
12313 else
12314 {
12315 x = copy_to_mode_reg (V2DFmode, fp_xmm);
12316 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
12317 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
12318 }
12319
12320 ix86_expand_vector_extract (false, target, fp_xmm, 0);
12321 }
12322
12323 /* Not used, but eases macroization of patterns. */
12324 void
12325 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
12326 rtx input ATTRIBUTE_UNUSED)
12327 {
12328 gcc_unreachable ();
12329 }
12330
12331 /* Convert an unsigned SImode value into a DFmode. Only currently used
12332 for SSE, but applicable anywhere. */
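/* The unsigned input is biased down by 2^31 (adding INT_MIN just flips
   the sign bit), converted with the signed SImode->DFmode pattern, and
   then 2^31.0 is added back; e.g. 0xffffffff becomes 0x7fffffff, which
   converts to 2147483647.0 and yields 4294967295.0.  */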
12333
12334 void
12335 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
12336 {
12337 REAL_VALUE_TYPE TWO31r;
12338 rtx x, fp;
12339
12340 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
12341 NULL, 1, OPTAB_DIRECT);
12342
12343 fp = gen_reg_rtx (DFmode);
12344 emit_insn (gen_floatsidf2 (fp, x));
12345
12346 real_ldexp (&TWO31r, &dconst1, 31);
12347 x = const_double_from_real_value (TWO31r, DFmode);
12348
12349 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
12350 if (x != target)
12351 emit_move_insn (target, x);
12352 }
12353
12354 /* Convert a signed DImode value into a DFmode. Only used for SSE in
12355 32-bit mode; otherwise we have a direct convert instruction. */
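/* The result is computed as (double) (signed) hi * 2^32 + (double) lo,
   with lo treated as unsigned, so the sign of the 64-bit value is
   carried entirely by the high word.  */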
12356
12357 void
12358 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
12359 {
12360 REAL_VALUE_TYPE TWO32r;
12361 rtx fp_lo, fp_hi, x;
12362
12363 fp_lo = gen_reg_rtx (DFmode);
12364 fp_hi = gen_reg_rtx (DFmode);
12365
12366 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
12367
12368 real_ldexp (&TWO32r, &dconst1, 32);
12369 x = const_double_from_real_value (TWO32r, DFmode);
12370 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
12371
12372 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
12373
12374 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
12375 0, OPTAB_DIRECT);
12376 if (x != target)
12377 emit_move_insn (target, x);
12378 }
12379
12380 /* Convert an unsigned SImode value into a SFmode, using only SSE.
12381 For x86_32, -mfpmath=sse, !optimize_size only. */
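/* The input is split into 16-bit halves so that each half is nonnegative
   and converts exactly with the signed SImode->SFmode pattern even when
   bit 31 of the input is set; the result is rebuilt as hi*2^16 + lo.  */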
12382 void
12383 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
12384 {
12385 REAL_VALUE_TYPE ONE16r;
12386 rtx fp_hi, fp_lo, int_hi, int_lo, x;
12387
12388 real_ldexp (&ONE16r, &dconst1, 16);
12389 x = const_double_from_real_value (ONE16r, SFmode);
12390 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
12391 NULL, 0, OPTAB_DIRECT);
12392 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
12393 NULL, 0, OPTAB_DIRECT);
12394 fp_hi = gen_reg_rtx (SFmode);
12395 fp_lo = gen_reg_rtx (SFmode);
12396 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
12397 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
12398 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
12399 0, OPTAB_DIRECT);
12400 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
12401 0, OPTAB_DIRECT);
12402 if (!rtx_equal_p (target, fp_hi))
12403 emit_move_insn (target, fp_hi);
12404 }
12405
12406 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
12407 then replicate the value for all elements of the vector
12408 register. */
12409
12410 rtx
12411 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
12412 {
12413 rtvec v;
12414 switch (mode)
12415 {
12416 case SImode:
12417 gcc_assert (vect);
12418 v = gen_rtvec (4, value, value, value, value);
12419 return gen_rtx_CONST_VECTOR (V4SImode, v);
12420
12421 case DImode:
12422 gcc_assert (vect);
12423 v = gen_rtvec (2, value, value);
12424 return gen_rtx_CONST_VECTOR (V2DImode, v);
12425
12426 case SFmode:
12427 if (vect)
12428 v = gen_rtvec (4, value, value, value, value);
12429 else
12430 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
12431 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
12432 return gen_rtx_CONST_VECTOR (V4SFmode, v);
12433
12434 case DFmode:
12435 if (vect)
12436 v = gen_rtvec (2, value, value);
12437 else
12438 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
12439 return gen_rtx_CONST_VECTOR (V2DFmode, v);
12440
12441 default:
12442 gcc_unreachable ();
12443 }
12444 }
12445
12446 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
12447 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
12448 for an SSE register. If VECT is true, then replicate the mask for
12449 all elements of the vector register. If INVERT is true, then create
12450 a mask excluding the sign bit. */
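/* For example, DFmode with VECT false yields a V2DF constant whose first
   element has only bit 63 (the sign bit) set and whose second element is
   zero; with INVERT true each element is complemented, producing the mask
   used for absolute value.  */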
12451
12452 rtx
12453 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
12454 {
12455 enum machine_mode vec_mode, imode;
12456 HOST_WIDE_INT hi, lo;
12457 int shift = 63;
12458 rtx v;
12459 rtx mask;
12460
12461 /* Find the sign bit, sign extended to 2*HWI. */
12462 switch (mode)
12463 {
12464 case SImode:
12465 case SFmode:
12466 imode = SImode;
12467 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
12468 lo = 0x80000000, hi = lo < 0;
12469 break;
12470
12471 case DImode:
12472 case DFmode:
12473 imode = DImode;
12474 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
12475 if (HOST_BITS_PER_WIDE_INT >= 64)
12476 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
12477 else
12478 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
12479 break;
12480
12481 case TImode:
12482 case TFmode:
12483 vec_mode = VOIDmode;
12484 if (HOST_BITS_PER_WIDE_INT >= 64)
12485 {
12486 imode = TImode;
12487 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
12488 }
12489 else
12490 {
12491 rtvec vec;
12492
12493 imode = DImode;
12494 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
12495
12496 if (invert)
12497 {
12498 lo = ~lo, hi = ~hi;
12499 v = constm1_rtx;
12500 }
12501 else
12502 v = const0_rtx;
12503
12504 mask = immed_double_const (lo, hi, imode);
12505
12506 vec = gen_rtvec (2, v, mask);
12507 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
12508 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
12509
12510 return v;
12511 }
12512 break;
12513
12514 default:
12515 gcc_unreachable ();
12516 }
12517
12518 if (invert)
12519 lo = ~lo, hi = ~hi;
12520
12521 /* Force this value into the low part of a fp vector constant. */
12522 mask = immed_double_const (lo, hi, imode);
12523 mask = gen_lowpart (mode, mask);
12524
12525 if (vec_mode == VOIDmode)
12526 return force_reg (mode, mask);
12527
12528 v = ix86_build_const_vector (mode, vect, mask);
12529 return force_reg (vec_mode, v);
12530 }
12531
12532 /* Generate code for floating point ABS or NEG. */
12533
12534 void
12535 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
12536 rtx operands[])
12537 {
12538 rtx mask, set, use, clob, dst, src;
12539 bool use_sse = false;
12540 bool vector_mode = VECTOR_MODE_P (mode);
12541 enum machine_mode elt_mode = mode;
12542
12543 if (vector_mode)
12544 {
12545 elt_mode = GET_MODE_INNER (mode);
12546 use_sse = true;
12547 }
12548 else if (mode == TFmode)
12549 use_sse = true;
12550 else if (TARGET_SSE_MATH)
12551 use_sse = SSE_FLOAT_MODE_P (mode);
12552
12553 /* NEG and ABS performed with SSE use bitwise mask operations.
12554 Create the appropriate mask now. */
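  /* NEG is an XOR with the sign-bit mask and ABS an AND with the
     inverted mask, as the vector branch below makes explicit.  */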
12555 if (use_sse)
12556 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
12557 else
12558 mask = NULL_RTX;
12559
12560 dst = operands[0];
12561 src = operands[1];
12562
12563 if (vector_mode)
12564 {
12565 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
12566 set = gen_rtx_SET (VOIDmode, dst, set);
12567 emit_insn (set);
12568 }
12569 else
12570 {
12571 set = gen_rtx_fmt_e (code, mode, src);
12572 set = gen_rtx_SET (VOIDmode, dst, set);
12573 if (mask)
12574 {
12575 use = gen_rtx_USE (VOIDmode, mask);
12576 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12577 emit_insn (gen_rtx_PARALLEL (VOIDmode,
12578 gen_rtvec (3, set, use, clob)));
12579 }
12580 else
12581 emit_insn (set);
12582 }
12583 }
12584
12585 /* Expand a copysign operation. Special case operand 0 being a constant. */
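/* At the bit level, copysign (x, y) is (x & ~signmask) | (y & signmask);
   the split routines below carry out exactly these AND/IOR steps.  */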
12586
12587 void
12588 ix86_expand_copysign (rtx operands[])
12589 {
12590 enum machine_mode mode;
12591 rtx dest, op0, op1, mask, nmask;
12592
12593 dest = operands[0];
12594 op0 = operands[1];
12595 op1 = operands[2];
12596
12597 mode = GET_MODE (dest);
12598
12599 if (GET_CODE (op0) == CONST_DOUBLE)
12600 {
12601 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
12602
12603 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
12604 op0 = simplify_unary_operation (ABS, mode, op0, mode);
12605
12606 if (mode == SFmode || mode == DFmode)
12607 {
12608 enum machine_mode vmode;
12609
12610 vmode = mode == SFmode ? V4SFmode : V2DFmode;
12611
12612 if (op0 == CONST0_RTX (mode))
12613 op0 = CONST0_RTX (vmode);
12614 else
12615 {
12616 rtvec v;
12617
12618 if (mode == SFmode)
12619 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
12620 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
12621 else
12622 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
12623
12624 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
12625 }
12626 }
12627 else if (op0 != CONST0_RTX (mode))
12628 op0 = force_reg (mode, op0);
12629
12630 mask = ix86_build_signbit_mask (mode, 0, 0);
12631
12632 if (mode == SFmode)
12633 copysign_insn = gen_copysignsf3_const;
12634 else if (mode == DFmode)
12635 copysign_insn = gen_copysigndf3_const;
12636 else
12637 copysign_insn = gen_copysigntf3_const;
12638
12639 emit_insn (copysign_insn (dest, op0, op1, mask));
12640 }
12641 else
12642 {
12643 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
12644
12645 nmask = ix86_build_signbit_mask (mode, 0, 1);
12646 mask = ix86_build_signbit_mask (mode, 0, 0);
12647
12648 if (mode == SFmode)
12649 copysign_insn = gen_copysignsf3_var;
12650 else if (mode == DFmode)
12651 copysign_insn = gen_copysigndf3_var;
12652 else
12653 copysign_insn = gen_copysigntf3_var;
12654
12655 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
12656 }
12657 }
12658
12659 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
12660 be a constant, and so has already been expanded into a vector constant. */
12661
12662 void
12663 ix86_split_copysign_const (rtx operands[])
12664 {
12665 enum machine_mode mode, vmode;
12666 rtx dest, op0, op1, mask, x;
12667
12668 dest = operands[0];
12669 op0 = operands[1];
12670 op1 = operands[2];
12671 mask = operands[3];
12672
12673 mode = GET_MODE (dest);
12674 vmode = GET_MODE (mask);
12675
12676 dest = simplify_gen_subreg (vmode, dest, mode, 0);
12677 x = gen_rtx_AND (vmode, dest, mask);
12678 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12679
12680 if (op0 != CONST0_RTX (vmode))
12681 {
12682 x = gen_rtx_IOR (vmode, dest, op0);
12683 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12684 }
12685 }
12686
12687 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
12688 so we have to do two masks. */
12689
12690 void
12691 ix86_split_copysign_var (rtx operands[])
12692 {
12693 enum machine_mode mode, vmode;
12694 rtx dest, scratch, op0, op1, mask, nmask, x;
12695
12696 dest = operands[0];
12697 scratch = operands[1];
12698 op0 = operands[2];
12699 op1 = operands[3];
12700 nmask = operands[4];
12701 mask = operands[5];
12702
12703 mode = GET_MODE (dest);
12704 vmode = GET_MODE (mask);
12705
12706 if (rtx_equal_p (op0, op1))
12707 {
12708 /* Shouldn't happen often (it's useless, obviously), but when it does
12709 we'd generate incorrect code if we continue below. */
12710 emit_move_insn (dest, op0);
12711 return;
12712 }
12713
12714 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
12715 {
12716 gcc_assert (REGNO (op1) == REGNO (scratch));
12717
12718 x = gen_rtx_AND (vmode, scratch, mask);
12719 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
12720
12721 dest = mask;
12722 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
12723 x = gen_rtx_NOT (vmode, dest);
12724 x = gen_rtx_AND (vmode, x, op0);
12725 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12726 }
12727 else
12728 {
12729 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
12730 {
12731 x = gen_rtx_AND (vmode, scratch, mask);
12732 }
12733 else /* alternative 2,4 */
12734 {
12735 gcc_assert (REGNO (mask) == REGNO (scratch));
12736 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
12737 x = gen_rtx_AND (vmode, scratch, op1);
12738 }
12739 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
12740
12741 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
12742 {
12743 dest = simplify_gen_subreg (vmode, op0, mode, 0);
12744 x = gen_rtx_AND (vmode, dest, nmask);
12745 }
12746 else /* alternative 3,4 */
12747 {
12748 gcc_assert (REGNO (nmask) == REGNO (dest));
12749 dest = nmask;
12750 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
12751 x = gen_rtx_AND (vmode, dest, op0);
12752 }
12753 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12754 }
12755
12756 x = gen_rtx_IOR (vmode, dest, scratch);
12757 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12758 }
12759
12760 /* Return TRUE or FALSE depending on whether the first SET in INSN
12761 has source and destination with matching CC modes, and whether the
12762 CC mode is at least as constrained as REQ_MODE. */
12763
12764 int
12765 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
12766 {
12767 rtx set;
12768 enum machine_mode set_mode;
12769
12770 set = PATTERN (insn);
12771 if (GET_CODE (set) == PARALLEL)
12772 set = XVECEXP (set, 0, 0);
12773 gcc_assert (GET_CODE (set) == SET);
12774 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
12775
12776 set_mode = GET_MODE (SET_DEST (set));
12777 switch (set_mode)
12778 {
12779 case CCNOmode:
12780 if (req_mode != CCNOmode
12781 && (req_mode != CCmode
12782 || XEXP (SET_SRC (set), 1) != const0_rtx))
12783 return 0;
12784 break;
12785 case CCmode:
12786 if (req_mode == CCGCmode)
12787 return 0;
12788 /* FALLTHRU */
12789 case CCGCmode:
12790 if (req_mode == CCGOCmode || req_mode == CCNOmode)
12791 return 0;
12792 /* FALLTHRU */
12793 case CCGOCmode:
12794 if (req_mode == CCZmode)
12795 return 0;
12796 /* FALLTHRU */
12797 case CCZmode:
12798 break;
12799
12800 default:
12801 gcc_unreachable ();
12802 }
12803
12804 return (GET_MODE (SET_SRC (set)) == set_mode);
12805 }
12806
12807 /* Generate insn patterns to do an integer compare of OPERANDS. */
12808
12809 static rtx
12810 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
12811 {
12812 enum machine_mode cmpmode;
12813 rtx tmp, flags;
12814
12815 cmpmode = SELECT_CC_MODE (code, op0, op1);
12816 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
12817
12818 /* This is very simple, but making the interface the same as in the
12819 FP case makes the rest of the code easier. */
12820 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
12821 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
12822
12823 /* Return the test that should be put into the flags user, i.e.
12824 the bcc, scc, or cmov instruction. */
12825 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
12826 }
12827
12828 /* Figure out whether to use ordered or unordered fp comparisons.
12829 Return the appropriate mode to use. */
12830
12831 enum machine_mode
12832 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
12833 {
12834 /* ??? In order to make all comparisons reversible, we do all comparisons
12835 non-trapping when compiling for IEEE. Once gcc is able to distinguish
12836 between all forms of trapping and nontrapping comparisons, we can make
12837 inequality comparisons trapping again, since that results in better code
12838 when using FCOM based compares. */
12839 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
12840 }
12841
12842 enum machine_mode
12843 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
12844 {
12845 enum machine_mode mode = GET_MODE (op0);
12846
12847 if (SCALAR_FLOAT_MODE_P (mode))
12848 {
12849 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
12850 return ix86_fp_compare_mode (code);
12851 }
12852
12853 switch (code)
12854 {
12855 /* Only zero flag is needed. */
12856 case EQ: /* ZF=0 */
12857 case NE: /* ZF!=0 */
12858 return CCZmode;
12859 /* Codes needing carry flag. */
12860 case GEU: /* CF=0 */
12861 case LTU: /* CF=1 */
12862 /* Detect overflow checks, i.e. (a + b) compared against a; they need just the carry flag. */
12863 if (GET_CODE (op0) == PLUS
12864 && rtx_equal_p (op1, XEXP (op0, 0)))
12865 return CCCmode;
12866 else
12867 return CCmode;
12868 case GTU: /* CF=0 & ZF=0 */
12869 case LEU: /* CF=1 | ZF=1 */
12870 /* Detect overflow checks, i.e. (a - b) compared against a; they need just the carry flag. */
12871 if (GET_CODE (op0) == MINUS
12872 && rtx_equal_p (op1, XEXP (op0, 0)))
12873 return CCCmode;
12874 else
12875 return CCmode;
12876 /* Codes possibly doable only with sign flag when
12877 comparing against zero. */
12878 case GE: /* SF=OF or SF=0 */
12879 case LT: /* SF<>OF or SF=1 */
12880 if (op1 == const0_rtx)
12881 return CCGOCmode;
12882 else
12883 /* For other cases Carry flag is not required. */
12884 return CCGCmode;
12885 /* Codes doable only with sign flag when comparing
12886 against zero, but we miss jump instruction for it
12887 so we need to use relational tests against overflow
12888 that thus needs to be zero. */
12889 case GT: /* ZF=0 & SF=OF */
12890 case LE: /* ZF=1 | SF<>OF */
12891 if (op1 == const0_rtx)
12892 return CCNOmode;
12893 else
12894 return CCGCmode;
12895 /* The strcmp pattern does (use flags), and combine may ask us for the
12896 proper mode. */
12897 case USE:
12898 return CCmode;
12899 default:
12900 gcc_unreachable ();
12901 }
12902 }
12903
12904 /* Return the fixed registers used for condition codes. */
12905
12906 static bool
12907 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
12908 {
12909 *p1 = FLAGS_REG;
12910 *p2 = FPSR_REG;
12911 return true;
12912 }
12913
12914 /* If two condition code modes are compatible, return a condition code
12915 mode which is compatible with both. Otherwise, return
12916 VOIDmode. */
12917
12918 static enum machine_mode
12919 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
12920 {
12921 if (m1 == m2)
12922 return m1;
12923
12924 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
12925 return VOIDmode;
12926
12927 if ((m1 == CCGCmode && m2 == CCGOCmode)
12928 || (m1 == CCGOCmode && m2 == CCGCmode))
12929 return CCGCmode;
12930
12931 switch (m1)
12932 {
12933 default:
12934 gcc_unreachable ();
12935
12936 case CCmode:
12937 case CCGCmode:
12938 case CCGOCmode:
12939 case CCNOmode:
12940 case CCAmode:
12941 case CCCmode:
12942 case CCOmode:
12943 case CCSmode:
12944 case CCZmode:
12945 switch (m2)
12946 {
12947 default:
12948 return VOIDmode;
12949
12950 case CCmode:
12951 case CCGCmode:
12952 case CCGOCmode:
12953 case CCNOmode:
12954 case CCAmode:
12955 case CCCmode:
12956 case CCOmode:
12957 case CCSmode:
12958 case CCZmode:
12959 return CCmode;
12960 }
12961
12962 case CCFPmode:
12963 case CCFPUmode:
12964 /* These are only compatible with themselves, which we already
12965 checked above. */
12966 return VOIDmode;
12967 }
12968 }
12969
12970 /* Split comparison code CODE into comparisons we can do using branch
12971 instructions. BYPASS_CODE is the comparison code for the branch that
12972 will branch around FIRST_CODE and SECOND_CODE. If one of the branches
12973 is not required, its code is set to UNKNOWN.
12974 We never require more than two branches. */
12975
12976 void
12977 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
12978 enum rtx_code *first_code,
12979 enum rtx_code *second_code)
12980 {
12981 *first_code = code;
12982 *bypass_code = UNKNOWN;
12983 *second_code = UNKNOWN;
12984
12985 /* The fcomi comparison sets flags as follows:
12986
12987 cmp ZF PF CF
12988 > 0 0 0
12989 < 0 0 1
12990 = 1 0 0
12991 un 1 1 1 */
12992
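/* The ZF/PF/CF columns above are the EFLAGS bits that fcomi (or fnstsw
followed by sahf) produces from the FPU's C3/C2/C0 bits, which is why the
codes chosen below correspond to unsigned integer conditions. */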
12993 switch (code)
12994 {
12995 case GT: /* GTU - CF=0 & ZF=0 */
12996 case GE: /* GEU - CF=0 */
12997 case ORDERED: /* PF=0 */
12998 case UNORDERED: /* PF=1 */
12999 case UNEQ: /* EQ - ZF=1 */
13000 case UNLT: /* LTU - CF=1 */
13001 case UNLE: /* LEU - CF=1 | ZF=1 */
13002 case LTGT: /* EQ - ZF=0 */
13003 break;
13004 case LT: /* LTU - CF=1 - fails on unordered */
13005 *first_code = UNLT;
13006 *bypass_code = UNORDERED;
13007 break;
13008 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
13009 *first_code = UNLE;
13010 *bypass_code = UNORDERED;
13011 break;
13012 case EQ: /* EQ - ZF=1 - fails on unordered */
13013 *first_code = UNEQ;
13014 *bypass_code = UNORDERED;
13015 break;
13016 case NE: /* NE - ZF=0 - fails on unordered */
13017 *first_code = LTGT;
13018 *second_code = UNORDERED;
13019 break;
13020 case UNGE: /* GEU - CF=0 - fails on unordered */
13021 *first_code = GE;
13022 *second_code = UNORDERED;
13023 break;
13024 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
13025 *first_code = GT;
13026 *second_code = UNORDERED;
13027 break;
13028 default:
13029 gcc_unreachable ();
13030 }
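/* Without IEEE-conforming FP math we do not care how unordered operands
compare, so a single branch always suffices. */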
13031 if (!TARGET_IEEE_FP)
13032 {
13033 *second_code = UNKNOWN;
13034 *bypass_code = UNKNOWN;
13035 }
13036 }
13037
13038 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
13039 All following functions use the number of instructions as a cost metric.
13040 In the future this should be tweaked to compute bytes for optimize_size and
13041 take into account the performance of various instructions on various CPUs. */
13042 static int
13043 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
13044 {
13045 if (!TARGET_IEEE_FP)
13046 return 4;
13047 /* The cost of code output by ix86_expand_fp_compare. */
13048 switch (code)
13049 {
13050 case UNLE:
13051 case UNLT:
13052 case LTGT:
13053 case GT:
13054 case GE:
13055 case UNORDERED:
13056 case ORDERED:
13057 case UNEQ:
13058 return 4;
13059 break;
13060 case LT:
13061 case NE:
13062 case EQ:
13063 case UNGE:
13064 return 5;
13065 break;
13066 case LE:
13067 case UNGT:
13068 return 6;
13069 break;
13070 default:
13071 gcc_unreachable ();
13072 }
13073 }
13074
13075 /* Return cost of comparison done using fcomi operation.
13076 See ix86_fp_comparison_arithmetics_cost for the metrics. */
13077 static int
13078 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
13079 {
13080 enum rtx_code bypass_code, first_code, second_code;
13081 /* Return an arbitrarily high cost when the instruction is not supported - this
13082 prevents gcc from using it. */
13083 if (!TARGET_CMOVE)
13084 return 1024;
13085 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
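/* The base cost of 2 covers fcomi + jcc; one extra jcc is added when a
bypass or second branch is needed. */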
13086 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
13087 }
13088
13089 /* Return cost of comparison done using sahf operation.
13090 See ix86_fp_comparison_arithmetics_cost for the metrics. */
13091 static int
13092 ix86_fp_comparison_sahf_cost (enum rtx_code code)
13093 {
13094 enum rtx_code bypass_code, first_code, second_code;
13095 /* Return an arbitrarily high cost when the instruction is not preferred - this
13096 prevents gcc from using it. */
13097 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())))
13098 return 1024;
13099 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
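/* The base cost of 3 covers fnstsw + sahf + jcc; one extra jcc is added
when a bypass or second branch is needed. */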
13100 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
13101 }
13102
13103 /* Compute cost of the comparison done using any method.
13104 See ix86_fp_comparison_arithmetics_cost for the metrics. */
13105 static int
13106 ix86_fp_comparison_cost (enum rtx_code code)
13107 {
13108 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
13109 int min;
13110
13111 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
13112 sahf_cost = ix86_fp_comparison_sahf_cost (code);
13113
13114 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
13115 if (min > sahf_cost)
13116 min = sahf_cost;
13117 if (min > fcomi_cost)
13118 min = fcomi_cost;
13119 return min;
13120 }
13121
13122 /* Return true if we should use an FCOMI instruction for this
13123 fp comparison. */
13124
13125 int
13126 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
13127 {
13128 enum rtx_code swapped_code = swap_condition (code);
13129
13130 return ((ix86_fp_comparison_cost (code)
13131 == ix86_fp_comparison_fcomi_cost (code))
13132 || (ix86_fp_comparison_cost (swapped_code)
13133 == ix86_fp_comparison_fcomi_cost (swapped_code)));
13134 }
13135
13136 /* Swap, force into registers, or otherwise massage the two operands
13137 to a fp comparison. The operands are updated in place; the new
13138 comparison code is returned. */
13139
13140 static enum rtx_code
13141 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
13142 {
13143 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
13144 rtx op0 = *pop0, op1 = *pop1;
13145 enum machine_mode op_mode = GET_MODE (op0);
13146 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
13147
13148 /* All of the unordered compare instructions only work on registers.
13149 The same is true of the fcomi compare instructions. The XFmode
13150 compare instructions require registers except when comparing
13151 against zero or when converting operand 1 from fixed point to
13152 floating point. */
13153
13154 if (!is_sse
13155 && (fpcmp_mode == CCFPUmode
13156 || (op_mode == XFmode
13157 && ! (standard_80387_constant_p (op0) == 1
13158 || standard_80387_constant_p (op1) == 1)
13159 && GET_CODE (op1) != FLOAT)
13160 || ix86_use_fcomi_compare (code)))
13161 {
13162 op0 = force_reg (op_mode, op0);
13163 op1 = force_reg (op_mode, op1);
13164 }
13165 else
13166 {
13167 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
13168 things around if they appear profitable, otherwise force op0
13169 into a register. */
13170
13171 if (standard_80387_constant_p (op0) == 0
13172 || (MEM_P (op0)
13173 && ! (standard_80387_constant_p (op1) == 0
13174 || MEM_P (op1))))
13175 {
13176 rtx tmp;
13177 tmp = op0, op0 = op1, op1 = tmp;
13178 code = swap_condition (code);
13179 }
13180
13181 if (!REG_P (op0))
13182 op0 = force_reg (op_mode, op0);
13183
13184 if (CONSTANT_P (op1))
13185 {
13186 int tmp = standard_80387_constant_p (op1);
13187 if (tmp == 0)
13188 op1 = validize_mem (force_const_mem (op_mode, op1));
13189 else if (tmp == 1)
13190 {
13191 if (TARGET_CMOVE)
13192 op1 = force_reg (op_mode, op1);
13193 }
13194 else
13195 op1 = force_reg (op_mode, op1);
13196 }
13197 }
13198
13199 /* Try to rearrange the comparison to make it cheaper. */
13200 if (ix86_fp_comparison_cost (code)
13201 > ix86_fp_comparison_cost (swap_condition (code))
13202 && (REG_P (op1) || can_create_pseudo_p ()))
13203 {
13204 rtx tmp;
13205 tmp = op0, op0 = op1, op1 = tmp;
13206 code = swap_condition (code);
13207 if (!REG_P (op0))
13208 op0 = force_reg (op_mode, op0);
13209 }
13210
13211 *pop0 = op0;
13212 *pop1 = op1;
13213 return code;
13214 }
13215
13216 /* Convert the comparison codes we use to represent an FP comparison into the
13217 integer code that will result in a proper branch. Return UNKNOWN if no such
13218 code is available. */
13219
13220 enum rtx_code
13221 ix86_fp_compare_code_to_integer (enum rtx_code code)
13222 {
13223 switch (code)
13224 {
13225 case GT:
13226 return GTU;
13227 case GE:
13228 return GEU;
13229 case ORDERED:
13230 case UNORDERED:
13231 return code;
13232 break;
13233 case UNEQ:
13234 return EQ;
13235 break;
13236 case UNLT:
13237 return LTU;
13238 break;
13239 case UNLE:
13240 return LEU;
13241 break;
13242 case LTGT:
13243 return NE;
13244 break;
13245 default:
13246 return UNKNOWN;
13247 }
13248 }
13249
13250 /* Generate insn patterns to do a floating point compare of OPERANDS. */
13251
13252 static rtx
13253 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
13254 rtx *second_test, rtx *bypass_test)
13255 {
13256 enum machine_mode fpcmp_mode, intcmp_mode;
13257 rtx tmp, tmp2;
13258 int cost = ix86_fp_comparison_cost (code);
13259 enum rtx_code bypass_code, first_code, second_code;
13260
13261 fpcmp_mode = ix86_fp_compare_mode (code);
13262 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
13263
13264 if (second_test)
13265 *second_test = NULL_RTX;
13266 if (bypass_test)
13267 *bypass_test = NULL_RTX;
13268
13269 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13270
13271 /* Do fcomi/sahf based test when profitable. */
13272 if (ix86_fp_comparison_arithmetics_cost (code) > cost
13273 && (bypass_code == UNKNOWN || bypass_test)
13274 && (second_code == UNKNOWN || second_test))
13275 {
13276 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
13277 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
13278 tmp);
13279 if (TARGET_CMOVE)
13280 emit_insn (tmp);
13281 else
13282 {
13283 gcc_assert (TARGET_SAHF);
13284
13285 if (!scratch)
13286 scratch = gen_reg_rtx (HImode);
13287 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
13288
13289 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
13290 }
13291
13292 /* The FP codes work out to act like unsigned. */
13293 intcmp_mode = fpcmp_mode;
13294 code = first_code;
13295 if (bypass_code != UNKNOWN)
13296 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
13297 gen_rtx_REG (intcmp_mode, FLAGS_REG),
13298 const0_rtx);
13299 if (second_code != UNKNOWN)
13300 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
13301 gen_rtx_REG (intcmp_mode, FLAGS_REG),
13302 const0_rtx);
13303 }
13304 else
13305 {
13306 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
13307 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
13308 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
13309 if (!scratch)
13310 scratch = gen_reg_rtx (HImode);
13311 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
13312
13313 /* In the unordered case, we have to check C2 for NaN's, which
13314 doesn't happen to work out to anything nice combination-wise.
13315 So do some bit twiddling on the value we've got in AH to come
13316 up with an appropriate set of condition codes. */
13317
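/* After the fnstsw above, the high byte of SCRATCH holds the FPU condition
bits: 0x01 = C0 (below), 0x04 = C2 (unordered), 0x40 = C3 (equal). Masks
such as 0x45, 0x44 and 0x05 test combinations of these bits. */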
13318 intcmp_mode = CCNOmode;
13319 switch (code)
13320 {
13321 case GT:
13322 case UNGT:
13323 if (code == GT || !TARGET_IEEE_FP)
13324 {
13325 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
13326 code = EQ;
13327 }
13328 else
13329 {
13330 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13331 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
13332 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
13333 intcmp_mode = CCmode;
13334 code = GEU;
13335 }
13336 break;
13337 case LT:
13338 case UNLT:
13339 if (code == LT && TARGET_IEEE_FP)
13340 {
13341 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13342 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
13343 intcmp_mode = CCmode;
13344 code = EQ;
13345 }
13346 else
13347 {
13348 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
13349 code = NE;
13350 }
13351 break;
13352 case GE:
13353 case UNGE:
13354 if (code == GE || !TARGET_IEEE_FP)
13355 {
13356 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
13357 code = EQ;
13358 }
13359 else
13360 {
13361 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13362 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
13363 GEN_INT (0x01)));
13364 code = NE;
13365 }
13366 break;
13367 case LE:
13368 case UNLE:
13369 if (code == LE && TARGET_IEEE_FP)
13370 {
13371 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13372 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
13373 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
13374 intcmp_mode = CCmode;
13375 code = LTU;
13376 }
13377 else
13378 {
13379 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
13380 code = NE;
13381 }
13382 break;
13383 case EQ:
13384 case UNEQ:
13385 if (code == EQ && TARGET_IEEE_FP)
13386 {
13387 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13388 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
13389 intcmp_mode = CCmode;
13390 code = EQ;
13391 }
13392 else
13393 {
13394 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
13395 code = NE;
13396 break;
13397 }
13398 break;
13399 case NE:
13400 case LTGT:
13401 if (code == NE && TARGET_IEEE_FP)
13402 {
13403 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13404 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
13405 GEN_INT (0x40)));
13406 code = NE;
13407 }
13408 else
13409 {
13410 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
13411 code = EQ;
13412 }
13413 break;
13414
13415 case UNORDERED:
13416 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
13417 code = NE;
13418 break;
13419 case ORDERED:
13420 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
13421 code = EQ;
13422 break;
13423
13424 default:
13425 gcc_unreachable ();
13426 }
13427 }
13428
13429 /* Return the test that should be put into the flags user, i.e.
13430 the bcc, scc, or cmov instruction. */
13431 return gen_rtx_fmt_ee (code, VOIDmode,
13432 gen_rtx_REG (intcmp_mode, FLAGS_REG),
13433 const0_rtx);
13434 }
13435
13436 rtx
13437 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
13438 {
13439 rtx op0, op1, ret;
13440 op0 = ix86_compare_op0;
13441 op1 = ix86_compare_op1;
13442
13443 if (second_test)
13444 *second_test = NULL_RTX;
13445 if (bypass_test)
13446 *bypass_test = NULL_RTX;
13447
13448 if (ix86_compare_emitted)
13449 {
13450 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
13451 ix86_compare_emitted = NULL_RTX;
13452 }
13453 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
13454 {
13455 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
13456 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
13457 second_test, bypass_test);
13458 }
13459 else
13460 ret = ix86_expand_int_compare (code, op0, op1);
13461
13462 return ret;
13463 }
13464
13465 /* Return true if CODE will result in a nontrivial jump sequence. */
13466 bool
13467 ix86_fp_jump_nontrivial_p (enum rtx_code code)
13468 {
13469 enum rtx_code bypass_code, first_code, second_code;
13470 if (!TARGET_CMOVE)
13471 return true;
13472 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13473 return bypass_code != UNKNOWN || second_code != UNKNOWN;
13474 }
13475
13476 void
13477 ix86_expand_branch (enum rtx_code code, rtx label)
13478 {
13479 rtx tmp;
13480
13481 /* If we have already emitted a compare insn, go straight to simple.
13482 ix86_expand_compare won't emit anything if ix86_compare_emitted
13483 is non-NULL. */
13484 if (ix86_compare_emitted)
13485 goto simple;
13486
13487 switch (GET_MODE (ix86_compare_op0))
13488 {
13489 case QImode:
13490 case HImode:
13491 case SImode:
13492 simple:
13493 tmp = ix86_expand_compare (code, NULL, NULL);
13494 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13495 gen_rtx_LABEL_REF (VOIDmode, label),
13496 pc_rtx);
13497 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
13498 return;
13499
13500 case SFmode:
13501 case DFmode:
13502 case XFmode:
13503 {
13504 rtvec vec;
13505 int use_fcomi;
13506 enum rtx_code bypass_code, first_code, second_code;
13507
13508 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
13509 &ix86_compare_op1);
13510
13511 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13512
13513 /* Check whether we will use the natural sequence with one jump. If
13514 so, we can expand the jump early. Otherwise delay expansion by
13515 creating a compound insn so as not to confuse the optimizers. */
13516 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
13517 {
13518 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
13519 gen_rtx_LABEL_REF (VOIDmode, label),
13520 pc_rtx, NULL_RTX, NULL_RTX);
13521 }
13522 else
13523 {
13524 tmp = gen_rtx_fmt_ee (code, VOIDmode,
13525 ix86_compare_op0, ix86_compare_op1);
13526 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13527 gen_rtx_LABEL_REF (VOIDmode, label),
13528 pc_rtx);
13529 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
13530
13531 use_fcomi = ix86_use_fcomi_compare (code);
13532 vec = rtvec_alloc (3 + !use_fcomi);
13533 RTVEC_ELT (vec, 0) = tmp;
13534 RTVEC_ELT (vec, 1)
13535 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
13536 RTVEC_ELT (vec, 2)
13537 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
13538 if (! use_fcomi)
13539 RTVEC_ELT (vec, 3)
13540 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
13541
13542 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
13543 }
13544 return;
13545 }
13546
13547 case DImode:
13548 if (TARGET_64BIT)
13549 goto simple;
13550 case TImode:
13551 /* Expand a DImode (or TImode on 64-bit) branch into multiple compare+branch. */
13552 {
13553 rtx lo[2], hi[2], label2;
13554 enum rtx_code code1, code2, code3;
13555 enum machine_mode submode;
13556
13557 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
13558 {
13559 tmp = ix86_compare_op0;
13560 ix86_compare_op0 = ix86_compare_op1;
13561 ix86_compare_op1 = tmp;
13562 code = swap_condition (code);
13563 }
13564 if (GET_MODE (ix86_compare_op0) == DImode)
13565 {
13566 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
13567 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
13568 submode = SImode;
13569 }
13570 else
13571 {
13572 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
13573 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
13574 submode = DImode;
13575 }
13576
13577 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
13578 avoid two branches. This costs one extra insn, so disable when
13579 optimizing for size. */
13580
13581 if ((code == EQ || code == NE)
13582 && (!optimize_insn_for_size_p ()
13583 || hi[1] == const0_rtx || lo[1] == const0_rtx))
13584 {
13585 rtx xor0, xor1;
13586
13587 xor1 = hi[0];
13588 if (hi[1] != const0_rtx)
13589 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
13590 NULL_RTX, 0, OPTAB_WIDEN);
13591
13592 xor0 = lo[0];
13593 if (lo[1] != const0_rtx)
13594 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
13595 NULL_RTX, 0, OPTAB_WIDEN);
13596
13597 tmp = expand_binop (submode, ior_optab, xor1, xor0,
13598 NULL_RTX, 0, OPTAB_WIDEN);
13599
13600 ix86_compare_op0 = tmp;
13601 ix86_compare_op1 = const0_rtx;
13602 ix86_expand_branch (code, label);
13603 return;
13604 }
13605
13606 /* Otherwise, if we are doing a less-than or greater-or-equal comparison,
13607 op1 is a constant, and the low word is zero, then we can just
13608 examine the high word. Similarly for a low word of -1 and
13609 less-or-equal or greater-than. */
13610
13611 if (CONST_INT_P (hi[1]))
13612 switch (code)
13613 {
13614 case LT: case LTU: case GE: case GEU:
13615 if (lo[1] == const0_rtx)
13616 {
13617 ix86_compare_op0 = hi[0];
13618 ix86_compare_op1 = hi[1];
13619 ix86_expand_branch (code, label);
13620 return;
13621 }
13622 break;
13623 case LE: case LEU: case GT: case GTU:
13624 if (lo[1] == constm1_rtx)
13625 {
13626 ix86_compare_op0 = hi[0];
13627 ix86_compare_op1 = hi[1];
13628 ix86_expand_branch (code, label);
13629 return;
13630 }
13631 break;
13632 default:
13633 break;
13634 }
13635
13636 /* Otherwise, we need two or three jumps. */
13637
13638 label2 = gen_label_rtx ();
13639
13640 code1 = code;
13641 code2 = swap_condition (code);
13642 code3 = unsigned_condition (code);
13643
13644 switch (code)
13645 {
13646 case LT: case GT: case LTU: case GTU:
13647 break;
13648
13649 case LE: code1 = LT; code2 = GT; break;
13650 case GE: code1 = GT; code2 = LT; break;
13651 case LEU: code1 = LTU; code2 = GTU; break;
13652 case GEU: code1 = GTU; code2 = LTU; break;
13653
13654 case EQ: code1 = UNKNOWN; code2 = NE; break;
13655 case NE: code2 = UNKNOWN; break;
13656
13657 default:
13658 gcc_unreachable ();
13659 }
13660
13661 /*
13662 * a < b =>
13663 * if (hi(a) < hi(b)) goto true;
13664 * if (hi(a) > hi(b)) goto false;
13665 * if (lo(a) < lo(b)) goto true;
13666 * false:
13667 */
13668
13669 ix86_compare_op0 = hi[0];
13670 ix86_compare_op1 = hi[1];
13671
13672 if (code1 != UNKNOWN)
13673 ix86_expand_branch (code1, label);
13674 if (code2 != UNKNOWN)
13675 ix86_expand_branch (code2, label2);
13676
13677 ix86_compare_op0 = lo[0];
13678 ix86_compare_op1 = lo[1];
13679 ix86_expand_branch (code3, label);
13680
13681 if (code2 != UNKNOWN)
13682 emit_label (label2);
13683 return;
13684 }
13685
13686 default:
13687 gcc_unreachable ();
13688 }
13689 }
13690
13691 /* Split branch based on floating point condition. */
13692 void
13693 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
13694 rtx target1, rtx target2, rtx tmp, rtx pushed)
13695 {
13696 rtx second, bypass;
13697 rtx label = NULL_RTX;
13698 rtx condition;
13699 int bypass_probability = -1, second_probability = -1, probability = -1;
13700 rtx i;
13701
13702 if (target2 != pc_rtx)
13703 {
13704 rtx tmp = target2;
13705 code = reverse_condition_maybe_unordered (code);
13706 target2 = target1;
13707 target1 = tmp;
13708 }
13709
13710 condition = ix86_expand_fp_compare (code, op1, op2,
13711 tmp, &second, &bypass);
13712
13713 /* Remove pushed operand from stack. */
13714 if (pushed)
13715 ix86_free_from_memory (GET_MODE (pushed));
13716
13717 if (split_branch_probability >= 0)
13718 {
13719 /* Distribute the probabilities across the jumps.
13720 Assume that BYPASS and SECOND always test
13721 for UNORDERED. */
13722 probability = split_branch_probability;
13723
13724 /* A value of 1 is low enough that the probability does not need
13725 to be updated. Later we may run some experiments and see
13726 whether unordered values are more frequent in practice. */
13727 if (bypass)
13728 bypass_probability = 1;
13729 if (second)
13730 second_probability = 1;
13731 }
13732 if (bypass != NULL_RTX)
13733 {
13734 label = gen_label_rtx ();
13735 i = emit_jump_insn (gen_rtx_SET
13736 (VOIDmode, pc_rtx,
13737 gen_rtx_IF_THEN_ELSE (VOIDmode,
13738 bypass,
13739 gen_rtx_LABEL_REF (VOIDmode,
13740 label),
13741 pc_rtx)));
13742 if (bypass_probability >= 0)
13743 REG_NOTES (i)
13744 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13745 GEN_INT (bypass_probability),
13746 REG_NOTES (i));
13747 }
13748 i = emit_jump_insn (gen_rtx_SET
13749 (VOIDmode, pc_rtx,
13750 gen_rtx_IF_THEN_ELSE (VOIDmode,
13751 condition, target1, target2)));
13752 if (probability >= 0)
13753 REG_NOTES (i)
13754 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13755 GEN_INT (probability),
13756 REG_NOTES (i));
13757 if (second != NULL_RTX)
13758 {
13759 i = emit_jump_insn (gen_rtx_SET
13760 (VOIDmode, pc_rtx,
13761 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
13762 target2)));
13763 if (second_probability >= 0)
13764 REG_NOTES (i)
13765 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13766 GEN_INT (second_probability),
13767 REG_NOTES (i));
13768 }
13769 if (label != NULL_RTX)
13770 emit_label (label);
13771 }
13772
13773 int
13774 ix86_expand_setcc (enum rtx_code code, rtx dest)
13775 {
13776 rtx ret, tmp, tmpreg, equiv;
13777 rtx second_test, bypass_test;
13778
13779 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
13780 return 0; /* FAIL */
13781
13782 gcc_assert (GET_MODE (dest) == QImode);
13783
13784 ret = ix86_expand_compare (code, &second_test, &bypass_test);
13785 PUT_MODE (ret, QImode);
13786
13787 tmp = dest;
13788 tmpreg = dest;
13789
13790 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
13791 if (bypass_test || second_test)
13792 {
13793 rtx test = second_test;
13794 int bypass = 0;
13795 rtx tmp2 = gen_reg_rtx (QImode);
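/* Combine the two setcc results: AND with the reversed bypass condition
(the first result is only valid when the bypass test is false), or OR in
the second test. */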
13796 if (bypass_test)
13797 {
13798 gcc_assert (!second_test);
13799 test = bypass_test;
13800 bypass = 1;
13801 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
13802 }
13803 PUT_MODE (test, QImode);
13804 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
13805
13806 if (bypass)
13807 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
13808 else
13809 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
13810 }
13811
13812 /* Attach a REG_EQUAL note describing the comparison result. */
13813 if (ix86_compare_op0 && ix86_compare_op1)
13814 {
13815 equiv = simplify_gen_relational (code, QImode,
13816 GET_MODE (ix86_compare_op0),
13817 ix86_compare_op0, ix86_compare_op1);
13818 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
13819 }
13820
13821 return 1; /* DONE */
13822 }
13823
13824 /* Expand a comparison setting or clearing the carry flag. Return true when
13825 successful, and set *POP to the comparison operation. */
13826 static bool
13827 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
13828 {
13829 enum machine_mode mode =
13830 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
13831
13832 /* Do not handle double-word compares, which go through a special path. */
13833 if (mode == (TARGET_64BIT ? TImode : DImode))
13834 return false;
13835
13836 if (SCALAR_FLOAT_MODE_P (mode))
13837 {
13838 rtx second_test = NULL, bypass_test = NULL;
13839 rtx compare_op, compare_seq;
13840
13841 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
13842
13843 /* Shortcut: following common codes never translate
13844 into carry flag compares. */
13845 if (code == EQ || code == NE || code == UNEQ || code == LTGT
13846 || code == ORDERED || code == UNORDERED)
13847 return false;
13848
13849 /* These comparisons require zero flag; swap operands so they won't. */
13850 if ((code == GT || code == UNLE || code == LE || code == UNGT)
13851 && !TARGET_IEEE_FP)
13852 {
13853 rtx tmp = op0;
13854 op0 = op1;
13855 op1 = tmp;
13856 code = swap_condition (code);
13857 }
13858
13859 /* Try to expand the comparison and verify that we end up with
13860 a carry flag based comparison. This fails only when we decide
13861 to expand the comparison using arithmetic, which is not a
13862 common scenario. */
13863 start_sequence ();
13864 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
13865 &second_test, &bypass_test);
13866 compare_seq = get_insns ();
13867 end_sequence ();
13868
13869 if (second_test || bypass_test)
13870 return false;
13871
13872 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
13873 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
13874 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
13875 else
13876 code = GET_CODE (compare_op);
13877
13878 if (code != LTU && code != GEU)
13879 return false;
13880
13881 emit_insn (compare_seq);
13882 *pop = compare_op;
13883 return true;
13884 }
13885
13886 if (!INTEGRAL_MODE_P (mode))
13887 return false;
13888
13889 switch (code)
13890 {
13891 case LTU:
13892 case GEU:
13893 break;
13894
13895 /* Convert a==0 into (unsigned)a<1. */
13896 case EQ:
13897 case NE:
13898 if (op1 != const0_rtx)
13899 return false;
13900 op1 = const1_rtx;
13901 code = (code == EQ ? LTU : GEU);
13902 break;
13903
13904 /* Convert a>b into b<a or a>=b+1. */
13905 case GTU:
13906 case LEU:
13907 if (CONST_INT_P (op1))
13908 {
13909 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
13910 /* Bail out on overflow. We could still swap the operands, but that
13911 would force loading the constant into a register. */
13912 if (op1 == const0_rtx
13913 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
13914 return false;
13915 code = (code == GTU ? GEU : LTU);
13916 }
13917 else
13918 {
13919 rtx tmp = op1;
13920 op1 = op0;
13921 op0 = tmp;
13922 code = (code == GTU ? LTU : GEU);
13923 }
13924 break;
13925
13926 /* Convert a>=0 into (unsigned)a<0x80000000. */
13927 case LT:
13928 case GE:
13929 if (mode == DImode || op1 != const0_rtx)
13930 return false;
13931 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
13932 code = (code == LT ? GEU : LTU);
13933 break;
13934 case LE:
13935 case GT:
13936 if (mode == DImode || op1 != constm1_rtx)
13937 return false;
13938 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
13939 code = (code == LE ? GEU : LTU);
13940 break;
13941
13942 default:
13943 return false;
13944 }
13945 /* Swapping operands may cause constant to appear as first operand. */
13946 if (!nonimmediate_operand (op0, VOIDmode))
13947 {
13948 if (!can_create_pseudo_p ())
13949 return false;
13950 op0 = force_reg (mode, op0);
13951 }
13952 ix86_compare_op0 = op0;
13953 ix86_compare_op1 = op1;
13954 *pop = ix86_expand_compare (code, NULL, NULL);
13955 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
13956 return true;
13957 }
13958
13959 int
13960 ix86_expand_int_movcc (rtx operands[])
13961 {
13962 enum rtx_code code = GET_CODE (operands[1]), compare_code;
13963 rtx compare_seq, compare_op;
13964 rtx second_test, bypass_test;
13965 enum machine_mode mode = GET_MODE (operands[0]);
13966 bool sign_bit_compare_p = false;
13967
13968 start_sequence ();
13969 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13970 compare_seq = get_insns ();
13971 end_sequence ();
13972
13973 compare_code = GET_CODE (compare_op);
13974
13975 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
13976 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
13977 sign_bit_compare_p = true;
13978
13979 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
13980 HImode insns, we'd be swallowed in word prefix ops. */
13981
13982 if ((mode != HImode || TARGET_FAST_PREFIX)
13983 && (mode != (TARGET_64BIT ? TImode : DImode))
13984 && CONST_INT_P (operands[2])
13985 && CONST_INT_P (operands[3]))
13986 {
13987 rtx out = operands[0];
13988 HOST_WIDE_INT ct = INTVAL (operands[2]);
13989 HOST_WIDE_INT cf = INTVAL (operands[3]);
13990 HOST_WIDE_INT diff;
13991
13992 diff = ct - cf;
13993 /* Sign bit compares are better done using shifts than by using
13994 sbb. */
13995 if (sign_bit_compare_p
13996 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
13997 ix86_compare_op1, &compare_op))
13998 {
13999 /* Detect overlap between destination and compare sources. */
14000 rtx tmp = out;
14001
14002 if (!sign_bit_compare_p)
14003 {
14004 bool fpcmp = false;
14005
14006 compare_code = GET_CODE (compare_op);
14007
14008 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
14009 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
14010 {
14011 fpcmp = true;
14012 compare_code = ix86_fp_compare_code_to_integer (compare_code);
14013 }
14014
14015 /* To simplify the rest of the code, restrict to the GEU case. */
14016 if (compare_code == LTU)
14017 {
14018 HOST_WIDE_INT tmp = ct;
14019 ct = cf;
14020 cf = tmp;
14021 compare_code = reverse_condition (compare_code);
14022 code = reverse_condition (code);
14023 }
14024 else
14025 {
14026 if (fpcmp)
14027 PUT_CODE (compare_op,
14028 reverse_condition_maybe_unordered
14029 (GET_CODE (compare_op)));
14030 else
14031 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
14032 }
14033 diff = ct - cf;
14034
14035 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
14036 || reg_overlap_mentioned_p (out, ix86_compare_op1))
14037 tmp = gen_reg_rtx (mode);
14038
14039 if (mode == DImode)
14040 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
14041 else
14042 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
14043 }
14044 else
14045 {
14046 if (code == GT || code == GE)
14047 code = reverse_condition (code);
14048 else
14049 {
14050 HOST_WIDE_INT tmp = ct;
14051 ct = cf;
14052 cf = tmp;
14053 diff = ct - cf;
14054 }
14055 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
14056 ix86_compare_op1, VOIDmode, 0, -1);
14057 }
14058
14059 if (diff == 1)
14060 {
14061 /*
14062 * cmpl op0,op1
14063 * sbbl dest,dest
14064 * [addl dest, ct]
14065 *
14066 * Size 5 - 8.
14067 */
14068 if (ct)
14069 tmp = expand_simple_binop (mode, PLUS,
14070 tmp, GEN_INT (ct),
14071 copy_rtx (tmp), 1, OPTAB_DIRECT);
14072 }
14073 else if (cf == -1)
14074 {
14075 /*
14076 * cmpl op0,op1
14077 * sbbl dest,dest
14078 * orl $ct, dest
14079 *
14080 * Size 8.
14081 */
14082 tmp = expand_simple_binop (mode, IOR,
14083 tmp, GEN_INT (ct),
14084 copy_rtx (tmp), 1, OPTAB_DIRECT);
14085 }
14086 else if (diff == -1 && ct)
14087 {
14088 /*
14089 * cmpl op0,op1
14090 * sbbl dest,dest
14091 * notl dest
14092 * [addl dest, cf]
14093 *
14094 * Size 8 - 11.
14095 */
14096 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
14097 if (cf)
14098 tmp = expand_simple_binop (mode, PLUS,
14099 copy_rtx (tmp), GEN_INT (cf),
14100 copy_rtx (tmp), 1, OPTAB_DIRECT);
14101 }
14102 else
14103 {
14104 /*
14105 * cmpl op0,op1
14106 * sbbl dest,dest
14107 * [notl dest]
14108 * andl cf - ct, dest
14109 * [addl dest, ct]
14110 *
14111 * Size 8 - 11.
14112 */
14113
14114 if (cf == 0)
14115 {
14116 cf = ct;
14117 ct = 0;
14118 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
14119 }
14120
14121 tmp = expand_simple_binop (mode, AND,
14122 copy_rtx (tmp),
14123 gen_int_mode (cf - ct, mode),
14124 copy_rtx (tmp), 1, OPTAB_DIRECT);
14125 if (ct)
14126 tmp = expand_simple_binop (mode, PLUS,
14127 copy_rtx (tmp), GEN_INT (ct),
14128 copy_rtx (tmp), 1, OPTAB_DIRECT);
14129 }
14130
14131 if (!rtx_equal_p (tmp, out))
14132 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
14133
14134 return 1; /* DONE */
14135 }
14136
14137 if (diff < 0)
14138 {
14139 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
14140
14141 HOST_WIDE_INT tmp;
14142 tmp = ct, ct = cf, cf = tmp;
14143 diff = -diff;
14144
14145 if (SCALAR_FLOAT_MODE_P (cmp_mode))
14146 {
14147 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
14148
14149 /* We may be reversing an unordered compare to a normal compare, which
14150 is not valid in general (we may convert a non-trapping condition
14151 to a trapping one); however, on i386 we currently emit all
14152 comparisons unordered. */
14153 compare_code = reverse_condition_maybe_unordered (compare_code);
14154 code = reverse_condition_maybe_unordered (code);
14155 }
14156 else
14157 {
14158 compare_code = reverse_condition (compare_code);
14159 code = reverse_condition (code);
14160 }
14161 }
14162
14163 compare_code = UNKNOWN;
14164 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
14165 && CONST_INT_P (ix86_compare_op1))
14166 {
14167 if (ix86_compare_op1 == const0_rtx
14168 && (code == LT || code == GE))
14169 compare_code = code;
14170 else if (ix86_compare_op1 == constm1_rtx)
14171 {
14172 if (code == LE)
14173 compare_code = LT;
14174 else if (code == GT)
14175 compare_code = GE;
14176 }
14177 }
14178
14179 /* Optimize dest = (op0 < 0) ? -1 : cf. */
14180 if (compare_code != UNKNOWN
14181 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
14182 && (cf == -1 || ct == -1))
14183 {
14184 /* If the lea code below could be used, only optimize
14185 if it results in a 2-insn sequence. */
14186
14187 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
14188 || diff == 3 || diff == 5 || diff == 9)
14189 || (compare_code == LT && ct == -1)
14190 || (compare_code == GE && cf == -1))
14191 {
14192 /*
14193 * notl op1 (if necessary)
14194 * sarl $31, op1
14195 * orl cf, op1
14196 */
14197 if (ct != -1)
14198 {
14199 cf = ct;
14200 ct = -1;
14201 code = reverse_condition (code);
14202 }
14203
14204 out = emit_store_flag (out, code, ix86_compare_op0,
14205 ix86_compare_op1, VOIDmode, 0, -1);
14206
14207 out = expand_simple_binop (mode, IOR,
14208 out, GEN_INT (cf),
14209 out, 1, OPTAB_DIRECT);
14210 if (out != operands[0])
14211 emit_move_insn (operands[0], out);
14212
14213 return 1; /* DONE */
14214 }
14215 }
14216
14217
14218 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
14219 || diff == 3 || diff == 5 || diff == 9)
14220 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
14221 && (mode != DImode
14222 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
14223 {
14224 /*
14225 * xorl dest,dest
14226 * cmpl op1,op2
14227 * setcc dest
14228 * lea cf(dest*(ct-cf)),dest
14229 *
14230 * Size 14.
14231 *
14232 * This also catches the degenerate setcc-only case.
14233 */
14234
14235 rtx tmp;
14236 int nops;
14237
14238 out = emit_store_flag (out, code, ix86_compare_op0,
14239 ix86_compare_op1, VOIDmode, 0, 1);
14240
14241 nops = 0;
14242 /* On x86_64 the lea instruction operates on Pmode, so we need
14243 to do the arithmetic in the proper mode to match. */
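/* Express DIFF * OUT + CF as OUT * (DIFF & ~1), plus OUT when DIFF is odd,
plus CF; this is the base + index * {2,4,8} + displacement shape that a
single lea can compute (see the size comment above). */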
14244 if (diff == 1)
14245 tmp = copy_rtx (out);
14246 else
14247 {
14248 rtx out1;
14249 out1 = copy_rtx (out);
14250 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
14251 nops++;
14252 if (diff & 1)
14253 {
14254 tmp = gen_rtx_PLUS (mode, tmp, out1);
14255 nops++;
14256 }
14257 }
14258 if (cf != 0)
14259 {
14260 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
14261 nops++;
14262 }
14263 if (!rtx_equal_p (tmp, out))
14264 {
14265 if (nops == 1)
14266 out = force_operand (tmp, copy_rtx (out));
14267 else
14268 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
14269 }
14270 if (!rtx_equal_p (out, operands[0]))
14271 emit_move_insn (operands[0], copy_rtx (out));
14272
14273 return 1; /* DONE */
14274 }
14275
14276 /*
14277 * General case: Jumpful:
14278 * xorl dest,dest cmpl op1, op2
14279 * cmpl op1, op2 movl ct, dest
14280 * setcc dest jcc 1f
14281 * decl dest movl cf, dest
14282 * andl (cf-ct),dest 1:
14283 * addl ct,dest
14284 *
14285 * Size 20. Size 14.
14286 *
14287 * This is reasonably steep, but branch mispredict costs are
14288 * high on modern cpus, so consider failing only if optimizing
14289 * for space.
14290 */
14291
14292 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
14293 && BRANCH_COST >= 2)
14294 {
14295 if (cf == 0)
14296 {
14297 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
14298
14299 cf = ct;
14300 ct = 0;
14301
14302 if (SCALAR_FLOAT_MODE_P (cmp_mode))
14303 {
14304 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
14305
14306 /* We may be reversing an unordered compare to a normal compare,
14307 which is not valid in general (we may convert a non-trapping
14308 condition to a trapping one); however, on i386 we currently
14309 emit all comparisons unordered. */
14310 code = reverse_condition_maybe_unordered (code);
14311 }
14312 else
14313 {
14314 code = reverse_condition (code);
14315 if (compare_code != UNKNOWN)
14316 compare_code = reverse_condition (compare_code);
14317 }
14318 }
14319
14320 if (compare_code != UNKNOWN)
14321 {
14322 /* notl op1 (if needed)
14323 sarl $31, op1
14324 andl (cf-ct), op1
14325 addl ct, op1
14326
14327 For x < 0 (resp. x <= -1) there will be no notl,
14328 so if possible swap the constants to get rid of the
14329 complement.
14330 True/false will be -1/0 while code below (store flag
14331 followed by decrement) is 0/-1, so the constants need
14332 to be exchanged once more. */
14333
14334 if (compare_code == GE || !cf)
14335 {
14336 code = reverse_condition (code);
14337 compare_code = LT;
14338 }
14339 else
14340 {
14341 HOST_WIDE_INT tmp = cf;
14342 cf = ct;
14343 ct = tmp;
14344 }
14345
14346 out = emit_store_flag (out, code, ix86_compare_op0,
14347 ix86_compare_op1, VOIDmode, 0, -1);
14348 }
14349 else
14350 {
14351 out = emit_store_flag (out, code, ix86_compare_op0,
14352 ix86_compare_op1, VOIDmode, 0, 1);
14353
14354 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
14355 copy_rtx (out), 1, OPTAB_DIRECT);
14356 }
14357
14358 out = expand_simple_binop (mode, AND, copy_rtx (out),
14359 gen_int_mode (cf - ct, mode),
14360 copy_rtx (out), 1, OPTAB_DIRECT);
14361 if (ct)
14362 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
14363 copy_rtx (out), 1, OPTAB_DIRECT);
14364 if (!rtx_equal_p (out, operands[0]))
14365 emit_move_insn (operands[0], copy_rtx (out));
14366
14367 return 1; /* DONE */
14368 }
14369 }
14370
14371 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
14372 {
14373 /* Try a few things more with specific constants and a variable. */
14374
14375 optab op;
14376 rtx var, orig_out, out, tmp;
14377
14378 if (BRANCH_COST <= 2)
14379 return 0; /* FAIL */
14380
14381 /* If one of the two operands is an interesting constant, load a
14382 constant with the above and mask it in with a logical operation. */
14383
14384 if (CONST_INT_P (operands[2]))
14385 {
14386 var = operands[3];
14387 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
14388 operands[3] = constm1_rtx, op = and_optab;
14389 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
14390 operands[3] = const0_rtx, op = ior_optab;
14391 else
14392 return 0; /* FAIL */
14393 }
14394 else if (CONST_INT_P (operands[3]))
14395 {
14396 var = operands[2];
14397 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
14398 operands[2] = constm1_rtx, op = and_optab;
14399 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
14400 operands[2] = const0_rtx, op = ior_optab;
14401 else
14402 return 0; /* FAIL */
14403 }
14404 else
14405 return 0; /* FAIL */
14406
14407 orig_out = operands[0];
14408 tmp = gen_reg_rtx (mode);
14409 operands[0] = tmp;
14410
14411 /* Recurse to get the constant loaded. */
14412 if (ix86_expand_int_movcc (operands) == 0)
14413 return 0; /* FAIL */
14414
14415 /* Mask in the interesting variable. */
14416 out = expand_binop (mode, op, var, tmp, orig_out, 0,
14417 OPTAB_WIDEN);
14418 if (!rtx_equal_p (out, orig_out))
14419 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
14420
14421 return 1; /* DONE */
14422 }
14423
14424 /*
14425 * For comparison with above,
14426 *
14427 * movl cf,dest
14428 * movl ct,tmp
14429 * cmpl op1,op2
14430 * cmovcc tmp,dest
14431 *
14432 * Size 15.
14433 */
14434
14435 if (! nonimmediate_operand (operands[2], mode))
14436 operands[2] = force_reg (mode, operands[2]);
14437 if (! nonimmediate_operand (operands[3], mode))
14438 operands[3] = force_reg (mode, operands[3]);
14439
14440 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
14441 {
14442 rtx tmp = gen_reg_rtx (mode);
14443 emit_move_insn (tmp, operands[3]);
14444 operands[3] = tmp;
14445 }
14446 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
14447 {
14448 rtx tmp = gen_reg_rtx (mode);
14449 emit_move_insn (tmp, operands[2]);
14450 operands[2] = tmp;
14451 }
14452
14453 if (! register_operand (operands[2], VOIDmode)
14454 && (mode == QImode
14455 || ! register_operand (operands[3], VOIDmode)))
14456 operands[2] = force_reg (mode, operands[2]);
14457
14458 if (mode == QImode
14459 && ! register_operand (operands[3], VOIDmode))
14460 operands[3] = force_reg (mode, operands[3]);
14461
14462 emit_insn (compare_seq);
14463 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
14464 gen_rtx_IF_THEN_ELSE (mode,
14465 compare_op, operands[2],
14466 operands[3])));
14467 if (bypass_test)
14468 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
14469 gen_rtx_IF_THEN_ELSE (mode,
14470 bypass_test,
14471 copy_rtx (operands[3]),
14472 copy_rtx (operands[0]))));
14473 if (second_test)
14474 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
14475 gen_rtx_IF_THEN_ELSE (mode,
14476 second_test,
14477 copy_rtx (operands[2]),
14478 copy_rtx (operands[0]))));
14479
14480 return 1; /* DONE */
14481 }
14482
14483 /* Swap, force into registers, or otherwise massage the two operands
14484 to an sse comparison with a mask result. Thus we differ a bit from
14485 ix86_prepare_fp_compare_args which expects to produce a flags result.
14486
14487 The DEST operand exists to help determine whether to commute commutative
14488 operators. The POP0/POP1 operands are updated in place. The new
14489 comparison code is returned, or UNKNOWN if not implementable. */
14490
14491 static enum rtx_code
14492 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
14493 rtx *pop0, rtx *pop1)
14494 {
14495 rtx tmp;
14496
14497 switch (code)
14498 {
14499 case LTGT:
14500 case UNEQ:
14501 /* We have no LTGT as an operator. We could implement it with
14502 NE & ORDERED, but this requires an extra temporary. It's
14503 not clear that it's worth it. */
14504 return UNKNOWN;
14505
14506 case LT:
14507 case LE:
14508 case UNGT:
14509 case UNGE:
14510 /* These are supported directly. */
14511 break;
14512
14513 case EQ:
14514 case NE:
14515 case UNORDERED:
14516 case ORDERED:
14517 /* For commutative operators, try to canonicalize the destination
14518 operand to be first in the comparison - this helps reload to
14519 avoid extra moves. */
14520 if (!dest || !rtx_equal_p (dest, *pop1))
14521 break;
14522 /* FALLTHRU */
14523
14524 case GE:
14525 case GT:
14526 case UNLE:
14527 case UNLT:
14528 /* These are not supported directly. Swap the comparison operands
14529 to transform into something that is supported. */
14530 tmp = *pop0;
14531 *pop0 = *pop1;
14532 *pop1 = tmp;
14533 code = swap_condition (code);
14534 break;
14535
14536 default:
14537 gcc_unreachable ();
14538 }
14539
14540 return code;
14541 }
14542
14543 /* Detect conditional moves that exactly match min/max operational
14544 semantics. Note that this is IEEE safe, as long as we don't
14545 interchange the operands.
14546
14547 Returns FALSE if this conditional move doesn't match a MIN/MAX,
14548 and TRUE if the operation is successful and instructions are emitted. */
14549
14550 static bool
14551 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
14552 rtx cmp_op1, rtx if_true, rtx if_false)
14553 {
14554 enum machine_mode mode;
14555 bool is_min;
14556 rtx tmp;
14557
14558 if (code == LT)
14559 ;
14560 else if (code == UNGE)
14561 {
14562 tmp = if_true;
14563 if_true = if_false;
14564 if_false = tmp;
14565 }
14566 else
14567 return false;
14568
14569 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
14570 is_min = true;
14571 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
14572 is_min = false;
14573 else
14574 return false;
14575
14576 mode = GET_MODE (dest);
14577
14578 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
14579 but MODE may be a vector mode and thus not appropriate. */
14580 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
14581 {
14582 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
14583 rtvec v;
14584
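/* Using an UNSPEC here (rather than SMIN/SMAX) preserves the exact SSE
min/max operand order, and with it the hardware's NaN and signed-zero
behavior, since the rtl optimizers cannot commute or reassociate it. */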
14585 if_true = force_reg (mode, if_true);
14586 v = gen_rtvec (2, if_true, if_false);
14587 tmp = gen_rtx_UNSPEC (mode, v, u);
14588 }
14589 else
14590 {
14591 code = is_min ? SMIN : SMAX;
14592 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
14593 }
14594
14595 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
14596 return true;
14597 }
14598
14599 /* Expand an sse vector comparison. Return the register with the result. */
14600
14601 static rtx
14602 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
14603 rtx op_true, rtx op_false)
14604 {
14605 enum machine_mode mode = GET_MODE (dest);
14606 rtx x;
14607
14608 cmp_op0 = force_reg (mode, cmp_op0);
14609 if (!nonimmediate_operand (cmp_op1, mode))
14610 cmp_op1 = force_reg (mode, cmp_op1);
14611
14612 if (optimize
14613 || reg_overlap_mentioned_p (dest, op_true)
14614 || reg_overlap_mentioned_p (dest, op_false))
14615 dest = gen_reg_rtx (mode);
14616
14617 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
14618 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14619
14620 return dest;
14621 }
14622
14623 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
14624 operations. This is used for both scalar and vector conditional moves. */
14625
14626 static void
14627 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
14628 {
14629 enum machine_mode mode = GET_MODE (dest);
14630 rtx t2, t3, x;
14631
14632 if (op_false == CONST0_RTX (mode))
14633 {
14634 op_true = force_reg (mode, op_true);
14635 x = gen_rtx_AND (mode, cmp, op_true);
14636 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14637 }
14638 else if (op_true == CONST0_RTX (mode))
14639 {
14640 op_false = force_reg (mode, op_false);
14641 x = gen_rtx_NOT (mode, cmp);
14642 x = gen_rtx_AND (mode, x, op_false);
14643 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14644 }
14645 else if (TARGET_SSE5)
14646 {
14647 rtx pcmov = gen_rtx_SET (mode, dest,
14648 gen_rtx_IF_THEN_ELSE (mode, cmp,
14649 op_true,
14650 op_false));
14651 emit_insn (pcmov);
14652 }
14653 else
14654 {
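/* General case: compute DEST = (CMP & OP_TRUE) | (~CMP & OP_FALSE) using
two temporaries. */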
14655 op_true = force_reg (mode, op_true);
14656 op_false = force_reg (mode, op_false);
14657
14658 t2 = gen_reg_rtx (mode);
14659 if (optimize)
14660 t3 = gen_reg_rtx (mode);
14661 else
14662 t3 = dest;
14663
14664 x = gen_rtx_AND (mode, op_true, cmp);
14665 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
14666
14667 x = gen_rtx_NOT (mode, cmp);
14668 x = gen_rtx_AND (mode, x, op_false);
14669 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
14670
14671 x = gen_rtx_IOR (mode, t3, t2);
14672 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14673 }
14674 }
14675
14676 /* Expand a floating-point conditional move. Return true if successful. */
14677
14678 int
14679 ix86_expand_fp_movcc (rtx operands[])
14680 {
14681 enum machine_mode mode = GET_MODE (operands[0]);
14682 enum rtx_code code = GET_CODE (operands[1]);
14683 rtx tmp, compare_op, second_test, bypass_test;
14684
14685 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
14686 {
14687 enum machine_mode cmode;
14688
14689 /* Since we've no cmove for sse registers, don't force bad register
14690 allocation just to gain access to it. Deny movcc when the
14691 comparison mode doesn't match the move mode. */
14692 cmode = GET_MODE (ix86_compare_op0);
14693 if (cmode == VOIDmode)
14694 cmode = GET_MODE (ix86_compare_op1);
14695 if (cmode != mode)
14696 return 0;
14697
14698 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
14699 &ix86_compare_op0,
14700 &ix86_compare_op1);
14701 if (code == UNKNOWN)
14702 return 0;
14703
14704 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
14705 ix86_compare_op1, operands[2],
14706 operands[3]))
14707 return 1;
14708
14709 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
14710 ix86_compare_op1, operands[2], operands[3]);
14711 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
14712 return 1;
14713 }
14714
14715 /* The floating point conditional move instructions don't directly
14716 support conditions resulting from a signed integer comparison. */
14717
14718 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
14719
14723 if (!fcmov_comparison_operator (compare_op, VOIDmode))
14724 {
14725 gcc_assert (!second_test && !bypass_test);
14726 tmp = gen_reg_rtx (QImode);
14727 ix86_expand_setcc (code, tmp);
14728 code = NE;
14729 ix86_compare_op0 = tmp;
14730 ix86_compare_op1 = const0_rtx;
14731 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
14732 }
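/* If the destination overlaps an arm that one of the fix-up moves emitted
   below still needs to read, copy that arm to a fresh register first.  */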
14733 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
14734 {
14735 tmp = gen_reg_rtx (mode);
14736 emit_move_insn (tmp, operands[3]);
14737 operands[3] = tmp;
14738 }
14739 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
14740 {
14741 tmp = gen_reg_rtx (mode);
14742 emit_move_insn (tmp, operands[2]);
14743 operands[2] = tmp;
14744 }
14745
14746 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
14747 gen_rtx_IF_THEN_ELSE (mode, compare_op,
14748 operands[2], operands[3])));
14749 if (bypass_test)
14750 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
14751 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
14752 operands[3], operands[0])));
14753 if (second_test)
14754 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
14755 gen_rtx_IF_THEN_ELSE (mode, second_test,
14756 operands[2], operands[0])));
14757
14758 return 1;
14759 }
14760
14761 /* Expand a floating-point vector conditional move; a vcond operation
14762 rather than a movcc operation. */
14763
14764 bool
14765 ix86_expand_fp_vcond (rtx operands[])
14766 {
14767 enum rtx_code code = GET_CODE (operands[3]);
14768 rtx cmp;
14769
14770 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
14771 &operands[4], &operands[5]);
14772 if (code == UNKNOWN)
14773 return false;
14774
14775 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
14776 operands[5], operands[1], operands[2]))
14777 return true;
14778
14779 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
14780 operands[1], operands[2]);
14781 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
14782 return true;
14783 }
14784
14785 /* Expand a signed/unsigned integral vector conditional move. */
14786
14787 bool
14788 ix86_expand_int_vcond (rtx operands[])
14789 {
14790 enum machine_mode mode = GET_MODE (operands[0]);
14791 enum rtx_code code = GET_CODE (operands[3]);
14792 bool negate = false;
14793 rtx x, cop0, cop1;
14794
14795 cop0 = operands[4];
14796 cop1 = operands[5];
14797
14798 /* SSE5 supports all of the comparisons on all vector int types. */
14799 if (!TARGET_SSE5)
14800 {
14801 /* Canonicalize the comparison to EQ, GT, GTU. */
14802 switch (code)
14803 {
14804 case EQ:
14805 case GT:
14806 case GTU:
14807 break;
14808
14809 case NE:
14810 case LE:
14811 case LEU:
14812 code = reverse_condition (code);
14813 negate = true;
14814 break;
14815
14816 case GE:
14817 case GEU:
14818 code = reverse_condition (code);
14819 negate = true;
14820 /* FALLTHRU */
14821
14822 case LT:
14823 case LTU:
14824 code = swap_condition (code);
14825 x = cop0, cop0 = cop1, cop1 = x;
14826 break;
14827
14828 default:
14829 gcc_unreachable ();
14830 }
14831
14832 /* Only SSE4.1/SSE4.2 supports V2DImode. */
14833 if (mode == V2DImode)
14834 {
14835 switch (code)
14836 {
14837 case EQ:
14838 /* SSE4.1 supports EQ. */
14839 if (!TARGET_SSE4_1)
14840 return false;
14841 break;
14842
14843 case GT:
14844 case GTU:
14845 /* SSE4.2 supports GT/GTU. */
14846 if (!TARGET_SSE4_2)
14847 return false;
14848 break;
14849
14850 default:
14851 gcc_unreachable ();
14852 }
14853 }
14854
14855 /* Unsigned parallel compare is not supported by the hardware. Play some
14856 tricks to turn this into a signed comparison against 0. */
14857 if (code == GTU)
14858 {
14859 cop0 = force_reg (mode, cop0);
14860
14861 switch (mode)
14862 {
14863 case V4SImode:
14864 case V2DImode:
14865 {
14866 rtx t1, t2, mask;
14867
14868 /* Perform a parallel modulo subtraction. */
14869 t1 = gen_reg_rtx (mode);
14870 emit_insn ((mode == V4SImode
14871 ? gen_subv4si3
14872 : gen_subv2di3) (t1, cop0, cop1));
14873
14874 /* Extract the original sign bit of op0. */
14875 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
14876 true, false);
14877 t2 = gen_reg_rtx (mode);
14878 emit_insn ((mode == V4SImode
14879 ? gen_andv4si3
14880 : gen_andv2di3) (t2, cop0, mask));
14881
14882 /* XOR it back into the result of the subtraction. This results
14883 in the sign bit set iff we saw unsigned underflow. */
14884 x = gen_reg_rtx (mode);
14885 emit_insn ((mode == V4SImode
14886 ? gen_xorv4si3
14887 : gen_xorv2di3) (x, t1, t2));
14888
14889 code = GT;
14890 }
14891 break;
14892
14893 case V16QImode:
14894 case V8HImode:
14895 /* Perform a parallel unsigned saturating subtraction. */
14896 x = gen_reg_rtx (mode);
14897 emit_insn (gen_rtx_SET (VOIDmode, x,
14898 gen_rtx_US_MINUS (mode, cop0, cop1)));
14899
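/* The saturating subtraction is zero iff COP0 <=u COP1, so COP0 >u COP1
   is equivalent to the result being nonzero; test equality with zero and
   swap the arms via NEGATE.  */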
14900 code = EQ;
14901 negate = !negate;
14902 break;
14903
14904 default:
14905 gcc_unreachable ();
14906 }
14907
14908 cop0 = x;
14909 cop1 = CONST0_RTX (mode);
14910 }
14911 }
14912
14913 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
14914 operands[1+negate], operands[2-negate]);
14915
14916 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
14917 operands[2-negate]);
14918 return true;
14919 }
14920
14921 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
14922 true if we should do zero extension, else sign extension. HIGH_P is
14923 true if we want the N/2 high elements, else the low elements. */
14924
14925 void
14926 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
14927 {
14928 enum machine_mode imode = GET_MODE (operands[1]);
14929 rtx (*unpack)(rtx, rtx, rtx);
14930 rtx se, dest;
14931
14932 switch (imode)
14933 {
14934 case V16QImode:
14935 if (high_p)
14936 unpack = gen_vec_interleave_highv16qi;
14937 else
14938 unpack = gen_vec_interleave_lowv16qi;
14939 break;
14940 case V8HImode:
14941 if (high_p)
14942 unpack = gen_vec_interleave_highv8hi;
14943 else
14944 unpack = gen_vec_interleave_lowv8hi;
14945 break;
14946 case V4SImode:
14947 if (high_p)
14948 unpack = gen_vec_interleave_highv4si;
14949 else
14950 unpack = gen_vec_interleave_lowv4si;
14951 break;
14952 default:
14953 gcc_unreachable ();
14954 }
14955
14956 dest = gen_lowpart (imode, operands[0]);
14957
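/* Interleaving with zero zero-extends each element; for sign extension we
   interleave with a mask holding copies of each element's sign bit,
   computed below as 0 > OPERANDS[1].  */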
14958 if (unsigned_p)
14959 se = force_reg (imode, CONST0_RTX (imode));
14960 else
14961 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
14962 operands[1], pc_rtx, pc_rtx);
14963
14964 emit_insn (unpack (dest, operands[1], se));
14965 }
14966
14967 /* This function performs the same task as ix86_expand_sse_unpack,
14968 but with SSE4.1 instructions. */
14969
14970 void
14971 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
14972 {
14973 enum machine_mode imode = GET_MODE (operands[1]);
14974 rtx (*unpack)(rtx, rtx);
14975 rtx src, dest;
14976
14977 switch (imode)
14978 {
14979 case V16QImode:
14980 if (unsigned_p)
14981 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
14982 else
14983 unpack = gen_sse4_1_extendv8qiv8hi2;
14984 break;
14985 case V8HImode:
14986 if (unsigned_p)
14987 unpack = gen_sse4_1_zero_extendv4hiv4si2;
14988 else
14989 unpack = gen_sse4_1_extendv4hiv4si2;
14990 break;
14991 case V4SImode:
14992 if (unsigned_p)
14993 unpack = gen_sse4_1_zero_extendv2siv2di2;
14994 else
14995 unpack = gen_sse4_1_extendv2siv2di2;
14996 break;
14997 default:
14998 gcc_unreachable ();
14999 }
15000
15001 dest = operands[0];
15002 if (high_p)
15003 {
15004 /* Shift higher 8 bytes to lower 8 bytes. */
15005 src = gen_reg_rtx (imode);
15006 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
15007 gen_lowpart (TImode, operands[1]),
15008 GEN_INT (64)));
15009 }
15010 else
15011 src = operands[1];
15012
15013 emit_insn (unpack (dest, src));
15014 }
15015
15016 /* This function performs the same task as ix86_expand_sse_unpack,
15017    but with SSE5 instructions.  */
15018
15019 void
15020 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15021 {
15022 enum machine_mode imode = GET_MODE (operands[1]);
15023 int pperm_bytes[16];
15024 int i;
15025 int h = (high_p) ? 8 : 0;
15026 int h2;
15027 int sign_extend;
15028 rtvec v = rtvec_alloc (16);
15029 rtvec vs;
15030 rtx x, p;
15031 rtx op0 = operands[0], op1 = operands[1];
15032
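/* Build a pperm control vector in which each byte either copies a source
   byte of OP1 or supplies the zero/sign fill used to widen the element;
   see the PPERM_* flags for the encoding.  */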
15033 switch (imode)
15034 {
15035 case V16QImode:
15036 vs = rtvec_alloc (8);
15037 h2 = (high_p) ? 8 : 0;
15038 for (i = 0; i < 8; i++)
15039 {
15040 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
15041 pperm_bytes[2*i+1] = ((unsigned_p)
15042 ? PPERM_ZERO
15043 : PPERM_SIGN | PPERM_SRC2 | i | h);
15044 }
15045
15046 for (i = 0; i < 16; i++)
15047 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15048
15049 for (i = 0; i < 8; i++)
15050 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15051
15052 p = gen_rtx_PARALLEL (VOIDmode, vs);
15053 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15054 if (unsigned_p)
15055 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
15056 else
15057 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
15058 break;
15059
15060 case V8HImode:
15061 vs = rtvec_alloc (4);
15062 h2 = (high_p) ? 4 : 0;
15063 for (i = 0; i < 4; i++)
15064 {
15065 sign_extend = ((unsigned_p)
15066 ? PPERM_ZERO
15067 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
15068 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
15069 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
15070 pperm_bytes[4*i+2] = sign_extend;
15071 pperm_bytes[4*i+3] = sign_extend;
15072 }
15073
15074 for (i = 0; i < 16; i++)
15075 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15076
15077 for (i = 0; i < 4; i++)
15078 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15079
15080 p = gen_rtx_PARALLEL (VOIDmode, vs);
15081 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15082 if (unsigned_p)
15083 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
15084 else
15085 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
15086 break;
15087
15088 case V4SImode:
15089 vs = rtvec_alloc (2);
15090 h2 = (high_p) ? 2 : 0;
15091 for (i = 0; i < 2; i++)
15092 {
15093 sign_extend = ((unsigned_p)
15094 ? PPERM_ZERO
15095 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
15096 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
15097 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
15098 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
15099 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
15100 pperm_bytes[8*i+4] = sign_extend;
15101 pperm_bytes[8*i+5] = sign_extend;
15102 pperm_bytes[8*i+6] = sign_extend;
15103 pperm_bytes[8*i+7] = sign_extend;
15104 }
15105
15106 for (i = 0; i < 16; i++)
15107 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15108
15109 for (i = 0; i < 2; i++)
15110 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15111
15112 p = gen_rtx_PARALLEL (VOIDmode, vs);
15113 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15114 if (unsigned_p)
15115 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
15116 else
15117 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
15118 break;
15119
15120 default:
15121 gcc_unreachable ();
15122 }
15123
15124 return;
15125 }
15126
15127 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
15128    next narrower integer vector type.  */
15129 void
15130 ix86_expand_sse5_pack (rtx operands[3])
15131 {
15132 enum machine_mode imode = GET_MODE (operands[0]);
15133 int pperm_bytes[16];
15134 int i;
15135 rtvec v = rtvec_alloc (16);
15136 rtx x;
15137 rtx op0 = operands[0];
15138 rtx op1 = operands[1];
15139 rtx op2 = operands[2];
15140
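/* Build a pperm control vector that gathers the low (narrow) half of each
   source element: elements of OPERANDS[1] fill the low half of the result
   and elements of OPERANDS[2] the high half.  */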
15141 switch (imode)
15142 {
15143 case V16QImode:
15144 for (i = 0; i < 8; i++)
15145 {
15146 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
15147 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
15148 }
15149
15150 for (i = 0; i < 16; i++)
15151 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15152
15153 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15154 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
15155 break;
15156
15157 case V8HImode:
15158 for (i = 0; i < 4; i++)
15159 {
15160 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
15161 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
15162 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
15163 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
15164 }
15165
15166 for (i = 0; i < 16; i++)
15167 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15168
15169 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15170 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
15171 break;
15172
15173 case V4SImode:
15174 for (i = 0; i < 2; i++)
15175 {
15176 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
15177 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
15178 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
15179 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
15180 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
15181 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
15182 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
15183 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
15184 }
15185
15186 for (i = 0; i < 16; i++)
15187 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15188
15189 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15190 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
15191 break;
15192
15193 default:
15194 gcc_unreachable ();
15195 }
15196
15197 return;
15198 }
15199
15200 /* Expand conditional increment or decrement using adc/sbb instructions.
15201 The default case using setcc followed by the conditional move can be
15202 done by generic code. */
15203 int
15204 ix86_expand_int_addcc (rtx operands[])
15205 {
15206 enum rtx_code code = GET_CODE (operands[1]);
15207 rtx compare_op;
15208 rtx val = const0_rtx;
15209 bool fpcmp = false;
15210 enum machine_mode mode = GET_MODE (operands[0]);
15211
15212 if (operands[3] != const1_rtx
15213 && operands[3] != constm1_rtx)
15214 return 0;
15215 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
15216 ix86_compare_op1, &compare_op))
15217 return 0;
15218 code = GET_CODE (compare_op);
15219
15220 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15221 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15222 {
15223 fpcmp = true;
15224 code = ix86_fp_compare_code_to_integer (code);
15225 }
15226
15227 if (code != LTU)
15228 {
15229 val = constm1_rtx;
15230 if (fpcmp)
15231 PUT_CODE (compare_op,
15232 reverse_condition_maybe_unordered
15233 (GET_CODE (compare_op)));
15234 else
15235 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
15236 }
15237 PUT_MODE (compare_op, mode);
15238
15239 /* Construct either adc or sbb insn. */
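/* The carry flag produced by the comparison drives the change: adding or
   subtracting 0 with carry (adc/sbb) adjusts OPERANDS[2] by one exactly
   when the flag is set, while the -1 variants adjust it when it is clear.  */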
15240 if ((code == LTU) == (operands[3] == constm1_rtx))
15241 {
15242 switch (GET_MODE (operands[0]))
15243 {
15244 case QImode:
15245 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
15246 break;
15247 case HImode:
15248 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
15249 break;
15250 case SImode:
15251 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
15252 break;
15253 case DImode:
15254 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
15255 break;
15256 default:
15257 gcc_unreachable ();
15258 }
15259 }
15260 else
15261 {
15262 switch (GET_MODE (operands[0]))
15263 {
15264 case QImode:
15265 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
15266 break;
15267 case HImode:
15268 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
15269 break;
15270 case SImode:
15271 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
15272 break;
15273 case DImode:
15274 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
15275 break;
15276 default:
15277 gcc_unreachable ();
15278 }
15279 }
15280 return 1; /* DONE */
15281 }
15282
15283
15284 /* Split OPERAND into SImode parts stored in PARTS (DImode parts on
15285    64-bit targets).  Similar to split_di, but works for floating point
15286    parameters and non-offsettable memories.  For pushes, it returns just
15287    stack offsets; the values will be saved in the right order.  At most four parts are generated, and their number is returned.  */
15288
15289 static int
15290 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
15291 {
15292 int size;
15293
15294 if (!TARGET_64BIT)
15295 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
15296 else
15297 size = (GET_MODE_SIZE (mode) + 4) / 8;
15298
15299 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
15300 gcc_assert (size >= 2 && size <= 4);
15301
15302 /* Optimize constant pool reference to immediates. This is used by fp
15303 moves, that force all constants to memory to allow combining. */
15304 if (MEM_P (operand) && MEM_READONLY_P (operand))
15305 {
15306 rtx tmp = maybe_get_pool_constant (operand);
15307 if (tmp)
15308 operand = tmp;
15309 }
15310
15311 if (MEM_P (operand) && !offsettable_memref_p (operand))
15312 {
15313 /* The only non-offsetable memories we handle are pushes. */
15314 int ok = push_operand (operand, VOIDmode);
15315
15316 gcc_assert (ok);
15317
15318 operand = copy_rtx (operand);
15319 PUT_MODE (operand, Pmode);
15320 parts[0] = parts[1] = parts[2] = parts[3] = operand;
15321 return size;
15322 }
15323
15324 if (GET_CODE (operand) == CONST_VECTOR)
15325 {
15326 enum machine_mode imode = int_mode_for_mode (mode);
15327 /* Caution: if we looked through a constant pool memory above,
15328 the operand may actually have a different mode now. That's
15329 ok, since we want to pun this all the way back to an integer. */
15330 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
15331 gcc_assert (operand != NULL);
15332 mode = imode;
15333 }
15334
15335 if (!TARGET_64BIT)
15336 {
15337 if (mode == DImode)
15338 split_di (&operand, 1, &parts[0], &parts[1]);
15339 else
15340 {
15341 int i;
15342
15343 if (REG_P (operand))
15344 {
15345 gcc_assert (reload_completed);
15346 for (i = 0; i < size; i++)
15347 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
15348 }
15349 else if (offsettable_memref_p (operand))
15350 {
15351 operand = adjust_address (operand, SImode, 0);
15352 parts[0] = operand;
15353 for (i = 1; i < size; i++)
15354 parts[i] = adjust_address (operand, SImode, 4 * i);
15355 }
15356 else if (GET_CODE (operand) == CONST_DOUBLE)
15357 {
15358 REAL_VALUE_TYPE r;
15359 long l[4];
15360
15361 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
15362 switch (mode)
15363 {
15364 case TFmode:
15365 real_to_target (l, &r, mode);
15366 parts[3] = gen_int_mode (l[3], SImode);
15367 parts[2] = gen_int_mode (l[2], SImode);
15368 break;
15369 case XFmode:
15370 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
15371 parts[2] = gen_int_mode (l[2], SImode);
15372 break;
15373 case DFmode:
15374 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15375 break;
15376 default:
15377 gcc_unreachable ();
15378 }
15379 parts[1] = gen_int_mode (l[1], SImode);
15380 parts[0] = gen_int_mode (l[0], SImode);
15381 }
15382 else
15383 gcc_unreachable ();
15384 }
15385 }
15386 else
15387 {
15388 if (mode == TImode)
15389 split_ti (&operand, 1, &parts[0], &parts[1]);
15390 if (mode == XFmode || mode == TFmode)
15391 {
15392 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
15393 if (REG_P (operand))
15394 {
15395 gcc_assert (reload_completed);
15396 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
15397 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
15398 }
15399 else if (offsettable_memref_p (operand))
15400 {
15401 operand = adjust_address (operand, DImode, 0);
15402 parts[0] = operand;
15403 parts[1] = adjust_address (operand, upper_mode, 8);
15404 }
15405 else if (GET_CODE (operand) == CONST_DOUBLE)
15406 {
15407 REAL_VALUE_TYPE r;
15408 long l[4];
15409
15410 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
15411 real_to_target (l, &r, mode);
15412
15413 /* Do not use shift by 32 to avoid warning on 32bit systems. */
15414 if (HOST_BITS_PER_WIDE_INT >= 64)
15415 parts[0]
15416 = gen_int_mode
15417 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
15418 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
15419 DImode);
15420 else
15421 parts[0] = immed_double_const (l[0], l[1], DImode);
15422
15423 if (upper_mode == SImode)
15424 parts[1] = gen_int_mode (l[2], SImode);
15425 else if (HOST_BITS_PER_WIDE_INT >= 64)
15426 parts[1]
15427 = gen_int_mode
15428 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
15429 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
15430 DImode);
15431 else
15432 parts[1] = immed_double_const (l[2], l[3], DImode);
15433 }
15434 else
15435 gcc_unreachable ();
15436 }
15437 }
15438
15439 return size;
15440 }
15441
15442 /* Emit insns to perform a move or push of DI, DF, XF, and TF values;
15443    all required insns are emitted here.  Operands 2-5 receive the
15444    destination parts in the correct order; operands 6-9 receive the
15445    corresponding source parts.  */
15446
15447 void
15448 ix86_split_long_move (rtx operands[])
15449 {
15450 rtx part[2][4];
15451 int nparts, i, j;
15452 int push = 0;
15453 int collisions = 0;
15454 enum machine_mode mode = GET_MODE (operands[0]);
15455 bool collisionparts[4];
15456
15457   /* The DFmode expanders may ask us to move a double.
15458      For a 64-bit target this is a single move.  By hiding that fact
15459      here we simplify the i386.md splitters.  */
15460 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
15461 {
15462 /* Optimize constant pool reference to immediates. This is used by
15463 fp moves, that force all constants to memory to allow combining. */
15464
15465 if (MEM_P (operands[1])
15466 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
15467 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
15468 operands[1] = get_pool_constant (XEXP (operands[1], 0));
15469 if (push_operand (operands[0], VOIDmode))
15470 {
15471 operands[0] = copy_rtx (operands[0]);
15472 PUT_MODE (operands[0], Pmode);
15473 }
15474 else
15475 operands[0] = gen_lowpart (DImode, operands[0]);
15476 operands[1] = gen_lowpart (DImode, operands[1]);
15477 emit_move_insn (operands[0], operands[1]);
15478 return;
15479 }
15480
15481 /* The only non-offsettable memory we handle is push. */
15482 if (push_operand (operands[0], VOIDmode))
15483 push = 1;
15484 else
15485 gcc_assert (!MEM_P (operands[0])
15486 || offsettable_memref_p (operands[0]));
15487
15488 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
15489 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
15490
15491   /* When emitting a push, take care of source operands on the stack.  */
15492 if (push && MEM_P (operands[1])
15493 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
15494 for (i = 0; i < nparts - 1; i++)
15495 part[1][i] = change_address (part[1][i],
15496 GET_MODE (part[1][i]),
15497 XEXP (part[1][i + 1], 0));
15498
15499   /* We need to do the copy in the right order in case an address register
15500      of the source overlaps the destination.  */
15501 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
15502 {
15503 rtx tmp;
15504
15505 for (i = 0; i < nparts; i++)
15506 {
15507 collisionparts[i]
15508 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
15509 if (collisionparts[i])
15510 collisions++;
15511 }
15512
15513 /* Collision in the middle part can be handled by reordering. */
15514 if (collisions == 1 && nparts == 3 && collisionparts [1])
15515 {
15516 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
15517 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
15518 }
15519 else if (collisions == 1
15520 && nparts == 4
15521 && (collisionparts [1] || collisionparts [2]))
15522 {
15523 if (collisionparts [1])
15524 {
15525 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
15526 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
15527 }
15528 else
15529 {
15530 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
15531 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
15532 }
15533 }
15534
15535      /* If there are more collisions, we can't handle them by reordering.
15536 	 Do an lea to the last part and use only one colliding move.  */
15537 else if (collisions > 1)
15538 {
15539 rtx base;
15540
15541 collisions = 1;
15542
15543 base = part[0][nparts - 1];
15544
15545 /* Handle the case when the last part isn't valid for lea.
15546 Happens in 64-bit mode storing the 12-byte XFmode. */
15547 if (GET_MODE (base) != Pmode)
15548 base = gen_rtx_REG (Pmode, REGNO (base));
15549
15550 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
15551 part[1][0] = replace_equiv_address (part[1][0], base);
15552 for (i = 1; i < nparts; i++)
15553 {
15554 tmp = plus_constant (base, UNITS_PER_WORD * i);
15555 part[1][i] = replace_equiv_address (part[1][i], tmp);
15556 }
15557 }
15558 }
15559
15560 if (push)
15561 {
15562 if (!TARGET_64BIT)
15563 {
15564 if (nparts == 3)
15565 {
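/* Three SImode pushes cover only 12 bytes; with a 16-byte long double,
   reserve the remaining 4 bytes of the slot by hand.  */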
15566 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
15567 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
15568 emit_move_insn (part[0][2], part[1][2]);
15569 }
15570 else if (nparts == 4)
15571 {
15572 emit_move_insn (part[0][3], part[1][3]);
15573 emit_move_insn (part[0][2], part[1][2]);
15574 }
15575 }
15576 else
15577 {
15578 	  /* In 64-bit mode we don't have a 32-bit push available.  If the operand
15579 	     is a register, that is fine - we just use the larger counterpart.  We
15580 	     also retype memories - they come from an attempt to avoid a REX prefix
15581 	     on moving the second half of a TFmode value.  */
15582 if (GET_MODE (part[1][1]) == SImode)
15583 {
15584 switch (GET_CODE (part[1][1]))
15585 {
15586 case MEM:
15587 part[1][1] = adjust_address (part[1][1], DImode, 0);
15588 break;
15589
15590 case REG:
15591 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
15592 break;
15593
15594 default:
15595 gcc_unreachable ();
15596 }
15597
15598 if (GET_MODE (part[1][0]) == SImode)
15599 part[1][0] = part[1][1];
15600 }
15601 }
15602 emit_move_insn (part[0][1], part[1][1]);
15603 emit_move_insn (part[0][0], part[1][0]);
15604 return;
15605 }
15606
15607   /* Choose the correct order so we do not overwrite the source before it is copied.  */
15608 if ((REG_P (part[0][0])
15609 && REG_P (part[1][1])
15610 && (REGNO (part[0][0]) == REGNO (part[1][1])
15611 || (nparts == 3
15612 && REGNO (part[0][0]) == REGNO (part[1][2]))
15613 || (nparts == 4
15614 && REGNO (part[0][0]) == REGNO (part[1][3]))))
15615 || (collisions > 0
15616 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
15617 {
15618 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
15619 {
15620 operands[2 + i] = part[0][j];
15621 operands[6 + i] = part[1][j];
15622 }
15623 }
15624 else
15625 {
15626 for (i = 0; i < nparts; i++)
15627 {
15628 operands[2 + i] = part[0][i];
15629 operands[6 + i] = part[1][i];
15630 }
15631 }
15632
15633 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
15634 if (optimize_insn_for_size_p ())
15635 {
15636 for (j = 0; j < nparts - 1; j++)
15637 if (CONST_INT_P (operands[6 + j])
15638 && operands[6 + j] != const0_rtx
15639 && REG_P (operands[2 + j]))
15640 for (i = j; i < nparts - 1; i++)
15641 if (CONST_INT_P (operands[7 + i])
15642 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
15643 operands[7 + i] = operands[2 + j];
15644 }
15645
15646 for (i = 0; i < nparts; i++)
15647 emit_move_insn (operands[2 + i], operands[6 + i]);
15648
15649 return;
15650 }
15651
15652 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
15653 left shift by a constant, either using a single shift or
15654 a sequence of add instructions. */
15655
15656 static void
15657 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
15658 {
15659 if (count == 1)
15660 {
15661 emit_insn ((mode == DImode
15662 ? gen_addsi3
15663 : gen_adddi3) (operand, operand, operand));
15664 }
15665 else if (!optimize_insn_for_size_p ()
15666 && count * ix86_cost->add <= ix86_cost->shift_const)
15667 {
15668 int i;
15669 for (i=0; i<count; i++)
15670 {
15671 emit_insn ((mode == DImode
15672 ? gen_addsi3
15673 : gen_adddi3) (operand, operand, operand));
15674 }
15675 }
15676 else
15677 emit_insn ((mode == DImode
15678 ? gen_ashlsi3
15679 : gen_ashldi3) (operand, operand, GEN_INT (count)));
15680 }
15681
15682 void
15683 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
15684 {
15685 rtx low[2], high[2];
15686 int count;
15687 const int single_width = mode == DImode ? 32 : 64;
15688
15689 if (CONST_INT_P (operands[2]))
15690 {
15691 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
15692 count = INTVAL (operands[2]) & (single_width * 2 - 1);
15693
15694 if (count >= single_width)
15695 {
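/* Shifting by a full word or more moves the low half into the high half
   and clears the low half; any remaining count is applied to the high
   half alone.  */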
15696 emit_move_insn (high[0], low[1]);
15697 emit_move_insn (low[0], const0_rtx);
15698
15699 if (count > single_width)
15700 ix86_expand_ashl_const (high[0], count - single_width, mode);
15701 }
15702 else
15703 {
15704 if (!rtx_equal_p (operands[0], operands[1]))
15705 emit_move_insn (operands[0], operands[1]);
15706 emit_insn ((mode == DImode
15707 ? gen_x86_shld
15708 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
15709 ix86_expand_ashl_const (low[0], count, mode);
15710 }
15711 return;
15712 }
15713
15714 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
15715
15716 if (operands[1] == const1_rtx)
15717 {
15718       /* Assuming we've chosen QImode-capable registers, 1 << N
15719 	 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
15720 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
15721 {
15722 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
15723
15724 ix86_expand_clear (low[0]);
15725 ix86_expand_clear (high[0]);
15726 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
15727
15728 d = gen_lowpart (QImode, low[0]);
15729 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
15730 s = gen_rtx_EQ (QImode, flags, const0_rtx);
15731 emit_insn (gen_rtx_SET (VOIDmode, d, s));
15732
15733 d = gen_lowpart (QImode, high[0]);
15734 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
15735 s = gen_rtx_NE (QImode, flags, const0_rtx);
15736 emit_insn (gen_rtx_SET (VOIDmode, d, s));
15737 }
15738
15739 /* Otherwise, we can get the same results by manually performing
15740 a bit extract operation on bit 5/6, and then performing the two
15741 shifts. The two methods of getting 0/1 into low/high are exactly
15742 the same size. Avoiding the shift in the bit extract case helps
15743 pentium4 a bit; no one else seems to care much either way. */
15744 else
15745 {
15746 rtx x;
15747
15748 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
15749 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
15750 else
15751 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
15752 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
15753
15754 emit_insn ((mode == DImode
15755 ? gen_lshrsi3
15756 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
15757 emit_insn ((mode == DImode
15758 ? gen_andsi3
15759 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
15760 emit_move_insn (low[0], high[0]);
15761 emit_insn ((mode == DImode
15762 ? gen_xorsi3
15763 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
15764 }
15765
15766 emit_insn ((mode == DImode
15767 ? gen_ashlsi3
15768 : gen_ashldi3) (low[0], low[0], operands[2]));
15769 emit_insn ((mode == DImode
15770 ? gen_ashlsi3
15771 : gen_ashldi3) (high[0], high[0], operands[2]));
15772 return;
15773 }
15774
15775 if (operands[1] == constm1_rtx)
15776 {
15777 /* For -1 << N, we can avoid the shld instruction, because we
15778 know that we're shifting 0...31/63 ones into a -1. */
15779 emit_move_insn (low[0], constm1_rtx);
15780 if (optimize_insn_for_size_p ())
15781 emit_move_insn (high[0], low[0]);
15782 else
15783 emit_move_insn (high[0], constm1_rtx);
15784 }
15785 else
15786 {
15787 if (!rtx_equal_p (operands[0], operands[1]))
15788 emit_move_insn (operands[0], operands[1]);
15789
15790 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
15791 emit_insn ((mode == DImode
15792 ? gen_x86_shld
15793 : gen_x86_64_shld) (high[0], low[0], operands[2]));
15794 }
15795
15796 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
15797
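/* The variable count may be a full word or more; the shift_adj patterns
   fix up the halves in that case, using a conditional move when one is
   available and a branch otherwise.  */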
15798 if (TARGET_CMOVE && scratch)
15799 {
15800 ix86_expand_clear (scratch);
15801 emit_insn ((mode == DImode
15802 ? gen_x86_shift_adj_1
15803 : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
15804 scratch));
15805 }
15806 else
15807 emit_insn ((mode == DImode
15808 ? gen_x86_shift_adj_2
15809 : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
15810 }
15811
15812 void
15813 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
15814 {
15815 rtx low[2], high[2];
15816 int count;
15817 const int single_width = mode == DImode ? 32 : 64;
15818
15819 if (CONST_INT_P (operands[2]))
15820 {
15821 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
15822 count = INTVAL (operands[2]) & (single_width * 2 - 1);
15823
15824 if (count == single_width * 2 - 1)
15825 {
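/* An arithmetic right shift by all bits but one leaves just the sign,
   so replicate the sign bit of the high half into both halves.  */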
15826 emit_move_insn (high[0], high[1]);
15827 emit_insn ((mode == DImode
15828 ? gen_ashrsi3
15829 : gen_ashrdi3) (high[0], high[0],
15830 GEN_INT (single_width - 1)));
15831 emit_move_insn (low[0], high[0]);
15832
15833 }
15834 else if (count >= single_width)
15835 {
15836 emit_move_insn (low[0], high[1]);
15837 emit_move_insn (high[0], low[0]);
15838 emit_insn ((mode == DImode
15839 ? gen_ashrsi3
15840 : gen_ashrdi3) (high[0], high[0],
15841 GEN_INT (single_width - 1)));
15842 if (count > single_width)
15843 emit_insn ((mode == DImode
15844 ? gen_ashrsi3
15845 : gen_ashrdi3) (low[0], low[0],
15846 GEN_INT (count - single_width)));
15847 }
15848 else
15849 {
15850 if (!rtx_equal_p (operands[0], operands[1]))
15851 emit_move_insn (operands[0], operands[1]);
15852 emit_insn ((mode == DImode
15853 ? gen_x86_shrd
15854 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
15855 emit_insn ((mode == DImode
15856 ? gen_ashrsi3
15857 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
15858 }
15859 }
15860 else
15861 {
15862 if (!rtx_equal_p (operands[0], operands[1]))
15863 emit_move_insn (operands[0], operands[1]);
15864
15865 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
15866
15867 emit_insn ((mode == DImode
15868 ? gen_x86_shrd
15869 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
15870 emit_insn ((mode == DImode
15871 ? gen_ashrsi3
15872 : gen_ashrdi3) (high[0], high[0], operands[2]));
15873
15874 if (TARGET_CMOVE && scratch)
15875 {
15876 emit_move_insn (scratch, high[0]);
15877 emit_insn ((mode == DImode
15878 ? gen_ashrsi3
15879 : gen_ashrdi3) (scratch, scratch,
15880 GEN_INT (single_width - 1)));
15881 emit_insn ((mode == DImode
15882 ? gen_x86_shift_adj_1
15883 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
15884 scratch));
15885 }
15886 else
15887 emit_insn ((mode == DImode
15888 ? gen_x86_shift_adj_3
15889 : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
15890 }
15891 }
15892
15893 void
15894 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
15895 {
15896 rtx low[2], high[2];
15897 int count;
15898 const int single_width = mode == DImode ? 32 : 64;
15899
15900 if (CONST_INT_P (operands[2]))
15901 {
15902 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
15903 count = INTVAL (operands[2]) & (single_width * 2 - 1);
15904
15905 if (count >= single_width)
15906 {
15907 emit_move_insn (low[0], high[1]);
15908 ix86_expand_clear (high[0]);
15909
15910 if (count > single_width)
15911 emit_insn ((mode == DImode
15912 ? gen_lshrsi3
15913 : gen_lshrdi3) (low[0], low[0],
15914 GEN_INT (count - single_width)));
15915 }
15916 else
15917 {
15918 if (!rtx_equal_p (operands[0], operands[1]))
15919 emit_move_insn (operands[0], operands[1]);
15920 emit_insn ((mode == DImode
15921 ? gen_x86_shrd
15922 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
15923 emit_insn ((mode == DImode
15924 ? gen_lshrsi3
15925 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
15926 }
15927 }
15928 else
15929 {
15930 if (!rtx_equal_p (operands[0], operands[1]))
15931 emit_move_insn (operands[0], operands[1]);
15932
15933 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
15934
15935 emit_insn ((mode == DImode
15936 ? gen_x86_shrd
15937 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
15938 emit_insn ((mode == DImode
15939 ? gen_lshrsi3
15940 : gen_lshrdi3) (high[0], high[0], operands[2]));
15941
15942 /* Heh. By reversing the arguments, we can reuse this pattern. */
15943 if (TARGET_CMOVE && scratch)
15944 {
15945 ix86_expand_clear (scratch);
15946 emit_insn ((mode == DImode
15947 ? gen_x86_shift_adj_1
15948 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
15949 scratch));
15950 }
15951 else
15952 emit_insn ((mode == DImode
15953 ? gen_x86_shift_adj_2
15954 : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
15955 }
15956 }
15957
15958 /* Predict the just-emitted jump instruction to be taken with probability PROB.  */
15959 static void
15960 predict_jump (int prob)
15961 {
15962 rtx insn = get_last_insn ();
15963 gcc_assert (JUMP_P (insn));
15964 REG_NOTES (insn)
15965 = gen_rtx_EXPR_LIST (REG_BR_PROB,
15966 GEN_INT (prob),
15967 REG_NOTES (insn));
15968 }
15969
15970 /* Helper function for the string operations below.  Test the VALUE bit of
15971    VARIABLE and return a label that is jumped to when that bit is clear.  */
15972 static rtx
15973 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
15974 {
15975 rtx label = gen_label_rtx ();
15976 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
15977 if (GET_MODE (variable) == DImode)
15978 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
15979 else
15980 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
15981 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
15982 1, label);
15983 if (epilogue)
15984 predict_jump (REG_BR_PROB_BASE * 50 / 100);
15985 else
15986 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15987 return label;
15988 }
15989
15990 /* Decrease COUNTREG by VALUE.  */
15991 static void
15992 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
15993 {
15994 if (GET_MODE (countreg) == DImode)
15995 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
15996 else
15997 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
15998 }
15999
16000 /* Zero-extend EXP, which may be in SImode, into a Pmode register.  */
16001 rtx
16002 ix86_zero_extend_to_Pmode (rtx exp)
16003 {
16004 rtx r;
16005 if (GET_MODE (exp) == VOIDmode)
16006 return force_reg (Pmode, exp);
16007 if (GET_MODE (exp) == Pmode)
16008 return copy_to_mode_reg (Pmode, exp);
16009 r = gen_reg_rtx (Pmode);
16010 emit_insn (gen_zero_extendsidi2 (r, exp));
16011 return r;
16012 }
16013
16014 /* Divide COUNTREG by SCALE, which must be a power of two.  */
16015 static rtx
16016 scale_counter (rtx countreg, int scale)
16017 {
16018 rtx sc;
16019 rtx piece_size_mask;
16020
16021 if (scale == 1)
16022 return countreg;
16023 if (CONST_INT_P (countreg))
16024 return GEN_INT (INTVAL (countreg) / scale);
16025 gcc_assert (REG_P (countreg));
16026
16027 piece_size_mask = GEN_INT (scale - 1);
16028 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
16029 GEN_INT (exact_log2 (scale)),
16030 NULL, 1, OPTAB_DIRECT);
16031 return sc;
16032 }
16033
16034 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
16035 DImode for constant loop counts. */
16036
16037 static enum machine_mode
16038 counter_mode (rtx count_exp)
16039 {
16040 if (GET_MODE (count_exp) != VOIDmode)
16041 return GET_MODE (count_exp);
16042 if (GET_CODE (count_exp) != CONST_INT)
16043 return Pmode;
16044 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
16045 return DImode;
16046 return SImode;
16047 }
16048
16049 /* When SRCPTR is non-NULL, output a simple loop to move memory pointed
16050    to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times; the
16051    overall size is COUNT, specified in bytes.  When SRCPTR is NULL, output
16052    the equivalent loop to set memory to VALUE (expected to be in MODE).
16053
16054    The size is rounded down to a whole number of the chunks moved at once.
16055    SRCMEM and DESTMEM provide the MEM rtxes to feed proper aliasing info.  */
16056
16057
16058 static void
16059 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
16060 rtx destptr, rtx srcptr, rtx value,
16061 rtx count, enum machine_mode mode, int unroll,
16062 int expected_size)
16063 {
16064 rtx out_label, top_label, iter, tmp;
16065 enum machine_mode iter_mode = counter_mode (count);
16066 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
16067 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
16068 rtx size;
16069 rtx x_addr;
16070 rtx y_addr;
16071 int i;
16072
16073 top_label = gen_label_rtx ();
16074 out_label = gen_label_rtx ();
16075 iter = gen_reg_rtx (iter_mode);
16076
16077 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
16078 NULL, 1, OPTAB_DIRECT);
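/* SIZE is COUNT rounded down to a multiple of the bytes moved per
   iteration (the chunk size times the unroll factor).  */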
16079 /* Those two should combine. */
16080 if (piece_size == const1_rtx)
16081 {
16082 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
16083 true, out_label);
16084 predict_jump (REG_BR_PROB_BASE * 10 / 100);
16085 }
16086 emit_move_insn (iter, const0_rtx);
16087
16088 emit_label (top_label);
16089
16090 tmp = convert_modes (Pmode, iter_mode, iter, true);
16091 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
16092 destmem = change_address (destmem, mode, x_addr);
16093
16094 if (srcmem)
16095 {
16096 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
16097 srcmem = change_address (srcmem, mode, y_addr);
16098
16099       /* When unrolling for chips that reorder memory reads and writes,
16100 	 we can save registers by using a single temporary.
16101 	 Also, using 4 temporaries is overkill in 32-bit mode.  */
16102 if (!TARGET_64BIT && 0)
16103 {
16104 for (i = 0; i < unroll; i++)
16105 {
16106 if (i)
16107 {
16108 destmem =
16109 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
16110 srcmem =
16111 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
16112 }
16113 emit_move_insn (destmem, srcmem);
16114 }
16115 }
16116 else
16117 {
16118 rtx tmpreg[4];
16119 gcc_assert (unroll <= 4);
16120 for (i = 0; i < unroll; i++)
16121 {
16122 tmpreg[i] = gen_reg_rtx (mode);
16123 if (i)
16124 {
16125 srcmem =
16126 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
16127 }
16128 emit_move_insn (tmpreg[i], srcmem);
16129 }
16130 for (i = 0; i < unroll; i++)
16131 {
16132 if (i)
16133 {
16134 destmem =
16135 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
16136 }
16137 emit_move_insn (destmem, tmpreg[i]);
16138 }
16139 }
16140 }
16141 else
16142 for (i = 0; i < unroll; i++)
16143 {
16144 if (i)
16145 destmem =
16146 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
16147 emit_move_insn (destmem, value);
16148 }
16149
16150 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
16151 true, OPTAB_LIB_WIDEN);
16152 if (tmp != iter)
16153 emit_move_insn (iter, tmp);
16154
16155 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
16156 true, top_label);
16157 if (expected_size != -1)
16158 {
16159 expected_size /= GET_MODE_SIZE (mode) * unroll;
16160 if (expected_size == 0)
16161 predict_jump (0);
16162 else if (expected_size > REG_BR_PROB_BASE)
16163 predict_jump (REG_BR_PROB_BASE - 1);
16164 else
16165 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
16166 }
16167 else
16168 predict_jump (REG_BR_PROB_BASE * 80 / 100);
16169 iter = ix86_zero_extend_to_Pmode (iter);
16170 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
16171 true, OPTAB_LIB_WIDEN);
16172 if (tmp != destptr)
16173 emit_move_insn (destptr, tmp);
16174 if (srcptr)
16175 {
16176 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
16177 true, OPTAB_LIB_WIDEN);
16178 if (tmp != srcptr)
16179 emit_move_insn (srcptr, tmp);
16180 }
16181 emit_label (out_label);
16182 }
16183
16184 /* Output a "rep; mov" instruction.
16185    Arguments have the same meaning as for the previous function.  */
16186 static void
16187 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
16188 rtx destptr, rtx srcptr,
16189 rtx count,
16190 enum machine_mode mode)
16191 {
16192 rtx destexp;
16193 rtx srcexp;
16194 rtx countreg;
16195
16196 /* If the size is known, it is shorter to use rep movs. */
16197 if (mode == QImode && CONST_INT_P (count)
16198 && !(INTVAL (count) & 3))
16199 mode = SImode;
16200
16201 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
16202 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
16203 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
16204 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
16205 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
16206 if (mode != QImode)
16207 {
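/* The rep_mov pattern expects the final pointer values, i.e. the original
   pointers plus COUNTREG scaled by the chunk size.  */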
16208 destexp = gen_rtx_ASHIFT (Pmode, countreg,
16209 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
16210 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
16211 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
16212 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
16213 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
16214 }
16215 else
16216 {
16217 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
16218 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
16219 }
16220 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
16221 destexp, srcexp));
16222 }
16223
16224 /* Output a "rep; stos" instruction.
16225    Arguments have the same meaning as for the previous function.  */
16226 static void
16227 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
16228 rtx count,
16229 enum machine_mode mode)
16230 {
16231 rtx destexp;
16232 rtx countreg;
16233
16234 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
16235 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
16236 value = force_reg (mode, gen_lowpart (mode, value));
16237 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
16238 if (mode != QImode)
16239 {
16240 destexp = gen_rtx_ASHIFT (Pmode, countreg,
16241 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
16242 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
16243 }
16244 else
16245 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
16246 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
16247 }
16248
16249 static void
16250 emit_strmov (rtx destmem, rtx srcmem,
16251 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
16252 {
16253 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
16254 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
16255 emit_insn (gen_strmov (destptr, dest, srcptr, src));
16256 }
16257
16258 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
16259 static void
16260 expand_movmem_epilogue (rtx destmem, rtx srcmem,
16261 rtx destptr, rtx srcptr, rtx count, int max_size)
16262 {
16263 rtx src, dest;
16264 if (CONST_INT_P (count))
16265 {
16266 HOST_WIDE_INT countval = INTVAL (count);
16267 int offset = 0;
16268
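/* Emit one straight-line move for each bit set in the residual count,
   handling 16-, 8-, 4-, 2- and 1-byte chunks in turn.  */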
16269 if ((countval & 0x10) && max_size > 16)
16270 {
16271 if (TARGET_64BIT)
16272 {
16273 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
16274 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
16275 }
16276 else
16277 gcc_unreachable ();
16278 offset += 16;
16279 }
16280 if ((countval & 0x08) && max_size > 8)
16281 {
16282 if (TARGET_64BIT)
16283 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
16284 else
16285 {
16286 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
16287 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
16288 }
16289 offset += 8;
16290 }
16291 if ((countval & 0x04) && max_size > 4)
16292 {
16293 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
16294 offset += 4;
16295 }
16296 if ((countval & 0x02) && max_size > 2)
16297 {
16298 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
16299 offset += 2;
16300 }
16301 if ((countval & 0x01) && max_size > 1)
16302 {
16303 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
16304 offset += 1;
16305 }
16306 return;
16307 }
16308 if (max_size > 8)
16309 {
16310 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
16311 count, 1, OPTAB_DIRECT);
16312 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
16313 count, QImode, 1, 4);
16314 return;
16315 }
16316
16317 /* When there are stringops, we can cheaply increase dest and src pointers.
16318 Otherwise we save code size by maintaining offset (zero is readily
16319 available from preceding rep operation) and using x86 addressing modes.
16320 */
16321 if (TARGET_SINGLE_STRINGOP)
16322 {
16323 if (max_size > 4)
16324 {
16325 rtx label = ix86_expand_aligntest (count, 4, true);
16326 src = change_address (srcmem, SImode, srcptr);
16327 dest = change_address (destmem, SImode, destptr);
16328 emit_insn (gen_strmov (destptr, dest, srcptr, src));
16329 emit_label (label);
16330 LABEL_NUSES (label) = 1;
16331 }
16332 if (max_size > 2)
16333 {
16334 rtx label = ix86_expand_aligntest (count, 2, true);
16335 src = change_address (srcmem, HImode, srcptr);
16336 dest = change_address (destmem, HImode, destptr);
16337 emit_insn (gen_strmov (destptr, dest, srcptr, src));
16338 emit_label (label);
16339 LABEL_NUSES (label) = 1;
16340 }
16341 if (max_size > 1)
16342 {
16343 rtx label = ix86_expand_aligntest (count, 1, true);
16344 src = change_address (srcmem, QImode, srcptr);
16345 dest = change_address (destmem, QImode, destptr);
16346 emit_insn (gen_strmov (destptr, dest, srcptr, src));
16347 emit_label (label);
16348 LABEL_NUSES (label) = 1;
16349 }
16350 }
16351 else
16352 {
16353 rtx offset = force_reg (Pmode, const0_rtx);
16354 rtx tmp;
16355
16356 if (max_size > 4)
16357 {
16358 rtx label = ix86_expand_aligntest (count, 4, true);
16359 src = change_address (srcmem, SImode, srcptr);
16360 dest = change_address (destmem, SImode, destptr);
16361 emit_move_insn (dest, src);
16362 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
16363 true, OPTAB_LIB_WIDEN);
16364 if (tmp != offset)
16365 emit_move_insn (offset, tmp);
16366 emit_label (label);
16367 LABEL_NUSES (label) = 1;
16368 }
16369 if (max_size > 2)
16370 {
16371 rtx label = ix86_expand_aligntest (count, 2, true);
16372 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
16373 src = change_address (srcmem, HImode, tmp);
16374 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
16375 dest = change_address (destmem, HImode, tmp);
16376 emit_move_insn (dest, src);
16377 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
16378 true, OPTAB_LIB_WIDEN);
16379 if (tmp != offset)
16380 emit_move_insn (offset, tmp);
16381 emit_label (label);
16382 LABEL_NUSES (label) = 1;
16383 }
16384 if (max_size > 1)
16385 {
16386 rtx label = ix86_expand_aligntest (count, 1, true);
16387 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
16388 src = change_address (srcmem, QImode, tmp);
16389 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
16390 dest = change_address (destmem, QImode, tmp);
16391 emit_move_insn (dest, src);
16392 emit_label (label);
16393 LABEL_NUSES (label) = 1;
16394 }
16395 }
16396 }
16397
16398 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
16399 static void
16400 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
16401 rtx count, int max_size)
16402 {
16403 count =
16404 expand_simple_binop (counter_mode (count), AND, count,
16405 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
16406 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
16407 gen_lowpart (QImode, value), count, QImode,
16408 1, max_size / 2);
16409 }
16410
16411 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
16412 static void
16413 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
16414 {
16415 rtx dest;
16416
16417 if (CONST_INT_P (count))
16418 {
16419 HOST_WIDE_INT countval = INTVAL (count);
16420 int offset = 0;
16421
16422 if ((countval & 0x10) && max_size > 16)
16423 {
16424 if (TARGET_64BIT)
16425 {
16426 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
16427 emit_insn (gen_strset (destptr, dest, value));
16428 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
16429 emit_insn (gen_strset (destptr, dest, value));
16430 }
16431 else
16432 gcc_unreachable ();
16433 offset += 16;
16434 }
16435 if ((countval & 0x08) && max_size > 8)
16436 {
16437 if (TARGET_64BIT)
16438 {
16439 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
16440 emit_insn (gen_strset (destptr, dest, value));
16441 }
16442 else
16443 {
16444 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
16445 emit_insn (gen_strset (destptr, dest, value));
16446 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
16447 emit_insn (gen_strset (destptr, dest, value));
16448 }
16449 offset += 8;
16450 }
16451 if ((countval & 0x04) && max_size > 4)
16452 {
16453 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
16454 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
16455 offset += 4;
16456 }
16457 if ((countval & 0x02) && max_size > 2)
16458 {
16459 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
16460 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
16461 offset += 2;
16462 }
16463 if ((countval & 0x01) && max_size > 1)
16464 {
16465 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
16466 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
16467 offset += 1;
16468 }
16469 return;
16470 }
16471 if (max_size > 32)
16472 {
16473 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
16474 return;
16475 }
16476 if (max_size > 16)
16477 {
16478 rtx label = ix86_expand_aligntest (count, 16, true);
16479 if (TARGET_64BIT)
16480 {
16481 dest = change_address (destmem, DImode, destptr);
16482 emit_insn (gen_strset (destptr, dest, value));
16483 emit_insn (gen_strset (destptr, dest, value));
16484 }
16485 else
16486 {
16487 dest = change_address (destmem, SImode, destptr);
16488 emit_insn (gen_strset (destptr, dest, value));
16489 emit_insn (gen_strset (destptr, dest, value));
16490 emit_insn (gen_strset (destptr, dest, value));
16491 emit_insn (gen_strset (destptr, dest, value));
16492 }
16493 emit_label (label);
16494 LABEL_NUSES (label) = 1;
16495 }
16496 if (max_size > 8)
16497 {
16498 rtx label = ix86_expand_aligntest (count, 8, true);
16499 if (TARGET_64BIT)
16500 {
16501 dest = change_address (destmem, DImode, destptr);
16502 emit_insn (gen_strset (destptr, dest, value));
16503 }
16504 else
16505 {
16506 dest = change_address (destmem, SImode, destptr);
16507 emit_insn (gen_strset (destptr, dest, value));
16508 emit_insn (gen_strset (destptr, dest, value));
16509 }
16510 emit_label (label);
16511 LABEL_NUSES (label) = 1;
16512 }
16513 if (max_size > 4)
16514 {
16515 rtx label = ix86_expand_aligntest (count, 4, true);
16516 dest = change_address (destmem, SImode, destptr);
16517 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
16518 emit_label (label);
16519 LABEL_NUSES (label) = 1;
16520 }
16521 if (max_size > 2)
16522 {
16523 rtx label = ix86_expand_aligntest (count, 2, true);
16524 dest = change_address (destmem, HImode, destptr);
16525 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
16526 emit_label (label);
16527 LABEL_NUSES (label) = 1;
16528 }
16529 if (max_size > 1)
16530 {
16531 rtx label = ix86_expand_aligntest (count, 1, true);
16532 dest = change_address (destmem, QImode, destptr);
16533 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
16534 emit_label (label);
16535 LABEL_NUSES (label) = 1;
16536 }
16537 }
16538
16539 /* Copy enough from SRC to DEST to align DEST, which is known to be aligned
16540    by ALIGN, up to DESIRED_ALIGNMENT.  */
16541 static void
16542 expand_movmem_prologue (rtx destmem, rtx srcmem,
16543 rtx destptr, rtx srcptr, rtx count,
16544 int align, int desired_alignment)
16545 {
16546 if (align <= 1 && desired_alignment > 1)
16547 {
16548 rtx label = ix86_expand_aligntest (destptr, 1, false);
16549 srcmem = change_address (srcmem, QImode, srcptr);
16550 destmem = change_address (destmem, QImode, destptr);
16551 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
16552 ix86_adjust_counter (count, 1);
16553 emit_label (label);
16554 LABEL_NUSES (label) = 1;
16555 }
16556 if (align <= 2 && desired_alignment > 2)
16557 {
16558 rtx label = ix86_expand_aligntest (destptr, 2, false);
16559 srcmem = change_address (srcmem, HImode, srcptr);
16560 destmem = change_address (destmem, HImode, destptr);
16561 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
16562 ix86_adjust_counter (count, 2);
16563 emit_label (label);
16564 LABEL_NUSES (label) = 1;
16565 }
16566 if (align <= 4 && desired_alignment > 4)
16567 {
16568 rtx label = ix86_expand_aligntest (destptr, 4, false);
16569 srcmem = change_address (srcmem, SImode, srcptr);
16570 destmem = change_address (destmem, SImode, destptr);
16571 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
16572 ix86_adjust_counter (count, 4);
16573 emit_label (label);
16574 LABEL_NUSES (label) = 1;
16575 }
16576 gcc_assert (desired_alignment <= 8);
16577 }
16578
16579 /* Set enough bytes of DEST to align DEST, known to be aligned by ALIGN, to
16580    DESIRED_ALIGNMENT.  */
16581 static void
16582 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
16583 int align, int desired_alignment)
16584 {
16585 if (align <= 1 && desired_alignment > 1)
16586 {
16587 rtx label = ix86_expand_aligntest (destptr, 1, false);
16588 destmem = change_address (destmem, QImode, destptr);
16589 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
16590 ix86_adjust_counter (count, 1);
16591 emit_label (label);
16592 LABEL_NUSES (label) = 1;
16593 }
16594 if (align <= 2 && desired_alignment > 2)
16595 {
16596 rtx label = ix86_expand_aligntest (destptr, 2, false);
16597 destmem = change_address (destmem, HImode, destptr);
16598 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
16599 ix86_adjust_counter (count, 2);
16600 emit_label (label);
16601 LABEL_NUSES (label) = 1;
16602 }
16603 if (align <= 4 && desired_alignment > 4)
16604 {
16605 rtx label = ix86_expand_aligntest (destptr, 4, false);
16606 destmem = change_address (destmem, SImode, destptr);
16607 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
16608 ix86_adjust_counter (count, 4);
16609 emit_label (label);
16610 LABEL_NUSES (label) = 1;
16611 }
16612 gcc_assert (desired_alignment <= 8);
16613 }
16614
16615 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
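/* For example (an illustrative table only): with a size table such as
   {{256, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}} and
   EXPECTED_SIZE == 1000, the second entry is the first one whose max covers
   the expected size, so rep_prefix_4_byte is chosen when the rep prefix is
   usable; otherwise the heuristics below pick an alternative.  */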
16616 static enum stringop_alg
16617 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
16618 int *dynamic_check)
16619 {
16620 const struct stringop_algs * algs;
16621 /* Algorithms using the rep prefix want at least edi and ecx;
16622 additionally, memset wants eax and memcpy wants esi. Don't
16623 consider such algorithms if the user has appropriated those
16624 registers for their own purposes. */
16625 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
16626 || (memset
16627 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
16628
16629 #define ALG_USABLE_P(alg) (rep_prefix_usable \
16630 || (alg != rep_prefix_1_byte \
16631 && alg != rep_prefix_4_byte \
16632 && alg != rep_prefix_8_byte))
16633 const struct processor_costs *cost;
16634
16635 cost = optimize_insn_for_size_p () ? &ix86_size_cost : ix86_cost;
16636
16637 *dynamic_check = -1;
16638 if (memset)
16639 algs = &cost->memset[TARGET_64BIT != 0];
16640 else
16641 algs = &cost->memcpy[TARGET_64BIT != 0];
16642 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
16643 return stringop_alg;
16644 /* rep; movq or rep; movl is the smallest variant. */
16645 else if (optimize_insn_for_size_p ())
16646 {
16647 if (!count || (count & 3))
16648 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
16649 else
16650 return rep_prefix_usable ? rep_prefix_4_byte : loop;
16651 }
16652   /* Very tiny blocks are best handled via the loop; REP is expensive
16653      to set up.  */
16654 else if (expected_size != -1 && expected_size < 4)
16655 return loop_1_byte;
16656 else if (expected_size != -1)
16657 {
16658 unsigned int i;
16659 enum stringop_alg alg = libcall;
16660       for (i = 0; i < MAX_STRINGOP_ALGS; i++)
16661 {
16662 /* We get here if the algorithms that were not libcall-based
16663 were rep-prefix based and we are unable to use rep prefixes
16664 based on global register usage. Break out of the loop and
16665 use the heuristic below. */
16666 if (algs->size[i].max == 0)
16667 break;
16668 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
16669 {
16670 enum stringop_alg candidate = algs->size[i].alg;
16671
16672 if (candidate != libcall && ALG_USABLE_P (candidate))
16673 alg = candidate;
16674 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
16675 last non-libcall inline algorithm. */
16676 if (TARGET_INLINE_ALL_STRINGOPS)
16677 {
16678 /* When the current size is best to be copied by a libcall,
16679 but we are still forced to inline, run the heuristic below
16680 that will pick code for medium sized blocks. */
16681 if (alg != libcall)
16682 return alg;
16683 break;
16684 }
16685 else if (ALG_USABLE_P (candidate))
16686 return candidate;
16687 }
16688 }
16689 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
16690 }
16691   /* When asked to inline the call anyway, try to pick a meaningful choice.
16692      We look for the maximal size of block that is faster to copy by hand and
16693      take blocks of at most that size, guessing that the average size will
16694      be roughly half of the block.
16695
16696 If this turns out to be bad, we might simply specify the preferred
16697 choice in ix86_costs. */
16698 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
16699 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
16700 {
16701 int max = -1;
16702 enum stringop_alg alg;
16703 int i;
16704 bool any_alg_usable_p = true;
16705
16706       for (i = 0; i < MAX_STRINGOP_ALGS; i++)
16707 {
16708 enum stringop_alg candidate = algs->size[i].alg;
16709 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
16710
16711 if (candidate != libcall && candidate
16712 && ALG_USABLE_P (candidate))
16713 max = algs->size[i].max;
16714 }
16715 /* If there aren't any usable algorithms, then recursing on
16716 smaller sizes isn't going to find anything. Just return the
16717 simple byte-at-a-time copy loop. */
16718 if (!any_alg_usable_p)
16719 {
16720 /* Pick something reasonable. */
16721 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
16722 *dynamic_check = 128;
16723 return loop_1_byte;
16724 }
16725 if (max == -1)
16726 max = 4096;
16727 alg = decide_alg (count, max / 2, memset, dynamic_check);
16728 gcc_assert (*dynamic_check == -1);
16729 gcc_assert (alg != libcall);
16730 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
16731 *dynamic_check = max;
16732 return alg;
16733 }
16734 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
16735 #undef ALG_USABLE_P
16736 }
16737
16738 /* Decide on alignment. We know that the operand is already aligned to ALIGN
16739 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
16740 static int
16741 decide_alignment (int align,
16742 enum stringop_alg alg,
16743 int expected_size)
16744 {
16745 int desired_align = 0;
16746 switch (alg)
16747 {
16748 case no_stringop:
16749 gcc_unreachable ();
16750 case loop:
16751 case unrolled_loop:
16752 desired_align = GET_MODE_SIZE (Pmode);
16753 break;
16754 case rep_prefix_8_byte:
16755 desired_align = 8;
16756 break;
16757 case rep_prefix_4_byte:
16758       /* PentiumPro has special logic triggering for 8 byte aligned blocks,
16759 	 copying a whole cacheline at once.  */
16760 if (TARGET_PENTIUMPRO)
16761 desired_align = 8;
16762 else
16763 desired_align = 4;
16764 break;
16765 case rep_prefix_1_byte:
16766       /* PentiumPro has special logic triggering for 8 byte aligned blocks,
16767 	 copying a whole cacheline at once.  */
16768 if (TARGET_PENTIUMPRO)
16769 desired_align = 8;
16770 else
16771 desired_align = 1;
16772 break;
16773 case loop_1_byte:
16774 desired_align = 1;
16775 break;
16776 case libcall:
16777 return 0;
16778 }
16779
16780 if (optimize_size)
16781 desired_align = 1;
16782 if (desired_align < align)
16783 desired_align = align;
16784 if (expected_size != -1 && expected_size < 4)
16785 desired_align = align;
16786 return desired_align;
16787 }
16788
16789 /* Return the smallest power of 2 greater than VAL. */
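/* For example, 3 yields 4 and 4 yields 8; the result is always strictly
   greater than VAL, and 0 yields 1.  */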
16790 static int
16791 smallest_pow2_greater_than (int val)
16792 {
16793 int ret = 1;
16794 while (ret <= val)
16795 ret <<= 1;
16796 return ret;
16797 }
16798
16799 /* Expand string move (memcpy) operation. Use i386 string operations when
16800 profitable. expand_setmem contains similar code. The code depends upon
16801 architecture, block size and alignment, but always has the same
16802 overall structure:
16803
16804    1) Prologue guard: Conditional that jumps to the epilogue for small
16805       blocks that can be handled by the epilogue alone.  This is faster but
16806       also needed for correctness, since the prologue assumes the block is
16807       larger than the desired alignment.
16808
16809 Optional dynamic check for size and libcall for large
16810 blocks is emitted here too, with -minline-stringops-dynamically.
16811
16812    2) Prologue: copy the first few bytes in order to get the destination
16813       aligned to DESIRED_ALIGN.  It is emitted only when ALIGN is less than
16814       DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
16815       We emit either a jump tree on power of two sized blocks, or a byte loop.
16816
16817 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
16818 with specified algorithm.
16819
16820    4) Epilogue: code copying the tail of the block that is too small to be
16821       handled by the main body (or up to the size guarded by the prologue guard).  */
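/* As a rough illustration (one possible case only): copying a block of
   unknown size with ALIGN == 1, alg == rep_prefix_4_byte and
   DESIRED_ALIGN == 4, the guard branches to the epilogue for counts below 4,
   the prologue copies up to 3 bytes to align the destination, the rep movl
   body copies the bulk in 4-byte chunks, and the epilogue handles the
   remaining count & 3 bytes.  */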
16822
16823 int
16824 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
16825 rtx expected_align_exp, rtx expected_size_exp)
16826 {
16827 rtx destreg;
16828 rtx srcreg;
16829 rtx label = NULL;
16830 rtx tmp;
16831 rtx jump_around_label = NULL;
16832 HOST_WIDE_INT align = 1;
16833 unsigned HOST_WIDE_INT count = 0;
16834 HOST_WIDE_INT expected_size = -1;
16835 int size_needed = 0, epilogue_size_needed;
16836 int desired_align = 0;
16837 enum stringop_alg alg;
16838 int dynamic_check;
16839
16840 if (CONST_INT_P (align_exp))
16841 align = INTVAL (align_exp);
16842   /* i386 can do misaligned access at a reasonable extra cost.  */
16843 if (CONST_INT_P (expected_align_exp)
16844 && INTVAL (expected_align_exp) > align)
16845 align = INTVAL (expected_align_exp);
16846 if (CONST_INT_P (count_exp))
16847 count = expected_size = INTVAL (count_exp);
16848 if (CONST_INT_P (expected_size_exp) && count == 0)
16849 expected_size = INTVAL (expected_size_exp);
16850
16851 /* Make sure we don't need to care about overflow later on. */
16852 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
16853 return 0;
16854
16855 /* Step 0: Decide on preferred algorithm, desired alignment and
16856 size of chunks to be copied by main loop. */
16857
16858 alg = decide_alg (count, expected_size, false, &dynamic_check);
16859 desired_align = decide_alignment (align, alg, expected_size);
16860
16861 if (!TARGET_ALIGN_STRINGOPS)
16862 align = desired_align;
16863
16864 if (alg == libcall)
16865 return 0;
16866 gcc_assert (alg != no_stringop);
16867 if (!count)
16868 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
16869 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
16870 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
16871 switch (alg)
16872 {
16873 case libcall:
16874 case no_stringop:
16875 gcc_unreachable ();
16876 case loop:
16877 size_needed = GET_MODE_SIZE (Pmode);
16878 break;
16879 case unrolled_loop:
16880 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
16881 break;
16882 case rep_prefix_8_byte:
16883 size_needed = 8;
16884 break;
16885 case rep_prefix_4_byte:
16886 size_needed = 4;
16887 break;
16888 case rep_prefix_1_byte:
16889 case loop_1_byte:
16890 size_needed = 1;
16891 break;
16892 }
16893
16894 epilogue_size_needed = size_needed;
16895
16896 /* Step 1: Prologue guard. */
16897
16898 /* Alignment code needs count to be in register. */
16899 if (CONST_INT_P (count_exp) && desired_align > align)
16900 count_exp = force_reg (counter_mode (count_exp), count_exp);
16901 gcc_assert (desired_align >= 1 && align >= 1);
16902
16903 /* Ensure that alignment prologue won't copy past end of block. */
16904 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
16905 {
16906 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
16907 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
16908 Make sure it is power of 2. */
16909 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
16910
16911 if (CONST_INT_P (count_exp))
16912 {
16913 if (UINTVAL (count_exp) < (unsigned HOST_WIDE_INT)epilogue_size_needed)
16914 goto epilogue;
16915 }
16916 else
16917 {
16918 label = gen_label_rtx ();
16919 emit_cmp_and_jump_insns (count_exp,
16920 GEN_INT (epilogue_size_needed),
16921 LTU, 0, counter_mode (count_exp), 1, label);
16922 if (expected_size == -1 || expected_size < epilogue_size_needed)
16923 predict_jump (REG_BR_PROB_BASE * 60 / 100);
16924 else
16925 predict_jump (REG_BR_PROB_BASE * 20 / 100);
16926 }
16927 }
16928
16929   /* Emit code to decide at runtime whether a library call or inline code
16930      should be used.  */
16931 if (dynamic_check != -1)
16932 {
16933 if (CONST_INT_P (count_exp))
16934 {
16935 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
16936 {
16937 emit_block_move_via_libcall (dst, src, count_exp, false);
16938 count_exp = const0_rtx;
16939 goto epilogue;
16940 }
16941 }
16942 else
16943 {
16944 rtx hot_label = gen_label_rtx ();
16945 jump_around_label = gen_label_rtx ();
16946 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
16947 LEU, 0, GET_MODE (count_exp), 1, hot_label);
16948 predict_jump (REG_BR_PROB_BASE * 90 / 100);
16949 emit_block_move_via_libcall (dst, src, count_exp, false);
16950 emit_jump (jump_around_label);
16951 emit_label (hot_label);
16952 }
16953 }
16954
16955 /* Step 2: Alignment prologue. */
16956
16957 if (desired_align > align)
16958 {
16959       /* Except for the first move in the epilogue, we no longer know
16960          the constant offset in the aliasing info.  It does not seem worth
16961          the pain to maintain it for the first move, so throw away
16962          the info early.  */
16963 src = change_address (src, BLKmode, srcreg);
16964 dst = change_address (dst, BLKmode, destreg);
16965 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
16966 desired_align);
16967 }
16968 if (label && size_needed == 1)
16969 {
16970 emit_label (label);
16971 LABEL_NUSES (label) = 1;
16972 label = NULL;
16973 }
16974
16975 /* Step 3: Main loop. */
16976
16977 switch (alg)
16978 {
16979 case libcall:
16980 case no_stringop:
16981 gcc_unreachable ();
16982 case loop_1_byte:
16983 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
16984 count_exp, QImode, 1, expected_size);
16985 break;
16986 case loop:
16987 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
16988 count_exp, Pmode, 1, expected_size);
16989 break;
16990 case unrolled_loop:
16991 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
16992 registers for 4 temporaries anyway. */
16993 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
16994 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
16995 expected_size);
16996 break;
16997 case rep_prefix_8_byte:
16998 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
16999 DImode);
17000 break;
17001 case rep_prefix_4_byte:
17002 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
17003 SImode);
17004 break;
17005 case rep_prefix_1_byte:
17006 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
17007 QImode);
17008 break;
17009 }
17010   /* Properly adjust the offset of the source and destination memory for aliasing.  */
17011 if (CONST_INT_P (count_exp))
17012 {
17013 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
17014 (count / size_needed) * size_needed);
17015 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
17016 (count / size_needed) * size_needed);
17017 }
17018 else
17019 {
17020 src = change_address (src, BLKmode, srcreg);
17021 dst = change_address (dst, BLKmode, destreg);
17022 }
17023
17024 /* Step 4: Epilogue to copy the remaining bytes. */
17025 epilogue:
17026 if (label)
17027 {
17028       /* When the main loop is done, COUNT_EXP might hold the original count,
17029  	 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
17030 	 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
17031 	 bytes.  Compensate if needed.  */
17032
17033 if (size_needed < epilogue_size_needed)
17034 {
17035 tmp =
17036 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
17037 GEN_INT (size_needed - 1), count_exp, 1,
17038 OPTAB_DIRECT);
17039 if (tmp != count_exp)
17040 emit_move_insn (count_exp, tmp);
17041 }
17042 emit_label (label);
17043 LABEL_NUSES (label) = 1;
17044 }
17045
17046 if (count_exp != const0_rtx && epilogue_size_needed > 1)
17047 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
17048 epilogue_size_needed);
17049 if (jump_around_label)
17050 emit_label (jump_around_label);
17051 return 1;
17052 }
17053
17054 /* Helper function for memset.  For a QImode value 0xXY produce
17055    0xXYXYXYXY of the width specified by MODE.  This is essentially
17056    a * 0x01010101, but we can do slightly better than
17057    synth_mult by unwinding the sequence by hand on CPUs with
17058    slow multiply.  */
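/* For example, VAL == 0x5A promoted to SImode produces 0x5A5A5A5A; for DImode
   the same pattern is replicated into the upper 32 bits as well.  */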
17059 static rtx
17060 promote_duplicated_reg (enum machine_mode mode, rtx val)
17061 {
17062 enum machine_mode valmode = GET_MODE (val);
17063 rtx tmp;
17064 int nops = mode == DImode ? 3 : 2;
17065
17066 gcc_assert (mode == SImode || mode == DImode);
17067 if (val == const0_rtx)
17068 return copy_to_mode_reg (mode, const0_rtx);
17069 if (CONST_INT_P (val))
17070 {
17071 HOST_WIDE_INT v = INTVAL (val) & 255;
17072
17073 v |= v << 8;
17074 v |= v << 16;
17075 if (mode == DImode)
17076 v |= (v << 16) << 16;
17077 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
17078 }
17079
17080 if (valmode == VOIDmode)
17081 valmode = QImode;
17082 if (valmode != QImode)
17083 val = gen_lowpart (QImode, val);
17084 if (mode == QImode)
17085 return val;
17086 if (!TARGET_PARTIAL_REG_STALL)
17087 nops--;
17088 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
17089 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
17090 <= (ix86_cost->shift_const + ix86_cost->add) * nops
17091 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
17092 {
17093 rtx reg = convert_modes (mode, QImode, val, true);
17094 tmp = promote_duplicated_reg (mode, const1_rtx);
17095 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
17096 OPTAB_DIRECT);
17097 }
17098 else
17099 {
17100 rtx reg = convert_modes (mode, QImode, val, true);
17101
17102 if (!TARGET_PARTIAL_REG_STALL)
17103 if (mode == SImode)
17104 emit_insn (gen_movsi_insv_1 (reg, reg));
17105 else
17106 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
17107 else
17108 {
17109 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
17110 NULL, 1, OPTAB_DIRECT);
17111 reg =
17112 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
17113 }
17114 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
17115 NULL, 1, OPTAB_DIRECT);
17116 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
17117 if (mode == SImode)
17118 return reg;
17119 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
17120 NULL, 1, OPTAB_DIRECT);
17121 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
17122 return reg;
17123 }
17124 }
17125
17126 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
17127    will be needed by the main loop copying SIZE_NEEDED chunks and by the
17128    prologue getting the alignment from ALIGN to DESIRED_ALIGN.  */
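/* For example, a main loop working in 4-byte chunks (SIZE_NEEDED == 4) only
   needs an SImode copy of the value, while an 8-byte rep stosq body on 64-bit
   targets needs the full DImode replication.  */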
17129 static rtx
17130 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
17131 {
17132 rtx promoted_val;
17133
17134 if (TARGET_64BIT
17135 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
17136 promoted_val = promote_duplicated_reg (DImode, val);
17137 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
17138 promoted_val = promote_duplicated_reg (SImode, val);
17139 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
17140 promoted_val = promote_duplicated_reg (HImode, val);
17141 else
17142 promoted_val = val;
17143
17144 return promoted_val;
17145 }
17146
17147 /* Expand string set operation (memset).  Use i386 string operations when
17148    profitable.  See the expand_movmem comment for an explanation of the
17149    individual steps performed.  */
17150 int
17151 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
17152 rtx expected_align_exp, rtx expected_size_exp)
17153 {
17154 rtx destreg;
17155 rtx label = NULL;
17156 rtx tmp;
17157 rtx jump_around_label = NULL;
17158 HOST_WIDE_INT align = 1;
17159 unsigned HOST_WIDE_INT count = 0;
17160 HOST_WIDE_INT expected_size = -1;
17161 int size_needed = 0, epilogue_size_needed;
17162 int desired_align = 0;
17163 enum stringop_alg alg;
17164 rtx promoted_val = NULL;
17165 bool force_loopy_epilogue = false;
17166 int dynamic_check;
17167
17168 if (CONST_INT_P (align_exp))
17169 align = INTVAL (align_exp);
17170   /* i386 can do misaligned access at a reasonable extra cost.  */
17171 if (CONST_INT_P (expected_align_exp)
17172 && INTVAL (expected_align_exp) > align)
17173 align = INTVAL (expected_align_exp);
17174 if (CONST_INT_P (count_exp))
17175 count = expected_size = INTVAL (count_exp);
17176 if (CONST_INT_P (expected_size_exp) && count == 0)
17177 expected_size = INTVAL (expected_size_exp);
17178
17179 /* Make sure we don't need to care about overflow later on. */
17180 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
17181 return 0;
17182
17183 /* Step 0: Decide on preferred algorithm, desired alignment and
17184 size of chunks to be copied by main loop. */
17185
17186 alg = decide_alg (count, expected_size, true, &dynamic_check);
17187 desired_align = decide_alignment (align, alg, expected_size);
17188
17189 if (!TARGET_ALIGN_STRINGOPS)
17190 align = desired_align;
17191
17192 if (alg == libcall)
17193 return 0;
17194 gcc_assert (alg != no_stringop);
17195 if (!count)
17196 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
17197 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
17198 switch (alg)
17199 {
17200 case libcall:
17201 case no_stringop:
17202 gcc_unreachable ();
17203 case loop:
17204 size_needed = GET_MODE_SIZE (Pmode);
17205 break;
17206 case unrolled_loop:
17207 size_needed = GET_MODE_SIZE (Pmode) * 4;
17208 break;
17209 case rep_prefix_8_byte:
17210 size_needed = 8;
17211 break;
17212 case rep_prefix_4_byte:
17213 size_needed = 4;
17214 break;
17215 case rep_prefix_1_byte:
17216 case loop_1_byte:
17217 size_needed = 1;
17218 break;
17219 }
17220 epilogue_size_needed = size_needed;
17221
17222 /* Step 1: Prologue guard. */
17223
17224 /* Alignment code needs count to be in register. */
17225 if (CONST_INT_P (count_exp) && desired_align > align)
17226 {
17227 enum machine_mode mode = SImode;
17228 if (TARGET_64BIT && (count & ~0xffffffff))
17229 mode = DImode;
17230 count_exp = force_reg (mode, count_exp);
17231 }
17232   /* Do the cheap promotion to allow better CSE across the
17233      main loop and epilogue (i.e. one load of the big constant in
17234      front of all the code).  */
17235 if (CONST_INT_P (val_exp))
17236 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
17237 desired_align, align);
17238 /* Ensure that alignment prologue won't copy past end of block. */
17239 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
17240 {
17241 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
17242 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
17243 Make sure it is power of 2. */
17244 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
17245
17246       /* To improve performance for small blocks, we jump around the VAL
17247 	 promoting code.  This means that if the promoted VAL is not constant,
17248 	 we might not use it in the epilogue and have to use the byte
17249 	 loop variant.  */
17250 if (epilogue_size_needed > 2 && !promoted_val)
17251 force_loopy_epilogue = true;
17252 label = gen_label_rtx ();
17253 emit_cmp_and_jump_insns (count_exp,
17254 GEN_INT (epilogue_size_needed),
17255 LTU, 0, counter_mode (count_exp), 1, label);
17256 if (GET_CODE (count_exp) == CONST_INT)
17257 ;
17258 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
17259 predict_jump (REG_BR_PROB_BASE * 60 / 100);
17260 else
17261 predict_jump (REG_BR_PROB_BASE * 20 / 100);
17262 }
17263 if (dynamic_check != -1)
17264 {
17265 rtx hot_label = gen_label_rtx ();
17266 jump_around_label = gen_label_rtx ();
17267 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
17268 LEU, 0, counter_mode (count_exp), 1, hot_label);
17269 predict_jump (REG_BR_PROB_BASE * 90 / 100);
17270 set_storage_via_libcall (dst, count_exp, val_exp, false);
17271 emit_jump (jump_around_label);
17272 emit_label (hot_label);
17273 }
17274
17275 /* Step 2: Alignment prologue. */
17276
17277 /* Do the expensive promotion once we branched off the small blocks. */
17278 if (!promoted_val)
17279 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
17280 desired_align, align);
17281 gcc_assert (desired_align >= 1 && align >= 1);
17282
17283 if (desired_align > align)
17284 {
17285       /* Except for the first move in the epilogue, we no longer know
17286          the constant offset in the aliasing info.  It does not seem worth
17287          the pain to maintain it for the first move, so throw away
17288          the info early.  */
17289 dst = change_address (dst, BLKmode, destreg);
17290 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
17291 desired_align);
17292 }
17293 if (label && size_needed == 1)
17294 {
17295 emit_label (label);
17296 LABEL_NUSES (label) = 1;
17297 label = NULL;
17298 }
17299
17300 /* Step 3: Main loop. */
17301
17302 switch (alg)
17303 {
17304 case libcall:
17305 case no_stringop:
17306 gcc_unreachable ();
17307 case loop_1_byte:
17308 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
17309 count_exp, QImode, 1, expected_size);
17310 break;
17311 case loop:
17312 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
17313 count_exp, Pmode, 1, expected_size);
17314 break;
17315 case unrolled_loop:
17316 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
17317 count_exp, Pmode, 4, expected_size);
17318 break;
17319 case rep_prefix_8_byte:
17320 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
17321 DImode);
17322 break;
17323 case rep_prefix_4_byte:
17324 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
17325 SImode);
17326 break;
17327 case rep_prefix_1_byte:
17328 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
17329 QImode);
17330 break;
17331 }
17332   /* Properly adjust the offset of the destination memory for aliasing.  */
17333 if (CONST_INT_P (count_exp))
17334 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
17335 (count / size_needed) * size_needed);
17336 else
17337 dst = change_address (dst, BLKmode, destreg);
17338
17339 /* Step 4: Epilogue to copy the remaining bytes. */
17340
17341 if (label)
17342 {
17343       /* When the main loop is done, COUNT_EXP might hold the original count,
17344  	 while we want to set only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
17345 	 Epilogue code will actually set COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
17346 	 bytes.  Compensate if needed.  */
17347
17348 if (size_needed < desired_align - align)
17349 {
17350 tmp =
17351 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
17352 GEN_INT (size_needed - 1), count_exp, 1,
17353 OPTAB_DIRECT);
17354 size_needed = desired_align - align + 1;
17355 if (tmp != count_exp)
17356 emit_move_insn (count_exp, tmp);
17357 }
17358 emit_label (label);
17359 LABEL_NUSES (label) = 1;
17360 }
17361 if (count_exp != const0_rtx && epilogue_size_needed > 1)
17362 {
17363 if (force_loopy_epilogue)
17364 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
17365 size_needed);
17366 else
17367 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
17368 size_needed);
17369 }
17370 if (jump_around_label)
17371 emit_label (jump_around_label);
17372 return 1;
17373 }
17374
17375 /* Expand the appropriate insns for doing strlen if not just doing
17376 repnz; scasb
17377
17378 out = result, initialized with the start address
17379 align_rtx = alignment of the address.
17380    scratch = scratch register, initialized with the start address when
17381 not aligned, otherwise undefined
17382
17383 This is just the body. It needs the initializations mentioned above and
17384 some address computing at the end. These things are done in i386.md. */
17385
17386 static void
17387 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
17388 {
17389 int align;
17390 rtx tmp;
17391 rtx align_2_label = NULL_RTX;
17392 rtx align_3_label = NULL_RTX;
17393 rtx align_4_label = gen_label_rtx ();
17394 rtx end_0_label = gen_label_rtx ();
17395 rtx mem;
17396 rtx tmpreg = gen_reg_rtx (SImode);
17397 rtx scratch = gen_reg_rtx (SImode);
17398 rtx cmp;
17399
17400 align = 0;
17401 if (CONST_INT_P (align_rtx))
17402 align = INTVAL (align_rtx);
17403
17404 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
17405
17406 /* Is there a known alignment and is it less than 4? */
17407 if (align < 4)
17408 {
17409 rtx scratch1 = gen_reg_rtx (Pmode);
17410 emit_move_insn (scratch1, out);
17411 /* Is there a known alignment and is it not 2? */
17412 if (align != 2)
17413 {
17414 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
17415 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
17416
17417 /* Leave just the 3 lower bits. */
17418 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
17419 NULL_RTX, 0, OPTAB_WIDEN);
17420
17421 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
17422 Pmode, 1, align_4_label);
17423 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
17424 Pmode, 1, align_2_label);
17425 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
17426 Pmode, 1, align_3_label);
17427 }
17428 else
17429 {
17430 	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
17431 	     check if it is aligned to a 4-byte boundary.  */
17432
17433 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
17434 NULL_RTX, 0, OPTAB_WIDEN);
17435
17436 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
17437 Pmode, 1, align_4_label);
17438 }
17439
17440 mem = change_address (src, QImode, out);
17441
17442 /* Now compare the bytes. */
17443
17444       /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
17445 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
17446 QImode, 1, end_0_label);
17447
17448 /* Increment the address. */
17449 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
17450
17451 /* Not needed with an alignment of 2 */
17452 if (align != 2)
17453 {
17454 emit_label (align_2_label);
17455
17456 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
17457 end_0_label);
17458
17459 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
17460
17461 emit_label (align_3_label);
17462 }
17463
17464 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
17465 end_0_label);
17466
17467 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
17468 }
17469
17470   /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
17471      align this loop: it only bloats the code and does not help to
17472      speed it up.  */
17473 emit_label (align_4_label);
17474
17475 mem = change_address (src, SImode, out);
17476 emit_move_insn (scratch, mem);
17477 emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
17478
17479   /* This formula yields a nonzero result iff one of the bytes is zero.
17480      This saves three branches inside the loop and many cycles.  */
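  /* For illustration, with scratch == 0x41420043 (a zero in the third byte):
     scratch + 0xfefefeff == 0x4040ff42, ~scratch == 0xbebdffbc, and their AND
     masked with 0x80808080 leaves 0x00008000, i.e. nonzero exactly because
     one of the bytes was zero.  */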
17481
17482 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
17483 emit_insn (gen_one_cmplsi2 (scratch, scratch));
17484 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
17485 emit_insn (gen_andsi3 (tmpreg, tmpreg,
17486 gen_int_mode (0x80808080, SImode)));
17487 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
17488 align_4_label);
17489
17490 if (TARGET_CMOVE)
17491 {
17492 rtx reg = gen_reg_rtx (SImode);
17493 rtx reg2 = gen_reg_rtx (Pmode);
17494 emit_move_insn (reg, tmpreg);
17495 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
17496
17497 /* If zero is not in the first two bytes, move two bytes forward. */
17498 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
17499 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
17500 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
17501 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
17502 gen_rtx_IF_THEN_ELSE (SImode, tmp,
17503 reg,
17504 tmpreg)));
17505 /* Emit lea manually to avoid clobbering of flags. */
17506 emit_insn (gen_rtx_SET (SImode, reg2,
17507 gen_rtx_PLUS (Pmode, out, const2_rtx)));
17508
17509 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
17510 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
17511 emit_insn (gen_rtx_SET (VOIDmode, out,
17512 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
17513 reg2,
17514 out)));
17515
17516 }
17517 else
17518 {
17519 rtx end_2_label = gen_label_rtx ();
17520 /* Is zero in the first two bytes? */
17521
17522 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
17523 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
17524 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
17525 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
17526 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
17527 pc_rtx);
17528 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
17529 JUMP_LABEL (tmp) = end_2_label;
17530
17531 /* Not in the first two. Move two bytes forward. */
17532 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
17533 emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
17534
17535 emit_label (end_2_label);
17536
17537 }
17538
17539 /* Avoid branch in fixing the byte. */
17540 tmpreg = gen_lowpart (QImode, tmpreg);
17541 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
17542 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
17543 emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
17544
17545 emit_label (end_0_label);
17546 }
17547
17548 /* Expand strlen. */
17549
17550 int
17551 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
17552 {
17553 rtx addr, scratch1, scratch2, scratch3, scratch4;
17554
17555   /* The generic case of the strlen expander is long.  Avoid its
17556      expansion unless TARGET_INLINE_ALL_STRINGOPS.  */
17557
17558 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
17559 && !TARGET_INLINE_ALL_STRINGOPS
17560 && !optimize_insn_for_size_p ()
17561 && (!CONST_INT_P (align) || INTVAL (align) < 4))
17562 return 0;
17563
17564 addr = force_reg (Pmode, XEXP (src, 0));
17565 scratch1 = gen_reg_rtx (Pmode);
17566
17567 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
17568 && !optimize_insn_for_size_p ())
17569 {
17570 /* Well it seems that some optimizer does not combine a call like
17571 foo(strlen(bar), strlen(bar));
17572 	 when the move and the subtraction are done here.  It does calculate
17573 	 the length just once when these instructions are done inside of
17574 	 output_strlen_unroll().  But I think that since &bar[strlen(bar)] is
17575 	 often used and I use one fewer register for the lifetime of
17576 	 output_strlen_unroll(), this is better.  */
17577
17578 emit_move_insn (out, addr);
17579
17580 ix86_expand_strlensi_unroll_1 (out, src, align);
17581
17582 /* strlensi_unroll_1 returns the address of the zero at the end of
17583 the string, like memchr(), so compute the length by subtracting
17584 the start address. */
17585 emit_insn ((*ix86_gen_sub3) (out, out, addr));
17586 }
17587 else
17588 {
17589 rtx unspec;
17590
17591 /* Can't use this if the user has appropriated eax, ecx, or edi. */
17592 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
17593 return false;
17594
17595 scratch2 = gen_reg_rtx (Pmode);
17596 scratch3 = gen_reg_rtx (Pmode);
17597 scratch4 = force_reg (Pmode, constm1_rtx);
17598
17599 emit_move_insn (scratch3, addr);
17600 eoschar = force_reg (QImode, eoschar);
17601
17602 src = replace_equiv_address_nv (src, scratch3);
17603
17604 /* If .md starts supporting :P, this can be done in .md. */
17605 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
17606 scratch4), UNSPEC_SCAS);
17607 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
17608 emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
17609 emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
17610 }
17611 return 1;
17612 }
17613
17614 /* For a given symbol (function) construct code to compute the address of its
17615    PLT entry in the large x86-64 PIC model.  */
17616 rtx
17617 construct_plt_address (rtx symbol)
17618 {
17619 rtx tmp = gen_reg_rtx (Pmode);
17620 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
17621
17622 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
17623 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
17624
17625 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
17626 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
17627 return tmp;
17628 }
17629
17630 void
17631 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
17632 rtx callarg2 ATTRIBUTE_UNUSED,
17633 rtx pop, int sibcall)
17634 {
17635 rtx use = NULL, call;
17636
17637 if (pop == const0_rtx)
17638 pop = NULL;
17639 gcc_assert (!TARGET_64BIT || !pop);
17640
17641 if (TARGET_MACHO && !TARGET_64BIT)
17642 {
17643 #if TARGET_MACHO
17644 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
17645 fnaddr = machopic_indirect_call_target (fnaddr);
17646 #endif
17647 }
17648 else
17649 {
17650 /* Static functions and indirect calls don't need the pic register. */
17651 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
17652 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
17653 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
17654 use_reg (&use, pic_offset_table_rtx);
17655 }
17656
17657 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
17658 {
17659 rtx al = gen_rtx_REG (QImode, AX_REG);
17660 emit_move_insn (al, callarg2);
17661 use_reg (&use, al);
17662 }
17663
17664 if (ix86_cmodel == CM_LARGE_PIC
17665 && GET_CODE (fnaddr) == MEM
17666 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
17667 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
17668 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
17669 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
17670 {
17671 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
17672 fnaddr = gen_rtx_MEM (QImode, fnaddr);
17673 }
17674 if (sibcall && TARGET_64BIT
17675 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
17676 {
17677 rtx addr;
17678 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
17679 fnaddr = gen_rtx_REG (Pmode, R11_REG);
17680 emit_move_insn (fnaddr, addr);
17681 fnaddr = gen_rtx_MEM (QImode, fnaddr);
17682 }
17683
17684 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
17685 if (retval)
17686 call = gen_rtx_SET (VOIDmode, retval, call);
17687 if (pop)
17688 {
17689 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
17690 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
17691 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
17692 }
17693
17694 call = emit_call_insn (call);
17695 if (use)
17696 CALL_INSN_FUNCTION_USAGE (call) = use;
17697 }
17698
17699 \f
17700 /* Clear stack slot assignments remembered from previous functions.
17701 This is called from INIT_EXPANDERS once before RTL is emitted for each
17702 function. */
17703
17704 static struct machine_function *
17705 ix86_init_machine_status (void)
17706 {
17707 struct machine_function *f;
17708
17709 f = GGC_CNEW (struct machine_function);
17710 f->use_fast_prologue_epilogue_nregs = -1;
17711 f->tls_descriptor_call_expanded_p = 0;
17712 f->call_abi = DEFAULT_ABI;
17713
17714 return f;
17715 }
17716
17717 /* Return a MEM corresponding to a stack slot with mode MODE.
17718 Allocate a new slot if necessary.
17719
17720 The RTL for a function can have several slots available: N is
17721 which slot to use. */
17722
17723 rtx
17724 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
17725 {
17726 struct stack_local_entry *s;
17727
17728 gcc_assert (n < MAX_386_STACK_LOCALS);
17729
17730 /* Virtual slot is valid only before vregs are instantiated. */
17731 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
17732
17733 for (s = ix86_stack_locals; s; s = s->next)
17734 if (s->mode == mode && s->n == n)
17735 return copy_rtx (s->rtl);
17736
17737 s = (struct stack_local_entry *)
17738 ggc_alloc (sizeof (struct stack_local_entry));
17739 s->n = n;
17740 s->mode = mode;
17741 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
17742
17743 s->next = ix86_stack_locals;
17744 ix86_stack_locals = s;
17745 return s->rtl;
17746 }
17747
17748 /* Construct the SYMBOL_REF for the tls_get_addr function. */
17749
17750 static GTY(()) rtx ix86_tls_symbol;
17751 rtx
17752 ix86_tls_get_addr (void)
17753 {
17754
17755 if (!ix86_tls_symbol)
17756 {
17757 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
17758 (TARGET_ANY_GNU_TLS
17759 && !TARGET_64BIT)
17760 ? "___tls_get_addr"
17761 : "__tls_get_addr");
17762 }
17763
17764 return ix86_tls_symbol;
17765 }
17766
17767 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
17768
17769 static GTY(()) rtx ix86_tls_module_base_symbol;
17770 rtx
17771 ix86_tls_module_base (void)
17772 {
17773
17774 if (!ix86_tls_module_base_symbol)
17775 {
17776 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
17777 "_TLS_MODULE_BASE_");
17778 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
17779 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
17780 }
17781
17782 return ix86_tls_module_base_symbol;
17783 }
17784 \f
17785 /* Calculate the length of the memory address in the instruction
17786 encoding. Does not include the one-byte modrm, opcode, or prefix. */
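/* Typical results (illustrative): a plain register base such as (%ebx) needs
   no extra bytes, a base plus an 8-bit displacement needs 1, an absolute
   address needs 4, and any address with an index register (or with %esp as
   the base) pays one extra byte for the SIB form.  */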
17787
17788 int
17789 memory_address_length (rtx addr)
17790 {
17791 struct ix86_address parts;
17792 rtx base, index, disp;
17793 int len;
17794 int ok;
17795
17796 if (GET_CODE (addr) == PRE_DEC
17797 || GET_CODE (addr) == POST_INC
17798 || GET_CODE (addr) == PRE_MODIFY
17799 || GET_CODE (addr) == POST_MODIFY)
17800 return 0;
17801
17802 ok = ix86_decompose_address (addr, &parts);
17803 gcc_assert (ok);
17804
17805 if (parts.base && GET_CODE (parts.base) == SUBREG)
17806 parts.base = SUBREG_REG (parts.base);
17807 if (parts.index && GET_CODE (parts.index) == SUBREG)
17808 parts.index = SUBREG_REG (parts.index);
17809
17810 base = parts.base;
17811 index = parts.index;
17812 disp = parts.disp;
17813 len = 0;
17814
17815 /* Rule of thumb:
17816 - esp as the base always wants an index,
17817 - ebp as the base always wants a displacement. */
17818
17819 /* Register Indirect. */
17820 if (base && !index && !disp)
17821 {
17822 /* esp (for its index) and ebp (for its displacement) need
17823 the two-byte modrm form. */
17824 if (addr == stack_pointer_rtx
17825 || addr == arg_pointer_rtx
17826 || addr == frame_pointer_rtx
17827 || addr == hard_frame_pointer_rtx)
17828 len = 1;
17829 }
17830
17831 /* Direct Addressing. */
17832 else if (disp && !base && !index)
17833 len = 4;
17834
17835 else
17836 {
17837 /* Find the length of the displacement constant. */
17838 if (disp)
17839 {
17840 if (base && satisfies_constraint_K (disp))
17841 len = 1;
17842 else
17843 len = 4;
17844 }
17845 /* ebp always wants a displacement. */
17846 else if (base == hard_frame_pointer_rtx)
17847 len = 1;
17848
17849 /* An index requires the two-byte modrm form.... */
17850 if (index
17851 /* ...like esp, which always wants an index. */
17852 || base == stack_pointer_rtx
17853 || base == arg_pointer_rtx
17854 || base == frame_pointer_rtx)
17855 len += 1;
17856 }
17857
17858 return len;
17859 }
17860
17861 /* Compute the default value for the "length_immediate" attribute.  When
17862    SHORTFORM is set, expect that the insn has an 8-bit immediate alternative.  */
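/* For example, addl $100000, %eax carries a 4-byte immediate, while the
   short-form alternative addl $4, %eax needs only 1 byte.  */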
17863 int
17864 ix86_attr_length_immediate_default (rtx insn, int shortform)
17865 {
17866 int len = 0;
17867 int i;
17868 extract_insn_cached (insn);
17869 for (i = recog_data.n_operands - 1; i >= 0; --i)
17870 if (CONSTANT_P (recog_data.operand[i]))
17871 {
17872 gcc_assert (!len);
17873 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
17874 len = 1;
17875 else
17876 {
17877 switch (get_attr_mode (insn))
17878 {
17879 case MODE_QI:
17880 len+=1;
17881 break;
17882 case MODE_HI:
17883 len+=2;
17884 break;
17885 case MODE_SI:
17886 len+=4;
17887 break;
17888 	      /* Immediates for DImode instructions are encoded as 32-bit sign-extended values.  */
17889 case MODE_DI:
17890 len+=4;
17891 break;
17892 default:
17893 fatal_insn ("unknown insn mode", insn);
17894 }
17895 }
17896 }
17897 return len;
17898 }
17899 /* Compute default value for "length_address" attribute. */
17900 int
17901 ix86_attr_length_address_default (rtx insn)
17902 {
17903 int i;
17904
17905 if (get_attr_type (insn) == TYPE_LEA)
17906 {
17907 rtx set = PATTERN (insn);
17908
17909 if (GET_CODE (set) == PARALLEL)
17910 set = XVECEXP (set, 0, 0);
17911
17912 gcc_assert (GET_CODE (set) == SET);
17913
17914 return memory_address_length (SET_SRC (set));
17915 }
17916
17917 extract_insn_cached (insn);
17918 for (i = recog_data.n_operands - 1; i >= 0; --i)
17919 if (MEM_P (recog_data.operand[i]))
17920 {
17921 return memory_address_length (XEXP (recog_data.operand[i], 0));
17922 break;
17923 }
17924 return 0;
17925 }
17926 \f
17927 /* Return the maximum number of instructions a cpu can issue. */
17928
17929 static int
17930 ix86_issue_rate (void)
17931 {
17932 switch (ix86_tune)
17933 {
17934 case PROCESSOR_PENTIUM:
17935 case PROCESSOR_K6:
17936 return 2;
17937
17938 case PROCESSOR_PENTIUMPRO:
17939 case PROCESSOR_PENTIUM4:
17940 case PROCESSOR_ATHLON:
17941 case PROCESSOR_K8:
17942 case PROCESSOR_AMDFAM10:
17943 case PROCESSOR_NOCONA:
17944 case PROCESSOR_GENERIC32:
17945 case PROCESSOR_GENERIC64:
17946 return 3;
17947
17948 case PROCESSOR_CORE2:
17949 return 4;
17950
17951 default:
17952 return 1;
17953 }
17954 }
17955
17956 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
17957    by DEP_INSN and nothing else set by DEP_INSN.  */
17958
17959 static int
17960 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
17961 {
17962 rtx set, set2;
17963
17964 /* Simplify the test for uninteresting insns. */
17965 if (insn_type != TYPE_SETCC
17966 && insn_type != TYPE_ICMOV
17967 && insn_type != TYPE_FCMOV
17968 && insn_type != TYPE_IBR)
17969 return 0;
17970
17971 if ((set = single_set (dep_insn)) != 0)
17972 {
17973 set = SET_DEST (set);
17974 set2 = NULL_RTX;
17975 }
17976 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
17977 && XVECLEN (PATTERN (dep_insn), 0) == 2
17978 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
17979 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
17980 {
17981 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
17982       set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
17983 }
17984 else
17985 return 0;
17986
17987 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
17988 return 0;
17989
17990 /* This test is true if the dependent insn reads the flags but
17991 not any other potentially set register. */
17992 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
17993 return 0;
17994
17995 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
17996 return 0;
17997
17998 return 1;
17999 }
18000
18001 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
18002 address with operands set by DEP_INSN. */
18003
18004 static int
18005 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
18006 {
18007 rtx addr;
18008
18009 if (insn_type == TYPE_LEA
18010 && TARGET_PENTIUM)
18011 {
18012 addr = PATTERN (insn);
18013
18014 if (GET_CODE (addr) == PARALLEL)
18015 addr = XVECEXP (addr, 0, 0);
18016
18017 gcc_assert (GET_CODE (addr) == SET);
18018
18019 addr = SET_SRC (addr);
18020 }
18021 else
18022 {
18023 int i;
18024 extract_insn_cached (insn);
18025 for (i = recog_data.n_operands - 1; i >= 0; --i)
18026 if (MEM_P (recog_data.operand[i]))
18027 {
18028 addr = XEXP (recog_data.operand[i], 0);
18029 goto found;
18030 }
18031 return 0;
18032 found:;
18033 }
18034
18035 return modified_in_p (addr, dep_insn);
18036 }
18037
18038 static int
18039 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
18040 {
18041 enum attr_type insn_type, dep_insn_type;
18042 enum attr_memory memory;
18043 rtx set, set2;
18044 int dep_insn_code_number;
18045
18046 /* Anti and output dependencies have zero cost on all CPUs. */
18047 if (REG_NOTE_KIND (link) != 0)
18048 return 0;
18049
18050 dep_insn_code_number = recog_memoized (dep_insn);
18051
18052 /* If we can't recognize the insns, we can't really do anything. */
18053 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
18054 return cost;
18055
18056 insn_type = get_attr_type (insn);
18057 dep_insn_type = get_attr_type (dep_insn);
18058
18059 switch (ix86_tune)
18060 {
18061 case PROCESSOR_PENTIUM:
18062 /* Address Generation Interlock adds a cycle of latency. */
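      /* For example, incl %esi immediately followed by movl (%esi), %eax
	 stalls for one extra cycle on the original Pentium.  */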
18063 if (ix86_agi_dependent (insn, dep_insn, insn_type))
18064 cost += 1;
18065
18066 /* ??? Compares pair with jump/setcc. */
18067 if (ix86_flags_dependent (insn, dep_insn, insn_type))
18068 cost = 0;
18069
18070 /* Floating point stores require value to be ready one cycle earlier. */
18071 if (insn_type == TYPE_FMOV
18072 && get_attr_memory (insn) == MEMORY_STORE
18073 && !ix86_agi_dependent (insn, dep_insn, insn_type))
18074 cost += 1;
18075 break;
18076
18077 case PROCESSOR_PENTIUMPRO:
18078 memory = get_attr_memory (insn);
18079
18080 /* INT->FP conversion is expensive. */
18081 if (get_attr_fp_int_src (dep_insn))
18082 cost += 5;
18083
18084 /* There is one cycle extra latency between an FP op and a store. */
18085 if (insn_type == TYPE_FMOV
18086 && (set = single_set (dep_insn)) != NULL_RTX
18087 && (set2 = single_set (insn)) != NULL_RTX
18088 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
18089 && MEM_P (SET_DEST (set2)))
18090 cost += 1;
18091
18092       /* Show the ability of the reorder buffer to hide the latency of a load
18093 	 by executing it in parallel with the previous instruction when the
18094 	 previous instruction is not needed to compute the address.  */
18095 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
18096 && !ix86_agi_dependent (insn, dep_insn, insn_type))
18097 {
18098 	  /* Claim moves to take one cycle, as the core can issue one load
18099 	     at a time and the next load can start a cycle later.  */
18100 if (dep_insn_type == TYPE_IMOV
18101 || dep_insn_type == TYPE_FMOV)
18102 cost = 1;
18103 else if (cost > 1)
18104 cost--;
18105 }
18106 break;
18107
18108 case PROCESSOR_K6:
18109 memory = get_attr_memory (insn);
18110
18111 /* The esp dependency is resolved before the instruction is really
18112 finished. */
18113 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
18114 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
18115 return 1;
18116
18117 /* INT->FP conversion is expensive. */
18118 if (get_attr_fp_int_src (dep_insn))
18119 cost += 5;
18120
18121       /* Show the ability of the reorder buffer to hide the latency of a load
18122 	 by executing it in parallel with the previous instruction when the
18123 	 previous instruction is not needed to compute the address.  */
18124 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
18125 && !ix86_agi_dependent (insn, dep_insn, insn_type))
18126 {
18127 	  /* Claim moves to take one cycle, as the core can issue one load
18128 	     at a time and the next load can start a cycle later.  */
18129 if (dep_insn_type == TYPE_IMOV
18130 || dep_insn_type == TYPE_FMOV)
18131 cost = 1;
18132 else if (cost > 2)
18133 cost -= 2;
18134 else
18135 cost = 1;
18136 }
18137 break;
18138
18139 case PROCESSOR_ATHLON:
18140 case PROCESSOR_K8:
18141 case PROCESSOR_AMDFAM10:
18142 case PROCESSOR_GENERIC32:
18143 case PROCESSOR_GENERIC64:
18144 memory = get_attr_memory (insn);
18145
18146       /* Show the ability of the reorder buffer to hide the latency of a load
18147 	 by executing it in parallel with the previous instruction when the
18148 	 previous instruction is not needed to compute the address.  */
18149 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
18150 && !ix86_agi_dependent (insn, dep_insn, insn_type))
18151 {
18152 enum attr_unit unit = get_attr_unit (insn);
18153 int loadcost = 3;
18154
18155 /* Because of the difference between the length of integer and
18156 floating unit pipeline preparation stages, the memory operands
18157 for floating point are cheaper.
18158
18159 	     ??? For Athlon the difference is most probably 2.  */
18160 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
18161 loadcost = 3;
18162 else
18163 loadcost = TARGET_ATHLON ? 2 : 0;
18164
18165 if (cost >= loadcost)
18166 cost -= loadcost;
18167 else
18168 cost = 0;
18169 }
18170
18171 default:
18172 break;
18173 }
18174
18175 return cost;
18176 }
18177
18178 /* How many alternative schedules to try. This should be as wide as the
18179 scheduling freedom in the DFA, but no wider. Making this value too
18180    large results in extra work for the scheduler.  */
18181
18182 static int
18183 ia32_multipass_dfa_lookahead (void)
18184 {
18185 switch (ix86_tune)
18186 {
18187 case PROCESSOR_PENTIUM:
18188 return 2;
18189
18190 case PROCESSOR_PENTIUMPRO:
18191 case PROCESSOR_K6:
18192 return 1;
18193
18194 default:
18195 return 0;
18196 }
18197 }
18198
18199 \f
18200 /* Compute the alignment given to a constant that is being placed in memory.
18201 EXP is the constant and ALIGN is the alignment that the object would
18202 ordinarily have.
18203 The value of this function is used instead of that alignment to align
18204 the object. */
18205
18206 int
18207 ix86_constant_alignment (tree exp, int align)
18208 {
18209 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
18210 || TREE_CODE (exp) == INTEGER_CST)
18211 {
18212 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
18213 return 64;
18214 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
18215 return 128;
18216 }
18217 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
18218 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
18219 return BITS_PER_WORD;
18220
18221 return align;
18222 }
18223
18224 /* Compute the alignment for a static variable.
18225 TYPE is the data type, and ALIGN is the alignment that
18226 the object would ordinarily have. The value of this function is used
18227 instead of that alignment to align the object. */
18228
18229 int
18230 ix86_data_alignment (tree type, int align)
18231 {
18232 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
18233
18234 if (AGGREGATE_TYPE_P (type)
18235 && TYPE_SIZE (type)
18236 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
18237 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
18238 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
18239 && align < max_align)
18240 align = max_align;
18241
18242   /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
18243      to a 16-byte boundary.  */
18244 if (TARGET_64BIT)
18245 {
18246 if (AGGREGATE_TYPE_P (type)
18247 && TYPE_SIZE (type)
18248 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
18249 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
18250 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
18251 return 128;
18252 }
18253
18254 if (TREE_CODE (type) == ARRAY_TYPE)
18255 {
18256 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
18257 return 64;
18258 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
18259 return 128;
18260 }
18261 else if (TREE_CODE (type) == COMPLEX_TYPE)
18262 {
18263
18264 if (TYPE_MODE (type) == DCmode && align < 64)
18265 return 64;
18266 if ((TYPE_MODE (type) == XCmode
18267 || TYPE_MODE (type) == TCmode) && align < 128)
18268 return 128;
18269 }
18270 else if ((TREE_CODE (type) == RECORD_TYPE
18271 || TREE_CODE (type) == UNION_TYPE
18272 || TREE_CODE (type) == QUAL_UNION_TYPE)
18273 && TYPE_FIELDS (type))
18274 {
18275 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
18276 return 64;
18277 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
18278 return 128;
18279 }
18280 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
18281 || TREE_CODE (type) == INTEGER_TYPE)
18282 {
18283 if (TYPE_MODE (type) == DFmode && align < 64)
18284 return 64;
18285 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
18286 return 128;
18287 }
18288
18289 return align;
18290 }
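
/* Added sketch, not part of the original file: a worked example of the
   x86-64 aggregate rule above.  For code compiled with -m64, a definition
   like "static char buf[32];" has TYPE_SIZE of 256 bits (>= 128), so the
   alignment returned is raised to 128 bits (16 bytes), while a 12-byte
   array keeps whatever ALIGN the front end requested.  The helper below is
   hypothetical and only restates that size test in plain integers.  */

static int
example_x86_64_aggregate_alignment (int size_in_bits, int align)
{
  if (size_in_bits >= 128 && align < 128)
    return 128;
  return align;
}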
18291
18292 /* Compute the alignment for a local variable or a stack slot. TYPE is
18293 the data type, MODE is the widest mode available and ALIGN is the
18294 alignment that the object would ordinarily have. The value of this
18295 macro is used instead of that alignment to align the object. */
18296
18297 unsigned int
18298 ix86_local_alignment (tree type, enum machine_mode mode,
18299 unsigned int align)
18300 {
18301 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
18302 register in MODE. We will return the largest alignment of XF
18303 and DF. */
18304 if (!type)
18305 {
18306 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
18307 align = GET_MODE_ALIGNMENT (DFmode);
18308 return align;
18309 }
18310
18311 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
18312 to a 16-byte boundary. */
18313 if (TARGET_64BIT)
18314 {
18315 if (AGGREGATE_TYPE_P (type)
18316 && TYPE_SIZE (type)
18317 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
18318 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
18319 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
18320 return 128;
18321 }
18322 if (TREE_CODE (type) == ARRAY_TYPE)
18323 {
18324 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
18325 return 64;
18326 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
18327 return 128;
18328 }
18329 else if (TREE_CODE (type) == COMPLEX_TYPE)
18330 {
18331 if (TYPE_MODE (type) == DCmode && align < 64)
18332 return 64;
18333 if ((TYPE_MODE (type) == XCmode
18334 || TYPE_MODE (type) == TCmode) && align < 128)
18335 return 128;
18336 }
18337 else if ((TREE_CODE (type) == RECORD_TYPE
18338 || TREE_CODE (type) == UNION_TYPE
18339 || TREE_CODE (type) == QUAL_UNION_TYPE)
18340 && TYPE_FIELDS (type))
18341 {
18342 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
18343 return 64;
18344 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
18345 return 128;
18346 }
18347 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
18348 || TREE_CODE (type) == INTEGER_TYPE)
18349 {
18351 if (TYPE_MODE (type) == DFmode && align < 64)
18352 return 64;
18353 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
18354 return 128;
18355 }
18356 return align;
18357 }
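
/* Added sketch, not part of the original sources: the NULL-type path above
   in practice.  When a stack slot is allocated to caller-save an x87 value
   in XFmode, whose default alignment on ia32 is below 64 bits, the hook
   raises the slot's alignment to GET_MODE_ALIGNMENT (DFmode) so the slot
   can also be accessed efficiently in DFmode.  The helper name below is
   hypothetical.  */

static unsigned int
example_caller_save_slot_alignment (enum machine_mode mode, unsigned int align)
{
  if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
    align = GET_MODE_ALIGNMENT (DFmode);
  return align;
}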
18358 \f
18359 /* Emit RTL insns to initialize the variable parts of a trampoline.
18360 FNADDR is an RTX for the address of the function's pure code.
18361 CXT is an RTX for the static chain value for the function. */
18362 void
18363 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
18364 {
18365 if (!TARGET_64BIT)
18366 {
18367 /* Compute offset from the end of the jmp to the target function. */
18368 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
18369 plus_constant (tramp, 10),
18370 NULL_RTX, 1, OPTAB_DIRECT);
18371 emit_move_insn (gen_rtx_MEM (QImode, tramp),
18372 gen_int_mode (0xb9, QImode));
18373 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
18374 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
18375 gen_int_mode (0xe9, QImode));
18376 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
18377 }
18378 else
18379 {
18380 int offset = 0;
18381 /* Try to load the address using the shorter movl instead of movabs.
18382 We may want to support movq for kernel mode, but the kernel does not use
18383 trampolines at the moment. */
18384 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
18385 {
18386 fnaddr = copy_to_mode_reg (DImode, fnaddr);
18387 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
18388 gen_int_mode (0xbb41, HImode));
18389 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
18390 gen_lowpart (SImode, fnaddr));
18391 offset += 6;
18392 }
18393 else
18394 {
18395 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
18396 gen_int_mode (0xbb49, HImode));
18397 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
18398 fnaddr);
18399 offset += 10;
18400 }
18401 /* Load static chain using movabs to r10. */
18402 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
18403 gen_int_mode (0xba49, HImode));
18404 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
18405 cxt);
18406 offset += 10;
18407 /* Jump to r11.  */
18408 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
18409 gen_int_mode (0xff49, HImode));
18410 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
18411 gen_int_mode (0xe3, QImode));
18412 offset += 3;
18413 gcc_assert (offset <= TRAMPOLINE_SIZE);
18414 }
18415
18416 #ifdef ENABLE_EXECUTE_STACK
18417 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
18418 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
18419 #endif
18420 }
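
/* Added sketch, not part of the original file: the byte layout produced by
   x86_initialize_trampoline above, for reference.  The 32-bit trampoline is
   "b9 <cxt32>" (movl $cxt, %ecx) followed by "e9 <disp32>" (jmp to FNADDR).
   The array below lists only the fixed opcode bytes of the 64-bit form, with
   the immediates elided; its name is hypothetical.  */

static const unsigned char example_tramp64_opcodes[] ATTRIBUTE_UNUSED =
{
  0x49, 0xbb,        /* movabs $fnaddr, %r11 (0x41, 0xbb for the movl form) */
                     /* ... 8-byte (or 4-byte) fnaddr immediate ...         */
  0x49, 0xba,        /* movabs $cxt, %r10                                   */
                     /* ... 8-byte static chain immediate ...               */
  0x49, 0xff, 0xe3   /* jmp *%r11                                           */
};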
18421 \f
18422 /* Codes for all the SSE/MMX builtins. */
18423 enum ix86_builtins
18424 {
18425 IX86_BUILTIN_ADDPS,
18426 IX86_BUILTIN_ADDSS,
18427 IX86_BUILTIN_DIVPS,
18428 IX86_BUILTIN_DIVSS,
18429 IX86_BUILTIN_MULPS,
18430 IX86_BUILTIN_MULSS,
18431 IX86_BUILTIN_SUBPS,
18432 IX86_BUILTIN_SUBSS,
18433
18434 IX86_BUILTIN_CMPEQPS,
18435 IX86_BUILTIN_CMPLTPS,
18436 IX86_BUILTIN_CMPLEPS,
18437 IX86_BUILTIN_CMPGTPS,
18438 IX86_BUILTIN_CMPGEPS,
18439 IX86_BUILTIN_CMPNEQPS,
18440 IX86_BUILTIN_CMPNLTPS,
18441 IX86_BUILTIN_CMPNLEPS,
18442 IX86_BUILTIN_CMPNGTPS,
18443 IX86_BUILTIN_CMPNGEPS,
18444 IX86_BUILTIN_CMPORDPS,
18445 IX86_BUILTIN_CMPUNORDPS,
18446 IX86_BUILTIN_CMPEQSS,
18447 IX86_BUILTIN_CMPLTSS,
18448 IX86_BUILTIN_CMPLESS,
18449 IX86_BUILTIN_CMPNEQSS,
18450 IX86_BUILTIN_CMPNLTSS,
18451 IX86_BUILTIN_CMPNLESS,
18452 IX86_BUILTIN_CMPNGTSS,
18453 IX86_BUILTIN_CMPNGESS,
18454 IX86_BUILTIN_CMPORDSS,
18455 IX86_BUILTIN_CMPUNORDSS,
18456
18457 IX86_BUILTIN_COMIEQSS,
18458 IX86_BUILTIN_COMILTSS,
18459 IX86_BUILTIN_COMILESS,
18460 IX86_BUILTIN_COMIGTSS,
18461 IX86_BUILTIN_COMIGESS,
18462 IX86_BUILTIN_COMINEQSS,
18463 IX86_BUILTIN_UCOMIEQSS,
18464 IX86_BUILTIN_UCOMILTSS,
18465 IX86_BUILTIN_UCOMILESS,
18466 IX86_BUILTIN_UCOMIGTSS,
18467 IX86_BUILTIN_UCOMIGESS,
18468 IX86_BUILTIN_UCOMINEQSS,
18469
18470 IX86_BUILTIN_CVTPI2PS,
18471 IX86_BUILTIN_CVTPS2PI,
18472 IX86_BUILTIN_CVTSI2SS,
18473 IX86_BUILTIN_CVTSI642SS,
18474 IX86_BUILTIN_CVTSS2SI,
18475 IX86_BUILTIN_CVTSS2SI64,
18476 IX86_BUILTIN_CVTTPS2PI,
18477 IX86_BUILTIN_CVTTSS2SI,
18478 IX86_BUILTIN_CVTTSS2SI64,
18479
18480 IX86_BUILTIN_MAXPS,
18481 IX86_BUILTIN_MAXSS,
18482 IX86_BUILTIN_MINPS,
18483 IX86_BUILTIN_MINSS,
18484
18485 IX86_BUILTIN_LOADUPS,
18486 IX86_BUILTIN_STOREUPS,
18487 IX86_BUILTIN_MOVSS,
18488
18489 IX86_BUILTIN_MOVHLPS,
18490 IX86_BUILTIN_MOVLHPS,
18491 IX86_BUILTIN_LOADHPS,
18492 IX86_BUILTIN_LOADLPS,
18493 IX86_BUILTIN_STOREHPS,
18494 IX86_BUILTIN_STORELPS,
18495
18496 IX86_BUILTIN_MASKMOVQ,
18497 IX86_BUILTIN_MOVMSKPS,
18498 IX86_BUILTIN_PMOVMSKB,
18499
18500 IX86_BUILTIN_MOVNTPS,
18501 IX86_BUILTIN_MOVNTQ,
18502
18503 IX86_BUILTIN_LOADDQU,
18504 IX86_BUILTIN_STOREDQU,
18505
18506 IX86_BUILTIN_PACKSSWB,
18507 IX86_BUILTIN_PACKSSDW,
18508 IX86_BUILTIN_PACKUSWB,
18509
18510 IX86_BUILTIN_PADDB,
18511 IX86_BUILTIN_PADDW,
18512 IX86_BUILTIN_PADDD,
18513 IX86_BUILTIN_PADDQ,
18514 IX86_BUILTIN_PADDSB,
18515 IX86_BUILTIN_PADDSW,
18516 IX86_BUILTIN_PADDUSB,
18517 IX86_BUILTIN_PADDUSW,
18518 IX86_BUILTIN_PSUBB,
18519 IX86_BUILTIN_PSUBW,
18520 IX86_BUILTIN_PSUBD,
18521 IX86_BUILTIN_PSUBQ,
18522 IX86_BUILTIN_PSUBSB,
18523 IX86_BUILTIN_PSUBSW,
18524 IX86_BUILTIN_PSUBUSB,
18525 IX86_BUILTIN_PSUBUSW,
18526
18527 IX86_BUILTIN_PAND,
18528 IX86_BUILTIN_PANDN,
18529 IX86_BUILTIN_POR,
18530 IX86_BUILTIN_PXOR,
18531
18532 IX86_BUILTIN_PAVGB,
18533 IX86_BUILTIN_PAVGW,
18534
18535 IX86_BUILTIN_PCMPEQB,
18536 IX86_BUILTIN_PCMPEQW,
18537 IX86_BUILTIN_PCMPEQD,
18538 IX86_BUILTIN_PCMPGTB,
18539 IX86_BUILTIN_PCMPGTW,
18540 IX86_BUILTIN_PCMPGTD,
18541
18542 IX86_BUILTIN_PMADDWD,
18543
18544 IX86_BUILTIN_PMAXSW,
18545 IX86_BUILTIN_PMAXUB,
18546 IX86_BUILTIN_PMINSW,
18547 IX86_BUILTIN_PMINUB,
18548
18549 IX86_BUILTIN_PMULHUW,
18550 IX86_BUILTIN_PMULHW,
18551 IX86_BUILTIN_PMULLW,
18552
18553 IX86_BUILTIN_PSADBW,
18554 IX86_BUILTIN_PSHUFW,
18555
18556 IX86_BUILTIN_PSLLW,
18557 IX86_BUILTIN_PSLLD,
18558 IX86_BUILTIN_PSLLQ,
18559 IX86_BUILTIN_PSRAW,
18560 IX86_BUILTIN_PSRAD,
18561 IX86_BUILTIN_PSRLW,
18562 IX86_BUILTIN_PSRLD,
18563 IX86_BUILTIN_PSRLQ,
18564 IX86_BUILTIN_PSLLWI,
18565 IX86_BUILTIN_PSLLDI,
18566 IX86_BUILTIN_PSLLQI,
18567 IX86_BUILTIN_PSRAWI,
18568 IX86_BUILTIN_PSRADI,
18569 IX86_BUILTIN_PSRLWI,
18570 IX86_BUILTIN_PSRLDI,
18571 IX86_BUILTIN_PSRLQI,
18572
18573 IX86_BUILTIN_PUNPCKHBW,
18574 IX86_BUILTIN_PUNPCKHWD,
18575 IX86_BUILTIN_PUNPCKHDQ,
18576 IX86_BUILTIN_PUNPCKLBW,
18577 IX86_BUILTIN_PUNPCKLWD,
18578 IX86_BUILTIN_PUNPCKLDQ,
18579
18580 IX86_BUILTIN_SHUFPS,
18581
18582 IX86_BUILTIN_RCPPS,
18583 IX86_BUILTIN_RCPSS,
18584 IX86_BUILTIN_RSQRTPS,
18585 IX86_BUILTIN_RSQRTPS_NR,
18586 IX86_BUILTIN_RSQRTSS,
18587 IX86_BUILTIN_RSQRTF,
18588 IX86_BUILTIN_SQRTPS,
18589 IX86_BUILTIN_SQRTPS_NR,
18590 IX86_BUILTIN_SQRTSS,
18591
18592 IX86_BUILTIN_UNPCKHPS,
18593 IX86_BUILTIN_UNPCKLPS,
18594
18595 IX86_BUILTIN_ANDPS,
18596 IX86_BUILTIN_ANDNPS,
18597 IX86_BUILTIN_ORPS,
18598 IX86_BUILTIN_XORPS,
18599
18600 IX86_BUILTIN_EMMS,
18601 IX86_BUILTIN_LDMXCSR,
18602 IX86_BUILTIN_STMXCSR,
18603 IX86_BUILTIN_SFENCE,
18604
18605 /* 3DNow! Original */
18606 IX86_BUILTIN_FEMMS,
18607 IX86_BUILTIN_PAVGUSB,
18608 IX86_BUILTIN_PF2ID,
18609 IX86_BUILTIN_PFACC,
18610 IX86_BUILTIN_PFADD,
18611 IX86_BUILTIN_PFCMPEQ,
18612 IX86_BUILTIN_PFCMPGE,
18613 IX86_BUILTIN_PFCMPGT,
18614 IX86_BUILTIN_PFMAX,
18615 IX86_BUILTIN_PFMIN,
18616 IX86_BUILTIN_PFMUL,
18617 IX86_BUILTIN_PFRCP,
18618 IX86_BUILTIN_PFRCPIT1,
18619 IX86_BUILTIN_PFRCPIT2,
18620 IX86_BUILTIN_PFRSQIT1,
18621 IX86_BUILTIN_PFRSQRT,
18622 IX86_BUILTIN_PFSUB,
18623 IX86_BUILTIN_PFSUBR,
18624 IX86_BUILTIN_PI2FD,
18625 IX86_BUILTIN_PMULHRW,
18626
18627 /* 3DNow! Athlon Extensions */
18628 IX86_BUILTIN_PF2IW,
18629 IX86_BUILTIN_PFNACC,
18630 IX86_BUILTIN_PFPNACC,
18631 IX86_BUILTIN_PI2FW,
18632 IX86_BUILTIN_PSWAPDSI,
18633 IX86_BUILTIN_PSWAPDSF,
18634
18635 /* SSE2 */
18636 IX86_BUILTIN_ADDPD,
18637 IX86_BUILTIN_ADDSD,
18638 IX86_BUILTIN_DIVPD,
18639 IX86_BUILTIN_DIVSD,
18640 IX86_BUILTIN_MULPD,
18641 IX86_BUILTIN_MULSD,
18642 IX86_BUILTIN_SUBPD,
18643 IX86_BUILTIN_SUBSD,
18644
18645 IX86_BUILTIN_CMPEQPD,
18646 IX86_BUILTIN_CMPLTPD,
18647 IX86_BUILTIN_CMPLEPD,
18648 IX86_BUILTIN_CMPGTPD,
18649 IX86_BUILTIN_CMPGEPD,
18650 IX86_BUILTIN_CMPNEQPD,
18651 IX86_BUILTIN_CMPNLTPD,
18652 IX86_BUILTIN_CMPNLEPD,
18653 IX86_BUILTIN_CMPNGTPD,
18654 IX86_BUILTIN_CMPNGEPD,
18655 IX86_BUILTIN_CMPORDPD,
18656 IX86_BUILTIN_CMPUNORDPD,
18657 IX86_BUILTIN_CMPEQSD,
18658 IX86_BUILTIN_CMPLTSD,
18659 IX86_BUILTIN_CMPLESD,
18660 IX86_BUILTIN_CMPNEQSD,
18661 IX86_BUILTIN_CMPNLTSD,
18662 IX86_BUILTIN_CMPNLESD,
18663 IX86_BUILTIN_CMPORDSD,
18664 IX86_BUILTIN_CMPUNORDSD,
18665
18666 IX86_BUILTIN_COMIEQSD,
18667 IX86_BUILTIN_COMILTSD,
18668 IX86_BUILTIN_COMILESD,
18669 IX86_BUILTIN_COMIGTSD,
18670 IX86_BUILTIN_COMIGESD,
18671 IX86_BUILTIN_COMINEQSD,
18672 IX86_BUILTIN_UCOMIEQSD,
18673 IX86_BUILTIN_UCOMILTSD,
18674 IX86_BUILTIN_UCOMILESD,
18675 IX86_BUILTIN_UCOMIGTSD,
18676 IX86_BUILTIN_UCOMIGESD,
18677 IX86_BUILTIN_UCOMINEQSD,
18678
18679 IX86_BUILTIN_MAXPD,
18680 IX86_BUILTIN_MAXSD,
18681 IX86_BUILTIN_MINPD,
18682 IX86_BUILTIN_MINSD,
18683
18684 IX86_BUILTIN_ANDPD,
18685 IX86_BUILTIN_ANDNPD,
18686 IX86_BUILTIN_ORPD,
18687 IX86_BUILTIN_XORPD,
18688
18689 IX86_BUILTIN_SQRTPD,
18690 IX86_BUILTIN_SQRTSD,
18691
18692 IX86_BUILTIN_UNPCKHPD,
18693 IX86_BUILTIN_UNPCKLPD,
18694
18695 IX86_BUILTIN_SHUFPD,
18696
18697 IX86_BUILTIN_LOADUPD,
18698 IX86_BUILTIN_STOREUPD,
18699 IX86_BUILTIN_MOVSD,
18700
18701 IX86_BUILTIN_LOADHPD,
18702 IX86_BUILTIN_LOADLPD,
18703
18704 IX86_BUILTIN_CVTDQ2PD,
18705 IX86_BUILTIN_CVTDQ2PS,
18706
18707 IX86_BUILTIN_CVTPD2DQ,
18708 IX86_BUILTIN_CVTPD2PI,
18709 IX86_BUILTIN_CVTPD2PS,
18710 IX86_BUILTIN_CVTTPD2DQ,
18711 IX86_BUILTIN_CVTTPD2PI,
18712
18713 IX86_BUILTIN_CVTPI2PD,
18714 IX86_BUILTIN_CVTSI2SD,
18715 IX86_BUILTIN_CVTSI642SD,
18716
18717 IX86_BUILTIN_CVTSD2SI,
18718 IX86_BUILTIN_CVTSD2SI64,
18719 IX86_BUILTIN_CVTSD2SS,
18720 IX86_BUILTIN_CVTSS2SD,
18721 IX86_BUILTIN_CVTTSD2SI,
18722 IX86_BUILTIN_CVTTSD2SI64,
18723
18724 IX86_BUILTIN_CVTPS2DQ,
18725 IX86_BUILTIN_CVTPS2PD,
18726 IX86_BUILTIN_CVTTPS2DQ,
18727
18728 IX86_BUILTIN_MOVNTI,
18729 IX86_BUILTIN_MOVNTPD,
18730 IX86_BUILTIN_MOVNTDQ,
18731
18732 IX86_BUILTIN_MOVQ128,
18733
18734 /* SSE2 MMX */
18735 IX86_BUILTIN_MASKMOVDQU,
18736 IX86_BUILTIN_MOVMSKPD,
18737 IX86_BUILTIN_PMOVMSKB128,
18738
18739 IX86_BUILTIN_PACKSSWB128,
18740 IX86_BUILTIN_PACKSSDW128,
18741 IX86_BUILTIN_PACKUSWB128,
18742
18743 IX86_BUILTIN_PADDB128,
18744 IX86_BUILTIN_PADDW128,
18745 IX86_BUILTIN_PADDD128,
18746 IX86_BUILTIN_PADDQ128,
18747 IX86_BUILTIN_PADDSB128,
18748 IX86_BUILTIN_PADDSW128,
18749 IX86_BUILTIN_PADDUSB128,
18750 IX86_BUILTIN_PADDUSW128,
18751 IX86_BUILTIN_PSUBB128,
18752 IX86_BUILTIN_PSUBW128,
18753 IX86_BUILTIN_PSUBD128,
18754 IX86_BUILTIN_PSUBQ128,
18755 IX86_BUILTIN_PSUBSB128,
18756 IX86_BUILTIN_PSUBSW128,
18757 IX86_BUILTIN_PSUBUSB128,
18758 IX86_BUILTIN_PSUBUSW128,
18759
18760 IX86_BUILTIN_PAND128,
18761 IX86_BUILTIN_PANDN128,
18762 IX86_BUILTIN_POR128,
18763 IX86_BUILTIN_PXOR128,
18764
18765 IX86_BUILTIN_PAVGB128,
18766 IX86_BUILTIN_PAVGW128,
18767
18768 IX86_BUILTIN_PCMPEQB128,
18769 IX86_BUILTIN_PCMPEQW128,
18770 IX86_BUILTIN_PCMPEQD128,
18771 IX86_BUILTIN_PCMPGTB128,
18772 IX86_BUILTIN_PCMPGTW128,
18773 IX86_BUILTIN_PCMPGTD128,
18774
18775 IX86_BUILTIN_PMADDWD128,
18776
18777 IX86_BUILTIN_PMAXSW128,
18778 IX86_BUILTIN_PMAXUB128,
18779 IX86_BUILTIN_PMINSW128,
18780 IX86_BUILTIN_PMINUB128,
18781
18782 IX86_BUILTIN_PMULUDQ,
18783 IX86_BUILTIN_PMULUDQ128,
18784 IX86_BUILTIN_PMULHUW128,
18785 IX86_BUILTIN_PMULHW128,
18786 IX86_BUILTIN_PMULLW128,
18787
18788 IX86_BUILTIN_PSADBW128,
18789 IX86_BUILTIN_PSHUFHW,
18790 IX86_BUILTIN_PSHUFLW,
18791 IX86_BUILTIN_PSHUFD,
18792
18793 IX86_BUILTIN_PSLLDQI128,
18794 IX86_BUILTIN_PSLLWI128,
18795 IX86_BUILTIN_PSLLDI128,
18796 IX86_BUILTIN_PSLLQI128,
18797 IX86_BUILTIN_PSRAWI128,
18798 IX86_BUILTIN_PSRADI128,
18799 IX86_BUILTIN_PSRLDQI128,
18800 IX86_BUILTIN_PSRLWI128,
18801 IX86_BUILTIN_PSRLDI128,
18802 IX86_BUILTIN_PSRLQI128,
18803
18804 IX86_BUILTIN_PSLLDQ128,
18805 IX86_BUILTIN_PSLLW128,
18806 IX86_BUILTIN_PSLLD128,
18807 IX86_BUILTIN_PSLLQ128,
18808 IX86_BUILTIN_PSRAW128,
18809 IX86_BUILTIN_PSRAD128,
18810 IX86_BUILTIN_PSRLW128,
18811 IX86_BUILTIN_PSRLD128,
18812 IX86_BUILTIN_PSRLQ128,
18813
18814 IX86_BUILTIN_PUNPCKHBW128,
18815 IX86_BUILTIN_PUNPCKHWD128,
18816 IX86_BUILTIN_PUNPCKHDQ128,
18817 IX86_BUILTIN_PUNPCKHQDQ128,
18818 IX86_BUILTIN_PUNPCKLBW128,
18819 IX86_BUILTIN_PUNPCKLWD128,
18820 IX86_BUILTIN_PUNPCKLDQ128,
18821 IX86_BUILTIN_PUNPCKLQDQ128,
18822
18823 IX86_BUILTIN_CLFLUSH,
18824 IX86_BUILTIN_MFENCE,
18825 IX86_BUILTIN_LFENCE,
18826
18827 /* SSE3. */
18828 IX86_BUILTIN_ADDSUBPS,
18829 IX86_BUILTIN_HADDPS,
18830 IX86_BUILTIN_HSUBPS,
18831 IX86_BUILTIN_MOVSHDUP,
18832 IX86_BUILTIN_MOVSLDUP,
18833 IX86_BUILTIN_ADDSUBPD,
18834 IX86_BUILTIN_HADDPD,
18835 IX86_BUILTIN_HSUBPD,
18836 IX86_BUILTIN_LDDQU,
18837
18838 IX86_BUILTIN_MONITOR,
18839 IX86_BUILTIN_MWAIT,
18840
18841 /* SSSE3. */
18842 IX86_BUILTIN_PHADDW,
18843 IX86_BUILTIN_PHADDD,
18844 IX86_BUILTIN_PHADDSW,
18845 IX86_BUILTIN_PHSUBW,
18846 IX86_BUILTIN_PHSUBD,
18847 IX86_BUILTIN_PHSUBSW,
18848 IX86_BUILTIN_PMADDUBSW,
18849 IX86_BUILTIN_PMULHRSW,
18850 IX86_BUILTIN_PSHUFB,
18851 IX86_BUILTIN_PSIGNB,
18852 IX86_BUILTIN_PSIGNW,
18853 IX86_BUILTIN_PSIGND,
18854 IX86_BUILTIN_PALIGNR,
18855 IX86_BUILTIN_PABSB,
18856 IX86_BUILTIN_PABSW,
18857 IX86_BUILTIN_PABSD,
18858
18859 IX86_BUILTIN_PHADDW128,
18860 IX86_BUILTIN_PHADDD128,
18861 IX86_BUILTIN_PHADDSW128,
18862 IX86_BUILTIN_PHSUBW128,
18863 IX86_BUILTIN_PHSUBD128,
18864 IX86_BUILTIN_PHSUBSW128,
18865 IX86_BUILTIN_PMADDUBSW128,
18866 IX86_BUILTIN_PMULHRSW128,
18867 IX86_BUILTIN_PSHUFB128,
18868 IX86_BUILTIN_PSIGNB128,
18869 IX86_BUILTIN_PSIGNW128,
18870 IX86_BUILTIN_PSIGND128,
18871 IX86_BUILTIN_PALIGNR128,
18872 IX86_BUILTIN_PABSB128,
18873 IX86_BUILTIN_PABSW128,
18874 IX86_BUILTIN_PABSD128,
18875
18876 /* AMDFAM10 - SSE4A New Instructions. */
18877 IX86_BUILTIN_MOVNTSD,
18878 IX86_BUILTIN_MOVNTSS,
18879 IX86_BUILTIN_EXTRQI,
18880 IX86_BUILTIN_EXTRQ,
18881 IX86_BUILTIN_INSERTQI,
18882 IX86_BUILTIN_INSERTQ,
18883
18884 /* SSE4.1. */
18885 IX86_BUILTIN_BLENDPD,
18886 IX86_BUILTIN_BLENDPS,
18887 IX86_BUILTIN_BLENDVPD,
18888 IX86_BUILTIN_BLENDVPS,
18889 IX86_BUILTIN_PBLENDVB128,
18890 IX86_BUILTIN_PBLENDW128,
18891
18892 IX86_BUILTIN_DPPD,
18893 IX86_BUILTIN_DPPS,
18894
18895 IX86_BUILTIN_INSERTPS128,
18896
18897 IX86_BUILTIN_MOVNTDQA,
18898 IX86_BUILTIN_MPSADBW128,
18899 IX86_BUILTIN_PACKUSDW128,
18900 IX86_BUILTIN_PCMPEQQ,
18901 IX86_BUILTIN_PHMINPOSUW128,
18902
18903 IX86_BUILTIN_PMAXSB128,
18904 IX86_BUILTIN_PMAXSD128,
18905 IX86_BUILTIN_PMAXUD128,
18906 IX86_BUILTIN_PMAXUW128,
18907
18908 IX86_BUILTIN_PMINSB128,
18909 IX86_BUILTIN_PMINSD128,
18910 IX86_BUILTIN_PMINUD128,
18911 IX86_BUILTIN_PMINUW128,
18912
18913 IX86_BUILTIN_PMOVSXBW128,
18914 IX86_BUILTIN_PMOVSXBD128,
18915 IX86_BUILTIN_PMOVSXBQ128,
18916 IX86_BUILTIN_PMOVSXWD128,
18917 IX86_BUILTIN_PMOVSXWQ128,
18918 IX86_BUILTIN_PMOVSXDQ128,
18919
18920 IX86_BUILTIN_PMOVZXBW128,
18921 IX86_BUILTIN_PMOVZXBD128,
18922 IX86_BUILTIN_PMOVZXBQ128,
18923 IX86_BUILTIN_PMOVZXWD128,
18924 IX86_BUILTIN_PMOVZXWQ128,
18925 IX86_BUILTIN_PMOVZXDQ128,
18926
18927 IX86_BUILTIN_PMULDQ128,
18928 IX86_BUILTIN_PMULLD128,
18929
18930 IX86_BUILTIN_ROUNDPD,
18931 IX86_BUILTIN_ROUNDPS,
18932 IX86_BUILTIN_ROUNDSD,
18933 IX86_BUILTIN_ROUNDSS,
18934
18935 IX86_BUILTIN_PTESTZ,
18936 IX86_BUILTIN_PTESTC,
18937 IX86_BUILTIN_PTESTNZC,
18938
18939 IX86_BUILTIN_VEC_INIT_V2SI,
18940 IX86_BUILTIN_VEC_INIT_V4HI,
18941 IX86_BUILTIN_VEC_INIT_V8QI,
18942 IX86_BUILTIN_VEC_EXT_V2DF,
18943 IX86_BUILTIN_VEC_EXT_V2DI,
18944 IX86_BUILTIN_VEC_EXT_V4SF,
18945 IX86_BUILTIN_VEC_EXT_V4SI,
18946 IX86_BUILTIN_VEC_EXT_V8HI,
18947 IX86_BUILTIN_VEC_EXT_V2SI,
18948 IX86_BUILTIN_VEC_EXT_V4HI,
18949 IX86_BUILTIN_VEC_EXT_V16QI,
18950 IX86_BUILTIN_VEC_SET_V2DI,
18951 IX86_BUILTIN_VEC_SET_V4SF,
18952 IX86_BUILTIN_VEC_SET_V4SI,
18953 IX86_BUILTIN_VEC_SET_V8HI,
18954 IX86_BUILTIN_VEC_SET_V4HI,
18955 IX86_BUILTIN_VEC_SET_V16QI,
18956
18957 IX86_BUILTIN_VEC_PACK_SFIX,
18958
18959 /* SSE4.2. */
18960 IX86_BUILTIN_CRC32QI,
18961 IX86_BUILTIN_CRC32HI,
18962 IX86_BUILTIN_CRC32SI,
18963 IX86_BUILTIN_CRC32DI,
18964
18965 IX86_BUILTIN_PCMPESTRI128,
18966 IX86_BUILTIN_PCMPESTRM128,
18967 IX86_BUILTIN_PCMPESTRA128,
18968 IX86_BUILTIN_PCMPESTRC128,
18969 IX86_BUILTIN_PCMPESTRO128,
18970 IX86_BUILTIN_PCMPESTRS128,
18971 IX86_BUILTIN_PCMPESTRZ128,
18972 IX86_BUILTIN_PCMPISTRI128,
18973 IX86_BUILTIN_PCMPISTRM128,
18974 IX86_BUILTIN_PCMPISTRA128,
18975 IX86_BUILTIN_PCMPISTRC128,
18976 IX86_BUILTIN_PCMPISTRO128,
18977 IX86_BUILTIN_PCMPISTRS128,
18978 IX86_BUILTIN_PCMPISTRZ128,
18979
18980 IX86_BUILTIN_PCMPGTQ,
18981
18982 /* AES instructions */
18983 IX86_BUILTIN_AESENC128,
18984 IX86_BUILTIN_AESENCLAST128,
18985 IX86_BUILTIN_AESDEC128,
18986 IX86_BUILTIN_AESDECLAST128,
18987 IX86_BUILTIN_AESIMC128,
18988 IX86_BUILTIN_AESKEYGENASSIST128,
18989
18990 /* PCLMUL instruction */
18991 IX86_BUILTIN_PCLMULQDQ128,
18992
18993 /* TFmode support builtins. */
18994 IX86_BUILTIN_INFQ,
18995 IX86_BUILTIN_FABSQ,
18996 IX86_BUILTIN_COPYSIGNQ,
18997
18998 /* SSE5 instructions */
18999 IX86_BUILTIN_FMADDSS,
19000 IX86_BUILTIN_FMADDSD,
19001 IX86_BUILTIN_FMADDPS,
19002 IX86_BUILTIN_FMADDPD,
19003 IX86_BUILTIN_FMSUBSS,
19004 IX86_BUILTIN_FMSUBSD,
19005 IX86_BUILTIN_FMSUBPS,
19006 IX86_BUILTIN_FMSUBPD,
19007 IX86_BUILTIN_FNMADDSS,
19008 IX86_BUILTIN_FNMADDSD,
19009 IX86_BUILTIN_FNMADDPS,
19010 IX86_BUILTIN_FNMADDPD,
19011 IX86_BUILTIN_FNMSUBSS,
19012 IX86_BUILTIN_FNMSUBSD,
19013 IX86_BUILTIN_FNMSUBPS,
19014 IX86_BUILTIN_FNMSUBPD,
19015 IX86_BUILTIN_PCMOV_V2DI,
19016 IX86_BUILTIN_PCMOV_V4SI,
19017 IX86_BUILTIN_PCMOV_V8HI,
19018 IX86_BUILTIN_PCMOV_V16QI,
19019 IX86_BUILTIN_PCMOV_V4SF,
19020 IX86_BUILTIN_PCMOV_V2DF,
19021 IX86_BUILTIN_PPERM,
19022 IX86_BUILTIN_PERMPS,
19023 IX86_BUILTIN_PERMPD,
19024 IX86_BUILTIN_PMACSSWW,
19025 IX86_BUILTIN_PMACSWW,
19026 IX86_BUILTIN_PMACSSWD,
19027 IX86_BUILTIN_PMACSWD,
19028 IX86_BUILTIN_PMACSSDD,
19029 IX86_BUILTIN_PMACSDD,
19030 IX86_BUILTIN_PMACSSDQL,
19031 IX86_BUILTIN_PMACSSDQH,
19032 IX86_BUILTIN_PMACSDQL,
19033 IX86_BUILTIN_PMACSDQH,
19034 IX86_BUILTIN_PMADCSSWD,
19035 IX86_BUILTIN_PMADCSWD,
19036 IX86_BUILTIN_PHADDBW,
19037 IX86_BUILTIN_PHADDBD,
19038 IX86_BUILTIN_PHADDBQ,
19039 IX86_BUILTIN_PHADDWD,
19040 IX86_BUILTIN_PHADDWQ,
19041 IX86_BUILTIN_PHADDDQ,
19042 IX86_BUILTIN_PHADDUBW,
19043 IX86_BUILTIN_PHADDUBD,
19044 IX86_BUILTIN_PHADDUBQ,
19045 IX86_BUILTIN_PHADDUWD,
19046 IX86_BUILTIN_PHADDUWQ,
19047 IX86_BUILTIN_PHADDUDQ,
19048 IX86_BUILTIN_PHSUBBW,
19049 IX86_BUILTIN_PHSUBWD,
19050 IX86_BUILTIN_PHSUBDQ,
19051 IX86_BUILTIN_PROTB,
19052 IX86_BUILTIN_PROTW,
19053 IX86_BUILTIN_PROTD,
19054 IX86_BUILTIN_PROTQ,
19055 IX86_BUILTIN_PROTB_IMM,
19056 IX86_BUILTIN_PROTW_IMM,
19057 IX86_BUILTIN_PROTD_IMM,
19058 IX86_BUILTIN_PROTQ_IMM,
19059 IX86_BUILTIN_PSHLB,
19060 IX86_BUILTIN_PSHLW,
19061 IX86_BUILTIN_PSHLD,
19062 IX86_BUILTIN_PSHLQ,
19063 IX86_BUILTIN_PSHAB,
19064 IX86_BUILTIN_PSHAW,
19065 IX86_BUILTIN_PSHAD,
19066 IX86_BUILTIN_PSHAQ,
19067 IX86_BUILTIN_FRCZSS,
19068 IX86_BUILTIN_FRCZSD,
19069 IX86_BUILTIN_FRCZPS,
19070 IX86_BUILTIN_FRCZPD,
19071 IX86_BUILTIN_CVTPH2PS,
19072 IX86_BUILTIN_CVTPS2PH,
19073
19074 IX86_BUILTIN_COMEQSS,
19075 IX86_BUILTIN_COMNESS,
19076 IX86_BUILTIN_COMLTSS,
19077 IX86_BUILTIN_COMLESS,
19078 IX86_BUILTIN_COMGTSS,
19079 IX86_BUILTIN_COMGESS,
19080 IX86_BUILTIN_COMUEQSS,
19081 IX86_BUILTIN_COMUNESS,
19082 IX86_BUILTIN_COMULTSS,
19083 IX86_BUILTIN_COMULESS,
19084 IX86_BUILTIN_COMUGTSS,
19085 IX86_BUILTIN_COMUGESS,
19086 IX86_BUILTIN_COMORDSS,
19087 IX86_BUILTIN_COMUNORDSS,
19088 IX86_BUILTIN_COMFALSESS,
19089 IX86_BUILTIN_COMTRUESS,
19090
19091 IX86_BUILTIN_COMEQSD,
19092 IX86_BUILTIN_COMNESD,
19093 IX86_BUILTIN_COMLTSD,
19094 IX86_BUILTIN_COMLESD,
19095 IX86_BUILTIN_COMGTSD,
19096 IX86_BUILTIN_COMGESD,
19097 IX86_BUILTIN_COMUEQSD,
19098 IX86_BUILTIN_COMUNESD,
19099 IX86_BUILTIN_COMULTSD,
19100 IX86_BUILTIN_COMULESD,
19101 IX86_BUILTIN_COMUGTSD,
19102 IX86_BUILTIN_COMUGESD,
19103 IX86_BUILTIN_COMORDSD,
19104 IX86_BUILTIN_COMUNORDSD,
19105 IX86_BUILTIN_COMFALSESD,
19106 IX86_BUILTIN_COMTRUESD,
19107
19108 IX86_BUILTIN_COMEQPS,
19109 IX86_BUILTIN_COMNEPS,
19110 IX86_BUILTIN_COMLTPS,
19111 IX86_BUILTIN_COMLEPS,
19112 IX86_BUILTIN_COMGTPS,
19113 IX86_BUILTIN_COMGEPS,
19114 IX86_BUILTIN_COMUEQPS,
19115 IX86_BUILTIN_COMUNEPS,
19116 IX86_BUILTIN_COMULTPS,
19117 IX86_BUILTIN_COMULEPS,
19118 IX86_BUILTIN_COMUGTPS,
19119 IX86_BUILTIN_COMUGEPS,
19120 IX86_BUILTIN_COMORDPS,
19121 IX86_BUILTIN_COMUNORDPS,
19122 IX86_BUILTIN_COMFALSEPS,
19123 IX86_BUILTIN_COMTRUEPS,
19124
19125 IX86_BUILTIN_COMEQPD,
19126 IX86_BUILTIN_COMNEPD,
19127 IX86_BUILTIN_COMLTPD,
19128 IX86_BUILTIN_COMLEPD,
19129 IX86_BUILTIN_COMGTPD,
19130 IX86_BUILTIN_COMGEPD,
19131 IX86_BUILTIN_COMUEQPD,
19132 IX86_BUILTIN_COMUNEPD,
19133 IX86_BUILTIN_COMULTPD,
19134 IX86_BUILTIN_COMULEPD,
19135 IX86_BUILTIN_COMUGTPD,
19136 IX86_BUILTIN_COMUGEPD,
19137 IX86_BUILTIN_COMORDPD,
19138 IX86_BUILTIN_COMUNORDPD,
19139 IX86_BUILTIN_COMFALSEPD,
19140 IX86_BUILTIN_COMTRUEPD,
19141
19142 IX86_BUILTIN_PCOMEQUB,
19143 IX86_BUILTIN_PCOMNEUB,
19144 IX86_BUILTIN_PCOMLTUB,
19145 IX86_BUILTIN_PCOMLEUB,
19146 IX86_BUILTIN_PCOMGTUB,
19147 IX86_BUILTIN_PCOMGEUB,
19148 IX86_BUILTIN_PCOMFALSEUB,
19149 IX86_BUILTIN_PCOMTRUEUB,
19150 IX86_BUILTIN_PCOMEQUW,
19151 IX86_BUILTIN_PCOMNEUW,
19152 IX86_BUILTIN_PCOMLTUW,
19153 IX86_BUILTIN_PCOMLEUW,
19154 IX86_BUILTIN_PCOMGTUW,
19155 IX86_BUILTIN_PCOMGEUW,
19156 IX86_BUILTIN_PCOMFALSEUW,
19157 IX86_BUILTIN_PCOMTRUEUW,
19158 IX86_BUILTIN_PCOMEQUD,
19159 IX86_BUILTIN_PCOMNEUD,
19160 IX86_BUILTIN_PCOMLTUD,
19161 IX86_BUILTIN_PCOMLEUD,
19162 IX86_BUILTIN_PCOMGTUD,
19163 IX86_BUILTIN_PCOMGEUD,
19164 IX86_BUILTIN_PCOMFALSEUD,
19165 IX86_BUILTIN_PCOMTRUEUD,
19166 IX86_BUILTIN_PCOMEQUQ,
19167 IX86_BUILTIN_PCOMNEUQ,
19168 IX86_BUILTIN_PCOMLTUQ,
19169 IX86_BUILTIN_PCOMLEUQ,
19170 IX86_BUILTIN_PCOMGTUQ,
19171 IX86_BUILTIN_PCOMGEUQ,
19172 IX86_BUILTIN_PCOMFALSEUQ,
19173 IX86_BUILTIN_PCOMTRUEUQ,
19174
19175 IX86_BUILTIN_PCOMEQB,
19176 IX86_BUILTIN_PCOMNEB,
19177 IX86_BUILTIN_PCOMLTB,
19178 IX86_BUILTIN_PCOMLEB,
19179 IX86_BUILTIN_PCOMGTB,
19180 IX86_BUILTIN_PCOMGEB,
19181 IX86_BUILTIN_PCOMFALSEB,
19182 IX86_BUILTIN_PCOMTRUEB,
19183 IX86_BUILTIN_PCOMEQW,
19184 IX86_BUILTIN_PCOMNEW,
19185 IX86_BUILTIN_PCOMLTW,
19186 IX86_BUILTIN_PCOMLEW,
19187 IX86_BUILTIN_PCOMGTW,
19188 IX86_BUILTIN_PCOMGEW,
19189 IX86_BUILTIN_PCOMFALSEW,
19190 IX86_BUILTIN_PCOMTRUEW,
19191 IX86_BUILTIN_PCOMEQD,
19192 IX86_BUILTIN_PCOMNED,
19193 IX86_BUILTIN_PCOMLTD,
19194 IX86_BUILTIN_PCOMLED,
19195 IX86_BUILTIN_PCOMGTD,
19196 IX86_BUILTIN_PCOMGED,
19197 IX86_BUILTIN_PCOMFALSED,
19198 IX86_BUILTIN_PCOMTRUED,
19199 IX86_BUILTIN_PCOMEQQ,
19200 IX86_BUILTIN_PCOMNEQ,
19201 IX86_BUILTIN_PCOMLTQ,
19202 IX86_BUILTIN_PCOMLEQ,
19203 IX86_BUILTIN_PCOMGTQ,
19204 IX86_BUILTIN_PCOMGEQ,
19205 IX86_BUILTIN_PCOMFALSEQ,
19206 IX86_BUILTIN_PCOMTRUEQ,
19207
19208 IX86_BUILTIN_MAX
19209 };
19210
19211 /* Table for the ix86 builtin decls. */
19212 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
19213
19214 /* Table to record which ISA options the builtin needs. */
19215 static int ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
19216
19217 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
19218 * of isa_flags the builtin requires in the ix86_builtins_isa array. Store the
19219 * function decl in the ix86_builtins array. Return the function decl, or
19220 * NULL_TREE if the builtin was not added.
19221 *
19222 * Record all builtins, even those whose ISA is not currently enabled, in case
19223 * the user uses function-specific options for a different ISA. When the
19224 * builtin is expanded, check at that time whether it is valid. */
19225
19226 static inline tree
19227 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
19228 {
19229 tree decl = NULL_TREE;
19230
19231 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
19232 {
19233 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
19234 NULL, NULL_TREE);
19235 ix86_builtins[(int) code] = decl;
19236 ix86_builtins_isa[(int) code] = mask;
19237 }
19238
19239 return decl;
19240 }
19241
19242 /* Like def_builtin, but also marks the function decl "const". */
19243
19244 static inline tree
19245 def_builtin_const (int mask, const char *name, tree type,
19246 enum ix86_builtins code)
19247 {
19248 tree decl = def_builtin (mask, name, type, code);
19249 if (decl)
19250 TREE_READONLY (decl) = 1;
19251 return decl;
19252 }
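
/* Added usage sketch, not part of the original file: how the helpers above
   are typically invoked when the builtin tables are registered.  The wrapper
   name and the type-node parameter are hypothetical; real callers first build
   the function type from V4SF tree nodes.  */

static void
example_register_addps (tree v4sf_ftype_v4sf_v4sf)
{
  /* Marked "const" because addps has no side effects.  The mask restricts
     the builtin to -msse, and per the comment above def_builtin it is
     recorded even when SSE is not enabled at this point.  */
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_addps",
                     v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);
}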
19253
19254 /* Bits for builtin_description.flag. */
19255
19256 /* Set when we don't support the comparison natively, and should
19257 swap the comparison operands in order to support it. */
19258 #define BUILTIN_DESC_SWAP_OPERANDS 1
19259
19260 struct builtin_description
19261 {
19262 const unsigned int mask;
19263 const enum insn_code icode;
19264 const char *const name;
19265 const enum ix86_builtins code;
19266 const enum rtx_code comparison;
19267 const int flag;
19268 };
19269
19270 static const struct builtin_description bdesc_comi[] =
19271 {
19272 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
19273 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
19274 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
19275 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
19276 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
19277 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
19278 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
19279 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
19280 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
19281 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
19282 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
19283 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
19284 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
19285 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
19286 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
19287 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
19288 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
19289 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
19290 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
19291 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
19292 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
19293 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
19294 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
19295 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
19296 };
19297
19298 static const struct builtin_description bdesc_pcmpestr[] =
19299 {
19300 /* SSE4.2 */
19301 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
19302 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
19303 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
19304 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
19305 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
19306 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
19307 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
19308 };
19309
19310 static const struct builtin_description bdesc_pcmpistr[] =
19311 {
19312 /* SSE4.2 */
19313 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
19314 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
19315 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
19316 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
19317 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
19318 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
19319 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
19320 };
19321
19322 /* Special builtin types */
19323 enum ix86_special_builtin_type
19324 {
19325 SPECIAL_FTYPE_UNKNOWN,
19326 VOID_FTYPE_VOID,
19327 V16QI_FTYPE_PCCHAR,
19328 V4SF_FTYPE_PCFLOAT,
19329 V2DF_FTYPE_PCDOUBLE,
19330 V4SF_FTYPE_V4SF_PCV2SF,
19331 V2DF_FTYPE_V2DF_PCDOUBLE,
19332 V2DI_FTYPE_PV2DI,
19333 VOID_FTYPE_PV2SF_V4SF,
19334 VOID_FTYPE_PV2DI_V2DI,
19335 VOID_FTYPE_PCHAR_V16QI,
19336 VOID_FTYPE_PFLOAT_V4SF,
19337 VOID_FTYPE_PDOUBLE_V2DF,
19338 VOID_FTYPE_PDI_DI,
19339 VOID_FTYPE_PINT_INT
19340 };
19341
19342 /* Builtin types */
19343 enum ix86_builtin_type
19344 {
19345 FTYPE_UNKNOWN,
19346 FLOAT128_FTYPE_FLOAT128,
19347 FLOAT_FTYPE_FLOAT,
19348 FLOAT128_FTYPE_FLOAT128_FLOAT128,
19349 INT_FTYPE_V2DI_V2DI_PTEST,
19350 INT64_FTYPE_V4SF,
19351 INT64_FTYPE_V2DF,
19352 INT_FTYPE_V16QI,
19353 INT_FTYPE_V8QI,
19354 INT_FTYPE_V4SF,
19355 INT_FTYPE_V2DF,
19356 V16QI_FTYPE_V16QI,
19357 V8HI_FTYPE_V8HI,
19358 V8HI_FTYPE_V16QI,
19359 V8QI_FTYPE_V8QI,
19360 V4SI_FTYPE_V4SI,
19361 V4SI_FTYPE_V16QI,
19362 V4SI_FTYPE_V8HI,
19363 V4SI_FTYPE_V4SF,
19364 V4SI_FTYPE_V2DF,
19365 V4HI_FTYPE_V4HI,
19366 V4SF_FTYPE_V4SF,
19367 V4SF_FTYPE_V4SF_VEC_MERGE,
19368 V4SF_FTYPE_V4SI,
19369 V4SF_FTYPE_V2DF,
19370 V2DI_FTYPE_V2DI,
19371 V2DI_FTYPE_V16QI,
19372 V2DI_FTYPE_V8HI,
19373 V2DI_FTYPE_V4SI,
19374 V2DF_FTYPE_V2DF,
19375 V2DF_FTYPE_V2DF_VEC_MERGE,
19376 V2DF_FTYPE_V4SI,
19377 V2DF_FTYPE_V4SF,
19378 V2DF_FTYPE_V2SI,
19379 V2SI_FTYPE_V2SI,
19380 V2SI_FTYPE_V4SF,
19381 V2SI_FTYPE_V2SF,
19382 V2SI_FTYPE_V2DF,
19383 V2SF_FTYPE_V2SF,
19384 V2SF_FTYPE_V2SI,
19385 V16QI_FTYPE_V16QI_V16QI,
19386 V16QI_FTYPE_V8HI_V8HI,
19387 V8QI_FTYPE_V8QI_V8QI,
19388 V8QI_FTYPE_V4HI_V4HI,
19389 V8HI_FTYPE_V8HI_V8HI,
19390 V8HI_FTYPE_V8HI_V8HI_COUNT,
19391 V8HI_FTYPE_V16QI_V16QI,
19392 V8HI_FTYPE_V4SI_V4SI,
19393 V8HI_FTYPE_V8HI_SI_COUNT,
19394 V4SI_FTYPE_V4SI_V4SI,
19395 V4SI_FTYPE_V4SI_V4SI_COUNT,
19396 V4SI_FTYPE_V8HI_V8HI,
19397 V4SI_FTYPE_V4SF_V4SF,
19398 V4SI_FTYPE_V2DF_V2DF,
19399 V4SI_FTYPE_V4SI_SI_COUNT,
19400 V4HI_FTYPE_V4HI_V4HI,
19401 V4HI_FTYPE_V4HI_V4HI_COUNT,
19402 V4HI_FTYPE_V8QI_V8QI,
19403 V4HI_FTYPE_V2SI_V2SI,
19404 V4HI_FTYPE_V4HI_SI_COUNT,
19405 V4SF_FTYPE_V4SF_V4SF,
19406 V4SF_FTYPE_V4SF_V4SF_SWAP,
19407 V4SF_FTYPE_V4SF_V2SI,
19408 V4SF_FTYPE_V4SF_V2DF,
19409 V4SF_FTYPE_V4SF_DI,
19410 V4SF_FTYPE_V4SF_SI,
19411 V2DI_FTYPE_V2DI_V2DI,
19412 V2DI_FTYPE_V2DI_V2DI_COUNT,
19413 V2DI_FTYPE_V16QI_V16QI,
19414 V2DI_FTYPE_V4SI_V4SI,
19415 V2DI_FTYPE_V2DI_V16QI,
19416 V2DI_FTYPE_V2DF_V2DF,
19417 V2DI_FTYPE_V2DI_SI_COUNT,
19418 V2SI_FTYPE_V2SI_V2SI,
19419 V2SI_FTYPE_V2SI_V2SI_COUNT,
19420 V2SI_FTYPE_V4HI_V4HI,
19421 V2SI_FTYPE_V2SF_V2SF,
19422 V2SI_FTYPE_V2SI_SI_COUNT,
19423 V2DF_FTYPE_V2DF_V2DF,
19424 V2DF_FTYPE_V2DF_V2DF_SWAP,
19425 V2DF_FTYPE_V2DF_V4SF,
19426 V2DF_FTYPE_V2DF_DI,
19427 V2DF_FTYPE_V2DF_SI,
19428 V2SF_FTYPE_V2SF_V2SF,
19429 V1DI_FTYPE_V1DI_V1DI,
19430 V1DI_FTYPE_V1DI_V1DI_COUNT,
19431 V1DI_FTYPE_V8QI_V8QI,
19432 V1DI_FTYPE_V2SI_V2SI,
19433 V1DI_FTYPE_V1DI_SI_COUNT,
19434 UINT64_FTYPE_UINT64_UINT64,
19435 UINT_FTYPE_UINT_UINT,
19436 UINT_FTYPE_UINT_USHORT,
19437 UINT_FTYPE_UINT_UCHAR,
19438 V8HI_FTYPE_V8HI_INT,
19439 V4SI_FTYPE_V4SI_INT,
19440 V4HI_FTYPE_V4HI_INT,
19441 V4SF_FTYPE_V4SF_INT,
19442 V2DI_FTYPE_V2DI_INT,
19443 V2DI2TI_FTYPE_V2DI_INT,
19444 V2DF_FTYPE_V2DF_INT,
19445 V16QI_FTYPE_V16QI_V16QI_V16QI,
19446 V4SF_FTYPE_V4SF_V4SF_V4SF,
19447 V2DF_FTYPE_V2DF_V2DF_V2DF,
19448 V16QI_FTYPE_V16QI_V16QI_INT,
19449 V8HI_FTYPE_V8HI_V8HI_INT,
19450 V4SI_FTYPE_V4SI_V4SI_INT,
19451 V4SF_FTYPE_V4SF_V4SF_INT,
19452 V2DI_FTYPE_V2DI_V2DI_INT,
19453 V2DI2TI_FTYPE_V2DI_V2DI_INT,
19454 V1DI2DI_FTYPE_V1DI_V1DI_INT,
19455 V2DF_FTYPE_V2DF_V2DF_INT,
19456 V2DI_FTYPE_V2DI_UINT_UINT,
19457 V2DI_FTYPE_V2DI_V2DI_UINT_UINT
19458 };
19459
19460 /* Special builtins with variable number of arguments. */
19461 static const struct builtin_description bdesc_special_args[] =
19462 {
19463 /* MMX */
19464 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
19465
19466 /* 3DNow! */
19467 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
19468
19469 /* SSE */
19470 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
19471 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
19472 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
19473
19474 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
19475 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
19476 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
19477 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
19478
19479 /* SSE or 3DNow!A */
19480 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
19481 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
19482
19483 /* SSE2 */
19484 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
19485 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
19486 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
19487 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
19488 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
19489 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
19490 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
19491 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
19492 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
19493
19494 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
19495 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
19496
19497 /* SSE3 */
19498 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
19499
19500 /* SSE4.1 */
19501 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
19502
19503 /* SSE4A */
19504 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
19505 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
19506 };
19507
19508 /* Builtins with variable number of arguments. */
19509 static const struct builtin_description bdesc_args[] =
19510 {
19511 /* MMX */
19512 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19513 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19514 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
19515 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19516 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19517 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
19518
19519 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19520 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19521 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19522 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19523 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19524 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19525 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19526 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19527
19528 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19529 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19530
19531 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
19532 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
19533 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
19534 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
19535
19536 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19537 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19538 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
19539 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19540 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19541 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
19542
19543 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19544 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19545 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
19546 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19547 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
19548 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
19549
19550 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
19551 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
19552 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
19553
19554 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
19555
19556 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
19557 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
19558 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
19559 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
19560 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
19561 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
19562
19563 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
19564 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
19565 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
19566 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
19567 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
19568 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
19569
19570 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
19571 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
19572 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
19573 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
19574
19575 /* 3DNow! */
19576 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
19577 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
19578 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
19579 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
19580
19581 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19582 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
19583 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
19584 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
19585 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
19586 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
19587 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
19588 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
19589 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
19590 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
19591 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
19592 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
19593 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
19594 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
19595 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19596
19597 /* 3DNow!A */
19598 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
19599 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
19600 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
19601 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
19602 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
19603 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
19604
19605 /* SSE */
19606 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
19607 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
19608 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
19609 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
19610 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
19611 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
19612 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
19613 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
19614 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
19615 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
19616 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
19617 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
19618
19619 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
19620
19621 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19622 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19623 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19624 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19625 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19626 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19627 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19628 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19629
19630 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
19631 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
19632 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
19633 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
19634 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
19635 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
19636 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
19637 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
19638 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
19639 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
19640 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
19641 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
19642 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
19643 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
19644 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
19645 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
19646 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
19647 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
19648 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
19649 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
19650 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
19651 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
19652
19653 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19654 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19655 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19656 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19657
19658 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19659 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19660 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19661 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19662
19663 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19664 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19665 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19666 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19667 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19668
19669 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
19670 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
19671 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
19672
19673 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
19674
19675 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
19676 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
19677 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
19678
19679 /* SSE MMX or 3Dnow!A */
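/* A combined mask such as SSE | 3DNOW_A appears to mean that enabling
   either ISA is enough to make these builtins available; the mask is
   tested when the builtins are registered elsewhere in this file.  */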
19680 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19681 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19682 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19683
19684 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19685 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19686 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19687 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19688
19689 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
19690 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
19691
19692 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
19693
19694 /* SSE2 */
19695 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
19696
19697 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
19698 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
19699 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
19700 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
19701 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
19702
19703 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
19704 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
19705 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
19706 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
19707 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
19708
19709 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
19710
19711 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
19712 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
19713 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
19714 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
19715
19716 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
19717 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
19718 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
19719
19720 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19721 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19722 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19723 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19724 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19725 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19726 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19727 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19728
19729 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
19730 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
19731 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
19732 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
19733 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
19734 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
19735 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
19736 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
19737 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
19738 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
19739 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
19740 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
19741 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
19742 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
19743 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
19744 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
19745 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
19746 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
19747 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
19748 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
19749
19750 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19751 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19752 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19753 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19754
19755 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19756 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19757 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19758 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19759
19760 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19761 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19762 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19763
19764 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
19765
19766 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19767 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19768 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19769 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19770 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19771 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19772 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19773 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19774
19775 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19776 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19777 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19778 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19779 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19780 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19781 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19782 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19783
19784 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19785 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19786
19787 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19788 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19789 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19790 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19791
19792 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19793 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19794
19795 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19796 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19797 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19798 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19799 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19800 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19801
19802 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19803 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19804 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19805 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19806
19807 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19808 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19809 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19810 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19811 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19812 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19813 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19814 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19815
19816 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
19817 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
19818 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
19819
19820 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19821 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
19822
19823 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
19824 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
19825
19826 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
19827
19828 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
19829 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
19830 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
19831 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
19832
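/* Shift builtins: pslldqi128/psrldqi128 shift the whole 128-bit value
   through the TImode patterns, the remaining *i128 entries take the
   shift count as an integer (the *_SI_COUNT types), and the
   non-immediate forms take the count in a vector operand (the
   *_V*_COUNT types).  */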
19833 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
19834 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
19835 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
19836 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
19837 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
19838 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
19839 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
19840
19841 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
19842 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
19843 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
19844 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
19845 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
19846 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
19847 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
19848
19849 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
19850 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
19851 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
19852 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
19853
19854 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
19855 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
19856 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
19857
19858 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
19859
19860 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
19861 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
19862
19863 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
19864
19865 /* SSE2 MMX */
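/* 64-bit (MMX register) forms of the SSE2 paddq/psubq instructions.  */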
19866 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
19867 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
19868
19869 /* SSE3 */
19870 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
19871 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
19872
19873 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19874 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19875 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19876 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19877 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19878 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19879
19880 /* SSSE3 */
19881 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
19882 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
19883 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
19884 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
19885 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
19886 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
19887
19888 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19889 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19890 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19891 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
19892 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19893 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19894 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19895 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19896 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19897 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
19898 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19899 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19900 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
19901 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
19902 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19903 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19904 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19905 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19906 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19907 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19908 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19909 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19910 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19911 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
19912
19913 /* SSSE3. */
19914 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
19915 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
19916
19917 /* SSE4.1 */
19918 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
19919 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
19920 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
19921 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
19922 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
19923 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
19924 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
19925 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
19926 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
19927 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
19928
19929 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
19930 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
19931 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
19932 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
19933 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
19934 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
19935 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
19936 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
19937 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
19938 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
19939 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
19940 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
19941 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
19942
19943 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
19944 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19945 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19946 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19947 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19948 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19949 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19950 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19951 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19952 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19953 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
19954 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19955
19956 /* SSE4.1 and SSE5 */
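/* OPTION_MASK_ISA_ROUND appears to combine the SSE4.1 and SSE5 masks:
   either ISA provides the round and ptest forms below.  */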
19957 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
19958 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
19959 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
19960 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
19961
19962 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
19963 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
19964 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
19965
19966 /* SSE4.2 */
19967 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19968 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
19969 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
19970 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
19971 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
19972
19973 /* SSE4A */
19974 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
19975 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
19976 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
19977 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19978
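/* Like the FABSQ/COPYSIGNQ rows above, the AES and PCLMUL rows carry a
   null name: they seem to be skipped by the generic registration loop and
   are instead defined explicitly elsewhere with their own ISA checks, the
   table entries only supplying the insn code and function type used for
   expansion.  */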
19979 /* AES */
19980 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
19981 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
19982
19983 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19984 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19985 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19986 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19987
19988 /* PCLMUL */
19989 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
19990 };
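/* A rough sketch of how a bdesc_args entry above is consumed, assuming
   the usual registration and expansion helpers defined elsewhere in this
   file: the ISA mask gates availability, the insn code selects the
   expander pattern, and the *_FTYPE_* value selects the prototype.  The
   "__builtin_ia32_subps" entry, for instance, ends up behaving roughly
   like

       __v4sf __builtin_ia32_subps (__v4sf, __v4sf);

   expanded through CODE_FOR_subv4sf3.  */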
19991
19992 /* SSE5 */
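/* Operand-signature classes for the SSE5 multi-operand builtins below;
   the suffix encodes the operand count and modes (entries such as
   MULTI_ARG_1_QI_HI cover operations whose result mode differs from the
   operand mode, e.g. the horizontal adds).  The chosen value is stored in
   the flag field of the bdesc_multi_arg entries.  */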
19993 enum multi_arg_type {
19994 MULTI_ARG_UNKNOWN,
19995 MULTI_ARG_3_SF,
19996 MULTI_ARG_3_DF,
19997 MULTI_ARG_3_DI,
19998 MULTI_ARG_3_SI,
19999 MULTI_ARG_3_SI_DI,
20000 MULTI_ARG_3_HI,
20001 MULTI_ARG_3_HI_SI,
20002 MULTI_ARG_3_QI,
20003 MULTI_ARG_3_PERMPS,
20004 MULTI_ARG_3_PERMPD,
20005 MULTI_ARG_2_SF,
20006 MULTI_ARG_2_DF,
20007 MULTI_ARG_2_DI,
20008 MULTI_ARG_2_SI,
20009 MULTI_ARG_2_HI,
20010 MULTI_ARG_2_QI,
20011 MULTI_ARG_2_DI_IMM,
20012 MULTI_ARG_2_SI_IMM,
20013 MULTI_ARG_2_HI_IMM,
20014 MULTI_ARG_2_QI_IMM,
20015 MULTI_ARG_2_SF_CMP,
20016 MULTI_ARG_2_DF_CMP,
20017 MULTI_ARG_2_DI_CMP,
20018 MULTI_ARG_2_SI_CMP,
20019 MULTI_ARG_2_HI_CMP,
20020 MULTI_ARG_2_QI_CMP,
20021 MULTI_ARG_2_DI_TF,
20022 MULTI_ARG_2_SI_TF,
20023 MULTI_ARG_2_HI_TF,
20024 MULTI_ARG_2_QI_TF,
20025 MULTI_ARG_2_SF_TF,
20026 MULTI_ARG_2_DF_TF,
20027 MULTI_ARG_1_SF,
20028 MULTI_ARG_1_DF,
20029 MULTI_ARG_1_DI,
20030 MULTI_ARG_1_SI,
20031 MULTI_ARG_1_HI,
20032 MULTI_ARG_1_QI,
20033 MULTI_ARG_1_SI_DI,
20034 MULTI_ARG_1_HI_DI,
20035 MULTI_ARG_1_HI_SI,
20036 MULTI_ARG_1_QI_DI,
20037 MULTI_ARG_1_QI_SI,
20038 MULTI_ARG_1_QI_HI,
20039 MULTI_ARG_1_PH2PS,
20040 MULTI_ARG_1_PS2PH
20041 };
20042
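/* SSE5 fused multiply-add, conditional move, permute, rotate/shift,
   conversion and comparison builtins.  Unlike bdesc_args, the last field
   of each entry holds a multi_arg_type rather than a function-type
   code.  */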
20043 static const struct builtin_description bdesc_multi_arg[] =
20044 {
20045 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
20046 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
20047 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
20048 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
20049 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
20050 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
20051 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
20052 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
20053 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
20054 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
20055 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
20056 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
20057 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
20058 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
20059 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
20060 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
20061 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
20062 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
20063 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
20064 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
20065 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi", IX86_BUILTIN_PCMOV_V16QI, 0, (int)MULTI_ARG_3_QI },
20066 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
20067 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
20068 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
20069 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
20070 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
20071 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
20072 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
20073 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
20074 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
20075 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
20076 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
20077 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
20078 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
20079 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
20080 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
20081 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
20082 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
20083 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
20084 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
20085 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
20086 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
20087 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
20088 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
20089 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
20090 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
20091 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
20092 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
20093 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
20094 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
20095 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
20096 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
20097 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
20098 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
20099 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
20100 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
20101 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
20102 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
20103 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
20104 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
20105 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
20106 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
20107 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
20108 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
20109 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
20110 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
20111 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
20112 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
20113 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
20114 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
20115 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
20116 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
20117 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
20118 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
20119 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
20120
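/* The SSE5 comparison builtins reuse the (vm)maskcmp patterns with an
   explicit rtx comparison code; the *neq* spellings are aliases that map
   to the same IX86_BUILTIN_* code as the corresponding *ne* builtin.  */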
20121 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
20122 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
20123 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
20124 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
20125 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
20126 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
20127 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
20128 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
20129 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
20130 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
20131 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
20132 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
20133 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
20134 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
20135 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
20136 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
20137
20138 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
20139 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
20140 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
20141 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
20142 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
20143 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
20144 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
20145 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
20146 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
20147 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
20148 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
20149 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
20150 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
20151 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
20152 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
20153 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
20154
20155 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
20156 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
20157 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
20158 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
20159 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
20160 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
20161 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
20162 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
20163 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
20164 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
20165 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
20166 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
20167 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
20168 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
20169 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
20170 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
20171
20172 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
20173 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
20174 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
20175 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
20176 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
20177 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
20178 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
20179 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
20180 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
20181 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
20182 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
20183 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
20184 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
20185 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
20186 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
20187 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
20188
20189 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
20190 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
20191 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
20192 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
20193 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
20194 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
20195 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
20196
20197 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
20198 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
20199 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
20200 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
20201 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
20202 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
20203 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
20204
20205 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
20206 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
20207 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
20208 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
20209 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
20210 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
20211 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
20212
20213 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
20214 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
20215 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
20216 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
20217 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
20218 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
20219 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
20220
20221 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
20222 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
20223 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
20224 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
20225 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
20226 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
20227 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
20228
20229 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
20230 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
20231 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
20232 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
20233 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
20234 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
20235 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
20236
20237 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
20238 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
20239 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
20240 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
20241 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
20242 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
20243 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
20244
20245 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
20246 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
20247 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
20248 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
20249 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
20250 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
20251 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
20252
20253 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
20254 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
20255 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
20256 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
20257 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
20258 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
20259 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
20260 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
20261
20262 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
20263 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
20264 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
20265 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
20266 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
20267 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
20268 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
20269 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
20270
20271 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
20272 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
20273 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
20274 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
20275 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
20276 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
20277 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
20278 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
20279 };
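/* Note: each entry above pairs an ISA mask, an insn pattern, a builtin
   name, a builtin code, the comparison or operation code to emit, and a
   MULTI_ARG_* classification that ix86_init_mmx_sse_builtins uses below
   to select the function type.  Several names are deliberate aliases:
   "__builtin_ia32_comneqsd" and "__builtin_ia32_comnesd", for example,
   share IX86_BUILTIN_COMNESD and expand to the same instruction.  */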
20280
20281 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
20282 in the current target ISA, so that the user can compile particular modules
20283 with target-specific options that differ from the command-line
20284 options. */
20285 static void
20286 ix86_init_mmx_sse_builtins (void)
20287 {
20288 const struct builtin_description * d;
20289 size_t i;
20290
20291 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
20292 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
20293 tree V1DI_type_node
20294 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
20295 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
20296 tree V2DI_type_node
20297 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
20298 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
20299 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
20300 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
20301 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
20302 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
20303 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
20304
20305 tree pchar_type_node = build_pointer_type (char_type_node);
20306 tree pcchar_type_node
20307 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
20308 tree pfloat_type_node = build_pointer_type (float_type_node);
20309 tree pcfloat_type_node
20310 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
20311 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
20312 tree pcv2sf_type_node
20313 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
20314 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
20315 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
20316
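/* The function-type trees below follow the naming convention
   RESULT_ftype_ARG1_ARG2...; for example, v4si_ftype_v4sf_v4sf is the
   type of a function taking two V4SFmode vectors and returning a
   V4SImode vector.  Each type is built once here and shared by every
   builtin with that signature.  */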
20317 /* Comparisons. */
20318 tree int_ftype_v4sf_v4sf
20319 = build_function_type_list (integer_type_node,
20320 V4SF_type_node, V4SF_type_node, NULL_TREE);
20321 tree v4si_ftype_v4sf_v4sf
20322 = build_function_type_list (V4SI_type_node,
20323 V4SF_type_node, V4SF_type_node, NULL_TREE);
20324 /* MMX/SSE/integer conversions. */
20325 tree int_ftype_v4sf
20326 = build_function_type_list (integer_type_node,
20327 V4SF_type_node, NULL_TREE);
20328 tree int64_ftype_v4sf
20329 = build_function_type_list (long_long_integer_type_node,
20330 V4SF_type_node, NULL_TREE);
20331 tree int_ftype_v8qi
20332 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
20333 tree v4sf_ftype_v4sf_int
20334 = build_function_type_list (V4SF_type_node,
20335 V4SF_type_node, integer_type_node, NULL_TREE);
20336 tree v4sf_ftype_v4sf_int64
20337 = build_function_type_list (V4SF_type_node,
20338 V4SF_type_node, long_long_integer_type_node,
20339 NULL_TREE);
20340 tree v4sf_ftype_v4sf_v2si
20341 = build_function_type_list (V4SF_type_node,
20342 V4SF_type_node, V2SI_type_node, NULL_TREE);
20343
20344 /* Miscellaneous. */
20345 tree v8qi_ftype_v4hi_v4hi
20346 = build_function_type_list (V8QI_type_node,
20347 V4HI_type_node, V4HI_type_node, NULL_TREE);
20348 tree v4hi_ftype_v2si_v2si
20349 = build_function_type_list (V4HI_type_node,
20350 V2SI_type_node, V2SI_type_node, NULL_TREE);
20351 tree v4sf_ftype_v4sf_v4sf_int
20352 = build_function_type_list (V4SF_type_node,
20353 V4SF_type_node, V4SF_type_node,
20354 integer_type_node, NULL_TREE);
20355 tree v2si_ftype_v4hi_v4hi
20356 = build_function_type_list (V2SI_type_node,
20357 V4HI_type_node, V4HI_type_node, NULL_TREE);
20358 tree v4hi_ftype_v4hi_int
20359 = build_function_type_list (V4HI_type_node,
20360 V4HI_type_node, integer_type_node, NULL_TREE);
20361 tree v2si_ftype_v2si_int
20362 = build_function_type_list (V2SI_type_node,
20363 V2SI_type_node, integer_type_node, NULL_TREE);
20364 tree v1di_ftype_v1di_int
20365 = build_function_type_list (V1DI_type_node,
20366 V1DI_type_node, integer_type_node, NULL_TREE);
20367
20368 tree void_ftype_void
20369 = build_function_type (void_type_node, void_list_node);
20370 tree void_ftype_unsigned
20371 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
20372 tree void_ftype_unsigned_unsigned
20373 = build_function_type_list (void_type_node, unsigned_type_node,
20374 unsigned_type_node, NULL_TREE);
20375 tree void_ftype_pcvoid_unsigned_unsigned
20376 = build_function_type_list (void_type_node, const_ptr_type_node,
20377 unsigned_type_node, unsigned_type_node,
20378 NULL_TREE);
20379 tree unsigned_ftype_void
20380 = build_function_type (unsigned_type_node, void_list_node);
20381 tree v2si_ftype_v4sf
20382 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
20383 /* Loads/stores. */
20384 tree void_ftype_v8qi_v8qi_pchar
20385 = build_function_type_list (void_type_node,
20386 V8QI_type_node, V8QI_type_node,
20387 pchar_type_node, NULL_TREE);
20388 tree v4sf_ftype_pcfloat
20389 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
20390 tree v4sf_ftype_v4sf_pcv2sf
20391 = build_function_type_list (V4SF_type_node,
20392 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
20393 tree void_ftype_pv2sf_v4sf
20394 = build_function_type_list (void_type_node,
20395 pv2sf_type_node, V4SF_type_node, NULL_TREE);
20396 tree void_ftype_pfloat_v4sf
20397 = build_function_type_list (void_type_node,
20398 pfloat_type_node, V4SF_type_node, NULL_TREE);
20399 tree void_ftype_pdi_di
20400 = build_function_type_list (void_type_node,
20401 pdi_type_node, long_long_unsigned_type_node,
20402 NULL_TREE);
20403 tree void_ftype_pv2di_v2di
20404 = build_function_type_list (void_type_node,
20405 pv2di_type_node, V2DI_type_node, NULL_TREE);
20406 /* Normal vector unops. */
20407 tree v4sf_ftype_v4sf
20408 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
20409 tree v16qi_ftype_v16qi
20410 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
20411 tree v8hi_ftype_v8hi
20412 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
20413 tree v4si_ftype_v4si
20414 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
20415 tree v8qi_ftype_v8qi
20416 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
20417 tree v4hi_ftype_v4hi
20418 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
20419
20420 /* Normal vector binops. */
20421 tree v4sf_ftype_v4sf_v4sf
20422 = build_function_type_list (V4SF_type_node,
20423 V4SF_type_node, V4SF_type_node, NULL_TREE);
20424 tree v8qi_ftype_v8qi_v8qi
20425 = build_function_type_list (V8QI_type_node,
20426 V8QI_type_node, V8QI_type_node, NULL_TREE);
20427 tree v4hi_ftype_v4hi_v4hi
20428 = build_function_type_list (V4HI_type_node,
20429 V4HI_type_node, V4HI_type_node, NULL_TREE);
20430 tree v2si_ftype_v2si_v2si
20431 = build_function_type_list (V2SI_type_node,
20432 V2SI_type_node, V2SI_type_node, NULL_TREE);
20433 tree v1di_ftype_v1di_v1di
20434 = build_function_type_list (V1DI_type_node,
20435 V1DI_type_node, V1DI_type_node, NULL_TREE);
20436 tree v1di_ftype_v1di_v1di_int
20437 = build_function_type_list (V1DI_type_node,
20438 V1DI_type_node, V1DI_type_node,
20439 integer_type_node, NULL_TREE);
20440 tree v2si_ftype_v2sf
20441 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
20442 tree v2sf_ftype_v2si
20443 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
20444 tree v2si_ftype_v2si
20445 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
20446 tree v2sf_ftype_v2sf
20447 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
20448 tree v2sf_ftype_v2sf_v2sf
20449 = build_function_type_list (V2SF_type_node,
20450 V2SF_type_node, V2SF_type_node, NULL_TREE);
20451 tree v2si_ftype_v2sf_v2sf
20452 = build_function_type_list (V2SI_type_node,
20453 V2SF_type_node, V2SF_type_node, NULL_TREE);
20454 tree pint_type_node = build_pointer_type (integer_type_node);
20455 tree pdouble_type_node = build_pointer_type (double_type_node);
20456 tree pcdouble_type_node = build_pointer_type (
20457 build_type_variant (double_type_node, 1, 0));
20458 tree int_ftype_v2df_v2df
20459 = build_function_type_list (integer_type_node,
20460 V2DF_type_node, V2DF_type_node, NULL_TREE);
20461
20462 tree void_ftype_pcvoid
20463 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
20464 tree v4sf_ftype_v4si
20465 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
20466 tree v4si_ftype_v4sf
20467 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
20468 tree v2df_ftype_v4si
20469 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
20470 tree v4si_ftype_v2df
20471 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
20472 tree v4si_ftype_v2df_v2df
20473 = build_function_type_list (V4SI_type_node,
20474 V2DF_type_node, V2DF_type_node, NULL_TREE);
20475 tree v2si_ftype_v2df
20476 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
20477 tree v4sf_ftype_v2df
20478 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
20479 tree v2df_ftype_v2si
20480 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
20481 tree v2df_ftype_v4sf
20482 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
20483 tree int_ftype_v2df
20484 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
20485 tree int64_ftype_v2df
20486 = build_function_type_list (long_long_integer_type_node,
20487 V2DF_type_node, NULL_TREE);
20488 tree v2df_ftype_v2df_int
20489 = build_function_type_list (V2DF_type_node,
20490 V2DF_type_node, integer_type_node, NULL_TREE);
20491 tree v2df_ftype_v2df_int64
20492 = build_function_type_list (V2DF_type_node,
20493 V2DF_type_node, long_long_integer_type_node,
20494 NULL_TREE);
20495 tree v4sf_ftype_v4sf_v2df
20496 = build_function_type_list (V4SF_type_node,
20497 V4SF_type_node, V2DF_type_node, NULL_TREE);
20498 tree v2df_ftype_v2df_v4sf
20499 = build_function_type_list (V2DF_type_node,
20500 V2DF_type_node, V4SF_type_node, NULL_TREE);
20501 tree v2df_ftype_v2df_v2df_int
20502 = build_function_type_list (V2DF_type_node,
20503 V2DF_type_node, V2DF_type_node,
20504 integer_type_node,
20505 NULL_TREE);
20506 tree v2df_ftype_v2df_pcdouble
20507 = build_function_type_list (V2DF_type_node,
20508 V2DF_type_node, pcdouble_type_node, NULL_TREE);
20509 tree void_ftype_pdouble_v2df
20510 = build_function_type_list (void_type_node,
20511 pdouble_type_node, V2DF_type_node, NULL_TREE);
20512 tree void_ftype_pint_int
20513 = build_function_type_list (void_type_node,
20514 pint_type_node, integer_type_node, NULL_TREE);
20515 tree void_ftype_v16qi_v16qi_pchar
20516 = build_function_type_list (void_type_node,
20517 V16QI_type_node, V16QI_type_node,
20518 pchar_type_node, NULL_TREE);
20519 tree v2df_ftype_pcdouble
20520 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
20521 tree v2df_ftype_v2df_v2df
20522 = build_function_type_list (V2DF_type_node,
20523 V2DF_type_node, V2DF_type_node, NULL_TREE);
20524 tree v16qi_ftype_v16qi_v16qi
20525 = build_function_type_list (V16QI_type_node,
20526 V16QI_type_node, V16QI_type_node, NULL_TREE);
20527 tree v8hi_ftype_v8hi_v8hi
20528 = build_function_type_list (V8HI_type_node,
20529 V8HI_type_node, V8HI_type_node, NULL_TREE);
20530 tree v4si_ftype_v4si_v4si
20531 = build_function_type_list (V4SI_type_node,
20532 V4SI_type_node, V4SI_type_node, NULL_TREE);
20533 tree v2di_ftype_v2di_v2di
20534 = build_function_type_list (V2DI_type_node,
20535 V2DI_type_node, V2DI_type_node, NULL_TREE);
20536 tree v2di_ftype_v2df_v2df
20537 = build_function_type_list (V2DI_type_node,
20538 V2DF_type_node, V2DF_type_node, NULL_TREE);
20539 tree v2df_ftype_v2df
20540 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
20541 tree v2di_ftype_v2di_int
20542 = build_function_type_list (V2DI_type_node,
20543 V2DI_type_node, integer_type_node, NULL_TREE);
20544 tree v2di_ftype_v2di_v2di_int
20545 = build_function_type_list (V2DI_type_node, V2DI_type_node,
20546 V2DI_type_node, integer_type_node, NULL_TREE);
20547 tree v4si_ftype_v4si_int
20548 = build_function_type_list (V4SI_type_node,
20549 V4SI_type_node, integer_type_node, NULL_TREE);
20550 tree v8hi_ftype_v8hi_int
20551 = build_function_type_list (V8HI_type_node,
20552 V8HI_type_node, integer_type_node, NULL_TREE);
20553 tree v4si_ftype_v8hi_v8hi
20554 = build_function_type_list (V4SI_type_node,
20555 V8HI_type_node, V8HI_type_node, NULL_TREE);
20556 tree v1di_ftype_v8qi_v8qi
20557 = build_function_type_list (V1DI_type_node,
20558 V8QI_type_node, V8QI_type_node, NULL_TREE);
20559 tree v1di_ftype_v2si_v2si
20560 = build_function_type_list (V1DI_type_node,
20561 V2SI_type_node, V2SI_type_node, NULL_TREE);
20562 tree v2di_ftype_v16qi_v16qi
20563 = build_function_type_list (V2DI_type_node,
20564 V16QI_type_node, V16QI_type_node, NULL_TREE);
20565 tree v2di_ftype_v4si_v4si
20566 = build_function_type_list (V2DI_type_node,
20567 V4SI_type_node, V4SI_type_node, NULL_TREE);
20568 tree int_ftype_v16qi
20569 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
20570 tree v16qi_ftype_pcchar
20571 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
20572 tree void_ftype_pchar_v16qi
20573 = build_function_type_list (void_type_node,
20574 pchar_type_node, V16QI_type_node, NULL_TREE);
20575
20576 tree v2di_ftype_v2di_unsigned_unsigned
20577 = build_function_type_list (V2DI_type_node, V2DI_type_node,
20578 unsigned_type_node, unsigned_type_node,
20579 NULL_TREE);
20580 tree v2di_ftype_v2di_v2di_unsigned_unsigned
20581 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
20582 unsigned_type_node, unsigned_type_node,
20583 NULL_TREE);
20584 tree v2di_ftype_v2di_v16qi
20585 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
20586 NULL_TREE);
20587 tree v2df_ftype_v2df_v2df_v2df
20588 = build_function_type_list (V2DF_type_node,
20589 V2DF_type_node, V2DF_type_node,
20590 V2DF_type_node, NULL_TREE);
20591 tree v4sf_ftype_v4sf_v4sf_v4sf
20592 = build_function_type_list (V4SF_type_node,
20593 V4SF_type_node, V4SF_type_node,
20594 V4SF_type_node, NULL_TREE);
20595 tree v8hi_ftype_v16qi
20596 = build_function_type_list (V8HI_type_node, V16QI_type_node,
20597 NULL_TREE);
20598 tree v4si_ftype_v16qi
20599 = build_function_type_list (V4SI_type_node, V16QI_type_node,
20600 NULL_TREE);
20601 tree v2di_ftype_v16qi
20602 = build_function_type_list (V2DI_type_node, V16QI_type_node,
20603 NULL_TREE);
20604 tree v4si_ftype_v8hi
20605 = build_function_type_list (V4SI_type_node, V8HI_type_node,
20606 NULL_TREE);
20607 tree v2di_ftype_v8hi
20608 = build_function_type_list (V2DI_type_node, V8HI_type_node,
20609 NULL_TREE);
20610 tree v2di_ftype_v4si
20611 = build_function_type_list (V2DI_type_node, V4SI_type_node,
20612 NULL_TREE);
20613 tree v2di_ftype_pv2di
20614 = build_function_type_list (V2DI_type_node, pv2di_type_node,
20615 NULL_TREE);
20616 tree v16qi_ftype_v16qi_v16qi_int
20617 = build_function_type_list (V16QI_type_node, V16QI_type_node,
20618 V16QI_type_node, integer_type_node,
20619 NULL_TREE);
20620 tree v16qi_ftype_v16qi_v16qi_v16qi
20621 = build_function_type_list (V16QI_type_node, V16QI_type_node,
20622 V16QI_type_node, V16QI_type_node,
20623 NULL_TREE);
20624 tree v8hi_ftype_v8hi_v8hi_int
20625 = build_function_type_list (V8HI_type_node, V8HI_type_node,
20626 V8HI_type_node, integer_type_node,
20627 NULL_TREE);
20628 tree v4si_ftype_v4si_v4si_int
20629 = build_function_type_list (V4SI_type_node, V4SI_type_node,
20630 V4SI_type_node, integer_type_node,
20631 NULL_TREE);
20632 tree int_ftype_v2di_v2di
20633 = build_function_type_list (integer_type_node,
20634 V2DI_type_node, V2DI_type_node,
20635 NULL_TREE);
20636 tree int_ftype_v16qi_int_v16qi_int_int
20637 = build_function_type_list (integer_type_node,
20638 V16QI_type_node,
20639 integer_type_node,
20640 V16QI_type_node,
20641 integer_type_node,
20642 integer_type_node,
20643 NULL_TREE);
20644 tree v16qi_ftype_v16qi_int_v16qi_int_int
20645 = build_function_type_list (V16QI_type_node,
20646 V16QI_type_node,
20647 integer_type_node,
20648 V16QI_type_node,
20649 integer_type_node,
20650 integer_type_node,
20651 NULL_TREE);
20652 tree int_ftype_v16qi_v16qi_int
20653 = build_function_type_list (integer_type_node,
20654 V16QI_type_node,
20655 V16QI_type_node,
20656 integer_type_node,
20657 NULL_TREE);
20658
20659 /* SSE5 instructions. */
20660 tree v2di_ftype_v2di_v2di_v2di
20661 = build_function_type_list (V2DI_type_node,
20662 V2DI_type_node,
20663 V2DI_type_node,
20664 V2DI_type_node,
20665 NULL_TREE);
20666
20667 tree v4si_ftype_v4si_v4si_v4si
20668 = build_function_type_list (V4SI_type_node,
20669 V4SI_type_node,
20670 V4SI_type_node,
20671 V4SI_type_node,
20672 NULL_TREE);
20673
20674 tree v4si_ftype_v4si_v4si_v2di
20675 = build_function_type_list (V4SI_type_node,
20676 V4SI_type_node,
20677 V4SI_type_node,
20678 V2DI_type_node,
20679 NULL_TREE);
20680
20681 tree v8hi_ftype_v8hi_v8hi_v8hi
20682 = build_function_type_list (V8HI_type_node,
20683 V8HI_type_node,
20684 V8HI_type_node,
20685 V8HI_type_node,
20686 NULL_TREE);
20687
20688 tree v8hi_ftype_v8hi_v8hi_v4si
20689 = build_function_type_list (V8HI_type_node,
20690 V8HI_type_node,
20691 V8HI_type_node,
20692 V4SI_type_node,
20693 NULL_TREE);
20694
20695 tree v2df_ftype_v2df_v2df_v16qi
20696 = build_function_type_list (V2DF_type_node,
20697 V2DF_type_node,
20698 V2DF_type_node,
20699 V16QI_type_node,
20700 NULL_TREE);
20701
20702 tree v4sf_ftype_v4sf_v4sf_v16qi
20703 = build_function_type_list (V4SF_type_node,
20704 V4SF_type_node,
20705 V4SF_type_node,
20706 V16QI_type_node,
20707 NULL_TREE);
20708
20709 tree v2di_ftype_v2di_si
20710 = build_function_type_list (V2DI_type_node,
20711 V2DI_type_node,
20712 integer_type_node,
20713 NULL_TREE);
20714
20715 tree v4si_ftype_v4si_si
20716 = build_function_type_list (V4SI_type_node,
20717 V4SI_type_node,
20718 integer_type_node,
20719 NULL_TREE);
20720
20721 tree v8hi_ftype_v8hi_si
20722 = build_function_type_list (V8HI_type_node,
20723 V8HI_type_node,
20724 integer_type_node,
20725 NULL_TREE);
20726
20727 tree v16qi_ftype_v16qi_si
20728 = build_function_type_list (V16QI_type_node,
20729 V16QI_type_node,
20730 integer_type_node,
20731 NULL_TREE);
20732 tree v4sf_ftype_v4hi
20733 = build_function_type_list (V4SF_type_node,
20734 V4HI_type_node,
20735 NULL_TREE);
20736
20737 tree v4hi_ftype_v4sf
20738 = build_function_type_list (V4HI_type_node,
20739 V4SF_type_node,
20740 NULL_TREE);
20741
20742 tree v2di_ftype_v2di
20743 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
20744
20745 tree v16qi_ftype_v8hi_v8hi
20746 = build_function_type_list (V16QI_type_node,
20747 V8HI_type_node, V8HI_type_node,
20748 NULL_TREE);
20749 tree v8hi_ftype_v4si_v4si
20750 = build_function_type_list (V8HI_type_node,
20751 V4SI_type_node, V4SI_type_node,
20752 NULL_TREE);
20753 tree v8hi_ftype_v16qi_v16qi
20754 = build_function_type_list (V8HI_type_node,
20755 V16QI_type_node, V16QI_type_node,
20756 NULL_TREE);
20757 tree v4hi_ftype_v8qi_v8qi
20758 = build_function_type_list (V4HI_type_node,
20759 V8QI_type_node, V8QI_type_node,
20760 NULL_TREE);
20761 tree unsigned_ftype_unsigned_uchar
20762 = build_function_type_list (unsigned_type_node,
20763 unsigned_type_node,
20764 unsigned_char_type_node,
20765 NULL_TREE);
20766 tree unsigned_ftype_unsigned_ushort
20767 = build_function_type_list (unsigned_type_node,
20768 unsigned_type_node,
20769 short_unsigned_type_node,
20770 NULL_TREE);
20771 tree unsigned_ftype_unsigned_unsigned
20772 = build_function_type_list (unsigned_type_node,
20773 unsigned_type_node,
20774 unsigned_type_node,
20775 NULL_TREE);
20776 tree uint64_ftype_uint64_uint64
20777 = build_function_type_list (long_long_unsigned_type_node,
20778 long_long_unsigned_type_node,
20779 long_long_unsigned_type_node,
20780 NULL_TREE);
20781 tree float_ftype_float
20782 = build_function_type_list (float_type_node,
20783 float_type_node,
20784 NULL_TREE);
20785
20786 tree ftype;
20787
20788 /* Add all special builtins with a variable number of operands. */
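/* Each entry's FLAG field selects one of the function-type trees built
   above (for example, V2DF_FTYPE_PCDOUBLE selects v2df_ftype_pcdouble);
   def_builtin then creates the builtin declaration under the entry's
   ISA mask.  */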
20789 for (i = 0, d = bdesc_special_args;
20790 i < ARRAY_SIZE (bdesc_special_args);
20791 i++, d++)
20792 {
20793 tree type;
20794
20795 if (d->name == 0)
20796 continue;
20797
20798 switch ((enum ix86_special_builtin_type) d->flag)
20799 {
20800 case VOID_FTYPE_VOID:
20801 type = void_ftype_void;
20802 break;
20803 case V16QI_FTYPE_PCCHAR:
20804 type = v16qi_ftype_pcchar;
20805 break;
20806 case V4SF_FTYPE_PCFLOAT:
20807 type = v4sf_ftype_pcfloat;
20808 break;
20809 case V2DI_FTYPE_PV2DI:
20810 type = v2di_ftype_pv2di;
20811 break;
20812 case V2DF_FTYPE_PCDOUBLE:
20813 type = v2df_ftype_pcdouble;
20814 break;
20815 case V4SF_FTYPE_V4SF_PCV2SF:
20816 type = v4sf_ftype_v4sf_pcv2sf;
20817 break;
20818 case V2DF_FTYPE_V2DF_PCDOUBLE:
20819 type = v2df_ftype_v2df_pcdouble;
20820 break;
20821 case VOID_FTYPE_PV2SF_V4SF:
20822 type = void_ftype_pv2sf_v4sf;
20823 break;
20824 case VOID_FTYPE_PV2DI_V2DI:
20825 type = void_ftype_pv2di_v2di;
20826 break;
20827 case VOID_FTYPE_PCHAR_V16QI:
20828 type = void_ftype_pchar_v16qi;
20829 break;
20830 case VOID_FTYPE_PFLOAT_V4SF:
20831 type = void_ftype_pfloat_v4sf;
20832 break;
20833 case VOID_FTYPE_PDOUBLE_V2DF:
20834 type = void_ftype_pdouble_v2df;
20835 break;
20836 case VOID_FTYPE_PDI_DI:
20837 type = void_ftype_pdi_di;
20838 break;
20839 case VOID_FTYPE_PINT_INT:
20840 type = void_ftype_pint_int;
20841 break;
20842 default:
20843 gcc_unreachable ();
20844 }
20845
20846 def_builtin (d->mask, d->name, type, d->code);
20847 }
20848
20849 /* Add all builtins with a variable number of operands. */
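/* Unlike the special builtins above, which include loads and stores and
   are registered with def_builtin, the builtins in this table only
   compute values, so def_builtin_const registers them as const
   functions for the optimizers.  */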
20850 for (i = 0, d = bdesc_args;
20851 i < ARRAY_SIZE (bdesc_args);
20852 i++, d++)
20853 {
20854 tree type;
20855
20856 if (d->name == 0)
20857 continue;
20858
20859 switch ((enum ix86_builtin_type) d->flag)
20860 {
20861 case FLOAT_FTYPE_FLOAT:
20862 type = float_ftype_float;
20863 break;
20864 case INT_FTYPE_V2DI_V2DI_PTEST:
20865 type = int_ftype_v2di_v2di;
20866 break;
20867 case INT64_FTYPE_V4SF:
20868 type = int64_ftype_v4sf;
20869 break;
20870 case INT64_FTYPE_V2DF:
20871 type = int64_ftype_v2df;
20872 break;
20873 case INT_FTYPE_V16QI:
20874 type = int_ftype_v16qi;
20875 break;
20876 case INT_FTYPE_V8QI:
20877 type = int_ftype_v8qi;
20878 break;
20879 case INT_FTYPE_V4SF:
20880 type = int_ftype_v4sf;
20881 break;
20882 case INT_FTYPE_V2DF:
20883 type = int_ftype_v2df;
20884 break;
20885 case V16QI_FTYPE_V16QI:
20886 type = v16qi_ftype_v16qi;
20887 break;
20888 case V8HI_FTYPE_V8HI:
20889 type = v8hi_ftype_v8hi;
20890 break;
20891 case V8HI_FTYPE_V16QI:
20892 type = v8hi_ftype_v16qi;
20893 break;
20894 case V8QI_FTYPE_V8QI:
20895 type = v8qi_ftype_v8qi;
20896 break;
20897 case V4SI_FTYPE_V4SI:
20898 type = v4si_ftype_v4si;
20899 break;
20900 case V4SI_FTYPE_V16QI:
20901 type = v4si_ftype_v16qi;
20902 break;
20903 case V4SI_FTYPE_V8HI:
20904 type = v4si_ftype_v8hi;
20905 break;
20906 case V4SI_FTYPE_V4SF:
20907 type = v4si_ftype_v4sf;
20908 break;
20909 case V4SI_FTYPE_V2DF:
20910 type = v4si_ftype_v2df;
20911 break;
20912 case V4HI_FTYPE_V4HI:
20913 type = v4hi_ftype_v4hi;
20914 break;
20915 case V4SF_FTYPE_V4SF:
20916 case V4SF_FTYPE_V4SF_VEC_MERGE:
20917 type = v4sf_ftype_v4sf;
20918 break;
20919 case V4SF_FTYPE_V4SI:
20920 type = v4sf_ftype_v4si;
20921 break;
20922 case V4SF_FTYPE_V2DF:
20923 type = v4sf_ftype_v2df;
20924 break;
20925 case V2DI_FTYPE_V2DI:
20926 type = v2di_ftype_v2di;
20927 break;
20928 case V2DI_FTYPE_V16QI:
20929 type = v2di_ftype_v16qi;
20930 break;
20931 case V2DI_FTYPE_V8HI:
20932 type = v2di_ftype_v8hi;
20933 break;
20934 case V2DI_FTYPE_V4SI:
20935 type = v2di_ftype_v4si;
20936 break;
20937 case V2SI_FTYPE_V2SI:
20938 type = v2si_ftype_v2si;
20939 break;
20940 case V2SI_FTYPE_V4SF:
20941 type = v2si_ftype_v4sf;
20942 break;
20943 case V2SI_FTYPE_V2DF:
20944 type = v2si_ftype_v2df;
20945 break;
20946 case V2SI_FTYPE_V2SF:
20947 type = v2si_ftype_v2sf;
20948 break;
20949 case V2DF_FTYPE_V4SF:
20950 type = v2df_ftype_v4sf;
20951 break;
20952 case V2DF_FTYPE_V2DF:
20953 case V2DF_FTYPE_V2DF_VEC_MERGE:
20954 type = v2df_ftype_v2df;
20955 break;
20956 case V2DF_FTYPE_V2SI:
20957 type = v2df_ftype_v2si;
20958 break;
20959 case V2DF_FTYPE_V4SI:
20960 type = v2df_ftype_v4si;
20961 break;
20962 case V2SF_FTYPE_V2SF:
20963 type = v2sf_ftype_v2sf;
20964 break;
20965 case V2SF_FTYPE_V2SI:
20966 type = v2sf_ftype_v2si;
20967 break;
20968 case V16QI_FTYPE_V16QI_V16QI:
20969 type = v16qi_ftype_v16qi_v16qi;
20970 break;
20971 case V16QI_FTYPE_V8HI_V8HI:
20972 type = v16qi_ftype_v8hi_v8hi;
20973 break;
20974 case V8QI_FTYPE_V8QI_V8QI:
20975 type = v8qi_ftype_v8qi_v8qi;
20976 break;
20977 case V8QI_FTYPE_V4HI_V4HI:
20978 type = v8qi_ftype_v4hi_v4hi;
20979 break;
20980 case V8HI_FTYPE_V8HI_V8HI:
20981 case V8HI_FTYPE_V8HI_V8HI_COUNT:
20982 type = v8hi_ftype_v8hi_v8hi;
20983 break;
20984 case V8HI_FTYPE_V16QI_V16QI:
20985 type = v8hi_ftype_v16qi_v16qi;
20986 break;
20987 case V8HI_FTYPE_V4SI_V4SI:
20988 type = v8hi_ftype_v4si_v4si;
20989 break;
20990 case V8HI_FTYPE_V8HI_SI_COUNT:
20991 type = v8hi_ftype_v8hi_int;
20992 break;
20993 case V4SI_FTYPE_V4SI_V4SI:
20994 case V4SI_FTYPE_V4SI_V4SI_COUNT:
20995 type = v4si_ftype_v4si_v4si;
20996 break;
20997 case V4SI_FTYPE_V8HI_V8HI:
20998 type = v4si_ftype_v8hi_v8hi;
20999 break;
21000 case V4SI_FTYPE_V4SF_V4SF:
21001 type = v4si_ftype_v4sf_v4sf;
21002 break;
21003 case V4SI_FTYPE_V2DF_V2DF:
21004 type = v4si_ftype_v2df_v2df;
21005 break;
21006 case V4SI_FTYPE_V4SI_SI_COUNT:
21007 type = v4si_ftype_v4si_int;
21008 break;
21009 case V4HI_FTYPE_V4HI_V4HI:
21010 case V4HI_FTYPE_V4HI_V4HI_COUNT:
21011 type = v4hi_ftype_v4hi_v4hi;
21012 break;
21013 case V4HI_FTYPE_V8QI_V8QI:
21014 type = v4hi_ftype_v8qi_v8qi;
21015 break;
21016 case V4HI_FTYPE_V2SI_V2SI:
21017 type = v4hi_ftype_v2si_v2si;
21018 break;
21019 case V4HI_FTYPE_V4HI_SI_COUNT:
21020 type = v4hi_ftype_v4hi_int;
21021 break;
21022 case V4SF_FTYPE_V4SF_V4SF:
21023 case V4SF_FTYPE_V4SF_V4SF_SWAP:
21024 type = v4sf_ftype_v4sf_v4sf;
21025 break;
21026 case V4SF_FTYPE_V4SF_V2SI:
21027 type = v4sf_ftype_v4sf_v2si;
21028 break;
21029 case V4SF_FTYPE_V4SF_V2DF:
21030 type = v4sf_ftype_v4sf_v2df;
21031 break;
21032 case V4SF_FTYPE_V4SF_DI:
21033 type = v4sf_ftype_v4sf_int64;
21034 break;
21035 case V4SF_FTYPE_V4SF_SI:
21036 type = v4sf_ftype_v4sf_int;
21037 break;
21038 case V2DI_FTYPE_V2DI_V2DI:
21039 case V2DI_FTYPE_V2DI_V2DI_COUNT:
21040 type = v2di_ftype_v2di_v2di;
21041 break;
21042 case V2DI_FTYPE_V16QI_V16QI:
21043 type = v2di_ftype_v16qi_v16qi;
21044 break;
21045 case V2DI_FTYPE_V4SI_V4SI:
21046 type = v2di_ftype_v4si_v4si;
21047 break;
21048 case V2DI_FTYPE_V2DI_V16QI:
21049 type = v2di_ftype_v2di_v16qi;
21050 break;
21051 case V2DI_FTYPE_V2DF_V2DF:
21052 type = v2di_ftype_v2df_v2df;
21053 break;
21054 case V2DI_FTYPE_V2DI_SI_COUNT:
21055 type = v2di_ftype_v2di_int;
21056 break;
21057 case V2SI_FTYPE_V2SI_V2SI:
21058 case V2SI_FTYPE_V2SI_V2SI_COUNT:
21059 type = v2si_ftype_v2si_v2si;
21060 break;
21061 case V2SI_FTYPE_V4HI_V4HI:
21062 type = v2si_ftype_v4hi_v4hi;
21063 break;
21064 case V2SI_FTYPE_V2SF_V2SF:
21065 type = v2si_ftype_v2sf_v2sf;
21066 break;
21067 case V2SI_FTYPE_V2SI_SI_COUNT:
21068 type = v2si_ftype_v2si_int;
21069 break;
21070 case V2DF_FTYPE_V2DF_V2DF:
21071 case V2DF_FTYPE_V2DF_V2DF_SWAP:
21072 type = v2df_ftype_v2df_v2df;
21073 break;
21074 case V2DF_FTYPE_V2DF_V4SF:
21075 type = v2df_ftype_v2df_v4sf;
21076 break;
21077 case V2DF_FTYPE_V2DF_DI:
21078 type = v2df_ftype_v2df_int64;
21079 break;
21080 case V2DF_FTYPE_V2DF_SI:
21081 type = v2df_ftype_v2df_int;
21082 break;
21083 case V2SF_FTYPE_V2SF_V2SF:
21084 type = v2sf_ftype_v2sf_v2sf;
21085 break;
21086 case V1DI_FTYPE_V1DI_V1DI:
21087 case V1DI_FTYPE_V1DI_V1DI_COUNT:
21088 type = v1di_ftype_v1di_v1di;
21089 break;
21090 case V1DI_FTYPE_V8QI_V8QI:
21091 type = v1di_ftype_v8qi_v8qi;
21092 break;
21093 case V1DI_FTYPE_V2SI_V2SI:
21094 type = v1di_ftype_v2si_v2si;
21095 break;
21096 case V1DI_FTYPE_V1DI_SI_COUNT:
21097 type = v1di_ftype_v1di_int;
21098 break;
21099 case UINT64_FTYPE_UINT64_UINT64:
21100 type = uint64_ftype_uint64_uint64;
21101 break;
21102 case UINT_FTYPE_UINT_UINT:
21103 type = unsigned_ftype_unsigned_unsigned;
21104 break;
21105 case UINT_FTYPE_UINT_USHORT:
21106 type = unsigned_ftype_unsigned_ushort;
21107 break;
21108 case UINT_FTYPE_UINT_UCHAR:
21109 type = unsigned_ftype_unsigned_uchar;
21110 break;
21111 case V8HI_FTYPE_V8HI_INT:
21112 type = v8hi_ftype_v8hi_int;
21113 break;
21114 case V4SI_FTYPE_V4SI_INT:
21115 type = v4si_ftype_v4si_int;
21116 break;
21117 case V4HI_FTYPE_V4HI_INT:
21118 type = v4hi_ftype_v4hi_int;
21119 break;
21120 case V4SF_FTYPE_V4SF_INT:
21121 type = v4sf_ftype_v4sf_int;
21122 break;
21123 case V2DI_FTYPE_V2DI_INT:
21124 case V2DI2TI_FTYPE_V2DI_INT:
21125 type = v2di_ftype_v2di_int;
21126 break;
21127 case V2DF_FTYPE_V2DF_INT:
21128 type = v2df_ftype_v2df_int;
21129 break;
21130 case V16QI_FTYPE_V16QI_V16QI_V16QI:
21131 type = v16qi_ftype_v16qi_v16qi_v16qi;
21132 break;
21133 case V4SF_FTYPE_V4SF_V4SF_V4SF:
21134 type = v4sf_ftype_v4sf_v4sf_v4sf;
21135 break;
21136 case V2DF_FTYPE_V2DF_V2DF_V2DF:
21137 type = v2df_ftype_v2df_v2df_v2df;
21138 break;
21139 case V16QI_FTYPE_V16QI_V16QI_INT:
21140 type = v16qi_ftype_v16qi_v16qi_int;
21141 break;
21142 case V8HI_FTYPE_V8HI_V8HI_INT:
21143 type = v8hi_ftype_v8hi_v8hi_int;
21144 break;
21145 case V4SI_FTYPE_V4SI_V4SI_INT:
21146 type = v4si_ftype_v4si_v4si_int;
21147 break;
21148 case V4SF_FTYPE_V4SF_V4SF_INT:
21149 type = v4sf_ftype_v4sf_v4sf_int;
21150 break;
21151 case V2DI_FTYPE_V2DI_V2DI_INT:
21152 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
21153 type = v2di_ftype_v2di_v2di_int;
21154 break;
21155 case V2DF_FTYPE_V2DF_V2DF_INT:
21156 type = v2df_ftype_v2df_v2df_int;
21157 break;
21158 case V2DI_FTYPE_V2DI_UINT_UINT:
21159 type = v2di_ftype_v2di_unsigned_unsigned;
21160 break;
21161 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
21162 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
21163 break;
21164 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
21165 type = v1di_ftype_v1di_v1di_int;
21166 break;
21167 default:
21168 gcc_unreachable ();
21169 }
21170
21171 def_builtin_const (d->mask, d->name, type, d->code);
21172 }
21173
21174 /* pcmpestr[im] insns. */
21175 for (i = 0, d = bdesc_pcmpestr;
21176 i < ARRAY_SIZE (bdesc_pcmpestr);
21177 i++, d++)
21178 {
21179 if (d->code == IX86_BUILTIN_PCMPESTRM128)
21180 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
21181 else
21182 ftype = int_ftype_v16qi_int_v16qi_int_int;
21183 def_builtin_const (d->mask, d->name, ftype, d->code);
21184 }
21185
21186 /* pcmpistr[im] insns. */
21187 for (i = 0, d = bdesc_pcmpistr;
21188 i < ARRAY_SIZE (bdesc_pcmpistr);
21189 i++, d++)
21190 {
21191 if (d->code == IX86_BUILTIN_PCMPISTRM128)
21192 ftype = v16qi_ftype_v16qi_v16qi_int;
21193 else
21194 ftype = int_ftype_v16qi_v16qi_int;
21195 def_builtin_const (d->mask, d->name, ftype, d->code);
21196 }
21197
21198 /* comi/ucomi insns. */
21199 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
21200 if (d->mask == OPTION_MASK_ISA_SSE2)
21201 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
21202 else
21203 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
21204
21205 /* SSE */
21206 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
21207 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
21208
21209 /* SSE or 3DNow!A */
21210 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
21211
21212 /* SSE2 */
21213 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
21214
21215 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
21216 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
21217
21218 /* SSE3. */
21219 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
21220 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
21221
21222 /* AES */
21223 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
21224 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
21225 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
21226 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
21227 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
21228 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
21229
21230 /* PCLMUL */
21231 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
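/* Illustrative use of the PCLMUL builtin registered above; this is a
   sketch, not part of this file, and assumes -mpclmul plus the local
   typedef and function names shown:

     typedef long long v2di_t __attribute__ ((vector_size (16)));

     v2di_t
     clmul_low_halves (v2di_t a, v2di_t b)
     {
       return __builtin_ia32_pclmulqdq128 (a, b, 0x00);
     }

   The immediate operand selects which 64-bit half of each source is
   used; 0x00 multiplies the two low halves carry-lessly.  */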
21232
21233 /* Access to the vec_init patterns. */
21234 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
21235 integer_type_node, NULL_TREE);
21236 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
21237
21238 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
21239 short_integer_type_node,
21240 short_integer_type_node,
21241 short_integer_type_node, NULL_TREE);
21242 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
21243
21244 ftype = build_function_type_list (V8QI_type_node, char_type_node,
21245 char_type_node, char_type_node,
21246 char_type_node, char_type_node,
21247 char_type_node, char_type_node,
21248 char_type_node, NULL_TREE);
21249 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
21250
21251 /* Access to the vec_extract patterns. */
21252 ftype = build_function_type_list (double_type_node, V2DF_type_node,
21253 integer_type_node, NULL_TREE);
21254 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
21255
21256 ftype = build_function_type_list (long_long_integer_type_node,
21257 V2DI_type_node, integer_type_node,
21258 NULL_TREE);
21259 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
21260
21261 ftype = build_function_type_list (float_type_node, V4SF_type_node,
21262 integer_type_node, NULL_TREE);
21263 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
21264
21265 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
21266 integer_type_node, NULL_TREE);
21267 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
21268
21269 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
21270 integer_type_node, NULL_TREE);
21271 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
21272
21273 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
21274 integer_type_node, NULL_TREE);
21275 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
21276
21277 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
21278 integer_type_node, NULL_TREE);
21279 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
21280
21281 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
21282 integer_type_node, NULL_TREE);
21283 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
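/* The vec_ext builtins extract a single element from a vector.  A
   sketch of typical use (not part of this file; v4sf_t and
   first_element are illustrative names, and the element selector is
   expected to be a compile-time constant):

     typedef float v4sf_t __attribute__ ((vector_size (16)));

     float
     first_element (v4sf_t v)
     {
       return __builtin_ia32_vec_ext_v4sf (v, 0);
     }
*/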
21284
21285 /* Access to the vec_set patterns. */
21286 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
21287 intDI_type_node,
21288 integer_type_node, NULL_TREE);
21289 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
21290
21291 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
21292 float_type_node,
21293 integer_type_node, NULL_TREE);
21294 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
21295
21296 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
21297 intSI_type_node,
21298 integer_type_node, NULL_TREE);
21299 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
21300
21301 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
21302 intHI_type_node,
21303 integer_type_node, NULL_TREE);
21304 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
21305
21306 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
21307 intHI_type_node,
21308 integer_type_node, NULL_TREE);
21309 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
21310
21311 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
21312 intQI_type_node,
21313 integer_type_node, NULL_TREE);
21314 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
21315
21316 /* Add SSE5 multi-arg instructions. */
21317 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
21318 {
21319 tree mtype = NULL_TREE;
21320
21321 if (d->name == 0)
21322 continue;
21323
21324 switch ((enum multi_arg_type)d->flag)
21325 {
21326 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
21327 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
21328 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
21329 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
21330 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
21331 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
21332 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
21333 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
21334 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
21335 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
21336 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
21337 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
21338 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
21339 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
21340 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
21341 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
21342 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
21343 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
21344 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
21345 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
21346 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
21347 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
21348 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
21349 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
21350 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
21351 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
21352 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
21353 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
21354 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
21355 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
21356 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
21357 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
21358 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
21359 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
21360 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
21361 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
21362 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
21363 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
21364 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
21365 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
21366 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
21367 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
21368 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
21369 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
21370 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
21371 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
21372 case MULTI_ARG_UNKNOWN:
21373 default:
21374 gcc_unreachable ();
21375 }
21376
21377 if (mtype)
21378 def_builtin_const (d->mask, d->name, mtype, d->code);
21379 }
21380 }
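/* Illustrative use of one of the SSE5 compare builtins registered in
   the loop above.  This is a sketch only: SSE5 hardware was never
   shipped, so it requires -msse5 with a compiler that still provides
   these builtins; v16qi_t and byte_eq_mask are illustrative names:

     typedef char v16qi_t __attribute__ ((vector_size (16)));

     v16qi_t
     byte_eq_mask (v16qi_t a, v16qi_t b)
     {
       return __builtin_ia32_pcomeqb (a, b);
     }

   Each byte of the result is all ones where the corresponding bytes of
   A and B compare equal, and zero otherwise.  */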
21381
21382 /* Internal helper for ix86_init_builtins. */
21383
21384 static void
21385 ix86_init_builtins_va_builtins_abi (void)
21386 {
21387 tree ms_va_ref, sysv_va_ref;
21388 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
21389 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
21390 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
21391 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
21392
21393 if (!TARGET_64BIT)
21394 return;
21395 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
21396 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
21397 ms_va_ref = build_reference_type (ms_va_list_type_node);
21398 sysv_va_ref =
21399 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
21400
21401 fnvoid_va_end_ms =
21402 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
21403 fnvoid_va_start_ms =
21404 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
21405 fnvoid_va_end_sysv =
21406 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
21407 fnvoid_va_start_sysv =
21408 build_varargs_function_type_list (void_type_node, sysv_va_ref,
21409 NULL_TREE);
21410 fnvoid_va_copy_ms =
21411 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
21412 NULL_TREE);
21413 fnvoid_va_copy_sysv =
21414 build_function_type_list (void_type_node, sysv_va_ref,
21415 sysv_va_ref, NULL_TREE);
21416
21417 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
21418 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
21419 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
21420 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
21421 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
21422 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
21423 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
21424 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
21425 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
21426 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
21427 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
21428 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
21429 }
21430
21431 static void
21432 ix86_init_builtins (void)
21433 {
21434 tree float128_type_node = make_node (REAL_TYPE);
21435 tree ftype, decl;
21436
21437 /* The __float80 type. */
21438 if (TYPE_MODE (long_double_type_node) == XFmode)
21439 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
21440 "__float80");
21441 else
21442 {
21443 /* The __float80 type. */
21444 tree float80_type_node = make_node (REAL_TYPE);
21445
21446 TYPE_PRECISION (float80_type_node) = 80;
21447 layout_type (float80_type_node);
21448 (*lang_hooks.types.register_builtin_type) (float80_type_node,
21449 "__float80");
21450 }
21451
21452 /* The __float128 type. */
21453 TYPE_PRECISION (float128_type_node) = 128;
21454 layout_type (float128_type_node);
21455 (*lang_hooks.types.register_builtin_type) (float128_type_node,
21456 "__float128");
21457
21458 /* TFmode support builtins. */
21459 ftype = build_function_type (float128_type_node, void_list_node);
21460 decl = add_builtin_function ("__builtin_infq", ftype,
21461 IX86_BUILTIN_INFQ, BUILT_IN_MD,
21462 NULL, NULL_TREE);
21463 ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
21464
21465 /* We will expand them to a normal call if SSE2 isn't available, since
21466 they are used by libgcc. */
21467 ftype = build_function_type_list (float128_type_node,
21468 float128_type_node,
21469 NULL_TREE);
21470 decl = add_builtin_function ("__builtin_fabsq", ftype,
21471 IX86_BUILTIN_FABSQ, BUILT_IN_MD,
21472 "__fabstf2", NULL_TREE);
21473 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
21474 TREE_READONLY (decl) = 1;
21475
21476 ftype = build_function_type_list (float128_type_node,
21477 float128_type_node,
21478 float128_type_node,
21479 NULL_TREE);
21480 decl = add_builtin_function ("__builtin_copysignq", ftype,
21481 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
21482 "__copysigntf3", NULL_TREE);
21483 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
21484 TREE_READONLY (decl) = 1;
21485
21486 ix86_init_mmx_sse_builtins ();
21487 if (TARGET_64BIT)
21488 ix86_init_builtins_va_builtins_abi ();
21489 }
21490
21491 /* Errors in the source file can cause expand_expr to return const0_rtx
21492 where we expect a vector. To avoid crashing, use one of the vector
21493 clear instructions. */
21494 static rtx
21495 safe_vector_operand (rtx x, enum machine_mode mode)
21496 {
21497 if (x == const0_rtx)
21498 x = CONST0_RTX (mode);
21499 return x;
21500 }
21501
21502 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
21503
21504 static rtx
21505 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
21506 {
21507 rtx pat;
21508 tree arg0 = CALL_EXPR_ARG (exp, 0);
21509 tree arg1 = CALL_EXPR_ARG (exp, 1);
21510 rtx op0 = expand_normal (arg0);
21511 rtx op1 = expand_normal (arg1);
21512 enum machine_mode tmode = insn_data[icode].operand[0].mode;
21513 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
21514 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
21515
21516 if (VECTOR_MODE_P (mode0))
21517 op0 = safe_vector_operand (op0, mode0);
21518 if (VECTOR_MODE_P (mode1))
21519 op1 = safe_vector_operand (op1, mode1);
21520
21521 if (optimize || !target
21522 || GET_MODE (target) != tmode
21523 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21524 target = gen_reg_rtx (tmode);
21525
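/* The insn may expect a TImode operand where the builtin passes an int;
   if so, load the 32-bit value into a vector register and view it as
   TImode. */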
21526 if (GET_MODE (op1) == SImode && mode1 == TImode)
21527 {
21528 rtx x = gen_reg_rtx (V4SImode);
21529 emit_insn (gen_sse2_loadd (x, op1));
21530 op1 = gen_lowpart (TImode, x);
21531 }
21532
21533 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
21534 op0 = copy_to_mode_reg (mode0, op0);
21535 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
21536 op1 = copy_to_mode_reg (mode1, op1);
21537
21538 pat = GEN_FCN (icode) (target, op0, op1);
21539 if (! pat)
21540 return 0;
21541
21542 emit_insn (pat);
21543
21544 return target;
21545 }
21546
21547 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
21548
21549 static rtx
21550 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
21551 enum multi_arg_type m_type,
21552 enum insn_code sub_code)
21553 {
21554 rtx pat;
21555 int i;
21556 int nargs;
21557 bool comparison_p = false;
21558 bool tf_p = false;
21559 bool last_arg_constant = false;
21560 int num_memory = 0;
21561 struct {
21562 rtx op;
21563 enum machine_mode mode;
21564 } args[4];
21565
21566 enum machine_mode tmode = insn_data[icode].operand[0].mode;
21567
21568 switch (m_type)
21569 {
21570 case MULTI_ARG_3_SF:
21571 case MULTI_ARG_3_DF:
21572 case MULTI_ARG_3_DI:
21573 case MULTI_ARG_3_SI:
21574 case MULTI_ARG_3_SI_DI:
21575 case MULTI_ARG_3_HI:
21576 case MULTI_ARG_3_HI_SI:
21577 case MULTI_ARG_3_QI:
21578 case MULTI_ARG_3_PERMPS:
21579 case MULTI_ARG_3_PERMPD:
21580 nargs = 3;
21581 break;
21582
21583 case MULTI_ARG_2_SF:
21584 case MULTI_ARG_2_DF:
21585 case MULTI_ARG_2_DI:
21586 case MULTI_ARG_2_SI:
21587 case MULTI_ARG_2_HI:
21588 case MULTI_ARG_2_QI:
21589 nargs = 2;
21590 break;
21591
21592 case MULTI_ARG_2_DI_IMM:
21593 case MULTI_ARG_2_SI_IMM:
21594 case MULTI_ARG_2_HI_IMM:
21595 case MULTI_ARG_2_QI_IMM:
21596 nargs = 2;
21597 last_arg_constant = true;
21598 break;
21599
21600 case MULTI_ARG_1_SF:
21601 case MULTI_ARG_1_DF:
21602 case MULTI_ARG_1_DI:
21603 case MULTI_ARG_1_SI:
21604 case MULTI_ARG_1_HI:
21605 case MULTI_ARG_1_QI:
21606 case MULTI_ARG_1_SI_DI:
21607 case MULTI_ARG_1_HI_DI:
21608 case MULTI_ARG_1_HI_SI:
21609 case MULTI_ARG_1_QI_DI:
21610 case MULTI_ARG_1_QI_SI:
21611 case MULTI_ARG_1_QI_HI:
21612 case MULTI_ARG_1_PH2PS:
21613 case MULTI_ARG_1_PS2PH:
21614 nargs = 1;
21615 break;
21616
21617 case MULTI_ARG_2_SF_CMP:
21618 case MULTI_ARG_2_DF_CMP:
21619 case MULTI_ARG_2_DI_CMP:
21620 case MULTI_ARG_2_SI_CMP:
21621 case MULTI_ARG_2_HI_CMP:
21622 case MULTI_ARG_2_QI_CMP:
21623 nargs = 2;
21624 comparison_p = true;
21625 break;
21626
21627 case MULTI_ARG_2_SF_TF:
21628 case MULTI_ARG_2_DF_TF:
21629 case MULTI_ARG_2_DI_TF:
21630 case MULTI_ARG_2_SI_TF:
21631 case MULTI_ARG_2_HI_TF:
21632 case MULTI_ARG_2_QI_TF:
21633 nargs = 2;
21634 tf_p = true;
21635 break;
21636
21637 case MULTI_ARG_UNKNOWN:
21638 default:
21639 gcc_unreachable ();
21640 }
21641
21642 if (optimize || !target
21643 || GET_MODE (target) != tmode
21644 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21645 target = gen_reg_rtx (tmode);
21646
21647 gcc_assert (nargs <= 4);
21648
21649 for (i = 0; i < nargs; i++)
21650 {
21651 tree arg = CALL_EXPR_ARG (exp, i);
21652 rtx op = expand_normal (arg);
21653 int adjust = (comparison_p) ? 1 : 0;
21654 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
21655
21656 if (last_arg_constant && i == nargs-1)
21657 {
21658 if (GET_CODE (op) != CONST_INT)
21659 {
21660 error ("last argument must be an immediate");
21661 return gen_reg_rtx (tmode);
21662 }
21663 }
21664 else
21665 {
21666 if (VECTOR_MODE_P (mode))
21667 op = safe_vector_operand (op, mode);
21668
21669 /* If we aren't optimizing, only allow one memory operand to be
21670 generated. */
21671 if (memory_operand (op, mode))
21672 num_memory++;
21673
21674 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
21675
21676 if (optimize
21677 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
21678 || num_memory > 1)
21679 op = force_reg (mode, op);
21680 }
21681
21682 args[i].op = op;
21683 args[i].mode = mode;
21684 }
21685
21686 switch (nargs)
21687 {
21688 case 1:
21689 pat = GEN_FCN (icode) (target, args[0].op);
21690 break;
21691
21692 case 2:
21693 if (tf_p)
21694 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
21695 GEN_INT ((int)sub_code));
21696 else if (! comparison_p)
21697 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
21698 else
21699 {
21700 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
21701 args[0].op,
21702 args[1].op);
21703
21704 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
21705 }
21706 break;
21707
21708 case 3:
21709 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
21710 break;
21711
21712 default:
21713 gcc_unreachable ();
21714 }
21715
21716 if (! pat)
21717 return 0;
21718
21719 emit_insn (pat);
21720 return target;
21721 }
21722
21723 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
21724 insns with vec_merge. */
21725
21726 static rtx
21727 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
21728 rtx target)
21729 {
21730 rtx pat;
21731 tree arg0 = CALL_EXPR_ARG (exp, 0);
21732 rtx op1, op0 = expand_normal (arg0);
21733 enum machine_mode tmode = insn_data[icode].operand[0].mode;
21734 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
21735
21736 if (optimize || !target
21737 || GET_MODE (target) != tmode
21738 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21739 target = gen_reg_rtx (tmode);
21740
21741 if (VECTOR_MODE_P (mode0))
21742 op0 = safe_vector_operand (op0, mode0);
21743
21744 if ((optimize && !register_operand (op0, mode0))
21745 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21746 op0 = copy_to_mode_reg (mode0, op0);
21747
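/* The vec_merge pattern needs a second operand to supply the untouched
   vector elements; reuse op0 for it. */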
21748 op1 = op0;
21749 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
21750 op1 = copy_to_mode_reg (mode0, op1);
21751
21752 pat = GEN_FCN (icode) (target, op0, op1);
21753 if (! pat)
21754 return 0;
21755 emit_insn (pat);
21756 return target;
21757 }
21758
21759 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
21760
21761 static rtx
21762 ix86_expand_sse_compare (const struct builtin_description *d,
21763 tree exp, rtx target, bool swap)
21764 {
21765 rtx pat;
21766 tree arg0 = CALL_EXPR_ARG (exp, 0);
21767 tree arg1 = CALL_EXPR_ARG (exp, 1);
21768 rtx op0 = expand_normal (arg0);
21769 rtx op1 = expand_normal (arg1);
21770 rtx op2;
21771 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
21772 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
21773 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
21774 enum rtx_code comparison = d->comparison;
21775
21776 if (VECTOR_MODE_P (mode0))
21777 op0 = safe_vector_operand (op0, mode0);
21778 if (VECTOR_MODE_P (mode1))
21779 op1 = safe_vector_operand (op1, mode1);
21780
21781 /* Swap operands if we have a comparison that isn't available in
21782 hardware. */
21783 if (swap)
21784 {
21785 rtx tmp = gen_reg_rtx (mode1);
21786 emit_move_insn (tmp, op1);
21787 op1 = op0;
21788 op0 = tmp;
21789 }
21790
21791 if (optimize || !target
21792 || GET_MODE (target) != tmode
21793 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
21794 target = gen_reg_rtx (tmode);
21795
21796 if ((optimize && !register_operand (op0, mode0))
21797 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
21798 op0 = copy_to_mode_reg (mode0, op0);
21799 if ((optimize && !register_operand (op1, mode1))
21800 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
21801 op1 = copy_to_mode_reg (mode1, op1);
21802
21803 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
21804 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
21805 if (! pat)
21806 return 0;
21807 emit_insn (pat);
21808 return target;
21809 }
21810
21811 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
21812
21813 static rtx
21814 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
21815 rtx target)
21816 {
21817 rtx pat;
21818 tree arg0 = CALL_EXPR_ARG (exp, 0);
21819 tree arg1 = CALL_EXPR_ARG (exp, 1);
21820 rtx op0 = expand_normal (arg0);
21821 rtx op1 = expand_normal (arg1);
21822 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
21823 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
21824 enum rtx_code comparison = d->comparison;
21825
21826 if (VECTOR_MODE_P (mode0))
21827 op0 = safe_vector_operand (op0, mode0);
21828 if (VECTOR_MODE_P (mode1))
21829 op1 = safe_vector_operand (op1, mode1);
21830
21831 /* Swap operands if we have a comparison that isn't available in
21832 hardware. */
21833 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
21834 {
21835 rtx tmp = op1;
21836 op1 = op0;
21837 op0 = tmp;
21838 }
21839
21840 target = gen_reg_rtx (SImode);
21841 emit_move_insn (target, const0_rtx);
21842 target = gen_rtx_SUBREG (QImode, target, 0);
21843
21844 if ((optimize && !register_operand (op0, mode0))
21845 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
21846 op0 = copy_to_mode_reg (mode0, op0);
21847 if ((optimize && !register_operand (op1, mode1))
21848 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
21849 op1 = copy_to_mode_reg (mode1, op1);
21850
21851 pat = GEN_FCN (d->icode) (op0, op1);
21852 if (! pat)
21853 return 0;
21854 emit_insn (pat);
21855 emit_insn (gen_rtx_SET (VOIDmode,
21856 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
21857 gen_rtx_fmt_ee (comparison, QImode,
21858 SET_DEST (pat),
21859 const0_rtx)));
21860
21861 return SUBREG_REG (target);
21862 }
21863
21864 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
21865
21866 static rtx
21867 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
21868 rtx target)
21869 {
21870 rtx pat;
21871 tree arg0 = CALL_EXPR_ARG (exp, 0);
21872 tree arg1 = CALL_EXPR_ARG (exp, 1);
21873 rtx op0 = expand_normal (arg0);
21874 rtx op1 = expand_normal (arg1);
21875 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
21876 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
21877 enum rtx_code comparison = d->comparison;
21878
21879 if (VECTOR_MODE_P (mode0))
21880 op0 = safe_vector_operand (op0, mode0);
21881 if (VECTOR_MODE_P (mode1))
21882 op1 = safe_vector_operand (op1, mode1);
21883
21884 target = gen_reg_rtx (SImode);
21885 emit_move_insn (target, const0_rtx);
21886 target = gen_rtx_SUBREG (QImode, target, 0);
21887
21888 if ((optimize && !register_operand (op0, mode0))
21889 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
21890 op0 = copy_to_mode_reg (mode0, op0);
21891 if ((optimize && !register_operand (op1, mode1))
21892 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
21893 op1 = copy_to_mode_reg (mode1, op1);
21894
21895 pat = GEN_FCN (d->icode) (op0, op1);
21896 if (! pat)
21897 return 0;
21898 emit_insn (pat);
21899 emit_insn (gen_rtx_SET (VOIDmode,
21900 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
21901 gen_rtx_fmt_ee (comparison, QImode,
21902 SET_DEST (pat),
21903 const0_rtx)));
21904
21905 return SUBREG_REG (target);
21906 }
21907
21908 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
21909
21910 static rtx
21911 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
21912 tree exp, rtx target)
21913 {
21914 rtx pat;
21915 tree arg0 = CALL_EXPR_ARG (exp, 0);
21916 tree arg1 = CALL_EXPR_ARG (exp, 1);
21917 tree arg2 = CALL_EXPR_ARG (exp, 2);
21918 tree arg3 = CALL_EXPR_ARG (exp, 3);
21919 tree arg4 = CALL_EXPR_ARG (exp, 4);
21920 rtx scratch0, scratch1;
21921 rtx op0 = expand_normal (arg0);
21922 rtx op1 = expand_normal (arg1);
21923 rtx op2 = expand_normal (arg2);
21924 rtx op3 = expand_normal (arg3);
21925 rtx op4 = expand_normal (arg4);
21926 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
21927
21928 tmode0 = insn_data[d->icode].operand[0].mode;
21929 tmode1 = insn_data[d->icode].operand[1].mode;
21930 modev2 = insn_data[d->icode].operand[2].mode;
21931 modei3 = insn_data[d->icode].operand[3].mode;
21932 modev4 = insn_data[d->icode].operand[4].mode;
21933 modei5 = insn_data[d->icode].operand[5].mode;
21934 modeimm = insn_data[d->icode].operand[6].mode;
21935
21936 if (VECTOR_MODE_P (modev2))
21937 op0 = safe_vector_operand (op0, modev2);
21938 if (VECTOR_MODE_P (modev4))
21939 op2 = safe_vector_operand (op2, modev4);
21940
21941 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
21942 op0 = copy_to_mode_reg (modev2, op0);
21943 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
21944 op1 = copy_to_mode_reg (modei3, op1);
21945 if ((optimize && !register_operand (op2, modev4))
21946 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
21947 op2 = copy_to_mode_reg (modev4, op2);
21948 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
21949 op3 = copy_to_mode_reg (modei5, op3);
21950
21951 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
21952 {
21953 error ("the fifth argument must be an 8-bit immediate");
21954 return const0_rtx;
21955 }
21956
21957 if (d->code == IX86_BUILTIN_PCMPESTRI128)
21958 {
21959 if (optimize || !target
21960 || GET_MODE (target) != tmode0
21961 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
21962 target = gen_reg_rtx (tmode0);
21963
21964 scratch1 = gen_reg_rtx (tmode1);
21965
21966 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
21967 }
21968 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
21969 {
21970 if (optimize || !target
21971 || GET_MODE (target) != tmode1
21972 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
21973 target = gen_reg_rtx (tmode1);
21974
21975 scratch0 = gen_reg_rtx (tmode0);
21976
21977 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
21978 }
21979 else
21980 {
21981 gcc_assert (d->flag);
21982
21983 scratch0 = gen_reg_rtx (tmode0);
21984 scratch1 = gen_reg_rtx (tmode1);
21985
21986 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
21987 }
21988
21989 if (! pat)
21990 return 0;
21991
21992 emit_insn (pat);
21993
21994 if (d->flag)
21995 {
21996 target = gen_reg_rtx (SImode);
21997 emit_move_insn (target, const0_rtx);
21998 target = gen_rtx_SUBREG (QImode, target, 0);
21999
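/* d->flag holds the machine mode in which FLAGS_REG is read; the result
   is the EQ test of that flag against zero. */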
22000 emit_insn
22001 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
22002 gen_rtx_fmt_ee (EQ, QImode,
22003 gen_rtx_REG ((enum machine_mode) d->flag,
22004 FLAGS_REG),
22005 const0_rtx)));
22006 return SUBREG_REG (target);
22007 }
22008 else
22009 return target;
22010 }
22011
22012
22013 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
22014
22015 static rtx
22016 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
22017 tree exp, rtx target)
22018 {
22019 rtx pat;
22020 tree arg0 = CALL_EXPR_ARG (exp, 0);
22021 tree arg1 = CALL_EXPR_ARG (exp, 1);
22022 tree arg2 = CALL_EXPR_ARG (exp, 2);
22023 rtx scratch0, scratch1;
22024 rtx op0 = expand_normal (arg0);
22025 rtx op1 = expand_normal (arg1);
22026 rtx op2 = expand_normal (arg2);
22027 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
22028
22029 tmode0 = insn_data[d->icode].operand[0].mode;
22030 tmode1 = insn_data[d->icode].operand[1].mode;
22031 modev2 = insn_data[d->icode].operand[2].mode;
22032 modev3 = insn_data[d->icode].operand[3].mode;
22033 modeimm = insn_data[d->icode].operand[4].mode;
22034
22035 if (VECTOR_MODE_P (modev2))
22036 op0 = safe_vector_operand (op0, modev2);
22037 if (VECTOR_MODE_P (modev3))
22038 op1 = safe_vector_operand (op1, modev3);
22039
22040 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
22041 op0 = copy_to_mode_reg (modev2, op0);
22042 if ((optimize && !register_operand (op1, modev3))
22043 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
22044 op1 = copy_to_mode_reg (modev3, op1);
22045
22046 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
22047 {
22048 error ("the third argument must be an 8-bit immediate");
22049 return const0_rtx;
22050 }
22051
22052 if (d->code == IX86_BUILTIN_PCMPISTRI128)
22053 {
22054 if (optimize || !target
22055 || GET_MODE (target) != tmode0
22056 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
22057 target = gen_reg_rtx (tmode0);
22058
22059 scratch1 = gen_reg_rtx (tmode1);
22060
22061 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
22062 }
22063 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
22064 {
22065 if (optimize || !target
22066 || GET_MODE (target) != tmode1
22067 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
22068 target = gen_reg_rtx (tmode1);
22069
22070 scratch0 = gen_reg_rtx (tmode0);
22071
22072 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
22073 }
22074 else
22075 {
22076 gcc_assert (d->flag);
22077
22078 scratch0 = gen_reg_rtx (tmode0);
22079 scratch1 = gen_reg_rtx (tmode1);
22080
22081 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
22082 }
22083
22084 if (! pat)
22085 return 0;
22086
22087 emit_insn (pat);
22088
22089 if (d->flag)
22090 {
22091 target = gen_reg_rtx (SImode);
22092 emit_move_insn (target, const0_rtx);
22093 target = gen_rtx_SUBREG (QImode, target, 0);
22094
22095 emit_insn
22096 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
22097 gen_rtx_fmt_ee (EQ, QImode,
22098 gen_rtx_REG ((enum machine_mode) d->flag,
22099 FLAGS_REG),
22100 const0_rtx)));
22101 return SUBREG_REG (target);
22102 }
22103 else
22104 return target;
22105 }
22106
22107 /* Subroutine of ix86_expand_builtin to take care of insns with
22108 a variable number of operands. */
22109
22110 static rtx
22111 ix86_expand_args_builtin (const struct builtin_description *d,
22112 tree exp, rtx target)
22113 {
22114 rtx pat, real_target;
22115 unsigned int i, nargs;
22116 unsigned int nargs_constant = 0;
22117 int num_memory = 0;
22118 struct
22119 {
22120 rtx op;
22121 enum machine_mode mode;
22122 } args[4];
22123 bool last_arg_count = false;
22124 enum insn_code icode = d->icode;
22125 const struct insn_data *insn_p = &insn_data[icode];
22126 enum machine_mode tmode = insn_p->operand[0].mode;
22127 enum machine_mode rmode = VOIDmode;
22128 bool swap = false;
22129 enum rtx_code comparison = d->comparison;
22130
22131 switch ((enum ix86_builtin_type) d->flag)
22132 {
22133 case INT_FTYPE_V2DI_V2DI_PTEST:
22134 return ix86_expand_sse_ptest (d, exp, target);
22135 case FLOAT128_FTYPE_FLOAT128:
22136 case FLOAT_FTYPE_FLOAT:
22137 case INT64_FTYPE_V4SF:
22138 case INT64_FTYPE_V2DF:
22139 case INT_FTYPE_V16QI:
22140 case INT_FTYPE_V8QI:
22141 case INT_FTYPE_V4SF:
22142 case INT_FTYPE_V2DF:
22143 case V16QI_FTYPE_V16QI:
22144 case V8HI_FTYPE_V8HI:
22145 case V8HI_FTYPE_V16QI:
22146 case V8QI_FTYPE_V8QI:
22147 case V4SI_FTYPE_V4SI:
22148 case V4SI_FTYPE_V16QI:
22149 case V4SI_FTYPE_V4SF:
22150 case V4SI_FTYPE_V8HI:
22151 case V4SI_FTYPE_V2DF:
22152 case V4HI_FTYPE_V4HI:
22153 case V4SF_FTYPE_V4SF:
22154 case V4SF_FTYPE_V4SI:
22155 case V4SF_FTYPE_V2DF:
22156 case V2DI_FTYPE_V2DI:
22157 case V2DI_FTYPE_V16QI:
22158 case V2DI_FTYPE_V8HI:
22159 case V2DI_FTYPE_V4SI:
22160 case V2DF_FTYPE_V2DF:
22161 case V2DF_FTYPE_V4SI:
22162 case V2DF_FTYPE_V4SF:
22163 case V2DF_FTYPE_V2SI:
22164 case V2SI_FTYPE_V2SI:
22165 case V2SI_FTYPE_V4SF:
22166 case V2SI_FTYPE_V2SF:
22167 case V2SI_FTYPE_V2DF:
22168 case V2SF_FTYPE_V2SF:
22169 case V2SF_FTYPE_V2SI:
22170 nargs = 1;
22171 break;
22172 case V4SF_FTYPE_V4SF_VEC_MERGE:
22173 case V2DF_FTYPE_V2DF_VEC_MERGE:
22174 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
22175 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
22176 case V16QI_FTYPE_V16QI_V16QI:
22177 case V16QI_FTYPE_V8HI_V8HI:
22178 case V8QI_FTYPE_V8QI_V8QI:
22179 case V8QI_FTYPE_V4HI_V4HI:
22180 case V8HI_FTYPE_V8HI_V8HI:
22181 case V8HI_FTYPE_V16QI_V16QI:
22182 case V8HI_FTYPE_V4SI_V4SI:
22183 case V4SI_FTYPE_V4SI_V4SI:
22184 case V4SI_FTYPE_V8HI_V8HI:
22185 case V4SI_FTYPE_V4SF_V4SF:
22186 case V4SI_FTYPE_V2DF_V2DF:
22187 case V4HI_FTYPE_V4HI_V4HI:
22188 case V4HI_FTYPE_V8QI_V8QI:
22189 case V4HI_FTYPE_V2SI_V2SI:
22190 case V4SF_FTYPE_V4SF_V4SF:
22191 case V4SF_FTYPE_V4SF_V2SI:
22192 case V4SF_FTYPE_V4SF_V2DF:
22193 case V4SF_FTYPE_V4SF_DI:
22194 case V4SF_FTYPE_V4SF_SI:
22195 case V2DI_FTYPE_V2DI_V2DI:
22196 case V2DI_FTYPE_V16QI_V16QI:
22197 case V2DI_FTYPE_V4SI_V4SI:
22198 case V2DI_FTYPE_V2DI_V16QI:
22199 case V2DI_FTYPE_V2DF_V2DF:
22200 case V2SI_FTYPE_V2SI_V2SI:
22201 case V2SI_FTYPE_V4HI_V4HI:
22202 case V2SI_FTYPE_V2SF_V2SF:
22203 case V2DF_FTYPE_V2DF_V2DF:
22204 case V2DF_FTYPE_V2DF_V4SF:
22205 case V2DF_FTYPE_V2DF_DI:
22206 case V2DF_FTYPE_V2DF_SI:
22207 case V2SF_FTYPE_V2SF_V2SF:
22208 case V1DI_FTYPE_V1DI_V1DI:
22209 case V1DI_FTYPE_V8QI_V8QI:
22210 case V1DI_FTYPE_V2SI_V2SI:
22211 if (comparison == UNKNOWN)
22212 return ix86_expand_binop_builtin (icode, exp, target);
22213 nargs = 2;
22214 break;
22215 case V4SF_FTYPE_V4SF_V4SF_SWAP:
22216 case V2DF_FTYPE_V2DF_V2DF_SWAP:
22217 gcc_assert (comparison != UNKNOWN);
22218 nargs = 2;
22219 swap = true;
22220 break;
22221 case V8HI_FTYPE_V8HI_V8HI_COUNT:
22222 case V8HI_FTYPE_V8HI_SI_COUNT:
22223 case V4SI_FTYPE_V4SI_V4SI_COUNT:
22224 case V4SI_FTYPE_V4SI_SI_COUNT:
22225 case V4HI_FTYPE_V4HI_V4HI_COUNT:
22226 case V4HI_FTYPE_V4HI_SI_COUNT:
22227 case V2DI_FTYPE_V2DI_V2DI_COUNT:
22228 case V2DI_FTYPE_V2DI_SI_COUNT:
22229 case V2SI_FTYPE_V2SI_V2SI_COUNT:
22230 case V2SI_FTYPE_V2SI_SI_COUNT:
22231 case V1DI_FTYPE_V1DI_V1DI_COUNT:
22232 case V1DI_FTYPE_V1DI_SI_COUNT:
22233 nargs = 2;
22234 last_arg_count = true;
22235 break;
22236 case UINT64_FTYPE_UINT64_UINT64:
22237 case UINT_FTYPE_UINT_UINT:
22238 case UINT_FTYPE_UINT_USHORT:
22239 case UINT_FTYPE_UINT_UCHAR:
22240 nargs = 2;
22241 break;
22242 case V2DI2TI_FTYPE_V2DI_INT:
22243 nargs = 2;
22244 rmode = V2DImode;
22245 nargs_constant = 1;
22246 break;
22247 case V8HI_FTYPE_V8HI_INT:
22248 case V4SI_FTYPE_V4SI_INT:
22249 case V4HI_FTYPE_V4HI_INT:
22250 case V4SF_FTYPE_V4SF_INT:
22251 case V2DI_FTYPE_V2DI_INT:
22252 case V2DF_FTYPE_V2DF_INT:
22253 nargs = 2;
22254 nargs_constant = 1;
22255 break;
22256 case V16QI_FTYPE_V16QI_V16QI_V16QI:
22257 case V4SF_FTYPE_V4SF_V4SF_V4SF:
22258 case V2DF_FTYPE_V2DF_V2DF_V2DF:
22259 nargs = 3;
22260 break;
22261 case V16QI_FTYPE_V16QI_V16QI_INT:
22262 case V8HI_FTYPE_V8HI_V8HI_INT:
22263 case V4SI_FTYPE_V4SI_V4SI_INT:
22264 case V4SF_FTYPE_V4SF_V4SF_INT:
22265 case V2DI_FTYPE_V2DI_V2DI_INT:
22266 case V2DF_FTYPE_V2DF_V2DF_INT:
22267 nargs = 3;
22268 nargs_constant = 1;
22269 break;
22270 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
22271 nargs = 3;
22272 rmode = V2DImode;
22273 nargs_constant = 1;
22274 break;
22275 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
22276 nargs = 3;
22277 rmode = DImode;
22278 nargs_constant = 1;
22279 break;
22280 case V2DI_FTYPE_V2DI_UINT_UINT:
22281 nargs = 3;
22282 nargs_constant = 2;
22283 break;
22284 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
22285 nargs = 4;
22286 nargs_constant = 2;
22287 break;
22288 default:
22289 gcc_unreachable ();
22290 }
22291
22292 gcc_assert (nargs <= ARRAY_SIZE (args));
22293
22294 if (comparison != UNKNOWN)
22295 {
22296 gcc_assert (nargs == 2);
22297 return ix86_expand_sse_compare (d, exp, target, swap);
22298 }
22299
22300 if (rmode == VOIDmode || rmode == tmode)
22301 {
22302 if (optimize
22303 || target == 0
22304 || GET_MODE (target) != tmode
22305 || ! (*insn_p->operand[0].predicate) (target, tmode))
22306 target = gen_reg_rtx (tmode);
22307 real_target = target;
22308 }
22309 else
22310 {
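/* The insn computes its result in TMODE, but the builtin returns RMODE;
   allocate the result in RMODE and hand the insn a TMODE view of it. */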
22311 target = gen_reg_rtx (rmode);
22312 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
22313 }
22314
22315 for (i = 0; i < nargs; i++)
22316 {
22317 tree arg = CALL_EXPR_ARG (exp, i);
22318 rtx op = expand_normal (arg);
22319 enum machine_mode mode = insn_p->operand[i + 1].mode;
22320 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
22321
22322 if (last_arg_count && (i + 1) == nargs)
22323 {
22324 /* SIMD shift insns take either an 8-bit immediate or a
22325 register as the count. But the builtin functions take int as
22326 the count. If the count doesn't match, we put it in a register. */
22327 if (!match)
22328 {
22329 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
22330 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
22331 op = copy_to_reg (op);
22332 }
22333 }
22334 else if ((nargs - i) <= nargs_constant)
22335 {
22336 if (!match)
22337 switch (icode)
22338 {
22339 case CODE_FOR_sse4_1_roundpd:
22340 case CODE_FOR_sse4_1_roundps:
22341 case CODE_FOR_sse4_1_roundsd:
22342 case CODE_FOR_sse4_1_roundss:
22343 case CODE_FOR_sse4_1_blendps:
22344 error ("the last argument must be a 4-bit immediate");
22345 return const0_rtx;
22346
22347 case CODE_FOR_sse4_1_blendpd:
22348 error ("the last argument must be a 2-bit immediate");
22349 return const0_rtx;
22350
22351 default:
22352 switch (nargs_constant)
22353 {
22354 case 2:
22355 if ((nargs - i) == nargs_constant)
22356 {
22357 error ("the next to last argument must be an 8-bit immediate");
22358 break;
22359 }
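/* FALLTHRU */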
22360 case 1:
22361 error ("the last argument must be an 8-bit immediate");
22362 break;
22363 default:
22364 gcc_unreachable ();
22365 }
22366 return const0_rtx;
22367 }
22368 }
22369 else
22370 {
22371 if (VECTOR_MODE_P (mode))
22372 op = safe_vector_operand (op, mode);
22373
22374 /* If we aren't optimizing, only allow one memory operand to
22375 be generated. */
22376 if (memory_operand (op, mode))
22377 num_memory++;
22378
22379 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
22380 {
22381 if (optimize || !match || num_memory > 1)
22382 op = copy_to_mode_reg (mode, op);
22383 }
22384 else
22385 {
22386 op = copy_to_reg (op);
22387 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
22388 }
22389 }
22390
22391 args[i].op = op;
22392 args[i].mode = mode;
22393 }
22394
22395 switch (nargs)
22396 {
22397 case 1:
22398 pat = GEN_FCN (icode) (real_target, args[0].op);
22399 break;
22400 case 2:
22401 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
22402 break;
22403 case 3:
22404 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
22405 args[2].op);
22406 break;
22407 case 4:
22408 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
22409 args[2].op, args[3].op);
22410 break;
22411 default:
22412 gcc_unreachable ();
22413 }
22414
22415 if (! pat)
22416 return 0;
22417
22418 emit_insn (pat);
22419 return target;
22420 }
22421
22422 /* Subroutine of ix86_expand_builtin to take care of special insns
22423 with a variable number of operands. */
22424
22425 static rtx
22426 ix86_expand_special_args_builtin (const struct builtin_description *d,
22427 tree exp, rtx target)
22428 {
22429 tree arg;
22430 rtx pat, op;
22431 unsigned int i, nargs, arg_adjust, memory;
22432 struct
22433 {
22434 rtx op;
22435 enum machine_mode mode;
22436 } args[2];
22437 enum insn_code icode = d->icode;
22438 bool last_arg_constant = false;
22439 const struct insn_data *insn_p = &insn_data[icode];
22440 enum machine_mode tmode = insn_p->operand[0].mode;
22441 enum { load, store } klass;
22442
22443 switch ((enum ix86_special_builtin_type) d->flag)
22444 {
22445 case VOID_FTYPE_VOID:
22446 emit_insn (GEN_FCN (icode) (target));
22447 return 0;
22448 case V2DI_FTYPE_PV2DI:
22449 case V16QI_FTYPE_PCCHAR:
22450 case V4SF_FTYPE_PCFLOAT:
22451 case V2DF_FTYPE_PCDOUBLE:
22452 nargs = 1;
22453 klass = load;
22454 memory = 0;
22455 break;
22456 case VOID_FTYPE_PV2SF_V4SF:
22457 case VOID_FTYPE_PV2DI_V2DI:
22458 case VOID_FTYPE_PCHAR_V16QI:
22459 case VOID_FTYPE_PFLOAT_V4SF:
22460 case VOID_FTYPE_PDOUBLE_V2DF:
22461 case VOID_FTYPE_PDI_DI:
22462 case VOID_FTYPE_PINT_INT:
22463 nargs = 1;
22464 klass = store;
22465 /* Reserve memory operand for target. */
22466 memory = ARRAY_SIZE (args);
22467 break;
22468 case V4SF_FTYPE_V4SF_PCV2SF:
22469 case V2DF_FTYPE_V2DF_PCDOUBLE:
22470 nargs = 2;
22471 klass = load;
22472 memory = 1;
22473 break;
22474 default:
22475 gcc_unreachable ();
22476 }
22477
22478 gcc_assert (nargs <= ARRAY_SIZE (args));
22479
22480 if (klass == store)
22481 {
22482 arg = CALL_EXPR_ARG (exp, 0);
22483 op = expand_normal (arg);
22484 gcc_assert (target == 0);
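/* For stores the first argument is the destination address; build the
   MEM that the insn will write to and treat it as the target. */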
22485 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
22486 arg_adjust = 1;
22487 }
22488 else
22489 {
22490 arg_adjust = 0;
22491 if (optimize
22492 || target == 0
22493 || GET_MODE (target) != tmode
22494 || ! (*insn_p->operand[0].predicate) (target, tmode))
22495 target = gen_reg_rtx (tmode);
22496 }
22497
22498 for (i = 0; i < nargs; i++)
22499 {
22500 enum machine_mode mode = insn_p->operand[i + 1].mode;
22501 bool match;
22502
22503 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
22504 op = expand_normal (arg);
22505 match = (*insn_p->operand[i + 1].predicate) (op, mode);
22506
22507 if (last_arg_constant && (i + 1) == nargs)
22508 {
22509 if (!match)
22510 switch (icode)
22511 {
22512 default:
22513 error ("the last argument must be an 8-bit immediate");
22514 return const0_rtx;
22515 }
22516 }
22517 else
22518 {
22519 if (i == memory)
22520 {
22521 /* This must be the memory operand. */
22522 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
22523 gcc_assert (GET_MODE (op) == mode
22524 || GET_MODE (op) == VOIDmode);
22525 }
22526 else
22527 {
22528 /* This must be a register. */
22529 if (VECTOR_MODE_P (mode))
22530 op = safe_vector_operand (op, mode);
22531
22532 gcc_assert (GET_MODE (op) == mode
22533 || GET_MODE (op) == VOIDmode);
22534 op = copy_to_mode_reg (mode, op);
22535 }
22536 }
22537
22538 args[i].op = op;
22539 args[i].mode = mode;
22540 }
22541
22542 switch (nargs)
22543 {
22544 case 1:
22545 pat = GEN_FCN (icode) (target, args[0].op);
22546 break;
22547 case 2:
22548 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
22549 break;
22550 default:
22551 gcc_unreachable ();
22552 }
22553
22554 if (! pat)
22555 return 0;
22556 emit_insn (pat);
22557 return klass == store ? 0 : target;
22558 }
22559
22560 /* Return the integer constant in ARG. Constrain it to be in the range
22561 of the subparts of VEC_TYPE; issue an error if not. */
22562
22563 static int
22564 get_element_number (tree vec_type, tree arg)
22565 {
22566 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
22567
22568 if (!host_integerp (arg, 1)
22569 || (elt = tree_low_cst (arg, 1), elt > max))
22570 {
22571 error ("selector must be an integer constant in the range 0..%wi", max);
22572 return 0;
22573 }
22574
22575 return elt;
22576 }
22577
22578 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
22579 ix86_expand_vector_init. We DO have language-level syntax for this, in
22580 the form of (type){ init-list }. Except that since we can't place emms
22581 instructions from inside the compiler, we can't allow the use of MMX
22582 registers unless the user explicitly asks for it. So we do *not* define
22583 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
22584 we have builtins invoked by mmintrin.h that give us license to emit
22585 these sorts of instructions. */
22586
22587 static rtx
22588 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
22589 {
22590 enum machine_mode tmode = TYPE_MODE (type);
22591 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
22592 int i, n_elt = GET_MODE_NUNITS (tmode);
22593 rtvec v = rtvec_alloc (n_elt);
22594
22595 gcc_assert (VECTOR_MODE_P (tmode));
22596 gcc_assert (call_expr_nargs (exp) == n_elt);
22597
22598 for (i = 0; i < n_elt; ++i)
22599 {
22600 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
22601 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
22602 }
22603
22604 if (!target || !register_operand (target, tmode))
22605 target = gen_reg_rtx (tmode);
22606
22607 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
22608 return target;
22609 }
22610
22611 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
22612 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
22613 had a language-level syntax for referencing vector elements. */
22614
22615 static rtx
22616 ix86_expand_vec_ext_builtin (tree exp, rtx target)
22617 {
22618 enum machine_mode tmode, mode0;
22619 tree arg0, arg1;
22620 int elt;
22621 rtx op0;
22622
22623 arg0 = CALL_EXPR_ARG (exp, 0);
22624 arg1 = CALL_EXPR_ARG (exp, 1);
22625
22626 op0 = expand_normal (arg0);
22627 elt = get_element_number (TREE_TYPE (arg0), arg1);
22628
22629 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
22630 mode0 = TYPE_MODE (TREE_TYPE (arg0));
22631 gcc_assert (VECTOR_MODE_P (mode0));
22632
22633 op0 = force_reg (mode0, op0);
22634
22635 if (optimize || !target || !register_operand (target, tmode))
22636 target = gen_reg_rtx (tmode);
22637
22638 ix86_expand_vector_extract (true, target, op0, elt);
22639
22640 return target;
22641 }
22642
22643 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
22644 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
22645 a language-level syntax for referencing vector elements. */
22646
22647 static rtx
22648 ix86_expand_vec_set_builtin (tree exp)
22649 {
22650 enum machine_mode tmode, mode1;
22651 tree arg0, arg1, arg2;
22652 int elt;
22653 rtx op0, op1, target;
22654
22655 arg0 = CALL_EXPR_ARG (exp, 0);
22656 arg1 = CALL_EXPR_ARG (exp, 1);
22657 arg2 = CALL_EXPR_ARG (exp, 2);
22658
22659 tmode = TYPE_MODE (TREE_TYPE (arg0));
22660 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
22661 gcc_assert (VECTOR_MODE_P (tmode));
22662
22663 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
22664 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
22665 elt = get_element_number (TREE_TYPE (arg0), arg2);
22666
22667 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
22668 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
22669
22670 op0 = force_reg (tmode, op0);
22671 op1 = force_reg (mode1, op1);
22672
22673 /* OP0 is the source of these builtin functions and shouldn't be
22674 modified. Create a copy, use it and return it as target. */
22675 target = gen_reg_rtx (tmode);
22676 emit_move_insn (target, op0);
22677 ix86_expand_vector_set (true, target, op1, elt);
22678
22679 return target;
22680 }
22681
22682 /* Expand an expression EXP that calls a built-in function,
22683 with result going to TARGET if that's convenient
22684 (and in mode MODE if that's convenient).
22685 SUBTARGET may be used as the target for computing one of EXP's operands.
22686 IGNORE is nonzero if the value is to be ignored. */
22687
22688 static rtx
22689 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
22690 enum machine_mode mode ATTRIBUTE_UNUSED,
22691 int ignore ATTRIBUTE_UNUSED)
22692 {
22693 const struct builtin_description *d;
22694 size_t i;
22695 enum insn_code icode;
22696 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
22697 tree arg0, arg1, arg2;
22698 rtx op0, op1, op2, pat;
22699 enum machine_mode mode0, mode1, mode2;
22700 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
22701
22702 /* Determine whether the builtin function is available under the current ISA.
22703 Originally the builtin was not created if it wasn't applicable to the
22704 current ISA based on the command line switches. With function specific
22705 options, we need to check in the context of the function making the call
22706 whether it is supported. */
22707 if (ix86_builtins_isa[fcode]
22708 && !(ix86_builtins_isa[fcode] & ix86_isa_flags))
22709 {
22710 char *opts = ix86_target_string (ix86_builtins_isa[fcode], 0, NULL,
22711 NULL, NULL, false);
22712
22713 if (!opts)
22714 error ("%qE needs unknown isa option", fndecl);
22715 else
22716 {
22717 gcc_assert (opts != NULL);
22718 error ("%qE needs isa option %s", fndecl, opts);
22719 free (opts);
22720 }
22721 return const0_rtx;
22722 }
22723
22724 switch (fcode)
22725 {
22726 case IX86_BUILTIN_MASKMOVQ:
22727 case IX86_BUILTIN_MASKMOVDQU:
22728 icode = (fcode == IX86_BUILTIN_MASKMOVQ
22729 ? CODE_FOR_mmx_maskmovq
22730 : CODE_FOR_sse2_maskmovdqu);
22731 /* Note the arg order is different from the operand order. */
22732 arg1 = CALL_EXPR_ARG (exp, 0);
22733 arg2 = CALL_EXPR_ARG (exp, 1);
22734 arg0 = CALL_EXPR_ARG (exp, 2);
22735 op0 = expand_normal (arg0);
22736 op1 = expand_normal (arg1);
22737 op2 = expand_normal (arg2);
22738 mode0 = insn_data[icode].operand[0].mode;
22739 mode1 = insn_data[icode].operand[1].mode;
22740 mode2 = insn_data[icode].operand[2].mode;
22741
22742 op0 = force_reg (Pmode, op0);
22743 op0 = gen_rtx_MEM (mode1, op0);
22744
22745 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
22746 op0 = copy_to_mode_reg (mode0, op0);
22747 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
22748 op1 = copy_to_mode_reg (mode1, op1);
22749 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
22750 op2 = copy_to_mode_reg (mode2, op2);
22751 pat = GEN_FCN (icode) (op0, op1, op2);
22752 if (! pat)
22753 return 0;
22754 emit_insn (pat);
22755 return 0;
22756
22757 case IX86_BUILTIN_LDMXCSR:
22758 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
22759 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
22760 emit_move_insn (target, op0);
22761 emit_insn (gen_sse_ldmxcsr (target));
22762 return 0;
22763
22764 case IX86_BUILTIN_STMXCSR:
22765 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
22766 emit_insn (gen_sse_stmxcsr (target));
22767 return copy_to_mode_reg (SImode, target);
22768
22769 case IX86_BUILTIN_CLFLUSH:
22770 arg0 = CALL_EXPR_ARG (exp, 0);
22771 op0 = expand_normal (arg0);
22772 icode = CODE_FOR_sse2_clflush;
22773 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
22774 op0 = copy_to_mode_reg (Pmode, op0);
22775
22776 emit_insn (gen_sse2_clflush (op0));
22777 return 0;
22778
22779 case IX86_BUILTIN_MONITOR:
22780 arg0 = CALL_EXPR_ARG (exp, 0);
22781 arg1 = CALL_EXPR_ARG (exp, 1);
22782 arg2 = CALL_EXPR_ARG (exp, 2);
22783 op0 = expand_normal (arg0);
22784 op1 = expand_normal (arg1);
22785 op2 = expand_normal (arg2);
22786 if (!REG_P (op0))
22787 op0 = copy_to_mode_reg (Pmode, op0);
22788 if (!REG_P (op1))
22789 op1 = copy_to_mode_reg (SImode, op1);
22790 if (!REG_P (op2))
22791 op2 = copy_to_mode_reg (SImode, op2);
22792 emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
22793 return 0;
22794
22795 case IX86_BUILTIN_MWAIT:
22796 arg0 = CALL_EXPR_ARG (exp, 0);
22797 arg1 = CALL_EXPR_ARG (exp, 1);
22798 op0 = expand_normal (arg0);
22799 op1 = expand_normal (arg1);
22800 if (!REG_P (op0))
22801 op0 = copy_to_mode_reg (SImode, op0);
22802 if (!REG_P (op1))
22803 op1 = copy_to_mode_reg (SImode, op1);
22804 emit_insn (gen_sse3_mwait (op0, op1));
22805 return 0;
22806
22807 case IX86_BUILTIN_VEC_INIT_V2SI:
22808 case IX86_BUILTIN_VEC_INIT_V4HI:
22809 case IX86_BUILTIN_VEC_INIT_V8QI:
22810 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
22811
22812 case IX86_BUILTIN_VEC_EXT_V2DF:
22813 case IX86_BUILTIN_VEC_EXT_V2DI:
22814 case IX86_BUILTIN_VEC_EXT_V4SF:
22815 case IX86_BUILTIN_VEC_EXT_V4SI:
22816 case IX86_BUILTIN_VEC_EXT_V8HI:
22817 case IX86_BUILTIN_VEC_EXT_V2SI:
22818 case IX86_BUILTIN_VEC_EXT_V4HI:
22819 case IX86_BUILTIN_VEC_EXT_V16QI:
22820 return ix86_expand_vec_ext_builtin (exp, target);
22821
22822 case IX86_BUILTIN_VEC_SET_V2DI:
22823 case IX86_BUILTIN_VEC_SET_V4SF:
22824 case IX86_BUILTIN_VEC_SET_V4SI:
22825 case IX86_BUILTIN_VEC_SET_V8HI:
22826 case IX86_BUILTIN_VEC_SET_V4HI:
22827 case IX86_BUILTIN_VEC_SET_V16QI:
22828 return ix86_expand_vec_set_builtin (exp);
22829
22830 case IX86_BUILTIN_INFQ:
22831 {
22832 REAL_VALUE_TYPE inf;
22833 rtx tmp;
22834
22835 real_inf (&inf);
22836 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
22837
22838 tmp = validize_mem (force_const_mem (mode, tmp));
22839
22840 if (target == 0)
22841 target = gen_reg_rtx (mode);
22842
22843 emit_move_insn (target, tmp);
22844 return target;
22845 }
22846
22847 default:
22848 break;
22849 }
22850
22851 for (i = 0, d = bdesc_special_args;
22852 i < ARRAY_SIZE (bdesc_special_args);
22853 i++, d++)
22854 if (d->code == fcode)
22855 return ix86_expand_special_args_builtin (d, exp, target);
22856
22857 for (i = 0, d = bdesc_args;
22858 i < ARRAY_SIZE (bdesc_args);
22859 i++, d++)
22860 if (d->code == fcode)
22861 switch (fcode)
22862 {
22863 case IX86_BUILTIN_FABSQ:
22864 case IX86_BUILTIN_COPYSIGNQ:
22865 if (!TARGET_SSE2)
22866 /* Emit a normal call if SSE2 isn't available. */
22867 return expand_call (exp, target, ignore);
22868 default:
22869 return ix86_expand_args_builtin (d, exp, target);
22870 }
22871
22872 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
22873 if (d->code == fcode)
22874 return ix86_expand_sse_comi (d, exp, target);
22875
22876 for (i = 0, d = bdesc_pcmpestr;
22877 i < ARRAY_SIZE (bdesc_pcmpestr);
22878 i++, d++)
22879 if (d->code == fcode)
22880 return ix86_expand_sse_pcmpestr (d, exp, target);
22881
22882 for (i = 0, d = bdesc_pcmpistr;
22883 i < ARRAY_SIZE (bdesc_pcmpistr);
22884 i++, d++)
22885 if (d->code == fcode)
22886 return ix86_expand_sse_pcmpistr (d, exp, target);
22887
22888 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
22889 if (d->code == fcode)
22890 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
22891 (enum multi_arg_type)d->flag,
22892 d->comparison);
22893
22894 gcc_unreachable ();
22895 }
22896
22897 /* Returns a function decl for a vectorized version of the builtin function
22898 with builtin function code FN and the result vector type TYPE_OUT, or NULL_TREE
22899 if it is not available. */
22900
22901 static tree
22902 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
22903 tree type_in)
22904 {
22905 enum machine_mode in_mode, out_mode;
22906 int in_n, out_n;
22907
22908 if (TREE_CODE (type_out) != VECTOR_TYPE
22909 || TREE_CODE (type_in) != VECTOR_TYPE)
22910 return NULL_TREE;
22911
22912 out_mode = TYPE_MODE (TREE_TYPE (type_out));
22913 out_n = TYPE_VECTOR_SUBPARTS (type_out);
22914 in_mode = TYPE_MODE (TREE_TYPE (type_in));
22915 in_n = TYPE_VECTOR_SUBPARTS (type_in);
22916
22917 switch (fn)
22918 {
22919 case BUILT_IN_SQRT:
22920 if (out_mode == DFmode && out_n == 2
22921 && in_mode == DFmode && in_n == 2)
22922 return ix86_builtins[IX86_BUILTIN_SQRTPD];
22923 break;
22924
22925 case BUILT_IN_SQRTF:
22926 if (out_mode == SFmode && out_n == 4
22927 && in_mode == SFmode && in_n == 4)
22928 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
22929 break;
22930
22931 case BUILT_IN_LRINT:
22932 if (out_mode == SImode && out_n == 4
22933 && in_mode == DFmode && in_n == 2)
22934 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
22935 break;
22936
22937 case BUILT_IN_LRINTF:
22938 if (out_mode == SImode && out_n == 4
22939 && in_mode == SFmode && in_n == 4)
22940 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
22941 break;
22942
22943 default:
22944 ;
22945 }
22946
22947 /* Dispatch to a handler for a vectorization library. */
22948 if (ix86_veclib_handler)
22949 return (*ix86_veclib_handler)(fn, type_out, type_in);
22950
22951 return NULL_TREE;
22952 }
22953
22954 /* Handler for an SVML-style interface to
22955 a library with vectorized intrinsics. */
22956
22957 static tree
22958 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
22959 {
22960 char name[20];
22961 tree fntype, new_fndecl, args;
22962 unsigned arity;
22963 const char *bname;
22964 enum machine_mode el_mode, in_mode;
22965 int n, in_n;
22966
22967 /* The SVML library is suitable for unsafe math only. */
22968 if (!flag_unsafe_math_optimizations)
22969 return NULL_TREE;
22970
22971 el_mode = TYPE_MODE (TREE_TYPE (type_out));
22972 n = TYPE_VECTOR_SUBPARTS (type_out);
22973 in_mode = TYPE_MODE (TREE_TYPE (type_in));
22974 in_n = TYPE_VECTOR_SUBPARTS (type_in);
22975 if (el_mode != in_mode
22976 || n != in_n)
22977 return NULL_TREE;
22978
22979 switch (fn)
22980 {
22981 case BUILT_IN_EXP:
22982 case BUILT_IN_LOG:
22983 case BUILT_IN_LOG10:
22984 case BUILT_IN_POW:
22985 case BUILT_IN_TANH:
22986 case BUILT_IN_TAN:
22987 case BUILT_IN_ATAN:
22988 case BUILT_IN_ATAN2:
22989 case BUILT_IN_ATANH:
22990 case BUILT_IN_CBRT:
22991 case BUILT_IN_SINH:
22992 case BUILT_IN_SIN:
22993 case BUILT_IN_ASINH:
22994 case BUILT_IN_ASIN:
22995 case BUILT_IN_COSH:
22996 case BUILT_IN_COS:
22997 case BUILT_IN_ACOSH:
22998 case BUILT_IN_ACOS:
22999 if (el_mode != DFmode || n != 2)
23000 return NULL_TREE;
23001 break;
23002
23003 case BUILT_IN_EXPF:
23004 case BUILT_IN_LOGF:
23005 case BUILT_IN_LOG10F:
23006 case BUILT_IN_POWF:
23007 case BUILT_IN_TANHF:
23008 case BUILT_IN_TANF:
23009 case BUILT_IN_ATANF:
23010 case BUILT_IN_ATAN2F:
23011 case BUILT_IN_ATANHF:
23012 case BUILT_IN_CBRTF:
23013 case BUILT_IN_SINHF:
23014 case BUILT_IN_SINF:
23015 case BUILT_IN_ASINHF:
23016 case BUILT_IN_ASINF:
23017 case BUILT_IN_COSHF:
23018 case BUILT_IN_COSF:
23019 case BUILT_IN_ACOSHF:
23020 case BUILT_IN_ACOSF:
23021 if (el_mode != SFmode || n != 4)
23022 return NULL_TREE;
23023 break;
23024
23025 default:
23026 return NULL_TREE;
23027 }
23028
23029 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
23030
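/* BNAME is the scalar builtin name, e.g. "__builtin_sinf"; bname+10 below
   skips the 10-character "__builtin_" prefix. */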
23031 if (fn == BUILT_IN_LOGF)
23032 strcpy (name, "vmlsLn4");
23033 else if (fn == BUILT_IN_LOG)
23034 strcpy (name, "vmldLn2");
23035 else if (n == 4)
23036 {
23037 sprintf (name, "vmls%s", bname+10);
23038 name[strlen (name)-1] = '4';
23039 }
23040 else
23041 sprintf (name, "vmld%s2", bname+10);
23042
23043 /* Convert to uppercase. */
23044 name[4] &= ~0x20;
23045
23046 arity = 0;
23047 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
23048 args = TREE_CHAIN (args))
23049 arity++;
23050
23051 if (arity == 1)
23052 fntype = build_function_type_list (type_out, type_in, NULL);
23053 else
23054 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
23055
23056 /* Build a function declaration for the vectorized function. */
23057 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
23058 TREE_PUBLIC (new_fndecl) = 1;
23059 DECL_EXTERNAL (new_fndecl) = 1;
23060 DECL_IS_NOVOPS (new_fndecl) = 1;
23061 TREE_READONLY (new_fndecl) = 1;
23062
23063 return new_fndecl;
23064 }
23065
23066 /* Handler for an ACML-style interface to
23067 a library with vectorized intrinsics. */
23068
23069 static tree
23070 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
23071 {
23072 char name[20] = "__vr.._";
23073 tree fntype, new_fndecl, args;
23074 unsigned arity;
23075 const char *bname;
23076 enum machine_mode el_mode, in_mode;
23077 int n, in_n;
23078
23079 /* The ACML library is 64-bit only and suitable for unsafe math only, as
23080 it does not correctly support parts of IEEE with the required
23081 precision, such as denormals. */
23082 if (!TARGET_64BIT
23083 || !flag_unsafe_math_optimizations)
23084 return NULL_TREE;
23085
23086 el_mode = TYPE_MODE (TREE_TYPE (type_out));
23087 n = TYPE_VECTOR_SUBPARTS (type_out);
23088 in_mode = TYPE_MODE (TREE_TYPE (type_in));
23089 in_n = TYPE_VECTOR_SUBPARTS (type_in);
23090 if (el_mode != in_mode
23091 || n != in_n)
23092 return NULL_TREE;
23093
23094 switch (fn)
23095 {
23096 case BUILT_IN_SIN:
23097 case BUILT_IN_COS:
23098 case BUILT_IN_EXP:
23099 case BUILT_IN_LOG:
23100 case BUILT_IN_LOG2:
23101 case BUILT_IN_LOG10:
23102 name[4] = 'd';
23103 name[5] = '2';
23104 if (el_mode != DFmode
23105 || n != 2)
23106 return NULL_TREE;
23107 break;
23108
23109 case BUILT_IN_SINF:
23110 case BUILT_IN_COSF:
23111 case BUILT_IN_EXPF:
23112 case BUILT_IN_POWF:
23113 case BUILT_IN_LOGF:
23114 case BUILT_IN_LOG2F:
23115 case BUILT_IN_LOG10F:
23116 name[4] = 's';
23117 name[5] = '4';
23118 if (el_mode != SFmode
23119 || n != 4)
23120 return NULL_TREE;
23121 break;
23122
23123 default:
23124 return NULL_TREE;
23125 }
23126
23127 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
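/* Append the scalar function name (minus the "__builtin_" prefix) after
   the "__vr.._" template filled in above. */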
23128 sprintf (name + 7, "%s", bname+10);
23129
23130 arity = 0;
23131 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
23132 args = TREE_CHAIN (args))
23133 arity++;
23134
23135 if (arity == 1)
23136 fntype = build_function_type_list (type_out, type_in, NULL);
23137 else
23138 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
23139
23140 /* Build a function declaration for the vectorized function. */
23141 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
23142 TREE_PUBLIC (new_fndecl) = 1;
23143 DECL_EXTERNAL (new_fndecl) = 1;
23144 DECL_IS_NOVOPS (new_fndecl) = 1;
23145 TREE_READONLY (new_fndecl) = 1;
23146
23147 return new_fndecl;
23148 }
23149
23150
23151 /* Returns a decl of a function that implements conversion of an integer vector
23152 into a floating-point vector, or vice-versa. TYPE is the type of the integer
23153 side of the conversion.
23154 Return NULL_TREE if it is not available. */
23155
23156 static tree
23157 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
23158 {
23159 if (TREE_CODE (type) != VECTOR_TYPE)
23160 return NULL_TREE;
23161
23162 switch (code)
23163 {
23164 case FLOAT_EXPR:
23165 switch (TYPE_MODE (type))
23166 {
23167 case V4SImode:
23168 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
23169 default:
23170 return NULL_TREE;
23171 }
23172
23173 case FIX_TRUNC_EXPR:
23174 switch (TYPE_MODE (type))
23175 {
23176 case V4SImode:
23177 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
23178 default:
23179 return NULL_TREE;
23180 }
23181 default:
23182 return NULL_TREE;
23183
23184 }
23185 }
23186
23187 /* Returns a decl of a target-specific builtin that implements the
23188 reciprocal of the function, or NULL_TREE if it is not available. */
23189
23190 static tree
23191 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
23192 bool sqrt ATTRIBUTE_UNUSED)
23193 {
23194 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
23195 && flag_finite_math_only && !flag_trapping_math
23196 && flag_unsafe_math_optimizations))
23197 return NULL_TREE;
23198
23199 if (md_fn)
23200 /* Machine dependent builtins. */
23201 switch (fn)
23202 {
23203 /* Vectorized version of sqrt to rsqrt conversion. */
23204 case IX86_BUILTIN_SQRTPS_NR:
23205 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
23206
23207 default:
23208 return NULL_TREE;
23209 }
23210 else
23211 /* Normal builtins. */
23212 switch (fn)
23213 {
23214 /* Sqrt to rsqrt conversion. */
23215 case BUILT_IN_SQRTF:
23216 return ix86_builtins[IX86_BUILTIN_RSQRTF];
23217
23218 default:
23219 return NULL_TREE;
23220 }
23221 }
23222
23223 /* Store OPERAND to memory after reload is completed. This means
23224 that we can't easily use assign_stack_local. */
23225 rtx
23226 ix86_force_to_memory (enum machine_mode mode, rtx operand)
23227 {
23228 rtx result;
23229
23230 gcc_assert (reload_completed);
23231 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
23232 {
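/* With a red zone, memory just below the stack pointer may be used as
   scratch space without adjusting the stack pointer. */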
23233 result = gen_rtx_MEM (mode,
23234 gen_rtx_PLUS (Pmode,
23235 stack_pointer_rtx,
23236 GEN_INT (-RED_ZONE_SIZE)));
23237 emit_move_insn (result, operand);
23238 }
23239 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
23240 {
23241 switch (mode)
23242 {
23243 case HImode:
23244 case SImode:
23245 operand = gen_lowpart (DImode, operand);
23246 /* FALLTHRU */
23247 case DImode:
23248 emit_insn (
23249 gen_rtx_SET (VOIDmode,
23250 gen_rtx_MEM (DImode,
23251 gen_rtx_PRE_DEC (DImode,
23252 stack_pointer_rtx)),
23253 operand));
23254 break;
23255 default:
23256 gcc_unreachable ();
23257 }
23258 result = gen_rtx_MEM (mode, stack_pointer_rtx);
23259 }
23260 else
23261 {
23262 switch (mode)
23263 {
23264 case DImode:
23265 {
23266 rtx operands[2];
23267 split_di (&operand, 1, operands, operands + 1);
23268 emit_insn (
23269 gen_rtx_SET (VOIDmode,
23270 gen_rtx_MEM (SImode,
23271 gen_rtx_PRE_DEC (Pmode,
23272 stack_pointer_rtx)),
23273 operands[1]));
23274 emit_insn (
23275 gen_rtx_SET (VOIDmode,
23276 gen_rtx_MEM (SImode,
23277 gen_rtx_PRE_DEC (Pmode,
23278 stack_pointer_rtx)),
23279 operands[0]));
23280 }
23281 break;
23282 case HImode:
23283 /* Store HImodes as SImodes. */
23284 operand = gen_lowpart (SImode, operand);
23285 /* FALLTHRU */
23286 case SImode:
23287 emit_insn (
23288 gen_rtx_SET (VOIDmode,
23289 gen_rtx_MEM (GET_MODE (operand),
23290 gen_rtx_PRE_DEC (SImode,
23291 stack_pointer_rtx)),
23292 operand));
23293 break;
23294 default:
23295 gcc_unreachable ();
23296 }
23297 result = gen_rtx_MEM (mode, stack_pointer_rtx);
23298 }
23299 return result;
23300 }
23301
23302 /* Free operand from the memory. */
23303 void
23304 ix86_free_from_memory (enum machine_mode mode)
23305 {
23306 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
23307 {
23308 int size;
23309
23310 if (mode == DImode || TARGET_64BIT)
23311 size = 8;
23312 else
23313 size = 4;
23314 /* Use LEA to deallocate stack space. In peephole2 it will be converted
23315 to a pop or add instruction if registers are available. */
23316 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
23317 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
23318 GEN_INT (size))));
23319 }
23320 }
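
/* A rough illustration of how the two routines above pair up (schematic,
   not the exact RTL emitted): on 64-bit targets that cannot use the red
   zone (e.g. the MS ABI), ix86_force_to_memory pushes the operand with an
   implicit stack adjustment,

       (set (mem:DI (pre_dec:DI (reg sp))) (reg:DI op))

   and hands back (mem:MODE (reg sp)).  The matching ix86_free_from_memory
   call then releases the slot with

       (set (reg sp) (plus (reg sp) (const_int 8)))

   which peephole2 may later turn into a pop or add.  */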
23321
23322 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
23323 QImode must go into class Q_REGS.
23324 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
23325 movdf to do mem-to-mem moves through integer regs. */
23326 enum reg_class
23327 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
23328 {
23329 enum machine_mode mode = GET_MODE (x);
23330
23331 /* We're only allowed to return a subclass of CLASS. Many of the
23332 following checks fail for NO_REGS, so eliminate that early. */
23333 if (regclass == NO_REGS)
23334 return NO_REGS;
23335
23336 /* All classes can load zeros. */
23337 if (x == CONST0_RTX (mode))
23338 return regclass;
23339
23340 /* Force constants into memory if we are loading a (nonzero) constant into
23341 an MMX or SSE register. This is because there are no MMX/SSE instructions
23342 to load from a constant. */
23343 if (CONSTANT_P (x)
23344 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
23345 return NO_REGS;
23346
23347 /* Prefer SSE regs only, if we can use them for math. */
23348 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
23349 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
23350
23351 /* Floating-point constants need more complex checks. */
23352 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
23353 {
23354 /* General regs can load everything. */
23355 if (reg_class_subset_p (regclass, GENERAL_REGS))
23356 return regclass;
23357
23358 /* Floats can load 0 and 1 plus some others. Note that we eliminated
23359 zero above. We only want to wind up preferring 80387 registers if
23360 we plan on doing computation with them. */
23361 if (TARGET_80387
23362 && standard_80387_constant_p (x))
23363 {
23364 /* Limit class to non-sse. */
23365 if (regclass == FLOAT_SSE_REGS)
23366 return FLOAT_REGS;
23367 if (regclass == FP_TOP_SSE_REGS)
23368 return FP_TOP_REG;
23369 if (regclass == FP_SECOND_SSE_REGS)
23370 return FP_SECOND_REG;
23371 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
23372 return regclass;
23373 }
23374
23375 return NO_REGS;
23376 }
23377
23378 /* Generally when we see PLUS here, it's the function invariant
23379 (plus soft-fp const_int), which can only be computed into general
23380 regs. */
23381 if (GET_CODE (x) == PLUS)
23382 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
23383
23384 /* QImode constants are easy to load, but non-constant QImode data
23385 must go into Q_REGS. */
23386 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
23387 {
23388 if (reg_class_subset_p (regclass, Q_REGS))
23389 return regclass;
23390 if (reg_class_subset_p (Q_REGS, regclass))
23391 return Q_REGS;
23392 return NO_REGS;
23393 }
23394
23395 return regclass;
23396 }
23397
23398 /* Discourage putting floating-point values in SSE registers unless
23399 SSE math is being used, and likewise for the 387 registers. */
23400 enum reg_class
23401 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
23402 {
23403 enum machine_mode mode = GET_MODE (x);
23404
23405 /* Restrict the output reload class to the register bank that we are doing
23406 math on. If we would like not to return a subset of CLASS, reject this
23407 alternative: if reload cannot do this, it will still use its choice. */
23408 mode = GET_MODE (x);
23409 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
23410 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
23411
23412 if (X87_FLOAT_MODE_P (mode))
23413 {
23414 if (regclass == FP_TOP_SSE_REGS)
23415 return FP_TOP_REG;
23416 else if (regclass == FP_SECOND_SSE_REGS)
23417 return FP_SECOND_REG;
23418 else
23419 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
23420 }
23421
23422 return regclass;
23423 }
23424
23425 static enum reg_class
23426 ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
23427 enum machine_mode mode,
23428 secondary_reload_info *sri ATTRIBUTE_UNUSED)
23429 {
23430 /* QImode spills from non-QI registers require an
23431 intermediate register on 32-bit targets. */
23432 if (!in_p && mode == QImode && !TARGET_64BIT
23433 && (rclass == GENERAL_REGS
23434 || rclass == LEGACY_REGS
23435 || rclass == INDEX_REGS))
23436 {
23437 int regno;
23438
23439 if (REG_P (x))
23440 regno = REGNO (x);
23441 else
23442 regno = -1;
23443
23444 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
23445 regno = true_regnum (x);
23446
23447 /* Return Q_REGS if the operand is in memory. */
23448 if (regno == -1)
23449 return Q_REGS;
23450 }
23451
23452 return NO_REGS;
23453 }
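
/* A sketch of the case handled above (assuming a 32-bit target): a QImode
   value that ended up in %esi or %edi cannot be stored with a byte move,
   since those registers have no QImode low part without a REX prefix.
   Requesting Q_REGS gives reload an intermediate register such as %al
   through which the byte can be copied to memory.  */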
23454
23455 /* If we are copying between general and FP registers, we need a memory
23456 location. The same is true for SSE and MMX registers.
23457
23458 To keep ix86_register_move_cost fast, an inline variant is provided.
23459
23460 The macro can't work reliably when one of the CLASSES is a class containing
23461 registers from multiple units (SSE, MMX, integer). We avoid this by never
23462 combining those units in a single alternative in the machine description.
23463 Ensure that this constraint holds to avoid unexpected surprises.
23464
23465 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
23466 enforce these sanity checks. */
23467
23468 static inline int
23469 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
23470 enum machine_mode mode, int strict)
23471 {
23472 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
23473 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
23474 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
23475 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
23476 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
23477 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
23478 {
23479 gcc_assert (!strict);
23480 return true;
23481 }
23482
23483 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
23484 return true;
23485
23486 /* ??? This is a lie. We do have moves between mmx/general, and for
23487 mmx/sse2. But by saying we need secondary memory we discourage the
23488 register allocator from using the mmx registers unless needed. */
23489 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
23490 return true;
23491
23492 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
23493 {
23494 /* SSE1 doesn't have any direct moves from other classes. */
23495 if (!TARGET_SSE2)
23496 return true;
23497
23498 /* If the target says that inter-unit moves are more expensive
23499 than moving through memory, then don't generate them. */
23500 if (!TARGET_INTER_UNIT_MOVES)
23501 return true;
23502
23503 /* Between SSE and general, we have moves no larger than word size. */
23504 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
23505 return true;
23506 }
23507
23508 return false;
23509 }
23510
23511 int
23512 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
23513 enum machine_mode mode, int strict)
23514 {
23515 return inline_secondary_memory_needed (class1, class2, mode, strict);
23516 }
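
/* Two illustrative answers this predicate gives (assuming the usual
   tunings): moving a DFmode value between FLOAT_REGS (x87) and SSE_REGS
   is always routed through memory, because the two classes differ in
   their float-ness; and on x86-64 a TImode move between SSE_REGS and
   GENERAL_REGS also needs memory, since 16 bytes exceed UNITS_PER_WORD
   even when inter-unit moves are enabled.  */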
23517
23518 /* Return true if the registers in CLASS cannot represent the change from
23519 modes FROM to TO. */
23520
23521 bool
23522 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
23523 enum reg_class regclass)
23524 {
23525 if (from == to)
23526 return false;
23527
23528 /* x87 registers can't do subreg at all, as all values are reformatted
23529 to extended precision. */
23530 if (MAYBE_FLOAT_CLASS_P (regclass))
23531 return true;
23532
23533 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
23534 {
23535 /* Vector registers do not support QI or HImode loads. If we don't
23536 disallow a change to these modes, reload will assume it's ok to
23537 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
23538 the vec_dupv4hi pattern. */
23539 if (GET_MODE_SIZE (from) < 4)
23540 return true;
23541
23542 /* Vector registers do not support subreg with nonzero offsets, which
23543 are otherwise valid for integer registers. Since we can't see
23544 whether we have a nonzero offset from here, prohibit all
23545 nonparadoxical subregs changing size. */
23546 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
23547 return true;
23548 }
23549
23550 return false;
23551 }
23552
23553 /* Return the cost of moving data of mode M between a
23554 register and memory. A value of 2 is the default; this cost is
23555 relative to those in `REGISTER_MOVE_COST'.
23556
23557 This function is used extensively by ix86_register_move_cost, which is used
23558 to build tables at startup. Make it inline in this case.
23559 When IN is 2, return maximum of in and out move cost.
23560
23561 If moving between registers and memory is more expensive than
23562 between two registers, you should define this macro to express the
23563 relative cost.
23564
23565 Also model the increased cost of moving QImode registers in non-Q_REGS
23566 classes.
23567 */
23568 static inline int
23569 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
23570 int in)
23571 {
23572 int cost;
23573 if (FLOAT_CLASS_P (regclass))
23574 {
23575 int index;
23576 switch (mode)
23577 {
23578 case SFmode:
23579 index = 0;
23580 break;
23581 case DFmode:
23582 index = 1;
23583 break;
23584 case XFmode:
23585 index = 2;
23586 break;
23587 default:
23588 return 100;
23589 }
23590 if (in == 2)
23591 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
23592 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
23593 }
23594 if (SSE_CLASS_P (regclass))
23595 {
23596 int index;
23597 switch (GET_MODE_SIZE (mode))
23598 {
23599 case 4:
23600 index = 0;
23601 break;
23602 case 8:
23603 index = 1;
23604 break;
23605 case 16:
23606 index = 2;
23607 break;
23608 default:
23609 return 100;
23610 }
23611 if (in == 2)
23612 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
23613 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
23614 }
23615 if (MMX_CLASS_P (regclass))
23616 {
23617 int index;
23618 switch (GET_MODE_SIZE (mode))
23619 {
23620 case 4:
23621 index = 0;
23622 break;
23623 case 8:
23624 index = 1;
23625 break;
23626 default:
23627 return 100;
23628 }
23629 if (in == 2)
23630 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
23631 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
23632 }
23633 switch (GET_MODE_SIZE (mode))
23634 {
23635 case 1:
23636 if (Q_CLASS_P (regclass) || TARGET_64BIT)
23637 {
23638 if (!in)
23639 return ix86_cost->int_store[0];
23640 if (TARGET_PARTIAL_REG_DEPENDENCY && !optimize_size)
23641 cost = ix86_cost->movzbl_load;
23642 else
23643 cost = ix86_cost->int_load[0];
23644 if (in == 2)
23645 return MAX (cost, ix86_cost->int_store[0]);
23646 return cost;
23647 }
23648 else
23649 {
23650 if (in == 2)
23651 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
23652 if (in)
23653 return ix86_cost->movzbl_load;
23654 else
23655 return ix86_cost->int_store[0] + 4;
23656 }
23657 break;
23658 case 2:
23659 if (in == 2)
23660 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
23661 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
23662 default:
23663 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
23664 if (mode == TFmode)
23665 mode = XFmode;
23666 if (in == 2)
23667 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
23668 else if (in)
23669 cost = ix86_cost->int_load[2];
23670 else
23671 cost = ix86_cost->int_store[2];
23672 return (cost * (((int) GET_MODE_SIZE (mode)
23673 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
23674 }
23675 }
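
/* A worked example of the arithmetic above (purely illustrative; the real
   numbers come from the active cost table): a DImode load into
   GENERAL_REGS on a 32-bit target falls through to the default case,
   picks ix86_cost->int_load[2] and scales it by
   (8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD == 2, i.e. two 32-bit
   moves.  */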
23676
23677 int
23678 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
23679 {
23680 return inline_memory_move_cost (mode, regclass, in);
23681 }
23682
23683
23684 /* Return the cost of moving data from a register in class CLASS1 to
23685 one in class CLASS2.
23686
23687 It is not required that the cost always equal 2 when CLASS1 is the same as
23688 CLASS2; on some machines it is expensive to move between registers if they
23689 are not general registers. */
23690
23691 int
23692 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
23693 enum reg_class class2)
23694 {
23695 /* In case we require secondary memory, compute cost of the store followed
23696 by load. In order to avoid bad register allocation choices, we need
23697 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
23698
23699 if (inline_secondary_memory_needed (class1, class2, mode, 0))
23700 {
23701 int cost = 1;
23702
23703 cost += inline_memory_move_cost (mode, class1, 2);
23704 cost += inline_memory_move_cost (mode, class2, 2);
23705
23706 /* In case of copying from a general purpose register we may emit multiple
23707 stores followed by a single load, causing a memory size mismatch stall.
23708 Count this as an arbitrarily high cost of 20. */
23709 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
23710 cost += 20;
23711
23712 /* In the case of FP/MMX moves, the registers actually overlap, and we
23713 have to switch modes in order to treat them differently. */
23714 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
23715 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
23716 cost += 20;
23717
23718 return cost;
23719 }
23720
23721 /* Moves between SSE/MMX and integer unit are expensive. */
23722 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
23723 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
23724
23725 /* ??? By keeping returned value relatively high, we limit the number
23726 of moves between integer and MMX/SSE registers for all targets.
23727 Additionally, high value prevents problem with x86_modes_tieable_p(),
23728 where integer modes in MMX/SSE registers are not tieable
23729 because of missing QImode and HImode moves to, from or between
23730 MMX/SSE registers. */
23731 return MAX (8, ix86_cost->mmxsse_to_integer);
23732
23733 if (MAYBE_FLOAT_CLASS_P (class1))
23734 return ix86_cost->fp_move;
23735 if (MAYBE_SSE_CLASS_P (class1))
23736 return ix86_cost->sse_move;
23737 if (MAYBE_MMX_CLASS_P (class1))
23738 return ix86_cost->mmx_move;
23739 return 2;
23740 }
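
/* For instance (schematically), copying a DFmode value from SSE_REGS to
   FLOAT_REGS needs secondary memory, so the cost reported is
   1 + inline_memory_move_cost (DFmode, SSE_REGS, 2)
     + inline_memory_move_cost (DFmode, FLOAT_REGS, 2),
   deliberately at least as high as the store plus load it stands for.  */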
23741
23742 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
23743
23744 bool
23745 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
23746 {
23747 /* Only the flags registers can hold CCmode values, and they hold nothing else. */
23748 if (CC_REGNO_P (regno))
23749 return GET_MODE_CLASS (mode) == MODE_CC;
23750 if (GET_MODE_CLASS (mode) == MODE_CC
23751 || GET_MODE_CLASS (mode) == MODE_RANDOM
23752 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
23753 return 0;
23754 if (FP_REGNO_P (regno))
23755 return VALID_FP_MODE_P (mode);
23756 if (SSE_REGNO_P (regno))
23757 {
23758 /* We implement the move patterns for all vector modes into and
23759 out of SSE registers, even when no operation instructions
23760 are available. */
23761 return (VALID_SSE_REG_MODE (mode)
23762 || VALID_SSE2_REG_MODE (mode)
23763 || VALID_MMX_REG_MODE (mode)
23764 || VALID_MMX_REG_MODE_3DNOW (mode));
23765 }
23766 if (MMX_REGNO_P (regno))
23767 {
23768 /* We implement the move patterns for 3DNOW modes even in MMX mode,
23769 so if the register is available at all, then we can move data of
23770 the given mode into or out of it. */
23771 return (VALID_MMX_REG_MODE (mode)
23772 || VALID_MMX_REG_MODE_3DNOW (mode));
23773 }
23774
23775 if (mode == QImode)
23776 {
23777 /* Take care for QImode values - they can be in non-QI regs,
23778 but then they do cause partial register stalls. */
23779 if (regno < 4 || TARGET_64BIT)
23780 return 1;
23781 if (!TARGET_PARTIAL_REG_STALL)
23782 return 1;
23783 return reload_in_progress || reload_completed;
23784 }
23785 /* We handle both integers and floats in the general purpose registers. */
23786 else if (VALID_INT_MODE_P (mode))
23787 return 1;
23788 else if (VALID_FP_MODE_P (mode))
23789 return 1;
23790 else if (VALID_DFP_MODE_P (mode))
23791 return 1;
23792 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
23793 on to use that value in smaller contexts, this can easily force a
23794 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
23795 supporting DImode, allow it. */
23796 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
23797 return 1;
23798
23799 return 0;
23800 }
23801
23802 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
23803 tieable integer mode. */
23804
23805 static bool
23806 ix86_tieable_integer_mode_p (enum machine_mode mode)
23807 {
23808 switch (mode)
23809 {
23810 case HImode:
23811 case SImode:
23812 return true;
23813
23814 case QImode:
23815 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
23816
23817 case DImode:
23818 return TARGET_64BIT;
23819
23820 default:
23821 return false;
23822 }
23823 }
23824
23825 /* Return true if MODE1 is accessible in a register that can hold MODE2
23826 without copying. That is, all register classes that can hold MODE2
23827 can also hold MODE1. */
23828
23829 bool
23830 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
23831 {
23832 if (mode1 == mode2)
23833 return true;
23834
23835 if (ix86_tieable_integer_mode_p (mode1)
23836 && ix86_tieable_integer_mode_p (mode2))
23837 return true;
23838
23839 /* MODE2 being XFmode implies fp stack or general regs, which means we
23840 can tie any smaller floating point modes to it. Note that we do not
23841 tie this with TFmode. */
23842 if (mode2 == XFmode)
23843 return mode1 == SFmode || mode1 == DFmode;
23844
23845 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
23846 that we can tie it with SFmode. */
23847 if (mode2 == DFmode)
23848 return mode1 == SFmode;
23849
23850 /* If MODE2 is only appropriate for an SSE register, then tie with
23851 any other mode acceptable to SSE registers. */
23852 if (GET_MODE_SIZE (mode2) == 16
23853 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
23854 return (GET_MODE_SIZE (mode1) == 16
23855 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
23856
23857 /* If MODE2 is appropriate for an MMX register, then tie
23858 with any other mode acceptable to MMX registers. */
23859 if (GET_MODE_SIZE (mode2) == 8
23860 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
23861 return (GET_MODE_SIZE (mode1) == 8
23862 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
23863
23864 return false;
23865 }
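
/* Illustrative consequences of the rules above: SFmode ties with DFmode
   and with XFmode (any register class able to hold the wider mode can
   also hold SFmode), HImode ties with SImode, TFmode does not tie with
   XFmode, and a 16-byte SSE mode only ties with other 16-byte modes that
   SSE registers accept.  */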
23866
23867 /* Compute a (partial) cost for rtx X. Return true if the complete
23868 cost has been computed, and false if subexpressions should be
23869 scanned. In either case, *TOTAL contains the cost result. */
23870
23871 static bool
23872 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
23873 {
23874 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
23875 enum machine_mode mode = GET_MODE (x);
23876
23877 switch (code)
23878 {
23879 case CONST_INT:
23880 case CONST:
23881 case LABEL_REF:
23882 case SYMBOL_REF:
23883 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
23884 *total = 3;
23885 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
23886 *total = 2;
23887 else if (flag_pic && SYMBOLIC_CONST (x)
23888 && (!TARGET_64BIT
23889 || (GET_CODE (x) != LABEL_REF
23890 && (GET_CODE (x) != SYMBOL_REF
23891 || !SYMBOL_REF_LOCAL_P (x)))))
23892 *total = 1;
23893 else
23894 *total = 0;
23895 return true;
23896
23897 case CONST_DOUBLE:
23898 if (mode == VOIDmode)
23899 *total = 0;
23900 else
23901 switch (standard_80387_constant_p (x))
23902 {
23903 case 1: /* 0.0 */
23904 *total = 1;
23905 break;
23906 default: /* Other constants */
23907 *total = 2;
23908 break;
23909 case 0:
23910 case -1:
23911 /* Start with (MEM (SYMBOL_REF)), since that's where
23912 it'll probably end up. Add a penalty for size. */
23913 *total = (COSTS_N_INSNS (1)
23914 + (flag_pic != 0 && !TARGET_64BIT)
23915 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
23916 break;
23917 }
23918 return true;
23919
23920 case ZERO_EXTEND:
23921 /* The zero extension is often completely free on x86_64, so make
23922 it as cheap as possible. */
23923 if (TARGET_64BIT && mode == DImode
23924 && GET_MODE (XEXP (x, 0)) == SImode)
23925 *total = 1;
23926 else if (TARGET_ZERO_EXTEND_WITH_AND)
23927 *total = ix86_cost->add;
23928 else
23929 *total = ix86_cost->movzx;
23930 return false;
23931
23932 case SIGN_EXTEND:
23933 *total = ix86_cost->movsx;
23934 return false;
23935
23936 case ASHIFT:
23937 if (CONST_INT_P (XEXP (x, 1))
23938 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
23939 {
23940 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
23941 if (value == 1)
23942 {
23943 *total = ix86_cost->add;
23944 return false;
23945 }
23946 if ((value == 2 || value == 3)
23947 && ix86_cost->lea <= ix86_cost->shift_const)
23948 {
23949 *total = ix86_cost->lea;
23950 return false;
23951 }
23952 }
23953 /* FALLTHRU */
23954
23955 case ROTATE:
23956 case ASHIFTRT:
23957 case LSHIFTRT:
23958 case ROTATERT:
23959 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
23960 {
23961 if (CONST_INT_P (XEXP (x, 1)))
23962 {
23963 if (INTVAL (XEXP (x, 1)) > 32)
23964 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
23965 else
23966 *total = ix86_cost->shift_const * 2;
23967 }
23968 else
23969 {
23970 if (GET_CODE (XEXP (x, 1)) == AND)
23971 *total = ix86_cost->shift_var * 2;
23972 else
23973 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
23974 }
23975 }
23976 else
23977 {
23978 if (CONST_INT_P (XEXP (x, 1)))
23979 *total = ix86_cost->shift_const;
23980 else
23981 *total = ix86_cost->shift_var;
23982 }
23983 return false;
23984
23985 case MULT:
23986 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
23987 {
23988 /* ??? SSE scalar cost should be used here. */
23989 *total = ix86_cost->fmul;
23990 return false;
23991 }
23992 else if (X87_FLOAT_MODE_P (mode))
23993 {
23994 *total = ix86_cost->fmul;
23995 return false;
23996 }
23997 else if (FLOAT_MODE_P (mode))
23998 {
23999 /* ??? SSE vector cost should be used here. */
24000 *total = ix86_cost->fmul;
24001 return false;
24002 }
24003 else
24004 {
24005 rtx op0 = XEXP (x, 0);
24006 rtx op1 = XEXP (x, 1);
24007 int nbits;
24008 if (CONST_INT_P (XEXP (x, 1)))
24009 {
24010 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
24011 for (nbits = 0; value != 0; value &= value - 1)
24012 nbits++;
24013 }
24014 else
24015 /* This is arbitrary. */
24016 nbits = 7;
24017
24018 /* Compute costs correctly for widening multiplication. */
24019 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
24020 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
24021 == GET_MODE_SIZE (mode))
24022 {
24023 int is_mulwiden = 0;
24024 enum machine_mode inner_mode = GET_MODE (op0);
24025
24026 if (GET_CODE (op0) == GET_CODE (op1))
24027 is_mulwiden = 1, op1 = XEXP (op1, 0);
24028 else if (CONST_INT_P (op1))
24029 {
24030 if (GET_CODE (op0) == SIGN_EXTEND)
24031 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
24032 == INTVAL (op1);
24033 else
24034 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
24035 }
24036
24037 if (is_mulwiden)
24038 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
24039 }
24040
24041 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
24042 + nbits * ix86_cost->mult_bit
24043 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
24044
24045 return true;
24046 }
24047
24048 case DIV:
24049 case UDIV:
24050 case MOD:
24051 case UMOD:
24052 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
24053 /* ??? SSE cost should be used here. */
24054 *total = ix86_cost->fdiv;
24055 else if (X87_FLOAT_MODE_P (mode))
24056 *total = ix86_cost->fdiv;
24057 else if (FLOAT_MODE_P (mode))
24058 /* ??? SSE vector cost should be used here. */
24059 *total = ix86_cost->fdiv;
24060 else
24061 *total = ix86_cost->divide[MODE_INDEX (mode)];
24062 return false;
24063
24064 case PLUS:
24065 if (GET_MODE_CLASS (mode) == MODE_INT
24066 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
24067 {
24068 if (GET_CODE (XEXP (x, 0)) == PLUS
24069 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
24070 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
24071 && CONSTANT_P (XEXP (x, 1)))
24072 {
24073 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
24074 if (val == 2 || val == 4 || val == 8)
24075 {
24076 *total = ix86_cost->lea;
24077 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
24078 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
24079 outer_code);
24080 *total += rtx_cost (XEXP (x, 1), outer_code);
24081 return true;
24082 }
24083 }
24084 else if (GET_CODE (XEXP (x, 0)) == MULT
24085 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
24086 {
24087 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
24088 if (val == 2 || val == 4 || val == 8)
24089 {
24090 *total = ix86_cost->lea;
24091 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
24092 *total += rtx_cost (XEXP (x, 1), outer_code);
24093 return true;
24094 }
24095 }
24096 else if (GET_CODE (XEXP (x, 0)) == PLUS)
24097 {
24098 *total = ix86_cost->lea;
24099 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
24100 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
24101 *total += rtx_cost (XEXP (x, 1), outer_code);
24102 return true;
24103 }
24104 }
24105 /* FALLTHRU */
24106
24107 case MINUS:
24108 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
24109 {
24110 /* ??? SSE cost should be used here. */
24111 *total = ix86_cost->fadd;
24112 return false;
24113 }
24114 else if (X87_FLOAT_MODE_P (mode))
24115 {
24116 *total = ix86_cost->fadd;
24117 return false;
24118 }
24119 else if (FLOAT_MODE_P (mode))
24120 {
24121 /* ??? SSE vector cost should be used here. */
24122 *total = ix86_cost->fadd;
24123 return false;
24124 }
24125 /* FALLTHRU */
24126
24127 case AND:
24128 case IOR:
24129 case XOR:
24130 if (!TARGET_64BIT && mode == DImode)
24131 {
24132 *total = (ix86_cost->add * 2
24133 + (rtx_cost (XEXP (x, 0), outer_code)
24134 << (GET_MODE (XEXP (x, 0)) != DImode))
24135 + (rtx_cost (XEXP (x, 1), outer_code)
24136 << (GET_MODE (XEXP (x, 1)) != DImode)));
24137 return true;
24138 }
24139 /* FALLTHRU */
24140
24141 case NEG:
24142 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
24143 {
24144 /* ??? SSE cost should be used here. */
24145 *total = ix86_cost->fchs;
24146 return false;
24147 }
24148 else if (X87_FLOAT_MODE_P (mode))
24149 {
24150 *total = ix86_cost->fchs;
24151 return false;
24152 }
24153 else if (FLOAT_MODE_P (mode))
24154 {
24155 /* ??? SSE vector cost should be used here. */
24156 *total = ix86_cost->fchs;
24157 return false;
24158 }
24159 /* FALLTHRU */
24160
24161 case NOT:
24162 if (!TARGET_64BIT && mode == DImode)
24163 *total = ix86_cost->add * 2;
24164 else
24165 *total = ix86_cost->add;
24166 return false;
24167
24168 case COMPARE:
24169 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
24170 && XEXP (XEXP (x, 0), 1) == const1_rtx
24171 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
24172 && XEXP (x, 1) == const0_rtx)
24173 {
24174 /* This kind of construct is implemented using test[bwl].
24175 Treat it as if we had an AND. */
24176 *total = (ix86_cost->add
24177 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
24178 + rtx_cost (const1_rtx, outer_code));
24179 return true;
24180 }
24181 return false;
24182
24183 case FLOAT_EXTEND:
24184 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
24185 *total = 0;
24186 return false;
24187
24188 case ABS:
24189 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
24190 /* ??? SSE cost should be used here. */
24191 *total = ix86_cost->fabs;
24192 else if (X87_FLOAT_MODE_P (mode))
24193 *total = ix86_cost->fabs;
24194 else if (FLOAT_MODE_P (mode))
24195 /* ??? SSE vector cost should be used here. */
24196 *total = ix86_cost->fabs;
24197 return false;
24198
24199 case SQRT:
24200 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
24201 /* ??? SSE cost should be used here. */
24202 *total = ix86_cost->fsqrt;
24203 else if (X87_FLOAT_MODE_P (mode))
24204 *total = ix86_cost->fsqrt;
24205 else if (FLOAT_MODE_P (mode))
24206 /* ??? SSE vector cost should be used here. */
24207 *total = ix86_cost->fsqrt;
24208 return false;
24209
24210 case UNSPEC:
24211 if (XINT (x, 1) == UNSPEC_TP)
24212 *total = 0;
24213 return false;
24214
24215 default:
24216 return false;
24217 }
24218 }
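
/* A small example of the PLUS handling above (schematic): for an
   address-like expression such as

       (plus:SI (mult:SI (reg) (const_int 4)) (reg))

   the cost charged is ix86_cost->lea plus the costs of the inner
   operands, reflecting that the whole expression fits in a single lea.  */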
24219
24220 #if TARGET_MACHO
24221
24222 static int current_machopic_label_num;
24223
24224 /* Given a symbol name and its associated stub, write out the
24225 definition of the stub. */
24226
24227 void
24228 machopic_output_stub (FILE *file, const char *symb, const char *stub)
24229 {
24230 unsigned int length;
24231 char *binder_name, *symbol_name, lazy_ptr_name[32];
24232 int label = ++current_machopic_label_num;
24233
24234 /* For 64-bit we shouldn't get here. */
24235 gcc_assert (!TARGET_64BIT);
24236
24237 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
24238 symb = (*targetm.strip_name_encoding) (symb);
24239
24240 length = strlen (stub);
24241 binder_name = XALLOCAVEC (char, length + 32);
24242 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
24243
24244 length = strlen (symb);
24245 symbol_name = XALLOCAVEC (char, length + 32);
24246 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
24247
24248 sprintf (lazy_ptr_name, "L%d$lz", label);
24249
24250 if (MACHOPIC_PURE)
24251 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
24252 else
24253 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
24254
24255 fprintf (file, "%s:\n", stub);
24256 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
24257
24258 if (MACHOPIC_PURE)
24259 {
24260 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
24261 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
24262 fprintf (file, "\tjmp\t*%%edx\n");
24263 }
24264 else
24265 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
24266
24267 fprintf (file, "%s:\n", binder_name);
24268
24269 if (MACHOPIC_PURE)
24270 {
24271 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
24272 fprintf (file, "\tpushl\t%%eax\n");
24273 }
24274 else
24275 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
24276
24277 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
24278
24279 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
24280 fprintf (file, "%s:\n", lazy_ptr_name);
24281 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
24282 fprintf (file, "\t.long %s\n", binder_name);
24283 }
24284
24285 void
24286 darwin_x86_file_end (void)
24287 {
24288 darwin_file_end ();
24289 ix86_file_end ();
24290 }
24291 #endif /* TARGET_MACHO */
24292
24293 /* Order the registers for register allocator. */
24294
24295 void
24296 x86_order_regs_for_local_alloc (void)
24297 {
24298 int pos = 0;
24299 int i;
24300
24301 /* First allocate the local general purpose registers. */
24302 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
24303 if (GENERAL_REGNO_P (i) && call_used_regs[i])
24304 reg_alloc_order [pos++] = i;
24305
24306 /* Global general purpose registers. */
24307 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
24308 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
24309 reg_alloc_order [pos++] = i;
24310
24311 /* x87 registers come first in case we are doing FP math
24312 using them. */
24313 if (!TARGET_SSE_MATH)
24314 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
24315 reg_alloc_order [pos++] = i;
24316
24317 /* SSE registers. */
24318 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
24319 reg_alloc_order [pos++] = i;
24320 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
24321 reg_alloc_order [pos++] = i;
24322
24323 /* x87 registers. */
24324 if (TARGET_SSE_MATH)
24325 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
24326 reg_alloc_order [pos++] = i;
24327
24328 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
24329 reg_alloc_order [pos++] = i;
24330
24331 /* Initialize the rest of the array, as we do not allocate some registers
24332 at all. */
24333 while (pos < FIRST_PSEUDO_REGISTER)
24334 reg_alloc_order [pos++] = 0;
24335 }
24336
24337 /* Handle an "ms_abi" or "sysv_abi" attribute; arguments as in
24338 struct attribute_spec.handler. */
24339 static tree
24340 ix86_handle_abi_attribute (tree *node, tree name,
24341 tree args ATTRIBUTE_UNUSED,
24342 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
24343 {
24344 if (TREE_CODE (*node) != FUNCTION_TYPE
24345 && TREE_CODE (*node) != METHOD_TYPE
24346 && TREE_CODE (*node) != FIELD_DECL
24347 && TREE_CODE (*node) != TYPE_DECL)
24348 {
24349 warning (OPT_Wattributes, "%qs attribute only applies to functions",
24350 IDENTIFIER_POINTER (name));
24351 *no_add_attrs = true;
24352 return NULL_TREE;
24353 }
24354 if (!TARGET_64BIT)
24355 {
24356 warning (OPT_Wattributes, "%qs attribute only available for 64-bit",
24357 IDENTIFIER_POINTER (name));
24358 *no_add_attrs = true;
24359 return NULL_TREE;
24360 }
24361
24362 /* ms_abi and sysv_abi are mutually exclusive; diagnose combinations. */
24363 if (is_attribute_p ("ms_abi", name))
24364 {
24365 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
24366 {
24367 error ("ms_abi and sysv_abi attributes are not compatible");
24368 }
24369
24370 return NULL_TREE;
24371 }
24372 else if (is_attribute_p ("sysv_abi", name))
24373 {
24374 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
24375 {
24376 error ("ms_abi and sysv_abi attributes are not compatible");
24377 }
24378
24379 return NULL_TREE;
24380 }
24381
24382 return NULL_TREE;
24383 }
24384
24385 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
24386 struct attribute_spec.handler. */
24387 static tree
24388 ix86_handle_struct_attribute (tree *node, tree name,
24389 tree args ATTRIBUTE_UNUSED,
24390 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
24391 {
24392 tree *type = NULL;
24393 if (DECL_P (*node))
24394 {
24395 if (TREE_CODE (*node) == TYPE_DECL)
24396 type = &TREE_TYPE (*node);
24397 }
24398 else
24399 type = node;
24400
24401 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
24402 || TREE_CODE (*type) == UNION_TYPE)))
24403 {
24404 warning (OPT_Wattributes, "%qs attribute ignored",
24405 IDENTIFIER_POINTER (name));
24406 *no_add_attrs = true;
24407 }
24408
24409 else if ((is_attribute_p ("ms_struct", name)
24410 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
24411 || ((is_attribute_p ("gcc_struct", name)
24412 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
24413 {
24414 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
24415 IDENTIFIER_POINTER (name));
24416 *no_add_attrs = true;
24417 }
24418
24419 return NULL_TREE;
24420 }
24421
24422 static bool
24423 ix86_ms_bitfield_layout_p (const_tree record_type)
24424 {
24425 return ((TARGET_MS_BITFIELD_LAYOUT
24426 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
24427 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
24428 }
24429
24430 /* Returns an expression indicating where the this parameter is
24431 located on entry to the FUNCTION. */
24432
24433 static rtx
24434 x86_this_parameter (tree function)
24435 {
24436 tree type = TREE_TYPE (function);
24437 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
24438 int nregs;
24439
24440 if (TARGET_64BIT)
24441 {
24442 const int *parm_regs;
24443
24444 if (ix86_function_type_abi (type) == MS_ABI)
24445 parm_regs = x86_64_ms_abi_int_parameter_registers;
24446 else
24447 parm_regs = x86_64_int_parameter_registers;
24448 return gen_rtx_REG (DImode, parm_regs[aggr]);
24449 }
24450
24451 nregs = ix86_function_regparm (type, function);
24452
24453 if (nregs > 0 && !stdarg_p (type))
24454 {
24455 int regno;
24456
24457 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
24458 regno = aggr ? DX_REG : CX_REG;
24459 else
24460 {
24461 regno = AX_REG;
24462 if (aggr)
24463 {
24464 regno = DX_REG;
24465 if (nregs == 1)
24466 return gen_rtx_MEM (SImode,
24467 plus_constant (stack_pointer_rtx, 4));
24468 }
24469 }
24470 return gen_rtx_REG (SImode, regno);
24471 }
24472
24473 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
24474 }
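
/* Concretely (a hedged illustration of the cases above): for a plain
   32-bit stack-calling-convention method, the incoming this pointer is
   found at 4(%esp); with the fastcall convention it arrives in %ecx (or
   %edx when a hidden aggregate-return pointer comes first); and on
   64-bit it lives in the first integer argument register of the active
   ABI, shifting to the second one when an aggregate-return pointer
   occupies the first.  */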
24475
24476 /* Determine whether x86_output_mi_thunk can succeed. */
24477
24478 static bool
24479 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
24480 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
24481 HOST_WIDE_INT vcall_offset, const_tree function)
24482 {
24483 /* 64-bit can handle anything. */
24484 if (TARGET_64BIT)
24485 return true;
24486
24487 /* For 32-bit, everything's fine if we have one free register. */
24488 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
24489 return true;
24490
24491 /* Need a free register for vcall_offset. */
24492 if (vcall_offset)
24493 return false;
24494
24495 /* Need a free register for GOT references. */
24496 if (flag_pic && !(*targetm.binds_local_p) (function))
24497 return false;
24498
24499 /* Otherwise ok. */
24500 return true;
24501 }
24502
24503 /* Output the assembler code for a thunk function. THUNK_DECL is the
24504 declaration for the thunk function itself, FUNCTION is the decl for
24505 the target function. DELTA is an immediate constant offset to be
24506 added to THIS. If VCALL_OFFSET is nonzero, the word at
24507 *(*this + vcall_offset) should be added to THIS. */
24508
24509 static void
24510 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
24511 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
24512 HOST_WIDE_INT vcall_offset, tree function)
24513 {
24514 rtx xops[3];
24515 rtx this_param = x86_this_parameter (function);
24516 rtx this_reg, tmp;
24517
24518 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
24519 pull it in now and let DELTA benefit. */
24520 if (REG_P (this_param))
24521 this_reg = this_param;
24522 else if (vcall_offset)
24523 {
24524 /* Put the this parameter into %eax. */
24525 xops[0] = this_param;
24526 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
24527 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
24528 }
24529 else
24530 this_reg = NULL_RTX;
24531
24532 /* Adjust the this parameter by a fixed constant. */
24533 if (delta)
24534 {
24535 xops[0] = GEN_INT (delta);
24536 xops[1] = this_reg ? this_reg : this_param;
24537 if (TARGET_64BIT)
24538 {
24539 if (!x86_64_general_operand (xops[0], DImode))
24540 {
24541 tmp = gen_rtx_REG (DImode, R10_REG);
24542 xops[1] = tmp;
24543 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
24544 xops[0] = tmp;
24545 xops[1] = this_param;
24546 }
24547 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
24548 }
24549 else
24550 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
24551 }
24552
24553 /* Adjust the this parameter by a value stored in the vtable. */
24554 if (vcall_offset)
24555 {
24556 if (TARGET_64BIT)
24557 tmp = gen_rtx_REG (DImode, R10_REG);
24558 else
24559 {
24560 int tmp_regno = CX_REG;
24561 if (lookup_attribute ("fastcall",
24562 TYPE_ATTRIBUTES (TREE_TYPE (function))))
24563 tmp_regno = AX_REG;
24564 tmp = gen_rtx_REG (SImode, tmp_regno);
24565 }
24566
24567 xops[0] = gen_rtx_MEM (Pmode, this_reg);
24568 xops[1] = tmp;
24569 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
24570
24571 /* Adjust the this parameter. */
24572 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
24573 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
24574 {
24575 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
24576 xops[0] = GEN_INT (vcall_offset);
24577 xops[1] = tmp2;
24578 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
24579 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
24580 }
24581 xops[1] = this_reg;
24582 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
24583 }
24584
24585 /* If necessary, drop THIS back to its stack slot. */
24586 if (this_reg && this_reg != this_param)
24587 {
24588 xops[0] = this_reg;
24589 xops[1] = this_param;
24590 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
24591 }
24592
24593 xops[0] = XEXP (DECL_RTL (function), 0);
24594 if (TARGET_64BIT)
24595 {
24596 if (!flag_pic || (*targetm.binds_local_p) (function))
24597 output_asm_insn ("jmp\t%P0", xops);
24598 /* All thunks should be in the same object as their target,
24599 and thus binds_local_p should be true. */
24600 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
24601 gcc_unreachable ();
24602 else
24603 {
24604 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
24605 tmp = gen_rtx_CONST (Pmode, tmp);
24606 tmp = gen_rtx_MEM (QImode, tmp);
24607 xops[0] = tmp;
24608 output_asm_insn ("jmp\t%A0", xops);
24609 }
24610 }
24611 else
24612 {
24613 if (!flag_pic || (*targetm.binds_local_p) (function))
24614 output_asm_insn ("jmp\t%P0", xops);
24615 else
24616 #if TARGET_MACHO
24617 if (TARGET_MACHO)
24618 {
24619 rtx sym_ref = XEXP (DECL_RTL (function), 0);
24620 tmp = (gen_rtx_SYMBOL_REF
24621 (Pmode,
24622 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
24623 tmp = gen_rtx_MEM (QImode, tmp);
24624 xops[0] = tmp;
24625 output_asm_insn ("jmp\t%0", xops);
24626 }
24627 else
24628 #endif /* TARGET_MACHO */
24629 {
24630 tmp = gen_rtx_REG (SImode, CX_REG);
24631 output_set_got (tmp, NULL_RTX);
24632
24633 xops[1] = tmp;
24634 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
24635 output_asm_insn ("jmp\t{*}%1", xops);
24636 }
24637 }
24638 }
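
/* A minimal sketch of the output for the simplest case (ia32, non-PIC,
   stack-passed this, constant DELTA only, VCALL_OFFSET zero):

       addl  $DELTA, 4(%esp)
       jmp   target_function

   The other branches above only add a vtable load and the PIC or 64-bit
   addressing variations around this same skeleton.  */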
24639
24640 static void
24641 x86_file_start (void)
24642 {
24643 default_file_start ();
24644 #if TARGET_MACHO
24645 darwin_file_start ();
24646 #endif
24647 if (X86_FILE_START_VERSION_DIRECTIVE)
24648 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
24649 if (X86_FILE_START_FLTUSED)
24650 fputs ("\t.global\t__fltused\n", asm_out_file);
24651 if (ix86_asm_dialect == ASM_INTEL)
24652 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
24653 }
24654
24655 int
24656 x86_field_alignment (tree field, int computed)
24657 {
24658 enum machine_mode mode;
24659 tree type = TREE_TYPE (field);
24660
24661 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
24662 return computed;
24663 mode = TYPE_MODE (strip_array_types (type));
24664 if (mode == DFmode || mode == DCmode
24665 || GET_MODE_CLASS (mode) == MODE_INT
24666 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
24667 return MIN (32, computed);
24668 return computed;
24669 }
24670
24671 /* Output assembler code to FILE to increment profiler label # LABELNO
24672 for profiling a function entry. */
24673 void
24674 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
24675 {
24676 if (TARGET_64BIT)
24677 {
24678 #ifndef NO_PROFILE_COUNTERS
24679 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
24680 #endif
24681
24682 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
24683 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
24684 else
24685 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
24686 }
24687 else if (flag_pic)
24688 {
24689 #ifndef NO_PROFILE_COUNTERS
24690 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
24691 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
24692 #endif
24693 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
24694 }
24695 else
24696 {
24697 #ifndef NO_PROFILE_COUNTERS
24698 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
24699 PROFILE_COUNT_REGISTER);
24700 #endif
24701 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
24702 }
24703 }
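
/* For reference, the ia32 non-PIC case above amounts to roughly
   (illustrative, with profile counters enabled):

       movl  $<LPREFIX>P<labelno>, %<PROFILE_COUNT_REGISTER>
       call  <MCOUNT_NAME>

   while the PIC and 64-bit variants reach the same counter label and
   mcount symbol through the GOT or via %rip-relative addressing.  */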
24704
24705 /* We don't have exact information about the insn sizes, but we may assume
24706 quite safely that we are informed about all 1 byte insns and memory
24707 address sizes. This is enough to eliminate unnecessary padding in
24708 99% of cases. */
24709
24710 static int
24711 min_insn_size (rtx insn)
24712 {
24713 int l = 0;
24714
24715 if (!INSN_P (insn) || !active_insn_p (insn))
24716 return 0;
24717
24718 /* Discard alignments we've emitted and jump tables. */
24719 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
24720 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
24721 return 0;
24722 if (JUMP_P (insn)
24723 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
24724 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
24725 return 0;
24726
24727 /* Important case - calls are always 5 bytes.
24728 It is common to have many calls in a row. */
24729 if (CALL_P (insn)
24730 && symbolic_reference_mentioned_p (PATTERN (insn))
24731 && !SIBLING_CALL_P (insn))
24732 return 5;
24733 if (get_attr_length (insn) <= 1)
24734 return 1;
24735
24736 /* For normal instructions we may rely on the sizes of addresses
24737 and the presence of a symbol to require 4 bytes of encoding.
24738 This is not the case for jumps, where references are PC relative. */
24739 if (!JUMP_P (insn))
24740 {
24741 l = get_attr_length_address (insn);
24742 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
24743 l = 4;
24744 }
24745 if (l)
24746 return 1+l;
24747 else
24748 return 2;
24749 }
24750
24751 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
24752 window. */
24753
24754 static void
24755 ix86_avoid_jump_misspredicts (void)
24756 {
24757 rtx insn, start = get_insns ();
24758 int nbytes = 0, njumps = 0;
24759 int isjump = 0;
24760
24761 /* Look for all minimal intervals of instructions containing 4 jumps.
24762 The intervals are bounded by START and INSN. NBYTES is the total
24763 size of instructions in the interval including INSN and not including
24764 START. When NBYTES is smaller than 16 bytes, it is possible
24765 that the end of START and INSN ends up in the same 16-byte page.
24766
24767 The smallest offset in the page INSN can start is the case where START
24768 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
24769 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
24770 */
24771 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24772 {
24773
24774 nbytes += min_insn_size (insn);
24775 if (dump_file)
24776 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
24777 INSN_UID (insn), min_insn_size (insn));
24778 if ((JUMP_P (insn)
24779 && GET_CODE (PATTERN (insn)) != ADDR_VEC
24780 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
24781 || CALL_P (insn))
24782 njumps++;
24783 else
24784 continue;
24785
24786 while (njumps > 3)
24787 {
24788 start = NEXT_INSN (start);
24789 if ((JUMP_P (start)
24790 && GET_CODE (PATTERN (start)) != ADDR_VEC
24791 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
24792 || CALL_P (start))
24793 njumps--, isjump = 1;
24794 else
24795 isjump = 0;
24796 nbytes -= min_insn_size (start);
24797 }
24798 gcc_assert (njumps >= 0);
24799 if (dump_file)
24800 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
24801 INSN_UID (start), INSN_UID (insn), nbytes);
24802
24803 if (njumps == 3 && isjump && nbytes < 16)
24804 {
24805 int padsize = 15 - nbytes + min_insn_size (insn);
24806
24807 if (dump_file)
24808 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
24809 INSN_UID (insn), padsize);
24810 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
24811 }
24812 }
24813 }
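
/* To make the padding formula above concrete (numbers picked purely for
   illustration): if the trailing window already holds three jumps in
   nbytes == 11 bytes and the fourth branch is a 5-byte call included in
   that total, padsize is 15 - 11 + 5 == 9, so a gen_align of 9 bytes is
   emitted in front of the call, pushing it out of the shared 16-byte
   window.  */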
24814
24815 /* AMD Athlon works faster
24816 when RET is not the destination of a conditional jump or directly preceded
24817 by another jump instruction. We avoid the penalty by inserting a NOP just
24818 before such RET instructions. */
24819 static void
24820 ix86_pad_returns (void)
24821 {
24822 edge e;
24823 edge_iterator ei;
24824
24825 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
24826 {
24827 basic_block bb = e->src;
24828 rtx ret = BB_END (bb);
24829 rtx prev;
24830 bool replace = false;
24831
24832 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
24833 || optimize_bb_for_size_p (bb))
24834 continue;
24835 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
24836 if (active_insn_p (prev) || LABEL_P (prev))
24837 break;
24838 if (prev && LABEL_P (prev))
24839 {
24840 edge e;
24841 edge_iterator ei;
24842
24843 FOR_EACH_EDGE (e, ei, bb->preds)
24844 if (EDGE_FREQUENCY (e) && e->src->index >= 0
24845 && !(e->flags & EDGE_FALLTHRU))
24846 replace = true;
24847 }
24848 if (!replace)
24849 {
24850 prev = prev_active_insn (ret);
24851 if (prev
24852 && ((JUMP_P (prev) && any_condjump_p (prev))
24853 || CALL_P (prev)))
24854 replace = true;
24855 /* Empty functions get a branch mispredict even when the jump destination
24856 is not visible to us. */
24857 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
24858 replace = true;
24859 }
24860 if (replace)
24861 {
24862 emit_insn_before (gen_return_internal_long (), ret);
24863 delete_insn (ret);
24864 }
24865 }
24866 }
24867
24868 /* Implement machine specific optimizations. We implement padding of returns
24869 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
24870 static void
24871 ix86_reorg (void)
24872 {
24873 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
24874 ix86_pad_returns ();
24875 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
24876 ix86_avoid_jump_misspredicts ();
24877 }
24878
24879 /* Return nonzero when a QImode register that must be represented via a REX
24880 prefix is used. */
24881 bool
24882 x86_extended_QIreg_mentioned_p (rtx insn)
24883 {
24884 int i;
24885 extract_insn_cached (insn);
24886 for (i = 0; i < recog_data.n_operands; i++)
24887 if (REG_P (recog_data.operand[i])
24888 && REGNO (recog_data.operand[i]) >= 4)
24889 return true;
24890 return false;
24891 }
24892
24893 /* Return nonzero when P points to register encoded via REX prefix.
24894 Called via for_each_rtx. */
24895 static int
24896 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
24897 {
24898 unsigned int regno;
24899 if (!REG_P (*p))
24900 return 0;
24901 regno = REGNO (*p);
24902 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
24903 }
24904
24905 /* Return true when INSN mentions register that must be encoded using REX
24906 prefix. */
24907 bool
24908 x86_extended_reg_mentioned_p (rtx insn)
24909 {
24910 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
24911 }
24912
24913 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
24914 optabs would emit if we didn't have TFmode patterns. */
24915
24916 void
24917 x86_emit_floatuns (rtx operands[2])
24918 {
24919 rtx neglab, donelab, i0, i1, f0, in, out;
24920 enum machine_mode mode, inmode;
24921
24922 inmode = GET_MODE (operands[1]);
24923 gcc_assert (inmode == SImode || inmode == DImode);
24924
24925 out = operands[0];
24926 in = force_reg (inmode, operands[1]);
24927 mode = GET_MODE (out);
24928 neglab = gen_label_rtx ();
24929 donelab = gen_label_rtx ();
24930 f0 = gen_reg_rtx (mode);
24931
24932 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
24933
24934 expand_float (out, in, 0);
24935
24936 emit_jump_insn (gen_jump (donelab));
24937 emit_barrier ();
24938
24939 emit_label (neglab);
24940
24941 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
24942 1, OPTAB_DIRECT);
24943 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
24944 1, OPTAB_DIRECT);
24945 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
24946
24947 expand_float (f0, i0, 0);
24948
24949 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
24950
24951 emit_label (donelab);
24952 }
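
/* In pseudo-C the sequence generated above behaves roughly like
   (a hedged sketch of the intent, not of the exact insns):

       if ((signed) in >= 0)
         out = (fp) in;                    // ordinary signed conversion
       else
         {
           i0 = (in >> 1) | (in & 1);      // halve, keeping the low bit
           out = (fp) i0 + (fp) i0;        // convert the half and double
         }

   so only the signed conversion patterns are ever needed.  */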
24953 \f
24954 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
24955 with all elements equal to VAR. Return true if successful. */
24956
24957 static bool
24958 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
24959 rtx target, rtx val)
24960 {
24961 enum machine_mode smode, wsmode, wvmode;
24962 rtx x;
24963
24964 switch (mode)
24965 {
24966 case V2SImode:
24967 case V2SFmode:
24968 if (!mmx_ok)
24969 return false;
24970 /* FALLTHRU */
24971
24972 case V2DFmode:
24973 case V2DImode:
24974 case V4SFmode:
24975 case V4SImode:
24976 val = force_reg (GET_MODE_INNER (mode), val);
24977 x = gen_rtx_VEC_DUPLICATE (mode, val);
24978 emit_insn (gen_rtx_SET (VOIDmode, target, x));
24979 return true;
24980
24981 case V4HImode:
24982 if (!mmx_ok)
24983 return false;
24984 if (TARGET_SSE || TARGET_3DNOW_A)
24985 {
24986 val = gen_lowpart (SImode, val);
24987 x = gen_rtx_TRUNCATE (HImode, val);
24988 x = gen_rtx_VEC_DUPLICATE (mode, x);
24989 emit_insn (gen_rtx_SET (VOIDmode, target, x));
24990 return true;
24991 }
24992 else
24993 {
24994 smode = HImode;
24995 wsmode = SImode;
24996 wvmode = V2SImode;
24997 goto widen;
24998 }
24999
25000 case V8QImode:
25001 if (!mmx_ok)
25002 return false;
25003 smode = QImode;
25004 wsmode = HImode;
25005 wvmode = V4HImode;
25006 goto widen;
25007 case V8HImode:
25008 if (TARGET_SSE2)
25009 {
25010 rtx tmp1, tmp2;
25011 /* Extend HImode to SImode using a paradoxical SUBREG. */
25012 tmp1 = gen_reg_rtx (SImode);
25013 emit_move_insn (tmp1, gen_lowpart (SImode, val));
25014 /* Insert the SImode value as low element of V4SImode vector. */
25015 tmp2 = gen_reg_rtx (V4SImode);
25016 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
25017 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
25018 CONST0_RTX (V4SImode),
25019 const1_rtx);
25020 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
25021 /* Cast the V4SImode vector back to a V8HImode vector. */
25022 tmp1 = gen_reg_rtx (V8HImode);
25023 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
25024 /* Duplicate the low short through the whole low SImode word. */
25025 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
25026 /* Cast the V8HImode vector back to a V4SImode vector. */
25027 tmp2 = gen_reg_rtx (V4SImode);
25028 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
25029 /* Replicate the low element of the V4SImode vector. */
25030 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
25031 /* Cast the V4SImode vector back to V8HImode, and store in target. */
25032 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
25033 return true;
25034 }
25035 smode = HImode;
25036 wsmode = SImode;
25037 wvmode = V4SImode;
25038 goto widen;
25039 case V16QImode:
25040 if (TARGET_SSE2)
25041 {
25042 rtx tmp1, tmp2;
25043 /* Extend QImode to SImode using a paradoxical SUBREG. */
25044 tmp1 = gen_reg_rtx (SImode);
25045 emit_move_insn (tmp1, gen_lowpart (SImode, val));
25046 /* Insert the SImode value as low element of V4SImode vector. */
25047 tmp2 = gen_reg_rtx (V4SImode);
25048 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
25049 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
25050 CONST0_RTX (V4SImode),
25051 const1_rtx);
25052 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
25053 /* Cast the V4SImode vector back to a V16QImode vector. */
25054 tmp1 = gen_reg_rtx (V16QImode);
25055 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
25056 /* Duplicate the low byte through the whole low SImode word. */
25057 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
25058 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
25059 /* Cast the V16QImode vector back to a V4SImode vector. */
25060 tmp2 = gen_reg_rtx (V4SImode);
25061 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
25062 /* Replicate the low element of the V4SImode vector. */
25063 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
25064 /* Cast the V4SImode vector back to V16QImode, and store in target. */
25065 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
25066 return true;
25067 }
25068 smode = QImode;
25069 wsmode = HImode;
25070 wvmode = V8HImode;
25071 goto widen;
25072 widen:
25073 /* Replicate the value once into the next wider mode and recurse. */
25074 val = convert_modes (wsmode, smode, val, true);
25075 x = expand_simple_binop (wsmode, ASHIFT, val,
25076 GEN_INT (GET_MODE_BITSIZE (smode)),
25077 NULL_RTX, 1, OPTAB_LIB_WIDEN);
25078 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
25079
25080 x = gen_reg_rtx (wvmode);
25081 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
25082 gcc_unreachable ();
25083 emit_move_insn (target, gen_lowpart (mode, x));
25084 return true;
25085
25086 default:
25087 return false;
25088 }
25089 }
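
/* Tracing the widen path above for one case (illustrative only): a
   V8QImode broadcast first builds an HImode value with the byte
   replicated, val | (val << 8), then recurses to duplicate that HImode
   value across a V4HImode vector, and finally reinterprets the result as
   V8QImode via gen_lowpart.  */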
25090
25091 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
25092 whose ONE_VAR element is VAR, and other elements are zero. Return true
25093 if successful. */
25094
25095 static bool
25096 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
25097 rtx target, rtx var, int one_var)
25098 {
25099 enum machine_mode vsimode;
25100 rtx new_target;
25101 rtx x, tmp;
25102 bool use_vector_set = false;
25103
25104 switch (mode)
25105 {
25106 case V2DImode:
25107 use_vector_set = TARGET_64BIT && TARGET_SSE4_1;
25108 break;
25109 case V16QImode:
25110 case V4SImode:
25111 case V4SFmode:
25112 use_vector_set = TARGET_SSE4_1;
25113 break;
25114 case V8HImode:
25115 use_vector_set = TARGET_SSE2;
25116 break;
25117 case V4HImode:
25118 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
25119 break;
25120 default:
25121 break;
25122 }
25123
25124 if (use_vector_set)
25125 {
25126 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
25127 var = force_reg (GET_MODE_INNER (mode), var);
25128 ix86_expand_vector_set (mmx_ok, target, var, one_var);
25129 return true;
25130 }
25131
25132 switch (mode)
25133 {
25134 case V2SFmode:
25135 case V2SImode:
25136 if (!mmx_ok)
25137 return false;
25138 /* FALLTHRU */
25139
25140 case V2DFmode:
25141 case V2DImode:
25142 if (one_var != 0)
25143 return false;
25144 var = force_reg (GET_MODE_INNER (mode), var);
25145 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
25146 emit_insn (gen_rtx_SET (VOIDmode, target, x));
25147 return true;
25148
25149 case V4SFmode:
25150 case V4SImode:
25151 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
25152 new_target = gen_reg_rtx (mode);
25153 else
25154 new_target = target;
25155 var = force_reg (GET_MODE_INNER (mode), var);
25156 x = gen_rtx_VEC_DUPLICATE (mode, var);
25157 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
25158 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
25159 if (one_var != 0)
25160 {
25161 /* We need to shuffle the value to the correct position, so
25162 create a new pseudo to store the intermediate result. */
25163
25164 /* With SSE2, we can use the integer shuffle insns. */
25165 if (mode != V4SFmode && TARGET_SSE2)
25166 {
25167 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
25168 GEN_INT (1),
25169 GEN_INT (one_var == 1 ? 0 : 1),
25170 GEN_INT (one_var == 2 ? 0 : 1),
25171 GEN_INT (one_var == 3 ? 0 : 1)));
25172 if (target != new_target)
25173 emit_move_insn (target, new_target);
25174 return true;
25175 }
25176
25177 /* Otherwise convert the intermediate result to V4SFmode and
25178 use the SSE1 shuffle instructions. */
25179 if (mode != V4SFmode)
25180 {
25181 tmp = gen_reg_rtx (V4SFmode);
25182 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
25183 }
25184 else
25185 tmp = new_target;
25186
25187 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
25188 GEN_INT (1),
25189 GEN_INT (one_var == 1 ? 0 : 1),
25190 GEN_INT (one_var == 2 ? 0+4 : 1+4),
25191 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
25192
25193 if (mode != V4SFmode)
25194 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
25195 else if (tmp != target)
25196 emit_move_insn (target, tmp);
25197 }
25198 else if (target != new_target)
25199 emit_move_insn (target, new_target);
25200 return true;
25201
25202 case V8HImode:
25203 case V16QImode:
25204 vsimode = V4SImode;
25205 goto widen;
25206 case V4HImode:
25207 case V8QImode:
25208 if (!mmx_ok)
25209 return false;
25210 vsimode = V2SImode;
25211 goto widen;
25212 widen:
25213 if (one_var != 0)
25214 return false;
25215
25216 /* Zero extend the variable element to SImode and recurse. */
25217 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
25218
25219 x = gen_reg_rtx (vsimode);
25220 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
25221 var, one_var))
25222 gcc_unreachable ();
25223
25224 emit_move_insn (target, gen_lowpart (mode, x));
25225 return true;
25226
25227 default:
25228 return false;
25229 }
25230 }
25231
25232 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
25233 consisting of the values in VALS. It is known that all elements
25234 except ONE_VAR are constants. Return true if successful. */
25235
25236 static bool
25237 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
25238 rtx target, rtx vals, int one_var)
25239 {
25240 rtx var = XVECEXP (vals, 0, one_var);
25241 enum machine_mode wmode;
25242 rtx const_vec, x;
25243
25244 const_vec = copy_rtx (vals);
25245 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
25246 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
25247
25248 switch (mode)
25249 {
25250 case V2DFmode:
25251 case V2DImode:
25252 case V2SFmode:
25253 case V2SImode:
25254 /* For the two element vectors, it's just as easy to use
25255 the general case. */
25256 return false;
25257
25258 case V4SFmode:
25259 case V4SImode:
25260 case V8HImode:
25261 case V4HImode:
25262 break;
25263
25264 case V16QImode:
25265 if (TARGET_SSE4_1)
25266 break;
25267 wmode = V8HImode;
25268 goto widen;
25269 case V8QImode:
25270 wmode = V4HImode;
25271 goto widen;
25272 widen:
25273 /* There's no way to set one QImode entry easily. Combine
25274 the variable value with its adjacent constant value, and
25275 promote to an HImode set. */
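      /* For example, with a V8QImode vector and one_var == 5, the variable
	 byte is paired with the constant byte at index 4: the variable value
	 is shifted left by 8, IORed with that constant, and the combined
	 HImode value is stored into element 2 (one_var >> 1) of the V4HImode
	 view of the vector.  */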
25276 x = XVECEXP (vals, 0, one_var ^ 1);
25277 if (one_var & 1)
25278 {
25279 var = convert_modes (HImode, QImode, var, true);
25280 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
25281 NULL_RTX, 1, OPTAB_LIB_WIDEN);
25282 x = GEN_INT (INTVAL (x) & 0xff);
25283 }
25284 else
25285 {
25286 var = convert_modes (HImode, QImode, var, true);
25287 x = gen_int_mode (INTVAL (x) << 8, HImode);
25288 }
25289 if (x != const0_rtx)
25290 var = expand_simple_binop (HImode, IOR, var, x, var,
25291 1, OPTAB_LIB_WIDEN);
25292
25293 x = gen_reg_rtx (wmode);
25294 emit_move_insn (x, gen_lowpart (wmode, const_vec));
25295 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
25296
25297 emit_move_insn (target, gen_lowpart (mode, x));
25298 return true;
25299
25300 default:
25301 return false;
25302 }
25303
25304 emit_move_insn (target, const_vec);
25305 ix86_expand_vector_set (mmx_ok, target, var, one_var);
25306 return true;
25307 }
25308
25309 /* A subroutine of ix86_expand_vector_init_general. Use vector
25310 concatenate to handle the most general case: all values variable,
25311 and none identical. */
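/* For example, a V4SFmode vector built from four scalar values is first
   assembled as two V2SFmode halves, each a VEC_CONCAT of two SFmode values,
   and the two halves are then concatenated into the full vector.  */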
25312
25313 static void
25314 ix86_expand_vector_init_concat (enum machine_mode mode,
25315 rtx target, rtx *ops, int n)
25316 {
25317 enum machine_mode cmode, hmode = VOIDmode;
25318 rtx first[4], second[2];
25319 rtvec v;
25320 int i, j;
25321
25322 switch (n)
25323 {
25324 case 2:
25325 switch (mode)
25326 {
25327 case V4SImode:
25328 cmode = V2SImode;
25329 break;
25330 case V4SFmode:
25331 cmode = V2SFmode;
25332 break;
25333 case V2DImode:
25334 cmode = DImode;
25335 break;
25336 case V2SImode:
25337 cmode = SImode;
25338 break;
25339 case V2DFmode:
25340 cmode = DFmode;
25341 break;
25342 case V2SFmode:
25343 cmode = SFmode;
25344 break;
25345 default:
25346 gcc_unreachable ();
25347 }
25348
25349 if (!register_operand (ops[1], cmode))
25350 ops[1] = force_reg (cmode, ops[1]);
25351 if (!register_operand (ops[0], cmode))
25352 ops[0] = force_reg (cmode, ops[0]);
25353 emit_insn (gen_rtx_SET (VOIDmode, target,
25354 gen_rtx_VEC_CONCAT (mode, ops[0],
25355 ops[1])));
25356 break;
25357
25358 case 4:
25359 switch (mode)
25360 {
25361 case V4SImode:
25362 cmode = V2SImode;
25363 break;
25364 case V4SFmode:
25365 cmode = V2SFmode;
25366 break;
25367 default:
25368 gcc_unreachable ();
25369 }
25370 goto half;
25371
25372 half:
25373 /* FIXME: We process inputs backward to help RA. PR 36222. */
25374 i = n - 1;
25375 j = (n >> 1) - 1;
25376 for (; i > 0; i -= 2, j--)
25377 {
25378 first[j] = gen_reg_rtx (cmode);
25379 v = gen_rtvec (2, ops[i - 1], ops[i]);
25380 ix86_expand_vector_init (false, first[j],
25381 gen_rtx_PARALLEL (cmode, v));
25382 }
25383
25384 n >>= 1;
25385 if (n > 2)
25386 {
25387 gcc_assert (hmode != VOIDmode);
25388 for (i = j = 0; i < n; i += 2, j++)
25389 {
25390 second[j] = gen_reg_rtx (hmode);
25391 ix86_expand_vector_init_concat (hmode, second [j],
25392 &first [i], 2);
25393 }
25394 n >>= 1;
25395 ix86_expand_vector_init_concat (mode, target, second, n);
25396 }
25397 else
25398 ix86_expand_vector_init_concat (mode, target, first, n);
25399 break;
25400
25401 default:
25402 gcc_unreachable ();
25403 }
25404 }
25405
25406 /* A subroutine of ix86_expand_vector_init_general. Use vector
25407 interleave to handle the most general case: all values variable,
25408 and none identical. */
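/* For V8HImode, for example, the eight HImode values are first packed in
   pairs into four vector registers (the first element of each pair via a
   low-element insert, the second via vec_set), those registers are
   interleaved pairwise as V4SImode vectors, and the two intermediate
   results are interleaved once more as V2DImode vectors to form the full
   vector.  */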
25409
25410 static void
25411 ix86_expand_vector_init_interleave (enum machine_mode mode,
25412 rtx target, rtx *ops, int n)
25413 {
25414 enum machine_mode first_imode, second_imode, third_imode;
25415 int i, j;
25416 rtx op0, op1;
25417 rtx (*gen_load_even) (rtx, rtx, rtx);
25418 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
25419 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
25420
25421 switch (mode)
25422 {
25423 case V8HImode:
25424 gen_load_even = gen_vec_setv8hi;
25425 gen_interleave_first_low = gen_vec_interleave_lowv4si;
25426 gen_interleave_second_low = gen_vec_interleave_lowv2di;
25427 first_imode = V4SImode;
25428 second_imode = V2DImode;
25429 third_imode = VOIDmode;
25430 break;
25431 case V16QImode:
25432 gen_load_even = gen_vec_setv16qi;
25433 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
25434 gen_interleave_second_low = gen_vec_interleave_lowv4si;
25435 first_imode = V8HImode;
25436 second_imode = V4SImode;
25437 third_imode = V2DImode;
25438 break;
25439 default:
25440 gcc_unreachable ();
25441 }
25442
25443 for (i = 0; i < n; i++)
25444 {
25445       /* Extend the odd element to SImode using a paradoxical SUBREG.  */
25446 op0 = gen_reg_rtx (SImode);
25447 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
25448
25449 /* Insert the SImode value as low element of V4SImode vector. */
25450 op1 = gen_reg_rtx (V4SImode);
25451 op0 = gen_rtx_VEC_MERGE (V4SImode,
25452 gen_rtx_VEC_DUPLICATE (V4SImode,
25453 op0),
25454 CONST0_RTX (V4SImode),
25455 const1_rtx);
25456 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
25457
25458       /* Cast the V4SImode vector back to a vector in original mode.  */
25459 op0 = gen_reg_rtx (mode);
25460 emit_move_insn (op0, gen_lowpart (mode, op1));
25461
25462       /* Load even elements into the second position.  */
25463 emit_insn ((*gen_load_even) (op0, ops [i + i + 1],
25464 const1_rtx));
25465
25466 /* Cast vector to FIRST_IMODE vector. */
25467 ops[i] = gen_reg_rtx (first_imode);
25468 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
25469 }
25470
25471 /* Interleave low FIRST_IMODE vectors. */
25472 for (i = j = 0; i < n; i += 2, j++)
25473 {
25474 op0 = gen_reg_rtx (first_imode);
25475 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
25476
25477 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
25478 ops[j] = gen_reg_rtx (second_imode);
25479 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
25480 }
25481
25482 /* Interleave low SECOND_IMODE vectors. */
25483 switch (second_imode)
25484 {
25485 case V4SImode:
25486 for (i = j = 0; i < n / 2; i += 2, j++)
25487 {
25488 op0 = gen_reg_rtx (second_imode);
25489 emit_insn ((*gen_interleave_second_low) (op0, ops[i],
25490 ops[i + 1]));
25491
25492 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
25493 vector. */
25494 ops[j] = gen_reg_rtx (third_imode);
25495 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
25496 }
25497 second_imode = V2DImode;
25498 gen_interleave_second_low = gen_vec_interleave_lowv2di;
25499 /* FALLTHRU */
25500
25501 case V2DImode:
25502 op0 = gen_reg_rtx (second_imode);
25503 emit_insn ((*gen_interleave_second_low) (op0, ops[0],
25504 ops[1]));
25505
25506       /* Cast the SECOND_IMODE vector back to a vector in original
25507 mode. */
25508 emit_insn (gen_rtx_SET (VOIDmode, target,
25509 gen_lowpart (mode, op0)));
25510 break;
25511
25512 default:
25513 gcc_unreachable ();
25514 }
25515 }
25516
25517 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
25518 all values variable, and none identical. */
25519
25520 static void
25521 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
25522 rtx target, rtx vals)
25523 {
25524 rtx ops[16];
25525 int n, i;
25526
25527 switch (mode)
25528 {
25529 case V2SFmode:
25530 case V2SImode:
25531 if (!mmx_ok && !TARGET_SSE)
25532 break;
25533 /* FALLTHRU */
25534
25535 case V4SFmode:
25536 case V4SImode:
25537 case V2DFmode:
25538 case V2DImode:
25539 n = GET_MODE_NUNITS (mode);
25540 for (i = 0; i < n; i++)
25541 ops[i] = XVECEXP (vals, 0, i);
25542 ix86_expand_vector_init_concat (mode, target, ops, n);
25543 return;
25544
25545 case V16QImode:
25546 if (!TARGET_SSE4_1)
25547 break;
25548 /* FALLTHRU */
25549
25550 case V8HImode:
25551 if (!TARGET_SSE2)
25552 break;
25553
25554 n = GET_MODE_NUNITS (mode);
25555 for (i = 0; i < n; i++)
25556 ops[i] = XVECEXP (vals, 0, i);
25557 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
25558 return;
25559
25560 case V4HImode:
25561 case V8QImode:
25562 break;
25563
25564 default:
25565 gcc_unreachable ();
25566 }
25567
25568 {
25569 int i, j, n_elts, n_words, n_elt_per_word;
25570 enum machine_mode inner_mode;
25571 rtx words[4], shift;
25572
25573 inner_mode = GET_MODE_INNER (mode);
25574 n_elts = GET_MODE_NUNITS (mode);
25575 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
25576 n_elt_per_word = n_elts / n_words;
25577 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
25578
25579 for (i = 0; i < n_words; ++i)
25580 {
25581 rtx word = NULL_RTX;
25582
25583 for (j = 0; j < n_elt_per_word; ++j)
25584 {
25585 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
25586 elt = convert_modes (word_mode, inner_mode, elt, true);
25587
25588 if (j == 0)
25589 word = elt;
25590 else
25591 {
25592 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
25593 word, 1, OPTAB_LIB_WIDEN);
25594 word = expand_simple_binop (word_mode, IOR, word, elt,
25595 word, 1, OPTAB_LIB_WIDEN);
25596 }
25597 }
25598
25599 words[i] = word;
25600 }
25601
25602 if (n_words == 1)
25603 emit_move_insn (target, gen_lowpart (mode, words[0]));
25604 else if (n_words == 2)
25605 {
25606 rtx tmp = gen_reg_rtx (mode);
25607 emit_clobber (tmp);
25608 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
25609 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
25610 emit_move_insn (target, tmp);
25611 }
25612 else if (n_words == 4)
25613 {
25614 rtx tmp = gen_reg_rtx (V4SImode);
25615 gcc_assert (word_mode == SImode);
25616 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
25617 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
25618 emit_move_insn (target, gen_lowpart (mode, tmp));
25619 }
25620 else
25621 gcc_unreachable ();
25622 }
25623 }
25624
25625 /* Initialize vector TARGET via VALS. Suppress the use of MMX
25626 instructions unless MMX_OK is true. */
25627
25628 void
25629 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
25630 {
25631 enum machine_mode mode = GET_MODE (target);
25632 enum machine_mode inner_mode = GET_MODE_INNER (mode);
25633 int n_elts = GET_MODE_NUNITS (mode);
25634 int n_var = 0, one_var = -1;
25635 bool all_same = true, all_const_zero = true;
25636 int i;
25637 rtx x;
25638
25639 for (i = 0; i < n_elts; ++i)
25640 {
25641 x = XVECEXP (vals, 0, i);
25642 if (!(CONST_INT_P (x)
25643 || GET_CODE (x) == CONST_DOUBLE
25644 || GET_CODE (x) == CONST_FIXED))
25645 n_var++, one_var = i;
25646 else if (x != CONST0_RTX (inner_mode))
25647 all_const_zero = false;
25648 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
25649 all_same = false;
25650 }
25651
25652 /* Constants are best loaded from the constant pool. */
25653 if (n_var == 0)
25654 {
25655 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
25656 return;
25657 }
25658
25659 /* If all values are identical, broadcast the value. */
25660 if (all_same
25661 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
25662 XVECEXP (vals, 0, 0)))
25663 return;
25664
25665 /* Values where only one field is non-constant are best loaded from
25666 the pool and overwritten via move later. */
25667 if (n_var == 1)
25668 {
25669 if (all_const_zero
25670 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
25671 XVECEXP (vals, 0, one_var),
25672 one_var))
25673 return;
25674
25675 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
25676 return;
25677 }
25678
25679 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
25680 }
25681
25682 void
25683 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
25684 {
25685 enum machine_mode mode = GET_MODE (target);
25686 enum machine_mode inner_mode = GET_MODE_INNER (mode);
25687 bool use_vec_merge = false;
25688 rtx tmp;
25689
25690 switch (mode)
25691 {
25692 case V2SFmode:
25693 case V2SImode:
25694 if (mmx_ok)
25695 {
25696 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
25697 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
25698 if (elt == 0)
25699 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
25700 else
25701 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
25702 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
25703 return;
25704 }
25705 break;
25706
25707 case V2DImode:
25708 use_vec_merge = TARGET_SSE4_1;
25709 if (use_vec_merge)
25710 break;
25711
25712 case V2DFmode:
25713 {
25714 rtx op0, op1;
25715
25716 /* For the two element vectors, we implement a VEC_CONCAT with
25717 the extraction of the other element. */
25718
25719 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
25720 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
25721
25722 if (elt == 0)
25723 op0 = val, op1 = tmp;
25724 else
25725 op0 = tmp, op1 = val;
25726
25727 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
25728 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
25729 }
25730 return;
25731
25732 case V4SFmode:
25733 use_vec_merge = TARGET_SSE4_1;
25734 if (use_vec_merge)
25735 break;
25736
25737 switch (elt)
25738 {
25739 case 0:
25740 use_vec_merge = true;
25741 break;
25742
25743 case 1:
25744 /* tmp = target = A B C D */
25745 tmp = copy_to_reg (target);
25746 /* target = A A B B */
25747 emit_insn (gen_sse_unpcklps (target, target, target));
25748 /* target = X A B B */
25749 ix86_expand_vector_set (false, target, val, 0);
25750 /* target = A X C D */
25751 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
25752 GEN_INT (1), GEN_INT (0),
25753 GEN_INT (2+4), GEN_INT (3+4)));
25754 return;
25755
25756 case 2:
25757 /* tmp = target = A B C D */
25758 tmp = copy_to_reg (target);
25759 /* tmp = X B C D */
25760 ix86_expand_vector_set (false, tmp, val, 0);
25761 /* target = A B X D */
25762 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
25763 GEN_INT (0), GEN_INT (1),
25764 GEN_INT (0+4), GEN_INT (3+4)));
25765 return;
25766
25767 case 3:
25768 /* tmp = target = A B C D */
25769 tmp = copy_to_reg (target);
25770 /* tmp = X B C D */
25771 ix86_expand_vector_set (false, tmp, val, 0);
25772 	  /* target = A B C X */
25773 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
25774 GEN_INT (0), GEN_INT (1),
25775 GEN_INT (2+4), GEN_INT (0+4)));
25776 return;
25777
25778 default:
25779 gcc_unreachable ();
25780 }
25781 break;
25782
25783 case V4SImode:
25784 use_vec_merge = TARGET_SSE4_1;
25785 if (use_vec_merge)
25786 break;
25787
25788 /* Element 0 handled by vec_merge below. */
25789 if (elt == 0)
25790 {
25791 use_vec_merge = true;
25792 break;
25793 }
25794
25795 if (TARGET_SSE2)
25796 {
25797 /* With SSE2, use integer shuffles to swap element 0 and ELT,
25798 store into element 0, then shuffle them back. */
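	  /* For ELT == 2, for example, ORDER becomes {2, 1, 0, 3}: the first
	     pshufd brings element 2 into position 0 (and element 0 into
	     position 2), VAL is stored into position 0, and applying the same
	     permutation again restores the original order with VAL in
	     position 2.  */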
25799
25800 rtx order[4];
25801
25802 order[0] = GEN_INT (elt);
25803 order[1] = const1_rtx;
25804 order[2] = const2_rtx;
25805 order[3] = GEN_INT (3);
25806 order[elt] = const0_rtx;
25807
25808 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
25809 order[1], order[2], order[3]));
25810
25811 ix86_expand_vector_set (false, target, val, 0);
25812
25813 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
25814 order[1], order[2], order[3]));
25815 }
25816 else
25817 {
25818 /* For SSE1, we have to reuse the V4SF code. */
25819 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
25820 gen_lowpart (SFmode, val), elt);
25821 }
25822 return;
25823
25824 case V8HImode:
25825 use_vec_merge = TARGET_SSE2;
25826 break;
25827 case V4HImode:
25828 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
25829 break;
25830
25831 case V16QImode:
25832 use_vec_merge = TARGET_SSE4_1;
25833 break;
25834
25835 case V8QImode:
25836 default:
25837 break;
25838 }
25839
25840 if (use_vec_merge)
25841 {
25842 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
25843 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
25844 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
25845 }
25846 else
25847 {
25848 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
25849
25850 emit_move_insn (mem, target);
25851
25852 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
25853 emit_move_insn (tmp, val);
25854
25855 emit_move_insn (target, mem);
25856 }
25857 }
25858
25859 void
25860 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
25861 {
25862 enum machine_mode mode = GET_MODE (vec);
25863 enum machine_mode inner_mode = GET_MODE_INNER (mode);
25864 bool use_vec_extr = false;
25865 rtx tmp;
25866
25867 switch (mode)
25868 {
25869 case V2SImode:
25870 case V2SFmode:
25871 if (!mmx_ok)
25872 break;
25873 /* FALLTHRU */
25874
25875 case V2DFmode:
25876 case V2DImode:
25877 use_vec_extr = true;
25878 break;
25879
25880 case V4SFmode:
25881 use_vec_extr = TARGET_SSE4_1;
25882 if (use_vec_extr)
25883 break;
25884
25885 switch (elt)
25886 {
25887 case 0:
25888 tmp = vec;
25889 break;
25890
25891 case 1:
25892 case 3:
25893 tmp = gen_reg_rtx (mode);
25894 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
25895 GEN_INT (elt), GEN_INT (elt),
25896 GEN_INT (elt+4), GEN_INT (elt+4)));
25897 break;
25898
25899 case 2:
25900 tmp = gen_reg_rtx (mode);
25901 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
25902 break;
25903
25904 default:
25905 gcc_unreachable ();
25906 }
25907 vec = tmp;
25908 use_vec_extr = true;
25909 elt = 0;
25910 break;
25911
25912 case V4SImode:
25913 use_vec_extr = TARGET_SSE4_1;
25914 if (use_vec_extr)
25915 break;
25916
25917 if (TARGET_SSE2)
25918 {
25919 switch (elt)
25920 {
25921 case 0:
25922 tmp = vec;
25923 break;
25924
25925 case 1:
25926 case 3:
25927 tmp = gen_reg_rtx (mode);
25928 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
25929 GEN_INT (elt), GEN_INT (elt),
25930 GEN_INT (elt), GEN_INT (elt)));
25931 break;
25932
25933 case 2:
25934 tmp = gen_reg_rtx (mode);
25935 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
25936 break;
25937
25938 default:
25939 gcc_unreachable ();
25940 }
25941 vec = tmp;
25942 use_vec_extr = true;
25943 elt = 0;
25944 }
25945 else
25946 {
25947 /* For SSE1, we have to reuse the V4SF code. */
25948 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
25949 gen_lowpart (V4SFmode, vec), elt);
25950 return;
25951 }
25952 break;
25953
25954 case V8HImode:
25955 use_vec_extr = TARGET_SSE2;
25956 break;
25957 case V4HImode:
25958 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
25959 break;
25960
25961 case V16QImode:
25962 use_vec_extr = TARGET_SSE4_1;
25963 break;
25964
25965 case V8QImode:
25966 /* ??? Could extract the appropriate HImode element and shift. */
25967 default:
25968 break;
25969 }
25970
25971 if (use_vec_extr)
25972 {
25973 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
25974 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
25975
25976 /* Let the rtl optimizers know about the zero extension performed. */
25977 if (inner_mode == QImode || inner_mode == HImode)
25978 {
25979 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
25980 target = gen_lowpart (SImode, target);
25981 }
25982
25983 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
25984 }
25985 else
25986 {
25987 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
25988
25989 emit_move_insn (mem, vec);
25990
25991 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
25992 emit_move_insn (target, tmp);
25993 }
25994 }
25995
25996 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
25997 pattern to reduce; DEST is the destination; IN is the input vector. */
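/* For IN = {a, b, c, d}, the movhlps below yields tmp1 = {c, d, c, d};
   applying FN combines a with c and b with d in tmp2; the shufps then
   replicates element 1 of tmp2 into tmp3, so the final FN leaves the
   combination of all four elements in element 0 of DEST.  */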
25998
25999 void
26000 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
26001 {
26002 rtx tmp1, tmp2, tmp3;
26003
26004 tmp1 = gen_reg_rtx (V4SFmode);
26005 tmp2 = gen_reg_rtx (V4SFmode);
26006 tmp3 = gen_reg_rtx (V4SFmode);
26007
26008 emit_insn (gen_sse_movhlps (tmp1, in, in));
26009 emit_insn (fn (tmp2, tmp1, in));
26010
26011 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
26012 GEN_INT (1), GEN_INT (1),
26013 GEN_INT (1+4), GEN_INT (1+4)));
26014 emit_insn (fn (dest, tmp2, tmp3));
26015 }
26016 \f
26017 /* Target hook for scalar_mode_supported_p. */
26018 static bool
26019 ix86_scalar_mode_supported_p (enum machine_mode mode)
26020 {
26021 if (DECIMAL_FLOAT_MODE_P (mode))
26022 return true;
26023 else if (mode == TFmode)
26024 return true;
26025 else
26026 return default_scalar_mode_supported_p (mode);
26027 }
26028
26029 /* Implements target hook vector_mode_supported_p. */
26030 static bool
26031 ix86_vector_mode_supported_p (enum machine_mode mode)
26032 {
26033 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
26034 return true;
26035 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
26036 return true;
26037 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
26038 return true;
26039 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
26040 return true;
26041 return false;
26042 }
26043
26044 /* Target hook for c_mode_for_suffix. */
26045 static enum machine_mode
26046 ix86_c_mode_for_suffix (char suffix)
26047 {
26048 if (suffix == 'q')
26049 return TFmode;
26050 if (suffix == 'w')
26051 return XFmode;
26052
26053 return VOIDmode;
26054 }
26055
26056 /* Worker function for TARGET_MD_ASM_CLOBBERS.
26057
26058 We do this in the new i386 backend to maintain source compatibility
26059 with the old cc0-based compiler. */
26060
26061 static tree
26062 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
26063 tree inputs ATTRIBUTE_UNUSED,
26064 tree clobbers)
26065 {
26066 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
26067 clobbers);
26068 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
26069 clobbers);
26070 return clobbers;
26071 }
26072
26073 /* Implements target vector targetm.asm.encode_section_info. This
26074    is not used by NetWare.  */
26075
26076 static void ATTRIBUTE_UNUSED
26077 ix86_encode_section_info (tree decl, rtx rtl, int first)
26078 {
26079 default_encode_section_info (decl, rtl, first);
26080
26081 if (TREE_CODE (decl) == VAR_DECL
26082 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
26083 && ix86_in_large_data_p (decl))
26084 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
26085 }
26086
26087 /* Worker function for REVERSE_CONDITION. */
26088
26089 enum rtx_code
26090 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
26091 {
26092 return (mode != CCFPmode && mode != CCFPUmode
26093 ? reverse_condition (code)
26094 : reverse_condition_maybe_unordered (code));
26095 }
26096
26097 /* Output code to perform an x87 FP register move, from OPERANDS[1]
26098 to OPERANDS[0]. */
26099
26100 const char *
26101 output_387_reg_move (rtx insn, rtx *operands)
26102 {
26103 if (REG_P (operands[0]))
26104 {
26105 if (REG_P (operands[1])
26106 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
26107 {
26108 if (REGNO (operands[0]) == FIRST_STACK_REG)
26109 return output_387_ffreep (operands, 0);
26110 return "fstp\t%y0";
26111 }
26112 if (STACK_TOP_P (operands[0]))
26113 return "fld%z1\t%y1";
26114 return "fst\t%y0";
26115 }
26116 else if (MEM_P (operands[0]))
26117 {
26118 gcc_assert (REG_P (operands[1]));
26119 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
26120 return "fstp%z0\t%y0";
26121 else
26122 {
26123 /* There is no non-popping store to memory for XFmode.
26124 So if we need one, follow the store with a load. */
26125 if (GET_MODE (operands[0]) == XFmode)
26126 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
26127 else
26128 return "fst%z0\t%y0";
26129 }
26130 }
26131 else
26132 gcc_unreachable();
26133 }
26134
26135 /* Output code to perform a conditional jump to LABEL if the C2 flag in the
26136    FP status register is set.  */
26137
26138 void
26139 ix86_emit_fp_unordered_jump (rtx label)
26140 {
26141 rtx reg = gen_reg_rtx (HImode);
26142 rtx temp;
26143
26144 emit_insn (gen_x86_fnstsw_1 (reg));
26145
26146 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
26147 {
26148 emit_insn (gen_x86_sahf_1 (reg));
26149
26150 temp = gen_rtx_REG (CCmode, FLAGS_REG);
26151 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
26152 }
26153 else
26154 {
26155 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
26156
26157 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
26158 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
26159 }
26160
26161 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
26162 gen_rtx_LABEL_REF (VOIDmode, label),
26163 pc_rtx);
26164 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
26165
26166 emit_jump_insn (temp);
26167 predict_jump (REG_BR_PROB_BASE * 10 / 100);
26168 }
26169
26170 /* Output code to perform a log1p XFmode calculation. */
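/* The emitted sequence branches on |op1|: below the 0.2928... threshold
   (which is 1 - sqrt(2)/2), fyl2xp1 is used with fldln2 as the Y operand,
   computing ln(2) * log2 (1 + op1) = log1p (op1) directly; otherwise 1.0
   is added to op1 first and fyl2x is used instead.  */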
26171
26172 void ix86_emit_i387_log1p (rtx op0, rtx op1)
26173 {
26174 rtx label1 = gen_label_rtx ();
26175 rtx label2 = gen_label_rtx ();
26176
26177 rtx tmp = gen_reg_rtx (XFmode);
26178 rtx tmp2 = gen_reg_rtx (XFmode);
26179
26180 emit_insn (gen_absxf2 (tmp, op1));
26181 emit_insn (gen_cmpxf (tmp,
26182 CONST_DOUBLE_FROM_REAL_VALUE (
26183 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
26184 XFmode)));
26185 emit_jump_insn (gen_bge (label1));
26186
26187 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
26188 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
26189 emit_jump (label2);
26190
26191 emit_label (label1);
26192 emit_move_insn (tmp, CONST1_RTX (XFmode));
26193 emit_insn (gen_addxf3 (tmp, op1, tmp));
26194 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
26195 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
26196
26197 emit_label (label2);
26198 }
26199
26200 /* Output code to perform a Newton-Raphson approximation of a single precision
26201 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
26202
26203 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
26204 {
26205 rtx x0, x1, e0, e1, two;
26206
26207 x0 = gen_reg_rtx (mode);
26208 e0 = gen_reg_rtx (mode);
26209 e1 = gen_reg_rtx (mode);
26210 x1 = gen_reg_rtx (mode);
26211
26212 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
26213
26214 if (VECTOR_MODE_P (mode))
26215 two = ix86_build_const_vector (SFmode, true, two);
26216
26217 two = force_reg (mode, two);
26218
26219 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
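   /* This is a single Newton-Raphson refinement step, x1 = x0 * (2 - b * x0),
      applied to the hardware rcp estimate x0; it roughly doubles the
      (approximately 12-bit) accuracy of that estimate.  */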
26220
26221 /* x0 = rcp(b) estimate */
26222 emit_insn (gen_rtx_SET (VOIDmode, x0,
26223 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
26224 UNSPEC_RCP)));
26225 /* e0 = x0 * b */
26226 emit_insn (gen_rtx_SET (VOIDmode, e0,
26227 gen_rtx_MULT (mode, x0, b)));
26228 /* e1 = 2. - e0 */
26229 emit_insn (gen_rtx_SET (VOIDmode, e1,
26230 gen_rtx_MINUS (mode, two, e0)));
26231 /* x1 = x0 * e1 */
26232 emit_insn (gen_rtx_SET (VOIDmode, x1,
26233 gen_rtx_MULT (mode, x0, e1)));
26234 /* res = a * x1 */
26235 emit_insn (gen_rtx_SET (VOIDmode, res,
26236 gen_rtx_MULT (mode, a, x1)));
26237 }
26238
26239 /* Output code to perform a Newton-Raphson approximation of a
26240 single precision floating point [reciprocal] square root. */
26241
26242 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
26243 bool recip)
26244 {
26245 rtx x0, e0, e1, e2, e3, mthree, mhalf;
26246 REAL_VALUE_TYPE r;
26247
26248 x0 = gen_reg_rtx (mode);
26249 e0 = gen_reg_rtx (mode);
26250 e1 = gen_reg_rtx (mode);
26251 e2 = gen_reg_rtx (mode);
26252 e3 = gen_reg_rtx (mode);
26253
26254 real_from_integer (&r, VOIDmode, -3, -1, 0);
26255 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
26256
26257 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
26258 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
26259
26260 if (VECTOR_MODE_P (mode))
26261 {
26262 mthree = ix86_build_const_vector (SFmode, true, mthree);
26263 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
26264 }
26265
26266 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
26267 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
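  /* Both forms are a single Newton-Raphson step x1 = x0 * (3 - a * x0 * x0) / 2
     applied to the hardware rsqrt estimate x0, rearranged to use -0.5 and
     -3.0; including the extra factor of a turns the reciprocal square root
     into an approximation of sqrt (a).  */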
26268
26269 /* x0 = rsqrt(a) estimate */
26270 emit_insn (gen_rtx_SET (VOIDmode, x0,
26271 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
26272 UNSPEC_RSQRT)));
26273
26274   /* If a == 0.0, zero out the infinite rsqrt estimate to prevent a NaN result for sqrt (0.0).  */
26275 if (!recip)
26276 {
26277 rtx zero, mask;
26278
26279 zero = gen_reg_rtx (mode);
26280 mask = gen_reg_rtx (mode);
26281
26282 zero = force_reg (mode, CONST0_RTX(mode));
26283 emit_insn (gen_rtx_SET (VOIDmode, mask,
26284 gen_rtx_NE (mode, zero, a)));
26285
26286 emit_insn (gen_rtx_SET (VOIDmode, x0,
26287 gen_rtx_AND (mode, x0, mask)));
26288 }
26289
26290 /* e0 = x0 * a */
26291 emit_insn (gen_rtx_SET (VOIDmode, e0,
26292 gen_rtx_MULT (mode, x0, a)));
26293 /* e1 = e0 * x0 */
26294 emit_insn (gen_rtx_SET (VOIDmode, e1,
26295 gen_rtx_MULT (mode, e0, x0)));
26296
26297 /* e2 = e1 - 3. */
26298 mthree = force_reg (mode, mthree);
26299 emit_insn (gen_rtx_SET (VOIDmode, e2,
26300 gen_rtx_PLUS (mode, e1, mthree)));
26301
26302 mhalf = force_reg (mode, mhalf);
26303 if (recip)
26304 /* e3 = -.5 * x0 */
26305 emit_insn (gen_rtx_SET (VOIDmode, e3,
26306 gen_rtx_MULT (mode, x0, mhalf)));
26307 else
26308 /* e3 = -.5 * e0 */
26309 emit_insn (gen_rtx_SET (VOIDmode, e3,
26310 gen_rtx_MULT (mode, e0, mhalf)));
26311 /* ret = e2 * e3 */
26312 emit_insn (gen_rtx_SET (VOIDmode, res,
26313 gen_rtx_MULT (mode, e2, e3)));
26314 }
26315
26316 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
26317
26318 static void ATTRIBUTE_UNUSED
26319 i386_solaris_elf_named_section (const char *name, unsigned int flags,
26320 tree decl)
26321 {
26322 /* With Binutils 2.15, the "@unwind" marker must be specified on
26323 every occurrence of the ".eh_frame" section, not just the first
26324 one. */
26325 if (TARGET_64BIT
26326 && strcmp (name, ".eh_frame") == 0)
26327 {
26328 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
26329 flags & SECTION_WRITE ? "aw" : "a");
26330 return;
26331 }
26332 default_elf_asm_named_section (name, flags, decl);
26333 }
26334
26335 /* Return the mangling of TYPE if it is an extended fundamental type. */
26336
26337 static const char *
26338 ix86_mangle_type (const_tree type)
26339 {
26340 type = TYPE_MAIN_VARIANT (type);
26341
26342 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
26343 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
26344 return NULL;
26345
26346 switch (TYPE_MODE (type))
26347 {
26348 case TFmode:
26349 /* __float128 is "g". */
26350 return "g";
26351 case XFmode:
26352 /* "long double" or __float80 is "e". */
26353 return "e";
26354 default:
26355 return NULL;
26356 }
26357 }
26358
26359 /* For 32-bit code we can save PIC register setup by using
26360 __stack_chk_fail_local hidden function instead of calling
26361    __stack_chk_fail directly.  64-bit code doesn't need to set up any PIC
26362 register, so it is better to call __stack_chk_fail directly. */
26363
26364 static tree
26365 ix86_stack_protect_fail (void)
26366 {
26367 return TARGET_64BIT
26368 ? default_external_stack_protect_fail ()
26369 : default_hidden_stack_protect_fail ();
26370 }
26371
26372 /* Select a format to encode pointers in exception handling data. CODE
26373 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
26374 true if the symbol may be affected by dynamic relocations.
26375
26376 ??? All x86 object file formats are capable of representing this.
26377 After all, the relocation needed is the same as for the call insn.
26378 Whether or not a particular assembler allows us to enter such, I
26379 guess we'll have to see. */
26380 int
26381 asm_preferred_eh_data_format (int code, int global)
26382 {
26383 if (flag_pic)
26384 {
26385 int type = DW_EH_PE_sdata8;
26386 if (!TARGET_64BIT
26387 || ix86_cmodel == CM_SMALL_PIC
26388 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
26389 type = DW_EH_PE_sdata4;
26390 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
26391 }
26392 if (ix86_cmodel == CM_SMALL
26393 || (ix86_cmodel == CM_MEDIUM && code))
26394 return DW_EH_PE_udata4;
26395 return DW_EH_PE_absptr;
26396 }
26397 \f
26398 /* Expand copysign from SIGN to the positive value ABS_VALUE
26399 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
26400 the sign-bit. */
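/* In other words, RESULT = ABS_VALUE | (SIGN & SIGNBIT), where the
   sign-bit mask is either built directly here or obtained by inverting
   MASK (which has the sign bit cleared).  */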
26401 static void
26402 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
26403 {
26404 enum machine_mode mode = GET_MODE (sign);
26405 rtx sgn = gen_reg_rtx (mode);
26406 if (mask == NULL_RTX)
26407 {
26408 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
26409 if (!VECTOR_MODE_P (mode))
26410 {
26411 /* We need to generate a scalar mode mask in this case. */
26412 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
26413 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
26414 mask = gen_reg_rtx (mode);
26415 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
26416 }
26417 }
26418 else
26419 mask = gen_rtx_NOT (mode, mask);
26420 emit_insn (gen_rtx_SET (VOIDmode, sgn,
26421 gen_rtx_AND (mode, mask, sign)));
26422 emit_insn (gen_rtx_SET (VOIDmode, result,
26423 gen_rtx_IOR (mode, abs_value, sgn)));
26424 }
26425
26426 /* Expand fabs (OP0) and return a new rtx that holds the result. The
26427 mask for masking out the sign-bit is stored in *SMASK, if that is
26428 non-null. */
26429 static rtx
26430 ix86_expand_sse_fabs (rtx op0, rtx *smask)
26431 {
26432 enum machine_mode mode = GET_MODE (op0);
26433 rtx xa, mask;
26434
26435 xa = gen_reg_rtx (mode);
26436 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
26437 if (!VECTOR_MODE_P (mode))
26438 {
26439 /* We need to generate a scalar mode mask in this case. */
26440 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
26441 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
26442 mask = gen_reg_rtx (mode);
26443 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
26444 }
26445 emit_insn (gen_rtx_SET (VOIDmode, xa,
26446 gen_rtx_AND (mode, op0, mask)));
26447
26448 if (smask)
26449 *smask = mask;
26450
26451 return xa;
26452 }
26453
26454 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
26455 swapping the operands if SWAP_OPERANDS is true. The expanded
26456 code is a forward jump to a newly created label in case the
26457 comparison is true. The generated label rtx is returned. */
26458 static rtx
26459 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
26460 bool swap_operands)
26461 {
26462 rtx label, tmp;
26463
26464 if (swap_operands)
26465 {
26466 tmp = op0;
26467 op0 = op1;
26468 op1 = tmp;
26469 }
26470
26471 label = gen_label_rtx ();
26472 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
26473 emit_insn (gen_rtx_SET (VOIDmode, tmp,
26474 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
26475 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
26476 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
26477 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
26478 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
26479 JUMP_LABEL (tmp) = label;
26480
26481 return label;
26482 }
26483
26484 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
26485 using comparison code CODE. Operands are swapped for the comparison if
26486 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
26487 static rtx
26488 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
26489 bool swap_operands)
26490 {
26491 enum machine_mode mode = GET_MODE (op0);
26492 rtx mask = gen_reg_rtx (mode);
26493
26494 if (swap_operands)
26495 {
26496 rtx tmp = op0;
26497 op0 = op1;
26498 op1 = tmp;
26499 }
26500
26501 if (mode == DFmode)
26502 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
26503 gen_rtx_fmt_ee (code, mode, op0, op1)));
26504 else
26505 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
26506 gen_rtx_fmt_ee (code, mode, op0, op1)));
26507
26508 return mask;
26509 }
26510
26511 /* Generate and return a rtx of mode MODE for 2**n where n is the number
26512 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
26513 static rtx
26514 ix86_gen_TWO52 (enum machine_mode mode)
26515 {
26516 REAL_VALUE_TYPE TWO52r;
26517 rtx TWO52;
26518
26519 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
26520 TWO52 = const_double_from_real_value (TWO52r, mode);
26521 TWO52 = force_reg (mode, TWO52);
26522
26523 return TWO52;
26524 }
26525
26526 /* Expand SSE sequence for computing lround from OP1 storing
26527 into OP0. */
26528 void
26529 ix86_expand_lround (rtx op0, rtx op1)
26530 {
26531 /* C code for the stuff we're doing below:
26532 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
26533 return (long)tmp;
26534 */
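  /* nextafter (0.5, 0.0), the largest representable value below 0.5, is
     used instead of 0.5 itself so that inputs just below 0.5 are not
     incorrectly rounded up: adding exactly 0.5 to such a value would round
     to 1.0 and make the conversion return 1.  */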
26535 enum machine_mode mode = GET_MODE (op1);
26536 const struct real_format *fmt;
26537 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
26538 rtx adj;
26539
26540 /* load nextafter (0.5, 0.0) */
26541 fmt = REAL_MODE_FORMAT (mode);
26542 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
26543 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
26544
26545 /* adj = copysign (0.5, op1) */
26546 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
26547 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
26548
26549 /* adj = op1 + adj */
26550 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
26551
26552 /* op0 = (imode)adj */
26553 expand_fix (op0, adj, 0);
26554 }
26555
26556 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
26557 into OPERAND0. */
26558 void
26559 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
26560 {
26561 /* C code for the stuff we're doing below (for do_floor):
26562 xi = (long)op1;
26563 xi -= (double)xi > op1 ? 1 : 0;
26564 return xi;
26565 */
26566 enum machine_mode fmode = GET_MODE (op1);
26567 enum machine_mode imode = GET_MODE (op0);
26568 rtx ireg, freg, label, tmp;
26569
26570 /* reg = (long)op1 */
26571 ireg = gen_reg_rtx (imode);
26572 expand_fix (ireg, op1, 0);
26573
26574 /* freg = (double)reg */
26575 freg = gen_reg_rtx (fmode);
26576 expand_float (freg, ireg, 0);
26577
26578 /* ireg = (freg > op1) ? ireg - 1 : ireg */
26579 label = ix86_expand_sse_compare_and_jump (UNLE,
26580 freg, op1, !do_floor);
26581 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
26582 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
26583 emit_move_insn (ireg, tmp);
26584
26585 emit_label (label);
26586 LABEL_NUSES (label) = 1;
26587
26588 emit_move_insn (op0, ireg);
26589 }
26590
26591 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
26592 result in OPERAND0. */
26593 void
26594 ix86_expand_rint (rtx operand0, rtx operand1)
26595 {
26596 /* C code for the stuff we're doing below:
26597 xa = fabs (operand1);
26598 if (!isless (xa, 2**52))
26599 return operand1;
26600 xa = xa + 2**52 - 2**52;
26601 return copysign (xa, operand1);
26602 */
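  /* Adding and then subtracting 2**52 (2**23 for SFmode) rounds xa to an
     integer in the current rounding mode: once the sum reaches that
     magnitude the mantissa has no bits left for a fractional part, so the
     fraction is rounded away and the subtraction recovers the rounded
     value.  */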
26603 enum machine_mode mode = GET_MODE (operand0);
26604 rtx res, xa, label, TWO52, mask;
26605
26606 res = gen_reg_rtx (mode);
26607 emit_move_insn (res, operand1);
26608
26609 /* xa = abs (operand1) */
26610 xa = ix86_expand_sse_fabs (res, &mask);
26611
26612 /* if (!isless (xa, TWO52)) goto label; */
26613 TWO52 = ix86_gen_TWO52 (mode);
26614 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
26615
26616 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
26617 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
26618
26619 ix86_sse_copysign_to_positive (res, xa, res, mask);
26620
26621 emit_label (label);
26622 LABEL_NUSES (label) = 1;
26623
26624 emit_move_insn (operand0, res);
26625 }
26626
26627 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
26628 into OPERAND0. */
26629 void
26630 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
26631 {
26632 /* C code for the stuff we expand below.
26633 double xa = fabs (x), x2;
26634 if (!isless (xa, TWO52))
26635 return x;
26636 xa = xa + TWO52 - TWO52;
26637 x2 = copysign (xa, x);
26638 Compensate. Floor:
26639 if (x2 > x)
26640 x2 -= 1;
26641 Compensate. Ceil:
26642 if (x2 < x)
26643 x2 -= -1;
26644 return x2;
26645 */
26646 enum machine_mode mode = GET_MODE (operand0);
26647 rtx xa, TWO52, tmp, label, one, res, mask;
26648
26649 TWO52 = ix86_gen_TWO52 (mode);
26650
26651 /* Temporary for holding the result, initialized to the input
26652 operand to ease control flow. */
26653 res = gen_reg_rtx (mode);
26654 emit_move_insn (res, operand1);
26655
26656 /* xa = abs (operand1) */
26657 xa = ix86_expand_sse_fabs (res, &mask);
26658
26659 /* if (!isless (xa, TWO52)) goto label; */
26660 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
26661
26662 /* xa = xa + TWO52 - TWO52; */
26663 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
26664 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
26665
26666 /* xa = copysign (xa, operand1) */
26667 ix86_sse_copysign_to_positive (xa, xa, res, mask);
26668
26669 /* generate 1.0 or -1.0 */
26670 one = force_reg (mode,
26671 const_double_from_real_value (do_floor
26672 ? dconst1 : dconstm1, mode));
26673
26674 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
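  /* The comparison yields an all-ones mask where the condition holds;
     ANDing it with 1.0 turns that into 1.0 or 0.0, which is then
     subtracted from xa.  */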
26675 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
26676 emit_insn (gen_rtx_SET (VOIDmode, tmp,
26677 gen_rtx_AND (mode, one, tmp)));
26678 /* We always need to subtract here to preserve signed zero. */
26679 tmp = expand_simple_binop (mode, MINUS,
26680 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
26681 emit_move_insn (res, tmp);
26682
26683 emit_label (label);
26684 LABEL_NUSES (label) = 1;
26685
26686 emit_move_insn (operand0, res);
26687 }
26688
26689 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
26690 into OPERAND0. */
26691 void
26692 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
26693 {
26694 /* C code for the stuff we expand below.
26695 double xa = fabs (x), x2;
26696 if (!isless (xa, TWO52))
26697 return x;
26698 x2 = (double)(long)x;
26699 Compensate. Floor:
26700 if (x2 > x)
26701 x2 -= 1;
26702 Compensate. Ceil:
26703 if (x2 < x)
26704 x2 += 1;
26705 if (HONOR_SIGNED_ZEROS (mode))
26706 return copysign (x2, x);
26707 return x2;
26708 */
26709 enum machine_mode mode = GET_MODE (operand0);
26710 rtx xa, xi, TWO52, tmp, label, one, res, mask;
26711
26712 TWO52 = ix86_gen_TWO52 (mode);
26713
26714 /* Temporary for holding the result, initialized to the input
26715 operand to ease control flow. */
26716 res = gen_reg_rtx (mode);
26717 emit_move_insn (res, operand1);
26718
26719 /* xa = abs (operand1) */
26720 xa = ix86_expand_sse_fabs (res, &mask);
26721
26722 /* if (!isless (xa, TWO52)) goto label; */
26723 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
26724
26725 /* xa = (double)(long)x */
26726 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
26727 expand_fix (xi, res, 0);
26728 expand_float (xa, xi, 0);
26729
26730 /* generate 1.0 */
26731 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
26732
26733 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
26734 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
26735 emit_insn (gen_rtx_SET (VOIDmode, tmp,
26736 gen_rtx_AND (mode, one, tmp)));
26737 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
26738 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
26739 emit_move_insn (res, tmp);
26740
26741 if (HONOR_SIGNED_ZEROS (mode))
26742 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
26743
26744 emit_label (label);
26745 LABEL_NUSES (label) = 1;
26746
26747 emit_move_insn (operand0, res);
26748 }
26749
26750 /* Expand SSE sequence for computing round from OPERAND1 storing
26751    into OPERAND0.  A sequence that works without relying on DImode truncation
26752    via cvttsd2siq, which is only available on 64-bit targets.  */
26753 void
26754 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
26755 {
26756 /* C code for the stuff we expand below.
26757 double xa = fabs (x), xa2, x2;
26758 if (!isless (xa, TWO52))
26759 return x;
26760 Using the absolute value and copying back sign makes
26761 -0.0 -> -0.0 correct.
26762 xa2 = xa + TWO52 - TWO52;
26763 Compensate.
26764 dxa = xa2 - xa;
26765 if (dxa <= -0.5)
26766 xa2 += 1;
26767 else if (dxa > 0.5)
26768 xa2 -= 1;
26769 x2 = copysign (xa2, x);
26770 return x2;
26771 */
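  /* For example, with x = 2.5 and round-to-nearest-even in effect, xa2
     becomes 2.0, dxa = -0.5 triggers the dxa <= -0.5 compensation, and xa2
     is bumped to 3.0, matching round()'s round-half-away-from-zero
     semantics.  */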
26772 enum machine_mode mode = GET_MODE (operand0);
26773 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
26774
26775 TWO52 = ix86_gen_TWO52 (mode);
26776
26777 /* Temporary for holding the result, initialized to the input
26778 operand to ease control flow. */
26779 res = gen_reg_rtx (mode);
26780 emit_move_insn (res, operand1);
26781
26782 /* xa = abs (operand1) */
26783 xa = ix86_expand_sse_fabs (res, &mask);
26784
26785 /* if (!isless (xa, TWO52)) goto label; */
26786 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
26787
26788 /* xa2 = xa + TWO52 - TWO52; */
26789 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
26790 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
26791
26792 /* dxa = xa2 - xa; */
26793 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
26794
26795 /* generate 0.5, 1.0 and -0.5 */
26796 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
26797 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
26798 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
26799 0, OPTAB_DIRECT);
26800
26801 /* Compensate. */
26802 tmp = gen_reg_rtx (mode);
26803 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
26804 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
26805 emit_insn (gen_rtx_SET (VOIDmode, tmp,
26806 gen_rtx_AND (mode, one, tmp)));
26807 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
26808 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
26809 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
26810 emit_insn (gen_rtx_SET (VOIDmode, tmp,
26811 gen_rtx_AND (mode, one, tmp)));
26812 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
26813
26814 /* res = copysign (xa2, operand1) */
26815 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
26816
26817 emit_label (label);
26818 LABEL_NUSES (label) = 1;
26819
26820 emit_move_insn (operand0, res);
26821 }
26822
26823 /* Expand SSE sequence for computing trunc from OPERAND1 storing
26824 into OPERAND0. */
26825 void
26826 ix86_expand_trunc (rtx operand0, rtx operand1)
26827 {
26828 /* C code for SSE variant we expand below.
26829 double xa = fabs (x), x2;
26830 if (!isless (xa, TWO52))
26831 return x;
26832 x2 = (double)(long)x;
26833 if (HONOR_SIGNED_ZEROS (mode))
26834 return copysign (x2, x);
26835 return x2;
26836 */
26837 enum machine_mode mode = GET_MODE (operand0);
26838 rtx xa, xi, TWO52, label, res, mask;
26839
26840 TWO52 = ix86_gen_TWO52 (mode);
26841
26842 /* Temporary for holding the result, initialized to the input
26843 operand to ease control flow. */
26844 res = gen_reg_rtx (mode);
26845 emit_move_insn (res, operand1);
26846
26847 /* xa = abs (operand1) */
26848 xa = ix86_expand_sse_fabs (res, &mask);
26849
26850 /* if (!isless (xa, TWO52)) goto label; */
26851 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
26852
26853 /* x = (double)(long)x */
26854 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
26855 expand_fix (xi, res, 0);
26856 expand_float (res, xi, 0);
26857
26858 if (HONOR_SIGNED_ZEROS (mode))
26859 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
26860
26861 emit_label (label);
26862 LABEL_NUSES (label) = 1;
26863
26864 emit_move_insn (operand0, res);
26865 }
26866
26867 /* Expand SSE sequence for computing trunc from OPERAND1 storing
26868 into OPERAND0. */
26869 void
26870 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
26871 {
26872 enum machine_mode mode = GET_MODE (operand0);
26873 rtx xa, mask, TWO52, label, one, res, smask, tmp;
26874
26875 /* C code for SSE variant we expand below.
26876         double xa = fabs (x), xa2, x2;
26877 if (!isless (xa, TWO52))
26878 return x;
26879 xa2 = xa + TWO52 - TWO52;
26880 Compensate:
26881 if (xa2 > xa)
26882 xa2 -= 1.0;
26883 x2 = copysign (xa2, x);
26884 return x2;
26885 */
26886
26887 TWO52 = ix86_gen_TWO52 (mode);
26888
26889 /* Temporary for holding the result, initialized to the input
26890 operand to ease control flow. */
26891 res = gen_reg_rtx (mode);
26892 emit_move_insn (res, operand1);
26893
26894 /* xa = abs (operand1) */
26895 xa = ix86_expand_sse_fabs (res, &smask);
26896
26897 /* if (!isless (xa, TWO52)) goto label; */
26898 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
26899
26900 /* res = xa + TWO52 - TWO52; */
26901 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
26902 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
26903 emit_move_insn (res, tmp);
26904
26905 /* generate 1.0 */
26906 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
26907
26908 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
26909 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
26910 emit_insn (gen_rtx_SET (VOIDmode, mask,
26911 gen_rtx_AND (mode, mask, one)));
26912 tmp = expand_simple_binop (mode, MINUS,
26913 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
26914 emit_move_insn (res, tmp);
26915
26916 /* res = copysign (res, operand1) */
26917 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
26918
26919 emit_label (label);
26920 LABEL_NUSES (label) = 1;
26921
26922 emit_move_insn (operand0, res);
26923 }
26924
26925 /* Expand SSE sequence for computing round from OPERAND1 storing
26926 into OPERAND0. */
26927 void
26928 ix86_expand_round (rtx operand0, rtx operand1)
26929 {
26930 /* C code for the stuff we're doing below:
26931 double xa = fabs (x);
26932 if (!isless (xa, TWO52))
26933 return x;
26934 xa = (double)(long)(xa + nextafter (0.5, 0.0));
26935 return copysign (xa, x);
26936 */
26937 enum machine_mode mode = GET_MODE (operand0);
26938 rtx res, TWO52, xa, label, xi, half, mask;
26939 const struct real_format *fmt;
26940 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
26941
26942 /* Temporary for holding the result, initialized to the input
26943 operand to ease control flow. */
26944 res = gen_reg_rtx (mode);
26945 emit_move_insn (res, operand1);
26946
26947 TWO52 = ix86_gen_TWO52 (mode);
26948 xa = ix86_expand_sse_fabs (res, &mask);
26949 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
26950
26951 /* load nextafter (0.5, 0.0) */
26952 fmt = REAL_MODE_FORMAT (mode);
26953 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
26954 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
26955
26956 /* xa = xa + 0.5 */
26957 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
26958 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
26959
26960 /* xa = (double)(int64_t)xa */
26961 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
26962 expand_fix (xi, xa, 0);
26963 expand_float (xa, xi, 0);
26964
26965 /* res = copysign (xa, operand1) */
26966 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
26967
26968 emit_label (label);
26969 LABEL_NUSES (label) = 1;
26970
26971 emit_move_insn (operand0, res);
26972 }
26973
26974 \f
26975 /* Check whether an SSE5 instruction is valid.
26976 OPERANDS is the array of operands.
26977 NUM is the number of operands.
26978 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
26979 NUM_MEMORY is the maximum number of memory operands to accept.
26980    When COMMUTATIVE is set, operands 1 and 2 can be swapped.  */
26981
26982 bool
26983 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
26984 bool uses_oc0, int num_memory, bool commutative)
26985 {
26986 int mem_mask;
26987 int mem_count;
26988 int i;
26989
26990 /* Count the number of memory arguments */
26991 mem_mask = 0;
26992 mem_count = 0;
26993 for (i = 0; i < num; i++)
26994 {
26995 enum machine_mode mode = GET_MODE (operands[i]);
26996 if (register_operand (operands[i], mode))
26997 ;
26998
26999 else if (memory_operand (operands[i], mode))
27000 {
27001 mem_mask |= (1 << i);
27002 mem_count++;
27003 }
27004
27005 else
27006 {
27007 rtx pattern = PATTERN (insn);
27008
27009 /* allow 0 for pcmov */
27010 if (GET_CODE (pattern) != SET
27011 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
27012 || i < 2
27013 || operands[i] != CONST0_RTX (mode))
27014 return false;
27015 }
27016 }
27017
27018 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
27019 a memory operation. */
27020 if (num_memory < 0)
27021 {
27022 num_memory = -num_memory;
27023 if ((mem_mask & (1 << (num-1))) != 0)
27024 {
27025 mem_mask &= ~(1 << (num-1));
27026 mem_count--;
27027 }
27028 }
27029
27030 /* If there were no memory operations, allow the insn */
27031 if (mem_mask == 0)
27032 return true;
27033
27034 /* Do not allow the destination register to be a memory operand. */
27035 else if (mem_mask & (1 << 0))
27036 return false;
27037
27038   /* If there are too many memory operations, disallow the instruction.  While
27039      the hardware only allows one memory reference, before register allocation
27040      we sometimes accept two memory operands for certain insns so that code
27041      like the following can be optimized:
27042
27043 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
27044
27045      or similar cases that are vectorized into the fmaddss
27046      instruction.  */
27047 else if (mem_count > num_memory)
27048 return false;
27049
27050 /* Don't allow more than one memory operation if not optimizing. */
27051 else if (mem_count > 1 && !optimize)
27052 return false;
27053
27054 else if (num == 4 && mem_count == 1)
27055 {
27056 /* formats (destination is the first argument), example fmaddss:
27057 xmm1, xmm1, xmm2, xmm3/mem
27058 xmm1, xmm1, xmm2/mem, xmm3
27059 xmm1, xmm2, xmm3/mem, xmm1
27060 xmm1, xmm2/mem, xmm3, xmm1 */
27061 if (uses_oc0)
27062 return ((mem_mask == (1 << 1))
27063 || (mem_mask == (1 << 2))
27064 || (mem_mask == (1 << 3)));
27065
27066 /* format, example pmacsdd:
27067 xmm1, xmm2, xmm3/mem, xmm1 */
27068 if (commutative)
27069 return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
27070 else
27071 return (mem_mask == (1 << 2));
27072 }
27073
27074 else if (num == 4 && num_memory == 2)
27075 {
27076       /* If there are two memory operations, we can load one of the memory ops
27077          into the destination register.  This is for optimizing the
27078          multiply/add ops, where the combiner has given both the multiply
27079          and the add insns a memory operand.  We have to be careful
27080          that the destination doesn't overlap with the inputs.  */
27081 rtx op0 = operands[0];
27082
27083 if (reg_mentioned_p (op0, operands[1])
27084 || reg_mentioned_p (op0, operands[2])
27085 || reg_mentioned_p (op0, operands[3]))
27086 return false;
27087
27088 /* formats (destination is the first argument), example fmaddss:
27089 xmm1, xmm1, xmm2, xmm3/mem
27090 xmm1, xmm1, xmm2/mem, xmm3
27091 xmm1, xmm2, xmm3/mem, xmm1
27092 xmm1, xmm2/mem, xmm3, xmm1
27093
27094 For the oc0 case, we will load either operands[1] or operands[3] into
27095 operands[0], so any combination of 2 memory operands is ok. */
27096 if (uses_oc0)
27097 return true;
27098
27099 /* format, example pmacsdd:
27100 xmm1, xmm2, xmm3/mem, xmm1
27101
27102          For the integer multiply/add instructions, be more restrictive and
27103 require operands[2] and operands[3] to be the memory operands. */
27104 if (commutative)
27105         return (mem_mask == ((1 << 1) | (1 << 3)) || mem_mask == ((1 << 2) | (1 << 3)));
27106 else
27107 return (mem_mask == ((1 << 2) | (1 << 3)));
27108 }
27109
27110 else if (num == 3 && num_memory == 1)
27111 {
27112 /* formats, example protb:
27113 xmm1, xmm2, xmm3/mem
27114 xmm1, xmm2/mem, xmm3 */
27115 if (uses_oc0)
27116 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
27117
27118 /* format, example comeq:
27119 xmm1, xmm2, xmm3/mem */
27120 else
27121 return (mem_mask == (1 << 2));
27122 }
27123
27124 else
27125 gcc_unreachable ();
27126
27127 return false;
27128 }
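
/* Stand-alone sketch (not compiled, hypothetical helper name) of the
   mem_mask bookkeeping used above.  Bit I of the mask is set when
   operands[I] is a memory reference, so an fmaddss-style insn written as
   "xmm1, xmm1, xmm2, xmm3/mem" has mem_mask == (1 << 3), while the plain
   non-OC0, non-commutative num == 4 case accepts only (1 << 2).  */
#if 0
static int
sse5_mem_mask (const int *operand_is_mem, int num)
{
  int mask = 0, i;

  for (i = 0; i < num; i++)
    if (operand_is_mem[i])
      mask |= 1 << i;
  return mask;
}

/* Example: a 4-operand insn whose third operand is the only MEM.  */
static const int example_ops[4] = { 0, 0, 1, 0 };
/* sse5_mem_mask (example_ops, 4) == (1 << 2).  */
#endif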
27129
27130 \f
27131 /* Fix up an SSE5 instruction that has 2 memory input references into a form the
27132 hardware will allow by using the destination register to load one of the
27133 memory operations. Presently this is used by the multiply/add routines to
27134 allow 2 memory references. */
27135
27136 void
27137 ix86_expand_sse5_multiple_memory (rtx operands[],
27138 int num,
27139 enum machine_mode mode)
27140 {
27141 rtx op0 = operands[0];
27142 if (num != 4
27143 || memory_operand (op0, mode)
27144 || reg_mentioned_p (op0, operands[1])
27145 || reg_mentioned_p (op0, operands[2])
27146 || reg_mentioned_p (op0, operands[3]))
27147 gcc_unreachable ();
27148
27149 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
27150 the destination register. */
27151 if (memory_operand (operands[1], mode))
27152 {
27153 emit_move_insn (op0, operands[1]);
27154 operands[1] = op0;
27155 }
27156 else if (memory_operand (operands[3], mode))
27157 {
27158 emit_move_insn (op0, operands[3]);
27159 operands[3] = op0;
27160 }
27161 else
27162 gcc_unreachable ();
27163
27164 return;
27165 }
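
/* Stand-alone model (not compiled, hypothetical names) of the choice made
   above: with two memory inputs, either operands[1] or operands[3] is
   loaded into the destination register so that only one memory reference
   remains for the hardware.  IS_MEM[i] stands in for
   memory_operand (operands[i], mode).  */
#if 0
static int
sse5_pick_memory_to_load (const int is_mem[4])
{
  if (is_mem[1])
    return 1;               /* emit op0 <- operands[1]; operands[1] = op0 */
  else if (is_mem[3])
    return 3;               /* emit op0 <- operands[3]; operands[3] = op0 */
  return -1;                /* the real code reaches gcc_unreachable ()   */
}
#endif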
27166
27167 \f
27168 /* Table of valid machine attributes. */
27169 static const struct attribute_spec ix86_attribute_table[] =
27170 {
27171 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
27172 /* Stdcall attribute says callee is responsible for popping arguments
27173 if they are not variable. */
27174 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
27175 /* Fastcall attribute says callee is responsible for popping arguments
27176 if they are not variable. */
27177 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
27178 /* Cdecl attribute says the callee is a normal C declaration */
27179 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
27180 /* Regparm attribute specifies how many integer arguments are to be
27181 passed in registers. */
27182 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
27183 /* Sseregparm attribute says we are using x86_64 calling conventions
27184 for FP arguments. */
27185 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
27186 /* force_align_arg_pointer says this function realigns the stack at entry. */
27187 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
27188 false, true, true, ix86_handle_cconv_attribute },
27189 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
27190 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
27191 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
27192 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
27193 #endif
27194 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
27195 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
27196 #ifdef SUBTARGET_ATTRIBUTE_TABLE
27197 SUBTARGET_ATTRIBUTE_TABLE,
27198 #endif
27199 /* ms_abi and sysv_abi calling convention function attributes. */
27200 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
27201 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
27202 /* End element. */
27203 { NULL, 0, 0, false, false, false, NULL }
27204 };
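
/* Illustration (not compiled as part of this file) of how a few of the
   attributes registered above are spelled in user code; each entry in
   ix86_attribute_table names an __attribute__ keyword and the handler
   that validates it.  */
#if 0
/* Callee pops its own arguments (stdcall); first two integer arguments
   arrive in ECX/EDX (fastcall).  */
int __attribute__ ((stdcall)) win_callback (int a, int b);
int __attribute__ ((fastcall)) fast_min (int a, int b);

/* Pass up to two integer arguments in registers.  */
int __attribute__ ((regparm (2))) reg_add (int a, int b);

/* Lay the structure out with the MS bit-field rules.  */
struct __attribute__ ((ms_struct)) ms_layout { int a : 3; char b; };

/* Use the Microsoft x86-64 calling convention for a single function.  */
long __attribute__ ((ms_abi)) ms_call (long a, long b);
#endif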
27205
27206 /* Implement targetm.vectorize.builtin_vectorization_cost. */
27207 static int
27208 x86_builtin_vectorization_cost (bool runtime_test)
27209 {
27210   /* If the branch of the runtime test is taken, i.e. the vectorized
27211      version is skipped, this incurs a misprediction cost (because the
27212 vectorized version is expected to be the fall-through). So we subtract
27213      the latency of a mispredicted branch from the costs that are incurred
27214 when the vectorized version is executed.
27215
27216 TODO: The values in individual target tables have to be tuned or new
27217      fields may be needed.  E.g., on K8, the default branch path is the
27218 not-taken path. If the taken path is predicted correctly, the minimum
27219 penalty of going down the taken-path is 1 cycle. If the taken-path is
27220 not predicted correctly, then the minimum penalty is 10 cycles. */
27221
27222 if (runtime_test)
27223 {
27224 return (-(ix86_cost->cond_taken_branch_cost));
27225 }
27226 else
27227 return 0;
27228 }
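
/* Conceptual sketch (not compiled) of the "runtime test" this hook is
   costing: when the vectorizer versions a loop, it guards the vector copy
   with a runtime check and keeps it as the fall-through path, so taking
   the branch to the scalar copy pays a mispredicted taken branch; the
   negative value returned above credits that latency against the cost of
   executing the vectorized path.  Function and variable names are made
   up.  */
#if 0
void
scale (float *a, float *b, int n)
{
  int i;

  if (a + n <= b || b + n <= a)    /* runtime alias test (the branch)  */
    for (i = 0; i < n; i++)        /* vectorized copy, fall-through    */
      a[i] = 2.0f * b[i];
  else
    for (i = 0; i < n; i++)        /* scalar copy, reached via branch  */
      a[i] = 2.0f * b[i];
}
#endif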
27229
27230 /* This function returns the calling-ABI-specific va_list type node,
27231    i.e. the va_list type appropriate for FNDECL.  */
27232
27233 tree
27234 ix86_fn_abi_va_list (tree fndecl)
27235 {
27236 int abi;
27237
27238 if (!TARGET_64BIT)
27239 return va_list_type_node;
27240 gcc_assert (fndecl != NULL_TREE);
27241 abi = ix86_function_abi ((const_tree) fndecl);
27242
27243 if (abi == MS_ABI)
27244 return ms_va_list_type_node;
27245 else
27246 return sysv_va_list_type_node;
27247 }
27248
27249 /* Returns the canonical va_list type specified by TYPE. If there
27250    is no valid TYPE provided, it returns NULL_TREE.  */
27251
27252 tree
27253 ix86_canonical_va_list_type (tree type)
27254 {
27255 tree wtype, htype;
27256
27257 /* Resolve references and pointers to va_list type. */
27258 if (INDIRECT_REF_P (type))
27259 type = TREE_TYPE (type);
27260 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
27261 type = TREE_TYPE (type);
27262
27263 if (TARGET_64BIT)
27264 {
27265 wtype = va_list_type_node;
27266 gcc_assert (wtype != NULL_TREE);
27267 htype = type;
27268 if (TREE_CODE (wtype) == ARRAY_TYPE)
27269 {
27270 /* If va_list is an array type, the argument may have decayed
27271 to a pointer type, e.g. by being passed to another function.
27272 In that case, unwrap both types so that we can compare the
27273 underlying records. */
27274 if (TREE_CODE (htype) == ARRAY_TYPE
27275 || POINTER_TYPE_P (htype))
27276 {
27277 wtype = TREE_TYPE (wtype);
27278 htype = TREE_TYPE (htype);
27279 }
27280 }
27281 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
27282 return va_list_type_node;
27283 wtype = sysv_va_list_type_node;
27284 gcc_assert (wtype != NULL_TREE);
27285 htype = type;
27286 if (TREE_CODE (wtype) == ARRAY_TYPE)
27287 {
27288 /* If va_list is an array type, the argument may have decayed
27289 to a pointer type, e.g. by being passed to another function.
27290 In that case, unwrap both types so that we can compare the
27291 underlying records. */
27292 if (TREE_CODE (htype) == ARRAY_TYPE
27293 || POINTER_TYPE_P (htype))
27294 {
27295 wtype = TREE_TYPE (wtype);
27296 htype = TREE_TYPE (htype);
27297 }
27298 }
27299 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
27300 return sysv_va_list_type_node;
27301 wtype = ms_va_list_type_node;
27302 gcc_assert (wtype != NULL_TREE);
27303 htype = type;
27304 if (TREE_CODE (wtype) == ARRAY_TYPE)
27305 {
27306 /* If va_list is an array type, the argument may have decayed
27307 to a pointer type, e.g. by being passed to another function.
27308 In that case, unwrap both types so that we can compare the
27309 underlying records. */
27310 if (TREE_CODE (htype) == ARRAY_TYPE
27311 || POINTER_TYPE_P (htype))
27312 {
27313 wtype = TREE_TYPE (wtype);
27314 htype = TREE_TYPE (htype);
27315 }
27316 }
27317 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
27318 return ms_va_list_type_node;
27319 return NULL_TREE;
27320 }
27321 return std_canonical_va_list_type (type);
27322 }
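
/* Illustration (not compiled): the 64-bit SYSV va_list is an array of one
   structure, so passing it to another function decays it to a pointer;
   the unwrapping above is what lets both spellings be recognized as the
   same va_list type.  Names below are made up.  */
#if 0
#include <stdarg.h>

static void
log_args (const char *fmt, va_list ap)  /* parameter type has decayed */
{
  /* consume ap with va_arg ... */
}

void
log_msg (const char *fmt, ...)
{
  va_list ap;                 /* array-of-one-struct type on x86-64 */

  va_start (ap, fmt);
  log_args (fmt, ap);         /* decays to a pointer at this call */
  va_end (ap);
}
#endif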
27323
27324 /* Iterate through the target-specific builtin types for va_list.
27325    IDX denotes the iterator, *PTREE is set to the type of the
27326    va_list builtin and *PNAME to the builtin's name.
27327 Returns zero if there is no element for this index, otherwise
27328 IDX should be increased upon the next call.
27329 Note, do not iterate a base builtin's name like __builtin_va_list.
27330 Used from c_common_nodes_and_builtins. */
27331
27332 int
27333 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
27334 {
27335 if (!TARGET_64BIT)
27336 return 0;
27337 switch (idx) {
27338 case 0:
27339 *ptree = ms_va_list_type_node;
27340 *pname = "__builtin_ms_va_list";
27341 break;
27342 case 1:
27343 *ptree = sysv_va_list_type_node;
27344 *pname = "__builtin_sysv_va_list";
27345 break;
27346 default:
27347 return 0;
27348 }
27349 return 1;
27350 }
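
/* Usage sketch (not compiled as part of this file): on a 64-bit target a
   function carrying the ms_abi attribute does its varargs through the
   __builtin_ms_va_list type registered above, while ordinary functions
   keep the SYSV va_list.  ix86_fn_abi_va_list selects between the two for
   va_start, and ix86_canonical_va_list_type lets the front end recognize
   either spelling.  */
#if 0
#include <stdarg.h>

long __attribute__ ((ms_abi))
ms_sum (int n, ...)
{
  __builtin_ms_va_list ap;    /* ms_va_list_type_node */
  long s = 0;
  int i;

  va_start (ap, n);
  for (i = 0; i < n; i++)
    s += va_arg (ap, long);
  va_end (ap);
  return s;
}
#endif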
27351
27352 /* Initialize the GCC target structure. */
27353 #undef TARGET_RETURN_IN_MEMORY
27354 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
27355
27356 #undef TARGET_ATTRIBUTE_TABLE
27357 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
27358 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
27359 # undef TARGET_MERGE_DECL_ATTRIBUTES
27360 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
27361 #endif
27362
27363 #undef TARGET_COMP_TYPE_ATTRIBUTES
27364 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
27365
27366 #undef TARGET_INIT_BUILTINS
27367 #define TARGET_INIT_BUILTINS ix86_init_builtins
27368 #undef TARGET_EXPAND_BUILTIN
27369 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
27370
27371 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
27372 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
27373 ix86_builtin_vectorized_function
27374
27375 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
27376 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
27377
27378 #undef TARGET_BUILTIN_RECIPROCAL
27379 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
27380
27381 #undef TARGET_ASM_FUNCTION_EPILOGUE
27382 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
27383
27384 #undef TARGET_ENCODE_SECTION_INFO
27385 #ifndef SUBTARGET_ENCODE_SECTION_INFO
27386 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
27387 #else
27388 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
27389 #endif
27390
27391 #undef TARGET_ASM_OPEN_PAREN
27392 #define TARGET_ASM_OPEN_PAREN ""
27393 #undef TARGET_ASM_CLOSE_PAREN
27394 #define TARGET_ASM_CLOSE_PAREN ""
27395
27396 #undef TARGET_ASM_ALIGNED_HI_OP
27397 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
27398 #undef TARGET_ASM_ALIGNED_SI_OP
27399 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
27400 #ifdef ASM_QUAD
27401 #undef TARGET_ASM_ALIGNED_DI_OP
27402 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
27403 #endif
27404
27405 #undef TARGET_ASM_UNALIGNED_HI_OP
27406 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
27407 #undef TARGET_ASM_UNALIGNED_SI_OP
27408 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
27409 #undef TARGET_ASM_UNALIGNED_DI_OP
27410 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
27411
27412 #undef TARGET_SCHED_ADJUST_COST
27413 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
27414 #undef TARGET_SCHED_ISSUE_RATE
27415 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
27416 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
27417 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
27418 ia32_multipass_dfa_lookahead
27419
27420 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
27421 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
27422
27423 #ifdef HAVE_AS_TLS
27424 #undef TARGET_HAVE_TLS
27425 #define TARGET_HAVE_TLS true
27426 #endif
27427 #undef TARGET_CANNOT_FORCE_CONST_MEM
27428 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
27429 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
27430 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
27431
27432 #undef TARGET_DELEGITIMIZE_ADDRESS
27433 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
27434
27435 #undef TARGET_MS_BITFIELD_LAYOUT_P
27436 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
27437
27438 #if TARGET_MACHO
27439 #undef TARGET_BINDS_LOCAL_P
27440 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
27441 #endif
27442 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
27443 #undef TARGET_BINDS_LOCAL_P
27444 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
27445 #endif
27446
27447 #undef TARGET_ASM_OUTPUT_MI_THUNK
27448 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
27449 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
27450 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
27451
27452 #undef TARGET_ASM_FILE_START
27453 #define TARGET_ASM_FILE_START x86_file_start
27454
27455 #undef TARGET_DEFAULT_TARGET_FLAGS
27456 #define TARGET_DEFAULT_TARGET_FLAGS \
27457 (TARGET_DEFAULT \
27458 | TARGET_SUBTARGET_DEFAULT \
27459 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
27460
27461 #undef TARGET_HANDLE_OPTION
27462 #define TARGET_HANDLE_OPTION ix86_handle_option
27463
27464 #undef TARGET_RTX_COSTS
27465 #define TARGET_RTX_COSTS ix86_rtx_costs
27466 #undef TARGET_ADDRESS_COST
27467 #define TARGET_ADDRESS_COST ix86_address_cost
27468
27469 #undef TARGET_FIXED_CONDITION_CODE_REGS
27470 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
27471 #undef TARGET_CC_MODES_COMPATIBLE
27472 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
27473
27474 #undef TARGET_MACHINE_DEPENDENT_REORG
27475 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
27476
27477 #undef TARGET_BUILD_BUILTIN_VA_LIST
27478 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
27479
27480 #undef TARGET_FN_ABI_VA_LIST
27481 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
27482
27483 #undef TARGET_CANONICAL_VA_LIST_TYPE
27484 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
27485
27486 #undef TARGET_EXPAND_BUILTIN_VA_START
27487 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
27488
27489 #undef TARGET_MD_ASM_CLOBBERS
27490 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
27491
27492 #undef TARGET_PROMOTE_PROTOTYPES
27493 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
27494 #undef TARGET_STRUCT_VALUE_RTX
27495 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
27496 #undef TARGET_SETUP_INCOMING_VARARGS
27497 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
27498 #undef TARGET_MUST_PASS_IN_STACK
27499 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
27500 #undef TARGET_PASS_BY_REFERENCE
27501 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
27502 #undef TARGET_INTERNAL_ARG_POINTER
27503 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
27504 #undef TARGET_UPDATE_STACK_BOUNDARY
27505 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
27506 #undef TARGET_GET_DRAP_RTX
27507 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
27508 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
27509 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
27510 #undef TARGET_STRICT_ARGUMENT_NAMING
27511 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
27512
27513 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
27514 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
27515
27516 #undef TARGET_SCALAR_MODE_SUPPORTED_P
27517 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
27518
27519 #undef TARGET_VECTOR_MODE_SUPPORTED_P
27520 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
27521
27522 #undef TARGET_C_MODE_FOR_SUFFIX
27523 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
27524
27525 #ifdef HAVE_AS_TLS
27526 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
27527 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
27528 #endif
27529
27530 #ifdef SUBTARGET_INSERT_ATTRIBUTES
27531 #undef TARGET_INSERT_ATTRIBUTES
27532 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
27533 #endif
27534
27535 #undef TARGET_MANGLE_TYPE
27536 #define TARGET_MANGLE_TYPE ix86_mangle_type
27537
27538 #undef TARGET_STACK_PROTECT_FAIL
27539 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
27540
27541 #undef TARGET_FUNCTION_VALUE
27542 #define TARGET_FUNCTION_VALUE ix86_function_value
27543
27544 #undef TARGET_SECONDARY_RELOAD
27545 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
27546
27547 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
27548 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
27549
27550 #undef TARGET_SET_CURRENT_FUNCTION
27551 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
27552
27553 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
27554 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_option_attribute_p
27555
27556 #undef TARGET_OPTION_SAVE
27557 #define TARGET_OPTION_SAVE ix86_function_specific_save
27558
27559 #undef TARGET_OPTION_RESTORE
27560 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
27561
27562 #undef TARGET_OPTION_PRINT
27563 #define TARGET_OPTION_PRINT ix86_function_specific_print
27564
27565 #undef TARGET_OPTION_CAN_INLINE_P
27566 #define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p
27567
27568 #undef TARGET_OPTION_COLD_ATTRIBUTE_SETS_OPTIMIZATION
27569 #define TARGET_OPTION_COLD_ATTRIBUTE_SETS_OPTIMIZATION true
27570
27571 #undef TARGET_OPTION_HOT_ATTRIBUTE_SETS_OPTIMIZATION
27572 #define TARGET_OPTION_HOT_ATTRIBUTE_SETS_OPTIMIZATION true
27573
27574 struct gcc_target targetm = TARGET_INITIALIZER;
27575 \f
27576 #include "gt-i386.h"