1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "tm_p.h"
28 #include "regs.h"
29 #include "hard-reg-set.h"
30 #include "real.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "output.h"
34 #include "insn-codes.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "except.h"
38 #include "function.h"
39 #include "recog.h"
40 #include "expr.h"
41 #include "optabs.h"
42 #include "toplev.h"
43 #include "basic-block.h"
44 #include "ggc.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "langhooks.h"
48 #include "cgraph.h"
49 #include "tree-gimple.h"
50 #include "dwarf2.h"
51 #include "df.h"
52 #include "tm-constrs.h"
53 #include "params.h"
54
55 static int x86_builtin_vectorization_cost (bool);
56
57 #ifndef CHECK_STACK_LIMIT
58 #define CHECK_STACK_LIMIT (-1)
59 #endif
60
61 /* Return index of given mode in mult and division cost tables. */
62 #define MODE_INDEX(mode) \
63 ((mode) == QImode ? 0 \
64 : (mode) == HImode ? 1 \
65 : (mode) == SImode ? 2 \
66 : (mode) == DImode ? 3 \
67 : 4)
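/* Illustrative note (not in the original source): MODE_INDEX selects the
   per-mode slot of the five-entry multiply and divide cost arrays in the
   processor_costs tables below, e.g. MODE_INDEX (SImode) == 2 picks the
   "SI" entry.  A typical (sketched) use, assuming the array members are
   named as in the struct processor_costs declaration in i386.h:

     int divide_cost = ix86_cost->divide[MODE_INDEX (mode)];
*/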
68
69 /* Processor costs (relative to an add) */
70 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
71 #define COSTS_N_BYTES(N) ((N) * 2)
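/* Worked example of the scaling above (added note): with COSTS_N_INSNS (N)
   assumed to be (N) * 4, a one-instruction cost is 4; since an add is taken
   to be 2 bytes, COSTS_N_BYTES (2) == 4 as well, so the byte-based costs
   used when optimizing for size stay on the same scale as the
   instruction-based costs.  This is why the add entry in size_cost below
   is COSTS_N_BYTES (2).  */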
72
73 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
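/* Added note on reading the string-operation tables that close each
   processor_costs entry (layout per struct stringop_algs in i386.h): the
   first member is the algorithm used when the block size is unknown,
   followed by {max_size, algorithm} pairs for known sizes, where a
   max_size of -1 means "no upper bound".  For example

     {libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}

   reads: unknown size -> library call; known size up to 256 bytes ->
   rep prefix moving 4 bytes at a time; anything larger -> library call.
   Each memcpy/memset table holds two such initializers - the first used
   for 32-bit code, the second for 64-bit code (compare generic32_cost
   and generic64_cost below) - and DUMMY_STRINGOP_ALGS fills whichever
   slot a given tuning never uses.  */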
74
75 static const
76 struct processor_costs size_cost = { /* costs for tuning for size */
77 COSTS_N_BYTES (2), /* cost of an add instruction */
78 COSTS_N_BYTES (3), /* cost of a lea instruction */
79 COSTS_N_BYTES (2), /* variable shift costs */
80 COSTS_N_BYTES (3), /* constant shift costs */
81 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
82 COSTS_N_BYTES (3), /* HI */
83 COSTS_N_BYTES (3), /* SI */
84 COSTS_N_BYTES (3), /* DI */
85 COSTS_N_BYTES (5)}, /* other */
86 0, /* cost of multiply per each bit set */
87 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
88 COSTS_N_BYTES (3), /* HI */
89 COSTS_N_BYTES (3), /* SI */
90 COSTS_N_BYTES (3), /* DI */
91 COSTS_N_BYTES (5)}, /* other */
92 COSTS_N_BYTES (3), /* cost of movsx */
93 COSTS_N_BYTES (3), /* cost of movzx */
94 0, /* "large" insn */
95 2, /* MOVE_RATIO */
96 2, /* cost for loading QImode using movzbl */
97 {2, 2, 2}, /* cost of loading integer registers
98 in QImode, HImode and SImode.
99 Relative to reg-reg move (2). */
100 {2, 2, 2}, /* cost of storing integer registers */
101 2, /* cost of reg,reg fld/fst */
102 {2, 2, 2}, /* cost of loading fp registers
103 in SFmode, DFmode and XFmode */
104 {2, 2, 2}, /* cost of storing fp registers
105 in SFmode, DFmode and XFmode */
106 3, /* cost of moving MMX register */
107 {3, 3}, /* cost of loading MMX registers
108 in SImode and DImode */
109 {3, 3}, /* cost of storing MMX registers
110 in SImode and DImode */
111 3, /* cost of moving SSE register */
112 {3, 3, 3}, /* cost of loading SSE registers
113 in SImode, DImode and TImode */
114 {3, 3, 3}, /* cost of storing SSE registers
115 in SImode, DImode and TImode */
116 3, /* MMX or SSE register to integer */
117 0, /* size of l1 cache */
118 0, /* size of l2 cache */
119 0, /* size of prefetch block */
120 0, /* number of parallel prefetches */
121 2, /* Branch cost */
122 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
123 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
124 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
125 COSTS_N_BYTES (2), /* cost of FABS instruction. */
126 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
127 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
128 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
129 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
130 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
131 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
132 1, /* scalar_stmt_cost. */
133 1, /* scalar load_cost. */
134 1, /* scalar_store_cost. */
135 1, /* vec_stmt_cost. */
136 1, /* vec_to_scalar_cost. */
137 1, /* scalar_to_vec_cost. */
138 1, /* vec_align_load_cost. */
139 1, /* vec_unalign_load_cost. */
140 1, /* vec_store_cost. */
141 1, /* cond_taken_branch_cost. */
142 1, /* cond_not_taken_branch_cost. */
143 };
144
145 /* Processor costs (relative to an add) */
146 static const
147 struct processor_costs i386_cost = { /* 386 specific costs */
148 COSTS_N_INSNS (1), /* cost of an add instruction */
149 COSTS_N_INSNS (1), /* cost of a lea instruction */
150 COSTS_N_INSNS (3), /* variable shift costs */
151 COSTS_N_INSNS (2), /* constant shift costs */
152 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
153 COSTS_N_INSNS (6), /* HI */
154 COSTS_N_INSNS (6), /* SI */
155 COSTS_N_INSNS (6), /* DI */
156 COSTS_N_INSNS (6)}, /* other */
157 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
158 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
159 COSTS_N_INSNS (23), /* HI */
160 COSTS_N_INSNS (23), /* SI */
161 COSTS_N_INSNS (23), /* DI */
162 COSTS_N_INSNS (23)}, /* other */
163 COSTS_N_INSNS (3), /* cost of movsx */
164 COSTS_N_INSNS (2), /* cost of movzx */
165 15, /* "large" insn */
166 3, /* MOVE_RATIO */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175 {8, 8, 8}, /* cost of storing fp registers
176 in SFmode, DFmode and XFmode */
177 2, /* cost of moving MMX register */
178 {4, 8}, /* cost of loading MMX registers
179 in SImode and DImode */
180 {4, 8}, /* cost of storing MMX registers
181 in SImode and DImode */
182 2, /* cost of moving SSE register */
183 {4, 8, 16}, /* cost of loading SSE registers
184 in SImode, DImode and TImode */
185 {4, 8, 16}, /* cost of storing SSE registers
186 in SImode, DImode and TImode */
187 3, /* MMX or SSE register to integer */
188 0, /* size of l1 cache */
189 0, /* size of l2 cache */
190 0, /* size of prefetch block */
191 0, /* number of parallel prefetches */
192 1, /* Branch cost */
193 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
194 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
195 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
196 COSTS_N_INSNS (22), /* cost of FABS instruction. */
197 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
198 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
199 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
200 DUMMY_STRINGOP_ALGS},
201 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
202 DUMMY_STRINGOP_ALGS},
203 1, /* scalar_stmt_cost. */
204 1, /* scalar load_cost. */
205 1, /* scalar_store_cost. */
206 1, /* vec_stmt_cost. */
207 1, /* vec_to_scalar_cost. */
208 1, /* scalar_to_vec_cost. */
209 1, /* vec_align_load_cost. */
210 2, /* vec_unalign_load_cost. */
211 1, /* vec_store_cost. */
212 3, /* cond_taken_branch_cost. */
213 1, /* cond_not_taken_branch_cost. */
214 };
215
216 static const
217 struct processor_costs i486_cost = { /* 486 specific costs */
218 COSTS_N_INSNS (1), /* cost of an add instruction */
219 COSTS_N_INSNS (1), /* cost of a lea instruction */
220 COSTS_N_INSNS (3), /* variable shift costs */
221 COSTS_N_INSNS (2), /* constant shift costs */
222 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
223 COSTS_N_INSNS (12), /* HI */
224 COSTS_N_INSNS (12), /* SI */
225 COSTS_N_INSNS (12), /* DI */
226 COSTS_N_INSNS (12)}, /* other */
227 1, /* cost of multiply per each bit set */
228 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
229 COSTS_N_INSNS (40), /* HI */
230 COSTS_N_INSNS (40), /* SI */
231 COSTS_N_INSNS (40), /* DI */
232 COSTS_N_INSNS (40)}, /* other */
233 COSTS_N_INSNS (3), /* cost of movsx */
234 COSTS_N_INSNS (2), /* cost of movzx */
235 15, /* "large" insn */
236 3, /* MOVE_RATIO */
237 4, /* cost for loading QImode using movzbl */
238 {2, 4, 2}, /* cost of loading integer registers
239 in QImode, HImode and SImode.
240 Relative to reg-reg move (2). */
241 {2, 4, 2}, /* cost of storing integer registers */
242 2, /* cost of reg,reg fld/fst */
243 {8, 8, 8}, /* cost of loading fp registers
244 in SFmode, DFmode and XFmode */
245 {8, 8, 8}, /* cost of storing fp registers
246 in SFmode, DFmode and XFmode */
247 2, /* cost of moving MMX register */
248 {4, 8}, /* cost of loading MMX registers
249 in SImode and DImode */
250 {4, 8}, /* cost of storing MMX registers
251 in SImode and DImode */
252 2, /* cost of moving SSE register */
253 {4, 8, 16}, /* cost of loading SSE registers
254 in SImode, DImode and TImode */
255 {4, 8, 16}, /* cost of storing SSE registers
256 in SImode, DImode and TImode */
257 3, /* MMX or SSE register to integer */
258 4, /* size of l1 cache. 486 has 8kB cache
259 shared for code and data, so 4kB is
260 not really precise. */
261 4, /* size of l2 cache */
262 0, /* size of prefetch block */
263 0, /* number of parallel prefetches */
264 1, /* Branch cost */
265 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
266 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
267 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
268 COSTS_N_INSNS (3), /* cost of FABS instruction. */
269 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
270 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
271 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
272 DUMMY_STRINGOP_ALGS},
273 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
274 DUMMY_STRINGOP_ALGS},
275 1, /* scalar_stmt_cost. */
276 1, /* scalar load_cost. */
277 1, /* scalar_store_cost. */
278 1, /* vec_stmt_cost. */
279 1, /* vec_to_scalar_cost. */
280 1, /* scalar_to_vec_cost. */
281 1, /* vec_align_load_cost. */
282 2, /* vec_unalign_load_cost. */
283 1, /* vec_store_cost. */
284 3, /* cond_taken_branch_cost. */
285 1, /* cond_not_taken_branch_cost. */
286 };
287
288 static const
289 struct processor_costs pentium_cost = {
290 COSTS_N_INSNS (1), /* cost of an add instruction */
291 COSTS_N_INSNS (1), /* cost of a lea instruction */
292 COSTS_N_INSNS (4), /* variable shift costs */
293 COSTS_N_INSNS (1), /* constant shift costs */
294 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
295 COSTS_N_INSNS (11), /* HI */
296 COSTS_N_INSNS (11), /* SI */
297 COSTS_N_INSNS (11), /* DI */
298 COSTS_N_INSNS (11)}, /* other */
299 0, /* cost of multiply per each bit set */
300 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
301 COSTS_N_INSNS (25), /* HI */
302 COSTS_N_INSNS (25), /* SI */
303 COSTS_N_INSNS (25), /* DI */
304 COSTS_N_INSNS (25)}, /* other */
305 COSTS_N_INSNS (3), /* cost of movsx */
306 COSTS_N_INSNS (2), /* cost of movzx */
307 8, /* "large" insn */
308 6, /* MOVE_RATIO */
309 6, /* cost for loading QImode using movzbl */
310 {2, 4, 2}, /* cost of loading integer registers
311 in QImode, HImode and SImode.
312 Relative to reg-reg move (2). */
313 {2, 4, 2}, /* cost of storing integer registers */
314 2, /* cost of reg,reg fld/fst */
315 {2, 2, 6}, /* cost of loading fp registers
316 in SFmode, DFmode and XFmode */
317 {4, 4, 6}, /* cost of storing fp registers
318 in SFmode, DFmode and XFmode */
319 8, /* cost of moving MMX register */
320 {8, 8}, /* cost of loading MMX registers
321 in SImode and DImode */
322 {8, 8}, /* cost of storing MMX registers
323 in SImode and DImode */
324 2, /* cost of moving SSE register */
325 {4, 8, 16}, /* cost of loading SSE registers
326 in SImode, DImode and TImode */
327 {4, 8, 16}, /* cost of storing SSE registers
328 in SImode, DImode and TImode */
329 3, /* MMX or SSE register to integer */
330 8, /* size of l1 cache. */
331 8, /* size of l2 cache */
332 0, /* size of prefetch block */
333 0, /* number of parallel prefetches */
334 2, /* Branch cost */
335 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
336 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
337 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
338 COSTS_N_INSNS (1), /* cost of FABS instruction. */
339 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
340 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
341 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
342 DUMMY_STRINGOP_ALGS},
343 {{libcall, {{-1, rep_prefix_4_byte}}},
344 DUMMY_STRINGOP_ALGS},
345 1, /* scalar_stmt_cost. */
346 1, /* scalar load_cost. */
347 1, /* scalar_store_cost. */
348 1, /* vec_stmt_cost. */
349 1, /* vec_to_scalar_cost. */
350 1, /* scalar_to_vec_cost. */
351 1, /* vec_align_load_cost. */
352 2, /* vec_unalign_load_cost. */
353 1, /* vec_store_cost. */
354 3, /* cond_taken_branch_cost. */
355 1, /* cond_not_taken_branch_cost. */
356 };
357
358 static const
359 struct processor_costs pentiumpro_cost = {
360 COSTS_N_INSNS (1), /* cost of an add instruction */
361 COSTS_N_INSNS (1), /* cost of a lea instruction */
362 COSTS_N_INSNS (1), /* variable shift costs */
363 COSTS_N_INSNS (1), /* constant shift costs */
364 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
365 COSTS_N_INSNS (4), /* HI */
366 COSTS_N_INSNS (4), /* SI */
367 COSTS_N_INSNS (4), /* DI */
368 COSTS_N_INSNS (4)}, /* other */
369 0, /* cost of multiply per each bit set */
370 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
371 COSTS_N_INSNS (17), /* HI */
372 COSTS_N_INSNS (17), /* SI */
373 COSTS_N_INSNS (17), /* DI */
374 COSTS_N_INSNS (17)}, /* other */
375 COSTS_N_INSNS (1), /* cost of movsx */
376 COSTS_N_INSNS (1), /* cost of movzx */
377 8, /* "large" insn */
378 6, /* MOVE_RATIO */
379 2, /* cost for loading QImode using movzbl */
380 {4, 4, 4}, /* cost of loading integer registers
381 in QImode, HImode and SImode.
382 Relative to reg-reg move (2). */
383 {2, 2, 2}, /* cost of storing integer registers */
384 2, /* cost of reg,reg fld/fst */
385 {2, 2, 6}, /* cost of loading fp registers
386 in SFmode, DFmode and XFmode */
387 {4, 4, 6}, /* cost of storing fp registers
388 in SFmode, DFmode and XFmode */
389 2, /* cost of moving MMX register */
390 {2, 2}, /* cost of loading MMX registers
391 in SImode and DImode */
392 {2, 2}, /* cost of storing MMX registers
393 in SImode and DImode */
394 2, /* cost of moving SSE register */
395 {2, 2, 8}, /* cost of loading SSE registers
396 in SImode, DImode and TImode */
397 {2, 2, 8}, /* cost of storing SSE registers
398 in SImode, DImode and TImode */
399 3, /* MMX or SSE register to integer */
400 8, /* size of l1 cache. */
401 256, /* size of l2 cache */
402 32, /* size of prefetch block */
403 6, /* number of parallel prefetches */
404 2, /* Branch cost */
405 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
406 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
407 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
408 COSTS_N_INSNS (2), /* cost of FABS instruction. */
409 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
410 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
411 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
412 the alignment). For small blocks the inline loop is still a noticeable win; for bigger
413 blocks either rep movsl or rep movsb is the way to go. Rep movsb apparently has a
414 more expensive startup time in the CPU, but after 4K the difference is down in the noise.
415 */
416 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
417 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
418 DUMMY_STRINGOP_ALGS},
419 {{rep_prefix_4_byte, {{1024, unrolled_loop},
420 {8192, rep_prefix_4_byte}, {-1, libcall}}},
421 DUMMY_STRINGOP_ALGS},
422 1, /* scalar_stmt_cost. */
423 1, /* scalar load_cost. */
424 1, /* scalar_store_cost. */
425 1, /* vec_stmt_cost. */
426 1, /* vec_to_scalar_cost. */
427 1, /* scalar_to_vec_cost. */
428 1, /* vec_align_load_cost. */
429 2, /* vec_unalign_load_cost. */
430 1, /* vec_store_cost. */
431 3, /* cond_taken_branch_cost. */
432 1, /* cond_not_taken_branch_cost. */
433 };
434
435 static const
436 struct processor_costs geode_cost = {
437 COSTS_N_INSNS (1), /* cost of an add instruction */
438 COSTS_N_INSNS (1), /* cost of a lea instruction */
439 COSTS_N_INSNS (2), /* variable shift costs */
440 COSTS_N_INSNS (1), /* constant shift costs */
441 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
442 COSTS_N_INSNS (4), /* HI */
443 COSTS_N_INSNS (7), /* SI */
444 COSTS_N_INSNS (7), /* DI */
445 COSTS_N_INSNS (7)}, /* other */
446 0, /* cost of multiply per each bit set */
447 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
448 COSTS_N_INSNS (23), /* HI */
449 COSTS_N_INSNS (39), /* SI */
450 COSTS_N_INSNS (39), /* DI */
451 COSTS_N_INSNS (39)}, /* other */
452 COSTS_N_INSNS (1), /* cost of movsx */
453 COSTS_N_INSNS (1), /* cost of movzx */
454 8, /* "large" insn */
455 4, /* MOVE_RATIO */
456 1, /* cost for loading QImode using movzbl */
457 {1, 1, 1}, /* cost of loading integer registers
458 in QImode, HImode and SImode.
459 Relative to reg-reg move (2). */
460 {1, 1, 1}, /* cost of storing integer registers */
461 1, /* cost of reg,reg fld/fst */
462 {1, 1, 1}, /* cost of loading fp registers
463 in SFmode, DFmode and XFmode */
464 {4, 6, 6}, /* cost of storing fp registers
465 in SFmode, DFmode and XFmode */
466
467 1, /* cost of moving MMX register */
468 {1, 1}, /* cost of loading MMX registers
469 in SImode and DImode */
470 {1, 1}, /* cost of storing MMX registers
471 in SImode and DImode */
472 1, /* cost of moving SSE register */
473 {1, 1, 1}, /* cost of loading SSE registers
474 in SImode, DImode and TImode */
475 {1, 1, 1}, /* cost of storing SSE registers
476 in SImode, DImode and TImode */
477 1, /* MMX or SSE register to integer */
478 64, /* size of l1 cache. */
479 128, /* size of l2 cache. */
480 32, /* size of prefetch block */
481 1, /* number of parallel prefetches */
482 1, /* Branch cost */
483 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
484 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
485 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
486 COSTS_N_INSNS (1), /* cost of FABS instruction. */
487 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
488 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
489 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
490 DUMMY_STRINGOP_ALGS},
491 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
492 DUMMY_STRINGOP_ALGS},
493 1, /* scalar_stmt_cost. */
494 1, /* scalar load_cost. */
495 1, /* scalar_store_cost. */
496 1, /* vec_stmt_cost. */
497 1, /* vec_to_scalar_cost. */
498 1, /* scalar_to_vec_cost. */
499 1, /* vec_align_load_cost. */
500 2, /* vec_unalign_load_cost. */
501 1, /* vec_store_cost. */
502 3, /* cond_taken_branch_cost. */
503 1, /* cond_not_taken_branch_cost. */
504 };
505
506 static const
507 struct processor_costs k6_cost = {
508 COSTS_N_INSNS (1), /* cost of an add instruction */
509 COSTS_N_INSNS (2), /* cost of a lea instruction */
510 COSTS_N_INSNS (1), /* variable shift costs */
511 COSTS_N_INSNS (1), /* constant shift costs */
512 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
513 COSTS_N_INSNS (3), /* HI */
514 COSTS_N_INSNS (3), /* SI */
515 COSTS_N_INSNS (3), /* DI */
516 COSTS_N_INSNS (3)}, /* other */
517 0, /* cost of multiply per each bit set */
518 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
519 COSTS_N_INSNS (18), /* HI */
520 COSTS_N_INSNS (18), /* SI */
521 COSTS_N_INSNS (18), /* DI */
522 COSTS_N_INSNS (18)}, /* other */
523 COSTS_N_INSNS (2), /* cost of movsx */
524 COSTS_N_INSNS (2), /* cost of movzx */
525 8, /* "large" insn */
526 4, /* MOVE_RATIO */
527 3, /* cost for loading QImode using movzbl */
528 {4, 5, 4}, /* cost of loading integer registers
529 in QImode, HImode and SImode.
530 Relative to reg-reg move (2). */
531 {2, 3, 2}, /* cost of storing integer registers */
532 4, /* cost of reg,reg fld/fst */
533 {6, 6, 6}, /* cost of loading fp registers
534 in SFmode, DFmode and XFmode */
535 {4, 4, 4}, /* cost of storing fp registers
536 in SFmode, DFmode and XFmode */
537 2, /* cost of moving MMX register */
538 {2, 2}, /* cost of loading MMX registers
539 in SImode and DImode */
540 {2, 2}, /* cost of storing MMX registers
541 in SImode and DImode */
542 2, /* cost of moving SSE register */
543 {2, 2, 8}, /* cost of loading SSE registers
544 in SImode, DImode and TImode */
545 {2, 2, 8}, /* cost of storing SSE registers
546 in SImode, DImode and TImode */
547 6, /* MMX or SSE register to integer */
548 32, /* size of l1 cache. */
549 32, /* size of l2 cache. Some models
550 have integrated l2 cache, but
551 optimizing for k6 is not important
552 enough to worry about that. */
553 32, /* size of prefetch block */
554 1, /* number of parallel prefetches */
555 1, /* Branch cost */
556 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
557 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
558 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
559 COSTS_N_INSNS (2), /* cost of FABS instruction. */
560 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
561 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
562 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
563 DUMMY_STRINGOP_ALGS},
564 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
565 DUMMY_STRINGOP_ALGS},
566 1, /* scalar_stmt_cost. */
567 1, /* scalar load_cost. */
568 1, /* scalar_store_cost. */
569 1, /* vec_stmt_cost. */
570 1, /* vec_to_scalar_cost. */
571 1, /* scalar_to_vec_cost. */
572 1, /* vec_align_load_cost. */
573 2, /* vec_unalign_load_cost. */
574 1, /* vec_store_cost. */
575 3, /* cond_taken_branch_cost. */
576 1, /* cond_not_taken_branch_cost. */
577 };
578
579 static const
580 struct processor_costs athlon_cost = {
581 COSTS_N_INSNS (1), /* cost of an add instruction */
582 COSTS_N_INSNS (2), /* cost of a lea instruction */
583 COSTS_N_INSNS (1), /* variable shift costs */
584 COSTS_N_INSNS (1), /* constant shift costs */
585 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
586 COSTS_N_INSNS (5), /* HI */
587 COSTS_N_INSNS (5), /* SI */
588 COSTS_N_INSNS (5), /* DI */
589 COSTS_N_INSNS (5)}, /* other */
590 0, /* cost of multiply per each bit set */
591 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
592 COSTS_N_INSNS (26), /* HI */
593 COSTS_N_INSNS (42), /* SI */
594 COSTS_N_INSNS (74), /* DI */
595 COSTS_N_INSNS (74)}, /* other */
596 COSTS_N_INSNS (1), /* cost of movsx */
597 COSTS_N_INSNS (1), /* cost of movzx */
598 8, /* "large" insn */
599 9, /* MOVE_RATIO */
600 4, /* cost for loading QImode using movzbl */
601 {3, 4, 3}, /* cost of loading integer registers
602 in QImode, HImode and SImode.
603 Relative to reg-reg move (2). */
604 {3, 4, 3}, /* cost of storing integer registers */
605 4, /* cost of reg,reg fld/fst */
606 {4, 4, 12}, /* cost of loading fp registers
607 in SFmode, DFmode and XFmode */
608 {6, 6, 8}, /* cost of storing fp registers
609 in SFmode, DFmode and XFmode */
610 2, /* cost of moving MMX register */
611 {4, 4}, /* cost of loading MMX registers
612 in SImode and DImode */
613 {4, 4}, /* cost of storing MMX registers
614 in SImode and DImode */
615 2, /* cost of moving SSE register */
616 {4, 4, 6}, /* cost of loading SSE registers
617 in SImode, DImode and TImode */
618 {4, 4, 5}, /* cost of storing SSE registers
619 in SImode, DImode and TImode */
620 5, /* MMX or SSE register to integer */
621 64, /* size of l1 cache. */
622 256, /* size of l2 cache. */
623 64, /* size of prefetch block */
624 6, /* number of parallel prefetches */
625 5, /* Branch cost */
626 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
627 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
628 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
629 COSTS_N_INSNS (2), /* cost of FABS instruction. */
630 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
631 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
632 /* For some reason, Athlon deals better with the REP prefix (relative to loops)
633 than K8 does. Alignment becomes important after 8 bytes for memcpy and
634 128 bytes for memset. */
635 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
636 DUMMY_STRINGOP_ALGS},
637 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
638 DUMMY_STRINGOP_ALGS},
639 1, /* scalar_stmt_cost. */
640 1, /* scalar load_cost. */
641 1, /* scalar_store_cost. */
642 1, /* vec_stmt_cost. */
643 1, /* vec_to_scalar_cost. */
644 1, /* scalar_to_vec_cost. */
645 1, /* vec_align_load_cost. */
646 2, /* vec_unalign_load_cost. */
647 1, /* vec_store_cost. */
648 3, /* cond_taken_branch_cost. */
649 1, /* cond_not_taken_branch_cost. */
650 };
651
652 static const
653 struct processor_costs k8_cost = {
654 COSTS_N_INSNS (1), /* cost of an add instruction */
655 COSTS_N_INSNS (2), /* cost of a lea instruction */
656 COSTS_N_INSNS (1), /* variable shift costs */
657 COSTS_N_INSNS (1), /* constant shift costs */
658 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
659 COSTS_N_INSNS (4), /* HI */
660 COSTS_N_INSNS (3), /* SI */
661 COSTS_N_INSNS (4), /* DI */
662 COSTS_N_INSNS (5)}, /* other */
663 0, /* cost of multiply per each bit set */
664 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
665 COSTS_N_INSNS (26), /* HI */
666 COSTS_N_INSNS (42), /* SI */
667 COSTS_N_INSNS (74), /* DI */
668 COSTS_N_INSNS (74)}, /* other */
669 COSTS_N_INSNS (1), /* cost of movsx */
670 COSTS_N_INSNS (1), /* cost of movzx */
671 8, /* "large" insn */
672 9, /* MOVE_RATIO */
673 4, /* cost for loading QImode using movzbl */
674 {3, 4, 3}, /* cost of loading integer registers
675 in QImode, HImode and SImode.
676 Relative to reg-reg move (2). */
677 {3, 4, 3}, /* cost of storing integer registers */
678 4, /* cost of reg,reg fld/fst */
679 {4, 4, 12}, /* cost of loading fp registers
680 in SFmode, DFmode and XFmode */
681 {6, 6, 8}, /* cost of storing fp registers
682 in SFmode, DFmode and XFmode */
683 2, /* cost of moving MMX register */
684 {3, 3}, /* cost of loading MMX registers
685 in SImode and DImode */
686 {4, 4}, /* cost of storing MMX registers
687 in SImode and DImode */
688 2, /* cost of moving SSE register */
689 {4, 3, 6}, /* cost of loading SSE registers
690 in SImode, DImode and TImode */
691 {4, 4, 5}, /* cost of storing SSE registers
692 in SImode, DImode and TImode */
693 5, /* MMX or SSE register to integer */
694 64, /* size of l1 cache. */
695 512, /* size of l2 cache. */
696 64, /* size of prefetch block */
697 /* New AMD processors never drop prefetches; if they cannot be performed
698 immediately, they are queued. We set the number of simultaneous prefetches
699 to a large constant to reflect this (it is probably not a good idea to leave
700 the number of prefetches completely unlimited, as their execution also takes
701 some time). */
702 100, /* number of parallel prefetches */
703 5, /* Branch cost */
704 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
705 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
706 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
707 COSTS_N_INSNS (2), /* cost of FABS instruction. */
708 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
709 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
710 /* K8 has an optimized REP instruction for medium sized blocks, but for very small
711 blocks it is better to use a loop. For large blocks, a libcall can do
712 nontemporal accesses and beat the inline expansion considerably. */
713 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
714 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
715 {{libcall, {{8, loop}, {24, unrolled_loop},
716 {2048, rep_prefix_4_byte}, {-1, libcall}}},
717 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
718 4, /* scalar_stmt_cost. */
719 2, /* scalar load_cost. */
720 2, /* scalar_store_cost. */
721 5, /* vec_stmt_cost. */
722 0, /* vec_to_scalar_cost. */
723 2, /* scalar_to_vec_cost. */
724 2, /* vec_align_load_cost. */
725 3, /* vec_unalign_load_cost. */
726 3, /* vec_store_cost. */
727 6, /* cond_taken_branch_cost. */
728 1, /* cond_not_taken_branch_cost. */
729 };
730
731 struct processor_costs amdfam10_cost = {
732 COSTS_N_INSNS (1), /* cost of an add instruction */
733 COSTS_N_INSNS (2), /* cost of a lea instruction */
734 COSTS_N_INSNS (1), /* variable shift costs */
735 COSTS_N_INSNS (1), /* constant shift costs */
736 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
737 COSTS_N_INSNS (4), /* HI */
738 COSTS_N_INSNS (3), /* SI */
739 COSTS_N_INSNS (4), /* DI */
740 COSTS_N_INSNS (5)}, /* other */
741 0, /* cost of multiply per each bit set */
742 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
743 COSTS_N_INSNS (35), /* HI */
744 COSTS_N_INSNS (51), /* SI */
745 COSTS_N_INSNS (83), /* DI */
746 COSTS_N_INSNS (83)}, /* other */
747 COSTS_N_INSNS (1), /* cost of movsx */
748 COSTS_N_INSNS (1), /* cost of movzx */
749 8, /* "large" insn */
750 9, /* MOVE_RATIO */
751 4, /* cost for loading QImode using movzbl */
752 {3, 4, 3}, /* cost of loading integer registers
753 in QImode, HImode and SImode.
754 Relative to reg-reg move (2). */
755 {3, 4, 3}, /* cost of storing integer registers */
756 4, /* cost of reg,reg fld/fst */
757 {4, 4, 12}, /* cost of loading fp registers
758 in SFmode, DFmode and XFmode */
759 {6, 6, 8}, /* cost of storing fp registers
760 in SFmode, DFmode and XFmode */
761 2, /* cost of moving MMX register */
762 {3, 3}, /* cost of loading MMX registers
763 in SImode and DImode */
764 {4, 4}, /* cost of storing MMX registers
765 in SImode and DImode */
766 2, /* cost of moving SSE register */
767 {4, 4, 3}, /* cost of loading SSE registers
768 in SImode, DImode and TImode */
769 {4, 4, 5}, /* cost of storing SSE registers
770 in SImode, DImode and TImode */
771 3, /* MMX or SSE register to integer */
772 /* On K8
773 MOVD reg64, xmmreg Double FSTORE 4
774 MOVD reg32, xmmreg Double FSTORE 4
775 On AMDFAM10
776 MOVD reg64, xmmreg Double FADD 3
777 1/1 1/1
778 MOVD reg32, xmmreg Double FADD 3
779 1/1 1/1 */
780 64, /* size of l1 cache. */
781 512, /* size of l2 cache. */
782 64, /* size of prefetch block */
783 /* New AMD processors never drop prefetches; if they cannot be performed
784 immediately, they are queued. We set the number of simultaneous prefetches
785 to a large constant to reflect this (it is probably not a good idea to leave
786 the number of prefetches completely unlimited, as their execution also takes
787 some time). */
788 100, /* number of parallel prefetches */
789 5, /* Branch cost */
790 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
791 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
792 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
793 COSTS_N_INSNS (2), /* cost of FABS instruction. */
794 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
795 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
796
797 /* AMDFAM10 has an optimized REP instruction for medium sized blocks, but for
798 very small blocks it is better to use a loop. For large blocks, a libcall can
799 do nontemporal accesses and beat the inline expansion considerably. */
800 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
801 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
802 {{libcall, {{8, loop}, {24, unrolled_loop},
803 {2048, rep_prefix_4_byte}, {-1, libcall}}},
804 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
805 4, /* scalar_stmt_cost. */
806 2, /* scalar load_cost. */
807 2, /* scalar_store_cost. */
808 6, /* vec_stmt_cost. */
809 0, /* vec_to_scalar_cost. */
810 2, /* scalar_to_vec_cost. */
811 2, /* vec_align_load_cost. */
812 2, /* vec_unalign_load_cost. */
813 2, /* vec_store_cost. */
814 6, /* cond_taken_branch_cost. */
815 1, /* cond_not_taken_branch_cost. */
816 };
817
818 static const
819 struct processor_costs pentium4_cost = {
820 COSTS_N_INSNS (1), /* cost of an add instruction */
821 COSTS_N_INSNS (3), /* cost of a lea instruction */
822 COSTS_N_INSNS (4), /* variable shift costs */
823 COSTS_N_INSNS (4), /* constant shift costs */
824 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
825 COSTS_N_INSNS (15), /* HI */
826 COSTS_N_INSNS (15), /* SI */
827 COSTS_N_INSNS (15), /* DI */
828 COSTS_N_INSNS (15)}, /* other */
829 0, /* cost of multiply per each bit set */
830 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
831 COSTS_N_INSNS (56), /* HI */
832 COSTS_N_INSNS (56), /* SI */
833 COSTS_N_INSNS (56), /* DI */
834 COSTS_N_INSNS (56)}, /* other */
835 COSTS_N_INSNS (1), /* cost of movsx */
836 COSTS_N_INSNS (1), /* cost of movzx */
837 16, /* "large" insn */
838 6, /* MOVE_RATIO */
839 2, /* cost for loading QImode using movzbl */
840 {4, 5, 4}, /* cost of loading integer registers
841 in QImode, HImode and SImode.
842 Relative to reg-reg move (2). */
843 {2, 3, 2}, /* cost of storing integer registers */
844 2, /* cost of reg,reg fld/fst */
845 {2, 2, 6}, /* cost of loading fp registers
846 in SFmode, DFmode and XFmode */
847 {4, 4, 6}, /* cost of storing fp registers
848 in SFmode, DFmode and XFmode */
849 2, /* cost of moving MMX register */
850 {2, 2}, /* cost of loading MMX registers
851 in SImode and DImode */
852 {2, 2}, /* cost of storing MMX registers
853 in SImode and DImode */
854 12, /* cost of moving SSE register */
855 {12, 12, 12}, /* cost of loading SSE registers
856 in SImode, DImode and TImode */
857 {2, 2, 8}, /* cost of storing SSE registers
858 in SImode, DImode and TImode */
859 10, /* MMX or SSE register to integer */
860 8, /* size of l1 cache. */
861 256, /* size of l2 cache. */
862 64, /* size of prefetch block */
863 6, /* number of parallel prefetches */
864 2, /* Branch cost */
865 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
866 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
867 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
868 COSTS_N_INSNS (2), /* cost of FABS instruction. */
869 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
870 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
871 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
872 DUMMY_STRINGOP_ALGS},
873 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
874 {-1, libcall}}},
875 DUMMY_STRINGOP_ALGS},
876 1, /* scalar_stmt_cost. */
877 1, /* scalar load_cost. */
878 1, /* scalar_store_cost. */
879 1, /* vec_stmt_cost. */
880 1, /* vec_to_scalar_cost. */
881 1, /* scalar_to_vec_cost. */
882 1, /* vec_align_load_cost. */
883 2, /* vec_unalign_load_cost. */
884 1, /* vec_store_cost. */
885 3, /* cond_taken_branch_cost. */
886 1, /* cond_not_taken_branch_cost. */
887 };
888
889 static const
890 struct processor_costs nocona_cost = {
891 COSTS_N_INSNS (1), /* cost of an add instruction */
892 COSTS_N_INSNS (1), /* cost of a lea instruction */
893 COSTS_N_INSNS (1), /* variable shift costs */
894 COSTS_N_INSNS (1), /* constant shift costs */
895 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
896 COSTS_N_INSNS (10), /* HI */
897 COSTS_N_INSNS (10), /* SI */
898 COSTS_N_INSNS (10), /* DI */
899 COSTS_N_INSNS (10)}, /* other */
900 0, /* cost of multiply per each bit set */
901 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
902 COSTS_N_INSNS (66), /* HI */
903 COSTS_N_INSNS (66), /* SI */
904 COSTS_N_INSNS (66), /* DI */
905 COSTS_N_INSNS (66)}, /* other */
906 COSTS_N_INSNS (1), /* cost of movsx */
907 COSTS_N_INSNS (1), /* cost of movzx */
908 16, /* "large" insn */
909 17, /* MOVE_RATIO */
910 4, /* cost for loading QImode using movzbl */
911 {4, 4, 4}, /* cost of loading integer registers
912 in QImode, HImode and SImode.
913 Relative to reg-reg move (2). */
914 {4, 4, 4}, /* cost of storing integer registers */
915 3, /* cost of reg,reg fld/fst */
916 {12, 12, 12}, /* cost of loading fp registers
917 in SFmode, DFmode and XFmode */
918 {4, 4, 4}, /* cost of storing fp registers
919 in SFmode, DFmode and XFmode */
920 6, /* cost of moving MMX register */
921 {12, 12}, /* cost of loading MMX registers
922 in SImode and DImode */
923 {12, 12}, /* cost of storing MMX registers
924 in SImode and DImode */
925 6, /* cost of moving SSE register */
926 {12, 12, 12}, /* cost of loading SSE registers
927 in SImode, DImode and TImode */
928 {12, 12, 12}, /* cost of storing SSE registers
929 in SImode, DImode and TImode */
930 8, /* MMX or SSE register to integer */
931 8, /* size of l1 cache. */
932 1024, /* size of l2 cache. */
933 128, /* size of prefetch block */
934 8, /* number of parallel prefetches */
935 1, /* Branch cost */
936 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
937 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
938 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
939 COSTS_N_INSNS (3), /* cost of FABS instruction. */
940 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
941 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
942 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
943 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
944 {100000, unrolled_loop}, {-1, libcall}}}},
945 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
946 {-1, libcall}}},
947 {libcall, {{24, loop}, {64, unrolled_loop},
948 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
949 1, /* scalar_stmt_cost. */
950 1, /* scalar load_cost. */
951 1, /* scalar_store_cost. */
952 1, /* vec_stmt_cost. */
953 1, /* vec_to_scalar_cost. */
954 1, /* scalar_to_vec_cost. */
955 1, /* vec_align_load_cost. */
956 2, /* vec_unalign_load_cost. */
957 1, /* vec_store_cost. */
958 3, /* cond_taken_branch_cost. */
959 1, /* cond_not_taken_branch_cost. */
960 };
961
962 static const
963 struct processor_costs core2_cost = {
964 COSTS_N_INSNS (1), /* cost of an add instruction */
965 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
966 COSTS_N_INSNS (1), /* variable shift costs */
967 COSTS_N_INSNS (1), /* constant shift costs */
968 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
969 COSTS_N_INSNS (3), /* HI */
970 COSTS_N_INSNS (3), /* SI */
971 COSTS_N_INSNS (3), /* DI */
972 COSTS_N_INSNS (3)}, /* other */
973 0, /* cost of multiply per each bit set */
974 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
975 COSTS_N_INSNS (22), /* HI */
976 COSTS_N_INSNS (22), /* SI */
977 COSTS_N_INSNS (22), /* DI */
978 COSTS_N_INSNS (22)}, /* other */
979 COSTS_N_INSNS (1), /* cost of movsx */
980 COSTS_N_INSNS (1), /* cost of movzx */
981 8, /* "large" insn */
982 16, /* MOVE_RATIO */
983 2, /* cost for loading QImode using movzbl */
984 {6, 6, 6}, /* cost of loading integer registers
985 in QImode, HImode and SImode.
986 Relative to reg-reg move (2). */
987 {4, 4, 4}, /* cost of storing integer registers */
988 2, /* cost of reg,reg fld/fst */
989 {6, 6, 6}, /* cost of loading fp registers
990 in SFmode, DFmode and XFmode */
991 {4, 4, 4}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
992 2, /* cost of moving MMX register */
993 {6, 6}, /* cost of loading MMX registers
994 in SImode and DImode */
995 {4, 4}, /* cost of storing MMX registers
996 in SImode and DImode */
997 2, /* cost of moving SSE register */
998 {6, 6, 6}, /* cost of loading SSE registers
999 in SImode, DImode and TImode */
1000 {4, 4, 4}, /* cost of storing SSE registers
1001 in SImode, DImode and TImode */
1002 2, /* MMX or SSE register to integer */
1003 32, /* size of l1 cache. */
1004 2048, /* size of l2 cache. */
1005 128, /* size of prefetch block */
1006 8, /* number of parallel prefetches */
1007 3, /* Branch cost */
1008 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1009 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1010 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1011 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1012 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1013 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1014 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1015 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1016 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1017 {{libcall, {{8, loop}, {15, unrolled_loop},
1018 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1019 {libcall, {{24, loop}, {32, unrolled_loop},
1020 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1021 1, /* scalar_stmt_cost. */
1022 1, /* scalar load_cost. */
1023 1, /* scalar_store_cost. */
1024 1, /* vec_stmt_cost. */
1025 1, /* vec_to_scalar_cost. */
1026 1, /* scalar_to_vec_cost. */
1027 1, /* vec_align_load_cost. */
1028 2, /* vec_unalign_load_cost. */
1029 1, /* vec_store_cost. */
1030 3, /* cond_taken_branch_cost. */
1031 1, /* cond_not_taken_branch_cost. */
1032 };
1033
1034 /* Generic64 should produce code tuned for Nocona and K8. */
1035 static const
1036 struct processor_costs generic64_cost = {
1037 COSTS_N_INSNS (1), /* cost of an add instruction */
1038 /* On all chips taken into consideration, lea is 2 cycles or more. With
1039 this cost, however, our current implementation of synth_mult results in
1040 the use of unnecessary temporary registers, causing regressions on several
1041 SPECfp benchmarks. */
1042 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1043 COSTS_N_INSNS (1), /* variable shift costs */
1044 COSTS_N_INSNS (1), /* constant shift costs */
1045 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1046 COSTS_N_INSNS (4), /* HI */
1047 COSTS_N_INSNS (3), /* SI */
1048 COSTS_N_INSNS (4), /* DI */
1049 COSTS_N_INSNS (2)}, /* other */
1050 0, /* cost of multiply per each bit set */
1051 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1052 COSTS_N_INSNS (26), /* HI */
1053 COSTS_N_INSNS (42), /* SI */
1054 COSTS_N_INSNS (74), /* DI */
1055 COSTS_N_INSNS (74)}, /* other */
1056 COSTS_N_INSNS (1), /* cost of movsx */
1057 COSTS_N_INSNS (1), /* cost of movzx */
1058 8, /* "large" insn */
1059 17, /* MOVE_RATIO */
1060 4, /* cost for loading QImode using movzbl */
1061 {4, 4, 4}, /* cost of loading integer registers
1062 in QImode, HImode and SImode.
1063 Relative to reg-reg move (2). */
1064 {4, 4, 4}, /* cost of storing integer registers */
1065 4, /* cost of reg,reg fld/fst */
1066 {12, 12, 12}, /* cost of loading fp registers
1067 in SFmode, DFmode and XFmode */
1068 {6, 6, 8}, /* cost of storing fp registers
1069 in SFmode, DFmode and XFmode */
1070 2, /* cost of moving MMX register */
1071 {8, 8}, /* cost of loading MMX registers
1072 in SImode and DImode */
1073 {8, 8}, /* cost of storing MMX registers
1074 in SImode and DImode */
1075 2, /* cost of moving SSE register */
1076 {8, 8, 8}, /* cost of loading SSE registers
1077 in SImode, DImode and TImode */
1078 {8, 8, 8}, /* cost of storing SSE registers
1079 in SImode, DImode and TImode */
1080 5, /* MMX or SSE register to integer */
1081 32, /* size of l1 cache. */
1082 512, /* size of l2 cache. */
1083 64, /* size of prefetch block */
1084 6, /* number of parallel prefetches */
1085 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
1086 is increased to the perhaps more appropriate value of 5. */
1087 3, /* Branch cost */
1088 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1089 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1090 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1091 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1092 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1093 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1094 {DUMMY_STRINGOP_ALGS,
1095 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1096 {DUMMY_STRINGOP_ALGS,
1097 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1098 1, /* scalar_stmt_cost. */
1099 1, /* scalar load_cost. */
1100 1, /* scalar_store_cost. */
1101 1, /* vec_stmt_cost. */
1102 1, /* vec_to_scalar_cost. */
1103 1, /* scalar_to_vec_cost. */
1104 1, /* vec_align_load_cost. */
1105 2, /* vec_unalign_load_cost. */
1106 1, /* vec_store_cost. */
1107 3, /* cond_taken_branch_cost. */
1108 1, /* cond_not_taken_branch_cost. */
1109 };
1110
1111 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1112 static const
1113 struct processor_costs generic32_cost = {
1114 COSTS_N_INSNS (1), /* cost of an add instruction */
1115 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1116 COSTS_N_INSNS (1), /* variable shift costs */
1117 COSTS_N_INSNS (1), /* constant shift costs */
1118 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1119 COSTS_N_INSNS (4), /* HI */
1120 COSTS_N_INSNS (3), /* SI */
1121 COSTS_N_INSNS (4), /* DI */
1122 COSTS_N_INSNS (2)}, /* other */
1123 0, /* cost of multiply per each bit set */
1124 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1125 COSTS_N_INSNS (26), /* HI */
1126 COSTS_N_INSNS (42), /* SI */
1127 COSTS_N_INSNS (74), /* DI */
1128 COSTS_N_INSNS (74)}, /* other */
1129 COSTS_N_INSNS (1), /* cost of movsx */
1130 COSTS_N_INSNS (1), /* cost of movzx */
1131 8, /* "large" insn */
1132 17, /* MOVE_RATIO */
1133 4, /* cost for loading QImode using movzbl */
1134 {4, 4, 4}, /* cost of loading integer registers
1135 in QImode, HImode and SImode.
1136 Relative to reg-reg move (2). */
1137 {4, 4, 4}, /* cost of storing integer registers */
1138 4, /* cost of reg,reg fld/fst */
1139 {12, 12, 12}, /* cost of loading fp registers
1140 in SFmode, DFmode and XFmode */
1141 {6, 6, 8}, /* cost of storing fp registers
1142 in SFmode, DFmode and XFmode */
1143 2, /* cost of moving MMX register */
1144 {8, 8}, /* cost of loading MMX registers
1145 in SImode and DImode */
1146 {8, 8}, /* cost of storing MMX registers
1147 in SImode and DImode */
1148 2, /* cost of moving SSE register */
1149 {8, 8, 8}, /* cost of loading SSE registers
1150 in SImode, DImode and TImode */
1151 {8, 8, 8}, /* cost of storing SSE registers
1152 in SImode, DImode and TImode */
1153 5, /* MMX or SSE register to integer */
1154 32, /* size of l1 cache. */
1155 256, /* size of l2 cache. */
1156 64, /* size of prefetch block */
1157 6, /* number of parallel prefetches */
1158 3, /* Branch cost */
1159 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1160 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1161 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1162 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1163 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1164 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1165 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1166 DUMMY_STRINGOP_ALGS},
1167 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1168 DUMMY_STRINGOP_ALGS},
1169 1, /* scalar_stmt_cost. */
1170 1, /* scalar load_cost. */
1171 1, /* scalar_store_cost. */
1172 1, /* vec_stmt_cost. */
1173 1, /* vec_to_scalar_cost. */
1174 1, /* scalar_to_vec_cost. */
1175 1, /* vec_align_load_cost. */
1176 2, /* vec_unalign_load_cost. */
1177 1, /* vec_store_cost. */
1178 3, /* cond_taken_branch_cost. */
1179 1, /* cond_not_taken_branch_cost. */
1180 };
1181
1182 const struct processor_costs *ix86_cost = &pentium_cost;
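/* Added sketch of how the tables above are consumed (member names are
   assumed to mirror the comments, as declared in struct processor_costs
   in i386.h): the cost hooks in this file read entries through this
   pointer, for instance

     total = COSTS_N_INSNS (1) + ix86_cost->add;
     mult  = ix86_cost->mult_init[MODE_INDEX (mode)];

   ix86_cost is repointed at the table matching the selected -mtune
   processor when the command-line options are processed; the
   &pentium_cost initializer here is only the default.  */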
1183
1184 /* Processor feature/optimization bitmasks. */
1185 #define m_386 (1<<PROCESSOR_I386)
1186 #define m_486 (1<<PROCESSOR_I486)
1187 #define m_PENT (1<<PROCESSOR_PENTIUM)
1188 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1189 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1190 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1191 #define m_CORE2 (1<<PROCESSOR_CORE2)
1192
1193 #define m_GEODE (1<<PROCESSOR_GEODE)
1194 #define m_K6 (1<<PROCESSOR_K6)
1195 #define m_K6_GEODE (m_K6 | m_GEODE)
1196 #define m_K8 (1<<PROCESSOR_K8)
1197 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1198 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1199 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1200 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1201
1202 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1203 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1204
1205 /* Generic instruction choice should be the common subset of supported CPUs
1206 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1207 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1208
1209 /* Feature tests against the various tunings. */
1210 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1211 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1212 negatively, so enabling it for Generic64 seems like a good code size
1213 tradeoff. We can't enable it for 32bit generic because it does not
1214 work well with PPro based chips. */
1215 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1216
1217 /* X86_TUNE_PUSH_MEMORY */
1218 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1219 | m_NOCONA | m_CORE2 | m_GENERIC,
1220
1221 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1222 m_486 | m_PENT,
1223
1224 /* X86_TUNE_USE_BIT_TEST */
1225 m_386,
1226
1227 /* X86_TUNE_UNROLL_STRLEN */
1228 m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
1229
1230 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1231 m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1232
1233 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1234 on simulation results. But after P4 was made, no performance benefit
1235 was observed with branch hints. They also increase the code size.
1236 As a result, icc never generates branch hints. */
1237 0,
1238
1239 /* X86_TUNE_DOUBLE_WITH_ADD */
1240 ~m_386,
1241
1242 /* X86_TUNE_USE_SAHF */
1243 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1244 | m_NOCONA | m_CORE2 | m_GENERIC,
1245
1246 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1247 partial dependencies. */
1248 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
1249 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1250
1251 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1252 register stalls on the Generic32 compilation setting as well. However,
1253 in the current implementation the partial register stalls are not eliminated
1254 very well - they can be introduced via subregs synthesized by combine
1255 and can happen in caller/callee saving sequences. Because this option
1256 pays back little on PPro based chips and conflicts with the partial reg
1257 dependencies used by Athlon/P4 based chips, it is better to leave it off
1258 for generic32 for now. */
1259 m_PPRO,
1260
1261 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1262 m_CORE2 | m_GENERIC,
1263
1264 /* X86_TUNE_USE_HIMODE_FIOP */
1265 m_386 | m_486 | m_K6_GEODE,
1266
1267 /* X86_TUNE_USE_SIMODE_FIOP */
1268 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
1269
1270 /* X86_TUNE_USE_MOV0 */
1271 m_K6,
1272
1273 /* X86_TUNE_USE_CLTD */
1274 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1275
1276 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1277 m_PENT4,
1278
1279 /* X86_TUNE_SPLIT_LONG_MOVES */
1280 m_PPRO,
1281
1282 /* X86_TUNE_READ_MODIFY_WRITE */
1283 ~m_PENT,
1284
1285 /* X86_TUNE_READ_MODIFY */
1286 ~(m_PENT | m_PPRO),
1287
1288 /* X86_TUNE_PROMOTE_QIMODE */
1289 m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
1290 | m_GENERIC /* | m_PENT4 ? */,
1291
1292 /* X86_TUNE_FAST_PREFIX */
1293 ~(m_PENT | m_486 | m_386),
1294
1295 /* X86_TUNE_SINGLE_STRINGOP */
1296 m_386 | m_PENT4 | m_NOCONA,
1297
1298 /* X86_TUNE_QIMODE_MATH */
1299 ~0,
1300
1301 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1302 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL, this option
1303 might be considered for Generic32 if our scheme for avoiding partial
1304 stalls were more effective. */
1305 ~m_PPRO,
1306
1307 /* X86_TUNE_PROMOTE_QI_REGS */
1308 0,
1309
1310 /* X86_TUNE_PROMOTE_HI_REGS */
1311 m_PPRO,
1312
1313 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1314 m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1315
1316 /* X86_TUNE_ADD_ESP_8 */
1317 m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
1318 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1319
1320 /* X86_TUNE_SUB_ESP_4 */
1321 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1322
1323 /* X86_TUNE_SUB_ESP_8 */
1324 m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
1325 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1326
1327 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1328 for DFmode copies */
1329 ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1330 | m_GENERIC | m_GEODE),
1331
1332 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1333 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1334
1335 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1336 conflict here between PPro/Pentium4 based chips that treat 128bit
1337 SSE registers as single units and K8 based chips that divide SSE
1338 registers into two 64bit halves. This knob promotes all store destinations
1339 to be 128bit to allow register renaming on 128bit SSE units, but usually
1340 results in one extra microop on 64bit SSE units. Experimental results
1341 show that disabling this option on P4 brings over a 20% SPECfp regression,
1342 while enabling it on K8 brings roughly a 2.4% regression that can be partly
1343 masked by careful scheduling of moves. */
1344 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1345
1346 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1347 m_AMDFAM10,
1348
1349 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1350 are resolved on SSE register parts instead of whole registers, so we may
1351 maintain just the lower part of scalar values in the proper format, leaving the
1352 upper part undefined. */
1353 m_ATHLON_K8,
1354
1355 /* X86_TUNE_SSE_TYPELESS_STORES */
1356 m_AMD_MULTIPLE,
1357
1358 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1359 m_PPRO | m_PENT4 | m_NOCONA,
1360
1361 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1362 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1363
1364 /* X86_TUNE_PROLOGUE_USING_MOVE */
1365 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1366
1367 /* X86_TUNE_EPILOGUE_USING_MOVE */
1368 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1369
1370 /* X86_TUNE_SHIFT1 */
1371 ~m_486,
1372
1373 /* X86_TUNE_USE_FFREEP */
1374 m_AMD_MULTIPLE,
1375
1376 /* X86_TUNE_INTER_UNIT_MOVES */
1377 ~(m_AMD_MULTIPLE | m_GENERIC),
1378
1379 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1380 ~(m_AMDFAM10),
1381
1382 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1383 than 4 branch instructions in the 16 byte window. */
1384 m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1385
1386 /* X86_TUNE_SCHEDULE */
1387 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1388
1389 /* X86_TUNE_USE_BT */
1390 m_AMD_MULTIPLE,
1391
1392 /* X86_TUNE_USE_INCDEC */
1393 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1394
1395 /* X86_TUNE_PAD_RETURNS */
1396 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1397
1398 /* X86_TUNE_EXT_80387_CONSTANTS */
1399 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1400
1401 /* X86_TUNE_SHORTEN_X87_SSE */
1402 ~m_K8,
1403
1404 /* X86_TUNE_AVOID_VECTOR_DECODE */
1405 m_K8 | m_GENERIC64,
1406
1407 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
1408 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1409 ~(m_386 | m_486),
1410
1411 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1412 vector path on AMD machines. */
1413 m_K8 | m_GENERIC64 | m_AMDFAM10,
1414
1415 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1416 machines. */
1417 m_K8 | m_GENERIC64 | m_AMDFAM10,
1418
1419 /* X86_TUNE_MOVE_M1_VIA_OR: On Pentiums, it is faster to load -1 via OR
1420 than via a MOV. */
1421 m_PENT,
1422
1423 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1424 but one byte longer. */
1425 m_PENT,
1426
1427 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
1428 operand that cannot be represented using a modRM byte. The XOR
1429 replacement is long decoded, so this split helps here as well. */
1430 m_K6,
1431
1432 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1433 from integer to FP. */
1434 m_AMDFAM10,
1435 };
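/* Added sketch (names of the surrounding machinery are assumptions): each
   entry above is a mask of processor bits (m_386, m_K8, ...) telling for
   which tunings the corresponding X86_TUNE_* heuristic is enabled.  With
   the active tuning expressed as a single bit, a test looks like

     if (ix86_tune_features[X86_TUNE_USE_LEAVE] & (1 << ix86_tune))
       use_leave_in_epilogue ();

   where ix86_tune identifies the processor being tuned for and
   use_leave_in_epilogue is purely illustrative.  The TARGET_* convenience
   macros in i386.h are typically built from tests of this shape.  */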
1436
1437 /* Feature tests against the various architecture variations. */
1438 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1439 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1440 ~(m_386 | m_486 | m_PENT | m_K6),
1441
1442 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1443 ~m_386,
1444
1445 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1446 ~(m_386 | m_486),
1447
1448 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1449 ~m_386,
1450
1451 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1452 ~m_386,
1453 };
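/* As a concrete illustration of how these tables are consumed: both
   ix86_tune_features and ix86_arch_features hold, per feature, a bitmask of
   m_* CPU bits.  override_options below narrows each entry with the mask of
   the single selected CPU,

     ix86_arch_mask = 1u << ix86_arch;
     for (i = 0; i < X86_ARCH_LAST; ++i)
       ix86_arch_features[i] &= ix86_arch_mask;

   (and similarly with ix86_tune_mask for the tuning table), so afterwards
   each entry acts as a simple boolean for the selected -march/-mtune CPU.  */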
1454
1455 static const unsigned int x86_accumulate_outgoing_args
1456 = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1457
1458 static const unsigned int x86_arch_always_fancy_math_387
1459 = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1460 | m_NOCONA | m_CORE2 | m_GENERIC;
1461
1462 static enum stringop_alg stringop_alg = no_stringop;
1463
1464 /* In case the average insn count for a single function invocation is
1465 lower than this constant, emit fast (but longer) prologue and
1466 epilogue code. */
1467 #define FAST_PROLOGUE_INSN_COUNT 20
1468
1469 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
1470 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1471 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1472 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1473
1474 /* Array of the smallest class containing reg number REGNO, indexed by
1475 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1476
1477 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1478 {
1479 /* ax, dx, cx, bx */
1480 AREG, DREG, CREG, BREG,
1481 /* si, di, bp, sp */
1482 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1483 /* FP registers */
1484 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1485 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1486 /* arg pointer */
1487 NON_Q_REGS,
1488 /* flags, fpsr, fpcr, frame */
1489 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1490 /* SSE registers */
1491 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1492 SSE_REGS, SSE_REGS,
1493 /* MMX registers */
1494 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1495 MMX_REGS, MMX_REGS,
1496 /* REX registers */
1497 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1498 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1499 /* SSE REX registers */
1500 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1501 SSE_REGS, SSE_REGS,
1502 };
1503
1504 /* The "default" register map used in 32bit mode. */
1505
1506 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1507 {
1508 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1509 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1510 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1511 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1512 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1513 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1514 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1515 };
1516
1517 static int const x86_64_int_parameter_registers[6] =
1518 {
1519 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1520 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1521 };
1522
1523 static int const x86_64_ms_abi_int_parameter_registers[4] =
1524 {
1525 2 /*RCX*/, 1 /*RDX*/,
1526 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1527 };
1528
1529 static int const x86_64_int_return_registers[4] =
1530 {
1531 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1532 };
1533
1534 /* The "default" register map used in 64bit mode. */
1535 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1536 {
1537 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1538 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1539 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1540 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1541 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1542 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
1543 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1544 };
1545
1546 /* Define the register numbers to be used in Dwarf debugging information.
1547 The SVR4 reference port C compiler uses the following register numbers
1548 in its Dwarf output code:
1549 0 for %eax (gcc regno = 0)
1550 1 for %ecx (gcc regno = 2)
1551 2 for %edx (gcc regno = 1)
1552 3 for %ebx (gcc regno = 3)
1553 4 for %esp (gcc regno = 7)
1554 5 for %ebp (gcc regno = 6)
1555 6 for %esi (gcc regno = 4)
1556 7 for %edi (gcc regno = 5)
1557 The following three DWARF register numbers are never generated by
1558 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1559 believes these numbers have these meanings.
1560 8 for %eip (no gcc equivalent)
1561 9 for %eflags (gcc regno = 17)
1562 10 for %trapno (no gcc equivalent)
1563 It is not at all clear how we should number the FP stack registers
1564 for the x86 architecture. If the version of SDB on x86/svr4 were
1565 a bit less brain dead with respect to floating-point then we would
1566 have a precedent to follow with respect to DWARF register numbers
1567 for x86 FP registers, but the SDB on x86/svr4 is so completely
1568 broken with respect to FP registers that it is hardly worth thinking
1569 of it as something to strive for compatibility with.
1570 The version of x86/svr4 SDB I have at the moment does (partially)
1571 seem to believe that DWARF register number 11 is associated with
1572 the x86 register %st(0), but that's about all. Higher DWARF
1573 register numbers don't seem to be associated with anything in
1574 particular, and even for DWARF regno 11, SDB only seems to under-
1575 stand that it should say that a variable lives in %st(0) (when
1576 asked via an `=' command) if we said it was in DWARF regno 11,
1577 but SDB still prints garbage when asked for the value of the
1578 variable in question (via a `/' command).
1579 (Also note that the labels SDB prints for various FP stack regs
1580 when doing an `x' command are all wrong.)
1581 Note that these problems generally don't affect the native SVR4
1582 C compiler because it doesn't allow the use of -O with -g and
1583 because when it is *not* optimizing, it allocates a memory
1584 location for each floating-point variable, and the memory
1585 location is what gets described in the DWARF AT_location
1586 attribute for the variable in question.
1587 Regardless of the severe mental illness of the x86/svr4 SDB, we
1588 do something sensible here and we use the following DWARF
1589 register numbers. Note that these are all stack-top-relative
1590 numbers.
1591 11 for %st(0) (gcc regno = 8)
1592 12 for %st(1) (gcc regno = 9)
1593 13 for %st(2) (gcc regno = 10)
1594 14 for %st(3) (gcc regno = 11)
1595 15 for %st(4) (gcc regno = 12)
1596 16 for %st(5) (gcc regno = 13)
1597 17 for %st(6) (gcc regno = 14)
1598 18 for %st(7) (gcc regno = 15)
1599 */
1600 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1601 {
1602 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1603 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1604 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1605 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1606 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1607 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1608 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1609 };
1610
1611 /* Test and compare insns in i386.md store the information needed to
1612 generate branch and scc insns here. */
1613
1614 rtx ix86_compare_op0 = NULL_RTX;
1615 rtx ix86_compare_op1 = NULL_RTX;
1616 rtx ix86_compare_emitted = NULL_RTX;
1617
1618 /* Size of the register save area. */
1619 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
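/* With the usual 64-bit values of these macros -- REGPARM_MAX == 6,
   UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8, as assumed here -- this is
   6*8 + 8*16 == 176 bytes, the register save area size described by the
   x86-64 psABI for varargs functions.  */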
1620
1621 /* Define the structure for the machine field in struct function. */
1622
1623 struct stack_local_entry GTY(())
1624 {
1625 unsigned short mode;
1626 unsigned short n;
1627 rtx rtl;
1628 struct stack_local_entry *next;
1629 };
1630
1631 /* Structure describing stack frame layout.
1632 Stack grows downward:
1633
1634 [arguments]
1635 <- ARG_POINTER
1636 saved pc
1637
1638 saved frame pointer if frame_pointer_needed
1639 <- HARD_FRAME_POINTER
1640 [saved regs]
1641
1642 [padding1] \
1643 )
1644 [va_arg registers] (
1645 > to_allocate <- FRAME_POINTER
1646 [frame] (
1647 )
1648 [padding2] /
1649 */
1650 struct ix86_frame
1651 {
1652 int nregs;
1653 int padding1;
1654 int va_arg_size;
1655 HOST_WIDE_INT frame;
1656 int padding2;
1657 int outgoing_arguments_size;
1658 int red_zone_size;
1659
1660 HOST_WIDE_INT to_allocate;
1661 /* The offsets relative to ARG_POINTER. */
1662 HOST_WIDE_INT frame_pointer_offset;
1663 HOST_WIDE_INT hard_frame_pointer_offset;
1664 HOST_WIDE_INT stack_pointer_offset;
1665
1666 /* When save_regs_using_mov is set, emit prologue using
1667 move instead of push instructions. */
1668 bool save_regs_using_mov;
1669 };
1670
1671 /* Code model option. */
1672 enum cmodel ix86_cmodel;
1673 /* Asm dialect. */
1674 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1675 /* TLS dialects. */
1676 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1677
1678 /* Which unit we are generating floating point math for. */
1679 enum fpmath_unit ix86_fpmath;
1680
1681 /* Which CPU we are scheduling for. */
1682 enum processor_type ix86_tune;
1683
1684 /* Which instruction set architecture to use. */
1685 enum processor_type ix86_arch;
1686
1687 /* True if the SSE prefetch instruction is not a NOP. */
1688 int x86_prefetch_sse;
1689
1690 /* ix86_regparm_string as a number */
1691 static int ix86_regparm;
1692
1693 /* -mstackrealign option */
1694 extern int ix86_force_align_arg_pointer;
1695 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1696
1697 /* Preferred alignment for stack boundary in bits. */
1698 unsigned int ix86_preferred_stack_boundary;
1699
1700 /* Values 1-5: see jump.c */
1701 int ix86_branch_cost;
1702
1703 /* Variables which are this size or smaller are put in the data/bss
1704 or ldata/lbss sections. */
1705
1706 int ix86_section_threshold = 65536;
1707
1708 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1709 char internal_label_prefix[16];
1710 int internal_label_prefix_len;
1711
1712 /* Fence to use after loop using movnt. */
1713 tree x86_mfence;
1714
1715 /* Register class used for passing a given 64bit part of the argument.
1716 These represent classes as documented by the psABI, with the exception
1717 of the SSESF and SSEDF classes, which are basically the SSE class, just gcc
1718 will use an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
1719
1720 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1721 whenever possible (the upper half then contains only padding). */
1722 enum x86_64_reg_class
1723 {
1724 X86_64_NO_CLASS,
1725 X86_64_INTEGER_CLASS,
1726 X86_64_INTEGERSI_CLASS,
1727 X86_64_SSE_CLASS,
1728 X86_64_SSESF_CLASS,
1729 X86_64_SSEDF_CLASS,
1730 X86_64_SSEUP_CLASS,
1731 X86_64_X87_CLASS,
1732 X86_64_X87UP_CLASS,
1733 X86_64_COMPLEX_X87_CLASS,
1734 X86_64_MEMORY_CLASS
1735 };
1736 static const char * const x86_64_reg_class_name[] =
1737 {
1738 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1739 "sseup", "x87", "x87up", "cplx87", "no"
1740 };
1741
1742 #define MAX_CLASSES 4
1743
1744 /* Table of constants used by fldpi, fldln2, etc.... */
1745 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1746 static bool ext_80387_constants_init = 0;
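/* The five entries cover the constants behind the x87 load-constant
   instructions that actually need a table -- fldlg2, fldln2, fldl2e, fldl2t
   and fldpi (fld1 and fldz need no entry); the X86_TUNE_EXT_80387_CONSTANTS
   flag above controls whether these extended constants are used.  */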
1747
1748 \f
1749 static struct machine_function * ix86_init_machine_status (void);
1750 static rtx ix86_function_value (const_tree, const_tree, bool);
1751 static int ix86_function_regparm (const_tree, const_tree);
1752 static void ix86_compute_frame_layout (struct ix86_frame *);
1753 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1754 rtx, rtx, int);
1755
1756 \f
1757 /* The svr4 ABI for the i386 says that records and unions are returned
1758 in memory. */
1759 #ifndef DEFAULT_PCC_STRUCT_RETURN
1760 #define DEFAULT_PCC_STRUCT_RETURN 1
1761 #endif
1762
1763 /* Bit flags that specify the ISA we are compiling for. */
1764 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1765
1766 /* A mask of ix86_isa_flags that includes bit X if X
1767 was set or cleared on the command line. */
1768 static int ix86_isa_flags_explicit;
1769
1770 /* Define a set of ISAs which aren't available for a given ISA. MMX
1771 and SSE ISAs are handled separately. */
1772
1773 #define OPTION_MASK_ISA_MMX_UNSET \
1774 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
1775 #define OPTION_MASK_ISA_3DNOW_UNSET OPTION_MASK_ISA_3DNOW_A
1776
1777 #define OPTION_MASK_ISA_SSE_UNSET \
1778 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE2_UNSET)
1779 #define OPTION_MASK_ISA_SSE2_UNSET \
1780 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE3_UNSET)
1781 #define OPTION_MASK_ISA_SSE3_UNSET \
1782 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSSE3_UNSET)
1783 #define OPTION_MASK_ISA_SSSE3_UNSET \
1784 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_1_UNSET)
1785 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1786 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_2_UNSET)
1787 #define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4A
1788
1789 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1790 as -msse4.1 -msse4.2. -mno-sse4 should be the same as -mno-sse4.1. */
1791 #define OPTION_MASK_ISA_SSE4 \
1792 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2)
1793 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1794
1795 #define OPTION_MASK_ISA_SSE4A_UNSET OPTION_MASK_ISA_SSE4
1796
1797 #define OPTION_MASK_ISA_SSE5_UNSET \
1798 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
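/* The *_UNSET macros chain transitively, so a single -mno-<isa> option also
   clears everything layered on top of that ISA.  For example, expanding
   OPTION_MASK_ISA_SSE2_UNSET yields

     OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1
     | OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4A

   which is exactly what ix86_handle_option below masks out of ix86_isa_flags
   when it sees -mno-sse2.  */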
1799
1800 /* Vectorization library interface and handlers. */
1801 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
1802 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
1803
1804 /* Implement TARGET_HANDLE_OPTION. */
1805
1806 static bool
1807 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1808 {
1809 switch (code)
1810 {
1811 case OPT_mmmx:
1812 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX;
1813 if (!value)
1814 {
1815 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1816 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
1817 }
1818 return true;
1819
1820 case OPT_m3dnow:
1821 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW;
1822 if (!value)
1823 {
1824 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
1825 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
1826 }
1827 return true;
1828
1829 case OPT_m3dnowa:
1830 return false;
1831
1832 case OPT_msse:
1833 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE;
1834 if (!value)
1835 {
1836 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
1837 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
1838 }
1839 return true;
1840
1841 case OPT_msse2:
1842 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2;
1843 if (!value)
1844 {
1845 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
1846 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
1847 }
1848 return true;
1849
1850 case OPT_msse3:
1851 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3;
1852 if (!value)
1853 {
1854 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
1855 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
1856 }
1857 return true;
1858
1859 case OPT_mssse3:
1860 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3;
1861 if (!value)
1862 {
1863 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
1864 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
1865 }
1866 return true;
1867
1868 case OPT_msse4_1:
1869 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1;
1870 if (!value)
1871 {
1872 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
1873 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
1874 }
1875 return true;
1876
1877 case OPT_msse4_2:
1878 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2;
1879 if (!value)
1880 {
1881 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
1882 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
1883 }
1884 return true;
1885
1886 case OPT_msse4:
1887 ix86_isa_flags |= OPTION_MASK_ISA_SSE4;
1888 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4;
1889 return true;
1890
1891 case OPT_mno_sse4:
1892 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
1893 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
1894 return true;
1895
1896 case OPT_msse4a:
1897 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A;
1898 if (!value)
1899 {
1900 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
1901 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
1902 }
1903 return true;
1904
1905 case OPT_msse5:
1906 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5;
1907 if (!value)
1908 {
1909 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
1910 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
1911 }
1912 return true;
1913
1914 default:
1915 return true;
1916 }
1917 }
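/* Note the double bookkeeping above: ix86_isa_flags tracks which ISAs are
   currently enabled, while ix86_isa_flags_explicit records the bits the user
   touched on the command line in either direction.  override_options below
   only ORs in the ISAs implied by -march when the corresponding explicit bit
   is clear, so e.g. "-march=core2 -mno-ssse3" keeps SSSE3 disabled even
   though the core2 entry in processor_alias_table carries PTA_SSSE3.  */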
1918
1919 /* Sometimes certain combinations of command options do not make
1920 sense on a particular target machine. You can define a macro
1921 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1922 defined, is executed once just after all the command options have
1923 been parsed.
1924
1925 Don't use this macro to turn on various extra optimizations for
1926 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1927
1928 void
1929 override_options (void)
1930 {
1931 int i;
1932 int ix86_tune_defaulted = 0;
1933 int ix86_arch_specified = 0;
1934 unsigned int ix86_arch_mask, ix86_tune_mask;
1935
1936 /* Comes from final.c -- no real reason to change it. */
1937 #define MAX_CODE_ALIGN 16
1938
1939 static struct ptt
1940 {
1941 const struct processor_costs *cost; /* Processor costs */
1942 const int align_loop; /* Default alignments. */
1943 const int align_loop_max_skip;
1944 const int align_jump;
1945 const int align_jump_max_skip;
1946 const int align_func;
1947 }
1948 const processor_target_table[PROCESSOR_max] =
1949 {
1950 {&i386_cost, 4, 3, 4, 3, 4},
1951 {&i486_cost, 16, 15, 16, 15, 16},
1952 {&pentium_cost, 16, 7, 16, 7, 16},
1953 {&pentiumpro_cost, 16, 15, 16, 10, 16},
1954 {&geode_cost, 0, 0, 0, 0, 0},
1955 {&k6_cost, 32, 7, 32, 7, 32},
1956 {&athlon_cost, 16, 7, 16, 7, 16},
1957 {&pentium4_cost, 0, 0, 0, 0, 0},
1958 {&k8_cost, 16, 7, 16, 7, 16},
1959 {&nocona_cost, 0, 0, 0, 0, 0},
1960 {&core2_cost, 16, 10, 16, 10, 16},
1961 {&generic32_cost, 16, 7, 16, 7, 16},
1962 {&generic64_cost, 16, 10, 16, 10, 16},
1963 {&amdfam10_cost, 32, 24, 32, 7, 32}
1964 };
1965
1966 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
1967 {
1968 "generic",
1969 "i386",
1970 "i486",
1971 "pentium",
1972 "pentium-mmx",
1973 "pentiumpro",
1974 "pentium2",
1975 "pentium3",
1976 "pentium4",
1977 "pentium-m",
1978 "prescott",
1979 "nocona",
1980 "core2",
1981 "geode",
1982 "k6",
1983 "k6-2",
1984 "k6-3",
1985 "athlon",
1986 "athlon-4",
1987 "k8",
1988 "amdfam10"
1989 };
1990
1991 enum pta_flags
1992 {
1993 PTA_SSE = 1 << 0,
1994 PTA_SSE2 = 1 << 1,
1995 PTA_SSE3 = 1 << 2,
1996 PTA_MMX = 1 << 3,
1997 PTA_PREFETCH_SSE = 1 << 4,
1998 PTA_3DNOW = 1 << 5,
1999 PTA_3DNOW_A = 1 << 6,
2000 PTA_64BIT = 1 << 7,
2001 PTA_SSSE3 = 1 << 8,
2002 PTA_CX16 = 1 << 9,
2003 PTA_POPCNT = 1 << 10,
2004 PTA_ABM = 1 << 11,
2005 PTA_SSE4A = 1 << 12,
2006 PTA_NO_SAHF = 1 << 13,
2007 PTA_SSE4_1 = 1 << 14,
2008 PTA_SSE4_2 = 1 << 15,
2009 PTA_SSE5 = 1 << 16
2010 };
2011
2012 static struct pta
2013 {
2014 const char *const name; /* processor name or nickname. */
2015 const enum processor_type processor;
2016 const unsigned /*enum pta_flags*/ flags;
2017 }
2018 const processor_alias_table[] =
2019 {
2020 {"i386", PROCESSOR_I386, 0},
2021 {"i486", PROCESSOR_I486, 0},
2022 {"i586", PROCESSOR_PENTIUM, 0},
2023 {"pentium", PROCESSOR_PENTIUM, 0},
2024 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
2025 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
2026 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2027 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2028 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2029 {"i686", PROCESSOR_PENTIUMPRO, 0},
2030 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
2031 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
2032 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2033 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2034 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
2035 {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX |PTA_SSE | PTA_SSE2},
2036 {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
2037 {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2038 {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
2039 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2040 | PTA_CX16 | PTA_NO_SAHF)},
2041 {"core2", PROCESSOR_CORE2, (PTA_64BIT
2042 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2043 | PTA_SSSE3
2044 | PTA_CX16)},
2045 {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2046 | PTA_PREFETCH_SSE)},
2047 {"k6", PROCESSOR_K6, PTA_MMX},
2048 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2049 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2050 {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2051 | PTA_PREFETCH_SSE)},
2052 {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2053 | PTA_PREFETCH_SSE)},
2054 {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2055 | PTA_SSE)},
2056 {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2057 | PTA_SSE)},
2058 {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2059 | PTA_SSE)},
2060 {"x86-64", PROCESSOR_K8, (PTA_64BIT
2061 | PTA_MMX | PTA_SSE | PTA_SSE2
2062 | PTA_NO_SAHF)},
2063 {"k8", PROCESSOR_K8, (PTA_64BIT
2064 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2065 | PTA_SSE | PTA_SSE2
2066 | PTA_NO_SAHF)},
2067 {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
2068 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2069 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2070 | PTA_NO_SAHF)},
2071 {"opteron", PROCESSOR_K8, (PTA_64BIT
2072 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2073 | PTA_SSE | PTA_SSE2
2074 | PTA_NO_SAHF)},
2075 {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
2076 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2077 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2078 | PTA_NO_SAHF)},
2079 {"athlon64", PROCESSOR_K8, (PTA_64BIT
2080 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2081 | PTA_SSE | PTA_SSE2
2082 | PTA_NO_SAHF)},
2083 {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
2084 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2085 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2086 | PTA_NO_SAHF)},
2087 {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
2088 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2089 | PTA_SSE | PTA_SSE2
2090 | PTA_NO_SAHF)},
2091 {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
2092 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2093 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2094 | PTA_SSE4A
2095 | PTA_CX16 | PTA_ABM)},
2096 {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
2097 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2098 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2099 | PTA_SSE4A
2100 | PTA_CX16 | PTA_ABM)},
2101 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
2102 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
2103 };
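/* Note that processor_target_table above is indexed directly by the
   PROCESSOR_* enumeration (it is later read as
   processor_target_table[ix86_tune]), so its rows must stay in enum order,
   whereas processor_alias_table is searched linearly by name and its order
   is arbitrary.  */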
2104
2105 int const pta_size = ARRAY_SIZE (processor_alias_table);
2106
2107 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2108 SUBTARGET_OVERRIDE_OPTIONS;
2109 #endif
2110
2111 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2112 SUBSUBTARGET_OVERRIDE_OPTIONS;
2113 #endif
2114
2115 /* -fPIC is the default for x86_64 Darwin. */
2116 if (TARGET_MACHO && TARGET_64BIT)
2117 flag_pic = 2;
2118
2119 /* Set the default values for switches whose default depends on TARGET_64BIT
2120 in case they weren't overwritten by command line options. */
2121 if (TARGET_64BIT)
2122 {
2123 /* Mach-O doesn't support omitting the frame pointer for now. */
2124 if (flag_omit_frame_pointer == 2)
2125 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2126 if (flag_asynchronous_unwind_tables == 2)
2127 flag_asynchronous_unwind_tables = 1;
2128 if (flag_pcc_struct_return == 2)
2129 flag_pcc_struct_return = 0;
2130 }
2131 else
2132 {
2133 if (flag_omit_frame_pointer == 2)
2134 flag_omit_frame_pointer = 0;
2135 if (flag_asynchronous_unwind_tables == 2)
2136 flag_asynchronous_unwind_tables = 0;
2137 if (flag_pcc_struct_return == 2)
2138 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2139 }
2140
2141 /* Need to check -mtune=generic first. */
2142 if (ix86_tune_string)
2143 {
2144 if (!strcmp (ix86_tune_string, "generic")
2145 || !strcmp (ix86_tune_string, "i686")
2146 /* As special support for cross compilers we read -mtune=native
2147 as -mtune=generic. With native compilers we won't see the
2148 -mtune=native, as it was changed by the driver. */
2149 || !strcmp (ix86_tune_string, "native"))
2150 {
2151 if (TARGET_64BIT)
2152 ix86_tune_string = "generic64";
2153 else
2154 ix86_tune_string = "generic32";
2155 }
2156 else if (!strncmp (ix86_tune_string, "generic", 7))
2157 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2158 }
2159 else
2160 {
2161 if (ix86_arch_string)
2162 ix86_tune_string = ix86_arch_string;
2163 if (!ix86_tune_string)
2164 {
2165 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2166 ix86_tune_defaulted = 1;
2167 }
2168
2169 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2170 need to use a sensible tune option. */
2171 if (!strcmp (ix86_tune_string, "generic")
2172 || !strcmp (ix86_tune_string, "x86-64")
2173 || !strcmp (ix86_tune_string, "i686"))
2174 {
2175 if (TARGET_64BIT)
2176 ix86_tune_string = "generic64";
2177 else
2178 ix86_tune_string = "generic32";
2179 }
2180 }
2181 if (ix86_stringop_string)
2182 {
2183 if (!strcmp (ix86_stringop_string, "rep_byte"))
2184 stringop_alg = rep_prefix_1_byte;
2185 else if (!strcmp (ix86_stringop_string, "libcall"))
2186 stringop_alg = libcall;
2187 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2188 stringop_alg = rep_prefix_4_byte;
2189 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2190 stringop_alg = rep_prefix_8_byte;
2191 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2192 stringop_alg = loop_1_byte;
2193 else if (!strcmp (ix86_stringop_string, "loop"))
2194 stringop_alg = loop;
2195 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2196 stringop_alg = unrolled_loop;
2197 else
2198 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
2199 }
2200 if (!strcmp (ix86_tune_string, "x86-64"))
2201 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2202 "-mtune=generic instead as appropriate.");
2203
2204 if (!ix86_arch_string)
2205 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2206 else
2207 ix86_arch_specified = 1;
2208
2209 if (!strcmp (ix86_arch_string, "generic"))
2210 error ("generic CPU can be used only for -mtune= switch");
2211 if (!strncmp (ix86_arch_string, "generic", 7))
2212 error ("bad value (%s) for -march= switch", ix86_arch_string);
2213
2214 if (ix86_cmodel_string != 0)
2215 {
2216 if (!strcmp (ix86_cmodel_string, "small"))
2217 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2218 else if (!strcmp (ix86_cmodel_string, "medium"))
2219 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2220 else if (!strcmp (ix86_cmodel_string, "large"))
2221 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2222 else if (flag_pic)
2223 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2224 else if (!strcmp (ix86_cmodel_string, "32"))
2225 ix86_cmodel = CM_32;
2226 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2227 ix86_cmodel = CM_KERNEL;
2228 else
2229 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2230 }
2231 else
2232 {
2233 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
2234 use of rip-relative addressing. This eliminates fixups that
2235 would otherwise be needed if this object is to be placed in a
2236 DLL, and is essentially just as efficient as direct addressing. */
2237 if (TARGET_64BIT_MS_ABI)
2238 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2239 else if (TARGET_64BIT)
2240 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2241 else
2242 ix86_cmodel = CM_32;
2243 }
2244 if (ix86_asm_string != 0)
2245 {
2246 if (! TARGET_MACHO
2247 && !strcmp (ix86_asm_string, "intel"))
2248 ix86_asm_dialect = ASM_INTEL;
2249 else if (!strcmp (ix86_asm_string, "att"))
2250 ix86_asm_dialect = ASM_ATT;
2251 else
2252 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2253 }
2254 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2255 error ("code model %qs not supported in the %s bit mode",
2256 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2257 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2258 sorry ("%i-bit mode not compiled in",
2259 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2260
2261 for (i = 0; i < pta_size; i++)
2262 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2263 {
2264 ix86_arch = processor_alias_table[i].processor;
2265 /* Default cpu tuning to the architecture. */
2266 ix86_tune = ix86_arch;
2267
2268 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2269 error ("CPU you selected does not support x86-64 "
2270 "instruction set");
2271
2272 if (processor_alias_table[i].flags & PTA_MMX
2273 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2274 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2275 if (processor_alias_table[i].flags & PTA_3DNOW
2276 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2277 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2278 if (processor_alias_table[i].flags & PTA_3DNOW_A
2279 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2280 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2281 if (processor_alias_table[i].flags & PTA_SSE
2282 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2283 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2284 if (processor_alias_table[i].flags & PTA_SSE2
2285 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2286 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2287 if (processor_alias_table[i].flags & PTA_SSE3
2288 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2289 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2290 if (processor_alias_table[i].flags & PTA_SSSE3
2291 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2292 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2293 if (processor_alias_table[i].flags & PTA_SSE4_1
2294 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2295 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2296 if (processor_alias_table[i].flags & PTA_SSE4_2
2297 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2298 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2299 if (processor_alias_table[i].flags & PTA_SSE4A
2300 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2301 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2302 if (processor_alias_table[i].flags & PTA_SSE5
2303 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2304 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2305
2306 if (processor_alias_table[i].flags & PTA_ABM)
2307 x86_abm = true;
2308 if (processor_alias_table[i].flags & PTA_CX16)
2309 x86_cmpxchg16b = true;
2310 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM))
2311 x86_popcnt = true;
2312 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2313 x86_prefetch_sse = true;
2314 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
2315 x86_sahf = true;
2316
2317 break;
2318 }
2319
2320 if (i == pta_size)
2321 error ("bad value (%s) for -march= switch", ix86_arch_string);
2322
2323 ix86_arch_mask = 1u << ix86_arch;
2324 for (i = 0; i < X86_ARCH_LAST; ++i)
2325 ix86_arch_features[i] &= ix86_arch_mask;
2326
2327 for (i = 0; i < pta_size; i++)
2328 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2329 {
2330 ix86_tune = processor_alias_table[i].processor;
2331 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2332 {
2333 if (ix86_tune_defaulted)
2334 {
2335 ix86_tune_string = "x86-64";
2336 for (i = 0; i < pta_size; i++)
2337 if (! strcmp (ix86_tune_string,
2338 processor_alias_table[i].name))
2339 break;
2340 ix86_tune = processor_alias_table[i].processor;
2341 }
2342 else
2343 error ("CPU you selected does not support x86-64 "
2344 "instruction set");
2345 }
2346 /* Intel CPUs have always interpreted SSE prefetch instructions as
2347 NOPs; so, we can enable SSE prefetch instructions even when
2348 -mtune (rather than -march) points us to a processor that has them.
2349 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2350 higher processors. */
2351 if (TARGET_CMOVE
2352 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2353 x86_prefetch_sse = true;
2354 break;
2355 }
2356 if (i == pta_size)
2357 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2358
2359 ix86_tune_mask = 1u << ix86_tune;
2360 for (i = 0; i < X86_TUNE_LAST; ++i)
2361 ix86_tune_features[i] &= ix86_tune_mask;
2362
2363 if (optimize_size)
2364 ix86_cost = &size_cost;
2365 else
2366 ix86_cost = processor_target_table[ix86_tune].cost;
2367
2368 /* Arrange to set up i386_stack_locals for all functions. */
2369 init_machine_status = ix86_init_machine_status;
2370
2371 /* Validate -mregparm= value. */
2372 if (ix86_regparm_string)
2373 {
2374 if (TARGET_64BIT)
2375 warning (0, "-mregparm is ignored in 64-bit mode");
2376 i = atoi (ix86_regparm_string);
2377 if (i < 0 || i > REGPARM_MAX)
2378 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2379 else
2380 ix86_regparm = i;
2381 }
2382 if (TARGET_64BIT)
2383 ix86_regparm = REGPARM_MAX;
2384
2385 /* If the user has provided any of the -malign-* options,
2386 warn and use that value only if -falign-* is not set.
2387 Remove this code in GCC 3.2 or later. */
2388 if (ix86_align_loops_string)
2389 {
2390 warning (0, "-malign-loops is obsolete, use -falign-loops");
2391 if (align_loops == 0)
2392 {
2393 i = atoi (ix86_align_loops_string);
2394 if (i < 0 || i > MAX_CODE_ALIGN)
2395 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2396 else
2397 align_loops = 1 << i;
2398 }
2399 }
2400
2401 if (ix86_align_jumps_string)
2402 {
2403 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2404 if (align_jumps == 0)
2405 {
2406 i = atoi (ix86_align_jumps_string);
2407 if (i < 0 || i > MAX_CODE_ALIGN)
2408 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2409 else
2410 align_jumps = 1 << i;
2411 }
2412 }
2413
2414 if (ix86_align_funcs_string)
2415 {
2416 warning (0, "-malign-functions is obsolete, use -falign-functions");
2417 if (align_functions == 0)
2418 {
2419 i = atoi (ix86_align_funcs_string);
2420 if (i < 0 || i > MAX_CODE_ALIGN)
2421 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2422 else
2423 align_functions = 1 << i;
2424 }
2425 }
2426
2427 /* Default align_* from the processor table. */
2428 if (align_loops == 0)
2429 {
2430 align_loops = processor_target_table[ix86_tune].align_loop;
2431 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2432 }
2433 if (align_jumps == 0)
2434 {
2435 align_jumps = processor_target_table[ix86_tune].align_jump;
2436 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2437 }
2438 if (align_functions == 0)
2439 {
2440 align_functions = processor_target_table[ix86_tune].align_func;
2441 }
2442
2443 /* Validate -mbranch-cost= value, or provide default. */
2444 ix86_branch_cost = ix86_cost->branch_cost;
2445 if (ix86_branch_cost_string)
2446 {
2447 i = atoi (ix86_branch_cost_string);
2448 if (i < 0 || i > 5)
2449 error ("-mbranch-cost=%d is not between 0 and 5", i);
2450 else
2451 ix86_branch_cost = i;
2452 }
2453 if (ix86_section_threshold_string)
2454 {
2455 i = atoi (ix86_section_threshold_string);
2456 if (i < 0)
2457 error ("-mlarge-data-threshold=%d is negative", i);
2458 else
2459 ix86_section_threshold = i;
2460 }
2461
2462 if (ix86_tls_dialect_string)
2463 {
2464 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2465 ix86_tls_dialect = TLS_DIALECT_GNU;
2466 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2467 ix86_tls_dialect = TLS_DIALECT_GNU2;
2468 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2469 ix86_tls_dialect = TLS_DIALECT_SUN;
2470 else
2471 error ("bad value (%s) for -mtls-dialect= switch",
2472 ix86_tls_dialect_string);
2473 }
2474
2475 if (ix87_precision_string)
2476 {
2477 i = atoi (ix87_precision_string);
2478 if (i != 32 && i != 64 && i != 80)
2479 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
2480 }
2481
2482 if (TARGET_64BIT)
2483 {
2484 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
2485
2486 /* Enable by default the SSE and MMX builtins. Do allow the user to
2487 explicitly disable any of these. In particular, disabling SSE and
2488 MMX for kernel code is extremely useful. */
2489 if (!ix86_arch_specified)
2490 ix86_isa_flags
2491 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
2492 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
2493
2494 if (TARGET_RTD)
2495 warning (0, "-mrtd is ignored in 64bit mode");
2496 }
2497 else
2498 {
2499 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
2500
2501 if (!ix86_arch_specified)
2502 ix86_isa_flags
2503 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
2504
2505 /* The i386 ABI does not specify a red zone. It still makes sense to use it
2506 when the programmer takes care to keep the stack from being destroyed. */
2507 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2508 target_flags |= MASK_NO_RED_ZONE;
2509 }
2510
2511 /* Keep nonleaf frame pointers. */
2512 if (flag_omit_frame_pointer)
2513 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2514 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2515 flag_omit_frame_pointer = 1;
2516
2517 /* If we're doing fast math, we don't care about comparison order
2518 wrt NaNs. This lets us use a shorter comparison sequence. */
2519 if (flag_finite_math_only)
2520 target_flags &= ~MASK_IEEE_FP;
2521
2522 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2523 since the insns won't need emulation. */
2524 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2525 target_flags &= ~MASK_NO_FANCY_MATH_387;
2526
2527 /* Likewise, if the target doesn't have a 387, or we've specified
2528 software floating point, don't use 387 inline intrinsics. */
2529 if (!TARGET_80387)
2530 target_flags |= MASK_NO_FANCY_MATH_387;
2531
2532 /* Turn on SSE4A builtins for -msse5. */
2533 if (TARGET_SSE5)
2534 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2535
2536 /* Turn on SSE4.1 builtins for -msse4.2. */
2537 if (TARGET_SSE4_2)
2538 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2539
2540 /* Turn on SSSE3 builtins for -msse4.1. */
2541 if (TARGET_SSE4_1)
2542 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2543
2544 /* Turn on SSE3 builtins for -mssse3. */
2545 if (TARGET_SSSE3)
2546 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2547
2548 /* Turn on SSE3 builtins for -msse4a. */
2549 if (TARGET_SSE4A)
2550 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2551
2552 /* Turn on SSE2 builtins for -msse3. */
2553 if (TARGET_SSE3)
2554 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2555
2556 /* Turn on SSE builtins for -msse2. */
2557 if (TARGET_SSE2)
2558 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2559
2560 /* Turn on MMX builtins for -msse. */
2561 if (TARGET_SSE)
2562 {
2563 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
2564 x86_prefetch_sse = true;
2565 }
2566
2567 /* Turn on MMX builtins for 3Dnow. */
2568 if (TARGET_3DNOW)
2569 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2570
2571 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
2572 if (TARGET_SSE4_2 || TARGET_ABM)
2573 x86_popcnt = true;
2574
2575 /* Validate -mpreferred-stack-boundary= value, or provide default.
2576 The default of 128 bits is for Pentium III's SSE __m128. We can't
2577 change it because of optimize_size. Otherwise, we can't mix object
2578 files compiled with -Os and -On. */
2579 ix86_preferred_stack_boundary = 128;
2580 if (ix86_preferred_stack_boundary_string)
2581 {
2582 i = atoi (ix86_preferred_stack_boundary_string);
2583 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2584 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2585 TARGET_64BIT ? 4 : 2);
2586 else
2587 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2588 }
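/* For instance, the 128-bit default corresponds to
   -mpreferred-stack-boundary=4: (1 << 4) * BITS_PER_UNIT == 16 * 8 == 128
   bits, i.e. a 16-byte aligned stack.  */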
2589
2590 /* Accept -msseregparm only if at least SSE support is enabled. */
2591 if (TARGET_SSEREGPARM
2592 && ! TARGET_SSE)
2593 error ("-msseregparm used without SSE enabled");
2594
2595 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2596 if (ix86_fpmath_string != 0)
2597 {
2598 if (! strcmp (ix86_fpmath_string, "387"))
2599 ix86_fpmath = FPMATH_387;
2600 else if (! strcmp (ix86_fpmath_string, "sse"))
2601 {
2602 if (!TARGET_SSE)
2603 {
2604 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2605 ix86_fpmath = FPMATH_387;
2606 }
2607 else
2608 ix86_fpmath = FPMATH_SSE;
2609 }
2610 else if (! strcmp (ix86_fpmath_string, "387,sse")
2611 || ! strcmp (ix86_fpmath_string, "sse,387"))
2612 {
2613 if (!TARGET_SSE)
2614 {
2615 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2616 ix86_fpmath = FPMATH_387;
2617 }
2618 else if (!TARGET_80387)
2619 {
2620 warning (0, "387 instruction set disabled, using SSE arithmetics");
2621 ix86_fpmath = FPMATH_SSE;
2622 }
2623 else
2624 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
2625 }
2626 else
2627 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2628 }
2629
2630 /* If the i387 is disabled, then do not return values in it. */
2631 if (!TARGET_80387)
2632 target_flags &= ~MASK_FLOAT_RETURNS;
2633
2634 /* Use external vectorized library in vectorizing intrinsics. */
2635 if (ix86_veclibabi_string)
2636 {
2637 if (strcmp (ix86_veclibabi_string, "acml") == 0)
2638 ix86_veclib_handler = ix86_veclibabi_acml;
2639 else
2640 error ("unknown vectorization library ABI type (%s) for "
2641 "-mveclibabi= switch", ix86_veclibabi_string);
2642 }
2643
2644 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2645 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2646 && !optimize_size)
2647 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2648
2649 /* ??? Unwind info is not correct around the CFG unless either a frame
2650 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2651 unwind info generation to be aware of the CFG and propagating states
2652 around edges. */
2653 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2654 || flag_exceptions || flag_non_call_exceptions)
2655 && flag_omit_frame_pointer
2656 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2657 {
2658 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2659 warning (0, "unwind tables currently require either a frame pointer "
2660 "or -maccumulate-outgoing-args for correctness");
2661 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2662 }
2663
2664 /* For sane SSE instruction set generation we need the fcomi instruction.
2665 It is safe to enable all CMOVE instructions. */
2666 if (TARGET_SSE)
2667 TARGET_CMOVE = 1;
2668
2669 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2670 {
2671 char *p;
2672 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2673 p = strchr (internal_label_prefix, 'X');
2674 internal_label_prefix_len = p - internal_label_prefix;
2675 *p = '\0';
2676 }
2677
2678 /* When no scheduling description is available, disable the scheduler pass
2679 so it won't slow down the compilation and make x87 code slower. */
2680 if (!TARGET_SCHEDULE)
2681 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2682
2683 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2684 set_param_value ("simultaneous-prefetches",
2685 ix86_cost->simultaneous_prefetches);
2686 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2687 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2688 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
2689 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
2690 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
2691 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
2692
2693 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
2694 can be optimized to ap = __builtin_next_arg (0). */
2695 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
2696 targetm.expand_builtin_va_start = NULL;
2697 }
2698 \f
2699 /* Return true if this goes in large data/bss. */
2700
2701 static bool
2702 ix86_in_large_data_p (tree exp)
2703 {
2704 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2705 return false;
2706
2707 /* Functions are never large data. */
2708 if (TREE_CODE (exp) == FUNCTION_DECL)
2709 return false;
2710
2711 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2712 {
2713 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2714 if (strcmp (section, ".ldata") == 0
2715 || strcmp (section, ".lbss") == 0)
2716 return true;
2717 return false;
2718 }
2719 else
2720 {
2721 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2722
2723 /* If this is an incomplete type with size 0, then we can't put it
2724 in data because it might be too big when completed. */
2725 if (!size || size > ix86_section_threshold)
2726 return true;
2727 }
2728
2729 return false;
2730 }
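/* In other words, with the defaults only -mcmodel=medium (plain or PIC) code
   is affected, and only for objects whose size is unknown or exceeds
   ix86_section_threshold (65536 bytes unless overridden with
   -mlarge-data-threshold=); such objects are then steered into .ldata/.lbss
   by the section hooks below.  */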
2731
2732 /* Switch to the appropriate section for output of DECL.
2733 DECL is either a `VAR_DECL' node or a constant of some sort.
2734 RELOC indicates whether forming the initial value of DECL requires
2735 link-time relocations. */
2736
2737 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2738 ATTRIBUTE_UNUSED;
2739
2740 static section *
2741 x86_64_elf_select_section (tree decl, int reloc,
2742 unsigned HOST_WIDE_INT align)
2743 {
2744 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2745 && ix86_in_large_data_p (decl))
2746 {
2747 const char *sname = NULL;
2748 unsigned int flags = SECTION_WRITE;
2749 switch (categorize_decl_for_section (decl, reloc))
2750 {
2751 case SECCAT_DATA:
2752 sname = ".ldata";
2753 break;
2754 case SECCAT_DATA_REL:
2755 sname = ".ldata.rel";
2756 break;
2757 case SECCAT_DATA_REL_LOCAL:
2758 sname = ".ldata.rel.local";
2759 break;
2760 case SECCAT_DATA_REL_RO:
2761 sname = ".ldata.rel.ro";
2762 break;
2763 case SECCAT_DATA_REL_RO_LOCAL:
2764 sname = ".ldata.rel.ro.local";
2765 break;
2766 case SECCAT_BSS:
2767 sname = ".lbss";
2768 flags |= SECTION_BSS;
2769 break;
2770 case SECCAT_RODATA:
2771 case SECCAT_RODATA_MERGE_STR:
2772 case SECCAT_RODATA_MERGE_STR_INIT:
2773 case SECCAT_RODATA_MERGE_CONST:
2774 sname = ".lrodata";
2775 flags = 0;
2776 break;
2777 case SECCAT_SRODATA:
2778 case SECCAT_SDATA:
2779 case SECCAT_SBSS:
2780 gcc_unreachable ();
2781 case SECCAT_TEXT:
2782 case SECCAT_TDATA:
2783 case SECCAT_TBSS:
2784 /* We don't split these for the medium model. Place them into
2785 default sections and hope for the best. */
2786 break;
2787 }
2788 if (sname)
2789 {
2790 /* We might get called with string constants, but get_named_section
2791 doesn't like them as they are not DECLs. Also, we need to set
2792 flags in that case. */
2793 if (!DECL_P (decl))
2794 return get_section (sname, flags, NULL);
2795 return get_named_section (decl, sname, reloc);
2796 }
2797 }
2798 return default_elf_select_section (decl, reloc, align);
2799 }
2800
2801 /* Build up a unique section name, expressed as a
2802 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2803 RELOC indicates whether the initial value of EXP requires
2804 link-time relocations. */
2805
2806 static void ATTRIBUTE_UNUSED
2807 x86_64_elf_unique_section (tree decl, int reloc)
2808 {
2809 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2810 && ix86_in_large_data_p (decl))
2811 {
2812 const char *prefix = NULL;
2813 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2814 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2815
2816 switch (categorize_decl_for_section (decl, reloc))
2817 {
2818 case SECCAT_DATA:
2819 case SECCAT_DATA_REL:
2820 case SECCAT_DATA_REL_LOCAL:
2821 case SECCAT_DATA_REL_RO:
2822 case SECCAT_DATA_REL_RO_LOCAL:
2823 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2824 break;
2825 case SECCAT_BSS:
2826 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2827 break;
2828 case SECCAT_RODATA:
2829 case SECCAT_RODATA_MERGE_STR:
2830 case SECCAT_RODATA_MERGE_STR_INIT:
2831 case SECCAT_RODATA_MERGE_CONST:
2832 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2833 break;
2834 case SECCAT_SRODATA:
2835 case SECCAT_SDATA:
2836 case SECCAT_SBSS:
2837 gcc_unreachable ();
2838 case SECCAT_TEXT:
2839 case SECCAT_TDATA:
2840 case SECCAT_TBSS:
2841 /* We don't split these for the medium model. Place them into
2842 default sections and hope for the best. */
2843 break;
2844 }
2845 if (prefix)
2846 {
2847 const char *name;
2848 size_t nlen, plen;
2849 char *string;
2850 plen = strlen (prefix);
2851
2852 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2853 name = targetm.strip_name_encoding (name);
2854 nlen = strlen (name);
2855
2856 string = (char *) alloca (nlen + plen + 1);
2857 memcpy (string, prefix, plen);
2858 memcpy (string + plen, name, nlen + 1);
2859
2860 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2861 return;
2862 }
2863 }
2864 default_unique_section (decl, reloc);
2865 }
2866
2867 #ifdef COMMON_ASM_OP
2868 /* This says how to output assembler code to declare an
2869 uninitialized external linkage data object.
2870
2871 For medium model x86-64 we need to use the .largecomm directive for
2872 large objects. */
2873 void
2874 x86_elf_aligned_common (FILE *file,
2875 const char *name, unsigned HOST_WIDE_INT size,
2876 int align)
2877 {
2878 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2879 && size > (unsigned int)ix86_section_threshold)
2880 fprintf (file, ".largecomm\t");
2881 else
2882 fprintf (file, "%s", COMMON_ASM_OP);
2883 assemble_name (file, name);
2884 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2885 size, align / BITS_PER_UNIT);
2886 }
2887 #endif
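/* As an illustration (the symbol name and numbers are hypothetical): under
   -mcmodel=medium a 131072-byte common object "buf" requested with 256-bit
   alignment would be emitted as

     .largecomm	buf,131072,32

   while objects at or below ix86_section_threshold fall back to the ordinary
   COMMON_ASM_OP form.  */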
2888
2889 /* Utility function for targets to use in implementing
2890 ASM_OUTPUT_ALIGNED_BSS. */
2891
2892 void
2893 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2894 const char *name, unsigned HOST_WIDE_INT size,
2895 int align)
2896 {
2897 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2898 && size > (unsigned int)ix86_section_threshold)
2899 switch_to_section (get_named_section (decl, ".lbss", 0));
2900 else
2901 switch_to_section (bss_section);
2902 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2903 #ifdef ASM_DECLARE_OBJECT_NAME
2904 last_assemble_variable_decl = decl;
2905 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2906 #else
2907 /* The standard thing is just to output a label for the object. */
2908 ASM_OUTPUT_LABEL (file, name);
2909 #endif /* ASM_DECLARE_OBJECT_NAME */
2910 ASM_OUTPUT_SKIP (file, size ? size : 1);
2911 }
2912 \f
2913 void
2914 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2915 {
2916 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2917 make the problem with not enough registers even worse. */
2918 #ifdef INSN_SCHEDULING
2919 if (level > 1)
2920 flag_schedule_insns = 0;
2921 #endif
2922
2923 if (TARGET_MACHO)
2924 /* The Darwin libraries never set errno, so we might as well
2925 avoid calling them when that's the only reason we would. */
2926 flag_errno_math = 0;
2927
2928 /* The default values of these switches depend on TARGET_64BIT,
2929 which is not known at this moment. Mark these values with 2 and
2930 let the user override them. In case there is no command line option
2931 specifying them, we will set the defaults in override_options. */
2932 if (optimize >= 1)
2933 flag_omit_frame_pointer = 2;
2934 flag_pcc_struct_return = 2;
2935 flag_asynchronous_unwind_tables = 2;
2936 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2937 SUBTARGET_OPTIMIZATION_OPTIONS;
2938 #endif
2939 }
2940 \f
2941 /* Decide whether we can make a sibling call to a function. DECL is the
2942 declaration of the function being targeted by the call and EXP is the
2943 CALL_EXPR representing the call. */
2944
2945 static bool
2946 ix86_function_ok_for_sibcall (tree decl, tree exp)
2947 {
2948 tree func;
2949 rtx a, b;
2950
2951 /* If we are generating position-independent code, we cannot sibcall
2952 optimize any indirect call, or a direct call to a global function,
2953 as the PLT requires %ebx be live. */
2954 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2955 return false;
2956
2957 if (decl)
2958 func = decl;
2959 else
2960 {
2961 func = TREE_TYPE (CALL_EXPR_FN (exp));
2962 if (POINTER_TYPE_P (func))
2963 func = TREE_TYPE (func);
2964 }
2965
2966 /* Check that the return value locations are the same. For example,
2967 if we are returning floats on the 80387 register stack, we cannot
2968 make a sibcall from a function that doesn't return a float to a
2969 function that does or, conversely, from a function that does return
2970 a float to a function that doesn't; the necessary stack adjustment
2971 would not be executed. This is also the place we notice
2972 differences in the return value ABI. Note that it is ok for one
2973 of the functions to have void return type as long as the return
2974 value of the other is passed in a register. */
2975 a = ix86_function_value (TREE_TYPE (exp), func, false);
2976 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2977 cfun->decl, false);
2978 if (STACK_REG_P (a) || STACK_REG_P (b))
2979 {
2980 if (!rtx_equal_p (a, b))
2981 return false;
2982 }
2983 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2984 ;
2985 else if (!rtx_equal_p (a, b))
2986 return false;
2987
2988 /* If this call is indirect, we'll need to be able to use a call-clobbered
2989 register for the address of the target function. Make sure that all
2990 such registers are not used for passing parameters. */
2991 if (!decl && !TARGET_64BIT)
2992 {
2993 tree type;
2994
2995 /* We're looking at the CALL_EXPR, we need the type of the function. */
2996 type = CALL_EXPR_FN (exp); /* pointer expression */
2997 type = TREE_TYPE (type); /* pointer type */
2998 type = TREE_TYPE (type); /* function type */
2999
3000 if (ix86_function_regparm (type, NULL) >= 3)
3001 {
3002 /* ??? Need to count the actual number of registers to be used,
3003 not the possible number of registers. Fix later. */
3004 return false;
3005 }
3006 }
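/* The regparm >= 3 test above is deliberately conservative: in 32-bit mode
   the only call-clobbered integer registers are %eax, %ecx and %edx, and
   with regparm(3) all three may already be carrying arguments, leaving no
   register for the indirect call target -- hence the ??? note about counting
   actual register uses instead.  */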
3007
3008 /* Dllimport'd functions are also called indirectly. */
3009 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
3010 && decl && DECL_DLLIMPORT_P (decl)
3011 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
3012 return false;
3013
3014 /* If we force-aligned the stack, then sibcalling would misalign the
3015 stack, which may break the called function. */
3016 if (cfun->machine->force_align_arg_pointer)
3017 return false;
3018
3019 /* Otherwise okay. That also includes certain types of indirect calls. */
3020 return true;
3021 }
3022
3023 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
3024 calling convention attributes;
3025 arguments as in struct attribute_spec.handler. */
3026
3027 static tree
3028 ix86_handle_cconv_attribute (tree *node, tree name,
3029 tree args,
3030 int flags ATTRIBUTE_UNUSED,
3031 bool *no_add_attrs)
3032 {
3033 if (TREE_CODE (*node) != FUNCTION_TYPE
3034 && TREE_CODE (*node) != METHOD_TYPE
3035 && TREE_CODE (*node) != FIELD_DECL
3036 && TREE_CODE (*node) != TYPE_DECL)
3037 {
3038 warning (OPT_Wattributes, "%qs attribute only applies to functions",
3039 IDENTIFIER_POINTER (name));
3040 *no_add_attrs = true;
3041 return NULL_TREE;
3042 }
3043
3044 /* Can combine regparm with all attributes but fastcall. */
3045 if (is_attribute_p ("regparm", name))
3046 {
3047 tree cst;
3048
3049 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3050 {
3051 error ("fastcall and regparm attributes are not compatible");
3052 }
3053
3054 cst = TREE_VALUE (args);
3055 if (TREE_CODE (cst) != INTEGER_CST)
3056 {
3057 warning (OPT_Wattributes,
3058 "%qs attribute requires an integer constant argument",
3059 IDENTIFIER_POINTER (name));
3060 *no_add_attrs = true;
3061 }
3062 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
3063 {
3064 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
3065 IDENTIFIER_POINTER (name), REGPARM_MAX);
3066 *no_add_attrs = true;
3067 }
3068
3069 if (!TARGET_64BIT
3070 && lookup_attribute (ix86_force_align_arg_pointer_string,
3071 TYPE_ATTRIBUTES (*node))
3072 && compare_tree_int (cst, REGPARM_MAX-1))
3073 {
3074 error ("%s functions limited to %d register parameters",
3075 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
3076 }
3077
3078 return NULL_TREE;
3079 }
3080
3081 if (TARGET_64BIT)
3082 {
3083 /* Do not warn when emulating the MS ABI. */
3084 if (!TARGET_64BIT_MS_ABI)
3085 warning (OPT_Wattributes, "%qs attribute ignored",
3086 IDENTIFIER_POINTER (name));
3087 *no_add_attrs = true;
3088 return NULL_TREE;
3089 }
3090
3091 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
3092 if (is_attribute_p ("fastcall", name))
3093 {
3094 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3095 {
3096 error ("fastcall and cdecl attributes are not compatible");
3097 }
3098 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3099 {
3100 error ("fastcall and stdcall attributes are not compatible");
3101 }
3102 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
3103 {
3104 error ("fastcall and regparm attributes are not compatible");
3105 }
3106 }
3107
3108 /* Can combine stdcall with fastcall (redundant), regparm and
3109 sseregparm. */
3110 else if (is_attribute_p ("stdcall", name))
3111 {
3112 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3113 {
3114 error ("stdcall and cdecl attributes are not compatible");
3115 }
3116 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3117 {
3118 error ("stdcall and fastcall attributes are not compatible");
3119 }
3120 }
3121
3122 /* Can combine cdecl with regparm and sseregparm. */
3123 else if (is_attribute_p ("cdecl", name))
3124 {
3125 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3126 {
3127 error ("stdcall and cdecl attributes are not compatible");
3128 }
3129 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3130 {
3131 error ("fastcall and cdecl attributes are not compatible");
3132 }
3133 }
3134
3135 /* Can combine sseregparm with all attributes. */
3136
3137 return NULL_TREE;
3138 }
3139
3140 /* Return 0 if the attributes for two types are incompatible, 1 if they
3141 are compatible, and 2 if they are nearly compatible (which causes a
3142 warning to be generated). */
3143
3144 static int
3145 ix86_comp_type_attributes (const_tree type1, const_tree type2)
3146 {
3147 /* Check for mismatch of non-default calling convention. */
3148 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
3149
3150 if (TREE_CODE (type1) != FUNCTION_TYPE
3151 && TREE_CODE (type1) != METHOD_TYPE)
3152 return 1;
3153
3154 /* Check for mismatched fastcall/regparm types. */
3155 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
3156 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
3157 || (ix86_function_regparm (type1, NULL)
3158 != ix86_function_regparm (type2, NULL)))
3159 return 0;
3160
3161 /* Check for mismatched sseregparm types. */
3162 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
3163 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
3164 return 0;
3165
3166 /* Check for mismatched return types (cdecl vs stdcall). */
3167 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
3168 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
3169 return 0;
3170
3171 return 1;
3172 }
3173 \f
3174 /* Return the regparm value for a function with the indicated TYPE and DECL.
3175 DECL may be NULL when calling function indirectly
3176 or considering a libcall. */
3177
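/* Sketch of the promotion implemented below (illustrative, not from the
   original source): a local function, i.e. a static function whose address
   is never taken, may be bumped up to regparm(3) automatically under
   unit-at-a-time compilation, provided no fixed or global registers, stack
   realignment or static chain use gets in the way. */
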
3178 static int
3179 ix86_function_regparm (const_tree type, const_tree decl)
3180 {
3181 tree attr;
3182 int regparm = ix86_regparm;
3183
3184 if (TARGET_64BIT)
3185 return regparm;
3186
3187 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
3188 if (attr)
3189 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
3190
3191 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
3192 return 2;
3193
3194 /* Use register calling convention for local functions when possible. */
3195 if (decl && TREE_CODE (decl) == FUNCTION_DECL
3196 && flag_unit_at_a_time && !profile_flag)
3197 {
3198 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3199 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3200 if (i && i->local)
3201 {
3202 int local_regparm, globals = 0, regno;
3203 struct function *f;
3204
3205 /* Make sure no regparm register is taken by a
3206 fixed register or global register variable. */
3207 for (local_regparm = 0; local_regparm < 3; local_regparm++)
3208 if (global_regs[local_regparm] || fixed_regs[local_regparm])
3209 break;
3210
3211 /* We can't use regparm(3) for nested functions as these use
3212 the static chain pointer in the third argument. */
3213 if (local_regparm == 3
3214 && (decl_function_context (decl)
3215 || ix86_force_align_arg_pointer)
3216 && !DECL_NO_STATIC_CHAIN (decl))
3217 local_regparm = 2;
3218
3219 /* If the function realigns its stack pointer, the prologue will
3220 clobber %ecx. If we've already generated code for the callee,
3221 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
3222 scanning the attributes for the self-realigning property. */
3223 f = DECL_STRUCT_FUNCTION (decl);
3224 if (local_regparm == 3
3225 && (f ? !!f->machine->force_align_arg_pointer
3226 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
3227 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
3228 local_regparm = 2;
3229
3230 /* Each global register variable or fixed register usage
3231 increases register pressure, so fewer registers should be
3232 used for argument passing. This functionality can be
3233 overridden by an explicit regparm value. */
3234 for (regno = 0; regno < 6; regno++)
3235 if (global_regs[regno] || fixed_regs[regno])
3236 globals++;
3237 local_regparm
3238 = globals < local_regparm ? local_regparm - globals : 0;
3239
3240 if (local_regparm > regparm)
3241 regparm = local_regparm;
3242 }
3243 }
3244
3245 return regparm;
3246 }
3247
3248 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3249 DFmode (2) arguments in SSE registers for a function with the
3250 indicated TYPE and DECL. DECL may be NULL when calling function
3251 indirectly or considering a libcall. Otherwise return 0. */
3252
3253 static int
3254 ix86_function_sseregparm (const_tree type, const_tree decl)
3255 {
3256 gcc_assert (!TARGET_64BIT);
3257
3258 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3259 by the sseregparm attribute. */
3260 if (TARGET_SSEREGPARM
3261 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
3262 {
3263 if (!TARGET_SSE)
3264 {
3265 if (decl)
3266 error ("Calling %qD with attribute sseregparm without "
3267 "SSE/SSE2 enabled", decl);
3268 else
3269 error ("Calling %qT with attribute sseregparm without "
3270 "SSE/SSE2 enabled", type);
3271 return 0;
3272 }
3273
3274 return 2;
3275 }
3276
3277 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3278 (and DFmode for SSE2) arguments in SSE registers. */
3279 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3280 {
3281 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3282 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3283 if (i && i->local)
3284 return TARGET_SSE2 ? 2 : 1;
3285 }
3286
3287 return 0;
3288 }
3289
3290 /* Return true if EAX is live at the start of the function. Used by
3291 ix86_expand_prologue to determine if we need special help before
3292 calling allocate_stack_worker. */
3293
3294 static bool
3295 ix86_eax_live_at_start_p (void)
3296 {
3297 /* Cheat. Don't bother working forward from ix86_function_regparm
3298 to the function type to whether an actual argument is located in
3299 eax. Instead just look at cfg info, which is still close enough
3300 to correct at this point. This gives false positives for broken
3301 functions that might use uninitialized data that happens to be
3302 allocated in eax, but who cares? */
3303 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
3304 }
3305
3306 /* Value is the number of bytes of arguments automatically
3307 popped when returning from a subroutine call.
3308 FUNDECL is the declaration node of the function (as a tree),
3309 FUNTYPE is the data type of the function (as a tree),
3310 or for a library call it is an identifier node for the subroutine name.
3311 SIZE is the number of bytes of arguments passed on the stack.
3312
3313 On the 80386, the RTD insn may be used to pop them if the number
3314 of args is fixed, but if the number is variable then the caller
3315 must pop them all. RTD can't be used for library calls now
3316 because the library is compiled with the Unix compiler.
3317 Use of RTD is a selectable option, since it is incompatible with
3318 standard Unix calling sequences. If the option is not selected,
3319 the caller must always pop the args.
3320
3321 The attribute stdcall is equivalent to RTD on a per module basis. */
3322
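/* Illustrative example (not from the original source): for
     void __attribute__ ((stdcall)) f (int a, int b);
   a 32-bit callee returns with "ret 8", so this hook reports 8 bytes,
   while a plain cdecl function returns with "ret" and reports 0. */
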
3323 int
3324 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3325 {
3326 int rtd;
3327
3328 /* None of the 64-bit ABIs pop arguments. */
3329 if (TARGET_64BIT)
3330 return 0;
3331
3332 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3333
3334 /* Cdecl functions override -mrtd, and never pop the stack. */
3335 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
3336 {
3337 /* Stdcall and fastcall functions will pop the stack if not
3338 variable args. */
3339 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3340 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3341 rtd = 1;
3342
3343 if (rtd && ! stdarg_p (funtype))
3344 return size;
3345 }
3346
3347 /* Lose any fake structure return argument if it is passed on the stack. */
3348 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3349 && !KEEP_AGGREGATE_RETURN_POINTER)
3350 {
3351 int nregs = ix86_function_regparm (funtype, fundecl);
3352 if (nregs == 0)
3353 return GET_MODE_SIZE (Pmode);
3354 }
3355
3356 return 0;
3357 }
3358 \f
3359 /* Argument support functions. */
3360
3361 /* Return true when a register may be used to pass function parameters. */
3362 bool
3363 ix86_function_arg_regno_p (int regno)
3364 {
3365 int i;
3366 const int *parm_regs;
3367
3368 if (!TARGET_64BIT)
3369 {
3370 if (TARGET_MACHO)
3371 return (regno < REGPARM_MAX
3372 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3373 else
3374 return (regno < REGPARM_MAX
3375 || (TARGET_MMX && MMX_REGNO_P (regno)
3376 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3377 || (TARGET_SSE && SSE_REGNO_P (regno)
3378 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3379 }
3380
3381 if (TARGET_MACHO)
3382 {
3383 if (SSE_REGNO_P (regno) && TARGET_SSE)
3384 return true;
3385 }
3386 else
3387 {
3388 if (TARGET_SSE && SSE_REGNO_P (regno)
3389 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3390 return true;
3391 }
3392
3393 /* RAX is used as a hidden argument to va_arg functions. */
3394 if (!TARGET_64BIT_MS_ABI && regno == AX_REG)
3395 return true;
3396
3397 if (TARGET_64BIT_MS_ABI)
3398 parm_regs = x86_64_ms_abi_int_parameter_registers;
3399 else
3400 parm_regs = x86_64_int_parameter_registers;
3401 for (i = 0; i < REGPARM_MAX; i++)
3402 if (regno == parm_regs[i])
3403 return true;
3404 return false;
3405 }
3406
3407 /* Return true if we do not know how to pass TYPE solely in registers. */
3408
3409 static bool
3410 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
3411 {
3412 if (must_pass_in_stack_var_size_or_pad (mode, type))
3413 return true;
3414
3415 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3416 The layout_type routine is crafty and tries to trick us into passing
3417 currently unsupported vector types on the stack by using TImode. */
3418 return (!TARGET_64BIT && mode == TImode
3419 && type && TREE_CODE (type) != VECTOR_TYPE);
3420 }
3421
3422 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3423 for a call to a function whose data type is FNTYPE.
3424 For a library call, FNTYPE is 0. */
3425
3426 void
3427 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3428 tree fntype, /* tree ptr for function decl */
3429 rtx libname, /* SYMBOL_REF of library name or 0 */
3430 tree fndecl)
3431 {
3432 memset (cum, 0, sizeof (*cum));
3433
3434 /* Set up the number of registers to use for passing arguments. */
3435 cum->nregs = ix86_regparm;
3436 if (TARGET_SSE)
3437 cum->sse_nregs = SSE_REGPARM_MAX;
3438 if (TARGET_MMX)
3439 cum->mmx_nregs = MMX_REGPARM_MAX;
3440 cum->warn_sse = true;
3441 cum->warn_mmx = true;
3442 cum->maybe_vaarg = (fntype
3443 ? (!prototype_p (fntype) || stdarg_p (fntype))
3444 : !libname);
3445
3446 if (!TARGET_64BIT)
3447 {
3448 /* If there are variable arguments, then we won't pass anything
3449 in registers in 32-bit mode. */
3450 if (cum->maybe_vaarg)
3451 {
3452 cum->nregs = 0;
3453 cum->sse_nregs = 0;
3454 cum->mmx_nregs = 0;
3455 cum->warn_sse = 0;
3456 cum->warn_mmx = 0;
3457 return;
3458 }
3459
3460 /* Use ecx and edx registers if function has fastcall attribute,
3461 else look for regparm information. */
3462 if (fntype)
3463 {
3464 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3465 {
3466 cum->nregs = 2;
3467 cum->fastcall = 1;
3468 }
3469 else
3470 cum->nregs = ix86_function_regparm (fntype, fndecl);
3471 }
3472
3473 /* Set up the number of SSE registers used for passing SFmode
3474 and DFmode arguments. Warn for mismatching ABI. */
3475 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3476 }
3477 }
3478
3479 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3480 But in the case of vector types, it is some vector mode.
3481
3482 When we have only some of our vector isa extensions enabled, then there
3483 are some modes for which vector_mode_supported_p is false. For these
3484 modes, the generic vector support in gcc will choose some non-vector mode
3485 in order to implement the type. By computing the natural mode, we'll
3486 select the proper ABI location for the operand and not depend on whatever
3487 the middle-end decides to do with these vector types. */
3488
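/* For example (illustrative only): a type declared as
     typedef int v4si __attribute__ ((vector_size (16)));
   keeps V4SImode as its natural mode here even when SSE is disabled,
   so the argument still lands in its proper ABI location instead of
   whatever integer mode the middle-end falls back to. */
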
3489 static enum machine_mode
3490 type_natural_mode (const_tree type)
3491 {
3492 enum machine_mode mode = TYPE_MODE (type);
3493
3494 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3495 {
3496 HOST_WIDE_INT size = int_size_in_bytes (type);
3497 if ((size == 8 || size == 16)
3498 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3499 && TYPE_VECTOR_SUBPARTS (type) > 1)
3500 {
3501 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3502
3503 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3504 mode = MIN_MODE_VECTOR_FLOAT;
3505 else
3506 mode = MIN_MODE_VECTOR_INT;
3507
3508 /* Get the mode which has this inner mode and number of units. */
3509 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3510 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3511 && GET_MODE_INNER (mode) == innermode)
3512 return mode;
3513
3514 gcc_unreachable ();
3515 }
3516 }
3517
3518 return mode;
3519 }
3520
3521 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3522 this may not agree with the mode that the type system has chosen for the
3523 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3524 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3525
3526 static rtx
3527 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3528 unsigned int regno)
3529 {
3530 rtx tmp;
3531
3532 if (orig_mode != BLKmode)
3533 tmp = gen_rtx_REG (orig_mode, regno);
3534 else
3535 {
3536 tmp = gen_rtx_REG (mode, regno);
3537 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3538 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3539 }
3540
3541 return tmp;
3542 }
3543
3544 /* x86-64 register passing implementation. See the x86-64 ABI for details. The
3545 goal of this code is to classify each 8-byte chunk of an incoming argument by
3546 register class and assign registers accordingly. */
3547
3548 /* Return the union class of CLASS1 and CLASS2.
3549 See the x86-64 PS ABI for details. */
3550
3551 static enum x86_64_reg_class
3552 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3553 {
3554 /* Rule #1: If both classes are equal, this is the resulting class. */
3555 if (class1 == class2)
3556 return class1;
3557
3558 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3559 the other class. */
3560 if (class1 == X86_64_NO_CLASS)
3561 return class2;
3562 if (class2 == X86_64_NO_CLASS)
3563 return class1;
3564
3565 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3566 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3567 return X86_64_MEMORY_CLASS;
3568
3569 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3570 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3571 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3572 return X86_64_INTEGERSI_CLASS;
3573 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3574 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3575 return X86_64_INTEGER_CLASS;
3576
3577 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3578 MEMORY is used. */
3579 if (class1 == X86_64_X87_CLASS
3580 || class1 == X86_64_X87UP_CLASS
3581 || class1 == X86_64_COMPLEX_X87_CLASS
3582 || class2 == X86_64_X87_CLASS
3583 || class2 == X86_64_X87UP_CLASS
3584 || class2 == X86_64_COMPLEX_X87_CLASS)
3585 return X86_64_MEMORY_CLASS;
3586
3587 /* Rule #6: Otherwise class SSE is used. */
3588 return X86_64_SSE_CLASS;
3589 }
3590
3591 /* Classify the argument of type TYPE and mode MODE.
3592 CLASSES will be filled by the register class used to pass each word
3593 of the operand. The number of words is returned. In case the parameter
3594 should be passed in memory, 0 is returned. As a special case for zero
3595 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3596
3597 BIT_OFFSET is used internally for handling records and specifies the
3598 offset in bits, modulo 256, to avoid overflow cases.
3599
3600 See the x86-64 PS ABI for details.
3601 */
3602
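/* Worked example (illustrative, not from the original source): for
     struct s { double d; int i; };
   the 16-byte struct is split into two eightbytes; the first one, holding
   the double, is given an SSE class and the second one, holding the int,
   an integer class, so the whole struct travels in one SSE register plus
   one general-purpose register. */
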
3603 static int
3604 classify_argument (enum machine_mode mode, const_tree type,
3605 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3606 {
3607 HOST_WIDE_INT bytes =
3608 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3609 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3610
3611 /* Variable sized entities are always passed/returned in memory. */
3612 if (bytes < 0)
3613 return 0;
3614
3615 if (mode != VOIDmode
3616 && targetm.calls.must_pass_in_stack (mode, type))
3617 return 0;
3618
3619 if (type && AGGREGATE_TYPE_P (type))
3620 {
3621 int i;
3622 tree field;
3623 enum x86_64_reg_class subclasses[MAX_CLASSES];
3624
3625 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3626 if (bytes > 16)
3627 return 0;
3628
3629 for (i = 0; i < words; i++)
3630 classes[i] = X86_64_NO_CLASS;
3631
3632 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3633 signal the memory class, so handle it as a special case. */
3634 if (!words)
3635 {
3636 classes[0] = X86_64_NO_CLASS;
3637 return 1;
3638 }
3639
3640 /* Classify each field of record and merge classes. */
3641 switch (TREE_CODE (type))
3642 {
3643 case RECORD_TYPE:
3644 /* And now merge the fields of structure. */
3645 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3646 {
3647 if (TREE_CODE (field) == FIELD_DECL)
3648 {
3649 int num;
3650
3651 if (TREE_TYPE (field) == error_mark_node)
3652 continue;
3653
3654 /* Bitfields are always classified as integer. Handle them
3655 early, since later code would consider them to be
3656 misaligned integers. */
3657 if (DECL_BIT_FIELD (field))
3658 {
3659 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3660 i < ((int_bit_position (field) + (bit_offset % 64))
3661 + tree_low_cst (DECL_SIZE (field), 0)
3662 + 63) / 8 / 8; i++)
3663 classes[i] =
3664 merge_classes (X86_64_INTEGER_CLASS,
3665 classes[i]);
3666 }
3667 else
3668 {
3669 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3670 TREE_TYPE (field), subclasses,
3671 (int_bit_position (field)
3672 + bit_offset) % 256);
3673 if (!num)
3674 return 0;
3675 for (i = 0; i < num; i++)
3676 {
3677 int pos =
3678 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3679 classes[i + pos] =
3680 merge_classes (subclasses[i], classes[i + pos]);
3681 }
3682 }
3683 }
3684 }
3685 break;
3686
3687 case ARRAY_TYPE:
3688 /* Arrays are handled as small records. */
3689 {
3690 int num;
3691 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3692 TREE_TYPE (type), subclasses, bit_offset);
3693 if (!num)
3694 return 0;
3695
3696 /* The partial classes are now full classes. */
3697 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3698 subclasses[0] = X86_64_SSE_CLASS;
3699 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3700 subclasses[0] = X86_64_INTEGER_CLASS;
3701
3702 for (i = 0; i < words; i++)
3703 classes[i] = subclasses[i % num];
3704
3705 break;
3706 }
3707 case UNION_TYPE:
3708 case QUAL_UNION_TYPE:
3709 /* Unions are similar to RECORD_TYPE but offset is always 0.
3710 */
3711 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3712 {
3713 if (TREE_CODE (field) == FIELD_DECL)
3714 {
3715 int num;
3716
3717 if (TREE_TYPE (field) == error_mark_node)
3718 continue;
3719
3720 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3721 TREE_TYPE (field), subclasses,
3722 bit_offset);
3723 if (!num)
3724 return 0;
3725 for (i = 0; i < num; i++)
3726 classes[i] = merge_classes (subclasses[i], classes[i]);
3727 }
3728 }
3729 break;
3730
3731 default:
3732 gcc_unreachable ();
3733 }
3734
3735 /* Final merger cleanup. */
3736 for (i = 0; i < words; i++)
3737 {
3738 /* If one class is MEMORY, everything should be passed in
3739 memory. */
3740 if (classes[i] == X86_64_MEMORY_CLASS)
3741 return 0;
3742
3743 /* The X86_64_SSEUP_CLASS should always be preceded by
3744 X86_64_SSE_CLASS. */
3745 if (classes[i] == X86_64_SSEUP_CLASS
3746 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3747 classes[i] = X86_64_SSE_CLASS;
3748
3749 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3750 if (classes[i] == X86_64_X87UP_CLASS
3751 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3752 classes[i] = X86_64_SSE_CLASS;
3753 }
3754 return words;
3755 }
3756
3757 /* Compute the alignment needed. We align all types to natural boundaries,
3758 with the exception of XFmode, which is aligned to 64 bits. */
3759 if (mode != VOIDmode && mode != BLKmode)
3760 {
3761 int mode_alignment = GET_MODE_BITSIZE (mode);
3762
3763 if (mode == XFmode)
3764 mode_alignment = 128;
3765 else if (mode == XCmode)
3766 mode_alignment = 256;
3767 if (COMPLEX_MODE_P (mode))
3768 mode_alignment /= 2;
3769 /* Misaligned fields are always returned in memory. */
3770 if (bit_offset % mode_alignment)
3771 return 0;
3772 }
3773
3774 /* for V1xx modes, just use the base mode */
3775 if (VECTOR_MODE_P (mode)
3776 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3777 mode = GET_MODE_INNER (mode);
3778
3779 /* Classification of atomic types. */
3780 switch (mode)
3781 {
3782 case SDmode:
3783 case DDmode:
3784 classes[0] = X86_64_SSE_CLASS;
3785 return 1;
3786 case TDmode:
3787 classes[0] = X86_64_SSE_CLASS;
3788 classes[1] = X86_64_SSEUP_CLASS;
3789 return 2;
3790 case DImode:
3791 case SImode:
3792 case HImode:
3793 case QImode:
3794 case CSImode:
3795 case CHImode:
3796 case CQImode:
3797 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3798 classes[0] = X86_64_INTEGERSI_CLASS;
3799 else
3800 classes[0] = X86_64_INTEGER_CLASS;
3801 return 1;
3802 case CDImode:
3803 case TImode:
3804 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3805 return 2;
3806 case CTImode:
3807 return 0;
3808 case SFmode:
3809 if (!(bit_offset % 64))
3810 classes[0] = X86_64_SSESF_CLASS;
3811 else
3812 classes[0] = X86_64_SSE_CLASS;
3813 return 1;
3814 case DFmode:
3815 classes[0] = X86_64_SSEDF_CLASS;
3816 return 1;
3817 case XFmode:
3818 classes[0] = X86_64_X87_CLASS;
3819 classes[1] = X86_64_X87UP_CLASS;
3820 return 2;
3821 case TFmode:
3822 classes[0] = X86_64_SSE_CLASS;
3823 classes[1] = X86_64_SSEUP_CLASS;
3824 return 2;
3825 case SCmode:
3826 classes[0] = X86_64_SSE_CLASS;
3827 return 1;
3828 case DCmode:
3829 classes[0] = X86_64_SSEDF_CLASS;
3830 classes[1] = X86_64_SSEDF_CLASS;
3831 return 2;
3832 case XCmode:
3833 classes[0] = X86_64_COMPLEX_X87_CLASS;
3834 return 1;
3835 case TCmode:
3836 /* This mode is larger than 16 bytes. */
3837 return 0;
3838 case V4SFmode:
3839 case V4SImode:
3840 case V16QImode:
3841 case V8HImode:
3842 case V2DFmode:
3843 case V2DImode:
3844 classes[0] = X86_64_SSE_CLASS;
3845 classes[1] = X86_64_SSEUP_CLASS;
3846 return 2;
3847 case V2SFmode:
3848 case V2SImode:
3849 case V4HImode:
3850 case V8QImode:
3851 classes[0] = X86_64_SSE_CLASS;
3852 return 1;
3853 case BLKmode:
3854 case VOIDmode:
3855 return 0;
3856 default:
3857 gcc_assert (VECTOR_MODE_P (mode));
3858
3859 if (bytes > 16)
3860 return 0;
3861
3862 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3863
3864 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3865 classes[0] = X86_64_INTEGERSI_CLASS;
3866 else
3867 classes[0] = X86_64_INTEGER_CLASS;
3868 classes[1] = X86_64_INTEGER_CLASS;
3869 return 1 + (bytes > 8);
3870 }
3871 }
3872
3873 /* Examine the argument and set the number of registers required in each
3874 class. Return 0 iff the parameter should be passed in memory. */
3875 static int
3876 examine_argument (enum machine_mode mode, const_tree type, int in_return,
3877 int *int_nregs, int *sse_nregs)
3878 {
3879 enum x86_64_reg_class regclass[MAX_CLASSES];
3880 int n = classify_argument (mode, type, regclass, 0);
3881
3882 *int_nregs = 0;
3883 *sse_nregs = 0;
3884 if (!n)
3885 return 0;
3886 for (n--; n >= 0; n--)
3887 switch (regclass[n])
3888 {
3889 case X86_64_INTEGER_CLASS:
3890 case X86_64_INTEGERSI_CLASS:
3891 (*int_nregs)++;
3892 break;
3893 case X86_64_SSE_CLASS:
3894 case X86_64_SSESF_CLASS:
3895 case X86_64_SSEDF_CLASS:
3896 (*sse_nregs)++;
3897 break;
3898 case X86_64_NO_CLASS:
3899 case X86_64_SSEUP_CLASS:
3900 break;
3901 case X86_64_X87_CLASS:
3902 case X86_64_X87UP_CLASS:
3903 if (!in_return)
3904 return 0;
3905 break;
3906 case X86_64_COMPLEX_X87_CLASS:
3907 return in_return ? 2 : 0;
3908 case X86_64_MEMORY_CLASS:
3909 gcc_unreachable ();
3910 }
3911 return 1;
3912 }
3913
3914 /* Construct container for the argument used by GCC interface. See
3915 FUNCTION_ARG for the detailed description. */
3916
3917 static rtx
3918 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3919 const_tree type, int in_return, int nintregs, int nsseregs,
3920 const int *intreg, int sse_regno)
3921 {
3922 /* The following variables hold the static issued_error state. */
3923 static bool issued_sse_arg_error;
3924 static bool issued_sse_ret_error;
3925 static bool issued_x87_ret_error;
3926
3927 enum machine_mode tmpmode;
3928 int bytes =
3929 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3930 enum x86_64_reg_class regclass[MAX_CLASSES];
3931 int n;
3932 int i;
3933 int nexps = 0;
3934 int needed_sseregs, needed_intregs;
3935 rtx exp[MAX_CLASSES];
3936 rtx ret;
3937
3938 n = classify_argument (mode, type, regclass, 0);
3939 if (!n)
3940 return NULL;
3941 if (!examine_argument (mode, type, in_return, &needed_intregs,
3942 &needed_sseregs))
3943 return NULL;
3944 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3945 return NULL;
3946
3947 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3948 some less clueful developer tries to use floating-point anyway. */
3949 if (needed_sseregs && !TARGET_SSE)
3950 {
3951 if (in_return)
3952 {
3953 if (!issued_sse_ret_error)
3954 {
3955 error ("SSE register return with SSE disabled");
3956 issued_sse_ret_error = true;
3957 }
3958 }
3959 else if (!issued_sse_arg_error)
3960 {
3961 error ("SSE register argument with SSE disabled");
3962 issued_sse_arg_error = true;
3963 }
3964 return NULL;
3965 }
3966
3967 /* Likewise, error if the ABI requires us to return values in the
3968 x87 registers and the user specified -mno-80387. */
3969 if (!TARGET_80387 && in_return)
3970 for (i = 0; i < n; i++)
3971 if (regclass[i] == X86_64_X87_CLASS
3972 || regclass[i] == X86_64_X87UP_CLASS
3973 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
3974 {
3975 if (!issued_x87_ret_error)
3976 {
3977 error ("x87 register return with x87 disabled");
3978 issued_x87_ret_error = true;
3979 }
3980 return NULL;
3981 }
3982
3983 /* First construct simple cases. Avoid SCmode, since we want to use
3984 single register to pass this type. */
3985 if (n == 1 && mode != SCmode)
3986 switch (regclass[0])
3987 {
3988 case X86_64_INTEGER_CLASS:
3989 case X86_64_INTEGERSI_CLASS:
3990 return gen_rtx_REG (mode, intreg[0]);
3991 case X86_64_SSE_CLASS:
3992 case X86_64_SSESF_CLASS:
3993 case X86_64_SSEDF_CLASS:
3994 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3995 case X86_64_X87_CLASS:
3996 case X86_64_COMPLEX_X87_CLASS:
3997 return gen_rtx_REG (mode, FIRST_STACK_REG);
3998 case X86_64_NO_CLASS:
3999 /* Zero sized array, struct or class. */
4000 return NULL;
4001 default:
4002 gcc_unreachable ();
4003 }
4004 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
4005 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
4006 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
4007
4008 if (n == 2
4009 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
4010 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
4011 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
4012 && regclass[1] == X86_64_INTEGER_CLASS
4013 && (mode == CDImode || mode == TImode || mode == TFmode)
4014 && intreg[0] + 1 == intreg[1])
4015 return gen_rtx_REG (mode, intreg[0]);
4016
4017 /* Otherwise figure out the entries of the PARALLEL. */
4018 for (i = 0; i < n; i++)
4019 {
4020 switch (regclass[i])
4021 {
4022 case X86_64_NO_CLASS:
4023 break;
4024 case X86_64_INTEGER_CLASS:
4025 case X86_64_INTEGERSI_CLASS:
4026 /* Merge TImodes on aligned occasions here too. */
4027 if (i * 8 + 8 > bytes)
4028 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
4029 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
4030 tmpmode = SImode;
4031 else
4032 tmpmode = DImode;
4033 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
4034 if (tmpmode == BLKmode)
4035 tmpmode = DImode;
4036 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4037 gen_rtx_REG (tmpmode, *intreg),
4038 GEN_INT (i*8));
4039 intreg++;
4040 break;
4041 case X86_64_SSESF_CLASS:
4042 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4043 gen_rtx_REG (SFmode,
4044 SSE_REGNO (sse_regno)),
4045 GEN_INT (i*8));
4046 sse_regno++;
4047 break;
4048 case X86_64_SSEDF_CLASS:
4049 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4050 gen_rtx_REG (DFmode,
4051 SSE_REGNO (sse_regno)),
4052 GEN_INT (i*8));
4053 sse_regno++;
4054 break;
4055 case X86_64_SSE_CLASS:
4056 if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
4057 tmpmode = TImode;
4058 else
4059 tmpmode = DImode;
4060 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4061 gen_rtx_REG (tmpmode,
4062 SSE_REGNO (sse_regno)),
4063 GEN_INT (i*8));
4064 if (tmpmode == TImode)
4065 i++;
4066 sse_regno++;
4067 break;
4068 default:
4069 gcc_unreachable ();
4070 }
4071 }
4072
4073 /* Empty aligned struct, union or class. */
4074 if (nexps == 0)
4075 return NULL;
4076
4077 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
4078 for (i = 0; i < nexps; i++)
4079 XVECEXP (ret, 0, i) = exp [i];
4080 return ret;
4081 }
4082
4083 /* Update the data in CUM to advance over an argument of mode MODE
4084 and data type TYPE. (TYPE is null for libcalls where that information
4085 may not be available.) */
4086
4087 static void
4088 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4089 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
4090 {
4091 switch (mode)
4092 {
4093 default:
4094 break;
4095
4096 case BLKmode:
4097 if (bytes < 0)
4098 break;
4099 /* FALLTHRU */
4100
4101 case DImode:
4102 case SImode:
4103 case HImode:
4104 case QImode:
4105 cum->words += words;
4106 cum->nregs -= words;
4107 cum->regno += words;
4108
4109 if (cum->nregs <= 0)
4110 {
4111 cum->nregs = 0;
4112 cum->regno = 0;
4113 }
4114 break;
4115
4116 case DFmode:
4117 if (cum->float_in_sse < 2)
4118 break;
4119 case SFmode:
4120 if (cum->float_in_sse < 1)
4121 break;
4122 /* FALLTHRU */
4123
4124 case TImode:
4125 case V16QImode:
4126 case V8HImode:
4127 case V4SImode:
4128 case V2DImode:
4129 case V4SFmode:
4130 case V2DFmode:
4131 if (!type || !AGGREGATE_TYPE_P (type))
4132 {
4133 cum->sse_words += words;
4134 cum->sse_nregs -= 1;
4135 cum->sse_regno += 1;
4136 if (cum->sse_nregs <= 0)
4137 {
4138 cum->sse_nregs = 0;
4139 cum->sse_regno = 0;
4140 }
4141 }
4142 break;
4143
4144 case V8QImode:
4145 case V4HImode:
4146 case V2SImode:
4147 case V2SFmode:
4148 if (!type || !AGGREGATE_TYPE_P (type))
4149 {
4150 cum->mmx_words += words;
4151 cum->mmx_nregs -= 1;
4152 cum->mmx_regno += 1;
4153 if (cum->mmx_nregs <= 0)
4154 {
4155 cum->mmx_nregs = 0;
4156 cum->mmx_regno = 0;
4157 }
4158 }
4159 break;
4160 }
4161 }
4162
4163 static void
4164 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4165 tree type, HOST_WIDE_INT words)
4166 {
4167 int int_nregs, sse_nregs;
4168
4169 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
4170 cum->words += words;
4171 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
4172 {
4173 cum->nregs -= int_nregs;
4174 cum->sse_nregs -= sse_nregs;
4175 cum->regno += int_nregs;
4176 cum->sse_regno += sse_nregs;
4177 }
4178 else
4179 cum->words += words;
4180 }
4181
4182 static void
4183 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
4184 HOST_WIDE_INT words)
4185 {
4186 /* Otherwise, this should be passed indirectly. */
4187 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
4188
4189 cum->words += words;
4190 if (cum->nregs > 0)
4191 {
4192 cum->nregs -= 1;
4193 cum->regno += 1;
4194 }
4195 }
4196
4197 void
4198 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4199 tree type, int named ATTRIBUTE_UNUSED)
4200 {
4201 HOST_WIDE_INT bytes, words;
4202
4203 if (mode == BLKmode)
4204 bytes = int_size_in_bytes (type);
4205 else
4206 bytes = GET_MODE_SIZE (mode);
4207 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4208
4209 if (type)
4210 mode = type_natural_mode (type);
4211
4212 if (TARGET_64BIT_MS_ABI)
4213 function_arg_advance_ms_64 (cum, bytes, words);
4214 else if (TARGET_64BIT)
4215 function_arg_advance_64 (cum, mode, type, words);
4216 else
4217 function_arg_advance_32 (cum, mode, type, bytes, words);
4218 }
4219
4220 /* Define where to put the arguments to a function.
4221 Value is zero to push the argument on the stack,
4222 or a hard register in which to store the argument.
4223
4224 MODE is the argument's machine mode.
4225 TYPE is the data type of the argument (as a tree).
4226 This is null for libcalls where that information may
4227 not be available.
4228 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4229 the preceding args and about the function being called.
4230 NAMED is nonzero if this argument is a named parameter
4231 (otherwise it is an extra parameter matching an ellipsis). */
4232
4233 static rtx
4234 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4235 enum machine_mode orig_mode, tree type,
4236 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
4237 {
4238 static bool warnedsse, warnedmmx;
4239
4240 /* Avoid the AL settings for the Unix64 ABI. */
4241 if (mode == VOIDmode)
4242 return constm1_rtx;
4243
4244 switch (mode)
4245 {
4246 default:
4247 break;
4248
4249 case BLKmode:
4250 if (bytes < 0)
4251 break;
4252 /* FALLTHRU */
4253 case DImode:
4254 case SImode:
4255 case HImode:
4256 case QImode:
4257 if (words <= cum->nregs)
4258 {
4259 int regno = cum->regno;
4260
4261 /* Fastcall allocates the first two DWORD (SImode) or
4262 smaller arguments to ECX and EDX unless the argument
4263 is an aggregate type. */
4264 if (cum->fastcall)
4265 {
4266 if (mode == BLKmode
4267 || mode == DImode
4268 || (type && AGGREGATE_TYPE_P (type)))
4269 break;
4270
4271 /* ECX not EAX is the first allocated register. */
4272 if (regno == AX_REG)
4273 regno = CX_REG;
4274 }
4275 return gen_rtx_REG (mode, regno);
4276 }
4277 break;
4278
4279 case DFmode:
4280 if (cum->float_in_sse < 2)
4281 break;
4282 case SFmode:
4283 if (cum->float_in_sse < 1)
4284 break;
4285 /* FALLTHRU */
4286 case TImode:
4287 case V16QImode:
4288 case V8HImode:
4289 case V4SImode:
4290 case V2DImode:
4291 case V4SFmode:
4292 case V2DFmode:
4293 if (!type || !AGGREGATE_TYPE_P (type))
4294 {
4295 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4296 {
4297 warnedsse = true;
4298 warning (0, "SSE vector argument without SSE enabled "
4299 "changes the ABI");
4300 }
4301 if (cum->sse_nregs)
4302 return gen_reg_or_parallel (mode, orig_mode,
4303 cum->sse_regno + FIRST_SSE_REG);
4304 }
4305 break;
4306
4307 case V8QImode:
4308 case V4HImode:
4309 case V2SImode:
4310 case V2SFmode:
4311 if (!type || !AGGREGATE_TYPE_P (type))
4312 {
4313 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4314 {
4315 warnedmmx = true;
4316 warning (0, "MMX vector argument without MMX enabled "
4317 "changes the ABI");
4318 }
4319 if (cum->mmx_nregs)
4320 return gen_reg_or_parallel (mode, orig_mode,
4321 cum->mmx_regno + FIRST_MMX_REG);
4322 }
4323 break;
4324 }
4325
4326 return NULL_RTX;
4327 }
4328
4329 static rtx
4330 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4331 enum machine_mode orig_mode, tree type)
4332 {
4333 /* Handle a hidden AL argument containing number of registers
4334 for varargs x86-64 functions. */
4335 if (mode == VOIDmode)
4336 return GEN_INT (cum->maybe_vaarg
4337 ? (cum->sse_nregs < 0
4338 ? SSE_REGPARM_MAX
4339 : cum->sse_regno)
4340 : -1);
4341
4342 return construct_container (mode, orig_mode, type, 0, cum->nregs,
4343 cum->sse_nregs,
4344 &x86_64_int_parameter_registers [cum->regno],
4345 cum->sse_regno);
4346 }
4347
4348 static rtx
4349 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4350 enum machine_mode orig_mode, int named)
4351 {
4352 unsigned int regno;
4353
4354 /* Avoid the AL settings for the Unix64 ABI. */
4355 if (mode == VOIDmode)
4356 return constm1_rtx;
4357
4358 /* If we've run out of registers, it goes on the stack. */
4359 if (cum->nregs == 0)
4360 return NULL_RTX;
4361
4362 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
4363
4364 /* Only floating point modes are passed in anything but integer regs. */
4365 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
4366 {
4367 if (named)
4368 regno = cum->regno + FIRST_SSE_REG;
4369 else
4370 {
4371 rtx t1, t2;
4372
4373 /* Unnamed floating parameters are passed in both the
4374 SSE and integer registers. */
4375 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
4376 t2 = gen_rtx_REG (mode, regno);
4377 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
4378 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
4379 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
4380 }
4381 }
4382
4383 return gen_reg_or_parallel (mode, orig_mode, regno);
4384 }
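
/* Illustrative example (not from the original source): for a varargs call
   such as f (fmt, 3.14) under the 64-bit MS ABI, the unnamed double in the
   second argument slot is made available both in %xmm1 and in %rdx, which
   is what the PARALLEL built above expresses. */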
4385
4386 rtx
4387 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
4388 tree type, int named)
4389 {
4390 enum machine_mode mode = omode;
4391 HOST_WIDE_INT bytes, words;
4392
4393 if (mode == BLKmode)
4394 bytes = int_size_in_bytes (type);
4395 else
4396 bytes = GET_MODE_SIZE (mode);
4397 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4398
4399 /* To simplify the code below, represent vector types with a vector mode
4400 even if MMX/SSE are not active. */
4401 if (type && TREE_CODE (type) == VECTOR_TYPE)
4402 mode = type_natural_mode (type);
4403
4404 if (TARGET_64BIT_MS_ABI)
4405 return function_arg_ms_64 (cum, mode, omode, named);
4406 else if (TARGET_64BIT)
4407 return function_arg_64 (cum, mode, omode, type);
4408 else
4409 return function_arg_32 (cum, mode, omode, type, bytes, words);
4410 }
4411
4412 /* A C expression that indicates when an argument must be passed by
4413 reference. If nonzero for an argument, a copy of that argument is
4414 made in memory and a pointer to the argument is passed instead of
4415 the argument itself. The pointer is passed in whatever way is
4416 appropriate for passing a pointer to that type. */
4417
4418 static bool
4419 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4420 enum machine_mode mode ATTRIBUTE_UNUSED,
4421 const_tree type, bool named ATTRIBUTE_UNUSED)
4422 {
4423 if (TARGET_64BIT_MS_ABI)
4424 {
4425 if (type)
4426 {
4427 /* Arrays are passed by reference. */
4428 if (TREE_CODE (type) == ARRAY_TYPE)
4429 return true;
4430
4431 if (AGGREGATE_TYPE_P (type))
4432 {
4433 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4434 are passed by reference. */
4435 int el2 = exact_log2 (int_size_in_bytes (type));
4436 return !(el2 >= 0 && el2 <= 3);
4437 }
4438 }
4439
4440 /* __m128 is passed by reference. */
4441 /* ??? How to handle complex? For now treat them as structs,
4442 and pass them by reference if they're too large. */
4443 if (GET_MODE_SIZE (mode) > 8)
4444 return true;
4445 }
4446 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
4447 return 1;
4448
4449 return 0;
4450 }
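
/* For instance (illustrative only): under the 64-bit MS ABI a 12-byte struct
   is passed by reference because its size is not 1, 2, 4 or 8 bytes, while an
   8-byte struct is passed by value in a single register; on the 32-bit and
   SysV 64-bit paths only variable-sized types take the by-reference route
   here. */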
4451
4452 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4453 ABI. Only called if TARGET_SSE. */
4454 static bool
4455 contains_128bit_aligned_vector_p (tree type)
4456 {
4457 enum machine_mode mode = TYPE_MODE (type);
4458 if (SSE_REG_MODE_P (mode)
4459 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4460 return true;
4461 if (TYPE_ALIGN (type) < 128)
4462 return false;
4463
4464 if (AGGREGATE_TYPE_P (type))
4465 {
4466 /* Walk the aggregates recursively. */
4467 switch (TREE_CODE (type))
4468 {
4469 case RECORD_TYPE:
4470 case UNION_TYPE:
4471 case QUAL_UNION_TYPE:
4472 {
4473 tree field;
4474
4475 /* Walk all the structure fields. */
4476 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4477 {
4478 if (TREE_CODE (field) == FIELD_DECL
4479 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4480 return true;
4481 }
4482 break;
4483 }
4484
4485 case ARRAY_TYPE:
4486 /* Just for use if some language passes arrays by value. */
4487 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4488 return true;
4489 break;
4490
4491 default:
4492 gcc_unreachable ();
4493 }
4494 }
4495 return false;
4496 }
4497
4498 /* Gives the alignment boundary, in bits, of an argument with the
4499 specified mode and type. */
4500
4501 int
4502 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4503 {
4504 int align;
4505 if (type)
4506 align = TYPE_ALIGN (type);
4507 else
4508 align = GET_MODE_ALIGNMENT (mode);
4509 if (align < PARM_BOUNDARY)
4510 align = PARM_BOUNDARY;
4511 if (!TARGET_64BIT)
4512 {
4513 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4514 make an exception for SSE modes since these require 128bit
4515 alignment.
4516
4517 The handling here differs from field_alignment. ICC aligns MMX
4518 arguments to 4 byte boundaries, while structure fields are aligned
4519 to 8 byte boundaries. */
4520 if (!TARGET_SSE)
4521 align = PARM_BOUNDARY;
4522 else if (!type)
4523 {
4524 if (!SSE_REG_MODE_P (mode))
4525 align = PARM_BOUNDARY;
4526 }
4527 else
4528 {
4529 if (!contains_128bit_aligned_vector_p (type))
4530 align = PARM_BOUNDARY;
4531 }
4532 }
4533 if (align > 128)
4534 align = 128;
4535 return align;
4536 }
4537
4538 /* Return true if REGNO is a possible register number of a function value. */
4539
4540 bool
4541 ix86_function_value_regno_p (int regno)
4542 {
4543 switch (regno)
4544 {
4545 case 0:
4546 return true;
4547
4548 case FIRST_FLOAT_REG:
4549 if (TARGET_64BIT_MS_ABI)
4550 return false;
4551 return TARGET_FLOAT_RETURNS_IN_80387;
4552
4553 case FIRST_SSE_REG:
4554 return TARGET_SSE;
4555
4556 case FIRST_MMX_REG:
4557 if (TARGET_MACHO || TARGET_64BIT)
4558 return false;
4559 return TARGET_MMX;
4560 }
4561
4562 return false;
4563 }
4564
4565 /* Define how to find the value returned by a function.
4566 VALTYPE is the data type of the value (as a tree).
4567 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4568 otherwise, FUNC is 0. */
4569
4570 static rtx
4571 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4572 const_tree fntype, const_tree fn)
4573 {
4574 unsigned int regno;
4575
4576 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4577 we normally prevent this case when mmx is not available. However
4578 some ABIs may require the result to be returned like DImode. */
4579 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4580 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4581
4582 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4583 we prevent this case when sse is not available. However some ABIs
4584 may require the result to be returned like integer TImode. */
4585 else if (mode == TImode
4586 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4587 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4588
4589 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4590 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4591 regno = FIRST_FLOAT_REG;
4592 else
4593 /* Most things go in %eax. */
4594 regno = AX_REG;
4595
4596 /* Override FP return register with %xmm0 for local functions when
4597 SSE math is enabled or for functions with sseregparm attribute. */
4598 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4599 {
4600 int sse_level = ix86_function_sseregparm (fntype, fn);
4601 if ((sse_level >= 1 && mode == SFmode)
4602 || (sse_level == 2 && mode == DFmode))
4603 regno = FIRST_SSE_REG;
4604 }
4605
4606 return gen_rtx_REG (orig_mode, regno);
4607 }
4608
4609 static rtx
4610 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4611 const_tree valtype)
4612 {
4613 rtx ret;
4614
4615 /* Handle libcalls, which don't provide a type node. */
4616 if (valtype == NULL)
4617 {
4618 switch (mode)
4619 {
4620 case SFmode:
4621 case SCmode:
4622 case DFmode:
4623 case DCmode:
4624 case TFmode:
4625 case SDmode:
4626 case DDmode:
4627 case TDmode:
4628 return gen_rtx_REG (mode, FIRST_SSE_REG);
4629 case XFmode:
4630 case XCmode:
4631 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4632 case TCmode:
4633 return NULL;
4634 default:
4635 return gen_rtx_REG (mode, AX_REG);
4636 }
4637 }
4638
4639 ret = construct_container (mode, orig_mode, valtype, 1,
4640 REGPARM_MAX, SSE_REGPARM_MAX,
4641 x86_64_int_return_registers, 0);
4642
4643 /* For zero sized structures, construct_container returns NULL, but we
4644 need to keep the rest of the compiler happy by returning a meaningful value. */
4645 if (!ret)
4646 ret = gen_rtx_REG (orig_mode, AX_REG);
4647
4648 return ret;
4649 }
4650
4651 static rtx
4652 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4653 {
4654 unsigned int regno = AX_REG;
4655
4656 if (TARGET_SSE)
4657 {
4658 if (mode == SFmode || mode == DFmode)
4659 regno = FIRST_SSE_REG;
4660 else if (VECTOR_MODE_P (mode) || GET_MODE_SIZE (mode) == 16)
4661 regno = FIRST_SSE_REG;
4662 }
4663
4664 return gen_rtx_REG (orig_mode, regno);
4665 }
4666
4667 static rtx
4668 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
4669 enum machine_mode orig_mode, enum machine_mode mode)
4670 {
4671 const_tree fn, fntype;
4672
4673 fn = NULL_TREE;
4674 if (fntype_or_decl && DECL_P (fntype_or_decl))
4675 fn = fntype_or_decl;
4676 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4677
4678 if (TARGET_64BIT_MS_ABI)
4679 return function_value_ms_64 (orig_mode, mode);
4680 else if (TARGET_64BIT)
4681 return function_value_64 (orig_mode, mode, valtype);
4682 else
4683 return function_value_32 (orig_mode, mode, fntype, fn);
4684 }
4685
4686 static rtx
4687 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
4688 bool outgoing ATTRIBUTE_UNUSED)
4689 {
4690 enum machine_mode mode, orig_mode;
4691
4692 orig_mode = TYPE_MODE (valtype);
4693 mode = type_natural_mode (valtype);
4694 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4695 }
4696
4697 rtx
4698 ix86_libcall_value (enum machine_mode mode)
4699 {
4700 return ix86_function_value_1 (NULL, NULL, mode, mode);
4701 }
4702
4703 /* Return true iff type is returned in memory. */
4704
4705 static int
4706 return_in_memory_32 (const_tree type, enum machine_mode mode)
4707 {
4708 HOST_WIDE_INT size;
4709
4710 if (mode == BLKmode)
4711 return 1;
4712
4713 size = int_size_in_bytes (type);
4714
4715 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4716 return 0;
4717
4718 if (VECTOR_MODE_P (mode) || mode == TImode)
4719 {
4720 /* User-created vectors small enough to fit in EAX. */
4721 if (size < 8)
4722 return 0;
4723
4724 /* MMX/3dNow values are returned in MM0,
4725 except when it doesn't exist. */
4726 if (size == 8)
4727 return (TARGET_MMX ? 0 : 1);
4728
4729 /* SSE values are returned in XMM0, except when it doesn't exist. */
4730 if (size == 16)
4731 return (TARGET_SSE ? 0 : 1);
4732 }
4733
4734 if (mode == XFmode)
4735 return 0;
4736
4737 if (mode == TDmode)
4738 return 1;
4739
4740 if (size > 12)
4741 return 1;
4742 return 0;
4743 }
4744
4745 static int
4746 return_in_memory_64 (const_tree type, enum machine_mode mode)
4747 {
4748 int needed_intregs, needed_sseregs;
4749 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4750 }
4751
4752 static int
4753 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
4754 {
4755 HOST_WIDE_INT size = int_size_in_bytes (type);
4756
4757 /* __m128 and friends are returned in xmm0. */
4758 if (!COMPLEX_MODE_P (mode) && size == 16 && VECTOR_MODE_P (mode))
4759 return 0;
4760
4761 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes; complex modes never qualify. */
4762 return (size != 1 && size != 2 && size != 4 && size != 8)
4763 || COMPLEX_MODE_P (mode);
4764 }
4765
4766 int
4767 ix86_return_in_memory (const_tree type)
4768 {
4769 const enum machine_mode mode = type_natural_mode (type);
4770
4771 if (TARGET_64BIT_MS_ABI)
4772 return return_in_memory_ms_64 (type, mode);
4773 else if (TARGET_64BIT)
4774 return return_in_memory_64 (type, mode);
4775 else
4776 return return_in_memory_32 (type, mode);
4777 }
4778
4779 /* Return false iff TYPE is returned in memory. This version is used
4780 on Solaris 10. It is similar to the generic ix86_return_in_memory,
4781 but differs notably in that when MMX is available, 8-byte vectors
4782 are returned in memory, rather than in MMX registers. */
4783
4784 int
4785 ix86_sol10_return_in_memory (const_tree type)
4786 {
4787 int size;
4788 enum machine_mode mode = type_natural_mode (type);
4789
4790 if (TARGET_64BIT)
4791 return return_in_memory_64 (type, mode);
4792
4793 if (mode == BLKmode)
4794 return 1;
4795
4796 size = int_size_in_bytes (type);
4797
4798 if (VECTOR_MODE_P (mode))
4799 {
4800 /* Return in memory only if MMX registers *are* available. This
4801 seems backwards, but it is consistent with the existing
4802 Solaris x86 ABI. */
4803 if (size == 8)
4804 return TARGET_MMX;
4805 if (size == 16)
4806 return !TARGET_SSE;
4807 }
4808 else if (mode == TImode)
4809 return !TARGET_SSE;
4810 else if (mode == XFmode)
4811 return 0;
4812
4813 return size > 12;
4814 }
4815
4816 /* When returning SSE vector types, we have a choice of either
4817 (1) being ABI incompatible with a -march switch, or
4818 (2) generating an error.
4819 Given no good solution, I think the safest thing is one warning.
4820 The user won't be able to use -Werror, but....
4821
4822 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4823 called in response to actually generating a caller or callee that
4824 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4825 via aggregate_value_p for general type probing from tree-ssa. */
4826
4827 static rtx
4828 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4829 {
4830 static bool warnedsse, warnedmmx;
4831
4832 if (!TARGET_64BIT && type)
4833 {
4834 /* Look at the return type of the function, not the function type. */
4835 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4836
4837 if (!TARGET_SSE && !warnedsse)
4838 {
4839 if (mode == TImode
4840 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4841 {
4842 warnedsse = true;
4843 warning (0, "SSE vector return without SSE enabled "
4844 "changes the ABI");
4845 }
4846 }
4847
4848 if (!TARGET_MMX && !warnedmmx)
4849 {
4850 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4851 {
4852 warnedmmx = true;
4853 warning (0, "MMX vector return without MMX enabled "
4854 "changes the ABI");
4855 }
4856 }
4857 }
4858
4859 return NULL;
4860 }
4861
4862 \f
4863 /* Create the va_list data type. */
4864
4865 static tree
4866 ix86_build_builtin_va_list (void)
4867 {
4868 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4869
4870 /* For i386 we use a plain pointer to the argument area. */
4871 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4872 return build_pointer_type (char_type_node);
4873
4874 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4875 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4876
4877 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4878 unsigned_type_node);
4879 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4880 unsigned_type_node);
4881 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4882 ptr_type_node);
4883 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4884 ptr_type_node);
4885
4886 va_list_gpr_counter_field = f_gpr;
4887 va_list_fpr_counter_field = f_fpr;
4888
4889 DECL_FIELD_CONTEXT (f_gpr) = record;
4890 DECL_FIELD_CONTEXT (f_fpr) = record;
4891 DECL_FIELD_CONTEXT (f_ovf) = record;
4892 DECL_FIELD_CONTEXT (f_sav) = record;
4893
4894 TREE_CHAIN (record) = type_decl;
4895 TYPE_NAME (record) = type_decl;
4896 TYPE_FIELDS (record) = f_gpr;
4897 TREE_CHAIN (f_gpr) = f_fpr;
4898 TREE_CHAIN (f_fpr) = f_ovf;
4899 TREE_CHAIN (f_ovf) = f_sav;
4900
4901 layout_type (record);
4902
4903 /* The correct type is an array type of one element. */
4904 return build_array_type (record, build_index_type (size_zero_node));
4905 }
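
/* The record built above corresponds roughly to the familiar C declaration
   (shown here only for illustration):

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } va_list[1]; */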
4906
4907 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4908
4909 static void
4910 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4911 {
4912 rtx save_area, mem;
4913 rtx label;
4914 rtx label_ref;
4915 rtx tmp_reg;
4916 rtx nsse_reg;
4917 alias_set_type set;
4918 int i;
4919
4920 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4921 return;
4922
4923 /* Indicate that space should be allocated on the stack for the varargs save area. */
4924 ix86_save_varrargs_registers = 1;
4925 /* We need 16-byte stack alignment to save SSE registers. If the user
4926 asked for a lower preferred_stack_boundary, let's just hope that he knows
4927 what he is doing and won't pass SSE values through varargs.
4928 
4929 We may also end up assuming that only 64bit values are stored in SSE
4930 registers, which lets some floating point programs keep working. */
4931 if (ix86_preferred_stack_boundary >= 128)
4932 cfun->stack_alignment_needed = 128;
4933
4934 save_area = frame_pointer_rtx;
4935 set = get_varargs_alias_set ();
4936
4937 for (i = cum->regno;
4938 i < ix86_regparm
4939 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4940 i++)
4941 {
4942 mem = gen_rtx_MEM (Pmode,
4943 plus_constant (save_area, i * UNITS_PER_WORD));
4944 MEM_NOTRAP_P (mem) = 1;
4945 set_mem_alias_set (mem, set);
4946 emit_move_insn (mem, gen_rtx_REG (Pmode,
4947 x86_64_int_parameter_registers[i]));
4948 }
4949
4950 if (cum->sse_nregs && cfun->va_list_fpr_size)
4951 {
4952 /* Now emit code to save SSE registers. The AX parameter contains the number
4953 of SSE parameter registers used to call this function. We use the
4954 sse_prologue_save insn template that produces a computed jump across
4955 the SSE saves. We need some preparation work to get this working. */
4956
4957 label = gen_label_rtx ();
4958 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4959
4960 /* Compute the address to jump to:
4961 label - eax*4 + nnamed_sse_arguments*4 */
4962 tmp_reg = gen_reg_rtx (Pmode);
4963 nsse_reg = gen_reg_rtx (Pmode);
4964 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
4965 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4966 gen_rtx_MULT (Pmode, nsse_reg,
4967 GEN_INT (4))));
4968 if (cum->sse_regno)
4969 emit_move_insn
4970 (nsse_reg,
4971 gen_rtx_CONST (DImode,
4972 gen_rtx_PLUS (DImode,
4973 label_ref,
4974 GEN_INT (cum->sse_regno * 4))));
4975 else
4976 emit_move_insn (nsse_reg, label_ref);
4977 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4978
4979 /* Compute the address of the memory block we save into. We always use a
4980 pointer pointing 127 bytes after the first byte to store; this is needed
4981 to keep the instruction size limited to 4 bytes. */
4982 tmp_reg = gen_reg_rtx (Pmode);
4983 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4984 plus_constant (save_area,
4985 8 * REGPARM_MAX + 127)));
4986 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4987 MEM_NOTRAP_P (mem) = 1;
4988 set_mem_alias_set (mem, set);
4989 set_mem_align (mem, BITS_PER_WORD);
4990
4991 /* And finally do the dirty job! */
4992 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4993 GEN_INT (cum->sse_regno), label));
4994 }
4995 }
4996
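/* Background: under the Microsoft x64 ABI the caller allocates a "home"
   (shadow) slot for each of the four register parameters right above the
   return address. For varargs we simply spill any remaining parameter
   registers into their home slots, so va_arg can walk a single contiguous
   stack area. */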
4997 static void
4998 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4999 {
5000 alias_set_type set = get_varargs_alias_set ();
5001 int i;
5002
5003 for (i = cum->regno; i < REGPARM_MAX; i++)
5004 {
5005 rtx reg, mem;
5006
5007 mem = gen_rtx_MEM (Pmode,
5008 plus_constant (virtual_incoming_args_rtx,
5009 i * UNITS_PER_WORD));
5010 MEM_NOTRAP_P (mem) = 1;
5011 set_mem_alias_set (mem, set);
5012
5013 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
5014 emit_move_insn (mem, reg);
5015 }
5016 }
5017
5018 static void
5019 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5020 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5021 int no_rtl)
5022 {
5023 CUMULATIVE_ARGS next_cum;
5024 tree fntype;
5025
5026 /* This argument doesn't appear to be used anymore, which is good,
5027 because the old code here didn't suppress rtl generation. */
5028 gcc_assert (!no_rtl);
5029
5030 if (!TARGET_64BIT)
5031 return;
5032
5033 fntype = TREE_TYPE (current_function_decl);
5034
5035 /* For varargs, we do not want to skip the dummy va_dcl argument.
5036 For stdargs, we do want to skip the last named argument. */
5037 next_cum = *cum;
5038 if (stdarg_p (fntype))
5039 function_arg_advance (&next_cum, mode, type, 1);
5040
5041 if (TARGET_64BIT_MS_ABI)
5042 setup_incoming_varargs_ms_64 (&next_cum);
5043 else
5044 setup_incoming_varargs_64 (&next_cum);
5045 }
5046
5047 /* Implement va_start. */
5048
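/* As an illustration (assuming the standard SysV AMD64 layout built above):
   for a function such as  int f (int a, ...)  the expansion below sets
   gp_offset to 8 (one named GP register consumed), fp_offset to 48 (no named
   SSE registers consumed; the GP slots occupy bytes 0-47 of the save area),
   points overflow_arg_area at the first stack-passed argument, and points
   reg_save_area at the block spilled by the prologue. */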
5049 static void
5050 ix86_va_start (tree valist, rtx nextarg)
5051 {
5052 HOST_WIDE_INT words, n_gpr, n_fpr;
5053 tree f_gpr, f_fpr, f_ovf, f_sav;
5054 tree gpr, fpr, ovf, sav, t;
5055 tree type;
5056
5057 /* Only 64bit target needs something special. */
5058 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
5059 {
5060 std_expand_builtin_va_start (valist, nextarg);
5061 return;
5062 }
5063
5064 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5065 f_fpr = TREE_CHAIN (f_gpr);
5066 f_ovf = TREE_CHAIN (f_fpr);
5067 f_sav = TREE_CHAIN (f_ovf);
5068
5069 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
5070 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5071 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5072 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5073 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
5074
5075 /* Count number of gp and fp argument registers used. */
5076 words = current_function_args_info.words;
5077 n_gpr = current_function_args_info.regno;
5078 n_fpr = current_function_args_info.sse_regno;
5079
5080 if (cfun->va_list_gpr_size)
5081 {
5082 type = TREE_TYPE (gpr);
5083 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
5084 build_int_cst (type, n_gpr * 8));
5085 TREE_SIDE_EFFECTS (t) = 1;
5086 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5087 }
5088
5089 if (cfun->va_list_fpr_size)
5090 {
5091 type = TREE_TYPE (fpr);
5092 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
5093 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
5094 TREE_SIDE_EFFECTS (t) = 1;
5095 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5096 }
5097
5098 /* Find the overflow area. */
5099 type = TREE_TYPE (ovf);
5100 t = make_tree (type, virtual_incoming_args_rtx);
5101 if (words != 0)
5102 t = build2 (POINTER_PLUS_EXPR, type, t,
5103 size_int (words * UNITS_PER_WORD));
5104 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
5105 TREE_SIDE_EFFECTS (t) = 1;
5106 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5107
5108 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
5109 {
5110 /* Find the register save area.
5111 The prologue of the function saves it right above the stack frame. */
5112 type = TREE_TYPE (sav);
5113 t = make_tree (type, frame_pointer_rtx);
5114 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
5115 TREE_SIDE_EFFECTS (t) = 1;
5116 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5117 }
5118 }
5119
5120 /* Implement va_arg. */
5121
5122 static tree
5123 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
5124 {
5125 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
5126 tree f_gpr, f_fpr, f_ovf, f_sav;
5127 tree gpr, fpr, ovf, sav, t;
5128 int size, rsize;
5129 tree lab_false, lab_over = NULL_TREE;
5130 tree addr, t2;
5131 rtx container;
5132 int indirect_p = 0;
5133 tree ptrtype;
5134 enum machine_mode nat_mode;
5135
5136 /* Only 64bit target needs something special. */
5137 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
5138 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5139
5140 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5141 f_fpr = TREE_CHAIN (f_gpr);
5142 f_ovf = TREE_CHAIN (f_fpr);
5143 f_sav = TREE_CHAIN (f_ovf);
5144
5145 valist = build_va_arg_indirect_ref (valist);
5146 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5147 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5148 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5149 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
5150
5151 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5152 if (indirect_p)
5153 type = build_pointer_type (type);
5154 size = int_size_in_bytes (type);
5155 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5156
5157 nat_mode = type_natural_mode (type);
5158 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
5159 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
5160
5161 /* Pull the value out of the saved registers. */
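  /* In outline, for a small scalar needing one GP register, the code
     gimplified below behaves like (a sketch, not the literal trees):

	 if (gp_offset >= 48) goto overflow;
	 addr = reg_save_area + gp_offset;
	 gp_offset += 8;
	 goto done;
       overflow:
	 addr = overflow_arg_area;
	 overflow_arg_area += 8;
       done:
	 result = *(TYPE *) addr;  */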
5162
5163 addr = create_tmp_var (ptr_type_node, "addr");
5164 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
5165
5166 if (container)
5167 {
5168 int needed_intregs, needed_sseregs;
5169 bool need_temp;
5170 tree int_addr, sse_addr;
5171
5172 lab_false = create_artificial_label ();
5173 lab_over = create_artificial_label ();
5174
5175 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
5176
5177 need_temp = (!REG_P (container)
5178 && ((needed_intregs && TYPE_ALIGN (type) > 64)
5179 || TYPE_ALIGN (type) > 128));
5180
5181 /* In case we are passing a structure, verify that it is a consecutive block
5182 in the register save area. If not, we need to do moves. */
5183 if (!need_temp && !REG_P (container))
5184 {
5185 /* Verify that all registers are strictly consecutive */
5186 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
5187 {
5188 int i;
5189
5190 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5191 {
5192 rtx slot = XVECEXP (container, 0, i);
5193 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
5194 || INTVAL (XEXP (slot, 1)) != i * 16)
5195 need_temp = 1;
5196 }
5197 }
5198 else
5199 {
5200 int i;
5201
5202 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5203 {
5204 rtx slot = XVECEXP (container, 0, i);
5205 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
5206 || INTVAL (XEXP (slot, 1)) != i * 8)
5207 need_temp = 1;
5208 }
5209 }
5210 }
5211 if (!need_temp)
5212 {
5213 int_addr = addr;
5214 sse_addr = addr;
5215 }
5216 else
5217 {
5218 int_addr = create_tmp_var (ptr_type_node, "int_addr");
5219 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
5220 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
5221 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
5222 }
5223
5224 /* First ensure that we fit completely in registers. */
5225 if (needed_intregs)
5226 {
5227 t = build_int_cst (TREE_TYPE (gpr),
5228 (REGPARM_MAX - needed_intregs + 1) * 8);
5229 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
5230 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5231 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5232 gimplify_and_add (t, pre_p);
5233 }
5234 if (needed_sseregs)
5235 {
5236 t = build_int_cst (TREE_TYPE (fpr),
5237 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
5238 + REGPARM_MAX * 8);
5239 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
5240 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5241 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5242 gimplify_and_add (t, pre_p);
5243 }
5244
5245 /* Compute index to start of area used for integer regs. */
5246 if (needed_intregs)
5247 {
5248 /* int_addr = gpr + sav; */
5249 t = fold_convert (sizetype, gpr);
5250 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5251 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
5252 gimplify_and_add (t, pre_p);
5253 }
5254 if (needed_sseregs)
5255 {
5256 /* sse_addr = fpr + sav; */
5257 t = fold_convert (sizetype, fpr);
5258 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5259 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
5260 gimplify_and_add (t, pre_p);
5261 }
5262 if (need_temp)
5263 {
5264 int i;
5265 tree temp = create_tmp_var (type, "va_arg_tmp");
5266
5267 /* addr = &temp; */
5268 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
5269 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5270 gimplify_and_add (t, pre_p);
5271
5272 for (i = 0; i < XVECLEN (container, 0); i++)
5273 {
5274 rtx slot = XVECEXP (container, 0, i);
5275 rtx reg = XEXP (slot, 0);
5276 enum machine_mode mode = GET_MODE (reg);
5277 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
5278 tree addr_type = build_pointer_type (piece_type);
5279 tree src_addr, src;
5280 int src_offset;
5281 tree dest_addr, dest;
5282
5283 if (SSE_REGNO_P (REGNO (reg)))
5284 {
5285 src_addr = sse_addr;
5286 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
5287 }
5288 else
5289 {
5290 src_addr = int_addr;
5291 src_offset = REGNO (reg) * 8;
5292 }
5293 src_addr = fold_convert (addr_type, src_addr);
5294 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
5295 size_int (src_offset));
5296 src = build_va_arg_indirect_ref (src_addr);
5297
5298 dest_addr = fold_convert (addr_type, addr);
5299 dest_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, dest_addr,
5300 size_int (INTVAL (XEXP (slot, 1))));
5301 dest = build_va_arg_indirect_ref (dest_addr);
5302
5303 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
5304 gimplify_and_add (t, pre_p);
5305 }
5306 }
5307
5308 if (needed_intregs)
5309 {
5310 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
5311 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
5312 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
5313 gimplify_and_add (t, pre_p);
5314 }
5315 if (needed_sseregs)
5316 {
5317 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
5318 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
5319 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
5320 gimplify_and_add (t, pre_p);
5321 }
5322
5323 t = build1 (GOTO_EXPR, void_type_node, lab_over);
5324 gimplify_and_add (t, pre_p);
5325
5326 t = build1 (LABEL_EXPR, void_type_node, lab_false);
5327 append_to_statement_list (t, pre_p);
5328 }
5329
5330 /* ... otherwise out of the overflow area. */
5331
5332 /* Care for on-stack alignment if needed. */
5333 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
5334 || integer_zerop (TYPE_SIZE (type)))
5335 t = ovf;
5336 else
5337 {
5338 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
5339 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
5340 size_int (align - 1));
5341 t = fold_convert (sizetype, t);
5342 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5343 size_int (-align));
5344 t = fold_convert (TREE_TYPE (ovf), t);
5345 }
5346 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
5347
5348 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5349 gimplify_and_add (t2, pre_p);
5350
5351 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
5352 size_int (rsize * UNITS_PER_WORD));
5353 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
5354 gimplify_and_add (t, pre_p);
5355
5356 if (container)
5357 {
5358 t = build1 (LABEL_EXPR, void_type_node, lab_over);
5359 append_to_statement_list (t, pre_p);
5360 }
5361
5362 ptrtype = build_pointer_type (type);
5363 addr = fold_convert (ptrtype, addr);
5364
5365 if (indirect_p)
5366 addr = build_va_arg_indirect_ref (addr);
5367 return build_va_arg_indirect_ref (addr);
5368 }
5369 \f
5370 /* Return nonzero if OPNUM's MEM should be matched
5371 in movabs* patterns. */
5372
5373 int
5374 ix86_check_movabs (rtx insn, int opnum)
5375 {
5376 rtx set, mem;
5377
5378 set = PATTERN (insn);
5379 if (GET_CODE (set) == PARALLEL)
5380 set = XVECEXP (set, 0, 0);
5381 gcc_assert (GET_CODE (set) == SET);
5382 mem = XEXP (set, opnum);
5383 while (GET_CODE (mem) == SUBREG)
5384 mem = SUBREG_REG (mem);
5385 gcc_assert (MEM_P (mem));
5386 return (volatile_ok || !MEM_VOLATILE_P (mem));
5387 }
5388 \f
5389 /* Initialize the table of extra 80387 mathematical constants. */
5390
5391 static void
5392 init_ext_80387_constants (void)
5393 {
5394 static const char * cst[5] =
5395 {
5396 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5397 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5398 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5399 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5400 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5401 };
5402 int i;
5403
5404 for (i = 0; i < 5; i++)
5405 {
5406 real_from_string (&ext_80387_constants_table[i], cst[i]);
5407 /* Ensure each constant is rounded to XFmode precision. */
5408 real_convert (&ext_80387_constants_table[i],
5409 XFmode, &ext_80387_constants_table[i]);
5410 }
5411
5412 ext_80387_constants_init = 1;
5413 }
5414
5415 /* Return a nonzero code identifying the special 80387 instruction that can
5416 load the constant, 0 if there is none, or -1 if X is not an x87 constant. */
5417
5418 int
5419 standard_80387_constant_p (rtx x)
5420 {
5421 enum machine_mode mode = GET_MODE (x);
5422
5423 REAL_VALUE_TYPE r;
5424
5425 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
5426 return -1;
5427
5428 if (x == CONST0_RTX (mode))
5429 return 1;
5430 if (x == CONST1_RTX (mode))
5431 return 2;
5432
5433 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5434
5435 /* For XFmode constants, try to find a special 80387 instruction when
5436 optimizing for size or on those CPUs that benefit from them. */
5437 if (mode == XFmode
5438 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
5439 {
5440 int i;
5441
5442 if (! ext_80387_constants_init)
5443 init_ext_80387_constants ();
5444
5445 for (i = 0; i < 5; i++)
5446 if (real_identical (&r, &ext_80387_constants_table[i]))
5447 return i + 3;
5448 }
5449
5450 /* A load of the constant -0.0 or -1.0 will be split into an
5451 fldz;fchs or fld1;fchs sequence. */
5452 if (real_isnegzero (&r))
5453 return 8;
5454 if (real_identical (&r, &dconstm1))
5455 return 9;
5456
5457 return 0;
5458 }
5459
5460 /* Return the opcode of the special instruction to be used to load
5461 the constant X. */
5462
5463 const char *
5464 standard_80387_constant_opcode (rtx x)
5465 {
5466 switch (standard_80387_constant_p (x))
5467 {
5468 case 1:
5469 return "fldz";
5470 case 2:
5471 return "fld1";
5472 case 3:
5473 return "fldlg2";
5474 case 4:
5475 return "fldln2";
5476 case 5:
5477 return "fldl2e";
5478 case 6:
5479 return "fldl2t";
5480 case 7:
5481 return "fldpi";
5482 case 8:
5483 case 9:
5484 return "#";
5485 default:
5486 gcc_unreachable ();
5487 }
5488 }
5489
5490 /* Return the CONST_DOUBLE representing the 80387 constant that is
5491 loaded by the specified special instruction. The argument IDX
5492 matches the return value from standard_80387_constant_p. */
5493
5494 rtx
5495 standard_80387_constant_rtx (int idx)
5496 {
5497 int i;
5498
5499 if (! ext_80387_constants_init)
5500 init_ext_80387_constants ();
5501
5502 switch (idx)
5503 {
5504 case 3:
5505 case 4:
5506 case 5:
5507 case 6:
5508 case 7:
5509 i = idx - 3;
5510 break;
5511
5512 default:
5513 gcc_unreachable ();
5514 }
5515
5516 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5517 XFmode);
5518 }
5519
5520 /* Return 1 if mode is a valid mode for sse. */
5521 static int
5522 standard_sse_mode_p (enum machine_mode mode)
5523 {
5524 switch (mode)
5525 {
5526 case V16QImode:
5527 case V8HImode:
5528 case V4SImode:
5529 case V2DImode:
5530 case V4SFmode:
5531 case V2DFmode:
5532 return 1;
5533
5534 default:
5535 return 0;
5536 }
5537 }
5538
5539 /* Return nonzero if X is an FP constant that we can load into an SSE
5540 register without using memory. */
5541 int
5542 standard_sse_constant_p (rtx x)
5543 {
5544 enum machine_mode mode = GET_MODE (x);
5545
5546 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5547 return 1;
5548 if (vector_all_ones_operand (x, mode)
5549 && standard_sse_mode_p (mode))
5550 return TARGET_SSE2 ? 2 : -1;
5551
5552 return 0;
5553 }
5554
5555 /* Return the opcode of the special instruction to be used to load
5556 the constant X. */
5557
5558 const char *
5559 standard_sse_constant_opcode (rtx insn, rtx x)
5560 {
5561 switch (standard_sse_constant_p (x))
5562 {
5563 case 1:
5564 if (get_attr_mode (insn) == MODE_V4SF)
5565 return "xorps\t%0, %0";
5566 else if (get_attr_mode (insn) == MODE_V2DF)
5567 return "xorpd\t%0, %0";
5568 else
5569 return "pxor\t%0, %0";
5570 case 2:
5571 return "pcmpeqd\t%0, %0";
5572 }
5573 gcc_unreachable ();
5574 }
5575
5576 /* Returns 1 if OP contains a symbol reference. */
5577
5578 int
5579 symbolic_reference_mentioned_p (rtx op)
5580 {
5581 const char *fmt;
5582 int i;
5583
5584 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5585 return 1;
5586
5587 fmt = GET_RTX_FORMAT (GET_CODE (op));
5588 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5589 {
5590 if (fmt[i] == 'E')
5591 {
5592 int j;
5593
5594 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5595 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5596 return 1;
5597 }
5598
5599 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5600 return 1;
5601 }
5602
5603 return 0;
5604 }
5605
5606 /* Return 1 if it is appropriate to emit `ret' instructions in the
5607 body of a function. Do this only if the epilogue is simple, needing a
5608 couple of insns. Prior to reloading, we can't tell how many registers
5609 must be saved, so return 0 then. Return 0 if there is no frame
5610 marker to de-allocate. */
5611
5612 int
5613 ix86_can_use_return_insn_p (void)
5614 {
5615 struct ix86_frame frame;
5616
5617 if (! reload_completed || frame_pointer_needed)
5618 return 0;
5619
5620 /* Don't allow more than 32K bytes of arguments to be popped, since
5621 that's all we can do with one instruction. */
5622 if (current_function_pops_args
5623 && current_function_args_size >= 32768)
5624 return 0;
5625
5626 ix86_compute_frame_layout (&frame);
5627 return frame.to_allocate == 0 && frame.nregs == 0;
5628 }
5629 \f
5630 /* Value should be nonzero if functions must have frame pointers.
5631 Zero means the frame pointer need not be set up (and parms may
5632 be accessed via the stack pointer) in functions that seem suitable. */
5633
5634 int
5635 ix86_frame_pointer_required (void)
5636 {
5637 /* If we accessed previous frames, then the generated code expects
5638 to be able to access the saved ebp value in our frame. */
5639 if (cfun->machine->accesses_prev_frame)
5640 return 1;
5641
5642 /* Several x86 OSes need a frame pointer for other reasons,
5643 usually pertaining to setjmp. */
5644 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5645 return 1;
5646
5647 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5648 the frame pointer by default. Turn it back on now if we've not
5649 got a leaf function. */
5650 if (TARGET_OMIT_LEAF_FRAME_POINTER
5651 && (!current_function_is_leaf
5652 || ix86_current_function_calls_tls_descriptor))
5653 return 1;
5654
5655 if (current_function_profile)
5656 return 1;
5657
5658 return 0;
5659 }
5660
5661 /* Record that the current function accesses previous call frames. */
5662
5663 void
5664 ix86_setup_frame_addresses (void)
5665 {
5666 cfun->machine->accesses_prev_frame = 1;
5667 }
5668 \f
5669 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5670 # define USE_HIDDEN_LINKONCE 1
5671 #else
5672 # define USE_HIDDEN_LINKONCE 0
5673 #endif
5674
5675 static int pic_labels_used;
5676
5677 /* Fills in the label name that should be used for a pc thunk for
5678 the given register. */
5679
5680 static void
5681 get_pc_thunk_name (char name[32], unsigned int regno)
5682 {
5683 gcc_assert (!TARGET_64BIT);
5684
5685 if (USE_HIDDEN_LINKONCE)
5686 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5687 else
5688 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5689 }
5690
5691
5692 /* This function generates the pc thunks used for -fpic: each thunk loads
5693 its target register with the return address of the caller and then returns. */
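/* For example, with USE_HIDDEN_LINKONCE the thunk emitted below for %ebx
   is roughly:

	__i686.get_pc_thunk.bx:
		movl	(%esp), %ebx
		ret						*/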
5694
5695 void
5696 ix86_file_end (void)
5697 {
5698 rtx xops[2];
5699 int regno;
5700
5701 for (regno = 0; regno < 8; ++regno)
5702 {
5703 char name[32];
5704
5705 if (! ((pic_labels_used >> regno) & 1))
5706 continue;
5707
5708 get_pc_thunk_name (name, regno);
5709
5710 #if TARGET_MACHO
5711 if (TARGET_MACHO)
5712 {
5713 switch_to_section (darwin_sections[text_coal_section]);
5714 fputs ("\t.weak_definition\t", asm_out_file);
5715 assemble_name (asm_out_file, name);
5716 fputs ("\n\t.private_extern\t", asm_out_file);
5717 assemble_name (asm_out_file, name);
5718 fputs ("\n", asm_out_file);
5719 ASM_OUTPUT_LABEL (asm_out_file, name);
5720 }
5721 else
5722 #endif
5723 if (USE_HIDDEN_LINKONCE)
5724 {
5725 tree decl;
5726
5727 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5728 error_mark_node);
5729 TREE_PUBLIC (decl) = 1;
5730 TREE_STATIC (decl) = 1;
5731 DECL_ONE_ONLY (decl) = 1;
5732
5733 (*targetm.asm_out.unique_section) (decl, 0);
5734 switch_to_section (get_named_section (decl, NULL, 0));
5735
5736 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5737 fputs ("\t.hidden\t", asm_out_file);
5738 assemble_name (asm_out_file, name);
5739 fputc ('\n', asm_out_file);
5740 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5741 }
5742 else
5743 {
5744 switch_to_section (text_section);
5745 ASM_OUTPUT_LABEL (asm_out_file, name);
5746 }
5747
5748 xops[0] = gen_rtx_REG (SImode, regno);
5749 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5750 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5751 output_asm_insn ("ret", xops);
5752 }
5753
5754 if (NEED_INDICATE_EXEC_STACK)
5755 file_end_indicate_exec_stack ();
5756 }
5757
5758 /* Emit code for the SET_GOT patterns. */
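/* For the common ELF/-fPIC case the output is roughly (shown for %ebx):

     with TARGET_DEEP_BRANCH_PREDICTION:
	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

     without it:
	call	.L2
   .L2:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx	*/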
5759
5760 const char *
5761 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5762 {
5763 rtx xops[3];
5764
5765 xops[0] = dest;
5766
5767 if (TARGET_VXWORKS_RTP && flag_pic)
5768 {
5769 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5770 xops[2] = gen_rtx_MEM (Pmode,
5771 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5772 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5773
5774 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5775 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5776 an unadorned address. */
5777 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5778 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5779 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5780 return "";
5781 }
5782
5783 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5784
5785 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5786 {
5787 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5788
5789 if (!flag_pic)
5790 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5791 else
5792 output_asm_insn ("call\t%a2", xops);
5793
5794 #if TARGET_MACHO
5795 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5796 is what will be referenced by the Mach-O PIC subsystem. */
5797 if (!label)
5798 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5799 #endif
5800
5801 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5802 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5803
5804 if (flag_pic)
5805 output_asm_insn ("pop{l}\t%0", xops);
5806 }
5807 else
5808 {
5809 char name[32];
5810 get_pc_thunk_name (name, REGNO (dest));
5811 pic_labels_used |= 1 << REGNO (dest);
5812
5813 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5814 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5815 output_asm_insn ("call\t%X2", xops);
5816 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5817 is what will be referenced by the Mach-O PIC subsystem. */
5818 #if TARGET_MACHO
5819 if (!label)
5820 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5821 else
5822 targetm.asm_out.internal_label (asm_out_file, "L",
5823 CODE_LABEL_NUMBER (label));
5824 #endif
5825 }
5826
5827 if (TARGET_MACHO)
5828 return "";
5829
5830 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5831 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5832 else
5833 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5834
5835 return "";
5836 }
5837
5838 /* Generate a "push" pattern for input ARG. */
5839
5840 static rtx
5841 gen_push (rtx arg)
5842 {
5843 return gen_rtx_SET (VOIDmode,
5844 gen_rtx_MEM (Pmode,
5845 gen_rtx_PRE_DEC (Pmode,
5846 stack_pointer_rtx)),
5847 arg);
5848 }
5849
5850 /* Return the number of an unused call-clobbered register that is available
5851 for the entire function, or INVALID_REGNUM if there is none. */
5852
5853 static unsigned int
5854 ix86_select_alt_pic_regnum (void)
5855 {
5856 if (current_function_is_leaf && !current_function_profile
5857 && !ix86_current_function_calls_tls_descriptor)
5858 {
5859 int i;
5860 for (i = 2; i >= 0; --i)
5861 if (!df_regs_ever_live_p (i))
5862 return i;
5863 }
5864
5865 return INVALID_REGNUM;
5866 }
5867
5868 /* Return 1 if we need to save REGNO. */
5869 static int
5870 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5871 {
5872 if (pic_offset_table_rtx
5873 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5874 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
5875 || current_function_profile
5876 || current_function_calls_eh_return
5877 || current_function_uses_const_pool))
5878 {
5879 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5880 return 0;
5881 return 1;
5882 }
5883
5884 if (current_function_calls_eh_return && maybe_eh_return)
5885 {
5886 unsigned i;
5887 for (i = 0; ; i++)
5888 {
5889 unsigned test = EH_RETURN_DATA_REGNO (i);
5890 if (test == INVALID_REGNUM)
5891 break;
5892 if (test == regno)
5893 return 1;
5894 }
5895 }
5896
5897 if (cfun->machine->force_align_arg_pointer
5898 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5899 return 1;
5900
5901 return (df_regs_ever_live_p (regno)
5902 && !call_used_regs[regno]
5903 && !fixed_regs[regno]
5904 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5905 }
5906
5907 /* Return number of registers to be saved on the stack. */
5908
5909 static int
5910 ix86_nsaved_regs (void)
5911 {
5912 int nregs = 0;
5913 int regno;
5914
5915 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5916 if (ix86_save_reg (regno, true))
5917 nregs++;
5918 return nregs;
5919 }
5920
5921 /* Return the offset between two registers, one to be eliminated, and the other
5922 its replacement, at the start of a routine. */
5923
5924 HOST_WIDE_INT
5925 ix86_initial_elimination_offset (int from, int to)
5926 {
5927 struct ix86_frame frame;
5928 ix86_compute_frame_layout (&frame);
5929
5930 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5931 return frame.hard_frame_pointer_offset;
5932 else if (from == FRAME_POINTER_REGNUM
5933 && to == HARD_FRAME_POINTER_REGNUM)
5934 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5935 else
5936 {
5937 gcc_assert (to == STACK_POINTER_REGNUM);
5938
5939 if (from == ARG_POINTER_REGNUM)
5940 return frame.stack_pointer_offset;
5941
5942 gcc_assert (from == FRAME_POINTER_REGNUM);
5943 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5944 }
5945 }
5946
5947 /* Fill the ix86_frame structure describing the frame of the currently compiled function. */
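/* A sketch of the layout computed below, from higher to lower addresses
   (offsets grow downward from the incoming argument area):

	<incoming arguments>
	return address
	saved frame pointer		(if frame_pointer_needed)
					<- hard_frame_pointer_offset
	saved registers			(frame->nregs words)
	varargs register save area	(frame->va_arg_size)
	padding1			(align the locals)
					<- frame_pointer_offset
	local variables			(get_frame_size ())
	outgoing arguments		(if ACCUMULATE_OUTGOING_ARGS)
	padding2			(align the stack boundary)
					<- stack_pointer_offset	 */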
5948
5949 static void
5950 ix86_compute_frame_layout (struct ix86_frame *frame)
5951 {
5952 HOST_WIDE_INT total_size;
5953 unsigned int stack_alignment_needed;
5954 HOST_WIDE_INT offset;
5955 unsigned int preferred_alignment;
5956 HOST_WIDE_INT size = get_frame_size ();
5957
5958 frame->nregs = ix86_nsaved_regs ();
5959 total_size = size;
5960
5961 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5962 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5963
5964 /* During reload iteration the number of registers saved can change.
5965 Recompute the value as needed. Do not recompute when the number of registers
5966 didn't change, as reload makes multiple calls to this function and does not
5967 expect the decision to change within a single iteration. */
5968 if (!optimize_size
5969 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5970 {
5971 int count = frame->nregs;
5972
5973 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5974 /* The fast prologue uses move instead of push to save registers. This
5975 is significantly longer, but also executes faster, as modern hardware
5976 can execute the moves in parallel but can't do that for push/pop.
5977
5978 Be careful about choosing which prologue to emit: when the function takes
5979 many instructions to execute, we may use the slow version, as well as when
5980 the function is known to be outside any hot spot (this is known with
5981 feedback only). Weight the size of the function by the number of registers
5982 to save, as it is cheap to use one or two push instructions but very
5983 slow to use many of them. */
5984 if (count)
5985 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5986 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5987 || (flag_branch_probabilities
5988 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5989 cfun->machine->use_fast_prologue_epilogue = false;
5990 else
5991 cfun->machine->use_fast_prologue_epilogue
5992 = !expensive_function_p (count);
5993 }
5994 if (TARGET_PROLOGUE_USING_MOVE
5995 && cfun->machine->use_fast_prologue_epilogue)
5996 frame->save_regs_using_mov = true;
5997 else
5998 frame->save_regs_using_mov = false;
5999
6000
6001 /* Skip return address and saved base pointer. */
6002 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
6003
6004 frame->hard_frame_pointer_offset = offset;
6005
6006 /* Do some sanity checking of stack_alignment_needed and
6007 preferred_alignment, since the i386 port is the only one using those
6008 features in ways that may break easily. */
6009
6010 gcc_assert (!size || stack_alignment_needed);
6011 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
6012 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
6013 gcc_assert (stack_alignment_needed
6014 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
6015
6016 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
6017 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
6018
6019 /* Register save area */
6020 offset += frame->nregs * UNITS_PER_WORD;
6021
6022 /* Va-arg area */
6023 if (ix86_save_varrargs_registers)
6024 {
6025 offset += X86_64_VARARGS_SIZE;
6026 frame->va_arg_size = X86_64_VARARGS_SIZE;
6027 }
6028 else
6029 frame->va_arg_size = 0;
6030
6031 /* Align start of frame for local function. */
6032 frame->padding1 = ((offset + stack_alignment_needed - 1)
6033 & -stack_alignment_needed) - offset;
6034
6035 offset += frame->padding1;
6036
6037 /* Frame pointer points here. */
6038 frame->frame_pointer_offset = offset;
6039
6040 offset += size;
6041
6042 /* Add the outgoing arguments area. It can be skipped if we eliminated
6043 all the function calls as dead code.
6044 Skipping is, however, impossible when the function calls alloca: the
6045 alloca expander assumes that the last current_function_outgoing_args_size
6046 bytes of the stack frame are unused. */
6047 if (ACCUMULATE_OUTGOING_ARGS
6048 && (!current_function_is_leaf || current_function_calls_alloca
6049 || ix86_current_function_calls_tls_descriptor))
6050 {
6051 offset += current_function_outgoing_args_size;
6052 frame->outgoing_arguments_size = current_function_outgoing_args_size;
6053 }
6054 else
6055 frame->outgoing_arguments_size = 0;
6056
6057 /* Align stack boundary. Only needed if we're calling another function
6058 or using alloca. */
6059 if (!current_function_is_leaf || current_function_calls_alloca
6060 || ix86_current_function_calls_tls_descriptor)
6061 frame->padding2 = ((offset + preferred_alignment - 1)
6062 & -preferred_alignment) - offset;
6063 else
6064 frame->padding2 = 0;
6065
6066 offset += frame->padding2;
6067
6068 /* We've reached end of stack frame. */
6069 frame->stack_pointer_offset = offset;
6070
6071 /* Size prologue needs to allocate. */
6072 frame->to_allocate =
6073 (size + frame->padding1 + frame->padding2
6074 + frame->outgoing_arguments_size + frame->va_arg_size);
6075
6076 if ((!frame->to_allocate && frame->nregs <= 1)
6077 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
6078 frame->save_regs_using_mov = false;
6079
6080 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
6081 && current_function_is_leaf
6082 && !ix86_current_function_calls_tls_descriptor)
6083 {
6084 frame->red_zone_size = frame->to_allocate;
6085 if (frame->save_regs_using_mov)
6086 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
6087 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
6088 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
6089 }
6090 else
6091 frame->red_zone_size = 0;
6092 frame->to_allocate -= frame->red_zone_size;
6093 frame->stack_pointer_offset -= frame->red_zone_size;
6094 #if 0
6095 fprintf (stderr, "\n");
6096 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
6097 fprintf (stderr, "size: %ld\n", (long)size);
6098 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
6099 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
6100 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
6101 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
6102 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
6103 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
6104 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
6105 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
6106 (long)frame->hard_frame_pointer_offset);
6107 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
6108 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
6109 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
6110 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
6111 #endif
6112 }
6113
6114 /* Emit code to save registers in the prologue. */
6115
6116 static void
6117 ix86_emit_save_regs (void)
6118 {
6119 unsigned int regno;
6120 rtx insn;
6121
6122 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
6123 if (ix86_save_reg (regno, true))
6124 {
6125 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
6126 RTX_FRAME_RELATED_P (insn) = 1;
6127 }
6128 }
6129
6130 /* Emit code to save registers using MOV insns. The first register
6131 is saved at POINTER + OFFSET. */
6132 static void
6133 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
6134 {
6135 unsigned int regno;
6136 rtx insn;
6137
6138 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6139 if (ix86_save_reg (regno, true))
6140 {
6141 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
6142 Pmode, offset),
6143 gen_rtx_REG (Pmode, regno));
6144 RTX_FRAME_RELATED_P (insn) = 1;
6145 offset += UNITS_PER_WORD;
6146 }
6147 }
6148
6149 /* Expand prologue or epilogue stack adjustment.
6150 The pattern exists to put a dependency on all ebp-based memory accesses.
6151 STYLE should be negative if instructions should be marked as frame related,
6152 zero if the %r11 register is live and cannot be freely used, and positive
6153 otherwise. */
6154
6155 static void
6156 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
6157 {
6158 rtx insn;
6159
6160 if (! TARGET_64BIT)
6161 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
6162 else if (x86_64_immediate_operand (offset, DImode))
6163 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
6164 else
6165 {
6166 rtx r11;
6167 /* r11 is used by the indirect sibcall return as well: it is set before the
6168 epilogue and used after the epilogue. At the moment an indirect sibcall
6169 shouldn't be used together with huge frame sizes in one
6170 function because of the frame_size check in sibcall.c. */
6171 gcc_assert (style);
6172 r11 = gen_rtx_REG (DImode, R11_REG);
6173 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
6174 if (style < 0)
6175 RTX_FRAME_RELATED_P (insn) = 1;
6176 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
6177 offset));
6178 }
6179 if (style < 0)
6180 RTX_FRAME_RELATED_P (insn) = 1;
6181 }
6182
6183 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
6184
6185 static rtx
6186 ix86_internal_arg_pointer (void)
6187 {
6188 bool has_force_align_arg_pointer =
6189 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
6190 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
6191 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
6192 && DECL_NAME (current_function_decl)
6193 && MAIN_NAME_P (DECL_NAME (current_function_decl))
6194 && DECL_FILE_SCOPE_P (current_function_decl))
6195 || ix86_force_align_arg_pointer
6196 || has_force_align_arg_pointer)
6197 {
6198 /* Nested functions can't realign the stack due to a register
6199 conflict. */
6200 if (DECL_CONTEXT (current_function_decl)
6201 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
6202 {
6203 if (ix86_force_align_arg_pointer)
6204 warning (0, "-mstackrealign ignored for nested functions");
6205 if (has_force_align_arg_pointer)
6206 error ("%s not supported for nested functions",
6207 ix86_force_align_arg_pointer_string);
6208 return virtual_incoming_args_rtx;
6209 }
6210 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, CX_REG);
6211 return copy_to_reg (cfun->machine->force_align_arg_pointer);
6212 }
6213 else
6214 return virtual_incoming_args_rtx;
6215 }
6216
6217 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
6218 This is called from dwarf2out.c to emit call frame instructions
6219 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
6220 static void
6221 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
6222 {
6223 rtx unspec = SET_SRC (pattern);
6224 gcc_assert (GET_CODE (unspec) == UNSPEC);
6225
6226 switch (index)
6227 {
6228 case UNSPEC_REG_SAVE:
6229 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
6230 SET_DEST (pattern));
6231 break;
6232 case UNSPEC_DEF_CFA:
6233 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
6234 INTVAL (XVECEXP (unspec, 0, 0)));
6235 break;
6236 default:
6237 gcc_unreachable ();
6238 }
6239 }
6240
6241 /* Expand the prologue into a bunch of separate insns. */
6242
6243 void
6244 ix86_expand_prologue (void)
6245 {
6246 rtx insn;
6247 bool pic_reg_used;
6248 struct ix86_frame frame;
6249 HOST_WIDE_INT allocate;
6250
6251 ix86_compute_frame_layout (&frame);
6252
6253 if (cfun->machine->force_align_arg_pointer)
6254 {
6255 rtx x, y;
6256
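      /* On 32-bit targets the realignment sequence emitted here is roughly
	 (a sketch; %ecx is the register chosen by ix86_internal_arg_pointer):

		leal	4(%esp), %ecx
		andl	$-16, %esp
		pushl	-4(%ecx)			*/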
6257 /* Grab the argument pointer. */
6258 x = plus_constant (stack_pointer_rtx, 4);
6259 y = cfun->machine->force_align_arg_pointer;
6260 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
6261 RTX_FRAME_RELATED_P (insn) = 1;
6262
6263 /* The unwind info consists of two parts: install the fafp as the cfa,
6264 and record the fafp as the "save register" of the stack pointer.
6265 The latter is there so that the unwinder can see where it
6266 should restore the stack pointer across the and insn. */
6267 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
6268 x = gen_rtx_SET (VOIDmode, y, x);
6269 RTX_FRAME_RELATED_P (x) = 1;
6270 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
6271 UNSPEC_REG_SAVE);
6272 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
6273 RTX_FRAME_RELATED_P (y) = 1;
6274 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
6275 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6276 REG_NOTES (insn) = x;
6277
6278 /* Align the stack. */
6279 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
6280 GEN_INT (-16)));
6281
6282 /* And here we cheat like madmen with the unwind info. We force the
6283 cfa register back to sp+4, which is exactly what it was at the
6284 start of the function. Re-pushing the return address results in
6285 the return at the same spot relative to the cfa, and thus is
6286 correct wrt the unwind info. */
6287 x = cfun->machine->force_align_arg_pointer;
6288 x = gen_frame_mem (Pmode, plus_constant (x, -4));
6289 insn = emit_insn (gen_push (x));
6290 RTX_FRAME_RELATED_P (insn) = 1;
6291
6292 x = GEN_INT (4);
6293 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
6294 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
6295 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6296 REG_NOTES (insn) = x;
6297 }
6298
6299 /* Note: AT&T enter does NOT have reversed args. Enter is probably
6300 slower on all targets. Also sdb doesn't like it. */
6301
6302 if (frame_pointer_needed)
6303 {
6304 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
6305 RTX_FRAME_RELATED_P (insn) = 1;
6306
6307 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
6308 RTX_FRAME_RELATED_P (insn) = 1;
6309 }
6310
6311 allocate = frame.to_allocate;
6312
6313 if (!frame.save_regs_using_mov)
6314 ix86_emit_save_regs ();
6315 else
6316 allocate += frame.nregs * UNITS_PER_WORD;
6317
6318 /* When using the red zone, we may start saving registers before allocating
6319 the stack frame, saving one cycle of the prologue. */
6320 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
6321 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
6322 : stack_pointer_rtx,
6323 -frame.nregs * UNITS_PER_WORD);
6324
6325 if (allocate == 0)
6326 ;
6327 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
6328 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6329 GEN_INT (-allocate), -1);
6330 else
6331 {
6332 /* Only valid for Win32 and the Win64 MS ABI. */
6333 rtx eax = gen_rtx_REG (Pmode, AX_REG);
6334 bool eax_live;
6335 rtx t;
6336
6337 gcc_assert (!TARGET_64BIT || TARGET_64BIT_MS_ABI);
6338
6339 if (TARGET_64BIT_MS_ABI)
6340 eax_live = false;
6341 else
6342 eax_live = ix86_eax_live_at_start_p ();
6343
6344 if (eax_live)
6345 {
6346 emit_insn (gen_push (eax));
6347 allocate -= UNITS_PER_WORD;
6348 }
6349
6350 emit_move_insn (eax, GEN_INT (allocate));
6351
6352 if (TARGET_64BIT)
6353 insn = gen_allocate_stack_worker_64 (eax);
6354 else
6355 insn = gen_allocate_stack_worker_32 (eax);
6356 insn = emit_insn (insn);
6357 RTX_FRAME_RELATED_P (insn) = 1;
6358 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
6359 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
6360 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
6361 t, REG_NOTES (insn));
6362
6363 if (eax_live)
6364 {
6365 if (frame_pointer_needed)
6366 t = plus_constant (hard_frame_pointer_rtx,
6367 allocate
6368 - frame.to_allocate
6369 - frame.nregs * UNITS_PER_WORD);
6370 else
6371 t = plus_constant (stack_pointer_rtx, allocate);
6372 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
6373 }
6374 }
6375
6376 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
6377 {
6378 if (!frame_pointer_needed || !frame.to_allocate)
6379 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
6380 else
6381 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
6382 -frame.nregs * UNITS_PER_WORD);
6383 }
6384
6385 pic_reg_used = false;
6386 if (pic_offset_table_rtx
6387 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6388 || current_function_profile))
6389 {
6390 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
6391
6392 if (alt_pic_reg_used != INVALID_REGNUM)
6393 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
6394
6395 pic_reg_used = true;
6396 }
6397
6398 if (pic_reg_used)
6399 {
6400 if (TARGET_64BIT)
6401 {
6402 if (ix86_cmodel == CM_LARGE_PIC)
6403 {
6404 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
6405 rtx label = gen_label_rtx ();
6406 emit_label (label);
6407 LABEL_PRESERVE_P (label) = 1;
6408 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6409 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6410 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6411 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6412 pic_offset_table_rtx, tmp_reg));
6413 }
6414 else
6415 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6416 }
6417 else
6418 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6419 }
6420
6421 /* Prevent function calls from being scheduled before the call to mcount.
6422 In the pic_reg_used case, make sure that the got load isn't deleted. */
6423 if (current_function_profile)
6424 {
6425 if (pic_reg_used)
6426 emit_insn (gen_prologue_use (pic_offset_table_rtx));
6427 emit_insn (gen_blockage ());
6428 }
6429 }
6430
6431 /* Emit code to restore saved registers using MOV insns. First register
6432 is restored from POINTER + OFFSET. */
6433 static void
6434 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6435 int maybe_eh_return)
6436 {
6437 int regno;
6438 rtx base_address = gen_rtx_MEM (Pmode, pointer);
6439
6440 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6441 if (ix86_save_reg (regno, maybe_eh_return))
6442 {
6443 /* Ensure that adjust_address won't be forced to produce a pointer
6444 out of the range allowed by the x86-64 instruction set. */
6445 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6446 {
6447 rtx r11;
6448
6449 r11 = gen_rtx_REG (DImode, R11_REG);
6450 emit_move_insn (r11, GEN_INT (offset));
6451 emit_insn (gen_adddi3 (r11, r11, pointer));
6452 base_address = gen_rtx_MEM (Pmode, r11);
6453 offset = 0;
6454 }
6455 emit_move_insn (gen_rtx_REG (Pmode, regno),
6456 adjust_address (base_address, Pmode, offset));
6457 offset += UNITS_PER_WORD;
6458 }
6459 }
6460
6461 /* Restore function stack, frame, and registers. */
6462
6463 void
6464 ix86_expand_epilogue (int style)
6465 {
6466 int regno;
6467 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6468 struct ix86_frame frame;
6469 HOST_WIDE_INT offset;
6470
6471 ix86_compute_frame_layout (&frame);
6472
6473 /* Calculate start of saved registers relative to ebp. Special care
6474 must be taken for the normal return case of a function using
6475 eh_return: the eax and edx registers are marked as saved, but not
6476 restored along this path. */
6477 offset = frame.nregs;
6478 if (current_function_calls_eh_return && style != 2)
6479 offset -= 2;
6480 offset *= -UNITS_PER_WORD;
6481
6482 /* If we're only restoring one register and sp is not valid, then
6483 use a move instruction to restore the register, since it's
6484 less work than reloading sp and popping the register.
6485
6486 The default code results in a stack adjustment using an add/lea instruction,
6487 while this code results in a LEAVE instruction (or discrete equivalent),
6488 so it is profitable in some other cases as well, especially when there
6489 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6490 and there is exactly one register to pop. This heuristic may need some
6491 tuning in the future. */
6492 if ((!sp_valid && frame.nregs <= 1)
6493 || (TARGET_EPILOGUE_USING_MOVE
6494 && cfun->machine->use_fast_prologue_epilogue
6495 && (frame.nregs > 1 || frame.to_allocate))
6496 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6497 || (frame_pointer_needed && TARGET_USE_LEAVE
6498 && cfun->machine->use_fast_prologue_epilogue
6499 && frame.nregs == 1)
6500 || current_function_calls_eh_return)
6501 {
6502 /* Restore registers. We can use ebp or esp to address the memory
6503 locations. If both are available, default to ebp, since offsets
6504 are known to be small. The only exception is esp pointing directly to the
6505 end of the block of saved registers, where we may simplify the addressing
6506 mode. */
6507
6508 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6509 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6510 frame.to_allocate, style == 2);
6511 else
6512 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6513 offset, style == 2);
6514
6515 /* eh_return epilogues need %ecx added to the stack pointer. */
6516 if (style == 2)
6517 {
6518 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6519
6520 if (frame_pointer_needed)
6521 {
6522 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6523 tmp = plus_constant (tmp, UNITS_PER_WORD);
6524 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6525
6526 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6527 emit_move_insn (hard_frame_pointer_rtx, tmp);
6528
6529 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6530 const0_rtx, style);
6531 }
6532 else
6533 {
6534 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6535 tmp = plus_constant (tmp, (frame.to_allocate
6536 + frame.nregs * UNITS_PER_WORD));
6537 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6538 }
6539 }
6540 else if (!frame_pointer_needed)
6541 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6542 GEN_INT (frame.to_allocate
6543 + frame.nregs * UNITS_PER_WORD),
6544 style);
6545 /* If not an i386, mov & pop is faster than "leave". */
6546 else if (TARGET_USE_LEAVE || optimize_size
6547 || !cfun->machine->use_fast_prologue_epilogue)
6548 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6549 else
6550 {
6551 pro_epilogue_adjust_stack (stack_pointer_rtx,
6552 hard_frame_pointer_rtx,
6553 const0_rtx, style);
6554 if (TARGET_64BIT)
6555 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6556 else
6557 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6558 }
6559 }
6560 else
6561 {
6562 /* First step is to deallocate the stack frame so that we can
6563 pop the registers. */
6564 if (!sp_valid)
6565 {
6566 gcc_assert (frame_pointer_needed);
6567 pro_epilogue_adjust_stack (stack_pointer_rtx,
6568 hard_frame_pointer_rtx,
6569 GEN_INT (offset), style);
6570 }
6571 else if (frame.to_allocate)
6572 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6573 GEN_INT (frame.to_allocate), style);
6574
6575 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6576 if (ix86_save_reg (regno, false))
6577 {
6578 if (TARGET_64BIT)
6579 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6580 else
6581 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6582 }
6583 if (frame_pointer_needed)
6584 {
6585 /* Leave results in shorter dependency chains on CPUs that are
6586 able to grok it fast. */
6587 if (TARGET_USE_LEAVE)
6588 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6589 else if (TARGET_64BIT)
6590 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6591 else
6592 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6593 }
6594 }
6595
6596 if (cfun->machine->force_align_arg_pointer)
6597 {
6598 emit_insn (gen_addsi3 (stack_pointer_rtx,
6599 cfun->machine->force_align_arg_pointer,
6600 GEN_INT (-4)));
6601 }
6602
6603 /* Sibcall epilogues don't want a return instruction. */
6604 if (style == 0)
6605 return;
6606
6607 if (current_function_pops_args && current_function_args_size)
6608 {
6609 rtx popc = GEN_INT (current_function_pops_args);
6610
6611 /* i386 can only pop 64K bytes. If asked to pop more, pop the
6612 return address, do an explicit add, and jump indirectly to the
6613 caller. */
6614
6615 if (current_function_pops_args >= 65536)
6616 {
6617 rtx ecx = gen_rtx_REG (SImode, CX_REG);
6618
6619 /* There is no "pascal" calling convention in any 64bit ABI. */
6620 gcc_assert (!TARGET_64BIT);
6621
6622 emit_insn (gen_popsi1 (ecx));
6623 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6624 emit_jump_insn (gen_return_indirect_internal (ecx));
6625 }
6626 else
6627 emit_jump_insn (gen_return_pop_internal (popc));
6628 }
6629 else
6630 emit_jump_insn (gen_return_internal ());
6631 }
6632
6633 /* Reset global state that compiling the function may have modified. */
6634
6635 static void
6636 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6637 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6638 {
6639 if (pic_offset_table_rtx)
6640 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
6641 #if TARGET_MACHO
6642 /* Mach-O doesn't support labels at the end of objects, so if
6643 it looks like we might want one, insert a NOP. */
6644 {
6645 rtx insn = get_last_insn ();
6646 while (insn
6647 && NOTE_P (insn)
6648 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
6649 insn = PREV_INSN (insn);
6650 if (insn
6651 && (LABEL_P (insn)
6652 || (NOTE_P (insn)
6653 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
6654 fputs ("\tnop\n", file);
6655 }
6656 #endif
6657
6658 }
6659 \f
6660 /* Extract the parts of an RTL expression that is a valid memory address
6661 for an instruction. Return 0 if the structure of the address is
6662 grossly off. Return -1 if the address contains ASHIFT, so it is not
6663 strictly valid but is still used for computing the length of a lea instruction. */
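/* For example (a sketch), the address

	(plus:SI (plus:SI (mult:SI (reg:SI %eax) (const_int 4))
			  (reg:SI %ebx))
		 (const_int 12))

   decomposes into base = %ebx, index = %eax, scale = 4, disp = 12,
   i.e. the x86 addressing form 12(%ebx,%eax,4).  */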
6664
6665 int
6666 ix86_decompose_address (rtx addr, struct ix86_address *out)
6667 {
6668 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6669 rtx base_reg, index_reg;
6670 HOST_WIDE_INT scale = 1;
6671 rtx scale_rtx = NULL_RTX;
6672 int retval = 1;
6673 enum ix86_address_seg seg = SEG_DEFAULT;
6674
6675 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6676 base = addr;
6677 else if (GET_CODE (addr) == PLUS)
6678 {
6679 rtx addends[4], op;
6680 int n = 0, i;
6681
6682 op = addr;
6683 do
6684 {
6685 if (n >= 4)
6686 return 0;
6687 addends[n++] = XEXP (op, 1);
6688 op = XEXP (op, 0);
6689 }
6690 while (GET_CODE (op) == PLUS);
6691 if (n >= 4)
6692 return 0;
6693 addends[n] = op;
6694
6695 for (i = n; i >= 0; --i)
6696 {
6697 op = addends[i];
6698 switch (GET_CODE (op))
6699 {
6700 case MULT:
6701 if (index)
6702 return 0;
6703 index = XEXP (op, 0);
6704 scale_rtx = XEXP (op, 1);
6705 break;
6706
6707 case UNSPEC:
6708 if (XINT (op, 1) == UNSPEC_TP
6709 && TARGET_TLS_DIRECT_SEG_REFS
6710 && seg == SEG_DEFAULT)
6711 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6712 else
6713 return 0;
6714 break;
6715
6716 case REG:
6717 case SUBREG:
6718 if (!base)
6719 base = op;
6720 else if (!index)
6721 index = op;
6722 else
6723 return 0;
6724 break;
6725
6726 case CONST:
6727 case CONST_INT:
6728 case SYMBOL_REF:
6729 case LABEL_REF:
6730 if (disp)
6731 return 0;
6732 disp = op;
6733 break;
6734
6735 default:
6736 return 0;
6737 }
6738 }
6739 }
6740 else if (GET_CODE (addr) == MULT)
6741 {
6742 index = XEXP (addr, 0); /* index*scale */
6743 scale_rtx = XEXP (addr, 1);
6744 }
6745 else if (GET_CODE (addr) == ASHIFT)
6746 {
6747 rtx tmp;
6748
6749 /* We're called for lea too, which implements ashift on occasion. */
6750 index = XEXP (addr, 0);
6751 tmp = XEXP (addr, 1);
6752 if (!CONST_INT_P (tmp))
6753 return 0;
6754 scale = INTVAL (tmp);
6755 if ((unsigned HOST_WIDE_INT) scale > 3)
6756 return 0;
6757 scale = 1 << scale;
6758 retval = -1;
6759 }
6760 else
6761 disp = addr; /* displacement */
6762
6763 /* Extract the integral value of scale. */
6764 if (scale_rtx)
6765 {
6766 if (!CONST_INT_P (scale_rtx))
6767 return 0;
6768 scale = INTVAL (scale_rtx);
6769 }
6770
6771 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6772 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6773
6774 /* Allow the arg pointer and stack pointer as an index if there is no scaling. */
6775 if (base_reg && index_reg && scale == 1
6776 && (index_reg == arg_pointer_rtx
6777 || index_reg == frame_pointer_rtx
6778 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6779 {
6780 rtx tmp;
6781 tmp = base, base = index, index = tmp;
6782 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6783 }
6784
6785 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6786 if ((base_reg == hard_frame_pointer_rtx
6787 || base_reg == frame_pointer_rtx
6788 || base_reg == arg_pointer_rtx) && !disp)
6789 disp = const0_rtx;
6790
6791 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
6792 Avoid this by transforming it to [%esi+0]. */
6793 if (TARGET_K6 && !optimize_size
6794 && base_reg && !index_reg && !disp
6795 && REG_P (base_reg)
6796 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6797 disp = const0_rtx;
6798
6799 /* Special case: encode reg+reg instead of reg*2. */
6800 if (!base && index && scale && scale == 2)
6801 base = index, base_reg = index_reg, scale = 1;
6802
6803 /* Special case: scaling cannot be encoded without base or displacement. */
6804 if (!base && !disp && index && scale != 1)
6805 disp = const0_rtx;
6806
6807 out->base = base;
6808 out->index = index;
6809 out->disp = disp;
6810 out->scale = scale;
6811 out->seg = seg;
6812
6813 return retval;
6814 }
6815 \f
6816 /* Return cost of the memory address x.
6817 For i386, it is better to use a complex address than let gcc copy
6818 the address into a reg and make a new pseudo. But not if the address
6819 requires two regs - that would mean more pseudos with longer
6820 lifetimes. */
6821 static int
6822 ix86_address_cost (rtx x)
6823 {
6824 struct ix86_address parts;
6825 int cost = 1;
6826 int ok = ix86_decompose_address (x, &parts);
6827
6828 gcc_assert (ok);
6829
6830 if (parts.base && GET_CODE (parts.base) == SUBREG)
6831 parts.base = SUBREG_REG (parts.base);
6832 if (parts.index && GET_CODE (parts.index) == SUBREG)
6833 parts.index = SUBREG_REG (parts.index);
6834
6835 /* Attempt to minimize number of registers in the address. */
6836 if ((parts.base
6837 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6838 || (parts.index
6839 && (!REG_P (parts.index)
6840 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6841 cost++;
6842
6843 if (parts.base
6844 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6845 && parts.index
6846 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6847 && parts.base != parts.index)
6848 cost++;
6849
6850 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
6851 since its predecode logic can't detect the length of such instructions
6852 and decoding degenerates to vector decoded. Increase the cost of such
6853 addresses here. The penalty is at least 2 cycles. It may be worthwhile
6854 to split such addresses or even refuse them altogether.
6855
6856 The following addressing modes are affected:
6857 [base+scale*index]
6858 [scale*index+disp]
6859 [base+index]
6860
6861 The first and last cases may be avoidable by explicitly coding a zero displacement
6862 in the memory address, but I don't have an AMD-K6 machine handy to check this
6863 theory. */
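/* In encoding terms, each of the forms above uses a ModR/M byte with mod=00
   and r/m=100b, i.e. a SIB byte follows, e.g. movl (%ebx,%ecx,4), %eax.  */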
6864
6865 if (TARGET_K6
6866 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6867 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6868 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6869 cost += 10;
6870
6871 return cost;
6872 }
6873 \f
6874 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6875 this is used to form addresses to local data when -fPIC is in
6876 use. */
6877
6878 static bool
6879 darwin_local_data_pic (rtx disp)
6880 {
6881 if (GET_CODE (disp) == MINUS)
6882 {
6883 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6884 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6885 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6886 {
6887 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6888 if (! strcmp (sym_name, "<pic base>"))
6889 return true;
6890 }
6891 }
6892
6893 return false;
6894 }
6895
6896 /* Determine if a given RTX is a valid constant. We already know this
6897 satisfies CONSTANT_P. */
6898
6899 bool
6900 legitimate_constant_p (rtx x)
6901 {
6902 switch (GET_CODE (x))
6903 {
6904 case CONST:
6905 x = XEXP (x, 0);
6906
6907 if (GET_CODE (x) == PLUS)
6908 {
6909 if (!CONST_INT_P (XEXP (x, 1)))
6910 return false;
6911 x = XEXP (x, 0);
6912 }
6913
6914 if (TARGET_MACHO && darwin_local_data_pic (x))
6915 return true;
6916
6917 /* Only some unspecs are valid as "constants". */
6918 if (GET_CODE (x) == UNSPEC)
6919 switch (XINT (x, 1))
6920 {
6921 case UNSPEC_GOT:
6922 case UNSPEC_GOTOFF:
6923 case UNSPEC_PLTOFF:
6924 return TARGET_64BIT;
6925 case UNSPEC_TPOFF:
6926 case UNSPEC_NTPOFF:
6927 x = XVECEXP (x, 0, 0);
6928 return (GET_CODE (x) == SYMBOL_REF
6929 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6930 case UNSPEC_DTPOFF:
6931 x = XVECEXP (x, 0, 0);
6932 return (GET_CODE (x) == SYMBOL_REF
6933 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6934 default:
6935 return false;
6936 }
6937
6938 /* We must have drilled down to a symbol. */
6939 if (GET_CODE (x) == LABEL_REF)
6940 return true;
6941 if (GET_CODE (x) != SYMBOL_REF)
6942 return false;
6943 /* FALLTHRU */
6944
6945 case SYMBOL_REF:
6946 /* TLS symbols are never valid. */
6947 if (SYMBOL_REF_TLS_MODEL (x))
6948 return false;
6949
6950 /* DLLIMPORT symbols are never valid. */
6951 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
6952 && SYMBOL_REF_DLLIMPORT_P (x))
6953 return false;
6954 break;
6955
6956 case CONST_DOUBLE:
6957 if (GET_MODE (x) == TImode
6958 && x != CONST0_RTX (TImode)
6959 && !TARGET_64BIT)
6960 return false;
6961 break;
6962
6963 case CONST_VECTOR:
6964 if (x == CONST0_RTX (GET_MODE (x)))
6965 return true;
6966 return false;
6967
6968 default:
6969 break;
6970 }
6971
6972 /* Otherwise we handle everything else in the move patterns. */
6973 return true;
6974 }
6975
6976 /* Determine if it's legal to put X into the constant pool. This
6977 is not possible for the address of thread-local symbols, which
6978 is checked above. */
6979
6980 static bool
6981 ix86_cannot_force_const_mem (rtx x)
6982 {
6983 /* We can always put integral constants and vectors in memory. */
6984 switch (GET_CODE (x))
6985 {
6986 case CONST_INT:
6987 case CONST_DOUBLE:
6988 case CONST_VECTOR:
6989 return false;
6990
6991 default:
6992 break;
6993 }
6994 return !legitimate_constant_p (x);
6995 }
6996
6997 /* Determine if a given RTX is a valid constant address. */
6998
6999 bool
7000 constant_address_p (rtx x)
7001 {
7002 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
7003 }
7004
7005 /* Nonzero if the constant value X is a legitimate general operand
7006 when generating PIC code. It is given that flag_pic is on and
7007 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
7008
7009 bool
7010 legitimate_pic_operand_p (rtx x)
7011 {
7012 rtx inner;
7013
7014 switch (GET_CODE (x))
7015 {
7016 case CONST:
7017 inner = XEXP (x, 0);
7018 if (GET_CODE (inner) == PLUS
7019 && CONST_INT_P (XEXP (inner, 1)))
7020 inner = XEXP (inner, 0);
7021
7022 /* Only some unspecs are valid as "constants". */
7023 if (GET_CODE (inner) == UNSPEC)
7024 switch (XINT (inner, 1))
7025 {
7026 case UNSPEC_GOT:
7027 case UNSPEC_GOTOFF:
7028 case UNSPEC_PLTOFF:
7029 return TARGET_64BIT;
7030 case UNSPEC_TPOFF:
7031 x = XVECEXP (inner, 0, 0);
7032 return (GET_CODE (x) == SYMBOL_REF
7033 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
7034 default:
7035 return false;
7036 }
7037 /* FALLTHRU */
7038
7039 case SYMBOL_REF:
7040 case LABEL_REF:
7041 return legitimate_pic_address_disp_p (x);
7042
7043 default:
7044 return true;
7045 }
7046 }
7047
7048 /* Determine if a given CONST RTX is a valid memory displacement
7049 in PIC mode. */
7050
7051 int
7052 legitimate_pic_address_disp_p (rtx disp)
7053 {
7054 bool saw_plus;
7055
7056 /* In 64bit mode we can allow direct addresses of symbols and labels
7057 when they are not dynamic symbols. */
7058 if (TARGET_64BIT)
7059 {
7060 rtx op0 = disp, op1;
7061
7062 switch (GET_CODE (disp))
7063 {
7064 case LABEL_REF:
7065 return true;
7066
7067 case CONST:
7068 if (GET_CODE (XEXP (disp, 0)) != PLUS)
7069 break;
7070 op0 = XEXP (XEXP (disp, 0), 0);
7071 op1 = XEXP (XEXP (disp, 0), 1);
7072 if (!CONST_INT_P (op1)
7073 || INTVAL (op1) >= 16*1024*1024
7074 || INTVAL (op1) < -16*1024*1024)
7075 break;
7076 if (GET_CODE (op0) == LABEL_REF)
7077 return true;
7078 if (GET_CODE (op0) != SYMBOL_REF)
7079 break;
7080 /* FALLTHRU */
7081
7082 case SYMBOL_REF:
7083 /* TLS references should always be enclosed in UNSPEC. */
7084 if (SYMBOL_REF_TLS_MODEL (op0))
7085 return false;
7086 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
7087 && ix86_cmodel != CM_LARGE_PIC)
7088 return true;
7089 break;
7090
7091 default:
7092 break;
7093 }
7094 }
7095 if (GET_CODE (disp) != CONST)
7096 return 0;
7097 disp = XEXP (disp, 0);
7098
7099 if (TARGET_64BIT)
7100 {
7101 /* It is unsafe to allow PLUS expressions here; that would break the limit
7102 on the allowed distance of GOT references. We should not need these anyway. */
7103 if (GET_CODE (disp) != UNSPEC
7104 || (XINT (disp, 1) != UNSPEC_GOTPCREL
7105 && XINT (disp, 1) != UNSPEC_GOTOFF
7106 && XINT (disp, 1) != UNSPEC_PLTOFF))
7107 return 0;
7108
7109 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
7110 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
7111 return 0;
7112 return 1;
7113 }
7114
7115 saw_plus = false;
7116 if (GET_CODE (disp) == PLUS)
7117 {
7118 if (!CONST_INT_P (XEXP (disp, 1)))
7119 return 0;
7120 disp = XEXP (disp, 0);
7121 saw_plus = true;
7122 }
7123
7124 if (TARGET_MACHO && darwin_local_data_pic (disp))
7125 return 1;
7126
7127 if (GET_CODE (disp) != UNSPEC)
7128 return 0;
7129
7130 switch (XINT (disp, 1))
7131 {
7132 case UNSPEC_GOT:
7133 if (saw_plus)
7134 return false;
7135 /* We need to check for both symbols and labels because VxWorks loads
7136 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
7137 details. */
7138 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7139 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
7140 case UNSPEC_GOTOFF:
7141 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
7142 While the ABI also specifies a 32bit relocation, we don't produce it in
7143 the small PIC model at all. */
7144 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7145 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
7146 && !TARGET_64BIT)
7147 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
7148 return false;
7149 case UNSPEC_GOTTPOFF:
7150 case UNSPEC_GOTNTPOFF:
7151 case UNSPEC_INDNTPOFF:
7152 if (saw_plus)
7153 return false;
7154 disp = XVECEXP (disp, 0, 0);
7155 return (GET_CODE (disp) == SYMBOL_REF
7156 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
7157 case UNSPEC_NTPOFF:
7158 disp = XVECEXP (disp, 0, 0);
7159 return (GET_CODE (disp) == SYMBOL_REF
7160 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
7161 case UNSPEC_DTPOFF:
7162 disp = XVECEXP (disp, 0, 0);
7163 return (GET_CODE (disp) == SYMBOL_REF
7164 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
7165 }
7166
7167 return 0;
7168 }
7169
7170 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
7171 memory address for an instruction. The MODE argument is the machine mode
7172 for the MEM expression that wants to use this address.
7173
7174 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
7175 convert common non-canonical forms to canonical form so that they will
7176 be recognized. */
7177
7178 int
7179 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
7180 rtx addr, int strict)
7181 {
7182 struct ix86_address parts;
7183 rtx base, index, disp;
7184 HOST_WIDE_INT scale;
7185 const char *reason = NULL;
7186 rtx reason_rtx = NULL_RTX;
7187
7188 if (ix86_decompose_address (addr, &parts) <= 0)
7189 {
7190 reason = "decomposition failed";
7191 goto report_error;
7192 }
7193
7194 base = parts.base;
7195 index = parts.index;
7196 disp = parts.disp;
7197 scale = parts.scale;
7198
7199 /* Validate base register.
7200
7201 Don't allow SUBREG's that span more than a word here. It can lead to spill
7202 failures when the base is one word out of a two word structure, which is
7203 represented internally as a DImode int. */
7204
7205 if (base)
7206 {
7207 rtx reg;
7208 reason_rtx = base;
7209
7210 if (REG_P (base))
7211 reg = base;
7212 else if (GET_CODE (base) == SUBREG
7213 && REG_P (SUBREG_REG (base))
7214 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
7215 <= UNITS_PER_WORD)
7216 reg = SUBREG_REG (base);
7217 else
7218 {
7219 reason = "base is not a register";
7220 goto report_error;
7221 }
7222
7223 if (GET_MODE (base) != Pmode)
7224 {
7225 reason = "base is not in Pmode";
7226 goto report_error;
7227 }
7228
7229 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
7230 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
7231 {
7232 reason = "base is not valid";
7233 goto report_error;
7234 }
7235 }
7236
7237 /* Validate index register.
7238
7239 Don't allow SUBREG's that span more than a word here -- same as above. */
7240
7241 if (index)
7242 {
7243 rtx reg;
7244 reason_rtx = index;
7245
7246 if (REG_P (index))
7247 reg = index;
7248 else if (GET_CODE (index) == SUBREG
7249 && REG_P (SUBREG_REG (index))
7250 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
7251 <= UNITS_PER_WORD)
7252 reg = SUBREG_REG (index);
7253 else
7254 {
7255 reason = "index is not a register";
7256 goto report_error;
7257 }
7258
7259 if (GET_MODE (index) != Pmode)
7260 {
7261 reason = "index is not in Pmode";
7262 goto report_error;
7263 }
7264
7265 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
7266 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
7267 {
7268 reason = "index is not valid";
7269 goto report_error;
7270 }
7271 }
7272
7273 /* Validate scale factor. */
7274 if (scale != 1)
7275 {
7276 reason_rtx = GEN_INT (scale);
7277 if (!index)
7278 {
7279 reason = "scale without index";
7280 goto report_error;
7281 }
7282
7283 if (scale != 2 && scale != 4 && scale != 8)
7284 {
7285 reason = "scale is not a valid multiplier";
7286 goto report_error;
7287 }
7288 }
7289
7290 /* Validate displacement. */
7291 if (disp)
7292 {
7293 reason_rtx = disp;
7294
7295 if (GET_CODE (disp) == CONST
7296 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
7297 switch (XINT (XEXP (disp, 0), 1))
7298 {
7299 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
7300 used. While the ABI also specifies 32bit relocations, we don't produce
7301 them at all and use IP relative addressing instead. */
7302 case UNSPEC_GOT:
7303 case UNSPEC_GOTOFF:
7304 gcc_assert (flag_pic);
7305 if (!TARGET_64BIT)
7306 goto is_legitimate_pic;
7307 reason = "64bit address unspec";
7308 goto report_error;
7309
7310 case UNSPEC_GOTPCREL:
7311 gcc_assert (flag_pic);
7312 goto is_legitimate_pic;
7313
7314 case UNSPEC_GOTTPOFF:
7315 case UNSPEC_GOTNTPOFF:
7316 case UNSPEC_INDNTPOFF:
7317 case UNSPEC_NTPOFF:
7318 case UNSPEC_DTPOFF:
7319 break;
7320
7321 default:
7322 reason = "invalid address unspec";
7323 goto report_error;
7324 }
7325
7326 else if (SYMBOLIC_CONST (disp)
7327 && (flag_pic
7328 || (TARGET_MACHO
7329 #if TARGET_MACHO
7330 && MACHOPIC_INDIRECT
7331 && !machopic_operand_p (disp)
7332 #endif
7333 )))
7334 {
7335
7336 is_legitimate_pic:
7337 if (TARGET_64BIT && (index || base))
7338 {
7339 /* foo@dtpoff(%rX) is ok. */
7340 if (GET_CODE (disp) != CONST
7341 || GET_CODE (XEXP (disp, 0)) != PLUS
7342 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
7343 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
7344 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
7345 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
7346 {
7347 reason = "non-constant pic memory reference";
7348 goto report_error;
7349 }
7350 }
7351 else if (! legitimate_pic_address_disp_p (disp))
7352 {
7353 reason = "displacement is an invalid pic construct";
7354 goto report_error;
7355 }
7356
7357 /* This code used to verify that a symbolic pic displacement
7358 includes the pic_offset_table_rtx register.
7359
7360 While this is a good idea, unfortunately these constructs may
7361 be created by the "adds using lea" optimization for incorrect
7362 code like:
7363
7364 int a;
7365 int foo(int i)
7366 {
7367 return *(&a+i);
7368 }
7369
7370 This code is nonsensical, but results in addressing the
7371 GOT table with the pic_offset_table_rtx base. We can't
7372 just refuse it easily, since it gets matched by the
7373 "addsi3" pattern, which later gets split to lea when the
7374 output register differs from the input. While this
7375 could be handled by a separate addsi pattern for this case
7376 that never results in lea, disabling this test seems to be
7377 the easier and correct fix for the crash. */
7378 }
7379 else if (GET_CODE (disp) != LABEL_REF
7380 && !CONST_INT_P (disp)
7381 && (GET_CODE (disp) != CONST
7382 || !legitimate_constant_p (disp))
7383 && (GET_CODE (disp) != SYMBOL_REF
7384 || !legitimate_constant_p (disp)))
7385 {
7386 reason = "displacement is not constant";
7387 goto report_error;
7388 }
7389 else if (TARGET_64BIT
7390 && !x86_64_immediate_operand (disp, VOIDmode))
7391 {
7392 reason = "displacement is out of range";
7393 goto report_error;
7394 }
7395 }
7396
7397 /* Everything looks valid. */
7398 return TRUE;
7399
7400 report_error:
7401 return FALSE;
7402 }
7403 \f
7404 /* Return a unique alias set for the GOT. */
7405
7406 static alias_set_type
7407 ix86_GOT_alias_set (void)
7408 {
7409 static alias_set_type set = -1;
7410 if (set == -1)
7411 set = new_alias_set ();
7412 return set;
7413 }
7414
7415 /* Return a legitimate reference for ORIG (an address) using the
7416 register REG. If REG is 0, a new pseudo is generated.
7417
7418 There are two types of references that must be handled:
7419
7420 1. Global data references must load the address from the GOT, via
7421 the PIC reg. An insn is emitted to do this load, and the reg is
7422 returned.
7423
7424 2. Static data references, constant pool addresses, and code labels
7425 compute the address as an offset from the GOT, whose base is in
7426 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7427 differentiate them from global data objects. The returned
7428 address is the PIC reg + an unspec constant.
7429
7430 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7431 reg also appears in the address. */
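/* Concretely, case 1 yields a load such as
   (mem (plus pic_offset_table_rtx (const (unspec [sym] UNSPEC_GOT))))
   (or a sym@GOTPCREL(%rip) load for 64-bit small PIC), while case 2 yields
   (plus pic_offset_table_rtx (const (unspec [sym] UNSPEC_GOTOFF))).  */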
7432
7433 static rtx
7434 legitimize_pic_address (rtx orig, rtx reg)
7435 {
7436 rtx addr = orig;
7437 rtx new_rtx = orig;
7438 rtx base;
7439
7440 #if TARGET_MACHO
7441 if (TARGET_MACHO && !TARGET_64BIT)
7442 {
7443 if (reg == 0)
7444 reg = gen_reg_rtx (Pmode);
7445 /* Use the generic Mach-O PIC machinery. */
7446 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7447 }
7448 #endif
7449
7450 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7451 new_rtx = addr;
7452 else if (TARGET_64BIT
7453 && ix86_cmodel != CM_SMALL_PIC
7454 && gotoff_operand (addr, Pmode))
7455 {
7456 rtx tmpreg;
7457 /* This symbol may be referenced via a displacement from the PIC
7458 base address (@GOTOFF). */
7459
7460 if (reload_in_progress)
7461 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7462 if (GET_CODE (addr) == CONST)
7463 addr = XEXP (addr, 0);
7464 if (GET_CODE (addr) == PLUS)
7465 {
7466 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7467 UNSPEC_GOTOFF);
7468 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7469 }
7470 else
7471 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7472 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7473 if (!reg)
7474 tmpreg = gen_reg_rtx (Pmode);
7475 else
7476 tmpreg = reg;
7477 emit_move_insn (tmpreg, new_rtx);
7478
7479 if (reg != 0)
7480 {
7481 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7482 tmpreg, 1, OPTAB_DIRECT);
7483 new_rtx = reg;
7484 }
7485 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7486 }
7487 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7488 {
7489 /* This symbol may be referenced via a displacement from the PIC
7490 base address (@GOTOFF). */
7491
7492 if (reload_in_progress)
7493 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7494 if (GET_CODE (addr) == CONST)
7495 addr = XEXP (addr, 0);
7496 if (GET_CODE (addr) == PLUS)
7497 {
7498 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7499 UNSPEC_GOTOFF);
7500 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7501 }
7502 else
7503 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7504 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7505 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7506
7507 if (reg != 0)
7508 {
7509 emit_move_insn (reg, new_rtx);
7510 new_rtx = reg;
7511 }
7512 }
7513 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7514 /* We can't use @GOTOFF for text labels on VxWorks;
7515 see gotoff_operand. */
7516 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7517 {
7518 /* Given that we've already handled dllimport variables separately
7519 in legitimize_address, and all other variables should satisfy
7520 legitimate_pic_address_disp_p, we should never arrive here. */
7521 gcc_assert (!TARGET_64BIT_MS_ABI);
7522
7523 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7524 {
7525 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7526 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7527 new_rtx = gen_const_mem (Pmode, new_rtx);
7528 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7529
7530 if (reg == 0)
7531 reg = gen_reg_rtx (Pmode);
7532 /* Use gen_movsi directly, otherwise the address is loaded
7533 into a register for CSE. We don't want to CSE this address,
7534 instead we CSE addresses from the GOT table, so skip this. */
7535 emit_insn (gen_movsi (reg, new_rtx));
7536 new_rtx = reg;
7537 }
7538 else
7539 {
7540 /* This symbol must be referenced via a load from the
7541 Global Offset Table (@GOT). */
7542
7543 if (reload_in_progress)
7544 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7545 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7546 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7547 if (TARGET_64BIT)
7548 new_rtx = force_reg (Pmode, new_rtx);
7549 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7550 new_rtx = gen_const_mem (Pmode, new_rtx);
7551 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7552
7553 if (reg == 0)
7554 reg = gen_reg_rtx (Pmode);
7555 emit_move_insn (reg, new_rtx);
7556 new_rtx = reg;
7557 }
7558 }
7559 else
7560 {
7561 if (CONST_INT_P (addr)
7562 && !x86_64_immediate_operand (addr, VOIDmode))
7563 {
7564 if (reg)
7565 {
7566 emit_move_insn (reg, addr);
7567 new_rtx = reg;
7568 }
7569 else
7570 new_rtx = force_reg (Pmode, addr);
7571 }
7572 else if (GET_CODE (addr) == CONST)
7573 {
7574 addr = XEXP (addr, 0);
7575
7576 /* We must match stuff we generate before. Assume the only
7577 unspecs that can get here are ours. Not that we could do
7578 anything with them anyway.... */
7579 if (GET_CODE (addr) == UNSPEC
7580 || (GET_CODE (addr) == PLUS
7581 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7582 return orig;
7583 gcc_assert (GET_CODE (addr) == PLUS);
7584 }
7585 if (GET_CODE (addr) == PLUS)
7586 {
7587 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7588
7589 /* Check first to see if this is a constant offset from a @GOTOFF
7590 symbol reference. */
7591 if (gotoff_operand (op0, Pmode)
7592 && CONST_INT_P (op1))
7593 {
7594 if (!TARGET_64BIT)
7595 {
7596 if (reload_in_progress)
7597 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7598 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7599 UNSPEC_GOTOFF);
7600 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
7601 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7602 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7603
7604 if (reg != 0)
7605 {
7606 emit_move_insn (reg, new_rtx);
7607 new_rtx = reg;
7608 }
7609 }
7610 else
7611 {
7612 if (INTVAL (op1) < -16*1024*1024
7613 || INTVAL (op1) >= 16*1024*1024)
7614 {
7615 if (!x86_64_immediate_operand (op1, Pmode))
7616 op1 = force_reg (Pmode, op1);
7617 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7618 }
7619 }
7620 }
7621 else
7622 {
7623 base = legitimize_pic_address (XEXP (addr, 0), reg);
7624 new_rtx = legitimize_pic_address (XEXP (addr, 1),
7625 base == reg ? NULL_RTX : reg);
7626
7627 if (CONST_INT_P (new_rtx))
7628 new_rtx = plus_constant (base, INTVAL (new_rtx));
7629 else
7630 {
7631 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
7632 {
7633 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
7634 new_rtx = XEXP (new_rtx, 1);
7635 }
7636 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
7637 }
7638 }
7639 }
7640 }
7641 return new_rtx;
7642 }
7643 \f
7644 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7645
7646 static rtx
7647 get_thread_pointer (int to_reg)
7648 {
7649 rtx tp, reg, insn;
7650
7651 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7652 if (!to_reg)
7653 return tp;
7654
7655 reg = gen_reg_rtx (Pmode);
7656 insn = gen_rtx_SET (VOIDmode, reg, tp);
7657 insn = emit_insn (insn);
7658
7659 return reg;
7660 }
7661
7662 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7663 false if we expect this to be used for a memory address and true if
7664 we expect to load the address into a register. */
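/* For instance, in the local-exec model with GNU TLS the result is
   (plus <tp> (const (unspec [x] UNSPEC_NTPOFF))), where <tp> is the raw
   UNSPEC_TP segment base if TARGET_TLS_DIRECT_SEG_REFS applies and FOR_MOV
   is false, and a register copy of the thread pointer otherwise.  */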
7665
7666 static rtx
7667 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7668 {
7669 rtx dest, base, off, pic, tp;
7670 int type;
7671
7672 switch (model)
7673 {
7674 case TLS_MODEL_GLOBAL_DYNAMIC:
7675 dest = gen_reg_rtx (Pmode);
7676 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7677
7678 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7679 {
7680 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
7681
7682 start_sequence ();
7683 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7684 insns = get_insns ();
7685 end_sequence ();
7686
7687 CONST_OR_PURE_CALL_P (insns) = 1;
7688 emit_libcall_block (insns, dest, rax, x);
7689 }
7690 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7691 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7692 else
7693 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7694
7695 if (TARGET_GNU2_TLS)
7696 {
7697 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7698
7699 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7700 }
7701 break;
7702
7703 case TLS_MODEL_LOCAL_DYNAMIC:
7704 base = gen_reg_rtx (Pmode);
7705 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7706
7707 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7708 {
7709 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
7710
7711 start_sequence ();
7712 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7713 insns = get_insns ();
7714 end_sequence ();
7715
7716 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7717 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7718 CONST_OR_PURE_CALL_P (insns) = 1;
7719 emit_libcall_block (insns, base, rax, note);
7720 }
7721 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7722 emit_insn (gen_tls_local_dynamic_base_64 (base));
7723 else
7724 emit_insn (gen_tls_local_dynamic_base_32 (base));
7725
7726 if (TARGET_GNU2_TLS)
7727 {
7728 rtx x = ix86_tls_module_base ();
7729
7730 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7731 gen_rtx_MINUS (Pmode, x, tp));
7732 }
7733
7734 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7735 off = gen_rtx_CONST (Pmode, off);
7736
7737 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7738
7739 if (TARGET_GNU2_TLS)
7740 {
7741 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7742
7743 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7744 }
7745
7746 break;
7747
7748 case TLS_MODEL_INITIAL_EXEC:
7749 if (TARGET_64BIT)
7750 {
7751 pic = NULL;
7752 type = UNSPEC_GOTNTPOFF;
7753 }
7754 else if (flag_pic)
7755 {
7756 if (reload_in_progress)
7757 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7758 pic = pic_offset_table_rtx;
7759 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7760 }
7761 else if (!TARGET_ANY_GNU_TLS)
7762 {
7763 pic = gen_reg_rtx (Pmode);
7764 emit_insn (gen_set_got (pic));
7765 type = UNSPEC_GOTTPOFF;
7766 }
7767 else
7768 {
7769 pic = NULL;
7770 type = UNSPEC_INDNTPOFF;
7771 }
7772
7773 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7774 off = gen_rtx_CONST (Pmode, off);
7775 if (pic)
7776 off = gen_rtx_PLUS (Pmode, pic, off);
7777 off = gen_const_mem (Pmode, off);
7778 set_mem_alias_set (off, ix86_GOT_alias_set ());
7779
7780 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7781 {
7782 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7783 off = force_reg (Pmode, off);
7784 return gen_rtx_PLUS (Pmode, base, off);
7785 }
7786 else
7787 {
7788 base = get_thread_pointer (true);
7789 dest = gen_reg_rtx (Pmode);
7790 emit_insn (gen_subsi3 (dest, base, off));
7791 }
7792 break;
7793
7794 case TLS_MODEL_LOCAL_EXEC:
7795 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7796 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7797 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7798 off = gen_rtx_CONST (Pmode, off);
7799
7800 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7801 {
7802 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7803 return gen_rtx_PLUS (Pmode, base, off);
7804 }
7805 else
7806 {
7807 base = get_thread_pointer (true);
7808 dest = gen_reg_rtx (Pmode);
7809 emit_insn (gen_subsi3 (dest, base, off));
7810 }
7811 break;
7812
7813 default:
7814 gcc_unreachable ();
7815 }
7816
7817 return dest;
7818 }
7819
7820 /* Create or return the unique __imp_DECL dllimport symbol corresponding
7821 to symbol DECL. */
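/* For a dllimport declaration "foo", the decl built here gets the assembler
   name "*__imp__foo" ("*__imp_foo" for fastcall-prefixed names) and its
   DECL_RTL is a constant memory reference to that symbol.  */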
7822
7823 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
7824 htab_t dllimport_map;
7825
7826 static tree
7827 get_dllimport_decl (tree decl)
7828 {
7829 struct tree_map *h, in;
7830 void **loc;
7831 const char *name;
7832 const char *prefix;
7833 size_t namelen, prefixlen;
7834 char *imp_name;
7835 tree to;
7836 rtx rtl;
7837
7838 if (!dllimport_map)
7839 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
7840
7841 in.hash = htab_hash_pointer (decl);
7842 in.base.from = decl;
7843 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
7844 h = (struct tree_map *) *loc;
7845 if (h)
7846 return h->to;
7847
7848 *loc = h = GGC_NEW (struct tree_map);
7849 h->hash = in.hash;
7850 h->base.from = decl;
7851 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
7852 DECL_ARTIFICIAL (to) = 1;
7853 DECL_IGNORED_P (to) = 1;
7854 DECL_EXTERNAL (to) = 1;
7855 TREE_READONLY (to) = 1;
7856
7857 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
7858 name = targetm.strip_name_encoding (name);
7859 prefix = name[0] == FASTCALL_PREFIX ? "*__imp_": "*__imp__";
7860 namelen = strlen (name);
7861 prefixlen = strlen (prefix);
7862 imp_name = (char *) alloca (namelen + prefixlen + 1);
7863 memcpy (imp_name, prefix, prefixlen);
7864 memcpy (imp_name + prefixlen, name, namelen + 1);
7865
7866 name = ggc_alloc_string (imp_name, namelen + prefixlen);
7867 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
7868 SET_SYMBOL_REF_DECL (rtl, to);
7869 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
7870
7871 rtl = gen_const_mem (Pmode, rtl);
7872 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
7873
7874 SET_DECL_RTL (to, rtl);
7875 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
7876
7877 return to;
7878 }
7879
7880 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
7881 true if we require the result be a register. */
7882
7883 static rtx
7884 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
7885 {
7886 tree imp_decl;
7887 rtx x;
7888
7889 gcc_assert (SYMBOL_REF_DECL (symbol));
7890 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
7891
7892 x = DECL_RTL (imp_decl);
7893 if (want_reg)
7894 x = force_reg (Pmode, x);
7895 return x;
7896 }
7897
7898 /* Try machine-dependent ways of modifying an illegitimate address
7899 to be legitimate. If we find one, return the new, valid address.
7900 This macro is used in only one place: `memory_address' in explow.c.
7901
7902 OLDX is the address as it was before break_out_memory_refs was called.
7903 In some cases it is useful to look at this to decide what needs to be done.
7904
7905 MODE and WIN are passed so that this macro can use
7906 GO_IF_LEGITIMATE_ADDRESS.
7907
7908 It is always safe for this macro to do nothing. It exists to recognize
7909 opportunities to optimize the output.
7910
7911 For the 80386, we handle X+REG by loading X into a register R and
7912 using R+REG. R will go in a general reg and indexing will be used.
7913 However, if REG is a broken-out memory address or multiplication,
7914 nothing needs to be done because REG can certainly go in a general reg.
7915
7916 When -fpic is used, special handling is needed for symbolic references.
7917 See comments by legitimize_pic_address in i386.c for details. */
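/* For example, the non-canonical address (plus (ashift (reg) (const_int 2)) (reg))
   is rewritten below into (plus (mult (reg) (const_int 4)) (reg)), which
   legitimate_address_p then accepts as a base plus scaled-index address.  */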
7918
7919 rtx
7920 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7921 {
7922 int changed = 0;
7923 unsigned log;
7924
7925 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7926 if (log)
7927 return legitimize_tls_address (x, (enum tls_model) log, false);
7928 if (GET_CODE (x) == CONST
7929 && GET_CODE (XEXP (x, 0)) == PLUS
7930 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7931 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7932 {
7933 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
7934 (enum tls_model) log, false);
7935 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7936 }
7937
7938 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7939 {
7940 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
7941 return legitimize_dllimport_symbol (x, true);
7942 if (GET_CODE (x) == CONST
7943 && GET_CODE (XEXP (x, 0)) == PLUS
7944 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7945 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
7946 {
7947 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
7948 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7949 }
7950 }
7951
7952 if (flag_pic && SYMBOLIC_CONST (x))
7953 return legitimize_pic_address (x, 0);
7954
7955 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7956 if (GET_CODE (x) == ASHIFT
7957 && CONST_INT_P (XEXP (x, 1))
7958 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7959 {
7960 changed = 1;
7961 log = INTVAL (XEXP (x, 1));
7962 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7963 GEN_INT (1 << log));
7964 }
7965
7966 if (GET_CODE (x) == PLUS)
7967 {
7968 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7969
7970 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7971 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7972 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7973 {
7974 changed = 1;
7975 log = INTVAL (XEXP (XEXP (x, 0), 1));
7976 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7977 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7978 GEN_INT (1 << log));
7979 }
7980
7981 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7982 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7983 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7984 {
7985 changed = 1;
7986 log = INTVAL (XEXP (XEXP (x, 1), 1));
7987 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7988 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7989 GEN_INT (1 << log));
7990 }
7991
7992 /* Put multiply first if it isn't already. */
7993 if (GET_CODE (XEXP (x, 1)) == MULT)
7994 {
7995 rtx tmp = XEXP (x, 0);
7996 XEXP (x, 0) = XEXP (x, 1);
7997 XEXP (x, 1) = tmp;
7998 changed = 1;
7999 }
8000
8001 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
8002 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
8003 created by virtual register instantiation, register elimination, and
8004 similar optimizations. */
8005 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
8006 {
8007 changed = 1;
8008 x = gen_rtx_PLUS (Pmode,
8009 gen_rtx_PLUS (Pmode, XEXP (x, 0),
8010 XEXP (XEXP (x, 1), 0)),
8011 XEXP (XEXP (x, 1), 1));
8012 }
8013
8014 /* Canonicalize
8015 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
8016 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
8017 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
8018 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8019 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
8020 && CONSTANT_P (XEXP (x, 1)))
8021 {
8022 rtx constant;
8023 rtx other = NULL_RTX;
8024
8025 if (CONST_INT_P (XEXP (x, 1)))
8026 {
8027 constant = XEXP (x, 1);
8028 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
8029 }
8030 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
8031 {
8032 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
8033 other = XEXP (x, 1);
8034 }
8035 else
8036 constant = 0;
8037
8038 if (constant)
8039 {
8040 changed = 1;
8041 x = gen_rtx_PLUS (Pmode,
8042 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
8043 XEXP (XEXP (XEXP (x, 0), 1), 0)),
8044 plus_constant (other, INTVAL (constant)));
8045 }
8046 }
8047
8048 if (changed && legitimate_address_p (mode, x, FALSE))
8049 return x;
8050
8051 if (GET_CODE (XEXP (x, 0)) == MULT)
8052 {
8053 changed = 1;
8054 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
8055 }
8056
8057 if (GET_CODE (XEXP (x, 1)) == MULT)
8058 {
8059 changed = 1;
8060 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
8061 }
8062
8063 if (changed
8064 && REG_P (XEXP (x, 1))
8065 && REG_P (XEXP (x, 0)))
8066 return x;
8067
8068 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
8069 {
8070 changed = 1;
8071 x = legitimize_pic_address (x, 0);
8072 }
8073
8074 if (changed && legitimate_address_p (mode, x, FALSE))
8075 return x;
8076
8077 if (REG_P (XEXP (x, 0)))
8078 {
8079 rtx temp = gen_reg_rtx (Pmode);
8080 rtx val = force_operand (XEXP (x, 1), temp);
8081 if (val != temp)
8082 emit_move_insn (temp, val);
8083
8084 XEXP (x, 1) = temp;
8085 return x;
8086 }
8087
8088 else if (REG_P (XEXP (x, 1)))
8089 {
8090 rtx temp = gen_reg_rtx (Pmode);
8091 rtx val = force_operand (XEXP (x, 0), temp);
8092 if (val != temp)
8093 emit_move_insn (temp, val);
8094
8095 XEXP (x, 0) = temp;
8096 return x;
8097 }
8098 }
8099
8100 return x;
8101 }
8102 \f
8103 /* Print an integer constant expression in assembler syntax. Addition
8104 and subtraction are the only arithmetic that may appear in these
8105 expressions. FILE is the stdio stream to write to, X is the rtx, and
8106 CODE is the operand print code from the output string. */
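/* For example, (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)) is printed
   as "foo@GOTOFF", and (plus (symbol_ref "foo") (const_int 4)) as "4+foo",
   since some assemblers want the integer constant first.  */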
8107
8108 static void
8109 output_pic_addr_const (FILE *file, rtx x, int code)
8110 {
8111 char buf[256];
8112
8113 switch (GET_CODE (x))
8114 {
8115 case PC:
8116 gcc_assert (flag_pic);
8117 putc ('.', file);
8118 break;
8119
8120 case SYMBOL_REF:
8121 if (! TARGET_MACHO || TARGET_64BIT)
8122 output_addr_const (file, x);
8123 else
8124 {
8125 const char *name = XSTR (x, 0);
8126
8127 /* Mark the decl as referenced so that cgraph will
8128 output the function. */
8129 if (SYMBOL_REF_DECL (x))
8130 mark_decl_referenced (SYMBOL_REF_DECL (x));
8131
8132 #if TARGET_MACHO
8133 if (MACHOPIC_INDIRECT
8134 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
8135 name = machopic_indirection_name (x, /*stub_p=*/true);
8136 #endif
8137 assemble_name (file, name);
8138 }
8139 if (!TARGET_MACHO && !TARGET_64BIT_MS_ABI
8140 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
8141 fputs ("@PLT", file);
8142 break;
8143
8144 case LABEL_REF:
8145 x = XEXP (x, 0);
8146 /* FALLTHRU */
8147 case CODE_LABEL:
8148 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
8149 assemble_name (asm_out_file, buf);
8150 break;
8151
8152 case CONST_INT:
8153 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8154 break;
8155
8156 case CONST:
8157 /* This used to output parentheses around the expression,
8158 but that does not work on the 386 (either ATT or BSD assembler). */
8159 output_pic_addr_const (file, XEXP (x, 0), code);
8160 break;
8161
8162 case CONST_DOUBLE:
8163 if (GET_MODE (x) == VOIDmode)
8164 {
8165 /* We can use %d if the number is <32 bits and positive. */
8166 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
8167 fprintf (file, "0x%lx%08lx",
8168 (unsigned long) CONST_DOUBLE_HIGH (x),
8169 (unsigned long) CONST_DOUBLE_LOW (x));
8170 else
8171 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
8172 }
8173 else
8174 /* We can't handle floating point constants;
8175 PRINT_OPERAND must handle them. */
8176 output_operand_lossage ("floating constant misused");
8177 break;
8178
8179 case PLUS:
8180 /* Some assemblers need integer constants to appear first. */
8181 if (CONST_INT_P (XEXP (x, 0)))
8182 {
8183 output_pic_addr_const (file, XEXP (x, 0), code);
8184 putc ('+', file);
8185 output_pic_addr_const (file, XEXP (x, 1), code);
8186 }
8187 else
8188 {
8189 gcc_assert (CONST_INT_P (XEXP (x, 1)));
8190 output_pic_addr_const (file, XEXP (x, 1), code);
8191 putc ('+', file);
8192 output_pic_addr_const (file, XEXP (x, 0), code);
8193 }
8194 break;
8195
8196 case MINUS:
8197 if (!TARGET_MACHO)
8198 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
8199 output_pic_addr_const (file, XEXP (x, 0), code);
8200 putc ('-', file);
8201 output_pic_addr_const (file, XEXP (x, 1), code);
8202 if (!TARGET_MACHO)
8203 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
8204 break;
8205
8206 case UNSPEC:
8207 gcc_assert (XVECLEN (x, 0) == 1);
8208 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
8209 switch (XINT (x, 1))
8210 {
8211 case UNSPEC_GOT:
8212 fputs ("@GOT", file);
8213 break;
8214 case UNSPEC_GOTOFF:
8215 fputs ("@GOTOFF", file);
8216 break;
8217 case UNSPEC_PLTOFF:
8218 fputs ("@PLTOFF", file);
8219 break;
8220 case UNSPEC_GOTPCREL:
8221 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
8222 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
8223 break;
8224 case UNSPEC_GOTTPOFF:
8225 /* FIXME: This might be @TPOFF in Sun ld too. */
8226 fputs ("@GOTTPOFF", file);
8227 break;
8228 case UNSPEC_TPOFF:
8229 fputs ("@TPOFF", file);
8230 break;
8231 case UNSPEC_NTPOFF:
8232 if (TARGET_64BIT)
8233 fputs ("@TPOFF", file);
8234 else
8235 fputs ("@NTPOFF", file);
8236 break;
8237 case UNSPEC_DTPOFF:
8238 fputs ("@DTPOFF", file);
8239 break;
8240 case UNSPEC_GOTNTPOFF:
8241 if (TARGET_64BIT)
8242 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
8243 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
8244 else
8245 fputs ("@GOTNTPOFF", file);
8246 break;
8247 case UNSPEC_INDNTPOFF:
8248 fputs ("@INDNTPOFF", file);
8249 break;
8250 default:
8251 output_operand_lossage ("invalid UNSPEC as operand");
8252 break;
8253 }
8254 break;
8255
8256 default:
8257 output_operand_lossage ("invalid expression as operand");
8258 }
8259 }
8260
8261 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8262 We need to emit DTP-relative relocations. */
8263
8264 static void ATTRIBUTE_UNUSED
8265 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
8266 {
8267 fputs (ASM_LONG, file);
8268 output_addr_const (file, x);
8269 fputs ("@DTPOFF", file);
8270 switch (size)
8271 {
8272 case 4:
8273 break;
8274 case 8:
8275 fputs (", 0", file);
8276 break;
8277 default:
8278 gcc_unreachable ();
8279 }
8280 }
8281
8282 /* In the name of slightly smaller debug output, and to cater to
8283 general assembler lossage, recognize PIC+GOTOFF and turn it back
8284 into a direct symbol reference.
8285
8286 On Darwin, this is necessary to avoid a crash, because Darwin
8287 has a different PIC label for each routine but the DWARF debugging
8288 information is not associated with any particular routine, so it's
8289 necessary to remove references to the PIC label from RTL stored by
8290 the DWARF output code. */
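/* For instance, (plus pic_offset_table_rtx (const (unspec [foo] UNSPEC_GOTOFF)))
   is turned back into the bare symbol reference, with any register or constant
   addend re-attached to the result.  */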
8291
8292 static rtx
8293 ix86_delegitimize_address (rtx orig_x)
8294 {
8295 rtx x = orig_x;
8296 /* reg_addend is NULL or a multiple of some register. */
8297 rtx reg_addend = NULL_RTX;
8298 /* const_addend is NULL or a const_int. */
8299 rtx const_addend = NULL_RTX;
8300 /* This is the result, or NULL. */
8301 rtx result = NULL_RTX;
8302
8303 if (MEM_P (x))
8304 x = XEXP (x, 0);
8305
8306 if (TARGET_64BIT)
8307 {
8308 if (GET_CODE (x) != CONST
8309 || GET_CODE (XEXP (x, 0)) != UNSPEC
8310 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
8311 || !MEM_P (orig_x))
8312 return orig_x;
8313 return XVECEXP (XEXP (x, 0), 0, 0);
8314 }
8315
8316 if (GET_CODE (x) != PLUS
8317 || GET_CODE (XEXP (x, 1)) != CONST)
8318 return orig_x;
8319
8320 if (REG_P (XEXP (x, 0))
8321 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
8322 /* %ebx + GOT/GOTOFF */
8323 ;
8324 else if (GET_CODE (XEXP (x, 0)) == PLUS)
8325 {
8326 /* %ebx + %reg * scale + GOT/GOTOFF */
8327 reg_addend = XEXP (x, 0);
8328 if (REG_P (XEXP (reg_addend, 0))
8329 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
8330 reg_addend = XEXP (reg_addend, 1);
8331 else if (REG_P (XEXP (reg_addend, 1))
8332 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
8333 reg_addend = XEXP (reg_addend, 0);
8334 else
8335 return orig_x;
8336 if (!REG_P (reg_addend)
8337 && GET_CODE (reg_addend) != MULT
8338 && GET_CODE (reg_addend) != ASHIFT)
8339 return orig_x;
8340 }
8341 else
8342 return orig_x;
8343
8344 x = XEXP (XEXP (x, 1), 0);
8345 if (GET_CODE (x) == PLUS
8346 && CONST_INT_P (XEXP (x, 1)))
8347 {
8348 const_addend = XEXP (x, 1);
8349 x = XEXP (x, 0);
8350 }
8351
8352 if (GET_CODE (x) == UNSPEC
8353 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
8354 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
8355 result = XVECEXP (x, 0, 0);
8356
8357 if (TARGET_MACHO && darwin_local_data_pic (x)
8358 && !MEM_P (orig_x))
8359 result = XEXP (x, 0);
8360
8361 if (! result)
8362 return orig_x;
8363
8364 if (const_addend)
8365 result = gen_rtx_PLUS (Pmode, result, const_addend);
8366 if (reg_addend)
8367 result = gen_rtx_PLUS (Pmode, reg_addend, result);
8368 return result;
8369 }
8370
8371 /* If X is a machine specific address (i.e. a symbol or label being
8372 referenced as a displacement from the GOT implemented using an
8373 UNSPEC), then return the base term. Otherwise return X. */
8374
8375 rtx
8376 ix86_find_base_term (rtx x)
8377 {
8378 rtx term;
8379
8380 if (TARGET_64BIT)
8381 {
8382 if (GET_CODE (x) != CONST)
8383 return x;
8384 term = XEXP (x, 0);
8385 if (GET_CODE (term) == PLUS
8386 && (CONST_INT_P (XEXP (term, 1))
8387 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
8388 term = XEXP (term, 0);
8389 if (GET_CODE (term) != UNSPEC
8390 || XINT (term, 1) != UNSPEC_GOTPCREL)
8391 return x;
8392
8393 term = XVECEXP (term, 0, 0);
8394
8395 if (GET_CODE (term) != SYMBOL_REF
8396 && GET_CODE (term) != LABEL_REF)
8397 return x;
8398
8399 return term;
8400 }
8401
8402 term = ix86_delegitimize_address (x);
8403
8404 if (GET_CODE (term) != SYMBOL_REF
8405 && GET_CODE (term) != LABEL_REF)
8406 return x;
8407
8408 return term;
8409 }
8410 \f
8411 static void
8412 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
8413 int fp, FILE *file)
8414 {
8415 const char *suffix;
8416
8417 if (mode == CCFPmode || mode == CCFPUmode)
8418 {
8419 enum rtx_code second_code, bypass_code;
8420 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
8421 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
8422 code = ix86_fp_compare_code_to_integer (code);
8423 mode = CCmode;
8424 }
8425 if (reverse)
8426 code = reverse_condition (code);
8427
8428 switch (code)
8429 {
8430 case EQ:
8431 switch (mode)
8432 {
8433 case CCAmode:
8434 suffix = "a";
8435 break;
8436
8437 case CCCmode:
8438 suffix = "c";
8439 break;
8440
8441 case CCOmode:
8442 suffix = "o";
8443 break;
8444
8445 case CCSmode:
8446 suffix = "s";
8447 break;
8448
8449 default:
8450 suffix = "e";
8451 }
8452 break;
8453 case NE:
8454 switch (mode)
8455 {
8456 case CCAmode:
8457 suffix = "na";
8458 break;
8459
8460 case CCCmode:
8461 suffix = "nc";
8462 break;
8463
8464 case CCOmode:
8465 suffix = "no";
8466 break;
8467
8468 case CCSmode:
8469 suffix = "ns";
8470 break;
8471
8472 default:
8473 suffix = "ne";
8474 }
8475 break;
8476 case GT:
8477 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
8478 suffix = "g";
8479 break;
8480 case GTU:
8481 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8482 Those same assemblers have the same but opposite lossage on cmov. */
8483 if (mode == CCmode)
8484 suffix = fp ? "nbe" : "a";
8485 else if (mode == CCCmode)
8486 suffix = "b";
8487 else
8488 gcc_unreachable ();
8489 break;
8490 case LT:
8491 switch (mode)
8492 {
8493 case CCNOmode:
8494 case CCGOCmode:
8495 suffix = "s";
8496 break;
8497
8498 case CCmode:
8499 case CCGCmode:
8500 suffix = "l";
8501 break;
8502
8503 default:
8504 gcc_unreachable ();
8505 }
8506 break;
8507 case LTU:
8508 gcc_assert (mode == CCmode || mode == CCCmode);
8509 suffix = "b";
8510 break;
8511 case GE:
8512 switch (mode)
8513 {
8514 case CCNOmode:
8515 case CCGOCmode:
8516 suffix = "ns";
8517 break;
8518
8519 case CCmode:
8520 case CCGCmode:
8521 suffix = "ge";
8522 break;
8523
8524 default:
8525 gcc_unreachable ();
8526 }
8527 break;
8528 case GEU:
8529 /* ??? As above. */
8530 gcc_assert (mode == CCmode || mode == CCCmode);
8531 suffix = fp ? "nb" : "ae";
8532 break;
8533 case LE:
8534 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8535 suffix = "le";
8536 break;
8537 case LEU:
8538 /* ??? As above. */
8539 if (mode == CCmode)
8540 suffix = "be";
8541 else if (mode == CCCmode)
8542 suffix = fp ? "nb" : "ae";
8543 else
8544 gcc_unreachable ();
8545 break;
8546 case UNORDERED:
8547 suffix = fp ? "u" : "p";
8548 break;
8549 case ORDERED:
8550 suffix = fp ? "nu" : "np";
8551 break;
8552 default:
8553 gcc_unreachable ();
8554 }
8555 fputs (suffix, file);
8556 }
8557
8558 /* Print the name of register X to FILE based on its machine mode and number.
8559 If CODE is 'w', pretend the mode is HImode.
8560 If CODE is 'b', pretend the mode is QImode.
8561 If CODE is 'k', pretend the mode is SImode.
8562 If CODE is 'q', pretend the mode is DImode.
8563 If CODE is 'h', pretend the reg is the 'high' byte register.
8564 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
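/* For example, hard register 0 prints as %eax by default, as %ax with 'w',
   %al with 'b', %ah with 'h', and %rax with 'q' on 64-bit targets
   (AT&T syntax).  */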
8565
8566 void
8567 print_reg (rtx x, int code, FILE *file)
8568 {
8569 gcc_assert (x == pc_rtx
8570 || (REGNO (x) != ARG_POINTER_REGNUM
8571 && REGNO (x) != FRAME_POINTER_REGNUM
8572 && REGNO (x) != FLAGS_REG
8573 && REGNO (x) != FPSR_REG
8574 && REGNO (x) != FPCR_REG));
8575
8576 if (ASSEMBLER_DIALECT == ASM_ATT)
8577 putc ('%', file);
8578
8579 if (x == pc_rtx)
8580 {
8581 gcc_assert (TARGET_64BIT);
8582 fputs ("rip", file);
8583 return;
8584 }
8585
8586 if (code == 'w' || MMX_REG_P (x))
8587 code = 2;
8588 else if (code == 'b')
8589 code = 1;
8590 else if (code == 'k')
8591 code = 4;
8592 else if (code == 'q')
8593 code = 8;
8594 else if (code == 'y')
8595 code = 3;
8596 else if (code == 'h')
8597 code = 0;
8598 else
8599 code = GET_MODE_SIZE (GET_MODE (x));
8600
8601 /* Irritatingly, AMD extended registers use a different naming convention
8602 from the normal registers. */
8603 if (REX_INT_REG_P (x))
8604 {
8605 gcc_assert (TARGET_64BIT);
8606 switch (code)
8607 {
8608 case 0:
8609 error ("extended registers have no high halves");
8610 break;
8611 case 1:
8612 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8613 break;
8614 case 2:
8615 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8616 break;
8617 case 4:
8618 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8619 break;
8620 case 8:
8621 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8622 break;
8623 default:
8624 error ("unsupported operand size for extended register");
8625 break;
8626 }
8627 return;
8628 }
8629 switch (code)
8630 {
8631 case 3:
8632 if (STACK_TOP_P (x))
8633 {
8634 fputs ("st(0)", file);
8635 break;
8636 }
8637 /* FALLTHRU */
8638 case 8:
8639 case 4:
8640 case 12:
8641 if (! ANY_FP_REG_P (x))
8642 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8643 /* FALLTHRU */
8644 case 16:
8645 case 2:
8646 normal:
8647 fputs (hi_reg_name[REGNO (x)], file);
8648 break;
8649 case 1:
8650 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8651 goto normal;
8652 fputs (qi_reg_name[REGNO (x)], file);
8653 break;
8654 case 0:
8655 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8656 goto normal;
8657 fputs (qi_high_reg_name[REGNO (x)], file);
8658 break;
8659 default:
8660 gcc_unreachable ();
8661 }
8662 }
8663
8664 /* Locate some local-dynamic symbol still in use by this function
8665 so that we can print its name in some tls_local_dynamic_base
8666 pattern. */
8667
8668 static int
8669 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8670 {
8671 rtx x = *px;
8672
8673 if (GET_CODE (x) == SYMBOL_REF
8674 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8675 {
8676 cfun->machine->some_ld_name = XSTR (x, 0);
8677 return 1;
8678 }
8679
8680 return 0;
8681 }
8682
8683 static const char *
8684 get_some_local_dynamic_name (void)
8685 {
8686 rtx insn;
8687
8688 if (cfun->machine->some_ld_name)
8689 return cfun->machine->some_ld_name;
8690
8691 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8692 if (INSN_P (insn)
8693 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8694 return cfun->machine->some_ld_name;
8695
8696 gcc_unreachable ();
8697 }
8698
8699 /* Meaning of CODE:
8700 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8701 C -- print opcode suffix for set/cmov insn.
8702 c -- like C, but print reversed condition
8703 F,f -- likewise, but for floating-point.
8704 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8705 otherwise nothing
8706 R -- print the prefix for register names.
8707 z -- print the opcode suffix for the size of the current operand.
8708 * -- print a star (in certain assembler syntax)
8709 A -- print an absolute memory reference.
8710 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8711 s -- print a shift double count, followed by the assembler's argument
8712 delimiter.
8713 b -- print the QImode name of the register for the indicated operand.
8714 %b0 would print %al if operands[0] is reg 0.
8715 w -- likewise, print the HImode name of the register.
8716 k -- likewise, print the SImode name of the register.
8717 q -- likewise, print the DImode name of the register.
8718 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8719 y -- print "st(0)" instead of "st" as a register.
8720 D -- print condition for SSE cmp instruction.
8721 P -- if PIC, print an @PLT suffix.
8722 X -- don't print any sort of PIC '@' suffix for a symbol.
8723 & -- print some in-use local-dynamic symbol name.
8724 H -- print a memory address offset by 8; used for sse high-parts
8725 Y -- print condition for SSE5 com* instruction.
8726 + -- print a branch hint as 'cs' or 'ds' prefix
8727 ; -- print a semicolon (after prefixes due to bug in older gas).
8728 */
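/* As a further example, "%+" in a jump template emits a "ds ; " or "cs ; "
   prefix when branch prediction hints are enabled and the static prediction
   disagrees with the hardware's default forward/backward heuristic.  */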
8729
8730 void
8731 print_operand (FILE *file, rtx x, int code)
8732 {
8733 if (code)
8734 {
8735 switch (code)
8736 {
8737 case '*':
8738 if (ASSEMBLER_DIALECT == ASM_ATT)
8739 putc ('*', file);
8740 return;
8741
8742 case '&':
8743 assemble_name (file, get_some_local_dynamic_name ());
8744 return;
8745
8746 case 'A':
8747 switch (ASSEMBLER_DIALECT)
8748 {
8749 case ASM_ATT:
8750 putc ('*', file);
8751 break;
8752
8753 case ASM_INTEL:
8754 /* Intel syntax. For absolute addresses, registers should not
8755 be surrounded by brackets. */
8756 if (!REG_P (x))
8757 {
8758 putc ('[', file);
8759 PRINT_OPERAND (file, x, 0);
8760 putc (']', file);
8761 return;
8762 }
8763 break;
8764
8765 default:
8766 gcc_unreachable ();
8767 }
8768
8769 PRINT_OPERAND (file, x, 0);
8770 return;
8771
8772
8773 case 'L':
8774 if (ASSEMBLER_DIALECT == ASM_ATT)
8775 putc ('l', file);
8776 return;
8777
8778 case 'W':
8779 if (ASSEMBLER_DIALECT == ASM_ATT)
8780 putc ('w', file);
8781 return;
8782
8783 case 'B':
8784 if (ASSEMBLER_DIALECT == ASM_ATT)
8785 putc ('b', file);
8786 return;
8787
8788 case 'Q':
8789 if (ASSEMBLER_DIALECT == ASM_ATT)
8790 putc ('l', file);
8791 return;
8792
8793 case 'S':
8794 if (ASSEMBLER_DIALECT == ASM_ATT)
8795 putc ('s', file);
8796 return;
8797
8798 case 'T':
8799 if (ASSEMBLER_DIALECT == ASM_ATT)
8800 putc ('t', file);
8801 return;
8802
8803 case 'z':
8804 /* 387 opcodes don't get size suffixes if the operands are
8805 registers. */
8806 if (STACK_REG_P (x))
8807 return;
8808
8809 /* Likewise if using Intel opcodes. */
8810 if (ASSEMBLER_DIALECT == ASM_INTEL)
8811 return;
8812
8813 /* Derive the opcode suffix from the size of the operand. */
8814 switch (GET_MODE_SIZE (GET_MODE (x)))
8815 {
8816 case 1:
8817 putc ('b', file);
8818 return;
8819
8820 case 2:
8821 if (MEM_P (x))
8822 {
8823 #ifdef HAVE_GAS_FILDS_FISTS
8824 putc ('s', file);
8825 #endif
8826 return;
8827 }
8828 else
8829 putc ('w', file);
8830 return;
8831
8832 case 4:
8833 if (GET_MODE (x) == SFmode)
8834 {
8835 putc ('s', file);
8836 return;
8837 }
8838 else
8839 putc ('l', file);
8840 return;
8841
8842 case 12:
8843 case 16:
8844 putc ('t', file);
8845 return;
8846
8847 case 8:
8848 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8849 {
8850 #ifdef GAS_MNEMONICS
8851 putc ('q', file);
8852 #else
8853 putc ('l', file);
8854 putc ('l', file);
8855 #endif
8856 }
8857 else
8858 putc ('l', file);
8859 return;
8860
8861 default:
8862 gcc_unreachable ();
8863 }
8864
8865 case 'b':
8866 case 'w':
8867 case 'k':
8868 case 'q':
8869 case 'h':
8870 case 'y':
8871 case 'X':
8872 case 'P':
8873 break;
8874
8875 case 's':
8876 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8877 {
8878 PRINT_OPERAND (file, x, 0);
8879 putc (',', file);
8880 }
8881 return;
8882
8883 case 'D':
8884 /* A little bit of braindamage here. The SSE compare instructions
8885 use completely different names for the comparisons than the
8886 fp conditional moves do. */
8887 switch (GET_CODE (x))
8888 {
8889 case EQ:
8890 case UNEQ:
8891 fputs ("eq", file);
8892 break;
8893 case LT:
8894 case UNLT:
8895 fputs ("lt", file);
8896 break;
8897 case LE:
8898 case UNLE:
8899 fputs ("le", file);
8900 break;
8901 case UNORDERED:
8902 fputs ("unord", file);
8903 break;
8904 case NE:
8905 case LTGT:
8906 fputs ("neq", file);
8907 break;
8908 case UNGE:
8909 case GE:
8910 fputs ("nlt", file);
8911 break;
8912 case UNGT:
8913 case GT:
8914 fputs ("nle", file);
8915 break;
8916 case ORDERED:
8917 fputs ("ord", file);
8918 break;
8919 default:
8920 gcc_unreachable ();
8921 }
8922 return;
8923 case 'O':
8924 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8925 if (ASSEMBLER_DIALECT == ASM_ATT)
8926 {
8927 switch (GET_MODE (x))
8928 {
8929 case HImode: putc ('w', file); break;
8930 case SImode:
8931 case SFmode: putc ('l', file); break;
8932 case DImode:
8933 case DFmode: putc ('q', file); break;
8934 default: gcc_unreachable ();
8935 }
8936 putc ('.', file);
8937 }
8938 #endif
8939 return;
8940 case 'C':
8941 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8942 return;
8943 case 'F':
8944 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8945 if (ASSEMBLER_DIALECT == ASM_ATT)
8946 putc ('.', file);
8947 #endif
8948 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8949 return;
8950
8951 /* Like above, but reverse condition */
8952 case 'c':
8953 /* Check to see if argument to %c is really a constant
8954 and not a condition code which needs to be reversed. */
8955 if (!COMPARISON_P (x))
8956 {
8957 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8958 return;
8959 }
8960 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8961 return;
8962 case 'f':
8963 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8964 if (ASSEMBLER_DIALECT == ASM_ATT)
8965 putc ('.', file);
8966 #endif
8967 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8968 return;
8969
8970 case 'H':
8971 /* It doesn't actually matter what mode we use here, as we're
8972 only going to use this for printing. */
8973 x = adjust_address_nv (x, DImode, 8);
8974 break;
8975
8976 case '+':
8977 {
8978 rtx x;
8979
8980 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8981 return;
8982
8983 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8984 if (x)
8985 {
8986 int pred_val = INTVAL (XEXP (x, 0));
8987
8988 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8989 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8990 {
8991 int taken = pred_val > REG_BR_PROB_BASE / 2;
8992 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8993
8994 /* Emit hints only in the case default branch prediction
8995 heuristics would fail. */
8996 if (taken != cputaken)
8997 {
8998 /* We use 3e (DS) prefix for taken branches and
8999 2e (CS) prefix for not taken branches. */
9000 if (taken)
9001 fputs ("ds ; ", file);
9002 else
9003 fputs ("cs ; ", file);
9004 }
9005 }
9006 }
9007 return;
9008 }
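/* Rough illustration of the '+' code above (label name is only an
   example): a forward conditional branch that the compiler predicts
   taken is printed as "ds ; jne .L2", reusing the DS segment-override
   byte as a "taken" hint, since the CPU's static heuristic would have
   predicted the forward branch not taken.  */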
9009
9010 case 'Y':
9011 switch (GET_CODE (x))
9012 {
9013 case NE:
9014 fputs ("neq", file);
9015 break;
9016 case EQ:
9017 fputs ("eq", file);
9018 break;
9019 case GE:
9020 case GEU:
9021 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
9022 break;
9023 case GT:
9024 case GTU:
9025 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
9026 break;
9027 case LE:
9028 case LEU:
9029 fputs ("le", file);
9030 break;
9031 case LT:
9032 case LTU:
9033 fputs ("lt", file);
9034 break;
9035 case UNORDERED:
9036 fputs ("unord", file);
9037 break;
9038 case ORDERED:
9039 fputs ("ord", file);
9040 break;
9041 case UNEQ:
9042 fputs ("ueq", file);
9043 break;
9044 case UNGE:
9045 fputs ("nlt", file);
9046 break;
9047 case UNGT:
9048 fputs ("nle", file);
9049 break;
9050 case UNLE:
9051 fputs ("ule", file);
9052 break;
9053 case UNLT:
9054 fputs ("ult", file);
9055 break;
9056 case LTGT:
9057 fputs ("une", file);
9058 break;
9059 default:
9060 gcc_unreachable ();
9061 }
9062 return;
9063
9064 case ';':
9065 #if TARGET_MACHO
9066 fputs (" ; ", file);
9067 #else
9068 fputc (' ', file);
9069 #endif
9070 return;
9071
9072 default:
9073 output_operand_lossage ("invalid operand code '%c'", code);
9074 }
9075 }
9076
9077 if (REG_P (x))
9078 print_reg (x, code, file);
9079
9080 else if (MEM_P (x))
9081 {
9082 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
9083 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
9084 && GET_MODE (x) != BLKmode)
9085 {
9086 const char * size;
9087 switch (GET_MODE_SIZE (GET_MODE (x)))
9088 {
9089 case 1: size = "BYTE"; break;
9090 case 2: size = "WORD"; break;
9091 case 4: size = "DWORD"; break;
9092 case 8: size = "QWORD"; break;
9093 case 12: size = "XWORD"; break;
9094 case 16:
9095 if (GET_MODE (x) == XFmode)
9096 size = "XWORD";
9097 else
9098 size = "XMMWORD";
9099 break;
9100 default:
9101 gcc_unreachable ();
9102 }
9103
9104 /* Check for explicit size override (codes 'b', 'w' and 'k') */
9105 if (code == 'b')
9106 size = "BYTE";
9107 else if (code == 'w')
9108 size = "WORD";
9109 else if (code == 'k')
9110 size = "DWORD";
9111
9112 fputs (size, file);
9113 fputs (" PTR ", file);
9114 }
9115
9116 x = XEXP (x, 0);
9117 /* Avoid (%rip) for call operands. */
9118 if (CONSTANT_ADDRESS_P (x) && code == 'P'
9119 && !CONST_INT_P (x))
9120 output_addr_const (file, x);
9121 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
9122 output_operand_lossage ("invalid constraints for operand");
9123 else
9124 output_address (x);
9125 }
9126
9127 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
9128 {
9129 REAL_VALUE_TYPE r;
9130 long l;
9131
9132 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9133 REAL_VALUE_TO_TARGET_SINGLE (r, l);
9134
9135 if (ASSEMBLER_DIALECT == ASM_ATT)
9136 putc ('$', file);
9137 fprintf (file, "0x%08lx", l);
9138 }
9139
9140 /* These float cases don't actually occur as immediate operands. */
9141 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
9142 {
9143 char dstr[30];
9144
9145 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9146 fprintf (file, "%s", dstr);
9147 }
9148
9149 else if (GET_CODE (x) == CONST_DOUBLE
9150 && GET_MODE (x) == XFmode)
9151 {
9152 char dstr[30];
9153
9154 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9155 fprintf (file, "%s", dstr);
9156 }
9157
9158 else
9159 {
9160 /* We have patterns that allow zero sets of memory, for instance.
9161 In 64-bit mode, we should probably support all 8-byte vectors,
9162 since we can in fact encode that into an immediate. */
9163 if (GET_CODE (x) == CONST_VECTOR)
9164 {
9165 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
9166 x = const0_rtx;
9167 }
9168
9169 if (code != 'P')
9170 {
9171 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
9172 {
9173 if (ASSEMBLER_DIALECT == ASM_ATT)
9174 putc ('$', file);
9175 }
9176 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
9177 || GET_CODE (x) == LABEL_REF)
9178 {
9179 if (ASSEMBLER_DIALECT == ASM_ATT)
9180 putc ('$', file);
9181 else
9182 fputs ("OFFSET FLAT:", file);
9183 }
9184 }
9185 if (CONST_INT_P (x))
9186 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
9187 else if (flag_pic)
9188 output_pic_addr_const (file, x, code);
9189 else
9190 output_addr_const (file, x);
9191 }
9192 }
9193 \f
9194 /* Print a memory operand whose address is ADDR. */
9195
9196 void
9197 print_operand_address (FILE *file, rtx addr)
9198 {
9199 struct ix86_address parts;
9200 rtx base, index, disp;
9201 int scale;
9202 int ok = ix86_decompose_address (addr, &parts);
9203
9204 gcc_assert (ok);
9205
9206 base = parts.base;
9207 index = parts.index;
9208 disp = parts.disp;
9209 scale = parts.scale;
9210
9211 switch (parts.seg)
9212 {
9213 case SEG_DEFAULT:
9214 break;
9215 case SEG_FS:
9216 case SEG_GS:
9217 if (ASSEMBLER_DIALECT == ASM_ATT)
9218 putc ('%', file);
9219 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
9220 break;
9221 default:
9222 gcc_unreachable ();
9223 }
9224
9225 /* Use one byte shorter RIP relative addressing for 64bit mode. */
9226 if (TARGET_64BIT && !base && !index)
9227 {
9228 rtx symbol = disp;
9229
9230 if (GET_CODE (disp) == CONST
9231 && GET_CODE (XEXP (disp, 0)) == PLUS
9232 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9233 symbol = XEXP (XEXP (disp, 0), 0);
9234
9235 if (GET_CODE (symbol) == LABEL_REF
9236 || (GET_CODE (symbol) == SYMBOL_REF
9237 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
9238 base = pc_rtx;
9239 }
9240 if (!base && !index)
9241 {
9242 /* A displacement-only address requires special attention. */
9243
9244 if (CONST_INT_P (disp))
9245 {
9246 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
9247 fputs ("ds:", file);
9248 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
9249 }
9250 else if (flag_pic)
9251 output_pic_addr_const (file, disp, 0);
9252 else
9253 output_addr_const (file, disp);
9254 }
9255 else
9256 {
9257 if (ASSEMBLER_DIALECT == ASM_ATT)
9258 {
9259 if (disp)
9260 {
9261 if (flag_pic)
9262 output_pic_addr_const (file, disp, 0);
9263 else if (GET_CODE (disp) == LABEL_REF)
9264 output_asm_label (disp);
9265 else
9266 output_addr_const (file, disp);
9267 }
9268
9269 putc ('(', file);
9270 if (base)
9271 print_reg (base, 0, file);
9272 if (index)
9273 {
9274 putc (',', file);
9275 print_reg (index, 0, file);
9276 if (scale != 1)
9277 fprintf (file, ",%d", scale);
9278 }
9279 putc (')', file);
9280 }
9281 else
9282 {
9283 rtx offset = NULL_RTX;
9284
9285 if (disp)
9286 {
9287 /* Pull out the offset of a symbol; print any symbol itself. */
9288 if (GET_CODE (disp) == CONST
9289 && GET_CODE (XEXP (disp, 0)) == PLUS
9290 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9291 {
9292 offset = XEXP (XEXP (disp, 0), 1);
9293 disp = gen_rtx_CONST (VOIDmode,
9294 XEXP (XEXP (disp, 0), 0));
9295 }
9296
9297 if (flag_pic)
9298 output_pic_addr_const (file, disp, 0);
9299 else if (GET_CODE (disp) == LABEL_REF)
9300 output_asm_label (disp);
9301 else if (CONST_INT_P (disp))
9302 offset = disp;
9303 else
9304 output_addr_const (file, disp);
9305 }
9306
9307 putc ('[', file);
9308 if (base)
9309 {
9310 print_reg (base, 0, file);
9311 if (offset)
9312 {
9313 if (INTVAL (offset) >= 0)
9314 putc ('+', file);
9315 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9316 }
9317 }
9318 else if (offset)
9319 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9320 else
9321 putc ('0', file);
9322
9323 if (index)
9324 {
9325 putc ('+', file);
9326 print_reg (index, 0, file);
9327 if (scale != 1)
9328 fprintf (file, "*%d", scale);
9329 }
9330 putc (']', file);
9331 }
9332 }
9333 }
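/* Illustrative output of print_operand_address (register names and the
   displacement are examples only): for base %ebx, index %esi, scale 4
   and displacement 12, the AT&T dialect prints "12(%ebx,%esi,4)" while
   the Intel dialect prints "[ebx+12+esi*4]".  */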
9334
9335 bool
9336 output_addr_const_extra (FILE *file, rtx x)
9337 {
9338 rtx op;
9339
9340 if (GET_CODE (x) != UNSPEC)
9341 return false;
9342
9343 op = XVECEXP (x, 0, 0);
9344 switch (XINT (x, 1))
9345 {
9346 case UNSPEC_GOTTPOFF:
9347 output_addr_const (file, op);
9348 /* FIXME: This might be @TPOFF in Sun ld. */
9349 fputs ("@GOTTPOFF", file);
9350 break;
9351 case UNSPEC_TPOFF:
9352 output_addr_const (file, op);
9353 fputs ("@TPOFF", file);
9354 break;
9355 case UNSPEC_NTPOFF:
9356 output_addr_const (file, op);
9357 if (TARGET_64BIT)
9358 fputs ("@TPOFF", file);
9359 else
9360 fputs ("@NTPOFF", file);
9361 break;
9362 case UNSPEC_DTPOFF:
9363 output_addr_const (file, op);
9364 fputs ("@DTPOFF", file);
9365 break;
9366 case UNSPEC_GOTNTPOFF:
9367 output_addr_const (file, op);
9368 if (TARGET_64BIT)
9369 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
9370 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
9371 else
9372 fputs ("@GOTNTPOFF", file);
9373 break;
9374 case UNSPEC_INDNTPOFF:
9375 output_addr_const (file, op);
9376 fputs ("@INDNTPOFF", file);
9377 break;
9378
9379 default:
9380 return false;
9381 }
9382
9383 return true;
9384 }
9385 \f
9386 /* Split one or more DImode RTL references into pairs of SImode
9387 references. The RTL can be REG, offsettable MEM, integer constant, or
9388 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9389 split and "num" is its length. lo_half and hi_half are output arrays
9390 that parallel "operands". */
9391
9392 void
9393 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9394 {
9395 while (num--)
9396 {
9397 rtx op = operands[num];
9398
9399 /* simplify_subreg refuses to split volatile memory references,
9400 but we still have to handle them. */
9401 if (MEM_P (op))
9402 {
9403 lo_half[num] = adjust_address (op, SImode, 0);
9404 hi_half[num] = adjust_address (op, SImode, 4);
9405 }
9406 else
9407 {
9408 lo_half[num] = simplify_gen_subreg (SImode, op,
9409 GET_MODE (op) == VOIDmode
9410 ? DImode : GET_MODE (op), 0);
9411 hi_half[num] = simplify_gen_subreg (SImode, op,
9412 GET_MODE (op) == VOIDmode
9413 ? DImode : GET_MODE (op), 4);
9414 }
9415 }
9416 }
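/* For instance, a DImode register operand is split by the code above into
   (subreg:SI ... 0) and (subreg:SI ... 4) -- low and high words on this
   little-endian target -- while an offsettable MEM becomes two SImode
   memory references at offsets 0 and 4.  */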
9417 /* Split one or more TImode RTL references into pairs of DImode
9418 references. The RTL can be REG, offsettable MEM, integer constant, or
9419 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
9420 split and "num" is its length. lo_half and hi_half are output arrays
9421 that parallel "operands". */
9422
9423 void
9424 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9425 {
9426 while (num--)
9427 {
9428 rtx op = operands[num];
9429
9430 /* simplify_subreg refuses to split volatile memory references, but we
9431 still have to handle them. */
9432 if (MEM_P (op))
9433 {
9434 lo_half[num] = adjust_address (op, DImode, 0);
9435 hi_half[num] = adjust_address (op, DImode, 8);
9436 }
9437 else
9438 {
9439 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
9440 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
9441 }
9442 }
9443 }
9444 \f
9445 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
9446 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
9447 is the expression of the binary operation. The output may either be
9448 emitted here, or returned to the caller, like all output_* functions.
9449
9450 There is no guarantee that the operands are the same mode, as they
9451 might be within FLOAT or FLOAT_EXTEND expressions. */
9452
9453 #ifndef SYSV386_COMPAT
9454 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
9455 wants to fix the assemblers because that causes incompatibility
9456 with gcc. No-one wants to fix gcc because that causes
9457 incompatibility with assemblers... You can use the option of
9458 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
9459 #define SYSV386_COMPAT 1
9460 #endif
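/* Sketch of the SSE path below: for a PLUS in SFmode with SSE operands,
   the routine returns "addss\t{%2, %0|%0, %2}", i.e. the AT&T template
   before the '|' and the Intel template after it; the x87 paths instead
   build an fadd/fsub/fmul/fdiv variant from the cases below.  */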
9461
9462 const char *
9463 output_387_binary_op (rtx insn, rtx *operands)
9464 {
9465 static char buf[30];
9466 const char *p;
9467 const char *ssep;
9468 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
9469
9470 #ifdef ENABLE_CHECKING
9471 /* Even if we do not want to check the inputs, this documents the input
9472 constraints, which helps in understanding the following code. */
9473 if (STACK_REG_P (operands[0])
9474 && ((REG_P (operands[1])
9475 && REGNO (operands[0]) == REGNO (operands[1])
9476 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
9477 || (REG_P (operands[2])
9478 && REGNO (operands[0]) == REGNO (operands[2])
9479 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
9480 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
9481 ; /* ok */
9482 else
9483 gcc_assert (is_sse);
9484 #endif
9485
9486 switch (GET_CODE (operands[3]))
9487 {
9488 case PLUS:
9489 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9490 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9491 p = "fiadd";
9492 else
9493 p = "fadd";
9494 ssep = "add";
9495 break;
9496
9497 case MINUS:
9498 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9499 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9500 p = "fisub";
9501 else
9502 p = "fsub";
9503 ssep = "sub";
9504 break;
9505
9506 case MULT:
9507 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9508 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9509 p = "fimul";
9510 else
9511 p = "fmul";
9512 ssep = "mul";
9513 break;
9514
9515 case DIV:
9516 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9517 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9518 p = "fidiv";
9519 else
9520 p = "fdiv";
9521 ssep = "div";
9522 break;
9523
9524 default:
9525 gcc_unreachable ();
9526 }
9527
9528 if (is_sse)
9529 {
9530 strcpy (buf, ssep);
9531 if (GET_MODE (operands[0]) == SFmode)
9532 strcat (buf, "ss\t{%2, %0|%0, %2}");
9533 else
9534 strcat (buf, "sd\t{%2, %0|%0, %2}");
9535 return buf;
9536 }
9537 strcpy (buf, p);
9538
9539 switch (GET_CODE (operands[3]))
9540 {
9541 case MULT:
9542 case PLUS:
9543 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
9544 {
9545 rtx temp = operands[2];
9546 operands[2] = operands[1];
9547 operands[1] = temp;
9548 }
9549
9550 /* We know operands[0] == operands[1]. */
9551
9552 if (MEM_P (operands[2]))
9553 {
9554 p = "%z2\t%2";
9555 break;
9556 }
9557
9558 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9559 {
9560 if (STACK_TOP_P (operands[0]))
9561 /* How is it that we are storing to a dead operand[2]?
9562 Well, presumably operands[1] is dead too. We can't
9563 store the result to st(0) as st(0) gets popped on this
9564 instruction. Instead store to operands[2] (which I
9565 think has to be st(1)). st(1) will be popped later.
9566 gcc <= 2.8.1 didn't have this check and generated
9567 assembly code that the Unixware assembler rejected. */
9568 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9569 else
9570 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9571 break;
9572 }
9573
9574 if (STACK_TOP_P (operands[0]))
9575 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9576 else
9577 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9578 break;
9579
9580 case MINUS:
9581 case DIV:
9582 if (MEM_P (operands[1]))
9583 {
9584 p = "r%z1\t%1";
9585 break;
9586 }
9587
9588 if (MEM_P (operands[2]))
9589 {
9590 p = "%z2\t%2";
9591 break;
9592 }
9593
9594 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9595 {
9596 #if SYSV386_COMPAT
9597 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9598 derived assemblers, confusingly reverse the direction of
9599 the operation for fsub{r} and fdiv{r} when the
9600 destination register is not st(0). The Intel assembler
9601 doesn't have this brain damage. Read !SYSV386_COMPAT to
9602 figure out what the hardware really does. */
9603 if (STACK_TOP_P (operands[0]))
9604 p = "{p\t%0, %2|rp\t%2, %0}";
9605 else
9606 p = "{rp\t%2, %0|p\t%0, %2}";
9607 #else
9608 if (STACK_TOP_P (operands[0]))
9609 /* As above for fmul/fadd, we can't store to st(0). */
9610 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9611 else
9612 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9613 #endif
9614 break;
9615 }
9616
9617 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9618 {
9619 #if SYSV386_COMPAT
9620 if (STACK_TOP_P (operands[0]))
9621 p = "{rp\t%0, %1|p\t%1, %0}";
9622 else
9623 p = "{p\t%1, %0|rp\t%0, %1}";
9624 #else
9625 if (STACK_TOP_P (operands[0]))
9626 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9627 else
9628 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9629 #endif
9630 break;
9631 }
9632
9633 if (STACK_TOP_P (operands[0]))
9634 {
9635 if (STACK_TOP_P (operands[1]))
9636 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9637 else
9638 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9639 break;
9640 }
9641 else if (STACK_TOP_P (operands[1]))
9642 {
9643 #if SYSV386_COMPAT
9644 p = "{\t%1, %0|r\t%0, %1}";
9645 #else
9646 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9647 #endif
9648 }
9649 else
9650 {
9651 #if SYSV386_COMPAT
9652 p = "{r\t%2, %0|\t%0, %2}";
9653 #else
9654 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9655 #endif
9656 }
9657 break;
9658
9659 default:
9660 gcc_unreachable ();
9661 }
9662
9663 strcat (buf, p);
9664 return buf;
9665 }
9666
9667 /* Return needed mode for entity in optimize_mode_switching pass. */
9668
9669 int
9670 ix86_mode_needed (int entity, rtx insn)
9671 {
9672 enum attr_i387_cw mode;
9673
9674 /* The mode UNINITIALIZED is used to store the control word after a
9675 function call or ASM pattern. The mode ANY specifies that the function
9676 has no requirements on the control word and makes no changes to the
9677 bits we are interested in. */
9678
9679 if (CALL_P (insn)
9680 || (NONJUMP_INSN_P (insn)
9681 && (asm_noperands (PATTERN (insn)) >= 0
9682 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9683 return I387_CW_UNINITIALIZED;
9684
9685 if (recog_memoized (insn) < 0)
9686 return I387_CW_ANY;
9687
9688 mode = get_attr_i387_cw (insn);
9689
9690 switch (entity)
9691 {
9692 case I387_TRUNC:
9693 if (mode == I387_CW_TRUNC)
9694 return mode;
9695 break;
9696
9697 case I387_FLOOR:
9698 if (mode == I387_CW_FLOOR)
9699 return mode;
9700 break;
9701
9702 case I387_CEIL:
9703 if (mode == I387_CW_CEIL)
9704 return mode;
9705 break;
9706
9707 case I387_MASK_PM:
9708 if (mode == I387_CW_MASK_PM)
9709 return mode;
9710 break;
9711
9712 default:
9713 gcc_unreachable ();
9714 }
9715
9716 return I387_CW_ANY;
9717 }
9718
9719 /* Output code to initialize control word copies used by trunc?f?i and
9720 rounding patterns. MODE selects the required rounding mode; the current
9721 control word is saved and a modified copy is stored in a stack slot. */
9722
9723 void
9724 emit_i387_cw_initialization (int mode)
9725 {
9726 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9727 rtx new_mode;
9728
9729 enum ix86_stack_slot slot;
9730
9731 rtx reg = gen_reg_rtx (HImode);
9732
9733 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9734 emit_move_insn (reg, copy_rtx (stored_mode));
9735
9736 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9737 {
9738 switch (mode)
9739 {
9740 case I387_CW_TRUNC:
9741 /* round toward zero (truncate) */
9742 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9743 slot = SLOT_CW_TRUNC;
9744 break;
9745
9746 case I387_CW_FLOOR:
9747 /* round down toward -oo */
9748 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9749 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9750 slot = SLOT_CW_FLOOR;
9751 break;
9752
9753 case I387_CW_CEIL:
9754 /* round up toward +oo */
9755 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9756 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9757 slot = SLOT_CW_CEIL;
9758 break;
9759
9760 case I387_CW_MASK_PM:
9761 /* mask precision exception for nearbyint() */
9762 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9763 slot = SLOT_CW_MASK_PM;
9764 break;
9765
9766 default:
9767 gcc_unreachable ();
9768 }
9769 }
9770 else
9771 {
9772 switch (mode)
9773 {
9774 case I387_CW_TRUNC:
9775 /* round toward zero (truncate) */
9776 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9777 slot = SLOT_CW_TRUNC;
9778 break;
9779
9780 case I387_CW_FLOOR:
9781 /* round down toward -oo */
9782 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9783 slot = SLOT_CW_FLOOR;
9784 break;
9785
9786 case I387_CW_CEIL:
9787 /* round up toward +oo */
9788 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9789 slot = SLOT_CW_CEIL;
9790 break;
9791
9792 case I387_CW_MASK_PM:
9793 /* mask precision exception for nearbyint() */
9794 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9795 slot = SLOT_CW_MASK_PM;
9796 break;
9797
9798 default:
9799 gcc_unreachable ();
9800 }
9801 }
9802
9803 gcc_assert (slot < MAX_386_STACK_LOCALS);
9804
9805 new_mode = assign_386_stack_local (HImode, slot);
9806 emit_move_insn (new_mode, reg);
9807 }
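/* For reference, bits 10-11 of the x87 control word select the rounding
   mode (00 nearest, 01 down, 10 up, 11 toward zero) and bit 5 masks the
   precision exception.  Hence ORing in 0x0c00 above selects truncation,
   0x0400 and 0x0800 select floor and ceil once the field is cleared, and
   0x0020 masks the precision exception for nearbyint.  */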
9808
9809 /* Output code for INSN to convert a float to a signed int. OPERANDS
9810 are the insn operands. The output may be [HSD]Imode and the input
9811 operand may be [SDX]Fmode. */
9812
9813 const char *
9814 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9815 {
9816 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9817 int dimode_p = GET_MODE (operands[0]) == DImode;
9818 int round_mode = get_attr_i387_cw (insn);
9819
9820 /* Jump through a hoop or two for DImode, since the hardware has no
9821 non-popping instruction. We used to do this a different way, but
9822 that was somewhat fragile and broke with post-reload splitters. */
9823 if ((dimode_p || fisttp) && !stack_top_dies)
9824 output_asm_insn ("fld\t%y1", operands);
9825
9826 gcc_assert (STACK_TOP_P (operands[1]));
9827 gcc_assert (MEM_P (operands[0]));
9828 gcc_assert (GET_MODE (operands[1]) != TFmode);
9829
9830 if (fisttp)
9831 output_asm_insn ("fisttp%z0\t%0", operands);
9832 else
9833 {
9834 if (round_mode != I387_CW_ANY)
9835 output_asm_insn ("fldcw\t%3", operands);
9836 if (stack_top_dies || dimode_p)
9837 output_asm_insn ("fistp%z0\t%0", operands);
9838 else
9839 output_asm_insn ("fist%z0\t%0", operands);
9840 if (round_mode != I387_CW_ANY)
9841 output_asm_insn ("fldcw\t%2", operands);
9842 }
9843
9844 return "";
9845 }
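/* Illustrative sequence for a truncating SImode store without fisttp,
   using the operand numbers from the templates above: "fldcw %3" to
   switch to the truncating control word, "fistpl %0" to store the value
   and pop the stack, then "fldcw %2" to restore the saved control word.  */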
9846
9847 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9848 have the values zero or one, indicates the ffreep insn's operand
9849 from the OPERANDS array. */
9850
9851 static const char *
9852 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9853 {
9854 if (TARGET_USE_FFREEP)
9855 #if HAVE_AS_IX86_FFREEP
9856 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9857 #else
9858 {
9859 static char retval[] = ".word\t0xc_df";
9860 int regno = REGNO (operands[opno]);
9861
9862 gcc_assert (FP_REGNO_P (regno));
9863
9864 retval[9] = '0' + (regno - FIRST_STACK_REG);
9865 return retval;
9866 }
9867 #endif
9868
9869 return opno ? "fstp\t%y1" : "fstp\t%y0";
9870 }
9871
9872
9873 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9874 should be used. UNORDERED_P is true when fucom should be used. */
9875
9876 const char *
9877 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9878 {
9879 int stack_top_dies;
9880 rtx cmp_op0, cmp_op1;
9881 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9882
9883 if (eflags_p)
9884 {
9885 cmp_op0 = operands[0];
9886 cmp_op1 = operands[1];
9887 }
9888 else
9889 {
9890 cmp_op0 = operands[1];
9891 cmp_op1 = operands[2];
9892 }
9893
9894 if (is_sse)
9895 {
9896 if (GET_MODE (operands[0]) == SFmode)
9897 if (unordered_p)
9898 return "ucomiss\t{%1, %0|%0, %1}";
9899 else
9900 return "comiss\t{%1, %0|%0, %1}";
9901 else
9902 if (unordered_p)
9903 return "ucomisd\t{%1, %0|%0, %1}";
9904 else
9905 return "comisd\t{%1, %0|%0, %1}";
9906 }
9907
9908 gcc_assert (STACK_TOP_P (cmp_op0));
9909
9910 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9911
9912 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9913 {
9914 if (stack_top_dies)
9915 {
9916 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9917 return output_387_ffreep (operands, 1);
9918 }
9919 else
9920 return "ftst\n\tfnstsw\t%0";
9921 }
9922
9923 if (STACK_REG_P (cmp_op1)
9924 && stack_top_dies
9925 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9926 && REGNO (cmp_op1) != FIRST_STACK_REG)
9927 {
9928 /* If both the top of the 387 stack and the other operand (also a
9929 stack register) die, then this must be a `fcompp' float
9930 compare. */
9931
9932 if (eflags_p)
9933 {
9934 /* There is no double popping fcomi variant. Fortunately,
9935 eflags is immune from the fstp's cc clobbering. */
9936 if (unordered_p)
9937 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9938 else
9939 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9940 return output_387_ffreep (operands, 0);
9941 }
9942 else
9943 {
9944 if (unordered_p)
9945 return "fucompp\n\tfnstsw\t%0";
9946 else
9947 return "fcompp\n\tfnstsw\t%0";
9948 }
9949 }
9950 else
9951 {
9952 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9953
9954 static const char * const alt[16] =
9955 {
9956 "fcom%z2\t%y2\n\tfnstsw\t%0",
9957 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9958 "fucom%z2\t%y2\n\tfnstsw\t%0",
9959 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9960
9961 "ficom%z2\t%y2\n\tfnstsw\t%0",
9962 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9963 NULL,
9964 NULL,
9965
9966 "fcomi\t{%y1, %0|%0, %y1}",
9967 "fcomip\t{%y1, %0|%0, %y1}",
9968 "fucomi\t{%y1, %0|%0, %y1}",
9969 "fucomip\t{%y1, %0|%0, %y1}",
9970
9971 NULL,
9972 NULL,
9973 NULL,
9974 NULL
9975 };
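/* Example of the encoding: eflags_p = 1, an FP (non-integer) operand,
   unordered_p = 0 and stack_top_dies = 1 give mask 9, which selects
   "fcomip\t{%y1, %0|%0, %y1}" from the table above.  */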
9976
9977 int mask;
9978 const char *ret;
9979
9980 mask = eflags_p << 3;
9981 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9982 mask |= unordered_p << 1;
9983 mask |= stack_top_dies;
9984
9985 gcc_assert (mask < 16);
9986 ret = alt[mask];
9987 gcc_assert (ret);
9988
9989 return ret;
9990 }
9991 }
9992
9993 void
9994 ix86_output_addr_vec_elt (FILE *file, int value)
9995 {
9996 const char *directive = ASM_LONG;
9997
9998 #ifdef ASM_QUAD
9999 if (TARGET_64BIT)
10000 directive = ASM_QUAD;
10001 #else
10002 gcc_assert (!TARGET_64BIT);
10003 #endif
10004
10005 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
10006 }
10007
10008 void
10009 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
10010 {
10011 const char *directive = ASM_LONG;
10012
10013 #ifdef ASM_QUAD
10014 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
10015 directive = ASM_QUAD;
10016 #else
10017 gcc_assert (!TARGET_64BIT);
10018 #endif
10019 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
10020 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
10021 fprintf (file, "%s%s%d-%s%d\n",
10022 directive, LPREFIX, value, LPREFIX, rel);
10023 else if (HAVE_AS_GOTOFF_IN_DATA)
10024 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
10025 #if TARGET_MACHO
10026 else if (TARGET_MACHO)
10027 {
10028 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
10029 machopic_output_function_base_name (file);
10030 fprintf(file, "\n");
10031 }
10032 #endif
10033 else
10034 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
10035 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
10036 }
10037 \f
10038 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
10039 for the target. */
10040
10041 void
10042 ix86_expand_clear (rtx dest)
10043 {
10044 rtx tmp;
10045
10046 /* We play register width games, which are only valid after reload. */
10047 gcc_assert (reload_completed);
10048
10049 /* Avoid HImode and its attendant prefix byte. */
10050 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
10051 dest = gen_rtx_REG (SImode, REGNO (dest));
10052 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
10053
10054 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
10055 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
10056 {
10057 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10058 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
10059 }
10060
10061 emit_insn (tmp);
10062 }
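/* Size rationale: "xorl %eax, %eax" encodes in 2 bytes whereas
   "movl $0, %eax" needs 5, so the xor form wins when optimizing for
   size; the explicit CLOBBER is needed because xor, unlike mov,
   overwrites the flags register.  */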
10063
10064 /* X is an unchanging MEM. If it is a constant pool reference, return
10065 the constant pool rtx, else NULL. */
10066
10067 rtx
10068 maybe_get_pool_constant (rtx x)
10069 {
10070 x = ix86_delegitimize_address (XEXP (x, 0));
10071
10072 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
10073 return get_pool_constant (x);
10074
10075 return NULL_RTX;
10076 }
10077
10078 void
10079 ix86_expand_move (enum machine_mode mode, rtx operands[])
10080 {
10081 rtx op0, op1;
10082 enum tls_model model;
10083
10084 op0 = operands[0];
10085 op1 = operands[1];
10086
10087 if (GET_CODE (op1) == SYMBOL_REF)
10088 {
10089 model = SYMBOL_REF_TLS_MODEL (op1);
10090 if (model)
10091 {
10092 op1 = legitimize_tls_address (op1, model, true);
10093 op1 = force_operand (op1, op0);
10094 if (op1 == op0)
10095 return;
10096 }
10097 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10098 && SYMBOL_REF_DLLIMPORT_P (op1))
10099 op1 = legitimize_dllimport_symbol (op1, false);
10100 }
10101 else if (GET_CODE (op1) == CONST
10102 && GET_CODE (XEXP (op1, 0)) == PLUS
10103 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
10104 {
10105 rtx addend = XEXP (XEXP (op1, 0), 1);
10106 rtx symbol = XEXP (XEXP (op1, 0), 0);
10107 rtx tmp = NULL;
10108
10109 model = SYMBOL_REF_TLS_MODEL (symbol);
10110 if (model)
10111 tmp = legitimize_tls_address (symbol, model, true);
10112 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10113 && SYMBOL_REF_DLLIMPORT_P (symbol))
10114 tmp = legitimize_dllimport_symbol (symbol, true);
10115
10116 if (tmp)
10117 {
10118 tmp = force_operand (tmp, NULL);
10119 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
10120 op0, 1, OPTAB_DIRECT);
10121 if (tmp == op0)
10122 return;
10123 }
10124 }
10125
10126 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
10127 {
10128 if (TARGET_MACHO && !TARGET_64BIT)
10129 {
10130 #if TARGET_MACHO
10131 if (MACHOPIC_PURE)
10132 {
10133 rtx temp = ((reload_in_progress
10134 || ((op0 && REG_P (op0))
10135 && mode == Pmode))
10136 ? op0 : gen_reg_rtx (Pmode));
10137 op1 = machopic_indirect_data_reference (op1, temp);
10138 op1 = machopic_legitimize_pic_address (op1, mode,
10139 temp == op1 ? 0 : temp);
10140 }
10141 else if (MACHOPIC_INDIRECT)
10142 op1 = machopic_indirect_data_reference (op1, 0);
10143 if (op0 == op1)
10144 return;
10145 #endif
10146 }
10147 else
10148 {
10149 if (MEM_P (op0))
10150 op1 = force_reg (Pmode, op1);
10151 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
10152 {
10153 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
10154 op1 = legitimize_pic_address (op1, reg);
10155 if (op0 == op1)
10156 return;
10157 }
10158 }
10159 }
10160 else
10161 {
10162 if (MEM_P (op0)
10163 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
10164 || !push_operand (op0, mode))
10165 && MEM_P (op1))
10166 op1 = force_reg (mode, op1);
10167
10168 if (push_operand (op0, mode)
10169 && ! general_no_elim_operand (op1, mode))
10170 op1 = copy_to_mode_reg (mode, op1);
10171
10172 /* Force large constants in 64bit compilation into a register
10173 so that they get CSEed. */
10174 if (can_create_pseudo_p ()
10175 && (mode == DImode) && TARGET_64BIT
10176 && immediate_operand (op1, mode)
10177 && !x86_64_zext_immediate_operand (op1, VOIDmode)
10178 && !register_operand (op0, mode)
10179 && optimize)
10180 op1 = copy_to_mode_reg (mode, op1);
10181
10182 if (can_create_pseudo_p ()
10183 && FLOAT_MODE_P (mode)
10184 && GET_CODE (op1) == CONST_DOUBLE)
10185 {
10186 /* If we are loading a floating point constant to a register,
10187 force the value to memory now, since we'll get better code
10188 out the back end. */
10189
10190 op1 = validize_mem (force_const_mem (mode, op1));
10191 if (!register_operand (op0, mode))
10192 {
10193 rtx temp = gen_reg_rtx (mode);
10194 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
10195 emit_move_insn (op0, temp);
10196 return;
10197 }
10198 }
10199 }
10200
10201 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
10202 }
10203
10204 void
10205 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
10206 {
10207 rtx op0 = operands[0], op1 = operands[1];
10208 unsigned int align = GET_MODE_ALIGNMENT (mode);
10209
10210 /* Force constants other than zero into memory. We do not know how
10211 the instructions used to build constants modify the upper 64 bits
10212 of the register; once we have that information, we may be able
10213 to handle some of them more efficiently. */
10214 if (can_create_pseudo_p ()
10215 && register_operand (op0, mode)
10216 && (CONSTANT_P (op1)
10217 || (GET_CODE (op1) == SUBREG
10218 && CONSTANT_P (SUBREG_REG (op1))))
10219 && standard_sse_constant_p (op1) <= 0)
10220 op1 = validize_mem (force_const_mem (mode, op1));
10221
10222 /* TDmode values are passed as TImode on the stack. TImode values
10223 are moved via xmm registers, and moving them to the stack can result in
10224 unaligned memory accesses. Use ix86_expand_vector_move_misalign()
10225 if the memory operand is not aligned correctly. */
10226 if (can_create_pseudo_p ()
10227 && (mode == TImode) && !TARGET_64BIT
10228 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
10229 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
10230 {
10231 rtx tmp[2];
10232
10233 /* ix86_expand_vector_move_misalign() does not like constants ... */
10234 if (CONSTANT_P (op1)
10235 || (GET_CODE (op1) == SUBREG
10236 && CONSTANT_P (SUBREG_REG (op1))))
10237 op1 = validize_mem (force_const_mem (mode, op1));
10238
10239 /* ... nor both arguments in memory. */
10240 if (!register_operand (op0, mode)
10241 && !register_operand (op1, mode))
10242 op1 = force_reg (mode, op1);
10243
10244 tmp[0] = op0; tmp[1] = op1;
10245 ix86_expand_vector_move_misalign (mode, tmp);
10246 return;
10247 }
10248
10249 /* Make operand1 a register if it isn't already. */
10250 if (can_create_pseudo_p ()
10251 && !register_operand (op0, mode)
10252 && !register_operand (op1, mode))
10253 {
10254 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
10255 return;
10256 }
10257
10258 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
10259 }
10260
10261 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
10262 straight to ix86_expand_vector_move. */
10263 /* Code generation for scalar reg-reg moves of single and double precision data:
10264 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
10265 movaps reg, reg
10266 else
10267 movss reg, reg
10268 if (x86_sse_partial_reg_dependency == true)
10269 movapd reg, reg
10270 else
10271 movsd reg, reg
10272
10273 Code generation for scalar loads of double precision data:
10274 if (x86_sse_split_regs == true)
10275 movlpd mem, reg (gas syntax)
10276 else
10277 movsd mem, reg
10278
10279 Code generation for unaligned packed loads of single precision data
10280 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
10281 if (x86_sse_unaligned_move_optimal)
10282 movups mem, reg
10283
10284 if (x86_sse_partial_reg_dependency == true)
10285 {
10286 xorps reg, reg
10287 movlps mem, reg
10288 movhps mem+8, reg
10289 }
10290 else
10291 {
10292 movlps mem, reg
10293 movhps mem+8, reg
10294 }
10295
10296 Code generation for unaligned packed loads of double precision data
10297 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
10298 if (x86_sse_unaligned_move_optimal)
10299 movupd mem, reg
10300
10301 if (x86_sse_split_regs == true)
10302 {
10303 movlpd mem, reg
10304 movhpd mem+8, reg
10305 }
10306 else
10307 {
10308 movsd mem, reg
10309 movhpd mem+8, reg
10310 }
10311 */
10312
10313 void
10314 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
10315 {
10316 rtx op0, op1, m;
10317
10318 op0 = operands[0];
10319 op1 = operands[1];
10320
10321 if (MEM_P (op1))
10322 {
10323 /* If we're optimizing for size, movups is the smallest. */
10324 if (optimize_size)
10325 {
10326 op0 = gen_lowpart (V4SFmode, op0);
10327 op1 = gen_lowpart (V4SFmode, op1);
10328 emit_insn (gen_sse_movups (op0, op1));
10329 return;
10330 }
10331
10332 /* ??? If we have typed data, then it would appear that using
10333 movdqu is the only way to get unaligned data loaded with
10334 integer type. */
10335 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10336 {
10337 op0 = gen_lowpart (V16QImode, op0);
10338 op1 = gen_lowpart (V16QImode, op1);
10339 emit_insn (gen_sse2_movdqu (op0, op1));
10340 return;
10341 }
10342
10343 if (TARGET_SSE2 && mode == V2DFmode)
10344 {
10345 rtx zero;
10346
10347 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10348 {
10349 op0 = gen_lowpart (V2DFmode, op0);
10350 op1 = gen_lowpart (V2DFmode, op1);
10351 emit_insn (gen_sse2_movupd (op0, op1));
10352 return;
10353 }
10354
10355 /* When SSE registers are split into halves, we can avoid
10356 writing to the top half twice. */
10357 if (TARGET_SSE_SPLIT_REGS)
10358 {
10359 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10360 zero = op0;
10361 }
10362 else
10363 {
10364 /* ??? Not sure about the best option for the Intel chips.
10365 The following would seem to satisfy; the register is
10366 entirely cleared, breaking the dependency chain. We
10367 then store to the upper half, with a dependency depth
10368 of one. A rumor has it that Intel recommends two movsd
10369 followed by an unpacklpd, but this is unconfirmed. And
10370 given that the dependency depth of the unpacklpd would
10371 still be one, I'm not sure why this would be better. */
10372 zero = CONST0_RTX (V2DFmode);
10373 }
10374
10375 m = adjust_address (op1, DFmode, 0);
10376 emit_insn (gen_sse2_loadlpd (op0, zero, m));
10377 m = adjust_address (op1, DFmode, 8);
10378 emit_insn (gen_sse2_loadhpd (op0, op0, m));
10379 }
10380 else
10381 {
10382 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10383 {
10384 op0 = gen_lowpart (V4SFmode, op0);
10385 op1 = gen_lowpart (V4SFmode, op1);
10386 emit_insn (gen_sse_movups (op0, op1));
10387 return;
10388 }
10389
10390 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
10391 emit_move_insn (op0, CONST0_RTX (mode));
10392 else
10393 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10394
10395 if (mode != V4SFmode)
10396 op0 = gen_lowpart (V4SFmode, op0);
10397 m = adjust_address (op1, V2SFmode, 0);
10398 emit_insn (gen_sse_loadlps (op0, op0, m));
10399 m = adjust_address (op1, V2SFmode, 8);
10400 emit_insn (gen_sse_loadhps (op0, op0, m));
10401 }
10402 }
10403 else if (MEM_P (op0))
10404 {
10405 /* If we're optimizing for size, movups is the smallest. */
10406 if (optimize_size)
10407 {
10408 op0 = gen_lowpart (V4SFmode, op0);
10409 op1 = gen_lowpart (V4SFmode, op1);
10410 emit_insn (gen_sse_movups (op0, op1));
10411 return;
10412 }
10413
10414 /* ??? Similar to above, only less clear because of quote
10415 typeless stores unquote. */
10416 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
10417 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10418 {
10419 op0 = gen_lowpart (V16QImode, op0);
10420 op1 = gen_lowpart (V16QImode, op1);
10421 emit_insn (gen_sse2_movdqu (op0, op1));
10422 return;
10423 }
10424
10425 if (TARGET_SSE2 && mode == V2DFmode)
10426 {
10427 m = adjust_address (op0, DFmode, 0);
10428 emit_insn (gen_sse2_storelpd (m, op1));
10429 m = adjust_address (op0, DFmode, 8);
10430 emit_insn (gen_sse2_storehpd (m, op1));
10431 }
10432 else
10433 {
10434 if (mode != V4SFmode)
10435 op1 = gen_lowpart (V4SFmode, op1);
10436 m = adjust_address (op0, V2SFmode, 0);
10437 emit_insn (gen_sse_storelps (m, op1));
10438 m = adjust_address (op0, V2SFmode, 8);
10439 emit_insn (gen_sse_storehps (m, op1));
10440 }
10441 }
10442 else
10443 gcc_unreachable ();
10444 }
10445
10446 /* Expand a push in MODE. This is some mode for which we do not support
10447 proper push instructions, at least from the registers that we expect
10448 the value to live in. */
10449
10450 void
10451 ix86_expand_push (enum machine_mode mode, rtx x)
10452 {
10453 rtx tmp;
10454
10455 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
10456 GEN_INT (-GET_MODE_SIZE (mode)),
10457 stack_pointer_rtx, 1, OPTAB_DIRECT);
10458 if (tmp != stack_pointer_rtx)
10459 emit_move_insn (stack_pointer_rtx, tmp);
10460
10461 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
10462 emit_move_insn (tmp, x);
10463 }
10464
10465 /* Helper function of ix86_fixup_binary_operands to canonicalize
10466 operand order. Returns true if the operands should be swapped. */
10467
10468 static bool
10469 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
10470 rtx operands[])
10471 {
10472 rtx dst = operands[0];
10473 rtx src1 = operands[1];
10474 rtx src2 = operands[2];
10475
10476 /* If the operation is not commutative, we can't do anything. */
10477 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
10478 return false;
10479
10480 /* Highest priority is that src1 should match dst. */
10481 if (rtx_equal_p (dst, src1))
10482 return false;
10483 if (rtx_equal_p (dst, src2))
10484 return true;
10485
10486 /* Next highest priority is that immediate constants come second. */
10487 if (immediate_operand (src2, mode))
10488 return false;
10489 if (immediate_operand (src1, mode))
10490 return true;
10491
10492 /* Lowest priority is that memory references should come second. */
10493 if (MEM_P (src2))
10494 return false;
10495 if (MEM_P (src1))
10496 return true;
10497
10498 return false;
10499 }
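/* Example (hypothetical operands): with dst a fresh register, src1 a MEM
   and src2 a register for a commutative code such as PLUS, none of the
   earlier rules apply and the memory rule above requests a swap, so the
   expanders end up with the memory reference as the second source.  */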
10500
10501
10502 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
10503 destination to use for the operation. If different from the true
10504 destination in operands[0], a copy operation will be required. */
10505
10506 rtx
10507 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
10508 rtx operands[])
10509 {
10510 rtx dst = operands[0];
10511 rtx src1 = operands[1];
10512 rtx src2 = operands[2];
10513
10514 /* Canonicalize operand order. */
10515 if (ix86_swap_binary_operands_p (code, mode, operands))
10516 {
10517 rtx temp = src1;
10518 src1 = src2;
10519 src2 = temp;
10520 }
10521
10522 /* Both source operands cannot be in memory. */
10523 if (MEM_P (src1) && MEM_P (src2))
10524 {
10525 /* Optimization: Only read from memory once. */
10526 if (rtx_equal_p (src1, src2))
10527 {
10528 src2 = force_reg (mode, src2);
10529 src1 = src2;
10530 }
10531 else
10532 src2 = force_reg (mode, src2);
10533 }
10534
10535 /* If the destination is memory, and we do not have matching source
10536 operands, do things in registers. */
10537 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10538 dst = gen_reg_rtx (mode);
10539
10540 /* Source 1 cannot be a constant. */
10541 if (CONSTANT_P (src1))
10542 src1 = force_reg (mode, src1);
10543
10544 /* Source 1 cannot be a non-matching memory. */
10545 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10546 src1 = force_reg (mode, src1);
10547
10548 operands[1] = src1;
10549 operands[2] = src2;
10550 return dst;
10551 }
10552
10553 /* Similarly, but assume that the destination has already been
10554 set up properly. */
10555
10556 void
10557 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
10558 enum machine_mode mode, rtx operands[])
10559 {
10560 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
10561 gcc_assert (dst == operands[0]);
10562 }
10563
10564 /* Attempt to expand a binary operator. Make the expansion closer to the
10565 actual machine than just general_operand, which would allow 3 separate
10566 memory references (one output, two input) in a single insn. */
10567
10568 void
10569 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
10570 rtx operands[])
10571 {
10572 rtx src1, src2, dst, op, clob;
10573
10574 dst = ix86_fixup_binary_operands (code, mode, operands);
10575 src1 = operands[1];
10576 src2 = operands[2];
10577
10578 /* Emit the instruction. */
10579
10580 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
10581 if (reload_in_progress)
10582 {
10583 /* Reload doesn't know about the flags register, and doesn't know that
10584 it doesn't want to clobber it. We can only do this with PLUS. */
10585 gcc_assert (code == PLUS);
10586 emit_insn (op);
10587 }
10588 else
10589 {
10590 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10591 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10592 }
10593
10594 /* Fix up the destination if needed. */
10595 if (dst != operands[0])
10596 emit_move_insn (operands[0], dst);
10597 }
10598
10599 /* Return TRUE or FALSE depending on whether the binary operator meets the
10600 appropriate constraints. */
10601
10602 int
10603 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
10604 rtx operands[3])
10605 {
10606 rtx dst = operands[0];
10607 rtx src1 = operands[1];
10608 rtx src2 = operands[2];
10609
10610 /* Both source operands cannot be in memory. */
10611 if (MEM_P (src1) && MEM_P (src2))
10612 return 0;
10613
10614 /* Canonicalize operand order for commutative operators. */
10615 if (ix86_swap_binary_operands_p (code, mode, operands))
10616 {
10617 rtx temp = src1;
10618 src1 = src2;
10619 src2 = temp;
10620 }
10621
10622 /* If the destination is memory, we must have a matching source operand. */
10623 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10624 return 0;
10625
10626 /* Source 1 cannot be a constant. */
10627 if (CONSTANT_P (src1))
10628 return 0;
10629
10630 /* Source 1 cannot be a non-matching memory. */
10631 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10632 return 0;
10633
10634 return 1;
10635 }
10636
10637 /* Attempt to expand a unary operator. Make the expansion closer to the
10638 actual machine than just general_operand, which would allow 2 separate
10639 memory references (one output, one input) in a single insn. */
10640
10641 void
10642 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10643 rtx operands[])
10644 {
10645 int matching_memory;
10646 rtx src, dst, op, clob;
10647
10648 dst = operands[0];
10649 src = operands[1];
10650
10651 /* If the destination is memory, and we do not have matching source
10652 operands, do things in registers. */
10653 matching_memory = 0;
10654 if (MEM_P (dst))
10655 {
10656 if (rtx_equal_p (dst, src))
10657 matching_memory = 1;
10658 else
10659 dst = gen_reg_rtx (mode);
10660 }
10661
10662 /* When source operand is memory, destination must match. */
10663 if (MEM_P (src) && !matching_memory)
10664 src = force_reg (mode, src);
10665
10666 /* Emit the instruction. */
10667
10668 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
10669 if (reload_in_progress || code == NOT)
10670 {
10671 /* Reload doesn't know about the flags register, and doesn't know that
10672 it doesn't want to clobber it. */
10673 gcc_assert (code == NOT);
10674 emit_insn (op);
10675 }
10676 else
10677 {
10678 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10679 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10680 }
10681
10682 /* Fix up the destination if needed. */
10683 if (dst != operands[0])
10684 emit_move_insn (operands[0], dst);
10685 }
10686
10687 /* Return TRUE or FALSE depending on whether the unary operator meets the
10688 appropriate constraints. */
10689
10690 int
10691 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
10692 enum machine_mode mode ATTRIBUTE_UNUSED,
10693 rtx operands[2] ATTRIBUTE_UNUSED)
10694 {
10695 /* If one of operands is memory, source and destination must match. */
10696 if ((MEM_P (operands[0])
10697 || MEM_P (operands[1]))
10698 && ! rtx_equal_p (operands[0], operands[1]))
10699 return FALSE;
10700 return TRUE;
10701 }
10702
10703 /* Post-reload splitter for converting an SF or DFmode value in an
10704 SSE register into an unsigned SImode. */
10705
10706 void
10707 ix86_split_convert_uns_si_sse (rtx operands[])
10708 {
10709 enum machine_mode vecmode;
10710 rtx value, large, zero_or_two31, input, two31, x;
10711
10712 large = operands[1];
10713 zero_or_two31 = operands[2];
10714 input = operands[3];
10715 two31 = operands[4];
10716 vecmode = GET_MODE (large);
10717 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
10718
10719 /* Load up the value into the low element. We must ensure that the other
10720 elements are valid floats -- zero is the easiest such value. */
10721 if (MEM_P (input))
10722 {
10723 if (vecmode == V4SFmode)
10724 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
10725 else
10726 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
10727 }
10728 else
10729 {
10730 input = gen_rtx_REG (vecmode, REGNO (input));
10731 emit_move_insn (value, CONST0_RTX (vecmode));
10732 if (vecmode == V4SFmode)
10733 emit_insn (gen_sse_movss (value, value, input));
10734 else
10735 emit_insn (gen_sse2_movsd (value, value, input));
10736 }
10737
10738 emit_move_insn (large, two31);
10739 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
10740
10741 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
10742 emit_insn (gen_rtx_SET (VOIDmode, large, x));
10743
10744 x = gen_rtx_AND (vecmode, zero_or_two31, large);
10745 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
10746
10747 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
10748 emit_insn (gen_rtx_SET (VOIDmode, value, x));
10749
10750 large = gen_rtx_REG (V4SImode, REGNO (large));
10751 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
10752
10753 x = gen_rtx_REG (V4SImode, REGNO (value));
10754 if (vecmode == V4SFmode)
10755 emit_insn (gen_sse2_cvttps2dq (x, value));
10756 else
10757 emit_insn (gen_sse2_cvttpd2dq (x, value));
10758 value = x;
10759
10760 emit_insn (gen_xorv4si3 (value, value, large));
10761 }
10762
10763 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10764 Expects the 64-bit DImode to be supplied in a pair of integral
10765 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10766 -mfpmath=sse, !optimize_size only. */
10767
10768 void
10769 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
10770 {
10771 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
10772 rtx int_xmm, fp_xmm;
10773 rtx biases, exponents;
10774 rtx x;
10775
10776 int_xmm = gen_reg_rtx (V4SImode);
10777 if (TARGET_INTER_UNIT_MOVES)
10778 emit_insn (gen_movdi_to_sse (int_xmm, input));
10779 else if (TARGET_SSE_SPLIT_REGS)
10780 {
10781 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
10782 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10783 }
10784 else
10785 {
10786 x = gen_reg_rtx (V2DImode);
10787 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10788 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
10789 }
10790
10791 x = gen_rtx_CONST_VECTOR (V4SImode,
10792 gen_rtvec (4, GEN_INT (0x43300000UL),
10793 GEN_INT (0x45300000UL),
10794 const0_rtx, const0_rtx));
10795 exponents = validize_mem (force_const_mem (V4SImode, x));
10796
10797 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10798 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10799
10800 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10801 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10802 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10803 (0x1.0p84 + double(fp_value_hi_xmm)).
10804 Note these exponents differ by 32. */
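/* Sketch of the arithmetic: 0x43300000 is the high word of the double
   0x1.0p52, so packing the low 32 bits of the input below it yields the
   value 2**52 + lo; likewise 0x45300000 is the high word of 0x1.0p84,
   so the other lane reads as 2**84 + 2**32 * hi.  Subtracting the two
   biases gives lo and hi * 2**32 exactly, and the final addition rounds
   once to the double nearest the 64-bit input.  */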
10805
10806 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
10807
10808 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10809 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10810 real_ldexp (&bias_lo_rvt, &dconst1, 52);
10811 real_ldexp (&bias_hi_rvt, &dconst1, 84);
10812 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
10813 x = const_double_from_real_value (bias_hi_rvt, DFmode);
10814 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
10815 biases = validize_mem (force_const_mem (V2DFmode, biases));
10816 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
10817
10818 /* Add the upper and lower DFmode values together. */
10819 if (TARGET_SSE3)
10820 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
10821 else
10822 {
10823 x = copy_to_mode_reg (V2DFmode, fp_xmm);
10824 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
10825 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
10826 }
10827
10828 ix86_expand_vector_extract (false, target, fp_xmm, 0);
10829 }
10830
10831 /* Convert an unsigned SImode value into a DFmode. Only currently used
10832 for SSE, but applicable anywhere. */
10833
10834 void
10835 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10836 {
10837 REAL_VALUE_TYPE TWO31r;
10838 rtx x, fp;
10839
10840 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10841 NULL, 1, OPTAB_DIRECT);
10842
10843 fp = gen_reg_rtx (DFmode);
10844 emit_insn (gen_floatsidf2 (fp, x));
10845
10846 real_ldexp (&TWO31r, &dconst1, 31);
10847 x = const_double_from_real_value (TWO31r, DFmode);
10848
10849 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
10850 if (x != target)
10851 emit_move_insn (target, x);
10852 }
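/* Worked example (value chosen for illustration): for input 0x80000005
   the PLUS with -2**31 wraps to the signed value 5, floatsidf2 converts
   it to 5.0, and adding 0x1.0p31 back yields 2147483653.0, the correct
   unsigned interpretation of the input.  */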
10853
10854 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10855 32-bit mode; otherwise we have a direct convert instruction. */
10856
10857 void
10858 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10859 {
10860 REAL_VALUE_TYPE TWO32r;
10861 rtx fp_lo, fp_hi, x;
10862
10863 fp_lo = gen_reg_rtx (DFmode);
10864 fp_hi = gen_reg_rtx (DFmode);
10865
10866 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10867
10868 real_ldexp (&TWO32r, &dconst1, 32);
10869 x = const_double_from_real_value (TWO32r, DFmode);
10870 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
10871
10872 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10873
10874 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
10875 0, OPTAB_DIRECT);
10876 if (x != target)
10877 emit_move_insn (target, x);
10878 }
10879
10880 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10881 For x86_32, -mfpmath=sse, !optimize_size only. */
10882 void
10883 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10884 {
10885 REAL_VALUE_TYPE ONE16r;
10886 rtx fp_hi, fp_lo, int_hi, int_lo, x;
10887
10888 real_ldexp (&ONE16r, &dconst1, 16);
10889 x = const_double_from_real_value (ONE16r, SFmode);
10890 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10891 NULL, 0, OPTAB_DIRECT);
10892 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10893 NULL, 0, OPTAB_DIRECT);
10894 fp_hi = gen_reg_rtx (SFmode);
10895 fp_lo = gen_reg_rtx (SFmode);
10896 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10897 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10898 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10899 0, OPTAB_DIRECT);
10900 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10901 0, OPTAB_DIRECT);
10902 if (!rtx_equal_p (target, fp_hi))
10903 emit_move_insn (target, fp_hi);
10904 }
10905
10906 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
10907 then replicate the value for all elements of the vector
10908 register. */
10909
10910 rtx
10911 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10912 {
10913 rtvec v;
10914 switch (mode)
10915 {
10916 case SImode:
10917 gcc_assert (vect);
10918 v = gen_rtvec (4, value, value, value, value);
10919 return gen_rtx_CONST_VECTOR (V4SImode, v);
10920
10921 case DImode:
10922 gcc_assert (vect);
10923 v = gen_rtvec (2, value, value);
10924 return gen_rtx_CONST_VECTOR (V2DImode, v);
10925
10926 case SFmode:
10927 if (vect)
10928 v = gen_rtvec (4, value, value, value, value);
10929 else
10930 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10931 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10932 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10933
10934 case DFmode:
10935 if (vect)
10936 v = gen_rtvec (2, value, value);
10937 else
10938 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10939 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10940
10941 default:
10942 gcc_unreachable ();
10943 }
10944 }
10945
10946 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
10947 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
10948 for an SSE register. If VECT is true, then replicate the mask for
10949 all elements of the vector register. If INVERT is true, then create
10950 a mask excluding the sign bit. */
10951
10952 rtx
10953 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10954 {
10955 enum machine_mode vec_mode, imode;
10956 HOST_WIDE_INT hi, lo;
10957 int shift = 63;
10958 rtx v;
10959 rtx mask;
10960
10961 /* Find the sign bit, sign extended to 2*HWI. */
10962 switch (mode)
10963 {
10964 case SImode:
10965 case SFmode:
10966 imode = SImode;
10967 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
10968 lo = 0x80000000, hi = lo < 0;
10969 break;
10970
10971 case DImode:
10972 case DFmode:
10973 imode = DImode;
10974 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
10975 if (HOST_BITS_PER_WIDE_INT >= 64)
10976 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10977 else
10978 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
10979 break;
10980
10981 case TImode:
10982 case TFmode:
10983 imode = TImode;
10984 vec_mode = VOIDmode;
10985 gcc_assert (HOST_BITS_PER_WIDE_INT >= 64);
10986 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
10987 break;
10988
10989 default:
10990 gcc_unreachable ();
10991 }
10992
10993 if (invert)
10994 lo = ~lo, hi = ~hi;
10995
10996 /* Force this value into the low part of a fp vector constant. */
10997 mask = immed_double_const (lo, hi, imode);
10998 mask = gen_lowpart (mode, mask);
10999
11000 if (vec_mode == VOIDmode)
11001 return force_reg (mode, mask);
11002
11003 v = ix86_build_const_vector (mode, vect, mask);
11004 return force_reg (vec_mode, v);
11005 }
11006
11007 /* Generate code for floating point ABS or NEG. */
11008
11009 void
11010 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
11011 rtx operands[])
11012 {
11013 rtx mask, set, use, clob, dst, src;
11014 bool matching_memory;
11015 bool use_sse = false;
11016 bool vector_mode = VECTOR_MODE_P (mode);
11017 enum machine_mode elt_mode = mode;
11018
11019 if (vector_mode)
11020 {
11021 elt_mode = GET_MODE_INNER (mode);
11022 use_sse = true;
11023 }
11024 else if (mode == TFmode)
11025 use_sse = true;
11026 else if (TARGET_SSE_MATH)
11027 use_sse = SSE_FLOAT_MODE_P (mode);
11028
11029 /* NEG and ABS performed with SSE use bitwise mask operations.
11030 Create the appropriate mask now. */
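/* The NEG mask has only the sign bit set (to be XORed in); the ABS mask
   is the inverted one, every bit except the sign bit, so that ANDing
   clears the sign.  Hence INVERT is passed as (code == ABS). */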
11031 if (use_sse)
11032 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
11033 else
11034 mask = NULL_RTX;
11035
11036 dst = operands[0];
11037 src = operands[1];
11038
11039 /* If the destination is memory, and we don't have matching source
11040 operands or we're using the x87, do things in registers. */
11041 matching_memory = false;
11042 if (MEM_P (dst))
11043 {
11044 if (use_sse && rtx_equal_p (dst, src))
11045 matching_memory = true;
11046 else
11047 dst = gen_reg_rtx (mode);
11048 }
11049 if (MEM_P (src) && !matching_memory)
11050 src = force_reg (mode, src);
11051
11052 if (vector_mode)
11053 {
11054 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
11055 set = gen_rtx_SET (VOIDmode, dst, set);
11056 emit_insn (set);
11057 }
11058 else
11059 {
11060 set = gen_rtx_fmt_e (code, mode, src);
11061 set = gen_rtx_SET (VOIDmode, dst, set);
11062 if (mask)
11063 {
11064 use = gen_rtx_USE (VOIDmode, mask);
11065 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
11066 emit_insn (gen_rtx_PARALLEL (VOIDmode,
11067 gen_rtvec (3, set, use, clob)));
11068 }
11069 else
11070 emit_insn (set);
11071 }
11072
11073 if (dst != operands[0])
11074 emit_move_insn (operands[0], dst);
11075 }
11076
11077 /* Expand a copysign operation. Special case operand 0 being a constant. */
11078
11079 void
11080 ix86_expand_copysign (rtx operands[])
11081 {
11082 enum machine_mode mode, vmode;
11083 rtx dest, op0, op1, mask, nmask;
11084
11085 dest = operands[0];
11086 op0 = operands[1];
11087 op1 = operands[2];
11088
11089 mode = GET_MODE (dest);
11090 vmode = mode == SFmode ? V4SFmode : V2DFmode;
11091
11092 if (GET_CODE (op0) == CONST_DOUBLE)
11093 {
11094 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
11095
11096 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
11097 op0 = simplify_unary_operation (ABS, mode, op0, mode);
11098
11099 if (mode == SFmode || mode == DFmode)
11100 {
11101 if (op0 == CONST0_RTX (mode))
11102 op0 = CONST0_RTX (vmode);
11103 else
11104 {
11105 rtvec v;
11106
11107 if (mode == SFmode)
11108 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
11109 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
11110 else
11111 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
11112 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
11113 }
11114 }
11115
11116 mask = ix86_build_signbit_mask (mode, 0, 0);
11117
11118 if (mode == SFmode)
11119 copysign_insn = gen_copysignsf3_const;
11120 else if (mode == DFmode)
11121 copysign_insn = gen_copysigndf3_const;
11122 else
11123 copysign_insn = gen_copysigntf3_const;
11124
11125 emit_insn (copysign_insn (dest, op0, op1, mask));
11126 }
11127 else
11128 {
11129 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
11130
11131 nmask = ix86_build_signbit_mask (mode, 0, 1);
11132 mask = ix86_build_signbit_mask (mode, 0, 0);
11133
11134 if (mode == SFmode)
11135 copysign_insn = gen_copysignsf3_var;
11136 else if (mode == DFmode)
11137 copysign_insn = gen_copysigndf3_var;
11138 else
11139 copysign_insn = gen_copysigntf3_var;
11140
11141 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
11142 }
11143 }
11144
11145 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
11146 be a constant, and so has already been expanded into a vector constant. */
11147
11148 void
11149 ix86_split_copysign_const (rtx operands[])
11150 {
11151 enum machine_mode mode, vmode;
11152 rtx dest, op0, op1, mask, x;
11153
11154 dest = operands[0];
11155 op0 = operands[1];
11156 op1 = operands[2];
11157 mask = operands[3];
11158
11159 mode = GET_MODE (dest);
11160 vmode = GET_MODE (mask);
11161
11162 dest = simplify_gen_subreg (vmode, dest, mode, 0);
11163 x = gen_rtx_AND (vmode, dest, mask);
11164 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11165
11166 if (op0 != CONST0_RTX (vmode))
11167 {
11168 x = gen_rtx_IOR (vmode, dest, op0);
11169 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11170 }
11171 }
11172
11173 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
11174 so we have to do two masks. */
11175
11176 void
11177 ix86_split_copysign_var (rtx operands[])
11178 {
11179 enum machine_mode mode, vmode;
11180 rtx dest, scratch, op0, op1, mask, nmask, x;
11181
11182 dest = operands[0];
11183 scratch = operands[1];
11184 op0 = operands[2];
11185 op1 = operands[3];
11186 nmask = operands[4];
11187 mask = operands[5];
11188
11189 mode = GET_MODE (dest);
11190 vmode = GET_MODE (mask);
11191
11192 if (rtx_equal_p (op0, op1))
11193 {
11194 /* Shouldn't happen often (it's useless, obviously), but when it does
11195 we'd generate incorrect code if we continue below. */
11196 emit_move_insn (dest, op0);
11197 return;
11198 }
11199
11200 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
11201 {
11202 gcc_assert (REGNO (op1) == REGNO (scratch));
11203
11204 x = gen_rtx_AND (vmode, scratch, mask);
11205 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
11206
11207 dest = mask;
11208 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11209 x = gen_rtx_NOT (vmode, dest);
11210 x = gen_rtx_AND (vmode, x, op0);
11211 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11212 }
11213 else
11214 {
11215 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
11216 {
11217 x = gen_rtx_AND (vmode, scratch, mask);
11218 }
11219 else /* alternative 2,4 */
11220 {
11221 gcc_assert (REGNO (mask) == REGNO (scratch));
11222 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
11223 x = gen_rtx_AND (vmode, scratch, op1);
11224 }
11225 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
11226
11227 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
11228 {
11229 dest = simplify_gen_subreg (vmode, op0, mode, 0);
11230 x = gen_rtx_AND (vmode, dest, nmask);
11231 }
11232 else /* alternative 3,4 */
11233 {
11234 gcc_assert (REGNO (nmask) == REGNO (dest));
11235 dest = nmask;
11236 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11237 x = gen_rtx_AND (vmode, dest, op0);
11238 }
11239 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11240 }
11241
11242 x = gen_rtx_IOR (vmode, dest, scratch);
11243 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11244 }
11245
11246 /* Return TRUE or FALSE depending on whether the first SET in INSN
11247 has source and destination with matching CC modes and whether the
11248 CC mode is at least as constrained as REQ_MODE. */
11249
11250 int
11251 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
11252 {
11253 rtx set;
11254 enum machine_mode set_mode;
11255
11256 set = PATTERN (insn);
11257 if (GET_CODE (set) == PARALLEL)
11258 set = XVECEXP (set, 0, 0);
11259 gcc_assert (GET_CODE (set) == SET);
11260 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
11261
11262 set_mode = GET_MODE (SET_DEST (set));
11263 switch (set_mode)
11264 {
11265 case CCNOmode:
11266 if (req_mode != CCNOmode
11267 && (req_mode != CCmode
11268 || XEXP (SET_SRC (set), 1) != const0_rtx))
11269 return 0;
11270 break;
11271 case CCmode:
11272 if (req_mode == CCGCmode)
11273 return 0;
11274 /* FALLTHRU */
11275 case CCGCmode:
11276 if (req_mode == CCGOCmode || req_mode == CCNOmode)
11277 return 0;
11278 /* FALLTHRU */
11279 case CCGOCmode:
11280 if (req_mode == CCZmode)
11281 return 0;
11282 /* FALLTHRU */
11283 case CCZmode:
11284 break;
11285
11286 default:
11287 gcc_unreachable ();
11288 }
11289
11290 return (GET_MODE (SET_SRC (set)) == set_mode);
11291 }
11292
11293 /* Generate insn patterns to do an integer compare of OPERANDS. */
11294
11295 static rtx
11296 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
11297 {
11298 enum machine_mode cmpmode;
11299 rtx tmp, flags;
11300
11301 cmpmode = SELECT_CC_MODE (code, op0, op1);
11302 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
11303
11304 /* This is very simple, but making the interface the same as in the
11305 FP case makes the rest of the code easier. */
11306 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
11307 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
11308
11309 /* Return the test that should be put into the flags user, i.e.
11310 the bcc, scc, or cmov instruction. */
11311 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
11312 }
11313
11314 /* Figure out whether to use ordered or unordered fp comparisons.
11315 Return the appropriate mode to use. */
11316
11317 enum machine_mode
11318 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
11319 {
11320 /* ??? In order to make all comparisons reversible, we do all comparisons
11321 non-trapping when compiling for IEEE. Once gcc is able to distinguish
11322 all forms of trapping and nontrapping comparisons, we can make inequality
11323 comparisons trapping again, since it results in better code when using
11324 FCOM based compares. */
11325 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
11326 }
11327
11328 enum machine_mode
11329 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
11330 {
11331 enum machine_mode mode = GET_MODE (op0);
11332
11333 if (SCALAR_FLOAT_MODE_P (mode))
11334 {
11335 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11336 return ix86_fp_compare_mode (code);
11337 }
11338
11339 switch (code)
11340 {
11341 /* Only zero flag is needed. */
11342 case EQ: /* ZF=0 */
11343 case NE: /* ZF!=0 */
11344 return CCZmode;
11345 /* Codes needing carry flag. */
11346 case GEU: /* CF=0 */
11347 case LTU: /* CF=1 */
11348 /* Detect overflow checks. They need just the carry flag. */
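/* This matches the idiom (a + b) < a (resp. (a + b) >= a), where the
   carry out of the addition is exactly the condition being tested. */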
11349 if (GET_CODE (op0) == PLUS
11350 && rtx_equal_p (op1, XEXP (op0, 0)))
11351 return CCCmode;
11352 else
11353 return CCmode;
11354 case GTU: /* CF=0 & ZF=0 */
11355 case LEU: /* CF=1 | ZF=1 */
11356 /* Detect overflow checks. They need just the carry flag. */
11357 if (GET_CODE (op0) == MINUS
11358 && rtx_equal_p (op1, XEXP (op0, 0)))
11359 return CCCmode;
11360 else
11361 return CCmode;
11362 /* Codes possibly doable only with sign flag when
11363 comparing against zero. */
11364 case GE: /* SF=OF or SF=0 */
11365 case LT: /* SF<>OF or SF=1 */
11366 if (op1 == const0_rtx)
11367 return CCGOCmode;
11368 else
11369 /* For other cases Carry flag is not required. */
11370 return CCGCmode;
11371 /* Codes doable only with sign flag when comparing
11372 against zero, but we miss a jump instruction for it,
11373 so we need to use relational tests against overflow,
11374 which thus needs to be zero. */
11375 case GT: /* ZF=0 & SF=OF */
11376 case LE: /* ZF=1 | SF<>OF */
11377 if (op1 == const0_rtx)
11378 return CCNOmode;
11379 else
11380 return CCGCmode;
11381 /* The strcmp pattern does (use flags), and combine may ask us for the proper
11382 mode. */
11383 case USE:
11384 return CCmode;
11385 default:
11386 gcc_unreachable ();
11387 }
11388 }
11389
11390 /* Return the fixed registers used for condition codes. */
11391
11392 static bool
11393 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
11394 {
11395 *p1 = FLAGS_REG;
11396 *p2 = FPSR_REG;
11397 return true;
11398 }
11399
11400 /* If two condition code modes are compatible, return a condition code
11401 mode which is compatible with both. Otherwise, return
11402 VOIDmode. */
11403
11404 static enum machine_mode
11405 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
11406 {
11407 if (m1 == m2)
11408 return m1;
11409
11410 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
11411 return VOIDmode;
11412
11413 if ((m1 == CCGCmode && m2 == CCGOCmode)
11414 || (m1 == CCGOCmode && m2 == CCGCmode))
11415 return CCGCmode;
11416
11417 switch (m1)
11418 {
11419 default:
11420 gcc_unreachable ();
11421
11422 case CCmode:
11423 case CCGCmode:
11424 case CCGOCmode:
11425 case CCNOmode:
11426 case CCAmode:
11427 case CCCmode:
11428 case CCOmode:
11429 case CCSmode:
11430 case CCZmode:
11431 switch (m2)
11432 {
11433 default:
11434 return VOIDmode;
11435
11436 case CCmode:
11437 case CCGCmode:
11438 case CCGOCmode:
11439 case CCNOmode:
11440 case CCAmode:
11441 case CCCmode:
11442 case CCOmode:
11443 case CCSmode:
11444 case CCZmode:
11445 return CCmode;
11446 }
11447
11448 case CCFPmode:
11449 case CCFPUmode:
11450 /* These are only compatible with themselves, which we already
11451 checked above. */
11452 return VOIDmode;
11453 }
11454 }
11455
11456 /* Split comparison code CODE into comparisons we can do using branch
11457 instructions. BYPASS_CODE is comparison code for branch that will
11458 branch around FIRST_CODE and SECOND_CODE. If any of the branches
11459 is not required, its code is set to UNKNOWN.
11460 We never require more than two branches. */
11461
11462 void
11463 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
11464 enum rtx_code *first_code,
11465 enum rtx_code *second_code)
11466 {
11467 *first_code = code;
11468 *bypass_code = UNKNOWN;
11469 *second_code = UNKNOWN;
11470
11471 /* The fcomi comparison sets flags as follows:
11472
11473 cmp ZF PF CF
11474 > 0 0 0
11475 < 0 0 1
11476 = 1 0 0
11477 un 1 1 1 */
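/* The codes in the first group below map directly onto one of these flag
   combinations.  For the IEEE-safe cases further down, the chosen
   FIRST_CODE would also hold (or fail) on unordered operands, so an extra
   UNORDERED test either bypasses it (when NaNs must not take the branch)
   or is added as SECOND_CODE (when NaNs must take the branch). */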
11478
11479 switch (code)
11480 {
11481 case GT: /* GTU - CF=0 & ZF=0 */
11482 case GE: /* GEU - CF=0 */
11483 case ORDERED: /* PF=0 */
11484 case UNORDERED: /* PF=1 */
11485 case UNEQ: /* EQ - ZF=1 */
11486 case UNLT: /* LTU - CF=1 */
11487 case UNLE: /* LEU - CF=1 | ZF=1 */
11488 case LTGT: /* EQ - ZF=0 */
11489 break;
11490 case LT: /* LTU - CF=1 - fails on unordered */
11491 *first_code = UNLT;
11492 *bypass_code = UNORDERED;
11493 break;
11494 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
11495 *first_code = UNLE;
11496 *bypass_code = UNORDERED;
11497 break;
11498 case EQ: /* EQ - ZF=1 - fails on unordered */
11499 *first_code = UNEQ;
11500 *bypass_code = UNORDERED;
11501 break;
11502 case NE: /* NE - ZF=0 - fails on unordered */
11503 *first_code = LTGT;
11504 *second_code = UNORDERED;
11505 break;
11506 case UNGE: /* GEU - CF=0 - fails on unordered */
11507 *first_code = GE;
11508 *second_code = UNORDERED;
11509 break;
11510 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
11511 *first_code = GT;
11512 *second_code = UNORDERED;
11513 break;
11514 default:
11515 gcc_unreachable ();
11516 }
11517 if (!TARGET_IEEE_FP)
11518 {
11519 *second_code = UNKNOWN;
11520 *bypass_code = UNKNOWN;
11521 }
11522 }
11523
11524 /* Return the cost of a comparison done using fcom + arithmetic operations on AX.
11525 All of the following functions use the number of instructions as a cost metric.
11526 In the future this should be tweaked to compute bytes for optimize_size and
11527 take into account performance of various instructions on various CPUs. */
11528 static int
11529 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
11530 {
11531 if (!TARGET_IEEE_FP)
11532 return 4;
11533 /* The cost of code output by ix86_expand_fp_compare. */
11534 switch (code)
11535 {
11536 case UNLE:
11537 case UNLT:
11538 case LTGT:
11539 case GT:
11540 case GE:
11541 case UNORDERED:
11542 case ORDERED:
11543 case UNEQ:
11544 return 4;
11545 break;
11546 case LT:
11547 case NE:
11548 case EQ:
11549 case UNGE:
11550 return 5;
11551 break;
11552 case LE:
11553 case UNGT:
11554 return 6;
11555 break;
11556 default:
11557 gcc_unreachable ();
11558 }
11559 }
11560
11561 /* Return cost of comparison done using fcomi operation.
11562 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11563 static int
11564 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
11565 {
11566 enum rtx_code bypass_code, first_code, second_code;
11567 /* Return arbitrarily high cost when instruction is not supported - this
11568 prevents gcc from using it. */
11569 if (!TARGET_CMOVE)
11570 return 1024;
11571 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11572 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
11573 }
11574
11575 /* Return cost of comparison done using sahf operation.
11576 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11577 static int
11578 ix86_fp_comparison_sahf_cost (enum rtx_code code)
11579 {
11580 enum rtx_code bypass_code, first_code, second_code;
11581 /* Return arbitrarily high cost when instruction is not preferred - this
11582 keeps gcc from using it. */
11583 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
11584 return 1024;
11585 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11586 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
11587 }
11588
11589 /* Compute cost of the comparison done using any method.
11590 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11591 static int
11592 ix86_fp_comparison_cost (enum rtx_code code)
11593 {
11594 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
11595 int min;
11596
11597 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
11598 sahf_cost = ix86_fp_comparison_sahf_cost (code);
11599
11600 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
11601 if (min > sahf_cost)
11602 min = sahf_cost;
11603 if (min > fcomi_cost)
11604 min = fcomi_cost;
11605 return min;
11606 }
11607
11608 /* Return true if we should use an FCOMI instruction for this
11609 fp comparison. */
11610
11611 int
11612 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
11613 {
11614 enum rtx_code swapped_code = swap_condition (code);
11615
11616 return ((ix86_fp_comparison_cost (code)
11617 == ix86_fp_comparison_fcomi_cost (code))
11618 || (ix86_fp_comparison_cost (swapped_code)
11619 == ix86_fp_comparison_fcomi_cost (swapped_code)));
11620 }
11621
11622 /* Swap, force into registers, or otherwise massage the two operands
11623 to a fp comparison. The operands are updated in place; the new
11624 comparison code is returned. */
11625
11626 static enum rtx_code
11627 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
11628 {
11629 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
11630 rtx op0 = *pop0, op1 = *pop1;
11631 enum machine_mode op_mode = GET_MODE (op0);
11632 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
11633
11634 /* All of the unordered compare instructions only work on registers.
11635 The same is true of the fcomi compare instructions. The XFmode
11636 compare instructions require registers except when comparing
11637 against zero or when converting operand 1 from fixed point to
11638 floating point. */
11639
11640 if (!is_sse
11641 && (fpcmp_mode == CCFPUmode
11642 || (op_mode == XFmode
11643 && ! (standard_80387_constant_p (op0) == 1
11644 || standard_80387_constant_p (op1) == 1)
11645 && GET_CODE (op1) != FLOAT)
11646 || ix86_use_fcomi_compare (code)))
11647 {
11648 op0 = force_reg (op_mode, op0);
11649 op1 = force_reg (op_mode, op1);
11650 }
11651 else
11652 {
11653 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
11654 things around if they appear profitable, otherwise force op0
11655 into a register. */
11656
11657 if (standard_80387_constant_p (op0) == 0
11658 || (MEM_P (op0)
11659 && ! (standard_80387_constant_p (op1) == 0
11660 || MEM_P (op1))))
11661 {
11662 rtx tmp;
11663 tmp = op0, op0 = op1, op1 = tmp;
11664 code = swap_condition (code);
11665 }
11666
11667 if (!REG_P (op0))
11668 op0 = force_reg (op_mode, op0);
11669
11670 if (CONSTANT_P (op1))
11671 {
11672 int tmp = standard_80387_constant_p (op1);
11673 if (tmp == 0)
11674 op1 = validize_mem (force_const_mem (op_mode, op1));
11675 else if (tmp == 1)
11676 {
11677 if (TARGET_CMOVE)
11678 op1 = force_reg (op_mode, op1);
11679 }
11680 else
11681 op1 = force_reg (op_mode, op1);
11682 }
11683 }
11684
11685 /* Try to rearrange the comparison to make it cheaper. */
11686 if (ix86_fp_comparison_cost (code)
11687 > ix86_fp_comparison_cost (swap_condition (code))
11688 && (REG_P (op1) || can_create_pseudo_p ()))
11689 {
11690 rtx tmp;
11691 tmp = op0, op0 = op1, op1 = tmp;
11692 code = swap_condition (code);
11693 if (!REG_P (op0))
11694 op0 = force_reg (op_mode, op0);
11695 }
11696
11697 *pop0 = op0;
11698 *pop1 = op1;
11699 return code;
11700 }
11701
11702 /* Convert comparison codes we use to represent FP comparison to integer
11703 code that will result in proper branch. Return UNKNOWN if no such code
11704 is available. */
11705
11706 enum rtx_code
11707 ix86_fp_compare_code_to_integer (enum rtx_code code)
11708 {
11709 switch (code)
11710 {
11711 case GT:
11712 return GTU;
11713 case GE:
11714 return GEU;
11715 case ORDERED:
11716 case UNORDERED:
11717 return code;
11718 break;
11719 case UNEQ:
11720 return EQ;
11721 break;
11722 case UNLT:
11723 return LTU;
11724 break;
11725 case UNLE:
11726 return LEU;
11727 break;
11728 case LTGT:
11729 return NE;
11730 break;
11731 default:
11732 return UNKNOWN;
11733 }
11734 }
11735
11736 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11737
11738 static rtx
11739 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
11740 rtx *second_test, rtx *bypass_test)
11741 {
11742 enum machine_mode fpcmp_mode, intcmp_mode;
11743 rtx tmp, tmp2;
11744 int cost = ix86_fp_comparison_cost (code);
11745 enum rtx_code bypass_code, first_code, second_code;
11746
11747 fpcmp_mode = ix86_fp_compare_mode (code);
11748 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
11749
11750 if (second_test)
11751 *second_test = NULL_RTX;
11752 if (bypass_test)
11753 *bypass_test = NULL_RTX;
11754
11755 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11756
11757 /* Do fcomi/sahf based test when profitable. */
11758 if (ix86_fp_comparison_arithmetics_cost (code) > cost
11759 && (bypass_code == UNKNOWN || bypass_test)
11760 && (second_code == UNKNOWN || second_test))
11761 {
11762 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11763 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
11764 tmp);
11765 if (TARGET_CMOVE)
11766 emit_insn (tmp);
11767 else
11768 {
11769 gcc_assert (TARGET_SAHF);
11770
11771 if (!scratch)
11772 scratch = gen_reg_rtx (HImode);
11773 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
11774
11775 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
11776 }
11777
11778 /* The FP codes work out to act like unsigned. */
11779 intcmp_mode = fpcmp_mode;
11780 code = first_code;
11781 if (bypass_code != UNKNOWN)
11782 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11783 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11784 const0_rtx);
11785 if (second_code != UNKNOWN)
11786 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11787 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11788 const0_rtx);
11789 }
11790 else
11791 {
11792 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11793 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11794 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11795 if (!scratch)
11796 scratch = gen_reg_rtx (HImode);
11797 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11798
11799 /* In the unordered case, we have to check C2 for NaN's, which
11800 doesn't happen to work out to anything nice combination-wise.
11801 So do some bit twiddling on the value we've got in AH to come
11802 up with an appropriate set of condition codes. */
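/* After the fnstsw above, AH holds the x87 status-word bits C0 (0x01),
   C2 (0x04) and C3 (0x40).  For an fcom-style compare these mirror the
   CF, PF and ZF columns of the fcomi table earlier in this file, which
   is what the mask constants below are testing. */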
11803
11804 intcmp_mode = CCNOmode;
11805 switch (code)
11806 {
11807 case GT:
11808 case UNGT:
11809 if (code == GT || !TARGET_IEEE_FP)
11810 {
11811 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11812 code = EQ;
11813 }
11814 else
11815 {
11816 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11817 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11818 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
11819 intcmp_mode = CCmode;
11820 code = GEU;
11821 }
11822 break;
11823 case LT:
11824 case UNLT:
11825 if (code == LT && TARGET_IEEE_FP)
11826 {
11827 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11828 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
11829 intcmp_mode = CCmode;
11830 code = EQ;
11831 }
11832 else
11833 {
11834 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
11835 code = NE;
11836 }
11837 break;
11838 case GE:
11839 case UNGE:
11840 if (code == GE || !TARGET_IEEE_FP)
11841 {
11842 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
11843 code = EQ;
11844 }
11845 else
11846 {
11847 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11848 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11849 GEN_INT (0x01)));
11850 code = NE;
11851 }
11852 break;
11853 case LE:
11854 case UNLE:
11855 if (code == LE && TARGET_IEEE_FP)
11856 {
11857 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11858 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11859 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11860 intcmp_mode = CCmode;
11861 code = LTU;
11862 }
11863 else
11864 {
11865 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11866 code = NE;
11867 }
11868 break;
11869 case EQ:
11870 case UNEQ:
11871 if (code == EQ && TARGET_IEEE_FP)
11872 {
11873 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11874 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11875 intcmp_mode = CCmode;
11876 code = EQ;
11877 }
11878 else
11879 {
11880 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11881 code = NE;
11882 break;
11883 }
11884 break;
11885 case NE:
11886 case LTGT:
11887 if (code == NE && TARGET_IEEE_FP)
11888 {
11889 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11890 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11891 GEN_INT (0x40)));
11892 code = NE;
11893 }
11894 else
11895 {
11896 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11897 code = EQ;
11898 }
11899 break;
11900
11901 case UNORDERED:
11902 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11903 code = NE;
11904 break;
11905 case ORDERED:
11906 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11907 code = EQ;
11908 break;
11909
11910 default:
11911 gcc_unreachable ();
11912 }
11913 }
11914
11915 /* Return the test that should be put into the flags user, i.e.
11916 the bcc, scc, or cmov instruction. */
11917 return gen_rtx_fmt_ee (code, VOIDmode,
11918 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11919 const0_rtx);
11920 }
11921
11922 rtx
11923 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11924 {
11925 rtx op0, op1, ret;
11926 op0 = ix86_compare_op0;
11927 op1 = ix86_compare_op1;
11928
11929 if (second_test)
11930 *second_test = NULL_RTX;
11931 if (bypass_test)
11932 *bypass_test = NULL_RTX;
11933
11934 if (ix86_compare_emitted)
11935 {
11936 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11937 ix86_compare_emitted = NULL_RTX;
11938 }
11939 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
11940 {
11941 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
11942 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11943 second_test, bypass_test);
11944 }
11945 else
11946 ret = ix86_expand_int_compare (code, op0, op1);
11947
11948 return ret;
11949 }
11950
11951 /* Return true if the CODE will result in nontrivial jump sequence. */
11952 bool
11953 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11954 {
11955 enum rtx_code bypass_code, first_code, second_code;
11956 if (!TARGET_CMOVE)
11957 return true;
11958 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11959 return bypass_code != UNKNOWN || second_code != UNKNOWN;
11960 }
11961
11962 void
11963 ix86_expand_branch (enum rtx_code code, rtx label)
11964 {
11965 rtx tmp;
11966
11967 /* If we have emitted a compare insn, go straight to simple.
11968 ix86_expand_compare won't emit anything if ix86_compare_emitted
11969 is non NULL. */
11970 if (ix86_compare_emitted)
11971 goto simple;
11972
11973 switch (GET_MODE (ix86_compare_op0))
11974 {
11975 case QImode:
11976 case HImode:
11977 case SImode:
11978 simple:
11979 tmp = ix86_expand_compare (code, NULL, NULL);
11980 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11981 gen_rtx_LABEL_REF (VOIDmode, label),
11982 pc_rtx);
11983 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11984 return;
11985
11986 case SFmode:
11987 case DFmode:
11988 case XFmode:
11989 {
11990 rtvec vec;
11991 int use_fcomi;
11992 enum rtx_code bypass_code, first_code, second_code;
11993
11994 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
11995 &ix86_compare_op1);
11996
11997 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11998
11999 /* Check whether we will use the natural sequence with one jump. If
12000 so, we can expand the jump early. Otherwise delay expansion by
12001 creating a compound insn so as not to confuse the optimizers. */
12002 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
12003 {
12004 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
12005 gen_rtx_LABEL_REF (VOIDmode, label),
12006 pc_rtx, NULL_RTX, NULL_RTX);
12007 }
12008 else
12009 {
12010 tmp = gen_rtx_fmt_ee (code, VOIDmode,
12011 ix86_compare_op0, ix86_compare_op1);
12012 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12013 gen_rtx_LABEL_REF (VOIDmode, label),
12014 pc_rtx);
12015 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
12016
12017 use_fcomi = ix86_use_fcomi_compare (code);
12018 vec = rtvec_alloc (3 + !use_fcomi);
12019 RTVEC_ELT (vec, 0) = tmp;
12020 RTVEC_ELT (vec, 1)
12021 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
12022 RTVEC_ELT (vec, 2)
12023 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
12024 if (! use_fcomi)
12025 RTVEC_ELT (vec, 3)
12026 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
12027
12028 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
12029 }
12030 return;
12031 }
12032
12033 case DImode:
12034 if (TARGET_64BIT)
12035 goto simple;
12036 case TImode:
12037 /* Expand DImode branch into multiple compare+branch. */
12038 {
12039 rtx lo[2], hi[2], label2;
12040 enum rtx_code code1, code2, code3;
12041 enum machine_mode submode;
12042
12043 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
12044 {
12045 tmp = ix86_compare_op0;
12046 ix86_compare_op0 = ix86_compare_op1;
12047 ix86_compare_op1 = tmp;
12048 code = swap_condition (code);
12049 }
12050 if (GET_MODE (ix86_compare_op0) == DImode)
12051 {
12052 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
12053 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
12054 submode = SImode;
12055 }
12056 else
12057 {
12058 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
12059 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
12060 submode = DImode;
12061 }
12062
12063 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
12064 avoid two branches. This costs one extra insn, so disable when
12065 optimizing for size. */
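/* Two double-word values are equal exactly when both halves are equal,
   i.e. when (hi0 ^ hi1) | (lo0 ^ lo1) is zero, so one compare of the
   combined value against zero suffices. */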
12066
12067 if ((code == EQ || code == NE)
12068 && (!optimize_size
12069 || hi[1] == const0_rtx || lo[1] == const0_rtx))
12070 {
12071 rtx xor0, xor1;
12072
12073 xor1 = hi[0];
12074 if (hi[1] != const0_rtx)
12075 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
12076 NULL_RTX, 0, OPTAB_WIDEN);
12077
12078 xor0 = lo[0];
12079 if (lo[1] != const0_rtx)
12080 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
12081 NULL_RTX, 0, OPTAB_WIDEN);
12082
12083 tmp = expand_binop (submode, ior_optab, xor1, xor0,
12084 NULL_RTX, 0, OPTAB_WIDEN);
12085
12086 ix86_compare_op0 = tmp;
12087 ix86_compare_op1 = const0_rtx;
12088 ix86_expand_branch (code, label);
12089 return;
12090 }
12091
12092 /* Otherwise, if we are doing less-than or greater-or-equal-than,
12093 op1 is a constant and the low word is zero, then we can just
12094 examine the high word. */
12095
12096 if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
12097 switch (code)
12098 {
12099 case LT: case LTU: case GE: case GEU:
12100 ix86_compare_op0 = hi[0];
12101 ix86_compare_op1 = hi[1];
12102 ix86_expand_branch (code, label);
12103 return;
12104 default:
12105 break;
12106 }
12107
12108 /* Otherwise, we need two or three jumps. */
12109
12110 label2 = gen_label_rtx ();
12111
12112 code1 = code;
12113 code2 = swap_condition (code);
12114 code3 = unsigned_condition (code);
12115
12116 switch (code)
12117 {
12118 case LT: case GT: case LTU: case GTU:
12119 break;
12120
12121 case LE: code1 = LT; code2 = GT; break;
12122 case GE: code1 = GT; code2 = LT; break;
12123 case LEU: code1 = LTU; code2 = GTU; break;
12124 case GEU: code1 = GTU; code2 = LTU; break;
12125
12126 case EQ: code1 = UNKNOWN; code2 = NE; break;
12127 case NE: code2 = UNKNOWN; break;
12128
12129 default:
12130 gcc_unreachable ();
12131 }
12132
12133 /*
12134 * a < b =>
12135 * if (hi(a) < hi(b)) goto true;
12136 * if (hi(a) > hi(b)) goto false;
12137 * if (lo(a) < lo(b)) goto true;
12138 * false:
12139 */
12140
12141 ix86_compare_op0 = hi[0];
12142 ix86_compare_op1 = hi[1];
12143
12144 if (code1 != UNKNOWN)
12145 ix86_expand_branch (code1, label);
12146 if (code2 != UNKNOWN)
12147 ix86_expand_branch (code2, label2);
12148
12149 ix86_compare_op0 = lo[0];
12150 ix86_compare_op1 = lo[1];
12151 ix86_expand_branch (code3, label);
12152
12153 if (code2 != UNKNOWN)
12154 emit_label (label2);
12155 return;
12156 }
12157
12158 default:
12159 gcc_unreachable ();
12160 }
12161 }
12162
12163 /* Split branch based on floating point condition. */
12164 void
12165 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
12166 rtx target1, rtx target2, rtx tmp, rtx pushed)
12167 {
12168 rtx second, bypass;
12169 rtx label = NULL_RTX;
12170 rtx condition;
12171 int bypass_probability = -1, second_probability = -1, probability = -1;
12172 rtx i;
12173
12174 if (target2 != pc_rtx)
12175 {
12176 rtx tmp = target2;
12177 code = reverse_condition_maybe_unordered (code);
12178 target2 = target1;
12179 target1 = tmp;
12180 }
12181
12182 condition = ix86_expand_fp_compare (code, op1, op2,
12183 tmp, &second, &bypass);
12184
12185 /* Remove pushed operand from stack. */
12186 if (pushed)
12187 ix86_free_from_memory (GET_MODE (pushed));
12188
12189 if (split_branch_probability >= 0)
12190 {
12191 /* Distribute the probabilities across the jumps.
12192 Assume that BYPASS and SECOND always test
12193 for UNORDERED. */
12194 probability = split_branch_probability;
12195
12196 /* A value of 1 is low enough that the probability does not need
12197 to be updated. Later we may run some experiments and see
12198 if unordered values are more frequent in practice. */
12199 if (bypass)
12200 bypass_probability = 1;
12201 if (second)
12202 second_probability = 1;
12203 }
12204 if (bypass != NULL_RTX)
12205 {
12206 label = gen_label_rtx ();
12207 i = emit_jump_insn (gen_rtx_SET
12208 (VOIDmode, pc_rtx,
12209 gen_rtx_IF_THEN_ELSE (VOIDmode,
12210 bypass,
12211 gen_rtx_LABEL_REF (VOIDmode,
12212 label),
12213 pc_rtx)));
12214 if (bypass_probability >= 0)
12215 REG_NOTES (i)
12216 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12217 GEN_INT (bypass_probability),
12218 REG_NOTES (i));
12219 }
12220 i = emit_jump_insn (gen_rtx_SET
12221 (VOIDmode, pc_rtx,
12222 gen_rtx_IF_THEN_ELSE (VOIDmode,
12223 condition, target1, target2)));
12224 if (probability >= 0)
12225 REG_NOTES (i)
12226 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12227 GEN_INT (probability),
12228 REG_NOTES (i));
12229 if (second != NULL_RTX)
12230 {
12231 i = emit_jump_insn (gen_rtx_SET
12232 (VOIDmode, pc_rtx,
12233 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
12234 target2)));
12235 if (second_probability >= 0)
12236 REG_NOTES (i)
12237 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12238 GEN_INT (second_probability),
12239 REG_NOTES (i));
12240 }
12241 if (label != NULL_RTX)
12242 emit_label (label);
12243 }
12244
12245 int
12246 ix86_expand_setcc (enum rtx_code code, rtx dest)
12247 {
12248 rtx ret, tmp, tmpreg, equiv;
12249 rtx second_test, bypass_test;
12250
12251 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
12252 return 0; /* FAIL */
12253
12254 gcc_assert (GET_MODE (dest) == QImode);
12255
12256 ret = ix86_expand_compare (code, &second_test, &bypass_test);
12257 PUT_MODE (ret, QImode);
12258
12259 tmp = dest;
12260 tmpreg = dest;
12261
12262 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
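/* A bypass test means the main condition is only meaningful when the
   bypass condition does not hold, so its reverse is ANDed in below; a
   second test is an alternative way for the predicate to be true, so it
   is ORed in. */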
12263 if (bypass_test || second_test)
12264 {
12265 rtx test = second_test;
12266 int bypass = 0;
12267 rtx tmp2 = gen_reg_rtx (QImode);
12268 if (bypass_test)
12269 {
12270 gcc_assert (!second_test);
12271 test = bypass_test;
12272 bypass = 1;
12273 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
12274 }
12275 PUT_MODE (test, QImode);
12276 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
12277
12278 if (bypass)
12279 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
12280 else
12281 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
12282 }
12283
12284 /* Attach a REG_EQUAL note describing the comparison result. */
12285 if (ix86_compare_op0 && ix86_compare_op1)
12286 {
12287 equiv = simplify_gen_relational (code, QImode,
12288 GET_MODE (ix86_compare_op0),
12289 ix86_compare_op0, ix86_compare_op1);
12290 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
12291 }
12292
12293 return 1; /* DONE */
12294 }
12295
12296 /* Expand comparison setting or clearing carry flag. Return true when
12297 successful and set pop for the operation. */
12298 static bool
12299 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
12300 {
12301 enum machine_mode mode =
12302 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
12303
12304 /* Do not handle DImode compares that go through special path. */
12305 if (mode == (TARGET_64BIT ? TImode : DImode))
12306 return false;
12307
12308 if (SCALAR_FLOAT_MODE_P (mode))
12309 {
12310 rtx second_test = NULL, bypass_test = NULL;
12311 rtx compare_op, compare_seq;
12312
12313 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
12314
12315 /* Shortcut: following common codes never translate
12316 into carry flag compares. */
12317 if (code == EQ || code == NE || code == UNEQ || code == LTGT
12318 || code == ORDERED || code == UNORDERED)
12319 return false;
12320
12321 /* These comparisons require zero flag; swap operands so they won't. */
12322 if ((code == GT || code == UNLE || code == LE || code == UNGT)
12323 && !TARGET_IEEE_FP)
12324 {
12325 rtx tmp = op0;
12326 op0 = op1;
12327 op1 = tmp;
12328 code = swap_condition (code);
12329 }
12330
12331 /* Try to expand the comparison and verify that we end up with
12332 a carry-flag based comparison. This fails to be true only when
12333 we decide to expand the comparison using arithmetic, which is not
12334 a common scenario. */
12335 start_sequence ();
12336 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
12337 &second_test, &bypass_test);
12338 compare_seq = get_insns ();
12339 end_sequence ();
12340
12341 if (second_test || bypass_test)
12342 return false;
12343
12344 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12345 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12346 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
12347 else
12348 code = GET_CODE (compare_op);
12349
12350 if (code != LTU && code != GEU)
12351 return false;
12352
12353 emit_insn (compare_seq);
12354 *pop = compare_op;
12355 return true;
12356 }
12357
12358 if (!INTEGRAL_MODE_P (mode))
12359 return false;
12360
12361 switch (code)
12362 {
12363 case LTU:
12364 case GEU:
12365 break;
12366
12367 /* Convert a==0 into (unsigned)a<1. */
12368 case EQ:
12369 case NE:
12370 if (op1 != const0_rtx)
12371 return false;
12372 op1 = const1_rtx;
12373 code = (code == EQ ? LTU : GEU);
12374 break;
12375
12376 /* Convert a>b into b<a or a>=b+1. */
12377 case GTU:
12378 case LEU:
12379 if (CONST_INT_P (op1))
12380 {
12381 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
12382 /* Bail out on overflow. We still can swap operands but that
12383 would force loading of the constant into register. */
12384 if (op1 == const0_rtx
12385 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
12386 return false;
12387 code = (code == GTU ? GEU : LTU);
12388 }
12389 else
12390 {
12391 rtx tmp = op1;
12392 op1 = op0;
12393 op0 = tmp;
12394 code = (code == GTU ? LTU : GEU);
12395 }
12396 break;
12397
12398 /* Convert a>=0 into (unsigned)a<0x80000000. */
12399 case LT:
12400 case GE:
12401 if (mode == DImode || op1 != const0_rtx)
12402 return false;
12403 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12404 code = (code == LT ? GEU : LTU);
12405 break;
12406 case LE:
12407 case GT:
12408 if (mode == DImode || op1 != constm1_rtx)
12409 return false;
12410 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12411 code = (code == LE ? GEU : LTU);
12412 break;
12413
12414 default:
12415 return false;
12416 }
12417 /* Swapping operands may cause constant to appear as first operand. */
12418 if (!nonimmediate_operand (op0, VOIDmode))
12419 {
12420 if (!can_create_pseudo_p ())
12421 return false;
12422 op0 = force_reg (mode, op0);
12423 }
12424 ix86_compare_op0 = op0;
12425 ix86_compare_op1 = op1;
12426 *pop = ix86_expand_compare (code, NULL, NULL);
12427 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
12428 return true;
12429 }
12430
12431 int
12432 ix86_expand_int_movcc (rtx operands[])
12433 {
12434 enum rtx_code code = GET_CODE (operands[1]), compare_code;
12435 rtx compare_seq, compare_op;
12436 rtx second_test, bypass_test;
12437 enum machine_mode mode = GET_MODE (operands[0]);
12438 bool sign_bit_compare_p = false;
12439
12440 start_sequence ();
12441 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12442 compare_seq = get_insns ();
12443 end_sequence ();
12444
12445 compare_code = GET_CODE (compare_op);
12446
12447 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
12448 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
12449 sign_bit_compare_p = true;
12450
12451 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
12452 HImode insns, we'd be swallowed in word prefix ops. */
12453
12454 if ((mode != HImode || TARGET_FAST_PREFIX)
12455 && (mode != (TARGET_64BIT ? TImode : DImode))
12456 && CONST_INT_P (operands[2])
12457 && CONST_INT_P (operands[3]))
12458 {
12459 rtx out = operands[0];
12460 HOST_WIDE_INT ct = INTVAL (operands[2]);
12461 HOST_WIDE_INT cf = INTVAL (operands[3]);
12462 HOST_WIDE_INT diff;
12463
12464 diff = ct - cf;
12465 /* Sign bit compares are better done using shifts than by using
12466 sbb. */
12467 if (sign_bit_compare_p
12468 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12469 ix86_compare_op1, &compare_op))
12470 {
12471 /* Detect overlap between destination and compare sources. */
12472 rtx tmp = out;
12473
12474 if (!sign_bit_compare_p)
12475 {
12476 bool fpcmp = false;
12477
12478 compare_code = GET_CODE (compare_op);
12479
12480 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12481 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12482 {
12483 fpcmp = true;
12484 compare_code = ix86_fp_compare_code_to_integer (compare_code);
12485 }
12486
12487 /* To simplify rest of code, restrict to the GEU case. */
12488 if (compare_code == LTU)
12489 {
12490 HOST_WIDE_INT tmp = ct;
12491 ct = cf;
12492 cf = tmp;
12493 compare_code = reverse_condition (compare_code);
12494 code = reverse_condition (code);
12495 }
12496 else
12497 {
12498 if (fpcmp)
12499 PUT_CODE (compare_op,
12500 reverse_condition_maybe_unordered
12501 (GET_CODE (compare_op)));
12502 else
12503 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12504 }
12505 diff = ct - cf;
12506
12507 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
12508 || reg_overlap_mentioned_p (out, ix86_compare_op1))
12509 tmp = gen_reg_rtx (mode);
12510
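/* The x86_mov*cc_0_m1 patterns below expand to sbb reg,reg, which
   computes reg - reg - CF and therefore leaves -1 (all ones) when the
   carry flag is set and 0 otherwise; the constant arithmetic further
   down then turns that mask into ct or cf without a branch. */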
12511 if (mode == DImode)
12512 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
12513 else
12514 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
12515 }
12516 else
12517 {
12518 if (code == GT || code == GE)
12519 code = reverse_condition (code);
12520 else
12521 {
12522 HOST_WIDE_INT tmp = ct;
12523 ct = cf;
12524 cf = tmp;
12525 diff = ct - cf;
12526 }
12527 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
12528 ix86_compare_op1, VOIDmode, 0, -1);
12529 }
12530
12531 if (diff == 1)
12532 {
12533 /*
12534 * cmpl op0,op1
12535 * sbbl dest,dest
12536 * [addl dest, ct]
12537 *
12538 * Size 5 - 8.
12539 */
12540 if (ct)
12541 tmp = expand_simple_binop (mode, PLUS,
12542 tmp, GEN_INT (ct),
12543 copy_rtx (tmp), 1, OPTAB_DIRECT);
12544 }
12545 else if (cf == -1)
12546 {
12547 /*
12548 * cmpl op0,op1
12549 * sbbl dest,dest
12550 * orl $ct, dest
12551 *
12552 * Size 8.
12553 */
12554 tmp = expand_simple_binop (mode, IOR,
12555 tmp, GEN_INT (ct),
12556 copy_rtx (tmp), 1, OPTAB_DIRECT);
12557 }
12558 else if (diff == -1 && ct)
12559 {
12560 /*
12561 * cmpl op0,op1
12562 * sbbl dest,dest
12563 * notl dest
12564 * [addl dest, cf]
12565 *
12566 * Size 8 - 11.
12567 */
12568 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12569 if (cf)
12570 tmp = expand_simple_binop (mode, PLUS,
12571 copy_rtx (tmp), GEN_INT (cf),
12572 copy_rtx (tmp), 1, OPTAB_DIRECT);
12573 }
12574 else
12575 {
12576 /*
12577 * cmpl op0,op1
12578 * sbbl dest,dest
12579 * [notl dest]
12580 * andl cf - ct, dest
12581 * [addl dest, ct]
12582 *
12583 * Size 8 - 11.
12584 */
12585
12586 if (cf == 0)
12587 {
12588 cf = ct;
12589 ct = 0;
12590 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12591 }
12592
12593 tmp = expand_simple_binop (mode, AND,
12594 copy_rtx (tmp),
12595 gen_int_mode (cf - ct, mode),
12596 copy_rtx (tmp), 1, OPTAB_DIRECT);
12597 if (ct)
12598 tmp = expand_simple_binop (mode, PLUS,
12599 copy_rtx (tmp), GEN_INT (ct),
12600 copy_rtx (tmp), 1, OPTAB_DIRECT);
12601 }
12602
12603 if (!rtx_equal_p (tmp, out))
12604 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
12605
12606 return 1; /* DONE */
12607 }
12608
12609 if (diff < 0)
12610 {
12611 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12612
12613 HOST_WIDE_INT tmp;
12614 tmp = ct, ct = cf, cf = tmp;
12615 diff = -diff;
12616
12617 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12618 {
12619 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12620
12621 /* We may be reversing an unordered compare to a normal compare, which
12622 is not valid in general (we may convert a non-trapping condition
12623 to a trapping one); however, on i386 we currently emit all
12624 comparisons unordered. */
12625 compare_code = reverse_condition_maybe_unordered (compare_code);
12626 code = reverse_condition_maybe_unordered (code);
12627 }
12628 else
12629 {
12630 compare_code = reverse_condition (compare_code);
12631 code = reverse_condition (code);
12632 }
12633 }
12634
12635 compare_code = UNKNOWN;
12636 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
12637 && CONST_INT_P (ix86_compare_op1))
12638 {
12639 if (ix86_compare_op1 == const0_rtx
12640 && (code == LT || code == GE))
12641 compare_code = code;
12642 else if (ix86_compare_op1 == constm1_rtx)
12643 {
12644 if (code == LE)
12645 compare_code = LT;
12646 else if (code == GT)
12647 compare_code = GE;
12648 }
12649 }
12650
12651 /* Optimize dest = (op0 < 0) ? -1 : cf. */
12652 if (compare_code != UNKNOWN
12653 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
12654 && (cf == -1 || ct == -1))
12655 {
12656 /* If lea code below could be used, only optimize
12657 if it results in a 2 insn sequence. */
12658
12659 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
12660 || diff == 3 || diff == 5 || diff == 9)
12661 || (compare_code == LT && ct == -1)
12662 || (compare_code == GE && cf == -1))
12663 {
12664 /*
12665 * notl op1 (if necessary)
12666 * sarl $31, op1
12667 * orl cf, op1
12668 */
12669 if (ct != -1)
12670 {
12671 cf = ct;
12672 ct = -1;
12673 code = reverse_condition (code);
12674 }
12675
12676 out = emit_store_flag (out, code, ix86_compare_op0,
12677 ix86_compare_op1, VOIDmode, 0, -1);
12678
12679 out = expand_simple_binop (mode, IOR,
12680 out, GEN_INT (cf),
12681 out, 1, OPTAB_DIRECT);
12682 if (out != operands[0])
12683 emit_move_insn (operands[0], out);
12684
12685 return 1; /* DONE */
12686 }
12687 }
12688
12689
12690 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
12691 || diff == 3 || diff == 5 || diff == 9)
12692 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
12693 && (mode != DImode
12694 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
12695 {
12696 /*
12697 * xorl dest,dest
12698 * cmpl op1,op2
12699 * setcc dest
12700 * lea cf(dest*(ct-cf)),dest
12701 *
12702 * Size 14.
12703 *
12704 * This also catches the degenerate setcc-only case.
12705 */
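/* That is, dest ends up as cf + dest * (ct - cf), where the setcc result
   of 0 or 1 selects cf or ct; this is why the guard just above restricts
   the difference to 1, 2, 4 or 8 (a valid lea scale) or 3, 5 or 9
   (scale plus the base register). */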
12706
12707 rtx tmp;
12708 int nops;
12709
12710 out = emit_store_flag (out, code, ix86_compare_op0,
12711 ix86_compare_op1, VOIDmode, 0, 1);
12712
12713 nops = 0;
12714 /* On x86_64 the lea instruction operates on Pmode, so we need
12715 to get the arithmetic done in the proper mode to match. */
12716 if (diff == 1)
12717 tmp = copy_rtx (out);
12718 else
12719 {
12720 rtx out1;
12721 out1 = copy_rtx (out);
12722 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
12723 nops++;
12724 if (diff & 1)
12725 {
12726 tmp = gen_rtx_PLUS (mode, tmp, out1);
12727 nops++;
12728 }
12729 }
12730 if (cf != 0)
12731 {
12732 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
12733 nops++;
12734 }
12735 if (!rtx_equal_p (tmp, out))
12736 {
12737 if (nops == 1)
12738 out = force_operand (tmp, copy_rtx (out));
12739 else
12740 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
12741 }
12742 if (!rtx_equal_p (out, operands[0]))
12743 emit_move_insn (operands[0], copy_rtx (out));
12744
12745 return 1; /* DONE */
12746 }
12747
12748 /*
12749 * General case: Jumpful:
12750 * xorl dest,dest cmpl op1, op2
12751 * cmpl op1, op2 movl ct, dest
12752 * setcc dest jcc 1f
12753 * decl dest movl cf, dest
12754 * andl (cf-ct),dest 1:
12755 * addl ct,dest
12756 *
12757 * Size 20. Size 14.
12758 *
12759 * This is reasonably steep, but branch mispredict costs are
12760 * high on modern cpus, so consider failing only if optimizing
12761 * for space.
12762 */
12763
12764 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12765 && BRANCH_COST >= 2)
12766 {
12767 if (cf == 0)
12768 {
12769 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12770
12771 cf = ct;
12772 ct = 0;
12773
12774 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12775 {
12776 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12777
12778 /* We may be reversing an unordered compare to a normal compare,
12779 which is not valid in general (we may convert a non-trapping
12780 condition to a trapping one); however, on i386 we currently
12781 emit all comparisons unordered. */
12782 code = reverse_condition_maybe_unordered (code);
12783 }
12784 else
12785 {
12786 code = reverse_condition (code);
12787 if (compare_code != UNKNOWN)
12788 compare_code = reverse_condition (compare_code);
12789 }
12790 }
12791
12792 if (compare_code != UNKNOWN)
12793 {
12794 /* notl op1 (if needed)
12795 sarl $31, op1
12796 andl (cf-ct), op1
12797 addl ct, op1
12798
12799 For x < 0 (resp. x <= -1) there will be no notl,
12800 so if possible swap the constants to get rid of the
12801 complement.
12802 True/false will be -1/0 while code below (store flag
12803 followed by decrement) is 0/-1, so the constants need
12804 to be exchanged once more. */
12805
12806 if (compare_code == GE || !cf)
12807 {
12808 code = reverse_condition (code);
12809 compare_code = LT;
12810 }
12811 else
12812 {
12813 HOST_WIDE_INT tmp = cf;
12814 cf = ct;
12815 ct = tmp;
12816 }
12817
12818 out = emit_store_flag (out, code, ix86_compare_op0,
12819 ix86_compare_op1, VOIDmode, 0, -1);
12820 }
12821 else
12822 {
12823 out = emit_store_flag (out, code, ix86_compare_op0,
12824 ix86_compare_op1, VOIDmode, 0, 1);
12825
12826 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
12827 copy_rtx (out), 1, OPTAB_DIRECT);
12828 }
12829
12830 out = expand_simple_binop (mode, AND, copy_rtx (out),
12831 gen_int_mode (cf - ct, mode),
12832 copy_rtx (out), 1, OPTAB_DIRECT);
12833 if (ct)
12834 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
12835 copy_rtx (out), 1, OPTAB_DIRECT);
12836 if (!rtx_equal_p (out, operands[0]))
12837 emit_move_insn (operands[0], copy_rtx (out));
12838
12839 return 1; /* DONE */
12840 }
12841 }
12842
12843 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12844 {
12845 /* Try a few things more with specific constants and a variable. */
12846
12847 optab op;
12848 rtx var, orig_out, out, tmp;
12849
12850 if (BRANCH_COST <= 2)
12851 return 0; /* FAIL */
12852
12853 /* If one of the two operands is an interesting constant, load a
12854 constant with the above and mask it in with a logical operation. */
12855
12856 if (CONST_INT_P (operands[2]))
12857 {
12858 var = operands[3];
12859 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
12860 operands[3] = constm1_rtx, op = and_optab;
12861 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
12862 operands[3] = const0_rtx, op = ior_optab;
12863 else
12864 return 0; /* FAIL */
12865 }
12866 else if (CONST_INT_P (operands[3]))
12867 {
12868 var = operands[2];
12869 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
12870 operands[2] = constm1_rtx, op = and_optab;
12871 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
12872 operands[2] = const0_rtx, op = ior_optab;
12873 else
12874 return 0; /* FAIL */
12875 }
12876 else
12877 return 0; /* FAIL */
12878
12879 orig_out = operands[0];
12880 tmp = gen_reg_rtx (mode);
12881 operands[0] = tmp;
12882
12883 /* Recurse to get the constant loaded. */
12884 if (ix86_expand_int_movcc (operands) == 0)
12885 return 0; /* FAIL */
12886
12887 /* Mask in the interesting variable. */
12888 out = expand_binop (mode, op, var, tmp, orig_out, 0,
12889 OPTAB_WIDEN);
12890 if (!rtx_equal_p (out, orig_out))
12891 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
12892
12893 return 1; /* DONE */
12894 }
12895
12896 /*
12897 * For comparison with above,
12898 *
12899 * movl cf,dest
12900 * movl ct,tmp
12901 * cmpl op1,op2
12902 * cmovcc tmp,dest
12903 *
12904 * Size 15.
12905 */
12906
12907 if (! nonimmediate_operand (operands[2], mode))
12908 operands[2] = force_reg (mode, operands[2]);
12909 if (! nonimmediate_operand (operands[3], mode))
12910 operands[3] = force_reg (mode, operands[3]);
12911
12912 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12913 {
12914 rtx tmp = gen_reg_rtx (mode);
12915 emit_move_insn (tmp, operands[3]);
12916 operands[3] = tmp;
12917 }
12918 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12919 {
12920 rtx tmp = gen_reg_rtx (mode);
12921 emit_move_insn (tmp, operands[2]);
12922 operands[2] = tmp;
12923 }
12924
12925 if (! register_operand (operands[2], VOIDmode)
12926 && (mode == QImode
12927 || ! register_operand (operands[3], VOIDmode)))
12928 operands[2] = force_reg (mode, operands[2]);
12929
12930 if (mode == QImode
12931 && ! register_operand (operands[3], VOIDmode))
12932 operands[3] = force_reg (mode, operands[3]);
12933
12934 emit_insn (compare_seq);
12935 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12936 gen_rtx_IF_THEN_ELSE (mode,
12937 compare_op, operands[2],
12938 operands[3])));
12939 if (bypass_test)
12940 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12941 gen_rtx_IF_THEN_ELSE (mode,
12942 bypass_test,
12943 copy_rtx (operands[3]),
12944 copy_rtx (operands[0]))));
12945 if (second_test)
12946 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12947 gen_rtx_IF_THEN_ELSE (mode,
12948 second_test,
12949 copy_rtx (operands[2]),
12950 copy_rtx (operands[0]))));
12951
12952 return 1; /* DONE */
12953 }
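
/* Illustrative sketch, not part of GCC: the branchless selection sequence
   described in the "General case" comment inside ix86_expand_int_movcc
   above (setcc, decrement, and, add), written out in plain C for two
   hypothetical constants CT and CF.  */
static unsigned int
branchless_select_sketch (int cond, unsigned int ct, unsigned int cf)
{
  unsigned int r = (cond != 0);   /* setcc: 1 if the condition holds, else 0 */
  r -= 1;                         /* dec:   1 -> 0, 0 -> all-ones */
  r &= cf - ct;                   /* and:   0 or (cf - ct) */
  r += ct;                        /* add:   ct if cond holds, else cf */
  return r;
}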
12954
12955 /* Swap, force into registers, or otherwise massage the two operands
12956 to an sse comparison with a mask result. Thus we differ a bit from
12957 ix86_prepare_fp_compare_args which expects to produce a flags result.
12958
12959 The DEST operand exists to help determine whether to commute commutative
12960 operators. The POP0/POP1 operands are updated in place. The new
12961 comparison code is returned, or UNKNOWN if not implementable. */
12962
12963 static enum rtx_code
12964 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12965 rtx *pop0, rtx *pop1)
12966 {
12967 rtx tmp;
12968
12969 switch (code)
12970 {
12971 case LTGT:
12972 case UNEQ:
12973 /* We have no LTGT as an operator. We could implement it with
12974 NE & ORDERED, but this requires an extra temporary. It's
12975 not clear that it's worth it. */
12976 return UNKNOWN;
12977
12978 case LT:
12979 case LE:
12980 case UNGT:
12981 case UNGE:
12982 /* These are supported directly. */
12983 break;
12984
12985 case EQ:
12986 case NE:
12987 case UNORDERED:
12988 case ORDERED:
12989 /* For commutative operators, try to canonicalize the destination
12990 operand to be first in the comparison - this helps reload to
12991 avoid extra moves. */
12992 if (!dest || !rtx_equal_p (dest, *pop1))
12993 break;
12994 /* FALLTHRU */
12995
12996 case GE:
12997 case GT:
12998 case UNLE:
12999 case UNLT:
13000 /* These are not supported directly. Swap the comparison operands
13001 to transform into something that is supported. */
13002 tmp = *pop0;
13003 *pop0 = *pop1;
13004 *pop1 = tmp;
13005 code = swap_condition (code);
13006 break;
13007
13008 default:
13009 gcc_unreachable ();
13010 }
13011
13012 return code;
13013 }
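
/* Illustrative sketch, not part of GCC: the operand swap performed by
   ix86_prepare_sse_fp_compare_args above.  SSE compares provide LT/LE and
   their unordered counterparts directly, so GT/GE/UNLE/UNLT are rewritten
   with the operands exchanged.  */
static int
swapped_compare_sketch (double a, double b)
{
  /* "a > b" is evaluated as "b < a", which maps directly to cmpltsd.  */
  return b < a;
}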
13014
13015 /* Detect conditional moves that exactly match min/max operational
13016 semantics. Note that this is IEEE safe, as long as we don't
13017 interchange the operands.
13018
13019 Returns FALSE if this conditional move doesn't match a MIN/MAX,
13020 and TRUE if the operation is successful and instructions are emitted. */
13021
13022 static bool
13023 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
13024 rtx cmp_op1, rtx if_true, rtx if_false)
13025 {
13026 enum machine_mode mode;
13027 bool is_min;
13028 rtx tmp;
13029
13030 if (code == LT)
13031 ;
13032 else if (code == UNGE)
13033 {
13034 tmp = if_true;
13035 if_true = if_false;
13036 if_false = tmp;
13037 }
13038 else
13039 return false;
13040
13041 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
13042 is_min = true;
13043 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
13044 is_min = false;
13045 else
13046 return false;
13047
13048 mode = GET_MODE (dest);
13049
13050 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
13051 but MODE may be a vector mode and thus not appropriate. */
13052 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
13053 {
13054 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
13055 rtvec v;
13056
13057 if_true = force_reg (mode, if_true);
13058 v = gen_rtvec (2, if_true, if_false);
13059 tmp = gen_rtx_UNSPEC (mode, v, u);
13060 }
13061 else
13062 {
13063 code = is_min ? SMIN : SMAX;
13064 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
13065 }
13066
13067 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
13068 return true;
13069 }
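
/* Illustrative sketch, not part of GCC: the pattern ix86_expand_sse_fp_minmax
   above recognizes.  minss/minsd return their second operand when the
   comparison is unordered, so "a < b ? a : b" is IEEE-safe only with the
   operands kept in exactly this order (a NaN in either position yields b).  */
static double
sse_min_sketch (double a, double b)
{
  return a < b ? a : b;
}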
13070
13071 /* Expand an sse vector comparison. Return the register with the result. */
13072
13073 static rtx
13074 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
13075 rtx op_true, rtx op_false)
13076 {
13077 enum machine_mode mode = GET_MODE (dest);
13078 rtx x;
13079
13080 cmp_op0 = force_reg (mode, cmp_op0);
13081 if (!nonimmediate_operand (cmp_op1, mode))
13082 cmp_op1 = force_reg (mode, cmp_op1);
13083
13084 if (optimize
13085 || reg_overlap_mentioned_p (dest, op_true)
13086 || reg_overlap_mentioned_p (dest, op_false))
13087 dest = gen_reg_rtx (mode);
13088
13089 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
13090 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13091
13092 return dest;
13093 }
13094
13095 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
13096 operations. This is used for both scalar and vector conditional moves. */
13097
13098 static void
13099 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
13100 {
13101 enum machine_mode mode = GET_MODE (dest);
13102 rtx t2, t3, x;
13103
13104 if (TARGET_SSE5)
13105 {
13106 rtx pcmov = gen_rtx_SET (VOIDmode, dest,
13107 gen_rtx_IF_THEN_ELSE (mode, cmp,
13108 op_true,
13109 op_false));
13110 emit_insn (pcmov);
13111 }
13112 else if (op_false == CONST0_RTX (mode))
13113 {
13114 op_true = force_reg (mode, op_true);
13115 x = gen_rtx_AND (mode, cmp, op_true);
13116 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13117 }
13118 else if (op_true == CONST0_RTX (mode))
13119 {
13120 op_false = force_reg (mode, op_false);
13121 x = gen_rtx_NOT (mode, cmp);
13122 x = gen_rtx_AND (mode, x, op_false);
13123 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13124 }
13125 else
13126 {
13127 op_true = force_reg (mode, op_true);
13128 op_false = force_reg (mode, op_false);
13129
13130 t2 = gen_reg_rtx (mode);
13131 if (optimize)
13132 t3 = gen_reg_rtx (mode);
13133 else
13134 t3 = dest;
13135
13136 x = gen_rtx_AND (mode, op_true, cmp);
13137 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
13138
13139 x = gen_rtx_NOT (mode, cmp);
13140 x = gen_rtx_AND (mode, x, op_false);
13141 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
13142
13143 x = gen_rtx_IOR (mode, t3, t2);
13144 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13145 }
13146 }
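
/* Illustrative sketch, not part of GCC: the AND/ANDNOT/OR blend that
   ix86_expand_sse_movcc above falls back to when no native conditional
   move (SSE5 pcmov) is available.  CMP is assumed to be the all-ones /
   all-zeros mask produced by an SSE compare.  */
static unsigned int
mask_blend_sketch (unsigned int cmp, unsigned int op_true, unsigned int op_false)
{
  return (cmp & op_true) | (~cmp & op_false);
}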
13147
13148 /* Expand a floating-point conditional move. Return true if successful. */
13149
13150 int
13151 ix86_expand_fp_movcc (rtx operands[])
13152 {
13153 enum machine_mode mode = GET_MODE (operands[0]);
13154 enum rtx_code code = GET_CODE (operands[1]);
13155 rtx tmp, compare_op, second_test, bypass_test;
13156
13157 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
13158 {
13159 enum machine_mode cmode;
13160
13161 /* Since we've no cmove for sse registers, don't force bad register
13162 allocation just to gain access to it. Deny movcc when the
13163 comparison mode doesn't match the move mode. */
13164 cmode = GET_MODE (ix86_compare_op0);
13165 if (cmode == VOIDmode)
13166 cmode = GET_MODE (ix86_compare_op1);
13167 if (cmode != mode)
13168 return 0;
13169
13170 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13171 &ix86_compare_op0,
13172 &ix86_compare_op1);
13173 if (code == UNKNOWN)
13174 return 0;
13175
13176 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
13177 ix86_compare_op1, operands[2],
13178 operands[3]))
13179 return 1;
13180
13181 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
13182 ix86_compare_op1, operands[2], operands[3]);
13183 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
13184 return 1;
13185 }
13186
13187 /* The floating point conditional move instructions don't directly
13188 support conditions resulting from a signed integer comparison. */
13189
13190 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13191
13195 if (!fcmov_comparison_operator (compare_op, VOIDmode))
13196 {
13197 gcc_assert (!second_test && !bypass_test);
13198 tmp = gen_reg_rtx (QImode);
13199 ix86_expand_setcc (code, tmp);
13200 code = NE;
13201 ix86_compare_op0 = tmp;
13202 ix86_compare_op1 = const0_rtx;
13203 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13204 }
13205 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
13206 {
13207 tmp = gen_reg_rtx (mode);
13208 emit_move_insn (tmp, operands[3]);
13209 operands[3] = tmp;
13210 }
13211 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
13212 {
13213 tmp = gen_reg_rtx (mode);
13214 emit_move_insn (tmp, operands[2]);
13215 operands[2] = tmp;
13216 }
13217
13218 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13219 gen_rtx_IF_THEN_ELSE (mode, compare_op,
13220 operands[2], operands[3])));
13221 if (bypass_test)
13222 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13223 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
13224 operands[3], operands[0])));
13225 if (second_test)
13226 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13227 gen_rtx_IF_THEN_ELSE (mode, second_test,
13228 operands[2], operands[0])));
13229
13230 return 1;
13231 }
13232
13233 /* Expand a floating-point vector conditional move; a vcond operation
13234 rather than a movcc operation. */
13235
13236 bool
13237 ix86_expand_fp_vcond (rtx operands[])
13238 {
13239 enum rtx_code code = GET_CODE (operands[3]);
13240 rtx cmp;
13241
13242 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13243 &operands[4], &operands[5]);
13244 if (code == UNKNOWN)
13245 return false;
13246
13247 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
13248 operands[5], operands[1], operands[2]))
13249 return true;
13250
13251 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
13252 operands[1], operands[2]);
13253 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
13254 return true;
13255 }
13256
13257 /* Expand a signed/unsigned integral vector conditional move. */
13258
13259 bool
13260 ix86_expand_int_vcond (rtx operands[])
13261 {
13262 enum machine_mode mode = GET_MODE (operands[0]);
13263 enum rtx_code code = GET_CODE (operands[3]);
13264 bool negate = false;
13265 rtx x, cop0, cop1;
13266
13267 cop0 = operands[4];
13268 cop1 = operands[5];
13269
13270 /* Canonicalize the comparison to EQ, GT, GTU. */
13271 switch (code)
13272 {
13273 case EQ:
13274 case GT:
13275 case GTU:
13276 break;
13277
13278 case NE:
13279 case LE:
13280 case LEU:
13281 code = reverse_condition (code);
13282 negate = true;
13283 break;
13284
13285 case GE:
13286 case GEU:
13287 code = reverse_condition (code);
13288 negate = true;
13289 /* FALLTHRU */
13290
13291 case LT:
13292 case LTU:
13293 code = swap_condition (code);
13294 x = cop0, cop0 = cop1, cop1 = x;
13295 break;
13296
13297 default:
13298 gcc_unreachable ();
13299 }
13300
13301 /* Only SSE4.1/SSE4.2 supports V2DImode. */
13302 if (mode == V2DImode)
13303 {
13304 switch (code)
13305 {
13306 case EQ:
13307 /* SSE4.1 supports EQ. */
13308 if (!TARGET_SSE4_1)
13309 return false;
13310 break;
13311
13312 case GT:
13313 case GTU:
13314 /* SSE4.2 supports GT/GTU. */
13315 if (!TARGET_SSE4_2)
13316 return false;
13317 break;
13318
13319 default:
13320 gcc_unreachable ();
13321 }
13322 }
13323
13324 /* Unsigned parallel compare is not supported by the hardware. Play some
13325 tricks to turn this into a signed comparison, or an equality test
13326 against zero. */
13327 if (code == GTU)
13328 {
13329 cop0 = force_reg (mode, cop0);
13330 
13331 switch (mode)
13332 {
13333 case V4SImode:
13334 case V2DImode:
13335 {
13336 rtx t1, t2, mask;
13337 
13338 /* a GTU b is equivalent to (a ^ msb) GT (b ^ msb), where msb is a
13339 mask with only the per-element sign bit set. Flip the sign bit of
13340 both operands and fall back to a signed compare. */
13341 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
13342 true, false);
13343 
13344 t1 = gen_reg_rtx (mode);
13345 emit_insn ((mode == V4SImode
13346 ? gen_xorv4si3
13347 : gen_xorv2di3) (t1, cop0, mask));
13348 
13349 t2 = gen_reg_rtx (mode);
13350 emit_insn ((mode == V4SImode
13351 ? gen_xorv4si3
13352 : gen_xorv2di3) (t2, cop1, mask));
13353 
13354 code = GT;
13355 cop0 = t1;
13356 cop1 = t2;
13357 }
13358 break;
13359 
13360 case V16QImode:
13361 case V8HImode:
13362 /* Perform a parallel unsigned saturating subtraction; a GTU b holds
13363 exactly when the saturating difference is nonzero. */
13364 x = gen_reg_rtx (mode);
13365 emit_insn (gen_rtx_SET (VOIDmode, x,
13366 gen_rtx_US_MINUS (mode, cop0, cop1)));
13367 
13368 code = EQ;
13369 negate = !negate;
13370 cop0 = x;
13371 cop1 = CONST0_RTX (mode);
13372 break;
13373 
13374 default:
13375 gcc_unreachable ();
13376 }
13377 }
13380
13381 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
13382 operands[1+negate], operands[2-negate]);
13383
13384 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
13385 operands[2-negate]);
13386 return true;
13387 }
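
/* Illustrative sketch, not part of GCC: the two identities behind the GTU
   handling in ix86_expand_int_vcond above.  For full-width elements,
   a GTU b equals a signed compare after flipping the per-element sign bit of
   both operands; for byte/word elements, a GTU b holds exactly when the
   unsigned saturating difference a - b is nonzero.  Two's complement wrap
   is assumed, as on x86.  */
static int
gtu_by_signflip_sketch (unsigned int a, unsigned int b)
{
  unsigned int msb = 0x80000000u;
  return (int) (a ^ msb) > (int) (b ^ msb);   /* equals a > b unsigned */
}

static int
gtu_by_satsub_sketch (unsigned char a, unsigned char b)
{
  unsigned char diff = (a >= b) ? (unsigned char) (a - b) : 0;  /* saturating a - b */
  return diff != 0;                                             /* equals a > b */
}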
13388
13389 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
13390 true if we should do zero extension, else sign extension. HIGH_P is
13391 true if we want the N/2 high elements, else the low elements. */
13392
13393 void
13394 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13395 {
13396 enum machine_mode imode = GET_MODE (operands[1]);
13397 rtx (*unpack)(rtx, rtx, rtx);
13398 rtx se, dest;
13399
13400 switch (imode)
13401 {
13402 case V16QImode:
13403 if (high_p)
13404 unpack = gen_vec_interleave_highv16qi;
13405 else
13406 unpack = gen_vec_interleave_lowv16qi;
13407 break;
13408 case V8HImode:
13409 if (high_p)
13410 unpack = gen_vec_interleave_highv8hi;
13411 else
13412 unpack = gen_vec_interleave_lowv8hi;
13413 break;
13414 case V4SImode:
13415 if (high_p)
13416 unpack = gen_vec_interleave_highv4si;
13417 else
13418 unpack = gen_vec_interleave_lowv4si;
13419 break;
13420 default:
13421 gcc_unreachable ();
13422 }
13423
13424 dest = gen_lowpart (imode, operands[0]);
13425
13426 if (unsigned_p)
13427 se = force_reg (imode, CONST0_RTX (imode));
13428 else
13429 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
13430 operands[1], pc_rtx, pc_rtx);
13431
13432 emit_insn (unpack (dest, operands[1], se));
13433 }
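
/* Illustrative sketch, not part of GCC: what interleaving with either zero or
   a sign mask does to one element in ix86_expand_sse_unpack above.  The sign
   mask is exactly the all-ones/all-zeros result of the "0 > x" compare.  */
static short
widen_element_sketch (signed char x, int unsigned_p)
{
  unsigned char se = unsigned_p ? 0x00 : (x < 0 ? 0xff : 0x00);  /* zero or sign */
  return (short) (((unsigned short) se << 8) | (unsigned char) x);
}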
13434
13435 /* This function performs the same task as ix86_expand_sse_unpack,
13436 but with SSE4.1 instructions. */
13437
13438 void
13439 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13440 {
13441 enum machine_mode imode = GET_MODE (operands[1]);
13442 rtx (*unpack)(rtx, rtx);
13443 rtx src, dest;
13444
13445 switch (imode)
13446 {
13447 case V16QImode:
13448 if (unsigned_p)
13449 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
13450 else
13451 unpack = gen_sse4_1_extendv8qiv8hi2;
13452 break;
13453 case V8HImode:
13454 if (unsigned_p)
13455 unpack = gen_sse4_1_zero_extendv4hiv4si2;
13456 else
13457 unpack = gen_sse4_1_extendv4hiv4si2;
13458 break;
13459 case V4SImode:
13460 if (unsigned_p)
13461 unpack = gen_sse4_1_zero_extendv2siv2di2;
13462 else
13463 unpack = gen_sse4_1_extendv2siv2di2;
13464 break;
13465 default:
13466 gcc_unreachable ();
13467 }
13468
13469 dest = operands[0];
13470 if (high_p)
13471 {
13472 /* Shift higher 8 bytes to lower 8 bytes. */
13473 src = gen_reg_rtx (imode);
13474 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
13475 gen_lowpart (TImode, operands[1]),
13476 GEN_INT (64)));
13477 }
13478 else
13479 src = operands[1];
13480
13481 emit_insn (unpack (dest, src));
13482 }
13483
13484 /* This function performs the same task as ix86_expand_sse_unpack,
13485 but with SSE5 instructions. */
13486
13487 #define PPERM_SRC 0x00 /* copy source */
13488 #define PPERM_INVERT 0x20 /* invert source */
13489 #define PPERM_REVERSE 0x40 /* bit reverse source */
13490 #define PPERM_REV_INV 0x60 /* bit reverse & invert src */
13491 #define PPERM_ZERO 0x80 /* all 0's */
13492 #define PPERM_ONES 0xa0 /* all 1's */
13493 #define PPERM_SIGN 0xc0 /* propagate sign bit */
13494 #define PPERM_INV_SIGN 0xe0 /* invert & propagate sign */
13495
13496 #define PPERM_SRC1 0x00 /* use first source byte */
13497 #define PPERM_SRC2 0x10 /* use second source byte */
13498
13499 void
13500 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13501 {
13502 enum machine_mode imode = GET_MODE (operands[1]);
13503 int pperm_bytes[16];
13504 int i;
13505 int h = (high_p) ? 8 : 0;
13506 int h2;
13507 int sign_extend;
13508 rtvec v = rtvec_alloc (16);
13509 rtvec vs;
13510 rtx x, p;
13511 rtx op0 = operands[0], op1 = operands[1];
13512
13513 switch (imode)
13514 {
13515 case V16QImode:
13516 vs = rtvec_alloc (8);
13517 h2 = (high_p) ? 8 : 0;
13518 for (i = 0; i < 8; i++)
13519 {
13520 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
13521 pperm_bytes[2*i+1] = ((unsigned_p)
13522 ? PPERM_ZERO
13523 : PPERM_SIGN | PPERM_SRC2 | i | h);
13524 }
13525
13526 for (i = 0; i < 16; i++)
13527 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13528
13529 for (i = 0; i < 8; i++)
13530 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13531
13532 p = gen_rtx_PARALLEL (VOIDmode, vs);
13533 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13534 if (unsigned_p)
13535 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
13536 else
13537 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
13538 break;
13539
13540 case V8HImode:
13541 vs = rtvec_alloc (4);
13542 h2 = (high_p) ? 4 : 0;
13543 for (i = 0; i < 4; i++)
13544 {
13545 sign_extend = ((unsigned_p)
13546 ? PPERM_ZERO
13547 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
13548 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
13549 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
13550 pperm_bytes[4*i+2] = sign_extend;
13551 pperm_bytes[4*i+3] = sign_extend;
13552 }
13553
13554 for (i = 0; i < 16; i++)
13555 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13556
13557 for (i = 0; i < 4; i++)
13558 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13559
13560 p = gen_rtx_PARALLEL (VOIDmode, vs);
13561 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13562 if (unsigned_p)
13563 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
13564 else
13565 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
13566 break;
13567
13568 case V4SImode:
13569 vs = rtvec_alloc (2);
13570 h2 = (high_p) ? 2 : 0;
13571 for (i = 0; i < 2; i++)
13572 {
13573 sign_extend = ((unsigned_p)
13574 ? PPERM_ZERO
13575 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
13576 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
13577 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
13578 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
13579 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
13580 pperm_bytes[8*i+4] = sign_extend;
13581 pperm_bytes[8*i+5] = sign_extend;
13582 pperm_bytes[8*i+6] = sign_extend;
13583 pperm_bytes[8*i+7] = sign_extend;
13584 }
13585
13586 for (i = 0; i < 16; i++)
13587 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13588
13589 for (i = 0; i < 2; i++)
13590 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13591
13592 p = gen_rtx_PARALLEL (VOIDmode, vs);
13593 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13594 if (unsigned_p)
13595 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
13596 else
13597 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
13598 break;
13599
13600 default:
13601 gcc_unreachable ();
13602 }
13603
13604 return;
13605 }
13606
13607 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
13608 next narrower integer vector type */
13609 void
13610 ix86_expand_sse5_pack (rtx operands[3])
13611 {
13612 enum machine_mode imode = GET_MODE (operands[0]);
13613 int pperm_bytes[16];
13614 int i;
13615 rtvec v = rtvec_alloc (16);
13616 rtx x;
13617 rtx op0 = operands[0];
13618 rtx op1 = operands[1];
13619 rtx op2 = operands[2];
13620
13621 switch (imode)
13622 {
13623 case V16QImode:
13624 for (i = 0; i < 8; i++)
13625 {
13626 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
13627 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
13628 }
13629
13630 for (i = 0; i < 16; i++)
13631 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13632
13633 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13634 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
13635 break;
13636
13637 case V8HImode:
13638 for (i = 0; i < 4; i++)
13639 {
13640 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
13641 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
13642 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
13643 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
13644 }
13645
13646 for (i = 0; i < 16; i++)
13647 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13648
13649 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13650 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
13651 break;
13652
13653 case V4SImode:
13654 for (i = 0; i < 2; i++)
13655 {
13656 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
13657 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
13658 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
13659 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
13660 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
13661 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
13662 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
13663 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
13664 }
13665
13666 for (i = 0; i < 16; i++)
13667 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13668
13669 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13670 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
13671 break;
13672
13673 default:
13674 gcc_unreachable ();
13675 }
13676
13677 return;
13678 }
13679
13680 /* Expand conditional increment or decrement using adc/sbb instructions.
13681 The default case using setcc followed by the conditional move can be
13682 done by generic code. */
13683 int
13684 ix86_expand_int_addcc (rtx operands[])
13685 {
13686 enum rtx_code code = GET_CODE (operands[1]);
13687 rtx compare_op;
13688 rtx val = const0_rtx;
13689 bool fpcmp = false;
13690 enum machine_mode mode = GET_MODE (operands[0]);
13691
13692 if (operands[3] != const1_rtx
13693 && operands[3] != constm1_rtx)
13694 return 0;
13695 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
13696 ix86_compare_op1, &compare_op))
13697 return 0;
13698 code = GET_CODE (compare_op);
13699
13700 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
13701 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
13702 {
13703 fpcmp = true;
13704 code = ix86_fp_compare_code_to_integer (code);
13705 }
13706
13707 if (code != LTU)
13708 {
13709 val = constm1_rtx;
13710 if (fpcmp)
13711 PUT_CODE (compare_op,
13712 reverse_condition_maybe_unordered
13713 (GET_CODE (compare_op)));
13714 else
13715 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
13716 }
13717 PUT_MODE (compare_op, mode);
13718
13719 /* Construct either adc or sbb insn. */
13720 if ((code == LTU) == (operands[3] == constm1_rtx))
13721 {
13722 switch (GET_MODE (operands[0]))
13723 {
13724 case QImode:
13725 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
13726 break;
13727 case HImode:
13728 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
13729 break;
13730 case SImode:
13731 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
13732 break;
13733 case DImode:
13734 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13735 break;
13736 default:
13737 gcc_unreachable ();
13738 }
13739 }
13740 else
13741 {
13742 switch (GET_MODE (operands[0]))
13743 {
13744 case QImode:
13745 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
13746 break;
13747 case HImode:
13748 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
13749 break;
13750 case SImode:
13751 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
13752 break;
13753 case DImode:
13754 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13755 break;
13756 default:
13757 gcc_unreachable ();
13758 }
13759 }
13760 return 1; /* DONE */
13761 }
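
/* Illustrative sketch, not part of GCC: the effect of the adc/sbb sequence
   built by ix86_expand_int_addcc above - a branch-free conditional
   increment or decrement driven by the carry flag of an unsigned compare.  */
static unsigned int
cond_increment_sketch (unsigned int x, unsigned int a, unsigned int b)
{
  unsigned int carry = (a < b);   /* what an LTU compare leaves in CF */
  return x + carry;               /* adc with 0: x + 1 when a < b, else x */
}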
13762
13763
13764 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
13765 works for floating point parameters and non-offsettable memories.
13766 For pushes, it returns just stack offsets; the values will be saved
13767 in the right order. At most three parts are generated. */
13768
13769 static int
13770 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
13771 {
13772 int size;
13773
13774 if (!TARGET_64BIT)
13775 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
13776 else
13777 size = (GET_MODE_SIZE (mode) + 4) / 8;
13778
13779 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
13780 gcc_assert (size >= 2 && size <= 3);
13781
13782 /* Optimize constant pool references to immediates. This is used by fp
13783 moves that force all constants to memory to allow combining. */
13784 if (MEM_P (operand) && MEM_READONLY_P (operand))
13785 {
13786 rtx tmp = maybe_get_pool_constant (operand);
13787 if (tmp)
13788 operand = tmp;
13789 }
13790
13791 if (MEM_P (operand) && !offsettable_memref_p (operand))
13792 {
13793 /* The only non-offsettable memories we handle are pushes. */
13794 int ok = push_operand (operand, VOIDmode);
13795
13796 gcc_assert (ok);
13797
13798 operand = copy_rtx (operand);
13799 PUT_MODE (operand, Pmode);
13800 parts[0] = parts[1] = parts[2] = operand;
13801 return size;
13802 }
13803
13804 if (GET_CODE (operand) == CONST_VECTOR)
13805 {
13806 enum machine_mode imode = int_mode_for_mode (mode);
13807 /* Caution: if we looked through a constant pool memory above,
13808 the operand may actually have a different mode now. That's
13809 ok, since we want to pun this all the way back to an integer. */
13810 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
13811 gcc_assert (operand != NULL);
13812 mode = imode;
13813 }
13814
13815 if (!TARGET_64BIT)
13816 {
13817 if (mode == DImode)
13818 split_di (&operand, 1, &parts[0], &parts[1]);
13819 else
13820 {
13821 if (REG_P (operand))
13822 {
13823 gcc_assert (reload_completed);
13824 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
13825 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
13826 if (size == 3)
13827 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
13828 }
13829 else if (offsettable_memref_p (operand))
13830 {
13831 operand = adjust_address (operand, SImode, 0);
13832 parts[0] = operand;
13833 parts[1] = adjust_address (operand, SImode, 4);
13834 if (size == 3)
13835 parts[2] = adjust_address (operand, SImode, 8);
13836 }
13837 else if (GET_CODE (operand) == CONST_DOUBLE)
13838 {
13839 REAL_VALUE_TYPE r;
13840 long l[4];
13841
13842 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13843 switch (mode)
13844 {
13845 case XFmode:
13846 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
13847 parts[2] = gen_int_mode (l[2], SImode);
13848 break;
13849 case DFmode:
13850 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
13851 break;
13852 default:
13853 gcc_unreachable ();
13854 }
13855 parts[1] = gen_int_mode (l[1], SImode);
13856 parts[0] = gen_int_mode (l[0], SImode);
13857 }
13858 else
13859 gcc_unreachable ();
13860 }
13861 }
13862 else
13863 {
13864 if (mode == TImode)
13865 split_ti (&operand, 1, &parts[0], &parts[1]);
13866 if (mode == XFmode || mode == TFmode)
13867 {
13868 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
13869 if (REG_P (operand))
13870 {
13871 gcc_assert (reload_completed);
13872 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
13873 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
13874 }
13875 else if (offsettable_memref_p (operand))
13876 {
13877 operand = adjust_address (operand, DImode, 0);
13878 parts[0] = operand;
13879 parts[1] = adjust_address (operand, upper_mode, 8);
13880 }
13881 else if (GET_CODE (operand) == CONST_DOUBLE)
13882 {
13883 REAL_VALUE_TYPE r;
13884 long l[4];
13885
13886 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13887 real_to_target (l, &r, mode);
13888
13889 /* Do not use shift by 32 to avoid warning on 32bit systems. */
13890 if (HOST_BITS_PER_WIDE_INT >= 64)
13891 parts[0]
13892 = gen_int_mode
13893 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
13894 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
13895 DImode);
13896 else
13897 parts[0] = immed_double_const (l[0], l[1], DImode);
13898
13899 if (upper_mode == SImode)
13900 parts[1] = gen_int_mode (l[2], SImode);
13901 else if (HOST_BITS_PER_WIDE_INT >= 64)
13902 parts[1]
13903 = gen_int_mode
13904 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
13905 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
13906 DImode);
13907 else
13908 parts[1] = immed_double_const (l[2], l[3], DImode);
13909 }
13910 else
13911 gcc_unreachable ();
13912 }
13913 }
13914
13915 return size;
13916 }
13917
13918 /* Emit insns to perform a move or push of DI, DF, and XF values.
13919 Operands 2-4 receive the destination parts in the correct order;
13920 operands 5-7 receive the corresponding source parts. */
13922
13923 void
13924 ix86_split_long_move (rtx operands[])
13925 {
13926 rtx part[2][3];
13927 int nparts;
13928 int push = 0;
13929 int collisions = 0;
13930 enum machine_mode mode = GET_MODE (operands[0]);
13931
13932 /* The DFmode expanders may ask us to move a double.
13933 For a 64-bit target this is a single move. By hiding that fact
13934 here we simplify the i386.md splitters. */
13935 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
13936 {
13937 /* Optimize constant pool references to immediates. This is used by
13938 fp moves that force all constants to memory to allow combining. */
13939
13940 if (MEM_P (operands[1])
13941 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
13942 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
13943 operands[1] = get_pool_constant (XEXP (operands[1], 0));
13944 if (push_operand (operands[0], VOIDmode))
13945 {
13946 operands[0] = copy_rtx (operands[0]);
13947 PUT_MODE (operands[0], Pmode);
13948 }
13949 else
13950 operands[0] = gen_lowpart (DImode, operands[0]);
13951 operands[1] = gen_lowpart (DImode, operands[1]);
13952 emit_move_insn (operands[0], operands[1]);
13953 return;
13954 }
13955
13956 /* The only non-offsettable memory we handle is push. */
13957 if (push_operand (operands[0], VOIDmode))
13958 push = 1;
13959 else
13960 gcc_assert (!MEM_P (operands[0])
13961 || offsettable_memref_p (operands[0]));
13962
13963 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
13964 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
13965
13966 /* When emitting push, take care for source operands on the stack. */
13967 if (push && MEM_P (operands[1])
13968 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
13969 {
13970 if (nparts == 3)
13971 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
13972 XEXP (part[1][2], 0));
13973 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
13974 XEXP (part[1][1], 0));
13975 }
13976
13977 /* We need to do the copy in the right order in case an address register
13978 of the source overlaps the destination. */
13979 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
13980 {
13981 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
13982 collisions++;
13983 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13984 collisions++;
13985 if (nparts == 3
13986 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
13987 collisions++;
13988
13989 /* Collision in the middle part can be handled by reordering. */
13990 if (collisions == 1 && nparts == 3
13991 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13992 {
13993 rtx tmp;
13994 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
13995 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
13996 }
13997
13998 /* If there are more collisions, we can't handle it by reordering.
13999 Do an lea to the last part and use only one colliding move. */
14000 else if (collisions > 1)
14001 {
14002 rtx base;
14003
14004 collisions = 1;
14005
14006 base = part[0][nparts - 1];
14007
14008 /* Handle the case when the last part isn't valid for lea.
14009 Happens in 64-bit mode storing the 12-byte XFmode. */
14010 if (GET_MODE (base) != Pmode)
14011 base = gen_rtx_REG (Pmode, REGNO (base));
14012
14013 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
14014 part[1][0] = replace_equiv_address (part[1][0], base);
14015 part[1][1] = replace_equiv_address (part[1][1],
14016 plus_constant (base, UNITS_PER_WORD));
14017 if (nparts == 3)
14018 part[1][2] = replace_equiv_address (part[1][2],
14019 plus_constant (base, 8));
14020 }
14021 }
14022
14023 if (push)
14024 {
14025 if (!TARGET_64BIT)
14026 {
14027 if (nparts == 3)
14028 {
14029 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
14030 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
14031 emit_move_insn (part[0][2], part[1][2]);
14032 }
14033 }
14034 else
14035 {
14036 /* In 64-bit mode we don't have a 32-bit push available. If this is a
14037 register, that is OK - we will just use the larger counterpart. We also
14038 retype the memory - this comes from an attempt to avoid the REX prefix
14039 on moving the second half of a TFmode value. */
14040 if (GET_MODE (part[1][1]) == SImode)
14041 {
14042 switch (GET_CODE (part[1][1]))
14043 {
14044 case MEM:
14045 part[1][1] = adjust_address (part[1][1], DImode, 0);
14046 break;
14047
14048 case REG:
14049 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
14050 break;
14051
14052 default:
14053 gcc_unreachable ();
14054 }
14055
14056 if (GET_MODE (part[1][0]) == SImode)
14057 part[1][0] = part[1][1];
14058 }
14059 }
14060 emit_move_insn (part[0][1], part[1][1]);
14061 emit_move_insn (part[0][0], part[1][0]);
14062 return;
14063 }
14064
14065 /* Choose correct order to not overwrite the source before it is copied. */
14066 if ((REG_P (part[0][0])
14067 && REG_P (part[1][1])
14068 && (REGNO (part[0][0]) == REGNO (part[1][1])
14069 || (nparts == 3
14070 && REGNO (part[0][0]) == REGNO (part[1][2]))))
14071 || (collisions > 0
14072 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
14073 {
14074 if (nparts == 3)
14075 {
14076 operands[2] = part[0][2];
14077 operands[3] = part[0][1];
14078 operands[4] = part[0][0];
14079 operands[5] = part[1][2];
14080 operands[6] = part[1][1];
14081 operands[7] = part[1][0];
14082 }
14083 else
14084 {
14085 operands[2] = part[0][1];
14086 operands[3] = part[0][0];
14087 operands[5] = part[1][1];
14088 operands[6] = part[1][0];
14089 }
14090 }
14091 else
14092 {
14093 if (nparts == 3)
14094 {
14095 operands[2] = part[0][0];
14096 operands[3] = part[0][1];
14097 operands[4] = part[0][2];
14098 operands[5] = part[1][0];
14099 operands[6] = part[1][1];
14100 operands[7] = part[1][2];
14101 }
14102 else
14103 {
14104 operands[2] = part[0][0];
14105 operands[3] = part[0][1];
14106 operands[5] = part[1][0];
14107 operands[6] = part[1][1];
14108 }
14109 }
14110
14111 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
14112 if (optimize_size)
14113 {
14114 if (CONST_INT_P (operands[5])
14115 && operands[5] != const0_rtx
14116 && REG_P (operands[2]))
14117 {
14118 if (CONST_INT_P (operands[6])
14119 && INTVAL (operands[6]) == INTVAL (operands[5]))
14120 operands[6] = operands[2];
14121
14122 if (nparts == 3
14123 && CONST_INT_P (operands[7])
14124 && INTVAL (operands[7]) == INTVAL (operands[5]))
14125 operands[7] = operands[2];
14126 }
14127
14128 if (nparts == 3
14129 && CONST_INT_P (operands[6])
14130 && operands[6] != const0_rtx
14131 && REG_P (operands[3])
14132 && CONST_INT_P (operands[7])
14133 && INTVAL (operands[7]) == INTVAL (operands[6]))
14134 operands[7] = operands[3];
14135 }
14136
14137 emit_move_insn (operands[2], operands[5]);
14138 emit_move_insn (operands[3], operands[6]);
14139 if (nparts == 3)
14140 emit_move_insn (operands[4], operands[7]);
14141
14142 return;
14143 }
14144
14145 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
14146 left shift by a constant, either using a single shift or
14147 a sequence of add instructions. */
14148
14149 static void
14150 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
14151 {
14152 if (count == 1)
14153 {
14154 emit_insn ((mode == DImode
14155 ? gen_addsi3
14156 : gen_adddi3) (operand, operand, operand));
14157 }
14158 else if (!optimize_size
14159 && count * ix86_cost->add <= ix86_cost->shift_const)
14160 {
14161 int i;
14162 for (i=0; i<count; i++)
14163 {
14164 emit_insn ((mode == DImode
14165 ? gen_addsi3
14166 : gen_adddi3) (operand, operand, operand));
14167 }
14168 }
14169 else
14170 emit_insn ((mode == DImode
14171 ? gen_ashlsi3
14172 : gen_ashldi3) (operand, operand, GEN_INT (count)));
14173 }
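
/* Illustrative sketch, not part of GCC: shifting left by repeated
   self-addition, as ix86_expand_ashl_const above prefers when COUNT small
   additions cost no more than one shift-by-constant on the target.  */
static unsigned long
shl_by_adds_sketch (unsigned long x, int count)
{
  while (count-- > 0)
    x += x;     /* each addition doubles the value: one left shift */
  return x;
}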
14174
14175 void
14176 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
14177 {
14178 rtx low[2], high[2];
14179 int count;
14180 const int single_width = mode == DImode ? 32 : 64;
14181
14182 if (CONST_INT_P (operands[2]))
14183 {
14184 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14185 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14186
14187 if (count >= single_width)
14188 {
14189 emit_move_insn (high[0], low[1]);
14190 emit_move_insn (low[0], const0_rtx);
14191
14192 if (count > single_width)
14193 ix86_expand_ashl_const (high[0], count - single_width, mode);
14194 }
14195 else
14196 {
14197 if (!rtx_equal_p (operands[0], operands[1]))
14198 emit_move_insn (operands[0], operands[1]);
14199 emit_insn ((mode == DImode
14200 ? gen_x86_shld_1
14201 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
14202 ix86_expand_ashl_const (low[0], count, mode);
14203 }
14204 return;
14205 }
14206
14207 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14208
14209 if (operands[1] == const1_rtx)
14210 {
14211 /* Assuming we've chosen QImode-capable registers, 1 << N
14212 can be done with two 32/64-bit shifts, no branches, no cmoves. */
14213 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
14214 {
14215 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
14216
14217 ix86_expand_clear (low[0]);
14218 ix86_expand_clear (high[0]);
14219 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
14220
14221 d = gen_lowpart (QImode, low[0]);
14222 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
14223 s = gen_rtx_EQ (QImode, flags, const0_rtx);
14224 emit_insn (gen_rtx_SET (VOIDmode, d, s));
14225
14226 d = gen_lowpart (QImode, high[0]);
14227 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
14228 s = gen_rtx_NE (QImode, flags, const0_rtx);
14229 emit_insn (gen_rtx_SET (VOIDmode, d, s));
14230 }
14231
14232 /* Otherwise, we can get the same results by manually performing
14233 a bit extract operation on bit 5/6, and then performing the two
14234 shifts. The two methods of getting 0/1 into low/high are exactly
14235 the same size. Avoiding the shift in the bit extract case helps
14236 pentium4 a bit; no one else seems to care much either way. */
14237 else
14238 {
14239 rtx x;
14240
14241 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
14242 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
14243 else
14244 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
14245 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
14246
14247 emit_insn ((mode == DImode
14248 ? gen_lshrsi3
14249 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
14250 emit_insn ((mode == DImode
14251 ? gen_andsi3
14252 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
14253 emit_move_insn (low[0], high[0]);
14254 emit_insn ((mode == DImode
14255 ? gen_xorsi3
14256 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
14257 }
14258
14259 emit_insn ((mode == DImode
14260 ? gen_ashlsi3
14261 : gen_ashldi3) (low[0], low[0], operands[2]));
14262 emit_insn ((mode == DImode
14263 ? gen_ashlsi3
14264 : gen_ashldi3) (high[0], high[0], operands[2]));
14265 return;
14266 }
14267
14268 if (operands[1] == constm1_rtx)
14269 {
14270 /* For -1 << N, we can avoid the shld instruction, because we
14271 know that we're shifting 0...31/63 ones into a -1. */
14272 emit_move_insn (low[0], constm1_rtx);
14273 if (optimize_size)
14274 emit_move_insn (high[0], low[0]);
14275 else
14276 emit_move_insn (high[0], constm1_rtx);
14277 }
14278 else
14279 {
14280 if (!rtx_equal_p (operands[0], operands[1]))
14281 emit_move_insn (operands[0], operands[1]);
14282
14283 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14284 emit_insn ((mode == DImode
14285 ? gen_x86_shld_1
14286 : gen_x86_64_shld) (high[0], low[0], operands[2]));
14287 }
14288
14289 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
14290
14291 if (TARGET_CMOVE && scratch)
14292 {
14293 ix86_expand_clear (scratch);
14294 emit_insn ((mode == DImode
14295 ? gen_x86_shift_adj_1
14296 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
14297 }
14298 else
14299 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
14300 }
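
/* Illustrative sketch, not part of GCC: the double-word left shift that
   ix86_split_ashl above assembles from shld, a plain shift, and the
   "shift adjust" step, here for a DImode value held as two 32-bit halves
   and a variable count in 0..63.  */
static void
dshift_left_sketch (unsigned int *lo, unsigned int *hi, unsigned int count)
{
  unsigned int c = count & 31;                                    /* x86 masks the count */
  unsigned int new_hi = (*hi << c) | (c ? *lo >> (32 - c) : 0);   /* shld */
  unsigned int new_lo = *lo << c;                                 /* shl  */

  if (count & 32)   /* shift adjust: when count >= 32 the low half moves up */
    {
      new_hi = new_lo;
      new_lo = 0;
    }
  *hi = new_hi;
  *lo = new_lo;
}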
14301
14302 void
14303 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
14304 {
14305 rtx low[2], high[2];
14306 int count;
14307 const int single_width = mode == DImode ? 32 : 64;
14308
14309 if (CONST_INT_P (operands[2]))
14310 {
14311 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14312 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14313
14314 if (count == single_width * 2 - 1)
14315 {
14316 emit_move_insn (high[0], high[1]);
14317 emit_insn ((mode == DImode
14318 ? gen_ashrsi3
14319 : gen_ashrdi3) (high[0], high[0],
14320 GEN_INT (single_width - 1)));
14321 emit_move_insn (low[0], high[0]);
14322
14323 }
14324 else if (count >= single_width)
14325 {
14326 emit_move_insn (low[0], high[1]);
14327 emit_move_insn (high[0], low[0]);
14328 emit_insn ((mode == DImode
14329 ? gen_ashrsi3
14330 : gen_ashrdi3) (high[0], high[0],
14331 GEN_INT (single_width - 1)));
14332 if (count > single_width)
14333 emit_insn ((mode == DImode
14334 ? gen_ashrsi3
14335 : gen_ashrdi3) (low[0], low[0],
14336 GEN_INT (count - single_width)));
14337 }
14338 else
14339 {
14340 if (!rtx_equal_p (operands[0], operands[1]))
14341 emit_move_insn (operands[0], operands[1]);
14342 emit_insn ((mode == DImode
14343 ? gen_x86_shrd_1
14344 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14345 emit_insn ((mode == DImode
14346 ? gen_ashrsi3
14347 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
14348 }
14349 }
14350 else
14351 {
14352 if (!rtx_equal_p (operands[0], operands[1]))
14353 emit_move_insn (operands[0], operands[1]);
14354
14355 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14356
14357 emit_insn ((mode == DImode
14358 ? gen_x86_shrd_1
14359 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14360 emit_insn ((mode == DImode
14361 ? gen_ashrsi3
14362 : gen_ashrdi3) (high[0], high[0], operands[2]));
14363
14364 if (TARGET_CMOVE && scratch)
14365 {
14366 emit_move_insn (scratch, high[0]);
14367 emit_insn ((mode == DImode
14368 ? gen_ashrsi3
14369 : gen_ashrdi3) (scratch, scratch,
14370 GEN_INT (single_width - 1)));
14371 emit_insn ((mode == DImode
14372 ? gen_x86_shift_adj_1
14373 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
14374 scratch));
14375 }
14376 else
14377 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
14378 }
14379 }
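
/* Illustrative sketch, not part of GCC: the arithmetic double-word right
   shift handled by ix86_split_ashr above when the constant count is at
   least the word size - the high half collapses to its sign and the low
   half receives the old high half shifted by the remainder.  Arithmetic
   right shift of negative values is assumed, as on x86.  */
static void
dshift_ashr_large_sketch (int *lo, int *hi, unsigned int count)
{
  *lo = *hi >> (count - 32);   /* old high half, shifted by count - 32 */
  *hi = *hi >> 31;             /* sign fill: 0 or -1 */
}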
14380
14381 void
14382 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
14383 {
14384 rtx low[2], high[2];
14385 int count;
14386 const int single_width = mode == DImode ? 32 : 64;
14387
14388 if (CONST_INT_P (operands[2]))
14389 {
14390 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14391 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14392
14393 if (count >= single_width)
14394 {
14395 emit_move_insn (low[0], high[1]);
14396 ix86_expand_clear (high[0]);
14397
14398 if (count > single_width)
14399 emit_insn ((mode == DImode
14400 ? gen_lshrsi3
14401 : gen_lshrdi3) (low[0], low[0],
14402 GEN_INT (count - single_width)));
14403 }
14404 else
14405 {
14406 if (!rtx_equal_p (operands[0], operands[1]))
14407 emit_move_insn (operands[0], operands[1]);
14408 emit_insn ((mode == DImode
14409 ? gen_x86_shrd_1
14410 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14411 emit_insn ((mode == DImode
14412 ? gen_lshrsi3
14413 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
14414 }
14415 }
14416 else
14417 {
14418 if (!rtx_equal_p (operands[0], operands[1]))
14419 emit_move_insn (operands[0], operands[1]);
14420
14421 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14422
14423 emit_insn ((mode == DImode
14424 ? gen_x86_shrd_1
14425 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14426 emit_insn ((mode == DImode
14427 ? gen_lshrsi3
14428 : gen_lshrdi3) (high[0], high[0], operands[2]));
14429
14430 /* Heh. By reversing the arguments, we can reuse this pattern. */
14431 if (TARGET_CMOVE && scratch)
14432 {
14433 ix86_expand_clear (scratch);
14434 emit_insn ((mode == DImode
14435 ? gen_x86_shift_adj_1
14436 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
14437 scratch));
14438 }
14439 else
14440 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
14441 }
14442 }
14443
14444 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
14445 static void
14446 predict_jump (int prob)
14447 {
14448 rtx insn = get_last_insn ();
14449 gcc_assert (JUMP_P (insn));
14450 REG_NOTES (insn)
14451 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14452 GEN_INT (prob),
14453 REG_NOTES (insn));
14454 }
14455
14456 /* Helper function for the string operations below. Test whether VARIABLE
14457 is aligned to VALUE bytes. If so, jump to the returned label. */
14458 static rtx
14459 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
14460 {
14461 rtx label = gen_label_rtx ();
14462 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
14463 if (GET_MODE (variable) == DImode)
14464 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
14465 else
14466 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
14467 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
14468 1, label);
14469 if (epilogue)
14470 predict_jump (REG_BR_PROB_BASE * 50 / 100);
14471 else
14472 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14473 return label;
14474 }
14475
14476 /* Adjust COUNTER by the VALUE. */
14477 static void
14478 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
14479 {
14480 if (GET_MODE (countreg) == DImode)
14481 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
14482 else
14483 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
14484 }
14485
14486 /* Zero extend possibly SImode EXP to Pmode register. */
14487 rtx
14488 ix86_zero_extend_to_Pmode (rtx exp)
14489 {
14490 rtx r;
14491 if (GET_MODE (exp) == VOIDmode)
14492 return force_reg (Pmode, exp);
14493 if (GET_MODE (exp) == Pmode)
14494 return copy_to_mode_reg (Pmode, exp);
14495 r = gen_reg_rtx (Pmode);
14496 emit_insn (gen_zero_extendsidi2 (r, exp));
14497 return r;
14498 }
14499
14500 /* Divide COUNTREG by SCALE. */
14501 static rtx
14502 scale_counter (rtx countreg, int scale)
14503 {
14504 rtx sc;
14505 rtx piece_size_mask;
14506
14507 if (scale == 1)
14508 return countreg;
14509 if (CONST_INT_P (countreg))
14510 return GEN_INT (INTVAL (countreg) / scale);
14511 gcc_assert (REG_P (countreg));
14512
14513 piece_size_mask = GEN_INT (scale - 1);
14514 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
14515 GEN_INT (exact_log2 (scale)),
14516 NULL, 1, OPTAB_DIRECT);
14517 return sc;
14518 }
14519
14520 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
14521 DImode for constant loop counts. */
14522
14523 static enum machine_mode
14524 counter_mode (rtx count_exp)
14525 {
14526 if (GET_MODE (count_exp) != VOIDmode)
14527 return GET_MODE (count_exp);
14528 if (GET_CODE (count_exp) != CONST_INT)
14529 return Pmode;
14530 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
14531 return DImode;
14532 return SImode;
14533 }
14534
14535 /* When SRCPTR is non-NULL, output a simple loop to move memory pointed
14536 to by SRCPTR to DESTPTR via chunks of MODE, unrolled UNROLL times;
14537 the overall size is COUNT, specified in bytes. When SRCPTR is NULL,
14538 output the equivalent loop to set memory to VALUE (supposed to be in MODE).
14539 
14540 The size is rounded down to a whole number of chunks moved at once.
14541 SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info. */
14542
14543
14544 static void
14545 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
14546 rtx destptr, rtx srcptr, rtx value,
14547 rtx count, enum machine_mode mode, int unroll,
14548 int expected_size)
14549 {
14550 rtx out_label, top_label, iter, tmp;
14551 enum machine_mode iter_mode = counter_mode (count);
14552 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
14553 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
14554 rtx size;
14555 rtx x_addr;
14556 rtx y_addr;
14557 int i;
14558
14559 top_label = gen_label_rtx ();
14560 out_label = gen_label_rtx ();
14561 iter = gen_reg_rtx (iter_mode);
14562
14563 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
14564 NULL, 1, OPTAB_DIRECT);
14565 /* Those two should combine. */
14566 if (piece_size == const1_rtx)
14567 {
14568 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
14569 true, out_label);
14570 predict_jump (REG_BR_PROB_BASE * 10 / 100);
14571 }
14572 emit_move_insn (iter, const0_rtx);
14573
14574 emit_label (top_label);
14575
14576 tmp = convert_modes (Pmode, iter_mode, iter, true);
14577 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
14578 destmem = change_address (destmem, mode, x_addr);
14579
14580 if (srcmem)
14581 {
14582 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
14583 srcmem = change_address (srcmem, mode, y_addr);
14584
14585 /* When unrolling for chips that reorder memory reads and writes,
14586 we can save registers by using a single temporary.
14587 Also, using 4 temporaries is overkill in 32-bit mode. */
14588 if (!TARGET_64BIT && 0)
14589 {
14590 for (i = 0; i < unroll; i++)
14591 {
14592 if (i)
14593 {
14594 destmem =
14595 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14596 srcmem =
14597 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14598 }
14599 emit_move_insn (destmem, srcmem);
14600 }
14601 }
14602 else
14603 {
14604 rtx tmpreg[4];
14605 gcc_assert (unroll <= 4);
14606 for (i = 0; i < unroll; i++)
14607 {
14608 tmpreg[i] = gen_reg_rtx (mode);
14609 if (i)
14610 {
14611 srcmem =
14612 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14613 }
14614 emit_move_insn (tmpreg[i], srcmem);
14615 }
14616 for (i = 0; i < unroll; i++)
14617 {
14618 if (i)
14619 {
14620 destmem =
14621 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14622 }
14623 emit_move_insn (destmem, tmpreg[i]);
14624 }
14625 }
14626 }
14627 else
14628 for (i = 0; i < unroll; i++)
14629 {
14630 if (i)
14631 destmem =
14632 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14633 emit_move_insn (destmem, value);
14634 }
14635
14636 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
14637 true, OPTAB_LIB_WIDEN);
14638 if (tmp != iter)
14639 emit_move_insn (iter, tmp);
14640
14641 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
14642 true, top_label);
14643 if (expected_size != -1)
14644 {
14645 expected_size /= GET_MODE_SIZE (mode) * unroll;
14646 if (expected_size == 0)
14647 predict_jump (0);
14648 else if (expected_size > REG_BR_PROB_BASE)
14649 predict_jump (REG_BR_PROB_BASE - 1);
14650 else
14651 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
14652 }
14653 else
14654 predict_jump (REG_BR_PROB_BASE * 80 / 100);
14655 iter = ix86_zero_extend_to_Pmode (iter);
14656 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
14657 true, OPTAB_LIB_WIDEN);
14658 if (tmp != destptr)
14659 emit_move_insn (destptr, tmp);
14660 if (srcptr)
14661 {
14662 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
14663 true, OPTAB_LIB_WIDEN);
14664 if (tmp != srcptr)
14665 emit_move_insn (srcptr, tmp);
14666 }
14667 emit_label (out_label);
14668 }
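
/* Illustrative sketch, not part of GCC: the shape of the loop emitted by
   expand_set_or_movmem_via_loop above.  The byte count is rounded down to a
   whole number of unrolled chunks (chunk * unroll is assumed to be a power
   of two); the remaining tail is handled by the epilogue code elsewhere.  */
static unsigned long
copy_loop_sketch (char *dst, const char *src, unsigned long count,
                  unsigned long chunk, unsigned long unroll)
{
  unsigned long step = chunk * unroll;
  unsigned long size = count & ~(step - 1);  /* whole iterations only */
  unsigned long iter;

  for (iter = 0; iter < size; iter += step)
    {
      unsigned long i;
      for (i = 0; i < step; i++)             /* stands in for the unrolled moves */
        dst[iter + i] = src[iter + i];
    }
  return size;   /* bytes copied; both pointers advance by this amount */
}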
14669
14670 /* Output "rep; mov" instruction.
14671 Arguments have the same meaning as for the previous function. */
14672 static void
14673 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
14674 rtx destptr, rtx srcptr,
14675 rtx count,
14676 enum machine_mode mode)
14677 {
14678 rtx destexp;
14679 rtx srcexp;
14680 rtx countreg;
14681
14682 /* If the size is known, it is shorter to use rep movs. */
14683 if (mode == QImode && CONST_INT_P (count)
14684 && !(INTVAL (count) & 3))
14685 mode = SImode;
14686
14687 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14688 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14689 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
14690 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
14691 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14692 if (mode != QImode)
14693 {
14694 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14695 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14696 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14697 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
14698 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14699 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
14700 }
14701 else
14702 {
14703 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14704 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
14705 }
14706 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
14707 destexp, srcexp));
14708 }
14709
14710 /* Output "rep; stos" instruction.
14711 Arguments have the same meaning as for the previous function. */
14712 static void
14713 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
14714 rtx count,
14715 enum machine_mode mode)
14716 {
14717 rtx destexp;
14718 rtx countreg;
14719
14720 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14721 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14722 value = force_reg (mode, gen_lowpart (mode, value));
14723 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14724 if (mode != QImode)
14725 {
14726 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14727 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14728 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14729 }
14730 else
14731 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14732 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
14733 }
14734
14735 static void
14736 emit_strmov (rtx destmem, rtx srcmem,
14737 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
14738 {
14739 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
14740 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
14741 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14742 }
14743
14744 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
14745 static void
14746 expand_movmem_epilogue (rtx destmem, rtx srcmem,
14747 rtx destptr, rtx srcptr, rtx count, int max_size)
14748 {
14749 rtx src, dest;
14750 if (CONST_INT_P (count))
14751 {
14752 HOST_WIDE_INT countval = INTVAL (count);
14753 int offset = 0;
14754
14755 if ((countval & 0x10) && max_size > 16)
14756 {
14757 if (TARGET_64BIT)
14758 {
14759 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14760 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
14761 }
14762 else
14763 gcc_unreachable ();
14764 offset += 16;
14765 }
14766 if ((countval & 0x08) && max_size > 8)
14767 {
14768 if (TARGET_64BIT)
14769 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14770 else
14771 {
14772 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14773 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
14774 }
14775 offset += 8;
14776 }
14777 if ((countval & 0x04) && max_size > 4)
14778 {
14779 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14780 offset += 4;
14781 }
14782 if ((countval & 0x02) && max_size > 2)
14783 {
14784 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
14785 offset += 2;
14786 }
14787 if ((countval & 0x01) && max_size > 1)
14788 {
14789 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
14790 offset += 1;
14791 }
14792 return;
14793 }
14794 if (max_size > 8)
14795 {
14796 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
14797 count, 1, OPTAB_DIRECT);
14798 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
14799 count, QImode, 1, 4);
14800 return;
14801 }
14802
14803 /* When single string operations are available (TARGET_SINGLE_STRINGOP),
14804 they cheaply advance the dest and src pointers for us.  Otherwise we save
14805 code size by maintaining an offset (zero is readily available from the
14806 preceding rep operation) and using x86 addressing modes.  */
14807 if (TARGET_SINGLE_STRINGOP)
14808 {
14809 if (max_size > 4)
14810 {
14811 rtx label = ix86_expand_aligntest (count, 4, true);
14812 src = change_address (srcmem, SImode, srcptr);
14813 dest = change_address (destmem, SImode, destptr);
14814 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14815 emit_label (label);
14816 LABEL_NUSES (label) = 1;
14817 }
14818 if (max_size > 2)
14819 {
14820 rtx label = ix86_expand_aligntest (count, 2, true);
14821 src = change_address (srcmem, HImode, srcptr);
14822 dest = change_address (destmem, HImode, destptr);
14823 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14824 emit_label (label);
14825 LABEL_NUSES (label) = 1;
14826 }
14827 if (max_size > 1)
14828 {
14829 rtx label = ix86_expand_aligntest (count, 1, true);
14830 src = change_address (srcmem, QImode, srcptr);
14831 dest = change_address (destmem, QImode, destptr);
14832 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14833 emit_label (label);
14834 LABEL_NUSES (label) = 1;
14835 }
14836 }
14837 else
14838 {
14839 rtx offset = force_reg (Pmode, const0_rtx);
14840 rtx tmp;
14841
14842 if (max_size > 4)
14843 {
14844 rtx label = ix86_expand_aligntest (count, 4, true);
14845 src = change_address (srcmem, SImode, srcptr);
14846 dest = change_address (destmem, SImode, destptr);
14847 emit_move_insn (dest, src);
14848 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
14849 true, OPTAB_LIB_WIDEN);
14850 if (tmp != offset)
14851 emit_move_insn (offset, tmp);
14852 emit_label (label);
14853 LABEL_NUSES (label) = 1;
14854 }
14855 if (max_size > 2)
14856 {
14857 rtx label = ix86_expand_aligntest (count, 2, true);
14858 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14859 src = change_address (srcmem, HImode, tmp);
14860 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14861 dest = change_address (destmem, HImode, tmp);
14862 emit_move_insn (dest, src);
14863 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
14864 true, OPTAB_LIB_WIDEN);
14865 if (tmp != offset)
14866 emit_move_insn (offset, tmp);
14867 emit_label (label);
14868 LABEL_NUSES (label) = 1;
14869 }
14870 if (max_size > 1)
14871 {
14872 rtx label = ix86_expand_aligntest (count, 1, true);
14873 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14874 src = change_address (srcmem, QImode, tmp);
14875 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14876 dest = change_address (destmem, QImode, tmp);
14877 emit_move_insn (dest, src);
14878 emit_label (label);
14879 LABEL_NUSES (label) = 1;
14880 }
14881 }
14882 }
14883
14884 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
14885 static void
14886 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
14887 rtx count, int max_size)
14888 {
14889 count =
14890 expand_simple_binop (counter_mode (count), AND, count,
14891 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
14892 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
14893 gen_lowpart (QImode, value), count, QImode,
14894 1, max_size / 2);
14895 }
14896
14897 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
14898 static void
14899 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
14900 {
14901 rtx dest;
14902
14903 if (CONST_INT_P (count))
14904 {
14905 HOST_WIDE_INT countval = INTVAL (count);
14906 int offset = 0;
14907
14908 if ((countval & 0x10) && max_size > 16)
14909 {
14910 if (TARGET_64BIT)
14911 {
14912 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14913 emit_insn (gen_strset (destptr, dest, value));
14914 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
14915 emit_insn (gen_strset (destptr, dest, value));
14916 }
14917 else
14918 gcc_unreachable ();
14919 offset += 16;
14920 }
14921 if ((countval & 0x08) && max_size > 8)
14922 {
14923 if (TARGET_64BIT)
14924 {
14925 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14926 emit_insn (gen_strset (destptr, dest, value));
14927 }
14928 else
14929 {
14930 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14931 emit_insn (gen_strset (destptr, dest, value));
14932 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
14933 emit_insn (gen_strset (destptr, dest, value));
14934 }
14935 offset += 8;
14936 }
14937 if ((countval & 0x04) && max_size > 4)
14938 {
14939 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14940 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14941 offset += 4;
14942 }
14943 if ((countval & 0x02) && max_size > 2)
14944 {
14945 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
14946 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14947 offset += 2;
14948 }
14949 if ((countval & 0x01) && max_size > 1)
14950 {
14951 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
14952 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14953 offset += 1;
14954 }
14955 return;
14956 }
14957 if (max_size > 32)
14958 {
14959 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
14960 return;
14961 }
14962 if (max_size > 16)
14963 {
14964 rtx label = ix86_expand_aligntest (count, 16, true);
14965 if (TARGET_64BIT)
14966 {
14967 dest = change_address (destmem, DImode, destptr);
14968 emit_insn (gen_strset (destptr, dest, value));
14969 emit_insn (gen_strset (destptr, dest, value));
14970 }
14971 else
14972 {
14973 dest = change_address (destmem, SImode, destptr);
14974 emit_insn (gen_strset (destptr, dest, value));
14975 emit_insn (gen_strset (destptr, dest, value));
14976 emit_insn (gen_strset (destptr, dest, value));
14977 emit_insn (gen_strset (destptr, dest, value));
14978 }
14979 emit_label (label);
14980 LABEL_NUSES (label) = 1;
14981 }
14982 if (max_size > 8)
14983 {
14984 rtx label = ix86_expand_aligntest (count, 8, true);
14985 if (TARGET_64BIT)
14986 {
14987 dest = change_address (destmem, DImode, destptr);
14988 emit_insn (gen_strset (destptr, dest, value));
14989 }
14990 else
14991 {
14992 dest = change_address (destmem, SImode, destptr);
14993 emit_insn (gen_strset (destptr, dest, value));
14994 emit_insn (gen_strset (destptr, dest, value));
14995 }
14996 emit_label (label);
14997 LABEL_NUSES (label) = 1;
14998 }
14999 if (max_size > 4)
15000 {
15001 rtx label = ix86_expand_aligntest (count, 4, true);
15002 dest = change_address (destmem, SImode, destptr);
15003 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
15004 emit_label (label);
15005 LABEL_NUSES (label) = 1;
15006 }
15007 if (max_size > 2)
15008 {
15009 rtx label = ix86_expand_aligntest (count, 2, true);
15010 dest = change_address (destmem, HImode, destptr);
15011 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
15012 emit_label (label);
15013 LABEL_NUSES (label) = 1;
15014 }
15015 if (max_size > 1)
15016 {
15017 rtx label = ix86_expand_aligntest (count, 1, true);
15018 dest = change_address (destmem, QImode, destptr);
15019 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
15020 emit_label (label);
15021 LABEL_NUSES (label) = 1;
15022 }
15023 }
15024
15025 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
15026 to DESIRED_ALIGNMENT.  */
15027 static void
15028 expand_movmem_prologue (rtx destmem, rtx srcmem,
15029 rtx destptr, rtx srcptr, rtx count,
15030 int align, int desired_alignment)
15031 {
15032 if (align <= 1 && desired_alignment > 1)
15033 {
15034 rtx label = ix86_expand_aligntest (destptr, 1, false);
15035 srcmem = change_address (srcmem, QImode, srcptr);
15036 destmem = change_address (destmem, QImode, destptr);
15037 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15038 ix86_adjust_counter (count, 1);
15039 emit_label (label);
15040 LABEL_NUSES (label) = 1;
15041 }
15042 if (align <= 2 && desired_alignment > 2)
15043 {
15044 rtx label = ix86_expand_aligntest (destptr, 2, false);
15045 srcmem = change_address (srcmem, HImode, srcptr);
15046 destmem = change_address (destmem, HImode, destptr);
15047 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15048 ix86_adjust_counter (count, 2);
15049 emit_label (label);
15050 LABEL_NUSES (label) = 1;
15051 }
15052 if (align <= 4 && desired_alignment > 4)
15053 {
15054 rtx label = ix86_expand_aligntest (destptr, 4, false);
15055 srcmem = change_address (srcmem, SImode, srcptr);
15056 destmem = change_address (destmem, SImode, destptr);
15057 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15058 ix86_adjust_counter (count, 4);
15059 emit_label (label);
15060 LABEL_NUSES (label) = 1;
15061 }
15062 gcc_assert (desired_alignment <= 8);
15063 }
15064
15065 /* Store enough to DEST, known to be aligned by ALIGN, to align it to
15066 DESIRED_ALIGNMENT.  */
15067 static void
15068 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
15069 int align, int desired_alignment)
15070 {
15071 if (align <= 1 && desired_alignment > 1)
15072 {
15073 rtx label = ix86_expand_aligntest (destptr, 1, false);
15074 destmem = change_address (destmem, QImode, destptr);
15075 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
15076 ix86_adjust_counter (count, 1);
15077 emit_label (label);
15078 LABEL_NUSES (label) = 1;
15079 }
15080 if (align <= 2 && desired_alignment > 2)
15081 {
15082 rtx label = ix86_expand_aligntest (destptr, 2, false);
15083 destmem = change_address (destmem, HImode, destptr);
15084 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
15085 ix86_adjust_counter (count, 2);
15086 emit_label (label);
15087 LABEL_NUSES (label) = 1;
15088 }
15089 if (align <= 4 && desired_alignment > 4)
15090 {
15091 rtx label = ix86_expand_aligntest (destptr, 4, false);
15092 destmem = change_address (destmem, SImode, destptr);
15093 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
15094 ix86_adjust_counter (count, 4);
15095 emit_label (label);
15096 LABEL_NUSES (label) = 1;
15097 }
15098 gcc_assert (desired_alignment <= 8);
15099 }
15100
15101 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
15102 static enum stringop_alg
15103 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
15104 int *dynamic_check)
15105 {
15106 const struct stringop_algs * algs;
15107 /* Algorithms using the rep prefix want at least edi and ecx;
15108 additionally, memset wants eax and memcpy wants esi. Don't
15109 consider such algorithms if the user has appropriated those
15110 registers for their own purposes. */
15111 bool rep_prefix_usable = !(global_regs[CX_REG] || global_regs[DI_REG]
15112 || (memset ? global_regs[AX_REG] : global_regs[SI_REG]));
15113
15114 #define ALG_USABLE_P(alg) (rep_prefix_usable \
15115 || (alg != rep_prefix_1_byte \
15116 && alg != rep_prefix_4_byte \
15117 && alg != rep_prefix_8_byte))
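/* For example, a global register variable such as
   register int r asm ("edi");
   sets global_regs[DI_REG], making rep_prefix_usable false, so only the
   loop variants (or a libcall) remain usable.  */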
15118
15119 *dynamic_check = -1;
15120 if (memset)
15121 algs = &ix86_cost->memset[TARGET_64BIT != 0];
15122 else
15123 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
15124 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
15125 return stringop_alg;
15126 /* rep; movq or rep; movl is the smallest variant. */
15127 else if (optimize_size)
15128 {
15129 if (!count || (count & 3))
15130 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
15131 else
15132 return rep_prefix_usable ? rep_prefix_4_byte : loop;
15133 }
15134 /* Very tiny blocks are best handled via the loop; REP is expensive to
15135 set up.  */
15136 else if (expected_size != -1 && expected_size < 4)
15137 return loop_1_byte;
15138 else if (expected_size != -1)
15139 {
15140 unsigned int i;
15141 enum stringop_alg alg = libcall;
15142 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
15143 {
15144 /* We get here if the algorithms that were not libcall-based
15145 were rep-prefix based and we are unable to use rep prefixes
15146 based on global register usage. Break out of the loop and
15147 use the heuristic below. */
15148 if (algs->size[i].max == 0)
15149 break;
15150 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
15151 {
15152 enum stringop_alg candidate = algs->size[i].alg;
15153
15154 if (candidate != libcall && ALG_USABLE_P (candidate))
15155 alg = candidate;
15156 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
15157 the last non-libcall inline algorithm.  */
15158 if (TARGET_INLINE_ALL_STRINGOPS)
15159 {
15160 /* When the current size is best copied by a libcall, but we
15161 are still forced to inline, run the heuristic below, which
15162 picks code for medium-sized blocks.  */
15163 if (alg != libcall)
15164 return alg;
15165 break;
15166 }
15167 else if (ALG_USABLE_P (candidate))
15168 return candidate;
15169 }
15170 }
15171 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
15172 }
15173 /* When asked to inline the call anyway, try to pick a meaningful choice.
15174 We look for the maximal size of a block that is faster to copy inline
15175 and take blocks of at most that size, guessing that the average size
15176 will be roughly half of the maximum.
15177
15178 If this turns out to be bad, we might simply specify the preferred
15179 choice in ix86_costs. */
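/* For instance, if the cost table were to say that blocks up to 256 bytes
   are best copied by the unrolled loop and larger ones by a libcall, MAX
   becomes 256 and decide_alg is rerun with an expected size of 128, so the
   unrolled loop is chosen; with -minline-stringops-dynamically a runtime
   check against 256 then diverts larger blocks to the libcall.  */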
15180 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15181 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
15182 {
15183 int max = -1;
15184 enum stringop_alg alg;
15185 int i;
15186 bool any_alg_usable_p = true;
15187
15188 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
15189 {
15190 enum stringop_alg candidate = algs->size[i].alg;
15191 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
15192
15193 if (candidate != libcall && candidate
15194 && ALG_USABLE_P (candidate))
15195 max = algs->size[i].max;
15196 }
15197 /* If there aren't any usable algorithms, then recursing on
15198 smaller sizes isn't going to find anything. Just return the
15199 simple byte-at-a-time copy loop. */
15200 if (!any_alg_usable_p)
15201 {
15202 /* Pick something reasonable. */
15203 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15204 *dynamic_check = 128;
15205 return loop_1_byte;
15206 }
15207 if (max == -1)
15208 max = 4096;
15209 alg = decide_alg (count, max / 2, memset, dynamic_check);
15210 gcc_assert (*dynamic_check == -1);
15211 gcc_assert (alg != libcall);
15212 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15213 *dynamic_check = max;
15214 return alg;
15215 }
15216 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
15217 #undef ALG_USABLE_P
15218 }
15219
15220 /* Decide on alignment. We know that the operand is already aligned to ALIGN
15221 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
15222 static int
15223 decide_alignment (int align,
15224 enum stringop_alg alg,
15225 int expected_size)
15226 {
15227 int desired_align = 0;
15228 switch (alg)
15229 {
15230 case no_stringop:
15231 gcc_unreachable ();
15232 case loop:
15233 case unrolled_loop:
15234 desired_align = GET_MODE_SIZE (Pmode);
15235 break;
15236 case rep_prefix_8_byte:
15237 desired_align = 8;
15238 break;
15239 case rep_prefix_4_byte:
15240 /* PentiumPro has special logic that triggers for 8-byte-aligned blocks,
15241 copying a whole cache line at once.  */
15242 if (TARGET_PENTIUMPRO)
15243 desired_align = 8;
15244 else
15245 desired_align = 4;
15246 break;
15247 case rep_prefix_1_byte:
15248 /* PentiumPro has special logic that triggers for 8-byte-aligned blocks,
15249 copying a whole cache line at once.  */
15250 if (TARGET_PENTIUMPRO)
15251 desired_align = 8;
15252 else
15253 desired_align = 1;
15254 break;
15255 case loop_1_byte:
15256 desired_align = 1;
15257 break;
15258 case libcall:
15259 return 0;
15260 }
15261
15262 if (optimize_size)
15263 desired_align = 1;
15264 if (desired_align < align)
15265 desired_align = align;
15266 if (expected_size != -1 && expected_size < 4)
15267 desired_align = align;
15268 return desired_align;
15269 }
15270
15271 /* Return the smallest power of 2 greater than VAL. */
15272 static int
15273 smallest_pow2_greater_than (int val)
15274 {
15275 int ret = 1;
15276 while (ret <= val)
15277 ret <<= 1;
15278 return ret;
15279 }
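/* For example, smallest_pow2_greater_than (7) == 8 and
   smallest_pow2_greater_than (8) == 16; the result is strictly greater than
   VAL, so a bound computed from SIZE_NEEDED - 1 is always at least
   SIZE_NEEDED.  */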
15280
15281 /* Expand string move (memcpy) operation.  Use i386 string operations when
15282 profitable.  ix86_expand_setmem contains similar code.  The code depends
15283 upon architecture, block size and alignment, but always has the same
15284 overall structure:
15285
15286 1) Prologue guard: Conditional that jumps to the epilogue for small
15287 blocks that can be handled by the epilogue alone.  This is faster, but
15288 also needed for correctness, since the prologue assumes the block is
15289 larger than the desired alignment.
15290
15291 An optional dynamic size check and a libcall for large blocks are
15292 emitted here too, with -minline-stringops-dynamically.
15293
15294 2) Prologue: copy the first few bytes in order to get the destination
15295 aligned to DESIRED_ALIGN.  It is emitted only when ALIGN is less than
15296 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
15297 We emit either a jump tree of power-of-two sized copies, or a byte loop.
15298
15299 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
15300 with specified algorithm.
15301
15302 4) Epilogue: code copying tail of the block that is too small to be
15303 handled by main body (or up to size guarded by prologue guard). */
15304
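/* As a rough sketch, for an unknown COUNT on x86-64 with the unrolled_loop
   algorithm (SIZE_NEEDED == 32, DESIRED_ALIGN == 8, ALIGN == 1) the emitted
   code looks approximately like:

       if (count < 32) goto epilogue;                // 1) prologue guard
       copy 1/2/4 bytes until dst is 8-byte aligned,
       adjusting count;                              // 2) alignment prologue
       copy 32-byte chunks in an unrolled loop;      // 3) main body
     epilogue:
       copy the remaining count & 31 bytes.          // 4) epilogue  */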
15305 int
15306 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
15307 rtx expected_align_exp, rtx expected_size_exp)
15308 {
15309 rtx destreg;
15310 rtx srcreg;
15311 rtx label = NULL;
15312 rtx tmp;
15313 rtx jump_around_label = NULL;
15314 HOST_WIDE_INT align = 1;
15315 unsigned HOST_WIDE_INT count = 0;
15316 HOST_WIDE_INT expected_size = -1;
15317 int size_needed = 0, epilogue_size_needed;
15318 int desired_align = 0;
15319 enum stringop_alg alg;
15320 int dynamic_check;
15321
15322 if (CONST_INT_P (align_exp))
15323 align = INTVAL (align_exp);
15324 /* i386 can do misaligned accesses at a reasonable extra cost.  */
15325 if (CONST_INT_P (expected_align_exp)
15326 && INTVAL (expected_align_exp) > align)
15327 align = INTVAL (expected_align_exp);
15328 if (CONST_INT_P (count_exp))
15329 count = expected_size = INTVAL (count_exp);
15330 if (CONST_INT_P (expected_size_exp) && count == 0)
15331 expected_size = INTVAL (expected_size_exp);
15332
15333 /* Step 0: Decide on preferred algorithm, desired alignment and
15334 size of chunks to be copied by main loop. */
15335
15336 alg = decide_alg (count, expected_size, false, &dynamic_check);
15337 desired_align = decide_alignment (align, alg, expected_size);
15338
15339 if (!TARGET_ALIGN_STRINGOPS)
15340 align = desired_align;
15341
15342 if (alg == libcall)
15343 return 0;
15344 gcc_assert (alg != no_stringop);
15345 if (!count)
15346 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
15347 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15348 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
15349 switch (alg)
15350 {
15351 case libcall:
15352 case no_stringop:
15353 gcc_unreachable ();
15354 case loop:
15355 size_needed = GET_MODE_SIZE (Pmode);
15356 break;
15357 case unrolled_loop:
15358 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
15359 break;
15360 case rep_prefix_8_byte:
15361 size_needed = 8;
15362 break;
15363 case rep_prefix_4_byte:
15364 size_needed = 4;
15365 break;
15366 case rep_prefix_1_byte:
15367 case loop_1_byte:
15368 size_needed = 1;
15369 break;
15370 }
15371
15372 epilogue_size_needed = size_needed;
15373
15374 /* Step 1: Prologue guard. */
15375
15376 /* Alignment code needs count to be in register. */
15377 if (CONST_INT_P (count_exp) && desired_align > align)
15378 count_exp = force_reg (counter_mode (count_exp), count_exp);
15379 gcc_assert (desired_align >= 1 && align >= 1);
15380
15381 /* Ensure that alignment prologue won't copy past end of block. */
15382 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15383 {
15384 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15385 /* The epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes,
15386 so make sure it is a power of 2.  */
15387 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15388
15389 if (CONST_INT_P (count_exp))
15390 {
15391 if (UINTVAL (count_exp) < (unsigned HOST_WIDE_INT)epilogue_size_needed)
15392 goto epilogue;
15393 }
15394 else
15395 {
15396 label = gen_label_rtx ();
15397 emit_cmp_and_jump_insns (count_exp,
15398 GEN_INT (epilogue_size_needed),
15399 LTU, 0, counter_mode (count_exp), 1, label);
15400 if (expected_size == -1 || expected_size < epilogue_size_needed)
15401 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15402 else
15403 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15404 }
15405 }
15406
15407 /* Emit code to decide at runtime whether a library call or inline code
15408 should be used.  */
15409 if (dynamic_check != -1)
15410 {
15411 if (CONST_INT_P (count_exp))
15412 {
15413 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
15414 {
15415 emit_block_move_via_libcall (dst, src, count_exp, false);
15416 count_exp = const0_rtx;
15417 goto epilogue;
15418 }
15419 }
15420 else
15421 {
15422 rtx hot_label = gen_label_rtx ();
15423 jump_around_label = gen_label_rtx ();
15424 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15425 LEU, 0, GET_MODE (count_exp), 1, hot_label);
15426 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15427 emit_block_move_via_libcall (dst, src, count_exp, false);
15428 emit_jump (jump_around_label);
15429 emit_label (hot_label);
15430 }
15431 }
15432
15433 /* Step 2: Alignment prologue. */
15434
15435 if (desired_align > align)
15436 {
15437 /* Except for the first move in the epilogue, we no longer know
15438 the constant offset in aliasing info.  It doesn't seem worth
15439 the pain to maintain it for the first move, so throw away
15440 the info early.  */
15441 src = change_address (src, BLKmode, srcreg);
15442 dst = change_address (dst, BLKmode, destreg);
15443 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
15444 desired_align);
15445 }
15446 if (label && size_needed == 1)
15447 {
15448 emit_label (label);
15449 LABEL_NUSES (label) = 1;
15450 label = NULL;
15451 }
15452
15453 /* Step 3: Main loop. */
15454
15455 switch (alg)
15456 {
15457 case libcall:
15458 case no_stringop:
15459 gcc_unreachable ();
15460 case loop_1_byte:
15461 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15462 count_exp, QImode, 1, expected_size);
15463 break;
15464 case loop:
15465 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15466 count_exp, Pmode, 1, expected_size);
15467 break;
15468 case unrolled_loop:
15469 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
15470 registers for 4 temporaries anyway.  */
15471 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15472 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
15473 expected_size);
15474 break;
15475 case rep_prefix_8_byte:
15476 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15477 DImode);
15478 break;
15479 case rep_prefix_4_byte:
15480 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15481 SImode);
15482 break;
15483 case rep_prefix_1_byte:
15484 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15485 QImode);
15486 break;
15487 }
15488 /* Properly adjust the offsets of the src and dest memory for aliasing.  */
15489 if (CONST_INT_P (count_exp))
15490 {
15491 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
15492 (count / size_needed) * size_needed);
15493 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15494 (count / size_needed) * size_needed);
15495 }
15496 else
15497 {
15498 src = change_address (src, BLKmode, srcreg);
15499 dst = change_address (dst, BLKmode, destreg);
15500 }
15501
15502 /* Step 4: Epilogue to copy the remaining bytes. */
15503 epilogue:
15504 if (label)
15505 {
15506 /* When the main loop is done, COUNT_EXP might hold the original count,
15507 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
15508 The epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
15509 bytes.  Compensate if needed.  */
15510
15511 if (size_needed < epilogue_size_needed)
15512 {
15513 tmp =
15514 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15515 GEN_INT (size_needed - 1), count_exp, 1,
15516 OPTAB_DIRECT);
15517 if (tmp != count_exp)
15518 emit_move_insn (count_exp, tmp);
15519 }
15520 emit_label (label);
15521 LABEL_NUSES (label) = 1;
15522 }
15523
15524 if (count_exp != const0_rtx && epilogue_size_needed > 1)
15525 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
15526 epilogue_size_needed);
15527 if (jump_around_label)
15528 emit_label (jump_around_label);
15529 return 1;
15530 }
15531
15532 /* Helper function for memset.  For a QImode value 0xXY produce
15533 0xXYXYXYXY of the width specified by MODE.  This is essentially
15534 a * 0x01010101, but we can do slightly better than
15535 synth_mult by unwinding the sequence by hand on CPUs with
15536 a slow multiply.  */
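/* For example, with MODE == SImode and VAL == 0x41 the result is
   0x41414141: computed directly for a constant, or by the equivalent of
   reg |= reg << 8; reg |= reg << 16; for a non-constant value.  */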
15537 static rtx
15538 promote_duplicated_reg (enum machine_mode mode, rtx val)
15539 {
15540 enum machine_mode valmode = GET_MODE (val);
15541 rtx tmp;
15542 int nops = mode == DImode ? 3 : 2;
15543
15544 gcc_assert (mode == SImode || mode == DImode);
15545 if (val == const0_rtx)
15546 return copy_to_mode_reg (mode, const0_rtx);
15547 if (CONST_INT_P (val))
15548 {
15549 HOST_WIDE_INT v = INTVAL (val) & 255;
15550
15551 v |= v << 8;
15552 v |= v << 16;
15553 if (mode == DImode)
15554 v |= (v << 16) << 16;
15555 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
15556 }
15557
15558 if (valmode == VOIDmode)
15559 valmode = QImode;
15560 if (valmode != QImode)
15561 val = gen_lowpart (QImode, val);
15562 if (mode == QImode)
15563 return val;
15564 if (!TARGET_PARTIAL_REG_STALL)
15565 nops--;
15566 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
15567 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
15568 <= (ix86_cost->shift_const + ix86_cost->add) * nops
15569 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
15570 {
15571 rtx reg = convert_modes (mode, QImode, val, true);
15572 tmp = promote_duplicated_reg (mode, const1_rtx);
15573 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
15574 OPTAB_DIRECT);
15575 }
15576 else
15577 {
15578 rtx reg = convert_modes (mode, QImode, val, true);
15579
15580 if (!TARGET_PARTIAL_REG_STALL)
15581 if (mode == SImode)
15582 emit_insn (gen_movsi_insv_1 (reg, reg));
15583 else
15584 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
15585 else
15586 {
15587 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
15588 NULL, 1, OPTAB_DIRECT);
15589 reg =
15590 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15591 }
15592 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
15593 NULL, 1, OPTAB_DIRECT);
15594 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15595 if (mode == SImode)
15596 return reg;
15597 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
15598 NULL, 1, OPTAB_DIRECT);
15599 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15600 return reg;
15601 }
15602 }
15603
15604 /* Duplicate the value VAL using promote_duplicated_reg into the maximal size
15605 that will be needed by the main loop copying SIZE_NEEDED chunks and by the
15606 prologue getting the alignment from ALIGN to DESIRED_ALIGN.  */
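/* For example, a 64-bit rep_prefix_8_byte memset (SIZE_NEEDED == 8) needs a
   DImode promoted value, rep_prefix_4_byte only needs SImode, and a byte
   loop with no extra alignment work needs no promotion at all.  */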
15607 static rtx
15608 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
15609 {
15610 rtx promoted_val;
15611
15612 if (TARGET_64BIT
15613 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
15614 promoted_val = promote_duplicated_reg (DImode, val);
15615 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
15616 promoted_val = promote_duplicated_reg (SImode, val);
15617 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
15618 promoted_val = promote_duplicated_reg (HImode, val);
15619 else
15620 promoted_val = val;
15621
15622 return promoted_val;
15623 }
15624
15625 /* Expand string set operation (memset).  Use i386 string operations
15626 when profitable.  See the ix86_expand_movmem comment above for an
15627 explanation of the individual steps performed.  */
15628 int
15629 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
15630 rtx expected_align_exp, rtx expected_size_exp)
15631 {
15632 rtx destreg;
15633 rtx label = NULL;
15634 rtx tmp;
15635 rtx jump_around_label = NULL;
15636 HOST_WIDE_INT align = 1;
15637 unsigned HOST_WIDE_INT count = 0;
15638 HOST_WIDE_INT expected_size = -1;
15639 int size_needed = 0, epilogue_size_needed;
15640 int desired_align = 0;
15641 enum stringop_alg alg;
15642 rtx promoted_val = NULL;
15643 bool force_loopy_epilogue = false;
15644 int dynamic_check;
15645
15646 if (CONST_INT_P (align_exp))
15647 align = INTVAL (align_exp);
15648 /* i386 can do misaligned accesses at a reasonable extra cost.  */
15649 if (CONST_INT_P (expected_align_exp)
15650 && INTVAL (expected_align_exp) > align)
15651 align = INTVAL (expected_align_exp);
15652 if (CONST_INT_P (count_exp))
15653 count = expected_size = INTVAL (count_exp);
15654 if (CONST_INT_P (expected_size_exp) && count == 0)
15655 expected_size = INTVAL (expected_size_exp);
15656
15657 /* Step 0: Decide on preferred algorithm, desired alignment and
15658 size of chunks to be copied by main loop. */
15659
15660 alg = decide_alg (count, expected_size, true, &dynamic_check);
15661 desired_align = decide_alignment (align, alg, expected_size);
15662
15663 if (!TARGET_ALIGN_STRINGOPS)
15664 align = desired_align;
15665
15666 if (alg == libcall)
15667 return 0;
15668 gcc_assert (alg != no_stringop);
15669 if (!count)
15670 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
15671 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15672 switch (alg)
15673 {
15674 case libcall:
15675 case no_stringop:
15676 gcc_unreachable ();
15677 case loop:
15678 size_needed = GET_MODE_SIZE (Pmode);
15679 break;
15680 case unrolled_loop:
15681 size_needed = GET_MODE_SIZE (Pmode) * 4;
15682 break;
15683 case rep_prefix_8_byte:
15684 size_needed = 8;
15685 break;
15686 case rep_prefix_4_byte:
15687 size_needed = 4;
15688 break;
15689 case rep_prefix_1_byte:
15690 case loop_1_byte:
15691 size_needed = 1;
15692 break;
15693 }
15694 epilogue_size_needed = size_needed;
15695
15696 /* Step 1: Prologue guard. */
15697
15698 /* Alignment code needs count to be in register. */
15699 if (CONST_INT_P (count_exp) && desired_align > align)
15700 {
15701 enum machine_mode mode = SImode;
15702 if (TARGET_64BIT && (count & ~0xffffffff))
15703 mode = DImode;
15704 count_exp = force_reg (mode, count_exp);
15705 }
15706 /* Do the cheap promotion to allow better CSE across the
15707 main loop and epilogue (i.e., one load of the big constant in
15708 front of all the code).  */
15709 if (CONST_INT_P (val_exp))
15710 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15711 desired_align, align);
15712 /* Ensure that alignment prologue won't copy past end of block. */
15713 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15714 {
15715 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15716 /* The epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes,
15717 so make sure it is a power of 2.  */
15718 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15719
15720 /* To improve performance of small blocks, we jump around the VAL
15721 promoting code.  This means that if the promoted VAL is not a
15722 constant, we might not use it in the epilogue and have to use the
15723 byte loop variant.  */
15724 if (epilogue_size_needed > 2 && !promoted_val)
15725 force_loopy_epilogue = true;
15726 label = gen_label_rtx ();
15727 emit_cmp_and_jump_insns (count_exp,
15728 GEN_INT (epilogue_size_needed),
15729 LTU, 0, counter_mode (count_exp), 1, label);
15730 if (GET_CODE (count_exp) == CONST_INT)
15731 ;
15732 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
15733 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15734 else
15735 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15736 }
15737 if (dynamic_check != -1)
15738 {
15739 rtx hot_label = gen_label_rtx ();
15740 jump_around_label = gen_label_rtx ();
15741 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15742 LEU, 0, counter_mode (count_exp), 1, hot_label);
15743 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15744 set_storage_via_libcall (dst, count_exp, val_exp, false);
15745 emit_jump (jump_around_label);
15746 emit_label (hot_label);
15747 }
15748
15749 /* Step 2: Alignment prologue. */
15750
15751 /* Do the expensive promotion once we have branched off the small blocks.  */
15752 if (!promoted_val)
15753 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15754 desired_align, align);
15755 gcc_assert (desired_align >= 1 && align >= 1);
15756
15757 if (desired_align > align)
15758 {
15759 /* Except for the first move in the epilogue, we no longer know
15760 the constant offset in aliasing info.  It doesn't seem worth
15761 the pain to maintain it for the first move, so throw away
15762 the info early.  */
15763 dst = change_address (dst, BLKmode, destreg);
15764 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
15765 desired_align);
15766 }
15767 if (label && size_needed == 1)
15768 {
15769 emit_label (label);
15770 LABEL_NUSES (label) = 1;
15771 label = NULL;
15772 }
15773
15774 /* Step 3: Main loop. */
15775
15776 switch (alg)
15777 {
15778 case libcall:
15779 case no_stringop:
15780 gcc_unreachable ();
15781 case loop_1_byte:
15782 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15783 count_exp, QImode, 1, expected_size);
15784 break;
15785 case loop:
15786 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15787 count_exp, Pmode, 1, expected_size);
15788 break;
15789 case unrolled_loop:
15790 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15791 count_exp, Pmode, 4, expected_size);
15792 break;
15793 case rep_prefix_8_byte:
15794 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15795 DImode);
15796 break;
15797 case rep_prefix_4_byte:
15798 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15799 SImode);
15800 break;
15801 case rep_prefix_1_byte:
15802 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15803 QImode);
15804 break;
15805 }
15806 /* Properly adjust the offset of the dest memory for aliasing.  */
15807 if (CONST_INT_P (count_exp))
15808 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15809 (count / size_needed) * size_needed);
15810 else
15811 dst = change_address (dst, BLKmode, destreg);
15812
15813 /* Step 4: Epilogue to copy the remaining bytes. */
15814
15815 if (label)
15816 {
15817 /* When the main loop is done, COUNT_EXP might hold the original count,
15818 while we want to set only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
15819 The epilogue code will actually set COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
15820 bytes.  Compensate if needed.  */
15821
15822 if (size_needed < desired_align - align)
15823 {
15824 tmp =
15825 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15826 GEN_INT (size_needed - 1), count_exp, 1,
15827 OPTAB_DIRECT);
15828 size_needed = desired_align - align + 1;
15829 if (tmp != count_exp)
15830 emit_move_insn (count_exp, tmp);
15831 }
15832 emit_label (label);
15833 LABEL_NUSES (label) = 1;
15834 }
15835 if (count_exp != const0_rtx && epilogue_size_needed > 1)
15836 {
15837 if (force_loopy_epilogue)
15838 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
15839 size_needed);
15840 else
15841 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
15842 size_needed);
15843 }
15844 if (jump_around_label)
15845 emit_label (jump_around_label);
15846 return 1;
15847 }
15848
15849 /* Expand the appropriate insns for doing strlen if not just doing
15850 repnz; scasb
15851
15852 out = result, initialized with the start address
15853 align_rtx = alignment of the address.
15854 scratch = scratch register, initialized with the start address when
15855 not aligned, otherwise undefined
15856
15857 This is just the body. It needs the initializations mentioned above and
15858 some address computing at the end. These things are done in i386.md. */
15859
15860 static void
15861 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
15862 {
15863 int align;
15864 rtx tmp;
15865 rtx align_2_label = NULL_RTX;
15866 rtx align_3_label = NULL_RTX;
15867 rtx align_4_label = gen_label_rtx ();
15868 rtx end_0_label = gen_label_rtx ();
15869 rtx mem;
15870 rtx tmpreg = gen_reg_rtx (SImode);
15871 rtx scratch = gen_reg_rtx (SImode);
15872 rtx cmp;
15873
15874 align = 0;
15875 if (CONST_INT_P (align_rtx))
15876 align = INTVAL (align_rtx);
15877
15878 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
15879
15880 /* Is there a known alignment and is it less than 4? */
15881 if (align < 4)
15882 {
15883 rtx scratch1 = gen_reg_rtx (Pmode);
15884 emit_move_insn (scratch1, out);
15885 /* Is there a known alignment and is it not 2? */
15886 if (align != 2)
15887 {
15888 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
15889 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
15890
15891 /* Leave just the 3 lower bits. */
15892 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
15893 NULL_RTX, 0, OPTAB_WIDEN);
15894
15895 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15896 Pmode, 1, align_4_label);
15897 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
15898 Pmode, 1, align_2_label);
15899 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
15900 Pmode, 1, align_3_label);
15901 }
15902 else
15903 {
15904 /* Since the alignment is 2, we have to check 2 or 0 bytes;
15905 check whether it is aligned to a 4-byte boundary.  */
15906
15907 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
15908 NULL_RTX, 0, OPTAB_WIDEN);
15909
15910 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15911 Pmode, 1, align_4_label);
15912 }
15913
15914 mem = change_address (src, QImode, out);
15915
15916 /* Now compare the bytes. */
15917
15918 /* Compare the first n unaligned bytes on a byte-per-byte basis.  */
15919 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
15920 QImode, 1, end_0_label);
15921
15922 /* Increment the address. */
15923 if (TARGET_64BIT)
15924 emit_insn (gen_adddi3 (out, out, const1_rtx));
15925 else
15926 emit_insn (gen_addsi3 (out, out, const1_rtx));
15927
15928 /* Not needed with an alignment of 2 */
15929 if (align != 2)
15930 {
15931 emit_label (align_2_label);
15932
15933 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15934 end_0_label);
15935
15936 if (TARGET_64BIT)
15937 emit_insn (gen_adddi3 (out, out, const1_rtx));
15938 else
15939 emit_insn (gen_addsi3 (out, out, const1_rtx));
15940
15941 emit_label (align_3_label);
15942 }
15943
15944 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15945 end_0_label);
15946
15947 if (TARGET_64BIT)
15948 emit_insn (gen_adddi3 (out, out, const1_rtx));
15949 else
15950 emit_insn (gen_addsi3 (out, out, const1_rtx));
15951 }
15952
15953 /* Generate a loop to check 4 bytes at a time.  It is not a good idea
15954 to align this loop; it only makes the program bigger and does not
15955 speed it up.  */
15956 emit_label (align_4_label);
15957
15958 mem = change_address (src, SImode, out);
15959 emit_move_insn (scratch, mem);
15960 if (TARGET_64BIT)
15961 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
15962 else
15963 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
15964
15965 /* This formula yields a nonzero result iff one of the bytes is zero.
15966 This saves three branches inside the loop and many cycles.  */
15967
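/* The value computed below is (word - 0x01010101) & ~word & 0x80808080.
   For example, for word == 0x42004242 (a zero in the second-highest byte)
   this is 0x40FF4141 & 0xBDFFBDBD & 0x80808080 == 0x00800000, which is
   nonzero exactly because one byte of the word was zero.  */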
15968 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
15969 emit_insn (gen_one_cmplsi2 (scratch, scratch));
15970 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
15971 emit_insn (gen_andsi3 (tmpreg, tmpreg,
15972 gen_int_mode (0x80808080, SImode)));
15973 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
15974 align_4_label);
15975
15976 if (TARGET_CMOVE)
15977 {
15978 rtx reg = gen_reg_rtx (SImode);
15979 rtx reg2 = gen_reg_rtx (Pmode);
15980 emit_move_insn (reg, tmpreg);
15981 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
15982
15983 /* If zero is not in the first two bytes, move two bytes forward. */
15984 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15985 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15986 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15987 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
15988 gen_rtx_IF_THEN_ELSE (SImode, tmp,
15989 reg,
15990 tmpreg)));
15991 /* Emit lea manually to avoid clobbering of flags. */
15992 emit_insn (gen_rtx_SET (SImode, reg2,
15993 gen_rtx_PLUS (Pmode, out, const2_rtx)));
15994
15995 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15996 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15997 emit_insn (gen_rtx_SET (VOIDmode, out,
15998 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
15999 reg2,
16000 out)));
16001
16002 }
16003 else
16004 {
16005 rtx end_2_label = gen_label_rtx ();
16006 /* Is zero in the first two bytes? */
16007
16008 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
16009 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16010 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
16011 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
16012 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
16013 pc_rtx);
16014 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
16015 JUMP_LABEL (tmp) = end_2_label;
16016
16017 /* Not in the first two. Move two bytes forward. */
16018 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
16019 if (TARGET_64BIT)
16020 emit_insn (gen_adddi3 (out, out, const2_rtx));
16021 else
16022 emit_insn (gen_addsi3 (out, out, const2_rtx));
16023
16024 emit_label (end_2_label);
16025
16026 }
16027
16028 /* Avoid a branch when fixing up the byte.  */
16029 tmpreg = gen_lowpart (QImode, tmpreg);
16030 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
16031 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
16032 if (TARGET_64BIT)
16033 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
16034 else
16035 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
16036
16037 emit_label (end_0_label);
16038 }
16039
16040 /* Expand strlen. */
16041
16042 int
16043 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
16044 {
16045 rtx addr, scratch1, scratch2, scratch3, scratch4;
16046
16047 /* The generic case of the strlen expander is long.  Avoid expanding it
16048 unless TARGET_INLINE_ALL_STRINGOPS.  */
16049
16050 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
16051 && !TARGET_INLINE_ALL_STRINGOPS
16052 && !optimize_size
16053 && (!CONST_INT_P (align) || INTVAL (align) < 4))
16054 return 0;
16055
16056 addr = force_reg (Pmode, XEXP (src, 0));
16057 scratch1 = gen_reg_rtx (Pmode);
16058
16059 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
16060 && !optimize_size)
16061 {
16062 /* Well it seems that some optimizer does not combine a call like
16063 foo (strlen (bar), strlen (bar));
16064 when the move and the subtraction are done here.  It does calculate
16065 the length just once when these instructions are done inside
16066 output_strlen_unroll ().  But I think since &bar[strlen (bar)] is
16067 often used and I use one fewer register for the lifetime of
16068 output_strlen_unroll () this is better.  */
16069
16070 emit_move_insn (out, addr);
16071
16072 ix86_expand_strlensi_unroll_1 (out, src, align);
16073
16074 /* strlensi_unroll_1 returns the address of the zero at the end of
16075 the string, like memchr(), so compute the length by subtracting
16076 the start address. */
16077 if (TARGET_64BIT)
16078 emit_insn (gen_subdi3 (out, out, addr));
16079 else
16080 emit_insn (gen_subsi3 (out, out, addr));
16081 }
16082 else
16083 {
16084 rtx unspec;
16085
16086 /* Can't use this if the user has appropriated eax, ecx, or edi. */
16087 if (global_regs[AX_REG] || global_regs[CX_REG] || global_regs[DI_REG])
16088 return false;
16089
16090 scratch2 = gen_reg_rtx (Pmode);
16091 scratch3 = gen_reg_rtx (Pmode);
16092 scratch4 = force_reg (Pmode, constm1_rtx);
16093
16094 emit_move_insn (scratch3, addr);
16095 eoschar = force_reg (QImode, eoschar);
16096
16097 src = replace_equiv_address_nv (src, scratch3);
16098
16099 /* If .md starts supporting :P, this can be done in .md. */
16100 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
16101 scratch4), UNSPEC_SCAS);
16102 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
16103 if (TARGET_64BIT)
16104 {
16105 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
16106 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
16107 }
16108 else
16109 {
16110 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
16111 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
16112 }
16113 }
16114 return 1;
16115 }
16116
16117 /* For the given symbol (function), construct code to compute the address of
16118 its PLT entry in the large x86-64 PIC model.  */
16119 rtx
16120 construct_plt_address (rtx symbol)
16121 {
16122 rtx tmp = gen_reg_rtx (Pmode);
16123 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
16124
16125 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
16126 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
16127
16128 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
16129 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
16130 return tmp;
16131 }
16132
16133 void
16134 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
16135 rtx callarg2 ATTRIBUTE_UNUSED,
16136 rtx pop, int sibcall)
16137 {
16138 rtx use = NULL, call;
16139
16140 if (pop == const0_rtx)
16141 pop = NULL;
16142 gcc_assert (!TARGET_64BIT || !pop);
16143
16144 if (TARGET_MACHO && !TARGET_64BIT)
16145 {
16146 #if TARGET_MACHO
16147 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
16148 fnaddr = machopic_indirect_call_target (fnaddr);
16149 #endif
16150 }
16151 else
16152 {
16153 /* Static functions and indirect calls don't need the pic register. */
16154 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
16155 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16156 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
16157 use_reg (&use, pic_offset_table_rtx);
16158 }
16159
16160 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
16161 {
16162 rtx al = gen_rtx_REG (QImode, AX_REG);
16163 emit_move_insn (al, callarg2);
16164 use_reg (&use, al);
16165 }
16166
16167 if (ix86_cmodel == CM_LARGE_PIC
16168 && GET_CODE (fnaddr) == MEM
16169 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16170 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
16171 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
16172 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
16173 {
16174 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16175 fnaddr = gen_rtx_MEM (QImode, fnaddr);
16176 }
16177 if (sibcall && TARGET_64BIT
16178 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
16179 {
16180 rtx addr;
16181 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16182 fnaddr = gen_rtx_REG (Pmode, R11_REG);
16183 emit_move_insn (fnaddr, addr);
16184 fnaddr = gen_rtx_MEM (QImode, fnaddr);
16185 }
16186
16187 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
16188 if (retval)
16189 call = gen_rtx_SET (VOIDmode, retval, call);
16190 if (pop)
16191 {
16192 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
16193 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
16194 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
16195 }
16196
16197 call = emit_call_insn (call);
16198 if (use)
16199 CALL_INSN_FUNCTION_USAGE (call) = use;
16200 }
16201
16202 \f
16203 /* Clear stack slot assignments remembered from previous functions.
16204 This is called from INIT_EXPANDERS once before RTL is emitted for each
16205 function. */
16206
16207 static struct machine_function *
16208 ix86_init_machine_status (void)
16209 {
16210 struct machine_function *f;
16211
16212 f = GGC_CNEW (struct machine_function);
16213 f->use_fast_prologue_epilogue_nregs = -1;
16214 f->tls_descriptor_call_expanded_p = 0;
16215
16216 return f;
16217 }
16218
16219 /* Return a MEM corresponding to a stack slot with mode MODE.
16220 Allocate a new slot if necessary.
16221
16222 The RTL for a function can have several slots available: N is
16223 which slot to use. */
16224
16225 rtx
16226 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
16227 {
16228 struct stack_local_entry *s;
16229
16230 gcc_assert (n < MAX_386_STACK_LOCALS);
16231
16232 /* Virtual slot is valid only before vregs are instantiated. */
16233 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
16234
16235 for (s = ix86_stack_locals; s; s = s->next)
16236 if (s->mode == mode && s->n == n)
16237 return copy_rtx (s->rtl);
16238
16239 s = (struct stack_local_entry *)
16240 ggc_alloc (sizeof (struct stack_local_entry));
16241 s->n = n;
16242 s->mode = mode;
16243 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
16244
16245 s->next = ix86_stack_locals;
16246 ix86_stack_locals = s;
16247 return s->rtl;
16248 }
16249
16250 /* Construct the SYMBOL_REF for the tls_get_addr function. */
16251
16252 static GTY(()) rtx ix86_tls_symbol;
16253 rtx
16254 ix86_tls_get_addr (void)
16255 {
16256
16257 if (!ix86_tls_symbol)
16258 {
16259 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
16260 (TARGET_ANY_GNU_TLS
16261 && !TARGET_64BIT)
16262 ? "___tls_get_addr"
16263 : "__tls_get_addr");
16264 }
16265
16266 return ix86_tls_symbol;
16267 }
16268
16269 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
16270
16271 static GTY(()) rtx ix86_tls_module_base_symbol;
16272 rtx
16273 ix86_tls_module_base (void)
16274 {
16275
16276 if (!ix86_tls_module_base_symbol)
16277 {
16278 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
16279 "_TLS_MODULE_BASE_");
16280 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
16281 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
16282 }
16283
16284 return ix86_tls_module_base_symbol;
16285 }
16286 \f
16287 /* Calculate the length of the memory address in the instruction
16288 encoding. Does not include the one-byte modrm, opcode, or prefix. */
16289
16290 int
16291 memory_address_length (rtx addr)
16292 {
16293 struct ix86_address parts;
16294 rtx base, index, disp;
16295 int len;
16296 int ok;
16297
16298 if (GET_CODE (addr) == PRE_DEC
16299 || GET_CODE (addr) == POST_INC
16300 || GET_CODE (addr) == PRE_MODIFY
16301 || GET_CODE (addr) == POST_MODIFY)
16302 return 0;
16303
16304 ok = ix86_decompose_address (addr, &parts);
16305 gcc_assert (ok);
16306
16307 if (parts.base && GET_CODE (parts.base) == SUBREG)
16308 parts.base = SUBREG_REG (parts.base);
16309 if (parts.index && GET_CODE (parts.index) == SUBREG)
16310 parts.index = SUBREG_REG (parts.index);
16311
16312 base = parts.base;
16313 index = parts.index;
16314 disp = parts.disp;
16315 len = 0;
16316
16317 /* Rule of thumb:
16318 - esp as the base always wants an index,
16319 - ebp as the base always wants a displacement. */
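   /* For example, "movl (%ecx), %eax" needs no extra address bytes (len 0),
      while "movl (%esp), %eax" needs a SIB byte and "movl (%ebp), %eax" a
      zero disp8, so both return 1 for the part beyond the opcode and modrm.  */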
16320
16321 /* Register Indirect. */
16322 if (base && !index && !disp)
16323 {
16324 /* esp (for its index) and ebp (for its displacement) need
16325 the two-byte modrm form. */
16326 if (addr == stack_pointer_rtx
16327 || addr == arg_pointer_rtx
16328 || addr == frame_pointer_rtx
16329 || addr == hard_frame_pointer_rtx)
16330 len = 1;
16331 }
16332
16333 /* Direct Addressing. */
16334 else if (disp && !base && !index)
16335 len = 4;
16336
16337 else
16338 {
16339 /* Find the length of the displacement constant. */
16340 if (disp)
16341 {
16342 if (base && satisfies_constraint_K (disp))
16343 len = 1;
16344 else
16345 len = 4;
16346 }
16347 /* ebp always wants a displacement. */
16348 else if (base == hard_frame_pointer_rtx)
16349 len = 1;
16350
16351 /* An index requires the two-byte modrm form.... */
16352 if (index
16353 /* ...like esp, which always wants an index. */
16354 || base == stack_pointer_rtx
16355 || base == arg_pointer_rtx
16356 || base == frame_pointer_rtx)
16357 len += 1;
16358 }
16359
16360 return len;
16361 }
16362
16363 /* Compute the default value for the "length_immediate" attribute.  When
16364 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative.  */
16365 int
16366 ix86_attr_length_immediate_default (rtx insn, int shortform)
16367 {
16368 int len = 0;
16369 int i;
16370 extract_insn_cached (insn);
16371 for (i = recog_data.n_operands - 1; i >= 0; --i)
16372 if (CONSTANT_P (recog_data.operand[i]))
16373 {
16374 gcc_assert (!len);
16375 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
16376 len = 1;
16377 else
16378 {
16379 switch (get_attr_mode (insn))
16380 {
16381 case MODE_QI:
16382 len+=1;
16383 break;
16384 case MODE_HI:
16385 len+=2;
16386 break;
16387 case MODE_SI:
16388 len+=4;
16389 break;
16390 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values.  */
16391 case MODE_DI:
16392 len+=4;
16393 break;
16394 default:
16395 fatal_insn ("unknown insn mode", insn);
16396 }
16397 }
16398 }
16399 return len;
16400 }
16401 /* Compute the default value for the "length_address" attribute.  */
16402 int
16403 ix86_attr_length_address_default (rtx insn)
16404 {
16405 int i;
16406
16407 if (get_attr_type (insn) == TYPE_LEA)
16408 {
16409 rtx set = PATTERN (insn);
16410
16411 if (GET_CODE (set) == PARALLEL)
16412 set = XVECEXP (set, 0, 0);
16413
16414 gcc_assert (GET_CODE (set) == SET);
16415
16416 return memory_address_length (SET_SRC (set));
16417 }
16418
16419 extract_insn_cached (insn);
16420 for (i = recog_data.n_operands - 1; i >= 0; --i)
16421 if (MEM_P (recog_data.operand[i]))
16422 return memory_address_length (XEXP (recog_data.operand[i], 0));
16426 return 0;
16427 }
16428 \f
16429 /* Return the maximum number of instructions a cpu can issue. */
16430
16431 static int
16432 ix86_issue_rate (void)
16433 {
16434 switch (ix86_tune)
16435 {
16436 case PROCESSOR_PENTIUM:
16437 case PROCESSOR_K6:
16438 return 2;
16439
16440 case PROCESSOR_PENTIUMPRO:
16441 case PROCESSOR_PENTIUM4:
16442 case PROCESSOR_ATHLON:
16443 case PROCESSOR_K8:
16444 case PROCESSOR_AMDFAM10:
16445 case PROCESSOR_NOCONA:
16446 case PROCESSOR_GENERIC32:
16447 case PROCESSOR_GENERIC64:
16448 return 3;
16449
16450 case PROCESSOR_CORE2:
16451 return 4;
16452
16453 default:
16454 return 1;
16455 }
16456 }
16457
16458 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags set
16459 by DEP_INSN and reads nothing else set by DEP_INSN. */
16460
16461 static int
16462 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
16463 {
16464 rtx set, set2;
16465
16466 /* Simplify the test for uninteresting insns. */
16467 if (insn_type != TYPE_SETCC
16468 && insn_type != TYPE_ICMOV
16469 && insn_type != TYPE_FCMOV
16470 && insn_type != TYPE_IBR)
16471 return 0;
16472
16473 if ((set = single_set (dep_insn)) != 0)
16474 {
16475 set = SET_DEST (set);
16476 set2 = NULL_RTX;
16477 }
16478 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
16479 && XVECLEN (PATTERN (dep_insn), 0) == 2
16480 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
16481 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
16482 {
16483 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
16484 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
16485 }
16486 else
16487 return 0;
16488
16489 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
16490 return 0;
16491
16492 /* This test is true if the dependent insn reads the flags but
16493 not any other potentially set register. */
16494 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
16495 return 0;
16496
16497 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
16498 return 0;
16499
16500 return 1;
16501 }
16502
16503 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
16504 address with operands set by DEP_INSN. */
16505
16506 static int
16507 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
16508 {
16509 rtx addr;
16510
16511 if (insn_type == TYPE_LEA
16512 && TARGET_PENTIUM)
16513 {
16514 addr = PATTERN (insn);
16515
16516 if (GET_CODE (addr) == PARALLEL)
16517 addr = XVECEXP (addr, 0, 0);
16518
16519 gcc_assert (GET_CODE (addr) == SET);
16520
16521 addr = SET_SRC (addr);
16522 }
16523 else
16524 {
16525 int i;
16526 extract_insn_cached (insn);
16527 for (i = recog_data.n_operands - 1; i >= 0; --i)
16528 if (MEM_P (recog_data.operand[i]))
16529 {
16530 addr = XEXP (recog_data.operand[i], 0);
16531 goto found;
16532 }
16533 return 0;
16534 found:;
16535 }
16536
16537 return modified_in_p (addr, dep_insn);
16538 }
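/* A sketch of the dependence this detects (illustrative only):

     movl %eax, %ebx
     movl (%ebx), %ecx     <- address register written by the previous insn

   On the Pentium such an address generation interlock delays the load by
   a cycle, which ix86_adjust_cost below models by bumping the cost.  */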
16539
16540 static int
16541 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
16542 {
16543 enum attr_type insn_type, dep_insn_type;
16544 enum attr_memory memory;
16545 rtx set, set2;
16546 int dep_insn_code_number;
16547
16548 /* Anti and output dependencies have zero cost on all CPUs. */
16549 if (REG_NOTE_KIND (link) != 0)
16550 return 0;
16551
16552 dep_insn_code_number = recog_memoized (dep_insn);
16553
16554 /* If we can't recognize the insns, we can't really do anything. */
16555 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
16556 return cost;
16557
16558 insn_type = get_attr_type (insn);
16559 dep_insn_type = get_attr_type (dep_insn);
16560
16561 switch (ix86_tune)
16562 {
16563 case PROCESSOR_PENTIUM:
16564 /* Address Generation Interlock adds a cycle of latency. */
16565 if (ix86_agi_dependent (insn, dep_insn, insn_type))
16566 cost += 1;
16567
16568 /* ??? Compares pair with jump/setcc. */
16569 if (ix86_flags_dependent (insn, dep_insn, insn_type))
16570 cost = 0;
16571
16572 /* Floating point stores require the value to be ready one cycle earlier. */
16573 if (insn_type == TYPE_FMOV
16574 && get_attr_memory (insn) == MEMORY_STORE
16575 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16576 cost += 1;
16577 break;
16578
16579 case PROCESSOR_PENTIUMPRO:
16580 memory = get_attr_memory (insn);
16581
16582 /* INT->FP conversion is expensive. */
16583 if (get_attr_fp_int_src (dep_insn))
16584 cost += 5;
16585
16586 /* There is one cycle extra latency between an FP op and a store. */
16587 if (insn_type == TYPE_FMOV
16588 && (set = single_set (dep_insn)) != NULL_RTX
16589 && (set2 = single_set (insn)) != NULL_RTX
16590 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
16591 && MEM_P (SET_DEST (set2)))
16592 cost += 1;
16593
16594 /* Show the ability of the reorder buffer to hide the latency of a load
16595 by executing it in parallel with the previous instruction, provided the
16596 previous instruction is not needed to compute the address. */
16597 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16598 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16599 {
16600 /* Claim moves to take one cycle, as the core can issue one load
16601 at a time and the next load can start a cycle later. */
16602 if (dep_insn_type == TYPE_IMOV
16603 || dep_insn_type == TYPE_FMOV)
16604 cost = 1;
16605 else if (cost > 1)
16606 cost--;
16607 }
16608 break;
16609
16610 case PROCESSOR_K6:
16611 memory = get_attr_memory (insn);
16612
16613 /* The esp dependency is resolved before the instruction is really
16614 finished. */
16615 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
16616 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
16617 return 1;
16618
16619 /* INT->FP conversion is expensive. */
16620 if (get_attr_fp_int_src (dep_insn))
16621 cost += 5;
16622
16623 /* Show the ability of the reorder buffer to hide the latency of a load
16624 by executing it in parallel with the previous instruction, provided the
16625 previous instruction is not needed to compute the address. */
16626 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16627 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16628 {
16629 /* Claim moves to take one cycle, as the core can issue one load
16630 at a time and the next load can start a cycle later. */
16631 if (dep_insn_type == TYPE_IMOV
16632 || dep_insn_type == TYPE_FMOV)
16633 cost = 1;
16634 else if (cost > 2)
16635 cost -= 2;
16636 else
16637 cost = 1;
16638 }
16639 break;
16640
16641 case PROCESSOR_ATHLON:
16642 case PROCESSOR_K8:
16643 case PROCESSOR_AMDFAM10:
16644 case PROCESSOR_GENERIC32:
16645 case PROCESSOR_GENERIC64:
16646 memory = get_attr_memory (insn);
16647
16648 /* Show the ability of the reorder buffer to hide the latency of a load
16649 by executing it in parallel with the previous instruction, provided the
16650 previous instruction is not needed to compute the address. */
16651 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16652 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16653 {
16654 enum attr_unit unit = get_attr_unit (insn);
16655 int loadcost = 3;
16656
16657 /* Because of the difference between the length of integer and
16658 floating unit pipeline preparation stages, the memory operands
16659 for floating point are cheaper.
16660
16661 ??? For Athlon the difference is most probably 2. */
16662 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
16663 loadcost = 3;
16664 else
16665 loadcost = TARGET_ATHLON ? 2 : 0;
16666
16667 if (cost >= loadcost)
16668 cost -= loadcost;
16669 else
16670 cost = 0;
16671 }
16672
16673 default:
16674 break;
16675 }
16676
16677 return cost;
16678 }
16679
16680 /* How many alternative schedules to try. This should be as wide as the
16681 scheduling freedom in the DFA, but no wider. Making this value too
16682 large results in extra work for the scheduler. */
16683
16684 static int
16685 ia32_multipass_dfa_lookahead (void)
16686 {
16687 switch (ix86_tune)
16688 {
16689 case PROCESSOR_PENTIUM:
16690 return 2;
16691
16692 case PROCESSOR_PENTIUMPRO:
16693 case PROCESSOR_K6:
16694 return 1;
16695
16696 default:
16697 return 0;
16698 }
16699 }
16700
16701 \f
16702 /* Compute the alignment given to a constant that is being placed in memory.
16703 EXP is the constant and ALIGN is the alignment that the object would
16704 ordinarily have.
16705 The value of this function is used instead of that alignment to align
16706 the object. */
16707
16708 int
16709 ix86_constant_alignment (tree exp, int align)
16710 {
16711 if (TREE_CODE (exp) == REAL_CST)
16712 {
16713 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
16714 return 64;
16715 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
16716 return 128;
16717 }
16718 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
16719 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
16720 return BITS_PER_WORD;
16721
16722 return align;
16723 }
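/* For instance (illustrative): a DFmode constant placed in the constant
   pool is given 64-bit alignment even when the type only asks for 32,
   and a string constant of 31 or more bytes is padded out to
   BITS_PER_WORD alignment unless we are optimizing for size.  */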
16724
16725 /* Compute the alignment for a static variable.
16726 TYPE is the data type, and ALIGN is the alignment that
16727 the object would ordinarily have. The value of this function is used
16728 instead of that alignment to align the object. */
16729
16730 int
16731 ix86_data_alignment (tree type, int align)
16732 {
16733 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
16734
16735 if (AGGREGATE_TYPE_P (type)
16736 && TYPE_SIZE (type)
16737 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16738 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
16739 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
16740 && align < max_align)
16741 align = max_align;
16742
16743 /* The x86-64 ABI requires arrays larger than 16 bytes to be aligned
16744 to a 16-byte boundary. */
16745 if (TARGET_64BIT)
16746 {
16747 if (AGGREGATE_TYPE_P (type)
16748 && TYPE_SIZE (type)
16749 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16750 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
16751 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16752 return 128;
16753 }
16754
16755 if (TREE_CODE (type) == ARRAY_TYPE)
16756 {
16757 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16758 return 64;
16759 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16760 return 128;
16761 }
16762 else if (TREE_CODE (type) == COMPLEX_TYPE)
16763 {
16765 if (TYPE_MODE (type) == DCmode && align < 64)
16766 return 64;
16767 if (TYPE_MODE (type) == XCmode && align < 128)
16768 return 128;
16769 }
16770 else if ((TREE_CODE (type) == RECORD_TYPE
16771 || TREE_CODE (type) == UNION_TYPE
16772 || TREE_CODE (type) == QUAL_UNION_TYPE)
16773 && TYPE_FIELDS (type))
16774 {
16775 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16776 return 64;
16777 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16778 return 128;
16779 }
16780 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16781 || TREE_CODE (type) == INTEGER_TYPE)
16782 {
16783 if (TYPE_MODE (type) == DFmode && align < 64)
16784 return 64;
16785 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16786 return 128;
16787 }
16788
16789 return align;
16790 }
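/* Illustrative examples (a sketch, not from the original sources):
   - "static char buf[20]" is a 160-bit aggregate, so on x86-64 the branch
     above raises its alignment to 128 bits, as the psABI requires for
     arrays of 16 bytes or more.
   - "static double d" is given 64-bit alignment even though the ia32 ABI
     only guarantees 32 bits for double, which helps keep fld/fstl
     accesses from straddling cache lines.  */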
16791
16792 /* Compute the alignment for a local variable.
16793 TYPE is the data type, and ALIGN is the alignment that
16794 the object would ordinarily have. The value of this macro is used
16795 instead of that alignment to align the object. */
16796
16797 int
16798 ix86_local_alignment (tree type, int align)
16799 {
16800 /* The x86-64 ABI requires arrays larger than 16 bytes to be aligned
16801 to a 16-byte boundary. */
16802 if (TARGET_64BIT)
16803 {
16804 if (AGGREGATE_TYPE_P (type)
16805 && TYPE_SIZE (type)
16806 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16807 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
16808 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16809 return 128;
16810 }
16811 if (TREE_CODE (type) == ARRAY_TYPE)
16812 {
16813 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16814 return 64;
16815 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16816 return 128;
16817 }
16818 else if (TREE_CODE (type) == COMPLEX_TYPE)
16819 {
16820 if (TYPE_MODE (type) == DCmode && align < 64)
16821 return 64;
16822 if (TYPE_MODE (type) == XCmode && align < 128)
16823 return 128;
16824 }
16825 else if ((TREE_CODE (type) == RECORD_TYPE
16826 || TREE_CODE (type) == UNION_TYPE
16827 || TREE_CODE (type) == QUAL_UNION_TYPE)
16828 && TYPE_FIELDS (type))
16829 {
16830 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16831 return 64;
16832 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16833 return 128;
16834 }
16835 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16836 || TREE_CODE (type) == INTEGER_TYPE)
16837 {
16839 if (TYPE_MODE (type) == DFmode && align < 64)
16840 return 64;
16841 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16842 return 128;
16843 }
16844 return align;
16845 }
16846 \f
16847 /* Emit RTL insns to initialize the variable parts of a trampoline.
16848 FNADDR is an RTX for the address of the function's pure code.
16849 CXT is an RTX for the static chain value for the function. */
16850 void
16851 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
16852 {
16853 if (!TARGET_64BIT)
16854 {
16855 /* Compute offset from the end of the jmp to the target function. */
16856 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
16857 plus_constant (tramp, 10),
16858 NULL_RTX, 1, OPTAB_DIRECT);
16859 emit_move_insn (gen_rtx_MEM (QImode, tramp),
16860 gen_int_mode (0xb9, QImode));
16861 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
16862 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
16863 gen_int_mode (0xe9, QImode));
16864 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
16865 }
16866 else
16867 {
16868 int offset = 0;
16869 /* Try to load the address using the shorter movl instead of movabs.
16870 We may want to support movq for kernel mode, but the kernel does not use
16871 trampolines at the moment. */
16872 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
16873 {
16874 fnaddr = copy_to_mode_reg (DImode, fnaddr);
16875 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16876 gen_int_mode (0xbb41, HImode));
16877 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
16878 gen_lowpart (SImode, fnaddr));
16879 offset += 6;
16880 }
16881 else
16882 {
16883 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16884 gen_int_mode (0xbb49, HImode));
16885 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16886 fnaddr);
16887 offset += 10;
16888 }
16889 /* Load static chain using movabs to r10. */
16890 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16891 gen_int_mode (0xba49, HImode));
16892 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16893 cxt);
16894 offset += 10;
16895 /* Jump to r11. */
16896 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16897 gen_int_mode (0xff49, HImode));
16898 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
16899 gen_int_mode (0xe3, QImode));
16900 offset += 3;
16901 gcc_assert (offset <= TRAMPOLINE_SIZE);
16902 }
16903
16904 #ifdef ENABLE_EXECUTE_STACK
16905 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
16906 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
16907 #endif
16908 }
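/* The byte sequences emitted above decode roughly as follows (an
   illustrative sketch, not part of the original code):

   ia32:    b9 <cxt32>        movl  $cxt, %ecx
            e9 <rel32>        jmp   fnaddr

   x86-64:  49 bb <imm64>     movabsq $fnaddr, %r11
            (or 41 bb <imm32>  movl $fnaddr, %r11d  when it zero-extends)
            49 ba <imm64>     movabsq $cxt, %r10
            49 ff e3          jmp   *%r11  */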
16909 \f
16910 /* Codes for all the SSE/MMX builtins. */
16911 enum ix86_builtins
16912 {
16913 IX86_BUILTIN_ADDPS,
16914 IX86_BUILTIN_ADDSS,
16915 IX86_BUILTIN_DIVPS,
16916 IX86_BUILTIN_DIVSS,
16917 IX86_BUILTIN_MULPS,
16918 IX86_BUILTIN_MULSS,
16919 IX86_BUILTIN_SUBPS,
16920 IX86_BUILTIN_SUBSS,
16921
16922 IX86_BUILTIN_CMPEQPS,
16923 IX86_BUILTIN_CMPLTPS,
16924 IX86_BUILTIN_CMPLEPS,
16925 IX86_BUILTIN_CMPGTPS,
16926 IX86_BUILTIN_CMPGEPS,
16927 IX86_BUILTIN_CMPNEQPS,
16928 IX86_BUILTIN_CMPNLTPS,
16929 IX86_BUILTIN_CMPNLEPS,
16930 IX86_BUILTIN_CMPNGTPS,
16931 IX86_BUILTIN_CMPNGEPS,
16932 IX86_BUILTIN_CMPORDPS,
16933 IX86_BUILTIN_CMPUNORDPS,
16934 IX86_BUILTIN_CMPEQSS,
16935 IX86_BUILTIN_CMPLTSS,
16936 IX86_BUILTIN_CMPLESS,
16937 IX86_BUILTIN_CMPNEQSS,
16938 IX86_BUILTIN_CMPNLTSS,
16939 IX86_BUILTIN_CMPNLESS,
16940 IX86_BUILTIN_CMPNGTSS,
16941 IX86_BUILTIN_CMPNGESS,
16942 IX86_BUILTIN_CMPORDSS,
16943 IX86_BUILTIN_CMPUNORDSS,
16944
16945 IX86_BUILTIN_COMIEQSS,
16946 IX86_BUILTIN_COMILTSS,
16947 IX86_BUILTIN_COMILESS,
16948 IX86_BUILTIN_COMIGTSS,
16949 IX86_BUILTIN_COMIGESS,
16950 IX86_BUILTIN_COMINEQSS,
16951 IX86_BUILTIN_UCOMIEQSS,
16952 IX86_BUILTIN_UCOMILTSS,
16953 IX86_BUILTIN_UCOMILESS,
16954 IX86_BUILTIN_UCOMIGTSS,
16955 IX86_BUILTIN_UCOMIGESS,
16956 IX86_BUILTIN_UCOMINEQSS,
16957
16958 IX86_BUILTIN_CVTPI2PS,
16959 IX86_BUILTIN_CVTPS2PI,
16960 IX86_BUILTIN_CVTSI2SS,
16961 IX86_BUILTIN_CVTSI642SS,
16962 IX86_BUILTIN_CVTSS2SI,
16963 IX86_BUILTIN_CVTSS2SI64,
16964 IX86_BUILTIN_CVTTPS2PI,
16965 IX86_BUILTIN_CVTTSS2SI,
16966 IX86_BUILTIN_CVTTSS2SI64,
16967
16968 IX86_BUILTIN_MAXPS,
16969 IX86_BUILTIN_MAXSS,
16970 IX86_BUILTIN_MINPS,
16971 IX86_BUILTIN_MINSS,
16972
16973 IX86_BUILTIN_LOADUPS,
16974 IX86_BUILTIN_STOREUPS,
16975 IX86_BUILTIN_MOVSS,
16976
16977 IX86_BUILTIN_MOVHLPS,
16978 IX86_BUILTIN_MOVLHPS,
16979 IX86_BUILTIN_LOADHPS,
16980 IX86_BUILTIN_LOADLPS,
16981 IX86_BUILTIN_STOREHPS,
16982 IX86_BUILTIN_STORELPS,
16983
16984 IX86_BUILTIN_MASKMOVQ,
16985 IX86_BUILTIN_MOVMSKPS,
16986 IX86_BUILTIN_PMOVMSKB,
16987
16988 IX86_BUILTIN_MOVNTPS,
16989 IX86_BUILTIN_MOVNTQ,
16990
16991 IX86_BUILTIN_LOADDQU,
16992 IX86_BUILTIN_STOREDQU,
16993
16994 IX86_BUILTIN_PACKSSWB,
16995 IX86_BUILTIN_PACKSSDW,
16996 IX86_BUILTIN_PACKUSWB,
16997
16998 IX86_BUILTIN_PADDB,
16999 IX86_BUILTIN_PADDW,
17000 IX86_BUILTIN_PADDD,
17001 IX86_BUILTIN_PADDQ,
17002 IX86_BUILTIN_PADDSB,
17003 IX86_BUILTIN_PADDSW,
17004 IX86_BUILTIN_PADDUSB,
17005 IX86_BUILTIN_PADDUSW,
17006 IX86_BUILTIN_PSUBB,
17007 IX86_BUILTIN_PSUBW,
17008 IX86_BUILTIN_PSUBD,
17009 IX86_BUILTIN_PSUBQ,
17010 IX86_BUILTIN_PSUBSB,
17011 IX86_BUILTIN_PSUBSW,
17012 IX86_BUILTIN_PSUBUSB,
17013 IX86_BUILTIN_PSUBUSW,
17014
17015 IX86_BUILTIN_PAND,
17016 IX86_BUILTIN_PANDN,
17017 IX86_BUILTIN_POR,
17018 IX86_BUILTIN_PXOR,
17019
17020 IX86_BUILTIN_PAVGB,
17021 IX86_BUILTIN_PAVGW,
17022
17023 IX86_BUILTIN_PCMPEQB,
17024 IX86_BUILTIN_PCMPEQW,
17025 IX86_BUILTIN_PCMPEQD,
17026 IX86_BUILTIN_PCMPGTB,
17027 IX86_BUILTIN_PCMPGTW,
17028 IX86_BUILTIN_PCMPGTD,
17029
17030 IX86_BUILTIN_PMADDWD,
17031
17032 IX86_BUILTIN_PMAXSW,
17033 IX86_BUILTIN_PMAXUB,
17034 IX86_BUILTIN_PMINSW,
17035 IX86_BUILTIN_PMINUB,
17036
17037 IX86_BUILTIN_PMULHUW,
17038 IX86_BUILTIN_PMULHW,
17039 IX86_BUILTIN_PMULLW,
17040
17041 IX86_BUILTIN_PSADBW,
17042 IX86_BUILTIN_PSHUFW,
17043
17044 IX86_BUILTIN_PSLLW,
17045 IX86_BUILTIN_PSLLD,
17046 IX86_BUILTIN_PSLLQ,
17047 IX86_BUILTIN_PSRAW,
17048 IX86_BUILTIN_PSRAD,
17049 IX86_BUILTIN_PSRLW,
17050 IX86_BUILTIN_PSRLD,
17051 IX86_BUILTIN_PSRLQ,
17052 IX86_BUILTIN_PSLLWI,
17053 IX86_BUILTIN_PSLLDI,
17054 IX86_BUILTIN_PSLLQI,
17055 IX86_BUILTIN_PSRAWI,
17056 IX86_BUILTIN_PSRADI,
17057 IX86_BUILTIN_PSRLWI,
17058 IX86_BUILTIN_PSRLDI,
17059 IX86_BUILTIN_PSRLQI,
17060
17061 IX86_BUILTIN_PUNPCKHBW,
17062 IX86_BUILTIN_PUNPCKHWD,
17063 IX86_BUILTIN_PUNPCKHDQ,
17064 IX86_BUILTIN_PUNPCKLBW,
17065 IX86_BUILTIN_PUNPCKLWD,
17066 IX86_BUILTIN_PUNPCKLDQ,
17067
17068 IX86_BUILTIN_SHUFPS,
17069
17070 IX86_BUILTIN_RCPPS,
17071 IX86_BUILTIN_RCPSS,
17072 IX86_BUILTIN_RSQRTPS,
17073 IX86_BUILTIN_RSQRTSS,
17074 IX86_BUILTIN_RSQRTF,
17075 IX86_BUILTIN_SQRTPS,
17076 IX86_BUILTIN_SQRTSS,
17077
17078 IX86_BUILTIN_UNPCKHPS,
17079 IX86_BUILTIN_UNPCKLPS,
17080
17081 IX86_BUILTIN_ANDPS,
17082 IX86_BUILTIN_ANDNPS,
17083 IX86_BUILTIN_ORPS,
17084 IX86_BUILTIN_XORPS,
17085
17086 IX86_BUILTIN_EMMS,
17087 IX86_BUILTIN_LDMXCSR,
17088 IX86_BUILTIN_STMXCSR,
17089 IX86_BUILTIN_SFENCE,
17090
17091 /* 3DNow! Original */
17092 IX86_BUILTIN_FEMMS,
17093 IX86_BUILTIN_PAVGUSB,
17094 IX86_BUILTIN_PF2ID,
17095 IX86_BUILTIN_PFACC,
17096 IX86_BUILTIN_PFADD,
17097 IX86_BUILTIN_PFCMPEQ,
17098 IX86_BUILTIN_PFCMPGE,
17099 IX86_BUILTIN_PFCMPGT,
17100 IX86_BUILTIN_PFMAX,
17101 IX86_BUILTIN_PFMIN,
17102 IX86_BUILTIN_PFMUL,
17103 IX86_BUILTIN_PFRCP,
17104 IX86_BUILTIN_PFRCPIT1,
17105 IX86_BUILTIN_PFRCPIT2,
17106 IX86_BUILTIN_PFRSQIT1,
17107 IX86_BUILTIN_PFRSQRT,
17108 IX86_BUILTIN_PFSUB,
17109 IX86_BUILTIN_PFSUBR,
17110 IX86_BUILTIN_PI2FD,
17111 IX86_BUILTIN_PMULHRW,
17112
17113 /* 3DNow! Athlon Extensions */
17114 IX86_BUILTIN_PF2IW,
17115 IX86_BUILTIN_PFNACC,
17116 IX86_BUILTIN_PFPNACC,
17117 IX86_BUILTIN_PI2FW,
17118 IX86_BUILTIN_PSWAPDSI,
17119 IX86_BUILTIN_PSWAPDSF,
17120
17121 /* SSE2 */
17122 IX86_BUILTIN_ADDPD,
17123 IX86_BUILTIN_ADDSD,
17124 IX86_BUILTIN_DIVPD,
17125 IX86_BUILTIN_DIVSD,
17126 IX86_BUILTIN_MULPD,
17127 IX86_BUILTIN_MULSD,
17128 IX86_BUILTIN_SUBPD,
17129 IX86_BUILTIN_SUBSD,
17130
17131 IX86_BUILTIN_CMPEQPD,
17132 IX86_BUILTIN_CMPLTPD,
17133 IX86_BUILTIN_CMPLEPD,
17134 IX86_BUILTIN_CMPGTPD,
17135 IX86_BUILTIN_CMPGEPD,
17136 IX86_BUILTIN_CMPNEQPD,
17137 IX86_BUILTIN_CMPNLTPD,
17138 IX86_BUILTIN_CMPNLEPD,
17139 IX86_BUILTIN_CMPNGTPD,
17140 IX86_BUILTIN_CMPNGEPD,
17141 IX86_BUILTIN_CMPORDPD,
17142 IX86_BUILTIN_CMPUNORDPD,
17143 IX86_BUILTIN_CMPEQSD,
17144 IX86_BUILTIN_CMPLTSD,
17145 IX86_BUILTIN_CMPLESD,
17146 IX86_BUILTIN_CMPNEQSD,
17147 IX86_BUILTIN_CMPNLTSD,
17148 IX86_BUILTIN_CMPNLESD,
17149 IX86_BUILTIN_CMPORDSD,
17150 IX86_BUILTIN_CMPUNORDSD,
17151
17152 IX86_BUILTIN_COMIEQSD,
17153 IX86_BUILTIN_COMILTSD,
17154 IX86_BUILTIN_COMILESD,
17155 IX86_BUILTIN_COMIGTSD,
17156 IX86_BUILTIN_COMIGESD,
17157 IX86_BUILTIN_COMINEQSD,
17158 IX86_BUILTIN_UCOMIEQSD,
17159 IX86_BUILTIN_UCOMILTSD,
17160 IX86_BUILTIN_UCOMILESD,
17161 IX86_BUILTIN_UCOMIGTSD,
17162 IX86_BUILTIN_UCOMIGESD,
17163 IX86_BUILTIN_UCOMINEQSD,
17164
17165 IX86_BUILTIN_MAXPD,
17166 IX86_BUILTIN_MAXSD,
17167 IX86_BUILTIN_MINPD,
17168 IX86_BUILTIN_MINSD,
17169
17170 IX86_BUILTIN_ANDPD,
17171 IX86_BUILTIN_ANDNPD,
17172 IX86_BUILTIN_ORPD,
17173 IX86_BUILTIN_XORPD,
17174
17175 IX86_BUILTIN_SQRTPD,
17176 IX86_BUILTIN_SQRTSD,
17177
17178 IX86_BUILTIN_UNPCKHPD,
17179 IX86_BUILTIN_UNPCKLPD,
17180
17181 IX86_BUILTIN_SHUFPD,
17182
17183 IX86_BUILTIN_LOADUPD,
17184 IX86_BUILTIN_STOREUPD,
17185 IX86_BUILTIN_MOVSD,
17186
17187 IX86_BUILTIN_LOADHPD,
17188 IX86_BUILTIN_LOADLPD,
17189
17190 IX86_BUILTIN_CVTDQ2PD,
17191 IX86_BUILTIN_CVTDQ2PS,
17192
17193 IX86_BUILTIN_CVTPD2DQ,
17194 IX86_BUILTIN_CVTPD2PI,
17195 IX86_BUILTIN_CVTPD2PS,
17196 IX86_BUILTIN_CVTTPD2DQ,
17197 IX86_BUILTIN_CVTTPD2PI,
17198
17199 IX86_BUILTIN_CVTPI2PD,
17200 IX86_BUILTIN_CVTSI2SD,
17201 IX86_BUILTIN_CVTSI642SD,
17202
17203 IX86_BUILTIN_CVTSD2SI,
17204 IX86_BUILTIN_CVTSD2SI64,
17205 IX86_BUILTIN_CVTSD2SS,
17206 IX86_BUILTIN_CVTSS2SD,
17207 IX86_BUILTIN_CVTTSD2SI,
17208 IX86_BUILTIN_CVTTSD2SI64,
17209
17210 IX86_BUILTIN_CVTPS2DQ,
17211 IX86_BUILTIN_CVTPS2PD,
17212 IX86_BUILTIN_CVTTPS2DQ,
17213
17214 IX86_BUILTIN_MOVNTI,
17215 IX86_BUILTIN_MOVNTPD,
17216 IX86_BUILTIN_MOVNTDQ,
17217
17218 /* SSE2 MMX */
17219 IX86_BUILTIN_MASKMOVDQU,
17220 IX86_BUILTIN_MOVMSKPD,
17221 IX86_BUILTIN_PMOVMSKB128,
17222
17223 IX86_BUILTIN_PACKSSWB128,
17224 IX86_BUILTIN_PACKSSDW128,
17225 IX86_BUILTIN_PACKUSWB128,
17226
17227 IX86_BUILTIN_PADDB128,
17228 IX86_BUILTIN_PADDW128,
17229 IX86_BUILTIN_PADDD128,
17230 IX86_BUILTIN_PADDQ128,
17231 IX86_BUILTIN_PADDSB128,
17232 IX86_BUILTIN_PADDSW128,
17233 IX86_BUILTIN_PADDUSB128,
17234 IX86_BUILTIN_PADDUSW128,
17235 IX86_BUILTIN_PSUBB128,
17236 IX86_BUILTIN_PSUBW128,
17237 IX86_BUILTIN_PSUBD128,
17238 IX86_BUILTIN_PSUBQ128,
17239 IX86_BUILTIN_PSUBSB128,
17240 IX86_BUILTIN_PSUBSW128,
17241 IX86_BUILTIN_PSUBUSB128,
17242 IX86_BUILTIN_PSUBUSW128,
17243
17244 IX86_BUILTIN_PAND128,
17245 IX86_BUILTIN_PANDN128,
17246 IX86_BUILTIN_POR128,
17247 IX86_BUILTIN_PXOR128,
17248
17249 IX86_BUILTIN_PAVGB128,
17250 IX86_BUILTIN_PAVGW128,
17251
17252 IX86_BUILTIN_PCMPEQB128,
17253 IX86_BUILTIN_PCMPEQW128,
17254 IX86_BUILTIN_PCMPEQD128,
17255 IX86_BUILTIN_PCMPGTB128,
17256 IX86_BUILTIN_PCMPGTW128,
17257 IX86_BUILTIN_PCMPGTD128,
17258
17259 IX86_BUILTIN_PMADDWD128,
17260
17261 IX86_BUILTIN_PMAXSW128,
17262 IX86_BUILTIN_PMAXUB128,
17263 IX86_BUILTIN_PMINSW128,
17264 IX86_BUILTIN_PMINUB128,
17265
17266 IX86_BUILTIN_PMULUDQ,
17267 IX86_BUILTIN_PMULUDQ128,
17268 IX86_BUILTIN_PMULHUW128,
17269 IX86_BUILTIN_PMULHW128,
17270 IX86_BUILTIN_PMULLW128,
17271
17272 IX86_BUILTIN_PSADBW128,
17273 IX86_BUILTIN_PSHUFHW,
17274 IX86_BUILTIN_PSHUFLW,
17275 IX86_BUILTIN_PSHUFD,
17276
17277 IX86_BUILTIN_PSLLDQI128,
17278 IX86_BUILTIN_PSLLWI128,
17279 IX86_BUILTIN_PSLLDI128,
17280 IX86_BUILTIN_PSLLQI128,
17281 IX86_BUILTIN_PSRAWI128,
17282 IX86_BUILTIN_PSRADI128,
17283 IX86_BUILTIN_PSRLDQI128,
17284 IX86_BUILTIN_PSRLWI128,
17285 IX86_BUILTIN_PSRLDI128,
17286 IX86_BUILTIN_PSRLQI128,
17287
17288 IX86_BUILTIN_PSLLDQ128,
17289 IX86_BUILTIN_PSLLW128,
17290 IX86_BUILTIN_PSLLD128,
17291 IX86_BUILTIN_PSLLQ128,
17292 IX86_BUILTIN_PSRAW128,
17293 IX86_BUILTIN_PSRAD128,
17294 IX86_BUILTIN_PSRLW128,
17295 IX86_BUILTIN_PSRLD128,
17296 IX86_BUILTIN_PSRLQ128,
17297
17298 IX86_BUILTIN_PUNPCKHBW128,
17299 IX86_BUILTIN_PUNPCKHWD128,
17300 IX86_BUILTIN_PUNPCKHDQ128,
17301 IX86_BUILTIN_PUNPCKHQDQ128,
17302 IX86_BUILTIN_PUNPCKLBW128,
17303 IX86_BUILTIN_PUNPCKLWD128,
17304 IX86_BUILTIN_PUNPCKLDQ128,
17305 IX86_BUILTIN_PUNPCKLQDQ128,
17306
17307 IX86_BUILTIN_CLFLUSH,
17308 IX86_BUILTIN_MFENCE,
17309 IX86_BUILTIN_LFENCE,
17310
17311 /* SSE3 (Prescott New Instructions). */
17312 IX86_BUILTIN_ADDSUBPS,
17313 IX86_BUILTIN_HADDPS,
17314 IX86_BUILTIN_HSUBPS,
17315 IX86_BUILTIN_MOVSHDUP,
17316 IX86_BUILTIN_MOVSLDUP,
17317 IX86_BUILTIN_ADDSUBPD,
17318 IX86_BUILTIN_HADDPD,
17319 IX86_BUILTIN_HSUBPD,
17320 IX86_BUILTIN_LDDQU,
17321
17322 IX86_BUILTIN_MONITOR,
17323 IX86_BUILTIN_MWAIT,
17324
17325 /* SSSE3. */
17326 IX86_BUILTIN_PHADDW,
17327 IX86_BUILTIN_PHADDD,
17328 IX86_BUILTIN_PHADDSW,
17329 IX86_BUILTIN_PHSUBW,
17330 IX86_BUILTIN_PHSUBD,
17331 IX86_BUILTIN_PHSUBSW,
17332 IX86_BUILTIN_PMADDUBSW,
17333 IX86_BUILTIN_PMULHRSW,
17334 IX86_BUILTIN_PSHUFB,
17335 IX86_BUILTIN_PSIGNB,
17336 IX86_BUILTIN_PSIGNW,
17337 IX86_BUILTIN_PSIGND,
17338 IX86_BUILTIN_PALIGNR,
17339 IX86_BUILTIN_PABSB,
17340 IX86_BUILTIN_PABSW,
17341 IX86_BUILTIN_PABSD,
17342
17343 IX86_BUILTIN_PHADDW128,
17344 IX86_BUILTIN_PHADDD128,
17345 IX86_BUILTIN_PHADDSW128,
17346 IX86_BUILTIN_PHSUBW128,
17347 IX86_BUILTIN_PHSUBD128,
17348 IX86_BUILTIN_PHSUBSW128,
17349 IX86_BUILTIN_PMADDUBSW128,
17350 IX86_BUILTIN_PMULHRSW128,
17351 IX86_BUILTIN_PSHUFB128,
17352 IX86_BUILTIN_PSIGNB128,
17353 IX86_BUILTIN_PSIGNW128,
17354 IX86_BUILTIN_PSIGND128,
17355 IX86_BUILTIN_PALIGNR128,
17356 IX86_BUILTIN_PABSB128,
17357 IX86_BUILTIN_PABSW128,
17358 IX86_BUILTIN_PABSD128,
17359
17360 /* AMDFAM10 - SSE4A New Instructions. */
17361 IX86_BUILTIN_MOVNTSD,
17362 IX86_BUILTIN_MOVNTSS,
17363 IX86_BUILTIN_EXTRQI,
17364 IX86_BUILTIN_EXTRQ,
17365 IX86_BUILTIN_INSERTQI,
17366 IX86_BUILTIN_INSERTQ,
17367
17368 /* SSE4.1. */
17369 IX86_BUILTIN_BLENDPD,
17370 IX86_BUILTIN_BLENDPS,
17371 IX86_BUILTIN_BLENDVPD,
17372 IX86_BUILTIN_BLENDVPS,
17373 IX86_BUILTIN_PBLENDVB128,
17374 IX86_BUILTIN_PBLENDW128,
17375
17376 IX86_BUILTIN_DPPD,
17377 IX86_BUILTIN_DPPS,
17378
17379 IX86_BUILTIN_INSERTPS128,
17380
17381 IX86_BUILTIN_MOVNTDQA,
17382 IX86_BUILTIN_MPSADBW128,
17383 IX86_BUILTIN_PACKUSDW128,
17384 IX86_BUILTIN_PCMPEQQ,
17385 IX86_BUILTIN_PHMINPOSUW128,
17386
17387 IX86_BUILTIN_PMAXSB128,
17388 IX86_BUILTIN_PMAXSD128,
17389 IX86_BUILTIN_PMAXUD128,
17390 IX86_BUILTIN_PMAXUW128,
17391
17392 IX86_BUILTIN_PMINSB128,
17393 IX86_BUILTIN_PMINSD128,
17394 IX86_BUILTIN_PMINUD128,
17395 IX86_BUILTIN_PMINUW128,
17396
17397 IX86_BUILTIN_PMOVSXBW128,
17398 IX86_BUILTIN_PMOVSXBD128,
17399 IX86_BUILTIN_PMOVSXBQ128,
17400 IX86_BUILTIN_PMOVSXWD128,
17401 IX86_BUILTIN_PMOVSXWQ128,
17402 IX86_BUILTIN_PMOVSXDQ128,
17403
17404 IX86_BUILTIN_PMOVZXBW128,
17405 IX86_BUILTIN_PMOVZXBD128,
17406 IX86_BUILTIN_PMOVZXBQ128,
17407 IX86_BUILTIN_PMOVZXWD128,
17408 IX86_BUILTIN_PMOVZXWQ128,
17409 IX86_BUILTIN_PMOVZXDQ128,
17410
17411 IX86_BUILTIN_PMULDQ128,
17412 IX86_BUILTIN_PMULLD128,
17413
17414 IX86_BUILTIN_ROUNDPD,
17415 IX86_BUILTIN_ROUNDPS,
17416 IX86_BUILTIN_ROUNDSD,
17417 IX86_BUILTIN_ROUNDSS,
17418
17419 IX86_BUILTIN_PTESTZ,
17420 IX86_BUILTIN_PTESTC,
17421 IX86_BUILTIN_PTESTNZC,
17422
17423 IX86_BUILTIN_VEC_INIT_V2SI,
17424 IX86_BUILTIN_VEC_INIT_V4HI,
17425 IX86_BUILTIN_VEC_INIT_V8QI,
17426 IX86_BUILTIN_VEC_EXT_V2DF,
17427 IX86_BUILTIN_VEC_EXT_V2DI,
17428 IX86_BUILTIN_VEC_EXT_V4SF,
17429 IX86_BUILTIN_VEC_EXT_V4SI,
17430 IX86_BUILTIN_VEC_EXT_V8HI,
17431 IX86_BUILTIN_VEC_EXT_V2SI,
17432 IX86_BUILTIN_VEC_EXT_V4HI,
17433 IX86_BUILTIN_VEC_EXT_V16QI,
17434 IX86_BUILTIN_VEC_SET_V2DI,
17435 IX86_BUILTIN_VEC_SET_V4SF,
17436 IX86_BUILTIN_VEC_SET_V4SI,
17437 IX86_BUILTIN_VEC_SET_V8HI,
17438 IX86_BUILTIN_VEC_SET_V4HI,
17439 IX86_BUILTIN_VEC_SET_V16QI,
17440
17441 IX86_BUILTIN_VEC_PACK_SFIX,
17442
17443 /* SSE4.2. */
17444 IX86_BUILTIN_CRC32QI,
17445 IX86_BUILTIN_CRC32HI,
17446 IX86_BUILTIN_CRC32SI,
17447 IX86_BUILTIN_CRC32DI,
17448
17449 IX86_BUILTIN_PCMPESTRI128,
17450 IX86_BUILTIN_PCMPESTRM128,
17451 IX86_BUILTIN_PCMPESTRA128,
17452 IX86_BUILTIN_PCMPESTRC128,
17453 IX86_BUILTIN_PCMPESTRO128,
17454 IX86_BUILTIN_PCMPESTRS128,
17455 IX86_BUILTIN_PCMPESTRZ128,
17456 IX86_BUILTIN_PCMPISTRI128,
17457 IX86_BUILTIN_PCMPISTRM128,
17458 IX86_BUILTIN_PCMPISTRA128,
17459 IX86_BUILTIN_PCMPISTRC128,
17460 IX86_BUILTIN_PCMPISTRO128,
17461 IX86_BUILTIN_PCMPISTRS128,
17462 IX86_BUILTIN_PCMPISTRZ128,
17463
17464 IX86_BUILTIN_PCMPGTQ,
17465
17466 /* TFmode support builtins. */
17467 IX86_BUILTIN_INFQ,
17468 IX86_BUILTIN_FABSQ,
17469 IX86_BUILTIN_COPYSIGNQ,
17470
17471 /* SSE5 instructions */
17472 IX86_BUILTIN_FMADDSS,
17473 IX86_BUILTIN_FMADDSD,
17474 IX86_BUILTIN_FMADDPS,
17475 IX86_BUILTIN_FMADDPD,
17476 IX86_BUILTIN_FMSUBSS,
17477 IX86_BUILTIN_FMSUBSD,
17478 IX86_BUILTIN_FMSUBPS,
17479 IX86_BUILTIN_FMSUBPD,
17480 IX86_BUILTIN_FNMADDSS,
17481 IX86_BUILTIN_FNMADDSD,
17482 IX86_BUILTIN_FNMADDPS,
17483 IX86_BUILTIN_FNMADDPD,
17484 IX86_BUILTIN_FNMSUBSS,
17485 IX86_BUILTIN_FNMSUBSD,
17486 IX86_BUILTIN_FNMSUBPS,
17487 IX86_BUILTIN_FNMSUBPD,
17488 IX86_BUILTIN_PCMOV_V2DI,
17489 IX86_BUILTIN_PCMOV_V4SI,
17490 IX86_BUILTIN_PCMOV_V8HI,
17491 IX86_BUILTIN_PCMOV_V16QI,
17492 IX86_BUILTIN_PCMOV_V4SF,
17493 IX86_BUILTIN_PCMOV_V2DF,
17494 IX86_BUILTIN_PPERM,
17495 IX86_BUILTIN_PERMPS,
17496 IX86_BUILTIN_PERMPD,
17497 IX86_BUILTIN_PMACSSWW,
17498 IX86_BUILTIN_PMACSWW,
17499 IX86_BUILTIN_PMACSSWD,
17500 IX86_BUILTIN_PMACSWD,
17501 IX86_BUILTIN_PMACSSDD,
17502 IX86_BUILTIN_PMACSDD,
17503 IX86_BUILTIN_PMACSSDQL,
17504 IX86_BUILTIN_PMACSSDQH,
17505 IX86_BUILTIN_PMACSDQL,
17506 IX86_BUILTIN_PMACSDQH,
17507 IX86_BUILTIN_PMADCSSWD,
17508 IX86_BUILTIN_PMADCSWD,
17509 IX86_BUILTIN_PHADDBW,
17510 IX86_BUILTIN_PHADDBD,
17511 IX86_BUILTIN_PHADDBQ,
17512 IX86_BUILTIN_PHADDWD,
17513 IX86_BUILTIN_PHADDWQ,
17514 IX86_BUILTIN_PHADDDQ,
17515 IX86_BUILTIN_PHADDUBW,
17516 IX86_BUILTIN_PHADDUBD,
17517 IX86_BUILTIN_PHADDUBQ,
17518 IX86_BUILTIN_PHADDUWD,
17519 IX86_BUILTIN_PHADDUWQ,
17520 IX86_BUILTIN_PHADDUDQ,
17521 IX86_BUILTIN_PHSUBBW,
17522 IX86_BUILTIN_PHSUBWD,
17523 IX86_BUILTIN_PHSUBDQ,
17524 IX86_BUILTIN_PROTB,
17525 IX86_BUILTIN_PROTW,
17526 IX86_BUILTIN_PROTD,
17527 IX86_BUILTIN_PROTQ,
17528 IX86_BUILTIN_PROTB_IMM,
17529 IX86_BUILTIN_PROTW_IMM,
17530 IX86_BUILTIN_PROTD_IMM,
17531 IX86_BUILTIN_PROTQ_IMM,
17532 IX86_BUILTIN_PSHLB,
17533 IX86_BUILTIN_PSHLW,
17534 IX86_BUILTIN_PSHLD,
17535 IX86_BUILTIN_PSHLQ,
17536 IX86_BUILTIN_PSHAB,
17537 IX86_BUILTIN_PSHAW,
17538 IX86_BUILTIN_PSHAD,
17539 IX86_BUILTIN_PSHAQ,
17540 IX86_BUILTIN_FRCZSS,
17541 IX86_BUILTIN_FRCZSD,
17542 IX86_BUILTIN_FRCZPS,
17543 IX86_BUILTIN_FRCZPD,
17544 IX86_BUILTIN_CVTPH2PS,
17545 IX86_BUILTIN_CVTPS2PH,
17546
17547 IX86_BUILTIN_COMEQSS,
17548 IX86_BUILTIN_COMNESS,
17549 IX86_BUILTIN_COMLTSS,
17550 IX86_BUILTIN_COMLESS,
17551 IX86_BUILTIN_COMGTSS,
17552 IX86_BUILTIN_COMGESS,
17553 IX86_BUILTIN_COMUEQSS,
17554 IX86_BUILTIN_COMUNESS,
17555 IX86_BUILTIN_COMULTSS,
17556 IX86_BUILTIN_COMULESS,
17557 IX86_BUILTIN_COMUGTSS,
17558 IX86_BUILTIN_COMUGESS,
17559 IX86_BUILTIN_COMORDSS,
17560 IX86_BUILTIN_COMUNORDSS,
17561 IX86_BUILTIN_COMFALSESS,
17562 IX86_BUILTIN_COMTRUESS,
17563
17564 IX86_BUILTIN_COMEQSD,
17565 IX86_BUILTIN_COMNESD,
17566 IX86_BUILTIN_COMLTSD,
17567 IX86_BUILTIN_COMLESD,
17568 IX86_BUILTIN_COMGTSD,
17569 IX86_BUILTIN_COMGESD,
17570 IX86_BUILTIN_COMUEQSD,
17571 IX86_BUILTIN_COMUNESD,
17572 IX86_BUILTIN_COMULTSD,
17573 IX86_BUILTIN_COMULESD,
17574 IX86_BUILTIN_COMUGTSD,
17575 IX86_BUILTIN_COMUGESD,
17576 IX86_BUILTIN_COMORDSD,
17577 IX86_BUILTIN_COMUNORDSD,
17578 IX86_BUILTIN_COMFALSESD,
17579 IX86_BUILTIN_COMTRUESD,
17580
17581 IX86_BUILTIN_COMEQPS,
17582 IX86_BUILTIN_COMNEPS,
17583 IX86_BUILTIN_COMLTPS,
17584 IX86_BUILTIN_COMLEPS,
17585 IX86_BUILTIN_COMGTPS,
17586 IX86_BUILTIN_COMGEPS,
17587 IX86_BUILTIN_COMUEQPS,
17588 IX86_BUILTIN_COMUNEPS,
17589 IX86_BUILTIN_COMULTPS,
17590 IX86_BUILTIN_COMULEPS,
17591 IX86_BUILTIN_COMUGTPS,
17592 IX86_BUILTIN_COMUGEPS,
17593 IX86_BUILTIN_COMORDPS,
17594 IX86_BUILTIN_COMUNORDPS,
17595 IX86_BUILTIN_COMFALSEPS,
17596 IX86_BUILTIN_COMTRUEPS,
17597
17598 IX86_BUILTIN_COMEQPD,
17599 IX86_BUILTIN_COMNEPD,
17600 IX86_BUILTIN_COMLTPD,
17601 IX86_BUILTIN_COMLEPD,
17602 IX86_BUILTIN_COMGTPD,
17603 IX86_BUILTIN_COMGEPD,
17604 IX86_BUILTIN_COMUEQPD,
17605 IX86_BUILTIN_COMUNEPD,
17606 IX86_BUILTIN_COMULTPD,
17607 IX86_BUILTIN_COMULEPD,
17608 IX86_BUILTIN_COMUGTPD,
17609 IX86_BUILTIN_COMUGEPD,
17610 IX86_BUILTIN_COMORDPD,
17611 IX86_BUILTIN_COMUNORDPD,
17612 IX86_BUILTIN_COMFALSEPD,
17613 IX86_BUILTIN_COMTRUEPD,
17614
17615 IX86_BUILTIN_PCOMEQUB,
17616 IX86_BUILTIN_PCOMNEUB,
17617 IX86_BUILTIN_PCOMLTUB,
17618 IX86_BUILTIN_PCOMLEUB,
17619 IX86_BUILTIN_PCOMGTUB,
17620 IX86_BUILTIN_PCOMGEUB,
17621 IX86_BUILTIN_PCOMFALSEUB,
17622 IX86_BUILTIN_PCOMTRUEUB,
17623 IX86_BUILTIN_PCOMEQUW,
17624 IX86_BUILTIN_PCOMNEUW,
17625 IX86_BUILTIN_PCOMLTUW,
17626 IX86_BUILTIN_PCOMLEUW,
17627 IX86_BUILTIN_PCOMGTUW,
17628 IX86_BUILTIN_PCOMGEUW,
17629 IX86_BUILTIN_PCOMFALSEUW,
17630 IX86_BUILTIN_PCOMTRUEUW,
17631 IX86_BUILTIN_PCOMEQUD,
17632 IX86_BUILTIN_PCOMNEUD,
17633 IX86_BUILTIN_PCOMLTUD,
17634 IX86_BUILTIN_PCOMLEUD,
17635 IX86_BUILTIN_PCOMGTUD,
17636 IX86_BUILTIN_PCOMGEUD,
17637 IX86_BUILTIN_PCOMFALSEUD,
17638 IX86_BUILTIN_PCOMTRUEUD,
17639 IX86_BUILTIN_PCOMEQUQ,
17640 IX86_BUILTIN_PCOMNEUQ,
17641 IX86_BUILTIN_PCOMLTUQ,
17642 IX86_BUILTIN_PCOMLEUQ,
17643 IX86_BUILTIN_PCOMGTUQ,
17644 IX86_BUILTIN_PCOMGEUQ,
17645 IX86_BUILTIN_PCOMFALSEUQ,
17646 IX86_BUILTIN_PCOMTRUEUQ,
17647
17648 IX86_BUILTIN_PCOMEQB,
17649 IX86_BUILTIN_PCOMNEB,
17650 IX86_BUILTIN_PCOMLTB,
17651 IX86_BUILTIN_PCOMLEB,
17652 IX86_BUILTIN_PCOMGTB,
17653 IX86_BUILTIN_PCOMGEB,
17654 IX86_BUILTIN_PCOMFALSEB,
17655 IX86_BUILTIN_PCOMTRUEB,
17656 IX86_BUILTIN_PCOMEQW,
17657 IX86_BUILTIN_PCOMNEW,
17658 IX86_BUILTIN_PCOMLTW,
17659 IX86_BUILTIN_PCOMLEW,
17660 IX86_BUILTIN_PCOMGTW,
17661 IX86_BUILTIN_PCOMGEW,
17662 IX86_BUILTIN_PCOMFALSEW,
17663 IX86_BUILTIN_PCOMTRUEW,
17664 IX86_BUILTIN_PCOMEQD,
17665 IX86_BUILTIN_PCOMNED,
17666 IX86_BUILTIN_PCOMLTD,
17667 IX86_BUILTIN_PCOMLED,
17668 IX86_BUILTIN_PCOMGTD,
17669 IX86_BUILTIN_PCOMGED,
17670 IX86_BUILTIN_PCOMFALSED,
17671 IX86_BUILTIN_PCOMTRUED,
17672 IX86_BUILTIN_PCOMEQQ,
17673 IX86_BUILTIN_PCOMNEQ,
17674 IX86_BUILTIN_PCOMLTQ,
17675 IX86_BUILTIN_PCOMLEQ,
17676 IX86_BUILTIN_PCOMGTQ,
17677 IX86_BUILTIN_PCOMGEQ,
17678 IX86_BUILTIN_PCOMFALSEQ,
17679 IX86_BUILTIN_PCOMTRUEQ,
17680
17681 IX86_BUILTIN_MAX
17682 };
17683
17684 /* Table for the ix86 builtin decls. */
17685 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
17686
17687 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so
17688 only if MASK matches the enabled ISA flags (ix86_isa_flags). Stores the
17689 function decl in the ix86_builtins array.
17690 Returns the function decl, or NULL_TREE if the builtin was not added. */
17691
17692 static inline tree
17693 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
17694 {
17695 tree decl = NULL_TREE;
17696
17697 if ((mask & ix86_isa_flags)
17698 && (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT))
17699 {
17700 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
17701 NULL, NULL_TREE);
17702 ix86_builtins[(int) code] = decl;
17703 }
17704
17705 return decl;
17706 }
17707
17708 /* Like def_builtin, but also marks the function decl "const". */
17709
17710 static inline tree
17711 def_builtin_const (int mask, const char *name, tree type,
17712 enum ix86_builtins code)
17713 {
17714 tree decl = def_builtin (mask, name, type, code);
17715 if (decl)
17716 TREE_READONLY (decl) = 1;
17717 return decl;
17718 }
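/* A hypothetical usage sketch (the real registrations happen elsewhere
   in this file, e.g. in ix86_init_mmx_sse_builtins):

     def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_addps",
                        v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   Marking the decl TREE_READONLY lets the middle-end treat the builtin
   as a pure function of its arguments, so duplicate calls can be CSEd.  */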
17719
17720 /* Bits for builtin_description.flag. */
17721
17722 /* Set when we don't support the comparison natively, and should
17723 swap the comparison operands in order to support it. */
17724 #define BUILTIN_DESC_SWAP_OPERANDS 1
17725
17726 struct builtin_description
17727 {
17728 const unsigned int mask;
17729 const enum insn_code icode;
17730 const char *const name;
17731 const enum ix86_builtins code;
17732 const enum rtx_code comparison;
17733 const int flag;
17734 };
17735
17736 static const struct builtin_description bdesc_comi[] =
17737 {
17738 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
17739 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
17740 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
17741 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
17742 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
17743 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
17744 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
17745 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
17746 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
17747 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
17748 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
17749 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
17750 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
17751 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
17752 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
17753 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
17754 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
17755 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
17756 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
17757 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
17758 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
17759 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
17760 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
17761 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
17762 };
17763
17764 static const struct builtin_description bdesc_ptest[] =
17765 {
17766 /* SSE4.1 */
17767 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 },
17768 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 },
17769 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
17770 };
17771
17772 static const struct builtin_description bdesc_pcmpestr[] =
17773 {
17774 /* SSE4.2 */
17775 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
17776 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
17777 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
17778 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
17779 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
17780 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
17781 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
17782 };
17783
17784 static const struct builtin_description bdesc_pcmpistr[] =
17785 {
17786 /* SSE4.2 */
17787 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
17788 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
17789 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
17790 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
17791 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
17792 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
17793 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
17794 };
17795
17796 static const struct builtin_description bdesc_crc32[] =
17797 {
17798 /* SSE4.2 */
17799 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, 0, IX86_BUILTIN_CRC32QI, UNKNOWN, 0 },
17800 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, 0, IX86_BUILTIN_CRC32HI, UNKNOWN, 0 },
17801 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, 0, IX86_BUILTIN_CRC32SI, UNKNOWN, 0 },
17802 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, 0, IX86_BUILTIN_CRC32DI, UNKNOWN, 0 },
17803 };
17804
17805 /* SSE builtins with 3 arguments, where the last argument must be an immediate or xmm0. */
17806 static const struct builtin_description bdesc_sse_3arg[] =
17807 {
17808 /* SSE4.1 */
17809 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, 0 },
17810 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, 0 },
17811 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, 0 },
17812 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, 0 },
17813 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, 0 },
17814 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, 0 },
17815 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, 0 },
17816 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, 0 },
17817 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, 0 },
17818 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, 0 },
17819 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, UNKNOWN, 0 },
17820 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, UNKNOWN, 0 },
17821 };
17822
17823 static const struct builtin_description bdesc_2arg[] =
17824 {
17825 /* SSE */
17826 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, 0 },
17827 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, 0 },
17828 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, 0 },
17829 { OPTION_MASK_ISA_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, 0 },
17830 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, 0 },
17831 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, 0 },
17832 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, 0 },
17833 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, 0 },
17834
17835 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
17836 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
17837 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
17838 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, BUILTIN_DESC_SWAP_OPERANDS },
17839 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, BUILTIN_DESC_SWAP_OPERANDS },
17840 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
17841 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
17842 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
17843 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
17844 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17845 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17846 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
17847 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
17848 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
17849 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
17850 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
17851 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
17852 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
17853 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
17854 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17855 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17856 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
17857
17858 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, 0 },
17859 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, 0 },
17860 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, 0 },
17861 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, 0 },
17862
17863 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, 0 },
17864 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, 0 },
17865 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, 0 },
17866 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, 0 },
17867
17868 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, 0 },
17869 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, 0 },
17870 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, 0 },
17871 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, 0 },
17872 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, 0 },
17873
17874 /* MMX */
17875 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, 0 },
17876 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, 0 },
17877 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, 0 },
17878 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, 0 },
17879 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, 0 },
17880 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, 0 },
17881 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, 0 },
17882 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, 0 },
17883
17884 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, 0 },
17885 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, 0 },
17886 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, 0 },
17887 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, 0 },
17888 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, 0 },
17889 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, 0 },
17890 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, 0 },
17891 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, 0 },
17892
17893 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, 0 },
17894 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, 0 },
17895 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, 0 },
17896
17897 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, 0 },
17898 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, 0 },
17899 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, 0 },
17900 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, 0 },
17901
17902 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, 0 },
17903 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, 0 },
17904
17905 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, 0 },
17906 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, 0 },
17907 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, 0 },
17908 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, 0 },
17909 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, 0 },
17910 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, 0 },
17911
17912 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, 0 },
17913 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, 0 },
17914 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, 0 },
17915 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, 0 },
17916
17917 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, 0 },
17918 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, 0 },
17919 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, 0 },
17920 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, 0 },
17921 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, 0 },
17922 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, 0 },
17923
17924 /* Special. */
17925 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, UNKNOWN, 0 },
17926 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, UNKNOWN, 0 },
17927 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, UNKNOWN, 0 },
17928
17929 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, UNKNOWN, 0 },
17930 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, UNKNOWN, 0 },
17931 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, UNKNOWN, 0 },
17932
17933 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, UNKNOWN, 0 },
17934 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, UNKNOWN, 0 },
17935 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, UNKNOWN, 0 },
17936 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, UNKNOWN, 0 },
17937 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, UNKNOWN, 0 },
17938 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, UNKNOWN, 0 },
17939
17940 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, UNKNOWN, 0 },
17941 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, UNKNOWN, 0 },
17942 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, UNKNOWN, 0 },
17943 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, UNKNOWN, 0 },
17944 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, UNKNOWN, 0 },
17945 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, UNKNOWN, 0 },
17946
17947 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, UNKNOWN, 0 },
17948 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, UNKNOWN, 0 },
17949 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, UNKNOWN, 0 },
17950 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, UNKNOWN, 0 },
17951
17952 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, UNKNOWN, 0 },
17953 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, UNKNOWN, 0 },
17954
17955 /* SSE2 */
17956 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, 0 },
17957 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, 0 },
17958 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, 0 },
17959 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, 0 },
17960 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, 0 },
17961 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, 0 },
17962 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, 0 },
17963 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, 0 },
17964
17965 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
17966 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
17967 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
17968 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, BUILTIN_DESC_SWAP_OPERANDS },
17969 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, BUILTIN_DESC_SWAP_OPERANDS },
17970 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
17971 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
17972 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
17973 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
17974 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17975 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17976 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
17977 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
17978 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
17979 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
17980 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
17981 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
17982 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
17983 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
17984 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
17985
17986 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, 0 },
17987 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, 0 },
17988 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, 0 },
17989 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, 0 },
17990
17991 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, 0 },
17992 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, 0 },
17993 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, 0 },
17994 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, 0 },
17995
17996 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, 0 },
17997 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, 0 },
17998 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, 0 },
17999
18000 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, 0 },
18001
18002 /* SSE2 MMX */
18003 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, 0 },
18004 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, 0 },
18005 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, 0 },
18006 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, 0 },
18007 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, 0 },
18008 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, 0 },
18009 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, 0 },
18010 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, 0 },
18011
18012   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, 0 },
18013   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, 0 },
18014   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, 0 },
18015   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, 0 },
18016   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, 0 },
18017   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, 0 },
18018   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, 0 },
18019   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, 0 },
18020
18021 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, 0 },
18022 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, 0 },
18023
18024 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, 0 },
18025 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, 0 },
18026 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, 0 },
18027 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, 0 },
18028
18029 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, 0 },
18030 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, 0 },
18031
18032 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, 0 },
18033 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, 0 },
18034 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, 0 },
18035 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, 0 },
18036 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, 0 },
18037 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, 0 },
18038
18039 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, 0 },
18040 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, 0 },
18041 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, 0 },
18042 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, 0 },
18043
18044 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, 0 },
18045 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, 0 },
18046 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, 0 },
18047 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, 0 },
18048 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, 0 },
18049 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, 0 },
18050 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, 0 },
18051 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, 0 },
18052
18053 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, 0 },
18054 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, 0 },
18055 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, 0 },
18056
18057 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, 0 },
18058 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, UNKNOWN, 0 },
18059
18060 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, UNKNOWN, 0 },
18061 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, UNKNOWN, 0 },
18062
18063 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, UNKNOWN, 0 },
18064 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, UNKNOWN, 0 },
18065 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, UNKNOWN, 0 },
18066
18067 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, UNKNOWN, 0 },
18068 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, UNKNOWN, 0 },
18069 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, UNKNOWN, 0 },
18070
18071 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, UNKNOWN, 0 },
18072 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, UNKNOWN, 0 },
18073
18074 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, UNKNOWN, 0 },
18075
18076 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, UNKNOWN, 0 },
18077 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, UNKNOWN, 0 },
18078 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, UNKNOWN, 0 },
18079 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, UNKNOWN, 0 },
18080
18081   /* SSE3 */
18082 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, 0 },
18083 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, 0 },
18084 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, 0 },
18085 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, 0 },
18086 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, 0 },
18087 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, 0 },
18088
18089 /* SSSE3 */
18090 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, 0 },
18091 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, 0 },
18092 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, 0 },
18093 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, 0 },
18094 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, 0 },
18095 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, 0 },
18096 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, 0 },
18097 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, 0 },
18098 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, 0 },
18099 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, 0 },
18100 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, 0 },
18101 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, 0 },
18102 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, 0 },
18103 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, 0 },
18104 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, 0 },
18105 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, 0 },
18106 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, 0 },
18107 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, 0 },
18108 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, 0 },
18109 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, 0 },
18110 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, 0 },
18111 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, 0 },
18112 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, 0 },
18113 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, 0 },
18114
18115 /* SSE4.1 */
18116 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, 0 },
18117 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, 0 },
18118 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, 0 },
18119 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, 0 },
18120 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, 0 },
18121 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, 0 },
18122 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, 0 },
18123 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, 0 },
18124 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, 0 },
18125 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, 0 },
18126 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, 0, IX86_BUILTIN_PMULDQ128, UNKNOWN, 0 },
18127 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, 0 },
18128
18129 /* SSE4.2 */
18130 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, 0 },
18131 };
18132
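/* Builtins taking a single vector operand (plus, for the final "fake"
   entries, a small constant second operand).  */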
18133 static const struct builtin_description bdesc_1arg[] =
18134 {
18135 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, UNKNOWN, 0 },
18136 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, UNKNOWN, 0 },
18137
18138 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, UNKNOWN, 0 },
18139 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, UNKNOWN, 0 },
18140 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, UNKNOWN, 0 },
18141
18142 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, UNKNOWN, 0 },
18143 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, UNKNOWN, 0 },
18144 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, UNKNOWN, 0 },
18145 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, UNKNOWN, 0 },
18146 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, UNKNOWN, 0 },
18147 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, 0 },
18148
18149 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, UNKNOWN, 0 },
18150 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, UNKNOWN, 0 },
18151
18152 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, UNKNOWN, 0 },
18153
18154 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, UNKNOWN, 0 },
18155 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, UNKNOWN, 0 },
18156
18157 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, UNKNOWN, 0 },
18158 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, UNKNOWN, 0 },
18159 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, UNKNOWN, 0 },
18160 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, 0 },
18161 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, UNKNOWN, 0 },
18162
18163 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, UNKNOWN, 0 },
18164
18165 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, UNKNOWN, 0 },
18166 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, UNKNOWN, 0 },
18167 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, UNKNOWN, 0 },
18168 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, 0 },
18169
18170 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, UNKNOWN, 0 },
18171 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, UNKNOWN, 0 },
18172 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, 0 },
18173
18174 /* SSE3 */
18175 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, 0 },
18176 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, 0 },
18177
18178 /* SSSE3 */
18179 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, 0 },
18180 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, 0 },
18181 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, 0 },
18182 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, 0 },
18183 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, 0 },
18184 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, 0 },
18185
18186 /* SSE4.1 */
18187 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVSXBW128, UNKNOWN, 0 },
18188 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVSXBD128, UNKNOWN, 0 },
18189 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, 0 },
18190 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVSXWD128, UNKNOWN, 0 },
18191 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, 0 },
18192 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, 0, IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, 0 },
18193 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVZXBW128, UNKNOWN, 0 },
18194 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVZXBD128, UNKNOWN, 0 },
18195 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, 0 },
18196 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVZXWD128, UNKNOWN, 0 },
18197 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, 0 },
18198 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, 0, IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, 0 },
18199 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, 0 },
18200
18201 /* Fake 1 arg builtins with a constant smaller than 8 bits as the 2nd arg. */
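/* For instance, __builtin_ia32_roundpd takes the ROUNDPD rounding-control
   immediate as its second operand, so it cannot go through the ordinary
   one-argument expansion path.  */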
18202 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundpd, 0, IX86_BUILTIN_ROUNDPD, UNKNOWN, 0 },
18203 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundps, 0, IX86_BUILTIN_ROUNDPS, UNKNOWN, 0 },
18204 };
18205
18206 /* SSE5 */
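/* Operand-signature codes for the SSE5 multi-operand builtins below.  The
   digit is the number of vector operands; the mode suffix names the element
   mode, with a second suffix when the result mode differs from the inputs
   (e.g. MULTI_ARG_3_HI_SI for HImode inputs accumulating into SImode).
   _IMM forms take an immediate operand, _CMP forms are comparisons carrying
   an rtx comparison code, and _TF forms are the always-true/always-false
   com/pcom variants.  */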
18207 enum multi_arg_type {
18208 MULTI_ARG_UNKNOWN,
18209 MULTI_ARG_3_SF,
18210 MULTI_ARG_3_DF,
18211 MULTI_ARG_3_DI,
18212 MULTI_ARG_3_SI,
18213 MULTI_ARG_3_SI_DI,
18214 MULTI_ARG_3_HI,
18215 MULTI_ARG_3_HI_SI,
18216 MULTI_ARG_3_QI,
18217 MULTI_ARG_3_PERMPS,
18218 MULTI_ARG_3_PERMPD,
18219 MULTI_ARG_2_SF,
18220 MULTI_ARG_2_DF,
18221 MULTI_ARG_2_DI,
18222 MULTI_ARG_2_SI,
18223 MULTI_ARG_2_HI,
18224 MULTI_ARG_2_QI,
18225 MULTI_ARG_2_DI_IMM,
18226 MULTI_ARG_2_SI_IMM,
18227 MULTI_ARG_2_HI_IMM,
18228 MULTI_ARG_2_QI_IMM,
18229 MULTI_ARG_2_SF_CMP,
18230 MULTI_ARG_2_DF_CMP,
18231 MULTI_ARG_2_DI_CMP,
18232 MULTI_ARG_2_SI_CMP,
18233 MULTI_ARG_2_HI_CMP,
18234 MULTI_ARG_2_QI_CMP,
18235 MULTI_ARG_2_DI_TF,
18236 MULTI_ARG_2_SI_TF,
18237 MULTI_ARG_2_HI_TF,
18238 MULTI_ARG_2_QI_TF,
18239 MULTI_ARG_2_SF_TF,
18240 MULTI_ARG_2_DF_TF,
18241 MULTI_ARG_1_SF,
18242 MULTI_ARG_1_DF,
18243 MULTI_ARG_1_DI,
18244 MULTI_ARG_1_SI,
18245 MULTI_ARG_1_HI,
18246 MULTI_ARG_1_QI,
18247 MULTI_ARG_1_SI_DI,
18248 MULTI_ARG_1_HI_DI,
18249 MULTI_ARG_1_HI_SI,
18250 MULTI_ARG_1_QI_DI,
18251 MULTI_ARG_1_QI_SI,
18252 MULTI_ARG_1_QI_HI,
18253 MULTI_ARG_1_PH2PS,
18254 MULTI_ARG_1_PS2PH
18255 };
18256
18257 static const struct builtin_description bdesc_multi_arg[] =
18258 {
18259 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
18260 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
18261 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
18262 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
18263 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
18264 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
18265 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
18266 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
18267 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
18268 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
18269 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
18270 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
18271 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
18272 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
18273 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
18274 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
18275 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
18276 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
18277 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
18278 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
18279 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI },
18280 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
18281 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
18282 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
18283 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
18284 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
18285 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
18286 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
18287 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
18288 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
18289 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
18290 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
18291 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
18292 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
18293 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
18294 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
18295 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
18296 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
18297 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
18298 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
18299 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
18300 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
18301 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
18302 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
18303 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
18304 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
18305 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
18306 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
18307 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
18308 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
18309 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
18310 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
18311 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
18312 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
18313 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
18314 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
18315 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
18316 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
18317 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
18318 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
18319 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
18320 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
18321 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
18322 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
18323 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
18324 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
18325 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
18326 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
18327 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
18328 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
18329 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
18330 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
18331 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
18332 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
18333 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
18334
18335 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
18336 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
18337 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
18338 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
18339 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
18340 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
18341 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
18342 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
18343 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18344 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18345 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
18346 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
18347 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
18348 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
18349 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
18350 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
18351
18352 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
18353 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
18354 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
18355 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
18356 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
18357 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
18358 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
18359 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
18360 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18361 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18362 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
18363 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
18364 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
18365 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
18366 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
18367 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
18368
18369 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
18370 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
18371 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
18372 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
18373 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
18374 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
18375 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
18376 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
18377 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18378 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18379 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
18380 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
18381 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
18382 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
18383 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
18384 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
18385
18386 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
18387 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
18388 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
18389 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
18390 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
18391 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
18392 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
18393 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
18394 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18395 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18396 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
18397 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
18398 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
18399 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
18400 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
18401 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
18402
18403 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
18404 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
18405 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
18406 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
18407 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
18408 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
18409 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
18410
18411 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
18412 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
18413 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
18414 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
18415 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
18416 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
18417 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
18418
18419 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
18420 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
18421 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
18422 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
18423 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
18424 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
18425 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
18426
18427 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
18428 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
18429 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
18430 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
18431 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
18432 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
18433 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
18434
18435 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
18436 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
18437 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
18438 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
18439 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
18440 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
18441 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
18442
18443 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
18444 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
18445 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
18446 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
18447 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
18448 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
18449 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
18450
18451 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
18452 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
18453 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
18454 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
18455 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
18456 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
18457 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
18458
18459 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
18460 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
18461 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
18462 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
18463 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
18464 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
18465 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
18466
18467 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
18468 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
18469 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
18470 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
18471 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
18472 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
18473 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
18474 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
18475
18476 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
18477 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
18478 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
18479 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
18480 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
18481 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
18482 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
18483 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
18484
18485 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
18486 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
18487 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
18488 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
18489 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
18490 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
18491 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
18492 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
18493 };
18494
18495 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
18496 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
18497 builtins. */
18498 static void
18499 ix86_init_mmx_sse_builtins (void)
18500 {
18501 const struct builtin_description * d;
18502 size_t i;
18503
18504 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
18505 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
18506 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
18507 tree V2DI_type_node
18508 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
18509 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
18510 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
18511 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
18512 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
18513 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
18514 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
18515
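/* Pointer types used by the load/store builtin signatures further down.  */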
18516 tree pchar_type_node = build_pointer_type (char_type_node);
18517 tree pcchar_type_node = build_pointer_type (
18518 build_type_variant (char_type_node, 1, 0));
18519 tree pfloat_type_node = build_pointer_type (float_type_node);
18520 tree pcfloat_type_node = build_pointer_type (
18521 build_type_variant (float_type_node, 1, 0));
18522 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
18523 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
18524 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
18525
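/* The function-type trees below follow the naming convention
   <result>_ftype_<arg1>_<arg2>...; e.g. v4si_ftype_v4sf_v4sf is the type of
   a builtin taking two V4SF vectors and returning a V4SI vector.  */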
18526 /* Comparisons. */
18527 tree int_ftype_v4sf_v4sf
18528 = build_function_type_list (integer_type_node,
18529 V4SF_type_node, V4SF_type_node, NULL_TREE);
18530 tree v4si_ftype_v4sf_v4sf
18531 = build_function_type_list (V4SI_type_node,
18532 V4SF_type_node, V4SF_type_node, NULL_TREE);
18533 /* MMX/SSE/integer conversions. */
18534 tree int_ftype_v4sf
18535 = build_function_type_list (integer_type_node,
18536 V4SF_type_node, NULL_TREE);
18537 tree int64_ftype_v4sf
18538 = build_function_type_list (long_long_integer_type_node,
18539 V4SF_type_node, NULL_TREE);
18540 tree int_ftype_v8qi
18541 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
18542 tree v4sf_ftype_v4sf_int
18543 = build_function_type_list (V4SF_type_node,
18544 V4SF_type_node, integer_type_node, NULL_TREE);
18545 tree v4sf_ftype_v4sf_int64
18546 = build_function_type_list (V4SF_type_node,
18547 V4SF_type_node, long_long_integer_type_node,
18548 NULL_TREE);
18549 tree v4sf_ftype_v4sf_v2si
18550 = build_function_type_list (V4SF_type_node,
18551 V4SF_type_node, V2SI_type_node, NULL_TREE);
18552
18553 /* Miscellaneous. */
18554 tree v8qi_ftype_v4hi_v4hi
18555 = build_function_type_list (V8QI_type_node,
18556 V4HI_type_node, V4HI_type_node, NULL_TREE);
18557 tree v4hi_ftype_v2si_v2si
18558 = build_function_type_list (V4HI_type_node,
18559 V2SI_type_node, V2SI_type_node, NULL_TREE);
18560 tree v4sf_ftype_v4sf_v4sf_int
18561 = build_function_type_list (V4SF_type_node,
18562 V4SF_type_node, V4SF_type_node,
18563 integer_type_node, NULL_TREE);
18564 tree v2si_ftype_v4hi_v4hi
18565 = build_function_type_list (V2SI_type_node,
18566 V4HI_type_node, V4HI_type_node, NULL_TREE);
18567 tree v4hi_ftype_v4hi_int
18568 = build_function_type_list (V4HI_type_node,
18569 V4HI_type_node, integer_type_node, NULL_TREE);
18570 tree v4hi_ftype_v4hi_di
18571 = build_function_type_list (V4HI_type_node,
18572 V4HI_type_node, long_long_unsigned_type_node,
18573 NULL_TREE);
18574 tree v2si_ftype_v2si_di
18575 = build_function_type_list (V2SI_type_node,
18576 V2SI_type_node, long_long_unsigned_type_node,
18577 NULL_TREE);
18578 tree void_ftype_void
18579 = build_function_type (void_type_node, void_list_node);
18580 tree void_ftype_unsigned
18581 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
18582 tree void_ftype_unsigned_unsigned
18583 = build_function_type_list (void_type_node, unsigned_type_node,
18584 unsigned_type_node, NULL_TREE);
18585 tree void_ftype_pcvoid_unsigned_unsigned
18586 = build_function_type_list (void_type_node, const_ptr_type_node,
18587 unsigned_type_node, unsigned_type_node,
18588 NULL_TREE);
18589 tree unsigned_ftype_void
18590 = build_function_type (unsigned_type_node, void_list_node);
18591 tree v2si_ftype_v4sf
18592 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
18593 /* Loads/stores. */
18594 tree void_ftype_v8qi_v8qi_pchar
18595 = build_function_type_list (void_type_node,
18596 V8QI_type_node, V8QI_type_node,
18597 pchar_type_node, NULL_TREE);
18598 tree v4sf_ftype_pcfloat
18599 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
18600 /* @@@ the type is bogus */
18601 tree v4sf_ftype_v4sf_pv2si
18602 = build_function_type_list (V4SF_type_node,
18603 V4SF_type_node, pv2si_type_node, NULL_TREE);
18604 tree void_ftype_pv2si_v4sf
18605 = build_function_type_list (void_type_node,
18606 pv2si_type_node, V4SF_type_node, NULL_TREE);
18607 tree void_ftype_pfloat_v4sf
18608 = build_function_type_list (void_type_node,
18609 pfloat_type_node, V4SF_type_node, NULL_TREE);
18610 tree void_ftype_pdi_di
18611 = build_function_type_list (void_type_node,
18612 pdi_type_node, long_long_unsigned_type_node,
18613 NULL_TREE);
18614 tree void_ftype_pv2di_v2di
18615 = build_function_type_list (void_type_node,
18616 pv2di_type_node, V2DI_type_node, NULL_TREE);
18617 /* Normal vector unops. */
18618 tree v4sf_ftype_v4sf
18619 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
18620 tree v16qi_ftype_v16qi
18621 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
18622 tree v8hi_ftype_v8hi
18623 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
18624 tree v4si_ftype_v4si
18625 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
18626 tree v8qi_ftype_v8qi
18627 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
18628 tree v4hi_ftype_v4hi
18629 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
18630
18631 /* Normal vector binops. */
18632 tree v4sf_ftype_v4sf_v4sf
18633 = build_function_type_list (V4SF_type_node,
18634 V4SF_type_node, V4SF_type_node, NULL_TREE);
18635 tree v8qi_ftype_v8qi_v8qi
18636 = build_function_type_list (V8QI_type_node,
18637 V8QI_type_node, V8QI_type_node, NULL_TREE);
18638 tree v4hi_ftype_v4hi_v4hi
18639 = build_function_type_list (V4HI_type_node,
18640 V4HI_type_node, V4HI_type_node, NULL_TREE);
18641 tree v2si_ftype_v2si_v2si
18642 = build_function_type_list (V2SI_type_node,
18643 V2SI_type_node, V2SI_type_node, NULL_TREE);
18644 tree di_ftype_di_di
18645 = build_function_type_list (long_long_unsigned_type_node,
18646 long_long_unsigned_type_node,
18647 long_long_unsigned_type_node, NULL_TREE);
18648
18649 tree di_ftype_di_di_int
18650 = build_function_type_list (long_long_unsigned_type_node,
18651 long_long_unsigned_type_node,
18652 long_long_unsigned_type_node,
18653 integer_type_node, NULL_TREE);
18654
18655 tree v2si_ftype_v2sf
18656 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
18657 tree v2sf_ftype_v2si
18658 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
18659 tree v2si_ftype_v2si
18660 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
18661 tree v2sf_ftype_v2sf
18662 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
18663 tree v2sf_ftype_v2sf_v2sf
18664 = build_function_type_list (V2SF_type_node,
18665 V2SF_type_node, V2SF_type_node, NULL_TREE);
18666 tree v2si_ftype_v2sf_v2sf
18667 = build_function_type_list (V2SI_type_node,
18668 V2SF_type_node, V2SF_type_node, NULL_TREE);
18669 tree pint_type_node = build_pointer_type (integer_type_node);
18670 tree pdouble_type_node = build_pointer_type (double_type_node);
18671 tree pcdouble_type_node = build_pointer_type (
18672 build_type_variant (double_type_node, 1, 0));
18673 tree int_ftype_v2df_v2df
18674 = build_function_type_list (integer_type_node,
18675 V2DF_type_node, V2DF_type_node, NULL_TREE);
18676
18677 tree void_ftype_pcvoid
18678 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
18679 tree v4sf_ftype_v4si
18680 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
18681 tree v4si_ftype_v4sf
18682 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
18683 tree v2df_ftype_v4si
18684 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
18685 tree v4si_ftype_v2df
18686 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
18687 tree v4si_ftype_v2df_v2df
18688 = build_function_type_list (V4SI_type_node,
18689 V2DF_type_node, V2DF_type_node, NULL_TREE);
18690 tree v2si_ftype_v2df
18691 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
18692 tree v4sf_ftype_v2df
18693 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
18694 tree v2df_ftype_v2si
18695 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
18696 tree v2df_ftype_v4sf
18697 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
18698 tree int_ftype_v2df
18699 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
18700 tree int64_ftype_v2df
18701 = build_function_type_list (long_long_integer_type_node,
18702 V2DF_type_node, NULL_TREE);
18703 tree v2df_ftype_v2df_int
18704 = build_function_type_list (V2DF_type_node,
18705 V2DF_type_node, integer_type_node, NULL_TREE);
18706 tree v2df_ftype_v2df_int64
18707 = build_function_type_list (V2DF_type_node,
18708 V2DF_type_node, long_long_integer_type_node,
18709 NULL_TREE);
18710 tree v4sf_ftype_v4sf_v2df
18711 = build_function_type_list (V4SF_type_node,
18712 V4SF_type_node, V2DF_type_node, NULL_TREE);
18713 tree v2df_ftype_v2df_v4sf
18714 = build_function_type_list (V2DF_type_node,
18715 V2DF_type_node, V4SF_type_node, NULL_TREE);
18716 tree v2df_ftype_v2df_v2df_int
18717 = build_function_type_list (V2DF_type_node,
18718 V2DF_type_node, V2DF_type_node,
18719 integer_type_node,
18720 NULL_TREE);
18721 tree v2df_ftype_v2df_pcdouble
18722 = build_function_type_list (V2DF_type_node,
18723 V2DF_type_node, pcdouble_type_node, NULL_TREE);
18724 tree void_ftype_pdouble_v2df
18725 = build_function_type_list (void_type_node,
18726 pdouble_type_node, V2DF_type_node, NULL_TREE);
18727 tree void_ftype_pint_int
18728 = build_function_type_list (void_type_node,
18729 pint_type_node, integer_type_node, NULL_TREE);
18730 tree void_ftype_v16qi_v16qi_pchar
18731 = build_function_type_list (void_type_node,
18732 V16QI_type_node, V16QI_type_node,
18733 pchar_type_node, NULL_TREE);
18734 tree v2df_ftype_pcdouble
18735 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
18736 tree v2df_ftype_v2df_v2df
18737 = build_function_type_list (V2DF_type_node,
18738 V2DF_type_node, V2DF_type_node, NULL_TREE);
18739 tree v16qi_ftype_v16qi_v16qi
18740 = build_function_type_list (V16QI_type_node,
18741 V16QI_type_node, V16QI_type_node, NULL_TREE);
18742 tree v8hi_ftype_v8hi_v8hi
18743 = build_function_type_list (V8HI_type_node,
18744 V8HI_type_node, V8HI_type_node, NULL_TREE);
18745 tree v4si_ftype_v4si_v4si
18746 = build_function_type_list (V4SI_type_node,
18747 V4SI_type_node, V4SI_type_node, NULL_TREE);
18748 tree v2di_ftype_v2di_v2di
18749 = build_function_type_list (V2DI_type_node,
18750 V2DI_type_node, V2DI_type_node, NULL_TREE);
18751 tree v2di_ftype_v2df_v2df
18752 = build_function_type_list (V2DI_type_node,
18753 V2DF_type_node, V2DF_type_node, NULL_TREE);
18754 tree v2df_ftype_v2df
18755 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
18756 tree v2di_ftype_v2di_int
18757 = build_function_type_list (V2DI_type_node,
18758 V2DI_type_node, integer_type_node, NULL_TREE);
18759 tree v2di_ftype_v2di_v2di_int
18760 = build_function_type_list (V2DI_type_node, V2DI_type_node,
18761 V2DI_type_node, integer_type_node, NULL_TREE);
18762 tree v4si_ftype_v4si_int
18763 = build_function_type_list (V4SI_type_node,
18764 V4SI_type_node, integer_type_node, NULL_TREE);
18765 tree v8hi_ftype_v8hi_int
18766 = build_function_type_list (V8HI_type_node,
18767 V8HI_type_node, integer_type_node, NULL_TREE);
18768 tree v4si_ftype_v8hi_v8hi
18769 = build_function_type_list (V4SI_type_node,
18770 V8HI_type_node, V8HI_type_node, NULL_TREE);
18771 tree di_ftype_v8qi_v8qi
18772 = build_function_type_list (long_long_unsigned_type_node,
18773 V8QI_type_node, V8QI_type_node, NULL_TREE);
18774 tree di_ftype_v2si_v2si
18775 = build_function_type_list (long_long_unsigned_type_node,
18776 V2SI_type_node, V2SI_type_node, NULL_TREE);
18777 tree v2di_ftype_v16qi_v16qi
18778 = build_function_type_list (V2DI_type_node,
18779 V16QI_type_node, V16QI_type_node, NULL_TREE);
18780 tree v2di_ftype_v4si_v4si
18781 = build_function_type_list (V2DI_type_node,
18782 V4SI_type_node, V4SI_type_node, NULL_TREE);
18783 tree int_ftype_v16qi
18784 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
18785 tree v16qi_ftype_pcchar
18786 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
18787 tree void_ftype_pchar_v16qi
18788 = build_function_type_list (void_type_node,
18789 pchar_type_node, V16QI_type_node, NULL_TREE);
18790
18791 tree v2di_ftype_v2di_unsigned_unsigned
18792 = build_function_type_list (V2DI_type_node, V2DI_type_node,
18793 unsigned_type_node, unsigned_type_node,
18794 NULL_TREE);
18795 tree v2di_ftype_v2di_v2di_unsigned_unsigned
18796 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
18797 unsigned_type_node, unsigned_type_node,
18798 NULL_TREE);
18799 tree v2di_ftype_v2di_v16qi
18800 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
18801 NULL_TREE);
18802 tree v2df_ftype_v2df_v2df_v2df
18803 = build_function_type_list (V2DF_type_node,
18804 V2DF_type_node, V2DF_type_node,
18805 V2DF_type_node, NULL_TREE);
18806 tree v4sf_ftype_v4sf_v4sf_v4sf
18807 = build_function_type_list (V4SF_type_node,
18808 V4SF_type_node, V4SF_type_node,
18809 V4SF_type_node, NULL_TREE);
18810 tree v8hi_ftype_v16qi
18811 = build_function_type_list (V8HI_type_node, V16QI_type_node,
18812 NULL_TREE);
18813 tree v4si_ftype_v16qi
18814 = build_function_type_list (V4SI_type_node, V16QI_type_node,
18815 NULL_TREE);
18816 tree v2di_ftype_v16qi
18817 = build_function_type_list (V2DI_type_node, V16QI_type_node,
18818 NULL_TREE);
18819 tree v4si_ftype_v8hi
18820 = build_function_type_list (V4SI_type_node, V8HI_type_node,
18821 NULL_TREE);
18822 tree v2di_ftype_v8hi
18823 = build_function_type_list (V2DI_type_node, V8HI_type_node,
18824 NULL_TREE);
18825 tree v2di_ftype_v4si
18826 = build_function_type_list (V2DI_type_node, V4SI_type_node,
18827 NULL_TREE);
18828 tree v2di_ftype_pv2di
18829 = build_function_type_list (V2DI_type_node, pv2di_type_node,
18830 NULL_TREE);
18831 tree v16qi_ftype_v16qi_v16qi_int
18832 = build_function_type_list (V16QI_type_node, V16QI_type_node,
18833 V16QI_type_node, integer_type_node,
18834 NULL_TREE);
18835 tree v16qi_ftype_v16qi_v16qi_v16qi
18836 = build_function_type_list (V16QI_type_node, V16QI_type_node,
18837 V16QI_type_node, V16QI_type_node,
18838 NULL_TREE);
18839 tree v8hi_ftype_v8hi_v8hi_int
18840 = build_function_type_list (V8HI_type_node, V8HI_type_node,
18841 V8HI_type_node, integer_type_node,
18842 NULL_TREE);
18843 tree v4si_ftype_v4si_v4si_int
18844 = build_function_type_list (V4SI_type_node, V4SI_type_node,
18845 V4SI_type_node, integer_type_node,
18846 NULL_TREE);
18847 tree int_ftype_v2di_v2di
18848 = build_function_type_list (integer_type_node,
18849 V2DI_type_node, V2DI_type_node,
18850 NULL_TREE);
18851 tree int_ftype_v16qi_int_v16qi_int_int
18852 = build_function_type_list (integer_type_node,
18853 V16QI_type_node,
18854 integer_type_node,
18855 V16QI_type_node,
18856 integer_type_node,
18857 integer_type_node,
18858 NULL_TREE);
18859 tree v16qi_ftype_v16qi_int_v16qi_int_int
18860 = build_function_type_list (V16QI_type_node,
18861 V16QI_type_node,
18862 integer_type_node,
18863 V16QI_type_node,
18864 integer_type_node,
18865 integer_type_node,
18866 NULL_TREE);
18867 tree int_ftype_v16qi_v16qi_int
18868 = build_function_type_list (integer_type_node,
18869 V16QI_type_node,
18870 V16QI_type_node,
18871 integer_type_node,
18872 NULL_TREE);
18873
18874 /* SSE5 instructions.  */
18875 tree v2di_ftype_v2di_v2di_v2di
18876 = build_function_type_list (V2DI_type_node,
18877 V2DI_type_node,
18878 V2DI_type_node,
18879 V2DI_type_node,
18880 NULL_TREE);
18881
18882 tree v4si_ftype_v4si_v4si_v4si
18883 = build_function_type_list (V4SI_type_node,
18884 V4SI_type_node,
18885 V4SI_type_node,
18886 V4SI_type_node,
18887 NULL_TREE);
18888
18889 tree v4si_ftype_v4si_v4si_v2di
18890 = build_function_type_list (V4SI_type_node,
18891 V4SI_type_node,
18892 V4SI_type_node,
18893 V2DI_type_node,
18894 NULL_TREE);
18895
18896 tree v8hi_ftype_v8hi_v8hi_v8hi
18897 = build_function_type_list (V8HI_type_node,
18898 V8HI_type_node,
18899 V8HI_type_node,
18900 V8HI_type_node,
18901 NULL_TREE);
18902
18903 tree v8hi_ftype_v8hi_v8hi_v4si
18904 = build_function_type_list (V8HI_type_node,
18905 V8HI_type_node,
18906 V8HI_type_node,
18907 V4SI_type_node,
18908 NULL_TREE);
18909
18910 tree v2df_ftype_v2df_v2df_v16qi
18911 = build_function_type_list (V2DF_type_node,
18912 V2DF_type_node,
18913 V2DF_type_node,
18914 V16QI_type_node,
18915 NULL_TREE);
18916
18917 tree v4sf_ftype_v4sf_v4sf_v16qi
18918 = build_function_type_list (V4SF_type_node,
18919 V4SF_type_node,
18920 V4SF_type_node,
18921 V16QI_type_node,
18922 NULL_TREE);
18923
18924 tree v2di_ftype_v2di_si
18925 = build_function_type_list (V2DI_type_node,
18926 V2DI_type_node,
18927 integer_type_node,
18928 NULL_TREE);
18929
18930 tree v4si_ftype_v4si_si
18931 = build_function_type_list (V4SI_type_node,
18932 V4SI_type_node,
18933 integer_type_node,
18934 NULL_TREE);
18935
18936 tree v8hi_ftype_v8hi_si
18937 = build_function_type_list (V8HI_type_node,
18938 V8HI_type_node,
18939 integer_type_node,
18940 NULL_TREE);
18941
18942 tree v16qi_ftype_v16qi_si
18943 = build_function_type_list (V16QI_type_node,
18944 V16QI_type_node,
18945 integer_type_node,
18946 NULL_TREE);
18947 tree v4sf_ftype_v4hi
18948 = build_function_type_list (V4SF_type_node,
18949 V4HI_type_node,
18950 NULL_TREE);
18951
18952 tree v4hi_ftype_v4sf
18953 = build_function_type_list (V4HI_type_node,
18954 V4SF_type_node,
18955 NULL_TREE);
18956
18957 tree v2di_ftype_v2di
18958 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
18959
18960 tree ftype;
18961
18962 /* The __float80 type. */
18963 if (TYPE_MODE (long_double_type_node) == XFmode)
18964 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
18965 "__float80");
18966 else
18967 {
18968 /* The __float80 type. */
18969 tree float80_type_node = make_node (REAL_TYPE);
18970
18971 TYPE_PRECISION (float80_type_node) = 80;
18972 layout_type (float80_type_node);
18973 (*lang_hooks.types.register_builtin_type) (float80_type_node,
18974 "__float80");
18975 }
18976
18977 if (TARGET_64BIT)
18978 {
18979 tree float128_type_node = make_node (REAL_TYPE);
18980
18981 TYPE_PRECISION (float128_type_node) = 128;
18982 layout_type (float128_type_node);
18983 (*lang_hooks.types.register_builtin_type) (float128_type_node,
18984 "__float128");
18985
18986 /* TFmode support builtins. */
18987 ftype = build_function_type (float128_type_node,
18988 void_list_node);
18989 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_infq", ftype, IX86_BUILTIN_INFQ);
18990
18991 ftype = build_function_type_list (float128_type_node,
18992 float128_type_node,
18993 NULL_TREE);
18994 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ);
18995
18996 ftype = build_function_type_list (float128_type_node,
18997 float128_type_node,
18998 float128_type_node,
18999 NULL_TREE);
19000 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_copysignq", ftype, IX86_BUILTIN_COPYSIGNQ);
19001 }
19002
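/* Illustrative sketch, not part of the original source: with the type
   registrations and TFmode builtins defined just above, 64-bit user code
   could use them roughly as

       __float80  e   = 2.718281828;
       __float128 inf = __builtin_infq ();
       __float128 a   = __builtin_fabsq (inf);
       __float128 c   = __builtin_copysignq (a, -inf);

   The builtin and type names are the ones registered above; the literal
   assigned to the __float80 variable only shows the call shapes and
   assumes the usual implicit conversions.  */
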
19003 /* Add all SSE builtins that are more or less simple operations on
19004 three operands. */
19005 for (i = 0, d = bdesc_sse_3arg;
19006 i < ARRAY_SIZE (bdesc_sse_3arg);
19007 i++, d++)
19008 {
19009 /* Use one of the operands; the target can have a different mode for
19010 mask-generating compares. */
19011 enum machine_mode mode;
19012 tree type;
19013
19014 if (d->name == 0)
19015 continue;
19016 mode = insn_data[d->icode].operand[1].mode;
19017
19018 switch (mode)
19019 {
19020 case V16QImode:
19021 type = v16qi_ftype_v16qi_v16qi_int;
19022 break;
19023 case V8HImode:
19024 type = v8hi_ftype_v8hi_v8hi_int;
19025 break;
19026 case V4SImode:
19027 type = v4si_ftype_v4si_v4si_int;
19028 break;
19029 case V2DImode:
19030 type = v2di_ftype_v2di_v2di_int;
19031 break;
19032 case V2DFmode:
19033 type = v2df_ftype_v2df_v2df_int;
19034 break;
19035 case V4SFmode:
19036 type = v4sf_ftype_v4sf_v4sf_int;
19037 break;
19038 default:
19039 gcc_unreachable ();
19040 }
19041
19042 /* Override for variable blends. */
19043 switch (d->icode)
19044 {
19045 case CODE_FOR_sse4_1_blendvpd:
19046 type = v2df_ftype_v2df_v2df_v2df;
19047 break;
19048 case CODE_FOR_sse4_1_blendvps:
19049 type = v4sf_ftype_v4sf_v4sf_v4sf;
19050 break;
19051 case CODE_FOR_sse4_1_pblendvb:
19052 type = v16qi_ftype_v16qi_v16qi_v16qi;
19053 break;
19054 default:
19055 break;
19056 }
19057
19058 def_builtin_const (d->mask, d->name, type, d->code);
19059 }
19060
19061 /* Add all builtins that are more or less simple operations on two
19062 operands. */
19063 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19064 {
19065 /* Use one of the operands; the target can have a different mode for
19066 mask-generating compares. */
19067 enum machine_mode mode;
19068 tree type;
19069
19070 if (d->name == 0)
19071 continue;
19072 mode = insn_data[d->icode].operand[1].mode;
19073
19074 switch (mode)
19075 {
19076 case V16QImode:
19077 type = v16qi_ftype_v16qi_v16qi;
19078 break;
19079 case V8HImode:
19080 type = v8hi_ftype_v8hi_v8hi;
19081 break;
19082 case V4SImode:
19083 type = v4si_ftype_v4si_v4si;
19084 break;
19085 case V2DImode:
19086 type = v2di_ftype_v2di_v2di;
19087 break;
19088 case V2DFmode:
19089 type = v2df_ftype_v2df_v2df;
19090 break;
19091 case V4SFmode:
19092 type = v4sf_ftype_v4sf_v4sf;
19093 break;
19094 case V8QImode:
19095 type = v8qi_ftype_v8qi_v8qi;
19096 break;
19097 case V4HImode:
19098 type = v4hi_ftype_v4hi_v4hi;
19099 break;
19100 case V2SImode:
19101 type = v2si_ftype_v2si_v2si;
19102 break;
19103 case DImode:
19104 type = di_ftype_di_di;
19105 break;
19106
19107 default:
19108 gcc_unreachable ();
19109 }
19110
19111 /* Override for comparisons. */
19112 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
19113 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
19114 type = v4si_ftype_v4sf_v4sf;
19115
19116 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
19117 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
19118 type = v2di_ftype_v2df_v2df;
19119
19120 if (d->icode == CODE_FOR_vec_pack_sfix_v2df)
19121 type = v4si_ftype_v2df_v2df;
19122
19123 def_builtin_const (d->mask, d->name, type, d->code);
19124 }
19125
19126 /* Add all builtins that are more or less simple operations on 1 operand. */
19127 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19128 {
19129 enum machine_mode mode;
19130 tree type;
19131
19132 if (d->name == 0)
19133 continue;
19134 mode = insn_data[d->icode].operand[1].mode;
19135
19136 switch (mode)
19137 {
19138 case V16QImode:
19139 type = v16qi_ftype_v16qi;
19140 break;
19141 case V8HImode:
19142 type = v8hi_ftype_v8hi;
19143 break;
19144 case V4SImode:
19145 type = v4si_ftype_v4si;
19146 break;
19147 case V2DFmode:
19148 type = v2df_ftype_v2df;
19149 break;
19150 case V4SFmode:
19151 type = v4sf_ftype_v4sf;
19152 break;
19153 case V8QImode:
19154 type = v8qi_ftype_v8qi;
19155 break;
19156 case V4HImode:
19157 type = v4hi_ftype_v4hi;
19158 break;
19159 case V2SImode:
19160 type = v2si_ftype_v2si;
19161 break;
19162
19163 default:
19164 gcc_unreachable ();
19165 }
19166
19167 def_builtin_const (d->mask, d->name, type, d->code);
19168 }
19169
19170 /* pcmpestr[im] insns. */
19171 for (i = 0, d = bdesc_pcmpestr;
19172 i < ARRAY_SIZE (bdesc_pcmpestr);
19173 i++, d++)
19174 {
19175 if (d->code == IX86_BUILTIN_PCMPESTRM128)
19176 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
19177 else
19178 ftype = int_ftype_v16qi_int_v16qi_int_int;
19179 def_builtin_const (d->mask, d->name, ftype, d->code);
19180 }
19181
19182 /* pcmpistr[im] insns. */
19183 for (i = 0, d = bdesc_pcmpistr;
19184 i < ARRAY_SIZE (bdesc_pcmpistr);
19185 i++, d++)
19186 {
19187 if (d->code == IX86_BUILTIN_PCMPISTRM128)
19188 ftype = v16qi_ftype_v16qi_v16qi_int;
19189 else
19190 ftype = int_ftype_v16qi_v16qi_int;
19191 def_builtin_const (d->mask, d->name, ftype, d->code);
19192 }
19193
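/* Illustrative sketch, not part of the original source: the two loops
   above give the ...strm forms a V16QI result and the ...stri forms an
   int result.  Assuming the descriptor tables use the usual
   __builtin_ia32_pcmpistri128 / __builtin_ia32_pcmpistrm128 names (they
   are defined elsewhere in this file), user code would look like

       typedef char v16qi __attribute__ ((vector_size (16)));
       int   idx  = __builtin_ia32_pcmpistri128 (a, b, 0x0c);
       v16qi mask = __builtin_ia32_pcmpistrm128 (a, b, 0x0c);

   with the mode argument being an 8-bit immediate.  */
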
19194 /* Add the remaining MMX insns with somewhat more complicated types. */
19195 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
19196 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
19197 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
19198 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
19199
19200 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
19201 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
19202 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
19203
19204 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
19205 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
19206
19207 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
19208 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
19209
19210 /* comi/ucomi insns. */
19211 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
19212 if (d->mask == OPTION_MASK_ISA_SSE2)
19213 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
19214 else
19215 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
19216
19217 /* ptest insns. */
19218 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
19219 def_builtin_const (d->mask, d->name, int_ftype_v2di_v2di, d->code);
19220
19221 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
19222 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
19223 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
19224
19225 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
19226 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
19227 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
19228 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
19229 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
19230 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
19231 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
19232 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
19233 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
19234 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
19235 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
19236
19237 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
19238
19239 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
19240 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
19241
19242 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
19243 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
19244 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
19245 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
19246
19247 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
19248 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
19249 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
19250 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
19251
19252 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
19253
19254 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
19255
19256 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
19257 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
19258 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
19259 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
19260 ftype = build_function_type_list (float_type_node,
19261 float_type_node,
19262 NULL_TREE);
19263 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtf", ftype, IX86_BUILTIN_RSQRTF);
19264 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
19265 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
19266
19267 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
19268
19269 /* Original 3DNow! */
19270 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
19271 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
19272 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
19273 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
19274 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
19275 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
19276 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
19277 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
19278 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
19279 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
19280 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
19281 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
19282 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
19283 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
19284 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
19285 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
19286 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
19287 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
19288 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
19289 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
19290
19291 /* 3DNow! extension as used in the Athlon CPU. */
19292 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
19293 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
19294 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
19295 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
19296 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
19297 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
19298
19299 /* SSE2 */
19300 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
19301
19302 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
19303 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
19304
19305 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
19306 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
19307
19308 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
19309 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
19310 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
19311 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
19312 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
19313
19314 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
19315 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
19316 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
19317 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
19318
19319 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
19320 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
19321
19322 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
19323
19324 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
19325 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
19326
19327 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
19328 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
19329 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
19330 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
19331 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
19332
19333 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
19334
19335 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
19336 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
19337 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
19338 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
19339
19340 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
19341 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
19342 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
19343
19344 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
19345 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
19346 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
19347 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
19348
19349 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
19350 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
19351 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
19352
19353 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
19354 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
19355
19356 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
19357 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
19358
19359 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
19360 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
19361 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
19362 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
19363 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
19364 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
19365 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
19366
19367 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
19368 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
19369 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
19370 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
19371 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
19372 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
19373 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
19374
19375 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
19376 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
19377 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
19378 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
19379
19380 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
19381
19382 /* Prescott New Instructions. */
19383 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
19384 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
19385 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_lddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
19386
19387 /* SSSE3. */
19388 def_builtin_const (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
19389 def_builtin_const (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int, IX86_BUILTIN_PALIGNR);
19390
19391 /* SSE4.1. */
19392 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_movntdqa", v2di_ftype_pv2di, IX86_BUILTIN_MOVNTDQA);
19393 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVSXBW128);
19394 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVSXBD128);
19395 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVSXBQ128);
19396 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVSXWD128);
19397 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVSXWQ128);
19398 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVSXDQ128);
19399 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVZXBW128);
19400 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVZXBD128);
19401 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVZXBQ128);
19402 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVZXWD128);
19403 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVZXWQ128);
19404 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVZXDQ128);
19405 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmuldq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULDQ128);
19406
19407 /* SSE4.1 and SSE5 */
19408 def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundpd", v2df_ftype_v2df_int, IX86_BUILTIN_ROUNDPD);
19409 def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundps", v4sf_ftype_v4sf_int, IX86_BUILTIN_ROUNDPS);
19410 def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundsd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_ROUNDSD);
19411 def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundss", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_ROUNDSS);
19412
19413 /* SSE4.2. */
19414 ftype = build_function_type_list (unsigned_type_node,
19415 unsigned_type_node,
19416 unsigned_char_type_node,
19417 NULL_TREE);
19418 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32qi", ftype, IX86_BUILTIN_CRC32QI);
19419 ftype = build_function_type_list (unsigned_type_node,
19420 unsigned_type_node,
19421 short_unsigned_type_node,
19422 NULL_TREE);
19423 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32hi", ftype, IX86_BUILTIN_CRC32HI);
19424 ftype = build_function_type_list (unsigned_type_node,
19425 unsigned_type_node,
19426 unsigned_type_node,
19427 NULL_TREE);
19428 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32si", ftype, IX86_BUILTIN_CRC32SI);
19429 ftype = build_function_type_list (long_long_unsigned_type_node,
19430 long_long_unsigned_type_node,
19431 long_long_unsigned_type_node,
19432 NULL_TREE);
19433 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32di", ftype, IX86_BUILTIN_CRC32DI);
19434
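/* Illustrative sketch, not part of the original source: the SSE4.2 CRC32
   builtins defined above accumulate a running CRC value one input unit
   at a time, e.g.

       unsigned int crc = 0;
       crc = __builtin_ia32_crc32qi (crc, (unsigned char) byte);
       crc = __builtin_ia32_crc32si (crc, word);

   where the argument types match the function types built just above (an
   unsigned int accumulator plus an unsigned char / short / int value; the
   DI form uses unsigned long long for both).  */
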
19435 /* AMDFAM10 SSE4A new built-ins.  */
19436 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
19437 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
19438 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
19439 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
19440 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
19441 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
19442
19443 /* Access to the vec_init patterns. */
19444 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
19445 integer_type_node, NULL_TREE);
19446 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
19447
19448 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
19449 short_integer_type_node,
19450 short_integer_type_node,
19451 short_integer_type_node, NULL_TREE);
19452 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
19453
19454 ftype = build_function_type_list (V8QI_type_node, char_type_node,
19455 char_type_node, char_type_node,
19456 char_type_node, char_type_node,
19457 char_type_node, char_type_node,
19458 char_type_node, NULL_TREE);
19459 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
19460
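/* Illustrative sketch, not part of the original source: the vec_init
   builtins above build an MMX vector from scalar elements, e.g.

       typedef int v2si __attribute__ ((vector_size (8)));
       v2si v = __builtin_ia32_vec_init_v2si (1, 2);

   The v4hi and v8qi variants take four shorts and eight chars
   respectively, as in the function types built just above.  */
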
19461 /* Access to the vec_extract patterns. */
19462 ftype = build_function_type_list (double_type_node, V2DF_type_node,
19463 integer_type_node, NULL_TREE);
19464 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
19465
19466 ftype = build_function_type_list (long_long_integer_type_node,
19467 V2DI_type_node, integer_type_node,
19468 NULL_TREE);
19469 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
19470
19471 ftype = build_function_type_list (float_type_node, V4SF_type_node,
19472 integer_type_node, NULL_TREE);
19473 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
19474
19475 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
19476 integer_type_node, NULL_TREE);
19477 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
19478
19479 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
19480 integer_type_node, NULL_TREE);
19481 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
19482
19483 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
19484 integer_type_node, NULL_TREE);
19485 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
19486
19487 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
19488 integer_type_node, NULL_TREE);
19489 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
19490
19491 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
19492 integer_type_node, NULL_TREE);
19493 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
19494
19495 /* Access to the vec_set patterns. */
19496 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
19497 intDI_type_node,
19498 integer_type_node, NULL_TREE);
19499 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
19500
19501 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
19502 float_type_node,
19503 integer_type_node, NULL_TREE);
19504 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
19505
19506 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
19507 intSI_type_node,
19508 integer_type_node, NULL_TREE);
19509 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
19510
19511 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
19512 intHI_type_node,
19513 integer_type_node, NULL_TREE);
19514 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
19515
19516 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
19517 intHI_type_node,
19518 integer_type_node, NULL_TREE);
19519 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
19520
19521 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
19522 intQI_type_node,
19523 integer_type_node, NULL_TREE);
19524 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
19525
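/* Illustrative sketch, not part of the original source: the vec_ext and
   vec_set builtins above read or replace one element of a vector at a
   given (normally constant) position, e.g.

       typedef double v2df __attribute__ ((vector_size (16)));
       typedef float  v4sf __attribute__ ((vector_size (16)));
       double lo = __builtin_ia32_vec_ext_v2df (d, 0);
       v4sf   r  = __builtin_ia32_vec_set_v4sf (s, 1.0f, 2);

   matching the (vector, index) and (vector, scalar, index) function
   types built just above.  */
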
19526 /* Add SSE5 multi-arg instructions.  */
19527 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
19528 {
19529 tree mtype = NULL_TREE;
19530
19531 if (d->name == 0)
19532 continue;
19533
19534 switch ((enum multi_arg_type)d->flag)
19535 {
19536 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
19537 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
19538 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
19539 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
19540 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
19541 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
19542 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
19543 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
19544 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
19545 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
19546 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
19547 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
19548 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
19549 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
19550 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
19551 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
19552 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
19553 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
19554 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
19555 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
19556 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
19557 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
19558 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
19559 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
19560 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
19561 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
19562 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
19563 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
19564 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
19565 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
19566 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
19567 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
19568 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
19569 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
19570 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
19571 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
19572 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
19573 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
19574 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
19575 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
19576 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
19577 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
19578 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
19579 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
19580 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
19581 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
19582 case MULTI_ARG_UNKNOWN:
19583 default:
19584 gcc_unreachable ();
19585 }
19586
19587 if (mtype)
19588 def_builtin_const (d->mask, d->name, mtype, d->code);
19589 }
19590 }
19591
19592 static void
19593 ix86_init_builtins (void)
19594 {
19595 if (TARGET_MMX)
19596 ix86_init_mmx_sse_builtins ();
19597 }
19598
19599 /* Errors in the source file can cause expand_expr to return const0_rtx
19600 where we expect a vector. To avoid crashing, use one of the vector
19601 clear instructions. */
19602 static rtx
19603 safe_vector_operand (rtx x, enum machine_mode mode)
19604 {
19605 if (x == const0_rtx)
19606 x = CONST0_RTX (mode);
19607 return x;
19608 }
19609
19610 /* Subroutine of ix86_expand_builtin to take care of SSE insns with
19611    4 operands.  The third argument must fit in 8 bits as an immediate
19612    constant (4 bits for roundsd/roundss), or be xmm0 for the variable blends.  */
19613
19614 static rtx
19615 ix86_expand_sse_4_operands_builtin (enum insn_code icode, tree exp,
19616 rtx target)
19617 {
19618 rtx pat;
19619 tree arg0 = CALL_EXPR_ARG (exp, 0);
19620 tree arg1 = CALL_EXPR_ARG (exp, 1);
19621 tree arg2 = CALL_EXPR_ARG (exp, 2);
19622 rtx op0 = expand_normal (arg0);
19623 rtx op1 = expand_normal (arg1);
19624 rtx op2 = expand_normal (arg2);
19625 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19626 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
19627 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
19628 enum machine_mode mode3 = insn_data[icode].operand[3].mode;
19629
19630 if (VECTOR_MODE_P (mode1))
19631 op0 = safe_vector_operand (op0, mode1);
19632 if (VECTOR_MODE_P (mode2))
19633 op1 = safe_vector_operand (op1, mode2);
19634 if (VECTOR_MODE_P (mode3))
19635 op2 = safe_vector_operand (op2, mode3);
19636
19637 if (optimize
19638 || target == 0
19639 || GET_MODE (target) != tmode
19640 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19641 target = gen_reg_rtx (tmode);
19642
19643 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19644 op0 = copy_to_mode_reg (mode1, op0);
19645 if ((optimize && !register_operand (op1, mode2))
19646 || !(*insn_data[icode].operand[2].predicate) (op1, mode2))
19647 op1 = copy_to_mode_reg (mode2, op1);
19648
19649 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19650 switch (icode)
19651 {
19652 case CODE_FOR_sse4_1_blendvpd:
19653 case CODE_FOR_sse4_1_blendvps:
19654 case CODE_FOR_sse4_1_pblendvb:
19655 op2 = copy_to_mode_reg (mode3, op2);
19656 break;
19657
19658 case CODE_FOR_sse4_1_roundsd:
19659 case CODE_FOR_sse4_1_roundss:
19660 error ("the third argument must be a 4-bit immediate");
19661 return const0_rtx;
19662
19663 default:
19664 error ("the third argument must be an 8-bit immediate");
19665 return const0_rtx;
19666 }
19667
19668 pat = GEN_FCN (icode) (target, op0, op1, op2);
19669 if (! pat)
19670 return 0;
19671 emit_insn (pat);
19672 return target;
19673 }
19674
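/* Illustrative sketch, not part of the original source: a builtin routed
   through ix86_expand_sse_4_operands_builtin, such as
   __builtin_ia32_roundsd defined earlier, has to pass its third argument
   as a constant so the operand-3 predicate check above succeeds:

       typedef double v2df __attribute__ ((vector_size (16)));
       v2df r = __builtin_ia32_roundsd (a, b, 0x1);

   Here 0x1 is one of the 4-bit rounding-mode encodings; a non-constant
   value would instead hit the error paths above.  The blendv forms are
   the exception and accept a register (xmm0) third operand.  */
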
19675 /* Subroutine of ix86_expand_builtin to take care of crc32 insns. */
19676
19677 static rtx
19678 ix86_expand_crc32 (enum insn_code icode, tree exp, rtx target)
19679 {
19680 rtx pat;
19681 tree arg0 = CALL_EXPR_ARG (exp, 0);
19682 tree arg1 = CALL_EXPR_ARG (exp, 1);
19683 rtx op0 = expand_normal (arg0);
19684 rtx op1 = expand_normal (arg1);
19685 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19686 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19687 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
19688
19689 if (optimize
19690 || !target
19691 || GET_MODE (target) != tmode
19692 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19693 target = gen_reg_rtx (tmode);
19694
19695 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
19696 op0 = copy_to_mode_reg (mode0, op0);
19697 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
19698 {
19699 op1 = copy_to_reg (op1);
19700 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
19701 }
19702
19703 pat = GEN_FCN (icode) (target, op0, op1);
19704 if (! pat)
19705 return 0;
19706 emit_insn (pat);
19707 return target;
19708 }
19709
19710 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
19711
19712 static rtx
19713 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
19714 {
19715 rtx pat, xops[3];
19716 tree arg0 = CALL_EXPR_ARG (exp, 0);
19717 tree arg1 = CALL_EXPR_ARG (exp, 1);
19718 rtx op0 = expand_normal (arg0);
19719 rtx op1 = expand_normal (arg1);
19720 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19721 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19722 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
19723
19724 if (VECTOR_MODE_P (mode0))
19725 op0 = safe_vector_operand (op0, mode0);
19726 if (VECTOR_MODE_P (mode1))
19727 op1 = safe_vector_operand (op1, mode1);
19728
19729 if (optimize || !target
19730 || GET_MODE (target) != tmode
19731 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19732 target = gen_reg_rtx (tmode);
19733
19734 if (GET_MODE (op1) == SImode && mode1 == TImode)
19735 {
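/* The insn expects a TImode operand but the builtin supplies an SImode
   value; load it into the low element of a V4SImode register and view
   that register as TImode.  */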
19736 rtx x = gen_reg_rtx (V4SImode);
19737 emit_insn (gen_sse2_loadd (x, op1));
19738 op1 = gen_lowpart (TImode, x);
19739 }
19740
19741 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
19742 op0 = copy_to_mode_reg (mode0, op0);
19743 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
19744 op1 = copy_to_mode_reg (mode1, op1);
19745
19746 /* ??? Using ix86_fixup_binary_operands is problematic when
19747 we've got mismatched modes. Fake it. */
19748
19749 xops[0] = target;
19750 xops[1] = op0;
19751 xops[2] = op1;
19752
19753 if (tmode == mode0 && tmode == mode1)
19754 {
19755 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
19756 op0 = xops[1];
19757 op1 = xops[2];
19758 }
19759 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
19760 {
19761 op0 = force_reg (mode0, op0);
19762 op1 = force_reg (mode1, op1);
19763 target = gen_reg_rtx (tmode);
19764 }
19765
19766 pat = GEN_FCN (icode) (target, op0, op1);
19767 if (! pat)
19768 return 0;
19769 emit_insn (pat);
19770 return target;
19771 }
19772
19773 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
19774
19775 static rtx
19776 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
19777 enum multi_arg_type m_type,
19778 enum insn_code sub_code)
19779 {
19780 rtx pat;
19781 int i;
19782 int nargs;
19783 bool comparison_p = false;
19784 bool tf_p = false;
19785 bool last_arg_constant = false;
19786 int num_memory = 0;
19787 struct {
19788 rtx op;
19789 enum machine_mode mode;
19790 } args[4];
19791
19792 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19793
19794 switch (m_type)
19795 {
19796 case MULTI_ARG_3_SF:
19797 case MULTI_ARG_3_DF:
19798 case MULTI_ARG_3_DI:
19799 case MULTI_ARG_3_SI:
19800 case MULTI_ARG_3_SI_DI:
19801 case MULTI_ARG_3_HI:
19802 case MULTI_ARG_3_HI_SI:
19803 case MULTI_ARG_3_QI:
19804 case MULTI_ARG_3_PERMPS:
19805 case MULTI_ARG_3_PERMPD:
19806 nargs = 3;
19807 break;
19808
19809 case MULTI_ARG_2_SF:
19810 case MULTI_ARG_2_DF:
19811 case MULTI_ARG_2_DI:
19812 case MULTI_ARG_2_SI:
19813 case MULTI_ARG_2_HI:
19814 case MULTI_ARG_2_QI:
19815 nargs = 2;
19816 break;
19817
19818 case MULTI_ARG_2_DI_IMM:
19819 case MULTI_ARG_2_SI_IMM:
19820 case MULTI_ARG_2_HI_IMM:
19821 case MULTI_ARG_2_QI_IMM:
19822 nargs = 2;
19823 last_arg_constant = true;
19824 break;
19825
19826 case MULTI_ARG_1_SF:
19827 case MULTI_ARG_1_DF:
19828 case MULTI_ARG_1_DI:
19829 case MULTI_ARG_1_SI:
19830 case MULTI_ARG_1_HI:
19831 case MULTI_ARG_1_QI:
19832 case MULTI_ARG_1_SI_DI:
19833 case MULTI_ARG_1_HI_DI:
19834 case MULTI_ARG_1_HI_SI:
19835 case MULTI_ARG_1_QI_DI:
19836 case MULTI_ARG_1_QI_SI:
19837 case MULTI_ARG_1_QI_HI:
19838 case MULTI_ARG_1_PH2PS:
19839 case MULTI_ARG_1_PS2PH:
19840 nargs = 1;
19841 break;
19842
19843 case MULTI_ARG_2_SF_CMP:
19844 case MULTI_ARG_2_DF_CMP:
19845 case MULTI_ARG_2_DI_CMP:
19846 case MULTI_ARG_2_SI_CMP:
19847 case MULTI_ARG_2_HI_CMP:
19848 case MULTI_ARG_2_QI_CMP:
19849 nargs = 2;
19850 comparison_p = true;
19851 break;
19852
19853 case MULTI_ARG_2_SF_TF:
19854 case MULTI_ARG_2_DF_TF:
19855 case MULTI_ARG_2_DI_TF:
19856 case MULTI_ARG_2_SI_TF:
19857 case MULTI_ARG_2_HI_TF:
19858 case MULTI_ARG_2_QI_TF:
19859 nargs = 2;
19860 tf_p = true;
19861 break;
19862
19863 case MULTI_ARG_UNKNOWN:
19864 default:
19865 gcc_unreachable ();
19866 }
19867
19868 if (optimize || !target
19869 || GET_MODE (target) != tmode
19870 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19871 target = gen_reg_rtx (tmode);
19872
19873 gcc_assert (nargs <= 4);
19874
19875 for (i = 0; i < nargs; i++)
19876 {
19877 tree arg = CALL_EXPR_ARG (exp, i);
19878 rtx op = expand_normal (arg);
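/* For comparison patterns, operand 1 of the insn is the comparison rtx
   itself, so the builtin arguments start one operand later.  */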
19879 int adjust = (comparison_p) ? 1 : 0;
19880 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
19881
19882 if (last_arg_constant && i == nargs-1)
19883 {
19884 if (GET_CODE (op) != CONST_INT)
19885 {
19886 error ("last argument must be an immediate");
19887 return gen_reg_rtx (tmode);
19888 }
19889 }
19890 else
19891 {
19892 if (VECTOR_MODE_P (mode))
19893 op = safe_vector_operand (op, mode);
19894
19895 /* When optimizing, force operands into registers; otherwise allow
19896 at most one memory operand to be generated.  */
19897 if (memory_operand (op, mode))
19898 num_memory++;
19899
19900 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
19901
19902 if (optimize
19903 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
19904 || num_memory > 1)
19905 op = force_reg (mode, op);
19906 }
19907
19908 args[i].op = op;
19909 args[i].mode = mode;
19910 }
19911
19912 switch (nargs)
19913 {
19914 case 1:
19915 pat = GEN_FCN (icode) (target, args[0].op);
19916 break;
19917
19918 case 2:
19919 if (tf_p)
19920 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
19921 GEN_INT ((int)sub_code));
19922 else if (! comparison_p)
19923 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
19924 else
19925 {
19926 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
19927 args[0].op,
19928 args[1].op);
19929
19930 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
19931 }
19932 break;
19933
19934 case 3:
19935 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
19936 break;
19937
19938 default:
19939 gcc_unreachable ();
19940 }
19941
19942 if (! pat)
19943 return 0;
19944
19945 emit_insn (pat);
19946 return target;
19947 }
19948
19949 /* Subroutine of ix86_expand_builtin to take care of stores. */
19950
19951 static rtx
19952 ix86_expand_store_builtin (enum insn_code icode, tree exp)
19953 {
19954 rtx pat;
19955 tree arg0 = CALL_EXPR_ARG (exp, 0);
19956 tree arg1 = CALL_EXPR_ARG (exp, 1);
19957 rtx op0 = expand_normal (arg0);
19958 rtx op1 = expand_normal (arg1);
19959 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
19960 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
19961
19962 if (VECTOR_MODE_P (mode1))
19963 op1 = safe_vector_operand (op1, mode1);
19964
19965 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
19966 op1 = copy_to_mode_reg (mode1, op1);
19967
19968 pat = GEN_FCN (icode) (op0, op1);
19969 if (pat)
19970 emit_insn (pat);
19971 return 0;
19972 }
19973
19974 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
19975
19976 static rtx
19977 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
19978 rtx target, int do_load)
19979 {
19980 rtx pat;
19981 tree arg0 = CALL_EXPR_ARG (exp, 0);
19982 rtx op0 = expand_normal (arg0);
19983 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19984 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19985
19986 if (optimize || !target
19987 || GET_MODE (target) != tmode
19988 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19989 target = gen_reg_rtx (tmode);
19990 if (do_load)
19991 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
19992 else
19993 {
19994 if (VECTOR_MODE_P (mode0))
19995 op0 = safe_vector_operand (op0, mode0);
19996
19997 if ((optimize && !register_operand (op0, mode0))
19998 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19999 op0 = copy_to_mode_reg (mode0, op0);
20000 }
20001
20002 switch (icode)
20003 {
20004 case CODE_FOR_sse4_1_roundpd:
20005 case CODE_FOR_sse4_1_roundps:
20006 {
20007 tree arg1 = CALL_EXPR_ARG (exp, 1);
20008 rtx op1 = expand_normal (arg1);
20009 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
20010
20011 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20012 {
20013 error ("the second argument must be a 4-bit immediate");
20014 return const0_rtx;
20015 }
20016 pat = GEN_FCN (icode) (target, op0, op1);
20017 }
20018 break;
20019 default:
20020 pat = GEN_FCN (icode) (target, op0);
20021 break;
20022 }
20023
20024 if (! pat)
20025 return 0;
20026 emit_insn (pat);
20027 return target;
20028 }
20029
20030 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
20031 sqrtss, rsqrtss, rcpss. */
20032
20033 static rtx
20034 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
20035 {
20036 rtx pat;
20037 tree arg0 = CALL_EXPR_ARG (exp, 0);
20038 rtx op1, op0 = expand_normal (arg0);
20039 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20040 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20041
20042 if (optimize || !target
20043 || GET_MODE (target) != tmode
20044 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20045 target = gen_reg_rtx (tmode);
20046
20047 if (VECTOR_MODE_P (mode0))
20048 op0 = safe_vector_operand (op0, mode0);
20049
20050 if ((optimize && !register_operand (op0, mode0))
20051 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20052 op0 = copy_to_mode_reg (mode0, op0);
20053
20054 op1 = op0;
20055 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
20056 op1 = copy_to_mode_reg (mode0, op1);
20057
20058 pat = GEN_FCN (icode) (target, op0, op1);
20059 if (! pat)
20060 return 0;
20061 emit_insn (pat);
20062 return target;
20063 }
20064
20065 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
20066
20067 static rtx
20068 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
20069 rtx target)
20070 {
20071 rtx pat;
20072 tree arg0 = CALL_EXPR_ARG (exp, 0);
20073 tree arg1 = CALL_EXPR_ARG (exp, 1);
20074 rtx op0 = expand_normal (arg0);
20075 rtx op1 = expand_normal (arg1);
20076 rtx op2;
20077 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
20078 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
20079 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
20080 enum rtx_code comparison = d->comparison;
20081
20082 if (VECTOR_MODE_P (mode0))
20083 op0 = safe_vector_operand (op0, mode0);
20084 if (VECTOR_MODE_P (mode1))
20085 op1 = safe_vector_operand (op1, mode1);
20086
20087 /* Swap operands if we have a comparison that isn't available in
20088 hardware. */
20089 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
20090 {
20091 rtx tmp = gen_reg_rtx (mode1);
20092 emit_move_insn (tmp, op1);
20093 op1 = op0;
20094 op0 = tmp;
20095 }
20096
20097 if (optimize || !target
20098 || GET_MODE (target) != tmode
20099 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
20100 target = gen_reg_rtx (tmode);
20101
20102 if ((optimize && !register_operand (op0, mode0))
20103 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
20104 op0 = copy_to_mode_reg (mode0, op0);
20105 if ((optimize && !register_operand (op1, mode1))
20106 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
20107 op1 = copy_to_mode_reg (mode1, op1);
20108
20109 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
20110 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
20111 if (! pat)
20112 return 0;
20113 emit_insn (pat);
20114 return target;
20115 }
20116
20117 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
20118
20119 static rtx
20120 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
20121 rtx target)
20122 {
20123 rtx pat;
20124 tree arg0 = CALL_EXPR_ARG (exp, 0);
20125 tree arg1 = CALL_EXPR_ARG (exp, 1);
20126 rtx op0 = expand_normal (arg0);
20127 rtx op1 = expand_normal (arg1);
20128 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20129 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20130 enum rtx_code comparison = d->comparison;
20131
20132 if (VECTOR_MODE_P (mode0))
20133 op0 = safe_vector_operand (op0, mode0);
20134 if (VECTOR_MODE_P (mode1))
20135 op1 = safe_vector_operand (op1, mode1);
20136
20137 /* Swap operands if we have a comparison that isn't available in
20138 hardware. */
20139 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
20140 {
20141 rtx tmp = op1;
20142 op1 = op0;
20143 op0 = tmp;
20144 }
20145
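/* The comi patterns only set the flags register.  Build the integer
   result by zeroing an SImode pseudo and then setting its low byte,
   through STRICT_LOW_PART, from a comparison of the flags with zero.  */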
20146 target = gen_reg_rtx (SImode);
20147 emit_move_insn (target, const0_rtx);
20148 target = gen_rtx_SUBREG (QImode, target, 0);
20149
20150 if ((optimize && !register_operand (op0, mode0))
20151 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20152 op0 = copy_to_mode_reg (mode0, op0);
20153 if ((optimize && !register_operand (op1, mode1))
20154 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20155 op1 = copy_to_mode_reg (mode1, op1);
20156
20157 pat = GEN_FCN (d->icode) (op0, op1);
20158 if (! pat)
20159 return 0;
20160 emit_insn (pat);
20161 emit_insn (gen_rtx_SET (VOIDmode,
20162 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20163 gen_rtx_fmt_ee (comparison, QImode,
20164 SET_DEST (pat),
20165 const0_rtx)));
20166
20167 return SUBREG_REG (target);
20168 }
20169
20170 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
20171
20172 static rtx
20173 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
20174 rtx target)
20175 {
20176 rtx pat;
20177 tree arg0 = CALL_EXPR_ARG (exp, 0);
20178 tree arg1 = CALL_EXPR_ARG (exp, 1);
20179 rtx op0 = expand_normal (arg0);
20180 rtx op1 = expand_normal (arg1);
20181 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20182 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20183 enum rtx_code comparison = d->comparison;
20184
20185 if (VECTOR_MODE_P (mode0))
20186 op0 = safe_vector_operand (op0, mode0);
20187 if (VECTOR_MODE_P (mode1))
20188 op1 = safe_vector_operand (op1, mode1);
20189
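/* As in the comi expander above, derive the integer result from the
   flags set by the ptest pattern.  */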
20190 target = gen_reg_rtx (SImode);
20191 emit_move_insn (target, const0_rtx);
20192 target = gen_rtx_SUBREG (QImode, target, 0);
20193
20194 if ((optimize && !register_operand (op0, mode0))
20195 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20196 op0 = copy_to_mode_reg (mode0, op0);
20197 if ((optimize && !register_operand (op1, mode1))
20198 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20199 op1 = copy_to_mode_reg (mode1, op1);
20200
20201 pat = GEN_FCN (d->icode) (op0, op1);
20202 if (! pat)
20203 return 0;
20204 emit_insn (pat);
20205 emit_insn (gen_rtx_SET (VOIDmode,
20206 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20207 gen_rtx_fmt_ee (comparison, QImode,
20208 SET_DEST (pat),
20209 const0_rtx)));
20210
20211 return SUBREG_REG (target);
20212 }
20213
20214 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
20215
20216 static rtx
20217 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
20218 tree exp, rtx target)
20219 {
20220 rtx pat;
20221 tree arg0 = CALL_EXPR_ARG (exp, 0);
20222 tree arg1 = CALL_EXPR_ARG (exp, 1);
20223 tree arg2 = CALL_EXPR_ARG (exp, 2);
20224 tree arg3 = CALL_EXPR_ARG (exp, 3);
20225 tree arg4 = CALL_EXPR_ARG (exp, 4);
20226 rtx scratch0, scratch1;
20227 rtx op0 = expand_normal (arg0);
20228 rtx op1 = expand_normal (arg1);
20229 rtx op2 = expand_normal (arg2);
20230 rtx op3 = expand_normal (arg3);
20231 rtx op4 = expand_normal (arg4);
20232 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
20233
20234 tmode0 = insn_data[d->icode].operand[0].mode;
20235 tmode1 = insn_data[d->icode].operand[1].mode;
20236 modev2 = insn_data[d->icode].operand[2].mode;
20237 modei3 = insn_data[d->icode].operand[3].mode;
20238 modev4 = insn_data[d->icode].operand[4].mode;
20239 modei5 = insn_data[d->icode].operand[5].mode;
20240 modeimm = insn_data[d->icode].operand[6].mode;
20241
20242 if (VECTOR_MODE_P (modev2))
20243 op0 = safe_vector_operand (op0, modev2);
20244 if (VECTOR_MODE_P (modev4))
20245 op2 = safe_vector_operand (op2, modev4);
20246
20247 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
20248 op0 = copy_to_mode_reg (modev2, op0);
20249 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
20250 op1 = copy_to_mode_reg (modei3, op1);
20251 if ((optimize && !register_operand (op2, modev4))
20252 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
20253 op2 = copy_to_mode_reg (modev4, op2);
20254 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
20255 op3 = copy_to_mode_reg (modei5, op3);
20256
20257 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
20258 {
20259 error ("the fifth argument must be an 8-bit immediate");
20260 return const0_rtx;
20261 }
20262
20263 if (d->code == IX86_BUILTIN_PCMPESTRI128)
20264 {
20265 if (optimize || !target
20266 || GET_MODE (target) != tmode0
20267 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
20268 target = gen_reg_rtx (tmode0);
20269
20270 scratch1 = gen_reg_rtx (tmode1);
20271
20272 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
20273 }
20274 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
20275 {
20276 if (optimize || !target
20277 || GET_MODE (target) != tmode1
20278 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
20279 target = gen_reg_rtx (tmode1);
20280
20281 scratch0 = gen_reg_rtx (tmode0);
20282
20283 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
20284 }
20285 else
20286 {
20287 gcc_assert (d->flag);
20288
20289 scratch0 = gen_reg_rtx (tmode0);
20290 scratch1 = gen_reg_rtx (tmode1);
20291
20292 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
20293 }
20294
20295 if (! pat)
20296 return 0;
20297
20298 emit_insn (pat);
20299
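/* For the variants that return a single condition flag, d->flag holds
   the CC mode in which to read the flags register; the result is the
   EQ comparison of those flags against zero.  */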
20300 if (d->flag)
20301 {
20302 target = gen_reg_rtx (SImode);
20303 emit_move_insn (target, const0_rtx);
20304 target = gen_rtx_SUBREG (QImode, target, 0);
20305
20306 emit_insn
20307 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20308 gen_rtx_fmt_ee (EQ, QImode,
20309 gen_rtx_REG ((enum machine_mode) d->flag,
20310 FLAGS_REG),
20311 const0_rtx)));
20312 return SUBREG_REG (target);
20313 }
20314 else
20315 return target;
20316 }
20317
20318
20319 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
20320
20321 static rtx
20322 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
20323 tree exp, rtx target)
20324 {
20325 rtx pat;
20326 tree arg0 = CALL_EXPR_ARG (exp, 0);
20327 tree arg1 = CALL_EXPR_ARG (exp, 1);
20328 tree arg2 = CALL_EXPR_ARG (exp, 2);
20329 rtx scratch0, scratch1;
20330 rtx op0 = expand_normal (arg0);
20331 rtx op1 = expand_normal (arg1);
20332 rtx op2 = expand_normal (arg2);
20333 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
20334
20335 tmode0 = insn_data[d->icode].operand[0].mode;
20336 tmode1 = insn_data[d->icode].operand[1].mode;
20337 modev2 = insn_data[d->icode].operand[2].mode;
20338 modev3 = insn_data[d->icode].operand[3].mode;
20339 modeimm = insn_data[d->icode].operand[4].mode;
20340
20341 if (VECTOR_MODE_P (modev2))
20342 op0 = safe_vector_operand (op0, modev2);
20343 if (VECTOR_MODE_P (modev3))
20344 op1 = safe_vector_operand (op1, modev3);
20345
20346 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
20347 op0 = copy_to_mode_reg (modev2, op0);
20348 if ((optimize && !register_operand (op1, modev3))
20349 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
20350 op1 = copy_to_mode_reg (modev3, op1);
20351
20352 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
20353 {
20354 error ("the third argument must be an 8-bit immediate");
20355 return const0_rtx;
20356 }
20357
20358 if (d->code == IX86_BUILTIN_PCMPISTRI128)
20359 {
20360 if (optimize || !target
20361 || GET_MODE (target) != tmode0
20362 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
20363 target = gen_reg_rtx (tmode0);
20364
20365 scratch1 = gen_reg_rtx (tmode1);
20366
20367 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
20368 }
20369 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
20370 {
20371 if (optimize || !target
20372 || GET_MODE (target) != tmode1
20373 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
20374 target = gen_reg_rtx (tmode1);
20375
20376 scratch0 = gen_reg_rtx (tmode0);
20377
20378 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
20379 }
20380 else
20381 {
20382 gcc_assert (d->flag);
20383
20384 scratch0 = gen_reg_rtx (tmode0);
20385 scratch1 = gen_reg_rtx (tmode1);
20386
20387 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
20388 }
20389
20390 if (! pat)
20391 return 0;
20392
20393 emit_insn (pat);
20394
20395 if (d->flag)
20396 {
20397 target = gen_reg_rtx (SImode);
20398 emit_move_insn (target, const0_rtx);
20399 target = gen_rtx_SUBREG (QImode, target, 0);
20400
20401 emit_insn
20402 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20403 gen_rtx_fmt_ee (EQ, QImode,
20404 gen_rtx_REG ((enum machine_mode) d->flag,
20405 FLAGS_REG),
20406 const0_rtx)));
20407 return SUBREG_REG (target);
20408 }
20409 else
20410 return target;
20411 }
20412
20413 /* Return the integer constant in ARG. Constrain it to be in the range
20414 of the subparts of VEC_TYPE; issue an error if not. */
20415
20416 static int
20417 get_element_number (tree vec_type, tree arg)
20418 {
20419 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
20420
20421 if (!host_integerp (arg, 1)
20422 || (elt = tree_low_cst (arg, 1), elt > max))
20423 {
20424 error ("selector must be an integer constant in the range 0..%wi", max);
20425 return 0;
20426 }
20427
20428 return elt;
20429 }
20430
20431 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
20432 ix86_expand_vector_init. We DO have language-level syntax for this, in
20433 the form of (type){ init-list }. Except that since we can't place emms
20434 instructions from inside the compiler, we can't allow the use of MMX
20435 registers unless the user explicitly asks for it. So we do *not* define
20436 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
20437 we have builtins invoked by mmintrin.h that give us license to emit
20438 these sorts of instructions. */
20439
20440 static rtx
20441 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
20442 {
20443 enum machine_mode tmode = TYPE_MODE (type);
20444 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
20445 int i, n_elt = GET_MODE_NUNITS (tmode);
20446 rtvec v = rtvec_alloc (n_elt);
20447
20448 gcc_assert (VECTOR_MODE_P (tmode));
20449 gcc_assert (call_expr_nargs (exp) == n_elt);
20450
20451 for (i = 0; i < n_elt; ++i)
20452 {
20453 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
20454 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
20455 }
20456
20457 if (!target || !register_operand (target, tmode))
20458 target = gen_reg_rtx (tmode);
20459
20460 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
20461 return target;
20462 }
20463
20464 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
20465 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
20466 had a language-level syntax for referencing vector elements. */
20467
20468 static rtx
20469 ix86_expand_vec_ext_builtin (tree exp, rtx target)
20470 {
20471 enum machine_mode tmode, mode0;
20472 tree arg0, arg1;
20473 int elt;
20474 rtx op0;
20475
20476 arg0 = CALL_EXPR_ARG (exp, 0);
20477 arg1 = CALL_EXPR_ARG (exp, 1);
20478
20479 op0 = expand_normal (arg0);
20480 elt = get_element_number (TREE_TYPE (arg0), arg1);
20481
20482 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
20483 mode0 = TYPE_MODE (TREE_TYPE (arg0));
20484 gcc_assert (VECTOR_MODE_P (mode0));
20485
20486 op0 = force_reg (mode0, op0);
20487
20488 if (optimize || !target || !register_operand (target, tmode))
20489 target = gen_reg_rtx (tmode);
20490
20491 ix86_expand_vector_extract (true, target, op0, elt);
20492
20493 return target;
20494 }
20495
20496 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
20497 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
20498 a language-level syntax for referencing vector elements. */
20499
20500 static rtx
20501 ix86_expand_vec_set_builtin (tree exp)
20502 {
20503 enum machine_mode tmode, mode1;
20504 tree arg0, arg1, arg2;
20505 int elt;
20506 rtx op0, op1, target;
20507
20508 arg0 = CALL_EXPR_ARG (exp, 0);
20509 arg1 = CALL_EXPR_ARG (exp, 1);
20510 arg2 = CALL_EXPR_ARG (exp, 2);
20511
20512 tmode = TYPE_MODE (TREE_TYPE (arg0));
20513 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
20514 gcc_assert (VECTOR_MODE_P (tmode));
20515
20516 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
20517 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
20518 elt = get_element_number (TREE_TYPE (arg0), arg2);
20519
20520 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
20521 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
20522
20523 op0 = force_reg (tmode, op0);
20524 op1 = force_reg (mode1, op1);
20525
20526 /* OP0 is the source of these builtin functions and shouldn't be
20527 modified. Create a copy, use it and return it as target. */
20528 target = gen_reg_rtx (tmode);
20529 emit_move_insn (target, op0);
20530 ix86_expand_vector_set (true, target, op1, elt);
20531
20532 return target;
20533 }
20534
20535 /* Expand an expression EXP that calls a built-in function,
20536 with result going to TARGET if that's convenient
20537 (and in mode MODE if that's convenient).
20538 SUBTARGET may be used as the target for computing one of EXP's operands.
20539 IGNORE is nonzero if the value is to be ignored. */
20540
20541 static rtx
20542 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
20543 enum machine_mode mode ATTRIBUTE_UNUSED,
20544 int ignore ATTRIBUTE_UNUSED)
20545 {
20546 const struct builtin_description *d;
20547 size_t i;
20548 enum insn_code icode;
20549 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
20550 tree arg0, arg1, arg2, arg3;
20551 rtx op0, op1, op2, op3, pat;
20552 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
20553 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
20554
20555 switch (fcode)
20556 {
20557 case IX86_BUILTIN_EMMS:
20558 emit_insn (gen_mmx_emms ());
20559 return 0;
20560
20561 case IX86_BUILTIN_SFENCE:
20562 emit_insn (gen_sse_sfence ());
20563 return 0;
20564
20565 case IX86_BUILTIN_MASKMOVQ:
20566 case IX86_BUILTIN_MASKMOVDQU:
20567 icode = (fcode == IX86_BUILTIN_MASKMOVQ
20568 ? CODE_FOR_mmx_maskmovq
20569 : CODE_FOR_sse2_maskmovdqu);
20570 /* Note the arg order is different from the operand order. */
20571 arg1 = CALL_EXPR_ARG (exp, 0);
20572 arg2 = CALL_EXPR_ARG (exp, 1);
20573 arg0 = CALL_EXPR_ARG (exp, 2);
20574 op0 = expand_normal (arg0);
20575 op1 = expand_normal (arg1);
20576 op2 = expand_normal (arg2);
20577 mode0 = insn_data[icode].operand[0].mode;
20578 mode1 = insn_data[icode].operand[1].mode;
20579 mode2 = insn_data[icode].operand[2].mode;
20580
20581 op0 = force_reg (Pmode, op0);
20582 op0 = gen_rtx_MEM (mode1, op0);
20583
20584 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
20585 op0 = copy_to_mode_reg (mode0, op0);
20586 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
20587 op1 = copy_to_mode_reg (mode1, op1);
20588 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
20589 op2 = copy_to_mode_reg (mode2, op2);
20590 pat = GEN_FCN (icode) (op0, op1, op2);
20591 if (! pat)
20592 return 0;
20593 emit_insn (pat);
20594 return 0;
20595
20596 case IX86_BUILTIN_RSQRTF:
20597 return ix86_expand_unop1_builtin (CODE_FOR_rsqrtsf2, exp, target);
20598
20599 case IX86_BUILTIN_SQRTSS:
20600 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
20601 case IX86_BUILTIN_RSQRTSS:
20602 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
20603 case IX86_BUILTIN_RCPSS:
20604 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
20605
20606 case IX86_BUILTIN_LOADUPS:
20607 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
20608
20609 case IX86_BUILTIN_STOREUPS:
20610 return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
20611
20612 case IX86_BUILTIN_LOADHPS:
20613 case IX86_BUILTIN_LOADLPS:
20614 case IX86_BUILTIN_LOADHPD:
20615 case IX86_BUILTIN_LOADLPD:
20616 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
20617 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
20618 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
20619 : CODE_FOR_sse2_loadlpd);
20620 arg0 = CALL_EXPR_ARG (exp, 0);
20621 arg1 = CALL_EXPR_ARG (exp, 1);
20622 op0 = expand_normal (arg0);
20623 op1 = expand_normal (arg1);
20624 tmode = insn_data[icode].operand[0].mode;
20625 mode0 = insn_data[icode].operand[1].mode;
20626 mode1 = insn_data[icode].operand[2].mode;
20627
20628 op0 = force_reg (mode0, op0);
20629 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
20630 if (optimize || target == 0
20631 || GET_MODE (target) != tmode
20632 || !register_operand (target, tmode))
20633 target = gen_reg_rtx (tmode);
20634 pat = GEN_FCN (icode) (target, op0, op1);
20635 if (! pat)
20636 return 0;
20637 emit_insn (pat);
20638 return target;
20639
20640 case IX86_BUILTIN_STOREHPS:
20641 case IX86_BUILTIN_STORELPS:
20642 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
20643 : CODE_FOR_sse_storelps);
20644 arg0 = CALL_EXPR_ARG (exp, 0);
20645 arg1 = CALL_EXPR_ARG (exp, 1);
20646 op0 = expand_normal (arg0);
20647 op1 = expand_normal (arg1);
20648 mode0 = insn_data[icode].operand[0].mode;
20649 mode1 = insn_data[icode].operand[1].mode;
20650
20651 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
20652 op1 = force_reg (mode1, op1);
20653
20654 pat = GEN_FCN (icode) (op0, op1);
20655 if (! pat)
20656 return 0;
20657 emit_insn (pat);
20658 return const0_rtx;
20659
20660 case IX86_BUILTIN_MOVNTPS:
20661 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
20662 case IX86_BUILTIN_MOVNTQ:
20663 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
20664
20665 case IX86_BUILTIN_LDMXCSR:
20666 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
20667 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
20668 emit_move_insn (target, op0);
20669 emit_insn (gen_sse_ldmxcsr (target));
20670 return 0;
20671
20672 case IX86_BUILTIN_STMXCSR:
20673 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
20674 emit_insn (gen_sse_stmxcsr (target));
20675 return copy_to_mode_reg (SImode, target);
20676
20677 case IX86_BUILTIN_SHUFPS:
20678 case IX86_BUILTIN_SHUFPD:
20679 icode = (fcode == IX86_BUILTIN_SHUFPS
20680 ? CODE_FOR_sse_shufps
20681 : CODE_FOR_sse2_shufpd);
20682 arg0 = CALL_EXPR_ARG (exp, 0);
20683 arg1 = CALL_EXPR_ARG (exp, 1);
20684 arg2 = CALL_EXPR_ARG (exp, 2);
20685 op0 = expand_normal (arg0);
20686 op1 = expand_normal (arg1);
20687 op2 = expand_normal (arg2);
20688 tmode = insn_data[icode].operand[0].mode;
20689 mode0 = insn_data[icode].operand[1].mode;
20690 mode1 = insn_data[icode].operand[2].mode;
20691 mode2 = insn_data[icode].operand[3].mode;
20692
20693 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20694 op0 = copy_to_mode_reg (mode0, op0);
20695 if ((optimize && !register_operand (op1, mode1))
20696 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
20697 op1 = copy_to_mode_reg (mode1, op1);
20698 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
20699 {
20700 /* @@@ better error message */
20701 error ("mask must be an immediate");
20702 return gen_reg_rtx (tmode);
20703 }
20704 if (optimize || target == 0
20705 || GET_MODE (target) != tmode
20706 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20707 target = gen_reg_rtx (tmode);
20708 pat = GEN_FCN (icode) (target, op0, op1, op2);
20709 if (! pat)
20710 return 0;
20711 emit_insn (pat);
20712 return target;
20713
20714 case IX86_BUILTIN_PSHUFW:
20715 case IX86_BUILTIN_PSHUFD:
20716 case IX86_BUILTIN_PSHUFHW:
20717 case IX86_BUILTIN_PSHUFLW:
20718 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
20719 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
20720 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
20721 : CODE_FOR_mmx_pshufw);
20722 arg0 = CALL_EXPR_ARG (exp, 0);
20723 arg1 = CALL_EXPR_ARG (exp, 1);
20724 op0 = expand_normal (arg0);
20725 op1 = expand_normal (arg1);
20726 tmode = insn_data[icode].operand[0].mode;
20727 mode1 = insn_data[icode].operand[1].mode;
20728 mode2 = insn_data[icode].operand[2].mode;
20729
20730 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20731 op0 = copy_to_mode_reg (mode1, op0);
20732 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
20733 {
20734 /* @@@ better error message */
20735 error ("mask must be an immediate");
20736 return const0_rtx;
20737 }
20738 if (target == 0
20739 || GET_MODE (target) != tmode
20740 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20741 target = gen_reg_rtx (tmode);
20742 pat = GEN_FCN (icode) (target, op0, op1);
20743 if (! pat)
20744 return 0;
20745 emit_insn (pat);
20746 return target;
20747
20748 case IX86_BUILTIN_PSLLW128:
20749 case IX86_BUILTIN_PSLLWI128:
20750 icode = CODE_FOR_ashlv8hi3;
20751 goto do_pshift;
20752 case IX86_BUILTIN_PSLLD128:
20753 case IX86_BUILTIN_PSLLDI128:
20754 icode = CODE_FOR_ashlv4si3;
20755 goto do_pshift;
20756 case IX86_BUILTIN_PSLLQ128:
20757 case IX86_BUILTIN_PSLLQI128:
20758 icode = CODE_FOR_ashlv2di3;
20759 goto do_pshift;
20760 case IX86_BUILTIN_PSRAW128:
20761 case IX86_BUILTIN_PSRAWI128:
20762 icode = CODE_FOR_ashrv8hi3;
20763 goto do_pshift;
20764 case IX86_BUILTIN_PSRAD128:
20765 case IX86_BUILTIN_PSRADI128:
20766 icode = CODE_FOR_ashrv4si3;
20767 goto do_pshift;
20768 case IX86_BUILTIN_PSRLW128:
20769 case IX86_BUILTIN_PSRLWI128:
20770 icode = CODE_FOR_lshrv8hi3;
20771 goto do_pshift;
20772 case IX86_BUILTIN_PSRLD128:
20773 case IX86_BUILTIN_PSRLDI128:
20774 icode = CODE_FOR_lshrv4si3;
20775 goto do_pshift;
20776 case IX86_BUILTIN_PSRLQ128:
20777 case IX86_BUILTIN_PSRLQI128:
20778 icode = CODE_FOR_lshrv2di3;
20779
20780 do_pshift:
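/* Both the vector-count and immediate-count shift builtins expand here;
   a non-constant count is reduced to its SImode low part before being
   handed to the shift pattern.  */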
20781 arg0 = CALL_EXPR_ARG (exp, 0);
20782 arg1 = CALL_EXPR_ARG (exp, 1);
20783 op0 = expand_normal (arg0);
20784 op1 = expand_normal (arg1);
20785
20786 tmode = insn_data[icode].operand[0].mode;
20787 mode1 = insn_data[icode].operand[1].mode;
20788
20789 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20790 op0 = copy_to_reg (op0);
20791
20792 if (!CONST_INT_P (op1))
20793 op1 = simplify_gen_subreg (SImode, op1, GET_MODE (op1), 0);
20794
20795 if (! (*insn_data[icode].operand[2].predicate) (op1, SImode))
20796 op1 = copy_to_reg (op1);
20797
20798 target = gen_reg_rtx (tmode);
20799 pat = GEN_FCN (icode) (target, op0, op1);
20800 if (!pat)
20801 return 0;
20802 emit_insn (pat);
20803 return target;
20804
20805 case IX86_BUILTIN_PSLLDQI128:
20806 case IX86_BUILTIN_PSRLDQI128:
20807 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
20808 : CODE_FOR_sse2_lshrti3);
20809 arg0 = CALL_EXPR_ARG (exp, 0);
20810 arg1 = CALL_EXPR_ARG (exp, 1);
20811 op0 = expand_normal (arg0);
20812 op1 = expand_normal (arg1);
20813 tmode = insn_data[icode].operand[0].mode;
20814 mode1 = insn_data[icode].operand[1].mode;
20815 mode2 = insn_data[icode].operand[2].mode;
20816
20817 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20818 {
20819 op0 = copy_to_reg (op0);
20820 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
20821 }
20822 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
20823 {
20824 error ("shift must be an immediate");
20825 return const0_rtx;
20826 }
20827 target = gen_reg_rtx (V2DImode);
20828 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
20829 op0, op1);
20830 if (! pat)
20831 return 0;
20832 emit_insn (pat);
20833 return target;
20834
20835 case IX86_BUILTIN_FEMMS:
20836 emit_insn (gen_mmx_femms ());
20837 return NULL_RTX;
20838
20839 case IX86_BUILTIN_PAVGUSB:
20840 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
20841
20842 case IX86_BUILTIN_PF2ID:
20843 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
20844
20845 case IX86_BUILTIN_PFACC:
20846 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
20847
20848 case IX86_BUILTIN_PFADD:
20849 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
20850
20851 case IX86_BUILTIN_PFCMPEQ:
20852 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
20853
20854 case IX86_BUILTIN_PFCMPGE:
20855 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
20856
20857 case IX86_BUILTIN_PFCMPGT:
20858 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
20859
20860 case IX86_BUILTIN_PFMAX:
20861 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
20862
20863 case IX86_BUILTIN_PFMIN:
20864 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
20865
20866 case IX86_BUILTIN_PFMUL:
20867 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
20868
20869 case IX86_BUILTIN_PFRCP:
20870 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
20871
20872 case IX86_BUILTIN_PFRCPIT1:
20873 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
20874
20875 case IX86_BUILTIN_PFRCPIT2:
20876 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
20877
20878 case IX86_BUILTIN_PFRSQIT1:
20879 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
20880
20881 case IX86_BUILTIN_PFRSQRT:
20882 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
20883
20884 case IX86_BUILTIN_PFSUB:
20885 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
20886
20887 case IX86_BUILTIN_PFSUBR:
20888 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
20889
20890 case IX86_BUILTIN_PI2FD:
20891 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
20892
20893 case IX86_BUILTIN_PMULHRW:
20894 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
20895
20896 case IX86_BUILTIN_PF2IW:
20897 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
20898
20899 case IX86_BUILTIN_PFNACC:
20900 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
20901
20902 case IX86_BUILTIN_PFPNACC:
20903 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
20904
20905 case IX86_BUILTIN_PI2FW:
20906 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
20907
20908 case IX86_BUILTIN_PSWAPDSI:
20909 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
20910
20911 case IX86_BUILTIN_PSWAPDSF:
20912 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
20913
20914 case IX86_BUILTIN_SQRTSD:
20915 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
20916 case IX86_BUILTIN_LOADUPD:
20917 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
20918 case IX86_BUILTIN_STOREUPD:
20919 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
20920
20921 case IX86_BUILTIN_MFENCE:
20922 emit_insn (gen_sse2_mfence ());
20923 return 0;
20924 case IX86_BUILTIN_LFENCE:
20925 emit_insn (gen_sse2_lfence ());
20926 return 0;
20927
20928 case IX86_BUILTIN_CLFLUSH:
20929 arg0 = CALL_EXPR_ARG (exp, 0);
20930 op0 = expand_normal (arg0);
20931 icode = CODE_FOR_sse2_clflush;
20932 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
20933 op0 = copy_to_mode_reg (Pmode, op0);
20934
20935 emit_insn (gen_sse2_clflush (op0));
20936 return 0;
20937
20938 case IX86_BUILTIN_MOVNTPD:
20939 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
20940 case IX86_BUILTIN_MOVNTDQ:
20941 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
20942 case IX86_BUILTIN_MOVNTI:
20943 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
20944
20945 case IX86_BUILTIN_LOADDQU:
20946 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
20947 case IX86_BUILTIN_STOREDQU:
20948 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
20949
20950 case IX86_BUILTIN_MONITOR:
20951 arg0 = CALL_EXPR_ARG (exp, 0);
20952 arg1 = CALL_EXPR_ARG (exp, 1);
20953 arg2 = CALL_EXPR_ARG (exp, 2);
20954 op0 = expand_normal (arg0);
20955 op1 = expand_normal (arg1);
20956 op2 = expand_normal (arg2);
20957 if (!REG_P (op0))
20958 op0 = copy_to_mode_reg (Pmode, op0);
20959 if (!REG_P (op1))
20960 op1 = copy_to_mode_reg (SImode, op1);
20961 if (!REG_P (op2))
20962 op2 = copy_to_mode_reg (SImode, op2);
20963 if (!TARGET_64BIT)
20964 emit_insn (gen_sse3_monitor (op0, op1, op2));
20965 else
20966 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
20967 return 0;
20968
20969 case IX86_BUILTIN_MWAIT:
20970 arg0 = CALL_EXPR_ARG (exp, 0);
20971 arg1 = CALL_EXPR_ARG (exp, 1);
20972 op0 = expand_normal (arg0);
20973 op1 = expand_normal (arg1);
20974 if (!REG_P (op0))
20975 op0 = copy_to_mode_reg (SImode, op0);
20976 if (!REG_P (op1))
20977 op1 = copy_to_mode_reg (SImode, op1);
20978 emit_insn (gen_sse3_mwait (op0, op1));
20979 return 0;
20980
20981 case IX86_BUILTIN_LDDQU:
20982 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
20983 target, 1);
20984
20985 case IX86_BUILTIN_PALIGNR:
20986 case IX86_BUILTIN_PALIGNR128:
20987 if (fcode == IX86_BUILTIN_PALIGNR)
20988 {
20989 icode = CODE_FOR_ssse3_palignrdi;
20990 mode = DImode;
20991 }
20992 else
20993 {
20994 icode = CODE_FOR_ssse3_palignrti;
20995 mode = V2DImode;
20996 }
20997 arg0 = CALL_EXPR_ARG (exp, 0);
20998 arg1 = CALL_EXPR_ARG (exp, 1);
20999 arg2 = CALL_EXPR_ARG (exp, 2);
21000 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
21001 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
21002 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, EXPAND_NORMAL);
21003 tmode = insn_data[icode].operand[0].mode;
21004 mode1 = insn_data[icode].operand[1].mode;
21005 mode2 = insn_data[icode].operand[2].mode;
21006 mode3 = insn_data[icode].operand[3].mode;
21007
21008 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21009 {
21010 op0 = copy_to_reg (op0);
21011 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
21012 }
21013 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21014 {
21015 op1 = copy_to_reg (op1);
21016 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
21017 }
21018 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
21019 {
21020 error ("shift must be an immediate");
21021 return const0_rtx;
21022 }
21023 target = gen_reg_rtx (mode);
21024 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
21025 op0, op1, op2);
21026 if (! pat)
21027 return 0;
21028 emit_insn (pat);
21029 return target;
21030
21031 case IX86_BUILTIN_MOVNTDQA:
21032 return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa, exp,
21033 target, 1);
21034
21035 case IX86_BUILTIN_MOVNTSD:
21036 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
21037
21038 case IX86_BUILTIN_MOVNTSS:
21039 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
21040
21041 case IX86_BUILTIN_INSERTQ:
21042 case IX86_BUILTIN_EXTRQ:
21043 icode = (fcode == IX86_BUILTIN_EXTRQ
21044 ? CODE_FOR_sse4a_extrq
21045 : CODE_FOR_sse4a_insertq);
21046 arg0 = CALL_EXPR_ARG (exp, 0);
21047 arg1 = CALL_EXPR_ARG (exp, 1);
21048 op0 = expand_normal (arg0);
21049 op1 = expand_normal (arg1);
21050 tmode = insn_data[icode].operand[0].mode;
21051 mode1 = insn_data[icode].operand[1].mode;
21052 mode2 = insn_data[icode].operand[2].mode;
21053 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21054 op0 = copy_to_mode_reg (mode1, op0);
21055 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21056 op1 = copy_to_mode_reg (mode2, op1);
21057 if (optimize || target == 0
21058 || GET_MODE (target) != tmode
21059 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21060 target = gen_reg_rtx (tmode);
21061 pat = GEN_FCN (icode) (target, op0, op1);
21062 if (! pat)
21063 return NULL_RTX;
21064 emit_insn (pat);
21065 return target;
21066
21067 case IX86_BUILTIN_EXTRQI:
21068 icode = CODE_FOR_sse4a_extrqi;
21069 arg0 = CALL_EXPR_ARG (exp, 0);
21070 arg1 = CALL_EXPR_ARG (exp, 1);
21071 arg2 = CALL_EXPR_ARG (exp, 2);
21072 op0 = expand_normal (arg0);
21073 op1 = expand_normal (arg1);
21074 op2 = expand_normal (arg2);
21075 tmode = insn_data[icode].operand[0].mode;
21076 mode1 = insn_data[icode].operand[1].mode;
21077 mode2 = insn_data[icode].operand[2].mode;
21078 mode3 = insn_data[icode].operand[3].mode;
21079 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21080 op0 = copy_to_mode_reg (mode1, op0);
21081 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21082 {
21083 error ("index mask must be an immediate");
21084 return gen_reg_rtx (tmode);
21085 }
21086 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
21087 {
21088 error ("length mask must be an immediate");
21089 return gen_reg_rtx (tmode);
21090 }
21091 if (optimize || target == 0
21092 || GET_MODE (target) != tmode
21093 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21094 target = gen_reg_rtx (tmode);
21095 pat = GEN_FCN (icode) (target, op0, op1, op2);
21096 if (! pat)
21097 return NULL_RTX;
21098 emit_insn (pat);
21099 return target;
21100
21101 case IX86_BUILTIN_INSERTQI:
21102 icode = CODE_FOR_sse4a_insertqi;
21103 arg0 = CALL_EXPR_ARG (exp, 0);
21104 arg1 = CALL_EXPR_ARG (exp, 1);
21105 arg2 = CALL_EXPR_ARG (exp, 2);
21106 arg3 = CALL_EXPR_ARG (exp, 3);
21107 op0 = expand_normal (arg0);
21108 op1 = expand_normal (arg1);
21109 op2 = expand_normal (arg2);
21110 op3 = expand_normal (arg3);
21111 tmode = insn_data[icode].operand[0].mode;
21112 mode1 = insn_data[icode].operand[1].mode;
21113 mode2 = insn_data[icode].operand[2].mode;
21114 mode3 = insn_data[icode].operand[3].mode;
21115 mode4 = insn_data[icode].operand[4].mode;
21116
21117 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21118 op0 = copy_to_mode_reg (mode1, op0);
21119
21120 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21121 op1 = copy_to_mode_reg (mode2, op1);
21122
21123 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
21124 {
21125 error ("index mask must be an immediate");
21126 return gen_reg_rtx (tmode);
21127 }
21128 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
21129 {
21130 error ("length mask must be an immediate");
21131 return gen_reg_rtx (tmode);
21132 }
21133 if (optimize || target == 0
21134 || GET_MODE (target) != tmode
21135 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21136 target = gen_reg_rtx (tmode);
21137 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
21138 if (! pat)
21139 return NULL_RTX;
21140 emit_insn (pat);
21141 return target;
21142
21143 case IX86_BUILTIN_VEC_INIT_V2SI:
21144 case IX86_BUILTIN_VEC_INIT_V4HI:
21145 case IX86_BUILTIN_VEC_INIT_V8QI:
21146 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
21147
21148 case IX86_BUILTIN_VEC_EXT_V2DF:
21149 case IX86_BUILTIN_VEC_EXT_V2DI:
21150 case IX86_BUILTIN_VEC_EXT_V4SF:
21151 case IX86_BUILTIN_VEC_EXT_V4SI:
21152 case IX86_BUILTIN_VEC_EXT_V8HI:
21153 case IX86_BUILTIN_VEC_EXT_V2SI:
21154 case IX86_BUILTIN_VEC_EXT_V4HI:
21155 case IX86_BUILTIN_VEC_EXT_V16QI:
21156 return ix86_expand_vec_ext_builtin (exp, target);
21157
21158 case IX86_BUILTIN_VEC_SET_V2DI:
21159 case IX86_BUILTIN_VEC_SET_V4SF:
21160 case IX86_BUILTIN_VEC_SET_V4SI:
21161 case IX86_BUILTIN_VEC_SET_V8HI:
21162 case IX86_BUILTIN_VEC_SET_V4HI:
21163 case IX86_BUILTIN_VEC_SET_V16QI:
21164 return ix86_expand_vec_set_builtin (exp);
21165
21166 case IX86_BUILTIN_INFQ:
21167 {
21168 REAL_VALUE_TYPE inf;
21169 rtx tmp;
21170
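/* Materialize an infinity in MODE: build the constant, force it into
   the constant pool, and load it into the target from there.  */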
21171 real_inf (&inf);
21172 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
21173
21174 tmp = validize_mem (force_const_mem (mode, tmp));
21175
21176 if (target == 0)
21177 target = gen_reg_rtx (mode);
21178
21179 emit_move_insn (target, tmp);
21180 return target;
21181 }
21182
21183 case IX86_BUILTIN_FABSQ:
21184 return ix86_expand_unop_builtin (CODE_FOR_abstf2, exp, target, 0);
21185
21186 case IX86_BUILTIN_COPYSIGNQ:
21187 return ix86_expand_binop_builtin (CODE_FOR_copysigntf3, exp, target);
21188
21189 default:
21190 break;
21191 }
21192
21193 for (i = 0, d = bdesc_sse_3arg;
21194 i < ARRAY_SIZE (bdesc_sse_3arg);
21195 i++, d++)
21196 if (d->code == fcode)
21197 return ix86_expand_sse_4_operands_builtin (d->icode, exp,
21198 target);
21199
21200 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
21201 if (d->code == fcode)
21202 {
21203 /* Compares are treated specially. */
21204 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
21205 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
21206 || d->icode == CODE_FOR_sse2_maskcmpv2df3
21207 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
21208 return ix86_expand_sse_compare (d, exp, target);
21209
21210 return ix86_expand_binop_builtin (d->icode, exp, target);
21211 }
21212
21213 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
21214 if (d->code == fcode)
21215 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
21216
21217 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
21218 if (d->code == fcode)
21219 return ix86_expand_sse_comi (d, exp, target);
21220
21221 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
21222 if (d->code == fcode)
21223 return ix86_expand_sse_ptest (d, exp, target);
21224
21225 for (i = 0, d = bdesc_crc32; i < ARRAY_SIZE (bdesc_crc32); i++, d++)
21226 if (d->code == fcode)
21227 return ix86_expand_crc32 (d->icode, exp, target);
21228
21229 for (i = 0, d = bdesc_pcmpestr;
21230 i < ARRAY_SIZE (bdesc_pcmpestr);
21231 i++, d++)
21232 if (d->code == fcode)
21233 return ix86_expand_sse_pcmpestr (d, exp, target);
21234
21235 for (i = 0, d = bdesc_pcmpistr;
21236 i < ARRAY_SIZE (bdesc_pcmpistr);
21237 i++, d++)
21238 if (d->code == fcode)
21239 return ix86_expand_sse_pcmpistr (d, exp, target);
21240
21241 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
21242 if (d->code == fcode)
21243 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
21244 (enum multi_arg_type)d->flag,
21245 d->comparison);
21246
21247 gcc_unreachable ();
21248 }
21249
21250 /* Returns a function decl for a vectorized version of the builtin function
21251 with builtin function code FN, result vector type TYPE_OUT and argument
21252 vector type TYPE_IN, or NULL_TREE if it is not available. */
21253
21254 static tree
21255 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
21256 tree type_in)
21257 {
21258 enum machine_mode in_mode, out_mode;
21259 int in_n, out_n;
21260
21261 if (TREE_CODE (type_out) != VECTOR_TYPE
21262 || TREE_CODE (type_in) != VECTOR_TYPE)
21263 return NULL_TREE;
21264
21265 out_mode = TYPE_MODE (TREE_TYPE (type_out));
21266 out_n = TYPE_VECTOR_SUBPARTS (type_out);
21267 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21268 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21269
21270 switch (fn)
21271 {
21272 case BUILT_IN_SQRT:
21273 if (out_mode == DFmode && out_n == 2
21274 && in_mode == DFmode && in_n == 2)
21275 return ix86_builtins[IX86_BUILTIN_SQRTPD];
21276 break;
21277
21278 case BUILT_IN_SQRTF:
21279 if (out_mode == SFmode && out_n == 4
21280 && in_mode == SFmode && in_n == 4)
21281 return ix86_builtins[IX86_BUILTIN_SQRTPS];
21282 break;
21283
21284 case BUILT_IN_LRINT:
21285 if (out_mode == SImode && out_n == 4
21286 && in_mode == DFmode && in_n == 2)
21287 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
21288 break;
21289
21290 case BUILT_IN_LRINTF:
21291 if (out_mode == SImode && out_n == 4
21292 && in_mode == SFmode && in_n == 4)
21293 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
21294 break;
21295
21296 default:
21297 ;
21298 }
21299
21300 /* Dispatch to a handler for a vectorization library. */
21301 if (ix86_veclib_handler)
21302 return (*ix86_veclib_handler)(fn, type_out, type_in);
21303
21304 return NULL_TREE;
21305 }
21306
21307 /* Handler for an ACML-style interface to a library with vectorized
21308 intrinsics. */
21309
21310 static tree
21311 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
21312 {
21313 char name[20] = "__vr.._";
21314 tree fntype, new_fndecl, args;
21315 unsigned arity;
21316 const char *bname;
21317 enum machine_mode el_mode, in_mode;
21318 int n, in_n;
21319
21320 /* The ACML is 64-bit only and suitable only for unsafe math, as
21321 it does not correctly support parts of IEEE (such as denormals)
21322 with the required precision. */
21323 if (!TARGET_64BIT
21324 || !flag_unsafe_math_optimizations)
21325 return NULL_TREE;
21326
21327 el_mode = TYPE_MODE (TREE_TYPE (type_out));
21328 n = TYPE_VECTOR_SUBPARTS (type_out);
21329 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21330 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21331 if (el_mode != in_mode
21332 || n != in_n)
21333 return NULL_TREE;
21334
21335 switch (fn)
21336 {
21337 case BUILT_IN_SIN:
21338 case BUILT_IN_COS:
21339 case BUILT_IN_EXP:
21340 case BUILT_IN_LOG:
21341 case BUILT_IN_LOG2:
21342 case BUILT_IN_LOG10:
21343 name[4] = 'd';
21344 name[5] = '2';
21345 if (el_mode != DFmode
21346 || n != 2)
21347 return NULL_TREE;
21348 break;
21349
21350 case BUILT_IN_SINF:
21351 case BUILT_IN_COSF:
21352 case BUILT_IN_EXPF:
21353 case BUILT_IN_POWF:
21354 case BUILT_IN_LOGF:
21355 case BUILT_IN_LOG2F:
21356 case BUILT_IN_LOG10F:
21357 name[4] = 's';
21358 name[5] = '4';
21359 if (el_mode != SFmode
21360 || n != 4)
21361 return NULL_TREE;
21362 break;
21363
21364 default:
21365 return NULL_TREE;
21366 }
21367
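/* Fill in the rest of the name template: the two dots were replaced
   above by the element type ('d' or 's') and the vector width, and the
   scalar builtin name, minus its __builtin_ prefix, is appended after
   the final underscore (giving e.g. __vrd2_sin).  */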
21368 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
21369 sprintf (name + 7, "%s", bname+10);
21370
21371 arity = 0;
21372 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
21373 args = TREE_CHAIN (args))
21374 arity++;
21375
21376 if (arity == 1)
21377 fntype = build_function_type_list (type_out, type_in, NULL);
21378 else
21379 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
21380
21381 /* Build a function declaration for the vectorized function. */
21382 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
21383 TREE_PUBLIC (new_fndecl) = 1;
21384 DECL_EXTERNAL (new_fndecl) = 1;
21385 DECL_IS_NOVOPS (new_fndecl) = 1;
21386 TREE_READONLY (new_fndecl) = 1;
21387
21388 return new_fndecl;
21389 }
21390
21391
21392 /* Returns a decl of a function that implements conversion of the
21393 input vector of type TYPE, or NULL_TREE if it is not available. */
21394
21395 static tree
21396 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
21397 {
21398 if (TREE_CODE (type) != VECTOR_TYPE)
21399 return NULL_TREE;
21400
21401 switch (code)
21402 {
21403 case FLOAT_EXPR:
21404 switch (TYPE_MODE (type))
21405 {
21406 case V4SImode:
21407 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
21408 default:
21409 return NULL_TREE;
21410 }
21411
21412 case FIX_TRUNC_EXPR:
21413 switch (TYPE_MODE (type))
21414 {
21415 case V4SFmode:
21416 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
21417 default:
21418 return NULL_TREE;
21419 }
21420 default:
21421 return NULL_TREE;
21422
21423 }
21424 }
21425
21426 /* Returns the decl of a target-specific builtin that implements the
21427 reciprocal of the function FN, or NULL_TREE if it is not available. */
21428
21429 static tree
21430 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
21431 bool sqrt ATTRIBUTE_UNUSED)
21432 {
21433 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
21434 && flag_finite_math_only && !flag_trapping_math
21435 && flag_unsafe_math_optimizations))
21436 return NULL_TREE;
21437
21438 if (md_fn)
21439 /* Machine dependent builtins. */
21440 switch (fn)
21441 {
21442 /* Vectorized version of sqrt to rsqrt conversion. */
21443 case IX86_BUILTIN_SQRTPS:
21444 return ix86_builtins[IX86_BUILTIN_RSQRTPS];
21445
21446 default:
21447 return NULL_TREE;
21448 }
21449 else
21450 /* Normal builtins. */
21451 switch (fn)
21452 {
21453 /* Sqrt to rsqrt conversion. */
21454 case BUILT_IN_SQRTF:
21455 return ix86_builtins[IX86_BUILTIN_RSQRTF];
21456
21457 default:
21458 return NULL_TREE;
21459 }
21460 }
21461
21462 /* Store OPERAND to the memory after reload is completed. This means
21463 that we can't easily use assign_stack_local. */
21464 rtx
21465 ix86_force_to_memory (enum machine_mode mode, rtx operand)
21466 {
21467 rtx result;
21468
21469 gcc_assert (reload_completed);
21470 if (TARGET_RED_ZONE)
21471 {
21472 result = gen_rtx_MEM (mode,
21473 gen_rtx_PLUS (Pmode,
21474 stack_pointer_rtx,
21475 GEN_INT (-RED_ZONE_SIZE)));
21476 emit_move_insn (result, operand);
21477 }
21478 else if (!TARGET_RED_ZONE && TARGET_64BIT)
21479 {
21480 switch (mode)
21481 {
21482 case HImode:
21483 case SImode:
21484 operand = gen_lowpart (DImode, operand);
21485 /* FALLTHRU */
21486 case DImode:
21487 emit_insn (
21488 gen_rtx_SET (VOIDmode,
21489 gen_rtx_MEM (DImode,
21490 gen_rtx_PRE_DEC (DImode,
21491 stack_pointer_rtx)),
21492 operand));
21493 break;
21494 default:
21495 gcc_unreachable ();
21496 }
21497 result = gen_rtx_MEM (mode, stack_pointer_rtx);
21498 }
21499 else
21500 {
21501 switch (mode)
21502 {
21503 case DImode:
21504 {
21505 rtx operands[2];
21506 split_di (&operand, 1, operands, operands + 1);
21507 emit_insn (
21508 gen_rtx_SET (VOIDmode,
21509 gen_rtx_MEM (SImode,
21510 gen_rtx_PRE_DEC (Pmode,
21511 stack_pointer_rtx)),
21512 operands[1]));
21513 emit_insn (
21514 gen_rtx_SET (VOIDmode,
21515 gen_rtx_MEM (SImode,
21516 gen_rtx_PRE_DEC (Pmode,
21517 stack_pointer_rtx)),
21518 operands[0]));
21519 }
21520 break;
21521 case HImode:
21522 /* Store HImodes as SImodes. */
21523 operand = gen_lowpart (SImode, operand);
21524 /* FALLTHRU */
21525 case SImode:
21526 emit_insn (
21527 gen_rtx_SET (VOIDmode,
21528 gen_rtx_MEM (GET_MODE (operand),
21529 gen_rtx_PRE_DEC (SImode,
21530 stack_pointer_rtx)),
21531 operand));
21532 break;
21533 default:
21534 gcc_unreachable ();
21535 }
21536 result = gen_rtx_MEM (mode, stack_pointer_rtx);
21537 }
21538 return result;
21539 }
21540
21541 /* Free operand from the memory. */
21542 void
21543 ix86_free_from_memory (enum machine_mode mode)
21544 {
21545 if (!TARGET_RED_ZONE)
21546 {
21547 int size;
21548
21549 if (mode == DImode || TARGET_64BIT)
21550 size = 8;
21551 else
21552 size = 4;
21553 /* Use LEA to deallocate stack space. In peephole2 it will be converted
21554 to a pop or add instruction if registers are available. */
21555 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21556 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
21557 GEN_INT (size))));
21558 }
21559 }
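/* A usage sketch (hypothetical caller, for illustration only): the two
   helpers above are intended to be used as a pair after reload, e.g.

       rtx slot = ix86_force_to_memory (SImode, operand);  push-style store
       ...emit insns that read SLOT...
       ix86_free_from_memory (SImode);                     sp += 4 (8 on 64-bit)

   On targets with a red zone the store simply lands below the stack pointer
   and ix86_free_from_memory emits nothing at all.  */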
21560
21561 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
21562 QImode must go into class Q_REGS.
21563 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
21564 movdf to do mem-to-mem moves through integer regs. */
21565 enum reg_class
21566 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
21567 {
21568 enum machine_mode mode = GET_MODE (x);
21569
21570 /* We're only allowed to return a subclass of CLASS. Many of the
21571 following checks fail for NO_REGS, so eliminate that early. */
21572 if (regclass == NO_REGS)
21573 return NO_REGS;
21574
21575 /* All classes can load zeros. */
21576 if (x == CONST0_RTX (mode))
21577 return regclass;
21578
21579 /* Force constants into memory if we are loading a (nonzero) constant into
21580 an MMX or SSE register. This is because there are no MMX/SSE instructions
21581 to load from a constant. */
21582 if (CONSTANT_P (x)
21583 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
21584 return NO_REGS;
21585
21586 /* Prefer SSE regs only, if we can use them for math. */
21587 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
21588 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
21589
21590 /* Floating-point constants need more complex checks. */
21591 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
21592 {
21593 /* General regs can load everything. */
21594 if (reg_class_subset_p (regclass, GENERAL_REGS))
21595 return regclass;
21596
21597 /* Floats can load 0 and 1 plus some others. Note that we eliminated
21598 zero above. We only want to wind up preferring 80387 registers if
21599 we plan on doing computation with them. */
21600 if (TARGET_80387
21601 && standard_80387_constant_p (x))
21602 {
21603 /* Limit class to non-sse. */
21604 if (regclass == FLOAT_SSE_REGS)
21605 return FLOAT_REGS;
21606 if (regclass == FP_TOP_SSE_REGS)
21607 return FP_TOP_REG;
21608 if (regclass == FP_SECOND_SSE_REGS)
21609 return FP_SECOND_REG;
21610 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
21611 return regclass;
21612 }
21613
21614 return NO_REGS;
21615 }
21616
21617 /* Generally, when we see PLUS here, it's the function invariant
21618 (plus soft-fp const_int), which can only be computed into general
21619 regs. */
21620 if (GET_CODE (x) == PLUS)
21621 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
21622
21623 /* QImode constants are easy to load, but non-constant QImode data
21624 must go into Q_REGS. */
21625 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
21626 {
21627 if (reg_class_subset_p (regclass, Q_REGS))
21628 return regclass;
21629 if (reg_class_subset_p (Q_REGS, regclass))
21630 return Q_REGS;
21631 return NO_REGS;
21632 }
21633
21634 return regclass;
21635 }
21636
21637 /* Discourage putting floating-point values in SSE registers unless
21638 SSE math is being used, and likewise for the 387 registers. */
21639 enum reg_class
21640 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
21641 {
21642 enum machine_mode mode = GET_MODE (x);
21643
21644 /* Restrict the output reload class to the register bank that we are doing
21645 math on. Rather than return a class that is not a subset of CLASS, reject
21646 this alternative: if reload cannot honor that, it will still use its own choice. */
21647 mode = GET_MODE (x);
21648 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21649 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
21650
21651 if (X87_FLOAT_MODE_P (mode))
21652 {
21653 if (regclass == FP_TOP_SSE_REGS)
21654 return FP_TOP_REG;
21655 else if (regclass == FP_SECOND_SSE_REGS)
21656 return FP_SECOND_REG;
21657 else
21658 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
21659 }
21660
21661 return regclass;
21662 }
21663
21664 /* If we are copying between general and FP registers, we need a memory
21665 location. The same is true for SSE and MMX registers.
21666
21667 To optimize register_move_cost performance, allow inline variant.
21668
21669 The macro can't work reliably when one of the CLASSES is a class containing
21670 registers from multiple units (SSE, MMX, integer). We avoid this by never
21671 combining those units in a single alternative in the machine description.
21672 Ensure that this constraint holds to avoid unexpected surprises.
21673
21674 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
21675 enforce these sanity checks. */
21676
21677 static inline int
21678 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
21679 enum machine_mode mode, int strict)
21680 {
21681 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
21682 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
21683 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
21684 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
21685 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
21686 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
21687 {
21688 gcc_assert (!strict);
21689 return true;
21690 }
21691
21692 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
21693 return true;
21694
21695 /* ??? This is a lie. We do have moves between mmx/general, and between
21696 mmx/sse2. But by saying we need secondary memory we discourage the
21697 register allocator from using the mmx registers unless needed. */
21698 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
21699 return true;
21700
21701 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
21702 {
21703 /* SSE1 doesn't have any direct moves from other classes. */
21704 if (!TARGET_SSE2)
21705 return true;
21706
21707 /* If the target says that inter-unit moves are more expensive
21708 than moving through memory, then don't generate them. */
21709 if (!TARGET_INTER_UNIT_MOVES)
21710 return true;
21711
21712 /* Between SSE and general, we have moves no larger than word size. */
21713 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21714 return true;
21715 }
21716
21717 return false;
21718 }
21719
21720 int
21721 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
21722 enum machine_mode mode, int strict)
21723 {
21724 return inline_secondary_memory_needed (class1, class2, mode, strict);
21725 }
21726
21727 /* Return true if the registers in CLASS cannot represent the change from
21728 modes FROM to TO. */
21729
21730 bool
21731 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
21732 enum reg_class regclass)
21733 {
21734 if (from == to)
21735 return false;
21736
21737 /* x87 registers can't do subreg at all, as all values are reformatted
21738 to extended precision. */
21739 if (MAYBE_FLOAT_CLASS_P (regclass))
21740 return true;
21741
21742 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
21743 {
21744 /* Vector registers do not support QI or HImode loads. If we don't
21745 disallow a change to these modes, reload will assume it's ok to
21746 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
21747 the vec_dupv4hi pattern. */
21748 if (GET_MODE_SIZE (from) < 4)
21749 return true;
21750
21751 /* Vector registers do not support subreg with nonzero offsets, which
21752 are otherwise valid for integer registers. Since we can't see
21753 whether we have a nonzero offset from here, prohibit all
21754 nonparadoxical subregs changing size. */
21755 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
21756 return true;
21757 }
21758
21759 return false;
21760 }
21761
21762 /* Return the cost of moving data of mode M between a
21763 register and memory. A value of 2 is the default; this cost is
21764 relative to those in `REGISTER_MOVE_COST'.
21765
21766 This function is used extensively by register_move_cost, which is used to
21767 build tables at startup. Make it inline in this case.
21768 When IN is 2, return the maximum of the in and out move costs.
21769
21770 If moving between registers and memory is more expensive than
21771 between two registers, you should define this macro to express the
21772 relative cost.
21773
21774 Also model the increased cost of moving QImode registers in non
21775 Q_REGS classes.
21776 */
21777 static inline int
21778 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
21779 int in)
21780 {
21781 int cost;
21782 if (FLOAT_CLASS_P (regclass))
21783 {
21784 int index;
21785 switch (mode)
21786 {
21787 case SFmode:
21788 index = 0;
21789 break;
21790 case DFmode:
21791 index = 1;
21792 break;
21793 case XFmode:
21794 index = 2;
21795 break;
21796 default:
21797 return 100;
21798 }
21799 if (in == 2)
21800 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
21801 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
21802 }
21803 if (SSE_CLASS_P (regclass))
21804 {
21805 int index;
21806 switch (GET_MODE_SIZE (mode))
21807 {
21808 case 4:
21809 index = 0;
21810 break;
21811 case 8:
21812 index = 1;
21813 break;
21814 case 16:
21815 index = 2;
21816 break;
21817 default:
21818 return 100;
21819 }
21820 if (in == 2)
21821 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
21822 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
21823 }
21824 if (MMX_CLASS_P (regclass))
21825 {
21826 int index;
21827 switch (GET_MODE_SIZE (mode))
21828 {
21829 case 4:
21830 index = 0;
21831 break;
21832 case 8:
21833 index = 1;
21834 break;
21835 default:
21836 return 100;
21837 }
21838 if (in == 2)
21839 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
21840 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
21841 }
21842 switch (GET_MODE_SIZE (mode))
21843 {
21844 case 1:
21845 if (Q_CLASS_P (regclass) || TARGET_64BIT)
21846 {
21847 if (!in)
21848 return ix86_cost->int_store[0];
21849 if (TARGET_PARTIAL_REG_DEPENDENCY && !optimize_size)
21850 cost = ix86_cost->movzbl_load;
21851 else
21852 cost = ix86_cost->int_load[0];
21853 if (in == 2)
21854 return MAX (cost, ix86_cost->int_store[0]);
21855 return cost;
21856 }
21857 else
21858 {
21859 if (in == 2)
21860 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
21861 if (in)
21862 return ix86_cost->movzbl_load;
21863 else
21864 return ix86_cost->int_store[0] + 4;
21865 }
21866 break;
21867 case 2:
21868 if (in == 2)
21869 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
21870 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
21871 default:
21872 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
21873 if (mode == TFmode)
21874 mode = XFmode;
21875 if (in == 2)
21876 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
21877 else if (in)
21878 cost = ix86_cost->int_load[2];
21879 else
21880 cost = ix86_cost->int_store[2];
21881 return (cost * (((int) GET_MODE_SIZE (mode)
21882 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
21883 }
21884 }
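/* A worked example of the default integer branch above, assuming the usual
   32-bit value UNITS_PER_WORD == 4: a DImode load into GENERAL_REGS costs
   ix86_cost->int_load[2] scaled by (8 + 4 - 1) / 4 == 2 word-sized moves,
   and IN == 2 first takes the larger of the per-word load and store costs
   before that scaling.  */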
21885
21886 int
21887 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
21888 {
21889 return inline_memory_move_cost (mode, regclass, in);
21890 }
21891
21892
21893 /* Return the cost of moving data from a register in class CLASS1 to
21894 one in class CLASS2.
21895
21896 It is not required that the cost always equal 2 when FROM is the same as TO;
21897 on some machines it is expensive to move between registers if they are not
21898 general registers. */
21899
21900 int
21901 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
21902 enum reg_class class2)
21903 {
21904 /* In case we require secondary memory, compute the cost of the store followed
21905 by the load. In order to avoid bad register allocation choices, we need
21906 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
21907
21908 if (inline_secondary_memory_needed (class1, class2, mode, 0))
21909 {
21910 int cost = 1;
21911
21912 cost += inline_memory_move_cost (mode, class1, 2);
21913 cost += inline_memory_move_cost (mode, class2, 2);
21914
21915 /* In case of copying from a general purpose register we may emit multiple
21916 stores followed by a single load, causing a memory size mismatch stall.
21917 Count this as an arbitrarily high cost of 20. */
21918 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
21919 cost += 20;
21920
21921 /* In the case of FP/MMX moves, the registers actually overlap, and we
21922 have to switch modes in order to treat them differently. */
21923 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
21924 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
21925 cost += 20;
21926
21927 return cost;
21928 }
21929
21930 /* Moves between SSE/MMX and integer unit are expensive. */
21931 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
21932 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
21933
21934 /* ??? By keeping the returned value relatively high, we limit the number
21935 of moves between integer and MMX/SSE registers for all targets.
21936 Additionally, the high value prevents a problem with x86_modes_tieable_p(),
21937 where integer modes in MMX/SSE registers are not tieable
21938 because of missing QImode and HImode moves to, from or between
21939 MMX/SSE registers. */
21940 return MAX (ix86_cost->mmxsse_to_integer, 8);
21941
21942 if (MAYBE_FLOAT_CLASS_P (class1))
21943 return ix86_cost->fp_move;
21944 if (MAYBE_SSE_CLASS_P (class1))
21945 return ix86_cost->sse_move;
21946 if (MAYBE_MMX_CLASS_P (class1))
21947 return ix86_cost->mmx_move;
21948 return 2;
21949 }
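/* A worked example of the secondary-memory path above: copying an SFmode
   value between FLOAT_REGS and SSE_REGS needs a store plus a load, so the
   returned cost is 1 plus the IN == 2 memory-move cost of each class (the
   maximum of its load and store cost), rather than a plain
   register-to-register move cost.  */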
21950
21951 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
21952
21953 bool
21954 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
21955 {
21956 /* Flags and only flags can only hold CCmode values. */
21957 if (CC_REGNO_P (regno))
21958 return GET_MODE_CLASS (mode) == MODE_CC;
21959 if (GET_MODE_CLASS (mode) == MODE_CC
21960 || GET_MODE_CLASS (mode) == MODE_RANDOM
21961 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
21962 return 0;
21963 if (FP_REGNO_P (regno))
21964 return VALID_FP_MODE_P (mode);
21965 if (SSE_REGNO_P (regno))
21966 {
21967 /* We implement the move patterns for all vector modes into and
21968 out of SSE registers, even when no operation instructions
21969 are available. */
21970 return (VALID_SSE_REG_MODE (mode)
21971 || VALID_SSE2_REG_MODE (mode)
21972 || VALID_MMX_REG_MODE (mode)
21973 || VALID_MMX_REG_MODE_3DNOW (mode));
21974 }
21975 if (MMX_REGNO_P (regno))
21976 {
21977 /* We implement the move patterns for 3DNOW modes even in MMX mode,
21978 so if the register is available at all, then we can move data of
21979 the given mode into or out of it. */
21980 return (VALID_MMX_REG_MODE (mode)
21981 || VALID_MMX_REG_MODE_3DNOW (mode));
21982 }
21983
21984 if (mode == QImode)
21985 {
21986 /* Take care with QImode values - they can be in non-QI regs,
21987 but then they do cause partial register stalls. */
21988 if (regno < 4 || TARGET_64BIT)
21989 return 1;
21990 if (!TARGET_PARTIAL_REG_STALL)
21991 return 1;
21992 return reload_in_progress || reload_completed;
21993 }
21994 /* We handle both integer and floats in the general purpose registers. */
21995 else if (VALID_INT_MODE_P (mode))
21996 return 1;
21997 else if (VALID_FP_MODE_P (mode))
21998 return 1;
21999 else if (VALID_DFP_MODE_P (mode))
22000 return 1;
22001 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
22002 on to use that value in smaller contexts, this can easily force a
22003 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
22004 supporting DImode, allow it. */
22005 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
22006 return 1;
22007
22008 return 0;
22009 }
22010
22011 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
22012 tieable integer mode. */
22013
22014 static bool
22015 ix86_tieable_integer_mode_p (enum machine_mode mode)
22016 {
22017 switch (mode)
22018 {
22019 case HImode:
22020 case SImode:
22021 return true;
22022
22023 case QImode:
22024 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
22025
22026 case DImode:
22027 return TARGET_64BIT;
22028
22029 default:
22030 return false;
22031 }
22032 }
22033
22034 /* Return true if MODE1 is accessible in a register that can hold MODE2
22035 without copying. That is, all register classes that can hold MODE2
22036 can also hold MODE1. */
22037
22038 bool
22039 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
22040 {
22041 if (mode1 == mode2)
22042 return true;
22043
22044 if (ix86_tieable_integer_mode_p (mode1)
22045 && ix86_tieable_integer_mode_p (mode2))
22046 return true;
22047
22048 /* MODE2 being XFmode implies fp stack or general regs, which means we
22049 can tie any smaller floating point modes to it. Note that we do not
22050 tie this with TFmode. */
22051 if (mode2 == XFmode)
22052 return mode1 == SFmode || mode1 == DFmode;
22053
22054 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
22055 that we can tie it with SFmode. */
22056 if (mode2 == DFmode)
22057 return mode1 == SFmode;
22058
22059 /* If MODE2 is only appropriate for an SSE register, then tie with
22060 any other mode acceptable to SSE registers. */
22061 if (GET_MODE_SIZE (mode2) == 16
22062 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
22063 return (GET_MODE_SIZE (mode1) == 16
22064 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
22065
22066 /* If MODE2 is appropriate for an MMX register, then tie
22067 with any other mode acceptable to MMX registers. */
22068 if (GET_MODE_SIZE (mode2) == 8
22069 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
22070 return (GET_MODE_SIZE (mode1) == 8
22071 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
22072
22073 return false;
22074 }
22075
22076 /* Compute a (partial) cost for rtx X. Return true if the complete
22077 cost has been computed, and false if subexpressions should be
22078 scanned. In either case, *TOTAL contains the cost result. */
22079
22080 static bool
22081 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
22082 {
22083 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
22084 enum machine_mode mode = GET_MODE (x);
22085
22086 switch (code)
22087 {
22088 case CONST_INT:
22089 case CONST:
22090 case LABEL_REF:
22091 case SYMBOL_REF:
22092 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
22093 *total = 3;
22094 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
22095 *total = 2;
22096 else if (flag_pic && SYMBOLIC_CONST (x)
22097 && (!TARGET_64BIT
22098 || (GET_CODE (x) != LABEL_REF
22099 && (GET_CODE (x) != SYMBOL_REF
22100 || !SYMBOL_REF_LOCAL_P (x)))))
22101 *total = 1;
22102 else
22103 *total = 0;
22104 return true;
22105
22106 case CONST_DOUBLE:
22107 if (mode == VOIDmode)
22108 *total = 0;
22109 else
22110 switch (standard_80387_constant_p (x))
22111 {
22112 case 1: /* 0.0 */
22113 *total = 1;
22114 break;
22115 default: /* Other constants */
22116 *total = 2;
22117 break;
22118 case 0:
22119 case -1:
22120 /* Start with (MEM (SYMBOL_REF)), since that's where
22121 it'll probably end up. Add a penalty for size. */
22122 *total = (COSTS_N_INSNS (1)
22123 + (flag_pic != 0 && !TARGET_64BIT)
22124 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
22125 break;
22126 }
22127 return true;
22128
22129 case ZERO_EXTEND:
22130 /* The zero extension is often completely free on x86_64, so make
22131 it as cheap as possible. */
22132 if (TARGET_64BIT && mode == DImode
22133 && GET_MODE (XEXP (x, 0)) == SImode)
22134 *total = 1;
22135 else if (TARGET_ZERO_EXTEND_WITH_AND)
22136 *total = ix86_cost->add;
22137 else
22138 *total = ix86_cost->movzx;
22139 return false;
22140
22141 case SIGN_EXTEND:
22142 *total = ix86_cost->movsx;
22143 return false;
22144
22145 case ASHIFT:
22146 if (CONST_INT_P (XEXP (x, 1))
22147 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
22148 {
22149 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22150 if (value == 1)
22151 {
22152 *total = ix86_cost->add;
22153 return false;
22154 }
22155 if ((value == 2 || value == 3)
22156 && ix86_cost->lea <= ix86_cost->shift_const)
22157 {
22158 *total = ix86_cost->lea;
22159 return false;
22160 }
22161 }
22162 /* FALLTHRU */
22163
22164 case ROTATE:
22165 case ASHIFTRT:
22166 case LSHIFTRT:
22167 case ROTATERT:
22168 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
22169 {
22170 if (CONST_INT_P (XEXP (x, 1)))
22171 {
22172 if (INTVAL (XEXP (x, 1)) > 32)
22173 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
22174 else
22175 *total = ix86_cost->shift_const * 2;
22176 }
22177 else
22178 {
22179 if (GET_CODE (XEXP (x, 1)) == AND)
22180 *total = ix86_cost->shift_var * 2;
22181 else
22182 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
22183 }
22184 }
22185 else
22186 {
22187 if (CONST_INT_P (XEXP (x, 1)))
22188 *total = ix86_cost->shift_const;
22189 else
22190 *total = ix86_cost->shift_var;
22191 }
22192 return false;
22193
22194 case MULT:
22195 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22196 {
22197 /* ??? SSE scalar cost should be used here. */
22198 *total = ix86_cost->fmul;
22199 return false;
22200 }
22201 else if (X87_FLOAT_MODE_P (mode))
22202 {
22203 *total = ix86_cost->fmul;
22204 return false;
22205 }
22206 else if (FLOAT_MODE_P (mode))
22207 {
22208 /* ??? SSE vector cost should be used here. */
22209 *total = ix86_cost->fmul;
22210 return false;
22211 }
22212 else
22213 {
22214 rtx op0 = XEXP (x, 0);
22215 rtx op1 = XEXP (x, 1);
22216 int nbits;
22217 if (CONST_INT_P (XEXP (x, 1)))
22218 {
22219 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22220 for (nbits = 0; value != 0; value &= value - 1)
22221 nbits++;
22222 }
22223 else
22224 /* This is arbitrary. */
22225 nbits = 7;
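/* The loop above counts the set bits of the constant multiplier with the
   classic value &= value - 1 trick, which clears the lowest set bit on each
   iteration.  For example, a multiplier of 40 (binary 101000) yields
   nbits == 2, so the cost below is mult_init plus 2 * mult_bit plus the
   operand costs.  */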
22226
22227 /* Compute costs correctly for widening multiplication. */
22228 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
22229 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
22230 == GET_MODE_SIZE (mode))
22231 {
22232 int is_mulwiden = 0;
22233 enum machine_mode inner_mode = GET_MODE (op0);
22234
22235 if (GET_CODE (op0) == GET_CODE (op1))
22236 is_mulwiden = 1, op1 = XEXP (op1, 0);
22237 else if (CONST_INT_P (op1))
22238 {
22239 if (GET_CODE (op0) == SIGN_EXTEND)
22240 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
22241 == INTVAL (op1);
22242 else
22243 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
22244 }
22245
22246 if (is_mulwiden)
22247 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
22248 }
22249
22250 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
22251 + nbits * ix86_cost->mult_bit
22252 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
22253
22254 return true;
22255 }
22256
22257 case DIV:
22258 case UDIV:
22259 case MOD:
22260 case UMOD:
22261 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22262 /* ??? SSE cost should be used here. */
22263 *total = ix86_cost->fdiv;
22264 else if (X87_FLOAT_MODE_P (mode))
22265 *total = ix86_cost->fdiv;
22266 else if (FLOAT_MODE_P (mode))
22267 /* ??? SSE vector cost should be used here. */
22268 *total = ix86_cost->fdiv;
22269 else
22270 *total = ix86_cost->divide[MODE_INDEX (mode)];
22271 return false;
22272
22273 case PLUS:
22274 if (GET_MODE_CLASS (mode) == MODE_INT
22275 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
22276 {
22277 if (GET_CODE (XEXP (x, 0)) == PLUS
22278 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
22279 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
22280 && CONSTANT_P (XEXP (x, 1)))
22281 {
22282 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
22283 if (val == 2 || val == 4 || val == 8)
22284 {
22285 *total = ix86_cost->lea;
22286 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22287 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
22288 outer_code);
22289 *total += rtx_cost (XEXP (x, 1), outer_code);
22290 return true;
22291 }
22292 }
22293 else if (GET_CODE (XEXP (x, 0)) == MULT
22294 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
22295 {
22296 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
22297 if (val == 2 || val == 4 || val == 8)
22298 {
22299 *total = ix86_cost->lea;
22300 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22301 *total += rtx_cost (XEXP (x, 1), outer_code);
22302 return true;
22303 }
22304 }
22305 else if (GET_CODE (XEXP (x, 0)) == PLUS)
22306 {
22307 *total = ix86_cost->lea;
22308 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22309 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22310 *total += rtx_cost (XEXP (x, 1), outer_code);
22311 return true;
22312 }
22313 }
22314 /* FALLTHRU */
22315
22316 case MINUS:
22317 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22318 {
22319 /* ??? SSE cost should be used here. */
22320 *total = ix86_cost->fadd;
22321 return false;
22322 }
22323 else if (X87_FLOAT_MODE_P (mode))
22324 {
22325 *total = ix86_cost->fadd;
22326 return false;
22327 }
22328 else if (FLOAT_MODE_P (mode))
22329 {
22330 /* ??? SSE vector cost should be used here. */
22331 *total = ix86_cost->fadd;
22332 return false;
22333 }
22334 /* FALLTHRU */
22335
22336 case AND:
22337 case IOR:
22338 case XOR:
22339 if (!TARGET_64BIT && mode == DImode)
22340 {
22341 *total = (ix86_cost->add * 2
22342 + (rtx_cost (XEXP (x, 0), outer_code)
22343 << (GET_MODE (XEXP (x, 0)) != DImode))
22344 + (rtx_cost (XEXP (x, 1), outer_code)
22345 << (GET_MODE (XEXP (x, 1)) != DImode)));
22346 return true;
22347 }
22348 /* FALLTHRU */
22349
22350 case NEG:
22351 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22352 {
22353 /* ??? SSE cost should be used here. */
22354 *total = ix86_cost->fchs;
22355 return false;
22356 }
22357 else if (X87_FLOAT_MODE_P (mode))
22358 {
22359 *total = ix86_cost->fchs;
22360 return false;
22361 }
22362 else if (FLOAT_MODE_P (mode))
22363 {
22364 /* ??? SSE vector cost should be used here. */
22365 *total = ix86_cost->fchs;
22366 return false;
22367 }
22368 /* FALLTHRU */
22369
22370 case NOT:
22371 if (!TARGET_64BIT && mode == DImode)
22372 *total = ix86_cost->add * 2;
22373 else
22374 *total = ix86_cost->add;
22375 return false;
22376
22377 case COMPARE:
22378 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
22379 && XEXP (XEXP (x, 0), 1) == const1_rtx
22380 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
22381 && XEXP (x, 1) == const0_rtx)
22382 {
22383 /* This kind of construct is implemented using test[bwl].
22384 Treat it as if we had an AND. */
22385 *total = (ix86_cost->add
22386 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
22387 + rtx_cost (const1_rtx, outer_code));
22388 return true;
22389 }
22390 return false;
22391
22392 case FLOAT_EXTEND:
22393 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
22394 *total = 0;
22395 return false;
22396
22397 case ABS:
22398 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22399 /* ??? SSE cost should be used here. */
22400 *total = ix86_cost->fabs;
22401 else if (X87_FLOAT_MODE_P (mode))
22402 *total = ix86_cost->fabs;
22403 else if (FLOAT_MODE_P (mode))
22404 /* ??? SSE vector cost should be used here. */
22405 *total = ix86_cost->fabs;
22406 return false;
22407
22408 case SQRT:
22409 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22410 /* ??? SSE cost should be used here. */
22411 *total = ix86_cost->fsqrt;
22412 else if (X87_FLOAT_MODE_P (mode))
22413 *total = ix86_cost->fsqrt;
22414 else if (FLOAT_MODE_P (mode))
22415 /* ??? SSE vector cost should be used here. */
22416 *total = ix86_cost->fsqrt;
22417 return false;
22418
22419 case UNSPEC:
22420 if (XINT (x, 1) == UNSPEC_TP)
22421 *total = 0;
22422 return false;
22423
22424 default:
22425 return false;
22426 }
22427 }
22428
22429 #if TARGET_MACHO
22430
22431 static int current_machopic_label_num;
22432
22433 /* Given a symbol name and its associated stub, write out the
22434 definition of the stub. */
22435
22436 void
22437 machopic_output_stub (FILE *file, const char *symb, const char *stub)
22438 {
22439 unsigned int length;
22440 char *binder_name, *symbol_name, lazy_ptr_name[32];
22441 int label = ++current_machopic_label_num;
22442
22443 /* For 64-bit we shouldn't get here. */
22444 gcc_assert (!TARGET_64BIT);
22445
22446 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
22447 symb = (*targetm.strip_name_encoding) (symb);
22448
22449 length = strlen (stub);
22450 binder_name = alloca (length + 32);
22451 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
22452
22453 length = strlen (symb);
22454 symbol_name = alloca (length + 32);
22455 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
22456
22457 sprintf (lazy_ptr_name, "L%d$lz", label);
22458
22459 if (MACHOPIC_PURE)
22460 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
22461 else
22462 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
22463
22464 fprintf (file, "%s:\n", stub);
22465 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
22466
22467 if (MACHOPIC_PURE)
22468 {
22469 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
22470 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
22471 fprintf (file, "\tjmp\t*%%edx\n");
22472 }
22473 else
22474 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
22475
22476 fprintf (file, "%s:\n", binder_name);
22477
22478 if (MACHOPIC_PURE)
22479 {
22480 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
22481 fprintf (file, "\tpushl\t%%eax\n");
22482 }
22483 else
22484 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
22485
22486 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
22487
22488 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
22489 fprintf (file, "%s:\n", lazy_ptr_name);
22490 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
22491 fprintf (file, "\t.long %s\n", binder_name);
22492 }
22493
22494 void
22495 darwin_x86_file_end (void)
22496 {
22497 darwin_file_end ();
22498 ix86_file_end ();
22499 }
22500 #endif /* TARGET_MACHO */
22501
22502 /* Order the registers for register allocator. */
22503
22504 void
22505 x86_order_regs_for_local_alloc (void)
22506 {
22507 int pos = 0;
22508 int i;
22509
22510 /* First allocate the local general purpose registers. */
22511 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
22512 if (GENERAL_REGNO_P (i) && call_used_regs[i])
22513 reg_alloc_order [pos++] = i;
22514
22515 /* Global general purpose registers. */
22516 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
22517 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
22518 reg_alloc_order [pos++] = i;
22519
22520 /* x87 registers come first in case we are doing FP math
22521 using them. */
22522 if (!TARGET_SSE_MATH)
22523 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
22524 reg_alloc_order [pos++] = i;
22525
22526 /* SSE registers. */
22527 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
22528 reg_alloc_order [pos++] = i;
22529 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
22530 reg_alloc_order [pos++] = i;
22531
22532 /* x87 registers. */
22533 if (TARGET_SSE_MATH)
22534 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
22535 reg_alloc_order [pos++] = i;
22536
22537 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
22538 reg_alloc_order [pos++] = i;
22539
22540 /* Initialize the rest of the array, as we do not allocate some registers
22541 at all. */
22542 while (pos < FIRST_PSEUDO_REGISTER)
22543 reg_alloc_order [pos++] = 0;
22544 }
22545
22546 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
22547 struct attribute_spec.handler. */
22548 static tree
22549 ix86_handle_struct_attribute (tree *node, tree name,
22550 tree args ATTRIBUTE_UNUSED,
22551 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
22552 {
22553 tree *type = NULL;
22554 if (DECL_P (*node))
22555 {
22556 if (TREE_CODE (*node) == TYPE_DECL)
22557 type = &TREE_TYPE (*node);
22558 }
22559 else
22560 type = node;
22561
22562 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
22563 || TREE_CODE (*type) == UNION_TYPE)))
22564 {
22565 warning (OPT_Wattributes, "%qs attribute ignored",
22566 IDENTIFIER_POINTER (name));
22567 *no_add_attrs = true;
22568 }
22569
22570 else if ((is_attribute_p ("ms_struct", name)
22571 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
22572 || ((is_attribute_p ("gcc_struct", name)
22573 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
22574 {
22575 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
22576 IDENTIFIER_POINTER (name));
22577 *no_add_attrs = true;
22578 }
22579
22580 return NULL_TREE;
22581 }
22582
22583 static bool
22584 ix86_ms_bitfield_layout_p (const_tree record_type)
22585 {
22586 return (TARGET_MS_BITFIELD_LAYOUT &&
22587 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
22588 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
22589 }
22590
22591 /* Returns an expression indicating where the this parameter is
22592 located on entry to the FUNCTION. */
22593
22594 static rtx
22595 x86_this_parameter (tree function)
22596 {
22597 tree type = TREE_TYPE (function);
22598 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
22599
22600 if (TARGET_64BIT)
22601 {
22602 const int *parm_regs;
22603
22604 if (TARGET_64BIT_MS_ABI)
22605 parm_regs = x86_64_ms_abi_int_parameter_registers;
22606 else
22607 parm_regs = x86_64_int_parameter_registers;
22608 return gen_rtx_REG (DImode, parm_regs[aggr]);
22609 }
22610
22611 if (ix86_function_regparm (type, function) > 0 && !stdarg_p (type))
22612 {
22613 int regno = AX_REG;
22614 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
22615 regno = CX_REG;
22616 return gen_rtx_REG (SImode, regno);
22617 }
22618
22619 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
22620 }
22621
22622 /* Determine whether x86_output_mi_thunk can succeed. */
22623
22624 static bool
22625 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
22626 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
22627 HOST_WIDE_INT vcall_offset, const_tree function)
22628 {
22629 /* 64-bit can handle anything. */
22630 if (TARGET_64BIT)
22631 return true;
22632
22633 /* For 32-bit, everything's fine if we have one free register. */
22634 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
22635 return true;
22636
22637 /* Need a free register for vcall_offset. */
22638 if (vcall_offset)
22639 return false;
22640
22641 /* Need a free register for GOT references. */
22642 if (flag_pic && !(*targetm.binds_local_p) (function))
22643 return false;
22644
22645 /* Otherwise ok. */
22646 return true;
22647 }
22648
22649 /* Output the assembler code for a thunk function. THUNK_DECL is the
22650 declaration for the thunk function itself, FUNCTION is the decl for
22651 the target function. DELTA is an immediate constant offset to be
22652 added to THIS. If VCALL_OFFSET is nonzero, the word at
22653 *(*this + vcall_offset) should be added to THIS. */
22654
22655 static void
22656 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
22657 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
22658 HOST_WIDE_INT vcall_offset, tree function)
22659 {
22660 rtx xops[3];
22661 rtx this_param = x86_this_parameter (function);
22662 rtx this_reg, tmp;
22663
22664 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
22665 pull it in now and let DELTA benefit. */
22666 if (REG_P (this_param))
22667 this_reg = this_param;
22668 else if (vcall_offset)
22669 {
22670 /* Put the this parameter into %eax. */
22671 xops[0] = this_param;
22672 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
22673 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
22674 }
22675 else
22676 this_reg = NULL_RTX;
22677
22678 /* Adjust the this parameter by a fixed constant. */
22679 if (delta)
22680 {
22681 xops[0] = GEN_INT (delta);
22682 xops[1] = this_reg ? this_reg : this_param;
22683 if (TARGET_64BIT)
22684 {
22685 if (!x86_64_general_operand (xops[0], DImode))
22686 {
22687 tmp = gen_rtx_REG (DImode, R10_REG);
22688 xops[1] = tmp;
22689 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
22690 xops[0] = tmp;
22691 xops[1] = this_param;
22692 }
22693 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
22694 }
22695 else
22696 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
22697 }
22698
22699 /* Adjust the this parameter by a value stored in the vtable. */
22700 if (vcall_offset)
22701 {
22702 if (TARGET_64BIT)
22703 tmp = gen_rtx_REG (DImode, R10_REG);
22704 else
22705 {
22706 int tmp_regno = CX_REG;
22707 if (lookup_attribute ("fastcall",
22708 TYPE_ATTRIBUTES (TREE_TYPE (function))))
22709 tmp_regno = AX_REG;
22710 tmp = gen_rtx_REG (SImode, tmp_regno);
22711 }
22712
22713 xops[0] = gen_rtx_MEM (Pmode, this_reg);
22714 xops[1] = tmp;
22715 if (TARGET_64BIT)
22716 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
22717 else
22718 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
22719
22720 /* Adjust the this parameter. */
22721 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
22722 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
22723 {
22724 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
22725 xops[0] = GEN_INT (vcall_offset);
22726 xops[1] = tmp2;
22727 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
22728 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
22729 }
22730 xops[1] = this_reg;
22731 if (TARGET_64BIT)
22732 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
22733 else
22734 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
22735 }
22736
22737 /* If necessary, drop THIS back to its stack slot. */
22738 if (this_reg && this_reg != this_param)
22739 {
22740 xops[0] = this_reg;
22741 xops[1] = this_param;
22742 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
22743 }
22744
22745 xops[0] = XEXP (DECL_RTL (function), 0);
22746 if (TARGET_64BIT)
22747 {
22748 if (!flag_pic || (*targetm.binds_local_p) (function))
22749 output_asm_insn ("jmp\t%P0", xops);
22750 /* All thunks should be in the same object as their target,
22751 and thus binds_local_p should be true. */
22752 else if (TARGET_64BIT_MS_ABI)
22753 gcc_unreachable ();
22754 else
22755 {
22756 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
22757 tmp = gen_rtx_CONST (Pmode, tmp);
22758 tmp = gen_rtx_MEM (QImode, tmp);
22759 xops[0] = tmp;
22760 output_asm_insn ("jmp\t%A0", xops);
22761 }
22762 }
22763 else
22764 {
22765 if (!flag_pic || (*targetm.binds_local_p) (function))
22766 output_asm_insn ("jmp\t%P0", xops);
22767 else
22768 #if TARGET_MACHO
22769 if (TARGET_MACHO)
22770 {
22771 rtx sym_ref = XEXP (DECL_RTL (function), 0);
22772 tmp = (gen_rtx_SYMBOL_REF
22773 (Pmode,
22774 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
22775 tmp = gen_rtx_MEM (QImode, tmp);
22776 xops[0] = tmp;
22777 output_asm_insn ("jmp\t%0", xops);
22778 }
22779 else
22780 #endif /* TARGET_MACHO */
22781 {
22782 tmp = gen_rtx_REG (SImode, CX_REG);
22783 output_set_got (tmp, NULL_RTX);
22784
22785 xops[1] = tmp;
22786 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
22787 output_asm_insn ("jmp\t{*}%1", xops);
22788 }
22789 }
22790 }
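/* In rough C terms the emitted thunk behaves like the sketch below (names
   are illustrative only); everything after the adjustment is a tail jump,
   so the thunk never sets up a frame:

       retval thunk (void *this, ...)
       {
         this = (char *) this + DELTA;
         if (VCALL_OFFSET)
           this = (char *) this
                  + *(ptrdiff_t *) (*(char **) this + VCALL_OFFSET);
         return function (this, ...);
       }  */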
22791
22792 static void
22793 x86_file_start (void)
22794 {
22795 default_file_start ();
22796 #if TARGET_MACHO
22797 darwin_file_start ();
22798 #endif
22799 if (X86_FILE_START_VERSION_DIRECTIVE)
22800 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
22801 if (X86_FILE_START_FLTUSED)
22802 fputs ("\t.global\t__fltused\n", asm_out_file);
22803 if (ix86_asm_dialect == ASM_INTEL)
22804 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
22805 }
22806
22807 int
22808 x86_field_alignment (tree field, int computed)
22809 {
22810 enum machine_mode mode;
22811 tree type = TREE_TYPE (field);
22812
22813 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
22814 return computed;
22815 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
22816 ? get_inner_array_type (type) : type);
22817 if (mode == DFmode || mode == DCmode
22818 || GET_MODE_CLASS (mode) == MODE_INT
22819 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
22820 return MIN (32, computed);
22821 return computed;
22822 }
22823
22824 /* Output assembler code to FILE to increment profiler label # LABELNO
22825 for profiling a function entry. */
22826 void
22827 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
22828 {
22829 if (TARGET_64BIT)
22830 {
22831 #ifndef NO_PROFILE_COUNTERS
22832 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
22833 #endif
22834
22835 if (!TARGET_64BIT_MS_ABI && flag_pic)
22836 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
22837 else
22838 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
22839 }
22840 else if (flag_pic)
22841 {
22842 #ifndef NO_PROFILE_COUNTERS
22843 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
22844 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
22845 #endif
22846 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
22847 }
22848 else
22849 {
22850 #ifndef NO_PROFILE_COUNTERS
22851 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
22852 PROFILE_COUNT_REGISTER);
22853 #endif
22854 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
22855 }
22856 }
22857
22858 /* We don't have exact information about the insn sizes, but we may assume
22859 quite safely that we are informed about all 1 byte insns and memory
22860 address sizes. This is enough to eliminate unnecessary padding in
22861 99% of cases. */
22862
22863 static int
22864 min_insn_size (rtx insn)
22865 {
22866 int l = 0;
22867
22868 if (!INSN_P (insn) || !active_insn_p (insn))
22869 return 0;
22870
22871 /* Discard alignments we've emitted and jump instructions. */
22872 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
22873 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
22874 return 0;
22875 if (JUMP_P (insn)
22876 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
22877 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
22878 return 0;
22879
22880 /* Important case - calls are always 5 bytes.
22881 It is common to have many calls in a row. */
22882 if (CALL_P (insn)
22883 && symbolic_reference_mentioned_p (PATTERN (insn))
22884 && !SIBLING_CALL_P (insn))
22885 return 5;
22886 if (get_attr_length (insn) <= 1)
22887 return 1;
22888
22889 /* For normal instructions we may rely on the sizes of addresses
22890 and the presence of a symbol to require 4 bytes of encoding.
22891 This is not the case for jumps, where references are PC relative. */
22892 if (!JUMP_P (insn))
22893 {
22894 l = get_attr_length_address (insn);
22895 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
22896 l = 4;
22897 }
22898 if (l)
22899 return 1+l;
22900 else
22901 return 2;
22902 }
22903
22904 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16 byte
22905 window. */
22906
22907 static void
22908 ix86_avoid_jump_misspredicts (void)
22909 {
22910 rtx insn, start = get_insns ();
22911 int nbytes = 0, njumps = 0;
22912 int isjump = 0;
22913
22914 /* Look for all minimal intervals of instructions containing 4 jumps.
22915 The intervals are bounded by START and INSN. NBYTES is the total
22916 size of instructions in the interval including INSN and not including
22917 START. When NBYTES is smaller than 16 bytes, it is possible
22918 that the end of START and the end of INSN land in the same 16-byte page.
22919
22920 The smallest offset in the page INSN can start is the case where START
22921 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
22922 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
22923 */
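/* A worked example of the padding computed below: if the insns from just
   after START up to and including INSN add up to NBYTES == 12 and INSN is
   itself a 2-byte jump, then padsize == 15 - 12 + 2 == 5, and a gen_align
   request for up to 5 bytes is emitted before INSN, intended to push INSN
   out of the 16-byte window it would otherwise share with the three earlier
   jumps.  */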
22924 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
22925 {
22926
22927 nbytes += min_insn_size (insn);
22928 if (dump_file)
22929 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
22930 INSN_UID (insn), min_insn_size (insn));
22931 if ((JUMP_P (insn)
22932 && GET_CODE (PATTERN (insn)) != ADDR_VEC
22933 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
22934 || CALL_P (insn))
22935 njumps++;
22936 else
22937 continue;
22938
22939 while (njumps > 3)
22940 {
22941 start = NEXT_INSN (start);
22942 if ((JUMP_P (start)
22943 && GET_CODE (PATTERN (start)) != ADDR_VEC
22944 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
22945 || CALL_P (start))
22946 njumps--, isjump = 1;
22947 else
22948 isjump = 0;
22949 nbytes -= min_insn_size (start);
22950 }
22951 gcc_assert (njumps >= 0);
22952 if (dump_file)
22953 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
22954 INSN_UID (start), INSN_UID (insn), nbytes);
22955
22956 if (njumps == 3 && isjump && nbytes < 16)
22957 {
22958 int padsize = 15 - nbytes + min_insn_size (insn);
22959
22960 if (dump_file)
22961 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
22962 INSN_UID (insn), padsize);
22963 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
22964 }
22965 }
22966 }
22967
22968 /* AMD Athlon works faster
22969 when RET is neither the destination of a conditional jump nor directly
22970 preceded by another jump instruction. We avoid the penalty by inserting a
22971 NOP just before the RET instructions in such cases. */
22972 static void
22973 ix86_pad_returns (void)
22974 {
22975 edge e;
22976 edge_iterator ei;
22977
22978 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
22979 {
22980 basic_block bb = e->src;
22981 rtx ret = BB_END (bb);
22982 rtx prev;
22983 bool replace = false;
22984
22985 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
22986 || !maybe_hot_bb_p (bb))
22987 continue;
22988 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
22989 if (active_insn_p (prev) || LABEL_P (prev))
22990 break;
22991 if (prev && LABEL_P (prev))
22992 {
22993 edge e;
22994 edge_iterator ei;
22995
22996 FOR_EACH_EDGE (e, ei, bb->preds)
22997 if (EDGE_FREQUENCY (e) && e->src->index >= 0
22998 && !(e->flags & EDGE_FALLTHRU))
22999 replace = true;
23000 }
23001 if (!replace)
23002 {
23003 prev = prev_active_insn (ret);
23004 if (prev
23005 && ((JUMP_P (prev) && any_condjump_p (prev))
23006 || CALL_P (prev)))
23007 replace = true;
23008 /* Empty functions get a branch mispredict even when the jump destination
23009 is not visible to us. */
23010 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
23011 replace = true;
23012 }
23013 if (replace)
23014 {
23015 emit_insn_before (gen_return_internal_long (), ret);
23016 delete_insn (ret);
23017 }
23018 }
23019 }
23020
23021 /* Implement machine specific optimizations. We implement padding of returns
23022 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
23023 static void
23024 ix86_reorg (void)
23025 {
23026 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
23027 ix86_pad_returns ();
23028 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
23029 ix86_avoid_jump_misspredicts ();
23030 }
23031
23032 /* Return nonzero when QImode register that must be represented via REX prefix
23033 is used. */
23034 bool
23035 x86_extended_QIreg_mentioned_p (rtx insn)
23036 {
23037 int i;
23038 extract_insn_cached (insn);
23039 for (i = 0; i < recog_data.n_operands; i++)
23040 if (REG_P (recog_data.operand[i])
23041 && REGNO (recog_data.operand[i]) >= 4)
23042 return true;
23043 return false;
23044 }
23045
23046 /* Return nonzero when P points to register encoded via REX prefix.
23047 Called via for_each_rtx. */
23048 static int
23049 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
23050 {
23051 unsigned int regno;
23052 if (!REG_P (*p))
23053 return 0;
23054 regno = REGNO (*p);
23055 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
23056 }
23057
23058 /* Return true when INSN mentions register that must be encoded using REX
23059 prefix. */
23060 bool
23061 x86_extended_reg_mentioned_p (rtx insn)
23062 {
23063 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
23064 }
23065
23066 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
23067 optabs would emit if we didn't have TFmode patterns. */
23068
23069 void
23070 x86_emit_floatuns (rtx operands[2])
23071 {
23072 rtx neglab, donelab, i0, i1, f0, in, out;
23073 enum machine_mode mode, inmode;
23074
23075 inmode = GET_MODE (operands[1]);
23076 gcc_assert (inmode == SImode || inmode == DImode);
23077
23078 out = operands[0];
23079 in = force_reg (inmode, operands[1]);
23080 mode = GET_MODE (out);
23081 neglab = gen_label_rtx ();
23082 donelab = gen_label_rtx ();
23083 f0 = gen_reg_rtx (mode);
23084
23085 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
23086
23087 expand_float (out, in, 0);
23088
23089 emit_jump_insn (gen_jump (donelab));
23090 emit_barrier ();
23091
23092 emit_label (neglab);
23093
23094 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
23095 1, OPTAB_DIRECT);
23096 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
23097 1, OPTAB_DIRECT);
23098 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
23099
23100 expand_float (f0, i0, 0);
23101
23102 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
23103
23104 emit_label (donelab);
23105 }
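/* The negative path above is the standard unsigned-to-float trick: when the
   top bit of IN is set, the value is halved with the discarded low bit OR'd
   back in as a rounding (sticky) bit, converted as a now-nonnegative signed
   number, and then doubled.  Roughly, for the DImode case:

       if ((int64_t) in >= 0)
         out = (double) in;
       else
         {
           uint64_t half = ((uint64_t) in >> 1) | (in & 1);
           double f = (double) (int64_t) half;
           out = f + f;
         }  */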
23106 \f
23107 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23108 with all elements equal to VAR. Return true if successful. */
23109
23110 static bool
23111 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
23112 rtx target, rtx val)
23113 {
23114 enum machine_mode smode, wsmode, wvmode;
23115 rtx x;
23116
23117 switch (mode)
23118 {
23119 case V2SImode:
23120 case V2SFmode:
23121 if (!mmx_ok)
23122 return false;
23123 /* FALLTHRU */
23124
23125 case V2DFmode:
23126 case V2DImode:
23127 case V4SFmode:
23128 case V4SImode:
23129 val = force_reg (GET_MODE_INNER (mode), val);
23130 x = gen_rtx_VEC_DUPLICATE (mode, val);
23131 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23132 return true;
23133
23134 case V4HImode:
23135 if (!mmx_ok)
23136 return false;
23137 if (TARGET_SSE || TARGET_3DNOW_A)
23138 {
23139 val = gen_lowpart (SImode, val);
23140 x = gen_rtx_TRUNCATE (HImode, val);
23141 x = gen_rtx_VEC_DUPLICATE (mode, x);
23142 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23143 return true;
23144 }
23145 else
23146 {
23147 smode = HImode;
23148 wsmode = SImode;
23149 wvmode = V2SImode;
23150 goto widen;
23151 }
23152
23153 case V8QImode:
23154 if (!mmx_ok)
23155 return false;
23156 smode = QImode;
23157 wsmode = HImode;
23158 wvmode = V4HImode;
23159 goto widen;
23160 case V8HImode:
23161 if (TARGET_SSE2)
23162 {
23163 rtx tmp1, tmp2;
23164 /* Extend HImode to SImode using a paradoxical SUBREG. */
23165 tmp1 = gen_reg_rtx (SImode);
23166 emit_move_insn (tmp1, gen_lowpart (SImode, val));
23167 /* Insert the SImode value as low element of V4SImode vector. */
23168 tmp2 = gen_reg_rtx (V4SImode);
23169 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
23170 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
23171 CONST0_RTX (V4SImode),
23172 const1_rtx);
23173 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
23174 /* Cast the V4SImode vector back to a V8HImode vector. */
23175 tmp1 = gen_reg_rtx (V8HImode);
23176 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
23177 /* Duplicate the low short through the whole low SImode word. */
23178 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
23179 /* Cast the V8HImode vector back to a V4SImode vector. */
23180 tmp2 = gen_reg_rtx (V4SImode);
23181 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
23182 /* Replicate the low element of the V4SImode vector. */
23183 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
23184 /* Cast the V4SImode vector back to V8HImode, and store in target. */
23185 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
23186 return true;
23187 }
23188 smode = HImode;
23189 wsmode = SImode;
23190 wvmode = V4SImode;
23191 goto widen;
23192 case V16QImode:
23193 if (TARGET_SSE2)
23194 {
23195 rtx tmp1, tmp2;
23196 /* Extend QImode to SImode using a paradoxical SUBREG. */
23197 tmp1 = gen_reg_rtx (SImode);
23198 emit_move_insn (tmp1, gen_lowpart (SImode, val));
23199 /* Insert the SImode value as low element of V4SImode vector. */
23200 tmp2 = gen_reg_rtx (V4SImode);
23201 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
23202 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
23203 CONST0_RTX (V4SImode),
23204 const1_rtx);
23205 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
23206 /* Cast the V4SImode vector back to a V16QImode vector. */
23207 tmp1 = gen_reg_rtx (V16QImode);
23208 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
23209 /* Duplicate the low byte through the whole low SImode word. */
23210 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
23211 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
23212 /* Cast the V16QImode vector back to a V4SImode vector. */
23213 tmp2 = gen_reg_rtx (V4SImode);
23214 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
23215 /* Replicate the low element of the V4SImode vector. */
23216 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
23217 /* Cast the V4SImode vector back to V16QImode, and store in target. */
23218 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
23219 return true;
23220 }
23221 smode = QImode;
23222 wsmode = HImode;
23223 wvmode = V8HImode;
23224 goto widen;
23225 widen:
23226 /* Replicate the value once into the next wider mode and recurse. */
23227 val = convert_modes (wsmode, smode, val, true);
23228 x = expand_simple_binop (wsmode, ASHIFT, val,
23229 GEN_INT (GET_MODE_BITSIZE (smode)),
23230 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23231 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
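/* For example, duplicating the QImode byte 0xAB into V8QImode without SSE:
   the two statements above build the HImode value (0xAB << 8) | 0xAB ==
   0xABAB, and the recursive call below then only has to duplicate that
   halfword across a V4HImode vector before the lowpart move to the target. */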
23232
23233 x = gen_reg_rtx (wvmode);
23234 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
23235 gcc_unreachable ();
23236 emit_move_insn (target, gen_lowpart (mode, x));
23237 return true;
23238
23239 default:
23240 return false;
23241 }
23242 }
23243
23244 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23245 whose ONE_VAR element is VAR, and other elements are zero. Return true
23246 if successful. */
23247
23248 static bool
23249 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
23250 rtx target, rtx var, int one_var)
23251 {
23252 enum machine_mode vsimode;
23253 rtx new_target;
23254 rtx x, tmp;
23255
23256 switch (mode)
23257 {
23258 case V2SFmode:
23259 case V2SImode:
23260 if (!mmx_ok)
23261 return false;
23262 /* FALLTHRU */
23263
23264 case V2DFmode:
23265 case V2DImode:
23266 if (one_var != 0)
23267 return false;
23268 var = force_reg (GET_MODE_INNER (mode), var);
23269 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
23270 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23271 return true;
23272
23273 case V4SFmode:
23274 case V4SImode:
23275 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
23276 new_target = gen_reg_rtx (mode);
23277 else
23278 new_target = target;
23279 var = force_reg (GET_MODE_INNER (mode), var);
23280 x = gen_rtx_VEC_DUPLICATE (mode, var);
23281 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
23282 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
23283 if (one_var != 0)
23284 {
23285 /* We need to shuffle the value to the correct position, so
23286 create a new pseudo to store the intermediate result. */
23287
23288 /* With SSE2, we can use the integer shuffle insns. */
23289 if (mode != V4SFmode && TARGET_SSE2)
23290 {
23291 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
23292 GEN_INT (1),
23293 GEN_INT (one_var == 1 ? 0 : 1),
23294 GEN_INT (one_var == 2 ? 0 : 1),
23295 GEN_INT (one_var == 3 ? 0 : 1)));
23296 if (target != new_target)
23297 emit_move_insn (target, new_target);
23298 return true;
23299 }
23300
23301 /* Otherwise convert the intermediate result to V4SFmode and
23302 use the SSE1 shuffle instructions. */
23303 if (mode != V4SFmode)
23304 {
23305 tmp = gen_reg_rtx (V4SFmode);
23306 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
23307 }
23308 else
23309 tmp = new_target;
23310
23311 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
23312 GEN_INT (1),
23313 GEN_INT (one_var == 1 ? 0 : 1),
23314 GEN_INT (one_var == 2 ? 0+4 : 1+4),
23315 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
23316
23317 if (mode != V4SFmode)
23318 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
23319 else if (tmp != target)
23320 emit_move_insn (target, tmp);
23321 }
23322 else if (target != new_target)
23323 emit_move_insn (target, new_target);
23324 return true;
23325
23326 case V8HImode:
23327 case V16QImode:
23328 vsimode = V4SImode;
23329 goto widen;
23330 case V4HImode:
23331 case V8QImode:
23332 if (!mmx_ok)
23333 return false;
23334 vsimode = V2SImode;
23335 goto widen;
23336 widen:
23337 if (one_var != 0)
23338 return false;
23339
23340 /* Zero extend the variable element to SImode and recurse. */
23341 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
23342
23343 x = gen_reg_rtx (vsimode);
23344 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
23345 var, one_var))
23346 gcc_unreachable ();
23347
23348 emit_move_insn (target, gen_lowpart (mode, x));
23349 return true;
23350
23351 default:
23352 return false;
23353 }
23354 }
23355
23356 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23357 consisting of the values in VALS. It is known that all elements
23358 except ONE_VAR are constants. Return true if successful. */
23359
23360 static bool
23361 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
23362 rtx target, rtx vals, int one_var)
23363 {
23364 rtx var = XVECEXP (vals, 0, one_var);
23365 enum machine_mode wmode;
23366 rtx const_vec, x;
23367
23368 const_vec = copy_rtx (vals);
23369 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
23370 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
23371
23372 switch (mode)
23373 {
23374 case V2DFmode:
23375 case V2DImode:
23376 case V2SFmode:
23377 case V2SImode:
23378 /* For the two element vectors, it's just as easy to use
23379 the general case. */
23380 return false;
23381
23382 case V4SFmode:
23383 case V4SImode:
23384 case V8HImode:
23385 case V4HImode:
23386 break;
23387
23388 case V16QImode:
23389 wmode = V8HImode;
23390 goto widen;
23391 case V8QImode:
23392 wmode = V4HImode;
23393 goto widen;
23394 widen:
23395 /* There's no way to set one QImode entry easily. Combine
23396 the variable value with its adjacent constant value, and
23397 promote to an HImode set. */
23398 x = XVECEXP (vals, 0, one_var ^ 1);
23399 if (one_var & 1)
23400 {
23401 var = convert_modes (HImode, QImode, var, true);
23402 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
23403 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23404 x = GEN_INT (INTVAL (x) & 0xff);
23405 }
23406 else
23407 {
23408 var = convert_modes (HImode, QImode, var, true);
23409 x = gen_int_mode (INTVAL (x) << 8, HImode);
23410 }
23411 if (x != const0_rtx)
23412 var = expand_simple_binop (HImode, IOR, var, x, var,
23413 1, OPTAB_LIB_WIDEN);
23414
23415 x = gen_reg_rtx (wmode);
23416 emit_move_insn (x, gen_lowpart (wmode, const_vec));
23417 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
23418
23419 emit_move_insn (target, gen_lowpart (mode, x));
23420 return true;
23421
23422 default:
23423 return false;
23424 }
23425
23426 emit_move_insn (target, const_vec);
23427 ix86_expand_vector_set (mmx_ok, target, var, one_var);
23428 return true;
23429 }
23430
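/* A small worked instance of the QImode widening above, assuming
   ONE_VAR == 5 in a V16QImode vector: the variable byte is merged with
   its even neighbour, the known constant element 4,

     hi = ((unsigned short) var << 8) | (c4 & 0xff);

   and the combined HImode value is then stored as element 5 >> 1 == 2
   of the V8HImode view of the constant vector.  */
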
23431 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
23432 all values variable, and none identical. */
23433
23434 static void
23435 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
23436 rtx target, rtx vals)
23437 {
23438 enum machine_mode half_mode = GET_MODE_INNER (mode);
23439 rtx op0 = NULL, op1 = NULL;
23440 bool use_vec_concat = false;
23441
23442 switch (mode)
23443 {
23444 case V2SFmode:
23445 case V2SImode:
23446 if (!mmx_ok && !TARGET_SSE)
23447 break;
23448 /* FALLTHRU */
23449
23450 case V2DFmode:
23451 case V2DImode:
23452 /* For the two element vectors, we always implement VEC_CONCAT. */
23453 op0 = XVECEXP (vals, 0, 0);
23454 op1 = XVECEXP (vals, 0, 1);
23455 use_vec_concat = true;
23456 break;
23457
23458 case V4SFmode:
23459 half_mode = V2SFmode;
23460 goto half;
23461 case V4SImode:
23462 half_mode = V2SImode;
23463 goto half;
23464 half:
23465 {
23466 rtvec v;
23467
23468 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
23469 Recurse to load the two halves. */
23470
23471 op0 = gen_reg_rtx (half_mode);
23472 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
23473 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
23474
23475 op1 = gen_reg_rtx (half_mode);
23476 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
23477 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
23478
23479 use_vec_concat = true;
23480 }
23481 break;
23482
23483 case V8HImode:
23484 case V16QImode:
23485 case V4HImode:
23486 case V8QImode:
23487 break;
23488
23489 default:
23490 gcc_unreachable ();
23491 }
23492
23493 if (use_vec_concat)
23494 {
23495 if (!register_operand (op0, half_mode))
23496 op0 = force_reg (half_mode, op0);
23497 if (!register_operand (op1, half_mode))
23498 op1 = force_reg (half_mode, op1);
23499
23500 emit_insn (gen_rtx_SET (VOIDmode, target,
23501 gen_rtx_VEC_CONCAT (mode, op0, op1)));
23502 }
23503 else
23504 {
23505 int i, j, n_elts, n_words, n_elt_per_word;
23506 enum machine_mode inner_mode;
23507 rtx words[4], shift;
23508
23509 inner_mode = GET_MODE_INNER (mode);
23510 n_elts = GET_MODE_NUNITS (mode);
23511 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
23512 n_elt_per_word = n_elts / n_words;
23513 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
23514
23515 for (i = 0; i < n_words; ++i)
23516 {
23517 rtx word = NULL_RTX;
23518
23519 for (j = 0; j < n_elt_per_word; ++j)
23520 {
23521 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
23522 elt = convert_modes (word_mode, inner_mode, elt, true);
23523
23524 if (j == 0)
23525 word = elt;
23526 else
23527 {
23528 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
23529 word, 1, OPTAB_LIB_WIDEN);
23530 word = expand_simple_binop (word_mode, IOR, word, elt,
23531 word, 1, OPTAB_LIB_WIDEN);
23532 }
23533 }
23534
23535 words[i] = word;
23536 }
23537
23538 if (n_words == 1)
23539 emit_move_insn (target, gen_lowpart (mode, words[0]));
23540 else if (n_words == 2)
23541 {
23542 rtx tmp = gen_reg_rtx (mode);
23543 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
23544 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
23545 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
23546 emit_move_insn (target, tmp);
23547 }
23548 else if (n_words == 4)
23549 {
23550 rtx tmp = gen_reg_rtx (V4SImode);
23551 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
23552 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
23553 emit_move_insn (target, gen_lowpart (mode, tmp));
23554 }
23555 else
23556 gcc_unreachable ();
23557 }
23558 }
23559
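/* A worked instance of the word-packing loop above, assuming a 32-bit
   word_mode and a V8HImode vector {e0, e1, ..., e7}: each SImode word
   gathers two adjacent elements, lowest element in the low bits,

     words[0] = ((unsigned int) e1 << 16) | (unsigned short) e0;
     words[1] = ((unsigned int) e3 << 16) | (unsigned short) e2;

   and so on; the four words are then combined by recursing with
   V4SImode and taking the lowpart in the original mode.  */
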
23560 /* Initialize vector TARGET via VALS. Suppress the use of MMX
23561 instructions unless MMX_OK is true. */
23562
23563 void
23564 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
23565 {
23566 enum machine_mode mode = GET_MODE (target);
23567 enum machine_mode inner_mode = GET_MODE_INNER (mode);
23568 int n_elts = GET_MODE_NUNITS (mode);
23569 int n_var = 0, one_var = -1;
23570 bool all_same = true, all_const_zero = true;
23571 int i;
23572 rtx x;
23573
23574 for (i = 0; i < n_elts; ++i)
23575 {
23576 x = XVECEXP (vals, 0, i);
23577 if (!CONSTANT_P (x))
23578 n_var++, one_var = i;
23579 else if (x != CONST0_RTX (inner_mode))
23580 all_const_zero = false;
23581 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
23582 all_same = false;
23583 }
23584
23585 /* Constants are best loaded from the constant pool. */
23586 if (n_var == 0)
23587 {
23588 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
23589 return;
23590 }
23591
23592 /* If all values are identical, broadcast the value. */
23593 if (all_same
23594 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
23595 XVECEXP (vals, 0, 0)))
23596 return;
23597
23598 /* Values where only one field is non-constant are best loaded from
23599 the pool and overwritten via move later. */
23600 if (n_var == 1)
23601 {
23602 if (all_const_zero
23603 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
23604 XVECEXP (vals, 0, one_var),
23605 one_var))
23606 return;
23607
23608 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
23609 return;
23610 }
23611
23612 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
23613 }
23614
23615 void
23616 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
23617 {
23618 enum machine_mode mode = GET_MODE (target);
23619 enum machine_mode inner_mode = GET_MODE_INNER (mode);
23620 bool use_vec_merge = false;
23621 rtx tmp;
23622
23623 switch (mode)
23624 {
23625 case V2SFmode:
23626 case V2SImode:
23627 if (mmx_ok)
23628 {
23629 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
23630 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
23631 if (elt == 0)
23632 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
23633 else
23634 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
23635 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
23636 return;
23637 }
23638 break;
23639
23640 case V2DImode:
23641 use_vec_merge = TARGET_SSE4_1;
23642 if (use_vec_merge)
23643 break;
23644
23645 case V2DFmode:
23646 {
23647 rtx op0, op1;
23648
23649 /* For the two element vectors, we implement a VEC_CONCAT with
23650 the extraction of the other element. */
23651
23652 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
23653 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
23654
23655 if (elt == 0)
23656 op0 = val, op1 = tmp;
23657 else
23658 op0 = tmp, op1 = val;
23659
23660 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
23661 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
23662 }
23663 return;
23664
23665 case V4SFmode:
23666 use_vec_merge = TARGET_SSE4_1;
23667 if (use_vec_merge)
23668 break;
23669
23670 switch (elt)
23671 {
23672 case 0:
23673 use_vec_merge = true;
23674 break;
23675
23676 case 1:
23677 /* tmp = target = A B C D */
23678 tmp = copy_to_reg (target);
23679 /* target = A A B B */
23680 emit_insn (gen_sse_unpcklps (target, target, target));
23681 /* target = X A B B */
23682 ix86_expand_vector_set (false, target, val, 0);
23683 /* target = A X C D */
23684 emit_insn (gen_sse_shufps_1 (target, target, tmp,
23685 GEN_INT (1), GEN_INT (0),
23686 GEN_INT (2+4), GEN_INT (3+4)));
23687 return;
23688
23689 case 2:
23690 /* tmp = target = A B C D */
23691 tmp = copy_to_reg (target);
23692 /* tmp = X B C D */
23693 ix86_expand_vector_set (false, tmp, val, 0);
23694 /* target = A B X D */
23695 emit_insn (gen_sse_shufps_1 (target, target, tmp,
23696 GEN_INT (0), GEN_INT (1),
23697 GEN_INT (0+4), GEN_INT (3+4)));
23698 return;
23699
23700 case 3:
23701 /* tmp = target = A B C D */
23702 tmp = copy_to_reg (target);
23703 /* tmp = X B C D */
23704 ix86_expand_vector_set (false, tmp, val, 0);
23705 /* target = A B C X */
23706 emit_insn (gen_sse_shufps_1 (target, target, tmp,
23707 GEN_INT (0), GEN_INT (1),
23708 GEN_INT (2+4), GEN_INT (0+4)));
23709 return;
23710
23711 default:
23712 gcc_unreachable ();
23713 }
23714 break;
23715
23716 case V4SImode:
23717 use_vec_merge = TARGET_SSE4_1;
23718 if (use_vec_merge)
23719 break;
23720
23721 /* Element 0 handled by vec_merge below. */
23722 if (elt == 0)
23723 {
23724 use_vec_merge = true;
23725 break;
23726 }
23727
23728 if (TARGET_SSE2)
23729 {
23730 /* With SSE2, use integer shuffles to swap element 0 and ELT,
23731 store into element 0, then shuffle them back. */
23732
23733 rtx order[4];
23734
23735 order[0] = GEN_INT (elt);
23736 order[1] = const1_rtx;
23737 order[2] = const2_rtx;
23738 order[3] = GEN_INT (3);
23739 order[elt] = const0_rtx;
23740
23741 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
23742 order[1], order[2], order[3]));
23743
23744 ix86_expand_vector_set (false, target, val, 0);
23745
23746 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
23747 order[1], order[2], order[3]));
23748 }
23749 else
23750 {
23751 /* For SSE1, we have to reuse the V4SF code. */
23752 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
23753 gen_lowpart (SFmode, val), elt);
23754 }
23755 return;
23756
23757 case V8HImode:
23758 use_vec_merge = TARGET_SSE2;
23759 break;
23760 case V4HImode:
23761 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
23762 break;
23763
23764 case V16QImode:
23765 use_vec_merge = TARGET_SSE4_1;
23766 break;
23767
23768 case V8QImode:
23769 default:
23770 break;
23771 }
23772
23773 if (use_vec_merge)
23774 {
23775 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
23776 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
23777 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
23778 }
23779 else
23780 {
23781 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
23782
23783 emit_move_insn (mem, target);
23784
23785 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
23786 emit_move_insn (tmp, val);
23787
23788 emit_move_insn (target, mem);
23789 }
23790 }
23791
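/* The stack-temporary fallback at the end of the function above is the
   moral equivalent of going through a union, sketched here for V4SImode
   with hypothetical types:

     union { v4si v; int e[4]; } u;
     u.v = target;  u.e[elt] = val;  target = u.v;

   i.e. spill the whole vector, overwrite one element in memory, and
   reload the vector.  */
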
23792 void
23793 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
23794 {
23795 enum machine_mode mode = GET_MODE (vec);
23796 enum machine_mode inner_mode = GET_MODE_INNER (mode);
23797 bool use_vec_extr = false;
23798 rtx tmp;
23799
23800 switch (mode)
23801 {
23802 case V2SImode:
23803 case V2SFmode:
23804 if (!mmx_ok)
23805 break;
23806 /* FALLTHRU */
23807
23808 case V2DFmode:
23809 case V2DImode:
23810 use_vec_extr = true;
23811 break;
23812
23813 case V4SFmode:
23814 use_vec_extr = TARGET_SSE4_1;
23815 if (use_vec_extr)
23816 break;
23817
23818 switch (elt)
23819 {
23820 case 0:
23821 tmp = vec;
23822 break;
23823
23824 case 1:
23825 case 3:
23826 tmp = gen_reg_rtx (mode);
23827 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
23828 GEN_INT (elt), GEN_INT (elt),
23829 GEN_INT (elt+4), GEN_INT (elt+4)));
23830 break;
23831
23832 case 2:
23833 tmp = gen_reg_rtx (mode);
23834 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
23835 break;
23836
23837 default:
23838 gcc_unreachable ();
23839 }
23840 vec = tmp;
23841 use_vec_extr = true;
23842 elt = 0;
23843 break;
23844
23845 case V4SImode:
23846 use_vec_extr = TARGET_SSE4_1;
23847 if (use_vec_extr)
23848 break;
23849
23850 if (TARGET_SSE2)
23851 {
23852 switch (elt)
23853 {
23854 case 0:
23855 tmp = vec;
23856 break;
23857
23858 case 1:
23859 case 3:
23860 tmp = gen_reg_rtx (mode);
23861 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
23862 GEN_INT (elt), GEN_INT (elt),
23863 GEN_INT (elt), GEN_INT (elt)));
23864 break;
23865
23866 case 2:
23867 tmp = gen_reg_rtx (mode);
23868 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
23869 break;
23870
23871 default:
23872 gcc_unreachable ();
23873 }
23874 vec = tmp;
23875 use_vec_extr = true;
23876 elt = 0;
23877 }
23878 else
23879 {
23880 /* For SSE1, we have to reuse the V4SF code. */
23881 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
23882 gen_lowpart (V4SFmode, vec), elt);
23883 return;
23884 }
23885 break;
23886
23887 case V8HImode:
23888 use_vec_extr = TARGET_SSE2;
23889 break;
23890 case V4HImode:
23891 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
23892 break;
23893
23894 case V16QImode:
23895 use_vec_extr = TARGET_SSE4_1;
23896 break;
23897
23898 case V8QImode:
23899 /* ??? Could extract the appropriate HImode element and shift. */
23900 default:
23901 break;
23902 }
23903
23904 if (use_vec_extr)
23905 {
23906 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
23907 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
23908
23909 /* Let the rtl optimizers know about the zero extension performed. */
23910 if (inner_mode == QImode || inner_mode == HImode)
23911 {
23912 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
23913 target = gen_lowpart (SImode, target);
23914 }
23915
23916 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
23917 }
23918 else
23919 {
23920 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
23921
23922 emit_move_insn (mem, vec);
23923
23924 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
23925 emit_move_insn (target, tmp);
23926 }
23927 }
23928
23929 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
23930 pattern to reduce; DEST is the destination; IN is the input vector. */
23931
23932 void
23933 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
23934 {
23935 rtx tmp1, tmp2, tmp3;
23936
23937 tmp1 = gen_reg_rtx (V4SFmode);
23938 tmp2 = gen_reg_rtx (V4SFmode);
23939 tmp3 = gen_reg_rtx (V4SFmode);
23940
23941 emit_insn (gen_sse_movhlps (tmp1, in, in));
23942 emit_insn (fn (tmp2, tmp1, in));
23943
23944 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
23945 GEN_INT (1), GEN_INT (1),
23946 GEN_INT (1+4), GEN_INT (1+4)));
23947 emit_insn (fn (dest, tmp2, tmp3));
23948 }
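
/* Element-by-element sketch of the reduction above, shown for addition
   (any commutative, associative FN behaves the same) with
   IN = {a, b, c, d}:

     tmp1 = movhlps (in, in)               = { c,   d,   c,   d   }
     tmp2 = tmp1 + in                      = { a+c, b+d, c+c, d+d }
     tmp3 = shufps (tmp2, tmp2, 1,1,1,1)   = { b+d, b+d, b+d, b+d }
     dest = tmp2 + tmp3

   so element 0 of DEST holds (a+c) + (b+d), the full reduction.  */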
23949 \f
23950 /* Target hook for scalar_mode_supported_p. */
23951 static bool
23952 ix86_scalar_mode_supported_p (enum machine_mode mode)
23953 {
23954 if (DECIMAL_FLOAT_MODE_P (mode))
23955 return true;
23956 else if (mode == TFmode)
23957 return TARGET_64BIT;
23958 else
23959 return default_scalar_mode_supported_p (mode);
23960 }
23961
23962 /* Implements target hook vector_mode_supported_p. */
23963 static bool
23964 ix86_vector_mode_supported_p (enum machine_mode mode)
23965 {
23966 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
23967 return true;
23968 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
23969 return true;
23970 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
23971 return true;
23972 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
23973 return true;
23974 return false;
23975 }
23976
23977 /* Target hook for c_mode_for_suffix. */
23978 static enum machine_mode
23979 ix86_c_mode_for_suffix (char suffix)
23980 {
23981 if (TARGET_64BIT && suffix == 'q')
23982 return TFmode;
23983 if (TARGET_MMX && suffix == 'w')
23984 return XFmode;
23985
23986 return VOIDmode;
23987 }
23988
23989 /* Worker function for TARGET_MD_ASM_CLOBBERS.
23990
23991 We do this in the new i386 backend to maintain source compatibility
23992 with the old cc0-based compiler. */
23993
23994 static tree
23995 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
23996 tree inputs ATTRIBUTE_UNUSED,
23997 tree clobbers)
23998 {
23999 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
24000 clobbers);
24001 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
24002 clobbers);
24003 return clobbers;
24004 }
24005
24006 /* Implements target vector targetm.asm.encode_section_info. This
24007 is not used by netware. */
24008
24009 static void ATTRIBUTE_UNUSED
24010 ix86_encode_section_info (tree decl, rtx rtl, int first)
24011 {
24012 default_encode_section_info (decl, rtl, first);
24013
24014 if (TREE_CODE (decl) == VAR_DECL
24015 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
24016 && ix86_in_large_data_p (decl))
24017 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
24018 }
24019
24020 /* Worker function for REVERSE_CONDITION. */
24021
24022 enum rtx_code
24023 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
24024 {
24025 return (mode != CCFPmode && mode != CCFPUmode
24026 ? reverse_condition (code)
24027 : reverse_condition_maybe_unordered (code));
24028 }
24029
24030 /* Output code to perform an x87 FP register move, from OPERANDS[1]
24031 to OPERANDS[0]. */
24032
24033 const char *
24034 output_387_reg_move (rtx insn, rtx *operands)
24035 {
24036 if (REG_P (operands[0]))
24037 {
24038 if (REG_P (operands[1])
24039 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24040 {
24041 if (REGNO (operands[0]) == FIRST_STACK_REG)
24042 return output_387_ffreep (operands, 0);
24043 return "fstp\t%y0";
24044 }
24045 if (STACK_TOP_P (operands[0]))
24046 return "fld%z1\t%y1";
24047 return "fst\t%y0";
24048 }
24049 else if (MEM_P (operands[0]))
24050 {
24051 gcc_assert (REG_P (operands[1]));
24052 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24053 return "fstp%z0\t%y0";
24054 else
24055 {
24056 /* There is no non-popping store to memory for XFmode.
24057 So if we need one, follow the store with a load. */
24058 if (GET_MODE (operands[0]) == XFmode)
24059 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
24060 else
24061 return "fst%z0\t%y0";
24062 }
24063 }
24064 else
24065 gcc_unreachable();
24066 }
24067
24068 /* Output code to perform a conditional jump to LABEL, if C2 flag in
24069 FP status register is set. */
24070
24071 void
24072 ix86_emit_fp_unordered_jump (rtx label)
24073 {
24074 rtx reg = gen_reg_rtx (HImode);
24075 rtx temp;
24076
24077 emit_insn (gen_x86_fnstsw_1 (reg));
24078
24079 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
24080 {
24081 emit_insn (gen_x86_sahf_1 (reg));
24082
24083 temp = gen_rtx_REG (CCmode, FLAGS_REG);
24084 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
24085 }
24086 else
24087 {
24088 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
24089
24090 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24091 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
24092 }
24093
24094 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
24095 gen_rtx_LABEL_REF (VOIDmode, label),
24096 pc_rtx);
24097 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
24098
24099 emit_jump_insn (temp);
24100 predict_jump (REG_BR_PROB_BASE * 10 / 100);
24101 }
24102
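/* In the non-SAHF path above, fnstsw leaves the FP status word in a
   16-bit register; C2 is bit 10 of that word, i.e. bit 2 of its high
   byte, hence the 0x04 test.  The emitted sequence amounts to

     fnstsw  %ax
     testb   $0x04, %ah
     jne     label

   (a sketch of the typical assembly).  */
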
24103 /* Output code to perform a log1p XFmode calculation. */
24104
24105 void ix86_emit_i387_log1p (rtx op0, rtx op1)
24106 {
24107 rtx label1 = gen_label_rtx ();
24108 rtx label2 = gen_label_rtx ();
24109
24110 rtx tmp = gen_reg_rtx (XFmode);
24111 rtx tmp2 = gen_reg_rtx (XFmode);
24112
24113 emit_insn (gen_absxf2 (tmp, op1));
24114 emit_insn (gen_cmpxf (tmp,
24115 CONST_DOUBLE_FROM_REAL_VALUE (
24116 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
24117 XFmode)));
24118 emit_jump_insn (gen_bge (label1));
24119
24120 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
24121 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
24122 emit_jump (label2);
24123
24124 emit_label (label1);
24125 emit_move_insn (tmp, CONST1_RTX (XFmode));
24126 emit_insn (gen_addxf3 (tmp, op1, tmp));
24127 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
24128 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
24129
24130 emit_label (label2);
24131 }
24132
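/* The magic constant in the comparison above is 1 - sqrt(2)/2, the bound
   below which fyl2xp1 is specified to be accurate.  Both branches rely
   on the identity

     log1p (x) = ln (2) * log2 (1 + x)

   with fldln2 supplying the ln (2) factor: fyl2xp1 handles small |x|
   directly, fyl2x handles the explicit 1 + x form otherwise.  */
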
24133 /* Output code to perform a Newton-Raphson approximation of a single precision
24134 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
24135
24136 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
24137 {
24138 rtx x0, x1, e0, e1, two;
24139
24140 x0 = gen_reg_rtx (mode);
24141 e0 = gen_reg_rtx (mode);
24142 e1 = gen_reg_rtx (mode);
24143 x1 = gen_reg_rtx (mode);
24144
24145 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
24146
24147 if (VECTOR_MODE_P (mode))
24148 two = ix86_build_const_vector (SFmode, true, two);
24149
24150 two = force_reg (mode, two);
24151
24152 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
24153
24154 /* x0 = 1./b estimate */
24155 emit_insn (gen_rtx_SET (VOIDmode, x0,
24156 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
24157 UNSPEC_RCP)));
24158 /* e0 = x0 * b */
24159 emit_insn (gen_rtx_SET (VOIDmode, e0,
24160 gen_rtx_MULT (mode, x0, b)));
24161 /* e1 = 2. - e0 */
24162 emit_insn (gen_rtx_SET (VOIDmode, e1,
24163 gen_rtx_MINUS (mode, two, e0)));
24164 /* x1 = x0 * e1 */
24165 emit_insn (gen_rtx_SET (VOIDmode, x1,
24166 gen_rtx_MULT (mode, x0, e1)));
24167 /* res = a * x1 */
24168 emit_insn (gen_rtx_SET (VOIDmode, res,
24169 gen_rtx_MULT (mode, a, x1)));
24170 }
24171
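/* Scalar sketch of the sequence above, with approx_rcp () standing in
   for the hardware rcpss/rcpps estimate (roughly 12 bits of precision);
   a single Newton-Raphson step roughly doubles that precision:

     x0  = approx_rcp (b);
     x1  = x0 * (2.0f - b * x0);
     res = a * x1;

   which is exactly the a * rcp(b) * (2.0 - b * rcp(b)) form noted in
   the function body.  */
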
24172 /* Output code to perform a Newton-Raphson approximation of a
24173 single precision floating point [reciprocal] square root. */
24174
24175 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
24176 bool recip)
24177 {
24178 rtx x0, e0, e1, e2, e3, three, half, zero, mask;
24179
24180 x0 = gen_reg_rtx (mode);
24181 e0 = gen_reg_rtx (mode);
24182 e1 = gen_reg_rtx (mode);
24183 e2 = gen_reg_rtx (mode);
24184 e3 = gen_reg_rtx (mode);
24185
24186 three = CONST_DOUBLE_FROM_REAL_VALUE (dconst3, SFmode);
24187 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, SFmode);
24188
24189 mask = gen_reg_rtx (mode);
24190
24191 if (VECTOR_MODE_P (mode))
24192 {
24193 three = ix86_build_const_vector (SFmode, true, three);
24194 half = ix86_build_const_vector (SFmode, true, half);
24195 }
24196
24197 three = force_reg (mode, three);
24198 half = force_reg (mode, half);
24199
24200 zero = force_reg (mode, CONST0_RTX(mode));
24201
24202 /* sqrt(a) = 0.5 * a * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a))
24203 1.0 / sqrt(a) = 0.5 * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a)) */
24204
24205 /* Compare a to zero. */
24206 emit_insn (gen_rtx_SET (VOIDmode, mask,
24207 gen_rtx_NE (mode, a, zero)));
24208
24209 /* x0 = 1./sqrt(a) estimate */
24210 emit_insn (gen_rtx_SET (VOIDmode, x0,
24211 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
24212 UNSPEC_RSQRT)));
24213 /* Filter out infinity. */
24214 if (VECTOR_MODE_P (mode))
24215 emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (V4SFmode, x0),
24216 gen_rtx_AND (mode,
24217 gen_lowpart (V4SFmode, x0),
24218 gen_lowpart (V4SFmode, mask))));
24219 else
24220 emit_insn (gen_rtx_SET (VOIDmode, x0,
24221 gen_rtx_AND (mode, x0, mask)));
24222
24223 /* e0 = x0 * a */
24224 emit_insn (gen_rtx_SET (VOIDmode, e0,
24225 gen_rtx_MULT (mode, x0, a)));
24226 /* e1 = e0 * x0 */
24227 emit_insn (gen_rtx_SET (VOIDmode, e1,
24228 gen_rtx_MULT (mode, e0, x0)));
24229 /* e2 = 3. - e1 */
24230 emit_insn (gen_rtx_SET (VOIDmode, e2,
24231 gen_rtx_MINUS (mode, three, e1)));
24232 if (recip)
24233 /* e3 = .5 * x0 */
24234 emit_insn (gen_rtx_SET (VOIDmode, e3,
24235 gen_rtx_MULT (mode, half, x0)));
24236 else
24237 /* e3 = .5 * e0 */
24238 emit_insn (gen_rtx_SET (VOIDmode, e3,
24239 gen_rtx_MULT (mode, half, e0)));
24240 /* ret = e2 * e3 */
24241 emit_insn (gen_rtx_SET (VOIDmode, res,
24242 gen_rtx_MULT (mode, e2, e3)));
24243 }
24244
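/* Scalar sketch of the non-reciprocal case above, with approx_rsqrt ()
   standing in for rsqrtss/rsqrtps and forced to 0.0 when a == 0.0 by
   the mask:

     x0  = approx_rsqrt (a);
     e0  = a * x0;
     res = 0.5f * e0 * (3.0f - e0 * x0);

   The a == 0 mask keeps the infinity produced by rsqrt(0) out of the
   products, so sqrt(0) still comes out as 0.  */
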
24245 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
24246
24247 static void ATTRIBUTE_UNUSED
24248 i386_solaris_elf_named_section (const char *name, unsigned int flags,
24249 tree decl)
24250 {
24251 /* With Binutils 2.15, the "@unwind" marker must be specified on
24252 every occurrence of the ".eh_frame" section, not just the first
24253 one. */
24254 if (TARGET_64BIT
24255 && strcmp (name, ".eh_frame") == 0)
24256 {
24257 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
24258 flags & SECTION_WRITE ? "aw" : "a");
24259 return;
24260 }
24261 default_elf_asm_named_section (name, flags, decl);
24262 }
24263
24264 /* Return the mangling of TYPE if it is an extended fundamental type. */
24265
24266 static const char *
24267 ix86_mangle_type (const_tree type)
24268 {
24269 type = TYPE_MAIN_VARIANT (type);
24270
24271 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
24272 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
24273 return NULL;
24274
24275 switch (TYPE_MODE (type))
24276 {
24277 case TFmode:
24278 /* __float128 is "g". */
24279 return "g";
24280 case XFmode:
24281 /* "long double" or __float80 is "e". */
24282 return "e";
24283 default:
24284 return NULL;
24285 }
24286 }
24287
24288 /* For 32-bit code we can save PIC register setup by using
24289 __stack_chk_fail_local hidden function instead of calling
24290 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
24291 register, so it is better to call __stack_chk_fail directly. */
24292
24293 static tree
24294 ix86_stack_protect_fail (void)
24295 {
24296 return TARGET_64BIT
24297 ? default_external_stack_protect_fail ()
24298 : default_hidden_stack_protect_fail ();
24299 }
24300
24301 /* Select a format to encode pointers in exception handling data. CODE
24302 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
24303 true if the symbol may be affected by dynamic relocations.
24304
24305 ??? All x86 object file formats are capable of representing this.
24306 After all, the relocation needed is the same as for the call insn.
24307 Whether or not a particular assembler allows us to enter such, I
24308 guess we'll have to see. */
24309 int
24310 asm_preferred_eh_data_format (int code, int global)
24311 {
24312 if (flag_pic)
24313 {
24314 int type = DW_EH_PE_sdata8;
24315 if (!TARGET_64BIT
24316 || ix86_cmodel == CM_SMALL_PIC
24317 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
24318 type = DW_EH_PE_sdata4;
24319 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
24320 }
24321 if (ix86_cmodel == CM_SMALL
24322 || (ix86_cmodel == CM_MEDIUM && code))
24323 return DW_EH_PE_udata4;
24324 return DW_EH_PE_absptr;
24325 }
24326 \f
24327 /* Expand copysign from SIGN to the positive value ABS_VALUE
24328 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
24329 the sign-bit. */
24330 static void
24331 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
24332 {
24333 enum machine_mode mode = GET_MODE (sign);
24334 rtx sgn = gen_reg_rtx (mode);
24335 if (mask == NULL_RTX)
24336 {
24337 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
24338 if (!VECTOR_MODE_P (mode))
24339 {
24340 /* We need to generate a scalar mode mask in this case. */
24341 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
24342 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
24343 mask = gen_reg_rtx (mode);
24344 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
24345 }
24346 }
24347 else
24348 mask = gen_rtx_NOT (mode, mask);
24349 emit_insn (gen_rtx_SET (VOIDmode, sgn,
24350 gen_rtx_AND (mode, mask, sign)));
24351 emit_insn (gen_rtx_SET (VOIDmode, result,
24352 gen_rtx_IOR (mode, abs_value, sgn)));
24353 }
24354
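/* Bit-level sketch of the combination above for DFmode (SFmode uses bit
   31 rather than bit 63):

     sgn    = sign & 0x8000000000000000       (isolate the sign bit)
     result = abs_value | sgn

   ABS_VALUE is known to have a clear sign bit, so the IOR simply copies
   the sign of SIGN onto it.  */
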
24355 /* Expand fabs (OP0) and return a new rtx that holds the result. The
24356 mask for masking out the sign-bit is stored in *SMASK, if that is
24357 non-null. */
24358 static rtx
24359 ix86_expand_sse_fabs (rtx op0, rtx *smask)
24360 {
24361 enum machine_mode mode = GET_MODE (op0);
24362 rtx xa, mask;
24363
24364 xa = gen_reg_rtx (mode);
24365 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
24366 if (!VECTOR_MODE_P (mode))
24367 {
24368 /* We need to generate a scalar mode mask in this case. */
24369 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
24370 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
24371 mask = gen_reg_rtx (mode);
24372 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
24373 }
24374 emit_insn (gen_rtx_SET (VOIDmode, xa,
24375 gen_rtx_AND (mode, op0, mask)));
24376
24377 if (smask)
24378 *smask = mask;
24379
24380 return xa;
24381 }
24382
24383 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
24384 swapping the operands if SWAP_OPERANDS is true. The expanded
24385 code is a forward jump to a newly created label in case the
24386 comparison is true. The generated label rtx is returned. */
24387 static rtx
24388 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
24389 bool swap_operands)
24390 {
24391 rtx label, tmp;
24392
24393 if (swap_operands)
24394 {
24395 tmp = op0;
24396 op0 = op1;
24397 op1 = tmp;
24398 }
24399
24400 label = gen_label_rtx ();
24401 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
24402 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24403 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
24404 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
24405 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24406 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
24407 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24408 JUMP_LABEL (tmp) = label;
24409
24410 return label;
24411 }
24412
24413 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
24414 using comparison code CODE. Operands are swapped for the comparison if
24415 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
24416 static rtx
24417 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
24418 bool swap_operands)
24419 {
24420 enum machine_mode mode = GET_MODE (op0);
24421 rtx mask = gen_reg_rtx (mode);
24422
24423 if (swap_operands)
24424 {
24425 rtx tmp = op0;
24426 op0 = op1;
24427 op1 = tmp;
24428 }
24429
24430 if (mode == DFmode)
24431 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
24432 gen_rtx_fmt_ee (code, mode, op0, op1)));
24433 else
24434 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
24435 gen_rtx_fmt_ee (code, mode, op0, op1)));
24436
24437 return mask;
24438 }
24439
24440 /* Generate and return a rtx of mode MODE for 2**n where n is the number
24441 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
24442 static rtx
24443 ix86_gen_TWO52 (enum machine_mode mode)
24444 {
24445 REAL_VALUE_TYPE TWO52r;
24446 rtx TWO52;
24447
24448 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
24449 TWO52 = const_double_from_real_value (TWO52r, mode);
24450 TWO52 = force_reg (mode, TWO52);
24451
24452 return TWO52;
24453 }
24454
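/* TWO52 is the magnitude at which the mantissa runs out of fraction
   bits: 2**52 for DFmode, 2**23 for SFmode.  For any |x| below that
   bound, x + TWO52 - TWO52 rounds x to an integer in the current
   rounding mode; e.g., assuming round-to-nearest,

     3.7 + 2**52  ==>  2**52 + 4.0     (the fraction cannot be kept)
     ... - 2**52  ==>  4.0

   This is the trick the rint/floor/ceil/round expanders below build
   on.  */
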
24455 /* Expand SSE sequence for computing lround from OP1 storing
24456 into OP0. */
24457 void
24458 ix86_expand_lround (rtx op0, rtx op1)
24459 {
24460 /* C code for the stuff we're doing below:
24461 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
24462 return (long)tmp;
24463 */
24464 enum machine_mode mode = GET_MODE (op1);
24465 const struct real_format *fmt;
24466 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
24467 rtx adj;
24468
24469 /* load nextafter (0.5, 0.0) */
24470 fmt = REAL_MODE_FORMAT (mode);
24471 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
24472 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
24473
24474 /* adj = copysign (0.5, op1) */
24475 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
24476 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
24477
24478 /* adj = op1 + adj */
24479 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
24480
24481 /* op0 = (imode)adj */
24482 expand_fix (op0, adj, 0);
24483 }
24484
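/* Why nextafter (0.5, 0.0) rather than plain 0.5: for the largest
   double just below 0.5, adding an exact 0.5 rounds the sum up to 1.0,
   and the final truncation would yield lround () == 1 instead of 0.
   With the slightly smaller addend the sum stays below 1.0:

     x = 0.5 - 2**-54             largest double < 0.5
     x + 0.5             ==>  1.0            (ties-to-even), wrong
     x + (0.5 - 2**-54)  ==>  1.0 - 2**-53,  truncates to 0, right.  */
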
24485 /* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
24486 into OP0. */
24487 void
24488 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
24489 {
24490 /* C code for the stuff we're doing below (for do_floor):
24491 xi = (long)op1;
24492 xi -= (double)xi > op1 ? 1 : 0;
24493 return xi;
24494 */
24495 enum machine_mode fmode = GET_MODE (op1);
24496 enum machine_mode imode = GET_MODE (op0);
24497 rtx ireg, freg, label, tmp;
24498
24499 /* reg = (long)op1 */
24500 ireg = gen_reg_rtx (imode);
24501 expand_fix (ireg, op1, 0);
24502
24503 /* freg = (double)reg */
24504 freg = gen_reg_rtx (fmode);
24505 expand_float (freg, ireg, 0);
24506
24507 /* ireg = (freg > op1) ? ireg - 1 : ireg */
24508 label = ix86_expand_sse_compare_and_jump (UNLE,
24509 freg, op1, !do_floor);
24510 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
24511 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
24512 emit_move_insn (ireg, tmp);
24513
24514 emit_label (label);
24515 LABEL_NUSES (label) = 1;
24516
24517 emit_move_insn (op0, ireg);
24518 }
24519
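/* The compensation step above matters for negative inputs, where the
   fix-to-integer conversion truncates toward zero rather than toward
   minus infinity.  E.g. for lfloor (-2.5):

     ireg = (long) -2.5        ==>  -2
     (double) -2 > -2.5        ==>  subtract 1
     result                    ==>  -3

   For the ceiling variant the comparison is reversed and 1 is added
   instead.  */
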
24520 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
24521 result in OPERAND0. */
24522 void
24523 ix86_expand_rint (rtx operand0, rtx operand1)
24524 {
24525 /* C code for the stuff we're doing below:
24526 xa = fabs (operand1);
24527 if (!isless (xa, 2**52))
24528 return operand1;
24529 xa = xa + 2**52 - 2**52;
24530 return copysign (xa, operand1);
24531 */
24532 enum machine_mode mode = GET_MODE (operand0);
24533 rtx res, xa, label, TWO52, mask;
24534
24535 res = gen_reg_rtx (mode);
24536 emit_move_insn (res, operand1);
24537
24538 /* xa = abs (operand1) */
24539 xa = ix86_expand_sse_fabs (res, &mask);
24540
24541 /* if (!isless (xa, TWO52)) goto label; */
24542 TWO52 = ix86_gen_TWO52 (mode);
24543 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24544
24545 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24546 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
24547
24548 ix86_sse_copysign_to_positive (res, xa, res, mask);
24549
24550 emit_label (label);
24551 LABEL_NUSES (label) = 1;
24552
24553 emit_move_insn (operand0, res);
24554 }
24555
24556 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
24557 into OPERAND0. */
24558 void
24559 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
24560 {
24561 /* C code for the stuff we expand below.
24562 double xa = fabs (x), x2;
24563 if (!isless (xa, TWO52))
24564 return x;
24565 xa = xa + TWO52 - TWO52;
24566 x2 = copysign (xa, x);
24567 Compensate. Floor:
24568 if (x2 > x)
24569 x2 -= 1;
24570 Compensate. Ceil:
24571 if (x2 < x)
24572 x2 -= -1;
24573 return x2;
24574 */
24575 enum machine_mode mode = GET_MODE (operand0);
24576 rtx xa, TWO52, tmp, label, one, res, mask;
24577
24578 TWO52 = ix86_gen_TWO52 (mode);
24579
24580 /* Temporary for holding the result, initialized to the input
24581 operand to ease control flow. */
24582 res = gen_reg_rtx (mode);
24583 emit_move_insn (res, operand1);
24584
24585 /* xa = abs (operand1) */
24586 xa = ix86_expand_sse_fabs (res, &mask);
24587
24588 /* if (!isless (xa, TWO52)) goto label; */
24589 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24590
24591 /* xa = xa + TWO52 - TWO52; */
24592 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24593 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
24594
24595 /* xa = copysign (xa, operand1) */
24596 ix86_sse_copysign_to_positive (xa, xa, res, mask);
24597
24598 /* generate 1.0 or -1.0 */
24599 one = force_reg (mode,
24600 const_double_from_real_value (do_floor
24601 ? dconst1 : dconstm1, mode));
24602
24603 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
24604 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
24605 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24606 gen_rtx_AND (mode, one, tmp)));
24607 /* We always need to subtract here to preserve signed zero. */
24608 tmp = expand_simple_binop (mode, MINUS,
24609 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
24610 emit_move_insn (res, tmp);
24611
24612 emit_label (label);
24613 LABEL_NUSES (label) = 1;
24614
24615 emit_move_insn (operand0, res);
24616 }
24617
24618 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
24619 into OPERAND0. */
24620 void
24621 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
24622 {
24623 /* C code for the stuff we expand below.
24624 double xa = fabs (x), x2;
24625 if (!isless (xa, TWO52))
24626 return x;
24627 x2 = (double)(long)x;
24628 Compensate. Floor:
24629 if (x2 > x)
24630 x2 -= 1;
24631 Compensate. Ceil:
24632 if (x2 < x)
24633 x2 += 1;
24634 if (HONOR_SIGNED_ZEROS (mode))
24635 return copysign (x2, x);
24636 return x2;
24637 */
24638 enum machine_mode mode = GET_MODE (operand0);
24639 rtx xa, xi, TWO52, tmp, label, one, res, mask;
24640
24641 TWO52 = ix86_gen_TWO52 (mode);
24642
24643 /* Temporary for holding the result, initialized to the input
24644 operand to ease control flow. */
24645 res = gen_reg_rtx (mode);
24646 emit_move_insn (res, operand1);
24647
24648 /* xa = abs (operand1) */
24649 xa = ix86_expand_sse_fabs (res, &mask);
24650
24651 /* if (!isless (xa, TWO52)) goto label; */
24652 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24653
24654 /* xa = (double)(long)x */
24655 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
24656 expand_fix (xi, res, 0);
24657 expand_float (xa, xi, 0);
24658
24659 /* generate 1.0 */
24660 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
24661
24662 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
24663 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
24664 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24665 gen_rtx_AND (mode, one, tmp)));
24666 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
24667 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
24668 emit_move_insn (res, tmp);
24669
24670 if (HONOR_SIGNED_ZEROS (mode))
24671 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
24672
24673 emit_label (label);
24674 LABEL_NUSES (label) = 1;
24675
24676 emit_move_insn (operand0, res);
24677 }
24678
24679 /* Expand SSE sequence for computing round from OPERAND1 storing
24680 into OPERAND0. This sequence works without relying on DImode truncation
24681 via cvttsd2siq, which is only available on 64-bit targets. */
24682 void
24683 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
24684 {
24685 /* C code for the stuff we expand below.
24686 double xa = fabs (x), xa2, x2;
24687 if (!isless (xa, TWO52))
24688 return x;
24689 Using the absolute value and copying back sign makes
24690 -0.0 -> -0.0 correct.
24691 xa2 = xa + TWO52 - TWO52;
24692 Compensate.
24693 dxa = xa2 - xa;
24694 if (dxa <= -0.5)
24695 xa2 += 1;
24696 else if (dxa > 0.5)
24697 xa2 -= 1;
24698 x2 = copysign (xa2, x);
24699 return x2;
24700 */
24701 enum machine_mode mode = GET_MODE (operand0);
24702 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
24703
24704 TWO52 = ix86_gen_TWO52 (mode);
24705
24706 /* Temporary for holding the result, initialized to the input
24707 operand to ease control flow. */
24708 res = gen_reg_rtx (mode);
24709 emit_move_insn (res, operand1);
24710
24711 /* xa = abs (operand1) */
24712 xa = ix86_expand_sse_fabs (res, &mask);
24713
24714 /* if (!isless (xa, TWO52)) goto label; */
24715 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24716
24717 /* xa2 = xa + TWO52 - TWO52; */
24718 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24719 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
24720
24721 /* dxa = xa2 - xa; */
24722 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
24723
24724 /* generate 0.5, 1.0 and -0.5 */
24725 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
24726 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
24727 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
24728 0, OPTAB_DIRECT);
24729
24730 /* Compensate. */
24731 tmp = gen_reg_rtx (mode);
24732 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
24733 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
24734 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24735 gen_rtx_AND (mode, one, tmp)));
24736 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
24737 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
24738 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
24739 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24740 gen_rtx_AND (mode, one, tmp)));
24741 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
24742
24743 /* res = copysign (xa2, operand1) */
24744 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
24745
24746 emit_label (label);
24747 LABEL_NUSES (label) = 1;
24748
24749 emit_move_insn (operand0, res);
24750 }
24751
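/* The dxa compensation above turns the round-to-nearest-even result of
   the TWO52 trick into the round-half-away-from-zero behaviour that
   round () requires.  E.g. for x = 2.5, assuming round-to-nearest:

     xa2 = 2.5 + TWO52 - TWO52   ==>  2.0   (ties to even)
     dxa = xa2 - xa              ==>  -0.5
     dxa <= -0.5                 ==>  xa2 += 1  ==>  3.0

   while x = 3.5 already lands on 4.0 and neither branch fires.  */
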
24752 /* Expand SSE sequence for computing trunc from OPERAND1 storing
24753 into OPERAND0. */
24754 void
24755 ix86_expand_trunc (rtx operand0, rtx operand1)
24756 {
24757 /* C code for SSE variant we expand below.
24758 double xa = fabs (x), x2;
24759 if (!isless (xa, TWO52))
24760 return x;
24761 x2 = (double)(long)x;
24762 if (HONOR_SIGNED_ZEROS (mode))
24763 return copysign (x2, x);
24764 return x2;
24765 */
24766 enum machine_mode mode = GET_MODE (operand0);
24767 rtx xa, xi, TWO52, label, res, mask;
24768
24769 TWO52 = ix86_gen_TWO52 (mode);
24770
24771 /* Temporary for holding the result, initialized to the input
24772 operand to ease control flow. */
24773 res = gen_reg_rtx (mode);
24774 emit_move_insn (res, operand1);
24775
24776 /* xa = abs (operand1) */
24777 xa = ix86_expand_sse_fabs (res, &mask);
24778
24779 /* if (!isless (xa, TWO52)) goto label; */
24780 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24781
24782 /* x = (double)(long)x */
24783 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
24784 expand_fix (xi, res, 0);
24785 expand_float (res, xi, 0);
24786
24787 if (HONOR_SIGNED_ZEROS (mode))
24788 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
24789
24790 emit_label (label);
24791 LABEL_NUSES (label) = 1;
24792
24793 emit_move_insn (operand0, res);
24794 }
24795
24796 /* Expand SSE sequence for computing trunc from OPERAND1 storing
24797 into OPERAND0. */
24798 void
24799 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
24800 {
24801 enum machine_mode mode = GET_MODE (operand0);
24802 rtx xa, mask, TWO52, label, one, res, smask, tmp;
24803
24804 /* C code for SSE variant we expand below.
24805 double xa = fabs (x), xa2, x2;
24806 if (!isless (xa, TWO52))
24807 return x;
24808 xa2 = xa + TWO52 - TWO52;
24809 Compensate:
24810 if (xa2 > xa)
24811 xa2 -= 1.0;
24812 x2 = copysign (xa2, x);
24813 return x2;
24814 */
24815
24816 TWO52 = ix86_gen_TWO52 (mode);
24817
24818 /* Temporary for holding the result, initialized to the input
24819 operand to ease control flow. */
24820 res = gen_reg_rtx (mode);
24821 emit_move_insn (res, operand1);
24822
24823 /* xa = abs (operand1) */
24824 xa = ix86_expand_sse_fabs (res, &smask);
24825
24826 /* if (!isless (xa, TWO52)) goto label; */
24827 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24828
24829 /* res = xa + TWO52 - TWO52; */
24830 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24831 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
24832 emit_move_insn (res, tmp);
24833
24834 /* generate 1.0 */
24835 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
24836
24837 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
24838 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
24839 emit_insn (gen_rtx_SET (VOIDmode, mask,
24840 gen_rtx_AND (mode, mask, one)));
24841 tmp = expand_simple_binop (mode, MINUS,
24842 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
24843 emit_move_insn (res, tmp);
24844
24845 /* res = copysign (res, operand1) */
24846 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
24847
24848 emit_label (label);
24849 LABEL_NUSES (label) = 1;
24850
24851 emit_move_insn (operand0, res);
24852 }
24853
24854 /* Expand SSE sequence for computing round from OPERAND1 storing
24855 into OPERAND0. */
24856 void
24857 ix86_expand_round (rtx operand0, rtx operand1)
24858 {
24859 /* C code for the stuff we're doing below:
24860 double xa = fabs (x);
24861 if (!isless (xa, TWO52))
24862 return x;
24863 xa = (double)(long)(xa + nextafter (0.5, 0.0));
24864 return copysign (xa, x);
24865 */
24866 enum machine_mode mode = GET_MODE (operand0);
24867 rtx res, TWO52, xa, label, xi, half, mask;
24868 const struct real_format *fmt;
24869 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
24870
24871 /* Temporary for holding the result, initialized to the input
24872 operand to ease control flow. */
24873 res = gen_reg_rtx (mode);
24874 emit_move_insn (res, operand1);
24875
24876 TWO52 = ix86_gen_TWO52 (mode);
24877 xa = ix86_expand_sse_fabs (res, &mask);
24878 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24879
24880 /* load nextafter (0.5, 0.0) */
24881 fmt = REAL_MODE_FORMAT (mode);
24882 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
24883 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
24884
24885 /* xa = xa + 0.5 */
24886 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
24887 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
24888
24889 /* xa = (double)(int64_t)xa */
24890 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
24891 expand_fix (xi, xa, 0);
24892 expand_float (xa, xi, 0);
24893
24894 /* res = copysign (xa, operand1) */
24895 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
24896
24897 emit_label (label);
24898 LABEL_NUSES (label) = 1;
24899
24900 emit_move_insn (operand0, res);
24901 }
24902
24903 \f
24904 /* Check whether an SSE5 instruction is valid or not.
24905 OPERANDS is the array of operands.
24906 NUM is the number of operands.
24907 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
24908 NUM_MEMORY is the maximum number of memory operands to accept. */
24909 bool
24910 ix86_sse5_valid_op_p (rtx operands[], rtx insn, int num, bool uses_oc0, int num_memory)
24911 {
24912 int mem_mask;
24913 int mem_count;
24914 int i;
24915
24916 /* Count the number of memory arguments */
24917 mem_mask = 0;
24918 mem_count = 0;
24919 for (i = 0; i < num; i++)
24920 {
24921 enum machine_mode mode = GET_MODE (operands[i]);
24922 if (register_operand (operands[i], mode))
24923 ;
24924
24925 else if (memory_operand (operands[i], mode))
24926 {
24927 mem_mask |= (1 << i);
24928 mem_count++;
24929 }
24930
24931 else
24932 {
24933 rtx pattern = PATTERN (insn);
24934
24935 /* allow 0 for pcmov */
24936 if (GET_CODE (pattern) != SET
24937 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
24938 || i < 2
24939 || operands[i] != CONST0_RTX (mode))
24940 return false;
24941 }
24942 }
24943
24944 /* If there were no memory operations, allow the insn */
24945 if (mem_mask == 0)
24946 return true;
24947
24948 /* Do not allow the destination register to be a memory operand. */
24949 else if (mem_mask & (1 << 0))
24950 return false;
24951
24952 /* If there are too many memory operations, disallow the instruction. While
24953 the hardware only allows one memory reference, before register allocation
24954 we sometimes allow two memory operands for certain insns so that code
24955 like the following can be optimized:
24956
24957 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
24958
24959 or similar cases that are vectorized into using the fmaddss
24960 instruction. */
24961 else if (mem_count > num_memory)
24962 return false;
24963
24964 /* Don't allow more than one memory operation if not optimizing. */
24965 else if (mem_count > 1 && !optimize)
24966 return false;
24967
24968 else if (num == 4 && mem_count == 1)
24969 {
24970 /* formats (destination is the first argument), example fmaddss:
24971 xmm1, xmm1, xmm2, xmm3/mem
24972 xmm1, xmm1, xmm2/mem, xmm3
24973 xmm1, xmm2, xmm3/mem, xmm1
24974 xmm1, xmm2/mem, xmm3, xmm1 */
24975 if (uses_oc0)
24976 return ((mem_mask == (1 << 1))
24977 || (mem_mask == (1 << 2))
24978 || (mem_mask == (1 << 3)));
24979
24980 /* format, example pmacsdd:
24981 xmm1, xmm2, xmm3/mem, xmm1 */
24982 else
24983 return (mem_mask == (1 << 2));
24984 }
24985
24986 else if (num == 4 && num_memory == 2)
24987 {
24988 /* If there are two memory operations, we can load one of the memory ops
24989 into the destination register. This is for optimizing the
24990 multiply/add ops, for which the combiner has optimized both the multiply
24991 and the add insns to have a memory operand. We have to be careful
24992 that the destination doesn't overlap with the inputs. */
24993 rtx op0 = operands[0];
24994
24995 if (reg_mentioned_p (op0, operands[1])
24996 || reg_mentioned_p (op0, operands[2])
24997 || reg_mentioned_p (op0, operands[3]))
24998 return false;
24999
25000 /* formats (destination is the first argument), example fmaddss:
25001 xmm1, xmm1, xmm2, xmm3/mem
25002 xmm1, xmm1, xmm2/mem, xmm3
25003 xmm1, xmm2, xmm3/mem, xmm1
25004 xmm1, xmm2/mem, xmm3, xmm1
25005
25006 For the oc0 case, we will load either operands[1] or operands[3] into
25007 operands[0], so any combination of 2 memory operands is ok. */
25008 if (uses_oc0)
25009 return true;
25010
25011 /* format, example pmacsdd:
25012 xmm1, xmm2, xmm3/mem, xmm1
25013
25014 For the integer multiply/add instructions be more restrictive and
25015 require operands[2] and operands[3] to be the memory operands. */
25016 else
25017 return (mem_mask == ((1 << 2) | (1 << 3)));
25018 }
25019
25020 else if (num == 3 && num_memory == 1)
25021 {
25022 /* formats, example protb:
25023 xmm1, xmm2, xmm3/mem
25024 xmm1, xmm2/mem, xmm3 */
25025 if (uses_oc0)
25026 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
25027
25028 /* format, example comeq:
25029 xmm1, xmm2, xmm3/mem */
25030 else
25031 return (mem_mask == (1 << 2));
25032 }
25033
25034 else
25035 gcc_unreachable ();
25036
25037 return false;
25038 }
25039
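/* MEM_MASK above encodes which operand slots are memory references: bit
   I is set when operands[I] is a MEM.  So a four-operand fmaddss of the
   form "xmm1, xmm1, xmm2, xmm3/mem", with only the last operand in
   memory, gives mem_mask == (1 << 3) and mem_count == 1, which the
   uses_oc0 case accepts.  */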
25040 \f
25041 /* Fix up an SSE5 instruction that has 2 memory input references into a form the
25042 hardware will allow by using the destination register to load one of the
25043 memory operations. Presently this is used by the multiply/add routines to
25044 allow 2 memory references. */
25045
25046 void
25047 ix86_expand_sse5_multiple_memory (rtx operands[],
25048 int num,
25049 enum machine_mode mode)
25050 {
25051 rtx op0 = operands[0];
25052 if (num != 4
25053 || memory_operand (op0, mode)
25054 || reg_mentioned_p (op0, operands[1])
25055 || reg_mentioned_p (op0, operands[2])
25056 || reg_mentioned_p (op0, operands[3]))
25057 gcc_unreachable ();
25058
25059 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
25060 the destination register. */
25061 if (memory_operand (operands[1], mode))
25062 {
25063 emit_move_insn (op0, operands[1]);
25064 operands[1] = op0;
25065 }
25066 else if (memory_operand (operands[3], mode))
25067 {
25068 emit_move_insn (op0, operands[3]);
25069 operands[3] = op0;
25070 }
25071 else
25072 gcc_unreachable ();
25073
25074 return;
25075 }
25076
25077 \f
25078 /* Table of valid machine attributes. */
25079 static const struct attribute_spec ix86_attribute_table[] =
25080 {
25081 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
25082 /* Stdcall attribute says callee is responsible for popping arguments
25083 if they are not variable. */
25084 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25085 /* Fastcall attribute says callee is responsible for popping arguments
25086 if they are not variable. */
25087 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25088 /* Cdecl attribute says the callee is a normal C declaration */
25089 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25090 /* Regparm attribute specifies how many integer arguments are to be
25091 passed in registers. */
25092 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
25093 /* Sseregparm attribute says we are using x86_64 calling conventions
25094 for FP arguments. */
25095 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25096 /* force_align_arg_pointer says this function realigns the stack at entry. */
25097 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
25098 false, true, true, ix86_handle_cconv_attribute },
25099 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25100 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
25101 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
25102 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
25103 #endif
25104 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
25105 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
25106 #ifdef SUBTARGET_ATTRIBUTE_TABLE
25107 SUBTARGET_ATTRIBUTE_TABLE,
25108 #endif
25109 { NULL, 0, 0, false, false, false, NULL }
25110 };
25111
25112 /* Implement targetm.vectorize.builtin_vectorization_cost. */
25113 static int
25114 x86_builtin_vectorization_cost (bool runtime_test)
25115 {
25116 /* If the branch of the runtime test is taken - i.e., the vectorized
25117 version is skipped - this incurs a misprediction cost (because the
25118 vectorized version is expected to be the fall-through). So we subtract
25119 the latency of a mispredicted branch from the costs that are incurred
25120 when the vectorized version is executed.
25121
25122 TODO: The values in individual target tables have to be tuned or new
25123 fields may be needed. E.g., on K8, the default branch path is the
25124 not-taken path. If the taken path is predicted correctly, the minimum
25125 penalty of going down the taken-path is 1 cycle. If the taken-path is
25126 not predicted correctly, then the minimum penalty is 10 cycles. */
25127
25128 if (runtime_test)
25129 {
25130 return (-(ix86_cost->cond_taken_branch_cost));
25131 }
25132 else
25133 return 0;
25134 }
25135
25136 /* Initialize the GCC target structure. */
25137 #undef TARGET_ATTRIBUTE_TABLE
25138 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
25139 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25140 # undef TARGET_MERGE_DECL_ATTRIBUTES
25141 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
25142 #endif
25143
25144 #undef TARGET_COMP_TYPE_ATTRIBUTES
25145 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
25146
25147 #undef TARGET_INIT_BUILTINS
25148 #define TARGET_INIT_BUILTINS ix86_init_builtins
25149 #undef TARGET_EXPAND_BUILTIN
25150 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
25151
25152 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
25153 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
25154 ix86_builtin_vectorized_function
25155
25156 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
25157 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
25158
25159 #undef TARGET_BUILTIN_RECIPROCAL
25160 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
25161
25162 #undef TARGET_ASM_FUNCTION_EPILOGUE
25163 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
25164
25165 #undef TARGET_ENCODE_SECTION_INFO
25166 #ifndef SUBTARGET_ENCODE_SECTION_INFO
25167 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
25168 #else
25169 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
25170 #endif
25171
25172 #undef TARGET_ASM_OPEN_PAREN
25173 #define TARGET_ASM_OPEN_PAREN ""
25174 #undef TARGET_ASM_CLOSE_PAREN
25175 #define TARGET_ASM_CLOSE_PAREN ""
25176
25177 #undef TARGET_ASM_ALIGNED_HI_OP
25178 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
25179 #undef TARGET_ASM_ALIGNED_SI_OP
25180 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
25181 #ifdef ASM_QUAD
25182 #undef TARGET_ASM_ALIGNED_DI_OP
25183 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
25184 #endif
25185
25186 #undef TARGET_ASM_UNALIGNED_HI_OP
25187 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
25188 #undef TARGET_ASM_UNALIGNED_SI_OP
25189 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
25190 #undef TARGET_ASM_UNALIGNED_DI_OP
25191 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
25192
25193 #undef TARGET_SCHED_ADJUST_COST
25194 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
25195 #undef TARGET_SCHED_ISSUE_RATE
25196 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
25197 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
25198 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
25199 ia32_multipass_dfa_lookahead
25200
25201 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
25202 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
25203
25204 #ifdef HAVE_AS_TLS
25205 #undef TARGET_HAVE_TLS
25206 #define TARGET_HAVE_TLS true
25207 #endif
25208 #undef TARGET_CANNOT_FORCE_CONST_MEM
25209 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
25210 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
25211 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
25212
25213 #undef TARGET_DELEGITIMIZE_ADDRESS
25214 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
25215
25216 #undef TARGET_MS_BITFIELD_LAYOUT_P
25217 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
25218
25219 #if TARGET_MACHO
25220 #undef TARGET_BINDS_LOCAL_P
25221 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
25222 #endif
25223 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25224 #undef TARGET_BINDS_LOCAL_P
25225 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
25226 #endif
25227
25228 #undef TARGET_ASM_OUTPUT_MI_THUNK
25229 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
25230 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
25231 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
25232
25233 #undef TARGET_ASM_FILE_START
25234 #define TARGET_ASM_FILE_START x86_file_start
25235
25236 #undef TARGET_DEFAULT_TARGET_FLAGS
25237 #define TARGET_DEFAULT_TARGET_FLAGS \
25238 (TARGET_DEFAULT \
25239 | TARGET_SUBTARGET_DEFAULT \
25240 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
25241
25242 #undef TARGET_HANDLE_OPTION
25243 #define TARGET_HANDLE_OPTION ix86_handle_option
25244
25245 #undef TARGET_RTX_COSTS
25246 #define TARGET_RTX_COSTS ix86_rtx_costs
25247 #undef TARGET_ADDRESS_COST
25248 #define TARGET_ADDRESS_COST ix86_address_cost
25249
25250 #undef TARGET_FIXED_CONDITION_CODE_REGS
25251 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
25252 #undef TARGET_CC_MODES_COMPATIBLE
25253 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
25254
25255 #undef TARGET_MACHINE_DEPENDENT_REORG
25256 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
25257
25258 #undef TARGET_BUILD_BUILTIN_VA_LIST
25259 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
25260
25261 #undef TARGET_EXPAND_BUILTIN_VA_START
25262 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
25263
25264 #undef TARGET_MD_ASM_CLOBBERS
25265 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
25266
25267 #undef TARGET_PROMOTE_PROTOTYPES
25268 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
25269 #undef TARGET_STRUCT_VALUE_RTX
25270 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
25271 #undef TARGET_SETUP_INCOMING_VARARGS
25272 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
25273 #undef TARGET_MUST_PASS_IN_STACK
25274 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
25275 #undef TARGET_PASS_BY_REFERENCE
25276 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
25277 #undef TARGET_INTERNAL_ARG_POINTER
25278 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
25279 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
25280 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
25281 #undef TARGET_STRICT_ARGUMENT_NAMING
25282 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
25283
25284 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
25285 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
25286
25287 #undef TARGET_SCALAR_MODE_SUPPORTED_P
25288 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
25289
25290 #undef TARGET_VECTOR_MODE_SUPPORTED_P
25291 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
25292
25293 #undef TARGET_C_MODE_FOR_SUFFIX
25294 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
25295
25296 #ifdef HAVE_AS_TLS
25297 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
25298 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
25299 #endif
25300
25301 #ifdef SUBTARGET_INSERT_ATTRIBUTES
25302 #undef TARGET_INSERT_ATTRIBUTES
25303 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
25304 #endif
25305
25306 #undef TARGET_MANGLE_TYPE
25307 #define TARGET_MANGLE_TYPE ix86_mangle_type
25308
25309 #undef TARGET_STACK_PROTECT_FAIL
25310 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
25311
25312 #undef TARGET_FUNCTION_VALUE
25313 #define TARGET_FUNCTION_VALUE ix86_function_value
25314
25315 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
25316 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
25317
25318 struct gcc_target targetm = TARGET_INITIALIZER;
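/* Illustrative sketch, not part of the original file: TARGET_INITIALIZER
   expands using the #define overrides above, so each overridden hook in
   targetm points at its i386 implementation.  Target-independent code then
   reaches this backend through the structure, for example (hypothetical
   caller):

     if (targetm.function_ok_for_sibcall (decl, exp))
       ...

   which ends up in ix86_function_ok_for_sibcall without any i386-specific
   conditional compilation in the caller.  */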
25319 \f
25320 #include "gt-i386.h"