[gcc.git] / gcc / config / i386 / i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "tm_p.h"
28 #include "regs.h"
29 #include "hard-reg-set.h"
30 #include "real.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "output.h"
34 #include "insn-codes.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "except.h"
38 #include "function.h"
39 #include "recog.h"
40 #include "expr.h"
41 #include "optabs.h"
42 #include "toplev.h"
43 #include "basic-block.h"
44 #include "ggc.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "langhooks.h"
48 #include "cgraph.h"
49 #include "tree-gimple.h"
50 #include "dwarf2.h"
51 #include "df.h"
52 #include "tm-constrs.h"
53 #include "params.h"
54
55 static int x86_builtin_vectorization_cost (bool);
56
57 #ifndef CHECK_STACK_LIMIT
58 #define CHECK_STACK_LIMIT (-1)
59 #endif
60
61 /* Return index of given mode in mult and division cost tables. */
62 #define MODE_INDEX(mode) \
63 ((mode) == QImode ? 0 \
64 : (mode) == HImode ? 1 \
65 : (mode) == SImode ? 2 \
66 : (mode) == DImode ? 3 \
67 : 4)
68
69 /* Processor costs (relative to an add) */
70 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
71 #define COSTS_N_BYTES(N) ((N) * 2)
72
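/* Annotation: filler entry for the stringop strategy tables below.  Whichever
   of the two (32-bit/64-bit) slots a given cost table does not tune gets this
   entry, which simply falls back to a library call for every block size.  */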
73 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
74
75 static const
76 struct processor_costs size_cost = { /* costs for tuning for size */
77 COSTS_N_BYTES (2), /* cost of an add instruction */
78 COSTS_N_BYTES (3), /* cost of a lea instruction */
79 COSTS_N_BYTES (2), /* variable shift costs */
80 COSTS_N_BYTES (3), /* constant shift costs */
81 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
82 COSTS_N_BYTES (3), /* HI */
83 COSTS_N_BYTES (3), /* SI */
84 COSTS_N_BYTES (3), /* DI */
85 COSTS_N_BYTES (5)}, /* other */
86 0, /* cost of multiply per each bit set */
87 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
88 COSTS_N_BYTES (3), /* HI */
89 COSTS_N_BYTES (3), /* SI */
90 COSTS_N_BYTES (3), /* DI */
91 COSTS_N_BYTES (5)}, /* other */
92 COSTS_N_BYTES (3), /* cost of movsx */
93 COSTS_N_BYTES (3), /* cost of movzx */
94 0, /* "large" insn */
95 2, /* MOVE_RATIO */
96 2, /* cost for loading QImode using movzbl */
97 {2, 2, 2}, /* cost of loading integer registers
98 in QImode, HImode and SImode.
99 Relative to reg-reg move (2). */
100 {2, 2, 2}, /* cost of storing integer registers */
101 2, /* cost of reg,reg fld/fst */
102 {2, 2, 2}, /* cost of loading fp registers
103 in SFmode, DFmode and XFmode */
104 {2, 2, 2}, /* cost of storing fp registers
105 in SFmode, DFmode and XFmode */
106 3, /* cost of moving MMX register */
107 {3, 3}, /* cost of loading MMX registers
108 in SImode and DImode */
109 {3, 3}, /* cost of storing MMX registers
110 in SImode and DImode */
111 3, /* cost of moving SSE register */
112 {3, 3, 3}, /* cost of loading SSE registers
113 in SImode, DImode and TImode */
114 {3, 3, 3}, /* cost of storing SSE registers
115 in SImode, DImode and TImode */
116 3, /* MMX or SSE register to integer */
117 0, /* size of l1 cache */
118 0, /* size of l2 cache */
119 0, /* size of prefetch block */
120 0, /* number of parallel prefetches */
121 2, /* Branch cost */
122 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
123 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
124 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
125 COSTS_N_BYTES (2), /* cost of FABS instruction. */
126 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
127 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
128 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
129 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
130 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
131 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
132 1, /* scalar_stmt_cost. */
133 1, /* scalar_load_cost. */
134 1, /* scalar_store_cost. */
135 1, /* vec_stmt_cost. */
136 1, /* vec_to_scalar_cost. */
137 1, /* scalar_to_vec_cost. */
138 1, /* vec_align_load_cost. */
139 1, /* vec_unalign_load_cost. */
140 1, /* vec_store_cost. */
141 1, /* cond_taken_branch_cost. */
142 1, /* cond_not_taken_branch_cost. */
143 };
144
145 /* Processor costs (relative to an add) */
146 static const
147 struct processor_costs i386_cost = { /* 386 specific costs */
148 COSTS_N_INSNS (1), /* cost of an add instruction */
149 COSTS_N_INSNS (1), /* cost of a lea instruction */
150 COSTS_N_INSNS (3), /* variable shift costs */
151 COSTS_N_INSNS (2), /* constant shift costs */
152 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
153 COSTS_N_INSNS (6), /* HI */
154 COSTS_N_INSNS (6), /* SI */
155 COSTS_N_INSNS (6), /* DI */
156 COSTS_N_INSNS (6)}, /* other */
157 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
158 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
159 COSTS_N_INSNS (23), /* HI */
160 COSTS_N_INSNS (23), /* SI */
161 COSTS_N_INSNS (23), /* DI */
162 COSTS_N_INSNS (23)}, /* other */
163 COSTS_N_INSNS (3), /* cost of movsx */
164 COSTS_N_INSNS (2), /* cost of movzx */
165 15, /* "large" insn */
166 3, /* MOVE_RATIO */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175 {8, 8, 8}, /* cost of storing fp registers
176 in SFmode, DFmode and XFmode */
177 2, /* cost of moving MMX register */
178 {4, 8}, /* cost of loading MMX registers
179 in SImode and DImode */
180 {4, 8}, /* cost of storing MMX registers
181 in SImode and DImode */
182 2, /* cost of moving SSE register */
183 {4, 8, 16}, /* cost of loading SSE registers
184 in SImode, DImode and TImode */
185 {4, 8, 16}, /* cost of storing SSE registers
186 in SImode, DImode and TImode */
187 3, /* MMX or SSE register to integer */
188 0, /* size of l1 cache */
189 0, /* size of l2 cache */
190 0, /* size of prefetch block */
191 0, /* number of parallel prefetches */
192 1, /* Branch cost */
193 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
194 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
195 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
196 COSTS_N_INSNS (22), /* cost of FABS instruction. */
197 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
198 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
199 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
200 DUMMY_STRINGOP_ALGS},
201 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
202 DUMMY_STRINGOP_ALGS},
203 1, /* scalar_stmt_cost. */
204 1, /* scalar_load_cost. */
205 1, /* scalar_store_cost. */
206 1, /* vec_stmt_cost. */
207 1, /* vec_to_scalar_cost. */
208 1, /* scalar_to_vec_cost. */
209 1, /* vec_align_load_cost. */
210 2, /* vec_unalign_load_cost. */
211 1, /* vec_store_cost. */
212 3, /* cond_taken_branch_cost. */
213 1, /* cond_not_taken_branch_cost. */
214 };
215
216 static const
217 struct processor_costs i486_cost = { /* 486 specific costs */
218 COSTS_N_INSNS (1), /* cost of an add instruction */
219 COSTS_N_INSNS (1), /* cost of a lea instruction */
220 COSTS_N_INSNS (3), /* variable shift costs */
221 COSTS_N_INSNS (2), /* constant shift costs */
222 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
223 COSTS_N_INSNS (12), /* HI */
224 COSTS_N_INSNS (12), /* SI */
225 COSTS_N_INSNS (12), /* DI */
226 COSTS_N_INSNS (12)}, /* other */
227 1, /* cost of multiply per each bit set */
228 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
229 COSTS_N_INSNS (40), /* HI */
230 COSTS_N_INSNS (40), /* SI */
231 COSTS_N_INSNS (40), /* DI */
232 COSTS_N_INSNS (40)}, /* other */
233 COSTS_N_INSNS (3), /* cost of movsx */
234 COSTS_N_INSNS (2), /* cost of movzx */
235 15, /* "large" insn */
236 3, /* MOVE_RATIO */
237 4, /* cost for loading QImode using movzbl */
238 {2, 4, 2}, /* cost of loading integer registers
239 in QImode, HImode and SImode.
240 Relative to reg-reg move (2). */
241 {2, 4, 2}, /* cost of storing integer registers */
242 2, /* cost of reg,reg fld/fst */
243 {8, 8, 8}, /* cost of loading fp registers
244 in SFmode, DFmode and XFmode */
245 {8, 8, 8}, /* cost of storing fp registers
246 in SFmode, DFmode and XFmode */
247 2, /* cost of moving MMX register */
248 {4, 8}, /* cost of loading MMX registers
249 in SImode and DImode */
250 {4, 8}, /* cost of storing MMX registers
251 in SImode and DImode */
252 2, /* cost of moving SSE register */
253 {4, 8, 16}, /* cost of loading SSE registers
254 in SImode, DImode and TImode */
255 {4, 8, 16}, /* cost of storing SSE registers
256 in SImode, DImode and TImode */
257 3, /* MMX or SSE register to integer */
258 4, /* size of l1 cache. 486 has 8kB cache
259 shared for code and data, so 4kB is
260 not really precise. */
261 4, /* size of l2 cache */
262 0, /* size of prefetch block */
263 0, /* number of parallel prefetches */
264 1, /* Branch cost */
265 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
266 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
267 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
268 COSTS_N_INSNS (3), /* cost of FABS instruction. */
269 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
270 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
271 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
272 DUMMY_STRINGOP_ALGS},
273 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
274 DUMMY_STRINGOP_ALGS},
275 1, /* scalar_stmt_cost. */
276 1, /* scalar_load_cost. */
277 1, /* scalar_store_cost. */
278 1, /* vec_stmt_cost. */
279 1, /* vec_to_scalar_cost. */
280 1, /* scalar_to_vec_cost. */
281 1, /* vec_align_load_cost. */
282 2, /* vec_unalign_load_cost. */
283 1, /* vec_store_cost. */
284 3, /* cond_taken_branch_cost. */
285 1, /* cond_not_taken_branch_cost. */
286 };
287
288 static const
289 struct processor_costs pentium_cost = {
290 COSTS_N_INSNS (1), /* cost of an add instruction */
291 COSTS_N_INSNS (1), /* cost of a lea instruction */
292 COSTS_N_INSNS (4), /* variable shift costs */
293 COSTS_N_INSNS (1), /* constant shift costs */
294 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
295 COSTS_N_INSNS (11), /* HI */
296 COSTS_N_INSNS (11), /* SI */
297 COSTS_N_INSNS (11), /* DI */
298 COSTS_N_INSNS (11)}, /* other */
299 0, /* cost of multiply per each bit set */
300 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
301 COSTS_N_INSNS (25), /* HI */
302 COSTS_N_INSNS (25), /* SI */
303 COSTS_N_INSNS (25), /* DI */
304 COSTS_N_INSNS (25)}, /* other */
305 COSTS_N_INSNS (3), /* cost of movsx */
306 COSTS_N_INSNS (2), /* cost of movzx */
307 8, /* "large" insn */
308 6, /* MOVE_RATIO */
309 6, /* cost for loading QImode using movzbl */
310 {2, 4, 2}, /* cost of loading integer registers
311 in QImode, HImode and SImode.
312 Relative to reg-reg move (2). */
313 {2, 4, 2}, /* cost of storing integer registers */
314 2, /* cost of reg,reg fld/fst */
315 {2, 2, 6}, /* cost of loading fp registers
316 in SFmode, DFmode and XFmode */
317 {4, 4, 6}, /* cost of storing fp registers
318 in SFmode, DFmode and XFmode */
319 8, /* cost of moving MMX register */
320 {8, 8}, /* cost of loading MMX registers
321 in SImode and DImode */
322 {8, 8}, /* cost of storing MMX registers
323 in SImode and DImode */
324 2, /* cost of moving SSE register */
325 {4, 8, 16}, /* cost of loading SSE registers
326 in SImode, DImode and TImode */
327 {4, 8, 16}, /* cost of storing SSE registers
328 in SImode, DImode and TImode */
329 3, /* MMX or SSE register to integer */
330 8, /* size of l1 cache. */
331 8, /* size of l2 cache */
332 0, /* size of prefetch block */
333 0, /* number of parallel prefetches */
334 2, /* Branch cost */
335 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
336 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
337 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
338 COSTS_N_INSNS (1), /* cost of FABS instruction. */
339 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
340 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
341 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
342 DUMMY_STRINGOP_ALGS},
343 {{libcall, {{-1, rep_prefix_4_byte}}},
344 DUMMY_STRINGOP_ALGS},
345 1, /* scalar_stmt_cost. */
346 1, /* scalar_load_cost. */
347 1, /* scalar_store_cost. */
348 1, /* vec_stmt_cost. */
349 1, /* vec_to_scalar_cost. */
350 1, /* scalar_to_vec_cost. */
351 1, /* vec_align_load_cost. */
352 2, /* vec_unalign_load_cost. */
353 1, /* vec_store_cost. */
354 3, /* cond_taken_branch_cost. */
355 1, /* cond_not_taken_branch_cost. */
356 };
357
358 static const
359 struct processor_costs pentiumpro_cost = {
360 COSTS_N_INSNS (1), /* cost of an add instruction */
361 COSTS_N_INSNS (1), /* cost of a lea instruction */
362 COSTS_N_INSNS (1), /* variable shift costs */
363 COSTS_N_INSNS (1), /* constant shift costs */
364 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
365 COSTS_N_INSNS (4), /* HI */
366 COSTS_N_INSNS (4), /* SI */
367 COSTS_N_INSNS (4), /* DI */
368 COSTS_N_INSNS (4)}, /* other */
369 0, /* cost of multiply per each bit set */
370 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
371 COSTS_N_INSNS (17), /* HI */
372 COSTS_N_INSNS (17), /* SI */
373 COSTS_N_INSNS (17), /* DI */
374 COSTS_N_INSNS (17)}, /* other */
375 COSTS_N_INSNS (1), /* cost of movsx */
376 COSTS_N_INSNS (1), /* cost of movzx */
377 8, /* "large" insn */
378 6, /* MOVE_RATIO */
379 2, /* cost for loading QImode using movzbl */
380 {4, 4, 4}, /* cost of loading integer registers
381 in QImode, HImode and SImode.
382 Relative to reg-reg move (2). */
383 {2, 2, 2}, /* cost of storing integer registers */
384 2, /* cost of reg,reg fld/fst */
385 {2, 2, 6}, /* cost of loading fp registers
386 in SFmode, DFmode and XFmode */
387 {4, 4, 6}, /* cost of storing fp registers
388 in SFmode, DFmode and XFmode */
389 2, /* cost of moving MMX register */
390 {2, 2}, /* cost of loading MMX registers
391 in SImode and DImode */
392 {2, 2}, /* cost of storing MMX registers
393 in SImode and DImode */
394 2, /* cost of moving SSE register */
395 {2, 2, 8}, /* cost of loading SSE registers
396 in SImode, DImode and TImode */
397 {2, 2, 8}, /* cost of storing SSE registers
398 in SImode, DImode and TImode */
399 3, /* MMX or SSE register to integer */
400 8, /* size of l1 cache. */
401 256, /* size of l2 cache */
402 32, /* size of prefetch block */
403 6, /* number of parallel prefetches */
404 2, /* Branch cost */
405 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
406 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
407 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
408 COSTS_N_INSNS (2), /* cost of FABS instruction. */
409 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
410 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
411 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
412 the alignment). For small blocks an inline loop is still a noticeable win; for bigger
413 blocks either rep movsl or rep movsb is the way to go. Rep movsb apparently has a
414 more expensive startup time in the CPU, but after 4K the difference is down in the noise.
415 */
416 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
417 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
418 DUMMY_STRINGOP_ALGS},
419 {{rep_prefix_4_byte, {{1024, unrolled_loop},
420 {8192, rep_prefix_4_byte}, {-1, libcall}}},
421 DUMMY_STRINGOP_ALGS},
422 1, /* scalar_stmt_cost. */
423 1, /* scalar_load_cost. */
424 1, /* scalar_store_cost. */
425 1, /* vec_stmt_cost. */
426 1, /* vec_to_scalar_cost. */
427 1, /* scalar_to_vec_cost. */
428 1, /* vec_align_load_cost. */
429 2, /* vec_unalign_load_cost. */
430 1, /* vec_store_cost. */
431 3, /* cond_taken_branch_cost. */
432 1, /* cond_not_taken_branch_cost. */
433 };
434
435 static const
436 struct processor_costs geode_cost = {
437 COSTS_N_INSNS (1), /* cost of an add instruction */
438 COSTS_N_INSNS (1), /* cost of a lea instruction */
439 COSTS_N_INSNS (2), /* variable shift costs */
440 COSTS_N_INSNS (1), /* constant shift costs */
441 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
442 COSTS_N_INSNS (4), /* HI */
443 COSTS_N_INSNS (7), /* SI */
444 COSTS_N_INSNS (7), /* DI */
445 COSTS_N_INSNS (7)}, /* other */
446 0, /* cost of multiply per each bit set */
447 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
448 COSTS_N_INSNS (23), /* HI */
449 COSTS_N_INSNS (39), /* SI */
450 COSTS_N_INSNS (39), /* DI */
451 COSTS_N_INSNS (39)}, /* other */
452 COSTS_N_INSNS (1), /* cost of movsx */
453 COSTS_N_INSNS (1), /* cost of movzx */
454 8, /* "large" insn */
455 4, /* MOVE_RATIO */
456 1, /* cost for loading QImode using movzbl */
457 {1, 1, 1}, /* cost of loading integer registers
458 in QImode, HImode and SImode.
459 Relative to reg-reg move (2). */
460 {1, 1, 1}, /* cost of storing integer registers */
461 1, /* cost of reg,reg fld/fst */
462 {1, 1, 1}, /* cost of loading fp registers
463 in SFmode, DFmode and XFmode */
464 {4, 6, 6}, /* cost of storing fp registers
465 in SFmode, DFmode and XFmode */
466
467 1, /* cost of moving MMX register */
468 {1, 1}, /* cost of loading MMX registers
469 in SImode and DImode */
470 {1, 1}, /* cost of storing MMX registers
471 in SImode and DImode */
472 1, /* cost of moving SSE register */
473 {1, 1, 1}, /* cost of loading SSE registers
474 in SImode, DImode and TImode */
475 {1, 1, 1}, /* cost of storing SSE registers
476 in SImode, DImode and TImode */
477 1, /* MMX or SSE register to integer */
478 64, /* size of l1 cache. */
479 128, /* size of l2 cache. */
480 32, /* size of prefetch block */
481 1, /* number of parallel prefetches */
482 1, /* Branch cost */
483 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
484 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
485 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
486 COSTS_N_INSNS (1), /* cost of FABS instruction. */
487 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
488 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
489 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
490 DUMMY_STRINGOP_ALGS},
491 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
492 DUMMY_STRINGOP_ALGS},
493 1, /* scalar_stmt_cost. */
494 1, /* scalar_load_cost. */
495 1, /* scalar_store_cost. */
496 1, /* vec_stmt_cost. */
497 1, /* vec_to_scalar_cost. */
498 1, /* scalar_to_vec_cost. */
499 1, /* vec_align_load_cost. */
500 2, /* vec_unalign_load_cost. */
501 1, /* vec_store_cost. */
502 3, /* cond_taken_branch_cost. */
503 1, /* cond_not_taken_branch_cost. */
504 };
505
506 static const
507 struct processor_costs k6_cost = {
508 COSTS_N_INSNS (1), /* cost of an add instruction */
509 COSTS_N_INSNS (2), /* cost of a lea instruction */
510 COSTS_N_INSNS (1), /* variable shift costs */
511 COSTS_N_INSNS (1), /* constant shift costs */
512 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
513 COSTS_N_INSNS (3), /* HI */
514 COSTS_N_INSNS (3), /* SI */
515 COSTS_N_INSNS (3), /* DI */
516 COSTS_N_INSNS (3)}, /* other */
517 0, /* cost of multiply per each bit set */
518 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
519 COSTS_N_INSNS (18), /* HI */
520 COSTS_N_INSNS (18), /* SI */
521 COSTS_N_INSNS (18), /* DI */
522 COSTS_N_INSNS (18)}, /* other */
523 COSTS_N_INSNS (2), /* cost of movsx */
524 COSTS_N_INSNS (2), /* cost of movzx */
525 8, /* "large" insn */
526 4, /* MOVE_RATIO */
527 3, /* cost for loading QImode using movzbl */
528 {4, 5, 4}, /* cost of loading integer registers
529 in QImode, HImode and SImode.
530 Relative to reg-reg move (2). */
531 {2, 3, 2}, /* cost of storing integer registers */
532 4, /* cost of reg,reg fld/fst */
533 {6, 6, 6}, /* cost of loading fp registers
534 in SFmode, DFmode and XFmode */
535 {4, 4, 4}, /* cost of storing fp registers
536 in SFmode, DFmode and XFmode */
537 2, /* cost of moving MMX register */
538 {2, 2}, /* cost of loading MMX registers
539 in SImode and DImode */
540 {2, 2}, /* cost of storing MMX registers
541 in SImode and DImode */
542 2, /* cost of moving SSE register */
543 {2, 2, 8}, /* cost of loading SSE registers
544 in SImode, DImode and TImode */
545 {2, 2, 8}, /* cost of storing SSE registers
546 in SImode, DImode and TImode */
547 6, /* MMX or SSE register to integer */
548 32, /* size of l1 cache. */
549 32, /* size of l2 cache. Some models
550 have integrated l2 cache, but
551 optimizing for k6 is not important
552 enough to worry about that. */
553 32, /* size of prefetch block */
554 1, /* number of parallel prefetches */
555 1, /* Branch cost */
556 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
557 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
558 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
559 COSTS_N_INSNS (2), /* cost of FABS instruction. */
560 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
561 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
562 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
563 DUMMY_STRINGOP_ALGS},
564 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
565 DUMMY_STRINGOP_ALGS},
566 1, /* scalar_stmt_cost. */
567 1, /* scalar_load_cost. */
568 1, /* scalar_store_cost. */
569 1, /* vec_stmt_cost. */
570 1, /* vec_to_scalar_cost. */
571 1, /* scalar_to_vec_cost. */
572 1, /* vec_align_load_cost. */
573 2, /* vec_unalign_load_cost. */
574 1, /* vec_store_cost. */
575 3, /* cond_taken_branch_cost. */
576 1, /* cond_not_taken_branch_cost. */
577 };
578
579 static const
580 struct processor_costs athlon_cost = {
581 COSTS_N_INSNS (1), /* cost of an add instruction */
582 COSTS_N_INSNS (2), /* cost of a lea instruction */
583 COSTS_N_INSNS (1), /* variable shift costs */
584 COSTS_N_INSNS (1), /* constant shift costs */
585 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
586 COSTS_N_INSNS (5), /* HI */
587 COSTS_N_INSNS (5), /* SI */
588 COSTS_N_INSNS (5), /* DI */
589 COSTS_N_INSNS (5)}, /* other */
590 0, /* cost of multiply per each bit set */
591 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
592 COSTS_N_INSNS (26), /* HI */
593 COSTS_N_INSNS (42), /* SI */
594 COSTS_N_INSNS (74), /* DI */
595 COSTS_N_INSNS (74)}, /* other */
596 COSTS_N_INSNS (1), /* cost of movsx */
597 COSTS_N_INSNS (1), /* cost of movzx */
598 8, /* "large" insn */
599 9, /* MOVE_RATIO */
600 4, /* cost for loading QImode using movzbl */
601 {3, 4, 3}, /* cost of loading integer registers
602 in QImode, HImode and SImode.
603 Relative to reg-reg move (2). */
604 {3, 4, 3}, /* cost of storing integer registers */
605 4, /* cost of reg,reg fld/fst */
606 {4, 4, 12}, /* cost of loading fp registers
607 in SFmode, DFmode and XFmode */
608 {6, 6, 8}, /* cost of storing fp registers
609 in SFmode, DFmode and XFmode */
610 2, /* cost of moving MMX register */
611 {4, 4}, /* cost of loading MMX registers
612 in SImode and DImode */
613 {4, 4}, /* cost of storing MMX registers
614 in SImode and DImode */
615 2, /* cost of moving SSE register */
616 {4, 4, 6}, /* cost of loading SSE registers
617 in SImode, DImode and TImode */
618 {4, 4, 5}, /* cost of storing SSE registers
619 in SImode, DImode and TImode */
620 5, /* MMX or SSE register to integer */
621 64, /* size of l1 cache. */
622 256, /* size of l2 cache. */
623 64, /* size of prefetch block */
624 6, /* number of parallel prefetches */
625 5, /* Branch cost */
626 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
627 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
628 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
629 COSTS_N_INSNS (2), /* cost of FABS instruction. */
630 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
631 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
632 /* For some reason, Athlon deals better with the REP prefix (relative to loops)
633 than K8 does. Alignment becomes important after 8 bytes for memcpy and
634 128 bytes for memset. */
635 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
636 DUMMY_STRINGOP_ALGS},
637 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
638 DUMMY_STRINGOP_ALGS},
639 1, /* scalar_stmt_cost. */
640 1, /* scalar_load_cost. */
641 1, /* scalar_store_cost. */
642 1, /* vec_stmt_cost. */
643 1, /* vec_to_scalar_cost. */
644 1, /* scalar_to_vec_cost. */
645 1, /* vec_align_load_cost. */
646 2, /* vec_unalign_load_cost. */
647 1, /* vec_store_cost. */
648 3, /* cond_taken_branch_cost. */
649 1, /* cond_not_taken_branch_cost. */
650 };
651
652 static const
653 struct processor_costs k8_cost = {
654 COSTS_N_INSNS (1), /* cost of an add instruction */
655 COSTS_N_INSNS (2), /* cost of a lea instruction */
656 COSTS_N_INSNS (1), /* variable shift costs */
657 COSTS_N_INSNS (1), /* constant shift costs */
658 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
659 COSTS_N_INSNS (4), /* HI */
660 COSTS_N_INSNS (3), /* SI */
661 COSTS_N_INSNS (4), /* DI */
662 COSTS_N_INSNS (5)}, /* other */
663 0, /* cost of multiply per each bit set */
664 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
665 COSTS_N_INSNS (26), /* HI */
666 COSTS_N_INSNS (42), /* SI */
667 COSTS_N_INSNS (74), /* DI */
668 COSTS_N_INSNS (74)}, /* other */
669 COSTS_N_INSNS (1), /* cost of movsx */
670 COSTS_N_INSNS (1), /* cost of movzx */
671 8, /* "large" insn */
672 9, /* MOVE_RATIO */
673 4, /* cost for loading QImode using movzbl */
674 {3, 4, 3}, /* cost of loading integer registers
675 in QImode, HImode and SImode.
676 Relative to reg-reg move (2). */
677 {3, 4, 3}, /* cost of storing integer registers */
678 4, /* cost of reg,reg fld/fst */
679 {4, 4, 12}, /* cost of loading fp registers
680 in SFmode, DFmode and XFmode */
681 {6, 6, 8}, /* cost of storing fp registers
682 in SFmode, DFmode and XFmode */
683 2, /* cost of moving MMX register */
684 {3, 3}, /* cost of loading MMX registers
685 in SImode and DImode */
686 {4, 4}, /* cost of storing MMX registers
687 in SImode and DImode */
688 2, /* cost of moving SSE register */
689 {4, 3, 6}, /* cost of loading SSE registers
690 in SImode, DImode and TImode */
691 {4, 4, 5}, /* cost of storing SSE registers
692 in SImode, DImode and TImode */
693 5, /* MMX or SSE register to integer */
694 64, /* size of l1 cache. */
695 512, /* size of l2 cache. */
696 64, /* size of prefetch block */
697 /* New AMD processors never drop prefetches; if they cannot be performed
698 immediately, they are queued. We set the number of simultaneous prefetches
699 to a large constant to reflect this (it is probably not a good idea to leave
700 the number of prefetches completely unlimited, as their execution also takes
701 some time). */
702 100, /* number of parallel prefetches */
703 5, /* Branch cost */
704 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
705 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
706 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
707 COSTS_N_INSNS (2), /* cost of FABS instruction. */
708 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
709 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
710 /* K8 has an optimized REP instruction for medium-sized blocks, but for very small
711 blocks it is better to use a loop. For large blocks, a libcall can do
712 nontemporal accesses and beat inline code considerably. */
713 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
714 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
715 {{libcall, {{8, loop}, {24, unrolled_loop},
716 {2048, rep_prefix_4_byte}, {-1, libcall}}},
717 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
718 4, /* scalar_stmt_cost. */
719 2, /* scalar_load_cost. */
720 2, /* scalar_store_cost. */
721 5, /* vec_stmt_cost. */
722 0, /* vec_to_scalar_cost. */
723 2, /* scalar_to_vec_cost. */
724 2, /* vec_align_load_cost. */
725 3, /* vec_unalign_load_cost. */
726 3, /* vec_store_cost. */
727 6, /* cond_taken_branch_cost. */
728 1, /* cond_not_taken_branch_cost. */
729 };
730
731 struct processor_costs amdfam10_cost = {
732 COSTS_N_INSNS (1), /* cost of an add instruction */
733 COSTS_N_INSNS (2), /* cost of a lea instruction */
734 COSTS_N_INSNS (1), /* variable shift costs */
735 COSTS_N_INSNS (1), /* constant shift costs */
736 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
737 COSTS_N_INSNS (4), /* HI */
738 COSTS_N_INSNS (3), /* SI */
739 COSTS_N_INSNS (4), /* DI */
740 COSTS_N_INSNS (5)}, /* other */
741 0, /* cost of multiply per each bit set */
742 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
743 COSTS_N_INSNS (35), /* HI */
744 COSTS_N_INSNS (51), /* SI */
745 COSTS_N_INSNS (83), /* DI */
746 COSTS_N_INSNS (83)}, /* other */
747 COSTS_N_INSNS (1), /* cost of movsx */
748 COSTS_N_INSNS (1), /* cost of movzx */
749 8, /* "large" insn */
750 9, /* MOVE_RATIO */
751 4, /* cost for loading QImode using movzbl */
752 {3, 4, 3}, /* cost of loading integer registers
753 in QImode, HImode and SImode.
754 Relative to reg-reg move (2). */
755 {3, 4, 3}, /* cost of storing integer registers */
756 4, /* cost of reg,reg fld/fst */
757 {4, 4, 12}, /* cost of loading fp registers
758 in SFmode, DFmode and XFmode */
759 {6, 6, 8}, /* cost of storing fp registers
760 in SFmode, DFmode and XFmode */
761 2, /* cost of moving MMX register */
762 {3, 3}, /* cost of loading MMX registers
763 in SImode and DImode */
764 {4, 4}, /* cost of storing MMX registers
765 in SImode and DImode */
766 2, /* cost of moving SSE register */
767 {4, 4, 3}, /* cost of loading SSE registers
768 in SImode, DImode and TImode */
769 {4, 4, 5}, /* cost of storing SSE registers
770 in SImode, DImode and TImode */
771 3, /* MMX or SSE register to integer */
772 /* On K8:
773 MOVD reg64, xmmreg Double FSTORE 4
774 MOVD reg32, xmmreg Double FSTORE 4
775 On AMDFAM10:
776 MOVD reg64, xmmreg Double FADD 3 (1/1 1/1)
777 MOVD reg32, xmmreg Double FADD 3 (1/1 1/1) */
780 64, /* size of l1 cache. */
781 512, /* size of l2 cache. */
782 64, /* size of prefetch block */
783 /* New AMD processors never drop prefetches; if they cannot be performed
784 immediately, they are queued. We set the number of simultaneous prefetches
785 to a large constant to reflect this (it is probably not a good idea to leave
786 the number of prefetches completely unlimited, as their execution also takes
787 some time). */
788 100, /* number of parallel prefetches */
789 5, /* Branch cost */
790 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
791 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
792 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
793 COSTS_N_INSNS (2), /* cost of FABS instruction. */
794 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
795 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
796
797 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
798 very small blocks it is better to use a loop. For large blocks, a libcall can
799 do nontemporal accesses and beat inline code considerably. */
800 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
801 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
802 {{libcall, {{8, loop}, {24, unrolled_loop},
803 {2048, rep_prefix_4_byte}, {-1, libcall}}},
804 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
805 4, /* scalar_stmt_cost. */
806 2, /* scalar_load_cost. */
807 2, /* scalar_store_cost. */
808 6, /* vec_stmt_cost. */
809 0, /* vec_to_scalar_cost. */
810 2, /* scalar_to_vec_cost. */
811 2, /* vec_align_load_cost. */
812 2, /* vec_unalign_load_cost. */
813 2, /* vec_store_cost. */
814 6, /* cond_taken_branch_cost. */
815 1, /* cond_not_taken_branch_cost. */
816 };
817
818 static const
819 struct processor_costs pentium4_cost = {
820 COSTS_N_INSNS (1), /* cost of an add instruction */
821 COSTS_N_INSNS (3), /* cost of a lea instruction */
822 COSTS_N_INSNS (4), /* variable shift costs */
823 COSTS_N_INSNS (4), /* constant shift costs */
824 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
825 COSTS_N_INSNS (15), /* HI */
826 COSTS_N_INSNS (15), /* SI */
827 COSTS_N_INSNS (15), /* DI */
828 COSTS_N_INSNS (15)}, /* other */
829 0, /* cost of multiply per each bit set */
830 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
831 COSTS_N_INSNS (56), /* HI */
832 COSTS_N_INSNS (56), /* SI */
833 COSTS_N_INSNS (56), /* DI */
834 COSTS_N_INSNS (56)}, /* other */
835 COSTS_N_INSNS (1), /* cost of movsx */
836 COSTS_N_INSNS (1), /* cost of movzx */
837 16, /* "large" insn */
838 6, /* MOVE_RATIO */
839 2, /* cost for loading QImode using movzbl */
840 {4, 5, 4}, /* cost of loading integer registers
841 in QImode, HImode and SImode.
842 Relative to reg-reg move (2). */
843 {2, 3, 2}, /* cost of storing integer registers */
844 2, /* cost of reg,reg fld/fst */
845 {2, 2, 6}, /* cost of loading fp registers
846 in SFmode, DFmode and XFmode */
847 {4, 4, 6}, /* cost of storing fp registers
848 in SFmode, DFmode and XFmode */
849 2, /* cost of moving MMX register */
850 {2, 2}, /* cost of loading MMX registers
851 in SImode and DImode */
852 {2, 2}, /* cost of storing MMX registers
853 in SImode and DImode */
854 12, /* cost of moving SSE register */
855 {12, 12, 12}, /* cost of loading SSE registers
856 in SImode, DImode and TImode */
857 {2, 2, 8}, /* cost of storing SSE registers
858 in SImode, DImode and TImode */
859 10, /* MMX or SSE register to integer */
860 8, /* size of l1 cache. */
861 256, /* size of l2 cache. */
862 64, /* size of prefetch block */
863 6, /* number of parallel prefetches */
864 2, /* Branch cost */
865 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
866 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
867 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
868 COSTS_N_INSNS (2), /* cost of FABS instruction. */
869 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
870 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
871 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
872 DUMMY_STRINGOP_ALGS},
873 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
874 {-1, libcall}}},
875 DUMMY_STRINGOP_ALGS},
876 1, /* scalar_stmt_cost. */
877 1, /* scalar_load_cost. */
878 1, /* scalar_store_cost. */
879 1, /* vec_stmt_cost. */
880 1, /* vec_to_scalar_cost. */
881 1, /* scalar_to_vec_cost. */
882 1, /* vec_align_load_cost. */
883 2, /* vec_unalign_load_cost. */
884 1, /* vec_store_cost. */
885 3, /* cond_taken_branch_cost. */
886 1, /* cond_not_taken_branch_cost. */
887 };
888
889 static const
890 struct processor_costs nocona_cost = {
891 COSTS_N_INSNS (1), /* cost of an add instruction */
892 COSTS_N_INSNS (1), /* cost of a lea instruction */
893 COSTS_N_INSNS (1), /* variable shift costs */
894 COSTS_N_INSNS (1), /* constant shift costs */
895 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
896 COSTS_N_INSNS (10), /* HI */
897 COSTS_N_INSNS (10), /* SI */
898 COSTS_N_INSNS (10), /* DI */
899 COSTS_N_INSNS (10)}, /* other */
900 0, /* cost of multiply per each bit set */
901 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
902 COSTS_N_INSNS (66), /* HI */
903 COSTS_N_INSNS (66), /* SI */
904 COSTS_N_INSNS (66), /* DI */
905 COSTS_N_INSNS (66)}, /* other */
906 COSTS_N_INSNS (1), /* cost of movsx */
907 COSTS_N_INSNS (1), /* cost of movzx */
908 16, /* "large" insn */
909 17, /* MOVE_RATIO */
910 4, /* cost for loading QImode using movzbl */
911 {4, 4, 4}, /* cost of loading integer registers
912 in QImode, HImode and SImode.
913 Relative to reg-reg move (2). */
914 {4, 4, 4}, /* cost of storing integer registers */
915 3, /* cost of reg,reg fld/fst */
916 {12, 12, 12}, /* cost of loading fp registers
917 in SFmode, DFmode and XFmode */
918 {4, 4, 4}, /* cost of storing fp registers
919 in SFmode, DFmode and XFmode */
920 6, /* cost of moving MMX register */
921 {12, 12}, /* cost of loading MMX registers
922 in SImode and DImode */
923 {12, 12}, /* cost of storing MMX registers
924 in SImode and DImode */
925 6, /* cost of moving SSE register */
926 {12, 12, 12}, /* cost of loading SSE registers
927 in SImode, DImode and TImode */
928 {12, 12, 12}, /* cost of storing SSE registers
929 in SImode, DImode and TImode */
930 8, /* MMX or SSE register to integer */
931 8, /* size of l1 cache. */
932 1024, /* size of l2 cache. */
933 128, /* size of prefetch block */
934 8, /* number of parallel prefetches */
935 1, /* Branch cost */
936 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
937 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
938 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
939 COSTS_N_INSNS (3), /* cost of FABS instruction. */
940 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
941 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
942 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
943 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
944 {100000, unrolled_loop}, {-1, libcall}}}},
945 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
946 {-1, libcall}}},
947 {libcall, {{24, loop}, {64, unrolled_loop},
948 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
949 1, /* scalar_stmt_cost. */
950 1, /* scalar_load_cost. */
951 1, /* scalar_store_cost. */
952 1, /* vec_stmt_cost. */
953 1, /* vec_to_scalar_cost. */
954 1, /* scalar_to_vec_cost. */
955 1, /* vec_align_load_cost. */
956 2, /* vec_unalign_load_cost. */
957 1, /* vec_store_cost. */
958 3, /* cond_taken_branch_cost. */
959 1, /* cond_not_taken_branch_cost. */
960 };
961
962 static const
963 struct processor_costs core2_cost = {
964 COSTS_N_INSNS (1), /* cost of an add instruction */
965 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
966 COSTS_N_INSNS (1), /* variable shift costs */
967 COSTS_N_INSNS (1), /* constant shift costs */
968 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
969 COSTS_N_INSNS (3), /* HI */
970 COSTS_N_INSNS (3), /* SI */
971 COSTS_N_INSNS (3), /* DI */
972 COSTS_N_INSNS (3)}, /* other */
973 0, /* cost of multiply per each bit set */
974 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
975 COSTS_N_INSNS (22), /* HI */
976 COSTS_N_INSNS (22), /* SI */
977 COSTS_N_INSNS (22), /* DI */
978 COSTS_N_INSNS (22)}, /* other */
979 COSTS_N_INSNS (1), /* cost of movsx */
980 COSTS_N_INSNS (1), /* cost of movzx */
981 8, /* "large" insn */
982 16, /* MOVE_RATIO */
983 2, /* cost for loading QImode using movzbl */
984 {6, 6, 6}, /* cost of loading integer registers
985 in QImode, HImode and SImode.
986 Relative to reg-reg move (2). */
987 {4, 4, 4}, /* cost of storing integer registers */
988 2, /* cost of reg,reg fld/fst */
989 {6, 6, 6}, /* cost of loading fp registers
990 in SFmode, DFmode and XFmode */
991 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
992 2, /* cost of moving MMX register */
993 {6, 6}, /* cost of loading MMX registers
994 in SImode and DImode */
995 {4, 4}, /* cost of storing MMX registers
996 in SImode and DImode */
997 2, /* cost of moving SSE register */
998 {6, 6, 6}, /* cost of loading SSE registers
999 in SImode, DImode and TImode */
1000 {4, 4, 4}, /* cost of storing SSE registers
1001 in SImode, DImode and TImode */
1002 2, /* MMX or SSE register to integer */
1003 32, /* size of l1 cache. */
1004 2048, /* size of l2 cache. */
1005 128, /* size of prefetch block */
1006 8, /* number of parallel prefetches */
1007 3, /* Branch cost */
1008 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1009 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1010 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1011 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1012 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1013 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1014 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1015 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1016 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1017 {{libcall, {{8, loop}, {15, unrolled_loop},
1018 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1019 {libcall, {{24, loop}, {32, unrolled_loop},
1020 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1021 1, /* scalar_stmt_cost. */
1022 1, /* scalar_load_cost. */
1023 1, /* scalar_store_cost. */
1024 1, /* vec_stmt_cost. */
1025 1, /* vec_to_scalar_cost. */
1026 1, /* scalar_to_vec_cost. */
1027 1, /* vec_align_load_cost. */
1028 2, /* vec_unalign_load_cost. */
1029 1, /* vec_store_cost. */
1030 3, /* cond_taken_branch_cost. */
1031 1, /* cond_not_taken_branch_cost. */
1032 };
1033
1034 /* Generic64 should produce code tuned for Nocona and K8. */
1035 static const
1036 struct processor_costs generic64_cost = {
1037 COSTS_N_INSNS (1), /* cost of an add instruction */
1038 /* On all chips taken into consideration, lea takes 2 cycles or more. With
1039 this cost, however, our current implementation of synth_mult results in
1040 the use of unnecessary temporary registers, causing regressions on several
1041 SPECfp benchmarks. */
1042 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1043 COSTS_N_INSNS (1), /* variable shift costs */
1044 COSTS_N_INSNS (1), /* constant shift costs */
1045 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1046 COSTS_N_INSNS (4), /* HI */
1047 COSTS_N_INSNS (3), /* SI */
1048 COSTS_N_INSNS (4), /* DI */
1049 COSTS_N_INSNS (2)}, /* other */
1050 0, /* cost of multiply per each bit set */
1051 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1052 COSTS_N_INSNS (26), /* HI */
1053 COSTS_N_INSNS (42), /* SI */
1054 COSTS_N_INSNS (74), /* DI */
1055 COSTS_N_INSNS (74)}, /* other */
1056 COSTS_N_INSNS (1), /* cost of movsx */
1057 COSTS_N_INSNS (1), /* cost of movzx */
1058 8, /* "large" insn */
1059 17, /* MOVE_RATIO */
1060 4, /* cost for loading QImode using movzbl */
1061 {4, 4, 4}, /* cost of loading integer registers
1062 in QImode, HImode and SImode.
1063 Relative to reg-reg move (2). */
1064 {4, 4, 4}, /* cost of storing integer registers */
1065 4, /* cost of reg,reg fld/fst */
1066 {12, 12, 12}, /* cost of loading fp registers
1067 in SFmode, DFmode and XFmode */
1068 {6, 6, 8}, /* cost of storing fp registers
1069 in SFmode, DFmode and XFmode */
1070 2, /* cost of moving MMX register */
1071 {8, 8}, /* cost of loading MMX registers
1072 in SImode and DImode */
1073 {8, 8}, /* cost of storing MMX registers
1074 in SImode and DImode */
1075 2, /* cost of moving SSE register */
1076 {8, 8, 8}, /* cost of loading SSE registers
1077 in SImode, DImode and TImode */
1078 {8, 8, 8}, /* cost of storing SSE registers
1079 in SImode, DImode and TImode */
1080 5, /* MMX or SSE register to integer */
1081 32, /* size of l1 cache. */
1082 512, /* size of l2 cache. */
1083 64, /* size of prefetch block */
1084 6, /* number of parallel prefetches */
1085 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
1086 is increased to the perhaps more appropriate value of 5. */
1087 3, /* Branch cost */
1088 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1089 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1090 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1091 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1092 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1093 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1094 {DUMMY_STRINGOP_ALGS,
1095 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1096 {DUMMY_STRINGOP_ALGS,
1097 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1098 1, /* scalar_stmt_cost. */
1099 1, /* scalar_load_cost. */
1100 1, /* scalar_store_cost. */
1101 1, /* vec_stmt_cost. */
1102 1, /* vec_to_scalar_cost. */
1103 1, /* scalar_to_vec_cost. */
1104 1, /* vec_align_load_cost. */
1105 2, /* vec_unalign_load_cost. */
1106 1, /* vec_store_cost. */
1107 3, /* cond_taken_branch_cost. */
1108 1, /* cond_not_taken_branch_cost. */
1109 };
1110
1111 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1112 static const
1113 struct processor_costs generic32_cost = {
1114 COSTS_N_INSNS (1), /* cost of an add instruction */
1115 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1116 COSTS_N_INSNS (1), /* variable shift costs */
1117 COSTS_N_INSNS (1), /* constant shift costs */
1118 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1119 COSTS_N_INSNS (4), /* HI */
1120 COSTS_N_INSNS (3), /* SI */
1121 COSTS_N_INSNS (4), /* DI */
1122 COSTS_N_INSNS (2)}, /* other */
1123 0, /* cost of multiply per each bit set */
1124 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1125 COSTS_N_INSNS (26), /* HI */
1126 COSTS_N_INSNS (42), /* SI */
1127 COSTS_N_INSNS (74), /* DI */
1128 COSTS_N_INSNS (74)}, /* other */
1129 COSTS_N_INSNS (1), /* cost of movsx */
1130 COSTS_N_INSNS (1), /* cost of movzx */
1131 8, /* "large" insn */
1132 17, /* MOVE_RATIO */
1133 4, /* cost for loading QImode using movzbl */
1134 {4, 4, 4}, /* cost of loading integer registers
1135 in QImode, HImode and SImode.
1136 Relative to reg-reg move (2). */
1137 {4, 4, 4}, /* cost of storing integer registers */
1138 4, /* cost of reg,reg fld/fst */
1139 {12, 12, 12}, /* cost of loading fp registers
1140 in SFmode, DFmode and XFmode */
1141 {6, 6, 8}, /* cost of storing fp registers
1142 in SFmode, DFmode and XFmode */
1143 2, /* cost of moving MMX register */
1144 {8, 8}, /* cost of loading MMX registers
1145 in SImode and DImode */
1146 {8, 8}, /* cost of storing MMX registers
1147 in SImode and DImode */
1148 2, /* cost of moving SSE register */
1149 {8, 8, 8}, /* cost of loading SSE registers
1150 in SImode, DImode and TImode */
1151 {8, 8, 8}, /* cost of storing SSE registers
1152 in SImode, DImode and TImode */
1153 5, /* MMX or SSE register to integer */
1154 32, /* size of l1 cache. */
1155 256, /* size of l2 cache. */
1156 64, /* size of prefetch block */
1157 6, /* number of parallel prefetches */
1158 3, /* Branch cost */
1159 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1160 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1161 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1162 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1163 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1164 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1165 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1166 DUMMY_STRINGOP_ALGS},
1167 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1168 DUMMY_STRINGOP_ALGS},
1169 1, /* scalar_stmt_cost. */
1170 1, /* scalar_load_cost. */
1171 1, /* scalar_store_cost. */
1172 1, /* vec_stmt_cost. */
1173 1, /* vec_to_scalar_cost. */
1174 1, /* scalar_to_vec_cost. */
1175 1, /* vec_align_load_cost. */
1176 2, /* vec_unalign_load_cost. */
1177 1, /* vec_store_cost. */
1178 3, /* cond_taken_branch_cost. */
1179 1, /* cond_not_taken_branch_cost. */
1180 };
1181
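/* Annotation: cost table for the processor currently being tuned for.  The
   pentium_cost initializer is only a startup default; option processing later
   re-points this at the table matching -mtune (or at size_cost when
   optimizing for size).  */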
1182 const struct processor_costs *ix86_cost = &pentium_cost;
1183
1184 /* Processor feature/optimization bitmasks. */
1185 #define m_386 (1<<PROCESSOR_I386)
1186 #define m_486 (1<<PROCESSOR_I486)
1187 #define m_PENT (1<<PROCESSOR_PENTIUM)
1188 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1189 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1190 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1191 #define m_CORE2 (1<<PROCESSOR_CORE2)
1192
1193 #define m_GEODE (1<<PROCESSOR_GEODE)
1194 #define m_K6 (1<<PROCESSOR_K6)
1195 #define m_K6_GEODE (m_K6 | m_GEODE)
1196 #define m_K8 (1<<PROCESSOR_K8)
1197 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1198 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1199 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1200 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
1201
1202 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1203 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1204
1205 /* Generic instruction choice should be common subset of supported CPUs
1206 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1207 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1208
1209 /* Feature tests against the various tunings. */
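/* Annotation: each entry below is a mask of the m_* processor bits defined
   above; a tuning feature is in effect when the bit for the CPU selected by
   -mtune (see ix86_tune) is set in its entry.  The actual test is performed
   by the option-handling code later in this file.  */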
1210 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1211 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1212 negatively, so enabling it for Generic64 seems like a good code size
1213 tradeoff. We can't enable it for 32bit generic because it does not
1214 work well with PPro based chips. */
1215 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC64,
1216
1217 /* X86_TUNE_PUSH_MEMORY */
1218 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1219 | m_NOCONA | m_CORE2 | m_GENERIC,
1220
1221 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1222 m_486 | m_PENT,
1223
1224 /* X86_TUNE_USE_BIT_TEST */
1225 m_386,
1226
1227 /* X86_TUNE_UNROLL_STRLEN */
1228 m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6 | m_CORE2 | m_GENERIC,
1229
1230 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1231 m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_GENERIC,
1232
1233 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put into P4 based
1234 on simulation results, but after P4 was made no performance benefit
1235 was observed from branch hints, and they also increase code size.
1236 As a result, icc never generates branch hints. */
1237 0,
1238
1239 /* X86_TUNE_DOUBLE_WITH_ADD */
1240 ~m_386,
1241
1242 /* X86_TUNE_USE_SAHF */
1243 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1244 | m_NOCONA | m_CORE2 | m_GENERIC,
1245
1246 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1247 partial dependencies. */
1248 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
1249 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1250
1251 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1252 register stalls on the Generic32 compilation setting as well. However,
1253 in the current implementation partial register stalls are not eliminated
1254 very well - they can be introduced via subregs synthesized by combine
1255 and can happen in caller/callee saving sequences. Because this option
1256 pays back little on PPro based chips and conflicts with the partial reg
1257 dependencies used by Athlon/P4 based chips, it is better to leave it off
1258 for generic32 for now. */
1259 m_PPRO,
1260
1261 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1262 m_CORE2 | m_GENERIC,
1263
1264 /* X86_TUNE_USE_HIMODE_FIOP */
1265 m_386 | m_486 | m_K6_GEODE,
1266
1267 /* X86_TUNE_USE_SIMODE_FIOP */
1268 ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT | m_CORE2 | m_GENERIC),
1269
1270 /* X86_TUNE_USE_MOV0 */
1271 m_K6,
1272
1273 /* X86_TUNE_USE_CLTD */
1274 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1275
1276 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1277 m_PENT4,
1278
1279 /* X86_TUNE_SPLIT_LONG_MOVES */
1280 m_PPRO,
1281
1282 /* X86_TUNE_READ_MODIFY_WRITE */
1283 ~m_PENT,
1284
1285 /* X86_TUNE_READ_MODIFY */
1286 ~(m_PENT | m_PPRO),
1287
1288 /* X86_TUNE_PROMOTE_QIMODE */
1289 m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8_AMDFAM10 | m_CORE2
1290 | m_GENERIC /* | m_PENT4 ? */,
1291
1292 /* X86_TUNE_FAST_PREFIX */
1293 ~(m_PENT | m_486 | m_386),
1294
1295 /* X86_TUNE_SINGLE_STRINGOP */
1296 m_386 | m_PENT4 | m_NOCONA,
1297
1298 /* X86_TUNE_QIMODE_MATH */
1299 ~0,
1300
1301 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1302 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1303 might be considered for Generic32 if our scheme for avoiding partial
1304 stalls was more effective. */
1305 ~m_PPRO,
1306
1307 /* X86_TUNE_PROMOTE_QI_REGS */
1308 0,
1309
1310 /* X86_TUNE_PROMOTE_HI_REGS */
1311 m_PPRO,
1312
1313 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1314 m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1315
1316 /* X86_TUNE_ADD_ESP_8 */
1317 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
1318 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1319
1320 /* X86_TUNE_SUB_ESP_4 */
1321 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1322
1323 /* X86_TUNE_SUB_ESP_8 */
1324 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
1325 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1326
1327 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1328 for DFmode copies */
1329 ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1330 | m_GENERIC | m_GEODE),
1331
1332 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1333 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1334
1335 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1336 conflict here between PPro/Pentium4 based chips that treat 128bit
1337 SSE registers as single units and K8 based chips that divide SSE
1338 registers into two 64bit halves. This knob promotes all store destinations
1339 to be 128bit to allow register renaming on 128bit SSE units, but usually
1340 results in one extra micro-op on 64bit SSE units. Experimental results
1341 show that disabling this option on P4 causes an over 20% SPECfp regression,
1342 while enabling it on K8 causes roughly a 2.4% regression that can be partly
1343 masked by careful scheduling of moves. */
1344 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1345
1346 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1347 m_AMDFAM10,
1348
1349 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1350 are resolved on SSE register parts instead of whole registers, so we may
1351 maintain just the lower part of scalar values in the proper format, leaving the
1352 upper part undefined. */
1353 m_ATHLON_K8,
1354
1355 /* X86_TUNE_SSE_TYPELESS_STORES */
1356 m_ATHLON_K8_AMDFAM10,
1357
1358 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1359 m_PPRO | m_PENT4 | m_NOCONA,
1360
1361 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1362 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1363
1364 /* X86_TUNE_PROLOGUE_USING_MOVE */
1365 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1366
1367 /* X86_TUNE_EPILOGUE_USING_MOVE */
1368 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1369
1370 /* X86_TUNE_SHIFT1 */
1371 ~m_486,
1372
1373 /* X86_TUNE_USE_FFREEP */
1374 m_ATHLON_K8_AMDFAM10,
1375
1376 /* X86_TUNE_INTER_UNIT_MOVES */
1377 ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC),
1378
1379 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1380 ~(m_AMDFAM10),
1381
1382 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1383 than 4 branch instructions in the 16 byte window. */
1384 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1385
1386 /* X86_TUNE_SCHEDULE */
1387 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1388
1389 /* X86_TUNE_USE_BT */
1390 m_ATHLON_K8_AMDFAM10,
1391
1392 /* X86_TUNE_USE_INCDEC */
1393 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1394
1395 /* X86_TUNE_PAD_RETURNS */
1396 m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC,
1397
1398 /* X86_TUNE_EXT_80387_CONSTANTS */
1399 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1400
1401 /* X86_TUNE_SHORTEN_X87_SSE */
1402 ~m_K8,
1403
1404 /* X86_TUNE_AVOID_VECTOR_DECODE */
1405 m_K8 | m_GENERIC64,
1406
1407 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
1408 and SImode multiplies, but the 386 and 486 do HImode multiplies faster. */
1409 ~(m_386 | m_486),
1410
1411 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1412 vector path on AMD machines. */
1413 m_K8 | m_GENERIC64 | m_AMDFAM10,
1414
1415 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1416 machines. */
1417 m_K8 | m_GENERIC64 | m_AMDFAM10,
1418
1419 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1420 than a MOV. */
1421 m_PENT,
1422
1423 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1424 but one byte longer. */
1425 m_PENT,
1426
1427 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1428 operand that cannot be represented using a modRM byte. The XOR
1429 replacement is long decoded, so this split helps here as well. */
1430 m_K6,
1431
1432 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer packed vector SSE conversions
1433 from integer to FP. */
1434 m_AMDFAM10,
1435 };
1436
1437 /* Feature tests against the various architecture variations. */
1438 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1439 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1440 ~(m_386 | m_486 | m_PENT | m_K6),
1441
1442 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1443 ~m_386,
1444
1445 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1446 ~(m_386 | m_486),
1447
1448 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1449 ~m_386,
1450
1451 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1452 ~m_386,
1453 };
1454
1455 static const unsigned int x86_accumulate_outgoing_args
1456 = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1457
1458 static const unsigned int x86_arch_always_fancy_math_387
1459 = m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
1460 | m_NOCONA | m_CORE2 | m_GENERIC;
1461
1462 static enum stringop_alg stringop_alg = no_stringop;
1463
1464 /* In case the average insn count for a single function invocation is
1465 lower than this constant, emit fast (but longer) prologue and
1466 epilogue code. */
1467 #define FAST_PROLOGUE_INSN_COUNT 20
1468
1469 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1470 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1471 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1472 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1473
1474 /* Array of the smallest class containing reg number REGNO, indexed by
1475 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1476
1477 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1478 {
1479 /* ax, dx, cx, bx */
1480 AREG, DREG, CREG, BREG,
1481 /* si, di, bp, sp */
1482 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1483 /* FP registers */
1484 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1485 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1486 /* arg pointer */
1487 NON_Q_REGS,
1488 /* flags, fpsr, fpcr, frame */
1489 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1490 /* SSE registers */
1491 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1492 SSE_REGS, SSE_REGS,
1493 /* MMX registers */
1494 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1495 MMX_REGS, MMX_REGS,
1496 /* REX registers */
1497 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1498 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1499 /* SSE REX registers */
1500 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1501 SSE_REGS, SSE_REGS,
1502 };
1503
1504 /* The "default" register map used in 32bit mode. */
1505
1506 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1507 {
1508 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1509 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1510 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1511 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1512 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1513 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1514 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1515 };
1516
1517 static int const x86_64_int_parameter_registers[6] =
1518 {
1519 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1520 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1521 };
1522
1523 static int const x86_64_ms_abi_int_parameter_registers[4] =
1524 {
1525 2 /*RCX*/, 1 /*RDX*/,
1526 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1527 };
1528
1529 static int const x86_64_int_return_registers[4] =
1530 {
1531 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1532 };
1533
1534 /* The "default" register map used in 64bit mode. */
1535 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1536 {
1537 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1538 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1539 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1540 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1541 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1542 8,9,10,11,12,13,14,15, /* extended integer registers */
1543 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1544 };
1545
1546 /* Define the register numbers to be used in Dwarf debugging information.
1547 The SVR4 reference port C compiler uses the following register numbers
1548 in its Dwarf output code:
1549 0 for %eax (gcc regno = 0)
1550 1 for %ecx (gcc regno = 2)
1551 2 for %edx (gcc regno = 1)
1552 3 for %ebx (gcc regno = 3)
1553 4 for %esp (gcc regno = 7)
1554 5 for %ebp (gcc regno = 6)
1555 6 for %esi (gcc regno = 4)
1556 7 for %edi (gcc regno = 5)
1557 The following three DWARF register numbers are never generated by
1558 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1559 believes these numbers have these meanings.
1560 8 for %eip (no gcc equivalent)
1561 9 for %eflags (gcc regno = 17)
1562 10 for %trapno (no gcc equivalent)
1563 It is not at all clear how we should number the FP stack registers
1564 for the x86 architecture. If the version of SDB on x86/svr4 were
1565 a bit less brain dead with respect to floating-point then we would
1566 have a precedent to follow with respect to DWARF register numbers
1567 for x86 FP registers, but the SDB on x86/svr4 is so completely
1568 broken with respect to FP registers that it is hardly worth thinking
1569 of it as something to strive for compatibility with.
1570 The version of x86/svr4 SDB I have at the moment does (partially)
1571 seem to believe that DWARF register number 11 is associated with
1572 the x86 register %st(0), but that's about all. Higher DWARF
1573 register numbers don't seem to be associated with anything in
1574 particular, and even for DWARF regno 11, SDB only seems to under-
1575 stand that it should say that a variable lives in %st(0) (when
1576 asked via an `=' command) if we said it was in DWARF regno 11,
1577 but SDB still prints garbage when asked for the value of the
1578 variable in question (via a `/' command).
1579 (Also note that the labels SDB prints for various FP stack regs
1580 when doing an `x' command are all wrong.)
1581 Note that these problems generally don't affect the native SVR4
1582 C compiler because it doesn't allow the use of -O with -g and
1583 because when it is *not* optimizing, it allocates a memory
1584 location for each floating-point variable, and the memory
1585 location is what gets described in the DWARF AT_location
1586 attribute for the variable in question.
1587 Regardless of the severe mental illness of the x86/svr4 SDB, we
1588 do something sensible here and we use the following DWARF
1589 register numbers. Note that these are all stack-top-relative
1590 numbers.
1591 11 for %st(0) (gcc regno = 8)
1592 12 for %st(1) (gcc regno = 9)
1593 13 for %st(2) (gcc regno = 10)
1594 14 for %st(3) (gcc regno = 11)
1595 15 for %st(4) (gcc regno = 12)
1596 16 for %st(5) (gcc regno = 13)
1597 17 for %st(6) (gcc regno = 14)
1598 18 for %st(7) (gcc regno = 15)
1599 */
1600 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1601 {
1602 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1603 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1604 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1605 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1606 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1607 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1608 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1609 };
1610
1611 /* Test and compare insns in i386.md store the information needed to
1612 generate branch and scc insns here. */
1613
1614 rtx ix86_compare_op0 = NULL_RTX;
1615 rtx ix86_compare_op1 = NULL_RTX;
1616 rtx ix86_compare_emitted = NULL_RTX;
1617
1618 /* Size of the register save area. */
1619 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1620
1621 /* Define the structure for the machine field in struct function. */
1622
1623 struct stack_local_entry GTY(())
1624 {
1625 unsigned short mode;
1626 unsigned short n;
1627 rtx rtl;
1628 struct stack_local_entry *next;
1629 };
1630
1631 /* Structure describing stack frame layout.
1632 Stack grows downward:
1633
1634 [arguments]
1635 <- ARG_POINTER
1636 saved pc
1637
1638 saved frame pointer if frame_pointer_needed
1639 <- HARD_FRAME_POINTER
1640 [saved regs]
1641
1642 [padding1] \
1643 )
1644 [va_arg registers] (
1645 > to_allocate <- FRAME_POINTER
1646 [frame] (
1647 )
1648 [padding2] /
1649 */
1650 struct ix86_frame
1651 {
1652 int nregs;
1653 int padding1;
1654 int va_arg_size;
1655 HOST_WIDE_INT frame;
1656 int padding2;
1657 int outgoing_arguments_size;
1658 int red_zone_size;
1659
1660 HOST_WIDE_INT to_allocate;
1661 /* The offsets relative to ARG_POINTER. */
1662 HOST_WIDE_INT frame_pointer_offset;
1663 HOST_WIDE_INT hard_frame_pointer_offset;
1664 HOST_WIDE_INT stack_pointer_offset;
1665
1666 /* When save_regs_using_mov is set, emit prologue using
1667 move instead of push instructions. */
1668 bool save_regs_using_mov;
1669 };
1670
1671 /* Code model option. */
1672 enum cmodel ix86_cmodel;
1673 /* Asm dialect. */
1674 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1675 /* TLS dialects. */
1676 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1677
1678 /* Which unit we are generating floating point math for. */
1679 enum fpmath_unit ix86_fpmath;
1680
1681 /* Which cpu are we scheduling for. */
1682 enum processor_type ix86_tune;
1683
1684 /* Which instruction set architecture to use. */
1685 enum processor_type ix86_arch;
1686
1687 /* True if the SSE prefetch instruction is not a NOP. */
1688 int x86_prefetch_sse;
1689
1690 /* ix86_regparm_string as a number */
1691 static int ix86_regparm;
1692
1693 /* -mstackrealign option */
1694 extern int ix86_force_align_arg_pointer;
1695 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1696
1697 /* Preferred alignment for stack boundary in bits. */
1698 unsigned int ix86_preferred_stack_boundary;
1699
1700 /* Values 1-5: see jump.c */
1701 int ix86_branch_cost;
1702
1703 /* Variables which are this size or smaller are put in the data/bss
1704 or ldata/lbss sections. */
1705
1706 int ix86_section_threshold = 65536;
1707
1708 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1709 char internal_label_prefix[16];
1710 int internal_label_prefix_len;
1711
1712 /* Fence to use after loop using movnt. */
1713 tree x86_mfence;
1714
1715 /* Register class used for passing a given 64bit part of the argument.
1716 These represent classes as documented by the PS ABI, with the exception
1717 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
1718 uses an SFmode or DFmode move instead of a DImode move to avoid reformatting penalties.
1719
1720 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1721 whenever possible (upper half does contain padding). */
1722 enum x86_64_reg_class
1723 {
1724 X86_64_NO_CLASS,
1725 X86_64_INTEGER_CLASS,
1726 X86_64_INTEGERSI_CLASS,
1727 X86_64_SSE_CLASS,
1728 X86_64_SSESF_CLASS,
1729 X86_64_SSEDF_CLASS,
1730 X86_64_SSEUP_CLASS,
1731 X86_64_X87_CLASS,
1732 X86_64_X87UP_CLASS,
1733 X86_64_COMPLEX_X87_CLASS,
1734 X86_64_MEMORY_CLASS
1735 };
1736 static const char * const x86_64_reg_class_name[] =
1737 {
1738 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1739 "sseup", "x87", "x87up", "cplx87", "no"
1740 };
1741
1742 #define MAX_CLASSES 4
1743
1744 /* Table of constants used by fldpi, fldln2, etc.... */
1745 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1746 static bool ext_80387_constants_init = 0;
1747
1748 \f
1749 static struct machine_function * ix86_init_machine_status (void);
1750 static rtx ix86_function_value (const_tree, const_tree, bool);
1751 static int ix86_function_regparm (const_tree, const_tree);
1752 static void ix86_compute_frame_layout (struct ix86_frame *);
1753 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1754 rtx, rtx, int);
1755
1756 \f
1757 /* The svr4 ABI for the i386 says that records and unions are returned
1758 in memory. */
1759 #ifndef DEFAULT_PCC_STRUCT_RETURN
1760 #define DEFAULT_PCC_STRUCT_RETURN 1
1761 #endif
1762
1763 /* Bit flags that specify the ISA we are compiling for. */
1764 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1765
1766 /* A mask of ix86_isa_flags that includes bit X if X
1767 was set or cleared on the command line. */
1768 static int ix86_isa_flags_explicit;
1769
1770 /* For each ISA, define the set of dependent ISAs that must also be disabled
1771 when that ISA is disabled. The MMX and SSE ISA chains are handled separately. */
1772
1773 #define OPTION_MASK_ISA_MMX_UNSET \
1774 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
1775 #define OPTION_MASK_ISA_3DNOW_UNSET OPTION_MASK_ISA_3DNOW_A
1776
1777 #define OPTION_MASK_ISA_SSE_UNSET \
1778 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE2_UNSET)
1779 #define OPTION_MASK_ISA_SSE2_UNSET \
1780 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE3_UNSET)
1781 #define OPTION_MASK_ISA_SSE3_UNSET \
1782 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSSE3_UNSET)
1783 #define OPTION_MASK_ISA_SSSE3_UNSET \
1784 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_1_UNSET)
1785 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1786 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_2_UNSET)
1787 #define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4A
1788
1789 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1790 as -msse4.1 -msse4.2. -mno-sse4 should be the same as -mno-sse4.1. */
1791 #define OPTION_MASK_ISA_SSE4 \
1792 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2)
1793 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1794
1795 #define OPTION_MASK_ISA_SSE4A_UNSET OPTION_MASK_ISA_SSE4
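/* Worked example of the cascade above (illustrative only): disabling SSE2
   clears OPTION_MASK_ISA_SSE2_UNSET, which expands to

     SSE3 | (SSSE3 | (SSE4_1 | (SSE4_2 | SSE4A)))

   so -mno-sse2 also disables SSE3, SSSE3, SSE4.1, SSE4.2 and SSE4A.  */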
1796
1797 /* Vectorization library interface and handlers. */
1798 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
1799 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
1800
1801 /* Implement TARGET_HANDLE_OPTION. */
1802
1803 static bool
1804 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1805 {
1806 switch (code)
1807 {
1808 case OPT_mmmx:
1809 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX;
1810 if (!value)
1811 {
1812 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1813 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
1814 }
1815 return true;
1816
1817 case OPT_m3dnow:
1818 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW;
1819 if (!value)
1820 {
1821 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
1822 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
1823 }
1824 return true;
1825
1826 case OPT_m3dnowa:
1827 return false;
1828
1829 case OPT_msse:
1830 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE;
1831 if (!value)
1832 {
1833 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
1834 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
1835 }
1836 return true;
1837
1838 case OPT_msse2:
1839 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2;
1840 if (!value)
1841 {
1842 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
1843 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
1844 }
1845 return true;
1846
1847 case OPT_msse3:
1848 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3;
1849 if (!value)
1850 {
1851 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
1852 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
1853 }
1854 return true;
1855
1856 case OPT_mssse3:
1857 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3;
1858 if (!value)
1859 {
1860 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
1861 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
1862 }
1863 return true;
1864
1865 case OPT_msse4_1:
1866 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1;
1867 if (!value)
1868 {
1869 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
1870 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
1871 }
1872 return true;
1873
1874 case OPT_msse4_2:
1875 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2;
1876 if (!value)
1877 {
1878 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
1879 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
1880 }
1881 return true;
1882
1883 case OPT_msse4:
1884 ix86_isa_flags |= OPTION_MASK_ISA_SSE4;
1885 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4;
1886 return true;
1887
1888 case OPT_mno_sse4:
1889 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
1890 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
1891 return true;
1892
1893 case OPT_msse4a:
1894 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A;
1895 if (!value)
1896 {
1897 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
1898 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
1899 }
1900 return true;
1901
1902 default:
1903 return true;
1904 }
1905 }
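/* Illustrative example (not part of the build): "-mno-ssse3 -march=core2"
   records OPTION_MASK_ISA_SSSE3 (and its dependent ISAs) in
   ix86_isa_flags_explicit while clearing the corresponding bits in
   ix86_isa_flags; override_options below then does not re-enable SSSE3 for
   core2, because it checks ix86_isa_flags_explicit before setting each
   PTA_* implied ISA.  */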
1906
1907 /* Sometimes certain combinations of command options do not make
1908 sense on a particular target machine. You can define a macro
1909 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1910 defined, is executed once just after all the command options have
1911 been parsed.
1912
1913 Don't use this macro to turn on various extra optimizations for
1914 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1915
1916 void
1917 override_options (void)
1918 {
1919 int i;
1920 int ix86_tune_defaulted = 0;
1921 int ix86_arch_specified = 0;
1922 unsigned int ix86_arch_mask, ix86_tune_mask;
1923
1924 /* Comes from final.c -- no real reason to change it. */
1925 #define MAX_CODE_ALIGN 16
1926
1927 static struct ptt
1928 {
1929 const struct processor_costs *cost; /* Processor costs */
1930 const int align_loop; /* Default alignments. */
1931 const int align_loop_max_skip;
1932 const int align_jump;
1933 const int align_jump_max_skip;
1934 const int align_func;
1935 }
1936 const processor_target_table[PROCESSOR_max] =
1937 {
1938 {&i386_cost, 4, 3, 4, 3, 4},
1939 {&i486_cost, 16, 15, 16, 15, 16},
1940 {&pentium_cost, 16, 7, 16, 7, 16},
1941 {&pentiumpro_cost, 16, 15, 16, 10, 16},
1942 {&geode_cost, 0, 0, 0, 0, 0},
1943 {&k6_cost, 32, 7, 32, 7, 32},
1944 {&athlon_cost, 16, 7, 16, 7, 16},
1945 {&pentium4_cost, 0, 0, 0, 0, 0},
1946 {&k8_cost, 16, 7, 16, 7, 16},
1947 {&nocona_cost, 0, 0, 0, 0, 0},
1948 {&core2_cost, 16, 10, 16, 10, 16},
1949 {&generic32_cost, 16, 7, 16, 7, 16},
1950 {&generic64_cost, 16, 10, 16, 10, 16},
1951 {&amdfam10_cost, 32, 24, 32, 7, 32}
1952 };
1953
1954 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1955 enum pta_flags
1956 {
1957 PTA_SSE = 1 << 0,
1958 PTA_SSE2 = 1 << 1,
1959 PTA_SSE3 = 1 << 2,
1960 PTA_MMX = 1 << 3,
1961 PTA_PREFETCH_SSE = 1 << 4,
1962 PTA_3DNOW = 1 << 5,
1963 PTA_3DNOW_A = 1 << 6,
1964 PTA_64BIT = 1 << 7,
1965 PTA_SSSE3 = 1 << 8,
1966 PTA_CX16 = 1 << 9,
1967 PTA_POPCNT = 1 << 10,
1968 PTA_ABM = 1 << 11,
1969 PTA_SSE4A = 1 << 12,
1970 PTA_NO_SAHF = 1 << 13,
1971 PTA_SSE4_1 = 1 << 14,
1972 PTA_SSE4_2 = 1 << 15
1973 };
1974
1975 static struct pta
1976 {
1977 const char *const name; /* processor name or nickname. */
1978 const enum processor_type processor;
1979 const unsigned /*enum pta_flags*/ flags;
1980 }
1981 const processor_alias_table[] =
1982 {
1983 {"i386", PROCESSOR_I386, 0},
1984 {"i486", PROCESSOR_I486, 0},
1985 {"i586", PROCESSOR_PENTIUM, 0},
1986 {"pentium", PROCESSOR_PENTIUM, 0},
1987 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1988 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1989 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1990 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1991 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
1992 {"i686", PROCESSOR_PENTIUMPRO, 0},
1993 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1994 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1995 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
1996 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
1997 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
1998 {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX |PTA_SSE | PTA_SSE2},
1999 {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
2000 {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2001 {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
2002 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2003 | PTA_CX16 | PTA_NO_SAHF)},
2004 {"core2", PROCESSOR_CORE2, (PTA_64BIT
2005 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2006 | PTA_SSSE3
2007 | PTA_CX16)},
2008 {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2009 |PTA_PREFETCH_SSE)},
2010 {"k6", PROCESSOR_K6, PTA_MMX},
2011 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2012 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2013 {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2014 | PTA_PREFETCH_SSE)},
2015 {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2016 | PTA_PREFETCH_SSE)},
2017 {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2018 | PTA_SSE)},
2019 {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2020 | PTA_SSE)},
2021 {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2022 | PTA_SSE)},
2023 {"x86-64", PROCESSOR_K8, (PTA_64BIT
2024 | PTA_MMX | PTA_SSE | PTA_SSE2
2025 | PTA_NO_SAHF)},
2026 {"k8", PROCESSOR_K8, (PTA_64BIT
2027 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2028 | PTA_SSE | PTA_SSE2
2029 | PTA_NO_SAHF)},
2030 {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
2031 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2032 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2033 | PTA_NO_SAHF)},
2034 {"opteron", PROCESSOR_K8, (PTA_64BIT
2035 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2036 | PTA_SSE | PTA_SSE2
2037 | PTA_NO_SAHF)},
2038 {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
2039 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2040 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2041 | PTA_NO_SAHF)},
2042 {"athlon64", PROCESSOR_K8, (PTA_64BIT
2043 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2044 | PTA_SSE | PTA_SSE2
2045 | PTA_NO_SAHF)},
2046 {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
2047 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2048 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2049 | PTA_NO_SAHF)},
2050 {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
2051 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2052 | PTA_SSE | PTA_SSE2
2053 | PTA_NO_SAHF)},
2054 {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
2055 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2056 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2057 | PTA_SSE4A
2058 | PTA_CX16 | PTA_ABM)},
2059 {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
2060 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2061 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2062 | PTA_SSE4A
2063 | PTA_CX16 | PTA_ABM)},
2064 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
2065 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
2066 };
2067
2068 int const pta_size = ARRAY_SIZE (processor_alias_table);
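  /* Illustrative example (not from the sources): with -march=core2 the loops
     below find the "core2" entry, set ix86_arch (and, by default, ix86_tune)
     to PROCESSOR_CORE2, enable the implied ISA bits -- MMX, SSE, SSE2, SSE3
     and SSSE3 -- unless the user set or cleared them explicitly, and set
     x86_cmpxchg16b for PTA_CX16.  */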
2069
2070 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2071 SUBTARGET_OVERRIDE_OPTIONS;
2072 #endif
2073
2074 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2075 SUBSUBTARGET_OVERRIDE_OPTIONS;
2076 #endif
2077
2078 /* -fPIC is the default for x86_64. */
2079 if (TARGET_MACHO && TARGET_64BIT)
2080 flag_pic = 2;
2081
2082 /* Set the default values for switches whose default depends on TARGET_64BIT
2083 in case they weren't overwritten by command line options. */
2084 if (TARGET_64BIT)
2085 {
2086 /* Mach-O doesn't support omitting the frame pointer for now. */
2087 if (flag_omit_frame_pointer == 2)
2088 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2089 if (flag_asynchronous_unwind_tables == 2)
2090 flag_asynchronous_unwind_tables = 1;
2091 if (flag_pcc_struct_return == 2)
2092 flag_pcc_struct_return = 0;
2093 }
2094 else
2095 {
2096 if (flag_omit_frame_pointer == 2)
2097 flag_omit_frame_pointer = 0;
2098 if (flag_asynchronous_unwind_tables == 2)
2099 flag_asynchronous_unwind_tables = 0;
2100 if (flag_pcc_struct_return == 2)
2101 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2102 }
2103
2104 /* Need to check -mtune=generic first. */
2105 if (ix86_tune_string)
2106 {
2107 if (!strcmp (ix86_tune_string, "generic")
2108 || !strcmp (ix86_tune_string, "i686")
2109 /* As special support for cross compilers we read -mtune=native
2110 as -mtune=generic. With native compilers we won't see the
2111 -mtune=native, as it was changed by the driver. */
2112 || !strcmp (ix86_tune_string, "native"))
2113 {
2114 if (TARGET_64BIT)
2115 ix86_tune_string = "generic64";
2116 else
2117 ix86_tune_string = "generic32";
2118 }
2119 else if (!strncmp (ix86_tune_string, "generic", 7))
2120 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2121 }
2122 else
2123 {
2124 if (ix86_arch_string)
2125 ix86_tune_string = ix86_arch_string;
2126 if (!ix86_tune_string)
2127 {
2128 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
2129 ix86_tune_defaulted = 1;
2130 }
2131
2132 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2133 need to use a sensible tune option. */
2134 if (!strcmp (ix86_tune_string, "generic")
2135 || !strcmp (ix86_tune_string, "x86-64")
2136 || !strcmp (ix86_tune_string, "i686"))
2137 {
2138 if (TARGET_64BIT)
2139 ix86_tune_string = "generic64";
2140 else
2141 ix86_tune_string = "generic32";
2142 }
2143 }
2144 if (ix86_stringop_string)
2145 {
2146 if (!strcmp (ix86_stringop_string, "rep_byte"))
2147 stringop_alg = rep_prefix_1_byte;
2148 else if (!strcmp (ix86_stringop_string, "libcall"))
2149 stringop_alg = libcall;
2150 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2151 stringop_alg = rep_prefix_4_byte;
2152 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2153 stringop_alg = rep_prefix_8_byte;
2154 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2155 stringop_alg = loop_1_byte;
2156 else if (!strcmp (ix86_stringop_string, "loop"))
2157 stringop_alg = loop;
2158 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2159 stringop_alg = unrolled_loop;
2160 else
2161 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
2162 }
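  /* Example usage (illustrative): -mstringop-strategy=rep_8byte forces the
     expanded block move/clear operations to use the rep_prefix_8_byte
     algorithm instead of the per-size default.  */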
2163 if (!strcmp (ix86_tune_string, "x86-64"))
2164 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2165 "-mtune=generic instead as appropriate.");
2166
2167 if (!ix86_arch_string)
2168 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2169 else
2170 ix86_arch_specified = 1;
2171
2172 if (!strcmp (ix86_arch_string, "generic"))
2173 error ("generic CPU can be used only for -mtune= switch");
2174 if (!strncmp (ix86_arch_string, "generic", 7))
2175 error ("bad value (%s) for -march= switch", ix86_arch_string);
2176
2177 if (ix86_cmodel_string != 0)
2178 {
2179 if (!strcmp (ix86_cmodel_string, "small"))
2180 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2181 else if (!strcmp (ix86_cmodel_string, "medium"))
2182 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2183 else if (!strcmp (ix86_cmodel_string, "large"))
2184 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2185 else if (flag_pic)
2186 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2187 else if (!strcmp (ix86_cmodel_string, "32"))
2188 ix86_cmodel = CM_32;
2189 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2190 ix86_cmodel = CM_KERNEL;
2191 else
2192 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2193 }
2194 else
2195 {
2196 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
2197 use of rip-relative addressing. This eliminates fixups that
2198 would otherwise be needed if this object is to be placed in a
2199 DLL, and is essentially just as efficient as direct addressing. */
2200 if (TARGET_64BIT_MS_ABI)
2201 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2202 else if (TARGET_64BIT)
2203 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2204 else
2205 ix86_cmodel = CM_32;
2206 }
2207 if (ix86_asm_string != 0)
2208 {
2209 if (! TARGET_MACHO
2210 && !strcmp (ix86_asm_string, "intel"))
2211 ix86_asm_dialect = ASM_INTEL;
2212 else if (!strcmp (ix86_asm_string, "att"))
2213 ix86_asm_dialect = ASM_ATT;
2214 else
2215 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2216 }
2217 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2218 error ("code model %qs not supported in the %s bit mode",
2219 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2220 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2221 sorry ("%i-bit mode not compiled in",
2222 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2223
2224 for (i = 0; i < pta_size; i++)
2225 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2226 {
2227 ix86_arch = processor_alias_table[i].processor;
2228 /* Default cpu tuning to the architecture. */
2229 ix86_tune = ix86_arch;
2230
2231 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2232 error ("CPU you selected does not support x86-64 "
2233 "instruction set");
2234
2235 if (processor_alias_table[i].flags & PTA_MMX
2236 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2237 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2238 if (processor_alias_table[i].flags & PTA_3DNOW
2239 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2240 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2241 if (processor_alias_table[i].flags & PTA_3DNOW_A
2242 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2243 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2244 if (processor_alias_table[i].flags & PTA_SSE
2245 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2246 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2247 if (processor_alias_table[i].flags & PTA_SSE2
2248 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2249 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2250 if (processor_alias_table[i].flags & PTA_SSE3
2251 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2252 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2253 if (processor_alias_table[i].flags & PTA_SSSE3
2254 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2255 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2256 if (processor_alias_table[i].flags & PTA_SSE4_1
2257 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2258 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2259 if (processor_alias_table[i].flags & PTA_SSE4_2
2260 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2261 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2262 if (processor_alias_table[i].flags & PTA_SSE4A
2263 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2264 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2265
2266 if (processor_alias_table[i].flags & PTA_ABM)
2267 x86_abm = true;
2268 if (processor_alias_table[i].flags & PTA_CX16)
2269 x86_cmpxchg16b = true;
2270 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM))
2271 x86_popcnt = true;
2272 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2273 x86_prefetch_sse = true;
2274 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
2275 x86_sahf = true;
2276
2277 break;
2278 }
2279
2280 if (i == pta_size)
2281 error ("bad value (%s) for -march= switch", ix86_arch_string);
2282
2283 ix86_arch_mask = 1u << ix86_arch;
2284 for (i = 0; i < X86_ARCH_LAST; ++i)
2285 ix86_arch_features[i] &= ix86_arch_mask;
2286
2287 for (i = 0; i < pta_size; i++)
2288 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2289 {
2290 ix86_tune = processor_alias_table[i].processor;
2291 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2292 {
2293 if (ix86_tune_defaulted)
2294 {
2295 ix86_tune_string = "x86-64";
2296 for (i = 0; i < pta_size; i++)
2297 if (! strcmp (ix86_tune_string,
2298 processor_alias_table[i].name))
2299 break;
2300 ix86_tune = processor_alias_table[i].processor;
2301 }
2302 else
2303 error ("CPU you selected does not support x86-64 "
2304 "instruction set");
2305 }
2306 /* Intel CPUs have always interpreted SSE prefetch instructions as
2307 NOPs; so, we can enable SSE prefetch instructions even when
2308 -mtune (rather than -march) points us to a processor that has them.
2309 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2310 higher processors. */
2311 if (TARGET_CMOVE
2312 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2313 x86_prefetch_sse = true;
2314 break;
2315 }
2316 if (i == pta_size)
2317 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2318
2319 ix86_tune_mask = 1u << ix86_tune;
2320 for (i = 0; i < X86_TUNE_LAST; ++i)
2321 ix86_tune_features[i] &= ix86_tune_mask;
2322
2323 if (optimize_size)
2324 ix86_cost = &size_cost;
2325 else
2326 ix86_cost = processor_target_table[ix86_tune].cost;
2327
2328 /* Arrange to set up i386_stack_locals for all functions. */
2329 init_machine_status = ix86_init_machine_status;
2330
2331 /* Validate -mregparm= value. */
2332 if (ix86_regparm_string)
2333 {
2334 if (TARGET_64BIT)
2335 warning (0, "-mregparm is ignored in 64-bit mode");
2336 i = atoi (ix86_regparm_string);
2337 if (i < 0 || i > REGPARM_MAX)
2338 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2339 else
2340 ix86_regparm = i;
2341 }
2342 if (TARGET_64BIT)
2343 ix86_regparm = REGPARM_MAX;
2344
2345 /* If the user has provided any of the -malign-* options,
2346 warn and use that value only if -falign-* is not set.
2347 Remove this code in GCC 3.2 or later. */
2348 if (ix86_align_loops_string)
2349 {
2350 warning (0, "-malign-loops is obsolete, use -falign-loops");
2351 if (align_loops == 0)
2352 {
2353 i = atoi (ix86_align_loops_string);
2354 if (i < 0 || i > MAX_CODE_ALIGN)
2355 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2356 else
2357 align_loops = 1 << i;
2358 }
2359 }
2360
2361 if (ix86_align_jumps_string)
2362 {
2363 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2364 if (align_jumps == 0)
2365 {
2366 i = atoi (ix86_align_jumps_string);
2367 if (i < 0 || i > MAX_CODE_ALIGN)
2368 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2369 else
2370 align_jumps = 1 << i;
2371 }
2372 }
2373
2374 if (ix86_align_funcs_string)
2375 {
2376 warning (0, "-malign-functions is obsolete, use -falign-functions");
2377 if (align_functions == 0)
2378 {
2379 i = atoi (ix86_align_funcs_string);
2380 if (i < 0 || i > MAX_CODE_ALIGN)
2381 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2382 else
2383 align_functions = 1 << i;
2384 }
2385 }
2386
2387 /* Default align_* from the processor table. */
2388 if (align_loops == 0)
2389 {
2390 align_loops = processor_target_table[ix86_tune].align_loop;
2391 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2392 }
2393 if (align_jumps == 0)
2394 {
2395 align_jumps = processor_target_table[ix86_tune].align_jump;
2396 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2397 }
2398 if (align_functions == 0)
2399 {
2400 align_functions = processor_target_table[ix86_tune].align_func;
2401 }
2402
2403 /* Validate -mbranch-cost= value, or provide default. */
2404 ix86_branch_cost = ix86_cost->branch_cost;
2405 if (ix86_branch_cost_string)
2406 {
2407 i = atoi (ix86_branch_cost_string);
2408 if (i < 0 || i > 5)
2409 error ("-mbranch-cost=%d is not between 0 and 5", i);
2410 else
2411 ix86_branch_cost = i;
2412 }
2413 if (ix86_section_threshold_string)
2414 {
2415 i = atoi (ix86_section_threshold_string);
2416 if (i < 0)
2417 error ("-mlarge-data-threshold=%d is negative", i);
2418 else
2419 ix86_section_threshold = i;
2420 }
2421
2422 if (ix86_tls_dialect_string)
2423 {
2424 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2425 ix86_tls_dialect = TLS_DIALECT_GNU;
2426 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2427 ix86_tls_dialect = TLS_DIALECT_GNU2;
2428 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2429 ix86_tls_dialect = TLS_DIALECT_SUN;
2430 else
2431 error ("bad value (%s) for -mtls-dialect= switch",
2432 ix86_tls_dialect_string);
2433 }
2434
2435 if (ix87_precision_string)
2436 {
2437 i = atoi (ix87_precision_string);
2438 if (i != 32 && i != 64 && i != 80)
2439 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
2440 }
2441
2442 if (TARGET_64BIT)
2443 {
2444 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
2445
2446 /* Enable by default the SSE and MMX builtins. Do allow the user to
2447 explicitly disable any of these. In particular, disabling SSE and
2448 MMX for kernel code is extremely useful. */
2449 if (!ix86_arch_specified)
2450 ix86_isa_flags
2451 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
2452 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
2453
2454 if (TARGET_RTD)
2455 warning (0, "-mrtd is ignored in 64bit mode");
2456 }
2457 else
2458 {
2459 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
2460
2461 if (!ix86_arch_specified)
2462 ix86_isa_flags
2463 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
2464
2465 /* The i386 ABI does not specify a red zone. It still makes sense to use it
2466 when the programmer takes care to keep the stack from being destroyed. */
2467 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2468 target_flags |= MASK_NO_RED_ZONE;
2469 }
2470
2471 /* Keep nonleaf frame pointers. */
2472 if (flag_omit_frame_pointer)
2473 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2474 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2475 flag_omit_frame_pointer = 1;
2476
2477 /* If we're doing fast math, we don't care about comparison order
2478 wrt NaNs. This lets us use a shorter comparison sequence. */
2479 if (flag_finite_math_only)
2480 target_flags &= ~MASK_IEEE_FP;
2481
2482 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2483 since the insns won't need emulation. */
2484 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2485 target_flags &= ~MASK_NO_FANCY_MATH_387;
2486
2487 /* Likewise, if the target doesn't have a 387, or we've specified
2488 software floating point, don't use 387 inline intrinsics. */
2489 if (!TARGET_80387)
2490 target_flags |= MASK_NO_FANCY_MATH_387;
2491
2492 /* Turn on SSE4.1 builtins for -msse4.2. */
2493 if (TARGET_SSE4_2)
2494 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2495
2496 /* Turn on SSSE3 builtins for -msse4.1. */
2497 if (TARGET_SSE4_1)
2498 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2499
2500 /* Turn on SSE3 builtins for -mssse3. */
2501 if (TARGET_SSSE3)
2502 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2503
2504 /* Turn on SSE3 builtins for -msse4a. */
2505 if (TARGET_SSE4A)
2506 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2507
2508 /* Turn on SSE2 builtins for -msse3. */
2509 if (TARGET_SSE3)
2510 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2511
2512 /* Turn on SSE builtins for -msse2. */
2513 if (TARGET_SSE2)
2514 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2515
2516 /* Turn on MMX builtins for -msse. */
2517 if (TARGET_SSE)
2518 {
2519 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
2520 x86_prefetch_sse = true;
2521 }
2522
2523 /* Turn on MMX builtins for 3Dnow. */
2524 if (TARGET_3DNOW)
2525 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
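  /* Illustrative net effect of the implications above (not part of the
     build): a bare -msse4.2 ends up enabling SSE4.1, SSSE3, SSE3, SSE2, SSE
     and MMX as well, since each block above folds the next lower extension
     into ix86_isa_flags (MMX only if it was not explicitly disabled).  */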
2526
2527 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
2528 if (TARGET_SSE4_2 || TARGET_ABM)
2529 x86_popcnt = true;
2530
2531 /* Validate -mpreferred-stack-boundary= value, or provide default.
2532 The default of 128 bits is for Pentium III's SSE __m128. We can't
2533 lower it just for optimize_size, since otherwise object files
2534 compiled with -Os and -On could not be mixed. */
2535 ix86_preferred_stack_boundary = 128;
2536 if (ix86_preferred_stack_boundary_string)
2537 {
2538 i = atoi (ix86_preferred_stack_boundary_string);
2539 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2540 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2541 TARGET_64BIT ? 4 : 2);
2542 else
2543 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2544 }
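  /* Worked example (illustrative): -mpreferred-stack-boundary=4 yields
     (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. a 16-byte aligned
     stack, matching the 128-bit default above.  */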
2545
2546 /* Accept -msseregparm only if at least SSE support is enabled. */
2547 if (TARGET_SSEREGPARM
2548 && ! TARGET_SSE)
2549 error ("-msseregparm used without SSE enabled");
2550
2551 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2552 if (ix86_fpmath_string != 0)
2553 {
2554 if (! strcmp (ix86_fpmath_string, "387"))
2555 ix86_fpmath = FPMATH_387;
2556 else if (! strcmp (ix86_fpmath_string, "sse"))
2557 {
2558 if (!TARGET_SSE)
2559 {
2560 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2561 ix86_fpmath = FPMATH_387;
2562 }
2563 else
2564 ix86_fpmath = FPMATH_SSE;
2565 }
2566 else if (! strcmp (ix86_fpmath_string, "387,sse")
2567 || ! strcmp (ix86_fpmath_string, "sse,387"))
2568 {
2569 if (!TARGET_SSE)
2570 {
2571 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2572 ix86_fpmath = FPMATH_387;
2573 }
2574 else if (!TARGET_80387)
2575 {
2576 warning (0, "387 instruction set disabled, using SSE arithmetics");
2577 ix86_fpmath = FPMATH_SSE;
2578 }
2579 else
2580 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
2581 }
2582 else
2583 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2584 }
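  /* Illustrative example: with both SSE and the 387 available,
     -mfpmath=sse,387 sets ix86_fpmath to FPMATH_SSE | FPMATH_387, allowing
     scalar FP math in either register file, while plain -mfpmath=sse falls
     back to FPMATH_387 with a warning when SSE is disabled.  */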
2585
2586 /* If the i387 is disabled, then do not return values in it. */
2587 if (!TARGET_80387)
2588 target_flags &= ~MASK_FLOAT_RETURNS;
2589
2590 /* Use external vectorized library in vectorizing intrinsics. */
2591 if (ix86_veclibabi_string)
2592 {
2593 if (strcmp (ix86_veclibabi_string, "acml") == 0)
2594 ix86_veclib_handler = ix86_veclibabi_acml;
2595 else
2596 error ("unknown vectorization library ABI type (%s) for "
2597 "-mveclibabi= switch", ix86_veclibabi_string);
2598 }
2599
2600 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2601 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2602 && !optimize_size)
2603 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2604
2605 /* ??? Unwind info is not correct around the CFG unless either a frame
2606 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2607 unwind info generation to be aware of the CFG and propagating states
2608 around edges. */
2609 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2610 || flag_exceptions || flag_non_call_exceptions)
2611 && flag_omit_frame_pointer
2612 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2613 {
2614 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2615 warning (0, "unwind tables currently require either a frame pointer "
2616 "or -maccumulate-outgoing-args for correctness");
2617 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2618 }
2619
2620 /* For sane SSE instruction set generation we need the fcomi instruction.
2621 It is safe to enable all CMOVE instructions. */
2622 if (TARGET_SSE)
2623 TARGET_CMOVE = 1;
2624
2625 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2626 {
2627 char *p;
2628 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2629 p = strchr (internal_label_prefix, 'X');
2630 internal_label_prefix_len = p - internal_label_prefix;
2631 *p = '\0';
2632 }
2633
2634 /* When the scheduling description is not available, disable the scheduler pass
2635 so that it won't slow down the compilation or make x87 code slower. */
2636 if (!TARGET_SCHEDULE)
2637 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2638
2639 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2640 set_param_value ("simultaneous-prefetches",
2641 ix86_cost->simultaneous_prefetches);
2642 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2643 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2644 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
2645 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
2646 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
2647 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
2648 }
2649 \f
2650 /* Return true if this goes in large data/bss. */
2651
2652 static bool
2653 ix86_in_large_data_p (tree exp)
2654 {
2655 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2656 return false;
2657
2658 /* Functions are never large data. */
2659 if (TREE_CODE (exp) == FUNCTION_DECL)
2660 return false;
2661
2662 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2663 {
2664 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2665 if (strcmp (section, ".ldata") == 0
2666 || strcmp (section, ".lbss") == 0)
2667 return true;
2668 return false;
2669 }
2670 else
2671 {
2672 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2673
2674 /* If this is an incomplete type with size 0, then we can't put it
2675 in data because it might be too big when completed. */
2676 if (!size || size > ix86_section_threshold)
2677 return true;
2678 }
2679
2680 return false;
2681 }
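/* Illustrative example (hypothetical variable): under -mcmodel=medium with
   the default -mlarge-data-threshold of 65536, a 100000-byte array is
   considered large data here and is placed in .ldata/.lbss by the section
   hooks below, while a 4-byte scalar stays in the normal sections.  */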
2682
2683 /* Switch to the appropriate section for output of DECL.
2684 DECL is either a `VAR_DECL' node or a constant of some sort.
2685 RELOC indicates whether forming the initial value of DECL requires
2686 link-time relocations. */
2687
2688 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2689 ATTRIBUTE_UNUSED;
2690
2691 static section *
2692 x86_64_elf_select_section (tree decl, int reloc,
2693 unsigned HOST_WIDE_INT align)
2694 {
2695 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2696 && ix86_in_large_data_p (decl))
2697 {
2698 const char *sname = NULL;
2699 unsigned int flags = SECTION_WRITE;
2700 switch (categorize_decl_for_section (decl, reloc))
2701 {
2702 case SECCAT_DATA:
2703 sname = ".ldata";
2704 break;
2705 case SECCAT_DATA_REL:
2706 sname = ".ldata.rel";
2707 break;
2708 case SECCAT_DATA_REL_LOCAL:
2709 sname = ".ldata.rel.local";
2710 break;
2711 case SECCAT_DATA_REL_RO:
2712 sname = ".ldata.rel.ro";
2713 break;
2714 case SECCAT_DATA_REL_RO_LOCAL:
2715 sname = ".ldata.rel.ro.local";
2716 break;
2717 case SECCAT_BSS:
2718 sname = ".lbss";
2719 flags |= SECTION_BSS;
2720 break;
2721 case SECCAT_RODATA:
2722 case SECCAT_RODATA_MERGE_STR:
2723 case SECCAT_RODATA_MERGE_STR_INIT:
2724 case SECCAT_RODATA_MERGE_CONST:
2725 sname = ".lrodata";
2726 flags = 0;
2727 break;
2728 case SECCAT_SRODATA:
2729 case SECCAT_SDATA:
2730 case SECCAT_SBSS:
2731 gcc_unreachable ();
2732 case SECCAT_TEXT:
2733 case SECCAT_TDATA:
2734 case SECCAT_TBSS:
2735 /* We don't split these for the medium model. Place them into
2736 default sections and hope for the best. */
2737 break;
2738 }
2739 if (sname)
2740 {
2741 /* We might get called with string constants, but get_named_section
2742 doesn't like them as they are not DECLs. Also, we need to set
2743 flags in that case. */
2744 if (!DECL_P (decl))
2745 return get_section (sname, flags, NULL);
2746 return get_named_section (decl, sname, reloc);
2747 }
2748 }
2749 return default_elf_select_section (decl, reloc, align);
2750 }
2751
2752 /* Build up a unique section name, expressed as a
2753 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2754 RELOC indicates whether the initial value of EXP requires
2755 link-time relocations. */
2756
2757 static void ATTRIBUTE_UNUSED
2758 x86_64_elf_unique_section (tree decl, int reloc)
2759 {
2760 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2761 && ix86_in_large_data_p (decl))
2762 {
2763 const char *prefix = NULL;
2764 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2765 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2766
2767 switch (categorize_decl_for_section (decl, reloc))
2768 {
2769 case SECCAT_DATA:
2770 case SECCAT_DATA_REL:
2771 case SECCAT_DATA_REL_LOCAL:
2772 case SECCAT_DATA_REL_RO:
2773 case SECCAT_DATA_REL_RO_LOCAL:
2774 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2775 break;
2776 case SECCAT_BSS:
2777 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2778 break;
2779 case SECCAT_RODATA:
2780 case SECCAT_RODATA_MERGE_STR:
2781 case SECCAT_RODATA_MERGE_STR_INIT:
2782 case SECCAT_RODATA_MERGE_CONST:
2783 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2784 break;
2785 case SECCAT_SRODATA:
2786 case SECCAT_SDATA:
2787 case SECCAT_SBSS:
2788 gcc_unreachable ();
2789 case SECCAT_TEXT:
2790 case SECCAT_TDATA:
2791 case SECCAT_TBSS:
2792 /* We don't split these for the medium model. Place them into
2793 default sections and hope for the best. */
2794 break;
2795 }
2796 if (prefix)
2797 {
2798 const char *name;
2799 size_t nlen, plen;
2800 char *string;
2801 plen = strlen (prefix);
2802
2803 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2804 name = targetm.strip_name_encoding (name);
2805 nlen = strlen (name);
2806
2807 string = (char *) alloca (nlen + plen + 1);
2808 memcpy (string, prefix, plen);
2809 memcpy (string + plen, name, nlen + 1);
2810
2811 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2812 return;
2813 }
2814 }
2815 default_unique_section (decl, reloc);
2816 }
2817
2818 #ifdef COMMON_ASM_OP
2819 /* This says how to output assembler code to declare an
2820 uninitialized external linkage data object.
2821
2822 For medium-model x86-64 we need to use the .largecomm pseudo-op for
2823 large objects. */
2824 void
2825 x86_elf_aligned_common (FILE *file,
2826 const char *name, unsigned HOST_WIDE_INT size,
2827 int align)
2828 {
2829 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2830 && size > (unsigned int)ix86_section_threshold)
2831 fprintf (file, ".largecomm\t");
2832 else
2833 fprintf (file, "%s", COMMON_ASM_OP);
2834 assemble_name (file, name);
2835 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2836 size, align / BITS_PER_UNIT);
2837 }
2838 #endif
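/* Example of the output produced above (illustrative; "buf" is a
   hypothetical symbol): a 100000-byte common object aligned to 32 bytes
   under -mcmodel=medium emits

     .largecomm	buf,100000,32

   whereas smaller objects go through the usual COMMON_ASM_OP path.  */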
2839
2840 /* Utility function for targets to use in implementing
2841 ASM_OUTPUT_ALIGNED_BSS. */
2842
2843 void
2844 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2845 const char *name, unsigned HOST_WIDE_INT size,
2846 int align)
2847 {
2848 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2849 && size > (unsigned int)ix86_section_threshold)
2850 switch_to_section (get_named_section (decl, ".lbss", 0));
2851 else
2852 switch_to_section (bss_section);
2853 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2854 #ifdef ASM_DECLARE_OBJECT_NAME
2855 last_assemble_variable_decl = decl;
2856 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2857 #else
2858 /* The standard thing is to just output a label for the object. */
2859 ASM_OUTPUT_LABEL (file, name);
2860 #endif /* ASM_DECLARE_OBJECT_NAME */
2861 ASM_OUTPUT_SKIP (file, size ? size : 1);
2862 }
2863 \f
2864 void
2865 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2866 {
2867 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2868 make the problem with not enough registers even worse. */
2869 #ifdef INSN_SCHEDULING
2870 if (level > 1)
2871 flag_schedule_insns = 0;
2872 #endif
2873
2874 if (TARGET_MACHO)
2875 /* The Darwin libraries never set errno, so we might as well
2876 avoid calling them when that's the only reason we would. */
2877 flag_errno_math = 0;
2878
2879 /* The default values of these switches depend on TARGET_64BIT,
2880 which is not known at this moment. Mark these values with 2 and
2881 let the user override them. In case there is no command line option
2882 specifying them, we will set the defaults in override_options. */
2883 if (optimize >= 1)
2884 flag_omit_frame_pointer = 2;
2885 flag_pcc_struct_return = 2;
2886 flag_asynchronous_unwind_tables = 2;
2887 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2888 SUBTARGET_OPTIMIZATION_OPTIONS;
2889 #endif
2890 }
2891 \f
2892 /* Decide whether we can make a sibling call to a function. DECL is the
2893 declaration of the function being targeted by the call and EXP is the
2894 CALL_EXPR representing the call. */
2895
2896 static bool
2897 ix86_function_ok_for_sibcall (tree decl, tree exp)
2898 {
2899 tree func;
2900 rtx a, b;
2901
2902 /* If we are generating position-independent code, we cannot sibcall
2903 optimize any indirect call, or a direct call to a global function,
2904 as the PLT requires %ebx be live. */
2905 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2906 return false;
2907
2908 if (decl)
2909 func = decl;
2910 else
2911 {
2912 func = TREE_TYPE (CALL_EXPR_FN (exp));
2913 if (POINTER_TYPE_P (func))
2914 func = TREE_TYPE (func);
2915 }
2916
2917 /* Check that the return value locations are the same. For example,
2918 if we are returning floats on the 80387 register stack, we cannot
2919 make a sibcall from a function that doesn't return a float to a
2920 function that does or, conversely, from a function that does return
2921 a float to a function that doesn't; the necessary stack adjustment
2922 would not be executed. This is also the place we notice
2923 differences in the return value ABI. Note that it is ok for one
2924 of the functions to have void return type as long as the return
2925 value of the other is passed in a register. */
2926 a = ix86_function_value (TREE_TYPE (exp), func, false);
2927 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2928 cfun->decl, false);
2929 if (STACK_REG_P (a) || STACK_REG_P (b))
2930 {
2931 if (!rtx_equal_p (a, b))
2932 return false;
2933 }
2934 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2935 ;
2936 else if (!rtx_equal_p (a, b))
2937 return false;
2938
2939 /* If this call is indirect, we'll need to be able to use a call-clobbered
2940 register for the address of the target function. Make sure that all
2941 such registers are not used for passing parameters. */
2942 if (!decl && !TARGET_64BIT)
2943 {
2944 tree type;
2945
2946 /* We're looking at the CALL_EXPR, we need the type of the function. */
2947 type = CALL_EXPR_FN (exp); /* pointer expression */
2948 type = TREE_TYPE (type); /* pointer type */
2949 type = TREE_TYPE (type); /* function type */
2950
2951 if (ix86_function_regparm (type, NULL) >= 3)
2952 {
2953 /* ??? Need to count the actual number of registers to be used,
2954 not the possible number of registers. Fix later. */
2955 return false;
2956 }
2957 }
2958
2959 /* Dllimport'd functions are also called indirectly. */
2960 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
2961 && decl && DECL_DLLIMPORT_P (decl)
2962 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2963 return false;
2964
2965 /* If we forced aligned the stack, then sibcalling would unalign the
2966 stack, which may break the called function. */
2967 if (cfun->machine->force_align_arg_pointer)
2968 return false;
2969
2970 /* Otherwise okay. That also includes certain types of indirect calls. */
2971 return true;
2972 }
2973
2974 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2975 calling convention attributes;
2976 arguments as in struct attribute_spec.handler. */
2977
2978 static tree
2979 ix86_handle_cconv_attribute (tree *node, tree name,
2980 tree args,
2981 int flags ATTRIBUTE_UNUSED,
2982 bool *no_add_attrs)
2983 {
2984 if (TREE_CODE (*node) != FUNCTION_TYPE
2985 && TREE_CODE (*node) != METHOD_TYPE
2986 && TREE_CODE (*node) != FIELD_DECL
2987 && TREE_CODE (*node) != TYPE_DECL)
2988 {
2989 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2990 IDENTIFIER_POINTER (name));
2991 *no_add_attrs = true;
2992 return NULL_TREE;
2993 }
2994
2995 /* Can combine regparm with all attributes but fastcall. */
2996 if (is_attribute_p ("regparm", name))
2997 {
2998 tree cst;
2999
3000 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3001 {
3002 error ("fastcall and regparm attributes are not compatible");
3003 }
3004
3005 cst = TREE_VALUE (args);
3006 if (TREE_CODE (cst) != INTEGER_CST)
3007 {
3008 warning (OPT_Wattributes,
3009 "%qs attribute requires an integer constant argument",
3010 IDENTIFIER_POINTER (name));
3011 *no_add_attrs = true;
3012 }
3013 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
3014 {
3015 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
3016 IDENTIFIER_POINTER (name), REGPARM_MAX);
3017 *no_add_attrs = true;
3018 }
3019
3020 if (!TARGET_64BIT
3021 && lookup_attribute (ix86_force_align_arg_pointer_string,
3022 TYPE_ATTRIBUTES (*node))
3023 && compare_tree_int (cst, REGPARM_MAX-1))
3024 {
3025 error ("%s functions limited to %d register parameters",
3026 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
3027 }
3028
3029 return NULL_TREE;
3030 }
3031
3032 if (TARGET_64BIT)
3033 {
3034 /* Do not warn when emulating the MS ABI. */
3035 if (!TARGET_64BIT_MS_ABI)
3036 warning (OPT_Wattributes, "%qs attribute ignored",
3037 IDENTIFIER_POINTER (name));
3038 *no_add_attrs = true;
3039 return NULL_TREE;
3040 }
3041
3042 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
3043 if (is_attribute_p ("fastcall", name))
3044 {
3045 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3046 {
3047 error ("fastcall and cdecl attributes are not compatible");
3048 }
3049 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3050 {
3051 error ("fastcall and stdcall attributes are not compatible");
3052 }
3053 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
3054 {
3055 error ("fastcall and regparm attributes are not compatible");
3056 }
3057 }
3058
3059 /* Can combine stdcall with fastcall (redundant), regparm and
3060 sseregparm. */
3061 else if (is_attribute_p ("stdcall", name))
3062 {
3063 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3064 {
3065 error ("stdcall and cdecl attributes are not compatible");
3066 }
3067 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3068 {
3069 error ("stdcall and fastcall attributes are not compatible");
3070 }
3071 }
3072
3073 /* Can combine cdecl with regparm and sseregparm. */
3074 else if (is_attribute_p ("cdecl", name))
3075 {
3076 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3077 {
3078 error ("stdcall and cdecl attributes are not compatible");
3079 }
3080 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3081 {
3082 error ("fastcall and cdecl attributes are not compatible");
3083 }
3084 }
3085
3086 /* Can combine sseregparm with all attributes. */
3087
3088 return NULL_TREE;
3089 }
3090
3091 /* Return 0 if the attributes for two types are incompatible, 1 if they
3092 are compatible, and 2 if they are nearly compatible (which causes a
3093 warning to be generated). */
3094
3095 static int
3096 ix86_comp_type_attributes (const_tree type1, const_tree type2)
3097 {
3098 /* Check for mismatch of non-default calling convention. */
3099 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
3100
3101 if (TREE_CODE (type1) != FUNCTION_TYPE)
3102 return 1;
3103
3104 /* Check for mismatched fastcall/regparm types. */
3105 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
3106 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
3107 || (ix86_function_regparm (type1, NULL)
3108 != ix86_function_regparm (type2, NULL)))
3109 return 0;
3110
3111 /* Check for mismatched sseregparm types. */
3112 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
3113 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
3114 return 0;
3115
3116 /* Check for mismatched return types (cdecl vs stdcall). */
3117 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
3118 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
3119 return 0;
3120
3121 return 1;
3122 }
3123 \f
3124 /* Return the regparm value for a function with the indicated TYPE and DECL.
3125 DECL may be NULL when calling function indirectly
3126 or considering a libcall. */
3127
3128 static int
3129 ix86_function_regparm (const_tree type, const_tree decl)
3130 {
3131 tree attr;
3132 int regparm = ix86_regparm;
3133
3134 if (TARGET_64BIT)
3135 return regparm;
3136
3137 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
3138 if (attr)
3139 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
3140
3141 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
3142 return 2;
3143
3144 /* Use register calling convention for local functions when possible. */
3145 if (decl && TREE_CODE (decl) == FUNCTION_DECL
3146 && flag_unit_at_a_time && !profile_flag)
3147 {
3148 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3149 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3150 if (i && i->local)
3151 {
3152 int local_regparm, globals = 0, regno;
3153 struct function *f;
3154
3155 /* Make sure no regparm register is taken by a
3156 global register variable. */
3157 for (local_regparm = 0; local_regparm < 3; local_regparm++)
3158 if (global_regs[local_regparm])
3159 break;
3160
3161 /* We can't use regparm(3) for nested functions, as these use
3162 the static chain pointer in the third argument. */
3163 if (local_regparm == 3
3164 && (decl_function_context (decl)
3165 || ix86_force_align_arg_pointer)
3166 && !DECL_NO_STATIC_CHAIN (decl))
3167 local_regparm = 2;
3168
3169 /* If the function realigns its stack pointer, the prologue will
3170 clobber %ecx. If we've already generated code for the callee,
3171 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
3172 scanning the attributes for the self-realigning property. */
3173 f = DECL_STRUCT_FUNCTION (decl);
3174 if (local_regparm == 3
3175 && (f ? !!f->machine->force_align_arg_pointer
3176 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
3177 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
3178 local_regparm = 2;
3179
3180 /* Each global register variable increases register pressure,
3181 so the more global reg vars there are, the less useful the
3182 regparm optimization is, unless requested by the user explicitly. */
3183 for (regno = 0; regno < 6; regno++)
3184 if (global_regs[regno])
3185 globals++;
3186 local_regparm
3187 = globals < local_regparm ? local_regparm - globals : 0;
3188
3189 if (local_regparm > regparm)
3190 regparm = local_regparm;
3191 }
3192 }
3193
3194 return regparm;
3195 }
3196
3197 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3198 DFmode (2) arguments in SSE registers for a function with the
3199 indicated TYPE and DECL. DECL may be NULL when calling function
3200 indirectly or considering a libcall. Otherwise return 0. */
3201
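/* For illustration: on ia32, a declaration such as

     double __attribute__ ((sseregparm)) dot (double x, double y);

   asks for X and Y to be passed in %xmm0 and %xmm1 (return value 2 below,
   assuming SSE2 is available), rather than on the stack.  */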
3202 static int
3203 ix86_function_sseregparm (const_tree type, const_tree decl)
3204 {
3205 gcc_assert (!TARGET_64BIT);
3206
3207 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3208 by the sseregparm attribute. */
3209 if (TARGET_SSEREGPARM
3210 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
3211 {
3212 if (!TARGET_SSE)
3213 {
3214 if (decl)
3215 error ("Calling %qD with attribute sseregparm without "
3216 "SSE/SSE2 enabled", decl);
3217 else
3218 error ("Calling %qT with attribute sseregparm without "
3219 "SSE/SSE2 enabled", type);
3220 return 0;
3221 }
3222
3223 return 2;
3224 }
3225
3226 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3227 (and DFmode for SSE2) arguments in SSE registers. */
3228 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3229 {
3230 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3231 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3232 if (i && i->local)
3233 return TARGET_SSE2 ? 2 : 1;
3234 }
3235
3236 return 0;
3237 }
3238
3239 /* Return true if EAX is live at the start of the function. Used by
3240 ix86_expand_prologue to determine if we need special help before
3241 calling allocate_stack_worker. */
3242
3243 static bool
3244 ix86_eax_live_at_start_p (void)
3245 {
3246 /* Cheat. Don't bother working forward from ix86_function_regparm
3247 to the function type to whether an actual argument is located in
3248 eax. Instead just look at cfg info, which is still close enough
3249 to correct at this point. This gives false positives for broken
3250 functions that might use uninitialized data that happens to be
3251 allocated in eax, but who cares? */
3252 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
3253 }
3254
3255 /* Return true if TYPE has a variable argument list. */
3256
3257 static bool
3258 type_has_variadic_args_p (tree type)
3259 {
3260 tree n, t = TYPE_ARG_TYPES (type);
3261
3262 if (t == NULL)
3263 return false;
3264
3265 while ((n = TREE_CHAIN (t)) != NULL)
3266 t = n;
3267
3268 return TREE_VALUE (t) != void_type_node;
3269 }
3270
3271 /* Value is the number of bytes of arguments automatically
3272 popped when returning from a subroutine call.
3273 FUNDECL is the declaration node of the function (as a tree),
3274 FUNTYPE is the data type of the function (as a tree),
3275 or for a library call it is an identifier node for the subroutine name.
3276 SIZE is the number of bytes of arguments passed on the stack.
3277
3278 On the 80386, the RTD insn may be used to pop them if the number
3279 of args is fixed, but if the number is variable then the caller
3280 must pop them all. RTD can't be used for library calls now
3281 because the library is compiled with the Unix compiler.
3282 Use of RTD is a selectable option, since it is incompatible with
3283 standard Unix calling sequences. If the option is not selected,
3284 the caller must always pop the args.
3285
3286 The attribute stdcall is equivalent to RTD on a per module basis. */
3287
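/* For illustration: given

     int __attribute__ ((stdcall)) f (int a, int b);

   a call f (1, 2) pushes 8 bytes of arguments and the callee pops them
   (e.g. with "ret $8"), so this function returns 8; a variadic or cdecl
   function returns 0 and the caller pops the arguments instead.  */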
3288 int
3289 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3290 {
3291 int rtd;
3292
3293 /* None of the 64-bit ABIs pop arguments. */
3294 if (TARGET_64BIT)
3295 return 0;
3296
3297 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3298
3299 /* Cdecl functions override -mrtd, and never pop the stack. */
3300 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
3301 {
3302 /* Stdcall and fastcall functions will pop the stack if not
3303 variable args. */
3304 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3305 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3306 rtd = 1;
3307
3308 if (rtd && ! type_has_variadic_args_p (funtype))
3309 return size;
3310 }
3311
3312 /* Lose any fake structure return argument if it is passed on the stack. */
3313 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3314 && !KEEP_AGGREGATE_RETURN_POINTER)
3315 {
3316 int nregs = ix86_function_regparm (funtype, fundecl);
3317 if (nregs == 0)
3318 return GET_MODE_SIZE (Pmode);
3319 }
3320
3321 return 0;
3322 }
3323 \f
3324 /* Argument support functions. */
3325
3326 /* Return true when register may be used to pass function parameters. */
3327 bool
3328 ix86_function_arg_regno_p (int regno)
3329 {
3330 int i;
3331 const int *parm_regs;
3332
3333 if (!TARGET_64BIT)
3334 {
3335 if (TARGET_MACHO)
3336 return (regno < REGPARM_MAX
3337 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3338 else
3339 return (regno < REGPARM_MAX
3340 || (TARGET_MMX && MMX_REGNO_P (regno)
3341 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3342 || (TARGET_SSE && SSE_REGNO_P (regno)
3343 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3344 }
3345
3346 if (TARGET_MACHO)
3347 {
3348 if (SSE_REGNO_P (regno) && TARGET_SSE)
3349 return true;
3350 }
3351 else
3352 {
3353 if (TARGET_SSE && SSE_REGNO_P (regno)
3354 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3355 return true;
3356 }
3357
3358 /* RAX is used as a hidden argument to va_arg functions. */
3359 if (!TARGET_64BIT_MS_ABI && regno == 0)
3360 return true;
3361
3362 if (TARGET_64BIT_MS_ABI)
3363 parm_regs = x86_64_ms_abi_int_parameter_registers;
3364 else
3365 parm_regs = x86_64_int_parameter_registers;
3366 for (i = 0; i < REGPARM_MAX; i++)
3367 if (regno == parm_regs[i])
3368 return true;
3369 return false;
3370 }
3371
3372 /* Return true if we do not know how to pass TYPE solely in registers. */
3373
3374 static bool
3375 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
3376 {
3377 if (must_pass_in_stack_var_size_or_pad (mode, type))
3378 return true;
3379
3380 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3381 The layout_type routine is crafty and tries to trick us into passing
3382 currently unsupported vector types on the stack by using TImode. */
3383 return (!TARGET_64BIT && mode == TImode
3384 && type && TREE_CODE (type) != VECTOR_TYPE);
3385 }
3386
3387 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3388 for a call to a function whose data type is FNTYPE.
3389 For a library call, FNTYPE is 0. */
3390
3391 void
3392 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3393 tree fntype, /* tree ptr for function decl */
3394 rtx libname, /* SYMBOL_REF of library name or 0 */
3395 tree fndecl)
3396 {
3397 memset (cum, 0, sizeof (*cum));
3398
3399 /* Set up the number of registers to use for passing arguments. */
3400 cum->nregs = ix86_regparm;
3401 if (TARGET_SSE)
3402 cum->sse_nregs = SSE_REGPARM_MAX;
3403 if (TARGET_MMX)
3404 cum->mmx_nregs = MMX_REGPARM_MAX;
3405 cum->warn_sse = true;
3406 cum->warn_mmx = true;
3407 cum->maybe_vaarg = (fntype
3408 ? (!TYPE_ARG_TYPES (fntype)
3409 || type_has_variadic_args_p (fntype))
3410 : !libname);
3411
3412 if (!TARGET_64BIT)
3413 {
3414 /* If there are variable arguments, then we won't pass anything
3415 in registers in 32-bit mode. */
3416 if (cum->maybe_vaarg)
3417 {
3418 cum->nregs = 0;
3419 cum->sse_nregs = 0;
3420 cum->mmx_nregs = 0;
3421 cum->warn_sse = 0;
3422 cum->warn_mmx = 0;
3423 return;
3424 }
3425
3426 /* Use ecx and edx registers if function has fastcall attribute,
3427 else look for regparm information. */
3428 if (fntype)
3429 {
3430 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3431 {
3432 cum->nregs = 2;
3433 cum->fastcall = 1;
3434 }
3435 else
3436 cum->nregs = ix86_function_regparm (fntype, fndecl);
3437 }
3438
3439 /* Set up the number of SSE registers used for passing SFmode
3440 and DFmode arguments. Warn for mismatching ABI. */
3441 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3442 }
3443 }
3444
3445 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3446 But in the case of vector types, it is some vector mode.
3447
3448 When we have only some of our vector isa extensions enabled, then there
3449 are some modes for which vector_mode_supported_p is false. For these
3450 modes, the generic vector support in gcc will choose some non-vector mode
3451 in order to implement the type. By computing the natural mode, we'll
3452 select the proper ABI location for the operand and not depend on whatever
3453 the middle-end decides to do with these vector types. */
3454
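/* For illustration: for

     typedef int v4si __attribute__ ((vector_size (16)));

   the natural mode is V4SImode even when SSE is disabled and the type
   system may have fallen back to a non-vector mode (such as TImode) to
   lay the type out.  */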
3455 static enum machine_mode
3456 type_natural_mode (const_tree type)
3457 {
3458 enum machine_mode mode = TYPE_MODE (type);
3459
3460 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3461 {
3462 HOST_WIDE_INT size = int_size_in_bytes (type);
3463 if ((size == 8 || size == 16)
3464 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3465 && TYPE_VECTOR_SUBPARTS (type) > 1)
3466 {
3467 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3468
3469 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3470 mode = MIN_MODE_VECTOR_FLOAT;
3471 else
3472 mode = MIN_MODE_VECTOR_INT;
3473
3474 /* Get the mode which has this inner mode and number of units. */
3475 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3476 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3477 && GET_MODE_INNER (mode) == innermode)
3478 return mode;
3479
3480 gcc_unreachable ();
3481 }
3482 }
3483
3484 return mode;
3485 }
3486
3487 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3488 this may not agree with the mode that the type system has chosen for the
3489 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3490 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3491
3492 static rtx
3493 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3494 unsigned int regno)
3495 {
3496 rtx tmp;
3497
3498 if (orig_mode != BLKmode)
3499 tmp = gen_rtx_REG (orig_mode, regno);
3500 else
3501 {
3502 tmp = gen_rtx_REG (mode, regno);
3503 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3504 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3505 }
3506
3507 return tmp;
3508 }
3509
3510 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3511 of this code is to classify each 8bytes of incoming argument by the register
3512 class and assign registers accordingly. */
3513
3514 /* Return the union class of CLASS1 and CLASS2.
3515 See the x86-64 PS ABI for details. */
3516
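/* For illustration: for

     union u { int i; float f; };

   the int field classifies as INTEGERSI and the float field as SSESF;
   rule #4 merges them to X86_64_INTEGERSI_CLASS, so the union is passed
   in a general purpose register.  */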
3517 static enum x86_64_reg_class
3518 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3519 {
3520 /* Rule #1: If both classes are equal, this is the resulting class. */
3521 if (class1 == class2)
3522 return class1;
3523
3524 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3525 the other class. */
3526 if (class1 == X86_64_NO_CLASS)
3527 return class2;
3528 if (class2 == X86_64_NO_CLASS)
3529 return class1;
3530
3531 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3532 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3533 return X86_64_MEMORY_CLASS;
3534
3535 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3536 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3537 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3538 return X86_64_INTEGERSI_CLASS;
3539 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3540 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3541 return X86_64_INTEGER_CLASS;
3542
3543 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3544 MEMORY is used. */
3545 if (class1 == X86_64_X87_CLASS
3546 || class1 == X86_64_X87UP_CLASS
3547 || class1 == X86_64_COMPLEX_X87_CLASS
3548 || class2 == X86_64_X87_CLASS
3549 || class2 == X86_64_X87UP_CLASS
3550 || class2 == X86_64_COMPLEX_X87_CLASS)
3551 return X86_64_MEMORY_CLASS;
3552
3553 /* Rule #6: Otherwise class SSE is used. */
3554 return X86_64_SSE_CLASS;
3555 }
3556
3557 /* Classify the argument of type TYPE and mode MODE.
3558 CLASSES will be filled by the register class used to pass each word
3559 of the operand. The number of words is returned. In case the parameter
3560 should be passed in memory, 0 is returned. As a special case for zero
3561 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3562
3563 BIT_OFFSET is used internally for handling records and specifies the
3564 offset in bits, modulo 256, to avoid overflow cases.
3565
3566 See the x86-64 PS ABI for details.
3567 */
3568
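/* For illustration: for

     struct s { double d; int i; };

   this returns 2 with classes[0] = X86_64_SSEDF_CLASS and
   classes[1] = X86_64_INTEGER_CLASS, so the struct is passed in one SSE
   register and one integer register.  */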
3569 static int
3570 classify_argument (enum machine_mode mode, const_tree type,
3571 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3572 {
3573 HOST_WIDE_INT bytes =
3574 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3575 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3576
3577 /* Variable sized entities are always passed/returned in memory. */
3578 if (bytes < 0)
3579 return 0;
3580
3581 if (mode != VOIDmode
3582 && targetm.calls.must_pass_in_stack (mode, type))
3583 return 0;
3584
3585 if (type && AGGREGATE_TYPE_P (type))
3586 {
3587 int i;
3588 tree field;
3589 enum x86_64_reg_class subclasses[MAX_CLASSES];
3590
3591 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3592 if (bytes > 16)
3593 return 0;
3594
3595 for (i = 0; i < words; i++)
3596 classes[i] = X86_64_NO_CLASS;
3597
3598 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3599 signal the memory class, so handle this as a special case. */
3600 if (!words)
3601 {
3602 classes[0] = X86_64_NO_CLASS;
3603 return 1;
3604 }
3605
3606 /* Classify each field of record and merge classes. */
3607 switch (TREE_CODE (type))
3608 {
3609 case RECORD_TYPE:
3610 /* And now merge the fields of structure. */
3611 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3612 {
3613 if (TREE_CODE (field) == FIELD_DECL)
3614 {
3615 int num;
3616
3617 if (TREE_TYPE (field) == error_mark_node)
3618 continue;
3619
3620 /* Bitfields are always classified as integer. Handle them
3621 early, since later code would consider them to be
3622 misaligned integers. */
3623 if (DECL_BIT_FIELD (field))
3624 {
3625 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3626 i < ((int_bit_position (field) + (bit_offset % 64))
3627 + tree_low_cst (DECL_SIZE (field), 0)
3628 + 63) / 8 / 8; i++)
3629 classes[i] =
3630 merge_classes (X86_64_INTEGER_CLASS,
3631 classes[i]);
3632 }
3633 else
3634 {
3635 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3636 TREE_TYPE (field), subclasses,
3637 (int_bit_position (field)
3638 + bit_offset) % 256);
3639 if (!num)
3640 return 0;
3641 for (i = 0; i < num; i++)
3642 {
3643 int pos =
3644 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3645 classes[i + pos] =
3646 merge_classes (subclasses[i], classes[i + pos]);
3647 }
3648 }
3649 }
3650 }
3651 break;
3652
3653 case ARRAY_TYPE:
3654 /* Arrays are handled as small records. */
3655 {
3656 int num;
3657 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3658 TREE_TYPE (type), subclasses, bit_offset);
3659 if (!num)
3660 return 0;
3661
3662 /* The partial classes are now full classes. */
3663 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3664 subclasses[0] = X86_64_SSE_CLASS;
3665 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3666 subclasses[0] = X86_64_INTEGER_CLASS;
3667
3668 for (i = 0; i < words; i++)
3669 classes[i] = subclasses[i % num];
3670
3671 break;
3672 }
3673 case UNION_TYPE:
3674 case QUAL_UNION_TYPE:
3675 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
3676 
3677 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3678 {
3679 if (TREE_CODE (field) == FIELD_DECL)
3680 {
3681 int num;
3682
3683 if (TREE_TYPE (field) == error_mark_node)
3684 continue;
3685
3686 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3687 TREE_TYPE (field), subclasses,
3688 bit_offset);
3689 if (!num)
3690 return 0;
3691 for (i = 0; i < num; i++)
3692 classes[i] = merge_classes (subclasses[i], classes[i]);
3693 }
3694 }
3695 break;
3696
3697 default:
3698 gcc_unreachable ();
3699 }
3700
3701 /* Final merger cleanup. */
3702 for (i = 0; i < words; i++)
3703 {
3704 /* If one class is MEMORY, everything should be passed in
3705 memory. */
3706 if (classes[i] == X86_64_MEMORY_CLASS)
3707 return 0;
3708
3709 /* The X86_64_SSEUP_CLASS should be always preceded by
3710 X86_64_SSE_CLASS. */
3711 if (classes[i] == X86_64_SSEUP_CLASS
3712 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3713 classes[i] = X86_64_SSE_CLASS;
3714
3715 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3716 if (classes[i] == X86_64_X87UP_CLASS
3717 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3718 classes[i] = X86_64_SSE_CLASS;
3719 }
3720 return words;
3721 }
3722
3723 /* Compute the alignment needed. We align all types to natural boundaries,
3724 with the exception of XFmode, which is aligned to 64 bits. */
3725 if (mode != VOIDmode && mode != BLKmode)
3726 {
3727 int mode_alignment = GET_MODE_BITSIZE (mode);
3728
3729 if (mode == XFmode)
3730 mode_alignment = 128;
3731 else if (mode == XCmode)
3732 mode_alignment = 256;
3733 if (COMPLEX_MODE_P (mode))
3734 mode_alignment /= 2;
3735 /* Misaligned fields are always returned in memory. */
3736 if (bit_offset % mode_alignment)
3737 return 0;
3738 }
3739
3740 /* For V1xx modes, just use the base mode. */
3741 if (VECTOR_MODE_P (mode)
3742 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3743 mode = GET_MODE_INNER (mode);
3744
3745 /* Classification of atomic types. */
3746 switch (mode)
3747 {
3748 case SDmode:
3749 case DDmode:
3750 classes[0] = X86_64_SSE_CLASS;
3751 return 1;
3752 case TDmode:
3753 classes[0] = X86_64_SSE_CLASS;
3754 classes[1] = X86_64_SSEUP_CLASS;
3755 return 2;
3756 case DImode:
3757 case SImode:
3758 case HImode:
3759 case QImode:
3760 case CSImode:
3761 case CHImode:
3762 case CQImode:
3763 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3764 classes[0] = X86_64_INTEGERSI_CLASS;
3765 else
3766 classes[0] = X86_64_INTEGER_CLASS;
3767 return 1;
3768 case CDImode:
3769 case TImode:
3770 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3771 return 2;
3772 case CTImode:
3773 return 0;
3774 case SFmode:
3775 if (!(bit_offset % 64))
3776 classes[0] = X86_64_SSESF_CLASS;
3777 else
3778 classes[0] = X86_64_SSE_CLASS;
3779 return 1;
3780 case DFmode:
3781 classes[0] = X86_64_SSEDF_CLASS;
3782 return 1;
3783 case XFmode:
3784 classes[0] = X86_64_X87_CLASS;
3785 classes[1] = X86_64_X87UP_CLASS;
3786 return 2;
3787 case TFmode:
3788 classes[0] = X86_64_SSE_CLASS;
3789 classes[1] = X86_64_SSEUP_CLASS;
3790 return 2;
3791 case SCmode:
3792 classes[0] = X86_64_SSE_CLASS;
3793 return 1;
3794 case DCmode:
3795 classes[0] = X86_64_SSEDF_CLASS;
3796 classes[1] = X86_64_SSEDF_CLASS;
3797 return 2;
3798 case XCmode:
3799 classes[0] = X86_64_COMPLEX_X87_CLASS;
3800 return 1;
3801 case TCmode:
3802 /* This mode is larger than 16 bytes. */
3803 return 0;
3804 case V4SFmode:
3805 case V4SImode:
3806 case V16QImode:
3807 case V8HImode:
3808 case V2DFmode:
3809 case V2DImode:
3810 classes[0] = X86_64_SSE_CLASS;
3811 classes[1] = X86_64_SSEUP_CLASS;
3812 return 2;
3813 case V2SFmode:
3814 case V2SImode:
3815 case V4HImode:
3816 case V8QImode:
3817 classes[0] = X86_64_SSE_CLASS;
3818 return 1;
3819 case BLKmode:
3820 case VOIDmode:
3821 return 0;
3822 default:
3823 gcc_assert (VECTOR_MODE_P (mode));
3824
3825 if (bytes > 16)
3826 return 0;
3827
3828 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3829
3830 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3831 classes[0] = X86_64_INTEGERSI_CLASS;
3832 else
3833 classes[0] = X86_64_INTEGER_CLASS;
3834 classes[1] = X86_64_INTEGER_CLASS;
3835 return 1 + (bytes > 8);
3836 }
3837 }
3838
3839 /* Examine the argument and set the number of registers required in each
3840 class. Return 0 iff the parameter should be passed in memory. */
3841 static int
3842 examine_argument (enum machine_mode mode, const_tree type, int in_return,
3843 int *int_nregs, int *sse_nregs)
3844 {
3845 enum x86_64_reg_class regclass[MAX_CLASSES];
3846 int n = classify_argument (mode, type, regclass, 0);
3847
3848 *int_nregs = 0;
3849 *sse_nregs = 0;
3850 if (!n)
3851 return 0;
3852 for (n--; n >= 0; n--)
3853 switch (regclass[n])
3854 {
3855 case X86_64_INTEGER_CLASS:
3856 case X86_64_INTEGERSI_CLASS:
3857 (*int_nregs)++;
3858 break;
3859 case X86_64_SSE_CLASS:
3860 case X86_64_SSESF_CLASS:
3861 case X86_64_SSEDF_CLASS:
3862 (*sse_nregs)++;
3863 break;
3864 case X86_64_NO_CLASS:
3865 case X86_64_SSEUP_CLASS:
3866 break;
3867 case X86_64_X87_CLASS:
3868 case X86_64_X87UP_CLASS:
3869 if (!in_return)
3870 return 0;
3871 break;
3872 case X86_64_COMPLEX_X87_CLASS:
3873 return in_return ? 2 : 0;
3874 case X86_64_MEMORY_CLASS:
3875 gcc_unreachable ();
3876 }
3877 return 1;
3878 }
3879
3880 /* Construct container for the argument used by GCC interface. See
3881 FUNCTION_ARG for the detailed description. */
3882
3883 static rtx
3884 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3885 const_tree type, int in_return, int nintregs, int nsseregs,
3886 const int *intreg, int sse_regno)
3887 {
3888 /* The following variables hold the static issued_error state. */
3889 static bool issued_sse_arg_error;
3890 static bool issued_sse_ret_error;
3891 static bool issued_x87_ret_error;
3892
3893 enum machine_mode tmpmode;
3894 int bytes =
3895 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3896 enum x86_64_reg_class regclass[MAX_CLASSES];
3897 int n;
3898 int i;
3899 int nexps = 0;
3900 int needed_sseregs, needed_intregs;
3901 rtx exp[MAX_CLASSES];
3902 rtx ret;
3903
3904 n = classify_argument (mode, type, regclass, 0);
3905 if (!n)
3906 return NULL;
3907 if (!examine_argument (mode, type, in_return, &needed_intregs,
3908 &needed_sseregs))
3909 return NULL;
3910 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3911 return NULL;
3912
3913 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3914 some less clueful developer tries to use floating-point anyway. */
3915 if (needed_sseregs && !TARGET_SSE)
3916 {
3917 if (in_return)
3918 {
3919 if (!issued_sse_ret_error)
3920 {
3921 error ("SSE register return with SSE disabled");
3922 issued_sse_ret_error = true;
3923 }
3924 }
3925 else if (!issued_sse_arg_error)
3926 {
3927 error ("SSE register argument with SSE disabled");
3928 issued_sse_arg_error = true;
3929 }
3930 return NULL;
3931 }
3932
3933 /* Likewise, error if the ABI requires us to return values in the
3934 x87 registers and the user specified -mno-80387. */
3935 if (!TARGET_80387 && in_return)
3936 for (i = 0; i < n; i++)
3937 if (regclass[i] == X86_64_X87_CLASS
3938 || regclass[i] == X86_64_X87UP_CLASS
3939 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
3940 {
3941 if (!issued_x87_ret_error)
3942 {
3943 error ("x87 register return with x87 disabled");
3944 issued_x87_ret_error = true;
3945 }
3946 return NULL;
3947 }
3948
3949 /* First construct simple cases. Avoid SCmode, since we want to use
3950 a single register to pass this type. */
3951 if (n == 1 && mode != SCmode)
3952 switch (regclass[0])
3953 {
3954 case X86_64_INTEGER_CLASS:
3955 case X86_64_INTEGERSI_CLASS:
3956 return gen_rtx_REG (mode, intreg[0]);
3957 case X86_64_SSE_CLASS:
3958 case X86_64_SSESF_CLASS:
3959 case X86_64_SSEDF_CLASS:
3960 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3961 case X86_64_X87_CLASS:
3962 case X86_64_COMPLEX_X87_CLASS:
3963 return gen_rtx_REG (mode, FIRST_STACK_REG);
3964 case X86_64_NO_CLASS:
3965 /* Zero sized array, struct or class. */
3966 return NULL;
3967 default:
3968 gcc_unreachable ();
3969 }
3970 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
3971 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
3972 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3973
3974 if (n == 2
3975 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
3976 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3977 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
3978 && regclass[1] == X86_64_INTEGER_CLASS
3979 && (mode == CDImode || mode == TImode || mode == TFmode)
3980 && intreg[0] + 1 == intreg[1])
3981 return gen_rtx_REG (mode, intreg[0]);
3982
3983 /* Otherwise figure out the entries of the PARALLEL. */
3984 for (i = 0; i < n; i++)
3985 {
3986 switch (regclass[i])
3987 {
3988 case X86_64_NO_CLASS:
3989 break;
3990 case X86_64_INTEGER_CLASS:
3991 case X86_64_INTEGERSI_CLASS:
3992 /* Merge TImodes on aligned occasions here too. */
3993 if (i * 8 + 8 > bytes)
3994 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3995 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
3996 tmpmode = SImode;
3997 else
3998 tmpmode = DImode;
3999 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
4000 if (tmpmode == BLKmode)
4001 tmpmode = DImode;
4002 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4003 gen_rtx_REG (tmpmode, *intreg),
4004 GEN_INT (i*8));
4005 intreg++;
4006 break;
4007 case X86_64_SSESF_CLASS:
4008 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4009 gen_rtx_REG (SFmode,
4010 SSE_REGNO (sse_regno)),
4011 GEN_INT (i*8));
4012 sse_regno++;
4013 break;
4014 case X86_64_SSEDF_CLASS:
4015 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4016 gen_rtx_REG (DFmode,
4017 SSE_REGNO (sse_regno)),
4018 GEN_INT (i*8));
4019 sse_regno++;
4020 break;
4021 case X86_64_SSE_CLASS:
4022 if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
4023 tmpmode = TImode;
4024 else
4025 tmpmode = DImode;
4026 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4027 gen_rtx_REG (tmpmode,
4028 SSE_REGNO (sse_regno)),
4029 GEN_INT (i*8));
4030 if (tmpmode == TImode)
4031 i++;
4032 sse_regno++;
4033 break;
4034 default:
4035 gcc_unreachable ();
4036 }
4037 }
4038
4039 /* Empty aligned struct, union or class. */
4040 if (nexps == 0)
4041 return NULL;
4042
4043 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
4044 for (i = 0; i < nexps; i++)
4045 XVECEXP (ret, 0, i) = exp [i];
4046 return ret;
4047 }
4048
4049 /* Update the data in CUM to advance over an argument of mode MODE
4050 and data type TYPE. (TYPE is null for libcalls where that information
4051 may not be available.) */
4052
4053 static void
4054 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4055 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
4056 {
4057 switch (mode)
4058 {
4059 default:
4060 break;
4061
4062 case BLKmode:
4063 if (bytes < 0)
4064 break;
4065 /* FALLTHRU */
4066
4067 case DImode:
4068 case SImode:
4069 case HImode:
4070 case QImode:
4071 cum->words += words;
4072 cum->nregs -= words;
4073 cum->regno += words;
4074
4075 if (cum->nregs <= 0)
4076 {
4077 cum->nregs = 0;
4078 cum->regno = 0;
4079 }
4080 break;
4081
4082 case DFmode:
4083 if (cum->float_in_sse < 2)
4084 break;
4085 case SFmode:
4086 if (cum->float_in_sse < 1)
4087 break;
4088 /* FALLTHRU */
4089
4090 case TImode:
4091 case V16QImode:
4092 case V8HImode:
4093 case V4SImode:
4094 case V2DImode:
4095 case V4SFmode:
4096 case V2DFmode:
4097 if (!type || !AGGREGATE_TYPE_P (type))
4098 {
4099 cum->sse_words += words;
4100 cum->sse_nregs -= 1;
4101 cum->sse_regno += 1;
4102 if (cum->sse_nregs <= 0)
4103 {
4104 cum->sse_nregs = 0;
4105 cum->sse_regno = 0;
4106 }
4107 }
4108 break;
4109
4110 case V8QImode:
4111 case V4HImode:
4112 case V2SImode:
4113 case V2SFmode:
4114 if (!type || !AGGREGATE_TYPE_P (type))
4115 {
4116 cum->mmx_words += words;
4117 cum->mmx_nregs -= 1;
4118 cum->mmx_regno += 1;
4119 if (cum->mmx_nregs <= 0)
4120 {
4121 cum->mmx_nregs = 0;
4122 cum->mmx_regno = 0;
4123 }
4124 }
4125 break;
4126 }
4127 }
4128
4129 static void
4130 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4131 tree type, HOST_WIDE_INT words)
4132 {
4133 int int_nregs, sse_nregs;
4134
4135 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
4136 cum->words += words;
4137 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
4138 {
4139 cum->nregs -= int_nregs;
4140 cum->sse_nregs -= sse_nregs;
4141 cum->regno += int_nregs;
4142 cum->sse_regno += sse_nregs;
4143 }
4144 else
4145 cum->words += words;
4146 }
4147
4148 static void
4149 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
4150 HOST_WIDE_INT words)
4151 {
4152 /* Otherwise, this should be passed indirect. */
4153 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
4154
4155 cum->words += words;
4156 if (cum->nregs > 0)
4157 {
4158 cum->nregs -= 1;
4159 cum->regno += 1;
4160 }
4161 }
4162
4163 void
4164 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4165 tree type, int named ATTRIBUTE_UNUSED)
4166 {
4167 HOST_WIDE_INT bytes, words;
4168
4169 if (mode == BLKmode)
4170 bytes = int_size_in_bytes (type);
4171 else
4172 bytes = GET_MODE_SIZE (mode);
4173 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4174
4175 if (type)
4176 mode = type_natural_mode (type);
4177
4178 if (TARGET_64BIT_MS_ABI)
4179 function_arg_advance_ms_64 (cum, bytes, words);
4180 else if (TARGET_64BIT)
4181 function_arg_advance_64 (cum, mode, type, words);
4182 else
4183 function_arg_advance_32 (cum, mode, type, bytes, words);
4184 }
4185
4186 /* Define where to put the arguments to a function.
4187 Value is zero to push the argument on the stack,
4188 or a hard register in which to store the argument.
4189
4190 MODE is the argument's machine mode.
4191 TYPE is the data type of the argument (as a tree).
4192 This is null for libcalls where that information may
4193 not be available.
4194 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4195 the preceding args and about the function being called.
4196 NAMED is nonzero if this argument is a named parameter
4197 (otherwise it is an extra parameter matching an ellipsis). */
4198
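/* For illustration: for

     void __attribute__ ((fastcall)) f (int a, int b, int c);

   the 32-bit code below hands out %ecx for A and %edx for B, and returns
   NULL_RTX for C, which is therefore pushed on the stack.  */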
4199 static rtx
4200 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4201 enum machine_mode orig_mode, tree type,
4202 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
4203 {
4204 static bool warnedsse, warnedmmx;
4205
4206 /* Avoid the AL settings for the Unix64 ABI. */
4207 if (mode == VOIDmode)
4208 return constm1_rtx;
4209
4210 switch (mode)
4211 {
4212 default:
4213 break;
4214
4215 case BLKmode:
4216 if (bytes < 0)
4217 break;
4218 /* FALLTHRU */
4219 case DImode:
4220 case SImode:
4221 case HImode:
4222 case QImode:
4223 if (words <= cum->nregs)
4224 {
4225 int regno = cum->regno;
4226
4227 /* Fastcall allocates the first two DWORD (SImode) or
4228 smaller arguments to ECX and EDX. */
4229 if (cum->fastcall)
4230 {
4231 if (mode == BLKmode || mode == DImode)
4232 break;
4233
4234 /* ECX, not EAX, is the first allocated register. */
4235 if (regno == 0)
4236 regno = 2;
4237 }
4238 return gen_rtx_REG (mode, regno);
4239 }
4240 break;
4241
4242 case DFmode:
4243 if (cum->float_in_sse < 2)
4244 break;
4245 case SFmode:
4246 if (cum->float_in_sse < 1)
4247 break;
4248 /* FALLTHRU */
4249 case TImode:
4250 case V16QImode:
4251 case V8HImode:
4252 case V4SImode:
4253 case V2DImode:
4254 case V4SFmode:
4255 case V2DFmode:
4256 if (!type || !AGGREGATE_TYPE_P (type))
4257 {
4258 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4259 {
4260 warnedsse = true;
4261 warning (0, "SSE vector argument without SSE enabled "
4262 "changes the ABI");
4263 }
4264 if (cum->sse_nregs)
4265 return gen_reg_or_parallel (mode, orig_mode,
4266 cum->sse_regno + FIRST_SSE_REG);
4267 }
4268 break;
4269
4270 case V8QImode:
4271 case V4HImode:
4272 case V2SImode:
4273 case V2SFmode:
4274 if (!type || !AGGREGATE_TYPE_P (type))
4275 {
4276 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4277 {
4278 warnedmmx = true;
4279 warning (0, "MMX vector argument without MMX enabled "
4280 "changes the ABI");
4281 }
4282 if (cum->mmx_nregs)
4283 return gen_reg_or_parallel (mode, orig_mode,
4284 cum->mmx_regno + FIRST_MMX_REG);
4285 }
4286 break;
4287 }
4288
4289 return NULL_RTX;
4290 }
4291
4292 static rtx
4293 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4294 enum machine_mode orig_mode, tree type)
4295 {
4296 /* Handle a hidden AL argument containing the number of SSE registers
4297 used by varargs x86-64 functions. */
4298 if (mode == VOIDmode)
4299 return GEN_INT (cum->maybe_vaarg
4300 ? (cum->sse_nregs < 0
4301 ? SSE_REGPARM_MAX
4302 : cum->sse_regno)
4303 : -1);
4304
4305 return construct_container (mode, orig_mode, type, 0, cum->nregs,
4306 cum->sse_nregs,
4307 &x86_64_int_parameter_registers [cum->regno],
4308 cum->sse_regno);
4309 }
4310
4311 static rtx
4312 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4313 enum machine_mode orig_mode, int named)
4314 {
4315 unsigned int regno;
4316
4317 /* Avoid the AL settings for the Unix64 ABI. */
4318 if (mode == VOIDmode)
4319 return constm1_rtx;
4320
4321 /* If we've run out of registers, it goes on the stack. */
4322 if (cum->nregs == 0)
4323 return NULL_RTX;
4324
4325 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
4326
4327 /* Only floating point modes are passed in anything but integer regs. */
4328 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
4329 {
4330 if (named)
4331 regno = cum->regno + FIRST_SSE_REG;
4332 else
4333 {
4334 rtx t1, t2;
4335
4336 /* Unnamed floating parameters are passed in both the
4337 SSE and integer registers. */
4338 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
4339 t2 = gen_rtx_REG (mode, regno);
4340 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
4341 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
4342 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
4343 }
4344 }
4345
4346 return gen_reg_or_parallel (mode, orig_mode, regno);
4347 }
4348
4349 rtx
4350 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
4351 tree type, int named)
4352 {
4353 enum machine_mode mode = omode;
4354 HOST_WIDE_INT bytes, words;
4355
4356 if (mode == BLKmode)
4357 bytes = int_size_in_bytes (type);
4358 else
4359 bytes = GET_MODE_SIZE (mode);
4360 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4361
4362 /* To simplify the code below, represent vector types with a vector mode
4363 even if MMX/SSE are not active. */
4364 if (type && TREE_CODE (type) == VECTOR_TYPE)
4365 mode = type_natural_mode (type);
4366
4367 if (TARGET_64BIT_MS_ABI)
4368 return function_arg_ms_64 (cum, mode, omode, named);
4369 else if (TARGET_64BIT)
4370 return function_arg_64 (cum, mode, omode, type);
4371 else
4372 return function_arg_32 (cum, mode, omode, type, bytes, words);
4373 }
4374
4375 /* A C expression that indicates when an argument must be passed by
4376 reference. If nonzero for an argument, a copy of that argument is
4377 made in memory and a pointer to the argument is passed instead of
4378 the argument itself. The pointer is passed in whatever way is
4379 appropriate for passing a pointer to that type. */
4380
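/* For illustration, under the 64-bit MS ABI handled below:

     struct s3 { char c[3]; };   => size 3, passed by reference
     struct s8 { char c[8]; };   => size 8, passed by value in a register

   i.e. only aggregates of exactly 1, 2, 4 or 8 bytes are passed by value.  */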
4381 static bool
4382 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4383 enum machine_mode mode ATTRIBUTE_UNUSED,
4384 const_tree type, bool named ATTRIBUTE_UNUSED)
4385 {
4386 if (TARGET_64BIT_MS_ABI)
4387 {
4388 if (type)
4389 {
4390 /* Arrays are passed by reference. */
4391 if (TREE_CODE (type) == ARRAY_TYPE)
4392 return true;
4393
4394 if (AGGREGATE_TYPE_P (type))
4395 {
4396 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4397 are passed by reference. */
4398 int el2 = exact_log2 (int_size_in_bytes (type));
4399 return !(el2 >= 0 && el2 <= 3);
4400 }
4401 }
4402
4403 /* __m128 is passed by reference. */
4404 /* ??? How to handle complex? For now treat them as structs,
4405 and pass them by reference if they're too large. */
4406 if (GET_MODE_SIZE (mode) > 8)
4407 return true;
4408 }
4409 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
4410 return 1;
4411
4412 return 0;
4413 }
4414
4415 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4416 ABI. Only called if TARGET_SSE. */
4417 static bool
4418 contains_128bit_aligned_vector_p (tree type)
4419 {
4420 enum machine_mode mode = TYPE_MODE (type);
4421 if (SSE_REG_MODE_P (mode)
4422 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4423 return true;
4424 if (TYPE_ALIGN (type) < 128)
4425 return false;
4426
4427 if (AGGREGATE_TYPE_P (type))
4428 {
4429 /* Walk the aggregates recursively. */
4430 switch (TREE_CODE (type))
4431 {
4432 case RECORD_TYPE:
4433 case UNION_TYPE:
4434 case QUAL_UNION_TYPE:
4435 {
4436 tree field;
4437
4438 /* Walk all the structure fields. */
4439 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4440 {
4441 if (TREE_CODE (field) == FIELD_DECL
4442 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4443 return true;
4444 }
4445 break;
4446 }
4447
4448 case ARRAY_TYPE:
4449 /* Just for use if some languages pass arrays by value. */
4450 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4451 return true;
4452 break;
4453
4454 default:
4455 gcc_unreachable ();
4456 }
4457 }
4458 return false;
4459 }
4460
4461 /* Gives the alignment boundary, in bits, of an argument with the
4462 specified mode and type. */
4463
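/* For illustration, assuming a typical ia32 configuration with SSE enabled:
   an argument of type

     typedef float v4sf __attribute__ ((vector_size (16)));

   gets a 128-bit boundary from this function, while plain int and double
   arguments stay at PARM_BOUNDARY (32 bits).  */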
4464 int
4465 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4466 {
4467 int align;
4468 if (type)
4469 align = TYPE_ALIGN (type);
4470 else
4471 align = GET_MODE_ALIGNMENT (mode);
4472 if (align < PARM_BOUNDARY)
4473 align = PARM_BOUNDARY;
4474 if (!TARGET_64BIT)
4475 {
4476 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4477 make an exception for SSE modes since these require 128bit
4478 alignment.
4479
4480 The handling here differs from field_alignment. ICC aligns MMX
4481 arguments to 4 byte boundaries, while structure fields are aligned
4482 to 8 byte boundaries. */
4483 if (!TARGET_SSE)
4484 align = PARM_BOUNDARY;
4485 else if (!type)
4486 {
4487 if (!SSE_REG_MODE_P (mode))
4488 align = PARM_BOUNDARY;
4489 }
4490 else
4491 {
4492 if (!contains_128bit_aligned_vector_p (type))
4493 align = PARM_BOUNDARY;
4494 }
4495 }
4496 if (align > 128)
4497 align = 128;
4498 return align;
4499 }
4500
4501 /* Return true if N is a possible register number of function value. */
4502
4503 bool
4504 ix86_function_value_regno_p (int regno)
4505 {
4506 switch (regno)
4507 {
4508 case 0:
4509 return true;
4510
4511 case FIRST_FLOAT_REG:
4512 if (TARGET_64BIT_MS_ABI)
4513 return false;
4514 return TARGET_FLOAT_RETURNS_IN_80387;
4515
4516 case FIRST_SSE_REG:
4517 return TARGET_SSE;
4518
4519 case FIRST_MMX_REG:
4520 if (TARGET_MACHO || TARGET_64BIT)
4521 return false;
4522 return TARGET_MMX;
4523 }
4524
4525 return false;
4526 }
4527
4528 /* Define how to find the value returned by a function.
4529 VALTYPE is the data type of the value (as a tree).
4530 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4531 otherwise, FUNC is 0. */
4532
4533 static rtx
4534 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4535 const_tree fntype, const_tree fn)
4536 {
4537 unsigned int regno;
4538
4539 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4540 we normally prevent this case when mmx is not available. However
4541 some ABIs may require the result to be returned like DImode. */
4542 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4543 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4544
4545 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4546 we prevent this case when sse is not available. However some ABIs
4547 may require the result to be returned like integer TImode. */
4548 else if (mode == TImode
4549 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4550 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4551
4552 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4553 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4554 regno = FIRST_FLOAT_REG;
4555 else
4556 /* Most things go in %eax. */
4557 regno = 0;
4558
4559 /* Override FP return register with %xmm0 for local functions when
4560 SSE math is enabled or for functions with sseregparm attribute. */
4561 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4562 {
4563 int sse_level = ix86_function_sseregparm (fntype, fn);
4564 if ((sse_level >= 1 && mode == SFmode)
4565 || (sse_level == 2 && mode == DFmode))
4566 regno = FIRST_SSE_REG;
4567 }
4568
4569 return gen_rtx_REG (orig_mode, regno);
4570 }
4571
4572 static rtx
4573 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4574 const_tree valtype)
4575 {
4576 rtx ret;
4577
4578 /* Handle libcalls, which don't provide a type node. */
4579 if (valtype == NULL)
4580 {
4581 switch (mode)
4582 {
4583 case SFmode:
4584 case SCmode:
4585 case DFmode:
4586 case DCmode:
4587 case TFmode:
4588 case SDmode:
4589 case DDmode:
4590 case TDmode:
4591 return gen_rtx_REG (mode, FIRST_SSE_REG);
4592 case XFmode:
4593 case XCmode:
4594 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4595 case TCmode:
4596 return NULL;
4597 default:
4598 return gen_rtx_REG (mode, 0);
4599 }
4600 }
4601
4602 ret = construct_container (mode, orig_mode, valtype, 1,
4603 REGPARM_MAX, SSE_REGPARM_MAX,
4604 x86_64_int_return_registers, 0);
4605
4606 /* For zero sized structures, construct_container returns NULL, but we
4607 need to keep rest of compiler happy by returning meaningful value. */
4608 if (!ret)
4609 ret = gen_rtx_REG (orig_mode, 0);
4610
4611 return ret;
4612 }
4613
4614 static rtx
4615 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4616 {
4617 unsigned int regno = 0;
4618
4619 if (TARGET_SSE)
4620 {
4621 if (mode == SFmode || mode == DFmode)
4622 regno = FIRST_SSE_REG;
4623 else if (VECTOR_MODE_P (mode) || GET_MODE_SIZE (mode) == 16)
4624 regno = FIRST_SSE_REG;
4625 }
4626
4627 return gen_rtx_REG (orig_mode, regno);
4628 }
4629
4630 static rtx
4631 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
4632 enum machine_mode orig_mode, enum machine_mode mode)
4633 {
4634 const_tree fn, fntype;
4635
4636 fn = NULL_TREE;
4637 if (fntype_or_decl && DECL_P (fntype_or_decl))
4638 fn = fntype_or_decl;
4639 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4640
4641 if (TARGET_64BIT_MS_ABI)
4642 return function_value_ms_64 (orig_mode, mode);
4643 else if (TARGET_64BIT)
4644 return function_value_64 (orig_mode, mode, valtype);
4645 else
4646 return function_value_32 (orig_mode, mode, fntype, fn);
4647 }
4648
4649 static rtx
4650 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
4651 bool outgoing ATTRIBUTE_UNUSED)
4652 {
4653 enum machine_mode mode, orig_mode;
4654
4655 orig_mode = TYPE_MODE (valtype);
4656 mode = type_natural_mode (valtype);
4657 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4658 }
4659
4660 rtx
4661 ix86_libcall_value (enum machine_mode mode)
4662 {
4663 return ix86_function_value_1 (NULL, NULL, mode, mode);
4664 }
4665
4666 /* Return true iff type is returned in memory. */
4667
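/* For illustration, on ia32 (return_in_memory_32 below): a 16-byte

     struct s { int a, b, c, d; };

   is returned through a hidden pointer (nonzero here), while long double
   (XFmode) is returned in %st(0) and a 16-byte vector in %xmm0 when SSE
   is enabled (zero here).  */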
4668 static int
4669 return_in_memory_32 (const_tree type, enum machine_mode mode)
4670 {
4671 HOST_WIDE_INT size;
4672
4673 if (mode == BLKmode)
4674 return 1;
4675
4676 size = int_size_in_bytes (type);
4677
4678 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4679 return 0;
4680
4681 if (VECTOR_MODE_P (mode) || mode == TImode)
4682 {
4683 /* User-created vectors small enough to fit in EAX. */
4684 if (size < 8)
4685 return 0;
4686
4687 /* MMX/3dNow values are returned in MM0,
4688 except when it doesn't exist. */
4689 if (size == 8)
4690 return (TARGET_MMX ? 0 : 1);
4691
4692 /* SSE values are returned in XMM0, except when it doesn't exist. */
4693 if (size == 16)
4694 return (TARGET_SSE ? 0 : 1);
4695 }
4696
4697 if (mode == XFmode)
4698 return 0;
4699
4700 if (mode == TDmode)
4701 return 1;
4702
4703 if (size > 12)
4704 return 1;
4705 return 0;
4706 }
4707
4708 static int
4709 return_in_memory_64 (const_tree type, enum machine_mode mode)
4710 {
4711 int needed_intregs, needed_sseregs;
4712 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4713 }
4714
4715 static int
4716 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
4717 {
4718 HOST_WIDE_INT size = int_size_in_bytes (type);
4719
4720 /* __m128 and friends are returned in xmm0. */
4721 if (size == 16 && VECTOR_MODE_P (mode))
4722 return 0;
4723
4724 /* Otherwise, the size must be exactly in [1248]. */
4725 return (size != 1 && size != 2 && size != 4 && size != 8);
4726 }
4727
4728 int
4729 ix86_return_in_memory (const_tree type)
4730 {
4731 const enum machine_mode mode = type_natural_mode (type);
4732
4733 if (TARGET_64BIT_MS_ABI)
4734 return return_in_memory_ms_64 (type, mode);
4735 else if (TARGET_64BIT)
4736 return return_in_memory_64 (type, mode);
4737 else
4738 return return_in_memory_32 (type, mode);
4739 }
4740
4741 /* Return true iff TYPE is returned in memory. This version is used
4742 on Solaris 10. It is similar to the generic ix86_return_in_memory,
4743 but differs notably in that when MMX is available, 8-byte vectors
4744 are returned in memory, rather than in MMX registers. */
4745
4746 int
4747 ix86_sol10_return_in_memory (const_tree type)
4748 {
4749 int size;
4750 enum machine_mode mode = type_natural_mode (type);
4751
4752 if (TARGET_64BIT)
4753 return return_in_memory_64 (type, mode);
4754
4755 if (mode == BLKmode)
4756 return 1;
4757
4758 size = int_size_in_bytes (type);
4759
4760 if (VECTOR_MODE_P (mode))
4761 {
4762 /* Return in memory only if MMX registers *are* available. This
4763 seems backwards, but it is consistent with the existing
4764 Solaris x86 ABI. */
4765 if (size == 8)
4766 return TARGET_MMX;
4767 if (size == 16)
4768 return !TARGET_SSE;
4769 }
4770 else if (mode == TImode)
4771 return !TARGET_SSE;
4772 else if (mode == XFmode)
4773 return 0;
4774
4775 return size > 12;
4776 }
4777
4778 /* When returning SSE vector types, we have a choice of either
4779 (1) being abi incompatible with a -march switch, or
4780 (2) generating an error.
4781 Given no good solution, I think the safest thing is one warning.
4782 The user won't be able to use -Werror, but....
4783
4784 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4785 called in response to actually generating a caller or callee that
4786 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4787 via aggregate_value_p for general type probing from tree-ssa. */
4788
4789 static rtx
4790 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4791 {
4792 static bool warnedsse, warnedmmx;
4793
4794 if (!TARGET_64BIT && type)
4795 {
4796 /* Look at the return type of the function, not the function type. */
4797 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4798
4799 if (!TARGET_SSE && !warnedsse)
4800 {
4801 if (mode == TImode
4802 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4803 {
4804 warnedsse = true;
4805 warning (0, "SSE vector return without SSE enabled "
4806 "changes the ABI");
4807 }
4808 }
4809
4810 if (!TARGET_MMX && !warnedmmx)
4811 {
4812 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4813 {
4814 warnedmmx = true;
4815 warning (0, "MMX vector return without MMX enabled "
4816 "changes the ABI");
4817 }
4818 }
4819 }
4820
4821 return NULL;
4822 }
4823
4824 \f
4825 /* Create the va_list data type. */
4826
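/* For reference, the 64-bit record built below corresponds to the layout

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag;

   and va_list is an array of one such element.  */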
4827 static tree
4828 ix86_build_builtin_va_list (void)
4829 {
4830 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4831
4832 /* For i386 we use plain pointer to argument area. */
4833 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4834 return build_pointer_type (char_type_node);
4835
4836 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4837 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4838
4839 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4840 unsigned_type_node);
4841 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4842 unsigned_type_node);
4843 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4844 ptr_type_node);
4845 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4846 ptr_type_node);
4847
4848 va_list_gpr_counter_field = f_gpr;
4849 va_list_fpr_counter_field = f_fpr;
4850
4851 DECL_FIELD_CONTEXT (f_gpr) = record;
4852 DECL_FIELD_CONTEXT (f_fpr) = record;
4853 DECL_FIELD_CONTEXT (f_ovf) = record;
4854 DECL_FIELD_CONTEXT (f_sav) = record;
4855
4856 TREE_CHAIN (record) = type_decl;
4857 TYPE_NAME (record) = type_decl;
4858 TYPE_FIELDS (record) = f_gpr;
4859 TREE_CHAIN (f_gpr) = f_fpr;
4860 TREE_CHAIN (f_fpr) = f_ovf;
4861 TREE_CHAIN (f_ovf) = f_sav;
4862
4863 layout_type (record);
4864
4865 /* The correct type is an array type of one element. */
4866 return build_array_type (record, build_index_type (size_zero_node));
4867 }
4868
4869 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4870
4871 static void
4872 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4873 {
4874 rtx save_area, mem;
4875 rtx label;
4876 rtx label_ref;
4877 rtx tmp_reg;
4878 rtx nsse_reg;
4879 alias_set_type set;
4880 int i;
4881
4882 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4883 return;
4884
4885 /* Indicate to allocate space on the stack for varargs save area. */
4886 ix86_save_varrargs_registers = 1;
4887 /* We need 16-byte stack alignment to save SSE registers. If the user
4888 asked for a lower preferred_stack_boundary, let's just hope that he knows
4889 what he is doing and won't pass SSE values through varargs.
4890 
4891 We may also end up assuming that only 64bit values are stored in the SSE
4892 register, which lets some floating point programs work. */
4893 if (ix86_preferred_stack_boundary >= 128)
4894 cfun->stack_alignment_needed = 128;
4895
4896 save_area = frame_pointer_rtx;
4897 set = get_varargs_alias_set ();
4898
4899 for (i = cum->regno;
4900 i < ix86_regparm
4901 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4902 i++)
4903 {
4904 mem = gen_rtx_MEM (Pmode,
4905 plus_constant (save_area, i * UNITS_PER_WORD));
4906 MEM_NOTRAP_P (mem) = 1;
4907 set_mem_alias_set (mem, set);
4908 emit_move_insn (mem, gen_rtx_REG (Pmode,
4909 x86_64_int_parameter_registers[i]));
4910 }
4911
4912 if (cum->sse_nregs && cfun->va_list_fpr_size)
4913 {
4914 /* Now emit code to save SSE registers. The AX parameter contains the
4915 number of SSE parameter registers used to call this function. We use
4916 the sse_prologue_save insn template, which produces a computed jump
4917 across the SSE saves. We need some preparation work to get this working. */
4918
4919 label = gen_label_rtx ();
4920 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4921
4922 /* Compute address to jump to :
4923 label - 5*eax + nnamed_sse_arguments*5 */
4924 tmp_reg = gen_reg_rtx (Pmode);
4925 nsse_reg = gen_reg_rtx (Pmode);
4926 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4927 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4928 gen_rtx_MULT (Pmode, nsse_reg,
4929 GEN_INT (4))));
4930 if (cum->sse_regno)
4931 emit_move_insn
4932 (nsse_reg,
4933 gen_rtx_CONST (DImode,
4934 gen_rtx_PLUS (DImode,
4935 label_ref,
4936 GEN_INT (cum->sse_regno * 4))));
4937 else
4938 emit_move_insn (nsse_reg, label_ref);
4939 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4940
4941 /* Compute the address of the memory block we save into. We always use a
4942 pointer pointing 127 bytes after the first byte to store; this is needed
4943 to keep the instruction size limited to 4 bytes. */
4944 tmp_reg = gen_reg_rtx (Pmode);
4945 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4946 plus_constant (save_area,
4947 8 * REGPARM_MAX + 127)));
4948 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4949 MEM_NOTRAP_P (mem) = 1;
4950 set_mem_alias_set (mem, set);
4951 set_mem_align (mem, BITS_PER_WORD);
4952
4953 /* And finally do the dirty job! */
4954 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4955 GEN_INT (cum->sse_regno), label));
4956 }
4957 }
4958
4959 static void
4960 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4961 {
4962 alias_set_type set = get_varargs_alias_set ();
4963 int i;
4964
4965 for (i = cum->regno; i < REGPARM_MAX; i++)
4966 {
4967 rtx reg, mem;
4968
4969 mem = gen_rtx_MEM (Pmode,
4970 plus_constant (virtual_incoming_args_rtx,
4971 i * UNITS_PER_WORD));
4972 MEM_NOTRAP_P (mem) = 1;
4973 set_mem_alias_set (mem, set);
4974
4975 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4976 emit_move_insn (mem, reg);
4977 }
4978 }
4979
4980 static void
4981 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4982 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4983 int no_rtl)
4984 {
4985 CUMULATIVE_ARGS next_cum;
4986 tree fntype;
4987 int stdarg_p;
4988
4989 /* This argument doesn't appear to be used anymore. Which is good,
4990 because the old code here didn't suppress rtl generation. */
4991 gcc_assert (!no_rtl);
4992
4993 if (!TARGET_64BIT)
4994 return;
4995
4996 fntype = TREE_TYPE (current_function_decl);
4997 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4998 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4999 != void_type_node));
5000
5001 /* For varargs, we do not want to skip the dummy va_dcl argument.
5002 For stdargs, we do want to skip the last named argument. */
5003 next_cum = *cum;
5004 if (stdarg_p)
5005 function_arg_advance (&next_cum, mode, type, 1);
5006
5007 if (TARGET_64BIT_MS_ABI)
5008 setup_incoming_varargs_ms_64 (&next_cum);
5009 else
5010 setup_incoming_varargs_64 (&next_cum);
5011 }
5012
5013 /* Implement va_start. */
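/* For the 64-bit SysV ABI, va_list is a one-element array of a struct with
   four fields: gp_offset and fp_offset (byte offsets into the register save
   area for the next integer resp. SSE argument register), plus
   overflow_arg_area (pointer to the next stack-passed argument) and
   reg_save_area.  The code below initializes those four fields.  */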
5014
5015 void
5016 ix86_va_start (tree valist, rtx nextarg)
5017 {
5018 HOST_WIDE_INT words, n_gpr, n_fpr;
5019 tree f_gpr, f_fpr, f_ovf, f_sav;
5020 tree gpr, fpr, ovf, sav, t;
5021 tree type;
5022
5023 /* Only 64bit target needs something special. */
5024 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
5025 {
5026 std_expand_builtin_va_start (valist, nextarg);
5027 return;
5028 }
5029
5030 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5031 f_fpr = TREE_CHAIN (f_gpr);
5032 f_ovf = TREE_CHAIN (f_fpr);
5033 f_sav = TREE_CHAIN (f_ovf);
5034
5035 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
5036 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5037 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5038 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5039 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
5040
5041 /* Count number of gp and fp argument registers used. */
5042 words = current_function_args_info.words;
5043 n_gpr = current_function_args_info.regno;
5044 n_fpr = current_function_args_info.sse_regno;
5045
5046 if (cfun->va_list_gpr_size)
5047 {
5048 type = TREE_TYPE (gpr);
5049 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
5050 build_int_cst (type, n_gpr * 8));
5051 TREE_SIDE_EFFECTS (t) = 1;
5052 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5053 }
5054
5055 if (cfun->va_list_fpr_size)
5056 {
5057 type = TREE_TYPE (fpr);
5058 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
5059 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
5060 TREE_SIDE_EFFECTS (t) = 1;
5061 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5062 }
5063
5064 /* Find the overflow area. */
5065 type = TREE_TYPE (ovf);
5066 t = make_tree (type, virtual_incoming_args_rtx);
5067 if (words != 0)
5068 t = build2 (POINTER_PLUS_EXPR, type, t,
5069 size_int (words * UNITS_PER_WORD));
5070 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
5071 TREE_SIDE_EFFECTS (t) = 1;
5072 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5073
5074 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
5075 {
5076 /* Find the register save area.
5077 The prologue of the function saves it right above the stack frame. */
5078 type = TREE_TYPE (sav);
5079 t = make_tree (type, frame_pointer_rtx);
5080 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
5081 TREE_SIDE_EFFECTS (t) = 1;
5082 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5083 }
5084 }
5085
5086 /* Implement va_arg. */
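/* Roughly: the argument is fetched from the register save area when enough
   GP/SSE slots remain (tracked by gp_offset/fp_offset), and from the
   overflow area on the stack otherwise.  When the register pieces are not
   contiguous in the save area, they are first copied into a temporary so
   the caller sees one contiguous object.  */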
5087
5088 static tree
5089 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
5090 {
5091 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
5092 tree f_gpr, f_fpr, f_ovf, f_sav;
5093 tree gpr, fpr, ovf, sav, t;
5094 int size, rsize;
5095 tree lab_false, lab_over = NULL_TREE;
5096 tree addr, t2;
5097 rtx container;
5098 int indirect_p = 0;
5099 tree ptrtype;
5100 enum machine_mode nat_mode;
5101
5102 /* Only 64bit target needs something special. */
5103 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
5104 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5105
5106 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5107 f_fpr = TREE_CHAIN (f_gpr);
5108 f_ovf = TREE_CHAIN (f_fpr);
5109 f_sav = TREE_CHAIN (f_ovf);
5110
5111 valist = build_va_arg_indirect_ref (valist);
5112 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5113 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5114 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5115 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
5116
5117 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5118 if (indirect_p)
5119 type = build_pointer_type (type);
5120 size = int_size_in_bytes (type);
5121 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5122
5123 nat_mode = type_natural_mode (type);
5124 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
5125 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
5126
5127 /* Pull the value out of the saved registers. */
5128
5129 addr = create_tmp_var (ptr_type_node, "addr");
5130 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
5131
5132 if (container)
5133 {
5134 int needed_intregs, needed_sseregs;
5135 bool need_temp;
5136 tree int_addr, sse_addr;
5137
5138 lab_false = create_artificial_label ();
5139 lab_over = create_artificial_label ();
5140
5141 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
5142
5143 need_temp = (!REG_P (container)
5144 && ((needed_intregs && TYPE_ALIGN (type) > 64)
5145 || TYPE_ALIGN (type) > 128));
5146
5147 /* In case we are passing a structure, verify that it is a consecutive block
5148 in the register save area.  If not, we need to do moves. */
5149 if (!need_temp && !REG_P (container))
5150 {
5151 /* Verify that all registers are strictly consecutive */
5152 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
5153 {
5154 int i;
5155
5156 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5157 {
5158 rtx slot = XVECEXP (container, 0, i);
5159 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
5160 || INTVAL (XEXP (slot, 1)) != i * 16)
5161 need_temp = 1;
5162 }
5163 }
5164 else
5165 {
5166 int i;
5167
5168 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5169 {
5170 rtx slot = XVECEXP (container, 0, i);
5171 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
5172 || INTVAL (XEXP (slot, 1)) != i * 8)
5173 need_temp = 1;
5174 }
5175 }
5176 }
5177 if (!need_temp)
5178 {
5179 int_addr = addr;
5180 sse_addr = addr;
5181 }
5182 else
5183 {
5184 int_addr = create_tmp_var (ptr_type_node, "int_addr");
5185 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
5186 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
5187 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
5188 }
5189
5190 /* First ensure that we fit completely in registers. */
5191 if (needed_intregs)
5192 {
5193 t = build_int_cst (TREE_TYPE (gpr),
5194 (REGPARM_MAX - needed_intregs + 1) * 8);
5195 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
5196 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5197 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5198 gimplify_and_add (t, pre_p);
5199 }
5200 if (needed_sseregs)
5201 {
5202 t = build_int_cst (TREE_TYPE (fpr),
5203 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
5204 + REGPARM_MAX * 8);
5205 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
5206 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5207 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5208 gimplify_and_add (t, pre_p);
5209 }
5210
5211 /* Compute index to start of area used for integer regs. */
5212 if (needed_intregs)
5213 {
5214 /* int_addr = gpr + sav; */
5215 t = fold_convert (sizetype, gpr);
5216 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5217 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
5218 gimplify_and_add (t, pre_p);
5219 }
5220 if (needed_sseregs)
5221 {
5222 /* sse_addr = fpr + sav; */
5223 t = fold_convert (sizetype, fpr);
5224 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5225 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
5226 gimplify_and_add (t, pre_p);
5227 }
5228 if (need_temp)
5229 {
5230 int i;
5231 tree temp = create_tmp_var (type, "va_arg_tmp");
5232
5233 /* addr = &temp; */
5234 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
5235 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5236 gimplify_and_add (t, pre_p);
5237
5238 for (i = 0; i < XVECLEN (container, 0); i++)
5239 {
5240 rtx slot = XVECEXP (container, 0, i);
5241 rtx reg = XEXP (slot, 0);
5242 enum machine_mode mode = GET_MODE (reg);
5243 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
5244 tree addr_type = build_pointer_type (piece_type);
5245 tree src_addr, src;
5246 int src_offset;
5247 tree dest_addr, dest;
5248
5249 if (SSE_REGNO_P (REGNO (reg)))
5250 {
5251 src_addr = sse_addr;
5252 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
5253 }
5254 else
5255 {
5256 src_addr = int_addr;
5257 src_offset = REGNO (reg) * 8;
5258 }
5259 src_addr = fold_convert (addr_type, src_addr);
5260 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
5261 size_int (src_offset));
5262 src = build_va_arg_indirect_ref (src_addr);
5263
5264 dest_addr = fold_convert (addr_type, addr);
5265 dest_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, dest_addr,
5266 size_int (INTVAL (XEXP (slot, 1))));
5267 dest = build_va_arg_indirect_ref (dest_addr);
5268
5269 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
5270 gimplify_and_add (t, pre_p);
5271 }
5272 }
5273
5274 if (needed_intregs)
5275 {
5276 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
5277 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
5278 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
5279 gimplify_and_add (t, pre_p);
5280 }
5281 if (needed_sseregs)
5282 {
5283 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
5284 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
5285 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
5286 gimplify_and_add (t, pre_p);
5287 }
5288
5289 t = build1 (GOTO_EXPR, void_type_node, lab_over);
5290 gimplify_and_add (t, pre_p);
5291
5292 t = build1 (LABEL_EXPR, void_type_node, lab_false);
5293 append_to_statement_list (t, pre_p);
5294 }
5295
5296 /* ... otherwise out of the overflow area. */
5297
5298 /* Care for on-stack alignment if needed. */
5299 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
5300 || integer_zerop (TYPE_SIZE (type)))
5301 t = ovf;
5302 else
5303 {
5304 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
5305 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
5306 size_int (align - 1));
5307 t = fold_convert (sizetype, t);
5308 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5309 size_int (-align));
5310 t = fold_convert (TREE_TYPE (ovf), t);
5311 }
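/* The else branch above computes t = (ovf + align - 1) & -align, i.e. it
   rounds the overflow pointer up to the argument's alignment (e.g. to a
   16-byte boundary for a 16-byte-aligned type).  */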
5312 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
5313
5314 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5315 gimplify_and_add (t2, pre_p);
5316
5317 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
5318 size_int (rsize * UNITS_PER_WORD));
5319 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
5320 gimplify_and_add (t, pre_p);
5321
5322 if (container)
5323 {
5324 t = build1 (LABEL_EXPR, void_type_node, lab_over);
5325 append_to_statement_list (t, pre_p);
5326 }
5327
5328 ptrtype = build_pointer_type (type);
5329 addr = fold_convert (ptrtype, addr);
5330
5331 if (indirect_p)
5332 addr = build_va_arg_indirect_ref (addr);
5333 return build_va_arg_indirect_ref (addr);
5334 }
5335 \f
5336 /* Return nonzero if OPNUM's MEM should be matched
5337 in movabs* patterns. */
5338
5339 int
5340 ix86_check_movabs (rtx insn, int opnum)
5341 {
5342 rtx set, mem;
5343
5344 set = PATTERN (insn);
5345 if (GET_CODE (set) == PARALLEL)
5346 set = XVECEXP (set, 0, 0);
5347 gcc_assert (GET_CODE (set) == SET);
5348 mem = XEXP (set, opnum);
5349 while (GET_CODE (mem) == SUBREG)
5350 mem = SUBREG_REG (mem);
5351 gcc_assert (MEM_P (mem));
5352 return (volatile_ok || !MEM_VOLATILE_P (mem));
5353 }
5354 \f
5355 /* Initialize the table of extra 80387 mathematical constants. */
5356
5357 static void
5358 init_ext_80387_constants (void)
5359 {
5360 static const char * cst[5] =
5361 {
5362 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5363 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5364 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5365 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5366 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5367 };
5368 int i;
5369
5370 for (i = 0; i < 5; i++)
5371 {
5372 real_from_string (&ext_80387_constants_table[i], cst[i]);
5373 /* Ensure each constant is rounded to XFmode precision. */
5374 real_convert (&ext_80387_constants_table[i],
5375 XFmode, &ext_80387_constants_table[i]);
5376 }
5377
5378 ext_80387_constants_init = 1;
5379 }
5380
5381 /* Return a nonzero code if the constant X can be loaded with a special
5382 80387 instruction (see standard_80387_constant_opcode); 0 otherwise. */
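/* Return values: -1 if X is not an 80387 constant at all; 1 fldz (0.0);
   2 fld1 (1.0); 3 fldlg2; 4 fldln2; 5 fldl2e; 6 fldl2t; 7 fldpi;
   8 fldz;fchs (-0.0); 9 fld1;fchs (-1.0); 0 no special instruction.  */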
5383
5384 int
5385 standard_80387_constant_p (rtx x)
5386 {
5387 enum machine_mode mode = GET_MODE (x);
5388
5389 REAL_VALUE_TYPE r;
5390
5391 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
5392 return -1;
5393
5394 if (x == CONST0_RTX (mode))
5395 return 1;
5396 if (x == CONST1_RTX (mode))
5397 return 2;
5398
5399 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5400
5401 /* For XFmode constants, try to find a special 80387 instruction when
5402 optimizing for size or on those CPUs that benefit from them. */
5403 if (mode == XFmode
5404 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
5405 {
5406 int i;
5407
5408 if (! ext_80387_constants_init)
5409 init_ext_80387_constants ();
5410
5411 for (i = 0; i < 5; i++)
5412 if (real_identical (&r, &ext_80387_constants_table[i]))
5413 return i + 3;
5414 }
5415
5416 /* A load of the constant -0.0 or -1.0 will be split into an
5417 fldz;fchs or fld1;fchs sequence. */
5418 if (real_isnegzero (&r))
5419 return 8;
5420 if (real_identical (&r, &dconstm1))
5421 return 9;
5422
5423 return 0;
5424 }
5425
5426 /* Return the opcode of the special instruction to be used to load
5427 the constant X. */
5428
5429 const char *
5430 standard_80387_constant_opcode (rtx x)
5431 {
5432 switch (standard_80387_constant_p (x))
5433 {
5434 case 1:
5435 return "fldz";
5436 case 2:
5437 return "fld1";
5438 case 3:
5439 return "fldlg2";
5440 case 4:
5441 return "fldln2";
5442 case 5:
5443 return "fldl2e";
5444 case 6:
5445 return "fldl2t";
5446 case 7:
5447 return "fldpi";
5448 case 8:
5449 case 9:
5450 return "#";
5451 default:
5452 gcc_unreachable ();
5453 }
5454 }
5455
5456 /* Return the CONST_DOUBLE representing the 80387 constant that is
5457 loaded by the specified special instruction. The argument IDX
5458 matches the return value from standard_80387_constant_p. */
5459
5460 rtx
5461 standard_80387_constant_rtx (int idx)
5462 {
5463 int i;
5464
5465 if (! ext_80387_constants_init)
5466 init_ext_80387_constants ();
5467
5468 switch (idx)
5469 {
5470 case 3:
5471 case 4:
5472 case 5:
5473 case 6:
5474 case 7:
5475 i = idx - 3;
5476 break;
5477
5478 default:
5479 gcc_unreachable ();
5480 }
5481
5482 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5483 XFmode);
5484 }
5485
5486 /* Return 1 if MODE is a valid mode for SSE. */
5487 static int
5488 standard_sse_mode_p (enum machine_mode mode)
5489 {
5490 switch (mode)
5491 {
5492 case V16QImode:
5493 case V8HImode:
5494 case V4SImode:
5495 case V2DImode:
5496 case V4SFmode:
5497 case V2DFmode:
5498 return 1;
5499
5500 default:
5501 return 0;
5502 }
5503 }
5504
5505 /* Return nonzero if X is an FP constant we can load into an SSE register
5506 without using memory (see standard_sse_constant_opcode).  */
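/* Return values: 1 for all-zeros (cleared with xorps/xorpd/pxor);
   2 for all-ones when SSE2 is available (pcmpeqd); -1 for all-ones
   without SSE2; 0 when the constant must be loaded from memory.  */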
5507 int
5508 standard_sse_constant_p (rtx x)
5509 {
5510 enum machine_mode mode = GET_MODE (x);
5511
5512 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5513 return 1;
5514 if (vector_all_ones_operand (x, mode)
5515 && standard_sse_mode_p (mode))
5516 return TARGET_SSE2 ? 2 : -1;
5517
5518 return 0;
5519 }
5520
5521 /* Return the opcode of the special instruction to be used to load
5522 the constant X. */
5523
5524 const char *
5525 standard_sse_constant_opcode (rtx insn, rtx x)
5526 {
5527 switch (standard_sse_constant_p (x))
5528 {
5529 case 1:
5530 if (get_attr_mode (insn) == MODE_V4SF)
5531 return "xorps\t%0, %0";
5532 else if (get_attr_mode (insn) == MODE_V2DF)
5533 return "xorpd\t%0, %0";
5534 else
5535 return "pxor\t%0, %0";
5536 case 2:
5537 return "pcmpeqd\t%0, %0";
5538 }
5539 gcc_unreachable ();
5540 }
5541
5542 /* Returns 1 if OP contains a symbol reference */
5543
5544 int
5545 symbolic_reference_mentioned_p (rtx op)
5546 {
5547 const char *fmt;
5548 int i;
5549
5550 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5551 return 1;
5552
5553 fmt = GET_RTX_FORMAT (GET_CODE (op));
5554 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5555 {
5556 if (fmt[i] == 'E')
5557 {
5558 int j;
5559
5560 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5561 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5562 return 1;
5563 }
5564
5565 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5566 return 1;
5567 }
5568
5569 return 0;
5570 }
5571
5572 /* Return 1 if it is appropriate to emit `ret' instructions in the
5573 body of a function. Do this only if the epilogue is simple, needing a
5574 couple of insns. Prior to reloading, we can't tell how many registers
5575 must be saved, so return 0 then. Return 0 if there is no frame
5576 marker to de-allocate. */
5577
5578 int
5579 ix86_can_use_return_insn_p (void)
5580 {
5581 struct ix86_frame frame;
5582
5583 if (! reload_completed || frame_pointer_needed)
5584 return 0;
5585
5586 /* Don't allow more than 32k bytes of pop, since that's all we can do
5587 with one instruction. */
5588 if (current_function_pops_args
5589 && current_function_args_size >= 32768)
5590 return 0;
5591
5592 ix86_compute_frame_layout (&frame);
5593 return frame.to_allocate == 0 && frame.nregs == 0;
5594 }
5595 \f
5596 /* Value should be nonzero if functions must have frame pointers.
5597 Zero means the frame pointer need not be set up (and parms may
5598 be accessed via the stack pointer) in functions that seem suitable. */
5599
5600 int
5601 ix86_frame_pointer_required (void)
5602 {
5603 /* If we accessed previous frames, then the generated code expects
5604 to be able to access the saved ebp value in our frame. */
5605 if (cfun->machine->accesses_prev_frame)
5606 return 1;
5607
5608 /* Several x86 OSes need a frame pointer for other reasons,
5609 usually pertaining to setjmp. */
5610 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5611 return 1;
5612
5613 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5614 the frame pointer by default. Turn it back on now if we've not
5615 got a leaf function. */
5616 if (TARGET_OMIT_LEAF_FRAME_POINTER
5617 && (!current_function_is_leaf
5618 || ix86_current_function_calls_tls_descriptor))
5619 return 1;
5620
5621 if (current_function_profile)
5622 return 1;
5623
5624 return 0;
5625 }
5626
5627 /* Record that the current function accesses previous call frames. */
5628
5629 void
5630 ix86_setup_frame_addresses (void)
5631 {
5632 cfun->machine->accesses_prev_frame = 1;
5633 }
5634 \f
5635 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5636 # define USE_HIDDEN_LINKONCE 1
5637 #else
5638 # define USE_HIDDEN_LINKONCE 0
5639 #endif
5640
5641 static int pic_labels_used;
5642
5643 /* Fills in the label name that should be used for a pc thunk for
5644 the given register. */
5645
5646 static void
5647 get_pc_thunk_name (char name[32], unsigned int regno)
5648 {
5649 gcc_assert (!TARGET_64BIT);
5650
5651 if (USE_HIDDEN_LINKONCE)
5652 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5653 else
5654 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5655 }
5656
5657
5658 /* For -fpic, emit at the end of the file the pc thunks referenced by
5659 output_set_got: each one loads its register with the return address of
the caller and then returns. */
5660
5661 void
5662 ix86_file_end (void)
5663 {
5664 rtx xops[2];
5665 int regno;
5666
5667 for (regno = 0; regno < 8; ++regno)
5668 {
5669 char name[32];
5670
5671 if (! ((pic_labels_used >> regno) & 1))
5672 continue;
5673
5674 get_pc_thunk_name (name, regno);
5675
5676 #if TARGET_MACHO
5677 if (TARGET_MACHO)
5678 {
5679 switch_to_section (darwin_sections[text_coal_section]);
5680 fputs ("\t.weak_definition\t", asm_out_file);
5681 assemble_name (asm_out_file, name);
5682 fputs ("\n\t.private_extern\t", asm_out_file);
5683 assemble_name (asm_out_file, name);
5684 fputs ("\n", asm_out_file);
5685 ASM_OUTPUT_LABEL (asm_out_file, name);
5686 }
5687 else
5688 #endif
5689 if (USE_HIDDEN_LINKONCE)
5690 {
5691 tree decl;
5692
5693 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5694 error_mark_node);
5695 TREE_PUBLIC (decl) = 1;
5696 TREE_STATIC (decl) = 1;
5697 DECL_ONE_ONLY (decl) = 1;
5698
5699 (*targetm.asm_out.unique_section) (decl, 0);
5700 switch_to_section (get_named_section (decl, NULL, 0));
5701
5702 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5703 fputs ("\t.hidden\t", asm_out_file);
5704 assemble_name (asm_out_file, name);
5705 fputc ('\n', asm_out_file);
5706 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5707 }
5708 else
5709 {
5710 switch_to_section (text_section);
5711 ASM_OUTPUT_LABEL (asm_out_file, name);
5712 }
5713
5714 xops[0] = gen_rtx_REG (SImode, regno);
5715 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5716 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5717 output_asm_insn ("ret", xops);
5718 }
5719
5720 if (NEED_INDICATE_EXEC_STACK)
5721 file_end_indicate_exec_stack ();
5722 }
5723
5724 /* Emit code for the SET_GOT patterns. */
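/* With TARGET_DEEP_BRANCH_PREDICTION and -fpic this emits roughly

       call    __i686.get_pc_thunk.bx
       addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   (for %ebx as the destination); otherwise it falls back to the classic
   call-to-next-instruction / pop sequence.  */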
5725
5726 const char *
5727 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5728 {
5729 rtx xops[3];
5730
5731 xops[0] = dest;
5732
5733 if (TARGET_VXWORKS_RTP && flag_pic)
5734 {
5735 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5736 xops[2] = gen_rtx_MEM (Pmode,
5737 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5738 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5739
5740 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5741 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5742 an unadorned address. */
5743 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5744 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5745 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5746 return "";
5747 }
5748
5749 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5750
5751 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5752 {
5753 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5754
5755 if (!flag_pic)
5756 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5757 else
5758 output_asm_insn ("call\t%a2", xops);
5759
5760 #if TARGET_MACHO
5761 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5762 is what will be referenced by the Mach-O PIC subsystem. */
5763 if (!label)
5764 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5765 #endif
5766
5767 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5768 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5769
5770 if (flag_pic)
5771 output_asm_insn ("pop{l}\t%0", xops);
5772 }
5773 else
5774 {
5775 char name[32];
5776 get_pc_thunk_name (name, REGNO (dest));
5777 pic_labels_used |= 1 << REGNO (dest);
5778
5779 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5780 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5781 output_asm_insn ("call\t%X2", xops);
5782 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5783 is what will be referenced by the Mach-O PIC subsystem. */
5784 #if TARGET_MACHO
5785 if (!label)
5786 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5787 else
5788 targetm.asm_out.internal_label (asm_out_file, "L",
5789 CODE_LABEL_NUMBER (label));
5790 #endif
5791 }
5792
5793 if (TARGET_MACHO)
5794 return "";
5795
5796 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5797 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5798 else
5799 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5800
5801 return "";
5802 }
5803
5804 /* Generate a "push" pattern for input ARG. */
5805
5806 static rtx
5807 gen_push (rtx arg)
5808 {
5809 return gen_rtx_SET (VOIDmode,
5810 gen_rtx_MEM (Pmode,
5811 gen_rtx_PRE_DEC (Pmode,
5812 stack_pointer_rtx)),
5813 arg);
5814 }
5815
5816 /* Return the number of an unused call-clobbered register if one is
5817 available for the entire function, or INVALID_REGNUM otherwise. */
5818
5819 static unsigned int
5820 ix86_select_alt_pic_regnum (void)
5821 {
5822 if (current_function_is_leaf && !current_function_profile
5823 && !ix86_current_function_calls_tls_descriptor)
5824 {
5825 int i;
5826 for (i = 2; i >= 0; --i)
5827 if (!df_regs_ever_live_p (i))
5828 return i;
5829 }
5830
5831 return INVALID_REGNUM;
5832 }
5833
5834 /* Return 1 if we need to save REGNO. */
5835 static int
5836 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5837 {
5838 if (pic_offset_table_rtx
5839 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5840 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
5841 || current_function_profile
5842 || current_function_calls_eh_return
5843 || current_function_uses_const_pool))
5844 {
5845 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5846 return 0;
5847 return 1;
5848 }
5849
5850 if (current_function_calls_eh_return && maybe_eh_return)
5851 {
5852 unsigned i;
5853 for (i = 0; ; i++)
5854 {
5855 unsigned test = EH_RETURN_DATA_REGNO (i);
5856 if (test == INVALID_REGNUM)
5857 break;
5858 if (test == regno)
5859 return 1;
5860 }
5861 }
5862
5863 if (cfun->machine->force_align_arg_pointer
5864 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5865 return 1;
5866
5867 return (df_regs_ever_live_p (regno)
5868 && !call_used_regs[regno]
5869 && !fixed_regs[regno]
5870 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5871 }
5872
5873 /* Return number of registers to be saved on the stack. */
5874
5875 static int
5876 ix86_nsaved_regs (void)
5877 {
5878 int nregs = 0;
5879 int regno;
5880
5881 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5882 if (ix86_save_reg (regno, true))
5883 nregs++;
5884 return nregs;
5885 }
5886
5887 /* Return the offset between two registers, one to be eliminated, and the other
5888 its replacement, at the start of a routine. */
5889
5890 HOST_WIDE_INT
5891 ix86_initial_elimination_offset (int from, int to)
5892 {
5893 struct ix86_frame frame;
5894 ix86_compute_frame_layout (&frame);
5895
5896 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5897 return frame.hard_frame_pointer_offset;
5898 else if (from == FRAME_POINTER_REGNUM
5899 && to == HARD_FRAME_POINTER_REGNUM)
5900 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5901 else
5902 {
5903 gcc_assert (to == STACK_POINTER_REGNUM);
5904
5905 if (from == ARG_POINTER_REGNUM)
5906 return frame.stack_pointer_offset;
5907
5908 gcc_assert (from == FRAME_POINTER_REGNUM);
5909 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5910 }
5911 }
5912
5913 /* Fill the ix86_frame structure describing the frame of the current function. */
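/* A sketch of the layout computed below, from the incoming stack pointer
   downwards:

       return address (+ saved %ebp if frame_pointer_needed)
          <- hard_frame_pointer_offset
       register save area          (nregs * UNITS_PER_WORD)
       va-arg register save area   (X86_64_VARARGS_SIZE, if used)
       padding1                    (up to stack_alignment_needed)
          <- frame_pointer_offset
       local variables             (get_frame_size ())
       outgoing arguments area
       padding2                    (up to preferred_alignment)
          <- stack_pointer_offset

   to_allocate is the space below the saved registers that the prologue must
   allocate, less whatever ends up in the red zone.  */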
5914
5915 static void
5916 ix86_compute_frame_layout (struct ix86_frame *frame)
5917 {
5918 HOST_WIDE_INT total_size;
5919 unsigned int stack_alignment_needed;
5920 HOST_WIDE_INT offset;
5921 unsigned int preferred_alignment;
5922 HOST_WIDE_INT size = get_frame_size ();
5923
5924 frame->nregs = ix86_nsaved_regs ();
5925 total_size = size;
5926
5927 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5928 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5929
5930 /* During reload iteration the number of registers saved can change.
5931 Recompute the value as needed.  Do not recompute when the number of registers
5932 didn't change, as reload does multiple calls to the function and does not
5933 expect the decision to change within a single iteration. */
5934 if (!optimize_size
5935 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5936 {
5937 int count = frame->nregs;
5938
5939 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5940 /* The fast prologue uses moves instead of pushes to save registers.  This
5941 is significantly longer, but also executes faster, as modern hardware
5942 can execute the moves in parallel but can't do that for push/pop.
5943
5944 Be careful about choosing which prologue to emit: when a function takes
5945 many instructions to execute, we may as well use the slow version, and
5946 likewise when the function is known to be outside a hot spot (this is
5947 known with feedback only).  Weight the size of the function by the number
5948 of registers to save, as it is cheap to use one or two push instructions
5949 but very slow to use many of them. */
5950 if (count)
5951 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5952 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5953 || (flag_branch_probabilities
5954 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5955 cfun->machine->use_fast_prologue_epilogue = false;
5956 else
5957 cfun->machine->use_fast_prologue_epilogue
5958 = !expensive_function_p (count);
5959 }
5960 if (TARGET_PROLOGUE_USING_MOVE
5961 && cfun->machine->use_fast_prologue_epilogue)
5962 frame->save_regs_using_mov = true;
5963 else
5964 frame->save_regs_using_mov = false;
5965
5966
5967 /* Skip return address and saved base pointer. */
5968 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5969
5970 frame->hard_frame_pointer_offset = offset;
5971
5972 /* Do some sanity checking of stack_alignment_needed and
5973 preferred_alignment, since the i386 port is the only one using these
5974 features, which may break easily. */
5975
5976 gcc_assert (!size || stack_alignment_needed);
5977 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5978 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5979 gcc_assert (stack_alignment_needed
5980 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5981
5982 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5983 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5984
5985 /* Register save area */
5986 offset += frame->nregs * UNITS_PER_WORD;
5987
5988 /* Va-arg area */
5989 if (ix86_save_varrargs_registers)
5990 {
5991 offset += X86_64_VARARGS_SIZE;
5992 frame->va_arg_size = X86_64_VARARGS_SIZE;
5993 }
5994 else
5995 frame->va_arg_size = 0;
5996
5997 /* Align start of frame for local function. */
5998 frame->padding1 = ((offset + stack_alignment_needed - 1)
5999 & -stack_alignment_needed) - offset;
6000
6001 offset += frame->padding1;
6002
6003 /* Frame pointer points here. */
6004 frame->frame_pointer_offset = offset;
6005
6006 offset += size;
6007
6008 /* Add the outgoing arguments area.  This can be skipped if we eliminated
6009 all the function calls as dead code.
6010 Skipping is however impossible when the function calls alloca.  The alloca
6011 expander assumes that the last current_function_outgoing_args_size bytes
6012 of the stack frame are unused. */
6013 if (ACCUMULATE_OUTGOING_ARGS
6014 && (!current_function_is_leaf || current_function_calls_alloca
6015 || ix86_current_function_calls_tls_descriptor))
6016 {
6017 offset += current_function_outgoing_args_size;
6018 frame->outgoing_arguments_size = current_function_outgoing_args_size;
6019 }
6020 else
6021 frame->outgoing_arguments_size = 0;
6022
6023 /* Align stack boundary. Only needed if we're calling another function
6024 or using alloca. */
6025 if (!current_function_is_leaf || current_function_calls_alloca
6026 || ix86_current_function_calls_tls_descriptor)
6027 frame->padding2 = ((offset + preferred_alignment - 1)
6028 & -preferred_alignment) - offset;
6029 else
6030 frame->padding2 = 0;
6031
6032 offset += frame->padding2;
6033
6034 /* We've reached the end of the stack frame. */
6035 frame->stack_pointer_offset = offset;
6036
6037 /* Size the prologue needs to allocate. */
6038 frame->to_allocate =
6039 (size + frame->padding1 + frame->padding2
6040 + frame->outgoing_arguments_size + frame->va_arg_size);
6041
6042 if ((!frame->to_allocate && frame->nregs <= 1)
6043 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
6044 frame->save_regs_using_mov = false;
6045
6046 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
6047 && current_function_is_leaf
6048 && !ix86_current_function_calls_tls_descriptor)
6049 {
6050 frame->red_zone_size = frame->to_allocate;
6051 if (frame->save_regs_using_mov)
6052 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
6053 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
6054 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
6055 }
6056 else
6057 frame->red_zone_size = 0;
6058 frame->to_allocate -= frame->red_zone_size;
6059 frame->stack_pointer_offset -= frame->red_zone_size;
6060 #if 0
6061 fprintf (stderr, "\n");
6062 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
6063 fprintf (stderr, "size: %ld\n", (long)size);
6064 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
6065 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
6066 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
6067 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
6068 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
6069 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
6070 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
6071 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
6072 (long)frame->hard_frame_pointer_offset);
6073 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
6074 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
6075 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
6076 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
6077 #endif
6078 }
6079
6080 /* Emit code to save registers in the prologue. */
6081
6082 static void
6083 ix86_emit_save_regs (void)
6084 {
6085 unsigned int regno;
6086 rtx insn;
6087
6088 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
6089 if (ix86_save_reg (regno, true))
6090 {
6091 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
6092 RTX_FRAME_RELATED_P (insn) = 1;
6093 }
6094 }
6095
6096 /* Emit code to save registers using MOV insns.  The first register
6097 is stored at POINTER + OFFSET. */
6098 static void
6099 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
6100 {
6101 unsigned int regno;
6102 rtx insn;
6103
6104 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6105 if (ix86_save_reg (regno, true))
6106 {
6107 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
6108 Pmode, offset),
6109 gen_rtx_REG (Pmode, regno));
6110 RTX_FRAME_RELATED_P (insn) = 1;
6111 offset += UNITS_PER_WORD;
6112 }
6113 }
6114
6115 /* Expand prologue or epilogue stack adjustment.
6116 The pattern exists to put a dependency on all ebp-based memory accesses.
6117 STYLE should be negative if instructions should be marked as frame related,
6118 zero if the %r11 register is live and cannot be freely used, and positive
6119 otherwise. */
6120
6121 static void
6122 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
6123 {
6124 rtx insn;
6125
6126 if (! TARGET_64BIT)
6127 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
6128 else if (x86_64_immediate_operand (offset, DImode))
6129 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
6130 else
6131 {
6132 rtx r11;
6133 /* r11 is used by indirect sibcall return as well, set before the
6134 epilogue and used after the epilogue. ATM indirect sibcall
6135 shouldn't be used together with huge frame sizes in one
6136 function because of the frame_size check in sibcall.c. */
6137 gcc_assert (style);
6138 r11 = gen_rtx_REG (DImode, R11_REG);
6139 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
6140 if (style < 0)
6141 RTX_FRAME_RELATED_P (insn) = 1;
6142 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
6143 offset));
6144 }
6145 if (style < 0)
6146 RTX_FRAME_RELATED_P (insn) = 1;
6147 }
6148
6149 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
6150
6151 static rtx
6152 ix86_internal_arg_pointer (void)
6153 {
6154 bool has_force_align_arg_pointer =
6155 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
6156 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
6157 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
6158 && DECL_NAME (current_function_decl)
6159 && MAIN_NAME_P (DECL_NAME (current_function_decl))
6160 && DECL_FILE_SCOPE_P (current_function_decl))
6161 || ix86_force_align_arg_pointer
6162 || has_force_align_arg_pointer)
6163 {
6164 /* Nested functions can't realign the stack due to a register
6165 conflict. */
6166 if (DECL_CONTEXT (current_function_decl)
6167 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
6168 {
6169 if (ix86_force_align_arg_pointer)
6170 warning (0, "-mstackrealign ignored for nested functions");
6171 if (has_force_align_arg_pointer)
6172 error ("%s not supported for nested functions",
6173 ix86_force_align_arg_pointer_string);
6174 return virtual_incoming_args_rtx;
6175 }
6176 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
6177 return copy_to_reg (cfun->machine->force_align_arg_pointer);
6178 }
6179 else
6180 return virtual_incoming_args_rtx;
6181 }
6182
6183 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
6184 This is called from dwarf2out.c to emit call frame instructions
6185 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
6186 static void
6187 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
6188 {
6189 rtx unspec = SET_SRC (pattern);
6190 gcc_assert (GET_CODE (unspec) == UNSPEC);
6191
6192 switch (index)
6193 {
6194 case UNSPEC_REG_SAVE:
6195 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
6196 SET_DEST (pattern));
6197 break;
6198 case UNSPEC_DEF_CFA:
6199 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
6200 INTVAL (XVECEXP (unspec, 0, 0)));
6201 break;
6202 default:
6203 gcc_unreachable ();
6204 }
6205 }
6206
6207 /* Expand the prologue into a bunch of separate insns. */
6208
6209 void
6210 ix86_expand_prologue (void)
6211 {
6212 rtx insn;
6213 bool pic_reg_used;
6214 struct ix86_frame frame;
6215 HOST_WIDE_INT allocate;
6216
6217 ix86_compute_frame_layout (&frame);
6218
6219 if (cfun->machine->force_align_arg_pointer)
6220 {
6221 rtx x, y;
6222
6223 /* Grab the argument pointer. */
6224 x = plus_constant (stack_pointer_rtx, 4);
6225 y = cfun->machine->force_align_arg_pointer;
6226 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
6227 RTX_FRAME_RELATED_P (insn) = 1;
6228
6229 /* The unwind info consists of two parts: install the fafp as the cfa,
6230 and record the fafp as the "save register" of the stack pointer.
6231 The latter is there so that the unwinder can see where it should
6232 restore the stack pointer across the stack-aligning "and" insn. */
6233 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
6234 x = gen_rtx_SET (VOIDmode, y, x);
6235 RTX_FRAME_RELATED_P (x) = 1;
6236 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
6237 UNSPEC_REG_SAVE);
6238 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
6239 RTX_FRAME_RELATED_P (y) = 1;
6240 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
6241 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6242 REG_NOTES (insn) = x;
6243
6244 /* Align the stack. */
6245 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
6246 GEN_INT (-16)));
6247
6248 /* And here we cheat like madmen with the unwind info.  We force the
6249 cfa register back to sp+4, which is exactly what it was at the
6250 start of the function.  Re-pushing the return address puts it at
6251 the same spot relative to the cfa, and thus is
6252 correct wrt the unwind info. */
6253 x = cfun->machine->force_align_arg_pointer;
6254 x = gen_frame_mem (Pmode, plus_constant (x, -4));
6255 insn = emit_insn (gen_push (x));
6256 RTX_FRAME_RELATED_P (insn) = 1;
6257
6258 x = GEN_INT (4);
6259 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
6260 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
6261 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6262 REG_NOTES (insn) = x;
6263 }
6264
6265 /* Note: AT&T enter does NOT have reversed args. Enter is probably
6266 slower on all targets. Also sdb doesn't like it. */
6267
6268 if (frame_pointer_needed)
6269 {
6270 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
6271 RTX_FRAME_RELATED_P (insn) = 1;
6272
6273 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
6274 RTX_FRAME_RELATED_P (insn) = 1;
6275 }
6276
6277 allocate = frame.to_allocate;
6278
6279 if (!frame.save_regs_using_mov)
6280 ix86_emit_save_regs ();
6281 else
6282 allocate += frame.nregs * UNITS_PER_WORD;
6283
6284 /* When using the red zone we may start saving registers before allocating
6285 the stack frame, saving one cycle of the prologue. */
6286 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
6287 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
6288 : stack_pointer_rtx,
6289 -frame.nregs * UNITS_PER_WORD);
6290
6291 if (allocate == 0)
6292 ;
6293 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
6294 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6295 GEN_INT (-allocate), -1);
6296 else
6297 {
6298 /* Only valid for Win32 and the 64-bit MS ABI. */
6299 rtx eax = gen_rtx_REG (Pmode, 0);
6300 bool eax_live;
6301 rtx t;
6302
6303 gcc_assert (!TARGET_64BIT || TARGET_64BIT_MS_ABI);
6304
6305 if (TARGET_64BIT_MS_ABI)
6306 eax_live = false;
6307 else
6308 eax_live = ix86_eax_live_at_start_p ();
6309
6310 if (eax_live)
6311 {
6312 emit_insn (gen_push (eax));
6313 allocate -= UNITS_PER_WORD;
6314 }
6315
6316 emit_move_insn (eax, GEN_INT (allocate));
6317
6318 if (TARGET_64BIT)
6319 insn = gen_allocate_stack_worker_64 (eax);
6320 else
6321 insn = gen_allocate_stack_worker_32 (eax);
6322 insn = emit_insn (insn);
6323 RTX_FRAME_RELATED_P (insn) = 1;
6324 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
6325 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
6326 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
6327 t, REG_NOTES (insn));
6328
6329 if (eax_live)
6330 {
6331 if (frame_pointer_needed)
6332 t = plus_constant (hard_frame_pointer_rtx,
6333 allocate
6334 - frame.to_allocate
6335 - frame.nregs * UNITS_PER_WORD);
6336 else
6337 t = plus_constant (stack_pointer_rtx, allocate);
6338 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
6339 }
6340 }
6341
6342 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
6343 {
6344 if (!frame_pointer_needed || !frame.to_allocate)
6345 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
6346 else
6347 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
6348 -frame.nregs * UNITS_PER_WORD);
6349 }
6350
6351 pic_reg_used = false;
6352 if (pic_offset_table_rtx
6353 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6354 || current_function_profile))
6355 {
6356 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
6357
6358 if (alt_pic_reg_used != INVALID_REGNUM)
6359 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
6360
6361 pic_reg_used = true;
6362 }
6363
6364 if (pic_reg_used)
6365 {
6366 if (TARGET_64BIT)
6367 {
6368 if (ix86_cmodel == CM_LARGE_PIC)
6369 {
6370 rtx tmp_reg = gen_rtx_REG (DImode,
6371 FIRST_REX_INT_REG + 3 /* R11 */);
6372 rtx label = gen_label_rtx ();
6373 emit_label (label);
6374 LABEL_PRESERVE_P (label) = 1;
6375 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6376 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6377 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6378 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6379 pic_offset_table_rtx, tmp_reg));
6380 }
6381 else
6382 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6383 }
6384 else
6385 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6386 }
6387
6388 /* Prevent function calls from being scheduled before the call to mcount.
6389 In the pic_reg_used case, make sure that the got load isn't deleted. */
6390 if (current_function_profile)
6391 {
6392 if (pic_reg_used)
6393 emit_insn (gen_prologue_use (pic_offset_table_rtx));
6394 emit_insn (gen_blockage ());
6395 }
6396 }
6397
6398 /* Emit code to restore saved registers using MOV insns. First register
6399 is restored from POINTER + OFFSET. */
6400 static void
6401 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6402 int maybe_eh_return)
6403 {
6404 int regno;
6405 rtx base_address = gen_rtx_MEM (Pmode, pointer);
6406
6407 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6408 if (ix86_save_reg (regno, maybe_eh_return))
6409 {
6410 /* Ensure that adjust_address won't be forced to produce a pointer
6411 out of the range allowed by the x86-64 instruction set. */
6412 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6413 {
6414 rtx r11;
6415
6416 r11 = gen_rtx_REG (DImode, R11_REG);
6417 emit_move_insn (r11, GEN_INT (offset));
6418 emit_insn (gen_adddi3 (r11, r11, pointer));
6419 base_address = gen_rtx_MEM (Pmode, r11);
6420 offset = 0;
6421 }
6422 emit_move_insn (gen_rtx_REG (Pmode, regno),
6423 adjust_address (base_address, Pmode, offset));
6424 offset += UNITS_PER_WORD;
6425 }
6426 }
6427
6428 /* Restore function stack, frame, and registers. */
6429
6430 void
6431 ix86_expand_epilogue (int style)
6432 {
6433 int regno;
6434 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6435 struct ix86_frame frame;
6436 HOST_WIDE_INT offset;
6437
6438 ix86_compute_frame_layout (&frame);
6439
6440 /* Calculate start of saved registers relative to ebp. Special care
6441 must be taken for the normal return case of a function using
6442 eh_return: the eax and edx registers are marked as saved, but not
6443 restored along this path. */
6444 offset = frame.nregs;
6445 if (current_function_calls_eh_return && style != 2)
6446 offset -= 2;
6447 offset *= -UNITS_PER_WORD;
6448
6449 /* If we're only restoring one register and sp is not valid then
6450 use a move instruction to restore the register, since it's
6451 less work than reloading sp and popping the register.
6452
6453 The default code results in a stack adjustment using an add/lea instruction,
6454 while this code results in a LEAVE instruction (or discrete equivalent),
6455 so it is profitable in some other cases as well, especially when there
6456 are no registers to restore.  We also use this code when TARGET_USE_LEAVE
6457 and there is exactly one register to pop.  This heuristic may need some
6458 tuning in the future. */
6459 if ((!sp_valid && frame.nregs <= 1)
6460 || (TARGET_EPILOGUE_USING_MOVE
6461 && cfun->machine->use_fast_prologue_epilogue
6462 && (frame.nregs > 1 || frame.to_allocate))
6463 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6464 || (frame_pointer_needed && TARGET_USE_LEAVE
6465 && cfun->machine->use_fast_prologue_epilogue
6466 && frame.nregs == 1)
6467 || current_function_calls_eh_return)
6468 {
6469 /* Restore registers.  We can use ebp or esp to address the memory
6470 locations.  If both are available, default to ebp, since offsets
6471 are known to be small.  The only exception is esp pointing directly to
6472 the end of the block of saved registers, where we may simplify the
6473 addressing mode. */
6474
6475 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6476 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6477 frame.to_allocate, style == 2);
6478 else
6479 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6480 offset, style == 2);
6481
6482 /* eh_return epilogues need %ecx added to the stack pointer. */
6483 if (style == 2)
6484 {
6485 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6486
6487 if (frame_pointer_needed)
6488 {
6489 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6490 tmp = plus_constant (tmp, UNITS_PER_WORD);
6491 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6492
6493 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6494 emit_move_insn (hard_frame_pointer_rtx, tmp);
6495
6496 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6497 const0_rtx, style);
6498 }
6499 else
6500 {
6501 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6502 tmp = plus_constant (tmp, (frame.to_allocate
6503 + frame.nregs * UNITS_PER_WORD));
6504 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6505 }
6506 }
6507 else if (!frame_pointer_needed)
6508 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6509 GEN_INT (frame.to_allocate
6510 + frame.nregs * UNITS_PER_WORD),
6511 style);
6512 /* If not an i386, mov & pop is faster than "leave". */
6513 else if (TARGET_USE_LEAVE || optimize_size
6514 || !cfun->machine->use_fast_prologue_epilogue)
6515 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6516 else
6517 {
6518 pro_epilogue_adjust_stack (stack_pointer_rtx,
6519 hard_frame_pointer_rtx,
6520 const0_rtx, style);
6521 if (TARGET_64BIT)
6522 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6523 else
6524 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6525 }
6526 }
6527 else
6528 {
6529 /* First step is to deallocate the stack frame so that we can
6530 pop the registers. */
6531 if (!sp_valid)
6532 {
6533 gcc_assert (frame_pointer_needed);
6534 pro_epilogue_adjust_stack (stack_pointer_rtx,
6535 hard_frame_pointer_rtx,
6536 GEN_INT (offset), style);
6537 }
6538 else if (frame.to_allocate)
6539 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6540 GEN_INT (frame.to_allocate), style);
6541
6542 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6543 if (ix86_save_reg (regno, false))
6544 {
6545 if (TARGET_64BIT)
6546 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6547 else
6548 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6549 }
6550 if (frame_pointer_needed)
6551 {
6552 /* Leave results in shorter dependency chains on CPUs that are
6553 able to grok it fast. */
6554 if (TARGET_USE_LEAVE)
6555 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6556 else if (TARGET_64BIT)
6557 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6558 else
6559 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6560 }
6561 }
6562
6563 if (cfun->machine->force_align_arg_pointer)
6564 {
6565 emit_insn (gen_addsi3 (stack_pointer_rtx,
6566 cfun->machine->force_align_arg_pointer,
6567 GEN_INT (-4)));
6568 }
6569
6570 /* Sibcall epilogues don't want a return instruction. */
6571 if (style == 0)
6572 return;
6573
6574 if (current_function_pops_args && current_function_args_size)
6575 {
6576 rtx popc = GEN_INT (current_function_pops_args);
6577
6578 /* i386 can only pop 64K bytes. If asked to pop more, pop
6579 return address, do explicit add, and jump indirectly to the
6580 caller. */
6581
6582 if (current_function_pops_args >= 65536)
6583 {
6584 rtx ecx = gen_rtx_REG (SImode, 2);
6585
6586 /* There is no "pascal" calling convention in any 64bit ABI. */
6587 gcc_assert (!TARGET_64BIT);
6588
6589 emit_insn (gen_popsi1 (ecx));
6590 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6591 emit_jump_insn (gen_return_indirect_internal (ecx));
6592 }
6593 else
6594 emit_jump_insn (gen_return_pop_internal (popc));
6595 }
6596 else
6597 emit_jump_insn (gen_return_internal ());
6598 }
6599
6600 /* Reset state that compiling the function may have modified (such as the PIC register number). */
6601
6602 static void
6603 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6604 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6605 {
6606 if (pic_offset_table_rtx)
6607 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
6608 #if TARGET_MACHO
6609 /* Mach-O doesn't support labels at the end of objects, so if
6610 it looks like we might want one, insert a NOP. */
6611 {
6612 rtx insn = get_last_insn ();
6613 while (insn
6614 && NOTE_P (insn)
6615 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
6616 insn = PREV_INSN (insn);
6617 if (insn
6618 && (LABEL_P (insn)
6619 || (NOTE_P (insn)
6620 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
6621 fputs ("\tnop\n", file);
6622 }
6623 #endif
6624
6625 }
6626 \f
6627 /* Extract the parts of an RTL expression that is a valid memory address
6628 for an instruction. Return 0 if the structure of the address is
6629 grossly off.  Return -1 if the address contains ASHIFT, so it is not
6630 strictly valid, but is still used for computing the length of an lea instruction. */
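/* For example, (plus (plus (reg %ebx) (mult (reg %ecx) (const_int 4)))
   (const_int 12)) decomposes into base = %ebx, index = %ecx, scale = 4,
   disp = 12, seg = SEG_DEFAULT.  */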
6631
6632 int
6633 ix86_decompose_address (rtx addr, struct ix86_address *out)
6634 {
6635 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6636 rtx base_reg, index_reg;
6637 HOST_WIDE_INT scale = 1;
6638 rtx scale_rtx = NULL_RTX;
6639 int retval = 1;
6640 enum ix86_address_seg seg = SEG_DEFAULT;
6641
6642 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6643 base = addr;
6644 else if (GET_CODE (addr) == PLUS)
6645 {
6646 rtx addends[4], op;
6647 int n = 0, i;
6648
6649 op = addr;
6650 do
6651 {
6652 if (n >= 4)
6653 return 0;
6654 addends[n++] = XEXP (op, 1);
6655 op = XEXP (op, 0);
6656 }
6657 while (GET_CODE (op) == PLUS);
6658 if (n >= 4)
6659 return 0;
6660 addends[n] = op;
6661
6662 for (i = n; i >= 0; --i)
6663 {
6664 op = addends[i];
6665 switch (GET_CODE (op))
6666 {
6667 case MULT:
6668 if (index)
6669 return 0;
6670 index = XEXP (op, 0);
6671 scale_rtx = XEXP (op, 1);
6672 break;
6673
6674 case UNSPEC:
6675 if (XINT (op, 1) == UNSPEC_TP
6676 && TARGET_TLS_DIRECT_SEG_REFS
6677 && seg == SEG_DEFAULT)
6678 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6679 else
6680 return 0;
6681 break;
6682
6683 case REG:
6684 case SUBREG:
6685 if (!base)
6686 base = op;
6687 else if (!index)
6688 index = op;
6689 else
6690 return 0;
6691 break;
6692
6693 case CONST:
6694 case CONST_INT:
6695 case SYMBOL_REF:
6696 case LABEL_REF:
6697 if (disp)
6698 return 0;
6699 disp = op;
6700 break;
6701
6702 default:
6703 return 0;
6704 }
6705 }
6706 }
6707 else if (GET_CODE (addr) == MULT)
6708 {
6709 index = XEXP (addr, 0); /* index*scale */
6710 scale_rtx = XEXP (addr, 1);
6711 }
6712 else if (GET_CODE (addr) == ASHIFT)
6713 {
6714 rtx tmp;
6715
6716 /* We're called for lea too, which implements ashift on occasion. */
6717 index = XEXP (addr, 0);
6718 tmp = XEXP (addr, 1);
6719 if (!CONST_INT_P (tmp))
6720 return 0;
6721 scale = INTVAL (tmp);
6722 if ((unsigned HOST_WIDE_INT) scale > 3)
6723 return 0;
6724 scale = 1 << scale;
6725 retval = -1;
6726 }
6727 else
6728 disp = addr; /* displacement */
6729
6730 /* Extract the integral value of scale. */
6731 if (scale_rtx)
6732 {
6733 if (!CONST_INT_P (scale_rtx))
6734 return 0;
6735 scale = INTVAL (scale_rtx);
6736 }
6737
6738 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6739 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6740
6741 /* Allow the arg pointer and stack pointer as the index if there is no scaling. */
6742 if (base_reg && index_reg && scale == 1
6743 && (index_reg == arg_pointer_rtx
6744 || index_reg == frame_pointer_rtx
6745 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6746 {
6747 rtx tmp;
6748 tmp = base, base = index, index = tmp;
6749 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6750 }
6751
6752 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6753 if ((base_reg == hard_frame_pointer_rtx
6754 || base_reg == frame_pointer_rtx
6755 || base_reg == arg_pointer_rtx) && !disp)
6756 disp = const0_rtx;
6757
6758 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6759 Avoid this by transforming to [%esi+0]. */
6760 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6761 && base_reg && !index_reg && !disp
6762 && REG_P (base_reg)
6763 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6764 disp = const0_rtx;
6765
6766 /* Special case: encode reg+reg instead of reg*2. */
6767 if (!base && index && scale && scale == 2)
6768 base = index, base_reg = index_reg, scale = 1;
6769
6770 /* Special case: scaling cannot be encoded without base or displacement. */
6771 if (!base && !disp && index && scale != 1)
6772 disp = const0_rtx;
6773
6774 out->base = base;
6775 out->index = index;
6776 out->disp = disp;
6777 out->scale = scale;
6778 out->seg = seg;
6779
6780 return retval;
6781 }
6782 \f
6783 /* Return the cost of the memory address X.
6784 For i386, it is better to use a complex address than let gcc copy
6785 the address into a reg and make a new pseudo.  But not if the address
6786 requires two regs - that would mean more pseudos with longer
6787 lifetimes. */
6788 static int
6789 ix86_address_cost (rtx x)
6790 {
6791 struct ix86_address parts;
6792 int cost = 1;
6793 int ok = ix86_decompose_address (x, &parts);
6794
6795 gcc_assert (ok);
6796
6797 if (parts.base && GET_CODE (parts.base) == SUBREG)
6798 parts.base = SUBREG_REG (parts.base);
6799 if (parts.index && GET_CODE (parts.index) == SUBREG)
6800 parts.index = SUBREG_REG (parts.index);
6801
6802 /* Attempt to minimize number of registers in the address. */
6803 if ((parts.base
6804 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6805 || (parts.index
6806 && (!REG_P (parts.index)
6807 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6808 cost++;
6809
6810 if (parts.base
6811 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6812 && parts.index
6813 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6814 && parts.base != parts.index)
6815 cost++;
6816
6817 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
6818 since its predecode logic can't detect the length of such instructions
6819 and decoding degenerates to vector decoding. Increase the cost of such
6820 addresses here. The penalty is at least 2 cycles. It may be worthwhile
6821 to split such addresses or even refuse them altogether.
6822
6823 The following addressing modes are affected:
6824 [base+scale*index]
6825 [scale*index+disp]
6826 [base+index]
6827
6828 The first and last cases may be avoidable by explicitly coding the zero
6829 into the memory address, but I don't have an AMD-K6 machine handy to check
6830 this theory. */
6831
6832 if (TARGET_K6
6833 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6834 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6835 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6836 cost += 10;
6837
6838 return cost;
6839 }
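/* Roughly, a single hard-register address such as (%eax) costs 1, an
   address involving a pseudo register costs 2, and one combining two
   distinct pseudos (base plus index) costs 3; the K6-affected forms above
   get a further penalty of 10.  */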
6840 \f
6841 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6842 this is used to form addresses to local data when -fPIC is in
6843 use. */
6844
6845 static bool
6846 darwin_local_data_pic (rtx disp)
6847 {
6848 if (GET_CODE (disp) == MINUS)
6849 {
6850 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6851 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6852 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6853 {
6854 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6855 if (! strcmp (sym_name, "<pic base>"))
6856 return true;
6857 }
6858 }
6859
6860 return false;
6861 }
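/* In other words, this matches expressions of the form
     (minus (symbol_ref "_x") (symbol_ref "<pic base>"))
   (or with a label_ref minuend), which is how Darwin addresses local data
   relative to the per-function PIC base label.  */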
6862
6863 /* Determine if a given RTX is a valid constant. We already know this
6864 satisfies CONSTANT_P. */
6865
6866 bool
6867 legitimate_constant_p (rtx x)
6868 {
6869 switch (GET_CODE (x))
6870 {
6871 case CONST:
6872 x = XEXP (x, 0);
6873
6874 if (GET_CODE (x) == PLUS)
6875 {
6876 if (!CONST_INT_P (XEXP (x, 1)))
6877 return false;
6878 x = XEXP (x, 0);
6879 }
6880
6881 if (TARGET_MACHO && darwin_local_data_pic (x))
6882 return true;
6883
6884 /* Only some unspecs are valid as "constants". */
6885 if (GET_CODE (x) == UNSPEC)
6886 switch (XINT (x, 1))
6887 {
6888 case UNSPEC_GOT:
6889 case UNSPEC_GOTOFF:
6890 case UNSPEC_PLTOFF:
6891 return TARGET_64BIT;
6892 case UNSPEC_TPOFF:
6893 case UNSPEC_NTPOFF:
6894 x = XVECEXP (x, 0, 0);
6895 return (GET_CODE (x) == SYMBOL_REF
6896 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6897 case UNSPEC_DTPOFF:
6898 x = XVECEXP (x, 0, 0);
6899 return (GET_CODE (x) == SYMBOL_REF
6900 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6901 default:
6902 return false;
6903 }
6904
6905 /* We must have drilled down to a symbol. */
6906 if (GET_CODE (x) == LABEL_REF)
6907 return true;
6908 if (GET_CODE (x) != SYMBOL_REF)
6909 return false;
6910 /* FALLTHRU */
6911
6912 case SYMBOL_REF:
6913 /* TLS symbols are never valid. */
6914 if (SYMBOL_REF_TLS_MODEL (x))
6915 return false;
6916
6917 /* DLLIMPORT symbols are never valid. */
6918 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
6919 && SYMBOL_REF_DLLIMPORT_P (x))
6920 return false;
6921 break;
6922
6923 case CONST_DOUBLE:
6924 if (GET_MODE (x) == TImode
6925 && x != CONST0_RTX (TImode)
6926 && !TARGET_64BIT)
6927 return false;
6928 break;
6929
6930 case CONST_VECTOR:
6931 if (x == CONST0_RTX (GET_MODE (x)))
6932 return true;
6933 return false;
6934
6935 default:
6936 break;
6937 }
6938
6939 /* Otherwise we handle everything else in the move patterns. */
6940 return true;
6941 }
6942
6943 /* Determine if it's legal to put X into the constant pool. This
6944 is not possible for the address of thread-local symbols, which
6945 is checked above. */
6946
6947 static bool
6948 ix86_cannot_force_const_mem (rtx x)
6949 {
6950 /* We can always put integral constants and vectors in memory. */
6951 switch (GET_CODE (x))
6952 {
6953 case CONST_INT:
6954 case CONST_DOUBLE:
6955 case CONST_VECTOR:
6956 return false;
6957
6958 default:
6959 break;
6960 }
6961 return !legitimate_constant_p (x);
6962 }
6963
6964 /* Determine if a given RTX is a valid constant address. */
6965
6966 bool
6967 constant_address_p (rtx x)
6968 {
6969 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6970 }
6971
6972 /* Nonzero if the constant value X is a legitimate general operand
6973 when generating PIC code. It is given that flag_pic is on and
6974 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6975
6976 bool
6977 legitimate_pic_operand_p (rtx x)
6978 {
6979 rtx inner;
6980
6981 switch (GET_CODE (x))
6982 {
6983 case CONST:
6984 inner = XEXP (x, 0);
6985 if (GET_CODE (inner) == PLUS
6986 && CONST_INT_P (XEXP (inner, 1)))
6987 inner = XEXP (inner, 0);
6988
6989 /* Only some unspecs are valid as "constants". */
6990 if (GET_CODE (inner) == UNSPEC)
6991 switch (XINT (inner, 1))
6992 {
6993 case UNSPEC_GOT:
6994 case UNSPEC_GOTOFF:
6995 case UNSPEC_PLTOFF:
6996 return TARGET_64BIT;
6997 case UNSPEC_TPOFF:
6998 x = XVECEXP (inner, 0, 0);
6999 return (GET_CODE (x) == SYMBOL_REF
7000 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
7001 default:
7002 return false;
7003 }
7004 /* FALLTHRU */
7005
7006 case SYMBOL_REF:
7007 case LABEL_REF:
7008 return legitimate_pic_address_disp_p (x);
7009
7010 default:
7011 return true;
7012 }
7013 }
7014
7015 /* Determine if a given CONST RTX is a valid memory displacement
7016 in PIC mode. */
7017
7018 int
7019 legitimate_pic_address_disp_p (rtx disp)
7020 {
7021 bool saw_plus;
7022
7023 /* In 64bit mode we can allow direct addresses of symbols and labels
7024 when they are not dynamic symbols. */
7025 if (TARGET_64BIT)
7026 {
7027 rtx op0 = disp, op1;
7028
7029 switch (GET_CODE (disp))
7030 {
7031 case LABEL_REF:
7032 return true;
7033
7034 case CONST:
7035 if (GET_CODE (XEXP (disp, 0)) != PLUS)
7036 break;
7037 op0 = XEXP (XEXP (disp, 0), 0);
7038 op1 = XEXP (XEXP (disp, 0), 1);
7039 if (!CONST_INT_P (op1)
7040 || INTVAL (op1) >= 16*1024*1024
7041 || INTVAL (op1) < -16*1024*1024)
7042 break;
7043 if (GET_CODE (op0) == LABEL_REF)
7044 return true;
7045 if (GET_CODE (op0) != SYMBOL_REF)
7046 break;
7047 /* FALLTHRU */
7048
7049 case SYMBOL_REF:
7050 /* TLS references should always be enclosed in UNSPEC. */
7051 if (SYMBOL_REF_TLS_MODEL (op0))
7052 return false;
7053 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
7054 && ix86_cmodel != CM_LARGE_PIC)
7055 return true;
7056 break;
7057
7058 default:
7059 break;
7060 }
7061 }
7062 if (GET_CODE (disp) != CONST)
7063 return 0;
7064 disp = XEXP (disp, 0);
7065
7066 if (TARGET_64BIT)
7067 {
7068 /* It is unsafe to allow PLUS expressions. This limits the allowed distance
7069 of GOT tables. We should not need these anyway. */
7070 if (GET_CODE (disp) != UNSPEC
7071 || (XINT (disp, 1) != UNSPEC_GOTPCREL
7072 && XINT (disp, 1) != UNSPEC_GOTOFF
7073 && XINT (disp, 1) != UNSPEC_PLTOFF))
7074 return 0;
7075
7076 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
7077 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
7078 return 0;
7079 return 1;
7080 }
7081
7082 saw_plus = false;
7083 if (GET_CODE (disp) == PLUS)
7084 {
7085 if (!CONST_INT_P (XEXP (disp, 1)))
7086 return 0;
7087 disp = XEXP (disp, 0);
7088 saw_plus = true;
7089 }
7090
7091 if (TARGET_MACHO && darwin_local_data_pic (disp))
7092 return 1;
7093
7094 if (GET_CODE (disp) != UNSPEC)
7095 return 0;
7096
7097 switch (XINT (disp, 1))
7098 {
7099 case UNSPEC_GOT:
7100 if (saw_plus)
7101 return false;
7102 /* We need to check for both symbols and labels because VxWorks loads
7103 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
7104 details. */
7105 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7106 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
7107 case UNSPEC_GOTOFF:
7108 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
7109 While the ABI also specifies a 32bit relocation, we don't produce it in
7110 the small PIC model at all. */
7111 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7112 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
7113 && !TARGET_64BIT)
7114 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
7115 return false;
7116 case UNSPEC_GOTTPOFF:
7117 case UNSPEC_GOTNTPOFF:
7118 case UNSPEC_INDNTPOFF:
7119 if (saw_plus)
7120 return false;
7121 disp = XVECEXP (disp, 0, 0);
7122 return (GET_CODE (disp) == SYMBOL_REF
7123 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
7124 case UNSPEC_NTPOFF:
7125 disp = XVECEXP (disp, 0, 0);
7126 return (GET_CODE (disp) == SYMBOL_REF
7127 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
7128 case UNSPEC_DTPOFF:
7129 disp = XVECEXP (disp, 0, 0);
7130 return (GET_CODE (disp) == SYMBOL_REF
7131 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
7132 }
7133
7134 return 0;
7135 }
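/* For illustration, a typical valid 32-bit PIC displacement is
     (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF))
   optionally wrapped in a PLUS with a CONST_INT offset; output_pic_addr_const
   later renders it as x@GOTOFF.  */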
7136
7137 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
7138 memory address for an instruction. The MODE argument is the machine mode
7139 for the MEM expression that wants to use this address.
7140
7141 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
7142 convert common non-canonical forms to canonical form so that they will
7143 be recognized. */
7144
7145 int
7146 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
7147 rtx addr, int strict)
7148 {
7149 struct ix86_address parts;
7150 rtx base, index, disp;
7151 HOST_WIDE_INT scale;
7152 const char *reason = NULL;
7153 rtx reason_rtx = NULL_RTX;
7154
7155 if (ix86_decompose_address (addr, &parts) <= 0)
7156 {
7157 reason = "decomposition failed";
7158 goto report_error;
7159 }
7160
7161 base = parts.base;
7162 index = parts.index;
7163 disp = parts.disp;
7164 scale = parts.scale;
7165
7166 /* Validate base register.
7167
7168 Don't allow SUBREG's that span more than a word here. It can lead to spill
7169 failures when the base is one word out of a two word structure, which is
7170 represented internally as a DImode int. */
7171
7172 if (base)
7173 {
7174 rtx reg;
7175 reason_rtx = base;
7176
7177 if (REG_P (base))
7178 reg = base;
7179 else if (GET_CODE (base) == SUBREG
7180 && REG_P (SUBREG_REG (base))
7181 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
7182 <= UNITS_PER_WORD)
7183 reg = SUBREG_REG (base);
7184 else
7185 {
7186 reason = "base is not a register";
7187 goto report_error;
7188 }
7189
7190 if (GET_MODE (base) != Pmode)
7191 {
7192 reason = "base is not in Pmode";
7193 goto report_error;
7194 }
7195
7196 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
7197 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
7198 {
7199 reason = "base is not valid";
7200 goto report_error;
7201 }
7202 }
7203
7204 /* Validate index register.
7205
7206 Don't allow SUBREG's that span more than a word here -- same as above. */
7207
7208 if (index)
7209 {
7210 rtx reg;
7211 reason_rtx = index;
7212
7213 if (REG_P (index))
7214 reg = index;
7215 else if (GET_CODE (index) == SUBREG
7216 && REG_P (SUBREG_REG (index))
7217 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
7218 <= UNITS_PER_WORD)
7219 reg = SUBREG_REG (index);
7220 else
7221 {
7222 reason = "index is not a register";
7223 goto report_error;
7224 }
7225
7226 if (GET_MODE (index) != Pmode)
7227 {
7228 reason = "index is not in Pmode";
7229 goto report_error;
7230 }
7231
7232 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
7233 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
7234 {
7235 reason = "index is not valid";
7236 goto report_error;
7237 }
7238 }
7239
7240 /* Validate scale factor. */
7241 if (scale != 1)
7242 {
7243 reason_rtx = GEN_INT (scale);
7244 if (!index)
7245 {
7246 reason = "scale without index";
7247 goto report_error;
7248 }
7249
7250 if (scale != 2 && scale != 4 && scale != 8)
7251 {
7252 reason = "scale is not a valid multiplier";
7253 goto report_error;
7254 }
7255 }
7256
7257 /* Validate displacement. */
7258 if (disp)
7259 {
7260 reason_rtx = disp;
7261
7262 if (GET_CODE (disp) == CONST
7263 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
7264 switch (XINT (XEXP (disp, 0), 1))
7265 {
7266 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
7267 used. While the ABI also specifies 32bit relocations, we don't produce
7268 them at all and use IP-relative addressing instead. */
7269 case UNSPEC_GOT:
7270 case UNSPEC_GOTOFF:
7271 gcc_assert (flag_pic);
7272 if (!TARGET_64BIT)
7273 goto is_legitimate_pic;
7274 reason = "64bit address unspec";
7275 goto report_error;
7276
7277 case UNSPEC_GOTPCREL:
7278 gcc_assert (flag_pic);
7279 goto is_legitimate_pic;
7280
7281 case UNSPEC_GOTTPOFF:
7282 case UNSPEC_GOTNTPOFF:
7283 case UNSPEC_INDNTPOFF:
7284 case UNSPEC_NTPOFF:
7285 case UNSPEC_DTPOFF:
7286 break;
7287
7288 default:
7289 reason = "invalid address unspec";
7290 goto report_error;
7291 }
7292
7293 else if (SYMBOLIC_CONST (disp)
7294 && (flag_pic
7295 || (TARGET_MACHO
7296 #if TARGET_MACHO
7297 && MACHOPIC_INDIRECT
7298 && !machopic_operand_p (disp)
7299 #endif
7300 )))
7301 {
7302
7303 is_legitimate_pic:
7304 if (TARGET_64BIT && (index || base))
7305 {
7306 /* foo@dtpoff(%rX) is ok. */
7307 if (GET_CODE (disp) != CONST
7308 || GET_CODE (XEXP (disp, 0)) != PLUS
7309 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
7310 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
7311 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
7312 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
7313 {
7314 reason = "non-constant pic memory reference";
7315 goto report_error;
7316 }
7317 }
7318 else if (! legitimate_pic_address_disp_p (disp))
7319 {
7320 reason = "displacement is an invalid pic construct";
7321 goto report_error;
7322 }
7323
7324 /* This code used to verify that a symbolic pic displacement
7325 includes the pic_offset_table_rtx register.
7326
7327 While this is a good idea, unfortunately these constructs may
7328 be created by the "adds using lea" optimization for incorrect
7329 code like:
7330
7331 int a;
7332 int foo(int i)
7333 {
7334 return *(&a+i);
7335 }
7336
7337 This code is nonsensical, but results in addressing the
7338 GOT table with a pic_offset_table_rtx base. We can't
7339 just refuse it easily, since it gets matched by the
7340 "addsi3" pattern, which later gets split to lea when the
7341 output register differs from the input. While this
7342 could be handled by a separate addsi pattern for this case
7343 that never results in lea, disabling this test seems to be
7344 the easier and correct fix for the crash. */
7345 }
7346 else if (GET_CODE (disp) != LABEL_REF
7347 && !CONST_INT_P (disp)
7348 && (GET_CODE (disp) != CONST
7349 || !legitimate_constant_p (disp))
7350 && (GET_CODE (disp) != SYMBOL_REF
7351 || !legitimate_constant_p (disp)))
7352 {
7353 reason = "displacement is not constant";
7354 goto report_error;
7355 }
7356 else if (TARGET_64BIT
7357 && !x86_64_immediate_operand (disp, VOIDmode))
7358 {
7359 reason = "displacement is out of range";
7360 goto report_error;
7361 }
7362 }
7363
7364 /* Everything looks valid. */
7365 return TRUE;
7366
7367 report_error:
7368 return FALSE;
7369 }
7370 \f
7371 /* Return a unique alias set for the GOT. */
7372
7373 static alias_set_type
7374 ix86_GOT_alias_set (void)
7375 {
7376 static alias_set_type set = -1;
7377 if (set == -1)
7378 set = new_alias_set ();
7379 return set;
7380 }
7381
7382 /* Return a legitimate reference for ORIG (an address) using the
7383 register REG. If REG is 0, a new pseudo is generated.
7384
7385 There are two types of references that must be handled:
7386
7387 1. Global data references must load the address from the GOT, via
7388 the PIC reg. An insn is emitted to do this load, and the reg is
7389 returned.
7390
7391 2. Static data references, constant pool addresses, and code labels
7392 compute the address as an offset from the GOT, whose base is in
7393 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7394 differentiate them from global data objects. The returned
7395 address is the PIC reg + an unspec constant.
7396
7397 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7398 reg also appears in the address. */
7399
7400 static rtx
7401 legitimize_pic_address (rtx orig, rtx reg)
7402 {
7403 rtx addr = orig;
7404 rtx new_rtx = orig;
7405 rtx base;
7406
7407 #if TARGET_MACHO
7408 if (TARGET_MACHO && !TARGET_64BIT)
7409 {
7410 if (reg == 0)
7411 reg = gen_reg_rtx (Pmode);
7412 /* Use the generic Mach-O PIC machinery. */
7413 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7414 }
7415 #endif
7416
7417 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7418 new_rtx = addr;
7419 else if (TARGET_64BIT
7420 && ix86_cmodel != CM_SMALL_PIC
7421 && gotoff_operand (addr, Pmode))
7422 {
7423 rtx tmpreg;
7424 /* This symbol may be referenced via a displacement from the PIC
7425 base address (@GOTOFF). */
7426
7427 if (reload_in_progress)
7428 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7429 if (GET_CODE (addr) == CONST)
7430 addr = XEXP (addr, 0);
7431 if (GET_CODE (addr) == PLUS)
7432 {
7433 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7434 UNSPEC_GOTOFF);
7435 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7436 }
7437 else
7438 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7439 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7440 if (!reg)
7441 tmpreg = gen_reg_rtx (Pmode);
7442 else
7443 tmpreg = reg;
7444 emit_move_insn (tmpreg, new_rtx);
7445
7446 if (reg != 0)
7447 {
7448 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7449 tmpreg, 1, OPTAB_DIRECT);
7450 new_rtx = reg;
7451 }
7452 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7453 }
7454 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7455 {
7456 /* This symbol may be referenced via a displacement from the PIC
7457 base address (@GOTOFF). */
7458
7459 if (reload_in_progress)
7460 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7461 if (GET_CODE (addr) == CONST)
7462 addr = XEXP (addr, 0);
7463 if (GET_CODE (addr) == PLUS)
7464 {
7465 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7466 UNSPEC_GOTOFF);
7467 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7468 }
7469 else
7470 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7471 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7472 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7473
7474 if (reg != 0)
7475 {
7476 emit_move_insn (reg, new_rtx);
7477 new_rtx = reg;
7478 }
7479 }
7480 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7481 /* We can't use @GOTOFF for text labels on VxWorks;
7482 see gotoff_operand. */
7483 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7484 {
7485 /* Given that we've already handled dllimport variables separately
7486 in legitimize_address, and all other variables should satisfy
7487 legitimate_pic_address_disp_p, we should never arrive here. */
7488 gcc_assert (!TARGET_64BIT_MS_ABI);
7489
7490 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7491 {
7492 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7493 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7494 new_rtx = gen_const_mem (Pmode, new_rtx);
7495 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7496
7497 if (reg == 0)
7498 reg = gen_reg_rtx (Pmode);
7499 /* Use gen_movsi directly, otherwise the address is loaded
7500 into a register for CSE. We don't want to CSE these addresses;
7501 instead we CSE addresses from the GOT table, so skip this. */
7502 emit_insn (gen_movsi (reg, new_rtx));
7503 new_rtx = reg;
7504 }
7505 else
7506 {
7507 /* This symbol must be referenced via a load from the
7508 Global Offset Table (@GOT). */
7509
7510 if (reload_in_progress)
7511 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7512 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7513 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7514 if (TARGET_64BIT)
7515 new_rtx = force_reg (Pmode, new_rtx);
7516 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7517 new_rtx = gen_const_mem (Pmode, new_rtx);
7518 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7519
7520 if (reg == 0)
7521 reg = gen_reg_rtx (Pmode);
7522 emit_move_insn (reg, new_rtx);
7523 new_rtx = reg;
7524 }
7525 }
7526 else
7527 {
7528 if (CONST_INT_P (addr)
7529 && !x86_64_immediate_operand (addr, VOIDmode))
7530 {
7531 if (reg)
7532 {
7533 emit_move_insn (reg, addr);
7534 new_rtx = reg;
7535 }
7536 else
7537 new_rtx = force_reg (Pmode, addr);
7538 }
7539 else if (GET_CODE (addr) == CONST)
7540 {
7541 addr = XEXP (addr, 0);
7542
7543 /* We must match stuff we generate before. Assume the only
7544 unspecs that can get here are ours. Not that we could do
7545 anything with them anyway.... */
7546 if (GET_CODE (addr) == UNSPEC
7547 || (GET_CODE (addr) == PLUS
7548 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7549 return orig;
7550 gcc_assert (GET_CODE (addr) == PLUS);
7551 }
7552 if (GET_CODE (addr) == PLUS)
7553 {
7554 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7555
7556 /* Check first to see if this is a constant offset from a @GOTOFF
7557 symbol reference. */
7558 if (gotoff_operand (op0, Pmode)
7559 && CONST_INT_P (op1))
7560 {
7561 if (!TARGET_64BIT)
7562 {
7563 if (reload_in_progress)
7564 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7565 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7566 UNSPEC_GOTOFF);
7567 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
7568 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7569 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7570
7571 if (reg != 0)
7572 {
7573 emit_move_insn (reg, new_rtx);
7574 new_rtx = reg;
7575 }
7576 }
7577 else
7578 {
7579 if (INTVAL (op1) < -16*1024*1024
7580 || INTVAL (op1) >= 16*1024*1024)
7581 {
7582 if (!x86_64_immediate_operand (op1, Pmode))
7583 op1 = force_reg (Pmode, op1);
7584 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7585 }
7586 }
7587 }
7588 else
7589 {
7590 base = legitimize_pic_address (XEXP (addr, 0), reg);
7591 new_rtx = legitimize_pic_address (XEXP (addr, 1),
7592 base == reg ? NULL_RTX : reg);
7593
7594 if (CONST_INT_P (new_rtx))
7595 new_rtx = plus_constant (base, INTVAL (new_rtx));
7596 else
7597 {
7598 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
7599 {
7600 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
7601 new_rtx = XEXP (new_rtx, 1);
7602 }
7603 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
7604 }
7605 }
7606 }
7607 }
7608 return new_rtx;
7609 }
7610 \f
7611 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7612
7613 static rtx
7614 get_thread_pointer (int to_reg)
7615 {
7616 rtx tp, reg, insn;
7617
7618 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7619 if (!to_reg)
7620 return tp;
7621
7622 reg = gen_reg_rtx (Pmode);
7623 insn = gen_rtx_SET (VOIDmode, reg, tp);
7624 insn = emit_insn (insn);
7625
7626 return reg;
7627 }
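/* Note that when UNSPEC_TP is used directly as an address,
   ix86_decompose_address turns it into a segment override: %gs on
   32-bit targets and %fs on 64-bit targets (see the UNSPEC case
   there).  */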
7628
7629 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7630 false if we expect this to be used for a memory address and true if
7631 we expect to load the address into a register. */
7632
7633 static rtx
7634 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7635 {
7636 rtx dest, base, off, pic, tp;
7637 int type;
7638
7639 switch (model)
7640 {
7641 case TLS_MODEL_GLOBAL_DYNAMIC:
7642 dest = gen_reg_rtx (Pmode);
7643 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7644
7645 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7646 {
7647 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7648
7649 start_sequence ();
7650 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7651 insns = get_insns ();
7652 end_sequence ();
7653
7654 CONST_OR_PURE_CALL_P (insns) = 1;
7655 emit_libcall_block (insns, dest, rax, x);
7656 }
7657 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7658 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7659 else
7660 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7661
7662 if (TARGET_GNU2_TLS)
7663 {
7664 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7665
7666 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7667 }
7668 break;
7669
7670 case TLS_MODEL_LOCAL_DYNAMIC:
7671 base = gen_reg_rtx (Pmode);
7672 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7673
7674 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7675 {
7676 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7677
7678 start_sequence ();
7679 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7680 insns = get_insns ();
7681 end_sequence ();
7682
7683 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7684 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7685 CONST_OR_PURE_CALL_P (insns) = 1;
7686 emit_libcall_block (insns, base, rax, note);
7687 }
7688 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7689 emit_insn (gen_tls_local_dynamic_base_64 (base));
7690 else
7691 emit_insn (gen_tls_local_dynamic_base_32 (base));
7692
7693 if (TARGET_GNU2_TLS)
7694 {
7695 rtx x = ix86_tls_module_base ();
7696
7697 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7698 gen_rtx_MINUS (Pmode, x, tp));
7699 }
7700
7701 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7702 off = gen_rtx_CONST (Pmode, off);
7703
7704 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7705
7706 if (TARGET_GNU2_TLS)
7707 {
7708 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7709
7710 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7711 }
7712
7713 break;
7714
7715 case TLS_MODEL_INITIAL_EXEC:
7716 if (TARGET_64BIT)
7717 {
7718 pic = NULL;
7719 type = UNSPEC_GOTNTPOFF;
7720 }
7721 else if (flag_pic)
7722 {
7723 if (reload_in_progress)
7724 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7725 pic = pic_offset_table_rtx;
7726 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7727 }
7728 else if (!TARGET_ANY_GNU_TLS)
7729 {
7730 pic = gen_reg_rtx (Pmode);
7731 emit_insn (gen_set_got (pic));
7732 type = UNSPEC_GOTTPOFF;
7733 }
7734 else
7735 {
7736 pic = NULL;
7737 type = UNSPEC_INDNTPOFF;
7738 }
7739
7740 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7741 off = gen_rtx_CONST (Pmode, off);
7742 if (pic)
7743 off = gen_rtx_PLUS (Pmode, pic, off);
7744 off = gen_const_mem (Pmode, off);
7745 set_mem_alias_set (off, ix86_GOT_alias_set ());
7746
7747 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7748 {
7749 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7750 off = force_reg (Pmode, off);
7751 return gen_rtx_PLUS (Pmode, base, off);
7752 }
7753 else
7754 {
7755 base = get_thread_pointer (true);
7756 dest = gen_reg_rtx (Pmode);
7757 emit_insn (gen_subsi3 (dest, base, off));
7758 }
7759 break;
7760
7761 case TLS_MODEL_LOCAL_EXEC:
7762 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7763 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7764 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7765 off = gen_rtx_CONST (Pmode, off);
7766
7767 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7768 {
7769 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7770 return gen_rtx_PLUS (Pmode, base, off);
7771 }
7772 else
7773 {
7774 base = get_thread_pointer (true);
7775 dest = gen_reg_rtx (Pmode);
7776 emit_insn (gen_subsi3 (dest, base, off));
7777 }
7778 break;
7779
7780 default:
7781 gcc_unreachable ();
7782 }
7783
7784 return dest;
7785 }
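/* A rough sketch of the result, assuming GNU TLS on a 32-bit target: for
   the local-exec model the address becomes %gs:x@NTPOFF, so a load of x
   comes out as something like "movl %gs:x@NTPOFF, %eax"; without
   TARGET_ANY_GNU_TLS the variable's @TPOFF offset is subtracted from the
   thread pointer instead.  */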
7786
7787 /* Create or return the unique __imp_DECL dllimport symbol corresponding
7788 to symbol DECL. */
7789
7790 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
7791 htab_t dllimport_map;
7792
7793 static tree
7794 get_dllimport_decl (tree decl)
7795 {
7796 struct tree_map *h, in;
7797 void **loc;
7798 const char *name;
7799 const char *prefix;
7800 size_t namelen, prefixlen;
7801 char *imp_name;
7802 tree to;
7803 rtx rtl;
7804
7805 if (!dllimport_map)
7806 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
7807
7808 in.hash = htab_hash_pointer (decl);
7809 in.base.from = decl;
7810 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
7811 h = (struct tree_map *) *loc;
7812 if (h)
7813 return h->to;
7814
7815 *loc = h = GGC_NEW (struct tree_map);
7816 h->hash = in.hash;
7817 h->base.from = decl;
7818 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
7819 DECL_ARTIFICIAL (to) = 1;
7820 DECL_IGNORED_P (to) = 1;
7821 DECL_EXTERNAL (to) = 1;
7822 TREE_READONLY (to) = 1;
7823
7824 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
7825 name = targetm.strip_name_encoding (name);
7826 prefix = name[0] == FASTCALL_PREFIX ? "*__imp_": "*__imp__";
7827 namelen = strlen (name);
7828 prefixlen = strlen (prefix);
7829 imp_name = (char *) alloca (namelen + prefixlen + 1);
7830 memcpy (imp_name, prefix, prefixlen);
7831 memcpy (imp_name + prefixlen, name, namelen + 1);
7832
7833 name = ggc_alloc_string (imp_name, namelen + prefixlen);
7834 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
7835 SET_SYMBOL_REF_DECL (rtl, to);
7836 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
7837
7838 rtl = gen_const_mem (Pmode, rtl);
7839 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
7840
7841 SET_DECL_RTL (to, rtl);
7842
7843 return to;
7844 }
7845
7846 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
7847 true if we require the result be a register. */
7848
7849 static rtx
7850 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
7851 {
7852 tree imp_decl;
7853 rtx x;
7854
7855 gcc_assert (SYMBOL_REF_DECL (symbol));
7856 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
7857
7858 x = DECL_RTL (imp_decl);
7859 if (want_reg)
7860 x = force_reg (Pmode, x);
7861 return x;
7862 }
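/* For example, a reference to a dllimported symbol foo is rewritten into a
   load through its "__imp_"-prefixed import pointer, i.e. through the cell
   where the dynamic loader stores foo's actual address.  */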
7863
7864 /* Try machine-dependent ways of modifying an illegitimate address
7865 to be legitimate. If we find one, return the new, valid address.
7866 This macro is used in only one place: `memory_address' in explow.c.
7867
7868 OLDX is the address as it was before break_out_memory_refs was called.
7869 In some cases it is useful to look at this to decide what needs to be done.
7870
7871 MODE and WIN are passed so that this macro can use
7872 GO_IF_LEGITIMATE_ADDRESS.
7873
7874 It is always safe for this macro to do nothing. It exists to recognize
7875 opportunities to optimize the output.
7876
7877 For the 80386, we handle X+REG by loading X into a register R and
7878 using R+REG. R will go in a general reg and indexing will be used.
7879 However, if REG is a broken-out memory address or multiplication,
7880 nothing needs to be done because REG can certainly go in a general reg.
7881
7882 When -fpic is used, special handling is needed for symbolic references.
7883 See comments by legitimize_pic_address in i386.c for details. */
7884
7885 rtx
7886 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7887 {
7888 int changed = 0;
7889 unsigned log;
7890
7891 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7892 if (log)
7893 return legitimize_tls_address (x, (enum tls_model) log, false);
7894 if (GET_CODE (x) == CONST
7895 && GET_CODE (XEXP (x, 0)) == PLUS
7896 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7897 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7898 {
7899 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
7900 (enum tls_model) log, false);
7901 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7902 }
7903
7904 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7905 {
7906 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
7907 return legitimize_dllimport_symbol (x, true);
7908 if (GET_CODE (x) == CONST
7909 && GET_CODE (XEXP (x, 0)) == PLUS
7910 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7911 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
7912 {
7913 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
7914 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7915 }
7916 }
7917
7918 if (flag_pic && SYMBOLIC_CONST (x))
7919 return legitimize_pic_address (x, 0);
7920
7921 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7922 if (GET_CODE (x) == ASHIFT
7923 && CONST_INT_P (XEXP (x, 1))
7924 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7925 {
7926 changed = 1;
7927 log = INTVAL (XEXP (x, 1));
7928 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7929 GEN_INT (1 << log));
7930 }
7931
7932 if (GET_CODE (x) == PLUS)
7933 {
7934 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7935
7936 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7937 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7938 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7939 {
7940 changed = 1;
7941 log = INTVAL (XEXP (XEXP (x, 0), 1));
7942 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7943 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7944 GEN_INT (1 << log));
7945 }
7946
7947 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7948 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7949 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7950 {
7951 changed = 1;
7952 log = INTVAL (XEXP (XEXP (x, 1), 1));
7953 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7954 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7955 GEN_INT (1 << log));
7956 }
7957
7958 /* Put multiply first if it isn't already. */
7959 if (GET_CODE (XEXP (x, 1)) == MULT)
7960 {
7961 rtx tmp = XEXP (x, 0);
7962 XEXP (x, 0) = XEXP (x, 1);
7963 XEXP (x, 1) = tmp;
7964 changed = 1;
7965 }
7966
7967 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7968 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7969 created by virtual register instantiation, register elimination, and
7970 similar optimizations. */
7971 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7972 {
7973 changed = 1;
7974 x = gen_rtx_PLUS (Pmode,
7975 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7976 XEXP (XEXP (x, 1), 0)),
7977 XEXP (XEXP (x, 1), 1));
7978 }
7979
7980 /* Canonicalize
7981 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7982 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7983 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7984 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7985 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7986 && CONSTANT_P (XEXP (x, 1)))
7987 {
7988 rtx constant;
7989 rtx other = NULL_RTX;
7990
7991 if (CONST_INT_P (XEXP (x, 1)))
7992 {
7993 constant = XEXP (x, 1);
7994 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7995 }
7996 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
7997 {
7998 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7999 other = XEXP (x, 1);
8000 }
8001 else
8002 constant = 0;
8003
8004 if (constant)
8005 {
8006 changed = 1;
8007 x = gen_rtx_PLUS (Pmode,
8008 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
8009 XEXP (XEXP (XEXP (x, 0), 1), 0)),
8010 plus_constant (other, INTVAL (constant)));
8011 }
8012 }
8013
8014 if (changed && legitimate_address_p (mode, x, FALSE))
8015 return x;
8016
8017 if (GET_CODE (XEXP (x, 0)) == MULT)
8018 {
8019 changed = 1;
8020 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
8021 }
8022
8023 if (GET_CODE (XEXP (x, 1)) == MULT)
8024 {
8025 changed = 1;
8026 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
8027 }
8028
8029 if (changed
8030 && REG_P (XEXP (x, 1))
8031 && REG_P (XEXP (x, 0)))
8032 return x;
8033
8034 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
8035 {
8036 changed = 1;
8037 x = legitimize_pic_address (x, 0);
8038 }
8039
8040 if (changed && legitimate_address_p (mode, x, FALSE))
8041 return x;
8042
8043 if (REG_P (XEXP (x, 0)))
8044 {
8045 rtx temp = gen_reg_rtx (Pmode);
8046 rtx val = force_operand (XEXP (x, 1), temp);
8047 if (val != temp)
8048 emit_move_insn (temp, val);
8049
8050 XEXP (x, 1) = temp;
8051 return x;
8052 }
8053
8054 else if (REG_P (XEXP (x, 1)))
8055 {
8056 rtx temp = gen_reg_rtx (Pmode);
8057 rtx val = force_operand (XEXP (x, 0), temp);
8058 if (val != temp)
8059 emit_move_insn (temp, val);
8060
8061 XEXP (x, 0) = temp;
8062 return x;
8063 }
8064 }
8065
8066 return x;
8067 }
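/* For illustration, (plus (ashift (reg A) (const_int 2)) (reg B)) is
   canonicalized here into (plus (mult (reg A) (const_int 4)) (reg B)),
   the scaled-index form that ix86_decompose_address recognizes.  */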
8068 \f
8069 /* Print an integer constant expression in assembler syntax. Addition
8070 and subtraction are the only arithmetic that may appear in these
8071 expressions. FILE is the stdio stream to write to, X is the rtx, and
8072 CODE is the operand print code from the output string. */
8073
8074 static void
8075 output_pic_addr_const (FILE *file, rtx x, int code)
8076 {
8077 char buf[256];
8078
8079 switch (GET_CODE (x))
8080 {
8081 case PC:
8082 gcc_assert (flag_pic);
8083 putc ('.', file);
8084 break;
8085
8086 case SYMBOL_REF:
8087 if (! TARGET_MACHO || TARGET_64BIT)
8088 output_addr_const (file, x);
8089 else
8090 {
8091 const char *name = XSTR (x, 0);
8092
8093 /* Mark the decl as referenced so that cgraph will
8094 output the function. */
8095 if (SYMBOL_REF_DECL (x))
8096 mark_decl_referenced (SYMBOL_REF_DECL (x));
8097
8098 #if TARGET_MACHO
8099 if (MACHOPIC_INDIRECT
8100 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
8101 name = machopic_indirection_name (x, /*stub_p=*/true);
8102 #endif
8103 assemble_name (file, name);
8104 }
8105 if (!TARGET_MACHO && !TARGET_64BIT_MS_ABI
8106 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
8107 fputs ("@PLT", file);
8108 break;
8109
8110 case LABEL_REF:
8111 x = XEXP (x, 0);
8112 /* FALLTHRU */
8113 case CODE_LABEL:
8114 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
8115 assemble_name (asm_out_file, buf);
8116 break;
8117
8118 case CONST_INT:
8119 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8120 break;
8121
8122 case CONST:
8123 /* This used to output parentheses around the expression,
8124 but that does not work on the 386 (either ATT or BSD assembler). */
8125 output_pic_addr_const (file, XEXP (x, 0), code);
8126 break;
8127
8128 case CONST_DOUBLE:
8129 if (GET_MODE (x) == VOIDmode)
8130 {
8131 /* We can use %d if the number is <32 bits and positive. */
8132 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
8133 fprintf (file, "0x%lx%08lx",
8134 (unsigned long) CONST_DOUBLE_HIGH (x),
8135 (unsigned long) CONST_DOUBLE_LOW (x));
8136 else
8137 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
8138 }
8139 else
8140 /* We can't handle floating point constants;
8141 PRINT_OPERAND must handle them. */
8142 output_operand_lossage ("floating constant misused");
8143 break;
8144
8145 case PLUS:
8146 /* Some assemblers need integer constants to appear first. */
8147 if (CONST_INT_P (XEXP (x, 0)))
8148 {
8149 output_pic_addr_const (file, XEXP (x, 0), code);
8150 putc ('+', file);
8151 output_pic_addr_const (file, XEXP (x, 1), code);
8152 }
8153 else
8154 {
8155 gcc_assert (CONST_INT_P (XEXP (x, 1)));
8156 output_pic_addr_const (file, XEXP (x, 1), code);
8157 putc ('+', file);
8158 output_pic_addr_const (file, XEXP (x, 0), code);
8159 }
8160 break;
8161
8162 case MINUS:
8163 if (!TARGET_MACHO)
8164 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
8165 output_pic_addr_const (file, XEXP (x, 0), code);
8166 putc ('-', file);
8167 output_pic_addr_const (file, XEXP (x, 1), code);
8168 if (!TARGET_MACHO)
8169 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
8170 break;
8171
8172 case UNSPEC:
8173 gcc_assert (XVECLEN (x, 0) == 1);
8174 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
8175 switch (XINT (x, 1))
8176 {
8177 case UNSPEC_GOT:
8178 fputs ("@GOT", file);
8179 break;
8180 case UNSPEC_GOTOFF:
8181 fputs ("@GOTOFF", file);
8182 break;
8183 case UNSPEC_PLTOFF:
8184 fputs ("@PLTOFF", file);
8185 break;
8186 case UNSPEC_GOTPCREL:
8187 fputs ("@GOTPCREL(%rip)", file);
8188 break;
8189 case UNSPEC_GOTTPOFF:
8190 /* FIXME: This might be @TPOFF in Sun ld too. */
8191 fputs ("@GOTTPOFF", file);
8192 break;
8193 case UNSPEC_TPOFF:
8194 fputs ("@TPOFF", file);
8195 break;
8196 case UNSPEC_NTPOFF:
8197 if (TARGET_64BIT)
8198 fputs ("@TPOFF", file);
8199 else
8200 fputs ("@NTPOFF", file);
8201 break;
8202 case UNSPEC_DTPOFF:
8203 fputs ("@DTPOFF", file);
8204 break;
8205 case UNSPEC_GOTNTPOFF:
8206 if (TARGET_64BIT)
8207 fputs ("@GOTTPOFF(%rip)", file);
8208 else
8209 fputs ("@GOTNTPOFF", file);
8210 break;
8211 case UNSPEC_INDNTPOFF:
8212 fputs ("@INDNTPOFF", file);
8213 break;
8214 default:
8215 output_operand_lossage ("invalid UNSPEC as operand");
8216 break;
8217 }
8218 break;
8219
8220 default:
8221 output_operand_lossage ("invalid expression as operand");
8222 }
8223 }
8224
8225 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8226 We need to emit DTP-relative relocations. */
8227
8228 static void ATTRIBUTE_UNUSED
8229 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
8230 {
8231 fputs (ASM_LONG, file);
8232 output_addr_const (file, x);
8233 fputs ("@DTPOFF", file);
8234 switch (size)
8235 {
8236 case 4:
8237 break;
8238 case 8:
8239 fputs (", 0", file);
8240 break;
8241 default:
8242 gcc_unreachable ();
8243 }
8244 }
8245
8246 /* In the name of slightly smaller debug output, and to cater to
8247 general assembler lossage, recognize PIC+GOTOFF and turn it back
8248 into a direct symbol reference.
8249
8250 On Darwin, this is necessary to avoid a crash, because Darwin
8251 has a different PIC label for each routine but the DWARF debugging
8252 information is not associated with any particular routine, so it's
8253 necessary to remove references to the PIC label from RTL stored by
8254 the DWARF output code. */
8255
8256 static rtx
8257 ix86_delegitimize_address (rtx orig_x)
8258 {
8259 rtx x = orig_x;
8260 /* reg_addend is NULL or a multiple of some register. */
8261 rtx reg_addend = NULL_RTX;
8262 /* const_addend is NULL or a const_int. */
8263 rtx const_addend = NULL_RTX;
8264 /* This is the result, or NULL. */
8265 rtx result = NULL_RTX;
8266
8267 if (MEM_P (x))
8268 x = XEXP (x, 0);
8269
8270 if (TARGET_64BIT)
8271 {
8272 if (GET_CODE (x) != CONST
8273 || GET_CODE (XEXP (x, 0)) != UNSPEC
8274 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
8275 || !MEM_P (orig_x))
8276 return orig_x;
8277 return XVECEXP (XEXP (x, 0), 0, 0);
8278 }
8279
8280 if (GET_CODE (x) != PLUS
8281 || GET_CODE (XEXP (x, 1)) != CONST)
8282 return orig_x;
8283
8284 if (REG_P (XEXP (x, 0))
8285 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
8286 /* %ebx + GOT/GOTOFF */
8287 ;
8288 else if (GET_CODE (XEXP (x, 0)) == PLUS)
8289 {
8290 /* %ebx + %reg * scale + GOT/GOTOFF */
8291 reg_addend = XEXP (x, 0);
8292 if (REG_P (XEXP (reg_addend, 0))
8293 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
8294 reg_addend = XEXP (reg_addend, 1);
8295 else if (REG_P (XEXP (reg_addend, 1))
8296 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
8297 reg_addend = XEXP (reg_addend, 0);
8298 else
8299 return orig_x;
8300 if (!REG_P (reg_addend)
8301 && GET_CODE (reg_addend) != MULT
8302 && GET_CODE (reg_addend) != ASHIFT)
8303 return orig_x;
8304 }
8305 else
8306 return orig_x;
8307
8308 x = XEXP (XEXP (x, 1), 0);
8309 if (GET_CODE (x) == PLUS
8310 && CONST_INT_P (XEXP (x, 1)))
8311 {
8312 const_addend = XEXP (x, 1);
8313 x = XEXP (x, 0);
8314 }
8315
8316 if (GET_CODE (x) == UNSPEC
8317 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
8318 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
8319 result = XVECEXP (x, 0, 0);
8320
8321 if (TARGET_MACHO && darwin_local_data_pic (x)
8322 && !MEM_P (orig_x))
8323 result = XEXP (x, 0);
8324
8325 if (! result)
8326 return orig_x;
8327
8328 if (const_addend)
8329 result = gen_rtx_PLUS (Pmode, result, const_addend);
8330 if (reg_addend)
8331 result = gen_rtx_PLUS (Pmode, reg_addend, result);
8332 return result;
8333 }
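/* For example, an address of the form
     (plus (reg %ebx) (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)))
   (when not wrapped in a MEM) is turned back into (symbol_ref "x");
   the @GOT case does the same for the MEM that loads a global's
   address.  */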
8334
8335 /* If X is a machine specific address (i.e. a symbol or label being
8336 referenced as a displacement from the GOT implemented using an
8337 UNSPEC), then return the base term. Otherwise return X. */
8338
8339 rtx
8340 ix86_find_base_term (rtx x)
8341 {
8342 rtx term;
8343
8344 if (TARGET_64BIT)
8345 {
8346 if (GET_CODE (x) != CONST)
8347 return x;
8348 term = XEXP (x, 0);
8349 if (GET_CODE (term) == PLUS
8350 && (CONST_INT_P (XEXP (term, 1))
8351 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
8352 term = XEXP (term, 0);
8353 if (GET_CODE (term) != UNSPEC
8354 || XINT (term, 1) != UNSPEC_GOTPCREL)
8355 return x;
8356
8357 term = XVECEXP (term, 0, 0);
8358
8359 if (GET_CODE (term) != SYMBOL_REF
8360 && GET_CODE (term) != LABEL_REF)
8361 return x;
8362
8363 return term;
8364 }
8365
8366 term = ix86_delegitimize_address (x);
8367
8368 if (GET_CODE (term) != SYMBOL_REF
8369 && GET_CODE (term) != LABEL_REF)
8370 return x;
8371
8372 return term;
8373 }
8374 \f
8375 static void
8376 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
8377 int fp, FILE *file)
8378 {
8379 const char *suffix;
8380
8381 if (mode == CCFPmode || mode == CCFPUmode)
8382 {
8383 enum rtx_code second_code, bypass_code;
8384 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
8385 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
8386 code = ix86_fp_compare_code_to_integer (code);
8387 mode = CCmode;
8388 }
8389 if (reverse)
8390 code = reverse_condition (code);
8391
8392 switch (code)
8393 {
8394 case EQ:
8395 switch (mode)
8396 {
8397 case CCAmode:
8398 suffix = "a";
8399 break;
8400
8401 case CCCmode:
8402 suffix = "c";
8403 break;
8404
8405 case CCOmode:
8406 suffix = "o";
8407 break;
8408
8409 case CCSmode:
8410 suffix = "s";
8411 break;
8412
8413 default:
8414 suffix = "e";
8415 }
8416 break;
8417 case NE:
8418 switch (mode)
8419 {
8420 case CCAmode:
8421 suffix = "na";
8422 break;
8423
8424 case CCCmode:
8425 suffix = "nc";
8426 break;
8427
8428 case CCOmode:
8429 suffix = "no";
8430 break;
8431
8432 case CCSmode:
8433 suffix = "ns";
8434 break;
8435
8436 default:
8437 suffix = "ne";
8438 }
8439 break;
8440 case GT:
8441 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
8442 suffix = "g";
8443 break;
8444 case GTU:
8445 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8446 Those same assemblers have the same but opposite lossage on cmov. */
8447 if (mode == CCmode)
8448 suffix = fp ? "nbe" : "a";
8449 else if (mode == CCCmode)
8450 suffix = "b";
8451 else
8452 gcc_unreachable ();
8453 break;
8454 case LT:
8455 switch (mode)
8456 {
8457 case CCNOmode:
8458 case CCGOCmode:
8459 suffix = "s";
8460 break;
8461
8462 case CCmode:
8463 case CCGCmode:
8464 suffix = "l";
8465 break;
8466
8467 default:
8468 gcc_unreachable ();
8469 }
8470 break;
8471 case LTU:
8472 gcc_assert (mode == CCmode || mode == CCCmode);
8473 suffix = "b";
8474 break;
8475 case GE:
8476 switch (mode)
8477 {
8478 case CCNOmode:
8479 case CCGOCmode:
8480 suffix = "ns";
8481 break;
8482
8483 case CCmode:
8484 case CCGCmode:
8485 suffix = "ge";
8486 break;
8487
8488 default:
8489 gcc_unreachable ();
8490 }
8491 break;
8492 case GEU:
8493 /* ??? As above. */
8494 gcc_assert (mode == CCmode || mode == CCCmode);
8495 suffix = fp ? "nb" : "ae";
8496 break;
8497 case LE:
8498 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8499 suffix = "le";
8500 break;
8501 case LEU:
8502 /* ??? As above. */
8503 if (mode == CCmode)
8504 suffix = "be";
8505 else if (mode == CCCmode)
8506 suffix = fp ? "nb" : "ae";
8507 else
8508 gcc_unreachable ();
8509 break;
8510 case UNORDERED:
8511 suffix = fp ? "u" : "p";
8512 break;
8513 case ORDERED:
8514 suffix = fp ? "nu" : "np";
8515 break;
8516 default:
8517 gcc_unreachable ();
8518 }
8519 fputs (suffix, file);
8520 }
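/* For example, EQ in CCmode prints the suffix "e" (as in sete/cmove),
   while GTU prints "a", or "nbe" when FP is set for the fcmov forms.  */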
8521
8522 /* Print the name of register X to FILE based on its machine mode and number.
8523 If CODE is 'w', pretend the mode is HImode.
8524 If CODE is 'b', pretend the mode is QImode.
8525 If CODE is 'k', pretend the mode is SImode.
8526 If CODE is 'q', pretend the mode is DImode.
8527 If CODE is 'h', pretend the reg is the 'high' byte register.
8528 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
8529
8530 void
8531 print_reg (rtx x, int code, FILE *file)
8532 {
8533 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
8534 && REGNO (x) != FRAME_POINTER_REGNUM
8535 && REGNO (x) != FLAGS_REG
8536 && REGNO (x) != FPSR_REG
8537 && REGNO (x) != FPCR_REG);
8538
8539 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
8540 putc ('%', file);
8541
8542 if (code == 'w' || MMX_REG_P (x))
8543 code = 2;
8544 else if (code == 'b')
8545 code = 1;
8546 else if (code == 'k')
8547 code = 4;
8548 else if (code == 'q')
8549 code = 8;
8550 else if (code == 'y')
8551 code = 3;
8552 else if (code == 'h')
8553 code = 0;
8554 else
8555 code = GET_MODE_SIZE (GET_MODE (x));
8556
8557 /* Irritatingly, the AMD extended registers use a different naming
8558 convention from the normal registers. */
8559 if (REX_INT_REG_P (x))
8560 {
8561 gcc_assert (TARGET_64BIT);
8562 switch (code)
8563 {
8564 case 0:
8565 error ("extended registers have no high halves");
8566 break;
8567 case 1:
8568 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8569 break;
8570 case 2:
8571 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8572 break;
8573 case 4:
8574 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8575 break;
8576 case 8:
8577 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8578 break;
8579 default:
8580 error ("unsupported operand size for extended register");
8581 break;
8582 }
8583 return;
8584 }
8585 switch (code)
8586 {
8587 case 3:
8588 if (STACK_TOP_P (x))
8589 {
8590 fputs ("st(0)", file);
8591 break;
8592 }
8593 /* FALLTHRU */
8594 case 8:
8595 case 4:
8596 case 12:
8597 if (! ANY_FP_REG_P (x))
8598 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8599 /* FALLTHRU */
8600 case 16:
8601 case 2:
8602 normal:
8603 fputs (hi_reg_name[REGNO (x)], file);
8604 break;
8605 case 1:
8606 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8607 goto normal;
8608 fputs (qi_reg_name[REGNO (x)], file);
8609 break;
8610 case 0:
8611 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8612 goto normal;
8613 fputs (qi_high_reg_name[REGNO (x)], file);
8614 break;
8615 default:
8616 gcc_unreachable ();
8617 }
8618 }
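/* For example, for the ax register, code 'b' prints %al, 'w' prints %ax,
   'k' prints %eax and 'q' prints %rax; the REX registers come out as
   %r8b, %r8w, %r8d and %r8 respectively.  */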
8619
8620 /* Locate some local-dynamic symbol still in use by this function
8621 so that we can print its name in some tls_local_dynamic_base
8622 pattern. */
8623
8624 static int
8625 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8626 {
8627 rtx x = *px;
8628
8629 if (GET_CODE (x) == SYMBOL_REF
8630 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8631 {
8632 cfun->machine->some_ld_name = XSTR (x, 0);
8633 return 1;
8634 }
8635
8636 return 0;
8637 }
8638
8639 static const char *
8640 get_some_local_dynamic_name (void)
8641 {
8642 rtx insn;
8643
8644 if (cfun->machine->some_ld_name)
8645 return cfun->machine->some_ld_name;
8646
8647 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8648 if (INSN_P (insn)
8649 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8650 return cfun->machine->some_ld_name;
8651
8652 gcc_unreachable ();
8653 }
8654
8655 /* Meaning of CODE:
8656 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8657 C -- print opcode suffix for set/cmov insn.
8658 c -- like C, but print reversed condition
8659 F,f -- likewise, but for floating-point.
8660 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8661 otherwise nothing
8662 R -- print the prefix for register names.
8663 z -- print the opcode suffix for the size of the current operand.
8664 * -- print a star (in certain assembler syntax)
8665 A -- print an absolute memory reference.
8666 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8667 s -- print a shift double count, followed by the assembler's argument
8668 delimiter.
8669 b -- print the QImode name of the register for the indicated operand.
8670 %b0 would print %al if operands[0] is reg 0.
8671 w -- likewise, print the HImode name of the register.
8672 k -- likewise, print the SImode name of the register.
8673 q -- likewise, print the DImode name of the register.
8674 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8675 y -- print "st(0)" instead of "st" as a register.
8676 D -- print condition for SSE cmp instruction.
8677 P -- if PIC, print an @PLT suffix.
8678 X -- don't print any sort of PIC '@' suffix for a symbol.
8679 & -- print some in-use local-dynamic symbol name.
8680 H -- print a memory address offset by 8; used for sse high-parts
8681 + -- print a branch hint as 'cs' or 'ds' prefix
8682 ; -- print a semicolon (after prefixes due to bug in older gas).
8683 */
8684
8685 void
8686 print_operand (FILE *file, rtx x, int code)
8687 {
8688 if (code)
8689 {
8690 switch (code)
8691 {
8692 case '*':
8693 if (ASSEMBLER_DIALECT == ASM_ATT)
8694 putc ('*', file);
8695 return;
8696
8697 case '&':
8698 assemble_name (file, get_some_local_dynamic_name ());
8699 return;
8700
8701 case 'A':
8702 switch (ASSEMBLER_DIALECT)
8703 {
8704 case ASM_ATT:
8705 putc ('*', file);
8706 break;
8707
8708 case ASM_INTEL:
8709 /* Intel syntax. For absolute addresses, registers should not
8710 be surrounded by brackets. */
8711 if (!REG_P (x))
8712 {
8713 putc ('[', file);
8714 PRINT_OPERAND (file, x, 0);
8715 putc (']', file);
8716 return;
8717 }
8718 break;
8719
8720 default:
8721 gcc_unreachable ();
8722 }
8723
8724 PRINT_OPERAND (file, x, 0);
8725 return;
8726
8727
8728 case 'L':
8729 if (ASSEMBLER_DIALECT == ASM_ATT)
8730 putc ('l', file);
8731 return;
8732
8733 case 'W':
8734 if (ASSEMBLER_DIALECT == ASM_ATT)
8735 putc ('w', file);
8736 return;
8737
8738 case 'B':
8739 if (ASSEMBLER_DIALECT == ASM_ATT)
8740 putc ('b', file);
8741 return;
8742
8743 case 'Q':
8744 if (ASSEMBLER_DIALECT == ASM_ATT)
8745 putc ('l', file);
8746 return;
8747
8748 case 'S':
8749 if (ASSEMBLER_DIALECT == ASM_ATT)
8750 putc ('s', file);
8751 return;
8752
8753 case 'T':
8754 if (ASSEMBLER_DIALECT == ASM_ATT)
8755 putc ('t', file);
8756 return;
8757
8758 case 'z':
8759 /* 387 opcodes don't get size suffixes if the operands are
8760 registers. */
8761 if (STACK_REG_P (x))
8762 return;
8763
8764 /* Likewise if using Intel opcodes. */
8765 if (ASSEMBLER_DIALECT == ASM_INTEL)
8766 return;
8767
8768 /* This is the size of op from size of operand. */
8769 switch (GET_MODE_SIZE (GET_MODE (x)))
8770 {
8771 case 1:
8772 putc ('b', file);
8773 return;
8774
8775 case 2:
8776 if (MEM_P (x))
8777 {
8778 #ifdef HAVE_GAS_FILDS_FISTS
8779 putc ('s', file);
8780 #endif
8781 return;
8782 }
8783 else
8784 putc ('w', file);
8785 return;
8786
8787 case 4:
8788 if (GET_MODE (x) == SFmode)
8789 {
8790 putc ('s', file);
8791 return;
8792 }
8793 else
8794 putc ('l', file);
8795 return;
8796
8797 case 12:
8798 case 16:
8799 putc ('t', file);
8800 return;
8801
8802 case 8:
8803 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8804 {
8805 #ifdef GAS_MNEMONICS
8806 putc ('q', file);
8807 #else
8808 putc ('l', file);
8809 putc ('l', file);
8810 #endif
8811 }
8812 else
8813 putc ('l', file);
8814 return;
8815
8816 default:
8817 gcc_unreachable ();
8818 }
8819
8820 case 'b':
8821 case 'w':
8822 case 'k':
8823 case 'q':
8824 case 'h':
8825 case 'y':
8826 case 'X':
8827 case 'P':
8828 break;
8829
8830 case 's':
8831 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8832 {
8833 PRINT_OPERAND (file, x, 0);
8834 putc (',', file);
8835 }
8836 return;
8837
8838 case 'D':
8839 /* A little bit of braindamage here. The SSE compare instructions
8840 use completely different names for the comparisons than the
8841 fp conditional moves do. */
8842 switch (GET_CODE (x))
8843 {
8844 case EQ:
8845 case UNEQ:
8846 fputs ("eq", file);
8847 break;
8848 case LT:
8849 case UNLT:
8850 fputs ("lt", file);
8851 break;
8852 case LE:
8853 case UNLE:
8854 fputs ("le", file);
8855 break;
8856 case UNORDERED:
8857 fputs ("unord", file);
8858 break;
8859 case NE:
8860 case LTGT:
8861 fputs ("neq", file);
8862 break;
8863 case UNGE:
8864 case GE:
8865 fputs ("nlt", file);
8866 break;
8867 case UNGT:
8868 case GT:
8869 fputs ("nle", file);
8870 break;
8871 case ORDERED:
8872 fputs ("ord", file);
8873 break;
8874 default:
8875 gcc_unreachable ();
8876 }
8877 return;
8878 case 'O':
8879 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8880 if (ASSEMBLER_DIALECT == ASM_ATT)
8881 {
8882 switch (GET_MODE (x))
8883 {
8884 case HImode: putc ('w', file); break;
8885 case SImode:
8886 case SFmode: putc ('l', file); break;
8887 case DImode:
8888 case DFmode: putc ('q', file); break;
8889 default: gcc_unreachable ();
8890 }
8891 putc ('.', file);
8892 }
8893 #endif
8894 return;
8895 case 'C':
8896 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8897 return;
8898 case 'F':
8899 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8900 if (ASSEMBLER_DIALECT == ASM_ATT)
8901 putc ('.', file);
8902 #endif
8903 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8904 return;
8905
8906 /* Like above, but reverse condition */
8907 case 'c':
8908 /* Check to see if argument to %c is really a constant
8909 and not a condition code which needs to be reversed. */
8910 if (!COMPARISON_P (x))
8911 {
8912 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8913 return;
8914 }
8915 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8916 return;
8917 case 'f':
8918 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8919 if (ASSEMBLER_DIALECT == ASM_ATT)
8920 putc ('.', file);
8921 #endif
8922 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8923 return;
8924
8925 case 'H':
8926 /* It doesn't actually matter what mode we use here, as we're
8927 only going to use this for printing. */
8928 x = adjust_address_nv (x, DImode, 8);
8929 break;
8930
8931 case '+':
8932 {
8933 rtx x;
8934
8935 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8936 return;
8937
8938 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8939 if (x)
8940 {
8941 int pred_val = INTVAL (XEXP (x, 0));
8942
8943 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8944 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8945 {
8946 int taken = pred_val > REG_BR_PROB_BASE / 2;
8947 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8948
8949 /* Emit hints only in the case default branch prediction
8950 heuristics would fail. */
8951 if (taken != cputaken)
8952 {
8953 /* We use 3e (DS) prefix for taken branches and
8954 2e (CS) prefix for not taken branches. */
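		      /* For instance, a forward branch that the REG_BR_PROB
			 note says is taken gets "ds ; " prepended to the jump,
			 while a backward branch that is in fact not taken gets
			 "cs ; ".  */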
8955 if (taken)
8956 fputs ("ds ; ", file);
8957 else
8958 fputs ("cs ; ", file);
8959 }
8960 }
8961 }
8962 return;
8963 }
8964
8965 case ';':
8966 #if TARGET_MACHO
8967 fputs (" ; ", file);
8968 #else
8969 fputc (' ', file);
8970 #endif
8971 return;
8972
8973 default:
8974 output_operand_lossage ("invalid operand code '%c'", code);
8975 }
8976 }
8977
8978 if (REG_P (x))
8979 print_reg (x, code, file);
8980
8981 else if (MEM_P (x))
8982 {
8983 /* No `byte ptr' prefix for call instructions. */
8984 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8985 {
8986 const char * size;
8987 switch (GET_MODE_SIZE (GET_MODE (x)))
8988 {
8989 case 1: size = "BYTE"; break;
8990 case 2: size = "WORD"; break;
8991 case 4: size = "DWORD"; break;
8992 case 8: size = "QWORD"; break;
8993 case 12: size = "XWORD"; break;
8994 case 16: size = "XMMWORD"; break;
8995 default:
8996 gcc_unreachable ();
8997 }
8998
8999 /* Check for explicit size override (codes 'b', 'w' and 'k') */
9000 if (code == 'b')
9001 size = "BYTE";
9002 else if (code == 'w')
9003 size = "WORD";
9004 else if (code == 'k')
9005 size = "DWORD";
9006
9007 fputs (size, file);
9008 fputs (" PTR ", file);
9009 }
9010
9011 x = XEXP (x, 0);
9012 /* Avoid (%rip) for call operands. */
9013 if (CONSTANT_ADDRESS_P (x) && code == 'P'
9014 && !CONST_INT_P (x))
9015 output_addr_const (file, x);
9016 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
9017 output_operand_lossage ("invalid constraints for operand");
9018 else
9019 output_address (x);
9020 }
9021
9022 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
9023 {
9024 REAL_VALUE_TYPE r;
9025 long l;
9026
9027 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9028 REAL_VALUE_TO_TARGET_SINGLE (r, l);
9029
9030 if (ASSEMBLER_DIALECT == ASM_ATT)
9031 putc ('$', file);
9032 fprintf (file, "0x%08lx", l);
9033 }
9034
9035 /* These float cases don't actually occur as immediate operands. */
9036 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
9037 {
9038 char dstr[30];
9039
9040 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9041 fprintf (file, "%s", dstr);
9042 }
9043
9044 else if (GET_CODE (x) == CONST_DOUBLE
9045 && GET_MODE (x) == XFmode)
9046 {
9047 char dstr[30];
9048
9049 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9050 fprintf (file, "%s", dstr);
9051 }
9052
9053 else
9054 {
9055 /* We have patterns that allow zero sets of memory, for instance.
9056 In 64-bit mode, we should probably support all 8-byte vectors,
9057 since we can in fact encode that into an immediate. */
9058 if (GET_CODE (x) == CONST_VECTOR)
9059 {
9060 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
9061 x = const0_rtx;
9062 }
9063
9064 if (code != 'P')
9065 {
9066 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
9067 {
9068 if (ASSEMBLER_DIALECT == ASM_ATT)
9069 putc ('$', file);
9070 }
9071 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
9072 || GET_CODE (x) == LABEL_REF)
9073 {
9074 if (ASSEMBLER_DIALECT == ASM_ATT)
9075 putc ('$', file);
9076 else
9077 fputs ("OFFSET FLAT:", file);
9078 }
9079 }
9080 if (CONST_INT_P (x))
9081 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
9082 else if (flag_pic)
9083 output_pic_addr_const (file, x, code);
9084 else
9085 output_addr_const (file, x);
9086 }
9087 }
9088 \f
9089 /* Print a memory operand whose address is ADDR. */
9090
9091 void
9092 print_operand_address (FILE *file, rtx addr)
9093 {
9094 struct ix86_address parts;
9095 rtx base, index, disp;
9096 int scale;
9097 int ok = ix86_decompose_address (addr, &parts);
9098
9099 gcc_assert (ok);
9100
9101 base = parts.base;
9102 index = parts.index;
9103 disp = parts.disp;
9104 scale = parts.scale;
9105
9106 switch (parts.seg)
9107 {
9108 case SEG_DEFAULT:
9109 break;
9110 case SEG_FS:
9111 case SEG_GS:
9112 if (USER_LABEL_PREFIX[0] == 0)
9113 putc ('%', file);
9114 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
9115 break;
9116 default:
9117 gcc_unreachable ();
9118 }
9119
9120 if (!base && !index)
9121 {
9122       /* A displacement-only address requires special attention.  */
9123
9124 if (CONST_INT_P (disp))
9125 {
9126 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
9127 {
9128 if (USER_LABEL_PREFIX[0] == 0)
9129 putc ('%', file);
9130 fputs ("ds:", file);
9131 }
9132 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
9133 }
9134 else if (flag_pic)
9135 output_pic_addr_const (file, disp, 0);
9136 else
9137 output_addr_const (file, disp);
9138
9139 /* Use one byte shorter RIP relative addressing for 64bit mode. */
9140 if (TARGET_64BIT)
9141 {
9142 if (GET_CODE (disp) == CONST
9143 && GET_CODE (XEXP (disp, 0)) == PLUS
9144 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9145 disp = XEXP (XEXP (disp, 0), 0);
9146 if (GET_CODE (disp) == LABEL_REF
9147 || (GET_CODE (disp) == SYMBOL_REF
9148 && SYMBOL_REF_TLS_MODEL (disp) == 0))
9149 fputs ("(%rip)", file);
9150 }
9151 }
9152 else
9153 {
9154 if (ASSEMBLER_DIALECT == ASM_ATT)
9155 {
9156 if (disp)
9157 {
9158 if (flag_pic)
9159 output_pic_addr_const (file, disp, 0);
9160 else if (GET_CODE (disp) == LABEL_REF)
9161 output_asm_label (disp);
9162 else
9163 output_addr_const (file, disp);
9164 }
9165
9166 putc ('(', file);
9167 if (base)
9168 print_reg (base, 0, file);
9169 if (index)
9170 {
9171 putc (',', file);
9172 print_reg (index, 0, file);
9173 if (scale != 1)
9174 fprintf (file, ",%d", scale);
9175 }
9176 putc (')', file);
9177 }
9178 else
9179 {
9180 rtx offset = NULL_RTX;
9181
9182 if (disp)
9183 {
9184 /* Pull out the offset of a symbol; print any symbol itself. */
9185 if (GET_CODE (disp) == CONST
9186 && GET_CODE (XEXP (disp, 0)) == PLUS
9187 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9188 {
9189 offset = XEXP (XEXP (disp, 0), 1);
9190 disp = gen_rtx_CONST (VOIDmode,
9191 XEXP (XEXP (disp, 0), 0));
9192 }
9193
9194 if (flag_pic)
9195 output_pic_addr_const (file, disp, 0);
9196 else if (GET_CODE (disp) == LABEL_REF)
9197 output_asm_label (disp);
9198 else if (CONST_INT_P (disp))
9199 offset = disp;
9200 else
9201 output_addr_const (file, disp);
9202 }
9203
9204 putc ('[', file);
9205 if (base)
9206 {
9207 print_reg (base, 0, file);
9208 if (offset)
9209 {
9210 if (INTVAL (offset) >= 0)
9211 putc ('+', file);
9212 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9213 }
9214 }
9215 else if (offset)
9216 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9217 else
9218 putc ('0', file);
9219
9220 if (index)
9221 {
9222 putc ('+', file);
9223 print_reg (index, 0, file);
9224 if (scale != 1)
9225 fprintf (file, "*%d", scale);
9226 }
9227 putc (']', file);
9228 }
9229 }
9230 }
9231
9232 bool
9233 output_addr_const_extra (FILE *file, rtx x)
9234 {
9235 rtx op;
9236
9237 if (GET_CODE (x) != UNSPEC)
9238 return false;
9239
9240 op = XVECEXP (x, 0, 0);
9241 switch (XINT (x, 1))
9242 {
9243 case UNSPEC_GOTTPOFF:
9244 output_addr_const (file, op);
9245 /* FIXME: This might be @TPOFF in Sun ld. */
9246 fputs ("@GOTTPOFF", file);
9247 break;
9248 case UNSPEC_TPOFF:
9249 output_addr_const (file, op);
9250 fputs ("@TPOFF", file);
9251 break;
9252 case UNSPEC_NTPOFF:
9253 output_addr_const (file, op);
9254 if (TARGET_64BIT)
9255 fputs ("@TPOFF", file);
9256 else
9257 fputs ("@NTPOFF", file);
9258 break;
9259 case UNSPEC_DTPOFF:
9260 output_addr_const (file, op);
9261 fputs ("@DTPOFF", file);
9262 break;
9263 case UNSPEC_GOTNTPOFF:
9264 output_addr_const (file, op);
9265 if (TARGET_64BIT)
9266 fputs ("@GOTTPOFF(%rip)", file);
9267 else
9268 fputs ("@GOTNTPOFF", file);
9269 break;
9270 case UNSPEC_INDNTPOFF:
9271 output_addr_const (file, op);
9272 fputs ("@INDNTPOFF", file);
9273 break;
9274
9275 default:
9276 return false;
9277 }
9278
9279 return true;
9280 }
9281 \f
9282 /* Split one or more DImode RTL references into pairs of SImode
9283 references. The RTL can be REG, offsettable MEM, integer constant, or
9284 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9285 split and "num" is its length. lo_half and hi_half are output arrays
9286 that parallel "operands". */
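/* For example, on this little-endian target the DImode constant
   0x100000002 splits into lo_half = (const_int 2) and
   hi_half = (const_int 1); a DImode register splits into its two
   SImode subwords in the same fashion.  (Illustrative values.)  */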
9287
9288 void
9289 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9290 {
9291 while (num--)
9292 {
9293 rtx op = operands[num];
9294
9295       /* simplify_subreg refuses to split volatile memory addresses,
9296 	 but we still have to handle them.  */
9297 if (MEM_P (op))
9298 {
9299 lo_half[num] = adjust_address (op, SImode, 0);
9300 hi_half[num] = adjust_address (op, SImode, 4);
9301 }
9302 else
9303 {
9304 lo_half[num] = simplify_gen_subreg (SImode, op,
9305 GET_MODE (op) == VOIDmode
9306 ? DImode : GET_MODE (op), 0);
9307 hi_half[num] = simplify_gen_subreg (SImode, op,
9308 GET_MODE (op) == VOIDmode
9309 ? DImode : GET_MODE (op), 4);
9310 }
9311 }
9312 }
9313 /* Split one or more TImode RTL references into pairs of DImode
9314 references. The RTL can be REG, offsettable MEM, integer constant, or
9315    CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
9316 split and "num" is its length. lo_half and hi_half are output arrays
9317 that parallel "operands". */
9318
9319 void
9320 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9321 {
9322 while (num--)
9323 {
9324 rtx op = operands[num];
9325
9326       /* simplify_subreg refuses to split volatile memory addresses, but we
9327 	 still have to handle them.  */
9328 if (MEM_P (op))
9329 {
9330 lo_half[num] = adjust_address (op, DImode, 0);
9331 hi_half[num] = adjust_address (op, DImode, 8);
9332 }
9333 else
9334 {
9335 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
9336 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
9337 }
9338 }
9339 }
9340 \f
9341 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
9342 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
9343 is the expression of the binary operation. The output may either be
9344 emitted here, or returned to the caller, like all output_* functions.
9345
9346 There is no guarantee that the operands are the same mode, as they
9347 might be within FLOAT or FLOAT_EXTEND expressions. */
9348
9349 #ifndef SYSV386_COMPAT
9350 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
9351 wants to fix the assemblers because that causes incompatibility
9352 with gcc. No-one wants to fix gcc because that causes
9353 incompatibility with assemblers... You can use the option of
9354 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
9355 #define SYSV386_COMPAT 1
9356 #endif
9357
9358 const char *
9359 output_387_binary_op (rtx insn, rtx *operands)
9360 {
9361 static char buf[30];
9362 const char *p;
9363 const char *ssep;
9364 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
9365
9366 #ifdef ENABLE_CHECKING
9367   /* Even if we do not want to check the inputs, this documents the input
9368      constraints, which helps in understanding the following code.  */
9369 if (STACK_REG_P (operands[0])
9370 && ((REG_P (operands[1])
9371 && REGNO (operands[0]) == REGNO (operands[1])
9372 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
9373 || (REG_P (operands[2])
9374 && REGNO (operands[0]) == REGNO (operands[2])
9375 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
9376 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
9377 ; /* ok */
9378 else
9379 gcc_assert (is_sse);
9380 #endif
9381
9382 switch (GET_CODE (operands[3]))
9383 {
9384 case PLUS:
9385 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9386 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9387 p = "fiadd";
9388 else
9389 p = "fadd";
9390 ssep = "add";
9391 break;
9392
9393 case MINUS:
9394 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9395 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9396 p = "fisub";
9397 else
9398 p = "fsub";
9399 ssep = "sub";
9400 break;
9401
9402 case MULT:
9403 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9404 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9405 p = "fimul";
9406 else
9407 p = "fmul";
9408 ssep = "mul";
9409 break;
9410
9411 case DIV:
9412 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9413 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9414 p = "fidiv";
9415 else
9416 p = "fdiv";
9417 ssep = "div";
9418 break;
9419
9420 default:
9421 gcc_unreachable ();
9422 }
9423
9424 if (is_sse)
9425 {
9426 strcpy (buf, ssep);
9427 if (GET_MODE (operands[0]) == SFmode)
9428 strcat (buf, "ss\t{%2, %0|%0, %2}");
9429 else
9430 strcat (buf, "sd\t{%2, %0|%0, %2}");
9431 return buf;
9432 }
9433 strcpy (buf, p);
9434
9435 switch (GET_CODE (operands[3]))
9436 {
9437 case MULT:
9438 case PLUS:
9439 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
9440 {
9441 rtx temp = operands[2];
9442 operands[2] = operands[1];
9443 operands[1] = temp;
9444 }
9445
9446       /* We now know operands[0] == operands[1].  */
9447
9448 if (MEM_P (operands[2]))
9449 {
9450 p = "%z2\t%2";
9451 break;
9452 }
9453
9454 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9455 {
9456 if (STACK_TOP_P (operands[0]))
9457 /* How is it that we are storing to a dead operand[2]?
9458 Well, presumably operands[1] is dead too. We can't
9459 store the result to st(0) as st(0) gets popped on this
9460 instruction. Instead store to operands[2] (which I
9461 think has to be st(1)). st(1) will be popped later.
9462 gcc <= 2.8.1 didn't have this check and generated
9463 assembly code that the Unixware assembler rejected. */
9464 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9465 else
9466 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9467 break;
9468 }
9469
9470 if (STACK_TOP_P (operands[0]))
9471 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9472 else
9473 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9474 break;
9475
9476 case MINUS:
9477 case DIV:
9478 if (MEM_P (operands[1]))
9479 {
9480 p = "r%z1\t%1";
9481 break;
9482 }
9483
9484 if (MEM_P (operands[2]))
9485 {
9486 p = "%z2\t%2";
9487 break;
9488 }
9489
9490 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9491 {
9492 #if SYSV386_COMPAT
9493 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9494 derived assemblers, confusingly reverse the direction of
9495 the operation for fsub{r} and fdiv{r} when the
9496 destination register is not st(0). The Intel assembler
9497 doesn't have this brain damage. Read !SYSV386_COMPAT to
9498 figure out what the hardware really does. */
9499 if (STACK_TOP_P (operands[0]))
9500 p = "{p\t%0, %2|rp\t%2, %0}";
9501 else
9502 p = "{rp\t%2, %0|p\t%0, %2}";
9503 #else
9504 if (STACK_TOP_P (operands[0]))
9505 /* As above for fmul/fadd, we can't store to st(0). */
9506 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9507 else
9508 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9509 #endif
9510 break;
9511 }
9512
9513 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9514 {
9515 #if SYSV386_COMPAT
9516 if (STACK_TOP_P (operands[0]))
9517 p = "{rp\t%0, %1|p\t%1, %0}";
9518 else
9519 p = "{p\t%1, %0|rp\t%0, %1}";
9520 #else
9521 if (STACK_TOP_P (operands[0]))
9522 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9523 else
9524 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9525 #endif
9526 break;
9527 }
9528
9529 if (STACK_TOP_P (operands[0]))
9530 {
9531 if (STACK_TOP_P (operands[1]))
9532 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9533 else
9534 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9535 break;
9536 }
9537 else if (STACK_TOP_P (operands[1]))
9538 {
9539 #if SYSV386_COMPAT
9540 p = "{\t%1, %0|r\t%0, %1}";
9541 #else
9542 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9543 #endif
9544 }
9545 else
9546 {
9547 #if SYSV386_COMPAT
9548 p = "{r\t%2, %0|\t%0, %2}";
9549 #else
9550 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9551 #endif
9552 }
9553 break;
9554
9555 default:
9556 gcc_unreachable ();
9557 }
9558
9559 strcat (buf, p);
9560 return buf;
9561 }
9562
9563 /* Return needed mode for entity in optimize_mode_switching pass. */
9564
9565 int
9566 ix86_mode_needed (int entity, rtx insn)
9567 {
9568 enum attr_i387_cw mode;
9569
9570   /* The mode UNINITIALIZED is used to store the control word after a
9571      function call or ASM pattern.  The mode ANY specifies that the function
9572      has no requirements on the control word and makes no changes in the
9573      bits we are interested in.  */
9574
9575 if (CALL_P (insn)
9576 || (NONJUMP_INSN_P (insn)
9577 && (asm_noperands (PATTERN (insn)) >= 0
9578 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9579 return I387_CW_UNINITIALIZED;
9580
9581 if (recog_memoized (insn) < 0)
9582 return I387_CW_ANY;
9583
9584 mode = get_attr_i387_cw (insn);
9585
9586 switch (entity)
9587 {
9588 case I387_TRUNC:
9589 if (mode == I387_CW_TRUNC)
9590 return mode;
9591 break;
9592
9593 case I387_FLOOR:
9594 if (mode == I387_CW_FLOOR)
9595 return mode;
9596 break;
9597
9598 case I387_CEIL:
9599 if (mode == I387_CW_CEIL)
9600 return mode;
9601 break;
9602
9603 case I387_MASK_PM:
9604 if (mode == I387_CW_MASK_PM)
9605 return mode;
9606 break;
9607
9608 default:
9609 gcc_unreachable ();
9610 }
9611
9612 return I387_CW_ANY;
9613 }
9614
9615 /* Output code to initialize control word copies used by trunc?f?i and
9616    rounding patterns.  MODE selects the rounding mode to prepare; the
9617    resulting control word is stored in the stack slot corresponding to MODE.  */
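/* In the i387 control word the rounding-control field is bits 11:10
   (00 = to nearest, 01 = down, 10 = up, 11 = truncate) and the
   precision-exception mask is bit 5, hence the 0x0c00, 0x0400, 0x0800
   and 0x0020 constants used below.  */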
9618
9619 void
9620 emit_i387_cw_initialization (int mode)
9621 {
9622 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9623 rtx new_mode;
9624
9625 enum ix86_stack_slot slot;
9626
9627 rtx reg = gen_reg_rtx (HImode);
9628
9629 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9630 emit_move_insn (reg, copy_rtx (stored_mode));
9631
9632 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9633 {
9634 switch (mode)
9635 {
9636 case I387_CW_TRUNC:
9637 /* round toward zero (truncate) */
9638 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9639 slot = SLOT_CW_TRUNC;
9640 break;
9641
9642 case I387_CW_FLOOR:
9643 /* round down toward -oo */
9644 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9645 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9646 slot = SLOT_CW_FLOOR;
9647 break;
9648
9649 case I387_CW_CEIL:
9650 /* round up toward +oo */
9651 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9652 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9653 slot = SLOT_CW_CEIL;
9654 break;
9655
9656 case I387_CW_MASK_PM:
9657 /* mask precision exception for nearbyint() */
9658 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9659 slot = SLOT_CW_MASK_PM;
9660 break;
9661
9662 default:
9663 gcc_unreachable ();
9664 }
9665 }
9666 else
9667 {
9668 switch (mode)
9669 {
9670 case I387_CW_TRUNC:
9671 /* round toward zero (truncate) */
9672 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9673 slot = SLOT_CW_TRUNC;
9674 break;
9675
9676 case I387_CW_FLOOR:
9677 /* round down toward -oo */
9678 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9679 slot = SLOT_CW_FLOOR;
9680 break;
9681
9682 case I387_CW_CEIL:
9683 /* round up toward +oo */
9684 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9685 slot = SLOT_CW_CEIL;
9686 break;
9687
9688 case I387_CW_MASK_PM:
9689 /* mask precision exception for nearbyint() */
9690 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9691 slot = SLOT_CW_MASK_PM;
9692 break;
9693
9694 default:
9695 gcc_unreachable ();
9696 }
9697 }
9698
9699 gcc_assert (slot < MAX_386_STACK_LOCALS);
9700
9701 new_mode = assign_386_stack_local (HImode, slot);
9702 emit_move_insn (new_mode, reg);
9703 }
9704
9705 /* Output code for INSN to convert a float to a signed int. OPERANDS
9706 are the insn operands. The output may be [HSD]Imode and the input
9707 operand may be [SDX]Fmode. */
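/* When a specific rounding mode is required the store is bracketed
   with fldcw: operand 3 names the stack slot holding the modified
   control word loaded before the fistp, and operand 2 the saved
   control word restored afterwards.  */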
9708
9709 const char *
9710 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9711 {
9712 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9713 int dimode_p = GET_MODE (operands[0]) == DImode;
9714 int round_mode = get_attr_i387_cw (insn);
9715
9716 /* Jump through a hoop or two for DImode, since the hardware has no
9717 non-popping instruction. We used to do this a different way, but
9718 that was somewhat fragile and broke with post-reload splitters. */
9719 if ((dimode_p || fisttp) && !stack_top_dies)
9720 output_asm_insn ("fld\t%y1", operands);
9721
9722 gcc_assert (STACK_TOP_P (operands[1]));
9723 gcc_assert (MEM_P (operands[0]));
9724 gcc_assert (GET_MODE (operands[1]) != TFmode);
9725
9726 if (fisttp)
9727 output_asm_insn ("fisttp%z0\t%0", operands);
9728 else
9729 {
9730 if (round_mode != I387_CW_ANY)
9731 output_asm_insn ("fldcw\t%3", operands);
9732 if (stack_top_dies || dimode_p)
9733 output_asm_insn ("fistp%z0\t%0", operands);
9734 else
9735 output_asm_insn ("fist%z0\t%0", operands);
9736 if (round_mode != I387_CW_ANY)
9737 output_asm_insn ("fldcw\t%2", operands);
9738 }
9739
9740 return "";
9741 }
9742
9743 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9744 have the values zero or one, indicates the ffreep insn's operand
9745 from the OPERANDS array. */
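/* When the assembler lacks the ffreep mnemonic we emit the encoding
   by hand: ffreep %st(N) is the two-byte sequence 0xDF 0xC0+N, which
   the little-endian ".word 0xc<N>df" directive below produces once
   the placeholder digit is patched in.  */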
9746
9747 static const char *
9748 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9749 {
9750 if (TARGET_USE_FFREEP)
9751 #if HAVE_AS_IX86_FFREEP
9752 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9753 #else
9754 {
9755 static char retval[] = ".word\t0xc_df";
9756 int regno = REGNO (operands[opno]);
9757
9758 gcc_assert (FP_REGNO_P (regno));
9759
9760 retval[9] = '0' + (regno - FIRST_STACK_REG);
9761 return retval;
9762 }
9763 #endif
9764
9765 return opno ? "fstp\t%y1" : "fstp\t%y0";
9766 }
9767
9768
9769 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9770 should be used. UNORDERED_P is true when fucom should be used. */
9771
9772 const char *
9773 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9774 {
9775 int stack_top_dies;
9776 rtx cmp_op0, cmp_op1;
9777 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9778
9779 if (eflags_p)
9780 {
9781 cmp_op0 = operands[0];
9782 cmp_op1 = operands[1];
9783 }
9784 else
9785 {
9786 cmp_op0 = operands[1];
9787 cmp_op1 = operands[2];
9788 }
9789
9790 if (is_sse)
9791 {
9792 if (GET_MODE (operands[0]) == SFmode)
9793 if (unordered_p)
9794 return "ucomiss\t{%1, %0|%0, %1}";
9795 else
9796 return "comiss\t{%1, %0|%0, %1}";
9797 else
9798 if (unordered_p)
9799 return "ucomisd\t{%1, %0|%0, %1}";
9800 else
9801 return "comisd\t{%1, %0|%0, %1}";
9802 }
9803
9804 gcc_assert (STACK_TOP_P (cmp_op0));
9805
9806 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9807
9808 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9809 {
9810 if (stack_top_dies)
9811 {
9812 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9813 return output_387_ffreep (operands, 1);
9814 }
9815 else
9816 return "ftst\n\tfnstsw\t%0";
9817 }
9818
9819 if (STACK_REG_P (cmp_op1)
9820 && stack_top_dies
9821 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9822 && REGNO (cmp_op1) != FIRST_STACK_REG)
9823 {
9824       /* If the top of the 387 stack dies, and the other operand is
9825 	 also a stack register that dies, then this must be a
9826 	 `fcompp' float compare.  */
9827
9828 if (eflags_p)
9829 {
9830 /* There is no double popping fcomi variant. Fortunately,
9831 eflags is immune from the fstp's cc clobbering. */
9832 if (unordered_p)
9833 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9834 else
9835 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9836 return output_387_ffreep (operands, 0);
9837 }
9838 else
9839 {
9840 if (unordered_p)
9841 return "fucompp\n\tfnstsw\t%0";
9842 else
9843 return "fcompp\n\tfnstsw\t%0";
9844 }
9845 }
9846 else
9847 {
9848 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
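      /* For example, eflags_p = 1, a non-integer operand, unordered_p = 1
	 and a dying stack top give mask 11, selecting "fucomip" from the
	 table below.  */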
9849
9850 static const char * const alt[16] =
9851 {
9852 "fcom%z2\t%y2\n\tfnstsw\t%0",
9853 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9854 "fucom%z2\t%y2\n\tfnstsw\t%0",
9855 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9856
9857 "ficom%z2\t%y2\n\tfnstsw\t%0",
9858 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9859 NULL,
9860 NULL,
9861
9862 "fcomi\t{%y1, %0|%0, %y1}",
9863 "fcomip\t{%y1, %0|%0, %y1}",
9864 "fucomi\t{%y1, %0|%0, %y1}",
9865 "fucomip\t{%y1, %0|%0, %y1}",
9866
9867 NULL,
9868 NULL,
9869 NULL,
9870 NULL
9871 };
9872
9873 int mask;
9874 const char *ret;
9875
9876 mask = eflags_p << 3;
9877 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9878 mask |= unordered_p << 1;
9879 mask |= stack_top_dies;
9880
9881 gcc_assert (mask < 16);
9882 ret = alt[mask];
9883 gcc_assert (ret);
9884
9885 return ret;
9886 }
9887 }
9888
9889 void
9890 ix86_output_addr_vec_elt (FILE *file, int value)
9891 {
9892 const char *directive = ASM_LONG;
9893
9894 #ifdef ASM_QUAD
9895 if (TARGET_64BIT)
9896 directive = ASM_QUAD;
9897 #else
9898 gcc_assert (!TARGET_64BIT);
9899 #endif
9900
9901 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9902 }
9903
9904 void
9905 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9906 {
9907 const char *directive = ASM_LONG;
9908
9909 #ifdef ASM_QUAD
9910 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
9911 directive = ASM_QUAD;
9912 #else
9913 gcc_assert (!TARGET_64BIT);
9914 #endif
9915 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9916 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
9917 fprintf (file, "%s%s%d-%s%d\n",
9918 directive, LPREFIX, value, LPREFIX, rel);
9919 else if (HAVE_AS_GOTOFF_IN_DATA)
9920 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9921 #if TARGET_MACHO
9922 else if (TARGET_MACHO)
9923 {
9924 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9925 machopic_output_function_base_name (file);
9926 fprintf(file, "\n");
9927 }
9928 #endif
9929 else
9930 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9931 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9932 }
9933 \f
9934 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9935 for the target. */
9936
9937 void
9938 ix86_expand_clear (rtx dest)
9939 {
9940 rtx tmp;
9941
9942 /* We play register width games, which are only valid after reload. */
9943 gcc_assert (reload_completed);
9944
9945 /* Avoid HImode and its attendant prefix byte. */
9946 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9947 dest = gen_rtx_REG (SImode, REGNO (dest));
9948 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9949
9950 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9951 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9952 {
9953 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9954 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9955 }
9956
9957 emit_insn (tmp);
9958 }
9959
9960 /* X is an unchanging MEM. If it is a constant pool reference, return
9961 the constant pool rtx, else NULL. */
9962
9963 rtx
9964 maybe_get_pool_constant (rtx x)
9965 {
9966 x = ix86_delegitimize_address (XEXP (x, 0));
9967
9968 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9969 return get_pool_constant (x);
9970
9971 return NULL_RTX;
9972 }
9973
9974 void
9975 ix86_expand_move (enum machine_mode mode, rtx operands[])
9976 {
9977 int strict = (reload_in_progress || reload_completed);
9978 rtx op0, op1;
9979 enum tls_model model;
9980
9981 op0 = operands[0];
9982 op1 = operands[1];
9983
9984 if (GET_CODE (op1) == SYMBOL_REF)
9985 {
9986 model = SYMBOL_REF_TLS_MODEL (op1);
9987 if (model)
9988 {
9989 op1 = legitimize_tls_address (op1, model, true);
9990 op1 = force_operand (op1, op0);
9991 if (op1 == op0)
9992 return;
9993 }
9994 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9995 && SYMBOL_REF_DLLIMPORT_P (op1))
9996 op1 = legitimize_dllimport_symbol (op1, false);
9997 }
9998 else if (GET_CODE (op1) == CONST
9999 && GET_CODE (XEXP (op1, 0)) == PLUS
10000 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
10001 {
10002 rtx addend = XEXP (XEXP (op1, 0), 1);
10003 rtx symbol = XEXP (XEXP (op1, 0), 0);
10004 rtx tmp = NULL;
10005
10006 model = SYMBOL_REF_TLS_MODEL (symbol);
10007 if (model)
10008 tmp = legitimize_tls_address (symbol, model, true);
10009 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10010 && SYMBOL_REF_DLLIMPORT_P (symbol))
10011 tmp = legitimize_dllimport_symbol (symbol, true);
10012
10013 if (tmp)
10014 {
10015 tmp = force_operand (tmp, NULL);
10016 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
10017 op0, 1, OPTAB_DIRECT);
10018 if (tmp == op0)
10019 return;
10020 }
10021 }
10022
10023 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
10024 {
10025 if (TARGET_MACHO && !TARGET_64BIT)
10026 {
10027 #if TARGET_MACHO
10028 if (MACHOPIC_PURE)
10029 {
10030 rtx temp = ((reload_in_progress
10031 || ((op0 && REG_P (op0))
10032 && mode == Pmode))
10033 ? op0 : gen_reg_rtx (Pmode));
10034 op1 = machopic_indirect_data_reference (op1, temp);
10035 op1 = machopic_legitimize_pic_address (op1, mode,
10036 temp == op1 ? 0 : temp);
10037 }
10038 else if (MACHOPIC_INDIRECT)
10039 op1 = machopic_indirect_data_reference (op1, 0);
10040 if (op0 == op1)
10041 return;
10042 #endif
10043 }
10044 else
10045 {
10046 if (MEM_P (op0))
10047 op1 = force_reg (Pmode, op1);
10048 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
10049 {
10050 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
10051 op1 = legitimize_pic_address (op1, reg);
10052 if (op0 == op1)
10053 return;
10054 }
10055 }
10056 }
10057 else
10058 {
10059 if (MEM_P (op0)
10060 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
10061 || !push_operand (op0, mode))
10062 && MEM_P (op1))
10063 op1 = force_reg (mode, op1);
10064
10065 if (push_operand (op0, mode)
10066 && ! general_no_elim_operand (op1, mode))
10067 op1 = copy_to_mode_reg (mode, op1);
10068
10069       /* Force large constants in 64-bit compilation into a register
10070 	 to get them CSEed.  */
10071 if (TARGET_64BIT && mode == DImode
10072 && immediate_operand (op1, mode)
10073 && !x86_64_zext_immediate_operand (op1, VOIDmode)
10074 && !register_operand (op0, mode)
10075 && optimize && !reload_completed && !reload_in_progress)
10076 op1 = copy_to_mode_reg (mode, op1);
10077
10078 if (FLOAT_MODE_P (mode))
10079 {
10080 /* If we are loading a floating point constant to a register,
10081 force the value to memory now, since we'll get better code
10082 out the back end. */
10083
10084 if (strict)
10085 ;
10086 else if (GET_CODE (op1) == CONST_DOUBLE)
10087 {
10088 op1 = validize_mem (force_const_mem (mode, op1));
10089 if (!register_operand (op0, mode))
10090 {
10091 rtx temp = gen_reg_rtx (mode);
10092 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
10093 emit_move_insn (op0, temp);
10094 return;
10095 }
10096 }
10097 }
10098 }
10099
10100 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
10101 }
10102
10103 void
10104 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
10105 {
10106 rtx op0 = operands[0], op1 = operands[1];
10107 unsigned int align = GET_MODE_ALIGNMENT (mode);
10108
10109   /* Force constants other than zero into memory.  We do not know how
10110      the instructions used to build constants modify the upper 64 bits
10111      of the register; once we have that information we may be able
10112      to handle some of them more efficiently.  */
10113 if ((reload_in_progress | reload_completed) == 0
10114 && register_operand (op0, mode)
10115 && (CONSTANT_P (op1)
10116 || (GET_CODE (op1) == SUBREG
10117 && CONSTANT_P (SUBREG_REG (op1))))
10118 && standard_sse_constant_p (op1) <= 0)
10119 op1 = validize_mem (force_const_mem (mode, op1));
10120
10121   /* TDmode values are passed as TImode on the stack.  TImode values
10122      are moved via xmm registers, and moving them to the stack can result in
10123      unaligned memory accesses.  Use ix86_expand_vector_move_misalign()
10124      if the memory operand is not aligned correctly.  */
10125 if (can_create_pseudo_p ()
10126 && (mode == TImode) && !TARGET_64BIT
10127 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
10128 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
10129 {
10130 rtx tmp[2];
10131
10132 /* ix86_expand_vector_move_misalign() does not like constants ... */
10133 if (CONSTANT_P (op1)
10134 || (GET_CODE (op1) == SUBREG
10135 && CONSTANT_P (SUBREG_REG (op1))))
10136 op1 = validize_mem (force_const_mem (mode, op1));
10137
10138 /* ... nor both arguments in memory. */
10139 if (!register_operand (op0, mode)
10140 && !register_operand (op1, mode))
10141 op1 = force_reg (mode, op1);
10142
10143 tmp[0] = op0; tmp[1] = op1;
10144 ix86_expand_vector_move_misalign (mode, tmp);
10145 return;
10146 }
10147
10148 /* Make operand1 a register if it isn't already. */
10149 if (can_create_pseudo_p ()
10150 && !register_operand (op0, mode)
10151 && !register_operand (op1, mode))
10152 {
10153 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
10154 return;
10155 }
10156
10157 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
10158 }
10159
10160 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
10161 straight to ix86_expand_vector_move. */
10162 /* Code generation for scalar reg-reg moves of single and double precision data:
10163 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
10164 movaps reg, reg
10165 else
10166 movss reg, reg
10167 if (x86_sse_partial_reg_dependency == true)
10168 movapd reg, reg
10169 else
10170 movsd reg, reg
10171
10172 Code generation for scalar loads of double precision data:
10173 if (x86_sse_split_regs == true)
10174 movlpd mem, reg (gas syntax)
10175 else
10176 movsd mem, reg
10177
10178 Code generation for unaligned packed loads of single precision data
10179 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
10180 if (x86_sse_unaligned_move_optimal)
10181 movups mem, reg
10182
10183 if (x86_sse_partial_reg_dependency == true)
10184 {
10185 xorps reg, reg
10186 movlps mem, reg
10187 movhps mem+8, reg
10188 }
10189 else
10190 {
10191 movlps mem, reg
10192 movhps mem+8, reg
10193 }
10194
10195 Code generation for unaligned packed loads of double precision data
10196 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
10197 if (x86_sse_unaligned_move_optimal)
10198 movupd mem, reg
10199
10200 if (x86_sse_split_regs == true)
10201 {
10202 movlpd mem, reg
10203 movhpd mem+8, reg
10204 }
10205 else
10206 {
10207 movsd mem, reg
10208 movhpd mem+8, reg
10209 }
10210 */
10211
10212 void
10213 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
10214 {
10215 rtx op0, op1, m;
10216
10217 op0 = operands[0];
10218 op1 = operands[1];
10219
10220 if (MEM_P (op1))
10221 {
10222 /* If we're optimizing for size, movups is the smallest. */
10223 if (optimize_size)
10224 {
10225 op0 = gen_lowpart (V4SFmode, op0);
10226 op1 = gen_lowpart (V4SFmode, op1);
10227 emit_insn (gen_sse_movups (op0, op1));
10228 return;
10229 }
10230
10231 /* ??? If we have typed data, then it would appear that using
10232 movdqu is the only way to get unaligned data loaded with
10233 integer type. */
10234 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10235 {
10236 op0 = gen_lowpart (V16QImode, op0);
10237 op1 = gen_lowpart (V16QImode, op1);
10238 emit_insn (gen_sse2_movdqu (op0, op1));
10239 return;
10240 }
10241
10242 if (TARGET_SSE2 && mode == V2DFmode)
10243 {
10244 rtx zero;
10245
10246 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10247 {
10248 op0 = gen_lowpart (V2DFmode, op0);
10249 op1 = gen_lowpart (V2DFmode, op1);
10250 emit_insn (gen_sse2_movupd (op0, op1));
10251 return;
10252 }
10253
10254 /* When SSE registers are split into halves, we can avoid
10255 writing to the top half twice. */
10256 if (TARGET_SSE_SPLIT_REGS)
10257 {
10258 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10259 zero = op0;
10260 }
10261 else
10262 {
10263 /* ??? Not sure about the best option for the Intel chips.
10264 The following would seem to satisfy; the register is
10265 entirely cleared, breaking the dependency chain. We
10266 then store to the upper half, with a dependency depth
10267 of one. A rumor has it that Intel recommends two movsd
10268 followed by an unpacklpd, but this is unconfirmed. And
10269 given that the dependency depth of the unpacklpd would
10270 still be one, I'm not sure why this would be better. */
10271 zero = CONST0_RTX (V2DFmode);
10272 }
10273
10274 m = adjust_address (op1, DFmode, 0);
10275 emit_insn (gen_sse2_loadlpd (op0, zero, m));
10276 m = adjust_address (op1, DFmode, 8);
10277 emit_insn (gen_sse2_loadhpd (op0, op0, m));
10278 }
10279 else
10280 {
10281 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10282 {
10283 op0 = gen_lowpart (V4SFmode, op0);
10284 op1 = gen_lowpart (V4SFmode, op1);
10285 emit_insn (gen_sse_movups (op0, op1));
10286 return;
10287 }
10288
10289 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
10290 emit_move_insn (op0, CONST0_RTX (mode));
10291 else
10292 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10293
10294 if (mode != V4SFmode)
10295 op0 = gen_lowpart (V4SFmode, op0);
10296 m = adjust_address (op1, V2SFmode, 0);
10297 emit_insn (gen_sse_loadlps (op0, op0, m));
10298 m = adjust_address (op1, V2SFmode, 8);
10299 emit_insn (gen_sse_loadhps (op0, op0, m));
10300 }
10301 }
10302 else if (MEM_P (op0))
10303 {
10304 /* If we're optimizing for size, movups is the smallest. */
10305 if (optimize_size)
10306 {
10307 op0 = gen_lowpart (V4SFmode, op0);
10308 op1 = gen_lowpart (V4SFmode, op1);
10309 emit_insn (gen_sse_movups (op0, op1));
10310 return;
10311 }
10312
10313 /* ??? Similar to above, only less clear because of quote
10314 typeless stores unquote. */
10315 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
10316 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10317 {
10318 op0 = gen_lowpart (V16QImode, op0);
10319 op1 = gen_lowpart (V16QImode, op1);
10320 emit_insn (gen_sse2_movdqu (op0, op1));
10321 return;
10322 }
10323
10324 if (TARGET_SSE2 && mode == V2DFmode)
10325 {
10326 m = adjust_address (op0, DFmode, 0);
10327 emit_insn (gen_sse2_storelpd (m, op1));
10328 m = adjust_address (op0, DFmode, 8);
10329 emit_insn (gen_sse2_storehpd (m, op1));
10330 }
10331 else
10332 {
10333 if (mode != V4SFmode)
10334 op1 = gen_lowpart (V4SFmode, op1);
10335 m = adjust_address (op0, V2SFmode, 0);
10336 emit_insn (gen_sse_storelps (m, op1));
10337 m = adjust_address (op0, V2SFmode, 8);
10338 emit_insn (gen_sse_storehps (m, op1));
10339 }
10340 }
10341 else
10342 gcc_unreachable ();
10343 }
10344
10345 /* Expand a push in MODE. This is some mode for which we do not support
10346 proper push instructions, at least from the registers that we expect
10347 the value to live in. */
10348
10349 void
10350 ix86_expand_push (enum machine_mode mode, rtx x)
10351 {
10352 rtx tmp;
10353
10354 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
10355 GEN_INT (-GET_MODE_SIZE (mode)),
10356 stack_pointer_rtx, 1, OPTAB_DIRECT);
10357 if (tmp != stack_pointer_rtx)
10358 emit_move_insn (stack_pointer_rtx, tmp);
10359
10360 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
10361 emit_move_insn (tmp, x);
10362 }
10363
10364 /* Helper function of ix86_fixup_binary_operands to canonicalize
10365 operand order. Returns true if the operands should be swapped. */
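/* For example, (plus (const_int 8) (reg)) swaps so the immediate ends up
   as the second source, matching the register-plus-immediate form of the
   add patterns; a lone memory source is likewise pushed to the second
   slot.  (Illustrative operands.)  */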
10366
10367 static bool
10368 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
10369 rtx operands[])
10370 {
10371 rtx dst = operands[0];
10372 rtx src1 = operands[1];
10373 rtx src2 = operands[2];
10374
10375 /* If the operation is not commutative, we can't do anything. */
10376 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
10377 return false;
10378
10379 /* Highest priority is that src1 should match dst. */
10380 if (rtx_equal_p (dst, src1))
10381 return false;
10382 if (rtx_equal_p (dst, src2))
10383 return true;
10384
10385 /* Next highest priority is that immediate constants come second. */
10386 if (immediate_operand (src2, mode))
10387 return false;
10388 if (immediate_operand (src1, mode))
10389 return true;
10390
10391 /* Lowest priority is that memory references should come second. */
10392 if (MEM_P (src2))
10393 return false;
10394 if (MEM_P (src1))
10395 return true;
10396
10397 return false;
10398 }
10399
10400
10401 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
10402 destination to use for the operation. If different from the true
10403 destination in operands[0], a copy operation will be required. */
10404
10405 rtx
10406 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
10407 rtx operands[])
10408 {
10409 rtx dst = operands[0];
10410 rtx src1 = operands[1];
10411 rtx src2 = operands[2];
10412
10413 /* Canonicalize operand order. */
10414 if (ix86_swap_binary_operands_p (code, mode, operands))
10415 {
10416 rtx temp = src1;
10417 src1 = src2;
10418 src2 = temp;
10419 }
10420
10421 /* Both source operands cannot be in memory. */
10422 if (MEM_P (src1) && MEM_P (src2))
10423 {
10424 /* Optimization: Only read from memory once. */
10425 if (rtx_equal_p (src1, src2))
10426 {
10427 src2 = force_reg (mode, src2);
10428 src1 = src2;
10429 }
10430 else
10431 src2 = force_reg (mode, src2);
10432 }
10433
10434 /* If the destination is memory, and we do not have matching source
10435 operands, do things in registers. */
10436 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10437 dst = gen_reg_rtx (mode);
10438
10439 /* Source 1 cannot be a constant. */
10440 if (CONSTANT_P (src1))
10441 src1 = force_reg (mode, src1);
10442
10443 /* Source 1 cannot be a non-matching memory. */
10444 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10445 src1 = force_reg (mode, src1);
10446
10447 operands[1] = src1;
10448 operands[2] = src2;
10449 return dst;
10450 }
10451
10452 /* Similarly, but assume that the destination has already been
10453 set up properly. */
10454
10455 void
10456 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
10457 enum machine_mode mode, rtx operands[])
10458 {
10459 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
10460 gcc_assert (dst == operands[0]);
10461 }
10462
10463 /* Attempt to expand a binary operator.  Make the expansion closer to the
10464    actual machine than just general_operand, which would allow 3 separate
10465    memory references (one output, two input) in a single insn.  */
10466
10467 void
10468 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
10469 rtx operands[])
10470 {
10471 rtx src1, src2, dst, op, clob;
10472
10473 dst = ix86_fixup_binary_operands (code, mode, operands);
10474 src1 = operands[1];
10475 src2 = operands[2];
10476
10477 /* Emit the instruction. */
10478
10479 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
10480 if (reload_in_progress)
10481 {
10482 /* Reload doesn't know about the flags register, and doesn't know that
10483 it doesn't want to clobber it. We can only do this with PLUS. */
10484 gcc_assert (code == PLUS);
10485 emit_insn (op);
10486 }
10487 else
10488 {
10489 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10490 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10491 }
10492
10493 /* Fix up the destination if needed. */
10494 if (dst != operands[0])
10495 emit_move_insn (operands[0], dst);
10496 }
10497
10498 /* Return TRUE or FALSE depending on whether the binary operator meets the
10499 appropriate constraints. */
10500
10501 int
10502 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
10503 rtx operands[3])
10504 {
10505 rtx dst = operands[0];
10506 rtx src1 = operands[1];
10507 rtx src2 = operands[2];
10508
10509 /* Both source operands cannot be in memory. */
10510 if (MEM_P (src1) && MEM_P (src2))
10511 return 0;
10512
10513 /* Canonicalize operand order for commutative operators. */
10514 if (ix86_swap_binary_operands_p (code, mode, operands))
10515 {
10516 rtx temp = src1;
10517 src1 = src2;
10518 src2 = temp;
10519 }
10520
10521 /* If the destination is memory, we must have a matching source operand. */
10522 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10523 return 0;
10524
10525 /* Source 1 cannot be a constant. */
10526 if (CONSTANT_P (src1))
10527 return 0;
10528
10529 /* Source 1 cannot be a non-matching memory. */
10530 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10531 return 0;
10532
10533 return 1;
10534 }
10535
10536 /* Attempt to expand a unary operator.  Make the expansion closer to the
10537    actual machine than just general_operand, which would allow 2 separate
10538    memory references (one output, one input) in a single insn.  */
10539
10540 void
10541 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10542 rtx operands[])
10543 {
10544 int matching_memory;
10545 rtx src, dst, op, clob;
10546
10547 dst = operands[0];
10548 src = operands[1];
10549
10550 /* If the destination is memory, and we do not have matching source
10551 operands, do things in registers. */
10552 matching_memory = 0;
10553 if (MEM_P (dst))
10554 {
10555 if (rtx_equal_p (dst, src))
10556 matching_memory = 1;
10557 else
10558 dst = gen_reg_rtx (mode);
10559 }
10560
10561   /* When the source operand is memory, the destination must match.  */
10562 if (MEM_P (src) && !matching_memory)
10563 src = force_reg (mode, src);
10564
10565 /* Emit the instruction. */
10566
10567 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
10568 if (reload_in_progress || code == NOT)
10569 {
10570 /* Reload doesn't know about the flags register, and doesn't know that
10571 it doesn't want to clobber it. */
10572 gcc_assert (code == NOT);
10573 emit_insn (op);
10574 }
10575 else
10576 {
10577 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10578 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10579 }
10580
10581 /* Fix up the destination if needed. */
10582 if (dst != operands[0])
10583 emit_move_insn (operands[0], dst);
10584 }
10585
10586 /* Return TRUE or FALSE depending on whether the unary operator meets the
10587 appropriate constraints. */
10588
10589 int
10590 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
10591 enum machine_mode mode ATTRIBUTE_UNUSED,
10592 rtx operands[2] ATTRIBUTE_UNUSED)
10593 {
10594   /* If one of the operands is memory, the source and destination must match.  */
10595 if ((MEM_P (operands[0])
10596 || MEM_P (operands[1]))
10597 && ! rtx_equal_p (operands[0], operands[1]))
10598 return FALSE;
10599 return TRUE;
10600 }
10601
10602 /* Post-reload splitter for converting an SF or DFmode value in an
10603 SSE register into an unsigned SImode. */
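/* The value is compared against 2**31; where it is at least that large,
   2**31 is subtracted before the signed cvttps2dq/cvttpd2dq conversion
   and the sign bit is xored back in afterwards, covering the full
   unsigned SImode range.  */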
10604
10605 void
10606 ix86_split_convert_uns_si_sse (rtx operands[])
10607 {
10608 enum machine_mode vecmode;
10609 rtx value, large, zero_or_two31, input, two31, x;
10610
10611 large = operands[1];
10612 zero_or_two31 = operands[2];
10613 input = operands[3];
10614 two31 = operands[4];
10615 vecmode = GET_MODE (large);
10616 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
10617
10618 /* Load up the value into the low element. We must ensure that the other
10619 elements are valid floats -- zero is the easiest such value. */
10620 if (MEM_P (input))
10621 {
10622 if (vecmode == V4SFmode)
10623 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
10624 else
10625 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
10626 }
10627 else
10628 {
10629 input = gen_rtx_REG (vecmode, REGNO (input));
10630 emit_move_insn (value, CONST0_RTX (vecmode));
10631 if (vecmode == V4SFmode)
10632 emit_insn (gen_sse_movss (value, value, input));
10633 else
10634 emit_insn (gen_sse2_movsd (value, value, input));
10635 }
10636
10637 emit_move_insn (large, two31);
10638 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
10639
10640 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
10641 emit_insn (gen_rtx_SET (VOIDmode, large, x));
10642
10643 x = gen_rtx_AND (vecmode, zero_or_two31, large);
10644 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
10645
10646 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
10647 emit_insn (gen_rtx_SET (VOIDmode, value, x));
10648
10649 large = gen_rtx_REG (V4SImode, REGNO (large));
10650 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
10651
10652 x = gen_rtx_REG (V4SImode, REGNO (value));
10653 if (vecmode == V4SFmode)
10654 emit_insn (gen_sse2_cvttps2dq (x, value));
10655 else
10656 emit_insn (gen_sse2_cvttpd2dq (x, value));
10657 value = x;
10658
10659 emit_insn (gen_xorv4si3 (value, value, large));
10660 }
10661
10662 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10663 Expects the 64-bit DImode to be supplied in a pair of integral
10664 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10665 -mfpmath=sse, !optimize_size only. */
10666
10667 void
10668 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
10669 {
10670 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
10671 rtx int_xmm, fp_xmm;
10672 rtx biases, exponents;
10673 rtx x;
10674
10675 int_xmm = gen_reg_rtx (V4SImode);
10676 if (TARGET_INTER_UNIT_MOVES)
10677 emit_insn (gen_movdi_to_sse (int_xmm, input));
10678 else if (TARGET_SSE_SPLIT_REGS)
10679 {
10680 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
10681 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10682 }
10683 else
10684 {
10685 x = gen_reg_rtx (V2DImode);
10686 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10687 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
10688 }
10689
10690 x = gen_rtx_CONST_VECTOR (V4SImode,
10691 gen_rtvec (4, GEN_INT (0x43300000UL),
10692 GEN_INT (0x45300000UL),
10693 const0_rtx, const0_rtx));
10694 exponents = validize_mem (force_const_mem (V4SImode, x));
10695
10696 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10697 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10698
10699 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10700 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10701 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10702 (0x1.0p84 + double(fp_value_hi_xmm)).
10703 Note these exponents differ by 32. */
10704
10705 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
10706
10707 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10708 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10709 real_ldexp (&bias_lo_rvt, &dconst1, 52);
10710 real_ldexp (&bias_hi_rvt, &dconst1, 84);
10711 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
10712 x = const_double_from_real_value (bias_hi_rvt, DFmode);
10713 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
10714 biases = validize_mem (force_const_mem (V2DFmode, biases));
10715 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
10716
10717 /* Add the upper and lower DFmode values together. */
10718 if (TARGET_SSE3)
10719 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
10720 else
10721 {
10722 x = copy_to_mode_reg (V2DFmode, fp_xmm);
10723 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
10724 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
10725 }
10726
10727 ix86_expand_vector_extract (false, target, fp_xmm, 0);
10728 }
10729
10730 /* Convert an unsigned SImode value into a DFmode. Only currently used
10731 for SSE, but applicable anywhere. */
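/* The input is biased by -2**31 so it fits a signed SImode, converted
   with the ordinary signed int-to-double conversion, and then 2**31.0
   is added back in DFmode to undo the bias.  */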
10732
10733 void
10734 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10735 {
10736 REAL_VALUE_TYPE TWO31r;
10737 rtx x, fp;
10738
10739 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10740 NULL, 1, OPTAB_DIRECT);
10741
10742 fp = gen_reg_rtx (DFmode);
10743 emit_insn (gen_floatsidf2 (fp, x));
10744
10745 real_ldexp (&TWO31r, &dconst1, 31);
10746 x = const_double_from_real_value (TWO31r, DFmode);
10747
10748 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
10749 if (x != target)
10750 emit_move_insn (target, x);
10751 }
10752
10753 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10754 32-bit mode; otherwise we have a direct convert instruction. */
10755
10756 void
10757 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10758 {
10759 REAL_VALUE_TYPE TWO32r;
10760 rtx fp_lo, fp_hi, x;
10761
10762 fp_lo = gen_reg_rtx (DFmode);
10763 fp_hi = gen_reg_rtx (DFmode);
10764
10765 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10766
10767 real_ldexp (&TWO32r, &dconst1, 32);
10768 x = const_double_from_real_value (TWO32r, DFmode);
10769 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
10770
10771 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10772
10773 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
10774 0, OPTAB_DIRECT);
10775 if (x != target)
10776 emit_move_insn (target, x);
10777 }
10778
10779 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10780 For x86_32, -mfpmath=sse, !optimize_size only. */
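/* The value is split into its low and high 16-bit halves, each of which
   converts to SFmode exactly; the result is hi * 2**16 + lo, computed
   entirely in SSE registers.  */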
10781 void
10782 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10783 {
10784 REAL_VALUE_TYPE ONE16r;
10785 rtx fp_hi, fp_lo, int_hi, int_lo, x;
10786
10787 real_ldexp (&ONE16r, &dconst1, 16);
10788 x = const_double_from_real_value (ONE16r, SFmode);
10789 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10790 NULL, 0, OPTAB_DIRECT);
10791 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10792 NULL, 0, OPTAB_DIRECT);
10793 fp_hi = gen_reg_rtx (SFmode);
10794 fp_lo = gen_reg_rtx (SFmode);
10795 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10796 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10797 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10798 0, OPTAB_DIRECT);
10799 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10800 0, OPTAB_DIRECT);
10801 if (!rtx_equal_p (target, fp_hi))
10802 emit_move_insn (target, fp_hi);
10803 }
10804
10805 /* A subroutine of ix86_build_signbit_mask.  If VECT is true,
10806    then replicate the value for all elements of the vector
10807    register.  */
10808
10809 rtx
10810 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10811 {
10812 rtvec v;
10813 switch (mode)
10814 {
10815 case SImode:
10816 gcc_assert (vect);
10817 v = gen_rtvec (4, value, value, value, value);
10818 return gen_rtx_CONST_VECTOR (V4SImode, v);
10819
10820 case DImode:
10821 gcc_assert (vect);
10822 v = gen_rtvec (2, value, value);
10823 return gen_rtx_CONST_VECTOR (V2DImode, v);
10824
10825 case SFmode:
10826 if (vect)
10827 v = gen_rtvec (4, value, value, value, value);
10828 else
10829 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10830 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10831 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10832
10833 case DFmode:
10834 if (vect)
10835 v = gen_rtvec (2, value, value);
10836 else
10837 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10838 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10839
10840 default:
10841 gcc_unreachable ();
10842 }
10843 }
10844
10845 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
10846 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
10847 for an SSE register. If VECT is true, then replicate the mask for
10848 all elements of the vector register. If INVERT is true, then create
10849 a mask excluding the sign bit. */
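/* For SFmode, for instance, the mask is 0x80000000 in each selected
   element (0x7fffffff when INVERT), so negation becomes an xorps with
   the sign mask and absolute value an andps with the inverted mask.  */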
10850
10851 rtx
10852 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10853 {
10854 enum machine_mode vec_mode, imode;
10855 HOST_WIDE_INT hi, lo;
10856 int shift = 63;
10857 rtx v;
10858 rtx mask;
10859
10860 /* Find the sign bit, sign extended to 2*HWI. */
10861 switch (mode)
10862 {
10863 case SImode:
10864 case SFmode:
10865 imode = SImode;
10866 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
10867 lo = 0x80000000, hi = lo < 0;
10868 break;
10869
10870 case DImode:
10871 case DFmode:
10872 imode = DImode;
10873 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
10874 if (HOST_BITS_PER_WIDE_INT >= 64)
10875 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10876 else
10877 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
10878 break;
10879
10880 case TImode:
10881 case TFmode:
10882 imode = TImode;
10883 vec_mode = VOIDmode;
10884 gcc_assert (HOST_BITS_PER_WIDE_INT >= 64);
10885 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
10886 break;
10887
10888 default:
10889 gcc_unreachable ();
10890 }
10891
10892 if (invert)
10893 lo = ~lo, hi = ~hi;
10894
10895 /* Force this value into the low part of a fp vector constant. */
10896 mask = immed_double_const (lo, hi, imode);
10897 mask = gen_lowpart (mode, mask);
10898
10899 if (vec_mode == VOIDmode)
10900 return force_reg (mode, mask);
10901
10902 v = ix86_build_const_vector (mode, vect, mask);
10903 return force_reg (vec_mode, v);
10904 }
10905
10906 /* Generate code for floating point ABS or NEG. */
10907
10908 void
10909 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
10910 rtx operands[])
10911 {
10912 rtx mask, set, use, clob, dst, src;
10913 bool matching_memory;
10914 bool use_sse = false;
10915 bool vector_mode = VECTOR_MODE_P (mode);
10916 enum machine_mode elt_mode = mode;
10917
10918 if (vector_mode)
10919 {
10920 elt_mode = GET_MODE_INNER (mode);
10921 use_sse = true;
10922 }
10923 else if (mode == TFmode)
10924 use_sse = true;
10925 else if (TARGET_SSE_MATH)
10926 use_sse = SSE_FLOAT_MODE_P (mode);
10927
10928 /* NEG and ABS performed with SSE use bitwise mask operations.
10929 Create the appropriate mask now. */
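/* With the sign-bit mask M, NEG is computed as x ^ M and ABS as
   x & ~M, which is why the mask is inverted when CODE is ABS.  */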
10930 if (use_sse)
10931 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
10932 else
10933 mask = NULL_RTX;
10934
10935 dst = operands[0];
10936 src = operands[1];
10937
10938 /* If the destination is memory, and we don't have matching source
10939 operands or we're using the x87, do things in registers. */
10940 matching_memory = false;
10941 if (MEM_P (dst))
10942 {
10943 if (use_sse && rtx_equal_p (dst, src))
10944 matching_memory = true;
10945 else
10946 dst = gen_reg_rtx (mode);
10947 }
10948 if (MEM_P (src) && !matching_memory)
10949 src = force_reg (mode, src);
10950
10951 if (vector_mode)
10952 {
10953 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
10954 set = gen_rtx_SET (VOIDmode, dst, set);
10955 emit_insn (set);
10956 }
10957 else
10958 {
10959 set = gen_rtx_fmt_e (code, mode, src);
10960 set = gen_rtx_SET (VOIDmode, dst, set);
10961 if (mask)
10962 {
10963 use = gen_rtx_USE (VOIDmode, mask);
10964 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10965 emit_insn (gen_rtx_PARALLEL (VOIDmode,
10966 gen_rtvec (3, set, use, clob)));
10967 }
10968 else
10969 emit_insn (set);
10970 }
10971
10972 if (dst != operands[0])
10973 emit_move_insn (operands[0], dst);
10974 }
10975
10976 /* Expand a copysign operation. Special case operand 0 being a constant. */
10977
10978 void
10979 ix86_expand_copysign (rtx operands[])
10980 {
10981 enum machine_mode mode, vmode;
10982 rtx dest, op0, op1, mask, nmask;
10983
10984 dest = operands[0];
10985 op0 = operands[1];
10986 op1 = operands[2];
10987
10988 mode = GET_MODE (dest);
10989 vmode = mode == SFmode ? V4SFmode : V2DFmode;
10990
10991 if (GET_CODE (op0) == CONST_DOUBLE)
10992 {
10993 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
10994
10995 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
10996 op0 = simplify_unary_operation (ABS, mode, op0, mode);
10997
10998 if (mode == SFmode || mode == DFmode)
10999 {
11000 if (op0 == CONST0_RTX (mode))
11001 op0 = CONST0_RTX (vmode);
11002 else
11003 {
11004 rtvec v;
11005
11006 if (mode == SFmode)
11007 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
11008 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
11009 else
11010 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
11011 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
11012 }
11013 }
11014
11015 mask = ix86_build_signbit_mask (mode, 0, 0);
11016
11017 if (mode == SFmode)
11018 copysign_insn = gen_copysignsf3_const;
11019 else if (mode == DFmode)
11020 copysign_insn = gen_copysigndf3_const;
11021 else
11022 copysign_insn = gen_copysigntf3_const;
11023
11024 emit_insn (copysign_insn (dest, op0, op1, mask));
11025 }
11026 else
11027 {
11028 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
11029
11030 nmask = ix86_build_signbit_mask (mode, 0, 1);
11031 mask = ix86_build_signbit_mask (mode, 0, 0);
11032
11033 if (mode == SFmode)
11034 copysign_insn = gen_copysignsf3_var;
11035 else if (mode == DFmode)
11036 copysign_insn = gen_copysigndf3_var;
11037 else
11038 copysign_insn = gen_copysigntf3_var;
11039
11040 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
11041 }
11042 }
11043
11044 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
11045 be a constant, and so has already been expanded into a vector constant. */
11046
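/* The result is (op1 & sign_mask) | |op0|.  The AND below operates on
   DEST, which the insn pattern is expected to tie to OP1, so only the
   sign bit of OP1 survives before the absolute-value constant is ORed
   in.  */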
11047 void
11048 ix86_split_copysign_const (rtx operands[])
11049 {
11050 enum machine_mode mode, vmode;
11051 rtx dest, op0, op1, mask, x;
11052
11053 dest = operands[0];
11054 op0 = operands[1];
11055 op1 = operands[2];
11056 mask = operands[3];
11057
11058 mode = GET_MODE (dest);
11059 vmode = GET_MODE (mask);
11060
11061 dest = simplify_gen_subreg (vmode, dest, mode, 0);
11062 x = gen_rtx_AND (vmode, dest, mask);
11063 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11064
11065 if (op0 != CONST0_RTX (vmode))
11066 {
11067 x = gen_rtx_IOR (vmode, dest, op0);
11068 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11069 }
11070 }
11071
11072 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
11073 so we have to do two masks. */
11074
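/* The value computed is (op0 & ~sign_mask) | (op1 & sign_mask); the
   cases below differ only in which registers, per the insn
   alternatives, hold the intermediate results.  */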
11075 void
11076 ix86_split_copysign_var (rtx operands[])
11077 {
11078 enum machine_mode mode, vmode;
11079 rtx dest, scratch, op0, op1, mask, nmask, x;
11080
11081 dest = operands[0];
11082 scratch = operands[1];
11083 op0 = operands[2];
11084 op1 = operands[3];
11085 nmask = operands[4];
11086 mask = operands[5];
11087
11088 mode = GET_MODE (dest);
11089 vmode = GET_MODE (mask);
11090
11091 if (rtx_equal_p (op0, op1))
11092 {
11093 /* Shouldn't happen often (it's useless, obviously), but when it does
11094 we'd generate incorrect code if we continue below. */
11095 emit_move_insn (dest, op0);
11096 return;
11097 }
11098
11099 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
11100 {
11101 gcc_assert (REGNO (op1) == REGNO (scratch));
11102
11103 x = gen_rtx_AND (vmode, scratch, mask);
11104 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
11105
11106 dest = mask;
11107 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11108 x = gen_rtx_NOT (vmode, dest);
11109 x = gen_rtx_AND (vmode, x, op0);
11110 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11111 }
11112 else
11113 {
11114 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
11115 {
11116 x = gen_rtx_AND (vmode, scratch, mask);
11117 }
11118 else /* alternative 2,4 */
11119 {
11120 gcc_assert (REGNO (mask) == REGNO (scratch));
11121 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
11122 x = gen_rtx_AND (vmode, scratch, op1);
11123 }
11124 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
11125
11126 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
11127 {
11128 dest = simplify_gen_subreg (vmode, op0, mode, 0);
11129 x = gen_rtx_AND (vmode, dest, nmask);
11130 }
11131 else /* alternative 3,4 */
11132 {
11133 gcc_assert (REGNO (nmask) == REGNO (dest));
11134 dest = nmask;
11135 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11136 x = gen_rtx_AND (vmode, dest, op0);
11137 }
11138 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11139 }
11140
11141 x = gen_rtx_IOR (vmode, dest, scratch);
11142 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11143 }
11144
11145 /* Return TRUE or FALSE depending on whether the first SET in INSN
11146 has source and destination with matching CC modes, and that the
11147 CC mode is at least as constrained as REQ_MODE. */
11148
11149 int
11150 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
11151 {
11152 rtx set;
11153 enum machine_mode set_mode;
11154
11155 set = PATTERN (insn);
11156 if (GET_CODE (set) == PARALLEL)
11157 set = XVECEXP (set, 0, 0);
11158 gcc_assert (GET_CODE (set) == SET);
11159 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
11160
11161 set_mode = GET_MODE (SET_DEST (set));
11162 switch (set_mode)
11163 {
11164 case CCNOmode:
11165 if (req_mode != CCNOmode
11166 && (req_mode != CCmode
11167 || XEXP (SET_SRC (set), 1) != const0_rtx))
11168 return 0;
11169 break;
11170 case CCmode:
11171 if (req_mode == CCGCmode)
11172 return 0;
11173 /* FALLTHRU */
11174 case CCGCmode:
11175 if (req_mode == CCGOCmode || req_mode == CCNOmode)
11176 return 0;
11177 /* FALLTHRU */
11178 case CCGOCmode:
11179 if (req_mode == CCZmode)
11180 return 0;
11181 /* FALLTHRU */
11182 case CCZmode:
11183 break;
11184
11185 default:
11186 gcc_unreachable ();
11187 }
11188
11189 return (GET_MODE (SET_SRC (set)) == set_mode);
11190 }
11191
11192 /* Generate insn patterns to do an integer compare of OPERANDS. */
11193
11194 static rtx
11195 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
11196 {
11197 enum machine_mode cmpmode;
11198 rtx tmp, flags;
11199
11200 cmpmode = SELECT_CC_MODE (code, op0, op1);
11201 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
11202
11203 /* This is very simple, but making the interface the same as in the
11204 FP case makes the rest of the code easier. */
11205 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
11206 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
11207
11208 /* Return the test that should be put into the flags user, i.e.
11209 the bcc, scc, or cmov instruction. */
11210 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
11211 }
11212
11213 /* Figure out whether to use ordered or unordered fp comparisons.
11214 Return the appropriate mode to use. */
11215
11216 enum machine_mode
11217 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
11218 {
11219 /* ??? In order to make all comparisons reversible, we do all comparisons
11220 non-trapping when compiling for IEEE. Once gcc is able to distinguish
11221 all forms of trapping and nontrapping comparisons, we can make inequality
11222 comparisons trapping again, since it results in better code when using
11223 FCOM based compares. */
11224 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
11225 }
11226
11227 enum machine_mode
11228 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
11229 {
11230 enum machine_mode mode = GET_MODE (op0);
11231
11232 if (SCALAR_FLOAT_MODE_P (mode))
11233 {
11234 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11235 return ix86_fp_compare_mode (code);
11236 }
11237
11238 switch (code)
11239 {
11240 /* Only zero flag is needed. */
11241 case EQ: /* ZF=0 */
11242 case NE: /* ZF!=0 */
11243 return CCZmode;
11244 /* Codes needing carry flag. */
11245 case GEU: /* CF=0 */
11246 case LTU: /* CF=1 */
11247 /* Detect overflow checks. They need just the carry flag. */
11248 if (GET_CODE (op0) == PLUS
11249 && rtx_equal_p (op1, XEXP (op0, 0)))
11250 return CCCmode;
11251 else
11252 return CCmode;
11253 case GTU: /* CF=0 & ZF=0 */
11254 case LEU: /* CF=1 | ZF=1 */
11255 /* Detect overflow checks. They need just the carry flag. */
11256 if (GET_CODE (op0) == MINUS
11257 && rtx_equal_p (op1, XEXP (op0, 0)))
11258 return CCCmode;
11259 else
11260 return CCmode;
11261 /* Codes possibly doable only with sign flag when
11262 comparing against zero. */
11263 case GE: /* SF=OF or SF=0 */
11264 case LT: /* SF<>OF or SF=1 */
11265 if (op1 == const0_rtx)
11266 return CCGOCmode;
11267 else
11268 /* For other cases Carry flag is not required. */
11269 return CCGCmode;
11270 /* Codes doable only with the sign flag when comparing
11271 against zero, but we lack a jump instruction for it,
11272 so we need to use relational tests against overflow,
11273 which thus needs to be zero. */
11274 case GT: /* ZF=0 & SF=OF */
11275 case LE: /* ZF=1 | SF<>OF */
11276 if (op1 == const0_rtx)
11277 return CCNOmode;
11278 else
11279 return CCGCmode;
11280 /* The strcmp pattern does (use flags) and combine may ask us for the
11281 proper mode. */
11282 case USE:
11283 return CCmode;
11284 default:
11285 gcc_unreachable ();
11286 }
11287 }
11288
11289 /* Return the fixed registers used for condition codes. */
11290
11291 static bool
11292 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
11293 {
11294 *p1 = FLAGS_REG;
11295 *p2 = FPSR_REG;
11296 return true;
11297 }
11298
11299 /* If two condition code modes are compatible, return a condition code
11300 mode which is compatible with both. Otherwise, return
11301 VOIDmode. */
11302
11303 static enum machine_mode
11304 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
11305 {
11306 if (m1 == m2)
11307 return m1;
11308
11309 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
11310 return VOIDmode;
11311
11312 if ((m1 == CCGCmode && m2 == CCGOCmode)
11313 || (m1 == CCGOCmode && m2 == CCGCmode))
11314 return CCGCmode;
11315
11316 switch (m1)
11317 {
11318 default:
11319 gcc_unreachable ();
11320
11321 case CCmode:
11322 case CCGCmode:
11323 case CCGOCmode:
11324 case CCNOmode:
11325 case CCAmode:
11326 case CCCmode:
11327 case CCOmode:
11328 case CCSmode:
11329 case CCZmode:
11330 switch (m2)
11331 {
11332 default:
11333 return VOIDmode;
11334
11335 case CCmode:
11336 case CCGCmode:
11337 case CCGOCmode:
11338 case CCNOmode:
11339 case CCAmode:
11340 case CCCmode:
11341 case CCOmode:
11342 case CCSmode:
11343 case CCZmode:
11344 return CCmode;
11345 }
11346
11347 case CCFPmode:
11348 case CCFPUmode:
11349 /* These are only compatible with themselves, which we already
11350 checked above. */
11351 return VOIDmode;
11352 }
11353 }
11354
11355 /* Split comparison code CODE into comparisons we can do using branch
11356 instructions. BYPASS_CODE is the comparison code for the branch that will
11357 branch around FIRST_CODE and SECOND_CODE. If one of the branches
11358 is not required, its value is set to UNKNOWN.
11359 We never require more than two branches. */
11360
11361 void
11362 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
11363 enum rtx_code *first_code,
11364 enum rtx_code *second_code)
11365 {
11366 *first_code = code;
11367 *bypass_code = UNKNOWN;
11368 *second_code = UNKNOWN;
11369
11370 /* The fcomi comparison sets flags as follows:
11371
11372 cmp ZF PF CF
11373 > 0 0 0
11374 < 0 0 1
11375 = 1 0 0
11376 un 1 1 1 */
11377
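/* For example, a plain LT must fail on an unordered result, yet CF=1
   for the unordered case as well; so under IEEE math LT is rewritten
   as UNLT guarded by an UNORDERED bypass branch around it.  */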
11378 switch (code)
11379 {
11380 case GT: /* GTU - CF=0 & ZF=0 */
11381 case GE: /* GEU - CF=0 */
11382 case ORDERED: /* PF=0 */
11383 case UNORDERED: /* PF=1 */
11384 case UNEQ: /* EQ - ZF=1 */
11385 case UNLT: /* LTU - CF=1 */
11386 case UNLE: /* LEU - CF=1 | ZF=1 */
11387 case LTGT: /* EQ - ZF=0 */
11388 break;
11389 case LT: /* LTU - CF=1 - fails on unordered */
11390 *first_code = UNLT;
11391 *bypass_code = UNORDERED;
11392 break;
11393 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
11394 *first_code = UNLE;
11395 *bypass_code = UNORDERED;
11396 break;
11397 case EQ: /* EQ - ZF=1 - fails on unordered */
11398 *first_code = UNEQ;
11399 *bypass_code = UNORDERED;
11400 break;
11401 case NE: /* NE - ZF=0 - fails on unordered */
11402 *first_code = LTGT;
11403 *second_code = UNORDERED;
11404 break;
11405 case UNGE: /* GEU - CF=0 - fails on unordered */
11406 *first_code = GE;
11407 *second_code = UNORDERED;
11408 break;
11409 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
11410 *first_code = GT;
11411 *second_code = UNORDERED;
11412 break;
11413 default:
11414 gcc_unreachable ();
11415 }
11416 if (!TARGET_IEEE_FP)
11417 {
11418 *second_code = UNKNOWN;
11419 *bypass_code = UNKNOWN;
11420 }
11421 }
11422
11423 /* Return the cost of a comparison done using fcom + arithmetic operations on AX.
11424 All following functions use the number of instructions as a cost metric.
11425 In the future this should be tweaked to compute bytes for optimize_size and
11426 take into account the performance of various instructions on various CPUs. */
11427 static int
11428 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
11429 {
11430 if (!TARGET_IEEE_FP)
11431 return 4;
11432 /* The cost of code output by ix86_expand_fp_compare. */
11433 switch (code)
11434 {
11435 case UNLE:
11436 case UNLT:
11437 case LTGT:
11438 case GT:
11439 case GE:
11440 case UNORDERED:
11441 case ORDERED:
11442 case UNEQ:
11443 return 4;
11444 break;
11445 case LT:
11446 case NE:
11447 case EQ:
11448 case UNGE:
11449 return 5;
11450 break;
11451 case LE:
11452 case UNGT:
11453 return 6;
11454 break;
11455 default:
11456 gcc_unreachable ();
11457 }
11458 }
11459
11460 /* Return cost of comparison done using fcomi operation.
11461 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11462 static int
11463 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
11464 {
11465 enum rtx_code bypass_code, first_code, second_code;
11466 /* Return an arbitrarily high cost when the instruction is not supported - this
11467 prevents gcc from using it. */
11468 if (!TARGET_CMOVE)
11469 return 1024;
11470 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11471 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
11472 }
11473
11474 /* Return cost of comparison done using sahf operation.
11475 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11476 static int
11477 ix86_fp_comparison_sahf_cost (enum rtx_code code)
11478 {
11479 enum rtx_code bypass_code, first_code, second_code;
11480 /* Return an arbitrarily high cost when the instruction is not preferred - this
11481 prevents gcc from using it. */
11482 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
11483 return 1024;
11484 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11485 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
11486 }
11487
11488 /* Compute cost of the comparison done using any method.
11489 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11490 static int
11491 ix86_fp_comparison_cost (enum rtx_code code)
11492 {
11493 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
11494 int min;
11495
11496 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
11497 sahf_cost = ix86_fp_comparison_sahf_cost (code);
11498
11499 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
11500 if (min > sahf_cost)
11501 min = sahf_cost;
11502 if (min > fcomi_cost)
11503 min = fcomi_cost;
11504 return min;
11505 }
11506
11507 /* Return true if we should use an FCOMI instruction for this
11508 fp comparison. */
11509
11510 int
11511 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
11512 {
11513 enum rtx_code swapped_code = swap_condition (code);
11514
11515 return ((ix86_fp_comparison_cost (code)
11516 == ix86_fp_comparison_fcomi_cost (code))
11517 || (ix86_fp_comparison_cost (swapped_code)
11518 == ix86_fp_comparison_fcomi_cost (swapped_code)));
11519 }
11520
11521 /* Swap, force into registers, or otherwise massage the two operands
11522 to a fp comparison. The operands are updated in place; the new
11523 comparison code is returned. */
11524
11525 static enum rtx_code
11526 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
11527 {
11528 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
11529 rtx op0 = *pop0, op1 = *pop1;
11530 enum machine_mode op_mode = GET_MODE (op0);
11531 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
11532
11533 /* All of the unordered compare instructions only work on registers.
11534 The same is true of the fcomi compare instructions. The XFmode
11535 compare instructions require registers except when comparing
11536 against zero or when converting operand 1 from fixed point to
11537 floating point. */
11538
11539 if (!is_sse
11540 && (fpcmp_mode == CCFPUmode
11541 || (op_mode == XFmode
11542 && ! (standard_80387_constant_p (op0) == 1
11543 || standard_80387_constant_p (op1) == 1)
11544 && GET_CODE (op1) != FLOAT)
11545 || ix86_use_fcomi_compare (code)))
11546 {
11547 op0 = force_reg (op_mode, op0);
11548 op1 = force_reg (op_mode, op1);
11549 }
11550 else
11551 {
11552 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
11553 things around if they appear profitable, otherwise force op0
11554 into a register. */
11555
11556 if (standard_80387_constant_p (op0) == 0
11557 || (MEM_P (op0)
11558 && ! (standard_80387_constant_p (op1) == 0
11559 || MEM_P (op1))))
11560 {
11561 rtx tmp;
11562 tmp = op0, op0 = op1, op1 = tmp;
11563 code = swap_condition (code);
11564 }
11565
11566 if (!REG_P (op0))
11567 op0 = force_reg (op_mode, op0);
11568
11569 if (CONSTANT_P (op1))
11570 {
11571 int tmp = standard_80387_constant_p (op1);
11572 if (tmp == 0)
11573 op1 = validize_mem (force_const_mem (op_mode, op1));
11574 else if (tmp == 1)
11575 {
11576 if (TARGET_CMOVE)
11577 op1 = force_reg (op_mode, op1);
11578 }
11579 else
11580 op1 = force_reg (op_mode, op1);
11581 }
11582 }
11583
11584 /* Try to rearrange the comparison to make it cheaper. */
11585 if (ix86_fp_comparison_cost (code)
11586 > ix86_fp_comparison_cost (swap_condition (code))
11587 && (REG_P (op1) || can_create_pseudo_p ()))
11588 {
11589 rtx tmp;
11590 tmp = op0, op0 = op1, op1 = tmp;
11591 code = swap_condition (code);
11592 if (!REG_P (op0))
11593 op0 = force_reg (op_mode, op0);
11594 }
11595
11596 *pop0 = op0;
11597 *pop1 = op1;
11598 return code;
11599 }
11600
11601 /* Convert comparison codes we use to represent FP comparison to integer
11602 code that will result in proper branch. Return UNKNOWN if no such code
11603 is available. */
11604
11605 enum rtx_code
11606 ix86_fp_compare_code_to_integer (enum rtx_code code)
11607 {
11608 switch (code)
11609 {
11610 case GT:
11611 return GTU;
11612 case GE:
11613 return GEU;
11614 case ORDERED:
11615 case UNORDERED:
11616 return code;
11617 break;
11618 case UNEQ:
11619 return EQ;
11620 break;
11621 case UNLT:
11622 return LTU;
11623 break;
11624 case UNLE:
11625 return LEU;
11626 break;
11627 case LTGT:
11628 return NE;
11629 break;
11630 default:
11631 return UNKNOWN;
11632 }
11633 }
11634
11635 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11636
11637 static rtx
11638 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
11639 rtx *second_test, rtx *bypass_test)
11640 {
11641 enum machine_mode fpcmp_mode, intcmp_mode;
11642 rtx tmp, tmp2;
11643 int cost = ix86_fp_comparison_cost (code);
11644 enum rtx_code bypass_code, first_code, second_code;
11645
11646 fpcmp_mode = ix86_fp_compare_mode (code);
11647 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
11648
11649 if (second_test)
11650 *second_test = NULL_RTX;
11651 if (bypass_test)
11652 *bypass_test = NULL_RTX;
11653
11654 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11655
11656 /* Do fcomi/sahf based test when profitable. */
11657 if (ix86_fp_comparison_arithmetics_cost (code) > cost
11658 && (bypass_code == UNKNOWN || bypass_test)
11659 && (second_code == UNKNOWN || second_test))
11660 {
11661 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11662 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
11663 tmp);
11664 if (TARGET_CMOVE)
11665 emit_insn (tmp);
11666 else
11667 {
11668 gcc_assert (TARGET_SAHF);
11669
11670 if (!scratch)
11671 scratch = gen_reg_rtx (HImode);
11672 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
11673
11674 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
11675 }
11676
11677 /* The FP codes work out to act like unsigned. */
11678 intcmp_mode = fpcmp_mode;
11679 code = first_code;
11680 if (bypass_code != UNKNOWN)
11681 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11682 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11683 const0_rtx);
11684 if (second_code != UNKNOWN)
11685 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11686 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11687 const0_rtx);
11688 }
11689 else
11690 {
11691 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11692 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11693 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11694 if (!scratch)
11695 scratch = gen_reg_rtx (HImode);
11696 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11697
11698 /* In the unordered case, we have to check C2 for NaN's, which
11699 doesn't happen to work out to anything nice combination-wise.
11700 So do some bit twiddling on the value we've got in AH to come
11701 up with an appropriate set of condition codes. */
11702
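/* After fnstsw the condition bits of the FPU status word land in AH
   as C0 = 0x01, C2 = 0x04 and C3 = 0x40; the masks used below are
   combinations of these, e.g. 0x45 = C3 | C2 | C0.  */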
11703 intcmp_mode = CCNOmode;
11704 switch (code)
11705 {
11706 case GT:
11707 case UNGT:
11708 if (code == GT || !TARGET_IEEE_FP)
11709 {
11710 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11711 code = EQ;
11712 }
11713 else
11714 {
11715 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11716 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11717 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
11718 intcmp_mode = CCmode;
11719 code = GEU;
11720 }
11721 break;
11722 case LT:
11723 case UNLT:
11724 if (code == LT && TARGET_IEEE_FP)
11725 {
11726 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11727 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
11728 intcmp_mode = CCmode;
11729 code = EQ;
11730 }
11731 else
11732 {
11733 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
11734 code = NE;
11735 }
11736 break;
11737 case GE:
11738 case UNGE:
11739 if (code == GE || !TARGET_IEEE_FP)
11740 {
11741 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
11742 code = EQ;
11743 }
11744 else
11745 {
11746 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11747 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11748 GEN_INT (0x01)));
11749 code = NE;
11750 }
11751 break;
11752 case LE:
11753 case UNLE:
11754 if (code == LE && TARGET_IEEE_FP)
11755 {
11756 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11757 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11758 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11759 intcmp_mode = CCmode;
11760 code = LTU;
11761 }
11762 else
11763 {
11764 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11765 code = NE;
11766 }
11767 break;
11768 case EQ:
11769 case UNEQ:
11770 if (code == EQ && TARGET_IEEE_FP)
11771 {
11772 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11773 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11774 intcmp_mode = CCmode;
11775 code = EQ;
11776 }
11777 else
11778 {
11779 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11780 code = NE;
11781 break;
11782 }
11783 break;
11784 case NE:
11785 case LTGT:
11786 if (code == NE && TARGET_IEEE_FP)
11787 {
11788 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11789 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11790 GEN_INT (0x40)));
11791 code = NE;
11792 }
11793 else
11794 {
11795 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11796 code = EQ;
11797 }
11798 break;
11799
11800 case UNORDERED:
11801 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11802 code = NE;
11803 break;
11804 case ORDERED:
11805 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11806 code = EQ;
11807 break;
11808
11809 default:
11810 gcc_unreachable ();
11811 }
11812 }
11813
11814 /* Return the test that should be put into the flags user, i.e.
11815 the bcc, scc, or cmov instruction. */
11816 return gen_rtx_fmt_ee (code, VOIDmode,
11817 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11818 const0_rtx);
11819 }
11820
11821 rtx
11822 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11823 {
11824 rtx op0, op1, ret;
11825 op0 = ix86_compare_op0;
11826 op1 = ix86_compare_op1;
11827
11828 if (second_test)
11829 *second_test = NULL_RTX;
11830 if (bypass_test)
11831 *bypass_test = NULL_RTX;
11832
11833 if (ix86_compare_emitted)
11834 {
11835 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11836 ix86_compare_emitted = NULL_RTX;
11837 }
11838 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
11839 {
11840 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
11841 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11842 second_test, bypass_test);
11843 }
11844 else
11845 ret = ix86_expand_int_compare (code, op0, op1);
11846
11847 return ret;
11848 }
11849
11850 /* Return true if the CODE will result in nontrivial jump sequence. */
11851 bool
11852 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11853 {
11854 enum rtx_code bypass_code, first_code, second_code;
11855 if (!TARGET_CMOVE)
11856 return true;
11857 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11858 return bypass_code != UNKNOWN || second_code != UNKNOWN;
11859 }
11860
11861 void
11862 ix86_expand_branch (enum rtx_code code, rtx label)
11863 {
11864 rtx tmp;
11865
11866 /* If we have emitted a compare insn, go straight to simple.
11867 ix86_expand_compare won't emit anything if ix86_compare_emitted
11868 is non-NULL. */
11869 if (ix86_compare_emitted)
11870 goto simple;
11871
11872 switch (GET_MODE (ix86_compare_op0))
11873 {
11874 case QImode:
11875 case HImode:
11876 case SImode:
11877 simple:
11878 tmp = ix86_expand_compare (code, NULL, NULL);
11879 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11880 gen_rtx_LABEL_REF (VOIDmode, label),
11881 pc_rtx);
11882 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11883 return;
11884
11885 case SFmode:
11886 case DFmode:
11887 case XFmode:
11888 {
11889 rtvec vec;
11890 int use_fcomi;
11891 enum rtx_code bypass_code, first_code, second_code;
11892
11893 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
11894 &ix86_compare_op1);
11895
11896 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11897
11898 /* Check whether we will use the natural sequence with one jump. If
11899 so, we can expand jump early. Otherwise delay expansion by
11900 creating compound insn to not confuse optimizers. */
11901 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
11902 {
11903 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
11904 gen_rtx_LABEL_REF (VOIDmode, label),
11905 pc_rtx, NULL_RTX, NULL_RTX);
11906 }
11907 else
11908 {
11909 tmp = gen_rtx_fmt_ee (code, VOIDmode,
11910 ix86_compare_op0, ix86_compare_op1);
11911 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11912 gen_rtx_LABEL_REF (VOIDmode, label),
11913 pc_rtx);
11914 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
11915
11916 use_fcomi = ix86_use_fcomi_compare (code);
11917 vec = rtvec_alloc (3 + !use_fcomi);
11918 RTVEC_ELT (vec, 0) = tmp;
11919 RTVEC_ELT (vec, 1)
11920 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
11921 RTVEC_ELT (vec, 2)
11922 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
11923 if (! use_fcomi)
11924 RTVEC_ELT (vec, 3)
11925 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
11926
11927 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
11928 }
11929 return;
11930 }
11931
11932 case DImode:
11933 if (TARGET_64BIT)
11934 goto simple;
11935 case TImode:
11936 /* Expand DImode branch into multiple compare+branch. */
11937 {
11938 rtx lo[2], hi[2], label2;
11939 enum rtx_code code1, code2, code3;
11940 enum machine_mode submode;
11941
11942 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
11943 {
11944 tmp = ix86_compare_op0;
11945 ix86_compare_op0 = ix86_compare_op1;
11946 ix86_compare_op1 = tmp;
11947 code = swap_condition (code);
11948 }
11949 if (GET_MODE (ix86_compare_op0) == DImode)
11950 {
11951 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
11952 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
11953 submode = SImode;
11954 }
11955 else
11956 {
11957 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
11958 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
11959 submode = DImode;
11960 }
11961
11962 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11963 avoid two branches. This costs one extra insn, so disable when
11964 optimizing for size. */
11965
11966 if ((code == EQ || code == NE)
11967 && (!optimize_size
11968 || hi[1] == const0_rtx || lo[1] == const0_rtx))
11969 {
11970 rtx xor0, xor1;
11971
11972 xor1 = hi[0];
11973 if (hi[1] != const0_rtx)
11974 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
11975 NULL_RTX, 0, OPTAB_WIDEN);
11976
11977 xor0 = lo[0];
11978 if (lo[1] != const0_rtx)
11979 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
11980 NULL_RTX, 0, OPTAB_WIDEN);
11981
11982 tmp = expand_binop (submode, ior_optab, xor1, xor0,
11983 NULL_RTX, 0, OPTAB_WIDEN);
11984
11985 ix86_compare_op0 = tmp;
11986 ix86_compare_op1 = const0_rtx;
11987 ix86_expand_branch (code, label);
11988 return;
11989 }
11990
11991 /* Otherwise, if we are doing less-than or greater-or-equal-than,
11992 op1 is a constant and the low word is zero, then we can just
11993 examine the high word. */
11994
11995 if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
11996 switch (code)
11997 {
11998 case LT: case LTU: case GE: case GEU:
11999 ix86_compare_op0 = hi[0];
12000 ix86_compare_op1 = hi[1];
12001 ix86_expand_branch (code, label);
12002 return;
12003 default:
12004 break;
12005 }
12006
12007 /* Otherwise, we need two or three jumps. */
12008
12009 label2 = gen_label_rtx ();
12010
12011 code1 = code;
12012 code2 = swap_condition (code);
12013 code3 = unsigned_condition (code);
12014
12015 switch (code)
12016 {
12017 case LT: case GT: case LTU: case GTU:
12018 break;
12019
12020 case LE: code1 = LT; code2 = GT; break;
12021 case GE: code1 = GT; code2 = LT; break;
12022 case LEU: code1 = LTU; code2 = GTU; break;
12023 case GEU: code1 = GTU; code2 = LTU; break;
12024
12025 case EQ: code1 = UNKNOWN; code2 = NE; break;
12026 case NE: code2 = UNKNOWN; break;
12027
12028 default:
12029 gcc_unreachable ();
12030 }
12031
12032 /*
12033 * a < b =>
12034 * if (hi(a) < hi(b)) goto true;
12035 * if (hi(a) > hi(b)) goto false;
12036 * if (lo(a) < lo(b)) goto true;
12037 * false:
12038 */
12039
12040 ix86_compare_op0 = hi[0];
12041 ix86_compare_op1 = hi[1];
12042
12043 if (code1 != UNKNOWN)
12044 ix86_expand_branch (code1, label);
12045 if (code2 != UNKNOWN)
12046 ix86_expand_branch (code2, label2);
12047
12048 ix86_compare_op0 = lo[0];
12049 ix86_compare_op1 = lo[1];
12050 ix86_expand_branch (code3, label);
12051
12052 if (code2 != UNKNOWN)
12053 emit_label (label2);
12054 return;
12055 }
12056
12057 default:
12058 gcc_unreachable ();
12059 }
12060 }
12061
12062 /* Split branch based on floating point condition. */
12063 void
12064 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
12065 rtx target1, rtx target2, rtx tmp, rtx pushed)
12066 {
12067 rtx second, bypass;
12068 rtx label = NULL_RTX;
12069 rtx condition;
12070 int bypass_probability = -1, second_probability = -1, probability = -1;
12071 rtx i;
12072
12073 if (target2 != pc_rtx)
12074 {
12075 rtx tmp = target2;
12076 code = reverse_condition_maybe_unordered (code);
12077 target2 = target1;
12078 target1 = tmp;
12079 }
12080
12081 condition = ix86_expand_fp_compare (code, op1, op2,
12082 tmp, &second, &bypass);
12083
12084 /* Remove pushed operand from stack. */
12085 if (pushed)
12086 ix86_free_from_memory (GET_MODE (pushed));
12087
12088 if (split_branch_probability >= 0)
12089 {
12090 /* Distribute the probabilities across the jumps.
12091 Assume the BYPASS and SECOND tests are always
12092 for UNORDERED. */
12093 probability = split_branch_probability;
12094
12095 /* A value of 1 is low enough that there is no need for the probability
12096 to be updated. Later we may run some experiments and see
12097 if unordered values are more frequent in practice. */
12098 if (bypass)
12099 bypass_probability = 1;
12100 if (second)
12101 second_probability = 1;
12102 }
12103 if (bypass != NULL_RTX)
12104 {
12105 label = gen_label_rtx ();
12106 i = emit_jump_insn (gen_rtx_SET
12107 (VOIDmode, pc_rtx,
12108 gen_rtx_IF_THEN_ELSE (VOIDmode,
12109 bypass,
12110 gen_rtx_LABEL_REF (VOIDmode,
12111 label),
12112 pc_rtx)));
12113 if (bypass_probability >= 0)
12114 REG_NOTES (i)
12115 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12116 GEN_INT (bypass_probability),
12117 REG_NOTES (i));
12118 }
12119 i = emit_jump_insn (gen_rtx_SET
12120 (VOIDmode, pc_rtx,
12121 gen_rtx_IF_THEN_ELSE (VOIDmode,
12122 condition, target1, target2)));
12123 if (probability >= 0)
12124 REG_NOTES (i)
12125 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12126 GEN_INT (probability),
12127 REG_NOTES (i));
12128 if (second != NULL_RTX)
12129 {
12130 i = emit_jump_insn (gen_rtx_SET
12131 (VOIDmode, pc_rtx,
12132 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
12133 target2)));
12134 if (second_probability >= 0)
12135 REG_NOTES (i)
12136 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12137 GEN_INT (second_probability),
12138 REG_NOTES (i));
12139 }
12140 if (label != NULL_RTX)
12141 emit_label (label);
12142 }
12143
12144 int
12145 ix86_expand_setcc (enum rtx_code code, rtx dest)
12146 {
12147 rtx ret, tmp, tmpreg, equiv;
12148 rtx second_test, bypass_test;
12149
12150 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
12151 return 0; /* FAIL */
12152
12153 gcc_assert (GET_MODE (dest) == QImode);
12154
12155 ret = ix86_expand_compare (code, &second_test, &bypass_test);
12156 PUT_MODE (ret, QImode);
12157
12158 tmp = dest;
12159 tmpreg = dest;
12160
12161 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
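/* When IEEE semantics require an extra flag test or an UNORDERED
   bypass (see ix86_fp_comparison_codes), merge the additional setcc
   result into DEST: AND with the reversed bypass condition, or OR
   with the second condition.  */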
12162 if (bypass_test || second_test)
12163 {
12164 rtx test = second_test;
12165 int bypass = 0;
12166 rtx tmp2 = gen_reg_rtx (QImode);
12167 if (bypass_test)
12168 {
12169 gcc_assert (!second_test);
12170 test = bypass_test;
12171 bypass = 1;
12172 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
12173 }
12174 PUT_MODE (test, QImode);
12175 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
12176
12177 if (bypass)
12178 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
12179 else
12180 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
12181 }
12182
12183 /* Attach a REG_EQUAL note describing the comparison result. */
12184 if (ix86_compare_op0 && ix86_compare_op1)
12185 {
12186 equiv = simplify_gen_relational (code, QImode,
12187 GET_MODE (ix86_compare_op0),
12188 ix86_compare_op0, ix86_compare_op1);
12189 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
12190 }
12191
12192 return 1; /* DONE */
12193 }
12194
12195 /* Expand comparison setting or clearing carry flag. Return true when
12196 successful and set pop for the operation. */
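/* Only LTU and GEU test the carry flag directly, so the cases below
   try to rewrite other comparison codes into one of those two, e.g.
   a == 0 becomes (unsigned) a < 1 and a >= 0 becomes
   (unsigned) a < 0x80000000.  */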
12197 static bool
12198 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
12199 {
12200 enum machine_mode mode =
12201 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
12202
12203 /* Do not handle DImode compares that go through special path. */
12204 if (mode == (TARGET_64BIT ? TImode : DImode))
12205 return false;
12206
12207 if (SCALAR_FLOAT_MODE_P (mode))
12208 {
12209 rtx second_test = NULL, bypass_test = NULL;
12210 rtx compare_op, compare_seq;
12211
12212 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
12213
12214 /* Shortcut: the following common codes never translate
12215 into carry flag compares. */
12216 if (code == EQ || code == NE || code == UNEQ || code == LTGT
12217 || code == ORDERED || code == UNORDERED)
12218 return false;
12219
12220 /* These comparisons require zero flag; swap operands so they won't. */
12221 if ((code == GT || code == UNLE || code == LE || code == UNGT)
12222 && !TARGET_IEEE_FP)
12223 {
12224 rtx tmp = op0;
12225 op0 = op1;
12226 op1 = tmp;
12227 code = swap_condition (code);
12228 }
12229
12230 /* Try to expand the comparison and verify that we end up with
12231 a carry flag based comparison. This fails to be true only when
12232 we decide to expand the comparison using arithmetic, which is not
12233 a very common scenario. */
12234 start_sequence ();
12235 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
12236 &second_test, &bypass_test);
12237 compare_seq = get_insns ();
12238 end_sequence ();
12239
12240 if (second_test || bypass_test)
12241 return false;
12242
12243 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12244 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12245 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
12246 else
12247 code = GET_CODE (compare_op);
12248
12249 if (code != LTU && code != GEU)
12250 return false;
12251
12252 emit_insn (compare_seq);
12253 *pop = compare_op;
12254 return true;
12255 }
12256
12257 if (!INTEGRAL_MODE_P (mode))
12258 return false;
12259
12260 switch (code)
12261 {
12262 case LTU:
12263 case GEU:
12264 break;
12265
12266 /* Convert a==0 into (unsigned)a<1. */
12267 case EQ:
12268 case NE:
12269 if (op1 != const0_rtx)
12270 return false;
12271 op1 = const1_rtx;
12272 code = (code == EQ ? LTU : GEU);
12273 break;
12274
12275 /* Convert a>b into b<a or a>=b+1. */
12276 case GTU:
12277 case LEU:
12278 if (CONST_INT_P (op1))
12279 {
12280 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
12281 /* Bail out on overflow. We still can swap operands but that
12282 would force loading of the constant into register. */
12283 if (op1 == const0_rtx
12284 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
12285 return false;
12286 code = (code == GTU ? GEU : LTU);
12287 }
12288 else
12289 {
12290 rtx tmp = op1;
12291 op1 = op0;
12292 op0 = tmp;
12293 code = (code == GTU ? LTU : GEU);
12294 }
12295 break;
12296
12297 /* Convert a>=0 into (unsigned)a<0x80000000. */
12298 case LT:
12299 case GE:
12300 if (mode == DImode || op1 != const0_rtx)
12301 return false;
12302 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12303 code = (code == LT ? GEU : LTU);
12304 break;
12305 case LE:
12306 case GT:
12307 if (mode == DImode || op1 != constm1_rtx)
12308 return false;
12309 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12310 code = (code == LE ? GEU : LTU);
12311 break;
12312
12313 default:
12314 return false;
12315 }
12316 /* Swapping operands may cause a constant to appear as the first operand. */
12317 if (!nonimmediate_operand (op0, VOIDmode))
12318 {
12319 if (!can_create_pseudo_p ())
12320 return false;
12321 op0 = force_reg (mode, op0);
12322 }
12323 ix86_compare_op0 = op0;
12324 ix86_compare_op1 = op1;
12325 *pop = ix86_expand_compare (code, NULL, NULL);
12326 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
12327 return true;
12328 }
12329
12330 int
12331 ix86_expand_int_movcc (rtx operands[])
12332 {
12333 enum rtx_code code = GET_CODE (operands[1]), compare_code;
12334 rtx compare_seq, compare_op;
12335 rtx second_test, bypass_test;
12336 enum machine_mode mode = GET_MODE (operands[0]);
12337 bool sign_bit_compare_p = false;
12338
12339 start_sequence ();
12340 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12341 compare_seq = get_insns ();
12342 end_sequence ();
12343
12344 compare_code = GET_CODE (compare_op);
12345
12346 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
12347 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
12348 sign_bit_compare_p = true;
12349
12350 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
12351 HImode insns, we'd be swallowed in word prefix ops. */
12352
12353 if ((mode != HImode || TARGET_FAST_PREFIX)
12354 && (mode != (TARGET_64BIT ? TImode : DImode))
12355 && CONST_INT_P (operands[2])
12356 && CONST_INT_P (operands[3]))
12357 {
12358 rtx out = operands[0];
12359 HOST_WIDE_INT ct = INTVAL (operands[2]);
12360 HOST_WIDE_INT cf = INTVAL (operands[3]);
12361 HOST_WIDE_INT diff;
12362
12363 diff = ct - cf;
12364 /* Sign bit compares are better done using shifts than by using
12365 sbb. */
12366 if (sign_bit_compare_p
12367 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12368 ix86_compare_op1, &compare_op))
12369 {
12370 /* Detect overlap between destination and compare sources. */
12371 rtx tmp = out;
12372
12373 if (!sign_bit_compare_p)
12374 {
12375 bool fpcmp = false;
12376
12377 compare_code = GET_CODE (compare_op);
12378
12379 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12380 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12381 {
12382 fpcmp = true;
12383 compare_code = ix86_fp_compare_code_to_integer (compare_code);
12384 }
12385
12386 /* To simplify the rest of the code, restrict to the GEU case. */
12387 if (compare_code == LTU)
12388 {
12389 HOST_WIDE_INT tmp = ct;
12390 ct = cf;
12391 cf = tmp;
12392 compare_code = reverse_condition (compare_code);
12393 code = reverse_condition (code);
12394 }
12395 else
12396 {
12397 if (fpcmp)
12398 PUT_CODE (compare_op,
12399 reverse_condition_maybe_unordered
12400 (GET_CODE (compare_op)));
12401 else
12402 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12403 }
12404 diff = ct - cf;
12405
12406 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
12407 || reg_overlap_mentioned_p (out, ix86_compare_op1))
12408 tmp = gen_reg_rtx (mode);
12409
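/* The x86_mov*cc_0_m1 patterns expand to "sbb reg,reg", which computes
   reg - reg - CF = -CF: all ones when the carry flag is set and zero
   otherwise.  The constant arithmetic below then maps that -1/0 value
   onto ct/cf.  */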
12410 if (mode == DImode)
12411 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
12412 else
12413 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
12414 }
12415 else
12416 {
12417 if (code == GT || code == GE)
12418 code = reverse_condition (code);
12419 else
12420 {
12421 HOST_WIDE_INT tmp = ct;
12422 ct = cf;
12423 cf = tmp;
12424 diff = ct - cf;
12425 }
12426 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
12427 ix86_compare_op1, VOIDmode, 0, -1);
12428 }
12429
12430 if (diff == 1)
12431 {
12432 /*
12433 * cmpl op0,op1
12434 * sbbl dest,dest
12435 * [addl dest, ct]
12436 *
12437 * Size 5 - 8.
12438 */
12439 if (ct)
12440 tmp = expand_simple_binop (mode, PLUS,
12441 tmp, GEN_INT (ct),
12442 copy_rtx (tmp), 1, OPTAB_DIRECT);
12443 }
12444 else if (cf == -1)
12445 {
12446 /*
12447 * cmpl op0,op1
12448 * sbbl dest,dest
12449 * orl $ct, dest
12450 *
12451 * Size 8.
12452 */
12453 tmp = expand_simple_binop (mode, IOR,
12454 tmp, GEN_INT (ct),
12455 copy_rtx (tmp), 1, OPTAB_DIRECT);
12456 }
12457 else if (diff == -1 && ct)
12458 {
12459 /*
12460 * cmpl op0,op1
12461 * sbbl dest,dest
12462 * notl dest
12463 * [addl dest, cf]
12464 *
12465 * Size 8 - 11.
12466 */
12467 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12468 if (cf)
12469 tmp = expand_simple_binop (mode, PLUS,
12470 copy_rtx (tmp), GEN_INT (cf),
12471 copy_rtx (tmp), 1, OPTAB_DIRECT);
12472 }
12473 else
12474 {
12475 /*
12476 * cmpl op0,op1
12477 * sbbl dest,dest
12478 * [notl dest]
12479 * andl cf - ct, dest
12480 * [addl dest, ct]
12481 *
12482 * Size 8 - 11.
12483 */
12484
12485 if (cf == 0)
12486 {
12487 cf = ct;
12488 ct = 0;
12489 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12490 }
12491
12492 tmp = expand_simple_binop (mode, AND,
12493 copy_rtx (tmp),
12494 gen_int_mode (cf - ct, mode),
12495 copy_rtx (tmp), 1, OPTAB_DIRECT);
12496 if (ct)
12497 tmp = expand_simple_binop (mode, PLUS,
12498 copy_rtx (tmp), GEN_INT (ct),
12499 copy_rtx (tmp), 1, OPTAB_DIRECT);
12500 }
12501
12502 if (!rtx_equal_p (tmp, out))
12503 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
12504
12505 return 1; /* DONE */
12506 }
12507
12508 if (diff < 0)
12509 {
12510 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12511
12512 HOST_WIDE_INT tmp;
12513 tmp = ct, ct = cf, cf = tmp;
12514 diff = -diff;
12515
12516 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12517 {
12518 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12519
12520 /* We may be reversing an unordered compare to a normal compare, which
12521 is not valid in general (we may convert a non-trapping condition
12522 to a trapping one), however on i386 we currently emit all
12523 comparisons unordered. */
12524 compare_code = reverse_condition_maybe_unordered (compare_code);
12525 code = reverse_condition_maybe_unordered (code);
12526 }
12527 else
12528 {
12529 compare_code = reverse_condition (compare_code);
12530 code = reverse_condition (code);
12531 }
12532 }
12533
12534 compare_code = UNKNOWN;
12535 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
12536 && CONST_INT_P (ix86_compare_op1))
12537 {
12538 if (ix86_compare_op1 == const0_rtx
12539 && (code == LT || code == GE))
12540 compare_code = code;
12541 else if (ix86_compare_op1 == constm1_rtx)
12542 {
12543 if (code == LE)
12544 compare_code = LT;
12545 else if (code == GT)
12546 compare_code = GE;
12547 }
12548 }
12549
12550 /* Optimize dest = (op0 < 0) ? -1 : cf. */
12551 if (compare_code != UNKNOWN
12552 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
12553 && (cf == -1 || ct == -1))
12554 {
12555 /* If lea code below could be used, only optimize
12556 if it results in a 2 insn sequence. */
12557
12558 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
12559 || diff == 3 || diff == 5 || diff == 9)
12560 || (compare_code == LT && ct == -1)
12561 || (compare_code == GE && cf == -1))
12562 {
12563 /*
12564 * notl op1 (if necessary)
12565 * sarl $31, op1
12566 * orl cf, op1
12567 */
12568 if (ct != -1)
12569 {
12570 cf = ct;
12571 ct = -1;
12572 code = reverse_condition (code);
12573 }
12574
12575 out = emit_store_flag (out, code, ix86_compare_op0,
12576 ix86_compare_op1, VOIDmode, 0, -1);
12577
12578 out = expand_simple_binop (mode, IOR,
12579 out, GEN_INT (cf),
12580 out, 1, OPTAB_DIRECT);
12581 if (out != operands[0])
12582 emit_move_insn (operands[0], out);
12583
12584 return 1; /* DONE */
12585 }
12586 }
12587
12588
12589 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
12590 || diff == 3 || diff == 5 || diff == 9)
12591 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
12592 && (mode != DImode
12593 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
12594 {
12595 /*
12596 * xorl dest,dest
12597 * cmpl op1,op2
12598 * setcc dest
12599 * lea cf(dest*(ct-cf)),dest
12600 *
12601 * Size 14.
12602 *
12603 * This also catches the degenerate setcc-only case.
12604 */
12605
12606 rtx tmp;
12607 int nops;
12608
12609 out = emit_store_flag (out, code, ix86_compare_op0,
12610 ix86_compare_op1, VOIDmode, 0, 1);
12611
12612 nops = 0;
12613 /* On x86_64 the lea instruction operates on Pmode, so we need
12614 to get the arithmetic done in the proper mode to match. */
12615 if (diff == 1)
12616 tmp = copy_rtx (out);
12617 else
12618 {
12619 rtx out1;
12620 out1 = copy_rtx (out);
12621 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
12622 nops++;
12623 if (diff & 1)
12624 {
12625 tmp = gen_rtx_PLUS (mode, tmp, out1);
12626 nops++;
12627 }
12628 }
12629 if (cf != 0)
12630 {
12631 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
12632 nops++;
12633 }
12634 if (!rtx_equal_p (tmp, out))
12635 {
12636 if (nops == 1)
12637 out = force_operand (tmp, copy_rtx (out));
12638 else
12639 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
12640 }
12641 if (!rtx_equal_p (out, operands[0]))
12642 emit_move_insn (operands[0], copy_rtx (out));
12643
12644 return 1; /* DONE */
12645 }
12646
12647 /*
12648 * General case: Jumpful:
12649 * xorl dest,dest cmpl op1, op2
12650 * cmpl op1, op2 movl ct, dest
12651 * setcc dest jcc 1f
12652 * decl dest movl cf, dest
12653 * andl (cf-ct),dest 1:
12654 * addl ct,dest
12655 *
12656 * Size 20. Size 14.
12657 *
12658 * This is reasonably steep, but branch mispredict costs are
12659 * high on modern cpus, so consider failing only if optimizing
12660 * for space.
12661 */
12662
12663 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12664 && BRANCH_COST >= 2)
12665 {
12666 if (cf == 0)
12667 {
12668 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12669
12670 cf = ct;
12671 ct = 0;
12672
12673 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12674 {
12675 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12676
12677 /* We may be reversing an unordered compare to a normal compare,
12678 which is not valid in general (we may convert a non-trapping
12679 condition to a trapping one), however on i386 we currently
12680 emit all comparisons unordered. */
12681 code = reverse_condition_maybe_unordered (code);
12682 }
12683 else
12684 {
12685 code = reverse_condition (code);
12686 if (compare_code != UNKNOWN)
12687 compare_code = reverse_condition (compare_code);
12688 }
12689 }
12690
12691 if (compare_code != UNKNOWN)
12692 {
12693 /* notl op1 (if needed)
12694 sarl $31, op1
12695 andl (cf-ct), op1
12696 addl ct, op1
12697
12698 For x < 0 (resp. x <= -1) there will be no notl,
12699 so if possible swap the constants to get rid of the
12700 complement.
12701 True/false will be -1/0 while code below (store flag
12702 followed by decrement) is 0/-1, so the constants need
12703 to be exchanged once more. */
12704
12705 if (compare_code == GE || !cf)
12706 {
12707 code = reverse_condition (code);
12708 compare_code = LT;
12709 }
12710 else
12711 {
12712 HOST_WIDE_INT tmp = cf;
12713 cf = ct;
12714 ct = tmp;
12715 }
12716
12717 out = emit_store_flag (out, code, ix86_compare_op0,
12718 ix86_compare_op1, VOIDmode, 0, -1);
12719 }
12720 else
12721 {
12722 out = emit_store_flag (out, code, ix86_compare_op0,
12723 ix86_compare_op1, VOIDmode, 0, 1);
12724
12725 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
12726 copy_rtx (out), 1, OPTAB_DIRECT);
12727 }
12728
12729 out = expand_simple_binop (mode, AND, copy_rtx (out),
12730 gen_int_mode (cf - ct, mode),
12731 copy_rtx (out), 1, OPTAB_DIRECT);
12732 if (ct)
12733 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
12734 copy_rtx (out), 1, OPTAB_DIRECT);
12735 if (!rtx_equal_p (out, operands[0]))
12736 emit_move_insn (operands[0], copy_rtx (out));
12737
12738 return 1; /* DONE */
12739 }
12740 }
12741
12742 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12743 {
12744 /* Try a few things more with specific constants and a variable. */
12745
12746 optab op;
12747 rtx var, orig_out, out, tmp;
12748
12749 if (BRANCH_COST <= 2)
12750 return 0; /* FAIL */
12751
12752 /* If one of the two operands is an interesting constant, load a
12753 constant with the above and mask it in with a logical operation. */
12754
12755 if (CONST_INT_P (operands[2]))
12756 {
12757 var = operands[3];
12758 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
12759 operands[3] = constm1_rtx, op = and_optab;
12760 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
12761 operands[3] = const0_rtx, op = ior_optab;
12762 else
12763 return 0; /* FAIL */
12764 }
12765 else if (CONST_INT_P (operands[3]))
12766 {
12767 var = operands[2];
12768 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
12769 operands[2] = constm1_rtx, op = and_optab;
12770 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
12771 operands[2] = const0_rtx, op = ior_optab;
12772 else
12773 return 0; /* FAIL */
12774 }
12775 else
12776 return 0; /* FAIL */
12777
12778 orig_out = operands[0];
12779 tmp = gen_reg_rtx (mode);
12780 operands[0] = tmp;
12781
12782 /* Recurse to get the constant loaded. */
12783 if (ix86_expand_int_movcc (operands) == 0)
12784 return 0; /* FAIL */
12785
12786 /* Mask in the interesting variable. */
12787 out = expand_binop (mode, op, var, tmp, orig_out, 0,
12788 OPTAB_WIDEN);
12789 if (!rtx_equal_p (out, orig_out))
12790 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
12791
12792 return 1; /* DONE */
12793 }
12794
12795 /*
12796 * For comparison with above,
12797 *
12798 * movl cf,dest
12799 * movl ct,tmp
12800 * cmpl op1,op2
12801 * cmovcc tmp,dest
12802 *
12803 * Size 15.
12804 */
12805
12806 if (! nonimmediate_operand (operands[2], mode))
12807 operands[2] = force_reg (mode, operands[2]);
12808 if (! nonimmediate_operand (operands[3], mode))
12809 operands[3] = force_reg (mode, operands[3]);
12810
12811 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12812 {
12813 rtx tmp = gen_reg_rtx (mode);
12814 emit_move_insn (tmp, operands[3]);
12815 operands[3] = tmp;
12816 }
12817 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12818 {
12819 rtx tmp = gen_reg_rtx (mode);
12820 emit_move_insn (tmp, operands[2]);
12821 operands[2] = tmp;
12822 }
12823
12824 if (! register_operand (operands[2], VOIDmode)
12825 && (mode == QImode
12826 || ! register_operand (operands[3], VOIDmode)))
12827 operands[2] = force_reg (mode, operands[2]);
12828
12829 if (mode == QImode
12830 && ! register_operand (operands[3], VOIDmode))
12831 operands[3] = force_reg (mode, operands[3]);
12832
12833 emit_insn (compare_seq);
12834 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12835 gen_rtx_IF_THEN_ELSE (mode,
12836 compare_op, operands[2],
12837 operands[3])));
12838 if (bypass_test)
12839 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12840 gen_rtx_IF_THEN_ELSE (mode,
12841 bypass_test,
12842 copy_rtx (operands[3]),
12843 copy_rtx (operands[0]))));
12844 if (second_test)
12845 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12846 gen_rtx_IF_THEN_ELSE (mode,
12847 second_test,
12848 copy_rtx (operands[2]),
12849 copy_rtx (operands[0]))));
12850
12851 return 1; /* DONE */
12852 }
12853
12854 /* Swap, force into registers, or otherwise massage the two operands
12855 to an sse comparison with a mask result. Thus we differ a bit from
12856 ix86_prepare_fp_compare_args which expects to produce a flags result.
12857
12858 The DEST operand exists to help determine whether to commute commutative
12859 operators. The POP0/POP1 operands are updated in place. The new
12860 comparison code is returned, or UNKNOWN if not implementable. */
12861
12862 static enum rtx_code
12863 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12864 rtx *pop0, rtx *pop1)
12865 {
12866 rtx tmp;
12867
12868 switch (code)
12869 {
12870 case LTGT:
12871 case UNEQ:
12872 /* We have no LTGT as an operator. We could implement it with
12873 NE & ORDERED, but this requires an extra temporary. It's
12874 not clear that it's worth it. */
12875 return UNKNOWN;
12876
12877 case LT:
12878 case LE:
12879 case UNGT:
12880 case UNGE:
12881 /* These are supported directly. */
12882 break;
12883
12884 case EQ:
12885 case NE:
12886 case UNORDERED:
12887 case ORDERED:
12888 /* For commutative operators, try to canonicalize the destination
12889 operand to be first in the comparison - this helps reload to
12890 avoid extra moves. */
12891 if (!dest || !rtx_equal_p (dest, *pop1))
12892 break;
12893 /* FALLTHRU */
12894
12895 case GE:
12896 case GT:
12897 case UNLE:
12898 case UNLT:
12899 /* These are not supported directly. Swap the comparison operands
12900 to transform into something that is supported. */
12901 tmp = *pop0;
12902 *pop0 = *pop1;
12903 *pop1 = tmp;
12904 code = swap_condition (code);
12905 break;
12906
12907 default:
12908 gcc_unreachable ();
12909 }
12910
12911 return code;
12912 }
12913
12914 /* Detect conditional moves that exactly match min/max operational
12915 semantics. Note that this is IEEE safe, as long as we don't
12916 interchange the operands.
12917
12918 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12919 and TRUE if the operation is successful and instructions are emitted. */
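/* For instance (purely illustrative): DEST = (A < B) ? A : B matches a MIN,
and with the two arms swapped it matches a MAX. */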
12920
12921 static bool
12922 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
12923 rtx cmp_op1, rtx if_true, rtx if_false)
12924 {
12925 enum machine_mode mode;
12926 bool is_min;
12927 rtx tmp;
12928
12929 if (code == LT)
12930 ;
12931 else if (code == UNGE)
12932 {
12933 tmp = if_true;
12934 if_true = if_false;
12935 if_false = tmp;
12936 }
12937 else
12938 return false;
12939
12940 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
12941 is_min = true;
12942 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
12943 is_min = false;
12944 else
12945 return false;
12946
12947 mode = GET_MODE (dest);
12948
12949 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12950 but MODE may be a vector mode and thus not appropriate. */
12951 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
12952 {
12953 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
12954 rtvec v;
12955
12956 if_true = force_reg (mode, if_true);
12957 v = gen_rtvec (2, if_true, if_false);
12958 tmp = gen_rtx_UNSPEC (mode, v, u);
12959 }
12960 else
12961 {
12962 code = is_min ? SMIN : SMAX;
12963 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
12964 }
12965
12966 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
12967 return true;
12968 }
12969
12970 /* Expand an sse vector comparison. Return the register with the result. */
12971
12972 static rtx
12973 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
12974 rtx op_true, rtx op_false)
12975 {
12976 enum machine_mode mode = GET_MODE (dest);
12977 rtx x;
12978
12979 cmp_op0 = force_reg (mode, cmp_op0);
12980 if (!nonimmediate_operand (cmp_op1, mode))
12981 cmp_op1 = force_reg (mode, cmp_op1);
12982
12983 if (optimize
12984 || reg_overlap_mentioned_p (dest, op_true)
12985 || reg_overlap_mentioned_p (dest, op_false))
12986 dest = gen_reg_rtx (mode);
12987
12988 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
12989 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12990
12991 return dest;
12992 }
12993
12994 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12995 operations. This is used for both scalar and vector conditional moves. */
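/* That is, DEST = (CMP & OP_TRUE) | (~CMP & OP_FALSE), with the obvious
simplifications when either arm is the zero constant. */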
12996
12997 static void
12998 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
12999 {
13000 enum machine_mode mode = GET_MODE (dest);
13001 rtx t2, t3, x;
13002
13003 if (op_false == CONST0_RTX (mode))
13004 {
13005 op_true = force_reg (mode, op_true);
13006 x = gen_rtx_AND (mode, cmp, op_true);
13007 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13008 }
13009 else if (op_true == CONST0_RTX (mode))
13010 {
13011 op_false = force_reg (mode, op_false);
13012 x = gen_rtx_NOT (mode, cmp);
13013 x = gen_rtx_AND (mode, x, op_false);
13014 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13015 }
13016 else
13017 {
13018 op_true = force_reg (mode, op_true);
13019 op_false = force_reg (mode, op_false);
13020
13021 t2 = gen_reg_rtx (mode);
13022 if (optimize)
13023 t3 = gen_reg_rtx (mode);
13024 else
13025 t3 = dest;
13026
13027 x = gen_rtx_AND (mode, op_true, cmp);
13028 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
13029
13030 x = gen_rtx_NOT (mode, cmp);
13031 x = gen_rtx_AND (mode, x, op_false);
13032 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
13033
13034 x = gen_rtx_IOR (mode, t3, t2);
13035 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13036 }
13037 }
13038
13039 /* Expand a floating-point conditional move. Return true if successful. */
13040
13041 int
13042 ix86_expand_fp_movcc (rtx operands[])
13043 {
13044 enum machine_mode mode = GET_MODE (operands[0]);
13045 enum rtx_code code = GET_CODE (operands[1]);
13046 rtx tmp, compare_op, second_test, bypass_test;
13047
13048 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
13049 {
13050 enum machine_mode cmode;
13051
13052 /* Since we've no cmove for sse registers, don't force bad register
13053 allocation just to gain access to it. Deny movcc when the
13054 comparison mode doesn't match the move mode. */
13055 cmode = GET_MODE (ix86_compare_op0);
13056 if (cmode == VOIDmode)
13057 cmode = GET_MODE (ix86_compare_op1);
13058 if (cmode != mode)
13059 return 0;
13060
13061 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13062 &ix86_compare_op0,
13063 &ix86_compare_op1);
13064 if (code == UNKNOWN)
13065 return 0;
13066
13067 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
13068 ix86_compare_op1, operands[2],
13069 operands[3]))
13070 return 1;
13071
13072 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
13073 ix86_compare_op1, operands[2], operands[3]);
13074 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
13075 return 1;
13076 }
13077
13078 /* The floating point conditional move instructions don't directly
13079 support conditions resulting from a signed integer comparison. */
13080
13081 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13082
13086 if (!fcmov_comparison_operator (compare_op, VOIDmode))
13087 {
13088 gcc_assert (!second_test && !bypass_test);
13089 tmp = gen_reg_rtx (QImode);
13090 ix86_expand_setcc (code, tmp);
13091 code = NE;
13092 ix86_compare_op0 = tmp;
13093 ix86_compare_op1 = const0_rtx;
13094 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13095 }
13096 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
13097 {
13098 tmp = gen_reg_rtx (mode);
13099 emit_move_insn (tmp, operands[3]);
13100 operands[3] = tmp;
13101 }
13102 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
13103 {
13104 tmp = gen_reg_rtx (mode);
13105 emit_move_insn (tmp, operands[2]);
13106 operands[2] = tmp;
13107 }
13108
13109 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13110 gen_rtx_IF_THEN_ELSE (mode, compare_op,
13111 operands[2], operands[3])));
13112 if (bypass_test)
13113 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13114 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
13115 operands[3], operands[0])));
13116 if (second_test)
13117 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13118 gen_rtx_IF_THEN_ELSE (mode, second_test,
13119 operands[2], operands[0])));
13120
13121 return 1;
13122 }
13123
13124 /* Expand a floating-point vector conditional move; a vcond operation
13125 rather than a movcc operation. */
13126
13127 bool
13128 ix86_expand_fp_vcond (rtx operands[])
13129 {
13130 enum rtx_code code = GET_CODE (operands[3]);
13131 rtx cmp;
13132
13133 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13134 &operands[4], &operands[5]);
13135 if (code == UNKNOWN)
13136 return false;
13137
13138 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
13139 operands[5], operands[1], operands[2]))
13140 return true;
13141
13142 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
13143 operands[1], operands[2]);
13144 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
13145 return true;
13146 }
13147
13148 /* Expand a signed/unsigned integral vector conditional move. */
13149
13150 bool
13151 ix86_expand_int_vcond (rtx operands[])
13152 {
13153 enum machine_mode mode = GET_MODE (operands[0]);
13154 enum rtx_code code = GET_CODE (operands[3]);
13155 bool negate = false;
13156 rtx x, cop0, cop1;
13157
13158 cop0 = operands[4];
13159 cop1 = operands[5];
13160
13161 /* Canonicalize the comparison to EQ, GT, GTU. */
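/* For instance (illustrative): A <= B is rewritten as NOT (A > B), so we
compare with GT and let NEGATE below swap the selected arms; A < B simply
becomes B > A by swapping the comparison operands. */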
13162 switch (code)
13163 {
13164 case EQ:
13165 case GT:
13166 case GTU:
13167 break;
13168
13169 case NE:
13170 case LE:
13171 case LEU:
13172 code = reverse_condition (code);
13173 negate = true;
13174 break;
13175
13176 case GE:
13177 case GEU:
13178 code = reverse_condition (code);
13179 negate = true;
13180 /* FALLTHRU */
13181
13182 case LT:
13183 case LTU:
13184 code = swap_condition (code);
13185 x = cop0, cop0 = cop1, cop1 = x;
13186 break;
13187
13188 default:
13189 gcc_unreachable ();
13190 }
13191
13192 /* Only SSE4.1/SSE4.2 supports V2DImode. */
13193 if (mode == V2DImode)
13194 {
13195 switch (code)
13196 {
13197 case EQ:
13198 /* SSE4.1 supports EQ. */
13199 if (!TARGET_SSE4_1)
13200 return false;
13201 break;
13202
13203 case GT:
13204 case GTU:
13205 /* SSE4.2 supports GT/GTU. */
13206 if (!TARGET_SSE4_2)
13207 return false;
13208 break;
13209
13210 default:
13211 gcc_unreachable ();
13212 }
13213 }
13214
13215 /* Unsigned parallel compare is not supported by the hardware. Play some
13216 tricks to turn this into a signed comparison against 0. */
13217 if (code == GTU)
13218 {
13219 cop0 = force_reg (mode, cop0);
13220
13221 switch (mode)
13222 {
13223 case V4SImode:
13224 case V2DImode:
13225 {
13226 rtx t1, t2, mask;
13227
13228 /* Perform a parallel modulo subtraction. */
13229 t1 = gen_reg_rtx (mode);
13230 emit_insn ((mode == V4SImode
13231 ? gen_subv4si3
13232 : gen_subv2di3) (t1, cop0, cop1));
13233
13234 /* Extract the original sign bit of op0. */
13235 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
13236 true, false);
13237 t2 = gen_reg_rtx (mode);
13238 emit_insn ((mode == V4SImode
13239 ? gen_andv4si3
13240 : gen_andv2di3) (t2, cop0, mask));
13241
13242 /* XOR it back into the result of the subtraction. This results
13243 in the sign bit set iff we saw unsigned underflow. */
13244 x = gen_reg_rtx (mode);
13245 emit_insn ((mode == V4SImode
13246 ? gen_xorv4si3
13247 : gen_xorv2di3) (x, t1, t2));
13248
13249 code = GT;
13250 }
13251 break;
13252
13253 case V16QImode:
13254 case V8HImode:
13255 /* Perform a parallel unsigned saturating subtraction. */
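/* Here A >u B exactly when the saturating difference A -us B is nonzero,
so compare the difference for equality with zero and invert which arm
of the conditional move is selected. */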
13256 x = gen_reg_rtx (mode);
13257 emit_insn (gen_rtx_SET (VOIDmode, x,
13258 gen_rtx_US_MINUS (mode, cop0, cop1)));
13259
13260 code = EQ;
13261 negate = !negate;
13262 break;
13263
13264 default:
13265 gcc_unreachable ();
13266 }
13267
13268 cop0 = x;
13269 cop1 = CONST0_RTX (mode);
13270 }
13271
13272 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
13273 operands[1+negate], operands[2-negate]);
13274
13275 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
13276 operands[2-negate]);
13277 return true;
13278 }
13279
13280 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
13281 true if we should do zero extension, else sign extension. HIGH_P is
13282 true if we want the N/2 high elements, else the low elements. */
13283
13284 void
13285 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13286 {
13287 enum machine_mode imode = GET_MODE (operands[1]);
13288 rtx (*unpack)(rtx, rtx, rtx);
13289 rtx se, dest;
13290
13291 switch (imode)
13292 {
13293 case V16QImode:
13294 if (high_p)
13295 unpack = gen_vec_interleave_highv16qi;
13296 else
13297 unpack = gen_vec_interleave_lowv16qi;
13298 break;
13299 case V8HImode:
13300 if (high_p)
13301 unpack = gen_vec_interleave_highv8hi;
13302 else
13303 unpack = gen_vec_interleave_lowv8hi;
13304 break;
13305 case V4SImode:
13306 if (high_p)
13307 unpack = gen_vec_interleave_highv4si;
13308 else
13309 unpack = gen_vec_interleave_lowv4si;
13310 break;
13311 default:
13312 gcc_unreachable ();
13313 }
13314
13315 dest = gen_lowpart (imode, operands[0]);
13316
13317 if (unsigned_p)
13318 se = force_reg (imode, CONST0_RTX (imode));
13319 else
13320 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
13321 operands[1], pc_rtx, pc_rtx);
13322
13323 emit_insn (unpack (dest, operands[1], se));
13324 }
13325
13326 /* This function performs the same task as ix86_expand_sse_unpack,
13327 but with SSE4.1 instructions. */
13328
13329 void
13330 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13331 {
13332 enum machine_mode imode = GET_MODE (operands[1]);
13333 rtx (*unpack)(rtx, rtx);
13334 rtx src, dest;
13335
13336 switch (imode)
13337 {
13338 case V16QImode:
13339 if (unsigned_p)
13340 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
13341 else
13342 unpack = gen_sse4_1_extendv8qiv8hi2;
13343 break;
13344 case V8HImode:
13345 if (unsigned_p)
13346 unpack = gen_sse4_1_zero_extendv4hiv4si2;
13347 else
13348 unpack = gen_sse4_1_extendv4hiv4si2;
13349 break;
13350 case V4SImode:
13351 if (unsigned_p)
13352 unpack = gen_sse4_1_zero_extendv2siv2di2;
13353 else
13354 unpack = gen_sse4_1_extendv2siv2di2;
13355 break;
13356 default:
13357 gcc_unreachable ();
13358 }
13359
13360 dest = operands[0];
13361 if (high_p)
13362 {
13363 /* Shift higher 8 bytes to lower 8 bytes. */
13364 src = gen_reg_rtx (imode);
13365 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
13366 gen_lowpart (TImode, operands[1]),
13367 GEN_INT (64)));
13368 }
13369 else
13370 src = operands[1];
13371
13372 emit_insn (unpack (dest, src));
13373 }
13374
13375 /* Expand conditional increment or decrement using adc/sbb instructions.
13376 The default case using setcc followed by the conditional move can be
13377 done by generic code. */
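/* An illustrative sketch: the conditional increment

x += (a < b);     (unsigned comparison)

can be emitted as the compare followed by "adc $0, x", since the compare
leaves the carry flag set exactly when a < b; the conditional decrement
variant uses "sbb $0, x" instead. */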
13378 int
13379 ix86_expand_int_addcc (rtx operands[])
13380 {
13381 enum rtx_code code = GET_CODE (operands[1]);
13382 rtx compare_op;
13383 rtx val = const0_rtx;
13384 bool fpcmp = false;
13385 enum machine_mode mode = GET_MODE (operands[0]);
13386
13387 if (operands[3] != const1_rtx
13388 && operands[3] != constm1_rtx)
13389 return 0;
13390 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
13391 ix86_compare_op1, &compare_op))
13392 return 0;
13393 code = GET_CODE (compare_op);
13394
13395 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
13396 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
13397 {
13398 fpcmp = true;
13399 code = ix86_fp_compare_code_to_integer (code);
13400 }
13401
13402 if (code != LTU)
13403 {
13404 val = constm1_rtx;
13405 if (fpcmp)
13406 PUT_CODE (compare_op,
13407 reverse_condition_maybe_unordered
13408 (GET_CODE (compare_op)));
13409 else
13410 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
13411 }
13412 PUT_MODE (compare_op, mode);
13413
13414 /* Construct either adc or sbb insn. */
13415 if ((code == LTU) == (operands[3] == constm1_rtx))
13416 {
13417 switch (GET_MODE (operands[0]))
13418 {
13419 case QImode:
13420 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
13421 break;
13422 case HImode:
13423 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
13424 break;
13425 case SImode:
13426 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
13427 break;
13428 case DImode:
13429 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13430 break;
13431 default:
13432 gcc_unreachable ();
13433 }
13434 }
13435 else
13436 {
13437 switch (GET_MODE (operands[0]))
13438 {
13439 case QImode:
13440 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
13441 break;
13442 case HImode:
13443 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
13444 break;
13445 case SImode:
13446 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
13447 break;
13448 case DImode:
13449 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13450 break;
13451 default:
13452 gcc_unreachable ();
13453 }
13454 }
13455 return 1; /* DONE */
13456 }
13457
13458
13459 /* Split OPERAND into up to three SImode (or DImode) parts and store them
13460 in PARTS. Similar to split_di, but works for floating point parameters
13461 and non-offsettable memories. For pushes, it returns just stack offsets;
13462 the values will be saved in the right order. At most three parts are generated. */
13463
13464 static int
13465 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
13466 {
13467 int size;
13468
13469 if (!TARGET_64BIT)
13470 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
13471 else
13472 size = (GET_MODE_SIZE (mode) + 4) / 8;
13473
13474 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
13475 gcc_assert (size >= 2 && size <= 3);
13476
13477 /* Optimize constant pool reference to immediates. This is used by fp
13478 moves, that force all constants to memory to allow combining. */
13479 if (MEM_P (operand) && MEM_READONLY_P (operand))
13480 {
13481 rtx tmp = maybe_get_pool_constant (operand);
13482 if (tmp)
13483 operand = tmp;
13484 }
13485
13486 if (MEM_P (operand) && !offsettable_memref_p (operand))
13487 {
13488 /* The only non-offsettable memories we handle are pushes. */
13489 int ok = push_operand (operand, VOIDmode);
13490
13491 gcc_assert (ok);
13492
13493 operand = copy_rtx (operand);
13494 PUT_MODE (operand, Pmode);
13495 parts[0] = parts[1] = parts[2] = operand;
13496 return size;
13497 }
13498
13499 if (GET_CODE (operand) == CONST_VECTOR)
13500 {
13501 enum machine_mode imode = int_mode_for_mode (mode);
13502 /* Caution: if we looked through a constant pool memory above,
13503 the operand may actually have a different mode now. That's
13504 ok, since we want to pun this all the way back to an integer. */
13505 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
13506 gcc_assert (operand != NULL);
13507 mode = imode;
13508 }
13509
13510 if (!TARGET_64BIT)
13511 {
13512 if (mode == DImode)
13513 split_di (&operand, 1, &parts[0], &parts[1]);
13514 else
13515 {
13516 if (REG_P (operand))
13517 {
13518 gcc_assert (reload_completed);
13519 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
13520 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
13521 if (size == 3)
13522 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
13523 }
13524 else if (offsettable_memref_p (operand))
13525 {
13526 operand = adjust_address (operand, SImode, 0);
13527 parts[0] = operand;
13528 parts[1] = adjust_address (operand, SImode, 4);
13529 if (size == 3)
13530 parts[2] = adjust_address (operand, SImode, 8);
13531 }
13532 else if (GET_CODE (operand) == CONST_DOUBLE)
13533 {
13534 REAL_VALUE_TYPE r;
13535 long l[4];
13536
13537 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13538 switch (mode)
13539 {
13540 case XFmode:
13541 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
13542 parts[2] = gen_int_mode (l[2], SImode);
13543 break;
13544 case DFmode:
13545 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
13546 break;
13547 default:
13548 gcc_unreachable ();
13549 }
13550 parts[1] = gen_int_mode (l[1], SImode);
13551 parts[0] = gen_int_mode (l[0], SImode);
13552 }
13553 else
13554 gcc_unreachable ();
13555 }
13556 }
13557 else
13558 {
13559 if (mode == TImode)
13560 split_ti (&operand, 1, &parts[0], &parts[1]);
13561 if (mode == XFmode || mode == TFmode)
13562 {
13563 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
13564 if (REG_P (operand))
13565 {
13566 gcc_assert (reload_completed);
13567 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
13568 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
13569 }
13570 else if (offsettable_memref_p (operand))
13571 {
13572 operand = adjust_address (operand, DImode, 0);
13573 parts[0] = operand;
13574 parts[1] = adjust_address (operand, upper_mode, 8);
13575 }
13576 else if (GET_CODE (operand) == CONST_DOUBLE)
13577 {
13578 REAL_VALUE_TYPE r;
13579 long l[4];
13580
13581 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13582 real_to_target (l, &r, mode);
13583
13584 /* Do not use shift by 32 to avoid warning on 32bit systems. */
13585 if (HOST_BITS_PER_WIDE_INT >= 64)
13586 parts[0]
13587 = gen_int_mode
13588 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
13589 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
13590 DImode);
13591 else
13592 parts[0] = immed_double_const (l[0], l[1], DImode);
13593
13594 if (upper_mode == SImode)
13595 parts[1] = gen_int_mode (l[2], SImode);
13596 else if (HOST_BITS_PER_WIDE_INT >= 64)
13597 parts[1]
13598 = gen_int_mode
13599 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
13600 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
13601 DImode);
13602 else
13603 parts[1] = immed_double_const (l[2], l[3], DImode);
13604 }
13605 else
13606 gcc_unreachable ();
13607 }
13608 }
13609
13610 return size;
13611 }
13612
13613 /* Emit insns to perform a move or push of DI, DF, and XF values.
13614 Operands 2-4 receive the destination parts and operands 5-7 the
13615 corresponding source parts, in the correct order; all the required
13616 insns are emitted here. */
13617
13618 void
13619 ix86_split_long_move (rtx operands[])
13620 {
13621 rtx part[2][3];
13622 int nparts;
13623 int push = 0;
13624 int collisions = 0;
13625 enum machine_mode mode = GET_MODE (operands[0]);
13626
13627 /* The DFmode expanders may ask us to move a double.
13628 For a 64bit target this is a single move. By hiding that fact
13629 here we simplify the i386.md splitters. */
13630 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
13631 {
13632 /* Optimize constant pool reference to immediates. This is used by
13633 fp moves, that force all constants to memory to allow combining. */
13634
13635 if (MEM_P (operands[1])
13636 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
13637 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
13638 operands[1] = get_pool_constant (XEXP (operands[1], 0));
13639 if (push_operand (operands[0], VOIDmode))
13640 {
13641 operands[0] = copy_rtx (operands[0]);
13642 PUT_MODE (operands[0], Pmode);
13643 }
13644 else
13645 operands[0] = gen_lowpart (DImode, operands[0]);
13646 operands[1] = gen_lowpart (DImode, operands[1]);
13647 emit_move_insn (operands[0], operands[1]);
13648 return;
13649 }
13650
13651 /* The only non-offsettable memory we handle is push. */
13652 if (push_operand (operands[0], VOIDmode))
13653 push = 1;
13654 else
13655 gcc_assert (!MEM_P (operands[0])
13656 || offsettable_memref_p (operands[0]));
13657
13658 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
13659 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
13660
13661 /* When emitting push, take care for source operands on the stack. */
13662 if (push && MEM_P (operands[1])
13663 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
13664 {
13665 if (nparts == 3)
13666 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
13667 XEXP (part[1][2], 0));
13668 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
13669 XEXP (part[1][1], 0));
13670 }
13671
13672 /* We need to do the copy in the right order in case an address register
13673 of the source overlaps the destination. */
13674 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
13675 {
13676 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
13677 collisions++;
13678 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13679 collisions++;
13680 if (nparts == 3
13681 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
13682 collisions++;
13683
13684 /* Collision in the middle part can be handled by reordering. */
13685 if (collisions == 1 && nparts == 3
13686 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13687 {
13688 rtx tmp;
13689 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
13690 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
13691 }
13692
13693 /* If there are more collisions, we can't handle it by reordering.
13694 Do an lea to the last part and use only one colliding move. */
13695 else if (collisions > 1)
13696 {
13697 rtx base;
13698
13699 collisions = 1;
13700
13701 base = part[0][nparts - 1];
13702
13703 /* Handle the case when the last part isn't valid for lea.
13704 Happens in 64-bit mode storing the 12-byte XFmode. */
13705 if (GET_MODE (base) != Pmode)
13706 base = gen_rtx_REG (Pmode, REGNO (base));
13707
13708 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
13709 part[1][0] = replace_equiv_address (part[1][0], base);
13710 part[1][1] = replace_equiv_address (part[1][1],
13711 plus_constant (base, UNITS_PER_WORD));
13712 if (nparts == 3)
13713 part[1][2] = replace_equiv_address (part[1][2],
13714 plus_constant (base, 8));
13715 }
13716 }
13717
13718 if (push)
13719 {
13720 if (!TARGET_64BIT)
13721 {
13722 if (nparts == 3)
13723 {
13724 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
13725 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
13726 emit_move_insn (part[0][2], part[1][2]);
13727 }
13728 }
13729 else
13730 {
13731 /* In 64bit mode we don't have a 32bit push available. If this is a
13732 register, that is OK - we will just use the larger counterpart. We also
13733 retype memory - this comes from an attempt to avoid the REX prefix on
13734 moving the second half of a TFmode value. */
13735 if (GET_MODE (part[1][1]) == SImode)
13736 {
13737 switch (GET_CODE (part[1][1]))
13738 {
13739 case MEM:
13740 part[1][1] = adjust_address (part[1][1], DImode, 0);
13741 break;
13742
13743 case REG:
13744 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
13745 break;
13746
13747 default:
13748 gcc_unreachable ();
13749 }
13750
13751 if (GET_MODE (part[1][0]) == SImode)
13752 part[1][0] = part[1][1];
13753 }
13754 }
13755 emit_move_insn (part[0][1], part[1][1]);
13756 emit_move_insn (part[0][0], part[1][0]);
13757 return;
13758 }
13759
13760 /* Choose the correct order so as not to overwrite the source before it is copied. */
13761 if ((REG_P (part[0][0])
13762 && REG_P (part[1][1])
13763 && (REGNO (part[0][0]) == REGNO (part[1][1])
13764 || (nparts == 3
13765 && REGNO (part[0][0]) == REGNO (part[1][2]))))
13766 || (collisions > 0
13767 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
13768 {
13769 if (nparts == 3)
13770 {
13771 operands[2] = part[0][2];
13772 operands[3] = part[0][1];
13773 operands[4] = part[0][0];
13774 operands[5] = part[1][2];
13775 operands[6] = part[1][1];
13776 operands[7] = part[1][0];
13777 }
13778 else
13779 {
13780 operands[2] = part[0][1];
13781 operands[3] = part[0][0];
13782 operands[5] = part[1][1];
13783 operands[6] = part[1][0];
13784 }
13785 }
13786 else
13787 {
13788 if (nparts == 3)
13789 {
13790 operands[2] = part[0][0];
13791 operands[3] = part[0][1];
13792 operands[4] = part[0][2];
13793 operands[5] = part[1][0];
13794 operands[6] = part[1][1];
13795 operands[7] = part[1][2];
13796 }
13797 else
13798 {
13799 operands[2] = part[0][0];
13800 operands[3] = part[0][1];
13801 operands[5] = part[1][0];
13802 operands[6] = part[1][1];
13803 }
13804 }
13805
13806 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
13807 if (optimize_size)
13808 {
13809 if (CONST_INT_P (operands[5])
13810 && operands[5] != const0_rtx
13811 && REG_P (operands[2]))
13812 {
13813 if (CONST_INT_P (operands[6])
13814 && INTVAL (operands[6]) == INTVAL (operands[5]))
13815 operands[6] = operands[2];
13816
13817 if (nparts == 3
13818 && CONST_INT_P (operands[7])
13819 && INTVAL (operands[7]) == INTVAL (operands[5]))
13820 operands[7] = operands[2];
13821 }
13822
13823 if (nparts == 3
13824 && CONST_INT_P (operands[6])
13825 && operands[6] != const0_rtx
13826 && REG_P (operands[3])
13827 && CONST_INT_P (operands[7])
13828 && INTVAL (operands[7]) == INTVAL (operands[6]))
13829 operands[7] = operands[3];
13830 }
13831
13832 emit_move_insn (operands[2], operands[5]);
13833 emit_move_insn (operands[3], operands[6]);
13834 if (nparts == 3)
13835 emit_move_insn (operands[4], operands[7]);
13836
13837 return;
13838 }
13839
13840 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
13841 left shift by a constant, either using a single shift or
13842 a sequence of add instructions. */
13843
13844 static void
13845 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
13846 {
13847 if (count == 1)
13848 {
13849 emit_insn ((mode == DImode
13850 ? gen_addsi3
13851 : gen_adddi3) (operand, operand, operand));
13852 }
13853 else if (!optimize_size
13854 && count * ix86_cost->add <= ix86_cost->shift_const)
13855 {
13856 int i;
13857 for (i=0; i<count; i++)
13858 {
13859 emit_insn ((mode == DImode
13860 ? gen_addsi3
13861 : gen_adddi3) (operand, operand, operand));
13862 }
13863 }
13864 else
13865 emit_insn ((mode == DImode
13866 ? gen_ashlsi3
13867 : gen_ashldi3) (operand, operand, GEN_INT (count)));
13868 }
13869
13870 void
13871 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
13872 {
13873 rtx low[2], high[2];
13874 int count;
13875 const int single_width = mode == DImode ? 32 : 64;
13876
13877 if (CONST_INT_P (operands[2]))
13878 {
13879 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13880 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13881
13882 if (count >= single_width)
13883 {
13884 emit_move_insn (high[0], low[1]);
13885 emit_move_insn (low[0], const0_rtx);
13886
13887 if (count > single_width)
13888 ix86_expand_ashl_const (high[0], count - single_width, mode);
13889 }
13890 else
13891 {
13892 if (!rtx_equal_p (operands[0], operands[1]))
13893 emit_move_insn (operands[0], operands[1]);
13894 emit_insn ((mode == DImode
13895 ? gen_x86_shld_1
13896 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
13897 ix86_expand_ashl_const (low[0], count, mode);
13898 }
13899 return;
13900 }
13901
13902 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13903
13904 if (operands[1] == const1_rtx)
13905 {
13906 /* Assuming we've chosen QImode capable registers, then 1 << N
13907 can be done with two 32/64-bit shifts, no branches, no cmoves. */
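/* An illustrative sketch for DImode, assuming the result is split into
%edx:%eax and the shift count is in %cl:

xorl %eax, %eax
xorl %edx, %edx
testb $32, %cl
sete %al                 low  = ((count & 32) == 0)
setne %dl                high = ((count & 32) != 0)
sall %cl, %eax           the hardware masks the count to 0..31
sall %cl, %edx  */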
13908 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
13909 {
13910 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
13911
13912 ix86_expand_clear (low[0]);
13913 ix86_expand_clear (high[0]);
13914 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
13915
13916 d = gen_lowpart (QImode, low[0]);
13917 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13918 s = gen_rtx_EQ (QImode, flags, const0_rtx);
13919 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13920
13921 d = gen_lowpart (QImode, high[0]);
13922 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13923 s = gen_rtx_NE (QImode, flags, const0_rtx);
13924 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13925 }
13926
13927 /* Otherwise, we can get the same results by manually performing
13928 a bit extract operation on bit 5/6, and then performing the two
13929 shifts. The two methods of getting 0/1 into low/high are exactly
13930 the same size. Avoiding the shift in the bit extract case helps
13931 pentium4 a bit; no one else seems to care much either way. */
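/* A sketch of the bit-extract variant for DImode, again with the result
in %edx:%eax and the count in %ecx:

movl %ecx, %edx
shrl $5, %edx
andl $1, %edx            high = (count >> 5) & 1
movl %edx, %eax
xorl $1, %eax            low  = high ^ 1

followed by the same two shifts by %cl as above. */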
13932 else
13933 {
13934 rtx x;
13935
13936 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
13937 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
13938 else
13939 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
13940 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
13941
13942 emit_insn ((mode == DImode
13943 ? gen_lshrsi3
13944 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
13945 emit_insn ((mode == DImode
13946 ? gen_andsi3
13947 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
13948 emit_move_insn (low[0], high[0]);
13949 emit_insn ((mode == DImode
13950 ? gen_xorsi3
13951 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
13952 }
13953
13954 emit_insn ((mode == DImode
13955 ? gen_ashlsi3
13956 : gen_ashldi3) (low[0], low[0], operands[2]));
13957 emit_insn ((mode == DImode
13958 ? gen_ashlsi3
13959 : gen_ashldi3) (high[0], high[0], operands[2]));
13960 return;
13961 }
13962
13963 if (operands[1] == constm1_rtx)
13964 {
13965 /* For -1 << N, we can avoid the shld instruction, because we
13966 know that we're shifting 0...31/63 ones into a -1. */
13967 emit_move_insn (low[0], constm1_rtx);
13968 if (optimize_size)
13969 emit_move_insn (high[0], low[0]);
13970 else
13971 emit_move_insn (high[0], constm1_rtx);
13972 }
13973 else
13974 {
13975 if (!rtx_equal_p (operands[0], operands[1]))
13976 emit_move_insn (operands[0], operands[1]);
13977
13978 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13979 emit_insn ((mode == DImode
13980 ? gen_x86_shld_1
13981 : gen_x86_64_shld) (high[0], low[0], operands[2]));
13982 }
13983
13984 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
13985
13986 if (TARGET_CMOVE && scratch)
13987 {
13988 ix86_expand_clear (scratch);
13989 emit_insn ((mode == DImode
13990 ? gen_x86_shift_adj_1
13991 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
13992 }
13993 else
13994 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
13995 }
13996
13997 void
13998 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
13999 {
14000 rtx low[2], high[2];
14001 int count;
14002 const int single_width = mode == DImode ? 32 : 64;
14003
14004 if (CONST_INT_P (operands[2]))
14005 {
14006 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14007 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14008
14009 if (count == single_width * 2 - 1)
14010 {
14011 emit_move_insn (high[0], high[1]);
14012 emit_insn ((mode == DImode
14013 ? gen_ashrsi3
14014 : gen_ashrdi3) (high[0], high[0],
14015 GEN_INT (single_width - 1)));
14016 emit_move_insn (low[0], high[0]);
14017
14018 }
14019 else if (count >= single_width)
14020 {
14021 emit_move_insn (low[0], high[1]);
14022 emit_move_insn (high[0], low[0]);
14023 emit_insn ((mode == DImode
14024 ? gen_ashrsi3
14025 : gen_ashrdi3) (high[0], high[0],
14026 GEN_INT (single_width - 1)));
14027 if (count > single_width)
14028 emit_insn ((mode == DImode
14029 ? gen_ashrsi3
14030 : gen_ashrdi3) (low[0], low[0],
14031 GEN_INT (count - single_width)));
14032 }
14033 else
14034 {
14035 if (!rtx_equal_p (operands[0], operands[1]))
14036 emit_move_insn (operands[0], operands[1]);
14037 emit_insn ((mode == DImode
14038 ? gen_x86_shrd_1
14039 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14040 emit_insn ((mode == DImode
14041 ? gen_ashrsi3
14042 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
14043 }
14044 }
14045 else
14046 {
14047 if (!rtx_equal_p (operands[0], operands[1]))
14048 emit_move_insn (operands[0], operands[1]);
14049
14050 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14051
14052 emit_insn ((mode == DImode
14053 ? gen_x86_shrd_1
14054 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14055 emit_insn ((mode == DImode
14056 ? gen_ashrsi3
14057 : gen_ashrdi3) (high[0], high[0], operands[2]));
14058
14059 if (TARGET_CMOVE && scratch)
14060 {
14061 emit_move_insn (scratch, high[0]);
14062 emit_insn ((mode == DImode
14063 ? gen_ashrsi3
14064 : gen_ashrdi3) (scratch, scratch,
14065 GEN_INT (single_width - 1)));
14066 emit_insn ((mode == DImode
14067 ? gen_x86_shift_adj_1
14068 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
14069 scratch));
14070 }
14071 else
14072 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
14073 }
14074 }
14075
14076 void
14077 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
14078 {
14079 rtx low[2], high[2];
14080 int count;
14081 const int single_width = mode == DImode ? 32 : 64;
14082
14083 if (CONST_INT_P (operands[2]))
14084 {
14085 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14086 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14087
14088 if (count >= single_width)
14089 {
14090 emit_move_insn (low[0], high[1]);
14091 ix86_expand_clear (high[0]);
14092
14093 if (count > single_width)
14094 emit_insn ((mode == DImode
14095 ? gen_lshrsi3
14096 : gen_lshrdi3) (low[0], low[0],
14097 GEN_INT (count - single_width)));
14098 }
14099 else
14100 {
14101 if (!rtx_equal_p (operands[0], operands[1]))
14102 emit_move_insn (operands[0], operands[1]);
14103 emit_insn ((mode == DImode
14104 ? gen_x86_shrd_1
14105 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14106 emit_insn ((mode == DImode
14107 ? gen_lshrsi3
14108 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
14109 }
14110 }
14111 else
14112 {
14113 if (!rtx_equal_p (operands[0], operands[1]))
14114 emit_move_insn (operands[0], operands[1]);
14115
14116 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14117
14118 emit_insn ((mode == DImode
14119 ? gen_x86_shrd_1
14120 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14121 emit_insn ((mode == DImode
14122 ? gen_lshrsi3
14123 : gen_lshrdi3) (high[0], high[0], operands[2]));
14124
14125 /* Heh. By reversing the arguments, we can reuse this pattern. */
14126 if (TARGET_CMOVE && scratch)
14127 {
14128 ix86_expand_clear (scratch);
14129 emit_insn ((mode == DImode
14130 ? gen_x86_shift_adj_1
14131 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
14132 scratch));
14133 }
14134 else
14135 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
14136 }
14137 }
14138
14139 /* Predict just emitted jump instruction to be taken with probability PROB. */
14140 static void
14141 predict_jump (int prob)
14142 {
14143 rtx insn = get_last_insn ();
14144 gcc_assert (JUMP_P (insn));
14145 REG_NOTES (insn)
14146 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14147 GEN_INT (prob),
14148 REG_NOTES (insn));
14149 }
14150
14151 /* Helper function for the string operations below. Test VARIABLE whether
14152 it is aligned to VALUE bytes. If it is, jump to the label. */
14153 static rtx
14154 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
14155 {
14156 rtx label = gen_label_rtx ();
14157 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
14158 if (GET_MODE (variable) == DImode)
14159 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
14160 else
14161 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
14162 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
14163 1, label);
14164 if (epilogue)
14165 predict_jump (REG_BR_PROB_BASE * 50 / 100);
14166 else
14167 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14168 return label;
14169 }
14170
14171 /* Decrease COUNTREG by VALUE. */
14172 static void
14173 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
14174 {
14175 if (GET_MODE (countreg) == DImode)
14176 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
14177 else
14178 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
14179 }
14180
14181 /* Zero extend possibly SImode EXP to Pmode register. */
14182 rtx
14183 ix86_zero_extend_to_Pmode (rtx exp)
14184 {
14185 rtx r;
14186 if (GET_MODE (exp) == VOIDmode)
14187 return force_reg (Pmode, exp);
14188 if (GET_MODE (exp) == Pmode)
14189 return copy_to_mode_reg (Pmode, exp);
14190 r = gen_reg_rtx (Pmode);
14191 emit_insn (gen_zero_extendsidi2 (r, exp));
14192 return r;
14193 }
14194
14195 /* Divide COUNTREG by SCALE. */
14196 static rtx
14197 scale_counter (rtx countreg, int scale)
14198 {
14199 rtx sc;
14200 rtx piece_size_mask;
14201
14202 if (scale == 1)
14203 return countreg;
14204 if (CONST_INT_P (countreg))
14205 return GEN_INT (INTVAL (countreg) / scale);
14206 gcc_assert (REG_P (countreg));
14207
14208 piece_size_mask = GEN_INT (scale - 1);
14209 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
14210 GEN_INT (exact_log2 (scale)),
14211 NULL, 1, OPTAB_DIRECT);
14212 return sc;
14213 }
14214
14215 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
14216 DImode for constant loop counts. */
14217
14218 static enum machine_mode
14219 counter_mode (rtx count_exp)
14220 {
14221 if (GET_MODE (count_exp) != VOIDmode)
14222 return GET_MODE (count_exp);
14223 if (GET_CODE (count_exp) != CONST_INT)
14224 return Pmode;
14225 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
14226 return DImode;
14227 return SImode;
14228 }
14229
14230 /* When SRCPTR is non-NULL, output a simple loop to move memory pointed
14231 to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
14232 the overall size is COUNT, specified in bytes. When SRCPTR is NULL,
14233 output the equivalent loop to set memory to VALUE (supposed to be in MODE).
14234
14235 The size is rounded down to a whole number of chunks moved at once.
14236 SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info. */
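/* Roughly (an illustrative sketch, not the exact RTL emitted):

size = count & -(GET_MODE_SIZE (mode) * unroll);
iter = 0;
do
{
copy (or store) UNROLL chunks of MODE at DESTPTR + iter (and SRCPTR + iter);
iter += GET_MODE_SIZE (mode) * unroll;
}
while (iter < size);
DESTPTR += iter;  and, when copying, SRCPTR += iter as well.  */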
14237
14238
14239 static void
14240 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
14241 rtx destptr, rtx srcptr, rtx value,
14242 rtx count, enum machine_mode mode, int unroll,
14243 int expected_size)
14244 {
14245 rtx out_label, top_label, iter, tmp;
14246 enum machine_mode iter_mode = counter_mode (count);
14247 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
14248 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
14249 rtx size;
14250 rtx x_addr;
14251 rtx y_addr;
14252 int i;
14253
14254 top_label = gen_label_rtx ();
14255 out_label = gen_label_rtx ();
14256 iter = gen_reg_rtx (iter_mode);
14257
14258 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
14259 NULL, 1, OPTAB_DIRECT);
14260 /* Those two should combine. */
14261 if (piece_size == const1_rtx)
14262 {
14263 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
14264 true, out_label);
14265 predict_jump (REG_BR_PROB_BASE * 10 / 100);
14266 }
14267 emit_move_insn (iter, const0_rtx);
14268
14269 emit_label (top_label);
14270
14271 tmp = convert_modes (Pmode, iter_mode, iter, true);
14272 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
14273 destmem = change_address (destmem, mode, x_addr);
14274
14275 if (srcmem)
14276 {
14277 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
14278 srcmem = change_address (srcmem, mode, y_addr);
14279
14280 /* When unrolling for chips that reorder memory reads and writes,
14281 we can save registers by using a single temporary.
14282 Using 4 temporaries is also overkill in 32bit mode. */
14283 if (!TARGET_64BIT && 0)
14284 {
14285 for (i = 0; i < unroll; i++)
14286 {
14287 if (i)
14288 {
14289 destmem =
14290 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14291 srcmem =
14292 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14293 }
14294 emit_move_insn (destmem, srcmem);
14295 }
14296 }
14297 else
14298 {
14299 rtx tmpreg[4];
14300 gcc_assert (unroll <= 4);
14301 for (i = 0; i < unroll; i++)
14302 {
14303 tmpreg[i] = gen_reg_rtx (mode);
14304 if (i)
14305 {
14306 srcmem =
14307 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14308 }
14309 emit_move_insn (tmpreg[i], srcmem);
14310 }
14311 for (i = 0; i < unroll; i++)
14312 {
14313 if (i)
14314 {
14315 destmem =
14316 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14317 }
14318 emit_move_insn (destmem, tmpreg[i]);
14319 }
14320 }
14321 }
14322 else
14323 for (i = 0; i < unroll; i++)
14324 {
14325 if (i)
14326 destmem =
14327 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14328 emit_move_insn (destmem, value);
14329 }
14330
14331 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
14332 true, OPTAB_LIB_WIDEN);
14333 if (tmp != iter)
14334 emit_move_insn (iter, tmp);
14335
14336 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
14337 true, top_label);
14338 if (expected_size != -1)
14339 {
14340 expected_size /= GET_MODE_SIZE (mode) * unroll;
14341 if (expected_size == 0)
14342 predict_jump (0);
14343 else if (expected_size > REG_BR_PROB_BASE)
14344 predict_jump (REG_BR_PROB_BASE - 1);
14345 else
14346 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
14347 }
14348 else
14349 predict_jump (REG_BR_PROB_BASE * 80 / 100);
14350 iter = ix86_zero_extend_to_Pmode (iter);
14351 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
14352 true, OPTAB_LIB_WIDEN);
14353 if (tmp != destptr)
14354 emit_move_insn (destptr, tmp);
14355 if (srcptr)
14356 {
14357 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
14358 true, OPTAB_LIB_WIDEN);
14359 if (tmp != srcptr)
14360 emit_move_insn (srcptr, tmp);
14361 }
14362 emit_label (out_label);
14363 }
14364
14365 /* Output "rep; mov" instruction.
14366 Arguments have same meaning as for previous function */
14367 static void
14368 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
14369 rtx destptr, rtx srcptr,
14370 rtx count,
14371 enum machine_mode mode)
14372 {
14373 rtx destexp;
14374 rtx srcexp;
14375 rtx countreg;
14376
14377 /* If the size is known and a multiple of 4, it is shorter to use a wider rep movs. */
14378 if (mode == QImode && CONST_INT_P (count)
14379 && !(INTVAL (count) & 3))
14380 mode = SImode;
14381
14382 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14383 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14384 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
14385 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
14386 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14387 if (mode != QImode)
14388 {
14389 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14390 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14391 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14392 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
14393 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14394 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
14395 }
14396 else
14397 {
14398 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14399 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
14400 }
14401 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
14402 destexp, srcexp));
14403 }
14404
14405 /* Output "rep; stos" instruction.
14406 Arguments have same meaning as for previous function */
14407 static void
14408 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
14409 rtx count,
14410 enum machine_mode mode)
14411 {
14412 rtx destexp;
14413 rtx countreg;
14414
14415 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14416 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14417 value = force_reg (mode, gen_lowpart (mode, value));
14418 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14419 if (mode != QImode)
14420 {
14421 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14422 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14423 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14424 }
14425 else
14426 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14427 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
14428 }
14429
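/* Emit a single string move of MODE from SRCMEM to DESTMEM at byte OFFSET,
addressing relative to SRCPTR and DESTPTR. */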
14430 static void
14431 emit_strmov (rtx destmem, rtx srcmem,
14432 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
14433 {
14434 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
14435 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
14436 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14437 }
14438
14439 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
14440 static void
14441 expand_movmem_epilogue (rtx destmem, rtx srcmem,
14442 rtx destptr, rtx srcptr, rtx count, int max_size)
14443 {
14444 rtx src, dest;
14445 if (CONST_INT_P (count))
14446 {
14447 HOST_WIDE_INT countval = INTVAL (count);
14448 int offset = 0;
14449
14450 if ((countval & 0x10) && max_size > 16)
14451 {
14452 if (TARGET_64BIT)
14453 {
14454 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14455 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
14456 }
14457 else
14458 gcc_unreachable ();
14459 offset += 16;
14460 }
14461 if ((countval & 0x08) && max_size > 8)
14462 {
14463 if (TARGET_64BIT)
14464 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14465 else
14466 {
14467 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14468 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
14469 }
14470 offset += 8;
14471 }
14472 if ((countval & 0x04) && max_size > 4)
14473 {
14474 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14475 offset += 4;
14476 }
14477 if ((countval & 0x02) && max_size > 2)
14478 {
14479 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
14480 offset += 2;
14481 }
14482 if ((countval & 0x01) && max_size > 1)
14483 {
14484 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
14485 offset += 1;
14486 }
14487 return;
14488 }
14489 if (max_size > 8)
14490 {
14491 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
14492 count, 1, OPTAB_DIRECT);
14493 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
14494 count, QImode, 1, 4);
14495 return;
14496 }
14497
14498 /* With single stringop insns we can cheaply increase the dest and src pointers.
14499 Otherwise we save code size by maintaining an offset (zero is readily
14500 available from the preceding rep operation) and using x86 addressing modes.
14501 */
14502 if (TARGET_SINGLE_STRINGOP)
14503 {
14504 if (max_size > 4)
14505 {
14506 rtx label = ix86_expand_aligntest (count, 4, true);
14507 src = change_address (srcmem, SImode, srcptr);
14508 dest = change_address (destmem, SImode, destptr);
14509 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14510 emit_label (label);
14511 LABEL_NUSES (label) = 1;
14512 }
14513 if (max_size > 2)
14514 {
14515 rtx label = ix86_expand_aligntest (count, 2, true);
14516 src = change_address (srcmem, HImode, srcptr);
14517 dest = change_address (destmem, HImode, destptr);
14518 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14519 emit_label (label);
14520 LABEL_NUSES (label) = 1;
14521 }
14522 if (max_size > 1)
14523 {
14524 rtx label = ix86_expand_aligntest (count, 1, true);
14525 src = change_address (srcmem, QImode, srcptr);
14526 dest = change_address (destmem, QImode, destptr);
14527 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14528 emit_label (label);
14529 LABEL_NUSES (label) = 1;
14530 }
14531 }
14532 else
14533 {
14534 rtx offset = force_reg (Pmode, const0_rtx);
14535 rtx tmp;
14536
14537 if (max_size > 4)
14538 {
14539 rtx label = ix86_expand_aligntest (count, 4, true);
14540 src = change_address (srcmem, SImode, srcptr);
14541 dest = change_address (destmem, SImode, destptr);
14542 emit_move_insn (dest, src);
14543 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
14544 true, OPTAB_LIB_WIDEN);
14545 if (tmp != offset)
14546 emit_move_insn (offset, tmp);
14547 emit_label (label);
14548 LABEL_NUSES (label) = 1;
14549 }
14550 if (max_size > 2)
14551 {
14552 rtx label = ix86_expand_aligntest (count, 2, true);
14553 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14554 src = change_address (srcmem, HImode, tmp);
14555 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14556 dest = change_address (destmem, HImode, tmp);
14557 emit_move_insn (dest, src);
14558 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
14559 true, OPTAB_LIB_WIDEN);
14560 if (tmp != offset)
14561 emit_move_insn (offset, tmp);
14562 emit_label (label);
14563 LABEL_NUSES (label) = 1;
14564 }
14565 if (max_size > 1)
14566 {
14567 rtx label = ix86_expand_aligntest (count, 1, true);
14568 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14569 src = change_address (srcmem, QImode, tmp);
14570 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14571 dest = change_address (destmem, QImode, tmp);
14572 emit_move_insn (dest, src);
14573 emit_label (label);
14574 LABEL_NUSES (label) = 1;
14575 }
14576 }
14577 }
14578
14579 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
14580 static void
14581 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
14582 rtx count, int max_size)
14583 {
14584 count =
14585 expand_simple_binop (counter_mode (count), AND, count,
14586 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
14587 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
14588 gen_lowpart (QImode, value), count, QImode,
14589 1, max_size / 2);
14590 }
14591
14592 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
14593 static void
14594 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
14595 {
14596 rtx dest;
14597
14598 if (CONST_INT_P (count))
14599 {
14600 HOST_WIDE_INT countval = INTVAL (count);
14601 int offset = 0;
14602
14603 if ((countval & 0x10) && max_size > 16)
14604 {
14605 if (TARGET_64BIT)
14606 {
14607 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14608 emit_insn (gen_strset (destptr, dest, value));
14609 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
14610 emit_insn (gen_strset (destptr, dest, value));
14611 }
14612 else
14613 gcc_unreachable ();
14614 offset += 16;
14615 }
14616 if ((countval & 0x08) && max_size > 8)
14617 {
14618 if (TARGET_64BIT)
14619 {
14620 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14621 emit_insn (gen_strset (destptr, dest, value));
14622 }
14623 else
14624 {
14625 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14626 emit_insn (gen_strset (destptr, dest, value));
14627 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
14628 emit_insn (gen_strset (destptr, dest, value));
14629 }
14630 offset += 8;
14631 }
14632 if ((countval & 0x04) && max_size > 4)
14633 {
14634 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14635 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14636 offset += 4;
14637 }
14638 if ((countval & 0x02) && max_size > 2)
14639 {
14640 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
14641 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14642 offset += 2;
14643 }
14644 if ((countval & 0x01) && max_size > 1)
14645 {
14646 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
14647 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14648 offset += 1;
14649 }
14650 return;
14651 }
14652 if (max_size > 32)
14653 {
14654 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
14655 return;
14656 }
14657 if (max_size > 16)
14658 {
14659 rtx label = ix86_expand_aligntest (count, 16, true);
14660 if (TARGET_64BIT)
14661 {
14662 dest = change_address (destmem, DImode, destptr);
14663 emit_insn (gen_strset (destptr, dest, value));
14664 emit_insn (gen_strset (destptr, dest, value));
14665 }
14666 else
14667 {
14668 dest = change_address (destmem, SImode, destptr);
14669 emit_insn (gen_strset (destptr, dest, value));
14670 emit_insn (gen_strset (destptr, dest, value));
14671 emit_insn (gen_strset (destptr, dest, value));
14672 emit_insn (gen_strset (destptr, dest, value));
14673 }
14674 emit_label (label);
14675 LABEL_NUSES (label) = 1;
14676 }
14677 if (max_size > 8)
14678 {
14679 rtx label = ix86_expand_aligntest (count, 8, true);
14680 if (TARGET_64BIT)
14681 {
14682 dest = change_address (destmem, DImode, destptr);
14683 emit_insn (gen_strset (destptr, dest, value));
14684 }
14685 else
14686 {
14687 dest = change_address (destmem, SImode, destptr);
14688 emit_insn (gen_strset (destptr, dest, value));
14689 emit_insn (gen_strset (destptr, dest, value));
14690 }
14691 emit_label (label);
14692 LABEL_NUSES (label) = 1;
14693 }
14694 if (max_size > 4)
14695 {
14696 rtx label = ix86_expand_aligntest (count, 4, true);
14697 dest = change_address (destmem, SImode, destptr);
14698 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14699 emit_label (label);
14700 LABEL_NUSES (label) = 1;
14701 }
14702 if (max_size > 2)
14703 {
14704 rtx label = ix86_expand_aligntest (count, 2, true);
14705 dest = change_address (destmem, HImode, destptr);
14706 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14707 emit_label (label);
14708 LABEL_NUSES (label) = 1;
14709 }
14710 if (max_size > 1)
14711 {
14712 rtx label = ix86_expand_aligntest (count, 1, true);
14713 dest = change_address (destmem, QImode, destptr);
14714 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14715 emit_label (label);
14716 LABEL_NUSES (label) = 1;
14717 }
14718 }
14719
14720 /* Copy enough bytes from SRC to DEST to align DEST, which is known to be
14721 aligned to ALIGN, up to DESIRED_ALIGNMENT. */
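/* Illustrative sketch (not the literal expansion): with ALIGN == 1 and
   DESIRED_ALIGNMENT == 8, the prologue below conditionally copies 1, 2 and
   then 4 bytes depending on the low bits of DESTPTR, so at most 7 bytes are
   moved before the destination becomes 8-byte aligned, and COUNT is adjusted
   for each copy that is made.  */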
14722 static void
14723 expand_movmem_prologue (rtx destmem, rtx srcmem,
14724 rtx destptr, rtx srcptr, rtx count,
14725 int align, int desired_alignment)
14726 {
14727 if (align <= 1 && desired_alignment > 1)
14728 {
14729 rtx label = ix86_expand_aligntest (destptr, 1, false);
14730 srcmem = change_address (srcmem, QImode, srcptr);
14731 destmem = change_address (destmem, QImode, destptr);
14732 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14733 ix86_adjust_counter (count, 1);
14734 emit_label (label);
14735 LABEL_NUSES (label) = 1;
14736 }
14737 if (align <= 2 && desired_alignment > 2)
14738 {
14739 rtx label = ix86_expand_aligntest (destptr, 2, false);
14740 srcmem = change_address (srcmem, HImode, srcptr);
14741 destmem = change_address (destmem, HImode, destptr);
14742 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14743 ix86_adjust_counter (count, 2);
14744 emit_label (label);
14745 LABEL_NUSES (label) = 1;
14746 }
14747 if (align <= 4 && desired_alignment > 4)
14748 {
14749 rtx label = ix86_expand_aligntest (destptr, 4, false);
14750 srcmem = change_address (srcmem, SImode, srcptr);
14751 destmem = change_address (destmem, SImode, destptr);
14752 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14753 ix86_adjust_counter (count, 4);
14754 emit_label (label);
14755 LABEL_NUSES (label) = 1;
14756 }
14757 gcc_assert (desired_alignment <= 8);
14758 }
14759
14760 /* Store enough bytes to DEST to align it, known to be aligned to ALIGN,
14761 up to DESIRED_ALIGNMENT. */
14762 static void
14763 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
14764 int align, int desired_alignment)
14765 {
14766 if (align <= 1 && desired_alignment > 1)
14767 {
14768 rtx label = ix86_expand_aligntest (destptr, 1, false);
14769 destmem = change_address (destmem, QImode, destptr);
14770 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
14771 ix86_adjust_counter (count, 1);
14772 emit_label (label);
14773 LABEL_NUSES (label) = 1;
14774 }
14775 if (align <= 2 && desired_alignment > 2)
14776 {
14777 rtx label = ix86_expand_aligntest (destptr, 2, false);
14778 destmem = change_address (destmem, HImode, destptr);
14779 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
14780 ix86_adjust_counter (count, 2);
14781 emit_label (label);
14782 LABEL_NUSES (label) = 1;
14783 }
14784 if (align <= 4 && desired_alignment > 4)
14785 {
14786 rtx label = ix86_expand_aligntest (destptr, 4, false);
14787 destmem = change_address (destmem, SImode, destptr);
14788 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
14789 ix86_adjust_counter (count, 4);
14790 emit_label (label);
14791 LABEL_NUSES (label) = 1;
14792 }
14793 gcc_assert (desired_alignment <= 8);
14794 }
14795
14796 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
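/* Illustrative sketch with a hypothetical cost table: if the selected
   stringop_algs entry were {libcall, {{256, loop}, {8192, rep_prefix_4_byte},
   {-1, libcall}}}, an EXPECTED_SIZE of 1000 would pick rep_prefix_4_byte,
   while an unknown size would fall back to unknown_size (or, with
   -minline-stringops-dynamically, to an inline variant guarded by a runtime
   size check).  */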
14797 static enum stringop_alg
14798 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
14799 int *dynamic_check)
14800 {
14801 const struct stringop_algs * algs;
14802
14803 *dynamic_check = -1;
14804 if (memset)
14805 algs = &ix86_cost->memset[TARGET_64BIT != 0];
14806 else
14807 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
14808 if (stringop_alg != no_stringop)
14809 return stringop_alg;
14810 /* rep; movq or rep; movl is the smallest variant. */
14811 else if (optimize_size)
14812 {
14813 if (!count || (count & 3))
14814 return rep_prefix_1_byte;
14815 else
14816 return rep_prefix_4_byte;
14817 }
14818 /* Very tiny blocks are best handled via the loop; REP is expensive
14819 to set up. */
14820 else if (expected_size != -1 && expected_size < 4)
14821 return loop_1_byte;
14822 else if (expected_size != -1)
14823 {
14824 unsigned int i;
14825 enum stringop_alg alg = libcall;
14826 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
14827 {
14828 gcc_assert (algs->size[i].max);
14829 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
14830 {
14831 if (algs->size[i].alg != libcall)
14832 alg = algs->size[i].alg;
14833 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
14834 last non-libcall inline algorithm. */
14835 if (TARGET_INLINE_ALL_STRINGOPS)
14836 {
14837 /* When the current size is best copied by a libcall, but we
14838 are still forced to inline, run the heuristic below that
14839 will pick code for medium-sized blocks. */
14840 if (alg != libcall)
14841 return alg;
14842 break;
14843 }
14844 else
14845 return algs->size[i].alg;
14846 }
14847 }
14848 gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
14849 }
14850 /* When asked to inline the call anyway, try to pick a meaningful choice.
14851 We look for the maximal size of block that is faster to copy by hand
14852 and take blocks of at most that size, guessing that the average size
14853 will be roughly half of it.
14854
14855 If this turns out to be bad, we might simply specify the preferred
14856 choice in ix86_costs. */
14857 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14858 && algs->unknown_size == libcall)
14859 {
14860 int max = -1;
14861 enum stringop_alg alg;
14862 int i;
14863
14864 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
14865 if (algs->size[i].alg != libcall && algs->size[i].alg)
14866 max = algs->size[i].max;
14867 if (max == -1)
14868 max = 4096;
14869 alg = decide_alg (count, max / 2, memset, dynamic_check);
14870 gcc_assert (*dynamic_check == -1);
14871 gcc_assert (alg != libcall);
14872 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14873 *dynamic_check = max;
14874 return alg;
14875 }
14876 return algs->unknown_size;
14877 }
14878
14879 /* Decide on alignment. We know that the operand is already aligned to ALIGN
14880 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
14881 static int
14882 decide_alignment (int align,
14883 enum stringop_alg alg,
14884 int expected_size)
14885 {
14886 int desired_align = 0;
14887 switch (alg)
14888 {
14889 case no_stringop:
14890 gcc_unreachable ();
14891 case loop:
14892 case unrolled_loop:
14893 desired_align = GET_MODE_SIZE (Pmode);
14894 break;
14895 case rep_prefix_8_byte:
14896 desired_align = 8;
14897 break;
14898 case rep_prefix_4_byte:
14899 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
14900 copying a whole cacheline at once. */
14901 if (TARGET_PENTIUMPRO)
14902 desired_align = 8;
14903 else
14904 desired_align = 4;
14905 break;
14906 case rep_prefix_1_byte:
14907 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
14908 copying a whole cacheline at once. */
14909 if (TARGET_PENTIUMPRO)
14910 desired_align = 8;
14911 else
14912 desired_align = 1;
14913 break;
14914 case loop_1_byte:
14915 desired_align = 1;
14916 break;
14917 case libcall:
14918 return 0;
14919 }
14920
14921 if (optimize_size)
14922 desired_align = 1;
14923 if (desired_align < align)
14924 desired_align = align;
14925 if (expected_size != -1 && expected_size < 4)
14926 desired_align = align;
14927 return desired_align;
14928 }
14929
14930 /* Return the smallest power of 2 greater than VAL. */
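/* For example, smallest_pow2_greater_than (4) == 8 and
   smallest_pow2_greater_than (7) == 8; the result is always strictly
   greater than VAL.  */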
14931 static int
14932 smallest_pow2_greater_than (int val)
14933 {
14934 int ret = 1;
14935 while (ret <= val)
14936 ret <<= 1;
14937 return ret;
14938 }
14939
14940 /* Expand string move (memcpy) operation. Use i386 string operations when
14941 profitable. ix86_expand_setmem contains similar code. The code depends upon
14942 architecture, block size and alignment, but always has the same
14943 overall structure:
14944
14945 1) Prologue guard: Conditional that jumps up to the epilogues for small
14946 blocks that can be handled by the epilogue alone. This is faster, but
14947 also needed for correctness, since the prologue assumes the block is
14948 larger than the desired alignment.
14949
14950 Optional dynamic check for size and libcall for large
14951 blocks is emitted here too, with -minline-stringops-dynamically.
14952
14953 2) Prologue: copy the first few bytes in order to get the destination
14954 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14955 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14956 We emit either a jump tree on power-of-two sized blocks, or a byte loop.
14957
14958 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14959 with specified algorithm.
14960
14961 4) Epilogue: code copying tail of the block that is too small to be
14962 handled by main body (or up to size guarded by prologue guard). */
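/* Illustrative sketch of the emitted structure (pseudo code, not the literal
   expansion) for a variable COUNT with alg == rep_prefix_4_byte, ALIGN == 1
   and DESIRED_ALIGN == 4:

     if (count < 4) goto epilogue;         1) prologue guard
     copy up to 3 bytes to align dest      2) alignment prologue
     rep movsl                             3) main body
   epilogue:
     copy the remaining count & 3 bytes    4) epilogue  */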
14963
14964 int
14965 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
14966 rtx expected_align_exp, rtx expected_size_exp)
14967 {
14968 rtx destreg;
14969 rtx srcreg;
14970 rtx label = NULL;
14971 rtx tmp;
14972 rtx jump_around_label = NULL;
14973 HOST_WIDE_INT align = 1;
14974 unsigned HOST_WIDE_INT count = 0;
14975 HOST_WIDE_INT expected_size = -1;
14976 int size_needed = 0, epilogue_size_needed;
14977 int desired_align = 0;
14978 enum stringop_alg alg;
14979 int dynamic_check;
14980
14981 if (CONST_INT_P (align_exp))
14982 align = INTVAL (align_exp);
14983 /* i386 can do misaligned access at reasonably increased cost. */
14984 if (CONST_INT_P (expected_align_exp)
14985 && INTVAL (expected_align_exp) > align)
14986 align = INTVAL (expected_align_exp);
14987 if (CONST_INT_P (count_exp))
14988 count = expected_size = INTVAL (count_exp);
14989 if (CONST_INT_P (expected_size_exp) && count == 0)
14990 expected_size = INTVAL (expected_size_exp);
14991
14992 /* Step 0: Decide on preferred algorithm, desired alignment and
14993 size of chunks to be copied by main loop. */
14994
14995 alg = decide_alg (count, expected_size, false, &dynamic_check);
14996 desired_align = decide_alignment (align, alg, expected_size);
14997
14998 if (!TARGET_ALIGN_STRINGOPS)
14999 align = desired_align;
15000
15001 if (alg == libcall)
15002 return 0;
15003 gcc_assert (alg != no_stringop);
15004 if (!count)
15005 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
15006 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15007 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
15008 switch (alg)
15009 {
15010 case libcall:
15011 case no_stringop:
15012 gcc_unreachable ();
15013 case loop:
15014 size_needed = GET_MODE_SIZE (Pmode);
15015 break;
15016 case unrolled_loop:
15017 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
15018 break;
15019 case rep_prefix_8_byte:
15020 size_needed = 8;
15021 break;
15022 case rep_prefix_4_byte:
15023 size_needed = 4;
15024 break;
15025 case rep_prefix_1_byte:
15026 case loop_1_byte:
15027 size_needed = 1;
15028 break;
15029 }
15030
15031 epilogue_size_needed = size_needed;
15032
15033 /* Step 1: Prologue guard. */
15034
15035 /* Alignment code needs count to be in register. */
15036 if (CONST_INT_P (count_exp) && desired_align > align)
15037 {
15038 enum machine_mode mode = SImode;
15039 if (TARGET_64BIT && (count & ~0xffffffff))
15040 mode = DImode;
15041 count_exp = force_reg (mode, count_exp);
15042 }
15043 gcc_assert (desired_align >= 1 && align >= 1);
15044
15045 /* Ensure that alignment prologue won't copy past end of block. */
15046 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15047 {
15048 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15049 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15050 Make sure it is power of 2. */
15051 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15052
15053 label = gen_label_rtx ();
15054 emit_cmp_and_jump_insns (count_exp,
15055 GEN_INT (epilogue_size_needed),
15056 LTU, 0, counter_mode (count_exp), 1, label);
15057 if (GET_CODE (count_exp) == CONST_INT)
15058 ;
15059 else if (expected_size == -1 || expected_size < epilogue_size_needed)
15060 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15061 else
15062 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15063 }
15064 /* Emit code to decide on runtime whether library call or inline should be
15065 used. */
15066 if (dynamic_check != -1)
15067 {
15068 rtx hot_label = gen_label_rtx ();
15069 jump_around_label = gen_label_rtx ();
15070 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15071 LEU, 0, GET_MODE (count_exp), 1, hot_label);
15072 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15073 emit_block_move_via_libcall (dst, src, count_exp, false);
15074 emit_jump (jump_around_label);
15075 emit_label (hot_label);
15076 }
15077
15078 /* Step 2: Alignment prologue. */
15079
15080 if (desired_align > align)
15081 {
15082 /* Except for the first move in the epilogue, we no longer know
15083 the constant offset in aliasing info. It does not seem worth
15084 the pain to maintain it for the first move, so throw away
15085 the info early. */
15086 src = change_address (src, BLKmode, srcreg);
15087 dst = change_address (dst, BLKmode, destreg);
15088 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
15089 desired_align);
15090 }
15091 if (label && size_needed == 1)
15092 {
15093 emit_label (label);
15094 LABEL_NUSES (label) = 1;
15095 label = NULL;
15096 }
15097
15098 /* Step 3: Main loop. */
15099
15100 switch (alg)
15101 {
15102 case libcall:
15103 case no_stringop:
15104 gcc_unreachable ();
15105 case loop_1_byte:
15106 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15107 count_exp, QImode, 1, expected_size);
15108 break;
15109 case loop:
15110 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15111 count_exp, Pmode, 1, expected_size);
15112 break;
15113 case unrolled_loop:
15114 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
15115 registers for 4 temporaries anyway. */
15116 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15117 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
15118 expected_size);
15119 break;
15120 case rep_prefix_8_byte:
15121 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15122 DImode);
15123 break;
15124 case rep_prefix_4_byte:
15125 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15126 SImode);
15127 break;
15128 case rep_prefix_1_byte:
15129 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15130 QImode);
15131 break;
15132 }
15133 /* Properly adjust the offsets of the source and destination memory for aliasing. */
15134 if (CONST_INT_P (count_exp))
15135 {
15136 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
15137 (count / size_needed) * size_needed);
15138 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15139 (count / size_needed) * size_needed);
15140 }
15141 else
15142 {
15143 src = change_address (src, BLKmode, srcreg);
15144 dst = change_address (dst, BLKmode, destreg);
15145 }
15146
15147 /* Step 4: Epilogue to copy the remaining bytes. */
15148
15149 if (label)
15150 {
15151 /* When the main loop is done, COUNT_EXP might hold the original count,
15152 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
15153 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
15154 bytes. Compensate if needed. */
15155
15156 if (size_needed < epilogue_size_needed)
15157 {
15158 tmp =
15159 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15160 GEN_INT (size_needed - 1), count_exp, 1,
15161 OPTAB_DIRECT);
15162 if (tmp != count_exp)
15163 emit_move_insn (count_exp, tmp);
15164 }
15165 emit_label (label);
15166 LABEL_NUSES (label) = 1;
15167 }
15168
15169 if (count_exp != const0_rtx && epilogue_size_needed > 1)
15170 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
15171 epilogue_size_needed);
15172 if (jump_around_label)
15173 emit_label (jump_around_label);
15174 return 1;
15175 }
15176
15177 /* Helper function for memset. For the QImode value 0xXY produce
15178 0xXYXYXYXY of the width specified by MODE. This is essentially
15179 a multiplication by 0x01010101, but we can do slightly better than
15180 synth_mult by unwinding the sequence by hand on CPUs with
15181 slow multiply. */
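/* Illustrative values (not emitted code): for VAL == 0x5A and MODE == SImode
   the shift/or fallback below computes
     reg = 0x0000005A
     reg |= reg << 8    ->  0x00005A5A
     reg |= reg << 16   ->  0x5A5A5A5A
   which equals 0x5A * 0x01010101.  */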
15182 static rtx
15183 promote_duplicated_reg (enum machine_mode mode, rtx val)
15184 {
15185 enum machine_mode valmode = GET_MODE (val);
15186 rtx tmp;
15187 int nops = mode == DImode ? 3 : 2;
15188
15189 gcc_assert (mode == SImode || mode == DImode);
15190 if (val == const0_rtx)
15191 return copy_to_mode_reg (mode, const0_rtx);
15192 if (CONST_INT_P (val))
15193 {
15194 HOST_WIDE_INT v = INTVAL (val) & 255;
15195
15196 v |= v << 8;
15197 v |= v << 16;
15198 if (mode == DImode)
15199 v |= (v << 16) << 16;
15200 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
15201 }
15202
15203 if (valmode == VOIDmode)
15204 valmode = QImode;
15205 if (valmode != QImode)
15206 val = gen_lowpart (QImode, val);
15207 if (mode == QImode)
15208 return val;
15209 if (!TARGET_PARTIAL_REG_STALL)
15210 nops--;
15211 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
15212 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
15213 <= (ix86_cost->shift_const + ix86_cost->add) * nops
15214 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
15215 {
15216 rtx reg = convert_modes (mode, QImode, val, true);
15217 tmp = promote_duplicated_reg (mode, const1_rtx);
15218 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
15219 OPTAB_DIRECT);
15220 }
15221 else
15222 {
15223 rtx reg = convert_modes (mode, QImode, val, true);
15224
15225 if (!TARGET_PARTIAL_REG_STALL)
15226 if (mode == SImode)
15227 emit_insn (gen_movsi_insv_1 (reg, reg));
15228 else
15229 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
15230 else
15231 {
15232 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
15233 NULL, 1, OPTAB_DIRECT);
15234 reg =
15235 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15236 }
15237 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
15238 NULL, 1, OPTAB_DIRECT);
15239 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15240 if (mode == SImode)
15241 return reg;
15242 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
15243 NULL, 1, OPTAB_DIRECT);
15244 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15245 return reg;
15246 }
15247 }
15248
15249 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
15250 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
15251 alignment from ALIGN to DESIRED_ALIGN. */
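/* Illustrative sketch: a 32-bit expansion with SIZE_NEEDED == 4 promotes VAL
   to SImode, a 64-bit expansion with SIZE_NEEDED == 8 (or an alignment
   prologue that needs 8-byte stores) promotes it to DImode, and a plain byte
   loop with no alignment prologue leaves VAL unpromoted.  */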
15252 static rtx
15253 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
15254 {
15255 rtx promoted_val;
15256
15257 if (TARGET_64BIT
15258 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
15259 promoted_val = promote_duplicated_reg (DImode, val);
15260 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
15261 promoted_val = promote_duplicated_reg (SImode, val);
15262 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
15263 promoted_val = promote_duplicated_reg (HImode, val);
15264 else
15265 promoted_val = val;
15266
15267 return promoted_val;
15268 }
15269
15270 /* Expand string clear operation (bzero). Use i386 string operations when
15271 profitable. See expand_movmem comment for explanation of individual
15272 steps performed. */
15273 int
15274 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
15275 rtx expected_align_exp, rtx expected_size_exp)
15276 {
15277 rtx destreg;
15278 rtx label = NULL;
15279 rtx tmp;
15280 rtx jump_around_label = NULL;
15281 HOST_WIDE_INT align = 1;
15282 unsigned HOST_WIDE_INT count = 0;
15283 HOST_WIDE_INT expected_size = -1;
15284 int size_needed = 0, epilogue_size_needed;
15285 int desired_align = 0;
15286 enum stringop_alg alg;
15287 rtx promoted_val = NULL;
15288 bool force_loopy_epilogue = false;
15289 int dynamic_check;
15290
15291 if (CONST_INT_P (align_exp))
15292 align = INTVAL (align_exp);
15293 /* i386 can do misaligned access at reasonably increased cost. */
15294 if (CONST_INT_P (expected_align_exp)
15295 && INTVAL (expected_align_exp) > align)
15296 align = INTVAL (expected_align_exp);
15297 if (CONST_INT_P (count_exp))
15298 count = expected_size = INTVAL (count_exp);
15299 if (CONST_INT_P (expected_size_exp) && count == 0)
15300 expected_size = INTVAL (expected_size_exp);
15301
15302 /* Step 0: Decide on preferred algorithm, desired alignment and
15303 size of chunks to be copied by main loop. */
15304
15305 alg = decide_alg (count, expected_size, true, &dynamic_check);
15306 desired_align = decide_alignment (align, alg, expected_size);
15307
15308 if (!TARGET_ALIGN_STRINGOPS)
15309 align = desired_align;
15310
15311 if (alg == libcall)
15312 return 0;
15313 gcc_assert (alg != no_stringop);
15314 if (!count)
15315 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
15316 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15317 switch (alg)
15318 {
15319 case libcall:
15320 case no_stringop:
15321 gcc_unreachable ();
15322 case loop:
15323 size_needed = GET_MODE_SIZE (Pmode);
15324 break;
15325 case unrolled_loop:
15326 size_needed = GET_MODE_SIZE (Pmode) * 4;
15327 break;
15328 case rep_prefix_8_byte:
15329 size_needed = 8;
15330 break;
15331 case rep_prefix_4_byte:
15332 size_needed = 4;
15333 break;
15334 case rep_prefix_1_byte:
15335 case loop_1_byte:
15336 size_needed = 1;
15337 break;
15338 }
15339 epilogue_size_needed = size_needed;
15340
15341 /* Step 1: Prologue guard. */
15342
15343 /* Alignment code needs count to be in register. */
15344 if (CONST_INT_P (count_exp) && desired_align > align)
15345 {
15346 enum machine_mode mode = SImode;
15347 if (TARGET_64BIT && (count & ~0xffffffff))
15348 mode = DImode;
15349 count_exp = force_reg (mode, count_exp);
15350 }
15351 /* Do the cheap promotion to allow better CSE across the
15352 main loop and epilogue (i.e. one load of the big constant in
15353 front of all the code). */
15354 if (CONST_INT_P (val_exp))
15355 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15356 desired_align, align);
15357 /* Ensure that alignment prologue won't copy past end of block. */
15358 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15359 {
15360 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15361 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15362 Make sure it is power of 2. */
15363 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15364
15365 /* To improve performance of small blocks, we jump around the VAL
15366 promoting code. This means that if the promoted VAL is not constant,
15367 we might not use it in the epilogue and have to use the byte
15368 loop variant. */
15369 if (epilogue_size_needed > 2 && !promoted_val)
15370 force_loopy_epilogue = true;
15371 label = gen_label_rtx ();
15372 emit_cmp_and_jump_insns (count_exp,
15373 GEN_INT (epilogue_size_needed),
15374 LTU, 0, counter_mode (count_exp), 1, label);
15375 if (GET_CODE (count_exp) == CONST_INT)
15376 ;
15377 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
15378 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15379 else
15380 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15381 }
15382 if (dynamic_check != -1)
15383 {
15384 rtx hot_label = gen_label_rtx ();
15385 jump_around_label = gen_label_rtx ();
15386 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15387 LEU, 0, counter_mode (count_exp), 1, hot_label);
15388 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15389 set_storage_via_libcall (dst, count_exp, val_exp, false);
15390 emit_jump (jump_around_label);
15391 emit_label (hot_label);
15392 }
15393
15394 /* Step 2: Alignment prologue. */
15395
15396 /* Do the expensive promotion once we branched off the small blocks. */
15397 if (!promoted_val)
15398 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15399 desired_align, align);
15400 gcc_assert (desired_align >= 1 && align >= 1);
15401
15402 if (desired_align > align)
15403 {
15404 /* Except for the first move in the epilogue, we no longer know
15405 the constant offset in aliasing info. It does not seem worth
15406 the pain to maintain it for the first move, so throw away
15407 the info early. */
15408 dst = change_address (dst, BLKmode, destreg);
15409 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
15410 desired_align);
15411 }
15412 if (label && size_needed == 1)
15413 {
15414 emit_label (label);
15415 LABEL_NUSES (label) = 1;
15416 label = NULL;
15417 }
15418
15419 /* Step 3: Main loop. */
15420
15421 switch (alg)
15422 {
15423 case libcall:
15424 case no_stringop:
15425 gcc_unreachable ();
15426 case loop_1_byte:
15427 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15428 count_exp, QImode, 1, expected_size);
15429 break;
15430 case loop:
15431 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15432 count_exp, Pmode, 1, expected_size);
15433 break;
15434 case unrolled_loop:
15435 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15436 count_exp, Pmode, 4, expected_size);
15437 break;
15438 case rep_prefix_8_byte:
15439 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15440 DImode);
15441 break;
15442 case rep_prefix_4_byte:
15443 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15444 SImode);
15445 break;
15446 case rep_prefix_1_byte:
15447 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15448 QImode);
15449 break;
15450 }
15451 /* Properly adjust the offset of the destination memory for aliasing. */
15452 if (CONST_INT_P (count_exp))
15453 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15454 (count / size_needed) * size_needed);
15455 else
15456 dst = change_address (dst, BLKmode, destreg);
15457
15458 /* Step 4: Epilogue to copy the remaining bytes. */
15459
15460 if (label)
15461 {
15462 /* When the main loop is done, COUNT_EXP might hold the original count,
15463 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
15464 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
15465 bytes. Compensate if needed. */
15466
15467 if (size_needed < desired_align - align)
15468 {
15469 tmp =
15470 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15471 GEN_INT (size_needed - 1), count_exp, 1,
15472 OPTAB_DIRECT);
15473 size_needed = desired_align - align + 1;
15474 if (tmp != count_exp)
15475 emit_move_insn (count_exp, tmp);
15476 }
15477 emit_label (label);
15478 LABEL_NUSES (label) = 1;
15479 }
15480 if (count_exp != const0_rtx && epilogue_size_needed > 1)
15481 {
15482 if (force_loopy_epilogue)
15483 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
15484 size_needed);
15485 else
15486 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
15487 size_needed);
15488 }
15489 if (jump_around_label)
15490 emit_label (jump_around_label);
15491 return 1;
15492 }
15493
15494 /* Expand the appropriate insns for doing strlen if not just doing
15495 repnz; scasb
15496
15497 out = result, initialized with the start address
15498 align_rtx = alignment of the address.
15499 scratch = scratch register, initialized with the start address when
15500 not aligned, otherwise undefined
15501
15502 This is just the body. It needs the initializations mentioned above and
15503 some address computing at the end. These things are done in i386.md. */
15504
15505 static void
15506 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
15507 {
15508 int align;
15509 rtx tmp;
15510 rtx align_2_label = NULL_RTX;
15511 rtx align_3_label = NULL_RTX;
15512 rtx align_4_label = gen_label_rtx ();
15513 rtx end_0_label = gen_label_rtx ();
15514 rtx mem;
15515 rtx tmpreg = gen_reg_rtx (SImode);
15516 rtx scratch = gen_reg_rtx (SImode);
15517 rtx cmp;
15518
15519 align = 0;
15520 if (CONST_INT_P (align_rtx))
15521 align = INTVAL (align_rtx);
15522
15523 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
15524
15525 /* Is there a known alignment and is it less than 4? */
15526 if (align < 4)
15527 {
15528 rtx scratch1 = gen_reg_rtx (Pmode);
15529 emit_move_insn (scratch1, out);
15530 /* Is there a known alignment and is it not 2? */
15531 if (align != 2)
15532 {
15533 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
15534 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
15535
15536 /* Leave just the 3 lower bits. */
15537 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
15538 NULL_RTX, 0, OPTAB_WIDEN);
15539
15540 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15541 Pmode, 1, align_4_label);
15542 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
15543 Pmode, 1, align_2_label);
15544 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
15545 Pmode, 1, align_3_label);
15546 }
15547 else
15548 {
15549 /* Since the alignment is 2, we have to check 2 or 0 bytes;
15550 check whether it is aligned to a 4-byte boundary. */
15551
15552 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
15553 NULL_RTX, 0, OPTAB_WIDEN);
15554
15555 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15556 Pmode, 1, align_4_label);
15557 }
15558
15559 mem = change_address (src, QImode, out);
15560
15561 /* Now compare the bytes. */
15562
15563 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
15564 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
15565 QImode, 1, end_0_label);
15566
15567 /* Increment the address. */
15568 if (TARGET_64BIT)
15569 emit_insn (gen_adddi3 (out, out, const1_rtx));
15570 else
15571 emit_insn (gen_addsi3 (out, out, const1_rtx));
15572
15573 /* Not needed with an alignment of 2 */
15574 if (align != 2)
15575 {
15576 emit_label (align_2_label);
15577
15578 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15579 end_0_label);
15580
15581 if (TARGET_64BIT)
15582 emit_insn (gen_adddi3 (out, out, const1_rtx));
15583 else
15584 emit_insn (gen_addsi3 (out, out, const1_rtx));
15585
15586 emit_label (align_3_label);
15587 }
15588
15589 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15590 end_0_label);
15591
15592 if (TARGET_64BIT)
15593 emit_insn (gen_adddi3 (out, out, const1_rtx));
15594 else
15595 emit_insn (gen_addsi3 (out, out, const1_rtx));
15596 }
15597
15598 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
15599 align this loop; it only makes the program larger and does not
15600 speed it up. */
15601 emit_label (align_4_label);
15602
15603 mem = change_address (src, SImode, out);
15604 emit_move_insn (scratch, mem);
15605 if (TARGET_64BIT)
15606 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
15607 else
15608 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
15609
15610 /* This formula yields a nonzero result iff one of the bytes is zero.
15611 This saves three branches inside the loop and many cycles. */
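/* Worked example of the test below: for x = 0x61620063 (one zero byte)
     (x - 0x01010101) & ~x & 0x80808080 == 0x00008000 != 0,
   while x = 0x61626364 (no zero byte) yields 0; the addition of -0x01010101
   below is the same subtraction.  */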
15612
15613 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
15614 emit_insn (gen_one_cmplsi2 (scratch, scratch));
15615 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
15616 emit_insn (gen_andsi3 (tmpreg, tmpreg,
15617 gen_int_mode (0x80808080, SImode)));
15618 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
15619 align_4_label);
15620
15621 if (TARGET_CMOVE)
15622 {
15623 rtx reg = gen_reg_rtx (SImode);
15624 rtx reg2 = gen_reg_rtx (Pmode);
15625 emit_move_insn (reg, tmpreg);
15626 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
15627
15628 /* If zero is not in the first two bytes, move two bytes forward. */
15629 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15630 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15631 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15632 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
15633 gen_rtx_IF_THEN_ELSE (SImode, tmp,
15634 reg,
15635 tmpreg)));
15636 /* Emit lea manually to avoid clobbering of flags. */
15637 emit_insn (gen_rtx_SET (SImode, reg2,
15638 gen_rtx_PLUS (Pmode, out, const2_rtx)));
15639
15640 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15641 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15642 emit_insn (gen_rtx_SET (VOIDmode, out,
15643 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
15644 reg2,
15645 out)));
15646
15647 }
15648 else
15649 {
15650 rtx end_2_label = gen_label_rtx ();
15651 /* Is zero in the first two bytes? */
15652
15653 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15654 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15655 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
15656 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
15657 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
15658 pc_rtx);
15659 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
15660 JUMP_LABEL (tmp) = end_2_label;
15661
15662 /* Not in the first two. Move two bytes forward. */
15663 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
15664 if (TARGET_64BIT)
15665 emit_insn (gen_adddi3 (out, out, const2_rtx));
15666 else
15667 emit_insn (gen_addsi3 (out, out, const2_rtx));
15668
15669 emit_label (end_2_label);
15670
15671 }
15672
15673 /* Avoid branch in fixing the byte. */
15674 tmpreg = gen_lowpart (QImode, tmpreg);
15675 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
15676 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
15677 if (TARGET_64BIT)
15678 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
15679 else
15680 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
15681
15682 emit_label (end_0_label);
15683 }
15684
15685 /* Expand strlen. */
15686
15687 int
15688 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
15689 {
15690 rtx addr, scratch1, scratch2, scratch3, scratch4;
15691
15692 /* The generic case of the strlen expander is long. Avoid expanding
15693 it unless TARGET_INLINE_ALL_STRINGOPS. */
15694
15695 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15696 && !TARGET_INLINE_ALL_STRINGOPS
15697 && !optimize_size
15698 && (!CONST_INT_P (align) || INTVAL (align) < 4))
15699 return 0;
15700
15701 addr = force_reg (Pmode, XEXP (src, 0));
15702 scratch1 = gen_reg_rtx (Pmode);
15703
15704 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15705 && !optimize_size)
15706 {
15707 /* Well, it seems that some optimizer does not combine a call like
15708 foo(strlen(bar), strlen(bar));
15709 when the move and the subtraction are done here. It does calculate
15710 the length just once when these instructions are done inside
15711 output_strlen_unroll(). But I think that since &bar[strlen(bar)] is
15712 often used and I use one fewer register for the lifetime of
15713 output_strlen_unroll(), this is better. */
15714
15715 emit_move_insn (out, addr);
15716
15717 ix86_expand_strlensi_unroll_1 (out, src, align);
15718
15719 /* strlensi_unroll_1 returns the address of the zero at the end of
15720 the string, like memchr(), so compute the length by subtracting
15721 the start address. */
15722 if (TARGET_64BIT)
15723 emit_insn (gen_subdi3 (out, out, addr));
15724 else
15725 emit_insn (gen_subsi3 (out, out, addr));
15726 }
15727 else
15728 {
15729 rtx unspec;
15730 scratch2 = gen_reg_rtx (Pmode);
15731 scratch3 = gen_reg_rtx (Pmode);
15732 scratch4 = force_reg (Pmode, constm1_rtx);
15733
15734 emit_move_insn (scratch3, addr);
15735 eoschar = force_reg (QImode, eoschar);
15736
15737 src = replace_equiv_address_nv (src, scratch3);
15738
15739 /* If .md starts supporting :P, this can be done in .md. */
15740 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
15741 scratch4), UNSPEC_SCAS);
15742 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
15743 if (TARGET_64BIT)
15744 {
15745 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
15746 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
15747 }
15748 else
15749 {
15750 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
15751 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
15752 }
15753 }
15754 return 1;
15755 }
15756
15757 /* For a given symbol (function), construct code to compute the address of
15758 its PLT entry in the large x86-64 PIC model. */
15759 rtx
15760 construct_plt_address (rtx symbol)
15761 {
15762 rtx tmp = gen_reg_rtx (Pmode);
15763 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
15764
15765 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
15766 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
15767
15768 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
15769 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
15770 return tmp;
15771 }
15772
15773 void
15774 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
15775 rtx callarg2 ATTRIBUTE_UNUSED,
15776 rtx pop, int sibcall)
15777 {
15778 rtx use = NULL, call;
15779
15780 if (pop == const0_rtx)
15781 pop = NULL;
15782 gcc_assert (!TARGET_64BIT || !pop);
15783
15784 if (TARGET_MACHO && !TARGET_64BIT)
15785 {
15786 #if TARGET_MACHO
15787 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
15788 fnaddr = machopic_indirect_call_target (fnaddr);
15789 #endif
15790 }
15791 else
15792 {
15793 /* Static functions and indirect calls don't need the pic register. */
15794 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
15795 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15796 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
15797 use_reg (&use, pic_offset_table_rtx);
15798 }
15799
15800 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
15801 {
15802 rtx al = gen_rtx_REG (QImode, 0);
15803 emit_move_insn (al, callarg2);
15804 use_reg (&use, al);
15805 }
15806
15807 if (ix86_cmodel == CM_LARGE_PIC
15808 && GET_CODE (fnaddr) == MEM
15809 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15810 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
15811 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
15812 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
15813 {
15814 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15815 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15816 }
15817 if (sibcall && TARGET_64BIT
15818 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
15819 {
15820 rtx addr;
15821 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15822 fnaddr = gen_rtx_REG (Pmode, R11_REG);
15823 emit_move_insn (fnaddr, addr);
15824 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15825 }
15826
15827 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
15828 if (retval)
15829 call = gen_rtx_SET (VOIDmode, retval, call);
15830 if (pop)
15831 {
15832 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
15833 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
15834 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
15835 }
15836
15837 call = emit_call_insn (call);
15838 if (use)
15839 CALL_INSN_FUNCTION_USAGE (call) = use;
15840 }
15841
15842 \f
15843 /* Clear stack slot assignments remembered from previous functions.
15844 This is called from INIT_EXPANDERS once before RTL is emitted for each
15845 function. */
15846
15847 static struct machine_function *
15848 ix86_init_machine_status (void)
15849 {
15850 struct machine_function *f;
15851
15852 f = GGC_CNEW (struct machine_function);
15853 f->use_fast_prologue_epilogue_nregs = -1;
15854 f->tls_descriptor_call_expanded_p = 0;
15855
15856 return f;
15857 }
15858
15859 /* Return a MEM corresponding to a stack slot with mode MODE.
15860 Allocate a new slot if necessary.
15861
15862 The RTL for a function can have several slots available: N is
15863 which slot to use. */
15864
15865 rtx
15866 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
15867 {
15868 struct stack_local_entry *s;
15869
15870 gcc_assert (n < MAX_386_STACK_LOCALS);
15871
15872 /* Virtual slot is valid only before vregs are instantiated. */
15873 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
15874
15875 for (s = ix86_stack_locals; s; s = s->next)
15876 if (s->mode == mode && s->n == n)
15877 return copy_rtx (s->rtl);
15878
15879 s = (struct stack_local_entry *)
15880 ggc_alloc (sizeof (struct stack_local_entry));
15881 s->n = n;
15882 s->mode = mode;
15883 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
15884
15885 s->next = ix86_stack_locals;
15886 ix86_stack_locals = s;
15887 return s->rtl;
15888 }
15889
15890 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15891
15892 static GTY(()) rtx ix86_tls_symbol;
15893 rtx
15894 ix86_tls_get_addr (void)
15895 {
15896
15897 if (!ix86_tls_symbol)
15898 {
15899 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
15900 (TARGET_ANY_GNU_TLS
15901 && !TARGET_64BIT)
15902 ? "___tls_get_addr"
15903 : "__tls_get_addr");
15904 }
15905
15906 return ix86_tls_symbol;
15907 }
15908
15909 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15910
15911 static GTY(()) rtx ix86_tls_module_base_symbol;
15912 rtx
15913 ix86_tls_module_base (void)
15914 {
15915
15916 if (!ix86_tls_module_base_symbol)
15917 {
15918 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
15919 "_TLS_MODULE_BASE_");
15920 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
15921 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
15922 }
15923
15924 return ix86_tls_module_base_symbol;
15925 }
15926 \f
15927 /* Calculate the length of the memory address in the instruction
15928 encoding. Does not include the one-byte modrm, opcode, or prefix. */
15929
15930 int
15931 memory_address_length (rtx addr)
15932 {
15933 struct ix86_address parts;
15934 rtx base, index, disp;
15935 int len;
15936 int ok;
15937
15938 if (GET_CODE (addr) == PRE_DEC
15939 || GET_CODE (addr) == POST_INC
15940 || GET_CODE (addr) == PRE_MODIFY
15941 || GET_CODE (addr) == POST_MODIFY)
15942 return 0;
15943
15944 ok = ix86_decompose_address (addr, &parts);
15945 gcc_assert (ok);
15946
15947 if (parts.base && GET_CODE (parts.base) == SUBREG)
15948 parts.base = SUBREG_REG (parts.base);
15949 if (parts.index && GET_CODE (parts.index) == SUBREG)
15950 parts.index = SUBREG_REG (parts.index);
15951
15952 base = parts.base;
15953 index = parts.index;
15954 disp = parts.disp;
15955 len = 0;
15956
15957 /* Rule of thumb:
15958 - esp as the base always wants an index,
15959 - ebp as the base always wants a displacement. */
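/* Illustrative byte counts computed below (the one-byte modrm, opcode and
   prefixes are not included): (%eax) adds 0, (%esp) adds 1 for the SIB byte,
   8(%ebp) adds 1 for the disp8, a bare symbolic address adds 4, and
   foo(,%eax,4) adds 4 + 1.  */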
15960
15961 /* Register Indirect. */
15962 if (base && !index && !disp)
15963 {
15964 /* esp (for its index) and ebp (for its displacement) need
15965 the two-byte modrm form. */
15966 if (addr == stack_pointer_rtx
15967 || addr == arg_pointer_rtx
15968 || addr == frame_pointer_rtx
15969 || addr == hard_frame_pointer_rtx)
15970 len = 1;
15971 }
15972
15973 /* Direct Addressing. */
15974 else if (disp && !base && !index)
15975 len = 4;
15976
15977 else
15978 {
15979 /* Find the length of the displacement constant. */
15980 if (disp)
15981 {
15982 if (base && satisfies_constraint_K (disp))
15983 len = 1;
15984 else
15985 len = 4;
15986 }
15987 /* ebp always wants a displacement. */
15988 else if (base == hard_frame_pointer_rtx)
15989 len = 1;
15990
15991 /* An index requires the two-byte modrm form.... */
15992 if (index
15993 /* ...like esp, which always wants an index. */
15994 || base == stack_pointer_rtx
15995 || base == arg_pointer_rtx
15996 || base == frame_pointer_rtx)
15997 len += 1;
15998 }
15999
16000 return len;
16001 }
16002
16003 /* Compute the default value for the "length_immediate" attribute. When
16004 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
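/* Illustrative: an insn such as "addl $1000, %eax" carries a 4-byte SImode
   immediate, while with SHORTFORM set and an immediate in [-128, 127]
   (e.g. "addl $4, %eax") the sign-extended 8-bit encoding is assumed and
   only 1 byte is counted.  */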
16005 int
16006 ix86_attr_length_immediate_default (rtx insn, int shortform)
16007 {
16008 int len = 0;
16009 int i;
16010 extract_insn_cached (insn);
16011 for (i = recog_data.n_operands - 1; i >= 0; --i)
16012 if (CONSTANT_P (recog_data.operand[i]))
16013 {
16014 gcc_assert (!len);
16015 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
16016 len = 1;
16017 else
16018 {
16019 switch (get_attr_mode (insn))
16020 {
16021 case MODE_QI:
16022 len+=1;
16023 break;
16024 case MODE_HI:
16025 len+=2;
16026 break;
16027 case MODE_SI:
16028 len+=4;
16029 break;
16030 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
16031 case MODE_DI:
16032 len+=4;
16033 break;
16034 default:
16035 fatal_insn ("unknown insn mode", insn);
16036 }
16037 }
16038 }
16039 return len;
16040 }
16041 /* Compute default value for "length_address" attribute. */
16042 int
16043 ix86_attr_length_address_default (rtx insn)
16044 {
16045 int i;
16046
16047 if (get_attr_type (insn) == TYPE_LEA)
16048 {
16049 rtx set = PATTERN (insn);
16050
16051 if (GET_CODE (set) == PARALLEL)
16052 set = XVECEXP (set, 0, 0);
16053
16054 gcc_assert (GET_CODE (set) == SET);
16055
16056 return memory_address_length (SET_SRC (set));
16057 }
16058
16059 extract_insn_cached (insn);
16060 for (i = recog_data.n_operands - 1; i >= 0; --i)
16061 if (MEM_P (recog_data.operand[i]))
16062 return memory_address_length (XEXP (recog_data.operand[i], 0));
16066 return 0;
16067 }
16068 \f
16069 /* Return the maximum number of instructions a cpu can issue. */
16070
16071 static int
16072 ix86_issue_rate (void)
16073 {
16074 switch (ix86_tune)
16075 {
16076 case PROCESSOR_PENTIUM:
16077 case PROCESSOR_K6:
16078 return 2;
16079
16080 case PROCESSOR_PENTIUMPRO:
16081 case PROCESSOR_PENTIUM4:
16082 case PROCESSOR_ATHLON:
16083 case PROCESSOR_K8:
16084 case PROCESSOR_AMDFAM10:
16085 case PROCESSOR_NOCONA:
16086 case PROCESSOR_GENERIC32:
16087 case PROCESSOR_GENERIC64:
16088 return 3;
16089
16090 case PROCESSOR_CORE2:
16091 return 4;
16092
16093 default:
16094 return 1;
16095 }
16096 }
16097
16098 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
16099 set by DEP_INSN and nothing else that DEP_INSN sets. */
16100
16101 static int
16102 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
16103 {
16104 rtx set, set2;
16105
16106 /* Simplify the test for uninteresting insns. */
16107 if (insn_type != TYPE_SETCC
16108 && insn_type != TYPE_ICMOV
16109 && insn_type != TYPE_FCMOV
16110 && insn_type != TYPE_IBR)
16111 return 0;
16112
16113 if ((set = single_set (dep_insn)) != 0)
16114 {
16115 set = SET_DEST (set);
16116 set2 = NULL_RTX;
16117 }
16118 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
16119 && XVECLEN (PATTERN (dep_insn), 0) == 2
16120 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
16121 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
16122 {
16123 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
16124 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
16125 }
16126 else
16127 return 0;
16128
16129 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
16130 return 0;
16131
16132 /* This test is true if the dependent insn reads the flags but
16133 not any other potentially set register. */
16134 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
16135 return 0;
16136
16137 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
16138 return 0;
16139
16140 return 1;
16141 }
16142
16143 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
16144 address with operands set by DEP_INSN. */
16145
16146 static int
16147 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
16148 {
16149 rtx addr;
16150
16151 if (insn_type == TYPE_LEA
16152 && TARGET_PENTIUM)
16153 {
16154 addr = PATTERN (insn);
16155
16156 if (GET_CODE (addr) == PARALLEL)
16157 addr = XVECEXP (addr, 0, 0);
16158
16159 gcc_assert (GET_CODE (addr) == SET);
16160
16161 addr = SET_SRC (addr);
16162 }
16163 else
16164 {
16165 int i;
16166 extract_insn_cached (insn);
16167 for (i = recog_data.n_operands - 1; i >= 0; --i)
16168 if (MEM_P (recog_data.operand[i]))
16169 {
16170 addr = XEXP (recog_data.operand[i], 0);
16171 goto found;
16172 }
16173 return 0;
16174 found:;
16175 }
16176
16177 return modified_in_p (addr, dep_insn);
16178 }
16179
16180 static int
16181 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
16182 {
16183 enum attr_type insn_type, dep_insn_type;
16184 enum attr_memory memory;
16185 rtx set, set2;
16186 int dep_insn_code_number;
16187
16188 /* Anti and output dependencies have zero cost on all CPUs. */
16189 if (REG_NOTE_KIND (link) != 0)
16190 return 0;
16191
16192 dep_insn_code_number = recog_memoized (dep_insn);
16193
16194 /* If we can't recognize the insns, we can't really do anything. */
16195 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
16196 return cost;
16197
16198 insn_type = get_attr_type (insn);
16199 dep_insn_type = get_attr_type (dep_insn);
16200
16201 switch (ix86_tune)
16202 {
16203 case PROCESSOR_PENTIUM:
16204 /* Address Generation Interlock adds a cycle of latency. */
16205 if (ix86_agi_dependent (insn, dep_insn, insn_type))
16206 cost += 1;
16207
16208 /* ??? Compares pair with jump/setcc. */
16209 if (ix86_flags_dependent (insn, dep_insn, insn_type))
16210 cost = 0;
16211
16212 /* Floating point stores require value to be ready one cycle earlier. */
16213 if (insn_type == TYPE_FMOV
16214 && get_attr_memory (insn) == MEMORY_STORE
16215 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16216 cost += 1;
16217 break;
16218
16219 case PROCESSOR_PENTIUMPRO:
16220 memory = get_attr_memory (insn);
16221
16222 /* INT->FP conversion is expensive. */
16223 if (get_attr_fp_int_src (dep_insn))
16224 cost += 5;
16225
16226 /* There is one cycle extra latency between an FP op and a store. */
16227 if (insn_type == TYPE_FMOV
16228 && (set = single_set (dep_insn)) != NULL_RTX
16229 && (set2 = single_set (insn)) != NULL_RTX
16230 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
16231 && MEM_P (SET_DEST (set2)))
16232 cost += 1;
16233
16234 /* Show ability of reorder buffer to hide latency of load by executing
16235 in parallel with previous instruction in case
16236 previous instruction is not needed to compute the address. */
16237 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16238 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16239 {
16240 /* Claim moves to take one cycle, as the core can issue one load
16241 at a time and the next load can start a cycle later. */
16242 if (dep_insn_type == TYPE_IMOV
16243 || dep_insn_type == TYPE_FMOV)
16244 cost = 1;
16245 else if (cost > 1)
16246 cost--;
16247 }
16248 break;
16249
16250 case PROCESSOR_K6:
16251 memory = get_attr_memory (insn);
16252
16253 /* The esp dependency is resolved before the instruction is really
16254 finished. */
16255 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
16256 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
16257 return 1;
16258
16259 /* INT->FP conversion is expensive. */
16260 if (get_attr_fp_int_src (dep_insn))
16261 cost += 5;
16262
16263 /* Show ability of reorder buffer to hide latency of load by executing
16264 in parallel with previous instruction in case
16265 previous instruction is not needed to compute the address. */
16266 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16267 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16268 {
16269 /* Claim moves to take one cycle, as the core can issue one load
16270 at a time and the next load can start a cycle later. */
16271 if (dep_insn_type == TYPE_IMOV
16272 || dep_insn_type == TYPE_FMOV)
16273 cost = 1;
16274 else if (cost > 2)
16275 cost -= 2;
16276 else
16277 cost = 1;
16278 }
16279 break;
16280
16281 case PROCESSOR_ATHLON:
16282 case PROCESSOR_K8:
16283 case PROCESSOR_AMDFAM10:
16284 case PROCESSOR_GENERIC32:
16285 case PROCESSOR_GENERIC64:
16286 memory = get_attr_memory (insn);
16287
16288 /* Show ability of reorder buffer to hide latency of load by executing
16289 in parallel with previous instruction in case
16290 previous instruction is not needed to compute the address. */
16291 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16292 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16293 {
16294 enum attr_unit unit = get_attr_unit (insn);
16295 int loadcost = 3;
16296
16297 /* Because of the difference between the length of integer and
16298 floating unit pipeline preparation stages, the memory operands
16299 for floating point are cheaper.
16300
16301 ??? For Athlon the difference is most probably 2. */
16302 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
16303 loadcost = 3;
16304 else
16305 loadcost = TARGET_ATHLON ? 2 : 0;
16306
16307 if (cost >= loadcost)
16308 cost -= loadcost;
16309 else
16310 cost = 0;
16311 }
16312
16313 default:
16314 break;
16315 }
16316
16317 return cost;
16318 }
16319
16320 /* How many alternative schedules to try. This should be as wide as the
16321 scheduling freedom in the DFA, but no wider. Making this value too
16322 large results in extra work for the scheduler. */
16323
16324 static int
16325 ia32_multipass_dfa_lookahead (void)
16326 {
16327 if (ix86_tune == PROCESSOR_PENTIUM)
16328 return 2;
16329
16330 if (ix86_tune == PROCESSOR_PENTIUMPRO
16331 || ix86_tune == PROCESSOR_K6)
16332 return 1;
16333
16334 else
16335 return 0;
16336 }
16337
16338 \f
16339 /* Compute the alignment given to a constant that is being placed in memory.
16340 EXP is the constant and ALIGN is the alignment that the object would
16341 ordinarily have.
16342 The value of this function is used instead of that alignment to align
16343 the object. */
16344
16345 int
16346 ix86_constant_alignment (tree exp, int align)
16347 {
16348 if (TREE_CODE (exp) == REAL_CST)
16349 {
16350 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
16351 return 64;
16352 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
16353 return 128;
16354 }
16355 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
16356 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
16357 return BITS_PER_WORD;
16358
16359 return align;
16360 }
16361
16362 /* Compute the alignment for a static variable.
16363 TYPE is the data type, and ALIGN is the alignment that
16364 the object would ordinarily have. The value of this function is used
16365 instead of that alignment to align the object. */
16366
16367 int
16368 ix86_data_alignment (tree type, int align)
16369 {
16370 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
16371
16372 if (AGGREGATE_TYPE_P (type)
16373 && TYPE_SIZE (type)
16374 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16375 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
16376 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
16377 && align < max_align)
16378 align = max_align;
16379
16380 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16381 to a 16-byte boundary. */
16382 if (TARGET_64BIT)
16383 {
16384 if (AGGREGATE_TYPE_P (type)
16385 && TYPE_SIZE (type)
16386 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16387 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
16388 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16389 return 128;
16390 }
16391
16392 if (TREE_CODE (type) == ARRAY_TYPE)
16393 {
16394 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16395 return 64;
16396 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16397 return 128;
16398 }
16399 else if (TREE_CODE (type) == COMPLEX_TYPE)
16400 {
16401
16402 if (TYPE_MODE (type) == DCmode && align < 64)
16403 return 64;
16404 if (TYPE_MODE (type) == XCmode && align < 128)
16405 return 128;
16406 }
16407 else if ((TREE_CODE (type) == RECORD_TYPE
16408 || TREE_CODE (type) == UNION_TYPE
16409 || TREE_CODE (type) == QUAL_UNION_TYPE)
16410 && TYPE_FIELDS (type))
16411 {
16412 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16413 return 64;
16414 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16415 return 128;
16416 }
16417 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16418 || TREE_CODE (type) == INTEGER_TYPE)
16419 {
16420 if (TYPE_MODE (type) == DFmode && align < 64)
16421 return 64;
16422 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16423 return 128;
16424 }
16425
16426 return align;
16427 }
16428
16429 /* Compute the alignment for a local variable.
16430 TYPE is the data type, and ALIGN is the alignment that
16431 the object would ordinarily have. The value of this macro is used
16432 instead of that alignment to align the object. */
16433
16434 int
16435 ix86_local_alignment (tree type, int align)
16436 {
16437 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
16438 to a 16-byte boundary. */
16439 if (TARGET_64BIT)
16440 {
16441 if (AGGREGATE_TYPE_P (type)
16442 && TYPE_SIZE (type)
16443 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16444 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
16445 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16446 return 128;
16447 }
16448 if (TREE_CODE (type) == ARRAY_TYPE)
16449 {
16450 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16451 return 64;
16452 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16453 return 128;
16454 }
16455 else if (TREE_CODE (type) == COMPLEX_TYPE)
16456 {
16457 if (TYPE_MODE (type) == DCmode && align < 64)
16458 return 64;
16459 if (TYPE_MODE (type) == XCmode && align < 128)
16460 return 128;
16461 }
16462 else if ((TREE_CODE (type) == RECORD_TYPE
16463 || TREE_CODE (type) == UNION_TYPE
16464 || TREE_CODE (type) == QUAL_UNION_TYPE)
16465 && TYPE_FIELDS (type))
16466 {
16467 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16468 return 64;
16469 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16470 return 128;
16471 }
16472 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16473 || TREE_CODE (type) == INTEGER_TYPE)
16474 {
16475
16476 if (TYPE_MODE (type) == DFmode && align < 64)
16477 return 64;
16478 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16479 return 128;
16480 }
16481 return align;
16482 }
16483 \f
16484 /* Emit RTL insns to initialize the variable parts of a trampoline.
16485 FNADDR is an RTX for the address of the function's pure code.
16486 CXT is an RTX for the static chain value for the function. */
16487 void
16488 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
16489 {
16490 if (!TARGET_64BIT)
16491 {
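/* Added note: the 32-bit trampoline emitted below decodes as
     b9 imm32   movl  $CXT, %ecx
     e9 rel32   jmp   FNADDR
   The jmp displacement is relative to the end of this 10-byte
   sequence, which is why it is computed from tramp + 10.  */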
16492 /* Compute offset from the end of the jmp to the target function. */
16493 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
16494 plus_constant (tramp, 10),
16495 NULL_RTX, 1, OPTAB_DIRECT);
16496 emit_move_insn (gen_rtx_MEM (QImode, tramp),
16497 gen_int_mode (0xb9, QImode));
16498 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
16499 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
16500 gen_int_mode (0xe9, QImode));
16501 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
16502 }
16503 else
16504 {
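/* Added note: the 64-bit trampoline emitted below decodes as follows
   (the HImode constants are stored little-endian, so e.g. 0xbb41
   emits the bytes 41 bb):
     41 bb imm32   movl   $FNADDR, %r11d   (when FNADDR zero-extends)
   or
     49 bb imm64   movabs $FNADDR, %r11
   followed by
     49 ba imm64   movabs $CXT, %r10
     49 ff e3      jmp    *%r11  */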
16505 int offset = 0;
16506 /* Try to load the address using the shorter movl instead of movabs.
16507 We may want to support movq for kernel mode, but the kernel does not
16508 use trampolines at the moment. */
16509 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
16510 {
16511 fnaddr = copy_to_mode_reg (DImode, fnaddr);
16512 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16513 gen_int_mode (0xbb41, HImode));
16514 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
16515 gen_lowpart (SImode, fnaddr));
16516 offset += 6;
16517 }
16518 else
16519 {
16520 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16521 gen_int_mode (0xbb49, HImode));
16522 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16523 fnaddr);
16524 offset += 10;
16525 }
16526 /* Load static chain using movabs to r10. */
16527 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16528 gen_int_mode (0xba49, HImode));
16529 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16530 cxt);
16531 offset += 10;
16532 /* Jump to r11. */
16533 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16534 gen_int_mode (0xff49, HImode));
16535 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
16536 gen_int_mode (0xe3, QImode));
16537 offset += 3;
16538 gcc_assert (offset <= TRAMPOLINE_SIZE);
16539 }
16540
16541 #ifdef ENABLE_EXECUTE_STACK
16542 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
16543 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
16544 #endif
16545 }
16546 \f
16547 /* Codes for all the SSE/MMX builtins. */
16548 enum ix86_builtins
16549 {
16550 IX86_BUILTIN_ADDPS,
16551 IX86_BUILTIN_ADDSS,
16552 IX86_BUILTIN_DIVPS,
16553 IX86_BUILTIN_DIVSS,
16554 IX86_BUILTIN_MULPS,
16555 IX86_BUILTIN_MULSS,
16556 IX86_BUILTIN_SUBPS,
16557 IX86_BUILTIN_SUBSS,
16558
16559 IX86_BUILTIN_CMPEQPS,
16560 IX86_BUILTIN_CMPLTPS,
16561 IX86_BUILTIN_CMPLEPS,
16562 IX86_BUILTIN_CMPGTPS,
16563 IX86_BUILTIN_CMPGEPS,
16564 IX86_BUILTIN_CMPNEQPS,
16565 IX86_BUILTIN_CMPNLTPS,
16566 IX86_BUILTIN_CMPNLEPS,
16567 IX86_BUILTIN_CMPNGTPS,
16568 IX86_BUILTIN_CMPNGEPS,
16569 IX86_BUILTIN_CMPORDPS,
16570 IX86_BUILTIN_CMPUNORDPS,
16571 IX86_BUILTIN_CMPEQSS,
16572 IX86_BUILTIN_CMPLTSS,
16573 IX86_BUILTIN_CMPLESS,
16574 IX86_BUILTIN_CMPNEQSS,
16575 IX86_BUILTIN_CMPNLTSS,
16576 IX86_BUILTIN_CMPNLESS,
16577 IX86_BUILTIN_CMPNGTSS,
16578 IX86_BUILTIN_CMPNGESS,
16579 IX86_BUILTIN_CMPORDSS,
16580 IX86_BUILTIN_CMPUNORDSS,
16581
16582 IX86_BUILTIN_COMIEQSS,
16583 IX86_BUILTIN_COMILTSS,
16584 IX86_BUILTIN_COMILESS,
16585 IX86_BUILTIN_COMIGTSS,
16586 IX86_BUILTIN_COMIGESS,
16587 IX86_BUILTIN_COMINEQSS,
16588 IX86_BUILTIN_UCOMIEQSS,
16589 IX86_BUILTIN_UCOMILTSS,
16590 IX86_BUILTIN_UCOMILESS,
16591 IX86_BUILTIN_UCOMIGTSS,
16592 IX86_BUILTIN_UCOMIGESS,
16593 IX86_BUILTIN_UCOMINEQSS,
16594
16595 IX86_BUILTIN_CVTPI2PS,
16596 IX86_BUILTIN_CVTPS2PI,
16597 IX86_BUILTIN_CVTSI2SS,
16598 IX86_BUILTIN_CVTSI642SS,
16599 IX86_BUILTIN_CVTSS2SI,
16600 IX86_BUILTIN_CVTSS2SI64,
16601 IX86_BUILTIN_CVTTPS2PI,
16602 IX86_BUILTIN_CVTTSS2SI,
16603 IX86_BUILTIN_CVTTSS2SI64,
16604
16605 IX86_BUILTIN_MAXPS,
16606 IX86_BUILTIN_MAXSS,
16607 IX86_BUILTIN_MINPS,
16608 IX86_BUILTIN_MINSS,
16609
16610 IX86_BUILTIN_LOADUPS,
16611 IX86_BUILTIN_STOREUPS,
16612 IX86_BUILTIN_MOVSS,
16613
16614 IX86_BUILTIN_MOVHLPS,
16615 IX86_BUILTIN_MOVLHPS,
16616 IX86_BUILTIN_LOADHPS,
16617 IX86_BUILTIN_LOADLPS,
16618 IX86_BUILTIN_STOREHPS,
16619 IX86_BUILTIN_STORELPS,
16620
16621 IX86_BUILTIN_MASKMOVQ,
16622 IX86_BUILTIN_MOVMSKPS,
16623 IX86_BUILTIN_PMOVMSKB,
16624
16625 IX86_BUILTIN_MOVNTPS,
16626 IX86_BUILTIN_MOVNTQ,
16627
16628 IX86_BUILTIN_LOADDQU,
16629 IX86_BUILTIN_STOREDQU,
16630
16631 IX86_BUILTIN_PACKSSWB,
16632 IX86_BUILTIN_PACKSSDW,
16633 IX86_BUILTIN_PACKUSWB,
16634
16635 IX86_BUILTIN_PADDB,
16636 IX86_BUILTIN_PADDW,
16637 IX86_BUILTIN_PADDD,
16638 IX86_BUILTIN_PADDQ,
16639 IX86_BUILTIN_PADDSB,
16640 IX86_BUILTIN_PADDSW,
16641 IX86_BUILTIN_PADDUSB,
16642 IX86_BUILTIN_PADDUSW,
16643 IX86_BUILTIN_PSUBB,
16644 IX86_BUILTIN_PSUBW,
16645 IX86_BUILTIN_PSUBD,
16646 IX86_BUILTIN_PSUBQ,
16647 IX86_BUILTIN_PSUBSB,
16648 IX86_BUILTIN_PSUBSW,
16649 IX86_BUILTIN_PSUBUSB,
16650 IX86_BUILTIN_PSUBUSW,
16651
16652 IX86_BUILTIN_PAND,
16653 IX86_BUILTIN_PANDN,
16654 IX86_BUILTIN_POR,
16655 IX86_BUILTIN_PXOR,
16656
16657 IX86_BUILTIN_PAVGB,
16658 IX86_BUILTIN_PAVGW,
16659
16660 IX86_BUILTIN_PCMPEQB,
16661 IX86_BUILTIN_PCMPEQW,
16662 IX86_BUILTIN_PCMPEQD,
16663 IX86_BUILTIN_PCMPGTB,
16664 IX86_BUILTIN_PCMPGTW,
16665 IX86_BUILTIN_PCMPGTD,
16666
16667 IX86_BUILTIN_PMADDWD,
16668
16669 IX86_BUILTIN_PMAXSW,
16670 IX86_BUILTIN_PMAXUB,
16671 IX86_BUILTIN_PMINSW,
16672 IX86_BUILTIN_PMINUB,
16673
16674 IX86_BUILTIN_PMULHUW,
16675 IX86_BUILTIN_PMULHW,
16676 IX86_BUILTIN_PMULLW,
16677
16678 IX86_BUILTIN_PSADBW,
16679 IX86_BUILTIN_PSHUFW,
16680
16681 IX86_BUILTIN_PSLLW,
16682 IX86_BUILTIN_PSLLD,
16683 IX86_BUILTIN_PSLLQ,
16684 IX86_BUILTIN_PSRAW,
16685 IX86_BUILTIN_PSRAD,
16686 IX86_BUILTIN_PSRLW,
16687 IX86_BUILTIN_PSRLD,
16688 IX86_BUILTIN_PSRLQ,
16689 IX86_BUILTIN_PSLLWI,
16690 IX86_BUILTIN_PSLLDI,
16691 IX86_BUILTIN_PSLLQI,
16692 IX86_BUILTIN_PSRAWI,
16693 IX86_BUILTIN_PSRADI,
16694 IX86_BUILTIN_PSRLWI,
16695 IX86_BUILTIN_PSRLDI,
16696 IX86_BUILTIN_PSRLQI,
16697
16698 IX86_BUILTIN_PUNPCKHBW,
16699 IX86_BUILTIN_PUNPCKHWD,
16700 IX86_BUILTIN_PUNPCKHDQ,
16701 IX86_BUILTIN_PUNPCKLBW,
16702 IX86_BUILTIN_PUNPCKLWD,
16703 IX86_BUILTIN_PUNPCKLDQ,
16704
16705 IX86_BUILTIN_SHUFPS,
16706
16707 IX86_BUILTIN_RCPPS,
16708 IX86_BUILTIN_RCPSS,
16709 IX86_BUILTIN_RSQRTPS,
16710 IX86_BUILTIN_RSQRTSS,
16711 IX86_BUILTIN_RSQRTF,
16712 IX86_BUILTIN_SQRTPS,
16713 IX86_BUILTIN_SQRTSS,
16714
16715 IX86_BUILTIN_UNPCKHPS,
16716 IX86_BUILTIN_UNPCKLPS,
16717
16718 IX86_BUILTIN_ANDPS,
16719 IX86_BUILTIN_ANDNPS,
16720 IX86_BUILTIN_ORPS,
16721 IX86_BUILTIN_XORPS,
16722
16723 IX86_BUILTIN_EMMS,
16724 IX86_BUILTIN_LDMXCSR,
16725 IX86_BUILTIN_STMXCSR,
16726 IX86_BUILTIN_SFENCE,
16727
16728 /* 3DNow! Original */
16729 IX86_BUILTIN_FEMMS,
16730 IX86_BUILTIN_PAVGUSB,
16731 IX86_BUILTIN_PF2ID,
16732 IX86_BUILTIN_PFACC,
16733 IX86_BUILTIN_PFADD,
16734 IX86_BUILTIN_PFCMPEQ,
16735 IX86_BUILTIN_PFCMPGE,
16736 IX86_BUILTIN_PFCMPGT,
16737 IX86_BUILTIN_PFMAX,
16738 IX86_BUILTIN_PFMIN,
16739 IX86_BUILTIN_PFMUL,
16740 IX86_BUILTIN_PFRCP,
16741 IX86_BUILTIN_PFRCPIT1,
16742 IX86_BUILTIN_PFRCPIT2,
16743 IX86_BUILTIN_PFRSQIT1,
16744 IX86_BUILTIN_PFRSQRT,
16745 IX86_BUILTIN_PFSUB,
16746 IX86_BUILTIN_PFSUBR,
16747 IX86_BUILTIN_PI2FD,
16748 IX86_BUILTIN_PMULHRW,
16749
16750 /* 3DNow! Athlon Extensions */
16751 IX86_BUILTIN_PF2IW,
16752 IX86_BUILTIN_PFNACC,
16753 IX86_BUILTIN_PFPNACC,
16754 IX86_BUILTIN_PI2FW,
16755 IX86_BUILTIN_PSWAPDSI,
16756 IX86_BUILTIN_PSWAPDSF,
16757
16758 /* SSE2 */
16759 IX86_BUILTIN_ADDPD,
16760 IX86_BUILTIN_ADDSD,
16761 IX86_BUILTIN_DIVPD,
16762 IX86_BUILTIN_DIVSD,
16763 IX86_BUILTIN_MULPD,
16764 IX86_BUILTIN_MULSD,
16765 IX86_BUILTIN_SUBPD,
16766 IX86_BUILTIN_SUBSD,
16767
16768 IX86_BUILTIN_CMPEQPD,
16769 IX86_BUILTIN_CMPLTPD,
16770 IX86_BUILTIN_CMPLEPD,
16771 IX86_BUILTIN_CMPGTPD,
16772 IX86_BUILTIN_CMPGEPD,
16773 IX86_BUILTIN_CMPNEQPD,
16774 IX86_BUILTIN_CMPNLTPD,
16775 IX86_BUILTIN_CMPNLEPD,
16776 IX86_BUILTIN_CMPNGTPD,
16777 IX86_BUILTIN_CMPNGEPD,
16778 IX86_BUILTIN_CMPORDPD,
16779 IX86_BUILTIN_CMPUNORDPD,
16780 IX86_BUILTIN_CMPEQSD,
16781 IX86_BUILTIN_CMPLTSD,
16782 IX86_BUILTIN_CMPLESD,
16783 IX86_BUILTIN_CMPNEQSD,
16784 IX86_BUILTIN_CMPNLTSD,
16785 IX86_BUILTIN_CMPNLESD,
16786 IX86_BUILTIN_CMPORDSD,
16787 IX86_BUILTIN_CMPUNORDSD,
16788
16789 IX86_BUILTIN_COMIEQSD,
16790 IX86_BUILTIN_COMILTSD,
16791 IX86_BUILTIN_COMILESD,
16792 IX86_BUILTIN_COMIGTSD,
16793 IX86_BUILTIN_COMIGESD,
16794 IX86_BUILTIN_COMINEQSD,
16795 IX86_BUILTIN_UCOMIEQSD,
16796 IX86_BUILTIN_UCOMILTSD,
16797 IX86_BUILTIN_UCOMILESD,
16798 IX86_BUILTIN_UCOMIGTSD,
16799 IX86_BUILTIN_UCOMIGESD,
16800 IX86_BUILTIN_UCOMINEQSD,
16801
16802 IX86_BUILTIN_MAXPD,
16803 IX86_BUILTIN_MAXSD,
16804 IX86_BUILTIN_MINPD,
16805 IX86_BUILTIN_MINSD,
16806
16807 IX86_BUILTIN_ANDPD,
16808 IX86_BUILTIN_ANDNPD,
16809 IX86_BUILTIN_ORPD,
16810 IX86_BUILTIN_XORPD,
16811
16812 IX86_BUILTIN_SQRTPD,
16813 IX86_BUILTIN_SQRTSD,
16814
16815 IX86_BUILTIN_UNPCKHPD,
16816 IX86_BUILTIN_UNPCKLPD,
16817
16818 IX86_BUILTIN_SHUFPD,
16819
16820 IX86_BUILTIN_LOADUPD,
16821 IX86_BUILTIN_STOREUPD,
16822 IX86_BUILTIN_MOVSD,
16823
16824 IX86_BUILTIN_LOADHPD,
16825 IX86_BUILTIN_LOADLPD,
16826
16827 IX86_BUILTIN_CVTDQ2PD,
16828 IX86_BUILTIN_CVTDQ2PS,
16829
16830 IX86_BUILTIN_CVTPD2DQ,
16831 IX86_BUILTIN_CVTPD2PI,
16832 IX86_BUILTIN_CVTPD2PS,
16833 IX86_BUILTIN_CVTTPD2DQ,
16834 IX86_BUILTIN_CVTTPD2PI,
16835
16836 IX86_BUILTIN_CVTPI2PD,
16837 IX86_BUILTIN_CVTSI2SD,
16838 IX86_BUILTIN_CVTSI642SD,
16839
16840 IX86_BUILTIN_CVTSD2SI,
16841 IX86_BUILTIN_CVTSD2SI64,
16842 IX86_BUILTIN_CVTSD2SS,
16843 IX86_BUILTIN_CVTSS2SD,
16844 IX86_BUILTIN_CVTTSD2SI,
16845 IX86_BUILTIN_CVTTSD2SI64,
16846
16847 IX86_BUILTIN_CVTPS2DQ,
16848 IX86_BUILTIN_CVTPS2PD,
16849 IX86_BUILTIN_CVTTPS2DQ,
16850
16851 IX86_BUILTIN_MOVNTI,
16852 IX86_BUILTIN_MOVNTPD,
16853 IX86_BUILTIN_MOVNTDQ,
16854
16855 /* SSE2 MMX */
16856 IX86_BUILTIN_MASKMOVDQU,
16857 IX86_BUILTIN_MOVMSKPD,
16858 IX86_BUILTIN_PMOVMSKB128,
16859
16860 IX86_BUILTIN_PACKSSWB128,
16861 IX86_BUILTIN_PACKSSDW128,
16862 IX86_BUILTIN_PACKUSWB128,
16863
16864 IX86_BUILTIN_PADDB128,
16865 IX86_BUILTIN_PADDW128,
16866 IX86_BUILTIN_PADDD128,
16867 IX86_BUILTIN_PADDQ128,
16868 IX86_BUILTIN_PADDSB128,
16869 IX86_BUILTIN_PADDSW128,
16870 IX86_BUILTIN_PADDUSB128,
16871 IX86_BUILTIN_PADDUSW128,
16872 IX86_BUILTIN_PSUBB128,
16873 IX86_BUILTIN_PSUBW128,
16874 IX86_BUILTIN_PSUBD128,
16875 IX86_BUILTIN_PSUBQ128,
16876 IX86_BUILTIN_PSUBSB128,
16877 IX86_BUILTIN_PSUBSW128,
16878 IX86_BUILTIN_PSUBUSB128,
16879 IX86_BUILTIN_PSUBUSW128,
16880
16881 IX86_BUILTIN_PAND128,
16882 IX86_BUILTIN_PANDN128,
16883 IX86_BUILTIN_POR128,
16884 IX86_BUILTIN_PXOR128,
16885
16886 IX86_BUILTIN_PAVGB128,
16887 IX86_BUILTIN_PAVGW128,
16888
16889 IX86_BUILTIN_PCMPEQB128,
16890 IX86_BUILTIN_PCMPEQW128,
16891 IX86_BUILTIN_PCMPEQD128,
16892 IX86_BUILTIN_PCMPGTB128,
16893 IX86_BUILTIN_PCMPGTW128,
16894 IX86_BUILTIN_PCMPGTD128,
16895
16896 IX86_BUILTIN_PMADDWD128,
16897
16898 IX86_BUILTIN_PMAXSW128,
16899 IX86_BUILTIN_PMAXUB128,
16900 IX86_BUILTIN_PMINSW128,
16901 IX86_BUILTIN_PMINUB128,
16902
16903 IX86_BUILTIN_PMULUDQ,
16904 IX86_BUILTIN_PMULUDQ128,
16905 IX86_BUILTIN_PMULHUW128,
16906 IX86_BUILTIN_PMULHW128,
16907 IX86_BUILTIN_PMULLW128,
16908
16909 IX86_BUILTIN_PSADBW128,
16910 IX86_BUILTIN_PSHUFHW,
16911 IX86_BUILTIN_PSHUFLW,
16912 IX86_BUILTIN_PSHUFD,
16913
16914 IX86_BUILTIN_PSLLDQI128,
16915 IX86_BUILTIN_PSLLWI128,
16916 IX86_BUILTIN_PSLLDI128,
16917 IX86_BUILTIN_PSLLQI128,
16918 IX86_BUILTIN_PSRAWI128,
16919 IX86_BUILTIN_PSRADI128,
16920 IX86_BUILTIN_PSRLDQI128,
16921 IX86_BUILTIN_PSRLWI128,
16922 IX86_BUILTIN_PSRLDI128,
16923 IX86_BUILTIN_PSRLQI128,
16924
16925 IX86_BUILTIN_PSLLDQ128,
16926 IX86_BUILTIN_PSLLW128,
16927 IX86_BUILTIN_PSLLD128,
16928 IX86_BUILTIN_PSLLQ128,
16929 IX86_BUILTIN_PSRAW128,
16930 IX86_BUILTIN_PSRAD128,
16931 IX86_BUILTIN_PSRLW128,
16932 IX86_BUILTIN_PSRLD128,
16933 IX86_BUILTIN_PSRLQ128,
16934
16935 IX86_BUILTIN_PUNPCKHBW128,
16936 IX86_BUILTIN_PUNPCKHWD128,
16937 IX86_BUILTIN_PUNPCKHDQ128,
16938 IX86_BUILTIN_PUNPCKHQDQ128,
16939 IX86_BUILTIN_PUNPCKLBW128,
16940 IX86_BUILTIN_PUNPCKLWD128,
16941 IX86_BUILTIN_PUNPCKLDQ128,
16942 IX86_BUILTIN_PUNPCKLQDQ128,
16943
16944 IX86_BUILTIN_CLFLUSH,
16945 IX86_BUILTIN_MFENCE,
16946 IX86_BUILTIN_LFENCE,
16947
16948 /* Prescott New Instructions. */
16949 IX86_BUILTIN_ADDSUBPS,
16950 IX86_BUILTIN_HADDPS,
16951 IX86_BUILTIN_HSUBPS,
16952 IX86_BUILTIN_MOVSHDUP,
16953 IX86_BUILTIN_MOVSLDUP,
16954 IX86_BUILTIN_ADDSUBPD,
16955 IX86_BUILTIN_HADDPD,
16956 IX86_BUILTIN_HSUBPD,
16957 IX86_BUILTIN_LDDQU,
16958
16959 IX86_BUILTIN_MONITOR,
16960 IX86_BUILTIN_MWAIT,
16961
16962 /* SSSE3. */
16963 IX86_BUILTIN_PHADDW,
16964 IX86_BUILTIN_PHADDD,
16965 IX86_BUILTIN_PHADDSW,
16966 IX86_BUILTIN_PHSUBW,
16967 IX86_BUILTIN_PHSUBD,
16968 IX86_BUILTIN_PHSUBSW,
16969 IX86_BUILTIN_PMADDUBSW,
16970 IX86_BUILTIN_PMULHRSW,
16971 IX86_BUILTIN_PSHUFB,
16972 IX86_BUILTIN_PSIGNB,
16973 IX86_BUILTIN_PSIGNW,
16974 IX86_BUILTIN_PSIGND,
16975 IX86_BUILTIN_PALIGNR,
16976 IX86_BUILTIN_PABSB,
16977 IX86_BUILTIN_PABSW,
16978 IX86_BUILTIN_PABSD,
16979
16980 IX86_BUILTIN_PHADDW128,
16981 IX86_BUILTIN_PHADDD128,
16982 IX86_BUILTIN_PHADDSW128,
16983 IX86_BUILTIN_PHSUBW128,
16984 IX86_BUILTIN_PHSUBD128,
16985 IX86_BUILTIN_PHSUBSW128,
16986 IX86_BUILTIN_PMADDUBSW128,
16987 IX86_BUILTIN_PMULHRSW128,
16988 IX86_BUILTIN_PSHUFB128,
16989 IX86_BUILTIN_PSIGNB128,
16990 IX86_BUILTIN_PSIGNW128,
16991 IX86_BUILTIN_PSIGND128,
16992 IX86_BUILTIN_PALIGNR128,
16993 IX86_BUILTIN_PABSB128,
16994 IX86_BUILTIN_PABSW128,
16995 IX86_BUILTIN_PABSD128,
16996
16997 /* AMDFAM10 - SSE4A New Instructions. */
16998 IX86_BUILTIN_MOVNTSD,
16999 IX86_BUILTIN_MOVNTSS,
17000 IX86_BUILTIN_EXTRQI,
17001 IX86_BUILTIN_EXTRQ,
17002 IX86_BUILTIN_INSERTQI,
17003 IX86_BUILTIN_INSERTQ,
17004
17005 /* SSE4.1. */
17006 IX86_BUILTIN_BLENDPD,
17007 IX86_BUILTIN_BLENDPS,
17008 IX86_BUILTIN_BLENDVPD,
17009 IX86_BUILTIN_BLENDVPS,
17010 IX86_BUILTIN_PBLENDVB128,
17011 IX86_BUILTIN_PBLENDW128,
17012
17013 IX86_BUILTIN_DPPD,
17014 IX86_BUILTIN_DPPS,
17015
17016 IX86_BUILTIN_INSERTPS128,
17017
17018 IX86_BUILTIN_MOVNTDQA,
17019 IX86_BUILTIN_MPSADBW128,
17020 IX86_BUILTIN_PACKUSDW128,
17021 IX86_BUILTIN_PCMPEQQ,
17022 IX86_BUILTIN_PHMINPOSUW128,
17023
17024 IX86_BUILTIN_PMAXSB128,
17025 IX86_BUILTIN_PMAXSD128,
17026 IX86_BUILTIN_PMAXUD128,
17027 IX86_BUILTIN_PMAXUW128,
17028
17029 IX86_BUILTIN_PMINSB128,
17030 IX86_BUILTIN_PMINSD128,
17031 IX86_BUILTIN_PMINUD128,
17032 IX86_BUILTIN_PMINUW128,
17033
17034 IX86_BUILTIN_PMOVSXBW128,
17035 IX86_BUILTIN_PMOVSXBD128,
17036 IX86_BUILTIN_PMOVSXBQ128,
17037 IX86_BUILTIN_PMOVSXWD128,
17038 IX86_BUILTIN_PMOVSXWQ128,
17039 IX86_BUILTIN_PMOVSXDQ128,
17040
17041 IX86_BUILTIN_PMOVZXBW128,
17042 IX86_BUILTIN_PMOVZXBD128,
17043 IX86_BUILTIN_PMOVZXBQ128,
17044 IX86_BUILTIN_PMOVZXWD128,
17045 IX86_BUILTIN_PMOVZXWQ128,
17046 IX86_BUILTIN_PMOVZXDQ128,
17047
17048 IX86_BUILTIN_PMULDQ128,
17049 IX86_BUILTIN_PMULLD128,
17050
17051 IX86_BUILTIN_ROUNDPD,
17052 IX86_BUILTIN_ROUNDPS,
17053 IX86_BUILTIN_ROUNDSD,
17054 IX86_BUILTIN_ROUNDSS,
17055
17056 IX86_BUILTIN_PTESTZ,
17057 IX86_BUILTIN_PTESTC,
17058 IX86_BUILTIN_PTESTNZC,
17059
17060 IX86_BUILTIN_VEC_INIT_V2SI,
17061 IX86_BUILTIN_VEC_INIT_V4HI,
17062 IX86_BUILTIN_VEC_INIT_V8QI,
17063 IX86_BUILTIN_VEC_EXT_V2DF,
17064 IX86_BUILTIN_VEC_EXT_V2DI,
17065 IX86_BUILTIN_VEC_EXT_V4SF,
17066 IX86_BUILTIN_VEC_EXT_V4SI,
17067 IX86_BUILTIN_VEC_EXT_V8HI,
17068 IX86_BUILTIN_VEC_EXT_V2SI,
17069 IX86_BUILTIN_VEC_EXT_V4HI,
17070 IX86_BUILTIN_VEC_EXT_V16QI,
17071 IX86_BUILTIN_VEC_SET_V2DI,
17072 IX86_BUILTIN_VEC_SET_V4SF,
17073 IX86_BUILTIN_VEC_SET_V4SI,
17074 IX86_BUILTIN_VEC_SET_V8HI,
17075 IX86_BUILTIN_VEC_SET_V4HI,
17076 IX86_BUILTIN_VEC_SET_V16QI,
17077
17078 IX86_BUILTIN_VEC_PACK_SFIX,
17079
17080 /* SSE4.2. */
17081 IX86_BUILTIN_CRC32QI,
17082 IX86_BUILTIN_CRC32HI,
17083 IX86_BUILTIN_CRC32SI,
17084 IX86_BUILTIN_CRC32DI,
17085
17086 IX86_BUILTIN_PCMPESTRI128,
17087 IX86_BUILTIN_PCMPESTRM128,
17088 IX86_BUILTIN_PCMPESTRA128,
17089 IX86_BUILTIN_PCMPESTRC128,
17090 IX86_BUILTIN_PCMPESTRO128,
17091 IX86_BUILTIN_PCMPESTRS128,
17092 IX86_BUILTIN_PCMPESTRZ128,
17093 IX86_BUILTIN_PCMPISTRI128,
17094 IX86_BUILTIN_PCMPISTRM128,
17095 IX86_BUILTIN_PCMPISTRA128,
17096 IX86_BUILTIN_PCMPISTRC128,
17097 IX86_BUILTIN_PCMPISTRO128,
17098 IX86_BUILTIN_PCMPISTRS128,
17099 IX86_BUILTIN_PCMPISTRZ128,
17100
17101 IX86_BUILTIN_PCMPGTQ,
17102
17103 /* TFmode support builtins. */
17104 IX86_BUILTIN_INFQ,
17105 IX86_BUILTIN_FABSQ,
17106 IX86_BUILTIN_COPYSIGNQ,
17107
17108 IX86_BUILTIN_MAX
17109 };
17110
17111 /* Table for the ix86 builtin decls. */
17112 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
17113
17114 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so
17115 * only if ix86_isa_flags includes one of the bits in MASK. Stores the
17116 * function decl in the ix86_builtins array.
17117 * Returns the function decl, or NULL_TREE if the builtin was not added. */
17118
17119 static inline tree
17120 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
17121 {
17122 tree decl = NULL_TREE;
17123
17124 if (mask & ix86_isa_flags
17125 && (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT))
17126 {
17127 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
17128 NULL, NULL_TREE);
17129 ix86_builtins[(int) code] = decl;
17130 }
17131
17132 return decl;
17133 }
17134
17135 /* Like def_builtin, but also marks the function decl "const". */
17136
17137 static inline tree
17138 def_builtin_const (int mask, const char *name, tree type,
17139 enum ix86_builtins code)
17140 {
17141 tree decl = def_builtin (mask, name, type, code);
17142 if (decl)
17143 TREE_READONLY (decl) = 1;
17144 return decl;
17145 }
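/* For illustration only: later in this file the builtins are registered
   with calls along the lines of
     def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps",
                        v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
   where the function-type node name is merely indicative; the decl is
   created only when the required ISA bits are present in ix86_isa_flags.  */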
17146
17147 /* Bits for builtin_description.flag. */
17148
17149 /* Set when we don't support the comparison natively, and should
17150 swap the comparison operands in order to support it. */
17151 #define BUILTIN_DESC_SWAP_OPERANDS 1
17152
17153 struct builtin_description
17154 {
17155 const unsigned int mask;
17156 const enum insn_code icode;
17157 const char *const name;
17158 const enum ix86_builtins code;
17159 const enum rtx_code comparison;
17160 const int flag;
17161 };
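/* In the tables below, MASK selects the ISA bits that must be enabled,
   ICODE is the insn pattern used to expand the builtin, COMPARISON
   supplies the rtx comparison code for compare-style builtins, and FLAG
   holds either BUILTIN_DESC_SWAP_OPERANDS or, for the ptest and
   pcmp[ei]str tables, a CC mode cast to int.  */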
17162
17163 static const struct builtin_description bdesc_comi[] =
17164 {
17165 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
17166 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
17167 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
17168 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
17169 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
17170 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
17171 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
17172 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
17173 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
17174 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
17175 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
17176 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
17177 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
17178 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
17179 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
17180 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
17181 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
17182 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
17183 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
17184 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
17185 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
17186 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
17187 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
17188 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
17189 };
17190
17191 static const struct builtin_description bdesc_ptest[] =
17192 {
17193 /* SSE4.1 */
17194 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 },
17195 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 },
17196 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
17197 };
17198
17199 static const struct builtin_description bdesc_pcmpestr[] =
17200 {
17201 /* SSE4.2 */
17202 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
17203 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
17204 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
17205 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
17206 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
17207 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
17208 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
17209 };
17210
17211 static const struct builtin_description bdesc_pcmpistr[] =
17212 {
17213 /* SSE4.2 */
17214 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
17215 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
17216 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
17217 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
17218 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
17219 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
17220 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
17221 };
17222
17223 static const struct builtin_description bdesc_crc32[] =
17224 {
17225 /* SSE4.2 */
17226 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32qi, 0, IX86_BUILTIN_CRC32QI, UNKNOWN, 0 },
17227 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, 0, IX86_BUILTIN_CRC32HI, UNKNOWN, 0 },
17228 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, 0, IX86_BUILTIN_CRC32SI, UNKNOWN, 0 },
17229 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32di, 0, IX86_BUILTIN_CRC32DI, UNKNOWN, 0 },
17230 };
17231
17232 /* SSE builtins with 3 arguments whose last argument must be an immediate or xmm0. */
17233 static const struct builtin_description bdesc_sse_3arg[] =
17234 {
17235 /* SSE4.1 */
17236 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, 0 },
17237 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, 0 },
17238 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, 0 },
17239 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, 0 },
17240 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, 0 },
17241 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, 0 },
17242 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, 0 },
17243 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, 0 },
17244 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, 0 },
17245 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, 0 },
17246 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, UNKNOWN, 0 },
17247 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, UNKNOWN, 0 },
17248 };
17249
17250 static const struct builtin_description bdesc_2arg[] =
17251 {
17252 /* SSE */
17253 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, 0 },
17254 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, 0 },
17255 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, 0 },
17256 { OPTION_MASK_ISA_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, 0 },
17257 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, 0 },
17258 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, 0 },
17259 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, 0 },
17260 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, 0 },
17261
17262 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
17263 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
17264 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
17265 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, BUILTIN_DESC_SWAP_OPERANDS },
17266 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, BUILTIN_DESC_SWAP_OPERANDS },
17267 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
17268 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
17269 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
17270 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
17271 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17272 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17273 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
17274 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
17275 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
17276 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
17277 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
17278 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
17279 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
17280 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
17281 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17282 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17283 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
17284
17285 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, 0 },
17286 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, 0 },
17287 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, 0 },
17288 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, 0 },
17289
17290 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, 0 },
17291 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, 0 },
17292 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, 0 },
17293 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, 0 },
17294
17295 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, 0 },
17296 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, 0 },
17297 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, 0 },
17298 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, 0 },
17299 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, 0 },
17300
17301 /* MMX */
17302 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, 0 },
17303 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, 0 },
17304 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, 0 },
17305 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, 0 },
17306 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, 0 },
17307 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, 0 },
17308 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, 0 },
17309 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, 0 },
17310
17311 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, 0 },
17312 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, 0 },
17313 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, 0 },
17314 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, 0 },
17315 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, 0 },
17316 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, 0 },
17317 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, 0 },
17318 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, 0 },
17319
17320 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, 0 },
17321 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, 0 },
17322 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, 0 },
17323
17324 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, 0 },
17325 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, 0 },
17326 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, 0 },
17327 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, 0 },
17328
17329 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, 0 },
17330 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, 0 },
17331
17332 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, 0 },
17333 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, 0 },
17334 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, 0 },
17335 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, 0 },
17336 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, 0 },
17337 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, 0 },
17338
17339 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, 0 },
17340 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, 0 },
17341 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, 0 },
17342 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, 0 },
17343
17344 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, 0 },
17345 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, 0 },
17346 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, 0 },
17347 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, 0 },
17348 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, 0 },
17349 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, 0 },
17350
17351 /* Special. */
17352 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, UNKNOWN, 0 },
17353 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, UNKNOWN, 0 },
17354 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, UNKNOWN, 0 },
17355
17356 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, UNKNOWN, 0 },
17357 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, UNKNOWN, 0 },
17358 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, UNKNOWN, 0 },
17359
17360 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, UNKNOWN, 0 },
17361 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, UNKNOWN, 0 },
17362 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, UNKNOWN, 0 },
17363 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, UNKNOWN, 0 },
17364 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, UNKNOWN, 0 },
17365 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, UNKNOWN, 0 },
17366
17367 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, UNKNOWN, 0 },
17368 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, UNKNOWN, 0 },
17369 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, UNKNOWN, 0 },
17370 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, UNKNOWN, 0 },
17371 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, UNKNOWN, 0 },
17372 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, UNKNOWN, 0 },
17373
17374 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, UNKNOWN, 0 },
17375 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, UNKNOWN, 0 },
17376 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, UNKNOWN, 0 },
17377 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, UNKNOWN, 0 },
17378
17379 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, UNKNOWN, 0 },
17380 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, UNKNOWN, 0 },
17381
17382 /* SSE2 */
17383 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, 0 },
17384 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, 0 },
17385 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, 0 },
17386 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, 0 },
17387 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, 0 },
17388 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, 0 },
17389 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, 0 },
17390 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, 0 },
17391
17392 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
17393 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
17394 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
17395 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, BUILTIN_DESC_SWAP_OPERANDS },
17396 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, BUILTIN_DESC_SWAP_OPERANDS },
17397 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
17398 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
17399 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
17400 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
17401 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17402 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17403 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
17404 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
17405 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
17406 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
17407 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
17408 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
17409 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
17410 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
17411 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
17412
17413 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, 0 },
17414 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, 0 },
17415 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, 0 },
17416 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, 0 },
17417
17418 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, 0 },
17419 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, 0 },
17420 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, 0 },
17421 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, 0 },
17422
17423 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, 0 },
17424 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, 0 },
17425 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, 0 },
17426
17427 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, 0 },
17428
17429 /* SSE2 MMX */
17430 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, 0 },
17431 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, 0 },
17432 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, 0 },
17433 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, 0 },
17434 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, 0 },
17435 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, 0 },
17436 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, 0 },
17437 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, 0 },
17438
17439 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, 0 },
17440 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, 0 },
17441 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, 0 },
17442 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, 0 },
17443 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, 0 },
17444 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, 0 },
17445 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, 0 },
17446 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, 0 },
17447
17448 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, 0 },
17449 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, 0 },
17450
17451 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, 0 },
17452 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, 0 },
17453 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, 0 },
17454 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, 0 },
17455
17456 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, 0 },
17457 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, 0 },
17458
17459 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, 0 },
17460 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, 0 },
17461 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, 0 },
17462 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, 0 },
17463 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, 0 },
17464 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, 0 },
17465
17466 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, 0 },
17467 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, 0 },
17468 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, 0 },
17469 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, 0 },
17470
17471 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, 0 },
17472 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, 0 },
17473 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, 0 },
17474 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, 0 },
17475 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, 0 },
17476 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, 0 },
17477 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, 0 },
17478 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, 0 },
17479
17480 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, 0 },
17481 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, 0 },
17482 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, 0 },
17483
17484 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, 0 },
17485 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, UNKNOWN, 0 },
17486
17487 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, UNKNOWN, 0 },
17488 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, UNKNOWN, 0 },
17489
17490 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, UNKNOWN, 0 },
17491 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, UNKNOWN, 0 },
17492 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, UNKNOWN, 0 },
17493
17494 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, UNKNOWN, 0 },
17495 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, UNKNOWN, 0 },
17496 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, UNKNOWN, 0 },
17497
17498 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, UNKNOWN, 0 },
17499 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, UNKNOWN, 0 },
17500
17501 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, UNKNOWN, 0 },
17502
17503 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, UNKNOWN, 0 },
17504 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, UNKNOWN, 0 },
17505 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, UNKNOWN, 0 },
17506 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, UNKNOWN, 0 },
17507
17508 /* SSE3 MMX */
17509 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, 0 },
17510 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, 0 },
17511 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, 0 },
17512 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, 0 },
17513 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, 0 },
17514 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, 0 },
17515
17516 /* SSSE3 */
17517 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, 0 },
17518 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, 0 },
17519 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, 0 },
17520 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, 0 },
17521 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, 0 },
17522 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, 0 },
17523 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, 0 },
17524 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, 0 },
17525 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, 0 },
17526 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, 0 },
17527 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, 0 },
17528 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, 0 },
17529 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, 0 },
17530 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, 0 },
17531 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, 0 },
17532 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, 0 },
17533 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, 0 },
17534 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, 0 },
17535 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, 0 },
17536 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, 0 },
17537 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, 0 },
17538 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, 0 },
17539 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, 0 },
17540 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, 0 },
17541
17542 /* SSE4.1 */
17543 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, 0 },
17544 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, 0 },
17545 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, 0 },
17546 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, 0 },
17547 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, 0 },
17548 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, 0 },
17549 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, 0 },
17550 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, 0 },
17551 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, 0 },
17552 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, 0 },
17553 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, 0, IX86_BUILTIN_PMULDQ128, UNKNOWN, 0 },
17554 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, 0 },
17555
17556 /* SSE4.2 */
17557 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, 0 },
17558 };
17559
17560 static const struct builtin_description bdesc_1arg[] =
17561 {
17562 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, UNKNOWN, 0 },
17563 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, UNKNOWN, 0 },
17564
17565 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, UNKNOWN, 0 },
17566 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, UNKNOWN, 0 },
17567 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, UNKNOWN, 0 },
17568
17569 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, UNKNOWN, 0 },
17570 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, UNKNOWN, 0 },
17571 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, UNKNOWN, 0 },
17572 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, UNKNOWN, 0 },
17573 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, UNKNOWN, 0 },
17574 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, 0 },
17575
17576 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, UNKNOWN, 0 },
17577 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, UNKNOWN, 0 },
17578
17579 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, UNKNOWN, 0 },
17580
17581 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, UNKNOWN, 0 },
17582 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, UNKNOWN, 0 },
17583
17584 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, UNKNOWN, 0 },
17585 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, UNKNOWN, 0 },
17586 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, UNKNOWN, 0 },
17587 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, 0 },
17588 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, UNKNOWN, 0 },
17589
17590 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, UNKNOWN, 0 },
17591
17592 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, UNKNOWN, 0 },
17593 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, UNKNOWN, 0 },
17594 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, UNKNOWN, 0 },
17595 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, 0 },
17596
17597 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, UNKNOWN, 0 },
17598 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, UNKNOWN, 0 },
17599 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, 0 },
17600
17601 /* SSE3 */
17602 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, 0 },
17603 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, 0 },
17604
17605 /* SSSE3 */
17606 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, 0 },
17607 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, 0 },
17608 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, 0 },
17609 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, 0 },
17610 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, 0 },
17611 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, 0 },
17612
17613 /* SSE4.1 */
17614 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVSXBW128, UNKNOWN, 0 },
17615 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVSXBD128, UNKNOWN, 0 },
17616 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, 0 },
17617 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVSXWD128, UNKNOWN, 0 },
17618 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, 0 },
17619 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, 0, IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, 0 },
17620 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVZXBW128, UNKNOWN, 0 },
17621 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVZXBD128, UNKNOWN, 0 },
17622 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, 0 },
17623 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVZXWD128, UNKNOWN, 0 },
17624 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, 0 },
17625 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, 0, IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, 0 },
17626 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, 0 },
17627
17628 /* Fake 1-arg builtins that take a constant fitting in 8 bits as the 2nd arg. */
17629 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundpd, 0, IX86_BUILTIN_ROUNDPD, UNKNOWN, 0 },
17630 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundps, 0, IX86_BUILTIN_ROUNDPS, UNKNOWN, 0 },
17631 };
17632
17633 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
17634 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
17635 builtins. */
17636 static void
17637 ix86_init_mmx_sse_builtins (void)
17638 {
17639 const struct builtin_description * d;
17640 size_t i;
17641
17642 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
17643 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
17644 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
17645 tree V2DI_type_node
17646 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
17647 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
17648 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
17649 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
17650 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
17651 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
17652 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
17653
17654 tree pchar_type_node = build_pointer_type (char_type_node);
17655 tree pcchar_type_node = build_pointer_type (
17656 build_type_variant (char_type_node, 1, 0));
17657 tree pfloat_type_node = build_pointer_type (float_type_node);
17658 tree pcfloat_type_node = build_pointer_type (
17659 build_type_variant (float_type_node, 1, 0));
17660 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
17661 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
17662 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
17663
17664 /* Comparisons. */
17665 tree int_ftype_v4sf_v4sf
17666 = build_function_type_list (integer_type_node,
17667 V4SF_type_node, V4SF_type_node, NULL_TREE);
17668 tree v4si_ftype_v4sf_v4sf
17669 = build_function_type_list (V4SI_type_node,
17670 V4SF_type_node, V4SF_type_node, NULL_TREE);
17671 /* MMX/SSE/integer conversions. */
17672 tree int_ftype_v4sf
17673 = build_function_type_list (integer_type_node,
17674 V4SF_type_node, NULL_TREE);
17675 tree int64_ftype_v4sf
17676 = build_function_type_list (long_long_integer_type_node,
17677 V4SF_type_node, NULL_TREE);
17678 tree int_ftype_v8qi
17679 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
17680 tree v4sf_ftype_v4sf_int
17681 = build_function_type_list (V4SF_type_node,
17682 V4SF_type_node, integer_type_node, NULL_TREE);
17683 tree v4sf_ftype_v4sf_int64
17684 = build_function_type_list (V4SF_type_node,
17685 V4SF_type_node, long_long_integer_type_node,
17686 NULL_TREE);
17687 tree v4sf_ftype_v4sf_v2si
17688 = build_function_type_list (V4SF_type_node,
17689 V4SF_type_node, V2SI_type_node, NULL_TREE);
17690
17691 /* Miscellaneous. */
17692 tree v8qi_ftype_v4hi_v4hi
17693 = build_function_type_list (V8QI_type_node,
17694 V4HI_type_node, V4HI_type_node, NULL_TREE);
17695 tree v4hi_ftype_v2si_v2si
17696 = build_function_type_list (V4HI_type_node,
17697 V2SI_type_node, V2SI_type_node, NULL_TREE);
17698 tree v4sf_ftype_v4sf_v4sf_int
17699 = build_function_type_list (V4SF_type_node,
17700 V4SF_type_node, V4SF_type_node,
17701 integer_type_node, NULL_TREE);
17702 tree v2si_ftype_v4hi_v4hi
17703 = build_function_type_list (V2SI_type_node,
17704 V4HI_type_node, V4HI_type_node, NULL_TREE);
17705 tree v4hi_ftype_v4hi_int
17706 = build_function_type_list (V4HI_type_node,
17707 V4HI_type_node, integer_type_node, NULL_TREE);
17708 tree v4hi_ftype_v4hi_di
17709 = build_function_type_list (V4HI_type_node,
17710 V4HI_type_node, long_long_unsigned_type_node,
17711 NULL_TREE);
17712 tree v2si_ftype_v2si_di
17713 = build_function_type_list (V2SI_type_node,
17714 V2SI_type_node, long_long_unsigned_type_node,
17715 NULL_TREE);
17716 tree void_ftype_void
17717 = build_function_type (void_type_node, void_list_node);
17718 tree void_ftype_unsigned
17719 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
17720 tree void_ftype_unsigned_unsigned
17721 = build_function_type_list (void_type_node, unsigned_type_node,
17722 unsigned_type_node, NULL_TREE);
17723 tree void_ftype_pcvoid_unsigned_unsigned
17724 = build_function_type_list (void_type_node, const_ptr_type_node,
17725 unsigned_type_node, unsigned_type_node,
17726 NULL_TREE);
17727 tree unsigned_ftype_void
17728 = build_function_type (unsigned_type_node, void_list_node);
17729 tree v2si_ftype_v4sf
17730 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
17731 /* Loads/stores. */
17732 tree void_ftype_v8qi_v8qi_pchar
17733 = build_function_type_list (void_type_node,
17734 V8QI_type_node, V8QI_type_node,
17735 pchar_type_node, NULL_TREE);
17736 tree v4sf_ftype_pcfloat
17737 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
17738 /* @@@ the type is bogus */
17739 tree v4sf_ftype_v4sf_pv2si
17740 = build_function_type_list (V4SF_type_node,
17741 V4SF_type_node, pv2si_type_node, NULL_TREE);
17742 tree void_ftype_pv2si_v4sf
17743 = build_function_type_list (void_type_node,
17744 pv2si_type_node, V4SF_type_node, NULL_TREE);
17745 tree void_ftype_pfloat_v4sf
17746 = build_function_type_list (void_type_node,
17747 pfloat_type_node, V4SF_type_node, NULL_TREE);
17748 tree void_ftype_pdi_di
17749 = build_function_type_list (void_type_node,
17750 pdi_type_node, long_long_unsigned_type_node,
17751 NULL_TREE);
17752 tree void_ftype_pv2di_v2di
17753 = build_function_type_list (void_type_node,
17754 pv2di_type_node, V2DI_type_node, NULL_TREE);
17755 /* Normal vector unops. */
17756 tree v4sf_ftype_v4sf
17757 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
17758 tree v16qi_ftype_v16qi
17759 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
17760 tree v8hi_ftype_v8hi
17761 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
17762 tree v4si_ftype_v4si
17763 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
17764 tree v8qi_ftype_v8qi
17765 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
17766 tree v4hi_ftype_v4hi
17767 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
17768
17769 /* Normal vector binops. */
17770 tree v4sf_ftype_v4sf_v4sf
17771 = build_function_type_list (V4SF_type_node,
17772 V4SF_type_node, V4SF_type_node, NULL_TREE);
17773 tree v8qi_ftype_v8qi_v8qi
17774 = build_function_type_list (V8QI_type_node,
17775 V8QI_type_node, V8QI_type_node, NULL_TREE);
17776 tree v4hi_ftype_v4hi_v4hi
17777 = build_function_type_list (V4HI_type_node,
17778 V4HI_type_node, V4HI_type_node, NULL_TREE);
17779 tree v2si_ftype_v2si_v2si
17780 = build_function_type_list (V2SI_type_node,
17781 V2SI_type_node, V2SI_type_node, NULL_TREE);
17782 tree di_ftype_di_di
17783 = build_function_type_list (long_long_unsigned_type_node,
17784 long_long_unsigned_type_node,
17785 long_long_unsigned_type_node, NULL_TREE);
17786
17787 tree di_ftype_di_di_int
17788 = build_function_type_list (long_long_unsigned_type_node,
17789 long_long_unsigned_type_node,
17790 long_long_unsigned_type_node,
17791 integer_type_node, NULL_TREE);
17792
17793 tree v2si_ftype_v2sf
17794 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
17795 tree v2sf_ftype_v2si
17796 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
17797 tree v2si_ftype_v2si
17798 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
17799 tree v2sf_ftype_v2sf
17800 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
17801 tree v2sf_ftype_v2sf_v2sf
17802 = build_function_type_list (V2SF_type_node,
17803 V2SF_type_node, V2SF_type_node, NULL_TREE);
17804 tree v2si_ftype_v2sf_v2sf
17805 = build_function_type_list (V2SI_type_node,
17806 V2SF_type_node, V2SF_type_node, NULL_TREE);
17807 tree pint_type_node = build_pointer_type (integer_type_node);
17808 tree pdouble_type_node = build_pointer_type (double_type_node);
17809 tree pcdouble_type_node = build_pointer_type (
17810 build_type_variant (double_type_node, 1, 0));
17811 tree int_ftype_v2df_v2df
17812 = build_function_type_list (integer_type_node,
17813 V2DF_type_node, V2DF_type_node, NULL_TREE);
17814
17815 tree void_ftype_pcvoid
17816 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
17817 tree v4sf_ftype_v4si
17818 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
17819 tree v4si_ftype_v4sf
17820 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
17821 tree v2df_ftype_v4si
17822 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
17823 tree v4si_ftype_v2df
17824 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
17825 tree v4si_ftype_v2df_v2df
17826 = build_function_type_list (V4SI_type_node,
17827 V2DF_type_node, V2DF_type_node, NULL_TREE);
17828 tree v2si_ftype_v2df
17829 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
17830 tree v4sf_ftype_v2df
17831 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
17832 tree v2df_ftype_v2si
17833 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
17834 tree v2df_ftype_v4sf
17835 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
17836 tree int_ftype_v2df
17837 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
17838 tree int64_ftype_v2df
17839 = build_function_type_list (long_long_integer_type_node,
17840 V2DF_type_node, NULL_TREE);
17841 tree v2df_ftype_v2df_int
17842 = build_function_type_list (V2DF_type_node,
17843 V2DF_type_node, integer_type_node, NULL_TREE);
17844 tree v2df_ftype_v2df_int64
17845 = build_function_type_list (V2DF_type_node,
17846 V2DF_type_node, long_long_integer_type_node,
17847 NULL_TREE);
17848 tree v4sf_ftype_v4sf_v2df
17849 = build_function_type_list (V4SF_type_node,
17850 V4SF_type_node, V2DF_type_node, NULL_TREE);
17851 tree v2df_ftype_v2df_v4sf
17852 = build_function_type_list (V2DF_type_node,
17853 V2DF_type_node, V4SF_type_node, NULL_TREE);
17854 tree v2df_ftype_v2df_v2df_int
17855 = build_function_type_list (V2DF_type_node,
17856 V2DF_type_node, V2DF_type_node,
17857 integer_type_node,
17858 NULL_TREE);
17859 tree v2df_ftype_v2df_pcdouble
17860 = build_function_type_list (V2DF_type_node,
17861 V2DF_type_node, pcdouble_type_node, NULL_TREE);
17862 tree void_ftype_pdouble_v2df
17863 = build_function_type_list (void_type_node,
17864 pdouble_type_node, V2DF_type_node, NULL_TREE);
17865 tree void_ftype_pint_int
17866 = build_function_type_list (void_type_node,
17867 pint_type_node, integer_type_node, NULL_TREE);
17868 tree void_ftype_v16qi_v16qi_pchar
17869 = build_function_type_list (void_type_node,
17870 V16QI_type_node, V16QI_type_node,
17871 pchar_type_node, NULL_TREE);
17872 tree v2df_ftype_pcdouble
17873 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
17874 tree v2df_ftype_v2df_v2df
17875 = build_function_type_list (V2DF_type_node,
17876 V2DF_type_node, V2DF_type_node, NULL_TREE);
17877 tree v16qi_ftype_v16qi_v16qi
17878 = build_function_type_list (V16QI_type_node,
17879 V16QI_type_node, V16QI_type_node, NULL_TREE);
17880 tree v8hi_ftype_v8hi_v8hi
17881 = build_function_type_list (V8HI_type_node,
17882 V8HI_type_node, V8HI_type_node, NULL_TREE);
17883 tree v4si_ftype_v4si_v4si
17884 = build_function_type_list (V4SI_type_node,
17885 V4SI_type_node, V4SI_type_node, NULL_TREE);
17886 tree v2di_ftype_v2di_v2di
17887 = build_function_type_list (V2DI_type_node,
17888 V2DI_type_node, V2DI_type_node, NULL_TREE);
17889 tree v2di_ftype_v2df_v2df
17890 = build_function_type_list (V2DI_type_node,
17891 V2DF_type_node, V2DF_type_node, NULL_TREE);
17892 tree v2df_ftype_v2df
17893 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
17894 tree v2di_ftype_v2di_int
17895 = build_function_type_list (V2DI_type_node,
17896 V2DI_type_node, integer_type_node, NULL_TREE);
17897 tree v2di_ftype_v2di_v2di_int
17898 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17899 V2DI_type_node, integer_type_node, NULL_TREE);
17900 tree v4si_ftype_v4si_int
17901 = build_function_type_list (V4SI_type_node,
17902 V4SI_type_node, integer_type_node, NULL_TREE);
17903 tree v8hi_ftype_v8hi_int
17904 = build_function_type_list (V8HI_type_node,
17905 V8HI_type_node, integer_type_node, NULL_TREE);
17906 tree v4si_ftype_v8hi_v8hi
17907 = build_function_type_list (V4SI_type_node,
17908 V8HI_type_node, V8HI_type_node, NULL_TREE);
17909 tree di_ftype_v8qi_v8qi
17910 = build_function_type_list (long_long_unsigned_type_node,
17911 V8QI_type_node, V8QI_type_node, NULL_TREE);
17912 tree di_ftype_v2si_v2si
17913 = build_function_type_list (long_long_unsigned_type_node,
17914 V2SI_type_node, V2SI_type_node, NULL_TREE);
17915 tree v2di_ftype_v16qi_v16qi
17916 = build_function_type_list (V2DI_type_node,
17917 V16QI_type_node, V16QI_type_node, NULL_TREE);
17918 tree v2di_ftype_v4si_v4si
17919 = build_function_type_list (V2DI_type_node,
17920 V4SI_type_node, V4SI_type_node, NULL_TREE);
17921 tree int_ftype_v16qi
17922 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
17923 tree v16qi_ftype_pcchar
17924 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
17925 tree void_ftype_pchar_v16qi
17926 = build_function_type_list (void_type_node,
17927 pchar_type_node, V16QI_type_node, NULL_TREE);
17928
17929 tree v2di_ftype_v2di_unsigned_unsigned
17930 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17931 unsigned_type_node, unsigned_type_node,
17932 NULL_TREE);
17933 tree v2di_ftype_v2di_v2di_unsigned_unsigned
17934 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
17935 unsigned_type_node, unsigned_type_node,
17936 NULL_TREE);
17937 tree v2di_ftype_v2di_v16qi
17938 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
17939 NULL_TREE);
17940 tree v2df_ftype_v2df_v2df_v2df
17941 = build_function_type_list (V2DF_type_node,
17942 V2DF_type_node, V2DF_type_node,
17943 V2DF_type_node, NULL_TREE);
17944 tree v4sf_ftype_v4sf_v4sf_v4sf
17945 = build_function_type_list (V4SF_type_node,
17946 V4SF_type_node, V4SF_type_node,
17947 V4SF_type_node, NULL_TREE);
17948 tree v8hi_ftype_v16qi
17949 = build_function_type_list (V8HI_type_node, V16QI_type_node,
17950 NULL_TREE);
17951 tree v4si_ftype_v16qi
17952 = build_function_type_list (V4SI_type_node, V16QI_type_node,
17953 NULL_TREE);
17954 tree v2di_ftype_v16qi
17955 = build_function_type_list (V2DI_type_node, V16QI_type_node,
17956 NULL_TREE);
17957 tree v4si_ftype_v8hi
17958 = build_function_type_list (V4SI_type_node, V8HI_type_node,
17959 NULL_TREE);
17960 tree v2di_ftype_v8hi
17961 = build_function_type_list (V2DI_type_node, V8HI_type_node,
17962 NULL_TREE);
17963 tree v2di_ftype_v4si
17964 = build_function_type_list (V2DI_type_node, V4SI_type_node,
17965 NULL_TREE);
17966 tree v2di_ftype_pv2di
17967 = build_function_type_list (V2DI_type_node, pv2di_type_node,
17968 NULL_TREE);
17969 tree v16qi_ftype_v16qi_v16qi_int
17970 = build_function_type_list (V16QI_type_node, V16QI_type_node,
17971 V16QI_type_node, integer_type_node,
17972 NULL_TREE);
17973 tree v16qi_ftype_v16qi_v16qi_v16qi
17974 = build_function_type_list (V16QI_type_node, V16QI_type_node,
17975 V16QI_type_node, V16QI_type_node,
17976 NULL_TREE);
17977 tree v8hi_ftype_v8hi_v8hi_int
17978 = build_function_type_list (V8HI_type_node, V8HI_type_node,
17979 V8HI_type_node, integer_type_node,
17980 NULL_TREE);
17981 tree v4si_ftype_v4si_v4si_int
17982 = build_function_type_list (V4SI_type_node, V4SI_type_node,
17983 V4SI_type_node, integer_type_node,
17984 NULL_TREE);
17985 tree int_ftype_v2di_v2di
17986 = build_function_type_list (integer_type_node,
17987 V2DI_type_node, V2DI_type_node,
17988 NULL_TREE);
17989 tree int_ftype_v16qi_int_v16qi_int_int
17990 = build_function_type_list (integer_type_node,
17991 V16QI_type_node,
17992 integer_type_node,
17993 V16QI_type_node,
17994 integer_type_node,
17995 integer_type_node,
17996 NULL_TREE);
17997 tree v16qi_ftype_v16qi_int_v16qi_int_int
17998 = build_function_type_list (V16QI_type_node,
17999 V16QI_type_node,
18000 integer_type_node,
18001 V16QI_type_node,
18002 integer_type_node,
18003 integer_type_node,
18004 NULL_TREE);
18005 tree int_ftype_v16qi_v16qi_int
18006 = build_function_type_list (integer_type_node,
18007 V16QI_type_node,
18008 V16QI_type_node,
18009 integer_type_node,
18010 NULL_TREE);
18011 tree ftype;
18012
18013 /* The __float80 type. */
18014 if (TYPE_MODE (long_double_type_node) == XFmode)
18015 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
18016 "__float80");
18017 else
18018 {
18019 /* long double does not use XFmode here, so make a distinct 80-bit type. */
18020 tree float80_type_node = make_node (REAL_TYPE);
18021
18022 TYPE_PRECISION (float80_type_node) = 80;
18023 layout_type (float80_type_node);
18024 (*lang_hooks.types.register_builtin_type) (float80_type_node,
18025 "__float80");
18026 }
18027
18028 if (TARGET_64BIT)
18029 {
18030 tree float128_type_node = make_node (REAL_TYPE);
18031
18032 TYPE_PRECISION (float128_type_node) = 128;
18033 layout_type (float128_type_node);
18034 (*lang_hooks.types.register_builtin_type) (float128_type_node,
18035 "__float128");
18036
18037 /* TFmode support builtins. */
18038 ftype = build_function_type (float128_type_node,
18039 void_list_node);
18040 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_infq", ftype, IX86_BUILTIN_INFQ);
18041
18042 ftype = build_function_type_list (float128_type_node,
18043 float128_type_node,
18044 NULL_TREE);
18045 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ);
18046
18047 ftype = build_function_type_list (float128_type_node,
18048 float128_type_node,
18049 float128_type_node,
18050 NULL_TREE);
18051 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_copysignq", ftype, IX86_BUILTIN_COPYSIGNQ);
18052 }
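/* Editorial illustration (not part of this file): on a 64-bit target the
   TFmode builtins registered above are callable directly from user code.
   A minimal sketch, assuming __float128 support in the front end:

     __float128
     clamp_infinity (__float128 x)
     {
       if (__builtin_fabsq (x) == __builtin_infq ())
         return __builtin_copysignq (1.0, x);
       return x;
     }
*/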
18053
18054 /* Add all SSE builtins that are more or less simple operations on
18055 three operands. */
18056 for (i = 0, d = bdesc_sse_3arg;
18057 i < ARRAY_SIZE (bdesc_sse_3arg);
18058 i++, d++)
18059 {
18060 /* Use one of the operands; the target can have a different mode for
18061 mask-generating compares. */
18062 enum machine_mode mode;
18063 tree type;
18064
18065 if (d->name == 0)
18066 continue;
18067 mode = insn_data[d->icode].operand[1].mode;
18068
18069 switch (mode)
18070 {
18071 case V16QImode:
18072 type = v16qi_ftype_v16qi_v16qi_int;
18073 break;
18074 case V8HImode:
18075 type = v8hi_ftype_v8hi_v8hi_int;
18076 break;
18077 case V4SImode:
18078 type = v4si_ftype_v4si_v4si_int;
18079 break;
18080 case V2DImode:
18081 type = v2di_ftype_v2di_v2di_int;
18082 break;
18083 case V2DFmode:
18084 type = v2df_ftype_v2df_v2df_int;
18085 break;
18086 case V4SFmode:
18087 type = v4sf_ftype_v4sf_v4sf_int;
18088 break;
18089 default:
18090 gcc_unreachable ();
18091 }
18092
18093 /* Override for variable blends. */
18094 switch (d->icode)
18095 {
18096 case CODE_FOR_sse4_1_blendvpd:
18097 type = v2df_ftype_v2df_v2df_v2df;
18098 break;
18099 case CODE_FOR_sse4_1_blendvps:
18100 type = v4sf_ftype_v4sf_v4sf_v4sf;
18101 break;
18102 case CODE_FOR_sse4_1_pblendvb:
18103 type = v16qi_ftype_v16qi_v16qi_v16qi;
18104 break;
18105 default:
18106 break;
18107 }
18108
18109 def_builtin_const (d->mask, d->name, type, d->code);
18110 }
18111
18112 /* Add all builtins that are more or less simple operations on two
18113 operands. */
18114 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18115 {
18116 /* Use one of the operands; the target can have a different mode for
18117 mask-generating compares. */
18118 enum machine_mode mode;
18119 tree type;
18120
18121 if (d->name == 0)
18122 continue;
18123 mode = insn_data[d->icode].operand[1].mode;
18124
18125 switch (mode)
18126 {
18127 case V16QImode:
18128 type = v16qi_ftype_v16qi_v16qi;
18129 break;
18130 case V8HImode:
18131 type = v8hi_ftype_v8hi_v8hi;
18132 break;
18133 case V4SImode:
18134 type = v4si_ftype_v4si_v4si;
18135 break;
18136 case V2DImode:
18137 type = v2di_ftype_v2di_v2di;
18138 break;
18139 case V2DFmode:
18140 type = v2df_ftype_v2df_v2df;
18141 break;
18142 case V4SFmode:
18143 type = v4sf_ftype_v4sf_v4sf;
18144 break;
18145 case V8QImode:
18146 type = v8qi_ftype_v8qi_v8qi;
18147 break;
18148 case V4HImode:
18149 type = v4hi_ftype_v4hi_v4hi;
18150 break;
18151 case V2SImode:
18152 type = v2si_ftype_v2si_v2si;
18153 break;
18154 case DImode:
18155 type = di_ftype_di_di;
18156 break;
18157
18158 default:
18159 gcc_unreachable ();
18160 }
18161
18162 /* Override for comparisons. */
18163 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
18164 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
18165 type = v4si_ftype_v4sf_v4sf;
18166
18167 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
18168 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
18169 type = v2di_ftype_v2df_v2df;
18170
18171 if (d->icode == CODE_FOR_vec_pack_sfix_v2df)
18172 type = v4si_ftype_v2df_v2df;
18173
18174 def_builtin_const (d->mask, d->name, type, d->code);
18175 }
18176
18177 /* Add all builtins that are more or less simple operations on 1 operand. */
18178 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18179 {
18180 enum machine_mode mode;
18181 tree type;
18182
18183 if (d->name == 0)
18184 continue;
18185 mode = insn_data[d->icode].operand[1].mode;
18186
18187 switch (mode)
18188 {
18189 case V16QImode:
18190 type = v16qi_ftype_v16qi;
18191 break;
18192 case V8HImode:
18193 type = v8hi_ftype_v8hi;
18194 break;
18195 case V4SImode:
18196 type = v4si_ftype_v4si;
18197 break;
18198 case V2DFmode:
18199 type = v2df_ftype_v2df;
18200 break;
18201 case V4SFmode:
18202 type = v4sf_ftype_v4sf;
18203 break;
18204 case V8QImode:
18205 type = v8qi_ftype_v8qi;
18206 break;
18207 case V4HImode:
18208 type = v4hi_ftype_v4hi;
18209 break;
18210 case V2SImode:
18211 type = v2si_ftype_v2si;
18212 break;
18213
18214 default:
18215 gcc_unreachable ();
18216 }
18217
18218 def_builtin_const (d->mask, d->name, type, d->code);
18219 }
18220
18221 /* pcmpestr[im] insns. */
18222 for (i = 0, d = bdesc_pcmpestr;
18223 i < ARRAY_SIZE (bdesc_pcmpestr);
18224 i++, d++)
18225 {
18226 if (d->code == IX86_BUILTIN_PCMPESTRM128)
18227 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
18228 else
18229 ftype = int_ftype_v16qi_int_v16qi_int_int;
18230 def_builtin_const (d->mask, d->name, ftype, d->code);
18231 }
18232
18233 /* pcmpistr[im] insns. */
18234 for (i = 0, d = bdesc_pcmpistr;
18235 i < ARRAY_SIZE (bdesc_pcmpistr);
18236 i++, d++)
18237 {
18238 if (d->code == IX86_BUILTIN_PCMPISTRM128)
18239 ftype = v16qi_ftype_v16qi_v16qi_int;
18240 else
18241 ftype = int_ftype_v16qi_v16qi_int;
18242 def_builtin_const (d->mask, d->name, ftype, d->code);
18243 }
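/* Editorial illustration (not part of this file): these string-compare
   builtins are what the <smmintrin.h> _mm_cmpistri/_mm_cmpistrm wrappers
   expand to.  A hedged sketch calling the index form directly; immediate 0
   selects unsigned-byte, "equal any" comparison:

     typedef char v16qi __attribute__ ((vector_size (16)));

     int
     first_match_index (v16qi a, v16qi b)
     {
       return __builtin_ia32_pcmpistri128 (a, b, 0);
     }
*/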
18244
18245 /* Add the remaining MMX insns with somewhat more complicated types. */
18246 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
18247 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
18248 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
18249 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
18250
18251 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
18252 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
18253 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
18254
18255 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
18256 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
18257
18258 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
18259 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
18260
18261 /* comi/ucomi insns. */
18262 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
18263 if (d->mask == OPTION_MASK_ISA_SSE2)
18264 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
18265 else
18266 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
18267
18268 /* ptest insns. */
18269 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
18270 def_builtin_const (d->mask, d->name, int_ftype_v2di_v2di, d->code);
18271
18272 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
18273 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
18274 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
18275
18276 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
18277 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
18278 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
18279 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
18280 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
18281 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
18282 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
18283 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
18284 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
18285 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
18286 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
18287
18288 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
18289
18290 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
18291 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
18292
18293 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
18294 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
18295 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
18296 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
18297
18298 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
18299 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
18300 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
18301 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
18302
18303 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
18304
18305 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
18306
18307 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
18308 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
18309 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
18310 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
18311 ftype = build_function_type_list (float_type_node,
18312 float_type_node,
18313 NULL_TREE);
18314 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtf", ftype, IX86_BUILTIN_RSQRTF);
18315 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
18316 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
18317
18318 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
18319
18320 /* Original 3DNow! */
18321 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
18322 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
18323 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
18324 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
18325 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
18326 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
18327 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
18328 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
18329 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
18330 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
18331 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
18332 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
18333 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
18334 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
18335 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
18336 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
18337 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
18338 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
18339 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
18340 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
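/* Editorial illustration (not part of this file): the 3DNow! builtins above
   work on 64-bit vectors of two floats.  A minimal sketch, assuming -m3dnow:

     typedef float v2sf __attribute__ ((vector_size (8)));

     v2sf
     add_pairs (v2sf a, v2sf b)
     {
       return __builtin_ia32_pfadd (a, b);
     }
*/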
18341
18342 /* 3DNow! extension as used in the Athlon CPU. */
18343 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
18344 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
18345 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
18346 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
18347 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
18348 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
18349
18350 /* SSE2 */
18351 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
18352
18353 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
18354 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
18355
18356 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
18357 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
18358
18359 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
18360 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
18361 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
18362 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
18363 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
18364
18365 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
18366 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
18367 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
18368 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
18369
18370 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
18371 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
18372
18373 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
18374
18375 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
18376 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
18377
18378 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
18379 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
18380 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
18381 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
18382 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
18383
18384 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
18385
18386 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
18387 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
18388 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
18389 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
18390
18391 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
18392 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
18393 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
18394
18395 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
18396 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
18397 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
18398 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
18399
18400 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
18401 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
18402 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
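/* Editorial illustration (not part of this file): the fence builtins take no
   arguments; _mm_lfence and _mm_mfence in <emmintrin.h> are thin wrappers
   around them.  A minimal sketch that makes earlier stores globally visible
   before a flag is set:

     void
     publish_flag (volatile int *flag)
     {
       __builtin_ia32_mfence ();
       *flag = 1;
     }
*/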
18403
18404 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
18405 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
18406
18407 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
18408 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
18409
18410 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
18411 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
18412 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
18413 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
18414 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
18415 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
18416 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
18417
18418 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
18419 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
18420 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
18421 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
18422 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
18423 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
18424 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
18425
18426 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
18427 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
18428 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
18429 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
18430
18431 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
18432
18433 /* Prescott New Instructions. */
18434 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
18435 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
18436 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_lddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
18437
18438 /* SSSE3. */
18439 def_builtin_const (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
18440 def_builtin_const (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int, IX86_BUILTIN_PALIGNR);
18441
18442 /* SSE4.1. */
18443 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_movntdqa", v2di_ftype_pv2di, IX86_BUILTIN_MOVNTDQA);
18444 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVSXBW128);
18445 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVSXBD128);
18446 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVSXBQ128);
18447 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVSXWD128);
18448 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVSXWQ128);
18449 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVSXDQ128);
18450 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVZXBW128);
18451 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVZXBD128);
18452 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVZXBQ128);
18453 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVZXWD128);
18454 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVZXWQ128);
18455 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVZXDQ128);
18456 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmuldq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULDQ128);
18457 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundpd", v2df_ftype_v2df_int, IX86_BUILTIN_ROUNDPD);
18458 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundps", v4sf_ftype_v4sf_int, IX86_BUILTIN_ROUNDPS);
18459 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundsd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_ROUNDSD);
18460 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundss", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_ROUNDSS);
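/* Editorial illustration (not part of this file): the rounding builtins take
   a small rounding-control immediate, the same value <smmintrin.h> passes
   from _mm_round_pd and friends.  A hedged sketch; 0x09 is assumed to mean
   round toward -inf with the inexact exception suppressed:

     typedef double v2df __attribute__ ((vector_size (16)));

     v2df
     floor_pair (v2df x)
     {
       return __builtin_ia32_roundpd (x, 0x09);
     }
*/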
18461
18462 /* SSE4.2. */
18463 ftype = build_function_type_list (unsigned_type_node,
18464 unsigned_type_node,
18465 unsigned_char_type_node,
18466 NULL_TREE);
18467 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32qi", ftype, IX86_BUILTIN_CRC32QI);
18468 ftype = build_function_type_list (unsigned_type_node,
18469 unsigned_type_node,
18470 short_unsigned_type_node,
18471 NULL_TREE);
18472 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32hi", ftype, IX86_BUILTIN_CRC32HI);
18473 ftype = build_function_type_list (unsigned_type_node,
18474 unsigned_type_node,
18475 unsigned_type_node,
18476 NULL_TREE);
18477 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32si", ftype, IX86_BUILTIN_CRC32SI);
18478 ftype = build_function_type_list (long_long_unsigned_type_node,
18479 long_long_unsigned_type_node,
18480 long_long_unsigned_type_node,
18481 NULL_TREE);
18482 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32di", ftype, IX86_BUILTIN_CRC32DI);
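/* Editorial illustration (not part of this file): the crc32 builtins
   accumulate a CRC-32C value; _mm_crc32_u32 in the intrinsic headers is a
   thin wrapper around the 32-bit form.  A minimal sketch:

     unsigned int
     crc32c_word (unsigned int crc, unsigned int data)
     {
       return __builtin_ia32_crc32si (crc, data);
     }
*/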
18483
18484 /* AMDFAM10 SSE4A builtins. */
18485 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
18486 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
18487 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
18488 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
18489 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
18490 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
18491
18492 /* Access to the vec_init patterns. */
18493 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
18494 integer_type_node, NULL_TREE);
18495 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
18496
18497 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
18498 short_integer_type_node,
18499 short_integer_type_node,
18500 short_integer_type_node, NULL_TREE);
18501 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
18502
18503 ftype = build_function_type_list (V8QI_type_node, char_type_node,
18504 char_type_node, char_type_node,
18505 char_type_node, char_type_node,
18506 char_type_node, char_type_node,
18507 char_type_node, NULL_TREE);
18508 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
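/* Editorial illustration (not part of this file): the vec_init builtins
   build an MMX vector from scalar elements; _mm_set_pi32 and _mm_setr_pi32
   in <mmintrin.h> are layered on the V2SI form.  A minimal sketch, with the
   arguments in the operand order the vec_init pattern expects:

     typedef int v2si __attribute__ ((vector_size (8)));

     v2si
     make_pair (int e0, int e1)
     {
       return __builtin_ia32_vec_init_v2si (e0, e1);
     }
*/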
18509
18510 /* Access to the vec_extract patterns. */
18511 ftype = build_function_type_list (double_type_node, V2DF_type_node,
18512 integer_type_node, NULL_TREE);
18513 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
18514
18515 ftype = build_function_type_list (long_long_integer_type_node,
18516 V2DI_type_node, integer_type_node,
18517 NULL_TREE);
18518 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
18519
18520 ftype = build_function_type_list (float_type_node, V4SF_type_node,
18521 integer_type_node, NULL_TREE);
18522 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
18523
18524 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
18525 integer_type_node, NULL_TREE);
18526 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
18527
18528 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
18529 integer_type_node, NULL_TREE);
18530 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
18531
18532 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
18533 integer_type_node, NULL_TREE);
18534 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
18535
18536 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
18537 integer_type_node, NULL_TREE);
18538 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
18539
18540 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
18541 integer_type_node, NULL_TREE);
18542 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
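/* Editorial illustration (not part of this file): the vec_ext builtins read
   a single element out of a vector given a constant index; wrappers such as
   _mm_extract_epi16 and _mm_cvtss_f32 are built on them.  A minimal sketch
   extracting element 0 of a V4SF value:

     typedef float v4sf __attribute__ ((vector_size (16)));

     float
     first_lane (v4sf x)
     {
       return __builtin_ia32_vec_ext_v4sf (x, 0);
     }
*/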
18543
18544 /* Access to the vec_set patterns. */
18545 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
18546 intDI_type_node,
18547 integer_type_node, NULL_TREE);
18548 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
18549
18550 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
18551 float_type_node,
18552 integer_type_node, NULL_TREE);
18553 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
18554
18555 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
18556 intSI_type_node,
18557 integer_type_node, NULL_TREE);
18558 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
18559
18560 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
18561 intHI_type_node,
18562 integer_type_node, NULL_TREE);
18563 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
18564
18565 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
18566 intHI_type_node,
18567 integer_type_node, NULL_TREE);
18568 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
18569
18570 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
18571 intQI_type_node,
18572 integer_type_node, NULL_TREE);
18573 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
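/* Editorial illustration (not part of this file): the vec_set builtins
   return a copy of the input vector with one element replaced; the classic
   user of the V8HI form is _mm_insert_epi16.  A minimal sketch:

     typedef short v8hi __attribute__ ((vector_size (16)));

     v8hi
     set_lane_3 (v8hi x, short value)
     {
       return __builtin_ia32_vec_set_v8hi (x, value, 3);
     }
*/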
18574 }
18575
18576 static void
18577 ix86_init_builtins (void)
18578 {
18579 if (TARGET_MMX)
18580 ix86_init_mmx_sse_builtins ();
18581 }
18582
18583 /* Errors in the source file can cause expand_expr to return const0_rtx
18584 where we expect a vector. To avoid crashing, use one of the vector
18585 clear instructions. */
18586 static rtx
18587 safe_vector_operand (rtx x, enum machine_mode mode)
18588 {
18589 if (x == const0_rtx)
18590 x = CONST0_RTX (mode);
18591 return x;
18592 }
18593
18594 /* Subroutine of ix86_expand_builtin to take care of SSE insns with
18595 4 operands. The third argument must be a constant that fits in 8
18596 bits, or xmm0. */
18597
18598 static rtx
18599 ix86_expand_sse_4_operands_builtin (enum insn_code icode, tree exp,
18600 rtx target)
18601 {
18602 rtx pat;
18603 tree arg0 = CALL_EXPR_ARG (exp, 0);
18604 tree arg1 = CALL_EXPR_ARG (exp, 1);
18605 tree arg2 = CALL_EXPR_ARG (exp, 2);
18606 rtx op0 = expand_normal (arg0);
18607 rtx op1 = expand_normal (arg1);
18608 rtx op2 = expand_normal (arg2);
18609 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18610 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
18611 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
18612 enum machine_mode mode3 = insn_data[icode].operand[3].mode;
18613
18614 if (VECTOR_MODE_P (mode1))
18615 op0 = safe_vector_operand (op0, mode1);
18616 if (VECTOR_MODE_P (mode2))
18617 op1 = safe_vector_operand (op1, mode2);
18618 if (VECTOR_MODE_P (mode3))
18619 op2 = safe_vector_operand (op2, mode3);
18620
18621 if (optimize
18622 || target == 0
18623 || GET_MODE (target) != tmode
18624 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18625 target = gen_reg_rtx (tmode);
18626
18627 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18628 op0 = copy_to_mode_reg (mode1, op0);
18629 if ((optimize && !register_operand (op1, mode2))
18630 || !(*insn_data[icode].operand[2].predicate) (op1, mode2))
18631 op1 = copy_to_mode_reg (mode2, op1);
18632
18633 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18634 switch (icode)
18635 {
18636 case CODE_FOR_sse4_1_blendvpd:
18637 case CODE_FOR_sse4_1_blendvps:
18638 case CODE_FOR_sse4_1_pblendvb:
18639 op2 = copy_to_mode_reg (mode3, op2);
18640 break;
18641
18642 case CODE_FOR_sse4_1_roundsd:
18643 case CODE_FOR_sse4_1_roundss:
18644 error ("the third argument must be a 4-bit immediate");
18645 return const0_rtx;
18646
18647 default:
18648 error ("the third argument must be an 8-bit immediate");
18649 return const0_rtx;
18650 }
18651
18652 pat = GEN_FCN (icode) (target, op0, op1, op2);
18653 if (! pat)
18654 return 0;
18655 emit_insn (pat);
18656 return target;
18657 }
18658
18659 /* Subroutine of ix86_expand_builtin to take care of crc32 insns. */
18660
18661 static rtx
18662 ix86_expand_crc32 (enum insn_code icode, tree exp, rtx target)
18663 {
18664 rtx pat;
18665 tree arg0 = CALL_EXPR_ARG (exp, 0);
18666 tree arg1 = CALL_EXPR_ARG (exp, 1);
18667 rtx op0 = expand_normal (arg0);
18668 rtx op1 = expand_normal (arg1);
18669 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18670 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18671 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18672
18673 if (optimize
18674 || !target
18675 || GET_MODE (target) != tmode
18676 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18677 target = gen_reg_rtx (tmode);
18678
18679 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
18680 op0 = copy_to_mode_reg (mode0, op0);
18681 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
18682 {
18683 op1 = copy_to_reg (op1);
18684 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
18685 }
18686
18687 pat = GEN_FCN (icode) (target, op0, op1);
18688 if (! pat)
18689 return 0;
18690 emit_insn (pat);
18691 return target;
18692 }
18693
18694 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
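/* For illustration (not part of this file; roughly how GCC's emmintrin.h
   wraps a typical two-operand builtin that is expanded here):

     __m128i _mm_add_epi16 (__m128i __A, __m128i __B)
     {
       return (__m128i) __builtin_ia32_paddw128 ((__v8hi)__A, (__v8hi)__B);
     }  */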
18695
18696 static rtx
18697 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
18698 {
18699 rtx pat, xops[3];
18700 tree arg0 = CALL_EXPR_ARG (exp, 0);
18701 tree arg1 = CALL_EXPR_ARG (exp, 1);
18702 rtx op0 = expand_normal (arg0);
18703 rtx op1 = expand_normal (arg1);
18704 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18705 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18706 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18707
18708 if (VECTOR_MODE_P (mode0))
18709 op0 = safe_vector_operand (op0, mode0);
18710 if (VECTOR_MODE_P (mode1))
18711 op1 = safe_vector_operand (op1, mode1);
18712
18713 if (optimize || !target
18714 || GET_MODE (target) != tmode
18715 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18716 target = gen_reg_rtx (tmode);
18717
18718 if (GET_MODE (op1) == SImode && mode1 == TImode)
18719 {
18720 rtx x = gen_reg_rtx (V4SImode);
18721 emit_insn (gen_sse2_loadd (x, op1));
18722 op1 = gen_lowpart (TImode, x);
18723 }
18724
18725 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
18726 op0 = copy_to_mode_reg (mode0, op0);
18727 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
18728 op1 = copy_to_mode_reg (mode1, op1);
18729
18730 /* ??? Using ix86_fixup_binary_operands is problematic when
18731 we've got mismatched modes. Fake it. */
18732
18733 xops[0] = target;
18734 xops[1] = op0;
18735 xops[2] = op1;
18736
18737 if (tmode == mode0 && tmode == mode1)
18738 {
18739 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
18740 op0 = xops[1];
18741 op1 = xops[2];
18742 }
18743 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
18744 {
18745 op0 = force_reg (mode0, op0);
18746 op1 = force_reg (mode1, op1);
18747 target = gen_reg_rtx (tmode);
18748 }
18749
18750 pat = GEN_FCN (icode) (target, op0, op1);
18751 if (! pat)
18752 return 0;
18753 emit_insn (pat);
18754 return target;
18755 }
18756
18757 /* Subroutine of ix86_expand_builtin to take care of stores. */
18758
18759 static rtx
18760 ix86_expand_store_builtin (enum insn_code icode, tree exp)
18761 {
18762 rtx pat;
18763 tree arg0 = CALL_EXPR_ARG (exp, 0);
18764 tree arg1 = CALL_EXPR_ARG (exp, 1);
18765 rtx op0 = expand_normal (arg0);
18766 rtx op1 = expand_normal (arg1);
18767 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
18768 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
18769
18770 if (VECTOR_MODE_P (mode1))
18771 op1 = safe_vector_operand (op1, mode1);
18772
18773 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18774 op1 = copy_to_mode_reg (mode1, op1);
18775
18776 pat = GEN_FCN (icode) (op0, op1);
18777 if (pat)
18778 emit_insn (pat);
18779 return 0;
18780 }
18781
18782 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
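/* For illustration (not part of this file; roughly how GCC's xmmintrin.h
   uses the DO_LOAD form): when DO_LOAD is nonzero the single argument is
   a pointer and is wrapped in a MEM below, as for

     __m128 _mm_loadu_ps (float const *__P)
     {
       return (__m128) __builtin_ia32_loadups (__P);
     }  */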
18783
18784 static rtx
18785 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
18786 rtx target, int do_load)
18787 {
18788 rtx pat;
18789 tree arg0 = CALL_EXPR_ARG (exp, 0);
18790 rtx op0 = expand_normal (arg0);
18791 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18792 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18793
18794 if (optimize || !target
18795 || GET_MODE (target) != tmode
18796 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18797 target = gen_reg_rtx (tmode);
18798 if (do_load)
18799 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18800 else
18801 {
18802 if (VECTOR_MODE_P (mode0))
18803 op0 = safe_vector_operand (op0, mode0);
18804
18805 if ((optimize && !register_operand (op0, mode0))
18806 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18807 op0 = copy_to_mode_reg (mode0, op0);
18808 }
18809
18810 switch (icode)
18811 {
18812 case CODE_FOR_sse4_1_roundpd:
18813 case CODE_FOR_sse4_1_roundps:
18814 {
18815 tree arg1 = CALL_EXPR_ARG (exp, 1);
18816 rtx op1 = expand_normal (arg1);
18817 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18818
18819 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18820 {
18821 error ("the second argument must be a 4-bit immediate");
18822 return const0_rtx;
18823 }
18824 pat = GEN_FCN (icode) (target, op0, op1);
18825 }
18826 break;
18827 default:
18828 pat = GEN_FCN (icode) (target, op0);
18829 break;
18830 }
18831
18832 if (! pat)
18833 return 0;
18834 emit_insn (pat);
18835 return target;
18836 }
18837
18838 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
18839 sqrtss, rsqrtss, rcpss. */
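/* For illustration (an assumption based on the vm* scalar patterns in
   sse.md): these insns compute only the low element and copy the upper
   elements from another operand, which is why OP0 is passed twice below.
   A typical user, roughly as in GCC's xmmintrin.h:

     __m128 _mm_rcp_ss (__m128 __A)
     {
       return (__m128) __builtin_ia32_rcpss ((__v4sf)__A);
     }  */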
18840
18841 static rtx
18842 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
18843 {
18844 rtx pat;
18845 tree arg0 = CALL_EXPR_ARG (exp, 0);
18846 rtx op1, op0 = expand_normal (arg0);
18847 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18848 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18849
18850 if (optimize || !target
18851 || GET_MODE (target) != tmode
18852 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18853 target = gen_reg_rtx (tmode);
18854
18855 if (VECTOR_MODE_P (mode0))
18856 op0 = safe_vector_operand (op0, mode0);
18857
18858 if ((optimize && !register_operand (op0, mode0))
18859 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18860 op0 = copy_to_mode_reg (mode0, op0);
18861
18862 op1 = op0;
18863 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
18864 op1 = copy_to_mode_reg (mode0, op1);
18865
18866 pat = GEN_FCN (icode) (target, op0, op1);
18867 if (! pat)
18868 return 0;
18869 emit_insn (pat);
18870 return target;
18871 }
18872
18873 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
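/* For illustration (an assumption about the bdesc_2arg entries defined
   earlier): cmpps/cmppd only encode EQ/LT/LE/UNORD and their negations,
   so a builtin such as __builtin_ia32_cmpgtps is described as LT with
   BUILTIN_DESC_SWAP_OPERANDS set, and the swap below makes
   _mm_cmpgt_ps (a, b) come out as a compare-less-than of (b, a).  */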
18874
18875 static rtx
18876 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
18877 rtx target)
18878 {
18879 rtx pat;
18880 tree arg0 = CALL_EXPR_ARG (exp, 0);
18881 tree arg1 = CALL_EXPR_ARG (exp, 1);
18882 rtx op0 = expand_normal (arg0);
18883 rtx op1 = expand_normal (arg1);
18884 rtx op2;
18885 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
18886 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
18887 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
18888 enum rtx_code comparison = d->comparison;
18889
18890 if (VECTOR_MODE_P (mode0))
18891 op0 = safe_vector_operand (op0, mode0);
18892 if (VECTOR_MODE_P (mode1))
18893 op1 = safe_vector_operand (op1, mode1);
18894
18895 /* Swap operands if we have a comparison that isn't available in
18896 hardware. */
18897 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
18898 {
18899 rtx tmp = gen_reg_rtx (mode1);
18900 emit_move_insn (tmp, op1);
18901 op1 = op0;
18902 op0 = tmp;
18903 }
18904
18905 if (optimize || !target
18906 || GET_MODE (target) != tmode
18907 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
18908 target = gen_reg_rtx (tmode);
18909
18910 if ((optimize && !register_operand (op0, mode0))
18911 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
18912 op0 = copy_to_mode_reg (mode0, op0);
18913 if ((optimize && !register_operand (op1, mode1))
18914 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
18915 op1 = copy_to_mode_reg (mode1, op1);
18916
18917 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
18918 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
18919 if (! pat)
18920 return 0;
18921 emit_insn (pat);
18922 return target;
18923 }
18924
18925 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
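/* For illustration (not part of this file; roughly how GCC's xmmintrin.h
   wraps a comi builtin):

     int _mm_comigt_ss (__m128 __A, __m128 __B)
     {
       return __builtin_ia32_comigt ((__v4sf)__A, (__v4sf)__B);
     }

   The expander below emits the comiss/comisd that sets the flags and then
   a setcc of those flags into the low byte of an SImode pseudo.  */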
18926
18927 static rtx
18928 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
18929 rtx target)
18930 {
18931 rtx pat;
18932 tree arg0 = CALL_EXPR_ARG (exp, 0);
18933 tree arg1 = CALL_EXPR_ARG (exp, 1);
18934 rtx op0 = expand_normal (arg0);
18935 rtx op1 = expand_normal (arg1);
18936 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
18937 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
18938 enum rtx_code comparison = d->comparison;
18939
18940 if (VECTOR_MODE_P (mode0))
18941 op0 = safe_vector_operand (op0, mode0);
18942 if (VECTOR_MODE_P (mode1))
18943 op1 = safe_vector_operand (op1, mode1);
18944
18945 /* Swap operands if we have a comparison that isn't available in
18946 hardware. */
18947 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
18948 {
18949 rtx tmp = op1;
18950 op1 = op0;
18951 op0 = tmp;
18952 }
18953
18954 target = gen_reg_rtx (SImode);
18955 emit_move_insn (target, const0_rtx);
18956 target = gen_rtx_SUBREG (QImode, target, 0);
18957
18958 if ((optimize && !register_operand (op0, mode0))
18959 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
18960 op0 = copy_to_mode_reg (mode0, op0);
18961 if ((optimize && !register_operand (op1, mode1))
18962 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
18963 op1 = copy_to_mode_reg (mode1, op1);
18964
18965 pat = GEN_FCN (d->icode) (op0, op1);
18966 if (! pat)
18967 return 0;
18968 emit_insn (pat);
18969 emit_insn (gen_rtx_SET (VOIDmode,
18970 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18971 gen_rtx_fmt_ee (comparison, QImode,
18972 SET_DEST (pat),
18973 const0_rtx)));
18974
18975 return SUBREG_REG (target);
18976 }
18977
18978 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
18979
18980 static rtx
18981 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
18982 rtx target)
18983 {
18984 rtx pat;
18985 tree arg0 = CALL_EXPR_ARG (exp, 0);
18986 tree arg1 = CALL_EXPR_ARG (exp, 1);
18987 rtx op0 = expand_normal (arg0);
18988 rtx op1 = expand_normal (arg1);
18989 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
18990 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
18991 enum rtx_code comparison = d->comparison;
18992
18993 if (VECTOR_MODE_P (mode0))
18994 op0 = safe_vector_operand (op0, mode0);
18995 if (VECTOR_MODE_P (mode1))
18996 op1 = safe_vector_operand (op1, mode1);
18997
18998 target = gen_reg_rtx (SImode);
18999 emit_move_insn (target, const0_rtx);
19000 target = gen_rtx_SUBREG (QImode, target, 0);
19001
19002 if ((optimize && !register_operand (op0, mode0))
19003 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
19004 op0 = copy_to_mode_reg (mode0, op0);
19005 if ((optimize && !register_operand (op1, mode1))
19006 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
19007 op1 = copy_to_mode_reg (mode1, op1);
19008
19009 pat = GEN_FCN (d->icode) (op0, op1);
19010 if (! pat)
19011 return 0;
19012 emit_insn (pat);
19013 emit_insn (gen_rtx_SET (VOIDmode,
19014 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
19015 gen_rtx_fmt_ee (comparison, QImode,
19016 SET_DEST (pat),
19017 const0_rtx)));
19018
19019 return SUBREG_REG (target);
19020 }
19021
19022 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
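/* For illustration (not part of this file; roughly how GCC's smmintrin.h
   wraps the explicit-length string compares handled here):

     int _mm_cmpestri (__m128i __X, int __LX, __m128i __Y, int __LY,
		       const int __M)
     {
       return __builtin_ia32_pcmpestri128 ((__v16qi)__X, __LX,
					   (__v16qi)__Y, __LY, __M);
     }  */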
19023
19024 static rtx
19025 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
19026 tree exp, rtx target)
19027 {
19028 rtx pat;
19029 tree arg0 = CALL_EXPR_ARG (exp, 0);
19030 tree arg1 = CALL_EXPR_ARG (exp, 1);
19031 tree arg2 = CALL_EXPR_ARG (exp, 2);
19032 tree arg3 = CALL_EXPR_ARG (exp, 3);
19033 tree arg4 = CALL_EXPR_ARG (exp, 4);
19034 rtx scratch0, scratch1;
19035 rtx op0 = expand_normal (arg0);
19036 rtx op1 = expand_normal (arg1);
19037 rtx op2 = expand_normal (arg2);
19038 rtx op3 = expand_normal (arg3);
19039 rtx op4 = expand_normal (arg4);
19040 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
19041
19042 tmode0 = insn_data[d->icode].operand[0].mode;
19043 tmode1 = insn_data[d->icode].operand[1].mode;
19044 modev2 = insn_data[d->icode].operand[2].mode;
19045 modei3 = insn_data[d->icode].operand[3].mode;
19046 modev4 = insn_data[d->icode].operand[4].mode;
19047 modei5 = insn_data[d->icode].operand[5].mode;
19048 modeimm = insn_data[d->icode].operand[6].mode;
19049
19050 if (VECTOR_MODE_P (modev2))
19051 op0 = safe_vector_operand (op0, modev2);
19052 if (VECTOR_MODE_P (modev4))
19053 op2 = safe_vector_operand (op2, modev4);
19054
19055 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
19056 op0 = copy_to_mode_reg (modev2, op0);
19057 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
19058 op1 = copy_to_mode_reg (modei3, op1);
19059 if ((optimize && !register_operand (op2, modev4))
19060 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
19061 op2 = copy_to_mode_reg (modev4, op2);
19062 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
19063 op3 = copy_to_mode_reg (modei5, op3);
19064
19065 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
19066 {
19067 error ("the fifth argument must be an 8-bit immediate");
19068 return const0_rtx;
19069 }
19070
19071 if (d->code == IX86_BUILTIN_PCMPESTRI128)
19072 {
19073 if (optimize || !target
19074 || GET_MODE (target) != tmode0
19075 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
19076 target = gen_reg_rtx (tmode0);
19077
19078 scratch1 = gen_reg_rtx (tmode1);
19079
19080 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
19081 }
19082 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
19083 {
19084 if (optimize || !target
19085 || GET_MODE (target) != tmode1
19086 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
19087 target = gen_reg_rtx (tmode1);
19088
19089 scratch0 = gen_reg_rtx (tmode0);
19090
19091 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
19092 }
19093 else
19094 {
19095 gcc_assert (d->flag);
19096
19097 scratch0 = gen_reg_rtx (tmode0);
19098 scratch1 = gen_reg_rtx (tmode1);
19099
19100 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
19101 }
19102
19103 if (! pat)
19104 return 0;
19105
19106 emit_insn (pat);
19107
19108 if (d->flag)
19109 {
19110 target = gen_reg_rtx (SImode);
19111 emit_move_insn (target, const0_rtx);
19112 target = gen_rtx_SUBREG (QImode, target, 0);
19113
19114 emit_insn
19115 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
19116 gen_rtx_fmt_ee (EQ, QImode,
19117 gen_rtx_REG ((enum machine_mode) d->flag,
19118 FLAGS_REG),
19119 const0_rtx)));
19120 return SUBREG_REG (target);
19121 }
19122 else
19123 return target;
19124 }
19125
19126
19127 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
19128
19129 static rtx
19130 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
19131 tree exp, rtx target)
19132 {
19133 rtx pat;
19134 tree arg0 = CALL_EXPR_ARG (exp, 0);
19135 tree arg1 = CALL_EXPR_ARG (exp, 1);
19136 tree arg2 = CALL_EXPR_ARG (exp, 2);
19137 rtx scratch0, scratch1;
19138 rtx op0 = expand_normal (arg0);
19139 rtx op1 = expand_normal (arg1);
19140 rtx op2 = expand_normal (arg2);
19141 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
19142
19143 tmode0 = insn_data[d->icode].operand[0].mode;
19144 tmode1 = insn_data[d->icode].operand[1].mode;
19145 modev2 = insn_data[d->icode].operand[2].mode;
19146 modev3 = insn_data[d->icode].operand[3].mode;
19147 modeimm = insn_data[d->icode].operand[4].mode;
19148
19149 if (VECTOR_MODE_P (modev2))
19150 op0 = safe_vector_operand (op0, modev2);
19151 if (VECTOR_MODE_P (modev3))
19152 op1 = safe_vector_operand (op1, modev3);
19153
19154 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
19155 op0 = copy_to_mode_reg (modev2, op0);
19156 if ((optimize && !register_operand (op1, modev3))
19157 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
19158 op1 = copy_to_mode_reg (modev3, op1);
19159
19160 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
19161 {
19162 error ("the third argument must be an 8-bit immediate");
19163 return const0_rtx;
19164 }
19165
19166 if (d->code == IX86_BUILTIN_PCMPISTRI128)
19167 {
19168 if (optimize || !target
19169 || GET_MODE (target) != tmode0
19170 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
19171 target = gen_reg_rtx (tmode0);
19172
19173 scratch1 = gen_reg_rtx (tmode1);
19174
19175 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
19176 }
19177 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
19178 {
19179 if (optimize || !target
19180 || GET_MODE (target) != tmode1
19181 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
19182 target = gen_reg_rtx (tmode1);
19183
19184 scratch0 = gen_reg_rtx (tmode0);
19185
19186 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
19187 }
19188 else
19189 {
19190 gcc_assert (d->flag);
19191
19192 scratch0 = gen_reg_rtx (tmode0);
19193 scratch1 = gen_reg_rtx (tmode1);
19194
19195 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
19196 }
19197
19198 if (! pat)
19199 return 0;
19200
19201 emit_insn (pat);
19202
19203 if (d->flag)
19204 {
19205 target = gen_reg_rtx (SImode);
19206 emit_move_insn (target, const0_rtx);
19207 target = gen_rtx_SUBREG (QImode, target, 0);
19208
19209 emit_insn
19210 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
19211 gen_rtx_fmt_ee (EQ, QImode,
19212 gen_rtx_REG ((enum machine_mode) d->flag,
19213 FLAGS_REG),
19214 const0_rtx)));
19215 return SUBREG_REG (target);
19216 }
19217 else
19218 return target;
19219 }
19220
19221 /* Return the integer constant in ARG. Constrain it to be in the range
19222 of the subparts of VEC_TYPE; issue an error if not. */
19223
19224 static int
19225 get_element_number (tree vec_type, tree arg)
19226 {
19227 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
19228
19229 if (!host_integerp (arg, 1)
19230 || (elt = tree_low_cst (arg, 1), elt > max))
19231 {
19232 error ("selector must be an integer constant in the range 0..%wi", max);
19233 return 0;
19234 }
19235
19236 return elt;
19237 }
19238
19239 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
19240 ix86_expand_vector_init. We DO have language-level syntax for this, in
19241 the form of (type){ init-list }. Except that since we can't place emms
19242 instructions from inside the compiler, we can't allow the use of MMX
19243 registers unless the user explicitly asks for it. So we do *not* define
19244 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
19245 we have builtins invoked by mmintrin.h that give us license to emit
19246 these sorts of instructions. */
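/* For illustration (not part of this file; roughly how GCC's mmintrin.h
   uses one of these builtins):

     __m64 _mm_set_pi32 (int __i1, int __i0)
     {
       return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
     }

   so the MMX vector is built by ix86_expand_vector_init below rather than
   through a vec_init pattern in mmx.md.  */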
19247
19248 static rtx
19249 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
19250 {
19251 enum machine_mode tmode = TYPE_MODE (type);
19252 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
19253 int i, n_elt = GET_MODE_NUNITS (tmode);
19254 rtvec v = rtvec_alloc (n_elt);
19255
19256 gcc_assert (VECTOR_MODE_P (tmode));
19257 gcc_assert (call_expr_nargs (exp) == n_elt);
19258
19259 for (i = 0; i < n_elt; ++i)
19260 {
19261 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
19262 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
19263 }
19264
19265 if (!target || !register_operand (target, tmode))
19266 target = gen_reg_rtx (tmode);
19267
19268 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
19269 return target;
19270 }
19271
19272 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
19273 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
19274 had a language-level syntax for referencing vector elements. */
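/* For illustration (not part of this file; roughly how GCC's emmintrin.h
   uses one of these builtins):

     int _mm_extract_epi16 (__m128i const __A, int const __N)
     {
       return (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N);
     }  */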
19275
19276 static rtx
19277 ix86_expand_vec_ext_builtin (tree exp, rtx target)
19278 {
19279 enum machine_mode tmode, mode0;
19280 tree arg0, arg1;
19281 int elt;
19282 rtx op0;
19283
19284 arg0 = CALL_EXPR_ARG (exp, 0);
19285 arg1 = CALL_EXPR_ARG (exp, 1);
19286
19287 op0 = expand_normal (arg0);
19288 elt = get_element_number (TREE_TYPE (arg0), arg1);
19289
19290 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
19291 mode0 = TYPE_MODE (TREE_TYPE (arg0));
19292 gcc_assert (VECTOR_MODE_P (mode0));
19293
19294 op0 = force_reg (mode0, op0);
19295
19296 if (optimize || !target || !register_operand (target, tmode))
19297 target = gen_reg_rtx (tmode);
19298
19299 ix86_expand_vector_extract (true, target, op0, elt);
19300
19301 return target;
19302 }
19303
19304 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
19305 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
19306 a language-level syntax for referencing vector elements. */
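/* For illustration (not part of this file; roughly how GCC's emmintrin.h
   uses one of these builtins):

     __m128i _mm_insert_epi16 (__m128i const __A, int const __D,
			       int const __N)
     {
       return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)__A, __D, __N);
     }  */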
19307
19308 static rtx
19309 ix86_expand_vec_set_builtin (tree exp)
19310 {
19311 enum machine_mode tmode, mode1;
19312 tree arg0, arg1, arg2;
19313 int elt;
19314 rtx op0, op1, target;
19315
19316 arg0 = CALL_EXPR_ARG (exp, 0);
19317 arg1 = CALL_EXPR_ARG (exp, 1);
19318 arg2 = CALL_EXPR_ARG (exp, 2);
19319
19320 tmode = TYPE_MODE (TREE_TYPE (arg0));
19321 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
19322 gcc_assert (VECTOR_MODE_P (tmode));
19323
19324 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
19325 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
19326 elt = get_element_number (TREE_TYPE (arg0), arg2);
19327
19328 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
19329 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
19330
19331 op0 = force_reg (tmode, op0);
19332 op1 = force_reg (mode1, op1);
19333
19334 /* OP0 is the source operand of these builtins and shouldn't be
19335    modified.  Create a copy, use it, and return it as the target.  */
19336 target = gen_reg_rtx (tmode);
19337 emit_move_insn (target, op0);
19338 ix86_expand_vector_set (true, target, op1, elt);
19339
19340 return target;
19341 }
19342
19343 /* Expand an expression EXP that calls a built-in function,
19344 with result going to TARGET if that's convenient
19345 (and in mode MODE if that's convenient).
19346 SUBTARGET may be used as the target for computing one of EXP's operands.
19347 IGNORE is nonzero if the value is to be ignored. */
19348
19349 static rtx
19350 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
19351 enum machine_mode mode ATTRIBUTE_UNUSED,
19352 int ignore ATTRIBUTE_UNUSED)
19353 {
19354 const struct builtin_description *d;
19355 size_t i;
19356 enum insn_code icode;
19357 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
19358 tree arg0, arg1, arg2, arg3;
19359 rtx op0, op1, op2, op3, pat;
19360 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
19361 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
19362
19363 switch (fcode)
19364 {
19365 case IX86_BUILTIN_EMMS:
19366 emit_insn (gen_mmx_emms ());
19367 return 0;
19368
19369 case IX86_BUILTIN_SFENCE:
19370 emit_insn (gen_sse_sfence ());
19371 return 0;
19372
19373 case IX86_BUILTIN_MASKMOVQ:
19374 case IX86_BUILTIN_MASKMOVDQU:
19375 icode = (fcode == IX86_BUILTIN_MASKMOVQ
19376 ? CODE_FOR_mmx_maskmovq
19377 : CODE_FOR_sse2_maskmovdqu);
19378 /* Note the arg order is different from the operand order. */
19379 arg1 = CALL_EXPR_ARG (exp, 0);
19380 arg2 = CALL_EXPR_ARG (exp, 1);
19381 arg0 = CALL_EXPR_ARG (exp, 2);
19382 op0 = expand_normal (arg0);
19383 op1 = expand_normal (arg1);
19384 op2 = expand_normal (arg2);
19385 mode0 = insn_data[icode].operand[0].mode;
19386 mode1 = insn_data[icode].operand[1].mode;
19387 mode2 = insn_data[icode].operand[2].mode;
19388
19389 op0 = force_reg (Pmode, op0);
19390 op0 = gen_rtx_MEM (mode1, op0);
19391
19392 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
19393 op0 = copy_to_mode_reg (mode0, op0);
19394 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
19395 op1 = copy_to_mode_reg (mode1, op1);
19396 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
19397 op2 = copy_to_mode_reg (mode2, op2);
19398 pat = GEN_FCN (icode) (op0, op1, op2);
19399 if (! pat)
19400 return 0;
19401 emit_insn (pat);
19402 return 0;
19403
19404 case IX86_BUILTIN_RSQRTF:
19405 return ix86_expand_unop1_builtin (CODE_FOR_rsqrtsf2, exp, target);
19406
19407 case IX86_BUILTIN_SQRTSS:
19408 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
19409 case IX86_BUILTIN_RSQRTSS:
19410 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
19411 case IX86_BUILTIN_RCPSS:
19412 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
19413
19414 case IX86_BUILTIN_LOADUPS:
19415 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
19416
19417 case IX86_BUILTIN_STOREUPS:
19418 return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
19419
19420 case IX86_BUILTIN_LOADHPS:
19421 case IX86_BUILTIN_LOADLPS:
19422 case IX86_BUILTIN_LOADHPD:
19423 case IX86_BUILTIN_LOADLPD:
19424 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
19425 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
19426 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
19427 : CODE_FOR_sse2_loadlpd);
19428 arg0 = CALL_EXPR_ARG (exp, 0);
19429 arg1 = CALL_EXPR_ARG (exp, 1);
19430 op0 = expand_normal (arg0);
19431 op1 = expand_normal (arg1);
19432 tmode = insn_data[icode].operand[0].mode;
19433 mode0 = insn_data[icode].operand[1].mode;
19434 mode1 = insn_data[icode].operand[2].mode;
19435
19436 op0 = force_reg (mode0, op0);
19437 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
19438 if (optimize || target == 0
19439 || GET_MODE (target) != tmode
19440 || !register_operand (target, tmode))
19441 target = gen_reg_rtx (tmode);
19442 pat = GEN_FCN (icode) (target, op0, op1);
19443 if (! pat)
19444 return 0;
19445 emit_insn (pat);
19446 return target;
19447
19448 case IX86_BUILTIN_STOREHPS:
19449 case IX86_BUILTIN_STORELPS:
19450 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
19451 : CODE_FOR_sse_storelps);
19452 arg0 = CALL_EXPR_ARG (exp, 0);
19453 arg1 = CALL_EXPR_ARG (exp, 1);
19454 op0 = expand_normal (arg0);
19455 op1 = expand_normal (arg1);
19456 mode0 = insn_data[icode].operand[0].mode;
19457 mode1 = insn_data[icode].operand[1].mode;
19458
19459 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
19460 op1 = force_reg (mode1, op1);
19461
19462 pat = GEN_FCN (icode) (op0, op1);
19463 if (! pat)
19464 return 0;
19465 emit_insn (pat);
19466 return const0_rtx;
19467
19468 case IX86_BUILTIN_MOVNTPS:
19469 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
19470 case IX86_BUILTIN_MOVNTQ:
19471 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
19472
19473 case IX86_BUILTIN_LDMXCSR:
19474 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
19475 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
19476 emit_move_insn (target, op0);
19477 emit_insn (gen_sse_ldmxcsr (target));
19478 return 0;
19479
19480 case IX86_BUILTIN_STMXCSR:
19481 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
19482 emit_insn (gen_sse_stmxcsr (target));
19483 return copy_to_mode_reg (SImode, target);
19484
19485 case IX86_BUILTIN_SHUFPS:
19486 case IX86_BUILTIN_SHUFPD:
19487 icode = (fcode == IX86_BUILTIN_SHUFPS
19488 ? CODE_FOR_sse_shufps
19489 : CODE_FOR_sse2_shufpd);
19490 arg0 = CALL_EXPR_ARG (exp, 0);
19491 arg1 = CALL_EXPR_ARG (exp, 1);
19492 arg2 = CALL_EXPR_ARG (exp, 2);
19493 op0 = expand_normal (arg0);
19494 op1 = expand_normal (arg1);
19495 op2 = expand_normal (arg2);
19496 tmode = insn_data[icode].operand[0].mode;
19497 mode0 = insn_data[icode].operand[1].mode;
19498 mode1 = insn_data[icode].operand[2].mode;
19499 mode2 = insn_data[icode].operand[3].mode;
19500
19501 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19502 op0 = copy_to_mode_reg (mode0, op0);
19503 if ((optimize && !register_operand (op1, mode1))
19504 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
19505 op1 = copy_to_mode_reg (mode1, op1);
19506 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19507 {
19508 /* @@@ better error message */
19509 error ("mask must be an immediate");
19510 return gen_reg_rtx (tmode);
19511 }
19512 if (optimize || target == 0
19513 || GET_MODE (target) != tmode
19514 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19515 target = gen_reg_rtx (tmode);
19516 pat = GEN_FCN (icode) (target, op0, op1, op2);
19517 if (! pat)
19518 return 0;
19519 emit_insn (pat);
19520 return target;
19521
19522 case IX86_BUILTIN_PSHUFW:
19523 case IX86_BUILTIN_PSHUFD:
19524 case IX86_BUILTIN_PSHUFHW:
19525 case IX86_BUILTIN_PSHUFLW:
19526 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
19527 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
19528 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
19529 : CODE_FOR_mmx_pshufw);
19530 arg0 = CALL_EXPR_ARG (exp, 0);
19531 arg1 = CALL_EXPR_ARG (exp, 1);
19532 op0 = expand_normal (arg0);
19533 op1 = expand_normal (arg1);
19534 tmode = insn_data[icode].operand[0].mode;
19535 mode1 = insn_data[icode].operand[1].mode;
19536 mode2 = insn_data[icode].operand[2].mode;
19537
19538 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19539 op0 = copy_to_mode_reg (mode1, op0);
19540 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19541 {
19542 /* @@@ better error message */
19543 error ("mask must be an immediate");
19544 return const0_rtx;
19545 }
19546 if (target == 0
19547 || GET_MODE (target) != tmode
19548 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19549 target = gen_reg_rtx (tmode);
19550 pat = GEN_FCN (icode) (target, op0, op1);
19551 if (! pat)
19552 return 0;
19553 emit_insn (pat);
19554 return target;
19555
19556 case IX86_BUILTIN_PSLLWI128:
19557 icode = CODE_FOR_ashlv8hi3;
19558 goto do_pshifti;
19559 case IX86_BUILTIN_PSLLDI128:
19560 icode = CODE_FOR_ashlv4si3;
19561 goto do_pshifti;
19562 case IX86_BUILTIN_PSLLQI128:
19563 icode = CODE_FOR_ashlv2di3;
19564 goto do_pshifti;
19565 case IX86_BUILTIN_PSRAWI128:
19566 icode = CODE_FOR_ashrv8hi3;
19567 goto do_pshifti;
19568 case IX86_BUILTIN_PSRADI128:
19569 icode = CODE_FOR_ashrv4si3;
19570 goto do_pshifti;
19571 case IX86_BUILTIN_PSRLWI128:
19572 icode = CODE_FOR_lshrv8hi3;
19573 goto do_pshifti;
19574 case IX86_BUILTIN_PSRLDI128:
19575 icode = CODE_FOR_lshrv4si3;
19576 goto do_pshifti;
19577 case IX86_BUILTIN_PSRLQI128:
19578 icode = CODE_FOR_lshrv2di3;
19579 goto do_pshifti;
19580 do_pshifti:
19581 arg0 = CALL_EXPR_ARG (exp, 0);
19582 arg1 = CALL_EXPR_ARG (exp, 1);
19583 op0 = expand_normal (arg0);
19584 op1 = expand_normal (arg1);
19585
19586 if (!CONST_INT_P (op1))
19587 {
19588 error ("shift must be an immediate");
19589 return const0_rtx;
19590 }
19591 if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
19592 op1 = GEN_INT (255);
19593
19594 tmode = insn_data[icode].operand[0].mode;
19595 mode1 = insn_data[icode].operand[1].mode;
19596 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19597 op0 = copy_to_reg (op0);
19598
19599 target = gen_reg_rtx (tmode);
19600 pat = GEN_FCN (icode) (target, op0, op1);
19601 if (!pat)
19602 return 0;
19603 emit_insn (pat);
19604 return target;
19605
19606 case IX86_BUILTIN_PSLLW128:
19607 icode = CODE_FOR_ashlv8hi3;
19608 goto do_pshift;
19609 case IX86_BUILTIN_PSLLD128:
19610 icode = CODE_FOR_ashlv4si3;
19611 goto do_pshift;
19612 case IX86_BUILTIN_PSLLQ128:
19613 icode = CODE_FOR_ashlv2di3;
19614 goto do_pshift;
19615 case IX86_BUILTIN_PSRAW128:
19616 icode = CODE_FOR_ashrv8hi3;
19617 goto do_pshift;
19618 case IX86_BUILTIN_PSRAD128:
19619 icode = CODE_FOR_ashrv4si3;
19620 goto do_pshift;
19621 case IX86_BUILTIN_PSRLW128:
19622 icode = CODE_FOR_lshrv8hi3;
19623 goto do_pshift;
19624 case IX86_BUILTIN_PSRLD128:
19625 icode = CODE_FOR_lshrv4si3;
19626 goto do_pshift;
19627 case IX86_BUILTIN_PSRLQ128:
19628 icode = CODE_FOR_lshrv2di3;
19629 goto do_pshift;
19630 do_pshift:
19631 arg0 = CALL_EXPR_ARG (exp, 0);
19632 arg1 = CALL_EXPR_ARG (exp, 1);
19633 op0 = expand_normal (arg0);
19634 op1 = expand_normal (arg1);
19635
19636 tmode = insn_data[icode].operand[0].mode;
19637 mode1 = insn_data[icode].operand[1].mode;
19638
19639 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19640 op0 = copy_to_reg (op0);
19641
19642 op1 = simplify_gen_subreg (SImode, op1, GET_MODE (op1), 0);
19643 if (! (*insn_data[icode].operand[2].predicate) (op1, SImode))
19644 op1 = copy_to_reg (op1);
19645
19646 target = gen_reg_rtx (tmode);
19647 pat = GEN_FCN (icode) (target, op0, op1);
19648 if (!pat)
19649 return 0;
19650 emit_insn (pat);
19651 return target;
19652
19653 case IX86_BUILTIN_PSLLDQI128:
19654 case IX86_BUILTIN_PSRLDQI128:
19655 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
19656 : CODE_FOR_sse2_lshrti3);
19657 arg0 = CALL_EXPR_ARG (exp, 0);
19658 arg1 = CALL_EXPR_ARG (exp, 1);
19659 op0 = expand_normal (arg0);
19660 op1 = expand_normal (arg1);
19661 tmode = insn_data[icode].operand[0].mode;
19662 mode1 = insn_data[icode].operand[1].mode;
19663 mode2 = insn_data[icode].operand[2].mode;
19664
19665 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19666 {
19667 op0 = copy_to_reg (op0);
19668 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
19669 }
19670 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19671 {
19672 error ("shift must be an immediate");
19673 return const0_rtx;
19674 }
19675 target = gen_reg_rtx (V2DImode);
19676 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
19677 op0, op1);
19678 if (! pat)
19679 return 0;
19680 emit_insn (pat);
19681 return target;
19682
19683 case IX86_BUILTIN_FEMMS:
19684 emit_insn (gen_mmx_femms ());
19685 return NULL_RTX;
19686
19687 case IX86_BUILTIN_PAVGUSB:
19688 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
19689
19690 case IX86_BUILTIN_PF2ID:
19691 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
19692
19693 case IX86_BUILTIN_PFACC:
19694 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
19695
19696 case IX86_BUILTIN_PFADD:
19697 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
19698
19699 case IX86_BUILTIN_PFCMPEQ:
19700 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
19701
19702 case IX86_BUILTIN_PFCMPGE:
19703 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
19704
19705 case IX86_BUILTIN_PFCMPGT:
19706 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
19707
19708 case IX86_BUILTIN_PFMAX:
19709 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
19710
19711 case IX86_BUILTIN_PFMIN:
19712 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
19713
19714 case IX86_BUILTIN_PFMUL:
19715 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
19716
19717 case IX86_BUILTIN_PFRCP:
19718 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
19719
19720 case IX86_BUILTIN_PFRCPIT1:
19721 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
19722
19723 case IX86_BUILTIN_PFRCPIT2:
19724 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
19725
19726 case IX86_BUILTIN_PFRSQIT1:
19727 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
19728
19729 case IX86_BUILTIN_PFRSQRT:
19730 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
19731
19732 case IX86_BUILTIN_PFSUB:
19733 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
19734
19735 case IX86_BUILTIN_PFSUBR:
19736 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
19737
19738 case IX86_BUILTIN_PI2FD:
19739 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
19740
19741 case IX86_BUILTIN_PMULHRW:
19742 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
19743
19744 case IX86_BUILTIN_PF2IW:
19745 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
19746
19747 case IX86_BUILTIN_PFNACC:
19748 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
19749
19750 case IX86_BUILTIN_PFPNACC:
19751 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
19752
19753 case IX86_BUILTIN_PI2FW:
19754 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
19755
19756 case IX86_BUILTIN_PSWAPDSI:
19757 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
19758
19759 case IX86_BUILTIN_PSWAPDSF:
19760 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
19761
19762 case IX86_BUILTIN_SQRTSD:
19763 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
19764 case IX86_BUILTIN_LOADUPD:
19765 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
19766 case IX86_BUILTIN_STOREUPD:
19767 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
19768
19769 case IX86_BUILTIN_MFENCE:
19770 emit_insn (gen_sse2_mfence ());
19771 return 0;
19772 case IX86_BUILTIN_LFENCE:
19773 emit_insn (gen_sse2_lfence ());
19774 return 0;
19775
19776 case IX86_BUILTIN_CLFLUSH:
19777 arg0 = CALL_EXPR_ARG (exp, 0);
19778 op0 = expand_normal (arg0);
19779 icode = CODE_FOR_sse2_clflush;
19780 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
19781 op0 = copy_to_mode_reg (Pmode, op0);
19782
19783 emit_insn (gen_sse2_clflush (op0));
19784 return 0;
19785
19786 case IX86_BUILTIN_MOVNTPD:
19787 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
19788 case IX86_BUILTIN_MOVNTDQ:
19789 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
19790 case IX86_BUILTIN_MOVNTI:
19791 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
19792
19793 case IX86_BUILTIN_LOADDQU:
19794 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
19795 case IX86_BUILTIN_STOREDQU:
19796 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
19797
19798 case IX86_BUILTIN_MONITOR:
19799 arg0 = CALL_EXPR_ARG (exp, 0);
19800 arg1 = CALL_EXPR_ARG (exp, 1);
19801 arg2 = CALL_EXPR_ARG (exp, 2);
19802 op0 = expand_normal (arg0);
19803 op1 = expand_normal (arg1);
19804 op2 = expand_normal (arg2);
19805 if (!REG_P (op0))
19806 op0 = copy_to_mode_reg (Pmode, op0);
19807 if (!REG_P (op1))
19808 op1 = copy_to_mode_reg (SImode, op1);
19809 if (!REG_P (op2))
19810 op2 = copy_to_mode_reg (SImode, op2);
19811 if (!TARGET_64BIT)
19812 emit_insn (gen_sse3_monitor (op0, op1, op2));
19813 else
19814 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
19815 return 0;
19816
19817 case IX86_BUILTIN_MWAIT:
19818 arg0 = CALL_EXPR_ARG (exp, 0);
19819 arg1 = CALL_EXPR_ARG (exp, 1);
19820 op0 = expand_normal (arg0);
19821 op1 = expand_normal (arg1);
19822 if (!REG_P (op0))
19823 op0 = copy_to_mode_reg (SImode, op0);
19824 if (!REG_P (op1))
19825 op1 = copy_to_mode_reg (SImode, op1);
19826 emit_insn (gen_sse3_mwait (op0, op1));
19827 return 0;
19828
19829 case IX86_BUILTIN_LDDQU:
19830 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
19831 target, 1);
19832
19833 case IX86_BUILTIN_PALIGNR:
19834 case IX86_BUILTIN_PALIGNR128:
19835 if (fcode == IX86_BUILTIN_PALIGNR)
19836 {
19837 icode = CODE_FOR_ssse3_palignrdi;
19838 mode = DImode;
19839 }
19840 else
19841 {
19842 icode = CODE_FOR_ssse3_palignrti;
19843 mode = V2DImode;
19844 }
19845 arg0 = CALL_EXPR_ARG (exp, 0);
19846 arg1 = CALL_EXPR_ARG (exp, 1);
19847 arg2 = CALL_EXPR_ARG (exp, 2);
19848 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
19849 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
19850 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, EXPAND_NORMAL);
19851 tmode = insn_data[icode].operand[0].mode;
19852 mode1 = insn_data[icode].operand[1].mode;
19853 mode2 = insn_data[icode].operand[2].mode;
19854 mode3 = insn_data[icode].operand[3].mode;
19855
19856 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19857 {
19858 op0 = copy_to_reg (op0);
19859 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
19860 }
19861 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19862 {
19863 op1 = copy_to_reg (op1);
19864 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
19865 }
19866 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19867 {
19868 error ("shift must be an immediate");
19869 return const0_rtx;
19870 }
19871 target = gen_reg_rtx (mode);
19872 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
19873 op0, op1, op2);
19874 if (! pat)
19875 return 0;
19876 emit_insn (pat);
19877 return target;
19878
19879 case IX86_BUILTIN_MOVNTDQA:
19880 return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa, exp,
19881 target, 1);
19882
19883 case IX86_BUILTIN_MOVNTSD:
19884 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
19885
19886 case IX86_BUILTIN_MOVNTSS:
19887 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
19888
19889 case IX86_BUILTIN_INSERTQ:
19890 case IX86_BUILTIN_EXTRQ:
19891 icode = (fcode == IX86_BUILTIN_EXTRQ
19892 ? CODE_FOR_sse4a_extrq
19893 : CODE_FOR_sse4a_insertq);
19894 arg0 = CALL_EXPR_ARG (exp, 0);
19895 arg1 = CALL_EXPR_ARG (exp, 1);
19896 op0 = expand_normal (arg0);
19897 op1 = expand_normal (arg1);
19898 tmode = insn_data[icode].operand[0].mode;
19899 mode1 = insn_data[icode].operand[1].mode;
19900 mode2 = insn_data[icode].operand[2].mode;
19901 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19902 op0 = copy_to_mode_reg (mode1, op0);
19903 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19904 op1 = copy_to_mode_reg (mode2, op1);
19905 if (optimize || target == 0
19906 || GET_MODE (target) != tmode
19907 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19908 target = gen_reg_rtx (tmode);
19909 pat = GEN_FCN (icode) (target, op0, op1);
19910 if (! pat)
19911 return NULL_RTX;
19912 emit_insn (pat);
19913 return target;
19914
19915 case IX86_BUILTIN_EXTRQI:
19916 icode = CODE_FOR_sse4a_extrqi;
19917 arg0 = CALL_EXPR_ARG (exp, 0);
19918 arg1 = CALL_EXPR_ARG (exp, 1);
19919 arg2 = CALL_EXPR_ARG (exp, 2);
19920 op0 = expand_normal (arg0);
19921 op1 = expand_normal (arg1);
19922 op2 = expand_normal (arg2);
19923 tmode = insn_data[icode].operand[0].mode;
19924 mode1 = insn_data[icode].operand[1].mode;
19925 mode2 = insn_data[icode].operand[2].mode;
19926 mode3 = insn_data[icode].operand[3].mode;
19927 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19928 op0 = copy_to_mode_reg (mode1, op0);
19929 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19930 {
19931 error ("index mask must be an immediate");
19932 return gen_reg_rtx (tmode);
19933 }
19934 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19935 {
19936 error ("length mask must be an immediate");
19937 return gen_reg_rtx (tmode);
19938 }
19939 if (optimize || target == 0
19940 || GET_MODE (target) != tmode
19941 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19942 target = gen_reg_rtx (tmode);
19943 pat = GEN_FCN (icode) (target, op0, op1, op2);
19944 if (! pat)
19945 return NULL_RTX;
19946 emit_insn (pat);
19947 return target;
19948
19949 case IX86_BUILTIN_INSERTQI:
19950 icode = CODE_FOR_sse4a_insertqi;
19951 arg0 = CALL_EXPR_ARG (exp, 0);
19952 arg1 = CALL_EXPR_ARG (exp, 1);
19953 arg2 = CALL_EXPR_ARG (exp, 2);
19954 arg3 = CALL_EXPR_ARG (exp, 3);
19955 op0 = expand_normal (arg0);
19956 op1 = expand_normal (arg1);
19957 op2 = expand_normal (arg2);
19958 op3 = expand_normal (arg3);
19959 tmode = insn_data[icode].operand[0].mode;
19960 mode1 = insn_data[icode].operand[1].mode;
19961 mode2 = insn_data[icode].operand[2].mode;
19962 mode3 = insn_data[icode].operand[3].mode;
19963 mode4 = insn_data[icode].operand[4].mode;
19964
19965 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19966 op0 = copy_to_mode_reg (mode1, op0);
19967
19968 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19969 op1 = copy_to_mode_reg (mode2, op1);
19970
19971 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19972 {
19973 error ("index mask must be an immediate");
19974 return gen_reg_rtx (tmode);
19975 }
19976 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
19977 {
19978 error ("length mask must be an immediate");
19979 return gen_reg_rtx (tmode);
19980 }
19981 if (optimize || target == 0
19982 || GET_MODE (target) != tmode
19983 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19984 target = gen_reg_rtx (tmode);
19985 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
19986 if (! pat)
19987 return NULL_RTX;
19988 emit_insn (pat);
19989 return target;
19990
19991 case IX86_BUILTIN_VEC_INIT_V2SI:
19992 case IX86_BUILTIN_VEC_INIT_V4HI:
19993 case IX86_BUILTIN_VEC_INIT_V8QI:
19994 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
19995
19996 case IX86_BUILTIN_VEC_EXT_V2DF:
19997 case IX86_BUILTIN_VEC_EXT_V2DI:
19998 case IX86_BUILTIN_VEC_EXT_V4SF:
19999 case IX86_BUILTIN_VEC_EXT_V4SI:
20000 case IX86_BUILTIN_VEC_EXT_V8HI:
20001 case IX86_BUILTIN_VEC_EXT_V2SI:
20002 case IX86_BUILTIN_VEC_EXT_V4HI:
20003 case IX86_BUILTIN_VEC_EXT_V16QI:
20004 return ix86_expand_vec_ext_builtin (exp, target);
20005
20006 case IX86_BUILTIN_VEC_SET_V2DI:
20007 case IX86_BUILTIN_VEC_SET_V4SF:
20008 case IX86_BUILTIN_VEC_SET_V4SI:
20009 case IX86_BUILTIN_VEC_SET_V8HI:
20010 case IX86_BUILTIN_VEC_SET_V4HI:
20011 case IX86_BUILTIN_VEC_SET_V16QI:
20012 return ix86_expand_vec_set_builtin (exp);
20013
20014 case IX86_BUILTIN_INFQ:
20015 {
20016 REAL_VALUE_TYPE inf;
20017 rtx tmp;
20018
20019 real_inf (&inf);
20020 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
20021
20022 tmp = validize_mem (force_const_mem (mode, tmp));
20023
20024 if (target == 0)
20025 target = gen_reg_rtx (mode);
20026
20027 emit_move_insn (target, tmp);
20028 return target;
20029 }
20030
20031 case IX86_BUILTIN_FABSQ:
20032 return ix86_expand_unop_builtin (CODE_FOR_abstf2, exp, target, 0);
20033
20034 case IX86_BUILTIN_COPYSIGNQ:
20035 return ix86_expand_binop_builtin (CODE_FOR_copysigntf3, exp, target);
20036
20037 default:
20038 break;
20039 }
20040
20041 for (i = 0, d = bdesc_sse_3arg;
20042 i < ARRAY_SIZE (bdesc_sse_3arg);
20043 i++, d++)
20044 if (d->code == fcode)
20045 return ix86_expand_sse_4_operands_builtin (d->icode, exp,
20046 target);
20047
20048 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
20049 if (d->code == fcode)
20050 {
20051 /* Compares are treated specially. */
20052 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
20053 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
20054 || d->icode == CODE_FOR_sse2_maskcmpv2df3
20055 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
20056 return ix86_expand_sse_compare (d, exp, target);
20057
20058 return ix86_expand_binop_builtin (d->icode, exp, target);
20059 }
20060
20061 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
20062 if (d->code == fcode)
20063 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
20064
20065 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
20066 if (d->code == fcode)
20067 return ix86_expand_sse_comi (d, exp, target);
20068
20069 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
20070 if (d->code == fcode)
20071 return ix86_expand_sse_ptest (d, exp, target);
20072
20073 for (i = 0, d = bdesc_crc32; i < ARRAY_SIZE (bdesc_crc32); i++, d++)
20074 if (d->code == fcode)
20075 return ix86_expand_crc32 (d->icode, exp, target);
20076
20077 for (i = 0, d = bdesc_pcmpestr;
20078 i < ARRAY_SIZE (bdesc_pcmpestr);
20079 i++, d++)
20080 if (d->code == fcode)
20081 return ix86_expand_sse_pcmpestr (d, exp, target);
20082
20083 for (i = 0, d = bdesc_pcmpistr;
20084 i < ARRAY_SIZE (bdesc_pcmpistr);
20085 i++, d++)
20086 if (d->code == fcode)
20087 return ix86_expand_sse_pcmpistr (d, exp, target);
20088
20089 gcc_unreachable ();
20090 }
20091
20092 /* Returns a function decl for a vectorized version of the builtin function
20093    with builtin function code FN, result vector type TYPE_OUT and argument
20094    vector type TYPE_IN, or NULL_TREE if it is not available.  */
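/* For illustration (a sketch of how the vectorizer uses this hook): with
   -O2 -msse2 and -fno-math-errno (or -ffast-math), a loop such as

     for (i = 0; i < n; i++)
       y[i] = sqrt (x[i]);

   queries this hook with BUILT_IN_SQRT and V2DFmode vector types and gets
   back the IX86_BUILTIN_SQRTPD decl, so the vectorized loop uses sqrtpd.  */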
20095
20096 static tree
20097 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
20098 tree type_in)
20099 {
20100 enum machine_mode in_mode, out_mode;
20101 int in_n, out_n;
20102
20103 if (TREE_CODE (type_out) != VECTOR_TYPE
20104 || TREE_CODE (type_in) != VECTOR_TYPE)
20105 return NULL_TREE;
20106
20107 out_mode = TYPE_MODE (TREE_TYPE (type_out));
20108 out_n = TYPE_VECTOR_SUBPARTS (type_out);
20109 in_mode = TYPE_MODE (TREE_TYPE (type_in));
20110 in_n = TYPE_VECTOR_SUBPARTS (type_in);
20111
20112 switch (fn)
20113 {
20114 case BUILT_IN_SQRT:
20115 if (out_mode == DFmode && out_n == 2
20116 && in_mode == DFmode && in_n == 2)
20117 return ix86_builtins[IX86_BUILTIN_SQRTPD];
20118 break;
20119
20120 case BUILT_IN_SQRTF:
20121 if (out_mode == SFmode && out_n == 4
20122 && in_mode == SFmode && in_n == 4)
20123 return ix86_builtins[IX86_BUILTIN_SQRTPS];
20124 break;
20125
20126 case BUILT_IN_LRINT:
20127 if (out_mode == SImode && out_n == 4
20128 && in_mode == DFmode && in_n == 2)
20129 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
20130 break;
20131
20132 case BUILT_IN_LRINTF:
20133 if (out_mode == SImode && out_n == 4
20134 && in_mode == SFmode && in_n == 4)
20135 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
20136 break;
20137
20138 default:
20139 ;
20140 }
20141
20142 /* Dispatch to a handler for a vectorization library. */
20143 if (ix86_veclib_handler)
20144 return (*ix86_veclib_handler)(fn, type_out, type_in);
20145
20146 return NULL_TREE;
20147 }
20148
20149 /* Handler for an ACML-style interface to a library with vectorized
20150 intrinsics. */
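/* For illustration (derived from the name template below): BUILT_IN_SIN
   on a V2DFmode vector produces the name "__vrd2_sin", and BUILT_IN_LOGF
   on a V4SFmode vector produces "__vrs4_logf"; the decl returned is an
   external declaration of that ACML routine.  */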
20151
20152 static tree
20153 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
20154 {
20155 char name[20] = "__vr.._";
20156 tree fntype, new_fndecl, args;
20157 unsigned arity;
20158 const char *bname;
20159 enum machine_mode el_mode, in_mode;
20160 int n, in_n;
20161
20162 /* The ACML is 64-bit only and suitable only for unsafe math, as it
20163    does not correctly support parts of IEEE, such as denormals, with
20164    the required precision.  */
20165 if (!TARGET_64BIT
20166 || !flag_unsafe_math_optimizations)
20167 return NULL_TREE;
20168
20169 el_mode = TYPE_MODE (TREE_TYPE (type_out));
20170 n = TYPE_VECTOR_SUBPARTS (type_out);
20171 in_mode = TYPE_MODE (TREE_TYPE (type_in));
20172 in_n = TYPE_VECTOR_SUBPARTS (type_in);
20173 if (el_mode != in_mode
20174 || n != in_n)
20175 return NULL_TREE;
20176
20177 switch (fn)
20178 {
20179 case BUILT_IN_SIN:
20180 case BUILT_IN_COS:
20181 case BUILT_IN_EXP:
20182 case BUILT_IN_LOG:
20183 case BUILT_IN_LOG2:
20184 case BUILT_IN_LOG10:
20185 name[4] = 'd';
20186 name[5] = '2';
20187 if (el_mode != DFmode
20188 || n != 2)
20189 return NULL_TREE;
20190 break;
20191
20192 case BUILT_IN_SINF:
20193 case BUILT_IN_COSF:
20194 case BUILT_IN_EXPF:
20195 case BUILT_IN_POWF:
20196 case BUILT_IN_LOGF:
20197 case BUILT_IN_LOG2F:
20198 case BUILT_IN_LOG10F:
20199 name[4] = 's';
20200 name[5] = '4';
20201 if (el_mode != SFmode
20202 || n != 4)
20203 return NULL_TREE;
20204 break;
20205
20206 default:
20207 return NULL_TREE;
20208 }
20209
20210 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
20211 sprintf (name + 7, "%s", bname+10);
20212
20213 arity = 0;
20214 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
20215 args = TREE_CHAIN (args))
20216 arity++;
20217
20218 if (arity == 1)
20219 fntype = build_function_type_list (type_out, type_in, NULL);
20220 else
20221 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
20222
20223 /* Build a function declaration for the vectorized function. */
20224 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
20225 TREE_PUBLIC (new_fndecl) = 1;
20226 DECL_EXTERNAL (new_fndecl) = 1;
20227 DECL_IS_NOVOPS (new_fndecl) = 1;
20228 TREE_READONLY (new_fndecl) = 1;
20229
20230 return new_fndecl;
20231 }
20232
20233
20234 /* Returns a decl of a function that implements conversion of the
20235 input vector of type TYPE, or NULL_TREE if it is not available. */
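/* For illustration (based on the cases below): vectorizing an int-to-float
   conversion loop such as

     for (i = 0; i < n; i++)
       f[i] = (float) s[i];

   asks for FLOAT_EXPR on a V4SImode vector and gets the
   IX86_BUILTIN_CVTDQ2PS decl (cvtdq2ps); the float-to-int truncation on
   V4SFmode maps to cvttps2dq instead.  */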
20236
20237 static tree
20238 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
20239 {
20240 if (TREE_CODE (type) != VECTOR_TYPE)
20241 return NULL_TREE;
20242
20243 switch (code)
20244 {
20245 case FLOAT_EXPR:
20246 switch (TYPE_MODE (type))
20247 {
20248 case V4SImode:
20249 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
20250 default:
20251 return NULL_TREE;
20252 }
20253
20254 case FIX_TRUNC_EXPR:
20255 switch (TYPE_MODE (type))
20256 {
20257 case V4SFmode:
20258 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
20259 default:
20260 return NULL_TREE;
20261 }
20262 default:
20263 return NULL_TREE;
20264
20265 }
20266 }
20267
20268 /* Returns a decl of a target-specific builtin that implements the
20269    reciprocal of the function FN, or NULL_TREE if not available.  */
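/* For illustration (based on the cases below): with -mrecip -ffast-math,
   a scalar sqrtf use such as 1.0f / sqrtf (x) maps BUILT_IN_SQRTF to
   IX86_BUILTIN_RSQRTF, so an rsqrtss-based approximation (refined by a
   Newton-Raphson step emitted elsewhere in this file) replaces the
   sqrt-and-divide sequence.  */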
20270
20271 static tree
20272 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
20273 bool sqrt ATTRIBUTE_UNUSED)
20274 {
20275 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
20276 && flag_finite_math_only && !flag_trapping_math
20277 && flag_unsafe_math_optimizations))
20278 return NULL_TREE;
20279
20280 if (md_fn)
20281 /* Machine dependent builtins. */
20282 switch (fn)
20283 {
20284 /* Vectorized version of sqrt to rsqrt conversion. */
20285 case IX86_BUILTIN_SQRTPS:
20286 return ix86_builtins[IX86_BUILTIN_RSQRTPS];
20287
20288 default:
20289 return NULL_TREE;
20290 }
20291 else
20292 /* Normal builtins. */
20293 switch (fn)
20294 {
20295 /* Sqrt to rsqrt conversion. */
20296 case BUILT_IN_SQRTF:
20297 return ix86_builtins[IX86_BUILTIN_RSQRTF];
20298
20299 default:
20300 return NULL_TREE;
20301 }
20302 }
20303
20304 /* Store OPERAND to memory after reload is completed.  This means
20305    that we can't easily use assign_stack_local.  */
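/* For illustration (based on the cases below): on 64-bit targets with a
   red zone a DImode operand is simply stored at sp - 128 without moving
   the stack pointer, while without a red zone it is pushed,

     (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI ...))

   and in both cases the returned MEM refers to where the value now
   lives.  */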
20306 rtx
20307 ix86_force_to_memory (enum machine_mode mode, rtx operand)
20308 {
20309 rtx result;
20310
20311 gcc_assert (reload_completed);
20312 if (TARGET_RED_ZONE)
20313 {
20314 result = gen_rtx_MEM (mode,
20315 gen_rtx_PLUS (Pmode,
20316 stack_pointer_rtx,
20317 GEN_INT (-RED_ZONE_SIZE)));
20318 emit_move_insn (result, operand);
20319 }
20320 else if (!TARGET_RED_ZONE && TARGET_64BIT)
20321 {
20322 switch (mode)
20323 {
20324 case HImode:
20325 case SImode:
20326 operand = gen_lowpart (DImode, operand);
20327 /* FALLTHRU */
20328 case DImode:
20329 emit_insn (
20330 gen_rtx_SET (VOIDmode,
20331 gen_rtx_MEM (DImode,
20332 gen_rtx_PRE_DEC (DImode,
20333 stack_pointer_rtx)),
20334 operand));
20335 break;
20336 default:
20337 gcc_unreachable ();
20338 }
20339 result = gen_rtx_MEM (mode, stack_pointer_rtx);
20340 }
20341 else
20342 {
20343 switch (mode)
20344 {
20345 case DImode:
20346 {
20347 rtx operands[2];
20348 split_di (&operand, 1, operands, operands + 1);
20349 emit_insn (
20350 gen_rtx_SET (VOIDmode,
20351 gen_rtx_MEM (SImode,
20352 gen_rtx_PRE_DEC (Pmode,
20353 stack_pointer_rtx)),
20354 operands[1]));
20355 emit_insn (
20356 gen_rtx_SET (VOIDmode,
20357 gen_rtx_MEM (SImode,
20358 gen_rtx_PRE_DEC (Pmode,
20359 stack_pointer_rtx)),
20360 operands[0]));
20361 }
20362 break;
20363 case HImode:
20364 /* Store HImodes as SImodes. */
20365 operand = gen_lowpart (SImode, operand);
20366 /* FALLTHRU */
20367 case SImode:
20368 emit_insn (
20369 gen_rtx_SET (VOIDmode,
20370 gen_rtx_MEM (GET_MODE (operand),
20371 gen_rtx_PRE_DEC (SImode,
20372 stack_pointer_rtx)),
20373 operand));
20374 break;
20375 default:
20376 gcc_unreachable ();
20377 }
20378 result = gen_rtx_MEM (mode, stack_pointer_rtx);
20379 }
20380 return result;
20381 }
20382
20383 /* Free the operand from memory.  */
20384 void
20385 ix86_free_from_memory (enum machine_mode mode)
20386 {
20387 if (!TARGET_RED_ZONE)
20388 {
20389 int size;
20390
20391 if (mode == DImode || TARGET_64BIT)
20392 size = 8;
20393 else
20394 size = 4;
20395 /* Use LEA to deallocate stack space. In peephole2 it will be converted
20396 to pop or add instruction if registers are available. */
20397 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
20398 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
20399 GEN_INT (size))));
20400 }
20401 }
20402
20403 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
20404 QImode must go into class Q_REGS.
20405 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
20406 movdf to do mem-to-mem moves through integer regs. */
20407 enum reg_class
20408 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
20409 {
20410 enum machine_mode mode = GET_MODE (x);
20411
20412 /* We're only allowed to return a subclass of CLASS. Many of the
20413 following checks fail for NO_REGS, so eliminate that early. */
20414 if (regclass == NO_REGS)
20415 return NO_REGS;
20416
20417 /* All classes can load zeros. */
20418 if (x == CONST0_RTX (mode))
20419 return regclass;
20420
20421 /* Force constants into memory if we are loading a (nonzero) constant into
20422 an MMX or SSE register. This is because there are no MMX/SSE instructions
20423 to load from a constant. */
20424 if (CONSTANT_P (x)
20425 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
20426 return NO_REGS;
20427
20428 /* Prefer SSE regs only, if we can use them for math. */
20429 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
20430 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
20431
20432 /* Floating-point constants need more complex checks. */
20433 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
20434 {
20435 /* General regs can load everything. */
20436 if (reg_class_subset_p (regclass, GENERAL_REGS))
20437 return regclass;
20438
20439 /* Floats can load 0 and 1 plus some others. Note that we eliminated
20440 zero above. We only want to wind up preferring 80387 registers if
20441 we plan on doing computation with them. */
20442 if (TARGET_80387
20443 && standard_80387_constant_p (x))
20444 {
20445 /* Limit class to non-sse. */
20446 if (regclass == FLOAT_SSE_REGS)
20447 return FLOAT_REGS;
20448 if (regclass == FP_TOP_SSE_REGS)
20449 return FP_TOP_REG;
20450 if (regclass == FP_SECOND_SSE_REGS)
20451 return FP_SECOND_REG;
20452 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
20453 return regclass;
20454 }
20455
20456 return NO_REGS;
20457 }
20458
20459 /* Generally when we see PLUS here, it's the function invariant
20460 (plus soft-fp const_int), which can only be computed into general
20461 regs. */
20462 if (GET_CODE (x) == PLUS)
20463 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
20464
20465 /* QImode constants are easy to load, but non-constant QImode data
20466 must go into Q_REGS. */
20467 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
20468 {
20469 if (reg_class_subset_p (regclass, Q_REGS))
20470 return regclass;
20471 if (reg_class_subset_p (Q_REGS, regclass))
20472 return Q_REGS;
20473 return NO_REGS;
20474 }
20475
20476 return regclass;
20477 }
20478
20479 /* Discourage putting floating-point values in SSE registers unless
20480 SSE math is being used, and likewise for the 387 registers. */
20481 enum reg_class
20482 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
20483 {
20484 enum machine_mode mode = GET_MODE (x);
20485
20486 /* Restrict the output reload class to the register bank that we are doing
20487 math on. If we would like not to return a subset of CLASS, reject this
20488 alternative: if reload cannot do this, it will still use its choice. */
20489 mode = GET_MODE (x);
20490 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
20491 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
20492
20493 if (X87_FLOAT_MODE_P (mode))
20494 {
20495 if (regclass == FP_TOP_SSE_REGS)
20496 return FP_TOP_REG;
20497 else if (regclass == FP_SECOND_SSE_REGS)
20498 return FP_SECOND_REG;
20499 else
20500 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
20501 }
20502
20503 return regclass;
20504 }
20505
20506 /* If we are copying between general and FP registers, we need a memory
20507 location. The same is true for SSE and MMX registers.
20508
20509 To optimize register_move_cost performance, allow inline variant.
20510
20511 The macro can't work reliably when one of the CLASSES is a class containing
20512 registers from multiple units (SSE, MMX, integer). We avoid this by never
20513 combining those units in a single alternative in the machine description.
20514 Ensure that this constraint holds to avoid unexpected surprises.
20515
20516 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
20517 enforce these sanity checks. */
20518
20519 static inline int
20520 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
20521 enum machine_mode mode, int strict)
20522 {
20523 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
20524 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
20525 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
20526 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
20527 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
20528 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
20529 {
20530 gcc_assert (!strict);
20531 return true;
20532 }
20533
20534 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
20535 return true;
20536
20537 /* ??? This is a lie. We do have moves between mmx/general, and for
20538 mmx/sse2. But by saying we need secondary memory we discourage the
20539 register allocator from using the mmx registers unless needed. */
20540 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
20541 return true;
20542
20543 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
20544 {
20545 /* SSE1 doesn't have any direct moves from other classes. */
20546 if (!TARGET_SSE2)
20547 return true;
20548
20549 /* If the target says that inter-unit moves are more expensive
20550 than moving through memory, then don't generate them. */
20551 if (!TARGET_INTER_UNIT_MOVES)
20552 return true;
20553
20554 /* Between SSE and general, we have moves no larger than word size. */
20555 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
20556 return true;
20557 }
20558
20559 return false;
20560 }
20561
20562 int
20563 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
20564 enum machine_mode mode, int strict)
20565 {
20566 return inline_secondary_memory_needed (class1, class2, mode, strict);
20567 }
20568
20569 /* Return true if the registers in CLASS cannot represent the change from
20570 modes FROM to TO. */
20571
20572 bool
20573 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
20574 enum reg_class regclass)
20575 {
20576 if (from == to)
20577 return false;
20578
20579 /* x87 registers can't do subreg at all, as all values are reformatted
20580 to extended precision. */
20581 if (MAYBE_FLOAT_CLASS_P (regclass))
20582 return true;
20583
20584 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
20585 {
20586 /* Vector registers do not support QI or HImode loads. If we don't
20587 disallow a change to these modes, reload will assume it's ok to
20588 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
20589 the vec_dupv4hi pattern. */
20590 if (GET_MODE_SIZE (from) < 4)
20591 return true;
20592
20593 /* Vector registers do not support subreg with nonzero offsets, which
20594 are otherwise valid for integer registers. Since we can't see
20595 whether we have a nonzero offset from here, prohibit all
20596 nonparadoxical subregs changing size. */
20597 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
20598 return true;
20599 }
20600
20601 return false;
20602 }
20603
20604 /* Return the cost of moving data of mode M between a
20605 register and memory. A value of 2 is the default; this cost is
20606 relative to those in `REGISTER_MOVE_COST'.
20607
20608 This function is used extensively by register_move_cost, which is used to
20609 build tables at startup, so keep it inline. When IN is 2, return the
20610 maximum of the in and out move costs.
20611
20612 If moving between registers and memory is more expensive than
20613 between two registers, you should define this macro to express the
20614 relative cost.
20615
20616 Also model the increased cost of moving QImode values for registers
20617 in non-Q_REGS classes.
20618 */
20619 static inline int
20620 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
20621 int in)
20622 {
20623 int cost;
20624 if (FLOAT_CLASS_P (regclass))
20625 {
20626 int index;
20627 switch (mode)
20628 {
20629 case SFmode:
20630 index = 0;
20631 break;
20632 case DFmode:
20633 index = 1;
20634 break;
20635 case XFmode:
20636 index = 2;
20637 break;
20638 default:
20639 return 100;
20640 }
20641 if (in == 2)
20642 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
20643 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
20644 }
20645 if (SSE_CLASS_P (regclass))
20646 {
20647 int index;
20648 switch (GET_MODE_SIZE (mode))
20649 {
20650 case 4:
20651 index = 0;
20652 break;
20653 case 8:
20654 index = 1;
20655 break;
20656 case 16:
20657 index = 2;
20658 break;
20659 default:
20660 return 100;
20661 }
20662 if (in == 2)
20663 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
20664 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
20665 }
20666 if (MMX_CLASS_P (regclass))
20667 {
20668 int index;
20669 switch (GET_MODE_SIZE (mode))
20670 {
20671 case 4:
20672 index = 0;
20673 break;
20674 case 8:
20675 index = 1;
20676 break;
20677 default:
20678 return 100;
20679 }
20680 if (in == 2)
20681 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
20682 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
20683 }
20684 switch (GET_MODE_SIZE (mode))
20685 {
20686 case 1:
20687 if (Q_CLASS_P (regclass) || TARGET_64BIT)
20688 {
20689 if (!in)
20690 return ix86_cost->int_store[0];
20691 if (TARGET_PARTIAL_REG_DEPENDENCY && !optimize_size)
20692 cost = ix86_cost->movzbl_load;
20693 else
20694 cost = ix86_cost->int_load[0];
20695 if (in == 2)
20696 return MAX (cost, ix86_cost->int_store[0]);
20697 return cost;
20698 }
20699 else
20700 {
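 	  /* In 32-bit mode only %eax..%ebx have byte subregisters, so a
 	     QImode store from another register class needs an extra copy;
 	     charge a small penalty for it.  */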
20701 if (in == 2)
20702 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
20703 if (in)
20704 return ix86_cost->movzbl_load;
20705 else
20706 return ix86_cost->int_store[0] + 4;
20707 }
20708 break;
20709 case 2:
20710 if (in == 2)
20711 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
20712 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
20713 default:
20714 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
20715 if (mode == TFmode)
20716 mode = XFmode;
20717 if (in == 2)
20718 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
20719 else if (in)
20720 cost = ix86_cost->int_load[2];
20721 else
20722 cost = ix86_cost->int_store[2];
20723 return (cost * (((int) GET_MODE_SIZE (mode)
20724 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
20725 }
20726 }
20727
20728 int
20729 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
20730 {
20731 return inline_memory_move_cost (mode, regclass, in);
20732 }
20733
20734
20735 /* Return the cost of moving data from a register in class CLASS1 to
20736 one in class CLASS2.
20737
20738 It is not required that the cost always equal 2 when FROM is the same as TO;
20739 on some machines it is expensive to move between registers if they are not
20740 general registers. */
20741
20742 int
20743 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
20744 enum reg_class class2)
20745 {
20746 /* In case we require secondary memory, compute cost of the store followed
20747 by load. In order to avoid bad register allocation choices, we need
20748 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
20749
20750 if (inline_secondary_memory_needed (class1, class2, mode, 0))
20751 {
20752 int cost = 1;
20753
20754 cost += inline_memory_move_cost (mode, class1, 2);
20755 cost += inline_memory_move_cost (mode, class2, 2);
20756
20757 /* In case of copying from general_purpose_register we may emit multiple
20758 stores followed by single load causing memory size mismatch stall.
20759 Count this as arbitrarily high cost of 20. */
20760 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
20761 cost += 20;
20762
20763 /* In the case of FP/MMX moves, the registers actually overlap, and we
20764 have to switch modes in order to treat them differently. */
20765 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
20766 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
20767 cost += 20;
20768
20769 return cost;
20770 }
20771
20772 /* Moves between SSE/MMX and integer unit are expensive. */
20773 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
20774 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
20775
20776 /* ??? By keeping returned value relatively high, we limit the number
20777 of moves between integer and MMX/SSE registers for all targets.
20778 Additionally, high value prevents problem with x86_modes_tieable_p(),
20779 where integer modes in MMX/SSE registers are not tieable
20780 because of missing QImode and HImode moves to, from or between
20781 MMX/SSE registers. */
20782 return MAX (ix86_cost->mmxsse_to_integer, 8);
20783
20784 if (MAYBE_FLOAT_CLASS_P (class1))
20785 return ix86_cost->fp_move;
20786 if (MAYBE_SSE_CLASS_P (class1))
20787 return ix86_cost->sse_move;
20788 if (MAYBE_MMX_CLASS_P (class1))
20789 return ix86_cost->mmx_move;
20790 return 2;
20791 }
20792
20793 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
20794
20795 bool
20796 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
20797 {
20798 /* The flags register, and only the flags register, can hold CCmode values. */
20799 if (CC_REGNO_P (regno))
20800 return GET_MODE_CLASS (mode) == MODE_CC;
20801 if (GET_MODE_CLASS (mode) == MODE_CC
20802 || GET_MODE_CLASS (mode) == MODE_RANDOM
20803 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
20804 return 0;
20805 if (FP_REGNO_P (regno))
20806 return VALID_FP_MODE_P (mode);
20807 if (SSE_REGNO_P (regno))
20808 {
20809 /* We implement the move patterns for all vector modes into and
20810 out of SSE registers, even when no operation instructions
20811 are available. */
20812 return (VALID_SSE_REG_MODE (mode)
20813 || VALID_SSE2_REG_MODE (mode)
20814 || VALID_MMX_REG_MODE (mode)
20815 || VALID_MMX_REG_MODE_3DNOW (mode));
20816 }
20817 if (MMX_REGNO_P (regno))
20818 {
20819 /* We implement the move patterns for 3DNOW modes even in MMX mode,
20820 so if the register is available at all, then we can move data of
20821 the given mode into or out of it. */
20822 return (VALID_MMX_REG_MODE (mode)
20823 || VALID_MMX_REG_MODE_3DNOW (mode));
20824 }
20825
20826 if (mode == QImode)
20827 {
20828 /* Take care for QImode values - they can be in non-QI regs,
20829 but then they do cause partial register stalls. */
20830 if (regno < 4 || TARGET_64BIT)
20831 return 1;
20832 if (!TARGET_PARTIAL_REG_STALL)
20833 return 1;
20834 return reload_in_progress || reload_completed;
20835 }
20836 /* We handle both integer and floats in the general purpose registers. */
20837 else if (VALID_INT_MODE_P (mode))
20838 return 1;
20839 else if (VALID_FP_MODE_P (mode))
20840 return 1;
20841 else if (VALID_DFP_MODE_P (mode))
20842 return 1;
20843 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
20844 on to use that value in smaller contexts, this can easily force a
20845 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
20846 supporting DImode, allow it. */
20847 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
20848 return 1;
20849
20850 return 0;
20851 }
20852
20853 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
20854 tieable integer mode. */
20855
20856 static bool
20857 ix86_tieable_integer_mode_p (enum machine_mode mode)
20858 {
20859 switch (mode)
20860 {
20861 case HImode:
20862 case SImode:
20863 return true;
20864
20865 case QImode:
20866 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
20867
20868 case DImode:
20869 return TARGET_64BIT;
20870
20871 default:
20872 return false;
20873 }
20874 }
20875
20876 /* Return true if MODE1 is accessible in a register that can hold MODE2
20877 without copying. That is, all register classes that can hold MODE2
20878 can also hold MODE1. */
20879
20880 bool
20881 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
20882 {
20883 if (mode1 == mode2)
20884 return true;
20885
20886 if (ix86_tieable_integer_mode_p (mode1)
20887 && ix86_tieable_integer_mode_p (mode2))
20888 return true;
20889
20890 /* MODE2 being XFmode implies fp stack or general regs, which means we
20891 can tie any smaller floating point modes to it. Note that we do not
20892 tie this with TFmode. */
20893 if (mode2 == XFmode)
20894 return mode1 == SFmode || mode1 == DFmode;
20895
20896 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
20897 that we can tie it with SFmode. */
20898 if (mode2 == DFmode)
20899 return mode1 == SFmode;
20900
20901 /* If MODE2 is only appropriate for an SSE register, then tie with
20902 any other mode acceptable to SSE registers. */
20903 if (GET_MODE_SIZE (mode2) == 16
20904 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
20905 return (GET_MODE_SIZE (mode1) == 16
20906 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
20907
20908 /* If MODE2 is appropriate for an MMX register, then tie
20909 with any other mode acceptable to MMX registers. */
20910 if (GET_MODE_SIZE (mode2) == 8
20911 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
20912 return (GET_MODE_SIZE (mode1) == 8
20913 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
20914
20915 return false;
20916 }
20917
20918 /* Compute a (partial) cost for rtx X. Return true if the complete
20919 cost has been computed, and false if subexpressions should be
20920 scanned. In either case, *TOTAL contains the cost result. */
20921
20922 static bool
20923 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
20924 {
20925 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
20926 enum machine_mode mode = GET_MODE (x);
20927
20928 switch (code)
20929 {
20930 case CONST_INT:
20931 case CONST:
20932 case LABEL_REF:
20933 case SYMBOL_REF:
20934 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
20935 *total = 3;
20936 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
20937 *total = 2;
20938 else if (flag_pic && SYMBOLIC_CONST (x)
20939 && (!TARGET_64BIT
20940 || (GET_CODE (x) != LABEL_REF
20941 && (GET_CODE (x) != SYMBOL_REF
20942 || !SYMBOL_REF_LOCAL_P (x)))))
20943 *total = 1;
20944 else
20945 *total = 0;
20946 return true;
20947
20948 case CONST_DOUBLE:
20949 if (mode == VOIDmode)
20950 *total = 0;
20951 else
20952 switch (standard_80387_constant_p (x))
20953 {
20954 case 1: /* 0.0 */
20955 *total = 1;
20956 break;
20957 default: /* Other constants */
20958 *total = 2;
20959 break;
20960 case 0:
20961 case -1:
20962 /* Start with (MEM (SYMBOL_REF)), since that's where
20963 it'll probably end up. Add a penalty for size. */
20964 *total = (COSTS_N_INSNS (1)
20965 + (flag_pic != 0 && !TARGET_64BIT)
20966 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
20967 break;
20968 }
20969 return true;
20970
20971 case ZERO_EXTEND:
20972 /* The zero extension is often completely free on x86_64, so make
20973 it as cheap as possible. */
20974 if (TARGET_64BIT && mode == DImode
20975 && GET_MODE (XEXP (x, 0)) == SImode)
20976 *total = 1;
20977 else if (TARGET_ZERO_EXTEND_WITH_AND)
20978 *total = ix86_cost->add;
20979 else
20980 *total = ix86_cost->movzx;
20981 return false;
20982
20983 case SIGN_EXTEND:
20984 *total = ix86_cost->movsx;
20985 return false;
20986
20987 case ASHIFT:
20988 if (CONST_INT_P (XEXP (x, 1))
20989 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
20990 {
20991 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
20992 if (value == 1)
20993 {
20994 *total = ix86_cost->add;
20995 return false;
20996 }
20997 if ((value == 2 || value == 3)
20998 && ix86_cost->lea <= ix86_cost->shift_const)
20999 {
21000 *total = ix86_cost->lea;
21001 return false;
21002 }
21003 }
21004 /* FALLTHRU */
21005
21006 case ROTATE:
21007 case ASHIFTRT:
21008 case LSHIFTRT:
21009 case ROTATERT:
21010 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
21011 {
21012 if (CONST_INT_P (XEXP (x, 1)))
21013 {
21014 if (INTVAL (XEXP (x, 1)) > 32)
21015 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
21016 else
21017 *total = ix86_cost->shift_const * 2;
21018 }
21019 else
21020 {
21021 if (GET_CODE (XEXP (x, 1)) == AND)
21022 *total = ix86_cost->shift_var * 2;
21023 else
21024 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
21025 }
21026 }
21027 else
21028 {
21029 if (CONST_INT_P (XEXP (x, 1)))
21030 *total = ix86_cost->shift_const;
21031 else
21032 *total = ix86_cost->shift_var;
21033 }
21034 return false;
21035
21036 case MULT:
21037 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
21038 {
21039 /* ??? SSE scalar cost should be used here. */
21040 *total = ix86_cost->fmul;
21041 return false;
21042 }
21043 else if (X87_FLOAT_MODE_P (mode))
21044 {
21045 *total = ix86_cost->fmul;
21046 return false;
21047 }
21048 else if (FLOAT_MODE_P (mode))
21049 {
21050 /* ??? SSE vector cost should be used here. */
21051 *total = ix86_cost->fmul;
21052 return false;
21053 }
21054 else
21055 {
21056 rtx op0 = XEXP (x, 0);
21057 rtx op1 = XEXP (x, 1);
21058 int nbits;
21059 if (CONST_INT_P (XEXP (x, 1)))
21060 {
21061 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
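 	      /* Count the set bits in the multiplier; value &= value - 1
 		 clears the lowest set bit on each iteration, and each set
 		 bit contributes mult_bit to the cost below.  */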
21062 for (nbits = 0; value != 0; value &= value - 1)
21063 nbits++;
21064 }
21065 else
21066 /* This is arbitrary. */
21067 nbits = 7;
21068
21069 /* Compute costs correctly for widening multiplication. */
21070 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
21071 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
21072 == GET_MODE_SIZE (mode))
21073 {
21074 int is_mulwiden = 0;
21075 enum machine_mode inner_mode = GET_MODE (op0);
21076
21077 if (GET_CODE (op0) == GET_CODE (op1))
21078 is_mulwiden = 1, op1 = XEXP (op1, 0);
21079 else if (CONST_INT_P (op1))
21080 {
21081 if (GET_CODE (op0) == SIGN_EXTEND)
21082 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
21083 == INTVAL (op1);
21084 else
21085 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
21086 }
21087
21088 if (is_mulwiden)
21089 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
21090 }
21091
21092 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
21093 + nbits * ix86_cost->mult_bit
21094 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
21095
21096 return true;
21097 }
21098
21099 case DIV:
21100 case UDIV:
21101 case MOD:
21102 case UMOD:
21103 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
21104 /* ??? SSE cost should be used here. */
21105 *total = ix86_cost->fdiv;
21106 else if (X87_FLOAT_MODE_P (mode))
21107 *total = ix86_cost->fdiv;
21108 else if (FLOAT_MODE_P (mode))
21109 /* ??? SSE vector cost should be used here. */
21110 *total = ix86_cost->fdiv;
21111 else
21112 *total = ix86_cost->divide[MODE_INDEX (mode)];
21113 return false;
21114
21115 case PLUS:
21116 if (GET_MODE_CLASS (mode) == MODE_INT
21117 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
21118 {
21119 if (GET_CODE (XEXP (x, 0)) == PLUS
21120 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
21121 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
21122 && CONSTANT_P (XEXP (x, 1)))
21123 {
21124 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
21125 if (val == 2 || val == 4 || val == 8)
21126 {
21127 *total = ix86_cost->lea;
21128 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
21129 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
21130 outer_code);
21131 *total += rtx_cost (XEXP (x, 1), outer_code);
21132 return true;
21133 }
21134 }
21135 else if (GET_CODE (XEXP (x, 0)) == MULT
21136 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
21137 {
21138 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
21139 if (val == 2 || val == 4 || val == 8)
21140 {
21141 *total = ix86_cost->lea;
21142 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
21143 *total += rtx_cost (XEXP (x, 1), outer_code);
21144 return true;
21145 }
21146 }
21147 else if (GET_CODE (XEXP (x, 0)) == PLUS)
21148 {
21149 *total = ix86_cost->lea;
21150 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
21151 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
21152 *total += rtx_cost (XEXP (x, 1), outer_code);
21153 return true;
21154 }
21155 }
21156 /* FALLTHRU */
21157
21158 case MINUS:
21159 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
21160 {
21161 /* ??? SSE cost should be used here. */
21162 *total = ix86_cost->fadd;
21163 return false;
21164 }
21165 else if (X87_FLOAT_MODE_P (mode))
21166 {
21167 *total = ix86_cost->fadd;
21168 return false;
21169 }
21170 else if (FLOAT_MODE_P (mode))
21171 {
21172 /* ??? SSE vector cost should be used here. */
21173 *total = ix86_cost->fadd;
21174 return false;
21175 }
21176 /* FALLTHRU */
21177
21178 case AND:
21179 case IOR:
21180 case XOR:
21181 if (!TARGET_64BIT && mode == DImode)
21182 {
21183 *total = (ix86_cost->add * 2
21184 + (rtx_cost (XEXP (x, 0), outer_code)
21185 << (GET_MODE (XEXP (x, 0)) != DImode))
21186 + (rtx_cost (XEXP (x, 1), outer_code)
21187 << (GET_MODE (XEXP (x, 1)) != DImode)));
21188 return true;
21189 }
21190 /* FALLTHRU */
21191
21192 case NEG:
21193 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
21194 {
21195 /* ??? SSE cost should be used here. */
21196 *total = ix86_cost->fchs;
21197 return false;
21198 }
21199 else if (X87_FLOAT_MODE_P (mode))
21200 {
21201 *total = ix86_cost->fchs;
21202 return false;
21203 }
21204 else if (FLOAT_MODE_P (mode))
21205 {
21206 /* ??? SSE vector cost should be used here. */
21207 *total = ix86_cost->fchs;
21208 return false;
21209 }
21210 /* FALLTHRU */
21211
21212 case NOT:
21213 if (!TARGET_64BIT && mode == DImode)
21214 *total = ix86_cost->add * 2;
21215 else
21216 *total = ix86_cost->add;
21217 return false;
21218
21219 case COMPARE:
21220 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
21221 && XEXP (XEXP (x, 0), 1) == const1_rtx
21222 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
21223 && XEXP (x, 1) == const0_rtx)
21224 {
21225 /* This kind of construct is implemented using test[bwl].
21226 Treat it as if we had an AND. */
21227 *total = (ix86_cost->add
21228 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
21229 + rtx_cost (const1_rtx, outer_code));
21230 return true;
21231 }
21232 return false;
21233
21234 case FLOAT_EXTEND:
21235 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
21236 *total = 0;
21237 return false;
21238
21239 case ABS:
21240 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
21241 /* ??? SSE cost should be used here. */
21242 *total = ix86_cost->fabs;
21243 else if (X87_FLOAT_MODE_P (mode))
21244 *total = ix86_cost->fabs;
21245 else if (FLOAT_MODE_P (mode))
21246 /* ??? SSE vector cost should be used here. */
21247 *total = ix86_cost->fabs;
21248 return false;
21249
21250 case SQRT:
21251 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
21252 /* ??? SSE cost should be used here. */
21253 *total = ix86_cost->fsqrt;
21254 else if (X87_FLOAT_MODE_P (mode))
21255 *total = ix86_cost->fsqrt;
21256 else if (FLOAT_MODE_P (mode))
21257 /* ??? SSE vector cost should be used here. */
21258 *total = ix86_cost->fsqrt;
21259 return false;
21260
21261 case UNSPEC:
21262 if (XINT (x, 1) == UNSPEC_TP)
21263 *total = 0;
21264 return false;
21265
21266 default:
21267 return false;
21268 }
21269 }
21270
21271 #if TARGET_MACHO
21272
21273 static int current_machopic_label_num;
21274
21275 /* Given a symbol name and its associated stub, write out the
21276 definition of the stub. */
21277
21278 void
21279 machopic_output_stub (FILE *file, const char *symb, const char *stub)
21280 {
21281 unsigned int length;
21282 char *binder_name, *symbol_name, lazy_ptr_name[32];
21283 int label = ++current_machopic_label_num;
21284
21285 /* For 64-bit we shouldn't get here. */
21286 gcc_assert (!TARGET_64BIT);
21287
21288 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
21289 symb = (*targetm.strip_name_encoding) (symb);
21290
21291 length = strlen (stub);
21292 binder_name = alloca (length + 32);
21293 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
21294
21295 length = strlen (symb);
21296 symbol_name = alloca (length + 32);
21297 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
21298
21299 sprintf (lazy_ptr_name, "L%d$lz", label);
21300
21301 if (MACHOPIC_PURE)
21302 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
21303 else
21304 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
21305
21306 fprintf (file, "%s:\n", stub);
21307 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
21308
21309 if (MACHOPIC_PURE)
21310 {
21311 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
21312 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
21313 fprintf (file, "\tjmp\t*%%edx\n");
21314 }
21315 else
21316 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
21317
21318 fprintf (file, "%s:\n", binder_name);
21319
21320 if (MACHOPIC_PURE)
21321 {
21322 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
21323 fprintf (file, "\tpushl\t%%eax\n");
21324 }
21325 else
21326 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
21327
21328 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
21329
21330 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
21331 fprintf (file, "%s:\n", lazy_ptr_name);
21332 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
21333 fprintf (file, "\t.long %s\n", binder_name);
21334 }
21335
21336 void
21337 darwin_x86_file_end (void)
21338 {
21339 darwin_file_end ();
21340 ix86_file_end ();
21341 }
21342 #endif /* TARGET_MACHO */
21343
21344 /* Order the registers for register allocator. */
21345
21346 void
21347 x86_order_regs_for_local_alloc (void)
21348 {
21349 int pos = 0;
21350 int i;
21351
21352 /* First allocate the local general purpose registers. */
21353 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
21354 if (GENERAL_REGNO_P (i) && call_used_regs[i])
21355 reg_alloc_order [pos++] = i;
21356
21357 /* Global general purpose registers. */
21358 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
21359 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
21360 reg_alloc_order [pos++] = i;
21361
21362 /* x87 registers come first in case we are doing FP math
21363 using them. */
21364 if (!TARGET_SSE_MATH)
21365 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
21366 reg_alloc_order [pos++] = i;
21367
21368 /* SSE registers. */
21369 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
21370 reg_alloc_order [pos++] = i;
21371 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
21372 reg_alloc_order [pos++] = i;
21373
21374 /* x87 registers. */
21375 if (TARGET_SSE_MATH)
21376 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
21377 reg_alloc_order [pos++] = i;
21378
21379 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
21380 reg_alloc_order [pos++] = i;
21381
21382 /* Initialize the rest of array as we do not allocate some registers
21383 at all. */
21384 while (pos < FIRST_PSEUDO_REGISTER)
21385 reg_alloc_order [pos++] = 0;
21386 }
21387
21388 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
21389 struct attribute_spec.handler. */
21390 static tree
21391 ix86_handle_struct_attribute (tree *node, tree name,
21392 tree args ATTRIBUTE_UNUSED,
21393 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
21394 {
21395 tree *type = NULL;
21396 if (DECL_P (*node))
21397 {
21398 if (TREE_CODE (*node) == TYPE_DECL)
21399 type = &TREE_TYPE (*node);
21400 }
21401 else
21402 type = node;
21403
21404 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
21405 || TREE_CODE (*type) == UNION_TYPE)))
21406 {
21407 warning (OPT_Wattributes, "%qs attribute ignored",
21408 IDENTIFIER_POINTER (name));
21409 *no_add_attrs = true;
21410 }
21411
21412 else if ((is_attribute_p ("ms_struct", name)
21413 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
21414 || ((is_attribute_p ("gcc_struct", name)
21415 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
21416 {
21417 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
21418 IDENTIFIER_POINTER (name));
21419 *no_add_attrs = true;
21420 }
21421
21422 return NULL_TREE;
21423 }
21424
21425 static bool
21426 ix86_ms_bitfield_layout_p (const_tree record_type)
21427 {
21428 return ((TARGET_MS_BITFIELD_LAYOUT
21429 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
21430 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
21431 }
21432
21433 /* Returns an expression indicating where the this parameter is
21434 located on entry to the FUNCTION. */
21435
21436 static rtx
21437 x86_this_parameter (tree function)
21438 {
21439 tree type = TREE_TYPE (function);
21440 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
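   /* If the function returns its value in memory, the hidden pointer to the
      return slot is passed first, pushing THIS to the next parameter
      register or stack slot (hence the AGGR offsets below).  */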
21441
21442 if (TARGET_64BIT)
21443 {
21444 const int *parm_regs;
21445
21446 if (TARGET_64BIT_MS_ABI)
21447 parm_regs = x86_64_ms_abi_int_parameter_registers;
21448 else
21449 parm_regs = x86_64_int_parameter_registers;
21450 return gen_rtx_REG (DImode, parm_regs[aggr]);
21451 }
21452
21453 if (ix86_function_regparm (type, function) > 0
21454 && !type_has_variadic_args_p (type))
21455 {
21456 int regno = 0;
21457 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
21458 regno = 2;
21459 return gen_rtx_REG (SImode, regno);
21460 }
21461
21462 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
21463 }
21464
21465 /* Determine whether x86_output_mi_thunk can succeed. */
21466
21467 static bool
21468 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
21469 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
21470 HOST_WIDE_INT vcall_offset, const_tree function)
21471 {
21472 /* 64-bit can handle anything. */
21473 if (TARGET_64BIT)
21474 return true;
21475
21476 /* For 32-bit, everything's fine if we have one free register. */
21477 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
21478 return true;
21479
21480 /* Need a free register for vcall_offset. */
21481 if (vcall_offset)
21482 return false;
21483
21484 /* Need a free register for GOT references. */
21485 if (flag_pic && !(*targetm.binds_local_p) (function))
21486 return false;
21487
21488 /* Otherwise ok. */
21489 return true;
21490 }
21491
21492 /* Output the assembler code for a thunk function. THUNK_DECL is the
21493 declaration for the thunk function itself, FUNCTION is the decl for
21494 the target function. DELTA is an immediate constant offset to be
21495 added to THIS. If VCALL_OFFSET is nonzero, the word at
21496 *(*this + vcall_offset) should be added to THIS. */
21497
21498 static void
21499 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
21500 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
21501 HOST_WIDE_INT vcall_offset, tree function)
21502 {
21503 rtx xops[3];
21504 rtx this_param = x86_this_parameter (function);
21505 rtx this_reg, tmp;
21506
21507 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
21508 pull it in now and let DELTA benefit. */
21509 if (REG_P (this_param))
21510 this_reg = this_param;
21511 else if (vcall_offset)
21512 {
21513 /* Put the this parameter into %eax. */
21514 xops[0] = this_param;
21515 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
21516 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
21517 }
21518 else
21519 this_reg = NULL_RTX;
21520
21521 /* Adjust the this parameter by a fixed constant. */
21522 if (delta)
21523 {
21524 xops[0] = GEN_INT (delta);
21525 xops[1] = this_reg ? this_reg : this_param;
21526 if (TARGET_64BIT)
21527 {
21528 if (!x86_64_general_operand (xops[0], DImode))
21529 {
21530 tmp = gen_rtx_REG (DImode, R10_REG);
21531 xops[1] = tmp;
21532 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
21533 xops[0] = tmp;
21534 xops[1] = this_param;
21535 }
21536 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
21537 }
21538 else
21539 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
21540 }
21541
21542 /* Adjust the this parameter by a value stored in the vtable. */
21543 if (vcall_offset)
21544 {
21545 if (TARGET_64BIT)
21546 tmp = gen_rtx_REG (DImode, R10_REG);
21547 else
21548 {
21549 int tmp_regno = 2 /* ECX */;
21550 if (lookup_attribute ("fastcall",
21551 TYPE_ATTRIBUTES (TREE_TYPE (function))))
21552 tmp_regno = 0 /* EAX */;
21553 tmp = gen_rtx_REG (SImode, tmp_regno);
21554 }
21555
21556 xops[0] = gen_rtx_MEM (Pmode, this_reg);
21557 xops[1] = tmp;
21558 if (TARGET_64BIT)
21559 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
21560 else
21561 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
21562
21563 /* Adjust the this parameter. */
21564 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
21565 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
21566 {
21567 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
21568 xops[0] = GEN_INT (vcall_offset);
21569 xops[1] = tmp2;
21570 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
21571 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
21572 }
21573 xops[1] = this_reg;
21574 if (TARGET_64BIT)
21575 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
21576 else
21577 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
21578 }
21579
21580 /* If necessary, drop THIS back to its stack slot. */
21581 if (this_reg && this_reg != this_param)
21582 {
21583 xops[0] = this_reg;
21584 xops[1] = this_param;
21585 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
21586 }
21587
21588 xops[0] = XEXP (DECL_RTL (function), 0);
21589 if (TARGET_64BIT)
21590 {
21591 if (!flag_pic || (*targetm.binds_local_p) (function))
21592 output_asm_insn ("jmp\t%P0", xops);
21593 /* All thunks should be in the same object as their target,
21594 and thus binds_local_p should be true. */
21595 else if (TARGET_64BIT_MS_ABI)
21596 gcc_unreachable ();
21597 else
21598 {
21599 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
21600 tmp = gen_rtx_CONST (Pmode, tmp);
21601 tmp = gen_rtx_MEM (QImode, tmp);
21602 xops[0] = tmp;
21603 output_asm_insn ("jmp\t%A0", xops);
21604 }
21605 }
21606 else
21607 {
21608 if (!flag_pic || (*targetm.binds_local_p) (function))
21609 output_asm_insn ("jmp\t%P0", xops);
21610 else
21611 #if TARGET_MACHO
21612 if (TARGET_MACHO)
21613 {
21614 rtx sym_ref = XEXP (DECL_RTL (function), 0);
21615 tmp = (gen_rtx_SYMBOL_REF
21616 (Pmode,
21617 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
21618 tmp = gen_rtx_MEM (QImode, tmp);
21619 xops[0] = tmp;
21620 output_asm_insn ("jmp\t%0", xops);
21621 }
21622 else
21623 #endif /* TARGET_MACHO */
21624 {
21625 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
21626 output_set_got (tmp, NULL_RTX);
21627
21628 xops[1] = tmp;
21629 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
21630 output_asm_insn ("jmp\t{*}%1", xops);
21631 }
21632 }
21633 }
21634
21635 static void
21636 x86_file_start (void)
21637 {
21638 default_file_start ();
21639 #if TARGET_MACHO
21640 darwin_file_start ();
21641 #endif
21642 if (X86_FILE_START_VERSION_DIRECTIVE)
21643 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
21644 if (X86_FILE_START_FLTUSED)
21645 fputs ("\t.global\t__fltused\n", asm_out_file);
21646 if (ix86_asm_dialect == ASM_INTEL)
21647 fputs ("\t.intel_syntax\n", asm_out_file);
21648 }
21649
21650 int
21651 x86_field_alignment (tree field, int computed)
21652 {
21653 enum machine_mode mode;
21654 tree type = TREE_TYPE (field);
21655
21656 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
21657 return computed;
21658 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
21659 ? get_inner_array_type (type) : type);
21660 if (mode == DFmode || mode == DCmode
21661 || GET_MODE_CLASS (mode) == MODE_INT
21662 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
21663 return MIN (32, computed);
21664 return computed;
21665 }
21666
21667 /* Output assembler code to FILE to increment profiler label # LABELNO
21668 for profiling a function entry. */
21669 void
21670 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
21671 {
21672 if (TARGET_64BIT)
21673 {
21674 #ifndef NO_PROFILE_COUNTERS
21675 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
21676 #endif
21677
21678 if (!TARGET_64BIT_MS_ABI && flag_pic)
21679 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
21680 else
21681 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
21682 }
21683 else if (flag_pic)
21684 {
21685 #ifndef NO_PROFILE_COUNTERS
21686 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
21687 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
21688 #endif
21689 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
21690 }
21691 else
21692 {
21693 #ifndef NO_PROFILE_COUNTERS
21694 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
21695 PROFILE_COUNT_REGISTER);
21696 #endif
21697 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
21698 }
21699 }
21700
21701 /* We don't have exact information about the insn sizes, but we may assume
21702 quite safely that we are informed about all 1 byte insns and memory
21703 address sizes. This is enough to eliminate unnecessary padding in
21704 99% of cases. */
21705
21706 static int
21707 min_insn_size (rtx insn)
21708 {
21709 int l = 0;
21710
21711 if (!INSN_P (insn) || !active_insn_p (insn))
21712 return 0;
21713
21714 /* Discard alignments we've emitted, and jump instructions. */
21715 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
21716 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
21717 return 0;
21718 if (JUMP_P (insn)
21719 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
21720 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
21721 return 0;
21722
21723 /* Important case - calls are always 5 bytes.
21724 It is common to have many calls in a row. */
21725 if (CALL_P (insn)
21726 && symbolic_reference_mentioned_p (PATTERN (insn))
21727 && !SIBLING_CALL_P (insn))
21728 return 5;
21729 if (get_attr_length (insn) <= 1)
21730 return 1;
21731
21732 /* For normal instructions we may rely on the address sizes and the
21733 presence of a symbol to require 4 bytes of encoding. This is not
21734 the case for jumps, where references are PC relative. */
21735 if (!JUMP_P (insn))
21736 {
21737 l = get_attr_length_address (insn);
21738 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
21739 l = 4;
21740 }
21741 if (l)
21742 return 1+l;
21743 else
21744 return 2;
21745 }
21746
21747 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
21748 window. */
21749
21750 static void
21751 ix86_avoid_jump_misspredicts (void)
21752 {
21753 rtx insn, start = get_insns ();
21754 int nbytes = 0, njumps = 0;
21755 int isjump = 0;
21756
21757 /* Look for all minimal intervals of instructions containing 4 jumps.
21758 The intervals are bounded by START and INSN. NBYTES is the total
21759 size of instructions in the interval including INSN and not including
21760 START. When NBYTES is smaller than 16 bytes, it is possible that the
21761 ends of START and INSN land in the same 16-byte page.
21762
21763 The smallest offset in the page at which INSN can start is the case where
21764 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
21765 We add a p2align to a 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
21766 */
21767 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
21768 {
21769
21770 nbytes += min_insn_size (insn);
21771 if (dump_file)
21772 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
21773 INSN_UID (insn), min_insn_size (insn));
21774 if ((JUMP_P (insn)
21775 && GET_CODE (PATTERN (insn)) != ADDR_VEC
21776 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
21777 || CALL_P (insn))
21778 njumps++;
21779 else
21780 continue;
21781
21782 while (njumps > 3)
21783 {
21784 start = NEXT_INSN (start);
21785 if ((JUMP_P (start)
21786 && GET_CODE (PATTERN (start)) != ADDR_VEC
21787 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
21788 || CALL_P (start))
21789 njumps--, isjump = 1;
21790 else
21791 isjump = 0;
21792 nbytes -= min_insn_size (start);
21793 }
21794 gcc_assert (njumps >= 0);
21795 if (dump_file)
21796 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
21797 INSN_UID (start), INSN_UID (insn), nbytes);
21798
21799 if (njumps == 3 && isjump && nbytes < 16)
21800 {
21801 int padsize = 15 - nbytes + min_insn_size (insn);
21802
21803 if (dump_file)
21804 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
21805 INSN_UID (insn), padsize);
21806 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
21807 }
21808 }
21809 }
21810
21811 /* AMD Athlon works faster
21812 when RET is not the destination of a conditional jump or directly preceded
21813 by another jump instruction. We avoid the penalty by inserting a NOP just
21814 before the RET instruction in such cases. */
21815 static void
21816 ix86_pad_returns (void)
21817 {
21818 edge e;
21819 edge_iterator ei;
21820
21821 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
21822 {
21823 basic_block bb = e->src;
21824 rtx ret = BB_END (bb);
21825 rtx prev;
21826 bool replace = false;
21827
21828 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
21829 || !maybe_hot_bb_p (bb))
21830 continue;
21831 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
21832 if (active_insn_p (prev) || LABEL_P (prev))
21833 break;
21834 if (prev && LABEL_P (prev))
21835 {
21836 edge e;
21837 edge_iterator ei;
21838
21839 FOR_EACH_EDGE (e, ei, bb->preds)
21840 if (EDGE_FREQUENCY (e) && e->src->index >= 0
21841 && !(e->flags & EDGE_FALLTHRU))
21842 replace = true;
21843 }
21844 if (!replace)
21845 {
21846 prev = prev_active_insn (ret);
21847 if (prev
21848 && ((JUMP_P (prev) && any_condjump_p (prev))
21849 || CALL_P (prev)))
21850 replace = true;
21851 /* Empty functions get branch mispredict even when the jump destination
21852 is not visible to us. */
21853 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
21854 replace = true;
21855 }
21856 if (replace)
21857 {
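 	  /* Replace the plain RET with return_internal_long, whose output
 	     pattern pads the return (e.g. with a rep prefix on K8) so the
 	     penalty described above is avoided.  */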
21858 emit_insn_before (gen_return_internal_long (), ret);
21859 delete_insn (ret);
21860 }
21861 }
21862 }
21863
21864 /* Implement machine specific optimizations. We implement padding of returns
21865 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
21866 static void
21867 ix86_reorg (void)
21868 {
21869 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
21870 ix86_pad_returns ();
21871 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
21872 ix86_avoid_jump_misspredicts ();
21873 }
21874
21875 /* Return nonzero when a QImode register that must be represented via a REX
21876 prefix is used. */
21877 bool
21878 x86_extended_QIreg_mentioned_p (rtx insn)
21879 {
21880 int i;
21881 extract_insn_cached (insn);
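   /* Hard registers 0-3 (ax, dx, cx, bx) have legacy 8-bit forms; QImode in
      any other register needs a REX prefix.  */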
21882 for (i = 0; i < recog_data.n_operands; i++)
21883 if (REG_P (recog_data.operand[i])
21884 && REGNO (recog_data.operand[i]) >= 4)
21885 return true;
21886 return false;
21887 }
21888
21889 /* Return nonzero when P points to register encoded via REX prefix.
21890 Called via for_each_rtx. */
21891 static int
21892 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
21893 {
21894 unsigned int regno;
21895 if (!REG_P (*p))
21896 return 0;
21897 regno = REGNO (*p);
21898 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
21899 }
21900
21901 /* Return true when INSN mentions register that must be encoded using REX
21902 prefix. */
21903 bool
21904 x86_extended_reg_mentioned_p (rtx insn)
21905 {
21906 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
21907 }
21908
21909 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
21910 optabs would emit if we didn't have TFmode patterns. */
21911
21912 void
21913 x86_emit_floatuns (rtx operands[2])
21914 {
21915 rtx neglab, donelab, i0, i1, f0, in, out;
21916 enum machine_mode mode, inmode;
21917
21918 inmode = GET_MODE (operands[1]);
21919 gcc_assert (inmode == SImode || inmode == DImode);
21920
21921 out = operands[0];
21922 in = force_reg (inmode, operands[1]);
21923 mode = GET_MODE (out);
21924 neglab = gen_label_rtx ();
21925 donelab = gen_label_rtx ();
21926 f0 = gen_reg_rtx (mode);
21927
21928 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
21929
21930 expand_float (out, in, 0);
21931
21932 emit_jump_insn (gen_jump (donelab));
21933 emit_barrier ();
21934
21935 emit_label (neglab);
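   /* The input has its sign bit set, so the signed conversion above cannot
      be used directly.  Halve the value, ORing the shifted-out low bit back
      in so no information that affects rounding is lost, convert to FP,
      and double the result.  */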
21936
21937 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
21938 1, OPTAB_DIRECT);
21939 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
21940 1, OPTAB_DIRECT);
21941 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
21942
21943 expand_float (f0, i0, 0);
21944
21945 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
21946
21947 emit_label (donelab);
21948 }
21949 \f
21950 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21951 with all elements equal to VAR. Return true if successful. */
21952
21953 static bool
21954 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
21955 rtx target, rtx val)
21956 {
21957 enum machine_mode smode, wsmode, wvmode;
21958 rtx x;
21959
21960 switch (mode)
21961 {
21962 case V2SImode:
21963 case V2SFmode:
21964 if (!mmx_ok)
21965 return false;
21966 /* FALLTHRU */
21967
21968 case V2DFmode:
21969 case V2DImode:
21970 case V4SFmode:
21971 case V4SImode:
21972 val = force_reg (GET_MODE_INNER (mode), val);
21973 x = gen_rtx_VEC_DUPLICATE (mode, val);
21974 emit_insn (gen_rtx_SET (VOIDmode, target, x));
21975 return true;
21976
21977 case V4HImode:
21978 if (!mmx_ok)
21979 return false;
21980 if (TARGET_SSE || TARGET_3DNOW_A)
21981 {
21982 val = gen_lowpart (SImode, val);
21983 x = gen_rtx_TRUNCATE (HImode, val);
21984 x = gen_rtx_VEC_DUPLICATE (mode, x);
21985 emit_insn (gen_rtx_SET (VOIDmode, target, x));
21986 return true;
21987 }
21988 else
21989 {
21990 smode = HImode;
21991 wsmode = SImode;
21992 wvmode = V2SImode;
21993 goto widen;
21994 }
21995
21996 case V8QImode:
21997 if (!mmx_ok)
21998 return false;
21999 smode = QImode;
22000 wsmode = HImode;
22001 wvmode = V4HImode;
22002 goto widen;
22003 case V8HImode:
22004 if (TARGET_SSE2)
22005 {
22006 rtx tmp1, tmp2;
22007 /* Extend HImode to SImode using a paradoxical SUBREG. */
22008 tmp1 = gen_reg_rtx (SImode);
22009 emit_move_insn (tmp1, gen_lowpart (SImode, val));
22010 /* Insert the SImode value as low element of V4SImode vector. */
22011 tmp2 = gen_reg_rtx (V4SImode);
22012 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
22013 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
22014 CONST0_RTX (V4SImode),
22015 const1_rtx);
22016 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
22017 /* Cast the V4SImode vector back to a V8HImode vector. */
22018 tmp1 = gen_reg_rtx (V8HImode);
22019 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
22020 /* Duplicate the low short through the whole low SImode word. */
22021 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
22022 /* Cast the V8HImode vector back to a V4SImode vector. */
22023 tmp2 = gen_reg_rtx (V4SImode);
22024 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
22025 /* Replicate the low element of the V4SImode vector. */
22026 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
22027 /* Cast the V4SImode vector back to V8HImode, and store in target. */
22028 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
22029 return true;
22030 }
22031 smode = HImode;
22032 wsmode = SImode;
22033 wvmode = V4SImode;
22034 goto widen;
22035 case V16QImode:
22036 if (TARGET_SSE2)
22037 {
22038 rtx tmp1, tmp2;
22039 /* Extend QImode to SImode using a paradoxical SUBREG. */
22040 tmp1 = gen_reg_rtx (SImode);
22041 emit_move_insn (tmp1, gen_lowpart (SImode, val));
22042 /* Insert the SImode value as low element of V4SImode vector. */
22043 tmp2 = gen_reg_rtx (V4SImode);
22044 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
22045 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
22046 CONST0_RTX (V4SImode),
22047 const1_rtx);
22048 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
22049 /* Cast the V4SImode vector back to a V16QImode vector. */
22050 tmp1 = gen_reg_rtx (V16QImode);
22051 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
22052 /* Duplicate the low byte through the whole low SImode word. */
22053 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
22054 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
22055 /* Cast the V16QImode vector back to a V4SImode vector. */
22056 tmp2 = gen_reg_rtx (V4SImode);
22057 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
22058 /* Replicate the low element of the V4SImode vector. */
22059 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
22060 /* Cast the V4SImode vector back to V16QImode, and store in target. */
22061 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
22062 return true;
22063 }
22064 smode = QImode;
22065 wsmode = HImode;
22066 wvmode = V8HImode;
22067 goto widen;
22068 widen:
22069 /* Replicate the value once into the next wider mode and recurse. */
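       /* For instance, broadcasting byte B as V8QImode first forms the
          HImode value (B << 8) | B and then broadcasts that as V4HImode.  */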
22070 val = convert_modes (wsmode, smode, val, true);
22071 x = expand_simple_binop (wsmode, ASHIFT, val,
22072 GEN_INT (GET_MODE_BITSIZE (smode)),
22073 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22074 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
22075
22076 x = gen_reg_rtx (wvmode);
22077 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
22078 gcc_unreachable ();
22079 emit_move_insn (target, gen_lowpart (mode, x));
22080 return true;
22081
22082 default:
22083 return false;
22084 }
22085 }
22086
22087 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
22088 whose ONE_VAR element is VAR, and other elements are zero. Return true
22089 if successful. */
22090
22091 static bool
22092 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
22093 rtx target, rtx var, int one_var)
22094 {
22095 enum machine_mode vsimode;
22096 rtx new_target;
22097 rtx x, tmp;
22098
22099 switch (mode)
22100 {
22101 case V2SFmode:
22102 case V2SImode:
22103 if (!mmx_ok)
22104 return false;
22105 /* FALLTHRU */
22106
22107 case V2DFmode:
22108 case V2DImode:
22109 if (one_var != 0)
22110 return false;
22111 var = force_reg (GET_MODE_INNER (mode), var);
22112 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
22113 emit_insn (gen_rtx_SET (VOIDmode, target, x));
22114 return true;
22115
22116 case V4SFmode:
22117 case V4SImode:
22118 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
22119 new_target = gen_reg_rtx (mode);
22120 else
22121 new_target = target;
22122 var = force_reg (GET_MODE_INNER (mode), var);
22123 x = gen_rtx_VEC_DUPLICATE (mode, var);
22124 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
22125 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
22126 if (one_var != 0)
22127 {
22128 /* We need to shuffle the value to the correct position, so
22129 create a new pseudo to store the intermediate result. */
22130
22131 /* With SSE2, we can use the integer shuffle insns. */
22132 if (mode != V4SFmode && TARGET_SSE2)
22133 {
22134 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
22135 GEN_INT (1),
22136 GEN_INT (one_var == 1 ? 0 : 1),
22137 GEN_INT (one_var == 2 ? 0 : 1),
22138 GEN_INT (one_var == 3 ? 0 : 1)));
22139 if (target != new_target)
22140 emit_move_insn (target, new_target);
22141 return true;
22142 }
22143
22144 /* Otherwise convert the intermediate result to V4SFmode and
22145 use the SSE1 shuffle instructions. */
22146 if (mode != V4SFmode)
22147 {
22148 tmp = gen_reg_rtx (V4SFmode);
22149 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
22150 }
22151 else
22152 tmp = new_target;
22153
22154 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
22155 GEN_INT (1),
22156 GEN_INT (one_var == 1 ? 0 : 1),
22157 GEN_INT (one_var == 2 ? 0+4 : 1+4),
22158 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
22159
22160 if (mode != V4SFmode)
22161 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
22162 else if (tmp != target)
22163 emit_move_insn (target, tmp);
22164 }
22165 else if (target != new_target)
22166 emit_move_insn (target, new_target);
22167 return true;
22168
22169 case V8HImode:
22170 case V16QImode:
22171 vsimode = V4SImode;
22172 goto widen;
22173 case V4HImode:
22174 case V8QImode:
22175 if (!mmx_ok)
22176 return false;
22177 vsimode = V2SImode;
22178 goto widen;
22179 widen:
22180 if (one_var != 0)
22181 return false;
22182
22183 /* Zero extend the variable element to SImode and recurse. */
22184 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
22185
22186 x = gen_reg_rtx (vsimode);
22187 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
22188 var, one_var))
22189 gcc_unreachable ();
22190
22191 emit_move_insn (target, gen_lowpart (mode, x));
22192 return true;
22193
22194 default:
22195 return false;
22196 }
22197 }
22198
22199 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
22200 consisting of the values in VALS. It is known that all elements
22201 except ONE_VAR are constants. Return true if successful. */
22202
22203 static bool
22204 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
22205 rtx target, rtx vals, int one_var)
22206 {
22207 rtx var = XVECEXP (vals, 0, one_var);
22208 enum machine_mode wmode;
22209 rtx const_vec, x;
22210
22211 const_vec = copy_rtx (vals);
22212 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
22213 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
22214
22215 switch (mode)
22216 {
22217 case V2DFmode:
22218 case V2DImode:
22219 case V2SFmode:
22220 case V2SImode:
22221 /* For the two element vectors, it's just as easy to use
22222 the general case. */
22223 return false;
22224
22225 case V4SFmode:
22226 case V4SImode:
22227 case V8HImode:
22228 case V4HImode:
22229 break;
22230
22231 case V16QImode:
22232 wmode = V8HImode;
22233 goto widen;
22234 case V8QImode:
22235 wmode = V4HImode;
22236 goto widen;
22237 widen:
22238 /* There's no way to set one QImode entry easily. Combine
22239 the variable value with its adjacent constant value, and
22240 promote to an HImode set. */
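/* For example, with a V16QImode vector and ONE_VAR == 5 (an odd index),
   VAR is shifted left by 8, IORed with the constant element at index 4,
   and the combined HImode value is then stored at element 5 >> 1 == 2
   of the V8HImode view of the vector.  */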
22241 x = XVECEXP (vals, 0, one_var ^ 1);
22242 if (one_var & 1)
22243 {
22244 var = convert_modes (HImode, QImode, var, true);
22245 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
22246 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22247 x = GEN_INT (INTVAL (x) & 0xff);
22248 }
22249 else
22250 {
22251 var = convert_modes (HImode, QImode, var, true);
22252 x = gen_int_mode (INTVAL (x) << 8, HImode);
22253 }
22254 if (x != const0_rtx)
22255 var = expand_simple_binop (HImode, IOR, var, x, var,
22256 1, OPTAB_LIB_WIDEN);
22257
22258 x = gen_reg_rtx (wmode);
22259 emit_move_insn (x, gen_lowpart (wmode, const_vec));
22260 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
22261
22262 emit_move_insn (target, gen_lowpart (mode, x));
22263 return true;
22264
22265 default:
22266 return false;
22267 }
22268
22269 emit_move_insn (target, const_vec);
22270 ix86_expand_vector_set (mmx_ok, target, var, one_var);
22271 return true;
22272 }
22273
22274 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
22275 all values variable, and none identical. */
22276
22277 static void
22278 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
22279 rtx target, rtx vals)
22280 {
22281 enum machine_mode half_mode = GET_MODE_INNER (mode);
22282 rtx op0 = NULL, op1 = NULL;
22283 bool use_vec_concat = false;
22284
22285 switch (mode)
22286 {
22287 case V2SFmode:
22288 case V2SImode:
22289 if (!mmx_ok && !TARGET_SSE)
22290 break;
22291 /* FALLTHRU */
22292
22293 case V2DFmode:
22294 case V2DImode:
22295 /* For the two element vectors, we always implement VEC_CONCAT. */
22296 op0 = XVECEXP (vals, 0, 0);
22297 op1 = XVECEXP (vals, 0, 1);
22298 use_vec_concat = true;
22299 break;
22300
22301 case V4SFmode:
22302 half_mode = V2SFmode;
22303 goto half;
22304 case V4SImode:
22305 half_mode = V2SImode;
22306 goto half;
22307 half:
22308 {
22309 rtvec v;
22310
22311 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
22312 Recurse to load the two halves. */
22313
22314 op0 = gen_reg_rtx (half_mode);
22315 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
22316 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
22317
22318 op1 = gen_reg_rtx (half_mode);
22319 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
22320 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
22321
22322 use_vec_concat = true;
22323 }
22324 break;
22325
22326 case V8HImode:
22327 case V16QImode:
22328 case V4HImode:
22329 case V8QImode:
22330 break;
22331
22332 default:
22333 gcc_unreachable ();
22334 }
22335
22336 if (use_vec_concat)
22337 {
22338 if (!register_operand (op0, half_mode))
22339 op0 = force_reg (half_mode, op0);
22340 if (!register_operand (op1, half_mode))
22341 op1 = force_reg (half_mode, op1);
22342
22343 emit_insn (gen_rtx_SET (VOIDmode, target,
22344 gen_rtx_VEC_CONCAT (mode, op0, op1)));
22345 }
22346 else
22347 {
22348 int i, j, n_elts, n_words, n_elt_per_word;
22349 enum machine_mode inner_mode;
22350 rtx words[4], shift;
22351
22352 inner_mode = GET_MODE_INNER (mode);
22353 n_elts = GET_MODE_NUNITS (mode);
22354 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
22355 n_elt_per_word = n_elts / n_words;
22356 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
22357
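/* Each word is assembled from its elements in little-endian order;
   e.g. for V8HImode on a 32-bit target (4-byte words), word I ends up
   as (elt[2*I+1] << 16) | elt[2*I].  */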
22358 for (i = 0; i < n_words; ++i)
22359 {
22360 rtx word = NULL_RTX;
22361
22362 for (j = 0; j < n_elt_per_word; ++j)
22363 {
22364 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
22365 elt = convert_modes (word_mode, inner_mode, elt, true);
22366
22367 if (j == 0)
22368 word = elt;
22369 else
22370 {
22371 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
22372 word, 1, OPTAB_LIB_WIDEN);
22373 word = expand_simple_binop (word_mode, IOR, word, elt,
22374 word, 1, OPTAB_LIB_WIDEN);
22375 }
22376 }
22377
22378 words[i] = word;
22379 }
22380
22381 if (n_words == 1)
22382 emit_move_insn (target, gen_lowpart (mode, words[0]));
22383 else if (n_words == 2)
22384 {
22385 rtx tmp = gen_reg_rtx (mode);
22386 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
22387 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
22388 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
22389 emit_move_insn (target, tmp);
22390 }
22391 else if (n_words == 4)
22392 {
22393 rtx tmp = gen_reg_rtx (V4SImode);
22394 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
22395 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
22396 emit_move_insn (target, gen_lowpart (mode, tmp));
22397 }
22398 else
22399 gcc_unreachable ();
22400 }
22401 }
22402
22403 /* Initialize vector TARGET via VALS. Suppress the use of MMX
22404 instructions unless MMX_OK is true. */
22405
22406 void
22407 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
22408 {
22409 enum machine_mode mode = GET_MODE (target);
22410 enum machine_mode inner_mode = GET_MODE_INNER (mode);
22411 int n_elts = GET_MODE_NUNITS (mode);
22412 int n_var = 0, one_var = -1;
22413 bool all_same = true, all_const_zero = true;
22414 int i;
22415 rtx x;
22416
22417 for (i = 0; i < n_elts; ++i)
22418 {
22419 x = XVECEXP (vals, 0, i);
22420 if (!CONSTANT_P (x))
22421 n_var++, one_var = i;
22422 else if (x != CONST0_RTX (inner_mode))
22423 all_const_zero = false;
22424 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
22425 all_same = false;
22426 }
22427
22428 /* Constants are best loaded from the constant pool. */
22429 if (n_var == 0)
22430 {
22431 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
22432 return;
22433 }
22434
22435 /* If all values are identical, broadcast the value. */
22436 if (all_same
22437 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
22438 XVECEXP (vals, 0, 0)))
22439 return;
22440
22441 /* Values where only one field is non-constant are best loaded from
22442 the pool and overwritten via move later. */
22443 if (n_var == 1)
22444 {
22445 if (all_const_zero
22446 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
22447 XVECEXP (vals, 0, one_var),
22448 one_var))
22449 return;
22450
22451 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
22452 return;
22453 }
22454
22455 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
22456 }
22457
22458 void
22459 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
22460 {
22461 enum machine_mode mode = GET_MODE (target);
22462 enum machine_mode inner_mode = GET_MODE_INNER (mode);
22463 bool use_vec_merge = false;
22464 rtx tmp;
22465
22466 switch (mode)
22467 {
22468 case V2SFmode:
22469 case V2SImode:
22470 if (mmx_ok)
22471 {
22472 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
22473 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
22474 if (elt == 0)
22475 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
22476 else
22477 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
22478 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
22479 return;
22480 }
22481 break;
22482
22483 case V2DImode:
22484 use_vec_merge = TARGET_SSE4_1;
22485 if (use_vec_merge)
22486 break;
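/* FALLTHRU */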
22487
22488 case V2DFmode:
22489 {
22490 rtx op0, op1;
22491
22492 /* For the two element vectors, we implement a VEC_CONCAT with
22493 the extraction of the other element. */
22494
22495 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
22496 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
22497
22498 if (elt == 0)
22499 op0 = val, op1 = tmp;
22500 else
22501 op0 = tmp, op1 = val;
22502
22503 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
22504 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
22505 }
22506 return;
22507
22508 case V4SFmode:
22509 use_vec_merge = TARGET_SSE4_1;
22510 if (use_vec_merge)
22511 break;
22512
22513 switch (elt)
22514 {
22515 case 0:
22516 use_vec_merge = true;
22517 break;
22518
22519 case 1:
22520 /* tmp = target = A B C D */
22521 tmp = copy_to_reg (target);
22522 /* target = A A B B */
22523 emit_insn (gen_sse_unpcklps (target, target, target));
22524 /* target = X A B B */
22525 ix86_expand_vector_set (false, target, val, 0);
22526 /* target = A X C D */
22527 emit_insn (gen_sse_shufps_1 (target, target, tmp,
22528 GEN_INT (1), GEN_INT (0),
22529 GEN_INT (2+4), GEN_INT (3+4)));
22530 return;
22531
22532 case 2:
22533 /* tmp = target = A B C D */
22534 tmp = copy_to_reg (target);
22535 /* tmp = X B C D */
22536 ix86_expand_vector_set (false, tmp, val, 0);
22537 /* target = A B X D */
22538 emit_insn (gen_sse_shufps_1 (target, target, tmp,
22539 GEN_INT (0), GEN_INT (1),
22540 GEN_INT (0+4), GEN_INT (3+4)));
22541 return;
22542
22543 case 3:
22544 /* tmp = target = A B C D */
22545 tmp = copy_to_reg (target);
22546 /* tmp = X B C D */
22547 ix86_expand_vector_set (false, tmp, val, 0);
22548 /* target = A B C X */
22549 emit_insn (gen_sse_shufps_1 (target, target, tmp,
22550 GEN_INT (0), GEN_INT (1),
22551 GEN_INT (2+4), GEN_INT (0+4)));
22552 return;
22553
22554 default:
22555 gcc_unreachable ();
22556 }
22557 break;
22558
22559 case V4SImode:
22560 use_vec_merge = TARGET_SSE4_1;
22561 if (use_vec_merge)
22562 break;
22563
22564 /* Element 0 handled by vec_merge below. */
22565 if (elt == 0)
22566 {
22567 use_vec_merge = true;
22568 break;
22569 }
22570
22571 if (TARGET_SSE2)
22572 {
22573 /* With SSE2, use integer shuffles to swap element 0 and ELT,
22574 store into element 0, then shuffle them back. */
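/* E.g. for ELT == 2 the selector becomes {2, 1, 0, 3}, which swaps
   lanes 0 and 2; applying the same shuffle twice restores the original
   order, leaving the newly stored value in lane ELT.  */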
22575
22576 rtx order[4];
22577
22578 order[0] = GEN_INT (elt);
22579 order[1] = const1_rtx;
22580 order[2] = const2_rtx;
22581 order[3] = GEN_INT (3);
22582 order[elt] = const0_rtx;
22583
22584 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
22585 order[1], order[2], order[3]));
22586
22587 ix86_expand_vector_set (false, target, val, 0);
22588
22589 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
22590 order[1], order[2], order[3]));
22591 }
22592 else
22593 {
22594 /* For SSE1, we have to reuse the V4SF code. */
22595 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
22596 gen_lowpart (SFmode, val), elt);
22597 }
22598 return;
22599
22600 case V8HImode:
22601 use_vec_merge = TARGET_SSE2;
22602 break;
22603 case V4HImode:
22604 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
22605 break;
22606
22607 case V16QImode:
22608 use_vec_merge = TARGET_SSE4_1;
22609 break;
22610
22611 case V8QImode:
22612 default:
22613 break;
22614 }
22615
22616 if (use_vec_merge)
22617 {
22618 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
22619 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
22620 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
22621 }
22622 else
22623 {
22624 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
22625
22626 emit_move_insn (mem, target);
22627
22628 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
22629 emit_move_insn (tmp, val);
22630
22631 emit_move_insn (target, mem);
22632 }
22633 }
22634
22635 void
22636 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
22637 {
22638 enum machine_mode mode = GET_MODE (vec);
22639 enum machine_mode inner_mode = GET_MODE_INNER (mode);
22640 bool use_vec_extr = false;
22641 rtx tmp;
22642
22643 switch (mode)
22644 {
22645 case V2SImode:
22646 case V2SFmode:
22647 if (!mmx_ok)
22648 break;
22649 /* FALLTHRU */
22650
22651 case V2DFmode:
22652 case V2DImode:
22653 use_vec_extr = true;
22654 break;
22655
22656 case V4SFmode:
22657 use_vec_extr = TARGET_SSE4_1;
22658 if (use_vec_extr)
22659 break;
22660
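/* The shuffles below move the requested element into lane 0 so that it
   can then be extracted with a plain vec_select of element 0.  */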
22661 switch (elt)
22662 {
22663 case 0:
22664 tmp = vec;
22665 break;
22666
22667 case 1:
22668 case 3:
22669 tmp = gen_reg_rtx (mode);
22670 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
22671 GEN_INT (elt), GEN_INT (elt),
22672 GEN_INT (elt+4), GEN_INT (elt+4)));
22673 break;
22674
22675 case 2:
22676 tmp = gen_reg_rtx (mode);
22677 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
22678 break;
22679
22680 default:
22681 gcc_unreachable ();
22682 }
22683 vec = tmp;
22684 use_vec_extr = true;
22685 elt = 0;
22686 break;
22687
22688 case V4SImode:
22689 use_vec_extr = TARGET_SSE4_1;
22690 if (use_vec_extr)
22691 break;
22692
22693 if (TARGET_SSE2)
22694 {
22695 switch (elt)
22696 {
22697 case 0:
22698 tmp = vec;
22699 break;
22700
22701 case 1:
22702 case 3:
22703 tmp = gen_reg_rtx (mode);
22704 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
22705 GEN_INT (elt), GEN_INT (elt),
22706 GEN_INT (elt), GEN_INT (elt)));
22707 break;
22708
22709 case 2:
22710 tmp = gen_reg_rtx (mode);
22711 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
22712 break;
22713
22714 default:
22715 gcc_unreachable ();
22716 }
22717 vec = tmp;
22718 use_vec_extr = true;
22719 elt = 0;
22720 }
22721 else
22722 {
22723 /* For SSE1, we have to reuse the V4SF code. */
22724 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
22725 gen_lowpart (V4SFmode, vec), elt);
22726 return;
22727 }
22728 break;
22729
22730 case V8HImode:
22731 use_vec_extr = TARGET_SSE2;
22732 break;
22733 case V4HImode:
22734 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
22735 break;
22736
22737 case V16QImode:
22738 use_vec_extr = TARGET_SSE4_1;
22739 break;
22740
22741 case V8QImode:
22742 /* ??? Could extract the appropriate HImode element and shift. */
22743 default:
22744 break;
22745 }
22746
22747 if (use_vec_extr)
22748 {
22749 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
22750 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
22751
22752 /* Let the rtl optimizers know about the zero extension performed. */
22753 if (inner_mode == QImode || inner_mode == HImode)
22754 {
22755 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
22756 target = gen_lowpart (SImode, target);
22757 }
22758
22759 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
22760 }
22761 else
22762 {
22763 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
22764
22765 emit_move_insn (mem, vec);
22766
22767 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
22768 emit_move_insn (target, tmp);
22769 }
22770 }
22771
22772 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
22773 pattern to reduce; DEST is the destination; IN is the input vector. */
22774
22775 void
22776 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
22777 {
22778 rtx tmp1, tmp2, tmp3;
22779
22780 tmp1 = gen_reg_rtx (V4SFmode);
22781 tmp2 = gen_reg_rtx (V4SFmode);
22782 tmp3 = gen_reg_rtx (V4SFmode);
22783
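/* Illustration: for IN = {a, b, c, d}, movhlps gives tmp1 = {c, d, c, d},
   tmp2 then holds {a OP c, b OP d, ...} in its low lanes, tmp3 broadcasts
   lane 1 of tmp2, and the final FN leaves the full reduction
   (a OP c) OP (b OP d) in lane 0 of DEST (assuming FN is commutative).  */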
22784 emit_insn (gen_sse_movhlps (tmp1, in, in));
22785 emit_insn (fn (tmp2, tmp1, in));
22786
22787 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
22788 GEN_INT (1), GEN_INT (1),
22789 GEN_INT (1+4), GEN_INT (1+4)));
22790 emit_insn (fn (dest, tmp2, tmp3));
22791 }
22792 \f
22793 /* Target hook for scalar_mode_supported_p. */
22794 static bool
22795 ix86_scalar_mode_supported_p (enum machine_mode mode)
22796 {
22797 if (DECIMAL_FLOAT_MODE_P (mode))
22798 return true;
22799 else if (mode == TFmode)
22800 return TARGET_64BIT;
22801 else
22802 return default_scalar_mode_supported_p (mode);
22803 }
22804
22805 /* Implements target hook vector_mode_supported_p. */
22806 static bool
22807 ix86_vector_mode_supported_p (enum machine_mode mode)
22808 {
22809 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
22810 return true;
22811 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
22812 return true;
22813 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
22814 return true;
22815 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
22816 return true;
22817 return false;
22818 }
22819
22820 /* Target hook for c_mode_for_suffix. */
22821 static enum machine_mode
22822 ix86_c_mode_for_suffix (char suffix)
22823 {
22824 if (TARGET_64BIT && suffix == 'q')
22825 return TFmode;
22826 if (TARGET_MMX && suffix == 'w')
22827 return XFmode;
22828
22829 return VOIDmode;
22830 }
22831
22832 /* Worker function for TARGET_MD_ASM_CLOBBERS.
22833
22834 We do this in the new i386 backend to maintain source compatibility
22835 with the old cc0-based compiler. */
22836
22837 static tree
22838 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
22839 tree inputs ATTRIBUTE_UNUSED,
22840 tree clobbers)
22841 {
22842 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
22843 clobbers);
22844 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
22845 clobbers);
22846 return clobbers;
22847 }
22848
22849 /* Implements the target hook targetm.asm_out.encode_section_info. This
22850 is not used by NetWare. */
22851
22852 static void ATTRIBUTE_UNUSED
22853 ix86_encode_section_info (tree decl, rtx rtl, int first)
22854 {
22855 default_encode_section_info (decl, rtl, first);
22856
22857 if (TREE_CODE (decl) == VAR_DECL
22858 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
22859 && ix86_in_large_data_p (decl))
22860 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
22861 }
22862
22863 /* Worker function for REVERSE_CONDITION. */
22864
22865 enum rtx_code
22866 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
22867 {
22868 return (mode != CCFPmode && mode != CCFPUmode
22869 ? reverse_condition (code)
22870 : reverse_condition_maybe_unordered (code));
22871 }
22872
22873 /* Output code to perform an x87 FP register move, from OPERANDS[1]
22874 to OPERANDS[0]. */
22875
22876 const char *
22877 output_387_reg_move (rtx insn, rtx *operands)
22878 {
22879 if (REG_P (operands[0]))
22880 {
22881 if (REG_P (operands[1])
22882 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
22883 {
22884 if (REGNO (operands[0]) == FIRST_STACK_REG)
22885 return output_387_ffreep (operands, 0);
22886 return "fstp\t%y0";
22887 }
22888 if (STACK_TOP_P (operands[0]))
22889 return "fld%z1\t%y1";
22890 return "fst\t%y0";
22891 }
22892 else if (MEM_P (operands[0]))
22893 {
22894 gcc_assert (REG_P (operands[1]));
22895 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
22896 return "fstp%z0\t%y0";
22897 else
22898 {
22899 /* There is no non-popping store to memory for XFmode.
22900 So if we need one, follow the store with a load. */
22901 if (GET_MODE (operands[0]) == XFmode)
22902 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
22903 else
22904 return "fst%z0\t%y0";
22905 }
22906 }
22907 else
22908 gcc_unreachable();
22909 }
22910
22911 /* Output code to perform a conditional jump to LABEL if the C2 flag in
22912 the FP status register is set. */
22913
22914 void
22915 ix86_emit_fp_unordered_jump (rtx label)
22916 {
22917 rtx reg = gen_reg_rtx (HImode);
22918 rtx temp;
22919
22920 emit_insn (gen_x86_fnstsw_1 (reg));
22921
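/* The x87 C2 flag is bit 10 of the status word, i.e. bit 2 (0x04) of the
   byte that fnstsw leaves in %ah.  With SAHF that byte is copied into
   EFLAGS, where C2 lands in PF, so an UNORDERED test on the flags works;
   otherwise the bit is tested directly with a TEST of 0x04.  */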
22922 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
22923 {
22924 emit_insn (gen_x86_sahf_1 (reg));
22925
22926 temp = gen_rtx_REG (CCmode, FLAGS_REG);
22927 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
22928 }
22929 else
22930 {
22931 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
22932
22933 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
22934 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
22935 }
22936
22937 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
22938 gen_rtx_LABEL_REF (VOIDmode, label),
22939 pc_rtx);
22940 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
22941
22942 emit_jump_insn (temp);
22943 predict_jump (REG_BR_PROB_BASE * 10 / 100);
22944 }
22945
22946 /* Output code to perform a log1p XFmode calculation. */
22947
22948 void ix86_emit_i387_log1p (rtx op0, rtx op1)
22949 {
22950 rtx label1 = gen_label_rtx ();
22951 rtx label2 = gen_label_rtx ();
22952
22953 rtx tmp = gen_reg_rtx (XFmode);
22954 rtx tmp2 = gen_reg_rtx (XFmode);
22955
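/* fyl2xp1 is only specified for |op1| < 1 - sqrt(2)/2, which is the
   0.29289... constant below; for larger magnitudes we compute
   log2 (1 + op1) with fyl2x instead.  */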
22956 emit_insn (gen_absxf2 (tmp, op1));
22957 emit_insn (gen_cmpxf (tmp,
22958 CONST_DOUBLE_FROM_REAL_VALUE (
22959 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
22960 XFmode)));
22961 emit_jump_insn (gen_bge (label1));
22962
22963 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
22964 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
22965 emit_jump (label2);
22966
22967 emit_label (label1);
22968 emit_move_insn (tmp, CONST1_RTX (XFmode));
22969 emit_insn (gen_addxf3 (tmp, op1, tmp));
22970 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
22971 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
22972
22973 emit_label (label2);
22974 }
22975
22976 /* Output code to perform a Newton-Raphson approximation of a single precision
22977 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
22978
22979 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
22980 {
22981 rtx x0, x1, e0, e1, two;
22982
22983 x0 = gen_reg_rtx (mode);
22984 e0 = gen_reg_rtx (mode);
22985 e1 = gen_reg_rtx (mode);
22986 x1 = gen_reg_rtx (mode);
22987
22988 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
22989
22990 if (VECTOR_MODE_P (mode))
22991 two = ix86_build_const_vector (SFmode, true, two);
22992
22993 two = force_reg (mode, two);
22994
22995 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
22996
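/* This is one Newton-Raphson step for f(x) = 1/x - b: if the estimate is
   x0 = (1/b)*(1 + e), then x1 = x0*(2 - b*x0) = (1/b)*(1 - e*e), so the
   relative error of the roughly 12-bit rcpss estimate is squared.  */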
22997 /* x0 = 1./b estimate */
22998 emit_insn (gen_rtx_SET (VOIDmode, x0,
22999 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
23000 UNSPEC_RCP)));
23001 /* e0 = x0 * b */
23002 emit_insn (gen_rtx_SET (VOIDmode, e0,
23003 gen_rtx_MULT (mode, x0, b)));
23004 /* e1 = 2. - e0 */
23005 emit_insn (gen_rtx_SET (VOIDmode, e1,
23006 gen_rtx_MINUS (mode, two, e0)));
23007 /* x1 = x0 * e1 */
23008 emit_insn (gen_rtx_SET (VOIDmode, x1,
23009 gen_rtx_MULT (mode, x0, e1)));
23010 /* res = a * x1 */
23011 emit_insn (gen_rtx_SET (VOIDmode, res,
23012 gen_rtx_MULT (mode, a, x1)));
23013 }
23014
23015 /* Output code to perform a Newton-Raphson approximation of a
23016 single precision floating point [reciprocal] square root. */
23017
23018 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
23019 bool recip)
23020 {
23021 rtx x0, e0, e1, e2, e3, three, half, zero, mask;
23022
23023 x0 = gen_reg_rtx (mode);
23024 e0 = gen_reg_rtx (mode);
23025 e1 = gen_reg_rtx (mode);
23026 e2 = gen_reg_rtx (mode);
23027 e3 = gen_reg_rtx (mode);
23028
23029 three = CONST_DOUBLE_FROM_REAL_VALUE (dconst3, SFmode);
23030 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, SFmode);
23031
23032 mask = gen_reg_rtx (mode);
23033
23034 if (VECTOR_MODE_P (mode))
23035 {
23036 three = ix86_build_const_vector (SFmode, true, three);
23037 half = ix86_build_const_vector (SFmode, true, half);
23038 }
23039
23040 three = force_reg (mode, three);
23041 half = force_reg (mode, half);
23042
23043 zero = force_reg (mode, CONST0_RTX(mode));
23044
23045 /* sqrt(a) = 0.5 * a * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a))
23046 1.0 / sqrt(a) = 0.5 * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a)) */
23047
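/* The mask guards against a == 0: rsqrtss (0) is +Inf, and the later
   multiply by a would give 0 * Inf = NaN, so lanes where a == 0 have x0
   forced to zero and sqrt (0) comes out as 0.  */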
23048 /* Compare a to zero. */
23049 emit_insn (gen_rtx_SET (VOIDmode, mask,
23050 gen_rtx_NE (mode, a, zero)));
23051
23052 /* x0 = 1./sqrt(a) estimate */
23053 emit_insn (gen_rtx_SET (VOIDmode, x0,
23054 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
23055 UNSPEC_RSQRT)));
23056 /* Filter out infinity. */
23057 if (VECTOR_MODE_P (mode))
23058 emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (V4SFmode, x0),
23059 gen_rtx_AND (mode,
23060 gen_lowpart (V4SFmode, x0),
23061 gen_lowpart (V4SFmode, mask))));
23062 else
23063 emit_insn (gen_rtx_SET (VOIDmode, x0,
23064 gen_rtx_AND (mode, x0, mask)));
23065
23066 /* e0 = x0 * a */
23067 emit_insn (gen_rtx_SET (VOIDmode, e0,
23068 gen_rtx_MULT (mode, x0, a)));
23069 /* e1 = e0 * x0 */
23070 emit_insn (gen_rtx_SET (VOIDmode, e1,
23071 gen_rtx_MULT (mode, e0, x0)));
23072 /* e2 = 3. - e1 */
23073 emit_insn (gen_rtx_SET (VOIDmode, e2,
23074 gen_rtx_MINUS (mode, three, e1)));
23075 if (recip)
23076 /* e3 = .5 * x0 */
23077 emit_insn (gen_rtx_SET (VOIDmode, e3,
23078 gen_rtx_MULT (mode, half, x0)));
23079 else
23080 /* e3 = .5 * e0 */
23081 emit_insn (gen_rtx_SET (VOIDmode, e3,
23082 gen_rtx_MULT (mode, half, e0)));
23083 /* ret = e2 * e3 */
23084 emit_insn (gen_rtx_SET (VOIDmode, res,
23085 gen_rtx_MULT (mode, e2, e3)));
23086 }
23087
23088 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
23089
23090 static void ATTRIBUTE_UNUSED
23091 i386_solaris_elf_named_section (const char *name, unsigned int flags,
23092 tree decl)
23093 {
23094 /* With Binutils 2.15, the "@unwind" marker must be specified on
23095 every occurrence of the ".eh_frame" section, not just the first
23096 one. */
23097 if (TARGET_64BIT
23098 && strcmp (name, ".eh_frame") == 0)
23099 {
23100 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
23101 flags & SECTION_WRITE ? "aw" : "a");
23102 return;
23103 }
23104 default_elf_asm_named_section (name, flags, decl);
23105 }
23106
23107 /* Return the mangling of TYPE if it is an extended fundamental type. */
23108
23109 static const char *
23110 ix86_mangle_type (const_tree type)
23111 {
23112 type = TYPE_MAIN_VARIANT (type);
23113
23114 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
23115 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
23116 return NULL;
23117
23118 switch (TYPE_MODE (type))
23119 {
23120 case TFmode:
23121 /* __float128 is "g". */
23122 return "g";
23123 case XFmode:
23124 /* "long double" or __float80 is "e". */
23125 return "e";
23126 default:
23127 return NULL;
23128 }
23129 }
23130
23131 /* For 32-bit code we can save PIC register setup by using the
23132 __stack_chk_fail_local hidden function instead of calling
23133 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
23134 register, so it is better to call __stack_chk_fail directly. */
23135
23136 static tree
23137 ix86_stack_protect_fail (void)
23138 {
23139 return TARGET_64BIT
23140 ? default_external_stack_protect_fail ()
23141 : default_hidden_stack_protect_fail ();
23142 }
23143
23144 /* Select a format to encode pointers in exception handling data. CODE
23145 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
23146 true if the symbol may be affected by dynamic relocations.
23147
23148 ??? All x86 object file formats are capable of representing this.
23149 After all, the relocation needed is the same as for the call insn.
23150 Whether or not a particular assembler allows us to enter such, I
23151 guess we'll have to see. */
23152 int
23153 asm_preferred_eh_data_format (int code, int global)
23154 {
23155 if (flag_pic)
23156 {
23157 int type = DW_EH_PE_sdata8;
23158 if (!TARGET_64BIT
23159 || ix86_cmodel == CM_SMALL_PIC
23160 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
23161 type = DW_EH_PE_sdata4;
23162 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
23163 }
23164 if (ix86_cmodel == CM_SMALL
23165 || (ix86_cmodel == CM_MEDIUM && code))
23166 return DW_EH_PE_udata4;
23167 return DW_EH_PE_absptr;
23168 }
23169 \f
23170 /* Copy the sign of SIGN onto the nonnegative value ABS_VALUE,
23171 storing the result in RESULT. If MASK is non-null, it shall be a mask
23172 that masks out the sign-bit. */
23173 static void
23174 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
23175 {
23176 enum machine_mode mode = GET_MODE (sign);
23177 rtx sgn = gen_reg_rtx (mode);
23178 if (mask == NULL_RTX)
23179 {
23180 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
23181 if (!VECTOR_MODE_P (mode))
23182 {
23183 /* We need to generate a scalar mode mask in this case. */
23184 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
23185 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
23186 mask = gen_reg_rtx (mode);
23187 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
23188 }
23189 }
23190 else
23191 mask = gen_rtx_NOT (mode, mask);
23192 emit_insn (gen_rtx_SET (VOIDmode, sgn,
23193 gen_rtx_AND (mode, mask, sign)));
23194 emit_insn (gen_rtx_SET (VOIDmode, result,
23195 gen_rtx_IOR (mode, abs_value, sgn)));
23196 }
23197
23198 /* Expand fabs (OP0) and return a new rtx that holds the result. The
23199 mask for masking out the sign-bit is stored in *SMASK, if that is
23200 non-null. */
23201 static rtx
23202 ix86_expand_sse_fabs (rtx op0, rtx *smask)
23203 {
23204 enum machine_mode mode = GET_MODE (op0);
23205 rtx xa, mask;
23206
23207 xa = gen_reg_rtx (mode);
23208 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
23209 if (!VECTOR_MODE_P (mode))
23210 {
23211 /* We need to generate a scalar mode mask in this case. */
23212 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
23213 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
23214 mask = gen_reg_rtx (mode);
23215 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
23216 }
23217 emit_insn (gen_rtx_SET (VOIDmode, xa,
23218 gen_rtx_AND (mode, op0, mask)));
23219
23220 if (smask)
23221 *smask = mask;
23222
23223 return xa;
23224 }
23225
23226 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
23227 swapping the operands if SWAP_OPERANDS is true. The expanded
23228 code is a forward jump to a newly created label in case the
23229 comparison is true. The generated label rtx is returned. */
23230 static rtx
23231 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
23232 bool swap_operands)
23233 {
23234 rtx label, tmp;
23235
23236 if (swap_operands)
23237 {
23238 tmp = op0;
23239 op0 = op1;
23240 op1 = tmp;
23241 }
23242
23243 label = gen_label_rtx ();
23244 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
23245 emit_insn (gen_rtx_SET (VOIDmode, tmp,
23246 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
23247 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
23248 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
23249 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
23250 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
23251 JUMP_LABEL (tmp) = label;
23252
23253 return label;
23254 }
23255
23256 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
23257 using comparison code CODE. Operands are swapped for the comparison if
23258 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
23259 static rtx
23260 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
23261 bool swap_operands)
23262 {
23263 enum machine_mode mode = GET_MODE (op0);
23264 rtx mask = gen_reg_rtx (mode);
23265
23266 if (swap_operands)
23267 {
23268 rtx tmp = op0;
23269 op0 = op1;
23270 op1 = tmp;
23271 }
23272
23273 if (mode == DFmode)
23274 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
23275 gen_rtx_fmt_ee (code, mode, op0, op1)));
23276 else
23277 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
23278 gen_rtx_fmt_ee (code, mode, op0, op1)));
23279
23280 return mask;
23281 }
23282
23283 /* Generate and return a rtx of mode MODE for 2**n where n is the number
23284 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
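/* Callers use this constant for the usual rounding trick: for values XA
   with 0 <= XA < TWO52, the sum XA + TWO52 lies in a binade where the
   spacing of representable values is 1, so the fraction bits are rounded
   away (in the current rounding mode) and subtracting TWO52 again leaves
   XA rounded to an integer.  */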
23285 static rtx
23286 ix86_gen_TWO52 (enum machine_mode mode)
23287 {
23288 REAL_VALUE_TYPE TWO52r;
23289 rtx TWO52;
23290
23291 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
23292 TWO52 = const_double_from_real_value (TWO52r, mode);
23293 TWO52 = force_reg (mode, TWO52);
23294
23295 return TWO52;
23296 }
23297
23298 /* Expand SSE sequence for computing lround from OP1 storing
23299 into OP0. */
23300 void
23301 ix86_expand_lround (rtx op0, rtx op1)
23302 {
23303 /* C code for the stuff we're doing below:
23304 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
23305 return (long)tmp;
23306 */
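/* nextafter (0.5, 0.0), the largest value below 0.5, is used rather than
   0.5 itself: for the largest representable value just below 0.5, adding
   exactly 0.5 would round up to 1.0 and make lround go the wrong way.  */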
23307 enum machine_mode mode = GET_MODE (op1);
23308 const struct real_format *fmt;
23309 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
23310 rtx adj;
23311
23312 /* load nextafter (0.5, 0.0) */
23313 fmt = REAL_MODE_FORMAT (mode);
23314 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
23315 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
23316
23317 /* adj = copysign (0.5, op1) */
23318 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
23319 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
23320
23321 /* adj = op1 + adj */
23322 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
23323
23324 /* op0 = (imode)adj */
23325 expand_fix (op0, adj, 0);
23326 }
23327
23328 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1
23329 storing into OPERAND0. */
23330 void
23331 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
23332 {
23333 /* C code for the stuff we're doing below (for do_floor):
23334 xi = (long)op1;
23335 xi -= (double)xi > op1 ? 1 : 0;
23336 return xi;
23337 */
23338 enum machine_mode fmode = GET_MODE (op1);
23339 enum machine_mode imode = GET_MODE (op0);
23340 rtx ireg, freg, label, tmp;
23341
23342 /* reg = (long)op1 */
23343 ireg = gen_reg_rtx (imode);
23344 expand_fix (ireg, op1, 0);
23345
23346 /* freg = (double)reg */
23347 freg = gen_reg_rtx (fmode);
23348 expand_float (freg, ireg, 0);
23349
23350 /* ireg = (freg > op1) ? ireg - 1 : ireg */
23351 label = ix86_expand_sse_compare_and_jump (UNLE,
23352 freg, op1, !do_floor);
23353 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
23354 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
23355 emit_move_insn (ireg, tmp);
23356
23357 emit_label (label);
23358 LABEL_NUSES (label) = 1;
23359
23360 emit_move_insn (op0, ireg);
23361 }
23362
23363 /* Expand rint, rounding OPERAND1 to an integer in the current rounding
23364 mode, and storing the result in OPERAND0. */
23365 void
23366 ix86_expand_rint (rtx operand0, rtx operand1)
23367 {
23368 /* C code for the stuff we're doing below:
23369 xa = fabs (operand1);
23370 if (!isless (xa, 2**52))
23371 return operand1;
23372 xa = xa + 2**52 - 2**52;
23373 return copysign (xa, operand1);
23374 */
23375 enum machine_mode mode = GET_MODE (operand0);
23376 rtx res, xa, label, TWO52, mask;
23377
23378 res = gen_reg_rtx (mode);
23379 emit_move_insn (res, operand1);
23380
23381 /* xa = abs (operand1) */
23382 xa = ix86_expand_sse_fabs (res, &mask);
23383
23384 /* if (!isless (xa, TWO52)) goto label; */
23385 TWO52 = ix86_gen_TWO52 (mode);
23386 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23387
23388 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
23389 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
23390
23391 ix86_sse_copysign_to_positive (res, xa, res, mask);
23392
23393 emit_label (label);
23394 LABEL_NUSES (label) = 1;
23395
23396 emit_move_insn (operand0, res);
23397 }
23398
23399 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
23400 into OPERAND0, avoiding DImode truncation (usable for DFmode on 32-bit targets). */
23401 void
23402 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
23403 {
23404 /* C code for the stuff we expand below.
23405 double xa = fabs (x), x2;
23406 if (!isless (xa, TWO52))
23407 return x;
23408 xa = xa + TWO52 - TWO52;
23409 x2 = copysign (xa, x);
23410 Compensate. Floor:
23411 if (x2 > x)
23412 x2 -= 1;
23413 Compensate. Ceil:
23414 if (x2 < x)
23415 x2 -= -1;
23416 return x2;
23417 */
23418 enum machine_mode mode = GET_MODE (operand0);
23419 rtx xa, TWO52, tmp, label, one, res, mask;
23420
23421 TWO52 = ix86_gen_TWO52 (mode);
23422
23423 /* Temporary for holding the result, initialized to the input
23424 operand to ease control flow. */
23425 res = gen_reg_rtx (mode);
23426 emit_move_insn (res, operand1);
23427
23428 /* xa = abs (operand1) */
23429 xa = ix86_expand_sse_fabs (res, &mask);
23430
23431 /* if (!isless (xa, TWO52)) goto label; */
23432 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23433
23434 /* xa = xa + TWO52 - TWO52; */
23435 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
23436 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
23437
23438 /* xa = copysign (xa, operand1) */
23439 ix86_sse_copysign_to_positive (xa, xa, res, mask);
23440
23441 /* generate 1.0 or -1.0 */
23442 one = force_reg (mode,
23443 const_double_from_real_value (do_floor
23444 ? dconst1 : dconstm1, mode));
23445
23446 /* Compensate: xa -= (xa > operand1 ? 1 : 0) for floor; for ceil, ONE is -1.0 and the comparison is reversed. */
23447 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
23448 emit_insn (gen_rtx_SET (VOIDmode, tmp,
23449 gen_rtx_AND (mode, one, tmp)));
23450 /* We always need to subtract here to preserve signed zero. */
23451 tmp = expand_simple_binop (mode, MINUS,
23452 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
23453 emit_move_insn (res, tmp);
23454
23455 emit_label (label);
23456 LABEL_NUSES (label) = 1;
23457
23458 emit_move_insn (operand0, res);
23459 }
23460
23461 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
23462 into OPERAND0. */
23463 void
23464 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
23465 {
23466 /* C code for the stuff we expand below.
23467 double xa = fabs (x), x2;
23468 if (!isless (xa, TWO52))
23469 return x;
23470 x2 = (double)(long)x;
23471 Compensate. Floor:
23472 if (x2 > x)
23473 x2 -= 1;
23474 Compensate. Ceil:
23475 if (x2 < x)
23476 x2 += 1;
23477 if (HONOR_SIGNED_ZEROS (mode))
23478 return copysign (x2, x);
23479 return x2;
23480 */
23481 enum machine_mode mode = GET_MODE (operand0);
23482 rtx xa, xi, TWO52, tmp, label, one, res, mask;
23483
23484 TWO52 = ix86_gen_TWO52 (mode);
23485
23486 /* Temporary for holding the result, initialized to the input
23487 operand to ease control flow. */
23488 res = gen_reg_rtx (mode);
23489 emit_move_insn (res, operand1);
23490
23491 /* xa = abs (operand1) */
23492 xa = ix86_expand_sse_fabs (res, &mask);
23493
23494 /* if (!isless (xa, TWO52)) goto label; */
23495 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23496
23497 /* xa = (double)(long)x */
23498 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
23499 expand_fix (xi, res, 0);
23500 expand_float (xa, xi, 0);
23501
23502 /* generate 1.0 */
23503 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
23504
23505 /* Compensate: xa -= (xa > operand1 ? 1 : 0) for floor; xa += (xa < operand1 ? 1 : 0) for ceil. */
23506 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
23507 emit_insn (gen_rtx_SET (VOIDmode, tmp,
23508 gen_rtx_AND (mode, one, tmp)));
23509 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
23510 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
23511 emit_move_insn (res, tmp);
23512
23513 if (HONOR_SIGNED_ZEROS (mode))
23514 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
23515
23516 emit_label (label);
23517 LABEL_NUSES (label) = 1;
23518
23519 emit_move_insn (operand0, res);
23520 }
23521
23522 /* Expand SSE sequence for computing round from OPERAND1 storing
23523 into OPERAND0, using a sequence that does not rely on DImode truncation
23524 via cvttsd2siq, which is only available on 64-bit targets. */
23525 void
23526 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
23527 {
23528 /* C code for the stuff we expand below.
23529 double xa = fabs (x), xa2, x2;
23530 if (!isless (xa, TWO52))
23531 return x;
23532 Using the absolute value and copying back sign makes
23533 -0.0 -> -0.0 correct.
23534 xa2 = xa + TWO52 - TWO52;
23535 Compensate.
23536 dxa = xa2 - xa;
23537 if (dxa <= -0.5)
23538 xa2 += 1;
23539 else if (dxa > 0.5)
23540 xa2 -= 1;
23541 x2 = copysign (xa2, x);
23542 return x2;
23543 */
23544 enum machine_mode mode = GET_MODE (operand0);
23545 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
23546
23547 TWO52 = ix86_gen_TWO52 (mode);
23548
23549 /* Temporary for holding the result, initialized to the input
23550 operand to ease control flow. */
23551 res = gen_reg_rtx (mode);
23552 emit_move_insn (res, operand1);
23553
23554 /* xa = abs (operand1) */
23555 xa = ix86_expand_sse_fabs (res, &mask);
23556
23557 /* if (!isless (xa, TWO52)) goto label; */
23558 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23559
23560 /* xa2 = xa + TWO52 - TWO52; */
23561 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
23562 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
23563
23564 /* dxa = xa2 - xa; */
23565 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
23566
23567 /* generate 0.5, 1.0 and -0.5 */
23568 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
23569 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
23570 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
23571 0, OPTAB_DIRECT);
23572
23573 /* Compensate. */
23574 tmp = gen_reg_rtx (mode);
23575 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
23576 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
23577 emit_insn (gen_rtx_SET (VOIDmode, tmp,
23578 gen_rtx_AND (mode, one, tmp)));
23579 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
23580 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
23581 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
23582 emit_insn (gen_rtx_SET (VOIDmode, tmp,
23583 gen_rtx_AND (mode, one, tmp)));
23584 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
23585
23586 /* res = copysign (xa2, operand1) */
23587 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
23588
23589 emit_label (label);
23590 LABEL_NUSES (label) = 1;
23591
23592 emit_move_insn (operand0, res);
23593 }
23594
23595 /* Expand SSE sequence for computing trunc from OPERAND1 storing
23596 into OPERAND0. */
23597 void
23598 ix86_expand_trunc (rtx operand0, rtx operand1)
23599 {
23600 /* C code for SSE variant we expand below.
23601 double xa = fabs (x), x2;
23602 if (!isless (xa, TWO52))
23603 return x;
23604 x2 = (double)(long)x;
23605 if (HONOR_SIGNED_ZEROS (mode))
23606 return copysign (x2, x);
23607 return x2;
23608 */
23609 enum machine_mode mode = GET_MODE (operand0);
23610 rtx xa, xi, TWO52, label, res, mask;
23611
23612 TWO52 = ix86_gen_TWO52 (mode);
23613
23614 /* Temporary for holding the result, initialized to the input
23615 operand to ease control flow. */
23616 res = gen_reg_rtx (mode);
23617 emit_move_insn (res, operand1);
23618
23619 /* xa = abs (operand1) */
23620 xa = ix86_expand_sse_fabs (res, &mask);
23621
23622 /* if (!isless (xa, TWO52)) goto label; */
23623 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23624
23625 /* x = (double)(long)x */
23626 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
23627 expand_fix (xi, res, 0);
23628 expand_float (res, xi, 0);
23629
23630 if (HONOR_SIGNED_ZEROS (mode))
23631 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
23632
23633 emit_label (label);
23634 LABEL_NUSES (label) = 1;
23635
23636 emit_move_insn (operand0, res);
23637 }
23638
23639 /* Expand SSE sequence for computing trunc from OPERAND1 storing
23640 into OPERAND0, avoiding DImode truncation (usable for DFmode on 32-bit targets). */
23641 void
23642 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
23643 {
23644 enum machine_mode mode = GET_MODE (operand0);
23645 rtx xa, mask, TWO52, label, one, res, smask, tmp;
23646
23647 /* C code for SSE variant we expand below.
23648 double xa = fabs (x), xa2, x2;
23649 if (!isless (xa, TWO52))
23650 return x;
23651 xa2 = xa + TWO52 - TWO52;
23652 Compensate:
23653 if (xa2 > xa)
23654 xa2 -= 1.0;
23655 x2 = copysign (xa2, x);
23656 return x2;
23657 */
23658
23659 TWO52 = ix86_gen_TWO52 (mode);
23660
23661 /* Temporary for holding the result, initialized to the input
23662 operand to ease control flow. */
23663 res = gen_reg_rtx (mode);
23664 emit_move_insn (res, operand1);
23665
23666 /* xa = abs (operand1) */
23667 xa = ix86_expand_sse_fabs (res, &smask);
23668
23669 /* if (!isless (xa, TWO52)) goto label; */
23670 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23671
23672 /* res = xa + TWO52 - TWO52; */
23673 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
23674 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
23675 emit_move_insn (res, tmp);
23676
23677 /* generate 1.0 */
23678 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
23679
23680 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
23681 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
23682 emit_insn (gen_rtx_SET (VOIDmode, mask,
23683 gen_rtx_AND (mode, mask, one)));
23684 tmp = expand_simple_binop (mode, MINUS,
23685 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
23686 emit_move_insn (res, tmp);
23687
23688 /* res = copysign (res, operand1) */
23689 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
23690
23691 emit_label (label);
23692 LABEL_NUSES (label) = 1;
23693
23694 emit_move_insn (operand0, res);
23695 }
23696
23697 /* Expand SSE sequence for computing round from OPERAND1 storing
23698 into OPERAND0. */
23699 void
23700 ix86_expand_round (rtx operand0, rtx operand1)
23701 {
23702 /* C code for the stuff we're doing below:
23703 double xa = fabs (x);
23704 if (!isless (xa, TWO52))
23705 return x;
23706 xa = (double)(long)(xa + nextafter (0.5, 0.0));
23707 return copysign (xa, x);
23708 */
23709 enum machine_mode mode = GET_MODE (operand0);
23710 rtx res, TWO52, xa, label, xi, half, mask;
23711 const struct real_format *fmt;
23712 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
23713
23714 /* Temporary for holding the result, initialized to the input
23715 operand to ease control flow. */
23716 res = gen_reg_rtx (mode);
23717 emit_move_insn (res, operand1);
23718
23719 TWO52 = ix86_gen_TWO52 (mode);
23720 xa = ix86_expand_sse_fabs (res, &mask);
23721 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23722
23723 /* load nextafter (0.5, 0.0) */
23724 fmt = REAL_MODE_FORMAT (mode);
23725 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
23726 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
23727
23728 /* xa = xa + 0.5 */
23729 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
23730 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
23731
23732 /* xa = (double)(int64_t)xa */
23733 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
23734 expand_fix (xi, xa, 0);
23735 expand_float (xa, xi, 0);
23736
23737 /* res = copysign (xa, operand1) */
23738 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
23739
23740 emit_label (label);
23741 LABEL_NUSES (label) = 1;
23742
23743 emit_move_insn (operand0, res);
23744 }
23745
23746 \f
23747 /* Table of valid machine attributes. */
23748 static const struct attribute_spec ix86_attribute_table[] =
23749 {
23750 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
23751 /* Stdcall attribute says callee is responsible for popping arguments
23752 if they are not variable. */
23753 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
23754 /* Fastcall attribute says callee is responsible for popping arguments
23755 if they are not variable. */
23756 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
23757 /* Cdecl attribute says the callee is a normal C declaration */
23758 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
23759 /* Regparm attribute specifies how many integer arguments are to be
23760 passed in registers. */
23761 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
23762 /* Sseregparm attribute says we are using x86_64 calling conventions
23763 for FP arguments. */
23764 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
23765 /* force_align_arg_pointer says this function realigns the stack at entry. */
23766 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
23767 false, true, true, ix86_handle_cconv_attribute },
23768 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23769 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
23770 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
23771 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
23772 #endif
23773 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
23774 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
23775 #ifdef SUBTARGET_ATTRIBUTE_TABLE
23776 SUBTARGET_ATTRIBUTE_TABLE,
23777 #endif
23778 { NULL, 0, 0, false, false, false, NULL }
23779 };
23780
23781 /* Implement targetm.vectorize.builtin_vectorization_cost. */
23782 static int
23783 x86_builtin_vectorization_cost (bool runtime_test)
23784 {
23785 /* If the branch of the runtime test is taken, i.e. the vectorized
23786 version is skipped, this incurs a misprediction cost (because the
23787 vectorized version is expected to be the fall-through). So we subtract
23788 the latency of a mispredicted branch from the costs that are incurred
23789 when the vectorized version is executed.
23790
23791 TODO: The values in individual target tables have to be tuned or new
23792 fields may be needed. For example, on K8 the default branch path is the
23793 not-taken path. If the taken path is predicted correctly, the minimum
23794 penalty of going down the taken-path is 1 cycle. If the taken-path is
23795 not predicted correctly, then the minimum penalty is 10 cycles. */
23796
23797 if (runtime_test)
23798 {
23799 return (-(ix86_cost->cond_taken_branch_cost));
23800 }
23801 else
23802 return 0;
23803 }
23804
23805 /* Initialize the GCC target structure. */
23806 #undef TARGET_ATTRIBUTE_TABLE
23807 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
23808 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23809 # undef TARGET_MERGE_DECL_ATTRIBUTES
23810 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
23811 #endif
23812
23813 #undef TARGET_COMP_TYPE_ATTRIBUTES
23814 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
23815
23816 #undef TARGET_INIT_BUILTINS
23817 #define TARGET_INIT_BUILTINS ix86_init_builtins
23818 #undef TARGET_EXPAND_BUILTIN
23819 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
23820
23821 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
23822 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
23823 ix86_builtin_vectorized_function
23824
23825 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
23826 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
23827
23828 #undef TARGET_BUILTIN_RECIPROCAL
23829 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
23830
23831 #undef TARGET_ASM_FUNCTION_EPILOGUE
23832 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
23833
23834 #undef TARGET_ENCODE_SECTION_INFO
23835 #ifndef SUBTARGET_ENCODE_SECTION_INFO
23836 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
23837 #else
23838 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
23839 #endif
23840
23841 #undef TARGET_ASM_OPEN_PAREN
23842 #define TARGET_ASM_OPEN_PAREN ""
23843 #undef TARGET_ASM_CLOSE_PAREN
23844 #define TARGET_ASM_CLOSE_PAREN ""
23845
23846 #undef TARGET_ASM_ALIGNED_HI_OP
23847 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
23848 #undef TARGET_ASM_ALIGNED_SI_OP
23849 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
23850 #ifdef ASM_QUAD
23851 #undef TARGET_ASM_ALIGNED_DI_OP
23852 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
23853 #endif
23854
23855 #undef TARGET_ASM_UNALIGNED_HI_OP
23856 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
23857 #undef TARGET_ASM_UNALIGNED_SI_OP
23858 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
23859 #undef TARGET_ASM_UNALIGNED_DI_OP
23860 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
23861
23862 #undef TARGET_SCHED_ADJUST_COST
23863 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
23864 #undef TARGET_SCHED_ISSUE_RATE
23865 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
23866 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
23867 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
23868 ia32_multipass_dfa_lookahead
23869
23870 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
23871 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
23872
23873 #ifdef HAVE_AS_TLS
23874 #undef TARGET_HAVE_TLS
23875 #define TARGET_HAVE_TLS true
23876 #endif
23877 #undef TARGET_CANNOT_FORCE_CONST_MEM
23878 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
23879 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
23880 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
23881
23882 #undef TARGET_DELEGITIMIZE_ADDRESS
23883 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
23884
23885 #undef TARGET_MS_BITFIELD_LAYOUT_P
23886 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
23887
23888 #if TARGET_MACHO
23889 #undef TARGET_BINDS_LOCAL_P
23890 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
23891 #endif
23892 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23893 #undef TARGET_BINDS_LOCAL_P
23894 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
23895 #endif
23896
23897 #undef TARGET_ASM_OUTPUT_MI_THUNK
23898 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
23899 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
23900 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
23901
23902 #undef TARGET_ASM_FILE_START
23903 #define TARGET_ASM_FILE_START x86_file_start
23904
23905 #undef TARGET_DEFAULT_TARGET_FLAGS
23906 #define TARGET_DEFAULT_TARGET_FLAGS \
23907 (TARGET_DEFAULT \
23908 | TARGET_SUBTARGET_DEFAULT \
23909 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
23910
23911 #undef TARGET_HANDLE_OPTION
23912 #define TARGET_HANDLE_OPTION ix86_handle_option
23913
23914 #undef TARGET_RTX_COSTS
23915 #define TARGET_RTX_COSTS ix86_rtx_costs
23916 #undef TARGET_ADDRESS_COST
23917 #define TARGET_ADDRESS_COST ix86_address_cost
23918
23919 #undef TARGET_FIXED_CONDITION_CODE_REGS
23920 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
23921 #undef TARGET_CC_MODES_COMPATIBLE
23922 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
23923
23924 #undef TARGET_MACHINE_DEPENDENT_REORG
23925 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
23926
23927 #undef TARGET_BUILD_BUILTIN_VA_LIST
23928 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
23929
23930 #undef TARGET_MD_ASM_CLOBBERS
23931 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
23932
23933 #undef TARGET_PROMOTE_PROTOTYPES
23934 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
23935 #undef TARGET_STRUCT_VALUE_RTX
23936 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
23937 #undef TARGET_SETUP_INCOMING_VARARGS
23938 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
23939 #undef TARGET_MUST_PASS_IN_STACK
23940 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
23941 #undef TARGET_PASS_BY_REFERENCE
23942 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
23943 #undef TARGET_INTERNAL_ARG_POINTER
23944 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
23945 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
23946 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
23947 #undef TARGET_STRICT_ARGUMENT_NAMING
23948 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
23949
23950 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
23951 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
23952
23953 #undef TARGET_SCALAR_MODE_SUPPORTED_P
23954 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
23955
23956 #undef TARGET_VECTOR_MODE_SUPPORTED_P
23957 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
23958
23959 #undef TARGET_C_MODE_FOR_SUFFIX
23960 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
23961
23962 #ifdef HAVE_AS_TLS
23963 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
23964 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
23965 #endif
23966
23967 #ifdef SUBTARGET_INSERT_ATTRIBUTES
23968 #undef TARGET_INSERT_ATTRIBUTES
23969 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
23970 #endif
23971
23972 #undef TARGET_MANGLE_TYPE
23973 #define TARGET_MANGLE_TYPE ix86_mangle_type
23974
23975 #undef TARGET_STACK_PROTECT_FAIL
23976 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
23977
23978 #undef TARGET_FUNCTION_VALUE
23979 #define TARGET_FUNCTION_VALUE ix86_function_value
23980
23981 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
23982 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
23983
23984 struct gcc_target targetm = TARGET_INITIALIZER;
23985 \f
23986 #include "gt-i386.h"