llvmpipe: Emit SSE intrinsics based on runtime cpu capability check.
[mesa.git] / src / gallium / drivers / llvmpipe / lp_bld_conv.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 /**
30 * @file
31 * Helper functions for type conversions.
32 *
33 * We want to use the fastest type for a given computation whenever feasible.
34 * The other side of this is that we need to be able to convert between several
35 * types accurately and efficiently.
36 *
37 * Conversion between types of different bit width is quite complex.
38 *
39 * There are a few invariants to keep in mind in type conversions:
40 *
41 * - register width must remain constant:
42 *
43 * src_type.width * src_type.length == dst_type.width * dst_type.length
44 *
45 * - total number of elements must remain constant:
46 *
47 * src_type.length * num_srcs == dst_type.length * num_dsts
48 *
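 * For example (a sketch assuming 128-bit SIMD registers): converting four
 * vectors of 4 x float32 into a single vector of 16 x unorm8 keeps both
 * invariants, since 32 * 4 == 8 * 16 (register width) and 4 * 4 == 16 * 1
 * (total number of elements).
 *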
49 * It is not always possible to do the conversion both accurately and
50 * efficiently, usually due to lack of adequate machine instructions. In these
51 * cases it is important not to take shortcuts here and sacrifice accuracy, as
52 * these functions can be used anywhere. In the future we might have a
53 * precision parameter which can gauge the accuracy vs efficiency compromise,
54 * but for now, if the data conversion between two stages happens to be the
55 * bottleneck, then most likely we should just avoid converting at all and run
56 * both stages with the same type.
57 *
58 * Make sure to run lp_test_conv unit test after any change to this file.
59 *
60 * @author Jose Fonseca <jfonseca@vmware.com>
61 */
62
63
64 #include "util/u_debug.h"
65 #include "util/u_math.h"
66 #include "util/u_cpu_detect.h"
67
68 #include "lp_bld_type.h"
69 #include "lp_bld_const.h"
70 #include "lp_bld_intr.h"
71 #include "lp_bld_arit.h"
72 #include "lp_bld_conv.h"
73
74
75 /**
76 * Special case for converting clamped IEEE-754 floats to unsigned norms.
77 *
78 * The mathematical voodoo below may seem excessive but it is actually
79 * paramount we do it this way for several reasons. First, there is no single
80 * precision FP to unsigned integer conversion Intel SSE instruction. Second,
81 * even if there was, since the FP's mantissa takes only a fraction of the
82 * register bits, the typical scale-and-cast approach would require double
83 * precision for accurate results, and therefore half the throughput.
84 *
85 * Although the result values can be scaled to an arbitrary bit width specified
86 * by dst_width, the actual result type will have the same width as the source type.
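 *
 * A worked example (assuming single precision, i.e. a 23-bit mantissa, and
 * dst_width = n = 8): scale = 255/256 and bias = 2^(23 - 8) = 32768.0. For
 * src = 1.0 the sum is 1.0 * 255/256 + 32768.0 = 32768.99609375, whose
 * mantissa holds the integer 255 in its 8 least significant bits; the
 * bitcast to integer below then exposes that value directly.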
87 */
88 LLVMValueRef
89 lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
90 struct lp_type src_type,
91 unsigned dst_width,
92 LLVMValueRef src)
93 {
94 LLVMTypeRef int_vec_type = lp_build_int_vec_type(src_type);
95 LLVMValueRef res;
96 unsigned mantissa;
97 unsigned n;
98 unsigned long long ubound;
99 unsigned long long mask;
100 double scale;
101 double bias;
102
103 assert(src_type.floating);
104
105 mantissa = lp_mantissa(src_type);
106
107 /* We cannot carry more bits than the mantissa */
108 n = MIN2(mantissa, dst_width);
109
110 /* These magic coefficients will make the desired result appear in the
111 * least significant bits of the mantissa.
112 */
113 ubound = ((unsigned long long)1 << n);
114 mask = ubound - 1;
115 scale = (double)mask/ubound;
116 bias = (double)((unsigned long long)1 << (mantissa - n));
117
118 res = LLVMBuildMul(builder, src, lp_build_const_scalar(src_type, scale), "");
119 res = LLVMBuildAdd(builder, res, lp_build_const_scalar(src_type, bias), "");
120 res = LLVMBuildBitCast(builder, res, int_vec_type, "");
121
122 if(dst_width > n) {
123 int shift = dst_width - n;
124 res = LLVMBuildShl(builder, res, lp_build_int_const_scalar(src_type, shift), "");
125
126 /* TODO: Fill in the empty lower bits for additional precision? */
127 #if 0
128 {
129 LLVMValueRef msb;
130 msb = LLVMBuildLShr(builder, res, lp_build_int_const_scalar(src_type, dst_width - 1), "");
131 msb = LLVMBuildShl(builder, msb, lp_build_int_const_scalar(src_type, shift), "");
132 msb = LLVMBuildSub(builder, msb, lp_build_int_const_scalar(src_type, 1), "");
133 res = LLVMBuildOr(builder, res, msb, "");
134 }
135 #elif 0
136 while(shift > 0) {
137 res = LLVMBuildOr(builder, res, LLVMBuildLShr(builder, res, lp_build_int_const_scalar(src_type, n), ""), "");
138 shift -= n;
139 n *= 2;
140 }
141 #endif
142 }
143 else
144 res = LLVMBuildAnd(builder, res, lp_build_int_const_scalar(src_type, mask), "");
145
146 return res;
147 }
148
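/*
 * For reference, a minimal scalar sketch of the trick used above (purely
 * illustrative, not part of the build; it assumes single precision floats,
 * a 23-bit mantissa and dst_width <= 23, so n == dst_width and no final
 * shift is needed):
 *
 *    static unsigned
 *    clamped_float_to_unorm_ref(float x, unsigned n)
 *    {
 *       union { float f; unsigned u; } tmp;
 *       double scale = (double)((1ull << n) - 1) / (double)(1ull << n);
 *       double bias = (double)(1ull << (23 - n));
 *       tmp.f = (float)(x * scale + bias);
 *       return tmp.u & (unsigned)((1ull << n) - 1);
 *    }
 *
 * clamped_float_to_unorm_ref() is a hypothetical helper name, used only to
 * document the bit manipulation performed by the LLVM IR above.
 */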
149
150 /**
151 * Inverse of lp_build_clamped_float_to_unsigned_norm above.
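 *
 * The integer value is OR'ed into the low mantissa bits of the constant
 * bias = 2^(mantissa - n), the result is bitcast to float, the bias is
 * subtracted, and the remainder is rescaled. E.g. (a sketch assuming
 * src_width = 8 and single precision): src = 255 OR'ed into 32768.0 gives
 * 32768.99609375; subtracting 32768.0 and multiplying by 256/255 yields 1.0.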
152 */
153 LLVMValueRef
154 lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
155 unsigned src_width,
156 struct lp_type dst_type,
157 LLVMValueRef src)
158 {
159 LLVMTypeRef vec_type = lp_build_vec_type(dst_type);
160 LLVMTypeRef int_vec_type = lp_build_int_vec_type(dst_type);
161 LLVMValueRef bias_;
162 LLVMValueRef res;
163 unsigned mantissa;
164 unsigned n;
165 unsigned long long ubound;
166 unsigned long long mask;
167 double scale;
168 double bias;
169
170 mantissa = lp_mantissa(dst_type);
171
172 n = MIN2(mantissa, src_width);
173
174 ubound = ((unsigned long long)1 << n);
175 mask = ubound - 1;
176 scale = (double)ubound/mask;
177 bias = (double)((unsigned long long)1 << (mantissa - n));
178
179 res = src;
180
181 if(src_width > mantissa) {
182 int shift = src_width - mantissa;
183 res = LLVMBuildLShr(builder, res, lp_build_int_const_scalar(dst_type, shift), "");
184 }
185
186 bias_ = lp_build_const_scalar(dst_type, bias);
187
188 res = LLVMBuildOr(builder,
189 res,
190 LLVMBuildBitCast(builder, bias_, int_vec_type, ""), "");
191
192 res = LLVMBuildBitCast(builder, res, vec_type, "");
193
194 res = LLVMBuildSub(builder, res, bias_, "");
195 res = LLVMBuildMul(builder, res, lp_build_const_scalar(dst_type, scale), "");
196
197 return res;
198 }
199
200
201 /**
202 * Build shuffle vectors that match PUNPCKLxx and PUNPCKHxx instructions.
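 *
 * E.g. for n = 8 and lo_hi = 0 this yields the shuffle mask
 * <0, 8, 1, 9, 2, 10, 3, 11>, which interleaves the low halves of the two
 * operands (the PUNPCKLxx pattern); lo_hi = 1 gives the high-half
 * (PUNPCKHxx) counterpart.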
203 */
204 static LLVMValueRef
205 lp_build_const_unpack_shuffle(unsigned n, unsigned lo_hi)
206 {
207 LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
208 unsigned i, j;
209
210 assert(n <= LP_MAX_VECTOR_LENGTH);
211 assert(lo_hi < 2);
212
213 /* TODO: cache results in a static table */
214
215 for(i = 0, j = lo_hi*n/2; i < n; i += 2, ++j) {
216 elems[i + 0] = LLVMConstInt(LLVMInt32Type(), 0 + j, 0);
217 elems[i + 1] = LLVMConstInt(LLVMInt32Type(), n + j, 0);
218 }
219
220 return LLVMConstVector(elems, n);
221 }
222
223
224 /**
225 * Build shuffle vectors that match PACKxx instructions.
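 *
 * E.g. for n = 8 this yields <0, 2, 4, 6, 8, 10, 12, 14>, i.e. the even
 * elements of the two concatenated operands, which is how the generic
 * shuffle fallback in lp_build_pack2() emulates a truncating PACKxx.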
226 */
227 static LLVMValueRef
228 lp_build_const_pack_shuffle(unsigned n)
229 {
230 LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
231 unsigned i;
232
233 assert(n <= LP_MAX_VECTOR_LENGTH);
234
235 /* TODO: cache results in a static table */
236
237 for(i = 0; i < n; ++i)
238 elems[i] = LLVMConstInt(LLVMInt32Type(), 2*i, 0);
239
240 return LLVMConstVector(elems, n);
241 }
242
243
244 /**
245 * Expand the bit width.
246 *
247 * This will only change the number of bits with which the values are
248 * represented, not the values themselves.
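 *
 * E.g. one vector of 16 x u8 is expanded into two vectors of 8 x u16 by
 * interleaving with zero, so (on a little-endian target) a source byte
 * 0xff becomes the 16-bit value 0x00ff in the destination.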
249 */
250 static void
251 lp_build_expand(LLVMBuilderRef builder,
252 struct lp_type src_type,
253 struct lp_type dst_type,
254 LLVMValueRef src,
255 LLVMValueRef *dst, unsigned num_dsts)
256 {
257 unsigned num_tmps;
258 unsigned i;
259
260 /* Register width must remain constant */
261 assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
262
263 /* We must not lose or gain channels, only precision */
264 assert(src_type.length == dst_type.length * num_dsts);
265
266 num_tmps = 1;
267 dst[0] = src;
268
269 while(src_type.width < dst_type.width) {
270 struct lp_type new_type = src_type;
271 LLVMTypeRef new_vec_type;
272
273 new_type.width *= 2;
274 new_type.length /= 2;
275 new_vec_type = lp_build_vec_type(new_type);
276
277 for(i = num_tmps; i--; ) {
278 LLVMValueRef zero;
279 LLVMValueRef shuffle_lo;
280 LLVMValueRef shuffle_hi;
281 LLVMValueRef lo;
282 LLVMValueRef hi;
283
284 zero = lp_build_zero(src_type);
285 shuffle_lo = lp_build_const_unpack_shuffle(src_type.length, 0);
286 shuffle_hi = lp_build_const_unpack_shuffle(src_type.length, 1);
287
288 /* PUNPCKLBW, PUNPCKHBW */
289 lo = LLVMBuildShuffleVector(builder, dst[i], zero, shuffle_lo, "");
290 hi = LLVMBuildShuffleVector(builder, dst[i], zero, shuffle_hi, "");
291
292 dst[2*i + 0] = LLVMBuildBitCast(builder, lo, new_vec_type, "");
293 dst[2*i + 1] = LLVMBuildBitCast(builder, hi, new_vec_type, "");
294 }
295
296 src_type = new_type;
297
298 num_tmps *= 2;
299 }
300
301 assert(num_tmps == num_dsts);
302 }
303
304
305 /**
306 * Non-interleaved pack.
307 *
308 * This will move values as follows:
309 *
310 * lo = __ l0 __ l1 __ l2 __.. __ ln
311 * hi = __ h0 __ h1 __ h2 __.. __ hn
312 * res = l0 l1 l2 .. ln h0 h1 h2 .. hn
313 *
314 * TODO: handle saturation consistently.
315 */
316 static LLVMValueRef
317 lp_build_pack2(LLVMBuilderRef builder,
318 struct lp_type src_type,
319 struct lp_type dst_type,
320 boolean clamped,
321 LLVMValueRef lo,
322 LLVMValueRef hi)
323 {
324 LLVMTypeRef src_vec_type = lp_build_vec_type(src_type);
325 LLVMTypeRef dst_vec_type = lp_build_vec_type(dst_type);
326 LLVMValueRef shuffle;
327 LLVMValueRef res;
328
329 /* Register width must remain constant */
330 assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
331
332 /* We must not lose or gain channels, only precision */
333 assert(src_type.length * 2 == dst_type.length);
334
335 assert(!src_type.floating);
336 assert(!dst_type.floating);
337
338 if(util_cpu_caps.has_sse2 && src_type.width * src_type.length == 128) {
339 /* All X86 non-interleaved pack instructions take signed inputs and
340 * saturate them, so saturate beforehand. */
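      /*
       * A sketch of why, assuming a u16 -> u8 pack: an unsigned input of
       * 0x8000 would be read as -32768 by PACKUSWB and saturate to 0x00
       * instead of 0xff, so unsigned inputs are clamped to dst_max first.
       */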
341 if(!src_type.sign && !clamped) {
342 struct lp_build_context bld;
343 unsigned dst_bits = dst_type.sign ? dst_type.width - 1 : dst_type.width;
344 LLVMValueRef dst_max = lp_build_int_const_scalar(src_type, ((unsigned long long)1 << dst_bits) - 1);
345 lp_build_context_init(&bld, builder, src_type);
346 lo = lp_build_min(&bld, lo, dst_max);
347 hi = lp_build_min(&bld, hi, dst_max);
348 }
349
350 switch(src_type.width) {
351 case 32:
352 if(dst_type.sign || !util_cpu_caps.has_sse4_1)
353 res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", src_vec_type, lo, hi);
354 else
355 /* PACKUSDW is the only intrinsic with a consistent signature */
356 return lp_build_intrinsic_binary(builder, "llvm.x86.sse41.packusdw", dst_vec_type, lo, hi);
357 break;
358
359 case 16:
360 if(dst_type.sign)
361 res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packsswb.128", src_vec_type, lo, hi);
362 else
363 res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packuswb.128", src_vec_type, lo, hi);
364 break;
365
366 default:
367 assert(0);
368 return LLVMGetUndef(dst_vec_type);
369 break;
370 }
371
372 res = LLVMBuildBitCast(builder, res, dst_vec_type, "");
373 return res;
374 }
375
376 lo = LLVMBuildBitCast(builder, lo, dst_vec_type, "");
377 hi = LLVMBuildBitCast(builder, hi, dst_vec_type, "");
378
379 shuffle = lp_build_const_pack_shuffle(dst_type.length);
380
381 res = LLVMBuildShuffleVector(builder, lo, hi, shuffle, "");
382
383 return res;
384 }
385
386
387 /**
388 * Truncate the bit width.
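 *
 * E.g. (a sketch) packing four vectors of 4 x i32 into one vector of
 * 16 x u8 takes two passes of lp_build_pack2(): pairs of 4 x i32 become
 * 8 x i16, then pairs of 8 x i16 become 16 x u8.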
389 *
390 * TODO: Handle saturation consistently.
391 */
392 static LLVMValueRef
393 lp_build_pack(LLVMBuilderRef builder,
394 struct lp_type src_type,
395 struct lp_type dst_type,
396 boolean clamped,
397 const LLVMValueRef *src, unsigned num_srcs)
398 {
399 LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
400 unsigned i;
401
402 /* Register width must remain constant */
403 assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
404
405 /* We must not lose or gain channels, only precision */
406 assert(src_type.length * num_srcs == dst_type.length);
407
408 for(i = 0; i < num_srcs; ++i)
409 tmp[i] = src[i];
410
411 while(src_type.width > dst_type.width) {
412 struct lp_type new_type = src_type;
413
414 new_type.width /= 2;
415 new_type.length *= 2;
416
417 /* Take the sign change into consideration only in the last step */
418 if(new_type.width == dst_type.width)
419 new_type.sign = dst_type.sign;
420
421 num_srcs /= 2;
422
423 for(i = 0; i < num_srcs; ++i)
424 tmp[i] = lp_build_pack2(builder, src_type, new_type, clamped,
425 tmp[2*i + 0], tmp[2*i + 1]);
426
427 src_type = new_type;
428 }
429
430 assert(num_srcs == 1);
431
432 return tmp[0];
433 }
434
435
436 /**
437 * Generic type conversion.
438 *
439 * TODO: Take a precision argument, or even better, add a new precision member
440 * to the lp_type union.
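 *
 * As a sketch of the typical llvmpipe use: converting four vectors of
 * 4 x float32 in [0, 1] into one vector of 16 x unorm8 first clamps the
 * inputs, then scales each lane to an 8-bit integer with
 * lp_build_clamped_float_to_unsigned_norm(), and finally narrows
 * 32 -> 16 -> 8 bits with lp_build_pack().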
441 */
442 void
443 lp_build_conv(LLVMBuilderRef builder,
444 struct lp_type src_type,
445 struct lp_type dst_type,
446 const LLVMValueRef *src, unsigned num_srcs,
447 LLVMValueRef *dst, unsigned num_dsts)
448 {
449 struct lp_type tmp_type;
450 LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
451 unsigned num_tmps;
452 unsigned i;
453
454 /* Register width must remain constant */
455 assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
456
457 /* We must not lose or gain channels, only precision */
458 assert(src_type.length * num_srcs == dst_type.length * num_dsts);
459
460 assert(src_type.length <= LP_MAX_VECTOR_LENGTH);
461 assert(dst_type.length <= LP_MAX_VECTOR_LENGTH);
462
463 tmp_type = src_type;
464 for(i = 0; i < num_srcs; ++i)
465 tmp[i] = src[i];
466 num_tmps = num_srcs;
467
468 /*
469 * Clamp if necessary
470 */
471
472 if(memcmp(&src_type, &dst_type, sizeof src_type) != 0) {
473 struct lp_build_context bld;
474 double src_min = lp_const_min(src_type);
475 double dst_min = lp_const_min(dst_type);
476 double src_max = lp_const_max(src_type);
477 double dst_max = lp_const_max(dst_type);
478 LLVMValueRef thres;
479
480 lp_build_context_init(&bld, builder, tmp_type);
481
482 if(src_min < dst_min) {
483 if(dst_min == 0.0)
484 thres = bld.zero;
485 else
486 thres = lp_build_const_scalar(src_type, dst_min);
487 for(i = 0; i < num_tmps; ++i)
488 tmp[i] = lp_build_max(&bld, tmp[i], thres);
489 }
490
491 if(src_max > dst_max) {
492 if(dst_max == 1.0)
493 thres = bld.one;
494 else
495 thres = lp_build_const_scalar(src_type, dst_max);
496 for(i = 0; i < num_tmps; ++i)
497 tmp[i] = lp_build_min(&bld, tmp[i], thres);
498 }
499 }
500
501 /*
502 * Scale to the narrowest range
503 */
504
505 if(dst_type.floating) {
506 /* Nothing to do */
507 }
508 else if(tmp_type.floating) {
509 if(!dst_type.fixed && !dst_type.sign && dst_type.norm) {
510 for(i = 0; i < num_tmps; ++i) {
511 tmp[i] = lp_build_clamped_float_to_unsigned_norm(builder,
512 tmp_type,
513 dst_type.width,
514 tmp[i]);
515 }
516 tmp_type.floating = FALSE;
517 }
518 else {
519 double dst_scale = lp_const_scale(dst_type);
520 LLVMTypeRef tmp_vec_type;
521
522 if (dst_scale != 1.0) {
523 LLVMValueRef scale = lp_build_const_scalar(tmp_type, dst_scale);
524 for(i = 0; i < num_tmps; ++i)
525 tmp[i] = LLVMBuildMul(builder, tmp[i], scale, "");
526 }
527
528 /* Use an equally sized integer for intermediate computations */
529 tmp_type.floating = FALSE;
530 tmp_vec_type = lp_build_vec_type(tmp_type);
531 for(i = 0; i < num_tmps; ++i) {
532 #if 0
533 if(dst_type.sign)
534 tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, "");
535 else
536 tmp[i] = LLVMBuildFPToUI(builder, tmp[i], tmp_vec_type, "");
537 #else
538 /* FIXME: there is no SSE counterpart for LLVMBuildFPToUI */
539 tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, "");
540 #endif
541 }
542 }
543 }
544 else {
545 unsigned src_shift = lp_const_shift(src_type);
546 unsigned dst_shift = lp_const_shift(dst_type);
547
548 /* FIXME: compensate different offsets too */
549 if(src_shift > dst_shift) {
550 LLVMValueRef shift = lp_build_int_const_scalar(tmp_type, src_shift - dst_shift);
551 for(i = 0; i < num_tmps; ++i)
552 if(src_type.sign)
553 tmp[i] = LLVMBuildAShr(builder, tmp[i], shift, "");
554 else
555 tmp[i] = LLVMBuildLShr(builder, tmp[i], shift, "");
556 }
557 }
558
559 /*
560 * Truncate or expand bit width
561 */
562
563 assert(!tmp_type.floating || tmp_type.width == dst_type.width);
564
565 if(tmp_type.width > dst_type.width) {
566 assert(num_dsts == 1);
567 tmp[0] = lp_build_pack(builder, tmp_type, dst_type, TRUE, tmp, num_tmps);
568 tmp_type.width = dst_type.width;
569 tmp_type.length = dst_type.length;
570 num_tmps = 1;
571 }
572
573 if(tmp_type.width < dst_type.width) {
574 assert(num_tmps == 1);
575 lp_build_expand(builder, tmp_type, dst_type, tmp[0], tmp, num_dsts);
576 tmp_type.width = dst_type.width;
577 tmp_type.length = dst_type.length;
578 num_tmps = num_dsts;
579 }
580
581 assert(tmp_type.width == dst_type.width);
582 assert(tmp_type.length == dst_type.length);
583 assert(num_tmps == num_dsts);
584
585 /*
586 * Scale to the widest range
587 */
588
589 if(src_type.floating) {
590 /* Nothing to do */
591 }
592 else if(!src_type.floating && dst_type.floating) {
593 if(!src_type.fixed && !src_type.sign && src_type.norm) {
594 for(i = 0; i < num_tmps; ++i) {
595 tmp[i] = lp_build_unsigned_norm_to_float(builder,
596 src_type.width,
597 dst_type,
598 tmp[i]);
599 }
600 tmp_type.floating = TRUE;
601 }
602 else {
603 double src_scale = lp_const_scale(src_type);
604 LLVMTypeRef tmp_vec_type;
605
606 /* Use an equally sized float for intermediate computations */
607 tmp_type.floating = TRUE;
608 tmp_type.sign = TRUE;
609 tmp_vec_type = lp_build_vec_type(tmp_type);
610 for(i = 0; i < num_tmps; ++i) {
611 #if 0
612 if(dst_type.sign)
613 tmp[i] = LLVMBuildSIToFP(builder, tmp[i], tmp_vec_type, "");
614 else
615 tmp[i] = LLVMBuildUIToFP(builder, tmp[i], tmp_vec_type, "");
616 #else
617 /* FIXME: there is no SSE counterpart for LLVMBuildUIToFP */
618 tmp[i] = LLVMBuildSIToFP(builder, tmp[i], tmp_vec_type, "");
619 #endif
620 }
621
622 if (src_scale != 1.0) {
623 LLVMValueRef scale = lp_build_const_scalar(tmp_type, 1.0/src_scale);
624 for(i = 0; i < num_tmps; ++i)
625 tmp[i] = LLVMBuildMul(builder, tmp[i], scale, "");
626 }
627 }
628 }
629 else {
630 unsigned src_shift = lp_const_shift(src_type);
631 unsigned dst_shift = lp_const_shift(dst_type);
632
633 /* FIXME: compensate different offsets too */
634 if(src_shift < dst_shift) {
635 LLVMValueRef shift = lp_build_int_const_scalar(tmp_type, dst_shift - src_shift);
636 for(i = 0; i < num_tmps; ++i)
637 tmp[i] = LLVMBuildShl(builder, tmp[i], shift, "");
638 }
639 }
640
641 for(i = 0; i < num_dsts; ++i)
642 dst[i] = tmp[i];
643 }
644
645
646 /**
647 * Bit mask conversion.
648 *
649 * This will convert integer masks between the given types.
650 *
651 * The mask values should be 0 or -1, i.e. all bits either set to zero or one.
652 * Any other value will likely cause unpredictable results.
653 *
654 * This is basically a very trimmed down version of lp_build_conv.
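 *
 * E.g. converting four 4 x i32 masks into a single 16 x i8 mask simply
 * packs the lanes with lp_build_pack(); since every lane is 0 or -1,
 * the narrowing preserves the values exactly.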
655 */
656 void
657 lp_build_conv_mask(LLVMBuilderRef builder,
658 struct lp_type src_type,
659 struct lp_type dst_type,
660 const LLVMValueRef *src, unsigned num_srcs,
661 LLVMValueRef *dst, unsigned num_dsts)
662 {
663 /* Register width must remain constant */
664 assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
665
666 /* We must not lose or gain channels, only precision */
667 assert(src_type.length * num_srcs == dst_type.length * num_dsts);
668
669 /*
670 * Drop the type interpretation flags.
671 *
672 * We assume all values are 0 or -1, so only the bit pattern matters.
673 */
674
675 src_type.floating = FALSE;
676 src_type.fixed = FALSE;
677 src_type.sign = TRUE;
678 src_type.norm = FALSE;
679
680 dst_type.floating = FALSE;
681 dst_type.fixed = FALSE;
682 dst_type.sign = TRUE;
683 dst_type.norm = FALSE;
684
685 /*
686 * Truncate or expand bit width
687 */
688
689 if(src_type.width > dst_type.width) {
690 assert(num_dsts == 1);
691 dst[0] = lp_build_pack(builder, src_type, dst_type, TRUE, src, num_srcs);
692 }
693 else if(src_type.width < dst_type.width) {
694 assert(num_srcs == 1);
695 lp_build_expand(builder, src_type, dst_type, src[0], dst, num_dsts);
696 }
697 else {
698 assert(num_srcs == num_dsts);
699 memcpy(dst, src, num_dsts * sizeof *dst);
700 }
701 }