1 /**************************************************************************
3 * Copyright 2010 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions of the Software.
26 **************************************************************************/
29 #include "util/u_debug.h"
30 #include "lp_bld_debug.h"
31 #include "lp_bld_const.h"
32 #include "lp_bld_format.h"
33 #include "lp_bld_gather.h"
34 #include "lp_bld_init.h"
35 #include "lp_bld_intr.h"
39 * Get the pointer to one element from scatter positions in memory.
41 * @sa lp_build_gather()
44 lp_build_gather_elem_ptr(struct gallivm_state
*gallivm
,
46 LLVMValueRef base_ptr
,
53 assert(LLVMTypeOf(base_ptr
) == LLVMPointerType(LLVMInt8TypeInContext(gallivm
->context
), 0));
59 LLVMValueRef index
= lp_build_const_int32(gallivm
, i
);
60 offset
= LLVMBuildExtractElement(gallivm
->builder
, offsets
, index
, "");
63 ptr
= LLVMBuildGEP(gallivm
->builder
, base_ptr
, &offset
, 1, "");
70 * Gather one element from scatter positions in memory.
72 * @sa lp_build_gather()
75 lp_build_gather_elem(struct gallivm_state
*gallivm
,
80 LLVMValueRef base_ptr
,
83 boolean vector_justify
)
85 LLVMTypeRef src_type
= LLVMIntTypeInContext(gallivm
->context
, src_width
);
86 LLVMTypeRef src_ptr_type
= LLVMPointerType(src_type
, 0);
87 LLVMTypeRef dst_elem_type
= LLVMIntTypeInContext(gallivm
->context
, dst_width
);
91 assert(LLVMTypeOf(base_ptr
) == LLVMPointerType(LLVMInt8TypeInContext(gallivm
->context
), 0));
93 ptr
= lp_build_gather_elem_ptr(gallivm
, length
, base_ptr
, offsets
, i
);
94 ptr
= LLVMBuildBitCast(gallivm
->builder
, ptr
, src_ptr_type
, "");
95 res
= LLVMBuildLoad(gallivm
->builder
, ptr
, "");
98 * On some archs we probably really want to avoid having to deal
99 * with alignments lower than 4 bytes (if fetch size is a power of
100 * two >= 32). On x86 it doesn't matter, however.
101 * We should be able to guarantee full alignment for any kind of texture
102 * fetch (except ARB_texture_buffer_range, oops), but not vertex fetch
103 * (there's PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY and friends
104 * but I don't think that's quite what we wanted).
105 * For ARB_texture_buffer_range, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT
106 * looks like a good fit, but it seems this cap bit (and OpenGL) aren't
107 * enforcing what we want (which is what d3d10 does, the offset needs to
108 * be aligned to element size, but GL has bytes regardless of element
109 * size which would only leave us with minimum alignment restriction of 16
110 * which doesn't make much sense if the type isn't 4x32bit). Due to
111 * translation of offsets to first_elem in sampler_views it actually seems
112 * gallium could not do anything else except 16 no matter what...
115 lp_set_load_alignment(res
, 1);
118 assert(src_width
<= dst_width
);
119 if (src_width
> dst_width
) {
120 res
= LLVMBuildTrunc(gallivm
->builder
, res
, dst_elem_type
, "");
121 } else if (src_width
< dst_width
) {
122 res
= LLVMBuildZExt(gallivm
->builder
, res
, dst_elem_type
, "");
123 if (vector_justify
) {
124 #ifdef PIPE_ARCH_BIG_ENDIAN
125 res
= LLVMBuildShl(gallivm
->builder
, res
,
126 LLVMConstInt(dst_elem_type
, dst_width
- src_width
, 0), "");
136 * Gather elements from scatter positions in memory into a single vector.
137 * Use for fetching texels from a texture.
138 * For SSE, typical values are length=4, src_width=32, dst_width=32.
140 * When src_width < dst_width, the return value can be justified in
142 * "integer justification" is used when the caller treats the destination
143 * as a packed integer bitmask, as described by the channels' "shift" and
145 * "vector justification" is used when the caller casts the destination
146 * to a vector and needs channel X to be in vector element 0.
148 * @param length length of the offsets
149 * @param src_width src element width in bits
150 * @param dst_width result element width in bits (src will be expanded to fit)
151 * @param aligned whether the data is guaranteed to be aligned (to src_width)
152 * @param base_ptr base pointer, should be a i8 pointer type.
153 * @param offsets vector with offsets
154 * @param vector_justify select vector rather than integer justification
157 lp_build_gather(struct gallivm_state
*gallivm
,
162 LLVMValueRef base_ptr
,
163 LLVMValueRef offsets
,
164 boolean vector_justify
)
170 return lp_build_gather_elem(gallivm
, length
,
171 src_width
, dst_width
, aligned
,
172 base_ptr
, offsets
, 0, vector_justify
);
176 LLVMTypeRef dst_elem_type
= LLVMIntTypeInContext(gallivm
->context
, dst_width
);
177 LLVMTypeRef dst_vec_type
= LLVMVectorType(dst_elem_type
, length
);
180 res
= LLVMGetUndef(dst_vec_type
);
181 for (i
= 0; i
< length
; ++i
) {
182 LLVMValueRef index
= lp_build_const_int32(gallivm
, i
);
184 elem
= lp_build_gather_elem(gallivm
, length
,
185 src_width
, dst_width
, aligned
,
186 base_ptr
, offsets
, i
, vector_justify
);
187 res
= LLVMBuildInsertElement(gallivm
->builder
, res
, elem
, index
, "");
195 lp_build_gather_values(struct gallivm_state
* gallivm
,
196 LLVMValueRef
* values
,
197 unsigned value_count
)
199 LLVMTypeRef vec_type
= LLVMVectorType(LLVMTypeOf(values
[0]), value_count
);
200 LLVMBuilderRef builder
= gallivm
->builder
;
201 LLVMValueRef vec
= LLVMGetUndef(vec_type
);
204 for (i
= 0; i
< value_count
; i
++) {
205 LLVMValueRef index
= lp_build_const_int32(gallivm
, i
);
206 vec
= LLVMBuildInsertElement(builder
, vec
, values
[i
], index
, "");