1 /**************************************************************************
3 * Copyright 2015 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
28 #include "lp_bld_format.h"
29 #include "lp_bld_type.h"
30 #include "lp_bld_struct.h"
31 #include "lp_bld_const.h"
32 #include "lp_bld_flow.h"
33 #include "lp_bld_swizzle.h"
35 #include "util/u_math.h"
40 * Complex block-compression based formats are handled here by using a cache,
41 * so re-decoding of every pixel is not required.
42 * Especially for bilinear filtering, texel reuse is very high hence even
43 * a small cache helps.
44 * The elements in the cache are the decoded blocks - currently things
45 * are restricted to formats which are 4x4 block based, and the decoded
46 * texels must fit into 4x8 bits.
47 * The cache is direct mapped so hit rates aren't all that great and cache
48 * thrashing could happen.
50 * @author Roland Scheidegger <sroland@vmware.com>
54 #if LP_BUILD_FORMAT_CACHE_DEBUG
/*
 * Emits IR that bumps one of the cache's debug access counters: the struct
 * member of 'ptr' selected by 'index' is loaded, increased by a 64-bit
 * integer constant, and stored back.
 * Only the ACCESS_TOTAL and ACCESS_MISS members are accepted (asserted).
 * NOTE(review): part of the parameter list and the addend's value are not
 * visible in this excerpt; call sites pass (gallivm, cache, count, member),
 * so the addend is presumably that count -- confirm.
 */
56 update_cache_access(struct gallivm_state
*gallivm
,
61 LLVMBuilderRef builder
= gallivm
->builder
;
62 LLVMValueRef member_ptr
, cache_access
;
/* Guard: this helper may only touch the two counter members. */
64 assert(index
== LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL
||
65 index
== LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS
);
/* Read-modify-write of the selected counter member. */
67 member_ptr
= lp_build_struct_get_ptr(gallivm
, ptr
, index
, "");
68 cache_access
= LLVMBuildLoad(builder
, member_ptr
, "cache_access");
69 cache_access
= LLVMBuildAdd(builder
, cache_access
,
70 LLVMConstInt(LLVMInt64TypeInContext(gallivm
->context
),
72 LLVMBuildStore(builder
, cache_access
, member_ptr
);
/*
 * Emits IR that writes one decoded block into the cache structure:
 *  - tag_value is stored in the TAGS member at position hash_index;
 *  - col[0..3] are stored in the DATA member as <4 x i32> vectors, starting
 *    at element hash_index * 16 and advancing 4 elements per vector.
 * NOTE(review): the 'col' and 'cache' parameters and the declaration of
 * 'count' are used below but not visible in this excerpt.
 */
78 store_cached_block(struct gallivm_state
*gallivm
,
80 LLVMValueRef tag_value
,
81 LLVMValueRef hash_index
,
84 LLVMBuilderRef builder
= gallivm
->builder
;
85 LLVMValueRef ptr
, indices
[3];
86 LLVMTypeRef type_ptr4x32
;
/* Pointer-to-<4 x i32> type, used to store a whole vector per iteration. */
89 type_ptr4x32
= LLVMPointerType(LLVMVectorType(LLVMInt32TypeInContext(gallivm
->context
), 4), 0);
/* Address cache->TAGS[hash_index] and write the new tag there. */
90 indices
[0] = lp_build_const_int32(gallivm
, 0);
91 indices
[1] = lp_build_const_int32(gallivm
, LP_BUILD_FORMAT_CACHE_MEMBER_TAGS
);
92 indices
[2] = hash_index
;
93 ptr
= LLVMBuildGEP(builder
, cache
, indices
, Elements(indices
), "");
94 LLVMBuildStore(builder
, tag_value
, ptr
);
/*
 * Switch to the DATA member. From here on hash_index is reused as the index
 * of the first data element of this slot (16 elements per slot).
 */
96 indices
[1] = lp_build_const_int32(gallivm
, LP_BUILD_FORMAT_CACHE_MEMBER_DATA
);
97 hash_index
= LLVMBuildMul(builder
, hash_index
,
98 lp_build_const_int32(gallivm
, 16), "");
/* Four vector stores, each covering 4 consecutive data elements. */
99 for (count
= 0; count
< 4; count
++) {
100 indices
[2] = hash_index
;
101 ptr
= LLVMBuildGEP(builder
, cache
, indices
, Elements(indices
), "");
102 ptr
= LLVMBuildBitCast(builder
, ptr
, type_ptr4x32
, "");
103 LLVMBuildStore(builder
, col
[count
], ptr
);
104 hash_index
= LLVMBuildAdd(builder
, hash_index
,
105 lp_build_const_int32(gallivm
, 4), "");
/*
 * Emits IR that loads one value from the DATA member of the cache structure
 * pointed to by 'ptr' and returns the loaded value (named "cache_data").
 * NOTE(review): the remaining parameters and the assignment of the third GEP
 * index (indices[2]) are not visible in this excerpt; callers pass the data
 * index as the third argument, so it presumably ends up there -- confirm.
 */
111 lookup_cached_pixel(struct gallivm_state
*gallivm
,
115 LLVMBuilderRef builder
= gallivm
->builder
;
116 LLVMValueRef member_ptr
, indices
[3];
/* GEP path: dereference the struct pointer, then select the DATA member. */
118 indices
[0] = lp_build_const_int32(gallivm
, 0);
119 indices
[1] = lp_build_const_int32(gallivm
, LP_BUILD_FORMAT_CACHE_MEMBER_DATA
);
121 member_ptr
= LLVMBuildGEP(builder
, ptr
, indices
, Elements(indices
), "");
122 return LLVMBuildLoad(builder
, member_ptr
, "cache_data");
/*
 * Emits IR that loads one value from the TAGS member of the cache structure
 * pointed to by 'ptr' and returns the loaded value (named "tag_data").
 * NOTE(review): the remaining parameters and the assignment of the third GEP
 * index (indices[2]) are not visible in this excerpt; callers pass the hash
 * slot as the third argument, so it presumably ends up there -- confirm.
 */
127 lookup_tag_data(struct gallivm_state
*gallivm
,
131 LLVMBuilderRef builder
= gallivm
->builder
;
132 LLVMValueRef member_ptr
, indices
[3];
/* GEP path: dereference the struct pointer, then select the TAGS member. */
134 indices
[0] = lp_build_const_int32(gallivm
, 0);
135 indices
[1] = lp_build_const_int32(gallivm
, LP_BUILD_FORMAT_CACHE_MEMBER_TAGS
);
137 member_ptr
= LLVMBuildGEP(builder
, ptr
, indices
, Elements(indices
), "");
138 return LLVMBuildLoad(builder
, member_ptr
, "tag_data");
/*
 * Emits IR that decodes one 4x4 block and installs it in the cache:
 * format_desc->fetch_rgba_8unorm is called once per texel (4 x 4 calls)
 * with a destination inside a temporary buffer, the buffer is then read back
 * as four <4 x i32> vectors, and those vectors plus the block address
 * (converted to a 64-bit integer tag) are handed to store_cached_block().
 * NOTE(review): the declarations of col, i, j and cache, the arg_types[]
 * and args[1] assignments, and some name-string operands are not visible in
 * this excerpt.
 */
143 update_cached_block(struct gallivm_state
*gallivm
,
144 const struct util_format_description
*format_desc
,
145 LLVMValueRef ptr_addr
,
146 LLVMValueRef hash_index
,
150 LLVMBuilderRef builder
= gallivm
->builder
;
151 LLVMTypeRef i8t
= LLVMInt8TypeInContext(gallivm
->context
);
152 LLVMTypeRef pi8t
= LLVMPointerType(i8t
, 0);
153 LLVMTypeRef i32t
= LLVMInt32TypeInContext(gallivm
->context
);
154 LLVMTypeRef i32x4
= LLVMVectorType(LLVMInt32TypeInContext(gallivm
->context
), 4);
155 LLVMValueRef function
;
156 LLVMValueRef tag_value
, tmp_ptr
;
161 * Use format_desc->fetch_rgba_8unorm() for each pixel in the block.
162 * This doesn't actually make any sense whatsoever, someone would need
163 * to write a function doing this for all pixels in a block (either as
164 * an external c function or with generated code). Don't ask.
169 * Function to call looks like:
170 * fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
172 LLVMTypeRef ret_type
;
173 LLVMTypeRef arg_types
[4];
174 LLVMTypeRef function_type
;
/* The per-texel fetch callback is mandatory for this path. */
176 assert(format_desc
->fetch_rgba_8unorm
);
/* Build the LLVM function type: void return, four arguments, not variadic. */
178 ret_type
= LLVMVoidTypeInContext(gallivm
->context
);
183 function_type
= LLVMFunctionType(ret_type
, arg_types
,
184 Elements(arg_types
), 0);
186 /* make const pointer for the C fetch_rgba_8unorm function */
187 function
= lp_build_const_int_pointer(gallivm
,
188 func_to_pointer((func_pointer
) format_desc
->fetch_rgba_8unorm
));
190 /* cast the callee pointer to the function's type */
191 function
= LLVMBuildBitCast(builder
, function
,
192 LLVMPointerType(function_type
, 0),
/*
 * Scratch buffer receiving the decoded texels.
 * NOTE(review): this requests 16 elements of <4 x i32>, whereas the loops
 * below write 16 texels at 4-byte strides and read back 4 vectors -- looks
 * larger than needed if the count argument is in elements; confirm.
 */
196 tmp_ptr
= lp_build_array_alloca(gallivm
, i32x4
,
197 lp_build_const_int32(gallivm
, 16),
/* View the scratch buffer as bytes so dst_offset below is a byte offset. */
199 tmp_ptr
= LLVMBuildBitCast(builder
, tmp_ptr
, pi8t
, "");
202 * Invoke format_desc->fetch_rgba_8unorm() for each pixel.
203 * This is going to be really really slow.
204 * Note: the block store format is actually
205 * x0y0x0y1x0y2x0y3 x1y0x1y1x1y2x1y3 ...
207 for (i
= 0; i
< 4; ++i
) {
208 for (j
= 0; j
< 4; ++j
) {
209 LLVMValueRef args
[4];
/* Byte offset of texel (i, j): 4 bytes per texel, 4 texels per i. */
210 LLVMValueRef dst_offset
= lp_build_const_int32(gallivm
, (i
* 4 + j
) * 4);
213 * Note we actually supply a pointer to the start of the block,
214 * not the start of the texture.
216 args
[0] = LLVMBuildGEP(gallivm
->builder
, tmp_ptr
, &dst_offset
, 1, "");
218 args
[2] = LLVMConstInt(i32t
, i
, 0);
219 args
[3] = LLVMConstInt(i32t
, j
, 0);
220 LLVMBuildCall(builder
, function
, args
, Elements(args
), "");
224 /* Finally store the block - pointless mem copy + update tag. */
225 tmp_ptr
= LLVMBuildBitCast(builder
, tmp_ptr
, LLVMPointerType(i32x4
, 0), "");
/* Read the scratch buffer back as four <4 x i32> vectors. */
226 for (i
= 0; i
< 4; ++i
) {
227 LLVMValueRef tmp_offset
= lp_build_const_int32(gallivm
, i
);
228 LLVMValueRef ptr
= LLVMBuildGEP(gallivm
->builder
, tmp_ptr
, &tmp_offset
, 1, "");
229 col
[i
] = LLVMBuildLoad(builder
, ptr
, "");
/* The block's address, converted to a 64-bit integer, is the cache tag. */
232 tag_value
= LLVMBuildPtrToInt(gallivm
->builder
, ptr_addr
,
233 LLVMInt64TypeInContext(gallivm
->context
), "");
234 store_cached_block(gallivm
, col
, tag_value
, hash_index
, cache
);
239 * Do a cached lookup.
241 * Returns (vectors of) 4x8 rgba aos value
/*
 * Emits IR that fetches n texels through the decoded-block cache.
 * For each texel the block address (base_ptr plus offset, as a 64-bit
 * integer) is compared with the tag stored at the hashed cache slot; when
 * they differ the block is decoded via update_cached_block(). The texel is
 * then read from the cache data at index hash * 16 + i * 4 + j.
 * Returns the fetched colors bitcast to a vector of n * 4 8-bit integers.
 * NOTE(review): several parameters (n, offset, i, j, cache), the declaration
 * and setup of 'type', the declaration of 'cond' used in the second path,
 * and the condition that selects between the per-element loop and the
 * single-value path are not visible in this excerpt.
 */
244 lp_build_fetch_cached_texels(struct gallivm_state
*gallivm
,
245 const struct util_format_description
*format_desc
,
247 LLVMValueRef base_ptr
,
254 LLVMBuilderRef builder
= gallivm
->builder
;
255 unsigned count
, low_bit
, log2size
;
256 LLVMValueRef color
, offset_stored
, addr
, ptr_addrtrunc
, tmp
;
257 LLVMValueRef ij_index
, hash_index
, hash_mask
, block_index
;
258 LLVMTypeRef i8t
= LLVMInt8TypeInContext(gallivm
->context
);
259 LLVMTypeRef i32t
= LLVMInt32TypeInContext(gallivm
->context
);
260 LLVMTypeRef i64t
= LLVMInt64TypeInContext(gallivm
->context
);
262 struct lp_build_context bld32
;
263 memset(&type
, 0, sizeof type
);
/* Only 4x4 block formats are supported by this cache. */
267 assert(format_desc
->block
.width
== 4);
268 assert(format_desc
->block
.height
== 4);
270 lp_build_context_init(&bld32
, gallivm
, type
);
273 * compute hash - we use direct mapped cache, the hash function could
274 * be better but it needs to be simple
276 * compare offset with offset stored at tag (hash)
277 * if not equal decode/store block, update tag
278 * extract color from cache
279 * assemble result vector
282 /* TODO: not ideal with 32bit pointers... */
/* low_bit: log2 of the block size in bytes; log2size: log2 of the cache size constant. */
284 low_bit
= util_logbase2(format_desc
->block
.bits
/ 8);
285 log2size
= util_logbase2(LP_BUILD_FORMAT_CACHE_SIZE
);
/* Base pointer as a 64-bit integer (for tag comparison) and as a 32-bit integer (for hashing). */
286 addr
= LLVMBuildPtrToInt(builder
, base_ptr
, i64t
, "");
287 ptr_addrtrunc
= LLVMBuildPtrToInt(builder
, base_ptr
, i32t
, "");
288 ptr_addrtrunc
= lp_build_broadcast_scalar(&bld32
, ptr_addrtrunc
);
289 /* For the hash function, first mask off the unused lowest bits. Then just
290 do some xor with address bits - only use lower 32bits */
291 ptr_addrtrunc
= LLVMBuildAdd(builder
, offset
, ptr_addrtrunc
, "");
/* Shift out the low_bit lowest address bits. */
292 ptr_addrtrunc
= LLVMBuildLShr(builder
, ptr_addrtrunc
,
293 lp_build_const_int_vec(gallivm
, type
, low_bit
), "");
294 /* This only really makes sense for size 64,128,256 */
/* Fold higher bits down: x ^= x >> (2 * log2size), then x ^= x >> log2size. */
295 hash_index
= ptr_addrtrunc
;
296 ptr_addrtrunc
= LLVMBuildLShr(builder
, ptr_addrtrunc
,
297 lp_build_const_int_vec(gallivm
, type
, 2*log2size
), "");
298 hash_index
= LLVMBuildXor(builder
, ptr_addrtrunc
, hash_index
, "");
299 tmp
= LLVMBuildLShr(builder
, hash_index
,
300 lp_build_const_int_vec(gallivm
, type
, log2size
), "");
301 hash_index
= LLVMBuildXor(builder
, hash_index
, tmp
, "");
/* Reduce the hash to a slot number by masking with CACHE_SIZE - 1. */
303 hash_mask
= lp_build_const_int_vec(gallivm
, type
, LP_BUILD_FORMAT_CACHE_SIZE
- 1);
304 hash_index
= LLVMBuildAnd(builder
, hash_index
, hash_mask
, "");
/* Texel position inside the block: i * 4 + j; data index: hash_index * 16 + that. */
305 ij_index
= LLVMBuildShl(builder
, i
, lp_build_const_int_vec(gallivm
, type
, 2), "");
306 ij_index
= LLVMBuildAdd(builder
, ij_index
, j
, "");
307 block_index
= LLVMBuildShl(builder
, hash_index
,
308 lp_build_const_int_vec(gallivm
, type
, 4), "");
309 block_index
= LLVMBuildAdd(builder
, ij_index
, block_index
, "");
/* Per-element path: each of the n lanes is checked and, if needed, refilled on its own. */
312 color
= LLVMGetUndef(LLVMVectorType(i32t
, n
));
313 for (count
= 0; count
< n
; count
++) {
314 LLVMValueRef index
, cond
, colorx
;
315 LLVMValueRef block_indexx
, hash_indexx
, addrx
, offsetx
, ptr_addrx
;
316 struct lp_build_if_state if_ctx
;
/* This lane's block address: zero-extended offset plus the 64-bit base address. */
318 index
= lp_build_const_int32(gallivm
, count
);
319 offsetx
= LLVMBuildExtractElement(builder
, offset
, index
, "");
320 addrx
= LLVMBuildZExt(builder
, offsetx
, i64t
, "");
321 addrx
= LLVMBuildAdd(builder
, addrx
, addr
, "");
/* Recover this lane's slot number from its data index (undoes the << 4 above). */
322 block_indexx
= LLVMBuildExtractElement(builder
, block_index
, index
, "");
323 hash_indexx
= LLVMBuildLShr(builder
, block_indexx
,
324 lp_build_const_int32(gallivm
, 4), "");
/* Miss when the stored tag differs from this lane's block address. */
325 offset_stored
= lookup_tag_data(gallivm
, cache
, hash_indexx
);
326 cond
= LLVMBuildICmp(builder
, LLVMIntNE
, offset_stored
, addrx
, "");
328 lp_build_if(&if_ctx
, gallivm
, cond
);
/* On a miss: decode the block at that address into this slot. */
330 ptr_addrx
= LLVMBuildIntToPtr(builder
, addrx
,
331 LLVMPointerType(i8t
, 0), "");
332 update_cached_block(gallivm
, format_desc
, ptr_addrx
, hash_indexx
, cache
);
333 #if LP_BUILD_FORMAT_CACHE_DEBUG
334 update_cache_access(gallivm
, cache
, 1,
335 LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS
);
338 lp_build_endif(&if_ctx
);
/* The slot now holds the right block; read the texel and insert it into the result vector. */
340 colorx
= lookup_cached_pixel(gallivm
, cache
, block_indexx
);
342 color
= LLVMBuildInsertElement(builder
, color
, colorx
,
343 lp_build_const_int32(gallivm
, count
), "");
/* Single-value path: same tag check, refill and lookup, without lane extraction. */
348 struct lp_build_if_state if_ctx
;
350 tmp
= LLVMBuildZExt(builder
, offset
, i64t
, "");
351 addr
= LLVMBuildAdd(builder
, tmp
, addr
, "");
352 offset_stored
= lookup_tag_data(gallivm
, cache
, hash_index
);
353 cond
= LLVMBuildICmp(builder
, LLVMIntNE
, offset_stored
, addr
, "");
355 lp_build_if(&if_ctx
, gallivm
, cond
);
357 tmp
= LLVMBuildIntToPtr(builder
, addr
, LLVMPointerType(i8t
, 0), "");
358 update_cached_block(gallivm
, format_desc
, tmp
, hash_index
, cache
);
359 #if LP_BUILD_FORMAT_CACHE_DEBUG
360 update_cache_access(gallivm
, cache
, 1,
361 LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS
);
364 lp_build_endif(&if_ctx
);
366 color
= lookup_cached_pixel(gallivm
, cache
, block_index
);
/* Debug builds only: add n to the total-access counter. */
368 #if LP_BUILD_FORMAT_CACHE_DEBUG
369 update_cache_access(gallivm
, cache
, n
,
370 LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL
);
/* Reinterpret the 32-bit colors as n * 4 8-bit channel values. */
372 return LLVMBuildBitCast(builder
, color
, LLVMVectorType(i8t
, n
* 4), "");