gallivm,draw,llvmpipe: Support wider native registers.
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_quad.c
1 /**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 *
26 **************************************************************************/
27
28
29 #include "u_cpu_detect.h"
30 #include "lp_bld_type.h"
31 #include "lp_bld_arit.h"
32 #include "lp_bld_const.h"
33 #include "lp_bld_swizzle.h"
34 #include "lp_bld_quad.h"
35
36
37 static const unsigned char
38 swizzle_left[4] = {
39 LP_BLD_QUAD_TOP_LEFT, LP_BLD_QUAD_TOP_LEFT,
40 LP_BLD_QUAD_BOTTOM_LEFT, LP_BLD_QUAD_BOTTOM_LEFT
41 };
42
43 static const unsigned char
44 swizzle_right[4] = {
45 LP_BLD_QUAD_TOP_RIGHT, LP_BLD_QUAD_TOP_RIGHT,
46 LP_BLD_QUAD_BOTTOM_RIGHT, LP_BLD_QUAD_BOTTOM_RIGHT
47 };
48
49 static const unsigned char
50 swizzle_top[4] = {
51 LP_BLD_QUAD_TOP_LEFT, LP_BLD_QUAD_TOP_RIGHT,
52 LP_BLD_QUAD_TOP_LEFT, LP_BLD_QUAD_TOP_RIGHT
53 };
54
55 static const unsigned char
56 swizzle_bottom[4] = {
57 LP_BLD_QUAD_BOTTOM_LEFT, LP_BLD_QUAD_BOTTOM_RIGHT,
58 LP_BLD_QUAD_BOTTOM_LEFT, LP_BLD_QUAD_BOTTOM_RIGHT
59 };
60
61
62 LLVMValueRef
63 lp_build_ddx(struct lp_build_context *bld,
64 LLVMValueRef a)
65 {
66 LLVMValueRef a_left = lp_build_swizzle_aos(bld, a, swizzle_left);
67 LLVMValueRef a_right = lp_build_swizzle_aos(bld, a, swizzle_right);
68 return lp_build_sub(bld, a_right, a_left);
69 }
70
71
72 LLVMValueRef
73 lp_build_ddy(struct lp_build_context *bld,
74 LLVMValueRef a)
75 {
76 LLVMValueRef a_top = lp_build_swizzle_aos(bld, a, swizzle_top);
77 LLVMValueRef a_bottom = lp_build_swizzle_aos(bld, a, swizzle_bottom);
78 return lp_build_sub(bld, a_bottom, a_top);
79 }
80
81 /*
82 * To be able to handle multiple quads at once in texture sampling and
83 * do lod calculations per quad, it is necessary to get the per-quad
84 * derivatives into the lp_build_rho function.
85 * For 8-wide vectors the packed derivative values for 3 coords would
86 * look like this, this scales to a arbitrary (multiple of 4) vector size:
87 * ds1dx ds1dy dt1dx dt1dy ds2dx ds2dy dt2dx dt2dy
88 * dr1dx dr1dy _____ _____ dr2dx dr2dy _____ _____
89 * The second vector will be unused for 1d and 2d textures.
90 */
91 LLVMValueRef
92 lp_build_packed_ddx_ddy_onecoord(struct lp_build_context *bld,
93 LLVMValueRef a)
94 {
95 struct gallivm_state *gallivm = bld->gallivm;
96 LLVMBuilderRef builder = gallivm->builder;
97 LLVMValueRef vec1, vec2;
98
99 /* same packing as _twocoord, but can use aos swizzle helper */
100
101 /*
102 * XXX could make swizzle1 a noop swizzle by using right top/bottom
103 * pair for ddy
104 */
105 static const unsigned char swizzle1[] = {
106 LP_BLD_QUAD_TOP_LEFT, LP_BLD_QUAD_TOP_LEFT,
107 LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
108 };
109 static const unsigned char swizzle2[] = {
110 LP_BLD_QUAD_TOP_RIGHT, LP_BLD_QUAD_BOTTOM_LEFT,
111 LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
112 };
113
114 vec1 = lp_build_swizzle_aos(bld, a, swizzle1);
115 vec2 = lp_build_swizzle_aos(bld, a, swizzle2);
116
117 if (bld->type.floating)
118 return LLVMBuildFSub(builder, vec2, vec1, "ddxddy");
119 else
120 return LLVMBuildSub(builder, vec2, vec1, "ddxddy");
121 }
122
123
124 LLVMValueRef
125 lp_build_packed_ddx_ddy_twocoord(struct lp_build_context *bld,
126 LLVMValueRef a, LLVMValueRef b)
127 {
128 struct gallivm_state *gallivm = bld->gallivm;
129 LLVMBuilderRef builder = gallivm->builder;
130 LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH/4];
131 LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH/4];
132 LLVMValueRef vec1, vec2;
133 unsigned length, num_quads, i;
134
135 /* XXX: do hsub version */
136 length = bld->type.length;
137 num_quads = length / 4;
138 for (i = 0; i < num_quads; i++) {
139 unsigned s1 = 4 * i;
140 unsigned s2 = 4 * i + length;
141 shuffles1[4*i + 0] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s1);
142 shuffles1[4*i + 1] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s1);
143 shuffles1[4*i + 2] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s2);
144 shuffles1[4*i + 3] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s2);
145 shuffles2[4*i + 0] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_RIGHT + s1);
146 shuffles2[4*i + 1] = lp_build_const_int32(gallivm, LP_BLD_QUAD_BOTTOM_LEFT + s1);
147 shuffles2[4*i + 2] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_RIGHT + s2);
148 shuffles2[4*i + 3] = lp_build_const_int32(gallivm, LP_BLD_QUAD_BOTTOM_LEFT + s2);
149 }
150 vec1 = LLVMBuildShuffleVector(builder, a, b,
151 LLVMConstVector(shuffles1, length), "");
152 vec2 = LLVMBuildShuffleVector(builder, a, b,
153 LLVMConstVector(shuffles2, length), "");
154 if (bld->type.floating)
155 return LLVMBuildFSub(builder, vec2, vec1, "ddxddyddxddy");
156 else
157 return LLVMBuildSub(builder, vec2, vec1, "ddxddyddxddy");
158 }
159