glsl: move to compiler/
[mesa.git] / src / mesa / drivers / dri / i965 / brw_lower_texture_gradients.cpp
1 /*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /**
25 * \file brw_lower_texture_gradients.cpp
26 */
27
28 #include "compiler/glsl/ir.h"
29 #include "compiler/glsl/ir_builder.h"
30 #include "program/prog_instruction.h"
31 #include "brw_context.h"
32
33 using namespace ir_builder;
34
35 class lower_texture_grad_visitor : public ir_hierarchical_visitor {
36 public:
37 lower_texture_grad_visitor(bool has_sample_d_c)
38 : has_sample_d_c(has_sample_d_c)
39 {
40 progress = false;
41 }
42
43 ir_visitor_status visit_leave(ir_texture *ir);
44
45
46 bool progress;
47 bool has_sample_d_c;
48
49 private:
50 void emit(ir_variable *, ir_rvalue *);
51 ir_variable *temp(void *ctx, const glsl_type *type, const char *name);
52 };
53
54 /**
55 * Emit a variable declaration and an assignment to initialize it.
56 */
57 void
58 lower_texture_grad_visitor::emit(ir_variable *var, ir_rvalue *value)
59 {
60 base_ir->insert_before(var);
61 base_ir->insert_before(assign(var, value));
62 }
63
64 /**
65 * Emit a temporary variable declaration
66 */
67 ir_variable *
68 lower_texture_grad_visitor::temp(void *ctx, const glsl_type *type, const char *name)
69 {
70 ir_variable *var = new(ctx) ir_variable(type, name, ir_var_temporary);
71 base_ir->insert_before(var);
72 return var;
73 }
74
75 static const glsl_type *
76 txs_type(const glsl_type *type)
77 {
78 unsigned dims;
79 switch (type->sampler_dimensionality) {
80 case GLSL_SAMPLER_DIM_1D:
81 dims = 1;
82 break;
83 case GLSL_SAMPLER_DIM_2D:
84 case GLSL_SAMPLER_DIM_RECT:
85 case GLSL_SAMPLER_DIM_CUBE:
86 dims = 2;
87 break;
88 case GLSL_SAMPLER_DIM_3D:
89 dims = 3;
90 break;
91 default:
92 unreachable("Should not get here: invalid sampler dimensionality");
93 }
94
95 if (type->sampler_array)
96 dims++;
97
98 return glsl_type::get_instance(GLSL_TYPE_INT, dims, 1);
99 }
100
101 ir_visitor_status
102 lower_texture_grad_visitor::visit_leave(ir_texture *ir)
103 {
104 /* Only lower textureGrad with cube maps or shadow samplers */
105 if (ir->op != ir_txd ||
106 (ir->sampler->type->sampler_dimensionality != GLSL_SAMPLER_DIM_CUBE &&
107 !ir->shadow_comparitor))
108 return visit_continue;
109
110 /* Lower textureGrad() with samplerCube* even if we have the sample_d_c
111 * message. GLSL provides gradients for the 'r' coordinate. Unfortunately:
112 *
113 * From the Ivybridge PRM, Volume 4, Part 1, sample_d message description:
114 * "The r coordinate contains the faceid, and the r gradients are ignored
115 * by hardware."
116 */
117 bool need_lowering = !has_sample_d_c ||
118 ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE;
119
120 if (!need_lowering)
121 return visit_continue;
122
123 void *mem_ctx = ralloc_parent(ir);
124
125 const glsl_type *grad_type = ir->lod_info.grad.dPdx->type;
126
127 /* Use textureSize() to get the width and height of LOD 0; swizzle away
128 * the depth/number of array slices.
129 */
130 ir_texture *txs = new(mem_ctx) ir_texture(ir_txs);
131 txs->set_sampler(ir->sampler->clone(mem_ctx, NULL),
132 txs_type(ir->sampler->type));
133 txs->lod_info.lod = new(mem_ctx) ir_constant(0);
134 ir_variable *size =
135 new(mem_ctx) ir_variable(grad_type, "size", ir_var_temporary);
136 if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) {
137 base_ir->insert_before(size);
138 base_ir->insert_before(assign(size,
139 swizzle_for_size(expr(ir_unop_i2f, txs), 2),
140 WRITEMASK_XY));
141 base_ir->insert_before(assign(size, new(mem_ctx) ir_constant(1.0f), WRITEMASK_Z));
142 } else {
143 emit(size, expr(ir_unop_i2f,
144 swizzle_for_size(txs, grad_type->vector_elements)));
145 }
146
147 /* Scale the gradients by width and height. Effectively, the incoming
148 * gradients are s'(x,y), t'(x,y), and r'(x,y) from equation 3.19 in the
149 * GL 3.0 spec; we want u'(x,y), which is w_t * s'(x,y).
150 */
151 ir_variable *dPdx =
152 new(mem_ctx) ir_variable(grad_type, "dPdx", ir_var_temporary);
153 emit(dPdx, mul(size, ir->lod_info.grad.dPdx));
154
155 ir_variable *dPdy =
156 new(mem_ctx) ir_variable(grad_type, "dPdy", ir_var_temporary);
157 emit(dPdy, mul(size, ir->lod_info.grad.dPdy));
158
159 ir->op = ir_txl;
160 if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) {
161 /* Cubemap texture lookups first generate a texture coordinate normalized
162 * to [-1, 1] on the appropiate face. The appropiate face is determined
163 * by which component has largest magnitude and its sign. The texture
164 * coordinate is the quotient of the remaining texture coordinates against
165 * that absolute value of the component of largest magnitude. This
166 * division requires that the computing of the derivative of the texel
167 * coordinate must use the quotient rule. The high level GLSL code is as
168 * follows:
169 *
170 * Step 1: selection
171 *
172 * vec3 abs_p, Q, dQdx, dQdy;
173 * abs_p = abs(ir->coordinate);
174 * if (abs_p.x >= max(abs_p.y, abs_p.z)) {
175 * Q = ir->coordinate.yzx;
176 * dQdx = ir->lod_info.grad.dPdx.yzx;
177 * dQdy = ir->lod_info.grad.dPdy.yzx;
178 * }
179 * if (abs_p.y >= max(abs_p.x, abs_p.z)) {
180 * Q = ir->coordinate.xzy;
181 * dQdx = ir->lod_info.grad.dPdx.xzy;
182 * dQdy = ir->lod_info.grad.dPdy.xzy;
183 * }
184 * if (abs_p.z >= max(abs_p.x, abs_p.y)) {
185 * Q = ir->coordinate;
186 * dQdx = ir->lod_info.grad.dPdx;
187 * dQdy = ir->lod_info.grad.dPdy;
188 * }
189 *
190 * Step 2: use quotient rule to compute derivative. The normalized to
191 * [-1, 1] texel coordinate is given by Q.xy / (sign(Q.z) * Q.z). We are
192 * only concerned with the magnitudes of the derivatives whose values are
193 * not affected by the sign. We drop the sign from the computation.
194 *
195 * vec2 dx, dy;
196 * float recip;
197 *
198 * recip = 1.0 / Q.z;
199 * dx = recip * ( dQdx.xy - Q.xy * (dQdx.z * recip) );
200 * dy = recip * ( dQdy.xy - Q.xy * (dQdy.z * recip) );
201 *
202 * Step 3: compute LOD. At this point we have the derivatives of the
203 * texture coordinates normalized to [-1,1]. We take the LOD to be
204 * result = log2(max(sqrt(dot(dx, dx)), sqrt(dy, dy)) * 0.5 * L)
205 * = -1.0 + log2(max(sqrt(dot(dx, dx)), sqrt(dy, dy)) * L)
206 * = -1.0 + log2(sqrt(max(dot(dx, dx), dot(dy,dy))) * L)
207 * = -1.0 + log2(sqrt(L * L * max(dot(dx, dx), dot(dy,dy))))
208 * = -1.0 + 0.5 * log2(L * L * max(dot(dx, dx), dot(dy,dy)))
209 * where L is the dimension of the cubemap. The code is:
210 *
211 * float M, result;
212 * M = max(dot(dx, dx), dot(dy, dy));
213 * L = textureSize(sampler, 0).x;
214 * result = -1.0 + 0.5 * log2(L * L * M);
215 */
216
217 /* Helpers to make code more human readable. */
218 #define EMIT(instr) base_ir->insert_before(instr)
219 #define THEN(irif, instr) irif->then_instructions.push_tail(instr)
220 #define CLONE(x) x->clone(mem_ctx, NULL)
221
222 ir_variable *abs_p = temp(mem_ctx, glsl_type::vec3_type, "abs_p");
223
224 EMIT(assign(abs_p, swizzle_for_size(abs(CLONE(ir->coordinate)), 3)));
225
226 ir_variable *Q = temp(mem_ctx, glsl_type::vec3_type, "Q");
227 ir_variable *dQdx = temp(mem_ctx, glsl_type::vec3_type, "dQdx");
228 ir_variable *dQdy = temp(mem_ctx, glsl_type::vec3_type, "dQdy");
229
230 /* unmodified dPdx, dPdy values */
231 ir_rvalue *dPdx = ir->lod_info.grad.dPdx;
232 ir_rvalue *dPdy = ir->lod_info.grad.dPdy;
233
234 /* 1. compute selector */
235
236 /* if (abs_p.x >= max(abs_p.y, abs_p.z)) ... */
237 ir_if *branch_x =
238 new(mem_ctx) ir_if(gequal(swizzle_x(abs_p),
239 max2(swizzle_y(abs_p), swizzle_z(abs_p))));
240
241 /* Q = p.yzx;
242 * dQdx = dPdx.yzx;
243 * dQdy = dPdy.yzx;
244 */
245 int yzx = MAKE_SWIZZLE4(SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, 0);
246 THEN(branch_x, assign(Q, swizzle(CLONE(ir->coordinate), yzx, 3)));
247 THEN(branch_x, assign(dQdx, swizzle(CLONE(dPdx), yzx, 3)));
248 THEN(branch_x, assign(dQdy, swizzle(CLONE(dPdy), yzx, 3)));
249 EMIT(branch_x);
250
251 /* if (abs_p.y >= max(abs_p.x, abs_p.z)) */
252 ir_if *branch_y =
253 new(mem_ctx) ir_if(gequal(swizzle_y(abs_p),
254 max2(swizzle_x(abs_p), swizzle_z(abs_p))));
255
256 /* Q = p.xzy;
257 * dQdx = dPdx.xzy;
258 * dQdy = dPdy.xzy;
259 */
260 int xzy = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Z, SWIZZLE_Y, 0);
261 THEN(branch_y, assign(Q, swizzle(CLONE(ir->coordinate), xzy, 3)));
262 THEN(branch_y, assign(dQdx, swizzle(CLONE(dPdx), xzy, 3)));
263 THEN(branch_y, assign(dQdy, swizzle(CLONE(dPdy), xzy, 3)));
264 EMIT(branch_y);
265
266 /* if (abs_p.z >= max(abs_p.x, abs_p.y)) */
267 ir_if *branch_z =
268 new(mem_ctx) ir_if(gequal(swizzle_z(abs_p),
269 max2(swizzle_x(abs_p), swizzle_y(abs_p))));
270
271 /* Q = p;
272 * dQdx = dPdx;
273 * dQdy = dPdy;
274 */
275 THEN(branch_z, assign(Q, swizzle_for_size(CLONE(ir->coordinate), 3)));
276 THEN(branch_z, assign(dQdx, CLONE(dPdx)));
277 THEN(branch_z, assign(dQdy, CLONE(dPdy)));
278 EMIT(branch_z);
279
280 /* 2. quotient rule */
281 ir_variable *recip = temp(mem_ctx, glsl_type::float_type, "recip");
282 EMIT(assign(recip, expr(ir_unop_rcp, swizzle_z(Q))));
283
284 ir_variable *dx = temp(mem_ctx, glsl_type::vec2_type, "dx");
285 ir_variable *dy = temp(mem_ctx, glsl_type::vec2_type, "dy");
286
287 /* tmp = Q.xy * recip;
288 * dx = recip * ( dQdx.xy - (tmp * dQdx.z) );
289 * dy = recip * ( dQdy.xy - (tmp * dQdy.z) );
290 */
291 ir_variable *tmp = temp(mem_ctx, glsl_type::vec2_type, "tmp");
292 EMIT(assign(tmp, mul(swizzle_xy(Q), recip)));
293 EMIT(assign(dx, mul(recip, sub(swizzle_xy(dQdx),
294 mul(tmp, swizzle_z(dQdx))))));
295 EMIT(assign(dy, mul(recip, sub(swizzle_xy(dQdy),
296 mul(tmp, swizzle_z(dQdy))))));
297
298 /* M = max(dot(dx, dx), dot(dy, dy)); */
299 ir_variable *M = temp(mem_ctx, glsl_type::float_type, "M");
300 EMIT(assign(M, max2(dot(dx, dx), dot(dy, dy))));
301
302 /* size has textureSize() of LOD 0 */
303 ir_variable *L = temp(mem_ctx, glsl_type::float_type, "L");
304 EMIT(assign(L, swizzle_x(size)));
305
306 ir_variable *result = temp(mem_ctx, glsl_type::float_type, "result");
307
308 /* result = -1.0 + 0.5 * log2(L * L * M); */
309 EMIT(assign(result,
310 add(new(mem_ctx)ir_constant(-1.0f),
311 mul(new(mem_ctx)ir_constant(0.5f),
312 expr(ir_unop_log2, mul(mul(L, L), M))))));
313
314 /* 3. final assignment of parameters to textureLod call */
315 ir->lod_info.lod = new (mem_ctx) ir_dereference_variable(result);
316
317 #undef THEN
318 #undef EMIT
319
320 } else {
321 /* Calculate rho from equation 3.20 of the GL 3.0 specification. */
322 ir_rvalue *rho;
323 if (dPdx->type->is_scalar()) {
324 rho = expr(ir_binop_max, expr(ir_unop_abs, dPdx),
325 expr(ir_unop_abs, dPdy));
326 } else {
327 rho = expr(ir_binop_max, expr(ir_unop_sqrt, dot(dPdx, dPdx)),
328 expr(ir_unop_sqrt, dot(dPdy, dPdy)));
329 }
330
331 /* lambda_base = log2(rho). We're ignoring GL state biases for now. */
332 ir->lod_info.lod = expr(ir_unop_log2, rho);
333 }
334
335 progress = true;
336 return visit_continue;
337 }
338
339 extern "C" {
340
341 bool
342 brw_lower_texture_gradients(struct brw_context *brw,
343 struct exec_list *instructions)
344 {
345 bool has_sample_d_c = brw->gen >= 8 || brw->is_haswell;
346 lower_texture_grad_visitor v(has_sample_d_c);
347
348 visit_list_elements(&v, instructions);
349
350 return v.progress;
351 }
352
353 }