2 * Copyright © 2012 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
25 * \file brw_lower_texture_gradients.cpp
28 #include "compiler/glsl/ir.h"
29 #include "compiler/glsl/ir_builder.h"
30 #include "program/prog_instruction.h"
31 #include "brw_context.h"
33 using namespace ir_builder
;
/* Hierarchical IR visitor (derives from ir_hierarchical_visitor) that handles
 * ir_texture nodes in visit_leave().  It is constructed with a
 * has_sample_d_c flag and declares two helpers, emit() and temp(), which the
 * out-of-line definitions use to insert instructions before base_ir.
 *
 * NOTE(review): access specifiers, the constructor body, the data-member
 * declarations and the closing brace are not visible in this copy of the
 * file -- confirm against the upstream source before relying on this text.
 */
35 class lower_texture_grad_visitor
: public ir_hierarchical_visitor
{
/* Stores the constructor argument in the member of the same name. */
37 lower_texture_grad_visitor(bool has_sample_d_c
)
38 : has_sample_d_c(has_sample_d_c
)
/* Handler for ir_texture nodes; returns an ir_visitor_status. */
43 ir_visitor_status
visit_leave(ir_texture
*ir
);
/* Inserts a variable declaration followed by an assignment to it. */
50 void emit(ir_variable
*, ir_rvalue
*);
/* Allocates a temporary ir_variable of the given type and name from ctx and
 * inserts its declaration.
 */
51 ir_variable
*temp(void *ctx
, const glsl_type
*type
, const char *name
);
55 * Emit a variable declaration and an assignment to initialize it.
/* Declares a variable and initializes it, both ahead of base_ir.
 *
 * var:   variable whose declaration is inserted first
 * value: rvalue assigned to var by the instruction inserted second
 *
 * Both insertions use base_ir->insert_before(), in the order shown.
 */
58 lower_texture_grad_visitor::emit(ir_variable
*var
, ir_rvalue
*value
)
/* Insert the declaration itself. */
60 base_ir
->insert_before(var
);
/* Insert the initializing assignment built with ir_builder's assign(). */
61 base_ir
->insert_before(assign(var
, value
));
65 * Emit a temporary variable declaration
/* Creates a temporary variable and inserts its declaration before base_ir.
 *
 * ctx:  allocation context handed to the placement new of ir_variable
 * type: GLSL type of the temporary
 * name: name given to the temporary
 *
 * The class declaration gives the return type as ir_variable *; the return
 * statement is not visible in this copy.
 * NOTE(review): presumably returns the new variable -- confirm against the
 * upstream source.
 */
68 lower_texture_grad_visitor::temp(void *ctx
, const glsl_type
*type
, const char *name
)
/* ir_var_temporary marks the variable's mode as a temporary. */
70 ir_variable
*var
= new(ctx
) ir_variable(type
, name
, ir_var_temporary
);
/* Only the declaration is inserted here; no value is assigned. */
71 base_ir
->insert_before(var
);
/* Returns the integer GLSL type used as the result type of the ir_txs
 * (texture size) query built for the given sampler type.
 *
 * type: sampler type; its sampler_dimensionality and sampler_array fields
 *       are inspected.
 *
 * The result is glsl_type::get_instance(GLSL_TYPE_INT, dims, 1).
 * NOTE(review): the declaration of `dims` and the values assigned in each
 * case are not visible in this copy; presumably they are the component
 * counts of the corresponding GLSL textureSize() results, adjusted for
 * sampler arrays -- confirm against the upstream source.
 */
75 static const glsl_type
*
76 txs_type(const glsl_type
*type
)
/* Dispatch on the sampler dimensionality. */
79 switch (type
->sampler_dimensionality
) {
80 case GLSL_SAMPLER_DIM_1D
:
/* 2D, RECT and CUBE are listed consecutively. */
83 case GLSL_SAMPLER_DIM_2D
:
84 case GLSL_SAMPLER_DIM_RECT
:
85 case GLSL_SAMPLER_DIM_CUBE
:
88 case GLSL_SAMPLER_DIM_3D
:
/* Any other dimensionality is treated as a programming error. */
92 unreachable("Should not get here: invalid sampler dimensionality");
/* Array samplers are handled separately from the dimensionality switch. */
95 if (type
->sampler_array
)
98 return glsl_type::get_instance(GLSL_TYPE_INT
, dims
, 1);
102 lower_texture_grad_visitor::visit_leave(ir_texture
*ir
)
104 /* Only lower textureGrad with cube maps or shadow samplers */
105 if (ir
->op
!= ir_txd
||
106 (ir
->sampler
->type
->sampler_dimensionality
!= GLSL_SAMPLER_DIM_CUBE
&&
107 !ir
->shadow_comparitor
))
108 return visit_continue
;
110 /* Lower textureGrad() with samplerCube* even if we have the sample_d_c
111 * message. GLSL provides gradients for the 'r' coordinate. Unfortunately:
113 * From the Ivybridge PRM, Volume 4, Part 1, sample_d message description:
114 * "The r coordinate contains the faceid, and the r gradients are ignored
117 bool need_lowering
= !has_sample_d_c
||
118 ir
->sampler
->type
->sampler_dimensionality
== GLSL_SAMPLER_DIM_CUBE
;
121 return visit_continue
;
123 void *mem_ctx
= ralloc_parent(ir
);
125 const glsl_type
*grad_type
= ir
->lod_info
.grad
.dPdx
->type
;
127 /* Use textureSize() to get the width and height of LOD 0; swizzle away
128 * the depth/number of array slices.
130 ir_texture
*txs
= new(mem_ctx
) ir_texture(ir_txs
);
131 txs
->set_sampler(ir
->sampler
->clone(mem_ctx
, NULL
),
132 txs_type(ir
->sampler
->type
));
133 txs
->lod_info
.lod
= new(mem_ctx
) ir_constant(0);
135 new(mem_ctx
) ir_variable(grad_type
, "size", ir_var_temporary
);
136 if (ir
->sampler
->type
->sampler_dimensionality
== GLSL_SAMPLER_DIM_CUBE
) {
137 base_ir
->insert_before(size
);
138 base_ir
->insert_before(assign(size
,
139 swizzle_for_size(expr(ir_unop_i2f
, txs
), 2),
141 base_ir
->insert_before(assign(size
, new(mem_ctx
) ir_constant(1.0f
), WRITEMASK_Z
));
143 emit(size
, expr(ir_unop_i2f
,
144 swizzle_for_size(txs
, grad_type
->vector_elements
)));
147 /* Scale the gradients by width and height. Effectively, the incoming
148 * gradients are s'(x,y), t'(x,y), and r'(x,y) from equation 3.19 in the
149 * GL 3.0 spec; we want u'(x,y), which is w_t * s'(x,y).
152 new(mem_ctx
) ir_variable(grad_type
, "dPdx", ir_var_temporary
);
153 emit(dPdx
, mul(size
, ir
->lod_info
.grad
.dPdx
));
156 new(mem_ctx
) ir_variable(grad_type
, "dPdy", ir_var_temporary
);
157 emit(dPdy
, mul(size
, ir
->lod_info
.grad
.dPdy
));
160 if (ir
->sampler
->type
->sampler_dimensionality
== GLSL_SAMPLER_DIM_CUBE
) {
161 /* Cubemap texture lookups first generate a texture coordinate normalized
162 * to [-1, 1] on the appropriate face. The appropriate face is determined
163 * by which component has largest magnitude and its sign. The texture
164 * coordinate is the quotient of the remaining texture coordinates against
165 * the absolute value of the component of largest magnitude. Because of
166 * this division, computing the derivative of the texel coordinate
167 * requires the quotient rule. The high level GLSL code is as
172 * vec3 abs_p, Q, dQdx, dQdy;
173 * abs_p = abs(ir->coordinate);
174 * if (abs_p.x >= max(abs_p.y, abs_p.z)) {
175 * Q = ir->coordinate.yzx;
176 * dQdx = ir->lod_info.grad.dPdx.yzx;
177 * dQdy = ir->lod_info.grad.dPdy.yzx;
179 * if (abs_p.y >= max(abs_p.x, abs_p.z)) {
180 * Q = ir->coordinate.xzy;
181 * dQdx = ir->lod_info.grad.dPdx.xzy;
182 * dQdy = ir->lod_info.grad.dPdy.xzy;
184 * if (abs_p.z >= max(abs_p.x, abs_p.y)) {
185 * Q = ir->coordinate;
186 * dQdx = ir->lod_info.grad.dPdx;
187 * dQdy = ir->lod_info.grad.dPdy;
190 * Step 2: use quotient rule to compute derivative. The normalized to
191 * [-1, 1] texel coordinate is given by Q.xy / (sign(Q.z) * Q.z). We are
192 * only concerned with the magnitudes of the derivatives whose values are
193 * not affected by the sign. We drop the sign from the computation.
199 * dx = recip * ( dQdx.xy - Q.xy * (dQdx.z * recip) );
200 * dy = recip * ( dQdy.xy - Q.xy * (dQdy.z * recip) );
202 * Step 3: compute LOD. At this point we have the derivatives of the
203 * texture coordinates normalized to [-1,1]. We take the LOD to be
204 * result = log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * 0.5 * L)
205 * = -1.0 + log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * L)
206 * = -1.0 + log2(sqrt(max(dot(dx, dx), dot(dy,dy))) * L)
207 * = -1.0 + log2(sqrt(L * L * max(dot(dx, dx), dot(dy,dy))))
208 * = -1.0 + 0.5 * log2(L * L * max(dot(dx, dx), dot(dy,dy)))
209 * where L is the dimension of the cubemap. The code is:
212 * M = max(dot(dx, dx), dot(dy, dy));
213 * L = textureSize(sampler, 0).x;
214 * result = -1.0 + 0.5 * log2(L * L * M);
217 /* Helpers to make code more human readable. */
218 #define EMIT(instr) base_ir->insert_before(instr)
219 #define THEN(irif, instr) irif->then_instructions.push_tail(instr)
220 #define CLONE(x) x->clone(mem_ctx, NULL)
222 ir_variable
*abs_p
= temp(mem_ctx
, glsl_type::vec3_type
, "abs_p");
224 EMIT(assign(abs_p
, swizzle_for_size(abs(CLONE(ir
->coordinate
)), 3)));
226 ir_variable
*Q
= temp(mem_ctx
, glsl_type::vec3_type
, "Q");
227 ir_variable
*dQdx
= temp(mem_ctx
, glsl_type::vec3_type
, "dQdx");
228 ir_variable
*dQdy
= temp(mem_ctx
, glsl_type::vec3_type
, "dQdy");
230 /* unmodified dPdx, dPdy values */
231 ir_rvalue
*dPdx
= ir
->lod_info
.grad
.dPdx
;
232 ir_rvalue
*dPdy
= ir
->lod_info
.grad
.dPdy
;
234 /* 1. compute selector */
236 /* if (abs_p.x >= max(abs_p.y, abs_p.z)) ... */
238 new(mem_ctx
) ir_if(gequal(swizzle_x(abs_p
),
239 max2(swizzle_y(abs_p
), swizzle_z(abs_p
))));
245 int yzx
= MAKE_SWIZZLE4(SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_X
, 0);
246 THEN(branch_x
, assign(Q
, swizzle(CLONE(ir
->coordinate
), yzx
, 3)));
247 THEN(branch_x
, assign(dQdx
, swizzle(CLONE(dPdx
), yzx
, 3)));
248 THEN(branch_x
, assign(dQdy
, swizzle(CLONE(dPdy
), yzx
, 3)));
251 /* if (abs_p.y >= max(abs_p.x, abs_p.z)) */
253 new(mem_ctx
) ir_if(gequal(swizzle_y(abs_p
),
254 max2(swizzle_x(abs_p
), swizzle_z(abs_p
))));
260 int xzy
= MAKE_SWIZZLE4(SWIZZLE_X
, SWIZZLE_Z
, SWIZZLE_Y
, 0);
261 THEN(branch_y
, assign(Q
, swizzle(CLONE(ir
->coordinate
), xzy
, 3)));
262 THEN(branch_y
, assign(dQdx
, swizzle(CLONE(dPdx
), xzy
, 3)));
263 THEN(branch_y
, assign(dQdy
, swizzle(CLONE(dPdy
), xzy
, 3)));
266 /* if (abs_p.z >= max(abs_p.x, abs_p.y)) */
268 new(mem_ctx
) ir_if(gequal(swizzle_z(abs_p
),
269 max2(swizzle_x(abs_p
), swizzle_y(abs_p
))));
275 THEN(branch_z
, assign(Q
, swizzle_for_size(CLONE(ir
->coordinate
), 3)));
276 THEN(branch_z
, assign(dQdx
, CLONE(dPdx
)));
277 THEN(branch_z
, assign(dQdy
, CLONE(dPdy
)));
280 /* 2. quotient rule */
281 ir_variable
*recip
= temp(mem_ctx
, glsl_type::float_type
, "recip");
282 EMIT(assign(recip
, expr(ir_unop_rcp
, swizzle_z(Q
))));
284 ir_variable
*dx
= temp(mem_ctx
, glsl_type::vec2_type
, "dx");
285 ir_variable
*dy
= temp(mem_ctx
, glsl_type::vec2_type
, "dy");
287 /* tmp = Q.xy * recip;
288 * dx = recip * ( dQdx.xy - (tmp * dQdx.z) );
289 * dy = recip * ( dQdy.xy - (tmp * dQdy.z) );
291 ir_variable
*tmp
= temp(mem_ctx
, glsl_type::vec2_type
, "tmp");
292 EMIT(assign(tmp
, mul(swizzle_xy(Q
), recip
)));
293 EMIT(assign(dx
, mul(recip
, sub(swizzle_xy(dQdx
),
294 mul(tmp
, swizzle_z(dQdx
))))));
295 EMIT(assign(dy
, mul(recip
, sub(swizzle_xy(dQdy
),
296 mul(tmp
, swizzle_z(dQdy
))))));
298 /* M = max(dot(dx, dx), dot(dy, dy)); */
299 ir_variable
*M
= temp(mem_ctx
, glsl_type::float_type
, "M");
300 EMIT(assign(M
, max2(dot(dx
, dx
), dot(dy
, dy
))));
302 /* size has textureSize() of LOD 0 */
303 ir_variable
*L
= temp(mem_ctx
, glsl_type::float_type
, "L");
304 EMIT(assign(L
, swizzle_x(size
)));
306 ir_variable
*result
= temp(mem_ctx
, glsl_type::float_type
, "result");
308 /* result = -1.0 + 0.5 * log2(L * L * M); */
310 add(new(mem_ctx
)ir_constant(-1.0f
),
311 mul(new(mem_ctx
)ir_constant(0.5f
),
312 expr(ir_unop_log2
, mul(mul(L
, L
), M
))))));
314 /* 3. final assignment of parameters to textureLod call */
315 ir
->lod_info
.lod
= new (mem_ctx
) ir_dereference_variable(result
);
321 /* Calculate rho from equation 3.20 of the GL 3.0 specification. */
323 if (dPdx
->type
->is_scalar()) {
324 rho
= expr(ir_binop_max
, expr(ir_unop_abs
, dPdx
),
325 expr(ir_unop_abs
, dPdy
));
327 rho
= expr(ir_binop_max
, expr(ir_unop_sqrt
, dot(dPdx
, dPdx
)),
328 expr(ir_unop_sqrt
, dot(dPdy
, dPdy
)));
331 /* lambda_base = log2(rho). We're ignoring GL state biases for now. */
332 ir
->lod_info
.lod
= expr(ir_unop_log2
, rho
);
336 return visit_continue
;
/* Entry point of the pass: runs lower_texture_grad_visitor over an
 * instruction list.
 *
 * brw:          driver context; its gen and is_haswell fields are read
 * instructions: IR instruction list handed to visit_list_elements()
 *
 * NOTE(review): the return type and return statement are not visible in this
 * copy of the file -- confirm against the upstream source.
 */
342 brw_lower_texture_gradients(struct brw_context
*brw
,
343 struct exec_list
*instructions
)
/* The flag is true for gen >= 8 or Haswell; the visitor's need_lowering
 * test reads it.
 */
345 bool has_sample_d_c
= brw
->gen
>= 8 || brw
->is_haswell
;
346 lower_texture_grad_visitor
v(has_sample_d_c
);
/* Walk the instruction list with the visitor. */
348 visit_list_elements(&v
, instructions
);