9e7f274b7d1b43ef7cc0fff51f5bc0b8f7ad933a
[mesa.git] / lower_packed_varyings.cpp
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /**
25  * \file lower_packed_varyings.cpp
26 *
27 * This lowering pass generates GLSL code that manually packs varyings into
28 * vec4 slots, for the benefit of back-ends that don't support packed varyings
29 * natively.
30 *
31 * For example, the following shader:
32 *
33 * out mat3x2 foo; // location=4, location_frac=0
34 * out vec3 bar[2]; // location=5, location_frac=2
35 *
36 * main()
37 * {
38 * ...
39 * }
40 *
41 * Is rewritten to:
42 *
43 * mat3x2 foo;
44 * vec3 bar[2];
45 * out vec4 packed4; // location=4, location_frac=0
46 * out vec4 packed5; // location=5, location_frac=0
47 * out vec4 packed6; // location=6, location_frac=0
48 *
49 * main()
50 * {
51 * ...
52 * packed4.xy = foo[0];
53 * packed4.zw = foo[1];
54 * packed5.xy = foo[2];
55 * packed5.zw = bar[0].xy;
56 * packed6.x = bar[0].z;
57 * packed6.yzw = bar[1];
58 * }
59 *
60 * This lowering pass properly handles "double parking" of a varying vector
61 * across two varying slots. For example, in the code above, two of the
62 * components of bar[0] are stored in packed5, and the remaining component is
63 * stored in packed6.
64 *
65 * Note that in theory, the extra instructions may cause some loss of
66 * performance. However, hopefully in most cases the performance loss will
67 * either be absorbed by a later optimization pass, or it will be offset by
68 * memory bandwidth savings (because fewer varyings are used).
69 *
70 * This lowering pass also packs flat floats, ints, and uints together, by
71 * using ivec4 as the base type of flat "varyings", and using appropriate
72 * casts to convert floats and uints into ints.
73 */
74
75 #include "glsl_symbol_table.h"
76 #include "ir.h"
77 #include "ir_optimization.h"
78
/**
 * Visitor that performs varying packing.  For each varying declared in the
 * shader, this visitor determines whether it needs to be packed.  If so, it
 * demotes it to an ordinary global, creates new packed varyings, and
 * generates assignments to convert between the original varying and the
 * packed varying.
 */
class lower_packed_varyings_visitor
{
public:
   lower_packed_varyings_visitor(void *mem_ctx, unsigned location_base,
                                 unsigned locations_used,
                                 ir_variable_mode mode,
                                 exec_list *main_instructions);

   void run(exec_list *instructions);

private:
   /* Build an assignment lhs = rhs, bitcasting rhs when packing mixed-type
    * flat varyings into the common ivec4 representation. */
   ir_assignment *bitwise_assign_pack(ir_rvalue *lhs, ir_rvalue *rhs);
   /* Inverse of bitwise_assign_pack: bitcast from the ivec4 representation
    * back to the varying's declared type when unpacking. */
   ir_assignment *bitwise_assign_unpack(ir_rvalue *lhs, ir_rvalue *rhs);
   unsigned lower_rvalue(ir_rvalue *rvalue, unsigned fine_location,
                         ir_variable *unpacked_var, const char *name);
   unsigned lower_arraylike(ir_rvalue *rvalue, unsigned array_size,
                            unsigned fine_location,
                            ir_variable *unpacked_var, const char *name);
   ir_variable *get_packed_varying(unsigned location,
                                   ir_variable *unpacked_var,
                                   const char *name);
   bool needs_lowering(ir_variable *var);

   /**
    * Memory context used to allocate new instructions for the shader.
    */
   void * const mem_ctx;

   /**
    * Location representing the first generic varying slot for this shader
    * stage (e.g. VERT_RESULT_VAR0 if we are packing vertex shader outputs).
    * Varyings whose location is less than this value are assumed to
    * correspond to special fixed function hardware, so they are not lowered.
    */
   const unsigned location_base;

   /**
    * Number of generic varying slots which are used by this shader.  This is
    * used to allocate temporary intermediate data structures.  If any
    * varying used by this shader has a location greater than or equal to
    * location_base + locations_used, an assertion will fire.
    */
   const unsigned locations_used;

   /**
    * Array of pointers to the packed varyings that have been created for each
    * generic varying slot.  NULL entries in this array indicate varying slots
    * for which a packed varying has not been created yet.
    */
   ir_variable **packed_varyings;

   /**
    * Type of varying which is being lowered in this pass (either ir_var_in or
    * ir_var_out).
    */
   const ir_variable_mode mode;

   /**
    * List of instructions corresponding to the main() function.  This is
    * where we add instructions to pack or unpack the varyings.
    */
   exec_list *main_instructions;
};
149
/**
 * Construct the visitor.
 *
 * packed_varyings is allocated with rzalloc_array_size(), which
 * zero-initializes the array; every slot therefore starts out NULL,
 * meaning "no packed varying created for this location yet".
 */
lower_packed_varyings_visitor::lower_packed_varyings_visitor(
      void *mem_ctx, unsigned location_base, unsigned locations_used,
      ir_variable_mode mode, exec_list *main_instructions)
   : mem_ctx(mem_ctx),
     location_base(location_base),
     locations_used(locations_used),
     packed_varyings((ir_variable **)
                     rzalloc_array_size(mem_ctx, sizeof(*packed_varyings),
                                        locations_used)),
     mode(mode),
     main_instructions(main_instructions)
{
}
163
164 void
165 lower_packed_varyings_visitor::run(exec_list *instructions)
166 {
167 foreach_list (node, instructions) {
168 ir_variable *var = ((ir_instruction *) node)->as_variable();
169 if (var == NULL)
170 continue;
171
172 if (var->mode != this->mode ||
173 var->location < (int) this->location_base ||
174 !this->needs_lowering(var))
175 continue;
176
177 /* Change the old varying into an ordinary global. */
178 var->mode = ir_var_auto;
179
180 /* Create a reference to the old varying. */
181 ir_dereference_variable *deref
182 = new(this->mem_ctx) ir_dereference_variable(var);
183
184 /* Recursively pack or unpack it. */
185 this->lower_rvalue(deref, var->location * 4 + var->location_frac, var,
186 var->name);
187 }
188 }
189
190
191 /**
192 * Make an ir_assignment from \c rhs to \c lhs, performing appropriate
193 * bitcasts if necessary to match up types.
194 *
195 * This function is called when packing varyings.
196 */
197 ir_assignment *
198 lower_packed_varyings_visitor::bitwise_assign_pack(ir_rvalue *lhs,
199 ir_rvalue *rhs)
200 {
201 if (lhs->type->base_type != rhs->type->base_type) {
202 /* Since we only mix types in flat varyings, and we always store flat
203 * varyings as type ivec4, we need only produce conversions from (uint
204 * or float) to int.
205 */
206 assert(lhs->type->base_type == GLSL_TYPE_INT);
207 switch (rhs->type->base_type) {
208 case GLSL_TYPE_UINT:
209 rhs = new(this->mem_ctx)
210 ir_expression(ir_unop_u2i, lhs->type, rhs);
211 break;
212 case GLSL_TYPE_FLOAT:
213 rhs = new(this->mem_ctx)
214 ir_expression(ir_unop_bitcast_f2i, lhs->type, rhs);
215 break;
216 default:
217 assert(!"Unexpected type conversion while lowering varyings");
218 break;
219 }
220 }
221 return new(this->mem_ctx) ir_assignment(lhs, rhs);
222 }
223
224
225 /**
226 * Make an ir_assignment from \c rhs to \c lhs, performing appropriate
227 * bitcasts if necessary to match up types.
228 *
229 * This function is called when unpacking varyings.
230 */
231 ir_assignment *
232 lower_packed_varyings_visitor::bitwise_assign_unpack(ir_rvalue *lhs,
233 ir_rvalue *rhs)
234 {
235 if (lhs->type->base_type != rhs->type->base_type) {
236 /* Since we only mix types in flat varyings, and we always store flat
237 * varyings as type ivec4, we need only produce conversions from int to
238 * (uint or float).
239 */
240 assert(rhs->type->base_type == GLSL_TYPE_INT);
241 switch (lhs->type->base_type) {
242 case GLSL_TYPE_UINT:
243 rhs = new(this->mem_ctx)
244 ir_expression(ir_unop_i2u, lhs->type, rhs);
245 break;
246 case GLSL_TYPE_FLOAT:
247 rhs = new(this->mem_ctx)
248 ir_expression(ir_unop_bitcast_i2f, lhs->type, rhs);
249 break;
250 default:
251 assert(!"Unexpected type conversion while lowering varyings");
252 break;
253 }
254 }
255 return new(this->mem_ctx) ir_assignment(lhs, rhs);
256 }
257
258
/**
 * Recursively pack or unpack the given varying (or portion of a varying) by
 * traversing all of its constituent vectors.
 *
 * \param fine_location is the location where the first constituent vector
 * should be packed--the word "fine" indicates that this location is expressed
 * in multiples of a float, rather than multiples of a vec4 as is used
 * elsewhere in Mesa.
 *
 * \param unpacked_var the original (now demoted) varying, used to pick the
 * packed varying's type and interpolation parameters.
 *
 * \param name human-readable name of this (sub-)rvalue, used for naming the
 * packed varyings.
 *
 * \return the location where the next constituent vector (after this one)
 * should be packed.
 */
unsigned
lower_packed_varyings_visitor::lower_rvalue(ir_rvalue *rvalue,
                                            unsigned fine_location,
                                            ir_variable *unpacked_var,
                                            const char *name)
{
   /* FINISHME: Support for "varying" records in GLSL 1.50. */
   assert(!rvalue->type->is_record());

   if (rvalue->type->is_array()) {
      /* Arrays are packed/unpacked by considering each array element in
       * sequence.
       */
      return this->lower_arraylike(rvalue, rvalue->type->array_size(),
                                   fine_location, unpacked_var, name);
   } else if (rvalue->type->is_matrix()) {
      /* Matrices are packed/unpacked by considering each column vector in
       * sequence.
       */
      return this->lower_arraylike(rvalue, rvalue->type->matrix_columns,
                                   fine_location, unpacked_var, name);
   } else if (rvalue->type->vector_elements + fine_location % 4 > 4) {
      /* This vector is going to be "double parked" across two varying slots,
       * so handle it as two separate assignments.
       */
      unsigned left_components = 4 - fine_location % 4;
      unsigned right_components
         = rvalue->type->vector_elements - left_components;
      unsigned left_swizzle_values[4] = { 0, 0, 0, 0 };
      unsigned right_swizzle_values[4] = { 0, 0, 0, 0 };
      /* Since we took this branch, each half has at most 3 components, so
       * these 4-char buffers are always NUL-terminated.
       */
      char left_swizzle_name[4] = { 0, 0, 0, 0 };
      char right_swizzle_name[4] = { 0, 0, 0, 0 };
      for (unsigned i = 0; i < left_components; i++) {
         left_swizzle_values[i] = i;
         left_swizzle_name[i] = "xyzw"[i];
      }
      for (unsigned i = 0; i < right_components; i++) {
         right_swizzle_values[i] = i + left_components;
         right_swizzle_name[i] = "xyzw"[i + left_components];
      }
      /* The same IR tree cannot appear twice in the instruction stream, so
       * the second swizzle operates on a clone of rvalue.
       */
      ir_swizzle *left_swizzle = new(this->mem_ctx)
         ir_swizzle(rvalue, left_swizzle_values, left_components);
      ir_swizzle *right_swizzle = new(this->mem_ctx)
         ir_swizzle(rvalue->clone(this->mem_ctx, NULL), right_swizzle_values,
                    right_components);
      char *left_name
         = ralloc_asprintf(this->mem_ctx, "%s.%s", name, left_swizzle_name);
      char *right_name
         = ralloc_asprintf(this->mem_ctx, "%s.%s", name, right_swizzle_name);
      /* Recurse on each half; the first call advances fine_location to the
       * start of the next slot for the second call.
       */
      fine_location = this->lower_rvalue(left_swizzle, fine_location,
                                         unpacked_var, left_name);
      return this->lower_rvalue(right_swizzle, fine_location, unpacked_var,
                                right_name);
   } else {
      /* No special handling is necessary; pack the rvalue into the
       * varying.
       */
      unsigned swizzle_values[4] = { 0, 0, 0, 0 };
      unsigned components = rvalue->type->vector_elements;
      unsigned location = fine_location / 4;
      unsigned location_frac = fine_location % 4;
      for (unsigned i = 0; i < components; ++i)
         swizzle_values[i] = i + location_frac;
      ir_dereference_variable *packed_deref = new(this->mem_ctx)
         ir_dereference_variable(this->get_packed_varying(location,
                                                          unpacked_var, name));
      ir_swizzle *swizzle = new(this->mem_ctx)
         ir_swizzle(packed_deref, swizzle_values, components);
      if (this->mode == ir_var_out) {
         /* Outputs: pack at the end of main(), after all writes to the
          * (now global) unpacked variable.
          */
         ir_assignment *assignment
            = this->bitwise_assign_pack(swizzle, rvalue);
         this->main_instructions->push_tail(assignment);
      } else {
         /* Inputs: unpack at the start of main(), before any use of the
          * unpacked variable.
          */
         ir_assignment *assignment
            = this->bitwise_assign_unpack(rvalue, swizzle);
         this->main_instructions->push_head(assignment);
      }
      return fine_location + components;
   }
}
351
352 /**
353 * Recursively pack or unpack a varying for which we need to iterate over its
354 * constituent elements, accessing each one using an ir_dereference_array.
355 * This takes care of both arrays and matrices, since ir_dereference_array
356 * treats a matrix like an array of its column vectors.
357 */
358 unsigned
359 lower_packed_varyings_visitor::lower_arraylike(ir_rvalue *rvalue,
360 unsigned array_size,
361 unsigned fine_location,
362 ir_variable *unpacked_var,
363 const char *name)
364 {
365 for (unsigned i = 0; i < array_size; i++) {
366 if (i != 0)
367 rvalue = rvalue->clone(this->mem_ctx, NULL);
368 ir_constant *constant = new(this->mem_ctx) ir_constant(i);
369 ir_dereference_array *dereference_array = new(this->mem_ctx)
370 ir_dereference_array(rvalue, constant);
371 char *subscripted_name
372 = ralloc_asprintf(this->mem_ctx, "%s[%d]", name, i);
373 fine_location = this->lower_rvalue(dereference_array, fine_location,
374 unpacked_var, subscripted_name);
375 }
376 return fine_location;
377 }
378
379 /**
380 * Retrieve the packed varying corresponding to the given varying location.
381 * If no packed varying has been created for the given varying location yet,
382 * create it and add it to the shader before returning it.
383 *
384 * The newly created varying inherits its interpolation parameters from \c
385 * unpacked_var. Its base type is ivec4 if we are lowering a flat varying,
386 * vec4 otherwise.
387 */
388 ir_variable *
389 lower_packed_varyings_visitor::get_packed_varying(unsigned location,
390 ir_variable *unpacked_var,
391 const char *name)
392 {
393 unsigned slot = location - this->location_base;
394 assert(slot < locations_used);
395 if (this->packed_varyings[slot] == NULL) {
396 char *packed_name = ralloc_asprintf(this->mem_ctx, "packed:%s", name);
397 const glsl_type *packed_type;
398 if (unpacked_var->interpolation == INTERP_QUALIFIER_FLAT)
399 packed_type = glsl_type::ivec4_type;
400 else
401 packed_type = glsl_type::vec4_type;
402 ir_variable *packed_var = new(this->mem_ctx)
403 ir_variable(packed_type, packed_name, this->mode);
404 packed_var->centroid = unpacked_var->centroid;
405 packed_var->interpolation = unpacked_var->interpolation;
406 packed_var->location = location;
407 unpacked_var->insert_before(packed_var);
408 this->packed_varyings[slot] = packed_var;
409 } else {
410 ralloc_asprintf_append((char **) &this->packed_varyings[slot]->name,
411 ",%s", name);
412 }
413 return this->packed_varyings[slot];
414 }
415
416 bool
417 lower_packed_varyings_visitor::needs_lowering(ir_variable *var)
418 {
419 /* Things composed of vec4's don't need lowering. Everything else does. */
420 const glsl_type *type = var->type;
421 if (type->is_array())
422 type = type->fields.array;
423 if (type->vector_elements == 4)
424 return false;
425 return true;
426 }
427
428 void
429 lower_packed_varyings(void *mem_ctx, unsigned location_base,
430 unsigned locations_used, ir_variable_mode mode,
431 gl_shader *shader)
432 {
433 exec_list *instructions = shader->ir;
434 ir_function *main_func = shader->symbols->get_function("main");
435 exec_list void_parameters;
436 ir_function_signature *main_func_sig
437 = main_func->matching_signature(&void_parameters);
438 exec_list *main_instructions = &main_func_sig->body;
439 lower_packed_varyings_visitor visitor(mem_ctx, location_base,
440 locations_used, mode,
441 main_instructions);
442 visitor.run(instructions);
443 }