r300/compiler: move util functions to radeon_compiler_util
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / radeon_compiler.c
1 /*
2 * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
22
23 #include "radeon_compiler.h"
24
25 #include <stdarg.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28
29 #include "radeon_dataflow.h"
30 #include "radeon_program.h"
31 #include "radeon_program_pair.h"
32 #include "radeon_compiler_util.h"
33
34
35 void rc_init(struct radeon_compiler * c)
36 {
37 memset(c, 0, sizeof(*c));
38
39 memory_pool_init(&c->Pool);
40 c->Program.Instructions.Prev = &c->Program.Instructions;
41 c->Program.Instructions.Next = &c->Program.Instructions;
42 c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
43 }
44
45 void rc_destroy(struct radeon_compiler * c)
46 {
47 rc_constants_destroy(&c->Program.Constants);
48 memory_pool_destroy(&c->Pool);
49 free(c->ErrorMsg);
50 }
51
52 void rc_debug(struct radeon_compiler * c, const char * fmt, ...)
53 {
54 va_list ap;
55
56 if (!(c->Debug & RC_DBG_LOG))
57 return;
58
59 va_start(ap, fmt);
60 vfprintf(stderr, fmt, ap);
61 va_end(ap);
62 }
63
64 void rc_error(struct radeon_compiler * c, const char * fmt, ...)
65 {
66 va_list ap;
67
68 c->Error = 1;
69
70 if (!c->ErrorMsg) {
71 /* Only remember the first error */
72 char buf[1024];
73 int written;
74
75 va_start(ap, fmt);
76 written = vsnprintf(buf, sizeof(buf), fmt, ap);
77 va_end(ap);
78
79 if (written < sizeof(buf)) {
80 c->ErrorMsg = strdup(buf);
81 } else {
82 c->ErrorMsg = malloc(written + 1);
83
84 va_start(ap, fmt);
85 vsnprintf(c->ErrorMsg, written + 1, fmt, ap);
86 va_end(ap);
87 }
88 }
89
90 if (c->Debug & RC_DBG_LOG) {
91 fprintf(stderr, "r300compiler error: ");
92
93 va_start(ap, fmt);
94 vfprintf(stderr, fmt, ap);
95 va_end(ap);
96 }
97 }
98
/**
 * Report a failed internal assertion through rc_error().
 *
 * \param c          compiler instance that receives the error
 * \param file       source file name of the failed assertion
 * \param line       source line of the failed assertion
 * \param assertion  text of the assertion that failed
 * \return always 1
 */
int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion)
{
	rc_error(c,
		 "ICE at %s:%i: assertion failed: %s\n",
		 file, line, assertion);

	return 1;
}
104
105 /**
106 * Recompute c->Program.InputsRead and c->Program.OutputsWritten
107 * based on which inputs and outputs are actually referenced
108 * in program instructions.
109 */
110 void rc_calculate_inputs_outputs(struct radeon_compiler * c)
111 {
112 struct rc_instruction *inst;
113
114 c->Program.InputsRead = 0;
115 c->Program.OutputsWritten = 0;
116
117 for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next)
118 {
119 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
120 int i;
121
122 for (i = 0; i < opcode->NumSrcRegs; ++i) {
123 if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT)
124 c->Program.InputsRead |= 1 << inst->U.I.SrcReg[i].Index;
125 }
126
127 if (opcode->HasDstReg) {
128 if (inst->U.I.DstReg.File == RC_FILE_OUTPUT)
129 c->Program.OutputsWritten |= 1 << inst->U.I.DstReg.Index;
130 }
131 }
132 }
133
134 /**
135 * Rewrite the program such that everything that source the given input
136 * register will source new_input instead.
137 */
138 void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input)
139 {
140 struct rc_instruction * inst;
141
142 c->Program.InputsRead &= ~(1 << input);
143
144 for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
145 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
146 unsigned i;
147
148 for(i = 0; i < opcode->NumSrcRegs; ++i) {
149 if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == input) {
150 inst->U.I.SrcReg[i].File = new_input.File;
151 inst->U.I.SrcReg[i].Index = new_input.Index;
152 inst->U.I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->U.I.SrcReg[i].Swizzle);
153 if (!inst->U.I.SrcReg[i].Abs) {
154 inst->U.I.SrcReg[i].Negate ^= new_input.Negate;
155 inst->U.I.SrcReg[i].Abs = new_input.Abs;
156 }
157
158 c->Program.InputsRead |= 1 << new_input.Index;
159 }
160 }
161 }
162 }
163
164
165 /**
166 * Rewrite the program such that everything that writes into the given
167 * output register will instead write to new_output. The new_output
168 * writemask is honoured.
169 */
170 void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask)
171 {
172 struct rc_instruction * inst;
173
174 c->Program.OutputsWritten &= ~(1 << output);
175
176 for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
177 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
178
179 if (opcode->HasDstReg) {
180 if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
181 inst->U.I.DstReg.Index = new_output;
182 inst->U.I.DstReg.WriteMask &= writemask;
183
184 c->Program.OutputsWritten |= 1 << new_output;
185 }
186 }
187 }
188 }
189
190
191 /**
192 * Rewrite the program such that a given output is duplicated.
193 */
194 void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output)
195 {
196 unsigned tempreg = rc_find_free_temporary(c);
197 struct rc_instruction * inst;
198
199 for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
200 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
201
202 if (opcode->HasDstReg) {
203 if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
204 inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
205 inst->U.I.DstReg.Index = tempreg;
206 }
207 }
208 }
209
210 inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
211 inst->U.I.Opcode = RC_OPCODE_MOV;
212 inst->U.I.DstReg.File = RC_FILE_OUTPUT;
213 inst->U.I.DstReg.Index = output;
214
215 inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
216 inst->U.I.SrcReg[0].Index = tempreg;
217 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
218
219 inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
220 inst->U.I.Opcode = RC_OPCODE_MOV;
221 inst->U.I.DstReg.File = RC_FILE_OUTPUT;
222 inst->U.I.DstReg.Index = dup_output;
223
224 inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
225 inst->U.I.SrcReg[0].Index = tempreg;
226 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
227
228 c->Program.OutputsWritten |= 1 << dup_output;
229 }
230
231
232 /**
233 * Introduce standard code fragment to deal with fragment.position.
234 */
235 void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input,
236 int full_vtransform)
237 {
238 unsigned tempregi = rc_find_free_temporary(c);
239 struct rc_instruction * inst_rcp;
240 struct rc_instruction * inst_mul;
241 struct rc_instruction * inst_mad;
242 struct rc_instruction * inst;
243
244 c->Program.InputsRead &= ~(1 << wpos);
245 c->Program.InputsRead |= 1 << new_input;
246
247 /* perspective divide */
248 inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions);
249 inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
250
251 inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
252 inst_rcp->U.I.DstReg.Index = tempregi;
253 inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
254
255 inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT;
256 inst_rcp->U.I.SrcReg[0].Index = new_input;
257 inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
258
259 inst_mul = rc_insert_new_instruction(c, inst_rcp);
260 inst_mul->U.I.Opcode = RC_OPCODE_MUL;
261
262 inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
263 inst_mul->U.I.DstReg.Index = tempregi;
264 inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
265
266 inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT;
267 inst_mul->U.I.SrcReg[0].Index = new_input;
268
269 inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
270 inst_mul->U.I.SrcReg[1].Index = tempregi;
271 inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
272
273 /* viewport transformation */
274 inst_mad = rc_insert_new_instruction(c, inst_mul);
275 inst_mad->U.I.Opcode = RC_OPCODE_MAD;
276
277 inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
278 inst_mad->U.I.DstReg.Index = tempregi;
279 inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
280
281 inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
282 inst_mad->U.I.SrcReg[0].Index = tempregi;
283 inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
284
285 inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
286 inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
287
288 inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT;
289 inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0;
290
291 if (full_vtransform) {
292 inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0);
293 inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0);
294 } else {
295 inst_mad->U.I.SrcReg[1].Index =
296 inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0);
297 }
298
299 for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) {
300 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
301 unsigned i;
302
303 for(i = 0; i < opcode->NumSrcRegs; i++) {
304 if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
305 inst->U.I.SrcReg[i].Index == wpos) {
306 inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
307 inst->U.I.SrcReg[i].Index = tempregi;
308 }
309 }
310 }
311 }
312
313
314 /**
315 * The FACE input in hardware contains 1 if it's a back face, 0 otherwise.
316 * Gallium and OpenGL define it the other way around.
317 *
318 * So let's just negate FACE at the beginning of the shader and rewrite the rest
319 * of the shader to read from the newly allocated temporary.
320 */
321 void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face)
322 {
323 unsigned tempregi = rc_find_free_temporary(c);
324 struct rc_instruction *inst_add;
325 struct rc_instruction *inst;
326
327 /* perspective divide */
328 inst_add = rc_insert_new_instruction(c, &c->Program.Instructions);
329 inst_add->U.I.Opcode = RC_OPCODE_ADD;
330
331 inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
332 inst_add->U.I.DstReg.Index = tempregi;
333 inst_add->U.I.DstReg.WriteMask = RC_MASK_X;
334
335 inst_add->U.I.SrcReg[0].File = RC_FILE_NONE;
336 inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
337
338 inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT;
339 inst_add->U.I.SrcReg[1].Index = face;
340 inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX;
341 inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW;
342
343 for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) {
344 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
345 unsigned i;
346
347 for(i = 0; i < opcode->NumSrcRegs; i++) {
348 if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
349 inst->U.I.SrcReg[i].Index == face) {
350 inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
351 inst->U.I.SrcReg[i].Index = tempregi;
352 }
353 }
354 }
355 }
356
357 static void reg_count_callback(void * userdata, struct rc_instruction * inst,
358 rc_register_file file, unsigned int index, unsigned int mask)
359 {
360 unsigned int * max_reg = userdata;
361 if (file == RC_FILE_TEMPORARY)
362 index > *max_reg ? *max_reg = index : 0;
363 }
364
365 static void print_stats(struct radeon_compiler * c)
366 {
367 struct rc_instruction * tmp;
368 unsigned max_reg, insts, fc, tex, alpha, rgb, presub;
369 max_reg = insts = fc = tex = alpha = rgb = presub = 0;
370 for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions;
371 tmp = tmp->Next){
372 const struct rc_opcode_info * info;
373 rc_for_all_reads_mask(tmp, reg_count_callback, &max_reg);
374 if (tmp->Type == RC_INSTRUCTION_NORMAL) {
375 if (tmp->U.I.PreSub.Opcode != RC_PRESUB_NONE)
376 presub++;
377 info = rc_get_opcode_info(tmp->U.I.Opcode);
378 } else {
379 if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used)
380 presub++;
381 if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
382 presub++;
383 /* Assuming alpha will never be a flow control or
384 * a tex instruction. */
385 if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP)
386 alpha++;
387 if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP)
388 rgb++;
389 info = rc_get_opcode_info(tmp->U.P.RGB.Opcode);
390 }
391 if (info->IsFlowControl)
392 fc++;
393 if (info->HasTexture)
394 tex++;
395 insts++;
396 }
397 if (insts < 4)
398 return;
399 fprintf(stderr,"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n"
400 "~%4u Instructions\n"
401 "~%4u Vector Instructions (RGB)\n"
402 "~%4u Scalar Instructions (Alpha)\n"
403 "~%4u Flow Control Instructions\n"
404 "~%4u Texture Instructions\n"
405 "~%4u Presub Operations\n"
406 "~%4u Temporary Registers\n"
407 "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n",
408 insts, rgb, alpha, fc, tex, presub, max_reg + 1);
409 }
410
411 /* Executes a list of compiler passes given in the parameter 'list'. */
412 void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list,
413 const char *shader_name)
414 {
415 if (c->Debug & RC_DBG_LOG) {
416 fprintf(stderr, "%s: before compilation\n", shader_name);
417 rc_print_program(&c->Program);
418 }
419
420 for (unsigned i = 0; list[i].name; i++) {
421 if (list[i].predicate) {
422 list[i].run(c, list[i].user);
423
424 if (c->Error)
425 return;
426
427 if ((c->Debug & RC_DBG_LOG) && list[i].dump) {
428 fprintf(stderr, "%s: after '%s'\n", shader_name, list[i].name);
429 rc_print_program(&c->Program);
430 }
431 }
432 }
433 if (c->Debug & RC_DBG_STATS)
434 print_stats(c);
435 }
436
437 void rc_validate_final_shader(struct radeon_compiler *c, void *user)
438 {
439 /* Check the number of constants. */
440 if (c->Program.Constants.Count > c->max_constants) {
441 rc_error(c, "Too many constants. Max: 256, Got: %i\n",
442 c->Program.Constants.Count);
443 }
444 }