/* freedreno: gallium driver for adreno
 * mesa.git: src/gallium/drivers/freedreno/ir.c
 */
1 /*
2 * Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include "ir.h"
25
26 #include <stdlib.h>
27 #include <stdio.h>
28 #include <string.h>
29 #include <assert.h>
30
31 #include "freedreno_util.h"
32 #include "instr.h"
33
/* Debug tracing: the DBG() call sits behind `if (0)`, so it is never
 * executed.
 */
#define DEBUG_MSG(f, ...) \
	do { \
		if (0) \
			DBG(f, ##__VA_ARGS__); \
	} while (0)

/* Warnings and errors go through DBG() with a fixed prefix.  `f` has to be
 * a string literal, since it is pasted directly after the prefix literal.
 */
#define WARN_MSG(f, ...)  DBG("WARN: "f, ##__VA_ARGS__)
#define ERROR_MSG(f, ...) DBG("ERROR: "f, ##__VA_ARGS__)

/* Upper bound for register numbers accepted by ir_reg_create(). */
#define REG_MASK 0x3f
39
40 static int cf_emit(struct ir_cf *cf, instr_cf_t *instr);
41
42 static int instr_emit(struct ir_instruction *instr, uint32_t *dwords,
43 uint32_t idx, struct ir_shader_info *info);
44
45 static void reg_update_stats(struct ir_register *reg,
46 struct ir_shader_info *info, bool dest);
47 static uint32_t reg_fetch_src_swiz(struct ir_register *reg, uint32_t n);
48 static uint32_t reg_fetch_dst_swiz(struct ir_register *reg);
49 static uint32_t reg_alu_dst_swiz(struct ir_register *reg);
50 static uint32_t reg_alu_src_swiz(struct ir_register *reg);
51
52 /* simple allocator to carve allocations out of an up-front allocated heap,
53 * so that we can free everything easily in one shot.
54 */
55 static void * ir_alloc(struct ir_shader *shader, int sz)
56 {
57 void *ptr = &shader->heap[shader->heap_idx];
58 shader->heap_idx += ALIGN(sz, 4);
59 return ptr;
60 }
61
/* Duplicate a NUL-terminated string into the shader's heap.
 *
 * A NULL 'str' yields NULL.  The copy comes from ir_alloc(), so it is not
 * freed individually.
 */
static char * ir_strdup(struct ir_shader *shader, const char *str)
{
	char *copy;
	int len;

	if (!str)
		return NULL;

	len = strlen(str);
	copy = ir_alloc(shader, len + 1);
	memcpy(copy, str, len);
	copy[len] = '\0';

	return copy;
}
73
74 struct ir_shader * ir_shader_create(void)
75 {
76 DEBUG_MSG("");
77 return calloc(1, sizeof(struct ir_shader));
78 }
79
/* Release a shader obtained from ir_shader_create().  Everything handed out
 * by ir_alloc() lives inside the shader, so this single free() covers it
 * all.  Passing NULL is fine, as free(NULL) is a no-op.
 */
void ir_shader_destroy(struct ir_shader *shader)
{
	DEBUG_MSG("");
	free(shader);
}
85
86 /* resolve addr/cnt/sequence fields in the individual CF's */
87 static int shader_resolve(struct ir_shader *shader, struct ir_shader_info *info)
88 {
89 uint32_t addr;
90 unsigned i;
91 int j;
92
93 addr = shader->cfs_count / 2;
94 for (i = 0; i < shader->cfs_count; i++) {
95 struct ir_cf *cf = shader->cfs[i];
96 if ((cf->cf_type == EXEC) || (cf->cf_type == EXEC_END)) {
97 uint32_t sequence = 0;
98
99 if (cf->exec.addr && (cf->exec.addr != addr))
100 WARN_MSG("invalid addr '%d' at CF %d", cf->exec.addr, i);
101 if (cf->exec.cnt && (cf->exec.cnt != cf->exec.instrs_count))
102 WARN_MSG("invalid cnt '%d' at CF %d", cf->exec.cnt, i);
103
104 for (j = cf->exec.instrs_count - 1; j >= 0; j--) {
105 struct ir_instruction *instr = cf->exec.instrs[j];
106 sequence <<= 2;
107 if (instr->instr_type == IR_FETCH)
108 sequence |= 0x1;
109 if (instr->sync)
110 sequence |= 0x2;
111 }
112
113 cf->exec.addr = addr;
114 cf->exec.cnt = cf->exec.instrs_count;
115 cf->exec.sequence = sequence;
116
117 addr += cf->exec.instrs_count;
118 }
119 }
120
121 info->sizedwords = 3 * addr;
122
123 return 0;
124 }
125
126 void * ir_shader_assemble(struct ir_shader *shader, struct ir_shader_info *info)
127 {
128 uint32_t i, j;
129 uint32_t *ptr, *dwords = NULL;
130 uint32_t idx = 0;
131 int ret;
132
133 info->sizedwords = 0;
134 info->max_reg = -1;
135 info->max_input_reg = 0;
136 info->regs_written = 0;
137
138 /* we need an even # of CF's.. insert a NOP if needed */
139 if (shader->cfs_count != ALIGN(shader->cfs_count, 2))
140 ir_cf_create(shader, NOP);
141
142 /* first pass, resolve sizes and addresses: */
143 ret = shader_resolve(shader, info);
144 if (ret) {
145 ERROR_MSG("resolve failed: %d", ret);
146 goto fail;
147 }
148
149 ptr = dwords = calloc(1, 4 * info->sizedwords);
150
151 /* second pass, emit CF program in pairs: */
152 for (i = 0; i < shader->cfs_count; i += 2) {
153 instr_cf_t *cfs = (instr_cf_t *)ptr;
154 ret = cf_emit(shader->cfs[i], &cfs[0]);
155 if (ret) {
156 ERROR_MSG("CF emit failed: %d\n", ret);
157 goto fail;
158 }
159 ret = cf_emit(shader->cfs[i+1], &cfs[1]);
160 if (ret) {
161 ERROR_MSG("CF emit failed: %d\n", ret);
162 goto fail;
163 }
164 ptr += 3;
165 assert((ptr - dwords) <= info->sizedwords);
166 }
167
168 /* third pass, emit ALU/FETCH: */
169 for (i = 0; i < shader->cfs_count; i++) {
170 struct ir_cf *cf = shader->cfs[i];
171 if ((cf->cf_type == EXEC) || (cf->cf_type == EXEC_END)) {
172 for (j = 0; j < cf->exec.instrs_count; j++) {
173 ret = instr_emit(cf->exec.instrs[j], ptr, idx++, info);
174 if (ret) {
175 ERROR_MSG("instruction emit failed: %d", ret);
176 goto fail;
177 }
178 ptr += 3;
179 assert((ptr - dwords) <= info->sizedwords);
180 }
181 }
182 }
183
184 return dwords;
185
186 fail:
187 free(dwords);
188 return NULL;
189 }
190
191
192 struct ir_attribute * ir_attribute_create(struct ir_shader *shader,
193 int rstart, int num, const char *name)
194 {
195 struct ir_attribute *a = ir_alloc(shader, sizeof(struct ir_attribute));
196 DEBUG_MSG("R%d-R%d: %s", rstart, rstart + num - 1, name);
197 a->name = ir_strdup(shader, name);
198 a->rstart = rstart;
199 a->num = num;
200 assert(shader->attributes_count < ARRAY_SIZE(shader->attributes));
201 shader->attributes[shader->attributes_count++] = a;
202 return a;
203 }
204
205 struct ir_const * ir_const_create(struct ir_shader *shader,
206 int cstart, float v0, float v1, float v2, float v3)
207 {
208 struct ir_const *c = ir_alloc(shader, sizeof(struct ir_const));
209 DEBUG_MSG("C%d: %f, %f, %f, %f", cstart, v0, v1, v2, v3);
210 c->val[0] = v0;
211 c->val[1] = v1;
212 c->val[2] = v2;
213 c->val[3] = v3;
214 c->cstart = cstart;
215 assert(shader->consts_count < ARRAY_SIZE(shader->consts));
216 shader->consts[shader->consts_count++] = c;
217 return c;
218 }
219
220 struct ir_sampler * ir_sampler_create(struct ir_shader *shader,
221 int idx, const char *name)
222 {
223 struct ir_sampler *s = ir_alloc(shader, sizeof(struct ir_sampler));
224 DEBUG_MSG("CONST(%d): %s", idx, name);
225 s->name = ir_strdup(shader, name);
226 s->idx = idx;
227 assert(shader->samplers_count < ARRAY_SIZE(shader->samplers));
228 shader->samplers[shader->samplers_count++] = s;
229 return s;
230 }
231
232 struct ir_uniform * ir_uniform_create(struct ir_shader *shader,
233 int cstart, int num, const char *name)
234 {
235 struct ir_uniform *u = ir_alloc(shader, sizeof(struct ir_uniform));
236 DEBUG_MSG("C%d-C%d: %s", cstart, cstart + num - 1, name);
237 u->name = ir_strdup(shader, name);
238 u->cstart = cstart;
239 u->num = num;
240 assert(shader->uniforms_count < ARRAY_SIZE(shader->uniforms));
241 shader->uniforms[shader->uniforms_count++] = u;
242 return u;
243 }
244
245 struct ir_varying * ir_varying_create(struct ir_shader *shader,
246 int rstart, int num, const char *name)
247 {
248 struct ir_varying *v = ir_alloc(shader, sizeof(struct ir_varying));
249 DEBUG_MSG("R%d-R%d: %s", rstart, rstart + num - 1, name);
250 v->name = ir_strdup(shader, name);
251 v->rstart = rstart;
252 v->num = num;
253 assert(shader->varyings_count < ARRAY_SIZE(shader->varyings));
254 shader->varyings[shader->varyings_count++] = v;
255 return v;
256 }
257
258
259 struct ir_cf * ir_cf_create(struct ir_shader *shader, instr_cf_opc_t cf_type)
260 {
261 struct ir_cf *cf = ir_alloc(shader, sizeof(struct ir_cf));
262 DEBUG_MSG("%d", cf_type);
263 cf->shader = shader;
264 cf->cf_type = cf_type;
265 assert(shader->cfs_count < ARRAY_SIZE(shader->cfs));
266 shader->cfs[shader->cfs_count++] = cf;
267 return cf;
268 }
269
270
271 /*
272 * CF instructions:
273 */
274
275 static int cf_emit(struct ir_cf *cf, instr_cf_t *instr)
276 {
277 memset(instr, 0, sizeof(*instr));
278
279 instr->opc = cf->cf_type;
280
281 switch (cf->cf_type) {
282 case NOP:
283 break;
284 case EXEC:
285 case EXEC_END:
286 assert(cf->exec.addr <= 0x1ff);
287 assert(cf->exec.cnt <= 0x6);
288 assert(cf->exec.sequence <= 0xfff);
289 instr->exec.address = cf->exec.addr;
290 instr->exec.count = cf->exec.cnt;
291 instr->exec.serialize = cf->exec.sequence;
292 break;
293 case ALLOC:
294 assert(cf->alloc.size <= 0xf);
295 instr->alloc.size = cf->alloc.size;
296 switch (cf->alloc.type) {
297 case SQ_POSITION:
298 case SQ_PARAMETER_PIXEL:
299 instr->alloc.buffer_select = cf->alloc.type;
300 break;
301 default:
302 ERROR_MSG("invalid alloc type: %d", cf->alloc.type);
303 return -1;
304 }
305 break;
306 case COND_EXEC:
307 case COND_EXEC_END:
308 case COND_PRED_EXEC:
309 case COND_PRED_EXEC_END:
310 case LOOP_START:
311 case LOOP_END:
312 case COND_CALL:
313 case RETURN:
314 case COND_JMP:
315 case COND_EXEC_PRED_CLEAN:
316 case COND_EXEC_PRED_CLEAN_END:
317 case MARK_VS_FETCH_DONE:
318 ERROR_MSG("TODO");
319 return -1;
320 }
321
322 return 0;
323 }
324
325
326 struct ir_instruction * ir_instr_create(struct ir_cf *cf, int instr_type)
327 {
328 struct ir_instruction *instr =
329 ir_alloc(cf->shader, sizeof(struct ir_instruction));
330 DEBUG_MSG("%d", instr_type);
331 instr->shader = cf->shader;
332 instr->pred = cf->shader->pred;
333 instr->instr_type = instr_type;
334 assert(cf->exec.instrs_count < ARRAY_SIZE(cf->exec.instrs));
335 cf->exec.instrs[cf->exec.instrs_count++] = instr;
336 return instr;
337 }
338
339
340 /*
341 * FETCH instructions:
342 */
343
344 static int instr_emit_fetch(struct ir_instruction *instr,
345 uint32_t *dwords, uint32_t idx,
346 struct ir_shader_info *info)
347 {
348 instr_fetch_t *fetch = (instr_fetch_t *)dwords;
349 int reg = 0;
350 struct ir_register *dst_reg = instr->regs[reg++];
351 struct ir_register *src_reg = instr->regs[reg++];
352
353 memset(fetch, 0, sizeof(*fetch));
354
355 reg_update_stats(dst_reg, info, true);
356 reg_update_stats(src_reg, info, false);
357
358 fetch->opc = instr->fetch.opc;
359
360 if (instr->fetch.opc == VTX_FETCH) {
361 instr_fetch_vtx_t *vtx = &fetch->vtx;
362
363 assert(instr->fetch.stride <= 0xff);
364 assert(instr->fetch.fmt <= 0x3f);
365 assert(instr->fetch.const_idx <= 0x1f);
366 assert(instr->fetch.const_idx_sel <= 0x3);
367
368 vtx->src_reg = src_reg->num;
369 vtx->src_swiz = reg_fetch_src_swiz(src_reg, 1);
370 vtx->dst_reg = dst_reg->num;
371 vtx->dst_swiz = reg_fetch_dst_swiz(dst_reg);
372 vtx->must_be_one = 1;
373 vtx->const_index = instr->fetch.const_idx;
374 vtx->const_index_sel = instr->fetch.const_idx_sel;
375 vtx->format_comp_all = !!instr->fetch.is_signed;
376 vtx->num_format_all = !instr->fetch.is_normalized;
377 vtx->format = instr->fetch.fmt;
378 vtx->stride = instr->fetch.stride;
379 vtx->offset = instr->fetch.offset;
380
381 if (instr->pred != IR_PRED_NONE) {
382 vtx->pred_select = 1;
383 vtx->pred_condition = (instr->pred == IR_PRED_EQ) ? 1 : 0;
384 }
385
386 /* XXX seems like every FETCH but the first has
387 * this bit set:
388 */
389 vtx->reserved3 = (idx > 0) ? 0x1 : 0x0;
390 vtx->reserved0 = (idx > 0) ? 0x2 : 0x3;
391 } else if (instr->fetch.opc == TEX_FETCH) {
392 instr_fetch_tex_t *tex = &fetch->tex;
393
394 assert(instr->fetch.const_idx <= 0x1f);
395
396 tex->src_reg = src_reg->num;
397 tex->src_swiz = reg_fetch_src_swiz(src_reg, 3);
398 tex->dst_reg = dst_reg->num;
399 tex->dst_swiz = reg_fetch_dst_swiz(dst_reg);
400 tex->const_idx = instr->fetch.const_idx;
401 tex->mag_filter = TEX_FILTER_USE_FETCH_CONST;
402 tex->min_filter = TEX_FILTER_USE_FETCH_CONST;
403 tex->mip_filter = TEX_FILTER_USE_FETCH_CONST;
404 tex->aniso_filter = ANISO_FILTER_USE_FETCH_CONST;
405 tex->arbitrary_filter = ARBITRARY_FILTER_USE_FETCH_CONST;
406 tex->vol_mag_filter = TEX_FILTER_USE_FETCH_CONST;
407 tex->vol_min_filter = TEX_FILTER_USE_FETCH_CONST;
408 tex->use_comp_lod = 1;
409 tex->sample_location = SAMPLE_CENTER;
410
411 if (instr->pred != IR_PRED_NONE) {
412 tex->pred_select = 1;
413 tex->pred_condition = (instr->pred == IR_PRED_EQ) ? 1 : 0;
414 }
415
416 } else {
417 ERROR_MSG("invalid fetch opc: %d\n", instr->fetch.opc);
418 return -1;
419 }
420
421 return 0;
422 }
423
424 /*
425 * ALU instructions:
426 */
427
428 static int instr_emit_alu(struct ir_instruction *instr, uint32_t *dwords,
429 struct ir_shader_info *info)
430 {
431 int reg = 0;
432 instr_alu_t *alu = (instr_alu_t *)dwords;
433 struct ir_register *dst_reg = instr->regs[reg++];
434 struct ir_register *src1_reg;
435 struct ir_register *src2_reg;
436 struct ir_register *src3_reg;
437
438 memset(alu, 0, sizeof(*alu));
439
440 /* handle instructions w/ 3 src operands: */
441 switch (instr->alu.vector_opc) {
442 case MULADDv:
443 case CNDEv:
444 case CNDGTEv:
445 case CNDGTv:
446 case DOT2ADDv:
447 /* note: disassembler lists 3rd src first, ie:
448 * MULADDv Rdst = Rsrc3 + (Rsrc1 * Rsrc2)
449 * which is the reason for this strange ordering.
450 */
451 src3_reg = instr->regs[reg++];
452 break;
453 default:
454 src3_reg = NULL;
455 break;
456 }
457
458 src1_reg = instr->regs[reg++];
459 src2_reg = instr->regs[reg++];
460
461 reg_update_stats(dst_reg, info, true);
462 reg_update_stats(src1_reg, info, false);
463 reg_update_stats(src2_reg, info, false);
464
465 assert((dst_reg->flags & ~IR_REG_EXPORT) == 0);
466 assert(!dst_reg->swizzle || (strlen(dst_reg->swizzle) == 4));
467 assert((src1_reg->flags & IR_REG_EXPORT) == 0);
468 assert(!src1_reg->swizzle || (strlen(src1_reg->swizzle) == 4));
469 assert((src2_reg->flags & IR_REG_EXPORT) == 0);
470 assert(!src2_reg->swizzle || (strlen(src2_reg->swizzle) == 4));
471
472 if (instr->alu.vector_opc == ~0) {
473 alu->vector_opc = MAXv;
474 alu->vector_write_mask = 0;
475 } else {
476 alu->vector_opc = instr->alu.vector_opc;
477 alu->vector_write_mask = reg_alu_dst_swiz(dst_reg);
478 }
479
480 alu->vector_dest = dst_reg->num;
481 alu->export_data = !!(dst_reg->flags & IR_REG_EXPORT);
482
483 // TODO predicate case/condition.. need to add to parser
484
485 alu->src2_reg = src2_reg->num;
486 alu->src2_swiz = reg_alu_src_swiz(src2_reg);
487 alu->src2_reg_negate = !!(src2_reg->flags & IR_REG_NEGATE);
488 alu->src2_reg_abs = !!(src2_reg->flags & IR_REG_ABS);
489 alu->src2_sel = !(src2_reg->flags & IR_REG_CONST);
490
491 alu->src1_reg = src1_reg->num;
492 alu->src1_swiz = reg_alu_src_swiz(src1_reg);
493 alu->src1_reg_negate = !!(src1_reg->flags & IR_REG_NEGATE);
494 alu->src1_reg_abs = !!(src1_reg->flags & IR_REG_ABS);
495 alu->src1_sel = !(src1_reg->flags & IR_REG_CONST);
496
497 alu->vector_clamp = instr->alu.vector_clamp;
498 alu->scalar_clamp = instr->alu.scalar_clamp;
499
500 if (instr->alu.scalar_opc != ~0) {
501 struct ir_register *sdst_reg = instr->regs[reg++];
502
503 reg_update_stats(sdst_reg, info, true);
504
505 assert(sdst_reg->flags == dst_reg->flags);
506
507 if (src3_reg) {
508 assert(src3_reg == instr->regs[reg++]);
509 } else {
510 src3_reg = instr->regs[reg++];
511 }
512
513 alu->scalar_dest = sdst_reg->num;
514 alu->scalar_write_mask = reg_alu_dst_swiz(sdst_reg);
515 alu->scalar_opc = instr->alu.scalar_opc;
516 } else {
517 /* not sure if this is required, but adreno compiler seems
518 * to always set scalar opc to MAXs if it is not used:
519 */
520 alu->scalar_opc = MAXs;
521 }
522
523 if (src3_reg) {
524 reg_update_stats(src3_reg, info, false);
525
526 alu->src3_reg = src3_reg->num;
527 alu->src3_swiz = reg_alu_src_swiz(src3_reg);
528 alu->src3_reg_negate = !!(src3_reg->flags & IR_REG_NEGATE);
529 alu->src3_reg_abs = !!(src3_reg->flags & IR_REG_ABS);
530 alu->src3_sel = !(src3_reg->flags & IR_REG_CONST);
531 } else {
532 /* not sure if this is required, but adreno compiler seems
533 * to always set register bank for 3rd src if unused:
534 */
535 alu->src3_sel = 1;
536 }
537
538 if (instr->pred != IR_PRED_NONE) {
539 alu->pred_select = (instr->pred == IR_PRED_EQ) ? 3 : 2;
540 }
541
542 return 0;
543 }
544
545 static int instr_emit(struct ir_instruction *instr, uint32_t *dwords,
546 uint32_t idx, struct ir_shader_info *info)
547 {
548 switch (instr->instr_type) {
549 case IR_FETCH: return instr_emit_fetch(instr, dwords, idx, info);
550 case IR_ALU: return instr_emit_alu(instr, dwords, info);
551 }
552 return -1;
553 }
554
555
556 struct ir_register * ir_reg_create(struct ir_instruction *instr,
557 int num, const char *swizzle, int flags)
558 {
559 struct ir_register *reg =
560 ir_alloc(instr->shader, sizeof(struct ir_register));
561 DEBUG_MSG("%x, %d, %s", flags, num, swizzle);
562 assert(num <= REG_MASK);
563 reg->flags = flags;
564 reg->num = num;
565 reg->swizzle = ir_strdup(instr->shader, swizzle);
566 assert(instr->regs_count < ARRAY_SIZE(instr->regs));
567 instr->regs[instr->regs_count++] = reg;
568 return reg;
569 }
570
571 static void reg_update_stats(struct ir_register *reg,
572 struct ir_shader_info *info, bool dest)
573 {
574 if (!(reg->flags & (IR_REG_CONST|IR_REG_EXPORT))) {
575 info->max_reg = max(info->max_reg, reg->num);
576
577 if (dest) {
578 info->regs_written |= (1 << reg->num);
579 } else if (!(info->regs_written & (1 << reg->num))) {
580 /* for registers that haven't been written, they must be an
581 * input register that the thread scheduler (presumably?)
582 * needs to know about:
583 */
584 info->max_input_reg = max(info->max_input_reg, reg->num);
585 }
586 }
587 }
588
589 static uint32_t reg_fetch_src_swiz(struct ir_register *reg, uint32_t n)
590 {
591 uint32_t swiz = 0;
592 int i;
593
594 assert(reg->flags == 0);
595 assert(reg->swizzle);
596
597 DEBUG_MSG("fetch src R%d.%s", reg->num, reg->swizzle);
598
599 for (i = n-1; i >= 0; i--) {
600 swiz <<= 2;
601 switch (reg->swizzle[i]) {
602 default:
603 ERROR_MSG("invalid fetch src swizzle: %s", reg->swizzle);
604 case 'x': swiz |= 0x0; break;
605 case 'y': swiz |= 0x1; break;
606 case 'z': swiz |= 0x2; break;
607 case 'w': swiz |= 0x3; break;
608 }
609 }
610
611 return swiz;
612 }
613
614 static uint32_t reg_fetch_dst_swiz(struct ir_register *reg)
615 {
616 uint32_t swiz = 0;
617 int i;
618
619 assert(reg->flags == 0);
620 assert(!reg->swizzle || (strlen(reg->swizzle) == 4));
621
622 DEBUG_MSG("fetch dst R%d.%s", reg->num, reg->swizzle);
623
624 if (reg->swizzle) {
625 for (i = 3; i >= 0; i--) {
626 swiz <<= 3;
627 switch (reg->swizzle[i]) {
628 default:
629 ERROR_MSG("invalid dst swizzle: %s", reg->swizzle);
630 case 'x': swiz |= 0x0; break;
631 case 'y': swiz |= 0x1; break;
632 case 'z': swiz |= 0x2; break;
633 case 'w': swiz |= 0x3; break;
634 case '0': swiz |= 0x4; break;
635 case '1': swiz |= 0x5; break;
636 case '_': swiz |= 0x7; break;
637 }
638 }
639 } else {
640 swiz = 0x688;
641 }
642
643 return swiz;
644 }
645
646 /* actually, a write-mask */
647 static uint32_t reg_alu_dst_swiz(struct ir_register *reg)
648 {
649 uint32_t swiz = 0;
650 int i;
651
652 assert((reg->flags & ~IR_REG_EXPORT) == 0);
653 assert(!reg->swizzle || (strlen(reg->swizzle) == 4));
654
655 DEBUG_MSG("alu dst R%d.%s", reg->num, reg->swizzle);
656
657 if (reg->swizzle) {
658 for (i = 3; i >= 0; i--) {
659 swiz <<= 1;
660 if (reg->swizzle[i] == "xyzw"[i]) {
661 swiz |= 0x1;
662 } else if (reg->swizzle[i] != '_') {
663 ERROR_MSG("invalid dst swizzle: %s", reg->swizzle);
664 break;
665 }
666 }
667 } else {
668 swiz = 0xf;
669 }
670
671 return swiz;
672 }
673
674 static uint32_t reg_alu_src_swiz(struct ir_register *reg)
675 {
676 uint32_t swiz = 0;
677 int i;
678
679 assert((reg->flags & IR_REG_EXPORT) == 0);
680 assert(!reg->swizzle || (strlen(reg->swizzle) == 4));
681
682 DEBUG_MSG("vector src R%d.%s", reg->num, reg->swizzle);
683
684 if (reg->swizzle) {
685 for (i = 3; i >= 0; i--) {
686 swiz <<= 2;
687 switch (reg->swizzle[i]) {
688 default:
689 ERROR_MSG("invalid vector src swizzle: %s", reg->swizzle);
690 case 'x': swiz |= (0x0 - i) & 0x3; break;
691 case 'y': swiz |= (0x1 - i) & 0x3; break;
692 case 'z': swiz |= (0x2 - i) & 0x3; break;
693 case 'w': swiz |= (0x3 - i) & 0x3; break;
694 }
695 }
696 } else {
697 swiz = 0x0;
698 }
699
700 return swiz;
701 }