Merge remote branch 'origin/master' into pipe-video
[mesa.git] / src / gallium / drivers / r600 / r600_asm.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include <stdio.h>
24 #include <errno.h>
25 #include "util/u_format.h"
26 #include "util/u_memory.h"
27 #include "pipe/p_shader_tokens.h"
28 #include "r600_pipe.h"
29 #include "r600_sq.h"
30 #include "r600_opcodes.h"
31 #include "r600_asm.h"
32 #include "r600_formats.h"
33 #include "r600d.h"
34
35 #define NUM_OF_CYCLES 3
36 #define NUM_OF_COMPONENTS 4
37
38 #define PREV_ALU(alu) LIST_ENTRY(struct r600_bc_alu, alu->list.prev, list)
39 #define NEXT_ALU(alu) LIST_ENTRY(struct r600_bc_alu, alu->list.next, list)
40
/* Return the number of source operands consumed by an ALU instruction.
 * OP3 instructions always read three sources; OP2 instructions are looked
 * up per chip revision because r600/r700 and Evergreen use different
 * opcode encodings.  Unknown opcodes log an error and fall through to the
 * conservative maximum of 3. */
static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r600_bc_alu *alu)
{
	if(alu->is_op3)
		return 3;

	switch (bc->chiprev) {
	case CHIPREV_R600:
	case CHIPREV_R700:
		switch (alu->inst) {
		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP:
			return 0;
		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD:
		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT:
		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE:
		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT:
		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE:
		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE:
		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL:
		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX:
		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN:
		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE:
		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE:
		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT:
		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE:
		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE:
		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT:
		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE:
		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE:
		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4:
		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE:
		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE:
			return 2;

		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV:
		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA:
		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR:
		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT:
		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT:
		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR:
		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC:
		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE:
		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED:
		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE:
		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED:
		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE:
		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED:
		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE:
		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT:
		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT:
		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN:
		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS:
			return 1;
		default: R600_ERR(
			"Need instruction operand number for 0x%x.\n", alu->inst);
		}
		break;
	case CHIPREV_EVERGREEN:
		switch (alu->inst) {
		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP:
			return 0;
		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD:
		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT:
		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE:
		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT:
		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE:
		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE:
		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL:
		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX:
		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN:
		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE:
		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE:
		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT:
		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE:
		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE:
		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT:
		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE:
		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE:
		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4:
		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE:
		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE:
		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY:
		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW:
			return 2;

		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV:
		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT:
		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT:
		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR:
		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC:
		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE:
		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED:
		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE:
		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED:
		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE:
		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED:
		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE:
		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT:
		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR:
		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT:
		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN:
		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS:
			return 1;
		default: R600_ERR(
			"Need instruction operand number for 0x%x.\n", alu->inst);
		}
		break;
	}

	/* conservative fallback for unrecognised opcodes */
	return 3;
}
151
152 int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id);
153
154 static struct r600_bc_cf *r600_bc_cf(void)
155 {
156 struct r600_bc_cf *cf = CALLOC_STRUCT(r600_bc_cf);
157
158 if (cf == NULL)
159 return NULL;
160 LIST_INITHEAD(&cf->list);
161 LIST_INITHEAD(&cf->alu);
162 LIST_INITHEAD(&cf->vtx);
163 LIST_INITHEAD(&cf->tex);
164 cf->barrier = 1;
165 return cf;
166 }
167
168 static struct r600_bc_alu *r600_bc_alu(void)
169 {
170 struct r600_bc_alu *alu = CALLOC_STRUCT(r600_bc_alu);
171
172 if (alu == NULL)
173 return NULL;
174 LIST_INITHEAD(&alu->list);
175 return alu;
176 }
177
178 static struct r600_bc_vtx *r600_bc_vtx(void)
179 {
180 struct r600_bc_vtx *vtx = CALLOC_STRUCT(r600_bc_vtx);
181
182 if (vtx == NULL)
183 return NULL;
184 LIST_INITHEAD(&vtx->list);
185 return vtx;
186 }
187
188 static struct r600_bc_tex *r600_bc_tex(void)
189 {
190 struct r600_bc_tex *tex = CALLOC_STRUCT(r600_bc_tex);
191
192 if (tex == NULL)
193 return NULL;
194 LIST_INITHEAD(&tex->list);
195 return tex;
196 }
197
198 int r600_bc_init(struct r600_bc *bc, enum radeon_family family)
199 {
200 LIST_INITHEAD(&bc->cf);
201 bc->family = family;
202 switch (bc->family) {
203 case CHIP_R600:
204 case CHIP_RV610:
205 case CHIP_RV630:
206 case CHIP_RV670:
207 case CHIP_RV620:
208 case CHIP_RV635:
209 case CHIP_RS780:
210 case CHIP_RS880:
211 bc->chiprev = CHIPREV_R600;
212 break;
213 case CHIP_RV770:
214 case CHIP_RV730:
215 case CHIP_RV710:
216 case CHIP_RV740:
217 bc->chiprev = CHIPREV_R700;
218 break;
219 case CHIP_CEDAR:
220 case CHIP_REDWOOD:
221 case CHIP_JUNIPER:
222 case CHIP_CYPRESS:
223 case CHIP_HEMLOCK:
224 case CHIP_PALM:
225 case CHIP_BARTS:
226 case CHIP_TURKS:
227 case CHIP_CAICOS:
228 bc->chiprev = CHIPREV_EVERGREEN;
229 break;
230 default:
231 R600_ERR("unknown family %d\n", bc->family);
232 return -EINVAL;
233 }
234 return 0;
235 }
236
237 static int r600_bc_add_cf(struct r600_bc *bc)
238 {
239 struct r600_bc_cf *cf = r600_bc_cf();
240
241 if (cf == NULL)
242 return -ENOMEM;
243 LIST_ADDTAIL(&cf->list, &bc->cf);
244 if (bc->cf_last)
245 cf->id = bc->cf_last->id + 2;
246 bc->cf_last = cf;
247 bc->ncf++;
248 bc->ndw += 2;
249 bc->force_add_cf = 0;
250 return 0;
251 }
252
253 static void r600_bc_remove_cf(struct r600_bc *bc, struct r600_bc_cf *cf)
254 {
255 struct r600_bc_cf *other;
256 LIST_FOR_EACH_ENTRY(other, &bc->cf, list) {
257 if (other->id > cf->id)
258 other->id -= 2;
259 if (other->cf_addr > cf->id)
260 other->cf_addr -= 2;
261 }
262 LIST_DEL(&cf->list);
263 free(cf);
264 }
265
/* Move CF instruction 'cf' so it sits immediately before 'next',
 * renumbering the ids (and recorded jump targets) of the other CF
 * instructions.  Each CF instruction is two dwords, hence the +/-2
 * adjustments. */
static void r600_bc_move_cf(struct r600_bc *bc, struct r600_bc_cf *cf, struct r600_bc_cf *next)
{
	struct r600_bc_cf *prev = LIST_ENTRY(struct r600_bc_cf, next->list.prev, list);
	unsigned old_id = cf->id;
	/* id at the destination: 0 if moving to the list head, else after prev */
	unsigned new_id = next->list.prev == &bc->cf ? 0 : prev->id + 2;
	struct r600_bc_cf *other;

	if (prev == cf || next == cf)
		return; /* position hasn't changed */

	LIST_DEL(&cf->list);
	LIST_FOR_EACH_ENTRY(other, &bc->cf, list) {
		/* close the gap at the old position, open one at the new */
		if (other->id > old_id)
			other->id -= 2;
		if (other->id >= new_id)
			other->id += 2;
		if (other->cf_addr > old_id)
			other->cf_addr -= 2;
		/* NOTE(review): cf_addr uses '>' here while id uses '>=' above --
		 * confirm this asymmetry is intended for jump targets */
		if (other->cf_addr > new_id)
			other->cf_addr += 2;
	}
	cf->id = new_id;
	LIST_ADD(&cf->list, &prev->list);
}
290
/* Add an export to the bytecode.  When the previous CF instruction is an
 * export with identical type/size/swizzles, and the new gpr/array_base
 * range lines up back-to-back with it (in either direction), the two are
 * merged into one burst (capped at a burst_count of 16) instead of
 * emitting a new CF instruction. */
int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output)
{
	int r;

	if (bc->cf_last && bc->cf_last->inst == BC_INST(bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT) &&
		output->type == bc->cf_last->output.type &&
		output->elem_size == bc->cf_last->output.elem_size &&
		output->swizzle_x == bc->cf_last->output.swizzle_x &&
		output->swizzle_y == bc->cf_last->output.swizzle_y &&
		output->swizzle_z == bc->cf_last->output.swizzle_z &&
		output->swizzle_w == bc->cf_last->output.swizzle_w &&
		(output->burst_count + bc->cf_last->output.burst_count) <= 16) {

		if ((output->gpr + output->burst_count) == bc->cf_last->output.gpr &&
			(output->array_base + output->burst_count) == bc->cf_last->output.array_base) {

			/* new range immediately precedes the recorded burst:
			 * extend it downwards by re-basing on the new output */
			bc->cf_last->output.gpr = output->gpr;
			bc->cf_last->output.array_base = output->array_base;
			bc->cf_last->output.burst_count += output->burst_count;
			return 0;

		} else if (output->gpr == (bc->cf_last->output.gpr + bc->cf_last->output.burst_count) &&
			output->array_base == (bc->cf_last->output.array_base + bc->cf_last->output.burst_count)) {

			/* new range immediately follows the recorded burst:
			 * just grow the count */
			bc->cf_last->output.burst_count += output->burst_count;
			return 0;
		}
	}

	/* no merge possible: emit a new export CF instruction */
	r = r600_bc_add_cf(bc);
	if (r)
		return r;
	bc->cf_last->inst = BC_INST(bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
	memcpy(&bc->cf_last->output, output, sizeof(struct r600_bc_output));
	bc->cf_last->output.burst_count = 1;
	return 0;
}
328
/* alu predicate instructions */
/* Returns non-zero when 'alu' is one of the OP2 predicate-set
 * instructions (which set the execution predicate and may appear only
 * once per group).  Opcode values differ per chip revision. */
static int is_alu_pred_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
{
	switch (bc->chiprev) {
	case CHIPREV_R600:
	case CHIPREV_R700:
		return !alu->is_op3 && (
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_UINT ||
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_UINT ||
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE ||
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT ||
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE ||
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE ||
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_INV ||
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_POP ||
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_CLR ||
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_RESTORE ||
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH ||
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH ||
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH ||
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH ||
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT ||
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_INT ||
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_INT ||
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT ||
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH_INT ||
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH_INT ||
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH_INT ||
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH_INT ||
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT ||
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT);
	case CHIPREV_EVERGREEN:
	default:
		return !alu->is_op3 && (
			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_UINT ||
			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_UINT ||
			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE ||
			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT ||
			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE ||
			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE ||
			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_INV ||
			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_POP ||
			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_CLR ||
			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_RESTORE ||
			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH ||
			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH ||
			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH ||
			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH ||
			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT ||
			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_INT ||
			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_INT ||
			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT ||
			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH_INT ||
			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH_INT ||
			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH_INT ||
			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH_INT ||
			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT ||
			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT);
	}
}
389
/* alu kill instructions */
/* Returns non-zero when 'alu' is one of the OP2 KILL* instructions
 * (conditional pixel kill).  Opcode values differ per chip revision. */
static int is_alu_kill_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
{
	switch (bc->chiprev) {
	case CHIPREV_R600:
	case CHIPREV_R700:
		return !alu->is_op3 && (
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE ||
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT ||
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE ||
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE ||
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT ||
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT ||
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT ||
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT ||
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT ||
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT);
	case CHIPREV_EVERGREEN:
	default:
		return !alu->is_op3 && (
			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE ||
			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT ||
			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE ||
			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE ||
			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT ||
			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT ||
			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT ||
			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT ||
			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT ||
			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT);
	}
}
422
/* ALU instructions that may only exist once per instruction group
 * (kill and predicate-set instructions). */
static int is_alu_once_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
{
	if (is_alu_kill_inst(bc, alu))
		return 1;
	return is_alu_pred_inst(bc, alu);
}
429
/* Returns non-zero for reduction instructions (CUBE/DOT4/MAX4), which
 * occupy all four vector slots of a group. */
static int is_alu_reduction_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
{
	switch (bc->chiprev) {
	case CHIPREV_R600:
	case CHIPREV_R700:
		return !alu->is_op3 && (
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE ||
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 ||
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE ||
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4);
	case CHIPREV_EVERGREEN:
	default:
		return !alu->is_op3 && (
			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE ||
			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 ||
			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE ||
			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4);
	}
}
449
450 static int is_alu_cube_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
451 {
452 switch (bc->chiprev) {
453 case CHIPREV_R600:
454 case CHIPREV_R700:
455 return !alu->is_op3 &&
456 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE;
457 case CHIPREV_EVERGREEN:
458 default:
459 return !alu->is_op3 &&
460 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE;
461 }
462 }
463
/* Returns non-zero for MOVA* instructions (address-register moves).
 * Evergreen only has the integer variant. */
static int is_alu_mova_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
{
	switch (bc->chiprev) {
	case CHIPREV_R600:
	case CHIPREV_R700:
		return !alu->is_op3 && (
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA ||
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR ||
			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT);
	case CHIPREV_EVERGREEN:
	default:
		return !alu->is_op3 && (
			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT);
	}
}
479
/* alu instructions that can only execute on the vector unit
 * (reductions and MOVA). */
static int is_alu_vec_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
{
	if (is_alu_reduction_inst(bc, alu))
		return 1;
	return is_alu_mova_inst(bc, alu);
}
486
/* alu instructions that can only execute on the trans unit */
/* Transcendental/integer-heavy OP2 instructions and the MUL_LIT OP3
 * family run exclusively on the trans (scalar) unit; opcode sets differ
 * per chip revision. */
static int is_alu_trans_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
{
	switch (bc->chiprev) {
	case CHIPREV_R600:
	case CHIPREV_R700:
		if (!alu->is_op3)
			return alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT ||
				alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT ||
				alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT ||
				alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT ||
				alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT ||
				alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_INT ||
				alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT ||
				alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT ||
				alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT ||
				alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT ||
				alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT ||
				alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT ||
				alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS ||
				alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE ||
				alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED ||
				alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE ||
				alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED ||
				alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_FF ||
				alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE ||
				alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED ||
				alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_FF ||
				alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE ||
				alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN ||
				alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_IEEE;
		else
			return alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT ||
				alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_D2 ||
				alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M2 ||
				alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M4;
	case CHIPREV_EVERGREEN:
	default:
		if (!alu->is_op3)
			/* Note that FLT_TO_INT* instructions are vector instructions
			 * on Evergreen, despite what the documentation says. */
			return alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT ||
				alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT ||
				alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT ||
				alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT ||
				alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_INT ||
				alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT ||
				alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT ||
				alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT ||
				alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT ||
				alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT ||
				alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT ||
				alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS ||
				alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE ||
				alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED ||
				alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE ||
				alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED ||
				alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_FF ||
				alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE ||
				alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED ||
				alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_FF ||
				alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE ||
				alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN ||
				alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_IEEE;
		else
			return alu->inst == EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT;
	}
}
555
/* alu instructions that can execute on any unit (neither vector-only
 * nor trans-only). */
static int is_alu_any_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
{
	if (is_alu_vec_unit_inst(bc, alu))
		return 0;
	return !is_alu_trans_unit_inst(bc, alu);
}
562
/* Distribute the instructions of one ALU group (from alu_first up to the
 * instruction with ->last set) over the five hardware slots:
 * assignment[0..3] are the x/y/z/w vector units (indexed by destination
 * channel) and assignment[4] is the trans unit.  Instructions usable on
 * either unit go to the vector slot when it is free, otherwise to trans.
 * Returns -1 (after asserting) if a slot is claimed twice. */
static int assign_alu_units(struct r600_bc *bc, struct r600_bc_alu *alu_first,
		struct r600_bc_alu *assignment[5])
{
	struct r600_bc_alu *alu;
	unsigned i, chan, trans;

	for (i = 0; i < 5; i++)
		assignment[i] = NULL;

	for (alu = alu_first; alu; alu = LIST_ENTRY(struct r600_bc_alu, alu->list.next, list)) {
		chan = alu->dst.chan;
		if (is_alu_trans_unit_inst(bc, alu))
			trans = 1;
		else if (is_alu_vec_unit_inst(bc, alu))
			trans = 0;
		else if (assignment[chan])
			trans = 1; // assume ALU_INST_PREFER_VECTOR
		else
			trans = 0;

		if (trans) {
			if (assignment[4]) {
				assert(0); //ALU.Trans has already been allocated
				return -1;
			}
			assignment[4] = alu;
		} else {
			if (assignment[chan]) {
				assert(0); //ALU.chan has already been allocated
				return -1;
			}
			assignment[chan] = alu;
		}

		/* ->last marks the end of the group */
		if (alu->last)
			break;
	}
	return 0;
}
602
/* Tracks read-port usage while validating a candidate bank swizzle:
 * hw_gpr[cycle][component] records the GPR sel fetched on that port
 * (-1 = free); hw_cfile_addr/elem record the constant-file address and
 * element claimed by each of the (up to 4) constant read ports. */
struct alu_bank_swizzle {
	int hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS];
	int hw_cfile_addr[4];
	int hw_cfile_elem[4];
};
608
/* For each vector-unit bank swizzle, the fetch cycle used by source
 * operand 0, 1 and 2 respectively. */
static const unsigned cycle_for_bank_swizzle_vec[][3] = {
	[SQ_ALU_VEC_012] = { 0, 1, 2 },
	[SQ_ALU_VEC_021] = { 0, 2, 1 },
	[SQ_ALU_VEC_120] = { 1, 2, 0 },
	[SQ_ALU_VEC_102] = { 1, 0, 2 },
	[SQ_ALU_VEC_201] = { 2, 0, 1 },
	[SQ_ALU_VEC_210] = { 2, 1, 0 }
};
617
/* For each scalar (trans-unit) bank swizzle, the fetch cycle used by
 * source operand 0, 1 and 2 respectively. */
static const unsigned cycle_for_bank_swizzle_scl[][3] = {
	[SQ_ALU_SCL_210] = { 2, 1, 0 },
	[SQ_ALU_SCL_122] = { 1, 2, 2 },
	[SQ_ALU_SCL_212] = { 2, 1, 2 },
	[SQ_ALU_SCL_221] = { 2, 2, 1 }
};
624
625 static void init_bank_swizzle(struct alu_bank_swizzle *bs)
626 {
627 int i, cycle, component;
628 /* set up gpr use */
629 for (cycle = 0; cycle < NUM_OF_CYCLES; cycle++)
630 for (component = 0; component < NUM_OF_COMPONENTS; component++)
631 bs->hw_gpr[cycle][component] = -1;
632 for (i = 0; i < 4; i++)
633 bs->hw_cfile_addr[i] = -1;
634 for (i = 0; i < 4; i++)
635 bs->hw_cfile_elem[i] = -1;
636 }
637
638 static int reserve_gpr(struct alu_bank_swizzle *bs, unsigned sel, unsigned chan, unsigned cycle)
639 {
640 if (bs->hw_gpr[cycle][chan] == -1)
641 bs->hw_gpr[cycle][chan] = sel;
642 else if (bs->hw_gpr[cycle][chan] != (int)sel) {
643 // Another scalar operation has already used GPR read port for channel
644 return -1;
645 }
646 return 0;
647 }
648
649 static int reserve_cfile(struct r600_bc *bc, struct alu_bank_swizzle *bs, unsigned sel, unsigned chan)
650 {
651 int res, num_res = 4;
652 if (bc->chiprev >= CHIPREV_R700) {
653 num_res = 2;
654 chan /= 2;
655 }
656 for (res = 0; res < num_res; ++res) {
657 if (bs->hw_cfile_addr[res] == -1) {
658 bs->hw_cfile_addr[res] = sel;
659 bs->hw_cfile_elem[res] = chan;
660 return 0;
661 } else if (bs->hw_cfile_addr[res] == sel &&
662 bs->hw_cfile_elem[res] == chan)
663 return 0; // Read for this scalar element already reserved, nothing to do here.
664 }
665 // All cfile read ports are used, cannot reference vector element
666 return -1;
667 }
668
/* A source/destination selector in [0, 127] addresses a GPR. */
static int is_gpr(unsigned sel)
{
	/* 'sel' is unsigned, so the previous 'sel >= 0' check was always
	 * true (tautological, triggers -Wtype-limits); the upper bound
	 * alone is sufficient. */
	return sel <= 127;
}
673
/* CB constants start at 512, and get translated to a kcache index when ALU
 * clauses are constructed. Note that we handle kcache constants the same way
 * as (the now gone) cfile constants, is that really required? */
static int is_cfile(unsigned sel)
{
	if (sel >= 256 && sel < 512)
		return 1;	/* constant file */
	if (sel >= 512 && sel < 4607)
		return 1;	/* kcache before translate */
	if (sel >= 128 && sel < 192)
		return 1;	/* kcache after translate */
	return 0;
}
683
684 static int is_const(int sel)
685 {
686 return is_cfile(sel) ||
687 (sel >= V_SQ_ALU_SRC_0 &&
688 sel <= V_SQ_ALU_SRC_LITERAL);
689 }
690
691 static int check_vector(struct r600_bc *bc, struct r600_bc_alu *alu,
692 struct alu_bank_swizzle *bs, int bank_swizzle)
693 {
694 int r, src, num_src, sel, elem, cycle;
695
696 num_src = r600_bc_get_num_operands(bc, alu);
697 for (src = 0; src < num_src; src++) {
698 sel = alu->src[src].sel;
699 elem = alu->src[src].chan;
700 if (is_gpr(sel)) {
701 cycle = cycle_for_bank_swizzle_vec[bank_swizzle][src];
702 if (src == 1 && sel == alu->src[0].sel && elem == alu->src[0].chan)
703 // Nothing to do; special-case optimization,
704 // second source uses first source’s reservation
705 continue;
706 else {
707 r = reserve_gpr(bs, sel, elem, cycle);
708 if (r)
709 return r;
710 }
711 } else if (is_cfile(sel)) {
712 r = reserve_cfile(bc, bs, sel, elem);
713 if (r)
714 return r;
715 }
716 // No restrictions on PV, PS, literal or special constants
717 }
718 return 0;
719 }
720
/* Check (and reserve) the read ports a trans-unit instruction needs under
 * the candidate scalar bank swizzle.  The trans unit can read at most two
 * constants, and constants occupy the early fetch cycles, so GPR fetches
 * must land in later cycles.  Returns 0 when everything fits. */
static int check_scalar(struct r600_bc *bc, struct r600_bc_alu *alu,
		struct alu_bank_swizzle *bs, int bank_swizzle)
{
	int r, src, num_src, const_count, sel, elem, cycle;

	num_src = r600_bc_get_num_operands(bc, alu);
	/* first pass: count constant sources and reserve cfile ports */
	for (const_count = 0, src = 0; src < num_src; ++src) {
		sel = alu->src[src].sel;
		elem = alu->src[src].chan;
		if (is_const(sel)) { // Any constant, including literal and inline constants
			if (const_count >= 2)
				// More than two references to a constant in
				// transcendental operation.
				return -1;
			else
				const_count++;
		}
		if (is_cfile(sel)) {
			r = reserve_cfile(bc, bs, sel, elem);
			if (r)
				return r;
		}
	}
	/* second pass: reserve GPR read ports in cycles after the constants */
	for (src = 0; src < num_src; ++src) {
		sel = alu->src[src].sel;
		elem = alu->src[src].chan;
		if (is_gpr(sel)) {
			cycle = cycle_for_bank_swizzle_scl[bank_swizzle][src];
			if (cycle < const_count)
				// Cycle for GPR load conflicts with
				// constant load in transcendental operation.
				return -1;
			r = reserve_gpr(bs, sel, elem, cycle);
			if (r)
				return r;
		}
		// Constants already processed
		// No restrictions on PV, PS
	}
	return 0;
}
762
/* Pick a bank swizzle for every occupied slot of an instruction group so
 * that all GPR/cfile read-port constraints are satisfied, storing the
 * result in each instruction.  If any slot has a forced swizzle, forced
 * values are honoured and no search is performed.  Returns -1 when no
 * working combination exists. */
static int check_and_set_bank_swizzle(struct r600_bc *bc,
		struct r600_bc_alu *slots[5])
{
	struct alu_bank_swizzle bs;
	int bank_swizzle[5];
	int i, r = 0, forced = 0;

	for (i = 0; i < 5; i++)
		if (slots[i] && slots[i]->bank_swizzle_force) {
			slots[i]->bank_swizzle = slots[i]->bank_swizzle_force;
			forced = 1;
		}

	/* NOTE(review): when only some slots are forced the remaining slots
	 * keep their existing bank_swizzle without validation -- confirm
	 * that this is intended. */
	if (forced)
		return 0;

	// just check every possible combination of bank swizzle
	// not very efficient, but works on the first try in most of the cases
	for (i = 0; i < 4; i++)
		bank_swizzle[i] = SQ_ALU_VEC_012;
	bank_swizzle[4] = SQ_ALU_SCL_210;
	while(bank_swizzle[4] <= SQ_ALU_SCL_221) {
		init_bank_swizzle(&bs);
		for (i = 0; i < 4; i++) {
			if (slots[i]) {
				r = check_vector(bc, slots[i], &bs, bank_swizzle[i]);
				if (r)
					break;
			}
		}
		if (!r && slots[4]) {
			r = check_scalar(bc, slots[4], &bs, bank_swizzle[4]);
		}
		if (!r) {
			/* found a combination that fits: commit it */
			for (i = 0; i < 5; i++) {
				if (slots[i])
					slots[i]->bank_swizzle = bank_swizzle[i];
			}
			return 0;
		}

		/* odometer-style increment over all slot swizzles; overflow of
		 * slot 4 (the scalar slot) terminates the while loop */
		for (i = 0; i < 5; i++) {
			bank_swizzle[i]++;
			if (bank_swizzle[i] <= SQ_ALU_VEC_210)
				break;
			else
				bank_swizzle[i] = SQ_ALU_VEC_012;
		}
	}

	// couldn't find a working swizzle
	return -1;
}
816
/* Rewrite GPR reads in 'slots' that consume results written by the
 * previous instruction group ('alu_prev') to use the PV (per-channel
 * vector result) and PS (scalar result) bypass registers, reducing GPR
 * read-port pressure. */
static int replace_gpr_with_pv_ps(struct r600_bc *bc,
		struct r600_bc_alu *slots[5], struct r600_bc_alu *alu_prev)
{
	struct r600_bc_alu *prev[5];
	int gpr[5], chan[5];
	int i, j, r, src, num_src;

	r = assign_alu_units(bc, alu_prev, prev);
	if (r)
		return r;

	/* record which gpr/channel each previous slot wrote;
	 * gpr[i] = -1 when the slot wrote nothing forwardable
	 * (no write, or a relative destination) */
	for (i = 0; i < 5; ++i) {
		if(prev[i] && prev[i]->dst.write && !prev[i]->dst.rel) {
			gpr[i] = prev[i]->dst.sel;
			/* cube writes more than PV.X */
			if (!is_alu_cube_inst(bc, prev[i]) && is_alu_reduction_inst(bc, prev[i]))
				chan[i] = 0;
			else
				chan[i] = prev[i]->dst.chan;
		} else
			gpr[i] = -1;
	}

	for (i = 0; i < 5; ++i) {
		struct r600_bc_alu *alu = slots[i];
		if(!alu)
			continue;

		num_src = r600_bc_get_num_operands(bc, alu);
		for (src = 0; src < num_src; ++src) {
			/* only absolute GPR reads can be redirected */
			if (!is_gpr(alu->src[src].sel) || alu->src[src].rel)
				continue;

			/* the trans-unit result is forwarded through PS */
			if (alu->src[src].sel == gpr[4] &&
				alu->src[src].chan == chan[4]) {
				alu->src[src].sel = V_SQ_ALU_SRC_PS;
				alu->src[src].chan = 0;
				continue;
			}

			/* vector-unit results are forwarded through PV.{x,y,z,w} */
			for (j = 0; j < 4; ++j) {
				if (alu->src[src].sel == gpr[j] &&
					alu->src[src].chan == j) {
					alu->src[src].sel = V_SQ_ALU_SRC_PV;
					alu->src[src].chan = chan[j];
					break;
				}
			}
		}
	}

	return 0;
}
870
/* Map a 32-bit literal to a hardware inline-constant selector where one
 * exists, otherwise fall back to V_SQ_ALU_SRC_LITERAL.  Negative float
 * constants are encoded as the positive inline constant with the source
 * negate bit toggled. */
void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg)
{
	switch(value) {
	case 0:
		*sel = V_SQ_ALU_SRC_0;
		break;
	case 1:
		*sel = V_SQ_ALU_SRC_1_INT;
		break;
	case -1: /* 0xFFFFFFFF after conversion to u32 */
		*sel = V_SQ_ALU_SRC_M_1_INT;
		break;
	case 0x3F800000: // 1.0f
		*sel = V_SQ_ALU_SRC_1;
		break;
	case 0x3F000000: // 0.5f
		*sel = V_SQ_ALU_SRC_0_5;
		break;
	case 0xBF800000: // -1.0f
		*sel = V_SQ_ALU_SRC_1;
		*neg ^= 1;
		break;
	case 0xBF000000: // -0.5f
		*sel = V_SQ_ALU_SRC_0_5;
		*neg ^= 1;
		break;
	default:
		*sel = V_SQ_ALU_SRC_LITERAL;
		break;
	}
}
902
/* compute how many literals are needed */
/* Accumulate the distinct literal values referenced by 'alu' into
 * literal[]/nliteral (shared across the whole instruction group).
 * Returns -EINVAL when more than 4 distinct literals would be required. */
static int r600_bc_alu_nliterals(struct r600_bc *bc, struct r600_bc_alu *alu,
		uint32_t literal[4], unsigned *nliteral)
{
	unsigned num_src = r600_bc_get_num_operands(bc, alu);
	unsigned i, j;

	for (i = 0; i < num_src; ++i) {
		if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
			uint32_t value = alu->src[i].value;
			unsigned found = 0;
			/* reuse an already recorded literal when possible */
			for (j = 0; j < *nliteral; ++j) {
				if (literal[j] == value) {
					found = 1;
					break;
				}
			}
			if (!found) {
				if (*nliteral >= 4)
					return -EINVAL;
				literal[(*nliteral)++] = value;
			}
		}
	}
	return 0;
}
929
930 static void r600_bc_alu_adjust_literals(struct r600_bc *bc,
931 struct r600_bc_alu *alu,
932 uint32_t literal[4], unsigned nliteral)
933 {
934 unsigned num_src = r600_bc_get_num_operands(bc, alu);
935 unsigned i, j;
936
937 for (i = 0; i < num_src; ++i) {
938 if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
939 uint32_t value = alu->src[i].value;
940 for (j = 0; j < nliteral; ++j) {
941 if (literal[j] == value) {
942 alu->src[i].chan = j;
943 break;
944 }
945 }
946 }
947 }
948 }
949
/* Try to pack the current instruction group (slots[]) together with the
 * previous group (alu_prev) into a single five-slot group.  Returns 0
 * both on success and on "cannot merge" (only a real helper failure
 * returns non-zero); on success the instruction list and slots[] are
 * rewritten, otherwise everything is left untouched. */
static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5],
		struct r600_bc_alu *alu_prev)
{
	struct r600_bc_alu *prev[5];
	struct r600_bc_alu *result[5] = { NULL };

	uint32_t literal[4], prev_literal[4];
	unsigned nliteral = 0, prev_nliteral = 0;

	int i, j, r, src, num_src;
	int num_once_inst = 0;
	int have_mova = 0, have_rel = 0;

	r = assign_alu_units(bc, alu_prev, prev);
	if (r)
		return r;

	for (i = 0; i < 5; ++i) {
		struct r600_bc_alu *alu;

		/* check number of literals - the merged group may use at most
		 * 4, counted into 'literal'; 'prev_literal' keeps the old
		 * group's own count so its dword reservation can be undone
		 * below */
		if (prev[i]) {
			if (r600_bc_alu_nliterals(bc, prev[i], literal, &nliteral))
				return 0;
			if (r600_bc_alu_nliterals(bc, prev[i], prev_literal, &prev_nliteral))
				return 0;
			/* MOVA (address register write) and relative operands
			 * must not meet in one group */
			if (is_alu_mova_inst(bc, prev[i])) {
				if (have_rel)
					return 0;
				have_mova = 1;
			}
			num_once_inst += is_alu_once_inst(bc, prev[i]);
		}
		if (slots[i] && r600_bc_alu_nliterals(bc, slots[i], literal, &nliteral))
			return 0;

		// let's check used slots
		if (prev[i] && !slots[i]) {
			result[i] = prev[i];
			continue;
		} else if (prev[i] && slots[i]) {
			if (result[4] == NULL && prev[4] == NULL && slots[4] == NULL) {
				// trans unit is still free try to use it
				if (is_alu_any_unit_inst(bc, slots[i])) {
					result[i] = prev[i];
					result[4] = slots[i];
				} else if (is_alu_any_unit_inst(bc, prev[i])) {
					result[i] = slots[i];
					result[4] = prev[i];
				} else
					return 0;
			} else
				return 0;
		} else if(!slots[i]) {
			continue;
		} else
			result[i] = slots[i];

		// let's check source gprs - the new instruction must not read
		// a register the previous group writes (it would observe the
		// wrong value once the groups are merged)
		alu = slots[i];
		num_once_inst += is_alu_once_inst(bc, alu);

		num_src = r600_bc_get_num_operands(bc, alu);
		for (src = 0; src < num_src; ++src) {
			if (alu->src[src].rel) {
				if (have_mova)
					return 0;
				have_rel = 1;
			}

			// constants don't matter
			if (!is_gpr(alu->src[src].sel))
				continue;

			for (j = 0; j < 5; ++j) {
				if (!prev[j] || !prev[j]->dst.write)
					continue;

				// if it's relative then we can't determine which gpr is really used
				if (prev[j]->dst.chan == alu->src[src].chan &&
				    (prev[j]->dst.sel == alu->src[src].sel ||
				     prev[j]->dst.rel || alu->src[src].rel))
					return 0;
			}
		}
	}

	/* more than one PRED_ or KILL_ ? */
	if (num_once_inst > 1)
		return 0;

	/* check if the result can still be swizzled */
	r = check_and_set_bank_swizzle(bc, result);
	if (r)
		return 0;

	/* looks like everything worked out right, apply the changes */

	/* undo adding previous literals (the merged group re-counts them) */
	bc->cf_last->ndw -= align(prev_nliteral, 2);

	/* sort instructions - relink the merged group at the list tail in
	 * slot order and clear the per-instruction group-end flags */
	for (i = 0; i < 5; ++i) {
		slots[i] = result[i];
		if (result[i]) {
			LIST_DEL(&result[i]->list);
			result[i]->last = 0;
			LIST_ADDTAIL(&result[i]->list, &bc->cf_last->alu);
		}
	}

	/* determine new last instruction */
	LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list)->last = 1;

	/* determine new first instruction */
	for (i = 0; i < 5; ++i) {
		if (result[i]) {
			bc->cf_last->curr_bs_head = result[i];
			break;
		}
	}

	/* the two old groups became one, so shift the group history */
	bc->cf_last->prev_bs_head = bc->cf_last->prev2_bs_head;
	bc->cf_last->prev2_bs_head = NULL;

	return 0;
}
1077
/* This code handles kcache lines as single blocks of 32 constants. We could
 * probably do slightly better by recognizing that we actually have two
 * consecutive lines of 16 constants, but the resulting code would also be
 * somewhat more complicated.
 *
 * Locks the constant-cache lines needed by 'alu' into the current CF
 * clause (starting a new clause of 'type' when the two kcache slots
 * can't cover them) and rebases the instruction's constant selectors
 * onto the locked lines.  Selectors >= 512 are the ones that address
 * the constant cache. */
static int r600_bc_alloc_kcache_lines(struct r600_bc *bc, struct r600_bc_alu *alu, int type)
{
	struct r600_bc_kcache *kcache = bc->cf_last->kcache;
	unsigned int required_lines;
	unsigned int free_lines = 0;
	unsigned int cache_line[3];
	unsigned int count = 0;
	unsigned int i, j;
	int r;

	/* Collect required cache lines. */
	for (i = 0; i < 3; ++i) {
		bool found = false;
		unsigned int line;

		if (alu->src[i].sel < 512)
			continue;

		/* 32 constants per line, addresses counted in half-lines */
		line = ((alu->src[i].sel - 512) / 32) * 2;

		for (j = 0; j < count; ++j) {
			if (cache_line[j] == line) {
				found = true;
				break;
			}
		}

		if (!found)
			cache_line[count++] = line;
	}

	/* This should never actually happen. */
	if (count >= 3) return -ENOMEM;

	for (i = 0; i < 2; ++i) {
		if (kcache[i].mode == V_SQ_CF_KCACHE_NOP) {
			++free_lines;
		}
	}

	/* Filter lines pulled in by previous instructions. Note that this is
	 * only for the required_lines count, we can't remove these from the
	 * cache_line array since we may have to start a new ALU clause. */
	for (i = 0, required_lines = count; i < count; ++i) {
		for (j = 0; j < 2; ++j) {
			if (kcache[j].mode == V_SQ_CF_KCACHE_LOCK_2 &&
			    kcache[j].addr == cache_line[i]) {
				--required_lines;
				break;
			}
		}
	}

	/* Start a new ALU clause if needed. */
	if (required_lines > free_lines) {
		if ((r = r600_bc_add_cf(bc))) {
			return r;
		}
		bc->cf_last->inst = (type << 3);
		kcache = bc->cf_last->kcache;
	}

	/* Setup the kcache lines. */
	for (i = 0; i < count; ++i) {
		bool found = false;

		for (j = 0; j < 2; ++j) {
			if (kcache[j].mode == V_SQ_CF_KCACHE_LOCK_2 &&
			    kcache[j].addr == cache_line[i]) {
				found = true;
				break;
			}
		}

		if (found) continue;

		/* claim the first free kcache slot for this line */
		for (j = 0; j < 2; ++j) {
			if (kcache[j].mode == V_SQ_CF_KCACHE_NOP) {
				kcache[j].bank = 0;
				kcache[j].addr = cache_line[i];
				kcache[j].mode = V_SQ_CF_KCACHE_LOCK_2;
				break;
			}
		}
	}

	/* Alter the src operands to refer to the kcache. */
	for (i = 0; i < 3; ++i) {
		/* base[j] is the selector window of kcache slot j */
		static const unsigned int base[] = {128, 160, 256, 288};
		unsigned int line;

		if (alu->src[i].sel < 512)
			continue;

		alu->src[i].sel -= 512;
		line = (alu->src[i].sel / 32) * 2;

		for (j = 0; j < 2; ++j) {
			if (kcache[j].mode == V_SQ_CF_KCACHE_LOCK_2 &&
			    kcache[j].addr == line) {
				/* offset within the line + slot's base window */
				alu->src[i].sel &= 0x1f;
				alu->src[i].sel += base[j];
				break;
			}
		}
	}

	return 0;
}
1191
/* Append a copy of 'alu' to the bytecode stream as part of an ALU
 * clause of the given CF 'type', opening a new clause when the types
 * differ (a PUSH_BEFORE may still join a plain ALU clause if no
 * predicate instruction is present yet).  When the instruction closes
 * a group ('last' set), the whole group is assigned to hardware units,
 * merged with the previous group if possible, and bank-swizzled.
 * Returns 0 on success or a negative errno. */
int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type)
{
	struct r600_bc_alu *nalu = r600_bc_alu();
	struct r600_bc_alu *lalu;
	int i, r;

	if (nalu == NULL)
		return -ENOMEM;
	memcpy(nalu, alu, sizeof(struct r600_bc_alu));

	if (bc->cf_last != NULL && bc->cf_last->inst != (type << 3)) {
		/* check if we could add it anyway */
		if (bc->cf_last->inst == (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3) &&
			type == V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE) {
			LIST_FOR_EACH_ENTRY(lalu, &bc->cf_last->alu, list) {
				if (lalu->predicate) {
					bc->force_add_cf = 1;
					break;
				}
			}
		} else
			bc->force_add_cf = 1;
	}

	/* a cf can contain only alu, only vtx, or only tex instructions */
	if (bc->cf_last == NULL || bc->force_add_cf) {
		r = r600_bc_add_cf(bc);
		if (r) {
			free(nalu);
			return r;
		}
	}
	bc->cf_last->inst = (type << 3);

	/* Setup the kcache for this ALU instruction. This will start a new
	 * ALU clause if needed. */
	if ((r = r600_bc_alloc_kcache_lines(bc, nalu, type))) {
		free(nalu);
		return r;
	}

	if (!bc->cf_last->curr_bs_head) {
		bc->cf_last->curr_bs_head = nalu;
	}
	/* number of gpr == the last gpr used in any alu */
	for (i = 0; i < 3; i++) {
		if (nalu->src[i].sel >= bc->ngpr && nalu->src[i].sel < 128) {
			bc->ngpr = nalu->src[i].sel + 1;
		}
		/* fold literals that match hardware inline constants */
		if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL)
			r600_bc_special_constants(nalu->src[i].value,
				&nalu->src[i].sel, &nalu->src[i].neg);
	}
	if (nalu->dst.sel >= bc->ngpr) {
		bc->ngpr = nalu->dst.sel + 1;
	}
	LIST_ADDTAIL(&nalu->list, &bc->cf_last->alu);
	/* each alu use 2 dwords */
	bc->cf_last->ndw += 2;
	bc->ndw += 2;

	/* process cur ALU instructions for bank swizzle */
	if (nalu->last) {
		uint32_t literal[4];
		unsigned nliteral;
		struct r600_bc_alu *slots[5];
		r = assign_alu_units(bc, bc->cf_last->curr_bs_head, slots);
		if (r)
			return r;

		if (bc->cf_last->prev_bs_head) {
			r = merge_inst_groups(bc, slots, bc->cf_last->prev_bs_head);
			if (r)
				return r;
		}

		/* reads of the previous group's results can use the PV/PS
		 * forwarding registers instead of real gprs */
		if (bc->cf_last->prev_bs_head) {
			r = replace_gpr_with_pv_ps(bc, slots, bc->cf_last->prev_bs_head);
			if (r)
				return r;
		}

		r = check_and_set_bank_swizzle(bc, slots);
		if (r)
			return r;

		/* account the group's literal pool (padded to dword pairs) */
		for (i = 0, nliteral = 0; i < 5; i++) {
			if (slots[i]) {
				r = r600_bc_alu_nliterals(bc, slots[i], literal, &nliteral);
				if (r)
					return r;
			}
		}
		bc->cf_last->ndw += align(nliteral, 2);

		/* at most 128 slots, one add alu can add 5 slots + 4 constants(2 slots)
		 * worst case */
		if ((bc->cf_last->ndw >> 1) >= 120) {
			bc->force_add_cf = 1;
		}

		/* shift the group history: current becomes previous */
		bc->cf_last->prev2_bs_head = bc->cf_last->prev_bs_head;
		bc->cf_last->prev_bs_head = bc->cf_last->curr_bs_head;
		bc->cf_last->curr_bs_head = NULL;
	}
	return 0;
}
1299
/* Add an ALU instruction using the default (plain ALU) CF clause type
 * for the current chip family. */
int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
{
	return r600_bc_add_alu_type(bc, alu, BC_INST(bc, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
}
1304
1305 static void r600_bc_remove_alu(struct r600_bc_cf *cf, struct r600_bc_alu *alu)
1306 {
1307 if (alu->last && alu->list.prev != &cf->alu) {
1308 PREV_ALU(alu)->last = 1;
1309 }
1310 LIST_DEL(&alu->list);
1311 free(alu);
1312 cf->ndw -= 2;
1313 }
1314
1315 int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx)
1316 {
1317 struct r600_bc_vtx *nvtx = r600_bc_vtx();
1318 int r;
1319
1320 if (nvtx == NULL)
1321 return -ENOMEM;
1322 memcpy(nvtx, vtx, sizeof(struct r600_bc_vtx));
1323
1324 /* cf can contains only alu or only vtx or only tex */
1325 if (bc->cf_last == NULL ||
1326 (bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX &&
1327 bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) ||
1328 bc->force_add_cf) {
1329 r = r600_bc_add_cf(bc);
1330 if (r) {
1331 free(nvtx);
1332 return r;
1333 }
1334 bc->cf_last->inst = V_SQ_CF_WORD1_SQ_CF_INST_VTX;
1335 }
1336 LIST_ADDTAIL(&nvtx->list, &bc->cf_last->vtx);
1337 /* each fetch use 4 dwords */
1338 bc->cf_last->ndw += 4;
1339 bc->ndw += 4;
1340 if ((bc->cf_last->ndw / 4) > 7)
1341 bc->force_add_cf = 1;
1342 return 0;
1343 }
1344
1345 int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex)
1346 {
1347 struct r600_bc_tex *ntex = r600_bc_tex();
1348 int r;
1349
1350 if (ntex == NULL)
1351 return -ENOMEM;
1352 memcpy(ntex, tex, sizeof(struct r600_bc_tex));
1353
1354 /* we can't fetch data und use it as texture lookup address in the same TEX clause */
1355 if (bc->cf_last != NULL &&
1356 bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) {
1357 struct r600_bc_tex *ttex;
1358 LIST_FOR_EACH_ENTRY(ttex, &bc->cf_last->tex, list) {
1359 if (ttex->dst_gpr == ntex->src_gpr) {
1360 bc->force_add_cf = 1;
1361 break;
1362 }
1363 }
1364 }
1365
1366 /* cf can contains only alu or only vtx or only tex */
1367 if (bc->cf_last == NULL ||
1368 bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_TEX ||
1369 bc->force_add_cf) {
1370 r = r600_bc_add_cf(bc);
1371 if (r) {
1372 free(ntex);
1373 return r;
1374 }
1375 bc->cf_last->inst = V_SQ_CF_WORD1_SQ_CF_INST_TEX;
1376 }
1377 if (ntex->src_gpr >= bc->ngpr) {
1378 bc->ngpr = ntex->src_gpr + 1;
1379 }
1380 if (ntex->dst_gpr >= bc->ngpr) {
1381 bc->ngpr = ntex->dst_gpr + 1;
1382 }
1383 LIST_ADDTAIL(&ntex->list, &bc->cf_last->tex);
1384 /* each texture fetch use 4 dwords */
1385 bc->cf_last->ndw += 4;
1386 bc->ndw += 4;
1387 if ((bc->cf_last->ndw / 4) > 7)
1388 bc->force_add_cf = 1;
1389 return 0;
1390 }
1391
1392 int r600_bc_add_cfinst(struct r600_bc *bc, int inst)
1393 {
1394 int r;
1395 r = r600_bc_add_cf(bc);
1396 if (r)
1397 return r;
1398
1399 bc->cf_last->cond = V_SQ_CF_COND_ACTIVE;
1400 bc->cf_last->inst = inst;
1401 return 0;
1402 }
1403
/* common to all 3 families */
/* Encode one vertex fetch as four bytecode dwords starting at
 * bytecode[id].  Fetch shaders (bc->type == -1) address their
 * resources at an offset that depends on the chip family. */
static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsigned id)
{
	unsigned fetch_resource_start = 0;

	/* check if we are fetch shader */
	/* fetch shader can also access vertex resource,
	 * first fetch shader resource is at 160
	 */
	if (bc->type == -1) {
		switch (bc->chiprev) {
		/* r600 */
		case CHIPREV_R600:
		/* r700 */
		case CHIPREV_R700:
			fetch_resource_start = 160;
			break;
		/* evergreen */
		case CHIPREV_EVERGREEN:
			fetch_resource_start = 0;
			break;
		default:
			fprintf(stderr, "%s:%s:%d unknown chiprev %d\n",
				__FILE__, __func__, __LINE__, bc->chiprev);
			break;
		}
	}
	/* word 0: resource, fetch type and source address */
	bc->bytecode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id + fetch_resource_start) |
				S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) |
				S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) |
				S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x) |
				S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx->mega_fetch_count);
	/* word 1: destination swizzle, data format and destination gpr */
	bc->bytecode[id++] = S_SQ_VTX_WORD1_DST_SEL_X(vtx->dst_sel_x) |
				S_SQ_VTX_WORD1_DST_SEL_Y(vtx->dst_sel_y) |
				S_SQ_VTX_WORD1_DST_SEL_Z(vtx->dst_sel_z) |
				S_SQ_VTX_WORD1_DST_SEL_W(vtx->dst_sel_w) |
				S_SQ_VTX_WORD1_USE_CONST_FIELDS(vtx->use_const_fields) |
				S_SQ_VTX_WORD1_DATA_FORMAT(vtx->data_format) |
				S_SQ_VTX_WORD1_NUM_FORMAT_ALL(vtx->num_format_all) |
				S_SQ_VTX_WORD1_FORMAT_COMP_ALL(vtx->format_comp_all) |
				S_SQ_VTX_WORD1_SRF_MODE_ALL(vtx->srf_mode_all) |
				S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr);
	/* word 2: buffer offset; word 3 is reserved padding */
	bc->bytecode[id++] = S_SQ_VTX_WORD2_OFFSET(vtx->offset) |
				S_SQ_VTX_WORD2_MEGA_FETCH(1);
	bc->bytecode[id++] = 0;
	return 0;
}
1451
/* common to all 3 families */
/* Encode one texture fetch as four bytecode dwords starting at
 * bytecode[id] (the fourth dword is reserved padding). */
static int r600_bc_tex_build(struct r600_bc *bc, struct r600_bc_tex *tex, unsigned id)
{
	/* word 0: opcode, resource and source register */
	bc->bytecode[id++] = S_SQ_TEX_WORD0_TEX_INST(tex->inst) |
				S_SQ_TEX_WORD0_RESOURCE_ID(tex->resource_id) |
				S_SQ_TEX_WORD0_SRC_GPR(tex->src_gpr) |
				S_SQ_TEX_WORD0_SRC_REL(tex->src_rel);
	/* word 1: destination register/swizzle, lod bias, coordinate types */
	bc->bytecode[id++] = S_SQ_TEX_WORD1_DST_GPR(tex->dst_gpr) |
				S_SQ_TEX_WORD1_DST_REL(tex->dst_rel) |
				S_SQ_TEX_WORD1_DST_SEL_X(tex->dst_sel_x) |
				S_SQ_TEX_WORD1_DST_SEL_Y(tex->dst_sel_y) |
				S_SQ_TEX_WORD1_DST_SEL_Z(tex->dst_sel_z) |
				S_SQ_TEX_WORD1_DST_SEL_W(tex->dst_sel_w) |
				S_SQ_TEX_WORD1_LOD_BIAS(tex->lod_bias) |
				S_SQ_TEX_WORD1_COORD_TYPE_X(tex->coord_type_x) |
				S_SQ_TEX_WORD1_COORD_TYPE_Y(tex->coord_type_y) |
				S_SQ_TEX_WORD1_COORD_TYPE_Z(tex->coord_type_z) |
				S_SQ_TEX_WORD1_COORD_TYPE_W(tex->coord_type_w);
	/* word 2: texel offsets, sampler and source swizzle */
	bc->bytecode[id++] = S_SQ_TEX_WORD2_OFFSET_X(tex->offset_x) |
				S_SQ_TEX_WORD2_OFFSET_Y(tex->offset_y) |
				S_SQ_TEX_WORD2_OFFSET_Z(tex->offset_z) |
				S_SQ_TEX_WORD2_SAMPLER_ID(tex->sampler_id) |
				S_SQ_TEX_WORD2_SRC_SEL_X(tex->src_sel_x) |
				S_SQ_TEX_WORD2_SRC_SEL_Y(tex->src_sel_y) |
				S_SQ_TEX_WORD2_SRC_SEL_Z(tex->src_sel_z) |
				S_SQ_TEX_WORD2_SRC_SEL_W(tex->src_sel_w);
	bc->bytecode[id++] = 0;
	return 0;
}
1481
/* r600 only, r700/eg bits in r700_asm.c */
/* Encode one ALU instruction as two bytecode dwords starting at
 * bytecode[id]; the second dword differs between the OP2 and OP3
 * encodings (OP3 trades the write-mask/omod/abs bits for a third
 * source operand). */
static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
{
	/* don't replace gpr by pv or ps for destination register */
	/* word 0: the two common source operands and the group-end flag */
	bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
				S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) |
				S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) |
				S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) |
				S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) |
				S_SQ_ALU_WORD0_SRC1_REL(alu->src[1].rel) |
				S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) |
				S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) |
				S_SQ_ALU_WORD0_LAST(alu->last);

	if (alu->is_op3) {
		/* word 1, OP3 form: destination plus third source operand */
		bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) |
					S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) |
					S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) |
					S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) |
					S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) |
					S_SQ_ALU_WORD1_OP3_SRC2_REL(alu->src[2].rel) |
					S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) |
					S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu->src[2].neg) |
					S_SQ_ALU_WORD1_OP3_ALU_INST(alu->inst) |
					S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle);
	} else {
		/* word 1, OP2 form: destination, modifiers and predicate bits */
		bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) |
					S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) |
					S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) |
					S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) |
					S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) |
					S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) |
					S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) |
					S_SQ_ALU_WORD1_OP2_OMOD(alu->omod) |
					S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) |
					S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) |
					S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) |
					S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->predicate);
	}
	return 0;
}
1523
/* Coarse categories of CF instructions, used by r600_bc_cf_build to
 * select the matching control-flow word encoding. */
enum cf_class
{
	CF_CLASS_ALU,
	CF_CLASS_TEXTURE,
	CF_CLASS_VERTEX,
	CF_CLASS_EXPORT,
	CF_CLASS_OTHER
};
1532
/* Classify a CF instruction into one of the cf_class categories.
 * NOTE(review): for an unknown instruction this returns -EINVAL forced
 * through the enum return type; callers rely on that value not
 * matching any listed class (see the default case in
 * r600_bc_cf_build). */
static enum cf_class r600_bc_cf_class(struct r600_bc_cf *cf)
{
	switch (cf->inst) {
	case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
	case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
	case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
	case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
		return CF_CLASS_ALU;

	case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
		return CF_CLASS_TEXTURE;

	case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
	case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
		return CF_CLASS_VERTEX;

	case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
	case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
	case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
	case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
		return CF_CLASS_EXPORT;

	case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
	case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
	case V_SQ_CF_WORD1_SQ_CF_INST_POP:
	case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
	case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
	case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
	case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
	case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
	case V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
	case V_SQ_CF_WORD1_SQ_CF_INST_NOP:
		return CF_CLASS_OTHER;

	default:
		R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
		return -EINVAL;
	}
}
1572
/* common for r600/r700 - eg in eg_asm.c */
/* Encode one CF instruction as two bytecode dwords at cf->id.  The
 * last CF entry of the program carries the END_OF_PROGRAM bit (ALU
 * clauses cannot, hence the assert). */
static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
{
	unsigned id = cf->id;
	/* this CF is last in the program iff it sits at the list tail */
	unsigned end_of_program = bc->cf.prev == &cf->list;

	switch (r600_bc_cf_class(cf)) {
	case CF_CLASS_ALU:
		assert(!end_of_program);
		bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) |
			S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache[0].mode) |
			S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache[0].bank) |
			S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache[1].bank);

		bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) |
			S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache[1].mode) |
			S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache[0].addr) |
			S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache[1].addr) |
			S_SQ_CF_ALU_WORD1_BARRIER(cf->barrier) |
			S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chiprev == CHIPREV_R600 ? cf->r6xx_uses_waterfall : 0) |
			S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1);
		break;
	case CF_CLASS_TEXTURE:
	case CF_CLASS_VERTEX:
		/* fetch clauses: address plus instruction count (4 dwords each) */
		bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
		bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) |
			S_SQ_CF_WORD1_BARRIER(cf->barrier) |
			S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1) |
			S_SQ_CF_WORD1_END_OF_PROGRAM(end_of_program);
		break;
	case CF_CLASS_EXPORT:
		bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) |
			S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
			S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
			S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type);
		bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) |
			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) |
			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) |
			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
			S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) |
			S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->inst) |
			S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(end_of_program);
		break;
	case CF_CLASS_OTHER:
		/* flow control: jump target, condition and pop count */
		bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1);
		bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) |
			S_SQ_CF_WORD1_BARRIER(cf->barrier) |
			S_SQ_CF_WORD1_COND(cf->cond) |
			S_SQ_CF_WORD1_POP_COUNT(cf->pop_count) |
			S_SQ_CF_WORD1_END_OF_PROGRAM(end_of_program);

		break;
	default:
		R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
		return -EINVAL;
	}
	return 0;
}
1632
/* One live range of a GPR, measured in instruction ids. */
struct gpr_usage_range {
	int	replacement;	/* gpr this range should be remapped to; -1 = none chosen yet */
	int	rel_block;	/* length of a relatively-addressed block starting here; -1 = none */
	int	start;		/* id of the write opening the range; -1 = open since entry */
	int	end;		/* id of the last read; -1 = still live */
};

/* Per-GPR tracking state for the register remapping pass. */
struct gpr_usage {
	unsigned	channels:4;	/* bitmask of channels currently holding a live value */
	int		first_write;	/* id of the first write of the pending range; -1 = none */
	int		last_write[4];	/* per-channel id of the most recent write */
	unsigned	nranges;	/* number of entries in ranges[] */
	struct gpr_usage_range *ranges;	/* dynamically grown array of live ranges */
};
1647
1648 static struct gpr_usage_range* last_gpr_usage_range(struct gpr_usage *usage)
1649 {
1650 if (usage->nranges)
1651 return usage->ranges + usage->nranges - 1;
1652 else
1653 return NULL;
1654 }
1655
1656 static struct gpr_usage_range* add_gpr_usage_range(struct gpr_usage *usage)
1657 {
1658 struct gpr_usage_range *range;
1659
1660 usage->nranges++;
1661 usage->ranges = realloc(usage->ranges, usage->nranges * sizeof(struct gpr_usage_range));
1662 if (!usage->ranges)
1663 return NULL;
1664
1665 range = last_gpr_usage_range(usage);
1666 range->replacement = -1; /* no prefered replacement */
1667 range->rel_block = -1;
1668 range->start = -1;
1669 range->end = -1;
1670
1671 return range;
1672 }
1673
1674 static void notice_gpr_read(struct gpr_usage *usage, int id, unsigned chan)
1675 {
1676 struct gpr_usage_range* range;
1677
1678 usage->channels |= 1 << chan;
1679 usage->first_write = -1;
1680 if (!usage->nranges) {
1681 range = add_gpr_usage_range(usage);
1682 } else
1683 range = last_gpr_usage_range(usage);
1684
1685 if (range && range->end < id)
1686 range->end = id;
1687 }
1688
/* A relative (indexed) read may hit any register from 'gpr' upwards,
 * so record a read on all of them and remember the block size on the
 * base register's last range.
 * NOTE(review): assumes usage[gpr] ends up with at least one range
 * (allocated by the notice_gpr_read above); if that allocation failed
 * this dereferences NULL - confirm OOM handling is acceptable here. */
static void notice_gpr_rel_read(struct r600_bc *bc, struct gpr_usage usage[128],
		int id, unsigned gpr, unsigned chan)
{
	unsigned i;
	for (i = gpr; i < bc->ngpr; ++i)
		notice_gpr_read(&usage[i], id, chan);

	last_gpr_usage_range(&usage[gpr])->rel_block = bc->ngpr - gpr;
}
1698
/* Remember the instruction id that most recently wrote this channel. */
static void notice_gpr_last_write(struct gpr_usage *usage, int id, unsigned chan)
{
	usage->last_write[chan] = id;
}
1703
/* Record a write of one register channel at instruction 'id'.  Once
 * every live channel has been rewritten the old value is dead and a
 * new usage range opens; a predicated write may leave the old value
 * partially live, so the current range is kept instead.
 * 'prefered_replacement' carries the source gpr of a plain MOV (see
 * notice_alu_dst_gprs) so the remapping pass can try to coalesce it.
 * NOTE(review): the add_gpr_usage_range result is not NULL-checked -
 * an allocation failure here would crash; confirm that is intended. */
static void notice_gpr_write(struct gpr_usage *usage, int id, unsigned chan,
		int predicate, int prefered_replacement)
{
	struct gpr_usage_range* last_range = last_gpr_usage_range(usage);
	/* the range opens at the first write of the write burst, not at id */
	int start = usage->first_write != -1 ? usage->first_write : id;
	usage->channels &= ~(1 << chan);
	if (usage->channels) {
		/* some channels still live - just note the burst start */
		if (usage->first_write == -1)
			usage->first_write = id;
	} else if (!last_range || (last_range->start != start && !predicate)) {
		/* all channels rewritten unconditionally - open a new range */
		usage->first_write = start;
		struct gpr_usage_range* range = add_gpr_usage_range(usage);
		range->replacement = prefered_replacement;
		range->start = start;
	} else if (last_range->start == start && prefered_replacement != -1) {
		/* same burst already opened the range - just record the hint */
		last_range->replacement = prefered_replacement;
	}
	notice_gpr_last_write(usage, id, chan);
}
1723
1724 static void notice_gpr_rel_last_write(struct gpr_usage usage[128], int id, unsigned chan)
1725 {
1726 unsigned i;
1727 for (i = 0; i < 128; ++i)
1728 notice_gpr_last_write(&usage[i], id, chan);
1729 }
1730
1731 static void notice_gpr_rel_write(struct gpr_usage usage[128], int id, unsigned chan)
1732 {
1733 unsigned i;
1734 for (i = 0; i < 128; ++i)
1735 notice_gpr_write(&usage[i], id, chan, 1, -1);
1736 }
1737
1738 static void notice_alu_src_gprs(struct r600_bc *bc, struct r600_bc_alu *alu,
1739 struct gpr_usage usage[128], int id)
1740 {
1741 unsigned src, num_src;
1742
1743 num_src = r600_bc_get_num_operands(bc, alu);
1744 for (src = 0; src < num_src; ++src) {
1745 // constants doesn't matter
1746 if (!is_gpr(alu->src[src].sel))
1747 continue;
1748
1749 if (alu->src[src].rel)
1750 notice_gpr_rel_read(bc, usage, id, alu->src[src].sel, alu->src[src].chan);
1751 else
1752 notice_gpr_read(&usage[alu->src[src].sel], id, alu->src[src].chan);
1753 }
1754 }
1755
/* Record the destination writes of one whole instruction group,
 * walking from its first instruction to the one flagged 'last'.
 * A plain MOV from a GPR passes its source along as the preferred
 * replacement so the remapping pass can coalesce the copy.
 * NOTE(review): the loop relies on reaching a 'last'-flagged
 * instruction before the list wraps around to its head; that
 * invariant is maintained elsewhere - confirm before refactoring. */
static void notice_alu_dst_gprs(struct r600_bc_alu *alu_first, struct gpr_usage usage[128],
		int id, int predicate)
{
	struct r600_bc_alu *alu;
	for (alu = alu_first; alu; alu = LIST_ENTRY(struct r600_bc_alu, alu->list.next, list)) {
		if (alu->dst.write) {
			if (alu->dst.rel)
				notice_gpr_rel_write(usage, id, alu->dst.chan);
			else if (alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV && is_gpr(alu->src[0].sel))
				notice_gpr_write(&usage[alu->dst.sel], id, alu->dst.chan,
						predicate, alu->src[0].sel);
			else
				notice_gpr_write(&usage[alu->dst.sel], id, alu->dst.chan, predicate, -1);
		}

		if (alu->last)
			break;
	}
}
1775
1776 static void notice_tex_gprs(struct r600_bc *bc, struct r600_bc_tex *tex,
1777 struct gpr_usage usage[128],
1778 int id, int predicate)
1779 {
1780 if (tex->src_rel) {
1781 if (tex->src_sel_x < 4)
1782 notice_gpr_rel_read(bc, usage, id, tex->src_gpr, tex->src_sel_x);
1783 if (tex->src_sel_y < 4)
1784 notice_gpr_rel_read(bc, usage, id, tex->src_gpr, tex->src_sel_y);
1785 if (tex->src_sel_z < 4)
1786 notice_gpr_rel_read(bc, usage, id, tex->src_gpr, tex->src_sel_z);
1787 if (tex->src_sel_w < 4)
1788 notice_gpr_rel_read(bc, usage, id, tex->src_gpr, tex->src_sel_w);
1789 } else {
1790 if (tex->src_sel_x < 4)
1791 notice_gpr_read(&usage[tex->src_gpr], id, tex->src_sel_x);
1792 if (tex->src_sel_y < 4)
1793 notice_gpr_read(&usage[tex->src_gpr], id, tex->src_sel_y);
1794 if (tex->src_sel_z < 4)
1795 notice_gpr_read(&usage[tex->src_gpr], id, tex->src_sel_z);
1796 if (tex->src_sel_w < 4)
1797 notice_gpr_read(&usage[tex->src_gpr], id, tex->src_sel_w);
1798 }
1799 if (tex->dst_rel) {
1800 if (tex->dst_sel_x != 7)
1801 notice_gpr_rel_write(usage, id, 0);
1802 if (tex->dst_sel_y != 7)
1803 notice_gpr_rel_write(usage, id, 1);
1804 if (tex->dst_sel_z != 7)
1805 notice_gpr_rel_write(usage, id, 2);
1806 if (tex->dst_sel_w != 7)
1807 notice_gpr_rel_write(usage, id, 3);
1808 } else {
1809 if (tex->dst_sel_x != 7)
1810 notice_gpr_write(&usage[tex->dst_gpr], id, 0, predicate, -1);
1811 if (tex->dst_sel_y != 7)
1812 notice_gpr_write(&usage[tex->dst_gpr], id, 1, predicate, -1);
1813 if (tex->dst_sel_z != 7)
1814 notice_gpr_write(&usage[tex->dst_gpr], id, 2, predicate, -1);
1815 if (tex->dst_sel_w != 7)
1816 notice_gpr_write(&usage[tex->dst_gpr], id, 3, predicate, -1);
1817 }
1818 }
1819
1820 static void notice_vtx_gprs(struct r600_bc_vtx *vtx, struct gpr_usage usage[128],
1821 int id, int predicate)
1822 {
1823 notice_gpr_read(&usage[vtx->src_gpr], id, vtx->src_sel_x);
1824
1825 if (vtx->dst_sel_x != 7)
1826 notice_gpr_write(&usage[vtx->dst_gpr], id, 0, predicate, -1);
1827 if (vtx->dst_sel_y != 7)
1828 notice_gpr_write(&usage[vtx->dst_gpr], id, 1, predicate, -1);
1829 if (vtx->dst_sel_z != 7)
1830 notice_gpr_write(&usage[vtx->dst_gpr], id, 2, predicate, -1);
1831 if (vtx->dst_sel_w != 7)
1832 notice_gpr_write(&usage[vtx->dst_gpr], id, 3, predicate, -1);
1833 }
1834
/* Record the GPR reads done by an export CF instruction.  The read id
 * is rounded up past the newest write to the start of the next 0x100
 * id block - presumably so the export orders after everything that
 * feeds it; the rounded id is also stored in export_remap[] for the
 * later renumbering pass (TODO confirm against that pass). */
static void notice_export_gprs(struct r600_bc_cf *cf, struct gpr_usage usage[128],
		struct r600_bc_cf *export_cf[128], int export_remap[128])
{
	//TODO handle other memory operations
	struct gpr_usage *output = &usage[cf->output.gpr];
	/* newest write on any channel, rounded up to the next 0x100 block */
	int id = MAX4(output->last_write[0], output->last_write[1],
		output->last_write[2], output->last_write[3]);
	id += 0x100;
	id &= ~0xFF;

	export_cf[cf->output.gpr] = cf;
	export_remap[cf->output.gpr] = id;
	/* swizzle selects >= 4 don't read a register channel */
	if (cf->output.swizzle_x < 4)
		notice_gpr_read(output, id, cf->output.swizzle_x);
	if (cf->output.swizzle_y < 4)
		notice_gpr_read(output, id, cf->output.swizzle_y);
	if (cf->output.swizzle_z < 4)
		notice_gpr_read(output, id, cf->output.swizzle_z);
	if (cf->output.swizzle_w < 4)
		notice_gpr_read(output, id, cf->output.swizzle_w);
}
1856
1857 static struct gpr_usage_range *find_src_range(struct gpr_usage *usage, int id)
1858 {
1859 unsigned i;
1860 for (i = 0; i < usage->nranges; ++i) {
1861 struct gpr_usage_range* range = &usage->ranges[i];
1862
1863 if (range->start < id && id <= range->end)
1864 return range;
1865 }
1866 return NULL;
1867 }
1868
1869 static struct gpr_usage_range *find_dst_range(struct gpr_usage *usage, int id)
1870 {
1871 unsigned i;
1872 for (i = 0; i < usage->nranges; ++i) {
1873 struct gpr_usage_range* range = &usage->ranges[i];
1874 int end = range->end;
1875
1876 if (range->start <= id && (id < end || end == -1))
1877 return range;
1878 }
1879 return NULL;
1880 }
1881
1882 static int is_barrier_needed(struct gpr_usage *usage, int id, unsigned chan, int last_barrier)
1883 {
1884 if (usage->last_write[chan] != (id & ~0xFF))
1885 return usage->last_write[chan] >= last_barrier;
1886 else
1887 return 0;
1888 }
1889
1890 static int is_intersection(struct gpr_usage_range* a, struct gpr_usage_range* b)
1891 {
1892 return a->start <= b->end && b->start < a->end;
1893 }
1894
/* Score how well 'range' of register 'current' could live in register
 * 'gpr' instead: returns -1 when impossible (overlapping usage, or a
 * relative block running past r127), otherwise the sum of the gaps to
 * the nearest neighbouring ranges - smaller means tighter packing. */
static int rate_replacement(struct gpr_usage usage[128], unsigned current, unsigned gpr,
		struct gpr_usage_range* range)
{
	/* a relative block needs rel_block consecutive registers */
	int max_gpr = gpr + MAX2(range->rel_block, 1);
	int best_start = 0x3FFFFFFF, best_end = 0x3FFFFFFF;
	unsigned i;

	for (; gpr < max_gpr; ++gpr) {

		if (gpr >= 128) /* relative gpr block won't fit into clause temporaries */
			return -1; /* forget it */

		if (gpr == current) /* ignore ranges of to be replaced register */
			continue;

		for (i = 0; i < usage[gpr].nranges; ++i) {
			if (usage[gpr].ranges[i].replacement < gpr)
				continue; /* ignore already remapped ranges */

			if (is_intersection(&usage[gpr].ranges[i], range))
				return -1; /* forget it if usages overlap */

			/* track the smallest gap to a range ending before us */
			if (range->start >= usage[gpr].ranges[i].end)
				best_start = MIN2(best_start, range->start - usage[gpr].ranges[i].end);

			/* ... and to a range starting after us */
			if (range->end != -1 && range->end <= usage[gpr].ranges[i].start)
				best_end = MIN2(best_end, usage[gpr].ranges[i].start - range->end);
		}
	}
	return best_start + best_end;
}
1926
/* Pick a replacement register for 'range' of register 'current'.
 * Preference order: the remap already recorded in range->replacement, then
 * a clause temporary (127..124) when the range lives inside a single ALU
 * clause, then the best-rated lower-numbered register.  The chosen target
 * gets a reservation range so later candidates cannot collide with it, and
 * when the range heads a relatively addressed block the mapping is
 * propagated to the following registers of that block. */
static void find_replacement(struct gpr_usage usage[128], unsigned current,
                             struct gpr_usage_range *range)
{
	unsigned i, j;
	int best_gpr = -1, best_rate = 0x7FFFFFFF;

	if (range->replacement == current)
		return; /* register prefers to be not remapped */

	/* follow one level of remap indirection on the preferred register */
	if (range->replacement != -1 && range->replacement <= current) {
		struct gpr_usage_range *other = find_src_range(&usage[range->replacement], range->start);
		if (other && other->replacement != -1)
			range->replacement = other->replacement;
	}

	if (range->replacement != -1 && range->replacement < current) {
		int rate = rate_replacement(usage, current, range->replacement, range);

		/* check if prefered replacement can be used */
		if (rate != -1) {
			best_rate = rate;
			best_gpr = range->replacement;
		}
	}

	/* same CF node id (low byte masked) for start and end means the
	 * whole range lives inside one ALU clause */
	if (best_gpr == -1 && (range->start & ~0xFF) == (range->end & ~0xFF)) {
		/* register is just used inside one ALU clause */
		/* try to use clause temporaries for it */
		for (i = 127; i > 123; --i) {
			int rate = rate_replacement(usage, current, i, range);

			if (rate == -1) /* can't be used because ranges overlap */
				continue;

			if (rate < best_rate) {
				best_rate = rate;
				best_gpr = i;

				/* can't get better than this */
				if (rate == 0)
					break;
			}
		}
	}

	/* last resort: scan all registers below the current one */
	if (best_gpr == -1) {
		for (i = 0; i < current; ++i) {
			int rate = rate_replacement(usage, current, i, range);

			if (rate == -1) /* can't be used because ranges overlap */
				continue;

			if (rate < best_rate) {
				best_rate = rate;
				best_gpr = i;

				/* can't get better than this */
				if (rate == 0)
					break;
			}
		}
	}

	if (best_gpr != -1) {
		/* reserve the target interval so later remaps see it occupied */
		struct gpr_usage_range *reservation = add_gpr_usage_range(&usage[best_gpr]);
		reservation->replacement = best_gpr;
		reservation->rel_block = -1;
		reservation->start = range->start;
		reservation->end = range->end;
	} else
		best_gpr = current;

	range->replacement = best_gpr;
	if (range->rel_block == -1)
		return; /* no relative block to handle we are done here */

	/* set prefered register for the whole relative register block */
	for (i = current + 1, ++best_gpr; i < current + range->rel_block; ++i, ++best_gpr) {
		for (j = 0; j < usage[i].nranges; ++j) {
			if (is_intersection(&usage[i].ranges[j], range))
				usage[i].ranges[j].replacement = best_gpr;
		}
	}
}
2011
/* Rewrite the source and destination registers of one ALU instruction with
 * the remapping computed earlier, and OR into *barrier whether this
 * instruction needs to be fenced against earlier writes. */
static void replace_alu_gprs(struct r600_bc *bc, struct r600_bc_alu *alu, struct gpr_usage usage[128],
			     int id, int last_barrier, unsigned *barrier)
{
	struct gpr_usage *cur_usage;
	struct gpr_usage_range *range;
	unsigned src, num_src;

	num_src = r600_bc_get_num_operands(bc, alu);
	for (src = 0; src < num_src; ++src) {
		// constants doesn't matter
		if (!is_gpr(alu->src[src].sel))
			continue;

		cur_usage = &usage[alu->src[src].sel];
		range = find_src_range(cur_usage, id);
		alu->src[src].sel = range->replacement;

		*barrier |= is_barrier_needed(cur_usage, id, alu->src[src].chan, last_barrier);
	}

	if (alu->dst.write) {
		cur_usage = &usage[alu->dst.sel];
		range = find_dst_range(cur_usage, id);
		if (!range || range->replacement == -1) {
			/* no live range: the result is never read again, so
			 * drop the write -- unless it's an op3, which cannot
			 * disable its write and is redirected instead */
			if (!alu->is_op3)
				alu->dst.write = 0;
			else
				/*TODO: really check that register 123 is useable */
				alu->dst.sel = 123;
		} else {
			alu->dst.sel = range->replacement;
			*barrier |= is_barrier_needed(cur_usage, id, alu->dst.chan, last_barrier);
		}
	}
	/* NOTE(review): when the op3 path above redirected the write to
	 * register 123, cur_usage still points at the usage of the OLD
	 * destination register -- confirm recording the write there is
	 * intended. */
	if (alu->dst.write) {
		if (alu->dst.rel)
			notice_gpr_rel_last_write(usage, id, alu->dst.chan);
		else
			notice_gpr_last_write(cur_usage, id, alu->dst.chan);
	}
}
2053
/* Rewrite source/destination registers of one texture fetch with the
 * computed remapping, update barrier state and record destination writes. */
static void replace_tex_gprs(struct r600_bc_tex *tex, struct gpr_usage usage[128],
			     int id, int last_barrier, unsigned *barrier)
{
	struct gpr_usage *cur_usage = &usage[tex->src_gpr];
	struct gpr_usage_range *range = find_src_range(cur_usage, id);

	if (tex->src_rel) {
		/* relative addressing: always force a barrier (note this
		 * assigns instead of OR-ing into the accumulator) */
		*barrier = 1;
	} else {
		/* selectors >= 4 don't read a register channel */
		if (tex->src_sel_x < 4)
			*barrier |= is_barrier_needed(cur_usage, id, tex->src_sel_x, last_barrier);
		if (tex->src_sel_y < 4)
			*barrier |= is_barrier_needed(cur_usage, id, tex->src_sel_y, last_barrier);
		if (tex->src_sel_z < 4)
			*barrier |= is_barrier_needed(cur_usage, id, tex->src_sel_z, last_barrier);
		if (tex->src_sel_w < 4)
			*barrier |= is_barrier_needed(cur_usage, id, tex->src_sel_w, last_barrier);
	}
	tex->src_gpr = range->replacement;

	cur_usage = &usage[tex->dst_gpr];

	range = find_dst_range(cur_usage, id);
	if (range) {
		tex->dst_gpr = range->replacement;

		/* a dst_sel of 7 masks the channel (presumably) -- only
		 * record writes for unmasked channels */
		if (tex->dst_rel) {
			if (tex->dst_sel_x != 7)
				notice_gpr_rel_last_write(usage, id, tex->dst_sel_x);
			if (tex->dst_sel_y != 7)
				notice_gpr_rel_last_write(usage, id, tex->dst_sel_y);
			if (tex->dst_sel_z != 7)
				notice_gpr_rel_last_write(usage, id, tex->dst_sel_z);
			if (tex->dst_sel_w != 7)
				notice_gpr_rel_last_write(usage, id, tex->dst_sel_w);
		} else {
			if (tex->dst_sel_x != 7)
				notice_gpr_last_write(cur_usage, id, tex->dst_sel_x);
			if (tex->dst_sel_y != 7)
				notice_gpr_last_write(cur_usage, id, tex->dst_sel_y);
			if (tex->dst_sel_z != 7)
				notice_gpr_last_write(cur_usage, id, tex->dst_sel_z);
			if (tex->dst_sel_w != 7)
				notice_gpr_last_write(cur_usage, id, tex->dst_sel_w);
		}
	} else {
		/* result never read: dump it into scratch register 123,
		 * matching the ALU replacement path */
		tex->dst_gpr = 123;
	}
}
2103
2104 static void replace_vtx_gprs(struct r600_bc_vtx *vtx, struct gpr_usage usage[128],
2105 int id, int last_barrier, unsigned *barrier)
2106 {
2107 struct gpr_usage *cur_usage = &usage[vtx->src_gpr];
2108 struct gpr_usage_range *range = find_src_range(cur_usage, id);
2109
2110 *barrier |= is_barrier_needed(cur_usage, id, vtx->src_sel_x, last_barrier);
2111
2112 vtx->src_gpr = range->replacement;
2113
2114 cur_usage = &usage[vtx->dst_gpr];
2115 range = find_dst_range(cur_usage, id);
2116 if (range) {
2117 vtx->dst_gpr = range->replacement;
2118
2119 if (vtx->dst_sel_x != 7)
2120 notice_gpr_last_write(cur_usage, id, vtx->dst_sel_x);
2121 if (vtx->dst_sel_y != 7)
2122 notice_gpr_last_write(cur_usage, id, vtx->dst_sel_y);
2123 if (vtx->dst_sel_z != 7)
2124 notice_gpr_last_write(cur_usage, id, vtx->dst_sel_z);
2125 if (vtx->dst_sel_w != 7)
2126 notice_gpr_last_write(cur_usage, id, vtx->dst_sel_w);
2127 } else {
2128 vtx->dst_gpr = 123;
2129 }
2130 }
2131
2132 static void replace_export_gprs(struct r600_bc_cf *cf, struct gpr_usage usage[128],
2133 int id, int last_barrier)
2134 {
2135 //TODO handle other memory operations
2136 struct gpr_usage *cur_usage = &usage[cf->output.gpr];
2137 struct gpr_usage_range *range = find_src_range(cur_usage, id);
2138
2139 cf->barrier = 0;
2140 if (cf->output.swizzle_x < 4)
2141 cf->barrier |= is_barrier_needed(cur_usage, -1, cf->output.swizzle_x, last_barrier);
2142 if (cf->output.swizzle_y < 4)
2143 cf->barrier |= is_barrier_needed(cur_usage, -1, cf->output.swizzle_y, last_barrier);
2144 if (cf->output.swizzle_z < 4)
2145 cf->barrier |= is_barrier_needed(cur_usage, -1, cf->output.swizzle_z, last_barrier);
2146 if (cf->output.swizzle_w < 4)
2147 cf->barrier |= is_barrier_needed(cur_usage, -1, cf->output.swizzle_w, last_barrier);
2148
2149 cf->output.gpr = range->replacement;
2150 }
2151
/* Remove a MOV that copies a register channel onto itself, but only when
 * no instruction in the following ALU group reads the result back through
 * the PV/PS previous-result registers. */
static void optimize_alu_inst(struct r600_bc *bc, struct r600_bc_cf *cf, struct r600_bc_alu *alu)
{
	struct r600_bc_alu *alu_next;
	unsigned chan;
	unsigned src, num_src;

	/* check if a MOV could be optimized away */
	if (alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV) {

		/* destination equals source? */
		if (alu->dst.sel != alu->src[0].sel ||
		    alu->dst.chan != alu->src[0].chan)
			return;

		/* any special handling for the source? */
		if (alu->src[0].rel || alu->src[0].neg || alu->src[0].abs)
			return;

		/* any special handling for destination? */
		if (alu->dst.rel || alu->dst.clamp)
			return;

		/* ok find next instruction group and check if ps/pv is used */
		for (alu_next = alu; !alu_next->last; alu_next = NEXT_ALU(alu_next));

		if (alu_next->list.next != &cf->alu) {
			/* for reduction instructions PV is checked on chan 0 */
			chan = is_alu_reduction_inst(bc, alu) ? 0 : alu->dst.chan;
			/* scan only the immediately following group */
			for (alu_next = NEXT_ALU(alu_next); alu_next; alu_next = NEXT_ALU(alu_next)) {
				num_src = r600_bc_get_num_operands(bc, alu_next);
				for (src = 0; src < num_src; ++src) {
					if (alu_next->src[src].sel == V_SQ_ALU_SRC_PV &&
					    alu_next->src[src].chan == chan)
						return;

					if (alu_next->src[src].sel == V_SQ_ALU_SRC_PS)
						return;
				}

				if (alu_next->last)
					break;
			}
		}

		r600_bc_remove_alu(cf, alu);
	}
}
2198
/* Try to merge this export CF with the directly preceding export into one
 * burst export when both write consecutive GPR / array_base ranges with
 * identical swizzles; the merged-away node is removed from the CF list. */
static void optimize_export_inst(struct r600_bc *bc, struct r600_bc_cf *cf)
{
	struct r600_bc_cf *prev = LIST_ENTRY(struct r600_bc_cf, cf->list.prev, list);
	/* predecessor must exist and be an identically configured export */
	if (&prev->list == &bc->cf ||
	    prev->inst != cf->inst ||
	    prev->output.type != cf->output.type ||
	    prev->output.elem_size != cf->output.elem_size ||
	    prev->output.swizzle_x != cf->output.swizzle_x ||
	    prev->output.swizzle_y != cf->output.swizzle_y ||
	    prev->output.swizzle_z != cf->output.swizzle_z ||
	    prev->output.swizzle_w != cf->output.swizzle_w)
		return;

	/* burst count is limited to 16 */
	if ((prev->output.burst_count + cf->output.burst_count) > 16)
		return;

	if ((prev->output.gpr + prev->output.burst_count) == cf->output.gpr &&
	    (prev->output.array_base + prev->output.burst_count) == cf->output.array_base) {

		/* cf continues directly after prev: fold it into prev */
		prev->output.burst_count += cf->output.burst_count;
		r600_bc_remove_cf(bc, cf);

	} else if (prev->output.gpr == (cf->output.gpr + cf->output.burst_count) &&
		   prev->output.array_base == (cf->output.array_base + cf->output.burst_count)) {

		/* prev continues directly after cf: fold it into cf */
		cf->output.burst_count += prev->output.burst_count;
		r600_bc_remove_cf(bc, prev);
	}
}
2228
/* Whole-program GPR optimization.
 *
 * Pass 1 walks the CF list and records live ranges and per-channel last
 * writes for every GPR.  The ranges are then remapped onto fewer registers
 * (or clause temporaries) and bc->ngpr is recomputed.  Pass 2 rewrites all
 * instructions with the new register numbers, recomputes barrier flags and
 * sinks exports out of conditional blocks, merging bursts where possible.
 * Loops are not handled yet: the pass bails out to 'out' when it meets an
 * unknown CF instruction. */
static void r600_bc_optimize(struct r600_bc *bc)
{
	struct r600_bc_cf *cf, *next_cf;
	struct r600_bc_alu *first, *next_alu;
	struct r600_bc_alu *alu;
	struct r600_bc_vtx *vtx;
	struct r600_bc_tex *tex;
	struct gpr_usage usage[128];

	/* assume that each gpr is exported only once */
	struct r600_bc_cf *export_cf[128] = { NULL };
	int export_remap[128];

	/* barrier[] is a VLA indexed by the current nesting depth */
	int id, cond_start, barrier[bc->nstack];
	unsigned i, j, stack, predicate, old_stack;

	memset(&usage, 0, sizeof(usage));
	for (i = 0; i < 128; ++i) {
		usage[i].first_write = -1;
		usage[i].last_write[0] = -1;
		usage[i].last_write[1] = -1;
		usage[i].last_write[2] = -1;
		usage[i].last_write[3] = -1;
	}

	/* first gather some informations about the gpr usage */
	id = 0; stack = 0;
	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
		old_stack = stack;
		if (stack == 0)
			/* NOTE(review): this always stores 0 since stack == 0
			 * here; 'cond_start = id' looks intended -- confirm */
			cond_start = stack;

		switch (r600_bc_cf_class(cf)) {
		case CF_CLASS_ALU:
			predicate = 0;
			first = NULL;
			LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
				if (!first)
					first = alu;
				notice_alu_src_gprs(bc, alu, usage, id);
				if (alu->last) {
					/* 'first' marks the start of the
					 * instruction group just finished */
					notice_alu_dst_gprs(first, usage, id, predicate || stack > 0);
					first = NULL;
					++id;
				}
				if (is_alu_pred_inst(bc, alu))
					predicate++;
			}
			/* track the conditional nesting depth */
			if (cf->inst == V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3)
				stack += predicate;
			else if (cf->inst == V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3)
				stack -= 1;
			else if (cf->inst == V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3)
				stack -= 2;
			break;
		case CF_CLASS_TEXTURE:
			LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
				notice_tex_gprs(bc, tex, usage, id++, stack > 0);
			}
			break;
		case CF_CLASS_VERTEX:
			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
				notice_vtx_gprs(vtx, usage, id++, stack > 0);
			}
			break;
		case CF_CLASS_EXPORT:
			notice_export_gprs(cf, usage, export_cf, export_remap);
			continue; // don't increment id
		case CF_CLASS_OTHER:
			switch (cf->inst) {
			case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
			case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
			case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
				break;

			case V_SQ_CF_WORD1_SQ_CF_INST_POP:
				stack -= cf->pop_count;
				break;

			default:
				// TODO implement loop handling
				goto out;
			}
		}

		/* extend last_write after conditional block */
		if (stack == 0 && old_stack != 0)
			for (i = 0; i < 128; ++i)
				for (j = 0; j < 4; ++j)
					if (usage[i].last_write[j] >= cond_start)
						usage[i].last_write[j] = id;

		/* ids advance by 0x100 per CF node; the low byte is the
		 * instruction index inside the node */
		id += 0x100;
		id &= ~0xFF;
	}
	assert(stack == 0);

	/* try to optimize gpr usage */
	for (i = 0; i < 124; ++i) {
		for (j = 0; j < usage[i].nranges; ++j) {
			struct gpr_usage_range *range = &usage[i].ranges[j];
			if (range->start == -1)
				/* can't rearange shader inputs */
				range->replacement = i;
			else if (range->end == -1)
				/* gpr isn't used any more after this instruction */
				range->replacement = -1;
			else
				find_replacement(usage, i, range);

			/* keep ngpr at the highest register still in use */
			if (range->replacement == i)
				bc->ngpr = i;
			else if (range->replacement < i && range->replacement > bc->ngpr)
				bc->ngpr = range->replacement;
		}
	}
	bc->ngpr++;

	/* apply the changes */
	for (i = 0; i < 128; ++i) {
		usage[i].last_write[0] = -1;
		usage[i].last_write[1] = -1;
		usage[i].last_write[2] = -1;
		usage[i].last_write[3] = -1;
	}
	barrier[0] = 0;
	id = 0; stack = 0;
	LIST_FOR_EACH_ENTRY_SAFE(cf, next_cf, &bc->cf, list) {
		old_stack = stack;
		switch (r600_bc_cf_class(cf)) {
		case CF_CLASS_ALU:
			predicate = 0;
			first = NULL;
			cf->barrier = 0;
			LIST_FOR_EACH_ENTRY_SAFE(alu, next_alu, &cf->alu, list) {
				replace_alu_gprs(bc, alu, usage, id, barrier[stack], &cf->barrier);
				if (alu->last)
					++id;

				if (is_alu_pred_inst(bc, alu))
					predicate++;

				/* plain ALU clauses may drop no-op MOVs */
				if (cf->inst == V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3)
					optimize_alu_inst(bc, cf, alu);
			}
			if (cf->inst == V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3)
				stack += predicate;
			else if (cf->inst == V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3)
				stack -= 1;
			else if (cf->inst == V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3)
				stack -= 2;
			if (LIST_IS_EMPTY(&cf->alu)) {
				/* every instruction was optimized away */
				r600_bc_remove_cf(bc, cf);
				cf = NULL;
			}
			break;
		case CF_CLASS_TEXTURE:
			cf->barrier = 0;
			LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
				replace_tex_gprs(tex, usage, id++, barrier[stack], &cf->barrier);
			}
			break;
		case CF_CLASS_VERTEX:
			cf->barrier = 0;
			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
				replace_vtx_gprs(vtx, usage, id++, barrier[stack], &cf->barrier);
			}
			break;
		case CF_CLASS_EXPORT:
			continue; // don't increment id
		case CF_CLASS_OTHER:
			if (cf->inst == V_SQ_CF_WORD1_SQ_CF_INST_POP) {
				cf->barrier = 0;
				stack -= cf->pop_count;
			}
			break;
		}

		id &= ~0xFF;
		if (cf && cf->barrier)
			barrier[old_stack] = id;

		/* propagate the barrier into newly entered nesting levels */
		for (i = old_stack + 1; i <= stack; ++i)
			barrier[i] = barrier[old_stack];

		id += 0x100;
		if (stack != 0) /* ensure exports are placed outside of conditional blocks */
			continue;

		/* emit pending exports whose source register is complete */
		for (i = 0; i < 128; ++i) {
			if (!export_cf[i] || id < export_remap[i])
				continue;

			r600_bc_move_cf(bc, export_cf[i], next_cf);
			replace_export_gprs(export_cf[i], usage, export_remap[i], barrier[stack]);
			if (export_cf[i]->barrier)
				barrier[stack] = id - 1;
			next_cf = LIST_ENTRY(struct r600_bc_cf, export_cf[i]->list.next, list);
			optimize_export_inst(bc, export_cf[i]);
			export_cf[i] = NULL;
		}
	}
	assert(stack == 0);

out:
	for (i = 0; i < 128; ++i) {
		free(usage[i].ranges);
	}
}
2438
/* Assemble the CF/ALU/TEX/VTX lists into the final bytecode buffer
 * (bc->bytecode, bc->ndw dwords).  Runs two passes: first compute each CF
 * node's address and mark the final export of each type, then encode every
 * node at its address.  Returns 0 on success or a negative errno. */
int r600_bc_build(struct r600_bc *bc)
{
	struct r600_bc_cf *cf;
	struct r600_bc_alu *alu;
	struct r600_bc_vtx *vtx;
	struct r600_bc_tex *tex;
	/* last seen plain EXPORT per output type; promoted to EXPORT_DONE */
	struct r600_bc_cf *exports[4] = { NULL };
	uint32_t literal[4];
	unsigned nliteral;
	unsigned addr;
	int i, r;

	/* derive the required stack depth from the call stack tracking */
	if (bc->callstack[0].max > 0)
		bc->nstack = ((bc->callstack[0].max + 3) >> 2) + 2;
	if (bc->type == TGSI_PROCESSOR_VERTEX && !bc->nstack) {
		bc->nstack = 1;
	}

	//r600_bc_optimize(bc);

	/* first path compute addr of each CF block */
	/* addr start after all the CF instructions */
	addr = LIST_ENTRY(struct r600_bc_cf, bc->cf.prev, list)->id + 2;
	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
		switch (r600_bc_cf_class(cf)) {
		case CF_CLASS_ALU:
			break;
		case CF_CLASS_TEXTURE:
		case CF_CLASS_VERTEX:
			/* fetch node need to be 16 bytes aligned*/
			addr += 3;
			addr &= 0xFFFFFFFCUL;
			break;
		case CF_CLASS_EXPORT:
			if (cf->inst == BC_INST(bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT))
				exports[cf->output.type] = cf;
			break;
		case CF_CLASS_OTHER:
			break;
		default:
			R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
			return -EINVAL;
		}
		cf->addr = addr;
		addr += cf->ndw;
		bc->ndw = cf->addr + cf->ndw;
	}

	/* set export done on last export of each type */
	for (i = 0; i < 4; ++i) {
		if (exports[i]) {
			exports[i]->inst = BC_INST(bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
		}
	}

	free(bc->bytecode);
	bc->bytecode = calloc(1, bc->ndw * 4);
	if (bc->bytecode == NULL)
		return -ENOMEM;
	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
		addr = cf->addr;
		if (bc->chiprev == CHIPREV_EVERGREEN)
			r = eg_bc_cf_build(bc, cf);
		else
			r = r600_bc_cf_build(bc, cf);
		if (r)
			return r;
		switch (r600_bc_cf_class(cf)) {
		case CF_CLASS_ALU:
			nliteral = 0;
			memset(literal, 0, sizeof(literal));
			LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
				r = r600_bc_alu_nliterals(bc, alu, literal, &nliteral);
				if (r)
					return r;
				r600_bc_alu_adjust_literals(bc, alu, literal, nliteral);
				switch(bc->chiprev) {
				case CHIPREV_R600:
					r = r600_bc_alu_build(bc, alu, addr);
					break;
				case CHIPREV_R700:
				case CHIPREV_EVERGREEN: /* eg alu is same encoding as r700 */
					r = r700_bc_alu_build(bc, alu, addr);
					break;
				default:
					R600_ERR("unknown family %d\n", bc->family);
					return -EINVAL;
				}
				if (r)
					return r;
				addr += 2;
				if (alu->last) {
					/* literal constants follow the last
					 * instruction of a group, padded to
					 * an even dword count */
					for (i = 0; i < align(nliteral, 2); ++i) {
						bc->bytecode[addr++] = literal[i];
					}
					nliteral = 0;
					memset(literal, 0, sizeof(literal));
				}
			}
			break;
		case CF_CLASS_VERTEX:
			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
				r = r600_bc_vtx_build(bc, vtx, addr);
				if (r)
					return r;
				addr += 4;
			}
			break;
		case CF_CLASS_TEXTURE:
			LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
				r = r600_bc_tex_build(bc, tex, addr);
				if (r)
					return r;
				addr += 4;
			}
			break;
		case CF_CLASS_EXPORT:
		case CF_CLASS_OTHER:
			break;
		default:
			R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
			return -EINVAL;
		}
	}
	return 0;
}
2565
2566 void r600_bc_clear(struct r600_bc *bc)
2567 {
2568 struct r600_bc_cf *cf = NULL, *next_cf;
2569
2570 free(bc->bytecode);
2571 bc->bytecode = NULL;
2572
2573 LIST_FOR_EACH_ENTRY_SAFE(cf, next_cf, &bc->cf, list) {
2574 struct r600_bc_alu *alu = NULL, *next_alu;
2575 struct r600_bc_tex *tex = NULL, *next_tex;
2576 struct r600_bc_tex *vtx = NULL, *next_vtx;
2577
2578 LIST_FOR_EACH_ENTRY_SAFE(alu, next_alu, &cf->alu, list) {
2579 free(alu);
2580 }
2581
2582 LIST_INITHEAD(&cf->alu);
2583
2584 LIST_FOR_EACH_ENTRY_SAFE(tex, next_tex, &cf->tex, list) {
2585 free(tex);
2586 }
2587
2588 LIST_INITHEAD(&cf->tex);
2589
2590 LIST_FOR_EACH_ENTRY_SAFE(vtx, next_vtx, &cf->vtx, list) {
2591 free(vtx);
2592 }
2593
2594 LIST_INITHEAD(&cf->vtx);
2595
2596 free(cf);
2597 }
2598
2599 LIST_INITHEAD(&cf->list);
2600 }
2601
/* Print a human-readable disassembly of the built bytecode to stderr:
 * for each CF node its two control words, followed by the decoded ALU,
 * TEX and VTX instruction words (and ALU literal constants). */
void r600_bc_dump(struct r600_bc *bc)
{
	struct r600_bc_cf *cf = NULL;
	struct r600_bc_alu *alu = NULL;
	struct r600_bc_vtx *vtx = NULL;
	struct r600_bc_tex *tex = NULL;

	unsigned i, id;
	uint32_t literal[4];
	unsigned nliteral;
	char chip = '6';

	/* chip generation marker printed in the header */
	switch (bc->chiprev) {
	case 1:
		chip = '7';
		break;
	case 2:
		chip = 'E';
		break;
	case 0:
	default:
		chip = '6';
		break;
	}
	fprintf(stderr, "bytecode %d dw -- %d gprs ---------------------\n", bc->ndw, bc->ngpr);
	fprintf(stderr, "     %c\n", chip);

	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
		id = cf->id;

		/* the two CF control words */
		switch (r600_bc_cf_class(cf)) {
		case CF_CLASS_ALU:
			fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]);
			fprintf(stderr, "ADDR:%04d ", cf->addr);
			fprintf(stderr, "KCACHE_MODE0:%X ", cf->kcache[0].mode);
			fprintf(stderr, "KCACHE_BANK0:%X ", cf->kcache[0].bank);
			fprintf(stderr, "KCACHE_BANK1:%X\n", cf->kcache[1].bank);
			id++;
			fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]);
			fprintf(stderr, "INST:%d ", cf->inst);
			fprintf(stderr, "KCACHE_MODE1:%X ", cf->kcache[1].mode);
			fprintf(stderr, "KCACHE_ADDR0:%X ", cf->kcache[0].addr);
			fprintf(stderr, "KCACHE_ADDR1:%X ", cf->kcache[1].addr);
			fprintf(stderr, "BARRIER:%d ", cf->barrier);
			fprintf(stderr, "COUNT:%d\n", cf->ndw / 2);
			break;
		case CF_CLASS_TEXTURE:
		case CF_CLASS_VERTEX:
			fprintf(stderr, "%04d %08X TEX/VTX ", id, bc->bytecode[id]);
			fprintf(stderr, "ADDR:%04d\n", cf->addr);
			id++;
			fprintf(stderr, "%04d %08X TEX/VTX ", id, bc->bytecode[id]);
			fprintf(stderr, "INST:%d ", cf->inst);
			fprintf(stderr, "BARRIER:%d ", cf->barrier);
			fprintf(stderr, "COUNT:%d\n", cf->ndw / 4);
			break;
		case CF_CLASS_EXPORT:
			fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]);
			fprintf(stderr, "GPR:%d ", cf->output.gpr);
			fprintf(stderr, "ELEM_SIZE:%X ", cf->output.elem_size);
			fprintf(stderr, "ARRAY_BASE:%X ", cf->output.array_base);
			fprintf(stderr, "TYPE:%X\n", cf->output.type);
			id++;
			fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]);
			fprintf(stderr, "SWIZ_X:%X ", cf->output.swizzle_x);
			fprintf(stderr, "SWIZ_Y:%X ", cf->output.swizzle_y);
			fprintf(stderr, "SWIZ_Z:%X ", cf->output.swizzle_z);
			fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w);
			fprintf(stderr, "BARRIER:%d ", cf->barrier);
			fprintf(stderr, "INST:%d ", cf->inst);
			fprintf(stderr, "BURST_COUNT:%d\n", cf->output.burst_count);
			break;
		case CF_CLASS_OTHER:
			fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]);
			fprintf(stderr, "ADDR:%04d\n", cf->cf_addr);
			id++;
			fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]);
			fprintf(stderr, "INST:%d ", cf->inst);
			fprintf(stderr, "COND:%X ", cf->cond);
			fprintf(stderr, "BARRIER:%d ", cf->barrier);
			fprintf(stderr, "POP_COUNT:%X\n", cf->pop_count);
			break;
		}

		/* instruction words live at the CF node's payload address */
		id = cf->addr;
		nliteral = 0;
		LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
			r600_bc_alu_nliterals(bc, alu, literal, &nliteral);

			fprintf(stderr, "%04d %08X   ", id, bc->bytecode[id]);
			fprintf(stderr, "SRC0(SEL:%d ", alu->src[0].sel);
			fprintf(stderr, "REL:%d ", alu->src[0].rel);
			fprintf(stderr, "CHAN:%d ", alu->src[0].chan);
			fprintf(stderr, "NEG:%d) ", alu->src[0].neg);
			fprintf(stderr, "SRC1(SEL:%d ", alu->src[1].sel);
			fprintf(stderr, "REL:%d ", alu->src[1].rel);
			fprintf(stderr, "CHAN:%d ", alu->src[1].chan);
			fprintf(stderr, "NEG:%d) ", alu->src[1].neg);
			fprintf(stderr, "LAST:%d)\n", alu->last);
			id++;
			fprintf(stderr, "%04d %08X %c  ", id, bc->bytecode[id], alu->last ? '*' : ' ');
			fprintf(stderr, "INST:%d ", alu->inst);
			fprintf(stderr, "DST(SEL:%d ", alu->dst.sel);
			fprintf(stderr, "CHAN:%d ", alu->dst.chan);
			fprintf(stderr, "REL:%d ", alu->dst.rel);
			fprintf(stderr, "CLAMP:%d) ", alu->dst.clamp);
			fprintf(stderr, "BANK_SWIZZLE:%d ", alu->bank_swizzle);
			if (alu->is_op3) {
				fprintf(stderr, "SRC2(SEL:%d ", alu->src[2].sel);
				fprintf(stderr, "REL:%d ", alu->src[2].rel);
				fprintf(stderr, "CHAN:%d ", alu->src[2].chan);
				fprintf(stderr, "NEG:%d)\n", alu->src[2].neg);
			} else {
				fprintf(stderr, "SRC0_ABS:%d ", alu->src[0].abs);
				fprintf(stderr, "SRC1_ABS:%d ", alu->src[1].abs);
				fprintf(stderr, "WRITE_MASK:%d ", alu->dst.write);
				fprintf(stderr, "OMOD:%d ", alu->omod);
				fprintf(stderr, "EXECUTE_MASK:%d ", alu->predicate);
				/* NOTE(review): EXECUTE_MASK and UPDATE_PRED
				 * both print alu->predicate -- confirm that is
				 * intended and not a copy-paste slip */
				fprintf(stderr, "UPDATE_PRED:%d\n", alu->predicate);
			}

			id++;
			if (alu->last) {
				/* literal dwords follow the group, shown as
				 * their float interpretation */
				for (i = 0; i < nliteral; i++, id++) {
					float *f = (float*)(bc->bytecode + id);
					fprintf(stderr, "%04d %08X\t%f\n", id, bc->bytecode[id], *f);
				}
				id += nliteral & 1;
				nliteral = 0;
			}
		}

		LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
			fprintf(stderr, "%04d %08X   ", id, bc->bytecode[id]);
			fprintf(stderr, "INST:%d ", tex->inst);
			fprintf(stderr, "RESOURCE_ID:%d ", tex->resource_id);
			fprintf(stderr, "SRC(GPR:%d ", tex->src_gpr);
			fprintf(stderr, "REL:%d)\n", tex->src_rel);
			id++;
			fprintf(stderr, "%04d %08X   ", id, bc->bytecode[id]);
			fprintf(stderr, "DST(GPR:%d ", tex->dst_gpr);
			fprintf(stderr, "REL:%d ", tex->dst_rel);
			fprintf(stderr, "SEL_X:%d ", tex->dst_sel_x);
			fprintf(stderr, "SEL_Y:%d ", tex->dst_sel_y);
			fprintf(stderr, "SEL_Z:%d ", tex->dst_sel_z);
			fprintf(stderr, "SEL_W:%d) ", tex->dst_sel_w);
			fprintf(stderr, "LOD_BIAS:%d ", tex->lod_bias);
			fprintf(stderr, "COORD_TYPE_X:%d ", tex->coord_type_x);
			fprintf(stderr, "COORD_TYPE_Y:%d ", tex->coord_type_y);
			fprintf(stderr, "COORD_TYPE_Z:%d ", tex->coord_type_z);
			fprintf(stderr, "COORD_TYPE_W:%d\n", tex->coord_type_w);
			id++;
			fprintf(stderr, "%04d %08X   ", id, bc->bytecode[id]);
			fprintf(stderr, "OFFSET_X:%d ", tex->offset_x);
			fprintf(stderr, "OFFSET_Y:%d ", tex->offset_y);
			fprintf(stderr, "OFFSET_Z:%d ", tex->offset_z);
			fprintf(stderr, "SAMPLER_ID:%d ", tex->sampler_id);
			fprintf(stderr, "SRC(SEL_X:%d ", tex->src_sel_x);
			fprintf(stderr, "SEL_Y:%d ", tex->src_sel_y);
			fprintf(stderr, "SEL_Z:%d ", tex->src_sel_z);
			fprintf(stderr, "SEL_W:%d)\n", tex->src_sel_w);
			id++;
			fprintf(stderr, "%04d %08X   \n", id, bc->bytecode[id]);
			id++;
		}

		LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
			fprintf(stderr, "%04d %08X   ", id, bc->bytecode[id]);
			fprintf(stderr, "INST:%d ", vtx->inst);
			fprintf(stderr, "FETCH_TYPE:%d ", vtx->fetch_type);
			fprintf(stderr, "BUFFER_ID:%d\n", vtx->buffer_id);
			id++;
			/* This assumes that no semantic fetches exist */
			fprintf(stderr, "%04d %08X   ", id, bc->bytecode[id]);
			fprintf(stderr, "SRC(GPR:%d ", vtx->src_gpr);
			fprintf(stderr, "SEL_X:%d) ", vtx->src_sel_x);
			fprintf(stderr, "MEGA_FETCH_COUNT:%d ", vtx->mega_fetch_count);
			fprintf(stderr, "DST(GPR:%d ", vtx->dst_gpr);
			fprintf(stderr, "SEL_X:%d ", vtx->dst_sel_x);
			fprintf(stderr, "SEL_Y:%d ", vtx->dst_sel_y);
			fprintf(stderr, "SEL_Z:%d ", vtx->dst_sel_z);
			fprintf(stderr, "SEL_W:%d) ", vtx->dst_sel_w);
			fprintf(stderr, "USE_CONST_FIELDS:%d ", vtx->use_const_fields);
			fprintf(stderr, "FORMAT(DATA:%d ", vtx->data_format);
			fprintf(stderr, "NUM:%d ", vtx->num_format_all);
			fprintf(stderr, "COMP:%d ", vtx->format_comp_all);
			fprintf(stderr, "MODE:%d)\n", vtx->srf_mode_all);
			id++;
			fprintf(stderr, "%04d %08X   ", id, bc->bytecode[id]);
			fprintf(stderr, "OFFSET:%d\n", vtx->offset);
			//TODO
			id++;
			fprintf(stderr, "%04d %08X   \n", id, bc->bytecode[id]);
			id++;
		}
	}

	fprintf(stderr, "--------------------------------------\n");
}
2801
2802 static void r600_cf_vtx(struct r600_vertex_element *ve)
2803 {
2804 struct r600_pipe_state *rstate;
2805
2806 rstate = &ve->rstate;
2807 rstate->id = R600_PIPE_STATE_FETCH_SHADER;
2808 rstate->nregs = 0;
2809 r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_RESOURCES_FS,
2810 0x00000000, 0xFFFFFFFF, NULL);
2811 r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS,
2812 0x00000000, 0xFFFFFFFF, NULL);
2813 r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS,
2814 r600_bo_offset(ve->fetch_shader) >> 8,
2815 0xFFFFFFFF, ve->fetch_shader);
2816 }
2817
2818 static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
2819 unsigned *num_format, unsigned *format_comp)
2820 {
2821 const struct util_format_description *desc;
2822 unsigned i;
2823
2824 *format = 0;
2825 *num_format = 0;
2826 *format_comp = 0;
2827
2828 desc = util_format_description(pformat);
2829 if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
2830 goto out_unknown;
2831 }
2832
2833 /* Find the first non-VOID channel. */
2834 for (i = 0; i < 4; i++) {
2835 if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
2836 break;
2837 }
2838 }
2839
2840 switch (desc->channel[i].type) {
2841 /* Half-floats, floats, ints */
2842 case UTIL_FORMAT_TYPE_FLOAT:
2843 switch (desc->channel[i].size) {
2844 case 16:
2845 switch (desc->nr_channels) {
2846 case 1:
2847 *format = FMT_16_FLOAT;
2848 break;
2849 case 2:
2850 *format = FMT_16_16_FLOAT;
2851 break;
2852 case 3:
2853 case 4:
2854 *format = FMT_16_16_16_16_FLOAT;
2855 break;
2856 }
2857 break;
2858 case 32:
2859 switch (desc->nr_channels) {
2860 case 1:
2861 *format = FMT_32_FLOAT;
2862 break;
2863 case 2:
2864 *format = FMT_32_32_FLOAT;
2865 break;
2866 case 3:
2867 *format = FMT_32_32_32_FLOAT;
2868 break;
2869 case 4:
2870 *format = FMT_32_32_32_32_FLOAT;
2871 break;
2872 }
2873 break;
2874 default:
2875 goto out_unknown;
2876 }
2877 break;
2878 /* Unsigned ints */
2879 case UTIL_FORMAT_TYPE_UNSIGNED:
2880 /* Signed ints */
2881 case UTIL_FORMAT_TYPE_SIGNED:
2882 switch (desc->channel[i].size) {
2883 case 8:
2884 switch (desc->nr_channels) {
2885 case 1:
2886 *format = FMT_8;
2887 break;
2888 case 2:
2889 *format = FMT_8_8;
2890 break;
2891 case 3:
2892 case 4:
2893 *format = FMT_8_8_8_8;
2894 break;
2895 }
2896 break;
2897 case 16:
2898 switch (desc->nr_channels) {
2899 case 1:
2900 *format = FMT_16;
2901 break;
2902 case 2:
2903 *format = FMT_16_16;
2904 break;
2905 case 3:
2906 case 4:
2907 *format = FMT_16_16_16_16;
2908 break;
2909 }
2910 break;
2911 case 32:
2912 switch (desc->nr_channels) {
2913 case 1:
2914 *format = FMT_32;
2915 break;
2916 case 2:
2917 *format = FMT_32_32;
2918 break;
2919 case 3:
2920 *format = FMT_32_32_32;
2921 break;
2922 case 4:
2923 *format = FMT_32_32_32_32;
2924 break;
2925 }
2926 break;
2927 default:
2928 goto out_unknown;
2929 }
2930 break;
2931 default:
2932 goto out_unknown;
2933 }
2934
2935 if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
2936 *format_comp = 1;
2937 }
2938 if (desc->channel[i].normalized) {
2939 *num_format = 0;
2940 } else {
2941 *num_format = 2;
2942 }
2943 return;
2944 out_unknown:
2945 R600_ERR("unsupported vertex format %s\n", util_format_name(pformat));
2946 }
2947
2948 int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve)
2949 {
2950 static int dump_shaders = -1;
2951
2952 struct r600_bc bc;
2953 struct r600_bc_vtx vtx;
2954 struct pipe_vertex_element *elements = ve->elements;
2955 const struct util_format_description *desc;
2956 unsigned fetch_resource_start = rctx->family >= CHIP_CEDAR ? 0 : 160;
2957 unsigned format, num_format, format_comp;
2958 u32 *bytecode;
2959 int i, r;
2960
2961 /* vertex elements offset need special handling, if offset is bigger
	 * than what we can put in the fetch instruction then we need to alter
2963 * the vertex resource offset. In such case in order to simplify code
2964 * we will bound one resource per elements. It's a worst case scenario.
2965 */
2966 for (i = 0; i < ve->count; i++) {
2967 ve->vbuffer_offset[i] = C_SQ_VTX_WORD2_OFFSET & elements[i].src_offset;
2968 if (ve->vbuffer_offset[i]) {
2969 ve->vbuffer_need_offset = 1;
2970 }
2971 }
2972
2973 memset(&bc, 0, sizeof(bc));
2974 r = r600_bc_init(&bc, r600_get_family(rctx->radeon));
2975 if (r)
2976 return r;
2977
2978 for (i = 0; i < ve->count; i++) {
2979 if (elements[i].instance_divisor > 1) {
2980 struct r600_bc_alu alu;
2981
2982 memset(&alu, 0, sizeof(alu));
2983 alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
2984 alu.src[0].sel = 0;
2985 alu.src[0].chan = 3;
2986
2987 alu.dst.sel = i + 1;
2988 alu.dst.chan = 3;
2989 alu.dst.write = 1;
2990 alu.last = 1;
2991
2992 if ((r = r600_bc_add_alu(&bc, &alu))) {
2993 r600_bc_clear(&bc);
2994 return r;
2995 }
2996
2997 memset(&alu, 0, sizeof(alu));
2998 alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2999 alu.src[0].sel = i + 1;
3000 alu.src[0].chan = 3;
3001
3002 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
3003 alu.src[1].value = fui(1.0f / (float)elements[i].instance_divisor);
3004
3005 alu.dst.sel = i + 1;
3006 alu.dst.chan = 3;
3007 alu.dst.write = 1;
3008 alu.last = 1;
3009
3010 if ((r = r600_bc_add_alu(&bc, &alu))) {
3011 r600_bc_clear(&bc);
3012 return r;
3013 }
3014
3015 memset(&alu, 0, sizeof(alu));
3016 alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC);
3017 alu.src[0].sel = i + 1;
3018 alu.src[0].chan = 3;
3019
3020 alu.dst.sel = i + 1;
3021 alu.dst.chan = 3;
3022 alu.dst.write = 1;
3023 alu.last = 1;
3024
3025 if ((r = r600_bc_add_alu(&bc, &alu))) {
3026 r600_bc_clear(&bc);
3027 return r;
3028 }
3029
3030 memset(&alu, 0, sizeof(alu));
3031 alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT);
3032 alu.src[0].sel = i + 1;
3033 alu.src[0].chan = 3;
3034
3035 alu.dst.sel = i + 1;
3036 alu.dst.chan = 3;
3037 alu.dst.write = 1;
3038 alu.last = 1;
3039
3040 if ((r = r600_bc_add_alu(&bc, &alu))) {
3041 r600_bc_clear(&bc);
3042 return r;
3043 }
3044 }
3045 }
3046
3047 for (i = 0; i < ve->count; i++) {
3048 unsigned vbuffer_index;
3049 r600_vertex_data_type(ve->elements[i].src_format, &format, &num_format, &format_comp);
3050 desc = util_format_description(ve->elements[i].src_format);
3051 if (desc == NULL) {
3052 r600_bc_clear(&bc);
3053 R600_ERR("unknown format %d\n", ve->elements[i].src_format);
3054 return -EINVAL;
3055 }
3056
3057 /* see above for vbuffer_need_offset explanation */
3058 vbuffer_index = elements[i].vertex_buffer_index;
3059 memset(&vtx, 0, sizeof(vtx));
3060 vtx.buffer_id = (ve->vbuffer_need_offset ? i : vbuffer_index) + fetch_resource_start;
3061 vtx.fetch_type = elements[i].instance_divisor ? 1 : 0;
3062 vtx.src_gpr = elements[i].instance_divisor > 1 ? i + 1 : 0;
3063 vtx.src_sel_x = elements[i].instance_divisor ? 3 : 0;
3064 vtx.mega_fetch_count = 0x1F;
3065 vtx.dst_gpr = i + 1;
3066 vtx.dst_sel_x = desc->swizzle[0];
3067 vtx.dst_sel_y = desc->swizzle[1];
3068 vtx.dst_sel_z = desc->swizzle[2];
3069 vtx.dst_sel_w = desc->swizzle[3];
3070 vtx.data_format = format;
3071 vtx.num_format_all = num_format;
3072 vtx.format_comp_all = format_comp;
3073 vtx.srf_mode_all = 1;
3074 vtx.offset = elements[i].src_offset;
3075
3076 if ((r = r600_bc_add_vtx(&bc, &vtx))) {
3077 r600_bc_clear(&bc);
3078 return r;
3079 }
3080 }
3081
3082 r600_bc_add_cfinst(&bc, BC_INST(&bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN));
3083 r600_bc_add_cfinst(&bc, BC_INST(&bc, V_SQ_CF_WORD1_SQ_CF_INST_NOP));
3084
3085 /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */
3086 ve->fetch_shader = r600_bo(rctx->radeon, bc.ndw*4, 256, PIPE_BIND_VERTEX_BUFFER, 0);
3087 if (ve->fetch_shader == NULL) {
3088 r600_bc_clear(&bc);
3089 return -ENOMEM;
3090 }
3091
3092 ve->fs_size = bc.ndw*4;
3093 if ((r = r600_bc_build(&bc))) {
3094 r600_bc_clear(&bc);
3095 return r;
3096 }
3097
3098 if (dump_shaders == -1)
3099 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
3100
3101 if (dump_shaders) {
3102 fprintf(stderr, "--------------------------------------------------------------\n");
3103 r600_bc_dump(&bc);
3104 fprintf(stderr, "______________________________________________________________\n");
3105 }
3106
3107 bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, 0, NULL);
3108 if (bytecode == NULL) {
3109 r600_bc_clear(&bc);
3110 r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL);
3111 return -ENOMEM;
3112 }
3113
3114 memcpy(bytecode, bc.bytecode, ve->fs_size);
3115
3116 r600_bo_unmap(rctx->radeon, ve->fetch_shader);
3117 r600_bc_clear(&bc);
3118
3119 if (rctx->family >= CHIP_CEDAR)
3120 eg_cf_vtx(ve);
3121 else
3122 r600_cf_vtx(ve);
3123
3124 return 0;
3125 }