Merge remote branch 'origin/7.8'
[mesa.git] / src / gallium / drivers / cell / spu / spu_command.c
1 /**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 /**
30 * SPU command processing code
31 */
32
33
34 #include <stdio.h>
35 #include <libmisc.h>
36
37 #include "pipe/p_defines.h"
38
39 #include "spu_command.h"
40 #include "spu_main.h"
41 #include "spu_render.h"
42 #include "spu_per_fragment_op.h"
43 #include "spu_texture.h"
44 #include "spu_tile.h"
45 #include "spu_vertex_shader.h"
46 #include "spu_dcache.h"
47 #include "cell/common.h"
48
49
/** Vertex-shader execution context (vertex fetch state, code, viewport). */
struct spu_vs_context draw;


/**
 * Buffers containing dynamically generated SPU code:
 * one fetch routine slot per vertex attribute
 * (presumably up to 136 bytes of code each — TODO confirm).
 */
PIPE_ALIGN_VAR(16) static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS];
57
58
59
60 static INLINE int
61 align(int value, int alignment)
62 {
63 return (value + alignment - 1) & ~(alignment - 1);
64 }
65
66
67
/**
 * Tell the PPU that this SPU has finished copying a buffer to
 * local store and that it may be reused by the PPU.
 * This is done by writing a 16-byte batch-buffer-status block back into
 * main memory (in cell_context->buffer_status[]).
 */
static void
release_buffer(uint buffer)
{
   /* Evidently, using less than a 16-byte status doesn't work reliably */
   static const vector unsigned int status = {CELL_BUFFER_STATUS_FREE,
                                              CELL_BUFFER_STATUS_FREE,
                                              CELL_BUFFER_STATUS_FREE,
                                              CELL_BUFFER_STATUS_FREE};
   /* Each SPU owns CELL_NUM_BUFFERS status qwords; 4 uints per qword. */
   const uint index = 4 * (spu.init.id * CELL_NUM_BUFFERS + buffer);
   uint *dst = spu.init.buffer_status + index;   /* main-memory address */

   ASSERT(buffer < CELL_NUM_BUFFERS);

   /* DMA the status qword out to main memory.
    * NOTE(review): no wait on TAG_MISC here — presumably completion is
    * synchronized elsewhere (e.g. cmd_finish's wait-all) — confirm.
    */
   mfc_put((void *) &status,    /* src in local memory */
           (unsigned int) dst,  /* dst in main memory */
           sizeof(status),      /* size */
           TAG_MISC,            /* tag is unimportant */
           0,                   /* tid */
           0                    /* rid */);
}
94
95
/**
 * Write CELL_FENCE_SIGNALLED back to the fence status qword in main memory.
 * There's a qword of status per SPU.
 */
static void
cmd_fence(struct cell_command_fence *fence_cmd)
{
   static const vector unsigned int status = {CELL_FENCE_SIGNALLED,
                                              CELL_FENCE_SIGNALLED,
                                              CELL_FENCE_SIGNALLED,
                                              CELL_FENCE_SIGNALLED};
   uint *dst = (uint *) fence_cmd->fence;
   dst += 4 * spu.init.id;   /* main store/memory address, not local store */
   ASSERT_ALIGN16(dst);
   /* NOTE(review): no wait on TAG_FENCE here — presumably the PPU polls
    * the status word and/or cmd_finish waits on all tags — confirm.
    */
   mfc_put((void *) &status,    /* src in local memory */
           (unsigned int) dst,  /* dst in main memory */
           sizeof(status),      /* size */
           TAG_FENCE,           /* tag */
           0,                   /* tid */
           0                    /* rid */);
}
117
118
/**
 * Process a CELL_CMD_CLEAR_SURFACE command: record the clear value for
 * the color (surface==0) or depth/stencil (surface!=0) buffer and mark
 * (or, in the non-optimized path, actually clear) this SPU's tiles.
 */
static void
cmd_clear_surface(const struct cell_command_clear_surface *clear)
{
   D_PRINTF(CELL_DEBUG_CMD, "CLEAR SURF %u to 0x%08x\n", clear->surface, clear->value);

   if (clear->surface == 0) {
      spu.fb.color_clear_value = clear->value;
      if (spu.init.debug_flags & CELL_DEBUG_CHECKER) {
         /* XOR a per-SPU bit pattern into the clear color so each SPU's
          * tiles are visually distinguishable (debug aid).
          */
         uint x = (spu.init.id << 4) | (spu.init.id << 12) |
            (spu.init.id << 20) | (spu.init.id << 28);
         spu.fb.color_clear_value ^= x;
      }
   }
   else {
      spu.fb.depth_clear_value = clear->value;
   }

#define CLEAR_OPT 1
#if CLEAR_OPT

   /* Simply set all tiles' status to CLEAR.
    * When we actually begin rendering into a tile, we'll initialize it to
    * the clear value.  If any tiles go untouched during the frame,
    * really_clear_tiles() will set them to the clear value.
    */
   if (clear->surface == 0) {
      memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status));
   }
   else {
      memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status));
   }

#else

   /*
    * This path clears the whole framebuffer to the clear color right now.
    */

   /*
   printf("SPU: %s num=%d w=%d h=%d\n",
          __FUNCTION__, num_tiles, spu.fb.width_tiles, spu.fb.height_tiles);
   */

   /* init a single tile to the clear value */
   if (clear->surface == 0) {
      clear_c_tile(&spu.ctile);
   }
   else {
      clear_z_tile(&spu.ztile);
   }

   /* walk over my tiles, writing the 'clear' tile's data;
    * tiles are distributed round-robin across the SPUs by index
    */
   {
      const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles;
      uint i;
      for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
         uint tx = i % spu.fb.width_tiles;
         uint ty = i / spu.fb.width_tiles;
         if (clear->surface == 0)
            put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0);
         else
            put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1);
      }
   }

   if (spu.init.debug_flags & CELL_DEBUG_SYNC) {
      wait_on_mask(1 << TAG_SURFACE_CLEAR);
   }

#endif /* CLEAR_OPT */

   D_PRINTF(CELL_DEBUG_CMD, "CLEAR SURF done\n");
}
192
193
/**
 * Process a CELL_CMD_RELEASE_VERTS command:
 * tell the PPU the given vertex buffer may be reused.
 */
static void
cmd_release_verts(const struct cell_command_release_verts *release)
{
   D_PRINTF(CELL_DEBUG_CMD, "RELEASE VERTS %u\n", release->vertex_buf);
   ASSERT(release->vertex_buf != ~0U);   /* ~0 marks "no buffer" */
   release_buffer(release->vertex_buf);
}
201
202
/**
 * Process a CELL_CMD_STATE_FRAGMENT_OPS command.
 * This involves installing new fragment ops SPU code.
 * If this function is never called, we'll use a regular C fallback function
 * for fragment processing.
 */
static void
cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops)
{
   D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_OPS\n");

   /* Copy state info (for fallback case only - this will eventually
    * go away when the fallback case goes away)
    */
   memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa));
   memcpy(&spu.blend, &fops->blend, sizeof(fops->blend));
   memcpy(&spu.blend_color, &fops->blend_color, sizeof(fops->blend_color));

   /* Make sure the SPU knows which buffers it's expected to read when
    * it's told to pull tiles.
    */
   spu.read_depth_stencil = (spu.depth_stencil_alpha.depth.enabled || spu.depth_stencil_alpha.stencil[0].enabled);

   /* If we're forcing the fallback code to be used (for debug purposes),
    * install that.  Otherwise install the incoming SPU code.
    */
   if ((spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) != 0) {
      static unsigned int warned = 0;   /* warn only once per run */
      if (!warned) {
         fprintf(stderr, "Cell Warning: using fallback per-fragment code\n");
         warned = 1;
      }
      /* The following two lines aren't really necessary if you
       * know the debug flags won't change during a run, and if you
       * know that the function pointers are initialized correctly.
       * We set them here to allow a person to change the debug
       * flags during a run (from inside a debugger).
       */
      spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops;
      spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops;
      return;
   }

   /* Make sure the SPU code buffer is large enough to hold the incoming code.
    * Note that we *don't* use align_malloc() and align_free(), because
    * those utility functions are *not* available in SPU code.
    * */
   if (spu.fragment_ops_code_size < fops->total_code_size) {
      if (spu.fragment_ops_code != NULL) {
         free(spu.fragment_ops_code);
      }
      spu.fragment_ops_code_size = fops->total_code_size;
      spu.fragment_ops_code = malloc(fops->total_code_size);
      if (spu.fragment_ops_code == NULL) {
         /* Whoops.  Allocation failed: reset to empty-buffer state and
          * fall back to the C implementation for both facings.
          */
         fprintf(stderr, "CELL Warning: failed to allocate fragment ops code (%d bytes) - using fallback\n", fops->total_code_size);
         spu.fragment_ops_code = NULL;
         spu.fragment_ops_code_size = 0;
         spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops;
         spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops;
         return;
      }
   }

   /* Copy the SPU code from the command buffer to the spu buffer */
   memcpy(spu.fragment_ops_code, fops->code, fops->total_code_size);

   /* Set the pointers for the front-facing and back-facing fragments
    * to the specified offsets within the code.  Note that if the
    * front-facing and back-facing code are the same, they'll have
    * the same offset.
    */
   spu.fragment_ops[CELL_FACING_FRONT] = (spu_fragment_ops_func) &spu.fragment_ops_code[fops->front_code_index];
   spu.fragment_ops[CELL_FACING_BACK] = (spu_fragment_ops_func) &spu.fragment_ops_code[fops->back_code_index];
}
278
279 static void
280 cmd_state_fragment_program(const struct cell_command_fragment_program *fp)
281 {
282 D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_PROGRAM\n");
283 /* Copy SPU code from batch buffer to spu buffer */
284 memcpy(spu.fragment_program_code, fp->code,
285 SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4);
286 #if 01
287 /* Point function pointer at new code */
288 spu.fragment_program = (spu_fragment_program_func)spu.fragment_program_code;
289 #endif
290 }
291
292
293 static uint
294 cmd_state_fs_constants(const qword *buffer, uint pos)
295 {
296 const uint num_const = spu_extract((vector unsigned int)buffer[pos+1], 0);
297 const float *constants = (const float *) &buffer[pos+2];
298 uint i;
299
300 D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FS_CONSTANTS (%u)\n", num_const);
301
302 /* Expand each float to float[4] for SOA execution */
303 for (i = 0; i < num_const; i++) {
304 D_PRINTF(CELL_DEBUG_CMD, " const[%u] = %f\n", i, constants[i]);
305 spu.constants[i] = spu_splats(constants[i]);
306 }
307
308 /* return new buffer pos (in 16-byte words) */
309 return pos + 2 + (ROUNDUP16(num_const * sizeof(float)) / 16);
310 }
311
312
313 static void
314 cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)
315 {
316 D_PRINTF(CELL_DEBUG_CMD, "FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n",
317 cmd->width,
318 cmd->height,
319 cmd->color_start,
320 cmd->color_format,
321 cmd->depth_format);
322
323 ASSERT_ALIGN16(cmd->color_start);
324 ASSERT_ALIGN16(cmd->depth_start);
325
326 spu.fb.color_start = cmd->color_start;
327 spu.fb.depth_start = cmd->depth_start;
328 spu.fb.color_format = cmd->color_format;
329 spu.fb.depth_format = cmd->depth_format;
330 spu.fb.width = cmd->width;
331 spu.fb.height = cmd->height;
332 spu.fb.width_tiles = (spu.fb.width + TILE_SIZE - 1) / TILE_SIZE;
333 spu.fb.height_tiles = (spu.fb.height + TILE_SIZE - 1) / TILE_SIZE;
334
335 switch (spu.fb.depth_format) {
336 case PIPE_FORMAT_Z32_UNORM:
337 spu.fb.zsize = 4;
338 spu.fb.zscale = (float) 0xffffffffu;
339 break;
340 case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
341 case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
342 case PIPE_FORMAT_X8Z24_UNORM:
343 case PIPE_FORMAT_Z24X8_UNORM:
344 spu.fb.zsize = 4;
345 spu.fb.zscale = (float) 0x00ffffffu;
346 break;
347 case PIPE_FORMAT_Z16_UNORM:
348 spu.fb.zsize = 2;
349 spu.fb.zscale = (float) 0xffffu;
350 break;
351 default:
352 spu.fb.zsize = 0;
353 break;
354 }
355 }
356
357
358 /**
359 * Tex texture mask_s/t and scale_s/t fields depend on the texture size and
360 * sampler wrap modes.
361 */
362 static void
363 update_tex_masks(struct spu_texture *texture,
364 const struct pipe_sampler_state *sampler)
365 {
366 uint i;
367
368 for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) {
369 int width = texture->level[i].width;
370 int height = texture->level[i].height;
371
372 if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT)
373 texture->level[i].mask_s = spu_splats(width - 1);
374 else
375 texture->level[i].mask_s = spu_splats(~0);
376
377 if (sampler->wrap_t == PIPE_TEX_WRAP_REPEAT)
378 texture->level[i].mask_t = spu_splats(height - 1);
379 else
380 texture->level[i].mask_t = spu_splats(~0);
381
382 if (sampler->normalized_coords) {
383 texture->level[i].scale_s = spu_splats((float) width);
384 texture->level[i].scale_t = spu_splats((float) height);
385 }
386 else {
387 texture->level[i].scale_s = spu_splats(1.0f);
388 texture->level[i].scale_t = spu_splats(1.0f);
389 }
390 }
391 }
392
393
394 static void
395 cmd_state_sampler(const struct cell_command_sampler *sampler)
396 {
397 uint unit = sampler->unit;
398
399 D_PRINTF(CELL_DEBUG_CMD, "SAMPLER [%u]\n", unit);
400
401 spu.sampler[unit] = sampler->state;
402
403 switch (spu.sampler[unit].min_img_filter) {
404 case PIPE_TEX_FILTER_LINEAR:
405 spu.min_sample_texture_2d[unit] = sample_texture_2d_bilinear;
406 break;
407 case PIPE_TEX_FILTER_NEAREST:
408 spu.min_sample_texture_2d[unit] = sample_texture_2d_nearest;
409 break;
410 default:
411 ASSERT(0);
412 }
413
414 switch (spu.sampler[sampler->unit].mag_img_filter) {
415 case PIPE_TEX_FILTER_LINEAR:
416 spu.mag_sample_texture_2d[unit] = sample_texture_2d_bilinear;
417 break;
418 case PIPE_TEX_FILTER_NEAREST:
419 spu.mag_sample_texture_2d[unit] = sample_texture_2d_nearest;
420 break;
421 default:
422 ASSERT(0);
423 }
424
425 switch (spu.sampler[sampler->unit].min_mip_filter) {
426 case PIPE_TEX_MIPFILTER_NEAREST:
427 case PIPE_TEX_MIPFILTER_LINEAR:
428 spu.sample_texture_2d[unit] = sample_texture_2d_lod;
429 break;
430 case PIPE_TEX_MIPFILTER_NONE:
431 spu.sample_texture_2d[unit] = spu.mag_sample_texture_2d[unit];
432 break;
433 default:
434 ASSERT(0);
435 }
436
437 update_tex_masks(&spu.texture[unit], &spu.sampler[unit]);
438 }
439
440
441 static void
442 cmd_state_texture(const struct cell_command_texture *texture)
443 {
444 const uint unit = texture->unit;
445 uint i;
446
447 D_PRINTF(CELL_DEBUG_CMD, "TEXTURE [%u]\n", texture->unit);
448
449 spu.texture[unit].max_level = 0;
450 spu.texture[unit].target = texture->target;
451
452 for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) {
453 uint width = texture->width[i];
454 uint height = texture->height[i];
455 uint depth = texture->depth[i];
456
457 D_PRINTF(CELL_DEBUG_CMD, " LEVEL %u: at %p size[0] %u x %u\n", i,
458 texture->start[i], texture->width[i], texture->height[i]);
459
460 spu.texture[unit].level[i].start = texture->start[i];
461 spu.texture[unit].level[i].width = width;
462 spu.texture[unit].level[i].height = height;
463 spu.texture[unit].level[i].depth = depth;
464
465 spu.texture[unit].level[i].tiles_per_row =
466 (width + TILE_SIZE - 1) / TILE_SIZE;
467
468 spu.texture[unit].level[i].bytes_per_image =
469 4 * align(width, TILE_SIZE) * align(height, TILE_SIZE) * depth;
470
471 spu.texture[unit].level[i].max_s = spu_splats((int) width - 1);
472 spu.texture[unit].level[i].max_t = spu_splats((int) height - 1);
473
474 if (texture->start[i])
475 spu.texture[unit].max_level = i;
476 }
477
478 update_tex_masks(&spu.texture[unit], &spu.sampler[unit]);
479 }
480
481
482 static void
483 cmd_state_vertex_info(const struct vertex_info *vinfo)
484 {
485 D_PRINTF(CELL_DEBUG_CMD, "VERTEX_INFO num_attribs=%u\n", vinfo->num_attribs);
486 ASSERT(vinfo->num_attribs >= 1);
487 ASSERT(vinfo->num_attribs <= 8);
488 memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo));
489 }
490
491
492 static void
493 cmd_state_vs_array_info(const struct cell_array_info *vs_info)
494 {
495 const unsigned attr = vs_info->attr;
496
497 ASSERT(attr < PIPE_MAX_ATTRIBS);
498 draw.vertex_fetch.src_ptr[attr] = vs_info->base;
499 draw.vertex_fetch.pitch[attr] = vs_info->pitch;
500 draw.vertex_fetch.size[attr] = vs_info->size;
501 draw.vertex_fetch.code_offset[attr] = vs_info->function_offset;
502 draw.vertex_fetch.dirty = 1;
503 }
504
505
/**
 * Process a CELL_CMD_STATE_ATTRIB_FETCH command: DMA code-gen'd
 * attribute-fetch code from main memory into the local-store buffer
 * and point the vertex fetcher at it.
 */
static void
cmd_state_attrib_fetch(const struct cell_attribute_fetch_code *code)
{
   mfc_get(attribute_fetch_code_buffer,
           (unsigned int) code->base,  /* src */
           code->size,
           TAG_BATCH_BUFFER,
           0, /* tid */
           0 /* rid */);
   /* block until the code has fully arrived in local store */
   wait_on_mask(1 << TAG_BATCH_BUFFER);

   draw.vertex_fetch.code = attribute_fetch_code_buffer;
}
519
520
/**
 * Process a CELL_CMD_FINISH command: flush pending work, wait for all
 * DMAs, then signal completion back to the PPU via the mailbox.
 */
static void
cmd_finish(void)
{
   D_PRINTF(CELL_DEBUG_CMD, "FINISH\n");
   /* write out any tiles still marked CLEAR */
   really_clear_tiles(0);
   /* wait for all outstanding DMAs to finish */
   mfc_write_tag_mask(~0);
   mfc_read_tag_status_all();
   /* send mbox message to PPU */
   spu_write_out_mbox(CELL_CMD_FINISH);
}
532
533
534 /**
535 * Execute a batch of commands which was sent to us by the PPU.
536 * See the cell_emit_state.c code to see where the commands come from.
537 *
538 * The opcode param encodes the location of the buffer and its size.
539 */
540 static void
541 cmd_batch(uint opcode)
542 {
543 const uint buf = (opcode >> 8) & 0xff;
544 uint size = (opcode >> 16);
545 PIPE_ALIGN_VAR(16) qword buffer[CELL_BUFFER_SIZE / 16];
546 const unsigned usize = ROUNDUP16(size) / sizeof(buffer[0]);
547 uint pos;
548
549 D_PRINTF(CELL_DEBUG_CMD, "BATCH buffer %u, len %u, from %p\n",
550 buf, size, spu.init.buffers[buf]);
551
552 ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH);
553
554 ASSERT_ALIGN16(spu.init.buffers[buf]);
555
556 size = ROUNDUP16(size);
557
558 ASSERT_ALIGN16(spu.init.buffers[buf]);
559
560 mfc_get(buffer, /* dest */
561 (unsigned int) spu.init.buffers[buf], /* src */
562 size,
563 TAG_BATCH_BUFFER,
564 0, /* tid */
565 0 /* rid */);
566 wait_on_mask(1 << TAG_BATCH_BUFFER);
567
568 /* Tell PPU we're done copying the buffer to local store */
569 D_PRINTF(CELL_DEBUG_CMD, "release batch buf %u\n", buf);
570 release_buffer(buf);
571
572 /*
573 * Loop over commands in the batch buffer
574 */
575 for (pos = 0; pos < usize; /* no incr */) {
576 switch (si_to_uint(buffer[pos])) {
577 /*
578 * rendering commands
579 */
580 case CELL_CMD_CLEAR_SURFACE:
581 {
582 struct cell_command_clear_surface *clr
583 = (struct cell_command_clear_surface *) &buffer[pos];
584 cmd_clear_surface(clr);
585 pos += sizeof(*clr) / 16;
586 }
587 break;
588 case CELL_CMD_RENDER:
589 {
590 struct cell_command_render *render
591 = (struct cell_command_render *) &buffer[pos];
592 uint pos_incr;
593 cmd_render(render, &pos_incr);
594 pos += ((pos_incr+1)&~1) / 2; // should 'fix' cmd_render return
595 }
596 break;
597 /*
598 * state-update commands
599 */
600 case CELL_CMD_STATE_FRAMEBUFFER:
601 {
602 struct cell_command_framebuffer *fb
603 = (struct cell_command_framebuffer *) &buffer[pos];
604 cmd_state_framebuffer(fb);
605 pos += sizeof(*fb) / 16;
606 }
607 break;
608 case CELL_CMD_STATE_FRAGMENT_OPS:
609 {
610 struct cell_command_fragment_ops *fops
611 = (struct cell_command_fragment_ops *) &buffer[pos];
612 cmd_state_fragment_ops(fops);
613 /* This is a variant-sized command */
614 pos += ROUNDUP16(sizeof(*fops) + fops->total_code_size) / 16;
615 }
616 break;
617 case CELL_CMD_STATE_FRAGMENT_PROGRAM:
618 {
619 struct cell_command_fragment_program *fp
620 = (struct cell_command_fragment_program *) &buffer[pos];
621 cmd_state_fragment_program(fp);
622 pos += sizeof(*fp) / 16;
623 }
624 break;
625 case CELL_CMD_STATE_FS_CONSTANTS:
626 pos = cmd_state_fs_constants(buffer, pos);
627 break;
628 case CELL_CMD_STATE_RASTERIZER:
629 {
630 struct cell_command_rasterizer *rast =
631 (struct cell_command_rasterizer *) &buffer[pos];
632 spu.rasterizer = rast->rasterizer;
633 pos += sizeof(*rast) / 16;
634 }
635 break;
636 case CELL_CMD_STATE_SAMPLER:
637 {
638 struct cell_command_sampler *sampler
639 = (struct cell_command_sampler *) &buffer[pos];
640 cmd_state_sampler(sampler);
641 pos += sizeof(*sampler) / 16;
642 }
643 break;
644 case CELL_CMD_STATE_TEXTURE:
645 {
646 struct cell_command_texture *texture
647 = (struct cell_command_texture *) &buffer[pos];
648 cmd_state_texture(texture);
649 pos += sizeof(*texture) / 16;
650 }
651 break;
652 case CELL_CMD_STATE_VERTEX_INFO:
653 cmd_state_vertex_info((struct vertex_info *) &buffer[pos+1]);
654 pos += 1 + ROUNDUP16(sizeof(struct vertex_info)) / 16;
655 break;
656 case CELL_CMD_STATE_VIEWPORT:
657 (void) memcpy(& draw.viewport, &buffer[pos+1],
658 sizeof(struct pipe_viewport_state));
659 pos += 1 + ROUNDUP16(sizeof(struct pipe_viewport_state)) / 16;
660 break;
661 case CELL_CMD_STATE_UNIFORMS:
662 draw.constants = (const float (*)[4]) (uintptr_t)spu_extract((vector unsigned int)buffer[pos+1],0);
663 pos += 2;
664 break;
665 case CELL_CMD_STATE_VS_ARRAY_INFO:
666 cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]);
667 pos += 1 + ROUNDUP16(sizeof(struct cell_array_info)) / 16;
668 break;
669 case CELL_CMD_STATE_BIND_VS:
670 #if 0
671 spu_bind_vertex_shader(&draw,
672 (struct cell_shader_info *) &buffer[pos+1]);
673 #endif
674 pos += 1 + ROUNDUP16(sizeof(struct cell_shader_info)) / 16;
675 break;
676 case CELL_CMD_STATE_ATTRIB_FETCH:
677 cmd_state_attrib_fetch((struct cell_attribute_fetch_code *)
678 &buffer[pos+1]);
679 pos += 1 + ROUNDUP16(sizeof(struct cell_attribute_fetch_code)) / 16;
680 break;
681 /*
682 * misc commands
683 */
684 case CELL_CMD_FINISH:
685 cmd_finish();
686 pos += 1;
687 break;
688 case CELL_CMD_FENCE:
689 {
690 struct cell_command_fence *fence_cmd =
691 (struct cell_command_fence *) &buffer[pos];
692 cmd_fence(fence_cmd);
693 pos += sizeof(*fence_cmd) / 16;
694 }
695 break;
696 case CELL_CMD_RELEASE_VERTS:
697 {
698 struct cell_command_release_verts *release
699 = (struct cell_command_release_verts *) &buffer[pos];
700 cmd_release_verts(release);
701 pos += sizeof(*release) / 16;
702 }
703 break;
704 case CELL_CMD_FLUSH_BUFFER_RANGE: {
705 struct cell_buffer_range *br = (struct cell_buffer_range *)
706 &buffer[pos+1];
707
708 spu_dcache_mark_dirty((unsigned) br->base, br->size);
709 pos += 1 + ROUNDUP16(sizeof(struct cell_buffer_range)) / 16;
710 break;
711 }
712 default:
713 printf("SPU %u: bad opcode: 0x%x\n", spu.init.id, si_to_uint(buffer[pos]));
714 ASSERT(0);
715 break;
716 }
717 }
718
719 D_PRINTF(CELL_DEBUG_CMD, "BATCH complete\n");
720 }
721
722
723 #define PERF 0
724
725
726 /**
727 * Main loop for SPEs: Get a command, execute it, repeat.
728 */
729 void
730 command_loop(void)
731 {
732 int exitFlag = 0;
733 uint t0, t1;
734
735 D_PRINTF(CELL_DEBUG_CMD, "Enter command loop\n");
736
737 while (!exitFlag) {
738 unsigned opcode;
739
740 D_PRINTF(CELL_DEBUG_CMD, "Wait for cmd...\n");
741
742 if (PERF)
743 spu_write_decrementer(~0);
744
745 /* read/wait from mailbox */
746 opcode = (unsigned int) spu_read_in_mbox();
747 D_PRINTF(CELL_DEBUG_CMD, "got cmd 0x%x\n", opcode);
748
749 if (PERF)
750 t0 = spu_read_decrementer();
751
752 switch (opcode & CELL_CMD_OPCODE_MASK) {
753 case CELL_CMD_EXIT:
754 D_PRINTF(CELL_DEBUG_CMD, "EXIT\n");
755 exitFlag = 1;
756 break;
757 case CELL_CMD_VS_EXECUTE:
758 #if 0
759 spu_execute_vertex_shader(&draw, &cmd.vs);
760 #endif
761 break;
762 case CELL_CMD_BATCH:
763 cmd_batch(opcode);
764 break;
765 default:
766 printf("Bad opcode 0x%x!\n", opcode & CELL_CMD_OPCODE_MASK);
767 }
768
769 if (PERF) {
770 t1 = spu_read_decrementer();
771 printf("wait mbox time: %gms batch time: %gms\n",
772 (~0u - t0) * spu.init.inv_timebase,
773 (t0 - t1) * spu.init.inv_timebase);
774 }
775 }
776
777 D_PRINTF(CELL_DEBUG_CMD, "Exit command loop\n");
778
779 if (spu.init.debug_flags & CELL_DEBUG_CACHE)
780 spu_dcache_report();
781 }
782
783 /* Initialize this module; we manage the fragment ops buffer here. */
784 void
785 spu_command_init(void)
786 {
787 /* Install default/fallback fragment processing function.
788 * This will normally be overriden by a code-gen'd function
789 * unless CELL_FORCE_FRAGMENT_OPS_FALLBACK is set.
790 */
791 spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops;
792 spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops;
793
794 /* Set up the basic empty buffer for code-gen'ed fragment ops */
795 spu.fragment_ops_code = NULL;
796 spu.fragment_ops_code_size = 0;
797 }
798
799 void
800 spu_command_close(void)
801 {
802 /* Deallocate the code-gen buffer for fragment ops, and reset the
803 * fragment ops functions to their initial setting (just to leave
804 * things in a good state).
805 */
806 if (spu.fragment_ops_code != NULL) {
807 free(spu.fragment_ops_code);
808 }
809 spu_command_init();
810 }