7b63e85ae2e14b1eab9821f685bf1869c08b007b
[mesa.git] / src / mesa / pipe / cell / spu / main.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 /* main() for Cell SPU code */
30
31
32 #include <stdio.h>
33 #include <assert.h>
34 #include <libmisc.h>
35 #include <spu_mfcio.h>
36
37 #include "main.h"
38 #include "tri.h"
39 #include "pipe/cell/common.h"
40 #include "pipe/p_defines.h"
41
42 /*
43 helpful headers:
44 /usr/lib/gcc/spu/4.1.1/include/spu_mfcio.h
45 /opt/ibm/cell-sdk/prototype/sysroot/usr/include/libmisc.h
46 */
47
48 static boolean Debug = FALSE;
49
50 volatile struct cell_init_info init;
51
52 struct framebuffer fb;
53
54 uint ctile[TILE_SIZE][TILE_SIZE] ALIGN16_ATTRIB;
55 ushort ztile[TILE_SIZE][TILE_SIZE] ALIGN16_ATTRIB;
56
57 int DefaultTag;
58
59
60
/**
 * Stall until DMA activity selected by \p tagMask has completed.
 *
 * \param tagMask  bitmask of MFC tag groups; callers in this file pass
 *                 (1 << tag) for a single tag.
 *
 * If several bits are set this returns as soon as ANY one of the
 * selected tag groups is done, not when all of them are.
 */
void
wait_on_mask(unsigned tagMask)
{
   /* choose which tag groups the status read below refers to ... */
   mfc_write_tag_mask(tagMask);

   /* ... then block until at least one of them reports completion */
   (void) mfc_read_tag_status_any();
}
68
69
70
71 void
72 get_tile(const struct framebuffer *fb, uint tx, uint ty, uint *tile,
73 int tag, int zBuf)
74 {
75 const uint offset = ty * fb->width_tiles + tx;
76 const uint bytesPerTile = TILE_SIZE * TILE_SIZE * (zBuf ? 2 : 4);
77 const ubyte *src = zBuf ? fb->depth_start : fb->color_start;
78
79 src += offset * bytesPerTile;
80
81 assert(tx < fb->width_tiles);
82 assert(ty < fb->height_tiles);
83 ASSERT_ALIGN16(tile);
84 /*
85 printf("get_tile: dest: %p src: 0x%x size: %d\n",
86 tile, (unsigned int) src, bytesPerTile);
87 */
88 mfc_get(tile, /* dest in local memory */
89 (unsigned int) src, /* src in main memory */
90 bytesPerTile,
91 tag,
92 0, /* tid */
93 0 /* rid */);
94 }
95
96
97 void
98 put_tile(const struct framebuffer *fb, uint tx, uint ty, const uint *tile,
99 int tag, int zBuf)
100 {
101 const uint offset = ty * fb->width_tiles + tx;
102 const uint bytesPerTile = TILE_SIZE * TILE_SIZE * (zBuf ? 2 : 4);
103 ubyte *dst = zBuf ? fb->depth_start : fb->color_start;
104
105 dst += offset * bytesPerTile;
106
107 assert(tx < fb->width_tiles);
108 assert(ty < fb->height_tiles);
109 ASSERT_ALIGN16(tile);
110 /*
111 printf("put_tile: src: %p dst: 0x%x size: %d\n",
112 tile, (unsigned int) dst, bytesPerTile);
113 */
114 mfc_put((void *) tile, /* src in local memory */
115 (unsigned int) dst, /* dst in main memory */
116 bytesPerTile,
117 tag,
118 0, /* tid */
119 0 /* rid */);
120 }
121
122
123
124 static void
125 clear_surface(const struct cell_command_clear_surface *clear)
126 {
127 uint num_tiles = fb.width_tiles * fb.height_tiles;
128 uint i, j;
129
130 if (clear->surface == 0) {
131 for (i = 0; i < TILE_SIZE; i++)
132 for (j = 0; j < TILE_SIZE; j++)
133 ctile[i][j] = clear->value;
134 }
135 else {
136 for (i = 0; i < TILE_SIZE; i++)
137 for (j = 0; j < TILE_SIZE; j++)
138 ztile[i][j] = clear->value;
139 }
140
141 /*
142 printf("SPU: %s num=%d w=%d h=%d\n",
143 __FUNCTION__, num_tiles, fb.width_tiles, fb.height_tiles);
144 */
145
146 for (i = init.id; i < num_tiles; i += init.num_spus) {
147 uint tx = i % fb.width_tiles;
148 uint ty = i / fb.width_tiles;
149 if (clear->surface == 0)
150 put_tile(&fb, tx, ty, (uint *) ctile, TAG_SURFACE_CLEAR, 0);
151 else
152 put_tile(&fb, tx, ty, (uint *) ztile, TAG_SURFACE_CLEAR, 1);
153 /* XXX we don't want this here, but it fixes bad tile results */
154 }
155
156 #if 0
157 wait_on_mask(1 << TAG_SURFACE_CLEAR);
158 #endif
159 }
160
161
162 /**
163 * Given a rendering command's bounding box (in pixels) compute the
164 * location of the corresponding screen tile bounding box.
165 */
166 static INLINE void
167 tile_bounding_box(const struct cell_command_render *render,
168 uint *txmin, uint *tymin,
169 uint *box_num_tiles, uint *box_width_tiles)
170 {
171 #if 1
172 /* Debug: full-window bounding box */
173 uint txmax = fb.width_tiles - 1;
174 uint tymax = fb.height_tiles - 1;
175 *txmin = 0;
176 *tymin = 0;
177 *box_num_tiles = fb.width_tiles * fb.height_tiles;
178 *box_width_tiles = fb.width_tiles;
179 #else
180 uint txmax, tymax, box_height_tiles;
181
182 *txmin = (uint) render->xmin / TILE_SIZE;
183 *tymin = (uint) render->ymin / TILE_SIZE;
184 txmax = (uint) render->xmax / TILE_SIZE;
185 tymax = (uint) render->ymax / TILE_SIZE;
186 *box_width_tiles = txmax - *txmin + 1;
187 box_height_tiles = tymax - *tymin + 1;
188 *box_num_tiles = *box_width_tiles * box_height_tiles;
189 #endif
190 #if 0
191 printf("Render bounds: %g, %g ... %g, %g\n",
192 render->xmin, render->ymin, render->xmax, render->ymax);
193 printf("Render tiles: %u, %u .. %u, %u\n", *txmin, *tymin, txmax, tymax);
194 #endif
195 }
196
197
198
199 static void
200 render(const struct cell_command_render *render)
201 {
202 struct cell_prim_buffer prim_buffer ALIGN16_ATTRIB;
203 uint i, j, vertex_bytes;
204
205 /*
206 printf("SPU %u: RENDER buffer dst=%p src=%p size=%d\n",
207 init.id,
208 &prim_buffer, render->vertex_data, (int)sizeof(prim_buffer));
209 */
210
211 ASSERT_ALIGN16(render->vertex_data);
212 ASSERT_ALIGN16(&prim_buffer);
213
214 /* how much vertex data */
215 vertex_bytes = render->num_verts * render->num_attribs * 4 * sizeof(float);
216
217 /* get vertex data from main memory */
218 mfc_get(&prim_buffer, /* dest */
219 (unsigned int) render->vertex_data, /* src */
220 vertex_bytes, /* size */
221 TAG_VERTEX_BUFFER,
222 0, /* tid */
223 0 /* rid */);
224 wait_on_mask(1 << TAG_VERTEX_BUFFER);
225
226 /* find tiles which intersect the prim bounding box */
227 uint txmin, tymin, box_width_tiles, box_num_tiles;
228 tile_bounding_box(render, &txmin, &tymin,
229 &box_num_tiles, &box_width_tiles);
230
231 /* make sure any pending clears have completed */
232 wait_on_mask(1 << TAG_SURFACE_CLEAR);
233
234 /* loop over tiles */
235 for (i = init.id; i < box_num_tiles; i += init.num_spus) {
236 const uint tx = txmin + i % box_width_tiles;
237 const uint ty = tymin + i / box_width_tiles;
238
239 assert(tx < fb.width_tiles);
240 assert(ty < fb.height_tiles);
241
242 /* Start fetching color/z tiles. We'll wait for completion when
243 * we need read/write to them later in triangle rasterization.
244 */
245 get_tile(&fb, tx, ty, (uint *) ctile, TAG_READ_TILE_COLOR, 0);
246 if (fb.depth_format == PIPE_FORMAT_Z16_UNORM) {
247 get_tile(&fb, tx, ty, (uint *) ztile, TAG_READ_TILE_Z, 1);
248 }
249
250 assert(render->prim_type == PIPE_PRIM_TRIANGLES);
251
252 /* loop over tris */
253 for (j = 0; j < render->num_verts; j += 3) {
254 struct prim_header prim;
255
256 /*
257 printf(" %u: Triangle %g,%g %g,%g %g,%g\n",
258 init.id,
259 prim_buffer.vertex[j*3+0][0][0],
260 prim_buffer.vertex[j*3+0][0][1],
261 prim_buffer.vertex[j*3+1][0][0],
262 prim_buffer.vertex[j*3+1][0][1],
263 prim_buffer.vertex[j*3+2][0][0],
264 prim_buffer.vertex[j*3+2][0][1]);
265 */
266
267 /* pos */
268 COPY_4V(prim.v[0].data[0], prim_buffer.vertex[j+0][0]);
269 COPY_4V(prim.v[1].data[0], prim_buffer.vertex[j+1][0]);
270 COPY_4V(prim.v[2].data[0], prim_buffer.vertex[j+2][0]);
271
272 /* color */
273 COPY_4V(prim.v[0].data[1], prim_buffer.vertex[j+0][1]);
274 COPY_4V(prim.v[1].data[1], prim_buffer.vertex[j+1][1]);
275 COPY_4V(prim.v[2].data[1], prim_buffer.vertex[j+2][1]);
276
277 tri_draw(&prim, tx, ty);
278 }
279
280 /* in case nothing was drawn, wait now for completion */
281 /* XXX temporary */
282 wait_on_mask(1 << TAG_READ_TILE_COLOR);
283 if (fb.depth_format == PIPE_FORMAT_Z16_UNORM) {
284 wait_on_mask(1 << TAG_READ_TILE_Z); /* XXX temporary */
285 }
286
287 /* XXX IF we wrote anything into the tile... */
288
289 put_tile(&fb, tx, ty, (uint *) ctile, TAG_WRITE_TILE_COLOR, 0);
290 if (fb.depth_format == PIPE_FORMAT_Z16_UNORM) {
291 put_tile(&fb, tx, ty, (uint *) ztile, TAG_WRITE_TILE_Z, 1);
292 }
293
294 wait_on_mask(1 << TAG_WRITE_TILE_COLOR); /* XXX temp */
295 if (fb.depth_format == PIPE_FORMAT_Z16_UNORM) {
296 wait_on_mask(1 << TAG_WRITE_TILE_Z); /* XXX temporary */
297 }
298 }
299 }
300
301
302 /**
303 * Temporary/simple main loop for SPEs: Get a command, execute it, repeat.
304 */
305 static void
306 main_loop(void)
307 {
308 struct cell_command cmd;
309 int exitFlag = 0;
310
311 if (Debug)
312 printf("SPU %u: Enter main loop\n", init.id);
313
314 assert((sizeof(struct cell_command) & 0xf) == 0);
315 ASSERT_ALIGN16(&cmd);
316
317 while (!exitFlag) {
318 unsigned opcode;
319 int tag = 0;
320
321 if (Debug)
322 printf("SPU %u: Wait for cmd...\n", init.id);
323
324 /* read/wait from mailbox */
325 opcode = (unsigned int) spu_read_in_mbox();
326
327 if (Debug)
328 printf("SPU %u: got cmd %u\n", init.id, opcode);
329
330 /* command payload */
331 mfc_get(&cmd, /* dest */
332 (unsigned int) init.cmd, /* src */
333 sizeof(struct cell_command), /* bytes */
334 tag,
335 0, /* tid */
336 0 /* rid */);
337 wait_on_mask( 1 << tag );
338
339 switch (opcode) {
340 case CELL_CMD_EXIT:
341 if (Debug)
342 printf("SPU %u: EXIT\n", init.id);
343 exitFlag = 1;
344 break;
345 case CELL_CMD_FRAMEBUFFER:
346 if (Debug)
347 printf("SPU %u: FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n",
348 init.id,
349 cmd.fb.width,
350 cmd.fb.height,
351 cmd.fb.color_start,
352 cmd.fb.color_format,
353 cmd.fb.depth_format);
354 fb.color_start = cmd.fb.color_start;
355 fb.depth_start = cmd.fb.depth_start;
356 fb.color_format = cmd.fb.color_format;
357 fb.depth_format = cmd.fb.depth_format;
358 fb.width = cmd.fb.width;
359 fb.height = cmd.fb.height;
360 fb.width_tiles = (fb.width + TILE_SIZE - 1) / TILE_SIZE;
361 fb.height_tiles = (fb.height + TILE_SIZE - 1) / TILE_SIZE;
362 /*
363 printf("SPU %u: %u x %u tiles\n",
364 init.id, fb.width_tiles, fb.height_tiles);
365 */
366 break;
367 case CELL_CMD_CLEAR_SURFACE:
368 if (Debug)
369 printf("SPU %u: CLEAR SURF %u to 0x%08x\n", init.id,
370 cmd.clear.surface, cmd.clear.value);
371 clear_surface(&cmd.clear);
372 break;
373 case CELL_CMD_RENDER:
374 if (Debug)
375 printf("SPU %u: RENDER %u verts, prim %u\n",
376 init.id, cmd.render.num_verts, cmd.render.prim_type);
377 render(&cmd.render);
378 break;
379
380 case CELL_CMD_FINISH:
381 if (Debug)
382 printf("SPU %u: FINISH\n", init.id);
383 /* wait for all outstanding DMAs to finish */
384 mfc_write_tag_mask(~0);
385 mfc_read_tag_status_all();
386 /* send mbox message to PPU */
387 spu_write_out_mbox(CELL_CMD_FINISH);
388 break;
389 default:
390 printf("Bad opcode!\n");
391 }
392
393 }
394
395 if (Debug)
396 printf("SPU %u: Exit main loop\n", init.id);
397 }
398
399
400
401 /**
402 * SPE entrypoint.
403 * Note: example programs declare params as 'unsigned long long' but
404 * that doesn't work.
405 */
406 int
407 main(unsigned long speid, unsigned long argp)
408 {
409 int tag = 0;
410
411 (void) speid;
412
413 DefaultTag = 1;
414
415 if (Debug)
416 printf("SPU: main() speid=%lu\n", speid);
417
418 mfc_get(&init, /* dest */
419 (unsigned int) argp, /* src */
420 sizeof(struct cell_init_info), /* bytes */
421 tag,
422 0, /* tid */
423 0 /* rid */);
424 wait_on_mask( 1 << tag );
425
426
427 main_loop();
428
429 return 0;
430 }