cell: minor change to Z float/int conversion code (avoid switch)
[mesa.git] / src / gallium / drivers / cell / spu / spu_per_fragment_op.c
1 /*
2 * (C) Copyright IBM Corporation 2008
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * \file spu_per_fragment_op.c
27 * SPU implementation of various per-fragment operations.
28 *
29 * \author Ian Romanick <idr@us.ibm.com>
30 */
31
32 #include "pipe/p_format.h"
33 #include "spu_main.h"
34 #include "spu_per_fragment_op.h"
35
36 #define ZERO 0x80
37
38
39 /**
40 * Get a "quad" of four fragment Z/stencil values from the given tile.
41 * \param tile the tile of Z/stencil values
42 * \param x, y location of the quad in the tile, in pixels
43 * \param depth_format format of the tile's data
44 * \param detph returns four depth values
45 * \param stencil returns four stencil values
46 */
47 static void
48 read_ds_quad(tile_t *tile, unsigned x, unsigned y,
49 enum pipe_format depth_format, qword *depth,
50 qword *stencil)
51 {
52 const int ix = x / 2;
53 const int iy = y / 2;
54
55 switch (depth_format) {
56 case PIPE_FORMAT_Z16_UNORM: {
57 qword *ptr = (qword *) &tile->us8[iy][ix / 2];
58
59 const qword shuf_vec = (qword) {
60 ZERO, ZERO, 0, 1, ZERO, ZERO, 2, 3,
61 ZERO, ZERO, 4, 5, ZERO, ZERO, 6, 7
62 };
63
64 /* At even X values we want the first 4 shorts, and at odd X values we
65 * want the second 4 shorts.
66 */
67 qword bias = (qword) spu_splats((unsigned char) ((ix & 0x01) << 3));
68 qword bias_mask = si_fsmbi(0x3333);
69 qword sv = si_a(shuf_vec, si_and(bias_mask, bias));
70
71 *depth = si_shufb(*ptr, *ptr, sv);
72 *stencil = si_il(0);
73 break;
74 }
75
76 case PIPE_FORMAT_Z32_UNORM: {
77 qword *ptr = (qword *) &tile->ui4[iy][ix];
78
79 *depth = *ptr;
80 *stencil = si_il(0);
81 break;
82 }
83
84 case PIPE_FORMAT_Z24S8_UNORM: {
85 qword *ptr = (qword *) &tile->ui4[iy][ix];
86 qword mask = si_fsmbi(0xEEEE);
87
88 *depth = si_rotmai(si_and(*ptr, mask), -8);
89 *stencil = si_andc(*ptr, mask);
90 break;
91 }
92
93 case PIPE_FORMAT_S8Z24_UNORM: {
94 qword *ptr = (qword *) &tile->ui4[iy][ix];
95
96 *depth = si_and(*ptr, si_fsmbi(0x7777));
97 *stencil = si_andi(si_roti(*ptr, 8), 0x0ff);
98 break;
99 }
100
101 default:
102 ASSERT(0);
103 break;
104 }
105 }
106
107
108 /**
109 * Put a quad of Z/stencil values into a tile.
110 * \param tile the tile of Z/stencil values to write into
111 * \param x, y location of the quad in the tile, in pixels
112 * \param depth_format format of the tile's data
113 * \param detph depth values to store
114 * \param stencil stencil values to store
115 */
116 static void
117 write_ds_quad(tile_t *buffer, unsigned x, unsigned y,
118 enum pipe_format depth_format,
119 qword depth, qword stencil)
120 {
121 const int ix = x / 2;
122 const int iy = y / 2;
123
124 (void) stencil;
125
126 switch (depth_format) {
127 case PIPE_FORMAT_Z16_UNORM: {
128 qword *ptr = (qword *) &buffer->us8[iy][ix / 2];
129
130 qword sv = ((ix & 0x01) == 0)
131 ? (qword) { 2, 3, 6, 7, 10, 11, 14, 15,
132 24, 25, 26, 27, 28, 29, 30, 31 }
133 : (qword) { 16, 17, 18, 19, 20 , 21, 22, 23,
134 2, 3, 6, 7, 10, 11, 14, 15 };
135 *ptr = si_shufb(depth, *ptr, sv);
136 break;
137 }
138
139 case PIPE_FORMAT_Z32_UNORM: {
140 qword *ptr = (qword *) &buffer->ui4[iy][ix];
141 *ptr = depth;
142 break;
143 }
144
145 case PIPE_FORMAT_Z24S8_UNORM: {
146 qword *ptr = (qword *) &buffer->ui4[iy][ix];
147 /* form select mask = 1110,1110,1110,1110 */
148 qword mask = si_fsmbi(0xEEEE);
149 /* depth[i] = depth[i] << 8 */
150 depth = si_shli(depth, 8);
151 /* *ptr[i] = depth[i][31:8] | stencil[i][7:0] */
152 *ptr = si_selb(stencil, depth, mask);
153 break;
154 }
155
156 case PIPE_FORMAT_S8Z24_UNORM: {
157 qword *ptr = (qword *) &buffer->ui4[iy][ix];
158 /* form select mask = 0111,0111,0111,0111 */
159 qword mask = si_fsmbi(0x7777);
160 /* stencil[i] = stencil[i] << 24 */
161 stencil = si_shli(stencil, 24);
162 /* *ptr[i] = stencil[i][31:24] | depth[i][23:0] */
163 *ptr = si_selb(stencil, depth, mask);
164 break;
165 }
166
167 default:
168 ASSERT(0);
169 break;
170 }
171 }
172
173
174 /**
175 * Do depth/stencil/alpha test for a "quad" of 4 fragments.
176 * \param x,y location of quad within tile
177 * \param frag_mask indicates which fragments are "alive"
178 * \param frag_depth four fragment depth values
179 * \param frag_alpha four fragment alpha values
180 * \param facing front/back facing for four fragments (1=front, 0=back)
181 */
182 qword
183 spu_do_depth_stencil(int x, int y,
184 qword frag_mask, qword frag_depth, qword frag_alpha,
185 qword facing)
186 {
187 struct spu_frag_test_results result;
188 qword pixel_depth;
189 qword pixel_stencil;
190
191 /* All of this preable code (everthing before the call to frag_test) should
192 * be generated on the PPU and upload to the SPU.
193 */
194 if (spu.read_depth || spu.read_stencil) {
195 read_ds_quad(&spu.ztile, x, y, spu.fb.depth_format,
196 &pixel_depth, &pixel_stencil);
197 }
198
199 /* convert floating point Z values to 32-bit uint */
200
201 /* frag_depth *= spu.fb.zscale */
202 frag_depth = si_fm(frag_depth, (qword)spu_splats(spu.fb.zscale));
203 /* frag_depth = uint(frag_depth) */
204 frag_depth = si_cfltu(frag_depth, 0);
205
206 result = (*spu.frag_test)(frag_mask, pixel_depth, pixel_stencil,
207 frag_depth, frag_alpha, facing);
208
209
210 /* This code (everthing after the call to frag_test) should
211 * be generated on the PPU and upload to the SPU.
212 */
213 if (spu.read_depth || spu.read_stencil) {
214 write_ds_quad(&spu.ztile, x, y, spu.fb.depth_format,
215 result.depth, result.stencil);
216 }
217
218 return result.mask;
219 }