st/nine: Implement normal transformation with vertex blending
[mesa.git] / src / gallium / state_trackers / nine / nine_ff.c
1
2 /* FF is big and ugly so feel free to write lines as long as you like.
3 * Aieeeeeeeee !
4 *
5 * Let me make that clearer:
6 * Aieeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee ! !! !!!
7 */
8
9 #include "device9.h"
10 #include "basetexture9.h"
11 #include "vertexdeclaration9.h"
12 #include "vertexshader9.h"
13 #include "pixelshader9.h"
14 #include "nine_ff.h"
15 #include "nine_defines.h"
16 #include "nine_helpers.h"
17 #include "nine_pipe.h"
18 #include "nine_dump.h"
19
20 #include "pipe/p_context.h"
21 #include "tgsi/tgsi_ureg.h"
22 #include "tgsi/tgsi_dump.h"
23 #include "util/u_box.h"
24 #include "util/u_hash_table.h"
25 #include "util/u_upload_mgr.h"
26
/* NOTE(review): presumably selects the debug channel used by the debug output
 * helpers for this file — confirm against nine_debug.h. */
#define DBG_CHANNEL DBG_FF

/* Size of the fixed-function vertex shader constant space; the highest
 * constant slot declared in this file is CONST[195]. */
#define NINE_FF_NUM_VS_CONST 196
/* Size of the fixed-function pixel shader constant space.
 * NOTE(review): the PS constant layout is not visible in this part of the file. */
#define NINE_FF_NUM_PS_CONST 24
31
/* Plain four-component float vector, stored in x, y, z, w order. */
struct fvec4
{
    float x;
    float y;
    float z;
    float w;
};
36
/* Key describing one fixed-function vertex shader variant.
 *
 * The bitfields are grouped into 32-bit words with explicit padding
 * (6 words in total). The same storage is exposed as value32[] / value64[],
 * which the hash and compare callbacks in this file operate on.
 */
struct nine_ff_vs_key
{
    union {
        struct {
            uint32_t position_t : 1; /* position input is POSITIONT instead of POSITION */
            uint32_t lighting : 1; /* generate the lighting code */
            uint32_t darkness : 1; /* lighting enabled but no active lights */
            uint32_t localviewer : 1; /* specular half vector uses the normalized vertex position */
            uint32_t vertexpointsize : 1; /* point size comes from a PSIZE vertex input */
            uint32_t pointscale : 1; /* apply distance-based point scaling */
            uint32_t vertexblend : 3; /* number of world matrices to blend (0 = no blending) */
            uint32_t vertexblend_indexed : 1; /* matrices are selected through the BLENDINDICES input */
            uint32_t vertextween : 1; /* interpolate between POSITION0/NORMAL0 and POSITION1/NORMAL1 */
            uint32_t mtl_diffuse : 2; /* 0 = material, 1 = color1, 2 = color2 */
            uint32_t mtl_ambient : 2; /* same encoding as mtl_diffuse */
            uint32_t mtl_specular : 2; /* same encoding as mtl_diffuse */
            uint32_t mtl_emissive : 2; /* same encoding as mtl_diffuse */
            uint32_t fog_mode : 2; /* non-zero makes the transformed vertex position required */
            uint32_t fog_range : 1;
            uint32_t color0in_one : 1; /* COLOR0 input is not declared; constant 1.0 is used */
            uint32_t color1in_one : 1; /* COLOR1 input is not declared; constant 1.0 is used */
            uint32_t fog : 1; /* declare the FOG output */
            uint32_t specular_enable : 1; /* add the specular term into oCol[0] */
            uint32_t normalizenormals : 1; /* normalize the transformed normal */
            uint32_t pad1 : 5; /* fills the first 32-bit word */
            uint32_t tc_dim_input: 16; /* 8 * 2 bits */
            uint32_t pad2 : 16;
            uint32_t tc_dim_output: 24; /* 8 * 3 bits */
            uint32_t pad3 : 8;
            uint32_t tc_gen : 24; /* 8 * 3 bits */
            uint32_t pad4 : 8;
            uint32_t tc_idx : 24; /* 8 * 3 bits, texcoord input index per stage */
            uint32_t pad5 : 8;
            uint32_t passthrough; /* bitmask indexed by NINE_DECLUSAGE_* */
        };
        uint64_t value64[3]; /* don't forget to resize VertexShader9.ff_key */
        uint32_t value32[6]; /* word view, XOR-folded by nine_ff_vs_key_hash() */
    };
};
76
/* Texture stage state, as packed into each ts[] entry below:
 *
 * COLOROP       D3DTOP 5 bit
 * ALPHAOP       D3DTOP 5 bit
 * COLORARG0     D3DTA  3 bit
 * COLORARG1     D3DTA  3 bit
 * COLORARG2     D3DTA  3 bit
 * ALPHAARG0     D3DTA  3 bit
 * ALPHAARG1     D3DTA  3 bit
 * ALPHAARG2     D3DTA  3 bit
 * RESULTARG     D3DTA  1 bit (CURRENT:0 or TEMP:1)
 * textarget            2 bit (1D/2D/3D/CUBE)
 * pad                  1 bit
 * ===========================
 *                     32 bit per stage
 */
/* Key describing one fixed-function pixel shader variant.
 *
 * The struct view occupies 12 32-bit words; the same storage is exposed as
 * value32[] / value64[], which the hash and compare callbacks in this file
 * operate on.
 */
struct nine_ff_ps_key
{
    union {
        struct {
            struct {
                uint32_t colorop : 5;
                uint32_t alphaop : 5;
                uint32_t colorarg0 : 3;
                uint32_t colorarg1 : 3;
                uint32_t colorarg2 : 3;
                uint32_t alphaarg0 : 3;
                uint32_t alphaarg1 : 3;
                uint32_t alphaarg2 : 3;
                uint32_t resultarg : 1; /* CURRENT:0 or TEMP:1 */
                uint32_t textarget : 2; /* 1D/2D/3D/CUBE */
                uint32_t pad : 1;
                /* that's 32 bit exactly */
            } ts[8]; /* one packed word per texture stage */
            uint32_t projected : 16;
            uint32_t fog : 1; /* for vFog coming from VS */
            uint32_t fog_mode : 2;
            uint32_t specular : 1;
            uint32_t pad1 : 12; /* 9 32-bit words with this */
            /* NOTE(review): the three arrays below presumably carry the extra
             * COLORARG/ALPHAARG bits that do not fit in the 3-bit fields of
             * ts[] — confirm against the code that fills the key. */
            uint8_t colorarg_b4[3];
            uint8_t colorarg_b5[3];
            uint8_t alphaarg_b4[3]; /* 11 32-bit words plus a byte */
            uint8_t pad2[3]; /* rounds the key up to 12 32-bit words */
        };
        uint64_t value64[6]; /* don't forget to resize PixelShader9.ff_key */
        uint32_t value32[12]; /* word view, XOR-folded by nine_ff_ps_key_hash() */
    };
};
124
125 static unsigned nine_ff_vs_key_hash(void *key)
126 {
127 struct nine_ff_vs_key *vs = key;
128 unsigned i;
129 uint32_t hash = vs->value32[0];
130 for (i = 1; i < ARRAY_SIZE(vs->value32); ++i)
131 hash ^= vs->value32[i];
132 return hash;
133 }
134 static int nine_ff_vs_key_comp(void *key1, void *key2)
135 {
136 struct nine_ff_vs_key *a = (struct nine_ff_vs_key *)key1;
137 struct nine_ff_vs_key *b = (struct nine_ff_vs_key *)key2;
138
139 return memcmp(a->value64, b->value64, sizeof(a->value64));
140 }
141 static unsigned nine_ff_ps_key_hash(void *key)
142 {
143 struct nine_ff_ps_key *ps = key;
144 unsigned i;
145 uint32_t hash = ps->value32[0];
146 for (i = 1; i < ARRAY_SIZE(ps->value32); ++i)
147 hash ^= ps->value32[i];
148 return hash;
149 }
150 static int nine_ff_ps_key_comp(void *key1, void *key2)
151 {
152 struct nine_ff_ps_key *a = (struct nine_ff_ps_key *)key1;
153 struct nine_ff_ps_key *b = (struct nine_ff_ps_key *)key2;
154
155 return memcmp(a->value64, b->value64, sizeof(a->value64));
156 }
157 static unsigned nine_ff_fvf_key_hash(void *key)
158 {
159 return *(DWORD *)key;
160 }
161 static int nine_ff_fvf_key_comp(void *key1, void *key2)
162 {
163 return *(DWORD *)key1 != *(DWORD *)key2;
164 }
165
/* Forward declarations; both functions are static, so their definitions
 * live elsewhere in this translation unit.
 * NOTE(review): judging by the names they trim the cached FF vertex/pixel
 * shaders — confirm at the definitions. */
static void nine_ff_prune_vs(struct NineDevice9 *device);
static void nine_ff_prune_ps(struct NineDevice9 *device);
168
169 static void nine_ureg_tgsi_dump(struct ureg_program *ureg, boolean override)
170 {
171 if (debug_get_bool_option("NINE_FF_DUMP", FALSE) || override) {
172 unsigned count;
173 const struct tgsi_token *toks = ureg_get_tokens(ureg, &count);
174 tgsi_dump(toks, 0);
175 ureg_free_tokens(toks);
176 }
177 }
178
/* Scalar swizzles for destination registers: r is converted to a source
 * with ureg_src() and the named channel is selected with ureg_scalar(). */
#define _X(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_X)
#define _Y(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_Y)
#define _Z(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_Z)
#define _W(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_W)

/* Same scalar swizzles for operands that already are sources
 * (no ureg_src() conversion is applied to r). */
#define _XXXX(r) ureg_scalar(r, TGSI_SWIZZLE_X)
#define _YYYY(r) ureg_scalar(r, TGSI_SWIZZLE_Y)
#define _ZZZZ(r) ureg_scalar(r, TGSI_SWIZZLE_Z)
#define _WWWW(r) ureg_scalar(r, TGSI_SWIZZLE_W)

/* Identity: r is used unchanged. */
#define _XYZW(r) (r)

/* Constant i addressed indirectly through AL.x; requires `ureg` and `AL`
 * to be in scope where the macro is expanded. */
/* AL should contain base address of lights table. */
#define LIGHT_CONST(i) \
    ureg_src_indirect(ureg_DECL_constant(ureg, i), _X(AL))

/* Material constant i, i.e. constant slot 19 + i; requires `ureg` in scope. */
#define MATERIAL_CONST(i) \
    ureg_DECL_constant(ureg, 19 + (i))

/* Constant slot n; requires `ureg` in scope. */
#define _CONST(n) ureg_DECL_constant(ureg, n)
199
200 /* VS FF constants layout:
201 *
202 * CONST[ 0.. 3] D3DTS_WORLD * D3DTS_VIEW * D3DTS_PROJECTION
203 * CONST[ 4.. 7] D3DTS_WORLD * D3DTS_VIEW
204 * CONST[ 8..11] D3DTS_PROJECTION
205 * CONST[12..15] D3DTS_VIEW
206 * CONST[16..18] Normal matrix
207 *
208 * CONST[19] MATERIAL.Emissive + Material.Ambient * RS.Ambient
209 * CONST[20] MATERIAL.Diffuse
210 * CONST[21] MATERIAL.Ambient
211 * CONST[22] MATERIAL.Specular
212 * CONST[23].x___ MATERIAL.Power
213 * CONST[24] MATERIAL.Emissive
214 * CONST[25] RS.Ambient
215 *
216 * CONST[26].x___ RS.PointSizeMin
217 * CONST[26]._y__ RS.PointSizeMax
218 * CONST[26].__z_ RS.PointSize
219 * CONST[26].___w RS.PointScaleA
220 * CONST[27].x___ RS.PointScaleB
221 * CONST[27]._y__ RS.PointScaleC
222 *
223 * CONST[28].x___ RS.FogEnd
224 * CONST[28]._y__ 1.0f / (RS.FogEnd - RS.FogStart)
225 * CONST[28].__z_ RS.FogDensity
226
227 * CONST[30].x___ TWEENFACTOR
228 *
229 * CONST[32].x___ LIGHT[0].Type
230 * CONST[32]._yzw LIGHT[0].Attenuation0,1,2
231 * CONST[33] LIGHT[0].Diffuse
232 * CONST[34] LIGHT[0].Specular
233 * CONST[35] LIGHT[0].Ambient
234 * CONST[36].xyz_ LIGHT[0].Position
235 * CONST[36].___w LIGHT[0].Range
236 * CONST[37].xyz_ LIGHT[0].Direction
237 * CONST[37].___w LIGHT[0].Falloff
238 * CONST[38].x___ cos(LIGHT[0].Theta / 2)
239 * CONST[38]._y__ cos(LIGHT[0].Phi / 2)
240 * CONST[38].__z_ 1.0f / (cos(LIGHT[0].Theta / 2) - cos(Light[0].Phi / 2))
241 * CONST[39].xyz_ LIGHT[0].HalfVector (for directional lights)
242 * CONST[39].___w 1 if this is the last active light, 0 if not
243 * CONST[40] LIGHT[1]
244 * CONST[48] LIGHT[2]
245 * CONST[56] LIGHT[3]
246 * CONST[64] LIGHT[4]
247 * CONST[72] LIGHT[5]
248 * CONST[80] LIGHT[6]
249 * CONST[88] LIGHT[7]
250 * NOTE: no lighting code is generated if there are no active lights
251 *
252 * CONST[100].x___ Viewport 2/width
253 * CONST[100]._y__ Viewport 2/height
254 * CONST[100].__z_ Viewport 1/(zmax - zmin)
255 * CONST[100].___w Viewport width
256 * CONST[101].x___ Viewport x0
257 * CONST[101]._y__ Viewport y0
258 * CONST[101].__z_ Viewport z0
259 *
260 * CONST[128..131] D3DTS_TEXTURE0
261 * CONST[132..135] D3DTS_TEXTURE1
262 * CONST[136..139] D3DTS_TEXTURE2
263 * CONST[140..143] D3DTS_TEXTURE3
264 * CONST[144..147] D3DTS_TEXTURE4
265 * CONST[148..151] D3DTS_TEXTURE5
266 * CONST[152..155] D3DTS_TEXTURE6
267 * CONST[156..159] D3DTS_TEXTURE7
268 *
269 * CONST[160] D3DTS_WORLDMATRIX[0] * D3DTS_VIEW
270 * CONST[164] D3DTS_WORLDMATRIX[1] * D3DTS_VIEW
271 * ...
272 * CONST[192] D3DTS_WORLDMATRIX[8] * D3DTS_VIEW
273 */
/* State used while building one fixed-function vertex shader. */
struct vs_build_ctx
{
    struct ureg_program *ureg; /* program being built; assigned by nine_ff_build_vs() */
    const struct nine_ff_vs_key *key; /* key selecting which code gets generated */

    uint16_t input[PIPE_MAX_ATTRIBS]; /* declaration usage recorded for each declared VS input */
    unsigned num_inputs; /* number of entries used in input[] */

    /* Shader sources for the vertex attributes. */
    struct ureg_src aVtx; /* position (replaced by the transformed position when that is needed) */
    struct ureg_src aNrm; /* normal (replaced by the transformed normal when that is needed) */
    struct ureg_src aCol[2]; /* COLOR0 / COLOR1, or an immediate 1.0 when not declared */
    struct ureg_src aTex[8]; /* texture coordinate inputs */
    struct ureg_src aPsz; /* point size input */
    struct ureg_src aInd; /* blend indices input */
    struct ureg_src aWgt; /* blend weights input */

    struct ureg_src aVtx1; /* tweening */
    struct ureg_src aNrm1; /* second normal, used for tweening */

    /* Material colors: either a material constant or one of aCol[],
     * depending on the mtl_* fields of the key. */
    struct ureg_src mtlA; /* ambient */
    struct ureg_src mtlD; /* diffuse */
    struct ureg_src mtlS; /* specular */
    struct ureg_src mtlE; /* emissive */
};
298
299 static inline unsigned
300 get_texcoord_sn(struct pipe_screen *screen)
301 {
302 if (screen->get_param(screen, PIPE_CAP_TGSI_TEXCOORD))
303 return TGSI_SEMANTIC_TEXCOORD;
304 return TGSI_SEMANTIC_GENERIC;
305 }
306
307 static inline struct ureg_src
308 build_vs_add_input(struct vs_build_ctx *vs, uint16_t ndecl)
309 {
310 const unsigned i = vs->num_inputs++;
311 assert(i < PIPE_MAX_ATTRIBS);
312 vs->input[i] = ndecl;
313 return ureg_DECL_vs_input(vs->ureg, i);
314 }
315
316 /* NOTE: dst may alias src */
317 static inline void
318 ureg_normalize3(struct ureg_program *ureg,
319 struct ureg_dst dst, struct ureg_src src)
320 {
321 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
322 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
323
324 ureg_DP3(ureg, tmp_x, src, src);
325 ureg_RSQ(ureg, tmp_x, _X(tmp));
326 ureg_MUL(ureg, dst, src, _X(tmp));
327 ureg_release_temporary(ureg, tmp);
328 }
329
330 static void *
331 nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
332 {
333 const struct nine_ff_vs_key *key = vs->key;
334 struct ureg_program *ureg = ureg_create(PIPE_SHADER_VERTEX);
335 struct ureg_dst oPos, oCol[2], oPsz, oFog;
336 struct ureg_dst AR;
337 unsigned i, c;
338 unsigned label[32], l = 0;
339 boolean need_aNrm = key->lighting || key->passthrough & (1 << NINE_DECLUSAGE_NORMAL);
340 boolean need_aVtx = key->lighting || key->fog_mode || key->pointscale;
341 const unsigned texcoord_sn = get_texcoord_sn(device->screen);
342
343 vs->ureg = ureg;
344
345 /* Check which inputs we should transform. */
346 for (i = 0; i < 8 * 3; i += 3) {
347 switch ((key->tc_gen >> i) & 0x3) {
348 case NINED3DTSS_TCI_CAMERASPACENORMAL:
349 need_aNrm = TRUE;
350 break;
351 case NINED3DTSS_TCI_CAMERASPACEPOSITION:
352 need_aVtx = TRUE;
353 break;
354 case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR:
355 need_aVtx = need_aNrm = TRUE;
356 break;
357 default:
358 break;
359 }
360 }
361
362 /* Declare and record used inputs (needed for linkage with vertex format):
363 * (texture coordinates handled later)
364 */
365 vs->aVtx = build_vs_add_input(vs,
366 key->position_t ? NINE_DECLUSAGE_POSITIONT : NINE_DECLUSAGE_POSITION);
367
368 if (need_aNrm)
369 vs->aNrm = build_vs_add_input(vs, NINE_DECLUSAGE_NORMAL);
370
371 vs->aCol[0] = ureg_imm1f(ureg, 1.0f);
372 vs->aCol[1] = ureg_imm1f(ureg, 1.0f);
373
374 if (key->lighting || key->darkness) {
375 const unsigned mask = key->mtl_diffuse | key->mtl_specular |
376 key->mtl_ambient | key->mtl_emissive;
377 if ((mask & 0x1) && !key->color0in_one)
378 vs->aCol[0] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 0));
379 if ((mask & 0x2) && !key->color1in_one)
380 vs->aCol[1] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 1));
381
382 vs->mtlD = MATERIAL_CONST(1);
383 vs->mtlA = MATERIAL_CONST(2);
384 vs->mtlS = MATERIAL_CONST(3);
385 vs->mtlE = MATERIAL_CONST(5);
386 if (key->mtl_diffuse == 1) vs->mtlD = vs->aCol[0]; else
387 if (key->mtl_diffuse == 2) vs->mtlD = vs->aCol[1];
388 if (key->mtl_ambient == 1) vs->mtlA = vs->aCol[0]; else
389 if (key->mtl_ambient == 2) vs->mtlA = vs->aCol[1];
390 if (key->mtl_specular == 1) vs->mtlS = vs->aCol[0]; else
391 if (key->mtl_specular == 2) vs->mtlS = vs->aCol[1];
392 if (key->mtl_emissive == 1) vs->mtlE = vs->aCol[0]; else
393 if (key->mtl_emissive == 2) vs->mtlE = vs->aCol[1];
394 } else {
395 if (!key->color0in_one) vs->aCol[0] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 0));
396 if (!key->color1in_one) vs->aCol[1] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 1));
397 }
398
399 if (key->vertexpointsize)
400 vs->aPsz = build_vs_add_input(vs, NINE_DECLUSAGE_PSIZE);
401
402 if (key->vertexblend_indexed || key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES))
403 vs->aInd = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDINDICES);
404 if (key->vertexblend || key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT))
405 vs->aWgt = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDWEIGHT);
406 if (key->vertextween) {
407 vs->aVtx1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(POSITION,1));
408 vs->aNrm1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(NORMAL,1));
409 }
410
411 /* Declare outputs:
412 */
413 oPos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); /* HPOS */
414 oCol[0] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0));
415 oCol[1] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 1));
416 if (key->fog || key->passthrough & (1 << NINE_DECLUSAGE_FOG)) {
417 oFog = ureg_DECL_output(ureg, TGSI_SEMANTIC_FOG, 0);
418 oFog = ureg_writemask(oFog, TGSI_WRITEMASK_X);
419 }
420
421 if (key->vertexpointsize || key->pointscale) {
422 oPsz = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_PSIZE, 0,
423 TGSI_WRITEMASK_X, 0, 1);
424 oPsz = ureg_writemask(oPsz, TGSI_WRITEMASK_X);
425 }
426
427 if (key->lighting || key->vertexblend)
428 AR = ureg_DECL_address(ureg);
429
430 /* === Vertex transformation / vertex blending:
431 */
432
433 if (key->position_t) {
434 if (device->driver_caps.window_space_position_support) {
435 ureg_MOV(ureg, oPos, vs->aVtx);
436 } else {
437 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
438 /* vs->aVtx contains the coordinates buffer wise.
439 * later in the pipeline, clipping, viewport and division
440 * by w (rhw = 1/w) are going to be applied, so do the reverse
441 * of these transformations (except clipping) to have the good
442 * position at the end.*/
443 ureg_MOV(ureg, tmp, vs->aVtx);
444 /* X from [X_min, X_min + width] to [-1, 1], same for Y. Z to [0, 1] */
445 ureg_SUB(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _CONST(101));
446 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _CONST(100));
447 ureg_SUB(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, 1.0f));
448 /* Y needs to be reversed */
449 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_negate(ureg_src(tmp)));
450 /* inverse rhw */
451 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), _W(tmp));
452 /* multiply X, Y, Z by w */
453 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _W(tmp));
454 ureg_MOV(ureg, oPos, ureg_src(tmp));
455 ureg_release_temporary(ureg, tmp);
456 }
457 } else if (key->vertexblend) {
458 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
459 struct ureg_dst tmp2 = ureg_DECL_temporary(ureg);
460 struct ureg_dst aVtx_dst = ureg_DECL_temporary(ureg);
461 struct ureg_dst aNrm_dst = ureg_DECL_temporary(ureg);
462 struct ureg_dst sum_blendweights = ureg_DECL_temporary(ureg);
463 struct ureg_src cWM[4];
464
465 for (i = 160; i <= 195; ++i)
466 ureg_DECL_constant(ureg, i);
467
468 /* translate world matrix index to constant file index */
469 if (key->vertexblend_indexed) {
470 ureg_MAD(ureg, tmp, vs->aInd, ureg_imm1f(ureg, 4.0f), ureg_imm1f(ureg, 160.0f));
471 ureg_ARL(ureg, AR, ureg_src(tmp));
472 }
473
474 ureg_MOV(ureg, aVtx_dst, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 0.0f));
475 ureg_MOV(ureg, aNrm_dst, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 0.0f));
476 ureg_MOV(ureg, sum_blendweights, ureg_imm4f(ureg, 1.0f, 1.0f, 1.0f, 1.0f));
477
478 for (i = 0; i < key->vertexblend; ++i) {
479 for (c = 0; c < 4; ++c) {
480 cWM[c] = ureg_src_register(TGSI_FILE_CONSTANT, (160 + i * 4) * !key->vertexblend_indexed + c);
481 if (key->vertexblend_indexed)
482 cWM[c] = ureg_src_indirect(cWM[c], ureg_scalar(ureg_src(AR), i));
483 }
484
485 /* multiply by WORLD(index) */
486 ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), cWM[0]);
487 ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), cWM[1], ureg_src(tmp));
488 ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), cWM[2], ureg_src(tmp));
489 ureg_MAD(ureg, tmp, _WWWW(vs->aVtx), cWM[3], ureg_src(tmp));
490
491 if (need_aNrm) {
492 /* Note: the spec says the transpose of the inverse of the
493 * WorldView matrices should be used, but all tests show
494 * otherwise.
495 * Only case unknown: D3DVBF_0WEIGHTS */
496 ureg_MUL(ureg, tmp2, _XXXX(vs->aNrm), cWM[0]);
497 ureg_MAD(ureg, tmp2, _YYYY(vs->aNrm), cWM[1], ureg_src(tmp2));
498 ureg_MAD(ureg, tmp2, _ZZZZ(vs->aNrm), cWM[2], ureg_src(tmp2));
499 }
500
501 if (i < (key->vertexblend - 1)) {
502 /* accumulate weighted position value */
503 ureg_MAD(ureg, aVtx_dst, ureg_src(tmp), ureg_scalar(vs->aWgt, i), ureg_src(aVtx_dst));
504 if (need_aNrm)
505 ureg_MAD(ureg, aNrm_dst, ureg_src(tmp2), ureg_scalar(vs->aWgt, i), ureg_src(aNrm_dst));
506 /* subtract weighted position value for last value */
507 ureg_SUB(ureg, sum_blendweights, ureg_src(sum_blendweights), ureg_scalar(vs->aWgt, i));
508 }
509 }
510
511 /* the last weighted position is always 1 - sum_of_previous_weights */
512 ureg_MAD(ureg, aVtx_dst, ureg_src(tmp), ureg_scalar(ureg_src(sum_blendweights), key->vertexblend - 1), ureg_src(aVtx_dst));
513 if (need_aNrm)
514 ureg_MAD(ureg, aNrm_dst, ureg_src(tmp2), ureg_scalar(ureg_src(sum_blendweights), key->vertexblend - 1), ureg_src(aNrm_dst));
515
516 /* multiply by VIEW_PROJ */
517 ureg_MUL(ureg, tmp, _X(aVtx_dst), _CONST(8));
518 ureg_MAD(ureg, tmp, _Y(aVtx_dst), _CONST(9), ureg_src(tmp));
519 ureg_MAD(ureg, tmp, _Z(aVtx_dst), _CONST(10), ureg_src(tmp));
520 ureg_MAD(ureg, oPos, _W(aVtx_dst), _CONST(11), ureg_src(tmp));
521
522 if (need_aVtx)
523 vs->aVtx = ureg_src(aVtx_dst);
524
525 ureg_release_temporary(ureg, tmp);
526 ureg_release_temporary(ureg, tmp2);
527 ureg_release_temporary(ureg, sum_blendweights);
528 if (!need_aVtx)
529 ureg_release_temporary(ureg, aVtx_dst);
530
531 if (need_aNrm) {
532 if (key->normalizenormals)
533 ureg_normalize3(ureg, aNrm_dst, ureg_src(aNrm_dst));
534 vs->aNrm = ureg_src(aNrm_dst);
535 } else
536 ureg_release_temporary(ureg, aNrm_dst);
537 } else {
538 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
539
540 if (key->vertextween) {
541 struct ureg_dst aVtx_dst = ureg_DECL_temporary(ureg);
542 ureg_LRP(ureg, aVtx_dst, _XXXX(_CONST(30)), vs->aVtx, vs->aVtx1);
543 vs->aVtx = ureg_src(aVtx_dst);
544 if (need_aNrm) {
545 struct ureg_dst aNrm_dst = ureg_DECL_temporary(ureg);
546 ureg_LRP(ureg, aNrm_dst, _XXXX(_CONST(30)), vs->aNrm, vs->aNrm1);
547 vs->aNrm = ureg_src(aNrm_dst);
548 }
549 }
550
551 /* position = vertex * WORLD_VIEW_PROJ */
552 ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), _CONST(0));
553 ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), _CONST(1), ureg_src(tmp));
554 ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), _CONST(2), ureg_src(tmp));
555 ureg_MAD(ureg, oPos, _WWWW(vs->aVtx), _CONST(3), ureg_src(tmp));
556 ureg_release_temporary(ureg, tmp);
557
558 if (need_aVtx) {
559 struct ureg_dst aVtx_dst = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
560 ureg_MUL(ureg, aVtx_dst, _XXXX(vs->aVtx), _CONST(4));
561 ureg_MAD(ureg, aVtx_dst, _YYYY(vs->aVtx), _CONST(5), ureg_src(aVtx_dst));
562 ureg_MAD(ureg, aVtx_dst, _ZZZZ(vs->aVtx), _CONST(6), ureg_src(aVtx_dst));
563 ureg_MAD(ureg, aVtx_dst, _WWWW(vs->aVtx), _CONST(7), ureg_src(aVtx_dst));
564 vs->aVtx = ureg_src(aVtx_dst);
565 }
566 if (need_aNrm) {
567 struct ureg_dst aNrm_dst = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
568 ureg_MUL(ureg, aNrm_dst, _XXXX(vs->aNrm), _CONST(16));
569 ureg_MAD(ureg, aNrm_dst, _YYYY(vs->aNrm), _CONST(17), ureg_src(aNrm_dst));
570 ureg_MAD(ureg, aNrm_dst, _ZZZZ(vs->aNrm), _CONST(18), ureg_src(aNrm_dst));
571 if (key->normalizenormals)
572 ureg_normalize3(ureg, aNrm_dst, ureg_src(aNrm_dst));
573 vs->aNrm = ureg_src(aNrm_dst);
574 }
575 }
576
577 /* === Process point size:
578 */
579 if (key->vertexpointsize || key->pointscale) {
580 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
581 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
582 struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y);
583 struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z);
584 if (key->vertexpointsize) {
585 struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);
586 ureg_MAX(ureg, tmp_z, _XXXX(vs->aPsz), _XXXX(cPsz1));
587 ureg_MIN(ureg, tmp_z, _Z(tmp), _YYYY(cPsz1));
588 } else {
589 struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);
590 ureg_MOV(ureg, tmp_z, _ZZZZ(cPsz1));
591 }
592
593 if (key->pointscale) {
594 struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);
595 struct ureg_src cPsz2 = ureg_DECL_constant(ureg, 27);
596
597 ureg_DP3(ureg, tmp_x, vs->aVtx, vs->aVtx);
598 ureg_RSQ(ureg, tmp_y, _X(tmp));
599 ureg_MUL(ureg, tmp_y, _Y(tmp), _X(tmp));
600 ureg_CMP(ureg, tmp_y, ureg_negate(_Y(tmp)), _Y(tmp), ureg_imm1f(ureg, 0.0f));
601 ureg_MAD(ureg, tmp_x, _Y(tmp), _YYYY(cPsz2), _XXXX(cPsz2));
602 ureg_MAD(ureg, tmp_x, _Y(tmp), _X(tmp), _WWWW(cPsz1));
603 ureg_RSQ(ureg, tmp_x, _X(tmp));
604 ureg_MUL(ureg, tmp_x, _X(tmp), _Z(tmp));
605 ureg_MUL(ureg, tmp_x, _X(tmp), _WWWW(_CONST(100)));
606 ureg_MAX(ureg, tmp_x, _X(tmp), _XXXX(cPsz1));
607 ureg_MIN(ureg, tmp_z, _X(tmp), _YYYY(cPsz1));
608 }
609
610 ureg_MOV(ureg, oPsz, _Z(tmp));
611 ureg_release_temporary(ureg, tmp);
612 }
613
614 for (i = 0; i < 8; ++i) {
615 struct ureg_dst tmp, tmp_x;
616 struct ureg_dst oTex, input_coord, transformed, t;
617 unsigned c, writemask;
618 const unsigned tci = (key->tc_gen >> (i * 3)) & 0x7;
619 const unsigned idx = (key->tc_idx >> (i * 3)) & 0x7;
620 unsigned dim_input = 1 + ((key->tc_dim_input >> (i * 2)) & 0x3);
621 const unsigned dim_output = (key->tc_dim_output >> (i * 3)) & 0x7;
622
623 /* No texture output of index s */
624 if (tci == NINED3DTSS_TCI_DISABLE)
625 continue;
626 oTex = ureg_DECL_output(ureg, texcoord_sn, i);
627 tmp = ureg_DECL_temporary(ureg);
628 tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
629 input_coord = ureg_DECL_temporary(ureg);
630 transformed = ureg_DECL_temporary(ureg);
631
632 /* Get the coordinate */
633 switch (tci) {
634 case NINED3DTSS_TCI_PASSTHRU:
635 /* NINED3DTSS_TCI_PASSTHRU => Use texcoord coming from index idx *
636 * Else the idx is used only to determine wrapping mode. */
637 vs->aTex[idx] = build_vs_add_input(vs, NINE_DECLUSAGE_i(TEXCOORD,idx));
638 ureg_MOV(ureg, input_coord, vs->aTex[idx]);
639 break;
640 case NINED3DTSS_TCI_CAMERASPACENORMAL:
641 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), vs->aNrm);
642 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
643 dim_input = 4;
644 break;
645 case NINED3DTSS_TCI_CAMERASPACEPOSITION:
646 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), vs->aVtx);
647 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
648 dim_input = 4;
649 break;
650 case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR:
651 tmp.WriteMask = TGSI_WRITEMASK_XYZ;
652 ureg_DP3(ureg, tmp_x, vs->aVtx, vs->aNrm);
653 ureg_MUL(ureg, tmp, vs->aNrm, _X(tmp));
654 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp));
655 ureg_SUB(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), vs->aVtx, ureg_src(tmp));
656 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
657 dim_input = 4;
658 tmp.WriteMask = TGSI_WRITEMASK_XYZW;
659 break;
660 case NINED3DTSS_TCI_SPHEREMAP:
661 assert(!"TODO");
662 break;
663 default:
664 assert(0);
665 break;
666 }
667
668 /* Apply the transformation */
669 /* dim_output == 0 => do not transform the components.
670 * XYZRHW also disables transformation */
671 if (!dim_output || key->position_t) {
672 ureg_release_temporary(ureg, transformed);
673 transformed = input_coord;
674 writemask = TGSI_WRITEMASK_XYZW;
675 } else {
676 for (c = 0; c < dim_output; c++) {
677 t = ureg_writemask(transformed, 1 << c);
678 switch (dim_input) {
679 /* dim_input = 1 2 3: -> we add trailing 1 to input*/
680 case 1: ureg_MAD(ureg, t, _X(input_coord), _XXXX(_CONST(128 + i * 4 + c)), _YYYY(_CONST(128 + i * 4 + c)));
681 break;
682 case 2: ureg_DP2(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c));
683 ureg_ADD(ureg, t, ureg_src(transformed), _ZZZZ(_CONST(128 + i * 4 + c)));
684 break;
685 case 3: ureg_DP3(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c));
686 ureg_ADD(ureg, t, ureg_src(transformed), _WWWW(_CONST(128 + i * 4 + c)));
687 break;
688 case 4: ureg_DP4(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c)); break;
689 default:
690 assert(0);
691 }
692 }
693 writemask = (1 << dim_output) - 1;
694 ureg_release_temporary(ureg, input_coord);
695 }
696
697 ureg_MOV(ureg, ureg_writemask(oTex, writemask), ureg_src(transformed));
698 ureg_release_temporary(ureg, transformed);
699 ureg_release_temporary(ureg, tmp);
700 }
701
702 /* === Lighting:
703 *
704 * DIRECTIONAL: Light at infinite distance, parallel rays, no attenuation.
705 * POINT: Finite distance to scene, divergent rays, isotropic, attenuation.
706 * SPOT: Finite distance, divergent rays, angular dependence, attenuation.
707 *
708 * vec3 normal = normalize(in.Normal * NormalMatrix);
709 * vec3 hitDir = light.direction;
710 * float atten = 1.0;
711 *
712 * if (light.type != DIRECTIONAL)
713 * {
714 * vec3 hitVec = light.position - eyeVertex;
715 * float d = length(hitVec);
716 * hitDir = hitVec / d;
717 * atten = 1 / ((light.atten2 * d + light.atten1) * d + light.atten0);
718 * }
719 *
720 * if (light.type == SPOTLIGHT)
721 * {
722 * float rho = dp3(-hitVec, light.direction);
723 * if (rho < cos(light.phi / 2))
724 * atten = 0;
725 * if (rho < cos(light.theta / 2))
726 * atten *= pow(some_func(rho), light.falloff);
727 * }
728 *
729 * float nDotHit = dp3_sat(normal, hitVec);
730 * float powFact = 0.0;
731 *
732 * if (nDotHit > 0.0)
733 * {
734 * vec3 midVec = normalize(hitDir + eye);
735 * float nDotMid = dp3_sat(normal, midVec);
736 * pFact = pow(nDotMid, material.power);
737 * }
738 *
739 * ambient += light.ambient * atten;
740 * diffuse += light.diffuse * atten * nDotHit;
741 * specular += light.specular * atten * powFact;
742 */
743 if (key->lighting) {
744 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
745 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
746 struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y);
747 struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z);
748 struct ureg_dst rAtt = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_W);
749 struct ureg_dst rHit = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
750 struct ureg_dst rMid = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
751
752 struct ureg_dst rCtr = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_W);
753
754 struct ureg_dst AL = ureg_writemask(AR, TGSI_WRITEMASK_X);
755
756 /* Light.*.Alpha is not used. */
757 struct ureg_dst rD = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
758 struct ureg_dst rA = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
759 struct ureg_dst rS = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
760
761 struct ureg_src mtlP = _XXXX(MATERIAL_CONST(4));
762
763 struct ureg_src cLKind = _XXXX(LIGHT_CONST(0));
764 struct ureg_src cLAtt0 = _YYYY(LIGHT_CONST(0));
765 struct ureg_src cLAtt1 = _ZZZZ(LIGHT_CONST(0));
766 struct ureg_src cLAtt2 = _WWWW(LIGHT_CONST(0));
767 struct ureg_src cLColD = _XYZW(LIGHT_CONST(1));
768 struct ureg_src cLColS = _XYZW(LIGHT_CONST(2));
769 struct ureg_src cLColA = _XYZW(LIGHT_CONST(3));
770 struct ureg_src cLPos = _XYZW(LIGHT_CONST(4));
771 struct ureg_src cLRng = _WWWW(LIGHT_CONST(4));
772 struct ureg_src cLDir = _XYZW(LIGHT_CONST(5));
773 struct ureg_src cLFOff = _WWWW(LIGHT_CONST(5));
774 struct ureg_src cLTht = _XXXX(LIGHT_CONST(6));
775 struct ureg_src cLPhi = _YYYY(LIGHT_CONST(6));
776 struct ureg_src cLSDiv = _ZZZZ(LIGHT_CONST(6));
777 struct ureg_src cLLast = _WWWW(LIGHT_CONST(7));
778
779 const unsigned loop_label = l++;
780
781 ureg_MOV(ureg, rCtr, ureg_imm1f(ureg, 32.0f)); /* &lightconst(0) */
782 ureg_MOV(ureg, rD, ureg_imm1f(ureg, 0.0f));
783 ureg_MOV(ureg, rA, ureg_imm1f(ureg, 0.0f));
784 ureg_MOV(ureg, rS, ureg_imm1f(ureg, 0.0f));
785 rD = ureg_saturate(rD);
786 rA = ureg_saturate(rA);
787 rS = ureg_saturate(rS);
788
789
790 /* loop management */
791 ureg_BGNLOOP(ureg, &label[loop_label]);
792 ureg_ARL(ureg, AL, _W(rCtr));
793
794 /* if (not DIRECTIONAL light): */
795 ureg_SNE(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_DIRECTIONAL));
796 ureg_MOV(ureg, rHit, ureg_negate(cLDir));
797 ureg_MOV(ureg, rAtt, ureg_imm1f(ureg, 1.0f));
798 ureg_IF(ureg, _X(tmp), &label[l++]);
799 {
800 /* hitDir = light.position - eyeVtx
801 * d = length(hitDir)
802 */
803 ureg_SUB(ureg, rHit, cLPos, vs->aVtx);
804 ureg_DP3(ureg, tmp_x, ureg_src(rHit), ureg_src(rHit));
805 ureg_RSQ(ureg, tmp_y, _X(tmp));
806 ureg_MUL(ureg, tmp_x, _X(tmp), _Y(tmp)); /* length */
807
808 /* att = 1.0 / (light.att0 + (light.att1 + light.att2 * d) * d) */
809 ureg_MAD(ureg, rAtt, _X(tmp), cLAtt2, cLAtt1);
810 ureg_MAD(ureg, rAtt, _X(tmp), _W(rAtt), cLAtt0);
811 ureg_RCP(ureg, rAtt, _W(rAtt));
812 /* cut-off if distance exceeds Light.Range */
813 ureg_SLT(ureg, tmp_x, _X(tmp), cLRng);
814 ureg_MUL(ureg, rAtt, _W(rAtt), _X(tmp));
815 }
816 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
817 ureg_ENDIF(ureg);
818
819 /* normalize hitDir */
820 ureg_normalize3(ureg, rHit, ureg_src(rHit));
821
822 /* if (SPOT light) */
823 ureg_SEQ(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_SPOT));
824 ureg_IF(ureg, _X(tmp), &label[l++]);
825 {
826 /* rho = dp3(-hitDir, light.spotDir)
827 *
828 * if (rho > light.ctht2) NOTE: 0 <= phi <= pi, 0 <= theta <= phi
829 * spotAtt = 1
830 * else
831 * if (rho <= light.cphi2)
832 * spotAtt = 0
833 * else
834 * spotAtt = (rho - light.cphi2) / (light.ctht2 - light.cphi2) ^ light.falloff
835 */
836 ureg_DP3(ureg, tmp_y, ureg_negate(ureg_src(rHit)), cLDir); /* rho */
837 ureg_SUB(ureg, tmp_x, _Y(tmp), cLPhi);
838 ureg_MUL(ureg, tmp_x, _X(tmp), cLSDiv);
839 ureg_POW(ureg, tmp_x, _X(tmp), cLFOff); /* spotAtten */
840 ureg_SGE(ureg, tmp_z, _Y(tmp), cLTht); /* if inside theta && phi */
841 ureg_SGE(ureg, tmp_y, _Y(tmp), cLPhi); /* if inside phi */
842 ureg_MAD(ureg, ureg_saturate(tmp_x), _X(tmp), _Y(tmp), _Z(tmp));
843 ureg_MUL(ureg, rAtt, _W(rAtt), _X(tmp));
844 }
845 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
846 ureg_ENDIF(ureg);
847
848 /* directional factors, let's not use LIT because of clarity */
849 ureg_DP3(ureg, ureg_saturate(tmp_x), vs->aNrm, ureg_src(rHit));
850 ureg_MOV(ureg, tmp_y, ureg_imm1f(ureg, 0.0f));
851 ureg_IF(ureg, _X(tmp), &label[l++]);
852 {
853 /* midVec = normalize(hitDir + eyeDir) */
854 if (key->localviewer) {
855 ureg_normalize3(ureg, rMid, vs->aVtx);
856 ureg_SUB(ureg, rMid, ureg_src(rHit), ureg_src(rMid));
857 } else {
858 ureg_SUB(ureg, rMid, ureg_src(rHit), ureg_imm3f(ureg, 0.0f, 0.0f, 1.0f));
859 }
860 ureg_normalize3(ureg, rMid, ureg_src(rMid));
861 ureg_DP3(ureg, ureg_saturate(tmp_y), vs->aNrm, ureg_src(rMid));
862 ureg_POW(ureg, tmp_y, _Y(tmp), mtlP);
863
864 ureg_MUL(ureg, tmp_x, _W(rAtt), _X(tmp)); /* dp3(normal,hitDir) * att */
865 ureg_MUL(ureg, tmp_y, _W(rAtt), _Y(tmp)); /* power factor * att */
866 ureg_MAD(ureg, rD, cLColD, _X(tmp), ureg_src(rD)); /* accumulate diffuse */
867 ureg_MAD(ureg, rS, cLColS, _Y(tmp), ureg_src(rS)); /* accumulate specular */
868 }
869 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
870 ureg_ENDIF(ureg);
871
872 ureg_MAD(ureg, rA, cLColA, _W(rAtt), ureg_src(rA)); /* accumulate ambient */
873
874 /* break if this was the last light */
875 ureg_IF(ureg, cLLast, &label[l++]);
876 ureg_BRK(ureg);
877 ureg_ENDIF(ureg);
878 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
879
880 ureg_ADD(ureg, rCtr, _W(rCtr), ureg_imm1f(ureg, 8.0f));
881 ureg_fixup_label(ureg, label[loop_label], ureg_get_instruction_number(ureg));
882 ureg_ENDLOOP(ureg, &label[loop_label]);
883
884 /* Set alpha factors of illumination to 1.0 for the multiplications. */
885 rD.WriteMask = TGSI_WRITEMASK_W; rD.Saturate = 0;
886 rS.WriteMask = TGSI_WRITEMASK_W; rS.Saturate = 0;
887 rA.WriteMask = TGSI_WRITEMASK_W; rA.Saturate = 0;
888 ureg_MOV(ureg, rD, ureg_imm1f(ureg, 1.0f));
889 ureg_MOV(ureg, rS, ureg_imm1f(ureg, 1.0f));
890
891 /* Apply to material:
892 *
893 * oCol[0] = (material.emissive + material.ambient * rs.ambient) +
894 * material.ambient * ambient +
895 * material.diffuse * diffuse +
896 * oCol[1] = material.specular * specular;
897 */
898 if (key->mtl_emissive == 0 && key->mtl_ambient == 0) {
899 ureg_MOV(ureg, rA, ureg_imm1f(ureg, 1.0f));
900 ureg_MAD(ureg, tmp, ureg_src(rA), vs->mtlA, _CONST(19));
901 } else {
902 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(rA), _CONST(25));
903 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), vs->mtlA, ureg_src(tmp), vs->mtlE);
904 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W ), vs->mtlA, vs->mtlE);
905 }
906
907 if (key->specular_enable) {
908 /* add oCol[1] to oCol[0] */
909 ureg_MAD(ureg, tmp, ureg_src(rD), vs->mtlD, ureg_src(tmp));
910 ureg_MAD(ureg, oCol[0], ureg_src(rS), vs->mtlS, ureg_src(tmp));
911 } else {
912 ureg_MAD(ureg, oCol[0], ureg_src(rD), vs->mtlD, ureg_src(tmp));
913 }
914 ureg_MUL(ureg, oCol[1], ureg_src(rS), vs->mtlS);
915 ureg_release_temporary(ureg, rAtt);
916 ureg_release_temporary(ureg, rHit);
917 ureg_release_temporary(ureg, rMid);
918 ureg_release_temporary(ureg, rCtr);
919 ureg_release_temporary(ureg, rD);
920 ureg_release_temporary(ureg, rA);
921 ureg_release_temporary(ureg, rS);
922 ureg_release_temporary(ureg, rAtt);
923 ureg_release_temporary(ureg, tmp);
924 } else
925 /* COLOR */
926 if (key->darkness) {
927 if (key->mtl_emissive == 0 && key->mtl_ambient == 0) {
928 ureg_MAD(ureg, oCol[0], vs->mtlD, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f), _CONST(19));
929 } else {
930 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
931 ureg_MAD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), vs->mtlA, _CONST(25), vs->mtlE);
932 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), vs->mtlA, vs->mtlE);
933 ureg_ADD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_W), vs->mtlD, _W(tmp));
934 ureg_release_temporary(ureg, tmp);
935 }
936 ureg_MUL(ureg, oCol[1], ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f), vs->mtlS);
937 } else {
938 ureg_MOV(ureg, oCol[0], vs->aCol[0]);
939 ureg_MOV(ureg, oCol[1], vs->aCol[1]);
940 }
941
942 /* === Process fog.
943 *
944 * exp(x) = ex2(log2(e) * x)
945 */
946 if (key->fog_mode) {
947 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
948 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
949 struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z);
950 if (key->fog_range) {
951 ureg_DP3(ureg, tmp_x, vs->aVtx, vs->aVtx);
952 ureg_RSQ(ureg, tmp_z, _X(tmp));
953 ureg_MUL(ureg, tmp_z, _Z(tmp), _X(tmp));
954 } else {
955 ureg_MOV(ureg, tmp_z, ureg_abs(_ZZZZ(vs->aVtx)));
956 }
957
958 if (key->fog_mode == D3DFOG_EXP) {
959 ureg_MUL(ureg, tmp_x, _Z(tmp), _ZZZZ(_CONST(28)));
960 ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f));
961 ureg_EX2(ureg, tmp_x, _X(tmp));
962 } else
963 if (key->fog_mode == D3DFOG_EXP2) {
964 ureg_MUL(ureg, tmp_x, _Z(tmp), _ZZZZ(_CONST(28)));
965 ureg_MUL(ureg, tmp_x, _X(tmp), _X(tmp));
966 ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f));
967 ureg_EX2(ureg, tmp_x, _X(tmp));
968 } else
969 if (key->fog_mode == D3DFOG_LINEAR) {
970 ureg_SUB(ureg, tmp_x, _XXXX(_CONST(28)), _Z(tmp));
971 ureg_MUL(ureg, ureg_saturate(tmp_x), _X(tmp), _YYYY(_CONST(28)));
972 }
973 ureg_MOV(ureg, oFog, _X(tmp));
974 ureg_release_temporary(ureg, tmp);
975 } else if (key->fog && !(key->passthrough & (1 << NINE_DECLUSAGE_FOG))) {
976 ureg_MOV(ureg, oFog, ureg_scalar(vs->aCol[1], TGSI_SWIZZLE_W));
977 }
978
979 if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT)) {
980 struct ureg_src input;
981 struct ureg_dst output;
982 input = vs->aWgt;
983 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 18);
984 ureg_MOV(ureg, output, input);
985 }
986 if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES)) {
987 struct ureg_src input;
988 struct ureg_dst output;
989 input = vs->aInd;
990 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 19);
991 ureg_MOV(ureg, output, input);
992 }
993 if (key->passthrough & (1 << NINE_DECLUSAGE_NORMAL)) {
994 struct ureg_src input;
995 struct ureg_dst output;
996 input = vs->aNrm;
997 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 20);
998 ureg_MOV(ureg, output, input);
999 }
1000 if (key->passthrough & (1 << NINE_DECLUSAGE_TANGENT)) {
1001 struct ureg_src input;
1002 struct ureg_dst output;
1003 input = build_vs_add_input(vs, NINE_DECLUSAGE_TANGENT);
1004 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 21);
1005 ureg_MOV(ureg, output, input);
1006 }
1007 if (key->passthrough & (1 << NINE_DECLUSAGE_BINORMAL)) {
1008 struct ureg_src input;
1009 struct ureg_dst output;
1010 input = build_vs_add_input(vs, NINE_DECLUSAGE_BINORMAL);
1011 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 22);
1012 ureg_MOV(ureg, output, input);
1013 }
1014 if (key->passthrough & (1 << NINE_DECLUSAGE_FOG)) {
1015 struct ureg_src input;
1016 struct ureg_dst output;
1017 input = build_vs_add_input(vs, NINE_DECLUSAGE_FOG);
1018 input = ureg_scalar(input, TGSI_SWIZZLE_X);
1019 output = oFog;
1020 ureg_MOV(ureg, output, input);
1021 }
1022 if (key->passthrough & (1 << NINE_DECLUSAGE_DEPTH)) {
1023 (void) 0; /* TODO: replace z of position output ? */
1024 }
1025
1026
1027 if (key->position_t && device->driver_caps.window_space_position_support)
1028 ureg_property(ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
1029
1030 ureg_END(ureg);
1031 nine_ureg_tgsi_dump(ureg, FALSE);
1032 return ureg_create_shader_and_destroy(ureg, device->pipe);
1033 }
1034
1035 /* PS FF constants layout:
1036 *
1037 * CONST[ 0.. 7] stage[i].D3DTSS_CONSTANT
1038 * CONST[ 8..15].x___ stage[i].D3DTSS_BUMPENVMAT00
1039 * CONST[ 8..15]._y__ stage[i].D3DTSS_BUMPENVMAT01
1040 * CONST[ 8..15].__z_ stage[i].D3DTSS_BUMPENVMAT10
1041 * CONST[ 8..15].___w stage[i].D3DTSS_BUMPENVMAT11
1042 * CONST[16..19].x_z_ stage[i].D3DTSS_BUMPENVLSCALE
1043 * CONST[16..19]._y_w stage[i].D3DTSS_BUMPENVLOFFSET
1044 *
1045 * CONST[20] D3DRS_TEXTUREFACTOR
1046 * CONST[21] D3DRS_FOGCOLOR
1047 * CONST[22].x___ RS.FogEnd
1048 * CONST[22]._y__ 1.0f / (RS.FogEnd - RS.FogStart)
1049 * CONST[22].__z_ RS.FogDensity
1050 */
1051 struct ps_build_ctx
1052 {
1053 struct ureg_program *ureg;
1054
1055 struct ureg_src vC[2]; /* DIFFUSE, SPECULAR */
1056 struct ureg_src vT[8]; /* TEXCOORD[i] */
1057 struct ureg_dst r[6]; /* TEMPs */
1058 struct ureg_dst rCur; /* D3DTA_CURRENT */
1059 struct ureg_dst rMod;
1060 struct ureg_src rCurSrc;
1061 struct ureg_dst rTmp; /* D3DTA_TEMP */
1062 struct ureg_src rTmpSrc;
1063 struct ureg_dst rTex;
1064 struct ureg_src rTexSrc;
1065 struct ureg_src cBEM[8];
1066 struct ureg_src s[8];
1067
1068 struct {
1069 unsigned index;
1070 unsigned index_pre_mod;
1071 unsigned num_regs;
1072 } stage;
1073 };
1074
1075 static struct ureg_src
1076 ps_get_ts_arg(struct ps_build_ctx *ps, unsigned ta)
1077 {
1078 struct ureg_src reg;
1079
1080 switch (ta & D3DTA_SELECTMASK) {
1081 case D3DTA_CONSTANT:
1082 reg = ureg_DECL_constant(ps->ureg, ps->stage.index);
1083 break;
1084 case D3DTA_CURRENT:
1085 reg = (ps->stage.index == ps->stage.index_pre_mod) ? ureg_src(ps->rMod) : ps->rCurSrc;
1086 break;
1087 case D3DTA_DIFFUSE:
1088 reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_COLOR);
1089 break;
1090 case D3DTA_SPECULAR:
1091 reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
1092 break;
1093 case D3DTA_TEMP:
1094 reg = ps->rTmpSrc;
1095 break;
1096 case D3DTA_TEXTURE:
1097 reg = ps->rTexSrc;
1098 break;
1099 case D3DTA_TFACTOR:
1100 reg = ureg_DECL_constant(ps->ureg, 20);
1101 break;
1102 default:
1103 assert(0);
1104 reg = ureg_src_undef();
1105 break;
1106 }
1107 if (ta & D3DTA_COMPLEMENT) {
1108 struct ureg_dst dst = ps->r[ps->stage.num_regs++];
1109 ureg_SUB(ps->ureg, dst, ureg_imm1f(ps->ureg, 1.0f), reg);
1110 reg = ureg_src(dst);
1111 }
1112 if (ta & D3DTA_ALPHAREPLICATE)
1113 reg = _WWWW(reg);
1114 return reg;
1115 }
1116
1117 static struct ureg_dst
1118 ps_get_ts_dst(struct ps_build_ctx *ps, unsigned ta)
1119 {
1120 assert(!(ta & (D3DTA_COMPLEMENT | D3DTA_ALPHAREPLICATE)));
1121
1122 switch (ta & D3DTA_SELECTMASK) {
1123 case D3DTA_CURRENT:
1124 return ps->rCur;
1125 case D3DTA_TEMP:
1126 return ps->rTmp;
1127 default:
1128 assert(0);
1129 return ureg_dst_undef();
1130 }
1131 }
1132
1133 static uint8_t ps_d3dtop_args_mask(D3DTEXTUREOP top)
1134 {
1135 switch (top) {
1136 case D3DTOP_DISABLE:
1137 return 0x0;
1138 case D3DTOP_SELECTARG1:
1139 case D3DTOP_PREMODULATE:
1140 return 0x2;
1141 case D3DTOP_SELECTARG2:
1142 return 0x4;
1143 case D3DTOP_MULTIPLYADD:
1144 case D3DTOP_LERP:
1145 return 0x7;
1146 default:
1147 return 0x6;
1148 }
1149 }
1150
1151 static inline boolean
1152 is_MOV_no_op(struct ureg_dst dst, struct ureg_src src)
1153 {
1154 return !dst.WriteMask ||
1155 (dst.File == src.File &&
1156 dst.Index == src.Index &&
1157 !dst.Indirect &&
1158 !dst.Saturate &&
1159 !src.Indirect &&
1160 !src.Negate &&
1161 !src.Absolute &&
1162 (!(dst.WriteMask & TGSI_WRITEMASK_X) || (src.SwizzleX == TGSI_SWIZZLE_X)) &&
1163 (!(dst.WriteMask & TGSI_WRITEMASK_Y) || (src.SwizzleY == TGSI_SWIZZLE_Y)) &&
1164 (!(dst.WriteMask & TGSI_WRITEMASK_Z) || (src.SwizzleZ == TGSI_SWIZZLE_Z)) &&
1165 (!(dst.WriteMask & TGSI_WRITEMASK_W) || (src.SwizzleW == TGSI_SWIZZLE_W)));
1166
1167 }
1168
1169 static void
1170 ps_do_ts_op(struct ps_build_ctx *ps, unsigned top, struct ureg_dst dst, struct ureg_src *arg)
1171 {
1172 struct ureg_program *ureg = ps->ureg;
1173 struct ureg_dst tmp = ps->r[ps->stage.num_regs];
1174 struct ureg_dst tmp2 = ps->r[ps->stage.num_regs+1];
1175 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
1176
1177 tmp.WriteMask = dst.WriteMask;
1178
1179 if (top != D3DTOP_SELECTARG1 && top != D3DTOP_SELECTARG2 &&
1180 top != D3DTOP_MODULATE && top != D3DTOP_PREMODULATE &&
1181 top != D3DTOP_BLENDDIFFUSEALPHA && top != D3DTOP_BLENDTEXTUREALPHA &&
1182 top != D3DTOP_BLENDFACTORALPHA && top != D3DTOP_BLENDCURRENTALPHA &&
1183 top != D3DTOP_BUMPENVMAP && top != D3DTOP_BUMPENVMAPLUMINANCE &&
1184 top != D3DTOP_LERP)
1185 dst = ureg_saturate(dst);
1186
1187 switch (top) {
1188 case D3DTOP_SELECTARG1:
1189 if (!is_MOV_no_op(dst, arg[1]))
1190 ureg_MOV(ureg, dst, arg[1]);
1191 break;
1192 case D3DTOP_SELECTARG2:
1193 if (!is_MOV_no_op(dst, arg[2]))
1194 ureg_MOV(ureg, dst, arg[2]);
1195 break;
1196 case D3DTOP_MODULATE:
1197 ureg_MUL(ureg, dst, arg[1], arg[2]);
1198 break;
1199 case D3DTOP_MODULATE2X:
1200 ureg_MUL(ureg, tmp, arg[1], arg[2]);
1201 ureg_ADD(ureg, dst, ureg_src(tmp), ureg_src(tmp));
1202 break;
1203 case D3DTOP_MODULATE4X:
1204 ureg_MUL(ureg, tmp, arg[1], arg[2]);
1205 ureg_MUL(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 4.0f));
1206 break;
1207 case D3DTOP_ADD:
1208 ureg_ADD(ureg, dst, arg[1], arg[2]);
1209 break;
1210 case D3DTOP_ADDSIGNED:
1211 ureg_ADD(ureg, tmp, arg[1], arg[2]);
1212 ureg_SUB(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 0.5f));
1213 break;
1214 case D3DTOP_ADDSIGNED2X:
1215 ureg_ADD(ureg, tmp, arg[1], arg[2]);
1216 ureg_MAD(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
1217 break;
1218 case D3DTOP_SUBTRACT:
1219 ureg_SUB(ureg, dst, arg[1], arg[2]);
1220 break;
1221 case D3DTOP_ADDSMOOTH:
1222 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), arg[1]);
1223 ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], arg[1]);
1224 break;
1225 case D3DTOP_BLENDDIFFUSEALPHA:
1226 ureg_LRP(ureg, dst, _WWWW(ps->vC[0]), arg[1], arg[2]);
1227 break;
1228 case D3DTOP_BLENDTEXTUREALPHA:
1229 /* XXX: alpha taken from previous stage, texture or result ? */
1230 ureg_LRP(ureg, dst, _W(ps->rTex), arg[1], arg[2]);
1231 break;
1232 case D3DTOP_BLENDFACTORALPHA:
1233 ureg_LRP(ureg, dst, _WWWW(_CONST(20)), arg[1], arg[2]);
1234 break;
1235 case D3DTOP_BLENDTEXTUREALPHAPM:
1236 ureg_SUB(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), _W(ps->rTex));
1237 ureg_MAD(ureg, dst, arg[2], _X(tmp), arg[1]);
1238 break;
1239 case D3DTOP_BLENDCURRENTALPHA:
1240 ureg_LRP(ureg, dst, _WWWW(ps->rCurSrc), arg[1], arg[2]);
1241 break;
1242 case D3DTOP_PREMODULATE:
1243 ureg_MOV(ureg, dst, arg[1]);
1244 ps->stage.index_pre_mod = ps->stage.index + 1;
1245 break;
1246 case D3DTOP_MODULATEALPHA_ADDCOLOR:
1247 ureg_MAD(ureg, dst, _WWWW(arg[1]), arg[2], arg[1]);
1248 break;
1249 case D3DTOP_MODULATECOLOR_ADDALPHA:
1250 ureg_MAD(ureg, dst, arg[1], arg[2], _WWWW(arg[1]));
1251 break;
1252 case D3DTOP_MODULATEINVALPHA_ADDCOLOR:
1253 ureg_SUB(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), _WWWW(arg[1]));
1254 ureg_MAD(ureg, dst, _X(tmp), arg[2], arg[1]);
1255 break;
1256 case D3DTOP_MODULATEINVCOLOR_ADDALPHA:
1257 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), arg[1]);
1258 ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], _WWWW(arg[1]));
1259 break;
1260 case D3DTOP_BUMPENVMAP:
1261 break;
1262 case D3DTOP_BUMPENVMAPLUMINANCE:
1263 break;
1264 case D3DTOP_DOTPRODUCT3:
1265 ureg_SUB(ureg, tmp, arg[1], ureg_imm4f(ureg,0.5,0.5,0.5,0.5));
1266 ureg_SUB(ureg, tmp2, arg[2] , ureg_imm4f(ureg,0.5,0.5,0.5,0.5));
1267 ureg_DP3(ureg, tmp, ureg_src(tmp), ureg_src(tmp2));
1268 ureg_MUL(ureg, ureg_saturate(dst), ureg_src(tmp), ureg_imm4f(ureg,4.0,4.0,4.0,4.0));
1269 break;
1270 case D3DTOP_MULTIPLYADD:
1271 ureg_MAD(ureg, dst, arg[1], arg[2], arg[0]);
1272 break;
1273 case D3DTOP_LERP:
1274 ureg_LRP(ureg, dst, arg[0], arg[1], arg[2]);
1275 break;
1276 case D3DTOP_DISABLE:
1277 /* no-op ? */
1278 break;
1279 default:
1280 assert(!"invalid D3DTOP");
1281 break;
1282 }
1283 }
1284
1285 static void *
1286 nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key)
1287 {
1288 struct ps_build_ctx ps;
1289 struct ureg_program *ureg = ureg_create(PIPE_SHADER_FRAGMENT);
1290 struct ureg_dst oCol;
1291 unsigned i, s;
1292 const unsigned texcoord_sn = get_texcoord_sn(device->screen);
1293
1294 memset(&ps, 0, sizeof(ps));
1295 ps.ureg = ureg;
1296 ps.stage.index_pre_mod = -1;
1297
1298 ps.vC[0] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_COLOR);
1299
1300 /* Declare all TEMPs we might need, serious drivers have a register allocator. */
1301 for (i = 0; i < ARRAY_SIZE(ps.r); ++i)
1302 ps.r[i] = ureg_DECL_temporary(ureg);
1303 ps.rCur = ps.r[0];
1304 ps.rTmp = ps.r[1];
1305 ps.rTex = ps.r[2];
1306 ps.rCurSrc = ureg_src(ps.rCur);
1307 ps.rTmpSrc = ureg_src(ps.rTmp);
1308 ps.rTexSrc = ureg_src(ps.rTex);
1309
1310 for (s = 0; s < 8; ++s) {
1311 ps.s[s] = ureg_src_undef();
1312
1313 if (key->ts[s].colorop != D3DTOP_DISABLE) {
1314 if (key->ts[s].colorarg0 == D3DTA_SPECULAR ||
1315 key->ts[s].colorarg1 == D3DTA_SPECULAR ||
1316 key->ts[s].colorarg2 == D3DTA_SPECULAR)
1317 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
1318
1319 if (key->ts[s].colorarg0 == D3DTA_TEXTURE ||
1320 key->ts[s].colorarg1 == D3DTA_TEXTURE ||
1321 key->ts[s].colorarg2 == D3DTA_TEXTURE) {
1322 ps.s[s] = ureg_DECL_sampler(ureg, s);
1323 ps.vT[s] = ureg_DECL_fs_input(ureg, texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
1324 }
1325 if (s && (key->ts[s - 1].colorop == D3DTOP_PREMODULATE ||
1326 key->ts[s - 1].alphaop == D3DTOP_PREMODULATE))
1327 ps.s[s] = ureg_DECL_sampler(ureg, s);
1328 }
1329
1330 if (key->ts[s].alphaop != D3DTOP_DISABLE) {
1331 if (key->ts[s].alphaarg0 == D3DTA_SPECULAR ||
1332 key->ts[s].alphaarg1 == D3DTA_SPECULAR ||
1333 key->ts[s].alphaarg2 == D3DTA_SPECULAR)
1334 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
1335
1336 if (key->ts[s].alphaarg0 == D3DTA_TEXTURE ||
1337 key->ts[s].alphaarg1 == D3DTA_TEXTURE ||
1338 key->ts[s].alphaarg2 == D3DTA_TEXTURE) {
1339 ps.s[s] = ureg_DECL_sampler(ureg, s);
1340 ps.vT[s] = ureg_DECL_fs_input(ureg, texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
1341 }
1342 }
1343 }
1344 if (key->specular)
1345 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
1346
1347 oCol = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
1348
1349 if (key->ts[0].colorop == D3DTOP_DISABLE &&
1350 key->ts[0].alphaop == D3DTOP_DISABLE)
1351 ureg_MOV(ureg, ps.rCur, ps.vC[0]);
1352 /* Or is it undefined then ? */
1353
1354 /* Run stages.
1355 */
1356 for (s = 0; s < 8; ++s) {
1357 unsigned colorarg[3];
1358 unsigned alphaarg[3];
1359 const uint8_t used_c = ps_d3dtop_args_mask(key->ts[s].colorop);
1360 const uint8_t used_a = ps_d3dtop_args_mask(key->ts[s].alphaop);
1361 struct ureg_dst dst;
1362 struct ureg_src arg[3];
1363
1364 if (key->ts[s].colorop == D3DTOP_DISABLE &&
1365 key->ts[s].alphaop == D3DTOP_DISABLE)
1366 continue;
1367 ps.stage.index = s;
1368 ps.stage.num_regs = 3;
1369
1370 DBG("STAGE[%u]: colorop=%s alphaop=%s\n", s,
1371 nine_D3DTOP_to_str(key->ts[s].colorop),
1372 nine_D3DTOP_to_str(key->ts[s].alphaop));
1373
1374 if (!ureg_src_is_undef(ps.s[s])) {
1375 unsigned target;
1376 struct ureg_src texture_coord = ps.vT[s];
1377 struct ureg_dst delta;
1378 switch (key->ts[s].textarget) {
1379 case 0: target = TGSI_TEXTURE_1D; break;
1380 case 1: target = TGSI_TEXTURE_2D; break;
1381 case 2: target = TGSI_TEXTURE_3D; break;
1382 case 3: target = TGSI_TEXTURE_CUBE; break;
1383 /* this is a 2 bit bitfield, do I really need a default case ? */
1384 }
1385
1386 /* Modify coordinates */
1387 if (s >= 1 &&
1388 (key->ts[s-1].colorop == D3DTOP_BUMPENVMAP ||
1389 key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE)) {
1390 delta = ureg_DECL_temporary(ureg);
1391 /* Du' = D3DTSS_BUMPENVMAT00(stage s-1)*t(s-1)R + D3DTSS_BUMPENVMAT10(stage s-1)*t(s-1)G */
1392 ureg_MUL(ureg, ureg_writemask(delta, TGSI_WRITEMASK_X), _X(ps.rTex), _XXXX(_CONST(8 + s - 1)));
1393 ureg_MAD(ureg, ureg_writemask(delta, TGSI_WRITEMASK_X), _Y(ps.rTex), _ZZZZ(_CONST(8 + s - 1)), ureg_src(delta));
1394 /* Dv' = D3DTSS_BUMPENVMAT01(stage s-1)*t(s-1)R + D3DTSS_BUMPENVMAT11(stage s-1)*t(s-1)G */
1395 ureg_MUL(ureg, ureg_writemask(delta, TGSI_WRITEMASK_Y), _X(ps.rTex), _YYYY(_CONST(8 + s - 1)));
1396 ureg_MAD(ureg, ureg_writemask(delta, TGSI_WRITEMASK_Y), _Y(ps.rTex), _WWWW(_CONST(8 + s - 1)), ureg_src(delta));
1397 texture_coord = ureg_src(ureg_DECL_temporary(ureg));
1398 ureg_MOV(ureg, ureg_writemask(ureg_dst(texture_coord), ureg_dst(ps.vT[s]).WriteMask), ps.vT[s]);
1399 ureg_ADD(ureg, ureg_writemask(ureg_dst(texture_coord), TGSI_WRITEMASK_XY), texture_coord, ureg_src(delta));
1400 /* Prepare luminance multiplier
1401 * t(s)RGBA = t(s)RGBA * clamp[(t(s-1)B * D3DTSS_BUMPENVLSCALE(stage s-1)) + D3DTSS_BUMPENVLOFFSET(stage s-1)] */
1402 if (key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE) {
1403 struct ureg_src bumpenvlscale = ((s-1) & 1) ? _ZZZZ(_CONST(16 + (s-1) / 2)) : _XXXX(_CONST(16 + (s-1) / 2));
1404 struct ureg_src bumpenvloffset = ((s-1) & 1) ? _WWWW(_CONST(16 + (s-1) / 2)) : _YYYY(_CONST(16 + (s-1) / 2));
1405
1406 ureg_MAD(ureg, ureg_saturate(ureg_writemask(delta, TGSI_WRITEMASK_X)), _Z(ps.rTex), bumpenvlscale, bumpenvloffset);
1407 }
1408 }
1409 if (key->projected & (3 << (s *2))) {
1410 unsigned dim = 1 + ((key->projected >> (2 * s)) & 3);
1411 if (dim == 4)
1412 ureg_TXP(ureg, ps.rTex, target, texture_coord, ps.s[s]);
1413 else {
1414 ureg_RCP(ureg, ureg_writemask(ps.rTmp, TGSI_WRITEMASK_X), ureg_scalar(texture_coord, dim-1));
1415 ureg_MUL(ureg, ps.rTmp, _XXXX(ps.rTmpSrc), texture_coord);
1416 ureg_TEX(ureg, ps.rTex, target, ps.rTmpSrc, ps.s[s]);
1417 }
1418 } else {
1419 ureg_TEX(ureg, ps.rTex, target, texture_coord, ps.s[s]);
1420 }
1421 if (s >= 1 && key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE)
1422 ureg_MUL(ureg, ps.rTex, ureg_src(ps.rTex), _X(delta));
1423 }
1424
1425 if (((s == 0 && key->ts[0].colorop != D3DTOP_BUMPENVMAP &&
1426 key->ts[0].colorop != D3DTOP_BUMPENVMAPLUMINANCE) ||
1427 (s == 1 &&
1428 (key->ts[0].colorop == D3DTOP_BUMPENVMAP ||
1429 key->ts[0].colorop == D3DTOP_BUMPENVMAPLUMINANCE)))&&
1430 (key->ts[s].resultarg != 0 /* not current */ ||
1431 key->ts[s].colorop == D3DTOP_DISABLE ||
1432 key->ts[s].alphaop == D3DTOP_DISABLE ||
1433 key->ts[s].colorop == D3DTOP_BLENDCURRENTALPHA ||
1434 key->ts[s].alphaop == D3DTOP_BLENDCURRENTALPHA ||
1435 key->ts[s].colorarg0 == D3DTA_CURRENT ||
1436 key->ts[s].colorarg1 == D3DTA_CURRENT ||
1437 key->ts[s].colorarg2 == D3DTA_CURRENT ||
1438 key->ts[s].alphaarg0 == D3DTA_CURRENT ||
1439 key->ts[s].alphaarg1 == D3DTA_CURRENT ||
1440 key->ts[s].alphaarg2 == D3DTA_CURRENT)) {
1441 /* Initialize D3DTA_CURRENT.
1442 * (Yes we can do this before the loop but not until
1443 * NVE4 has an instruction scheduling pass.)
1444 */
1445 ureg_MOV(ureg, ps.rCur, ps.vC[0]);
1446 }
1447
1448 if (key->ts[s].colorop == D3DTOP_BUMPENVMAP ||
1449 key->ts[s].colorop == D3DTOP_BUMPENVMAPLUMINANCE)
1450 continue;
1451
1452 dst = ps_get_ts_dst(&ps, key->ts[s].resultarg ? D3DTA_TEMP : D3DTA_CURRENT);
1453
1454 if (ps.stage.index_pre_mod == ps.stage.index) {
1455 ps.rMod = ps.r[ps.stage.num_regs++];
1456 ureg_MUL(ureg, ps.rMod, ps.rCurSrc, ps.rTexSrc);
1457 }
1458
1459 colorarg[0] = (key->ts[s].colorarg0 | ((key->colorarg_b4[0] >> s) << 4) | ((key->colorarg_b5[0] >> s) << 5)) & 0x3f;
1460 colorarg[1] = (key->ts[s].colorarg1 | ((key->colorarg_b4[1] >> s) << 4) | ((key->colorarg_b5[1] >> s) << 5)) & 0x3f;
1461 colorarg[2] = (key->ts[s].colorarg2 | ((key->colorarg_b4[2] >> s) << 4) | ((key->colorarg_b5[2] >> s) << 5)) & 0x3f;
1462 alphaarg[0] = (key->ts[s].alphaarg0 | ((key->alphaarg_b4[0] >> s) << 4)) & 0x1f;
1463 alphaarg[1] = (key->ts[s].alphaarg1 | ((key->alphaarg_b4[1] >> s) << 4)) & 0x1f;
1464 alphaarg[2] = (key->ts[s].alphaarg2 | ((key->alphaarg_b4[2] >> s) << 4)) & 0x1f;
1465
1466 if (key->ts[s].colorop != key->ts[s].alphaop ||
1467 colorarg[0] != alphaarg[0] ||
1468 colorarg[1] != alphaarg[1] ||
1469 colorarg[2] != alphaarg[2])
1470 dst.WriteMask = TGSI_WRITEMASK_XYZ;
1471
1472 /* Special DOTPRODUCT behaviour (see wine tests) */
1473 if (key->ts[s].colorop == D3DTOP_DOTPRODUCT3)
1474 dst.WriteMask = TGSI_WRITEMASK_XYZW;
1475
1476 if (used_c & 0x1) arg[0] = ps_get_ts_arg(&ps, colorarg[0]);
1477 if (used_c & 0x2) arg[1] = ps_get_ts_arg(&ps, colorarg[1]);
1478 if (used_c & 0x4) arg[2] = ps_get_ts_arg(&ps, colorarg[2]);
1479 ps_do_ts_op(&ps, key->ts[s].colorop, dst, arg);
1480
1481 if (dst.WriteMask != TGSI_WRITEMASK_XYZW) {
1482 dst.WriteMask = TGSI_WRITEMASK_W;
1483
1484 if (used_a & 0x1) arg[0] = ps_get_ts_arg(&ps, alphaarg[0]);
1485 if (used_a & 0x2) arg[1] = ps_get_ts_arg(&ps, alphaarg[1]);
1486 if (used_a & 0x4) arg[2] = ps_get_ts_arg(&ps, alphaarg[2]);
1487 ps_do_ts_op(&ps, key->ts[s].alphaop, dst, arg);
1488 }
1489 }
1490
1491 if (key->specular)
1492 ureg_ADD(ureg, ps.rCur, ps.rCurSrc, ps.vC[1]);
1493
1494 /* Fog.
1495 */
1496 if (key->fog_mode) {
1497 struct ureg_src vPos;
1498 if (device->screen->get_param(device->screen,
1499 PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL)) {
1500 vPos = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
1501 } else {
1502 vPos = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0,
1503 TGSI_INTERPOLATE_LINEAR);
1504 }
1505
1506 struct ureg_dst rFog = ureg_writemask(ps.rTmp, TGSI_WRITEMASK_X);
1507 if (key->fog_mode == D3DFOG_EXP) {
1508 ureg_MUL(ureg, rFog, _ZZZZ(vPos), _ZZZZ(_CONST(22)));
1509 ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f));
1510 ureg_EX2(ureg, rFog, _X(rFog));
1511 } else
1512 if (key->fog_mode == D3DFOG_EXP2) {
1513 ureg_MUL(ureg, rFog, _ZZZZ(vPos), _ZZZZ(_CONST(22)));
1514 ureg_MUL(ureg, rFog, _X(rFog), _X(rFog));
1515 ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f));
1516 ureg_EX2(ureg, rFog, _X(rFog));
1517 } else
1518 if (key->fog_mode == D3DFOG_LINEAR) {
1519 ureg_SUB(ureg, rFog, _XXXX(_CONST(22)), _ZZZZ(vPos));
1520 ureg_MUL(ureg, ureg_saturate(rFog), _X(rFog), _YYYY(_CONST(22)));
1521 }
1522 ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _X(rFog), ps.rCurSrc, _CONST(21));
1523 ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc);
1524 } else
1525 if (key->fog) {
1526 struct ureg_src vFog = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_FOG, 0, TGSI_INTERPOLATE_PERSPECTIVE);
1527 ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _XXXX(vFog), ps.rCurSrc, _CONST(21));
1528 ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc);
1529 } else {
1530 ureg_MOV(ureg, oCol, ps.rCurSrc);
1531 }
1532
1533 ureg_END(ureg);
1534 nine_ureg_tgsi_dump(ureg, FALSE);
1535 return ureg_create_shader_and_destroy(ureg, device->pipe);
1536 }
1537
1538 static struct NineVertexShader9 *
1539 nine_ff_get_vs(struct NineDevice9 *device)
1540 {
1541 const struct nine_state *state = &device->state;
1542 struct NineVertexShader9 *vs;
1543 enum pipe_error err;
1544 struct vs_build_ctx bld;
1545 struct nine_ff_vs_key key;
1546 unsigned s, i;
1547 char input_texture_coord[8];
1548
1549 assert(sizeof(key) <= sizeof(key.value32));
1550
1551 memset(&key, 0, sizeof(key));
1552 memset(&bld, 0, sizeof(bld));
1553 memset(&input_texture_coord, 0, sizeof(input_texture_coord));
1554
1555 bld.key = &key;
1556
1557 /* FIXME: this shouldn't be NULL, but it is on init */
1558 if (state->vdecl) {
1559 key.color0in_one = 1;
1560 key.color1in_one = 1;
1561 for (i = 0; i < state->vdecl->nelems; i++) {
1562 uint16_t usage = state->vdecl->usage_map[i];
1563 if (usage == NINE_DECLUSAGE_POSITIONT)
1564 key.position_t = 1;
1565 else if (usage == NINE_DECLUSAGE_i(COLOR, 0))
1566 key.color0in_one = 0;
1567 else if (usage == NINE_DECLUSAGE_i(COLOR, 1))
1568 key.color1in_one = 0;
1569 else if (usage == NINE_DECLUSAGE_PSIZE)
1570 key.vertexpointsize = 1;
1571 else if (usage % NINE_DECLUSAGE_COUNT == NINE_DECLUSAGE_TEXCOORD) {
1572 s = usage / NINE_DECLUSAGE_COUNT;
1573 if (s < 8)
1574 input_texture_coord[s] = nine_decltype_get_dim(state->vdecl->decls[i].Type);
1575 else
1576 DBG("FF given texture coordinate >= 8. Ignoring\n");
1577 } else if (usage < NINE_DECLUSAGE_NONE)
1578 key.passthrough |= 1 << usage;
1579 }
1580 }
1581 /* ff vs + ps 3.0: some elements are passed to the ps (wine test).
1582 * We do restrict to indices 0 */
1583 key.passthrough &= ~((1 << NINE_DECLUSAGE_POSITION) | (1 << NINE_DECLUSAGE_PSIZE) |
1584 (1 << NINE_DECLUSAGE_TEXCOORD) | (1 << NINE_DECLUSAGE_POSITIONT) |
1585 (1 << NINE_DECLUSAGE_TESSFACTOR) | (1 << NINE_DECLUSAGE_SAMPLE));
1586 key.pointscale = !!state->rs[D3DRS_POINTSCALEENABLE];
1587
1588 key.lighting = !!state->rs[D3DRS_LIGHTING] && state->ff.num_lights_active;
1589 key.darkness = !!state->rs[D3DRS_LIGHTING] && !state->ff.num_lights_active;
1590 if (key.position_t) {
1591 key.darkness = 0; /* |= key.lighting; */ /* XXX ? */
1592 key.lighting = 0;
1593 }
1594 if ((key.lighting | key.darkness) && state->rs[D3DRS_COLORVERTEX]) {
1595 key.mtl_diffuse = state->rs[D3DRS_DIFFUSEMATERIALSOURCE];
1596 key.mtl_ambient = state->rs[D3DRS_AMBIENTMATERIALSOURCE];
1597 key.mtl_specular = state->rs[D3DRS_SPECULARMATERIALSOURCE];
1598 key.mtl_emissive = state->rs[D3DRS_EMISSIVEMATERIALSOURCE];
1599 }
1600 key.fog = !!state->rs[D3DRS_FOGENABLE];
1601 key.fog_mode = (!key.position_t && state->rs[D3DRS_FOGENABLE]) ? state->rs[D3DRS_FOGVERTEXMODE] : 0;
1602 if (key.fog_mode)
1603 key.fog_range = state->rs[D3DRS_RANGEFOGENABLE];
1604
1605 key.localviewer = !!state->rs[D3DRS_LOCALVIEWER];
1606 key.specular_enable = !!state->rs[D3DRS_SPECULARENABLE];
1607 key.normalizenormals = !!state->rs[D3DRS_NORMALIZENORMALS];
1608
1609 if (state->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) {
1610 key.vertexblend_indexed = !!state->rs[D3DRS_INDEXEDVERTEXBLENDENABLE];
1611
1612 switch (state->rs[D3DRS_VERTEXBLEND]) {
1613 case D3DVBF_0WEIGHTS: key.vertexblend = key.vertexblend_indexed; break;
1614 case D3DVBF_1WEIGHTS: key.vertexblend = 2; break;
1615 case D3DVBF_2WEIGHTS: key.vertexblend = 3; break;
1616 case D3DVBF_3WEIGHTS: key.vertexblend = 4; break;
1617 case D3DVBF_TWEENING: key.vertextween = 1; break;
1618 default:
1619 assert(!"invalid D3DVBF");
1620 break;
1621 }
1622 }
1623
1624 for (s = 0; s < 8; ++s) {
1625 unsigned gen = (state->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] >> 16) + 1;
1626 unsigned dim;
1627
1628 if (key.position_t && gen > NINED3DTSS_TCI_PASSTHRU)
1629 gen = NINED3DTSS_TCI_PASSTHRU;
1630
1631 if (!input_texture_coord[s] && gen == NINED3DTSS_TCI_PASSTHRU)
1632 gen = NINED3DTSS_TCI_DISABLE;
1633
1634 key.tc_gen |= gen << (s * 3);
1635 key.tc_idx |= (state->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] & 7) << (s * 3);
1636 key.tc_dim_input |= ((input_texture_coord[s]-1) & 0x3) << (s * 2);
1637
1638 dim = state->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & 0x7;
1639 if (dim > 4)
1640 dim = input_texture_coord[s];
1641 if (dim == 1) /* NV behaviour */
1642 dim = 0;
1643 key.tc_dim_output |= dim << (s * 3);
1644 }
1645
1646 vs = util_hash_table_get(device->ff.ht_vs, &key);
1647 if (vs)
1648 return vs;
1649 NineVertexShader9_new(device, &vs, NULL, nine_ff_build_vs(device, &bld));
1650
1651 nine_ff_prune_vs(device);
1652 if (vs) {
1653 unsigned n;
1654
1655 memcpy(&vs->ff_key, &key, sizeof(vs->ff_key));
1656
1657 err = util_hash_table_set(device->ff.ht_vs, &vs->ff_key, vs);
1658 (void)err;
1659 assert(err == PIPE_OK);
1660 device->ff.num_vs++;
1661 NineUnknown_ConvertRefToBind(NineUnknown(vs));
1662
1663 vs->num_inputs = bld.num_inputs;
1664 for (n = 0; n < bld.num_inputs; ++n)
1665 vs->input_map[n].ndecl = bld.input[n];
1666
1667 vs->position_t = key.position_t;
1668 vs->point_size = key.vertexpointsize | key.pointscale;
1669 }
1670 return vs;
1671 }
1672
1673 static struct NinePixelShader9 *
1674 nine_ff_get_ps(struct NineDevice9 *device)
1675 {
1676 struct nine_state *state = &device->state;
1677 struct NinePixelShader9 *ps;
1678 enum pipe_error err;
1679 struct nine_ff_ps_key key;
1680 unsigned s;
1681 uint8_t sampler_mask = 0;
1682
1683 assert(sizeof(key) <= sizeof(key.value32));
1684
1685 memset(&key, 0, sizeof(key));
1686 for (s = 0; s < 8; ++s) {
1687 key.ts[s].colorop = state->ff.tex_stage[s][D3DTSS_COLOROP];
1688 key.ts[s].alphaop = state->ff.tex_stage[s][D3DTSS_ALPHAOP];
1689 /* MSDN says D3DTOP_DISABLE disables this and all subsequent stages. */
1690 /* ALPHAOP cannot be disabled if COLOROP is enabled. */
1691 if (key.ts[s].colorop == D3DTOP_DISABLE) {
1692 key.ts[s].alphaop = D3DTOP_DISABLE; /* DISABLE == 1, avoid degenerate keys */
1693 break;
1694 }
1695
1696 if (!state->texture[s] &&
1697 state->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE) {
1698 /* This should also disable the stage. */
1699 key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE;
1700 break;
1701 }
1702
1703 if (state->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE)
1704 sampler_mask |= (1 << s);
1705
1706 if (key.ts[s].colorop != D3DTOP_DISABLE) {
1707 uint8_t used_c = ps_d3dtop_args_mask(key.ts[s].colorop);
1708 if (used_c & 0x1) key.ts[s].colorarg0 = state->ff.tex_stage[s][D3DTSS_COLORARG0];
1709 if (used_c & 0x2) key.ts[s].colorarg1 = state->ff.tex_stage[s][D3DTSS_COLORARG1];
1710 if (used_c & 0x4) key.ts[s].colorarg2 = state->ff.tex_stage[s][D3DTSS_COLORARG2];
1711 if (used_c & 0x1) key.colorarg_b4[0] |= (state->ff.tex_stage[s][D3DTSS_COLORARG0] >> 4) << s;
1712 if (used_c & 0x1) key.colorarg_b5[0] |= (state->ff.tex_stage[s][D3DTSS_COLORARG0] >> 5) << s;
1713 if (used_c & 0x2) key.colorarg_b4[1] |= (state->ff.tex_stage[s][D3DTSS_COLORARG1] >> 4) << s;
1714 if (used_c & 0x2) key.colorarg_b5[1] |= (state->ff.tex_stage[s][D3DTSS_COLORARG1] >> 5) << s;
1715 if (used_c & 0x4) key.colorarg_b4[2] |= (state->ff.tex_stage[s][D3DTSS_COLORARG2] >> 4) << s;
1716 if (used_c & 0x4) key.colorarg_b5[2] |= (state->ff.tex_stage[s][D3DTSS_COLORARG2] >> 5) << s;
1717 }
1718 if (key.ts[s].alphaop != D3DTOP_DISABLE) {
1719 uint8_t used_a = ps_d3dtop_args_mask(key.ts[s].alphaop);
1720 if (used_a & 0x1) key.ts[s].alphaarg0 = state->ff.tex_stage[s][D3DTSS_ALPHAARG0];
1721 if (used_a & 0x2) key.ts[s].alphaarg1 = state->ff.tex_stage[s][D3DTSS_ALPHAARG1];
1722 if (used_a & 0x4) key.ts[s].alphaarg2 = state->ff.tex_stage[s][D3DTSS_ALPHAARG2];
1723 if (used_a & 0x1) key.alphaarg_b4[0] |= (state->ff.tex_stage[s][D3DTSS_ALPHAARG0] >> 4) << s;
1724 if (used_a & 0x2) key.alphaarg_b4[1] |= (state->ff.tex_stage[s][D3DTSS_ALPHAARG1] >> 4) << s;
1725 if (used_a & 0x4) key.alphaarg_b4[2] |= (state->ff.tex_stage[s][D3DTSS_ALPHAARG2] >> 4) << s;
1726 }
1727 key.ts[s].resultarg = state->ff.tex_stage[s][D3DTSS_RESULTARG] == D3DTA_TEMP;
1728
1729 if (state->texture[s]) {
1730 switch (state->texture[s]->base.type) {
1731 case D3DRTYPE_TEXTURE: key.ts[s].textarget = 1; break;
1732 case D3DRTYPE_VOLUMETEXTURE: key.ts[s].textarget = 2; break;
1733 case D3DRTYPE_CUBETEXTURE: key.ts[s].textarget = 3; break;
1734 default:
1735 assert(!"unexpected texture type");
1736 break;
1737 }
1738 } else {
1739 key.ts[s].textarget = 1;
1740 }
1741 }
1742
1743 key.projected = nine_ff_get_projected_key(state);
1744
1745 for (; s < 8; ++s)
1746 key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE;
1747 if (state->rs[D3DRS_FOGENABLE])
1748 key.fog_mode = state->rs[D3DRS_FOGTABLEMODE];
1749 key.fog = !!state->rs[D3DRS_FOGENABLE];
1750
1751 ps = util_hash_table_get(device->ff.ht_ps, &key);
1752 if (ps)
1753 return ps;
1754 NinePixelShader9_new(device, &ps, NULL, nine_ff_build_ps(device, &key));
1755
1756 nine_ff_prune_ps(device);
1757 if (ps) {
1758 memcpy(&ps->ff_key, &key, sizeof(ps->ff_key));
1759
1760 err = util_hash_table_set(device->ff.ht_ps, &ps->ff_key, ps);
1761 (void)err;
1762 assert(err == PIPE_OK);
1763 device->ff.num_ps++;
1764 NineUnknown_ConvertRefToBind(NineUnknown(ps));
1765
1766 ps->rt_mask = 0x1;
1767 ps->sampler_mask = sampler_mask;
1768 }
1769 return ps;
1770 }
1771
/* GET_D3DTS(n): look up transform D3DTS_<n> on the nine_state pointer that is
 * named `state` in the calling scope (FALSE is passed as the last argument).
 * IS_D3DTS_DIRTY(s, n): non-zero when bit D3DTS_<n> is set in the
 * s->ff.changed.transform bitmask, which holds 32 bits per array element.
 * An unsigned 1 is shifted so that testing bit 31 is well defined. */
#define GET_D3DTS(n) nine_state_access_transform(state, D3DTS_##n, FALSE)
#define IS_D3DTS_DIRTY(s,n) ((s)->ff.changed.transform[(D3DTS_##n) / 32] & (1u << ((D3DTS_##n) % 32)))
1774 static void
1775 nine_ff_load_vs_transforms(struct NineDevice9 *device)
1776 {
1777 struct nine_state *state = &device->state;
1778 D3DMATRIX T;
1779 D3DMATRIX *M = (D3DMATRIX *)device->ff.vs_const;
1780 unsigned i;
1781
1782 /* TODO: make this nicer, and only upload the ones we need */
1783 /* TODO: use ff.vs_const as storage of W, V, P matrices */
1784
1785 if (IS_D3DTS_DIRTY(state, WORLD) ||
1786 IS_D3DTS_DIRTY(state, VIEW) ||
1787 IS_D3DTS_DIRTY(state, PROJECTION)) {
1788 /* WVP, WV matrices */
1789 nine_d3d_matrix_matrix_mul(&M[1], GET_D3DTS(WORLD), GET_D3DTS(VIEW));
1790 nine_d3d_matrix_matrix_mul(&M[0], &M[1], GET_D3DTS(PROJECTION));
1791
1792 /* normal matrix == transpose(inverse(WV)) */
1793 nine_d3d_matrix_inverse_3x3(&T, &M[1]);
1794 nine_d3d_matrix_transpose(&M[4], &T);
1795
1796 /* P matrix */
1797 M[2] = *GET_D3DTS(PROJECTION);
1798
1799 /* V and W matrix */
1800 M[3] = *GET_D3DTS(VIEW);
1801 M[40] = M[1];
1802 }
1803
1804 if (state->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) {
1805 /* load other world matrices */
1806 for (i = 1; i <= 8; ++i) {
1807 nine_d3d_matrix_matrix_mul(&M[40 + i], GET_D3DTS(WORLDMATRIX(i)), GET_D3DTS(VIEW));
1808 }
1809 }
1810
1811 device->ff.vs_const[30 * 4] = asfloat(state->rs[D3DRS_TWEENFACTOR]);
1812 }
1813
1814 static void
1815 nine_ff_load_lights(struct NineDevice9 *device)
1816 {
1817 struct nine_state *state = &device->state;
1818 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
1819 unsigned l;
1820
1821 if (state->changed.group & NINE_STATE_FF_MATERIAL) {
1822 const D3DMATERIAL9 *mtl = &state->ff.material;
1823
1824 memcpy(&dst[20], &mtl->Diffuse, 4 * sizeof(float));
1825 memcpy(&dst[21], &mtl->Ambient, 4 * sizeof(float));
1826 memcpy(&dst[22], &mtl->Specular, 4 * sizeof(float));
1827 dst[23].x = mtl->Power;
1828 memcpy(&dst[24], &mtl->Emissive, 4 * sizeof(float));
1829 d3dcolor_to_rgba(&dst[25].x, state->rs[D3DRS_AMBIENT]);
1830 dst[19].x = dst[25].x * mtl->Ambient.r + mtl->Emissive.r;
1831 dst[19].y = dst[25].y * mtl->Ambient.g + mtl->Emissive.g;
1832 dst[19].z = dst[25].z * mtl->Ambient.b + mtl->Emissive.b;
1833 dst[19].w = mtl->Ambient.a + mtl->Emissive.a;
1834 }
1835
1836 if (!(state->changed.group & NINE_STATE_FF_LIGHTING))
1837 return;
1838
1839 for (l = 0; l < state->ff.num_lights_active; ++l) {
1840 const D3DLIGHT9 *light = &state->ff.light[state->ff.active_light[l]];
1841
1842 dst[32 + l * 8].x = light->Type;
1843 dst[32 + l * 8].y = light->Attenuation0;
1844 dst[32 + l * 8].z = light->Attenuation1;
1845 dst[32 + l * 8].w = light->Attenuation2;
1846 memcpy(&dst[33 + l * 8].x, &light->Diffuse, sizeof(light->Diffuse));
1847 memcpy(&dst[34 + l * 8].x, &light->Specular, sizeof(light->Specular));
1848 memcpy(&dst[35 + l * 8].x, &light->Ambient, sizeof(light->Ambient));
1849 nine_d3d_vector4_matrix_mul((D3DVECTOR *)&dst[36 + l * 8].x, &light->Position, GET_D3DTS(VIEW));
1850 nine_d3d_vector3_matrix_mul((D3DVECTOR *)&dst[37 + l * 8].x, &light->Direction, GET_D3DTS(VIEW));
1851 dst[36 + l * 8].w = light->Type == D3DLIGHT_DIRECTIONAL ? 1e9f : light->Range;
1852 dst[37 + l * 8].w = light->Falloff;
1853 dst[38 + l * 8].x = cosf(light->Theta * 0.5f);
1854 dst[38 + l * 8].y = cosf(light->Phi * 0.5f);
1855 dst[38 + l * 8].z = 1.0f / (dst[38 + l * 8].x - dst[38 + l * 8].y);
1856 dst[39 + l * 8].w = (l + 1) == state->ff.num_lights_active;
1857 }
1858 }
1859
1860 static void
1861 nine_ff_load_point_and_fog_params(struct NineDevice9 *device)
1862 {
1863 const struct nine_state *state = &device->state;
1864 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
1865
1866 if (!(state->changed.group & NINE_STATE_FF_OTHER))
1867 return;
1868 dst[26].x = asfloat(state->rs[D3DRS_POINTSIZE_MIN]);
1869 dst[26].y = asfloat(state->rs[D3DRS_POINTSIZE_MAX]);
1870 dst[26].z = asfloat(state->rs[D3DRS_POINTSIZE]);
1871 dst[26].w = asfloat(state->rs[D3DRS_POINTSCALE_A]);
1872 dst[27].x = asfloat(state->rs[D3DRS_POINTSCALE_B]);
1873 dst[27].y = asfloat(state->rs[D3DRS_POINTSCALE_C]);
1874 dst[28].x = asfloat(state->rs[D3DRS_FOGEND]);
1875 dst[28].y = 1.0f / (asfloat(state->rs[D3DRS_FOGEND]) - asfloat(state->rs[D3DRS_FOGSTART]));
1876 if (isinf(dst[28].y))
1877 dst[28].y = 0.0f;
1878 dst[28].z = asfloat(state->rs[D3DRS_FOGDENSITY]);
1879 }
1880
1881 static void
1882 nine_ff_load_tex_matrices(struct NineDevice9 *device)
1883 {
1884 struct nine_state *state = &device->state;
1885 D3DMATRIX *M = (D3DMATRIX *)device->ff.vs_const;
1886 unsigned s;
1887
1888 if (!(state->ff.changed.transform[0] & 0xff0000))
1889 return;
1890 for (s = 0; s < 8; ++s) {
1891 if (IS_D3DTS_DIRTY(state, TEXTURE0 + s))
1892 nine_d3d_matrix_transpose(&M[32 + s], nine_state_access_transform(state, D3DTS_TEXTURE0 + s, FALSE));
1893 }
1894 }
1895
1896 static void
1897 nine_ff_load_ps_params(struct NineDevice9 *device)
1898 {
1899 const struct nine_state *state = &device->state;
1900 struct fvec4 *dst = (struct fvec4 *)device->ff.ps_const;
1901 unsigned s;
1902
1903 if (!(state->changed.group & (NINE_STATE_FF_PSSTAGES | NINE_STATE_FF_OTHER)))
1904 return;
1905
1906 for (s = 0; s < 8; ++s)
1907 d3dcolor_to_rgba(&dst[s].x, state->ff.tex_stage[s][D3DTSS_CONSTANT]);
1908
1909 for (s = 0; s < 8; ++s) {
1910 dst[8 + s].x = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVMAT00]);
1911 dst[8 + s].y = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVMAT01]);
1912 dst[8 + s].z = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVMAT10]);
1913 dst[8 + s].w = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVMAT11]);
1914 if (s & 1) {
1915 dst[16 + s / 2].z = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]);
1916 dst[16 + s / 2].w = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]);
1917 } else {
1918 dst[16 + s / 2].x = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]);
1919 dst[16 + s / 2].y = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]);
1920 }
1921 }
1922
1923 d3dcolor_to_rgba(&dst[20].x, state->rs[D3DRS_TEXTUREFACTOR]);
1924 d3dcolor_to_rgba(&dst[21].x, state->rs[D3DRS_FOGCOLOR]);
1925 dst[22].x = asfloat(state->rs[D3DRS_FOGEND]);
1926 dst[22].y = 1.0f / (asfloat(state->rs[D3DRS_FOGEND]) - asfloat(state->rs[D3DRS_FOGSTART]));
1927 dst[22].z = asfloat(state->rs[D3DRS_FOGDENSITY]);
1928 }
1929
1930 static void
1931 nine_ff_load_viewport_info(struct NineDevice9 *device)
1932 {
1933 D3DVIEWPORT9 *viewport = &device->state.viewport;
1934 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
1935 float diffZ = viewport->MaxZ - viewport->MinZ;
1936
1937 /* Note: the other functions avoids to fill the const again if nothing changed.
1938 * But we don't have much to fill, and adding code to allow that may be complex
1939 * so just fill it always */
1940 dst[100].x = 2.0f / (float)(viewport->Width);
1941 dst[100].y = 2.0f / (float)(viewport->Height);
1942 dst[100].z = (diffZ == 0.0f) ? 0.0f : (1.0f / diffZ);
1943 dst[100].w = (float)(viewport->Width);
1944 dst[101].x = (float)(viewport->X);
1945 dst[101].y = (float)(viewport->Y);
1946 dst[101].z = (float)(viewport->MinZ);
1947 }
1948
1949 void
1950 nine_ff_update(struct NineDevice9 *device)
1951 {
1952 struct nine_state *state = &device->state;
1953 struct pipe_constant_buffer cb;
1954
1955 DBG("vs=%p ps=%p\n", device->state.vs, device->state.ps);
1956
1957 /* NOTE: the only reference belongs to the hash table */
1958 if (!state->programmable_vs) {
1959 device->ff.vs = nine_ff_get_vs(device);
1960 device->state.changed.group |= NINE_STATE_VS;
1961 }
1962 if (!device->state.ps) {
1963 device->ff.ps = nine_ff_get_ps(device);
1964 device->state.changed.group |= NINE_STATE_PS;
1965 }
1966
1967 if (!state->programmable_vs) {
1968 nine_ff_load_vs_transforms(device);
1969 nine_ff_load_tex_matrices(device);
1970 nine_ff_load_lights(device);
1971 nine_ff_load_point_and_fog_params(device);
1972 nine_ff_load_viewport_info(device);
1973
1974 memset(state->ff.changed.transform, 0, sizeof(state->ff.changed.transform));
1975
1976 cb.buffer_offset = 0;
1977 cb.buffer = NULL;
1978 cb.user_buffer = device->ff.vs_const;
1979 cb.buffer_size = NINE_FF_NUM_VS_CONST * 4 * sizeof(float);
1980
1981 if (!device->driver_caps.user_cbufs) {
1982 u_upload_data(device->constbuf_uploader,
1983 0,
1984 cb.buffer_size,
1985 device->constbuf_alignment,
1986 cb.user_buffer,
1987 &cb.buffer_offset,
1988 &cb.buffer);
1989 u_upload_unmap(device->constbuf_uploader);
1990 cb.user_buffer = NULL;
1991 }
1992 state->pipe.cb_vs_ff = cb;
1993 state->commit |= NINE_STATE_COMMIT_CONST_VS;
1994 }
1995
1996 if (!device->state.ps) {
1997 nine_ff_load_ps_params(device);
1998
1999 cb.buffer_offset = 0;
2000 cb.buffer = NULL;
2001 cb.user_buffer = device->ff.ps_const;
2002 cb.buffer_size = NINE_FF_NUM_PS_CONST * 4 * sizeof(float);
2003
2004 if (!device->driver_caps.user_cbufs) {
2005 u_upload_data(device->constbuf_uploader,
2006 0,
2007 cb.buffer_size,
2008 device->constbuf_alignment,
2009 cb.user_buffer,
2010 &cb.buffer_offset,
2011 &cb.buffer);
2012 u_upload_unmap(device->constbuf_uploader);
2013 cb.user_buffer = NULL;
2014 }
2015 state->pipe.cb_ps_ff = cb;
2016 state->commit |= NINE_STATE_COMMIT_CONST_PS;
2017 }
2018
2019 device->state.changed.group &= ~NINE_STATE_FF;
2020 }
2021
2022
2023 boolean
2024 nine_ff_init(struct NineDevice9 *device)
2025 {
2026 device->ff.ht_vs = util_hash_table_create(nine_ff_vs_key_hash,
2027 nine_ff_vs_key_comp);
2028 device->ff.ht_ps = util_hash_table_create(nine_ff_ps_key_hash,
2029 nine_ff_ps_key_comp);
2030
2031 device->ff.ht_fvf = util_hash_table_create(nine_ff_fvf_key_hash,
2032 nine_ff_fvf_key_comp);
2033
2034 device->ff.vs_const = CALLOC(NINE_FF_NUM_VS_CONST, 4 * sizeof(float));
2035 device->ff.ps_const = CALLOC(NINE_FF_NUM_PS_CONST, 4 * sizeof(float));
2036
2037 return device->ff.ht_vs && device->ff.ht_ps &&
2038 device->ff.ht_fvf &&
2039 device->ff.vs_const && device->ff.ps_const;
2040 }
2041
2042 static enum pipe_error nine_ff_ht_delete_cb(void *key, void *value, void *data)
2043 {
2044 NineUnknown_Unbind(NineUnknown(value));
2045 return PIPE_OK;
2046 }
2047
2048 void
2049 nine_ff_fini(struct NineDevice9 *device)
2050 {
2051 if (device->ff.ht_vs) {
2052 util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL);
2053 util_hash_table_destroy(device->ff.ht_vs);
2054 }
2055 if (device->ff.ht_ps) {
2056 util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL);
2057 util_hash_table_destroy(device->ff.ht_ps);
2058 }
2059 if (device->ff.ht_fvf) {
2060 util_hash_table_foreach(device->ff.ht_fvf, nine_ff_ht_delete_cb, NULL);
2061 util_hash_table_destroy(device->ff.ht_fvf);
2062 }
2063 device->ff.vs = NULL; /* destroyed by unbinding from hash table */
2064 device->ff.ps = NULL;
2065
2066 FREE(device->ff.vs_const);
2067 FREE(device->ff.ps_const);
2068 }
2069
2070 static void
2071 nine_ff_prune_vs(struct NineDevice9 *device)
2072 {
2073 if (device->ff.num_vs > 100) {
2074 /* could destroy the bound one here, so unbind */
2075 device->pipe->bind_vs_state(device->pipe, NULL);
2076 util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL);
2077 util_hash_table_clear(device->ff.ht_vs);
2078 device->ff.num_vs = 0;
2079 device->state.changed.group |= NINE_STATE_VS;
2080 }
2081 }
2082 static void
2083 nine_ff_prune_ps(struct NineDevice9 *device)
2084 {
2085 if (device->ff.num_ps > 100) {
2086 /* could destroy the bound one here, so unbind */
2087 device->pipe->bind_fs_state(device->pipe, NULL);
2088 util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL);
2089 util_hash_table_clear(device->ff.ht_ps);
2090 device->ff.num_ps = 0;
2091 device->state.changed.group |= NINE_STATE_PS;
2092 }
2093 }
2094
2095 /* ========================================================================== */
2096
2097 /* Matrix multiplication:
2098 *
2099 * in memory: 0 1 2 3 (row major)
2100 * 4 5 6 7
2101 * 8 9 a b
2102 * c d e f
2103 *
2104 * cA cB cC cD
2105 * r0 = (r0 * cA) (r0 * cB) . .
2106 * r1 = (r1 * cA) (r1 * cB)
2107 * r2 = (r2 * cA) .
2108 * r3 = (r3 * cA) .
2109 *
2110 * r: (11) (12) (13) (14)
2111 * (21) (22) (23) (24)
2112 * (31) (32) (33) (34)
2113 * (41) (42) (43) (44)
2114 * l: (11 12 13 14)
2115 * (21 22 23 24)
2116 * (31 32 33 34)
2117 * (41 42 43 44)
2118 *
2119 * v: (x y z 1 )
2120 *
2121 * t.xyzw = MUL(v.xxxx, r[0]);
2122 * t.xyzw = MAD(v.yyyy, r[1], t.xyzw);
2123 * t.xyzw = MAD(v.zzzz, r[2], t.xyzw);
2124 * v.xyzw = MAD(v.wwww, r[3], t.xyzw);
2125 *
2126 * v.x = DP4(v, c[0]);
2127 * v.y = DP4(v, c[1]);
2128 * v.z = DP4(v, c[2]);
2129 * v.w = DP4(v, c[3]) = 1
2130 */
2131
2132 /*
2133 static void
2134 nine_D3DMATRIX_print(const D3DMATRIX *M)
2135 {
2136 DBG("\n(%f %f %f %f)\n"
2137 "(%f %f %f %f)\n"
2138 "(%f %f %f %f)\n"
2139 "(%f %f %f %f)\n",
2140 M->m[0][0], M->m[0][1], M->m[0][2], M->m[0][3],
2141 M->m[1][0], M->m[1][1], M->m[1][2], M->m[1][3],
2142 M->m[2][0], M->m[2][1], M->m[2][2], M->m[2][3],
2143 M->m[3][0], M->m[3][1], M->m[3][2], M->m[3][3]);
2144 }
2145 */
2146
2147 static inline float
2148 nine_DP4_row_col(const D3DMATRIX *A, int r, const D3DMATRIX *B, int c)
2149 {
2150 return A->m[r][0] * B->m[0][c] +
2151 A->m[r][1] * B->m[1][c] +
2152 A->m[r][2] * B->m[2][c] +
2153 A->m[r][3] * B->m[3][c];
2154 }
2155
2156 static inline float
2157 nine_DP4_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c)
2158 {
2159 return v->x * M->m[0][c] +
2160 v->y * M->m[1][c] +
2161 v->z * M->m[2][c] +
2162 1.0f * M->m[3][c];
2163 }
2164
2165 static inline float
2166 nine_DP3_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c)
2167 {
2168 return v->x * M->m[0][c] +
2169 v->y * M->m[1][c] +
2170 v->z * M->m[2][c];
2171 }
2172
2173 void
2174 nine_d3d_matrix_matrix_mul(D3DMATRIX *D, const D3DMATRIX *L, const D3DMATRIX *R)
2175 {
2176 D->_11 = nine_DP4_row_col(L, 0, R, 0);
2177 D->_12 = nine_DP4_row_col(L, 0, R, 1);
2178 D->_13 = nine_DP4_row_col(L, 0, R, 2);
2179 D->_14 = nine_DP4_row_col(L, 0, R, 3);
2180
2181 D->_21 = nine_DP4_row_col(L, 1, R, 0);
2182 D->_22 = nine_DP4_row_col(L, 1, R, 1);
2183 D->_23 = nine_DP4_row_col(L, 1, R, 2);
2184 D->_24 = nine_DP4_row_col(L, 1, R, 3);
2185
2186 D->_31 = nine_DP4_row_col(L, 2, R, 0);
2187 D->_32 = nine_DP4_row_col(L, 2, R, 1);
2188 D->_33 = nine_DP4_row_col(L, 2, R, 2);
2189 D->_34 = nine_DP4_row_col(L, 2, R, 3);
2190
2191 D->_41 = nine_DP4_row_col(L, 3, R, 0);
2192 D->_42 = nine_DP4_row_col(L, 3, R, 1);
2193 D->_43 = nine_DP4_row_col(L, 3, R, 2);
2194 D->_44 = nine_DP4_row_col(L, 3, R, 3);
2195 }
2196
2197 void
2198 nine_d3d_vector4_matrix_mul(D3DVECTOR *d, const D3DVECTOR *v, const D3DMATRIX *M)
2199 {
2200 d->x = nine_DP4_vec_col(v, M, 0);
2201 d->y = nine_DP4_vec_col(v, M, 1);
2202 d->z = nine_DP4_vec_col(v, M, 2);
2203 }
2204
2205 void
2206 nine_d3d_vector3_matrix_mul(D3DVECTOR *d, const D3DVECTOR *v, const D3DMATRIX *M)
2207 {
2208 d->x = nine_DP3_vec_col(v, M, 0);
2209 d->y = nine_DP3_vec_col(v, M, 1);
2210 d->z = nine_DP3_vec_col(v, M, 2);
2211 }
2212
2213 void
2214 nine_d3d_matrix_transpose(D3DMATRIX *D, const D3DMATRIX *M)
2215 {
2216 unsigned i, j;
2217 for (i = 0; i < 4; ++i)
2218 for (j = 0; j < 4; ++j)
2219 D->m[i][j] = M->m[j][i];
2220 }
2221
/* Helpers for the determinant: form the product M->_1i * M->_2j * M->_3k *
 * M->_4l and add it to (ADD) or subtract it from (SUB) the running result.
 * Contributions that end up positive go to `pos`, the others to `neg`.
 * Both macros expect `M`, `pos` and `neg` to exist in the calling scope.
 */
#define _M_ADD_PROD_1i_2j_3k_4l(i,j,k,l) do { \
    const float prod_ = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \
    if (prod_ > 0.0f) { pos += prod_; } else { neg += prod_; } \
} while(0)

#define _M_SUB_PROD_1i_2j_3k_4l(i,j,k,l) do { \
    const float prod_ = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \
    if (prod_ > 0.0f) { neg -= prod_; } else { pos -= prod_; } \
} while(0)
2229 float
2230 nine_d3d_matrix_det(const D3DMATRIX *M)
2231 {
2232 float pos = 0.0f;
2233 float neg = 0.0f;
2234
2235 _M_ADD_PROD_1i_2j_3k_4l(1, 2, 3, 4);
2236 _M_ADD_PROD_1i_2j_3k_4l(1, 3, 4, 2);
2237 _M_ADD_PROD_1i_2j_3k_4l(1, 4, 2, 3);
2238
2239 _M_ADD_PROD_1i_2j_3k_4l(2, 1, 4, 3);
2240 _M_ADD_PROD_1i_2j_3k_4l(2, 3, 1, 4);
2241 _M_ADD_PROD_1i_2j_3k_4l(2, 4, 3, 1);
2242
2243 _M_ADD_PROD_1i_2j_3k_4l(3, 1, 2, 4);
2244 _M_ADD_PROD_1i_2j_3k_4l(3, 2, 4, 1);
2245 _M_ADD_PROD_1i_2j_3k_4l(3, 4, 1, 2);
2246
2247 _M_ADD_PROD_1i_2j_3k_4l(4, 1, 3, 2);
2248 _M_ADD_PROD_1i_2j_3k_4l(4, 2, 1, 3);
2249 _M_ADD_PROD_1i_2j_3k_4l(4, 3, 2, 1);
2250
2251 _M_SUB_PROD_1i_2j_3k_4l(1, 2, 4, 3);
2252 _M_SUB_PROD_1i_2j_3k_4l(1, 3, 2, 4);
2253 _M_SUB_PROD_1i_2j_3k_4l(1, 4, 3, 2);
2254
2255 _M_SUB_PROD_1i_2j_3k_4l(2, 1, 3, 4);
2256 _M_SUB_PROD_1i_2j_3k_4l(2, 3, 4, 1);
2257 _M_SUB_PROD_1i_2j_3k_4l(2, 4, 1, 3);
2258
2259 _M_SUB_PROD_1i_2j_3k_4l(3, 1, 4, 2);
2260 _M_SUB_PROD_1i_2j_3k_4l(3, 2, 1, 4);
2261 _M_SUB_PROD_1i_2j_3k_4l(3, 4, 2, 1);
2262
2263 _M_SUB_PROD_1i_2j_3k_4l(4, 1, 2, 3);
2264 _M_SUB_PROD_1i_2j_3k_4l(4, 2, 3, 1);
2265 _M_SUB_PROD_1i_2j_3k_4l(4, 3, 1, 2);
2266
2267 return pos + neg;
2268 }
2269
2270 /* XXX: Probably better to just use src/mesa/math/m_matrix.c because
2271 * I have no idea where this code came from.
2272 */
2273 void
2274 nine_d3d_matrix_inverse(D3DMATRIX *D, const D3DMATRIX *M)
2275 {
2276 int i, k;
2277 float det;
2278
2279 D->m[0][0] =
2280 M->m[1][1] * M->m[2][2] * M->m[3][3] -
2281 M->m[1][1] * M->m[3][2] * M->m[2][3] -
2282 M->m[1][2] * M->m[2][1] * M->m[3][3] +
2283 M->m[1][2] * M->m[3][1] * M->m[2][3] +
2284 M->m[1][3] * M->m[2][1] * M->m[3][2] -
2285 M->m[1][3] * M->m[3][1] * M->m[2][2];
2286
2287 D->m[0][1] =
2288 -M->m[0][1] * M->m[2][2] * M->m[3][3] +
2289 M->m[0][1] * M->m[3][2] * M->m[2][3] +
2290 M->m[0][2] * M->m[2][1] * M->m[3][3] -
2291 M->m[0][2] * M->m[3][1] * M->m[2][3] -
2292 M->m[0][3] * M->m[2][1] * M->m[3][2] +
2293 M->m[0][3] * M->m[3][1] * M->m[2][2];
2294
2295 D->m[0][2] =
2296 M->m[0][1] * M->m[1][2] * M->m[3][3] -
2297 M->m[0][1] * M->m[3][2] * M->m[1][3] -
2298 M->m[0][2] * M->m[1][1] * M->m[3][3] +
2299 M->m[0][2] * M->m[3][1] * M->m[1][3] +
2300 M->m[0][3] * M->m[1][1] * M->m[3][2] -
2301 M->m[0][3] * M->m[3][1] * M->m[1][2];
2302
2303 D->m[0][3] =
2304 -M->m[0][1] * M->m[1][2] * M->m[2][3] +
2305 M->m[0][1] * M->m[2][2] * M->m[1][3] +
2306 M->m[0][2] * M->m[1][1] * M->m[2][3] -
2307 M->m[0][2] * M->m[2][1] * M->m[1][3] -
2308 M->m[0][3] * M->m[1][1] * M->m[2][2] +
2309 M->m[0][3] * M->m[2][1] * M->m[1][2];
2310
2311 D->m[1][0] =
2312 -M->m[1][0] * M->m[2][2] * M->m[3][3] +
2313 M->m[1][0] * M->m[3][2] * M->m[2][3] +
2314 M->m[1][2] * M->m[2][0] * M->m[3][3] -
2315 M->m[1][2] * M->m[3][0] * M->m[2][3] -
2316 M->m[1][3] * M->m[2][0] * M->m[3][2] +
2317 M->m[1][3] * M->m[3][0] * M->m[2][2];
2318
2319 D->m[1][1] =
2320 M->m[0][0] * M->m[2][2] * M->m[3][3] -
2321 M->m[0][0] * M->m[3][2] * M->m[2][3] -
2322 M->m[0][2] * M->m[2][0] * M->m[3][3] +
2323 M->m[0][2] * M->m[3][0] * M->m[2][3] +
2324 M->m[0][3] * M->m[2][0] * M->m[3][2] -
2325 M->m[0][3] * M->m[3][0] * M->m[2][2];
2326
2327 D->m[1][2] =
2328 -M->m[0][0] * M->m[1][2] * M->m[3][3] +
2329 M->m[0][0] * M->m[3][2] * M->m[1][3] +
2330 M->m[0][2] * M->m[1][0] * M->m[3][3] -
2331 M->m[0][2] * M->m[3][0] * M->m[1][3] -
2332 M->m[0][3] * M->m[1][0] * M->m[3][2] +
2333 M->m[0][3] * M->m[3][0] * M->m[1][2];
2334
2335 D->m[1][3] =
2336 M->m[0][0] * M->m[1][2] * M->m[2][3] -
2337 M->m[0][0] * M->m[2][2] * M->m[1][3] -
2338 M->m[0][2] * M->m[1][0] * M->m[2][3] +
2339 M->m[0][2] * M->m[2][0] * M->m[1][3] +
2340 M->m[0][3] * M->m[1][0] * M->m[2][2] -
2341 M->m[0][3] * M->m[2][0] * M->m[1][2];
2342
2343 D->m[2][0] =
2344 M->m[1][0] * M->m[2][1] * M->m[3][3] -
2345 M->m[1][0] * M->m[3][1] * M->m[2][3] -
2346 M->m[1][1] * M->m[2][0] * M->m[3][3] +
2347 M->m[1][1] * M->m[3][0] * M->m[2][3] +
2348 M->m[1][3] * M->m[2][0] * M->m[3][1] -
2349 M->m[1][3] * M->m[3][0] * M->m[2][1];
2350
2351 D->m[2][1] =
2352 -M->m[0][0] * M->m[2][1] * M->m[3][3] +
2353 M->m[0][0] * M->m[3][1] * M->m[2][3] +
2354 M->m[0][1] * M->m[2][0] * M->m[3][3] -
2355 M->m[0][1] * M->m[3][0] * M->m[2][3] -
2356 M->m[0][3] * M->m[2][0] * M->m[3][1] +
2357 M->m[0][3] * M->m[3][0] * M->m[2][1];
2358
2359 D->m[2][2] =
2360 M->m[0][0] * M->m[1][1] * M->m[3][3] -
2361 M->m[0][0] * M->m[3][1] * M->m[1][3] -
2362 M->m[0][1] * M->m[1][0] * M->m[3][3] +
2363 M->m[0][1] * M->m[3][0] * M->m[1][3] +
2364 M->m[0][3] * M->m[1][0] * M->m[3][1] -
2365 M->m[0][3] * M->m[3][0] * M->m[1][1];
2366
2367 D->m[2][3] =
2368 -M->m[0][0] * M->m[1][1] * M->m[2][3] +
2369 M->m[0][0] * M->m[2][1] * M->m[1][3] +
2370 M->m[0][1] * M->m[1][0] * M->m[2][3] -
2371 M->m[0][1] * M->m[2][0] * M->m[1][3] -
2372 M->m[0][3] * M->m[1][0] * M->m[2][1] +
2373 M->m[0][3] * M->m[2][0] * M->m[1][1];
2374
2375 D->m[3][0] =
2376 -M->m[1][0] * M->m[2][1] * M->m[3][2] +
2377 M->m[1][0] * M->m[3][1] * M->m[2][2] +
2378 M->m[1][1] * M->m[2][0] * M->m[3][2] -
2379 M->m[1][1] * M->m[3][0] * M->m[2][2] -
2380 M->m[1][2] * M->m[2][0] * M->m[3][1] +
2381 M->m[1][2] * M->m[3][0] * M->m[2][1];
2382
2383 D->m[3][1] =
2384 M->m[0][0] * M->m[2][1] * M->m[3][2] -
2385 M->m[0][0] * M->m[3][1] * M->m[2][2] -
2386 M->m[0][1] * M->m[2][0] * M->m[3][2] +
2387 M->m[0][1] * M->m[3][0] * M->m[2][2] +
2388 M->m[0][2] * M->m[2][0] * M->m[3][1] -
2389 M->m[0][2] * M->m[3][0] * M->m[2][1];
2390
2391 D->m[3][2] =
2392 -M->m[0][0] * M->m[1][1] * M->m[3][2] +
2393 M->m[0][0] * M->m[3][1] * M->m[1][2] +
2394 M->m[0][1] * M->m[1][0] * M->m[3][2] -
2395 M->m[0][1] * M->m[3][0] * M->m[1][2] -
2396 M->m[0][2] * M->m[1][0] * M->m[3][1] +
2397 M->m[0][2] * M->m[3][0] * M->m[1][1];
2398
2399 D->m[3][3] =
2400 M->m[0][0] * M->m[1][1] * M->m[2][2] -
2401 M->m[0][0] * M->m[2][1] * M->m[1][2] -
2402 M->m[0][1] * M->m[1][0] * M->m[2][2] +
2403 M->m[0][1] * M->m[2][0] * M->m[1][2] +
2404 M->m[0][2] * M->m[1][0] * M->m[2][1] -
2405 M->m[0][2] * M->m[2][0] * M->m[1][1];
2406
2407 det =
2408 M->m[0][0] * D->m[0][0] +
2409 M->m[1][0] * D->m[0][1] +
2410 M->m[2][0] * D->m[0][2] +
2411 M->m[3][0] * D->m[0][3];
2412
2413 det = 1.0 / det;
2414
2415 for (i = 0; i < 4; i++)
2416 for (k = 0; k < 4; k++)
2417 D->m[i][k] *= det;
2418
2419 #ifdef DEBUG
2420 {
2421 D3DMATRIX I;
2422
2423 nine_d3d_matrix_matrix_mul(&I, D, M);
2424
2425 for (i = 0; i < 4; ++i)
2426 for (k = 0; k < 4; ++k)
2427 if (fabsf(I.m[i][k] - (float)(i == k)) > 1e-3)
2428 DBG("Matrix inversion check FAILED !\n");
2429 }
2430 #endif
2431 }
2432
2433 /* TODO: don't use 4x4 inverse, unless this gets all nicely inlined ? */
2434 void
2435 nine_d3d_matrix_inverse_3x3(D3DMATRIX *D, const D3DMATRIX *M)
2436 {
2437 D3DMATRIX T;
2438 unsigned i, j;
2439
2440 for (i = 0; i < 3; ++i)
2441 for (j = 0; j < 3; ++j)
2442 T.m[i][j] = M->m[i][j];
2443 for (i = 0; i < 3; ++i) {
2444 T.m[i][3] = 0.0f;
2445 T.m[3][i] = 0.0f;
2446 }
2447 T.m[3][3] = 1.0f;
2448
2449 nine_d3d_matrix_inverse(D, &T);
2450 }