st/nine: Implement BumpEnvMap for ff
[mesa.git] / src / gallium / state_trackers / nine / nine_ff.c
1
2 /* FF is big and ugly so feel free to write lines as long as you like.
3 * Aieeeeeeeee !
4 *
5 * Let me make that clearer:
6 * Aieeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee ! !! !!!
7 */
8
9 #include "device9.h"
10 #include "basetexture9.h"
11 #include "vertexdeclaration9.h"
12 #include "vertexshader9.h"
13 #include "pixelshader9.h"
14 #include "nine_ff.h"
15 #include "nine_defines.h"
16 #include "nine_helpers.h"
17 #include "nine_pipe.h"
18 #include "nine_dump.h"
19
20 #include "pipe/p_context.h"
21 #include "tgsi/tgsi_ureg.h"
22 #include "tgsi/tgsi_dump.h"
23 #include "util/u_box.h"
24 #include "util/u_hash_table.h"
25 #include "util/u_upload_mgr.h"
26
27 #define DBG_CHANNEL DBG_FF
28
29 #define NINE_FF_NUM_VS_CONST 256
30 #define NINE_FF_NUM_PS_CONST 24
31
/* Plain four-component float vector (x, y, z, w). */
struct fvec4
{
    float x;
    float y;
    float z;
    float w;
};
36
/* Key describing one fixed-function vertex shader variant.
 *
 * The named bitfields overlay value32[] / value64[], which are what the
 * hash and compare callbacks operate on.  Every padding bit is therefore
 * part of the key as well.
 */
struct nine_ff_vs_key
{
    union {
        struct {
            /* --- 32-bit word 0: general state ------------------------- */
            uint32_t position_t : 1;          /* input is POSITIONT instead of POSITION */
            uint32_t lighting : 1;            /* emit the per-light loop */
            uint32_t darkness : 1;            /* lighting enabled but no active lights */
            uint32_t localviewer : 1;         /* eye direction derived from the vertex position */
            uint32_t vertexpointsize : 1;     /* point size read from the PSIZE input */
            uint32_t pointscale : 1;          /* distance-based point scaling */
            uint32_t vertexblend : 3;         /* number of world matrices to blend */
            uint32_t vertexblend_indexed : 1; /* matrices selected through BLENDINDICES */
            uint32_t vertextween : 1;         /* interpolate POSITION0/1 (and NORMAL0/1) */
            uint32_t mtl_diffuse : 2;         /* 0 = material, 1 = color1, 2 = color2 */
            uint32_t mtl_ambient : 2;         /* same encoding as mtl_diffuse */
            uint32_t mtl_specular : 2;        /* same encoding as mtl_diffuse */
            uint32_t mtl_emissive : 2;        /* same encoding as mtl_diffuse */
            uint32_t fog_mode : 2;            /* D3DFOG_* value, 0 = no vertex fog computation */
            uint32_t fog_range : 1;           /* use distance to the eye rather than |z| */
            uint32_t color0in_one : 1;        /* COLOR0 input not declared, 1.0 is used */
            uint32_t color1in_one : 1;        /* COLOR1 input not declared, 1.0 is used */
            uint32_t fog : 1;                 /* a FOG output is declared */
            uint32_t specular_enable : 1;     /* specular term is also added to oCol[0] */
            uint32_t pad1 : 6;

            /* --- 32-bit word 1 ---------------------------------------- */
            uint32_t tc_dim_input: 16;        /* 8 * 2 bits, stored as (dimension - 1) */
            uint32_t pad2 : 16;

            /* --- 32-bit word 2 ---------------------------------------- */
            uint32_t tc_dim_output: 24;       /* 8 * 3 bits, 0 = do not transform */
            uint32_t pad3 : 8;

            /* --- 32-bit word 3 ---------------------------------------- */
            uint32_t tc_gen : 24;             /* 8 * 3 bits, NINED3DTSS_TCI_* per stage */
            uint32_t pad4 : 8;

            /* --- 32-bit word 4 ---------------------------------------- */
            uint32_t tc_idx : 24;             /* 8 * 3 bits, texcoord input index per stage */
            uint32_t pad5 : 8;

            /* --- 32-bit word 5 ---------------------------------------- */
            uint32_t passthrough;             /* bitmask of (1 << NINE_DECLUSAGE_*) */
        };
        uint64_t value64[3]; /* don't forget to resize VertexShader9.ff_key */
        uint32_t value32[6];
    };
};
75
/* Key describing one fixed-function pixel shader variant.
 *
 * Texture stage state, packed per stage into ts[]:
 *
 * COLOROP       D3DTOP 5 bit
 * ALPHAOP       D3DTOP 5 bit
 * COLORARG0     D3DTA  3 bit
 * COLORARG1     D3DTA  3 bit
 * COLORARG2     D3DTA  3 bit
 * ALPHAARG0     D3DTA  3 bit
 * ALPHAARG1     D3DTA  3 bit
 * ALPHAARG2     D3DTA  3 bit
 * RESULTARG     D3DTA  1 bit (CURRENT:0 or TEMP:1)
 * TEXTARGET            2 bit (1D/2D/3D/CUBE)
 * (padding)            1 bit
 * ===========================
 *         32 bit per stage
 *
 * The named fields overlay value32[] / value64[], which are what the hash
 * and compare callbacks operate on, so padding is part of the key too.
 */
struct nine_ff_ps_key
{
    union {
        struct {
            struct {
                uint32_t colorop   : 5;
                uint32_t alphaop   : 5;
                uint32_t colorarg0 : 3;
                uint32_t colorarg1 : 3;
                uint32_t colorarg2 : 3;
                uint32_t alphaarg0 : 3;
                uint32_t alphaarg1 : 3;
                uint32_t alphaarg2 : 3;
                uint32_t resultarg : 1; /* CURRENT:0 or TEMP:1 */
                uint32_t textarget : 2; /* 1D/2D/3D/CUBE */
                uint32_t pad       : 1;
                /* that's 32 bit exactly */
            } ts[8];
            uint32_t projected : 16;
            uint32_t fog : 1; /* for vFog coming from VS */
            uint32_t fog_mode : 2;
            uint32_t specular : 1;
            uint32_t pad1 : 12; /* 9 32-bit words with this */
            /* NOTE(review): presumably extra high bits of the D3DTA
             * arguments, one bit per stage for each of arg0..arg2 --
             * confirm against the code that fills the key. */
            uint8_t colorarg_b4[3];
            uint8_t colorarg_b5[3];
            uint8_t alphaarg_b4[3]; /* 11 32-bit words plus a byte */
            uint8_t pad2[3];        /* rounds the key up to 12 32-bit words */
        };
        uint64_t value64[6]; /* don't forget to resize PixelShader9.ff_key */
        uint32_t value32[12];
    };
};
123
124 static unsigned nine_ff_vs_key_hash(void *key)
125 {
126 struct nine_ff_vs_key *vs = key;
127 unsigned i;
128 uint32_t hash = vs->value32[0];
129 for (i = 1; i < ARRAY_SIZE(vs->value32); ++i)
130 hash ^= vs->value32[i];
131 return hash;
132 }
133 static int nine_ff_vs_key_comp(void *key1, void *key2)
134 {
135 struct nine_ff_vs_key *a = (struct nine_ff_vs_key *)key1;
136 struct nine_ff_vs_key *b = (struct nine_ff_vs_key *)key2;
137
138 return memcmp(a->value64, b->value64, sizeof(a->value64));
139 }
140 static unsigned nine_ff_ps_key_hash(void *key)
141 {
142 struct nine_ff_ps_key *ps = key;
143 unsigned i;
144 uint32_t hash = ps->value32[0];
145 for (i = 1; i < ARRAY_SIZE(ps->value32); ++i)
146 hash ^= ps->value32[i];
147 return hash;
148 }
149 static int nine_ff_ps_key_comp(void *key1, void *key2)
150 {
151 struct nine_ff_ps_key *a = (struct nine_ff_ps_key *)key1;
152 struct nine_ff_ps_key *b = (struct nine_ff_ps_key *)key2;
153
154 return memcmp(a->value64, b->value64, sizeof(a->value64));
155 }
156 static unsigned nine_ff_fvf_key_hash(void *key)
157 {
158 return *(DWORD *)key;
159 }
160 static int nine_ff_fvf_key_comp(void *key1, void *key2)
161 {
162 return *(DWORD *)key1 != *(DWORD *)key2;
163 }
164
/* Forward declarations; the definitions come later in this file. */
static void nine_ff_prune_vs(struct NineDevice9 *device);
static void nine_ff_prune_ps(struct NineDevice9 *device);
167
168 static void nine_ureg_tgsi_dump(struct ureg_program *ureg, boolean override)
169 {
170 if (debug_get_bool_option("NINE_FF_DUMP", FALSE) || override) {
171 unsigned count;
172 const struct tgsi_token *toks = ureg_get_tokens(ureg, &count);
173 tgsi_dump(toks, 0);
174 ureg_free_tokens(toks);
175 }
176 }
177
/* Replicate one component of a source register (struct ureg_src). */
#define _XXXX(r) ureg_scalar(r, TGSI_SWIZZLE_X)
#define _YYYY(r) ureg_scalar(r, TGSI_SWIZZLE_Y)
#define _ZZZZ(r) ureg_scalar(r, TGSI_SWIZZLE_Z)
#define _WWWW(r) ureg_scalar(r, TGSI_SWIZZLE_W)

/* Identity swizzle, kept for symmetry with the macros above. */
#define _XYZW(r) (r)

/* Same as above, but starting from a destination register (struct ureg_dst),
 * which is first converted with ureg_src(). */
#define _X(r) _XXXX(ureg_src(r))
#define _Y(r) _YYYY(ureg_src(r))
#define _Z(r) _ZZZZ(ureg_src(r))
#define _W(r) _WWWW(ureg_src(r))

/* Plain constant register n; relies on a variable named ureg in scope. */
#define _CONST(n) ureg_DECL_constant(ureg, n)

/* Material constants start at CONST[19]. */
#define MATERIAL_CONST(i) \
    ureg_DECL_constant(ureg, 19 + (i))

/* AL should contain base address of lights table.
 * Constant i is addressed relative to AL.x, so i is the offset within the
 * current light's block. */
#define LIGHT_CONST(i) \
    ureg_src_indirect(ureg_DECL_constant(ureg, i), _X(AL))
198
199 /* VS FF constants layout:
200 *
201 * CONST[ 0.. 3] D3DTS_WORLD * D3DTS_VIEW * D3DTS_PROJECTION
202 * CONST[ 4.. 7] D3DTS_WORLD * D3DTS_VIEW
203 * CONST[ 8..11] D3DTS_VIEW * D3DTS_PROJECTION
204 * CONST[12..15] D3DTS_VIEW
205 * CONST[16..18] Normal matrix
206 *
207 * CONST[19] MATERIAL.Emissive + Material.Ambient * RS.Ambient
208 * CONST[20] MATERIAL.Diffuse
209 * CONST[21] MATERIAL.Ambient
210 * CONST[22] MATERIAL.Specular
211 * CONST[23].x___ MATERIAL.Power
212 * CONST[24] MATERIAL.Emissive
213 * CONST[25] RS.Ambient
214 *
215 * CONST[26].x___ RS.PointSizeMin
216 * CONST[26]._y__ RS.PointSizeMax
217 * CONST[26].__z_ RS.PointSize
218 * CONST[26].___w RS.PointScaleA
219 * CONST[27].x___ RS.PointScaleB
220 * CONST[27]._y__ RS.PointScaleC
221 *
222 * CONST[28].x___ RS.FogEnd
223 * CONST[28]._y__ 1.0f / (RS.FogEnd - RS.FogStart)
224 * CONST[28].__z_ RS.FogDensity
225
226 * CONST[30].x___ TWEENFACTOR
227 *
228 * CONST[32].x___ LIGHT[0].Type
229 * CONST[32]._yzw LIGHT[0].Attenuation0,1,2
230 * CONST[33] LIGHT[0].Diffuse
231 * CONST[34] LIGHT[0].Specular
232 * CONST[35] LIGHT[0].Ambient
233 * CONST[36].xyz_ LIGHT[0].Position
234 * CONST[36].___w LIGHT[0].Range
235 * CONST[37].xyz_ LIGHT[0].Direction
236 * CONST[37].___w LIGHT[0].Falloff
237 * CONST[38].x___ cos(LIGHT[0].Theta / 2)
238 * CONST[38]._y__ cos(LIGHT[0].Phi / 2)
239 * CONST[38].__z_ 1.0f / (cos(LIGHT[0].Theta / 2) - cos(Light[0].Phi / 2))
240 * CONST[39].xyz_ LIGHT[0].HalfVector (for directional lights)
241 * CONST[39].___w 1 if this is the last active light, 0 if not
242 * CONST[40] LIGHT[1]
243 * CONST[48] LIGHT[2]
244 * CONST[56] LIGHT[3]
245 * CONST[64] LIGHT[4]
246 * CONST[72] LIGHT[5]
247 * CONST[80] LIGHT[6]
248 * CONST[88] LIGHT[7]
249 * NOTE: no lighting code is generated if there are no active lights
250 *
251 * CONST[100].x___ Viewport 2/width
252 * CONST[100]._y__ Viewport 2/height
253 * CONST[100].__z_ Viewport 1/(zmax - zmin)
254 * CONST[100].___w Viewport width
255 * CONST[101].x___ Viewport x0
256 * CONST[101]._y__ Viewport y0
257 * CONST[101].__z_ Viewport z0
258 *
259 * CONST[128..131] D3DTS_TEXTURE0
260 * CONST[132..135] D3DTS_TEXTURE1
261 * CONST[136..139] D3DTS_TEXTURE2
262 * CONST[140..143] D3DTS_TEXTURE3
263 * CONST[144..147] D3DTS_TEXTURE4
264 * CONST[148..151] D3DTS_TEXTURE5
265 * CONST[152..155] D3DTS_TEXTURE6
266 * CONST[156..159] D3DTS_TEXTURE7
267 *
268 * CONST[224] D3DTS_WORLDMATRIX[0]
269 * CONST[228] D3DTS_WORLDMATRIX[1]
270 * ...
271 * CONST[252] D3DTS_WORLDMATRIX[7]
272 */
273 struct vs_build_ctx
274 {
275 struct ureg_program *ureg;
276 const struct nine_ff_vs_key *key;
277
278 uint16_t input[PIPE_MAX_ATTRIBS];
279 unsigned num_inputs;
280
281 struct ureg_src aVtx;
282 struct ureg_src aNrm;
283 struct ureg_src aCol[2];
284 struct ureg_src aTex[8];
285 struct ureg_src aPsz;
286 struct ureg_src aInd;
287 struct ureg_src aWgt;
288
289 struct ureg_src aVtx1; /* tweening */
290 struct ureg_src aNrm1;
291
292 struct ureg_src mtlA;
293 struct ureg_src mtlD;
294 struct ureg_src mtlS;
295 struct ureg_src mtlE;
296 };
297
298 static inline unsigned
299 get_texcoord_sn(struct pipe_screen *screen)
300 {
301 if (screen->get_param(screen, PIPE_CAP_TGSI_TEXCOORD))
302 return TGSI_SEMANTIC_TEXCOORD;
303 return TGSI_SEMANTIC_GENERIC;
304 }
305
306 static inline struct ureg_src
307 build_vs_add_input(struct vs_build_ctx *vs, uint16_t ndecl)
308 {
309 const unsigned i = vs->num_inputs++;
310 assert(i < PIPE_MAX_ATTRIBS);
311 vs->input[i] = ndecl;
312 return ureg_DECL_vs_input(vs->ureg, i);
313 }
314
315 /* NOTE: dst may alias src */
316 static inline void
317 ureg_normalize3(struct ureg_program *ureg,
318 struct ureg_dst dst, struct ureg_src src,
319 struct ureg_dst tmp)
320 {
321 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
322
323 ureg_DP3(ureg, tmp_x, src, src);
324 ureg_RSQ(ureg, tmp_x, _X(tmp));
325 ureg_MUL(ureg, dst, src, _X(tmp));
326 }
327
328 static void *
329 nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
330 {
331 const struct nine_ff_vs_key *key = vs->key;
332 struct ureg_program *ureg = ureg_create(PIPE_SHADER_VERTEX);
333 struct ureg_dst oPos, oCol[2], oPsz, oFog;
334 struct ureg_dst rVtx, rNrm;
335 struct ureg_dst r[8];
336 struct ureg_dst AR;
337 struct ureg_dst tmp, tmp_x, tmp_y, tmp_z;
338 unsigned i, c;
339 unsigned label[32], l = 0;
340 unsigned num_r = 8;
341 boolean need_rNrm = key->lighting || key->passthrough & (1 << NINE_DECLUSAGE_NORMAL);
342 boolean need_rVtx = key->lighting || key->fog_mode || key->pointscale;
343 const unsigned texcoord_sn = get_texcoord_sn(device->screen);
344
345 vs->ureg = ureg;
346
347 /* Check which inputs we should transform. */
348 for (i = 0; i < 8 * 3; i += 3) {
349 switch ((key->tc_gen >> i) & 0x3) {
350 case NINED3DTSS_TCI_CAMERASPACENORMAL:
351 need_rNrm = TRUE;
352 break;
353 case NINED3DTSS_TCI_CAMERASPACEPOSITION:
354 need_rVtx = TRUE;
355 break;
356 case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR:
357 need_rVtx = need_rNrm = TRUE;
358 break;
359 default:
360 break;
361 }
362 }
363
364 /* Declare and record used inputs (needed for linkage with vertex format):
365 * (texture coordinates handled later)
366 */
367 vs->aVtx = build_vs_add_input(vs,
368 key->position_t ? NINE_DECLUSAGE_POSITIONT : NINE_DECLUSAGE_POSITION);
369
370 if (need_rNrm)
371 vs->aNrm = build_vs_add_input(vs, NINE_DECLUSAGE_NORMAL);
372
373 vs->aCol[0] = ureg_imm1f(ureg, 1.0f);
374 vs->aCol[1] = ureg_imm1f(ureg, 1.0f);
375
376 if (key->lighting || key->darkness) {
377 const unsigned mask = key->mtl_diffuse | key->mtl_specular |
378 key->mtl_ambient | key->mtl_emissive;
379 if ((mask & 0x1) && !key->color0in_one)
380 vs->aCol[0] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 0));
381 if ((mask & 0x2) && !key->color1in_one)
382 vs->aCol[1] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 1));
383
384 vs->mtlD = MATERIAL_CONST(1);
385 vs->mtlA = MATERIAL_CONST(2);
386 vs->mtlS = MATERIAL_CONST(3);
387 vs->mtlE = MATERIAL_CONST(5);
388 if (key->mtl_diffuse == 1) vs->mtlD = vs->aCol[0]; else
389 if (key->mtl_diffuse == 2) vs->mtlD = vs->aCol[1];
390 if (key->mtl_ambient == 1) vs->mtlA = vs->aCol[0]; else
391 if (key->mtl_ambient == 2) vs->mtlA = vs->aCol[1];
392 if (key->mtl_specular == 1) vs->mtlS = vs->aCol[0]; else
393 if (key->mtl_specular == 2) vs->mtlS = vs->aCol[1];
394 if (key->mtl_emissive == 1) vs->mtlE = vs->aCol[0]; else
395 if (key->mtl_emissive == 2) vs->mtlE = vs->aCol[1];
396 } else {
397 if (!key->color0in_one) vs->aCol[0] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 0));
398 if (!key->color1in_one) vs->aCol[1] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 1));
399 }
400
401 if (key->vertexpointsize)
402 vs->aPsz = build_vs_add_input(vs, NINE_DECLUSAGE_PSIZE);
403
404 if (key->vertexblend_indexed || key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES))
405 vs->aInd = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDINDICES);
406 if (key->vertexblend || key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT))
407 vs->aWgt = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDWEIGHT);
408 if (key->vertextween) {
409 vs->aVtx1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(POSITION,1));
410 vs->aNrm1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(NORMAL,1));
411 }
412
413 /* Declare outputs:
414 */
415 oPos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); /* HPOS */
416 oCol[0] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0));
417 oCol[1] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 1));
418 if (key->fog || key->passthrough & (1 << NINE_DECLUSAGE_FOG)) {
419 oFog = ureg_DECL_output(ureg, TGSI_SEMANTIC_FOG, 0);
420 oFog = ureg_writemask(oFog, TGSI_WRITEMASK_X);
421 }
422
423 if (key->vertexpointsize || key->pointscale) {
424 oPsz = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_PSIZE, 0,
425 TGSI_WRITEMASK_X, 0, 1);
426 oPsz = ureg_writemask(oPsz, TGSI_WRITEMASK_X);
427 }
428
429 /* Declare TEMPs:
430 */
431 for (i = 0; i < num_r; ++i)
432 r[i] = ureg_DECL_local_temporary(ureg);
433 tmp = r[0];
434 tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
435 tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y);
436 tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z);
437 if (key->lighting || key->vertexblend)
438 AR = ureg_DECL_address(ureg);
439
440 rVtx = ureg_writemask(r[1], TGSI_WRITEMASK_XYZ);
441 rNrm = ureg_writemask(r[2], TGSI_WRITEMASK_XYZ);
442
443 /* === Vertex transformation / vertex blending:
444 */
445 if (key->vertextween) {
446 assert(!key->vertexblend);
447 ureg_LRP(ureg, r[2], _XXXX(_CONST(30)), vs->aVtx, vs->aVtx1);
448 if (need_rNrm)
449 ureg_LRP(ureg, r[3], _XXXX(_CONST(30)), vs->aNrm, vs->aNrm1);
450 vs->aVtx = ureg_src(r[2]);
451 vs->aNrm = ureg_src(r[3]);
452 }
453
454 if (key->vertexblend) {
455 struct ureg_src cWM[4];
456
457 for (i = 224; i <= 255; ++i)
458 ureg_DECL_constant(ureg, i);
459
460 /* translate world matrix index to constant file index */
461 if (key->vertexblend_indexed) {
462 ureg_MAD(ureg, tmp, vs->aInd, ureg_imm1f(ureg, 4.0f), ureg_imm1f(ureg, 224.0f));
463 ureg_ARL(ureg, AR, ureg_src(tmp));
464 }
465
466 ureg_MOV(ureg, r[2], ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 0.0f));
467 ureg_MOV(ureg, r[3], ureg_imm4f(ureg, 1.0f, 1.0f, 1.0f, 1.0f));
468
469 for (i = 0; i < key->vertexblend; ++i) {
470 for (c = 0; c < 4; ++c) {
471 cWM[c] = ureg_src_register(TGSI_FILE_CONSTANT, (224 + i * 4) * !key->vertexblend_indexed + c);
472 if (key->vertexblend_indexed)
473 cWM[c] = ureg_src_indirect(cWM[c], ureg_scalar(ureg_src(AR), i));
474 }
475 /* multiply by WORLD(index) */
476 ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), cWM[0]);
477 ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), cWM[1], ureg_src(tmp));
478 ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), cWM[2], ureg_src(tmp));
479 ureg_MAD(ureg, tmp, _WWWW(vs->aVtx), cWM[3], ureg_src(tmp));
480
481 if (i < (key->vertexblend - 1)) {
482 /* accumulate weighted position value */
483 ureg_MAD(ureg, r[2], ureg_src(tmp), ureg_scalar(vs->aWgt, i), ureg_src(r[2]));
484 /* subtract weighted position value for last value */
485 ureg_SUB(ureg, r[3], ureg_src(r[3]), ureg_scalar(vs->aWgt, i));
486 }
487 }
488
489 /* the last weighted position is always 1 - sum_of_previous_weights */
490 ureg_MAD(ureg, r[2], ureg_src(tmp), ureg_scalar(ureg_src(r[3]), key->vertexblend - 1), ureg_src(r[2]));
491
492 /* multiply by VIEW_PROJ */
493 ureg_MUL(ureg, tmp, _X(r[2]), _CONST(8));
494 ureg_MAD(ureg, tmp, _Y(r[2]), _CONST(9), ureg_src(tmp));
495 ureg_MAD(ureg, tmp, _Z(r[2]), _CONST(10), ureg_src(tmp));
496 ureg_MAD(ureg, oPos, _W(r[2]), _CONST(11), ureg_src(tmp));
497
498 if (need_rVtx)
499 vs->aVtx = ureg_src(r[2]);
500 } else
501 if (key->position_t && device->driver_caps.window_space_position_support) {
502 ureg_MOV(ureg, oPos, vs->aVtx);
503 } else if (key->position_t) {
504 /* vs->aVtx contains the coordinates buffer wise.
505 * later in the pipeline, clipping, viewport and division
506 * by w (rhw = 1/w) are going to be applied, so do the reverse
507 * of these transformations (except clipping) to have the good
508 * position at the end.*/
509 ureg_MOV(ureg, tmp, vs->aVtx);
510 /* X from [X_min, X_min + width] to [-1, 1], same for Y. Z to [0, 1] */
511 ureg_SUB(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _CONST(101));
512 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _CONST(100));
513 ureg_SUB(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, 1.0f));
514 /* Y needs to be reversed */
515 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_negate(ureg_src(tmp)));
516 /* inverse rhw */
517 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), _W(tmp));
518 /* multiply X, Y, Z by w */
519 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _W(tmp));
520 ureg_MOV(ureg, oPos, ureg_src(tmp));
521 } else {
522 /* position = vertex * WORLD_VIEW_PROJ */
523 ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), _CONST(0));
524 ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), _CONST(1), ureg_src(tmp));
525 ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), _CONST(2), ureg_src(tmp));
526 ureg_MAD(ureg, oPos, _WWWW(vs->aVtx), _CONST(3), ureg_src(tmp));
527 }
528
529 if (need_rVtx) {
530 ureg_MUL(ureg, rVtx, _XXXX(vs->aVtx), _CONST(4));
531 ureg_MAD(ureg, rVtx, _YYYY(vs->aVtx), _CONST(5), ureg_src(rVtx));
532 ureg_MAD(ureg, rVtx, _ZZZZ(vs->aVtx), _CONST(6), ureg_src(rVtx));
533 ureg_MAD(ureg, rVtx, _WWWW(vs->aVtx), _CONST(7), ureg_src(rVtx));
534 }
535 if (need_rNrm) {
536 ureg_MUL(ureg, rNrm, _XXXX(vs->aNrm), _CONST(16));
537 ureg_MAD(ureg, rNrm, _YYYY(vs->aNrm), _CONST(17), ureg_src(rNrm));
538 ureg_MAD(ureg, rNrm, _ZZZZ(vs->aNrm), _CONST(18), ureg_src(rNrm));
539 ureg_normalize3(ureg, rNrm, ureg_src(rNrm), tmp);
540 }
541 /* NOTE: don't use vs->aVtx, vs->aNrm after this line */
542
543 /* === Process point size:
544 */
545 if (key->vertexpointsize) {
546 struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);
547 ureg_MAX(ureg, tmp_z, _XXXX(vs->aPsz), _XXXX(cPsz1));
548 ureg_MIN(ureg, tmp_z, _Z(tmp), _YYYY(cPsz1));
549 } else if (key->pointscale) {
550 struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);
551 ureg_MOV(ureg, tmp_z, _ZZZZ(cPsz1));
552 }
553
554 if (key->pointscale) {
555 struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);
556 struct ureg_src cPsz2 = ureg_DECL_constant(ureg, 27);
557
558 ureg_DP3(ureg, tmp_x, ureg_src(r[1]), ureg_src(r[1]));
559 ureg_RSQ(ureg, tmp_y, _X(tmp));
560 ureg_MUL(ureg, tmp_y, _Y(tmp), _X(tmp));
561 ureg_CMP(ureg, tmp_y, ureg_negate(_Y(tmp)), _Y(tmp), ureg_imm1f(ureg, 0.0f));
562 ureg_MAD(ureg, tmp_x, _Y(tmp), _YYYY(cPsz2), _XXXX(cPsz2));
563 ureg_MAD(ureg, tmp_x, _Y(tmp), _X(tmp), _WWWW(cPsz1));
564 ureg_RSQ(ureg, tmp_x, _X(tmp));
565 ureg_MUL(ureg, tmp_x, _X(tmp), _Z(tmp));
566 ureg_MUL(ureg, tmp_x, _X(tmp), _WWWW(_CONST(100)));
567 ureg_MAX(ureg, tmp_x, _X(tmp), _XXXX(cPsz1));
568 ureg_MIN(ureg, tmp_z, _X(tmp), _YYYY(cPsz1));
569 }
570 if (key->vertexpointsize || key->pointscale)
571 ureg_MOV(ureg, oPsz, _Z(tmp));
572
573 for (i = 0; i < 8; ++i) {
574 struct ureg_dst oTex, input_coord, transformed, t;
575 unsigned c, writemask;
576 const unsigned tci = (key->tc_gen >> (i * 3)) & 0x7;
577 const unsigned idx = (key->tc_idx >> (i * 3)) & 0x7;
578 unsigned dim_input = 1 + ((key->tc_dim_input >> (i * 2)) & 0x3);
579 const unsigned dim_output = (key->tc_dim_output >> (i * 3)) & 0x7;
580
581 /* No texture output of index s */
582 if (tci == NINED3DTSS_TCI_DISABLE)
583 continue;
584 oTex = ureg_DECL_output(ureg, texcoord_sn, i);
585 input_coord = r[5];
586 transformed = r[6];
587
588 /* Get the coordinate */
589 switch (tci) {
590 case NINED3DTSS_TCI_PASSTHRU:
591 /* NINED3DTSS_TCI_PASSTHRU => Use texcoord coming from index idx *
592 * Else the idx is used only to determine wrapping mode. */
593 vs->aTex[idx] = build_vs_add_input(vs, NINE_DECLUSAGE_i(TEXCOORD,idx));
594 ureg_MOV(ureg, input_coord, vs->aTex[idx]);
595 break;
596 case NINED3DTSS_TCI_CAMERASPACENORMAL:
597 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(rNrm));
598 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
599 dim_input = 4;
600 break;
601 case NINED3DTSS_TCI_CAMERASPACEPOSITION:
602 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(rVtx));
603 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
604 dim_input = 4;
605 break;
606 case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR:
607 tmp.WriteMask = TGSI_WRITEMASK_XYZ;
608 ureg_DP3(ureg, tmp_x, ureg_src(rVtx), ureg_src(rNrm));
609 ureg_MUL(ureg, tmp, ureg_src(rNrm), _X(tmp));
610 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp));
611 ureg_SUB(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(rVtx), ureg_src(tmp));
612 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
613 dim_input = 4;
614 tmp.WriteMask = TGSI_WRITEMASK_XYZW;
615 break;
616 case NINED3DTSS_TCI_SPHEREMAP:
617 assert(!"TODO");
618 break;
619 default:
620 assert(0);
621 break;
622 }
623
624 /* Apply the transformation */
625 /* dim_output == 0 => do not transform the components.
626 * XYZRHW also disables transformation */
627 if (!dim_output || key->position_t) {
628 transformed = input_coord;
629 writemask = TGSI_WRITEMASK_XYZW;
630 } else {
631 for (c = 0; c < dim_output; c++) {
632 t = ureg_writemask(transformed, 1 << c);
633 switch (dim_input) {
634 /* dim_input = 1 2 3: -> we add trailing 1 to input*/
635 case 1: ureg_MAD(ureg, t, _X(input_coord), _XXXX(_CONST(128 + i * 4 + c)), _YYYY(_CONST(128 + i * 4 + c)));
636 break;
637 case 2: ureg_DP2(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c));
638 ureg_ADD(ureg, t, ureg_src(transformed), _ZZZZ(_CONST(128 + i * 4 + c)));
639 break;
640 case 3: ureg_DP3(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c));
641 ureg_ADD(ureg, t, ureg_src(transformed), _WWWW(_CONST(128 + i * 4 + c)));
642 break;
643 case 4: ureg_DP4(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c)); break;
644 default:
645 assert(0);
646 }
647 }
648 writemask = (1 << dim_output) - 1;
649 }
650
651 ureg_MOV(ureg, ureg_writemask(oTex, writemask), ureg_src(transformed));
652 }
653
654 /* === Lighting:
655 *
656 * DIRECTIONAL: Light at infinite distance, parallel rays, no attenuation.
657 * POINT: Finite distance to scene, divergent rays, isotropic, attenuation.
658 * SPOT: Finite distance, divergent rays, angular dependence, attenuation.
659 *
660 * vec3 normal = normalize(in.Normal * NormalMatrix);
661 * vec3 hitDir = light.direction;
662 * float atten = 1.0;
663 *
664 * if (light.type != DIRECTIONAL)
665 * {
666 * vec3 hitVec = light.position - eyeVertex;
667 * float d = length(hitVec);
668 * hitDir = hitVec / d;
669 * atten = 1 / ((light.atten2 * d + light.atten1) * d + light.atten0);
670 * }
671 *
672 * if (light.type == SPOTLIGHT)
673 * {
674 * float rho = dp3(-hitVec, light.direction);
675 * if (rho < cos(light.phi / 2))
676 * atten = 0;
677 * if (rho < cos(light.theta / 2))
678 * atten *= pow(some_func(rho), light.falloff);
679 * }
680 *
681 * float nDotHit = dp3_sat(normal, hitVec);
682 * float powFact = 0.0;
683 *
684 * if (nDotHit > 0.0)
685 * {
686 * vec3 midVec = normalize(hitDir + eye);
687 * float nDotMid = dp3_sat(normal, midVec);
688 * pFact = pow(nDotMid, material.power);
689 * }
690 *
691 * ambient += light.ambient * atten;
692 * diffuse += light.diffuse * atten * nDotHit;
693 * specular += light.specular * atten * powFact;
694 */
695 if (key->lighting) {
696 struct ureg_dst rAtt = ureg_writemask(r[1], TGSI_WRITEMASK_W);
697 struct ureg_dst rHit = ureg_writemask(r[3], TGSI_WRITEMASK_XYZ);
698 struct ureg_dst rMid = ureg_writemask(r[4], TGSI_WRITEMASK_XYZ);
699
700 struct ureg_dst rCtr = ureg_writemask(r[2], TGSI_WRITEMASK_W);
701
702 struct ureg_dst AL = ureg_writemask(AR, TGSI_WRITEMASK_X);
703
704 /* Light.*.Alpha is not used. */
705 struct ureg_dst rD = ureg_writemask(r[5], TGSI_WRITEMASK_XYZ);
706 struct ureg_dst rA = ureg_writemask(r[6], TGSI_WRITEMASK_XYZ);
707 struct ureg_dst rS = ureg_writemask(r[7], TGSI_WRITEMASK_XYZ);
708
709 struct ureg_src mtlP = _XXXX(MATERIAL_CONST(4));
710
711 struct ureg_src cLKind = _XXXX(LIGHT_CONST(0));
712 struct ureg_src cLAtt0 = _YYYY(LIGHT_CONST(0));
713 struct ureg_src cLAtt1 = _ZZZZ(LIGHT_CONST(0));
714 struct ureg_src cLAtt2 = _WWWW(LIGHT_CONST(0));
715 struct ureg_src cLColD = _XYZW(LIGHT_CONST(1));
716 struct ureg_src cLColS = _XYZW(LIGHT_CONST(2));
717 struct ureg_src cLColA = _XYZW(LIGHT_CONST(3));
718 struct ureg_src cLPos = _XYZW(LIGHT_CONST(4));
719 struct ureg_src cLRng = _WWWW(LIGHT_CONST(4));
720 struct ureg_src cLDir = _XYZW(LIGHT_CONST(5));
721 struct ureg_src cLFOff = _WWWW(LIGHT_CONST(5));
722 struct ureg_src cLTht = _XXXX(LIGHT_CONST(6));
723 struct ureg_src cLPhi = _YYYY(LIGHT_CONST(6));
724 struct ureg_src cLSDiv = _ZZZZ(LIGHT_CONST(6));
725 struct ureg_src cLLast = _WWWW(LIGHT_CONST(7));
726
727 const unsigned loop_label = l++;
728
729 ureg_MOV(ureg, rCtr, ureg_imm1f(ureg, 32.0f)); /* &lightconst(0) */
730 ureg_MOV(ureg, rD, ureg_imm1f(ureg, 0.0f));
731 ureg_MOV(ureg, rA, ureg_imm1f(ureg, 0.0f));
732 ureg_MOV(ureg, rS, ureg_imm1f(ureg, 0.0f));
733 rD = ureg_saturate(rD);
734 rA = ureg_saturate(rA);
735 rS = ureg_saturate(rS);
736
737
738 /* loop management */
739 ureg_BGNLOOP(ureg, &label[loop_label]);
740 ureg_ARL(ureg, AL, _W(rCtr));
741
742 /* if (not DIRECTIONAL light): */
743 ureg_SNE(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_DIRECTIONAL));
744 ureg_MOV(ureg, rHit, ureg_negate(cLDir));
745 ureg_MOV(ureg, rAtt, ureg_imm1f(ureg, 1.0f));
746 ureg_IF(ureg, _X(tmp), &label[l++]);
747 {
748 /* hitDir = light.position - eyeVtx
749 * d = length(hitDir)
750 */
751 ureg_SUB(ureg, rHit, cLPos, ureg_src(rVtx));
752 ureg_DP3(ureg, tmp_x, ureg_src(rHit), ureg_src(rHit));
753 ureg_RSQ(ureg, tmp_y, _X(tmp));
754 ureg_MUL(ureg, tmp_x, _X(tmp), _Y(tmp)); /* length */
755
756 /* att = 1.0 / (light.att0 + (light.att1 + light.att2 * d) * d) */
757 ureg_MAD(ureg, rAtt, _X(tmp), cLAtt2, cLAtt1);
758 ureg_MAD(ureg, rAtt, _X(tmp), _W(rAtt), cLAtt0);
759 ureg_RCP(ureg, rAtt, _W(rAtt));
760 /* cut-off if distance exceeds Light.Range */
761 ureg_SLT(ureg, tmp_x, _X(tmp), cLRng);
762 ureg_MUL(ureg, rAtt, _W(rAtt), _X(tmp));
763 }
764 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
765 ureg_ENDIF(ureg);
766
767 /* normalize hitDir */
768 ureg_normalize3(ureg, rHit, ureg_src(rHit), tmp);
769
770 /* if (SPOT light) */
771 ureg_SEQ(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_SPOT));
772 ureg_IF(ureg, _X(tmp), &label[l++]);
773 {
774 /* rho = dp3(-hitDir, light.spotDir)
775 *
776 * if (rho > light.ctht2) NOTE: 0 <= phi <= pi, 0 <= theta <= phi
777 * spotAtt = 1
778 * else
779 * if (rho <= light.cphi2)
780 * spotAtt = 0
781 * else
782 * spotAtt = (rho - light.cphi2) / (light.ctht2 - light.cphi2) ^ light.falloff
783 */
784 ureg_DP3(ureg, tmp_y, ureg_negate(ureg_src(rHit)), cLDir); /* rho */
785 ureg_SUB(ureg, tmp_x, _Y(tmp), cLPhi);
786 ureg_MUL(ureg, tmp_x, _X(tmp), cLSDiv);
787 ureg_POW(ureg, tmp_x, _X(tmp), cLFOff); /* spotAtten */
788 ureg_SGE(ureg, tmp_z, _Y(tmp), cLTht); /* if inside theta && phi */
789 ureg_SGE(ureg, tmp_y, _Y(tmp), cLPhi); /* if inside phi */
790 ureg_MAD(ureg, ureg_saturate(tmp_x), _X(tmp), _Y(tmp), _Z(tmp));
791 ureg_MUL(ureg, rAtt, _W(rAtt), _X(tmp));
792 }
793 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
794 ureg_ENDIF(ureg);
795
796 /* directional factors, let's not use LIT because of clarity */
797 ureg_DP3(ureg, ureg_saturate(tmp_x), ureg_src(rNrm), ureg_src(rHit));
798 ureg_MOV(ureg, tmp_y, ureg_imm1f(ureg, 0.0f));
799 ureg_IF(ureg, _X(tmp), &label[l++]);
800 {
801 /* midVec = normalize(hitDir + eyeDir) */
802 if (key->localviewer) {
803 ureg_normalize3(ureg, rMid, ureg_src(rVtx), tmp);
804 ureg_SUB(ureg, rMid, ureg_src(rHit), ureg_src(rMid));
805 } else {
806 ureg_SUB(ureg, rMid, ureg_src(rHit), ureg_imm3f(ureg, 0.0f, 0.0f, 1.0f));
807 }
808 ureg_normalize3(ureg, rMid, ureg_src(rMid), tmp);
809 ureg_DP3(ureg, ureg_saturate(tmp_y), ureg_src(rNrm), ureg_src(rMid));
810 ureg_POW(ureg, tmp_y, _Y(tmp), mtlP);
811
812 ureg_MUL(ureg, tmp_x, _W(rAtt), _X(tmp)); /* dp3(normal,hitDir) * att */
813 ureg_MUL(ureg, tmp_y, _W(rAtt), _Y(tmp)); /* power factor * att */
814 ureg_MAD(ureg, rD, cLColD, _X(tmp), ureg_src(rD)); /* accumulate diffuse */
815 ureg_MAD(ureg, rS, cLColS, _Y(tmp), ureg_src(rS)); /* accumulate specular */
816 }
817 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
818 ureg_ENDIF(ureg);
819
820 ureg_MAD(ureg, rA, cLColA, _W(rAtt), ureg_src(rA)); /* accumulate ambient */
821
822 /* break if this was the last light */
823 ureg_IF(ureg, cLLast, &label[l++]);
824 ureg_BRK(ureg);
825 ureg_ENDIF(ureg);
826 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
827
828 ureg_ADD(ureg, rCtr, _W(rCtr), ureg_imm1f(ureg, 8.0f));
829 ureg_fixup_label(ureg, label[loop_label], ureg_get_instruction_number(ureg));
830 ureg_ENDLOOP(ureg, &label[loop_label]);
831
832 /* Set alpha factors of illumination to 1.0 for the multiplications. */
833 rD.WriteMask = TGSI_WRITEMASK_W; rD.Saturate = 0;
834 rS.WriteMask = TGSI_WRITEMASK_W; rS.Saturate = 0;
835 rA.WriteMask = TGSI_WRITEMASK_W; rA.Saturate = 0;
836 ureg_MOV(ureg, rD, ureg_imm1f(ureg, 1.0f));
837 ureg_MOV(ureg, rS, ureg_imm1f(ureg, 1.0f));
838
839 /* Apply to material:
840 *
841 * oCol[0] = (material.emissive + material.ambient * rs.ambient) +
842 * material.ambient * ambient +
843 * material.diffuse * diffuse +
844 * oCol[1] = material.specular * specular;
845 */
846 if (key->mtl_emissive == 0 && key->mtl_ambient == 0) {
847 ureg_MOV(ureg, rA, ureg_imm1f(ureg, 1.0f));
848 ureg_MAD(ureg, tmp, ureg_src(rA), vs->mtlA, _CONST(19));
849 } else {
850 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(rA), _CONST(25));
851 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), vs->mtlA, ureg_src(tmp), vs->mtlE);
852 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W ), vs->mtlA, vs->mtlE);
853 }
854
855 if (key->specular_enable) {
856 /* add oCol[1] to oCol[0] */
857 ureg_MAD(ureg, tmp, ureg_src(rD), vs->mtlD, ureg_src(tmp));
858 ureg_MAD(ureg, oCol[0], ureg_src(rS), vs->mtlS, ureg_src(tmp));
859 } else {
860 ureg_MAD(ureg, oCol[0], ureg_src(rD), vs->mtlD, ureg_src(tmp));
861 }
862 ureg_MUL(ureg, oCol[1], ureg_src(rS), vs->mtlS);
863 } else
864 /* COLOR */
865 if (key->darkness) {
866 if (key->mtl_emissive == 0 && key->mtl_ambient == 0) {
867 ureg_MAD(ureg, oCol[0], vs->mtlD, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f), _CONST(19));
868 } else {
869 ureg_MAD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), vs->mtlA, _CONST(25), vs->mtlE);
870 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), vs->mtlA, vs->mtlE);
871 ureg_ADD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_W), vs->mtlD, _W(tmp));
872 }
873 ureg_MUL(ureg, oCol[1], ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f), vs->mtlS);
874 } else {
875 ureg_MOV(ureg, oCol[0], vs->aCol[0]);
876 ureg_MOV(ureg, oCol[1], vs->aCol[1]);
877 }
878
879 /* === Process fog.
880 *
881 * exp(x) = ex2(log2(e) * x)
882 */
883 if (key->fog_mode) {
884 if (key->position_t) {
885 ureg_MOV(ureg, ureg_saturate(tmp_x), ureg_scalar(vs->aCol[1], TGSI_SWIZZLE_W));
886 } else
887 if (key->fog_range) {
888 ureg_DP3(ureg, tmp_x, ureg_src(rVtx), ureg_src(rVtx));
889 ureg_RSQ(ureg, tmp_z, _X(tmp));
890 ureg_MUL(ureg, tmp_z, _Z(tmp), _X(tmp));
891 } else {
892 ureg_MOV(ureg, tmp_z, ureg_abs(_Z(rVtx)));
893 }
894
895 if (key->fog_mode == D3DFOG_EXP) {
896 ureg_MUL(ureg, tmp_x, _Z(tmp), _ZZZZ(_CONST(28)));
897 ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f));
898 ureg_EX2(ureg, tmp_x, _X(tmp));
899 } else
900 if (key->fog_mode == D3DFOG_EXP2) {
901 ureg_MUL(ureg, tmp_x, _Z(tmp), _ZZZZ(_CONST(28)));
902 ureg_MUL(ureg, tmp_x, _X(tmp), _X(tmp));
903 ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f));
904 ureg_EX2(ureg, tmp_x, _X(tmp));
905 } else
906 if (key->fog_mode == D3DFOG_LINEAR && !key->position_t) {
907 ureg_SUB(ureg, tmp_x, _XXXX(_CONST(28)), _Z(tmp));
908 ureg_MUL(ureg, ureg_saturate(tmp_x), _X(tmp), _YYYY(_CONST(28)));
909 }
910 ureg_MOV(ureg, oFog, _X(tmp));
911 } else if (key->fog && !(key->passthrough & (1 << NINE_DECLUSAGE_FOG))) {
912 ureg_MOV(ureg, oFog, ureg_scalar(vs->aCol[1], TGSI_SWIZZLE_W));
913 }
914
915 if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT)) {
916 struct ureg_src input;
917 struct ureg_dst output;
918 input = vs->aWgt;
919 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 18);
920 ureg_MOV(ureg, output, input);
921 }
922 if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES)) {
923 struct ureg_src input;
924 struct ureg_dst output;
925 input = vs->aInd;
926 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 19);
927 ureg_MOV(ureg, output, input);
928 }
929 if (key->passthrough & (1 << NINE_DECLUSAGE_NORMAL)) {
930 struct ureg_src input;
931 struct ureg_dst output;
932 input = vs->aNrm;
933 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 20);
934 ureg_MOV(ureg, output, input);
935 }
936 if (key->passthrough & (1 << NINE_DECLUSAGE_TANGENT)) {
937 struct ureg_src input;
938 struct ureg_dst output;
939 input = build_vs_add_input(vs, NINE_DECLUSAGE_TANGENT);
940 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 21);
941 ureg_MOV(ureg, output, input);
942 }
943 if (key->passthrough & (1 << NINE_DECLUSAGE_BINORMAL)) {
944 struct ureg_src input;
945 struct ureg_dst output;
946 input = build_vs_add_input(vs, NINE_DECLUSAGE_BINORMAL);
947 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 22);
948 ureg_MOV(ureg, output, input);
949 }
950 if (key->passthrough & (1 << NINE_DECLUSAGE_FOG)) {
951 struct ureg_src input;
952 struct ureg_dst output;
953 input = build_vs_add_input(vs, NINE_DECLUSAGE_FOG);
954 input = ureg_scalar(input, TGSI_SWIZZLE_X);
955 output = oFog;
956 ureg_MOV(ureg, output, input);
957 }
958 if (key->passthrough & (1 << NINE_DECLUSAGE_DEPTH)) {
959 (void) 0; /* TODO: replace z of position output ? */
960 }
961
962
963 if (key->position_t && device->driver_caps.window_space_position_support)
964 ureg_property(ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
965
966 ureg_END(ureg);
967 nine_ureg_tgsi_dump(ureg, FALSE);
968 return ureg_create_shader_and_destroy(ureg, device->pipe);
969 }
970
/* PS FF constants layout:
 *
 * CONST[ 0.. 7]      stage[i].D3DTSS_CONSTANT
 * CONST[ 8..15].x___ stage[i].D3DTSS_BUMPENVMAT00
 * CONST[ 8..15]._y__ stage[i].D3DTSS_BUMPENVMAT01
 * CONST[ 8..15].__z_ stage[i].D3DTSS_BUMPENVMAT10
 * CONST[ 8..15].___w stage[i].D3DTSS_BUMPENVMAT11
 * CONST[16..19].x_z_ stage[i].D3DTSS_BUMPENVLSCALE
 * CONST[16..19]._y_w stage[i].D3DTSS_BUMPENVLOFFSET
 *                    (two stages share one constant: CONST[16 + i / 2],
 *                     even stages use .xy, odd stages use .zw)
 *
 * CONST[20] D3DRS_TEXTUREFACTOR
 * CONST[21] D3DRS_FOGCOLOR
 * CONST[22].x___ RS.FogEnd
 * CONST[22]._y__ 1.0f / (RS.FogEnd - RS.FogStart)
 * CONST[22].__z_ RS.FogDensity
 */
987 struct ps_build_ctx
988 {
989 struct ureg_program *ureg;
990
991 struct ureg_src vC[2]; /* DIFFUSE, SPECULAR */
992 struct ureg_src vT[8]; /* TEXCOORD[i] */
993 struct ureg_dst r[6]; /* TEMPs */
994 struct ureg_dst rCur; /* D3DTA_CURRENT */
995 struct ureg_dst rMod;
996 struct ureg_src rCurSrc;
997 struct ureg_dst rTmp; /* D3DTA_TEMP */
998 struct ureg_src rTmpSrc;
999 struct ureg_dst rTex;
1000 struct ureg_src rTexSrc;
1001 struct ureg_src cBEM[8];
1002 struct ureg_src s[8];
1003
1004 struct {
1005 unsigned index;
1006 unsigned index_pre_mod;
1007 unsigned num_regs;
1008 } stage;
1009 };
1010
1011 static struct ureg_src
1012 ps_get_ts_arg(struct ps_build_ctx *ps, unsigned ta)
1013 {
1014 struct ureg_src reg;
1015
1016 switch (ta & D3DTA_SELECTMASK) {
1017 case D3DTA_CONSTANT:
1018 reg = ureg_DECL_constant(ps->ureg, ps->stage.index);
1019 break;
1020 case D3DTA_CURRENT:
1021 reg = (ps->stage.index == ps->stage.index_pre_mod) ? ureg_src(ps->rMod) : ps->rCurSrc;
1022 break;
1023 case D3DTA_DIFFUSE:
1024 reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_COLOR);
1025 break;
1026 case D3DTA_SPECULAR:
1027 reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
1028 break;
1029 case D3DTA_TEMP:
1030 reg = ps->rTmpSrc;
1031 break;
1032 case D3DTA_TEXTURE:
1033 reg = ps->rTexSrc;
1034 break;
1035 case D3DTA_TFACTOR:
1036 reg = ureg_DECL_constant(ps->ureg, 20);
1037 break;
1038 default:
1039 assert(0);
1040 reg = ureg_src_undef();
1041 break;
1042 }
1043 if (ta & D3DTA_COMPLEMENT) {
1044 struct ureg_dst dst = ps->r[ps->stage.num_regs++];
1045 ureg_SUB(ps->ureg, dst, ureg_imm1f(ps->ureg, 1.0f), reg);
1046 reg = ureg_src(dst);
1047 }
1048 if (ta & D3DTA_ALPHAREPLICATE)
1049 reg = _WWWW(reg);
1050 return reg;
1051 }
1052
1053 static struct ureg_dst
1054 ps_get_ts_dst(struct ps_build_ctx *ps, unsigned ta)
1055 {
1056 assert(!(ta & (D3DTA_COMPLEMENT | D3DTA_ALPHAREPLICATE)));
1057
1058 switch (ta & D3DTA_SELECTMASK) {
1059 case D3DTA_CURRENT:
1060 return ps->rCur;
1061 case D3DTA_TEMP:
1062 return ps->rTmp;
1063 default:
1064 assert(0);
1065 return ureg_dst_undef();
1066 }
1067 }
1068
1069 static uint8_t ps_d3dtop_args_mask(D3DTEXTUREOP top)
1070 {
1071 switch (top) {
1072 case D3DTOP_DISABLE:
1073 return 0x0;
1074 case D3DTOP_SELECTARG1:
1075 case D3DTOP_PREMODULATE:
1076 return 0x2;
1077 case D3DTOP_SELECTARG2:
1078 return 0x4;
1079 case D3DTOP_MULTIPLYADD:
1080 case D3DTOP_LERP:
1081 return 0x7;
1082 default:
1083 return 0x6;
1084 }
1085 }
1086
1087 static inline boolean
1088 is_MOV_no_op(struct ureg_dst dst, struct ureg_src src)
1089 {
1090 return !dst.WriteMask ||
1091 (dst.File == src.File &&
1092 dst.Index == src.Index &&
1093 !dst.Indirect &&
1094 !dst.Saturate &&
1095 !src.Indirect &&
1096 !src.Negate &&
1097 !src.Absolute &&
1098 (!(dst.WriteMask & TGSI_WRITEMASK_X) || (src.SwizzleX == TGSI_SWIZZLE_X)) &&
1099 (!(dst.WriteMask & TGSI_WRITEMASK_Y) || (src.SwizzleY == TGSI_SWIZZLE_Y)) &&
1100 (!(dst.WriteMask & TGSI_WRITEMASK_Z) || (src.SwizzleZ == TGSI_SWIZZLE_Z)) &&
1101 (!(dst.WriteMask & TGSI_WRITEMASK_W) || (src.SwizzleW == TGSI_SWIZZLE_W)));
1102
1103 }
1104
1105 static void
1106 ps_do_ts_op(struct ps_build_ctx *ps, unsigned top, struct ureg_dst dst, struct ureg_src *arg)
1107 {
1108 struct ureg_program *ureg = ps->ureg;
1109 struct ureg_dst tmp = ps->r[ps->stage.num_regs];
1110 struct ureg_dst tmp2 = ps->r[ps->stage.num_regs+1];
1111 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
1112
1113 tmp.WriteMask = dst.WriteMask;
1114
1115 if (top != D3DTOP_SELECTARG1 && top != D3DTOP_SELECTARG2 &&
1116 top != D3DTOP_MODULATE && top != D3DTOP_PREMODULATE &&
1117 top != D3DTOP_BLENDDIFFUSEALPHA && top != D3DTOP_BLENDTEXTUREALPHA &&
1118 top != D3DTOP_BLENDFACTORALPHA && top != D3DTOP_BLENDCURRENTALPHA &&
1119 top != D3DTOP_BUMPENVMAP && top != D3DTOP_BUMPENVMAPLUMINANCE &&
1120 top != D3DTOP_LERP)
1121 dst = ureg_saturate(dst);
1122
1123 switch (top) {
1124 case D3DTOP_SELECTARG1:
1125 if (!is_MOV_no_op(dst, arg[1]))
1126 ureg_MOV(ureg, dst, arg[1]);
1127 break;
1128 case D3DTOP_SELECTARG2:
1129 if (!is_MOV_no_op(dst, arg[2]))
1130 ureg_MOV(ureg, dst, arg[2]);
1131 break;
1132 case D3DTOP_MODULATE:
1133 ureg_MUL(ureg, dst, arg[1], arg[2]);
1134 break;
1135 case D3DTOP_MODULATE2X:
1136 ureg_MUL(ureg, tmp, arg[1], arg[2]);
1137 ureg_ADD(ureg, dst, ureg_src(tmp), ureg_src(tmp));
1138 break;
1139 case D3DTOP_MODULATE4X:
1140 ureg_MUL(ureg, tmp, arg[1], arg[2]);
1141 ureg_MUL(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 4.0f));
1142 break;
1143 case D3DTOP_ADD:
1144 ureg_ADD(ureg, dst, arg[1], arg[2]);
1145 break;
1146 case D3DTOP_ADDSIGNED:
1147 ureg_ADD(ureg, tmp, arg[1], arg[2]);
1148 ureg_SUB(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 0.5f));
1149 break;
1150 case D3DTOP_ADDSIGNED2X:
1151 ureg_ADD(ureg, tmp, arg[1], arg[2]);
1152 ureg_MAD(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
1153 break;
1154 case D3DTOP_SUBTRACT:
1155 ureg_SUB(ureg, dst, arg[1], arg[2]);
1156 break;
1157 case D3DTOP_ADDSMOOTH:
1158 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), arg[1]);
1159 ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], arg[1]);
1160 break;
1161 case D3DTOP_BLENDDIFFUSEALPHA:
1162 ureg_LRP(ureg, dst, _WWWW(ps->vC[0]), arg[1], arg[2]);
1163 break;
1164 case D3DTOP_BLENDTEXTUREALPHA:
1165 /* XXX: alpha taken from previous stage, texture or result ? */
1166 ureg_LRP(ureg, dst, _W(ps->rTex), arg[1], arg[2]);
1167 break;
1168 case D3DTOP_BLENDFACTORALPHA:
1169 ureg_LRP(ureg, dst, _WWWW(_CONST(20)), arg[1], arg[2]);
1170 break;
1171 case D3DTOP_BLENDTEXTUREALPHAPM:
1172 ureg_SUB(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), _W(ps->rTex));
1173 ureg_MAD(ureg, dst, arg[2], _X(tmp), arg[1]);
1174 break;
1175 case D3DTOP_BLENDCURRENTALPHA:
1176 ureg_LRP(ureg, dst, _WWWW(ps->rCurSrc), arg[1], arg[2]);
1177 break;
1178 case D3DTOP_PREMODULATE:
1179 ureg_MOV(ureg, dst, arg[1]);
1180 ps->stage.index_pre_mod = ps->stage.index + 1;
1181 break;
1182 case D3DTOP_MODULATEALPHA_ADDCOLOR:
1183 ureg_MAD(ureg, dst, _WWWW(arg[1]), arg[2], arg[1]);
1184 break;
1185 case D3DTOP_MODULATECOLOR_ADDALPHA:
1186 ureg_MAD(ureg, dst, arg[1], arg[2], _WWWW(arg[1]));
1187 break;
1188 case D3DTOP_MODULATEINVALPHA_ADDCOLOR:
1189 ureg_SUB(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), _WWWW(arg[1]));
1190 ureg_MAD(ureg, dst, _X(tmp), arg[2], arg[1]);
1191 break;
1192 case D3DTOP_MODULATEINVCOLOR_ADDALPHA:
1193 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), arg[1]);
1194 ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], _WWWW(arg[1]));
1195 break;
1196 case D3DTOP_BUMPENVMAP:
1197 break;
1198 case D3DTOP_BUMPENVMAPLUMINANCE:
1199 break;
1200 case D3DTOP_DOTPRODUCT3:
1201 ureg_SUB(ureg, tmp, arg[1], ureg_imm4f(ureg,0.5,0.5,0.5,0.5));
1202 ureg_SUB(ureg, tmp2, arg[2] , ureg_imm4f(ureg,0.5,0.5,0.5,0.5));
1203 ureg_DP3(ureg, tmp, ureg_src(tmp), ureg_src(tmp2));
1204 ureg_MUL(ureg, ureg_saturate(dst), ureg_src(tmp), ureg_imm4f(ureg,4.0,4.0,4.0,4.0));
1205 break;
1206 case D3DTOP_MULTIPLYADD:
1207 ureg_MAD(ureg, dst, arg[1], arg[2], arg[0]);
1208 break;
1209 case D3DTOP_LERP:
1210 ureg_LRP(ureg, dst, arg[0], arg[1], arg[2]);
1211 break;
1212 case D3DTOP_DISABLE:
1213 /* no-op ? */
1214 break;
1215 default:
1216 assert(!"invalid D3DTOP");
1217 break;
1218 }
1219 }
1220
1221 static void *
1222 nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key)
1223 {
1224 struct ps_build_ctx ps;
1225 struct ureg_program *ureg = ureg_create(PIPE_SHADER_FRAGMENT);
1226 struct ureg_dst oCol;
1227 unsigned i, s;
1228 const unsigned texcoord_sn = get_texcoord_sn(device->screen);
1229
1230 memset(&ps, 0, sizeof(ps));
1231 ps.ureg = ureg;
1232 ps.stage.index_pre_mod = -1;
1233
1234 ps.vC[0] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_COLOR);
1235
1236 /* Declare all TEMPs we might need, serious drivers have a register allocator. */
1237 for (i = 0; i < ARRAY_SIZE(ps.r); ++i)
1238 ps.r[i] = ureg_DECL_local_temporary(ureg);
1239 ps.rCur = ps.r[0];
1240 ps.rTmp = ps.r[1];
1241 ps.rTex = ps.r[2];
1242 ps.rCurSrc = ureg_src(ps.rCur);
1243 ps.rTmpSrc = ureg_src(ps.rTmp);
1244 ps.rTexSrc = ureg_src(ps.rTex);
1245
1246 for (s = 0; s < 8; ++s) {
1247 ps.s[s] = ureg_src_undef();
1248
1249 if (key->ts[s].colorop != D3DTOP_DISABLE) {
1250 if (key->ts[s].colorarg0 == D3DTA_SPECULAR ||
1251 key->ts[s].colorarg1 == D3DTA_SPECULAR ||
1252 key->ts[s].colorarg2 == D3DTA_SPECULAR)
1253 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
1254
1255 if (key->ts[s].colorarg0 == D3DTA_TEXTURE ||
1256 key->ts[s].colorarg1 == D3DTA_TEXTURE ||
1257 key->ts[s].colorarg2 == D3DTA_TEXTURE) {
1258 ps.s[s] = ureg_DECL_sampler(ureg, s);
1259 ps.vT[s] = ureg_DECL_fs_input(ureg, texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
1260 }
1261 if (s && (key->ts[s - 1].colorop == D3DTOP_PREMODULATE ||
1262 key->ts[s - 1].alphaop == D3DTOP_PREMODULATE))
1263 ps.s[s] = ureg_DECL_sampler(ureg, s);
1264 }
1265
1266 if (key->ts[s].alphaop != D3DTOP_DISABLE) {
1267 if (key->ts[s].alphaarg0 == D3DTA_SPECULAR ||
1268 key->ts[s].alphaarg1 == D3DTA_SPECULAR ||
1269 key->ts[s].alphaarg2 == D3DTA_SPECULAR)
1270 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
1271
1272 if (key->ts[s].alphaarg0 == D3DTA_TEXTURE ||
1273 key->ts[s].alphaarg1 == D3DTA_TEXTURE ||
1274 key->ts[s].alphaarg2 == D3DTA_TEXTURE) {
1275 ps.s[s] = ureg_DECL_sampler(ureg, s);
1276 ps.vT[s] = ureg_DECL_fs_input(ureg, texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
1277 }
1278 }
1279 }
1280 if (key->specular)
1281 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
1282
1283 oCol = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
1284
1285 if (key->ts[0].colorop == D3DTOP_DISABLE &&
1286 key->ts[0].alphaop == D3DTOP_DISABLE)
1287 ureg_MOV(ureg, ps.rCur, ps.vC[0]);
1288 /* Or is it undefined then ? */
1289
1290 /* Run stages.
1291 */
1292 for (s = 0; s < 8; ++s) {
1293 unsigned colorarg[3];
1294 unsigned alphaarg[3];
1295 const uint8_t used_c = ps_d3dtop_args_mask(key->ts[s].colorop);
1296 const uint8_t used_a = ps_d3dtop_args_mask(key->ts[s].alphaop);
1297 struct ureg_dst dst;
1298 struct ureg_src arg[3];
1299
1300 if (key->ts[s].colorop == D3DTOP_DISABLE &&
1301 key->ts[s].alphaop == D3DTOP_DISABLE)
1302 continue;
1303 ps.stage.index = s;
1304 ps.stage.num_regs = 3;
1305
1306 DBG("STAGE[%u]: colorop=%s alphaop=%s\n", s,
1307 nine_D3DTOP_to_str(key->ts[s].colorop),
1308 nine_D3DTOP_to_str(key->ts[s].alphaop));
1309
1310 if (!ureg_src_is_undef(ps.s[s])) {
1311 unsigned target;
1312 struct ureg_src texture_coord = ps.vT[s];
1313 struct ureg_dst delta;
1314 switch (key->ts[s].textarget) {
1315 case 0: target = TGSI_TEXTURE_1D; break;
1316 case 1: target = TGSI_TEXTURE_2D; break;
1317 case 2: target = TGSI_TEXTURE_3D; break;
1318 case 3: target = TGSI_TEXTURE_CUBE; break;
1319 /* this is a 2 bit bitfield, do I really need a default case ? */
1320 }
1321
1322 /* Modify coordinates */
1323 if (s >= 1 &&
1324 (key->ts[s-1].colorop == D3DTOP_BUMPENVMAP ||
1325 key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE)) {
1326 delta = ureg_DECL_temporary(ureg);
1327 /* Du' = D3DTSS_BUMPENVMAT00(stage s-1)*t(s-1)R + D3DTSS_BUMPENVMAT10(stage s-1)*t(s-1)G */
1328 ureg_MUL(ureg, ureg_writemask(delta, TGSI_WRITEMASK_X), _X(ps.rTex), _XXXX(_CONST(8 + s - 1)));
1329 ureg_MAD(ureg, ureg_writemask(delta, TGSI_WRITEMASK_X), _Y(ps.rTex), _ZZZZ(_CONST(8 + s - 1)), ureg_src(delta));
1330 /* Dv' = D3DTSS_BUMPENVMAT01(stage s-1)*t(s-1)R + D3DTSS_BUMPENVMAT11(stage s-1)*t(s-1)G */
1331 ureg_MUL(ureg, ureg_writemask(delta, TGSI_WRITEMASK_Y), _X(ps.rTex), _YYYY(_CONST(8 + s - 1)));
1332 ureg_MAD(ureg, ureg_writemask(delta, TGSI_WRITEMASK_Y), _Y(ps.rTex), _WWWW(_CONST(8 + s - 1)), ureg_src(delta));
1333 texture_coord = ureg_src(ureg_DECL_temporary(ureg));
1334 ureg_MOV(ureg, ureg_writemask(ureg_dst(texture_coord), ureg_dst(ps.vT[s]).WriteMask), ps.vT[s]);
1335 ureg_ADD(ureg, ureg_writemask(ureg_dst(texture_coord), TGSI_WRITEMASK_XY), texture_coord, ureg_src(delta));
1336 /* Prepare luminance multiplier
1337 * t(s)RGBA = t(s)RGBA * clamp[(t(s-1)B * D3DTSS_BUMPENVLSCALE(stage s-1)) + D3DTSS_BUMPENVLOFFSET(stage s-1)] */
1338 if (key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE) {
1339 struct ureg_src bumpenvlscale = ((s-1) & 1) ? _ZZZZ(_CONST(16 + (s-1) / 2)) : _XXXX(_CONST(16 + (s-1) / 2));
1340 struct ureg_src bumpenvloffset = ((s-1) & 1) ? _WWWW(_CONST(16 + (s-1) / 2)) : _YYYY(_CONST(16 + (s-1) / 2));
1341
1342 ureg_MAD(ureg, ureg_saturate(ureg_writemask(delta, TGSI_WRITEMASK_X)), _Z(ps.rTex), bumpenvlscale, bumpenvloffset);
1343 }
1344 }
1345 if (key->projected & (3 << (s *2))) {
1346 unsigned dim = 1 + ((key->projected >> (2 * s)) & 3);
1347 if (dim == 4)
1348 ureg_TXP(ureg, ps.rTex, target, texture_coord, ps.s[s]);
1349 else {
1350 ureg_RCP(ureg, ureg_writemask(ps.rTmp, TGSI_WRITEMASK_X), ureg_scalar(texture_coord, dim-1));
1351 ureg_MUL(ureg, ps.rTmp, _XXXX(ps.rTmpSrc), texture_coord);
1352 ureg_TEX(ureg, ps.rTex, target, ps.rTmpSrc, ps.s[s]);
1353 }
1354 } else {
1355 ureg_TEX(ureg, ps.rTex, target, texture_coord, ps.s[s]);
1356 }
1357 if (s >= 1 && key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE)
1358 ureg_MUL(ureg, ps.rTex, ureg_src(ps.rTex), _X(delta));
1359 }
1360
1361 if (((s == 0 && key->ts[0].colorop != D3DTOP_BUMPENVMAP &&
1362 key->ts[0].colorop != D3DTOP_BUMPENVMAPLUMINANCE) ||
1363 (s == 1 &&
1364 (key->ts[0].colorop == D3DTOP_BUMPENVMAP ||
1365 key->ts[0].colorop == D3DTOP_BUMPENVMAPLUMINANCE)))&&
1366 (key->ts[s].resultarg != 0 /* not current */ ||
1367 key->ts[s].colorop == D3DTOP_DISABLE ||
1368 key->ts[s].alphaop == D3DTOP_DISABLE ||
1369 key->ts[s].colorop == D3DTOP_BLENDCURRENTALPHA ||
1370 key->ts[s].alphaop == D3DTOP_BLENDCURRENTALPHA ||
1371 key->ts[s].colorarg0 == D3DTA_CURRENT ||
1372 key->ts[s].colorarg1 == D3DTA_CURRENT ||
1373 key->ts[s].colorarg2 == D3DTA_CURRENT ||
1374 key->ts[s].alphaarg0 == D3DTA_CURRENT ||
1375 key->ts[s].alphaarg1 == D3DTA_CURRENT ||
1376 key->ts[s].alphaarg2 == D3DTA_CURRENT)) {
1377 /* Initialize D3DTA_CURRENT.
1378 * (Yes we can do this before the loop but not until
1379 * NVE4 has an instruction scheduling pass.)
1380 */
1381 ureg_MOV(ureg, ps.rCur, ps.vC[0]);
1382 }
1383
1384 if (key->ts[s].colorop == D3DTOP_BUMPENVMAP ||
1385 key->ts[s].colorop == D3DTOP_BUMPENVMAPLUMINANCE)
1386 continue;
1387
1388 dst = ps_get_ts_dst(&ps, key->ts[s].resultarg ? D3DTA_TEMP : D3DTA_CURRENT);
1389
1390 if (ps.stage.index_pre_mod == ps.stage.index) {
1391 ps.rMod = ps.r[ps.stage.num_regs++];
1392 ureg_MUL(ureg, ps.rMod, ps.rCurSrc, ps.rTexSrc);
1393 }
1394
1395 colorarg[0] = (key->ts[s].colorarg0 | ((key->colorarg_b4[0] >> s) << 4) | ((key->colorarg_b5[0] >> s) << 5)) & 0x3f;
1396 colorarg[1] = (key->ts[s].colorarg1 | ((key->colorarg_b4[1] >> s) << 4) | ((key->colorarg_b5[1] >> s) << 5)) & 0x3f;
1397 colorarg[2] = (key->ts[s].colorarg2 | ((key->colorarg_b4[2] >> s) << 4) | ((key->colorarg_b5[2] >> s) << 5)) & 0x3f;
1398 alphaarg[0] = (key->ts[s].alphaarg0 | ((key->alphaarg_b4[0] >> s) << 4)) & 0x1f;
1399 alphaarg[1] = (key->ts[s].alphaarg1 | ((key->alphaarg_b4[1] >> s) << 4)) & 0x1f;
1400 alphaarg[2] = (key->ts[s].alphaarg2 | ((key->alphaarg_b4[2] >> s) << 4)) & 0x1f;
1401
1402 if (key->ts[s].colorop != key->ts[s].alphaop ||
1403 colorarg[0] != alphaarg[0] ||
1404 colorarg[1] != alphaarg[1] ||
1405 colorarg[2] != alphaarg[2])
1406 dst.WriteMask = TGSI_WRITEMASK_XYZ;
1407
1408 /* Special DOTPRODUCT behaviour (see wine tests) */
1409 if (key->ts[s].colorop == D3DTOP_DOTPRODUCT3)
1410 dst.WriteMask = TGSI_WRITEMASK_XYZW;
1411
1412 if (used_c & 0x1) arg[0] = ps_get_ts_arg(&ps, colorarg[0]);
1413 if (used_c & 0x2) arg[1] = ps_get_ts_arg(&ps, colorarg[1]);
1414 if (used_c & 0x4) arg[2] = ps_get_ts_arg(&ps, colorarg[2]);
1415 ps_do_ts_op(&ps, key->ts[s].colorop, dst, arg);
1416
1417 if (dst.WriteMask != TGSI_WRITEMASK_XYZW) {
1418 dst.WriteMask = TGSI_WRITEMASK_W;
1419
1420 if (used_a & 0x1) arg[0] = ps_get_ts_arg(&ps, alphaarg[0]);
1421 if (used_a & 0x2) arg[1] = ps_get_ts_arg(&ps, alphaarg[1]);
1422 if (used_a & 0x4) arg[2] = ps_get_ts_arg(&ps, alphaarg[2]);
1423 ps_do_ts_op(&ps, key->ts[s].alphaop, dst, arg);
1424 }
1425 }
1426
1427 if (key->specular)
1428 ureg_ADD(ureg, ps.rCur, ps.rCurSrc, ps.vC[1]);
1429
1430 /* Fog.
1431 */
1432 if (key->fog_mode) {
1433 struct ureg_src vPos;
1434 if (device->screen->get_param(device->screen,
1435 PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL)) {
1436 vPos = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
1437 } else {
1438 vPos = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0,
1439 TGSI_INTERPOLATE_LINEAR);
1440 }
1441
1442 struct ureg_dst rFog = ureg_writemask(ps.rTmp, TGSI_WRITEMASK_X);
1443 if (key->fog_mode == D3DFOG_EXP) {
1444 ureg_MUL(ureg, rFog, _ZZZZ(vPos), _ZZZZ(_CONST(22)));
1445 ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f));
1446 ureg_EX2(ureg, rFog, _X(rFog));
1447 } else
1448 if (key->fog_mode == D3DFOG_EXP2) {
1449 ureg_MUL(ureg, rFog, _ZZZZ(vPos), _ZZZZ(_CONST(22)));
1450 ureg_MUL(ureg, rFog, _X(rFog), _X(rFog));
1451 ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f));
1452 ureg_EX2(ureg, rFog, _X(rFog));
1453 } else
1454 if (key->fog_mode == D3DFOG_LINEAR) {
1455 ureg_SUB(ureg, rFog, _XXXX(_CONST(22)), _ZZZZ(vPos));
1456 ureg_MUL(ureg, ureg_saturate(rFog), _X(rFog), _YYYY(_CONST(22)));
1457 }
1458 ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _X(rFog), ps.rCurSrc, _CONST(21));
1459 ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc);
1460 } else
1461 if (key->fog) {
1462 struct ureg_src vFog = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_FOG, 0, TGSI_INTERPOLATE_PERSPECTIVE);
1463 ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _XXXX(vFog), ps.rCurSrc, _CONST(21));
1464 ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc);
1465 } else {
1466 ureg_MOV(ureg, oCol, ps.rCurSrc);
1467 }
1468
1469 ureg_END(ureg);
1470 nine_ureg_tgsi_dump(ureg, FALSE);
1471 return ureg_create_shader_and_destroy(ureg, device->pipe);
1472 }
1473
1474 static struct NineVertexShader9 *
1475 nine_ff_get_vs(struct NineDevice9 *device)
1476 {
1477 const struct nine_state *state = &device->state;
1478 struct NineVertexShader9 *vs;
1479 enum pipe_error err;
1480 struct vs_build_ctx bld;
1481 struct nine_ff_vs_key key;
1482 unsigned s, i;
1483 char input_texture_coord[8];
1484
1485 assert(sizeof(key) <= sizeof(key.value32));
1486
1487 memset(&key, 0, sizeof(key));
1488 memset(&bld, 0, sizeof(bld));
1489 memset(&input_texture_coord, 0, sizeof(input_texture_coord));
1490
1491 bld.key = &key;
1492
1493 /* FIXME: this shouldn't be NULL, but it is on init */
1494 if (state->vdecl) {
1495 key.color0in_one = 1;
1496 key.color1in_one = 1;
1497 for (i = 0; i < state->vdecl->nelems; i++) {
1498 uint16_t usage = state->vdecl->usage_map[i];
1499 if (usage == NINE_DECLUSAGE_POSITIONT)
1500 key.position_t = 1;
1501 else if (usage == NINE_DECLUSAGE_i(COLOR, 0))
1502 key.color0in_one = 0;
1503 else if (usage == NINE_DECLUSAGE_i(COLOR, 1))
1504 key.color1in_one = 0;
1505 else if (usage == NINE_DECLUSAGE_PSIZE)
1506 key.vertexpointsize = 1;
1507 else if (usage % NINE_DECLUSAGE_COUNT == NINE_DECLUSAGE_TEXCOORD) {
1508 s = usage / NINE_DECLUSAGE_COUNT;
1509 if (s < 8)
1510 input_texture_coord[s] = nine_decltype_get_dim(state->vdecl->decls[i].Type);
1511 else
1512 DBG("FF given texture coordinate >= 8. Ignoring\n");
1513 } else if (usage < NINE_DECLUSAGE_NONE)
1514 key.passthrough |= 1 << usage;
1515 }
1516 }
1517 /* ff vs + ps 3.0: some elements are passed to the ps (wine test).
1518 * We do restrict to indices 0 */
1519 key.passthrough &= ~((1 << NINE_DECLUSAGE_POSITION) | (1 << NINE_DECLUSAGE_PSIZE) |
1520 (1 << NINE_DECLUSAGE_TEXCOORD) | (1 << NINE_DECLUSAGE_POSITIONT) |
1521 (1 << NINE_DECLUSAGE_TESSFACTOR) | (1 << NINE_DECLUSAGE_SAMPLE));
1522 key.pointscale = !!state->rs[D3DRS_POINTSCALEENABLE];
1523
1524 key.lighting = !!state->rs[D3DRS_LIGHTING] && state->ff.num_lights_active;
1525 key.darkness = !!state->rs[D3DRS_LIGHTING] && !state->ff.num_lights_active;
1526 if (key.position_t) {
1527 key.darkness = 0; /* |= key.lighting; */ /* XXX ? */
1528 key.lighting = 0;
1529 }
1530 if ((key.lighting | key.darkness) && state->rs[D3DRS_COLORVERTEX]) {
1531 key.mtl_diffuse = state->rs[D3DRS_DIFFUSEMATERIALSOURCE];
1532 key.mtl_ambient = state->rs[D3DRS_AMBIENTMATERIALSOURCE];
1533 key.mtl_specular = state->rs[D3DRS_SPECULARMATERIALSOURCE];
1534 key.mtl_emissive = state->rs[D3DRS_EMISSIVEMATERIALSOURCE];
1535 }
1536 key.fog = !!state->rs[D3DRS_FOGENABLE];
1537 key.fog_mode = state->rs[D3DRS_FOGENABLE] ? state->rs[D3DRS_FOGVERTEXMODE] : 0;
1538 if (key.fog_mode)
1539 key.fog_range = !key.position_t && state->rs[D3DRS_RANGEFOGENABLE];
1540
1541 key.localviewer = !!state->rs[D3DRS_LOCALVIEWER];
1542 key.specular_enable = !!state->rs[D3DRS_SPECULARENABLE];
1543
1544 if (state->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) {
1545 key.vertexblend_indexed = !!state->rs[D3DRS_INDEXEDVERTEXBLENDENABLE];
1546
1547 switch (state->rs[D3DRS_VERTEXBLEND]) {
1548 case D3DVBF_0WEIGHTS: key.vertexblend = key.vertexblend_indexed; break;
1549 case D3DVBF_1WEIGHTS: key.vertexblend = 2; break;
1550 case D3DVBF_2WEIGHTS: key.vertexblend = 3; break;
1551 case D3DVBF_3WEIGHTS: key.vertexblend = 4; break;
1552 case D3DVBF_TWEENING: key.vertextween = 1; break;
1553 default:
1554 assert(!"invalid D3DVBF");
1555 break;
1556 }
1557 }
1558
1559 for (s = 0; s < 8; ++s) {
1560 unsigned gen = (state->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] >> 16) + 1;
1561 unsigned dim;
1562
1563 if (key.position_t && gen > NINED3DTSS_TCI_PASSTHRU)
1564 gen = NINED3DTSS_TCI_PASSTHRU;
1565
1566 if (!input_texture_coord[s] && gen == NINED3DTSS_TCI_PASSTHRU)
1567 gen = NINED3DTSS_TCI_DISABLE;
1568
1569 key.tc_gen |= gen << (s * 3);
1570 key.tc_idx |= (state->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] & 7) << (s * 3);
1571 key.tc_dim_input |= ((input_texture_coord[s]-1) & 0x3) << (s * 2);
1572
1573 dim = state->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & 0x7;
1574 if (dim > 4)
1575 dim = input_texture_coord[s];
1576 if (dim == 1) /* NV behaviour */
1577 dim = 0;
1578 key.tc_dim_output |= dim << (s * 3);
1579 }
1580
1581 vs = util_hash_table_get(device->ff.ht_vs, &key);
1582 if (vs)
1583 return vs;
1584 NineVertexShader9_new(device, &vs, NULL, nine_ff_build_vs(device, &bld));
1585
1586 nine_ff_prune_vs(device);
1587 if (vs) {
1588 unsigned n;
1589
1590 memcpy(&vs->ff_key, &key, sizeof(vs->ff_key));
1591
1592 err = util_hash_table_set(device->ff.ht_vs, &vs->ff_key, vs);
1593 (void)err;
1594 assert(err == PIPE_OK);
1595 device->ff.num_vs++;
1596 NineUnknown_ConvertRefToBind(NineUnknown(vs));
1597
1598 vs->num_inputs = bld.num_inputs;
1599 for (n = 0; n < bld.num_inputs; ++n)
1600 vs->input_map[n].ndecl = bld.input[n];
1601
1602 vs->position_t = key.position_t;
1603 vs->point_size = key.vertexpointsize | key.pointscale;
1604 }
1605 return vs;
1606 }
1607
1608 static struct NinePixelShader9 *
1609 nine_ff_get_ps(struct NineDevice9 *device)
1610 {
1611 struct nine_state *state = &device->state;
1612 struct NinePixelShader9 *ps;
1613 enum pipe_error err;
1614 struct nine_ff_ps_key key;
1615 unsigned s;
1616 uint8_t sampler_mask = 0;
1617
1618 assert(sizeof(key) <= sizeof(key.value32));
1619
1620 memset(&key, 0, sizeof(key));
1621 for (s = 0; s < 8; ++s) {
1622 key.ts[s].colorop = state->ff.tex_stage[s][D3DTSS_COLOROP];
1623 key.ts[s].alphaop = state->ff.tex_stage[s][D3DTSS_ALPHAOP];
1624 /* MSDN says D3DTOP_DISABLE disables this and all subsequent stages. */
1625 /* ALPHAOP cannot be disabled if COLOROP is enabled. */
1626 if (key.ts[s].colorop == D3DTOP_DISABLE) {
1627 key.ts[s].alphaop = D3DTOP_DISABLE; /* DISABLE == 1, avoid degenerate keys */
1628 break;
1629 }
1630
1631 if (!state->texture[s] &&
1632 state->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE) {
1633 /* This should also disable the stage. */
1634 key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE;
1635 break;
1636 }
1637
1638 if (state->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE)
1639 sampler_mask |= (1 << s);
1640
1641 if (key.ts[s].colorop != D3DTOP_DISABLE) {
1642 uint8_t used_c = ps_d3dtop_args_mask(key.ts[s].colorop);
1643 if (used_c & 0x1) key.ts[s].colorarg0 = state->ff.tex_stage[s][D3DTSS_COLORARG0];
1644 if (used_c & 0x2) key.ts[s].colorarg1 = state->ff.tex_stage[s][D3DTSS_COLORARG1];
1645 if (used_c & 0x4) key.ts[s].colorarg2 = state->ff.tex_stage[s][D3DTSS_COLORARG2];
1646 if (used_c & 0x1) key.colorarg_b4[0] |= (state->ff.tex_stage[s][D3DTSS_COLORARG0] >> 4) << s;
1647 if (used_c & 0x1) key.colorarg_b5[0] |= (state->ff.tex_stage[s][D3DTSS_COLORARG0] >> 5) << s;
1648 if (used_c & 0x2) key.colorarg_b4[1] |= (state->ff.tex_stage[s][D3DTSS_COLORARG1] >> 4) << s;
1649 if (used_c & 0x2) key.colorarg_b5[1] |= (state->ff.tex_stage[s][D3DTSS_COLORARG1] >> 5) << s;
1650 if (used_c & 0x4) key.colorarg_b4[2] |= (state->ff.tex_stage[s][D3DTSS_COLORARG2] >> 4) << s;
1651 if (used_c & 0x4) key.colorarg_b5[2] |= (state->ff.tex_stage[s][D3DTSS_COLORARG2] >> 5) << s;
1652 }
1653 if (key.ts[s].alphaop != D3DTOP_DISABLE) {
1654 uint8_t used_a = ps_d3dtop_args_mask(key.ts[s].alphaop);
1655 if (used_a & 0x1) key.ts[s].alphaarg0 = state->ff.tex_stage[s][D3DTSS_ALPHAARG0];
1656 if (used_a & 0x2) key.ts[s].alphaarg1 = state->ff.tex_stage[s][D3DTSS_ALPHAARG1];
1657 if (used_a & 0x4) key.ts[s].alphaarg2 = state->ff.tex_stage[s][D3DTSS_ALPHAARG2];
1658 if (used_a & 0x1) key.alphaarg_b4[0] |= (state->ff.tex_stage[s][D3DTSS_ALPHAARG0] >> 4) << s;
1659 if (used_a & 0x2) key.alphaarg_b4[1] |= (state->ff.tex_stage[s][D3DTSS_ALPHAARG1] >> 4) << s;
1660 if (used_a & 0x4) key.alphaarg_b4[2] |= (state->ff.tex_stage[s][D3DTSS_ALPHAARG2] >> 4) << s;
1661 }
1662 key.ts[s].resultarg = state->ff.tex_stage[s][D3DTSS_RESULTARG] == D3DTA_TEMP;
1663
1664 if (state->texture[s]) {
1665 switch (state->texture[s]->base.type) {
1666 case D3DRTYPE_TEXTURE: key.ts[s].textarget = 1; break;
1667 case D3DRTYPE_VOLUMETEXTURE: key.ts[s].textarget = 2; break;
1668 case D3DRTYPE_CUBETEXTURE: key.ts[s].textarget = 3; break;
1669 default:
1670 assert(!"unexpected texture type");
1671 break;
1672 }
1673 } else {
1674 key.ts[s].textarget = 1;
1675 }
1676 }
1677
1678 key.projected = nine_ff_get_projected_key(state);
1679
1680 for (; s < 8; ++s)
1681 key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE;
1682 if (state->rs[D3DRS_FOGENABLE])
1683 key.fog_mode = state->rs[D3DRS_FOGTABLEMODE];
1684 key.fog = !!state->rs[D3DRS_FOGENABLE];
1685
1686 ps = util_hash_table_get(device->ff.ht_ps, &key);
1687 if (ps)
1688 return ps;
1689 NinePixelShader9_new(device, &ps, NULL, nine_ff_build_ps(device, &key));
1690
1691 nine_ff_prune_ps(device);
1692 if (ps) {
1693 memcpy(&ps->ff_key, &key, sizeof(ps->ff_key));
1694
1695 err = util_hash_table_set(device->ff.ht_ps, &ps->ff_key, ps);
1696 (void)err;
1697 assert(err == PIPE_OK);
1698 device->ff.num_ps++;
1699 NineUnknown_ConvertRefToBind(NineUnknown(ps));
1700
1701 ps->rt_mask = 0x1;
1702 ps->sampler_mask = sampler_mask;
1703 }
1704 return ps;
1705 }
1706
1707 #define GET_D3DTS(n) nine_state_access_transform(state, D3DTS_##n, FALSE)
1708 #define IS_D3DTS_DIRTY(s,n) ((s)->ff.changed.transform[(D3DTS_##n) / 32] & (1 << ((D3DTS_##n) % 32)))
1709 static void
1710 nine_ff_load_vs_transforms(struct NineDevice9 *device)
1711 {
1712 struct nine_state *state = &device->state;
1713 D3DMATRIX T;
1714 D3DMATRIX *M = (D3DMATRIX *)device->ff.vs_const;
1715 unsigned i;
1716
1717 /* TODO: make this nicer, and only upload the ones we need */
1718 /* TODO: use ff.vs_const as storage of W, V, P matrices */
1719
1720 if (IS_D3DTS_DIRTY(state, WORLD) ||
1721 IS_D3DTS_DIRTY(state, VIEW) ||
1722 IS_D3DTS_DIRTY(state, PROJECTION)) {
1723 /* WVP, WV matrices */
1724 nine_d3d_matrix_matrix_mul(&M[1], GET_D3DTS(WORLD), GET_D3DTS(VIEW));
1725 nine_d3d_matrix_matrix_mul(&M[0], &M[1], GET_D3DTS(PROJECTION));
1726
1727 /* normal matrix == transpose(inverse(WV)) */
1728 nine_d3d_matrix_inverse_3x3(&T, &M[1]);
1729 nine_d3d_matrix_transpose(&M[4], &T);
1730
1731 /* VP matrix */
1732 nine_d3d_matrix_matrix_mul(&M[2], GET_D3DTS(VIEW), GET_D3DTS(PROJECTION));
1733
1734 /* V and W matrix */
1735 M[3] = *GET_D3DTS(VIEW);
1736 M[56] = *GET_D3DTS(WORLD);
1737 }
1738
1739 if (state->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) {
1740 /* load other world matrices */
1741 for (i = 1; i <= 7; ++i)
1742 M[56 + i] = *GET_D3DTS(WORLDMATRIX(i));
1743 }
1744
1745 device->ff.vs_const[30 * 4] = asfloat(state->rs[D3DRS_TWEENFACTOR]);
1746 }
1747
1748 static void
1749 nine_ff_load_lights(struct NineDevice9 *device)
1750 {
1751 struct nine_state *state = &device->state;
1752 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
1753 unsigned l;
1754
1755 if (state->changed.group & NINE_STATE_FF_MATERIAL) {
1756 const D3DMATERIAL9 *mtl = &state->ff.material;
1757
1758 memcpy(&dst[20], &mtl->Diffuse, 4 * sizeof(float));
1759 memcpy(&dst[21], &mtl->Ambient, 4 * sizeof(float));
1760 memcpy(&dst[22], &mtl->Specular, 4 * sizeof(float));
1761 dst[23].x = mtl->Power;
1762 memcpy(&dst[24], &mtl->Emissive, 4 * sizeof(float));
1763 d3dcolor_to_rgba(&dst[25].x, state->rs[D3DRS_AMBIENT]);
1764 dst[19].x = dst[25].x * mtl->Ambient.r + mtl->Emissive.r;
1765 dst[19].y = dst[25].y * mtl->Ambient.g + mtl->Emissive.g;
1766 dst[19].z = dst[25].z * mtl->Ambient.b + mtl->Emissive.b;
1767 dst[19].w = mtl->Ambient.a + mtl->Emissive.a;
1768 }
1769
1770 if (!(state->changed.group & NINE_STATE_FF_LIGHTING))
1771 return;
1772
1773 for (l = 0; l < state->ff.num_lights_active; ++l) {
1774 const D3DLIGHT9 *light = &state->ff.light[state->ff.active_light[l]];
1775
1776 dst[32 + l * 8].x = light->Type;
1777 dst[32 + l * 8].y = light->Attenuation0;
1778 dst[32 + l * 8].z = light->Attenuation1;
1779 dst[32 + l * 8].w = light->Attenuation2;
1780 memcpy(&dst[33 + l * 8].x, &light->Diffuse, sizeof(light->Diffuse));
1781 memcpy(&dst[34 + l * 8].x, &light->Specular, sizeof(light->Specular));
1782 memcpy(&dst[35 + l * 8].x, &light->Ambient, sizeof(light->Ambient));
1783 nine_d3d_vector4_matrix_mul((D3DVECTOR *)&dst[36 + l * 8].x, &light->Position, GET_D3DTS(VIEW));
1784 nine_d3d_vector3_matrix_mul((D3DVECTOR *)&dst[37 + l * 8].x, &light->Direction, GET_D3DTS(VIEW));
1785 dst[36 + l * 8].w = light->Type == D3DLIGHT_DIRECTIONAL ? 1e9f : light->Range;
1786 dst[37 + l * 8].w = light->Falloff;
1787 dst[38 + l * 8].x = cosf(light->Theta * 0.5f);
1788 dst[38 + l * 8].y = cosf(light->Phi * 0.5f);
1789 dst[38 + l * 8].z = 1.0f / (dst[38 + l * 8].x - dst[38 + l * 8].y);
1790 dst[39 + l * 8].w = (l + 1) == state->ff.num_lights_active;
1791 }
1792 }
1793
1794 static void
1795 nine_ff_load_point_and_fog_params(struct NineDevice9 *device)
1796 {
1797 const struct nine_state *state = &device->state;
1798 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
1799
1800 if (!(state->changed.group & NINE_STATE_FF_OTHER))
1801 return;
1802 dst[26].x = asfloat(state->rs[D3DRS_POINTSIZE_MIN]);
1803 dst[26].y = asfloat(state->rs[D3DRS_POINTSIZE_MAX]);
1804 dst[26].z = asfloat(state->rs[D3DRS_POINTSIZE]);
1805 dst[26].w = asfloat(state->rs[D3DRS_POINTSCALE_A]);
1806 dst[27].x = asfloat(state->rs[D3DRS_POINTSCALE_B]);
1807 dst[27].y = asfloat(state->rs[D3DRS_POINTSCALE_C]);
1808 dst[28].x = asfloat(state->rs[D3DRS_FOGEND]);
1809 dst[28].y = 1.0f / (asfloat(state->rs[D3DRS_FOGEND]) - asfloat(state->rs[D3DRS_FOGSTART]));
1810 if (isinf(dst[28].y))
1811 dst[28].y = 0.0f;
1812 dst[28].z = asfloat(state->rs[D3DRS_FOGDENSITY]);
1813 }
1814
1815 static void
1816 nine_ff_load_tex_matrices(struct NineDevice9 *device)
1817 {
1818 struct nine_state *state = &device->state;
1819 D3DMATRIX *M = (D3DMATRIX *)device->ff.vs_const;
1820 unsigned s;
1821
1822 if (!(state->ff.changed.transform[0] & 0xff0000))
1823 return;
1824 for (s = 0; s < 8; ++s) {
1825 if (IS_D3DTS_DIRTY(state, TEXTURE0 + s))
1826 nine_d3d_matrix_transpose(&M[32 + s], nine_state_access_transform(state, D3DTS_TEXTURE0 + s, FALSE));
1827 }
1828 }
1829
1830 static void
1831 nine_ff_load_ps_params(struct NineDevice9 *device)
1832 {
1833 const struct nine_state *state = &device->state;
1834 struct fvec4 *dst = (struct fvec4 *)device->ff.ps_const;
1835 unsigned s;
1836
1837 if (!(state->changed.group & (NINE_STATE_FF_PSSTAGES | NINE_STATE_FF_OTHER)))
1838 return;
1839
1840 for (s = 0; s < 8; ++s)
1841 d3dcolor_to_rgba(&dst[s].x, state->ff.tex_stage[s][D3DTSS_CONSTANT]);
1842
1843 for (s = 0; s < 8; ++s) {
1844 dst[8 + s].x = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVMAT00]);
1845 dst[8 + s].y = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVMAT01]);
1846 dst[8 + s].z = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVMAT10]);
1847 dst[8 + s].w = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVMAT11]);
1848 if (s & 1) {
1849 dst[16 + s / 2].z = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]);
1850 dst[16 + s / 2].w = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]);
1851 } else {
1852 dst[16 + s / 2].x = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]);
1853 dst[16 + s / 2].y = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]);
1854 }
1855 }
1856
1857 d3dcolor_to_rgba(&dst[20].x, state->rs[D3DRS_TEXTUREFACTOR]);
1858 d3dcolor_to_rgba(&dst[21].x, state->rs[D3DRS_FOGCOLOR]);
1859 dst[22].x = asfloat(state->rs[D3DRS_FOGEND]);
1860 dst[22].y = 1.0f / (asfloat(state->rs[D3DRS_FOGEND]) - asfloat(state->rs[D3DRS_FOGSTART]));
1861 dst[22].z = asfloat(state->rs[D3DRS_FOGDENSITY]);
1862 }
1863
1864 static void
1865 nine_ff_load_viewport_info(struct NineDevice9 *device)
1866 {
1867 D3DVIEWPORT9 *viewport = &device->state.viewport;
1868 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
1869 float diffZ = viewport->MaxZ - viewport->MinZ;
1870
1871 /* Note: the other functions avoids to fill the const again if nothing changed.
1872 * But we don't have much to fill, and adding code to allow that may be complex
1873 * so just fill it always */
1874 dst[100].x = 2.0f / (float)(viewport->Width);
1875 dst[100].y = 2.0f / (float)(viewport->Height);
1876 dst[100].z = (diffZ == 0.0f) ? 0.0f : (1.0f / diffZ);
1877 dst[100].w = (float)(viewport->Width);
1878 dst[101].x = (float)(viewport->X);
1879 dst[101].y = (float)(viewport->Y);
1880 dst[101].z = (float)(viewport->MinZ);
1881 }
1882
1883 void
1884 nine_ff_update(struct NineDevice9 *device)
1885 {
1886 struct nine_state *state = &device->state;
1887 struct pipe_constant_buffer cb;
1888
1889 DBG("vs=%p ps=%p\n", device->state.vs, device->state.ps);
1890
1891 /* NOTE: the only reference belongs to the hash table */
1892 if (!state->programmable_vs) {
1893 device->ff.vs = nine_ff_get_vs(device);
1894 device->state.changed.group |= NINE_STATE_VS;
1895 }
1896 if (!device->state.ps) {
1897 device->ff.ps = nine_ff_get_ps(device);
1898 device->state.changed.group |= NINE_STATE_PS;
1899 }
1900
1901 if (!state->programmable_vs) {
1902 nine_ff_load_vs_transforms(device);
1903 nine_ff_load_tex_matrices(device);
1904 nine_ff_load_lights(device);
1905 nine_ff_load_point_and_fog_params(device);
1906 nine_ff_load_viewport_info(device);
1907
1908 memset(state->ff.changed.transform, 0, sizeof(state->ff.changed.transform));
1909
1910 cb.buffer_offset = 0;
1911 cb.buffer = NULL;
1912 cb.user_buffer = device->ff.vs_const;
1913 cb.buffer_size = NINE_FF_NUM_VS_CONST * 4 * sizeof(float);
1914
1915 if (!device->driver_caps.user_cbufs) {
1916 u_upload_data(device->constbuf_uploader,
1917 0,
1918 cb.buffer_size,
1919 device->constbuf_alignment,
1920 cb.user_buffer,
1921 &cb.buffer_offset,
1922 &cb.buffer);
1923 u_upload_unmap(device->constbuf_uploader);
1924 cb.user_buffer = NULL;
1925 }
1926 state->pipe.cb_vs_ff = cb;
1927 state->commit |= NINE_STATE_COMMIT_CONST_VS;
1928 }
1929
1930 if (!device->state.ps) {
1931 nine_ff_load_ps_params(device);
1932
1933 cb.buffer_offset = 0;
1934 cb.buffer = NULL;
1935 cb.user_buffer = device->ff.ps_const;
1936 cb.buffer_size = NINE_FF_NUM_PS_CONST * 4 * sizeof(float);
1937
1938 if (!device->driver_caps.user_cbufs) {
1939 u_upload_data(device->constbuf_uploader,
1940 0,
1941 cb.buffer_size,
1942 device->constbuf_alignment,
1943 cb.user_buffer,
1944 &cb.buffer_offset,
1945 &cb.buffer);
1946 u_upload_unmap(device->constbuf_uploader);
1947 cb.user_buffer = NULL;
1948 }
1949 state->pipe.cb_ps_ff = cb;
1950 state->commit |= NINE_STATE_COMMIT_CONST_PS;
1951 }
1952
1953 device->state.changed.group &= ~NINE_STATE_FF;
1954 }
1955
1956
1957 boolean
1958 nine_ff_init(struct NineDevice9 *device)
1959 {
1960 device->ff.ht_vs = util_hash_table_create(nine_ff_vs_key_hash,
1961 nine_ff_vs_key_comp);
1962 device->ff.ht_ps = util_hash_table_create(nine_ff_ps_key_hash,
1963 nine_ff_ps_key_comp);
1964
1965 device->ff.ht_fvf = util_hash_table_create(nine_ff_fvf_key_hash,
1966 nine_ff_fvf_key_comp);
1967
1968 device->ff.vs_const = CALLOC(NINE_FF_NUM_VS_CONST, 4 * sizeof(float));
1969 device->ff.ps_const = CALLOC(NINE_FF_NUM_PS_CONST, 4 * sizeof(float));
1970
1971 return device->ff.ht_vs && device->ff.ht_ps &&
1972 device->ff.ht_fvf &&
1973 device->ff.vs_const && device->ff.ps_const;
1974 }
1975
1976 static enum pipe_error nine_ff_ht_delete_cb(void *key, void *value, void *data)
1977 {
1978 NineUnknown_Unbind(NineUnknown(value));
1979 return PIPE_OK;
1980 }
1981
1982 void
1983 nine_ff_fini(struct NineDevice9 *device)
1984 {
1985 if (device->ff.ht_vs) {
1986 util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL);
1987 util_hash_table_destroy(device->ff.ht_vs);
1988 }
1989 if (device->ff.ht_ps) {
1990 util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL);
1991 util_hash_table_destroy(device->ff.ht_ps);
1992 }
1993 if (device->ff.ht_fvf) {
1994 util_hash_table_foreach(device->ff.ht_fvf, nine_ff_ht_delete_cb, NULL);
1995 util_hash_table_destroy(device->ff.ht_fvf);
1996 }
1997 device->ff.vs = NULL; /* destroyed by unbinding from hash table */
1998 device->ff.ps = NULL;
1999
2000 FREE(device->ff.vs_const);
2001 FREE(device->ff.ps_const);
2002 }
2003
2004 static void
2005 nine_ff_prune_vs(struct NineDevice9 *device)
2006 {
2007 if (device->ff.num_vs > 100) {
2008 /* could destroy the bound one here, so unbind */
2009 device->pipe->bind_vs_state(device->pipe, NULL);
2010 util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL);
2011 util_hash_table_clear(device->ff.ht_vs);
2012 device->ff.num_vs = 0;
2013 device->state.changed.group |= NINE_STATE_VS;
2014 }
2015 }
2016 static void
2017 nine_ff_prune_ps(struct NineDevice9 *device)
2018 {
2019 if (device->ff.num_ps > 100) {
2020 /* could destroy the bound one here, so unbind */
2021 device->pipe->bind_fs_state(device->pipe, NULL);
2022 util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL);
2023 util_hash_table_clear(device->ff.ht_ps);
2024 device->ff.num_ps = 0;
2025 device->state.changed.group |= NINE_STATE_PS;
2026 }
2027 }
2028
2029 /* ========================================================================== */
2030
2031 /* Matrix multiplication:
2032 *
2033 * in memory: 0 1 2 3 (row major)
2034 * 4 5 6 7
2035 * 8 9 a b
2036 * c d e f
2037 *
2038 * cA cB cC cD
2039 * r0 = (r0 * cA) (r0 * cB) . .
2040 * r1 = (r1 * cA) (r1 * cB)
2041 * r2 = (r2 * cA) .
2042 * r3 = (r3 * cA) .
2043 *
2044 * r: (11) (12) (13) (14)
2045 * (21) (22) (23) (24)
2046 * (31) (32) (33) (34)
2047 * (41) (42) (43) (44)
2048 * l: (11 12 13 14)
2049 * (21 22 23 24)
2050 * (31 32 33 34)
2051 * (41 42 43 44)
2052 *
2053 * v: (x y z 1 )
2054 *
2055 * t.xyzw = MUL(v.xxxx, r[0]);
2056 * t.xyzw = MAD(v.yyyy, r[1], t.xyzw);
2057 * t.xyzw = MAD(v.zzzz, r[2], t.xyzw);
2058 * v.xyzw = MAD(v.wwww, r[3], t.xyzw);
2059 *
2060 * v.x = DP4(v, c[0]);
2061 * v.y = DP4(v, c[1]);
2062 * v.z = DP4(v, c[2]);
2063 * v.w = DP4(v, c[3]) = 1
2064 */
2065
2066 /*
2067 static void
2068 nine_D3DMATRIX_print(const D3DMATRIX *M)
2069 {
2070 DBG("\n(%f %f %f %f)\n"
2071 "(%f %f %f %f)\n"
2072 "(%f %f %f %f)\n"
2073 "(%f %f %f %f)\n",
2074 M->m[0][0], M->m[0][1], M->m[0][2], M->m[0][3],
2075 M->m[1][0], M->m[1][1], M->m[1][2], M->m[1][3],
2076 M->m[2][0], M->m[2][1], M->m[2][2], M->m[2][3],
2077 M->m[3][0], M->m[3][1], M->m[3][2], M->m[3][3]);
2078 }
2079 */
2080
2081 static inline float
2082 nine_DP4_row_col(const D3DMATRIX *A, int r, const D3DMATRIX *B, int c)
2083 {
2084 return A->m[r][0] * B->m[0][c] +
2085 A->m[r][1] * B->m[1][c] +
2086 A->m[r][2] * B->m[2][c] +
2087 A->m[r][3] * B->m[3][c];
2088 }
2089
2090 static inline float
2091 nine_DP4_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c)
2092 {
2093 return v->x * M->m[0][c] +
2094 v->y * M->m[1][c] +
2095 v->z * M->m[2][c] +
2096 1.0f * M->m[3][c];
2097 }
2098
2099 static inline float
2100 nine_DP3_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c)
2101 {
2102 return v->x * M->m[0][c] +
2103 v->y * M->m[1][c] +
2104 v->z * M->m[2][c];
2105 }
2106
2107 void
2108 nine_d3d_matrix_matrix_mul(D3DMATRIX *D, const D3DMATRIX *L, const D3DMATRIX *R)
2109 {
2110 D->_11 = nine_DP4_row_col(L, 0, R, 0);
2111 D->_12 = nine_DP4_row_col(L, 0, R, 1);
2112 D->_13 = nine_DP4_row_col(L, 0, R, 2);
2113 D->_14 = nine_DP4_row_col(L, 0, R, 3);
2114
2115 D->_21 = nine_DP4_row_col(L, 1, R, 0);
2116 D->_22 = nine_DP4_row_col(L, 1, R, 1);
2117 D->_23 = nine_DP4_row_col(L, 1, R, 2);
2118 D->_24 = nine_DP4_row_col(L, 1, R, 3);
2119
2120 D->_31 = nine_DP4_row_col(L, 2, R, 0);
2121 D->_32 = nine_DP4_row_col(L, 2, R, 1);
2122 D->_33 = nine_DP4_row_col(L, 2, R, 2);
2123 D->_34 = nine_DP4_row_col(L, 2, R, 3);
2124
2125 D->_41 = nine_DP4_row_col(L, 3, R, 0);
2126 D->_42 = nine_DP4_row_col(L, 3, R, 1);
2127 D->_43 = nine_DP4_row_col(L, 3, R, 2);
2128 D->_44 = nine_DP4_row_col(L, 3, R, 3);
2129 }
2130
2131 void
2132 nine_d3d_vector4_matrix_mul(D3DVECTOR *d, const D3DVECTOR *v, const D3DMATRIX *M)
2133 {
2134 d->x = nine_DP4_vec_col(v, M, 0);
2135 d->y = nine_DP4_vec_col(v, M, 1);
2136 d->z = nine_DP4_vec_col(v, M, 2);
2137 }
2138
2139 void
2140 nine_d3d_vector3_matrix_mul(D3DVECTOR *d, const D3DVECTOR *v, const D3DMATRIX *M)
2141 {
2142 d->x = nine_DP3_vec_col(v, M, 0);
2143 d->y = nine_DP3_vec_col(v, M, 1);
2144 d->z = nine_DP3_vec_col(v, M, 2);
2145 }
2146
2147 void
2148 nine_d3d_matrix_transpose(D3DMATRIX *D, const D3DMATRIX *M)
2149 {
2150 unsigned i, j;
2151 for (i = 0; i < 4; ++i)
2152 for (j = 0; j < 4; ++j)
2153 D->m[i][j] = M->m[j][i];
2154 }
2155
2156 #define _M_ADD_PROD_1i_2j_3k_4l(i,j,k,l) do { \
2157 float t = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \
2158 if (t > 0.0f) pos += t; else neg += t; } while(0)
2159
2160 #define _M_SUB_PROD_1i_2j_3k_4l(i,j,k,l) do { \
2161 float t = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \
2162 if (t > 0.0f) neg -= t; else pos -= t; } while(0)
2163 float
2164 nine_d3d_matrix_det(const D3DMATRIX *M)
2165 {
2166 float pos = 0.0f;
2167 float neg = 0.0f;
2168
2169 _M_ADD_PROD_1i_2j_3k_4l(1, 2, 3, 4);
2170 _M_ADD_PROD_1i_2j_3k_4l(1, 3, 4, 2);
2171 _M_ADD_PROD_1i_2j_3k_4l(1, 4, 2, 3);
2172
2173 _M_ADD_PROD_1i_2j_3k_4l(2, 1, 4, 3);
2174 _M_ADD_PROD_1i_2j_3k_4l(2, 3, 1, 4);
2175 _M_ADD_PROD_1i_2j_3k_4l(2, 4, 3, 1);
2176
2177 _M_ADD_PROD_1i_2j_3k_4l(3, 1, 2, 4);
2178 _M_ADD_PROD_1i_2j_3k_4l(3, 2, 4, 1);
2179 _M_ADD_PROD_1i_2j_3k_4l(3, 4, 1, 2);
2180
2181 _M_ADD_PROD_1i_2j_3k_4l(4, 1, 3, 2);
2182 _M_ADD_PROD_1i_2j_3k_4l(4, 2, 1, 3);
2183 _M_ADD_PROD_1i_2j_3k_4l(4, 3, 2, 1);
2184
2185 _M_SUB_PROD_1i_2j_3k_4l(1, 2, 4, 3);
2186 _M_SUB_PROD_1i_2j_3k_4l(1, 3, 2, 4);
2187 _M_SUB_PROD_1i_2j_3k_4l(1, 4, 3, 2);
2188
2189 _M_SUB_PROD_1i_2j_3k_4l(2, 1, 3, 4);
2190 _M_SUB_PROD_1i_2j_3k_4l(2, 3, 4, 1);
2191 _M_SUB_PROD_1i_2j_3k_4l(2, 4, 1, 3);
2192
2193 _M_SUB_PROD_1i_2j_3k_4l(3, 1, 4, 2);
2194 _M_SUB_PROD_1i_2j_3k_4l(3, 2, 1, 4);
2195 _M_SUB_PROD_1i_2j_3k_4l(3, 4, 2, 1);
2196
2197 _M_SUB_PROD_1i_2j_3k_4l(4, 1, 2, 3);
2198 _M_SUB_PROD_1i_2j_3k_4l(4, 2, 3, 1);
2199 _M_SUB_PROD_1i_2j_3k_4l(4, 3, 1, 2);
2200
2201 return pos + neg;
2202 }
2203
2204 /* XXX: Probably better to just use src/mesa/math/m_matrix.c because
2205 * I have no idea where this code came from.
2206 */
2207 void
2208 nine_d3d_matrix_inverse(D3DMATRIX *D, const D3DMATRIX *M)
2209 {
2210 int i, k;
2211 float det;
2212
2213 D->m[0][0] =
2214 M->m[1][1] * M->m[2][2] * M->m[3][3] -
2215 M->m[1][1] * M->m[3][2] * M->m[2][3] -
2216 M->m[1][2] * M->m[2][1] * M->m[3][3] +
2217 M->m[1][2] * M->m[3][1] * M->m[2][3] +
2218 M->m[1][3] * M->m[2][1] * M->m[3][2] -
2219 M->m[1][3] * M->m[3][1] * M->m[2][2];
2220
2221 D->m[0][1] =
2222 -M->m[0][1] * M->m[2][2] * M->m[3][3] +
2223 M->m[0][1] * M->m[3][2] * M->m[2][3] +
2224 M->m[0][2] * M->m[2][1] * M->m[3][3] -
2225 M->m[0][2] * M->m[3][1] * M->m[2][3] -
2226 M->m[0][3] * M->m[2][1] * M->m[3][2] +
2227 M->m[0][3] * M->m[3][1] * M->m[2][2];
2228
2229 D->m[0][2] =
2230 M->m[0][1] * M->m[1][2] * M->m[3][3] -
2231 M->m[0][1] * M->m[3][2] * M->m[1][3] -
2232 M->m[0][2] * M->m[1][1] * M->m[3][3] +
2233 M->m[0][2] * M->m[3][1] * M->m[1][3] +
2234 M->m[0][3] * M->m[1][1] * M->m[3][2] -
2235 M->m[0][3] * M->m[3][1] * M->m[1][2];
2236
2237 D->m[0][3] =
2238 -M->m[0][1] * M->m[1][2] * M->m[2][3] +
2239 M->m[0][1] * M->m[2][2] * M->m[1][3] +
2240 M->m[0][2] * M->m[1][1] * M->m[2][3] -
2241 M->m[0][2] * M->m[2][1] * M->m[1][3] -
2242 M->m[0][3] * M->m[1][1] * M->m[2][2] +
2243 M->m[0][3] * M->m[2][1] * M->m[1][2];
2244
2245 D->m[1][0] =
2246 -M->m[1][0] * M->m[2][2] * M->m[3][3] +
2247 M->m[1][0] * M->m[3][2] * M->m[2][3] +
2248 M->m[1][2] * M->m[2][0] * M->m[3][3] -
2249 M->m[1][2] * M->m[3][0] * M->m[2][3] -
2250 M->m[1][3] * M->m[2][0] * M->m[3][2] +
2251 M->m[1][3] * M->m[3][0] * M->m[2][2];
2252
2253 D->m[1][1] =
2254 M->m[0][0] * M->m[2][2] * M->m[3][3] -
2255 M->m[0][0] * M->m[3][2] * M->m[2][3] -
2256 M->m[0][2] * M->m[2][0] * M->m[3][3] +
2257 M->m[0][2] * M->m[3][0] * M->m[2][3] +
2258 M->m[0][3] * M->m[2][0] * M->m[3][2] -
2259 M->m[0][3] * M->m[3][0] * M->m[2][2];
2260
2261 D->m[1][2] =
2262 -M->m[0][0] * M->m[1][2] * M->m[3][3] +
2263 M->m[0][0] * M->m[3][2] * M->m[1][3] +
2264 M->m[0][2] * M->m[1][0] * M->m[3][3] -
2265 M->m[0][2] * M->m[3][0] * M->m[1][3] -
2266 M->m[0][3] * M->m[1][0] * M->m[3][2] +
2267 M->m[0][3] * M->m[3][0] * M->m[1][2];
2268
2269 D->m[1][3] =
2270 M->m[0][0] * M->m[1][2] * M->m[2][3] -
2271 M->m[0][0] * M->m[2][2] * M->m[1][3] -
2272 M->m[0][2] * M->m[1][0] * M->m[2][3] +
2273 M->m[0][2] * M->m[2][0] * M->m[1][3] +
2274 M->m[0][3] * M->m[1][0] * M->m[2][2] -
2275 M->m[0][3] * M->m[2][0] * M->m[1][2];
2276
2277 D->m[2][0] =
2278 M->m[1][0] * M->m[2][1] * M->m[3][3] -
2279 M->m[1][0] * M->m[3][1] * M->m[2][3] -
2280 M->m[1][1] * M->m[2][0] * M->m[3][3] +
2281 M->m[1][1] * M->m[3][0] * M->m[2][3] +
2282 M->m[1][3] * M->m[2][0] * M->m[3][1] -
2283 M->m[1][3] * M->m[3][0] * M->m[2][1];
2284
2285 D->m[2][1] =
2286 -M->m[0][0] * M->m[2][1] * M->m[3][3] +
2287 M->m[0][0] * M->m[3][1] * M->m[2][3] +
2288 M->m[0][1] * M->m[2][0] * M->m[3][3] -
2289 M->m[0][1] * M->m[3][0] * M->m[2][3] -
2290 M->m[0][3] * M->m[2][0] * M->m[3][1] +
2291 M->m[0][3] * M->m[3][0] * M->m[2][1];
2292
2293 D->m[2][2] =
2294 M->m[0][0] * M->m[1][1] * M->m[3][3] -
2295 M->m[0][0] * M->m[3][1] * M->m[1][3] -
2296 M->m[0][1] * M->m[1][0] * M->m[3][3] +
2297 M->m[0][1] * M->m[3][0] * M->m[1][3] +
2298 M->m[0][3] * M->m[1][0] * M->m[3][1] -
2299 M->m[0][3] * M->m[3][0] * M->m[1][1];
2300
2301 D->m[2][3] =
2302 -M->m[0][0] * M->m[1][1] * M->m[2][3] +
2303 M->m[0][0] * M->m[2][1] * M->m[1][3] +
2304 M->m[0][1] * M->m[1][0] * M->m[2][3] -
2305 M->m[0][1] * M->m[2][0] * M->m[1][3] -
2306 M->m[0][3] * M->m[1][0] * M->m[2][1] +
2307 M->m[0][3] * M->m[2][0] * M->m[1][1];
2308
2309 D->m[3][0] =
2310 -M->m[1][0] * M->m[2][1] * M->m[3][2] +
2311 M->m[1][0] * M->m[3][1] * M->m[2][2] +
2312 M->m[1][1] * M->m[2][0] * M->m[3][2] -
2313 M->m[1][1] * M->m[3][0] * M->m[2][2] -
2314 M->m[1][2] * M->m[2][0] * M->m[3][1] +
2315 M->m[1][2] * M->m[3][0] * M->m[2][1];
2316
2317 D->m[3][1] =
2318 M->m[0][0] * M->m[2][1] * M->m[3][2] -
2319 M->m[0][0] * M->m[3][1] * M->m[2][2] -
2320 M->m[0][1] * M->m[2][0] * M->m[3][2] +
2321 M->m[0][1] * M->m[3][0] * M->m[2][2] +
2322 M->m[0][2] * M->m[2][0] * M->m[3][1] -
2323 M->m[0][2] * M->m[3][0] * M->m[2][1];
2324
2325 D->m[3][2] =
2326 -M->m[0][0] * M->m[1][1] * M->m[3][2] +
2327 M->m[0][0] * M->m[3][1] * M->m[1][2] +
2328 M->m[0][1] * M->m[1][0] * M->m[3][2] -
2329 M->m[0][1] * M->m[3][0] * M->m[1][2] -
2330 M->m[0][2] * M->m[1][0] * M->m[3][1] +
2331 M->m[0][2] * M->m[3][0] * M->m[1][1];
2332
2333 D->m[3][3] =
2334 M->m[0][0] * M->m[1][1] * M->m[2][2] -
2335 M->m[0][0] * M->m[2][1] * M->m[1][2] -
2336 M->m[0][1] * M->m[1][0] * M->m[2][2] +
2337 M->m[0][1] * M->m[2][0] * M->m[1][2] +
2338 M->m[0][2] * M->m[1][0] * M->m[2][1] -
2339 M->m[0][2] * M->m[2][0] * M->m[1][1];
2340
2341 det =
2342 M->m[0][0] * D->m[0][0] +
2343 M->m[1][0] * D->m[0][1] +
2344 M->m[2][0] * D->m[0][2] +
2345 M->m[3][0] * D->m[0][3];
2346
2347 det = 1.0 / det;
2348
2349 for (i = 0; i < 4; i++)
2350 for (k = 0; k < 4; k++)
2351 D->m[i][k] *= det;
2352
2353 #ifdef DEBUG
2354 {
2355 D3DMATRIX I;
2356
2357 nine_d3d_matrix_matrix_mul(&I, D, M);
2358
2359 for (i = 0; i < 4; ++i)
2360 for (k = 0; k < 4; ++k)
2361 if (fabsf(I.m[i][k] - (float)(i == k)) > 1e-3)
2362 DBG("Matrix inversion check FAILED !\n");
2363 }
2364 #endif
2365 }
2366
2367 /* TODO: don't use 4x4 inverse, unless this gets all nicely inlined ? */
2368 void
2369 nine_d3d_matrix_inverse_3x3(D3DMATRIX *D, const D3DMATRIX *M)
2370 {
2371 D3DMATRIX T;
2372 unsigned i, j;
2373
2374 for (i = 0; i < 3; ++i)
2375 for (j = 0; j < 3; ++j)
2376 T.m[i][j] = M->m[i][j];
2377 for (i = 0; i < 3; ++i) {
2378 T.m[i][3] = 0.0f;
2379 T.m[3][i] = 0.0f;
2380 }
2381 T.m[3][3] = 1.0f;
2382
2383 nine_d3d_matrix_inverse(D, &T);
2384 }