st/nine: Implement SPHEREMAP
[mesa.git] / src / gallium / state_trackers / nine / nine_ff.c
1
2 /* FF is big and ugly so feel free to write lines as long as you like.
3 * Aieeeeeeeee !
4 *
5 * Let me make that clearer:
6 * Aieeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee ! !! !!!
7 */
8
9 #include "device9.h"
10 #include "basetexture9.h"
11 #include "vertexdeclaration9.h"
12 #include "vertexshader9.h"
13 #include "pixelshader9.h"
14 #include "nine_ff.h"
15 #include "nine_defines.h"
16 #include "nine_helpers.h"
17 #include "nine_pipe.h"
18 #include "nine_dump.h"
19
20 #include "pipe/p_context.h"
21 #include "tgsi/tgsi_ureg.h"
22 #include "tgsi/tgsi_dump.h"
23 #include "util/u_box.h"
24 #include "util/u_hash_table.h"
25 #include "util/u_upload_mgr.h"
26
/* Debug channel selected for this file.
 * NOTE(review): presumably consumed by the nine debug macros - confirm. */
#define DBG_CHANNEL DBG_FF

/* Number of four-component constant slots of the fixed-function vertex
 * shader: the "VS FF constants layout" comment in this file describes slots
 * up to CONST[195] (last row of the last world matrix). */
#define NINE_FF_NUM_VS_CONST 196
/* Number of constant slots of the fixed-function pixel shader.
 * NOTE(review): the PS constant layout is not visible in this part of the
 * file - confirm against the PS builder. */
#define NINE_FF_NUM_PS_CONST 24
31
/* Plain four-component float vector (x, y, z, w stored in that order). */
struct fvec4
{
    float x;
    float y;
    float z;
    float w;
};
36
/* Key describing one variant of the generated fixed-function vertex shader.
 *
 * The anonymous bit-field struct is the view used to fill in and query the
 * individual options; value64[] and value32[] alias exactly the same storage
 * so that the key can be hashed (XOR over value32[]) and compared (memcmp
 * over value64[]) as plain words. Because of that, the pad bits are part of
 * the hashed/compared data as well.
 * NOTE(review): callers presumably zero the whole key before filling it so
 * that the pad bits compare equal - confirm at the key construction site.
 */
struct nine_ff_vs_key
{
    union {
        struct {
            /* --- 32-bit word 0: general options --- */
            uint32_t position_t : 1;          /* input is POSITIONT instead of POSITION */
            uint32_t lighting : 1;
            uint32_t darkness : 1;            /* lighting enabled but no active lights */
            uint32_t localviewer : 1;
            uint32_t vertexpointsize : 1;
            uint32_t pointscale : 1;
            uint32_t vertexblend : 3;         /* number of blended world matrices */
            uint32_t vertexblend_indexed : 1;
            uint32_t vertextween : 1;
            uint32_t mtl_diffuse : 2;         /* 0 = material, 1 = color1, 2 = color2 */
            uint32_t mtl_ambient : 2;
            uint32_t mtl_specular : 2;
            uint32_t mtl_emissive : 2;
            uint32_t fog_mode : 2;
            uint32_t fog_range : 1;
            uint32_t color0in_one : 1;
            uint32_t color1in_zero : 1;
            uint32_t fog : 1;
            uint32_t normalizenormals : 1;
            uint32_t pad1 : 6;
            /* --- 32-bit word 1 --- */
            uint32_t tc_dim_input : 16;       /* 8 * 2 bits, stored as (dimension - 1) */
            uint32_t pad2 : 16;
            /* --- 32-bit word 2 --- */
            uint32_t tc_dim_output : 24;      /* 8 * 3 bits, 0 = do not transform */
            uint32_t pad3 : 8;
            /* --- 32-bit word 3 --- */
            uint32_t tc_gen : 24;             /* 8 * 3 bits */
            uint32_t pad4 : 8;
            /* --- 32-bit word 4 --- */
            uint32_t tc_idx : 24;             /* 8 * 3 bits */
            uint32_t pad5 : 8;
            /* --- 32-bit word 5 --- */
            uint32_t passthrough;             /* bit mask indexed by NINE_DECLUSAGE_x */
        };
        uint64_t value64[3]; /* don't forget to resize VertexShader9.ff_key */
        uint32_t value32[6];
    };
};

/* The hash and compare helpers only look at value32[] / value64[]; make sure
 * those arrays really cover the whole bit-field view, so that growing the
 * bit-fields without resizing the arrays fails at compile time instead of
 * silently ignoring key bits. */
_Static_assert(sizeof(struct nine_ff_vs_key) ==
               sizeof(((struct nine_ff_vs_key *)0)->value64),
               "nine_ff_vs_key: value64[] must cover the whole key");
_Static_assert(sizeof(((struct nine_ff_vs_key *)0)->value32) ==
               sizeof(((struct nine_ff_vs_key *)0)->value64),
               "nine_ff_vs_key: value32[] and value64[] must have the same size");
75
/* Texture stage state, as packed into nine_ff_ps_key::ts[]:
 *
 * COLOROP       D3DTOP  5 bit
 * ALPHAOP       D3DTOP  5 bit
 * COLORARG0     D3DTA   3 bit
 * COLORARG1     D3DTA   3 bit
 * COLORARG2     D3DTA   3 bit
 * ALPHAARG0     D3DTA   3 bit
 * ALPHAARG1     D3DTA   3 bit
 * ALPHAARG2     D3DTA   3 bit
 * RESULTARG     D3DTA   1 bit (CURRENT:0 or TEMP:1)
 * textarget             2 bit (1D/2D/3D/CUBE)
 * pad                   1 bit
 * ===========================
 *         32 bit per stage
 *
 * The anonymous struct is the view used to fill in the key; value64[] and
 * value32[] alias the same storage and are what the hash and compare helpers
 * operate on, so pad bits/bytes are part of the hashed/compared data.
 * NOTE(review): callers presumably zero the whole key before filling it -
 * confirm at the key construction site.
 */
struct nine_ff_ps_key
{
    union {
        struct {
            struct {
                uint32_t colorop   : 5;
                uint32_t alphaop   : 5;
                uint32_t colorarg0 : 3;
                uint32_t colorarg1 : 3;
                uint32_t colorarg2 : 3;
                uint32_t alphaarg0 : 3;
                uint32_t alphaarg1 : 3;
                uint32_t alphaarg2 : 3;
                uint32_t resultarg : 1; /* CURRENT:0 or TEMP:1 */
                uint32_t textarget : 2; /* 1D/2D/3D/CUBE */
                uint32_t pad       : 1;
                /* that's 32 bit exactly */
            } ts[8];
            uint32_t projected : 16;
            uint32_t fog : 1; /* for vFog coming from VS */
            uint32_t fog_mode : 2;
            uint32_t specular : 1;
            uint32_t pad1 : 12; /* 9 32-bit words with this */
            uint8_t colorarg_b4[3];
            uint8_t colorarg_b5[3];
            uint8_t alphaarg_b4[3]; /* 11 32-bit words plus a byte */
            uint8_t pad2[3];        /* rounds the key up to 12 32-bit words */
        };
        uint64_t value64[6]; /* don't forget to resize PixelShader9.ff_key */
        uint32_t value32[12];
    };
};

/* The hash and compare helpers only look at value32[] / value64[]; make sure
 * those arrays really cover the whole key, so that growing the key without
 * resizing the arrays fails at compile time instead of silently ignoring
 * key bits. */
_Static_assert(sizeof(struct nine_ff_ps_key) ==
               sizeof(((struct nine_ff_ps_key *)0)->value64),
               "nine_ff_ps_key: value64[] must cover the whole key");
_Static_assert(sizeof(((struct nine_ff_ps_key *)0)->value32) ==
               sizeof(((struct nine_ff_ps_key *)0)->value64),
               "nine_ff_ps_key: value32[] and value64[] must have the same size");
_Static_assert(sizeof(((struct nine_ff_ps_key *)0)->ts[0]) == sizeof(uint32_t),
               "nine_ff_ps_key: each texture stage must pack into 32 bits");
123
124 static unsigned nine_ff_vs_key_hash(void *key)
125 {
126 struct nine_ff_vs_key *vs = key;
127 unsigned i;
128 uint32_t hash = vs->value32[0];
129 for (i = 1; i < ARRAY_SIZE(vs->value32); ++i)
130 hash ^= vs->value32[i];
131 return hash;
132 }
133 static int nine_ff_vs_key_comp(void *key1, void *key2)
134 {
135 struct nine_ff_vs_key *a = (struct nine_ff_vs_key *)key1;
136 struct nine_ff_vs_key *b = (struct nine_ff_vs_key *)key2;
137
138 return memcmp(a->value64, b->value64, sizeof(a->value64));
139 }
140 static unsigned nine_ff_ps_key_hash(void *key)
141 {
142 struct nine_ff_ps_key *ps = key;
143 unsigned i;
144 uint32_t hash = ps->value32[0];
145 for (i = 1; i < ARRAY_SIZE(ps->value32); ++i)
146 hash ^= ps->value32[i];
147 return hash;
148 }
149 static int nine_ff_ps_key_comp(void *key1, void *key2)
150 {
151 struct nine_ff_ps_key *a = (struct nine_ff_ps_key *)key1;
152 struct nine_ff_ps_key *b = (struct nine_ff_ps_key *)key2;
153
154 return memcmp(a->value64, b->value64, sizeof(a->value64));
155 }
156 static unsigned nine_ff_fvf_key_hash(void *key)
157 {
158 return *(DWORD *)key;
159 }
160 static int nine_ff_fvf_key_comp(void *key1, void *key2)
161 {
162 return *(DWORD *)key1 != *(DWORD *)key2;
163 }
164
/* Forward declarations; the definitions are not in this part of the file. */
static void nine_ff_prune_vs(struct NineDevice9 *device);
static void nine_ff_prune_ps(struct NineDevice9 *device);
167
168 static void nine_ureg_tgsi_dump(struct ureg_program *ureg, boolean override)
169 {
170 if (debug_get_bool_option("NINE_FF_DUMP", FALSE) || override) {
171 unsigned count;
172 const struct tgsi_token *toks = ureg_get_tokens(ureg, &count);
173 tgsi_dump(toks, 0);
174 ureg_free_tokens(toks);
175 }
176 }
177
/* Replicate one component of a source register (struct ureg_src). */
#define _XXXX(r) ureg_scalar(r, TGSI_SWIZZLE_X)
#define _YYYY(r) ureg_scalar(r, TGSI_SWIZZLE_Y)
#define _ZZZZ(r) ureg_scalar(r, TGSI_SWIZZLE_Z)
#define _WWWW(r) ureg_scalar(r, TGSI_SWIZZLE_W)

/* Identity swizzle, for symmetry with the helpers above. */
#define _XYZW(r) (r)

/* Same as the helpers above, but r is first converted with ureg_src()
 * (used on destination registers such as temporaries). */
#define _X(r) _XXXX(ureg_src(r))
#define _Y(r) _YYYY(ureg_src(r))
#define _Z(r) _ZZZZ(ureg_src(r))
#define _W(r) _WWWW(ureg_src(r))

/* Constant slot n; expects a variable named ureg in scope. */
#define _CONST(n) ureg_DECL_constant(ureg, n)

/* AL should contain base address of lights table. */
#define LIGHT_CONST(i)                                                 \
    ureg_src_indirect(_CONST(i), _X(AL))

/* Material constants start at slot 19 of the VS constant layout. */
#define MATERIAL_CONST(i)                                              \
    _CONST(19 + (i))
198
199 /* VS FF constants layout:
200 *
201 * CONST[ 0.. 3] D3DTS_WORLD * D3DTS_VIEW * D3DTS_PROJECTION
202 * CONST[ 4.. 7] D3DTS_WORLD * D3DTS_VIEW
203 * CONST[ 8..11] D3DTS_PROJECTION
204 * CONST[12..15] D3DTS_VIEW
205 * CONST[16..18] Normal matrix
206 *
207 * CONST[19] MATERIAL.Emissive + Material.Ambient * RS.Ambient
208 * CONST[20] MATERIAL.Diffuse
209 * CONST[21] MATERIAL.Ambient
210 * CONST[22] MATERIAL.Specular
211 * CONST[23].x___ MATERIAL.Power
212 * CONST[24] MATERIAL.Emissive
213 * CONST[25] RS.Ambient
214 *
215 * CONST[26].x___ RS.PointSizeMin
216 * CONST[26]._y__ RS.PointSizeMax
217 * CONST[26].__z_ RS.PointSize
218 * CONST[26].___w RS.PointScaleA
219 * CONST[27].x___ RS.PointScaleB
220 * CONST[27]._y__ RS.PointScaleC
221 *
222 * CONST[28].x___ RS.FogEnd
223 * CONST[28]._y__ 1.0f / (RS.FogEnd - RS.FogStart)
224 * CONST[28].__z_ RS.FogDensity
225
226 * CONST[30].x___ TWEENFACTOR
227 *
228 * CONST[32].x___ LIGHT[0].Type
229 * CONST[32]._yzw LIGHT[0].Attenuation0,1,2
230 * CONST[33] LIGHT[0].Diffuse
231 * CONST[34] LIGHT[0].Specular
232 * CONST[35] LIGHT[0].Ambient
233 * CONST[36].xyz_ LIGHT[0].Position
234 * CONST[36].___w LIGHT[0].Range
235 * CONST[37].xyz_ LIGHT[0].Direction
236 * CONST[37].___w LIGHT[0].Falloff
237 * CONST[38].x___ cos(LIGHT[0].Theta / 2)
238 * CONST[38]._y__ cos(LIGHT[0].Phi / 2)
239 * CONST[38].__z_ 1.0f / (cos(LIGHT[0].Theta / 2) - cos(Light[0].Phi / 2))
240 * CONST[39].xyz_ LIGHT[0].HalfVector (for directional lights)
241 * CONST[39].___w 1 if this is the last active light, 0 if not
242 * CONST[40] LIGHT[1]
243 * CONST[48] LIGHT[2]
244 * CONST[56] LIGHT[3]
245 * CONST[64] LIGHT[4]
246 * CONST[72] LIGHT[5]
247 * CONST[80] LIGHT[6]
248 * CONST[88] LIGHT[7]
249 * NOTE: no lighting code is generated if there are no active lights
250 *
251 * CONST[100].x___ Viewport 2/width
252 * CONST[100]._y__ Viewport 2/height
253 * CONST[100].__z_ Viewport 1/(zmax - zmin)
254 * CONST[100].___w Viewport width
255 * CONST[101].x___ Viewport x0
256 * CONST[101]._y__ Viewport y0
257 * CONST[101].__z_ Viewport z0
258 *
259 * CONST[128..131] D3DTS_TEXTURE0
260 * CONST[132..135] D3DTS_TEXTURE1
261 * CONST[136..139] D3DTS_TEXTURE2
262 * CONST[140..143] D3DTS_TEXTURE3
263 * CONST[144..147] D3DTS_TEXTURE4
264 * CONST[148..151] D3DTS_TEXTURE5
265 * CONST[152..155] D3DTS_TEXTURE6
266 * CONST[156..159] D3DTS_TEXTURE7
267 *
268 * CONST[160] D3DTS_WORLDMATRIX[0] * D3DTS_VIEW
269 * CONST[164] D3DTS_WORLDMATRIX[1] * D3DTS_VIEW
270 * ...
271 * CONST[192] D3DTS_WORLDMATRIX[8] * D3DTS_VIEW
272 */
273 struct vs_build_ctx
274 {
275 struct ureg_program *ureg;
276 const struct nine_ff_vs_key *key;
277
278 uint16_t input[PIPE_MAX_ATTRIBS];
279 unsigned num_inputs;
280
281 struct ureg_src aVtx;
282 struct ureg_src aNrm;
283 struct ureg_src aCol[2];
284 struct ureg_src aTex[8];
285 struct ureg_src aPsz;
286 struct ureg_src aInd;
287 struct ureg_src aWgt;
288
289 struct ureg_src aVtx1; /* tweening */
290 struct ureg_src aNrm1;
291
292 struct ureg_src mtlA;
293 struct ureg_src mtlD;
294 struct ureg_src mtlS;
295 struct ureg_src mtlE;
296 };
297
298 static inline unsigned
299 get_texcoord_sn(struct pipe_screen *screen)
300 {
301 if (screen->get_param(screen, PIPE_CAP_TGSI_TEXCOORD))
302 return TGSI_SEMANTIC_TEXCOORD;
303 return TGSI_SEMANTIC_GENERIC;
304 }
305
306 static inline struct ureg_src
307 build_vs_add_input(struct vs_build_ctx *vs, uint16_t ndecl)
308 {
309 const unsigned i = vs->num_inputs++;
310 assert(i < PIPE_MAX_ATTRIBS);
311 vs->input[i] = ndecl;
312 return ureg_DECL_vs_input(vs->ureg, i);
313 }
314
315 /* NOTE: dst may alias src */
316 static inline void
317 ureg_normalize3(struct ureg_program *ureg,
318 struct ureg_dst dst, struct ureg_src src)
319 {
320 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
321 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
322
323 ureg_DP3(ureg, tmp_x, src, src);
324 ureg_RSQ(ureg, tmp_x, _X(tmp));
325 ureg_MUL(ureg, dst, src, _X(tmp));
326 ureg_release_temporary(ureg, tmp);
327 }
328
329 static void *
330 nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
331 {
332 const struct nine_ff_vs_key *key = vs->key;
333 struct ureg_program *ureg = ureg_create(PIPE_SHADER_VERTEX);
334 struct ureg_dst oPos, oCol[2], oPsz, oFog;
335 struct ureg_dst AR;
336 unsigned i, c;
337 unsigned label[32], l = 0;
338 boolean need_aNrm = key->lighting || key->passthrough & (1 << NINE_DECLUSAGE_NORMAL);
339 boolean need_aVtx = key->lighting || key->fog_mode || key->pointscale;
340 const unsigned texcoord_sn = get_texcoord_sn(device->screen);
341
342 vs->ureg = ureg;
343
344 /* Check which inputs we should transform. */
345 for (i = 0; i < 8 * 3; i += 3) {
346 switch ((key->tc_gen >> i) & 0x7) {
347 case NINED3DTSS_TCI_CAMERASPACENORMAL:
348 need_aNrm = TRUE;
349 break;
350 case NINED3DTSS_TCI_CAMERASPACEPOSITION:
351 need_aVtx = TRUE;
352 break;
353 case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR:
354 need_aVtx = need_aNrm = TRUE;
355 break;
356 case NINED3DTSS_TCI_SPHEREMAP:
357 need_aVtx = need_aNrm = TRUE;
358 break;
359 default:
360 break;
361 }
362 }
363
364 /* Declare and record used inputs (needed for linkage with vertex format):
365 * (texture coordinates handled later)
366 */
367 vs->aVtx = build_vs_add_input(vs,
368 key->position_t ? NINE_DECLUSAGE_POSITIONT : NINE_DECLUSAGE_POSITION);
369
370 if (need_aNrm)
371 vs->aNrm = build_vs_add_input(vs, NINE_DECLUSAGE_NORMAL);
372
373 vs->aCol[0] = ureg_imm1f(ureg, 1.0f);
374 vs->aCol[1] = ureg_imm1f(ureg, 0.0f);
375
376 if (key->lighting || key->darkness) {
377 const unsigned mask = key->mtl_diffuse | key->mtl_specular |
378 key->mtl_ambient | key->mtl_emissive;
379 if ((mask & 0x1) && !key->color0in_one)
380 vs->aCol[0] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 0));
381 if ((mask & 0x2) && !key->color1in_zero)
382 vs->aCol[1] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 1));
383
384 vs->mtlD = MATERIAL_CONST(1);
385 vs->mtlA = MATERIAL_CONST(2);
386 vs->mtlS = MATERIAL_CONST(3);
387 vs->mtlE = MATERIAL_CONST(5);
388 if (key->mtl_diffuse == 1) vs->mtlD = vs->aCol[0]; else
389 if (key->mtl_diffuse == 2) vs->mtlD = vs->aCol[1];
390 if (key->mtl_ambient == 1) vs->mtlA = vs->aCol[0]; else
391 if (key->mtl_ambient == 2) vs->mtlA = vs->aCol[1];
392 if (key->mtl_specular == 1) vs->mtlS = vs->aCol[0]; else
393 if (key->mtl_specular == 2) vs->mtlS = vs->aCol[1];
394 if (key->mtl_emissive == 1) vs->mtlE = vs->aCol[0]; else
395 if (key->mtl_emissive == 2) vs->mtlE = vs->aCol[1];
396 } else {
397 if (!key->color0in_one) vs->aCol[0] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 0));
398 if (!key->color1in_zero) vs->aCol[1] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 1));
399 }
400
401 if (key->vertexpointsize)
402 vs->aPsz = build_vs_add_input(vs, NINE_DECLUSAGE_PSIZE);
403
404 if (key->vertexblend_indexed || key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES))
405 vs->aInd = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDINDICES);
406 if (key->vertexblend || key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT))
407 vs->aWgt = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDWEIGHT);
408 if (key->vertextween) {
409 vs->aVtx1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(POSITION,1));
410 vs->aNrm1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(NORMAL,1));
411 }
412
413 /* Declare outputs:
414 */
415 oPos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); /* HPOS */
416 oCol[0] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0));
417 oCol[1] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 1));
418 if (key->fog || key->passthrough & (1 << NINE_DECLUSAGE_FOG)) {
419 oFog = ureg_DECL_output(ureg, TGSI_SEMANTIC_FOG, 0);
420 oFog = ureg_writemask(oFog, TGSI_WRITEMASK_X);
421 }
422
423 if (key->vertexpointsize || key->pointscale) {
424 oPsz = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_PSIZE, 0,
425 TGSI_WRITEMASK_X, 0, 1);
426 oPsz = ureg_writemask(oPsz, TGSI_WRITEMASK_X);
427 }
428
429 if (key->lighting || key->vertexblend)
430 AR = ureg_DECL_address(ureg);
431
432 /* === Vertex transformation / vertex blending:
433 */
434
435 if (key->position_t) {
436 if (device->driver_caps.window_space_position_support) {
437 ureg_MOV(ureg, oPos, vs->aVtx);
438 } else {
439 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
440 /* vs->aVtx contains the coordinates buffer wise.
441 * later in the pipeline, clipping, viewport and division
442 * by w (rhw = 1/w) are going to be applied, so do the reverse
443 * of these transformations (except clipping) to have the good
444 * position at the end.*/
445 ureg_MOV(ureg, tmp, vs->aVtx);
446 /* X from [X_min, X_min + width] to [-1, 1], same for Y. Z to [0, 1] */
447 ureg_SUB(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _CONST(101));
448 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _CONST(100));
449 ureg_SUB(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, 1.0f));
450 /* Y needs to be reversed */
451 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_negate(ureg_src(tmp)));
452 /* inverse rhw */
453 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), _W(tmp));
454 /* multiply X, Y, Z by w */
455 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _W(tmp));
456 ureg_MOV(ureg, oPos, ureg_src(tmp));
457 ureg_release_temporary(ureg, tmp);
458 }
459 } else if (key->vertexblend) {
460 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
461 struct ureg_dst tmp2 = ureg_DECL_temporary(ureg);
462 struct ureg_dst aVtx_dst = ureg_DECL_temporary(ureg);
463 struct ureg_dst aNrm_dst = ureg_DECL_temporary(ureg);
464 struct ureg_dst sum_blendweights = ureg_DECL_temporary(ureg);
465 struct ureg_src cWM[4];
466
467 for (i = 160; i <= 195; ++i)
468 ureg_DECL_constant(ureg, i);
469
470 /* translate world matrix index to constant file index */
471 if (key->vertexblend_indexed) {
472 ureg_MAD(ureg, tmp, vs->aInd, ureg_imm1f(ureg, 4.0f), ureg_imm1f(ureg, 160.0f));
473 ureg_ARL(ureg, AR, ureg_src(tmp));
474 }
475
476 ureg_MOV(ureg, aVtx_dst, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 0.0f));
477 ureg_MOV(ureg, aNrm_dst, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 0.0f));
478 ureg_MOV(ureg, sum_blendweights, ureg_imm4f(ureg, 1.0f, 1.0f, 1.0f, 1.0f));
479
480 for (i = 0; i < key->vertexblend; ++i) {
481 for (c = 0; c < 4; ++c) {
482 cWM[c] = ureg_src_register(TGSI_FILE_CONSTANT, (160 + i * 4) * !key->vertexblend_indexed + c);
483 if (key->vertexblend_indexed)
484 cWM[c] = ureg_src_indirect(cWM[c], ureg_scalar(ureg_src(AR), i));
485 }
486
487 /* multiply by WORLD(index) */
488 ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), cWM[0]);
489 ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), cWM[1], ureg_src(tmp));
490 ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), cWM[2], ureg_src(tmp));
491 ureg_MAD(ureg, tmp, _WWWW(vs->aVtx), cWM[3], ureg_src(tmp));
492
493 if (need_aNrm) {
494 /* Note: the spec says the transpose of the inverse of the
495 * WorldView matrices should be used, but all tests show
496 * otherwise.
497 * Only case unknown: D3DVBF_0WEIGHTS */
498 ureg_MUL(ureg, tmp2, _XXXX(vs->aNrm), cWM[0]);
499 ureg_MAD(ureg, tmp2, _YYYY(vs->aNrm), cWM[1], ureg_src(tmp2));
500 ureg_MAD(ureg, tmp2, _ZZZZ(vs->aNrm), cWM[2], ureg_src(tmp2));
501 }
502
503 if (i < (key->vertexblend - 1)) {
504 /* accumulate weighted position value */
505 ureg_MAD(ureg, aVtx_dst, ureg_src(tmp), ureg_scalar(vs->aWgt, i), ureg_src(aVtx_dst));
506 if (need_aNrm)
507 ureg_MAD(ureg, aNrm_dst, ureg_src(tmp2), ureg_scalar(vs->aWgt, i), ureg_src(aNrm_dst));
508 /* subtract weighted position value for last value */
509 ureg_SUB(ureg, sum_blendweights, ureg_src(sum_blendweights), ureg_scalar(vs->aWgt, i));
510 }
511 }
512
513 /* the last weighted position is always 1 - sum_of_previous_weights */
514 ureg_MAD(ureg, aVtx_dst, ureg_src(tmp), ureg_scalar(ureg_src(sum_blendweights), key->vertexblend - 1), ureg_src(aVtx_dst));
515 if (need_aNrm)
516 ureg_MAD(ureg, aNrm_dst, ureg_src(tmp2), ureg_scalar(ureg_src(sum_blendweights), key->vertexblend - 1), ureg_src(aNrm_dst));
517
518 /* multiply by VIEW_PROJ */
519 ureg_MUL(ureg, tmp, _X(aVtx_dst), _CONST(8));
520 ureg_MAD(ureg, tmp, _Y(aVtx_dst), _CONST(9), ureg_src(tmp));
521 ureg_MAD(ureg, tmp, _Z(aVtx_dst), _CONST(10), ureg_src(tmp));
522 ureg_MAD(ureg, oPos, _W(aVtx_dst), _CONST(11), ureg_src(tmp));
523
524 if (need_aVtx)
525 vs->aVtx = ureg_src(aVtx_dst);
526
527 ureg_release_temporary(ureg, tmp);
528 ureg_release_temporary(ureg, tmp2);
529 ureg_release_temporary(ureg, sum_blendweights);
530 if (!need_aVtx)
531 ureg_release_temporary(ureg, aVtx_dst);
532
533 if (need_aNrm) {
534 if (key->normalizenormals)
535 ureg_normalize3(ureg, aNrm_dst, ureg_src(aNrm_dst));
536 vs->aNrm = ureg_src(aNrm_dst);
537 } else
538 ureg_release_temporary(ureg, aNrm_dst);
539 } else {
540 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
541
542 if (key->vertextween) {
543 struct ureg_dst aVtx_dst = ureg_DECL_temporary(ureg);
544 ureg_LRP(ureg, aVtx_dst, _XXXX(_CONST(30)), vs->aVtx1, vs->aVtx);
545 vs->aVtx = ureg_src(aVtx_dst);
546 if (need_aNrm) {
547 struct ureg_dst aNrm_dst = ureg_DECL_temporary(ureg);
548 ureg_LRP(ureg, aNrm_dst, _XXXX(_CONST(30)), vs->aNrm1, vs->aNrm);
549 vs->aNrm = ureg_src(aNrm_dst);
550 }
551 }
552
553 /* position = vertex * WORLD_VIEW_PROJ */
554 ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), _CONST(0));
555 ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), _CONST(1), ureg_src(tmp));
556 ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), _CONST(2), ureg_src(tmp));
557 ureg_MAD(ureg, oPos, _WWWW(vs->aVtx), _CONST(3), ureg_src(tmp));
558 ureg_release_temporary(ureg, tmp);
559
560 if (need_aVtx) {
561 struct ureg_dst aVtx_dst = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
562 ureg_MUL(ureg, aVtx_dst, _XXXX(vs->aVtx), _CONST(4));
563 ureg_MAD(ureg, aVtx_dst, _YYYY(vs->aVtx), _CONST(5), ureg_src(aVtx_dst));
564 ureg_MAD(ureg, aVtx_dst, _ZZZZ(vs->aVtx), _CONST(6), ureg_src(aVtx_dst));
565 ureg_MAD(ureg, aVtx_dst, _WWWW(vs->aVtx), _CONST(7), ureg_src(aVtx_dst));
566 vs->aVtx = ureg_src(aVtx_dst);
567 }
568 if (need_aNrm) {
569 struct ureg_dst aNrm_dst = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
570 ureg_MUL(ureg, aNrm_dst, _XXXX(vs->aNrm), _CONST(16));
571 ureg_MAD(ureg, aNrm_dst, _YYYY(vs->aNrm), _CONST(17), ureg_src(aNrm_dst));
572 ureg_MAD(ureg, aNrm_dst, _ZZZZ(vs->aNrm), _CONST(18), ureg_src(aNrm_dst));
573 if (key->normalizenormals)
574 ureg_normalize3(ureg, aNrm_dst, ureg_src(aNrm_dst));
575 vs->aNrm = ureg_src(aNrm_dst);
576 }
577 }
578
579 /* === Process point size:
580 */
581 if (key->vertexpointsize || key->pointscale) {
582 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
583 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
584 struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y);
585 struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z);
586 if (key->vertexpointsize) {
587 struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);
588 ureg_MAX(ureg, tmp_z, _XXXX(vs->aPsz), _XXXX(cPsz1));
589 ureg_MIN(ureg, tmp_z, _Z(tmp), _YYYY(cPsz1));
590 } else {
591 struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);
592 ureg_MOV(ureg, tmp_z, _ZZZZ(cPsz1));
593 }
594
595 if (key->pointscale) {
596 struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);
597 struct ureg_src cPsz2 = ureg_DECL_constant(ureg, 27);
598
599 ureg_DP3(ureg, tmp_x, vs->aVtx, vs->aVtx);
600 ureg_RSQ(ureg, tmp_y, _X(tmp));
601 ureg_MUL(ureg, tmp_y, _Y(tmp), _X(tmp));
602 ureg_CMP(ureg, tmp_y, ureg_negate(_Y(tmp)), _Y(tmp), ureg_imm1f(ureg, 0.0f));
603 ureg_MAD(ureg, tmp_x, _Y(tmp), _YYYY(cPsz2), _XXXX(cPsz2));
604 ureg_MAD(ureg, tmp_x, _Y(tmp), _X(tmp), _WWWW(cPsz1));
605 ureg_RSQ(ureg, tmp_x, _X(tmp));
606 ureg_MUL(ureg, tmp_x, _X(tmp), _Z(tmp));
607 ureg_MUL(ureg, tmp_x, _X(tmp), _WWWW(_CONST(100)));
608 ureg_MAX(ureg, tmp_x, _X(tmp), _XXXX(cPsz1));
609 ureg_MIN(ureg, tmp_z, _X(tmp), _YYYY(cPsz1));
610 }
611
612 ureg_MOV(ureg, oPsz, _Z(tmp));
613 ureg_release_temporary(ureg, tmp);
614 }
615
616 for (i = 0; i < 8; ++i) {
617 struct ureg_dst tmp, tmp_x, tmp2;
618 struct ureg_dst oTex, input_coord, transformed, t, aVtx_normed;
619 unsigned c, writemask;
620 const unsigned tci = (key->tc_gen >> (i * 3)) & 0x7;
621 const unsigned idx = (key->tc_idx >> (i * 3)) & 0x7;
622 unsigned dim_input = 1 + ((key->tc_dim_input >> (i * 2)) & 0x3);
623 const unsigned dim_output = (key->tc_dim_output >> (i * 3)) & 0x7;
624
625 /* No texture output of index s */
626 if (tci == NINED3DTSS_TCI_DISABLE)
627 continue;
628 oTex = ureg_DECL_output(ureg, texcoord_sn, i);
629 tmp = ureg_DECL_temporary(ureg);
630 tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
631 input_coord = ureg_DECL_temporary(ureg);
632 transformed = ureg_DECL_temporary(ureg);
633
634 /* Get the coordinate */
635 switch (tci) {
636 case NINED3DTSS_TCI_PASSTHRU:
637 /* NINED3DTSS_TCI_PASSTHRU => Use texcoord coming from index idx *
638 * Else the idx is used only to determine wrapping mode. */
639 vs->aTex[idx] = build_vs_add_input(vs, NINE_DECLUSAGE_i(TEXCOORD,idx));
640 ureg_MOV(ureg, input_coord, vs->aTex[idx]);
641 break;
642 case NINED3DTSS_TCI_CAMERASPACENORMAL:
643 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), vs->aNrm);
644 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
645 dim_input = 4;
646 break;
647 case NINED3DTSS_TCI_CAMERASPACEPOSITION:
648 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), vs->aVtx);
649 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
650 dim_input = 4;
651 break;
652 case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR:
653 tmp.WriteMask = TGSI_WRITEMASK_XYZ;
654 ureg_DP3(ureg, tmp_x, vs->aVtx, vs->aNrm);
655 ureg_MUL(ureg, tmp, vs->aNrm, _X(tmp));
656 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp));
657 ureg_SUB(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), vs->aVtx, ureg_src(tmp));
658 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
659 dim_input = 4;
660 tmp.WriteMask = TGSI_WRITEMASK_XYZW;
661 break;
662 case NINED3DTSS_TCI_SPHEREMAP:
663 /* Implement the formula of GL_SPHERE_MAP */
664 tmp.WriteMask = TGSI_WRITEMASK_XYZ;
665 aVtx_normed = ureg_DECL_temporary(ureg);
666 tmp2 = ureg_DECL_temporary(ureg);
667 ureg_normalize3(ureg, aVtx_normed, vs->aVtx);
668 ureg_DP3(ureg, tmp_x, ureg_src(aVtx_normed), vs->aNrm);
669 ureg_MUL(ureg, tmp, vs->aNrm, _X(tmp));
670 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp));
671 ureg_SUB(ureg, tmp, ureg_src(aVtx_normed), ureg_src(tmp));
672 /* now tmp = normed(Vtx) - 2 dot3(normed(Vtx), Nrm) Nrm */
673 ureg_MOV(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_XYZ), ureg_src(tmp));
674 ureg_MUL(ureg, tmp2, ureg_src(tmp2), ureg_src(tmp2));
675 ureg_DP3(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2), ureg_src(tmp2));
676 ureg_RSQ(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2));
677 ureg_MUL(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2), ureg_imm1f(ureg, 0.5f));
678 /* tmp2 = 0.5 / sqrt(tmp.x^2 + tmp.y^2 + (tmp.z+1)^2)
679 * TODO: z coordinates are a bit different gl vs d3d, should the formula be adapted ? */
680 ureg_MUL(ureg, tmp, ureg_src(tmp), _X(tmp2));
681 ureg_ADD(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, 0.5f));
682 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_ZW), ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
683 ureg_release_temporary(ureg, aVtx_normed);
684 ureg_release_temporary(ureg, tmp2);
685 dim_input = 4;
686 tmp.WriteMask = TGSI_WRITEMASK_XYZW;
687 break;
688 default:
689 assert(0);
690 break;
691 }
692
693 /* Apply the transformation */
694 /* dim_output == 0 => do not transform the components.
695 * XYZRHW also disables transformation */
696 if (!dim_output || key->position_t) {
697 ureg_release_temporary(ureg, transformed);
698 transformed = input_coord;
699 writemask = TGSI_WRITEMASK_XYZW;
700 } else {
701 for (c = 0; c < dim_output; c++) {
702 t = ureg_writemask(transformed, 1 << c);
703 switch (dim_input) {
704 /* dim_input = 1 2 3: -> we add trailing 1 to input*/
705 case 1: ureg_MAD(ureg, t, _X(input_coord), _XXXX(_CONST(128 + i * 4 + c)), _YYYY(_CONST(128 + i * 4 + c)));
706 break;
707 case 2: ureg_DP2(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c));
708 ureg_ADD(ureg, t, ureg_src(transformed), _ZZZZ(_CONST(128 + i * 4 + c)));
709 break;
710 case 3: ureg_DP3(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c));
711 ureg_ADD(ureg, t, ureg_src(transformed), _WWWW(_CONST(128 + i * 4 + c)));
712 break;
713 case 4: ureg_DP4(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c)); break;
714 default:
715 assert(0);
716 }
717 }
718 writemask = (1 << dim_output) - 1;
719 ureg_release_temporary(ureg, input_coord);
720 }
721
722 ureg_MOV(ureg, ureg_writemask(oTex, writemask), ureg_src(transformed));
723 ureg_release_temporary(ureg, transformed);
724 ureg_release_temporary(ureg, tmp);
725 }
726
727 /* === Lighting:
728 *
729 * DIRECTIONAL: Light at infinite distance, parallel rays, no attenuation.
730 * POINT: Finite distance to scene, divergent rays, isotropic, attenuation.
731 * SPOT: Finite distance, divergent rays, angular dependence, attenuation.
732 *
733 * vec3 normal = normalize(in.Normal * NormalMatrix);
734 * vec3 hitDir = light.direction;
735 * float atten = 1.0;
736 *
737 * if (light.type != DIRECTIONAL)
738 * {
739 * vec3 hitVec = light.position - eyeVertex;
740 * float d = length(hitVec);
741 * hitDir = hitVec / d;
742 * atten = 1 / ((light.atten2 * d + light.atten1) * d + light.atten0);
743 * }
744 *
745 * if (light.type == SPOTLIGHT)
746 * {
747 * float rho = dp3(-hitVec, light.direction);
748 * if (rho < cos(light.phi / 2))
749 * atten = 0;
750 * if (rho < cos(light.theta / 2))
751 * atten *= pow(some_func(rho), light.falloff);
752 * }
753 *
754 * float nDotHit = dp3_sat(normal, hitVec);
755 * float powFact = 0.0;
756 *
757 * if (nDotHit > 0.0)
758 * {
759 * vec3 midVec = normalize(hitDir + eye);
760 * float nDotMid = dp3_sat(normal, midVec);
761 * pFact = pow(nDotMid, material.power);
762 * }
763 *
764 * ambient += light.ambient * atten;
765 * diffuse += light.diffuse * atten * nDotHit;
766 * specular += light.specular * atten * powFact;
767 */
768 if (key->lighting) {
769 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
770 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
771 struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y);
772 struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z);
773 struct ureg_dst rAtt = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_W);
774 struct ureg_dst rHit = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
775 struct ureg_dst rMid = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
776
777 struct ureg_dst rCtr = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_W);
778
779 struct ureg_dst AL = ureg_writemask(AR, TGSI_WRITEMASK_X);
780
781 /* Light.*.Alpha is not used. */
782 struct ureg_dst rD = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
783 struct ureg_dst rA = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
784 struct ureg_dst rS = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
785
786 struct ureg_src mtlP = _XXXX(MATERIAL_CONST(4));
787
788 struct ureg_src cLKind = _XXXX(LIGHT_CONST(0));
789 struct ureg_src cLAtt0 = _YYYY(LIGHT_CONST(0));
790 struct ureg_src cLAtt1 = _ZZZZ(LIGHT_CONST(0));
791 struct ureg_src cLAtt2 = _WWWW(LIGHT_CONST(0));
792 struct ureg_src cLColD = _XYZW(LIGHT_CONST(1));
793 struct ureg_src cLColS = _XYZW(LIGHT_CONST(2));
794 struct ureg_src cLColA = _XYZW(LIGHT_CONST(3));
795 struct ureg_src cLPos = _XYZW(LIGHT_CONST(4));
796 struct ureg_src cLRng = _WWWW(LIGHT_CONST(4));
797 struct ureg_src cLDir = _XYZW(LIGHT_CONST(5));
798 struct ureg_src cLFOff = _WWWW(LIGHT_CONST(5));
799 struct ureg_src cLTht = _XXXX(LIGHT_CONST(6));
800 struct ureg_src cLPhi = _YYYY(LIGHT_CONST(6));
801 struct ureg_src cLSDiv = _ZZZZ(LIGHT_CONST(6));
802 struct ureg_src cLLast = _WWWW(LIGHT_CONST(7));
803
804 const unsigned loop_label = l++;
805
806 ureg_MOV(ureg, rCtr, ureg_imm1f(ureg, 32.0f)); /* &lightconst(0) */
807 ureg_MOV(ureg, rD, ureg_imm1f(ureg, 0.0f));
808 ureg_MOV(ureg, rA, ureg_imm1f(ureg, 0.0f));
809 ureg_MOV(ureg, rS, ureg_imm1f(ureg, 0.0f));
810 rD = ureg_saturate(rD);
811 rA = ureg_saturate(rA);
812 rS = ureg_saturate(rS);
813
814
815 /* loop management */
816 ureg_BGNLOOP(ureg, &label[loop_label]);
817 ureg_ARL(ureg, AL, _W(rCtr));
818
819 /* if (not DIRECTIONAL light): */
820 ureg_SNE(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_DIRECTIONAL));
821 ureg_MOV(ureg, rHit, ureg_negate(cLDir));
822 ureg_MOV(ureg, rAtt, ureg_imm1f(ureg, 1.0f));
823 ureg_IF(ureg, _X(tmp), &label[l++]);
824 {
825 /* hitDir = light.position - eyeVtx
826 * d = length(hitDir)
827 */
828 ureg_SUB(ureg, rHit, cLPos, vs->aVtx);
829 ureg_DP3(ureg, tmp_x, ureg_src(rHit), ureg_src(rHit));
830 ureg_RSQ(ureg, tmp_y, _X(tmp));
831 ureg_MUL(ureg, tmp_x, _X(tmp), _Y(tmp)); /* length */
832
833 /* att = 1.0 / (light.att0 + (light.att1 + light.att2 * d) * d) */
834 ureg_MAD(ureg, rAtt, _X(tmp), cLAtt2, cLAtt1);
835 ureg_MAD(ureg, rAtt, _X(tmp), _W(rAtt), cLAtt0);
836 ureg_RCP(ureg, rAtt, _W(rAtt));
837 /* cut-off if distance exceeds Light.Range */
838 ureg_SLT(ureg, tmp_x, _X(tmp), cLRng);
839 ureg_MUL(ureg, rAtt, _W(rAtt), _X(tmp));
840 }
841 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
842 ureg_ENDIF(ureg);
843
844 /* normalize hitDir */
845 ureg_normalize3(ureg, rHit, ureg_src(rHit));
846
847 /* if (SPOT light) */
848 ureg_SEQ(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_SPOT));
849 ureg_IF(ureg, _X(tmp), &label[l++]);
850 {
851 /* rho = dp3(-hitDir, light.spotDir)
852 *
853 * if (rho > light.ctht2) NOTE: 0 <= phi <= pi, 0 <= theta <= phi
854 * spotAtt = 1
855 * else
856 * if (rho <= light.cphi2)
857 * spotAtt = 0
858 * else
859 * spotAtt = (rho - light.cphi2) / (light.ctht2 - light.cphi2) ^ light.falloff
860 */
861 ureg_DP3(ureg, tmp_y, ureg_negate(ureg_src(rHit)), cLDir); /* rho */
862 ureg_SUB(ureg, tmp_x, _Y(tmp), cLPhi);
863 ureg_MUL(ureg, tmp_x, _X(tmp), cLSDiv);
864 ureg_POW(ureg, tmp_x, _X(tmp), cLFOff); /* spotAtten */
865 ureg_SGE(ureg, tmp_z, _Y(tmp), cLTht); /* if inside theta && phi */
866 ureg_SGE(ureg, tmp_y, _Y(tmp), cLPhi); /* if inside phi */
867 ureg_MAD(ureg, ureg_saturate(tmp_x), _X(tmp), _Y(tmp), _Z(tmp));
868 ureg_MUL(ureg, rAtt, _W(rAtt), _X(tmp));
869 }
870 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
871 ureg_ENDIF(ureg);
872
873 /* directional factors, let's not use LIT because of clarity */
874 ureg_DP3(ureg, ureg_saturate(tmp_x), vs->aNrm, ureg_src(rHit));
875 ureg_MOV(ureg, tmp_y, ureg_imm1f(ureg, 0.0f));
876 ureg_IF(ureg, _X(tmp), &label[l++]);
877 {
878 /* midVec = normalize(hitDir + eyeDir) */
879 if (key->localviewer) {
880 ureg_normalize3(ureg, rMid, vs->aVtx);
881 ureg_SUB(ureg, rMid, ureg_src(rHit), ureg_src(rMid));
882 } else {
883 ureg_SUB(ureg, rMid, ureg_src(rHit), ureg_imm3f(ureg, 0.0f, 0.0f, 1.0f));
884 }
885 ureg_normalize3(ureg, rMid, ureg_src(rMid));
886 ureg_DP3(ureg, ureg_saturate(tmp_y), vs->aNrm, ureg_src(rMid));
887 ureg_POW(ureg, tmp_y, _Y(tmp), mtlP);
888
889 ureg_MUL(ureg, tmp_x, _W(rAtt), _X(tmp)); /* dp3(normal,hitDir) * att */
890 ureg_MUL(ureg, tmp_y, _W(rAtt), _Y(tmp)); /* power factor * att */
891 ureg_MAD(ureg, rD, cLColD, _X(tmp), ureg_src(rD)); /* accumulate diffuse */
892 ureg_MAD(ureg, rS, cLColS, _Y(tmp), ureg_src(rS)); /* accumulate specular */
893 }
894 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
895 ureg_ENDIF(ureg);
896
897 ureg_MAD(ureg, rA, cLColA, _W(rAtt), ureg_src(rA)); /* accumulate ambient */
898
899 /* break if this was the last light */
900 ureg_IF(ureg, cLLast, &label[l++]);
901 ureg_BRK(ureg);
902 ureg_ENDIF(ureg);
903 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
904
905 ureg_ADD(ureg, rCtr, _W(rCtr), ureg_imm1f(ureg, 8.0f));
906 ureg_fixup_label(ureg, label[loop_label], ureg_get_instruction_number(ureg));
907 ureg_ENDLOOP(ureg, &label[loop_label]);
908
909 /* Set alpha factors of illumination to 1.0 for the multiplications. */
910 rD.WriteMask = TGSI_WRITEMASK_W; rD.Saturate = 0;
911 rS.WriteMask = TGSI_WRITEMASK_W; rS.Saturate = 0;
912 rA.WriteMask = TGSI_WRITEMASK_W; rA.Saturate = 0;
913 ureg_MOV(ureg, rD, ureg_imm1f(ureg, 1.0f));
914 ureg_MOV(ureg, rS, ureg_imm1f(ureg, 1.0f));
915
916 /* Apply to material:
917 *
918 * oCol[0] = (material.emissive + material.ambient * rs.ambient) +
919 * material.ambient * ambient +
920 * material.diffuse * diffuse +
921 * oCol[1] = material.specular * specular;
922 */
923 if (key->mtl_emissive == 0 && key->mtl_ambient == 0) {
924 ureg_MOV(ureg, rA, ureg_imm1f(ureg, 1.0f));
925 ureg_MAD(ureg, tmp, ureg_src(rA), vs->mtlA, _CONST(19));
926 } else {
927 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(rA), _CONST(25));
928 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), vs->mtlA, ureg_src(tmp), vs->mtlE);
929 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W ), vs->mtlA, vs->mtlE);
930 }
931
932 ureg_MAD(ureg, oCol[0], ureg_src(rD), vs->mtlD, ureg_src(tmp));
933 ureg_MUL(ureg, oCol[1], ureg_src(rS), vs->mtlS);
934 ureg_release_temporary(ureg, rAtt);
935 ureg_release_temporary(ureg, rHit);
936 ureg_release_temporary(ureg, rMid);
937 ureg_release_temporary(ureg, rCtr);
938 ureg_release_temporary(ureg, rD);
939 ureg_release_temporary(ureg, rA);
940 ureg_release_temporary(ureg, rS);
941 ureg_release_temporary(ureg, rAtt);
942 ureg_release_temporary(ureg, tmp);
943 } else
944 /* COLOR */
945 if (key->darkness) {
946 if (key->mtl_emissive == 0 && key->mtl_ambient == 0) {
947 ureg_MAD(ureg, oCol[0], vs->mtlD, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f), _CONST(19));
948 } else {
949 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
950 ureg_MAD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), vs->mtlA, _CONST(25), vs->mtlE);
951 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), vs->mtlA, vs->mtlE);
952 ureg_ADD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_W), vs->mtlD, _W(tmp));
953 ureg_release_temporary(ureg, tmp);
954 }
955 ureg_MUL(ureg, oCol[1], ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f), vs->mtlS);
956 } else {
957 ureg_MOV(ureg, oCol[0], vs->aCol[0]);
958 ureg_MOV(ureg, oCol[1], vs->aCol[1]);
959 }
960
961 /* === Process fog.
962 *
963 * exp(x) = ex2(log2(e) * x)
964 */
965 if (key->fog_mode) {
966 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
967 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
968 struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z);
969 if (key->fog_range) {
970 ureg_DP3(ureg, tmp_x, vs->aVtx, vs->aVtx);
971 ureg_RSQ(ureg, tmp_z, _X(tmp));
972 ureg_MUL(ureg, tmp_z, _Z(tmp), _X(tmp));
973 } else {
974 ureg_MOV(ureg, tmp_z, ureg_abs(_ZZZZ(vs->aVtx)));
975 }
976
977 if (key->fog_mode == D3DFOG_EXP) {
978 ureg_MUL(ureg, tmp_x, _Z(tmp), _ZZZZ(_CONST(28)));
979 ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f));
980 ureg_EX2(ureg, tmp_x, _X(tmp));
981 } else
982 if (key->fog_mode == D3DFOG_EXP2) {
983 ureg_MUL(ureg, tmp_x, _Z(tmp), _ZZZZ(_CONST(28)));
984 ureg_MUL(ureg, tmp_x, _X(tmp), _X(tmp));
985 ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f));
986 ureg_EX2(ureg, tmp_x, _X(tmp));
987 } else
988 if (key->fog_mode == D3DFOG_LINEAR) {
989 ureg_SUB(ureg, tmp_x, _XXXX(_CONST(28)), _Z(tmp));
990 ureg_MUL(ureg, ureg_saturate(tmp_x), _X(tmp), _YYYY(_CONST(28)));
991 }
992 ureg_MOV(ureg, oFog, _X(tmp));
993 ureg_release_temporary(ureg, tmp);
994 } else if (key->fog && !(key->passthrough & (1 << NINE_DECLUSAGE_FOG))) {
995 ureg_MOV(ureg, oFog, ureg_scalar(vs->aCol[1], TGSI_SWIZZLE_W));
996 }
997
998 if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT)) {
999 struct ureg_src input;
1000 struct ureg_dst output;
1001 input = vs->aWgt;
1002 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 18);
1003 ureg_MOV(ureg, output, input);
1004 }
1005 if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES)) {
1006 struct ureg_src input;
1007 struct ureg_dst output;
1008 input = vs->aInd;
1009 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 19);
1010 ureg_MOV(ureg, output, input);
1011 }
1012 if (key->passthrough & (1 << NINE_DECLUSAGE_NORMAL)) {
1013 struct ureg_src input;
1014 struct ureg_dst output;
1015 input = vs->aNrm;
1016 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 20);
1017 ureg_MOV(ureg, output, input);
1018 }
1019 if (key->passthrough & (1 << NINE_DECLUSAGE_TANGENT)) {
1020 struct ureg_src input;
1021 struct ureg_dst output;
1022 input = build_vs_add_input(vs, NINE_DECLUSAGE_TANGENT);
1023 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 21);
1024 ureg_MOV(ureg, output, input);
1025 }
1026 if (key->passthrough & (1 << NINE_DECLUSAGE_BINORMAL)) {
1027 struct ureg_src input;
1028 struct ureg_dst output;
1029 input = build_vs_add_input(vs, NINE_DECLUSAGE_BINORMAL);
1030 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 22);
1031 ureg_MOV(ureg, output, input);
1032 }
1033 if (key->passthrough & (1 << NINE_DECLUSAGE_FOG)) {
1034 struct ureg_src input;
1035 struct ureg_dst output;
1036 input = build_vs_add_input(vs, NINE_DECLUSAGE_FOG);
1037 input = ureg_scalar(input, TGSI_SWIZZLE_X);
1038 output = oFog;
1039 ureg_MOV(ureg, output, input);
1040 }
1041 if (key->passthrough & (1 << NINE_DECLUSAGE_DEPTH)) {
1042 (void) 0; /* TODO: replace z of position output ? */
1043 }
1044
1045
1046 if (key->position_t && device->driver_caps.window_space_position_support)
1047 ureg_property(ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
1048
1049 ureg_END(ureg);
1050 nine_ureg_tgsi_dump(ureg, FALSE);
1051 return ureg_create_shader_and_destroy(ureg, device->pipe);
1052 }
1053
1054 /* PS FF constants layout:
1055 *
1056 * CONST[ 0.. 7] stage[i].D3DTSS_CONSTANT
1057 * CONST[ 8..15].x___ stage[i].D3DTSS_BUMPENVMAT00
1058 * CONST[ 8..15]._y__ stage[i].D3DTSS_BUMPENVMAT01
1059 * CONST[ 8..15].__z_ stage[i].D3DTSS_BUMPENVMAT10
1060 * CONST[ 8..15].___w stage[i].D3DTSS_BUMPENVMAT11
1061 * CONST[16..19].x_z_ stage[i].D3DTSS_BUMPENVLSCALE
1062 * CONST[16..19]._y_w stage[i].D3DTSS_BUMPENVLOFFSET
1063 *
1064 * CONST[20] D3DRS_TEXTUREFACTOR
1065 * CONST[21] D3DRS_FOGCOLOR
1066 * CONST[22].x___ RS.FogEnd
1067 * CONST[22]._y__ 1.0f / (RS.FogEnd - RS.FogStart)
1068 * CONST[22].__z_ RS.FogDensity
1069 */
1070 struct ps_build_ctx
1071 {
1072 struct ureg_program *ureg;
1073
1074 struct ureg_src vC[2]; /* DIFFUSE, SPECULAR */
1075 struct ureg_src vT[8]; /* TEXCOORD[i] */
1076 struct ureg_dst r[6]; /* TEMPs */
1077 struct ureg_dst rCur; /* D3DTA_CURRENT */
1078 struct ureg_dst rMod;
1079 struct ureg_src rCurSrc;
1080 struct ureg_dst rTmp; /* D3DTA_TEMP */
1081 struct ureg_src rTmpSrc;
1082 struct ureg_dst rTex;
1083 struct ureg_src rTexSrc;
1084 struct ureg_src cBEM[8];
1085 struct ureg_src s[8];
1086
1087 struct {
1088 unsigned index;
1089 unsigned index_pre_mod;
1090 unsigned num_regs;
1091 } stage;
1092 };
1093
1094 static struct ureg_src
1095 ps_get_ts_arg(struct ps_build_ctx *ps, unsigned ta)
1096 {
1097 struct ureg_src reg;
1098
1099 switch (ta & D3DTA_SELECTMASK) {
1100 case D3DTA_CONSTANT:
1101 reg = ureg_DECL_constant(ps->ureg, ps->stage.index);
1102 break;
1103 case D3DTA_CURRENT:
1104 reg = (ps->stage.index == ps->stage.index_pre_mod) ? ureg_src(ps->rMod) : ps->rCurSrc;
1105 break;
1106 case D3DTA_DIFFUSE:
1107 reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_COLOR);
1108 break;
1109 case D3DTA_SPECULAR:
1110 reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
1111 break;
1112 case D3DTA_TEMP:
1113 reg = ps->rTmpSrc;
1114 break;
1115 case D3DTA_TEXTURE:
1116 reg = ps->rTexSrc;
1117 break;
1118 case D3DTA_TFACTOR:
1119 reg = ureg_DECL_constant(ps->ureg, 20);
1120 break;
1121 default:
1122 assert(0);
1123 reg = ureg_src_undef();
1124 break;
1125 }
1126 if (ta & D3DTA_COMPLEMENT) {
1127 struct ureg_dst dst = ps->r[ps->stage.num_regs++];
1128 ureg_SUB(ps->ureg, dst, ureg_imm1f(ps->ureg, 1.0f), reg);
1129 reg = ureg_src(dst);
1130 }
1131 if (ta & D3DTA_ALPHAREPLICATE)
1132 reg = _WWWW(reg);
1133 return reg;
1134 }
1135
1136 static struct ureg_dst
1137 ps_get_ts_dst(struct ps_build_ctx *ps, unsigned ta)
1138 {
1139 assert(!(ta & (D3DTA_COMPLEMENT | D3DTA_ALPHAREPLICATE)));
1140
1141 switch (ta & D3DTA_SELECTMASK) {
1142 case D3DTA_CURRENT:
1143 return ps->rCur;
1144 case D3DTA_TEMP:
1145 return ps->rTmp;
1146 default:
1147 assert(0);
1148 return ureg_dst_undef();
1149 }
1150 }
1151
1152 static uint8_t ps_d3dtop_args_mask(D3DTEXTUREOP top)
1153 {
1154 switch (top) {
1155 case D3DTOP_DISABLE:
1156 return 0x0;
1157 case D3DTOP_SELECTARG1:
1158 case D3DTOP_PREMODULATE:
1159 return 0x2;
1160 case D3DTOP_SELECTARG2:
1161 return 0x4;
1162 case D3DTOP_MULTIPLYADD:
1163 case D3DTOP_LERP:
1164 return 0x7;
1165 default:
1166 return 0x6;
1167 }
1168 }
1169
1170 static inline boolean
1171 is_MOV_no_op(struct ureg_dst dst, struct ureg_src src)
1172 {
1173 return !dst.WriteMask ||
1174 (dst.File == src.File &&
1175 dst.Index == src.Index &&
1176 !dst.Indirect &&
1177 !dst.Saturate &&
1178 !src.Indirect &&
1179 !src.Negate &&
1180 !src.Absolute &&
1181 (!(dst.WriteMask & TGSI_WRITEMASK_X) || (src.SwizzleX == TGSI_SWIZZLE_X)) &&
1182 (!(dst.WriteMask & TGSI_WRITEMASK_Y) || (src.SwizzleY == TGSI_SWIZZLE_Y)) &&
1183 (!(dst.WriteMask & TGSI_WRITEMASK_Z) || (src.SwizzleZ == TGSI_SWIZZLE_Z)) &&
1184 (!(dst.WriteMask & TGSI_WRITEMASK_W) || (src.SwizzleW == TGSI_SWIZZLE_W)));
1185
1186 }
1187
1188 static void
1189 ps_do_ts_op(struct ps_build_ctx *ps, unsigned top, struct ureg_dst dst, struct ureg_src *arg)
1190 {
1191 struct ureg_program *ureg = ps->ureg;
1192 struct ureg_dst tmp = ps->r[ps->stage.num_regs];
1193 struct ureg_dst tmp2 = ps->r[ps->stage.num_regs+1];
1194 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
1195
1196 tmp.WriteMask = dst.WriteMask;
1197
1198 if (top != D3DTOP_SELECTARG1 && top != D3DTOP_SELECTARG2 &&
1199 top != D3DTOP_MODULATE && top != D3DTOP_PREMODULATE &&
1200 top != D3DTOP_BLENDDIFFUSEALPHA && top != D3DTOP_BLENDTEXTUREALPHA &&
1201 top != D3DTOP_BLENDFACTORALPHA && top != D3DTOP_BLENDCURRENTALPHA &&
1202 top != D3DTOP_BUMPENVMAP && top != D3DTOP_BUMPENVMAPLUMINANCE &&
1203 top != D3DTOP_LERP)
1204 dst = ureg_saturate(dst);
1205
1206 switch (top) {
1207 case D3DTOP_SELECTARG1:
1208 if (!is_MOV_no_op(dst, arg[1]))
1209 ureg_MOV(ureg, dst, arg[1]);
1210 break;
1211 case D3DTOP_SELECTARG2:
1212 if (!is_MOV_no_op(dst, arg[2]))
1213 ureg_MOV(ureg, dst, arg[2]);
1214 break;
1215 case D3DTOP_MODULATE:
1216 ureg_MUL(ureg, dst, arg[1], arg[2]);
1217 break;
1218 case D3DTOP_MODULATE2X:
1219 ureg_MUL(ureg, tmp, arg[1], arg[2]);
1220 ureg_ADD(ureg, dst, ureg_src(tmp), ureg_src(tmp));
1221 break;
1222 case D3DTOP_MODULATE4X:
1223 ureg_MUL(ureg, tmp, arg[1], arg[2]);
1224 ureg_MUL(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 4.0f));
1225 break;
1226 case D3DTOP_ADD:
1227 ureg_ADD(ureg, dst, arg[1], arg[2]);
1228 break;
1229 case D3DTOP_ADDSIGNED:
1230 ureg_ADD(ureg, tmp, arg[1], arg[2]);
1231 ureg_SUB(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 0.5f));
1232 break;
1233 case D3DTOP_ADDSIGNED2X:
1234 ureg_ADD(ureg, tmp, arg[1], arg[2]);
1235 ureg_MAD(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
1236 break;
1237 case D3DTOP_SUBTRACT:
1238 ureg_SUB(ureg, dst, arg[1], arg[2]);
1239 break;
1240 case D3DTOP_ADDSMOOTH:
1241 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), arg[1]);
1242 ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], arg[1]);
1243 break;
1244 case D3DTOP_BLENDDIFFUSEALPHA:
1245 ureg_LRP(ureg, dst, _WWWW(ps->vC[0]), arg[1], arg[2]);
1246 break;
1247 case D3DTOP_BLENDTEXTUREALPHA:
1248 /* XXX: alpha taken from previous stage, texture or result ? */
1249 ureg_LRP(ureg, dst, _W(ps->rTex), arg[1], arg[2]);
1250 break;
1251 case D3DTOP_BLENDFACTORALPHA:
1252 ureg_LRP(ureg, dst, _WWWW(_CONST(20)), arg[1], arg[2]);
1253 break;
1254 case D3DTOP_BLENDTEXTUREALPHAPM:
1255 ureg_SUB(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), _W(ps->rTex));
1256 ureg_MAD(ureg, dst, arg[2], _X(tmp), arg[1]);
1257 break;
1258 case D3DTOP_BLENDCURRENTALPHA:
1259 ureg_LRP(ureg, dst, _WWWW(ps->rCurSrc), arg[1], arg[2]);
1260 break;
1261 case D3DTOP_PREMODULATE:
1262 ureg_MOV(ureg, dst, arg[1]);
1263 ps->stage.index_pre_mod = ps->stage.index + 1;
1264 break;
1265 case D3DTOP_MODULATEALPHA_ADDCOLOR:
1266 ureg_MAD(ureg, dst, _WWWW(arg[1]), arg[2], arg[1]);
1267 break;
1268 case D3DTOP_MODULATECOLOR_ADDALPHA:
1269 ureg_MAD(ureg, dst, arg[1], arg[2], _WWWW(arg[1]));
1270 break;
1271 case D3DTOP_MODULATEINVALPHA_ADDCOLOR:
1272 ureg_SUB(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), _WWWW(arg[1]));
1273 ureg_MAD(ureg, dst, _X(tmp), arg[2], arg[1]);
1274 break;
1275 case D3DTOP_MODULATEINVCOLOR_ADDALPHA:
1276 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), arg[1]);
1277 ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], _WWWW(arg[1]));
1278 break;
1279 case D3DTOP_BUMPENVMAP:
1280 break;
1281 case D3DTOP_BUMPENVMAPLUMINANCE:
1282 break;
1283 case D3DTOP_DOTPRODUCT3:
1284 ureg_SUB(ureg, tmp, arg[1], ureg_imm4f(ureg,0.5,0.5,0.5,0.5));
1285 ureg_SUB(ureg, tmp2, arg[2] , ureg_imm4f(ureg,0.5,0.5,0.5,0.5));
1286 ureg_DP3(ureg, tmp, ureg_src(tmp), ureg_src(tmp2));
1287 ureg_MUL(ureg, ureg_saturate(dst), ureg_src(tmp), ureg_imm4f(ureg,4.0,4.0,4.0,4.0));
1288 break;
1289 case D3DTOP_MULTIPLYADD:
1290 ureg_MAD(ureg, dst, arg[1], arg[2], arg[0]);
1291 break;
1292 case D3DTOP_LERP:
1293 ureg_LRP(ureg, dst, arg[0], arg[1], arg[2]);
1294 break;
1295 case D3DTOP_DISABLE:
1296 /* no-op ? */
1297 break;
1298 default:
1299 assert(!"invalid D3DTOP");
1300 break;
1301 }
1302 }
1303
1304 static void *
1305 nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key)
1306 {
1307 struct ps_build_ctx ps;
1308 struct ureg_program *ureg = ureg_create(PIPE_SHADER_FRAGMENT);
1309 struct ureg_dst oCol;
1310 unsigned i, s;
1311 const unsigned texcoord_sn = get_texcoord_sn(device->screen);
1312
1313 memset(&ps, 0, sizeof(ps));
1314 ps.ureg = ureg;
1315 ps.stage.index_pre_mod = -1;
1316
1317 ps.vC[0] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_COLOR);
1318
1319 /* Declare all TEMPs we might need, serious drivers have a register allocator. */
1320 for (i = 0; i < ARRAY_SIZE(ps.r); ++i)
1321 ps.r[i] = ureg_DECL_temporary(ureg);
1322 ps.rCur = ps.r[0];
1323 ps.rTmp = ps.r[1];
1324 ps.rTex = ps.r[2];
1325 ps.rCurSrc = ureg_src(ps.rCur);
1326 ps.rTmpSrc = ureg_src(ps.rTmp);
1327 ps.rTexSrc = ureg_src(ps.rTex);
1328
1329 for (s = 0; s < 8; ++s) {
1330 ps.s[s] = ureg_src_undef();
1331
1332 if (key->ts[s].colorop != D3DTOP_DISABLE) {
1333 if (key->ts[s].colorarg0 == D3DTA_SPECULAR ||
1334 key->ts[s].colorarg1 == D3DTA_SPECULAR ||
1335 key->ts[s].colorarg2 == D3DTA_SPECULAR)
1336 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
1337
1338 if (key->ts[s].colorarg0 == D3DTA_TEXTURE ||
1339 key->ts[s].colorarg1 == D3DTA_TEXTURE ||
1340 key->ts[s].colorarg2 == D3DTA_TEXTURE) {
1341 ps.s[s] = ureg_DECL_sampler(ureg, s);
1342 ps.vT[s] = ureg_DECL_fs_input(ureg, texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
1343 }
1344 if (s && (key->ts[s - 1].colorop == D3DTOP_PREMODULATE ||
1345 key->ts[s - 1].alphaop == D3DTOP_PREMODULATE))
1346 ps.s[s] = ureg_DECL_sampler(ureg, s);
1347 }
1348
1349 if (key->ts[s].alphaop != D3DTOP_DISABLE) {
1350 if (key->ts[s].alphaarg0 == D3DTA_SPECULAR ||
1351 key->ts[s].alphaarg1 == D3DTA_SPECULAR ||
1352 key->ts[s].alphaarg2 == D3DTA_SPECULAR)
1353 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
1354
1355 if (key->ts[s].alphaarg0 == D3DTA_TEXTURE ||
1356 key->ts[s].alphaarg1 == D3DTA_TEXTURE ||
1357 key->ts[s].alphaarg2 == D3DTA_TEXTURE) {
1358 ps.s[s] = ureg_DECL_sampler(ureg, s);
1359 ps.vT[s] = ureg_DECL_fs_input(ureg, texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
1360 }
1361 }
1362 }
1363 if (key->specular)
1364 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
1365
1366 oCol = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
1367
1368 if (key->ts[0].colorop == D3DTOP_DISABLE &&
1369 key->ts[0].alphaop == D3DTOP_DISABLE)
1370 ureg_MOV(ureg, ps.rCur, ps.vC[0]);
1371 /* Or is it undefined then ? */
1372
1373 /* Run stages.
1374 */
1375 for (s = 0; s < 8; ++s) {
1376 unsigned colorarg[3];
1377 unsigned alphaarg[3];
1378 const uint8_t used_c = ps_d3dtop_args_mask(key->ts[s].colorop);
1379 const uint8_t used_a = ps_d3dtop_args_mask(key->ts[s].alphaop);
1380 struct ureg_dst dst;
1381 struct ureg_src arg[3];
1382
1383 if (key->ts[s].colorop == D3DTOP_DISABLE &&
1384 key->ts[s].alphaop == D3DTOP_DISABLE)
1385 continue;
1386 ps.stage.index = s;
1387 ps.stage.num_regs = 3;
1388
1389 DBG("STAGE[%u]: colorop=%s alphaop=%s\n", s,
1390 nine_D3DTOP_to_str(key->ts[s].colorop),
1391 nine_D3DTOP_to_str(key->ts[s].alphaop));
1392
1393 if (!ureg_src_is_undef(ps.s[s])) {
1394 unsigned target;
1395 struct ureg_src texture_coord = ps.vT[s];
1396 struct ureg_dst delta;
1397 switch (key->ts[s].textarget) {
1398 case 0: target = TGSI_TEXTURE_1D; break;
1399 case 1: target = TGSI_TEXTURE_2D; break;
1400 case 2: target = TGSI_TEXTURE_3D; break;
1401 case 3: target = TGSI_TEXTURE_CUBE; break;
1402 /* this is a 2 bit bitfield, do I really need a default case ? */
1403 }
1404
1405 /* Modify coordinates */
1406 if (s >= 1 &&
1407 (key->ts[s-1].colorop == D3DTOP_BUMPENVMAP ||
1408 key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE)) {
1409 delta = ureg_DECL_temporary(ureg);
1410 /* Du' = D3DTSS_BUMPENVMAT00(stage s-1)*t(s-1)R + D3DTSS_BUMPENVMAT10(stage s-1)*t(s-1)G */
1411 ureg_MUL(ureg, ureg_writemask(delta, TGSI_WRITEMASK_X), _X(ps.rTex), _XXXX(_CONST(8 + s - 1)));
1412 ureg_MAD(ureg, ureg_writemask(delta, TGSI_WRITEMASK_X), _Y(ps.rTex), _ZZZZ(_CONST(8 + s - 1)), ureg_src(delta));
1413 /* Dv' = D3DTSS_BUMPENVMAT01(stage s-1)*t(s-1)R + D3DTSS_BUMPENVMAT11(stage s-1)*t(s-1)G */
1414 ureg_MUL(ureg, ureg_writemask(delta, TGSI_WRITEMASK_Y), _X(ps.rTex), _YYYY(_CONST(8 + s - 1)));
1415 ureg_MAD(ureg, ureg_writemask(delta, TGSI_WRITEMASK_Y), _Y(ps.rTex), _WWWW(_CONST(8 + s - 1)), ureg_src(delta));
1416 texture_coord = ureg_src(ureg_DECL_temporary(ureg));
1417 ureg_MOV(ureg, ureg_writemask(ureg_dst(texture_coord), ureg_dst(ps.vT[s]).WriteMask), ps.vT[s]);
1418 ureg_ADD(ureg, ureg_writemask(ureg_dst(texture_coord), TGSI_WRITEMASK_XY), texture_coord, ureg_src(delta));
1419 /* Prepare luminance multiplier
1420 * t(s)RGBA = t(s)RGBA * clamp[(t(s-1)B * D3DTSS_BUMPENVLSCALE(stage s-1)) + D3DTSS_BUMPENVLOFFSET(stage s-1)] */
1421 if (key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE) {
1422 struct ureg_src bumpenvlscale = ((s-1) & 1) ? _ZZZZ(_CONST(16 + (s-1) / 2)) : _XXXX(_CONST(16 + (s-1) / 2));
1423 struct ureg_src bumpenvloffset = ((s-1) & 1) ? _WWWW(_CONST(16 + (s-1) / 2)) : _YYYY(_CONST(16 + (s-1) / 2));
1424
1425 ureg_MAD(ureg, ureg_saturate(ureg_writemask(delta, TGSI_WRITEMASK_X)), _Z(ps.rTex), bumpenvlscale, bumpenvloffset);
1426 }
1427 }
1428 if (key->projected & (3 << (s *2))) {
1429 unsigned dim = 1 + ((key->projected >> (2 * s)) & 3);
1430 if (dim == 4)
1431 ureg_TXP(ureg, ps.rTex, target, texture_coord, ps.s[s]);
1432 else {
1433 ureg_RCP(ureg, ureg_writemask(ps.rTmp, TGSI_WRITEMASK_X), ureg_scalar(texture_coord, dim-1));
1434 ureg_MUL(ureg, ps.rTmp, _XXXX(ps.rTmpSrc), texture_coord);
1435 ureg_TEX(ureg, ps.rTex, target, ps.rTmpSrc, ps.s[s]);
1436 }
1437 } else {
1438 ureg_TEX(ureg, ps.rTex, target, texture_coord, ps.s[s]);
1439 }
1440 if (s >= 1 && key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE)
1441 ureg_MUL(ureg, ps.rTex, ureg_src(ps.rTex), _X(delta));
1442 }
1443
1444 if (((s == 0 && key->ts[0].colorop != D3DTOP_BUMPENVMAP &&
1445 key->ts[0].colorop != D3DTOP_BUMPENVMAPLUMINANCE) ||
1446 (s == 1 &&
1447 (key->ts[0].colorop == D3DTOP_BUMPENVMAP ||
1448 key->ts[0].colorop == D3DTOP_BUMPENVMAPLUMINANCE)))&&
1449 (key->ts[s].resultarg != 0 /* not current */ ||
1450 key->ts[s].colorop == D3DTOP_DISABLE ||
1451 key->ts[s].alphaop == D3DTOP_DISABLE ||
1452 key->ts[s].colorop == D3DTOP_BLENDCURRENTALPHA ||
1453 key->ts[s].alphaop == D3DTOP_BLENDCURRENTALPHA ||
1454 key->ts[s].colorarg0 == D3DTA_CURRENT ||
1455 key->ts[s].colorarg1 == D3DTA_CURRENT ||
1456 key->ts[s].colorarg2 == D3DTA_CURRENT ||
1457 key->ts[s].alphaarg0 == D3DTA_CURRENT ||
1458 key->ts[s].alphaarg1 == D3DTA_CURRENT ||
1459 key->ts[s].alphaarg2 == D3DTA_CURRENT)) {
1460 /* Initialize D3DTA_CURRENT.
1461 * (Yes we can do this before the loop but not until
1462 * NVE4 has an instruction scheduling pass.)
1463 */
1464 ureg_MOV(ureg, ps.rCur, ps.vC[0]);
1465 }
1466
1467 if (key->ts[s].colorop == D3DTOP_BUMPENVMAP ||
1468 key->ts[s].colorop == D3DTOP_BUMPENVMAPLUMINANCE)
1469 continue;
1470
1471 dst = ps_get_ts_dst(&ps, key->ts[s].resultarg ? D3DTA_TEMP : D3DTA_CURRENT);
1472
1473 if (ps.stage.index_pre_mod == ps.stage.index) {
1474 ps.rMod = ps.r[ps.stage.num_regs++];
1475 ureg_MUL(ureg, ps.rMod, ps.rCurSrc, ps.rTexSrc);
1476 }
1477
1478 colorarg[0] = (key->ts[s].colorarg0 | ((key->colorarg_b4[0] >> s) << 4) | ((key->colorarg_b5[0] >> s) << 5)) & 0x3f;
1479 colorarg[1] = (key->ts[s].colorarg1 | ((key->colorarg_b4[1] >> s) << 4) | ((key->colorarg_b5[1] >> s) << 5)) & 0x3f;
1480 colorarg[2] = (key->ts[s].colorarg2 | ((key->colorarg_b4[2] >> s) << 4) | ((key->colorarg_b5[2] >> s) << 5)) & 0x3f;
1481 alphaarg[0] = (key->ts[s].alphaarg0 | ((key->alphaarg_b4[0] >> s) << 4)) & 0x1f;
1482 alphaarg[1] = (key->ts[s].alphaarg1 | ((key->alphaarg_b4[1] >> s) << 4)) & 0x1f;
1483 alphaarg[2] = (key->ts[s].alphaarg2 | ((key->alphaarg_b4[2] >> s) << 4)) & 0x1f;
1484
1485 if (key->ts[s].colorop != key->ts[s].alphaop ||
1486 colorarg[0] != alphaarg[0] ||
1487 colorarg[1] != alphaarg[1] ||
1488 colorarg[2] != alphaarg[2])
1489 dst.WriteMask = TGSI_WRITEMASK_XYZ;
1490
1491 /* Special DOTPRODUCT behaviour (see wine tests) */
1492 if (key->ts[s].colorop == D3DTOP_DOTPRODUCT3)
1493 dst.WriteMask = TGSI_WRITEMASK_XYZW;
1494
1495 if (used_c & 0x1) arg[0] = ps_get_ts_arg(&ps, colorarg[0]);
1496 if (used_c & 0x2) arg[1] = ps_get_ts_arg(&ps, colorarg[1]);
1497 if (used_c & 0x4) arg[2] = ps_get_ts_arg(&ps, colorarg[2]);
1498 ps_do_ts_op(&ps, key->ts[s].colorop, dst, arg);
1499
1500 if (dst.WriteMask != TGSI_WRITEMASK_XYZW) {
1501 dst.WriteMask = TGSI_WRITEMASK_W;
1502
1503 if (used_a & 0x1) arg[0] = ps_get_ts_arg(&ps, alphaarg[0]);
1504 if (used_a & 0x2) arg[1] = ps_get_ts_arg(&ps, alphaarg[1]);
1505 if (used_a & 0x4) arg[2] = ps_get_ts_arg(&ps, alphaarg[2]);
1506 ps_do_ts_op(&ps, key->ts[s].alphaop, dst, arg);
1507 }
1508 }
1509
1510 if (key->specular)
1511 ureg_ADD(ureg, ps.rCur, ps.rCurSrc, ps.vC[1]);
1512
1513 /* Fog.
1514 */
1515 if (key->fog_mode) {
1516 struct ureg_src vPos;
1517 if (device->screen->get_param(device->screen,
1518 PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL)) {
1519 vPos = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
1520 } else {
1521 vPos = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0,
1522 TGSI_INTERPOLATE_LINEAR);
1523 }
1524
1525 struct ureg_dst rFog = ureg_writemask(ps.rTmp, TGSI_WRITEMASK_X);
1526 if (key->fog_mode == D3DFOG_EXP) {
1527 ureg_MUL(ureg, rFog, _ZZZZ(vPos), _ZZZZ(_CONST(22)));
1528 ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f));
1529 ureg_EX2(ureg, rFog, _X(rFog));
1530 } else
1531 if (key->fog_mode == D3DFOG_EXP2) {
1532 ureg_MUL(ureg, rFog, _ZZZZ(vPos), _ZZZZ(_CONST(22)));
1533 ureg_MUL(ureg, rFog, _X(rFog), _X(rFog));
1534 ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f));
1535 ureg_EX2(ureg, rFog, _X(rFog));
1536 } else
1537 if (key->fog_mode == D3DFOG_LINEAR) {
1538 ureg_SUB(ureg, rFog, _XXXX(_CONST(22)), _ZZZZ(vPos));
1539 ureg_MUL(ureg, ureg_saturate(rFog), _X(rFog), _YYYY(_CONST(22)));
1540 }
1541 ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _X(rFog), ps.rCurSrc, _CONST(21));
1542 ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc);
1543 } else
1544 if (key->fog) {
1545 struct ureg_src vFog = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_FOG, 0, TGSI_INTERPOLATE_PERSPECTIVE);
1546 ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _XXXX(vFog), ps.rCurSrc, _CONST(21));
1547 ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc);
1548 } else {
1549 ureg_MOV(ureg, oCol, ps.rCurSrc);
1550 }
1551
1552 ureg_END(ureg);
1553 nine_ureg_tgsi_dump(ureg, FALSE);
1554 return ureg_create_shader_and_destroy(ureg, device->pipe);
1555 }
1556
1557 static struct NineVertexShader9 *
1558 nine_ff_get_vs(struct NineDevice9 *device)
1559 {
1560 const struct nine_state *state = &device->state;
1561 struct NineVertexShader9 *vs;
1562 enum pipe_error err;
1563 struct vs_build_ctx bld;
1564 struct nine_ff_vs_key key;
1565 unsigned s, i;
1566 boolean has_indexes = false;
1567 boolean has_weights = false;
1568 char input_texture_coord[8];
1569
1570 assert(sizeof(key) <= sizeof(key.value32));
1571
1572 memset(&key, 0, sizeof(key));
1573 memset(&bld, 0, sizeof(bld));
1574 memset(&input_texture_coord, 0, sizeof(input_texture_coord));
1575
1576 bld.key = &key;
1577
1578 /* FIXME: this shouldn't be NULL, but it is on init */
1579 if (state->vdecl) {
1580 key.color0in_one = 1;
1581 key.color1in_zero = 1;
1582 for (i = 0; i < state->vdecl->nelems; i++) {
1583 uint16_t usage = state->vdecl->usage_map[i];
1584 if (usage == NINE_DECLUSAGE_POSITIONT)
1585 key.position_t = 1;
1586 else if (usage == NINE_DECLUSAGE_i(COLOR, 0))
1587 key.color0in_one = 0;
1588 else if (usage == NINE_DECLUSAGE_i(COLOR, 1))
1589 key.color1in_zero = 0;
1590 else if (usage == NINE_DECLUSAGE_i(BLENDINDICES, 0)) {
1591 has_indexes = true;
1592 key.passthrough |= 1 << usage;
1593 } else if (usage == NINE_DECLUSAGE_i(BLENDWEIGHT, 0)) {
1594 has_weights = true;
1595 key.passthrough |= 1 << usage;
1596 } else if (usage == NINE_DECLUSAGE_PSIZE)
1597 key.vertexpointsize = 1;
1598 else if (usage % NINE_DECLUSAGE_COUNT == NINE_DECLUSAGE_TEXCOORD) {
1599 s = usage / NINE_DECLUSAGE_COUNT;
1600 if (s < 8)
1601 input_texture_coord[s] = nine_decltype_get_dim(state->vdecl->decls[i].Type);
1602 else
1603 DBG("FF given texture coordinate >= 8. Ignoring\n");
1604 } else if (usage < NINE_DECLUSAGE_NONE)
1605 key.passthrough |= 1 << usage;
1606 }
1607 }
1608 /* ff vs + ps 3.0: some elements are passed to the ps (wine test).
1609 * We do restrict to indices 0 */
1610 key.passthrough &= ~((1 << NINE_DECLUSAGE_POSITION) | (1 << NINE_DECLUSAGE_PSIZE) |
1611 (1 << NINE_DECLUSAGE_TEXCOORD) | (1 << NINE_DECLUSAGE_POSITIONT) |
1612 (1 << NINE_DECLUSAGE_TESSFACTOR) | (1 << NINE_DECLUSAGE_SAMPLE));
1613 if (!key.position_t)
1614 key.passthrough = 0;
1615 key.pointscale = !!state->rs[D3DRS_POINTSCALEENABLE];
1616
1617 key.lighting = !!state->rs[D3DRS_LIGHTING] && state->ff.num_lights_active;
1618 key.darkness = !!state->rs[D3DRS_LIGHTING] && !state->ff.num_lights_active;
1619 if (key.position_t) {
1620 key.darkness = 0; /* |= key.lighting; */ /* XXX ? */
1621 key.lighting = 0;
1622 }
1623 if ((key.lighting | key.darkness) && state->rs[D3DRS_COLORVERTEX]) {
1624 uint32_t mask = (key.color0in_one ? 0 : 1) | (key.color1in_zero ? 0 : 2);
1625 key.mtl_diffuse = state->rs[D3DRS_DIFFUSEMATERIALSOURCE] & mask;
1626 key.mtl_ambient = state->rs[D3DRS_AMBIENTMATERIALSOURCE] & mask;
1627 key.mtl_specular = state->rs[D3DRS_SPECULARMATERIALSOURCE] & mask;
1628 key.mtl_emissive = state->rs[D3DRS_EMISSIVEMATERIALSOURCE] & mask;
1629 }
1630 key.fog = !!state->rs[D3DRS_FOGENABLE];
1631 key.fog_mode = (!key.position_t && state->rs[D3DRS_FOGENABLE]) ? state->rs[D3DRS_FOGVERTEXMODE] : 0;
1632 if (key.fog_mode)
1633 key.fog_range = state->rs[D3DRS_RANGEFOGENABLE];
1634
1635 key.localviewer = !!state->rs[D3DRS_LOCALVIEWER];
1636 key.normalizenormals = !!state->rs[D3DRS_NORMALIZENORMALS];
1637
1638 if (state->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) {
1639 key.vertexblend_indexed = !!state->rs[D3DRS_INDEXEDVERTEXBLENDENABLE] && has_indexes;
1640
1641 switch (state->rs[D3DRS_VERTEXBLEND]) {
1642 case D3DVBF_0WEIGHTS: key.vertexblend = key.vertexblend_indexed; break;
1643 case D3DVBF_1WEIGHTS: key.vertexblend = 2; break;
1644 case D3DVBF_2WEIGHTS: key.vertexblend = 3; break;
1645 case D3DVBF_3WEIGHTS: key.vertexblend = 4; break;
1646 case D3DVBF_TWEENING: key.vertextween = 1; break;
1647 default:
1648 assert(!"invalid D3DVBF");
1649 break;
1650 }
1651 if (!has_weights && state->rs[D3DRS_VERTEXBLEND] != D3DVBF_0WEIGHTS)
1652 key.vertexblend = 0; /* TODO: if key.vertexblend_indexed, perhaps it should use 1.0 as weight, or revert to D3DVBF_0WEIGHTS */
1653 }
1654
1655 for (s = 0; s < 8; ++s) {
1656 unsigned gen = (state->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] >> 16) + 1;
1657 unsigned dim;
1658
1659 if (key.position_t && gen > NINED3DTSS_TCI_PASSTHRU)
1660 gen = NINED3DTSS_TCI_PASSTHRU;
1661
1662 if (!input_texture_coord[s] && gen == NINED3DTSS_TCI_PASSTHRU)
1663 gen = NINED3DTSS_TCI_DISABLE;
1664
1665 key.tc_gen |= gen << (s * 3);
1666 key.tc_idx |= (state->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] & 7) << (s * 3);
1667 key.tc_dim_input |= ((input_texture_coord[s]-1) & 0x3) << (s * 2);
1668
1669 dim = state->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & 0x7;
1670 if (dim > 4)
1671 dim = input_texture_coord[s];
1672 if (dim == 1) /* NV behaviour */
1673 dim = 0;
1674 key.tc_dim_output |= dim << (s * 3);
1675 }
1676
1677 vs = util_hash_table_get(device->ff.ht_vs, &key);
1678 if (vs)
1679 return vs;
1680 NineVertexShader9_new(device, &vs, NULL, nine_ff_build_vs(device, &bld));
1681
1682 nine_ff_prune_vs(device);
1683 if (vs) {
1684 unsigned n;
1685
1686 memcpy(&vs->ff_key, &key, sizeof(vs->ff_key));
1687
1688 err = util_hash_table_set(device->ff.ht_vs, &vs->ff_key, vs);
1689 (void)err;
1690 assert(err == PIPE_OK);
1691 device->ff.num_vs++;
1692 NineUnknown_ConvertRefToBind(NineUnknown(vs));
1693
1694 vs->num_inputs = bld.num_inputs;
1695 for (n = 0; n < bld.num_inputs; ++n)
1696 vs->input_map[n].ndecl = bld.input[n];
1697
1698 vs->position_t = key.position_t;
1699 vs->point_size = key.vertexpointsize | key.pointscale;
1700 }
1701 return vs;
1702 }
1703
1704 static struct NinePixelShader9 *
1705 nine_ff_get_ps(struct NineDevice9 *device)
1706 {
1707 struct nine_state *state = &device->state;
1708 struct NinePixelShader9 *ps;
1709 enum pipe_error err;
1710 struct nine_ff_ps_key key;
1711 unsigned s;
1712 uint8_t sampler_mask = 0;
1713
1714 assert(sizeof(key) <= sizeof(key.value32));
1715
1716 memset(&key, 0, sizeof(key));
1717 for (s = 0; s < 8; ++s) {
1718 key.ts[s].colorop = state->ff.tex_stage[s][D3DTSS_COLOROP];
1719 key.ts[s].alphaop = state->ff.tex_stage[s][D3DTSS_ALPHAOP];
1720 /* MSDN says D3DTOP_DISABLE disables this and all subsequent stages. */
1721 /* ALPHAOP cannot be disabled if COLOROP is enabled. */
1722 if (key.ts[s].colorop == D3DTOP_DISABLE) {
1723 key.ts[s].alphaop = D3DTOP_DISABLE; /* DISABLE == 1, avoid degenerate keys */
1724 break;
1725 }
1726
1727 if (!state->texture[s] &&
1728 state->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE) {
1729 /* This should also disable the stage. */
1730 key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE;
1731 break;
1732 }
1733
1734 if (state->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE)
1735 sampler_mask |= (1 << s);
1736
1737 if (key.ts[s].colorop != D3DTOP_DISABLE) {
1738 uint8_t used_c = ps_d3dtop_args_mask(key.ts[s].colorop);
1739 if (used_c & 0x1) key.ts[s].colorarg0 = state->ff.tex_stage[s][D3DTSS_COLORARG0];
1740 if (used_c & 0x2) key.ts[s].colorarg1 = state->ff.tex_stage[s][D3DTSS_COLORARG1];
1741 if (used_c & 0x4) key.ts[s].colorarg2 = state->ff.tex_stage[s][D3DTSS_COLORARG2];
1742 if (used_c & 0x1) key.colorarg_b4[0] |= (state->ff.tex_stage[s][D3DTSS_COLORARG0] >> 4) << s;
1743 if (used_c & 0x1) key.colorarg_b5[0] |= (state->ff.tex_stage[s][D3DTSS_COLORARG0] >> 5) << s;
1744 if (used_c & 0x2) key.colorarg_b4[1] |= (state->ff.tex_stage[s][D3DTSS_COLORARG1] >> 4) << s;
1745 if (used_c & 0x2) key.colorarg_b5[1] |= (state->ff.tex_stage[s][D3DTSS_COLORARG1] >> 5) << s;
1746 if (used_c & 0x4) key.colorarg_b4[2] |= (state->ff.tex_stage[s][D3DTSS_COLORARG2] >> 4) << s;
1747 if (used_c & 0x4) key.colorarg_b5[2] |= (state->ff.tex_stage[s][D3DTSS_COLORARG2] >> 5) << s;
1748 }
1749 if (key.ts[s].alphaop != D3DTOP_DISABLE) {
1750 uint8_t used_a = ps_d3dtop_args_mask(key.ts[s].alphaop);
1751 if (used_a & 0x1) key.ts[s].alphaarg0 = state->ff.tex_stage[s][D3DTSS_ALPHAARG0];
1752 if (used_a & 0x2) key.ts[s].alphaarg1 = state->ff.tex_stage[s][D3DTSS_ALPHAARG1];
1753 if (used_a & 0x4) key.ts[s].alphaarg2 = state->ff.tex_stage[s][D3DTSS_ALPHAARG2];
1754 if (used_a & 0x1) key.alphaarg_b4[0] |= (state->ff.tex_stage[s][D3DTSS_ALPHAARG0] >> 4) << s;
1755 if (used_a & 0x2) key.alphaarg_b4[1] |= (state->ff.tex_stage[s][D3DTSS_ALPHAARG1] >> 4) << s;
1756 if (used_a & 0x4) key.alphaarg_b4[2] |= (state->ff.tex_stage[s][D3DTSS_ALPHAARG2] >> 4) << s;
1757 }
1758 key.ts[s].resultarg = state->ff.tex_stage[s][D3DTSS_RESULTARG] == D3DTA_TEMP;
1759
1760 if (state->texture[s]) {
1761 switch (state->texture[s]->base.type) {
1762 case D3DRTYPE_TEXTURE: key.ts[s].textarget = 1; break;
1763 case D3DRTYPE_VOLUMETEXTURE: key.ts[s].textarget = 2; break;
1764 case D3DRTYPE_CUBETEXTURE: key.ts[s].textarget = 3; break;
1765 default:
1766 assert(!"unexpected texture type");
1767 break;
1768 }
1769 } else {
1770 key.ts[s].textarget = 1;
1771 }
1772 }
1773
1774 key.projected = nine_ff_get_projected_key(state);
1775 key.specular = !!state->rs[D3DRS_SPECULARENABLE];
1776
1777 for (; s < 8; ++s)
1778 key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE;
1779 if (state->rs[D3DRS_FOGENABLE])
1780 key.fog_mode = state->rs[D3DRS_FOGTABLEMODE];
1781 key.fog = !!state->rs[D3DRS_FOGENABLE];
1782
1783 ps = util_hash_table_get(device->ff.ht_ps, &key);
1784 if (ps)
1785 return ps;
1786 NinePixelShader9_new(device, &ps, NULL, nine_ff_build_ps(device, &key));
1787
1788 nine_ff_prune_ps(device);
1789 if (ps) {
1790 memcpy(&ps->ff_key, &key, sizeof(ps->ff_key));
1791
1792 err = util_hash_table_set(device->ff.ht_ps, &ps->ff_key, ps);
1793 (void)err;
1794 assert(err == PIPE_OK);
1795 device->ff.num_ps++;
1796 NineUnknown_ConvertRefToBind(NineUnknown(ps));
1797
1798 ps->rt_mask = 0x1;
1799 ps->sampler_mask = sampler_mask;
1800 }
1801 return ps;
1802 }
1803
/* Access transform D3DTS_<n>; expects a variable named `state` to be in scope. */
#define GET_D3DTS(n) nine_state_access_transform(state, D3DTS_##n, FALSE)
/* Non-zero when the dirty bit of transform D3DTS_<n> is set in
 * (s)->ff.changed.transform.  The mask is built from an unsigned constant so
 * that selecting bit 31 of a word is well defined. */
#define IS_D3DTS_DIRTY(s,n) ((s)->ff.changed.transform[(D3DTS_##n) / 32] & (1u << ((D3DTS_##n) % 32)))
1806 static void
1807 nine_ff_load_vs_transforms(struct NineDevice9 *device)
1808 {
1809 struct nine_state *state = &device->state;
1810 D3DMATRIX T;
1811 D3DMATRIX *M = (D3DMATRIX *)device->ff.vs_const;
1812 unsigned i;
1813
1814 /* TODO: make this nicer, and only upload the ones we need */
1815 /* TODO: use ff.vs_const as storage of W, V, P matrices */
1816
1817 if (IS_D3DTS_DIRTY(state, WORLD) ||
1818 IS_D3DTS_DIRTY(state, VIEW) ||
1819 IS_D3DTS_DIRTY(state, PROJECTION)) {
1820 /* WVP, WV matrices */
1821 nine_d3d_matrix_matrix_mul(&M[1], GET_D3DTS(WORLD), GET_D3DTS(VIEW));
1822 nine_d3d_matrix_matrix_mul(&M[0], &M[1], GET_D3DTS(PROJECTION));
1823
1824 /* normal matrix == transpose(inverse(WV)) */
1825 nine_d3d_matrix_inverse_3x3(&T, &M[1]);
1826 nine_d3d_matrix_transpose(&M[4], &T);
1827
1828 /* P matrix */
1829 M[2] = *GET_D3DTS(PROJECTION);
1830
1831 /* V and W matrix */
1832 M[3] = *GET_D3DTS(VIEW);
1833 M[40] = M[1];
1834 }
1835
1836 if (state->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) {
1837 /* load other world matrices */
1838 for (i = 1; i <= 8; ++i) {
1839 nine_d3d_matrix_matrix_mul(&M[40 + i], GET_D3DTS(WORLDMATRIX(i)), GET_D3DTS(VIEW));
1840 }
1841 }
1842
1843 device->ff.vs_const[30 * 4] = asfloat(state->rs[D3DRS_TWEENFACTOR]);
1844 }
1845
1846 static void
1847 nine_ff_load_lights(struct NineDevice9 *device)
1848 {
1849 struct nine_state *state = &device->state;
1850 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
1851 unsigned l;
1852
1853 if (state->changed.group & NINE_STATE_FF_MATERIAL) {
1854 const D3DMATERIAL9 *mtl = &state->ff.material;
1855
1856 memcpy(&dst[20], &mtl->Diffuse, 4 * sizeof(float));
1857 memcpy(&dst[21], &mtl->Ambient, 4 * sizeof(float));
1858 memcpy(&dst[22], &mtl->Specular, 4 * sizeof(float));
1859 dst[23].x = mtl->Power;
1860 memcpy(&dst[24], &mtl->Emissive, 4 * sizeof(float));
1861 d3dcolor_to_rgba(&dst[25].x, state->rs[D3DRS_AMBIENT]);
1862 dst[19].x = dst[25].x * mtl->Ambient.r + mtl->Emissive.r;
1863 dst[19].y = dst[25].y * mtl->Ambient.g + mtl->Emissive.g;
1864 dst[19].z = dst[25].z * mtl->Ambient.b + mtl->Emissive.b;
1865 dst[19].w = mtl->Ambient.a + mtl->Emissive.a;
1866 }
1867
1868 if (!(state->changed.group & NINE_STATE_FF_LIGHTING))
1869 return;
1870
1871 for (l = 0; l < state->ff.num_lights_active; ++l) {
1872 const D3DLIGHT9 *light = &state->ff.light[state->ff.active_light[l]];
1873
1874 dst[32 + l * 8].x = light->Type;
1875 dst[32 + l * 8].y = light->Attenuation0;
1876 dst[32 + l * 8].z = light->Attenuation1;
1877 dst[32 + l * 8].w = light->Attenuation2;
1878 memcpy(&dst[33 + l * 8].x, &light->Diffuse, sizeof(light->Diffuse));
1879 memcpy(&dst[34 + l * 8].x, &light->Specular, sizeof(light->Specular));
1880 memcpy(&dst[35 + l * 8].x, &light->Ambient, sizeof(light->Ambient));
1881 nine_d3d_vector4_matrix_mul((D3DVECTOR *)&dst[36 + l * 8].x, &light->Position, GET_D3DTS(VIEW));
1882 nine_d3d_vector3_matrix_mul((D3DVECTOR *)&dst[37 + l * 8].x, &light->Direction, GET_D3DTS(VIEW));
1883 dst[36 + l * 8].w = light->Type == D3DLIGHT_DIRECTIONAL ? 1e9f : light->Range;
1884 dst[37 + l * 8].w = light->Falloff;
1885 dst[38 + l * 8].x = cosf(light->Theta * 0.5f);
1886 dst[38 + l * 8].y = cosf(light->Phi * 0.5f);
1887 dst[38 + l * 8].z = 1.0f / (dst[38 + l * 8].x - dst[38 + l * 8].y);
1888 dst[39 + l * 8].w = (l + 1) == state->ff.num_lights_active;
1889 }
1890 }
1891
1892 static void
1893 nine_ff_load_point_and_fog_params(struct NineDevice9 *device)
1894 {
1895 const struct nine_state *state = &device->state;
1896 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
1897
1898 if (!(state->changed.group & NINE_STATE_FF_OTHER))
1899 return;
1900 dst[26].x = asfloat(state->rs[D3DRS_POINTSIZE_MIN]);
1901 dst[26].y = asfloat(state->rs[D3DRS_POINTSIZE_MAX]);
1902 dst[26].z = asfloat(state->rs[D3DRS_POINTSIZE]);
1903 dst[26].w = asfloat(state->rs[D3DRS_POINTSCALE_A]);
1904 dst[27].x = asfloat(state->rs[D3DRS_POINTSCALE_B]);
1905 dst[27].y = asfloat(state->rs[D3DRS_POINTSCALE_C]);
1906 dst[28].x = asfloat(state->rs[D3DRS_FOGEND]);
1907 dst[28].y = 1.0f / (asfloat(state->rs[D3DRS_FOGEND]) - asfloat(state->rs[D3DRS_FOGSTART]));
1908 if (isinf(dst[28].y))
1909 dst[28].y = 0.0f;
1910 dst[28].z = asfloat(state->rs[D3DRS_FOGDENSITY]);
1911 }
1912
1913 static void
1914 nine_ff_load_tex_matrices(struct NineDevice9 *device)
1915 {
1916 struct nine_state *state = &device->state;
1917 D3DMATRIX *M = (D3DMATRIX *)device->ff.vs_const;
1918 unsigned s;
1919
1920 if (!(state->ff.changed.transform[0] & 0xff0000))
1921 return;
1922 for (s = 0; s < 8; ++s) {
1923 if (IS_D3DTS_DIRTY(state, TEXTURE0 + s))
1924 nine_d3d_matrix_transpose(&M[32 + s], nine_state_access_transform(state, D3DTS_TEXTURE0 + s, FALSE));
1925 }
1926 }
1927
1928 static void
1929 nine_ff_load_ps_params(struct NineDevice9 *device)
1930 {
1931 const struct nine_state *state = &device->state;
1932 struct fvec4 *dst = (struct fvec4 *)device->ff.ps_const;
1933 unsigned s;
1934
1935 if (!(state->changed.group & (NINE_STATE_FF_PSSTAGES | NINE_STATE_FF_OTHER)))
1936 return;
1937
1938 for (s = 0; s < 8; ++s)
1939 d3dcolor_to_rgba(&dst[s].x, state->ff.tex_stage[s][D3DTSS_CONSTANT]);
1940
1941 for (s = 0; s < 8; ++s) {
1942 dst[8 + s].x = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVMAT00]);
1943 dst[8 + s].y = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVMAT01]);
1944 dst[8 + s].z = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVMAT10]);
1945 dst[8 + s].w = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVMAT11]);
1946 if (s & 1) {
1947 dst[16 + s / 2].z = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]);
1948 dst[16 + s / 2].w = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]);
1949 } else {
1950 dst[16 + s / 2].x = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]);
1951 dst[16 + s / 2].y = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]);
1952 }
1953 }
1954
1955 d3dcolor_to_rgba(&dst[20].x, state->rs[D3DRS_TEXTUREFACTOR]);
1956 d3dcolor_to_rgba(&dst[21].x, state->rs[D3DRS_FOGCOLOR]);
1957 dst[22].x = asfloat(state->rs[D3DRS_FOGEND]);
1958 dst[22].y = 1.0f / (asfloat(state->rs[D3DRS_FOGEND]) - asfloat(state->rs[D3DRS_FOGSTART]));
1959 dst[22].z = asfloat(state->rs[D3DRS_FOGDENSITY]);
1960 }
1961
1962 static void
1963 nine_ff_load_viewport_info(struct NineDevice9 *device)
1964 {
1965 D3DVIEWPORT9 *viewport = &device->state.viewport;
1966 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
1967 float diffZ = viewport->MaxZ - viewport->MinZ;
1968
1969 /* Note: the other functions avoids to fill the const again if nothing changed.
1970 * But we don't have much to fill, and adding code to allow that may be complex
1971 * so just fill it always */
1972 dst[100].x = 2.0f / (float)(viewport->Width);
1973 dst[100].y = 2.0f / (float)(viewport->Height);
1974 dst[100].z = (diffZ == 0.0f) ? 0.0f : (1.0f / diffZ);
1975 dst[100].w = (float)(viewport->Width);
1976 dst[101].x = (float)(viewport->X);
1977 dst[101].y = (float)(viewport->Y);
1978 dst[101].z = (float)(viewport->MinZ);
1979 }
1980
1981 void
1982 nine_ff_update(struct NineDevice9 *device)
1983 {
1984 struct nine_state *state = &device->state;
1985 struct pipe_constant_buffer cb;
1986
1987 DBG("vs=%p ps=%p\n", device->state.vs, device->state.ps);
1988
1989 /* NOTE: the only reference belongs to the hash table */
1990 if (!state->programmable_vs) {
1991 device->ff.vs = nine_ff_get_vs(device);
1992 device->state.changed.group |= NINE_STATE_VS;
1993 }
1994 if (!device->state.ps) {
1995 device->ff.ps = nine_ff_get_ps(device);
1996 device->state.changed.group |= NINE_STATE_PS;
1997 }
1998
1999 if (!state->programmable_vs) {
2000 nine_ff_load_vs_transforms(device);
2001 nine_ff_load_tex_matrices(device);
2002 nine_ff_load_lights(device);
2003 nine_ff_load_point_and_fog_params(device);
2004 nine_ff_load_viewport_info(device);
2005
2006 memset(state->ff.changed.transform, 0, sizeof(state->ff.changed.transform));
2007
2008 cb.buffer_offset = 0;
2009 cb.buffer = NULL;
2010 cb.user_buffer = device->ff.vs_const;
2011 cb.buffer_size = NINE_FF_NUM_VS_CONST * 4 * sizeof(float);
2012
2013 if (!device->driver_caps.user_cbufs) {
2014 u_upload_data(device->constbuf_uploader,
2015 0,
2016 cb.buffer_size,
2017 device->constbuf_alignment,
2018 cb.user_buffer,
2019 &cb.buffer_offset,
2020 &cb.buffer);
2021 u_upload_unmap(device->constbuf_uploader);
2022 cb.user_buffer = NULL;
2023 }
2024 state->pipe.cb_vs_ff = cb;
2025 state->commit |= NINE_STATE_COMMIT_CONST_VS;
2026 }
2027
2028 if (!device->state.ps) {
2029 nine_ff_load_ps_params(device);
2030
2031 cb.buffer_offset = 0;
2032 cb.buffer = NULL;
2033 cb.user_buffer = device->ff.ps_const;
2034 cb.buffer_size = NINE_FF_NUM_PS_CONST * 4 * sizeof(float);
2035
2036 if (!device->driver_caps.user_cbufs) {
2037 u_upload_data(device->constbuf_uploader,
2038 0,
2039 cb.buffer_size,
2040 device->constbuf_alignment,
2041 cb.user_buffer,
2042 &cb.buffer_offset,
2043 &cb.buffer);
2044 u_upload_unmap(device->constbuf_uploader);
2045 cb.user_buffer = NULL;
2046 }
2047 state->pipe.cb_ps_ff = cb;
2048 state->commit |= NINE_STATE_COMMIT_CONST_PS;
2049 }
2050
2051 device->state.changed.group &= ~NINE_STATE_FF;
2052 }
2053
2054
2055 boolean
2056 nine_ff_init(struct NineDevice9 *device)
2057 {
2058 device->ff.ht_vs = util_hash_table_create(nine_ff_vs_key_hash,
2059 nine_ff_vs_key_comp);
2060 device->ff.ht_ps = util_hash_table_create(nine_ff_ps_key_hash,
2061 nine_ff_ps_key_comp);
2062
2063 device->ff.ht_fvf = util_hash_table_create(nine_ff_fvf_key_hash,
2064 nine_ff_fvf_key_comp);
2065
2066 device->ff.vs_const = CALLOC(NINE_FF_NUM_VS_CONST, 4 * sizeof(float));
2067 device->ff.ps_const = CALLOC(NINE_FF_NUM_PS_CONST, 4 * sizeof(float));
2068
2069 return device->ff.ht_vs && device->ff.ht_ps &&
2070 device->ff.ht_fvf &&
2071 device->ff.vs_const && device->ff.ps_const;
2072 }
2073
2074 static enum pipe_error nine_ff_ht_delete_cb(void *key, void *value, void *data)
2075 {
2076 NineUnknown_Unbind(NineUnknown(value));
2077 return PIPE_OK;
2078 }
2079
2080 void
2081 nine_ff_fini(struct NineDevice9 *device)
2082 {
2083 if (device->ff.ht_vs) {
2084 util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL);
2085 util_hash_table_destroy(device->ff.ht_vs);
2086 }
2087 if (device->ff.ht_ps) {
2088 util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL);
2089 util_hash_table_destroy(device->ff.ht_ps);
2090 }
2091 if (device->ff.ht_fvf) {
2092 util_hash_table_foreach(device->ff.ht_fvf, nine_ff_ht_delete_cb, NULL);
2093 util_hash_table_destroy(device->ff.ht_fvf);
2094 }
2095 device->ff.vs = NULL; /* destroyed by unbinding from hash table */
2096 device->ff.ps = NULL;
2097
2098 FREE(device->ff.vs_const);
2099 FREE(device->ff.ps_const);
2100 }
2101
2102 static void
2103 nine_ff_prune_vs(struct NineDevice9 *device)
2104 {
2105 if (device->ff.num_vs > 100) {
2106 /* could destroy the bound one here, so unbind */
2107 device->pipe->bind_vs_state(device->pipe, NULL);
2108 util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL);
2109 util_hash_table_clear(device->ff.ht_vs);
2110 device->ff.num_vs = 0;
2111 device->state.changed.group |= NINE_STATE_VS;
2112 }
2113 }
2114 static void
2115 nine_ff_prune_ps(struct NineDevice9 *device)
2116 {
2117 if (device->ff.num_ps > 100) {
2118 /* could destroy the bound one here, so unbind */
2119 device->pipe->bind_fs_state(device->pipe, NULL);
2120 util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL);
2121 util_hash_table_clear(device->ff.ht_ps);
2122 device->ff.num_ps = 0;
2123 device->state.changed.group |= NINE_STATE_PS;
2124 }
2125 }
2126
2127 /* ========================================================================== */
2128
2129 /* Matrix multiplication:
2130 *
2131 * in memory: 0 1 2 3 (row major)
2132 * 4 5 6 7
2133 * 8 9 a b
2134 * c d e f
2135 *
2136 * cA cB cC cD
2137 * r0 = (r0 * cA) (r0 * cB) . .
2138 * r1 = (r1 * cA) (r1 * cB)
2139 * r2 = (r2 * cA) .
2140 * r3 = (r3 * cA) .
2141 *
2142 * r: (11) (12) (13) (14)
2143 * (21) (22) (23) (24)
2144 * (31) (32) (33) (34)
2145 * (41) (42) (43) (44)
2146 * l: (11 12 13 14)
2147 * (21 22 23 24)
2148 * (31 32 33 34)
2149 * (41 42 43 44)
2150 *
2151 * v: (x y z 1 )
2152 *
2153 * t.xyzw = MUL(v.xxxx, r[0]);
2154 * t.xyzw = MAD(v.yyyy, r[1], t.xyzw);
2155 * t.xyzw = MAD(v.zzzz, r[2], t.xyzw);
2156 * v.xyzw = MAD(v.wwww, r[3], t.xyzw);
2157 *
2158 * v.x = DP4(v, c[0]);
2159 * v.y = DP4(v, c[1]);
2160 * v.z = DP4(v, c[2]);
2161 * v.w = DP4(v, c[3]) = 1
2162 */
2163
2164 /*
2165 static void
2166 nine_D3DMATRIX_print(const D3DMATRIX *M)
2167 {
2168 DBG("\n(%f %f %f %f)\n"
2169 "(%f %f %f %f)\n"
2170 "(%f %f %f %f)\n"
2171 "(%f %f %f %f)\n",
2172 M->m[0][0], M->m[0][1], M->m[0][2], M->m[0][3],
2173 M->m[1][0], M->m[1][1], M->m[1][2], M->m[1][3],
2174 M->m[2][0], M->m[2][1], M->m[2][2], M->m[2][3],
2175 M->m[3][0], M->m[3][1], M->m[3][2], M->m[3][3]);
2176 }
2177 */
2178
2179 static inline float
2180 nine_DP4_row_col(const D3DMATRIX *A, int r, const D3DMATRIX *B, int c)
2181 {
2182 return A->m[r][0] * B->m[0][c] +
2183 A->m[r][1] * B->m[1][c] +
2184 A->m[r][2] * B->m[2][c] +
2185 A->m[r][3] * B->m[3][c];
2186 }
2187
2188 static inline float
2189 nine_DP4_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c)
2190 {
2191 return v->x * M->m[0][c] +
2192 v->y * M->m[1][c] +
2193 v->z * M->m[2][c] +
2194 1.0f * M->m[3][c];
2195 }
2196
2197 static inline float
2198 nine_DP3_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c)
2199 {
2200 return v->x * M->m[0][c] +
2201 v->y * M->m[1][c] +
2202 v->z * M->m[2][c];
2203 }
2204
2205 void
2206 nine_d3d_matrix_matrix_mul(D3DMATRIX *D, const D3DMATRIX *L, const D3DMATRIX *R)
2207 {
2208 D->_11 = nine_DP4_row_col(L, 0, R, 0);
2209 D->_12 = nine_DP4_row_col(L, 0, R, 1);
2210 D->_13 = nine_DP4_row_col(L, 0, R, 2);
2211 D->_14 = nine_DP4_row_col(L, 0, R, 3);
2212
2213 D->_21 = nine_DP4_row_col(L, 1, R, 0);
2214 D->_22 = nine_DP4_row_col(L, 1, R, 1);
2215 D->_23 = nine_DP4_row_col(L, 1, R, 2);
2216 D->_24 = nine_DP4_row_col(L, 1, R, 3);
2217
2218 D->_31 = nine_DP4_row_col(L, 2, R, 0);
2219 D->_32 = nine_DP4_row_col(L, 2, R, 1);
2220 D->_33 = nine_DP4_row_col(L, 2, R, 2);
2221 D->_34 = nine_DP4_row_col(L, 2, R, 3);
2222
2223 D->_41 = nine_DP4_row_col(L, 3, R, 0);
2224 D->_42 = nine_DP4_row_col(L, 3, R, 1);
2225 D->_43 = nine_DP4_row_col(L, 3, R, 2);
2226 D->_44 = nine_DP4_row_col(L, 3, R, 3);
2227 }
2228
2229 void
2230 nine_d3d_vector4_matrix_mul(D3DVECTOR *d, const D3DVECTOR *v, const D3DMATRIX *M)
2231 {
2232 d->x = nine_DP4_vec_col(v, M, 0);
2233 d->y = nine_DP4_vec_col(v, M, 1);
2234 d->z = nine_DP4_vec_col(v, M, 2);
2235 }
2236
2237 void
2238 nine_d3d_vector3_matrix_mul(D3DVECTOR *d, const D3DVECTOR *v, const D3DMATRIX *M)
2239 {
2240 d->x = nine_DP3_vec_col(v, M, 0);
2241 d->y = nine_DP3_vec_col(v, M, 1);
2242 d->z = nine_DP3_vec_col(v, M, 2);
2243 }
2244
2245 void
2246 nine_d3d_matrix_transpose(D3DMATRIX *D, const D3DMATRIX *M)
2247 {
2248 unsigned i, j;
2249 for (i = 0; i < 4; ++i)
2250 for (j = 0; j < 4; ++j)
2251 D->m[i][j] = M->m[j][i];
2252 }
2253
2254 #define _M_ADD_PROD_1i_2j_3k_4l(i,j,k,l) do { \
2255 float t = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \
2256 if (t > 0.0f) pos += t; else neg += t; } while(0)
2257
2258 #define _M_SUB_PROD_1i_2j_3k_4l(i,j,k,l) do { \
2259 float t = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \
2260 if (t > 0.0f) neg -= t; else pos -= t; } while(0)
2261 float
2262 nine_d3d_matrix_det(const D3DMATRIX *M)
2263 {
2264 float pos = 0.0f;
2265 float neg = 0.0f;
2266
2267 _M_ADD_PROD_1i_2j_3k_4l(1, 2, 3, 4);
2268 _M_ADD_PROD_1i_2j_3k_4l(1, 3, 4, 2);
2269 _M_ADD_PROD_1i_2j_3k_4l(1, 4, 2, 3);
2270
2271 _M_ADD_PROD_1i_2j_3k_4l(2, 1, 4, 3);
2272 _M_ADD_PROD_1i_2j_3k_4l(2, 3, 1, 4);
2273 _M_ADD_PROD_1i_2j_3k_4l(2, 4, 3, 1);
2274
2275 _M_ADD_PROD_1i_2j_3k_4l(3, 1, 2, 4);
2276 _M_ADD_PROD_1i_2j_3k_4l(3, 2, 4, 1);
2277 _M_ADD_PROD_1i_2j_3k_4l(3, 4, 1, 2);
2278
2279 _M_ADD_PROD_1i_2j_3k_4l(4, 1, 3, 2);
2280 _M_ADD_PROD_1i_2j_3k_4l(4, 2, 1, 3);
2281 _M_ADD_PROD_1i_2j_3k_4l(4, 3, 2, 1);
2282
2283 _M_SUB_PROD_1i_2j_3k_4l(1, 2, 4, 3);
2284 _M_SUB_PROD_1i_2j_3k_4l(1, 3, 2, 4);
2285 _M_SUB_PROD_1i_2j_3k_4l(1, 4, 3, 2);
2286
2287 _M_SUB_PROD_1i_2j_3k_4l(2, 1, 3, 4);
2288 _M_SUB_PROD_1i_2j_3k_4l(2, 3, 4, 1);
2289 _M_SUB_PROD_1i_2j_3k_4l(2, 4, 1, 3);
2290
2291 _M_SUB_PROD_1i_2j_3k_4l(3, 1, 4, 2);
2292 _M_SUB_PROD_1i_2j_3k_4l(3, 2, 1, 4);
2293 _M_SUB_PROD_1i_2j_3k_4l(3, 4, 2, 1);
2294
2295 _M_SUB_PROD_1i_2j_3k_4l(4, 1, 2, 3);
2296 _M_SUB_PROD_1i_2j_3k_4l(4, 2, 3, 1);
2297 _M_SUB_PROD_1i_2j_3k_4l(4, 3, 1, 2);
2298
2299 return pos + neg;
2300 }
2301
2302 /* XXX: Probably better to just use src/mesa/math/m_matrix.c because
2303 * I have no idea where this code came from.
2304 */
2305 void
2306 nine_d3d_matrix_inverse(D3DMATRIX *D, const D3DMATRIX *M)
2307 {
2308 int i, k;
2309 float det;
2310
2311 D->m[0][0] =
2312 M->m[1][1] * M->m[2][2] * M->m[3][3] -
2313 M->m[1][1] * M->m[3][2] * M->m[2][3] -
2314 M->m[1][2] * M->m[2][1] * M->m[3][3] +
2315 M->m[1][2] * M->m[3][1] * M->m[2][3] +
2316 M->m[1][3] * M->m[2][1] * M->m[3][2] -
2317 M->m[1][3] * M->m[3][1] * M->m[2][2];
2318
2319 D->m[0][1] =
2320 -M->m[0][1] * M->m[2][2] * M->m[3][3] +
2321 M->m[0][1] * M->m[3][2] * M->m[2][3] +
2322 M->m[0][2] * M->m[2][1] * M->m[3][3] -
2323 M->m[0][2] * M->m[3][1] * M->m[2][3] -
2324 M->m[0][3] * M->m[2][1] * M->m[3][2] +
2325 M->m[0][3] * M->m[3][1] * M->m[2][2];
2326
2327 D->m[0][2] =
2328 M->m[0][1] * M->m[1][2] * M->m[3][3] -
2329 M->m[0][1] * M->m[3][2] * M->m[1][3] -
2330 M->m[0][2] * M->m[1][1] * M->m[3][3] +
2331 M->m[0][2] * M->m[3][1] * M->m[1][3] +
2332 M->m[0][3] * M->m[1][1] * M->m[3][2] -
2333 M->m[0][3] * M->m[3][1] * M->m[1][2];
2334
2335 D->m[0][3] =
2336 -M->m[0][1] * M->m[1][2] * M->m[2][3] +
2337 M->m[0][1] * M->m[2][2] * M->m[1][3] +
2338 M->m[0][2] * M->m[1][1] * M->m[2][3] -
2339 M->m[0][2] * M->m[2][1] * M->m[1][3] -
2340 M->m[0][3] * M->m[1][1] * M->m[2][2] +
2341 M->m[0][3] * M->m[2][1] * M->m[1][2];
2342
2343 D->m[1][0] =
2344 -M->m[1][0] * M->m[2][2] * M->m[3][3] +
2345 M->m[1][0] * M->m[3][2] * M->m[2][3] +
2346 M->m[1][2] * M->m[2][0] * M->m[3][3] -
2347 M->m[1][2] * M->m[3][0] * M->m[2][3] -
2348 M->m[1][3] * M->m[2][0] * M->m[3][2] +
2349 M->m[1][3] * M->m[3][0] * M->m[2][2];
2350
2351 D->m[1][1] =
2352 M->m[0][0] * M->m[2][2] * M->m[3][3] -
2353 M->m[0][0] * M->m[3][2] * M->m[2][3] -
2354 M->m[0][2] * M->m[2][0] * M->m[3][3] +
2355 M->m[0][2] * M->m[3][0] * M->m[2][3] +
2356 M->m[0][3] * M->m[2][0] * M->m[3][2] -
2357 M->m[0][3] * M->m[3][0] * M->m[2][2];
2358
2359 D->m[1][2] =
2360 -M->m[0][0] * M->m[1][2] * M->m[3][3] +
2361 M->m[0][0] * M->m[3][2] * M->m[1][3] +
2362 M->m[0][2] * M->m[1][0] * M->m[3][3] -
2363 M->m[0][2] * M->m[3][0] * M->m[1][3] -
2364 M->m[0][3] * M->m[1][0] * M->m[3][2] +
2365 M->m[0][3] * M->m[3][0] * M->m[1][2];
2366
2367 D->m[1][3] =
2368 M->m[0][0] * M->m[1][2] * M->m[2][3] -
2369 M->m[0][0] * M->m[2][2] * M->m[1][3] -
2370 M->m[0][2] * M->m[1][0] * M->m[2][3] +
2371 M->m[0][2] * M->m[2][0] * M->m[1][3] +
2372 M->m[0][3] * M->m[1][0] * M->m[2][2] -
2373 M->m[0][3] * M->m[2][0] * M->m[1][2];
2374
2375 D->m[2][0] =
2376 M->m[1][0] * M->m[2][1] * M->m[3][3] -
2377 M->m[1][0] * M->m[3][1] * M->m[2][3] -
2378 M->m[1][1] * M->m[2][0] * M->m[3][3] +
2379 M->m[1][1] * M->m[3][0] * M->m[2][3] +
2380 M->m[1][3] * M->m[2][0] * M->m[3][1] -
2381 M->m[1][3] * M->m[3][0] * M->m[2][1];
2382
2383 D->m[2][1] =
2384 -M->m[0][0] * M->m[2][1] * M->m[3][3] +
2385 M->m[0][0] * M->m[3][1] * M->m[2][3] +
2386 M->m[0][1] * M->m[2][0] * M->m[3][3] -
2387 M->m[0][1] * M->m[3][0] * M->m[2][3] -
2388 M->m[0][3] * M->m[2][0] * M->m[3][1] +
2389 M->m[0][3] * M->m[3][0] * M->m[2][1];
2390
2391 D->m[2][2] =
2392 M->m[0][0] * M->m[1][1] * M->m[3][3] -
2393 M->m[0][0] * M->m[3][1] * M->m[1][3] -
2394 M->m[0][1] * M->m[1][0] * M->m[3][3] +
2395 M->m[0][1] * M->m[3][0] * M->m[1][3] +
2396 M->m[0][3] * M->m[1][0] * M->m[3][1] -
2397 M->m[0][3] * M->m[3][0] * M->m[1][1];
2398
2399 D->m[2][3] =
2400 -M->m[0][0] * M->m[1][1] * M->m[2][3] +
2401 M->m[0][0] * M->m[2][1] * M->m[1][3] +
2402 M->m[0][1] * M->m[1][0] * M->m[2][3] -
2403 M->m[0][1] * M->m[2][0] * M->m[1][3] -
2404 M->m[0][3] * M->m[1][0] * M->m[2][1] +
2405 M->m[0][3] * M->m[2][0] * M->m[1][1];
2406
2407 D->m[3][0] =
2408 -M->m[1][0] * M->m[2][1] * M->m[3][2] +
2409 M->m[1][0] * M->m[3][1] * M->m[2][2] +
2410 M->m[1][1] * M->m[2][0] * M->m[3][2] -
2411 M->m[1][1] * M->m[3][0] * M->m[2][2] -
2412 M->m[1][2] * M->m[2][0] * M->m[3][1] +
2413 M->m[1][2] * M->m[3][0] * M->m[2][1];
2414
2415 D->m[3][1] =
2416 M->m[0][0] * M->m[2][1] * M->m[3][2] -
2417 M->m[0][0] * M->m[3][1] * M->m[2][2] -
2418 M->m[0][1] * M->m[2][0] * M->m[3][2] +
2419 M->m[0][1] * M->m[3][0] * M->m[2][2] +
2420 M->m[0][2] * M->m[2][0] * M->m[3][1] -
2421 M->m[0][2] * M->m[3][0] * M->m[2][1];
2422
2423 D->m[3][2] =
2424 -M->m[0][0] * M->m[1][1] * M->m[3][2] +
2425 M->m[0][0] * M->m[3][1] * M->m[1][2] +
2426 M->m[0][1] * M->m[1][0] * M->m[3][2] -
2427 M->m[0][1] * M->m[3][0] * M->m[1][2] -
2428 M->m[0][2] * M->m[1][0] * M->m[3][1] +
2429 M->m[0][2] * M->m[3][0] * M->m[1][1];
2430
2431 D->m[3][3] =
2432 M->m[0][0] * M->m[1][1] * M->m[2][2] -
2433 M->m[0][0] * M->m[2][1] * M->m[1][2] -
2434 M->m[0][1] * M->m[1][0] * M->m[2][2] +
2435 M->m[0][1] * M->m[2][0] * M->m[1][2] +
2436 M->m[0][2] * M->m[1][0] * M->m[2][1] -
2437 M->m[0][2] * M->m[2][0] * M->m[1][1];
2438
2439 det =
2440 M->m[0][0] * D->m[0][0] +
2441 M->m[1][0] * D->m[0][1] +
2442 M->m[2][0] * D->m[0][2] +
2443 M->m[3][0] * D->m[0][3];
2444
2445 det = 1.0 / det;
2446
2447 for (i = 0; i < 4; i++)
2448 for (k = 0; k < 4; k++)
2449 D->m[i][k] *= det;
2450
2451 #ifdef DEBUG
2452 {
2453 D3DMATRIX I;
2454
2455 nine_d3d_matrix_matrix_mul(&I, D, M);
2456
2457 for (i = 0; i < 4; ++i)
2458 for (k = 0; k < 4; ++k)
2459 if (fabsf(I.m[i][k] - (float)(i == k)) > 1e-3)
2460 DBG("Matrix inversion check FAILED !\n");
2461 }
2462 #endif
2463 }
2464
2465 /* TODO: don't use 4x4 inverse, unless this gets all nicely inlined ? */
2466 void
2467 nine_d3d_matrix_inverse_3x3(D3DMATRIX *D, const D3DMATRIX *M)
2468 {
2469 D3DMATRIX T;
2470 unsigned i, j;
2471
2472 for (i = 0; i < 3; ++i)
2473 for (j = 0; j < 3; ++j)
2474 T.m[i][j] = M->m[i][j];
2475 for (i = 0; i < 3; ++i) {
2476 T.m[i][3] = 0.0f;
2477 T.m[3][i] = 0.0f;
2478 }
2479 T.m[3][3] = 1.0f;
2480
2481 nine_d3d_matrix_inverse(D, &T);
2482 }