Merge remote-tracking branch 'origin/master' into pipe-video
[mesa.git] / src / gallium / auxiliary / vl / vl_zscan.c
1 /**************************************************************************
2 *
3 * Copyright 2011 Christian König
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include <assert.h>
29
30 #include <pipe/p_screen.h>
31 #include <pipe/p_context.h>
32
33 #include <util/u_draw.h>
34 #include <util/u_sampler.h>
35 #include <util/u_inlines.h>
36
37 #include <tgsi/tgsi_ureg.h>
38
39 #include <vl/vl_defines.h>
40 #include <vl/vl_types.h>
41
42 #include "vl_zscan.h"
43 #include "vl_vertex_buffers.h"
44
/* Output slots declared by the zscan vertex shader. */
enum VS_OUTPUT
{
   VS_O_VPOS = 0, /* used as the index of the position output */
   VS_O_VTEX = 1  /* base index of the generic outputs (VS_O_VTEX + channel) */
};
50
/* Linear scan pattern: entry i is simply i (identity ordering). */
const int vl_zscan_linear[] =
{
    0,  1,  2,  3,  4,  5,  6,  7,
    8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23,
   24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39,
   40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55,
   56, 57, 58, 59, 60, 61, 62, 63
};
63
/* Zig-Zag scan pattern (64 entries, one per coefficient of a block). */
const int vl_zscan_normal[] =
{
    0,  1,  8, 16,  9,  2,  3, 10,
   17, 24, 32, 25, 18, 11,  4,  5,
   12, 19, 26, 33, 40, 48, 41, 34,
   27, 20, 13,  6,  7, 14, 21, 28,
   35, 42, 49, 56, 57, 50, 43, 36,
   29, 22, 15, 23, 30, 37, 44, 51,
   58, 59, 52, 45, 38, 31, 39, 46,
   53, 60, 61, 54, 47, 55, 62, 63
};
76
/* Alternate scan pattern (64 entries, one per coefficient of a block). */
const int vl_zscan_alternate[] =
{
    0,  8, 16, 24,  1,  9,  2, 10,
   17, 25, 32, 40, 48, 56, 57, 49,
   41, 33, 26, 18,  3, 11,  4, 12,
   19, 27, 34, 42, 50, 58, 35, 43,
   51, 59, 20, 28,  5, 13,  6, 14,
   21, 29, 36, 44, 52, 60, 37, 45,
   53, 61, 22, 30,  7, 15, 23, 31,
   38, 46, 54, 62, 39, 47, 55, 63
};
89
90 static void *
91 create_vert_shader(struct vl_zscan *zscan)
92 {
93 struct ureg_program *shader;
94
95 struct ureg_src scale, instance;
96 struct ureg_src vrect, vpos;
97
98 struct ureg_dst tmp;
99 struct ureg_dst o_vpos, o_vtex[zscan->num_channels];
100
101 signed i;
102
103 shader = ureg_create(TGSI_PROCESSOR_VERTEX);
104 if (!shader)
105 return NULL;
106
107 scale = ureg_imm2f(shader,
108 (float)BLOCK_WIDTH / zscan->buffer_width,
109 (float)BLOCK_HEIGHT / zscan->buffer_height);
110
111 instance = ureg_DECL_system_value(shader, 0, TGSI_SEMANTIC_INSTANCEID, 0);
112
113 vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
114 vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
115
116 tmp = ureg_DECL_temporary(shader);
117
118 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
119
120 for (i = 0; i < zscan->num_channels; ++i)
121 o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX + i);
122
123 /*
124 * o_vpos.xy = (vpos + vrect) * scale
125 * o_vpos.zw = 1.0f
126 *
127 * tmp.xy = InstanceID / blocks_per_line
128 * tmp.x = frac(tmp.x)
129 * tmp.y = floor(tmp.y)
130 *
131 * o_vtex.x = vrect.x / blocks_per_line + tmp.x
132 * o_vtex.y = vrect.y
133 * o_vtex.z = tmp.z * blocks_per_line / blocks_total
134 */
135 ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), vpos, vrect);
136 ureg_MUL(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(tmp), scale);
137 ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));
138
139 ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XZ), ureg_scalar(instance, TGSI_SWIZZLE_X),
140 ureg_imm1f(shader, 1.0f / zscan->blocks_per_line));
141
142 ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
143 ureg_FLR(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_src(tmp));
144
145 for (i = 0; i < zscan->num_channels; ++i) {
146 ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y),
147 ureg_imm1f(shader, 1.0f / (zscan->blocks_per_line * BLOCK_WIDTH) * (i - (signed)zscan->num_channels / 2)));
148
149 ureg_MAD(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_X), vrect,
150 ureg_imm1f(shader, 1.0f / zscan->blocks_per_line), ureg_src(tmp));
151 ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Y), vrect);
152 ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Z), ureg_src(tmp),
153 ureg_imm1f(shader, (float)zscan->blocks_per_line / zscan->blocks_total));
154 }
155
156 ureg_release_temporary(shader, tmp);
157 ureg_END(shader);
158
159 return ureg_create_shader_and_destroy(shader, zscan->pipe);
160 }
161
162 static void *
163 create_frag_shader(struct vl_zscan *zscan)
164 {
165 struct ureg_program *shader;
166 struct ureg_src vtex[zscan->num_channels];
167
168 struct ureg_src src, scan, quant;
169
170 struct ureg_dst tmp[zscan->num_channels];
171 struct ureg_dst fragment;
172
173 unsigned i;
174
175 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
176 if (!shader)
177 return NULL;
178
179 for (i = 0; i < zscan->num_channels; ++i)
180 vtex[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX + i, TGSI_INTERPOLATE_LINEAR);
181
182 src = ureg_DECL_sampler(shader, 0);
183 scan = ureg_DECL_sampler(shader, 1);
184 quant = ureg_DECL_sampler(shader, 2);
185
186 for (i = 0; i < zscan->num_channels; ++i)
187 tmp[i] = ureg_DECL_temporary(shader);
188
189 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
190
191 /*
192 * tmp.x = tex(vtex, 1)
193 * tmp.y = vtex.z
194 * fragment = tex(tmp, 0) * quant
195 */
196 for (i = 0; i < zscan->num_channels; ++i)
197 ureg_TEX(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_X), TGSI_TEXTURE_2D, vtex[i], scan);
198
199 for (i = 0; i < zscan->num_channels; ++i)
200 ureg_MOV(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_Y), ureg_scalar(vtex[i], TGSI_SWIZZLE_Z));
201
202 for (i = 0; i < zscan->num_channels; ++i)
203 ureg_TEX(shader, tmp[i], TGSI_TEXTURE_2D, ureg_src(tmp[i]), src);
204
205 // TODO: Fetch quant and use it
206 for (i = 0; i < zscan->num_channels; ++i)
207 ureg_MUL(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X << i), ureg_src(tmp[i]), ureg_imm1f(shader, 1.0f));
208
209 for (i = 0; i < zscan->num_channels; ++i)
210 ureg_release_temporary(shader, tmp[i]);
211 ureg_END(shader);
212
213 return ureg_create_shader_and_destroy(shader, zscan->pipe);
214 }
215
216 static bool
217 init_shaders(struct vl_zscan *zscan)
218 {
219 assert(zscan);
220
221 zscan->vs = create_vert_shader(zscan);
222 if (!zscan->vs)
223 goto error_vs;
224
225 zscan->fs = create_frag_shader(zscan);
226 if (!zscan->fs)
227 goto error_fs;
228
229 return true;
230
231 error_fs:
232 zscan->pipe->delete_vs_state(zscan->pipe, zscan->vs);
233
234 error_vs:
235 return false;
236 }
237
238 static void
239 cleanup_shaders(struct vl_zscan *zscan)
240 {
241 assert(zscan);
242
243 zscan->pipe->delete_vs_state(zscan->pipe, zscan->vs);
244 zscan->pipe->delete_fs_state(zscan->pipe, zscan->fs);
245 }
246
247 static bool
248 init_state(struct vl_zscan *zscan)
249 {
250 struct pipe_blend_state blend;
251 struct pipe_rasterizer_state rs_state;
252 struct pipe_sampler_state sampler;
253 unsigned i;
254
255 assert(zscan);
256
257 memset(&rs_state, 0, sizeof(rs_state));
258 rs_state.gl_rasterization_rules = true;
259 zscan->rs_state = zscan->pipe->create_rasterizer_state(zscan->pipe, &rs_state);
260 if (!zscan->rs_state)
261 goto error_rs_state;
262
263 memset(&blend, 0, sizeof blend);
264
265 blend.independent_blend_enable = 0;
266 blend.rt[0].blend_enable = 0;
267 blend.rt[0].rgb_func = PIPE_BLEND_ADD;
268 blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE;
269 blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
270 blend.rt[0].alpha_func = PIPE_BLEND_ADD;
271 blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
272 blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
273 blend.logicop_enable = 0;
274 blend.logicop_func = PIPE_LOGICOP_CLEAR;
275 /* Needed to allow color writes to FB, even if blending disabled */
276 blend.rt[0].colormask = PIPE_MASK_RGBA;
277 blend.dither = 0;
278 zscan->blend = zscan->pipe->create_blend_state(zscan->pipe, &blend);
279 if (!zscan->blend)
280 goto error_blend;
281
282 for (i = 0; i < 3; ++i) {
283 memset(&sampler, 0, sizeof(sampler));
284 sampler.wrap_s = PIPE_TEX_WRAP_REPEAT;
285 sampler.wrap_t = PIPE_TEX_WRAP_REPEAT;
286 sampler.wrap_r = PIPE_TEX_WRAP_REPEAT;
287 sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
288 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
289 sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
290 sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
291 sampler.compare_func = PIPE_FUNC_ALWAYS;
292 sampler.normalized_coords = 1;
293 zscan->samplers[i] = zscan->pipe->create_sampler_state(zscan->pipe, &sampler);
294 if (!zscan->samplers[i])
295 goto error_samplers;
296 }
297
298 return true;
299
300 error_samplers:
301 for (i = 0; i < 2; ++i)
302 if (zscan->samplers[i])
303 zscan->pipe->delete_sampler_state(zscan->pipe, zscan->samplers[i]);
304
305 zscan->pipe->delete_rasterizer_state(zscan->pipe, zscan->rs_state);
306
307 error_blend:
308 zscan->pipe->delete_blend_state(zscan->pipe, zscan->blend);
309
310 error_rs_state:
311 return false;
312 }
313
314 static void
315 cleanup_state(struct vl_zscan *zscan)
316 {
317 unsigned i;
318
319 assert(zscan);
320
321 for (i = 0; i < 3; ++i)
322 zscan->pipe->delete_sampler_state(zscan->pipe, zscan->samplers[i]);
323
324 zscan->pipe->delete_rasterizer_state(zscan->pipe, zscan->rs_state);
325 zscan->pipe->delete_blend_state(zscan->pipe, zscan->blend);
326 }
327
328 struct pipe_sampler_view *
329 vl_zscan_layout(struct pipe_context *pipe, const int layout[64], unsigned blocks_per_line)
330 {
331 const unsigned total_size = blocks_per_line * BLOCK_WIDTH * BLOCK_HEIGHT;
332
333 int patched_layout[64];
334
335 struct pipe_resource res_tmpl, *res;
336 struct pipe_sampler_view sv_tmpl, *sv;
337 struct pipe_transfer *buf_transfer;
338 unsigned x, y, i, pitch;
339 float *f;
340
341 struct pipe_box rect =
342 {
343 0, 0, 0,
344 BLOCK_WIDTH * blocks_per_line,
345 BLOCK_HEIGHT,
346 1
347 };
348
349 assert(pipe && layout && blocks_per_line);
350
351 for (i = 0; i < 64; ++i)
352 patched_layout[layout[i]] = i;
353
354 memset(&res_tmpl, 0, sizeof(res_tmpl));
355 res_tmpl.target = PIPE_TEXTURE_2D;
356 res_tmpl.format = PIPE_FORMAT_R32_FLOAT;
357 res_tmpl.width0 = BLOCK_WIDTH * blocks_per_line;
358 res_tmpl.height0 = BLOCK_HEIGHT;
359 res_tmpl.depth0 = 1;
360 res_tmpl.array_size = 1;
361 res_tmpl.usage = PIPE_USAGE_IMMUTABLE;
362 res_tmpl.bind = PIPE_BIND_SAMPLER_VIEW;
363
364 res = pipe->screen->resource_create(pipe->screen, &res_tmpl);
365 if (!res)
366 goto error_resource;
367
368 buf_transfer = pipe->get_transfer
369 (
370 pipe, res,
371 0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
372 &rect
373 );
374 if (!buf_transfer)
375 goto error_transfer;
376
377 pitch = buf_transfer->stride / sizeof(float);
378
379 f = pipe->transfer_map(pipe, buf_transfer);
380 if (!f)
381 goto error_map;
382
383 for (i = 0; i < blocks_per_line; ++i)
384 for (y = 0; y < BLOCK_HEIGHT; ++y)
385 for (x = 0; x < BLOCK_WIDTH; ++x) {
386 float addr = patched_layout[x + y * BLOCK_WIDTH] +
387 i * BLOCK_WIDTH * BLOCK_HEIGHT;
388
389 addr /= total_size;
390
391 f[i * BLOCK_WIDTH + y * pitch + x] = addr;
392 }
393
394 pipe->transfer_unmap(pipe, buf_transfer);
395 pipe->transfer_destroy(pipe, buf_transfer);
396
397 memset(&sv_tmpl, 0, sizeof(sv_tmpl));
398 u_sampler_view_default_template(&sv_tmpl, res, res->format);
399 sv = pipe->create_sampler_view(pipe, res, &sv_tmpl);
400 pipe_resource_reference(&res, NULL);
401 if (!sv)
402 goto error_map;
403
404 return sv;
405
406 error_map:
407 pipe->transfer_destroy(pipe, buf_transfer);
408
409 error_transfer:
410 pipe_resource_reference(&res, NULL);
411
412 error_resource:
413 return NULL;
414 }
415
#if 0
/*
 * TODO: not implemented. Note that these names are also used by the scan
 * tables defined earlier in this file.
 */
struct pipe_sampler_view *
vl_zscan_normal(struct pipe_context *pipe, unsigned blocks_per_line);

struct pipe_sampler_view *
vl_zscan_alternate(struct pipe_context *pipe, unsigned blocks_per_line);
#endif
424
425 bool
426 vl_zscan_init(struct vl_zscan *zscan, struct pipe_context *pipe,
427 unsigned buffer_width, unsigned buffer_height,
428 unsigned blocks_per_line, unsigned blocks_total,
429 unsigned num_channels)
430 {
431 assert(zscan && pipe);
432
433 zscan->pipe = pipe;
434 zscan->buffer_width = buffer_width;
435 zscan->buffer_height = buffer_height;
436 zscan->num_channels = num_channels;
437 zscan->blocks_per_line = blocks_per_line;
438 zscan->blocks_total = blocks_total;
439
440 if(!init_shaders(zscan))
441 return false;
442
443 if(!init_state(zscan)) {
444 cleanup_shaders(zscan);
445 return false;
446 }
447
448 return true;
449 }
450
/*
 * Release everything vl_zscan_init() created: first the shaders, then the
 * pipeline state objects.
 */
void
vl_zscan_cleanup(struct vl_zscan *zscan)
{
   assert(zscan);

   /* shaders */
   cleanup_shaders(zscan);

   /* samplers, rasterizer and blend state */
   cleanup_state(zscan);
}
459
#if 0
/* TODO: not implemented, argument list still undecided */
void
vl_zscan_upload_quant(struct vl_zscan *zscan, ...);
#endif
465
466 bool
467 vl_zscan_init_buffer(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer,
468 struct pipe_sampler_view *src, struct pipe_surface *dst)
469 {
470 assert(zscan && buffer);
471
472 memset(buffer, 0, sizeof(struct vl_zscan_buffer));
473
474 buffer->zscan = zscan;
475
476 pipe_sampler_view_reference(&buffer->src, src);
477
478 buffer->viewport.scale[0] = dst->width;
479 buffer->viewport.scale[1] = dst->height;
480 buffer->viewport.scale[2] = 1;
481 buffer->viewport.scale[3] = 1;
482 buffer->viewport.translate[0] = 0;
483 buffer->viewport.translate[1] = 0;
484 buffer->viewport.translate[2] = 0;
485 buffer->viewport.translate[3] = 0;
486
487 buffer->fb_state.width = dst->width;
488 buffer->fb_state.height = dst->height;
489 buffer->fb_state.nr_cbufs = 1;
490 pipe_surface_reference(&buffer->fb_state.cbufs[0], dst);
491
492 return true;
493 }
494
495 void
496 vl_zscan_cleanup_buffer(struct vl_zscan_buffer *buffer)
497 {
498 assert(buffer);
499
500 pipe_sampler_view_reference(&buffer->src, NULL);
501 pipe_sampler_view_reference(&buffer->layout, NULL);
502 pipe_sampler_view_reference(&buffer->quant, NULL);
503 pipe_surface_reference(&buffer->fb_state.cbufs[0], NULL);
504 }
505
506 void
507 vl_zscan_set_layout(struct vl_zscan_buffer *buffer, struct pipe_sampler_view *layout)
508 {
509 assert(buffer);
510 assert(layout);
511
512 pipe_sampler_view_reference(&buffer->layout, layout);
513 }
514
515 void
516 vl_zscan_render(struct vl_zscan_buffer *buffer, unsigned num_instances)
517 {
518 struct vl_zscan *zscan;
519
520 assert(buffer);
521
522 zscan = buffer->zscan;
523
524 zscan->pipe->bind_rasterizer_state(zscan->pipe, zscan->rs_state);
525 zscan->pipe->bind_blend_state(zscan->pipe, zscan->blend);
526 zscan->pipe->bind_fragment_sampler_states(zscan->pipe, 2, zscan->samplers);
527 zscan->pipe->set_framebuffer_state(zscan->pipe, &buffer->fb_state);
528 zscan->pipe->set_viewport_state(zscan->pipe, &buffer->viewport);
529 zscan->pipe->set_fragment_sampler_views(zscan->pipe, 2, &buffer->src);
530 zscan->pipe->bind_vs_state(zscan->pipe, zscan->vs);
531 zscan->pipe->bind_fs_state(zscan->pipe, zscan->fs);
532 util_draw_arrays_instanced(zscan->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances);
533 }