1 /**************************************************************************
3 * Copyright 2016 Nayan Deshmukh.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 #include "pipe/p_context.h"
32 #include "tgsi/tgsi_ureg.h"
34 #include "util/u_draw.h"
35 #include "util/u_memory.h"
36 #include "util/u_math.h"
37 #include "util/u_rect.h"
40 #include "vl_vertex_buffers.h"
41 #include "vl_bicubic_filter.h"
50 create_vert_shader(struct vl_bicubic_filter
*filter
)
52 struct ureg_program
*shader
;
53 struct ureg_src i_vpos
;
54 struct ureg_dst o_vpos
, o_vtex
;
56 shader
= ureg_create(PIPE_SHADER_VERTEX
);
60 i_vpos
= ureg_DECL_vs_input(shader
, 0);
61 o_vpos
= ureg_DECL_output(shader
, TGSI_SEMANTIC_POSITION
, VS_O_VPOS
);
62 o_vtex
= ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_VTEX
);
64 ureg_MOV(shader
, o_vpos
, i_vpos
);
65 ureg_MOV(shader
, o_vtex
, i_vpos
);
69 return ureg_create_shader_and_destroy(shader
, filter
->pipe
);
73 create_frag_shader_cubic_interpolater(struct ureg_program
*shader
, struct ureg_src tex_a
,
74 struct ureg_src tex_b
, struct ureg_src tex_c
,
75 struct ureg_src tex_d
, struct ureg_src t
,
76 struct ureg_dst o_fragment
)
78 struct ureg_dst temp
[11];
82 for(i
= 0; i
< 11; ++i
)
83 temp
[i
] = ureg_DECL_temporary(shader
);
84 t_2
= ureg_DECL_temporary(shader
);
87 * |temp[0]| | 0 2 0 0 | |tex_a|
88 * |temp[1]| = | -1 0 1 0 |* |tex_b|
89 * |temp[2]| | 2 -5 4 -1 | |tex_c|
90 * |temp[3]| | -1 3 -3 1 | |tex_d|
92 ureg_MUL(shader
, temp
[0], tex_b
, ureg_imm1f(shader
, 2.0f
));
94 ureg_MUL(shader
, temp
[1], tex_a
, ureg_imm1f(shader
, -1.0f
));
95 ureg_MAD(shader
, temp
[1], tex_c
, ureg_imm1f(shader
, 1.0f
),
98 ureg_MUL(shader
, temp
[2], tex_a
, ureg_imm1f(shader
, 2.0f
));
99 ureg_MAD(shader
, temp
[2], tex_b
, ureg_imm1f(shader
, -5.0f
),
101 ureg_MAD(shader
, temp
[2], tex_c
, ureg_imm1f(shader
, 4.0f
),
103 ureg_MAD(shader
, temp
[2], tex_d
, ureg_imm1f(shader
, -1.0f
),
106 ureg_MUL(shader
, temp
[3], tex_a
, ureg_imm1f(shader
, -1.0f
));
107 ureg_MAD(shader
, temp
[3], tex_b
, ureg_imm1f(shader
, 3.0f
),
109 ureg_MAD(shader
, temp
[3], tex_c
, ureg_imm1f(shader
, -3.0f
),
111 ureg_MAD(shader
, temp
[3], tex_d
, ureg_imm1f(shader
, 1.0f
),
116 * o_fragment = 0.5*|1 t t^2 t^3|*|temp[0]|
122 ureg_MUL(shader
, t_2
, t
, t
);
123 ureg_MUL(shader
, temp
[4], ureg_src(t_2
), t
);
125 ureg_MUL(shader
, temp
[4], ureg_src(temp
[4]),
127 ureg_MUL(shader
, temp
[5], ureg_src(t_2
),
129 ureg_MUL(shader
, temp
[6], t
,
131 ureg_MUL(shader
, temp
[7], ureg_imm1f(shader
, 1.0f
),
133 ureg_ADD(shader
, temp
[8], ureg_src(temp
[4]),
135 ureg_ADD(shader
, temp
[9], ureg_src(temp
[6]),
138 ureg_ADD(shader
, temp
[10], ureg_src(temp
[8]),
140 ureg_MUL(shader
, o_fragment
, ureg_src(temp
[10]),
141 ureg_imm1f(shader
, 0.5f
));
144 for(i
= 0; i
< 11; ++i
)
145 ureg_release_temporary(shader
, temp
[i
]);
146 ureg_release_temporary(shader
, t_2
);
150 create_frag_shader(struct vl_bicubic_filter
*filter
, unsigned video_width
,
151 unsigned video_height
, struct vertex2f
*offsets
)
153 struct pipe_screen
*screen
= filter
->pipe
->screen
;
154 struct ureg_program
*shader
;
155 struct ureg_src i_vtex
, vtex
;
156 struct ureg_src sampler
;
157 struct ureg_src half_pixel
;
158 struct ureg_dst t_array
[23];
159 struct ureg_dst o_fragment
;
163 if (screen
->get_shader_param(
164 screen
, PIPE_SHADER_FRAGMENT
, PIPE_SHADER_CAP_MAX_TEMPS
) < 23) {
169 shader
= ureg_create(PIPE_SHADER_FRAGMENT
);
174 i_vtex
= ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_VTEX
, TGSI_INTERPOLATE_LINEAR
);
175 sampler
= ureg_DECL_sampler(shader
, 0);
177 for (i
= 0; i
< 23; ++i
)
178 t_array
[i
] = ureg_DECL_temporary(shader
);
179 t
= ureg_DECL_temporary(shader
);
181 half_pixel
= ureg_DECL_constant(shader
, 0);
182 o_fragment
= ureg_DECL_output(shader
, TGSI_SEMANTIC_COLOR
, 0);
185 * temp = (i_vtex - (0.5/dst_size)) * i_size)
187 * vtex = floor(i_vtex)/i_size
189 ureg_SUB(shader
, ureg_writemask(t_array
[21], TGSI_WRITEMASK_XY
),
191 ureg_MUL(shader
, ureg_writemask(t_array
[22], TGSI_WRITEMASK_XY
),
192 ureg_src(t_array
[21]), ureg_imm2f(shader
, video_width
, video_height
));
193 ureg_FRC(shader
, ureg_writemask(t
, TGSI_WRITEMASK_XY
),
194 ureg_src(t_array
[22]));
196 ureg_FLR(shader
, ureg_writemask(t_array
[22], TGSI_WRITEMASK_XY
),
197 ureg_src(t_array
[22]));
198 ureg_DIV(shader
, ureg_writemask(t_array
[22], TGSI_WRITEMASK_XY
),
199 ureg_src(t_array
[22]), ureg_imm2f(shader
, video_width
, video_height
));
202 * t_array[0..*] = vtex + offset[0..*]
203 * t_array[0..*] = tex(t_array[0..*], sampler)
204 * t_array[16+i] = cubic_interpolate(t_array[4*i..4*i+3], t_x)
205 * o_fragment = cubic_interpolate(t_array[16..19], t_y)
207 vtex
= ureg_src(t_array
[22]);
208 for (i
= 0; i
< 16; ++i
) {
209 ureg_ADD(shader
, ureg_writemask(t_array
[i
], TGSI_WRITEMASK_XY
),
210 vtex
, ureg_imm2f(shader
, offsets
[i
].x
, offsets
[i
].y
));
211 ureg_MOV(shader
, ureg_writemask(t_array
[i
], TGSI_WRITEMASK_ZW
),
212 ureg_imm1f(shader
, 0.0f
));
215 for (i
= 0; i
< 16; ++i
) {
216 ureg_TEX(shader
, t_array
[i
], TGSI_TEXTURE_2D
, ureg_src(t_array
[i
]), sampler
);
219 for(i
= 0; i
< 4; ++i
)
220 create_frag_shader_cubic_interpolater(shader
, ureg_src(t_array
[4*i
]),
221 ureg_src(t_array
[4*i
+1]), ureg_src(t_array
[4*i
+2]), ureg_src(t_array
[4*i
+3]),
222 ureg_scalar(ureg_src(t
), TGSI_SWIZZLE_X
), t_array
[16+i
]);
224 create_frag_shader_cubic_interpolater(shader
, ureg_src(t_array
[16]),
225 ureg_src(t_array
[17]), ureg_src(t_array
[18]), ureg_src(t_array
[19]),
226 ureg_scalar(ureg_src(t
), TGSI_SWIZZLE_Y
), o_fragment
);
228 for(i
= 0; i
< 23; ++i
)
229 ureg_release_temporary(shader
, t_array
[i
]);
230 ureg_release_temporary(shader
, t
);
234 return ureg_create_shader_and_destroy(shader
, filter
->pipe
);
238 vl_bicubic_filter_init(struct vl_bicubic_filter
*filter
, struct pipe_context
*pipe
,
239 unsigned width
, unsigned height
)
241 struct pipe_rasterizer_state rs_state
;
242 struct pipe_blend_state blend
;
243 struct vertex2f
*offsets
= NULL
;
244 struct pipe_sampler_state sampler
;
245 struct pipe_vertex_element ve
;
248 assert(filter
&& pipe
);
249 assert(width
&& height
);
251 memset(filter
, 0, sizeof(*filter
));
254 memset(&rs_state
, 0, sizeof(rs_state
));
255 rs_state
.half_pixel_center
= true;
256 rs_state
.bottom_edge_rule
= true;
257 rs_state
.depth_clip
= 1;
258 filter
->rs_state
= pipe
->create_rasterizer_state(pipe
, &rs_state
);
259 if (!filter
->rs_state
)
262 memset(&blend
, 0, sizeof blend
);
263 blend
.rt
[0].rgb_func
= PIPE_BLEND_ADD
;
264 blend
.rt
[0].rgb_src_factor
= PIPE_BLENDFACTOR_ONE
;
265 blend
.rt
[0].rgb_dst_factor
= PIPE_BLENDFACTOR_ONE
;
266 blend
.rt
[0].alpha_func
= PIPE_BLEND_ADD
;
267 blend
.rt
[0].alpha_src_factor
= PIPE_BLENDFACTOR_ONE
;
268 blend
.rt
[0].alpha_dst_factor
= PIPE_BLENDFACTOR_ONE
;
269 blend
.logicop_func
= PIPE_LOGICOP_CLEAR
;
270 blend
.rt
[0].colormask
= PIPE_MASK_RGBA
;
271 filter
->blend
= pipe
->create_blend_state(pipe
, &blend
);
275 memset(&sampler
, 0, sizeof(sampler
));
276 sampler
.wrap_s
= PIPE_TEX_WRAP_CLAMP_TO_EDGE
;
277 sampler
.wrap_t
= PIPE_TEX_WRAP_CLAMP_TO_EDGE
;
278 sampler
.wrap_r
= PIPE_TEX_WRAP_CLAMP_TO_EDGE
;
279 sampler
.min_img_filter
= PIPE_TEX_FILTER_NEAREST
;
280 sampler
.min_mip_filter
= PIPE_TEX_MIPFILTER_NONE
;
281 sampler
.mag_img_filter
= PIPE_TEX_FILTER_NEAREST
;
282 sampler
.compare_mode
= PIPE_TEX_COMPARE_NONE
;
283 sampler
.compare_func
= PIPE_FUNC_ALWAYS
;
284 sampler
.normalized_coords
= 1;
285 filter
->sampler
= pipe
->create_sampler_state(pipe
, &sampler
);
286 if (!filter
->sampler
)
289 filter
->quad
= vl_vb_upload_quads(pipe
);
290 if(!filter
->quad
.buffer
)
293 memset(&ve
, 0, sizeof(ve
));
295 ve
.instance_divisor
= 0;
296 ve
.vertex_buffer_index
= 0;
297 ve
.src_format
= PIPE_FORMAT_R32G32_FLOAT
;
298 filter
->ves
= pipe
->create_vertex_elements_state(pipe
, 1, &ve
);
302 offsets
= MALLOC(sizeof(struct vertex2f
) * 16);
306 offsets
[0].x
= -1.0f
; offsets
[0].y
= -1.0f
;
307 offsets
[1].x
= 0.0f
; offsets
[1].y
= -1.0f
;
308 offsets
[2].x
= 1.0f
; offsets
[2].y
= -1.0f
;
309 offsets
[3].x
= 2.0f
; offsets
[3].y
= -1.0f
;
311 offsets
[4].x
= -1.0f
; offsets
[4].y
= 0.0f
;
312 offsets
[5].x
= 0.0f
; offsets
[5].y
= 0.0f
;
313 offsets
[6].x
= 1.0f
; offsets
[6].y
= 0.0f
;
314 offsets
[7].x
= 2.0f
; offsets
[7].y
= 0.0f
;
316 offsets
[8].x
= -1.0f
; offsets
[8].y
= 1.0f
;
317 offsets
[9].x
= 0.0f
; offsets
[9].y
= 1.0f
;
318 offsets
[10].x
= 1.0f
; offsets
[10].y
= 1.0f
;
319 offsets
[11].x
= 2.0f
; offsets
[11].y
= 1.0f
;
321 offsets
[12].x
= -1.0f
; offsets
[12].y
= 2.0f
;
322 offsets
[13].x
= 0.0f
; offsets
[13].y
= 2.0f
;
323 offsets
[14].x
= 1.0f
; offsets
[14].y
= 2.0f
;
324 offsets
[15].x
= 2.0f
; offsets
[15].y
= 2.0f
;
326 for (i
= 0; i
< 16; ++i
) {
327 offsets
[i
].x
/= width
;
328 offsets
[i
].y
/= height
;
331 filter
->vs
= create_vert_shader(filter
);
335 filter
->fs
= create_frag_shader(filter
, width
, height
, offsets
);
342 pipe
->delete_vs_state(pipe
, filter
->vs
);
347 pipe
->delete_vertex_elements_state(pipe
, filter
->ves
);
350 pipe_resource_reference(&filter
->quad
.buffer
, NULL
);
353 pipe
->delete_sampler_state(pipe
, filter
->sampler
);
356 pipe
->delete_blend_state(pipe
, filter
->blend
);
359 pipe
->delete_rasterizer_state(pipe
, filter
->rs_state
);
366 vl_bicubic_filter_cleanup(struct vl_bicubic_filter
*filter
)
370 filter
->pipe
->delete_sampler_state(filter
->pipe
, filter
->sampler
);
371 filter
->pipe
->delete_blend_state(filter
->pipe
, filter
->blend
);
372 filter
->pipe
->delete_rasterizer_state(filter
->pipe
, filter
->rs_state
);
373 filter
->pipe
->delete_vertex_elements_state(filter
->pipe
, filter
->ves
);
374 pipe_resource_reference(&filter
->quad
.buffer
, NULL
);
376 filter
->pipe
->delete_vs_state(filter
->pipe
, filter
->vs
);
377 filter
->pipe
->delete_fs_state(filter
->pipe
, filter
->fs
);
381 vl_bicubic_filter_render(struct vl_bicubic_filter
*filter
,
382 struct pipe_sampler_view
*src
,
383 struct pipe_surface
*dst
,
384 struct u_rect
*dst_area
,
385 struct u_rect
*dst_clip
)
387 struct pipe_viewport_state viewport
;
388 struct pipe_framebuffer_state fb_state
;
389 struct pipe_scissor_state scissor
;
390 union pipe_color_union clear_color
;
391 struct pipe_transfer
*buf_transfer
;
392 struct pipe_resource
*surface_size
;
393 assert(filter
&& src
&& dst
);
396 scissor
.minx
= dst_clip
->x0
;
397 scissor
.miny
= dst_clip
->y0
;
398 scissor
.maxx
= dst_clip
->x1
;
399 scissor
.maxy
= dst_clip
->y1
;
403 scissor
.maxx
= dst
->width
;
404 scissor
.maxy
= dst
->height
;
407 clear_color
.f
[0] = clear_color
.f
[1] = 0.0f
;
408 clear_color
.f
[2] = clear_color
.f
[3] = 0.0f
;
409 surface_size
= pipe_buffer_create
411 filter
->pipe
->screen
,
412 PIPE_BIND_CONSTANT_BUFFER
,
418 memset(&viewport
, 0, sizeof(viewport
));
420 viewport
.scale
[0] = dst_area
->x1
- dst_area
->x0
;
421 viewport
.scale
[1] = dst_area
->y1
- dst_area
->y0
;
422 viewport
.translate
[0] = dst_area
->x0
;
423 viewport
.translate
[1] = dst_area
->y0
;
425 viewport
.scale
[0] = dst
->width
;
426 viewport
.scale
[1] = dst
->height
;
428 viewport
.scale
[2] = 1;
430 float *ptr
= pipe_buffer_map(filter
->pipe
, surface_size
,
431 PIPE_TRANSFER_WRITE
| PIPE_TRANSFER_DISCARD_RANGE
,
434 ptr
[0] = 0.5f
/viewport
.scale
[0];
435 ptr
[1] = 0.5f
/viewport
.scale
[1];
437 pipe_buffer_unmap(filter
->pipe
, buf_transfer
);
439 memset(&fb_state
, 0, sizeof(fb_state
));
440 fb_state
.width
= dst
->width
;
441 fb_state
.height
= dst
->height
;
442 fb_state
.nr_cbufs
= 1;
443 fb_state
.cbufs
[0] = dst
;
445 filter
->pipe
->set_scissor_states(filter
->pipe
, 0, 1, &scissor
);
446 filter
->pipe
->clear_render_target(filter
->pipe
, dst
, &clear_color
,
447 0, 0, dst
->width
, dst
->height
);
448 pipe_set_constant_buffer(filter
->pipe
, PIPE_SHADER_FRAGMENT
, 0, surface_size
);
449 filter
->pipe
->bind_rasterizer_state(filter
->pipe
, filter
->rs_state
);
450 filter
->pipe
->bind_blend_state(filter
->pipe
, filter
->blend
);
451 filter
->pipe
->bind_sampler_states(filter
->pipe
, PIPE_SHADER_FRAGMENT
,
452 0, 1, &filter
->sampler
);
453 filter
->pipe
->set_sampler_views(filter
->pipe
, PIPE_SHADER_FRAGMENT
,
455 filter
->pipe
->bind_vs_state(filter
->pipe
, filter
->vs
);
456 filter
->pipe
->bind_fs_state(filter
->pipe
, filter
->fs
);
457 filter
->pipe
->set_framebuffer_state(filter
->pipe
, &fb_state
);
458 filter
->pipe
->set_viewport_states(filter
->pipe
, 0, 1, &viewport
);
459 filter
->pipe
->set_vertex_buffers(filter
->pipe
, 0, 1, &filter
->quad
);
460 filter
->pipe
->bind_vertex_elements_state(filter
->pipe
, filter
->ves
);
462 util_draw_arrays(filter
->pipe
, PIPE_PRIM_QUADS
, 0, 4);