--- /dev/null
- 'vl/vl_shader_build.c',
+ Import('*')
+
+ from sys import executable as python_cmd
+
+ env.Append(CPPPATH = [
+ 'indices',
+ 'util',
+ ])
+
+ env.CodeGenerate(
+ target = 'indices/u_indices_gen.c',
+ script = 'indices/u_indices_gen.py',
+ source = [],
+ command = python_cmd + ' $SCRIPT > $TARGET'
+ )
+
+ env.CodeGenerate(
+ target = 'indices/u_unfilled_gen.c',
+ script = 'indices/u_unfilled_gen.py',
+ source = [],
+ command = python_cmd + ' $SCRIPT > $TARGET'
+ )
+
+ env.CodeGenerate(
+ target = 'util/u_format_table.c',
+ script = 'util/u_format_table.py',
+ source = ['util/u_format.csv'],
+ command = 'python $SCRIPT $SOURCE > $TARGET'
+ )
+
+ env.CodeGenerate(
+ target = File('util/u_format_pack.h').srcnode(),
+ script = 'util/u_format_pack.py',
+ source = ['util/u_format.csv'],
+ command = 'python $SCRIPT $SOURCE > $TARGET'
+ )
+
+ env.CodeGenerate(
+ target = 'util/u_format_access.c',
+ script = 'util/u_format_access.py',
+ source = ['util/u_format.csv'],
+ command = 'python $SCRIPT $SOURCE > $TARGET'
+ )
+
+ source = [
+ 'cso_cache/cso_context.c',
+ 'cso_cache/cso_cache.c',
+ 'cso_cache/cso_hash.c',
+ 'draw/draw_context.c',
+ 'draw/draw_pipe.c',
+ 'draw/draw_pipe_aaline.c',
+ 'draw/draw_pipe_aapoint.c',
+ 'draw/draw_pipe_clip.c',
+ 'draw/draw_pipe_cull.c',
+ 'draw/draw_pipe_flatshade.c',
+ 'draw/draw_pipe_offset.c',
+ 'draw/draw_pipe_pstipple.c',
+ 'draw/draw_pipe_stipple.c',
+ 'draw/draw_pipe_twoside.c',
+ 'draw/draw_pipe_unfilled.c',
+ 'draw/draw_pipe_util.c',
+ 'draw/draw_pipe_validate.c',
+ 'draw/draw_pipe_vbuf.c',
+ 'draw/draw_pipe_wide_line.c',
+ 'draw/draw_pipe_wide_point.c',
+ 'draw/draw_pt.c',
+ 'draw/draw_pt_elts.c',
+ 'draw/draw_pt_emit.c',
+ 'draw/draw_pt_fetch.c',
+ 'draw/draw_pt_fetch_emit.c',
+ 'draw/draw_pt_fetch_shade_emit.c',
+ 'draw/draw_pt_fetch_shade_pipeline.c',
+ 'draw/draw_pt_post_vs.c',
+ 'draw/draw_pt_util.c',
+ 'draw/draw_pt_varray.c',
+ 'draw/draw_pt_vcache.c',
+ 'draw/draw_vertex.c',
+ 'draw/draw_vs.c',
+ 'draw/draw_vs_aos.c',
+ 'draw/draw_vs_aos_io.c',
+ 'draw/draw_vs_aos_machine.c',
+ 'draw/draw_vs_exec.c',
+ 'draw/draw_vs_llvm.c',
+ 'draw/draw_vs_ppc.c',
+ 'draw/draw_vs_sse.c',
+ 'draw/draw_vs_varient.c',
+ 'draw/draw_gs.c',
+ #'indices/u_indices.c',
+ #'indices/u_unfilled_indices.c',
+ 'indices/u_indices_gen.c',
+ 'indices/u_unfilled_gen.c',
+ 'os/os_misc.c',
+ 'os/os_stream_log.c',
+ 'os/os_stream_stdc.c',
+ 'os/os_stream_str.c',
+ 'os/os_stream_null.c',
+ 'os/os_time.c',
+ 'pipebuffer/pb_buffer_fenced.c',
+ 'pipebuffer/pb_buffer_malloc.c',
+ 'pipebuffer/pb_bufmgr_alt.c',
+ 'pipebuffer/pb_bufmgr_cache.c',
+ 'pipebuffer/pb_bufmgr_debug.c',
+ 'pipebuffer/pb_bufmgr_mm.c',
+ 'pipebuffer/pb_bufmgr_ondemand.c',
+ 'pipebuffer/pb_bufmgr_pool.c',
+ 'pipebuffer/pb_bufmgr_slab.c',
+ 'pipebuffer/pb_validate.c',
+ 'rbug/rbug_core.c',
+ 'rbug/rbug_shader.c',
+ 'rbug/rbug_context.c',
+ 'rbug/rbug_texture.c',
+ 'rbug/rbug_demarshal.c',
+ 'rbug/rbug_connection.c',
+ 'rtasm/rtasm_cpu.c',
+ 'rtasm/rtasm_execmem.c',
+ 'rtasm/rtasm_x86sse.c',
+ 'rtasm/rtasm_ppc.c',
+ 'rtasm/rtasm_ppc_spe.c',
+ 'tgsi/tgsi_build.c',
+ 'tgsi/tgsi_dump.c',
+ 'tgsi/tgsi_exec.c',
+ 'tgsi/tgsi_info.c',
+ 'tgsi/tgsi_iterate.c',
+ 'tgsi/tgsi_parse.c',
+ 'tgsi/tgsi_sanity.c',
+ 'tgsi/tgsi_scan.c',
+ 'tgsi/tgsi_ppc.c',
+ 'tgsi/tgsi_sse2.c',
+ 'tgsi/tgsi_text.c',
+ 'tgsi/tgsi_transform.c',
+ 'tgsi/tgsi_ureg.c',
+ 'tgsi/tgsi_util.c',
+ 'translate/translate_generic.c',
+ 'translate/translate_sse.c',
+ 'translate/translate.c',
+ 'translate/translate_cache.c',
+ 'util/u_bitmask.c',
+ 'util/u_blit.c',
+ 'util/u_blitter.c',
+ 'util/u_cache.c',
+ 'util/u_cpu_detect.c',
+ 'util/u_debug.c',
+ 'util/u_debug_memory.c',
+ 'util/u_debug_stack.c',
+ 'util/u_debug_symbol.c',
+ 'util/u_dump_defines.c',
+ 'util/u_dump_state.c',
+ 'util/u_dl.c',
+ 'util/u_draw_quad.c',
+ 'util/u_format_access.c',
+ 'util/u_format_table.c',
+ 'util/u_gen_mipmap.c',
+ 'util/u_handle_table.c',
+ 'util/u_hash.c',
+ 'util/u_hash_table.c',
+ 'util/u_keymap.c',
+ 'util/u_network.c',
+ 'util/u_math.c',
+ 'util/u_mm.c',
+ 'util/u_rect.c',
+ 'util/u_ringbuffer.c',
+ 'util/u_simple_shaders.c',
+ 'util/u_snprintf.c',
+ 'util/u_surface.c',
+ 'util/u_texture.c',
+ 'util/u_tile.c',
+ 'util/u_timed_winsys.c',
+ 'util/u_upload_mgr.c',
+ 'util/u_simple_screen.c',
+ 'vl/vl_bitstream_parser.c',
+ 'vl/vl_mpeg12_mc_renderer.c',
+ 'vl/vl_compositor.c',
+ 'vl/vl_csc.c',
+ ]
+
+ if drawllvm:
+ source += [
+ 'gallivm/lp_bld_alpha.c',
+ 'gallivm/lp_bld_arit.c',
+ 'gallivm/lp_bld_blend_aos.c',
+ 'gallivm/lp_bld_blend_logicop.c',
+ 'gallivm/lp_bld_blend_soa.c',
+ 'gallivm/lp_bld_const.c',
+ 'gallivm/lp_bld_conv.c',
+ 'gallivm/lp_bld_debug.c',
+ 'gallivm/lp_bld_depth.c',
+ 'gallivm/lp_bld_flow.c',
+ 'gallivm/lp_bld_format_aos.c',
+ 'gallivm/lp_bld_format_query.c',
+ 'gallivm/lp_bld_format_soa.c',
+ 'gallivm/lp_bld_interp.c',
+ 'gallivm/lp_bld_intr.c',
+ 'gallivm/lp_bld_logic.c',
+ 'gallivm/lp_bld_init.cpp',
+ 'gallivm/lp_bld_pack.c',
+ 'gallivm/lp_bld_sample.c',
+ 'gallivm/lp_bld_sample_soa.c',
+ 'gallivm/lp_bld_struct.c',
+ 'gallivm/lp_bld_swizzle.c',
+ 'gallivm/lp_bld_tgsi_soa.c',
+ 'gallivm/lp_bld_type.c',
+ ]
+
+ gallium = env.ConvenienceLibrary(
+ target = 'gallium',
+ source = source,
+ )
+
+ Export('gallium')
#include "vl_mpeg12_mc_renderer.h"
#include <assert.h>
#include <pipe/p_context.h>
- #include <pipe/p_inlines.h>
+ #include <util/u_inlines.h>
+ #include <util/u_format.h>
#include <util/u_math.h>
#include <util/u_memory.h>
-#include <tgsi/tgsi_parse.h>
-#include <tgsi/tgsi_build.h>
-#include "vl_shader_build.h"
+#include <tgsi/tgsi_ureg.h>
#define DEFAULT_BUF_ALIGNMENT 1
#define MACROBLOCK_WIDTH 16
NUM_MACROBLOCK_TYPES
};
/*
 * Diff hunk for create_intra_vert_shader(): '-' lines are the removed version
 * that assembled a TGSI token buffer by hand, '+' lines are the replacement
 * written with the tgsi_ureg helpers.
 *
 * New version: declares vertex inputs 0..3 and outputs POSITION 0 plus
 * GENERIC 1..3, emits a MOV from each input to its matching output, and
 * stores the resulting shader in r->i_vs.
 *
 * Returns false when ureg_create() or ureg_create_shader_and_destroy() yields
 * NULL, true otherwise (the removed version returned void).
 */
-static void
+static bool
create_intra_vert_shader(struct vl_mpeg12_mc_renderer *r)
{
- const unsigned max_tokens = 50;
-
- struct pipe_shader_state vs;
- struct tgsi_token *tokens;
- struct tgsi_header *header;
-
- struct tgsi_full_declaration decl;
- struct tgsi_full_instruction inst;
-
- unsigned ti;
-
+ struct ureg_program *shader;
+ struct ureg_src vpos, vtex[3];
+ struct ureg_dst o_vpos, o_vtex[3];
unsigned i;
- assert(r);
-
- tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
- header = (struct tgsi_header *) &tokens[0];
- *header = tgsi_build_header();
- *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
-
- ti = 2;
-
- /*
- * decl i0 ; Vertex pos
- * decl i1 ; Luma texcoords
- * decl i2 ; Chroma Cb texcoords
- * decl i3 ; Chroma Cr texcoords
- */
- for (i = 0; i < 4; i++) {
- decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
- ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
- }
+ shader = ureg_create(TGSI_PROCESSOR_VERTEX);
+ if (!shader)
+ return false;
- /*
- * decl o0 ; Vertex pos
- * decl o1 ; Luma texcoords
- * decl o2 ; Chroma Cb texcoords
- * decl o3 ; Chroma Cr texcoords
- */
- for (i = 0; i < 4; i++) {
- decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
- ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
- }
/* Input 0 is the vertex position; inputs 1..3 are the three texcoord sets. */
+ vpos = ureg_DECL_vs_input(shader, 0);
+ for (i = 0; i < 3; ++i)
+ vtex[i] = ureg_DECL_vs_input(shader, i + 1);
+ o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
+ for (i = 0; i < 3; ++i)
+ o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
/*
- * mov o0, i0 ; Move input vertex pos to output
- * mov o1, i1 ; Move input luma texcoords to output
- * mov o2, i2 ; Move input chroma Cb texcoords to output
- * mov o3, i3 ; Move input chroma Cr texcoords to output
+ * o_vpos = vpos
+ * o_vtex[0..2] = vtex[0..2]
*/
- for (i = 0; i < 4; ++i) {
- inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
- ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
- }
+ ureg_MOV(shader, o_vpos, vpos);
+ for (i = 0; i < 3; ++i)
+ ureg_MOV(shader, o_vtex[i], vtex[i]);
- /* end */
- inst = vl_end();
- ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+ ureg_END(shader);
- assert(ti <= max_tokens);
+ r->i_vs = ureg_create_shader_and_destroy(shader, r->pipe);
+ if (!r->i_vs)
+ return false;
- vs.tokens = tokens;
- r->i_vs = r->pipe->create_vs_state(r->pipe, &vs);
- free(tokens);
+ return true;
}
/*
 * Diff hunk for create_intra_frag_shader(): '-' lines are the removed
 * hand-built TGSI token version, '+' lines are the tgsi_ureg replacement.
 *
 * New version: declares three linearly interpolated GENERIC inputs (semantic
 * indices 1..3), one sampler per input, two temporaries and a COLOR 0 output.
 * Each texture is fetched into `temp`, and its .x value is copied into the
 * next channel of `texel` (x, y, z in turn). The output is `texel` multiplied
 * by SCALE_FACTOR_16_TO_9, now passed as an immediate where the removed
 * version read the factor from constant c0. The shader is stored in r->i_fs.
 *
 * Returns false when ureg_create() or ureg_create_shader_and_destroy() yields
 * NULL, true otherwise (the removed version returned void).
 */
-static void
+static bool
create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
{
- const unsigned max_tokens = 100;
-
- struct pipe_shader_state fs;
- struct tgsi_token *tokens;
- struct tgsi_header *header;
-
- struct tgsi_full_declaration decl;
- struct tgsi_full_instruction inst;
-
- unsigned ti;
-
+ struct ureg_program *shader;
+ struct ureg_src tc[3];
+ struct ureg_src sampler[3];
+ struct ureg_dst texel, temp;
+ struct ureg_dst fragment;
unsigned i;
- assert(r);
-
- tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
- header = (struct tgsi_header *) &tokens[0];
- *header = tgsi_build_header();
- *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
-
- ti = 2;
-
- /*
- * decl i0 ; Luma texcoords
- * decl i1 ; Chroma Cb texcoords
- * decl i2 ; Chroma Cr texcoords
- */
- for (i = 0; i < 3; ++i) {
- decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
- ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
- }
-
- /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
- decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
- ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
- /* decl o0 ; Fragment color */
- decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
- ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
- /* decl t0, t1 */
- decl = vl_decl_temps(0, 1);
- ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+ shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+ if (!shader)
+ return false;
- /*
- * decl s0 ; Sampler for luma texture
- * decl s1 ; Sampler for chroma Cb texture
- * decl s2 ; Sampler for chroma Cr texture
- */
- for (i = 0; i < 3; ++i) {
- decl = vl_decl_samplers(i, i);
- ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+ for (i = 0; i < 3; ++i) {
+ tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
+ sampler[i] = ureg_DECL_sampler(shader, i);
}
+ texel = ureg_DECL_temporary(shader);
+ temp = ureg_DECL_temporary(shader);
+ fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
/*
- * tex2d t1, i0, s0 ; Read texel from luma texture
- * mov t0.x, t1.x ; Move luma sample into .x component
- * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
- * mov t0.y, t1.x ; Move Cb sample into .y component
- * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
- * mov t0.z, t1.x ; Move Cr sample into .z component
+ * texel.r = tex(tc[0], sampler[0])
+ * texel.g = tex(tc[1], sampler[1])
+ * texel.b = tex(tc[2], sampler[2])
+ * fragment = texel * scale
*/
for (i = 0; i < 3; ++i) {
- inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
- ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
- inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
- inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
- inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
- inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
- inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X << i;
- ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+ /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
+ ureg_TEX(shader, temp, TGSI_TEXTURE_2D, tc[i], sampler[i]);
+ ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(temp), TGSI_SWIZZLE_X));
}
+ ureg_MUL(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X));
- /* mul o0, t0, c0 ; Rescale texel to correct range */
- inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
- ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
- /* end */
- inst = vl_end();
- ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+ ureg_release_temporary(shader, texel);
+ ureg_release_temporary(shader, temp);
+ ureg_END(shader);
- assert(ti <= max_tokens);
+ r->i_fs = ureg_create_shader_and_destroy(shader, r->pipe);
+ if (!r->i_fs)
+ return false;
- fs.tokens = tokens;
- r->i_fs = r->pipe->create_fs_state(r->pipe, &fs);
- free(tokens);
+ return true;
}
/*
 * Diff hunk for create_frame_pred_vert_shader(): '-' lines are the removed
 * hand-built TGSI token version, '+' lines are the tgsi_ureg replacement.
 *
 * New version: declares vertex inputs 0..4 and outputs POSITION 0 plus
 * GENERIC 1..4. The position and the first three texcoord sets are copied
 * through; the fourth output is vpos + vtex[3] (the motion vector applied to
 * the vertex position). The shader is stored in r->p_vs[0].
 *
 * Returns false when ureg_create() or ureg_create_shader_and_destroy() yields
 * NULL, true otherwise (the removed version returned void).
 */
-static void
+static bool
create_frame_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
{
- const unsigned max_tokens = 100;
-
- struct pipe_shader_state vs;
- struct tgsi_token *tokens;
- struct tgsi_header *header;
-
- struct tgsi_full_declaration decl;
- struct tgsi_full_instruction inst;
-
- unsigned ti;
-
+ struct ureg_program *shader;
+ struct ureg_src vpos, vtex[4];
+ struct ureg_dst o_vpos, o_vtex[4];
unsigned i;
- assert(r);
-
- tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
- header = (struct tgsi_header *) &tokens[0];
- *header = tgsi_build_header();
- *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
-
- ti = 2;
-
- /*
- * decl i0 ; Vertex pos
- * decl i1 ; Luma texcoords
- * decl i2 ; Chroma Cb texcoords
- * decl i3 ; Chroma Cr texcoords
- * decl i4 ; Ref surface top field texcoords
- * decl i5 ; Ref surface bottom field texcoords (unused, packed in the same stream)
- */
- for (i = 0; i < 6; i++) {
- decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
- ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
- }
+ shader = ureg_create(TGSI_PROCESSOR_VERTEX);
+ if (!shader)
+ return false;
- /*
- * decl o0 ; Vertex pos
- * decl o1 ; Luma texcoords
- * decl o2 ; Chroma Cb texcoords
- * decl o3 ; Chroma Cr texcoords
- * decl o4 ; Ref macroblock texcoords
- */
- for (i = 0; i < 5; i++) {
- decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
- ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
- }
/* The removed version also declared input 5; the new one declares only 0..4. */
+ vpos = ureg_DECL_vs_input(shader, 0);
+ for (i = 0; i < 4; ++i)
+ vtex[i] = ureg_DECL_vs_input(shader, i + 1);
+ o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
+ for (i = 0; i < 4; ++i)
+ o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
/*
- * mov o0, i0 ; Move input vertex pos to output
- * mov o1, i1 ; Move input luma texcoords to output
- * mov o2, i2 ; Move input chroma Cb texcoords to output
- * mov o3, i3 ; Move input chroma Cr texcoords to output
+ * o_vpos = vpos
+ * o_vtex[0..2] = vtex[0..2]
+ * o_vtex[3] = vpos + vtex[3] // Apply motion vector
*/
- for (i = 0; i < 4; ++i) {
- inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
- ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
- }
-
- /* add o4, i0, i4 ; Translate vertex pos by motion vec to form ref macroblock texcoords */
- inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, 4);
- ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+ ureg_MOV(shader, o_vpos, vpos);
+ for (i = 0; i < 3; ++i)
+ ureg_MOV(shader, o_vtex[i], vtex[i]);
+ ureg_ADD(shader, o_vtex[3], vpos, vtex[3]);
- /* end */
- inst = vl_end();
- ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+ ureg_END(shader);
- assert(ti <= max_tokens);
+ r->p_vs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
+ if (!r->p_vs[0])
+ return false;
- vs.tokens = tokens;
- r->p_vs[0] = r->pipe->create_vs_state(r->pipe, &vs);
- free(tokens);
+ return true;
}
/*
 * Placeholder for the field-prediction vertex shader: the body only asserts
 * false. The '+' lines of this hunk wrap it in #if 0 / #endif, so it is
 * compiled out.
 */
+ #if 0
static void
create_field_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
{
assert(false);
}
+ #endif
/*
 * Diff hunk for create_frame_pred_frag_shader(): '-' lines are the removed
 * hand-built TGSI token version, '+' lines are the tgsi_ureg replacement.
 *
 * New version: declares four linearly interpolated GENERIC inputs (semantic
 * indices 1..4), one sampler per input, two temporaries and a COLOR 0 output.
 * The first three textures are fetched one at a time and their .x values are
 * packed into the x/y/z channels of `texel`; the fourth texture is fetched
 * into `ref`. The output is texel * SCALE_FACTOR_16_TO_9 + ref, computed with
 * a single MAD and an immediate scale where the removed version used
 * constant c0 with separate MUL and ADD. The shader is stored in r->p_fs[0].
 *
 * Returns false when ureg_create() or ureg_create_shader_and_destroy() yields
 * NULL, true otherwise (the removed version returned void).
 */
-static void
+static bool
create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
{
- const unsigned max_tokens = 100;
-
- struct pipe_shader_state fs;
- struct tgsi_token *tokens;
- struct tgsi_header *header;
-
- struct tgsi_full_declaration decl;
- struct tgsi_full_instruction inst;
-
- unsigned ti;
-
+ struct ureg_program *shader;
+ struct ureg_src tc[4];
+ struct ureg_src sampler[4];
+ struct ureg_dst texel, ref;
+ struct ureg_dst fragment;
unsigned i;
- assert(r);
-
- tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
- header = (struct tgsi_header *) &tokens[0];
- *header = tgsi_build_header();
- *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
-
- ti = 2;
-
- /*
- * decl i0 ; Luma texcoords
- * decl i1 ; Chroma Cb texcoords
- * decl i2 ; Chroma Cr texcoords
- * decl i3 ; Ref macroblock texcoords
- */
- for (i = 0; i < 4; ++i) {
- decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
- ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
- }
-
- /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
- decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
- ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
- /* decl o0 ; Fragment color */
- decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
- ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
- /* decl t0, t1 */
- decl = vl_decl_temps(0, 1);
- ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+ shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+ if (!shader)
+ return false;
- /*
- * decl s0 ; Sampler for luma texture
- * decl s1 ; Sampler for chroma Cb texture
- * decl s2 ; Sampler for chroma Cr texture
- * decl s3 ; Sampler for ref surface texture
- */
- for (i = 0; i < 4; ++i) {
- decl = vl_decl_samplers(i, i);
- ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+ for (i = 0; i < 4; ++i) {
+ tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
+ sampler[i] = ureg_DECL_sampler(shader, i);
}
+ texel = ureg_DECL_temporary(shader);
+ ref = ureg_DECL_temporary(shader);
+ fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
/*
- * tex2d t1, i0, s0 ; Read texel from luma texture
- * mov t0.x, t1.x ; Move luma sample into .x component
- * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
- * mov t0.y, t1.x ; Move Cb sample into .y component
- * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
- * mov t0.z, t1.x ; Move Cr sample into .z component
+ * texel.r = tex(tc[0], sampler[0])
+ * texel.g = tex(tc[1], sampler[1])
+ * texel.b = tex(tc[2], sampler[2])
+ * ref = tex(tc[3], sampler[3])
+ * fragment = texel * scale + ref
*/
/* `ref` doubles as the scratch register for the three plane fetches below. */
for (i = 0; i < 3; ++i) {
- inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
- ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
- inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
- inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
- inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
- inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
- inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X << i;
- ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+ /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
+ ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[i], sampler[i]);
+ ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(ref), TGSI_SWIZZLE_X));
}
+ ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[3], sampler[3]);
+ ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref));
- /* mul t0, t0, c0 ; Rescale texel to correct range */
- inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
- ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
- /* tex2d t1, i3, s3 ; Read texel from ref macroblock */
- inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 3, TGSI_FILE_SAMPLER, 3);
- ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
- /* add o0, t0, t1 ; Add ref and differential to form final output */
- inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
- ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+ ureg_release_temporary(shader, texel);
+ ureg_release_temporary(shader, ref);
+ ureg_END(shader);
- /* end */
- inst = vl_end();
- ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
- assert(ti <= max_tokens);
+ r->p_fs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
+ if (!r->p_fs[0])
+ return false;
- fs.tokens = tokens;
- r->p_fs[0] = r->pipe->create_fs_state(r->pipe, &fs);
- free(tokens);
+ return true;
}
/*
 * Placeholder for the field-prediction fragment shader: the body only asserts
 * false. The '+' lines of this hunk wrap it in #if 0 / #endif, so it is
 * compiled out.
 */
+ #if 0
static void
create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
{
assert(false);
}
+ #endif
/*
 * Diff hunk for create_frame_bi_pred_vert_shader(): '-' lines are the removed
 * hand-built TGSI token version, '+' lines are the tgsi_ureg replacement.
 *
 * New version: declares vertex inputs 0..4 and 6 (input 5 is skipped) and
 * outputs POSITION 0 plus GENERIC 1..5. The position and the first three
 * texcoord sets are copied through; outputs 4 and 5 are vpos + vtex[3] and
 * vpos + vtex[4] (one motion vector per reference). The shader is stored in
 * r->b_vs[0].
 *
 * Returns false when ureg_create() or ureg_create_shader_and_destroy() yields
 * NULL, true otherwise (the removed version returned void).
 */
-static void
+static bool
create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
{
- const unsigned max_tokens = 100;
-
- struct pipe_shader_state vs;
- struct tgsi_token *tokens;
- struct tgsi_header *header;
-
- struct tgsi_full_declaration decl;
- struct tgsi_full_instruction inst;
-
- unsigned ti;
-
+ struct ureg_program *shader;
+ struct ureg_src vpos, vtex[5];
+ struct ureg_dst o_vpos, o_vtex[5];
unsigned i;
- assert(r);
-
- tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
- header = (struct tgsi_header *) &tokens[0];
- *header = tgsi_build_header();
- *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
-
- ti = 2;
-
- /*
- * decl i0 ; Vertex pos
- * decl i1 ; Luma texcoords
- * decl i2 ; Chroma Cb texcoords
- * decl i3 ; Chroma Cr texcoords
- * decl i4 ; First ref macroblock top field texcoords
- * decl i5 ; First ref macroblock bottom field texcoords (unused, packed in the same stream)
- * decl i6 ; Second ref macroblock top field texcoords
- * decl i7 ; Second ref macroblock bottom field texcoords (unused, packed in the same stream)
- */
- for (i = 0; i < 8; i++) {
- decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
- ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
- }
-
- /*
- * decl o0 ; Vertex pos
- * decl o1 ; Luma texcoords
- * decl o2 ; Chroma Cb texcoords
- * decl o3 ; Chroma Cr texcoords
- * decl o4 ; First ref macroblock texcoords
- * decl o5 ; Second ref macroblock texcoords
- */
- for (i = 0; i < 6; i++) {
- decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
- ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
- }
+ shader = ureg_create(TGSI_PROCESSOR_VERTEX);
+ if (!shader)
+ return false;
- /*
- * mov o0, i0 ; Move input vertex pos to output
- * mov o1, i1 ; Move input luma texcoords to output
- * mov o2, i2 ; Move input chroma Cb texcoords to output
- * mov o3, i3 ; Move input chroma Cr texcoords to output
- */
- for (i = 0; i < 4; ++i) {
- inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
- ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
- }
/* The removed comments describe inputs 5 and 7 as unused; neither is declared here. */
+ vpos = ureg_DECL_vs_input(shader, 0);
+ for (i = 0; i < 4; ++i)
+ vtex[i] = ureg_DECL_vs_input(shader, i + 1);
+ /* Skip input 5 */
+ vtex[4] = ureg_DECL_vs_input(shader, 6);
+ o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
+ for (i = 0; i < 5; ++i)
+ o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
/*
- * add o4, i0, i4 ; Translate vertex pos by motion vec to form first ref macroblock texcoords
- * add o5, i0, i6 ; Translate vertex pos by motion vec to form second ref macroblock texcoords
+ * o_vpos = vpos
+ * o_vtex[0..2] = vtex[0..2]
+ * o_vtex[3..4] = vpos + vtex[3..4] // Apply motion vector
*/
- for (i = 0; i < 2; ++i) {
- inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, (i + 2) * 2);
- ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
- }
+ ureg_MOV(shader, o_vpos, vpos);
+ for (i = 0; i < 3; ++i)
+ ureg_MOV(shader, o_vtex[i], vtex[i]);
+ for (i = 3; i < 5; ++i)
+ ureg_ADD(shader, o_vtex[i], vpos, vtex[i]);
- /* end */
- inst = vl_end();
- ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+ ureg_END(shader);
- assert(ti <= max_tokens);
+ r->b_vs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
+ if (!r->b_vs[0])
+ return false;
- vs.tokens = tokens;
- r->b_vs[0] = r->pipe->create_vs_state(r->pipe, &vs);
- free(tokens);
+ return true;
}
/*
 * Placeholder for the field bi-prediction vertex shader: the body only
 * asserts false. The '+' lines of this hunk wrap it in #if 0 / #endif, so it
 * is compiled out.
 */
+ #if 0
static void
create_field_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
{
assert(false);
}
+ #endif
/*
 * Diff hunk for create_frame_bi_pred_frag_shader(): '-' lines are the removed
 * hand-built TGSI token version, '+' lines are the tgsi_ureg replacement.
 *
 * New version: declares five linearly interpolated GENERIC inputs (semantic
 * indices 1..5), one sampler per input, three temporaries and a COLOR 0
 * output. The first three textures are fetched one at a time and their .x
 * values are packed into the x/y/z channels of `texel`; textures 3 and 4 are
 * fetched into ref[0] and ref[1] and blended with LRP at weight 0.5. The
 * output is texel * SCALE_FACTOR_16_TO_9 + ref[0]. Both the scale and the 0.5
 * weight are immediates where the removed version read constants c0 and c1.
 * The shader is stored in r->b_fs[0].
 *
 * Returns false when ureg_create() or ureg_create_shader_and_destroy() yields
 * NULL, true otherwise (the removed version returned void).
 */
-static void
+static bool
create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
{
- const unsigned max_tokens = 100;
-
- struct pipe_shader_state fs;
- struct tgsi_token *tokens;
- struct tgsi_header *header;
-
- struct tgsi_full_declaration decl;
- struct tgsi_full_instruction inst;
-
- unsigned ti;
-
+ struct ureg_program *shader;
+ struct ureg_src tc[5];
+ struct ureg_src sampler[5];
+ struct ureg_dst texel, ref[2];
+ struct ureg_dst fragment;
unsigned i;
- assert(r);
-
- tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
- header = (struct tgsi_header *) &tokens[0];
- *header = tgsi_build_header();
- *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
-
- ti = 2;
-
- /*
- * decl i0 ; Luma texcoords
- * decl i1 ; Chroma Cb texcoords
- * decl i2 ; Chroma Cr texcoords
- * decl i3 ; First ref macroblock texcoords
- * decl i4 ; Second ref macroblock texcoords
- */
- for (i = 0; i < 5; ++i) {
- decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
- ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
- }
-
- /*
- * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
- * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels
- */
- decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
- ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
- /* decl o0 ; Fragment color */
- decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
- ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
- /* decl t0-t2 */
- decl = vl_decl_temps(0, 2);
- ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+ shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+ if (!shader)
+ return false;
- /*
- * decl s0 ; Sampler for luma texture
- * decl s1 ; Sampler for chroma Cb texture
- * decl s2 ; Sampler for chroma Cr texture
- * decl s3 ; Sampler for first ref surface texture
- * decl s4 ; Sampler for second ref surface texture
- */
- for (i = 0; i < 5; ++i) {
- decl = vl_decl_samplers(i, i);
- ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+ for (i = 0; i < 5; ++i) {
+ tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
+ sampler[i] = ureg_DECL_sampler(shader, i);
}
+ texel = ureg_DECL_temporary(shader);
+ ref[0] = ureg_DECL_temporary(shader);
+ ref[1] = ureg_DECL_temporary(shader);
+ fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
/*
- * tex2d t1, i0, s0 ; Read texel from luma texture
- * mov t0.x, t1.x ; Move luma sample into .x component
- * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
- * mov t0.y, t1.x ; Move Cb sample into .y component
- * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
- * mov t0.z, t1.x ; Move Cr sample into .z component
+ * texel.r = tex(tc[0], sampler[0])
+ * texel.g = tex(tc[1], sampler[1])
+ * texel.b = tex(tc[2], sampler[2])
+ * ref[0..1] = tex(tc[3..4], sampler[3..4])
+ * ref[0] = lerp(ref[0], ref[1], 0.5)
+ * fragment = texel * scale + ref[0]
*/
/* ref[0] doubles as the scratch register for the three plane fetches below. */
for (i = 0; i < 3; ++i) {
- inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
- ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
- inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
- inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
- inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
- inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
- inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X << i;
- ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
- }
-
- /* mul t0, t0, c0 ; Rescale texel to correct range */
- inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
- ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
- /*
- * tex2d t1, i3, s3 ; Read texel from first ref macroblock
- * tex2d t2, i4, s4 ; Read texel from second ref macroblock
- */
- for (i = 0; i < 2; ++i) {
- inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, i + 3);
- ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+ /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
+ ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[i], sampler[i]);
+ ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(ref[0]), TGSI_SWIZZLE_X));
}
+ ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[3], sampler[3]);
+ ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[4], sampler[4]);
+ ureg_LRP(shader, ref[0], ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1]));
- /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
- inst = vl_inst4(TGSI_OPCODE_LRP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
- inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
- inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
- inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
- inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X;
- ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+ ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref[0]));
- /* add o0, t0, t1 ; Add past/future ref and differential to form final output */
- inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
- ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+ ureg_release_temporary(shader, texel);
+ ureg_release_temporary(shader, ref[0]);
+ ureg_release_temporary(shader, ref[1]);
+ ureg_END(shader);
- /* end */
- inst = vl_end();
- ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
- assert(ti <= max_tokens);
+ r->b_fs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
+ if (!r->b_fs[0])
+ return false;
- fs.tokens = tokens;
- r->b_fs[0] = r->pipe->create_fs_state(r->pipe, &fs);
- free(tokens);
+ return true;
}
+ #if 0
static void
create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
{
r->viewport.translate[2] = 0;
r->viewport.translate[3] = 0;
-- r->scissor.maxx = r->pot_buffers ?
-- util_next_power_of_two(r->picture_width) : r->picture_width;
-- r->scissor.maxy = r->pot_buffers ?
-- util_next_power_of_two(r->picture_height) : r->picture_height;
--
r->fb_state.width = r->pot_buffers ?
util_next_power_of_two(r->picture_width) : r->picture_width;
r->fb_state.height = r->pot_buffers ?
assert(r);
- pipe_buffer_reference(&r->vs_const_buf.buffer, NULL);
+ pipe_buffer_reference(&r->vs_const_buf, NULL);
- pipe_buffer_reference(&r->fs_const_buf, NULL);
for (i = 0; i < 3; ++i)
pipe_buffer_reference(&r->vertex_bufs.all[i].buffer, NULL);
r->pipe->set_framebuffer_state(r->pipe, &r->fb_state);
r->pipe->set_viewport_state(r->pipe, &r->viewport);
-- r->pipe->set_scissor_state(r->pipe, &r->scissor);
vs_consts = pipe_buffer_map
(
vs_consts->denorm.x = r->surface->width0;
vs_consts->denorm.y = r->surface->height0;
- pipe_buffer_unmap(r->pipe->screen, r->vs_const_buf.buffer);
+ pipe_buffer_unmap(r->pipe->screen, r->vs_const_buf);
r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_VERTEX, 0,
- &r->vs_const_buf);
+ r->vs_const_buf);
- r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_FRAGMENT, 0,
- r->fs_const_buf);
if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) {
r->pipe->set_vertex_buffers(r->pipe, 1, r->vertex_bufs.all);
xfer_buffers_unmap(renderer);
flush(renderer);
}
--
++
new_surface = true;
}