Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- //-----------------------------------------------------------------
- // v422to444_v2mult_24t.v - 422 to 444 Format Converter
- // Virtex-II Video Demo Board
- //
- //
- //
- //
- // Author: Gregg C. Hawkes
- // Senior Staff Applications Engineer
- //
- // Video Applications
- // Advanced Products Division
- // Xilinx, Inc.
- //
- // Copyright (c) 1999 Xilinx, Inc.
- // All rights reserved
- //
- // Date: Aug. 6, 2001
- // For: Video Demo Board
- //
- // RESTRICTED RIGHTS LEGEND
- //
- // This software has not been published by the author, and
- // has been disclosed to others for the purpose of enhancing
- // and promoting design productivity in Xilinx products.
- //
- // Therefore use, duplication or disclosure, now and in the
- // future should give consideration to the productivity
- // enhancements afforded the user of this code by the author's
- // efforts. Thank you for using our products !
- //
- // Disclaimer: THESE DESIGNS ARE PROVIDED "AS IS" WITH NO WARRANTY
- // WHATSOEVER AND XILINX SPECIFICALLY DISCLAIMS ANY
- // IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR
- // A PARTICULAR PURPOSE, OR AGAINST INFRINGEMENT.
- //
- //
- //
- // Revision:
- // Aug. 6, 2001 Creation
- //
- //
- // Other modules instanced in this design:
- //
- // none
- /*
- BRIEF DESCRIPTION
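- The process of 4:2:2 to 4:4:4 is simply creating the missing Cr and Cb
- components. This version accomplishes this task by interpolating the
- missing Cr and Cb samples with the 24-tap symmetric FIR filter listed
- below; the Cr and Cb samples already present are passed through as is.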
- DETAILED DESCRIPTION
- The video standard ITU-R BT.601 was introduced as the need for
- transporting digital component video between countries and standards
- increased. The analog component R'G'B' can be sampled in a very regular
- way and converted from 4:4:4 to the digital 4:2:2 format, essentially
- cutting in half the number of different components, Cr and Cb.
- The digital data is efficiently stored or transmitted to a destination
- that reverses the process, i.e. converts back to 4:4:4 format, and
- produces analog YUV or R'G'B' for display.
- 422 TO 444 CONVERSION
- ---------------------
- Bob Turney, Xilinx Labs supplied me with this one to try.
- - 4 = 18'h3FFFC
- 6 = 18'h00006
- - 12 = 18'h3FFF4
- 20 = 18'h00014
- - 32 = 18'h3FFE0
- 48 = 18'h00030
- - 70 = 18'h3FFBA
- 104 = 18'h00068
- - 152 = 18'h3FF68
- 236 = 18'h000EC
- - 420 = 18'h3FE5C
- 1300 = 18'h00514
- 1300 = 18'h00514
- - 420 = 18'h3FE5C
- 236 = 18'h000EC
- - 152 = 18'h3FF68
- 104 = 18'h00068
- - 70 = 18'h3FFBA
- 48 = 18'h00030
- - 32 = 18'h3FFE0
- 20 = 18'h00014
- - 12 = 18'h3FFF4
- 6 = 18'h00006
- - 4 = 18'h3FFFC
- CrCb[i] = (
- - 4*(CrCb[1]+CrCb[24])
- + 6*(CrCb[2]+CrCb[23])
- - 12*(CrCb[3]+CrCb[22])
- + 20*(CrCb[4]+CrCb[21])
- - 32*(CrCb[5]+CrCb[20])
- + 48*(CrCb[6]+CrCb[19])
- - 70*(CrCb[7]+CrCb[18])
- + 104*(CrCb[8]+CrCb[17])
- - 152*(CrCb[9]+CrCb[16])
- + 236*(CrCb[10]+CrCb[15])
- - 420*(CrCb[11]+CrCb[14])
- + 1300*(CrCb[12]+CrCb[13]))/2048;
- Design Information
- ------------------
- xc2v1000-ff896-6
- MULT18X18s: 12 30%
- Slices 864 16%
- FFs 1,224 11%
- LUTs 409 3%
- IO 49 11%
- gates 62,199
- Minimum period: 12.042ns (Maximum frequency: 83 MHz)
- Maximum net delay: 5.124ns
- Note: Using coregen the V2 Mults can be replaced with parallel, 10 bit
- signed integer X 12 bit coefficients for roughly 68 LUTs and 34
- Registers each. This alternative would save the high performance V2
- Multipliers for other uses or allow the design to directly map to
- SPARTAN families.
- */
`timescale 1ns / 100ps

//-----------------------------------------------------------------------
// v422to444_v2mult_24t
//
// Converts a multiplexed 10-bit 4:2:2 component stream (luma and chroma
// words alternating on YCrCb_in) into parallel Y / Cr / Cb outputs at
// half the clock rate.  Chroma samples that exist in the input are
// passed through; the chroma samples in between are produced by a
// 24-tap symmetric FIR built from twelve MULT18X18 primitives
// (coefficients sum to 2044, result is scaled by 1/2048).
//
// The F/V/H control inputs are delayed through shift registers so that
// Fo/Vo/Ho leave the module TAPS+FILTER_PIPE_LENGTH+8 clocks after they
// entered.
//-----------------------------------------------------------------------
module v422to444_v2mult_24t (
    rst,        // synchronous reset for data registers and control
    clk,        // video component rate clock, 27Mhz for SDTV
    Fi,         // Low to High signals start of Field One
    Vi,         // High signals Vertical Blanking
    Hi,         // High signals Horizontal Blanking
    Fo,         // Field signal delayed by pipe length
    Vo,         // Vertical signal delayed by pipe length
    Ho,         // Horizontal signal delayed by pipe length
    ceo,        // output data rate is 1/2 the clock rate
    YCrCb_in,   // multiplexed 10-bit video component data
    Y_out,      // Y out, delayed copy of the input luma
    Cr_out,     // Cr out, alternates pass-through / filtered (clamped)
    Cb_out      // Cb out, alternates pass-through / filtered (clamped)
);

    /*
    TAPS must be an even number thereby making the length of the pipe
    an even number of FFs.

    NOTE: the multiplier bank below is written out for exactly 12
    coefficient pairs, so TAPS has to stay at 24 unless the MULT18X18
    instances, CrCb_mult and the post adder are changed to match.
    */
    parameter TAPS               = 24;
    parameter FILTER_PIPE_LENGTH = 4;

    // Clamp limits applied to the filtered chroma.  Defaults are the
    // nominal 10-bit Cb/Cr range (64..960).  New parameters are appended
    // after the original two so positional overrides keep working.
    parameter CRCB_MIN           = 10'h040;
    parameter CRCB_MAX           = 10'h3C0;

    input        rst, clk, Hi, Vi, Fi;
    input  [9:0] YCrCb_in;
    output [9:0] Y_out, Cr_out, Cb_out;
    output       Ho, Vo, Fo, ceo;

    reg  [TAPS+FILTER_PIPE_LENGTH+7:0] H_rg, V_rg, F_rg;
    wire         Ho, Vo, Fo, ceo, H_rising;
    reg  [1:0]   cnt;
    wire         Y_ld, select_real_CrCb, select_filt_CrCb;
    reg  [9:0]   Y_rg, CrCb_rg;

    // pixel component pipelines
    reg  [9:0]   Y_pipe    [(TAPS/2)+FILTER_PIPE_LENGTH-1:0];
    reg  [9:0]   CrCb_pipe [(TAPS*2)-1:0];

    // filter components
    reg  [10:0]  CrCb_pre_add [(TAPS/2)-1:0];
    wire [35:0]  P0, P1, P2, P3, P4, P5,
                 P6, P7, P8, P9, P10, P11;
    reg  [23:0]  CrCb_mult [11:0];      // low 24 bits of each product
    reg  [23:0]  CrCb_post_add;         // 24-bit two's complement sum
    reg  [9:0]   CrCb_corrected, Cb_filt, Cr_filt;
    reg  [9:0]   Y_out, Cr_out, Cb_out;

    // One loop variable per clocked block so that no two always blocks
    // share simulation state.
    integer iy, ic, ia, im;

    //-------------------------------------------------------------------
    //
    /*
    Delay SMPTE control signals F, V, H, by an amount equivalent to the
    module's pipe length. This will allow different modules to be swapped
    out without changing exterior control.
    */
    always @ (posedge clk) begin
        if (rst) begin
            F_rg <= 0;
            V_rg <= 0;
            H_rg <= 0;
        end
        else begin
            F_rg <= {F_rg[TAPS+FILTER_PIPE_LENGTH+6:0], Fi};
            V_rg <= {V_rg[TAPS+FILTER_PIPE_LENGTH+6:0], Vi};
            H_rg <= {H_rg[TAPS+FILTER_PIPE_LENGTH+6:0], Hi};
        end
    end

    assign Ho = H_rg[TAPS+FILTER_PIPE_LENGTH+7];
    assign Vo = V_rg[TAPS+FILTER_PIPE_LENGTH+7];
    assign Fo = F_rg[TAPS+FILTER_PIPE_LENGTH+7];

    //-------------------------------------------------------------------
    //
    // Identify Y and CrCb valid
    //
    // cnt is a free running 2-bit phase counter that is forced to 1 by
    // reset or by a rising edge on Hi.  Odd phases load Y; even phases
    // load chroma and advance the filter.
    //
    assign H_rising = ~H_rg[0] & Hi;

    always @ (posedge clk) begin
        if (rst | H_rising) cnt <= 2'b01;
        else                cnt <= cnt + 2'b01;
    end

    assign Y_ld             = (cnt == 2'b01) | (cnt == 2'b11);
    assign ceo              = Y_ld;
    assign select_real_CrCb = (cnt == 2'b11);
    assign select_filt_CrCb = (cnt == 2'b01);

    //-------------------------------------------------------------------
    //
    // Separate the Y and CrCb data streams.  Registers hold their value
    // on the phases in which they are not enabled.
    //
    always @ (posedge clk) begin
        if (rst)       Y_rg <= 10'h040;
        else if (Y_ld) Y_rg <= YCrCb_in;
    end

    always @ (posedge clk) begin
        if (rst)        CrCb_rg <= 0;
        else if (~Y_ld) CrCb_rg <= YCrCb_in;
    end

    //-------------------------------------------------------------------
    //
    // 10 bit Y and CrCb pipe registers, connected head to tail. There
    // are (TAPS/2)+FILTER_PIPE_LENGTH Y registers (16 with the default
    // parameters) and 2*TAPS CrCb registers (48).
    //
    always @ (posedge clk) begin
        if (rst) begin
            for (iy = 0; iy <= (TAPS/2)+FILTER_PIPE_LENGTH-1; iy = iy+1)
                Y_pipe[iy] <= 10'h040;
        end
        else if (Y_ld) begin
            Y_pipe[0] <= Y_rg;
            for (iy = 1; iy <= (TAPS/2)+FILTER_PIPE_LENGTH-1; iy = iy+1)
                Y_pipe[iy] <= Y_pipe[iy-1];
        end
    end

    always @ (posedge clk) begin
        if (rst) begin
            for (ic = 0; ic <= (2*TAPS)-1; ic = ic+1)
                CrCb_pipe[ic] <= 0;
        end
        else if (~Y_ld) begin
            CrCb_pipe[0] <= CrCb_rg;
            for (ic = 1; ic <= (2*TAPS)-1; ic = ic+1)
                CrCb_pipe[ic] <= CrCb_pipe[ic-1];
        end
    end

    //-------------------------------------------------------------------
    //
    // Pre multiply adder
    //
    // The filter is symmetric, so the two samples that share a
    // coefficient are added first.  Only odd pipe positions are tapped
    // (stride of two), pairing the outermost taps in CrCb_pre_add[0] and
    // the two centre taps in CrCb_pre_add[(TAPS/2)-1].
    //
    always @ (posedge clk) begin
        for (ia = 0; ia <= (TAPS/2)-1; ia = ia+1) begin
            if (rst)
                CrCb_pre_add[ia] <= 0;
            else if (~Y_ld)
                CrCb_pre_add[ia] <= CrCb_pipe[((2*TAPS)-1)-(2*ia)]
                                  + CrCb_pipe[(2*ia)+1];
        end
    end

    //-------------------------------------------------------------------
    //
    /*
    Multipliers

    A is the signed 18-bit coefficient, B is the 11-bit unsigned pre-add
    result zero extended to 18 bits (so B is always non-negative).

    Worst case magnitude of the accumulated sum is
        (sum of |coefficients|) x (max pre-add) = 2406 x 2046 = 4922676
    which is below 2^23, so keeping the low 24 bits of each product and
    summing modulo 2^24 gives the exact two's complement result.
    */
    MULT18X18 U1  (.P(P0),  .A(18'h3FFFC), .B({7'h00, CrCb_pre_add[0]}));   //   -4
    MULT18X18 U2  (.P(P1),  .A(18'h00006), .B({7'h00, CrCb_pre_add[1]}));   //    6
    MULT18X18 U3  (.P(P2),  .A(18'h3FFF4), .B({7'h00, CrCb_pre_add[2]}));   //  -12
    MULT18X18 U4  (.P(P3),  .A(18'h00014), .B({7'h00, CrCb_pre_add[3]}));   //   20
    MULT18X18 U5  (.P(P4),  .A(18'h3FFE0), .B({7'h00, CrCb_pre_add[4]}));   //  -32
    MULT18X18 U6  (.P(P5),  .A(18'h00030), .B({7'h00, CrCb_pre_add[5]}));   //   48
    MULT18X18 U7  (.P(P6),  .A(18'h3FFBA), .B({7'h00, CrCb_pre_add[6]}));   //  -70
    MULT18X18 U8  (.P(P7),  .A(18'h00068), .B({7'h00, CrCb_pre_add[7]}));   //  104
    MULT18X18 U9  (.P(P8),  .A(18'h3FF68), .B({7'h00, CrCb_pre_add[8]}));   // -152
    MULT18X18 U10 (.P(P9),  .A(18'h000EC), .B({7'h00, CrCb_pre_add[9]}));   //  236
    MULT18X18 U11 (.P(P10), .A(18'h3FE5C), .B({7'h00, CrCb_pre_add[10]}));  // -420
    MULT18X18 U12 (.P(P11), .A(18'h00514), .B({7'h00, CrCb_pre_add[11]}));  // 1300

    //-------------------------------------------------------------------
    //
    // Register outputs of multiply
    //
    always @ (posedge clk) begin
        if (rst) begin
            for (im = 0; im <= 11; im = im+1) CrCb_mult[im] <= 0;
        end
        else if (~Y_ld) begin
            CrCb_mult[0]  <= P0[23:0];
            CrCb_mult[1]  <= P1[23:0];
            CrCb_mult[2]  <= P2[23:0];
            CrCb_mult[3]  <= P3[23:0];
            CrCb_mult[4]  <= P4[23:0];
            CrCb_mult[5]  <= P5[23:0];
            CrCb_mult[6]  <= P6[23:0];
            CrCb_mult[7]  <= P7[23:0];
            CrCb_mult[8]  <= P8[23:0];
            CrCb_mult[9]  <= P9[23:0];
            CrCb_mult[10] <= P10[23:0];
            CrCb_mult[11] <= P11[23:0];
        end
    end

    //-------------------------------------------------------------------
    //
    // Post multiply adder (this needs to run at 74.25 MHz for HDTV). This
    // is the performance bottle-neck. It can be easily pipelined.
    //
    always @ (posedge clk) begin
        if (rst)
            CrCb_post_add <= 0;
        else if (~Y_ld)
            CrCb_post_add <=
                  CrCb_mult[0] + CrCb_mult[1] + CrCb_mult[2]  + CrCb_mult[3]
                + CrCb_mult[4] + CrCb_mult[5] + CrCb_mult[6]  + CrCb_mult[7]
                + CrCb_mult[8] + CrCb_mult[9] + CrCb_mult[10] + CrCb_mult[11];
    end

    //-------------------------------------------------------------------
    //
    // Correct overflows and underflows
    //
    /*
    Note 1: Wire shift by 11 bits is equivalent to dividing by 2048. This
    is to account for the non fractional coefficients in the FIR filter
    multiplies.

    Note 2: CrCb_post_add is a 24-bit two's complement number.  Bit 23 is
    the sign; for a non-negative sum bits [22:11] hold the scaled result.
    The whole scaled value has to be examined: looking only at [20:11]
    would turn a negative sum (filter undershoot) into a large positive
    code and would wrap a sum of 2^21 or more back into range.

    Note 3: A new Cr_filt and Cb_filt are available every four clock
    ticks.
    */
    wire        CrCb_sum_neg = CrCb_post_add[23];
    wire [11:0] CrCb_scaled  = CrCb_post_add[22:11];

    always @ (posedge clk) begin
        if (rst)
            CrCb_corrected <= CRCB_MIN;
        else if (~Y_ld) begin
            if (CrCb_sum_neg)                CrCb_corrected <= CRCB_MIN;
            else if (CrCb_scaled > CRCB_MAX) CrCb_corrected <= CRCB_MAX;
            else if (CrCb_scaled < CRCB_MIN) CrCb_corrected <= CRCB_MIN;
            else                             CrCb_corrected <= CrCb_scaled[9:0];
        end
    end

    // Two-deep holding chain: the newest corrected sample sits in
    // Cr_filt and the one before it in Cb_filt.
    always @ (posedge clk) begin
        if (rst) begin
            Cr_filt <= 0;
            Cb_filt <= 0;
        end
        else if (~Y_ld) begin
            Cr_filt <= CrCb_corrected;
            Cb_filt <= Cr_filt;
        end
    end

    //-------------------------------------------------------------------
    //
    // Divide the CrCb stream into separate outgoing components
    //
    // Phase 3 emits chroma taken straight from the delay pipe; phase 1
    // emits the interpolated chroma.  Y always comes from the end of the
    // Y pipe.  Outputs hold on the remaining phases.
    //
    always @ (posedge clk) begin
        if (rst) begin
            Y_out  <= 0;
            Cr_out <= 0;
            Cb_out <= 0;
        end
        else if (select_real_CrCb) begin
            Y_out  <= Y_pipe[(TAPS/2)+FILTER_PIPE_LENGTH-1];
            Cb_out <= CrCb_pipe[(TAPS/2)+FILTER_PIPE_LENGTH];
            Cr_out <= CrCb_pipe[(TAPS/2)+FILTER_PIPE_LENGTH-1];
        end
        else if (select_filt_CrCb) begin
            Y_out  <= Y_pipe[(TAPS/2)+FILTER_PIPE_LENGTH-1];
            Cb_out <= Cb_filt;
            Cr_out <= Cr_filt;
        end
    end

endmodule
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement