A simple video codec.
/*
dct_video.cpp
An implementation of 8x8 DCT and IDCT for video compression.
32-bit integer-arithmetic is assumed.
For DCT operation:
dct ( int *X, int *Y );
X is the input pointing to an 8x8 array of samples; values must be within [0, 255]
Y is the output pointing to an 8x8 array of DCT coefficients.
For IDCT operation:
idct ( int *Y, int *X );
Y is the input pointing to an 8x8 array of DCT coefficients.
X is the output pointing to an 8x8 array of sample values.
For more detailed explanations, see
http://www.webkinesia.com/games/vcompress3.php
compile: g++ -c dct_video.cpp
To use them, include the the following header statements in your application.
void dct(int *X, int *Y);
void idct(int *Y, int *X);
*/
#include <string.h>
#include <stdio.h>
#include <math.h>
#define PI 3.141592653589
const short shift = 10; //10 bits precision for fixed-point arithmetic
const short shift1 = 2 * shift; //at the final stage, values have been shifted twice
const int fac = 1 << shift ; //multiply all constants by 2^10 ( = 1024 )
const int delta = 1 << (shift-1) ; //for rounding adjustment ~ 0.5x2^10
const int delta1 = 1 << (shift1-1); //for final rounding adjustment ~ 0.5x2^20
const double a = PI / 16.0; //angle theta
//DCT constants; use integer-arithmetic.
const int c0 = (int) ( 1 / sqrt ( 2 ) * fac );
const int c1 = (int) ( cos ( a ) / 2 * fac );
const int c2 = (int) ( cos ( 2*a ) / 2 * fac );
const int c3 = (int) ( cos ( 3*a ) / 2 * fac );
const int c4 = (int) ( cos ( 4*a ) / 2 * fac );
const int c5 = (int) ( cos ( 5*a ) / 2 * fac );
const int c6 = (int) ( cos ( 6*a ) / 2 * fac );
const int c7 = (int) ( cos ( 7*a ) / 2 * fac );
/*
DCT function.
Input: X, array of 8x8, containing data with values in [0, 255].
Ouput: Y, array of 8x8 DCT coefficients.
*/
void dct(int *X, int *Y)
{
int i, j, j1, k;
int x[8], x1[8], m[8][8];
/*
Row transform
i-th row, k-th element
*/
for (i = 0, k = 0; i < 8; i++, k += 8) {
for (j = 0; j < 8; j++)
x[j] = X[k+j]; //data for one row
for (j = 0; j < 4; j++) { //first stage transform, see flow-graph
j1 = 7 - j;
x1[j] = x[j] + x[j1];
x1[j1] = x[j] - x[j1];
}
x[0] = x1[0] + x1[3]; //second stage transform
x[1] = x1[1] + x1[2];
x[2] = x1[1] - x1[2];
x[3] = x1[0] - x1[3];
x[4] = x1[4];
x[5] = ((x1[6] - x1[5]) * c0 + delta ) >> shift; //after multiplication, add delta for rounding,
x[6] = ((x1[6] + x1[5]) * c0 + delta ) >> shift; // shift-right to undo 'x fac' to line up
x[7] = x1[7]; // binary points of all x[i]
m[i][0] = (x[0] + x[1]) * c4; //upper-half of third (final) stage, see flow-graph
m[i][4] = (x[0] - x[1]) * c4;
m[i][2] = x[2] * c6 + x[3] * c2;
m[i][6] = x[3] * c6 - x[2] * c2;
x1[4] = x[4] + x[5]; //lower-half of third stage
x1[5] = x[4] - x[5];
x1[6] = x[7] - x[6];
x1[7] = x[7] + x[6];
m[i][1] = x1[4] * c7 + x1[7] * c1; //lower-half of fourth ( final ) stage
m[i][7] = x1[7] * c7 - x1[4] * c1;
m[i][5] = x1[5] * c3 + x1[6] * c5;
m[i][3] = x1[6] * c3 - x1[5] * c5;
} //for i
/*
At this point, coefficients of each row ( m[i][j] ) has been multiplied by 2^10.
We can undo the multiplication by << 10 here before doing the vertical transform.
However, as we are using int variables, which are 32-bit to do multiplications, we
can tolerate another multiplication of 2^10. So we delay our undoing until the end
of the vertical transform and we undo all left-shift operations by shifting
the results right 20 bits ( i.e. << 2 * 10 ).
*/
// Column transform
for (i = 0; i < 8; i++) { //eight columns
//consider one column
for (j = 0; j < 4; j++) { //first-stage operation
j1 = 7 - j;
x1[j] = m[j][i] + m[j1][i];
x1[j1] = m[j][i] - m[j1][i];
}
//second-stage operation
x[0] = x1[0] + x1[3];
x[1] = x1[1] + x1[2];
x[2] = x1[1] - x1[2];
x[3] = x1[0] - x1[3];
x[4] = x1[4];
//undo one shift for x[5], x[6] to avoid overflow
x1[5] = (x1[5] + delta) >> shift;
x1[6] = (x1[6] + delta) >> shift;
x[5] = (x1[6] - x1[5]) * c0;
x[6] = (x1[6] + x1[5]) * c0;
x[7] = x1[7];
m[0][i] = (x[0] + x[1]) * c4; //upper-half of third (final) stage, see flow-graph
m[4][i] = (x[0] - x[1]) * c4;
m[2][i] = ( x[2] * c6 + x[3] * c2 ) ;
m[6][i] = ( x[3] * c6 - x[2] * c2 );
x1[4] = x[4] + x[5]; //lower-half of third stage
x1[7] = x[7] + x[6];
x1[5] = x[4] - x[5];
x1[6] = x[7] - x[6];
m[1][i] = x1[4] * c7 + x1[7] * c1; //lower-half of fourth stage
m[5][i] = x1[5] * c3 + x1[6] * c5;
m[3][i] = x1[6] * c3 - x1[5] * c5;
m[7][i] = x1[7] * c7 - x1[4] * c1;
} // for i
//we have left-shift (multiplying constants) twice
for ( i = 0; i < 8; ++i ) {
for ( j = 0; j < 8; ++j ) {
*Y++ = ( m[i][j] + delta1 ) >> shift1; //round the value by adding delta
}
}
}
/*
Implementation of idct() is to reverse the operations of dct().
We first do vertical transform and then horizontal; this is easier
for debugging as the operations are just the reverse of those in dct();
of course, it works just as well if you do the horizontal transform first.
So in this implementation, the first stage of idct() is the final stage of dct()
and the final stage of idct() is the first stage of dct().
*/
void idct(int *Y, int *X)
{
int j1, i, j, k;
int x[8], x1[8], m[8][8], y[8];
k = i = 0;
//column transform
for ( i = 0; i < 8; ++i ) {
for (j = 0; j < 8; j++)
y[j] = Y[i+8*j];
//print_x ( y );
//getchar();
x1[4] = y[1] * c7 - y[7] * c1; //lower-half final stage of dct
x1[7] = y[1] * c1 + y[7] * c7;
x1[6] = y[3] * c3 + y[5] * c5;
x1[5] = -y[3] * c5 + y[5] * c3;
x[4] = ( x1[4] + x1[5] ); //lower-half of third stage of dct
x[5] = ( x1[4] - x1[5] );
x[6] = ( x1[7] - x1[6] );
x[7] = ( x1[7] + x1[6] );
x1[0] = ( y[0] + y[4] ) * c4; //upper-half of third ( final ) stage of dct
x1[1] = ( y[0] - y[4] ) * c4;
x1[2] = y[2] * c6 - y[6] * c2;
x1[3] = y[2] * c2 + y[6] * c6;
x[0] = ( x1[0] + x1[3] ); //second stage of dct
x[1] = ( x1[1] + x1[2] );
x[2] = ( x1[1] - x1[2] );
x[3] = ( x1[0] - x1[3] );
//x[4], x[7] no change
x1[5] = ((x[6] - x[5]) * c0 + delta ) >> shift; //after multiplication, add delta for rounding,
x1[6] = ((x[6] + x[5]) * c0 + delta ) >> shift; // shift-right to undo 'x fac' to line up x[]s
x[5] = x1[5];
x[6] = x1[6];
for (j = 0; j < 4; j++) { //first stage transform of dct
j1 = 7 - j;
m[j][i] = (x[j] + x[j1] );
m[j1][i] = (x[j] - x[j1] );
}
} //for i
//row transform
for ( i = 0; i < 8; i++ ) {
for (j = 0; j < 8; j++)
y[j] = m[i][j] ; //data for one row
x1[4] = y[1] * c7 - y[7] * c1;
x1[7] = y[1] * c1 + y[7] * c7;
x1[6] = y[3] * c3 + y[5] * c5;
x1[5] = -y[3] * c5 + y[5] * c3;
x[4] = ( x1[4] + x1[5] ); //lower-half of third stage
x[5] = ( x1[4] - x1[5] );
x[6] = ( x1[7] - x1[6] );
x[7] = ( x1[7] + x1[6] );
x1[0] = ( y[0] + y[4] ) * c4;
x1[1] = ( y[0] - y[4] ) * c4;
x1[2] = y[2] * c6 - y[6] * c2;
x1[3] = y[2] * c2 + y[6] * c6;
//undo one shift for x[5], x[6] to avoid overflow
x1[5] = (x[5] + delta) >> shift;
x1[6] = (x[6] + delta) >> shift;
x[5] = (x1[6] - x1[5]) * c0;
x[6] = (x1[6] + x1[5]) * c0;
//x[4], x[7] no change
x[0] = ( x1[0] + x1[3] ); //second stage transform
x[1] = ( x1[1] + x1[2] );
x[2] = ( x1[1] - x1[2] );
x[3] = ( x1[0] - x1[3] );
for (j = 0; j < 4; j++) { //first stage transform, see flow-graph
j1 = 7 - j;
m[i][j] = (x[j] + x[j1]);
m[i][j1] = (x[j] - x[j1]);
}
}
//we have left-shift (multiplying constants) twice
for ( i = 0; i < 8; ++i ) {
for ( j = 0; j < 8; ++j ) {
*X++ = ( m[i][j] + delta1 ) >> shift1; //round the value by adding delta
}
}
}