Linux Game Programming for PC & Embedded Systems using SDL
Presented by
Fore June
Author of Windows Fan, Linux Fan

Sample program for converting RGB to YCbCr to DCT coefficients and vice versa.

common.h
rgb_ybr.h
dct_video.h
encode.h
dctplayer.cpp
encode.cpp
dct_video.cpp
rgb_ybr.cpp
Makefile
sample_video.raw



dct_video.cpp:
/*
  dct_video.cpp
  An implementation of 8x8 DCT and IDCT for video compression.
  32-bit integer-arithmetic is assumed.
  For DCT operation:
	dct ( int *X, int *Y );
	X is the input pointing to an 8x8 array of samples; values must be within [0, 255]
	Y is the output pointing to an 8x8 array of DCT coefficients.
  For IDCT operation:
	idct ( int *Y, int *X );
	Y is the input pointing to an 8x8 array of DCT coefficients.
	X is the output pointing to an 8x8 array of sample values.
  For more detailed explanations, see
	http://www.webkinesia.com/games/vcompress3.php

  compile: g++ -c dct_video.cpp
  To use them, include the the following header statements in your application.
	void dct(int *X, int *Y);
	void idct(int *Y, int *X);
	
*/

#include <string.h>
#include <stdio.h>
#include <math.h>

#define PI 3.141592653589
const short shift = 10;			//10 bits precision for fixed-point arithmetic
const short shift1 = 2 * shift;		//at the final stage, values have been shifted twice
const int fac = 1 << shift ;		//multiply all constants by 2^10 ( = 1024 )
const int delta = 1 << (shift-1) ;	//for rounding adjustment ~ 0.5x2^10
const int delta1 = 1 << (shift1-1);	//for final rounding adjustment ~ 0.5x2^20
const double a = PI / 16.0;		//angle theta

//DCT constants; use integer-arithmetic.
const int c0 = (int) ( 1 / sqrt ( 2 ) * fac );
const int c1 = (int) ( cos ( a ) / 2 * fac );
const int c2 = (int) ( cos ( 2*a ) / 2 * fac );
const int c3 = (int) ( cos ( 3*a ) / 2 * fac );
const int c4 = (int) ( cos ( 4*a ) / 2 * fac );
const int c5 = (int) ( cos ( 5*a ) / 2 * fac );
const int c6 = (int) ( cos ( 6*a ) / 2 * fac );
const int c7 = (int) ( cos ( 7*a ) / 2 * fac );

/*
  DCT function.
  Input: X, array of 8x8, containing data with values in [0, 255].
  Ouput: Y, array of 8x8 DCT coefficients.
*/
void dct(int *X, int *Y)
{
   int   i, j, j1, k;
   int x[8], x1[8], m[8][8];
      
   /*
     Row transform 
     i-th row, k-th element 
   */
   for (i = 0, k = 0; i < 8; i++, k += 8) {
      for (j = 0; j < 8; j++) 
        x[j] = X[k+j];				//data for one row
      
      for (j = 0; j < 4; j++) {			//first stage transform, see flow-graph
        j1 = 7 - j;
        x1[j] = x[j] + x[j1];
        x1[j1] = x[j] - x[j1];
      }
      x[0] = x1[0] + x1[3];			//second stage transform
      x[1] = x1[1] + x1[2];
      x[2] = x1[1] - x1[2];
      x[3] = x1[0] - x1[3];
      x[4] = x1[4];
      x[5] = ((x1[6] - x1[5]) * c0 + delta ) >> shift;	//after multiplication, add delta for rounding,
      x[6] = ((x1[6] + x1[5]) * c0 + delta ) >> shift;	//  shift-right to undo 'x fac' to line up
      x[7] = x1[7];					//  binary points of all x[i]

      m[i][0] = (x[0] + x[1]) * c4;		//upper-half of third (final) stage, see flow-graph
      m[i][4] = (x[0] - x[1]) * c4;
      m[i][2] = x[2] * c6 + x[3] * c2;
      m[i][6] = x[3] * c6 - x[2] * c2;

      x1[4] = x[4] + x[5];			//lower-half of third stage
      x1[5] = x[4] - x[5];
      x1[6] = x[7] - x[6];
      x1[7] = x[7] + x[6];
	
      m[i][1] = x1[4] * c7 + x1[7] * c1;	//lower-half of fourth ( final ) stage
      m[i][7] = x1[7] * c7 - x1[4] * c1;
      m[i][5] = x1[5] * c3 + x1[6] * c5;
      m[i][3] = x1[6] * c3 - x1[5] * c5;
   } //for i

   /*
	At this point, coefficients of each row ( m[i][j] ) has been multiplied by 2^10.
	We can undo the multiplication by << 10 here before doing the vertical transform.
	However, as we are using int variables, which are 32-bit to do multiplications, we
	can tolerate another multiplication of 2^10. So we delay our undoing until the end
	of the vertical transform and we undo all left-shift operations by shifting
	the results right 20 bits ( i.e. << 2 * 10 ).
   */
   // Column transform 
   for (i = 0; i < 8; i++) {			//eight columns

      //consider one column
      for (j = 0; j < 4; j++) {			//first-stage operation
        j1 = 7 - j;
        x1[j] = m[j][i] + m[j1][i];
        x1[j1] = m[j][i] - m[j1][i];
      }
						//second-stage operation
      x[0] = x1[0] + x1[3];
      x[1] = x1[1] + x1[2];
      x[2] = x1[1] - x1[2];
      x[3] = x1[0] - x1[3];
      x[4] = x1[4];
      //undo one shift for x[5], x[6] to avoid overflow
      x1[5] = (x1[5] + delta) >> shift;
      x1[6] = (x1[6] + delta) >> shift;

      x[5] = (x1[6] - x1[5]) * c0;	
      x[6] = (x1[6] + x1[5]) * c0;
      x[7] = x1[7];

      m[0][i] = (x[0] + x[1]) * c4;		//upper-half of third (final) stage, see flow-graph
      m[4][i] = (x[0] - x[1]) * c4;
      m[2][i] = ( x[2] * c6 + x[3] * c2 ) ;
      m[6][i] = ( x[3] * c6 - x[2] * c2 );
      
      x1[4] = x[4] + x[5];			//lower-half of third stage
      x1[7] = x[7] + x[6];
      x1[5] = x[4] - x[5];
      x1[6] = x[7] - x[6];

      m[1][i] = x1[4] * c7 + x1[7] * c1;	//lower-half of fourth stage
      m[5][i] = x1[5] * c3 + x1[6] * c5;
      m[3][i] = x1[6] * c3 - x1[5] * c5;
      m[7][i] = x1[7] * c7 - x1[4] * c1;
   } // for i

  //we have left-shift (multiplying constants) twice
  for ( i = 0; i < 8; ++i ) {
    for ( j = 0; j < 8; ++j ) {
      *Y++ =  ( m[i][j] + delta1 ) >> shift1; 	//round the value by adding delta
    }
  }
}

/*
  Implementation of idct() is to reverse the operations of dct().
  We first do vertical transform and then horizontal; this is easier
  for debugging as the operations are just the reverse of those in dct();
  of course, it works just as well if you do the horizontal transform first.
  So in this implementation, the first stage of idct() is the final stage of dct()
  and the final stage of idct() is the first stage of dct().
*/

void idct(int *Y, int *X)
{
   int   j1, i, j, k;
   int x[8], x1[8], m[8][8], y[8];
   k = i = 0;

  //column transform
  for ( i = 0; i < 8; ++i ) {
    for (j = 0; j < 8; j++) 
        y[j] = Y[i+8*j];				
    //print_x ( y );
    //getchar();

    x1[4] = y[1] * c7 - y[7] * c1;		//lower-half final stage of dct
    x1[7] = y[1] * c1 + y[7] * c7;
    x1[6] = y[3] * c3 + y[5] * c5;
    x1[5] = -y[3] * c5 + y[5] * c3;
      
    x[4] = ( x1[4] + x1[5] );			//lower-half of third stage of dct
    x[5] = ( x1[4] - x1[5] );
    x[6] = ( x1[7] - x1[6] );
    x[7] = ( x1[7] + x1[6] );
    
    x1[0] = ( y[0] + y[4] ) * c4;		//upper-half of third ( final ) stage of dct
    x1[1] = ( y[0] - y[4] ) * c4;
    x1[2] = y[2] * c6 - y[6] * c2;
    x1[3] = y[2] * c2 + y[6] * c6;
    
    
    x[0] = ( x1[0] + x1[3] );			//second stage of dct 
    x[1] = ( x1[1] + x1[2] );
    x[2] = ( x1[1] - x1[2] );
    x[3] = ( x1[0] - x1[3] );


    //x[4], x[7] no change
    x1[5] = ((x[6] - x[5]) * c0 + delta ) >> shift;  	//after multiplication, add delta for rounding,
    x1[6] = ((x[6] + x[5]) * c0 + delta ) >> shift;  	//  shift-right to undo 'x fac' to line up x[]s
    x[5] = x1[5];
    x[6] = x1[6];  
    
    for (j = 0; j < 4; j++) {				//first stage transform of dct
        j1 = 7 - j;
        m[j][i] = (x[j] + x[j1] );
        m[j1][i] = (x[j] - x[j1] );
    }
  } //for i

   //row transform
  for ( i = 0; i < 8; i++ ) {
    for (j = 0; j < 8; j++) 
        y[j] = m[i][j] ;				//data for one row
    
    x1[4] = y[1] * c7 - y[7] * c1;
    x1[7] = y[1] * c1 + y[7] * c7;
    x1[6] = y[3] * c3 + y[5] * c5;
    x1[5] = -y[3] * c5 + y[5] * c3;

    x[4] = ( x1[4] + x1[5] );				//lower-half of third stage
    x[5] = ( x1[4] - x1[5] );
    x[6] = ( x1[7] - x1[6] );
    x[7] = ( x1[7] + x1[6] );
    
    x1[0] = ( y[0] + y[4] ) * c4;
    x1[1] = ( y[0] - y[4] ) * c4;
    x1[2] = y[2] * c6 - y[6] * c2;
    x1[3] = y[2] * c2 + y[6] * c6;
    
    //undo one shift for x[5], x[6] to avoid overflow
    x1[5] = (x[5] + delta) >> shift;
    x1[6] = (x[6] + delta) >> shift;
    x[5] = (x1[6] - x1[5]) * c0;  		
    x[6] = (x1[6] + x1[5]) * c0;  		
      //x[4], x[7] no change
    x[0] = ( x1[0] + x1[3] );			//second stage transform
    x[1] = ( x1[1] + x1[2] );
    x[2] = ( x1[1] - x1[2] );
    x[3] = ( x1[0] - x1[3] );

    for (j = 0; j < 4; j++) {			//first stage transform, see flow-graph
        j1 = 7 - j;
        m[i][j] = (x[j] + x[j1]);
	m[i][j1] = (x[j] - x[j1]);
      }
    }
    //we have left-shift (multiplying constants) twice
    for ( i = 0; i < 8; ++i ) {
      for ( j = 0; j < 8; ++j ) {
        *X++ =  ( m[i][j] + delta1 ) >> shift1;    //round the value by adding delta
      }
    }
}