summaryrefslogtreecommitdiffstats
path: root/mpeglib/lib/util/render/dither
diff options
context:
space:
mode:
Diffstat (limited to 'mpeglib/lib/util/render/dither')
-rw-r--r--mpeglib/lib/util/render/dither/Makefile.am40
-rw-r--r--mpeglib/lib/util/render/dither/colorTable8Bit.cpp147
-rw-r--r--mpeglib/lib/util/render/dither/colorTable8Bit.h57
-rw-r--r--mpeglib/lib/util/render/dither/colorTableHighBit.cpp248
-rw-r--r--mpeglib/lib/util/render/dither/colorTableHighBit.h73
-rw-r--r--mpeglib/lib/util/render/dither/dither16Bit.cpp300
-rw-r--r--mpeglib/lib/util/render/dither/dither16Bit.h55
-rw-r--r--mpeglib/lib/util/render/dither/dither32Bit.cpp253
-rw-r--r--mpeglib/lib/util/render/dither/dither32Bit.h55
-rw-r--r--mpeglib/lib/util/render/dither/dither32mmx.cpp272
-rw-r--r--mpeglib/lib/util/render/dither/dither8Bit.cpp306
-rw-r--r--mpeglib/lib/util/render/dither/dither8Bit.h63
-rw-r--r--mpeglib/lib/util/render/dither/ditherDef.h100
-rw-r--r--mpeglib/lib/util/render/dither/ditherMMX.h38
-rw-r--r--mpeglib/lib/util/render/dither/ditherRGB.cpp230
-rw-r--r--mpeglib/lib/util/render/dither/ditherRGB.h45
-rw-r--r--mpeglib/lib/util/render/dither/ditherRGB_flipped.cpp82
-rw-r--r--mpeglib/lib/util/render/dither/ditherRGB_flipped.h34
-rw-r--r--mpeglib/lib/util/render/dither/ditherWrapper.cpp246
-rw-r--r--mpeglib/lib/util/render/dither/ditherWrapper.h80
-rw-r--r--mpeglib/lib/util/render/dither/ditherer_mmx16.cpp256
21 files changed, 2980 insertions, 0 deletions
diff --git a/mpeglib/lib/util/render/dither/Makefile.am b/mpeglib/lib/util/render/dither/Makefile.am
new file mode 100644
index 00000000..166d5ca3
--- /dev/null
+++ b/mpeglib/lib/util/render/dither/Makefile.am
@@ -0,0 +1,40 @@
+# liboutplugin - Makefile.am
+
+INCLUDES = $(all_includes)
+
+EXTRA_DIST = ditherDef.h ditherMMX.h \
+ ditherer_mmx16.cpp dither32mmx.cpp
+
+noinst_LTLIBRARIES = libdither.la
+
+noinst_HEADERS = ditherWrapper.h \
+ dither8Bit.h colorTable8Bit.h \
+ colorTableHighBit.h dither16Bit.h \
+ dither32Bit.h ditherRGB_flipped.h \
+ ditherRGB.h
+
+libdither_la_SOURCES = ditherWrapper.cpp \
+ dither8Bit.cpp \
+ colorTable8Bit.cpp colorTableHighBit.cpp \
+ dither16Bit.cpp dither32Bit.cpp \
+ ditherRGB_flipped.cpp ditherRGB.cpp \
+ ditherer_mmx16.cpp dither32mmx.cpp
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/mpeglib/lib/util/render/dither/colorTable8Bit.cpp b/mpeglib/lib/util/render/dither/colorTable8Bit.cpp
new file mode 100644
index 00000000..57c533de
--- /dev/null
+++ b/mpeglib/lib/util/render/dither/colorTable8Bit.cpp
@@ -0,0 +1,147 @@
+/*
+ colorTables for 8 Bit depth
+ Copyright (C) 2000 Martin Vogt
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Library General Public License as published by
+ the Free Software Foundation.
+
+ For more information look at the file COPYRIGHT in this package
+
+ */
+
+
+#include "colorTable8Bit.h"
+
+
+ColorTable8Bit::ColorTable8Bit() {
+
+ lum_values = new int[LUM_RANGE];
+ cr_values = new int[CR_RANGE];
+ cb_values = new int[CB_RANGE];
+
+
+ /* We can exploit cache by allocating contiguous blocks */
+
+ colortab = new TABTYPE[5*256];
+
+ Cr_r_tab = &colortab[0*256];
+ Cr_g_tab = &colortab[1*256];
+ Cb_g_tab = &colortab[2*256];
+ Cb_b_tab = &colortab[3*256];
+ L_tab = &colortab[4*256];
+
+ init8BitColor();
+}
+
+
+ColorTable8Bit::~ColorTable8Bit() {
+ delete lum_values;
+ delete cr_values;
+ delete cb_values;
+ delete colortab;
+}
+
+
+void ColorTable8Bit::init8BitColor() {
+ int i;
+
+
+
+ for (i=0; i<LUM_RANGE; i++) {
+ lum_values[i] = ((i * 256) / (LUM_RANGE)) + (256/(LUM_RANGE*2));
+ L_tab[i] = lum_values[i];
+ if (gammaCorrectFlag) {
+ L_tab[i] = GAMMA_CORRECTION(L_tab[i]);
+ }
+
+ }
+
+
+ for (i=0; i<CR_RANGE; i++) {
+ register double tmp;
+ if (chromaCorrectFlag) {
+ tmp = ((i * 256) / (CR_RANGE)) + (256/(CR_RANGE*2));
+ Cr_r_tab[i]=(TABTYPE) ((0.419/0.299)*CHROMA_CORRECTION128D(tmp-128.0));
+ Cr_g_tab[i]=(TABTYPE) (-(0.299/0.419)*CHROMA_CORRECTION128D(tmp-128.0));
+ cr_values[i] = CHROMA_CORRECTION256(tmp);
+ } else {
+ tmp = ((i * 256) / (CR_RANGE)) + (256/(CR_RANGE*2));
+ Cr_r_tab[i] = (TABTYPE) ((0.419/0.299) * (tmp - 128.0));
+ Cr_g_tab[i] = (TABTYPE) (-(0.299/0.419) * (tmp - 128.0));
+ cr_values[i] = (int) tmp;
+ }
+ }
+
+
+ for (i=0; i<CB_RANGE; i++) {
+ register double tmp;
+ if (chromaCorrectFlag) {
+ tmp = ((i * 256) / (CB_RANGE)) + (256/(CB_RANGE*2));
+ Cb_g_tab[i]=(TABTYPE) (-(0.114/0.331)*CHROMA_CORRECTION128D(tmp-128.0));
+ Cb_b_tab[i]=(TABTYPE) ((0.587/0.331)*CHROMA_CORRECTION128D(tmp-128.0));
+ cb_values[i] = CHROMA_CORRECTION256(tmp);
+ } else {
+ tmp = ((i * 256) / (CB_RANGE)) + (256/(CB_RANGE*2));
+ Cb_g_tab[i] = (TABTYPE) (-(0.114/0.331) * (tmp - 128.0));
+ Cb_b_tab[i] = (TABTYPE) ((0.587/0.331) * (tmp - 128.0));
+ cb_values[i] = (int) tmp;
+ }
+ }
+}
+
+
+
+/*
+ *--------------------------------------------------------------
+ *
+ * ConvertColor --
+ *
+ * Given a l, cr, cb tuple, converts it to r,g,b.
+ *
+ * Results:
+ * r,g,b values returned in pointers passed as parameters.
+ *
+ * Side effects:
+ * None.
+ *
+ *--------------------------------------------------------------
+ */
+void ColorTable8Bit::ConvertColor(unsigned int l, unsigned int cr,
+ unsigned int cb, unsigned char* r,
+ unsigned char* g, unsigned char* b) {
+
+ double fl, fr, fg, fb;
+
+ /*
+ * Old method w/o lookup table
+ *
+ * fl = 1.164*(((double) l)-16.0);
+ * fcr = ((double) cr) - 128.0;
+ * fcb = ((double) cb) - 128.0;
+ *
+ * fr = fl + (1.366 * fcr);
+ * fg = fl - (0.700 * fcr) - (0.334 * fcb);
+ * fb = fl + (1.732 * fcb);
+ */
+
+ fl = L_tab[l];
+
+ fr = fl + Cr_r_tab[cr];
+ fg = fl + Cr_g_tab[cr] + Cb_g_tab[cb];
+ fb = fl + Cb_b_tab[cb];
+
+ if (fr < 0.0) fr = 0.0;
+ else if (fr > 255.0) fr = 255.0;
+
+ if (fg < 0.0) fg = 0.0;
+ else if (fg > 255.0) fg = 255.0;
+
+ if (fb < 0.0) fb = 0.0;
+ else if (fb > 255.0) fb = 255.0;
+
+ *r = (unsigned char) fr;
+ *g = (unsigned char) fg;
+ *b = (unsigned char) fb;
+
+}
diff --git a/mpeglib/lib/util/render/dither/colorTable8Bit.h b/mpeglib/lib/util/render/dither/colorTable8Bit.h
new file mode 100644
index 00000000..6d873d1d
--- /dev/null
+++ b/mpeglib/lib/util/render/dither/colorTable8Bit.h
@@ -0,0 +1,57 @@
+/*
+ colorTables for 8 Bit depth
+ Copyright (C) 2000 Martin Vogt
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Library General Public License as published by
+ the Free Software Foundation.
+
+ For more information look at the file COPYRIGHT in this package
+
+ */
+
+
+#ifndef __COLORTABLE8BIT_H
+#define __COLORTABLE8BIT_H
+
+#include "ditherDef.h"
+
+
+class ColorTable8Bit {
+
+ // Arrays holding quantized value ranged for lum, cr, and cb.
+ // (used for 8 Bit)
+
+ int* lum_values;
+ int* cr_values;
+ int* cb_values;
+
+
+
+
+ TABTYPE *L_tab;
+ TABTYPE *Cr_r_tab;
+ TABTYPE *Cr_g_tab;
+ TABTYPE *Cb_g_tab;
+ TABTYPE *Cb_b_tab;
+ TABTYPE *colortab;
+
+
+ public:
+ ColorTable8Bit();
+ ~ColorTable8Bit();
+
+ inline int* getLumValues() { return lum_values; }
+ inline int* getCrValues() { return cr_values; }
+ inline int* getCbValues() { return cb_values; }
+
+ void ConvertColor(unsigned int l, unsigned int cr, unsigned int cb,
+ unsigned char* r, unsigned char* g, unsigned char* b);
+
+
+ private:
+ void init8BitColor();
+
+
+};
+#endif
diff --git a/mpeglib/lib/util/render/dither/colorTableHighBit.cpp b/mpeglib/lib/util/render/dither/colorTableHighBit.cpp
new file mode 100644
index 00000000..171f4e97
--- /dev/null
+++ b/mpeglib/lib/util/render/dither/colorTableHighBit.cpp
@@ -0,0 +1,248 @@
+/*
+ colorTables for 16,32 Bit depth
+ Copyright (C) 2000 Martin Vogt
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Library General Public License as published by
+ the Free Software Foundation.
+
+ For more information look at the file COPYRIGHT in this package
+
+ */
+
+
+
+#include "colorTableHighBit.h"
+
+//#define INTERPOLATE
+
+
+/*
+ * Erik Corry's multi-byte dither routines.
+ *
+ * The basic idea is that the Init generates all the necessary tables.
+ * The tables incorporate the information about the layout of pixels
+ * in the XImage, so that it should be able to cope with 15-bit, 16-bit
+ * 24-bit (non-packed) and 32-bit (10-11 bits per color!) screens.
+ * At present it cannot cope with 24-bit packed mode, since this involves
+ * getting down to byte level again. It is assumed that the bits for each
+ * color are contiguous in the longword.
+ *
+ * Writing to memory is done in shorts or ints. (Unfortunately, short is not
+ * very fast on Alpha, so there is room for improvement here). There is no
+ * dither time check for overflow - instead the tables have slack at
+ * each end. This is likely to be faster than an 'if' test as many modern
+ * architectures are really bad at ifs. Potentially, each '&&' causes a
+ * pipeline flush!
+ *
+ * There is no shifting and fixed point arithmetic, as I really doubt you
+ * can see the difference, and it costs. This may be just my bias, since I
+ * heard that Intel is really bad at shifting.
+ */
+
+
+/*
+ * How many 1 bits are there in the PIXVALword.
+ * Low performance, do not call often.
+ */
+static int number_of_bits_set(unsigned PIXVAL a) {
+ if(!a) return 0;
+ if(a & 1) return 1 + number_of_bits_set(a >> 1);
+ return(number_of_bits_set(a >> 1));
+}
+
+
+
+/*
+ * How many 0 bits are there at most significant end of PIXVALword.
+ * Low performance, do not call often.
+ */
+static int free_bits_at_top(unsigned PIXVAL a) {
+ /* assume char is 8 bits */
+ if(!a) return sizeof(unsigned PIXVAL) * 8;
+ /* assume twos complement */
+ if(((PIXVAL)a) < 0l) return 0;
+ return 1 + free_bits_at_top ( a << 1);
+}
+
+/*
+ * How many 0 bits are there at least significant end of PIXVALword.
+ * Low performance, do not call often.
+ */
+static int free_bits_at_bottom(unsigned PIXVAL a) {
+ /* assume char is 8 bits */
+ if(!a) return sizeof(unsigned PIXVAL) * 8;
+ if(((PIXVAL)a) & 1l) return 0;
+ return 1 + free_bits_at_bottom ( a >> 1);
+}
+
+
+
+ColorTableHighBit::ColorTableHighBit(int bpp,unsigned int redMask,
+ unsigned int greenMask,
+ unsigned int blueMask) {
+ this->bpp=bpp;
+ this->redMask=redMask;
+ this->greenMask=greenMask;
+ this->blueMask=blueMask;
+
+ colortab = new TABTYPE[5*256];
+
+ Cr_r_tab = &colortab[0*256];
+ Cr_g_tab = &colortab[1*256];
+ Cb_g_tab = &colortab[2*256];
+ Cb_b_tab = &colortab[3*256];
+ L_tab = &colortab[4*256];
+
+ rgb_2_pix = new PIXVAL [3*768];
+
+ r_2_pix_alloc = &rgb_2_pix[0*768];
+ g_2_pix_alloc = &rgb_2_pix[1*768];
+ b_2_pix_alloc = &rgb_2_pix[2*768];
+
+ initHighColor(bpp>=24,redMask,greenMask,blueMask);
+
+}
+
+
+ColorTableHighBit::~ColorTableHighBit() {
+ delete colortab;
+ delete rgb_2_pix;
+}
+
+/*
+ *--------------------------------------------------------------
+ *
+ * InitColor16Dither --
+ *
+ * To get rid of the multiply and other conversions in color
+ * dither, we use a lookup table.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * The lookup tables are initialized.
+ *
+ *--------------------------------------------------------------
+ */
+
+void ColorTableHighBit::initHighColor(int thirty2,unsigned int redMask,
+ unsigned int greenMask,
+ unsigned int blueMask) {
+
+ unsigned PIXVAL red_mask = redMask;
+ unsigned PIXVAL green_mask =greenMask;
+ unsigned PIXVAL blue_mask = blueMask;
+
+ int CR, CB, i;
+
+
+ for (i=0; i<256; i++) {
+ L_tab[i] = i;
+ if (gammaCorrectFlag) {
+ L_tab[i] = (TABTYPE)GAMMA_CORRECTION(i);
+ }
+
+ CB = CR = i;
+
+ if (chromaCorrectFlag) {
+ CB -= 128;
+ CB = CHROMA_CORRECTION128(CB);
+ CR -= 128;
+ CR = CHROMA_CORRECTION128(CR);
+ } else {
+ CB -= 128; CR -= 128;
+ }
+/* was
+ Cr_r_tab[i] = 1.596 * CR;
+ Cr_g_tab[i] = -0.813 * CR;
+ Cb_g_tab[i] = -0.391 * CB;
+ Cb_b_tab[i] = 2.018 * CB;
+ but they were just messed up.
+ Then was (_Video Deymstified_):
+ Cr_r_tab[i] = 1.366 * CR;
+ Cr_g_tab[i] = -0.700 * CR;
+ Cb_g_tab[i] = -0.334 * CB;
+ Cb_b_tab[i] = 1.732 * CB;
+ but really should be:
+ (from ITU-R BT.470-2 System B, G and SMPTE 170M )
+*/
+ Cr_r_tab[i] = (TABTYPE) ( (0.419/0.299) * CR );
+ Cr_g_tab[i] = (TABTYPE) ( -(0.299/0.419) * CR );
+ Cb_g_tab[i] = (TABTYPE) ( -(0.114/0.331) * CB );
+ Cb_b_tab[i] = (TABTYPE) ( (0.587/0.331) * CB );
+
+/*
+ though you could argue for:
+ SMPTE 240M
+ Cr_r_tab[i] = (0.445/0.212) * CR;
+ Cr_g_tab[i] = -(0.212/0.445) * CR;
+ Cb_g_tab[i] = -(0.087/0.384) * CB;
+ Cb_b_tab[i] = (0.701/0.384) * CB;
+ FCC
+ Cr_r_tab[i] = (0.421/0.30) * CR;
+ Cr_g_tab[i] = -(0.30/0.421) * CR;
+ Cb_g_tab[i] = -(0.11/0.331) * CB;
+ Cb_b_tab[i] = (0.59/0.331) * CB;
+ ITU-R BT.709
+ Cr_r_tab[i] = (0.454/0.2125) * CR;
+ Cr_g_tab[i] = -(0.2125/0.454) * CR;
+ Cb_g_tab[i] = -(0.0721/0.386) * CB;
+ Cb_b_tab[i] = (0.7154/0.386) * CB;
+*/
+ }
+
+ /*
+ * Set up entries 0-255 in rgb-to-pixel value tables.
+ */
+ for (i = 0; i < 256; i++) {
+ r_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(red_mask));
+ r_2_pix_alloc[i + 256] <<= free_bits_at_bottom(red_mask);
+ g_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(green_mask));
+ g_2_pix_alloc[i + 256] <<= free_bits_at_bottom(green_mask);
+ b_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(blue_mask));
+ b_2_pix_alloc[i + 256] <<= free_bits_at_bottom(blue_mask);
+ /*
+ * If we have 16-bit output depth, then we double the value
+ * in the top word. This means that we can write out both
+ * pixels in the pixel doubling mode with one op. It is
+ * harmless in the normal case as storing a 32-bit value
+ * through a short pointer will lose the top bits anyway.
+ * A similar optimisation for Alpha for 64 bit has been
+ * prepared for, but is not yet implemented.
+ */
+ if(!thirty2) {
+ r_2_pix_alloc[i + 256] |= (r_2_pix_alloc[i + 256]) << 16;
+ g_2_pix_alloc[i + 256] |= (g_2_pix_alloc[i + 256]) << 16;
+ b_2_pix_alloc[i + 256] |= (b_2_pix_alloc[i + 256]) << 16;
+
+ }
+#ifdef SIXTYFOUR_BIT
+ if(thirty2) {
+
+ r_2_pix_alloc[i + 256] |= (r_2_pix_alloc[i + 256]) << 32;
+ g_2_pix_alloc[i + 256] |= (g_2_pix_alloc[i + 256]) << 32;
+ b_2_pix_alloc[i + 256] |= (b_2_pix_alloc[i + 256]) << 32;
+
+ }
+#endif
+ }
+
+ /*
+ * Spread out the values we have to the rest of the array so that
+ * we do not need to check for overflow.
+ */
+ for (i = 0; i < 256; i++) {
+ r_2_pix_alloc[i] = r_2_pix_alloc[256];
+ r_2_pix_alloc[i+ 512] = r_2_pix_alloc[511];
+ g_2_pix_alloc[i] = g_2_pix_alloc[256];
+ g_2_pix_alloc[i+ 512] = g_2_pix_alloc[511];
+ b_2_pix_alloc[i] = b_2_pix_alloc[256];
+ b_2_pix_alloc[i+ 512] = b_2_pix_alloc[511];
+ }
+
+ r_2_pix = r_2_pix_alloc + 256;
+ g_2_pix = g_2_pix_alloc + 256;
+ b_2_pix = b_2_pix_alloc + 256;
+}
diff --git a/mpeglib/lib/util/render/dither/colorTableHighBit.h b/mpeglib/lib/util/render/dither/colorTableHighBit.h
new file mode 100644
index 00000000..9945414d
--- /dev/null
+++ b/mpeglib/lib/util/render/dither/colorTableHighBit.h
@@ -0,0 +1,73 @@
+/*
+ colorTables for 16,32 Bit depth
+ Copyright (C) 2000 Martin Vogt
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Library General Public License as published by
+ the Free Software Foundation.
+
+ For more information look at the file COPYRIGHT in this package
+
+ */
+
+
+#ifndef __COLORTABLEHIGHBIT_H
+#define __COLORTABLEHIGHBIT_H
+
+#include "ditherDef.h"
+
+
+
+
+class ColorTableHighBit {
+
+ TABTYPE *L_tab;
+ TABTYPE *Cr_r_tab;
+ TABTYPE *Cr_g_tab;
+ TABTYPE *Cb_g_tab;
+ TABTYPE *Cb_b_tab;
+ TABTYPE *colortab;
+
+
+ PIXVAL *r_2_pix;
+ PIXVAL *g_2_pix;
+ PIXVAL *b_2_pix;
+ PIXVAL *rgb_2_pix;
+
+ PIXVAL *r_2_pix_alloc;
+ PIXVAL *g_2_pix_alloc;
+ PIXVAL *b_2_pix_alloc;
+
+
+
+ // init stuff
+ int bpp;
+ // colorMask
+ unsigned int redMask;
+ unsigned int greenMask;
+ unsigned int blueMask;
+
+ public:
+ ColorTableHighBit(int bpp,unsigned int redMask,
+ unsigned int greenMask,unsigned int blueMask);
+ ~ColorTableHighBit();
+
+ inline TABTYPE* getL_tab() { return L_tab ; }
+ inline TABTYPE* getCr_r_tab() { return Cr_r_tab ; }
+ inline TABTYPE* getCr_g_tab() { return Cr_g_tab ; }
+ inline TABTYPE* getCb_g_tab() { return Cb_g_tab ; }
+ inline TABTYPE* getCb_b_tab() { return Cb_b_tab ; }
+
+
+ inline PIXVAL* getr_2_pix() { return r_2_pix ; }
+ inline PIXVAL* getg_2_pix() { return g_2_pix ; }
+ inline PIXVAL* getb_2_pix() { return b_2_pix ; }
+
+
+
+ private:
+ void initHighColor(int thirty2,unsigned int redMask,
+ unsigned int greenMask,unsigned int blueMask);
+
+};
+#endif
diff --git a/mpeglib/lib/util/render/dither/dither16Bit.cpp b/mpeglib/lib/util/render/dither/dither16Bit.cpp
new file mode 100644
index 00000000..0a843ee9
--- /dev/null
+++ b/mpeglib/lib/util/render/dither/dither16Bit.cpp
@@ -0,0 +1,300 @@
+/*
+ dither 16 bit depth yuv images
+ Copyright (C) 2000 Martin Vogt
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Library General Public License as published by
+ the Free Software Foundation.
+
+ For more information look at the file COPYRIGHT in this package
+
+ */
+
+
+#include "dither16Bit.h"
+
+
+Dither16Bit::Dither16Bit(unsigned int redMask,
+ unsigned int greenMask,unsigned int blueMask) {
+
+
+ colorTableHighBit=new ColorTableHighBit(16,redMask,greenMask,blueMask);
+ L_tab=colorTableHighBit->getL_tab();
+ Cr_r_tab=colorTableHighBit->getCr_r_tab();
+ Cr_g_tab=colorTableHighBit->getCr_g_tab();
+ Cb_g_tab=colorTableHighBit->getCb_g_tab();
+ Cb_b_tab=colorTableHighBit->getCb_b_tab();
+
+ r_2_pix=colorTableHighBit->getr_2_pix();
+ g_2_pix=colorTableHighBit->getg_2_pix();
+ b_2_pix=colorTableHighBit->getb_2_pix();
+
+}
+
+
+Dither16Bit::~Dither16Bit() {
+ delete colorTableHighBit;
+}
+
+
+/*
+ *--------------------------------------------------------------
+ *
+ * Color16DitherImage --
+ *
+ * Converts image into 16 bit color.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *--------------------------------------------------------------
+ */
+
+void Dither16Bit::ditherImageColor16(unsigned char* lum,
+ unsigned char* cr,
+ unsigned char* cb,
+ unsigned char* out,
+ int rows,
+ int cols,
+ int offset) {
+
+ int L, CR, CB;
+ unsigned short *row1, *row2;
+ unsigned char *lum2;
+ int x, y;
+ int cr_r;
+ int cr_g;
+ int cb_g;
+ int cb_b;
+ int cols_2 = cols/2;
+
+ row1 = (unsigned short *)out;
+ row2=row1+cols_2+cols_2+offset; // start of second row
+
+ offset=2*offset+cols_2+cols_2;
+
+ lum2 = lum + cols_2 + cols_2;
+
+
+ for (y=0; y<rows; y+=2) {
+ for (x=0; x<cols_2; x++) {
+ int R, G, B;
+
+ CR = *cr++;
+ CB = *cb++;
+ cr_r = Cr_r_tab[CR];
+ cr_g = Cr_g_tab[CR];
+ cb_g = Cb_g_tab[CB];
+ cb_b = Cb_b_tab[CB];
+
+ L = L_tab[(int) *lum++];
+
+ R = L + cr_r;
+ G = L + cr_g + cb_g;
+ B = L + cb_b;
+
+ *row1++ = (r_2_pix[R] | g_2_pix[G] | b_2_pix[B]);
+
+
+#ifdef INTERPOLATE
+ if(x != cols_2 - 1) {
+ CR = (CR + *cr) >> 1;
+ CB = (CB + *cb) >> 1;
+ cr_r = Cr_r_tab[CR];
+ cr_g = Cr_g_tab[CR];
+ cb_g = Cb_g_tab[CB];
+ cb_b = Cb_b_tab[CB];
+ }
+#endif
+
+ L = L_tab[(int) *lum++];
+
+ R = L + cr_r;
+ G = L + cr_g + cb_g;
+ B = L + cb_b;
+
+ *row1++ = (r_2_pix[R] | g_2_pix[G] | b_2_pix[B]);
+
+ /*
+ * Now, do second row.
+ */
+#ifdef INTERPOLATE
+ if(y != rows - 2) {
+ CR = (CR + *(cr + cols_2 - 1)) >> 1;
+ CB = (CB + *(cb + cols_2 - 1)) >> 1;
+ cr_r = Cr_r_tab[CR];
+ cr_g = Cr_g_tab[CR];
+ cb_g = Cb_g_tab[CB];
+ cb_b = Cb_b_tab[CB];
+ }
+#endif
+
+ L = L_tab[(int) *lum2++];
+ R = L + cr_r;
+ G = L + cr_g + cb_g;
+ B = L + cb_b;
+
+ *row2++ = (r_2_pix[R] | g_2_pix[G] | b_2_pix[B]);
+
+ L = L_tab[(int) *lum2++];
+ R = L + cr_r;
+ G = L + cr_g + cb_g;
+ B = L + cb_b;
+
+ *row2++ = (r_2_pix[R] | g_2_pix[G] | b_2_pix[B]);
+ }
+ /*
+ * These values are at the start of the next line, (due
+ * to the ++'s above),but they need to be at the start
+ * of the line after that.
+ */
+ lum += cols_2 + cols_2;
+ lum2 += cols_2 + cols_2;
+ row1 += offset;
+ row2 += offset;
+ }
+}
+
+
+/*
+ * Erik Corry's pixel doubling routines for 15/16/24/32 bit screens.
+ */
+
+
+/*
+ *--------------------------------------------------------------
+ *
+ * Twox2Color16DitherImage --
+ *
+ * Converts image into 16 bit color at double size.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *--------------------------------------------------------------
+ */
+
+/*
+ * In this function I make use of a nasty trick. The tables have the lower
+ * 16 bits replicated in the upper 16. This means I can write ints and get
+ * the horisontal doubling for free (almost).
+ */
+
+void Dither16Bit::ditherImageTwox2Color16(unsigned char* lum,
+ unsigned char* cr,
+ unsigned char* cb,
+ unsigned char* out,
+ int rows,
+ int cols,
+ int mod) {
+ int L, CR, CB;
+ unsigned int *row1 = (unsigned int *)out;
+ unsigned int *row2 = row1 + cols + mod/2;
+ unsigned int *row3 = row2 + cols + mod/2;
+ unsigned int *row4 = row3 + cols + mod/2;
+ unsigned char *lum2;
+ int x, y;
+ int cr_r;
+ int cr_g;
+ int cb_g;
+ int cb_b;
+ int cols_2 = cols/2;
+
+ lum2 = lum + cols_2 + cols_2;
+ for (y=0; y<rows; y+=2) {
+ for (x=0; x<cols_2; x++) {
+ int R, G, B;
+ int t;
+
+ CR = *cr++;
+ CB = *cb++;
+ cr_r = Cr_r_tab[CR];
+ cr_g = Cr_g_tab[CR];
+ cb_g = Cb_g_tab[CB];
+ cb_b = Cb_b_tab[CB];
+
+ L = L_tab[(int) *lum++];
+
+ R = L + cr_r;
+ G = L + cr_g + cb_g;
+ B = L + cb_b;
+
+ t = (r_2_pix[R] | g_2_pix[G] | b_2_pix[B]);
+ row1[0] = t;
+ row1++;
+ row2[0] = t;
+ row2++;
+
+ // INTERPOLATE
+ if(x != cols_2 - 1) {
+ CR = (CR + *cr) >> 1;
+ CB = (CB + *cb) >> 1;
+ cr_r = Cr_r_tab[CR];
+ cr_g = Cr_g_tab[CR];
+ cb_g = Cb_g_tab[CB];
+ cb_b = Cb_b_tab[CB];
+ }
+ // end
+
+ L = L_tab[(int) *lum++];
+
+ R = L + cr_r;
+ G = L + cr_g + cb_g;
+ B = L + cb_b;
+
+ t = (r_2_pix[R] | g_2_pix[G] | b_2_pix[B]);
+ row1[0] = t;
+ row1++;
+ row2[0] = t;
+ row2++;
+
+ /*
+ * Now, do second row.
+ */
+ // INTERPOLATE
+ if(y != rows - 2) {
+ CR = (CR + *(cr + cols_2 - 1)) >> 1;
+ CB = (CB + *(cb + cols_2 - 1)) >> 1;
+ cr_r = Cr_r_tab[CR];
+ cr_g = Cr_g_tab[CR];
+ cb_g = Cb_g_tab[CB];
+ cb_b = Cb_b_tab[CB];
+ }
+ // end
+
+ L = L_tab[(int) *lum2++];
+ R = L + cr_r;
+ G = L + cr_g + cb_g;
+ B = L + cb_b;
+
+ t = (r_2_pix[R] | g_2_pix[G] | b_2_pix[B]);
+ row3[0] = t;
+ row3++;
+ row4[0] = t;
+ row4++;
+
+ L = L_tab[(int) *lum2++];
+ R = L + cr_r;
+ G = L + cr_g + cb_g;
+ B = L + cb_b;
+
+ t = (r_2_pix[R] | g_2_pix[G] | b_2_pix[B]);
+ row3[0] = t;
+ row3++;
+ row4[0] = t;
+ row4++;
+ }
+ lum += cols_2 + cols_2;
+ lum2 += cols_2 + cols_2;
+ row1 += 6 * cols_2 + 2*mod;
+ row3 += 6 * cols_2 + 2*mod;
+ row2 += 6 * cols_2 + 2*mod;
+ row4 += 6 * cols_2 + 2*mod;
+ }
+}
diff --git a/mpeglib/lib/util/render/dither/dither16Bit.h b/mpeglib/lib/util/render/dither/dither16Bit.h
new file mode 100644
index 00000000..2e47c01c
--- /dev/null
+++ b/mpeglib/lib/util/render/dither/dither16Bit.h
@@ -0,0 +1,55 @@
+/*
+ dither 16 bit depth yuv images
+ Copyright (C) 2000 Martin Vogt
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Library General Public License as published by
+ the Free Software Foundation.
+
+ For more information look at the file COPYRIGHT in this package
+
+ */
+
+#ifndef __DITHER16Bit_H
+#define __DITHER16Bit_H
+
+#include "colorTableHighBit.h"
+
+class Dither16Bit {
+
+ ColorTableHighBit* colorTableHighBit;
+
+ TABTYPE *L_tab;
+ TABTYPE *Cr_r_tab;
+ TABTYPE *Cr_g_tab;
+ TABTYPE *Cb_g_tab;
+ TABTYPE *Cb_b_tab;
+
+ PIXVAL *r_2_pix;
+ PIXVAL *g_2_pix;
+ PIXVAL *b_2_pix;
+
+ public:
+ Dither16Bit(unsigned int redMask,
+ unsigned int greenMask,unsigned int blueMask);
+ ~Dither16Bit();
+
+ void ditherImageColor16(unsigned char* lum,
+ unsigned char* cr,
+ unsigned char* cb,
+ unsigned char* out,
+ int rows,
+ int cols,
+ int offset);
+
+ void ditherImageTwox2Color16(unsigned char* lum,
+ unsigned char* cr,
+ unsigned char* cb,
+ unsigned char* out,
+ int rows,
+ int cols,
+ int mod);
+
+};
+
+#endif
diff --git a/mpeglib/lib/util/render/dither/dither32Bit.cpp b/mpeglib/lib/util/render/dither/dither32Bit.cpp
new file mode 100644
index 00000000..61a1d2dc
--- /dev/null
+++ b/mpeglib/lib/util/render/dither/dither32Bit.cpp
@@ -0,0 +1,253 @@
+/*
+ dither 32 bit depth yuv images
+ Copyright (C) 2000 Martin Vogt
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Library General Public License as published by
+ the Free Software Foundation.
+
+ For more information look at the file COPYRIGHT in this package
+
+ */
+
+
+#include "dither32Bit.h"
+
+
+#define doRow(row,Lum) *row++=(local_r_2_pix[Lum] | \
+ local_g_2_pix[Lum] | local_b_2_pix[Lum])
+
+
+Dither32Bit::Dither32Bit(unsigned int redMask,
+ unsigned int greenMask,unsigned int blueMask) {
+
+
+ colorTableHighBit=new ColorTableHighBit(32,redMask,greenMask,blueMask);
+ L_tab=colorTableHighBit->getL_tab();
+ Cr_r_tab=colorTableHighBit->getCr_r_tab();
+ Cr_g_tab=colorTableHighBit->getCr_g_tab();
+ Cb_g_tab=colorTableHighBit->getCb_g_tab();
+ Cb_b_tab=colorTableHighBit->getCb_b_tab();
+
+ r_2_pix=colorTableHighBit->getr_2_pix();
+ g_2_pix=colorTableHighBit->getg_2_pix();
+ b_2_pix=colorTableHighBit->getb_2_pix();
+
+}
+
+
+Dither32Bit::~Dither32Bit() {
+ delete colorTableHighBit;
+}
+
+
+void Dither32Bit::ditherImageColor32(unsigned char* lum,
+ unsigned char* cr,
+ unsigned char* cb,
+ unsigned char* out,
+ int rows,
+ int cols,
+ int mod) {
+
+ int L;
+ int n;
+ int rowWork;
+ int colWork;
+
+ unsigned int *row1, *row2;
+ unsigned char *lum2;
+ PIXVAL* local_r_2_pix;
+ PIXVAL* local_g_2_pix;
+ PIXVAL* local_b_2_pix;
+
+ row1 = (unsigned int *)out;
+
+ row2 = row1+cols+mod;
+ lum2 = lum+cols;
+
+ // because the width/height are a multiply of a macroblocksize
+ // cols/rows always are even
+ colWork=cols>>1;
+ rowWork=rows>>1;
+ mod=cols+2*mod;
+
+ while(rowWork--) {
+ n=colWork;
+ while(n--) {
+
+ local_r_2_pix=r_2_pix+Cr_r_tab[*cr];
+ local_g_2_pix=g_2_pix+Cr_g_tab[*cr++] + Cb_g_tab[*cb];
+ local_b_2_pix=b_2_pix+Cb_b_tab[*cb++];
+
+ L = L_tab[*lum++];
+ doRow(row1,L);
+
+ L = L_tab[*lum++];
+ doRow(row1,L);
+
+ L = L_tab [*lum2++];
+ doRow(row2,L);
+
+ L = L_tab [*lum2++];
+ doRow(row2,L);
+
+
+ }
+ row2 += mod;
+ lum += cols;
+ lum2 += cols;
+ row1 += mod;
+
+ }
+
+}
+
+/*
+ *--------------------------------------------------------------
+ *
+ * Twox2Color32 --
+ *
+ * Converts image into 24/32 bit color.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *--------------------------------------------------------------
+ */
+
+void Dither32Bit::ditherImageTwox2Color32(unsigned char* lum,
+ unsigned char* cr,
+ unsigned char* cb,
+ unsigned char* out,
+ int rows,
+ int cols,
+ int mod) {
+ int L, CR, CB;
+ unsigned PIXVAL *row1 = (unsigned PIXVAL *)out;
+ unsigned PIXVAL *row2 = row1 + cols * ONE_TWO + mod;
+ unsigned PIXVAL *row3 = row2 + cols * ONE_TWO + mod;
+ unsigned PIXVAL *row4 = row3 + cols * ONE_TWO + mod;
+ unsigned char *lum2;
+ int x, y;
+ int cr_r;
+ int cr_g;
+ int cb_g;
+ int cb_b;
+ int cols_2 = cols/2;
+ int loffset = ONE_TWO * 6 *cols_2 + 4*mod ;
+
+ lum2 = lum + cols_2 + cols_2;
+ for (y=0; y<rows; y+=2) {
+ for (x=0; x<cols_2; x++) {
+ int R, G, B;
+ PIXVAL t;
+
+ CR = *cr++;
+ CB = *cb++;
+ cr_r = Cr_r_tab[CR];
+ cr_g = Cr_g_tab[CR];
+ cb_g = Cb_g_tab[CB];
+ cb_b = Cb_b_tab[CB];
+
+ L = L_tab[ (int) *lum++];
+
+ R = L + cr_r;
+ G = L + cr_g + cb_g;
+ B = L + cb_b;
+
+ t = (r_2_pix[R] | g_2_pix[G] | b_2_pix[B]);
+ row1[0] = t;
+ row2[0] = t;
+#ifndef SIXTYFOUR_BIT
+ row1[1] = t;
+ row2[1] = t;
+#endif
+ row1 += ONE_TWO;
+ row2 += ONE_TWO;
+
+ /* INTERPOLATE is now standard */
+ // INTERPOLATE
+ if(x != cols_2 - 1) {
+ CR = (CR + *cr) >> 1;
+ CB = (CB + *cb) >> 1;
+ cr_r = Cr_r_tab[CR];
+ cr_g = Cr_g_tab[CR];
+ cb_g = Cb_g_tab[CB];
+ cb_b = Cb_b_tab[CB];
+ }
+ // end
+ /* end INTERPOLATE */
+
+ L = L_tab[ (int) *lum++];
+
+ R = L + cr_r;
+ G = L + cr_g + cb_g;
+ B = L + cb_b;
+
+ t = (r_2_pix[R] | g_2_pix[G] | b_2_pix[B]);
+ row1[0] = t;
+ row2[0] = t;
+#ifndef SIXTYFOUR_BIT
+ row1[1] = t;
+ row2[1] = t;
+#endif
+ row1 += ONE_TWO;
+ row2 += ONE_TWO;
+
+ /*
+ * Now, do second row.
+ */
+ /* INTERPOLATE is now standard */
+ // INTERPOLATE
+ if(y != rows - 2) {
+ CR = (unsigned int) (CR + *(cr + cols_2 - 1)) >> 1;
+ CB = (unsigned int) (CB + *(cb + cols_2 - 1)) >> 1;
+ cr_r = Cr_r_tab[CR];
+ cr_g = Cr_g_tab[CR];
+ cb_g = Cb_g_tab[CB];
+ cb_b = Cb_b_tab[CB];
+ }
+ // end
+ /* endif */
+ L = L_tab[ (int) *lum2++];
+ R = L + cr_r;
+ G = L + cr_g + cb_g;
+ B = L + cb_b;
+
+ t = (r_2_pix[R] | g_2_pix[G] | b_2_pix[B]);
+ row3[0] = t;
+ row4[0] = t;
+#ifndef SIXTYFOUR_BIT
+ row3[1] = t;
+ row4[1] = t;
+#endif
+ row3 += ONE_TWO;
+ row4 += ONE_TWO;
+
+ L = L_tab[(int) *lum2++];
+ R = L + cr_r;
+ G = L + cr_g + cb_g;
+ B = L + cb_b;
+
+ t = (r_2_pix[R] | g_2_pix[G] | b_2_pix[B]);
+ row3[0] = t;
+ row4[0] = t;
+#ifndef SIXTYFOUR_BIT
+ row3[1] = t;
+ row4[1] = t;
+#endif
+ row3 += ONE_TWO;
+ row4 += ONE_TWO;
+ }
+ lum += cols_2 + cols_2;
+ lum2 += cols_2 + cols_2;
+
+ row1 += loffset;
+ row3 += loffset;
+ row2 += loffset;
+ row4 += loffset;
+ }
+}
diff --git a/mpeglib/lib/util/render/dither/dither32Bit.h b/mpeglib/lib/util/render/dither/dither32Bit.h
new file mode 100644
index 00000000..440d021a
--- /dev/null
+++ b/mpeglib/lib/util/render/dither/dither32Bit.h
@@ -0,0 +1,55 @@
+/*
+ dither 32 bit depth yuv images
+ Copyright (C) 2000 Martin Vogt
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Library General Public License as published by
+ the Free Software Foundation.
+
+ For more information look at the file COPYRIGHT in this package
+
+ */
+
+#ifndef __DITHER32Bit_H
+#define __DITHER32Bit_H
+
+#include "colorTableHighBit.h"
+
+class Dither32Bit {
+
+ ColorTableHighBit* colorTableHighBit;
+
+ TABTYPE *L_tab;
+ TABTYPE *Cr_r_tab;
+ TABTYPE *Cr_g_tab;
+ TABTYPE *Cb_g_tab;
+ TABTYPE *Cb_b_tab;
+
+ PIXVAL *r_2_pix;
+ PIXVAL *g_2_pix;
+ PIXVAL *b_2_pix;
+
+ public:
+ Dither32Bit(unsigned int redMask,
+ unsigned int greenMask,unsigned int blueMask);
+ ~Dither32Bit();
+
+ void ditherImageColor32(unsigned char* lum,
+ unsigned char* cr,
+ unsigned char* cb,
+ unsigned char* out,
+ int rows,
+ int cols,
+ int offset);
+
+ void ditherImageTwox2Color32(unsigned char* lum,
+ unsigned char* cr,
+ unsigned char* cb,
+ unsigned char* out,
+ int rows,
+ int cols,
+ int mod);
+
+};
+
+#endif
diff --git a/mpeglib/lib/util/render/dither/dither32mmx.cpp b/mpeglib/lib/util/render/dither/dither32mmx.cpp
new file mode 100644
index 00000000..b5fa4807
--- /dev/null
+++ b/mpeglib/lib/util/render/dither/dither32mmx.cpp
@@ -0,0 +1,272 @@
+/*
+ MMX ditherer for 32 bit displays
+ Copyright (C) 2000 Martin Vogt
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Library General Public License as published by
+ the Free Software Foundation.
+
+ For more information look at the file COPYRIGHT in this package
+
+ */
+
+
+
+#include "ditherMMX.h"
+
+#include <iostream>
+
+using namespace std;
+
+
+#ifndef INTEL
+ void dither32_mmx(unsigned char* lum,
+ unsigned char* cr,
+ unsigned char* cb,
+ unsigned char* out,
+ int rows,
+ int cols,
+ int mod) {
+ printf("urgs! dither32_mmx \n");
+ printf("never should happen!\n");
+ exit(0);
+}
+
+#else
+
+
+static unsigned long MMX32_80w[] = {0x00800080, 0x00800080};
+static unsigned long MMX32_10w[] = {0x00100010, 0x00100010};
+static unsigned long MMX32_00FFw[] = {0x00ff00ff, 0x00ff00ff};
+static unsigned long MMX32_FF00w[] = {0xff00ff00, 0xff00ff00};
+static unsigned short MMX32_Ycoeff[] = {0x4a, 0x4a, 0x4a, 0x4a};
+static unsigned short MMX32_Vredcoeff[] = {0x59, 0x59, 0x59, 0x59};
+static unsigned short MMX32_Ubluecoeff[] = {0x72, 0x72, 0x72, 0x72};
+static unsigned short MMX32_Ugrncoeff[] = {0xffea,0xffea,0xffea,0xffea};
+static unsigned short MMX32_Vgrncoeff[] = {0xffd2,0xffd2,0xffd2,0xffd2};
+
+void dummy_dithermmx32() {
+ cout << "MMX32_10w:"<<MMX32_10w<<endl;
+ cout << "MMX32_80w:"<<MMX32_80w<<endl;
+ cout << "MMX32_Ubluecoeff:"<<MMX32_Ubluecoeff<<endl;
+ cout << "MMX32_Vredcoeff:"<<MMX32_Vredcoeff<<endl;
+ cout << "MMX32_Ugrncoeff:"<<MMX32_Ugrncoeff<<endl;
+ cout << "MMX32_Vgrncoeff:"<<MMX32_Vgrncoeff<<endl;
+ cout << "MMX32_Ycoeff:"<<MMX32_Ycoeff<<endl;
+ cout << "MMX32_00FFw:"<<MMX32_00FFw<<endl;
+ cout << "MMX32_FF00w:"<<MMX32_FF00w<<endl;
+}
+
+
+/**
+ This MMX assembler is my first assembler/MMX program ever.
+ Thus it maybe buggy.
+ Send patches to:
+ mvogt@rhrk.uni-kl.de
+
+ After it worked fine I have "obfuscated" the code a bit to have
+ more parallism in the MMX units. This means I moved
+ initilisation around and delayed other instruction.
+ Performance measurement did not show that this brought any advantage
+ but in theory it _should_ be faster this way.
+
+ The overall performanve gain to the C based dither was 30%-40%.
+ The MMX routine calculates 256bit=8RGB values in each cycle
+ (4 for row1 & 4 for row2)
+
+ The red/green/blue.. coefficents are taken from the mpeg_play
+ player. They look nice, but I dont know if you can have
+ better values, to avoid integer rounding errors.
+
+
+ IMPORTANT:
+ ==========
+
+ It is a requirement that the cr/cb/lum are 8 byte aligned and
+ the out are 16byte aligned or you will/may get segfaults
+
+*/
+
+void dither32_mmx(unsigned char* lum,
+ unsigned char* cr,
+ unsigned char* cb,
+ unsigned char* out,
+ int rows,
+ int cols,
+ int mod) {
+
+
+
+ unsigned int *row1;
+ unsigned int *row2;
+ row1 = (unsigned int *)out; // 32 bit target
+
+ unsigned char* end = lum +cols*rows; // Pointer to the end
+ int x=cols;
+ row2=row1+cols+mod; // start of second row
+ mod=4*cols+8*mod; // increment for row1 in byte
+
+ // buffer for asm function
+ int buf[6];
+ buf[0]=(int)(lum+cols); // lum2 pointer
+ buf[1]=(int)end;
+ buf[2]=x;
+ buf[3]=mod;
+ buf[4]=0; //tmp0;
+ buf[5]=cols;
+
+
+ __asm__ __volatile__ (
+ ".align 32\n"
+ "1:\n"
+
+ // create Cr (result in mm1)
+ "movd (%0), %%mm1\n" // 0 0 0 0 v3 v2 v1 v0
+ "pxor %%mm7,%%mm7\n" // 00 00 00 00 00 00 00 00
+ "movd (%2), %%mm2\n" // 0 0 0 0 l3 l2 l1 l0
+ "punpcklbw %%mm7,%%mm1\n" // 0 v3 0 v2 00 v1 00 v0
+ "punpckldq %%mm1,%%mm1\n" // 00 v1 00 v0 00 v1 00 v0
+ "psubw MMX32_80w,%%mm1\n" // mm1-128:r1 r1 r0 r0 r1 r1 r0 r0
+
+ // create Cr_g (result in mm0)
+ "movq %%mm1,%%mm0\n" // r1 r1 r0 r0 r1 r1 r0 r0
+ "pmullw MMX32_Vgrncoeff,%%mm0\n" // red*-46dec=0.7136*64
+ "pmullw MMX32_Vredcoeff,%%mm1\n" // red*89dec=1.4013*64
+ "psraw $6, %%mm0\n" // red=red/64
+ "psraw $6, %%mm1\n" // red=red/64
+
+
+ // create L1 L2 (result in mm2,mm4)
+ // L2=lum2
+ "movl %2,16%5\n" // store register in tmp0
+ "movl %5,%2\n" // lum2->register
+ "movd (%2),%%mm3\n" // 0 0 0 0 L3 L2 L1 L0
+ "movl 16%5,%2\n" // tmp0->register
+ "punpckldq %%mm3,%%mm2\n" // L3 L2 L1 L0 l3 l2 l1 l0
+ "movq %%mm2,%%mm4\n" // L3 L2 L1 L0 l3 l2 l1 l0
+ "pand MMX32_FF00w, %%mm2\n" // L3 0 L1 0 l3 0 l1 0
+ "pand MMX32_00FFw, %%mm4\n" // 0 L2 0 L0 0 l2 0 l0
+ "psrlw $8,%%mm2\n" // 0 L3 0 L1 0 l3 0 l1
+
+
+
+ // create R (result in mm6)
+ "movq %%mm2,%%mm5\n" // 0 L3 0 L1 0 l3 0 l1
+ "movq %%mm4,%%mm6\n" // 0 L2 0 L0 0 l2 0 l0
+ "paddsw %%mm1, %%mm5\n" // lum1+red:x R3 x R1 x r3 x r1
+ "paddsw %%mm1, %%mm6\n" // lum1+red:x R2 x R0 x r2 x r0
+ "packuswb %%mm5,%%mm5\n" // R3 R1 r3 r1 R3 R1 r3 r1
+ "packuswb %%mm6,%%mm6\n" // R2 R0 r2 r0 R2 R0 r2 r0
+ "pxor %%mm7,%%mm7\n" // 00 00 00 00 00 00 00 00
+ "punpcklbw %%mm5,%%mm6\n" // R3 R2 R1 R0 r3 r2 r1 r0
+
+
+ // create Cb (result in mm1)
+ "movd (%1), %%mm1\n" // 0 0 0 0 u3 u2 u1 u0
+ "punpcklbw %%mm7,%%mm1\n" // 0 u3 0 u2 00 u1 00 u0
+ "punpckldq %%mm1,%%mm1\n" // 00 u1 00 u0 00 u1 00 u0
+ "psubw MMX32_80w,%%mm1\n" // mm1-128:u1 u1 u0 u0 u1 u1 u0 u0
+ // create Cb_g (result in mm5)
+ "movq %%mm1,%%mm5\n" // u1 u1 u0 u0 u1 u1 u0 u0
+ "pmullw MMX32_Ugrncoeff,%%mm5\n" // blue*-109dec=1.7129*64
+ "pmullw MMX32_Ubluecoeff,%%mm1\n" // blue*114dec=1.78125*64
+ "psraw $6, %%mm5\n" // blue=red/64
+ "psraw $6, %%mm1\n" // blue=blue/64
+
+
+ // create G (result in mm7)
+ "movq %%mm2,%%mm3\n" // 0 L3 0 L1 0 l3 0 l1
+ "movq %%mm4,%%mm7\n" // 0 L2 0 L0 0 l2 0 l1
+ "paddsw %%mm5, %%mm3\n" // lum1+Cb_g:x G3t x G1t x g3t x g1t
+ "paddsw %%mm5, %%mm7\n" // lum1+Cb_g:x G2t x G0t x g2t x g0t
+ "paddsw %%mm0, %%mm3\n" // lum1+Cr_g:x G3 x G1 x g3 x g1
+ "paddsw %%mm0, %%mm7\n" // lum1+blue:x G2 x G0 x g2 x g0
+ "packuswb %%mm3,%%mm3\n" // G3 G1 g3 g1 G3 G1 g3 g1
+ "packuswb %%mm7,%%mm7\n" // G2 G0 g2 g0 G2 G0 g2 g0
+ "punpcklbw %%mm3,%%mm7\n" // G3 G2 G1 G0 g3 g2 g1 g0
+
+
+ // create B (result in mm5)
+ "movq %%mm2,%%mm3\n" // 0 L3 0 L1 0 l3 0 l1
+ "movq %%mm4,%%mm5\n" // 0 L2 0 L0 0 l2 0 l1
+ "paddsw %%mm1, %%mm3\n" // lum1+blue:x B3 x B1 x b3 x b1
+ "paddsw %%mm1, %%mm5\n" // lum1+blue:x B2 x B0 x b2 x b0
+ "packuswb %%mm3,%%mm3\n" // B3 B1 b3 b1 B3 B1 b3 b1
+ "packuswb %%mm5,%%mm5\n" // B2 B0 b2 b0 B2 B0 b2 b0
+ "punpcklbw %%mm3,%%mm5\n" // B3 B2 B1 B0 b3 b2 b1 b0
+
+
+ // fill destination row1 (needed are mm6=Rr,mm7=Gg,mm5=Bb)
+
+ "pxor %%mm2,%%mm2\n" // 0 0 0 0 0 0 0 0
+ "pxor %%mm4,%%mm4\n" // 0 0 0 0 0 0 0 0
+ "movq %%mm6,%%mm1\n" // R3 R2 R1 R0 r3 r2 r1 r0
+ "movq %%mm5,%%mm3\n" // B3 B2 B1 B0 b3 b2 b1 b0
+ // process lower lum
+ "punpcklbw %%mm4,%%mm1\n" // 0 r3 0 r2 0 r1 0 r0
+ "punpcklbw %%mm4,%%mm3\n" // 0 b3 0 b2 0 b1 0 b0
+ "movq %%mm1,%%mm2\n" // 0 r3 0 r2 0 r1 0 r0
+ "movq %%mm3,%%mm0\n" // 0 b3 0 b2 0 b1 0 b0
+ "punpcklwd %%mm1,%%mm3\n" // 0 r1 0 b1 0 r0 0 b0
+ "punpckhwd %%mm2,%%mm0\n" // 0 r3 0 b3 0 r2 0 b2
+
+ "pxor %%mm2,%%mm2\n" // 0 0 0 0 0 0 0 0
+ "movq %%mm7,%%mm1\n" // G3 G2 G1 G0 g3 g2 g1 g0
+ "punpcklbw %%mm1,%%mm2\n" // g3 0 g2 0 g1 0 g0 0
+ "punpcklwd %%mm4,%%mm2\n" // 0 0 g1 0 0 0 g0 0
+ "por %%mm3, %%mm2\n" // 0 r1 g1 b1 0 r0 g0 b0
+ "movq %%mm2,(%3)\n" // wrote out ! row1
+
+ "pxor %%mm2,%%mm2\n" // 0 0 0 0 0 0 0 0
+ "punpcklbw %%mm1,%%mm4\n" // g3 0 g2 0 g1 0 g0 0
+ "punpckhwd %%mm2,%%mm4\n" // 0 0 g3 0 0 0 g2 0
+ "por %%mm0, %%mm4\n" // 0 r3 g3 b3 0 r2 g2 b2
+ "movq %%mm4,8(%3)\n" // wrote out ! row1
+
+ // fill destination row2 (needed are mm6=Rr,mm7=Gg,mm5=Bb)
+ // this can be done "destructive"
+ "pxor %%mm2,%%mm2\n" // 0 0 0 0 0 0 0 0
+ "punpckhbw %%mm2,%%mm6\n" // 0 R3 0 R2 0 R1 0 R0
+ "punpckhbw %%mm1,%%mm5\n" // G3 B3 G2 B2 G1 B1 G0 B0
+ "movq %%mm5,%%mm1\n" // G3 B3 G2 B2 G1 B1 G0 B0
+ "punpcklwd %%mm6,%%mm1\n" // 0 R1 G1 B1 0 R0 G0 B0
+ "movq %%mm1,(%4)\n" // wrote out ! row2
+ "punpckhwd %%mm6,%%mm5\n" // 0 R3 G3 B3 0 R2 G2 B2
+ "movq %%mm5,8(%4)\n" // wrote out ! row2
+
+ "addl $4,%2\n" // lum+4
+ "addl $4,%5\n" // lum2+4
+ "leal 16(%3),%3\n" // row1+16
+ "leal 16(%4),%4\n" // row2+16
+ "addl $2, %0\n" // cr+2
+ "addl $2, %1\n" // cb+2
+
+ "subl $4,8%5\n" // x+4 x is buf[2]
+ "cmpl $0,8%5\n"
+
+ "jne 1b\n"
+ "addl 20%5, %2\n" // lum += cols
+ "movl %2,16%5\n" // store register in tmp0
+ "movl 20%5,%2\n" // cols->register
+
+ "addl %2, %5\n" // lum2 += cols
+ "addl 12%5, %3\n" // row1+= mod is buf[0]
+ "addl 12%5, %4\n" // row2+= mod is buf[0]
+
+ "movl %2, 8%5\n" // x=cols
+ "movl 16%5,%2\n" // store tmp0 in register
+
+ "cmpl 4%5, %2\n" // buf[1] is end
+ "jl 1b\n"
+ "emms\n"
+ :
+ : "r" (cr), "r"(cb),"r"(lum),
+ "r"(row1),"r"(row2),"m"(buf[0])
+ );
+
+
+
+}
+
+
+#endif
diff --git a/mpeglib/lib/util/render/dither/dither8Bit.cpp b/mpeglib/lib/util/render/dither/dither8Bit.cpp
new file mode 100644
index 00000000..4f85d3fb
--- /dev/null
+++ b/mpeglib/lib/util/render/dither/dither8Bit.cpp
@@ -0,0 +1,306 @@
+/*
+ dither 8 bit depth yuv images
+ Copyright (C) 2000 Martin Vogt
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Library General Public License as published by
+ the Free Software Foundation.
+
+ For more information look at the file COPYRIGHT in this package
+
+ */
+
+
+#include "dither8Bit.h"
+
+
+Dither8Bit::Dither8Bit(unsigned char pixel[256]) {
+
+ int i;
+ for(i=0;i<256;i++) {
+ this->pixel[i]=pixel[i];
+ }
+ colorTable8Bit=new ColorTable8Bit();
+
+ lum_values = colorTable8Bit->getLumValues();
+ cr_values = colorTable8Bit->getCrValues();
+ cb_values = colorTable8Bit->getCbValues();
+
+
+
+ initOrderedDither();
+
+}
+
+
+Dither8Bit::~Dither8Bit() {
+ int i;
+ for (i=0; i<DITH_SIZE; i++) {
+ delete cb_darrays[i];
+ delete l_darrays[i];
+ delete cr_darrays[i];
+ }
+}
+
+
+
+
+
+/*
+ *--------------------------------------------------------------
+ *
+ * InitOrderedDither--
+ *
+ * Structures initialized for ordered dithering.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *--------------------------------------------------------------
+ */
+void Dither8Bit::initOrderedDither() {
+ int i, j, k, err_range, threshval;
+ unsigned char *lmark, *cmark;
+
+ for (i=0; i<DITH_SIZE; i++) {
+ lmark = l_darrays[i] = new unsigned char[256];
+ for (j=0; j<lum_values[0]; j++) {
+ *lmark++ = 0;
+ }
+ for (j=0; j<(LUM_RANGE-1); j++) {
+ err_range = lum_values[j+1] - lum_values[j];
+ threshval = ((i * err_range) / DITH_SIZE)+lum_values[j];
+
+ for (k=lum_values[j]; k<lum_values[j+1]; k++) {
+ if (k > threshval) {
+ *lmark++ = ((j+1) * (CR_RANGE * CB_RANGE));
+ }
+ else {
+ *lmark++ = (j * (CR_RANGE * CB_RANGE));
+ }
+ }
+ }
+ for (j=lum_values[LUM_RANGE-1]; j<256; j++) {
+ *lmark++ = (LUM_RANGE-1)*(CR_RANGE * CB_RANGE);
+ }
+ }
+ for (i=0; i<DITH_SIZE; i++) {
+ cmark = cr_darrays[i] = new unsigned char[256];
+
+ for (j=0; j<cr_values[0]; j++) {
+ *cmark++ = 0;
+ }
+
+ for (j=0; j<(CR_RANGE-1); j++) {
+ err_range = cr_values[j+1] - cr_values[j];
+ threshval = ((i * err_range) / DITH_SIZE)+cr_values[j];
+
+ for (k=cr_values[j]; k<cr_values[j+1]; k++) {
+ if (k > threshval) {
+ *cmark++ = ((j+1) * CB_RANGE);
+ }
+ else {
+ *cmark++ = (j * CB_RANGE);
+ }
+ }
+ }
+
+ for (j=cr_values[CR_RANGE-1]; j<256; j++) {
+ *cmark++ = (CR_RANGE-1)*(CB_RANGE);
+ }
+ }
+
+ for (i=0; i<DITH_SIZE; i++) {
+ cmark = cb_darrays[i] = new unsigned char[256];
+
+ for (j=0; j<cb_values[0]; j++) {
+ *cmark++ = 0;
+ }
+
+ for (j=0; j<(CB_RANGE-1); j++) {
+ err_range = cb_values[j+1] - cb_values[j];
+ threshval = ((i * err_range) / DITH_SIZE)+cb_values[j];
+
+ for (k=cb_values[j]; k<cb_values[j+1]; k++) {
+ if (k > threshval) {
+ *cmark++ = j+1;
+ }
+ else {
+ *cmark++ = j;
+ }
+ }
+ }
+
+ for (j=cb_values[CB_RANGE-1]; j<256; j++) {
+ *cmark++ = CB_RANGE-1;
+ }
+ }
+}
+
+
+
+/*
+ *--------------------------------------------------------------
+ *
+ * OrderedDitherImage --
+ *
+ * Dithers an image using an ordered dither.
+ * Assumptions made:
+ * 1) The color space is allocated y:cr:cb = 8:4:4
+ * 2) The spatial resolution of y:cr:cb is 4:1:1
+ * The channels are dithered based on the standard
+ * ordered dither pattern for a 4x4 area.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *--------------------------------------------------------------
+ */
+
+void Dither8Bit::ditherImageOrdered (unsigned char* lum,
+ unsigned char* cr,
+ unsigned char* cb,
+ unsigned char* out,
+ int h,
+ int w) {
+ unsigned char *l, *r, *b, *o1, *o2;
+ unsigned char *l2;
+ unsigned char L, R, B;
+ int i, j;
+
+ l = lum;
+ l2 = lum+w;
+ r = cr;
+ b = cb;
+ o1 = out;
+ o2 = out+w;
+
+
+ for (i=0; i<h; i+=4) {
+
+ for (j=0; j<w; j+=8) {
+
+ R = r[0]; B = b[0];
+
+ L = l[0];
+ o1[0] = pixel[(l_darrays[0][L] + cr_darrays[0][R] + cb_darrays[0][B])];
+ L = l[1];
+ o1[1] = pixel[(l_darrays[8][L] + cr_darrays[8][R] + cb_darrays[8][B])];
+ L = l2[0];
+ o2[0] = pixel[(l_darrays[12][L] + cr_darrays[12][R] + cb_darrays[12][B])];
+ L = l2[1];
+ o2[1] = pixel[(l_darrays[4][L] + cr_darrays[4][R] + cb_darrays[4][B])];
+
+ R = r[1]; B = b[1];
+
+ L = l[2];
+ o1[2] = pixel[(l_darrays[2][L] + cr_darrays[2][R] + cb_darrays[2][B])];
+ L = l[3];
+ o1[3] = pixel[(l_darrays[10][L] + cr_darrays[10][R] + cb_darrays[10][B])];
+ L = l2[2];
+ o2[2] = pixel[(l_darrays[14][L] + cr_darrays[14][R] + cb_darrays[14][B])];
+ L = l2[3];
+ o2[3] = pixel[(l_darrays[6][L] + cr_darrays[6][R] + cb_darrays[6][B])];
+
+ R = r[2]; B = b[2];
+
+ L = l[4];
+ o1[4] = pixel[(l_darrays[0][L] + cr_darrays[0][R] + cb_darrays[0][B])];
+ L = l[5];
+ o1[5] = pixel[(l_darrays[8][L] + cr_darrays[8][R] + cb_darrays[8][B])];
+ L = l2[4];
+ o2[4] = pixel[(l_darrays[12][L] + cr_darrays[12][R] + cb_darrays[12][B])];
+ L = l2[5];
+ o2[5] = pixel[(l_darrays[4][L] + cr_darrays[4][R] + cb_darrays[4][B])];
+
+ R = r[3]; B = b[3];
+
+ L = l[6];
+ o1[6] = pixel[(l_darrays[2][L] + cr_darrays[2][R] + cb_darrays[2][B])];
+ L = l[7];
+ o1[7] = pixel[(l_darrays[10][L] + cr_darrays[10][R] + cb_darrays[10][B])];
+ L = l2[6];
+ o2[6] = pixel[(l_darrays[14][L] + cr_darrays[14][R] + cb_darrays[14][B])];
+ L = l2[7];
+ o2[7] = pixel[(l_darrays[6][L] + cr_darrays[6][R] + cb_darrays[6][B])];
+
+ l += 8;
+ l2 += 8;
+ r += 4;
+ b += 4;
+ o1 += 8;
+ o2 += 8;
+ }
+
+ l += w;
+ l2 += w;
+ o1 += w;
+ o2 += w;
+
+ for (j=0; j<w; j+=8) {
+
+ R = r[0]; B = b[0];
+
+ L = l[0];
+ o1[0] = pixel[(l_darrays[3][L] + cr_darrays[3][R] + cb_darrays[3][B])];
+ L = l[1];
+ o1[1] = pixel[(l_darrays[11][L] + cr_darrays[11][R] + cb_darrays[11][B])];
+ L = l2[0];
+ o2[0] = pixel[(l_darrays[15][L] + cr_darrays[15][R] + cb_darrays[15][B])];
+ L = l2[1];
+ o2[1] = pixel[(l_darrays[7][L] + cr_darrays[7][R] + cb_darrays[7][B])];
+
+ R = r[1]; B = b[1];
+
+ L = l[2];
+ o1[2] = pixel[(l_darrays[1][L] + cr_darrays[1][R] + cb_darrays[1][B])];
+ L = l[3];
+ o1[3] = pixel[(l_darrays[9][L] + cr_darrays[9][R] + cb_darrays[9][B])];
+ L = l2[2];
+ o2[2] = pixel[(l_darrays[13][L] + cr_darrays[13][R] + cb_darrays[13][B])];
+ L = l2[3];
+ o2[3] = pixel[(l_darrays[5][L] + cr_darrays[5][R] + cb_darrays[5][B])];
+
+ R = r[2]; B = b[2];
+
+ L = l[4];
+ o1[4] = pixel[(l_darrays[3][L] + cr_darrays[3][R] + cb_darrays[3][B])];
+ L = l[5];
+ o1[5] = pixel[(l_darrays[11][L] + cr_darrays[11][R] + cb_darrays[11][B])];
+ L = l2[4];
+ o2[4] = pixel[(l_darrays[15][L] + cr_darrays[15][R] + cb_darrays[15][B])];
+ L = l2[5];
+ o2[5] = pixel[(l_darrays[7][L] + cr_darrays[7][R] + cb_darrays[7][B])];
+
+ R = r[3]; B = b[3];
+
+ L = l[6];
+ o1[6] = pixel[(l_darrays[1][L] + cr_darrays[1][R] + cb_darrays[1][B])];
+ L = l[7];
+ o1[7] = pixel[(l_darrays[9][L] + cr_darrays[9][R] + cb_darrays[9][B])];
+ L = l2[6];
+ o2[6] = pixel[(l_darrays[13][L] + cr_darrays[13][R] + cb_darrays[13][B])];
+ L = l2[7];
+ o2[7] = pixel[(l_darrays[5][L] + cr_darrays[5][R] + cb_darrays[5][B])];
+
+ l += 8;
+ l2 += 8;
+ r += 4;
+ b += 4;
+ o1 += 8;
+ o2 += 8;
+ }
+
+ l += w;
+ l2 += w;
+ o1 += w;
+ o2 += w;
+ }
+}
+
diff --git a/mpeglib/lib/util/render/dither/dither8Bit.h b/mpeglib/lib/util/render/dither/dither8Bit.h
new file mode 100644
index 00000000..7bdd4d8f
--- /dev/null
+++ b/mpeglib/lib/util/render/dither/dither8Bit.h
@@ -0,0 +1,63 @@
+/*
+ dither 8 bit depth yuv images
+ Copyright (C) 2000 Martin Vogt
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Library General Public License as published by
+ the Free Software Foundation.
+
+ For more information look at the file COPYRIGHT in this package
+
+ */
+
+
+
+#ifndef __DITHER_8BIT_H
+#define __DITHER_8BIT_H
+
+
+#include "colorTable8Bit.h"
+
+#define DITH_SIZE 16
+
+
+class Dither8Bit {
+
+ /* Structures used to implement hybrid ordered dither/floyd-steinberg
+ dither algorithm.
+ */
+
+ unsigned char *l_darrays[DITH_SIZE];
+ unsigned char *cr_darrays[DITH_SIZE];
+ unsigned char *cb_darrays[DITH_SIZE];
+
+ // private colormap
+ unsigned char pixel[256];
+
+ ColorTable8Bit* colorTable8Bit;
+
+ // Arrays holding quantized value ranged for lum, cr, and cb.
+ // (used for 8 Bit)
+
+ int* lum_values;
+ int* cr_values;
+ int* cb_values;
+
+
+ public:
+ Dither8Bit(unsigned char pixel[256]);
+ ~Dither8Bit();
+
+ void ditherImageOrdered (unsigned char* lum,
+ unsigned char* cr,
+ unsigned char* cb,
+ unsigned char* out,
+ int h,
+ int w);
+
+ private:
+ void initOrderedDither();
+};
+
+#endif
+
diff --git a/mpeglib/lib/util/render/dither/ditherDef.h b/mpeglib/lib/util/render/dither/ditherDef.h
new file mode 100644
index 00000000..2e8d7d0e
--- /dev/null
+++ b/mpeglib/lib/util/render/dither/ditherDef.h
@@ -0,0 +1,100 @@
+/*
+ global definitions for dithering
+ Copyright (C) 2000 Martin Vogt
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Library General Public License as published by
+ the Free Software Foundation.
+
+ For more information look at the file COPYRIGHT in this package
+
+ */
+
+
+
+#ifndef __DITHERDEF_H
+#define __DITHERDEF_H
+
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+extern "C" {
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+}
+
+
+#ifdef __GNUC__
+#if (__GNUC__ < 2 || ( __GNUC__ == 2 && __GNUC_MINOR__ < 91 ) )
+#ifndef _AIX
+#warning "inline code disabled! (buggy egcs version)"
+#undef __NO_MATH_INLINES
+#define __NO_MATH_INLINES 1
+#endif
+#endif
+#endif
+#include <math.h>
+
+
+
+/* Gamma correction stuff */
+extern int gammaCorrectFlag;
+extern double gammaCorrect;
+
+/* Chroma correction stuff */
+extern int chromaCorrectFlag;
+extern double chromaCorrect;
+
+
+#define CB_BASE 1
+#define CR_BASE (CB_BASE*CB_RANGE)
+#define LUM_BASE (CR_BASE*CR_RANGE)
+
+#define TABTYPE short
+
+#ifdef SIXTYFOUR_BIT
+#define PIXVAL long
+#else
+#define PIXVAL int
+#endif
+
+#ifdef SIXTYFOUR_BIT
+#define ONE_TWO 1
+#else
+#define ONE_TWO 2
+#endif
+
+
+
+#define Min(x,y) (((x) < (y)) ? (x) : (y))
+#define Max(x,y) (((x) > (y)) ? (x) : (y))
+
+#define CHROMA_CORRECTION128(x) ((x) >= 0 \
+ ? Min(127, (int)(((x) * chromaCorrect))) \
+ : Max(-128, (int)(((x) * chromaCorrect))))
+#define CHROMA_CORRECTION256D(x) ((x) >= 128 \
+ ? 128.0 + Min(127.0, (((x)-128.0) * chromaCorrect)) \
+ : 128.0 - Min(128.0, (((128.0-(x))* chromaCorrect))))
+
+
+
+#define GAMMA_CORRECTION(x) ((int)(pow((x) / 255.0, 1.0/gammaCorrect)* 255.0))
+
+#define CHROMA_CORRECTION128D(x) ((x) >= 0 \
+ ? Min(127.0, ((x) * chromaCorrect)) \
+ : Max(-128.0, ((x) * chromaCorrect)))
+
+#define CHROMA_CORRECTION256(x) ((x) >= 128 \
+ ? 128 + Min(127, (int)(((x)-128.0) * chromaCorrect)) \
+ : 128 - Min(128, (int)((128.0-(x)) * chromaCorrect)))
+
+// Range values for lum, cr, cb.
+#define LUM_RANGE 8
+#define CR_RANGE 4
+#define CB_RANGE 4
+
+
+#endif
diff --git a/mpeglib/lib/util/render/dither/ditherMMX.h b/mpeglib/lib/util/render/dither/ditherMMX.h
new file mode 100644
index 00000000..2f08b689
--- /dev/null
+++ b/mpeglib/lib/util/render/dither/ditherMMX.h
@@ -0,0 +1,38 @@
+/*
+ mmx ditherer
+ Copyright (C) 2000 Martin Vogt
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Library General Public License as published by
+ the Free Software Foundation.
+
+ For more information look at the file COPYRIGHT in this package
+
+ */
+
+
+#ifndef __DITHERMMX_H
+#define __DITHERMMX_H
+
+#include "ditherDef.h"
+
+
+// The mmx dither routine come from NIST
+// NIST is an mpeg2/dvd player
+// more: http://home.germany.net/100-5083/
+extern void ditherBlock(unsigned char *lum,
+ unsigned char *cr,
+ unsigned char *cb,
+ unsigned char *out,
+ int rows, int cols, int mod);
+
+extern void dither32_mmx(unsigned char* lum,
+ unsigned char* cr,
+ unsigned char* cb,
+ unsigned char* out,
+ int rows,
+ int cols,
+ int mod);
+
+
+#endif
diff --git a/mpeglib/lib/util/render/dither/ditherRGB.cpp b/mpeglib/lib/util/render/dither/ditherRGB.cpp
new file mode 100644
index 00000000..1bcdb2ff
--- /dev/null
+++ b/mpeglib/lib/util/render/dither/ditherRGB.cpp
@@ -0,0 +1,230 @@
+/*
+ copys RGB images to a destination
+ Copyright (C) 2000 Martin Vogt
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Library General Public License as published by
+ the Free Software Foundation.
+
+ For more information look at the file COPYRIGHT in this package
+
+ */
+
+
+#include "ditherRGB.h"
+
+#include <iostream>
+
+using namespace std;
+
+DitherRGB::DitherRGB() {
+}
+
+
+DitherRGB::~DitherRGB() {
+}
+
+
+int DitherRGB::getDepth(int pixel) {
+ int byteDepth=0;
+
+ switch(pixel) {
+ case 8:
+ byteDepth=1;
+ break;
+ case 15:
+ case 16:
+ byteDepth=2;
+ break;
+ case 24:
+ case 32:
+ byteDepth=4;
+ break;
+ default:
+ cout << "unknown byteDepth:"<<pixel
+ << " in DitherRGB_flipped::flipRGBImage"<<endl;
+ }
+ return byteDepth;
+
+}
+
+void DitherRGB::ditherRGBImage(unsigned char* dest,unsigned char* src,
+ int depth,int width,int height,int offset) {
+ int byteDepth=getDepth(depth);
+ if (byteDepth == 0) {
+ return;
+ }
+
+
+ if (offset==0) {
+ int bytes=height*width*byteDepth;
+ memcpy(dest,src,bytes);
+ return;
+ }
+
+ int i;
+ int lineSize=width*byteDepth;
+
+ offset=offset*byteDepth+lineSize;
+
+ for (i=0;i<height;i++) {
+ memcpy(dest,src,lineSize);
+ src+=lineSize;
+ dest+=offset;
+ }
+
+
+}
+
+void DitherRGB::ditherRGBImage_x2(unsigned char* dest,unsigned char* src,
+ int depth,int width,int height,int offset) {
+
+ int byteDepth=getDepth(depth);
+ if (byteDepth == 0) {
+ return;
+ }
+
+ switch(byteDepth) {
+ case 1:
+ ditherRGB1Byte_x2(dest,src,1,width, height,offset);
+ break;
+ case 2:
+ ditherRGB2Byte_x2(dest,src,2,width, height,offset);
+ break;
+ case 4:
+ ditherRGB4Byte_x2(dest,src,4,width, height,offset);
+ break;
+ default:
+ cout <<"ditherRGBImage_x2 byteDepth:"<<byteDepth
+ <<" not supported"<<endl;
+ }
+}
+
+
+void DitherRGB::ditherRGB1Byte_x2(unsigned char* dest,unsigned char* src,
+ int depth,int width,int height,int offset) {
+
+ //
+ // dest destr
+ // destd destrd
+
+ int lineInc=2*width+offset;
+ unsigned char* destr=dest+1;
+ unsigned char* destd=dest+lineInc;
+ unsigned char* destrd=destd+1;
+
+ int row;
+ int col;
+ //
+ // We copy byte by byte this is slow, but works for
+ // all byteDepth
+ // this memcpy can be optimized with MMX very i) good ii) easily
+
+ for(row=0;row<height;row++) {
+ for(col=0;col<width;col++) {
+ *dest++=*src;
+ *destr++=*src;
+ *destd++=*src;
+ *destrd++=*src;
+ dest++;
+ destr++;
+ destd++;
+ destrd++;
+
+ src++;
+ }
+ dest+=lineInc;
+ destr+=lineInc;
+ destd+=lineInc;
+ destrd+=lineInc;
+ }
+}
+
+
+void DitherRGB::ditherRGB2Byte_x2(unsigned char* destination,
+ unsigned char* source,
+ int depth,int width,int height,int offset) {
+ //
+ // dest destr
+ // destd destrd
+
+ unsigned short int* src=(unsigned short int*) source;
+ unsigned short int* dest=(unsigned short int*) destination;
+
+ int lineInc=2*width+offset;
+ unsigned short int* destr=dest+1;
+ unsigned short int* destd=dest+lineInc;
+ unsigned short int* destrd=destd+1;
+
+ int row;
+ int col;
+ //
+ // We copy byte by byte this is slow, but works for
+ // all byteDepth
+ // this memcpy can be optimized with MMX very i) good ii) easily
+
+ for(row=0;row<height;row++) {
+ for(col=0;col<width;col++) {
+ *dest++=*src;
+ *destr++=*src;
+ *destd++=*src;
+ *destrd++=*src;
+ dest++;
+ destr++;
+ destd++;
+ destrd++;
+
+ src++;
+ }
+ dest+=lineInc;
+ destr+=lineInc;
+ destd+=lineInc;
+ destrd+=lineInc;
+ }
+}
+
+
+void DitherRGB::ditherRGB4Byte_x2(unsigned char* destination,
+ unsigned char* source,
+ int depth,int width,int height,int offset) {
+
+ //
+ // dest destr
+ // destd destrd
+
+ unsigned int* src=(unsigned int*) source;
+ unsigned int* dest=(unsigned int*) destination;
+
+ int lineInc=2*width+offset;
+ unsigned int* destr=dest+1;
+ unsigned int* destd=dest+lineInc;
+ unsigned int* destrd=destd+1;
+
+ int row;
+ int col;
+ //
+ // We copy byte by byte this is slow, but works for
+ // all byteDepth
+ // this memcpy can be optimized with MMX very i) good ii) easily
+
+ for(row=0;row<height;row++) {
+ for(col=0;col<width;col++) {
+ *dest++=*src;
+ *destr++=*src;
+ *destd++=*src;
+ *destrd++=*src;
+ dest++;
+ destr++;
+ destd++;
+ destrd++;
+
+ src++;
+ }
+ dest+=lineInc;
+ destr+=lineInc;
+ destd+=lineInc;
+ destrd+=lineInc;
+ }
+
+}
+
diff --git a/mpeglib/lib/util/render/dither/ditherRGB.h b/mpeglib/lib/util/render/dither/ditherRGB.h
new file mode 100644
index 00000000..6f24cd8c
--- /dev/null
+++ b/mpeglib/lib/util/render/dither/ditherRGB.h
@@ -0,0 +1,45 @@
+/*
+ copys RGB images to a destination
+ Copyright (C) 2000 Martin Vogt
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Library General Public License as published by
+ the Free Software Foundation.
+
+ For more information look at the file COPYRIGHT in this package
+
+ */
+
+#ifndef __DITHERRGB_H
+#define __DITHERRGB_H
+
+#include "colorTableHighBit.h"
+
+class DitherRGB {
+
+ int flipSize;
+ unsigned char* flipSpace;
+
+ public:
+ DitherRGB();
+ ~DitherRGB();
+
+ // Note: this methods swaps the image
+ // itsself
+ void ditherRGBImage(unsigned char* dest,unsigned char* src,
+ int depth,int width,int height,int offset);
+ void ditherRGBImage_x2(unsigned char* dest,unsigned char* src,
+ int depth,int width,int height,int offset);
+ private:
+ int getDepth(int pixel);
+ // depth is here in byte!
+ void ditherRGB1Byte_x2(unsigned char* dest,unsigned char* src,
+ int depth,int width,int height,int offset);
+ void ditherRGB2Byte_x2(unsigned char* dest,unsigned char* src,
+ int depth,int width,int height,int offset);
+ void ditherRGB4Byte_x2(unsigned char* dest,unsigned char* src,
+ int depth,int width,int height,int offset);
+
+};
+
+#endif
diff --git a/mpeglib/lib/util/render/dither/ditherRGB_flipped.cpp b/mpeglib/lib/util/render/dither/ditherRGB_flipped.cpp
new file mode 100644
index 00000000..ba177675
--- /dev/null
+++ b/mpeglib/lib/util/render/dither/ditherRGB_flipped.cpp
@@ -0,0 +1,82 @@
+/*
+ flips RGB images
+ Copyright (C) 2000 Martin Vogt
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Library General Public License as published by
+ the Free Software Foundation.
+
+ For more information look at the file COPYRIGHT in this package
+
+ */
+
+
+#include "ditherRGB_flipped.h"
+
+#include <iostream>
+
+using namespace std;
+
+
+DitherRGB_flipped::DitherRGB_flipped() {
+ flipSpace=NULL;
+ flipSize=0;
+}
+
+DitherRGB_flipped::~DitherRGB_flipped() {
+ if (flipSpace != NULL) {
+ delete flipSpace;
+ }
+}
+
+
+
+
+void DitherRGB_flipped::flipRGBImage(unsigned char* dest,unsigned char* src,
+ int depth,int width,int height,int ) {
+
+ int byteDepth;
+
+ switch(depth) {
+ case 8:
+ byteDepth=1;
+ break;
+ case 15:
+ case 16:
+ byteDepth=2;
+ break;
+ case 24:
+ case 32:
+ byteDepth=4;
+ break;
+ default:
+ cout << "unknown byteDepth:"<<depth
+ << " in DitherRGB_flipped::flipRGBImage"<<endl;
+ return;
+ }
+
+
+ int spaceNeeded=width*height*byteDepth;
+
+ if (spaceNeeded > flipSize) {
+ if (flipSpace != NULL) {
+ delete flipSpace;
+ }
+ cout << "flipSpace:"<<spaceNeeded<<endl;
+ flipSpace=new unsigned char[spaceNeeded+64];
+ flipSize=spaceNeeded;
+ }
+
+ int i;
+ int lineSize=width*byteDepth;
+ unsigned char* end=dest+lineSize*(height-1);
+
+ for (i=0;i<height;i++) {
+ memcpy(end,src,lineSize);
+ src+=lineSize;
+ end-=lineSize;
+ }
+
+}
+
+
diff --git a/mpeglib/lib/util/render/dither/ditherRGB_flipped.h b/mpeglib/lib/util/render/dither/ditherRGB_flipped.h
new file mode 100644
index 00000000..1d99f7f6
--- /dev/null
+++ b/mpeglib/lib/util/render/dither/ditherRGB_flipped.h
@@ -0,0 +1,34 @@
+/*
+ flips RGB images
+ Copyright (C) 2000 Martin Vogt
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Library General Public License as published by
+ the Free Software Foundation.
+
+ For more information look at the file COPYRIGHT in this package
+
+ */
+
+#ifndef __DITHERRGB_FLIPPED_H
+#define __DITHERRGB_FLIPPED_H
+
+#include "colorTableHighBit.h"
+
+class DitherRGB_flipped {
+
+ int flipSize;
+ unsigned char* flipSpace;
+
+ public:
+ DitherRGB_flipped();
+ ~DitherRGB_flipped();
+
+ // Note: this methods swaps the image
+ // itsself
+ void flipRGBImage(unsigned char* dest,unsigned char* src,
+ int depth,int width,int height,int offset);
+
+};
+
+#endif
diff --git a/mpeglib/lib/util/render/dither/ditherWrapper.cpp b/mpeglib/lib/util/render/dither/ditherWrapper.cpp
new file mode 100644
index 00000000..c6c37a79
--- /dev/null
+++ b/mpeglib/lib/util/render/dither/ditherWrapper.cpp
@@ -0,0 +1,246 @@
+/*
+ wrapper for X11 Window
+ Copyright (C) 1999 Martin Vogt
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Library General Public License as published by
+ the Free Software Foundation.
+
+ For more information look at the file COPYRIGHT in this package
+
+ */
+
+
+#include "ditherWrapper.h"
+
+#include <iostream>
+
+using namespace std;
+
+
+/*
+ Flag for gamma correction
+ Makes images brighter/darker.
+ It's in the source but not activated (for now)
+*/
+int gammaCorrectFlag = 0;
+double gammaCorrect = 1.0;
+
+/*
+ Flag for chroma correction.
+ reduce the color intensity..
+ It's in the source but not activated (for now)
+*/
+int chromaCorrectFlag = 0;
+double chromaCorrect = 1.0;
+
+
+
+DitherWrapper::DitherWrapper(int bpp,unsigned int redMask,
+ unsigned int greenMask,unsigned int blueMask,
+ unsigned char pixel[256]) {
+
+ this->bpp=bpp;
+ this->redMask=redMask;
+ this->greenMask=greenMask;
+ this->blueMask=blueMask;
+
+
+ dither8Bit=new Dither8Bit(pixel);
+ dither16Bit=new Dither16Bit(redMask,greenMask,blueMask);
+ dither32Bit=new Dither32Bit(redMask,greenMask,blueMask);
+ ditherRGB_flipped=new DitherRGB_flipped();
+ ditherRGB=new DitherRGB();
+
+
+#ifdef INTEL
+ lmmx=mm_support();
+#else
+ lmmx=false;
+#endif
+
+
+}
+
+
+DitherWrapper::~DitherWrapper(){
+ delete dither16Bit;
+ delete dither8Bit;
+ delete dither32Bit;
+ delete ditherRGB_flipped;
+ delete ditherRGB;
+}
+
+
+
+
+
+void DitherWrapper::doDither(YUVPicture* pic,int depth,int imageMode,
+ unsigned char* dest,int offset) {
+
+
+ //
+ // according to the input imageType and the output area
+ // handle different dither methods
+ //
+
+ int inputType=pic->getImageType();
+
+ if ( (inputType == PICTURE_YUVMODE_CR_CB) ||
+ (inputType == PICTURE_YUVMODE_CB_CR) ) {
+ doDitherYUV(pic,depth,imageMode,dest,offset);
+ return;
+ }
+
+ if ( (inputType == PICTURE_RGB) ||
+ (inputType == PICTURE_RGB_FLIPPED) ){
+ doDitherRGB(pic,depth,imageMode,dest,offset);
+ return;
+ }
+
+ cout << "unknown inputType:"<<inputType
+ << " in DitherWrapper::doDither"<<endl;
+}
+
+
+void DitherWrapper::doDitherRGB(YUVPicture* pic,int depth,int imageMode,
+ unsigned char* dest,int offset) {
+
+ int inputType=pic->getImageType();
+
+ switch(inputType) {
+ case PICTURE_RGB:
+ doDitherRGB_NORMAL(pic,depth,imageMode,dest,offset);
+ break;
+ case PICTURE_RGB_FLIPPED:
+ doDitherRGB_FLIPPED(pic,depth,imageMode,dest,offset);
+ break;
+ default:
+ cout << "unknown RGB type:"<<inputType<<" in DitherWrapper"<<endl;
+ exit(0);
+ }
+}
+
+
+void DitherWrapper::doDitherRGB_NORMAL(YUVPicture* pic,
+ int depth,int imageMode,
+ unsigned char* dest,int offset) {
+
+ int w=pic->getWidth();
+ int h=pic->getHeight();
+
+ unsigned char* src=pic->getImagePtr();
+
+ if (imageMode & _IMAGE_DOUBLE) {
+ ditherRGB->ditherRGBImage_x2(dest,src,depth,w,h,offset);
+ } else {
+ ditherRGB->ditherRGBImage(dest,src,depth,w,h,offset);
+ }
+}
+
+void DitherWrapper::doDitherRGB_FLIPPED(YUVPicture* pic,
+ int depth,int imageMode,
+ unsigned char* dest,int offset) {
+
+ int w=pic->getWidth();
+ int h=pic->getHeight();
+
+ unsigned char* src=pic->getImagePtr();
+
+ ditherRGB_flipped->flipRGBImage(dest,src,depth,w,h,offset);
+}
+
+
+
+void DitherWrapper::doDitherYUV(YUVPicture* pic,int depth,int imageMode,
+ unsigned char* dest,int offset) {
+
+ if (imageMode & _IMAGE_DOUBLE) {
+ doDither_x2(pic,depth,dest,offset);
+ } else {
+ doDither_std(pic,depth,dest,offset);
+ }
+}
+
+
+void DitherWrapper::doDither_std(YUVPicture* pic,int depth,
+ unsigned char* dest,int offset){
+
+ int h=pic->getHeight();
+ int w=pic->getWidth();
+ unsigned char* lum=pic->getLuminancePtr();
+ unsigned char* cr=pic->getCrPtr();
+ unsigned char* cb=pic->getCbPtr();
+
+
+ switch (depth) {
+ case 8:
+ dither8Bit->ditherImageOrdered(lum, cr, cb,dest , h, w);
+ break;
+ case 16:
+ if (lmmx) {
+ ditherBlock(lum,cr,cb,dest,h,w,offset);
+ } else {
+ dither16Bit->ditherImageColor16(lum,cr,cb,dest,h,w,offset);
+ }
+
+ break;
+ case 24:
+ case 32:
+ if (lmmx) {
+ dither32_mmx(lum, cr, cb,dest ,h,w,offset);
+ } else {
+ dither32Bit->ditherImageColor32(lum, cr, cb,dest ,h,w,offset);
+ }
+
+
+ break;
+ default:
+ cout << "cannot dither depth:"<<depth<<endl;
+ }
+
+}
+
+
+void DitherWrapper::doDither_x2(YUVPicture* pic,int depth,
+ unsigned char* dest,int offset){
+
+ int h=pic->getHeight();
+ int w=pic->getWidth();
+ unsigned char* lum=pic->getLuminancePtr();
+ unsigned char* cr=pic->getCrPtr();
+ unsigned char* cb=pic->getCbPtr();
+
+
+ switch (depth) {
+ case 8: {
+ // we do dither with the 8Bit std YUV ditherer to RGB
+ // and then we do the double part with the
+ // RGB ditherer. Its obviously much slower but at
+ // least it works. To not allocate memory twice
+ // we are a bit tricky. We know that the image
+ // has space for doubls size. We but the not double size
+ // image at the bottom of the dest. Maybe that
+ // the last line gets overwritten
+ int memPos=3*h*w;
+ dither8Bit->ditherImageOrdered(lum, cr, cb,dest+memPos, h, w);
+ unsigned char* src=dest+memPos;
+ ditherRGB->ditherRGBImage_x2(dest,src,depth,w,h,0);
+ break;
+ }
+ case 16:
+ dither16Bit->ditherImageTwox2Color16(lum,cr,cb,dest,h,w,offset);
+ break;
+ case 24:
+ case 32:
+ if (lmmx) {
+ //dither32x2_mmx(lum, cr, cb,dest ,h,w,offset);
+ dither32Bit->ditherImageTwox2Color32(lum,cr,cb,dest,h,w,offset);
+ } else {
+ dither32Bit->ditherImageTwox2Color32(lum,cr,cb,dest,h,w,offset);
+ }
+ break;
+ default:
+ cout << "cannot dither depth:" << depth << endl;
+ }
+}
diff --git a/mpeglib/lib/util/render/dither/ditherWrapper.h b/mpeglib/lib/util/render/dither/ditherWrapper.h
new file mode 100644
index 00000000..b01abff8
--- /dev/null
+++ b/mpeglib/lib/util/render/dither/ditherWrapper.h
@@ -0,0 +1,80 @@
+/*
+ wrapper for X11 Window
+ Copyright (C) 1999 Martin Vogt
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Library General Public License as published by
+ the Free Software Foundation.
+
+ For more information look at the file COPYRIGHT in this package
+
+ */
+
+
+#ifndef __DITHERWRAPPER_H
+#define __DITHERWRAPPER_H
+
+
+#include "../../mmx/mmx.h"
+
+#include "../yuvPicture.h"
+#include "../imageBase.h"
+#include <stdlib.h>
+#include "ditherMMX.h"
+#include "dither8Bit.h"
+#include "dither16Bit.h"
+#include "dither32Bit.h"
+#include "ditherRGB_flipped.h"
+#include "ditherRGB.h"
+
+
+/**
+ Wraps all calls to software ditherer and the different
+ resolutions,mmx enhancements, and doublesize ditherers.
+*/
+
+
+class DitherWrapper {
+
+ int lmmx;
+
+ int bpp;
+ // colorMask
+ unsigned int redMask;
+ unsigned int greenMask;
+ unsigned int blueMask;
+
+ Dither8Bit* dither8Bit;
+ Dither16Bit* dither16Bit;
+ Dither32Bit* dither32Bit;
+ DitherRGB_flipped* ditherRGB_flipped;
+ DitherRGB* ditherRGB;
+
+ public:
+ DitherWrapper(int bpp,unsigned int redMask,
+ unsigned int greenMask,unsigned int blueMask,
+ unsigned char pixel[256]);
+ ~DitherWrapper();
+
+/* int getDitherSize(); */
+/* void setDitherSize(int ditherMode); */
+
+ void doDither(YUVPicture* pic,int depth,int imageMode,
+ unsigned char* dest,int offset);
+
+
+ private:
+ void doDitherYUV(YUVPicture* pic,int depth,int imageMode,
+ unsigned char* dest,int offset);
+ void doDitherRGB(YUVPicture* pic,int depth,int imageMode,
+ unsigned char* dest,int offset);
+ void doDitherRGB_NORMAL(YUVPicture* pic,int depth,int imageMode,
+ unsigned char* dest,int offset);
+ void doDitherRGB_FLIPPED(YUVPicture* pic,int depth,int imageMode,
+ unsigned char* dest,int offset);
+
+ void doDither_std(YUVPicture* pic,int depth,unsigned char* dest,int offset);
+ void doDither_x2(YUVPicture* pic,int depth,unsigned char* dest,int offset);
+};
+
+#endif
diff --git a/mpeglib/lib/util/render/dither/ditherer_mmx16.cpp b/mpeglib/lib/util/render/dither/ditherer_mmx16.cpp
new file mode 100644
index 00000000..757f0676
--- /dev/null
+++ b/mpeglib/lib/util/render/dither/ditherer_mmx16.cpp
@@ -0,0 +1,256 @@
+
+#include "ditherMMX.h"
+
+#include <iostream>
+
+using namespace std;
+
+#ifndef INTEL
+// nothing
+void ditherBlock(unsigned char *lum, unsigned char *cr, unsigned char *cb,
+ unsigned char *out,
+ int cols, int rows, int screen_width) {
+ printf("call to ditherBlock. this should never happen\n");
+ printf("check mmx detection routine.\n");
+ exit(0);
+}
+#else
+
+
+static long long MMX16_0 = 0L;
+static unsigned long MMX16_10w[] = {0x00100010, 0x00100010};
+static unsigned long MMX16_80w[] = {0x00800080, 0x00800080};
+static unsigned long MMX16_00FFw[] = {0x00ff00ff, 0x00ff00ff};
+static unsigned short MMX16_Ublucoeff[] = {0x81, 0x81, 0x81, 0x81};
+static unsigned short MMX16_Vredcoeff[] = {0x66, 0x66, 0x66, 0x66};
+static unsigned short MMX16_Ugrncoeff[] = {0xffe8, 0xffe8, 0xffe8, 0xffe8};
+static unsigned short MMX16_Vgrncoeff[] = {0xffcd, 0xffcd, 0xffcd, 0xffcd};
+static unsigned short MMX16_Ycoeff[] = {0x4a, 0x4a, 0x4a, 0x4a};
+static unsigned short MMX16_redmask[] = {0xf800, 0xf800, 0xf800, 0xf800};
+static unsigned short MMX16_grnmask[] = {0x7e0, 0x7e0, 0x7e0, 0x7e0};
+
+void dummy_dithermmx16() {
+ cout << "MMX16_0"<<MMX16_0<<endl;
+ cout << "MMX16_10w:"<<MMX16_10w<<endl;
+ cout << "MMX16_80w:"<<MMX16_80w<<endl;
+ cout << "MMX16_Ublucoeff:"<<MMX16_Ublucoeff<<endl;
+ cout << "MMX16_Vredcoeff:"<<MMX16_Vredcoeff<<endl;
+ cout << "MMX16_Ugrncoeff:"<<MMX16_Ugrncoeff<<endl;
+ cout << "MMX16_Vgrncoeff:"<<MMX16_Vgrncoeff<<endl;
+ cout << "MMX16_Ycoeff:"<<MMX16_Ycoeff<<endl;
+ cout << "MMX16_redmask:"<<MMX16_redmask<<endl;
+ cout << "MMX16_grnmask:"<<MMX16_grnmask<<endl;
+ cout << "MMX16_00FFw:"<<MMX16_00FFw<<endl;
+}
+
+
+void ditherBlock(unsigned char *lum,
+ unsigned char *cr,
+ unsigned char *cb,
+ unsigned char *out,
+ int rows,
+ int cols,
+ int mod) {
+
+ unsigned short *row1;
+ unsigned short *row2;
+ row1 = (unsigned short* )out; // 16 bit target
+
+ unsigned char* end = lum +cols*rows; // Pointer to the end
+ int x=cols;
+ row2=row1+mod+cols; // start of second row
+ mod=2*cols+4*mod; // increment for row1 in byte
+
+ // buffer for asm function
+ int buf[6];
+ buf[0]=(int)(lum+cols); // lum2 pointer
+ buf[1]=(int)end;
+ buf[2]=x;
+ buf[3]=mod;
+ buf[4]=0; //tmp0;
+ buf[5]=cols;
+
+
+
+ __asm__ __volatile__(
+ ".align 32\n"
+ "1:\n"
+ "movd (%1), %%mm0\n" // 4 Cb 0 0 0 0 u3 u2 u1 u0
+ "pxor %%mm7, %%mm7\n"
+ "movd (%0), %%mm1\n" // 4 Cr 0 0 0 0 v3 v2 v1 v0
+ "punpcklbw %%mm7, %%mm0\n" // 4 W cb 0 u3 0 u2 0 u1 0 u0
+ "punpcklbw %%mm7, %%mm1\n" // 4 W cr 0 v3 0 v2 0 v1 0 v0
+ "psubw MMX16_80w, %%mm0\n"
+ "psubw MMX16_80w, %%mm1\n"
+ "movq %%mm0, %%mm2\n" // Cb 0 u3 0 u2 0 u1 0 u0
+ "movq %%mm1, %%mm3\n" // Cr
+ "pmullw MMX16_Ugrncoeff, %%mm2\n" // Cb2green 0 R3 0 R2 0 R1 0 R0
+ "movq (%2), %%mm6\n" // L1 l7 L6 L5 L4 L3 L2 L1 L0
+ "pmullw MMX16_Ublucoeff, %%mm0\n" // Cb2blue
+ "pand MMX16_00FFw, %%mm6\n" // L1 00 L6 00 L4 00 L2 00 L0
+ "pmullw MMX16_Vgrncoeff, %%mm3\n" // Cr2green
+ "movq (%2), %%mm7\n" // L2
+ "pmullw MMX16_Vredcoeff, %%mm1\n" // Cr2red
+ // "psubw MMX16_10w, %%mm6\n"
+ "psrlw $8, %%mm7\n" // L2 00 L7 00 L5 00 L3 00 L1
+ "pmullw MMX16_Ycoeff, %%mm6\n" // lum1
+ // "psubw MMX16_10w, %%mm7\n" // L2
+ "paddw %%mm3, %%mm2\n" // Cb2green + Cr2green == green
+ "pmullw MMX16_Ycoeff, %%mm7\n" // lum2
+
+ "movq %%mm6, %%mm4\n" // lum1
+ "paddw %%mm0, %%mm6\n" // lum1 +blue 00 B6 00 B4 00 B2 00 B0
+ "movq %%mm4, %%mm5\n" // lum1
+ "paddw %%mm1, %%mm4\n" // lum1 +red 00 R6 00 R4 00 R2 00 R0
+ "paddw %%mm2, %%mm5\n" // lum1 +green 00 G6 00 G4 00 G2 00 G0
+ "psraw $6, %%mm4\n" // R1 0 .. 64
+ "movq %%mm7, %%mm3\n" // lum2 00 L7 00 L5 00 L3 00 L1
+ "psraw $6, %%mm5\n" // G1 - .. +
+ "paddw %%mm0, %%mm7\n" // Lum2 +blue 00 B7 00 B5 00 B3 00 B1
+ "psraw $6, %%mm6\n" // B1 0 .. 64
+ "packuswb %%mm4, %%mm4\n" // R1 R1
+ "packuswb %%mm5, %%mm5\n" // G1 G1
+ "packuswb %%mm6, %%mm6\n" // B1 B1
+ "punpcklbw %%mm4, %%mm4\n"
+ "punpcklbw %%mm5, %%mm5\n"
+
+ "pand MMX16_redmask, %%mm4\n"
+ "psllw $3, %%mm5\n" // GREEN 1
+ "punpcklbw %%mm6, %%mm6\n"
+ "pand MMX16_grnmask, %%mm5\n"
+ "pand MMX16_redmask, %%mm6\n"
+ "por %%mm5, %%mm4\n" //
+ "psrlw $11, %%mm6\n" // BLUE 1
+ "movq %%mm3, %%mm5\n" // lum2
+ "paddw %%mm1, %%mm3\n" // lum2 +red 00 R7 00 R5 00 R3 00 R1
+ "paddw %%mm2, %%mm5\n" // lum2 +green 00 G7 00 G5 00 G3 00 G1
+ "psraw $6, %%mm3\n" // R2
+ "por %%mm6, %%mm4\n" // MM4
+ "psraw $6, %%mm5\n" // G2
+
+ "movl %2,16%5\n" // store register in tmp0
+ "movl %5,%2\n" // lum2->register
+ "movq (%2),%%mm6\n" // 0 0 0 0 L3 L2 L1 L0 (load lum2)
+
+
+ //"movq (%2, %5), %%mm6\n" // L3 load lum2
+ "psraw $6, %%mm7\n"
+ "packuswb %%mm3, %%mm3\n"
+ "packuswb %%mm5, %%mm5\n"
+ "packuswb %%mm7, %%mm7\n"
+ "pand MMX16_00FFw, %%mm6\n" // L3
+ "punpcklbw %%mm3, %%mm3\n"
+ // "psubw MMX16_10w, %%mm6\n" // L3
+ "punpcklbw %%mm5, %%mm5\n"
+ "pmullw MMX16_Ycoeff, %%mm6\n" // lum3
+ "punpcklbw %%mm7, %%mm7\n"
+ "psllw $3, %%mm5\n" // GREEN 2
+ "pand MMX16_redmask, %%mm7\n"
+ "pand MMX16_redmask, %%mm3\n"
+ "psrlw $11, %%mm7\n" // BLUE 2
+ "pand MMX16_grnmask, %%mm5\n"
+ "por %%mm7, %%mm3\n"
+
+ "movq (%2), %%mm7\n" // L4 load lum2
+ "movl 16%5,%2\n" // tmp0->register
+
+ "por %%mm5, %%mm3\n" //
+ "psrlw $8, %%mm7\n" // L4
+ "movq %%mm4, %%mm5\n"
+ // "psubw MMX16_10w, %%mm7\n" // L4
+ "punpcklwd %%mm3, %%mm4\n"
+ "pmullw MMX16_Ycoeff, %%mm7\n" // lum4
+ "punpckhwd %%mm3, %%mm5\n"
+
+ "movq %%mm4, (%3)\n" // write row1
+ "movq %%mm5, 8(%3)\n" // write row1
+
+ "movq %%mm6, %%mm4\n" // Lum3
+ "paddw %%mm0, %%mm6\n" // Lum3 +blue
+
+ "movq %%mm4, %%mm5\n" // Lum3
+ "paddw %%mm1, %%mm4\n" // Lum3 +red
+ "paddw %%mm2, %%mm5\n" // Lum3 +green
+ "psraw $6, %%mm4\n"
+ "movq %%mm7, %%mm3\n" // Lum4
+ "psraw $6, %%mm5\n"
+ "paddw %%mm0, %%mm7\n" // Lum4 +blue
+ "psraw $6, %%mm6\n" // Lum3 +blue
+ "movq %%mm3, %%mm0\n" // Lum4
+ "packuswb %%mm4, %%mm4\n"
+ "paddw %%mm1, %%mm3\n" // Lum4 +red
+ "packuswb %%mm5, %%mm5\n"
+ "paddw %%mm2, %%mm0\n" // Lum4 +green
+ "packuswb %%mm6, %%mm6\n"
+ "punpcklbw %%mm4, %%mm4\n"
+ "punpcklbw %%mm5, %%mm5\n"
+ "punpcklbw %%mm6, %%mm6\n"
+ "psllw $3, %%mm5\n" // GREEN 3
+ "pand MMX16_redmask, %%mm4\n"
+ "psraw $6, %%mm3\n" // psr 6
+ "psraw $6, %%mm0\n"
+ "pand MMX16_redmask, %%mm6\n" // BLUE
+ "pand MMX16_grnmask, %%mm5\n"
+ "psrlw $11, %%mm6\n" // BLUE 3
+ "por %%mm5, %%mm4\n"
+ "psraw $6, %%mm7\n"
+ "por %%mm6, %%mm4\n"
+ "packuswb %%mm3, %%mm3\n"
+ "packuswb %%mm0, %%mm0\n"
+ "packuswb %%mm7, %%mm7\n"
+ "punpcklbw %%mm3, %%mm3\n"
+ "punpcklbw %%mm0, %%mm0\n"
+ "punpcklbw %%mm7, %%mm7\n"
+ "pand MMX16_redmask, %%mm3\n"
+ "pand MMX16_redmask, %%mm7\n" // BLUE
+ "psllw $3, %%mm0\n" // GREEN 4
+ "psrlw $11, %%mm7\n"
+ "pand MMX16_grnmask, %%mm0\n"
+ "por %%mm7, %%mm3\n"
+ "por %%mm0, %%mm3\n"
+
+ "movq %%mm4, %%mm5\n"
+
+ "punpcklwd %%mm3, %%mm4\n"
+ "punpckhwd %%mm3, %%mm5\n"
+
+ "movq %%mm4, (%4)\n"
+ "movq %%mm5, 8(%4)\n"
+
+ "subl $8, 8%5\n" // x-=8
+ "addl $8, %5\n" // lum2+8
+ "addl $8, %2\n"
+ "addl $4, %0\n"
+ "addl $4, %1\n"
+ "cmpl $0, 8%5\n"
+ "leal 16(%3), %3\n"
+ "leal 16(%4), %4\n" // row2+16
+
+
+ "jne 1b\n"
+ "addl 20%5, %2\n" // lum += cols
+
+ "movl %2,16%5\n" // store register in tmp0
+ "movl 20%5,%2\n" // cols->register
+
+ "addl %2, %5\n" // lum2 += cols
+ "addl 12%5, %3\n" // row1+= mod
+ "addl 12%5, %4\n" // row2+= mod
+ "movl %2, 8%5\n" // x=cols
+ "movl 16%5,%2\n" // store tmp0 in register
+
+ "cmpl 4%5, %2\n"
+ "jl 1b\n"
+
+ :
+ :"r" (cr), "r"(cb),"r"(lum),
+ "r"(row1),"r"(row2),"m"(buf[0])
+
+ );
+ __asm__ (
+ "emms\n"
+ );
+
+ }
+
+#endif