apps/plugins/imageviewer/jpeg/jpeg_decoder.c at master · tsiry-sandratraina.com/rockbox-zig

A modern Music Player Daemon based on Rockbox open source high quality audio player
libadwaita audio rust zig deno mpris rockbox mpd
rockbox-zig / apps / plugins / imageviewer / jpeg / jpeg_decoder.c
at master 1527 lines 55 kB view raw
   1/***************************************************************************
   2*             __________               __   ___.
   3*   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
   4*   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
   5*   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
   6*   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
   7*                     \/            \/     \/    \/            \/
   8* $Id$
   9*
  10* JPEG image viewer
  11* (This is a real mess if it has to be coded in one single C file)
  12*
  13* File scrolling addition (C) 2005 Alexander Spyridakis
  14* Copyright (C) 2004 Jörg Hohensohn aka [IDC]Dragon
  15* Heavily borrowed from the IJG implementation (C) Thomas G. Lane
  16* Small & fast downscaling IDCT (C) 2002 by Guido Vollbeding  JPEGclub.org
  17*
  18* This program is free software; you can redistribute it and/or
  19* modify it under the terms of the GNU General Public License
  20* as published by the Free Software Foundation; either version 2
  21* of the License, or (at your option) any later version.
  22*
  23* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
  24* KIND, either express or implied.
  25*
  26****************************************************************************/
  27
  28#include "plugin.h"
  29
  30#include "jpeg_decoder.h"
  31
  32/* for portability of below JPEG code */
  33#define MEMSET(p,v,c) rb->memset(p,v,c)
  34#define MEMCPY(d,s,c) rb->memcpy(d,s,c)
  35#define INLINE static inline
  36#define ENDIAN_SWAP16(n) n /* only for poor little endian machines */
  37
  38/**************** begin JPEG code ********************/
  39
  40INLINE unsigned range_limit(int value)
  41{
  42#if defined(CPU_COLDFIRE)
  43    asm (  /* Note: Uses knowledge that only the low byte of the result is used */
  44        "add.l   #128,%[v]   \n"  /* value += 128; */
  45        "cmp.l   #255,%[v]   \n"  /* overflow? */
  46        "bls.b   1f          \n"  /* no: return value */
  47        "spl.b   %[v]        \n"  /* yes: set low byte to appropriate boundary */
  48    "1:                      \n"
  49        : /* outputs */
  50        [v]"+d"(value)
  51    );
  52    return value;
  53#elif defined(CPU_ARM)
  54    asm (  /* Note: Uses knowledge that only the low byte of the result is used */
  55        "add     %[v], %[v], #128    \n"  /* value += 128 */
  56        "cmp     %[v], #255          \n"  /* out of range 0..255? */
  57        "mvnhi   %[v], %[v], asr #31 \n"  /* yes: set all bits to ~(sign_bit) */
  58        : /* outputs */
  59        [v]"+r"(value)
  60    );
  61    return value;
  62#else
  63    value += 128;
  64    if(value < 0)   return 0;
  65    if(value > 255) return 255;
  66    return value;
  67#endif
  68}
  69
  70/* IDCT implementation */
  71
  72
  73#define CONST_BITS 13
  74#define PASS1_BITS 2
  75
  76
  77/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
  78* causing a lot of useless floating-point operations at run time.
  79* To get around this we use the following pre-calculated constants.
  80* If you change CONST_BITS you may want to add appropriate values.
  81* (With a reasonable C compiler, you can just rely on the FIX() macro...)
  82*/
  83#define FIX_0_298631336  2446 /* FIX(0.298631336) */
  84#define FIX_0_390180644  3196 /* FIX(0.390180644) */
  85#define FIX_0_541196100  4433 /* FIX(0.541196100) */
  86#define FIX_0_765366865  6270 /* FIX(0.765366865) */
  87#define FIX_0_899976223  7373 /* FIX(0.899976223) */
  88#define FIX_1_175875602  9633 /* FIX(1.175875602) */
  89#define FIX_1_501321110 12299 /* FIX(1.501321110) */
  90#define FIX_1_847759065 15137 /* FIX(1.847759065) */
  91#define FIX_1_961570560 16069 /* FIX(1.961570560) */
  92#define FIX_2_053119869 16819 /* FIX(2.053119869) */
  93#define FIX_2_562915447 20995 /* FIX(2.562915447) */
  94#define FIX_3_072711026 25172 /* FIX(3.072711026) */
  95
  96
  97
  98/* Multiply an long variable by an long constant to yield an long result.
  99* For 8-bit samples with the recommended scaling, all the variable
 100* and constant values involved are no more than 16 bits wide, so a
 101* 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
 102* For 12-bit samples, a full 32-bit multiplication will be needed.
 103*/
 104#define MULTIPLY16(var,const)  (((short) (var)) * ((short) (const)))
 105
 106
 107/* Dequantize a coefficient by multiplying it by the multiplier-table
 108* entry; produce an int result.  In this module, both inputs and result
 109* are 16 bits or less, so either int or short multiply will work.
 110*/
 111/* #define DEQUANTIZE(coef,quantval)  (((int) (coef)) * (quantval)) */
 112#define DEQUANTIZE MULTIPLY16
 113
 114/* Descale and correctly round an int value that's scaled by N bits.
 115* We assume RIGHT_SHIFT rounds towards minus infinity, so adding
 116* the fudge factor is correct for either sign of X.
 117*/
 118#define DESCALE(x,n) (((x) + (1l << ((n)-1))) >> (n))
 119
 120
 121
 122/*
 123* Perform dequantization and inverse DCT on one block of coefficients,
 124* producing a reduced-size 1x1 output block.
 125*/
 126static void idct1x1(unsigned char* p_byte, int* inptr, int* quantptr, int skip_line)
 127{
 128    (void)skip_line; /* unused */
 129    *p_byte = range_limit(inptr[0] * quantptr[0] >> 3);
 130}
 131
 132
 133
 134/*
 135* Perform dequantization and inverse DCT on one block of coefficients,
 136* producing a reduced-size 2x2 output block.
 137*/
 138static void idct2x2(unsigned char* p_byte, int* inptr, int* quantptr, int skip_line)
 139{
 140    int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
 141    unsigned char* outptr;
 142
 143    /* Pass 1: process columns from input, store into work array. */
 144
 145    /* Column 0 */
 146    tmp4 = DEQUANTIZE(inptr[8*0], quantptr[8*0]);
 147    tmp5 = DEQUANTIZE(inptr[8*1], quantptr[8*1]);
 148
 149    tmp0 = tmp4 + tmp5;
 150    tmp2 = tmp4 - tmp5;
 151
 152    /* Column 1 */
 153    tmp4 = DEQUANTIZE(inptr[8*0+1], quantptr[8*0+1]);
 154    tmp5 = DEQUANTIZE(inptr[8*1+1], quantptr[8*1+1]);
 155
 156    tmp1 = tmp4 + tmp5;
 157    tmp3 = tmp4 - tmp5;
 158
 159    /* Pass 2: process 2 rows, store into output array. */
 160
 161    /* Row 0 */
 162    outptr = p_byte;
 163
 164    outptr[0] = range_limit((int) DESCALE(tmp0 + tmp1, 3));
 165    outptr[1] = range_limit((int) DESCALE(tmp0 - tmp1, 3));
 166
 167    /* Row 1 */
 168    outptr = p_byte + skip_line;
 169
 170    outptr[0] = range_limit((int) DESCALE(tmp2 + tmp3, 3));
 171    outptr[1] = range_limit((int) DESCALE(tmp2 - tmp3, 3));
 172}
 173
 174
 175
 176/*
 177* Perform dequantization and inverse DCT on one block of coefficients,
 178* producing a reduced-size 4x4 output block.
 179*/
 180static void idct4x4(unsigned char* p_byte, int* inptr, int* quantptr, int skip_line)
 181{
 182    int tmp0, tmp2, tmp10, tmp12;
 183    int z1, z2, z3;
 184    int * wsptr;
 185    unsigned char* outptr;
 186    int ctr;
 187    int workspace[4*4]; /* buffers data between passes */
 188
 189    /* Pass 1: process columns from input, store into work array. */
 190
 191    wsptr = workspace;
 192    for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++)
 193    {
 194        /* Even part */
 195
 196        tmp0 = DEQUANTIZE(inptr[8*0], quantptr[8*0]);
 197        tmp2 = DEQUANTIZE(inptr[8*2], quantptr[8*2]);
 198
 199        tmp10 = (tmp0 + tmp2) << PASS1_BITS;
 200        tmp12 = (tmp0 - tmp2) << PASS1_BITS;
 201
 202        /* Odd part */
 203        /* Same rotation as in the even part of the 8x8 LL&M IDCT */
 204
 205        z2 = DEQUANTIZE(inptr[8*1], quantptr[8*1]);
 206        z3 = DEQUANTIZE(inptr[8*3], quantptr[8*3]);
 207
 208        z1 = MULTIPLY16(z2 + z3, FIX_0_541196100);
 209        tmp0 = DESCALE(z1 + MULTIPLY16(z3, - FIX_1_847759065), CONST_BITS-PASS1_BITS);
 210        tmp2 = DESCALE(z1 + MULTIPLY16(z2, FIX_0_765366865), CONST_BITS-PASS1_BITS);
 211
 212        /* Final output stage */
 213
 214        wsptr[4*0] = (int) (tmp10 + tmp2);
 215        wsptr[4*3] = (int) (tmp10 - tmp2);
 216        wsptr[4*1] = (int) (tmp12 + tmp0);
 217        wsptr[4*2] = (int) (tmp12 - tmp0);
 218    }
 219
 220    /* Pass 2: process 4 rows from work array, store into output array. */
 221
 222    wsptr = workspace;
 223    for (ctr = 0; ctr < 4; ctr++)
 224    {
 225        outptr = p_byte + (ctr*skip_line);
 226        /* Even part */
 227
 228        tmp0 = (int) wsptr[0];
 229        tmp2 = (int) wsptr[2];
 230
 231        tmp10 = (tmp0 + tmp2) << CONST_BITS;
 232        tmp12 = (tmp0 - tmp2) << CONST_BITS;
 233
 234        /* Odd part */
 235        /* Same rotation as in the even part of the 8x8 LL&M IDCT */
 236
 237        z2 = (int) wsptr[1];
 238        z3 = (int) wsptr[3];
 239
 240        z1 = MULTIPLY16(z2 + z3, FIX_0_541196100);
 241        tmp0 = z1 + MULTIPLY16(z3, - FIX_1_847759065);
 242        tmp2 = z1 + MULTIPLY16(z2, FIX_0_765366865);
 243
 244        /* Final output stage */
 245
 246        outptr[0] = range_limit((int) DESCALE(tmp10 + tmp2,
 247            CONST_BITS+PASS1_BITS+3));
 248        outptr[3] = range_limit((int) DESCALE(tmp10 - tmp2,
 249            CONST_BITS+PASS1_BITS+3));
 250        outptr[1] = range_limit((int) DESCALE(tmp12 + tmp0,
 251            CONST_BITS+PASS1_BITS+3));
 252        outptr[2] = range_limit((int) DESCALE(tmp12 - tmp0,
 253            CONST_BITS+PASS1_BITS+3));
 254
 255        wsptr += 4;     /* advance pointer to next row */
 256    }
 257}
 258
 259
 260
 261/*
 262* Perform dequantization and inverse DCT on one block of coefficients.
 263*/
 264static void idct8x8(unsigned char* p_byte, int* inptr, int* quantptr, int skip_line)
 265{
 266    long tmp0, tmp1, tmp2, tmp3;
 267    long tmp10, tmp11, tmp12, tmp13;
 268    long z1, z2, z3, z4, z5;
 269    int * wsptr;
 270    unsigned char* outptr;
 271    int ctr;
 272    int workspace[64];  /* buffers data between passes */
 273
 274    /* Pass 1: process columns from input, store into work array. */
 275    /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
 276    /* furthermore, we scale the results by 2**PASS1_BITS. */
 277
 278    wsptr = workspace;
 279    for (ctr = 8; ctr > 0; ctr--)
 280    {
 281    /* Due to quantization, we will usually find that many of the input
 282    * coefficients are zero, especially the AC terms.  We can exploit this
 283    * by short-circuiting the IDCT calculation for any column in which all
 284    * the AC terms are zero.  In that case each output is equal to the
 285    * DC coefficient (with scale factor as needed).
 286    * With typical images and quantization tables, half or more of the
 287    * column DCT calculations can be simplified this way.
 288    */
 289
 290        if ((inptr[8*1] | inptr[8*2] | inptr[8*3]
 291           | inptr[8*4] | inptr[8*5] | inptr[8*6] | inptr[8*7]) == 0)
 292        {
 293            /* AC terms all zero */
 294            int dcval = DEQUANTIZE(inptr[8*0], quantptr[8*0]) << PASS1_BITS;
 295
 296            wsptr[8*0] = wsptr[8*1] = wsptr[8*2] = wsptr[8*3] = wsptr[8*4]
 297                       = wsptr[8*5] = wsptr[8*6] = wsptr[8*7] = dcval;
 298            inptr++;      /* advance pointers to next column */
 299            quantptr++;
 300            wsptr++;
 301            continue;
 302        }
 303
 304        /* Even part: reverse the even part of the forward DCT. */
 305        /* The rotator is sqrt(2)*c(-6). */
 306
 307        z2 = DEQUANTIZE(inptr[8*2], quantptr[8*2]);
 308        z3 = DEQUANTIZE(inptr[8*6], quantptr[8*6]);
 309
 310        z1 = MULTIPLY16(z2 + z3, FIX_0_541196100);
 311        tmp2 = z1 + MULTIPLY16(z3, - FIX_1_847759065);
 312        tmp3 = z1 + MULTIPLY16(z2, FIX_0_765366865);
 313
 314        z2 = DEQUANTIZE(inptr[8*0], quantptr[8*0]);
 315        z3 = DEQUANTIZE(inptr[8*4], quantptr[8*4]);
 316
 317        tmp0 = (z2 + z3) << CONST_BITS;
 318        tmp1 = (z2 - z3) << CONST_BITS;
 319
 320        tmp10 = tmp0 + tmp3;
 321        tmp13 = tmp0 - tmp3;
 322        tmp11 = tmp1 + tmp2;
 323        tmp12 = tmp1 - tmp2;
 324
 325        /* Odd part per figure 8; the matrix is unitary and hence its
 326           transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively. */
 327
 328        tmp0 = DEQUANTIZE(inptr[8*7], quantptr[8*7]);
 329        tmp1 = DEQUANTIZE(inptr[8*5], quantptr[8*5]);
 330        tmp2 = DEQUANTIZE(inptr[8*3], quantptr[8*3]);
 331        tmp3 = DEQUANTIZE(inptr[8*1], quantptr[8*1]);
 332
 333        z1 = tmp0 + tmp3;
 334        z2 = tmp1 + tmp2;
 335        z3 = tmp0 + tmp2;
 336        z4 = tmp1 + tmp3;
 337        z5 = MULTIPLY16(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
 338
 339        tmp0 = MULTIPLY16(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
 340        tmp1 = MULTIPLY16(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
 341        tmp2 = MULTIPLY16(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
 342        tmp3 = MULTIPLY16(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
 343        z1 = MULTIPLY16(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
 344        z2 = MULTIPLY16(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
 345        z3 = MULTIPLY16(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
 346        z4 = MULTIPLY16(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
 347
 348        z3 += z5;
 349        z4 += z5;
 350
 351        tmp0 += z1 + z3;
 352        tmp1 += z2 + z4;
 353        tmp2 += z2 + z3;
 354        tmp3 += z1 + z4;
 355
 356        /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
 357
 358        wsptr[8*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
 359        wsptr[8*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
 360        wsptr[8*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
 361        wsptr[8*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
 362        wsptr[8*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
 363        wsptr[8*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
 364        wsptr[8*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
 365        wsptr[8*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
 366
 367        inptr++; /* advance pointers to next column */
 368        quantptr++;
 369        wsptr++;
 370    }
 371
 372    /* Pass 2: process rows from work array, store into output array. */
 373    /* Note that we must descale the results by a factor of 8 == 2**3, */
 374    /* and also undo the PASS1_BITS scaling. */
 375
 376    wsptr = workspace;
 377    for (ctr = 0; ctr < 8; ctr++)
 378    {
 379        outptr = p_byte + (ctr*skip_line);
 380        /* Rows of zeroes can be exploited in the same way as we did with columns.
 381        * However, the column calculation has created many nonzero AC terms, so
 382        * the simplification applies less often (typically 5% to 10% of the time).
 383        * On machines with very fast multiplication, it's possible that the
 384        * test takes more time than it's worth.  In that case this section
 385        * may be commented out.
 386        */
 387
 388#ifndef NO_ZERO_ROW_TEST
 389        if ((wsptr[1] | wsptr[2] | wsptr[3]
 390           | wsptr[4] | wsptr[5] | wsptr[6] | wsptr[7]) == 0)
 391        {
 392            /* AC terms all zero */
 393            unsigned char dcval = range_limit((int) DESCALE((long) wsptr[0],
 394                PASS1_BITS+3));
 395
 396            outptr[0] = dcval;
 397            outptr[1] = dcval;
 398            outptr[2] = dcval;
 399            outptr[3] = dcval;
 400            outptr[4] = dcval;
 401            outptr[5] = dcval;
 402            outptr[6] = dcval;
 403            outptr[7] = dcval;
 404
 405            wsptr += 8; /* advance pointer to next row */
 406            continue;
 407        }
 408#endif
 409
 410        /* Even part: reverse the even part of the forward DCT. */
 411        /* The rotator is sqrt(2)*c(-6). */
 412
 413        z2 = (long) wsptr[2];
 414        z3 = (long) wsptr[6];
 415
 416        z1 = MULTIPLY16(z2 + z3, FIX_0_541196100);
 417        tmp2 = z1 + MULTIPLY16(z3, - FIX_1_847759065);
 418        tmp3 = z1 + MULTIPLY16(z2, FIX_0_765366865);
 419
 420        tmp0 = ((long) wsptr[0] + (long) wsptr[4]) << CONST_BITS;
 421        tmp1 = ((long) wsptr[0] - (long) wsptr[4]) << CONST_BITS;
 422
 423        tmp10 = tmp0 + tmp3;
 424        tmp13 = tmp0 - tmp3;
 425        tmp11 = tmp1 + tmp2;
 426        tmp12 = tmp1 - tmp2;
 427
 428        /* Odd part per figure 8; the matrix is unitary and hence its
 429        * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. */
 430
 431        tmp0 = (long) wsptr[7];
 432        tmp1 = (long) wsptr[5];
 433        tmp2 = (long) wsptr[3];
 434        tmp3 = (long) wsptr[1];
 435
 436        z1 = tmp0 + tmp3;
 437        z2 = tmp1 + tmp2;
 438        z3 = tmp0 + tmp2;
 439        z4 = tmp1 + tmp3;
 440        z5 = MULTIPLY16(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
 441
 442        tmp0 = MULTIPLY16(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
 443        tmp1 = MULTIPLY16(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
 444        tmp2 = MULTIPLY16(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
 445        tmp3 = MULTIPLY16(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
 446        z1 = MULTIPLY16(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
 447        z2 = MULTIPLY16(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
 448        z3 = MULTIPLY16(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
 449        z4 = MULTIPLY16(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
 450
 451        z3 += z5;
 452        z4 += z5;
 453
 454        tmp0 += z1 + z3;
 455        tmp1 += z2 + z4;
 456        tmp2 += z2 + z3;
 457        tmp3 += z1 + z4;
 458
 459        /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
 460
 461        outptr[0] = range_limit((int) DESCALE(tmp10 + tmp3,
 462            CONST_BITS+PASS1_BITS+3));
 463        outptr[7] = range_limit((int) DESCALE(tmp10 - tmp3,
 464            CONST_BITS+PASS1_BITS+3));
 465        outptr[1] = range_limit((int) DESCALE(tmp11 + tmp2,
 466            CONST_BITS+PASS1_BITS+3));
 467        outptr[6] = range_limit((int) DESCALE(tmp11 - tmp2,
 468            CONST_BITS+PASS1_BITS+3));
 469        outptr[2] = range_limit((int) DESCALE(tmp12 + tmp1,
 470            CONST_BITS+PASS1_BITS+3));
 471        outptr[5] = range_limit((int) DESCALE(tmp12 - tmp1,
 472            CONST_BITS+PASS1_BITS+3));
 473        outptr[3] = range_limit((int) DESCALE(tmp13 + tmp0,
 474            CONST_BITS+PASS1_BITS+3));
 475        outptr[4] = range_limit((int) DESCALE(tmp13 - tmp0,
 476            CONST_BITS+PASS1_BITS+3));
 477
 478        wsptr += 8; /* advance pointer to next row */
 479    }
 480}
 481
 482
 483
 484/* JPEG decoder implementation */
 485
 486/* Preprocess the JPEG JFIF file */
 487int process_markers(unsigned char* p_src, long size, struct jpeg* p_jpeg)
 488{
 489    unsigned char* p_end = p_src + size;
 490    int marker_size; /* variable length of marker segment */
 491    int i, j, n;
 492    int ret = 0; /* returned flags */
 493
 494    p_jpeg->p_entropy_end = p_end;
 495
 496    while (p_src < p_end)
 497    {
 498        if (*p_src++ != 0xFF) /* no marker? */
 499        {
 500            continue; /* discard */
 501        }
 502
 503        switch (*p_src++)
 504        {
 505        case 0xFF: /* Previous FF was fill byte */
 506            p_src--; /* This FF could be start of a marker */
 507            continue;
 508        case 0x00: /* Zero stuffed byte - discard */
 509            break;
 510
 511        case 0xC0: /* SOF Huff  - Baseline DCT */
 512            {
 513                ret |= SOF0;
 514                marker_size = *p_src++ << 8; /* Highbyte */
 515                marker_size |= *p_src++; /* Lowbyte */
 516                n = *p_src++; /* sample precision (= 8 or 12) */
 517                if (n != 8)
 518                {
 519                    return(-1); /* Unsupported sample precision */
 520                }
 521                p_jpeg->y_size = *p_src++ << 8; /* Highbyte */
 522                p_jpeg->y_size |= *p_src++; /* Lowbyte */
 523                p_jpeg->x_size = *p_src++ << 8; /* Highbyte */
 524                p_jpeg->x_size |= *p_src++; /* Lowbyte */
 525
 526                n = (marker_size-2-6)/3;
 527                if (*p_src++ != n || (n != 1 && n != 3))
 528                {
 529                    return(-2); /* Unsupported SOF0 component specification */
 530                }
 531                for (i=0; i<n; i++)
 532                {
 533                    p_jpeg->frameheader[i].ID = *p_src++; /* Component info */
 534                    p_jpeg->frameheader[i].horizontal_sampling = *p_src >> 4;
 535                    p_jpeg->frameheader[i].vertical_sampling = *p_src++ & 0x0F;
 536                    p_jpeg->frameheader[i].quanttable_select = *p_src++;
 537                    if (p_jpeg->frameheader[i].horizontal_sampling > 2
 538                     || p_jpeg->frameheader[i].vertical_sampling > 2)
 539                    return -3; /* Unsupported SOF0 subsampling */
 540                }
 541                p_jpeg->blocks = n;
 542            }
 543            break;
 544
 545        case 0xC1: /* SOF Huff  - Extended sequential DCT*/
 546        case 0xC2: /* SOF Huff  - Progressive DCT*/
 547        case 0xC3: /* SOF Huff  - Spatial (sequential) lossless*/
 548        case 0xC5: /* SOF Huff  - Differential sequential DCT*/
 549        case 0xC6: /* SOF Huff  - Differential progressive DCT*/
 550        case 0xC7: /* SOF Huff  - Differential spatial*/
 551        case 0xC8: /* SOF Arith - Reserved for JPEG extensions*/
 552        case 0xC9: /* SOF Arith - Extended sequential DCT*/
 553        case 0xCA: /* SOF Arith - Progressive DCT*/
 554        case 0xCB: /* SOF Arith - Spatial (sequential) lossless*/
 555        case 0xCD: /* SOF Arith - Differential sequential DCT*/
 556        case 0xCE: /* SOF Arith - Differential progressive DCT*/
 557        case 0xCF: /* SOF Arith - Differential spatial*/
 558            {
 559                return (-4); /* other DCT model than baseline not implemented */
 560            }
 561
 562        case 0xC4: /* Define Huffman Table(s) */
 563            {
 564                unsigned char* p_temp;
 565
 566                ret |= DHT;
 567                marker_size = *p_src++ << 8; /* Highbyte */
 568                marker_size |= *p_src++; /* Lowbyte */
 569
 570                p_temp = p_src;
 571                while (p_src < p_temp+marker_size-2-17) /* another table */
 572                {
 573                    int sum = 0;
 574                    i = *p_src & 0x0F; /* table index */
 575                    if (i > 1)
 576                    {
 577                        return (-5); /* Huffman table index out of range */
 578                    }
 579                    else if (*p_src++ & 0xF0) /* AC table */
 580                    {
 581                        for (j=0; j<16; j++)
 582                        {
 583                            sum += *p_src;
 584                            p_jpeg->hufftable[i].huffmancodes_ac[j] = *p_src++;
 585                        }
 586                        if(16 + sum > AC_LEN)
 587                            return -10; /* longer than allowed */
 588
 589                        for (; j < 16 + sum; j++)
 590                            p_jpeg->hufftable[i].huffmancodes_ac[j] = *p_src++;
 591                    }
 592                    else /* DC table */
 593                    {
 594                        for (j=0; j<16; j++)
 595                        {
 596                            sum += *p_src;
 597                            p_jpeg->hufftable[i].huffmancodes_dc[j] = *p_src++;
 598                        }
 599                        if(16 + sum > DC_LEN)
 600                            return -11; /* longer than allowed */
 601
 602                        for (; j < 16 + sum; j++)
 603                            p_jpeg->hufftable[i].huffmancodes_dc[j] = *p_src++;
 604                    }
 605                } /* while */
 606                p_src = p_temp+marker_size - 2; /* skip possible residue */
 607            }
 608            break;
 609
 610        case 0xCC: /* Define Arithmetic coding conditioning(s) */
 611            return(-6); /* Arithmetic coding not supported */
 612
 613        case 0xD8: /* Start of Image */
 614        case 0xD9: /* End of Image */
 615        case 0x01: /* for temp private use arith code */
 616            break; /* skip parameterless marker */
 617
 618
 619        case 0xDA: /* Start of Scan */
 620            {
 621                ret |= SOS;
 622                marker_size = *p_src++ << 8; /* Highbyte */
 623                marker_size |= *p_src++; /* Lowbyte */
 624
 625                n = (marker_size-2-1-3)/2;
 626                if (*p_src++ != n || (n != 1 && n != 3))
 627                {
 628                    return (-7); /* Unsupported SOS component specification */
 629                }
 630                for (i=0; i<n; i++)
 631                {
 632                    p_jpeg->scanheader[i].ID = *p_src++;
 633                    p_jpeg->scanheader[i].DC_select = *p_src >> 4;
 634                    p_jpeg->scanheader[i].AC_select = *p_src++ & 0x0F;
 635                }
 636                p_src += 3; /* skip spectral information */
 637                p_jpeg->p_entropy_data = p_src;
 638                p_end = p_src; /* exit while loop */
 639            }
 640            break;
 641
 642        case 0xDB: /* Define quantization Table(s) */
 643            {
 644                ret |= DQT;
 645                marker_size = *p_src++ << 8; /* Highbyte */
 646                marker_size |= *p_src++; /* Lowbyte */
 647                n = (marker_size-2)/(QUANT_TABLE_LENGTH+1); /* # of tables */
 648                for (i=0; i<n; i++)
 649                {
 650                    int id = *p_src++; /* ID */
 651                    if (id >= 4)
 652                    {
 653                        return (-8); /* Unsupported quantization table */
 654                    }
 655                    /* Read Quantisation table: */
 656                    for (j=0; j<QUANT_TABLE_LENGTH; j++)
 657                        p_jpeg->quanttable[id][j] = *p_src++;
 658                }
 659            }
 660            break;
 661
 662        case 0xDD: /* Define Restart Interval */
 663            {
 664                marker_size = *p_src++ << 8; /* Highbyte */
 665                marker_size |= *p_src++; /* Lowbyte */
 666                p_jpeg->restart_interval = *p_src++ << 8; /* Highbyte */
 667                p_jpeg->restart_interval |= *p_src++; /* Lowbyte */
 668                p_src += marker_size-4; /* skip segment */
 669            }
 670            break;
 671
 672        case 0xDC: /* Define Number of Lines */
 673        case 0xDE: /* Define Hierarchical progression */
 674        case 0xDF: /* Expand Reference Component(s) */
 675        case 0xE0: /* Application Field 0*/
 676        case 0xE1: /* Application Field 1*/
 677        case 0xE2: /* Application Field 2*/
 678        case 0xE3: /* Application Field 3*/
 679        case 0xE4: /* Application Field 4*/
 680        case 0xE5: /* Application Field 5*/
 681        case 0xE6: /* Application Field 6*/
 682        case 0xE7: /* Application Field 7*/
 683        case 0xE8: /* Application Field 8*/
 684        case 0xE9: /* Application Field 9*/
 685        case 0xEA: /* Application Field 10*/
 686        case 0xEB: /* Application Field 11*/
 687        case 0xEC: /* Application Field 12*/
 688        case 0xED: /* Application Field 13*/
 689        case 0xEE: /* Application Field 14*/
 690        case 0xEF: /* Application Field 15*/
 691        case 0xFE: /* Comment */
 692            {
 693                marker_size = *p_src++ << 8; /* Highbyte */
 694                marker_size |= *p_src++; /* Lowbyte */
 695                p_src += marker_size-2; /* skip segment */
 696            }
 697            break;
 698
 699        case 0xF0: /* Reserved for JPEG extensions */
 700        case 0xF1: /* Reserved for JPEG extensions */
 701        case 0xF2: /* Reserved for JPEG extensions */
 702        case 0xF3: /* Reserved for JPEG extensions */
 703        case 0xF4: /* Reserved for JPEG extensions */
 704        case 0xF5: /* Reserved for JPEG extensions */
 705        case 0xF6: /* Reserved for JPEG extensions */
 706        case 0xF7: /* Reserved for JPEG extensions */
 707        case 0xF8: /* Reserved for JPEG extensions */
 708        case 0xF9: /* Reserved for JPEG extensions */
 709        case 0xFA: /* Reserved for JPEG extensions */
 710        case 0xFB: /* Reserved for JPEG extensions */
 711        case 0xFC: /* Reserved for JPEG extensions */
 712        case 0xFD: /* Reserved for JPEG extensions */
 713        case 0x02: /* Reserved */
 714        default:
 715            return (-9); /* Unknown marker */
 716        } /* switch */
 717    } /* while */
 718
 719    return (ret); /* return flags with seen markers */
 720}
 721
 722
 723void default_huff_tbl(struct jpeg* p_jpeg)
 724{
 725    static const struct huffman_table luma_table =
 726    {
 727        {
 728            0x00,0x01,0x05,0x01,0x01,0x01,0x01,0x01,0x01,0x00,0x00,0x00,0x00,0x00,
 729            0x00,0x00,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B
 730        },
 731        {
 732            0x00,0x02,0x01,0x03,0x03,0x02,0x04,0x03,0x05,0x05,0x04,0x04,0x00,0x00,0x01,0x7D,
 733            0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,
 734            0x22,0x71,0x14,0x32,0x81,0x91,0xA1,0x08,0x23,0x42,0xB1,0xC1,0x15,0x52,0xD1,0xF0,
 735            0x24,0x33,0x62,0x72,0x82,0x09,0x0A,0x16,0x17,0x18,0x19,0x1A,0x25,0x26,0x27,0x28,
 736            0x29,0x2A,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x43,0x44,0x45,0x46,0x47,0x48,0x49,
 737            0x4A,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x63,0x64,0x65,0x66,0x67,0x68,0x69,
 738            0x6A,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x83,0x84,0x85,0x86,0x87,0x88,0x89,
 739            0x8A,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
 740            0xA8,0xA9,0xAA,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xC2,0xC3,0xC4,0xC5,
 741            0xC6,0xC7,0xC8,0xC9,0xCA,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA,0xE1,0xE2,
 742            0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,
 743            0xF9,0xFA
 744        }
 745    };
 746
 747    static const struct huffman_table chroma_table =
 748    {
 749        {
 750            0x00,0x03,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x00,0x00,0x00,
 751            0x00,0x00,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B
 752        },
 753        {
 754            0x00,0x02,0x01,0x02,0x04,0x04,0x03,0x04,0x07,0x05,0x04,0x04,0x00,0x01,0x02,0x77,
 755            0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,
 756            0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91,0xA1,0xB1,0xC1,0x09,0x23,0x33,0x52,0xF0,
 757            0x15,0x62,0x72,0xD1,0x0A,0x16,0x24,0x34,0xE1,0x25,0xF1,0x17,0x18,0x19,0x1A,0x26,
 758            0x27,0x28,0x29,0x2A,0x35,0x36,0x37,0x38,0x39,0x3A,0x43,0x44,0x45,0x46,0x47,0x48,
 759            0x49,0x4A,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x63,0x64,0x65,0x66,0x67,0x68,
 760            0x69,0x6A,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x82,0x83,0x84,0x85,0x86,0x87,
 761            0x88,0x89,0x8A,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0xA2,0xA3,0xA4,0xA5,
 762            0xA6,0xA7,0xA8,0xA9,0xAA,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xC2,0xC3,
 763            0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA,
 764            0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,
 765            0xF9,0xFA
 766        }
 767    };
 768
 769    MEMCPY(&p_jpeg->hufftable[0], &luma_table, sizeof(luma_table));
 770    MEMCPY(&p_jpeg->hufftable[1], &chroma_table, sizeof(chroma_table));
 771
 772    return;
 773}
 774
 775/* Compute the derived values for a Huffman table */
 776static void fix_huff_tbl(int* htbl, struct derived_tbl* dtbl)
 777{
 778    int p, i, l, si;
 779    int lookbits, ctr;
 780    char huffsize[257];
 781    unsigned int huffcode[257];
 782    unsigned int code;
 783
 784    dtbl->pub = htbl; /* fill in back link */
 785
 786    /* Figure C.1: make table of Huffman code length for each symbol */
 787    /* Note that this is in code-length order. */
 788
 789    p = 0;
 790    for (l = 1; l <= 16; l++)
 791    {    /* all possible code length */
 792        for (i = 1; i <= (int) htbl[l-1]; i++)  /* all codes per length */
 793            huffsize[p++] = (char) l;
 794    }
 795    huffsize[p] = 0;
 796
 797    /* Figure C.2: generate the codes themselves */
 798    /* Note that this is in code-length order. */
 799
 800    code = 0;
 801    si = huffsize[0];
 802    p = 0;
 803    while (huffsize[p])
 804    {
 805        while (((int) huffsize[p]) == si)
 806        {
 807            huffcode[p++] = code;
 808            code++;
 809        }
 810        code <<= 1;
 811        si++;
 812    }
 813
 814    /* Figure F.15: generate decoding tables for bit-sequential decoding */
 815
 816    p = 0;
 817    for (l = 1; l <= 16; l++)
 818    {
 819        if (htbl[l-1])
 820        {
 821            dtbl->valptr[l] = p; /* huffval[] index of 1st symbol of code length l */
 822            dtbl->mincode[l] = huffcode[p]; /* minimum code of length l */
 823            p += htbl[l-1];
 824            dtbl->maxcode[l] = huffcode[p-1]; /* maximum code of length l */
 825        }
 826        else
 827        {
 828            dtbl->maxcode[l] = -1;  /* -1 if no codes of this length */
 829        }
 830    }
 831    dtbl->maxcode[17] = 0xFFFFFL; /* ensures huff_DECODE terminates */
 832
 833    /* Compute lookahead tables to speed up decoding.
 834    * First we set all the table entries to 0, indicating "too long";
 835    * then we iterate through the Huffman codes that are short enough and
 836    * fill in all the entries that correspond to bit sequences starting
 837    * with that code.
 838    */
 839
 840    MEMSET(dtbl->look_nbits, 0, sizeof(dtbl->look_nbits));
 841
 842    p = 0;
 843    for (l = 1; l <= HUFF_LOOKAHEAD; l++)
 844    {
 845        for (i = 1; i <= (int) htbl[l-1]; i++, p++)
 846        {
 847            /* l = current code's length, p = its index in huffcode[] & huffval[]. */
 848            /* Generate left-justified code followed by all possible bit sequences */
 849            lookbits = huffcode[p] << (HUFF_LOOKAHEAD-l);
 850            for (ctr = 1 << (HUFF_LOOKAHEAD-l); ctr > 0; ctr--)
 851            {
 852                dtbl->look_nbits[lookbits] = l;
 853                dtbl->look_sym[lookbits] = htbl[16+p];
 854                lookbits++;
 855            }
 856        }
 857    }
 858}
 859
 860
 861/* zag[i] is the natural-order position of the i'th element of zigzag order.
 862 * If the incoming data is corrupted, decode_mcu could attempt to
 863 * reference values beyond the end of the array.  To avoid a wild store,
 864 * we put some extra zeroes after the real entries.
 865 */
 866static const int zag[] =
 867{
 868     0,  1,  8, 16,  9,  2,  3, 10,
 869    17, 24, 32, 25, 18, 11,  4,  5,
 870    12, 19, 26, 33, 40, 48, 41, 34,
 871    27, 20, 13,  6,  7, 14, 21, 28,
 872    35, 42, 49, 56, 57, 50, 43, 36,
 873    29, 22, 15, 23, 30, 37, 44, 51,
 874    58, 59, 52, 45, 38, 31, 39, 46,
 875    53, 60, 61, 54, 47, 55, 62, 63,
 876     0,  0,  0,  0,  0,  0,  0,  0, /* extra entries in case k>63 below */
 877     0,  0,  0,  0,  0,  0,  0,  0
 878};
 879
 880void build_lut(struct jpeg* p_jpeg)
 881{
 882    int i;
 883    fix_huff_tbl(p_jpeg->hufftable[0].huffmancodes_dc,
 884        &p_jpeg->dc_derived_tbls[0]);
 885    fix_huff_tbl(p_jpeg->hufftable[0].huffmancodes_ac,
 886        &p_jpeg->ac_derived_tbls[0]);
 887    fix_huff_tbl(p_jpeg->hufftable[1].huffmancodes_dc,
 888        &p_jpeg->dc_derived_tbls[1]);
 889    fix_huff_tbl(p_jpeg->hufftable[1].huffmancodes_ac,
 890        &p_jpeg->ac_derived_tbls[1]);
 891
 892    /* build the dequantization tables for the IDCT (De-ZiZagged) */
 893    for (i=0; i<64; i++)
 894    {
 895        p_jpeg->qt_idct[0][zag[i]] = p_jpeg->quanttable[0][i];
 896        p_jpeg->qt_idct[1][zag[i]] = p_jpeg->quanttable[1][i];
 897    }
 898
 899    for (i=0; i<4; i++)
 900        p_jpeg->store_pos[i] = i; /* default ordering */
 901
 902    /* assignments for the decoding of blocks */
 903    if (p_jpeg->frameheader[0].horizontal_sampling == 2
 904        && p_jpeg->frameheader[0].vertical_sampling == 1)
 905    {   /* 4:2:2 */
 906        p_jpeg->blocks = 4;
 907        p_jpeg->x_mbl = (p_jpeg->x_size+15) / 16;
 908        p_jpeg->x_phys = p_jpeg->x_mbl * 16;
 909        p_jpeg->y_mbl = (p_jpeg->y_size+7) / 8;
 910        p_jpeg->y_phys = p_jpeg->y_mbl * 8;
 911        p_jpeg->mcu_membership[0] = 0; /* Y1=Y2=0, U=1, V=2 */
 912        p_jpeg->mcu_membership[1] = 0;
 913        p_jpeg->mcu_membership[2] = 1;
 914        p_jpeg->mcu_membership[3] = 2;
 915        p_jpeg->tab_membership[0] = 0; /* DC, DC, AC, AC */
 916        p_jpeg->tab_membership[1] = 0;
 917        p_jpeg->tab_membership[2] = 1;
 918        p_jpeg->tab_membership[3] = 1;
 919        p_jpeg->subsample_x[0] = 1;
 920        p_jpeg->subsample_x[1] = 2;
 921        p_jpeg->subsample_x[2] = 2;
 922        p_jpeg->subsample_y[0] = 1;
 923        p_jpeg->subsample_y[1] = 1;
 924        p_jpeg->subsample_y[2] = 1;
 925    }
 926    if (p_jpeg->frameheader[0].horizontal_sampling == 1
 927        && p_jpeg->frameheader[0].vertical_sampling == 2)
 928    {   /* 4:2:2 vertically subsampled */
 929        p_jpeg->store_pos[1] = 2; /* block positions are mirrored */
 930        p_jpeg->store_pos[2] = 1;
 931        p_jpeg->blocks = 4;
 932        p_jpeg->x_mbl = (p_jpeg->x_size+7) / 8;
 933        p_jpeg->x_phys = p_jpeg->x_mbl * 8;
 934        p_jpeg->y_mbl = (p_jpeg->y_size+15) / 16;
 935        p_jpeg->y_phys = p_jpeg->y_mbl * 16;
 936        p_jpeg->mcu_membership[0] = 0; /* Y1=Y2=0, U=1, V=2 */
 937        p_jpeg->mcu_membership[1] = 0;
 938        p_jpeg->mcu_membership[2] = 1;
 939        p_jpeg->mcu_membership[3] = 2;
 940        p_jpeg->tab_membership[0] = 0; /* DC, DC, AC, AC */
 941        p_jpeg->tab_membership[1] = 0;
 942        p_jpeg->tab_membership[2] = 1;
 943        p_jpeg->tab_membership[3] = 1;
 944        p_jpeg->subsample_x[0] = 1;
 945        p_jpeg->subsample_x[1] = 1;
 946        p_jpeg->subsample_x[2] = 1;
 947        p_jpeg->subsample_y[0] = 1;
 948        p_jpeg->subsample_y[1] = 2;
 949        p_jpeg->subsample_y[2] = 2;
 950    }
 951    else if (p_jpeg->frameheader[0].horizontal_sampling == 2
 952        && p_jpeg->frameheader[0].vertical_sampling == 2)
 953    {   /* 4:2:0 */
 954        p_jpeg->blocks = 6;
 955        p_jpeg->x_mbl = (p_jpeg->x_size+15) / 16;
 956        p_jpeg->x_phys = p_jpeg->x_mbl * 16;
 957        p_jpeg->y_mbl = (p_jpeg->y_size+15) / 16;
 958        p_jpeg->y_phys = p_jpeg->y_mbl * 16;
 959        p_jpeg->mcu_membership[0] = 0;
 960        p_jpeg->mcu_membership[1] = 0;
 961        p_jpeg->mcu_membership[2] = 0;
 962        p_jpeg->mcu_membership[3] = 0;
 963        p_jpeg->mcu_membership[4] = 1;
 964        p_jpeg->mcu_membership[5] = 2;
 965        p_jpeg->tab_membership[0] = 0;
 966        p_jpeg->tab_membership[1] = 0;
 967        p_jpeg->tab_membership[2] = 0;
 968        p_jpeg->tab_membership[3] = 0;
 969        p_jpeg->tab_membership[4] = 1;
 970        p_jpeg->tab_membership[5] = 1;
 971        p_jpeg->subsample_x[0] = 1;
 972        p_jpeg->subsample_x[1] = 2;
 973        p_jpeg->subsample_x[2] = 2;
 974        p_jpeg->subsample_y[0] = 1;
 975        p_jpeg->subsample_y[1] = 2;
 976        p_jpeg->subsample_y[2] = 2;
 977    }
 978    else if (p_jpeg->frameheader[0].horizontal_sampling == 1
 979        && p_jpeg->frameheader[0].vertical_sampling == 1)
 980    {   /* 4:4:4 */
 981        /* don't overwrite p_jpeg->blocks */
 982        p_jpeg->x_mbl = (p_jpeg->x_size+7) / 8;
 983        p_jpeg->x_phys = p_jpeg->x_mbl * 8;
 984        p_jpeg->y_mbl = (p_jpeg->y_size+7) / 8;
 985        p_jpeg->y_phys = p_jpeg->y_mbl * 8;
 986        p_jpeg->mcu_membership[0] = 0;
 987        p_jpeg->mcu_membership[1] = 1;
 988        p_jpeg->mcu_membership[2] = 2;
 989        p_jpeg->tab_membership[0] = 0;
 990        p_jpeg->tab_membership[1] = 1;
 991        p_jpeg->tab_membership[2] = 1;
 992        p_jpeg->subsample_x[0] = 1;
 993        p_jpeg->subsample_x[1] = 1;
 994        p_jpeg->subsample_x[2] = 1;
 995        p_jpeg->subsample_y[0] = 1;
 996        p_jpeg->subsample_y[1] = 1;
 997        p_jpeg->subsample_y[2] = 1;
 998    }
 999    else
1000    {
1001        /* error */
1002    }
1003
1004}
1005
1006
/*
* These inline functions provide the in-line portion of bit fetching.
* Use check_bit_buffer to ensure there are N bits in get_buffer
* before using get_bits, peek_bits, or drop_bits.
*  check_bit_buffer(pb, n);
*    Ensure there are at least N bits in get_buffer, refilling it from
*    the input if necessary.
*  val = get_bits(pb, n);
*    Fetch next N bits.
*  val = peek_bits(pb, n);
*    Fetch next N bits without removing them from the buffer.
*  drop_bits(pb, n);
*    Discard next N bits.
* Because these are functions, N is evaluated only once and may be any
* expression. N must not exceed 16, since check_bit_buffer refills only
* two bytes per call.
*/
1022
1023INLINE void check_bit_buffer(struct bitstream* pb, int nbits)
1024{
1025    if (pb->bits_left < nbits)
1026    {   /* nbits is <= 16, so I can always refill 2 bytes in this case */
1027        unsigned char byte;
1028
1029        byte = *pb->next_input_byte++;
1030        if (byte == 0xFF) /* legal marker can be byte stuffing or RSTm */
1031        {   /* simplification: just skip the (one-byte) marker code */
1032            pb->next_input_byte++;
1033        }
1034        pb->get_buffer = (pb->get_buffer << 8) | byte;
1035
1036        byte = *pb->next_input_byte++;
1037        if (byte == 0xFF) /* legal marker can be byte stuffing or RSTm */
1038        {   /* simplification: just skip the (one-byte) marker code */
1039            pb->next_input_byte++;
1040        }
1041        pb->get_buffer = (pb->get_buffer << 8) | byte;
1042
1043        pb->bits_left += 16;
1044    }
1045}
1046
1047INLINE int get_bits(struct bitstream* pb, int nbits)
1048{
1049    return ((int) (pb->get_buffer >> (pb->bits_left -= nbits))) & (BIT_N(nbits)-1);
1050}
1051
1052INLINE int peek_bits(struct bitstream* pb, int nbits)
1053{
1054    return ((int) (pb->get_buffer >> (pb->bits_left - nbits))) & (BIT_N(nbits)-1);
1055}
1056
1057INLINE void drop_bits(struct bitstream* pb, int nbits)
1058{
1059    pb->bits_left -= nbits;
1060}
1061
1062/* re-synchronize to entropy data (skip restart marker) */
1063static void search_restart(struct bitstream* pb)
1064{
1065    pb->next_input_byte--; /* we may have overread it, taking 2 bytes */
1066    /* search for a non-byte-padding marker, has to be RSTm or EOS */
1067    while (pb->next_input_byte < pb->input_end &&
1068        (pb->next_input_byte[-2] != 0xFF || pb->next_input_byte[-1] == 0x00))
1069    {
1070        pb->next_input_byte++;
1071    }
1072    pb->bits_left = 0;
1073}
1074
/* Figure F.12: extend sign bit.
 * x is the s-bit value read from the stream (0 <= s <= 15). Values below
 * 2**(s-1) encode negative numbers and are mapped to x - (2**s - 1); all
 * other values are returned unchanged.
 * s is used as an index into two 16-entry tables, so it must be in 0..15.
 */
#define HUFF_EXTEND(x,s)  ((x) < extend_test[s] ? (x) + extend_offset[s] : (x))

static const int extend_test[16] =   /* entry n is 2**(n-1) */
{
    0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
    0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000
};

/* Written as 1 - (1 << n) rather than (-1 << n) + 1: same values, but
 * left-shifting a negative number is undefined behaviour in C. */
static const int extend_offset[16] = /* entry n is 1 - 2**n */
{
    0, 1 - (1<<1), 1 - (1<<2), 1 - (1<<3), 1 - (1<<4),
    1 - (1<<5), 1 - (1<<6), 1 - (1<<7), 1 - (1<<8),
    1 - (1<<9), 1 - (1<<10), 1 - (1<<11), 1 - (1<<12),
    1 - (1<<13), 1 - (1<<14), 1 - (1<<15)
};
1099
1100/* Decode a single value */
1101INLINE int huff_decode_dc(struct bitstream* bs, struct derived_tbl* tbl)
1102{
1103    int nb, look, s, r;
1104
1105    check_bit_buffer(bs, HUFF_LOOKAHEAD);
1106    look = peek_bits(bs, HUFF_LOOKAHEAD);
1107    if ((nb = tbl->look_nbits[look]) != 0)
1108    {
1109        drop_bits(bs, nb);
1110        s = tbl->look_sym[look];
1111        check_bit_buffer(bs, s);
1112        r = get_bits(bs, s);
1113        s = HUFF_EXTEND(r, s);
1114    }
1115    else
1116    {   /*  slow_DECODE(s, HUFF_LOOKAHEAD+1)) < 0); */
1117        long code;
1118        nb=HUFF_LOOKAHEAD+1;
1119        check_bit_buffer(bs, nb);
1120        code = get_bits(bs, nb);
1121        while (code > tbl->maxcode[nb])
1122        {
1123            code <<= 1;
1124            check_bit_buffer(bs, 1);
1125            code |= get_bits(bs, 1);
1126            nb++;
1127        }
1128        if (nb > 16) /* error in Huffman */
1129        {
1130            s=0; /* fake a zero, this is most safe */
1131        }
1132        else
1133        {
1134            s = tbl->pub[16 + tbl->valptr[nb] + ((int) (code - tbl->mincode[nb])) ];
1135            check_bit_buffer(bs, s);
1136            r = get_bits(bs, s);
1137            s = HUFF_EXTEND(r, s);
1138        }
1139    } /* end slow decode */
1140    return s;
1141}
1142
1143INLINE int huff_decode_ac(struct bitstream* bs, struct derived_tbl* tbl)
1144{
1145    int nb, look, s;
1146
1147    check_bit_buffer(bs, HUFF_LOOKAHEAD);
1148    look = peek_bits(bs, HUFF_LOOKAHEAD);
1149    if ((nb = tbl->look_nbits[look]) != 0)
1150    {
1151        drop_bits(bs, nb);
1152        s = tbl->look_sym[look];
1153    }
1154    else
1155    {   /*  slow_DECODE(s, HUFF_LOOKAHEAD+1)) < 0); */
1156        long code;
1157        nb=HUFF_LOOKAHEAD+1;
1158        check_bit_buffer(bs, nb);
1159        code = get_bits(bs, nb);
1160        while (code > tbl->maxcode[nb])
1161        {
1162            code <<= 1;
1163            check_bit_buffer(bs, 1);
1164            code |= get_bits(bs, 1);
1165            nb++;
1166        }
1167        if (nb > 16) /* error in Huffman */
1168        {
1169            s=0; /* fake a zero, this is most safe */
1170        }
1171        else
1172        {
1173            s = tbl->pub[16 + tbl->valptr[nb] + ((int) (code - tbl->mincode[nb])) ];
1174        }
1175    } /* end slow decode */
1176    return s;
1177}
1178
1179
1180#ifdef HAVE_LCD_COLOR
1181
/* JPEG decoder variant for YUV decoding, into 3 different planes */
/*  Note: it keeps the original color subsampling, even if resized. */
/*
 * p_jpeg:      decoder state; reads the entropy data range, the derived
 *              Huffman tables, qt_idct and the MCU layout fields
 * p_pixel:     start addresses of the three output planes, indexed by
 *              component (0 = Y, 1 and 2 = chroma)
 * downscale:   1, 2, 4 or 8; selects the 8x8, 4x4, 2x2 or 1x1 IDCT
 * pf_progress: optional callback (may be NULL), called after each MCU row
 *              with (row index, last row index)
 * Returns 0 when done, -1 for an unsupported downscale factor.
 * Decoding stops early (still returning 0) once the read pointer has
 * passed the end of the entropy data at the start of an MCU row.
 */
int jpeg_decode(struct jpeg* p_jpeg, unsigned char* p_pixel[3],
                int downscale, void (*pf_progress)(int current, int total))
{
    struct bitstream bs; /* bitstream "object" */
    int block[64]; /* decoded DCT coefficients */

    int width, height;
    int skip_line[3]; /* bytes from one line to the next (skip_line) */
    int skip_strip[3], skip_mcu[3]; /* bytes to next DCT row / column */

    int i, x, y; /* loop counter */

    unsigned char* p_line[3] = {p_pixel[0], p_pixel[1], p_pixel[2]};
    unsigned char* p_byte[3]; /* bitmap pointer */

    void (*pf_idct)(unsigned char*, int*, int*, int); /* selected IDCT */
    int k_need; /* AC coefficients needed up to here */
    int zero_need; /* init the block with this many zeros */

    int last_dc_val[3] = {0, 0, 0}; /* or 128 for chroma? */
    int store_offs[4]; /* memory offsets: order of Y11 Y12 Y21 Y22 U V */
    int restart = p_jpeg->restart_interval; /* MCUs until restart marker */

    /* pick the IDCT we want, determine how to work with coefs */
    if (downscale == 1)
    {
        pf_idct = idct8x8;
        k_need = 64; /* all */
        zero_need = 63; /* all */
    }
    else if (downscale == 2)
    {
        pf_idct = idct4x4;
        k_need = 25; /* this far in zig-zag to cover 4*4 */
        zero_need = 27; /* clear this far in linear order */
    }
    else if (downscale == 4)
    {
        pf_idct = idct2x2;
        k_need = 5; /* this far in zig-zag to cover 2*2 */
        zero_need = 9; /* clear this far in linear order */
    }
    else if (downscale == 8)
    {
        pf_idct = idct1x1;
        k_need = 0; /* no AC, not needed */
        zero_need = 0; /* no AC, not needed */
    }
    else return -1; /* not supported */

    /* init bitstream, fake a restart to make it start */
    bs.get_buffer = 0;
    bs.next_input_byte = p_jpeg->p_entropy_data;
    bs.bits_left = 0;
    bs.input_end = p_jpeg->p_entropy_end;

    /* output size of the luma plane after downscaling */
    width  = p_jpeg->x_phys / downscale;
    height = p_jpeg->y_phys / downscale;
    for (i=0; i<3; i++) /* calculate some strides */
    {
        skip_line[i] = width / p_jpeg->subsample_x[i];
        skip_strip[i] = skip_line[i]
                        * (height / p_jpeg->y_mbl) / p_jpeg->subsample_y[i];
        skip_mcu[i] = width/p_jpeg->x_mbl / p_jpeg->subsample_x[i];
    }

    /* prepare offsets about where to store the different blocks */
    /* (store_pos maps the logical position to the block's index in the MCU) */
    store_offs[p_jpeg->store_pos[0]] = 0;
    store_offs[p_jpeg->store_pos[1]] = 8 / downscale; /* to the right */
    store_offs[p_jpeg->store_pos[2]] = width * 8 / downscale; /* below */
    store_offs[p_jpeg->store_pos[3]] = store_offs[1] + store_offs[2]; /* r+b */

    /* one iteration per MCU row; give up once the input is exhausted */
    for(y=0; y<p_jpeg->y_mbl && bs.next_input_byte <= bs.input_end; y++)
    {
        for (i=0; i<3; i++) /* scan line init */
        {
            p_byte[i] = p_line[i];
            p_line[i] += skip_strip[i];
        }
        for (x=0; x<p_jpeg->x_mbl; x++)
        {
            int blkn;

            /* Outer loop handles each block in the MCU */
            for (blkn = 0; blkn < p_jpeg->blocks; blkn++)
            {   /* Decode a single block's worth of coefficients */
                int k = 1; /* coefficient index */
                int s, r; /* huffman values */
                int ci = p_jpeg->mcu_membership[blkn]; /* component index */
                int ti = p_jpeg->tab_membership[blkn]; /* table index */
                struct derived_tbl* dctbl = &p_jpeg->dc_derived_tbls[ti];
                struct derived_tbl* actbl = &p_jpeg->ac_derived_tbls[ti];

                /* Section F.2.2.1: decode the DC coefficient difference */
                s = huff_decode_dc(&bs, dctbl);

                /* DC is coded as difference to the component's previous DC */
                last_dc_val[ci] += s;
                block[0] = last_dc_val[ci]; /* output it (assumes zag[0] = 0) */

                /* coefficient buffer must be cleared */
                MEMSET(block+1, 0, zero_need*sizeof(block[0]));

                /* Section F.2.2.2: decode the AC coefficients */
                for (; k < k_need; k++)
                {
                    s = huff_decode_ac(&bs, actbl);
                    r = s >> 4; /* high nibble: number of coefficients to skip */
                    s &= 15; /* low nibble: bit count of the value */

                    if (s)
                    {
                        k += r;
                        check_bit_buffer(&bs, s);
                        r = get_bits(&bs, s);
                        block[zag[k]] = HUFF_EXTEND(r, s);
                    }
                    else
                    {
                        if (r != 15)
                        {   /* end of block: nothing left to discard either */
                            k = 64;
                            break;
                        }
                        k += r; /* skip 16: 15 here plus the loop increment */
                    }
                }  /* for k */
                /* In this path we just discard the values */
                /* (coefficients beyond k_need are not used by the IDCT
                   selected above, but must still be consumed) */
                for (; k < 64; k++)
                {
                    s = huff_decode_ac(&bs, actbl);
                    r = s >> 4;
                    s &= 15;

                    if (s)
                    {
                        k += r;
                        check_bit_buffer(&bs, s);
                        drop_bits(&bs, s);
                    }
                    else
                    {
                        if (r != 15)
                            break; /* end of block */
                        k += r;
                    }
                }  /* for k */

                if (ci == 0)
                {   /* Y component needs to bother about block store */
                    pf_idct(p_byte[0]+store_offs[blkn], block,
                        p_jpeg->qt_idct[ti], skip_line[0]);
                }
                else
                {   /* chroma */
                    pf_idct(p_byte[ci], block, p_jpeg->qt_idct[ti],
                        skip_line[ci]);
                }
            } /* for blkn */
            p_byte[0] += skip_mcu[0]; /* unrolled for (i=0; i<3; i++) loop */
            p_byte[1] += skip_mcu[1];
            p_byte[2] += skip_mcu[2];
            if (p_jpeg->restart_interval && --restart == 0)
            {   /* if a restart marker is due: */
                restart = p_jpeg->restart_interval; /* count again */
                search_restart(&bs); /* align the bitstream */
                last_dc_val[0] = last_dc_val[1] =
                                 last_dc_val[2] = 0; /* reset decoder */
            }
        } /* for x */
        if (pf_progress != NULL)
            pf_progress(y, p_jpeg->y_mbl-1); /* notify about decoding progress */
    } /* for y */

    return 0; /* success */
}
1359#else /* !HAVE_LCD_COLOR */
1360
/* a JPEG decoder specialized in decoding only the luminance (b&w) */
/*
 * p_jpeg:      decoder state; reads the entropy data range, the derived
 *              Huffman tables, qt_idct and the MCU layout fields
 * p_pixel:     p_pixel[0] is the start address of the single output plane
 * downscale:   1, 2, 4 or 8; selects the 8x8, 4x4, 2x2 or 1x1 IDCT
 * pf_progress: optional callback (may be NULL), called after each MCU row
 *              with (row index, last row index)
 * Returns 0 when done, -1 for an unsupported downscale factor.
 * Blocks of the other components are parsed, to keep the bitstream in
 * step, but their coefficients are thrown away.
 * Decoding stops early (still returning 0) once the read pointer has
 * passed the end of the entropy data at the start of an MCU row.
 */
int jpeg_decode(struct jpeg* p_jpeg, unsigned char* p_pixel[1], int downscale,
                void (*pf_progress)(int current, int total))
{
    struct bitstream bs; /* bitstream "object" */
    int block[64]; /* decoded DCT coefficients */

    int width, height;
    int skip_line; /* bytes from one line to the next (skip_line) */
    int skip_strip, skip_mcu; /* bytes to next DCT row / column */

    int x, y; /* loop counter */

    unsigned char* p_line = p_pixel[0];
    unsigned char* p_byte; /* bitmap pointer */

    void (*pf_idct)(unsigned char*, int*, int*, int); /* selected IDCT */
    int k_need; /* AC coefficients needed up to here */
    int zero_need; /* init the block with this many zeros */

    int last_dc_val = 0;
    int store_offs[4]; /* memory offsets: order of Y11 Y12 Y21 Y22 U V */
    int restart = p_jpeg->restart_interval; /* MCUs until restart marker */

    /* pick the IDCT we want, determine how to work with coefs */
    if (downscale == 1)
    {
        pf_idct = idct8x8;
        k_need = 64; /* all */
        zero_need = 63; /* all */
    }
    else if (downscale == 2)
    {
        pf_idct = idct4x4;
        k_need = 25; /* this far in zig-zag to cover 4*4 */
        zero_need = 27; /* clear this far in linear order */
    }
    else if (downscale == 4)
    {
        pf_idct = idct2x2;
        k_need = 5; /* this far in zig-zag to cover 2*2 */
        zero_need = 9; /* clear this far in linear order */
    }
    else if (downscale == 8)
    {
        pf_idct = idct1x1;
        k_need = 0; /* no AC, not needed */
        zero_need = 0; /* no AC, not needed */
    }
    else return -1; /* not supported */

    /* init bitstream, fake a restart to make it start */
    bs.get_buffer = 0;
    bs.next_input_byte = p_jpeg->p_entropy_data;
    bs.bits_left = 0;
    bs.input_end = p_jpeg->p_entropy_end;

    /* output size after downscaling, and the strides derived from it */
    width  = p_jpeg->x_phys / downscale;
    height = p_jpeg->y_phys / downscale;
    skip_line = width;
    skip_strip = skip_line * (height / p_jpeg->y_mbl);
    skip_mcu = (width/p_jpeg->x_mbl);

    /* prepare offsets about where to store the different blocks */
    /* (store_pos maps the logical position to the block's index in the MCU) */
    store_offs[p_jpeg->store_pos[0]] = 0;
    store_offs[p_jpeg->store_pos[1]] = 8 / downscale; /* to the right */
    store_offs[p_jpeg->store_pos[2]] = width * 8 / downscale; /* below */
    store_offs[p_jpeg->store_pos[3]] = store_offs[1] + store_offs[2]; /* r+b */

    /* one iteration per MCU row; give up once the input is exhausted */
    for(y=0; y<p_jpeg->y_mbl && bs.next_input_byte <= bs.input_end; y++)
    {
        p_byte = p_line;
        p_line += skip_strip;
        for (x=0; x<p_jpeg->x_mbl; x++)
        {
            int blkn;

            /* Outer loop handles each block in the MCU */
            for (blkn = 0; blkn < p_jpeg->blocks; blkn++)
            {   /* Decode a single block's worth of coefficients */
                int k = 1; /* coefficient index */
                int s, r; /* huffman values */
                int ci = p_jpeg->mcu_membership[blkn]; /* component index */
                int ti = p_jpeg->tab_membership[blkn]; /* table index */
                struct derived_tbl* dctbl = &p_jpeg->dc_derived_tbls[ti];
                struct derived_tbl* actbl = &p_jpeg->ac_derived_tbls[ti];

                /* Section F.2.2.1: decode the DC coefficient difference */
                /* (done for every block, the value is only used for Y) */
                s = huff_decode_dc(&bs, dctbl);

                if (ci == 0) /* only for Y component */
                {
                    /* DC is coded as difference to the previous DC */
                    last_dc_val += s;
                    block[0] = last_dc_val; /* output it (assumes zag[0] = 0) */

                    /* coefficient buffer must be cleared */
                    MEMSET(block+1, 0, zero_need*sizeof(block[0]));

                    /* Section F.2.2.2: decode the AC coefficients */
                    for (; k < k_need; k++)
                    {
                        s = huff_decode_ac(&bs, actbl);
                        r = s >> 4; /* high nibble: coefficients to skip */
                        s &= 15; /* low nibble: bit count of the value */

                        if (s)
                        {
                            k += r;
                            check_bit_buffer(&bs, s);
                            r = get_bits(&bs, s);
                            block[zag[k]] = HUFF_EXTEND(r, s);
                        }
                        else
                        {
                            if (r != 15)
                            {   /* end of block: nothing left to discard */
                                k = 64;
                                break;
                            }
                            k += r; /* skip 16: 15 plus the loop increment */
                        }
                    }  /* for k */
                }
                /* In this path we just discard the values */
                /* (for non-Y blocks k is still 1 here, so all of their AC
                   coefficients are consumed without being stored) */
                for (; k < 64; k++)
                {
                    s = huff_decode_ac(&bs, actbl);
                    r = s >> 4;
                    s &= 15;

                    if (s)
                    {
                        k += r;
                        check_bit_buffer(&bs, s);
                        drop_bits(&bs, s);
                    }
                    else
                    {
                        if (r != 15)
                            break; /* end of block */
                        k += r;
                    }
                }  /* for k */

                if (ci == 0)
                {   /* only for Y component */
                    pf_idct(p_byte+store_offs[blkn], block, p_jpeg->qt_idct[ti],
                        skip_line);
                }
            } /* for blkn */
            p_byte += skip_mcu;
            if (p_jpeg->restart_interval && --restart == 0)
            {   /* if a restart marker is due: */
                restart = p_jpeg->restart_interval; /* count again */
                search_restart(&bs); /* align the bitstream */
                last_dc_val = 0; /* reset decoder */
            }
        } /* for x */
        if (pf_progress != NULL)
            pf_progress(y, p_jpeg->y_mbl-1); /* notify about decoding progress */
    } /* for y */

    return 0; /* success */
}
1525#endif /* !HAVE_LCD_COLOR */
1526
1527/**************** end JPEG code ********************/