LCOV - code coverage report
Current view: top level - Core - OOTextureScaling.m (source / functions) Hit Total Coverage
Test: coverxygen.info Lines: 0 38 0.0 %
Date: 2025-05-28 07:50:54 Functions: 0 0 -

          Line data    Source code
       1           0 : /*
       2             : 
       3             : OOTextureScaling.m
       4             : 
       5             : Copyright (C) 2007-2013 Jens Ayton
       6             : 
       7             : Permission is hereby granted, free of charge, to any person obtaining a copy
       8             : of this software and associated documentation files (the "Software"), to deal
       9             : in the Software without restriction, including without limitation the rights
      10             : to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      11             : copies of the Software, and to permit persons to whom the Software is
      12             : furnished to do so, subject to the following conditions:
      13             : 
      14             : The above copyright notice and this permission notice shall be included in all
      15             : copies or substantial portions of the Software.
      16             : 
      17             : THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
      18             : IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
      19             : FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
      20             : AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
      21             : LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
      22             : OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
      23             : SOFTWARE.
      24             : 
      25             : */
      26             : 
      27             : 
      28             : #import "OOTextureScaling.h"
      29             : #import "OOFunctionAttributes.h"
      30             : #include <stdlib.h>
      31             : #import "OOLogging.h"
      32             : #import "OOMaths.h"
      33             : #import "OOCPUInfo.h"
      34             : 
      35             : 
      36           0 : #define DUMP_MIP_MAPS   0
      37           0 : #define DUMP_SCALE              0
      38             : 
      39             : 
      40             : /*      Internal function declarations.
      41             :         
      42             :         NOTE: the function definitions are grouped together for best code cache
      43             :         coherence rather than the order listed here.
      44             :  */
      45             : static BOOL GenerateMipMaps1(void *textureBytes, OOPixMapDimension width, OOPixMapDimension height) NONNULL_FUNC;
      46             : static BOOL GenerateMipMaps2(void *textureBytes, OOPixMapDimension width, OOPixMapDimension height) NONNULL_FUNC;
      47             : static BOOL GenerateMipMaps4(void *textureBytes, OOPixMapDimension width, OOPixMapDimension height) NONNULL_FUNC;
      48             : 
      49             : 
      50             : /*      ScaleToHalf_P_xN functions
      51             :         These scale a texture with P planes (components) to half its size in each
      52             :         dimension, handling N pixels at a time. srcWidth must be a multiple of N.
      53             :         Parameters are not validated -- bad parameters will lead to bad data or a
      54             :         crash.
      55             :         
      56             :         Scaling is an unweighted average. 8 bits per channel assumed.
      57             :         It is safe and meaningful for srcBytes == dstBytes.
      58             : */
      59             : static void ScaleToHalf_1_x1(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight) NONNULL_FUNC;
      60             : static void ScaleToHalf_2_x1(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight) NONNULL_FUNC;
      61             : static void ScaleToHalf_4_x1(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight) NONNULL_FUNC;
      62             : 
      63             : #if OOLITE_NATIVE_64_BIT
      64             :         static void ScaleToHalf_1_x8(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight) NONNULL_FUNC;
      65             : //      static void ScaleToHalf_2_x4(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight) NONNULL_FUNC;
      66             :         static void ScaleToHalf_4_x2(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight) NONNULL_FUNC;
      67             : #else
      68             :         static void ScaleToHalf_1_x4(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight) NONNULL_FUNC;
      69             : //      static void ScaleToHalf_2_x2(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight) NONNULL_FUNC;
      70             : #endif
      71             : 
      72             : 
      73             : OOINLINE void StretchVertically(OOPixMap srcPx, OOPixMap dstPx) ALWAYS_INLINE_FUNC;
      74             : OOINLINE void SqueezeVertically(OOPixMap pixMap, OOPixMapDimension dstHeight) ALWAYS_INLINE_FUNC;
      75             : OOINLINE void StretchHorizontally(OOPixMap srcPx, OOPixMap dstPx) ALWAYS_INLINE_FUNC;
      76             : OOINLINE void SqueezeHorizontally(OOPixMap pixMap, OOPixMapDimension dstHeight) ALWAYS_INLINE_FUNC;
      77             : 
      78             : static void StretchVerticallyN_x1(OOPixMap srcPx, OOPixMap dstPx);
      79             : 
      80             : static void SqueezeVertically1(OOPixMap srcPx, OOPixMapDimension dstHeight);
      81             : static void SqueezeVertically2(OOPixMap srcPx, OOPixMapDimension dstHeight);
      82             : static void SqueezeVertically4(OOPixMap srcPx, OOPixMapDimension dstHeight);
      83             : static void StretchHorizontally1(OOPixMap srcPx, OOPixMap dstPx);
      84             : static void StretchHorizontally2(OOPixMap srcPx, OOPixMap dstPx);
      85             : static void StretchHorizontally4(OOPixMap srcPx, OOPixMap dstPx);
      86             : static void SqueezeHorizontally1(OOPixMap srcPx, OOPixMapDimension dstWidth);
      87             : static void SqueezeHorizontally2(OOPixMap srcPx, OOPixMapDimension dstWidth);
      88             : static void SqueezeHorizontally4(OOPixMap srcPx, OOPixMapDimension dstWidth);
      89             : 
      90             : 
      91             : static BOOL EnsureCorrectDataSize(OOPixMap *pixMap, BOOL leaveSpaceForMipMaps) NONNULL_FUNC;
      92             : 
      93             : 
      94             : #if !OOLITE_NATIVE_64_BIT
      95             : 
      96             : static void StretchVerticallyN_x4(OOPixMap srcPx, OOPixMap dstPx);
      97             : 
      98           0 : OOINLINE void StretchVertically(OOPixMap srcPx, OOPixMap dstPx)
      99             : {
     100             :         if (!((srcPx.rowBytes) & 3))
     101             :         {
     102             :                 StretchVerticallyN_x4(srcPx, dstPx);
     103             :         }
     104             :         else
     105             :         {
     106             :                 StretchVerticallyN_x1(srcPx, dstPx);
     107             :         }
     108             : }
     109             : 
     110             : #else   // OOLITE_NATIVE_64_BIT
     111             : 
     112             : static void StretchVerticallyN_x8(OOPixMap srcPx, OOPixMap dstPx);
     113             : 
     114             : OOINLINE void StretchVertically(OOPixMap srcPx, OOPixMap dstPx)
     115             : {
     116             :         if (!((srcPx.rowBytes) & 7))
     117             :         {
     118             :                 StretchVerticallyN_x8(srcPx, dstPx);
     119             :         }
     120             :         else
     121             :         {
     122             :                 StretchVerticallyN_x1(srcPx, dstPx);
     123             :         }
     124             : }
     125             : 
     126             : #endif
     127             : 
     128             : 
     129           0 : OOINLINE void SqueezeVertically(OOPixMap pixMap, OOPixMapDimension dstHeight)
     130             : {
     131             :         switch (pixMap.format)
     132             :         {
     133             :                 case kOOPixMapRGBA:
     134             :                         SqueezeVertically4(pixMap, dstHeight);
     135             :                         return;
     136             :                         
     137             :                 case kOOPixMapGrayscale:
     138             :                         SqueezeVertically1(pixMap, dstHeight);
     139             :                         return;
     140             :                         
     141             :                 case kOOPixMapGrayscaleAlpha:
     142             :                         SqueezeVertically2(pixMap, dstHeight);
     143             :                         return;
     144             :                         
     145             :                 case kOOPixMapInvalidFormat:
     146             :                         break;
     147             :         }
     148             :         
     149             : #ifndef NDEBUG
     150             :         [NSException raise:NSInternalInconsistencyException format:@"Unsupported pixmap format in scaler: %@", OOPixMapFormatName(pixMap.format)];
     151             : #else
     152             :         abort();
     153             : #endif
     154             : }
     155             : 
     156             : 
     157           0 : OOINLINE void StretchHorizontally(OOPixMap srcPx, OOPixMap dstPx)
     158             : {
     159             :         NSCParameterAssert(srcPx.format == dstPx.format);
     160             :         
     161             :         switch (srcPx.format)
     162             :         {
     163             :                 case kOOPixMapRGBA:
     164             :                         StretchHorizontally4(srcPx, dstPx);
     165             :                         return;
     166             :                         
     167             :                 case kOOPixMapGrayscale:
     168             :                         StretchHorizontally1(srcPx, dstPx);
     169             :                         return;
     170             :                         
     171             :                 case kOOPixMapGrayscaleAlpha:
     172             :                         StretchHorizontally2(srcPx, dstPx);
     173             :                         return;
     174             :                         
     175             :                 case kOOPixMapInvalidFormat:
     176             :                         break;
     177             :         }
     178             :         
     179             : #ifndef NDEBUG
     180             :         [NSException raise:NSInternalInconsistencyException format:@"Unsupported pixmap format in scaler: %@", OOPixMapFormatName(srcPx.format)];
     181             : #else
     182             :         abort();
     183             : #endif
     184             : }
     185             : 
     186             : 
     187           0 : OOINLINE void SqueezeHorizontally(OOPixMap pixMap, OOPixMapDimension dstHeight)
     188             : {
     189             :         switch (pixMap.format)
     190             :         {
     191             :                 case kOOPixMapRGBA:
     192             :                         SqueezeHorizontally4(pixMap, dstHeight);
     193             :                         return;
     194             :                         
     195             :                 case kOOPixMapGrayscale:
     196             :                         SqueezeHorizontally1(pixMap, dstHeight);
     197             :                         return;
     198             :                         
     199             :                 case kOOPixMapGrayscaleAlpha:
     200             :                         SqueezeHorizontally2(pixMap, dstHeight);
     201             :                         return;
     202             :                         
     203             :                 case kOOPixMapInvalidFormat:
     204             :                         break;
     205             :         }
     206             :         
     207             : #ifndef NDEBUG
     208             :         [NSException raise:NSInternalInconsistencyException format:@"Unsupported pixmap format in scaler: %@", OOPixMapFormatName(pixMap.format)];
     209             : #else
     210             :         abort();
     211             : #endif  
     212             : }
     213             : 
     214             : 
     215             : #if DUMP_MIP_MAPS || DUMP_SCALE
     216             : // NOTE: currently only works on OS X because of OSAtomicAdd32() (used to increment ID counter in thread-safe way). A simple increment would be sufficient if limited to a single thread (in OOTextureLoader).
     217             : volatile int32_t sPreviousDumpID                = 0;
     218             : int32_t OSAtomicAdd32(int32_t __theAmount, volatile int32_t *__theValue);
     219             : 
     220             : #endif
     221             : 
     222             : #if DUMP_MIP_MAPS
     223             : #define DUMP_CHANNELS           -1              // Bitmap of channel counts - -1 for all dumps
     224             : 
     225             : #define DUMP_MIP_MAP_PREPARE(pl)                uint32_t dumpPlanes = pl; \
     226             :                                                                                 uint32_t dumpLevel = 0; \
     227             :                                                                                 BOOL dumpThis = (dumpPlanes & DUMP_CHANNELS) != 0; \
     228             :                                                                                 SInt32 dumpID = dumpThis ? OSAtomicAdd32(1, &sPreviousDumpID) : 0;
     229             : #define DUMP_MIP_MAP_DUMP(px, w, h)             if (dumpThis) DumpMipMap(px, w, h, dumpPlanes, dumpID, dumpLevel++);
     230             : static void DumpMipMap(void *data, OOPixMapDimension width, OOPixMapDimension height, OOPixMapFormat format, SInt32 ID, uint32_t level);
     231             : #else
     232           0 : #define DUMP_MIP_MAP_PREPARE(pl)                do { (void)pl; } while (0)
     233           0 : #define DUMP_MIP_MAP_DUMP(px, w, h)             do { (void)px; (void)w; (void)h; } while (0)
     234             : #endif
     235             : 
     236             : #if DUMP_SCALE
     237             : #define DUMP_SCALE_PREPARE()                    SInt32 dumpID = OSAtomicAdd32(1, &sPreviousDumpID), dumpCount = 0;
     238             : #define DUMP_SCALE_DUMP(PM, stage)              do { OOPixMap *pm = &(PM); OODumpPixMap(*pm, [NSString stringWithFormat:@"scaling dump ID %u stage %u-%@ %ux%u", dumpID, dumpCount++, stage, pm->width, pm->height]); } while (0)
     239             : #else
     240           0 : #define DUMP_SCALE_PREPARE()
     241           0 : #define DUMP_SCALE_DUMP(PM, stage)              do {} while (0)
     242             : #endif
     243             : 
     244             : 
     245           0 : OOPixMap OOScalePixMap(OOPixMap srcPx, OOPixMapDimension dstWidth, OOPixMapDimension dstHeight, BOOL leaveSpaceForMipMaps)
     246             : {
     247             :         OOPixMap                        dstPx = {0}, sparePx = {0};
     248             :         BOOL                            OK = YES;
     249             :         
     250             :         //      Sanity check.
     251             :         if (EXPECT_NOT(!OOIsValidPixMap(srcPx)))
     252             :         {
     253             :                 OOLogGenericParameterError();
     254             :                 free(srcPx.pixels);
     255             :                 return kOONullPixMap;
     256             :         }
     257             :         
     258             :         DUMP_SCALE_PREPARE();
     259             :         DUMP_SCALE_DUMP(srcPx, @"initial");
     260             :         
     261             :         if (srcPx.height < dstHeight)
     262             :         {
     263             :                 // Stretch vertically. This requires a separate buffer.
     264             :                 size_t dstSize = srcPx.rowBytes * dstHeight;
     265             :                 if (leaveSpaceForMipMaps && dstWidth <= srcPx.width)  dstSize = dstSize * 4 / 3;
     266             :                 
     267             :                 dstPx = OOAllocatePixMap(srcPx.width, dstHeight, srcPx.format, 0, dstSize);
     268             :                 if (EXPECT_NOT(!OOIsValidPixMap(dstPx)))  { OK = NO; goto FAIL; }
     269             :                 
     270             :                 StretchVertically(srcPx, dstPx);
     271             :                 DUMP_SCALE_DUMP(dstPx, @"stretched vertically");
     272             :                 
     273             :                 sparePx = srcPx;
     274             :                 srcPx = dstPx;
     275             :         }
     276             :         else if (dstHeight < srcPx.height)
     277             :         {
     278             :                 // Squeeze vertically. This can be done in-place.
     279             :                 SqueezeVertically(srcPx, dstHeight);
     280             :                 srcPx.height = dstHeight;
     281             :                 DUMP_SCALE_DUMP(srcPx, @"squeezed vertically");
     282             :         }
     283             :         
     284             :         if (srcPx.width < dstWidth)
     285             :         {
     286             :                 // Stretch horizontally. This requires a separate buffer.
     287             :                 size_t dstSize = OOPixMapBytesPerPixel(srcPx) * dstWidth * srcPx.height;
     288             :                 if (leaveSpaceForMipMaps)  dstSize = dstSize * 4 / 3;
     289             :                 
     290             :                 if (dstSize <= sparePx.bufferSize)
     291             :                 {
     292             :                         dstPx = OOMakePixMap(sparePx.pixels, dstWidth, srcPx.height, srcPx.format, 0, sparePx.bufferSize);
     293             :                         sparePx = kOONullPixMap;
     294             :                 }
     295             :                 else
     296             :                 {
     297             :                         dstPx = OOAllocatePixMap(dstWidth, srcPx.height, srcPx.format, 0, dstSize);
     298             :                 }
     299             :                 if (EXPECT_NOT(!OOIsValidPixMap(dstPx)))  { OK = NO; goto FAIL; }
     300             :                 
     301             :                 StretchHorizontally(srcPx, dstPx);
     302             :                 DUMP_SCALE_DUMP(dstPx, @"stretched horizontally");
     303             :         }
     304             :         else if (dstWidth < srcPx.width)
     305             :         {
     306             :                 // Squeeze horizontally. This can be done in-place.
     307             :                 SqueezeHorizontally(srcPx, dstWidth);
     308             :                 
     309             :                 dstPx = srcPx;
     310             :                 dstPx.width = dstWidth;
     311             :                 dstPx.rowBytes = dstPx.width * OOPixMapBytesPerPixel(dstPx);
     312             :                 DUMP_SCALE_DUMP(dstPx, @"squeezed horizontally");
     313             :         }
     314             :         else
     315             :         {
     316             :                 // No horizontal scaling.
     317             :                 dstPx = srcPx;
     318             :         }
     319             :         
     320             :         // Avoid a potential double free (if the realloc in EnsureCorrectDataSize() relocates the block).
     321             :         if (srcPx.pixels == dstPx.pixels)  srcPx.pixels = NULL;
     322             :         
     323             :         // dstPx is now the result.
     324             :         OK = EnsureCorrectDataSize(&dstPx, leaveSpaceForMipMaps);
     325             :         
     326             : FAIL:
     327             :         free(srcPx.pixels);
     328             :         if (sparePx.pixels != dstPx.pixels && sparePx.pixels != srcPx.pixels)
     329             :         {
     330             :                 free(sparePx.pixels);
     331             :         }
     332             :         if (!OK)
     333             :         {
     334             :                 free(dstPx.pixels);
     335             :                 dstPx.pixels = NULL;
     336             :         }
     337             :         
     338             :         return OK ? dstPx : kOONullPixMap;
     339             : }
     340             : 
     341             : 
     342             : // FIXME: should take an OOPixMap.
     343           0 : BOOL OOGenerateMipMaps(void *textureBytes, OOPixMapDimension width, OOPixMapDimension height, OOPixMapFormat format)
     344             : {
     345             :         if (EXPECT_NOT(width != OORoundUpToPowerOf2_PixMap(width) || height != OORoundUpToPowerOf2_PixMap(height)))
     346             :         {
     347             :                 OOLog(kOOLogParameterError, @"Non-power-of-two dimensions (%ux%u) passed to %s() - ignoring, data will be junk.", width, height, __PRETTY_FUNCTION__);
     348             :                 return NO;
     349             :         }
     350             :         if (EXPECT_NOT(textureBytes == NULL))
     351             :         {
     352             :                 OOLog(kOOLogParameterError, @"%@", @"NULL texture pointer passed to GenerateMipMaps().");
     353             :                 return NO;
     354             :         }
     355             :         
     356             :         switch (format)
     357             :         {
     358             :                 case kOOPixMapRGBA:
     359             :                         return GenerateMipMaps4(textureBytes, width, height);
     360             :                         
     361             :                 case kOOPixMapGrayscale:
     362             :                         return GenerateMipMaps1(textureBytes, width, height);
     363             :                         
     364             :                 case kOOPixMapGrayscaleAlpha:
     365             :                         return GenerateMipMaps2(textureBytes, width, height);
     366             :                         
     367             :                 case kOOPixMapInvalidFormat:
     368             :                         break;
     369             :         }
     370             :         
     371             : 
     372             :         OOLog(kOOLogParameterError, @"%s(): bad pixmap format (%@) - ignoring, data will be junk.", __PRETTY_FUNCTION__, OOPixMapFormatName(format));
     373             :         return NO;
     374             : }
     375             : 
     376             : 
     377           0 : static BOOL GenerateMipMaps1(void *textureBytes, OOPixMapDimension width, OOPixMapDimension height)
     378             : {
     379             :         OOPixMapDimension               w = width, h = height;
     380             :         uint8_t                                 *curr, *next;
     381             :         
     382             :         DUMP_MIP_MAP_PREPARE(1);
     383             :         curr = textureBytes;
     384             :         
     385             : #if OOLITE_NATIVE_64_BIT
     386             :         while (8 < w && 1 < h)
     387             :         {
     388             :                 DUMP_MIP_MAP_DUMP(curr, w, h);
     389             :                 
     390             :                 next = curr + w * h;
     391             :                 ScaleToHalf_1_x8(curr, next, w, h);
     392             :                 
     393             :                 w >>= 1;
     394             :                 h >>= 1;
     395             :                 curr = next;
     396             :         }
     397             : #else
     398             :         while (4 < w && 1 < h)
     399             :         {
     400             :                 DUMP_MIP_MAP_DUMP(curr, w, h);
     401             :                 
     402             :                 next = curr + w * h;
     403             :                 ScaleToHalf_1_x4(curr, next, w, h);
     404             :                 
     405             :                 w >>= 1;
     406             :                 h >>= 1;
     407             :                 curr = next;
     408             :         }
     409             : #endif
     410             :         
     411             :         while (1 < w && 1 < h)
     412             :         {
     413             :                 DUMP_MIP_MAP_DUMP(curr, w, h);
     414             :                 
     415             :                 next = curr + w * h;
     416             :                 ScaleToHalf_1_x1(curr, next, w, h);
     417             :                 
     418             :                 w >>= 1;
     419             :                 h >>= 1;
     420             :                 curr = next;
     421             :         }
     422             :         
     423             :         DUMP_MIP_MAP_DUMP(curr, w, h);
     424             :         
     425             :         // TODO: handle residual 1xN/Nx1 mips. For now, we just limit maximum mip level for non-square textures.
     426             :         return YES;
     427             : }
     428             : 
     429             : 
     430           0 : static void ScaleToHalf_1_x1(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight)
     431             : {
     432             :         OOPixMapDimension               x, y;
     433             :         uint8_t                                 *src0, *src1, *dst;
     434             :         uint_fast8_t                    px00, px01, px10, px11;
     435             :         uint_fast16_t                   sum;
     436             :         
     437             :         src0 = srcBytes;
     438             :         src1 = src0 + srcWidth;
     439             :         dst = dstBytes;
     440             :         
     441             :         y = srcHeight >> 1;
     442             :         do
     443             :         {
     444             :                 x = srcWidth >> 1;
     445             :                 do
     446             :                 {
     447             :                         // Read four pixels in a square...
     448             :                         px00 = *src0++;
     449             :                         px01 = *src0++;
     450             :                         px10 = *src1++;
     451             :                         px11 = *src1++;
     452             :                         
     453             :                         // ...add them together...
     454             :                         sum = px00 + px01 + px10 + px11;
     455             :                         
     456             :                         // ...shift the sum into place...
     457             :                         sum >>= 2;
     458             :                         
     459             :                         // ...and write output pixel.
     460             :                                 *dst++ = sum;
     461             :                 } while (--x);
     462             :                 
     463             :                 // Skip a row for each source row
     464             :                 src0 = src1;
     465             :                 src1 += srcWidth;
     466             :         } while (--y);
     467             : }
     468             : 
     469             : 
     470             : #if !OOLITE_NATIVE_64_BIT
     471             : 
     472           0 : static void ScaleToHalf_1_x4(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight)
     473             : {
     474             :         OOPixMapDimension               x, y;
     475             :         uint32_t                                *src0, *src1, *dst;
     476             :         uint_fast32_t                   px00, px01, px10, px11;
     477             :         uint_fast32_t                   sum0, sum1;
     478             :         
     479             :         srcWidth >>= 2;   // Four (output) pixels at a time
     480             :         src0 = srcBytes;
     481             :         src1 = src0 + srcWidth;
     482             :         dst = dstBytes;
     483             :         
     484             :         y = srcHeight >> 1;
     485             :         do
     486             :         {
     487             :                 x = srcWidth >> 1;
     488             :                 do
     489             :                 {
     490             :                         // Read 8 pixels in a 4x2 rectangle...
     491             :                         px00 = *src0++;
     492             :                         px01 = *src0++;
     493             :                         px10 = *src1++;
     494             :                         px11 = *src1++;
     495             :                         
     496             :                         // ...add them together.
     497             :                         sum0 =  (px00 & 0x00FF00FF) +
     498             :                                         (px10 & 0x00FF00FF) +
     499             :                                         ((px00 & 0xFF00FF00) >> 8) +
     500             :                                         ((px10 & 0xFF00FF00) >> 8);
     501             :                         sum1 =  (px01 & 0x00FF00FF) +
     502             :                                         (px11 & 0x00FF00FF) +
     503             :                                         ((px01 & 0xFF00FF00) >> 8) +
     504             :                                         ((px11 & 0xFF00FF00) >> 8);
     505             :                         
     506             :                         // ...swizzle the sums around...
     507             : #if OOLITE_BIG_ENDIAN
     508             :                         sum0 = ((sum0 << 6) & 0xFF000000) | ((sum0 << 14) & 0x00FF0000);
     509             :                         sum1 = ((sum1 >> 10) & 0x0000FF00) | ((sum1 >>2) & 0x000000FF);
     510             : #elif OOLITE_LITTLE_ENDIAN
     511             :                         sum0 = ((sum0 >> 10) & 0x0000FF00) | ((sum0 >>2) & 0x000000FF);
     512             :                         sum1 = ((sum1 << 6) & 0xFF000000) | ((sum1 << 14) & 0x00FF0000);
     513             : #else
     514             :                         #error Neither OOLITE_BIG_ENDIAN nor OOLITE_LITTLE_ENDIAN is defined as nonzero!
     515             : #endif
     516             :                         
     517             :                         // ...and write output pixel.
     518             :                                 *dst++ = sum0 | sum1;
     519             :                 } while (--x);
     520             :                 
     521             :                 // Skip a row for each source row
     522             :                 src0 = src1;
     523             :                 src1 += srcWidth;
     524             :         } while (--y);
     525             : }
     526             : 
     527             : #else   // OOLITE_NATIVE_64_BIT
     528             : 
     529             : static void ScaleToHalf_1_x8(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight)
     530             : {
                               /*      Halve a one-byte-per-pixel image in both directions, producing
                                       eight output pixels (one 64-bit word) per inner-loop iteration.
                                       
                                       srcBytes:  source pixels. The second row is taken to start exactly
                                                  srcWidth pixels after the first, so rows are treated as
                                                  contiguous (no row padding is allowed for).
                                       dstBytes:  destination; written contiguously, one 64-bit word at
                                                  a time.
                                       srcWidth, srcHeight: source dimensions in pixels.
                                       
                                       Both loops are do/while, so the body runs before the counter is
                                       tested: srcWidth >> 4 and srcHeight >> 1 are assumed to be nonzero
                                       on entry -- presumably guaranteed by the caller; TODO confirm.
                               */
     531             :         OOPixMapDimension               x, y;
     532             :         uint64_t                                *src0, *src1;
     533             :         uint64_t                                *dst;
     534             :         uint_fast64_t                   px00, px01, px10, px11;
     535             :         uint_fast64_t                   sum0, sum1;
     536             :         
                               // From here on srcWidth counts 64-bit words per source row.
     537             :         srcWidth >>= 3;   // Eight (output) pixels at a time
     538             :         src0 = srcBytes;
     539             :         src1 = src0 + srcWidth;
     540             :         dst = dstBytes;
     541             :         
     542             :         y = srcHeight >> 1;
     543             :         do
     544             :         {
                                       // Each iteration consumes two words from each of the two rows.
     545             :                 x = srcWidth >> 1;
     546             :                 do
     547             :                 {
     548             :                         // Read 16 pixels in an 8x2 rectangle...
     549             :                         px00 = *src0++;
     550             :                         px01 = *src0++;
     551             :                         px10 = *src1++;
     552             :                         px11 = *src1++;
     553             :                         
     554             :                         // ...add them together...
                                               // Even and odd bytes are separated into 16-bit lanes, so each
                                               // lane ends up holding the sum of one 2x2 block. That sum is at
                                               // most 4 * 0xFF = 0x3FC and cannot carry into the next lane.
     555             :                         sum0 =  ((px00 & 0x00FF00FF00FF00FFULL)) +
     556             :                                         ((px10 & 0x00FF00FF00FF00FFULL)) +
     557             :                                         ((px00 & 0xFF00FF00FF00FF00ULL) >> 8) +
     558             :                                         ((px10 & 0xFF00FF00FF00FF00ULL) >> 8);
     559             :                         sum1 =  ((px01 & 0x00FF00FF00FF00FFULL)) +
     560             :                                         ((px11 & 0x00FF00FF00FF00FFULL)) +
     561             :                                         ((px01 & 0xFF00FF00FF00FF00ULL) >> 8) +
     562             :                                         ((px11 & 0xFF00FF00FF00FF00ULL) >> 8);
     563             :                         
     564             :                         // ...swizzle the sums around...
                                               // Every shift amount includes a divide by four (>> 2) and moves
                                               // bits 2-9 of one lane (the 8-bit average) into one output byte.
                                               // sum0 fills four bytes of the output word and sum1 the other
                                               // four; which half each one gets depends on the byte order.
     565             : #if OOLITE_BIG_ENDIAN
     566             :                         sum0 =  ((sum0 << 06) & 0xFF00000000000000ULL) |
     567             :                                         ((sum0 << 14) & 0x00FF000000000000ULL) |
     568             :                                         ((sum0 << 22) & 0x0000FF0000000000ULL) |
     569             :                                         ((sum0 << 30) & 0x000000FF00000000ULL);
     570             :                         sum1 =  ((sum1 >> 26) & 0x00000000FF000000ULL) |
     571             :                                         ((sum1 >> 18) & 0x0000000000FF0000ULL) |
     572             :                                         ((sum1 >> 10) & 0x000000000000FF00ULL) |
     573             :                                         ((sum1 >> 02) & 0x00000000000000FFULL);
     574             : #elif OOLITE_LITTLE_ENDIAN
     575             :                         sum0 =  ((sum0 >> 26) & 0x00000000FF000000ULL) |
     576             :                                         ((sum0 >> 18) & 0x0000000000FF0000ULL) |
     577             :                                         ((sum0 >> 10) & 0x000000000000FF00ULL) |
     578             :                                         ((sum0 >> 02) & 0x00000000000000FFULL);
     579             :                         sum1 =  ((sum1 << 06) & 0xFF00000000000000ULL) |
     580             :                                         ((sum1 << 14) & 0x00FF000000000000ULL) |
     581             :                                         ((sum1 << 22) & 0x0000FF0000000000ULL) |
     582             :                                         ((sum1 << 30) & 0x000000FF00000000ULL);
     583             : #else
     584             :                         #error Neither OOLITE_BIG_ENDIAN nor OOLITE_LITTLE_ENDIAN is defined as nonzero!
     585             : #endif
     586             :                         // ...and write output pixel.
     587             :                                 *dst++ = sum0 | sum1;
     588             :                 } while (--x);
     589             :                 
     590             :                 // Skip a row for each source row
                                       // src0 has just walked the upper row and src1 the lower one, so
                                       // src1 now marks the start of the next row pair.
     591             :                 src0 = src1;
     592             :                 src1 += srcWidth;
     593             :         } while (--y);
     594             : }
     595             : 
     596             : #endif
     597             : 
     598             : 
     599           0 : static BOOL GenerateMipMaps2(void *textureBytes, OOPixMapDimension width, OOPixMapDimension height)
     600             : {
                               /*      Fill in the chain of half-size levels for a texture with two
                                       bytes per pixel (levels are addressed through uint16_t pointers).
                                       
                                       textureBytes holds the width x height base level. Each smaller
                                       level is written directly after the level it was made from
                                       (next = curr + w * h), so the buffer must already have room for
                                       the whole chain -- presumably allocated by the caller; TODO confirm.
                                       
                                       Halving continues while both dimensions are greater than one.
                                       Always returns YES.
                                       
                                       DUMP_MIP_MAP_PREPARE and DUMP_MIP_MAP_DUMP are defined outside this
                                       function; presumably debugging hooks tied to DUMP_MIP_MAPS -- confirm.
                               */
     601             :         OOPixMapDimension               w = width, h = height;
     602             :         uint16_t                                *curr, *next;
     603             :         
     604             :         DUMP_MIP_MAP_PREPARE(2);
     605             :         curr = textureBytes;
     606             :         
     607             :         // TODO: multiple pixel two-plane scalers.
                               // The block below is compiled out (#if 0); only the one-pixel-at-a-time
                               // loop after it is active.
     608             : #if 0
     609             : #if OOLITE_NATIVE_64_BIT
     610             :         while (4 < w && 1 < h)
     611             :         {
     612             :                 DUMP_MIP_MAP_DUMP(curr, w, h);
     613             :                 
     614             :                 next = curr + w * h;
     615             :                 ScaleToHalf_2_x4(curr, next, w, h);
     616             :                 
     617             :                 w >>= 1;
     618             :                 h >>= 1;
     619             :                 curr = next;
     620             :         }
     621             : #else
     622             :         while (2 < w && 1 < h)
     623             :         {
     624             :                 DUMP_MIP_MAP_DUMP(curr, w, h);
     625             :                 
     626             :                 next = curr + w * h;
     627             :                 ScaleToHalf_2_x2(curr, next, w, h);
     628             :                 
     629             :                 w >>= 1;
     630             :                 h >>= 1;
     631             :                 curr = next;
     632             :         }
     633             : #endif
     634             : #endif
     635             :         
     636             :         while (1 < w && 1 < h)
     637             :         {
     638             :                 DUMP_MIP_MAP_DUMP(curr, w, h);
     639             :                 
                                       // The next level starts immediately after the current one.
     640             :                 next = curr + w * h;
     641             :                 ScaleToHalf_2_x1(curr, next, w, h);
     642             :                 
     643             :                 w >>= 1;
     644             :                 h >>= 1;
     645             :                 curr = next;
     646             :         }
     647             :         
                               // Dump the last level produced (or the base level if no halving ran).
     648             :         DUMP_MIP_MAP_DUMP(curr, w, h);
     649             :         
     650             :         // TODO: handle residual 1xN/Nx1 mips. For now, we just limit maximum mip level for non-square textures.
     651             :         return YES;
     652             : }
     653             : 
     654             : 
     655           0 : static void ScaleToHalf_2_x1(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight)
     656             : {
                               /*      Halve an image with two bytes per pixel, treating each pixel as
                                       two independent 8-bit channels, one output pixel per iteration.
                                       Every output pixel is the per-channel average of a 2x2 block.
                                       
                                       srcBytes:  source pixels; rows are treated as contiguous (the
                                                  second row starts srcWidth pixels after the first).
                                       dstBytes:  destination; written contiguously.
                                       srcWidth, srcHeight: source dimensions in pixels.
                                       
                                       Both loops are do/while, so srcWidth >> 1 and srcHeight >> 1 are
                                       assumed to be nonzero on entry -- presumably guaranteed by the
                                       caller; TODO confirm.
                               */
     657             :         OOPixMapDimension               x, y;
     658             :         uint16_t                                *src0, *src1, *dst;
     659             :         uint_fast16_t                   px00, px01, px10, px11;
     660             :         uint_fast32_t                   sumHi, sumLo;
     661             :         
     662             :         src0 = srcBytes;
     663             :         src1 = src0 + srcWidth;
     664             :         dst = dstBytes;
     665             :         
     666             :         y = srcHeight >> 1;
     667             :         do
     668             :         {
     669             :                 x = srcWidth >> 1;
     670             :                 do
     671             :                 {
     672             :                         // Read four pixels in a square...
     673             :                         px00 = *src0++;
     674             :                         px01 = *src0++;
     675             :                         px10 = *src1++;
     676             :                         px11 = *src1++;
     677             :                         
     678             :                         // ...add them together...
                                               // sumHi occupies bits 8-17 and sumLo bits 0-9, hence the 32-bit
                                               // accumulators.
     679             :                         sumHi = (px00 & 0xFF00) + (px01 & 0xFF00) + (px10 & 0xFF00) + (px11 & 0xFF00);
     680             :                         sumLo = (px00 & 0x00FF) + (px01 & 0x00FF) + (px10 & 0x00FF) + (px11 & 0x00FF);
     681             :                         
     682             :                         // ...merge and shift the sum into place...
                                               // The 0x3FC00 mask drops the two lowest bits of the high sum so
                                               // that, after the divide-by-four shift, they do not land in the
                                               // top bits of the low channel.
     683             :                         sumLo = ((sumHi & 0x3FC00) | sumLo) >> 2;
     684             :                         
     685             :                         // ...and write output pixel.
     686             :                         *dst++ = sumLo;
     687             :                 } while (--x);
     688             :                 
     689             :                 // Skip a row for each source row
                                       // src1 has just walked the lower row, so it now marks the start
                                       // of the next row pair.
     690             :                 src0 = src1;
     691             :                 src1 += srcWidth;
     692             :         } while (--y);
     693             : }
     694             : 
     695             : 
     696           0 : static BOOL GenerateMipMaps4(void *textureBytes, OOPixMapDimension width, OOPixMapDimension height)
     697             : {
                               /*      Fill in the chain of half-size levels for a texture with four
                                       bytes per pixel (levels are addressed through uint32_t pointers).
                                       
                                       textureBytes holds the width x height base level. Each smaller
                                       level is written directly after the level it was made from
                                       (next = curr + w * h), so the buffer must already have room for
                                       the whole chain -- presumably allocated by the caller; TODO confirm.
                                       
                                       With OOLITE_NATIVE_64_BIT the two-pixels-at-a-time scaler is used
                                       while the width is greater than two, followed by at most one pass
                                       of the one-pixel scaler; otherwise the one-pixel scaler is used
                                       throughout. Halving stops once either dimension reaches one.
                                       Always returns YES.
                                       
                                       DUMP_MIP_MAP_PREPARE and DUMP_MIP_MAP_DUMP are defined outside this
                                       function; presumably debugging hooks tied to DUMP_MIP_MAPS -- confirm.
                               */
     698             :         OOPixMapDimension               w = width, h = height;
     699             :         uint32_t                                *curr, *next;
     700             :         
     701             :         DUMP_MIP_MAP_PREPARE(4);
     702             :         curr = textureBytes;
     703             :         
     704             : #if OOLITE_NATIVE_64_BIT
     705             :         while (2 < w && 1 < h)
     706             :         {
     707             :                 DUMP_MIP_MAP_DUMP(curr, w, h);
     708             :                 
     709             :                 next = curr + w * h;
     710             :                 ScaleToHalf_4_x2(curr, next, w, h);
     711             :                 
     712             :                 w >>= 1;
     713             :                 h >>= 1;
     714             :                 curr = next;
     715             :         }
                               // At most one more halving is possible here (w is 2 or less), and it
                               // needs the one-pixel-at-a-time scaler.
     716             :         if (EXPECT(1 < w && 1 < h))
     717             :         {
     718             :                 DUMP_MIP_MAP_DUMP(curr, w, h);
     719             :                 
     720             :                 next = curr + w * h;
     721             :                 ScaleToHalf_4_x1(curr, next, w, h);
     722             :                 
     723             :                 w >>= 1;
     724             :                 h >>= 1;
                                       // NOTE(review): unlike the loops, this branch does not set
                                       // curr = next, so the final DUMP_MIP_MAP_DUMP below is handed the
                                       // previous level's pointer with the new w/h. Looks unintended, and
                                       // presumably only matters when dumping is enabled -- confirm.
     725             :         }
     726             : #else
     727             :         while (1 < w && 1 < h)
     728             :         {
     729             :                 DUMP_MIP_MAP_DUMP(curr, w, h);
     730             :                 
     731             :                 next = curr + w * h;
     732             :                 ScaleToHalf_4_x1(curr, next, w, h);
     733             :                 
     734             :                 w >>= 1;
     735             :                 h >>= 1;
     736             :                 curr = next;
     737             :         }
     738             : #endif
     739             :         
     740             :         DUMP_MIP_MAP_DUMP(curr, w, h);
     741             :         
     742             :         // TODO: handle residual 1xN/Nx1 mips. For now, we just limit maximum mip level for non-square textures.
     743             :         return YES;
     744             : }
     745             : 
     746             : 
     747           0 : static void ScaleToHalf_4_x1(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight)
     748             : {
                               /*      Halve an image with four bytes per pixel (four 8-bit channels),
                                       one output pixel per iteration. Every output pixel is the
                                       per-channel average of a 2x2 block of source pixels.
                                       
                                       srcBytes:  source pixels; rows are treated as contiguous (the
                                                  second row starts srcWidth pixels after the first).
                                       dstBytes:  destination; written contiguously.
                                       srcWidth, srcHeight: source dimensions in pixels.
                                       
                                       Both loops are do/while, so srcWidth >> 1 and srcHeight >> 1 are
                                       assumed to be nonzero on entry -- presumably guaranteed by the
                                       caller; TODO confirm.
                               */
     749             :         OOPixMapDimension               x, y;
     750             :         uint32_t                                *src0, *src1, *dst;
     751             :         uint_fast32_t                   px00, px01, px10, px11;
     752             :         
     753             :         /*      We treat channel layout as ABGR -- actual layout doesn't matter since
     754             :                 each channel is handled the same. We use two accumulators, with
     755             :                 alternating channels, so overflow doesn't cross channel boundaries,
     756             :                 while having less overhead than one accumulator per channel.
     757             :         */
     758             :         uint_fast32_t                   ag, br;
     759             :         
     760             :         src0 = srcBytes;
     761             :         src1 = src0 + srcWidth;
     762             :         dst = dstBytes;
     763             :         
     764             :         y = srcHeight >> 1;
     765             :         do
     766             :         {
     767             :                 x = srcWidth >> 1;
     768             :                 do
     769             :                 {
     770             :                         // Read four pixels in a square...
     771             :                         px00 = *src0++;
     772             :                         px01 = *src0++;
     773             :                         px10 = *src1++;
     774             :                         px11 = *src1++;
     775             :                         
     776             :                         // ...and add them together, channel by channel.
                                               // ag is shifted down by 8 first so both accumulators keep their
                                               // channels in the low byte of a 16-bit lane; each lane sum is at
                                               // most 4 * 0xFF = 0x3FC.
     777             :                         ag =  (px00 & 0xFF00FF00) >> 8;
     778             :                         br =  (px00 & 0x00FF00FF);
     779             :                         ag += (px01 & 0xFF00FF00) >> 8;
     780             :                         br += (px01 & 0x00FF00FF);
     781             :                         ag += (px10 & 0xFF00FF00) >> 8;
     782             :                         br += (px10 & 0x00FF00FF);
     783             :                         ag += (px11 & 0xFF00FF00) >> 8;
     784             :                         br += (px11 & 0x00FF00FF);
     785             :                         
     786             :                         // Shift the sums into place...
                                               // << 6 is the divide by four (>> 2) combined with moving the
                                               // a/g channels back up by 8 bits.
     787             :                         ag <<= 6;
     788             :                         br >>= 2;
     789             :                         
     790             :                         // ...and write output pixel.
                                               // The masks discard the leftover low-order sum bits next to
                                               // each channel.
     791             :                         *dst++ = (ag & 0xFF00FF00) | (br & 0x00FF00FF);
     792             :                 } while (--x);
     793             :                 
     794             :                 // Skip a row for each source row
                                       // src1 has just walked the lower row, so it now marks the start
                                       // of the next row pair.
     795             :                 src0 = src1;
     796             :                 src1 += srcWidth;
     797             :         } while (--y);
     798             : }
     799             : 
     800             : 
     801             : #if OOLITE_NATIVE_64_BIT
     802             : 
     803             : static void ScaleToHalf_4_x2(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight)
     804             : {
                               /*      Halve an image with four bytes per pixel using 64-bit words, so
                                       each iteration reads a 4x2 block of source pixels and writes two
                                       output pixels. Every output pixel is the per-channel average of a
                                       2x2 block.
                                       
                                       srcBytes:  source pixels; rows are treated as contiguous.
                                       dstBytes:  destination; one word (two pixels) is written per
                                                  iteration, contiguously.
                                       srcWidth, srcHeight: source dimensions in pixels.
                                       
                                       Both loops are do/while, so srcWidth >> 2 and srcHeight >> 1 are
                                       assumed to be nonzero on entry -- presumably guaranteed by the
                                       caller; TODO confirm.
                                       
                                       NOTE(review): the pointers are declared uint_fast64_t *, so the
                                       pointer arithmetic and the shifts by 32 assume that type is exactly
                                       64 bits wide -- confirm for the supported targets.
                               */
     805             :         OOPixMapDimension               x, y;
     806             :         uint_fast64_t                   *src0, *src1, *dst;
     807             :         uint_fast64_t                   px00, px01, px10, px11;
     808             :         
     809             :         /*      We treat channel layout as ABGR -- actual layout doesn't matter since
     810             :                 each channel is handled the same. We use two accumulators, with
     811             :                 alternating channels, so overflow doesn't cross channel boundaries,
     812             :                 while having less overhead than one accumulator per channel.
     813             :         */
     814             :         uint_fast64_t                   ag0, ag1, br0, br1;
     815             :         
     816             :         srcWidth >>= 1;           // Two pixels (one 64-bit word) at a time
     817             :         src0 = srcBytes;
     818             :         src1 = src0 + srcWidth;
     819             :         dst = dstBytes;
     820             :         
     821             :         y = srcHeight >> 1;
     822             :         do
     823             :         {
                                       // Each iteration consumes two words from each of the two rows.
     824             :                 x = srcWidth >> 1;
     825             :                 do
     826             :                 {
     827             :                         // Read eight pixels (4x2)...
     828             :                         px00 = *src0++;
     829             :                         px01 = *src0++;
     830             :                         px10 = *src1++;
     831             :                         px11 = *src1++;
     832             :                         
     833             :                         // ...and add them together, channel by channel.
                                               // This step only adds vertically: word 0 of both rows goes into
                                               // ag0/br0 and word 1 into ag1/br1.
     834             :                         ag0 =  (px00 & 0xFF00FF00FF00FF00ULL) >> 8;
     835             :                         br0 =  (px00 & 0x00FF00FF00FF00FFULL);
     836             :                         ag0 += (px10 & 0xFF00FF00FF00FF00ULL) >> 8;
     837             :                         br0 += (px10 & 0x00FF00FF00FF00FFULL);
     838             :                         ag1 =  (px01 & 0xFF00FF00FF00FF00ULL) >> 8;
     839             :                         br1 =  (px01 & 0x00FF00FF00FF00FFULL);
     840             :                         ag1 += (px11 & 0xFF00FF00FF00FF00ULL) >> 8;
     841             :                         br1 += (px11 & 0x00FF00FF00FF00FFULL);
     842             :                         
                                               // Horizontal step: adding a word to itself shifted by 32 bits sums
                                               // the two pixels it holds. The shift direction picks which 32-bit
                                               // half receives the total; the masks below then keep that half
                                               // (bits 2-9 of each 16-bit lane) from each accumulator.
     843             : #if OOLITE_BIG_ENDIAN
     844             :                         // Shift and add some more...
     845             :                         ag0 = ag0 + (ag0 << 32);
     846             :                         br0 = br0 + (br0 << 32);
     847             :                         ag1 = ag1 + (ag1 >> 32);
     848             :                         br1 = br1 + (br1 >> 32);
     849             :                         
     850             :                         // ...merge and shift some more...
     851             :                         ag0 = ((ag0 & 0x03FC03FC00000000ULL) | (ag1 & 0x0000000003FC03FCULL)) << 6;
     852             :                         br0 = ((br0 & 0x03FC03FC00000000ULL) | (br1 & 0x0000000003FC03FCULL)) >> 2;
     853             : #elif OOLITE_LITTLE_ENDIAN
     854             :                         // Shift and add some more...
     855             :                         ag0 = ag0 + (ag0 >> 32);
     856             :                         br0 = br0 + (br0 >> 32);
     857             :                         ag1 = ag1 + (ag1 << 32);
     858             :                         br1 = br1 + (br1 << 32);
     859             :                         
     860             :                         // ...merge and shift some more...
     861             :                         ag0 = ((ag0 & 0x0000000003FC03FCULL) | (ag1 & 0x03FC03FC00000000ULL)) << 6;
     862             :                         br0 = ((br0 & 0x0000000003FC03FCULL) | (br1 & 0x03FC03FC00000000ULL)) >> 2;
     863             : #else
     864             :         #error Unknown architecture.
     865             : #endif
     866             :                         
     867             :                         // ...and write output pixel.
     868             :                         *dst++ = ag0 | br0;
     869             :                 } while (--x);
     870             :                 
     871             :                 // Skip a row for each source row
                                       // src1 has just walked the lower row, so it now marks the start
                                       // of the next row pair.
     872             :                 src0 = src1;
     873             :                 src1 += srcWidth;
     874             :         } while (--y);
     875             : }
     876             : 
     877             : #endif
     878             : 
     879             : 
     880             : #if DUMP_MIP_MAPS
     881             : static void DumpMipMap(void *data, OOPixMapDimension width, OOPixMapDimension height, OOPixMapFormat format, SInt32 ID, uint32_t level)
     882             : {
                               /*      Debugging aid, compiled only when DUMP_MIP_MAPS is nonzero: wraps
                                       one mip level in an OOPixMap and hands it to OODumpPixMap under a
                                       name built from the ID, level, format name and dimensions.
                                       
                                       The last two OOMakePixMap arguments are passed as 0; their meaning
                                       is defined by OOMakePixMap, which is not visible here.
                                       
                                       NOTE(review): ID is a signed SInt32 but is formatted with %u --
                                       confirm that IDs are never negative.
                               */
     883             :         OOPixMap pixMap = OOMakePixMap(data, width, height, format, 0, 0);
     884             :         OODumpPixMap(pixMap, [NSString stringWithFormat:@"mipmap dump ID %u lv%u %@ %ux%u", ID, level, OOPixMapFormatName(format), width, height]);
     885             : }
     886             : #endif
     887             : 
     888             : 
     889           0 : static void StretchVerticallyN_x1(OOPixMap srcPx, OOPixMap dstPx)
     890             : {
                               /*      Resample srcPx vertically into dstPx one byte at a time, which
                                       makes it independent of the number of bytes per pixel. The width
                                       is taken from srcPx for both images.
                                       
                                       Each output row except the last is a byte-wise blend of two source
                                       rows: the row selected on the previous pass (src0, initially the
                                       first row) and the row at the integer part of the fixed-point
                                       source Y coordinate (src1). The low 8 bits of that coordinate give
                                       the weight of src1. Output rows are written back to back.
                                       
                                       dstPx.height is used as a divisor and as a != loop bound, so it
                                       must be nonzero.
                               */
     891             :         uint8_t                         *src, *src0, *src1, *prev, *dst;
     892             :         uint8_t                         px0, px1;
     893             :         uint_fast32_t           x, y, xCount;
     894             :         size_t                          srcRowBytes;
     895             :         uint_fast16_t           weight0, weight1;
     896             :         uint_fast32_t           fractY; // Y coordinate, fixed-point (24.8)
     897             :         
     898             :         src = srcPx.pixels;
     899             :         srcRowBytes = srcPx.rowBytes;
     900             :         dst = dstPx.pixels;     // Assumes dstPx.width == dstPx.rowBytes.
     901             :         
     902             :         src0 = prev = src;
     903             :         
                               // Number of bytes processed per row.
     904             :         xCount = srcPx.width * OOPixMapBytesPerPixel(srcPx);
     905             :         
     906             :         for (y = 1; y != dstPx.height; ++y)
     907             :         {
     908             :                 fractY = ((srcPx.height * y) << 8) / dstPx.height;
     909             :                 
                                       // Blend from the row chosen last time to the row chosen now.
     910             :                 src0 = prev;
     911             :                 prev = src1 = src + srcRowBytes * (fractY >> 8);
     912             :                 
                                       // The two weights always add up to 0x100.
     913             :                 weight1 = fractY & 0xFF;
     914             :                 weight0 = 0x100 - weight1;
     915             :                 
     916             :                 x = xCount;
     917             :                 while (x--)
     918             :                 {
     919             :                         px0 = *src0++;
     920             :                         px1 = *src1++;
     921             :                         
     922             :                         *dst++ = (px0 * weight0 + px1 * weight1) >> 8;
     923             :                 }
     924             :         }
     925             :         
     926             :         // Copy last row (without referring to the last-plus-oneth row)
                               // NOTE(review): src0 is not reset before this copy. It continues from
                               // where the last inner loop left it, i.e. xCount bytes past the row it
                               // was last pointed at (or at the first source row if the loop above
                               // never ran). Confirm this always stays inside the source buffer, and
                               // whether `src0 = prev` was intended here.
     927             :         x = xCount;
     928             :         while (x--)
     929             :         {
     930             :                 *dst++ = *src0++;
     931             :         }
     932             : }
     933             : 
     934             : 
     935             : #if !OOLITE_NATIVE_64_BIT
     936             : 
     937           0 : static void StretchVerticallyN_x4(OOPixMap srcPx, OOPixMap dstPx)
     938             : {
                               /*      Resample srcPx vertically into dstPx, four bytes (one 32-bit
                                       word) at a time. Compiled only when OOLITE_NATIVE_64_BIT is zero.
                                       
                                       Each output row except the last is a blend of two source rows: the
                                       row selected on the previous pass (src0, initially the first row)
                                       and the row at the integer part of the fixed-point source Y
                                       coordinate (src1). The low 8 bits of that coordinate give the
                                       weight of src1. Output rows are written back to back.
                                       
                                       The row length in bytes is divided by four with a shift, so any
                                       bytes beyond a multiple of four are not processed. dstPx.height is
                                       used as a divisor and as a != loop bound, so it must be nonzero.
                               */
     939             :         uint8_t                         *src;
     940             :         uint32_t                        *src0, *src1, *prev, *dst;
     941             :         uint32_t                        px0, px1, ag, br;
     942             :         uint_fast32_t           x, y, xCount;
     943             :         size_t                          srcRowBytes;
     944             :         uint_fast16_t           weight0, weight1;
     945             :         uint_fast32_t           fractY; // Y coordinate, fixed-point (24.8)
     946             :         
     947             :         src = srcPx.pixels;
     948             :         srcRowBytes = srcPx.rowBytes;
     949             :         dst = dstPx.pixels;     // Assumes no row padding.
     950             :         
     951             :         src0 = prev = (uint32_t *)src;
     952             :         
                               // Number of 32-bit words processed per row.
     953             :         xCount = (srcPx.width * OOPixMapBytesPerPixel(srcPx)) >> 2;
     954             :         
     955             :         for (y = 1; y != dstPx.height; ++y)
     956             :         {
     957             :                 fractY = ((srcPx.height * y) << 8) / dstPx.height;
     958             :                 
                                       // Blend from the row chosen last time to the row chosen now.
     959             :                 src0 = prev;
     960             :                 prev = src1 = (uint32_t *)(src + srcRowBytes * (fractY >> 8));
     961             :                 
                                       // The two weights always add up to 0x100.
     962             :                 weight1 = fractY & 0xFF;
     963             :                 weight0 = 0x100 - weight1;
     964             :                 
     965             :                 x = xCount;
     966             :                 while (x--)
     967             :                 {
     968             :                         px0 = *src0++;
     969             :                         px1 = *src1++;
     970             :                         
                                               // Alternate bytes are blended in separate 16-bit lanes. Because
                                               // the weights add up to 0x100, a lane is at most 0xFF * 0x100
                                               // and never carries into its neighbour. The blended value ends
                                               // up in the high byte of each lane.
     971             :                         ag = ((px0 & 0xFF00FF00) >> 8) * weight0 + ((px1 & 0xFF00FF00) >> 8) * weight1;
     972             :                         br = (px0 & 0x00FF00FF) * weight0 + (px1 & 0x00FF00FF) * weight1;
     973             :                         
     974             :                         *dst++ = (ag & 0xFF00FF00) | ((br >> 8) & 0x00FF00FF);
     975             :                 }
     976             :         }
     977             :         
     978             :         // Copy last row (without referring to the last-plus-oneth row)
                               // NOTE(review): src0 is not reset before this copy. It continues from
                               // where the last inner loop left it, i.e. xCount words past the row it
                               // was last pointed at (or at the first source row if the loop above
                               // never ran). Confirm this always stays inside the source buffer, and
                               // whether `src0 = prev` was intended here.
     979             :         x = xCount;
     980             :         while (x--)
     981             :         {
     982             :                 *dst++ = *src0++;
     983             :         }
     984             : }
     985             : 
     986             : #else   // OOLITE_NATIVE_64_BIT
     987             : 
     988             : static void StretchVerticallyN_x8(OOPixMap srcPx, OOPixMap dstPx)
     989             : {
                               /*      Resample srcPx vertically into dstPx, eight bytes (one 64-bit
                                       word) at a time. Compiled only when OOLITE_NATIVE_64_BIT is
                                       nonzero.
                                       
                                       Each output row except the last is a blend of two source rows: the
                                       row selected on the previous pass (src0, initially the first row)
                                       and the row at the integer part of the fixed-point source Y
                                       coordinate (src1). The low 8 bits of that coordinate give the
                                       weight of src1. Output rows are written back to back.
                                       
                                       The row length in bytes is divided by eight with a shift, so any
                                       bytes beyond a multiple of eight are not processed. dstPx.height
                                       is used as a divisor and as a != loop bound, so it must be nonzero.
                               */
     990             :         uint8_t                         *src;
     991             :         uint64_t                        *src0, *src1, *prev, *dst;
     992             :         uint64_t                        px0, px1, agag, brbr;
     993             :         uint_fast32_t           x, y, xCount;
     994             :         size_t                          srcRowBytes;
     995             :         uint_fast16_t           weight0, weight1;
     996             :         uint_fast32_t           fractY; // Y coordinate, fixed-point (24.8)
     997             :         
     998             :         src = srcPx.pixels;
     999             :         srcRowBytes = srcPx.rowBytes;
    1000             :         dst = dstPx.pixels;     // Assumes dstPx.width == dstPx.rowBytes.
    1001             :         
    1002             :         src0 = prev = (uint64_t *)src;
    1003             :         
                               // Number of 64-bit words processed per row.
    1004             :         xCount = (srcPx.width * OOPixMapBytesPerPixel(srcPx)) >> 3;
    1005             :         
    1006             :         for (y = 1; y != dstPx.height; ++y)
    1007             :         {
    1008             :                 fractY = ((srcPx.height * y) << 8) / dstPx.height;
    1009             :                 
                                       // Blend from the row chosen last time to the row chosen now.
    1010             :                 src0 = prev;
    1011             :                 prev = src1 = (uint64_t *)(src + srcRowBytes * (fractY >> 8));
    1012             :                 
                                       // The two weights always add up to 0x100.
    1013             :                 weight1 = fractY & 0xFF;
    1014             :                 weight0 = 0x100 - weight1;
    1015             :                 
    1016             :                 x = xCount;
    1017             :                 while (x--)
    1018             :                 {
    1019             :                         px0 = *src0++;
    1020             :                         px1 = *src1++;
    1021             :                         
                                               // Alternate bytes are blended in separate 16-bit lanes. Because
                                               // the weights add up to 0x100, a lane is at most 0xFF * 0x100
                                               // and never carries into its neighbour. The blended value ends
                                               // up in the high byte of each lane.
    1022             :                         agag = ((px0 & 0xFF00FF00FF00FF00ULL) >> 8) * weight0 + ((px1 & 0xFF00FF00FF00FF00ULL) >> 8) * weight1;
    1023             :                         brbr = (px0 & 0x00FF00FF00FF00FFULL) * weight0 + (px1 & 0x00FF00FF00FF00FFULL) * weight1;
    1024             :                         
    1025             :                         *dst++ = (agag & 0xFF00FF00FF00FF00ULL) | ((brbr >> 8) & 0x00FF00FF00FF00FFULL);
    1026             :                 }
    1027             :         }
    1028             :         
    1029             :         // Copy last row (without referring to the last-plus-oneth row)
                               // NOTE(review): src0 is not reset before this copy. It continues from
                               // where the last inner loop left it, i.e. xCount words past the row it
                               // was last pointed at (or at the first source row if the loop above
                               // never ran). Confirm this always stays inside the source buffer, and
                               // whether `src0 = prev` was intended here.
    1030             :         x = xCount;
    1031             :         while (x--)
    1032             :         {
    1033             :                 *dst++ = *src0++;
    1034             :         }
    1035             : }
    1036             : #endif
    1037             : 
    1038             : /* Resample a 1-byte-per-pixel pixmap horizontally: writes dstPx.height packed rows of dstPx.width pixels to dstPx.pixels, each a linear blend of two pixels from the matching srcPx row. */
    1039           0 : static void StretchHorizontally1(OOPixMap srcPx, OOPixMap dstPx)
    1040             : {
    1041             :         uint8_t                         *src, *srcStart, *dst;
    1042             :         uint8_t                         px0, px1;
    1043             :         uint_fast32_t           x, y, xCount;
    1044             :         size_t                          srcRowBytes;
    1045             :         uint_fast16_t           weight0, weight1;
    1046             :         uint_fast32_t           fractX, deltaX; // X coordinate, fixed-point (20.12), allowing widths up to 1 mebipixel
    1047             :         // Both pixmaps must be valid and use one byte per pixel.
    1048             :         NSCParameterAssert(OOIsValidPixMap(srcPx) && OOPixMapBytesPerPixel(srcPx) == 1 && OOIsValidPixMap(dstPx) && OOPixMapBytesPerPixel(dstPx) == 1);
    1049             :         
    1050             :         srcStart = srcPx.pixels;
    1051             :         srcRowBytes = srcPx.rowBytes;
    1052             :         xCount = dstPx.width;
    1053             :         dst = dstPx.pixels;     // Assumes no row padding
    1054             :         // Source distance covered by one destination pixel, in 20.12 fixed-point.
    1055             :         deltaX = (srcPx.width << 12) / dstPx.width;
    1056             :         px1 = *srcStart;
    1057             :         // All rows but the last. Here a fetch at index srcPx.width still lands inside the buffer, because another row follows.
    1058             :         for (y = 0; y < dstPx.height - 1; ++y)
    1059             :         {
    1060             :                 fractX = 0;
    1061             :                 
    1062             :                 for (x = 0; x!= xCount; ++x)
    1063             :                 {
    1064             :                         fractX += deltaX;
    1065             :                         // The top 8 bits of the 12-bit fraction weight the new sample; the two weights always sum to 0x100.
    1066             :                         weight1 = (fractX >> 4) & 0xFF;
    1067             :                         weight0 = 0x100 - weight1;
    1068             :                         // Blend the previously fetched pixel with the one at the integer part of fractX.
    1069             :                         px0 = px1;
    1070             :                         src = srcStart + (fractX >> 12);
    1071             :                         px1 = *src;
    1072             :                         
    1073             :                         *dst++ = (px0 * weight0 + px1 * weight1) >> 8;
    1074             :                 }
    1075             :                 // Step to the next source row (rowBytes may exceed the width) and prime px1 with its first pixel.
    1076             :                 srcStart = (uint8_t *)((char *)srcStart + srcRowBytes);
    1077             :                 px1 = *srcStart;
    1078             :         }
    1079             :         
    1080             :         // Scale last row without reading off end of buffer
    1081             :         fractX = 0;
    1082             :         for (x = 0; x!= xCount; ++x)
    1083             :         {
    1084             :                 fractX += deltaX;
    1085             :                 
    1086             :                 weight1 = (fractX >> 4) & 0xFF;
    1087             :                 weight0 = 0x100 - weight1;
    1088             :                 // The final index can equal srcPx.width (one past the buffer). Its fraction, and so weight1, is then 0, so keeping the old px1 changes no output.
    1089             :                 px0 = px1;
    1090             :                 src = srcStart + (fractX >> 12);
    1091             :                 if (EXPECT((fractX >> 12) < srcPx.width))  px1 = *src;
    1092             :                 
    1093             :                 *dst++ = (px0 * weight0 + px1 * weight1) >> 8;
    1094             :         }
    1095             : }
    1096             : 
    1097             : /* Resample a 2-byte-per-pixel pixmap horizontally: writes dstPx.height packed rows of dstPx.width pixels to dstPx.pixels, blending the high and low byte of each pixel independently. */
    1098           0 : static void StretchHorizontally2(OOPixMap srcPx, OOPixMap dstPx)
    1099             : {
    1100             :         uint16_t                        *src, *srcStart, *dst;
    1101             :         uint16_t                        px0, px1;
    1102             :         uint_fast32_t           hi, lo;
    1103             :         uint_fast32_t           x, y, xCount;
    1104             :         size_t                          srcRowBytes;
    1105             :         uint_fast16_t           weight0, weight1;
    1106             :         uint_fast32_t           fractX, deltaX; // X coordinate, fixed-point (20.12), allowing widths up to 1 mebipixel
    1107             :         // Both pixmaps must be valid and use two bytes per pixel.
    1108             :         NSCParameterAssert(OOIsValidPixMap(srcPx) && OOPixMapBytesPerPixel(srcPx) == 2 && OOIsValidPixMap(dstPx) && OOPixMapBytesPerPixel(dstPx) == 2);
    1109             :         
    1110             :         srcStart = srcPx.pixels;
    1111             :         srcRowBytes = srcPx.rowBytes;
    1112             :         xCount = dstPx.width;
    1113             :         dst = dstPx.pixels;     // Assumes no row padding
    1114             :         // Source distance covered by one destination pixel, in 20.12 fixed-point.
    1115             :         deltaX = (srcPx.width << 12) / dstPx.width;
    1116             :         px1 = *srcStart;
    1117             :         // All rows but the last. Here a fetch at index srcPx.width still lands inside the buffer, because another row follows.
    1118             :         for (y = 0; y < dstPx.height - 1; ++y)
    1119             :         {
    1120             :                 fractX = 0;
    1121             :                 
    1122             :                 for (x = 0; x!= xCount; ++x)
    1123             :                 {
    1124             :                         fractX += deltaX;
    1125             :                         // The top 8 bits of the 12-bit fraction weight the new sample; the two weights always sum to 0x100.
    1126             :                         weight1 = (fractX >> 4) & 0xFF;
    1127             :                         weight0 = 0x100 - weight1;
    1128             :                         
    1129             :                         px0 = px1;
    1130             :                         src = srcStart + (fractX >> 12);
    1131             :                         px1 = *src;
    1132             :                         // Weighted sums carry 8 extra fraction bits: hi ends up in bits 16-23, lo in bits 8-15.
    1133             :                         hi = (px0 & 0xFF00) * weight0 + (px1 & 0xFF00) * weight1;
    1134             :                         lo = (px0 & 0x00FF) * weight0 + (px1 & 0x00FF) * weight1;
    1135             :                         
    1136             :                         *dst++ = ((hi & 0xFF0000) | (lo & 0x00FF00)) >> 8;
    1137             :                 }
    1138             :                 // Step to the next source row (rowBytes may exceed the width) and prime px1 with its first pixel.
    1139             :                 srcStart = (uint16_t *)((char *)srcStart + srcRowBytes);
    1140             :                 px1 = *srcStart;
    1141             :         }
    1142             :         
    1143             :         // Scale last row without reading off end of buffer
    1144             :         fractX = 0;
    1145             :         for (x = 0; x!= xCount; ++x)
    1146             :         {
    1147             :                 fractX += deltaX;
    1148             :                 
    1149             :                 weight1 = (fractX >> 4) & 0xFF;
    1150             :                 weight0 = 0x100 - weight1;
    1151             :                 // The final index can equal srcPx.width (one past the buffer). Its fraction, and so weight1, is then 0, so keeping the old px1 changes no output.
    1152             :                 px0 = px1;
    1153             :                 src = srcStart + (fractX >> 12);
    1154             :                 if (EXPECT((fractX >> 12) < srcPx.width))  px1 = *src;
    1155             :                 
    1156             :                 hi = (px0 & 0xFF00) * weight0 + (px1 & 0xFF00) * weight1;
    1157             :                 lo = (px0 & 0x00FF) * weight0 + (px1 & 0x00FF) * weight1;
    1158             :                 
    1159             :                 *dst++ = ((hi & 0xFF0000) | (lo & 0x00FF00)) >> 8;
    1160             :         }
    1161             : }
    1162             : 
    1163             : /* Resample a 4-byte-per-pixel pixmap horizontally: writes dstPx.height packed rows of dstPx.width pixels to dstPx.pixels, blending the four bytes of each pixel two at a time. */
    1164           0 : static void StretchHorizontally4(OOPixMap srcPx, OOPixMap dstPx)
    1165             : {
    1166             :         uint32_t                        *src, *srcStart, *dst;
    1167             :         uint32_t                        px0, px1;
    1168             :         uint32_t                        ag, br;
    1169             :         uint_fast32_t           x, y, xCount;
    1170             :         size_t                          srcRowBytes;
    1171             :         uint_fast16_t           weight0, weight1;
    1172             :         uint_fast32_t           fractX, deltaX; // X coordinate, fixed-point (20.12), allowing widths up to 1 mebipixel
    1173             :         // Both pixmaps must be valid and use four bytes per pixel.
    1174             :         NSCParameterAssert(OOIsValidPixMap(srcPx) && OOPixMapBytesPerPixel(srcPx) == 4 && OOIsValidPixMap(dstPx) && OOPixMapBytesPerPixel(dstPx) == 4);
    1175             :         
    1176             :         srcStart = srcPx.pixels;
    1177             :         srcRowBytes = srcPx.rowBytes;
    1178             :         xCount = dstPx.width;
    1179             :         dst = dstPx.pixels;     // Assumes no row padding
    1180             :         // Source distance covered by one destination pixel, in 20.12 fixed-point.
    1181             :         deltaX = (srcPx.width << 12) / dstPx.width;
    1182             :         px1 = *srcStart;
    1183             :         // All rows but the last are handled here; the last row gets its own loop below.
    1184             :         for (y = 0; y < dstPx.height - 1; ++y)
    1185             :         {
    1186             :                 fractX = 0;
    1187             :                 
    1188             :                 for (x = 0; x!= xCount; ++x)
    1189             :                 {
    1190             :                         fractX += deltaX;
    1191             :                         // The top 8 bits of the 12-bit fraction weight the new sample; the two weights always sum to 0x100.
    1192             :                         weight1 = (fractX >> 4) & 0xFF;
    1193             :                         weight0 = 0x100 - weight1;
    1194             :                         
    1195             :                         px0 = px1;
    1196             :                         src = srcStart + (fractX >> 12);
    1197             :                         px1 = *src;
    1198             :                         // ag blends bytes 3 and 1 (pre-shifted down 8 bits), br blends bytes 2 and 0; each byte sits in its own 16-bit lane.
    1199             :                         ag = ((px0 & 0xFF00FF00) >> 8) * weight0 + ((px1 & 0xFF00FF00) >> 8) * weight1;
    1200             :                         br = (px0 & 0x00FF00FF) * weight0 + (px1 & 0x00FF00FF) * weight1;
    1201             :                         // The high byte of every lane is the blended value; put each one back in its original position.
    1202             :                         *dst++ = (ag & 0xFF00FF00) | ((br & 0xFF00FF00) >> 8);
    1203             :                 }
    1204             :                 // Step to the next source row (rowBytes may exceed the width) and prime px1 with its first pixel.
    1205             :                 srcStart = (uint32_t *)((char *)srcStart + srcRowBytes);
    1206             :                 px1 = *srcStart;
    1207             :         }
    1208             :         
    1209             :         // Copy last row without reading off end of buffer
    1210             :         fractX = 0;
    1211             :         for (x = 0; x!= xCount; ++x)
    1212             :         {
    1213             :                 fractX += deltaX;
    1214             :                 
    1215             :                 weight1 = (fractX >> 4) & 0xFF;
    1216             :                 weight0 = 0x100 - weight1;
    1217             :                 // On the final pixel the fetch is skipped and px1 keeps its previous value, so nothing past the row is read.
    1218             :                 px0 = px1;
    1219             :                 src = srcStart + (fractX >> 12);
    1220             :                 if (EXPECT(x < xCount - 1))  px1 = *src;
    1221             :                 
    1222             :                 ag = ((px0 & 0xFF00FF00) >> 8) * weight0 + ((px1 & 0xFF00FF00) >> 8) * weight1;
    1223             :                 br = (px0 & 0x00FF00FF) * weight0 + (px1 & 0x00FF00FF) * weight1;
    1224             :                 
    1225             :                 *dst++ = (ag & 0xFF00FF00) | ((br & 0xFF00FF00) >> 8);
    1226             :         }
    1227             : }
    1228             : 
    1229             : /* Shrink a 1-byte-per-pixel pixmap to dstWidth pixels per row, in place: each output pixel is the weighted average of the source pixels it spans, and output rows are written packed from the start of the buffer. */
    1230           0 : static void SqueezeHorizontally1(OOPixMap srcPx, OOPixMapDimension dstWidth)
    1231             : {
    1232             :         uint8_t                         *src, *srcStart, *dst;
    1233             :         uint8_t                         borderPx;
    1234             :         uint_fast32_t           x, y, xCount, endX;
    1235             :         size_t                          srcRowBytes;
    1236             :         uint_fast32_t           endFractX, deltaX;
    1237             :         uint_fast32_t           accum, weight;
    1238             :         uint_fast8_t            borderWeight;
    1239             :         // The pixmap must be valid and use one byte per pixel.
    1240             :         NSCParameterAssert(OOIsValidPixMap(srcPx) && OOPixMapBytesPerPixel(srcPx) == 1);
    1241             :         
    1242             :         srcStart = srcPx.pixels;
    1243             :         dst = srcStart; // Output is placed in same buffer, without line padding.
    1244             :         srcRowBytes = srcPx.rowBytes;
    1245             :         // Source pixels covered by one destination pixel, in 20.12 fixed-point.
    1246             :         deltaX = (srcPx.width << 12) / dstWidth;
    1247             :         
    1248             :         for (y = 0; y != srcPx.height; ++y)
    1249             :         {
    1250             :                 borderPx = *srcStart;
    1251             :                 endFractX = 0;
    1252             :                 borderWeight = 0;
    1253             :                 
    1254             :                 src = srcStart;
    1255             :                 // x counts the source pixels consumed in this row; xCount counts the output pixels still to produce.
    1256             :                 x = 0;
    1257             :                 xCount = dstWidth;
    1258             :                 while (xCount--)
    1259             :                 {
    1260             :                         endFractX += deltaX;
    1261             :                         endX = endFractX >> 12;
    1262             :                         // The pixel straddling the previous boundary contributes whatever share the previous output did not use.
    1263             :                         borderWeight = 0xFF - borderWeight;
    1264             :                         accum = borderPx * borderWeight;
    1265             :                         weight = borderWeight;
    1266             :                         // The pixel straddling the new boundary is weighted by the top 8 bits of the boundary's fraction.
    1267             :                         borderWeight = (endFractX >> 4) & 0xFF;
    1268             :                         weight += borderWeight;
    1269             :                         // Pixels strictly inside the span get full weight (0xFF); the loop ends at the pixel holding the new boundary.
    1270             :                         for (;;)
    1271             :                         {
    1272             :                                 ++x;
    1273             :                                 if (EXPECT(x == endX))
    1274             :                                 {
    1275             :                                         if (EXPECT(xCount))  borderPx = *++src; // xCount is 0 on the row's last output: skip the fetch, reuse the previous borderPx
    1276             :                                         accum += borderPx * borderWeight;
    1277             :                                         break;
    1278             :                                 }
    1279             :                                 else
    1280             :                                 {
    1281             :                                         accum += *++src * 0xFF;
    1282             :                                         weight += 0xFF;
    1283             :                                 }
    1284             :                         }
    1285             :                         // Normalize by the total weight to obtain the averaged pixel.
    1286             :                         *dst++ = accum / weight;
    1287             :                 }
    1288             :                 // Advance to the next source row; dst is not reset, so output rows are contiguous.
    1289             :                 srcStart = (uint8_t *)((char *)srcStart + srcRowBytes);
    1290             :         }
    1291             : }
    1292             : 
    1293             : /* Shrink a 1-byte-per-pixel pixmap vertically towards dstHeight rows, in place: each output row is the weighted average of the source rows it spans, written packed from the start of the buffer. */
    1294           0 : static void SqueezeVertically1(OOPixMap srcPx, OOPixMapDimension dstHeight)
    1295             : {
    1296             :         uint8_t                         *src, *srcStart, *dst;
    1297             :         uint_fast32_t           x, y, xCount, startY, endY, lastRow;
    1298             :         size_t                          srcRowBytes;
    1299             :         uint_fast32_t           endFractY, deltaY;
    1300             :         uint_fast32_t           accum, weight;
    1301             :         uint_fast8_t            startWeight, endWeight;
    1302             :         // The pixmap must be valid and use one byte per pixel.
    1303             :         NSCParameterAssert(OOIsValidPixMap(srcPx) && OOPixMapBytesPerPixel(srcPx) == 1);
    1304             :         
    1305             :         dst = srcPx.pixels;     // Output is placed in same buffer, without line padding.
    1306             :         srcRowBytes = srcPx.rowBytes;
    1307             :         xCount = srcPx.width;
    1308             :         // Source rows covered by one destination row, in 20.12 fixed-point.
    1309             :         deltaY = (srcPx.height << 12) / dstHeight;
    1310             :         endFractY = 0;
    1311             :         
    1312             :         endWeight = 0;
    1313             :         endY = 0;
    1314             :         
    1315             :         lastRow = srcPx.height - 1;
    1316             :         // One pass per output row, until the span's end reaches or passes the last source row.
    1317             :         while (endY < lastRow)
    1318             :         {
    1319             :                 endFractY += deltaY;
    1320             :                 startY = endY;
    1321             :                 endY = endFractY >> 12;
    1322             :                 // The first row of the span gets the share left over from the previous span; the end row gets the top 8 fraction bits.
    1323             :                 startWeight = 0xFF - endWeight;
    1324             :                 endWeight = (endFractY >> 4) & 0xFF;
    1325             :                 
    1326             :                 srcStart = (uint8_t *)((char *)srcPx.pixels + srcRowBytes * startY);
    1327             :                 
    1328             :                 for (x = 0; x != xCount; ++x)
    1329             :                 {
    1330             :                         src = srcStart++;
    1331             :                         accum = startWeight * *src;
    1332             :                         weight = startWeight + endWeight;
    1333             :                         // Walk down the column: rows strictly inside the span get full weight (0xFF), the row at endY gets endWeight.
    1334             :                         y = startY;
    1335             :                         for (;;)
    1336             :                         {
    1337             :                                 ++y;
    1338             :                                 src = (uint8_t *)((char *)src + srcRowBytes);
    1339             :                                 if (EXPECT_NOT(y == endY))
    1340             :                                 {
    1341             :                                         if (EXPECT(endY <= lastRow))  accum += *src * endWeight; // lastRow is a valid row and its weight is already counted; only endY past it is skipped
    1342             :                                         break;
    1343             :                                 }
    1344             :                                 else
    1345             :                                 {
    1346             :                                         accum += *src * 0xFF;
    1347             :                                         weight += 0xFF;
    1348             :                                 }
    1349             :                         }
    1350             :                         // Normalize by the total weight to obtain the averaged pixel.
    1351             :                         *dst++ = accum / weight;
    1352             :                 }
    1353             :         }       
    1354             : }
    1355             : 
    1356             : 
    1357             : /*      Macros to manage 2-channel accumulators in 2-channel squeeze scalers. They operate on
    1358             :         variables named accumHi, accumLo and weight that must exist in the enclosing scope.
    1359             :         accumHi is the sum of weighted high-channel pixels, shifted left 8 bits (the byte is left in bits 8-15).
    1360             :         accumLo is the sum of weighted low-channel pixels.
    1361             :         weight is the sum of all pixel weights. ACCUM2(PX, WT) adds pixel PX with weight WT, evaluating each argument once.
    1362             : */
    1363           0 : #define ACCUM2(PX, WT) do {                                                     \
    1364             :                         uint16_t px = PX;                                               \
    1365             :                         uint_fast32_t wt = WT;                                  \
    1366             :                         accumHi += (px & 0xFF00) * wt;                      \
    1367             :                         accumLo += (px & 0x00FF) * wt;                      \
    1368             :                         weight += wt;                                                   \
    1369             :                 } while (0)
    1370             : // CLEAR_ACCUM2() zeroes both channel sums and the total weight before a new output pixel is accumulated.
    1371           0 : #define CLEAR_ACCUM2() do {                                                     \
    1372             :                         accumHi = 0;                                                    \
    1373             :                         accumLo = 0;                                                    \
    1374             :                         weight = 0;                                                             \
    1375             :                 } while (0)
    1376             : // ACCUM2TOPX() yields the weighted-average pixel: each channel sum divided by weight (which must be nonzero) and masked back to its byte.
    1377           0 : #define ACCUM2TOPX()    (                                                       \
    1378             :                         ((accumHi / weight) & 0xFF00) |                     \
    1379             :                         ((accumLo / weight) & 0x00FF)                       \
    1380             :                 )
    1380             : 
    1381             : /* Shrink a 2-byte-per-pixel pixmap to dstWidth pixels per row, in place: each output pixel is the per-channel weighted average of the source pixels it spans, and output rows are written packed from the start of the buffer. */
    1382           0 : static void SqueezeHorizontally2(OOPixMap srcPx, OOPixMapDimension dstWidth)
    1383             : {
    1384             :         uint16_t                        *src, *srcStart, *dst;
    1385             :         uint16_t                        borderPx;
    1386             :         uint_fast32_t           x, y, xCount, endX;
    1387             :         size_t                          srcRowBytes;
    1388             :         uint_fast32_t           endFractX, deltaX;
    1389             :         uint_fast32_t           accumHi, accumLo, weight;
    1390             :         uint_fast8_t            borderWeight;
    1391             :         // The pixmap must be valid and use two bytes per pixel.
    1392             :         NSCParameterAssert(OOIsValidPixMap(srcPx) && OOPixMapBytesPerPixel(srcPx) == 2);
    1393             :         
    1394             :         srcStart = srcPx.pixels;
    1395             :         dst = srcStart; // Output is placed in same buffer, without line padding.
    1396             :         srcRowBytes = srcPx.rowBytes;
    1397             :         // Source pixels covered by one destination pixel, in 20.12 fixed-point.
    1398             :         deltaX = (srcPx.width << 12) / dstWidth;
    1399             :         
    1400             :         for (y = 0; y != srcPx.height; ++y)
    1401             :         {
    1402             :                 borderPx = *srcStart;
    1403             :                 endFractX = 0;
    1404             :                 borderWeight = 0;
    1405             :                 
    1406             :                 src = srcStart;
    1407             :                 // x counts the source pixels consumed in this row; xCount counts the output pixels still to produce.
    1408             :                 x = 0;
    1409             :                 xCount = dstWidth;
    1410             :                 while (xCount--)
    1411             :                 {
    1412             :                         endFractX += deltaX;
    1413             :                         endX = endFractX >> 12;
    1414             :                         
    1415             :                         CLEAR_ACCUM2();
    1416             :                         // The pixel straddling the previous boundary contributes whatever share the previous output did not use.
    1417             :                         borderWeight = 0xFF - borderWeight;
    1418             :                         ACCUM2(borderPx, borderWeight);
    1419             :                         // The pixel straddling the new boundary is weighted by the top 8 bits of the boundary's fraction.
    1420             :                         borderWeight = (endFractX >> 4) & 0xFF;
    1421             :                         // Pixels strictly inside the span get full weight (0xFF); the loop ends at the pixel holding the new boundary.
    1422             :                         for (;;)
    1423             :                         {
    1424             :                                 ++x;
    1425             :                                 if (EXPECT(x == endX))
    1426             :                                 {
    1427             :                                         if (EXPECT(xCount))  borderPx = *++src; // xCount is 0 on the row's last output: skip the fetch, reuse the previous borderPx
    1428             :                                         ACCUM2(borderPx, borderWeight);
    1429             :                                         break;
    1430             :                                 }
    1431             :                                 else
    1432             :                                 {
    1433             :                                         ACCUM2(*++src, 0xFF);
    1434             :                                 }
    1435             :                         }
    1436             :                         // Normalize both channels by the total weight and repack them into one pixel.
    1437             :                         *dst++ = ACCUM2TOPX();
    1438             :                 }
    1439             :                 // Advance to the next source row; dst is not reset, so output rows are contiguous.
    1440             :                 srcStart = (uint16_t *)((char *)srcStart + srcRowBytes);
    1441             :         }
    1442             : }
    1443             : 
    1444             : /* Shrink a 2-byte-per-pixel pixmap vertically towards dstHeight rows, in place: each output row is the per-channel weighted average of the source rows it spans, written packed from the start of the buffer. */
    1445           0 : static void SqueezeVertically2(OOPixMap srcPx, OOPixMapDimension dstHeight)
    1446             : {
    1447             :         uint16_t                        *src, *srcStart, *dst;
    1448             :         uint_fast32_t           x, y, xCount, startY, endY, lastRow;
    1449             :         size_t                          srcRowBytes;
    1450             :         uint_fast32_t           endFractY, deltaY;
    1451             :         uint_fast32_t           accumHi, accumLo, weight;
    1452             :         uint_fast8_t            startWeight, endWeight;
    1453             :         // The pixmap must be valid and use two bytes per pixel.
    1454             :         NSCParameterAssert(OOIsValidPixMap(srcPx) && OOPixMapBytesPerPixel(srcPx) == 2);
    1455             :         
    1456             :         dst = srcPx.pixels;     // Output is placed in same buffer, without line padding.
    1457             :         srcRowBytes = srcPx.rowBytes;
    1458             :         xCount = srcPx.width;
    1459             :         // Source rows covered by one destination row, in 20.12 fixed-point.
    1460             :         deltaY = (srcPx.height << 12) / dstHeight;
    1461             :         endFractY = 0;
    1462             :         
    1463             :         endWeight = 0;
    1464             :         endY = 0;
    1465             :         
    1466             :         lastRow = srcPx.height - 1;
    1467             :         // One pass per output row, until the span's end reaches or passes the last source row.
    1468             :         while (endY < lastRow)
    1469             :         {
    1470             :                 endFractY += deltaY;
    1471             :                 startY = endY;
    1472             :                 endY = endFractY >> 12;
    1473             :                 // The first row of the span gets the share left over from the previous span; the end row gets the top 8 fraction bits.
    1474             :                 startWeight = 0xFF - endWeight;
    1475             :                 endWeight = (endFractY >> 4) & 0xFF;
    1476             :                 
    1477             :                 srcStart = (uint16_t *)((char *)srcPx.pixels + srcRowBytes * startY);
    1478             :                 
    1479             :                 for (x = 0; x != xCount; ++x)
    1480             :                 {
    1481             :                         src = srcStart++;
    1482             :                         
    1483             :                         CLEAR_ACCUM2();
    1484             :                         ACCUM2(*src, startWeight);
    1485             :                         // Walk down the column: rows strictly inside the span get full weight (0xFF), the row at endY gets endWeight.
    1486             :                         y = startY;
    1487             :                         for (;;)
    1488             :                         {
    1489             :                                 ++y;
    1490             :                                 src = (uint16_t *)((char *)src + srcRowBytes);
    1491             :                                 if (EXPECT_NOT(y == endY))
    1492             :                                 {
    1493             :                                         if (EXPECT(endY <= lastRow))  ACCUM2(*src, endWeight); // skipped only when endY is past the last source row
    1494             :                                         break;
    1495             :                                 }
    1496             :                                 else
    1497             :                                 {
    1498             :                                         ACCUM2(*src, 0xFF);
    1499             :                                 }
    1500             :                         }
    1501             :                         // Normalize both channels by the total weight and repack them into one pixel.
    1502             :                         *dst++ = ACCUM2TOPX();
    1503             :                 }
    1504             :         }
    1505             : }
    1506             : 
    1507             : 
    1508             : /*      Macros to manage 4-channel accumulators in 4-channel squeeze scalers. They use variables named
    1509             :         ag, br, accum1..accum4 and weight from the enclosing scope. The approach is similar to the ACCUM2 family above, except that the wt
    1510             :         multiplication works on two channels at a time (bytes 3 and 1 in ag, bytes 2 and 0 in br) before splitting into four
    1511             :         accumulators, all of which are shifted to the low end of the value: accum1..accum4 hold bytes 3, 2, 1 and 0 respectively.
    1512             : */
    1513           0 : #define ACCUM4(PX, WT) do {                                                     \
    1514             :                         uint32_t px = PX;                                               \
    1515             :                         uint_fast32_t wt = WT;                                  \
    1516             :                         ag = ((px & 0xFF00FF00) >> 8) * wt;           \
    1517             :                         br = (px & 0x00FF00FF) * wt;                        \
    1518             :                         accum1 += ag >> 16;                                               \
    1519             :                         accum2 += br >> 16;                                               \
    1520             :                         accum3 += ag & 0xFFFF;                                      \
    1521             :                         accum4 += br & 0xFFFF;                                      \
    1522             :                         weight += wt;                                                   \
    1523             :                 } while (0)
    1524             : // CLEAR_ACCUM4() zeroes the four channel sums and the total weight before a new output pixel is accumulated.
    1525           0 : #define CLEAR_ACCUM4() do {                                                     \
    1526             :                         accum1 = 0;                                                             \
    1527             :                         accum2 = 0;                                                             \
    1528             :                         accum3 = 0;                                                             \
    1529             :                         accum4 = 0;                                                             \
    1530             :                         weight = 0;                                                             \
    1531             :                 } while (0)
    1532             : 
    1533             : /*      ACCUM4TOPX() divides each channel sum by weight (which must be nonzero) and repacks the four bytes into one pixel.
    1534             :         These integer divisions cause a stall -- this is the biggest bottleneck in this file. Unrolling the loop might help on PPC.
    1535             :         Linear interpolation instead of box filtering would help, with
    1536             :         a quality hit. Given that scaling doesn't happen very often,
    1537             :         I think I'll leave it this way. -- Ahruman
    1538             : */
    1539           0 : #define ACCUM4TOPX()    (                                                       \
    1540             :                         (((accum1 / weight) & 0xFF) << 24) |  \
    1541             :                         (((accum3 / weight) & 0xFF) << 8)  |  \
    1542             :                         (((accum2 / weight) & 0xFF) << 16) |  \
    1543             :                         ((accum4 / weight) & 0xFF)                          \
    1544             :                 )
    1545             : // SqueezeHorizontally4: box-filters every row of a 4-byte-per-pixel pixmap down to dstWidth pixels, in place. Destination pixels are written consecutively from the start of the buffer, so the result has no row padding.
    1546             : // Horizontal position is tracked in fixed point with 12 fractional bits. dstWidth must be non-zero (it is a divisor); presumably dstWidth <= srcPx.width as well -- not checked here, TODO confirm against callers.
    1547           0 : static void SqueezeHorizontally4(OOPixMap srcPx, OOPixMapDimension dstWidth)
    1548             : {
    1549             :         uint32_t                        *src, *srcStart, *dst;
    1550             :         uint32_t                        borderPx, ag, br;       // ag and br are not referenced directly below; presumably scratch for ACCUM4, which is defined outside this view.
    1551             :         uint_fast32_t           x, y, xCount, endX;
    1552             :         size_t                          srcRowBytes;
    1553             :         uint_fast32_t           endFractX, deltaX;
    1554             :         uint_fast32_t           accum1, accum2, accum3, accum4, weight; // Accessed by name from the accumulator macros (CLEAR_ACCUM4, ACCUM4TOPX).
    1555             :         uint_fast8_t            borderWeight;
    1556             :         
    1557             :         NSCParameterAssert(OOIsValidPixMap(srcPx) && OOPixMapBytesPerPixel(srcPx) == 4);
    1558             :         
    1559             :         srcStart = srcPx.pixels;
    1560             :         dst = srcStart; // Output is placed in same buffer, without line padding.
    1561             :         srcRowBytes = srcPx.rowBytes;
    1562             :         // Source pixels consumed per destination pixel, with 12 fractional bits.
    1563             :         deltaX = (srcPx.width << 12) / dstWidth;
    1564             :         // One source row per iteration: srcStart advances by srcRowBytes, while dst just keeps counting up.
    1565             :         for (y = 0; y != srcPx.height; ++y)
    1566             :         {
    1567             :                 borderPx = *srcStart;
    1568             :                 endFractX = 0;
    1569             :                 borderWeight = 0;       // Becomes 0xFF after the inversion below, so the first pixel of the row is taken at full weight.
    1570             :                 
    1571             :                 src = srcStart;
    1572             :                 
    1573             :                 x = 0;
    1574             :                 xCount = dstWidth;
    1575             :                 while (xCount--)
    1576             :                 {
    1577             :                         endFractX += deltaX;
    1578             :                         endX = endFractX >> 12; // Integer part: index of the source pixel at which this destination pixel ends.
    1579             :                         
    1580             :                         CLEAR_ACCUM4();
    1581             :                         // The border pixel carried over from the previous destination pixel contributes whatever weight was not used there.
    1582             :                         borderWeight = 0xFF - borderWeight;
    1583             :                         ACCUM4(borderPx, borderWeight);
    1584             :                         // Top 8 of the 12 fraction bits: weight given to the pixel at the end of this destination pixel.
    1585             :                         borderWeight = (endFractX >> 4) & 0xFF;
    1586             :                         
    1587             :                         for (;;)
    1588             :                         {
    1589             :                                 ++x;
    1590             :                                 if (EXPECT(x == endX))
    1591             :                                 {
    1592             :                                         if (EXPECT(xCount))  borderPx = *++src; // xCount is zero only for the last destination pixel of the row; borderPx and src are then left as they are.
    1593             :                                         ACCUM4(borderPx, borderWeight);
    1594             :                                         break;
    1595             :                                 }
    1596             :                                 else
    1597             :                                 {
    1598             :                                         ACCUM4(*++src, 0xFF);   // Pixels before the end position are accumulated at full weight.
    1599             :                                 }
    1600             :                         }
    1601             :                         
    1602             :                         *dst++ = ACCUM4TOPX();
    1603             :                 }
    1604             :                 
    1605             :                 srcStart = (uint32_t *)((char *)srcStart + srcRowBytes);
    1606             :         }
    1607             : }
    1608             : // SqueezeVertically4: reduces the height of a 4-byte-per-pixel pixmap in place by weighted averaging of source rows. Destination pixels are written consecutively from the start of the buffer, srcPx.width pixels per output row.
    1609             : // Vertical position is tracked in fixed point with 12 fractional bits. dstHeight must be non-zero (it is a divisor); presumably dstHeight <= srcPx.height as well -- not checked here, TODO confirm against callers.
    1610           0 : static void SqueezeVertically4(OOPixMap srcPx, OOPixMapDimension dstHeight)
    1611             : {
    1612             :         uint32_t                        *src, *srcStart, *dst;
    1613             :         uint_fast32_t           x, y, xCount, startY, endY, lastRow;
    1614             :         size_t                          srcRowBytes;
    1615             :         uint32_t                        ag, br; // Not referenced directly below; presumably scratch for ACCUM4, which is defined outside this view.
    1616             :         uint_fast32_t           endFractY, deltaY;
    1617             :         uint_fast32_t           accum1, accum2, accum3, accum4, weight; // Accessed by name from the accumulator macros (CLEAR_ACCUM4, ACCUM4TOPX).
    1618             :         uint_fast8_t            startWeight, endWeight;
    1619             :         
    1620             :         NSCParameterAssert(OOIsValidPixMap(srcPx) && OOPixMapBytesPerPixel(srcPx) == 4);
    1621             :         
    1622             :         dst = srcPx.pixels;     // Output is placed in same buffer, without line padding.
    1623             :         srcRowBytes = srcPx.rowBytes;
    1624             :         xCount = srcPx.width;
    1625             :         // Source rows consumed per destination row, with 12 fractional bits.
    1626             :         deltaY = (srcPx.height << 12) / dstHeight;
    1627             :         endFractY = 0;
    1628             :         
    1629             :         endWeight = 0;
    1630             :         endY = 0;
    1631             :         
    1632             :         lastRow = srcPx.height - 1;
    1633             :         // One destination row per iteration. The loop stops once the end of the current span reaches the last source row; dstHeight is used only to derive deltaY.
    1634             :         while (endY < lastRow)
    1635             :         {
    1636             :                 endFractY += deltaY;
    1637             :                 startY = endY;
    1638             :                 endY = endFractY >> 12;
    1639             :                 // The first row of the span gets the weight left over from the previous span; endWeight is the top 8 of the 12 fraction bits.
    1640             :                 startWeight = 0xFF - endWeight;
    1641             :                 endWeight = (endFractY >> 4) & 0xFF;
    1642             :                 
    1643             :                 srcStart = (uint32_t *)((char *)srcPx.pixels + srcRowBytes * startY);
    1644             :                 // Walk the columns of the span; within each column src steps down one source row at a time.
    1645             :                 for (x = 0; x != xCount; ++x)
    1646             :                 {
    1647             :                         src = srcStart++;
    1648             :                         
    1649             :                         CLEAR_ACCUM4();
    1650             :                         ACCUM4(*src, startWeight);
    1651             :                         
    1652             :                         y = startY;
    1653             :                         for (;;)
    1654             :                         {
    1655             :                                 ++y;
    1656             :                                 src = (uint32_t *)((char *)src + srcRowBytes);  // Same column, next source row.
    1657             :                                 if (EXPECT_NOT(y == endY))
    1658             :                                 {
    1659             :                                         if (EXPECT(endY <= lastRow))  ACCUM4(*src, endWeight);  // src is not dereferenced when the span ends past the last source row.
    1660             :                                         break;
    1661             :                                 }
    1662             :                                 else
    1663             :                                 {
    1664             :                                         ACCUM4(*src, 0xFF);     // Rows before the end position are accumulated at full weight.
    1665             :                                 }
    1666             :                         }
    1667             :                         
    1668             :                         *dst++ = ACCUM4TOPX();
    1669             :                 }
    1670             :         }
    1671             : }
    1672             : // EnsureCorrectDataSize: checks that pixMap's buffer holds at least rowBytes * height bytes, then reallocates it to exactly that size, or to 4/3 of that size when leaveSpaceForMipMaps is set. pixels and bufferSize are updated in place.
    1673             : // Returns YES on success. Returns NO if the buffer is smaller than rowBytes * height (pixMap is left untouched) or if realloc() fails (the old buffer is freed, pixels is set to NULL and bufferSize to 0).
    1674           0 : static BOOL EnsureCorrectDataSize(OOPixMap *pixMap, BOOL leaveSpaceForMipMaps)
    1675             : {
    1676             :         size_t                          correctSize;
    1677             :         void                            *bytes = NULL;
    1678             :         
    1679             :         correctSize = pixMap->rowBytes * pixMap->height;
    1680             :         
    1681             :         // correctSize > 0 check is redundant, but static analyzer (checker-262) doesn't know that. -- Ahruman 2012-03-17
    1682             :         NSCParameterAssert(OOIsValidPixMap(*pixMap) && correctSize > 0);
    1683             :         
    1684             :         /*      Ensure that the block is not too small. This needs to be done before
    1685             :                 adding the mip-map space, as the texture may have been shrunk in place
    1686             :                 without being grown for mip-maps.
    1687             :         */
    1688             :         if (EXPECT_NOT(pixMap->bufferSize < correctSize))
    1689             :         {
    1690             :                 OOLogGenericParameterError();
    1691             :                 return NO;
    1692             :         }
    1693             :         
    1694             :         // The extra third is presumably room for the smaller mip levels (1/4 + 1/16 + ... < 1/3 of the base level).
    1695             :         if (leaveSpaceForMipMaps)  correctSize = correctSize * 4 / 3;
    1696             :         if (correctSize != pixMap->bufferSize)
    1697             :         {
    1698             :                 bytes = realloc(pixMap->pixels, correctSize);
    1699             :                 if (EXPECT_NOT(bytes == NULL))
    1700             :                 {
    1701             :                         /*      realloc() leaves the old block allocated when it fails.
    1702             :                                 Release it so it is not leaked, put the pixmap into a
    1703             :                                 consistent empty state, and report the failure rather
    1704             :                                 than returning YES with a NULL buffer.
    1705             :                         */
    1706             :                         free(pixMap->pixels);
    1707             :                         pixMap->pixels = NULL;
    1708             :                         pixMap->bufferSize = 0;
    1709             :                         return NO;
    1710             :                 }
    1711             :                 pixMap->pixels = bytes;
    1712             :                 pixMap->bufferSize = correctSize;
    1713             :         }
    1714             :         
    1715             :         return YES;
    1716             : }

Generated by: LCOV version 1.14