Line data Source code
1 0 : /*
2 :
3 : OOTextureScaling.m
4 :
5 : Copyright (C) 2007-2013 Jens Ayton
6 :
7 : Permission is hereby granted, free of charge, to any person obtaining a copy
8 : of this software and associated documentation files (the "Software"), to deal
9 : in the Software without restriction, including without limitation the rights
10 : to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 : copies of the Software, and to permit persons to whom the Software is
12 : furnished to do so, subject to the following conditions:
13 :
14 : The above copyright notice and this permission notice shall be included in all
15 : copies or substantial portions of the Software.
16 :
17 : THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 : IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 : FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 : AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 : LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 : OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 : SOFTWARE.
24 :
25 : */
26 :
27 :
28 : #import "OOTextureScaling.h"
29 : #import "OOFunctionAttributes.h"
30 : #include <stdlib.h>
31 : #import "OOLogging.h"
32 : #import "OOMaths.h"
33 : #import "OOCPUInfo.h"
34 :
35 :
36 0 : #define DUMP_MIP_MAPS 0
37 0 : #define DUMP_SCALE 0
38 :
39 :
40 : /* Internal function declarations.
41 :
42 : NOTE: the function definitions are grouped together for best code cache
43 : coherence rather than the order listed here.
44 : */
45 : static BOOL GenerateMipMaps1(void *textureBytes, OOPixMapDimension width, OOPixMapDimension height) NONNULL_FUNC;
46 : static BOOL GenerateMipMaps2(void *textureBytes, OOPixMapDimension width, OOPixMapDimension height) NONNULL_FUNC;
47 : static BOOL GenerateMipMaps4(void *textureBytes, OOPixMapDimension width, OOPixMapDimension height) NONNULL_FUNC;
48 :
49 :
50 : /* ScaleToHalf_P_xN functions
51 : These scale a texture with P planes (components) to half its size in each
52 : dimension, handling N pixels at a time. srcWidth must be a multiple of N.
53 : Parameters are not validated -- bad parameters will lead to bad data or a
54 : crash.
55 :
56 : Scaling is an unweighted average. 8 bits per channel assumed.
57 : It is safe and meaningful for srcBytes == dstBytes.
58 : */
59 : static void ScaleToHalf_1_x1(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight) NONNULL_FUNC;
60 : static void ScaleToHalf_2_x1(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight) NONNULL_FUNC;
61 : static void ScaleToHalf_4_x1(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight) NONNULL_FUNC;
62 :
63 : #if OOLITE_NATIVE_64_BIT
64 : static void ScaleToHalf_1_x8(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight) NONNULL_FUNC;
65 : // static void ScaleToHalf_2_x4(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight) NONNULL_FUNC;
66 : static void ScaleToHalf_4_x2(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight) NONNULL_FUNC;
67 : #else
68 : static void ScaleToHalf_1_x4(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight) NONNULL_FUNC;
69 : // static void ScaleToHalf_2_x2(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight) NONNULL_FUNC;
70 : #endif
71 :
72 :
73 : OOINLINE void StretchVertically(OOPixMap srcPx, OOPixMap dstPx) ALWAYS_INLINE_FUNC;
74 : OOINLINE void SqueezeVertically(OOPixMap pixMap, OOPixMapDimension dstHeight) ALWAYS_INLINE_FUNC;
75 : OOINLINE void StretchHorizontally(OOPixMap srcPx, OOPixMap dstPx) ALWAYS_INLINE_FUNC;
76 : OOINLINE void SqueezeHorizontally(OOPixMap pixMap, OOPixMapDimension dstHeight) ALWAYS_INLINE_FUNC;
77 :
78 : static void StretchVerticallyN_x1(OOPixMap srcPx, OOPixMap dstPx);
79 :
80 : static void SqueezeVertically1(OOPixMap srcPx, OOPixMapDimension dstHeight);
81 : static void SqueezeVertically2(OOPixMap srcPx, OOPixMapDimension dstHeight);
82 : static void SqueezeVertically4(OOPixMap srcPx, OOPixMapDimension dstHeight);
83 : static void StretchHorizontally1(OOPixMap srcPx, OOPixMap dstPx);
84 : static void StretchHorizontally2(OOPixMap srcPx, OOPixMap dstPx);
85 : static void StretchHorizontally4(OOPixMap srcPx, OOPixMap dstPx);
86 : static void SqueezeHorizontally1(OOPixMap srcPx, OOPixMapDimension dstWidth);
87 : static void SqueezeHorizontally2(OOPixMap srcPx, OOPixMapDimension dstWidth);
88 : static void SqueezeHorizontally4(OOPixMap srcPx, OOPixMapDimension dstWidth);
89 :
90 :
91 : static BOOL EnsureCorrectDataSize(OOPixMap *pixMap, BOOL leaveSpaceForMipMaps) NONNULL_FUNC;
92 :
93 :
94 : #if !OOLITE_NATIVE_64_BIT
95 :
96 : static void StretchVerticallyN_x4(OOPixMap srcPx, OOPixMap dstPx);
97 :
/*	StretchVertically (32-bit build)
	Pick a vertical stretcher from the source row length: when srcPx.rowBytes
	is a multiple of four the 32-bit-word scaler is used, otherwise the
	byte-at-a-time scaler.
*/
OOINLINE void StretchVertically(OOPixMap srcPx, OOPixMap dstPx)
{
	if ((srcPx.rowBytes & 3) != 0)
	{
		// Row length is not a whole number of 32-bit words.
		StretchVerticallyN_x1(srcPx, dstPx);
		return;
	}
	
	StretchVerticallyN_x4(srcPx, dstPx);
}
109 :
110 : #else // OOLITE_NATIVE_64_BIT
111 :
112 : static void StretchVerticallyN_x8(OOPixMap srcPx, OOPixMap dstPx);
113 :
/*	StretchVertically (64-bit build)
	Pick a vertical stretcher from the source row length: when srcPx.rowBytes
	is a multiple of eight the 64-bit-word scaler is used, otherwise the
	byte-at-a-time scaler.
*/
OOINLINE void StretchVertically(OOPixMap srcPx, OOPixMap dstPx)
{
	if ((srcPx.rowBytes & 7) != 0)
	{
		// Row length is not a whole number of 64-bit words.
		StretchVerticallyN_x1(srcPx, dstPx);
		return;
	}
	
	StretchVerticallyN_x8(srcPx, dstPx);
}
125 :
126 : #endif
127 :
128 :
/*	SqueezeVertically
	Forward to the vertical squeezer matching the pixmap's format (one, two
	or four bytes per pixel). Any other format is a programming error: debug
	builds raise NSInternalInconsistencyException, release builds abort().
*/
OOINLINE void SqueezeVertically(OOPixMap pixMap, OOPixMapDimension dstHeight)
{
	const OOPixMapFormat	format = pixMap.format;
	
	if (format == kOOPixMapRGBA)
	{
		SqueezeVertically4(pixMap, dstHeight);
		return;
	}
	if (format == kOOPixMapGrayscale)
	{
		SqueezeVertically1(pixMap, dstHeight);
		return;
	}
	if (format == kOOPixMapGrayscaleAlpha)
	{
		SqueezeVertically2(pixMap, dstHeight);
		return;
	}
	
	// kOOPixMapInvalidFormat, or a value outside the enumeration.
#ifndef NDEBUG
	[NSException raise:NSInternalInconsistencyException format:@"Unsupported pixmap format in scaler: %@", OOPixMapFormatName(pixMap.format)];
#else
	abort();
#endif
}
155 :
156 :
/*	StretchHorizontally
	Forward to the horizontal stretcher matching the pixmaps' shared format.
	Source and destination must have the same format (asserted). Any
	unsupported format is a programming error: debug builds raise
	NSInternalInconsistencyException, release builds abort().
*/
OOINLINE void StretchHorizontally(OOPixMap srcPx, OOPixMap dstPx)
{
	NSCParameterAssert(srcPx.format == dstPx.format);
	
	const OOPixMapFormat	format = srcPx.format;
	
	if (format == kOOPixMapRGBA)
	{
		StretchHorizontally4(srcPx, dstPx);
		return;
	}
	if (format == kOOPixMapGrayscale)
	{
		StretchHorizontally1(srcPx, dstPx);
		return;
	}
	if (format == kOOPixMapGrayscaleAlpha)
	{
		StretchHorizontally2(srcPx, dstPx);
		return;
	}
	
	// kOOPixMapInvalidFormat, or a value outside the enumeration.
#ifndef NDEBUG
	[NSException raise:NSInternalInconsistencyException format:@"Unsupported pixmap format in scaler: %@", OOPixMapFormatName(srcPx.format)];
#else
	abort();
#endif
}
185 :
186 :
/*	SqueezeHorizontally
	Forward to the horizontal squeezer matching the pixmap's format.
	
	NOTE: despite its name, dstHeight is passed straight through as the
	dstWidth argument of the SqueezeHorizontallyN functions, i.e. it is the
	target width.
	
	Any unsupported format is a programming error: debug builds raise
	NSInternalInconsistencyException, release builds abort().
*/
OOINLINE void SqueezeHorizontally(OOPixMap pixMap, OOPixMapDimension dstHeight)
{
	const OOPixMapFormat	format = pixMap.format;
	
	if (format == kOOPixMapRGBA)
	{
		SqueezeHorizontally4(pixMap, dstHeight);
		return;
	}
	if (format == kOOPixMapGrayscale)
	{
		SqueezeHorizontally1(pixMap, dstHeight);
		return;
	}
	if (format == kOOPixMapGrayscaleAlpha)
	{
		SqueezeHorizontally2(pixMap, dstHeight);
		return;
	}
	
	// kOOPixMapInvalidFormat, or a value outside the enumeration.
#ifndef NDEBUG
	[NSException raise:NSInternalInconsistencyException format:@"Unsupported pixmap format in scaler: %@", OOPixMapFormatName(pixMap.format)];
#else
	abort();
#endif
}
213 :
214 :
215 : #if DUMP_MIP_MAPS || DUMP_SCALE
216 : // NOTE: currently only works on OS X because of OSAtomicAdd32() (used to increment ID counter in thread-safe way). A simple increment would be sufficient if limited to a single thread (in OOTextureLoader).
217 : volatile int32_t sPreviousDumpID = 0;
218 : int32_t OSAtomicAdd32(int32_t __theAmount, volatile int32_t *__theValue);
219 :
220 : #endif
221 :
222 : #if DUMP_MIP_MAPS
223 : #define DUMP_CHANNELS -1 // Bitmap of channel counts - -1 for all dumps
224 :
225 : #define DUMP_MIP_MAP_PREPARE(pl) uint32_t dumpPlanes = pl; \
226 : uint32_t dumpLevel = 0; \
227 : BOOL dumpThis = (dumpPlanes & DUMP_CHANNELS) != 0; \
228 : SInt32 dumpID = dumpThis ? OSAtomicAdd32(1, &sPreviousDumpID) : 0;
229 : #define DUMP_MIP_MAP_DUMP(px, w, h) if (dumpThis) DumpMipMap(px, w, h, dumpPlanes, dumpID, dumpLevel++);
230 : static void DumpMipMap(void *data, OOPixMapDimension width, OOPixMapDimension height, OOPixMapFormat format, SInt32 ID, uint32_t level);
231 : #else
232 0 : #define DUMP_MIP_MAP_PREPARE(pl) do { (void)pl; } while (0)
233 0 : #define DUMP_MIP_MAP_DUMP(px, w, h) do { (void)px; (void)w; (void)h; } while (0)
234 : #endif
235 :
236 : #if DUMP_SCALE
237 : #define DUMP_SCALE_PREPARE() SInt32 dumpID = OSAtomicAdd32(1, &sPreviousDumpID), dumpCount = 0;
238 : #define DUMP_SCALE_DUMP(PM, stage) do { OOPixMap *pm = &(PM); OODumpPixMap(*pm, [NSString stringWithFormat:@"scaling dump ID %u stage %u-%@ %ux%u", dumpID, dumpCount++, stage, pm->width, pm->height]); } while (0)
239 : #else
240 0 : #define DUMP_SCALE_PREPARE()
241 0 : #define DUMP_SCALE_DUMP(PM, stage) do {} while (0)
242 : #endif
243 :
244 :
/*	OOScalePixMap
	Scale srcPx to dstWidth x dstHeight, first vertically and then
	horizontally. Squeezing is done in place; stretching needs a second
	buffer (the buffer left over from a vertical stretch is reused for the
	horizontal stretch when it is big enough).
	
	Ownership: this function consumes srcPx. On every path srcPx.pixels is
	either freed or becomes the pixel buffer of the returned pixmap.
	
	srcPx               Pixmap to scale; must be valid.
	dstWidth/dstHeight  Target dimensions; must be non-zero.
	leaveSpaceForMipMaps
	                    If set, buffers are sized with a third extra
	                    (size * 4 / 3) and the flag is forwarded to
	                    EnsureCorrectDataSize().
	
	Returns the scaled pixmap, or kOONullPixMap on bad parameters or
	allocation failure.
*/
OOPixMap OOScalePixMap(OOPixMap srcPx, OOPixMapDimension dstWidth, OOPixMapDimension dstHeight, BOOL leaveSpaceForMipMaps)
{
	OOPixMap			dstPx = {0}, sparePx = {0};
	BOOL				OK = YES;
	BOOL				releaseSpare;
	
	// Sanity check. A zero target dimension can never yield a usable pixmap
	// and would otherwise be fed to the squeezers as a divisor/row count.
	if (EXPECT_NOT(!OOIsValidPixMap(srcPx) || dstWidth == 0 || dstHeight == 0))
	{
		OOLogGenericParameterError();
		free(srcPx.pixels);
		return kOONullPixMap;
	}
	
	DUMP_SCALE_PREPARE();
	DUMP_SCALE_DUMP(srcPx, @"initial");
	
	if (srcPx.height < dstHeight)
	{
		// Stretch vertically. This requires a separate buffer.
		size_t dstSize = srcPx.rowBytes * dstHeight;
		// Only reserve mip-map space here if no horizontal stretch follows
		// (a horizontal stretch produces its own, final buffer).
		if (leaveSpaceForMipMaps && dstWidth <= srcPx.width)  dstSize = dstSize * 4 / 3;
		
		dstPx = OOAllocatePixMap(srcPx.width, dstHeight, srcPx.format, 0, dstSize);
		if (EXPECT_NOT(!OOIsValidPixMap(dstPx)))  { OK = NO; goto FAIL; }
		
		StretchVertically(srcPx, dstPx);
		DUMP_SCALE_DUMP(dstPx, @"stretched vertically");
		
		// Keep the original buffer around as a candidate for reuse below.
		sparePx = srcPx;
		srcPx = dstPx;
	}
	else if (dstHeight < srcPx.height)
	{
		// Squeeze vertically. This can be done in-place.
		SqueezeVertically(srcPx, dstHeight);
		srcPx.height = dstHeight;
		DUMP_SCALE_DUMP(srcPx, @"squeezed vertically");
	}
	
	if (srcPx.width < dstWidth)
	{
		// Stretch horizontally. This requires a separate buffer.
		size_t dstSize = OOPixMapBytesPerPixel(srcPx) * dstWidth * srcPx.height;
		if (leaveSpaceForMipMaps)  dstSize = dstSize * 4 / 3;
		
		if (dstSize <= sparePx.bufferSize)
		{
			dstPx = OOMakePixMap(sparePx.pixels, dstWidth, srcPx.height, srcPx.format, 0, sparePx.bufferSize);
			// Give up ownership of the spare buffer only once dstPx has
			// really adopted it; otherwise the cleanup below must free it.
			if (dstPx.pixels == sparePx.pixels)  sparePx = kOONullPixMap;
		}
		else
		{
			dstPx = OOAllocatePixMap(dstWidth, srcPx.height, srcPx.format, 0, dstSize);
		}
		if (EXPECT_NOT(!OOIsValidPixMap(dstPx)))  { OK = NO; goto FAIL; }
		
		StretchHorizontally(srcPx, dstPx);
		DUMP_SCALE_DUMP(dstPx, @"stretched horizontally");
	}
	else if (dstWidth < srcPx.width)
	{
		// Squeeze horizontally. This can be done in-place.
		SqueezeHorizontally(srcPx, dstWidth);
		
		dstPx = srcPx;
		dstPx.width = dstWidth;
		dstPx.rowBytes = dstPx.width * OOPixMapBytesPerPixel(dstPx);
		DUMP_SCALE_DUMP(dstPx, @"squeezed horizontally");
	}
	else
	{
		// No horizontal scaling.
		dstPx = srcPx;
	}
	
	// Avoid a potential double free (if the realloc in EnsureCorrectDataSize() relocates the block).
	if (srcPx.pixels == dstPx.pixels)  srcPx.pixels = NULL;
	
	// dstPx is now the result.
	OK = EnsureCorrectDataSize(&dstPx, leaveSpaceForMipMaps);
	
FAIL:
	// Decide whether the spare buffer needs releasing before anything is
	// freed, so no pointer value is inspected after its block has gone.
	releaseSpare = (sparePx.pixels != dstPx.pixels && sparePx.pixels != srcPx.pixels);
	
	free(srcPx.pixels);
	if (releaseSpare)
	{
		free(sparePx.pixels);
	}
	if (!OK)
	{
		free(dstPx.pixels);
		dstPx.pixels = NULL;
	}
	
	return OK ? dstPx : kOONullPixMap;
}
340 :
341 :
/*	OOGenerateMipMaps
	Build the mip-map chain for a texture in place. Each level is written
	directly after the previous one, so textureBytes must have room for the
	whole chain.
	
	width and height must be powers of two and textureBytes must be non-NULL;
	otherwise an error is logged and NO is returned. The work is delegated to
	the generator for the given format; an unsupported format is logged and
	yields NO.
	
	FIXME: should take an OOPixMap.
*/
BOOL OOGenerateMipMaps(void *textureBytes, OOPixMapDimension width, OOPixMapDimension height, OOPixMapFormat format)
{
	BOOL				(*generator)(void *, OOPixMapDimension, OOPixMapDimension) = NULL;
	
	if (EXPECT_NOT(!(width == OORoundUpToPowerOf2_PixMap(width) && height == OORoundUpToPowerOf2_PixMap(height))))
	{
		OOLog(kOOLogParameterError, @"Non-power-of-two dimensions (%ux%u) passed to %s() - ignoring, data will be junk.", width, height, __PRETTY_FUNCTION__);
		return NO;
	}
	if (EXPECT_NOT(textureBytes == NULL))
	{
		OOLog(kOOLogParameterError, @"%@", @"NULL texture pointer passed to GenerateMipMaps().");
		return NO;
	}
	
	// Select the generator for this pixel size.
	switch (format)
	{
		case kOOPixMapRGBA:
			generator = GenerateMipMaps4;
			break;
			
		case kOOPixMapGrayscale:
			generator = GenerateMipMaps1;
			break;
			
		case kOOPixMapGrayscaleAlpha:
			generator = GenerateMipMaps2;
			break;
			
		case kOOPixMapInvalidFormat:
			break;
	}
	
	if (generator != NULL)
	{
		return generator(textureBytes, width, height);
	}
	
	OOLog(kOOLogParameterError, @"%s(): bad pixmap format (%@) - ignoring, data will be junk.", __PRETTY_FUNCTION__, OOPixMapFormatName(format));
	return NO;
}
375 :
376 :
/*	GenerateMipMaps1
	Generate mip-maps for a one-byte-per-pixel texture. Each level is stored
	immediately after the level it was derived from. Wide levels go through
	the multi-pixel scaler for the current word size; the remaining narrow
	levels go through the one-pixel scaler. Generation stops as soon as
	either dimension reaches 1. Always returns YES.
*/
static BOOL GenerateMipMaps1(void *textureBytes, OOPixMapDimension width, OOPixMapDimension height)
{
	OOPixMapDimension	levelWidth = width, levelHeight = height;
	uint8_t				*level, *nextLevel;
	
	DUMP_MIP_MAP_PREPARE(1);
	level = textureBytes;
	
#if OOLITE_NATIVE_64_BIT
	// Levels wider than 8 pixels: 8 output pixels per step.
	for (; 8 < levelWidth && 1 < levelHeight; level = nextLevel)
	{
		DUMP_MIP_MAP_DUMP(level, levelWidth, levelHeight);
		
		nextLevel = level + levelWidth * levelHeight;
		ScaleToHalf_1_x8(level, nextLevel, levelWidth, levelHeight);
		
		levelWidth >>= 1;
		levelHeight >>= 1;
	}
#else
	// Levels wider than 4 pixels: 4 output pixels per step.
	for (; 4 < levelWidth && 1 < levelHeight; level = nextLevel)
	{
		DUMP_MIP_MAP_DUMP(level, levelWidth, levelHeight);
		
		nextLevel = level + levelWidth * levelHeight;
		ScaleToHalf_1_x4(level, nextLevel, levelWidth, levelHeight);
		
		levelWidth >>= 1;
		levelHeight >>= 1;
	}
#endif
	
	// Whatever is left is too narrow for the wide scaler.
	for (; 1 < levelWidth && 1 < levelHeight; level = nextLevel)
	{
		DUMP_MIP_MAP_DUMP(level, levelWidth, levelHeight);
		
		nextLevel = level + levelWidth * levelHeight;
		ScaleToHalf_1_x1(level, nextLevel, levelWidth, levelHeight);
		
		levelWidth >>= 1;
		levelHeight >>= 1;
	}
	
	DUMP_MIP_MAP_DUMP(level, levelWidth, levelHeight);
	
	// TODO: handle residual 1xN/Nx1 mips. For now, we just limit maximum mip level for non-square textures.
	return YES;
}
428 :
429 :
/*	ScaleToHalf_1_x1
	Halve a one-byte-per-pixel image in both dimensions, one output pixel at
	a time. Each output pixel is the truncated mean of a 2x2 source square.
	Source pixels are read before the output pixel is written. Parameters
	are not validated; both loops are do/while, so srcWidth and srcHeight
	must each be at least 2.
*/
static void ScaleToHalf_1_x1(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight)
{
	uint8_t				*upper = srcBytes;
	uint8_t				*lower = upper + srcWidth;
	uint8_t				*out = dstBytes;
	OOPixMapDimension	colsLeft, rowsLeft;
	uint_fast8_t		topLeft, topRight, bottomLeft, bottomRight;
	uint_fast16_t		total;
	
	rowsLeft = srcHeight >> 1;
	do
	{
		colsLeft = srcWidth >> 1;
		do
		{
			// Fetch the 2x2 square.
			topLeft = upper[0];
			topRight = upper[1];
			bottomLeft = lower[0];
			bottomRight = lower[1];
			upper += 2;
			lower += 2;
			
			// Average and store.
			total = topLeft + topRight + bottomLeft + bottomRight;
			*out++ = total >> 2;
		} while (--colsLeft);
		
		// upper has consumed its row; lower's row was consumed too, so step over it.
		upper = lower;
		lower += srcWidth;
	} while (--rowsLeft);
}
468 :
469 :
470 : #if !OOLITE_NATIVE_64_BIT
471 :
/*	ScaleToHalf_1_x4
	Halve a one-byte-per-pixel image in both dimensions, producing four
	output pixels (one 32-bit word) per step from a 8x2 block of source
	pixels read as four 32-bit words. Horizontally adjacent bytes are summed
	by splitting each word into even and odd bytes, which leaves one 10-bit
	sum per 16-bit lane. Parameters are not validated.
*/
static void ScaleToHalf_1_x4(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight)
{
	const uint_fast32_t	kEvenBytes = 0x00FF00FF;
	const uint_fast32_t	kOddBytes = 0xFF00FF00;
	uint32_t			*upper, *lower, *out;
	uint_fast32_t		upperA, upperB, lowerA, lowerB;
	uint_fast32_t		sumA, sumB, highHalf, lowHalf;
	OOPixMapDimension	wordsPerRow, colsLeft, rowsLeft;
	
	wordsPerRow = srcWidth >> 2;	// Source row length in 32-bit words
	upper = srcBytes;
	lower = upper + wordsPerRow;
	out = dstBytes;
	
	rowsLeft = srcHeight >> 1;
	do
	{
		colsLeft = wordsPerRow >> 1;
		do
		{
			// Two consecutive words from each of the two source rows.
			upperA = *upper++;
			upperB = *upper++;
			lowerA = *lower++;
			lowerB = *lower++;
			
			// Per 16-bit lane: sum of a 2x2 square of source pixels.
			sumA = (upperA & kEvenBytes) + ((upperA & kOddBytes) >> 8) +
				   (lowerA & kEvenBytes) + ((lowerA & kOddBytes) >> 8);
			sumB = (upperB & kEvenBytes) + ((upperB & kOddBytes) >> 8) +
				   (lowerB & kEvenBytes) + ((lowerB & kOddBytes) >> 8);
			
			// Which sum supplies the upper two bytes of the output word
			// depends on byte order.
#if OOLITE_BIG_ENDIAN
			highHalf = sumA;
			lowHalf = sumB;
#elif OOLITE_LITTLE_ENDIAN
			highHalf = sumB;
			lowHalf = sumA;
#else
	#error Neither OOLITE_BIG_ENDIAN nor OOLITE_LITTLE_ENDIAN is defined as nonzero!
#endif
			
			// Divide each lane by four and pack the four results into one word.
			*out++ = ((highHalf << 6) & 0xFF000000) | ((highHalf << 14) & 0x00FF0000) |
					 ((lowHalf >> 10) & 0x0000FF00) | ((lowHalf >> 2) & 0x000000FF);
		} while (--colsLeft);
		
		// Step over the row lower has just consumed.
		upper = lower;
		lower += wordsPerRow;
	} while (--rowsLeft);
}
526 :
527 : #else // OOLITE_NATIVE_64_BIT
528 :
/*	ScaleToHalf_1_x8
	Halve a one-byte-per-pixel image in both dimensions, producing eight
	output pixels (one 64-bit word) per step from a 16x2 block of source
	pixels read as four 64-bit words. Horizontally adjacent bytes are summed
	by splitting each word into even and odd bytes, which leaves one 10-bit
	sum per 16-bit lane. Parameters are not validated.
*/
static void ScaleToHalf_1_x8(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight)
{
	const uint_fast64_t	kEvenBytes = 0x00FF00FF00FF00FFULL;
	const uint_fast64_t	kOddBytes = 0xFF00FF00FF00FF00ULL;
	uint64_t			*upper, *lower, *out;
	uint_fast64_t		upperA, upperB, lowerA, lowerB;
	uint_fast64_t		sumA, sumB, highHalf, lowHalf;
	OOPixMapDimension	wordsPerRow, colsLeft, rowsLeft;
	
	wordsPerRow = srcWidth >> 3;	// Source row length in 64-bit words
	upper = srcBytes;
	lower = upper + wordsPerRow;
	out = dstBytes;
	
	rowsLeft = srcHeight >> 1;
	do
	{
		colsLeft = wordsPerRow >> 1;
		do
		{
			// Two consecutive words from each of the two source rows.
			upperA = *upper++;
			upperB = *upper++;
			lowerA = *lower++;
			lowerB = *lower++;
			
			// Per 16-bit lane: sum of a 2x2 square of source pixels.
			sumA = (upperA & kEvenBytes) + ((upperA & kOddBytes) >> 8) +
				   (lowerA & kEvenBytes) + ((lowerA & kOddBytes) >> 8);
			sumB = (upperB & kEvenBytes) + ((upperB & kOddBytes) >> 8) +
				   (lowerB & kEvenBytes) + ((lowerB & kOddBytes) >> 8);
			
			// Which sum supplies the upper four bytes of the output word
			// depends on byte order.
#if OOLITE_BIG_ENDIAN
			highHalf = sumA;
			lowHalf = sumB;
#elif OOLITE_LITTLE_ENDIAN
			highHalf = sumB;
			lowHalf = sumA;
#else
	#error Neither OOLITE_BIG_ENDIAN nor OOLITE_LITTLE_ENDIAN is defined as nonzero!
#endif
			
			// Divide each lane by four and pack the eight results into one word.
			highHalf = ((highHalf << 6) & 0xFF00000000000000ULL) |
					   ((highHalf << 14) & 0x00FF000000000000ULL) |
					   ((highHalf << 22) & 0x0000FF0000000000ULL) |
					   ((highHalf << 30) & 0x000000FF00000000ULL);
			lowHalf = ((lowHalf >> 26) & 0x00000000FF000000ULL) |
					  ((lowHalf >> 18) & 0x0000000000FF0000ULL) |
					  ((lowHalf >> 10) & 0x000000000000FF00ULL) |
					  ((lowHalf >> 2) & 0x00000000000000FFULL);
			
			*out++ = highHalf | lowHalf;
		} while (--colsLeft);
		
		// Step over the row lower has just consumed.
		upper = lower;
		lower += wordsPerRow;
	} while (--rowsLeft);
}
595 :
596 : #endif
597 :
598 :
/*	GenerateMipMaps2
	Generate mip-maps for a two-byte-per-pixel texture. Each level is stored
	immediately after the level it was derived from. Generation stops as
	soon as either dimension reaches 1. Always returns YES.
	
	TODO: multiple pixel two-plane scalers (ScaleToHalf_2_x4 for levels wider
	than 4 pixels on 64-bit builds, ScaleToHalf_2_x2 for levels wider than 2
	pixels otherwise). Until those exist every level goes through the
	one-pixel scaler.
*/
static BOOL GenerateMipMaps2(void *textureBytes, OOPixMapDimension width, OOPixMapDimension height)
{
	OOPixMapDimension	levelWidth = width, levelHeight = height;
	uint16_t			*level, *nextLevel;
	
	DUMP_MIP_MAP_PREPARE(2);
	level = textureBytes;
	
	for (; 1 < levelWidth && 1 < levelHeight; level = nextLevel)
	{
		DUMP_MIP_MAP_DUMP(level, levelWidth, levelHeight);
		
		nextLevel = level + levelWidth * levelHeight;
		ScaleToHalf_2_x1(level, nextLevel, levelWidth, levelHeight);
		
		levelWidth >>= 1;
		levelHeight >>= 1;
	}
	
	DUMP_MIP_MAP_DUMP(level, levelWidth, levelHeight);
	
	// TODO: handle residual 1xN/Nx1 mips. For now, we just limit maximum mip level for non-square textures.
	return YES;
}
653 :
654 :
/*	ScaleToHalf_2_x1
	Halve a two-byte-per-pixel image in both dimensions, one output pixel at
	a time. The two bytes of each pixel are averaged independently over a
	2x2 source square (truncating). Parameters are not validated; both loops
	are do/while, so srcWidth and srcHeight must each be at least 2.
*/
static void ScaleToHalf_2_x1(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight)
{
	uint16_t			*upper = srcBytes;
	uint16_t			*lower = upper + srcWidth;
	uint16_t			*out = dstBytes;
	OOPixMapDimension	colsLeft, rowsLeft;
	uint_fast16_t		topLeft, topRight, bottomLeft, bottomRight;
	uint_fast32_t		highTotal, lowTotal;
	
	rowsLeft = srcHeight >> 1;
	do
	{
		colsLeft = srcWidth >> 1;
		do
		{
			// Fetch the 2x2 square.
			topLeft = upper[0];
			topRight = upper[1];
			bottomLeft = lower[0];
			bottomRight = lower[1];
			upper += 2;
			lower += 2;
			
			// Sum the high and low bytes separately.
			highTotal = (topLeft & 0xFF00) + (topRight & 0xFF00) + (bottomLeft & 0xFF00) + (bottomRight & 0xFF00);
			lowTotal = (topLeft & 0x00FF) + (topRight & 0x00FF) + (bottomLeft & 0x00FF) + (bottomRight & 0x00FF);
			
			// Drop the two bits of the high sum that would land in the low
			// byte, then divide both sums by four with a single shift.
			*out++ = ((highTotal & 0x3FC00) | lowTotal) >> 2;
		} while (--colsLeft);
		
		// Step over the row lower has just consumed.
		upper = lower;
		lower += srcWidth;
	} while (--rowsLeft);
}
694 :
695 :
/*	GenerateMipMaps4
	Generate mip-maps for a four-byte-per-pixel texture. Each level is
	stored immediately after the level it was derived from. On 64-bit builds
	levels wider than two pixels use the two-pixel scaler, and the single
	remaining 2-wide level (if any) uses the one-pixel scaler. Generation
	stops as soon as either dimension reaches 1. Always returns YES.
*/
static BOOL GenerateMipMaps4(void *textureBytes, OOPixMapDimension width, OOPixMapDimension height)
{
	OOPixMapDimension		w = width, h = height;
	uint32_t				*curr, *next;
	
	DUMP_MIP_MAP_PREPARE(4);
	curr = textureBytes;
	
#if OOLITE_NATIVE_64_BIT
	while (2 < w && 1 < h)
	{
		DUMP_MIP_MAP_DUMP(curr, w, h);
		
		next = curr + w * h;
		ScaleToHalf_4_x2(curr, next, w, h);
		
		w >>= 1;
		h >>= 1;
		curr = next;
	}
	// After the loop w is at most 2, so at most one more level is possible.
	if (EXPECT(1 < w && 1 < h))
	{
		DUMP_MIP_MAP_DUMP(curr, w, h);
		
		next = curr + w * h;
		ScaleToHalf_4_x1(curr, next, w, h);
		
		w >>= 1;
		h >>= 1;
		// Advance to the level just written so the final dump below shows
		// it rather than re-dumping the previous level with halved sizes.
		curr = next;
	}
#else
	while (1 < w && 1 < h)
	{
		DUMP_MIP_MAP_DUMP(curr, w, h);
		
		next = curr + w * h;
		ScaleToHalf_4_x1(curr, next, w, h);
		
		w >>= 1;
		h >>= 1;
		curr = next;
	}
#endif
	
	DUMP_MIP_MAP_DUMP(curr, w, h);
	
	// TODO: handle residual 1xN/Nx1 mips. For now, we just limit maximum mip level for non-square textures.
	return YES;
}
745 :
746 :
/*	ScaleToHalf_4_x1
	Halve a four-byte-per-pixel image in both dimensions, one output pixel
	at a time. Each channel of the output pixel is the truncated mean of the
	same channel over a 2x2 source square.
	
	Channels are treated as ABGR, but the actual layout is irrelevant since
	all four are processed alike. Alternating channels are accumulated in
	two separate words so that each 10-bit sum has a 16-bit lane to itself
	and cannot spill into a neighbouring channel.
	
	Parameters are not validated; both loops are do/while, so srcWidth and
	srcHeight must each be at least 2.
*/
static void ScaleToHalf_4_x1(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight)
{
	const uint_fast32_t	kMaskAG = 0xFF00FF00;
	const uint_fast32_t	kMaskBR = 0x00FF00FF;
	uint32_t			*upper = srcBytes;
	uint32_t			*lower = upper + srcWidth;
	uint32_t			*out = dstBytes;
	OOPixMapDimension	colsLeft, rowsLeft;
	uint_fast32_t		topLeft, topRight, bottomLeft, bottomRight;
	uint_fast32_t		sumAG, sumBR;
	
	rowsLeft = srcHeight >> 1;
	do
	{
		colsLeft = srcWidth >> 1;
		do
		{
			// Fetch the 2x2 square.
			topLeft = upper[0];
			topRight = upper[1];
			bottomLeft = lower[0];
			bottomRight = lower[1];
			upper += 2;
			lower += 2;
			
			// Accumulate odd-byte and even-byte channels separately.
			sumAG = ((topLeft & kMaskAG) >> 8) + ((topRight & kMaskAG) >> 8) +
					((bottomLeft & kMaskAG) >> 8) + ((bottomRight & kMaskAG) >> 8);
			sumBR = (topLeft & kMaskBR) + (topRight & kMaskBR) +
					(bottomLeft & kMaskBR) + (bottomRight & kMaskBR);
			
			// Divide by four while moving each channel back to its byte
			// (<< 6 is >> 2 followed by << 8), then merge.
			*out++ = ((sumAG << 6) & kMaskAG) | ((sumBR >> 2) & kMaskBR);
		} while (--colsLeft);
		
		// Step over the row lower has just consumed.
		upper = lower;
		lower += srcWidth;
	} while (--rowsLeft);
}
799 :
800 :
801 : #if OOLITE_NATIVE_64_BIT
802 :
/*	ScaleToHalf_4_x2
	Halve a four-byte-per-pixel image in both dimensions, producing two
	output pixels (one 64-bit word) per step from a 4x2 block of source
	pixels read as four 64-bit words (two pixels each).
	
	Memory is accessed through uint64_t pointers: every load and store must
	cover exactly two 32-bit pixels, which the exact-width type guarantees
	and uint_fast64_t does not.
	
	Parameters are not validated; srcWidth must be a multiple of four.
*/
static void ScaleToHalf_4_x2(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight)
{
	OOPixMapDimension		x, y;
	uint64_t				*src0, *src1, *dst;
	uint_fast64_t			px00, px01, px10, px11;
	
	/*	We treat channel layout as ABGR -- actual layout doesn't matter since
		each channel is handled the same. We use two accumulators, with
		alternating channels, so overflow doesn't cross channel boundaries,
		while having less overhead than one accumulator per channel.
	*/
	uint_fast64_t			ag0, ag1, br0, br1;
	
	srcWidth >>= 1;	// Two pixels at a time
	src0 = srcBytes;
	src1 = src0 + srcWidth;
	dst = dstBytes;
	
	y = srcHeight >> 1;
	do
	{
		x = srcWidth >> 1;
		do
		{
			// Read eight pixels (4x2)...
			px00 = *src0++;
			px01 = *src0++;
			px10 = *src1++;
			px11 = *src1++;
			
			// ...and add them together vertically, channel by channel.
			ag0 =  (px00 & 0xFF00FF00FF00FF00ULL) >> 8;
			br0 =  (px00 & 0x00FF00FF00FF00FFULL);
			ag0 += (px10 & 0xFF00FF00FF00FF00ULL) >> 8;
			br0 += (px10 & 0x00FF00FF00FF00FFULL);
			ag1 =  (px01 & 0xFF00FF00FF00FF00ULL) >> 8;
			br1 =  (px01 & 0x00FF00FF00FF00FFULL);
			ag1 += (px11 & 0xFF00FF00FF00FF00ULL) >> 8;
			br1 += (px11 & 0x00FF00FF00FF00FFULL);
			
#if OOLITE_BIG_ENDIAN
			// Add the two pixels within each word together. The result for
			// the first output pixel ends up in the high half of ag0/br0,
			// the second in the low half of ag1/br1...
			ag0 = ag0 + (ag0 << 32);
			br0 = br0 + (br0 << 32);
			ag1 = ag1 + (ag1 >> 32);
			br1 = br1 + (br1 >> 32);
			
			// ...keep only the meaningful halves, and divide by four while
			// moving each channel back to its byte position.
			ag0 = ((ag0 & 0x03FC03FC00000000ULL) | (ag1 & 0x0000000003FC03FCULL)) << 6;
			br0 = ((br0 & 0x03FC03FC00000000ULL) | (br1 & 0x0000000003FC03FCULL)) >> 2;
#elif OOLITE_LITTLE_ENDIAN
			// Add the two pixels within each word together. The result for
			// the first output pixel ends up in the low half of ag0/br0,
			// the second in the high half of ag1/br1...
			ag0 = ag0 + (ag0 >> 32);
			br0 = br0 + (br0 >> 32);
			ag1 = ag1 + (ag1 << 32);
			br1 = br1 + (br1 << 32);
			
			// ...keep only the meaningful halves, and divide by four while
			// moving each channel back to its byte position.
			ag0 = ((ag0 & 0x0000000003FC03FCULL) | (ag1 & 0x03FC03FC00000000ULL)) << 6;
			br0 = ((br0 & 0x0000000003FC03FCULL) | (br1 & 0x03FC03FC00000000ULL)) >> 2;
#else
	#error Unknown architecture.
#endif
			
			// ...and write the two output pixels.
			*dst++ = ag0 | br0;
		} while (--x);
		
		// Skip a row for each source row
		src0 = src1;
		src1 += srcWidth;
	} while (--y);
}
876 :
877 : #endif
878 :
879 :
880 : #if DUMP_MIP_MAPS
/*	DumpMipMap (debug aid, DUMP_MIP_MAPS builds only)
	Wrap one mip level in a temporary OOPixMap and hand it to OODumpPixMap()
	under a name built from the dump ID, level index, format name and
	dimensions.
*/
static void DumpMipMap(void *data, OOPixMapDimension width, OOPixMapDimension height, OOPixMapFormat format, SInt32 ID, uint32_t level)
{
	OOPixMap			levelPixMap;
	NSString			*dumpName = nil;
	
	levelPixMap = OOMakePixMap(data, width, height, format, 0, 0);
	dumpName = [NSString stringWithFormat:@"mipmap dump ID %u lv%u %@ %ux%u", ID, level, OOPixMapFormatName(format), width, height];
	
	OODumpPixMap(levelPixMap, dumpName);
}
886 : #endif
887 :
888 :
/*	StretchVerticallyN_x1
	Stretch srcPx vertically into dstPx one byte at a time; works for any
	pixel size since every byte is interpolated independently.
	
	For output row y (1 <= y < dstPx.height) the source position is
	srcPx.height * y / dstPx.height in 24.8 fixed point. The row is a blend
	of the source row selected for the previous output row and the one
	selected for this position, weighted by the fractional part. The final
	output row is a straight copy of the last source row selected.
	
	The destination is written contiguously (no row padding); the source is
	stepped using srcPx.rowBytes.
*/
static void StretchVerticallyN_x1(OOPixMap srcPx, OOPixMap dstPx)
{
	uint8_t				*src, *src0, *src1, *prev, *dst;
	uint8_t				px0, px1;
	uint_fast32_t		x, y, xCount;
	size_t				srcRowBytes;
	uint_fast16_t		weight0, weight1;
	uint_fast32_t		fractY;	// Y coordinate, fixed-point (24.8)
	
	src = srcPx.pixels;
	srcRowBytes = srcPx.rowBytes;
	dst = dstPx.pixels;	// Assumes dstPx.width == dstPx.rowBytes.
	
	src0 = prev = src;
	
	// Number of bytes to process per row.
	xCount = srcPx.width * OOPixMapBytesPerPixel(srcPx);
	
	for (y = 1; y != dstPx.height; ++y)
	{
		fractY = ((srcPx.height * y) << 8) / dstPx.height;
		
		src0 = prev;
		prev = src1 = src + srcRowBytes * (fractY >> 8);
		
		weight1 = fractY & 0xFF;
		weight0 = 0x100 - weight1;
		
		x = xCount;
		while (x--)
		{
			px0 = *src0++;
			px1 = *src1++;
			
			*dst++ = (px0 * weight0 + px1 * weight1) >> 8;
		}
	}
	
	/*	Copy last row (without referring to the last-plus-oneth row).
		src0 was advanced by the inner loop and so points one row's worth of
		bytes beyond the row it started on, which is past the end of the
		source when the stretch factor is two or more. Restart from prev,
		the beginning of the last source row selected.
	*/
	src0 = prev;
	x = xCount;
	while (x--)
	{
		*dst++ = *src0++;
	}
}
933 :
934 :
935 : #if !OOLITE_NATIVE_64_BIT
936 :
/*	StretchVerticallyN_x4
	Stretch srcPx vertically into dstPx, four bytes at a time. Every byte is
	interpolated independently, so this works for any pixel size provided
	rows are a whole number of 32-bit words.
	
	For output row y (1 <= y < dstPx.height) the source position is
	srcPx.height * y / dstPx.height in 24.8 fixed point. The row is a blend
	of the source row selected for the previous output row and the one
	selected for this position, weighted by the fractional part. The final
	output row is a straight copy of the last source row selected.
	
	The destination is written contiguously (no row padding); the source is
	stepped using srcPx.rowBytes.
*/
static void StretchVerticallyN_x4(OOPixMap srcPx, OOPixMap dstPx)
{
	uint8_t				*src;
	uint32_t			*src0, *src1, *prev, *dst;
	uint32_t			px0, px1, ag, br;
	uint_fast32_t		x, y, xCount;
	size_t				srcRowBytes;
	uint_fast16_t		weight0, weight1;
	uint_fast32_t		fractY;	// Y coordinate, fixed-point (24.8)
	
	src = srcPx.pixels;
	srcRowBytes = srcPx.rowBytes;
	dst = dstPx.pixels;	// Assumes no row padding.
	
	src0 = prev = (uint32_t *)src;
	
	// Number of 32-bit words to process per row.
	xCount = (srcPx.width * OOPixMapBytesPerPixel(srcPx)) >> 2;
	
	for (y = 1; y != dstPx.height; ++y)
	{
		fractY = ((srcPx.height * y) << 8) / dstPx.height;
		
		src0 = prev;
		prev = src1 = (uint32_t *)(src + srcRowBytes * (fractY >> 8));
		
		weight1 = fractY & 0xFF;
		weight0 = 0x100 - weight1;
		
		x = xCount;
		while (x--)
		{
			px0 = *src0++;
			px1 = *src1++;
			
			// Odd and even bytes are weighted in separate words so each
			// 16-bit product has a lane to itself.
			ag = ((px0 & 0xFF00FF00) >> 8) * weight0 + ((px1 & 0xFF00FF00) >> 8) * weight1;
			br = (px0 & 0x00FF00FF) * weight0 + (px1 & 0x00FF00FF) * weight1;
			
			*dst++ = (ag & 0xFF00FF00) | ((br >> 8) & 0x00FF00FF);
		}
	}
	
	/*	Copy last row (without referring to the last-plus-oneth row).
		src0 was advanced by the inner loop and so points one row's worth of
		words beyond the row it started on, which is past the end of the
		source when the stretch factor is two or more. Restart from prev,
		the beginning of the last source row selected.
	*/
	src0 = prev;
	x = xCount;
	while (x--)
	{
		*dst++ = *src0++;
	}
}
985 :
986 : #else // OOLITE_NATIVE_64_BIT
987 :
/*	StretchVerticallyN_x8
	Stretch srcPx vertically into dstPx, eight bytes at a time. Every byte
	is interpolated independently, so this works for any pixel size provided
	rows are a whole number of 64-bit words.
	
	For output row y (1 <= y < dstPx.height) the source position is
	srcPx.height * y / dstPx.height in 24.8 fixed point. The row is a blend
	of the source row selected for the previous output row and the one
	selected for this position, weighted by the fractional part. The final
	output row is a straight copy of the last source row selected.
	
	The destination is written contiguously (no row padding); the source is
	stepped using srcPx.rowBytes.
*/
static void StretchVerticallyN_x8(OOPixMap srcPx, OOPixMap dstPx)
{
	uint8_t				*src;
	uint64_t			*src0, *src1, *prev, *dst;
	uint64_t			px0, px1, agag, brbr;
	uint_fast32_t		x, y, xCount;
	size_t				srcRowBytes;
	uint_fast16_t		weight0, weight1;
	uint_fast32_t		fractY;	// Y coordinate, fixed-point (24.8)
	
	src = srcPx.pixels;
	srcRowBytes = srcPx.rowBytes;
	dst = dstPx.pixels;	// Assumes dstPx.width == dstPx.rowBytes.
	
	src0 = prev = (uint64_t *)src;
	
	// Number of 64-bit words to process per row.
	xCount = (srcPx.width * OOPixMapBytesPerPixel(srcPx)) >> 3;
	
	for (y = 1; y != dstPx.height; ++y)
	{
		fractY = ((srcPx.height * y) << 8) / dstPx.height;
		
		src0 = prev;
		prev = src1 = (uint64_t *)(src + srcRowBytes * (fractY >> 8));
		
		weight1 = fractY & 0xFF;
		weight0 = 0x100 - weight1;
		
		x = xCount;
		while (x--)
		{
			px0 = *src0++;
			px1 = *src1++;
			
			// Odd and even bytes are weighted in separate words so each
			// 16-bit product has a lane to itself.
			agag = ((px0 & 0xFF00FF00FF00FF00ULL) >> 8) * weight0 + ((px1 & 0xFF00FF00FF00FF00ULL) >> 8) * weight1;
			brbr = (px0 & 0x00FF00FF00FF00FFULL) * weight0 + (px1 & 0x00FF00FF00FF00FFULL) * weight1;
			
			*dst++ = (agag & 0xFF00FF00FF00FF00ULL) | ((brbr >> 8) & 0x00FF00FF00FF00FFULL);
		}
	}
	
	/*	Copy last row (without referring to the last-plus-oneth row).
		src0 was advanced by the inner loop and so points one row's worth of
		words beyond the row it started on, which is past the end of the
		source when the stretch factor is two or more. Restart from prev,
		the beginning of the last source row selected.
	*/
	src0 = prev;
	x = xCount;
	while (x--)
	{
		*dst++ = *src0++;
	}
}
1036 : #endif
1037 :
1038 :
/*	StretchHorizontally1
	Stretch a one-byte-per-pixel pixmap horizontally into dstPx, which must
	have the same height. Both pixmaps must be valid and one byte per pixel
	(asserted).
	
	The source X position advances by srcPx.width / dstPx.width per output
	pixel, held in 20.12 fixed point. Each output pixel blends the
	previously sampled source pixel with the one at the current position,
	weighted by the top eight bits of the fraction.
	
	The destination is written contiguously (no row padding); the source is
	stepped using srcPx.rowBytes.
*/
static void StretchHorizontally1(OOPixMap srcPx, OOPixMap dstPx)
{
	uint8_t				*srcRow, *dst;
	uint8_t				px0, px1;
	uint_fast32_t		x, y, xCount;
	uint_fast32_t		srcIndex, lastSrcIndex;
	size_t				srcRowBytes;
	uint_fast16_t		weight0, weight1;
	uint_fast32_t		fractX, deltaX;	// X coordinate, fixed-point (20.12), allowing widths up to 1 mebipixel
	
	NSCParameterAssert(OOIsValidPixMap(srcPx) && OOPixMapBytesPerPixel(srcPx) == 1 && OOIsValidPixMap(dstPx) && OOPixMapBytesPerPixel(dstPx) == 1);
	
	srcRow = srcPx.pixels;
	srcRowBytes = srcPx.rowBytes;
	xCount = dstPx.width;
	dst = dstPx.pixels;	// Assumes no row padding
	
	lastSrcIndex = srcPx.width - 1;
	deltaX = (srcPx.width << 12) / dstPx.width;
	
	for (y = 0; y != dstPx.height; ++y)
	{
		// Each row starts from its own first pixel.
		px1 = *srcRow;
		fractX = 0;
		
		for (x = 0; x != xCount; ++x)
		{
			fractX += deltaX;
			
			weight1 = (fractX >> 4) & 0xFF;
			weight0 = 0x100 - weight1;
			
			/*	When dstPx.width divides srcPx.width << 12 exactly, the last
				step lands on index srcPx.width, one past the row (and, on
				the last row, past the pixel data). Clamp it. The weight of
				that sample is zero in exactly that case, so the output is
				unaffected.
			*/
			srcIndex = fractX >> 12;
			if (srcIndex > lastSrcIndex)  srcIndex = lastSrcIndex;
			
			px0 = px1;
			px1 = srcRow[srcIndex];
			
			*dst++ = (px0 * weight0 + px1 * weight1) >> 8;
		}
		
		srcRow += srcRowBytes;
	}
}
1096 :
1097 :
/*	Stretch a 2-byte-per-pixel (two 8-bit channel) image horizontally using
	linear interpolation.
	
	srcPx	Source image; consecutive rows are srcPx.rowBytes apart.
	dstPx	Destination image; dstPx.width * dstPx.height pixels are written
			contiguously from dstPx.pixels (no row padding).
	
	The source X position advances in 20.12 fixed point. Each output pixel
	blends the previously sampled source pixel (px0) with the pixel at the new
	position (px1), weighted by bits 4-11 of the position. The high and low
	bytes are weighted separately and recombined.
*/
static void StretchHorizontally2(OOPixMap srcPx, OOPixMap dstPx)
{
	uint16_t			*src, *srcStart, *dst;
	uint16_t			px0, px1;
	uint_fast32_t		hi, lo;
	uint_fast32_t		x, y, xCount;
	size_t				srcRowBytes;
	uint_fast16_t		weight0, weight1;
	uint_fast32_t		fractX, deltaX;	// X coordinate, fixed-point (20.12), allowing widths up to 1 mebipixel
	
	NSCParameterAssert(OOIsValidPixMap(srcPx) && OOPixMapBytesPerPixel(srcPx) == 2 && OOIsValidPixMap(dstPx) && OOPixMapBytesPerPixel(dstPx) == 2);
	
	srcStart = srcPx.pixels;
	srcRowBytes = srcPx.rowBytes;
	xCount = dstPx.width;
	dst = dstPx.pixels;	// Assumes no row padding
	
	deltaX = (srcPx.width << 12) / dstPx.width;
	px1 = *srcStart;
	
	// All rows except the last. The final sample of each row may come from
	// just past the row's last pixel, which here is still inside the buffer.
	for (y = 0; y < dstPx.height - 1; ++y)
	{
		fractX = 0;
		
		for (x = 0; x != xCount; ++x)
		{
			fractX += deltaX;
			
			weight1 = (fractX >> 4) & 0xFF;
			weight0 = 0x100 - weight1;
			
			px0 = px1;
			src = srcStart + (fractX >> 12);
			px1 = *src;
			
			hi = (px0 & 0xFF00) * weight0 + (px1 & 0xFF00) * weight1;
			lo = (px0 & 0x00FF) * weight0 + (px1 & 0x00FF) * weight1;
			
			*dst++ = ((hi & 0xFF0000) | (lo & 0x00FF00)) >> 8;
		}
		
		srcStart = (uint16_t *)((char *)srcStart + srcRowBytes);
		px1 = *srcStart;
	}
	
	// Copy last row without reading off end of buffer
	fractX = 0;
	for (x = 0; x != xCount; ++x)
	{
		fractX += deltaX;
		
		weight1 = (fractX >> 4) & 0xFF;
		weight0 = 0x100 - weight1;
		
		px0 = px1;
		src = srcStart + (fractX >> 12);
		/*	The final step can land on srcStart[srcPx.width], which for the
			last row lies beyond the buffer. Skip that read and reuse the
			previous sample, as StretchHorizontally4 does.
		*/
		if (EXPECT(x < xCount - 1))  px1 = *src;
		
		hi = (px0 & 0xFF00) * weight0 + (px1 & 0xFF00) * weight1;
		lo = (px0 & 0x00FF) * weight0 + (px1 & 0x00FF) * weight1;
		
		*dst++ = ((hi & 0xFF0000) | (lo & 0x00FF00)) >> 8;
	}
}
1162 :
1163 :
/*	Stretch a 4-byte-per-pixel image horizontally using linear interpolation.
	
	The source X position is tracked in 20.12 fixed point. Every output pixel
	is a blend of the previously sampled source pixel and the pixel at the new
	position; bits 4-11 of the position give the weight of the new pixel.
	Channels are processed two at a time (odd bytes and even bytes).
	
	Output is written contiguously from dstPx.pixels (no row padding).
*/
static void StretchHorizontally4(OOPixMap srcPx, OOPixMap dstPx)
{
	uint32_t			*rowStart, *out;
	uint32_t			left, right;
	uint32_t			ag, br;
	uint_fast32_t		col, row, outWidth;
	size_t				rowStride;
	uint_fast16_t		leftWeight, rightWeight;
	uint_fast32_t		pos, step;	// X coordinate, fixed-point (20.12)
	
	NSCParameterAssert(OOIsValidPixMap(srcPx) && OOPixMapBytesPerPixel(srcPx) == 4 && OOIsValidPixMap(dstPx) && OOPixMapBytesPerPixel(dstPx) == 4);
	
	rowStart = srcPx.pixels;
	out = dstPx.pixels;
	rowStride = srcPx.rowBytes;
	outWidth = dstPx.width;
	
	step = (srcPx.width << 12) / dstPx.width;
	right = *rowStart;
	
	// Every row but the last: sampling is unguarded.
	for (row = 0; row < dstPx.height - 1; ++row)
	{
		for (col = 0, pos = step; col != outWidth; ++col, pos += step)
		{
			rightWeight = (pos >> 4) & 0xFF;
			leftWeight = 0x100 - rightWeight;
			
			left = right;
			right = rowStart[pos >> 12];
			
			ag = ((left >> 8) & 0x00FF00FF) * leftWeight + ((right >> 8) & 0x00FF00FF) * rightWeight;
			br = (left & 0x00FF00FF) * leftWeight + (right & 0x00FF00FF) * rightWeight;
			
			*out++ = (ag & 0xFF00FF00) | ((br >> 8) & 0x00FF00FF);
		}
		
		// Step to the next source row and prime the sampler with its first pixel.
		rowStart = (uint32_t *)((char *)rowStart + rowStride);
		right = *rowStart;
	}
	
	// Last row: the final sample is skipped so nothing past the buffer is read.
	for (col = 0, pos = step; col != outWidth; ++col, pos += step)
	{
		rightWeight = (pos >> 4) & 0xFF;
		leftWeight = 0x100 - rightWeight;
		
		left = right;
		if (EXPECT(col < outWidth - 1))  right = rowStart[pos >> 12];
		
		ag = ((left >> 8) & 0x00FF00FF) * leftWeight + ((right >> 8) & 0x00FF00FF) * rightWeight;
		br = (left & 0x00FF00FF) * leftWeight + (right & 0x00FF00FF) * rightWeight;
		
		*out++ = (ag & 0xFF00FF00) | ((br >> 8) & 0x00FF00FF);
	}
}
1228 :
1229 :
/*	Shrink a 1-byte-per-pixel image horizontally, in place.
	
	Each output pixel is a weighted average of the source pixels it covers:
	fully covered pixels get weight 0xFF, while the pixel straddling the
	boundary between two output pixels is shared between them according to
	bits 4-11 of the 20.12 fixed-point boundary position.
	
	Output rows are written back into srcPx.pixels, packed without padding.
*/
static void SqueezeHorizontally1(OOPixMap srcPx, OOPixMapDimension dstWidth)
{
	uint8_t				*rowStart, *cursor, *out;
	uint8_t				edgePx;
	uint_fast32_t		srcX, row, pixelsLeft, boundaryX;
	size_t				rowStride;
	uint_fast32_t		fixedX, step;
	uint_fast32_t		sum, total;
	uint_fast8_t		leadWeight, trailWeight;
	
	NSCParameterAssert(OOIsValidPixMap(srcPx) && OOPixMapBytesPerPixel(srcPx) == 1);
	
	rowStart = srcPx.pixels;
	out = rowStart;	// Output is placed in same buffer, without line padding.
	rowStride = srcPx.rowBytes;
	
	step = (srcPx.width << 12) / dstWidth;
	
	for (row = 0; row != srcPx.height; ++row)
	{
		cursor = rowStart;
		edgePx = *cursor;
		fixedX = 0;
		trailWeight = 0;
		srcX = 0;
		
		for (pixelsLeft = dstWidth; pixelsLeft-- != 0; )
		{
			fixedX += step;
			boundaryX = fixedX >> 12;
			
			// The leading edge gets whatever the previous output pixel left over.
			leadWeight = 0xFF - trailWeight;
			trailWeight = (fixedX >> 4) & 0xFF;
			
			sum = edgePx * leadWeight;
			total = leadWeight + trailWeight;
			
			// Fully covered interior pixels.
			for (++srcX; !EXPECT(srcX == boundaryX); ++srcX)
			{
				sum += *++cursor * 0xFF;
				total += 0xFF;
			}
			
			// Trailing edge; for the last output pixel no further source pixel is read.
			if (EXPECT(pixelsLeft))  edgePx = *++cursor;
			sum += edgePx * trailWeight;
			
			*out++ = sum / total;
		}
		
		rowStart = (uint8_t *)((char *)rowStart + rowStride);
	}
}
1292 :
1293 :
/*	Shrink a 1-byte-per-pixel image vertically, in place.
	
	srcPx		Image to shrink; rows are srcPx.rowBytes apart.
	dstHeight	Number of rows to shrink to.
	
	The source Y position advances in 20.12 fixed point. Each pass of the
	outer loop produces one output row covering source rows startY to endY:
	the first row is weighted by startWeight, rows strictly in between by
	0xFF, and the row at endY by endWeight (bits 4-11 of the end position).
	
	Output rows are written back into srcPx.pixels, packed without padding.
*/
static void SqueezeVertically1(OOPixMap srcPx, OOPixMapDimension dstHeight)
{
	uint8_t				*src, *srcStart, *dst;
	uint_fast32_t		x, y, xCount, startY, endY, lastRow;
	size_t				srcRowBytes;
	uint_fast32_t		endFractY, deltaY;
	uint_fast32_t		accum, weight;
	uint_fast8_t		startWeight, endWeight;
	
	NSCParameterAssert(OOIsValidPixMap(srcPx) && OOPixMapBytesPerPixel(srcPx) == 1);
	
	dst = srcPx.pixels;	// Output is placed in same buffer, without line padding.
	srcRowBytes = srcPx.rowBytes;
	xCount = srcPx.width;
	
	deltaY = (srcPx.height << 12) / dstHeight;
	endFractY = 0;
	
	endWeight = 0;
	endY = 0;
	
	lastRow = srcPx.height - 1;
	
	while (endY < lastRow)
	{
		endFractY += deltaY;
		startY = endY;
		endY = endFractY >> 12;
		
		// The start row gets whatever weight the previous output row left over.
		startWeight = 0xFF - endWeight;
		endWeight = (endFractY >> 4) & 0xFF;
		
		srcStart = (uint8_t *)((char *)srcPx.pixels + srcRowBytes * startY);
		
		for (x = 0; x != xCount; ++x)
		{
			src = srcStart++;
			accum = startWeight * *src;
			weight = startWeight + endWeight;
			
			y = startY;
			for (;;)
			{
				++y;
				src = (uint8_t *)((char *)src + srcRowBytes);
				if (EXPECT_NOT(y == endY))
				{
					/*	Row endY is readable whenever it is a real row, the
						last one included; only an endY beyond lastRow must be
						skipped. This matches SqueezeVertically2/4. Testing
						"<" here would drop the last source row even though
						endWeight is already counted in weight.
					*/
					if (EXPECT(endY <= lastRow))  accum += *src * endWeight;
					break;
				}
				else
				{
					accum += *src * 0xFF;
					weight += 0xFF;
				}
			}
			
			*dst++ = accum / weight;
		}
	}
}
1355 :
1356 :
/*	Macros to manage 2-channel accumulators in 2-channel squeeze scalers.
	accumHi is the sum of weighted high-channel pixels, shifted left 8 bits.
	accumLo is the sum of weighted low-channel pixels.
	weight is the sum of all pixel weights.
	
	The macros operate on variables named accumHi, accumLo and weight, which
	must be declared in the calling scope. ACCUM2 evaluates each argument
	exactly once. Its temporaries have suffixed names so that arguments named
	"px" or "wt" are not shadowed.
*/
#define ACCUM2(PX, WT) do { \
		uint16_t			accum2Px_ = (PX); \
		uint_fast32_t		accum2Wt_ = (WT); \
		accumHi += (accum2Px_ & 0xFF00) * accum2Wt_; \
		accumLo += (accum2Px_ & 0x00FF) * accum2Wt_; \
		weight += accum2Wt_; \
	} while (0)

#define CLEAR_ACCUM2() do { \
		accumHi = 0; \
		accumLo = 0; \
		weight = 0; \
	} while (0)

#define ACCUM2TOPX() ( \
		((accumHi / weight) & 0xFF00) | \
		((accumLo / weight) & 0x00FF) \
	)
1380 :
1381 :
/*	Shrink a 2-byte-per-pixel (two 8-bit channel) image horizontally, in
	place.
	
	Each output pixel is a weighted average of the source pixels it covers:
	fully covered pixels get weight 0xFF, while the pixel straddling the
	boundary between two output pixels is shared between them according to
	bits 4-11 of the 20.12 fixed-point boundary position. Accumulation is
	done through the ACCUM2 macro family.
	
	Output rows are written back into srcPx.pixels, packed without padding.
*/
static void SqueezeHorizontally2(OOPixMap srcPx, OOPixMapDimension dstWidth)
{
	uint16_t			*rowStart, *cursor, *out;
	uint16_t			edgePx;
	uint_fast32_t		srcX, row, pixelsLeft, boundaryX;
	size_t				rowStride;
	uint_fast32_t		fixedX, step;
	uint_fast32_t		accumHi, accumLo, weight;	// Names are fixed by the ACCUM2 macros.
	uint_fast8_t		edgeWeight;
	
	NSCParameterAssert(OOIsValidPixMap(srcPx) && OOPixMapBytesPerPixel(srcPx) == 2);
	
	rowStart = srcPx.pixels;
	out = rowStart;	// Output is placed in same buffer, without line padding.
	rowStride = srcPx.rowBytes;
	
	step = (srcPx.width << 12) / dstWidth;
	
	for (row = 0; row != srcPx.height; ++row)
	{
		cursor = rowStart;
		edgePx = *cursor;
		fixedX = 0;
		edgeWeight = 0;
		srcX = 0;
		
		for (pixelsLeft = dstWidth; pixelsLeft-- != 0; )
		{
			fixedX += step;
			boundaryX = fixedX >> 12;
			
			CLEAR_ACCUM2();
			
			// Leading edge: whatever the previous output pixel left over.
			edgeWeight = 0xFF - edgeWeight;
			ACCUM2(edgePx, edgeWeight);
			
			edgeWeight = (fixedX >> 4) & 0xFF;
			
			// Fully covered interior pixels.
			for (++srcX; !EXPECT(srcX == boundaryX); ++srcX)
			{
				ACCUM2(*++cursor, 0xFF);
			}
			
			// Trailing edge; for the last output pixel no further source pixel is read.
			if (EXPECT(pixelsLeft))  edgePx = *++cursor;
			ACCUM2(edgePx, edgeWeight);
			
			*out++ = ACCUM2TOPX();
		}
		
		rowStart = (uint16_t *)((char *)rowStart + rowStride);
	}
}
1443 :
1444 :
/*	Shrink a 2-byte-per-pixel (two 8-bit channel) image vertically, in place.
	
	The source Y position advances in 20.12 fixed point. Each pass of the
	outer loop produces one output row covering a band of source rows: the
	band's first row is weighted by the remainder of the previous band's end
	weight, rows strictly inside by 0xFF, and the row at the band's end by
	bits 4-11 of the end position (skipped if that row lies beyond the image).
	Accumulation is done through the ACCUM2 macro family.
	
	Output rows are written back into srcPx.pixels, packed without padding.
*/
static void SqueezeVertically2(OOPixMap srcPx, OOPixMapDimension dstHeight)
{
	uint16_t			*sample, *bandStart, *out;
	uint_fast32_t		col, srcY, columns, firstRow, boundaryRow, lastRow;
	size_t				rowStride;
	uint_fast32_t		fixedY, step;
	uint_fast32_t		accumHi, accumLo, weight;	// Names are fixed by the ACCUM2 macros.
	uint_fast8_t		firstWeight, boundaryWeight;
	
	NSCParameterAssert(OOIsValidPixMap(srcPx) && OOPixMapBytesPerPixel(srcPx) == 2);
	
	out = srcPx.pixels;	// Output is placed in same buffer, without line padding.
	rowStride = srcPx.rowBytes;
	columns = srcPx.width;
	lastRow = srcPx.height - 1;
	
	step = (srcPx.height << 12) / dstHeight;
	fixedY = 0;
	boundaryWeight = 0;
	boundaryRow = 0;
	
	while (boundaryRow < lastRow)
	{
		// Advance to the next band of source rows.
		fixedY += step;
		firstRow = boundaryRow;
		boundaryRow = fixedY >> 12;
		
		firstWeight = 0xFF - boundaryWeight;
		boundaryWeight = (fixedY >> 4) & 0xFF;
		
		bandStart = (uint16_t *)((char *)srcPx.pixels + rowStride * firstRow);
		
		for (col = 0; col != columns; ++col)
		{
			sample = bandStart++;
			
			CLEAR_ACCUM2();
			ACCUM2(*sample, firstWeight);
			
			// Walk down the column, fully weighting rows inside the band.
			for (srcY = firstRow + 1; ; ++srcY)
			{
				sample = (uint16_t *)((char *)sample + rowStride);
				if (EXPECT_NOT(srcY == boundaryRow))  break;
				ACCUM2(*sample, 0xFF);
			}
			
			// The boundary row contributes only if it is a real row.
			if (EXPECT(boundaryRow <= lastRow))  ACCUM2(*sample, boundaryWeight);
			
			*out++ = ACCUM2TOPX();
		}
	}
}
1506 :
1507 :
/*	Macros to manage 4-channel accumulators in 4-channel squeeze scalers.
	The approach is similar to the ACCUM2 family above, except that the wt
	multiplication works on two channels at a time before splitting into four
	accumulators, all of which are shifted to the low end of the value.
	
	The macros operate on variables named ag, br, accum1, accum2, accum3,
	accum4 and weight, which must be declared in the calling scope. ACCUM4
	evaluates each argument exactly once. Its temporaries have suffixed names
	so that arguments named "px" or "wt" are not shadowed.
*/
#define ACCUM4(PX, WT) do { \
		uint32_t			accum4Px_ = (PX); \
		uint_fast32_t		accum4Wt_ = (WT); \
		ag = ((accum4Px_ & 0xFF00FF00) >> 8) * accum4Wt_; \
		br = (accum4Px_ & 0x00FF00FF) * accum4Wt_; \
		accum1 += ag >> 16; \
		accum2 += br >> 16; \
		accum3 += ag & 0xFFFF; \
		accum4 += br & 0xFFFF; \
		weight += accum4Wt_; \
	} while (0)

#define CLEAR_ACCUM4() do { \
		accum1 = 0; \
		accum2 = 0; \
		accum3 = 0; \
		accum4 = 0; \
		weight = 0; \
	} while (0)

/*	These integer divisions cause a stall -- this is the biggest
	bottleneck in this file. Unrolling the loop might help on PPC.
	Linear interpolation instead of box filtering would help, with
	a quality hit. Given that scaling doesn't happen very often,
	I think I'll leave it this way. -- Ahruman
*/
#define ACCUM4TOPX() ( \
		(((accum1 / weight) & 0xFF) << 24) | \
		(((accum3 / weight) & 0xFF) << 8) | \
		(((accum2 / weight) & 0xFF) << 16) | \
		((accum4 / weight) & 0xFF) \
	)
1545 :
1546 :
/*	Shrink a 4-byte-per-pixel image horizontally, in place.
	
	Each output pixel is a weighted average of the source pixels it covers:
	fully covered pixels get weight 0xFF, while the pixel straddling the
	boundary between two output pixels is shared between them according to
	bits 4-11 of the 20.12 fixed-point boundary position. Accumulation is
	done through the ACCUM4 macro family.
	
	Output rows are written back into srcPx.pixels, packed without padding.
*/
static void SqueezeHorizontally4(OOPixMap srcPx, OOPixMapDimension dstWidth)
{
	uint32_t			*rowStart, *cursor, *out;
	uint32_t			edgePx, ag, br;	// ag and br are scratch space for ACCUM4.
	uint_fast32_t		srcX, row, pixelsLeft, boundaryX;
	size_t				rowStride;
	uint_fast32_t		fixedX, step;
	uint_fast32_t		accum1, accum2, accum3, accum4, weight;	// Names are fixed by the ACCUM4 macros.
	uint_fast8_t		edgeWeight;
	
	NSCParameterAssert(OOIsValidPixMap(srcPx) && OOPixMapBytesPerPixel(srcPx) == 4);
	
	rowStart = srcPx.pixels;
	out = rowStart;	// Output is placed in same buffer, without line padding.
	rowStride = srcPx.rowBytes;
	
	step = (srcPx.width << 12) / dstWidth;
	
	for (row = 0; row != srcPx.height; ++row)
	{
		cursor = rowStart;
		edgePx = *cursor;
		fixedX = 0;
		edgeWeight = 0;
		srcX = 0;
		
		for (pixelsLeft = dstWidth; pixelsLeft-- != 0; )
		{
			fixedX += step;
			boundaryX = fixedX >> 12;
			
			CLEAR_ACCUM4();
			
			// Leading edge: whatever the previous output pixel left over.
			edgeWeight = 0xFF - edgeWeight;
			ACCUM4(edgePx, edgeWeight);
			
			edgeWeight = (fixedX >> 4) & 0xFF;
			
			// Fully covered interior pixels.
			for (++srcX; !EXPECT(srcX == boundaryX); ++srcX)
			{
				ACCUM4(*++cursor, 0xFF);
			}
			
			// Trailing edge; for the last output pixel no further source pixel is read.
			if (EXPECT(pixelsLeft))  edgePx = *++cursor;
			ACCUM4(edgePx, edgeWeight);
			
			*out++ = ACCUM4TOPX();
		}
		
		rowStart = (uint32_t *)((char *)rowStart + rowStride);
	}
}
1608 :
1609 :
/*	Shrink a 4-byte-per-pixel image vertically, in place.
	
	The source Y position advances in 20.12 fixed point. Each pass of the
	outer loop produces one output row covering a band of source rows: the
	band's first row is weighted by the remainder of the previous band's end
	weight, rows strictly inside by 0xFF, and the row at the band's end by
	bits 4-11 of the end position (skipped if that row lies beyond the image).
	Accumulation is done through the ACCUM4 macro family.
	
	Output rows are written back into srcPx.pixels, packed without padding.
*/
static void SqueezeVertically4(OOPixMap srcPx, OOPixMapDimension dstHeight)
{
	uint32_t			*sample, *bandStart, *out;
	uint_fast32_t		col, srcY, columns, firstRow, boundaryRow, lastRow;
	size_t				rowStride;
	uint32_t			ag, br;	// Scratch space for ACCUM4.
	uint_fast32_t		fixedY, step;
	uint_fast32_t		accum1, accum2, accum3, accum4, weight;	// Names are fixed by the ACCUM4 macros.
	uint_fast8_t		firstWeight, boundaryWeight;
	
	NSCParameterAssert(OOIsValidPixMap(srcPx) && OOPixMapBytesPerPixel(srcPx) == 4);
	
	out = srcPx.pixels;	// Output is placed in same buffer, without line padding.
	rowStride = srcPx.rowBytes;
	columns = srcPx.width;
	lastRow = srcPx.height - 1;
	
	step = (srcPx.height << 12) / dstHeight;
	fixedY = 0;
	boundaryWeight = 0;
	boundaryRow = 0;
	
	while (boundaryRow < lastRow)
	{
		// Advance to the next band of source rows.
		fixedY += step;
		firstRow = boundaryRow;
		boundaryRow = fixedY >> 12;
		
		firstWeight = 0xFF - boundaryWeight;
		boundaryWeight = (fixedY >> 4) & 0xFF;
		
		bandStart = (uint32_t *)((char *)srcPx.pixels + rowStride * firstRow);
		
		for (col = 0; col != columns; ++col)
		{
			sample = bandStart++;
			
			CLEAR_ACCUM4();
			ACCUM4(*sample, firstWeight);
			
			// Walk down the column, fully weighting rows inside the band.
			for (srcY = firstRow + 1; ; ++srcY)
			{
				sample = (uint32_t *)((char *)sample + rowStride);
				if (EXPECT_NOT(srcY == boundaryRow))  break;
				ACCUM4(*sample, 0xFF);
			}
			
			// The boundary row contributes only if it is a real row.
			if (EXPECT(boundaryRow <= lastRow))  ACCUM4(*sample, boundaryWeight);
			
			*out++ = ACCUM4TOPX();
		}
	}
}
1672 :
1673 :
/*	Resize a pixmap's buffer to match its dimensions.
	
	pixMap					Pixmap whose buffer is to be adjusted; its pixels
							and bufferSize fields may be updated.
	leaveSpaceForMipMaps	If set, the target size is 4/3 of
							rowBytes * height rather than exactly that.
	
	Returns YES if the buffer now has the target size. Returns NO if:
	  * the existing buffer is smaller than rowBytes * height; an error is
		logged and the pixmap is left untouched.
	  * reallocation fails; the old buffer is freed, pixels is set to NULL
		and bufferSize to 0, so the pixmap never claims a buffer it does not
		have.
*/
static BOOL EnsureCorrectDataSize(OOPixMap *pixMap, BOOL leaveSpaceForMipMaps)
{
	size_t				correctSize;
	void				*bytes = NULL;
	
	correctSize = pixMap->rowBytes * pixMap->height;
	
	// correctSize > 0 check is redundant, but static analyzer (checker-262) doesn't know that. -- Ahruman 2012-03-17
	NSCParameterAssert(OOIsValidPixMap(*pixMap) && correctSize > 0);
	
	/*	Ensure that the block is not too small. This needs to be done before
		adding the mip-map space, as the texture may have been shrunk in place
		without being grown for mip-maps.
	*/
	if (EXPECT_NOT(pixMap->bufferSize < correctSize))
	{
		OOLogGenericParameterError();
		return NO;
	}
	
	if (leaveSpaceForMipMaps)  correctSize = correctSize * 4 / 3;
	if (correctSize != pixMap->bufferSize)
	{
		bytes = realloc(pixMap->pixels, correctSize);
		if (EXPECT_NOT(bytes == NULL))
		{
			// realloc() leaves the old block allocated on failure; release it
			// and report the failure instead of returning YES with no buffer.
			free(pixMap->pixels);
			pixMap->pixels = NULL;
			pixMap->bufferSize = 0;
			return NO;
		}
		
		pixMap->pixels = bytes;
		pixMap->bufferSize = correctSize;
	}
	
	return YES;
}
|