Oolite 1.91.0.7646-241128-10e222e
Loading...
Searching...
No Matches
OOTextureScaling.m
Go to the documentation of this file.
1/*
2
3OOTextureScaling.m
4
5Copyright (C) 2007-2013 Jens Ayton
6
7Permission is hereby granted, free of charge, to any person obtaining a copy
8of this software and associated documentation files (the "Software"), to deal
9in the Software without restriction, including without limitation the rights
10to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11copies of the Software, and to permit persons to whom the Software is
12furnished to do so, subject to the following conditions:
13
14The above copyright notice and this permission notice shall be included in all
15copies or substantial portions of the Software.
16
17THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23SOFTWARE.
24
25*/
26
27
28#import "OOTextureScaling.h"
30#include <stdlib.h>
31#import "OOLogging.h"
32#import "OOMaths.h"
33#import "OOCPUInfo.h"
34
35
36#define DUMP_MIP_MAPS 0
37#define DUMP_SCALE 0
38
39
40/* Internal function declarations.
41
42 NOTE: the function definitions are grouped together for best code cache
43 coherence rather than the order listed here.
44 */
45static BOOL GenerateMipMaps1(void *textureBytes, OOPixMapDimension width, OOPixMapDimension height) NONNULL_FUNC;
46static BOOL GenerateMipMaps2(void *textureBytes, OOPixMapDimension width, OOPixMapDimension height) NONNULL_FUNC;
47static BOOL GenerateMipMaps4(void *textureBytes, OOPixMapDimension width, OOPixMapDimension height) NONNULL_FUNC;
48
49
50/* ScaleToHalf_P_xN functions
51 These scale a texture with P planes (components) to half its size in each
52 dimension, handling N pixels at a time. srcWidth must be a multiple of N.
53 Parameters are not validated -- bad parameters will lead to bad data or a
54 crash.
55
56 Scaling is an unweighted average. 8 bits per channel assumed.
57 It is safe and meaningful for srcBytes == dstBytes.
58*/
59static void ScaleToHalf_1_x1(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight) NONNULL_FUNC;
60static void ScaleToHalf_2_x1(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight) NONNULL_FUNC;
61static void ScaleToHalf_4_x1(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight) NONNULL_FUNC;
62
63#if OOLITE_NATIVE_64_BIT
64 static void ScaleToHalf_1_x8(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight) NONNULL_FUNC;
65// static void ScaleToHalf_2_x4(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight) NONNULL_FUNC;
66 static void ScaleToHalf_4_x2(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight) NONNULL_FUNC;
67#else
68 static void ScaleToHalf_1_x4(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight) NONNULL_FUNC;
69// static void ScaleToHalf_2_x2(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight) NONNULL_FUNC;
70#endif
71
72
77
78static void StretchVerticallyN_x1(OOPixMap srcPx, OOPixMap dstPx);
79
80static void SqueezeVertically1(OOPixMap srcPx, OOPixMapDimension dstHeight);
81static void SqueezeVertically2(OOPixMap srcPx, OOPixMapDimension dstHeight);
82static void SqueezeVertically4(OOPixMap srcPx, OOPixMapDimension dstHeight);
83static void StretchHorizontally1(OOPixMap srcPx, OOPixMap dstPx);
84static void StretchHorizontally2(OOPixMap srcPx, OOPixMap dstPx);
85static void StretchHorizontally4(OOPixMap srcPx, OOPixMap dstPx);
86static void SqueezeHorizontally1(OOPixMap srcPx, OOPixMapDimension dstWidth);
87static void SqueezeHorizontally2(OOPixMap srcPx, OOPixMapDimension dstWidth);
88static void SqueezeHorizontally4(OOPixMap srcPx, OOPixMapDimension dstWidth);
89
90
91static BOOL EnsureCorrectDataSize(OOPixMap *pixMap, BOOL leaveSpaceForMipMaps) NONNULL_FUNC;
92
93
94#if !OOLITE_NATIVE_64_BIT
95
96static void StretchVerticallyN_x4(OOPixMap srcPx, OOPixMap dstPx);
97
99{
100 if (!((srcPx.rowBytes) & 3))
101 {
102 StretchVerticallyN_x4(srcPx, dstPx);
103 }
104 else
105 {
106 StretchVerticallyN_x1(srcPx, dstPx);
107 }
108}
109
110#else // OOLITE_NATIVE_64_BIT
111
112static void StretchVerticallyN_x8(OOPixMap srcPx, OOPixMap dstPx);
113
115{
116 if (!((srcPx.rowBytes) & 7))
117 {
118 StretchVerticallyN_x8(srcPx, dstPx);
119 }
120 else
121 {
122 StretchVerticallyN_x1(srcPx, dstPx);
123 }
124}
125
126#endif
127
128
130{
131 switch (pixMap.format)
132 {
133 case kOOPixMapRGBA:
134 SqueezeVertically4(pixMap, dstHeight);
135 return;
136
138 SqueezeVertically1(pixMap, dstHeight);
139 return;
140
142 SqueezeVertically2(pixMap, dstHeight);
143 return;
144
146 break;
147 }
148
149#ifndef NDEBUG
150 [NSException raise:NSInternalInconsistencyException format:@"Unsupported pixmap format in scaler: %@", OOPixMapFormatName(pixMap.format)];
151#else
152 abort();
153#endif
154}
155
156
158{
159 NSCParameterAssert(srcPx.format == dstPx.format);
160
161 switch (srcPx.format)
162 {
163 case kOOPixMapRGBA:
164 StretchHorizontally4(srcPx, dstPx);
165 return;
166
168 StretchHorizontally1(srcPx, dstPx);
169 return;
170
172 StretchHorizontally2(srcPx, dstPx);
173 return;
174
176 break;
177 }
178
179#ifndef NDEBUG
180 [NSException raise:NSInternalInconsistencyException format:@"Unsupported pixmap format in scaler: %@", OOPixMapFormatName(srcPx.format)];
181#else
182 abort();
183#endif
184}
185
186
188{
189 switch (pixMap.format)
190 {
191 case kOOPixMapRGBA:
192 SqueezeHorizontally4(pixMap, dstHeight);
193 return;
194
196 SqueezeHorizontally1(pixMap, dstHeight);
197 return;
198
200 SqueezeHorizontally2(pixMap, dstHeight);
201 return;
202
204 break;
205 }
206
207#ifndef NDEBUG
208 [NSException raise:NSInternalInconsistencyException format:@"Unsupported pixmap format in scaler: %@", OOPixMapFormatName(pixMap.format)];
209#else
210 abort();
211#endif
212}
213
214
215#if DUMP_MIP_MAPS || DUMP_SCALE
216// NOTE: currently only works on OS X because of OSAtomicAdd32() (used to increment ID counter in thread-safe way). A simple increment would be sufficient if limited to a single thread (in OOTextureLoader).
217volatile int32_t sPreviousDumpID = 0;
218int32_t OSAtomicAdd32(int32_t __theAmount, volatile int32_t *__theValue);
219
220#endif
221
222#if DUMP_MIP_MAPS
223#define DUMP_CHANNELS -1 // Bitmap of channel counts - -1 for all dumps
224
225#define DUMP_MIP_MAP_PREPARE(pl) uint32_t dumpPlanes = pl; \
226 uint32_t dumpLevel = 0; \
227 BOOL dumpThis = (dumpPlanes & DUMP_CHANNELS) != 0; \
228 SInt32 dumpID = dumpThis ? OSAtomicAdd32(1, &sPreviousDumpID) : 0;
229#define DUMP_MIP_MAP_DUMP(px, w, h) if (dumpThis) DumpMipMap(px, w, h, dumpPlanes, dumpID, dumpLevel++);
230static void DumpMipMap(void *data, OOPixMapDimension width, OOPixMapDimension height, OOPixMapFormat format, SInt32 ID, uint32_t level);
231#else
232#define DUMP_MIP_MAP_PREPARE(pl) do { (void)pl; } while (0)
233#define DUMP_MIP_MAP_DUMP(px, w, h) do { (void)px; (void)w; (void)h; } while (0)
234#endif
235
236#if DUMP_SCALE
237#define DUMP_SCALE_PREPARE() SInt32 dumpID = OSAtomicAdd32(1, &sPreviousDumpID), dumpCount = 0;
238#define DUMP_SCALE_DUMP(PM, stage) do { OOPixMap *pm = &(PM); OODumpPixMap(*pm, [NSString stringWithFormat:@"scaling dump ID %u stage %u-%@ %ux%u", dumpID, dumpCount++, stage, pm->width, pm->height]); } while (0)
239#else
240#define DUMP_SCALE_PREPARE()
241#define DUMP_SCALE_DUMP(PM, stage) do {} while (0)
242#endif
243
244
/*	OOScalePixMap()
	Scale srcPx to dstWidth x dstHeight and return the scaled pixmap.
	
	Each axis is handled separately: stretching writes into a second buffer,
	squeezing is done in place. The buffer belonging to srcPx is always taken
	over by this function -- it either becomes the result or is freed. On any
	failure all buffers are freed and kOONullPixMap is returned.
	
	If leaveSpaceForMipMaps is set, buffers are sized at 4/3 of the image size
	and EnsureCorrectDataSize() is asked to keep that extra room.
*/
OOPixMap OOScalePixMap(OOPixMap srcPx, OOPixMapDimension dstWidth, OOPixMapDimension dstHeight, BOOL leaveSpaceForMipMaps)
{
	OOPixMap			dstPx = {0}, sparePx = {0};
	BOOL				OK = YES;
	
	// Sanity check. A zero target dimension is rejected here because the
	// stretch/squeeze helpers divide by the destination size.
	if (EXPECT_NOT(!OOIsValidPixMap(srcPx) || dstWidth == 0 || dstHeight == 0))
	{
		OOLog(kOOLogParameterError, @"%s(): invalid source pixmap or zero target size (%ux%u) - ignoring.", __PRETTY_FUNCTION__, dstWidth, dstHeight);
		free(srcPx.pixels);
		return kOONullPixMap;
	}
	
	// Sets up the state DUMP_SCALE_DUMP() needs when DUMP_SCALE is enabled; expands to nothing otherwise.
	DUMP_SCALE_PREPARE();
	DUMP_SCALE_DUMP(srcPx, @"initial");
	
	if (srcPx.height < dstHeight)
	{
		// Stretch vertically. This requires a separate buffer.
		size_t dstSize = srcPx.rowBytes * dstHeight;
		if (leaveSpaceForMipMaps && dstWidth <= srcPx.width)  dstSize = dstSize * 4 / 3;
		
		dstPx = OOAllocatePixMap(srcPx.width, dstHeight, srcPx.format, 0, dstSize);
		if (EXPECT_NOT(!OOIsValidPixMap(dstPx)))  { OK = NO; goto FAIL; }
		
		StretchVertically(srcPx, dstPx);
		DUMP_SCALE_DUMP(dstPx, @"stretched vertically");
		
		// Keep the old buffer around: the horizontal pass may be able to reuse it.
		sparePx = srcPx;
		srcPx = dstPx;
	}
	else if (dstHeight < srcPx.height)
	{
		// Squeeze vertically. This can be done in-place.
		SqueezeVertically(srcPx, dstHeight);
		srcPx.height = dstHeight;
		DUMP_SCALE_DUMP(srcPx, @"squeezed vertically");
	}
	
	if (srcPx.width < dstWidth)
	{
		// Stretch horizontally. This requires a separate buffer.
		size_t dstSize = OOPixMapBytesPerPixel(srcPx) * dstWidth * srcPx.height;
		if (leaveSpaceForMipMaps)  dstSize = dstSize * 4 / 3;
		
		if (dstSize <= sparePx.bufferSize)
		{
			// The buffer left over from the vertical stretch is big enough; recycle it.
			dstPx = OOMakePixMap(sparePx.pixels, dstWidth, srcPx.height, srcPx.format, 0, sparePx.bufferSize);
			sparePx = kOONullPixMap;
		}
		else
		{
			dstPx = OOAllocatePixMap(dstWidth, srcPx.height, srcPx.format, 0, dstSize);
		}
		if (EXPECT_NOT(!OOIsValidPixMap(dstPx)))  { OK = NO; goto FAIL; }
		
		StretchHorizontally(srcPx, dstPx);
		DUMP_SCALE_DUMP(dstPx, @"stretched horizontally");
	}
	else if (dstWidth < srcPx.width)
	{
		// Squeeze horizontally. This can be done in-place.
		SqueezeHorizontally(srcPx, dstWidth);
		
		dstPx = srcPx;
		dstPx.width = dstWidth;
		dstPx.rowBytes = dstPx.width * OOPixMapBytesPerPixel(dstPx);
		DUMP_SCALE_DUMP(dstPx, @"squeezed horizontally");
	}
	else
	{
		// No horizontal scaling.
		dstPx = srcPx;
	}
	
	// Avoid a potential double free (if the realloc in EnsureCorrectDataSize() relocates the block).
	if (srcPx.pixels == dstPx.pixels)  srcPx.pixels = NULL;
	
	// dstPx is now the result.
	OK = EnsureCorrectDataSize(&dstPx, leaveSpaceForMipMaps);
	
FAIL:
	// Decide about the spare buffer before freeing srcPx, so the comparison
	// only ever looks at pointers that are still live.
	if (sparePx.pixels != dstPx.pixels && sparePx.pixels != srcPx.pixels)
	{
		free(sparePx.pixels);
	}
	free(srcPx.pixels);
	if (!OK)
	{
		free(dstPx.pixels);
		dstPx.pixels = NULL;
	}
	
	return OK ? dstPx : kOONullPixMap;
}
340
341
342// FIXME: should take an OOPixMap.
// Generates mip-map levels for a texture by handing textureBytes to the
// GenerateMipMaps1/2/4 helper selected by `format`.
// Returns NO, after logging a parameter error, when either dimension is not a
// power of two, when textureBytes is NULL, or when the switch does not return.
// NOTE(review): in this listing the `case` labels in front of the
// GenerateMipMaps1()/GenerateMipMaps2() returns and in front of `break` are
// absent; as shown, only the kOOPixMapRGBA case is reachable. Confirm against
// the original file before relying on this text.
343BOOL OOGenerateMipMaps(void *textureBytes, OOPixMapDimension width, OOPixMapDimension height, OOPixMapFormat format)
344{
 // Reject sizes that are not already powers of two.
345 if (EXPECT_NOT(width != OORoundUpToPowerOf2_PixMap(width) || height != OORoundUpToPowerOf2_PixMap(height)))
346 {
347 OOLog(kOOLogParameterError, @"Non-power-of-two dimensions (%ux%u) passed to %s() - ignoring, data will be junk.", width, height, __PRETTY_FUNCTION__);
348 return NO;
349 }
350 if (EXPECT_NOT(textureBytes == NULL))
351 {
352 OOLog(kOOLogParameterError, @"%@", @"NULL texture pointer passed to GenerateMipMaps().");
353 return NO;
354 }
355
 // Dispatch on pixel format; the numeric suffix of each helper matches the
 // element size it works with (uint8_t, uint16_t, uint32_t).
356 switch (format)
357 {
358 case kOOPixMapRGBA:
359 return GenerateMipMaps4(textureBytes, width, height);
360
362 return GenerateMipMaps1(textureBytes, width, height);
363
365 return GenerateMipMaps2(textureBytes, width, height);
366
368 break;
369 }
370
371
 // Only reached when the switch above did not return.
372 OOLog(kOOLogParameterError, @"%s(): bad pixmap format (%@) - ignoring, data will be junk.", __PRETTY_FUNCTION__, OOPixMapFormatName(format));
373 return NO;
374}
375
376
/*	GenerateMipMaps1()
	Build the mip chain for a one-byte-per-pixel texture.
	
	Each level is written directly behind the previous one inside textureBytes,
	so the buffer must already be large enough for every level. Wide levels go
	through the multi-pixel scaler, the remaining ones through
	ScaleToHalf_1_x1(). Generation stops once either dimension reaches 1.
	Always returns YES.
*/
static BOOL GenerateMipMaps1(void *textureBytes, OOPixMapDimension width, OOPixMapDimension height)
{
	OOPixMapDimension		w = width, h = height;
	uint8_t					*curr, *next;
	
	// Declares the bookkeeping DUMP_MIP_MAP_DUMP() uses when DUMP_MIP_MAPS is on (1 = channel count).
	DUMP_MIP_MAP_PREPARE(1);
	curr = textureBytes;
	
#if OOLITE_NATIVE_64_BIT
	// The x8 scaler consumes 16 source pixels per row per step, hence the 8 < w guard.
	while (8 < w && 1 < h)
	{
		DUMP_MIP_MAP_DUMP(curr, w, h);
		
		next = curr + w * h;
		ScaleToHalf_1_x8(curr, next, w, h);
		
		w >>= 1;
		h >>= 1;
		curr = next;
	}
#else
	// The x4 scaler consumes 8 source pixels per row per step, hence the 4 < w guard.
	while (4 < w && 1 < h)
	{
		DUMP_MIP_MAP_DUMP(curr, w, h);
		
		next = curr + w * h;
		ScaleToHalf_1_x4(curr, next, w, h);
		
		w >>= 1;
		h >>= 1;
		curr = next;
	}
#endif
	
	// Narrow levels: one output pixel at a time.
	while (1 < w && 1 < h)
	{
		DUMP_MIP_MAP_DUMP(curr, w, h);
		
		next = curr + w * h;
		ScaleToHalf_1_x1(curr, next, w, h);
		
		w >>= 1;
		h >>= 1;
		curr = next;
	}
	
	DUMP_MIP_MAP_DUMP(curr, w, h);
	
	// TODO: handle residual 1xN/Nx1 mips. For now, we just limit maximum mip level for non-square textures.
	return YES;
}
428
429
/*	ScaleToHalf_1_x1()
	Halve a one-byte-per-pixel image in both dimensions, producing one output
	pixel per step as the unweighted average of a 2x2 source square.
	
	srcBytes and dstBytes may be the same buffer: every source pixel is read
	before the output pixel that could overlap it is written. Rows are assumed
	to be srcWidth bytes apart. Dimensions smaller than 2 produce no output.
*/
static void ScaleToHalf_1_x1(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight)
{
	OOPixMapDimension		x, y;
	uint8_t					*src0, *src1, *dst;
	uint_fast16_t			sum;
	
	src0 = srcBytes;			// Upper row of the current pair
	src1 = src0 + srcWidth;		// Lower row of the current pair
	dst = dstBytes;
	
	// Counting down with a test at the top means a zero count simply skips
	// the loop instead of wrapping around.
	for (y = srcHeight >> 1; y != 0; --y)
	{
		for (x = srcWidth >> 1; x != 0; --x)
		{
			// Sum the 2x2 square (at most 4 * 255, so 16 bits is plenty)...
			sum = src0[0] + src0[1] + src1[0] + src1[1];
			src0 += 2;
			src1 += 2;
			
			// ...and store the average.
			*dst++ = sum >> 2;
		}
		
		// src1 now sits at the start of the next row pair.
		src0 = src1;
		src1 += srcWidth;
	}
}
468
469
470#if !OOLITE_NATIVE_64_BIT
471
/*	ScaleToHalf_1_x4()
	Halve a one-byte-per-pixel image in both dimensions, four output pixels
	per step. Each step reads two 32-bit words from each of two rows (a 8x2
	byte rectangle) and writes one 32-bit word.
	
	Both buffers are accessed as uint32_t, so they must be suitably aligned.
	Widths too small to fill a whole step produce no output for that row.
*/
static void ScaleToHalf_1_x4(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight)
{
	OOPixMapDimension		x, y;
	uint32_t				*src0, *src1, *dst;
	uint_fast32_t			px00, px01, px10, px11;
	uint_fast32_t			sum0, sum1;
	
	srcWidth >>= 2;	// From here on, srcWidth counts 32-bit words per row.
	src0 = srcBytes;
	src1 = src0 + srcWidth;
	dst = dstBytes;
	
	for (y = srcHeight >> 1; y != 0; --y)
	{
		for (x = srcWidth >> 1; x != 0; --x)
		{
			// Read two words from each row...
			px00 = *src0++;
			px01 = *src0++;
			px10 = *src1++;
			px11 = *src1++;
			
			// ...and add even and odd bytes of both rows into 16-bit lanes,
			// so each lane holds the sum of one 2x2 square (max 0x3FC).
			sum0 =	(px00 & 0x00FF00FF) +
					(px10 & 0x00FF00FF) +
					((px00 & 0xFF00FF00) >> 8) +
					((px10 & 0xFF00FF00) >> 8);
			sum1 =	(px01 & 0x00FF00FF) +
					(px11 & 0x00FF00FF) +
					((px01 & 0xFF00FF00) >> 8) +
					((px11 & 0xFF00FF00) >> 8);
			
			// Divide each lane by four and pack the four results into
			// consecutive bytes, in memory order for this byte order.
#if OOLITE_BIG_ENDIAN
			sum0 = ((sum0 << 6) & 0xFF000000) | ((sum0 << 14) & 0x00FF0000);
			sum1 = ((sum1 >> 10) & 0x0000FF00) | ((sum1 >> 2) & 0x000000FF);
#elif OOLITE_LITTLE_ENDIAN
			sum0 = ((sum0 >> 10) & 0x0000FF00) | ((sum0 >> 2) & 0x000000FF);
			sum1 = ((sum1 << 6) & 0xFF000000) | ((sum1 << 14) & 0x00FF0000);
#else
	#error Neither OOLITE_BIG_ENDIAN nor OOLITE_LITTLE_ENDIAN is defined as nonzero!
#endif
			
			// Write four output pixels at once.
			*dst++ = sum0 | sum1;
		}
		
		// src1 now sits at the start of the next row pair.
		src0 = src1;
		src1 += srcWidth;
	}
}
526
527#else // OOLITE_NATIVE_64_BIT
528
/*	ScaleToHalf_1_x8()
	Halve a one-byte-per-pixel image in both dimensions, eight output pixels
	per step. Each step reads two 64-bit words from each of two rows (a 16x2
	byte rectangle) and writes one 64-bit word.
	
	Both buffers are accessed as uint64_t, so they must be suitably aligned.
	Widths too small to fill a whole step produce no output for that row.
*/
static void ScaleToHalf_1_x8(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight)
{
	OOPixMapDimension		x, y;
	uint64_t				*src0, *src1;
	uint64_t				*dst;
	uint_fast64_t			px00, px01, px10, px11;
	uint_fast64_t			sum0, sum1;
	
	srcWidth >>= 3;	// From here on, srcWidth counts 64-bit words per row.
	src0 = srcBytes;
	src1 = src0 + srcWidth;
	dst = dstBytes;
	
	for (y = srcHeight >> 1; y != 0; --y)
	{
		for (x = srcWidth >> 1; x != 0; --x)
		{
			// Read two words from each row...
			px00 = *src0++;
			px01 = *src0++;
			px10 = *src1++;
			px11 = *src1++;
			
			// ...and add even and odd bytes of both rows into 16-bit lanes,
			// so each lane holds the sum of one 2x2 square (max 0x3FC).
			sum0 =	((px00 & 0x00FF00FF00FF00FFULL)) +
					((px10 & 0x00FF00FF00FF00FFULL)) +
					((px00 & 0xFF00FF00FF00FF00ULL) >> 8) +
					((px10 & 0xFF00FF00FF00FF00ULL) >> 8);
			sum1 =	((px01 & 0x00FF00FF00FF00FFULL)) +
					((px11 & 0x00FF00FF00FF00FFULL)) +
					((px01 & 0xFF00FF00FF00FF00ULL) >> 8) +
					((px11 & 0xFF00FF00FF00FF00ULL) >> 8);
			
			// Divide each lane by four and pack the eight results into
			// consecutive bytes, in memory order for this byte order.
#if OOLITE_BIG_ENDIAN
			sum0 =	((sum0 << 6) & 0xFF00000000000000ULL) |
					((sum0 << 14) & 0x00FF000000000000ULL) |
					((sum0 << 22) & 0x0000FF0000000000ULL) |
					((sum0 << 30) & 0x000000FF00000000ULL);
			sum1 =	((sum1 >> 26) & 0x00000000FF000000ULL) |
					((sum1 >> 18) & 0x0000000000FF0000ULL) |
					((sum1 >> 10) & 0x000000000000FF00ULL) |
					((sum1 >> 2) & 0x00000000000000FFULL);
#elif OOLITE_LITTLE_ENDIAN
			sum0 =	((sum0 >> 26) & 0x00000000FF000000ULL) |
					((sum0 >> 18) & 0x0000000000FF0000ULL) |
					((sum0 >> 10) & 0x000000000000FF00ULL) |
					((sum0 >> 2) & 0x00000000000000FFULL);
			sum1 =	((sum1 << 6) & 0xFF00000000000000ULL) |
					((sum1 << 14) & 0x00FF000000000000ULL) |
					((sum1 << 22) & 0x0000FF0000000000ULL) |
					((sum1 << 30) & 0x000000FF00000000ULL);
#else
	#error Neither OOLITE_BIG_ENDIAN nor OOLITE_LITTLE_ENDIAN is defined as nonzero!
#endif
			
			// Write eight output pixels at once.
			*dst++ = sum0 | sum1;
		}
		
		// src1 now sits at the start of the next row pair.
		src0 = src1;
		src1 += srcWidth;
	}
}
595
596#endif
597
598
/*	GenerateMipMaps2()
	Build the mip chain for a two-byte-per-pixel texture.
	
	Each level is written directly behind the previous one inside textureBytes,
	so the buffer must already be large enough for every level. Only the
	single-pixel scaler exists for this format. Generation stops once either
	dimension reaches 1. Always returns YES.
*/
static BOOL GenerateMipMaps2(void *textureBytes, OOPixMapDimension width, OOPixMapDimension height)
{
	OOPixMapDimension		w = width, h = height;
	uint16_t				*curr, *next;
	
	// Declares the bookkeeping DUMP_MIP_MAP_DUMP() uses when DUMP_MIP_MAPS is on (2 = channel count).
	DUMP_MIP_MAP_PREPARE(2);
	curr = textureBytes;
	
	// TODO: multiple pixel two-plane scalers.
#if 0
#if OOLITE_NATIVE_64_BIT
	while (4 < w && 1 < h)
	{
		DUMP_MIP_MAP_DUMP(curr, w, h);
		
		next = curr + w * h;
		ScaleToHalf_2_x4(curr, next, w, h);
		
		w >>= 1;
		h >>= 1;
		curr = next;
	}
#else
	while (2 < w && 1 < h)
	{
		DUMP_MIP_MAP_DUMP(curr, w, h);
		
		next = curr + w * h;
		ScaleToHalf_2_x2(curr, next, w, h);
		
		w >>= 1;
		h >>= 1;
		curr = next;
	}
#endif
#endif
	
	while (1 < w && 1 < h)
	{
		DUMP_MIP_MAP_DUMP(curr, w, h);
		
		// Pointer arithmetic is in pixels (uint16_t), so w * h skips one whole level.
		next = curr + w * h;
		ScaleToHalf_2_x1(curr, next, w, h);
		
		w >>= 1;
		h >>= 1;
		curr = next;
	}
	
	DUMP_MIP_MAP_DUMP(curr, w, h);
	
	// TODO: handle residual 1xN/Nx1 mips. For now, we just limit maximum mip level for non-square textures.
	return YES;
}
653
654
/*	ScaleToHalf_2_x1()
	Halve a two-byte-per-pixel image in both dimensions, producing one output
	pixel per step. The two bytes of each pixel are averaged independently
	over a 2x2 source square.
	
	Buffers are accessed as uint16_t. srcBytes and dstBytes may be the same
	buffer. Dimensions smaller than 2 produce no output.
*/
static void ScaleToHalf_2_x1(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight)
{
	OOPixMapDimension		x, y;
	uint16_t				*src0, *src1, *dst;
	uint_fast16_t			px00, px01, px10, px11;
	uint_fast32_t			sumHi, sumLo;
	
	src0 = srcBytes;
	src1 = src0 + srcWidth;
	dst = dstBytes;
	
	for (y = srcHeight >> 1; y != 0; --y)
	{
		for (x = srcWidth >> 1; x != 0; --x)
		{
			// Read four pixels in a square...
			px00 = *src0++;
			px01 = *src0++;
			px10 = *src1++;
			px11 = *src1++;
			
			// ...sum the high and the low bytes separately...
			sumHi = (px00 & 0xFF00) + (px01 & 0xFF00) + (px10 & 0xFF00) + (px11 & 0xFF00);
			sumLo = (px00 & 0x00FF) + (px01 & 0x00FF) + (px10 & 0x00FF) + (px11 & 0x00FF);
			
			// ...keep bits 10-17 of the high sum (bits 0-9 belong to the low
			// sum), merge, and divide both by four with a single shift...
			sumLo = ((sumHi & 0x3FC00) | sumLo) >> 2;
			
			// ...and write output pixel.
			*dst++ = sumLo;
		}
		
		// src1 now sits at the start of the next row pair.
		src0 = src1;
		src1 += srcWidth;
	}
}
694
695
/*	GenerateMipMaps4()
	Build the mip chain for a four-byte-per-pixel texture.
	
	Each level is written directly behind the previous one inside textureBytes,
	so the buffer must already be large enough for every level. On 64-bit
	builds levels wider than two pixels use the two-pixel scaler and at most
	one final level uses ScaleToHalf_4_x1(); otherwise every level uses
	ScaleToHalf_4_x1(). Generation stops once either dimension reaches 1.
	Always returns YES.
*/
static BOOL GenerateMipMaps4(void *textureBytes, OOPixMapDimension width, OOPixMapDimension height)
{
	OOPixMapDimension		w = width, h = height;
	uint32_t				*curr, *next;
	
	// Declares the bookkeeping DUMP_MIP_MAP_DUMP() uses when DUMP_MIP_MAPS is on (4 = channel count).
	DUMP_MIP_MAP_PREPARE(4);
	curr = textureBytes;
	
#if OOLITE_NATIVE_64_BIT
	while (2 < w && 1 < h)
	{
		DUMP_MIP_MAP_DUMP(curr, w, h);
		
		next = curr + w * h;
		ScaleToHalf_4_x2(curr, next, w, h);
		
		w >>= 1;
		h >>= 1;
		curr = next;
	}
	if (EXPECT(1 < w && 1 < h))
	{
		DUMP_MIP_MAP_DUMP(curr, w, h);
		
		next = curr + w * h;
		ScaleToHalf_4_x1(curr, next, w, h);
		
		w >>= 1;
		h >>= 1;
		// Advance to the level just written so the final dump below shows
		// that level rather than re-reading the previous one.
		curr = next;
	}
#else
	while (1 < w && 1 < h)
	{
		DUMP_MIP_MAP_DUMP(curr, w, h);
		
		next = curr + w * h;
		ScaleToHalf_4_x1(curr, next, w, h);
		
		w >>= 1;
		h >>= 1;
		curr = next;
	}
#endif
	
	DUMP_MIP_MAP_DUMP(curr, w, h);
	
	// TODO: handle residual 1xN/Nx1 mips. For now, we just limit maximum mip level for non-square textures.
	return YES;
}
745
746
/*	ScaleToHalf_4_x1()
	Halve a four-byte-per-pixel image in both dimensions, producing one output
	pixel per step as the per-channel unweighted average of a 2x2 square.
	
	Buffers are accessed as uint32_t. srcBytes and dstBytes may be the same
	buffer. Dimensions smaller than 2 produce no output.
*/
static void ScaleToHalf_4_x1(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight)
{
	OOPixMapDimension		x, y;
	uint32_t				*src0, *src1, *dst;
	uint_fast32_t			px00, px01, px10, px11;
	
	/*	We treat channel layout as ABGR -- actual layout doesn't matter since
		each channel is handled the same. Two accumulators hold alternating
		channels in 16-bit lanes, so a sum of four bytes can never carry into
		the neighbouring channel, at less cost than one accumulator per channel.
	*/
	uint_fast32_t			ag, br;
	
	src0 = srcBytes;
	src1 = src0 + srcWidth;
	dst = dstBytes;
	
	for (y = srcHeight >> 1; y != 0; --y)
	{
		for (x = srcWidth >> 1; x != 0; --x)
		{
			// Read four pixels in a square...
			px00 = *src0++;
			px01 = *src0++;
			px10 = *src1++;
			px11 = *src1++;
			
			// ...and add them together, channel by channel.
			ag =  (px00 & 0xFF00FF00) >> 8;
			br =  (px00 & 0x00FF00FF);
			ag += (px01 & 0xFF00FF00) >> 8;
			br += (px01 & 0x00FF00FF);
			ag += (px10 & 0xFF00FF00) >> 8;
			br += (px10 & 0x00FF00FF);
			ag += (px11 & 0xFF00FF00) >> 8;
			br += (px11 & 0x00FF00FF);
			
			// Divide by four and move back into place: ag needs >> 2 then
			// << 8 (net << 6), br just >> 2.
			ag <<= 6;
			br >>= 2;
			
			// Mask off the leftover low bits and write the output pixel.
			*dst++ = (ag & 0xFF00FF00) | (br & 0x00FF00FF);
		}
		
		// src1 now sits at the start of the next row pair.
		src0 = src1;
		src1 += srcWidth;
	}
}
799
800
801#if OOLITE_NATIVE_64_BIT
802
/*	ScaleToHalf_4_x2()
	Halve a four-byte-per-pixel image in both dimensions, two output pixels
	per step. Each step reads two 64-bit words from each of two rows (a 4x2
	pixel rectangle) and writes one 64-bit word.
	
	Buffers are accessed through uint_fast64_t pointers, so they must be
	suitably aligned. Widths too small to fill a whole step produce no output
	for that row.
*/
static void ScaleToHalf_4_x2(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight)
{
	OOPixMapDimension		x, y;
	uint_fast64_t			*src0, *src1, *dst;
	uint_fast64_t			px00, px01, px10, px11;
	
	/*	We treat channel layout as ABGR -- actual layout doesn't matter since
		each channel is handled the same. Accumulators hold alternating
		channels in 16-bit lanes, so sums can never carry into the
		neighbouring channel.
	*/
	uint_fast64_t			ag0, ag1, br0, br1;
	
	srcWidth >>= 1;	// Two pixels per word: srcWidth now counts 64-bit words per row.
	src0 = srcBytes;
	src1 = src0 + srcWidth;
	dst = dstBytes;
	
	for (y = srcHeight >> 1; y != 0; --y)
	{
		for (x = srcWidth >> 1; x != 0; --x)
		{
			// Read eight pixels (4x2)...
			px00 = *src0++;
			px01 = *src0++;
			px10 = *src1++;
			px11 = *src1++;
			
			// ...and add vertically, channel by channel.
			ag0 =  (px00 & 0xFF00FF00FF00FF00ULL) >> 8;
			br0 =  (px00 & 0x00FF00FF00FF00FFULL);
			ag0 += (px10 & 0xFF00FF00FF00FF00ULL) >> 8;
			br0 += (px10 & 0x00FF00FF00FF00FFULL);
			ag1 =  (px01 & 0xFF00FF00FF00FF00ULL) >> 8;
			br1 =  (px01 & 0x00FF00FF00FF00FFULL);
			ag1 += (px11 & 0xFF00FF00FF00FF00ULL) >> 8;
			br1 += (px11 & 0x00FF00FF00FF00FFULL);
			
#if OOLITE_BIG_ENDIAN
			// Add horizontally: fold the two pixels of each word onto the
			// half that will hold the corresponding output pixel...
			ag0 = ag0 + (ag0 << 32);
			br0 = br0 + (br0 << 32);
			ag1 = ag1 + (ag1 >> 32);
			br1 = br1 + (br1 >> 32);
			
			// ...keep only those halves, divide by four and realign.
			ag0 = ((ag0 & 0x03FC03FC00000000ULL) | (ag1 & 0x0000000003FC03FCULL)) << 6;
			br0 = ((br0 & 0x03FC03FC00000000ULL) | (br1 & 0x0000000003FC03FCULL)) >> 2;
#elif OOLITE_LITTLE_ENDIAN
			// Add horizontally: fold the two pixels of each word onto the
			// half that will hold the corresponding output pixel...
			ag0 = ag0 + (ag0 >> 32);
			br0 = br0 + (br0 >> 32);
			ag1 = ag1 + (ag1 << 32);
			br1 = br1 + (br1 << 32);
			
			// ...keep only those halves, divide by four and realign.
			ag0 = ((ag0 & 0x0000000003FC03FCULL) | (ag1 & 0x03FC03FC00000000ULL)) << 6;
			br0 = ((br0 & 0x0000000003FC03FCULL) | (br1 & 0x03FC03FC00000000ULL)) >> 2;
#else
	#error Unknown architecture.
#endif
			
			// Write two output pixels at once.
			*dst++ = ag0 | br0;
		}
		
		// src1 now sits at the start of the next row pair.
		src0 = src1;
		src1 += srcWidth;
	}
}
876
877#endif
878
879
880#if DUMP_MIP_MAPS
// Debug helper: wraps one mip level in a temporary OOPixMap and hands it to
// OODumpPixMap() under a name built from the dump ID, level, format and size.
static void DumpMipMap(void *data, OOPixMapDimension width, OOPixMapDimension height, OOPixMapFormat format, SInt32 ID, uint32_t level)
{
	OOPixMap		levelPx = OOMakePixMap(data, width, height, format, 0, 0);
	NSString		*dumpName = [NSString stringWithFormat:@"mipmap dump ID %u lv%u %@ %ux%u", ID, level, OOPixMapFormatName(format), width, height];
	
	OODumpPixMap(levelPx, dumpName);
}
886#endif
887
888
/*	StretchVerticallyN_x1()
	Stretch srcPx vertically into dstPx one byte at a time; works for any
	pixel format because every byte is blended independently.
	
	For every destination row but the last, the row sampled for the previous
	destination row is blended with the row at the current fixed-point Y
	position, weighted by the fractional part of that position. The final
	destination row is a plain copy of the last sampled source row.
	Output rows are written back to back (no row padding in dstPx).
*/
static void StretchVerticallyN_x1(OOPixMap srcPx, OOPixMap dstPx)
{
	uint8_t				*src, *src0, *src1, *prev, *dst;
	uint8_t				px0, px1;
	uint_fast32_t		x, y, xCount;
	size_t				srcRowBytes;
	uint_fast16_t		weight0, weight1;
	uint_fast32_t		fractY;		// Y coordinate, fixed-point (24.8)
	
	src = srcPx.pixels;
	srcRowBytes = srcPx.rowBytes;
	dst = dstPx.pixels;	// Assumes dstPx.width == dstPx.rowBytes.
	
	prev = src;
	
	xCount = srcPx.width * OOPixMapBytesPerPixel(srcPx);
	
	for (y = 1; y != dstPx.height; ++y)
	{
		// Widen before shifting so tall images cannot overflow 32 bits.
		fractY = (uint_fast32_t)((((uint64_t)srcPx.height * y) << 8) / dstPx.height);
		
		src0 = prev;
		prev = src1 = src + srcRowBytes * (fractY >> 8);
		
		weight1 = fractY & 0xFF;
		weight0 = 0x100 - weight1;
		
		for (x = xCount; x != 0; --x)
		{
			px0 = *src0++;
			px1 = *src1++;
			
			*dst++ = (px0 * weight0 + px1 * weight1) >> 8;
		}
	}
	
	// Copy the last row. Restart from prev (the start of the most recently
	// sampled source row): src0 has already been advanced by a whole row and
	// may point past the end of the source buffer.
	src0 = prev;
	for (x = xCount; x != 0; --x)
	{
		*dst++ = *src0++;
	}
}
933
934
935#if !OOLITE_NATIVE_64_BIT
936
/*	StretchVerticallyN_x4()
	Stretch srcPx vertically into dstPx, four bytes at a time; works for any
	pixel format because every byte is blended independently.
	
	For every destination row but the last, the row sampled for the previous
	destination row is blended with the row at the current fixed-point Y
	position, weighted by the fractional part of that position. The final
	destination row is a plain copy of the last sampled source row.
	Rows are accessed as uint32_t; output rows are written back to back.
*/
static void StretchVerticallyN_x4(OOPixMap srcPx, OOPixMap dstPx)
{
	uint8_t				*src;
	uint32_t			*src0, *src1, *prev, *dst;
	uint32_t			px0, px1, ag, br;
	uint_fast32_t		x, y, xCount;
	size_t				srcRowBytes;
	uint_fast16_t		weight0, weight1;
	uint_fast32_t		fractY;		// Y coordinate, fixed-point (24.8)
	
	src = srcPx.pixels;
	srcRowBytes = srcPx.rowBytes;
	dst = dstPx.pixels;	// Assumes no row padding.
	
	prev = (uint32_t *)src;
	
	// Number of 32-bit words per row.
	xCount = (srcPx.width * OOPixMapBytesPerPixel(srcPx)) >> 2;
	
	for (y = 1; y != dstPx.height; ++y)
	{
		// Widen before shifting so tall images cannot overflow 32 bits.
		fractY = (uint_fast32_t)((((uint64_t)srcPx.height * y) << 8) / dstPx.height);
		
		src0 = prev;
		prev = src1 = (uint32_t *)(src + srcRowBytes * (fractY >> 8));
		
		weight1 = fractY & 0xFF;
		weight0 = 0x100 - weight1;
		
		for (x = xCount; x != 0; --x)
		{
			px0 = *src0++;
			px1 = *src1++;
			
			// Blend odd and even bytes separately in 16-bit lanes; a byte
			// times a weight of at most 0x100 always fits in its lane.
			ag = ((px0 & 0xFF00FF00) >> 8) * weight0 + ((px1 & 0xFF00FF00) >> 8) * weight1;
			br = (px0 & 0x00FF00FF) * weight0 + (px1 & 0x00FF00FF) * weight1;
			
			*dst++ = (ag & 0xFF00FF00) | ((br >> 8) & 0x00FF00FF);
		}
	}
	
	// Copy the last row. Restart from prev (the start of the most recently
	// sampled source row): src0 has already been advanced by a whole row and
	// may point past the end of the source buffer.
	src0 = prev;
	for (x = xCount; x != 0; --x)
	{
		*dst++ = *src0++;
	}
}
985
986#else // OOLITE_NATIVE_64_BIT
987
/*	StretchVerticallyN_x8()
	Stretch srcPx vertically into dstPx, eight bytes at a time; works for any
	pixel format because every byte is blended independently.
	
	For every destination row but the last, the row sampled for the previous
	destination row is blended with the row at the current fixed-point Y
	position, weighted by the fractional part of that position. The final
	destination row is a plain copy of the last sampled source row.
	Rows are accessed as uint64_t; output rows are written back to back.
*/
static void StretchVerticallyN_x8(OOPixMap srcPx, OOPixMap dstPx)
{
	uint8_t				*src;
	uint64_t			*src0, *src1, *prev, *dst;
	uint64_t			px0, px1, agag, brbr;
	uint_fast32_t		x, y, xCount;
	size_t				srcRowBytes;
	uint_fast16_t		weight0, weight1;
	uint_fast32_t		fractY;		// Y coordinate, fixed-point (24.8)
	
	src = srcPx.pixels;
	srcRowBytes = srcPx.rowBytes;
	dst = dstPx.pixels;	// Assumes dstPx.width == dstPx.rowBytes.
	
	prev = (uint64_t *)src;
	
	// Number of 64-bit words per row.
	xCount = (srcPx.width * OOPixMapBytesPerPixel(srcPx)) >> 3;
	
	for (y = 1; y != dstPx.height; ++y)
	{
		// Widen before shifting so tall images cannot overflow 32 bits.
		fractY = (uint_fast32_t)((((uint64_t)srcPx.height * y) << 8) / dstPx.height);
		
		src0 = prev;
		prev = src1 = (uint64_t *)(src + srcRowBytes * (fractY >> 8));
		
		weight1 = fractY & 0xFF;
		weight0 = 0x100 - weight1;
		
		for (x = xCount; x != 0; --x)
		{
			px0 = *src0++;
			px1 = *src1++;
			
			// Blend odd and even bytes separately in 16-bit lanes; a byte
			// times a weight of at most 0x100 always fits in its lane.
			agag = ((px0 & 0xFF00FF00FF00FF00ULL) >> 8) * weight0 + ((px1 & 0xFF00FF00FF00FF00ULL) >> 8) * weight1;
			brbr = (px0 & 0x00FF00FF00FF00FFULL) * weight0 + (px1 & 0x00FF00FF00FF00FFULL) * weight1;
			
			*dst++ = (agag & 0xFF00FF00FF00FF00ULL) | ((brbr >> 8) & 0x00FF00FF00FF00FFULL);
		}
	}
	
	// Copy the last row. Restart from prev (the start of the most recently
	// sampled source row): src0 has already been advanced by a whole row and
	// may point past the end of the source buffer.
	src0 = prev;
	for (x = xCount; x != 0; --x)
	{
		*dst++ = *src0++;
	}
}
1036#endif
1037
1038
/*	StretchHorizontally1()
	Stretch a one-byte-per-pixel pixmap horizontally from srcPx into dstPx.
	
	The source X position advances by a fixed 20.12 step per output pixel.
	Each output pixel blends the previously sampled source pixel with the one
	at the current position, weighted by the fractional part of the position.
	Output rows are written back to back (no row padding in dstPx).
*/
static void StretchHorizontally1(OOPixMap srcPx, OOPixMap dstPx)
{
	uint8_t				*srcStart, *dst;
	uint8_t				px0, px1;
	uint_fast32_t		x, y, xCount, srcX;
	size_t				srcRowBytes;
	uint_fast16_t		weight0, weight1;
	uint_fast32_t		fractX, deltaX;	// X coordinate, fixed-point (20.12), allowing widths up to 1 mebipixel
	
	NSCParameterAssert(OOIsValidPixMap(srcPx) && OOPixMapBytesPerPixel(srcPx) == 1 && OOIsValidPixMap(dstPx) && OOPixMapBytesPerPixel(dstPx) == 1);
	
	srcStart = srcPx.pixels;
	srcRowBytes = srcPx.rowBytes;
	xCount = dstPx.width;
	dst = dstPx.pixels;	// Assumes no row padding
	
	deltaX = (srcPx.width << 12) / dstPx.width;
	
	for (y = 0; y != dstPx.height; ++y)
	{
		// Every row starts out with its own first pixel as the "previous" sample.
		px1 = *srcStart;
		fractX = 0;
		
		for (x = 0; x != xCount; ++x)
		{
			fractX += deltaX;
			
			weight1 = (fractX >> 4) & 0xFF;
			weight0 = 0x100 - weight1;
			
			px0 = px1;
			srcX = fractX >> 12;
			/*	The position can land exactly on srcPx.width, one pixel past
				the row (and, on the last row, past the buffer). Its weight is
				zero there, so skipping the read leaves the output unchanged.
			*/
			if (EXPECT(srcX < srcPx.width))  px1 = srcStart[srcX];
			
			*dst++ = (px0 * weight0 + px1 * weight1) >> 8;
		}
		
		srcStart = (uint8_t *)((char *)srcStart + srcRowBytes);
	}
}
1096
1097
/*	StretchHorizontally2()
	Stretch a two-byte-per-pixel pixmap horizontally from srcPx into dstPx.
	
	The source X position advances by a fixed 20.12 step per output pixel.
	Each output pixel blends the previously sampled source pixel with the one
	at the current position, weighted by the fractional part of the position;
	the two bytes of a pixel are blended independently.
	Output rows are written back to back (no row padding in dstPx).
*/
static void StretchHorizontally2(OOPixMap srcPx, OOPixMap dstPx)
{
	uint16_t			*srcStart, *dst;
	uint16_t			px0, px1;
	uint_fast32_t		hi, lo;
	uint_fast32_t		x, y, xCount, srcX;
	size_t				srcRowBytes;
	uint_fast16_t		weight0, weight1;
	uint_fast32_t		fractX, deltaX;	// X coordinate, fixed-point (20.12), allowing widths up to 1 mebipixel
	
	NSCParameterAssert(OOIsValidPixMap(srcPx) && OOPixMapBytesPerPixel(srcPx) == 2 && OOIsValidPixMap(dstPx) && OOPixMapBytesPerPixel(dstPx) == 2);
	
	srcStart = srcPx.pixels;
	srcRowBytes = srcPx.rowBytes;
	xCount = dstPx.width;
	dst = dstPx.pixels;	// Assumes no row padding
	
	deltaX = (srcPx.width << 12) / dstPx.width;
	
	for (y = 0; y != dstPx.height; ++y)
	{
		// Every row starts out with its own first pixel as the "previous" sample.
		px1 = *srcStart;
		fractX = 0;
		
		for (x = 0; x != xCount; ++x)
		{
			fractX += deltaX;
			
			weight1 = (fractX >> 4) & 0xFF;
			weight0 = 0x100 - weight1;
			
			px0 = px1;
			srcX = fractX >> 12;
			/*	The position can land exactly on srcPx.width, one pixel past
				the row (and, on the last row, past the buffer). Its weight is
				zero there, so skipping the read leaves the output unchanged.
			*/
			if (EXPECT(srcX < srcPx.width))  px1 = srcStart[srcX];
			
			// Blend the high and low bytes separately...
			hi = (px0 & 0xFF00) * weight0 + (px1 & 0xFF00) * weight1;
			lo = (px0 & 0x00FF) * weight0 + (px1 & 0x00FF) * weight1;
			
			// ...then drop the 8 fraction bits of both with a single shift.
			*dst++ = ((hi & 0xFF0000) | (lo & 0x00FF00)) >> 8;
		}
		
		srcStart = (uint16_t *)((char *)srcStart + srcRowBytes);
	}
}
1162
1163
// Blend two four-byte pixels: `before` gets weight (0x100 - weightAfter) and
// `after` gets weightAfter. Odd and even bytes are processed in separate
// 16-bit lanes so no channel can carry into its neighbour.
static inline uint32_t BlendPixelPair4(uint32_t before, uint32_t after, uint_fast16_t weightAfter)
{
	uint_fast16_t		weightBefore = 0x100 - weightAfter;
	uint32_t			oddBytes, evenBytes;
	
	oddBytes = ((before & 0xFF00FF00) >> 8) * weightBefore + ((after & 0xFF00FF00) >> 8) * weightAfter;
	evenBytes = (before & 0x00FF00FF) * weightBefore + (after & 0x00FF00FF) * weightAfter;
	
	return (oddBytes & 0xFF00FF00) | ((evenBytes & 0xFF00FF00) >> 8);
}


/*	StretchHorizontally4()
	Stretch a four-byte-per-pixel pixmap horizontally from srcPx into dstPx.
	
	The source X position advances by a fixed 20.12 step per output pixel.
	Each output pixel blends the previously sampled source pixel with the one
	at the current position. On the last row the final sample is not read, so
	the loop never touches memory behind the source buffer.
	Output rows are written back to back (no row padding in dstPx).
*/
static void StretchHorizontally4(OOPixMap srcPx, OOPixMap dstPx)
{
	uint32_t			*rowStart, *out;
	uint32_t			earlier, later;
	uint_fast32_t		col, row, outWidth;
	size_t				rowStride;
	uint_fast32_t		pos, step;	// X coordinate, fixed-point (20.12), allowing widths up to 1 mebipixel
	
	NSCParameterAssert(OOIsValidPixMap(srcPx) && OOPixMapBytesPerPixel(srcPx) == 4 && OOIsValidPixMap(dstPx) && OOPixMapBytesPerPixel(dstPx) == 4);
	
	rowStart = srcPx.pixels;
	rowStride = srcPx.rowBytes;
	outWidth = dstPx.width;
	out = dstPx.pixels;	// Assumes no row padding
	
	step = (srcPx.width << 12) / dstPx.width;
	later = *rowStart;
	
	// All rows except the last.
	for (row = 0; row < dstPx.height - 1; ++row)
	{
		pos = 0;
		
		for (col = 0; col != outWidth; ++col)
		{
			pos += step;
			
			earlier = later;
			later = *(rowStart + (pos >> 12));
			
			*out++ = BlendPixelPair4(earlier, later, (pos >> 4) & 0xFF);
		}
		
		// Move on to the next source row and prime it with its first pixel.
		rowStart = (uint32_t *)((char *)rowStart + rowStride);
		later = *rowStart;
	}
	
	// Last row: identical, except that the final sample is never fetched.
	pos = 0;
	for (col = 0; col != outWidth; ++col)
	{
		pos += step;
		
		earlier = later;
		if (EXPECT(col < outWidth - 1))  later = *(rowStart + (pos >> 12));
		
		*out++ = BlendPixelPair4(earlier, later, (pos >> 4) & 0xFF);
	}
}
1228
1229
1231{
1232 uint8_t *src, *srcStart, *dst;
1233 uint8_t borderPx;
1234 uint_fast32_t x, y, xCount, endX;
1235 size_t srcRowBytes;
1236 uint_fast32_t endFractX, deltaX;
1237 uint_fast32_t accum, weight;
1238 uint_fast8_t borderWeight;
1239
1240 NSCParameterAssert(OOIsValidPixMap(srcPx) && OOPixMapBytesPerPixel(srcPx) == 1);
1241
1242 srcStart = srcPx.pixels;
1243 dst = srcStart; // Output is placed in same buffer, without line padding.
1244 srcRowBytes = srcPx.rowBytes;
1245
1246 deltaX = (srcPx.width << 12) / dstWidth;
1247
1248 for (y = 0; y != srcPx.height; ++y)
1249 {
1250 borderPx = *srcStart;
1251 endFractX = 0;
1252 borderWeight = 0;
1253
1254 src = srcStart;
1255
1256 x = 0;
1257 xCount = dstWidth;
1258 while (xCount--)
1259 {
1260 endFractX += deltaX;
1261 endX = endFractX >> 12;
1262
1263 borderWeight = 0xFF - borderWeight;
1264 accum = borderPx * borderWeight;
1265 weight = borderWeight;
1266
1267 borderWeight = (endFractX >> 4) & 0xFF;
1268 weight += borderWeight;
1269
1270 for (;;)
1271 {
1272 ++x;
1273 if (EXPECT(x == endX))
1274 {
1275 if (EXPECT(xCount)) borderPx = *++src;
1276 accum += borderPx * borderWeight;
1277 break;
1278 }
1279 else
1280 {
1281 accum += *++src * 0xFF;
1282 weight += 0xFF;
1283 }
1284 }
1285
1286 *dst++ = accum / weight;
1287 }
1288
1289 srcStart = (uint8_t *)((char *)srcStart + srcRowBytes);
1290 }
1291}
1292
1293
1294static void SqueezeVertically1(OOPixMap srcPx, OOPixMapDimension dstHeight)
1295{
1296 uint8_t *src, *srcStart, *dst;
1297 uint_fast32_t x, y, xCount, startY, endY, lastRow;
1298 size_t srcRowBytes;
1299 uint_fast32_t endFractY, deltaY;
1300 uint_fast32_t accum, weight;
1301 uint_fast8_t startWeight, endWeight;
1302
1303 NSCParameterAssert(OOIsValidPixMap(srcPx) && OOPixMapBytesPerPixel(srcPx) == 1);
1304
1305 dst = srcPx.pixels; // Output is placed in same buffer, without line padding.
1306 srcRowBytes = srcPx.rowBytes;
1307 xCount = srcPx.width;
1308
1309 deltaY = (srcPx.height << 12) / dstHeight;
1310 endFractY = 0;
1311
1312 endWeight = 0;
1313 endY = 0;
1314
1315 lastRow = srcPx.height - 1;
1316
1317 while (endY < lastRow)
1318 {
1319 endFractY += deltaY;
1320 startY = endY;
1321 endY = endFractY >> 12;
1322
1323 startWeight = 0xFF - endWeight;
1324 endWeight = (endFractY >> 4) & 0xFF;
1325
1326 srcStart = (uint8_t *)((char *)srcPx.pixels + srcRowBytes * startY);
1327
1328 for (x = 0; x != xCount; ++x)
1329 {
1330 src = srcStart++;
1331 accum = startWeight * *src;
1332 weight = startWeight + endWeight;
1333
1334 y = startY;
1335 for (;;)
1336 {
1337 ++y;
1338 src = (uint8_t *)((char *)src + srcRowBytes);
1339 if (EXPECT_NOT(y == endY))
1340 {
1341 if (EXPECT(endY < lastRow)) accum += *src * endWeight;
1342 break;
1343 }
1344 else
1345 {
1346 accum += *src * 0xFF;
1347 weight += 0xFF;
1348 }
1349 }
1350
1351 *dst++ = accum / weight;
1352 }
1353 }
1354}
1355
1356
/*	Accumulator helpers for the 2-channel squeeze scalers. They operate on
	three variables that must be in scope where the macros are used:
		accumHi		weighted sum of the high channel, still shifted left 8 bits.
		accumLo		weighted sum of the low channel.
		weight		sum of all pixel weights accumulated so far.
*/
#define CLEAR_ACCUM2() do { \
		accumHi = 0; \
		accumLo = 0; \
		weight = 0; \
	} while (0)

// Add pixel PX with weight WT; each argument is evaluated exactly once.
#define ACCUM2(PX, WT) do { \
		uint16_t			px_ = PX; \
		uint_fast32_t		wt_ = WT; \
		accumHi += (px_ & 0xFF00) * wt_; \
		accumLo += (px_ & 0x00FF) * wt_; \
		weight += wt_; \
	} while (0)

// Weighted mean of everything accumulated, repacked as a 2-channel pixel.
#define ACCUM2TOPX() ( \
		((accumLo / weight) & 0x00FF) | \
		((accumHi / weight) & 0xFF00) \
	)
1380
1381
1383{
1384 uint16_t *src, *srcStart, *dst;
1385 uint16_t borderPx;
1386 uint_fast32_t x, y, xCount, endX;
1387 size_t srcRowBytes;
1388 uint_fast32_t endFractX, deltaX;
1389 uint_fast32_t accumHi, accumLo, weight;
1390 uint_fast8_t borderWeight;
1391
1392 NSCParameterAssert(OOIsValidPixMap(srcPx) && OOPixMapBytesPerPixel(srcPx) == 2);
1393
1394 srcStart = srcPx.pixels;
1395 dst = srcStart; // Output is placed in same buffer, without line padding.
1396 srcRowBytes = srcPx.rowBytes;
1397
1398 deltaX = (srcPx.width << 12) / dstWidth;
1399
1400 for (y = 0; y != srcPx.height; ++y)
1401 {
1402 borderPx = *srcStart;
1403 endFractX = 0;
1404 borderWeight = 0;
1405
1406 src = srcStart;
1407
1408 x = 0;
1409 xCount = dstWidth;
1410 while (xCount--)
1411 {
1412 endFractX += deltaX;
1413 endX = endFractX >> 12;
1414
1415 CLEAR_ACCUM2();
1416
1417 borderWeight = 0xFF - borderWeight;
1418 ACCUM2(borderPx, borderWeight);
1419
1420 borderWeight = (endFractX >> 4) & 0xFF;
1421
1422 for (;;)
1423 {
1424 ++x;
1425 if (EXPECT(x == endX))
1426 {
1427 if (EXPECT(xCount)) borderPx = *++src;
1428 ACCUM2(borderPx, borderWeight);
1429 break;
1430 }
1431 else
1432 {
1433 ACCUM2(*++src, 0xFF);
1434 }
1435 }
1436
1437 *dst++ = ACCUM2TOPX();
1438 }
1439
1440 srcStart = (uint16_t *)((char *)srcStart + srcRowBytes);
1441 }
1442}
1443
1444
1445static void SqueezeVertically2(OOPixMap srcPx, OOPixMapDimension dstHeight)
1446{
1447 uint16_t *src, *srcStart, *dst;
1448 uint_fast32_t x, y, xCount, startY, endY, lastRow;
1449 size_t srcRowBytes;
1450 uint_fast32_t endFractY, deltaY;
1451 uint_fast32_t accumHi, accumLo, weight;
1452 uint_fast8_t startWeight, endWeight;
1453
1454 NSCParameterAssert(OOIsValidPixMap(srcPx) && OOPixMapBytesPerPixel(srcPx) == 2);
1455
1456 dst = srcPx.pixels; // Output is placed in same buffer, without line padding.
1457 srcRowBytes = srcPx.rowBytes;
1458 xCount = srcPx.width;
1459
1460 deltaY = (srcPx.height << 12) / dstHeight;
1461 endFractY = 0;
1462
1463 endWeight = 0;
1464 endY = 0;
1465
1466 lastRow = srcPx.height - 1;
1467
1468 while (endY < lastRow)
1469 {
1470 endFractY += deltaY;
1471 startY = endY;
1472 endY = endFractY >> 12;
1473
1474 startWeight = 0xFF - endWeight;
1475 endWeight = (endFractY >> 4) & 0xFF;
1476
1477 srcStart = (uint16_t *)((char *)srcPx.pixels + srcRowBytes * startY);
1478
1479 for (x = 0; x != xCount; ++x)
1480 {
1481 src = srcStart++;
1482
1483 CLEAR_ACCUM2();
1484 ACCUM2(*src, startWeight);
1485
1486 y = startY;
1487 for (;;)
1488 {
1489 ++y;
1490 src = (uint16_t *)((char *)src + srcRowBytes);
1491 if (EXPECT_NOT(y == endY))
1492 {
1493 if (EXPECT(endY <= lastRow)) ACCUM2(*src, endWeight);
1494 break;
1495 }
1496 else
1497 {
1498 ACCUM2(*src, 0xFF);
1499 }
1500 }
1501
1502 *dst++ = ACCUM2TOPX();
1503 }
1504 }
1505}
1506
1507
/*	Accumulator helpers for the 4-channel squeeze scalers.
	
	These follow the same pattern as the ACCUM2 family, but the weight
	multiplication is applied to two channels at a time (via the scratch
	variables ag and br) before the products are split into four accumulators,
	each holding its channel at the low end of the value. The variables ag,
	br, accum1..accum4 and weight must be in scope where the macros are used.
*/
#define CLEAR_ACCUM4() do { \
		accum1 = 0; \
		accum2 = 0; \
		accum3 = 0; \
		accum4 = 0; \
		weight = 0; \
	} while (0)

// Add pixel PX with weight WT; each argument is evaluated exactly once.
#define ACCUM4(PX, WT) do { \
		uint32_t			px_ = PX; \
		uint_fast32_t		wt_ = WT; \
		ag = ((px_ & 0xFF00FF00) >> 8) * wt_; \
		br = (px_ & 0x00FF00FF) * wt_; \
		accum1 += ag >> 16; \
		accum2 += br >> 16; \
		accum3 += ag & 0xFFFF; \
		accum4 += br & 0xFFFF; \
		weight += wt_; \
	} while (0)

/*	These integer divisions cause a stall -- this is the biggest
	bottleneck in this file. Unrolling the loop might help on PPC.
	Linear interpolation instead of box filtering would help, with
	a quality hit. Given that scaling doesn't happen very often,
	I think I'll leave it this way. -- Ahruman
*/
#define ACCUM4TOPX() ( \
		(((accum1 / weight) & 0xFF) << 24) | \
		(((accum2 / weight) & 0xFF) << 16) | \
		(((accum3 / weight) & 0xFF) << 8) | \
		((accum4 / weight) & 0xFF) \
	)
1545
1546
1548{
1549 uint32_t *src, *srcStart, *dst;
1550 uint32_t borderPx, ag, br;
1551 uint_fast32_t x, y, xCount, endX;
1552 size_t srcRowBytes;
1553 uint_fast32_t endFractX, deltaX;
1554 uint_fast32_t accum1, accum2, accum3, accum4, weight;
1555 uint_fast8_t borderWeight;
1556
1557 NSCParameterAssert(OOIsValidPixMap(srcPx) && OOPixMapBytesPerPixel(srcPx) == 4);
1558
1559 srcStart = srcPx.pixels;
1560 dst = srcStart; // Output is placed in same buffer, without line padding.
1561 srcRowBytes = srcPx.rowBytes;
1562
1563 deltaX = (srcPx.width << 12) / dstWidth;
1564
1565 for (y = 0; y != srcPx.height; ++y)
1566 {
1567 borderPx = *srcStart;
1568 endFractX = 0;
1569 borderWeight = 0;
1570
1571 src = srcStart;
1572
1573 x = 0;
1574 xCount = dstWidth;
1575 while (xCount--)
1576 {
1577 endFractX += deltaX;
1578 endX = endFractX >> 12;
1579
1580 CLEAR_ACCUM4();
1581
1582 borderWeight = 0xFF - borderWeight;
1583 ACCUM4(borderPx, borderWeight);
1584
1585 borderWeight = (endFractX >> 4) & 0xFF;
1586
1587 for (;;)
1588 {
1589 ++x;
1590 if (EXPECT(x == endX))
1591 {
1592 if (EXPECT(xCount)) borderPx = *++src;
1593 ACCUM4(borderPx, borderWeight);
1594 break;
1595 }
1596 else
1597 {
1598 ACCUM4(*++src, 0xFF);
1599 }
1600 }
1601
1602 *dst++ = ACCUM4TOPX();
1603 }
1604
1605 srcStart = (uint32_t *)((char *)srcStart + srcRowBytes);
1606 }
1607}
1608
1609
1610static void SqueezeVertically4(OOPixMap srcPx, OOPixMapDimension dstHeight)
1611{
1612 uint32_t *src, *srcStart, *dst;
1613 uint_fast32_t x, y, xCount, startY, endY, lastRow;
1614 size_t srcRowBytes;
1615 uint32_t ag, br;
1616 uint_fast32_t endFractY, deltaY;
1617 uint_fast32_t accum1, accum2, accum3, accum4, weight;
1618 uint_fast8_t startWeight, endWeight;
1619
1620 NSCParameterAssert(OOIsValidPixMap(srcPx) && OOPixMapBytesPerPixel(srcPx) == 4);
1621
1622 dst = srcPx.pixels; // Output is placed in same buffer, without line padding.
1623 srcRowBytes = srcPx.rowBytes;
1624 xCount = srcPx.width;
1625
1626 deltaY = (srcPx.height << 12) / dstHeight;
1627 endFractY = 0;
1628
1629 endWeight = 0;
1630 endY = 0;
1631
1632 lastRow = srcPx.height - 1;
1633
1634 while (endY < lastRow)
1635 {
1636 endFractY += deltaY;
1637 startY = endY;
1638 endY = endFractY >> 12;
1639
1640 startWeight = 0xFF - endWeight;
1641 endWeight = (endFractY >> 4) & 0xFF;
1642
1643 srcStart = (uint32_t *)((char *)srcPx.pixels + srcRowBytes * startY);
1644
1645 for (x = 0; x != xCount; ++x)
1646 {
1647 src = srcStart++;
1648
1649 CLEAR_ACCUM4();
1650 ACCUM4(*src, startWeight);
1651
1652 y = startY;
1653 for (;;)
1654 {
1655 ++y;
1656 src = (uint32_t *)((char *)src + srcRowBytes);
1657 if (EXPECT_NOT(y == endY))
1658 {
1659 if (EXPECT(endY <= lastRow)) ACCUM4(*src, endWeight);
1660 break;
1661 }
1662 else
1663 {
1664 ACCUM4(*src, 0xFF);
1665 }
1666 }
1667
1668 *dst++ = ACCUM4TOPX();
1669 }
1670 }
1671}
1672
1673
1674static BOOL EnsureCorrectDataSize(OOPixMap *pixMap, BOOL leaveSpaceForMipMaps)
1675{
1676 size_t correctSize;
1677 void *bytes = NULL;
1678
1679 correctSize = pixMap->rowBytes * pixMap->height;
1680
1681 // correctSize > 0 check is redundant, but static analyzer (checker-262) doesn't know that. -- Ahruman 2012-03-17
1682 NSCParameterAssert(OOIsValidPixMap(*pixMap) && correctSize > 0);
1683
1684 /* Ensure that the block is not too small. This needs to be done before
1685 adding the mip-map space, as the texture may have been shrunk in place
1686 without being grown for mip-maps.
1687 */
1688 if (EXPECT_NOT(pixMap->bufferSize < correctSize))
1689 {
1691 return NO;
1692 }
1693
1694 if (leaveSpaceForMipMaps) correctSize = correctSize * 4 / 3;
1695 if (correctSize != pixMap->bufferSize)
1696 {
1697 bytes = realloc(pixMap->pixels, correctSize);
1698 if (EXPECT_NOT(bytes == NULL)) free(pixMap->pixels);
1699 pixMap->pixels = bytes;
1700 pixMap->bufferSize = correctSize;
1701 }
1702
1703 return YES;
1704}
#define EXPECT_NOT(x)
#define ALWAYS_INLINE_FUNC
#define NONNULL_FUNC
#define OOINLINE
#define EXPECT(x)
#define FAIL(s)
#define OOLogGenericParameterError()
Definition OOLogging.h:125
#define OOLog(class, format,...)
Definition OOLogging.h:88
NSString *const kOOLogParameterError
Definition OOLogging.m:647
uint_fast32_t OOPixMapDimension
Definition OOPixMap.h:33
NSString * OOPixMapFormatName(OOPixMapFormat format) PURE_FUNC
Definition OOPixMap.m:217
void OODumpPixMap(OOPixMap pixMap, NSString *name)
Definition OOPixMap.m:145
OOPixMapFormat
Definition OOPixMap.h:39
@ kOOPixMapInvalidFormat
Definition OOPixMap.h:40
@ kOOPixMapGrayscale
Definition OOPixMap.h:41
@ kOOPixMapRGBA
Definition OOPixMap.h:43
@ kOOPixMapGrayscaleAlpha
Definition OOPixMap.h:42
const OOPixMap kOONullPixMap
Definition OOPixMap.m:31
#define OORoundUpToPowerOf2_PixMap
Definition OOPixMap.h:35
OOPixMap OOAllocatePixMap(OOPixMapDimension width, OOPixMapDimension height, OOPixMapFormat format, size_t rowBytes, size_t bufferSize)
Definition OOPixMap.m:73
OOINLINE unsigned short OOPixMapBytesPerPixel(OOPixMap pixMap)
Definition OOPixMap.h:132
OOPixMap OOMakePixMap(void *pixels, OOPixMapDimension width, OOPixMapDimension height, OOPixMapFormat format, size_t rowBytes, size_t bufferSize)
Definition OOPixMap.m:53
BOOL OOIsValidPixMap(OOPixMap pixMap)
Definition OOPixMap.m:42
float y
float x
static BOOL GenerateMipMaps2(void *textureBytes, OOPixMapDimension width, OOPixMapDimension height) NONNULL_FUNC
static void SqueezeVertically1(OOPixMap srcPx, OOPixMapDimension dstHeight)
#define DUMP_MIP_MAP_DUMP(px, w, h)
static void ScaleToHalf_4_x1(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight) NONNULL_FUNC
#define ACCUM2(PX, WT)
static void ScaleToHalf_2_x1(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight) NONNULL_FUNC
#define DUMP_SCALE_PREPARE()
static void StretchHorizontally2(OOPixMap srcPx, OOPixMap dstPx)
#define DUMP_SCALE_DUMP(PM, stage)
#define ACCUM2TOPX()
static BOOL GenerateMipMaps4(void *textureBytes, OOPixMapDimension width, OOPixMapDimension height) NONNULL_FUNC
static BOOL EnsureCorrectDataSize(OOPixMap *pixMap, BOOL leaveSpaceForMipMaps) NONNULL_FUNC
OOPixMap OOScalePixMap(OOPixMap srcPx, OOPixMapDimension dstWidth, OOPixMapDimension dstHeight, BOOL leaveSpaceForMipMaps)
#define DUMP_MIP_MAP_PREPARE(pl)
OOINLINE void StretchVertically(OOPixMap srcPx, OOPixMap dstPx) ALWAYS_INLINE_FUNC
static void SqueezeHorizontally2(OOPixMap srcPx, OOPixMapDimension dstWidth)
static BOOL GenerateMipMaps1(void *textureBytes, OOPixMapDimension width, OOPixMapDimension height) NONNULL_FUNC
OOINLINE void StretchHorizontally(OOPixMap srcPx, OOPixMap dstPx) ALWAYS_INLINE_FUNC
static void SqueezeVertically4(OOPixMap srcPx, OOPixMapDimension dstHeight)
#define ACCUM4(PX, WT)
#define ACCUM4TOPX()
OOINLINE void SqueezeVertically(OOPixMap pixMap, OOPixMapDimension dstHeight) ALWAYS_INLINE_FUNC
static void ScaleToHalf_1_x1(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight) NONNULL_FUNC
OOINLINE void SqueezeHorizontally(OOPixMap pixMap, OOPixMapDimension dstHeight) ALWAYS_INLINE_FUNC
static void SqueezeHorizontally1(OOPixMap srcPx, OOPixMapDimension dstWidth)
static void SqueezeHorizontally4(OOPixMap srcPx, OOPixMapDimension dstWidth)
BOOL OOGenerateMipMaps(void *textureBytes, OOPixMapDimension width, OOPixMapDimension height, OOPixMapFormat format)
static void StretchHorizontally1(OOPixMap srcPx, OOPixMap dstPx)
static void StretchVerticallyN_x1(OOPixMap srcPx, OOPixMap dstPx)
static void ScaleToHalf_1_x4(void *srcBytes, void *dstBytes, OOPixMapDimension srcWidth, OOPixMapDimension srcHeight) NONNULL_FUNC
static void StretchHorizontally4(OOPixMap srcPx, OOPixMap dstPx)
#define CLEAR_ACCUM4()
#define CLEAR_ACCUM2()
static void SqueezeVertically2(OOPixMap srcPx, OOPixMapDimension dstHeight)
static void StretchVerticallyN_x4(OOPixMap srcPx, OOPixMap dstPx)
OOPixMapDimension height
Definition OOPixMap.h:50
size_t bufferSize
Definition OOPixMap.h:53
size_t rowBytes
Definition OOPixMap.h:52
void * pixels
Definition OOPixMap.h:49
OOPixMapDimension width
Definition OOPixMap.h:50
OOPixMapFormat format
Definition OOPixMap.h:51