/*

OOEncodingConverter.m

Copyright (C) 2008-2013 Jens Ayton and contributors

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

*/

/*	Defining OOENCODINGCONVERTER_EXCLUDE before this file is compiled removes
	the OOEncodingConverter class (and its project imports), leaving only the
	StringFromEncoding()/EncodingFromString() functions at the end of the file.
*/
#ifndef OOENCODINGCONVERTER_EXCLUDE

#import "OOEncodingConverter.h"
#import "OOCache.h"
#import "OOCollectionExtractors.h"
#import "OOLogging.h"


/*	Using compatibility mapping - converting strings to Unicode form KC - would
	reduce potential complications in localizing Oolite. However, the method to
	perform the transformation is not available in GNUstep. I'm currently not
	using it under OS X either, for cross-platform consistency.
	-- Ahruman 2008-01-27
*/
#if OOLITE_MAC_OS_X
#define USE_COMPATIBILITY_MAPPING	0
#else
#define USE_COMPATIBILITY_MAPPING	0
#endif


// Set to 1 to log cache hit/miss statistics every five seconds.
#define PROFILE_ENCODING_CONVERTER	0


// Prune threshold handed to the conversion cache (see profiling notes below).
static const NSUInteger kCachePruneThreshold = 200;


#if PROFILE_ENCODING_CONVERTER
// The converter that owns the profiling timer (the first one initialized).
static OOEncodingConverter	*sProfiledConverter = nil;
static NSTimer				*sProfileTimer = nil;

// Counters are shared by every converter instance, not just the profiled one.
static unsigned				sCacheHits = 0;
static unsigned				sCacheMisses = 0;
#endif


@interface OOEncodingConverter (Private)

/*	Apply the substitution table to string and encode the result using the
	converter's encoding. Performs no caching.
*/
- (NSData *) performConversionForString:(NSString *)string;

@end


@implementation OOEncodingConverter

/*	Designated initializer.
	encoding:		target encoding for converted strings.
	substitutions:	dictionary mapping substrings to their replacements,
					applied before encoding. Copied; may be nil.
*/
- (id) initWithEncoding:(NSStringEncoding)encoding substitutions:(NSDictionary *)substitutions
{
	self = [super init];
	if (self != nil)
	{
		_cache = [[OOCache alloc] init];
		[_cache setPruneThreshold:kCachePruneThreshold];
		[_cache setName:@"Text encoding"];
		_substitutions = [substitutions copy];
		_encoding = encoding;
		
#if PROFILE_ENCODING_CONVERTER
		if (sProfiledConverter == nil)
		{
			sProfiledConverter = self;
			// NOTE: a scheduled NSTimer retains its target until invalidated.
			sProfileTimer = [NSTimer scheduledTimerWithTimeInterval:5 target:self selector:@selector(profileFire:) userInfo:nil repeats:YES];
		}
#endif
	}
	
	return self;
}


/*	Convenience initializer reading the "encoding" (a name understood by
	EncodingFromString()) and "substitutions" keys of a font property list.
	NOTE(review): a missing or unrecognized encoding name is passed through as
	(NSStringEncoding)NSNotFound; confirm callers always supply a valid name.
*/
- (id) initWithFontPList:(NSDictionary *)fontPList
{
	NSStringEncoding	encoding = EncodingFromString([fontPList oo_stringForKey:@"encoding"]);
	NSDictionary		*substitutions = [fontPList oo_dictionaryForKey:@"substitutions"];
	
	return [self initWithEncoding:encoding substitutions:substitutions];
}


- (void) dealloc
{
	[_cache release];
	[_substitutions release];
	
#if PROFILE_ENCODING_CONVERTER
	// Only the converter that owns the profiling timer may tear it down;
	// deallocating any other converter must not stop profiling.
	if (sProfiledConverter == self)
	{
		sProfiledConverter = nil;
		[sProfileTimer invalidate];
		sProfileTimer = nil;
		sCacheHits = 0;
		sCacheMisses = 0;
	}
#endif
	
	[super dealloc];
}


// Returns a short description of the form "encoding: <numeric encoding>".
- (NSString *) descriptionComponents
{
	// NSStringEncoding is an NSUInteger, whose width varies by platform;
	// cast explicitly so the argument always matches %lu.
	return [NSString stringWithFormat:@"encoding: %lu", (unsigned long)_encoding];
}


/*	Convert string to data in the converter's encoding, applying the
	substitution table first. Results are cached by string.
	Returns empty data for a nil string, and nil if the conversion itself
	produced nil (such results are not cached).
*/
- (NSData *) convertString:(NSString *)string
{
	NSData				*data = nil;
	
#if USE_COMPATIBILITY_MAPPING
	// Convert to Unicode Normalization Form KC (that is, minimize the use of combining modifiers while avoiding precomposed ligatures)
	string = [string precomposedStringWithCompatibilityMapping];
#endif
	
	if (string == nil)  return [NSData data];
	
	data = [_cache objectForKey:string];
	if (data == nil)
	{
		data = [self performConversionForString:string];
		if (data != nil)  [_cache setObject:data forKey:string];
		
#if PROFILE_ENCODING_CONVERTER
		++sCacheMisses;
#endif
	}
#if PROFILE_ENCODING_CONVERTER
	else
	{
		++sCacheHits;
	}
#endif
	
	return data;
}


// The target encoding supplied at initialization.
- (NSStringEncoding) encoding
{
	return _encoding;
}

@end


@implementation OOEncodingConverter (Private)

- (NSData *) performConversionForString:(NSString *)string
{
	NSEnumerator		*keyEnum = nil;
	id					target = nil;
	id					replacement = nil;
	NSMutableString		*result = nil;
	
	result = [[string mutableCopy] autorelease];
	if (result == nil)  return nil;
	
	/*	Substitutions are applied in the dictionary's key enumeration order,
		which is unspecified; entries are expected not to depend on one another.
	*/
	for (keyEnum = [_substitutions keyEnumerator]; (target = [keyEnum nextObject]); )
	{
		replacement = [_substitutions objectForKey:target];
		
		// The table comes from a property list, so entries are not guaranteed
		// to be strings. Skip malformed ones rather than passing them to
		// -replaceOccurrencesOfString:..., which requires NSString arguments.
		if (![target isKindOfClass:[NSString class]] ||
			![replacement isKindOfClass:[NSString class]])
		{
			continue;
		}
		
		[result replaceOccurrencesOfString:target
								withString:replacement
								   options:0
									 range:NSMakeRange(0, [result length])];
	}
	
	// Lossy conversion: characters with no representation in the target
	// encoding are approximated or dropped instead of failing the conversion.
	return [result dataUsingEncoding:_encoding allowLossyConversion:YES];
}


#if PROFILE_ENCODING_CONVERTER
/*
	Profiling observations:
	* The clock generates one new string per second.
	* The trade screens each use over 100 strings, so cache sizes below 150
	  are undesirable.
	* Cache hit ratio is extremely near 100% at most times.
*/
// Timer callback: log the hit/miss counts for the last interval and reset them.
- (void) profileFire:(id)junk
{
	unsigned			total = sCacheHits + sCacheMisses;
	// Avoid 0/0 (NaN) when no conversions happened during the interval.
	float				ratio = (total != 0) ? ((float)sCacheHits / (float)total) : 0.0f;
	
	OOLog(@"strings.encoding.profile", @"Cache hits: %u, misses: %u, ratio: %.2g", sCacheHits, sCacheMisses, ratio);
	sCacheHits = sCacheMisses = 0;
}
#endif

@end

#endif //OOENCODINGCONVERTER_EXCLUDE


/*
	There are a variety of overlapping naming schemes for text encoding.
	We ignore them and use a fixed list:
	
	"windows-latin-1"		NSWindowsCP1252StringEncoding
	"windows-latin-2"		NSWindowsCP1250StringEncoding
	"windows-cyrillic"		NSWindowsCP1251StringEncoding
	"windows-greek"			NSWindowsCP1253StringEncoding
	"windows-turkish"		NSWindowsCP1254StringEncoding
*/

#define kWindowsLatin1Str		@"windows-latin-1"
#define kWindowsLatin2Str		@"windows-latin-2"
#define kWindowsCyrillicStr		@"windows-cyrillic"
#define kWindowsGreekStr		@"windows-greek"
#define kWindowsTurkishStr		@"windows-turkish"


/*	Map a supported NSStringEncoding to its name from the list above.
	Returns nil for any encoding not in the list.
*/
NSString *StringFromEncoding(NSStringEncoding encoding)
{
	switch (encoding)
	{
		case NSWindowsCP1252StringEncoding:
			return kWindowsLatin1Str;
			
		case NSWindowsCP1250StringEncoding:
			return kWindowsLatin2Str;
			
		case NSWindowsCP1251StringEncoding:
			return kWindowsCyrillicStr;
			
		case NSWindowsCP1253StringEncoding:
			return kWindowsGreekStr;
			
		case NSWindowsCP1254StringEncoding:
			return kWindowsTurkishStr;
			
		default:
			return nil;
	}
}


/*	Map an encoding name from the list above to its NSStringEncoding.
	Matching is exact (case-sensitive). Returns (NSStringEncoding)NSNotFound
	for an unrecognized name, or for nil (messages to nil return NO).
*/
NSStringEncoding EncodingFromString(NSString *name)
{
	if ([name isEqualToString:kWindowsLatin1Str])  return NSWindowsCP1252StringEncoding;
	if ([name isEqualToString:kWindowsLatin2Str])  return NSWindowsCP1250StringEncoding;
	if ([name isEqualToString:kWindowsCyrillicStr])  return NSWindowsCP1251StringEncoding;
	if ([name isEqualToString:kWindowsGreekStr])  return NSWindowsCP1253StringEncoding;
	if ([name isEqualToString:kWindowsTurkishStr])  return NSWindowsCP1254StringEncoding;
	return (NSStringEncoding)NSNotFound;
}