Advertisement
Guest User

NSString extension to fix HTML entities

a guest
Mar 30th, 2010
1,480
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. //
  2. //  NSString+HTMLEntities.h
  3. //
  4.  
  #import <Foundation/Foundation.h>

  /// Adds HTML entity decoding to NSString.
  @interface NSString (HTMLEntities)

  /// Returns a copy of the receiver in which HTML character references have
  /// been replaced by the characters they stand for.
  ///
  /// Recognised references are the named entities `&amp;`, `&apos;`,
  /// `&quot;`, `&lt;` and `&gt;`, plus decimal (`&#65;`) and hexadecimal
  /// (`&#x41;`) numeric references. Any other text, including unrecognised
  /// entities and isolated ampersands, is passed through unchanged.
  ///
  /// @return The decoded string. When the receiver contains no `&` at all the
  ///         receiver itself is returned.
  - (NSString *)stringByDecodingHTMLEntities;

  @end
  10.  
  11.  
  12. //
  13. //  NSString+HTMLEntities.m
  14. //
  15.  
  16. #import "NSString+HTMLEntities.h"
  17.  
  @implementation NSString (HTMLEntities)

  /// Builds a string containing the single character identified by `codePoint`.
  ///
  /// Values up to U+FFFF are stored as one UTF-16 code unit. Larger values are
  /// split into a surrogate pair because NSString stores UTF-16 and one
  /// `unichar` cannot hold them. Values beyond U+10FFFF are not Unicode and
  /// are replaced by U+FFFD (REPLACEMENT CHARACTER).
  static NSString *HTMLEntitiesStringFromCodePoint(unsigned codePoint) {
      if (codePoint > 0x10FFFF) {
          codePoint = 0xFFFD;
      }
      if (codePoint <= 0xFFFF) {
          unichar unit = (unichar)codePoint;
          return [NSString stringWithCharacters:&unit length:1];
      }
      unsigned offset = codePoint - 0x10000;
      unichar pair[2] = {
          (unichar)(0xD800 + (offset >> 10)),
          (unichar)(0xDC00 + (offset & 0x3FF)),
      };
      return [NSString stringWithCharacters:pair length:2];
  }

  /// Decodes the named entities `&amp;`, `&apos;`, `&quot;`, `&lt;`, `&gt;`
  /// and decimal / hexadecimal numeric character references in the receiver.
  ///
  /// Unrecognised entities and isolated ampersands are copied through
  /// unchanged. A numeric reference without digits (for example `&#;` or
  /// `&#xZZ;`) is copied through and reported with NSLog.
  ///
  /// @return The decoded string, or the receiver itself when it contains no
  ///         ampersand.
  - (NSString *)stringByDecodingHTMLEntities {
      // Short-circuit if there are no ampersands: nothing can need decoding.
      if ([self rangeOfString:@"&" options:NSLiteralSearch].location == NSNotFound) {
          return self;
      }

      // Decoding only ever shortens the text, so the receiver's length is
      // already an upper bound for the result.
      NSMutableString *result = [NSMutableString stringWithCapacity:[self length]];

      NSScanner *scanner = [NSScanner scannerWithString:self];
      // Whitespace is significant here; the scanner must not skip anything.
      [scanner setCharactersToBeSkipped:nil];

      // Characters that terminate a malformed numeric reference.
      NSCharacterSet *boundaryCharacterSet =
          [NSCharacterSet characterSetWithCharactersInString:@" \t\n\r;"];

      while (![scanner isAtEnd]) {
          // Copy everything up to the next ampersand (or the end) verbatim.
          NSString *plainText = nil;
          if ([scanner scanUpToString:@"&" intoString:&plainText]) {
              [result appendString:plainText];
          }
          if ([scanner isAtEnd]) {
              break;
          }

          // The scanner now sits on an ampersand: try each known reference.
          if ([scanner scanString:@"&amp;" intoString:NULL]) {
              [result appendString:@"&"];
          } else if ([scanner scanString:@"&apos;" intoString:NULL]) {
              [result appendString:@"'"];
          } else if ([scanner scanString:@"&quot;" intoString:NULL]) {
              [result appendString:@"\""];
          } else if ([scanner scanString:@"&lt;" intoString:NULL]) {
              [result appendString:@"<"];
          } else if ([scanner scanString:@"&gt;" intoString:NULL]) {
              [result appendString:@">"];
          } else if ([scanner scanString:@"&#" intoString:NULL]) {
              // Numeric reference: "&#" [x] digits [;]
              NSString *xForHex = @"";
              unsigned codePoint = 0;
              BOOL gotNumber = NO;

              if ([scanner scanString:@"x" intoString:&xForHex]) {
                  gotNumber = [scanner scanHexInt:&codePoint];
              } else {
                  // Scan into a real int instead of type-punning the
                  // unsigned; a negative value becomes an out-of-range code
                  // point and is mapped to U+FFFD by the helper.
                  int decimalValue = 0;
                  gotNumber = [scanner scanInt:&decimalValue];
                  codePoint = (unsigned)decimalValue;
              }

              if (gotNumber) {
                  [result appendString:HTMLEntitiesStringFromCodePoint(codePoint)];
                  // The terminating semicolon is optional; swallow it if present.
                  [scanner scanString:@";" intoString:NULL];
              } else {
                  // Not a number: emit the raw text up to the next boundary
                  // so nothing from the input is lost.
                  NSString *unknownEntity = @"";
                  [scanner scanUpToCharactersFromSet:boundaryCharacterSet
                                          intoString:&unknownEntity];
                  [result appendFormat:@"&#%@%@", xForHex, unknownEntity];
                  NSLog(@"Expected numeric character entity but got &#%@%@;", xForHex, unknownEntity);
              }
          } else {
              // An isolated ampersand or an unsupported named entity: copy the
              // ampersand itself; the rest is picked up as plain text on the
              // next pass.
              [scanner scanString:@"&" intoString:NULL];
              [result appendString:@"&"];
          }
      }

      return result;
  }

  @end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement