Advertisement
Guest User

Untitled

a guest
Jul 31st, 2010
159
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 2.79 KB | None | 0 0
  1. /*Copyright (C) 2010  Armin Preiml
  2.  
  3. This program is free software: you can redistribute it and/or modify
  4. it under the terms of the GNU General Public License as published by
  5. the Free Software Foundation, either version 3 of the License, or
  6. (at your option) any later version.
  7.  
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  11. GNU General Public License for more details.
  12.  
  13. You should have received a copy of the GNU General Public License
  14. along with this program.  If not, see <http://www.gnu.org/licenses/>. */
  15.  
  16. #include <iostream>
  17. #include <vector>
  18. #include <iconv.h>
  19. #include <errno.h>
  20.  
  /**
   * Converts an ISO-8859-1 (Latin-1) encoded string to UTF-8 using iconv.
   *
   * @param input  Bytes interpreted as ISO-8859-1. Taken by value so that the
   *               local copy can be handed to iconv(), which requires a
   *               non-const char pointer.
   * @return The UTF-8 encoded text. If the conversion descriptor cannot be
   *         opened, an empty string is returned. If the conversion stops on
   *         an error, the part converted so far is returned. In both cases a
   *         diagnostic is written to std::cerr.
   */
  std::string convertToUTF8(std::string input)
  {
      std::string output;

      // Create the conversion descriptor (target encoding first, source second).
      iconv_t cd = iconv_open("UTF-8", "ISO-8859-1");

      if (cd == (iconv_t)-1)
      {
          // Save errno before touching the stream: stream I/O may overwrite it.
          const int openError = errno;

          std::cerr << "Convert to UTF-8 failed: ";

          switch (openError)
          {
              case EMFILE:
                  std::cerr << "{OPEN_MAX} files descriptors are currently open in the calling process.\n";
                  break;
              case ENFILE:
                  std::cerr << "Too many files are currently open in the system. \n";
                  break;
              case ENOMEM:
                  std::cerr << "Insufficient storage space is available. \n";
                  break;
              case EINVAL:
                  std::cerr << "The conversion specified by fromcode and tocode is not supported by the implementation. \n";
                  break;
              default:
                  std::cerr << "WTF?\n";
                  break;
          }

          // There is no valid descriptor, so there is nothing to close.
          return output;
      }

      // Closes the descriptor on every exit path, including an exception
      // thrown while growing the output string.
      struct DescriptorCloser
      {
          iconv_t handle;
          ~DescriptorCloser() { iconv_close(handle); }
      };
      DescriptorCloser closer = { cd };
      (void)closer;

      if (!input.empty())
      {
          char *inCursor = &input[0];
          size_t inBytesLeft = input.size();

          static const size_t outputSize = 1000;
          char outputBuffer[outputSize];

          while (inBytesLeft > 0)
          {
              // Every pass starts with an empty output buffer.
              char *outCursor = outputBuffer;
              size_t outBytesLeft = outputSize;

              const size_t rc = iconv(cd, &inCursor, &inBytesLeft, &outCursor, &outBytesLeft);
              const int convError = (rc == (size_t)-1) ? errno : 0;

              // Keep only the bytes iconv actually wrote. The buffer need not
              // be completely full even on E2BIG, because a multi-byte
              // sequence may not fit into the last free byte(s).
              const size_t produced = outputSize - outBytesLeft;
              output.append(outputBuffer, produced);

              if (rc != (size_t)-1)
              {
                  continue;   // success: all input consumed, the loop ends
              }

              if (convError == E2BIG)
              {
                  if (produced > 0)
                  {
                      continue;   // buffer drained, convert the next chunk
                  }
                  break;          // no progress possible; avoid spinning forever
              }

              // Any other error leaves the input position unchanged, so
              // retrying would loop forever. Report it and stop.
              switch (convError)
              {
                  case EILSEQ:
                      std::cerr << "Invalid multibyte sequence.\n";
                      break;
                  case EINVAL:
                      std::cerr << "Incomplete multibyte sequence.\n";
                      break;
                  default:
                      std::cerr << "DUNNO\n";
                      break;
              }
              break;
          }
      }

      return output;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement