Advertisement
Guest User

Untitled

a guest
Jun 30th, 2015
215
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.01 KB | None | 0 0
  1. #include <iostream>
  2.  
  3. using namespace std;
  4. /*
  5. 0xxxxxxx A single-byte US-ASCII code (one of the first 128 characters, U+0000..U+007F)
  6. 110xxxxx One more byte follows
  7. 1110xxxx Two more bytes follow
  8. 11110xxx Three more bytes follow
  9.  
  10. 10xxxxxx A continuation of one of the multi-byte characters
  11. */
  12. bool validUTF8(const char *s) {
  13. int m, r, i = 0;
  14. while (*s != '\0') {
  15. m = 7;
  16. while (m > 0 && (*s & (1 << m)) > 0) --m;
  17. ++s;
  18. r = 6-m;
  19. if (r == -1) // single byte
  20. continue;
  21. else if (r == 0 || r > 3) // continuation or bytes more than 3
  22. return false;
  23. else {
  24. while (r > 0 && *s != '\0') {
  25. if ((*s & (1<<7)) && (*s & (1<<6)) == 0) {
  26. ++s;
  27. --r;
  28. } else {
  29. break;
  30. }
  31. }
  32. if (r > 0) return false;
  33. }
  34. }
  35. return true;
  36. }
  37.  
  38. // 11010000
  39. // 10011000
  40. int main() {
  41. const char *s= "Hello, ζŽθ€…";
  42. cout<<validUTF8(s)<<endl;
  43. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement