Guest User

Untitled

a guest
Jan 23rd, 2018
69
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.09 KB | None | 0 0
  1. #include <stdbool.h>
  2. #include <stdio.h>
  3. #include <stdlib.h>
  4.  
  5. static int buf[4];
  6. static size_t buflen;
  7.  
  8. /* Reads from stdin until buf[index] can be accessed. */
  9. static bool read_buf(size_t index) {
  10. while (!(index < buflen)) {
  11. buf[buflen] = getchar();
  12. if (buf[buflen] == EOF) {
  13. return false;
  14. }
  15. buflen++;
  16. }
  17. return buf[buflen - 1] != EOF;
  18. }
  19.  
  20. static void write_buf(size_t n) {
  21. for (size_t i = 0; i < n; i++) {
  22. putchar(buf[i]);
  23. }
  24. buflen = 0;
  25. }
  26.  
  27. static bool start_between(int min, int max) {
  28. return min <= buf[0] && buf[0] < max;
  29. }
  30.  
  31. static bool is_cont(int index) {
  32. return 0x80 <= buf[index] && buf[index] < 0xC0;
  33. }
  34.  
  35. static int code_point(int bits18, int bits12, int bits06, int bits00) {
  36. return ((bits18 & 0x3F) << 18)
  37. | ((bits12 & 0x3F) << 12)
  38. | ((bits06 & 0x3F) << 6)
  39. | (bits00 & 0x3F);
  40. }
  41.  
  42. int main(void) {
  43. while (read_buf(0) && !ferror(stdout)) {
  44. if (start_between(0x00, 0x80)) {
  45. write_buf(1);
  46. } else if (start_between(0xC2, 0xE0) && read_buf(1) && is_cont(1)
  47. && code_point(0, 0, buf[0] & 0x1F, buf[1]) >= 0x0080) {
  48. write_buf(2);
  49. } else if (start_between(0xE0, 0xF0) && read_buf(2) && is_cont(1) && is_cont(2)
  50. && code_point(0, buf[0] & 0x0F, buf[1], buf[2]) >= 0x0800) {
  51. write_buf(3);
  52. } else if (start_between(0xF0, 0xF8) && read_buf(3) && is_cont(1) && is_cont(2) && is_cont(3)
  53. && code_point(buf[0] & 0x07, buf[1], buf[2], buf[3]) >= 0x010000
  54. && code_point(buf[0] & 0x07, buf[1], buf[2], buf[3]) < 0x110000) {
  55. write_buf(4);
  56. } else {
  57. putchar(0xC0 + ((buf[0] >> 6) & 0x1F));
  58. putchar(0x80 + ((buf[0] >> 0) & 0x3F));
  59. buflen--;
  60. buf[0] = buf[1];
  61. buf[1] = buf[2];
  62. buf[2] = buf[3];
  63. buf[3] = 0;
  64. }
  65. }
  66.  
  67. return ferror(stdin) || ferror(stdout) ? EXIT_FAILURE : EXIT_SUCCESS;
  68. }
  69.  
  70. perl -e 'print pack("H*", "fcf09f9985e08080fc")' | ./latin1_to_utf8 | hexdump -C
Add Comment
Please, Sign In to add comment