Guest User

Untitled

a guest
Jul 19th, 2018
82
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.40 KB | None | 0 0
  1. #!/usr/bin/env perl
  2. use strict;
  3. use warnings;
  4. use feature qw/say bitwise/;
  5. use experimental 'bitwise';
  6. use utf8;
  7. use Benchmark ':all';
  8. use Inline C => <<'END_OF_C_CODE';
  9.  
  10. #define IN_RANGE_INC(type,val,beg,end) \
  11. ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \
  12. <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg)))
  13.  
  14. static inline UV
  15. decode_utf8_cp (unsigned char *s, STRLEN len, STRLEN *clen)
  16. {
  17. if (len >= 2
  18. && IN_RANGE_INC (char, s[0], 0xc2, 0xdf)
  19. && IN_RANGE_INC (char, s[1], 0x80, 0xbf))
  20. {
  21. *clen = 2;
  22. return ((s[0] & 0x1f) << 6) | (s[1] & 0x3f);
  23. }
  24. else
  25. return utf8n_to_uvuni (s, len, clen, UTF8_CHECK_ONLY);
  26. }
  27.  
  28. SV* first_diff_c(SV* sa, SV *sb) {
  29. STRLEN sa_len, sb_len;
  30. char *sa_ptr = SvPV (sa, sa_len);
  31. char *sb_ptr = SvPV (sb, sb_len);
  32. if(SvUTF8(sa) != SvUTF8(sb))
  33. croak ("different argument encoding");
  34. int i;
  35. int min_len = sa_len < sb_len? sa_len: sb_len;
  36. if(SvUTF8(sa)) {
  37. char *sa_end = SvEND(sa) + 1;
  38. char *sb_end = SvEND(sb) + 1;
  39. int cpos;
  40. for(cpos = i = 0; i < min_len; ++cpos) {
  41. STRLEN csa = 1, csb = 1;
  42. UV usa = decode_utf8_cp (sa_ptr + i, sa_end - sa_ptr, &csa);
  43. UV usb = decode_utf8_cp (sb_ptr + i, sb_end - sb_ptr, &csb);
  44. if(usa != usb)
  45. return newSViv(cpos);
  46. i += csa;
  47. }
  48. } else {
  49. for(i = 0; i < min_len; ++i) {
  50. if(sa_ptr[i] != sb_ptr[i])
  51. return newSViv(i);
  52. }
  53. }
  54. if(sa_len != sb_len)
  55. return newSViv(SvCUR(sa));
  56. return &PL_sv_undef;
  57. }
  58.  
  59. END_OF_C_CODE
  60.  
  61. binmode STDOUT, ':utf8';
  62.  
  63. sub first_diff_xor {("$_[0]" ^. "$_[1]") =~ /[^\x00]/ ? $-[0] : undef}
  64.  
  65. sub first_diff_pos {
  66. my ($a, $b) =
  67. length(${$_[0]}) > length(${$_[1]})
  68. ? @_
  69. : ($_[1], $_[0]);
  70. while ($$a =~ m/(.)/g) {
  71. my $c = $1;
  72. return pos($$a) unless $$b =~ m/(.)/g && $c eq $1;
  73. }
  74. return undef;
  75. }
  76.  
  77. sub first_diff_pos_gc {
  78. my ($a, $b) =
  79. length(${$_[0]}) > length(${$_[1]})
  80. ? @_
  81. : ($_[1], $_[0]);
  82. while ($$a =~ m/(.)/gc) {
  83. my $c = $1;
  84. return pos($$a) unless $$b =~ m/(.)/gc && $c eq $1;
  85. }
  86. return undef;
  87. }
  88.  
  89. my @test = (
  90. ["cat", "dog"],
  91. ["kater", "katze"],
  92. ["foo", "foobar"],
  93. ["i want to ride my bicycle!", "i want to ride my bicycle!"],
  94. ["Андрей", "Андрий"]
  95. );
  96. #for my $t (@test) {say "$t->[0] ~~ $t->[1] = " . (first_diff_c(@$t) // "undef")}
  97. #exit;
  98. cmpthese(
  99. 1_000_000, {
  100. first_diff_pos => sub {
  101. first_diff_pos(\$_->[0], \$_->[1]) for @test;
  102. },
  103. first_diff_pos_gc => sub {
  104. first_diff_pos_gc(\$_->[0], \$_->[1]) for @test;
  105. },
  106. first_diff_xor => sub {
  107. first_diff_xor(@$_) for @test;
  108. },
  109. first_diff_c => sub {
  110. first_diff_c(@$_) for @test;
  111. },
  112. },
  113. );
  114.  
  115. __END__
  116.  
  117. Rate first_diff_pos first_diff_xor first_diff_pos_gc first_diff_c
  118. first_diff_pos 189394/s -- -34% -58% -88%
  119. first_diff_xor 288184/s 52% -- -36% -81%
  120. first_diff_pos_gc 448430/s 137% 56% -- -71%
  121. first_diff_c 1538462/s 712% 434% 243% --
Add Comment
Please, Sign In to add comment