Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env perl
- use strict;
- use warnings;
- use feature qw/say bitwise/;
- use experimental 'bitwise';
- use utf8;
- use Benchmark ':all';
- use Inline C => <<'END_OF_C_CODE';
- #define IN_RANGE_INC(type,val,beg,end) \
- ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \
- <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg)))
- static inline UV
- decode_utf8_cp (unsigned char *s, STRLEN len, STRLEN *clen)
- {
- if (len >= 2
- && IN_RANGE_INC (char, s[0], 0xc2, 0xdf)
- && IN_RANGE_INC (char, s[1], 0x80, 0xbf))
- {
- *clen = 2;
- return ((s[0] & 0x1f) << 6) | (s[1] & 0x3f);
- }
- else
- return utf8n_to_uvuni (s, len, clen, UTF8_CHECK_ONLY);
- }
- SV* first_diff_c(SV* sa, SV *sb) {
- STRLEN sa_len, sb_len;
- char *sa_ptr = SvPV (sa, sa_len);
- char *sb_ptr = SvPV (sb, sb_len);
- if(SvUTF8(sa) != SvUTF8(sb))
- croak ("different argument encoding");
- int i;
- int min_len = sa_len < sb_len? sa_len: sb_len;
- if(SvUTF8(sa)) {
- char *sa_end = SvEND(sa) + 1;
- char *sb_end = SvEND(sb) + 1;
- int cpos;
- for(cpos = i = 0; i < min_len; ++cpos) {
- STRLEN csa = 1, csb = 1;
- UV usa = decode_utf8_cp (sa_ptr + i, sa_end - sa_ptr, &csa);
- UV usb = decode_utf8_cp (sb_ptr + i, sb_end - sb_ptr, &csb);
- if(usa != usb)
- return newSViv(cpos);
- i += csa;
- }
- } else {
- for(i = 0; i < min_len; ++i) {
- if(sa_ptr[i] != sb_ptr[i])
- return newSViv(i);
- }
- }
- if(sa_len != sb_len)
- return newSViv(SvCUR(sa));
- return &PL_sv_undef;
- }
- END_OF_C_CODE
- binmode STDOUT, ':utf8';
- sub first_diff_xor {("$_[0]" ^. "$_[1]") =~ /[^\x00]/ ? $-[0] : undef}
- sub first_diff_pos {
- my ($a, $b) =
- length(${$_[0]}) > length(${$_[1]})
- ? @_
- : ($_[1], $_[0]);
- while ($$a =~ m/(.)/g) {
- my $c = $1;
- return pos($$a) unless $$b =~ m/(.)/g && $c eq $1;
- }
- return undef;
- }
- sub first_diff_pos_gc {
- my ($a, $b) =
- length(${$_[0]}) > length(${$_[1]})
- ? @_
- : ($_[1], $_[0]);
- while ($$a =~ m/(.)/gc) {
- my $c = $1;
- return pos($$a) unless $$b =~ m/(.)/gc && $c eq $1;
- }
- return undef;
- }
- my @test = (
- ["cat", "dog"],
- ["kater", "katze"],
- ["foo", "foobar"],
- ["i want to ride my bicycle!", "i want to ride my bicycle!"],
- ["Андрей", "Андрий"]
- );
- #for my $t (@test) {say "$t->[0] ~~ $t->[1] = " . (first_diff_c(@$t) // "undef")}
- #exit;
- cmpthese(
- 1_000_000, {
- first_diff_pos => sub {
- first_diff_pos(\$_->[0], \$_->[1]) for @test;
- },
- first_diff_pos_gc => sub {
- first_diff_pos_gc(\$_->[0], \$_->[1]) for @test;
- },
- first_diff_xor => sub {
- first_diff_xor(@$_) for @test;
- },
- first_diff_c => sub {
- first_diff_c(@$_) for @test;
- },
- },
- );
- __END__
- Rate first_diff_pos first_diff_xor first_diff_pos_gc first_diff_c
- first_diff_pos 189394/s -- -34% -58% -88%
- first_diff_xor 288184/s 52% -- -36% -81%
- first_diff_pos_gc 448430/s 137% 56% -- -71%
- first_diff_c 1538462/s 712% 434% 243% --
Add Comment
Please, Sign In to add comment