Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <boost/algorithm/string/find.hpp>
- #include <boost/iostreams/device/mapped_file.hpp>
- #include <cassert>
- #include <iostream>
- using namespace boost;
- using namespace boost::algorithm;
- using namespace boost::iostreams;
- using namespace std;
- int main(int argc, char **argv)
- {
- if (argc != 3) {
- cerr << "Call: " << argv[0] << " PATTERN_FILE SRC_FILEn";
- return 3;
- }
- mapped_file_source pattern(argv[1]);
- mapped_file_source src(argv[2]);
- iterator_range<const char*> p_range(pattern.data(),
- pattern.data() + pattern.size());
- iterator_range<const char*> s_range(src.data(), src.data() + src.size());
- iterator_range<const char*> result = find_first(s_range, p_range);
- if (result) {
- size_t pos = result.begin()-s_range.begin();
- cout << pos << 'n';
- return 0;
- }
- return 1;
- }
- $ make CXXFLAGS="-Wall -g" LDLIBS="-lboost_iostreams" searchb
- $ dd if=WTF_-_EPISODE_277_RACHAEL_HARRIS.mp3 of=t skip=232323 bs=1 count=4K
- $ ls -l t
- -rw-r--r-- 1 juser users 4096 2012-05-31 15:24 t
- $ ./searchb t WTF_-_EPISODE_277_RACHAEL_HARRIS.mp3
- 232323
- #!/usr/bin/env python
- import locale
- import os
- import sys
- import urllib2
def boyermoore_horspool(fd, needle):
    """Find the first occurrence of ``needle`` in a binary stream.

    Implements the Boyer-Moore-Horspool search over a forward-only stream,
    keeping only a window of ``len(needle)`` bytes in memory.

    Args:
        fd: Object with a ``read(n)`` method that returns byte strings and
            returns an empty value at end of input. Reads that return fewer
            bytes than requested are retried until the window is full.
        needle: The byte string to search for (``str`` on Python 2,
            ``bytes``/``bytearray`` on Python 3).

    Returns:
        The zero-based offset of the first match, or -1 if the stream ends
        before a match is found. An empty needle matches at offset 0.
    """
    # bytearray yields integers on indexing under both Python 2 and 3, so the
    # skip table can be indexed directly without ord().
    needle = bytearray(needle)
    nlen = len(needle)
    nlast = nlen - 1

    # Bad-character table: how far the window may advance when its last byte
    # is a given value. Bytes absent from needle[:-1] allow a full-length jump.
    skip = [nlen] * 256
    for k in range(nlast):
        skip[needle[k]] = nlast - k

    pos = 0               # stream offset of the first byte in the window
    window = bytearray()  # the nlen bytes currently aligned with the needle
    need = nlen           # bytes to pull in to refill the window
    while True:
        # Collect exactly `need` bytes; a short read is not end of input,
        # only an empty read is.
        chunk = bytearray()
        while len(chunk) < need:
            piece = fd.read(need - len(chunk))
            if not piece:
                return -1
            chunk += piece

        # Slide the window forward by `need` bytes.
        del window[:need]
        window += chunk

        if window == needle:
            return pos

        need = skip[window[nlast]]
        pos += need
if __name__ == "__main__":
    # Command-line driver: search the content at URL (or standard input) for
    # the bytes stored in NEEDLE_FILE.
    # Exit status: 0 when found (offset printed), 1 when not found,
    # 2 when called without a needle file.
    if len(sys.argv) < 2:
        sys.stderr.write("""Usage: horspool.py NEEDLE_FILE [URL]
Search for the contents of NEEDLE_FILE inside the content at URL.
If URL is omitted, search standard input.
If the content is found, print the offset of the first occurrence and return 0.
Otherwise, return 1.""")
        sys.exit(2)

    # The needle is arbitrary binary data, so read it in binary mode; the
    # with-block guarantees the file is actually closed.
    with open(sys.argv[1], "rb") as needle_file:
        needle = needle_file.read()

    fd = urllib2.urlopen(sys.argv[2]) if len(sys.argv) > 2 else sys.stdin
    try:
        offset = boyermoore_horspool(fd, needle)
    finally:
        # Close the input on every path, including the not-found exit below.
        fd.close()

    if offset >= 0:
        print(offset)
    else:
        sys.exit(1)
Add Comment
Please, Sign In to add comment