Advertisement
Guest User

Untitled

a guest
Feb 18th, 2018
101
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Rust 2.87 KB | None | 0 0
  1. #![feature(align_offset)]
  2. use std::usize;
  3. use std::mem;
  4. use std::cmp;
  5.  
  6. const LO_U64: u64 = 0x0101010101010101;
  7. const HI_U64: u64 = 0x8080808080808080;
  8.  
  9. // use truncation
  10. const LO_USIZE: usize = LO_U64 as usize;
  11. const HI_USIZE: usize = HI_U64 as usize;
  12.  
  13. #[inline]
  14. fn contains_zero_byte(x: usize) -> bool {
  15.     x.wrapping_sub(LO_USIZE) & !x & HI_USIZE != 0
  16. }
  17.  
  18. #[cfg(target_pointer_width = "16")]
  19. #[inline]
  20. fn repeat_byte(b: u8) -> usize {
  21.     (b as usize) << 8 | b as usize
  22. }
  23.  
  24. #[cfg(target_pointer_width = "32")]
  25. #[inline]
  26. fn repeat_byte(b: u8) -> usize {
  27.     let mut rep = (b as usize) << 8 | b as usize;
  28.     rep = rep << 16 | rep;
  29.     rep
  30. }
  31.  
  32. #[cfg(target_pointer_width = "64")]
  33. #[inline]
  34. fn repeat_byte(b: u8) -> usize {
  35.     let mut rep = (b as usize) << 8 | b as usize;
  36.     rep = rep << 16 | rep;
  37.     rep = rep << 32 | rep;
  38.     rep
  39. }
  40.  
  41. /// Return the first index matching the byte `a` in `text`.
  42. pub fn memchr(x: u8, text: &[u8]) -> Option<usize> {
  43.     // Scan for a single byte value by reading two `usize` words at a time.
  44.     //
  45.     // Split `text` in three parts
  46.     // - unaligned initial part, before the first word aligned address in text
  47.     // - body, scan by 2 words at a time
  48.     // - the last remaining part, < 2 word size
  49.     let len = text.len();
  50.     let ptr = text.as_ptr();
  51.     let usize_bytes = mem::size_of::<usize>();
  52.  
  53.     // search up to an aligned boundary
  54.     let mut offset = ptr.align_offset(usize_bytes);
  55.     if offset > 0 {
  56.         offset = cmp::min(offset, len);
  57.         if let Some(index) = text[..offset].iter().position(|elt| *elt == x) {
  58.             return Some(index);
  59.         }
  60.     }
  61.  
  62.     // search the body of the text
  63.     let repeated_x = repeat_byte(x);
  64.  
  65.     if len >= 2 * usize_bytes {
  66.         while offset <= len - 2 * usize_bytes {
  67.             unsafe {
  68.                 let u = *(ptr.offset(offset as isize) as *const usize);
  69.                 let v = *(ptr.offset((offset + usize_bytes) as isize) as *const usize);
  70.  
  71.                 // break if there is a matching byte
  72.                 let zu = contains_zero_byte(u ^ repeated_x);
  73.                 let zv = contains_zero_byte(v ^ repeated_x);
  74.                 if zu || zv {
  75.                     break;
  76.                 }
  77.             }
  78.             offset += usize_bytes * 2;
  79.         }
  80.     }
  81.  
  82.     // find the byte after the point the body loop stopped
  83.     text[offset..].iter().position(|elt| *elt == x).map(|i| offset + i)
  84. }
  85.  
  86. fn main() {
  87.     let mut buff:Vec<u8> = Vec::new();
  88.     buff.resize(1024*1024*512, 0);
  89.     buff[1024*1024*510] = 7;
  90.     buff[1024*1024*128] = 77;
  91.    
  92.     for _ in 0..8 {
  93.         match memchr(7, buff.as_slice()) {
  94.             Some(x) => {
  95.                 println!("{}", x);
  96.             },
  97.             None => {
  98.                 println!("0";
  99.             }
  100.         }
  101.     }
  102.    
  103.     for _ in 0..8 {
  104.         match memchr(7, buff.as_slice()) {
  105.             Some(x) => {
  106.                 println!("{}", x);
  107.             },
  108.             None => {
  109.                 println!("0";
  110.             }
  111.         }
  112.     }
  113. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement