Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- use std::sync::Arc;
- #[derive(Debug)]
- pub struct Vector1d<T> {
- pub data: Vec<T>,
- }
- impl<T> Vector1d<T> {
- pub fn new() -> Vector1d<T> {
- Vector1d::<T> { data: vec![] }
- }
- }
- #[derive(Debug, Clone, PartialOrd, Ord, PartialEq, Eq)]
- pub struct Vector1dPosition {
- pub index: usize,
- pub offset: usize,
- }
- impl Vector1dPosition {
- pub fn new() -> Vector1dPosition {
- Vector1dPosition {
- index: 0,
- offset: 0,
- }
- }
- }
- pub type MergeCallback<T> =
- fn(&mut [T], &mut [T], &mut Vec<T>) -> (usize, usize);
- pub trait Merge<T> {
- fn merge(
- &self,
- left: &mut [T],
- right: &mut [T],
- result: &mut Vec<T>,
- ) -> (usize, usize);
- }
- pub type MergeType<T> = Option<Arc<dyn Merge<T> + Sync + Send>>;
- pub struct Merge1d<T> {
- left: Vec<Vector1d<T>>,
- right: Vec<Vector1d<T>>,
- }
- pub struct MergePair<T> {
- pub left: Vec<Vector1d<T>>,
- pub right: Vec<Vector1d<T>>,
- }
- impl<T> Merge1d<T>
- where
- T: PartialOrd + Default + std::fmt::Debug,
- {
- pub fn new(left: Vec<Vector1d<T>>, right: Vec<Vector1d<T>>) -> Merge1d<T> {
- assert!(!left.is_empty());
- assert!(!right.is_empty());
- Merge1d::<T> { left, right }
- }
- // Pulls a pair of vectors to merge from the end. Returns None if
- // no more pairs left to merge.
- pub fn pull_next_merge_pair(
- &mut self,
- min_data_length: usize,
- max_data_length: usize,
- ) -> (MergePair<T>, bool) {
- assert!(!self.left.is_empty() || !self.right.is_empty());
- let mut result = MergePair::<T> {
- left: vec![],
- right: vec![],
- };
- let (left_start_pos, right_start_pos) =
- self.get_start_positions(min_data_length, max_data_length);
- assert!(left_start_pos.is_some() || right_start_pos.is_some());
- if let Some(left_start_pos) = left_start_pos {
- result.left = Self::pull_vectors(&mut self.left, left_start_pos);
- }
- if let Some(right_start_pos) = right_start_pos {
- result.right =
- Self::pull_vectors(&mut self.right, right_start_pos);
- }
- let last_pair = self.left.is_empty() && self.right.is_empty();
- (result, last_pair)
- }
- fn get_next_buffer<ValueType>(
- buffers: &mut Vec<Vec<ValueType>>,
- vector_size: usize,
- ) -> Vec<ValueType> {
- if let Some(buffer) = buffers.pop() {
- return buffer;
- }
- Vec::with_capacity(vector_size)
- }
- fn store_buffer<ValueType>(
- buffers: &mut Vec<Vec<ValueType>>,
- mut buffer: Vec<ValueType>,
- vector_size: usize,
- ) {
- if buffers.len() < 4 && buffer.capacity() == vector_size {
- buffer.clear();
- buffers.push(buffer);
- }
- }
- pub fn merge(
- mut pair: MergePair<T>,
- buffers: &mut Vec<Vec<T>>,
- vector_size: usize,
- merge_callback: MergeType<T>,
- ) -> Vec<Vector1d<T>> {
- if pair.left.is_empty() {
- return pair.right;
- }
- if pair.right.is_empty() {
- return pair.left;
- }
- let mut result = vec![];
- let mut current_result = Self::get_next_buffer(buffers, vector_size);
- let mut left_pos = Vector1dPosition::new();
- let mut right_pos = Vector1dPosition::new();
- let left_end_pos = Vector1dPosition {
- index: pair.left.len() - 1,
- offset: pair.left[pair.left.len() - 1].data.len(),
- };
- let right_end_pos = Vector1dPosition {
- index: pair.right.len() - 1,
- offset: pair.right[pair.right.len() - 1].data.len(),
- };
- assert!(merge_callback.is_some());
- while left_pos < left_end_pos && right_pos < right_end_pos {
- let left_inc;
- let right_inc;
- let merge = merge_callback.as_ref().unwrap();
- (left_inc, right_inc) = merge.merge(
- &mut pair.left[left_pos.index].data[left_pos.offset..],
- &mut pair.right[right_pos.index].data[right_pos.offset..],
- &mut current_result,
- );
- left_pos.offset += left_inc;
- right_pos.offset += right_inc;
- assert!(left_pos.offset <= pair.left[left_pos.index].data.len());
- assert!(
- right_pos.offset <= pair.right[right_pos.index].data.len()
- );
- if left_pos.offset == pair.left[left_pos.index].data.len() {
- Self::store_buffer(
- buffers,
- std::mem::take(&mut pair.left[left_pos.index].data),
- vector_size,
- );
- left_pos.index += 1;
- left_pos.offset = 0;
- }
- if right_pos.offset == pair.right[right_pos.index].data.len() {
- Self::store_buffer(
- buffers,
- std::mem::take(&mut pair.right[right_pos.index].data),
- vector_size,
- );
- right_pos.index += 1;
- right_pos.offset = 0;
- }
- if current_result.capacity() == current_result.len() {
- result.push(current_result);
- current_result = Self::get_next_buffer(buffers, vector_size);
- }
- }
- while left_pos < left_end_pos {
- current_result.push(std::mem::take(
- &mut pair.left[left_pos.index].data[left_pos.offset],
- ));
- left_pos.offset += 1;
- if left_pos.offset == pair.left[left_pos.index].data.len() {
- Self::store_buffer(
- buffers,
- std::mem::take(&mut pair.left[left_pos.index].data),
- vector_size,
- );
- left_pos.index += 1;
- left_pos.offset = 0;
- }
- if current_result.capacity() == current_result.len() {
- result.push(current_result);
- current_result = Self::get_next_buffer(buffers, vector_size);
- }
- }
- while right_pos < right_end_pos {
- current_result.push(std::mem::take(
- &mut pair.right[right_pos.index].data[right_pos.offset],
- ));
- right_pos.offset += 1;
- if right_pos.offset == pair.right[right_pos.index].data.len() {
- Self::store_buffer(
- buffers,
- std::mem::take(&mut pair.right[right_pos.index].data),
- vector_size,
- );
- right_pos.index += 1;
- right_pos.offset = 0;
- }
- if current_result.capacity() == current_result.len() {
- result.push(current_result);
- current_result = Self::get_next_buffer(buffers, vector_size);
- }
- }
- if !current_result.is_empty() {
- result.push(current_result);
- }
- let mut vec1d_result = Vec::with_capacity(result.len());
- for vec in &mut result {
- vec1d_result.push(Vector1d::<T> {
- data: std::mem::take(vec),
- });
- }
- vec1d_result
- }
- fn pull_vectors(
- vecs: &mut Vec<Vector1d<T>>,
- start_pos: Vector1dPosition,
- ) -> Vec<Vector1d<T>> {
- assert!(!vecs.is_empty());
- assert!(start_pos.index < vecs.len());
- let mut result: Vec<Vector1d<T>> =
- Vec::with_capacity(vecs.len() - start_pos.index);
- for _ in 0..(vecs.len() - start_pos.index) {
- result.push(Vector1d::<T>::new());
- }
- let mut index = vecs.len() - 1;
- loop {
- assert!(index == vecs.len() - 1);
- let target_index = index - start_pos.index;
- if target_index == 0 {
- if start_pos.offset == 0 {
- result[target_index] = vecs.pop().unwrap();
- } else {
- result[target_index].data =
- vecs[index].data.split_off(start_pos.offset);
- }
- break;
- }
- result[target_index] = vecs.pop().unwrap();
- index -= 1;
- }
- result
- }
- fn get_start_positions(
- &self,
- min_data_length: usize,
- max_data_length: usize,
- ) -> (Option<Vector1dPosition>, Option<Vector1dPosition>) {
- if self.left.is_empty() {
- return (None, Some(Vector1dPosition::new()));
- }
- if self.right.is_empty() {
- return (Some(Vector1dPosition::new()), None);
- }
- let left_pos = Self::find_start_position_from_end_between_distance(
- &self.left,
- min_data_length,
- max_data_length,
- );
- let value = &self.left[left_pos.index].data[left_pos.offset];
- let right_pos_from_value =
- Self::find_position_from_value(&self.right, value);
- if let Some(right_pos_from_value) = right_pos_from_value {
- let distance = Self::calculate_distance_to_end_from_position(
- &self.right,
- right_pos_from_value.clone(),
- );
- if distance <= max_data_length {
- return (Some(left_pos), Some(right_pos_from_value));
- }
- } else {
- assert!(
- value < self.right[0].data.first().unwrap()
- || value
- > self.right[self.right.len() - 1]
- .data
- .last()
- .unwrap()
- );
- return (Some(left_pos), None);
- }
- let right_pos = Self::find_start_position_from_end_between_distance(
- &self.right,
- min_data_length,
- max_data_length,
- );
- let value = &self.right[right_pos.index].data[right_pos.offset];
- let left_pos = Self::find_position_from_value(&self.left, value);
- (left_pos, Some(right_pos))
- }
- // Returns a position given the distance range
- fn find_start_position_from_end_between_distance(
- vecs: &Vec<Vector1d<T>>,
- min_distance: usize,
- max_distance: usize,
- ) -> Vector1dPosition {
- let mut index: usize = vecs.len() - 1;
- let mut distance = 0;
- let mut index_within_range: Option<usize> = None;
- loop {
- let vec_len = vecs[index].data.len();
- if distance + vec_len == max_distance {
- return Vector1dPosition { index, offset: 0 };
- }
- if distance + vec_len > max_distance {
- // Going to the end of this vector is too much (or
- // exact right amount).
- if let Some(index_within_range) = index_within_range {
- return Vector1dPosition {
- index: index_within_range,
- offset: 0,
- };
- }
- let distance_needed = max_distance - distance;
- assert!(distance_needed > 0);
- assert!(distance_needed <= vec_len);
- // Return the max distance point.
- return Vector1dPosition {
- index,
- offset: vec_len - distance_needed,
- };
- }
- if distance + vec_len >= min_distance {
- index_within_range = Some(index);
- }
- distance += vec_len;
- if index == 0 {
- break;
- }
- index -= 1;
- }
- // The beginning of the vector.
- Vector1dPosition::new()
- }
- fn calculate_distance_to_end_from_position(
- vecs: &Vec<Vector1d<T>>,
- mut pos: Vector1dPosition,
- ) -> usize {
- let mut distance = 0;
- while pos.index < vecs.len() {
- distance += vecs.len() - pos.offset;
- pos.offset = 0;
- pos.index += 1;
- }
- distance
- }
- fn find_position_from_value(
- vecs: &Vec<Vector1d<T>>,
- value: &T,
- ) -> Option<Vector1dPosition> {
- let mut index = vecs.len() - 1;
- loop {
- let vec = &vecs[index].data;
- if value > vec.last().unwrap() {
- // The value falls between the two vectors. Return the
- // start position from the next one assuming there is
- // a next one.
- if index == vecs.len() - 1 {
- return None;
- }
- return Some(Vector1dPosition {
- index: index + 1,
- offset: 0,
- });
- }
- if value >= &vec[0] {
- // The value falls within this vector
- let offset = vec.partition_point(|x| x < value);
- assert!(offset != vec.len());
- return Some(Vector1dPosition { index, offset });
- }
- if index == 0 {
- break;
- }
- index -= 1;
- }
- // The start since the value is before the first value.
- Some(Vector1dPosition::new())
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement