Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #ifndef _ii_hpp
- #define _ii_hpp
- #include <iostream>
- #include <memory>
- #include <vector>
- #include <queue>
- #include <iterator>
- #include <algorithm>
- namespace ii_impl {
- using namespace std;
- uint8_t * encode(uint64_t n, uint8_t * buf)
- {
- do {
- // get last 7 bits
- uint8_t x = static_cast<uint8_t>(n & 0x7F);
- n = n >> 7;
- if (n > 0) {
- // add continuing bit
- x = x | 0x80;
- }
- *buf = x;
- ++buf;
- } while (n > 0);
- return buf;
- }
- const uint8_t * decode(uint64_t & n, const uint8_t * buf) {
- uint64_t x = 0;
- uint8_t i = 0;
- n = 0;
- do {
- x = static_cast<uint64_t>(*buf);
- uint64_t pd = x & 0x7F;
- n = n | (pd << (7 * i));
- ++i;
- ++buf;
- } while ((x & 0x80) != 0);
- return buf;
- }
- class Feature {
- const uint64_t size;
- const uint8_t * data_start;
- class iterator :
- public std::iterator<std::forward_iterator_tag, uint64_t> {
- uint64_t size;
- uint64_t idx;
- uint64_t cur;
- const uint8_t * buf;
- public:
- uint64_t operator*() const {
- return cur;
- }
- iterator& operator++() {
- ++idx;
- uint64_t diff = 0;
- // check if idx is valid so that decode does not access invalid memory
- if (idx < size)
- buf = decode(diff, buf);
- cur += diff;
- return *this;
- }
- bool operator== (const iterator & a) const {
- return idx == a.idx;
- }
- bool operator!= (const iterator & a) const {
- return idx != a.idx;
- }
- iterator(const Feature &f, bool end)
- {
- size = f.size;
- if (end) {
- idx = f.size;
- return;
- }
- idx = 0;
- cur = 0;
- buf = f.data_start;
- if (idx < f.size)
- buf = decode(cur, buf);
- }
- };
- public:
- // feature starts with uint64_t size value
- Feature(const uint64_t * p)
- : size(*p) {
- data_start = (const uint8_t *)(p + 1);
- }
- iterator begin() {
- return iterator(*this, false);
- }
- iterator end() {
- return iterator(*this, true);
- }
- };
- }
- namespace ii
- {
- template<typename Truncate, typename FeatureObjectLists>
- void create (Truncate&& truncate, FeatureObjectLists&& features)
- {
- //TODO: replace this with your implementation
- /*
- std::cerr << "number of features: " << features.size() << std::endl;
- for (size_t i = 0; i < features.size(); ++i) {
- std::cerr << "objects that contain feature " << i << ": ";
- for (auto&&cur : features[i]) {
- std::cerr << cur << ' ';
- }
- std::cerr << std::endl;
- }
- std::cerr << std::endl;
- //*/
- // max_b is a tight upper bound on the number of bytes that will be used
- // 3 * features.size() is space for:
- // - offset table
- // - count of objects in each feature
- // - aligning before next feature
- uint64_t max_b = 5 * 8 * features.size();
- for (size_t i = 0; i < features.size(); ++i) {
- uint64_t last = 0;
- for (auto&&cur : features[i]) {
- uint64_t diff = cur - last;
- while (diff > 0) {
- ++max_b;
- diff >>= 7;
- }
- last = cur;
- }
- }
- // + 8 to take ceil of the division
- uint64_t * seg_start = (uint64_t *)truncate((max_b + 8) / 8);
- uint64_t * data = seg_start + features.size();
- for (size_t i = 0; i < features.size(); ++i) {
- // set offset in the table
- seg_start[i] = data - seg_start;
- // + 1 -- reserving space for count
- uint8_t * buf = (uint8_t *)(data + 1);
- uint64_t last = 0;
- uint64_t count = 0;
- for (auto&&cur : features[i]) {
- buf = ii_impl::encode(cur - last, buf);
- last = cur;
- ++count;
- }
- // store count of objects of this feature
- *data = count;
- size_t foo = SIZE_MAX;
- void * void_buf = (void *)buf;
- data = (uint64_t *) std::align(alignof(uint64_t), sizeof(uint64_t), void_buf, foo);
- }
- truncate(data - seg_start);
- }
- template<class Fs, class OutFn>
- void search(const uint64_t*segment, size_t size, Fs&& fs, OutFn&& callback)
- {
- std::queue<ii_impl::Feature> q_orig;
- std::queue<std::vector<uint64_t>> q_intrm;
- for (auto&&f : fs) {
- q_orig.push(ii_impl::Feature(segment + segment[f]));
- }
- std::vector<uint64_t> res;
- while (!q_orig.empty() || !q_intrm.empty()) {
- if (q_orig.size() >= 2) {
- auto f1 = q_orig.front(); q_orig.pop();
- auto f2 = q_orig.front(); q_orig.pop();
- std::vector<uint64_t> intrm_res;
- std::set_intersection(f1.begin(), f1.end(), f2.begin(), f2.end(), std::back_inserter(intrm_res));
- q_intrm.push(std::move(intrm_res));
- }
- else if (q_orig.size() == 1) {
- auto f1 = q_orig.front(); q_orig.pop();
- if (q_intrm.empty()) { // single-feature query, just copy to result
- for (auto&& x : f1)
- res.push_back(x);
- }
- else {
- auto f2 = std::move(q_intrm.front()); q_intrm.pop();
- std::vector<uint64_t> intrm_res;
- std::set_intersection(f1.begin(), f1.end(), f2.begin(), f2.end(), std::back_inserter(intrm_res));
- q_intrm.push(std::move(intrm_res));
- }
- }
- else if (q_intrm.size() >= 2) {
- auto f1 = std::move(q_intrm.front()); q_intrm.pop();
- auto f2 = std::move(q_intrm.front()); q_intrm.pop();
- std::vector<uint64_t> intrm_res;
- std::set_intersection(f1.begin(), f1.end(), f2.begin(), f2.end(), std::back_inserter(intrm_res));
- q_intrm.push(std::move(intrm_res));
- }
- else if (q_intrm.size() == 1) {
- res = std::move(q_intrm.front()); q_intrm.pop();
- }
- }
- for (auto && x : res)
- callback(x);
- }
- }; //namespace ii
- #endif
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement