Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- use std::{fmt, str, mem};
- use std::sync::Mutex;
- use std::collections::HashSet;
/// A handle to a string slice whose identity is its storage location.
///
/// Handles are normally produced by [`StringInterner::intern_string`], which guarantees that
/// equal string contents share one allocation. That is what makes the pointer-based equality
/// below meaningful: comparing two handles is a pointer comparison, never a byte-by-byte
/// string comparison.
///
/// The `'interner` lifetime ties the handle to the storage it points into.
#[derive(Copy, Clone)]
pub struct InternedStr<'interner>(&'interner str);

impl<'interner> PartialEq for InternedStr<'interner> {
    /// Returns `true` only when both handles refer to the *same* slice in memory
    /// (same address and same length).
    ///
    /// Two handles wrapping distinct allocations compare unequal even if the text they
    /// contain is identical.
    fn eq(&self, other: &InternedStr<'interner>) -> bool {
        // `ptr::eq` on `str` compares the fat pointer: address plus length.
        std::ptr::eq(self.0, other.0)
    }
}

impl<'interner> fmt::Display for InternedStr<'interner> {
    /// Writes the wrapped text exactly as `str` would, honouring width, fill and precision
    /// flags supplied by the caller (e.g. `{:>10}`).
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(self.0, f)
    }
}

impl<'interner> fmt::Debug for InternedStr<'interner> {
    /// Writes the text in double quotes followed by the slice's pointer in parentheses, so
    /// that two handles with equal text but different storage can be told apart in logs.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "\"{}\" ({:?})", self.0, self.0 as *const str)
    }
}
- pub struct StringInterner {
- interned_strings: Mutex<HashSet<String>>,
- }
- impl StringInterner {
- pub fn new() -> StringInterner {
- StringInterner {
- interned_strings: Mutex::new(HashSet::new()),
- }
- }
- pub fn intern_string(&'interner self, to_intern: &str) -> InternedStr<'interner> {
- // TODO(zac): @UNWRAP - Is there a way we want to gracefully handle the Mutex being
- // poisoned?
- let mut interned_strings = self.interned_strings.lock().unwrap();
- if let Some(interned) = (*interned_strings).get(to_intern) {
- // @UNSAFE - NOTE(zac): This unsafe converts this string slice into having the lifetime of the
- // string interner.
- //
- // This *should* be safe, as the actual string should only be dropped when this object
- // is dropped.
- let slice = unsafe { mem::transmute(interned as &str) };
- return InternedStr(slice);
- }
- let created = String::from(to_intern);
- // @UNSAFE - NOTE(zac): This unsafe converts this string slice into having the lifetime of the
- // string interner.
- //
- // This *should* be safe, if I understand correctly, as 'created' will not be dropped until
- // the string interner is dropped, and we are tying the lifetime of the interned string to
- // this object.
- let slice = unsafe { mem::transmute(&created as &str) };
- (*interned_strings).insert(created);
- InternedStr(slice)
- }
- }
#[cfg(test)]
mod tests {
    use super::*;

    /// Interning the same text from several threads must resolve to one shared allocation.
    #[test]
    fn test_passing_interred_strings_across_threads() {
        use std::sync::Arc;
        use std::thread;

        let interner = Arc::new(StringInterner::new());

        let handlers: Vec<_> = (0..3)
            .map(|_| {
                let interner = Arc::clone(&interner);
                thread::spawn(move || {
                    // An `InternedStr` borrows from the interner, and this thread's `Arc`
                    // handle is dropped when the closure returns, so the handle itself
                    // cannot be sent back. Report the address of the storage instead.
                    let address = interner.intern_string("Hello").0.as_ptr() as usize;
                    address
                })
            })
            .collect();

        interner.intern_string("world");

        let expected = interner.intern_string("Hello").0.as_ptr() as usize;
        for handler in handlers {
            assert_eq!(handler.join().unwrap(), expected);
        }
    }

    /// Equal contents are deduplicated; different contents get different handles.
    #[test]
    fn test_interning_deduplicates_equal_strings() {
        let interner = StringInterner::new();
        let owned = String::from("Hello");

        let first = interner.intern_string("Hello");
        let second = interner.intern_string(&owned);
        let different = interner.intern_string("world");

        assert_eq!(first, second);
        assert_ne!(first, different);
        assert_eq!(format!("{}", second), "Hello");
    }

    #[test]
    fn test_interned_string_equality_is_pointer_based() {
        let string = "interned";
        let other_string = "interned".to_string();

        // The same byte array is equal.
        let a = InternedStr(string);
        let b = InternedStr(string);
        assert_eq!(a, b);

        // Different string arrays are not equal, even if they
        // contain exactly the same characters.
        let a = InternedStr(string);
        let b = InternedStr(&other_string);
        assert_ne!(a, b);
    }

    #[test]
    fn test_interned_string_display_displays_string() {
        let string = "interned";
        assert_eq!(format!("{}", InternedStr(string)), "interned");
    }
}
Add Comment
Please, Sign In to add comment