Guest User

Chinese-English Dictionary (Raymond Chen)

a guest
Jul 5th, 2013
100
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #include <windows.h>
  2. #include <time.h>
  3. #include <algorithm>
  4. #include <string>
  5. #include <fstream>
  6. #include <iostream> // for cin/cout
  7. #include <vector>
  8.  
  9. class MappedTextFile
  10. {
  11. public:
  12.  MappedTextFile(LPCTSTR pszFile);
  13.  ~MappedTextFile();
  14.  
  15.  const CHAR *Buffer() { return m_p; }
  16.  DWORD Length() const { return m_cb; }
  17.  
  18. private:
  19.  PCHAR   m_p;
  20.  DWORD   m_cb;
  21.  HANDLE  m_hf;
  22.  HANDLE  m_hfm;
  23. };
  24.  
  25. MappedTextFile::MappedTextFile(LPCTSTR pszFile)
  26.     : m_hfm(NULL), m_p(NULL), m_cb(0)
  27. {
  28.  m_hf = CreateFile(pszFile, GENERIC_READ, FILE_SHARE_READ,
  29.                    NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
  30.  if (m_hf != INVALID_HANDLE_VALUE) {
  31.   DWORD cb = GetFileSize(m_hf, NULL);
  32.   m_hfm = CreateFileMapping(m_hf, NULL, PAGE_READONLY, 0, 0, NULL);
  33.   if (m_hfm != NULL) {
  34.    m_p = reinterpret_cast<PCHAR>
  35.                  (MapViewOfFile(m_hfm, FILE_MAP_READ, 0, 0, cb));
  36.    if (m_p) {
  37.     m_cb = cb;
  38.    }
  39.   }
  40.  }
  41. }
  42.  
  43. MappedTextFile::~MappedTextFile()
  44. {
  45.  if (m_p) UnmapViewOfFile(m_p);
  46.  if (m_hfm) CloseHandle(m_hfm);
  47.  if (m_hf != INVALID_HANDLE_VALUE) CloseHandle(m_hf);
  48. }
  49.  
  50. using std::string;
  51. using std::wstring;
  52. using std::vector;
  53.  
  54. class StringPool
  55. {
  56. public:
  57.  StringPool();
  58.  ~StringPool();
  59.  LPWSTR AllocString(const WCHAR* pszBegin, const WCHAR* pszEnd);
  60.  
  61. private:
  62.  union HEADER {
  63.   struct {
  64.    HEADER* m_phdrPrev;
  65.    SIZE_T  m_cb;
  66.   };
  67.   WCHAR alignment;
  68.  };
  69.  enum { MIN_CBCHUNK = 32000,
  70.         MAX_CHARALLOC = 1024*1024 };
  71.  
  72. private:
  73.  WCHAR*  m_pchNext;   // first available byte
  74.  WCHAR*  m_pchLimit;  // one past last available byte
  75.  HEADER* m_phdrCur;   // current block
  76.  DWORD   m_dwGranularity;
  77. }; // colorization fixed 25 May
  78.  
  79. struct DictionaryEntry
  80. {
  81.  bool Parse(const WCHAR* begin, const WCHAR* end, StringPool& pool);
  82.  LPWSTR m_pszTrad;
  83.  LPWSTR m_pszSimp;
  84.  LPWSTR m_pszPinyin;
  85.  LPWSTR m_pszEnglish;
  86. };
  87.  
  88. LPWSTR AllocString(const WCHAR* begin, const WCHAR* end)
  89. {
  90.  int cch = end - begin + 1;
  91.  LPWSTR psz = new WCHAR[cch];
  92.  lstrcpynW(psz, begin, cch);
  93.  return psz;
  94. }
  95.  
  96. inline DWORD RoundUp(DWORD cb, DWORD units)
  97. {
  98.     return ((cb + units - 1) / units) * units;
  99. }
  100.  
  101. StringPool::StringPool()
  102.  : m_pchNext(NULL), m_pchLimit(NULL), m_phdrCur(NULL)
  103. {
  104.  SYSTEM_INFO si;
  105.  GetSystemInfo(&si);
  106.  m_dwGranularity = RoundUp(sizeof(HEADER) + MIN_CBCHUNK,
  107.                            si.dwAllocationGranularity);
  108. }
  109.  
  110. LPWSTR StringPool::AllocString(const WCHAR* pszBegin, const WCHAR* pszEnd)
  111. {
  112.  size_t cch = pszEnd - pszBegin + 1;
  113.  LPWSTR psz = m_pchNext;
  114.  if (m_pchNext + cch <= m_pchLimit) {
  115.   m_pchNext += cch;
  116.   lstrcpynW(psz, pszBegin, cch);
  117.   return psz;
  118.  }
  119.  
  120.  if (cch > MAX_CHARALLOC) goto OOM;
  121.  DWORD cbAlloc = RoundUp(cch * sizeof(WCHAR) + sizeof(HEADER),
  122.                                                           m_dwGranularity);
  123.  BYTE* pbNext = reinterpret_cast<BYTE*>(
  124.                   VirtualAlloc(NULL, cbAlloc, MEM_COMMIT, PAGE_READWRITE));
  125.  if (!pbNext) {
  126. OOM:
  127.   static std::bad_alloc OOM;
  128.   throw(OOM);
  129.  }
  130.  
  131.  m_pchLimit = reinterpret_cast<WCHAR*>(pbNext + cbAlloc);
  132.  HEADER* phdrCur = reinterpret_cast<HEADER*>(pbNext);
  133.  phdrCur->m_phdrPrev = m_phdrCur;
  134.  phdrCur->m_cb = cbAlloc;
  135.  m_phdrCur = phdrCur;
  136.  m_pchNext = reinterpret_cast<WCHAR*>(phdrCur + 1);
  137.  
  138.  return AllocString(pszBegin, pszEnd);
  139. }
  140.  
  141. StringPool::~StringPool()
  142. {
  143.  HEADER* phdr = m_phdrCur;
  144.  while (phdr) {
  145.   HEADER hdr = *phdr;
  146.   VirtualFree(hdr.m_phdrPrev, hdr.m_cb, MEM_RELEASE);
  147.   phdr = hdr.m_phdrPrev;
  148.  }
  149. }
  150.  
  151. bool DictionaryEntry::Parse(
  152.        const WCHAR* begin, const WCHAR* end,
  153.        StringPool& pool)
  154. {
  155.  const WCHAR* pch = std::find(begin, end, L' ');
  156.  if (pch >= end) return false;
  157.  m_pszTrad = pool.AllocString(begin, pch);
  158.  begin = std::find(pch, end, L'[') + 1;
  159.  if (begin >= end) return false;
  160.  pch = std::find(begin, end, L']');
  161.  if (pch >= end) return false;
  162.  m_pszPinyin = pool.AllocString(begin, pch);
  163.  begin = std::find(pch, end, L'/') + 1;
  164.  if (begin >= end) return false;
  165.  for (pch = end; *--pch != L'/'; ) { }
  166.  if (begin >= pch) return false;
  167.  m_pszEnglish = pool.AllocString(begin, pch);
  168.  return true;
  169. }
  170.  
  171. class Dictionary
  172. {
  173. public:
  174.  Dictionary();
  175.  int Length() { return v.size(); }
  176.  const DictionaryEntry& Item(int i) { return v[i]; }
  177. private:
  178.  vector<DictionaryEntry> v;
  179.  StringPool m_pool;
  180. };
  181.  
  182. Dictionary::Dictionary()
  183. {
  184.  MappedTextFile mtf(TEXT("cedict.b5"));
  185.  const CHAR* pchBuf = mtf.Buffer();
  186.  const CHAR* pchEnd = pchBuf + mtf.Length();
  187.  while (pchBuf < pchEnd) {
  188.   const CHAR* pchEOL = std::find(pchBuf, pchEnd, '\n');
  189.   if (*pchBuf != '#') {
  190.    size_t cchBuf = pchEOL - pchBuf;
  191.    wchar_t* buf = new wchar_t[cchBuf];
  192.    DWORD cchResult = MultiByteToWideChar(950, 0,
  193.                           pchBuf, cchBuf, buf, cchBuf);
  194.    if (cchResult) {
  195.     DictionaryEntry de;
  196.     if (de.Parse(buf, buf + cchResult, m_pool)) {
  197.      v.push_back(de);
  198.     }
  199.    }
  200.    delete[] buf;
  201.   }
  202.   pchBuf = pchEOL + 1;
  203.  }
  204. }
  205.  
  206. int __cdecl main(int argc, const char* argv[])
  207. {
  208.  clock_t dw = clock();
  209.  {
  210.   Dictionary dict;
  211.   std::cout << dict.Length() << std::endl;
  212.  }
  213.  std::cout << clock() - dw << std::endl;
  214.  return 0;
  215. }
RAW Paste Data

Adblocker detected! Please consider disabling it...

We've detected AdBlock Plus or some other adblocking software preventing Pastebin.com from fully loading.

We don't have any obnoxious sound, or popup ads, we actively block these annoying types of ads!

Please add Pastebin.com to your ad blocker whitelist or disable your adblocking software.

×