Advertisement
Guest User

Chinese-English Dictionary (Raymond Chen)

a guest
Jul 5th, 2013
272
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 5.08 KB | None | 0 0
  1. #include <windows.h>
  2. #include <time.h>
  3. #include <algorithm>
  4. #include <string>
  5. #include <fstream>
  6. #include <iostream> // for cin/cout
  7. #include <vector>
  8.  
  9. class MappedTextFile
  10. {
  11. public:
  12.  MappedTextFile(LPCTSTR pszFile);
  13.  ~MappedTextFile();
  14.  
  15.  const CHAR *Buffer() { return m_p; }
  16.  DWORD Length() const { return m_cb; }
  17.  
  18. private:
  19.  PCHAR   m_p;
  20.  DWORD   m_cb;
  21.  HANDLE  m_hf;
  22.  HANDLE  m_hfm;
  23. };
  24.  
  25. MappedTextFile::MappedTextFile(LPCTSTR pszFile)
  26.     : m_hfm(NULL), m_p(NULL), m_cb(0)
  27. {
  28.  m_hf = CreateFile(pszFile, GENERIC_READ, FILE_SHARE_READ,
  29.                    NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
  30.  if (m_hf != INVALID_HANDLE_VALUE) {
  31.   DWORD cb = GetFileSize(m_hf, NULL);
  32.   m_hfm = CreateFileMapping(m_hf, NULL, PAGE_READONLY, 0, 0, NULL);
  33.   if (m_hfm != NULL) {
  34.    m_p = reinterpret_cast<PCHAR>
  35.                  (MapViewOfFile(m_hfm, FILE_MAP_READ, 0, 0, cb));
  36.    if (m_p) {
  37.     m_cb = cb;
  38.    }
  39.   }
  40.  }
  41. }
  42.  
  43. MappedTextFile::~MappedTextFile()
  44. {
  45.  if (m_p) UnmapViewOfFile(m_p);
  46.  if (m_hfm) CloseHandle(m_hfm);
  47.  if (m_hf != INVALID_HANDLE_VALUE) CloseHandle(m_hf);
  48. }
  49.  
  50. using std::string;
  51. using std::wstring;
  52. using std::vector;
  53.  
  54. class StringPool
  55. {
  56. public:
  57.  StringPool();
  58.  ~StringPool();
  59.  LPWSTR AllocString(const WCHAR* pszBegin, const WCHAR* pszEnd);
  60.  
  61. private:
  62.  union HEADER {
  63.   struct {
  64.    HEADER* m_phdrPrev;
  65.    SIZE_T  m_cb;
  66.   };
  67.   WCHAR alignment;
  68.  };
  69.  enum { MIN_CBCHUNK = 32000,
  70.         MAX_CHARALLOC = 1024*1024 };
  71.  
  72. private:
  73.  WCHAR*  m_pchNext;   // first available byte
  74.  WCHAR*  m_pchLimit;  // one past last available byte
  75.  HEADER* m_phdrCur;   // current block
  76.  DWORD   m_dwGranularity;
  77. }; // colorization fixed 25 May
  78.  
  79. struct DictionaryEntry
  80. {
  81.  bool Parse(const WCHAR* begin, const WCHAR* end, StringPool& pool);
  82.  LPWSTR m_pszTrad;
  83.  LPWSTR m_pszSimp;
  84.  LPWSTR m_pszPinyin;
  85.  LPWSTR m_pszEnglish;
  86. };
  87.  
  88. LPWSTR AllocString(const WCHAR* begin, const WCHAR* end)
  89. {
  90.  int cch = end - begin + 1;
  91.  LPWSTR psz = new WCHAR[cch];
  92.  lstrcpynW(psz, begin, cch);
  93.  return psz;
  94. }
  95.  
  96. inline DWORD RoundUp(DWORD cb, DWORD units)
  97. {
  98.     return ((cb + units - 1) / units) * units;
  99. }
  100.  
  101. StringPool::StringPool()
  102.  : m_pchNext(NULL), m_pchLimit(NULL), m_phdrCur(NULL)
  103. {
  104.  SYSTEM_INFO si;
  105.  GetSystemInfo(&si);
  106.  m_dwGranularity = RoundUp(sizeof(HEADER) + MIN_CBCHUNK,
  107.                            si.dwAllocationGranularity);
  108. }
  109.  
  110. LPWSTR StringPool::AllocString(const WCHAR* pszBegin, const WCHAR* pszEnd)
  111. {
  112.  size_t cch = pszEnd - pszBegin + 1;
  113.  LPWSTR psz = m_pchNext;
  114.  if (m_pchNext + cch <= m_pchLimit) {
  115.   m_pchNext += cch;
  116.   lstrcpynW(psz, pszBegin, cch);
  117.   return psz;
  118.  }
  119.  
  120.  if (cch > MAX_CHARALLOC) goto OOM;
  121.  DWORD cbAlloc = RoundUp(cch * sizeof(WCHAR) + sizeof(HEADER),
  122.                                                           m_dwGranularity);
  123.  BYTE* pbNext = reinterpret_cast<BYTE*>(
  124.                   VirtualAlloc(NULL, cbAlloc, MEM_COMMIT, PAGE_READWRITE));
  125.  if (!pbNext) {
  126. OOM:
  127.   static std::bad_alloc OOM;
  128.   throw(OOM);
  129.  }
  130.  
  131.  m_pchLimit = reinterpret_cast<WCHAR*>(pbNext + cbAlloc);
  132.  HEADER* phdrCur = reinterpret_cast<HEADER*>(pbNext);
  133.  phdrCur->m_phdrPrev = m_phdrCur;
  134.  phdrCur->m_cb = cbAlloc;
  135.  m_phdrCur = phdrCur;
  136.  m_pchNext = reinterpret_cast<WCHAR*>(phdrCur + 1);
  137.  
  138.  return AllocString(pszBegin, pszEnd);
  139. }
  140.  
  141. StringPool::~StringPool()
  142. {
  143.  HEADER* phdr = m_phdrCur;
  144.  while (phdr) {
  145.   HEADER hdr = *phdr;
  146.   VirtualFree(hdr.m_phdrPrev, hdr.m_cb, MEM_RELEASE);
  147.   phdr = hdr.m_phdrPrev;
  148.  }
  149. }
  150.  
  151. bool DictionaryEntry::Parse(
  152.        const WCHAR* begin, const WCHAR* end,
  153.        StringPool& pool)
  154. {
  155.  const WCHAR* pch = std::find(begin, end, L' ');
  156.  if (pch >= end) return false;
  157.  m_pszTrad = pool.AllocString(begin, pch);
  158.  begin = std::find(pch, end, L'[') + 1;
  159.  if (begin >= end) return false;
  160.  pch = std::find(begin, end, L']');
  161.  if (pch >= end) return false;
  162.  m_pszPinyin = pool.AllocString(begin, pch);
  163.  begin = std::find(pch, end, L'/') + 1;
  164.  if (begin >= end) return false;
  165.  for (pch = end; *--pch != L'/'; ) { }
  166.  if (begin >= pch) return false;
  167.  m_pszEnglish = pool.AllocString(begin, pch);
  168.  return true;
  169. }
  170.  
  171. class Dictionary
  172. {
  173. public:
  174.  Dictionary();
  175.  int Length() { return v.size(); }
  176.  const DictionaryEntry& Item(int i) { return v[i]; }
  177. private:
  178.  vector<DictionaryEntry> v;
  179.  StringPool m_pool;
  180. };
  181.  
  182. Dictionary::Dictionary()
  183. {
  184.  MappedTextFile mtf(TEXT("cedict.b5"));
  185.  const CHAR* pchBuf = mtf.Buffer();
  186.  const CHAR* pchEnd = pchBuf + mtf.Length();
  187.  while (pchBuf < pchEnd) {
  188.   const CHAR* pchEOL = std::find(pchBuf, pchEnd, '\n');
  189.   if (*pchBuf != '#') {
  190.    size_t cchBuf = pchEOL - pchBuf;
  191.    wchar_t* buf = new wchar_t[cchBuf];
  192.    DWORD cchResult = MultiByteToWideChar(950, 0,
  193.                           pchBuf, cchBuf, buf, cchBuf);
  194.    if (cchResult) {
  195.     DictionaryEntry de;
  196.     if (de.Parse(buf, buf + cchResult, m_pool)) {
  197.      v.push_back(de);
  198.     }
  199.    }
  200.    delete[] buf;
  201.   }
  202.   pchBuf = pchEOL + 1;
  203.  }
  204. }
  205.  
  206. int __cdecl main(int argc, const char* argv[])
  207. {
  208.  clock_t dw = clock();
  209.  {
  210.   Dictionary dict;
  211.   std::cout << dict.Length() << std::endl;
  212.  }
  213.  std::cout << clock() - dw << std::endl;
  214.  return 0;
  215. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement