SHARE
TWEET

T-SQL Function (iTVF) to List All Unicode Code Points

SqlQuantumLeap Mar 26th, 2019 (edited) 79 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. /*
  2.  -------------------- List All Unicode Code Points -------------------
  3.  
  4. Created By: Solomon Rutzky / Sql Quantum Leap ( https://SqlQuantumLeap.com/ )
  5. Created On: 2019-03-25
  6. Updated On: 2019-04-09
  7.  
  8. Location:          https://pastebin.com/vbzz0LqN
  9. Related blog post: https://sqlquantumleap.com/2019/03/25/ssms-tip-3-easily-access-research-all-unicode-characters-yes-including-emojis-%f0%9f%98%b8/
  10. ----------------------------------------------------------------------
  11. */
  12.  
  13.  
  14. USE [tempdb];
  15.  
  16.  
  17. IF (OBJECT_ID(N'dbo.ListAllUnicodeCodePoints') IS NULL)
  18. BEGIN
  19.     RAISERROR(N'Creating placeholder function...', 10, 1) WITH NOWAIT;
  20.     EXEC(N'CREATE FUNCTION dbo.ListAllUnicodeCodePoints()
  21.             RETURNS TABLE AS RETURN SELECT 1 AS [a];');
  22. END;
  23.  
  24.  
  25. -------------------------------------------------------------------------------
  26. --===========================================================================--
  27. -------------------------------------------------------------------------------
  28. GO
  29. ALTER FUNCTION dbo.ListAllUnicodeCodePoints()
  30. RETURNS TABLE
  31. AS RETURN
  32. WITH nums AS
  33. (
  34.   SELECT TOP (1114111) (ROW_NUMBER() OVER (ORDER BY @@MICROSOFTVERSION) - 1) AS [num]
  35.   FROM   [master].[sys].[all_columns] ac1
  36.   CROSS JOIN [master].[sys].[all_columns] ac2
  37. ), chars AS
  38. (
  39.   SELECT n.[num],
  40.          RIGHT(CONVERT(CHAR(6), CONVERT(BINARY(3), n.[num]), 2),
  41.                CASE WHEN n.[num] > 65535 THEN 5 ELSE 4 END) AS [CodePointHex],
  42.          CASE WHEN n.[num] > 65535 THEN 55232 + (n.[num] / 1024) ELSE NULL END AS [HighSurrogateINT],
  43.          CASE WHEN n.[num] > 65535 THEN 56320 + (n.[num] % 1024) ELSE NULL END AS [LowSurrogateINT]
  44.   FROM   nums n
  45.   WHERE  n.[num] BETWEEN 0x000000 AND 0x014700
  46.   OR     n.[num] BETWEEN 0x016800 AND 0x030000
  47.   OR     n.[num] BETWEEN 0x0E0001 AND 0x0E01EF
  48. )
  49. SELECT
  50.        'U+' + c.[CodePointHex] AS [CodePoint],
  51.        c.[num] AS [CdPntINT],
  52.        '0x' + c.[CodePointHex] AS [CdPntBIN],
  53.        CASE
  54.          WHEN c.[num] > 65535 THEN NCHAR(c.[HighSurrogateINT]) + NCHAR(c.[LowSurrogateINT])
  55.          ELSE NCHAR(c.[num])
  56.        END AS [Char],
  57.        CASE
  58.          WHEN c.[num] > 65535 THEN CONVERT(CHAR(10), CONVERT(BINARY(4),
  59.                                            NCHAR(c.[HighSurrogateINT]) + NCHAR(c.[LowSurrogateINT])), 1)
  60.          ELSE CONVERT(CHAR(6), CONVERT(BINARY(2), NCHAR(c.[num])), 1)
  61.        END AS [UTF-16LE       ],
  62.  
  63.        /*  -- uncomment for SQL Server 2019 and newer only!
  64.        CASE
  65.          WHEN c.[num] > 65535 THEN CONVERT(CHAR(10), CONVERT(BINARY(4), CONVERT(CHAR(4),
  66.                                            NCHAR(c.[HighSurrogateINT]) + NCHAR(c.[LowSurrogateINT])
  67.                                            COLLATE Latin1_General_100_CI_AS_SC_UTF8)), 1)
  68.          ELSE CONVERT(CHAR(10), CONVERT(VARBINARY(3), CONVERT(VARCHAR(3), NCHAR(c.[num])
  69.                       COLLATE Latin1_General_100_CI_AS_SC_UTF8)), 1)
  70.        END AS [UTF-8          ],
  71.        */
  72.  
  73.        c.[HighSurrogateINT] AS [HighSrgtINT],
  74.        c.[LowSurrogateINT] AS [LowSrgtINT],
  75.        CONVERT(BINARY(2), c.[HighSurrogateINT]) AS [HighSrgtBIN],
  76.        CONVERT(BINARY(2), c.[LowSurrogateINT]) AS [LowSrgtBIN],
  77.        'NCHAR(' + CASE
  78.                     WHEN c.[num] > 65535 THEN CONVERT(CHAR(6), CONVERT(BINARY(2), c.[HighSurrogateINT]), 1)
  79.                       + ') + NCHAR(' + CONVERT(CHAR(6), CONVERT(BINARY(2), c.[LowSurrogateINT]), 1)
  80.                     ELSE CONVERT(CHAR(6), CONVERT(BINARY(2), c.[num]), 1)
  81.                   END + ')' AS [T-SQL                                                  ],
  82.        '&#x' + c.[CodePointHex] + ';' AS [HTML        ],
  83.        '\x' + CASE
  84.                 WHEN c.[num] > 65535 THEN CONVERT(CHAR(4), CONVERT(BINARY(2), c.[HighSurrogateINT]), 2)
  85.                   + '\x' + CONVERT(CHAR(4), CONVERT(BINARY(2), c.[LowSurrogateINT]), 2)
  86.                 ELSE CONVERT(CHAR(4), CONVERT(BINARY(2), c.[num]), 2)
  87.               END AS [C/C++/C#/Java/?]
  88. FROM   chars c;
  89. GO
  90. -------------------------------------------------------------------------------
  91. --===========================================================================--
  92. -------------------------------------------------------------------------------
  93.  
  94.  
  95. -------- TEST ---------
  96.  
  97.  
  98. -- List all 188,657 code points:
  99. SELECT cp.*
  100. FROM   dbo.ListAllUnicodeCodePoints() cp;
  101.  
  102.  
  103. -- List some emoji:
  104. SELECT cp.*
  105. FROM   dbo.ListAllUnicodeCodePoints() cp
  106. WHERE  cp.[CdPntINT] BETWEEN 0x1F000 AND 0x1F9FF;
  107.  
  108.  
  109. -- List the Tibetan characters, sorted naturally for that language:
  110. SELECT cp.*
  111. FROM   dbo.ListAllUnicodeCodePoints() cp
  112. WHERE  cp.[CdPntINT] BETWEEN 0x0F00 AND 0x0FFF -- Tibetan
  113. ORDER BY  cp.[Char] COLLATE Nepali_100_CS_AS;
  114.  
  115.  
  116. -- List characters that are considered the same as "E"
  117. -- (when using Latin1_General_100_CI_AI):
  118. SELECT cp.*
  119. FROM   dbo.ListAllUnicodeCodePoints() cp
  120. WHERE  cp.[Char] = N'E' COLLATE Latin1_General_100_CI_AI
  121. ORDER BY  cp.[CdPntINT];
  122. -- 94 rows!!
  123.  
  124.  
  125. -- List characters that have a numeric value between 0 and 10,000
  126. -- (for pre-SQL Server 2017, use Latin1_General_100_CI_AI):
  127. SELECT cp.*
  128. FROM   dbo.ListAllUnicodeCodePoints() cp
  129. WHERE  cp.[Char] LIKE N'%[0-' + NCHAR(0x2182) + N']%' COLLATE Japanese_XJIS_140_CI_AI--Latin1_General_100_CI_AI
  130. ORDER BY  cp.[Char] COLLATE Japanese_XJIS_140_CI_AI--Latin1_General_100_CI_AI
  131. -- 752 rows!! (for Japanese_XJIS_140_CI_AI)
  132. -- 550 rows!! (for Latin1_General_100_CI_AI)
  133.  
  134.  
  135.  
  136. -- Can be used in SSMS a as a keyboard Query Shortcut (only a single line is allowed):
  137. SELECT cp.* FROM dbo.ListAllUnicodeCodePoints() cp; RETURN;
  138. -- See blog post (linked in the header comment bloc) for details.
  139.  
  140.  
  141.  
  142. -- DROP FUNCTION dbo.ListAllUnicodeCodePoints;
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top