View difference between Paste ID: <a href="/QPBERWQY">QPBERWQY</a> and <a href="/eJtW9VDG">eJtW9VDG</a>

# Return 1 when `char` is a UTF-8 continuation ("extension") byte, i.e. a
# byte of the form 10xxxxxx (octal \200 through \277 inclusive), and 0
# otherwise. An empty string yields 0.
#
# `char` is expected to be a single byte, as produced by substr(s, i, 1)
# on an awk whose string functions operate on bytes.
function isUTF8ExtensionChar(char) {
	# Both bounds are inclusive: \277 (0xBF) is itself a continuation byte.
	return (char >= "\200" && char <= "\277")
}
4
# Extract a substring from a UTF-8 encoded string, counting whole
# characters instead of bytes. This is required since not all versions
# of awk respect the encoding specified by $LANG. Of particular interest
# is the default busybox awk within Alpine Linux.
#
# Parameters:
#   str   - the UTF-8 encoded input string
#   start - 1-based index of the first character to return (as with the
#           built-in substr); values below 1 behave like 1
#   len   - maximum number of characters to return
#
# The remaining parameters (inLen, subLen, inIndex, subIndex, outLen) are
# awk-style local variables and must not be passed by callers.
#
# Returns the requested characters, or fewer when the string ends first.
# Returns "" when start lies beyond the last character or len is below 1.
#
# NOTE(review): this assumes length() and substr() operate on bytes; on
# an awk that already handles multibyte characters the built-in substr
# should be used instead.
function substrUTF8(str, start, len, inLen, subLen, inIndex, subIndex, outLen) {
	# Length of the input string
	inLen = length(str)

	# Current index into the input string
	inIndex = 1

	# 1-based position of the character that begins at inIndex
	outLen = 1

	# Skip the first (start - 1) characters. Every pass consumes at least
	# one byte, so a stray continuation byte cannot stall the loop.
	while (outLen < start && inIndex <= inLen) {
		inIndex++
		while (inIndex <= inLen && isUTF8ExtensionChar(substr(str, inIndex, 1))) {
			inIndex++
		}
		outLen++
	}

	# Index into the input string of the first byte of the output string
	subIndex = inIndex

	# Number of whole characters collected for the output string
	outLen = 0

	# Advance over up to len characters to find the end of the substring
	while (outLen < len && inIndex <= inLen) {
		inIndex++
		while (inIndex <= inLen && isUTF8ExtensionChar(substr(str, inIndex, 1))) {
			inIndex++
		}
		outLen++
	}

	# Length of the slice of the input string that forms the output
	subLen = inIndex - subIndex

	return substr(str, subIndex, subLen)
}