Advertisement
Guest User

Untitled

a guest
Jan 15th, 2018
76
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. CREATE OR REPLACE FUNCTION "public"."fnnormalizahtml"("t" text)
  2.   RETURNS "pg_catalog"."text" AS $BODY$
  3. declare
  4.     r record;
  5. begin
  6.     for r in
  7.         select distinct ce.ch, ce.name
  8.         from
  9.             character_entity ce
  10.             inner join (
  11.                 select name[1] "name"
  12.                 from regexp_matches(t, '&([A-Za-z]+?);', 'g') r(name)
  13.             ) s on ce.name = s.name
  14.     loop
  15.         t := replace(t, '&' || r.name || ';', r.ch);
  16.     end loop;
  17.  
  18.     for r in
  19.         select distinct
  20.             hex[1] hex,
  21.             ('x' || repeat('0', 8 - length(hex[1])) || hex[1])::bit(32)::int codepoint
  22.         from regexp_matches(t, '&#x([0-9a-f]{1,8}?);', 'gi') s(hex)
  23.     loop
  24.         t := regexp_replace(t, '&#x' || r.hex || ';', chr(r.codepoint), 'gi');
  25.     end loop;
  26.  
  27.     for r in
  28.         select distinct
  29.             chr(codepoint[1]::int) ch,
  30.             codepoint[1] codepoint
  31.         from regexp_matches(t, '&#([0-9]{1,10}?);', 'g') s(codepoint)
  32.     loop
  33.         t := replace(t, '&#' || r.codepoint || ';', r.ch);
  34.     end loop;
  35.  
  36.     return t;
  37. end;
  38. $BODY$
  39.   LANGUAGE 'plpgsql' IMMUTABLE COST 1000
  40. ;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement