Advertisement
creamygoat

Dirty Script to Tidy SSM-USA map SVG

Nov 22nd, 2013
541
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 10.35 KB | None | 0 0
  1. import subprocess
  2. import itertools
  3. from HTMLParser import HTMLParser
  4.  
  5. DPs = 3
  6.  
  7. StateNames = {
  8.  'AL': 'Alabama',
  9.  'AK': 'Alaska',
  10.  'AZ': 'Arizona',
  11.  'AR': 'Arkansas',
  12.  'CA': 'California',
  13.  'CO': 'Colorado',
  14.  'CT': 'Connecticut',
  15.  'DE': 'Delaware',
  16.  'FL': 'Florida',
  17.  'GA': 'Georgia',
  18.  'HI': 'Hawaii',
  19.  'ID': 'Idaho',
  20.  'IL': 'Illinois',
  21.  'IN': 'Indiana',
  22.  'IA': 'Iowa',
  23.  'KS': 'Kansas',
  24.  'KY': 'Kentucky',
  25.  'LA': 'Louisiana',
  26.  'MA': 'Massachusetts',
  27.  'MD': 'Maryland',
  28.  'ME': 'Maine',
  29.  'MI': 'Michigan',
  30.  'MN': 'Minnesota',
  31.  'MO': 'Missouri',
  32.  'MS': 'Mississippi',
  33.  'MT': 'Montana',
  34.  'NE': 'Nebraska',
  35.  'NV': 'Nevada',
  36.  'NH': 'New Hampshire',
  37.  'NJ': 'New Jersey',
  38.  'NM': 'New Mexico',
  39.  'NY': 'New York',
  40.  'NC': 'North Carolina',
  41.  'ND': 'North Dakota',
  42.  'OH': 'Ohio',
  43.  'OK': 'Oklahoma',
  44.  'OR': 'Oregon',
  45.  'PA': 'Pennsylvania',
  46.  'RI': 'Rhode Island',
  47.  'SC': 'South Carolina',
  48.  'SD': 'South Dakota',
  49.  'TN': 'Tennessee',
  50.  'TX': 'Texas',
  51.  'UT': 'Utah',
  52.  'VT': 'Vermont',
  53.  'VA': 'Virginia',
  54.  'WA': 'Washington',
  55.  'WV': 'West Virginia',
  56.  'WI': 'Wisconsin',
  57.  'WY': 'Wyoming',
  58.  'PR': 'Puerto Rico',
  59.  'VI': 'Virgin Islands',
  60.  'GU': 'Guam',
  61.  'AS': 'American Samoa',
  62.  'MP': 'Morthern Mariana Islands',
  63.  'DC': 'Washington D.C.'
  64. }
  65.  
  66.  
  67. Centres = {
  68.  'AL': (648, 415),
  69.  'AK': (112, 542),
  70.  'AZ': (203, 364),
  71.  'AR': (545, 369),
  72.  'CA': (71, 274),
  73.  'CO': (320, 272),
  74.  'CT': (858, 178),
  75.  'DE': (826, 243),
  76.  'FL': (722, 502),
  77.  'GA': (710, 405),
  78.  'HI': (285, 552),
  79.  'ID': (185, 119),
  80.  'IL': (590, 258),
  81.  'IN': (638, 253),
  82.  'IA': (518, 213),
  83.  'KS': (440, 293),
  84.  'KY': (658, 305),
  85.  'LA': (563, 458),
  86.  'MA': (874, 162),
  87.  'MD': (798, 252),
  88.  'ME': (888, 86),
  89.  'MI': (635, 142),
  90.  'MN': (521, 114),
  91.  'MO': (538, 297),
  92.  'MS': (595, 416),
  93.  'MT': (270, 87),
  94.  'NE': (420, 225),
  95.  'NV': (140, 241),
  96.  'NH': (865, 123),
  97.  'NJ': (832, 216),
  98.  'NM': (298, 379),
  99.  'NY': (812, 160),
  100.  'NC': (768, 332),
  101.  'ND': (416, 96),
  102.  'OH': (701, 234),
  103.  'OK': (434, 359),
  104.  'OR': (95, 120),
  105.  'PA': (780, 211),
  106.  'RI': (877, 171),
  107.  'SC': (751, 371),
  108.  'SD': (414, 160),
  109.  'TN': (658, 343),
  110.  'TX': (405, 452),
  111.  'UT': (218, 250),
  112.  'VT': (848, 126),
  113.  'VA': (767, 290),
  114.  'WA': (115, 47),
  115.  'WV': (749, 266),
  116.  'WI': (575, 155),
  117.  'WY': (295, 183),
  118.  'DC': (801.33209, 251.79613),
  119.  'PR': (596, 537),
  120.  'VI': (657, 537),
  121.  'GU': (579, 575),
  122.  'AS': (615, 575),
  123.  'MP': (657, 575)
  124. }
  125.  
  126. Bumps = {
  127.  'HI': (-8, 0),
  128.  'VI': (0, -1)
  129. }
  130.  
  131. NewOrigins = {
  132.   'AK': (0, 425),
  133.   'HI': (211, 492),
  134.   'PR': (560, 516),
  135.   'VI': (633, 516),
  136.   'GU': (560, 557),
  137.   'AS': (597, 557),
  138.   'MP': (633, 557)
  139. }
  140.  
  141. def MaxDP(x, n):
  142.  
  143.   '''Return as a string, x at a maximum of n decimal places.'''
  144.  
  145.   s = '%.*f' % (n, x)
  146.  
  147.   if '.' in s:
  148.     while s[-1:] == '0':
  149.       s = s[:-1]
  150.     if s[-1:] == '.':
  151.       s = s[:-1]
  152.  
  153.   return s
  154.  
  155.  
  156. def VSum(*VectorArgs):
  157.   if len(VectorArgs) == 1:
  158.     Vectors = VectorArgs[0]
  159.   else:
  160.     Vectors = VectorArgs
  161.   Result = tuple(Vectors[0])
  162.   for i in range(1, len(Vectors)):
  163.     Result = tuple(a + b for a, b in zip(Result, Vectors[i]))
  164.   return Result
  165.  
  166.  
  167. def VDiff(A, B):
  168.   return tuple(x - y for x, y in zip(A, B))
  169.  
  170.  
  171. def ShiftedPD(PathData, Transform, NumDPs=2):
  172.  
  173.   CmdSet = 'MLHVQTCSAZ'
  174.  
  175.   x, y = Transform
  176.  
  177.   ArgFormats = {
  178.    'M': 'x,y',
  179.    'L': 'x,y',
  180.    'H': 'x',
  181.    'V': 'y',
  182.    'Q': 'x,y x,y',
  183.    'T': 'x,y',
  184.    'C': 'x,y x,y x,y',
  185.    'S': 'x,y x,y',
  186.    'A': '0,0 0,0,0 x,y',
  187.    'Z': '0'
  188.   }
  189.  
  190.   Result = ''
  191.  
  192.   CmdStr = ''
  193.   CmdStrs = []
  194.  
  195.   for Ch in PathData:
  196.     if Ch.upper() in CmdSet:
  197.       CmdStr = CmdStr.rstrip()
  198.       if CmdStr != '':
  199.         CmdStrs.append(CmdStr)
  200.       CmdStr = ''
  201.     CmdStr += Ch
  202.   CmdStr = CmdStr.rstrip()
  203.   if CmdStr != '':
  204.     CmdStrs.append(CmdStr)
  205.  
  206.   NewCmdStrs = []
  207.   HaveRefX = False
  208.   HaveRefY = False
  209.  
  210.   for CmdStr in CmdStrs:
  211.     Cmd = CmdStr[0]
  212.     CmdIsAbs = Cmd == Cmd.upper() and Cmd not in 'Zz'
  213.     ForceAbsCmd = False
  214.     ArgStrs = list(itertools.chain.from_iterable(
  215.         [S.split() for S in CmdStr[1:].split(',')]))
  216.     Args = [float(S) for S in ArgStrs]
  217.     ArgFmt = ArgFormats[Cmd.upper()]
  218.     ArgStrs = []
  219.     ArgFmtIx = 0
  220.     for Arg in Args:
  221.       if ArgFmtIx >= len(ArgFmt):
  222.         ArgStrs += ' ' + Cmd + ' '
  223.         ArgFmtIx = 0
  224.       while ArgFmt[ArgFmtIx] not in '0xy':
  225.         ArgStrs += ArgFmt[ArgFmtIx]
  226.         ArgFmtIx += 1
  227.       Ch = ArgFmt[ArgFmtIx]
  228.       ArgFmtIx += 1
  229.       Delta = 0
  230.       if Ch == 'x':
  231.         if CmdIsAbs or not HaveRefX:
  232.           Delta = x
  233.           ForceAbsCmd = ForceAbsCmd or not HaveRefX
  234.           HaveRefX = True
  235.       elif Ch == 'y':
  236.         if CmdIsAbs or not HaveRefY:
  237.           Delta = y
  238.           ForceAbsCmd = ForceAbsCmd or not HaveRefY
  239.           HaveRefY = True
  240.       ArgStrs.append(MaxDP(Arg + Delta, NumDPs))
  241.     NewCmdStr = Cmd.upper() if ForceAbsCmd else Cmd
  242.     if len(ArgStrs) > 0 and Cmd not in 'Zz':
  243.       NewCmdStr += ' ' + ''.join(ArgStrs)
  244.  
  245.     NewCmdStrs.append(NewCmdStr)
  246.  
  247.   Result = ' '.join(NewCmdStrs)
  248.  
  249.   return Result
  250.  
  251.  
  252. def WrapPathData(PathData, FirstIndentStr, IndentStr, Terminator, MaxColumns):
  253.  
  254.   Result = []
  255.  
  256.   CmdSet = 'MLHVQTCSAZ'
  257.  
  258.   CmdStr = ''
  259.   CmdStrs = []
  260.  
  261.   for Ch in PathData:
  262.     if Ch.upper() in CmdSet:
  263.       CmdStr = CmdStr.rstrip()
  264.       if CmdStr != '':
  265.         CmdStrs.append(CmdStr)
  266.       CmdStr = ''
  267.     CmdStr += Ch
  268.   CmdStr = CmdStr.rstrip()
  269.   if CmdStr != '':
  270.     CmdStrs.append(CmdStr)
  271.  
  272.   Line = FirstIndentStr
  273.   LastCmd = '?'
  274.   Sep = ''
  275.  
  276.   for Ix, CmdStr in enumerate(CmdStrs):
  277.     IsLast = Ix + 1 == len(CmdStrs)
  278.     NoMoreRoom = len(Line) + len(Sep) + len(CmdStr) > MaxColumns
  279.     DoWrap = NoMoreRoom or LastCmd in 'Zz'
  280.     if DoWrap:
  281.       Result += [Line]
  282.       Line = IndentStr + CmdStr
  283.     else:
  284.       Line += Sep + CmdStr
  285.     Sep = ' '
  286.     LastCmd = CmdStr[0]
  287.   if len(Line) + len(Terminator) > MaxColumns:
  288.     Result += [Line]
  289.     Line = IndentStr
  290.   Result += [Line + Terminator]
  291.  
  292.   return Result
  293.  
  294.  
  295. psIdle = 0
  296. psDefs = 1
  297. psClipPath = 2
  298. psCPPath = 3
  299.  
  300.  
  301. class MyParser(HTMLParser):
  302.  
  303.   def __init__(self):
  304.  
  305.     HTMLParser.__init__(self)
  306.     self.State = psIdle
  307.     self.CPID = None
  308.     self.CPPaths = None
  309.     self.ClipPaths = {}
  310.     self.NewClipPaths = {}
  311.     self.ClipPathNames = []
  312.     self.Shapes = {}
  313.     self.ShapeNames = []
  314.     self.ShapeNamesByClipPathName = {}
  315.  
  316.   def GetAttr(self, Attributes, AttrName):
  317.     Result = None
  318.     for (Name, Value) in Attributes:
  319.       if Name == AttrName:
  320.         Result = Value
  321.         break
  322.     return Result
  323.  
  324.   def handle_starttag(self, tag, attrs):
  325.  
  326.     #print 'Start', tag
  327.     #print '  ' + repr(attrs)
  328.  
  329.     if self.State == psDefs:
  330.  
  331.       if tag == 'clippath':
  332.         self.State = psClipPath
  333.         self.CPID = self.GetAttr(attrs, 'id')
  334.         self.CPPaths = []
  335.  
  336.     elif self.State == psClipPath:
  337.  
  338.       if tag == 'path':
  339.  
  340.         PathID = self.GetAttr(attrs, 'id')
  341.         PathData = self.GetAttr(attrs, 'd')
  342.         self.CPPaths.append((PathID, PathData))
  343.         self.State = psCPPath
  344.  
  345.     elif self.State == psCPPath:
  346.  
  347.       pass
  348.  
  349.     else:
  350.  
  351.       if tag == 'defs':
  352.  
  353.         self.State = psDefs
  354.  
  355.       if tag == 'use':
  356.  
  357.         ID = self.GetAttr(attrs, 'id')
  358.  
  359.         if ID is not None and len(ID) == 2:
  360.  
  361.           XLink = self.GetAttr(attrs, 'xlink:href')
  362.           CPID = self.GetAttr(attrs, 'clip-path')[len('url(#'):-1]
  363.           TranslateStr = self.GetAttr(attrs, 'transform')[len('translate('):-1]
  364.           Transform = tuple([float(S) for S in
  365.               (TranslateStr.split(',') + ['0', '0'])[:2]])
  366.  
  367.           if ID == 'DC':
  368.             Transform = (0, 0)
  369.  
  370.           if CPID in self.ClipPaths:
  371.  
  372.             Prec = 5 if ID in ['PR', 'VI', 'GU', 'AS', 'MP'] else 3
  373.  
  374.             if ID in Bumps:
  375.               Transform = VSum(Transform, Bumps[ID])
  376.  
  377.             if ID in NewOrigins:
  378.               Transform = VDiff(Transform, NewOrigins[ID])
  379.  
  380.             NewCPPaths = []
  381.  
  382.             for (PathID, PathData) in self.ClipPaths[CPID]:
  383.               NewPathData = ShiftedPD(PathData, Transform, Prec)
  384.               NewCPPaths.append((PathID, NewPathData))
  385.  
  386.             self.NewClipPaths[CPID] = NewCPPaths
  387.             self.Shapes[ID] = (CPID, XLink)
  388.             self.ShapeNames.append(ID)
  389.             self.ShapeNamesByClipPathName[CPID] = ID
  390.  
  391.  
  392.   def handle_endtag(self, tag):
  393.  
  394.     #print 'End', tag
  395.  
  396.     if self.State == psDefs:
  397.  
  398.       if tag == 'defs':
  399.         self.State = psIdle
  400.  
  401.     elif self.State == psClipPath:
  402.  
  403.       if tag == 'clippath':
  404.         self.ClipPaths[self.CPID] = self.CPPaths
  405.         self.ClipPathNames.append(self.CPID)
  406.         self.CPID = None
  407.         self.CPPaths = None
  408.         self.State = psDefs
  409.  
  410.     elif self.State == psCPPath:
  411.  
  412.       if tag == 'path':
  413.         self.State = psClipPath
  414.  
  415.     else:
  416.  
  417.       pass
  418.  
  419.  
  420.   def handle_data(self, data):
  421.  
  422.     pass
  423.  
  424.  
  425. def Main():
  426.   #URL = 'http://www.pidjin.net/2006/03/06/such-a-perfect-day/'
  427.   URL = 'http://www.realhamster.com/'
  428.  
  429.   f = open('USA-SSM.svg', 'r')
  430.  
  431.   try:
  432.  
  433.     Page = f.readlines()
  434.     H = MyParser()
  435.  
  436.     for Line in Page:
  437.       H.feed(Line)
  438.  
  439.   finally:
  440.  
  441.     f.close()
  442.  
  443.   Lines = []
  444.  
  445.   for CPName in H.ClipPathNames:
  446.     Name = H.ShapeNamesByClipPathName[CPName]
  447.     Lines += ['  <!-- ' + StateNames[Name] + ' -->']
  448.     Lines += ['']
  449.     Lines += ['  <clipPath id="' + CPName + '">']
  450.     for (PathID, PathData) in H.NewClipPaths[CPName]:
  451.       Lines += WrapPathData(
  452.         PathData,
  453.         '    <path id="' + PathID + '" d="',
  454.         ' ' * 8,
  455.         '"/>',
  456.         80
  457.       )
  458.     Lines += ['  </clipPath>']
  459.     Lines += ['']
  460.  
  461.   Lines += ['']
  462.  
  463.   for Name in H.ShapeNames:
  464.     (CPID, XLink) = H.Shapes[Name]
  465.     Centre = Centres[Name]
  466.     if Name in NewOrigins:
  467.       Origin = NewOrigins[Name]
  468.       Centre = VDiff(Centre, Origin)
  469.       OrgStr = MaxDP(Origin[0], DPs) + ',' + MaxDP(Origin[1], DPs)
  470.       GXformStr = ' transform="translate(' + OrgStr + ')"'
  471.     else:
  472.       GXformStr = ''
  473.     CentreStr = MaxDP(Centre[0], DPs) + ',' + MaxDP(Centre[1], DPs)
  474.     PatXformStr = 'transform="translate(' + CentreStr + ')"'
  475.     Lines += ['  <!-- ' + StateNames[Name] + ' -->']
  476.     Lines += ['  <g clip-path="url(#' + CPID + ')"' + GXformStr + '>']
  477.     Lines += ['    <use xlink:href="' + XLink + '" ' + PatXformStr + '/>']
  478.     Lines += ['  </g>']
  479.  
  480.   print '\n'.join(Lines)
  481.  
  482. if __name__ == '__main__':
  483.   Main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement