Advertisement
Guest User

python markdown to html

a guest
Sep 9th, 2012
209
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. # Markdown to html
  2. import re
  3.  
  # Path of the Markdown source file to convert.
  a = 'D:/Markdown/a.txt'
  # The context manager closes the file even if read() raises.
  with open(a, 'r') as file:
      lines = file.read()

  # The converters below recognise block markers only directly after a
  # newline (they use '\n' lookbehinds), so pad the text on both sides to
  # make the first and last lines behave like every other line.
  lines = '\n' + lines + '\n'
  10.  
  def blockQuote(lines):
      """Wrap every run of consecutive '>'-prefixed lines in blockquote tags.

      A quoted line is recognised only when '>' directly follows a newline.
      The '>' markers are kept on each quoted line; this function only adds
      a '<blockquote>' line before each run and a '</blockquote>' line
      after it. Directly repeated markers (empty quote lines) are collapsed
      into a single marker first.

      A run ends at the first quoted line that is followed by a line without
      a leading '>', or at the end of the text, so two quotes separated by
      ordinary text produce two separate blockquote elements.

      :param lines: the text to process
      :return: the text with blockquote open/close lines inserted
      """
      marker = '\n>'
      lines = re.sub(r'(?:\n>)+', marker, lines)

      # With a capturing group, split() alternates text and markers:
      # [text, marker, text, marker, text, ...]. parts[0] precedes the first
      # quote; every later text element is the remainder of a quoted line
      # plus whatever unquoted lines follow it.
      parts = re.split(r'(\n>)', lines)
      quoted = parts[2::2]
      final = len(quoted) - 1

      out = [parts[0]]
      inside = False
      for pos, segment in enumerate(quoted):
          if not inside:
              out.append('\n<blockquote>')
              inside = True
          out.append(marker)
          first_line, newline, rest = segment.partition('\n')
          if newline or pos == final:
              # Something other than a quote marker follows this line (or the
              # text ends here), so the run is over: close it right after the
              # quoted line and leave the remaining text outside.
              out.append(first_line + '\n</blockquote>' + newline + rest)
              inside = False
          else:
              out.append(segment)
      return ''.join(out)
  39.  
  def header(lines):
      """Convert ATX ('# Title') and setext (underlined) headings to <hN> tags.

      Works in two passes: first every heading line receives its closing
      tag, then an opening tag is inserted at the start of each line that
      contains a closing tag. ATX headings of levels 1 through 6 are
      handled, also when the line starts with the blockquote prefix '> '
      (the prefix is kept and ends up after the opening tag). A line of '='
      marks the preceding line as <h1>, a line of '-' as <h2>.

      :param lines: the text to process; headings are recognised only
                    directly after a newline
      :return: the text with heading tags inserted
      """
      levels = range(6, 0, -1)  # Markdown defines heading levels 1..6
      for level in levels:
          closing = '</h%d>' % level
          # Plain heading line: drop the hashes, append the closing tag.
          lines = re.sub(r'(?<=\n)#{%d}\s([^\n]+)' % level,
                         r'\1' + closing + '\n', lines)
          # Same inside a blockquote; the '> ' prefix is preserved.
          lines = re.sub(r'(?<=\n)> #{%d}\s([^\n]+)' % level,
                         r'> \1' + closing + '\n', lines)
          # Remove optional trailing hashes/whitespace ('## Title ##').
          lines = re.sub(r'([\s#]+)(</h[1-6]?)', r'\2', lines)

      # Setext underlines replace themselves with the closing tag of the
      # line above.
      lines = re.sub(r'\n[=]+\n', r'</h1>\n', lines)
      lines = re.sub(r'\n[-]+\n', r'</h2>\n', lines)

      # Add the opening tag at the start of every line that now ends a
      # heading.
      for level in levels:
          lines = re.sub(r'(?<=\n)(?=[^\n]+</h%d>)' % level,
                         '<h%d>' % level, lines)
      return lines
  50.  
  def paragraph(lines):
      """Insert <p> / </p> tags around blocks of text separated by blank lines.

      Steps, in order:
        1. remove the '> ' blockquote prefix left directly after a heading
           opening tag;
        2. turn every remaining line-leading '>' into '<p>';
        3. open a paragraph before text that follows a blank line or that
           starts the document (text after the single leading newline);
        4. collapse runs of three or more newlines into one blank line;
        5. close a paragraph before every blank line whose preceding
           character is not '>' (i.e. not already the end of a tag).

      :param lines: the text to process
      :return: the text with paragraph tags inserted
      """
      lines = re.sub(r'(<[hH][1-6]>)>\s+', r'\1', lines)
      lines = re.sub(r'\n>', '\n<p>', lines)
      # The second alternative covers the very first line of the text, which
      # has only one newline in front of it; without it that paragraph would
      # receive a closing tag in step 5 but never an opening one.
      lines = re.sub(r'(?:(?<=\n\n)|(?<=\A\n))(?=[^><\s])', '<p>', lines)
      lines = re.sub(r'([\n]{3,})', '\n\n', lines)
      lines = re.sub(r'(?<=[^>])(?=\n\n)', r'</p>', lines)
      return lines
  58.  
  def emphasis(lines):
      """Convert Markdown emphasis markers to HTML tags.

      '**text**' and '__text__' become <strong>; '*text*' and '_text_'
      become <em>. A span never crosses a newline. Matching is non-greedy
      and the closing delimiter must equal the opening one, so several
      spans on the same line are converted independently.

      :param lines: the text to process
      :return: the text with emphasis tags inserted
      """
      # Double delimiters first, otherwise the single-delimiter pattern
      # would consume one character of each '**' / '__' pair.
      lines = re.sub(r'(\*\*|__)([^\n]+?)\1', r'<strong>\2</strong>', lines)
      lines = re.sub(r'([*_])([^\n]+?)\1', r'<em>\2</em>', lines)
      return lines
  64.  
  def _bullet_run_to_ul(match):
      """Render one run of consecutive bullet lines as a <ul> element."""
      items = []
      for line in match.group(0).split('\n'):
          # Drop the marker and the whitespace after it; the rest is the item.
          items.append('<li>' + re.sub(r'^[*+-][ \t]+', '', line) + '</li>')
      return '\n<ul>\n' + '\n'.join(items) + '\n</ul>\n'


  def List(lines):
      """Convert Markdown ordered and unordered lists to HTML lists.

      Ordered lists: a line starting with '1.' opens an <ol>; the following
      markers are turned into items only while they continue the sequence
      2., 3., ... Numbers that do not belong to such a sequence are left as
      plain text. The text after the marker (including its leading space)
      becomes the item content.

      Unordered lists: each run of consecutive lines starting with '*', '+'
      or '-' followed by a space or tab is wrapped in <ul>, one <li> per
      line. The whitespace after the marker is required so that lines such
      as '**bold** text' or a '-----' underline are not mistaken for items.

      :param lines: the text to process; list markers are recognised only
                    directly after a newline
      :return: the text with list markup inserted
      """
      # split() with a capturing group keeps the markers, which therefore sit
      # at the odd indices: [text, marker, text, marker, text, ...].
      parts = re.split(r'(\n[0-9]+\.)', lines)
      starts = [pos for pos, part in enumerate(parts) if part == '\n1.']

      for start in starts:
          parts[start] = '\n\n<ol>\n<li>'
          expected = 2
          pos = start + 2
          # Follow the markers only while the numbering continues unbroken.
          while pos < len(parts) and parts[pos] == '\n%d.' % expected:
              parts[pos] = '\n<li>'
              expected += 1
              pos += 2

      html = ''.join(parts)
      # Close every ordered item at the end of its line.
      html = re.sub(r'(<li>[^\n]+)(?=\n)', r'\1</li>', html)
      # Close the list after an item that is not followed by another tag;
      # \Z also closes a list that is the very last thing in the text.
      html = re.sub(r'(</li>\n)(?=[^<]|\Z)', r'\1</ol>\n\n', html)
      # Unordered lists are handled last so their items do not trigger the
      # </ol> rule above.
      html = re.sub(r'(?<=\n)[*+-][ \t]+[^\n]*(?:\n[*+-][ \t]+[^\n]*)*',
                    _bullet_run_to_ul, html)
      return html
  93.  
  # Conversion pipeline. The order is significant:
  #  - List runs before emphasis so that '*' bullet markers are consumed
  #    before emphasis() interprets '*' as an emphasis delimiter;
  #  - blockQuote runs before header because header() looks for the '> '
  #    prefix that blockQuote leaves on quoted lines;
  #  - paragraph runs last: it rewrites the remaining line-leading '>'
  #    markers and relies on the heading tags already being in place.
  lines = List(lines)
  lines = emphasis(lines)
  lines = blockQuote(lines)
  lines = header(lines)
  lines = paragraph(lines)

  # The context manager flushes and closes the output even if write() fails.
  with open("D:/test1.html", "w") as f:
      f.write(lines)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement