# Untitled

import os
from docx import Document

def print_tables(path):
	"""Yield the text of every table in a .docx file as ';'-separated lines.

	One string is yielded per table, in document order. Each processed row
	becomes one line terminated by a newline; every emitted cell is followed
	by ';'. Cells with no text are skipped, as are cells that share their
	underlying element with the previous cell in the row (merged cells).

	A cell is treated as a decimal number when its first character is a
	digit and it contains exactly one '.'; the dot is then replaced by a
	comma so that Excel reads it as a number. Once such a number has been
	seen in a table, the first completely empty row ends that table.

	Note: cell text is written verbatim, so a ';' or a line break inside a
	cell is not escaped.

	Args:
		path: Path of the .docx document, passed straight to ``Document``.

	Yields:
		A unicode string holding the converted rows of one table.
	"""
	doc = Document(path)
	for ntable, table in enumerate(doc.tables):
		# Every table after the first one skips its first two rows
		# (presumably a repeated header -- confirm against the input files).
		start_row = 2 if ntable > 0 else 0
		lines = []
		data_part = False
		for row in table.rows[start_row:]:
			fields = []
			last_tc = None
			row_empty = True
			for cell in row.cells:
				# Read the text once; python-docx rebuilds it on each access.
				text = cell.text
				if text:
					row_empty = False
					# Emit a merged cell only once: consecutive cells that
					# share the same underlying element are duplicates.
					if cell._tc != last_tc:
						# Heuristic check for a decimal number.
						is_float = text[0].isdigit() and text.count(".") == 1
						if is_float:
							data_part = True
							# Decimal comma, to make Excel happy.
							text = text.replace(".", ",")
						fields.append(text + u";")
				last_tc = cell._tc
			# An empty row after the numeric part marks the end of the data.
			if data_part and row_empty:
				break
			lines.append(u"".join(fields) + u"\n")
		yield u"".join(lines)

# Convert every .docx file in the current directory into "<name>.docx.csv".
for i in os.listdir("."):
	if i.endswith(".docx"):
		print(i)
		tbl = list(print_tables(i))
		# The output is encoded as cp1251; the 'with' block guarantees the
		# file is flushed and closed even if encoding or writing fails.
		with open("%s.csv" % i, "wb") as out:
			out.write(u"".join(tbl).encode("cp1251"))