Advertisement
Guest User

xld_attribute_adder

a guest
Jul 28th, 2015
231
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.29 KB | None | 0 0
  1. import re
  2.  
  3. __author__ = 'xiaolong'
  4.  
  5. class WordListHelper():
  6.    
  7.     ATTRIBUTE_SEPARATOR = '/'
  8.    
  9.     def __init__(self):
  10.         pass
  11.    
  12.     @staticmethod
  13.     def is_in_word_list(self, vocable_attribute_text, list_of_words, allow_whitespace_characters=True):
  14.         """ This method checks if a vocable attribute has a value, which is in a given list of words. """
  15.         vocable_attribute_values = vocable_attribute_text.split(self.ATTRIBUTE_SEPARATOR, maxsplit=-1)
  16.        
  17.         for vocable_attribute_value in vocable_attribute_values:
  18.             if allow_whitespace_characters:
  19.                 regex = '^\s*' + vocable_attribute_value + '\s*$'
  20.                 for word in list_of_words:
  21.                     if re.search(regex, word):
  22.                         return True
  23.             else:
  24.                 for word in list_of_words:
  25.                     if vocable_attribute_value == word:
  26.                         return True
  27.         return False
  28.  
  29.  
  30. import re
  31.  
  32. from lxml import etree
  33. from xldattributeadder.WordListHelper import WordListHelper
  34. from xldattributeadder.WordListReader import WordListReader
  35. from xldattributeadder.exceptions.XMLInvalidException import XMLInvalidException
  36.  
  37. from xldattributeadder.exceptions.XMLParseException import XMLParserException
  38. from xldattributeadder.xmlparser import XMLParser
  39.  
  40. __author__ = 'xiaolong'
  41.  
  42.  
  43. class XLDAttributeAdder():
  44.    
  45.     # initialize with default value
  46.     vocable_file_path = 'vocables.xml'
  47.     xsd_file_path = 'xld-vocables-schema.xsd'
  48.     word_list_file_path = 'words'
  49.    
  50.     ATTRIBUTE_SEPARATOR = '/'
  51.     VOCABLE_ATTRIBUTE_VALUE_PLACEHOLDER = '---'
  52.    
  53.     xml_parser = None
  54.    
  55.     xml_root = None
  56.    
  57.    
  58.     def __init__(self, vocable_file_path, xsd_file_path, word_list_file_path):
  59.         self.vocable_file_path = vocable_file_path
  60.         self.xsd_file_path = xsd_file_path
  61.         self.word_list_file_path = word_list_file_path
  62.  
  63.    
  64.     def add_values_to_attribute_of_vocables (self, attribute_name, attribute_value, words_attribute_name):
  65.         print('trying to add value to attribute of the vocables ...')
  66.         list_of_words = WordListReader.read(WordListReader, self.word_list_file_path)
  67.        
  68.         # create a dict to be able to tell which vocables haven't been found and subsequently didn't get any new attribute value
  69.         words_added = {}
  70.         for word in list_of_words:
  71.             words_added[word] = False
  72.        
  73.         add_counter = 0
  74.        
  75.         print('list of words')
  76.         for word in list_of_words:
  77.             print(word, ',', sep='', end='\n')
  78.        
  79.         print('creating XMLParser instance')
  80.         self.xml_parser = XMLParser()
  81.        
  82.         try:
  83.             self.xml_root = self.xml_parser.get_xml_element_tree_root(self.xsd_file_path, self.vocable_file_path)
  84.            
  85.             for vocable in self.xml_root:
  86.                 # if this is one of the vocables, which need to be changed
  87.                 #if vocable.find(words_attribute_name).text in list_of_words:
  88.                 if WordListHelper.is_in_word_list(WordListHelper, vocable.find(words_attribute_name).text, list_of_words, allow_whitespace_characters=True):
  89.                     # the vocable was found
  90.                     words_added[vocable.find(words_attribute_name).text] = True
  91.                    
  92.                     # only add attribute value to the vocable, if it doesn't have that value yet
  93.                     regex = '\s*' + attribute_value + '\s*$'
  94.                     if re.match(regex, vocable.find(attribute_name).text):
  95.                         print('Vocable', vocable.find(words_attribute_name).text, 'already has', attribute_value, 'as a', attribute_name, sep=' ', end='\n')
  96.                     else:
  97.                         #print('adding attribute value ...')
  98.                         add_counter += 1
  99.                         if vocable.find(attribute_name).text == self.VOCABLE_ATTRIBUTE_VALUE_PLACEHOLDER:
  100.                             vocable.find(attribute_name).text = attribute_value
  101.                         else:
  102.                             vocable.find(attribute_name).text += self.ATTRIBUTE_SEPARATOR + attribute_value
  103.                 else:
  104.                     #print(vocable.find(words_attribute_name).text, 'is not in list of words', sep=' ', end='\n')
  105.                     pass
  106.            
  107.             print('\n')
  108.             print('added', add_counter, attribute_name, 'attribute values to vocables', sep=' ', end='\n')
  109.             print('\n')
  110.            
  111.             missing_vocables_counter = 0
  112.             for key in words_added:
  113.                 if not words_added[key]:
  114.                     print(key + ' was not found in your vocables file.')
  115.                     missing_vocables_counter += 1
  116.            
  117.             print('\n')
  118.             print(missing_vocables_counter, 'vocables were not found in your vocables file.', sep=' ', end='\n')
  119.             print('\n')
  120.            
  121.         except XMLParserException:
  122.             print("Exception occured while parsing the XML file.")
  123.  
  124. from lxml import etree
  125. from xldattributeadder import XMLInvalidException, XMLParser
  126.  
  127. __author__ = 'xiaolong'
  128.  
  129. class VocableFileWriter():
  130.    
  131.     def __init__(self):
  132.         pass
  133.    
  134.     @staticmethod
  135.     def write(self, xsd_file_path, vocable_file_path, xml_root):
  136.         #print('writing xml to vocable file ...')
  137.         xml_parser = XMLParser()
  138.         if xml_parser.validate_tree(xsd_file_path, xml_root):
  139.             try:
  140.                 with open(vocable_file_path, 'w') as file:
  141.                     file.write("<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n\n")
  142.                     file.write(etree.tostring(xml_root, xml_declaration=False, pretty_print=True, encoding="unicode"))
  143.             except IOError:
  144.                 print("Error while writing in log file!")          
  145.         else:
  146.             print("Tree is invalid.")
  147.             raise XMLInvalidException('Invalid XML!')
  148.  
  149.  
  150. __author__ = 'xiaolong'
  151.  
  152. class WordListReader():
  153.    
  154.     def __init__(self):
  155.         pass
  156.    
  157.     @staticmethod
  158.     def read(self, word_list_file_path):
  159.         #print('getting list of words from file ...')
  160.         list_of_words = []
  161.        
  162.         with open(word_list_file_path, 'r') as file:
  163.             for line in file:
  164.                 list_of_words.append(line.strip('\n'))
  165.        
  166.         return list_of_words
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement