Not a member of Pastebin yet? Sign up — it unlocks many cool features!
- import pyodbc,nltk,array,re,itertools
- cnxn = pyodbc.connect('Driver={MySQL ODBC 5.1 Driver};Server=127.0.0.1;Port=3306;Database=information_schema;User=root; Password=1234;Option=3;')
- cursor = cnxn.cursor()
- cursor.execute("use collegedatabase ;")
- cursor.execute("select * from sampledata ; ")
- cnxn.commit()
- s=[]
- j=[]
- x=[]
- words = []
- w = []
- sfq = []
- POS=[]
- wnl = nltk.WordNetLemmatizer()
- p = []
- clean= []
- l =[]
- tupletolist= []
- results = []
- aux = []
- regex = re.compile("w+.")
- pp = []
- array1=[]
- f = open("C:\Users\vchauhan\Desktop\tupletolist.txt","w")
- for entry in cursor:
- s.append(entry.injury_type),j.append(entry.injury_desc)
- def isAcceptableChar(character):
- return character not in "~!@#$%^&*()_+`1234567890-={}|:<>?[];',/."
- from nltk.tokenize import word_tokenize
- from nltk.corpus import stopwords
- english_stops = set(stopwords.words('english'))
- for i in range(0,200):
- j.append(filter(isAcceptableChar, j[i]))
- w.append([word for word in word_tokenize(j[i].lower()) if word not in english_stops])
- for j in range (0,len(w[i])):
- results = regex.search(w[i][j])
- if results:
- str.rstrip(w[i][j],'.')
- for a in range(0 , 200):
- sfq.append(" ".join(w[a]))
- from nltk.stem import LancasterStemmer
- stemmer = LancasterStemmer()
- for i in range (0,200):
- pp.append(len(w[i]))
- for a in range (0,200):
- p.append(word_tokenize(sfq[a]))
- POS.append([wnl.lemmatize(t) for t in p[a]])
- x.append(nltk.pos_tag(POS[a]))
- clean.append((re.sub('()[]{}'':/-[(",)]','',str(x[a]))))
- cursor.execute("update sampledata SET POS = ? where SRNO = ?", (re.sub('()[]{}'':/-[(",)]','',str(x[a]))), a)
- for i in range (0,len(array1)):
- results.append(regex.search(array1[i][0]))
- if results[i] is not None:
- aux.append(i)
- f.write(str(w))
- Traceback (most recent call last):
- File "C:UsersvchauhanDesktopregexsolution_try.py", line 37, in <module>
- j.append(filter(isAcceptableChar, j[i]))
- AttributeError: 'int' object has no attribute 'append'
- j.append(filter(isAcceptableChar, j[i]))  # fails: `j` is no longer a list here — it is an int
- w.append([word for word in word_tokenize(j[i].lower()) if word not in english_stops])
- for j in range(0, len(w[i])):  # this inner loop rebinds `j` to an int, shadowing the list
- for j in range(0, len(w[i])):
- for i in range(0, 200):
- j.append(filter(isAcceptableChar, j[i]))  # so the next outer iteration crashes here
- w.append([word for word in word_tokenize(j[i].lower()) if word not in english_stops])
- for j in range(0, len(w[i])):  # fix: rename this loop variable (e.g. `k`)
Add Comment
Please sign in to add a comment.