Untitled

def check_domains(url):
    """Register *url* with the ``website`` object that tracks its domain.

    The domain is taken from the second capture group of the URL pattern
    below.  The first time a domain is seen, a new ``website`` is created
    for it (sharing ``doc_queue``), seeded with *url* via
    ``add_initial_url``, appended to ``domain_queue``, and ``num_websites``
    is incremented.  For a domain that is already known, *url* is handed
    to the existing entry via ``add_url``.

    Inputs that cannot be processed are ignored: a non-string *url*
    (for example ``None``) or a string the pattern does not match.

    Args:
        url: The URL to file under its domain.

    Returns:
        None.
    """
    global num_websites

    try:
        the_domain = re.match(r'^(:?https?://[^.]*.)?([^/#?&]+).*$', url)
    except TypeError:
        # re.match raises TypeError for anything that is not a string
        # (such as None); there is no URL to record, so skip it instead
        # of letting the exception abort the caller.
        return
    if the_domain is None:
        return

    # NOTE: the optional first group consumes the scheme plus everything up
    # to and including the first dot, so the key is whatever follows that
    # (e.g. "hulu.com" for "http://www.hulu.com/watch/6704").
    domain = the_domain.group(2)

    if domain not in domains:
        site = website(doc_queue, domain)
        domains[domain] = site
        site.add_initial_url(url)
        domain_queue.append(site)
        num_websites = num_websites + 1
    else:
        domains[domain].add_url(url)

File "web_crawler.py", line 178, in getdoc
    check_domains(check)
  File "web_crawler.py", line 133, in check_domains
    the_domain = re.match(r'^(:?https?://[^.]*.)?([^/#?&]+).*$',url)
  File "/usr/local/lib/python2.7/re.py", line 137, in match
    return _compile(pattern, flags).match(string)
TypeError: expected string or buffer

>>> def check_domains(url):
...  the_domain = re.match(r'^(:?https?://[^.]*.)?([^/#?&]+).*$',url) #right here
...  if the_domain is not None:
...   print the_domain.groups(0)[1]
...  else:
...   print "NOOOO!!!!!"
...
>>>
>>> check_domains("http://www.hulu.com/watch/6704")
hulu.com
>>> check_domains("https://docs.python.org/2/library/datetime.html")
python.org