# Untitled

def _extract_url_links(html):
    """Extract link targets from an HTML string.

    Collects the ``href`` value of every tag inside ``<body>``, or inside
    the whole document when the markup has no ``<body>`` element, while
    skipping in-page anchors and image links.

    Args:
        html: HTML markup to scan.

    Returns:
        A list of ``href`` strings in document order, excluding any that
        contain ``#``, ``.jpg``, ``.png`` or ``.gif``.

    >>> _extract_url_links('aa<a href="link1">link1</a>bb<a href="link2">link2</a>cc')
    ['link1', 'link2']
    """
    excluded_parts = ('#', '.jpg', '.png', '.gif')

    # "html.parser" selects the parser that ships with Python's standard
    # library, so no extra parser package is required.
    soup = BeautifulSoup(html, "html.parser")

    # html.parser does not insert a missing <body>, so find() returns None
    # for bare fragments such as the doctest input; scan the whole document
    # in that case instead of failing on None.
    root = soup.find('body')
    if root is None:
        root = soup

    all_url = []
    # Walk every descendant tag: any tag carrying an href is accepted, not
    # only <a>.
    for child_tag in root.find_all(True):
        href = child_tag.get('href')
        if href is None:
            continue
        # Substring match on purpose: "pic.jpg?size=1" is skipped as well.
        if any(part in href for part in excluded_parts):
            continue
        all_url.append(href)
    return all_url

import re


def _extract_url_links(html):
    """Extract link targets from an HTML string using a regex filter.

    Collects the ``href`` value of every tag inside ``<body>``, or inside
    the whole document when the markup has no ``<body>`` element, while
    skipping in-page anchors and image links.

    Args:
        html: HTML markup to scan.

    Returns:
        A list of ``href`` strings in document order, excluding any that
        contain ``#``, ``.jpg``, ``.png`` or ``.gif``.

    >>> _extract_url_links('aa<a href="link1">link1</a>bb<a href="link2">link2</a>cc')
    ['link1', 'link2']
    """
    # One alternation covers every forbidden piece. An href is kept only
    # when none of them occurs; chaining the "not in" tests with "or"
    # would let almost every href through. The dots are escaped so they
    # match a literal "." rather than any character.
    skip_pattern = re.compile(r'#|\.jpg|\.png|\.gif')

    # "html.parser" selects the parser that ships with Python's standard
    # library, so no extra parser package is required.
    soup = BeautifulSoup(html, "html.parser")

    # html.parser does not insert a missing <body>, so find() returns None
    # for bare fragments such as the doctest input; scan the whole document
    # in that case instead of failing on None.
    root = soup.find('body')
    if root is None:
        root = soup

    all_url = []
    # Walk every descendant tag: any tag carrying an href is accepted, not
    # only <a>.
    for child_tag in root.find_all(True):
        href = child_tag.get('href')
        if href is None:
            continue
        # search() scans the whole string, so the forbidden piece may
        # appear anywhere in the href.
        if skip_pattern.search(href) is None:
            all_url.append(href)
    return all_url