可以使用 Python 的递归函数来实现 find_all 方法的泛化。
示例代码:
from bs4 import BeautifulSoup
def filter_tags(element):
    """Return True if *element* carries the CSS class ``news``.

    Args:
        element: A tag-like object exposing ``has_attr(name)`` and
            item access (``element['class']``).

    Returns:
        bool: True when the element has a ``class`` attribute that
        contains the token ``news``; False otherwise.
    """
    if not element.has_attr('class'):
        return False
    classes = element['class']
    # The attribute may arrive as one whitespace-separated string rather
    # than a list of tokens; split it so 'news' is matched as a whole
    # class name and not as a substring (e.g. of 'newsletter').
    if isinstance(classes, str):
        classes = classes.split()
    return 'news' in classes
def find_all(root, filter_func):
    """Recursively collect every node under *root* accepted by *filter_func*.

    The tree is walked depth-first in document order (a node is visited
    before its children), and *root* itself is tested as well.

    Args:
        root: The node to start from. It must expose a ``children``
            iterable. ``None`` is accepted and yields an empty result.
        filter_func: Callable taking a node and returning a truthy value
            when the node should be included.

    Returns:
        list: The matching nodes, in traversal order.
    """
    results = []
    if root is None:
        # A lookup for a missing tag gives None; treat it as an empty tree.
        return results
    if filter_func(root):
        results.append(root)
    for child in root.children:
        # Only descend into named nodes; children without a name
        # (such as plain text nodes) are skipped.
        if getattr(child, 'name', None) is not None:
            results.extend(find_all(child, filter_func))
    return results
# Sample document used to demonstrate the generalized find_all().
html_doc = """
The Dormouse's story
Once upon a time there were three little sisters; and their names were
...
"""

soup = BeautifulSoup(html_doc, 'html.parser')

# The sample markup contains no <html> element, so soup.html is None here;
# fall back to the whole document so the traversal still has a root.
root = soup.html if soup.html is not None else soup
news_tags = find_all(root, filter_tags)

# Print each matching tag as indented markup.
for tag in news_tags:
    print(tag.prettify())