使用 Beautiful Soup 解析 HTML 表格,并用 Pandas 将提取的数据填入数据框各列的步骤如下:
from bs4 import BeautifulSoup
import pandas as pd
# Example: extract tabular data from an HTML file.
with open('example.html') as file:
    soup = BeautifulSoup(file, 'html.parser')

table = soup.find('table')
if table is None:
    # soup.find returns None when no <table> exists; fail with a clear message
    # instead of an AttributeError on the next line.
    raise ValueError("no <table> element found in example.html")

data = []
for row in table.find_all('tr'):
    cols = row.find_all('td')
    if not cols:
        # Rows made only of <th> cells (headers) have no <td>; skip them so
        # they do not become empty rows in the data frame.
        continue
    data.append([col.text.strip() for col in cols])

# Build the frame from the collected rows in one step. Assigning through
# df.iloc[i] on an empty frame raises IndexError because iloc cannot enlarge
# its target.
df = pd.DataFrame(data, columns=['Column1', 'Column2', 'Column3'])
完整的代码示例:
from bs4 import BeautifulSoup
import pandas as pd
# Parse the HTML file.
with open('example.html') as file:
    soup = BeautifulSoup(file, 'html.parser')

table = soup.find('table')
if table is None:
    # soup.find returns None when no <table> exists; fail with a clear message
    # instead of an AttributeError on the next line.
    raise ValueError("no <table> element found in example.html")

# Collect the stripped text of every <td> cell, one list per table row.
data = []
for row in table.find_all('tr'):
    cols = row.find_all('td')
    if not cols:
        # Rows made only of <th> cells (headers) have no <td>; skip them so
        # they do not become empty rows in the data frame.
        continue
    data.append([col.text.strip() for col in cols])

# Create and fill the data frame in one step. Assigning through df.iloc[i] on
# an empty frame raises IndexError because iloc cannot enlarge its target.
df = pd.DataFrame(data, columns=['Column1', 'Column2', 'Column3'])

# Print the data frame.
print(df)
这样,你就可以使用 Beautiful Soup 和 Pandas 将从 HTML 表格中提取的数据赋值给数据框的各列。