如何复制 Beautiful Soup 中的元素?
How can I duplicate an element in Beautiful Soup?
我编写了一个脚本来递归遍历文件夹,并将 <td> 标签包装在 <title> 标签中。经过一番考虑,我更愿意保留 <td>,并在文档的头部添加一个新的 <title> 标签,但我不知道该怎么做。
下面的代码可以将 <td> 标签包装在 <title> 标签中,但是我如何才能将此 <td> 中的文本复制到我的 HTML 文档的 <head> 中,并用 <title> 标签包围?
import os
from bs4 import BeautifulSoup
def clean_up_folder(dir):
    """Run ``clean_up_file`` on every file found beneath a directory tree.

    Args:
        dir: Path of the root directory to walk. The name shadows the
            ``dir`` builtin; it is kept so existing keyword callers
            continue to work.
    """
    # os.walk yields (directory, subdirectory names, file names) for each
    # directory in the tree; only the file names are needed here.
    for root, _dirs, files in os.walk(dir):
        for name in files:
            clean_up_file(os.path.join(root, name))
def clean_up_file(original_file):
    """Wrap the text of each ``<td class="title">`` in a ``<title>`` tag.

    The file is parsed with Beautiful Soup, modified, and written back in
    place (prettified) using the encoding Beautiful Soup detected.

    Args:
        original_file: Path of the HTML file to rewrite in place.
    """
    # Read raw bytes so Beautiful Soup performs the encoding detection.
    # Passing an already-decoded str leaves ``original_encoding`` as None.
    with open(original_file, 'rb') as src:
        soup = BeautifulSoup(src.read())

    encoding = soup.original_encoding
    if encoding is None:
        # Without a detected encoding nothing can be written back; return
        # before opening the file for writing so it is not truncated.
        return

    for cell in soup.find_all('td', class_='title'):
        # ``.string`` is None when the cell does not hold exactly one
        # string child, so there is nothing to wrap in that case.
        if cell.string is not None:
            cell.string.wrap(soup.new_tag('title'))

    # The payload is encoded bytes, so the file must be opened in binary mode.
    with open(original_file, 'wb') as dst:
        dst.write(soup.prettify().encode(encoding))
# Script entry point: process every file under the 'Test' directory.
clean_up_folder('Test')
基本上,我想复制(Ctrl+C)<td class="title"> 中的文本,并把它粘贴(Ctrl+V)到 <head></head> 标签内,同时用 <title></title> 标签包裹。是否有可能做到这一点?有什么指点吗?
基本上,用 <td class="title"> 中的 .string 创建一个新的 <title> 标签,然后用 .append 把这个 <title> 标签追加到 <head> 中。
import os
from bs4 import BeautifulSoup
def clean_up_folder(dir):
    """Run ``clean_up_file`` on every file found beneath a directory tree.

    Args:
        dir: Path of the root directory to walk. The name shadows the
            ``dir`` builtin; it is kept so existing keyword callers
            continue to work.
    """
    # os.walk yields (directory, subdirectory names, file names) for each
    # directory in the tree; only the file names are needed here.
    for root, _dirs, files in os.walk(dir):
        for name in files:
            clean_up_file(os.path.join(root, name))
def clean_up_file(original_file):
    """Copy the text of ``<td class="title">`` into a new ``<title>`` in ``<head>``.

    The first ``<td class="title">`` is left in place; its text is copied
    into a freshly created ``<title>`` tag that is appended to ``<head>``.
    The document is then written back in place (prettified) using the
    encoding Beautiful Soup detected.

    Args:
        original_file: Path of the HTML file to rewrite in place.
    """
    # Read raw bytes so Beautiful Soup performs the encoding detection.
    # Passing an already-decoded str leaves ``original_encoding`` as None.
    with open(original_file, 'rb') as src:
        soup = BeautifulSoup(src.read())

    encoding = soup.original_encoding
    if encoding is None:
        # Without a detected encoding nothing can be written back; return
        # before opening the file for writing so it is not truncated.
        return

    cell = soup.find('td', class_='title')
    head = soup.find('head')
    # ``.string`` is None when the cell does not hold exactly one string
    # child. Only add a <title> when there is text to copy and a <head> to
    # receive it, so no empty <title> is appended.
    if cell is not None and cell.string is not None and head is not None:
        title = soup.new_tag('title')
        title.string = cell.string
        head.append(title)

    # The payload is encoded bytes, so the file must be opened in binary mode.
    with open(original_file, 'wb') as dst:
        dst.write(soup.prettify().encode(encoding))
# Script entry point: process every file under the 'Test' directory.
clean_up_folder('Test')
查看文档:
我编写了一个脚本来递归遍历文件夹,并将 <td> 标签包装在 <title> 标签中。经过一番考虑,我更愿意保留 <td>,并在文档的头部添加一个新的 <title> 标签,但我不知道该怎么做。
下面的代码可以将 <td> 标签包装在 <title> 标签中,但是我如何才能将此 <td> 中的文本复制到我的 HTML 文档的 <head> 中,并用 <title> 标签包围?
import os
from bs4 import BeautifulSoup
def clean_up_folder(dir):
    """Run ``clean_up_file`` on every file found beneath a directory tree.

    Args:
        dir: Path of the root directory to walk. The name shadows the
            ``dir`` builtin; it is kept so existing keyword callers
            continue to work.
    """
    # os.walk yields (directory, subdirectory names, file names) for each
    # directory in the tree; only the file names are needed here.
    for root, _dirs, files in os.walk(dir):
        for name in files:
            clean_up_file(os.path.join(root, name))
def clean_up_file(original_file):
    """Wrap the text of each ``<td class="title">`` in a ``<title>`` tag.

    The file is parsed with Beautiful Soup, modified, and written back in
    place (prettified) using the encoding Beautiful Soup detected.

    Args:
        original_file: Path of the HTML file to rewrite in place.
    """
    # Read raw bytes so Beautiful Soup performs the encoding detection.
    # Passing an already-decoded str leaves ``original_encoding`` as None.
    with open(original_file, 'rb') as src:
        soup = BeautifulSoup(src.read())

    encoding = soup.original_encoding
    if encoding is None:
        # Without a detected encoding nothing can be written back; return
        # before opening the file for writing so it is not truncated.
        return

    for cell in soup.find_all('td', class_='title'):
        # ``.string`` is None when the cell does not hold exactly one
        # string child, so there is nothing to wrap in that case.
        if cell.string is not None:
            cell.string.wrap(soup.new_tag('title'))

    # The payload is encoded bytes, so the file must be opened in binary mode.
    with open(original_file, 'wb') as dst:
        dst.write(soup.prettify().encode(encoding))
# Script entry point: process every file under the 'Test' directory.
clean_up_folder('Test')
基本上,我想复制(Ctrl+C)<td class="title"> 中的文本,并把它粘贴(Ctrl+V)到 <head></head> 标签内,同时用 <title></title> 标签包裹。是否有可能做到这一点?有什么指点吗?
基本上,用 <td class="title"> 中的 .string 创建一个新的 <title> 标签,然后用 .append 把这个 <title> 标签追加到 <head> 中。
import os
from bs4 import BeautifulSoup
def clean_up_folder(dir):
    """Run ``clean_up_file`` on every file found beneath a directory tree.

    Args:
        dir: Path of the root directory to walk. The name shadows the
            ``dir`` builtin; it is kept so existing keyword callers
            continue to work.
    """
    # os.walk yields (directory, subdirectory names, file names) for each
    # directory in the tree; only the file names are needed here.
    for root, _dirs, files in os.walk(dir):
        for name in files:
            clean_up_file(os.path.join(root, name))
def clean_up_file(original_file):
    """Copy the text of ``<td class="title">`` into a new ``<title>`` in ``<head>``.

    The first ``<td class="title">`` is left in place; its text is copied
    into a freshly created ``<title>`` tag that is appended to ``<head>``.
    The document is then written back in place (prettified) using the
    encoding Beautiful Soup detected.

    Args:
        original_file: Path of the HTML file to rewrite in place.
    """
    # Read raw bytes so Beautiful Soup performs the encoding detection.
    # Passing an already-decoded str leaves ``original_encoding`` as None.
    with open(original_file, 'rb') as src:
        soup = BeautifulSoup(src.read())

    encoding = soup.original_encoding
    if encoding is None:
        # Without a detected encoding nothing can be written back; return
        # before opening the file for writing so it is not truncated.
        return

    cell = soup.find('td', class_='title')
    head = soup.find('head')
    # ``.string`` is None when the cell does not hold exactly one string
    # child. Only add a <title> when there is text to copy and a <head> to
    # receive it, so no empty <title> is appended.
    if cell is not None and cell.string is not None and head is not None:
        title = soup.new_tag('title')
        title.string = cell.string
        head.append(title)

    # The payload is encoded bytes, so the file must be opened in binary mode.
    with open(original_file, 'wb') as dst:
        dst.write(soup.prettify().encode(encoding))
# Script entry point: process every file under the 'Test' directory.
clean_up_folder('Test')
查看文档: