# 排序以确保正确顺序合并(根据文件名排序)
pdf_files.sort()
# 检查有没有有PDF文件需要合并
if not pdf_files:
print(f"No PDF files found in {folder_path}")
return
# 初始化PDF合并器对象
merger = PdfFileMerger()
# 添加各个PDF文件到合并器中
for pdf_file in pdf_files:
try:
merger.append(pdf_file)
print(f"Added {pdf_file} to merge list.")
except Exception as e:
print(f"Error adding {pdf_file}: {str(e)}")
continue
# 定义输出文件路径和名称(将放在同一个目录下)
output_filename = "merged.pdf"
output_path = os.path.join(folder_path, output_filename)
try:
# 写入合并后的PDF文件到指定路径中输出目标名称
with open(output_path, 'wb') as outfile:
merger.write(outfile)
print(f"
Successfully merged PDF files into:
{output_path}
")
# 输出各个源文件及其页码范围信息示例:
print("Merge details:")
for i, file in enumerate(pdf_files):
start_page = i + 1 if i == 0 else sum(len(PdfFileReader(open(f)).pages) for f in pdf_files) + 1
end_page = start_page + len(PdfFileReader(open(file)).pages) - 1 if i != len(pdf_files)-1 else "end"
print(f"{file} -> Pages {start_page}-{end_page}")
return True
except Exception as e:
print(f"
Error merging PDFs into {output_path}: {str(e)}
")
return False
+++++ output/html_word.py
import pandas as pd
def htmltabletoword(htmltable_str):
    """Convert an HTML table string into a Word (.docx) document.

    This is a deliberate shortcut: the table is not rebuilt as a real Word
    table. A heading paragraph is written first, then every ``<tr>`` of the
    first ``<table>`` found becomes one paragraph whose text is the row's cell
    texts joined with commas. Rows without cells are skipped. The document is
    saved in the current working directory as
    ``table_converted_<YYYYmmdd_HHMMSS>.docx``.

    Parameters:
        htmltable_str (str): String containing HTML table code.

    Returns:
        bool: True if the document was saved. False if the input contains no
        ``<table>`` element or if any error occurred (including a missing
        ``python-docx`` / ``beautifulsoup4`` dependency); the error is logged.
    """
    import logging
    from datetime import datetime

    try:
        # Third-party imports stay inside the ``try`` so that a missing
        # dependency is reported as a failed conversion (False), not a crash.
        from bs4 import BeautifulSoup
        from docx import Document

        soup = BeautifulSoup(htmltable_str, 'html.parser')
        table = soup.find('table')
        if table is None:
            # Nothing to convert; do not write an empty document.
            return False

        doc = Document()
        doc.add_paragraph("HTML Table Converted to Word Document")

        for row in table.find_all('tr'):
            # NOTE(review): the original cell-extraction expression was
            # garbled; this reconstruction takes the stripped text of every
            # <td>/<th> in the row -- confirm this matches the intended output.
            cells = [
                cell.get_text(strip=True)
                for cell in row.find_all(['td', 'th'])
            ]
            if cells:
                doc.add_paragraph(','.join(cells))

        # ``pd.datetime`` was removed from pandas; use the stdlib directly.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_file_name = f'table_converted_{timestamp}.docx'
        doc.save(output_file_name)
        return True
    except Exception:
        # Keep the best-effort boolean contract, but leave a trace of the
        # failure instead of swallowing it silently.
        logging.getLogger(__name__).exception(
            "Failed to convert HTML table to Word document"
        )
        return False