# fix_heading
#
# 2026-05-31 00:40:43  1042 views

import os
import re

# NOTE(review): this script appears to have been pasted through an HTML
# renderer, which removed all indentation and swallowed every tag-like
# fragment inside the regex literals. The block structure and the patterns
# below are a reconstruction -- confirm them against the original script
# before running it on real data.

# Directory whose *.txt files are processed when the script is run.
folder = "."

# Years that are rewritten to 2026, both in file contents and in file names.
_OLD_YEAR = re.compile(r'202[3-5]')

# (pattern, replacement) pairs applied in order to clean up dirty HTML.
_HTML_CLEANUPS = (
    # A <p> opened directly in front of a heading: keep only the heading tag.
    (re.compile(r'<p>\s*(<h[1-6][^>]*>)', re.I), r'\1'),
    # A </p> left directly behind a closing heading tag: keep only the tag.
    (re.compile(r'(</h[1-6]>)\s*</p>', re.I), r'\1'),
    # NOTE(review): the tail of this pattern was lost in the listing; a
    # trailing <br> after the closing heading tag is assumed -- confirm.
    (re.compile(r'(</h[1-6]>)\s*<br\s*/?>', re.I), r'\1'),
    # NOTE(review): the middle of this pattern was lost in the listing; a
    # paragraph containing only a <br> is assumed -- confirm.
    (re.compile(r'<p>\s*<br\s*/?>\s*</p>', re.I), ''),
)


def clean_html(content):
    """Return *content* with stray <p>/<br> markup around headings removed."""
    for pattern, replacement in _HTML_CLEANUPS:
        content = pattern.sub(replacement, content)
    return content


def shift_headings(content):
    """Return *content* with every <h1>..<h5> demoted by one level.

    <h6> is left unchanged because there is no deeper heading level.
    Matching is case-insensitive; rewritten tags are emitted in lower case.
    Attributes on the opening tag are preserved.
    """
    # Walk from the deepest level upwards so a heading that was just
    # demoted is not matched again by a later iteration.
    for level in range(5, 0, -1):
        new_level = level + 1
        content = re.sub(
            rf'<h{level}([^>]*)>',
            rf'<h{new_level}\1>',
            content,
            flags=re.I,
        )
        content = re.sub(
            rf'</h{level}>',
            rf'</h{new_level}>',
            content,
            flags=re.I,
        )
    return content


def update_years(text):
    """Return *text* with every occurrence of 2023, 2024 or 2025 set to 2026."""
    return _OLD_YEAR.sub('2026', text)


def fix_content(content):
    """Apply the HTML cleanup, the heading shift and the year update, in order."""
    return update_years(shift_headings(clean_html(content)))


def main():
    """Rewrite every .txt file in ``folder`` in place, then rename it.

    Each file is read as UTF-8, passed through ``fix_content`` and written
    back. If the file name contains 2023-2025 the file is renamed with the
    year set to 2026, unless that name is already taken.
    """
    for filename in os.listdir(folder):
        if not filename.endswith(".txt"):
            continue

        path = os.path.join(folder, filename)
        # os.listdir also returns directories; open() would fail on those.
        if not os.path.isfile(path):
            continue

        with open(path, "r", encoding="utf-8") as f:
            content = f.read()

        content = fix_content(content)

        # Save the updated content.
        with open(path, "w", encoding="utf-8") as f:
            f.write(content)

        # Rename the file when its name carries an old year.
        new_filename = update_years(filename)
        if new_filename == filename:
            print("Fixed:", filename)
            continue

        new_path = os.path.join(folder, new_filename)
        # os.rename silently replaces an existing file on POSIX and raises on
        # Windows; never clobber another file, just report and move on.
        if os.path.exists(new_path):
            print(f"Skipped rename (target exists): {filename} -> {new_filename}")
            continue

        os.rename(path, new_path)
        print(f"Renamed: {filename} -> {new_filename}")

    print("DONE")


if __name__ == "__main__":
    main()