import os
import re
# Batch-clean the ``.txt`` files that sit directly inside ``folder``:
#   1. strip stray paragraph tags around headings and empty <br> paragraphs,
#   2. push every heading down one level (h1 -> h2 ... h5 -> h6),
#   3. rewrite the years 2023-2025 to 2026 in the text and in the file name.
folder = "."

# NOTE(review): the HTML tags inside the original regex literals were lost
# (the strings were unterminated), so the four cleanup patterns and the
# heading pattern below are reconstructed from what survived: the replacement
# strings (``\1`` / empty), the ``\s*`` fragments and the ``{i}``/``new_level``
# loop. Confirm them against the intended clean-up rules before running this
# on real data.

# "<p>" immediately before a heading's opening tag -> keep only the heading tag.
_P_BEFORE_HEADING = re.compile(r'<p>\s*(<h[1-6]\b[^>]*>)', re.I)
# "<p>" immediately after a heading's opening tag -> keep only the heading tag.
_P_AFTER_HEADING_OPEN = re.compile(r'(<h[1-6]\b[^>]*>)\s*<p>', re.I)
# "</p>" immediately after a heading's closing tag -> keep only the closing tag.
_P_CLOSE_AFTER_HEADING = re.compile(r'(</h[1-6]>)\s*</p>', re.I)
# A paragraph whose only content is a line break -> removed entirely.
_BR_ONLY_PARAGRAPH = re.compile(r'<p>\s*<br\s*/?>\s*</p>', re.I)

# Opening or closing h1-h5 tag; h6 is left out on purpose because it cannot
# go any deeper. The lookahead keeps names such as "<h12>" from matching.
_HEADING_TAG = re.compile(r'(</?h)([1-5])(?=[\s/>])', re.I)

# Kept exactly as in the original script. It also matches inside longer digit
# runs (e.g. "20240101" becomes "20260101").
_OLD_YEAR = re.compile(r'202[3-5]')


def clean_html(content: str) -> str:
    """Return *content* with stray paragraph tags around headings removed.

    Also drops paragraphs that contain nothing but a ``<br>``.
    """
    content = _P_BEFORE_HEADING.sub(r'\1', content)
    content = _P_AFTER_HEADING_OPEN.sub(r'\1', content)
    content = _P_CLOSE_AFTER_HEADING.sub(r'\1', content)
    return _BR_ONLY_PARAGRAPH.sub('', content)


def shift_headings(content: str) -> str:
    """Return *content* with every h1-h5 tag moved one level down.

    The substitution is a single pass, so a tag is bumped exactly once
    (an ``h1`` becomes ``h2``, never ``h3``). ``h6`` tags are unchanged.
    """

    def bump(match):
        # group(1) is "<h" or "</h" in its original case, group(2) the level.
        return f"{match.group(1)}{int(match.group(2)) + 1}"

    return _HEADING_TAG.sub(bump, content)


def replace_years(text: str) -> str:
    """Return *text* with each occurrence of 2023, 2024 or 2025 set to 2026."""
    return _OLD_YEAR.sub('2026', text)


def process_file(directory: str, filename: str) -> None:
    """Clean one file in place, then rename it if its name contains an old year.

    Prints one status line per processed file. Entries that are not regular
    files are skipped silently.
    """
    path = os.path.join(directory, filename)
    if not os.path.isfile(path):
        # A directory named "*.txt" would make open() raise.
        return

    with open(path, "r", encoding="utf-8") as f:
        content = f.read()

    content = clean_html(content)
    content = shift_headings(content)
    content = replace_years(content)

    with open(path, "w", encoding="utf-8") as f:
        f.write(content)

    new_filename = replace_years(filename)
    if new_filename == filename:
        print("Fixed:", filename)
        return

    new_path = os.path.join(directory, new_filename)
    if os.path.exists(new_path):
        # os.rename would overwrite the target on POSIX and raise on Windows;
        # keep both files rather than lose one.
        print(f"Skipped rename (target exists): {filename} -> {new_filename}")
        return

    os.rename(path, new_path)
    print(f"Renamed: {filename} -> {new_filename}")


def main() -> None:
    """Process every ``.txt`` entry directly inside ``folder``."""
    for filename in os.listdir(folder):
        if filename.endswith(".txt"):
            process_file(folder, filename)
    print("DONE")


if __name__ == "__main__":
    main()