import re
from pathlib import Path

# Source page to clean, and where the cleaned copy is written.
INFILE = Path("input.html")
OUTFILE = Path("output.cleaned.html")

# Decode as UTF-8; undecodable bytes become U+FFFD instead of aborting.
with INFILE.open(encoding="utf-8", errors="replace") as _src:
    html = _src.read()

# ----------------------------
# 1) Fix common WP encoding junk
# ----------------------------
# Mojibake left behind when UTF-8 punctuation is mis-decoded as Windows-1252.
# Keys are written with escapes so that an editor or re-encoding pass cannot
# silently "repair" them into identity mappings that replace nothing.
# Dict order is the application order: the "Â " entry must run before the
# bare "Â" entry, otherwise the space-preserving variant can never match.
replacements = {
    "\u00e2\u20ac\u02dc": "‘",  # bytes E2 80 98 -> left single quote
    "\u00e2\u20ac\u2122": "’",  # bytes E2 80 99 -> right single quote
    "\u00e2\u20ac\u0153": "“",  # bytes E2 80 9C -> left double quote
    "\u00e2\u20ac\ufffd": "”",  # bytes E2 80 9D (0x9D undefined, seen as U+FFFD)
    "\u00e2\u20ac\u201d": "—",  # bytes E2 80 94 -> em dash
    "\u00e2\u20ac\u201c": "–",  # bytes E2 80 93 -> en dash
    "\u00c2 ": " ",  # stray "Â" followed by a space
    "\u00c2": "",  # any remaining stray "Â"
}
for k, v in replacements.items():
    html = html.replace(k, v)

# ----------------------------
# 2) Update ALL event date references (best-effort)
# ----------------------------
# Regex -> replacement text for every 2019 event date that must move to 2026.
# Dict order is the application order and it matters: the full date-range
# patterns come first so they are rewritten as a unit before the single-date
# patterns (which are substrings of the ranges) get a chance to match.
date_map = {
    # Main training date ranges
    r"September\s+10\s+through\s+September\s+14,\s*2019": "March 24 through March 28, 2026",
    r"September\s+10th\s+Through\s+September\s+14th,\s*2019": "March 24th Through March 28th, 2026",
    r"September\s+10th,\s*2019\s+Through\s+September\s+14th,\s*2019": "March 24th, 2026 Through March 28th, 2026",
    # Individual dates
    r"September\s+10th,\s*2019": "March 24th, 2026",
    r"September\s+14th,\s*2019": "March 28th, 2026",
    r"September\s+7th,\s*2019": "March 21st, 2026",
    r"September\s+7,\s*2019": "March 21, 2026",
    r"August\s+15,\s*2019": "March 10, 2026",
}

# Matching is case-insensitive; the replacement text is inserted verbatim,
# so the output always uses the capitalisation written in date_map.
for pattern, repl in date_map.items():
    html = re.sub(pattern, repl, html, flags=re.IGNORECASE)

# Keep the original letter date "August 6, 2019 8:23:AM" unchanged (you didn't ask to update it)

# ----------------------------
# 3) Update location / venue mentions (targeted, minimal)
# ----------------------------
# Replace the “secret training facility” sentence to match your request.
html = re.compile(
    r"The training will be conducted at my secret training facility here in San Diego,",
    re.IGNORECASE,
).sub(
    "The training will be conducted here in San Diego, California at the Sorrento Valley Marriott,",
    html,
)

# Pin the "March Training" call-to-action line to the exact 2026 dates,
# leaving the rest of the sentence as it was.
html = re.compile(
    r"Click The Link Now To Register For the\s+March Training Before It's Too Late\.",
    re.IGNORECASE,
).sub(
    "Click The Link Now To Register For the March 24th–28th, 2026 Training Before It's Too Late.",
    html,
)

# ----------------------------
# 4) Strip common WP junk attributes/classes and spacer noise
# ----------------------------
# Drop any double-quoted class attribute that mentions a WordPress helper
# class (wp-image-N, alignleft/alignright/aligncenter, wp-att-N). The whole
# attribute is replaced by a single space, not just the matching class name.
# The passes run one after another, in this order.
_WP_CLASS_ATTR_PATTERNS = (
    r'\sclass="[^"]*?\bwp-image-\d+\b[^"]*?"',
    r'\sclass="[^"]*?\balign(left|right|center)\b[^"]*?"',
    r'\sclass="[^"]*?\bwp-att-\d+\b[^"]*?"',
)
for _attr_pattern in _WP_CLASS_ATTR_PATTERNS:
    html = re.sub(_attr_pattern, " ", html, flags=re.IGNORECASE)

# NOTE(review): the HTML inside the literals of this section had been
# stripped out (leaving an unterminated string and a replace() that deleted
# every space). The tag names below are reconstructed from the section
# comments and the surviving pattern fragments -- confirm against real input.

# Remove <strong> elements that are empty or contain only whitespace/&nbsp;.
# In Python 3 str patterns, \s also matches a literal U+00A0.
html = re.sub(
    r"<strong\b[^>]*>(?:\s|&nbsp;)*</strong>",
    "",
    html,
    flags=re.IGNORECASE,
)

# Remove runs of two or more consecutive paragraphs whose only content is a
# non-breaking space (vertical padding). A single such paragraph is kept.
html = re.sub(
    r"(?:<p\b[^>]*>\s*(?:&nbsp;|\xa0)\s*</p>\s*){2,}",
    "",
    html,
    flags=re.IGNORECASE,
)

# Remove empty headings like <h2></h2>. The backreference makes sure the
# closing tag is the same level as the opening one.
html = re.sub(r"<(h[1-6])\b[^>]*>\s*</\1>", "", html, flags=re.IGNORECASE)

# Remove spacer tables used only for layout (simple heuristic: tables with no meaningful text)
# We'll keep tables that contain substantial content.
def strip_empty_tables(s: str) -> str:
    """Remove <table> elements that carry almost no visible text.

    Each ``<table ...>...</table>`` span is reduced to its text (tags
    removed, whitespace collapsed and trimmed). If 10 characters or fewer
    remain, the whole table is treated as a layout spacer and deleted;
    otherwise it is returned unchanged.

    Args:
        s: HTML text to filter.

    Returns:
        ``s`` with the near-empty tables removed.

    Note:
        Matching is non-greedy and regex-based, so for nested tables a match
        runs from the outer opening tag to the first ``</table>``.
        HTML entities are counted as literal characters (``&nbsp;`` is 6).
    """
    max_spacer_text = 10  # tables with this much text or less are dropped

    def table_repl(match):
        table = match.group(0)
        text = re.sub(r"<[^>]+>", "", table)
        text = re.sub(r"\s+", " ", text).strip()
        # If table text is tiny or purely decorative, drop it.
        if len(text) <= max_spacer_text:
            return ""
        return table

    # \b keeps the pattern from matching other tags that merely start with
    # "table"; DOTALL lets a table span several lines.
    return re.sub(
        r"<table\b[^>]*>.*?</table>",
        table_repl,
        s,
        flags=re.IGNORECASE | re.DOTALL,
    )

# Run the spacer-table filter over the whole document.
html = strip_empty_tables(html)

# ----------------------------
# 5) Normalize inline styles: keep only alignment on headings/paragraphs (optional)
# ----------------------------
# Remove most inline style attributes; preserve text-align where present.
def style_cleaner(match: "re.Match[str]") -> str:
    """Replacement callback that reduces a style attribute to its text-align.

    Args:
        match: A regex match whose group 1 is the raw value of a ``style``
            attribute (the text between the quotes).

    Returns:
        `` style="text-align: <value>;"`` (with a leading space and the value
        lower-cased) when the style declares ``text-align`` as left, right or
        center; otherwise an empty string, which removes the attribute.
    """
    style = match.group(1)
    m = re.search(r"text-align\s*:\s*(left|right|center)\s*;?", style, flags=re.IGNORECASE)
    if m:
        return f' style="text-align: {m.group(1).lower()};"'
    return ""

# Rewrite every double-quoted style attribute through style_cleaner.
# Single-quoted or unquoted style attributes are not matched by this pattern.
html = re.sub(r'\sstyle="([^"]*)"', style_cleaner, html, flags=re.IGNORECASE)

# ----------------------------
# 6) Wrap with clean, proportional CSS
# ----------------------------
# Document head and opening wrapper written before the cleaned content.
# NOTE(review): the original markup and stylesheet in this literal had been
# stripped out, leaving only the title text. The shell below is a minimal
# reconstruction that uses relative units only; adjust the rules as needed.
CLEAN_CSS = """<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Real World Hypnosis — Regression Skills Intensive</title>
<style>
html { font-size: 100%; }
body { margin: 0; font-family: Georgia, "Times New Roman", serif; line-height: 1.6; color: #222; background: #fff; }
.page { max-width: 48rem; margin: 0 auto; padding: 1.5rem; }
h1, h2, h3, h4, h5, h6 { line-height: 1.25; margin: 1.5em 0 0.5em; }
p { margin: 0 0 1em; }
img { max-width: 100%; height: auto; }
table { max-width: 100%; border-collapse: collapse; }
</style>
</head>
<body>
<div class="page">
"""
# Closes everything CLEAN_CSS opened, in reverse order.
CLEAN_FOOT = """
</div>
</body>
</html>
"""

# If the HTML already contains its own <html>/<body> shell, strip that outer
# shell so the content can be re-wrapped in CLEAN_CSS / CLEAN_FOOT without
# producing a nested document.
# NOTE(review): the tags inside these three patterns had been stripped out,
# which left them as no-ops; they are reconstructed from the comment above.
body_inner = html
# Leading doctype (an optional BOM and whitespace may precede it).
body_inner = re.sub(r"(?is)^\ufeff?\s*<!doctype[^>]*>", "", body_inner)
# Entire <head>...</head>; \b keeps <header> elements untouched.
body_inner = re.sub(r"(?is)<head\b.*?</head>", "", body_inner)
# Opening and closing <html> / <body> tags; their contents are kept.
body_inner = re.sub(r"(?is)</?(?:html|body)\b[^>]*>", "", body_inner)

# Trim leading/trailing whitespace
body_inner = body_inner.strip()

final = CLEAN_CSS + "\n" + body_inner + "\n" + CLEAN_FOOT

OUTFILE.write_text(final, encoding="utf-8")
print(f"Wrote: {OUTFILE.resolve()}")

# --- Page-footer text captured along with the script; not part of the program. ---
# Home - 999 Copyright - 999 Earnings - 999 Privacy - 999 Terms & Conditions - 999 Disclaimer
# David Snyder, dlsnyder1@yahoo.com, 3830 Valley Centre Drive Ste 705 PMB 371 San Diego CA, 92130, 8582824663
#
# Scroll to Top