tag-notes.py

#!/usr/bin/env python3
"""
Note Tagging and SEO Metadata Script
Processes markdown notes using the Anthropic API to add tags, slugs, and SEO metadata.
"""

import io
import json
import os
import re
import sys
from pathlib import Path

import anthropic
import yaml
from ruamel.yaml import YAML

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# File name of the tag taxonomy; main() looks for it next to this script.
TAXONOMY_FILE = "tag-taxonomy.yaml"
# Root folder that main() searches recursively for *.md notes.
NOTES_FOLDER = os.path.expanduser("~/Documents/ejl-zk/40 Public/41 Notes/")
# Maximum number of characters of a note body included in an LLM prompt.
CONTENT_CHAR_LIMIT = 20000
# Inclusive target length range (in characters) for generated SEO descriptions.
SEO_DESC_MIN = 150
SEO_DESC_MAX = 160
# Model identifier passed to client.messages.create().
MODEL = "claude-sonnet-4-6"

# Round-trip YAML preserves existing frontmatter formatting
yaml_rt = YAML()
yaml_rt.preserve_quotes = True
# Very large line width — presumably so long values are not wrapped on dump.
yaml_rt.width = 4096

# Anthropic client — reads ANTHROPIC_API_KEY from env automatically
client = anthropic.Anthropic()


# ---------------------------------------------------------------------------
# Taxonomy helpers
# ---------------------------------------------------------------------------

def load_taxonomy(taxonomy_path: Path) -> list[str]:
    """Load the list of known tags from the taxonomy YAML file.

    Args:
        taxonomy_path: Path to a YAML file whose top-level mapping holds the
            tag list under the ``tags`` key.

    Returns:
        The tags in file order, or an empty list when the file is empty or
        its ``tags`` entry is missing or empty.
    """
    # Explicit UTF-8: the writer below emits raw Unicode (allow_unicode=True),
    # so reading must not depend on the platform's default encoding.
    with open(taxonomy_path, encoding="utf-8") as f:
        data = yaml.safe_load(f) or {}
    return data.get("tags", []) or []


def append_tags_to_taxonomy(taxonomy_path: Path, new_tags: set[str] | list[str]) -> None:
    """Append tags to the taxonomy file, skipping ones already present.

    Existing tags keep their position; new tags are added after them. All
    other top-level keys of the YAML file are written back unchanged in
    content (the file is re-serialized by PyYAML).

    Args:
        taxonomy_path: Path to the taxonomy YAML file to update in place.
        new_tags: Tags to add. A list is appended in the given order; a set
            has no meaningful order, so it is sorted to keep the written
            file deterministic across runs.
    """
    with open(taxonomy_path, encoding="utf-8") as f:
        data = yaml.safe_load(f) or {}
    existing = data.get("tags", []) or []

    if isinstance(new_tags, (set, frozenset)):
        additions = sorted(new_tags)
    else:
        additions = list(new_tags)

    # dict.fromkeys de-duplicates while preserving first-seen order.
    data["tags"] = list(dict.fromkeys(list(existing) + additions))

    with open(taxonomy_path, "w", encoding="utf-8") as f:
        yaml.dump(data, f, default_flow_style=False, sort_keys=False, allow_unicode=True)


# ---------------------------------------------------------------------------
# Frontmatter helpers
# ---------------------------------------------------------------------------

def extract_frontmatter(content: str):
    """Split a markdown document into parsed YAML frontmatter and body.

    The frontmatter must start on the very first line with ``---`` and end at
    the next line consisting of ``---`` (trailing spaces/tabs allowed on both
    delimiter lines; LF and CRLF line endings are accepted).

    Args:
        content: Full text of the markdown file.

    Returns:
        A ``(frontmatter, body)`` tuple. ``frontmatter`` is whatever the
        round-trip YAML loader returns for the delimited text, and ``body`` is
        everything after the closing delimiter line, byte-for-byte. When no
        frontmatter block is found the result is ``(None, content)``.
    """
    # Delimiter lines may only be followed by spaces/tabs. Using ``\s*`` here
    # would also swallow newlines, silently dropping blank lines at the top of
    # the body when the note is written back. The closing delimiter may also
    # be the last thing in the file (no trailing newline).
    pattern = r"^---[ \t]*\r?\n(.*?)\r?\n---[ \t]*(?:\r?\n|\Z)(.*)\Z"
    match = re.match(pattern, content, re.DOTALL)
    if not match:
        return None, content
    frontmatter = yaml_rt.load(match.group(1))
    return frontmatter, match.group(2)


def reconstruct_markdown(frontmatter, body: str) -> str:
    """Serialize frontmatter and glue it back onto the note body.

    Args:
        frontmatter: Object to dump with the module's round-trip YAML instance.
        body: Markdown text that follows the frontmatter block.

    Returns:
        The document text: an opening ``---`` line, the dumped YAML (always
        newline-terminated), a closing ``---`` line, then ``body``.
    """
    buffer = io.StringIO()
    yaml_rt.dump(frontmatter, buffer)
    serialized = buffer.getvalue()
    # Make sure the closing delimiter starts on its own line.
    terminator = "" if serialized.endswith("\n") else "\n"
    return f"---\n{serialized}{terminator}---\n{body}"


def slugify(text: str) -> str:
    """Convert arbitrary text into a lowercase, hyphen-separated slug.

    Apostrophes (straight and typographic) and backticks are removed outright
    so contractions stay one word ("don't" -> "dont"). Every other character
    that is not a word character, whitespace or hyphen becomes a separator,
    and runs of separators collapse into a single hyphen.

    Args:
        text: Source text, e.g. a file stem.

    Returns:
        The slug, without leading or trailing hyphens; may be empty.
    """
    text = text.lower()
    # U+2018/U+2019 are the curly single quotes editors substitute for "'".
    text = re.sub(r"['\u2018\u2019`]", "", text)
    text = re.sub(r"[^\w\s-]", " ", text)
    text = re.sub(r"[-\s]+", "-", text).strip("-")
    return text


# ---------------------------------------------------------------------------
# LLM helpers
# ---------------------------------------------------------------------------

def parse_json_response(content: str | None) -> dict | None:
    if content is None:
        return None
    try:
        return json.loads(content)
    except json.JSONDecodeError:
        pass
    start = content.find("{")
    end = content.rfind("}")
    if start != -1 and end > start:
        try:
            return json.loads(content[start : end + 1])
        except json.JSONDecodeError:
            pass
    return None


def call_llm_json(system_prompt: str, user_prompt: str, max_tokens: int = 1024) -> dict | None:
    """Send one prompt to the configured model and decode its JSON reply.

    Args:
        system_prompt: System instructions for the model.
        user_prompt: Content of the single user message.
        max_tokens: Upper bound on generated tokens.

    Returns:
        The value produced by ``parse_json_response`` for the reply text, or
        ``None`` when the API call raises ``anthropic.APIError`` or the reply
        contains no parseable JSON (a diagnostic is printed in both cases).
    """
    try:
        response = client.messages.create(
            model=MODEL,
            max_tokens=max_tokens,
            system=system_prompt,
            messages=[{"role": "user", "content": user_prompt}],
            temperature=0.2,
        )
    except anthropic.APIError as err:
        print(f"  ! Anthropic API error: {err}")
        return None

    # Only the first content block is inspected; an empty reply yields "".
    reply_text = response.content[0].text if response.content else ""
    decoded = parse_json_response(reply_text)
    if decoded is not None:
        return decoded

    print(f"  ! LLM returned no parseable JSON (stop_reason={response.stop_reason})")
    print(f"    content: {reply_text[:500]!r}")
    return None


def request_metadata(title: str, note_content: str, taxonomy: list[str]) -> dict | None:
    """Ask the model for tags and SEO metadata describing one note.

    Args:
        title: Note title shown to the model.
        note_content: Note body text included in the prompt.
        taxonomy: Existing tags the model should prefer to choose from.

    Returns:
        Whatever ``call_llm_json`` returns: the decoded reply, expected to
        carry the keys named in the prompt, or ``None`` on failure.
    """
    known_tags = ", ".join(taxonomy)

    system_text = f"""You analyze markdown notes and return structured metadata.

Return ONLY valid JSON in this exact shape:
{{
  "tags_from_taxonomy": ["tag1", "tag2"],
  "new_tag_suggestions": ["newtag1"],
  "seo_title_suffix": "Short descriptor that will follow the note title",
  "seo_description": "Factual summary between {SEO_DESC_MIN} and {SEO_DESC_MAX} characters.",
  "seo_keywords": ["keyword1", "keyword2"]
}}

Rules:
- tags_from_taxonomy: 1-5 tags drawn from the existing taxonomy that best fit the content.
- new_tag_suggestions: 0-2 NEW tags, only when content truly warrants it (be conservative).
- seo_title_suffix: a short, clean, non-clickbaity descriptor of the note. Do NOT include the note title or a leading colon — only the text that would follow "<title>: ". Aim for 4-10 words.
- seo_description: a clean factual summary, STRICTLY between {SEO_DESC_MIN} and {SEO_DESC_MAX} characters inclusive. Count characters carefully before responding.
- seo_keywords: 10-15 relevant keywords, no duplicates.

Existing tag taxonomy: {known_tags}"""

    user_text = f"""Note title: {title}

Note content:
{note_content}

Produce the JSON described in the system prompt."""

    return call_llm_json(system_prompt=system_text, user_prompt=user_text)


def request_description_retry(title: str, note_content: str, previous_desc: str) -> str:
    """Ask the model to rewrite an SEO description that missed the length range.

    Args:
        title: Note title shown to the model.
        note_content: Note body text included in the prompt.
        previous_desc: The rejected description; its length is quoted back to
            the model.

    Returns:
        The rewritten description with surrounding whitespace stripped, or an
        empty string when the call fails or the reply has no usable
        ``seo_description`` value. The length is NOT re-validated here.
    """
    system_text = f"""You rewrite SEO descriptions to a strict length.

Return ONLY valid JSON of the form:
{{"seo_description": "..."}}

The description must be a clean, factual summary of the note, STRICTLY between {SEO_DESC_MIN} and {SEO_DESC_MAX} characters inclusive. Count characters carefully before responding."""

    user_text = f"""Note title: {title}

Note content:
{note_content}

Your previous description was {len(previous_desc)} characters, outside the allowed {SEO_DESC_MIN}-{SEO_DESC_MAX} range:
"{previous_desc}"

Rewrite it to fit strictly within {SEO_DESC_MIN}-{SEO_DESC_MAX} characters."""

    reply = call_llm_json(system_text, user_text)
    if not reply:
        return ""
    rewritten = reply.get("seo_description") or ""
    return rewritten.strip()


# ---------------------------------------------------------------------------
# Note processing
# ---------------------------------------------------------------------------

def process_note(file_path: Path, taxonomy: list[str], new_tag_accumulator: set) -> None:
    """Fill in missing tags, slug and SEO metadata in one note's frontmatter.

    Only fields that are absent or empty are populated; existing values are
    never overwritten. The slug is derived locally from the file name, while
    tags, SEO title, SEO description and SEO keywords come from a single LLM
    call (plus one retry when the description misses the length range). The
    file is rewritten in place only when at least one field was added.

    Args:
        file_path: Markdown note to process.
        taxonomy: Known tags offered to the model.
        new_tag_accumulator: Set updated in place with tags the model proposed
            that are not in ``taxonomy`` and were applied to this note.
    """
    print(f"Processing: {file_path}")
    content = file_path.read_text(encoding="utf-8")

    frontmatter, body = extract_frontmatter(content)
    if frontmatter is None:
        print("  ⚠️  No frontmatter found, skipping")
        return

    existing_tags = frontmatter.get("tags", []) or []
    # A list holding a single null entry counts as "no tags".
    if existing_tags == [None]:
        existing_tags = []

    needs_tags = not existing_tags
    needs_slug = not frontmatter.get("slug")
    needs_seo_title = not frontmatter.get("seo-title")
    needs_seo_desc = not frontmatter.get("seo-description")
    needs_seo_keywords = not frontmatter.get("seo-keywords")
    needs_llm = any([needs_tags, needs_seo_title, needs_seo_desc, needs_seo_keywords])

    if not (needs_slug or needs_llm):
        print("  ✓ All fields already populated, skipping")
        return

    # YAML may parse a title such as ``1984`` as a non-string; the string
    # operations below require text.
    title = str(frontmatter.get("title") or file_path.stem)
    updated = False

    if needs_slug:
        slug = slugify(file_path.stem)
        frontmatter["slug"] = slug
        print(f"  + Added slug: {slug}")
        updated = True

    # If only slug was needed, skip the LLM call
    if not needs_llm:
        file_path.write_text(reconstruct_markdown(frontmatter, body), encoding="utf-8")
        print("  ✓ Updated successfully")
        return

    llm_response = request_metadata(title, body[:CONTENT_CHAR_LIMIT], taxonomy)
    if not llm_response:
        print("  ✗ Failed to get LLM response")
        if updated:
            # The slug does not depend on the LLM; persist it so the
            # "Added slug" message above stays true even when the call fails.
            file_path.write_text(reconstruct_markdown(frontmatter, body), encoding="utf-8")
            print("  ✓ Saved slug; other fields left for a later run")
        return

    if needs_tags:
        taxonomy_tags = llm_response.get("tags_from_taxonomy") or []
        new_suggestions = llm_response.get("new_tag_suggestions") or []
        # De-duplicate preserving order, taxonomy tags first, capped at five.
        combined = list(dict.fromkeys(list(taxonomy_tags) + list(new_suggestions)))[:5]
        if combined:
            frontmatter["tags"] = combined
            updated = True
            print(f"  + Added tags: {', '.join(combined)}")
            genuinely_new = [t for t in combined if t not in taxonomy and t in new_suggestions]
            if genuinely_new:
                print(f"    (New suggestions: {', '.join(genuinely_new)})")
                new_tag_accumulator.update(genuinely_new)

    if needs_seo_title:
        suffix = (llm_response.get("seo_title_suffix") or "").strip().lstrip(":").strip()
        # Drop the title if the model repeated it despite the instructions.
        if suffix.lower().startswith(title.lower()):
            suffix = suffix[len(title):].lstrip(":").strip()
        if suffix:
            seo_title = f"{title}: {suffix}"
            frontmatter["seo-title"] = seo_title
            updated = True
            print(f"  + Added SEO title: {seo_title}")

    if needs_seo_desc:
        seo_desc = (llm_response.get("seo_description") or "").strip()
        if seo_desc and not (SEO_DESC_MIN <= len(seo_desc) <= SEO_DESC_MAX):
            print(f"  ~ SEO description length {len(seo_desc)} outside {SEO_DESC_MIN}-{SEO_DESC_MAX}, re-asking")
            retry = request_description_retry(title, body[:CONTENT_CHAR_LIMIT], seo_desc)
            if retry and SEO_DESC_MIN <= len(retry) <= SEO_DESC_MAX:
                seo_desc = retry
            elif retry:
                print(f"  ! Retry still {len(retry)} chars; using original")
            else:
                print("  ! Retry failed; using original")
        if seo_desc:
            frontmatter["seo-description"] = seo_desc
            updated = True
            print(f"  + Added SEO description ({len(seo_desc)} chars)")

    if needs_seo_keywords:
        seo_keywords = list(dict.fromkeys(llm_response.get("seo_keywords") or []))
        if seo_keywords:
            frontmatter["seo-keywords"] = seo_keywords
            updated = True
            print(f"  + Added {len(seo_keywords)} SEO keywords")

    if updated:
        file_path.write_text(reconstruct_markdown(frontmatter, body), encoding="utf-8")
        print("  ✓ Updated successfully")
    else:
        print("  - No updates needed")


# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------

def main() -> None:
    """Process every markdown note, then offer to extend the tag taxonomy.

    Exits with status 1 when the taxonomy file or the notes folder is missing,
    and with status 0 when the folder contains no markdown files. A failure in
    one note is reported and does not stop the run. Tags proposed by the model
    that are absent from the taxonomy are listed at the end; they are written
    to the taxonomy file only after an interactive "y" confirmation.
    """
    taxonomy_path = Path(__file__).parent / TAXONOMY_FILE
    if not taxonomy_path.exists():
        print(f"Error: Taxonomy file not found at {taxonomy_path}")
        sys.exit(1)

    known_tags = load_taxonomy(taxonomy_path)
    print(f"Loaded {len(known_tags)} tags from taxonomy\n")

    notes_root = Path(NOTES_FOLDER)
    if not notes_root.exists():
        print(f"Error: Notes folder not found: {notes_root}")
        sys.exit(1)

    note_files = sorted(notes_root.rglob("*.md"))
    if not note_files:
        print(f"No markdown files found in {notes_root}")
        sys.exit(0)

    print(f"Processing all markdown files under: {notes_root}")
    print(f"Found {len(note_files)} markdown files\n")

    suggested: set[str] = set()
    for note in note_files:
        # Top-level boundary: report the failure and carry on with the rest.
        try:
            process_note(note, known_tags, suggested)
        except Exception as exc:
            print(f"  ✗ Error processing {note}: {exc}")
        print()

    print("\n✓ Processing complete!")

    fresh = [tag for tag in suggested if tag not in known_tags]
    fresh.sort()
    if not fresh:
        return

    print(f"\nNew tags suggested during this run: {', '.join(fresh)}")

    # Non-interactive (CI): log and skip
    if not sys.stdin.isatty():
        print("Non-interactive environment detected — skipping taxonomy update.")
        print("To add these manually, run the script locally and answer 'y' when prompted.")
        return

    try:
        reply = input("Add these to the taxonomy? [y/N]: ")
    except EOFError:
        reply = ""

    if reply.strip().lower() != "y":
        print("Skipped taxonomy update.")
        return

    append_tags_to_taxonomy(taxonomy_path, fresh)
    print(f"✓ Added {len(fresh)} tag(s) to {taxonomy_path.name}")

# Run the tagging workflow only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
initial commit 2026-02-06 06:17:43 -06:00			`#!/usr/bin/env python3`
			`"""`
			`Note Tagging and SEO Metadata Script`
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`Processes markdown notes using the Anthropic API to add tags, slugs, and SEO metadata.`
initial commit 2026-02-06 06:17:43 -06:00			`"""`

lots of fixes 2026-04-14 04:56:45 -05:00			`import io`
initial commit 2026-02-06 06:17:43 -06:00			`import json`
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`import os`
initial commit 2026-02-06 06:17:43 -06:00			`import re`
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`import sys`
lots of fixes 2026-04-14 04:56:45 -05:00			`from pathlib import Path`

switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`import anthropic`
lots of fixes 2026-04-14 04:56:45 -05:00			`import yaml`
			`from ruamel.yaml import YAML`
initial commit 2026-02-06 06:17:43 -06:00
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`# ---------------------------------------------------------------------------`
initial commit 2026-02-06 06:17:43 -06:00			`# Configuration`
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`# ---------------------------------------------------------------------------`
initial commit 2026-02-06 06:17:43 -06:00			`TAXONOMY_FILE = "tag-taxonomy.yaml"`
			`NOTES_FOLDER = os.path.expanduser("~/Documents/ejl-zk/40 Public/41 Notes/")`
lots of fixes 2026-04-14 04:56:45 -05:00			`CONTENT_CHAR_LIMIT = 20000`
			`SEO_DESC_MIN = 150`
			`SEO_DESC_MAX = 160`
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`MODEL = "claude-sonnet-4-6"`
lots of fixes 2026-04-14 04:56:45 -05:00
			`# Round-trip YAML preserves existing frontmatter formatting`
			`yaml_rt = YAML()`
			`yaml_rt.preserve_quotes = True`
			`yaml_rt.width = 4096`

switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`# Anthropic client — reads ANTHROPIC_API_KEY from env automatically`
			`client = anthropic.Anthropic()`


			`# ---------------------------------------------------------------------------`
			`# Taxonomy helpers`
			`# ---------------------------------------------------------------------------`
initial commit 2026-02-06 06:17:43 -06:00
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`def load_taxonomy(taxonomy_path: Path) -> list[str]:`
			`with open(taxonomy_path) as f:`
lots of fixes 2026-04-14 04:56:45 -05:00			`data = yaml.safe_load(f) or {}`
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`return data.get("tags", []) or []`
lots of fixes 2026-04-14 04:56:45 -05:00

switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`def append_tags_to_taxonomy(taxonomy_path: Path, new_tags: set[str]) -> None:`
			`with open(taxonomy_path) as f:`
lots of fixes 2026-04-14 04:56:45 -05:00			`data = yaml.safe_load(f) or {}`
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`existing = data.get("tags", []) or []`
lots of fixes 2026-04-14 04:56:45 -05:00			`combined = list(dict.fromkeys(existing + list(new_tags)))`
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`data["tags"] = combined`
			`with open(taxonomy_path, "w") as f:`
lots of fixes 2026-04-14 04:56:45 -05:00			`yaml.dump(data, f, default_flow_style=False, sort_keys=False, allow_unicode=True)`

initial commit 2026-02-06 06:17:43 -06:00
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`# ---------------------------------------------------------------------------`
			`# Frontmatter helpers`
			`# ---------------------------------------------------------------------------`

			`def extract_frontmatter(content: str):`
			`pattern = r"^---\s*\n(.*?)\n---\s*\n(.*)$"`
initial commit 2026-02-06 06:17:43 -06:00			`match = re.match(pattern, content, re.DOTALL)`
lots of fixes 2026-04-14 04:56:45 -05:00			`if not match:`
			`return None, content`
			`frontmatter = yaml_rt.load(match.group(1))`
			`return frontmatter, match.group(2)`
initial commit 2026-02-06 06:17:43 -06:00

switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`def reconstruct_markdown(frontmatter, body: str) -> str:`
lots of fixes 2026-04-14 04:56:45 -05:00			`stream = io.StringIO()`
			`yaml_rt.dump(frontmatter, stream)`
			`fm_str = stream.getvalue()`
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`if not fm_str.endswith("\n"):`
			`fm_str += "\n"`
lots of fixes 2026-04-14 04:56:45 -05:00			`return f"---\n{fm_str}---\n{body}"`
initial commit 2026-02-06 06:17:43 -06:00

switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`def slugify(text: str) -> str:`
lots of fixes 2026-04-14 04:56:45 -05:00			`text = text.lower()`
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			text = re.sub(r"[''`]", "", text)
			`text = re.sub(r"[^\w\s-]", " ", text)`
			`text = re.sub(r"[-\s]+", "-", text).strip("-")`
lots of fixes 2026-04-14 04:56:45 -05:00			`return text`
initial commit 2026-02-06 06:17:43 -06:00

switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`# ---------------------------------------------------------------------------`
			`# LLM helpers`
			`# ---------------------------------------------------------------------------`

			`def parse_json_response(content: str \| None) -> dict \| None:`
lots of fixes 2026-04-14 04:56:45 -05:00			`if content is None:`
			`return None`
			`try:`
			`return json.loads(content)`
			`except json.JSONDecodeError:`
			`pass`
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`start = content.find("{")`
			`end = content.rfind("}")`
lots of fixes 2026-04-14 04:56:45 -05:00			`if start != -1 and end > start:`
			`try:`
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`return json.loads(content[start : end + 1])`
lots of fixes 2026-04-14 04:56:45 -05:00			`except json.JSONDecodeError:`
			`pass`
			`return None`
initial commit 2026-02-06 06:17:43 -06:00

switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`def call_llm_json(system_prompt: str, user_prompt: str, max_tokens: int = 1024) -> dict \| None:`
initial commit 2026-02-06 06:17:43 -06:00			`try:`
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`message = client.messages.create(`
			`model=MODEL,`
			`max_tokens=max_tokens,`
			`system=system_prompt,`
			`messages=[{"role": "user", "content": user_prompt}],`
			`temperature=0.2,`
			`)`
			`content = message.content[0].text if message.content else ""`
enhancements 2026-04-19 22:18:10 -05:00			`parsed = parse_json_response(content)`
			`if parsed is None:`
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`print(f" ! LLM returned no parseable JSON (stop_reason={message.stop_reason})")`
enhancements 2026-04-19 22:18:10 -05:00			`print(f" content: {content[:500]!r}")`
			`return parsed`
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`except anthropic.APIError as e:`
			`print(f" ! Anthropic API error: {e}")`
initial commit 2026-02-06 06:17:43 -06:00			`return None`

lots of fixes 2026-04-14 04:56:45 -05:00
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`def request_metadata(title: str, note_content: str, taxonomy: list[str]) -> dict \| None:`
lots of fixes 2026-04-14 04:56:45 -05:00			`taxonomy_str = ", ".join(taxonomy)`
			`system_prompt = f"""You analyze markdown notes and return structured metadata.`

			`Return ONLY valid JSON in this exact shape:`
			`{{`
			`"tags_from_taxonomy": ["tag1", "tag2"],`
			`"new_tag_suggestions": ["newtag1"],`
			`"seo_title_suffix": "Short descriptor that will follow the note title",`
			`"seo_description": "Factual summary between {SEO_DESC_MIN} and {SEO_DESC_MAX} characters.",`
			`"seo_keywords": ["keyword1", "keyword2"]`
			`}}`

			`Rules:`
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`- tags_from_taxonomy: 1-5 tags drawn from the existing taxonomy that best fit the content.`
			`- new_tag_suggestions: 0-2 NEW tags, only when content truly warrants it (be conservative).`
lots of fixes 2026-04-14 04:56:45 -05:00			`- seo_title_suffix: a short, clean, non-clickbaity descriptor of the note. Do NOT include the note title or a leading colon — only the text that would follow "<title>: ". Aim for 4-10 words.`
			`- seo_description: a clean factual summary, STRICTLY between {SEO_DESC_MIN} and {SEO_DESC_MAX} characters inclusive. Count characters carefully before responding.`
			`- seo_keywords: 10-15 relevant keywords, no duplicates.`

			`Existing tag taxonomy: {taxonomy_str}"""`

			`user_prompt = f"""Note title: {title}`

			`Note content:`
			`{note_content}`

			`Produce the JSON described in the system prompt."""`
			`return call_llm_json(system_prompt, user_prompt)`


switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`def request_description_retry(title: str, note_content: str, previous_desc: str) -> str:`
lots of fixes 2026-04-14 04:56:45 -05:00			`system_prompt = f"""You rewrite SEO descriptions to a strict length.`

			`Return ONLY valid JSON of the form:`
			`{{"seo_description": "..."}}`

			`The description must be a clean, factual summary of the note, STRICTLY between {SEO_DESC_MIN} and {SEO_DESC_MAX} characters inclusive. Count characters carefully before responding."""`
switch to using Anthropic API 2026-04-25 18:47:29 -05:00
lots of fixes 2026-04-14 04:56:45 -05:00			`user_prompt = f"""Note title: {title}`

			`Note content:`
			`{note_content}`

			`Your previous description was {len(previous_desc)} characters, outside the allowed {SEO_DESC_MIN}-{SEO_DESC_MAX} range:`
			`"{previous_desc}"`

			`Rewrite it to fit strictly within {SEO_DESC_MIN}-{SEO_DESC_MAX} characters."""`
enhancements 2026-04-19 22:18:10 -05:00			`result = call_llm_json(system_prompt, user_prompt)`
lots of fixes 2026-04-14 04:56:45 -05:00			`if result:`
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`return (result.get("seo_description") or "").strip()`
			`return ""`
lots of fixes 2026-04-14 04:56:45 -05:00

switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`# ---------------------------------------------------------------------------`
			`# Note processing`
			`# ---------------------------------------------------------------------------`

			`def process_note(file_path: Path, taxonomy: list[str], new_tag_accumulator: set) -> None:`
initial commit 2026-02-06 06:17:43 -06:00			`print(f"Processing: {file_path}")`
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`content = file_path.read_text(encoding="utf-8")`
lots of fixes 2026-04-14 04:56:45 -05:00
			`frontmatter, body = extract_frontmatter(content)`
initial commit 2026-02-06 06:17:43 -06:00			`if frontmatter is None:`
lots of fixes 2026-04-14 04:56:45 -05:00			`print(" ⚠️ No frontmatter found, skipping")`
initial commit 2026-02-06 06:17:43 -06:00			`return`
lots of fixes 2026-04-14 04:56:45 -05:00
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`existing_tags = frontmatter.get("tags", []) or []`
lots of fixes 2026-04-14 04:56:45 -05:00			`if existing_tags == [None]:`
initial commit 2026-02-06 06:17:43 -06:00			`existing_tags = []`
lots of fixes 2026-04-14 04:56:45 -05:00
initial commit 2026-02-06 06:17:43 -06:00			`needs_tags = not existing_tags`
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`needs_slug = not frontmatter.get("slug")`
			`needs_seo_title = not frontmatter.get("seo-title")`
			`needs_seo_desc = not frontmatter.get("seo-description")`
			`needs_seo_keywords = not frontmatter.get("seo-keywords")`
lots of fixes 2026-04-14 04:56:45 -05:00
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`if not any([needs_tags, needs_slug, needs_seo_title, needs_seo_desc, needs_seo_keywords]):`
lots of fixes 2026-04-14 04:56:45 -05:00			`print(" ✓ All fields already populated, skipping")`
initial commit 2026-02-06 06:17:43 -06:00			`return`
lots of fixes 2026-04-14 04:56:45 -05:00
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`title = frontmatter.get("title") or file_path.stem`
add ability to populate slug field 2026-02-07 12:30:28 -06:00			`updated = False`
lots of fixes 2026-04-14 04:56:45 -05:00
add ability to populate slug field 2026-02-07 12:30:28 -06:00			`if needs_slug:`
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`slug = slugify(file_path.stem)`
			`frontmatter["slug"] = slug`
add ability to populate slug field 2026-02-07 12:30:28 -06:00			`print(f" + Added slug: {slug}")`
			`updated = True`
lots of fixes 2026-04-14 04:56:45 -05:00
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`# If only slug was needed, skip the LLM call`
			`if not any([needs_tags, needs_seo_title, needs_seo_desc, needs_seo_keywords]):`
			`file_path.write_text(reconstruct_markdown(frontmatter, body), encoding="utf-8")`
lots of fixes 2026-04-14 04:56:45 -05:00			`print(" ✓ Updated successfully")`
add ability to populate slug field 2026-02-07 12:30:28 -06:00			`return`
lots of fixes 2026-04-14 04:56:45 -05:00
			`llm_response = request_metadata(title, body[:CONTENT_CHAR_LIMIT], taxonomy)`
initial commit 2026-02-06 06:17:43 -06:00			`if not llm_response:`
lots of fixes 2026-04-14 04:56:45 -05:00			`print(" ✗ Failed to get LLM response")`
initial commit 2026-02-06 06:17:43 -06:00			`return`
lots of fixes 2026-04-14 04:56:45 -05:00
initial commit 2026-02-06 06:17:43 -06:00			`if needs_tags:`
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`taxonomy_tags = llm_response.get("tags_from_taxonomy") or []`
			`new_suggestions = llm_response.get("new_tag_suggestions") or []`
			`combined = list(dict.fromkeys(list(taxonomy_tags) + list(new_suggestions)))[:5]`
lots of fixes 2026-04-14 04:56:45 -05:00			`if combined:`
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`frontmatter["tags"] = combined`
initial commit 2026-02-06 06:17:43 -06:00			`updated = True`
lots of fixes 2026-04-14 04:56:45 -05:00			`print(f" + Added tags: {', '.join(combined)}")`
			`genuinely_new = [t for t in combined if t not in taxonomy and t in new_suggestions]`
			`if genuinely_new:`
			`print(f" (New suggestions: {', '.join(genuinely_new)})")`
			`new_tag_accumulator.update(genuinely_new)`

initial commit 2026-02-06 06:17:43 -06:00			`if needs_seo_title:`
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`suffix = (llm_response.get("seo_title_suffix") or "").strip().lstrip(":").strip()`
lots of fixes 2026-04-14 04:56:45 -05:00			`if suffix.lower().startswith(title.lower()):`
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`suffix = suffix[len(title):].lstrip(":").strip()`
lots of fixes 2026-04-14 04:56:45 -05:00			`if suffix:`
			`seo_title = f"{title}: {suffix}"`
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`frontmatter["seo-title"] = seo_title`
initial commit 2026-02-06 06:17:43 -06:00			`updated = True`
			`print(f" + Added SEO title: {seo_title}")`
lots of fixes 2026-04-14 04:56:45 -05:00
initial commit 2026-02-06 06:17:43 -06:00			`if needs_seo_desc:`
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`seo_desc = (llm_response.get("seo_description") or "").strip()`
lots of fixes 2026-04-14 04:56:45 -05:00			`if seo_desc and not (SEO_DESC_MIN <= len(seo_desc) <= SEO_DESC_MAX):`
			`print(f" ~ SEO description length {len(seo_desc)} outside {SEO_DESC_MIN}-{SEO_DESC_MAX}, re-asking")`
			`retry = request_description_retry(title, body[:CONTENT_CHAR_LIMIT], seo_desc)`
			`if retry and SEO_DESC_MIN <= len(retry) <= SEO_DESC_MAX:`
			`seo_desc = retry`
			`elif retry:`
			`print(f" ! Retry still {len(retry)} chars; using original")`
			`else:`
			`print(" ! Retry failed; using original")`
initial commit 2026-02-06 06:17:43 -06:00			`if seo_desc:`
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`frontmatter["seo-description"] = seo_desc`
initial commit 2026-02-06 06:17:43 -06:00			`updated = True`
lots of fixes 2026-04-14 04:56:45 -05:00			`print(f" + Added SEO description ({len(seo_desc)} chars)")`

initial commit 2026-02-06 06:17:43 -06:00			`if needs_seo_keywords:`
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`seo_keywords = list(dict.fromkeys(llm_response.get("seo_keywords") or []))`
initial commit 2026-02-06 06:17:43 -06:00			`if seo_keywords:`
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`frontmatter["seo-keywords"] = seo_keywords`
initial commit 2026-02-06 06:17:43 -06:00			`updated = True`
			`print(f" + Added {len(seo_keywords)} SEO keywords")`
lots of fixes 2026-04-14 04:56:45 -05:00
initial commit 2026-02-06 06:17:43 -06:00			`if updated:`
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`file_path.write_text(reconstruct_markdown(frontmatter, body), encoding="utf-8")`
lots of fixes 2026-04-14 04:56:45 -05:00			`print(" ✓ Updated successfully")`
initial commit 2026-02-06 06:17:43 -06:00			`else:`
lots of fixes 2026-04-14 04:56:45 -05:00			`print(" - No updates needed")`

initial commit 2026-02-06 06:17:43 -06:00
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`# ---------------------------------------------------------------------------`
			`# Entry point`
			`# ---------------------------------------------------------------------------`

			`def main() -> None:`
initial commit 2026-02-06 06:17:43 -06:00			`taxonomy_path = Path(__file__).parent / TAXONOMY_FILE`
			`if not taxonomy_path.exists():`
			`print(f"Error: Taxonomy file not found at {taxonomy_path}")`
			`sys.exit(1)`
lots of fixes 2026-04-14 04:56:45 -05:00
initial commit 2026-02-06 06:17:43 -06:00			`taxonomy = load_taxonomy(taxonomy_path)`
			`print(f"Loaded {len(taxonomy)} tags from taxonomy\n")`
lots of fixes 2026-04-14 04:56:45 -05:00
initial commit 2026-02-06 06:17:43 -06:00			`target_path = Path(NOTES_FOLDER)`
			`if not target_path.exists():`
			`print(f"Error: Notes folder not found: {target_path}")`
			`sys.exit(1)`
lots of fixes 2026-04-14 04:56:45 -05:00
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`md_files = sorted(target_path.rglob("*.md"))`
initial commit 2026-02-06 06:17:43 -06:00			`if not md_files:`
			`print(f"No markdown files found in {target_path}")`
			`sys.exit(0)`
lots of fixes 2026-04-14 04:56:45 -05:00
			`print(f"Processing all markdown files under: {target_path}")`
initial commit 2026-02-06 06:17:43 -06:00			`print(f"Found {len(md_files)} markdown files\n")`
lots of fixes 2026-04-14 04:56:45 -05:00
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`new_tag_accumulator: set[str] = set()`
initial commit 2026-02-06 06:17:43 -06:00			`for md_file in md_files:`
lots of fixes 2026-04-14 04:56:45 -05:00			`try:`
			`process_note(md_file, taxonomy, new_tag_accumulator)`
			`except Exception as e:`
			`print(f" ✗ Error processing {md_file}: {e}")`
			`print()`

initial commit 2026-02-06 06:17:43 -06:00			`print("\n✓ Processing complete!")`

lots of fixes 2026-04-14 04:56:45 -05:00			`fresh = sorted(t for t in new_tag_accumulator if t not in taxonomy)`
switch to using Anthropic API 2026-04-25 18:47:29 -05:00			`if not fresh:`
			`return`

			`print(f"\nNew tags suggested during this run: {', '.join(fresh)}")`

			`# Non-interactive (CI): log and skip`
			`if not sys.stdin.isatty():`
			`print("Non-interactive environment detected — skipping taxonomy update.")`
			`print(f"To add these manually, run the script locally and answer 'y' when prompted.")`
			`return`

			`try:`
			`answer = input("Add these to the taxonomy? [y/N]: ").strip().lower()`
			`except EOFError:`
			`answer = ""`

			`if answer == "y":`
			`append_tags_to_taxonomy(taxonomy_path, fresh)`
			`print(f"✓ Added {len(fresh)} tag(s) to {taxonomy_path.name}")`
			`else:`
			`print("Skipped taxonomy update.")`
lots of fixes 2026-04-14 04:56:45 -05:00

initial commit 2026-02-06 06:17:43 -06:00			`if __name__ == "__main__":`
add ability to populate slug field 2026-02-07 12:30:28 -06:00			`main()`