diff --git a/.tag-notes.py.swp b/.tag-notes.py.swp
deleted file mode 100644
index 840f118..0000000
Binary files a/.tag-notes.py.swp and /dev/null differ
diff --git a/tag-notes.py b/tag-notes.py
index f12a232..d54d009 100755
--- a/tag-notes.py
+++ b/tag-notes.py
@@ -1,53 +1,65 @@
#!/usr/bin/env python3
"""
Note Tagging and SEO Metadata Script
-Processes markdown notes using a local LLM to add tags, slugs, and SEO metadata
+Processes markdown notes using the Anthropic API to add tags, slugs, and SEO metadata.
"""
-import os
-import sys
import io
import json
+import os
import re
+import sys
from pathlib import Path
-import requests
+import anthropic
import yaml
from ruamel.yaml import YAML
+# ---------------------------------------------------------------------------
# Configuration
-LM_STUDIO_URL = "http://localhost:1234/v1/chat/completions"
-MODEL_NAME = "openai/gpt-oss-20b"
+# ---------------------------------------------------------------------------
TAXONOMY_FILE = "tag-taxonomy.yaml"
NOTES_FOLDER = os.path.expanduser("~/Documents/ejl-zk/40 Public/41 Notes/")
CONTENT_CHAR_LIMIT = 20000
SEO_DESC_MIN = 150
SEO_DESC_MAX = 160
+MODEL = "claude-sonnet-4-6"
# Round-trip YAML preserves existing frontmatter formatting
yaml_rt = YAML()
yaml_rt.preserve_quotes = True
yaml_rt.width = 4096
+# Anthropic client — reads ANTHROPIC_API_KEY from env automatically
+client = anthropic.Anthropic()
-def load_taxonomy(taxonomy_path):
- with open(taxonomy_path, 'r') as f:
+
+# ---------------------------------------------------------------------------
+# Taxonomy helpers
+# ---------------------------------------------------------------------------
+
+def load_taxonomy(taxonomy_path: Path) -> list[str]:
+ with open(taxonomy_path) as f:
data = yaml.safe_load(f) or {}
- return data.get('tags', []) or []
+ return data.get("tags", []) or []
-def append_tags_to_taxonomy(taxonomy_path, new_tags):
- with open(taxonomy_path, 'r') as f:
+def append_tags_to_taxonomy(taxonomy_path: Path, new_tags: set[str]) -> None:
+ with open(taxonomy_path) as f:
data = yaml.safe_load(f) or {}
- existing = data.get('tags', []) or []
+ existing = data.get("tags", []) or []
combined = list(dict.fromkeys(existing + list(new_tags)))
- data['tags'] = combined
- with open(taxonomy_path, 'w') as f:
+ data["tags"] = combined
+ with open(taxonomy_path, "w") as f:
yaml.dump(data, f, default_flow_style=False, sort_keys=False, allow_unicode=True)
-def extract_frontmatter(content):
- pattern = r'^---\s*\n(.*?)\n---\s*\n(.*)$'
+# ---------------------------------------------------------------------------
+# Frontmatter helpers
+# ---------------------------------------------------------------------------
+
+def extract_frontmatter(content: str):
+ pattern = r"^---\s*\n(.*?)\n---\s*\n(.*)$"
match = re.match(pattern, content, re.DOTALL)
if not match:
return None, content
@@ -55,74 +67,65 @@ def extract_frontmatter(content):
return frontmatter, match.group(2)
-def reconstruct_markdown(frontmatter, body):
+def reconstruct_markdown(frontmatter, body: str) -> str:
stream = io.StringIO()
yaml_rt.dump(frontmatter, stream)
fm_str = stream.getvalue()
- if not fm_str.endswith('\n'):
- fm_str += '\n'
+ if not fm_str.endswith("\n"):
+ fm_str += "\n"
return f"---\n{fm_str}---\n{body}"
-def slugify(text):
+def slugify(text: str) -> str:
text = text.lower()
- text = re.sub(r"[’'`]", '', text)
- text = re.sub(r'[^\w\s-]', ' ', text)
- text = re.sub(r'[-\s]+', '-', text).strip('-')
+ text = re.sub(r"[’'`]", "", text)
+ text = re.sub(r"[^\w\s-]", " ", text)
+ text = re.sub(r"[-\s]+", "-", text).strip("-")
return text
-def parse_json_response(content):
+# ---------------------------------------------------------------------------
+# LLM helpers
+# ---------------------------------------------------------------------------
+
+def parse_json_response(content: str | None) -> dict | None:
if content is None:
return None
try:
return json.loads(content)
except json.JSONDecodeError:
pass
- start = content.find('{')
- end = content.rfind('}')
+ start = content.find("{")
+ end = content.rfind("}")
if start != -1 and end > start:
try:
- return json.loads(content[start:end + 1])
+ return json.loads(content[start : end + 1])
except json.JSONDecodeError:
pass
return None
-def call_llm_json(system_prompt, user_prompt, max_tokens=4000):
- payload = {
- "model": MODEL_NAME,
- "messages": [
- {"role": "system", "content": system_prompt},
- {"role": "user", "content": user_prompt},
- ],
- "temperature": 0.2,
- "max_tokens": max_tokens,
- "response_format": {"type": "text"},
- }
+def call_llm_json(system_prompt: str, user_prompt: str, max_tokens: int = 1024) -> dict | None:
try:
- response = requests.post(LM_STUDIO_URL, json=payload, timeout=600)
- if not response.ok:
- print(f" ! LLM error: {response.status_code} {response.reason}")
- print(f" body: {response.text[:500]}")
- return None
- result = response.json()
- choice = result['choices'][0]
- content = choice['message'].get('content') or ''
+ message = client.messages.create(
+ model=MODEL,
+ max_tokens=max_tokens,
+ system=system_prompt,
+ messages=[{"role": "user", "content": user_prompt}],
+ temperature=0.2,
+ )
+ content = message.content[0].text if message.content else ""
parsed = parse_json_response(content)
if parsed is None:
- finish = choice.get('finish_reason')
- print(f" ! LLM returned no parseable JSON (finish_reason={finish})")
- if finish == 'length':
- print(" Hit max_tokens — reasoning model burned the budget before emitting output.")
+ print(f" ! LLM returned no parseable JSON (stop_reason={message.stop_reason})")
print(f" content: {content[:500]!r}")
return parsed
- except Exception as e:
- print(f" ! LLM error: {e}")
+ except anthropic.APIError as e:
+ print(f" ! Anthropic API error: {e}")
return None
-def request_metadata(title, note_content, taxonomy):
+def request_metadata(title: str, note_content: str, taxonomy: list[str]) -> dict | None:
taxonomy_str = ", ".join(taxonomy)
system_prompt = f"""You analyze markdown notes and return structured metadata.
@@ -136,9 +139,8 @@ Return ONLY valid JSON in this exact shape:
}}
Rules:
-- Tags should describe what the note is substantively about, not topics it merely mentions in passing.
-- tags_from_taxonomy: 0-5 tags drawn from the existing taxonomy, ONLY when they genuinely fit. Do NOT force a taxonomy tag — return an empty list if nothing truly applies.
-- new_tag_suggestions: 0-5 NEW tags when the taxonomy doesn't adequately cover the content. Each must be a reusable category (not hyper-specific to one note). Use lowercase-hyphenated style (e.g., personal-essay).
+- tags_from_taxonomy: 1-5 tags drawn from the existing taxonomy that best fit the content.
+- new_tag_suggestions: 0-2 NEW tags, only when content truly warrants it (be conservative).
- seo_title_suffix: a short, clean, non-clickbaity descriptor of the note. Do NOT include the note title or a leading colon — only the text that would follow "
: ". Aim for 4-10 words.
- seo_description: a clean factual summary, STRICTLY between {SEO_DESC_MIN} and {SEO_DESC_MAX} characters inclusive. Count characters carefully before responding.
- seo_keywords: 10-15 relevant keywords, no duplicates.
@@ -154,13 +156,14 @@ Produce the JSON described in the system prompt."""
return call_llm_json(system_prompt, user_prompt)
-def request_description_retry(title, note_content, previous_desc):
+def request_description_retry(title: str, note_content: str, previous_desc: str) -> str:
system_prompt = f"""You rewrite SEO descriptions to a strict length.
Return ONLY valid JSON of the form:
{{"seo_description": "..."}}
The description must be a clean, factual summary of the note, STRICTLY between {SEO_DESC_MIN} and {SEO_DESC_MAX} characters inclusive. Count characters carefully before responding."""
+
user_prompt = f"""Note title: {title}
Note content:
@@ -172,46 +175,49 @@ Your previous description was {len(previous_desc)} characters, outside the allow
Rewrite it to fit strictly within {SEO_DESC_MIN}-{SEO_DESC_MAX} characters."""
result = call_llm_json(system_prompt, user_prompt)
if result:
- return (result.get('seo_description') or '').strip()
- return ''
+ return (result.get("seo_description") or "").strip()
+ return ""
-def process_note(file_path, taxonomy, new_tag_accumulator):
+# ---------------------------------------------------------------------------
+# Note processing
+# ---------------------------------------------------------------------------
+
+def process_note(file_path: Path, taxonomy: list[str], new_tag_accumulator: set) -> None:
print(f"Processing: {file_path}")
- with open(file_path, 'r', encoding='utf-8') as f:
- content = f.read()
+ content = file_path.read_text(encoding="utf-8")
frontmatter, body = extract_frontmatter(content)
if frontmatter is None:
print(" ⚠️ No frontmatter found, skipping")
return
- existing_tags = frontmatter.get('tags', []) or []
+ existing_tags = frontmatter.get("tags", []) or []
if existing_tags == [None]:
existing_tags = []
needs_tags = not existing_tags
- needs_slug = not frontmatter.get('slug')
- needs_seo_title = not frontmatter.get('seo-title')
- needs_seo_desc = not frontmatter.get('seo-description')
- needs_seo_keywords = not frontmatter.get('seo-keywords')
+ needs_slug = not frontmatter.get("slug")
+ needs_seo_title = not frontmatter.get("seo-title")
+ needs_seo_desc = not frontmatter.get("seo-description")
+ needs_seo_keywords = not frontmatter.get("seo-keywords")
- if not (needs_tags or needs_slug or needs_seo_title or needs_seo_desc or needs_seo_keywords):
+ if not any([needs_tags, needs_slug, needs_seo_title, needs_seo_desc, needs_seo_keywords]):
print(" ✓ All fields already populated, skipping")
return
- title = frontmatter.get('title') or Path(file_path).stem
+ title = frontmatter.get("title") or file_path.stem
updated = False
if needs_slug:
- slug = slugify(Path(file_path).stem)
- frontmatter['slug'] = slug
+ slug = slugify(file_path.stem)
+ frontmatter["slug"] = slug
print(f" + Added slug: {slug}")
updated = True
- if not (needs_tags or needs_seo_title or needs_seo_desc or needs_seo_keywords):
- with open(file_path, 'w', encoding='utf-8') as f:
- f.write(reconstruct_markdown(frontmatter, body))
+ # If only slug was needed, skip the LLM call
+ if not any([needs_tags, needs_seo_title, needs_seo_desc, needs_seo_keywords]):
+ file_path.write_text(reconstruct_markdown(frontmatter, body), encoding="utf-8")
print(" ✓ Updated successfully")
return
@@ -221,11 +227,11 @@ def process_note(file_path, taxonomy, new_tag_accumulator):
return
if needs_tags:
- taxonomy_tags = llm_response.get('tags_from_taxonomy') or []
- new_suggestions = llm_response.get('new_tag_suggestions') or []
- combined = list(dict.fromkeys(list(taxonomy_tags) + list(new_suggestions)))[:8]
+ taxonomy_tags = llm_response.get("tags_from_taxonomy") or []
+ new_suggestions = llm_response.get("new_tag_suggestions") or []
+ combined = list(dict.fromkeys(list(taxonomy_tags) + list(new_suggestions)))[:5]
if combined:
- frontmatter['tags'] = combined
+ frontmatter["tags"] = combined
updated = True
print(f" + Added tags: {', '.join(combined)}")
genuinely_new = [t for t in combined if t not in taxonomy and t in new_suggestions]
@@ -234,19 +240,17 @@ def process_note(file_path, taxonomy, new_tag_accumulator):
new_tag_accumulator.update(genuinely_new)
if needs_seo_title:
- suffix = (llm_response.get('seo_title_suffix') or '').strip()
- suffix = suffix.lstrip(':').strip()
- # Strip a leading repeat of the title if the LLM included it anyway
+ suffix = (llm_response.get("seo_title_suffix") or "").strip().lstrip(":").strip()
if suffix.lower().startswith(title.lower()):
- suffix = suffix[len(title):].lstrip(':').strip()
+ suffix = suffix[len(title):].lstrip(":").strip()
if suffix:
seo_title = f"{title}: {suffix}"
- frontmatter['seo-title'] = seo_title
+ frontmatter["seo-title"] = seo_title
updated = True
print(f" + Added SEO title: {seo_title}")
if needs_seo_desc:
- seo_desc = (llm_response.get('seo_description') or '').strip()
+ seo_desc = (llm_response.get("seo_description") or "").strip()
if seo_desc and not (SEO_DESC_MIN <= len(seo_desc) <= SEO_DESC_MAX):
print(f" ~ SEO description length {len(seo_desc)} outside {SEO_DESC_MIN}-{SEO_DESC_MAX}, re-asking")
retry = request_description_retry(title, body[:CONTENT_CHAR_LIMIT], seo_desc)
@@ -257,30 +261,32 @@ def process_note(file_path, taxonomy, new_tag_accumulator):
else:
print(" ! Retry failed; using original")
if seo_desc:
- frontmatter['seo-description'] = seo_desc
+ frontmatter["seo-description"] = seo_desc
updated = True
print(f" + Added SEO description ({len(seo_desc)} chars)")
if needs_seo_keywords:
- seo_keywords = list(dict.fromkeys(llm_response.get('seo_keywords') or []))
+ seo_keywords = list(dict.fromkeys(llm_response.get("seo_keywords") or []))
if seo_keywords:
- frontmatter['seo-keywords'] = seo_keywords
+ frontmatter["seo-keywords"] = seo_keywords
updated = True
print(f" + Added {len(seo_keywords)} SEO keywords")
if updated:
- with open(file_path, 'w', encoding='utf-8') as f:
- f.write(reconstruct_markdown(frontmatter, body))
+ file_path.write_text(reconstruct_markdown(frontmatter, body), encoding="utf-8")
print(" ✓ Updated successfully")
else:
print(" - No updates needed")
-def main():
+# ---------------------------------------------------------------------------
+# Entry point
+# ---------------------------------------------------------------------------
+
+def main() -> None:
taxonomy_path = Path(__file__).parent / TAXONOMY_FILE
if not taxonomy_path.exists():
print(f"Error: Taxonomy file not found at {taxonomy_path}")
- print(f"Please create {TAXONOMY_FILE} in the same directory as this script")
sys.exit(1)
taxonomy = load_taxonomy(taxonomy_path)
@@ -290,11 +296,8 @@ def main():
if not target_path.exists():
print(f"Error: Notes folder not found: {target_path}")
sys.exit(1)
- if not target_path.is_dir():
- print(f"Error: {target_path} is not a directory")
- sys.exit(1)
- md_files = sorted(target_path.rglob('*.md'))
+ md_files = sorted(target_path.rglob("*.md"))
if not md_files:
print(f"No markdown files found in {target_path}")
sys.exit(0)
@@ -302,7 +305,7 @@ def main():
print(f"Processing all markdown files under: {target_path}")
print(f"Found {len(md_files)} markdown files\n")
- new_tag_accumulator = set()
+ new_tag_accumulator: set[str] = set()
for md_file in md_files:
try:
process_note(md_file, taxonomy, new_tag_accumulator)
@@ -313,17 +316,27 @@ def main():
print("\n✓ Processing complete!")
fresh = sorted(t for t in new_tag_accumulator if t not in taxonomy)
- if fresh:
- print(f"\nNew tags suggested during this run: {', '.join(fresh)}")
- try:
- answer = input("Add these to the taxonomy? [y/N]: ").strip().lower()
- except EOFError:
- answer = ''
- if answer == 'y':
- append_tags_to_taxonomy(taxonomy_path, fresh)
- print(f"✓ Added {len(fresh)} tag(s) to {taxonomy_path.name}")
- else:
- print("Skipped taxonomy update.")
+ if not fresh:
+ return
+
+ print(f"\nNew tags suggested during this run: {', '.join(fresh)}")
+
+ # Non-interactive (CI): log and skip
+ if not sys.stdin.isatty():
+ print("Non-interactive environment detected — skipping taxonomy update.")
+ print(f"To add these manually, run the script locally and answer 'y' when prompted.")
+ return
+
+ try:
+ answer = input("Add these to the taxonomy? [y/N]: ").strip().lower()
+ except EOFError:
+ answer = ""
+
+ if answer == "y":
+ append_tags_to_taxonomy(taxonomy_path, fresh)
+ print(f"✓ Added {len(fresh)} tag(s) to {taxonomy_path.name}")
+ else:
+ print("Skipped taxonomy update.")
if __name__ == "__main__":
diff --git a/tag-taxonomy.yaml b/tag-taxonomy.yaml
index 6c8ce35..5542f64 100644
--- a/tag-taxonomy.yaml
+++ b/tag-taxonomy.yaml
@@ -1,59 +1,51 @@
-# Tag Taxonomy for Note Tagging
-# Add new tags here as the LLM suggests good ones
-
tags:
- # Technology & Development
- - self-hosting
- - linux
- - automation
- - ai-tools
- - web-development
- - infrastructure
- - docker
- - security
- - privacy
-
- # Work & Management
- - project-management
- - business-analysis
- - leadership
- - agile
- - team-dynamics
- - process-improvement
- - governance
-
- # Knowledge & Learning
- - knowledge-management
- - zettelkasten
- - note-taking
- - learning
- - productivity
-
- # Philosophy & Spirituality
- - buddhism
- - eastern-philosophy
- - meditation
- - mindfulness
-
- # Literature & Writing
- - literature
- - postmodernism
- - writing
-
- # Personal Interests
- - plants
- - aroids
- - gardening
-
- # Personal Narrative & Life
- - memoir
- - personal-essay
- - reflection
- - family
- - parenting
- - recovery
- - mental-health
- - aging
- - relationships
- - childhood
- - identity
\ No newline at end of file
+- self-hosting
+- linux
+- automation
+- ai-tools
+- web-development
+- infrastructure
+- docker
+- security
+- privacy
+- project-management
+- business-analysis
+- leadership
+- agile
+- team-dynamics
+- process-improvement
+- governance
+- knowledge-management
+- zettelkasten
+- note-taking
+- learning
+- productivity
+- buddhism
+- eastern-philosophy
+- meditation
+- mindfulness
+- literature
+- postmodernism
+- writing
+- plants
+- aroids
+- gardening
+- memoir
+- personal-essay
+- reflection
+- family
+- parenting
+- recovery
+- mental-health
+- aging
+- relationships
+- childhood
+- identity
+- morning-routine
+- sleep-habits
+- baking
+- cooking-techniques
+- fermentation
+- food-baking
+- kitchen-hacks
+- sourdough