From dffb3f3c850b4128356ec6191b8cd2ed4e80633a Mon Sep 17 00:00:00 2001
From: Ethan J Lewis <ethan@ejlewis.com>
Date: Tue, 14 Apr 2026 04:56:45 -0500
Subject: [PATCH] lots of fixes

---
 .tag-notes.py.swp | Bin 0 -> 16384 bytes
 CLAUDE.md         |  26 ++++
 README.md         |  26 ++--
 tag-notes.py      | 356 +++++++++++++++++++++++++++-------------------
 4 files changed, 247 insertions(+), 161 deletions(-)
 create mode 100644 .tag-notes.py.swp
 create mode 100644 CLAUDE.md
diff --git a/.tag-notes.py.swp b/.tag-notes.py.swp
new file mode 100644
index 0000000000000000000000000000000000000000..840f118ce38605b36c9b5593a885019e09e4c2a4
GIT binary patch
literal 16384
zcmeHNO^h5z6>gIdlh}zZtZ)EDD))kB#>~&o+8e}5OIYl+Vr%vX?>dg<HSM06nwj?Y
zpR2lecf8p^q!20L6cHgrf(R!L+yDuL(3%4hA_pRYxWEB0awIMRPDls|zE@rSvuk5S
zE(p{keVU%?`u*N}Rqwr;?&LG)7T717Z3Ca9hVjx*UU6Rg*iqx~^b-cZ?($7X(wqB!
zZgRgH1nc*$Loc7#9k#lT-|~ZqHzIpD<f4VI2K|=JaQtt3DJzr=lnlH#1|Bs|PPHeg
z?Sy%p9sA(L_ts6>s$`&Kpk$zApk$zApk$zApk$zA;D3^VD15;9JZAnrJ$p}oAK3T(
z5B(YHa)00Z2lVf{{{Gdz_xfk~RWeXAP%=<5P%=<5P%=<5P%=<5P%=<5P%=<5@LyoS
z?&B^H<Nk>Pe%$}3@&6x>7{(po$3O_Uz&c<9&j9Cv8K4O~4jcj=0si)wVeA5L09!yE
z_&D$|@asnn<2%5szzrY-7J&}|w+|b}cYzmx=Kvp=2P(i{4jINR;D^BLz}El|m<A35
zZ$5&)fB={S4g$Y>*f4$rJO|7HM}XTOFpO^iBVY*30iOnr0Y`y%4r0B)*MVn&<G@FN
z4+C#MWEejOZUMW%_kfpyS>OQhgZCT87l9Xn2)G2y0SAF!JZKm{1-=cu3VaE83D^Nn
z0!M&%kwCcv>;f+UTflkX2(XLf%(s9YAOb!MJPkB}2Z29Bb|k-d)1=LP^oZ$?;884m
zwix)_NXtUu_)&FW&I=I;*0WvL@rSI_S(stle8;S*ihQfb!-&nRFDLMsEg7H5`p6#i
z0M}*UH{HPQ%W5z1Bko5@H#BBemwxECY?e_I%d?~2s5ult99G+T6K7C(osXI~L3_cQ
zlG~y;s*1|BYuD)Anmkcm+isqCYHg>6XOvdz>4<e|aAs+BuG7iKnP4k{$0Nt%k_eD&
zQ`jLpzp}K*9G`i%Snmg$KDy|0)^&s4y37_c!B)kXMF9(KA$ihfAOep?BaWeD82B>h
zJZqM(A9Z3kx>D(l0;k7i<?5Bmt5+(RiL{3tzbd+F<!aV=0`iE2-HY}NlCChTmOUgx
zD6W$EG>N5ZzW#KcO<_BdTZ2GYGP0vs-lLA{=0?nAg#YgZGOBhKtd-S^XU;8I7cX?`
zN?^00Jti%vGvYLequD1XlhGnEPUNU2x06lL^z1Dw3f8$V%^5a5IhntSxEFGP*$9qT
zlg&@mvv*hXM~2U~DFT-&nX%-;tTR({haxJmLI@^|@=^CTl4FUn_BKn((uVnPrWO#*
z_DVJ%(}RY%C|X!LGvBcm=N9IZCYld$w+%yPEAvZiosTyI(U-71pLIDKa-T#0C9Ae4
z8|~@rea$pY-K5WDPdH&jb|^<d?Dnal&3Z0J9|I^vY`aXx9yE50ex}jUlas7BvPoIE
zkTu;=<U}qnbU_^mX8V1pJA@BwbT)XMEiSF1Z>P83wVh~8ZDag_OGd2Sm|~0bml*!Z
zC0XFRV+Ko2Ra$UtC1bW}3t{__WKP7Ym?mCB1RJVrn#V<0OS>_h^pRMMy%qR@2WuMH
z5gVXg7lRoWxW~ON7c+*jO@AA+^gug?;=LeP&^rKIbH444Z*p|wNJcHx${_m<2llJx
zOiLvp!07G19ogu6XNQPp%#=K^QsIzvP5q-ynxUVk3eP7^vP{*iARx3fPX~Sv_H>tI
zf`KwzH8s7Mrb&vTI#I$=&6QMEm8_oT(0*0L=bIMRV97)l&;_M~CdyIVHkD17v`ULu
zMcsr|wG>qY%fR;()!g0&z3OOKTI^hA8@B88vwctT$G2#oNt6OC&2Kp}qH_X007=(4
zFw#ft8v=rvZtm4_Qh-#!>nu@l!Ck%q4aqg#IMrmQNte}bt*9d*6<FP?)Uv*x+M03l
zo?RpgUj^d@g{~P>&7vy9a+G=#rA37<XtL36vQ<)grLa6FLu+m|)#y441s`z1(mKiZ
zw`V)i>UuednJvhUN8Akuv5PZSf@PvcS%RJDv&;`7(i~aTYFXG+i=w~~q^Zs8f#X-x
z7s*th?WbX=th2yYqPXt_Y;O4+9b`j}acPbFjfCu@&td3jX1T6cO}5jzva!@EvE^&6
zs6kyAKH4=Hn2knb&EE!deuNzc`;k(Xq_i9K$2(+xjHoSEz!Ju`=QjJX7gh`J>d=dU
zjib{VxWOj68oPY<tnEr{T$mt=V&9<?dv;aCD3gJR$gX8lDIsBk!A3W@fQILz5($^*
z7CH>N3{_W?!rCuz*eZ%**hA(LY&y}1#Ub_(-Y@o%awYOcaB>nqV(%(w<@=~2H?82X
zWL}$^2~m_%n$Nk;s2|uvUPSC>Eeho+q0%03i%#j{e5g@E4UI#6qI%Igo66r7`lM$2
zDrz?l2ZL%}P*C+7wc&`V$7-mwA+dV$8SnDZbOmZt`5Y;XJhA3i;o#NQ{KC)LM=NT<
z6KsL}$wY{SS>im2HaYhh+CY0;$8+dZRBa%z)Tt`AAREwIG;!-?l1~N96Hkox+Ei;Z
z*LSZ0&^gnDaaAj1UGPY+T+O+pmsp<C^nzfL3QEa>kmn|Bfss+2RYavm9{C_)7jx=B
zp6YNZ?$620l}ZmBI7r%#N%q<CknIktYf?y@{_`u=*`>~z`3sm@0Cx|AHu+WvQs~sJ
zn$Ne+1ijdU$Ycxu_HNu<Z=ITC%W)S@L+ex<@doy(7D9>Dxo4LamliHtXU}!!(HR{;
z_&a#AYc^@srjh$hRDqUjJFQ_DH3BIcQ<GiO*yCwZrBM`xGp!apa@!rj7oBNOojldr
zXt&_<M6Dj2Jr`a*osfpI^no*s;Yp&zFdQ$$jsPVzlw4_TuG3|@co{#_D&#Du&w?ke
zZ~B|2o<3u9PagyPjh<*;76E*EiOo7SezjRjxTAE4+DuKIVh-AL>^@`%-N0t$#`Lv+
z2R4XMI)H|%wRU0_v=hTtly|5Go;co$rBHvp=l%u@$I&S8PZ|{e{}r+EEyT<e|JQZw
z_)Em{KLfrAya~Jpd;vHMOaQ+{JpT%?0o(xQfkVJU!0!>y{|NX#@D<<^a2og&@C5J&
z#Pn|iZvkHgt^*uc1|A0v0DnhZzYDw!ya)_|r+{O?W57F@?;YSRfabmnl%JA;l7W(e
zl7W(el7W(el7atw1}G-=Y@{-32I5M~@@<b>7Mn#VU?IGAe9JO*(3RY7m}~y+H@?iY
z8%|EM4hr~jeZirEKk7GWP-AR@ea5x@b%yH@Wczi*NO4o1^he@@+<J@hFjcQ_(c41e
zL-mQ28Pl)SB|=iGn5aoFV9o|HJo#-G@ttlUYdd+spoU)a^?cJtg(XoICkroBZ49V#
zasuI|is#bnoNTp-RS?NJQB~*Va|Q<HVz9rvftn$%LXhXXt4~M*Jfv$aE-+mULNyl&
zErgg^<L1x}y4A{wrq{33k|=i~6%(az_Rj!g(}hakK|&=8#8@rl?$s3Tj#8r3_0~Q%
zSdth<{FQa6>D7xV&eV|>YE=;u6G{Kcm@525?$qrg5l9><9{0`VUk|KOhq!_%3|yEO
zNTndPS`5=uA}O~{bRqHIrxaLUN1e%3@^z#fQPu#JlnSO%IaH8h(Ij_}7G`Rg7LaJ~
zlSasg0(C4?x_gLe8F^pKx`*2Xgt*&#5(YbDK(rnSd(TBinGcntPHsgsku*@cE-m|2
zooK+F0<JwWK``7h<cVua0HkJ;X+e^pP$op?f;+EN94P_nT`CJp?F5!pL#jj|E5cfA
z&q55E+yqOKG<s4hrJ8JNdh4P}=?WNYAd8|ijJVC<9D>p-<;Zm=y0Nzf?RXb;N>au>
M+NqFLcE@P;Pv$M2mH+?%

literal 0
HcmV?d00001

diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 100644
index 0000000..e90618e
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1,26 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Overview
+
+Single-script tool that walks a hardcoded Obsidian notes folder and fills empty frontmatter fields (`tags`, `slug`, `seo-title`, `seo-description`, `seo-keywords`) by calling a local LM Studio LLM. Only empty fields are touched; everything else in frontmatter is preserved.
+
+## Run
+
+```bash
+pip install pyyaml requests
+./tag-notes.py
+```
+
+Requires LM Studio running at `LM_STUDIO_URL` with `MODEL_NAME` loaded. There are no tests, linter, or build step.
+
+Sanity-check the LLM endpoint with: `curl http://localhost:1234/v1/models`
+
+## Architecture
+
+- `tag-notes.py` — all logic. Top-of-file constants (`LM_STUDIO_URL`, `MODEL_NAME`, `TAXONOMY_FILE`, `NOTES_FOLDER`, `CONTENT_CHAR_LIMIT`, `SEO_DESC_MIN`, `SEO_DESC_MAX`) are the configuration surface — edit them in place.
+- `tag-taxonomy.yaml` — `tags:` list injected into the LLM system prompt. Add tags here when the LLM's "new_tag_suggestions" are worth keeping.
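+- Flow per note: `extract_frontmatter` (regex + round-trip `ruamel.yaml` load) → decide which fields are empty → slug is derived locally from the filename via `slugify` → if any LLM-backed field is empty, send `body[:CONTENT_CHAR_LIMIT]` to `request_metadata`, which calls `call_llm_json` and expects a strict JSON response (falling back to parsing the outermost `{...}` span when the reply contains extra text) → an out-of-range `seo-description` is re-requested once via `request_description_retry` → `reconstruct_markdown` dumps the frontmatter back through the same round-trip YAML instance to preserve field order and formatting.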
+- The 20000-char cap is the `CONTENT_CHAR_LIMIT` constant, applied where the note body is sliced for `request_metadata` and `request_description_retry`.
+- Notes without `---` frontmatter are skipped, not created.
diff --git a/README.md b/README.md
index b710797..7406c0d 100644
--- a/README.md
+++ b/README.md
@@ -2,24 +2,21 @@
 
 Automatically tag your Obsidian notes, add slugs, and add SEO metadata using a local LLM.
 
-The script requires 5 frontmatter fields named as such:
+The script requires these frontmatter fields:
+- title (used as the basis for the SEO title; if it is missing or empty, the filename is used instead)
 - tags
 - seo-title
 - seo-description
 - seo-keywords
 - slug
 
-**Note:** The script sends only the first 20,000 characters of the note to the LLM. This should be more than sufficient for most notes, however if you have extremely long notes then you can increase this amount by editing the following line in `tag-notes.py`:
-
-```py
-llm_response = call_llm(None, body[:20000], taxonomy)  # Limit content to first 20000 chars
-```  
+**Note:** The script sends only the first 20,000 characters of the note to the LLM. This should be more than sufficient for most notes, however if you have extremely long notes then you can increase this by editing the `CONTENT_CHAR_LIMIT` constant in `tag-notes.py`.
 
 ## Setup
 
 1. **Install dependencies:**
    ```bash
-   pip install pyyaml requests
+   pip install pyyaml ruamel.yaml requests
    ```
 2. **Make sure LM Studio is running** on  with the preferred model loaded
 3. **Place these files in a directory:**
@@ -39,24 +36,25 @@ Simply run the script:
 ./tag-notes.py
 ```
 
-It will automatically process all markdown files in the 'NOTES_FOLDER'.
+It will recursively process all markdown files under `NOTES_FOLDER`.
 
 ## What it does
 
 The script will:
 - ✓ Add tags (1-5) using your taxonomy + 1-2 new suggestions
-- ✓ Add SEO title (clean, non-clickbaity)
-- ✓ Add SEO description (150-160 chars, factual)
+- ✓ Add SEO title in the format `{title}: {short descriptor}`, built from the note's `title:` frontmatter field
+- ✓ Add SEO description (target 150-160 chars, factual — re-asks the LLM once if the first response is out of range, and keeps the original if the retry is also out of range)
 - ✓ Add SEO keywords (generous, 10-15 keywords)
-- ✓ Add slug based off of filename
+- ✓ Add slug derived from the filename (lowercased, punctuation stripped, spaces/hyphens collapsed)
 - ✓ **Only updates empty fields** - preserves existing values
 - ✓ Updates files directly (no confirmation needed)
-- ✓ **Only touches**: `tags`, `seo-title`, `seo-description`, `seo-keywords`
-- ✓ **Preserves everything else** in your frontmatter
+- ✓ **Preserves existing frontmatter formatting** via round-trip YAML
+
+At the end of a run, if the LLM proposed any genuinely new tags, the script lists them and prompts you to append them to `tag-taxonomy.yaml` automatically.
 
 ## Managing the taxonomy
 
-Edit `tag-taxonomy.yaml` to add new tags that the LLM suggests and you like.
+At the end of each run, the script will prompt you to add any newly suggested tags to `tag-taxonomy.yaml` automatically. You can also edit the file by hand at any time.
 
 The LLM will:
 - Prefer existing taxonomy tags
diff --git a/tag-notes.py b/tag-notes.py
index e8f5534..612629f 100755
--- a/tag-notes.py
+++ b/tag-notes.py
@@ -6,251 +6,313 @@ Processes markdown notes using a local LLM to add tags, slugs, and SEO metadata
 
 import os
 import sys
-import yaml
-import requests
+import io
 import json
-from pathlib import Path
 import re
+from pathlib import Path
+
+import requests
+import yaml
+from ruamel.yaml import YAML
 
 # Configuration
-LM_STUDIO_URL = "http://192.168.68.84:1234/v1/chat/completions"
+LM_STUDIO_URL = "http://localhost:1234/v1/chat/completions"
 MODEL_NAME = "openai/gpt-oss-20b"
 TAXONOMY_FILE = "tag-taxonomy.yaml"
 NOTES_FOLDER = os.path.expanduser("~/Documents/ejl-zk/40 Public/41 Notes/")
+CONTENT_CHAR_LIMIT = 20000
+SEO_DESC_MIN = 150
+SEO_DESC_MAX = 160
+
+# Round-trip YAML preserves existing frontmatter formatting
+yaml_rt = YAML()
+yaml_rt.preserve_quotes = True
+yaml_rt.width = 4096
+
 
 def load_taxonomy(taxonomy_path):
-    """Load the tag taxonomy from YAML file"""
     with open(taxonomy_path, 'r') as f:
-        data = yaml.safe_load(f)
-    return data.get('tags', [])
+        data = yaml.safe_load(f) or {}
+    return data.get('tags', []) or []
+
+
+def append_tags_to_taxonomy(taxonomy_path, new_tags):
+    with open(taxonomy_path, 'r') as f:
+        data = yaml.safe_load(f) or {}
+    existing = data.get('tags', []) or []
+    combined = list(dict.fromkeys(existing + list(new_tags)))
+    data['tags'] = combined
+    with open(taxonomy_path, 'w') as f:
+        yaml.dump(data, f, default_flow_style=False, sort_keys=False, allow_unicode=True)
+
 
 def extract_frontmatter(content):
-    """Extract frontmatter and content from markdown"""
-    # Match YAML frontmatter between --- delimiters
     pattern = r'^---\s*\n(.*?)\n---\s*\n(.*)$'
     match = re.match(pattern, content, re.DOTALL)
-    
-    if match:
-        frontmatter_str = match.group(1)
-        body = match.group(2)
-        frontmatter = yaml.safe_load(frontmatter_str)
-        return frontmatter, body, frontmatter_str
-    
-    return None, content, None
+    if not match:
+        return None, content
+    frontmatter = yaml_rt.load(match.group(1))
+    return frontmatter, match.group(2)
+
 
 def reconstruct_markdown(frontmatter, body):
-    """Reconstruct markdown with updated frontmatter"""
-    # Convert frontmatter to YAML string
-    frontmatter_str = yaml.dump(frontmatter, default_flow_style=False, allow_unicode=True, sort_keys=False)
-    return f"---\n{frontmatter_str}---\n{body}"
+    stream = io.StringIO()
+    yaml_rt.dump(frontmatter, stream)
+    fm_str = stream.getvalue()
+    if not fm_str.endswith('\n'):
+        fm_str += '\n'
+    return f"---\n{fm_str}---\n{body}"
 
-def call_llm(prompt, note_content, taxonomy):
-    """Call LM Studio API to get tags and SEO metadata"""
-    taxonomy_str = ", ".join(taxonomy)
-    
-    system_prompt = f"""You are a helpful assistant that analyzes markdown notes and provides:
-1. Tags from existing taxonomy (1-5 tags, prefer existing)
-2. 1-2 NEW tag suggestions if content warrants it
-3. Clean, concise SEO title (not clickbaity)
-4. Clean, concise SEO description (150-160 chars, factual summary)
-5. SEO keywords (be generous, 10-15 relevant keywords)
 
-Existing tag taxonomy: {taxonomy_str}
+def slugify(text):
+    text = text.lower()
+    text = re.sub(r"[’'`]", '', text)
+    text = re.sub(r'[^\w\s-]', ' ', text)
+    text = re.sub(r'[-\s]+', '-', text).strip('-')
+    return text
 
-Return ONLY valid JSON in this exact format:
-{{
-  "tags_from_taxonomy": ["tag1", "tag2"],
-  "new_tag_suggestions": ["newtag1"],
-  "seo_title": "Clear Title Here",
-  "seo_description": "Concise factual summary of the note content.",
-  "seo_keywords": ["keyword1", "keyword2", "keyword3"]
-}}"""
 
-    user_prompt = f"""Analyze this note and provide tags and SEO metadata:
+def parse_json_response(content):
+    if content is None:
+        return None
+    try:
+        return json.loads(content)
+    except json.JSONDecodeError:
+        pass
+    start = content.find('{')
+    end = content.rfind('}')
+    if start != -1 and end > start:
+        try:
+            return json.loads(content[start:end + 1])
+        except json.JSONDecodeError:
+            pass
+    return None
 
-{note_content}
-
-Remember:
-- Use 1-5 tags from taxonomy that fit best
-- Suggest 1-2 NEW tags only if content really warrants it (be conservative)
-- SEO title should be clear and informative, NOT clickbaity
-- SEO description should be a clean factual summary (150-160 characters)
-- SEO keywords can be generous (10-15 keywords)"""
 
+def call_llm_json(system_prompt, user_prompt, max_tokens=900):
     payload = {
         "model": MODEL_NAME,
         "messages": [
             {"role": "system", "content": system_prompt},
-            {"role": "user", "content": user_prompt}
+            {"role": "user", "content": user_prompt},
         ],
-        "temperature": 0.7,
-        "max_tokens": 500
+        "temperature": 0.2,
+        "max_tokens": max_tokens,
+        "response_format": {"type": "json_object"},
     }
-    
     try:
-        response = requests.post(LM_STUDIO_URL, json=payload, timeout=60)
+        response = requests.post(LM_STUDIO_URL, json=payload, timeout=120)
         response.raise_for_status()
         result = response.json()
-        
-        # Extract the response content
         content = result['choices'][0]['message']['content']
-        
-        # Try to parse JSON from the response
-        # Sometimes LLMs wrap JSON in markdown code blocks
-        json_match = re.search(r'```json\s*(\{.*?\})\s*```', content, re.DOTALL)
-        if json_match:
-            content = json_match.group(1)
-        
-        return json.loads(content)
-        
+        return parse_json_response(content)
     except Exception as e:
-        print(f"Error calling LLM: {e}")
+        print(f"  ! LLM error: {e}")
         return None
 
-def process_note(file_path, taxonomy):
-    """Process a single note file"""
+
+def request_metadata(title, note_content, taxonomy):
+    taxonomy_str = ", ".join(taxonomy)
+    system_prompt = f"""You analyze markdown notes and return structured metadata.
+
+Return ONLY valid JSON in this exact shape:
+{{
+  "tags_from_taxonomy": ["tag1", "tag2"],
+  "new_tag_suggestions": ["newtag1"],
+  "seo_title_suffix": "Short descriptor that will follow the note title",
+  "seo_description": "Factual summary between {SEO_DESC_MIN} and {SEO_DESC_MAX} characters.",
+  "seo_keywords": ["keyword1", "keyword2"]
+}}
+
+Rules:
+- tags_from_taxonomy: 1-5 tags drawn from the existing taxonomy that best fit the content.
+- new_tag_suggestions: 0-2 NEW tags, only when content truly warrants it (be conservative).
+- seo_title_suffix: a short, clean, non-clickbaity descriptor of the note. Do NOT include the note title or a leading colon — only the text that would follow "<title>: ". Aim for 4-10 words.
+- seo_description: a clean factual summary, STRICTLY between {SEO_DESC_MIN} and {SEO_DESC_MAX} characters inclusive. Count characters carefully before responding.
+- seo_keywords: 10-15 relevant keywords, no duplicates.
+
+Existing tag taxonomy: {taxonomy_str}"""
+
+    user_prompt = f"""Note title: {title}
+
+Note content:
+{note_content}
+
+Produce the JSON described in the system prompt."""
+    return call_llm_json(system_prompt, user_prompt)
+
+
+def request_description_retry(title, note_content, previous_desc):
+    system_prompt = f"""You rewrite SEO descriptions to a strict length.
+
+Return ONLY valid JSON of the form:
+{{"seo_description": "..."}}
+
+The description must be a clean, factual summary of the note, STRICTLY between {SEO_DESC_MIN} and {SEO_DESC_MAX} characters inclusive. Count characters carefully before responding."""
+    user_prompt = f"""Note title: {title}
+
+Note content:
+{note_content}
+
+Your previous description was {len(previous_desc)} characters, outside the allowed {SEO_DESC_MIN}-{SEO_DESC_MAX} range:
+"{previous_desc}"
+
+Rewrite it to fit strictly within {SEO_DESC_MIN}-{SEO_DESC_MAX} characters."""
+    result = call_llm_json(system_prompt, user_prompt, max_tokens=400)
+    if result:
+        return (result.get('seo_description') or '').strip()
+    return ''
+
+
+def process_note(file_path, taxonomy, new_tag_accumulator):
     print(f"Processing: {file_path}")
-    
-    # Read the file
     with open(file_path, 'r', encoding='utf-8') as f:
         content = f.read()
-    
-    # Extract frontmatter and body
-    frontmatter, body, original_fm_str = extract_frontmatter(content)
-    
+
+    frontmatter, body = extract_frontmatter(content)
     if frontmatter is None:
-        print(f"  ⚠️  No frontmatter found, skipping")
+        print("  ⚠️  No frontmatter found, skipping")
         return
-    
-    # Check what needs to be filled in
-    needs_update = False
-    existing_tags = frontmatter.get('tags', [])
-    if not existing_tags or existing_tags == [None]:
+
+    existing_tags = frontmatter.get('tags', []) or []
+    if existing_tags == [None]:
         existing_tags = []
-    
+
     needs_tags = not existing_tags
     needs_slug = not frontmatter.get('slug')
     needs_seo_title = not frontmatter.get('seo-title')
     needs_seo_desc = not frontmatter.get('seo-description')
     needs_seo_keywords = not frontmatter.get('seo-keywords')
-    
+
     if not (needs_tags or needs_slug or needs_seo_title or needs_seo_desc or needs_seo_keywords):
-        print(f"  ✓ All fields already populated, skipping")
+        print("  ✓ All fields already populated, skipping")
         return
-    
+
+    title = frontmatter.get('title') or Path(file_path).stem
     updated = False
-    
-    # Generate slug from filename if needed
+
     if needs_slug:
-        # Get filename without extension
-        filename = Path(file_path).stem
-        # Convert to lowercase and replace spaces with hyphens
-        slug = filename.lower().replace(' ', '-')
+        slug = slugify(Path(file_path).stem)
         frontmatter['slug'] = slug
         print(f"  + Added slug: {slug}")
         updated = True
-    
-    # Only call LLM if we need tags or SEO fields
+
     if not (needs_tags or needs_seo_title or needs_seo_desc or needs_seo_keywords):
-        # Only needed slug, we're done
-        new_content = reconstruct_markdown(frontmatter, body)
         with open(file_path, 'w', encoding='utf-8') as f:
-            f.write(new_content)
-        print(f"  ✓ Updated successfully")
+            f.write(reconstruct_markdown(frontmatter, body))
+        print("  ✓ Updated successfully")
         return
-    
-    # Call LLM
-    llm_response = call_llm(None, body[:20000], taxonomy)  # Limit content to first 20000 chars
-    
+
+    llm_response = request_metadata(title, body[:CONTENT_CHAR_LIMIT], taxonomy)
     if not llm_response:
-        print(f"  ✗ Failed to get LLM response")
+        print("  ✗ Failed to get LLM response")
         return
-    
-    # Update frontmatter with new values (only if empty)
-    
+
     if needs_tags:
-        # Combine taxonomy tags and new suggestions
-        all_tags = llm_response.get('tags_from_taxonomy', [])
-        new_suggestions = llm_response.get('new_tag_suggestions', [])
-        all_tags.extend(new_suggestions)
-        
-        # Limit to 5 tags total
-        all_tags = all_tags[:5]
-        
-        if all_tags:
-            frontmatter['tags'] = all_tags
+        taxonomy_tags = llm_response.get('tags_from_taxonomy') or []
+        new_suggestions = llm_response.get('new_tag_suggestions') or []
+        combined = list(dict.fromkeys(list(taxonomy_tags) + list(new_suggestions)))[:5]
+        if combined:
+            frontmatter['tags'] = combined
             updated = True
-            print(f"  + Added tags: {', '.join(all_tags)}")
-            if new_suggestions:
-                print(f"    (New suggestions: {', '.join(new_suggestions)})")
-    
+            print(f"  + Added tags: {', '.join(combined)}")
+            genuinely_new = [t for t in combined if t not in taxonomy and t in new_suggestions]
+            if genuinely_new:
+                print(f"    (New suggestions: {', '.join(genuinely_new)})")
+                new_tag_accumulator.update(genuinely_new)
+
     if needs_seo_title:
-        seo_title = llm_response.get('seo_title', '')
-        if seo_title:
+        suffix = (llm_response.get('seo_title_suffix') or '').strip()
+        suffix = suffix.lstrip(':').strip()
+        # Strip a leading repeat of the title if the LLM included it anyway
+        if suffix.lower().startswith(title.lower()):
+            suffix = suffix[len(title):].lstrip(':').strip()
+        if suffix:
+            seo_title = f"{title}: {suffix}"
             frontmatter['seo-title'] = seo_title
             updated = True
             print(f"  + Added SEO title: {seo_title}")
-    
+
     if needs_seo_desc:
-        seo_desc = llm_response.get('seo_description', '')
+        seo_desc = (llm_response.get('seo_description') or '').strip()
+        if seo_desc and not (SEO_DESC_MIN <= len(seo_desc) <= SEO_DESC_MAX):
+            print(f"  ~ SEO description length {len(seo_desc)} outside {SEO_DESC_MIN}-{SEO_DESC_MAX}, re-asking")
+            retry = request_description_retry(title, body[:CONTENT_CHAR_LIMIT], seo_desc)
+            if retry and SEO_DESC_MIN <= len(retry) <= SEO_DESC_MAX:
+                seo_desc = retry
+            elif retry:
+                print(f"  ! Retry still {len(retry)} chars; using original")
+            else:
+                print("  ! Retry failed; using original")
         if seo_desc:
             frontmatter['seo-description'] = seo_desc
             updated = True
-            print(f"  + Added SEO description")
-    
+            print(f"  + Added SEO description ({len(seo_desc)} chars)")
+
     if needs_seo_keywords:
-        seo_keywords = llm_response.get('seo_keywords', [])
+        seo_keywords = list(dict.fromkeys(llm_response.get('seo_keywords') or []))
         if seo_keywords:
             frontmatter['seo-keywords'] = seo_keywords
             updated = True
             print(f"  + Added {len(seo_keywords)} SEO keywords")
-    
+
     if updated:
-        # Write back to file
-        new_content = reconstruct_markdown(frontmatter, body)
         with open(file_path, 'w', encoding='utf-8') as f:
-            f.write(new_content)
-        print(f"  ✓ Updated successfully")
+            f.write(reconstruct_markdown(frontmatter, body))
+        print("  ✓ Updated successfully")
     else:
-        print(f"  - No updates needed")
+        print("  - No updates needed")
+
 
 def main():
-    # Load taxonomy
     taxonomy_path = Path(__file__).parent / TAXONOMY_FILE
     if not taxonomy_path.exists():
         print(f"Error: Taxonomy file not found at {taxonomy_path}")
         print(f"Please create {TAXONOMY_FILE} in the same directory as this script")
         sys.exit(1)
-    
+
     taxonomy = load_taxonomy(taxonomy_path)
     print(f"Loaded {len(taxonomy)} tags from taxonomy\n")
-    
-    # Use the hardcoded notes folder
+
     target_path = Path(NOTES_FOLDER)
-    
     if not target_path.exists():
         print(f"Error: Notes folder not found: {target_path}")
         sys.exit(1)
-    
     if not target_path.is_dir():
         print(f"Error: {target_path} is not a directory")
         sys.exit(1)
-    
-    # Process all markdown files in the directory
-    md_files = list(target_path.glob('*.md'))
-    
+
+    md_files = sorted(target_path.rglob('*.md'))
     if not md_files:
         print(f"No markdown files found in {target_path}")
         sys.exit(0)
-    
-    print(f"Processing all markdown files in: {target_path}")
+
+    print(f"Processing all markdown files under: {target_path}")
     print(f"Found {len(md_files)} markdown files\n")
-    
+
+    new_tag_accumulator = set()
     for md_file in md_files:
-        process_note(md_file, taxonomy)
-        print()  # Blank line between files
-    
+        try:
+            process_note(md_file, taxonomy, new_tag_accumulator)
+        except Exception as e:
+            print(f"  ✗ Error processing {md_file}: {e}")
+        print()
+
     print("\n✓ Processing complete!")
 
+    fresh = sorted(t for t in new_tag_accumulator if t not in taxonomy)
+    if fresh:
+        print(f"\nNew tags suggested during this run: {', '.join(fresh)}")
+        try:
+            answer = input("Add these to the taxonomy? [y/N]: ").strip().lower()
+        except EOFError:
+            answer = ''
+        if answer == 'y':
+            append_tags_to_taxonomy(taxonomy_path, fresh)
+            print(f"✓ Added {len(fresh)} tag(s) to {taxonomy_path.name}")
+        else:
+            print("Skipped taxonomy update.")
+
+
 if __name__ == "__main__":
     main()