commit 40c57e82a44d0b5256833f90f8027d281bc87621 Author: Ethan J Lewis Date: Fri Feb 6 06:17:43 2026 -0600 initial commit diff --git a/README.md b/README.md new file mode 100644 index 0000000..6698212 --- /dev/null +++ b/README.md @@ -0,0 +1,74 @@ +# Note Tagging & SEO Automation + +Automatically tag your Obsidian notes and add SEO metadata using a local LLM. + +## Setup + +1. **Install dependencies:** + ```bash + pip install pyyaml requests + ``` + +2. **Make sure LM Studio is running** on `http://192.168.68.84:1234` with the `openai/gpt-oss-20b` model loaded + +3. **Place these files in a directory:** + - `tag-notes.py` (the main script) + - `tag-taxonomy.yaml` (your tag taxonomy) + +## Usage + +Simply run the script: +```bash +./tag-notes.py +``` + +It will automatically process all markdown files in: +`~/Documents/ejl-zk/40 Public/41 Notes/` + +To change the folder, edit the `NOTES_FOLDER` variable at the top of `tag-notes.py`. + +## What it does + +The script will: +- ✓ Add tags (1-5) using your taxonomy + 1-2 new suggestions +- ✓ Add SEO title (clean, non-clickbaity) +- ✓ Add SEO description (150-160 chars, factual) +- ✓ Add SEO keywords (generous, 10-15 keywords) +- ✓ **Only updates empty fields** - preserves existing values +- ✓ Updates files directly (no confirmation needed) +- ✓ **Only touches**: `tags`, `seo-title`, `seo-description`, `seo-keywords` +- ✓ **Preserves everything else** in your frontmatter + +## Managing the taxonomy + +Edit `tag-taxonomy.yaml` to add new tags that the LLM suggests and you like. 
#!/usr/bin/env python3
"""
Note Tagging and SEO Metadata Script
Processes markdown notes using a local LLM to add tags and SEO metadata
"""

import json
import os
import re
import sys
from pathlib import Path

# Third-party dependencies are imported defensively so that a missing package
# produces an actionable message from main() instead of a bare traceback.
try:
    import requests
except ImportError:
    requests = None

try:
    import yaml
except ImportError:
    yaml = None

# Configuration
LM_STUDIO_URL = "http://192.168.68.84:1234/v1/chat/completions"
MODEL_NAME = "openai/gpt-oss-20b"
TAXONOMY_FILE = "tag-taxonomy.yaml"
NOTES_FOLDER = os.path.expanduser("~/Documents/ejl-zk/40 Public/41 Notes/")

MAX_TAGS = 5                  # Upper bound on tags written to a note
NOTE_EXCERPT_CHARS = 2000     # Only this much of the note body is sent to the LLM
LLM_TEMPERATURE = 0.7
LLM_MAX_TOKENS = 500
LLM_TIMEOUT_SECONDS = 60

# Frontmatter is a leading block fenced by lines containing only `---`.
# Only the single line break after each fence is consumed, so blank lines at
# the start of the body survive a rewrite. The lazy optional group lets an
# empty block (`---` directly followed by `---`) match as empty frontmatter.
_FRONTMATTER_RE = re.compile(
    r'\A---[ \t]*\r?\n(?:(.*?)\r?\n)??---[ \t]*(?:\r?\n|\Z)(.*)',
    re.DOTALL,
)


def load_taxonomy(taxonomy_path):
    """Load the tag taxonomy from YAML file

    Returns the entries under the top-level ``tags`` key as a list of
    strings. An empty file, a missing or empty ``tags`` key, or a non-list
    value all yield an empty list instead of raising.
    """
    with open(taxonomy_path, 'r', encoding='utf-8') as f:
        data = yaml.safe_load(f)

    if not isinstance(data, dict):
        return []

    tags = data.get('tags')
    if not isinstance(tags, list):
        return []
    return [str(tag) for tag in tags if tag is not None]


def split_frontmatter(content):
    """Split markdown text into (frontmatter_text, body) without parsing YAML

    Returns None when the text does not start with a `---` fenced block.
    The body is returned exactly as it appears after the closing fence.
    """
    match = _FRONTMATTER_RE.match(content)
    if match is None:
        return None
    return match.group(1) or "", match.group(2)


def extract_frontmatter(content):
    """Extract frontmatter and content from markdown

    Returns ``(frontmatter, body, frontmatter_str)``. When there is no
    frontmatter block, or the block does not parse to a YAML mapping,
    returns ``(None, content, None)``. An empty block yields an empty dict.

    Raises yaml.YAMLError if the block is not valid YAML.
    """
    parts = split_frontmatter(content)
    if parts is None:
        return None, content, None

    frontmatter_str, body = parts
    frontmatter = yaml.safe_load(frontmatter_str)

    if frontmatter is None:
        # `---` immediately followed by `---`: valid, just nothing in it yet
        frontmatter = {}
    if not isinstance(frontmatter, dict):
        # e.g. a note that merely opens with a horizontal rule
        return None, content, None

    return frontmatter, body, frontmatter_str


def reconstruct_markdown(frontmatter, body):
    """Reconstruct markdown with updated frontmatter

    The frontmatter is re-serialized as a whole, so key order and values are
    kept but YAML comments and original quoting style are not.
    """
    frontmatter_str = yaml.dump(
        frontmatter,
        default_flow_style=False,
        allow_unicode=True,
        sort_keys=False,
        # Disable line folding so long values (the SEO description) stay on
        # one line instead of being wrapped at the default 80 columns.
        width=float("inf"),
    )
    return f"---\n{frontmatter_str}---\n{body}"


def parse_llm_json(text):
    """Parse the JSON value contained in an LLM reply

    Accepts bare JSON, JSON inside a markdown code fence (with or without a
    `json` language tag), and JSON surrounded by prose.

    Raises ValueError (json.JSONDecodeError) when no JSON can be parsed.
    """
    try:
        return json.loads(text)
    except ValueError:
        # Fall back to the outermost {...} span, which drops fences and prose
        start = text.find("{")
        end = text.rfind("}")
        if start == -1 or end <= start:
            raise
        return json.loads(text[start:end + 1])


def as_str_list(value):
    """Coerce an LLM-supplied field into a list of non-empty, stripped strings

    A comma-separated string is split; anything that is not a string or a
    list/tuple yields an empty list; non-string items are dropped.
    """
    if isinstance(value, str):
        value = value.split(",")
    if not isinstance(value, (list, tuple)):
        return []
    return [item.strip() for item in value if isinstance(item, str) and item.strip()]


def merge_tags(taxonomy_tags, new_suggestions, limit=MAX_TAGS):
    """Combine taxonomy tags and new suggestions into the final tag list

    Taxonomy tags come first, duplicates are removed (first occurrence wins)
    and the result is capped at `limit`.

    Returns ``(tags, kept_new)`` where `kept_new` holds only the new
    suggestions that actually made it into `tags`.
    """
    from_taxonomy = as_str_list(taxonomy_tags)
    combined = from_taxonomy + as_str_list(new_suggestions)
    tags = list(dict.fromkeys(combined))[:limit]

    known = set(from_taxonomy)
    kept_new = [tag for tag in tags if tag not in known]
    return tags, kept_new


def _clean_text(value):
    """Return `value` stripped if it is a string, otherwise an empty string"""
    return value.strip() if isinstance(value, str) else ""


def call_llm(prompt, note_content, taxonomy):
    """Call LM Studio API to get tags and SEO metadata

    `prompt` is accepted for backward compatibility and is not used; the
    prompts are built here from `note_content` and `taxonomy`.

    Returns the parsed JSON object (a dict) on success. Returns None, after
    printing the reason, on a network/HTTP error or when the reply is not a
    JSON object.
    """
    taxonomy_str = ", ".join(str(tag) for tag in taxonomy)

    system_prompt = f"""You are a helpful assistant that analyzes markdown notes and provides:
1. Tags from existing taxonomy (1-5 tags, prefer existing)
2. 1-2 NEW tag suggestions if content warrants it
3. Clean, concise SEO title (not clickbaity)
4. Clean, concise SEO description (150-160 chars, factual summary)
5. SEO keywords (be generous, 10-15 relevant keywords)

Existing tag taxonomy: {taxonomy_str}

Return ONLY valid JSON in this exact format:
{{
  "tags_from_taxonomy": ["tag1", "tag2"],
  "new_tag_suggestions": ["newtag1"],
  "seo_title": "Clear Title Here",
  "seo_description": "Concise factual summary of the note content.",
  "seo_keywords": ["keyword1", "keyword2", "keyword3"]
}}"""

    user_prompt = f"""Analyze this note and provide tags and SEO metadata:

{note_content}

Remember:
- Use 1-5 tags from taxonomy that fit best
- Suggest 1-2 NEW tags only if content really warrants it (be conservative)
- SEO title should be clear and informative, NOT clickbaity
- SEO description should be a clean factual summary (150-160 characters)
- SEO keywords can be generous (10-15 keywords)"""

    payload = {
        "model": MODEL_NAME,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        "temperature": LLM_TEMPERATURE,
        "max_tokens": LLM_MAX_TOKENS,
    }

    try:
        response = requests.post(LM_STUDIO_URL, json=payload, timeout=LLM_TIMEOUT_SECONDS)
        response.raise_for_status()
        result = response.json()

        # Extract the response content
        content = result['choices'][0]['message']['content']
        parsed = parse_llm_json(content)
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as e:
        # ValueError covers invalid JSON; KeyError/IndexError/TypeError cover
        # a response that lacks the expected choices/message/content shape.
        print(f"Error calling LLM: {e}")
        return None

    if not isinstance(parsed, dict):
        print(f"Error calling LLM: expected a JSON object, got {type(parsed).__name__}")
        return None

    return parsed


def process_note(file_path, taxonomy):
    """Process a single note file

    Fills in `tags`, `seo-title`, `seo-description` and `seo-keywords` when
    they are empty, leaving populated fields and all other frontmatter keys
    alone. Problems with one note are reported and the note is skipped so a
    batch run can continue.
    """
    print(f"Processing: {file_path}")

    # Read the file and split it into frontmatter and body
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
        frontmatter, body, _ = extract_frontmatter(content)
    except (OSError, UnicodeDecodeError, yaml.YAMLError) as e:
        print(f"  ✗ Could not read note: {e}")
        return

    if frontmatter is None:
        print("  ⚠️ No frontmatter found, skipping")
        return

    # Check what needs to be filled in
    existing_tags = frontmatter.get('tags')
    # `tags:` followed by a single empty list item parses as [None]
    needs_tags = not existing_tags or existing_tags == [None]
    needs_seo_title = not frontmatter.get('seo-title')
    needs_seo_desc = not frontmatter.get('seo-description')
    needs_seo_keywords = not frontmatter.get('seo-keywords')

    if not (needs_tags or needs_seo_title or needs_seo_desc or needs_seo_keywords):
        print("  ✓ All fields already populated, skipping")
        return

    # Call LLM with a bounded excerpt of the note body
    llm_response = call_llm(None, body[:NOTE_EXCERPT_CHARS], taxonomy)

    if not llm_response:
        print("  ✗ Failed to get LLM response")
        return

    # Update frontmatter with new values (only if empty)
    updated = False

    if needs_tags:
        all_tags, new_suggestions = merge_tags(
            llm_response.get('tags_from_taxonomy'),
            llm_response.get('new_tag_suggestions'),
        )
        if all_tags:
            frontmatter['tags'] = all_tags
            updated = True
            print(f"  + Added tags: {', '.join(all_tags)}")
            if new_suggestions:
                print(f"    (New suggestions: {', '.join(new_suggestions)})")

    if needs_seo_title:
        seo_title = _clean_text(llm_response.get('seo_title'))
        if seo_title:
            frontmatter['seo-title'] = seo_title
            updated = True
            print(f"  + Added SEO title: {seo_title}")

    if needs_seo_desc:
        seo_desc = _clean_text(llm_response.get('seo_description'))
        if seo_desc:
            frontmatter['seo-description'] = seo_desc
            updated = True
            print("  + Added SEO description")

    if needs_seo_keywords:
        seo_keywords = as_str_list(llm_response.get('seo_keywords'))
        if seo_keywords:
            frontmatter['seo-keywords'] = seo_keywords
            updated = True
            print(f"  + Added {len(seo_keywords)} SEO keywords")

    if not updated:
        print("  - No updates needed")
        return

    # Write back to file
    new_content = reconstruct_markdown(frontmatter, body)
    try:
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(new_content)
    except OSError as e:
        print(f"  ✗ Could not write note: {e}")
        return
    print("  ✓ Updated successfully")


def main():
    """Tag every markdown note in NOTES_FOLDER using the taxonomy beside this script"""
    missing = [
        package
        for package, module in (("pyyaml", yaml), ("requests", requests))
        if module is None
    ]
    if missing:
        print(f"Error: missing dependencies: {', '.join(missing)}")
        print(f"Install them with: pip install {' '.join(missing)}")
        sys.exit(1)

    # Load taxonomy
    taxonomy_path = Path(__file__).parent / TAXONOMY_FILE
    if not taxonomy_path.exists():
        print(f"Error: Taxonomy file not found at {taxonomy_path}")
        print(f"Please create {TAXONOMY_FILE} in the same directory as this script")
        sys.exit(1)

    taxonomy = load_taxonomy(taxonomy_path)
    print(f"Loaded {len(taxonomy)} tags from taxonomy\n")

    # Use the hardcoded notes folder
    target_path = Path(NOTES_FOLDER)

    if not target_path.exists():
        print(f"Error: Notes folder not found: {target_path}")
        sys.exit(1)

    if not target_path.is_dir():
        print(f"Error: {target_path} is not a directory")
        sys.exit(1)

    # Sorted so runs are reproducible; glob order is filesystem-dependent
    md_files = sorted(target_path.glob('*.md'))

    if not md_files:
        print(f"No markdown files found in {target_path}")
        sys.exit(0)

    print(f"Processing all markdown files in: {target_path}")
    print(f"Found {len(md_files)} markdown files\n")

    for md_file in md_files:
        process_note(md_file, taxonomy)
        print()  # Blank line between files

    print("\n✓ Processing complete!")


if __name__ == "__main__":
    main()