|
| 1 | +import os |
| 2 | +import csv |
| 3 | +import re |
| 4 | +import markdown |
| 5 | +import random |
| 6 | +from datetime import datetime, timedelta |
| 7 | +from pathlib import Path |
| 8 | +import yaml |
| 9 | +from openai import OpenAI |
| 10 | +import textwrap |
| 11 | + |
# API key read from the environment; when unset, generate_short_description
# short-circuits and returns "" instead of calling the API.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Module-level OpenAI client shared by all summary requests.
# NOTE(review): constructed even when OPENAI_API_KEY is None — the guard in
# generate_short_description prevents any call from being made in that case.
client = OpenAI(api_key=OPENAI_API_KEY)

# Set the folder with markdown files
# Filenames are expected to follow the Jekyll convention: YYYY-MM-DD-slug.md
MARKDOWN_FOLDER = "./_posts"
OUTPUT_CSV = "converted_blogposts.csv"

# Set static values (you can customize)
# Presumably these are CMS (Webflow-style) collection/locale identifiers for
# the import target — verify against the destination before running.
COLLECTION_ID = "662a1b809adeb156702a5d75"
LOCALE_ID = "662a1b8028d7e78401160d02"
ITEM_ID = "PLACEHOLDER_ITEM_ID"
def generate_short_description(title, content, max_length=150):
    """Generate a short blog-post summary via the OpenAI chat API.

    Args:
        title: Blog post title, interpolated into the prompt.
        content: Markdown body; shortened to ~1000 characters before sending.
        max_length: Advisory character budget for the summary. It is passed
            to the model inside the prompt (the model may still exceed it).

    Returns:
        The generated summary, or "" when no API key is configured or the
        API call fails (best-effort: errors are printed, never raised).
    """
    # No key configured -> skip the network call entirely.
    if not OPENAI_API_KEY:
        return ""

    try:
        # Bug fix: max_length was previously ignored — the prompt hard-coded
        # "150 characters". The parameter now drives the requested budget.
        prompt = f"""
        Generate a concise and engaging summary of the following blog post in about 2-3 sentences.
        The summary should highlight the main points and be less than {max_length} characters if possible.
        It should be suitable for use in a blog post's metadata. It shouldn't contain hashtags or links.

        Title: {title}
        Content: {textwrap.shorten(content, width=1000, placeholder="...")}
        """

        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a skilled blog editor who writes concise, engaging summaries."},
                {"role": "user", "content": prompt},
            ],
            max_tokens=100,
            temperature=0.7,
        )

        return response.choices[0].message.content.strip()
    except Exception as e:
        # Best-effort: a failed summary must not abort the whole conversion.
        print(f"Error generating description: {e}")
        return ""
def parse_markdown_file(filepath):
    """Read a Jekyll-style markdown post and split it into its parts.

    Args:
        filepath: Path to a markdown file beginning with a ``---``-fenced
            YAML front matter block.

    Returns:
        Tuple ``(front_matter, body_md, body_html)``: the parsed YAML dict,
        the raw markdown body, and the body rendered to HTML.

    Raises:
        ValueError: if no front matter block is found.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        content = f.read()

    # Split front matter. Bug fix: the previous pattern required exact
    # "---\n" fences, so CRLF line endings or trailing whitespace after the
    # dashes made valid posts fail — now tolerated.
    front_matter_match = re.match(r"---[ \t]*\r?\n(.*?)\r?\n---[ \t]*\r?\n(.*)", content, re.DOTALL)
    if not front_matter_match:
        raise ValueError(f"Front matter not found in {filepath}")

    front_matter = yaml.safe_load(front_matter_match.group(1))
    body_md = front_matter_match.group(2)

    body_html = markdown.markdown(body_md)

    return front_matter, body_md, body_html
def generate_csv_row(filepath, front_matter, body_md, html_content):
    """Build the 15-field CSV row for one parsed blog post.

    Args:
        filepath: Source markdown path; its stem should look like
            ``YYYY-MM-DD-slug``.
        front_matter: Parsed YAML front matter (reads "title" and "image").
        body_md: Raw markdown body (fed to generate_short_description).
        html_content: Body already rendered to HTML.

    Returns:
        A list matching the header row written by convert_all_markdown_to_csv.

    Raises:
        ValueError: if no valid YYYY-MM-DD date can be parsed from the stem.
    """
    filename = Path(filepath).stem

    # Extract the complete YYYY-MM-DD prefix in one shot rather than
    # splitting at the first hyphen (which would truncate the date).
    match = re.match(r'(\d{4}-\d{2}-\d{2})-(.*)', filename)
    if match:
        date_str = match.group(1)
        slug = match.group(2)
    else:
        # Fallback: treat the first three hyphen-separated parts as the date
        # and the remainder as the slug.
        date_parts = filename.split('-', 3)[:3]
        date_str = '-'.join(date_parts)
        slug = '-'.join(filename.split('-')[3:])

    post_date = datetime.strptime(date_str, "%Y-%m-%d")
    formatted_date = post_date.strftime("%a %b %d %Y 00:00:00 GMT+0000 (Coordinated Universal Time)")

    # Spread created/updated/published across the post's day with small
    # random offsets so imported items don't all share a single timestamp.
    created_date = post_date.replace(hour=random.randint(8, 11), minute=random.randint(0, 59))
    updated_date = post_date.replace(hour=random.randint(12, 15), minute=random.randint(0, 59))
    published_date = post_date.replace(hour=random.randint(16, 18), minute=random.randint(0, 59))

    ts_fmt = "%a %b %d %Y %H:%M:%S GMT+0000 (Coordinated Universal Time)"
    created_str = created_date.strftime(ts_fmt)
    updated_str = updated_date.strftime(ts_fmt)
    published_str = published_date.strftime(ts_fmt)

    # Generate short description
    title = front_matter.get("title", "")
    short_description = generate_short_description(title, body_md)

    # Bug fix: a post without an "image" key previously produced the literal
    # cover URL ".../None"; emit an empty cover instead.
    image = front_matter.get("image")
    cover = f"https://blog.magicblock.gg/{image}" if image else ""

    return [
        title,              # Name
        slug,               # Slug
        COLLECTION_ID,      # Collection ID
        LOCALE_ID,          # Locale ID
        ITEM_ID,            # Item ID (placeholder)
        created_str,        # Created On - aligned with post date
        updated_str,        # Updated On - aligned with post date
        published_str,      # Published On - aligned with post date
        cover,              # Cover URL ("" when front matter has no image)
        short_description,  # Short Description generated by ChatGPT
        formatted_date,     # Date
        html_content,       # Main Content (HTML)
        "true",             # Main Post
        "false",            # Home Post
        "false"             # Top Post
    ]
def convert_all_markdown_to_csv():
    """Convert every ``.md`` post in MARKDOWN_FOLDER into OUTPUT_CSV.

    Posts that fail to parse are reported and skipped so that one bad file
    does not abort the whole export.
    """
    # Bug fix: os.listdir order is arbitrary and filesystem-dependent;
    # sort so the CSV row order is deterministic across runs.
    files = sorted(f for f in os.listdir(MARKDOWN_FOLDER) if f.endswith(".md"))
    rows = []

    for file in files:
        path = os.path.join(MARKDOWN_FOLDER, file)
        try:
            front_matter, body_md, html_content = parse_markdown_file(path)
            rows.append(generate_csv_row(path, front_matter, body_md, html_content))
        except Exception as e:
            # Best-effort batch: report and continue with the next file.
            print(f"Error processing {file}: {e}")

    headers = [
        "Name", "Slug", "Collection ID", "Locale ID", "Item ID", "Created On",
        "Updated On", "Published On", "Cover", "Short Description", "Date",
        "Main Content", "Main Post", "Home Post", "Top Post"
    ]

    # newline="" lets csv.writer control line endings (per the csv docs).
    with open(OUTPUT_CSV, "w", encoding="utf-8", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(headers)
        writer.writerows(rows)

    print(f"Conversion completed. CSV saved to: {OUTPUT_CSV}")
# Script entry point: run the full markdown -> CSV conversion.
if __name__ == "__main__":
    convert_all_markdown_to_csv()