bsshare/bsupdate.py
Holger Weber a3f5f4a5dc Load env variables and fix wrong text-marker usage.
1. If the environment variables were not already set by Docker Compose, the program tries to read them from a .env file.
2. Fix: if the text marker was not found on the BookStack page, the wrong marker was inserted.
2025-07-18 08:45:20 +02:00
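
For local runs without Docker Compose, the script can read its configuration from a .env file placed next to it. A sketch with placeholder values (taken from the defaults in the code below, not real credentials):

BOOKSTACK_URL=https://my-book-stack.com
BOOKSTACK_TOKEN_ID=TOKEN-ID
BOOKSTACK_TOKEN_SECRET=TOKEN-SECRET
PAGE_SLUG=page-name-of-book
BOOK_SLUG=book-name
INSERT_MARKER_LINK=## Unsortierte Links
INSERT_MARKER_TEXT=## Textmerker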


import requests
from bs4 import BeautifulSoup
from urllib.parse import urlparse
import os
from dotenv import load_dotenv

# If the environment variables were not set by Docker Compose, fall back to a .env file.
if "BOOKSTACK_URL" not in os.environ:
    print("Environment variables not set. Trying to read them from a .env file.")
    load_dotenv()

BOOKSTACK_URL = os.getenv("BOOKSTACK_URL", "https://my-book-stack.com")
BOOKSTACK_TOKEN_ID = os.getenv("BOOKSTACK_TOKEN_ID", "TOKEN-ID")
BOOKSTACK_TOKEN_SECRET = os.getenv("BOOKSTACK_TOKEN_SECRET", "TOKEN-SECRET")
PAGE_SLUG = os.getenv("PAGE_SLUG", "page-name-of-book")
BOOK_SLUG = os.getenv("BOOK_SLUG", "book-name")
INSERT_MARKER_LINK = os.getenv("INSERT_MARKER_LINK", "## Unsortierte Links")
INSERT_MARKER_TEXT = os.getenv("INSERT_MARKER_TEXT", "## Textmerker")

HEADERS = {
    "Accept": "application/json",
    "Content-Type": "application/json",
    "Authorization": f"Token {BOOKSTACK_TOKEN_ID}:{BOOKSTACK_TOKEN_SECRET}"
}


def is_valid_url(url):
    parsed = urlparse(url)
    return all([parsed.scheme in ("http", "https"), parsed.netloc])


def get_page_id_by_slug(book_slug, page_slug):
    res = requests.get(f"{BOOKSTACK_URL}/api/pages", headers=HEADERS)
    pages = res.json().get("data", [])
    for p in pages:
        if p["book_slug"] == book_slug and p["slug"] == page_slug:
            return p["id"]
    raise ValueError(f"Page '{page_slug}' not found in book '{book_slug}'")


def get_page_title_from_url(url):
    try:
        response = requests.get(url, timeout=10)
        soup = BeautifulSoup(response.text, "html.parser")
        title_tag = soup.find("title")
        return title_tag.text.strip() if title_tag else url
    except Exception as e:
        print(f"Error while fetching the title: {e}")
        return url


def get_clean_title_from_url(url, max_len=80):
    try:
        res = requests.get(url, timeout=10)
        res.raise_for_status()
        soup = BeautifulSoup(res.text, "html.parser")
        # 1. og:title
        og_title = soup.find("meta", property="og:title")
        if og_title and og_title.get("content"):
            title = og_title["content"]
        # 2. <meta name="title">
        elif soup.find("meta", attrs={"name": "title"}):
            title = soup.find("meta", attrs={"name": "title"}).get("content", "")
        # 3. <title>
        elif soup.title and soup.title.string:
            title = soup.title.string
        # 4. Fallback: domain
        else:
            return urlparse(url).netloc
        title = title.strip()
        # Optionally cut the title at separators such as " | " or " – "
        for sep in ["|", "–", "-", "—"]:
            if sep in title:
                title = title.split(sep)[0].strip()
                break
        # Truncate overly long titles
        if len(title) > max_len:
            title = title[:max_len].rstrip() + "…"
        return title
    except Exception as e:
        print(f"Error while fetching or parsing the title: {e}")
        return urlparse(url).netloc  # fall back to the domain


def append_link_to_bookstack_page(url):
    page_id = get_page_id_by_slug(BOOK_SLUG, PAGE_SLUG)
    # Fetch the current page content
    page = requests.get(f"{BOOKSTACK_URL}/api/pages/{page_id}", headers=HEADERS).json()
    current_content = page["markdown"]
    if url in current_content:
        print("Link already present.")
        return
    title = get_page_title_from_url(url)
    new_entry = f"- [{title}]({url})<br>{url}"
    # Insert the link below INSERT_MARKER_LINK
    if INSERT_MARKER_LINK in current_content:
        parts = current_content.split(INSERT_MARKER_LINK, 1)
        updated_content = parts[0] + INSERT_MARKER_LINK + "\n" + new_entry + parts[1]
    else:
        # If the marker is missing, append it at the end of the page
        updated_content = current_content + "\n\n" + INSERT_MARKER_LINK + "\n\n" + new_entry
    # Update the page
    update_data = {
        "name": page["name"],
        "markdown": updated_content
    }
    response = requests.put(
        f"{BOOKSTACK_URL}/api/pages/{page_id}",
        headers=HEADERS,
        json=update_data
    )
    if response.status_code == 200:
        print("Link added successfully.")
    else:
        print("Error while updating the page:", response.text)


def append_text_to_bockstack_page(strText):
    page_id = get_page_id_by_slug(BOOK_SLUG, PAGE_SLUG)
    # Fetch the current page content
    page = requests.get(f"{BOOKSTACK_URL}/api/pages/{page_id}", headers=HEADERS).json()
    current_content = page["markdown"]
    if strText in current_content:
        print("Text already present.")
        return
    new_entry = strText + "\n\n"
    # Insert the text below INSERT_MARKER_TEXT
    if INSERT_MARKER_TEXT in current_content:
        parts = current_content.split(INSERT_MARKER_TEXT, 1)
        updated_content = parts[0] + INSERT_MARKER_TEXT + "\n\n" + new_entry + "\n" + parts[1]
    else:
        # If the marker is missing, append it at the end of the page
        updated_content = current_content + "\n\n" + INSERT_MARKER_TEXT + "\n\n" + new_entry
    # Update the page
    update_data = {
        "name": page["name"],
        "markdown": updated_content
    }
    response = requests.put(
        f"{BOOKSTACK_URL}/api/pages/{page_id}",
        headers=HEADERS,
        json=update_data
    )
    if response.status_code == 200:
        print("Text added successfully.")
    else:
        print("Error while updating the page:", response.text)


def add_link_or_text(strText):
    if is_valid_url(strText):
        append_link_to_bookstack_page(strText)
    else:
        append_text_to_bockstack_page(strText)
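

# The original file ends here without an entry point; how add_link_or_text is
# invoked (CLI, web hook, share target, ...) is not shown in this module. The
# block below is only an illustrative sketch of a minimal command-line wrapper,
# not part of the original script.
if __name__ == "__main__":
    import sys

    if len(sys.argv) < 2:
        print("Usage: python bsupdate.py <url-or-text>")
        sys.exit(1)
    # Treat everything after the script name as one link or one text snippet.
    add_link_or_text(" ".join(sys.argv[1:]))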