1. If the environment variables are not already set (e.g. by docker compose), the program tries to read them from a .env file.
2. Fix: if the text marker was not found on the BookStack page, a wrong text marker was added.
import requests
from bs4 import BeautifulSoup
from urllib.parse import urlparse
import os
from dotenv import load_dotenv

if "BOOKSTACK_URL" not in os.environ:
    # Fall back to a .env file when the variables were not provided (e.g. by docker compose)
    print("Environment variables not set; trying to read them from a .env file.")
    load_dotenv()

BOOKSTACK_URL = os.getenv("BOOKSTACK_URL", "https://my-book-stack.com")
BOOKSTACK_TOKEN_ID = os.getenv("BOOKSTACK_TOKEN_ID", "TOKEN-ID")
BOOKSTACK_TOKEN_SECRET = os.getenv("BOOKSTACK_TOKEN_SECRET", "TOKEN-SECRET")
PAGE_SLUG = os.getenv("PAGE_SLUG", "page-name-of-book")
BOOK_SLUG = os.getenv("BOOK_SLUG", "book-name")
INSERT_MARKER_LINK = os.getenv("INSERT_MARKER_LINK", "## Unsortierte Links")
INSERT_MARKER_TEXT = os.getenv("INSERT_MARKER_TEXT", "## Textmerker")
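
# The values above can also come from a .env file; a minimal example with
# hypothetical values (shown only for illustration, adjust to your instance):
#
#   BOOKSTACK_URL=https://my-book-stack.com
#   BOOKSTACK_TOKEN_ID=abc123
#   BOOKSTACK_TOKEN_SECRET=def456
#   BOOK_SLUG=book-name
#   PAGE_SLUG=page-name-of-book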

HEADERS = {
    "Accept": "application/json",
    "Content-Type": "application/json",
    "Authorization": f"Token {BOOKSTACK_TOKEN_ID}:{BOOKSTACK_TOKEN_SECRET}"
}


def is_valid_url(url):
    parsed = urlparse(url)
    return all([parsed.scheme in ("http", "https"), parsed.netloc])


def get_page_id_by_slug(book_slug, page_slug):
    # Note: /api/pages returns a paginated listing, so pages beyond the first
    # batch of results are not found by this lookup.
    res = requests.get(f"{BOOKSTACK_URL}/api/pages", headers=HEADERS)
    pages = res.json().get("data", [])
    for p in pages:
        if p["book_slug"] == book_slug and p["slug"] == page_slug:
            return p["id"]

    raise ValueError(f"Page '{page_slug}' not found in book '{book_slug}'")


def get_page_title_from_url(url):
    try:
        response = requests.get(url, timeout=10)
        soup = BeautifulSoup(response.text, "html.parser")
        title_tag = soup.find("title")
        return title_tag.text.strip() if title_tag else url
    except Exception as e:
        print(f"Error fetching the title: {e}")
        return url


# Alternative title extraction with meta-tag fallbacks (defined but not called
# by append_link_to_bookstack_page below).
def get_clean_title_from_url(url, max_len=80):
    try:
        res = requests.get(url, timeout=10)
        res.raise_for_status()
        soup = BeautifulSoup(res.text, "html.parser")

        # 1. og:title
        og_title = soup.find("meta", property="og:title")
        if og_title and og_title.get("content"):
            title = og_title["content"]

        # 2. <meta name="title">
        elif soup.find("meta", attrs={"name": "title"}):
            title = soup.find("meta", attrs={"name": "title"}).get("content", "")

        # 3. <title>
        elif soup.title and soup.title.string:
            title = soup.title.string

        # 4. Fallback: domain
        else:
            return urlparse(url).netloc

        title = title.strip()

        # Optional: split at separators such as " | " or " – "
        for sep in ["|", "–", "-", "•"]:
            if sep in title:
                title = title.split(sep)[0].strip()
                break

        # Truncate overly long titles
        if len(title) > max_len:
            title = title[:max_len].rstrip() + "…"

        return title

    except Exception as e:
        print(f"Error fetching or parsing the title: {e}")
        return urlparse(url).netloc  # Fall back to the domain


def append_link_to_bookstack_page(url):
    strPageId = get_page_id_by_slug(BOOK_SLUG, PAGE_SLUG)

    # Fetch the current page content
    page = requests.get(f"{BOOKSTACK_URL}/api/pages/{strPageId}", headers=HEADERS).json()
    current_content = page["markdown"]

    if url in current_content:
        print("Link already present.")
        return

    title = get_page_title_from_url(url)
    new_entry = f"- [{title}]({url})<br>{url}"

    # Insert the link directly below INSERT_MARKER_LINK
    if INSERT_MARKER_LINK in current_content:
        # Split only at the first occurrence so later content is preserved
        parts = current_content.split(INSERT_MARKER_LINK, 1)
        updated_content = parts[0] + INSERT_MARKER_LINK + "\n" + new_entry + parts[1]
    else:
        # If the marker is missing, append it together with the new entry at the end
        updated_content = current_content + "\n\n" + INSERT_MARKER_LINK + "\n\n" + new_entry

    # Update the page
    update_data = {
        "name": page["name"],
        "markdown": updated_content
    }

    response = requests.put(
        f"{BOOKSTACK_URL}/api/pages/{strPageId}",
        headers=HEADERS,
        json=update_data
    )

    if response.status_code == 200:
        print("Link added successfully.")
    else:
        print("Error updating the page:", response.text)


def append_text_to_bookstack_page(strText):
    strPageId = get_page_id_by_slug(BOOK_SLUG, PAGE_SLUG)

    # Fetch the current page content
    page = requests.get(f"{BOOKSTACK_URL}/api/pages/{strPageId}", headers=HEADERS).json()
    current_content = page["markdown"]

    if strText in current_content:
        print("Text already present.")
        return

    new_entry = strText + "\n\n"

    # Insert the text directly below INSERT_MARKER_TEXT
    if INSERT_MARKER_TEXT in current_content:
        # Split only at the first occurrence so later content is preserved
        parts = current_content.split(INSERT_MARKER_TEXT, 1)
        updated_content = parts[0] + INSERT_MARKER_TEXT + "\n\n" + new_entry + "\n" + parts[1]
    else:
        # If the marker is missing, append it together with the new entry at the end
        updated_content = current_content + "\n\n" + INSERT_MARKER_TEXT + "\n\n" + new_entry

    # Update the page
    update_data = {
        "name": page["name"],
        "markdown": updated_content
    }

    response = requests.put(
        f"{BOOKSTACK_URL}/api/pages/{strPageId}",
        headers=HEADERS,
        json=update_data
    )

    if response.status_code == 200:
        print("Text added successfully.")
    else:
        print("Error updating the page:", response.text)
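
# Resulting page layout (illustrative only): each new entry lands directly
# below its marker heading, roughly like this:
#
#   ## Unsortierte Links
#   - [Some page title](https://example.com)<br>https://example.com
#
#   ## Textmerker
#   some captured note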


def add_link_or_text(strText):
    # Treat valid http(s) URLs as links and everything else as plain text
    if is_valid_url(strText):
        append_link_to_bookstack_page(strText)
    else:
        append_text_to_bookstack_page(strText)
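

# Hypothetical usage sketch (an assumption; the original script defines no
# entry point): pass the URL or text snippet as the first command-line
# argument, e.g. `python bookstack_append.py "https://example.com"`.
if __name__ == "__main__":
    import sys

    if len(sys.argv) > 1:
        add_link_or_text(sys.argv[1])
    else:
        print("Usage: pass a URL or a text snippet as the first argument.")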