Commit ec8beb89 authored by Marco Schmiedel's avatar Marco Schmiedel

fix

parent fc69800a
{
"fileId": "1d59cc86-7b89-484d-a6da-2e1563612c68",
"originalPath": "work/routes/EeccxRouter.py",
"currentPath": "work/routes/EeccxRouter.py",
"hash": "92720db32fef845c68e8a7df6e1295371fbe6b757313eca5ab03b7a28d36ad28",
"docContent": "<p><br></p>",
"checkedStatus": "done",
"comments": [],
"lastCheckedTimestamp": 1747070393555,
"lastFileModificationTimestamp": 1747070388417.1619
}
...@@ -2,9 +2,9 @@ ...@@ -2,9 +2,9 @@
"fileId": "22983490-9c01-4bd1-8649-dfe87c659225", "fileId": "22983490-9c01-4bd1-8649-dfe87c659225",
"originalPath": "work/config/MauiConfig.py", "originalPath": "work/config/MauiConfig.py",
"currentPath": "work/config/MauiConfig.py", "currentPath": "work/config/MauiConfig.py",
"hash": "6e627f3800fd413c6dbde92ad2e274d5e3047af0f906de4d75fc826cc129631e", "hash": "08c57a67f7a74d7b702b572da3cd912bf4603ee97e9495a6be2ce60b73beab20",
"docContent": "<p><br></p>", "docContent": "<p><br></p>",
"checkedStatus": "todo", "checkedStatus": "done",
"comments": [ "comments": [
{ {
"commentId": "3bc16f5e-4032-44a8-9012-4b632849ba50", "commentId": "3bc16f5e-4032-44a8-9012-4b632849ba50",
...@@ -12,6 +12,6 @@ ...@@ -12,6 +12,6 @@
"timestamp": 1744614418809 "timestamp": 1744614418809
} }
], ],
"lastCheckedTimestamp": 1746694114141, "lastCheckedTimestamp": 1747070436322,
"lastFileModificationTimestamp": 1745313945182.1555 "lastFileModificationTimestamp": 1747043388546.054
} }
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
"fileId": "36e791b4-e235-42f6-ac61-8560f1762892", "fileId": "36e791b4-e235-42f6-ac61-8560f1762892",
"originalPath": "work/workbench/Workbench.mwb", "originalPath": "work/workbench/Workbench.mwb",
"currentPath": "work/workbench/Workbench.mwb", "currentPath": "work/workbench/Workbench.mwb",
"hash": "d53db9e9d211116d4aafc32106a7e0c05a86c062af72f21a37420853a1c4eacc", "hash": "afea3df7165b4dad78d5a9d92ede4fec601f68ed0a14147532cf1ca00617c29e",
"docContent": "<p><br></p>", "docContent": "<p><br></p>",
"checkedStatus": "done", "checkedStatus": "done",
"comments": [ "comments": [
...@@ -12,6 +12,6 @@ ...@@ -12,6 +12,6 @@
"timestamp": 1746693753181 "timestamp": 1746693753181
} }
], ],
"lastCheckedTimestamp": 1746693747974, "lastCheckedTimestamp": 1747070021483,
"lastFileModificationTimestamp": 1746440499172.53 "lastFileModificationTimestamp": 1747068819870.2563
} }
...@@ -2,9 +2,9 @@ ...@@ -2,9 +2,9 @@
"fileId": "38b9eebe-955e-4052-a0f6-29c69b1242b3", "fileId": "38b9eebe-955e-4052-a0f6-29c69b1242b3",
"originalPath": "work/config/MysqlConfig.py", "originalPath": "work/config/MysqlConfig.py",
"currentPath": "work/config/MysqlConfig.py", "currentPath": "work/config/MysqlConfig.py",
"hash": "8eeae892f7c5f5aa1e894ca9ff7b8c66ea2891bc37c0167c404cd6e0cb95f858", "hash": "d8958dba0bf7c100587dabf6ff576e0a1905a0aed4980a6c64ff6254f3671e5a",
"docContent": "<p><br></p>", "docContent": "<p><br></p>",
"checkedStatus": "todo", "checkedStatus": "done",
"comments": [ "comments": [
{ {
"commentId": "56c5adba-20f4-4524-a894-41f81ab7ca55", "commentId": "56c5adba-20f4-4524-a894-41f81ab7ca55",
...@@ -12,6 +12,6 @@ ...@@ -12,6 +12,6 @@
"timestamp": 1744622354948 "timestamp": 1744622354948
} }
], ],
"lastCheckedTimestamp": 1745314583521, "lastCheckedTimestamp": 1747070439965,
"lastFileModificationTimestamp": 1745313973064.8933 "lastFileModificationTimestamp": 1747070428651.6106
} }
{
"fileId": "48126029-3c3e-4372-9f3e-1e8b9686114e",
"originalPath": "work/commands/importCacheToDatabase.py",
"currentPath": "work/commands/importCacheToDatabase.py",
"hash": "3c19dde87c665d72591ff9391a0dd6ec28218002116df06b22217a09d2a73e27",
"docContent": "<p><br></p>",
"checkedStatus": "done",
"comments": [],
"lastCheckedTimestamp": 1747070815750,
"lastFileModificationTimestamp": 1747070802673.0312
}
...@@ -2,9 +2,9 @@ ...@@ -2,9 +2,9 @@
"fileId": "4c784f14-4710-4694-bf73-f5665baab43f", "fileId": "4c784f14-4710-4694-bf73-f5665baab43f",
"originalPath": "work/cron.sh", "originalPath": "work/cron.sh",
"currentPath": "work/cron.sh", "currentPath": "work/cron.sh",
"hash": "4c6e694c417005a79207a32c26609e0e2701f17d6484a536bd8188bf8dcceb93", "hash": "8950b2d4895462785979d16bc1db8830ed40cca30d60a680df360f731b95baa9",
"docContent": "<p><br></p>", "docContent": "<p><br></p>",
"checkedStatus": "todo", "checkedStatus": "done",
"comments": [ "comments": [
{ {
"commentId": "e5f40597-ae51-440f-886a-44f06dbe8e96", "commentId": "e5f40597-ae51-440f-886a-44f06dbe8e96",
...@@ -12,6 +12,6 @@ ...@@ -12,6 +12,6 @@
"timestamp": 1746693690181 "timestamp": 1746693690181
} }
], ],
"lastCheckedTimestamp": 1746693667833, "lastCheckedTimestamp": 1747070016564,
"lastFileModificationTimestamp": 1746448049902.1914 "lastFileModificationTimestamp": 1747070003854.197
} }
...@@ -2,10 +2,10 @@ ...@@ -2,10 +2,10 @@
"fileId": "58307c8c-416a-4c24-adc9-7ed6324d1f8a", "fileId": "58307c8c-416a-4c24-adc9-7ed6324d1f8a",
"originalPath": "work/manager/WebManager.py", "originalPath": "work/manager/WebManager.py",
"currentPath": "work/manager/WebManager.py", "currentPath": "work/manager/WebManager.py",
"hash": "66f022cdc155ded9c47e49a893ed3070099faa960a72aa01d23929a1c02a8657", "hash": "5a987f9d37c2d083c9a04ea0a0c4739a4fbd6c2e6c98877a0f64fadb45717a1c",
"docContent": "<p><br></p>", "docContent": "<p><br></p>",
"checkedStatus": "changed", "checkedStatus": "done",
"comments": [], "comments": [],
"lastCheckedTimestamp": 1746694408088, "lastCheckedTimestamp": 1747070585799,
"lastFileModificationTimestamp": 1746696251620.3523 "lastFileModificationTimestamp": 1747070580506.2974
} }
{
"fileId": "5f874bee-40e2-4b9a-b102-f0b6d643a840",
"originalPath": "work/commands/downloadDataFromMaui.py",
"currentPath": "work/commands/downloadDataFromMaui.py",
"hash": "f75e0013a123055434a4592bb3509d3ff298273727226ef431662f3fad739fad",
"docContent": "<p><br></p>",
"checkedStatus": "done",
"comments": [],
"lastCheckedTimestamp": 1747071242419,
"lastFileModificationTimestamp": 1747071185164.952
}
{
"fileId": "62aea232-2549-437e-b5a9-72cb2aa92d16",
"originalPath": "work/commands/calculateTarifDetailsWithGpt.py",
"currentPath": "work/commands/calculateTarifDetailsWithGpt.py",
"hash": "9161246779e6b04e4ae512afe91cfae14ad7e9fc28395d42f54627e3b70a25b0",
"docContent": "<p><br></p>",
"checkedStatus": "done",
"comments": [],
"lastCheckedTimestamp": 1747071244862,
"lastFileModificationTimestamp": 1747071237273.2832
}
...@@ -2,9 +2,9 @@ ...@@ -2,9 +2,9 @@
"fileId": "647ff9a8-a56f-486e-ba2a-8ff77e4514d4", "fileId": "647ff9a8-a56f-486e-ba2a-8ff77e4514d4",
"originalPath": "work/Dockerfile", "originalPath": "work/Dockerfile",
"currentPath": "work/Dockerfile", "currentPath": "work/Dockerfile",
"hash": "d885a8a45174b2f425d3c0201b797754c69f6ff798dabd51f0e53af17b047964", "hash": "ca6a37e37aff3fff276f8020d9860b94c6556181bbb65a3fe1c62f3868f7f0b3",
"docContent": "<p><br></p>", "docContent": "<p><br></p>",
"checkedStatus": "changed", "checkedStatus": "done",
"comments": [ "comments": [
{ {
"commentId": "2a07c637-2149-4d5a-870d-94870f78945d", "commentId": "2a07c637-2149-4d5a-870d-94870f78945d",
...@@ -12,6 +12,6 @@ ...@@ -12,6 +12,6 @@
"timestamp": 1746693591017 "timestamp": 1746693591017
} }
], ],
"lastCheckedTimestamp": 1746693552978, "lastCheckedTimestamp": 1747069787674,
"lastFileModificationTimestamp": 1746694865448.947 "lastFileModificationTimestamp": 1747069781547.9219
} }
...@@ -2,9 +2,9 @@ ...@@ -2,9 +2,9 @@
"fileId": "766dc461-001e-4901-8faf-263820ad96cd", "fileId": "766dc461-001e-4901-8faf-263820ad96cd",
"originalPath": "work/manager/MysqlManager.py", "originalPath": "work/manager/MysqlManager.py",
"currentPath": "work/manager/MysqlManager.py", "currentPath": "work/manager/MysqlManager.py",
"hash": "27129c35df4b6b0e4d5fcb7a77c8e1c19d1b74f80d5c3ec822cdc26701124a68", "hash": "9a9ca8572ad133ef4a191b7082ffb025d979f84dce2109ff5be33108cb807652",
"docContent": "<p><br></p>", "docContent": "<p><br></p>",
"checkedStatus": "changed", "checkedStatus": "done",
"comments": [ "comments": [
{ {
"commentId": "7227a7a0-99bc-47b4-a725-3547eb56015d", "commentId": "7227a7a0-99bc-47b4-a725-3547eb56015d",
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
"timestamp": 1746694262639 "timestamp": 1746694262639
} }
], ],
"lastCheckedTimestamp": 1745314589383, "lastCheckedTimestamp": 1747070649128,
"lastFileModificationTimestamp": 1746696474493.3755, "lastFileModificationTimestamp": 1747070643843.446,
"flaggedForCopy": false "flaggedForCopy": false
} }
{
"fileId": "78db1316-a768-4c1f-b15c-7a408444a030",
"originalPath": "work/routes/HealtCheckRouter.py",
"currentPath": "work/routes/HealtCheckRouter.py",
"hash": "965774cdb8edb7b68ec0341e4d122765853c5fa82b5432df2843234c25874f3d",
"docContent": "<p><br></p>",
"checkedStatus": "done",
"comments": [],
"lastCheckedTimestamp": 1747070215651,
"lastFileModificationTimestamp": 1747070210301.3403
}
...@@ -2,9 +2,9 @@ ...@@ -2,9 +2,9 @@
"fileId": "7a3a246b-fc0e-4c80-b748-96b941efab5c", "fileId": "7a3a246b-fc0e-4c80-b748-96b941efab5c",
"originalPath": "work/config/AWSConfig.py", "originalPath": "work/config/AWSConfig.py",
"currentPath": "work/config/AWSConfig.py", "currentPath": "work/config/AWSConfig.py",
"hash": "5a6654cb1cd77f8d531fcc1541d31261ea02c4e8cb126f2cc43a217c9c6920aa", "hash": "29bc59fd6ecbf98aa7efcfa3ef371bb912ac144f077895c279bb80ba150ee734",
"docContent": "<p><br></p>", "docContent": "<p><br></p>",
"checkedStatus": "todo", "checkedStatus": "done",
"comments": [ "comments": [
{ {
"commentId": "3c070677-67c2-458d-8ad9-1ef595c16e0e", "commentId": "3c070677-67c2-458d-8ad9-1ef595c16e0e",
...@@ -12,6 +12,6 @@ ...@@ -12,6 +12,6 @@
"timestamp": 1746694106055 "timestamp": 1746694106055
} }
], ],
"lastCheckedTimestamp": 1745314580866, "lastCheckedTimestamp": 1747070433793,
"lastFileModificationTimestamp": 1745311719614.9841 "lastFileModificationTimestamp": 1747042244230.3608
} }
{
"fileId": "8c1b7b54-86c0-453c-839c-95390d883819",
"originalPath": "work/commands/uploadCacheToAwsS3.py",
"currentPath": "work/commands/uploadCacheToAwsS3.py",
"hash": "158bb6839fc011bfeb8ab54d335f897c629228f2169844f2f6118f803b80c64f",
"docContent": "<p><br></p>",
"checkedStatus": "done",
"comments": [],
"lastCheckedTimestamp": 1747070735035,
"lastFileModificationTimestamp": 1747070723183.9927
}
...@@ -2,16 +2,21 @@ ...@@ -2,16 +2,21 @@
"fileId": "986eeb57-8634-4f40-a4ea-a2eae9d87e71", "fileId": "986eeb57-8634-4f40-a4ea-a2eae9d87e71",
"originalPath": "work/readme.md", "originalPath": "work/readme.md",
"currentPath": "work/readme.md", "currentPath": "work/readme.md",
"hash": "3e2bf4db6ad284fb011128f2ac0d3cf7849268068a39b160418173f0230ba4bd", "hash": "455ce9ea71460c0f1f2b43ad6ea5c9706a4f2003e46d60622086b7cab47925db",
"docContent": "<p><br></p>", "docContent": "<p><br></p>",
"checkedStatus": "changed", "checkedStatus": "todo",
"comments": [ "comments": [
{ {
"commentId": "574b8332-b3c0-4afa-9f2b-8a632e910e0d", "commentId": "574b8332-b3c0-4afa-9f2b-8a632e910e0d",
"text": "I need to insert the AWS-ECR-Uplink-Data.", "text": "I need to insert the AWS-ECR-Uplink-Data.",
"timestamp": 1746693537936 "timestamp": 1746693537936
},
{
"commentId": "e5d599b4-3080-4638-b0a8-753fb4dd3c9b",
"text": "Only the video tutorials are missing...",
"timestamp": 1747069658074
} }
], ],
"lastCheckedTimestamp": 1746693903209, "lastCheckedTimestamp": 1747069646363,
"lastFileModificationTimestamp": 1746694946510.8994 "lastFileModificationTimestamp": 1747069621488.916
} }
{
"fileId": "b71a7bf5-594a-4ac1-9113-25158c35bcb4",
"originalPath": "work/models/token_toke.py",
"currentPath": "work/models/token_toke.py",
"hash": "609e9fec7718b6125d047c7a8c7029b7d0cd2b95df889334914ce058091176bd",
"docContent": "<p><br></p>",
"checkedStatus": "done",
"comments": [],
"lastCheckedTimestamp": 1747070070388,
"lastFileModificationTimestamp": 1747068898597.5894
}
...@@ -2,9 +2,9 @@ ...@@ -2,9 +2,9 @@
"fileId": "caf03c7b-60d8-4a77-ac21-0eccabeae4a2", "fileId": "caf03c7b-60d8-4a77-ac21-0eccabeae4a2",
"originalPath": "work/boot.sh", "originalPath": "work/boot.sh",
"currentPath": "work/boot.sh", "currentPath": "work/boot.sh",
"hash": "d665dba2f614cbf283cf1900c259bea8472f31353be894740e06535e6c3936c3", "hash": "9d08025500b916fe294de7aa8b533c29e02743c714022415ae9274e066c4fa6a",
"docContent": "<p><br></p>", "docContent": "<p><br></p>",
"checkedStatus": "todo", "checkedStatus": "done",
"comments": [ "comments": [
{ {
"commentId": "6ba2875c-14b5-4444-a34e-52295efd65bc", "commentId": "6ba2875c-14b5-4444-a34e-52295efd65bc",
...@@ -12,6 +12,6 @@ ...@@ -12,6 +12,6 @@
"timestamp": 1746693713037 "timestamp": 1746693713037
} }
], ],
"lastCheckedTimestamp": 1746693711224, "lastCheckedTimestamp": 1747070094947,
"lastFileModificationTimestamp": 1746447735575.9163 "lastFileModificationTimestamp": 1747070086824.1516
} }
{
"fileId": "e4ffba94-a5e6-40d2-a63e-5bfa60e3d719",
"originalPath": "work/routes/BaseRouter.py",
"currentPath": "work/routes/BaseRouter.py",
"hash": "e8f5cd3137261214985789cc7e328848d9ff379525c706b9acf6ee7a6ea3adec",
"docContent": "<p><br></p>",
"checkedStatus": "done",
"comments": [],
"lastCheckedTimestamp": 1747070513561,
"lastFileModificationTimestamp": 1747070506645.0361
}
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
"currentPath": "work/config/OpenAiConfig.py", "currentPath": "work/config/OpenAiConfig.py",
"hash": "50c0f7d96f9ea76aa069a0a24137e898dbd4fc3c4af867565c90468981bf6ff5", "hash": "50c0f7d96f9ea76aa069a0a24137e898dbd4fc3c4af867565c90468981bf6ff5",
"docContent": "<p><br></p>", "docContent": "<p><br></p>",
"checkedStatus": "todo", "checkedStatus": "done",
"comments": [ "comments": [
{ {
"commentId": "1b2c6a64-0a75-4763-9613-12634d96bed2", "commentId": "1b2c6a64-0a75-4763-9613-12634d96bed2",
...@@ -12,6 +12,6 @@ ...@@ -12,6 +12,6 @@
"timestamp": 1746694100919 "timestamp": 1746694100919
} }
], ],
"lastCheckedTimestamp": 1746694087733, "lastCheckedTimestamp": 1747070442728,
"lastFileModificationTimestamp": 1746437070245.503 "lastFileModificationTimestamp": 1746437070245.503
} }
# Wir verwenden Ubuntu als Betriebssystem.
# The Docker container uses Ubuntu 24.04 as the base operating system.
FROM ubuntu:24.04 FROM ubuntu:24.04
# Wir deaktivieren das interaktive Frontend. # The non-interactive frontend is configured to suppress installation prompts.
ENV DEBIAN_FRONTEND=noninteractive ENV DEBIAN_FRONTEND=noninteractive
# Zuerst aktualisieren wir die Paketquellen und führen ein Upgrade durch. # The package index files are updated to obtain the latest package metadata.
RUN apt-get -y update RUN apt-get -y update
# All installed packages are upgraded to their most recent versions.
RUN apt-get -y upgrade RUN apt-get -y upgrade
# Anschließend installieren wir systemweite Hilfspakete. # The “software-properties-common” package is installed to enable PPA management.
RUN apt-get install -y software-properties-common RUN apt-get install -y software-properties-common
# Wir installieren Python3 und pip. # Python 3 is installed to provide the runtime environment for Python applications.
RUN apt-get install -y python3 RUN apt-get install -y python3
# pip is installed to manage Python packages within the container.
RUN apt-get install -y python3-pip RUN apt-get install -y python3-pip
# Wir fügen das PPA hinzu, um die deb-basierte Version von Chromium zu erhalten. # The xtradeb PPA is added so a deb-based build of Chromium can be installed.
RUN add-apt-repository ppa:xtradeb/apps -y RUN add-apt-repository ppa:xtradeb/apps -y
# Wir aktualisieren erneut die Paketquellen (dadurch werden auch die PPA-Pakete verfügbar). # The package index files are refreshed to include packages from the newly added PPA.
RUN apt-get -y update RUN apt-get -y update
# Wir installieren den Browser. # The Chromium browser is installed to allow headless or automated browsing.
RUN apt-get install -y chromium-browser RUN apt-get install -y chromium-browser
# The Chromium WebDriver is installed to enable Selenium interactions with Chromium.
RUN apt-get install -y chromium-driver RUN apt-get install -y chromium-driver
# The Firefox Gecko WebDriver is installed to enable Selenium interactions with Firefox.
RUN apt-get install -y firefox-geckodriver RUN apt-get install -y firefox-geckodriver
# Wir entfernen snapd, damit keine Snap-Version von Chromium verwendet wird. # The “snapd” package is removed to prevent the Snap version of Chromium from being used.
RUN apt-get remove -y snapd RUN apt-get remove -y snapd
# Wir installieren Cron. # The cron utility is installed so scheduled tasks can run inside the container.
RUN apt-get install -y cron RUN apt-get install -y cron
# Wir installieren Vim. # The Vim text editor is installed for in-container file editing.
RUN apt-get install -y vim RUN apt-get install -y vim
# Wir installhieren htop. # The htop process viewer is installed to facilitate real-time resource monitoring.
RUN apt-get install -y htop RUN apt-get install -y htop
# Wir installhieren ffmpeg. # The FFmpeg multimedia framework is installed for audio and video processing tasks.
RUN apt-get install -y ffmpeg RUN apt-get install -y ffmpeg
# Wir installhieren curl. # The curl command-line tool is installed to enable data transfer over various protocols.
RUN apt-get install -y curl RUN apt-get install -y curl
# Wir installieren die Python-Abhängigkeiten via pip. # The Selenium package is installed to drive browser automation from Python.
RUN pip3 install --break-system-packages selenium RUN pip3 install --break-system-packages selenium
# The requests package is installed to simplify HTTP requests in Python.
RUN pip3 install --break-system-packages requests RUN pip3 install --break-system-packages requests
# SQLAlchemy is installed to provide an ORM for database access.
RUN pip3 install --break-system-packages sqlalchemy RUN pip3 install --break-system-packages sqlalchemy
# PyMySQL is installed as a MySQL client library for Python.
RUN pip3 install --break-system-packages pymysql RUN pip3 install --break-system-packages pymysql
# pandas is installed to enable data analysis and manipulation.
RUN pip3 install --break-system-packages pandas RUN pip3 install --break-system-packages pandas
# BeautifulSoup 4 is installed to facilitate HTML and XML parsing.
RUN pip3 install --break-system-packages bs4 RUN pip3 install --break-system-packages bs4
# feedparser is installed to parse RSS and Atom feeds.
RUN pip3 install --break-system-packages feedparser RUN pip3 install --break-system-packages feedparser
# demjson3 is installed to work with JSON that may not strictly follow the standard.
RUN pip3 install --break-system-packages demjson3 RUN pip3 install --break-system-packages demjson3
# Flask is installed (ignoring any previously installed version) to run the web server.
RUN pip3 install --break-system-packages --ignore-installed flask RUN pip3 install --break-system-packages --ignore-installed flask
# feedgen is installed to generate RSS and Atom feeds programmatically.
RUN pip3 install --break-system-packages feedgen RUN pip3 install --break-system-packages feedgen
# boto3 is installed to access AWS services from Python.
RUN pip3 install --break-system-packages boto3 RUN pip3 install --break-system-packages boto3
# pydub is installed to handle audio file manipulation.
RUN pip3 install --break-system-packages pydub RUN pip3 install --break-system-packages pydub
# json5 is installed to parse and generate JSON5 data.
RUN pip3 install --break-system-packages json5 RUN pip3 install --break-system-packages json5
# pyotp is installed to generate and verify one-time passwords.
RUN pip3 install --break-system-packages pyotp RUN pip3 install --break-system-packages pyotp
# sshtunnel is installed to create SSH tunnels from Python.
RUN pip3 install --break-system-packages sshtunnel RUN pip3 install --break-system-packages sshtunnel
# pypdf is installed to manipulate PDF files from Python.
RUN pip3 install --break-system-packages pypdf RUN pip3 install --break-system-packages pypdf
# Wir kopieren die Cron-Datei in den Container. # The cron configuration file is copied into the appropriate directory inside the container.
COPY config/_CronConfig.txt /etc/cron.d/scrapeNewsCron COPY config/_CronConfig.txt /etc/cron.d/scrapeNewsCron
# The permissions of the cron configuration file are set to be readable by cron.
RUN chmod 0644 /etc/cron.d/scrapeNewsCron RUN chmod 0644 /etc/cron.d/scrapeNewsCron
# The cron configuration is registered so the scheduled tasks become active.
RUN crontab /etc/cron.d/scrapeNewsCron RUN crontab /etc/cron.d/scrapeNewsCron
# The shell script executed by cron is copied into the container.
COPY cron.sh /maui/cron.sh COPY cron.sh /maui/cron.sh
# The cron shell script is marked as executable.
RUN chmod +x /maui/cron.sh RUN chmod +x /maui/cron.sh
# Wir kopieren alle Systemdatein in den Container. # The configuration directory is copied into the container to provide system settings.
COPY config /maui/config COPY config /maui/config
# The manager directory is copied into the container to provide management utilities.
COPY manager /maui/manager COPY manager /maui/manager
# The commands directory is copied into the container to provide command-line tools.
COPY commands /maui/commands COPY commands /maui/commands
# The models directory is copied into the container to provide ORM models.
COPY models /maui/models COPY models /maui/models
# The routes directory is copied into the container to provide web route definitions.
COPY routes /maui/routes COPY routes /maui/routes
# The boot script is copied into the container to serve as the container’s entry point.
COPY boot.sh /maui/boot.sh COPY boot.sh /maui/boot.sh
# The boot script is marked as executable so it can be run as the container’s default command.
RUN chmod +x /maui/boot.sh RUN chmod +x /maui/boot.sh
# Wir definieren das Startscript des Containers. # The boot script is set as the default command that runs when the container starts.
CMD ["/maui/boot.sh"] CMD ["/maui/boot.sh"]
#!/bin/bash #!/bin/bash
set -e set -e
# Dieser Befehl startet den Cron-Service. # This command starts the cron service.
service cron start service cron start
# Dieser Befehl wechselt in das Arbeitsverzeichnis /obsidian/manager. # This command changes into the application manager directory.
cd /maui/manager cd /maui/manager
# Dieser Befehl setzt die Umgebungsvariable PYTHONPATH auf /obsidian. # This command exports the project root so Python modules can be resolved.
export PYTHONPATH=/maui export PYTHONPATH=/maui
# Dieser Befehl startet den ApiManager Webserver. # This command launches the API manager web server.
python3 WebManager.py python3 WebManager.py
#!/usr/bin/env python3 import sys; sys.path.append("..")
# -*- coding: utf-8 -*-
#
# Hier wird der Suchpfad um das übergeordnete Verzeichnis erweitert, damit lokale Module gefunden werden.
import sys
sys.path.append("..")
#
# Hier werden Funktionen des Betriebssystems eingebunden.
import os import os
#
# Hier werden reguläre Ausdrücke zur Textbearbeitung eingebunden.
import re import re
#
# Hier wird das ast-Modul eingebunden, um Python-ähnliche Literale zu parsen.
import ast import ast
#
# Hier wird das json-Modul eingebunden, um JSON-Daten zu verarbeiten.
import json import json
#
# Hier wird das datetime-Modul unter dem Alias _dt eingebunden, um Zeitstempel zu erzeugen.
import datetime as _dt import datetime as _dt
#
# Hier wird das traceback-Modul eingebunden, um Fehlermeldungen formatiert auszugeben.
import traceback import traceback
#
# Hier werden Typ-Alias-Definitionen aus dem typing-Modul eingebunden, um den Code klarer zu gestalten.
from typing import Any, Dict, List, Tuple from typing import Any, Dict, List, Tuple
#
# Hier wird die pypdf-Bibliothek eingebunden, um Textinhalte aus PDF-Dateien zu extrahieren.
from pypdf import PdfReader from pypdf import PdfReader
#
# Hier wird eine spezifische Exception aus pypdf eingebunden, um Leseprobleme differenziert zu behandeln.
from pypdf.errors import PdfReadError from pypdf.errors import PdfReadError
#
# Hier wird der OpenAI-Manager eingebunden, um Chat-Nachrichten an GPT-Modelle zu senden.
from manager.OpenAiManager import OpenAiManager from manager.OpenAiManager import OpenAiManager
#
# Hier wird der MySQL-Manager eingebunden, um Datenbank-Sessions zu erzeugen.
from manager.MysqlManager import MysqlManager from manager.MysqlManager import MysqlManager
#
# Hier werden die SQLAlchemy-Basisklassen eingebunden, damit alle Models korrekt referenziert werden.
from models._system import Base from models._system import Base
#
# Hier wird das Model BaseBase eingebunden, das die Haupttabelle für Tarife repräsentiert.
from models.base_base import BaseBase from models.base_base import BaseBase
#
# Hier wird das Model DealDeal eingebunden, das zugehörige Deal-Einträge verwaltet.
from models.deal_deal import DealDeal from models.deal_deal import DealDeal
#
# Hier wird das Model OptionOpti eingebunden, das optionale Tarif-Bausteine abbildet.
from models.option_opti import OptionOpti from models.option_opti import OptionOpti
# This variable holds the complete multi-line prompt that includes all extraction rules
#
# Hier wird der vollständige Prompt als mehrzeiliger String definiert, der alle Extraktionsregeln beinhaltet.
promptTemplate: str = ( promptTemplate: str = (
""" """
Du bist eine hochpräzise API zur Extraktion spezifischer Mobilfunktarif-Merkmale aus Dokumentenpaaren. Deine Eingabe besteht immer aus dem extrahierten Text von zwei PDF-Dateien: einem **Produktdetailblatt/Flyer** und einem **Produktinformationsblatt (PIB)**, die gemeinsam *einen* spezifischen Tarif beschreiben. Du bist eine hochpräzise API zur Extraktion spezifischer Mobilfunktarif-Merkmale aus Dokumentenpaaren. Deine Eingabe besteht immer aus dem extrahierten Text von zwei PDF-Dateien: einem **Produktdetailblatt/Flyer** und einem **Produktinformationsblatt (PIB)**, die gemeinsam *einen* spezifischen Tarif beschreiben.
...@@ -114,8 +61,7 @@ Numerische Werte als Number belassen, Netto stets auf 4 Nachkommastellen runden. ...@@ -114,8 +61,7 @@ Numerische Werte als Number belassen, Netto stets auf 4 Nachkommastellen runden.
""" """
) )
# # This variable holds the list of keys that must be present in the GPT response
# Hier wird die Liste der Schlüssel definiert, die im GPT-Ergebnis zwingend vorhanden sein müssen.
expectedKeys: List[str] = [ expectedKeys: List[str] = [
"tariff_name", "tariff_name",
"marketing_start_date", "marketing_start_date",
...@@ -148,61 +94,84 @@ expectedKeys: List[str] = [ ...@@ -148,61 +94,84 @@ expectedKeys: List[str] = [
] ]
# # This function extracts the complete text from a PDF file and returns it as a string
# Diese Funktion extrahiert den kompletten Text einer PDF-Datei und gibt ihn als String zurück.
def extractTextFromPdf(pdfPath: str) -> str | None: def extractTextFromPdf(pdfPath: str) -> str | None:
# This condition checks if the file does not exist, returning None if missing
if not os.path.exists(pdfPath): if not os.path.exists(pdfPath):
print(f"INFO: Datei nicht gefunden: {os.path.basename(pdfPath)}") print(f"INFO: Datei nicht gefunden: {os.path.basename(pdfPath)}")
return None return None
# This variable holds all page texts extracted from the PDF
pageTexts: List[str] = [] pageTexts: List[str] = []
# This block attempts to open and read the PDF file
try: try:
with open(pdfPath, "rb") as fileHandle: with open(pdfPath, "rb") as fileHandle:
# This line initializes the PDF reader to parse the file
reader = PdfReader(fileHandle) reader = PdfReader(fileHandle)
# This loop iterates through the pages of the PDF to extract text
for page in reader.pages: for page in reader.pages:
txt = page.extract_text() txt = page.extract_text()
# This condition checks if text was actually extracted from the page
if txt: if txt:
pageTexts.append(txt) pageTexts.append(txt)
# This condition checks if no text was extracted from the PDF
if not pageTexts: if not pageTexts:
print(f"INFO: Kein Text in {os.path.basename(pdfPath)}") print(f"INFO: Kein Text in {os.path.basename(pdfPath)}")
return None return None
# This line returns the joined text from all pages
return "\n".join(pageTexts).strip() return "\n".join(pageTexts).strip()
# This block handles a specific PDF reading error from pypdf
except PdfReadError as exc: except PdfReadError as exc:
print(f"WARNUNG: pypdf-Lesefehler bei '{os.path.basename(pdfPath)}': {exc}") print(f"WARNUNG: pypdf-Lesefehler bei '{os.path.basename(pdfPath)}': {exc}")
return None return None
# This block handles any other unexpected errors
except Exception: except Exception:
print(f"FEHLER: Unerwarteter Fehler bei '{os.path.basename(pdfPath)}':") print(f"FEHLER: Unerwarteter Fehler bei '{os.path.basename(pdfPath)}':")
traceback.print_exc(limit=1) traceback.print_exc(limit=1)
return None return None
# # This function removes incoming code fences to provide pure JSON
# Diese Funktion entfernt eingehende Code-Fences, um reines JSON zu erhalten.
def stripCodeFence(raw: str) -> str: def stripCodeFence(raw: str) -> str:
# This condition checks if the string starts with triple backticks to remove them
if raw.strip().startswith("```"): if raw.strip().startswith("```"):
return re.sub(r"```[\w]*", "", raw).strip() return re.sub(r"```[\w]*", "", raw).strip()
# This line returns the raw string if no code fences were found
return raw return raw
# # This function removes unnecessary commas before closing brackets in JSON strings
# Diese Funktion entfernt überflüssige Kommas vor schließenden Klammern aus JSON-Strings.
def removeTrailingCommas(js: str) -> str: def removeTrailingCommas(js: str) -> str:
# This line substitutes commas that appear right before closing braces or brackets
return re.sub(r",(\s*[}\]])", r"\1", js) return re.sub(r",(\s*[}\]])", r"\1", js)
# # This function tries to parse a string as JSON using multiple repair approaches
# Diese Funktion versucht, einen String in ein Dictionary umzuwandeln und nutzt mehrere Reparatur-Ansätze.
def loadJsonSafe(raw: str) -> Dict[str, Any] | None: def loadJsonSafe(raw: str) -> Dict[str, Any] | None:
# This variable holds the cleaned string without carriage returns
cleaned = stripCodeFence(raw).replace("\r", "") cleaned = stripCodeFence(raw).replace("\r", "")
# This loop attempts different variants of the cleaned string for JSON decoding
for variant in (cleaned, removeTrailingCommas(cleaned)): for variant in (cleaned, removeTrailingCommas(cleaned)):
try: try:
return json.loads(variant) return json.loads(variant)
except json.JSONDecodeError: except json.JSONDecodeError:
pass pass
# This block tries a relaxed approach using Python literal evaluation after replacements
try: try:
relaxed = cleaned.replace("null", "None").replace("true", "True").replace("false", "False") relaxed = cleaned.replace("null", "None").replace("true", "True").replace("false", "False")
return ast.literal_eval(relaxed) return ast.literal_eval(relaxed)
...@@ -210,95 +179,151 @@ def loadJsonSafe(raw: str) -> Dict[str, Any] | None: ...@@ -210,95 +179,151 @@ def loadJsonSafe(raw: str) -> Dict[str, Any] | None:
return None return None
def validateResponse(raw: str) -> Tuple[bool, Dict[str, Any] | None]:
    """Parse a GPT answer and verify that all required keys are present.

    Returns ``(True, data)`` when *raw* decodes to a dict containing every
    key from the module-level ``expectedKeys`` list, else ``(False, None)``.
    """
    data = loadJsonSafe(raw)
    # Reject anything that is not a dict (covers the None failure case too).
    if not isinstance(data, dict):
        print("VALIDATION: Antwort ist kein gültiges JSON-Objekt.")
        return False, None
    # Collect every required key the answer failed to provide.
    missingKeys = [key for key in expectedKeys if key not in data]
    if missingKeys:
        print(f"VALIDATION: Fehlende Schlüssel: {', '.join(missingKeys)}")
        return False, None
    return True, data
# Script entry point: pair up "<id>_flyer.pdf" / "<id>_pib.pdf" files from
# the cache, ask GPT to extract structured tariff data from their text and
# store the validated JSON in the BaseBase rows of the database.
if __name__ == "__main__":
    # Directory holding the previously downloaded PDF files.
    cacheDir = "../cache"
    print(f"INFO: Suche nach PDF-Dateien in '{cacheDir}' …")
    if not os.path.isdir(cacheDir):
        print("FEHLER: Cache-Verzeichnis nicht gefunden.")
        sys.exit(1)
    # All PDFs in the cache, matched case-insensitively on the extension.
    pdfFiles = [f for f in os.listdir(cacheDir) if f.lower().endswith(".pdf")]
    # Tariff IDs derived from the filename stems; a set deduplicates IDs
    # that have both a flyer and a PIB document.
    tariffIds: set[str] = set()
    for f in pdfFiles:
        stem = f[:-4].lower()
        # "<id>_flyer" -> strip the 6-char suffix "_flyer".
        if stem.endswith("_flyer"):
            tariffIds.add(stem[:-6])
        # "<id>_pib" -> strip the 4-char suffix "_pib".
        elif stem.endswith("_pib"):
            tariffIds.add(stem[:-4])
    if not tariffIds:
        print("INFO: Keine passenden PDF-Paare gefunden.")
        sys.exit(0)
    # GPT client and database session used for all tariffs below.
    gptManager = OpenAiManager()
    dbSession = MysqlManager().getSession()
    for tariffId in sorted(tariffIds):
        print(f"\n--- Verarbeitung ID: {tariffId} ---")
        # All BaseBase rows belonging to this provider code.
        baseRecords = dbSession.query(BaseBase).filter_by(providercode_base=tariffId).all()
        if not baseRecords:
            print("WARNUNG: Kein BaseBase-Datensatz gefunden – übersprungen.")
            continue
        # Skip tariffs whose rows all carry details already (idempotent rerun).
        if all(br.details_base for br in baseRecords):
            print("INFO: details_base bereits für alle Zeilen gefüllt – übersprungen.")
            continue
        # Paths of the two source PDFs for this tariff.
        flyerPath = os.path.join(cacheDir, f"{tariffId}_flyer.pdf")
        pibPath = os.path.join(cacheDir, f"{tariffId}_pib.pdf")
        flyerText = extractTextFromPdf(flyerPath)
        pibText = extractTextFromPdf(pibPath)
        # Both documents are required; skip the tariff if either is empty.
        if not flyerText or not pibText:
            print("INFO: Fehlende Texte – übersprungen.")
            continue
        # promptTemplate is a module-level prompt defined earlier in the file.
        fullPrompt = promptTemplate + "# Flyer-Text:\n" + flyerText + "\n" + "# PIB-Text:\n" + pibText
        # Holds the parsed dict once a GPT answer passes validation.
        validatedData: Dict[str, Any] | None = None
        # Up to three attempts to obtain a valid, complete JSON answer.
        for attempt in range(1, 4):
            print(f"INFO: GPT-Abfrage Versuch {attempt}/3 …")
            try:
                raw = gptManager.chat(fullPrompt, model="gpt-4.1")
            # A failed request is treated like an empty (invalid) answer so
            # the retry loop keeps going instead of crashing.
            except Exception as exc:
                print(f"FEHLER: GPT-Abfrage fehlgeschlagen: {exc}")
                raw = ""
            ok, parsed = validateResponse(raw)
            if ok:
                validatedData = parsed
                break
            print("WARNUNG: Antwort ungültig – nächster Versuch …")
        if not validatedData:
            print("FEHLER: Drei ungültige Antworten – übersprungen.")
            continue
        # Fill only rows that have no details yet; existing data is kept.
        for br in baseRecords:
            if br.details_base is None:
                br.details_base = validatedData
                br.updated_base = _dt.datetime.now()
        dbSession.commit()
        print(f"INFO: JSON in {len([b for b in baseRecords if b.details_base])} Zeile(n) gespeichert.")
    dbSession.close()
    print("INFO: Verarbeitung abgeschlossen.")
import sys import sys; sys.path.append("..")
# In diesem import wird der Pfad um eine Ebene nach oben erweitert.
sys.path.append("..")
# In diesem import werden Funktionen des Betriebssystems eingebunden.
import os import os
# In diesem import wird Funktionalität zum Lesen und Schreiben von CSV-Dateien eingebunden.
import csv import csv
# In diesem import wird Funktionalität zum Arbeiten mit Datum und Uhrzeit eingebunden.
import datetime import datetime
# In diesem import wird Funktionalität zum Hinzufügen von Pausen im Code eingebunden.
import time import time
# In diesem import wird die Bibliothek für zeitbasierte Einmalpasswörter eingebunden.
import pyotp import pyotp
# In diesem import wird Funktionalität für reguläre Ausdrücke eingebunden.
import re import re
# In diesem import wird Funktionalität für Base64-Codierung eingebunden.
import base64 import base64
# In diesem import wird eine Klasse für genaue Dezimalberechnungen eingebunden.
from decimal import Decimal from decimal import Decimal
# In diesem import wird die Funktionalität zur Rückverfolgung von Fehlern eingebunden.
import traceback import traceback
# In diesem import wird Funktionalität zum Kopieren und Löschen von Dateien eingebunden.
import shutil import shutil
# In diesem import wird die Bibliothek zum Umgang mit Pandas-Datenstrukturen eingebunden.
import pandas as pd import pandas as pd
# In diesem import wird Funktionalität für HTTP-Anfragen eingebunden.
import requests import requests
# In diesem import wird die Bibliothek BeautifulSoup zum Parsen von HTML eingebunden.
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
# In diesem import werden verschiedene Selektoren aus Selenium eingebunden.
from selenium.webdriver.common.by import By from selenium.webdriver.common.by import By
# In diesem import werden Aktionen zum Simulieren von Mausbewegungen eingebunden.
from selenium.webdriver.common.action_chains import ActionChains from selenium.webdriver.common.action_chains import ActionChains
# In diesem import wird eine explizite Wartefunktion für Selenium geladen.
from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support.ui import WebDriverWait
# In diesem import wird eine Klasse zum Erstellen von Dropdown-Auswahlen eingebunden.
from selenium.webdriver.support.ui import Select from selenium.webdriver.support.ui import Select
# In diesem import werden verschiedene Bedingungen für Selenium-Wartefunktionen eingebunden.
from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support import expected_conditions as EC
# In diesem import wird eine spezielle Ausnahme für Zeitüberschreitungen in Selenium geladen.
from selenium.webdriver.support.wait import TimeoutException from selenium.webdriver.support.wait import TimeoutException
# In diesem import wird eine Ausnahme für nicht vorhandene Elemente in Selenium geladen.
from selenium.common.exceptions import NoSuchElementException from selenium.common.exceptions import NoSuchElementException
# In dieser import-Anweisung wird der SeleniumManager geladen.
from manager.SeleniumManager import SeleniumManager from manager.SeleniumManager import SeleniumManager
# In dieser import-Anweisung werden Zugangsdaten aus der MauiConfig geladen.
from config.MauiConfig import MAUI_USERNAME, MAUI_PASSWORD, MAUI_AUTHCODE from config.MauiConfig import MAUI_USERNAME, MAUI_PASSWORD, MAUI_AUTHCODE
# In dieser import-Anweisung wird ein MySQL-Manager zum Umgang mit Datenbanken geladen.
from manager.MysqlManager import MysqlManager from manager.MysqlManager import MysqlManager
# In dieser import-Anweisung werden Modelle aus der System-Klasse geladen.
from models._system import Base from models._system import Base
# In dieser import-Anweisung werden Basisklassen für Datenbankmodelle geladen.
from models.base_base import BaseBase from models.base_base import BaseBase
# In dieser import-Anweisung wird das Modell DealDeal eingebunden.
from models.deal_deal import DealDeal from models.deal_deal import DealDeal
# In dieser import-Anweisung wird das Modell OptionOpti eingebunden.
from models.option_opti import OptionOpti from models.option_opti import OptionOpti
# Module-level set of category IDs seen so far, used to avoid handling the
# same category twice. NOTE(review): populated by scraping code further down
# in this module — confirm against those functions.
uniqueCategorySet = set()
# This function downloads a PDF file as Base64 from the Selenium context and saves it.
# In dieser Funktion wird eine PDF-Datei aus dem Selenium-Kontext als Base64 heruntergeladen und abgespeichert.
def downloadPdfSelenium(seleniumDriver, pdfUrl, downloadFolder, fileName): def downloadPdfSelenium(seleniumDriver, pdfUrl, downloadFolder, fileName):
# In dieser Variablen wird ein Skript abgelegt, das als asynchroner Aufruf eine PDF-Datei anfordert und als Base64-String zurückliefert. # This variable holds the asynchronous JavaScript code that requests the PDF as Base64.
downloadScript = """ downloadScript = """
var callback = arguments[arguments.length - 1]; var callback = arguments[arguments.length - 1];
var xhr = new XMLHttpRequest(); var xhr = new XMLHttpRequest();
...@@ -113,204 +52,194 @@ def downloadPdfSelenium(seleniumDriver, pdfUrl, downloadFolder, fileName): ...@@ -113,204 +52,194 @@ def downloadPdfSelenium(seleniumDriver, pdfUrl, downloadFolder, fileName):
xhr.send(); xhr.send();
""" """
# In dieser Variablen wird das Ergebnis des ausgeführten Skripts als Base64-String gespeichert. # This variable stores the Base64 string that the script returns.
pdfBase64String = seleniumDriver.execute_async_script(downloadScript, pdfUrl) pdfBase64String = seleniumDriver.execute_async_script(downloadScript, pdfUrl)
# In dieser if-Abzweigung wird geprüft, ob die Base64-Rückgabe korrekt ist. # This if-structure checks if we have a valid Base64 result.
if not pdfBase64String: if not pdfBase64String:
# In diesem Zweig wird eine Ausnahme ausgelöst, wenn kein gültiger Base64-Inhalt vorliegt. # This line raises an exception if the PDF download failed.
raise Exception("Der PDF-Download per Selenium ist fehlgeschlagen.") raise Exception("Der PDF-Download per Selenium ist fehlgeschlagen.")
# In dieser Variablen wird der vollständige Pfad für die zu speichernde PDF-Datei ermittelt. # This variable defines the full path where the PDF will be saved.
destinationPath = os.path.join(downloadFolder, fileName) destinationPath = os.path.join(downloadFolder, fileName)
# In diesem with-Block wird die Zieldatei erstellt und mit dem dekodierten Inhalt befüllt. # This with-structure opens the file in write-binary mode and writes the decoded PDF data.
with open(destinationPath, "wb") as pdfFile: with open(destinationPath, "wb") as pdfFile:
# An dieser Stelle wird der Base64-String dekodiert und in das PDF geschrieben. # This line decodes the Base64 data and writes it into the file.
pdfFile.write(base64.b64decode(pdfBase64String)) pdfFile.write(base64.b64decode(pdfBase64String))
def login(seleniumManager, userName, userPassword, rawToken):
    """Log in to the Maui portal via Selenium, including the TOTP 2FA step.

    Parameters:
        seleniumManager: wrapper exposing ``simpleRequest`` for page loads.
        userName: portal account name.
        userPassword: portal account password.
        rawToken: shared TOTP secret used to derive the current 2FA code.
    """
    # Load the login page and keep the underlying driver.
    driver = seleniumManager.simpleRequest("https://maui.md.de")
    pageWait = WebDriverWait(driver, 10)
    # Fill both credential fields as soon as they exist in the DOM.
    pageWait.until(EC.presence_of_element_located((By.ID, "mat-input-0"))).send_keys(userName)
    pageWait.until(EC.presence_of_element_located((By.ID, "mat-input-1"))).send_keys(userPassword)
    # Brief pause for UI stability before submitting the form.
    time.sleep(1)
    submitButton = pageWait.until(
        EC.element_to_be_clickable((By.XPATH, "//button[.//span[contains(text(),'Anmelden')]]"))
    )
    submitButton.click()
    # Derive the current one-time code and enter it into the 2FA dialog.
    oneTimeCode = pyotp.TOTP(rawToken).now()
    pageWait.until(EC.presence_of_element_located((By.ID, "mat-input-2"))).send_keys(oneTimeCode)
    dialogButton = pageWait.until(
        EC.element_to_be_clickable((By.XPATH, "//mat-dialog-actions//button[span[contains(text(),'Anmelden')]]"))
    )
    # Click via JavaScript because the dialog button may be overlaid.
    driver.execute_script("arguments[0].click();", dialogButton)
def openLaufzeitvertrag(seleniumManager):
    """Navigate the authenticated session to the 'Laufzeitvertrag' page."""
    driver = seleniumManager.driver
    linkWait = WebDriverWait(driver, 10)
    # Locate the navigation link once it is present in the DOM.
    contractLink = linkWait.until(
        EC.presence_of_element_located((By.XPATH, "//a[contains(text(),'Laufzeitvertrag')]"))
    )
    # Follow the link's href directly instead of clicking the element.
    driver.get(contractLink.get_attribute("href"))
def waitForDropdownReady(seleniumDriver, wait, maxRetries=3, retryDelay=5):
    """Wait until the tariff dropdown is usable, retrying when overlays block it.

    Returns True once the blocking wait-iframe and background layer have
    disappeared and the ``tarif_id`` element is present; False when all
    *maxRetries* attempts time out or fail unexpectedly.
    """
    for attemptIndex in range(maxRetries):
        isLastAttempt = attemptIndex == maxRetries - 1
        try:
            print(f"DEBUG: Warte auf Dropdown-Bereitschaft (Versuch {attemptIndex + 1}/{maxRetries})...")
            # The portal shows a wait-iframe and a background layer while busy;
            # both must become invisible before the form accepts interaction.
            longWait = WebDriverWait(seleniumDriver, 60)
            longWait.until(
                EC.invisibility_of_element_located((By.XPATH, "//iframe[contains(@src, 'wait.html')]"))
            )
            longWait.until(
                EC.invisibility_of_element_located((By.ID, "bg_layer"))
            )
            # Finally the dropdown itself must be present in the DOM.
            wait.until(EC.presence_of_element_located((By.NAME, "tarif_id")))
            print(f"DEBUG: Dropdown ist bereit (Versuch {attemptIndex + 1}).")
            return True
        # Overlays stayed visible (or the dropdown never appeared) in time.
        except TimeoutException:
            print(f"DEBUG: Warnung: Timeout beim Warten auf Dropdown-Bereitschaft (Versuch {attemptIndex + 1}/{maxRetries}).")
            if isLastAttempt:
                print(f"DEBUG: FEHLER: Konnte nach {maxRetries} Versuchen nicht auf Dropdown-Bereitschaft warten.")
                break
            time.sleep(retryDelay)
        # Anything else is logged and retried the same way.
        except Exception as exception:
            print(f"DEBUG: Unerwarteter Fehler beim Warten auf Dropdown (Versuch {attemptIndex + 1}/{maxRetries}): {exception}")
            if isLastAttempt:
                print(f"DEBUG: FEHLER: Konnte nach {maxRetries} Versuchen wegen unerwartetem Fehler nicht auf Dropdown warten.")
                break
            time.sleep(retryDelay)
    return False
def parsePlanPrice(seleniumDriver):
    """Read the displayed gross tariff price and return the net price.

    The gross amount (German number format, assumed to include 19% VAT) is
    taken from the ``preis_anzeige_tarif`` element; on any failure a warning
    is printed and 0.0 is returned.
    """
    netPrice = 0.0
    try:
        displayText = seleniumDriver.find_element(By.ID, "preis_anzeige_tarif").text
        amountMatch = re.search(r'([\d\.,]+)\s*EUR', displayText)
        if amountMatch:
            # Convert "1.234,56" into "1234.56" before parsing as float.
            normalized = amountMatch.group(1).replace(".", "").replace(",", ".")
            # Strip VAT (19%) and keep five decimal places.
            netPrice = round(float(normalized) / 1.19, 5)
    except Exception as exception:
        print(f"DEBUG: Warnung: Konnte Tarifpreis nicht extrahieren: {exception}")
    return netPrice
# This function parses available campaigns from the corresponding dropdown.
# In dieser Funktion werden Kampagnen aus dem entsprechenden Dropdown geparst und als Liste zurückgegeben.
def parseCampaigns(seleniumDriver): def parseCampaigns(seleniumDriver):
# In dieser Liste werden alle gefundenen Kampagnen-Tuples gespeichert. # This variable is a list that collects all found campaigns as tuples.
campaignsList = [] campaignsList = []
# In diesem try-Block wird versucht, das Kampagnen-Select und dessen Optionen zu finden. # This try-structure attempts to locate and parse the campaign select element.
try: try:
campaignSelect = seleniumDriver.find_element(By.NAME, "am_aktion_select") campaignSelect = seleniumDriver.find_element(By.NAME, "am_aktion_select")
campaignOptions = campaignSelect.find_elements(By.TAG_NAME, "option") campaignOptions = campaignSelect.find_elements(By.TAG_NAME, "option")
# In dieser for-Schleife werden die Attribute jeder Option ausgelesen und gefiltert. # This for-structure iterates over all option elements in the campaign select.
for copt in campaignOptions: for copt in campaignOptions:
val = copt.get_attribute("value") val = copt.get_attribute("value")
txt = copt.text.strip() txt = copt.text.strip()
# In dieser if-Abzweigung werden ungültige oder leere Werte übersprungen. # This if-structure skips invalid or empty values.
if not val or val in [" |", "-1|", "|", "-1|", "0|"]: if not val or val in [" |", "-1|", "|", "-1|", "0|"]:
continue continue
partsVal = val.split("|") partsVal = val.split("|")
campaignId = partsVal[0].strip() campaignId = partsVal[0].strip()
# In dieser if-Abzweigung wird geprüft, ob eine Kampagnen-ID extrahiert werden konnte. # This if-structure checks if a valid campaign ID was extracted.
if not campaignId: if not campaignId:
continue continue
# In dieser if-Abzweigung wird geprüft, ob ein Trennstrich im Text enthalten ist. # This if-structure checks if the text has a dash that splits the campaign name.
if "-" in txt: if "-" in txt:
splitted = txt.split("-", 1) splitted = txt.split("-", 1)
campaignName = splitted[1].strip() campaignName = splitted[1].strip()
...@@ -319,87 +248,97 @@ def parseCampaigns(seleniumDriver): ...@@ -319,87 +248,97 @@ def parseCampaigns(seleniumDriver):
campaignsList.append((campaignId, campaignName)) campaignsList.append((campaignId, campaignName))
# In dieser except-Abzweigung wird ein Hinweis ausgegeben, falls das Element nicht gefunden werden konnte. # This except-structure handles any error if the campaign select is not found.
except Exception as exception: except Exception as exception:
print(f"DEBUG: Warnung: Konnte Kampagnen nicht extrahieren: {exception}") print(f"DEBUG: Warnung: Konnte Kampagnen nicht extrahieren: {exception}")
# In dieser Zeile wird die Liste der gefundenen Kampagnen zurückgegeben. # This line returns the list of found campaigns.
return campaignsList return campaignsList
# This function scrapes the main data and writes it to various CSV files.
# In dieser Funktion werden die Hauptdaten gescraped und in verschiedene CSV-Dateien geschrieben.
def scrapeData(seleniumManager): def scrapeData(seleniumManager):
# In dieser Variablen wird der Selenium-Driver abgelegt. # This variable holds the Selenium driver instance.
seleniumDriver = seleniumManager.driver seleniumDriver = seleniumManager.driver
# In dieser Variablen wird ein WebDriverWait mit Timeout 20 Sekunden abgelegt. # This variable is a WebDriverWait object with a 20-second timeout.
wait = WebDriverWait(seleniumDriver, 20) wait = WebDriverWait(seleniumDriver, 20)
# In dieser Variablen wird ein Pfad für das Cache-Verzeichnis definiert. # This variable defines the path for the cache directory.
cacheDir = "../cache" cacheDir = "../cache"
# In dieser if-Abzweigung wird geprüft, ob das Verzeichnis bereits existiert. # This if-structure checks if the cache directory already exists.
if os.path.exists(cacheDir): if os.path.exists(cacheDir):
# In diesem try-Block wird versucht, das bestehende Verzeichnis zu löschen. # This try-structure attempts to remove the existing directory if present.
try: try:
shutil.rmtree(cacheDir) shutil.rmtree(cacheDir)
print(f"Info: Bestehendes Cache-Verzeichnis '{cacheDir}' wurde gelöscht.") print(f"Info: Bestehendes Cache-Verzeichnis '{cacheDir}' wurde gelöscht.")
except OSError as exception: except OSError as exception:
print(f"Fehler beim Löschen von Verzeichnis {cacheDir}: {exception}") print(f"Fehler beim Löschen von Verzeichnis {cacheDir}: {exception}")
# In diesem try-Block wird das Verzeichnis neu erstellt oder sichergestellt, dass es existiert. # This try-structure ensures that the cache directory is created.
try: try:
os.makedirs(cacheDir, exist_ok=True) os.makedirs(cacheDir, exist_ok=True)
print(f"Info: Cache-Verzeichnis '{cacheDir}' sichergestellt/neu erstellt.") print(f"Info: Cache-Verzeichnis '{cacheDir}' sichergestellt/neu erstellt.")
except OSError as exception: except OSError as exception:
print(f"Fehler beim Erstellen von Verzeichnis {cacheDir}: {exception}") print(f"Fehler beim Erstellen von Verzeichnis {cacheDir}: {exception}")
# In diesen Variablen werden die Pfade zu den einzelnen CSV-Dateien definiert. # This variable stores the path for the plans.csv file.
plansCsvFilePath = os.path.join(cacheDir, "plans.csv") plansCsvFilePath = os.path.join(cacheDir, "plans.csv")
# This variable stores the path for the campaigns.csv file.
campaignsCsvFilePath = os.path.join(cacheDir, "campaigns.csv") campaignsCsvFilePath = os.path.join(cacheDir, "campaigns.csv")
# This variable stores the path for the options.csv file.
optionsCsvFilePath = os.path.join(cacheDir, "options.csv") optionsCsvFilePath = os.path.join(cacheDir, "options.csv")
# This variable stores the path for the categorys.csv file.
categorysCsvFilePath = os.path.join(cacheDir, "categorys.csv") categorysCsvFilePath = os.path.join(cacheDir, "categorys.csv")
# In dieser Menge werden bereits geschriebene Tarife gespeichert, um Duplikate zu vermeiden. # This variable is a set used to track which tariffs have been written to avoid duplicates.
writtenPlanIdSet = set() writtenPlanIdSet = set()
# In dieser Liste werden die möglichen Rahmenvertragsnummern abgelegt. # This variable stores a list of possible framework contract numbers.
frameworkList = ["", 980066161, 980008940, 981000541] frameworkList = ["", 980066161, 980008940, 981000541]
# In diesem with-Block werden alle CSV-Dateien geöffnet und die Writer initialisiert. # This with-structure opens all CSV files and prepares the writers.
with open(plansCsvFilePath, mode="w", newline="", encoding="utf-8") as plansFile, \ with open(plansCsvFilePath, mode="w", newline="", encoding="utf-8") as plansFile, \
open(campaignsCsvFilePath, mode="w", newline="", encoding="utf-8") as campaignsFile, \ open(campaignsCsvFilePath, mode="w", newline="", encoding="utf-8") as campaignsFile, \
open(optionsCsvFilePath, mode="w", newline="", encoding="utf-8") as optionsFile, \ open(optionsCsvFilePath, mode="w", newline="", encoding="utf-8") as optionsFile, \
open(categorysCsvFilePath, mode="w", newline="", encoding="utf-8") as categorysFile: open(categorysCsvFilePath, mode="w", newline="", encoding="utf-8") as categorysFile:
# In diesen Variablen werden die CSV-Writer für jede Datei angelegt. # This variable is the CSV writer for plans.csv.
plansWriter = csv.writer(plansFile, delimiter=";") plansWriter = csv.writer(plansFile, delimiter=";")
# This variable is the CSV writer for campaigns.csv.
campaignsWriter = csv.writer(campaignsFile, delimiter=";") campaignsWriter = csv.writer(campaignsFile, delimiter=";")
# This variable is the CSV writer for options.csv.
optionsWriter = csv.writer(optionsFile, delimiter=";") optionsWriter = csv.writer(optionsFile, delimiter=";")
# This variable is the CSV writer for categorys.csv.
categorysWriter = csv.writer(categorysFile, delimiter=";") categorysWriter = csv.writer(categorysFile, delimiter=";")
# In dieser Zeile werden die Spaltenüberschriften für die plans.csv geschrieben. # This line writes the header row for plans.csv.
plansWriter.writerow(["id", "provider", "network", "name", "price", "rahmen"]) plansWriter.writerow(["id", "provider", "network", "name", "price", "rahmen"])
# In dieser Zeile werden die Spaltenüberschriften für die campaigns.csv geschrieben. # This line writes the header row for campaigns.csv.
campaignsWriter.writerow(["id", "plan", "name"]) campaignsWriter.writerow(["id", "plan", "name"])
# In dieser Zeile werden die Spaltenüberschriften für die options.csv geschrieben. # This line writes the header row for options.csv.
optionsWriter.writerow(["id", "category", "plan", "name", "price"]) optionsWriter.writerow(["id", "category", "plan", "name", "price"])
# In dieser Zeile werden die Spaltenüberschriften für die categorys.csv geschrieben. # This line writes the header row for categorys.csv.
categorysWriter.writerow(["id", "name"]) categorysWriter.writerow(["id", "name"])
# In dieser for-Schleife werden alle Rahmenvertragsnummern durchlaufen. # This for-structure iterates over each framework contract option.
for currentFramework in frameworkList: for currentFramework in frameworkList:
# In dieser if-Abzweigung wird geprüft, ob wir eine Nummer im Rahmenfeld setzen müssen. # This if-structure checks if we must set a framework number.
if currentFramework != "": if currentFramework != "":
# In diesem try-Block wird die Checkbox für Rahmenvertrag angeklickt. # This try-structure attempts to click the framework checkbox.
try: try:
wait.until(EC.element_to_be_clickable((By.NAME, "rv_option"))) wait.until(EC.element_to_be_clickable((By.NAME, "rv_option")))
rvCheckbox = seleniumDriver.find_element(By.NAME, "rv_option") rvCheckbox = seleniumDriver.find_element(By.NAME, "rv_option")
...@@ -408,7 +347,7 @@ def scrapeData(seleniumManager): ...@@ -408,7 +347,7 @@ def scrapeData(seleniumManager):
except Exception as exception: except Exception as exception:
print(f"DEBUG: Konnte Checkbox 'rv_option' nicht setzen: {exception}") print(f"DEBUG: Konnte Checkbox 'rv_option' nicht setzen: {exception}")
# In diesem try-Block wird das Eingabefeld für die Rahmenvertragsnummer gesetzt. # This try-structure attempts to fill in the framework number field.
try: try:
wait.until(EC.presence_of_element_located((By.NAME, "rv_nr"))) wait.until(EC.presence_of_element_located((By.NAME, "rv_nr")))
rvNrField = seleniumDriver.find_element(By.NAME, "rv_nr") rvNrField = seleniumDriver.find_element(By.NAME, "rv_nr")
...@@ -420,17 +359,18 @@ def scrapeData(seleniumManager): ...@@ -420,17 +359,18 @@ def scrapeData(seleniumManager):
else: else:
print("emptyRahmen") print("emptyRahmen")
# In dieser Zeile wird eine Wartezeit von 5 Sekunden eingefügt. # This function call adds a delay of 5 seconds before further steps.
time.sleep(5) time.sleep(5)
# In diesen Variablen werden die aktuellen Tarifwelten und Netze neu ausgelesen. # This variable stores the list of available tariff worlds by reading the elements.
tarifWeltElements = seleniumDriver.find_elements(By.NAME, "tarif_welt") tarifWeltElements = seleniumDriver.find_elements(By.NAME, "tarif_welt")
tarifWelten = [elem.get_attribute("value") for elem in tarifWeltElements if elem.get_attribute("value")] tarifWelten = [elem.get_attribute("value") for elem in tarifWeltElements if elem.get_attribute("value")]
# This variable stores the list of available networks by reading the elements.
netzElements = seleniumDriver.find_elements(By.NAME, "netz") netzElements = seleniumDriver.find_elements(By.NAME, "netz")
netzList = [elem.get_attribute("value") for elem in netzElements if elem.get_attribute("value")] netzList = [elem.get_attribute("value") for elem in netzElements if elem.get_attribute("value")]
# In diesem try-Block wird der Radio-Button für die Produktkategorie 'A' geklickt. # This try-structure attempts to select the product category 'A'.
try: try:
productCategoryElement = wait.until(EC.element_to_be_clickable((By.XPATH, '//input[@name="sel_produkt_kategorie" and @value="A"]'))) productCategoryElement = wait.until(EC.element_to_be_clickable((By.XPATH, '//input[@name="sel_produkt_kategorie" and @value="A"]')))
seleniumDriver.execute_script("arguments[0].click();", productCategoryElement) seleniumDriver.execute_script("arguments[0].click();", productCategoryElement)
...@@ -439,15 +379,15 @@ def scrapeData(seleniumManager): ...@@ -439,15 +379,15 @@ def scrapeData(seleniumManager):
traceback.print_exc() traceback.print_exc()
continue continue
# In dieser for-Schleife werden alle gefundenen Tarifwelten durchlaufen. # This for-structure iterates over each available tariff world.
for tarifWelt in tarifWelten: for tarifWelt in tarifWelten:
# In dieser if-Abzweigung wird geprüft, ob das Dropdown bereit ist. # This if-structure checks if the dropdown is ready.
if not waitForDropdownReady(seleniumDriver, wait): if not waitForDropdownReady(seleniumDriver, wait):
print(f"DEBUG: Überspringe Tarifwelt {tarifWelt}, da die Seite nicht rechtzeitig bereit war.") print(f"DEBUG: Überspringe Tarifwelt {tarifWelt}, da die Seite nicht rechtzeitig bereit war.")
continue continue
# In diesem try-Block wird der jeweilige Radio-Button für die Tarifwelt geklickt. # This try-structure attempts to click the radio button for the current tariff world.
try: try:
tarifWeltRadio = wait.until(EC.element_to_be_clickable((By.XPATH, f'//input[@name="tarif_welt" and @value="{tarifWelt}"]'))) tarifWeltRadio = wait.until(EC.element_to_be_clickable((By.XPATH, f'//input[@name="tarif_welt" and @value="{tarifWelt}"]')))
seleniumDriver.execute_script("arguments[0].click();", tarifWeltRadio) seleniumDriver.execute_script("arguments[0].click();", tarifWeltRadio)
...@@ -456,15 +396,15 @@ def scrapeData(seleniumManager): ...@@ -456,15 +396,15 @@ def scrapeData(seleniumManager):
traceback.print_exc() traceback.print_exc()
continue continue
# In dieser for-Schleife werden alle möglichen Netze durchlaufen. # This for-structure iterates over each possible network.
for net in netzList: for net in netzList:
# In dieser if-Abzweigung wird nochmals geprüft, ob das Dropdown bereit ist. # This if-structure checks if the dropdown is still ready.
if not waitForDropdownReady(seleniumDriver, wait): if not waitForDropdownReady(seleniumDriver, wait):
print(f"DEBUG: Überspringe Netz {net} in Tarifwelt {tarifWelt}, da die Seite nicht rechtzeitig bereit war.") print(f"DEBUG: Überspringe Netz {net} in Tarifwelt {tarifWelt}, da die Seite nicht rechtzeitig bereit war.")
continue continue
# In diesem try-Block wird das jeweilige Netz geklickt. # This try-structure attempts to click the radio button for the network.
try: try:
netRadio = wait.until(EC.element_to_be_clickable((By.XPATH, f'//input[@name="netz" and @value="{net}"]'))) netRadio = wait.until(EC.element_to_be_clickable((By.XPATH, f'//input[@name="netz" and @value="{net}"]')))
seleniumDriver.execute_script("arguments[0].click();", netRadio) seleniumDriver.execute_script("arguments[0].click();", netRadio)
...@@ -473,12 +413,12 @@ def scrapeData(seleniumManager): ...@@ -473,12 +413,12 @@ def scrapeData(seleniumManager):
traceback.print_exc() traceback.print_exc()
continue continue
# In dieser if-Abzweigung wird noch einmal die Verfügbarkeit des Tarif-Dropdowns geprüft. # This if-structure checks again if the tariff dropdown is ready.
if not waitForDropdownReady(seleniumDriver, wait): if not waitForDropdownReady(seleniumDriver, wait):
print(f"DEBUG: Überspringe Netz {net} in Tarifwelt {tarifWelt}, da Tarif-Dropdown nicht bereit war.") print(f"DEBUG: Überspringe Netz {net} in Tarifwelt {tarifWelt}, da Tarif-Dropdown nicht bereit war.")
continue continue
# In diesem try-Block werden alle Tarifoptionen für das gegebene Netz ausgelesen. # This try-structure collects all tariff options from the dropdown for the current network.
try: try:
dropdown = wait.until(EC.presence_of_element_located((By.NAME, "tarif_id"))) dropdown = wait.until(EC.presence_of_element_located((By.NAME, "tarif_id")))
selectObj = Select(dropdown) selectObj = Select(dropdown)
...@@ -488,26 +428,26 @@ def scrapeData(seleniumManager): ...@@ -488,26 +428,26 @@ def scrapeData(seleniumManager):
traceback.print_exc() traceback.print_exc()
continue continue
# In dieser for-Schleife werden alle Tarife aus dem Dropdown verarbeitet. # This for-structure iterates over each tariff in the dropdown.
for tariffId, optText in optionsToProcess: for tariffId, optText in optionsToProcess:
# In dieser if-Abzweigung werden Platzhalterwerte übersprungen. # This if-structure skips placeholder text and empty IDs.
if optText in ["Bitte wählen Sie aus...", ""] or not tariffId: if optText in ["Bitte wählen Sie aus...", ""] or not tariffId:
continue continue
# In dieser if-Abzweigung wird geprüft, ob der Tarif für diesen Rahmen schon erfasst wurde. # This if-structure checks if the tariff is already written for the current framework.
if (tariffId, currentFramework) in writtenPlanIdSet: if (tariffId, currentFramework) in writtenPlanIdSet:
print(f"DEBUG: Tarif {tariffId} für Rahmen {currentFramework} bereits in CSV, überspringe.") print(f"DEBUG: Tarif {tariffId} für Rahmen {currentFramework} bereits in CSV, überspringe.")
continue continue
print(f"DEBUG: Verarbeite: {tariffId} - {net} - {optText} (Rahmen {currentFramework})") print(f"DEBUG: Verarbeite: {tariffId} - {net} - {optText} (Rahmen {currentFramework})")
# In dieser if-Abzweigung wird geprüft, ob das Dropdown weiterhin verfügbar ist. # This if-structure checks the dropdown readiness again before proceeding.
if not waitForDropdownReady(seleniumDriver, wait): if not waitForDropdownReady(seleniumDriver, wait):
print(f"DEBUG: Überspringe Tarif {tariffId} ({optText}), da die Seite nicht rechtzeitig bereit war.") print(f"DEBUG: Überspringe Tarif {tariffId} ({optText}), da die Seite nicht rechtzeitig bereit war.")
continue continue
# In diesem try-Block wird der passende Tarif im Dropdown gewählt. # This try-structure selects the appropriate tariff in the dropdown.
try: try:
currentDropdown = wait.until(EC.presence_of_element_located((By.NAME, "tarif_id"))) currentDropdown = wait.until(EC.presence_of_element_located((By.NAME, "tarif_id")))
currentSelectObj = Select(currentDropdown) currentSelectObj = Select(currentDropdown)
...@@ -524,23 +464,23 @@ def scrapeData(seleniumManager): ...@@ -524,23 +464,23 @@ def scrapeData(seleniumManager):
traceback.print_exc() traceback.print_exc()
continue continue
# In diesem try-Block wird gewartet, bis das Overlay verschwindet. # This try-structure waits for the overlay to become invisible after the selection.
try: try:
WebDriverWait(seleniumDriver, timeout=60).until(EC.invisibility_of_element_located((By.ID, "bg_layer"))) WebDriverWait(seleniumDriver, timeout=60).until(EC.invisibility_of_element_located((By.ID, "bg_layer")))
except TimeoutException: except TimeoutException:
print(f"DEBUG: FEHLER: Timeout beim Warten auf bg_layer nach Auswahl von Tarif {tariffId}. Überspringe...") print(f"DEBUG: FEHLER: Timeout beim Warten auf bg_layer nach Auswahl von Tarif {tariffId}. Überspringe...")
continue continue
# In dieser Zeile wird kurz gewartet, um die Preisanzeige stabil zu laden. # This line waits briefly to stabilize the price display.
time.sleep(1.5) time.sleep(1.5)
# In dieser Variablen wird der Nettopreis des aktuell ausgewählten Tarifs erfasst. # This variable holds the net price of the currently selected tariff.
planPriceNet = parsePlanPrice(seleniumDriver) planPriceNet = parsePlanPrice(seleniumDriver)
# In dieser Variablen wird die Liste aller verfügbaren Kampagnen erfasst. # This variable stores the list of all available campaigns for the tariff.
campaigns = parseCampaigns(seleniumDriver) campaigns = parseCampaigns(seleniumDriver)
# In dieser Zeile wird der Tarif in die plans.csv geschrieben. # This line writes the tariff record into plans.csv.
plansWriter.writerow([ plansWriter.writerow([
tariffId, tariffId,
tarifWelt, tarifWelt,
...@@ -550,10 +490,10 @@ def scrapeData(seleniumManager): ...@@ -550,10 +490,10 @@ def scrapeData(seleniumManager):
currentFramework currentFramework
]) ])
# In dieser Zeile wird der Tarif als bereits erfasst markiert. # This line marks the tariff as written to avoid duplication.
writtenPlanIdSet.add((tariffId, currentFramework)) writtenPlanIdSet.add((tariffId, currentFramework))
# In dieser for-Schleife werden alle Kampagnen in die campaigns.csv geschrieben. # This for-structure writes each campaign to the campaigns.csv file.
for (campId, campName) in campaigns: for (campId, campName) in campaigns:
campaignsWriter.writerow([ campaignsWriter.writerow([
campId, campId,
...@@ -561,21 +501,23 @@ def scrapeData(seleniumManager): ...@@ -561,21 +501,23 @@ def scrapeData(seleniumManager):
campName campName
]) ])
# In diesen Variablen werden die URLs für PDF-Dokumente abgeleitet. # This variable constructs the URL for the flyer PDF.
flyerPdfUrl = f"https://maui.mobilcom.de/vertragserfassung/show_pib_flyer.php?variant_id={tariffId}" flyerPdfUrl = f"https://maui.mobilcom.de/vertragserfassung/show_pib_flyer.php?variant_id={tariffId}"
# This variable constructs the URL for the PIB PDF.
pibPdfUrl = flyerPdfUrl + "&pib" pibPdfUrl = flyerPdfUrl + "&pib"
# In diesem try-Block werden die PDFs heruntergeladen. # This try-structure attempts to download the PDF documents.
try: try:
downloadPdfSelenium(seleniumDriver, flyerPdfUrl, cacheDir, f"{tariffId}_flyer.pdf") downloadPdfSelenium(seleniumDriver, flyerPdfUrl, cacheDir, f"{tariffId}_flyer.pdf")
downloadPdfSelenium(seleniumDriver, pibPdfUrl, cacheDir, f"{tariffId}_pib.pdf") downloadPdfSelenium(seleniumDriver, pibPdfUrl, cacheDir, f"{tariffId}_pib.pdf")
except Exception as exception: except Exception as exception:
print(f"DEBUG: Fehler beim PDF-Download für Tarif {tariffId}: {exception}") print(f"DEBUG: Fehler beim PDF-Download für Tarif {tariffId}: {exception}")
# In dieser Variablen wird gespeichert, ob zur Optionsseite navigiert werden konnte. # This variable will store whether navigation to the options page succeeded.
navigationToOptionsSuccessful = False navigationToOptionsSuccessful = False
# In diesem try-Block wird versucht, zur Optionsseite zu navigieren. # This try-structure attempts to navigate to the options page.
try: try:
print(f"DEBUG: Versuche zur Optionsseite zu navigieren für Tarif {tariffId}...") print(f"DEBUG: Versuche zur Optionsseite zu navigieren für Tarif {tariffId}...")
wait.until(EC.presence_of_element_located((By.NAME, "mobildaten"))) wait.until(EC.presence_of_element_located((By.NAME, "mobildaten")))
...@@ -588,7 +530,7 @@ def scrapeData(seleniumManager): ...@@ -588,7 +530,7 @@ def scrapeData(seleniumManager):
traceback.print_exc() traceback.print_exc()
continue continue
# In dieser if-Abzweigung wird geprüft, ob die Navigation erfolgreich war. # This if-structure proceeds only if navigation to the options page was successful.
if navigationToOptionsSuccessful: if navigationToOptionsSuccessful:
try: try:
print(f"DEBUG: Rufe scrapeOption für Tarif {tariffId} auf.") print(f"DEBUG: Rufe scrapeOption für Tarif {tariffId} auf.")
...@@ -602,58 +544,56 @@ def scrapeData(seleniumManager): ...@@ -602,58 +544,56 @@ def scrapeData(seleniumManager):
print(f"DEBUG: Fehler während scrapeOption für Tarif {tariffId}: {exception}") print(f"DEBUG: Fehler während scrapeOption für Tarif {tariffId}: {exception}")
traceback.print_exc() traceback.print_exc()
# In dieser Zeile werden die CSV-Dateien nach jedem Tarif zwischengespeichert. # This print call notifies that CSV files are flushed after processing each tariff.
print(f"DEBUG: Flushe CSV-Dateien nach Verarbeitung von Tarif {tariffId}.") print(f"DEBUG: Flushe CSV-Dateien nach Verarbeitung von Tarif {tariffId}.")
plansFile.flush() plansFile.flush()
campaignsFile.flush() campaignsFile.flush()
optionsFile.flush() optionsFile.flush()
categorysFile.flush() categorysFile.flush()
# This function scrapes the options for a given tariff on the options page.
# In dieser Funktion werden die Optionsdaten eines Tarifs auf der Optionsseite extrahiert.
def scrapeOption(seleniumManager, tariffId, optionsWriter, categorysWriter): def scrapeOption(seleniumManager, tariffId, optionsWriter, categorysWriter):
# In dieser Debug-Ausgabe wird mitgeteilt, dass das Scraping der Option gestartet wurde. # This print call is a debug message indicating the start of scraping options.
print(f"DEBUG: scrapeOption gestartet für Tarif {tariffId}.") print(f"DEBUG: scrapeOption gestartet für Tarif {tariffId}.")
# In dieser Variablen wird auf den im SeleniumManager gespeicherten Driver zugegriffen. # This variable references the Selenium driver from the SeleniumManager.
seleniumDriver = seleniumManager.driver seleniumDriver = seleniumManager.driver
# In dieser Variablen wird ein WebDriverWait-Objekt mit 20 Sekunden Timeout angelegt. # This variable is a WebDriverWait with a 20-second timeout.
wait = WebDriverWait(seleniumDriver, 20) wait = WebDriverWait(seleniumDriver, 20)
# In diesem try-Block findet das eigentliche Parsing der Optionsseite statt. # This try-structure attempts to parse the options page content.
try: try:
# In dieser Zeile wird bis zu 60 Sekunden auf das Verschwinden eines Overlays gewartet. # This print call indicates waiting for the overlay to be invisible.
print(f"DEBUG: Warte auf Unsichtbarkeit von bg_layer für Tarif {tariffId}.") print(f"DEBUG: Warte auf Unsichtbarkeit von bg_layer für Tarif {tariffId}.")
WebDriverWait(seleniumDriver, timeout=60).until(EC.invisibility_of_element_located((By.ID, "bg_layer"))) WebDriverWait(seleniumDriver, timeout=60).until(EC.invisibility_of_element_located((By.ID, "bg_layer")))
# In dieser Zeile wird darauf gewartet, dass das Formular 'tarifoptionen' im DOM vorhanden ist. # This print call indicates waiting for the 'tarifoptionen' form to be in the DOM.
print(f"DEBUG: Warte auf Formular 'tarifoptionen' für Tarif {tariffId}.") print(f"DEBUG: Warte auf Formular 'tarifoptionen' für Tarif {tariffId}.")
wait.until(EC.presence_of_element_located((By.NAME, "tarifoptionen"))) wait.until(EC.presence_of_element_located((By.NAME, "tarifoptionen")))
# In dieser Zeile wird geprüft, ob mindestens eine Tabelle mit Klasse 'tb_back' vorhanden ist. # This print call indicates waiting for at least one table with class 'tb_back' to appear.
print(f"DEBUG: Warte auf Klasse 'tb_back' für Tarif {tariffId}.") print(f"DEBUG: Warte auf Klasse 'tb_back' für Tarif {tariffId}.")
wait.until(EC.presence_of_element_located((By.CLASS_NAME, "tb_back"))) wait.until(EC.presence_of_element_located((By.CLASS_NAME, "tb_back")))
print(f"DEBUG: Optionsseite für Tarif {tariffId} scheint geladen zu sein.") print(f"DEBUG: Optionsseite für Tarif {tariffId} scheint geladen zu sein.")
# In dieser Variablen wird der komplette HTML-Inhalt gespeichert. # This variable stores the entire HTML content of the current page.
htmlContent = seleniumDriver.page_source htmlContent = seleniumDriver.page_source
# In dieser Variablen wird ein BeautifulSoup-Objekt erstellt, um den Inhalt zu parsen. # This variable is a BeautifulSoup object for parsing the page content.
soupObj = BeautifulSoup(htmlContent, "html.parser") soupObj = BeautifulSoup(htmlContent, "html.parser")
print(f"DEBUG: Rufe parse_options für Tarif {tariffId} auf.") print(f"DEBUG: Rufe parse_options für Tarif {tariffId} auf.")
optionsData, categoryData = parseOptions(soupObj) optionsData, categoryData = parseOptions(soupObj)
print(f"DEBUG: parse_options fand {len(optionsData)} Optionen und {len(categoryData)} Kategorien für Tarif {tariffId}.") print(f"DEBUG: parse_options fand {len(optionsData)} Optionen und {len(categoryData)} Kategorien für Tarif {tariffId}.")
# In dieser Variablen wird gezählt, wie viele neue Kategorien geschrieben werden. # This variable tracks how many new categories are written to the file.
catsWritten = 0 catsWritten = 0
# In dieser for-Schleife werden alle Kategorien verarbeitet. # This for-structure processes each category line found.
for catLine in categoryData: for catLine in categoryData:
parts = catLine.split(";", 1) parts = catLine.split(";", 1)
if len(parts) == 2: if len(parts) == 2:
...@@ -662,16 +602,18 @@ def scrapeOption(seleniumManager, tariffId, optionsWriter, categorysWriter): ...@@ -662,16 +602,18 @@ def scrapeOption(seleniumManager, tariffId, optionsWriter, categorysWriter):
global uniqueCategorySet global uniqueCategorySet
if catId not in uniqueCategorySet: if catId not in uniqueCategorySet:
# This line writes the category data to categorys.csv.
categorysWriter.writerow([catId, catName]) categorysWriter.writerow([catId, catName])
uniqueCategorySet.add(catId) uniqueCategorySet.add(catId)
catsWritten += 1 catsWritten += 1
print(f"DEBUG: {catsWritten} neue Kategorien in CSV geschrieben für Tarif {tariffId}.") print(f"DEBUG: {catsWritten} neue Kategorien in CSV geschrieben für Tarif {tariffId}.")
# In dieser Variablen wird gezählt, wie viele Optionen geschrieben werden. # This variable tracks how many options are written to the file.
optsWritten = 0 optsWritten = 0
# In dieser for-Schleife werden die ermittelten Optionszeilen verarbeitet. # This for-structure processes each options line found.
for line in optionsData: for line in optionsData:
parts = line.split(";", 3) parts = line.split(";", 3)
if len(parts) == 4: if len(parts) == 4:
...@@ -680,15 +622,16 @@ def scrapeOption(seleniumManager, tariffId, optionsWriter, categorysWriter): ...@@ -680,15 +622,16 @@ def scrapeOption(seleniumManager, tariffId, optionsWriter, categorysWriter):
itemName = parts[2] itemName = parts[2]
priceStr = parts[3] priceStr = parts[3]
# In diesem try-Block wird der Preis als float konvertiert. # This try-structure converts the price to float.
try: try:
grossPrice = float(priceStr) grossPrice = float(priceStr)
except ValueError: except ValueError:
grossPrice = 0.0 grossPrice = 0.0
# In dieser Zeile wird der Nettopreis auf Basis von 19% MwSt. berechnet. # This variable calculates the net price from the gross price using 19% tax.
netPrice = round(grossPrice / 1.19, 5) netPrice = round(grossPrice / 1.19, 5)
# This line writes the option data to the options.csv file.
optionsWriter.writerow([ optionsWriter.writerow([
itemId, itemId,
categoryRefId, categoryRefId,
...@@ -700,20 +643,24 @@ def scrapeOption(seleniumManager, tariffId, optionsWriter, categorysWriter): ...@@ -700,20 +643,24 @@ def scrapeOption(seleniumManager, tariffId, optionsWriter, categorysWriter):
print(f"DEBUG: {optsWritten} Optionen in CSV geschrieben für Tarif {tariffId}.") print(f"DEBUG: {optsWritten} Optionen in CSV geschrieben für Tarif {tariffId}.")
# In dieser except-Abzweigung werden Fehler während des Parsings protokolliert. # This except-structure logs any errors during parsing or writing.
except Exception as exception: except Exception as exception:
print(f"FEHLER in scrapeOption (Parsing/Writing) für Tarif {tariffId}: {exception}") print(f"FEHLER in scrapeOption (Parsing/Writing) für Tarif {tariffId}: {exception}")
traceback.print_exc() traceback.print_exc()
# In diesem finally-Block wird versucht, auf die Hauptseite (Mobildaten) zurück zu navigieren. # This finally-structure attempts to navigate back to the main page after processing options.
finally: finally:
# In diesen Variablen werden die Anzahl der Versuche und die Pausenzeit definiert. # This variable sets how many navigation retries are allowed.
maxRetriesNav = 2 maxRetriesNav = 2
# This variable sets the delay between navigation retries.
retryDelayNav = 3 retryDelayNav = 3
# This variable indicates whether the return navigation was successful.
backNavSuccessful = False backNavSuccessful = False
# In dieser for-Schleife werden mehrere Versuche zum Rücksprung in die Hauptseite durchgeführt. # This for-structure makes several attempts to return to the main page.
for attempt in range(maxRetriesNav): for attempt in range(maxRetriesNav):
try: try:
print(f"DEBUG: Versuche zurückzunavigieren von Optionsseite für Tarif {tariffId} (Versuch {attempt + 1}/{maxRetriesNav}).") print(f"DEBUG: Versuche zurückzunavigieren von Optionsseite für Tarif {tariffId} (Versuch {attempt + 1}/{maxRetriesNav}).")
...@@ -732,47 +679,45 @@ def scrapeOption(seleniumManager, tariffId, optionsWriter, categorysWriter): ...@@ -732,47 +679,45 @@ def scrapeOption(seleniumManager, tariffId, optionsWriter, categorysWriter):
else: else:
print(f"DEBUG: Endgültige Warnung: Konnte nach {maxRetriesNav} Versuchen nicht von Tarif {tariffId} zurücknavigieren.") print(f"DEBUG: Endgültige Warnung: Konnte nach {maxRetriesNav} Versuchen nicht von Tarif {tariffId} zurücknavigieren.")
# In dieser if-Abfrage wird protokolliert, falls die Rücknavigation nicht geklappt hat. # This if-structure logs a message if navigation back to the main page was not successful.
if not backNavSuccessful: if not backNavSuccessful:
print(f"DEBUG: Rücknavigation von Tarif {tariffId} war nicht erfolgreich. Fortsetzung kann instabil sein.") print(f"DEBUG: Rücknavigation von Tarif {tariffId} war nicht erfolgreich. Fortsetzung kann instabil sein.")
# This function parses the HTML for options and categories on the options page.
# In dieser Funktion werden die Optionen und Kategorien im HTML-Dokument geparst und aufbereitet.
def parseOptions(soupObj): def parseOptions(soupObj):
# In dieser Liste werden alle gefundenen Optionen gespeichert. # This variable is a list for storing all discovered options.
optionsResults = [] optionsResults = []
# In dieser Liste werden alle gefundenen Kategorien gespeichert. # This variable is a list for storing all discovered categories.
categoryResults = [] categoryResults = []
# In diesem Set werden Kategorien gesammelt, die schon hinzugefügt wurden, um Duplikate zu vermeiden. # This variable is a set to track category IDs that have already been added.
collectedCategoryIds = set() collectedCategoryIds = set()
# In dieser Variablen wird ein RegEx für die Prüfung von Gruppenfeldern definiert. # This variable is a regex for identifying group check inputs.
categoryCheckPattern = re.compile(r'service_code\[(G\d+)_check\]') categoryCheckPattern = re.compile(r'service_code\[(G\d+)_check\]')
# In dieser Variablen wird ein RegEx für versteckte Gruppenfelder definiert. # This variable is a regex for identifying hidden group inputs.
categoryHiddenPattern = re.compile(r'service_code\[(G\d+)_check\]') categoryHiddenPattern = re.compile(r'service_code\[(G\d+)_check\]')
# In dieser Variablen wird ein RegEx für Radio-Buttons in Gruppenfeldern definiert. # This variable is a regex for identifying radio inputs in group fields.
categoryRadioPattern = re.compile(r'service_code\[(G\d+)\]') categoryRadioPattern = re.compile(r'service_code\[(G\d+)\]')
# In dieser Variablen wird ein RegEx für Item-IDs definiert, die mit G oder O beginnen. # This variable is a regex for matching item IDs starting with G or O.
itemValuePattern = re.compile(r'^(G\d+|O\d+)$') itemValuePattern = re.compile(r'^(G\d+|O\d+)$')
# In dieser Variablen wird ein RegEx für Preise definiert, um Beträge im Text zu erkennen. # This variable is a regex for matching monthly prices in the text.
pricePattern = re.compile(r'/\s*€\s*([\d.,]+)\s*monatlich', re.IGNORECASE) pricePattern = re.compile(r'/\s*€\s*([\d.,]+)\s*monatlich', re.IGNORECASE)
# In dieser Variablen wird ein RegEx definiert, um Sub-Selects zu erkennen. # This variable is a regex for sub-select fields within a group.
subSelectPattern = re.compile(r"service_code\[(G\d+)_S\d+\]") subSelectPattern = re.compile(r"service_code\[(G\d+)_S\d+\]")
# In dieser Variablen werden alle Haupttabellen mit Klasse 'tb_back' gesucht. # This variable finds all main tables with class 'tb_back'.
allPotentialMainTables = soupObj.find_all("table", class_="tb_back") allPotentialMainTables = soupObj.find_all("table", class_="tb_back")
print(f"DEBUG: parse_options: {len(allPotentialMainTables)} potenzielle Haupttabellen (tb_back) gefunden.") print(f"DEBUG: parse_options: {len(allPotentialMainTables)} potenzielle Haupttabellen (tb_back) gefunden.")
# In dieser for-Schleife wird jede gefundene Tabelle untersucht. # This for-structure processes each found main table.
for tbl in allPotentialMainTables: for tbl in allPotentialMainTables:
catNameEl = tbl.find("td", class_="tb_head") catNameEl = tbl.find("td", class_="tb_head")
...@@ -790,7 +735,7 @@ def parseOptions(soupObj): ...@@ -790,7 +735,7 @@ def parseOptions(soupObj):
catInputHidden = tbl.find("input", type="hidden", attrs={"name": categoryHiddenPattern}) catInputHidden = tbl.find("input", type="hidden", attrs={"name": categoryHiddenPattern})
catInputRadio = tbl.find("input", type="radio", attrs={"name": categoryRadioPattern}) catInputRadio = tbl.find("input", type="radio", attrs={"name": categoryRadioPattern})
# In dieser if-Abfolge wird geprüft, welche Kategorie-ID wir aus welcher Input-Variante ziehen können. # This if-structure checks which type of input can provide the category ID.
if catInputCheck: if catInputCheck:
matchCheck = categoryCheckPattern.search(catInputCheck.get("name", "")) matchCheck = categoryCheckPattern.search(catInputCheck.get("name", ""))
if matchCheck: if matchCheck:
...@@ -804,30 +749,30 @@ def parseOptions(soupObj): ...@@ -804,30 +749,30 @@ def parseOptions(soupObj):
if matchRadio: if matchRadio:
categoryId = matchRadio.group(1) categoryId = matchRadio.group(1)
# In dieser if-Abzweigung werden irrelevante oder unbekannte Gruppen ausgeschlossen. # This if-structure excludes irrelevant or unknown groups.
if not categoryId or catText in ["Sonstige Angaben", "Pflicht-Angaben"]: if not categoryId or catText in ["Sonstige Angaben", "Pflicht-Angaben"]:
continue continue
print(f"DEBUG: Verarbeite Optionsgruppe: {categoryId} - {catText}") print(f"DEBUG: Verarbeite Optionsgruppe: {categoryId} - {catText}")
# In dieser if-Abzweigung wird die Kategorie einmalig in die categoryResults aufgenommen. # This if-structure adds a new category to categoryResults if it has not been added yet.
if categoryId not in collectedCategoryIds: if categoryId not in collectedCategoryIds:
categoryResults.append(f"{categoryId};{catText}") categoryResults.append(f"{categoryId};{catText}")
collectedCategoryIds.add(categoryId) collectedCategoryIds.add(categoryId)
# In dieser Variablen werden mögliche Untertabellen gesucht. # This variable finds potential sub-tables within the main table.
subTables = tbl.find_all("table", {"border": "0", "width": "520", "cellspacing": "0", "cellpadding": "4"}) subTables = tbl.find_all("table", {"border": "0", "width": "520", "cellspacing": "0", "cellpadding": "4"})
if not subTables: if not subTables:
subTables = [tbl] subTables = [tbl]
lastGId = None lastGId = None
# In dieser for-Schleife werden die Untertabellen untersucht. # This for-structure processes each sub-table to find inputs and sub-selects.
for subTbl in subTables: for subTbl in subTables:
inp = subTbl.find("input", attrs={"value": itemValuePattern}) inp = subTbl.find("input", attrs={"value": itemValuePattern})
subSelect = subTbl.find("select", attrs={"name": subSelectPattern}) subSelect = subTbl.find("select", attrs={"name": subSelectPattern})
# In dieser if-Abzweigung wird geprüft, ob ein passendes Input-Feld gefunden wurde. # This if-structure checks if a matching input field was found.
if inp: if inp:
itemId = inp.get("value", "").strip() itemId = inp.get("value", "").strip()
if not itemId: if not itemId:
...@@ -839,7 +784,7 @@ def parseOptions(soupObj): ...@@ -839,7 +784,7 @@ def parseOptions(soupObj):
itemName = "Unbekannt" itemName = "Unbekannt"
# In dieser if-Abzweigung wird der Text des Label-Tags als Name verwendet, falls vorhanden. # This if-structure attempts to extract the label text if it exists.
if itemLabelTag and itemLabelTag.text.strip(): if itemLabelTag and itemLabelTag.text.strip():
itemName = re.sub(r'\s+', ' ', itemLabelTag.text.strip()) itemName = re.sub(r'\s+', ' ', itemLabelTag.text.strip())
else: else:
...@@ -849,7 +794,7 @@ def parseOptions(soupObj): ...@@ -849,7 +794,7 @@ def parseOptions(soupObj):
if linkInDiv and linkInDiv.text.strip(): if linkInDiv and linkInDiv.text.strip():
itemName = re.sub(r'\s+', ' ', linkInDiv.text.strip()) itemName = re.sub(r'\s+', ' ', linkInDiv.text.strip())
# In dieser if-Abzweigung wird fortgefahren, wenn kein Name ermittelt werden kann. # This if-structure continues if no valid name is found.
if itemName == "Unbekannt": if itemName == "Unbekannt":
continue continue
...@@ -857,7 +802,7 @@ def parseOptions(soupObj): ...@@ -857,7 +802,7 @@ def parseOptions(soupObj):
mPrice = pricePattern.search(combinedText) mPrice = pricePattern.search(combinedText)
priceStr = "0.0" priceStr = "0.0"
# In dieser if-Abzweigung wird ein gefundener Preis verarbeitet. # This if-structure checks if a price was found in the text.
if mPrice: if mPrice:
rawPrice = mPrice.group(1) rawPrice = mPrice.group(1)
normalized = rawPrice.replace(".", "").replace(",", ".") normalized = rawPrice.replace(".", "").replace(",", ".")
...@@ -867,7 +812,7 @@ def parseOptions(soupObj): ...@@ -867,7 +812,7 @@ def parseOptions(soupObj):
except ValueError: except ValueError:
priceStr = "0.0" priceStr = "0.0"
# In dieser if-Abzweigung wird unterschieden, ob wir eine Gruppen-ID oder eine normale Option haben. # This if-structure differentiates between group items and normal options.
if itemId.startswith("G"): if itemId.startswith("G"):
optionsResults.append(f"{categoryId};{itemId};{itemName};{priceStr}") optionsResults.append(f"{categoryId};{itemId};{itemName};{priceStr}")
lastGId = itemId lastGId = itemId
...@@ -878,7 +823,7 @@ def parseOptions(soupObj): ...@@ -878,7 +823,7 @@ def parseOptions(soupObj):
optionsResults.append(f"{categoryId};{itemId};{itemName};{priceStr}") optionsResults.append(f"{categoryId};{itemId};{itemName};{priceStr}")
lastGId = None lastGId = None
# In dieser if-Abzweigung wird geprüft, ob wir ein Sub-Select haben und zuletzt eine Gruppen-ID gespeichert wurde. # This if-structure handles sub-select elements if we have a stored group ID.
if subSelect and lastGId: if subSelect and lastGId:
optionTags = subSelect.find_all("option", attrs={"value": re.compile(r"^O\d+$")}) optionTags = subSelect.find_all("option", attrs={"value": re.compile(r"^O\d+$")})
for optTag in optionTags: for optTag in optionTags:
...@@ -908,47 +853,45 @@ def parseOptions(soupObj): ...@@ -908,47 +853,45 @@ def parseOptions(soupObj):
lastGId = None lastGId = None
# In dieser Liste werden doppelte Einträge entfernt. # This variable deduplicates the options list.
uniqueOptions = list(set(optionsResults)) uniqueOptions = list(set(optionsResults))
# This variable deduplicates the categories list.
uniqueCategoriesList = list(set(categoryResults)) uniqueCategoriesList = list(set(categoryResults))
print(f"DEBUG: parse_options: Gibt {len(uniqueOptions)} eindeutige Optionen und {len(uniqueCategoriesList)} eindeutige Kategorien zurück.") print(f"DEBUG: parse_options: Gibt {len(uniqueOptions)} eindeutige Optionen und {len(uniqueCategoriesList)} eindeutige Kategorien zurück.")
return uniqueOptions, uniqueCategoriesList return uniqueOptions, uniqueCategoriesList
# This function checks if a given group ID has a sub-select in the provided list of selects.
# In dieser Funktion wird geprüft, ob eine bestimmte Gruppen-ID ein Sub-Select besitzt.
def hasSubSelectForId(gId, subSelects): def hasSubSelectForId(gId, subSelects):
# In dieser for-Schleife werden alle Select-Elemente geprüft. # This for-structure inspects each select element to see if it matches the group ID.
for s in subSelects: for s in subSelects:
sName = s.get("name", "") sName = s.get("name", "")
if gId in sName: if gId in sName:
return True return True
return False return False
# This variable initializes a SeleniumManager with a visible browser and the specified GeckoDriver path.
# In dieser Variablen wird ein SeleniumManager mit sichtbarem Browserfenster und gegebenem Geckodriver-Pfad initialisiert.
seleniumManager = SeleniumManager() seleniumManager = SeleniumManager()
# In dieser Zeile wird der Login mit den globalen Zugangsdaten durchgeführt. # This line logs in using the global credentials.
login(seleniumManager, MAUI_USERNAME, MAUI_PASSWORD, MAUI_AUTHCODE) login(seleniumManager, MAUI_USERNAME, MAUI_PASSWORD, MAUI_AUTHCODE)
# In dieser Zeile wird die Laufzeitvertrags-Seite geöffnet. # This line opens the Laufzeitvertrag page.
openLaufzeitvertrag(seleniumManager) openLaufzeitvertrag(seleniumManager)
# In dieser Zeile wird eine kurze Wartezeit eingefügt. # This line adds a short delay before scraping begins.
time.sleep(5) time.sleep(5)
# In dieser Zeile werden alle Daten gescraped und in CSV-Dateien geschrieben. # This line scrapes all data and writes to CSV files.
scrapeData(seleniumManager) scrapeData(seleniumManager)
# In dieser Zeile wird eine letzte Wartezeit eingebaut, bevor der Browser geschlossen wird. # This line waits 10 seconds before closing the driver.
time.sleep(10) time.sleep(10)
# In dieser Zeile wird der WebDriver schließlich geschlossen. # This line closes the WebDriver at the end of the process.
seleniumManager.closeDriver() seleniumManager.closeDriver()
# In dieser Ausgabe wird signalisiert, dass das Scraping abgeschlossen ist. # This print call indicates that scraping is complete.
print("Scraping abgeschlossen.") print("Scraping abgeschlossen.")
...@@ -11,86 +11,91 @@ from models.base_base import BaseBase ...@@ -11,86 +11,91 @@ from models.base_base import BaseBase
from models.deal_deal import DealDeal from models.deal_deal import DealDeal
from models.option_opti import OptionOpti from models.option_opti import OptionOpti
# # The logging module is configured so that every message contains a timestamp and a log level for easy troubleshooting.
# Hier wird die Log-Konfiguration festgelegt, damit während des Ablaufs aussagekräftige Zeit- und Fehlermeldungen ausgegeben werden.
logging.basicConfig( logging.basicConfig(
level=logging.INFO, level=logging.INFO,
format="%(asctime)s %(levelname)s: %(message)s" format="%(asctime)s %(levelname)s: %(message)s"
) )
# # The variable “CSV_DIR” stores the absolute path to the cache directory that contains all CSV source files.
# Dieses Verzeichnis verweist auf den Zwischenspeicher, in dem alle CSV-Dateien abgelegt sind.
CSV_DIR = os.path.join("..", "cache") CSV_DIR = os.path.join("..", "cache")
# # The variables below hold the absolute file paths of the four source CSV files used for the import process.
# Hier werden die einzelnen CSV-Dateien innerhalb des Zwischenspeichers definiert.
csvFileCategories = os.path.join(CSV_DIR, "categorys.csv") csvFileCategories = os.path.join(CSV_DIR, "categorys.csv")
csvFilePlans = os.path.join(CSV_DIR, "plans.csv") csvFilePlans = os.path.join(CSV_DIR, "plans.csv")
csvFileCampaigns = os.path.join(CSV_DIR, "campaigns.csv") csvFileCampaigns = os.path.join(CSV_DIR, "campaigns.csv")
csvFileOptions = os.path.join(CSV_DIR, "options.csv") csvFileOptions = os.path.join(CSV_DIR, "options.csv")
# # This helper function returns the content of a semicolon‑separated CSV file as a list of dictionaries.
# Diese Funktion liest eine CSV-Datei vollständig ein und liefert jede Zeile als Wörterbuch zurück.
def read_csv(path): def read_csv(path):
# The file is opened with UTF‑8 encoding so non‑ASCII characters are parsed correctly.
with open(path, newline="", encoding="utf-8") as f: with open(path, newline="", encoding="utf-8") as f:
return list(csv.DictReader(f, delimiter=";")) return list(csv.DictReader(f, delimiter=";"))
# # The four CSV files are loaded into separate lists so their rows are available for further processing.
# Hier werden sämtliche CSV-Dateien in Listen von Wörterbüchern eingelesen.
cat_rows = read_csv(csvFileCategories) cat_rows = read_csv(csvFileCategories)
plan_rows = read_csv(csvFilePlans) plan_rows = read_csv(csvFilePlans)
camp_rows = read_csv(csvFileCampaigns) camp_rows = read_csv(csvFileCampaigns)
opt_rows = read_csv(csvFileOptions) opt_rows = read_csv(csvFileOptions)
# # The dictionary “category_name” maps every category id to its human‑readable name for quick look‑ups.
# Dieses Wörterbuch ordnet jeder Kategorietabelle den zugehörigen Namen für späteres Nachschlagen zu.
category_name = {r["id"].strip(): r["name"].strip() for r in cat_rows} category_name = {r["id"].strip(): r["name"].strip() for r in cat_rows}
# # The default dictionary “campaigns_by_plan” groups all campaign rows by the id of their related plan.
# Diese Datenstruktur ordnet jeder Plan-ID alle zugehörigen Kampagnenzeilen zu, um schnellen Zugriff zu ermöglichen.
campaigns_by_plan = defaultdict(list) campaigns_by_plan = defaultdict(list)
for c in camp_rows: for c in camp_rows:
# Each campaign is appended to the list identified by its plan id so multiple campaigns can share the same key.
campaigns_by_plan[c["plan"].strip()].append(c) campaigns_by_plan[c["plan"].strip()].append(c)
# # The default dictionary “options_by_plan” groups all option rows by the id of their related plan.
# Diese Datenstruktur ordnet jeder Plan-ID alle zugehörigen Optionszeilen zu, um schnellen Zugriff zu ermöglichen.
options_by_plan = defaultdict(list) options_by_plan = defaultdict(list)
for o in opt_rows: for o in opt_rows:
# Each option is appended to the list identified by its plan id so multiple options can share the same key.
options_by_plan[o["plan"].strip()].append(o) options_by_plan[o["plan"].strip()].append(o)
# # The variable “mysql” establishes the database connection manager so sessions can be created.
# Hier wird die Verbindung zur Datenbank geöffnet und eine neue Session erzeugt.
mysql = MysqlManager() mysql = MysqlManager()
# The variable “session” stores the SQLAlchemy session that will be used for all database operations in this run.
session = mysql.getSession() session = mysql.getSession()
# # The dictionary “base_db” maps a tuple of provider name and provider code to the corresponding BaseBase object.
# Dieses Wörterbuch enthält alle bestehenden Basiseinträge, damit später neue Einträge erkannt werden können. base_db = {(b.provider_base, b.providercode_base): b for b in session.query(BaseBase).all()}
base_db = {(b.provider_base, b.providercode_base): b
for b in session.query(BaseBase).all()}
# # The default dictionary “deals_db” groups all existing DealDeal rows by their base id for quick comparison later on.
# Diese verschachtelte Struktur hält alle vorhandenen Deals pro Base-ID, wodurch ein schneller Abgleich ermöglicht wird.
deals_db = defaultdict(dict) deals_db = defaultdict(dict)
for d in session.query(DealDeal).all(): for d in session.query(DealDeal).all():
# Each deal is added to the inner dictionary that is addressed by the base id so provider codes become the second‑level keys.
deals_db[d.base_deal][d.providercode_deal] = d deals_db[d.base_deal][d.providercode_deal] = d
# # The default dictionary “opts_db” groups all existing OptionOpti rows by their base id for quick comparison later on.
# Diese verschachtelte Struktur hält alle vorhandenen Optionen pro Base-ID, um später Stop- und Reaktivierungslogik anzuwenden.
opts_db = defaultdict(dict) opts_db = defaultdict(dict)
for o in session.query(OptionOpti).all(): for o in session.query(OptionOpti).all():
# Each option is added to the inner dictionary that is addressed by the base id so provider codes become the second‑level keys.
opts_db[o.base_opti][o.providercode_opti] = o opts_db[o.base_opti][o.providercode_opti] = o
# # The variable “now” stores the current timestamp so all new rows share an identical creation and update time.
# Hier wird der aktuelle Zeitpunkt einmalig festgelegt, um ihn konsistent für alle neu erzeugten Datensätze zu verwenden.
now = datetime.datetime.now() now = datetime.datetime.now()
# # The list “new_bases” collects BaseBase objects that need to be inserted because they do not yet exist in the database.
# Diese Liste sammelt alle neu anzulegenden Basiseinträge, damit sie in einem Schritt geschrieben werden können.
new_bases = [] new_bases = []
for p in plan_rows: for p in plan_rows:
# The variable “prov_base” combines several CSV columns to form the provider_base value used in the database.
prov_base = f"Freenet | {p['provider'].strip()} | {p['rahmen'].strip()}" prov_base = f"Freenet | {p['provider'].strip()} | {p['rahmen'].strip()}"
# The variable “key” uniquely identifies a base by provider_base and provider code so duplicates can be detected.
key = (prov_base, p["id"].strip()) key = (prov_base, p["id"].strip())
# This branch creates a new BaseBase object when the combination of provider_base and provider code is unknown.
if key not in base_db: if key not in base_db:
# The variable “b” stores the new BaseBase object that is populated with basic attributes and timestamps.
b = BaseBase( b = BaseBase(
provider_base = prov_base, provider_base = prov_base,
providercode_base = p["id"].strip(), providercode_base = p["id"].strip(),
...@@ -101,31 +106,34 @@ for p in plan_rows: ...@@ -101,31 +106,34 @@ for p in plan_rows:
new_bases.append(b) new_bases.append(b)
base_db[key] = b base_db[key] = b
# # This branch writes all new BaseBase objects to the database in one bulk operation and refreshes their primary keys.
# Hier werden alle neu erkannten Basiseinträge in einem einzigen Datenbankvorgang gespeichert.
if new_bases: if new_bases:
session.add_all(new_bases) session.add_all(new_bases)
session.flush() session.flush()
logging.info("Inserted %d new bases", len(new_bases)) logging.info("Inserted %d new bases", len(new_bases))
# # The default dictionaries below keep track of all deals and options that should exist after the import run.
# Diese verschachtelten Mengen erfassen für jede Base-ID die in diesem Lauf gewünschten Deals und Optionen.
desired_deals = defaultdict(set) desired_deals = defaultdict(set)
desired_opts = defaultdict(set) desired_opts = defaultdict(set)
# # The two lists below accumulate dictionaries that will later be used for INSERT IGNORE bulk operations.
# Diese Listen sammeln alle Datensätze, die per INSERT IGNORE neu geschrieben oder aktualisiert werden sollen.
deal_rows_insert = [] deal_rows_insert = []
opt_rows_insert = [] opt_rows_insert = []
for p in plan_rows: for p in plan_rows:
# The variable “prov_base” recomputes the provider_base value so it matches the previously used key.
prov_base = f"Freenet | {p['provider'].strip()} | {p['rahmen'].strip()}" prov_base = f"Freenet | {p['provider'].strip()} | {p['rahmen'].strip()}"
# The variable “base_obj” retrieves the corresponding BaseBase object from the in‑memory cache.
base_obj = base_db[(prov_base, p["id"].strip())] base_obj = base_db[(prov_base, p["id"].strip())]
# The variable “b_id” stores the primary key of the current BaseBase row.
b_id = base_obj.id_base b_id = base_obj.id_base
# The variable “price” converts the plan price from a string into a Decimal object for precise arithmetic.
price = Decimal(p["price"].strip() or "0.00") price = Decimal(p["price"].strip() or "0.00")
# # The mandatory default deal without campaign code is added to the desired set and the insert list.
# Dieser Block fügt den obligatorischen Standard-Deal ohne Kampagnenkennung hinzu.
desired_deals[b_id].add("") desired_deals[b_id].add("")
deal_rows_insert.append({ deal_rows_insert.append({
"provisiongroup_deal": 1, "provisiongroup_deal": 1,
...@@ -139,9 +147,10 @@ for p in plan_rows: ...@@ -139,9 +147,10 @@ for p in plan_rows:
"updated_deal": now "updated_deal": now
}) })
# # Every campaign row belonging to the current plan is processed and prepared for insertion.
# Dieser Block verarbeitet alle Kampagnen zum aktuellen Plan und fügt sie der Wunschliste hinzu.
for c in campaigns_by_plan[p["id"].strip()]: for c in campaigns_by_plan[p["id"].strip()]:
# The variable “code” prefixes the campaign id with “A” so it conforms to the provider_code format.
code = f"A{c['id'].strip()}" code = f"A{c['id'].strip()}"
desired_deals[b_id].add(code) desired_deals[b_id].add(code)
deal_rows_insert.append({ deal_rows_insert.append({
...@@ -156,9 +165,10 @@ for p in plan_rows: ...@@ -156,9 +165,10 @@ for p in plan_rows:
"updated_deal": now "updated_deal": now
}) })
# # Every option row belonging to the current plan is processed and prepared for insertion.
# Dieser Block fügt alle Optionen zum aktuellen Plan der Wunschliste hinzu und bereitet die Insert-Zeilen vor.
for o in options_by_plan[p["id"].strip()]: for o in options_by_plan[p["id"].strip()]:
# The variable “code_opt” holds the raw option id string so it can be used as provider code.
code_opt = o["id"].strip() code_opt = o["id"].strip()
desired_opts[b_id].add(code_opt) desired_opts[b_id].add(code_opt)
opt_rows_insert.append({ opt_rows_insert.append({
...@@ -179,92 +189,87 @@ for p in plan_rows: ...@@ -179,92 +189,87 @@ for p in plan_rows:
"updated_opti": now "updated_opti": now
}) })
# # Duplicate deals are removed by transforming the list into a dictionary keyed by base id and provider code.
# In diesem Schritt werden doppelte Deal- und Optionszeilen anhand ihrer Schlüsselwerte entfernt.
deal_rows_insert = list({(r["base_deal"], r["providercode_deal"]): r for r in deal_rows_insert}.values()) deal_rows_insert = list({(r["base_deal"], r["providercode_deal"]): r for r in deal_rows_insert}.values())
# Duplicate options are removed by transforming the list into a dictionary keyed by base id, provider code, and category.
opt_rows_insert = list({ opt_rows_insert = list({
(r["base_opti"], r["providercode_opti"], r["providercategory_opti"]): r (r["base_opti"], r["providercode_opti"], r["providercategory_opti"]): r for r in opt_rows_insert
for r in opt_rows_insert
}.values()) }.values())
# # The lists below collect maps that will later be used to stop or reactivate existing deals and options.
# Diese Listen sammeln Datensätze, deren Status auf gestoppt oder reaktiviert gesetzt werden muss.
stop_deals, react_deals = [], [] stop_deals, react_deals = [], []
stop_opts, react_opts = [], [] stop_opts, react_opts = [], []
for (prov, _), b in base_db.items(): for (prov, _), b in base_db.items():
# This branch skips bases that were not imported from Freenet.
if not prov.startswith("Freenet"): if not prov.startswith("Freenet"):
continue continue
# The variable “b_id” stores the primary key of the base row currently being checked.
b_id = b.id_base b_id = b.id_base
# The variables “wantD” and “haveD” hold the sets of desired and existing deals for the current base.
wantD = desired_deals.get(b_id, set()) wantD = desired_deals.get(b_id, set())
haveD = deals_db.get(b_id, {}) haveD = deals_db.get(b_id, {})
# This loop checks every existing deal so its stop status can be updated when necessary.
for code, obj in haveD.items(): for code, obj in haveD.items():
# This branch marks a stopped deal for reactivation when the code is desired but the stop date is set.
if code in wantD and obj.stops_deal is not None: if code in wantD and obj.stops_deal is not None:
react_deals.append({"id_deal": obj.id_deal, react_deals.append({"id_deal": obj.id_deal, "stops_deal": None, "updated_deal": now})
"stops_deal": None,
"updated_deal": now}) # This branch marks an active deal for stopping when the code is no longer desired.
if code not in wantD and obj.stops_deal is None: if code not in wantD and obj.stops_deal is None:
stop_deals.append({"id_deal": obj.id_deal, stop_deals.append({"id_deal": obj.id_deal, "stops_deal": now, "updated_deal": now})
"stops_deal": now,
"updated_deal": now})
# The variables “wantO” and “haveO” hold the sets of desired and existing options for the current base.
wantO = desired_opts.get(b_id, set()) wantO = desired_opts.get(b_id, set())
haveO = opts_db.get(b_id, {}) haveO = opts_db.get(b_id, {})
# This loop checks every existing option so its stop status can be updated when necessary.
for code, obj in haveO.items(): for code, obj in haveO.items():
# This branch marks a stopped option for reactivation when the code is desired but the stop date is set.
if code in wantO and obj.stops_opti is not None: if code in wantO and obj.stops_opti is not None:
react_opts.append({"id_opti": obj.id_opti, react_opts.append({"id_opti": obj.id_opti, "stops_opti": None, "updated_opti": now})
"stops_opti": None,
"updated_opti": now}) # This branch marks an active option for stopping when the code is no longer desired.
if code not in wantO and obj.stops_opti is None: if code not in wantO and obj.stops_opti is None:
stop_opts.append({"id_opti": obj.id_opti, stop_opts.append({"id_opti": obj.id_opti, "stops_opti": now, "updated_opti": now})
"stops_opti": now,
"updated_opti": now}) # All prepared deal rows are written to the database using INSERT IGNORE so duplicates are silently skipped.
session.execute(mysql_insert(DealDeal.__table__).prefix_with("IGNORE"), deal_rows_insert)
#
# Dieser Block schreibt alle gewünschten Deals per INSERT IGNORE in die Datenbank.
session.execute(
mysql_insert(DealDeal.__table__).prefix_with("IGNORE"),
deal_rows_insert
)
logging.info("INSERT IGNORE'd %d deals", len(deal_rows_insert)) logging.info("INSERT IGNORE'd %d deals", len(deal_rows_insert))
# # All prepared option rows are written to the database using INSERT IGNORE so duplicates are silently skipped.
# Dieser Block schreibt alle gewünschten Optionen per INSERT IGNORE in die Datenbank. session.execute(mysql_insert(OptionOpti.__table__).prefix_with("IGNORE"), opt_rows_insert)
session.execute(
mysql_insert(OptionOpti.__table__).prefix_with("IGNORE"),
opt_rows_insert
)
logging.info("INSERT IGNORE'd %d options", len(opt_rows_insert)) logging.info("INSERT IGNORE'd %d options", len(opt_rows_insert))
# # All deals marked for stopping are updated in bulk so their stop dates are set in a single efficient query.
# Dieser Block aktualisiert alle Deals, die jetzt gestoppt werden müssen.
if stop_deals: if stop_deals:
session.bulk_update_mappings(DealDeal, stop_deals) session.bulk_update_mappings(DealDeal, stop_deals)
logging.info("Stopped %d deals", len(stop_deals)) logging.info("Stopped %d deals", len(stop_deals))
# # All deals marked for reactivation are updated in bulk so their stop dates are cleared in a single efficient query.
# Dieser Block aktualisiert alle Deals, die wieder reaktiviert werden müssen.
if react_deals: if react_deals:
session.bulk_update_mappings(DealDeal, react_deals) session.bulk_update_mappings(DealDeal, react_deals)
logging.info("Reactivated %d deals", len(react_deals)) logging.info("Reactivated %d deals", len(react_deals))
# # All options marked for stopping are updated in bulk so their stop dates are set in a single efficient query.
# Dieser Block aktualisiert alle Optionen, die jetzt gestoppt werden müssen.
if stop_opts: if stop_opts:
session.bulk_update_mappings(OptionOpti, stop_opts) session.bulk_update_mappings(OptionOpti, stop_opts)
logging.info("Stopped %d options", len(stop_opts)) logging.info("Stopped %d options", len(stop_opts))
# # All options marked for reactivation are updated in bulk so their stop dates are cleared in a single efficient query.
# Dieser Block aktualisiert alle Optionen, die wieder reaktiviert werden müssen.
if react_opts: if react_opts:
session.bulk_update_mappings(OptionOpti, react_opts) session.bulk_update_mappings(OptionOpti, react_opts)
logging.info("Reactivated %d options", len(react_opts)) logging.info("Reactivated %d options", len(react_opts))
# # The commit call persists every change made in this run so all inserts and updates become permanent.
# Hier werden sämtliche Änderungen dauerhaft in der Datenbank gespeichert.
session.commit() session.commit()
# # The session is closed to release database connections and other resources held by SQLAlchemy.
# Zum Abschluss wird die Session geschlossen, um Ressourcen freizugeben.
session.close() session.close()
logging.info("Import-Lauf abgeschlossen.") logging.info("Import run finished successfully.")
\ No newline at end of file
#!/usr/bin/env python3 # This script scans a local cache directory for PDF files belonging to base tariffs, uploads the PDFs to S3, writes the resulting URLs back into the MySQL database, and logs progress as well as errors to stdout.
# -*- coding: utf-8 -*- import sys; sys.path.append("..")
import sys
sys.path.append("..")
import os import os
import datetime import datetime
from manager.S3Manager import S3Manager from manager.S3Manager import S3Manager
...@@ -12,98 +9,111 @@ from models.deal_deal import DealDeal ...@@ -12,98 +9,111 @@ from models.deal_deal import DealDeal
from models.option_opti import OptionOpti from models.option_opti import OptionOpti
from models.provisiongroup_pgro import ProvisiongroupPgro # zwingend, um Abhängigkeits-Mapping zu initialisieren from models.provisiongroup_pgro import ProvisiongroupPgro # zwingend, um Abhängigkeits-Mapping zu initialisieren
# # The variable "cacheDir" stores the file‑system path that contains the PDF files waiting for upload.
# Dieses Verzeichnis enthält sämtliche PDF-Dateien für den Upload.
cacheDir = "../cache" cacheDir = "../cache"
# # The variable "s3Manager" holds an instance that encapsulates S3 upload functionality.
# Dieses Objekt übernimmt das Hochladen der Dateien in den S3-Bucket und liefert die endgültige URL.
s3Manager = S3Manager() s3Manager = S3Manager()
# # The variable "dbSession" stores a SQLAlchemy session used to query and update the MySQL database.
# Diese Datenbank-Session ermöglicht Abfragen und Aktualisierungen innerhalb der MySQL-Datenbank.
dbSession = MysqlManager().getSession() dbSession = MysqlManager().getSession()
# # The variable "pdfFiles" gathers all file names inside the cache directory whose names end with the ".pdf" extension, case‑insensitive.
# Diese Liste sammelt alle PDF-Dateinamen im Cache-Verzeichnis.
pdfFiles = [f for f in os.listdir(cacheDir) if f.lower().endswith(".pdf")] pdfFiles = [f for f in os.listdir(cacheDir) if f.lower().endswith(".pdf")]
# # The variable "pdfIdSet" collects distinct base identifiers by stripping the *_flyer or *_pib suffix from each file name stem.
# Diese Menge speichert alle eindeutigen Basis-IDs, die durch Suffix-Prüfung ermittelt wurden.
pdfIdSet = set() pdfIdSet = set()
for name in pdfFiles: for name in pdfFiles:
# The variable "stem" stores the file name without its extension and is converted to lowercase for uniformity.
stem = name[:-4].lower() stem = name[:-4].lower()
# This branch adds the identifier to the set when the file name ends with the flyer suffix.
if stem.endswith("_flyer"): if stem.endswith("_flyer"):
pdfIdSet.add(stem[:-6]) pdfIdSet.add(stem[:-6])
# This branch adds the identifier to the set when the file name ends with the PIB suffix.
elif stem.endswith("_pib"): elif stem.endswith("_pib"):
pdfIdSet.add(stem[:-4]) pdfIdSet.add(stem[:-4])
# # This branch terminates the script early when no matching PDF pairs were found in the cache directory.
# Dieser Block beendet das Skript, wenn keine geeigneten PDF-Dateien vorhanden sind.
if not pdfIdSet: if not pdfIdSet:
print(f"INFO: Keine PDF-Paare in '{cacheDir}' gefunden.") print(f"INFO: Keine PDF-Paare in '{cacheDir}' gefunden.")
dbSession.close() dbSession.close()
sys.exit(0) sys.exit(0)
# # This loop iterates over each distinct base identifier in sorted order to process the associated PDFs.
# Diese Schleife verarbeitet jede erkannte Basis-ID in sortierter Reihenfolge.
for currentId in sorted(pdfIdSet): for currentId in sorted(pdfIdSet):
# The print statement marks the beginning of processing for the current identifier.
print(f"\n--- Verarbeitung ID: {currentId} ---") print(f"\n--- Verarbeitung ID: {currentId} ---")
# # The variable "baseRecords" retrieves all BaseBase rows with a matching provider code so that multiple matches are processed consistently.
# Diese Abfrage liefert alle BaseBase-Datensätze, um Mehrfachtreffer sicher zu unterstützen.
baseRecords = dbSession.query(BaseBase).filter_by(providercode_base=currentId).all() baseRecords = dbSession.query(BaseBase).filter_by(providercode_base=currentId).all()
# This branch skips the current identifier when no matching BaseBase row exists.
if not baseRecords: if not baseRecords:
print(f"WARNUNG: Kein BaseBase-Eintrag für providercode_base='{currentId}'.") print(f"WARNUNG: Kein BaseBase-Eintrag für providercode_base='{currentId}'.")
continue continue
# # The variable "flyerPath" composes the absolute path to the flyer PDF for the current identifier.
# Dieser Pfad verweist auf die potenzielle Flyer-Datei der aktuellen Basis-ID.
flyerPath = os.path.join(cacheDir, f"{currentId}_flyer.pdf") flyerPath = os.path.join(cacheDir, f"{currentId}_flyer.pdf")
# # The variable "pibPath" composes the absolute path to the PIB PDF for the current identifier.
# Dieser Pfad verweist auf die potenzielle PIB-Datei der aktuellen Basis-ID.
pibPath = os.path.join(cacheDir, f"{currentId}_pib.pdf") pibPath = os.path.join(cacheDir, f"{currentId}_pib.pdf")
# # The variable "flyerUrl" will store the public S3 URL of the flyer PDF or remain None when the upload fails or the file does not exist.
# Diese Variable hält die hochgeladene Flyer-URL oder bleibt None, falls kein Upload erfolgte.
flyerUrl = None flyerUrl = None
# This branch uploads the flyer PDF when the file exists.
if os.path.exists(flyerPath): if os.path.exists(flyerPath):
# The variable "flyerKey" determines the destination key inside the S3 bucket.
flyerKey = f"flyers/{currentId}_flyer.pdf" flyerKey = f"flyers/{currentId}_flyer.pdf"
# The variable "flyerUrl" receives the URL returned by the upload method.
flyerUrl = s3Manager.uploadFile(flyerPath, flyerKey) flyerUrl = s3Manager.uploadFile(flyerPath, flyerKey)
# This branch prints an error message when the upload failed and no URL was returned.
if not flyerUrl: if not flyerUrl:
print(f"FEHLER: Flyer-Upload fehlgeschlagen für ID {currentId}") print(f"FEHLER: Flyer-Upload fehlgeschlagen für ID {currentId}")
# # The variable "pibUrl" will store the public S3 URL of the PIB PDF or remain None when the upload fails or the file does not exist.
# Diese Variable hält die hochgeladene PIB-URL oder bleibt None, falls kein Upload erfolgte.
pibUrl = None pibUrl = None
# This branch uploads the PIB PDF when the file exists.
if os.path.exists(pibPath): if os.path.exists(pibPath):
# The variable "pibKey" determines the destination key inside the S3 bucket.
pibKey = f"pibs/{currentId}_pib.pdf" pibKey = f"pibs/{currentId}_pib.pdf"
# The variable "pibUrl" receives the URL returned by the upload method.
pibUrl = s3Manager.uploadFile(pibPath, pibKey) pibUrl = s3Manager.uploadFile(pibPath, pibKey)
# This branch prints an error message when the upload failed and no URL was returned.
if not pibUrl: if not pibUrl:
print(f"FEHLER: PIB-Upload fehlgeschlagen für ID {currentId}") print(f"FEHLER: PIB-Upload fehlgeschlagen für ID {currentId}")
# # This loop updates each BaseBase record so that both flyer and PIB URLs are stored without overwriting existing values.
# Diese Schleife aktualisiert jede gefundene Base-Zeile, um Flyer- und PIB-URLs konsistent zu setzen.
for base in baseRecords: for base in baseRecords:
# This branch writes the flyer URL into the database row when no URL has been stored before and a new URL is available.
if base.flyerurl_base is None and flyerUrl: if base.flyerurl_base is None and flyerUrl:
base.flyerurl_base = flyerUrl base.flyerurl_base = flyerUrl
base.updated_base = datetime.datetime.now() base.updated_base = datetime.datetime.now()
print(f"INFO: flyerurl_base gesetzt: {flyerUrl}") print(f"INFO: flyerurl_base gesetzt: {flyerUrl}")
# This branch writes the PIB URL into the database row when no URL has been stored before and a new URL is available.
if base.piburl_base is None and pibUrl: if base.piburl_base is None and pibUrl:
base.piburl_base = pibUrl base.piburl_base = pibUrl
base.updated_base = datetime.datetime.now() base.updated_base = datetime.datetime.now()
print(f"INFO: piburl_base gesetzt: {pibUrl}") print(f"INFO: piburl_base gesetzt: {pibUrl}")
# # The commit call atomically persists all changes performed for the current identifier.
# Dieser Aufruf speichert alle Änderungen für die aktuelle Basis-ID atomar in der Datenbank.
dbSession.commit() dbSession.commit()
# # The database session is closed after all identifiers have been processed.
# Hier wird die Session geschlossen, sobald alle Basis-IDs verarbeitet wurden.
dbSession.close() dbSession.close()
# # The print statement confirms that the entire upload sequence finished successfully.
# Diese Meldung bestätigt das erfolgreiche Ende des gesamten Upload-Vorgangs.
print("INFO: Upload-Vorgang abgeschlossen.") print("INFO: Upload-Vorgang abgeschlossen.")
\ No newline at end of file
#!/bin/bash #!/bin/bash
# Dieser Wrapper wechselt ins Verzeichnis /maui/commands und startet das # This script guarantees that only one instance of a specified Python job runs simultaneously, captures its standard and error output in timestamped log files, and notifies a monitoring endpoint when errors occur.
# gewünschte Python-Skript (mit python3), sofern nicht bereits eine Instanz # The following conditional branch checks whether at least one positional argument has been provided; if not, usage information is printed and the script terminates with exit status 1.
# dieses Skripts läuft. Gleichzeitig werden alle Ausgaben in zwei getrennten
# Logfiles im Verzeichnis /maui/logs abgelegt, wobei jedes Skript einen
# eigenen Unterordner erhält (benannt nach dem Skriptnamen ohne Erweiterung)
# und die Logfiles die Namen im Format
# - L_yyyymmdd-hhiiss.txt für die Standardausgabe,
# - E_yyyymmdd-hhiiss.err für die Fehlermeldung
# tragen. Logfiles, die älter als 24 Stunden (1440 Minuten) sind, werden
# automatisch gelöscht.
#--- Parameterprüfung ---
# In dieser Abfrage wird überprüft, ob mindestens ein Parameter übergeben wurde.
if [ "$#" -lt 1 ]; then if [ "$#" -lt 1 ]; then
# Hier wird ein Hinweis ausgegeben, wie dieses Skript zu nutzen ist, wenn nicht # The echo command prints usage instructions when no job file is supplied.
# genügend Parameter übergeben wurden.
echo "Usage: $0 <jobfilename> [arguments...]" echo "Usage: $0 <jobfilename> [arguments...]"
# Dieser Befehl beendet das Skript mit einem Fehlercode. # The script terminates with exit status 1 when the required argument is missing.
exit 1 exit 1
fi fi
# Diese Variable speichert den ersten übergebenen Parameter als Namen des # The variable “jobname” stores the first positional argument as the Python job file name.
# Python-Skripts.
jobname="$1" jobname="$1"
# Dieser Befehl entfernt den ersten Parameter aus der Parameterliste, damit # The shift statement removes the first positional argument so that any additional arguments remain accessible.
# weitere Argumente optional weiterverarbeitet werden können.
shift shift
#--- Arbeitsverzeichnis und Log-Verzeichnis festlegen --- # The variable “WORKDIR” defines the directory where the Python job files reside.
# Diese Variable legt das Arbeitsverzeichnis fest, in dem sich die
# Python-Skripte befinden.
WORKDIR="/maui/commands" WORKDIR="/maui/commands"
# Diese Variable legt das Hauptverzeichnis für die Logdateien fest. # The variable “LOG_ROOT” defines the root directory where log folders will be created.
LOG_ROOT="/maui/logs" LOG_ROOT="/maui/logs"
# Diese Variable ermittelt aus dem übergebenen Skriptnamen # The variable “job_base” extracts the base name of the job without its extension.
# (z. B. rawFromBloomberg.py) den Basisteil (rawFromBloomberg).
job_base=$(basename "$jobname" .py) job_base=$(basename "$jobname" .py)
# Diese Variable bildet den Pfad für das individuellen Logverzeichnis, basierend # The variable “LOG_DIR” composes the path to the job-specific log directory.
# auf dem Basisteil des Skriptnamens.
LOG_DIR="$LOG_ROOT/$job_base" LOG_DIR="$LOG_ROOT/$job_base"
# Dieser Befehl stellt sicher, dass das Haupt-Logverzeichnis und das Verzeichnis # The mkdir command ensures that the root log directory and the job-specific directory exist, creating them if necessary.
# für das aktuelle Skript existieren, und legt sie gegebenenfalls an.
mkdir -p "$LOG_DIR" mkdir -p "$LOG_DIR"
# Dieser Befehl findet und löscht alle Logdateien im spezifischen Verzeichnis, # The find command removes log files older than twenty-four hours (1 440 minutes) from the job-specific directory.
# die älter als 24 Stunden (1440 Minuten) sind.
find "$LOG_DIR" -type f -mmin +1440 -delete find "$LOG_DIR" -type f -mmin +1440 -delete
#--- Prozessüberprüfung --- # The variable “current_pid” stores the process identifier of the currently running wrapper instance.
# Diese Variable speichert die Prozess-ID des aktuell ausgeführten Skripts,
# damit es sich nicht selbst erkennt.
current_pid=$$ current_pid=$$
# Diese Variable hält den Namen dieses Wrapperskripts (cron.sh), um ihn ebenfalls # The variable “wrapper_name” stores the file name of this wrapper to exclude it from the process search.
# von der Prozessliste auszuschließen.
wrapper_name=$(basename "$0") wrapper_name=$(basename "$0")
# Diese Variable legt fest, nach welchem exakten Aufrufmuster # The variable “pattern” stores the exact command signature that identifies a running job process.
# (python3 <jobname>) in der Prozessliste gesucht werden soll.
pattern="python3 $jobname" pattern="python3 $jobname"
# In dieser Variable werden alle zum Muster passenden Prozess-IDs gespeichert, # The variable “running” captures the process identifiers that match the command signature while excluding grep and wrapper processes.
# wobei Zeilen des Wrappers und greps ausgeschlossen werden.
running=$(ps ax -o pid,cmd | grep "$pattern" | grep -v grep | grep -v "$wrapper_name" | awk '{print $1}') running=$(ps ax -o pid,cmd | grep "$pattern" | grep -v grep | grep -v "$wrapper_name" | awk '{print $1}')
# Diese Abfrage prüft, ob ein passender Prozess bereits läuft. # The following conditional branch checks whether at least one matching process identifier was found; if a job is already running, the script informs the user and terminates with exit status 0.
if [ -n "$running" ]; then if [ -n "$running" ]; then
# Hier wird der Nutzer informiert, dass der entsprechende Job bereits ausgeführt # The echo command informs the user that the requested job is already running.
# wird, und ein erneuter Start verhindert.
echo "Job '$jobname' läuft bereits (PID(s): $running). Abbruch." echo "Job '$jobname' läuft bereits (PID(s): $running). Abbruch."
# Das Skript wird hier mit Exit-Code 0 (ohne Fehler) beendet, um keine neue # The script terminates with exit status 0 to prevent a second instance from starting.
# Instanz zu starten.
exit 0 exit 0
fi fi
#--- Logging vorbereiten und Job starten --- # The variable “timestamp” records the current date and time in YYYYMMDD-HHMMSS format.
# Diese Variable erzeugt einen Zeitstempel im Format yyyymmdd-hhiiss
# (z. B. 20250413-114530), um eindeutige Logdateien zu erstellen.
timestamp=$(date "+%Y%m%d-%H%M%S") timestamp=$(date "+%Y%m%d-%H%M%S")
# Diese Variable bildet den vollständigen Pfad zur Logdatei für die Standardausgabe. # The variable “STDOUT_LOG” composes the full path to the log file for standard output.
STDOUT_LOG="$LOG_DIR/L_${timestamp}.txt" STDOUT_LOG="$LOG_DIR/L_${timestamp}.txt"
# Diese Variable bildet den vollständigen Pfad zur Logdatei für die Fehlermeldungen. # The variable “ERROR_LOG” composes the full path to the log file for error output.
ERROR_LOG="$LOG_DIR/E_${timestamp}.err" ERROR_LOG="$LOG_DIR/E_${timestamp}.err"
# Dieser Befehl wechselt in das festgelegte Arbeitsverzeichnis oder bricht mit # The cd command changes into the working directory or aborts with an error message if the directory is inaccessible.
# Fehlermeldung ab, falls es nicht erreichbar ist.
cd "$WORKDIR" || { echo "Arbeitsverzeichnis $WORKDIR nicht erreichbar." >&2; exit 1; } cd "$WORKDIR" || { echo "Arbeitsverzeichnis $WORKDIR nicht erreichbar." >&2; exit 1; }
# Dieser Befehl führt das Python-Skript aus und leitet stdout in das L_-Logfile # The python3 command executes the specified job, redirecting standard output and error output to dedicated log files.
# und stderr in das E_-Logfile um.
python3 "$jobname" "$@" > "$STDOUT_LOG" 2> "$ERROR_LOG" python3 "$jobname" "$@" > "$STDOUT_LOG" 2> "$ERROR_LOG"
# Fehler senden bei Inhalt: Jobname, Zeilenumbruch, Fehler # The following conditional branch checks whether the error log file contains data; if errors exist, the job name and error content are posted to the monitoring endpoint.
if [ -s "$ERROR_LOG" ]; then if [ -s "$ERROR_LOG" ]; then
payload="$jobname
$(<"$ERROR_LOG")" # The variable “payload” concatenates the job name and error log content for notification.
payload="$jobname : $(<"$ERROR_LOG")"
# The curl command posts the payload to the ntfy.sh endpoint for error reporting.
curl -s -X POST https://ntfy.sh/itmaxDebug -d "$payload" curl -s -X POST https://ntfy.sh/itmaxDebug -d "$payload"
fi fi
import sys import sys; sys.path.append("..")
sys.path.append("..")
from sqlalchemy import create_engine from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker from sqlalchemy.orm import sessionmaker
from sqlalchemy.pool import QueuePool from sqlalchemy.pool import QueuePool
import config.MysqlConfig as DatabaseConfig import config.MysqlConfig as DatabaseConfig
from sshtunnel import SSHTunnelForwarder from sshtunnel import SSHTunnelForwarder
# This class manages the MySQL connection, optionally establishes an SSH tunnel, and exposes a ready-to-use SQLAlchemy session.
# Diese Klasse verwaltet die MySQL‑Verbindung und nutzt optional einen SSH‑Tunnel.
# Die Konfiguration stammt weiterhin aus config.MysqlConfig; es wird lediglich
# der erweiterte Engine‑Teil (QueuePool, Timeouts u. a.) integriert.
class MysqlManager: class MysqlManager:
# The constructor loads configuration, conditionally creates an SSH tunnel, builds an SQLAlchemy engine with a queue pool, and instantiates the first session.
def __init__(self): def __init__(self):
# ───────────────────────────────────────────────────────
# Konfiguration aus dem Modul laden # The variable “dbConfig” stores database connection parameters that are loaded from the configuration module.
# ───────────────────────────────────────────────────────
self.dbConfig = { self.dbConfig = {
"host": DatabaseConfig.MYSQL_HOST, "host": DatabaseConfig.MYSQL_HOST,
"user": DatabaseConfig.MYSQL_USER, "user": DatabaseConfig.MYSQL_USER,
...@@ -25,10 +20,10 @@ class MysqlManager: ...@@ -25,10 +20,10 @@ class MysqlManager:
"port": DatabaseConfig.MYSQL_PORT, "port": DatabaseConfig.MYSQL_PORT,
} }
# ─────────────────────────────────────────────────────── # This conditional branch builds an SSH tunnel when the configuration flag USE_SSH_TUNNEL is True; otherwise, it uses the direct database host and port.
# Optionalen SSH‑Tunnel aufbauen
# ───────────────────────────────────────────────────────
if getattr(DatabaseConfig, "USE_SSH_TUNNEL", False): if getattr(DatabaseConfig, "USE_SSH_TUNNEL", False):
# The variable “sshTunnel” opens a forwarder that connects the local port to the remote MySQL host through SSH.
self.sshTunnel = SSHTunnelForwarder( self.sshTunnel = SSHTunnelForwarder(
(DatabaseConfig.SSH_HOST, DatabaseConfig.SSH_PORT), (DatabaseConfig.SSH_HOST, DatabaseConfig.SSH_PORT),
ssh_username=DatabaseConfig.SSH_USERNAME, ssh_username=DatabaseConfig.SSH_USERNAME,
...@@ -36,36 +31,46 @@ class MysqlManager: ...@@ -36,36 +31,46 @@ class MysqlManager:
remote_bind_address=(self.dbConfig["host"], self.dbConfig["port"]), remote_bind_address=(self.dbConfig["host"], self.dbConfig["port"]),
) )
self.sshTunnel.start() self.sshTunnel.start()
# The variables “db_host” and “db_port” point to the locally forwarded endpoint when the tunnel is active.
db_host = "127.0.0.1" db_host = "127.0.0.1"
db_port = self.sshTunnel.local_bind_port db_port = self.sshTunnel.local_bind_port
else: else:
# The variable “sshTunnel” is set to None when no tunnel is required.
self.sshTunnel = None self.sshTunnel = None
# The variables “db_host” and “db_port” point to the remote MySQL server when no tunnel is used.
db_host = self.dbConfig["host"] db_host = self.dbConfig["host"]
db_port = self.dbConfig["port"] db_port = self.dbConfig["port"]
# ─────────────────────────────────────────────────────── # The variable “engine” creates an SQLAlchemy engine that uses a QueuePool and applies timeout settings taken from the configuration.
# SQLAlchemy‑Engine mit QueuePool & Timeouts erstellen
# ───────────────────────────────────────────────────────
self.engine = create_engine( self.engine = create_engine(
f"mysql+pymysql://{self.dbConfig['user']}:{self.dbConfig['password']}@{db_host}:{db_port}/{self.dbConfig['database']}", f"mysql+pymysql://{self.dbConfig['user']}:{self.dbConfig['password']}@{db_host}:{db_port}/{self.dbConfig['database']}",
echo=False, echo=False,
poolclass=QueuePool, poolclass=QueuePool,
pool_size=getattr(DatabaseConfig, "POOL_SIZE", 1), pool_size=getattr(DatabaseConfig, "POOL_SIZE", 1),
max_overflow=getattr(DatabaseConfig, "MAX_OVERFLOW", 0), max_overflow=getattr(DatabaseConfig, "MAX_OVERFLOW", 0),
pool_recycle=getattr(DatabaseConfig, "POOL_RECYCLE", 3600), # Sekunden pool_recycle=getattr(DatabaseConfig, "POOL_RECYCLE", 3600),
pool_pre_ping=True, pool_pre_ping=True,
connect_args={"connect_timeout": getattr(DatabaseConfig, "CONNECT_TIMEOUT", 30)}, connect_args={"connect_timeout": getattr(DatabaseConfig, "CONNECT_TIMEOUT", 30)},
) )
# Session Factory sofort initialisieren # The variable “dbSession” stores the first session instance created from the session factory bound to the engine.
self.dbSession = sessionmaker(bind=self.engine)() self.dbSession = sessionmaker(bind=self.engine)()
# Gibt die aktuelle Session zurück # This method returns the current SQLAlchemy session so callers can interact with the database.
def getSession(self): def getSession(self):
# The session instance is returned without creating a new one.
return self.dbSession return self.dbSession
# Schließt Session und SSH‑Tunnel (falls vorhanden) # This method closes the current session and shuts down the SSH tunnel when it was created.
def close(self): def close(self):
# The session is closed to release database resources.
self.dbSession.close() self.dbSession.close()
# This conditional branch stops the SSH tunnel when it exists.
if self.sshTunnel: if self.sshTunnel:
self.sshTunnel.stop() self.sshTunnel.stop()
"""
Hauptanwendung (Manager-Kontext)
Startet die Flask-App, erzeugt einen WebManager und registriert
alle Blueprints zentral.
**Neu**
Alle Endpunkte verlangen jetzt zwingend den Query-Parameter
?token=12345
Fehlt der Parameter oder stimmt der Wert nicht, erhält der Client
HTTP/1.1 401 Unauthorized
{"status": "NOK", "message": "Ungültiger oder fehlender Token."}
"""
from __future__ import annotations from __future__ import annotations
import sys; sys.path.append("..")
import sys
sys.path.append("..") # Projekt-Root im Suchpfad registrieren
from flask import Flask, request, jsonify from flask import Flask, request, jsonify
from manager.MysqlManager import MysqlManager
from models.token_toke import TokenToke
from routes.HealtCheckRouter import blueprint as health_router from routes.HealtCheckRouter import blueprint as health_router
from routes.BaseRouter import blueprint as tarifs_router from routes.BaseRouter import blueprint as tarifs_router
from routes.EeccxRouter import blueprint as eeccx_router from routes.EeccxRouter import blueprint as eeccx_router
# --------------------------------------------------------------------------- # # This class bundles blueprint registration so that all route collections are attached to the Flask application.
# WebManager: registriert sämtliche Blueprints
# --------------------------------------------------------------------------- #
class WebManager: class WebManager:
"""Registriert Blueprints und bündelt weitere Infrastruktur."""
# The constructor assigns the provided Flask instance and calls the private registration helper.
def __init__(self, app: Flask) -> None: def __init__(self, app: Flask) -> None:
self.app = app self.app = app
self._register_blueprints() self._register_blueprints()
# This helper method iterates over all blueprints and registers each of them on the Flask application.
def _register_blueprints(self) -> None: def _register_blueprints(self) -> None:
"""Alle Blueprint-Objekte an der App anmelden."""
for bp in (health_router, tarifs_router, eeccx_router): for bp in (health_router, tarifs_router, eeccx_router):
self.app.register_blueprint(bp) self.app.register_blueprint(bp)
# --------------------------------------------------------------------------- # # A new Flask application instance is created and handed to the WebManager for blueprint registration.
# App-Instanz & globale Token-Prüfung
# --------------------------------------------------------------------------- #
TOKEN_VALUE = "12345" # Erlaubter Token-Wert
app = Flask(__name__) app = Flask(__name__)
WebManager(app) WebManager(app)
# This handler executes before every request to enforce the compulsory token parameter and validate it against the database.
@app.before_request @app.before_request
def _require_token(): def _require_token():
"""
Globale Pre-Request-Hook: # This branch allows requests for static files to proceed without token validation.
Schlägt fehl, wenn der Query-Parameter ?token=12345
nicht exakt vorhanden ist.
"""
if request.endpoint == "static": if request.endpoint == "static":
# Flask-static-Files nicht schützen
return None return None
# The variable “token” stores the value of the ?token query parameter or None when absent.
token = request.args.get("token") token = request.args.get("token")
if token != TOKEN_VALUE:
# This branch rejects the request when the token parameter is missing.
if not token:
return ( return (
jsonify({"message": "Please enter a valid token."}), jsonify({"status": "NOAUTH", "message": "Please enter a valid token."}),
401, 401,
) )
# The variable “session” holds a new SQLAlchemy session obtained from the MysqlManager.
session = MysqlManager().getSession()
try:
# The variable “token_exists” evaluates to True when a matching token record is found in the database.
token_exists = (
session.query(TokenToke)
.filter_by(token_toke=token)
.first()
is not None
)
finally:
session.close()
# This branch rejects the request when the supplied token does not exist in the database.
if not token_exists:
return (
jsonify({"status": "NOAUTH", "message": "Please enter a valid token."}),
401,
)
# --------------------------------------------------------------------------- # # The application starts on all network interfaces on port 80 when the module is executed directly.
# Startpunkt
# --------------------------------------------------------------------------- #
if __name__ == "__main__": if __name__ == "__main__":
# Server auf allen Interfaces, Port 80 starten
app.run(host="0.0.0.0", port=80) app.run(host="0.0.0.0", port=80)
from sqlalchemy import Column, Integer, String, DateTime
from models._system import Base
class TokenToke(Base):
__tablename__ = 'token_toke'
id_toke = Column(
Integer,
primary_key=True,
autoincrement=True
)
token_toke = Column(
String(255),
nullable=False
)
owner_toke = Column(
String(255),
nullable=False
)
created_toke = Column(
DateTime,
nullable=False
)
# MAUI Data Toolkit # MAUI Data Toolkit
## Tutorials
....
....
....
## JupyterLab ## JupyterLab
To further develop or test this project use jupyter lab. Take care that "notebooks" are only concepts with your pc as python environment. Production code runs only inside the docker environment (stored in e.g. "commands" or "manager" folder). To further develop or test this project use jupyter lab. Take care that "notebooks" are only concepts with your pc as python environment. Production code runs only inside the docker environment (stored in e.g. "commands" or "manager" folder).
...@@ -8,31 +13,18 @@ To further develop or test this project use jupyter lab. Take care that "noteboo ...@@ -8,31 +13,18 @@ To further develop or test this project use jupyter lab. Take care that "noteboo
jupyter lab jupyter lab
``` ```
## Docker & ECR ## Docker
Use Docker to deploy this package in a production environment. Log in to Amazon ECR with the AWS CLI: Use Docker to build this package for a production environment.
```bash
aws ecr get-login-password --region eu-central-1 | docker login --username AWS --password-stdin ???
```
Build, tag, and push the image:
```bash ```bash
docker build --platform linux/amd64 -t maui:latest . docker build --platform linux/amd64 -t maui:latest .
docker tag maui:latest ???
docker push ???
``` ```
To pull and run the container, use:
```bash ```bash
docker pull ??? docker run -it -d --restart always -p 80:80 maui:latest
docker run -it -d --restart always -p 80:80 ???
``` ```
Alternatively, for local development with mounted volumes:
```bash ```bash
docker run -it \ docker run -it \
-v ./commands:/maui/commands \ -v ./commands:/maui/commands \
......
"""
Tarifs-Summary-Router
Stellt zwei Endpunkte bereit
GET /base – Übersicht aller aktiven Basis-Tarife
GET /base/<id> – Vollständiger Datensatz für ein Base-Objekt
(inkl. Deals & Options)
Die Detail-Route enthält die komplette, korrigierte Logik zum
Zusammen­bauen der Options-Hierarchie, so dass Duplikate – etwa wenn
derselbe O-Code in mehreren Gruppen auftaucht – **nicht** mehr verloren
gehen.
"""
# --------------------------------------------------------------------------- #
# Standard- / Drittanbieter-Bibliotheken
# --------------------------------------------------------------------------- #
from __future__ import annotations from __future__ import annotations
import sys; sys.path.append("..")
import sys
from typing import Any, Dict, List, Tuple from typing import Any, Dict, List, Tuple
sys.path.append("..") # Projekt-Root für Manager & Models hinzufügen
from flask import Blueprint, jsonify, abort from flask import Blueprint, jsonify, abort
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from sqlalchemy import func from sqlalchemy import func
# --------------------------------------------------------------------------- #
# Eigene Module
# --------------------------------------------------------------------------- #
from manager.MysqlManager import MysqlManager from manager.MysqlManager import MysqlManager
from models.base_base import BaseBase from models.base_base import BaseBase
from models.deal_deal import DealDeal from models.deal_deal import DealDeal
from models.option_opti import OptionOpti from models.option_opti import OptionOpti
# --------------------------------------------------------------------------- # # The blueprint instance is created with the module name stripped of dots so
# Blueprint # registering it never triggers a ValueError.
# --------------------------------------------------------------------------- #
blueprint = Blueprint(__name__.rsplit(".", 1)[-1], __name__) blueprint = Blueprint(__name__.rsplit(".", 1)[-1], __name__)
# --------------------------------------------------------------------------- # # This function builds the full JSON response for a given base object id,
# Hilfsfunktionen # returning None when the id does not exist.
# --------------------------------------------------------------------------- #
def _build_base_response(session: Session, base_id: int) -> Dict[str, Any] | None: def _build_base_response(session: Session, base_id: int) -> Dict[str, Any] | None:
"""
Erzeugt die vollständige JSON-Struktur für ein gegebenes Base-Objekt # This query loads the base record that matches the requested id or returns None when no match is found.
(inklusive Deals sowie hierarchisch aufgebauter Optionen).
"""
# --------------------------------------------------------------------- #
# Basis-Datensatz laden
# --------------------------------------------------------------------- #
base_record: BaseBase | None = ( base_record: BaseBase | None = (
session.query(BaseBase) session.query(BaseBase)
.filter_by(id_base=base_id) .filter_by(id_base=base_id)
.one_or_none() .one_or_none()
) )
if base_record is None: # nicht gefunden
# This conditional branch exits early when the requested base id does not exist.
if base_record is None:
return None return None
# --------------------------------------------------------------------- # # This query loads all deal records that belong to the current base object.
# Deals sammeln
# --------------------------------------------------------------------- #
deal_records: List[DealDeal] = ( deal_records: List[DealDeal] = (
session.query(DealDeal) session.query(DealDeal)
.filter_by(base_deal=base_record.id_base) .filter_by(base_deal=base_record.id_base)
.all() .all()
) )
# This comprehension converts the SQLAlchemy deal objects into plain Python dictionaries ready for JSON serialization.
deals: List[Dict[str, Any]] = [ deals: List[Dict[str, Any]] = [
{ {
"id": d.id_deal, "id": d.id_deal,
...@@ -88,24 +56,21 @@ def _build_base_response(session: Session, base_id: int) -> Dict[str, Any] | Non ...@@ -88,24 +56,21 @@ def _build_base_response(session: Session, base_id: int) -> Dict[str, Any] | Non
for d in deal_records for d in deal_records
] ]
# --------------------------------------------------------------------- # # This query loads every option that belongs to the current base object.
# Optionen laden
# --------------------------------------------------------------------- #
opti_records: List[OptionOpti] = ( opti_records: List[OptionOpti] = (
session.query(OptionOpti) session.query(OptionOpti)
.filter_by(base_opti=base_record.id_base) .filter_by(base_opti=base_record.id_base)
.all() .all()
) )
# --------------------------------------------------------------------- # # The two collections below hold option nodes and category nodes so we can easily assemble the option hierarchy.
# Optionen in Nodes verwandeln
# --------------------------------------------------------------------- #
# option_nodes = Liste aller (node_dict, parent_code)
# category_nodes = Mapping Gruppen-Code („G…“) → node_dict
option_nodes: List[Tuple[Dict[str, Any], str | None]] = [] option_nodes: List[Tuple[Dict[str, Any], str | None]] = []
category_nodes: Dict[str, Dict[str, Any]] = {} category_nodes: Dict[str, Dict[str, Any]] = {}
# This loop converts each option database record into a node dictionary and remembers its parent relationship.
for o in opti_records: for o in opti_records:
# The node dictionary contains all option attributes plus a list for potential child options.
node: Dict[str, Any] = { node: Dict[str, Any] = {
"id": o.id_opti, "id": o.id_opti,
"provisiongroup": o.provisiongroup_opti, "provisiongroup": o.provisiongroup_opti,
...@@ -121,45 +86,42 @@ def _build_base_response(session: Session, base_id: int) -> Dict[str, Any] | Non ...@@ -121,45 +86,42 @@ def _build_base_response(session: Session, base_id: int) -> Dict[str, Any] | Non
"provision4": float(o.provision4_opti), "provision4": float(o.provision4_opti),
"created": o.created_opti.isoformat() if o.created_opti else None, "created": o.created_opti.isoformat() if o.created_opti else None,
"updated": o.updated_opti.isoformat() if o.updated_opti else None, "updated": o.updated_opti.isoformat() if o.updated_opti else None,
"items": [], # Platz für Kind-Optionen "items": [],
} }
# Gruppen-Codes beginnen mit „G“ – merken, damit wir Kinder anhängen können # This conditional branch stores nodes whose provider code begins with “G” so children can later be attached to them.
if o.providercode_opti.startswith("G"): if o.providercode_opti.startswith("G"):
category_nodes[o.providercode_opti] = node category_nodes[o.providercode_opti] = node
# Merken, unter welcher Kategorie/Gruppe dieses Element hängt # Each tuple in option_nodes keeps the node itself and the code of its parent option or group.
option_nodes.append((node, o.providercategory_opti)) option_nodes.append((node, o.providercategory_opti))
# --------------------------------------------------------------------- # # This dictionary collects nodes that have no valid parent so they can be returned as top‑level entries grouped by parent code.
# Parent-/Child-Verknüpfung herstellen
# --------------------------------------------------------------------- #
root_nodes: Dict[str | None, List[Dict[str, Any]]] = {} root_nodes: Dict[str | None, List[Dict[str, Any]]] = {}
# This loop attaches every node either to its parent category or to the root collection when no suitable parent exists.
for node, parent_code in option_nodes: for node, parent_code in option_nodes:
# Falls ein Parent existiert und wir ihn als Group-Node erfasst haben:
# This branch attaches a node to its parent when the parent exists and has been recognised as a category node.
if parent_code and parent_code in category_nodes: if parent_code and parent_code in category_nodes:
category_nodes[parent_code]["items"].append(node) category_nodes[parent_code]["items"].append(node)
# This branch stores nodes without a valid parent as top‑level entries under their parent code.
else: else:
# Top-Level-Element (keine passende Gruppe gefunden)
root_nodes.setdefault(parent_code, []).append(node) root_nodes.setdefault(parent_code, []).append(node)
# Leere items-Arrays entfernen # This loop removes empty “items” lists from category nodes so the client does not receive useless empty arrays.
for n in category_nodes.values(): for n in category_nodes.values():
if not n["items"]: if not n["items"]:
n.pop("items", None) n.pop("items", None)
# --------------------------------------------------------------------- # # This comprehension builds the final list of option groups ready to be embedded into the JSON response.
# Endgültige Options-Liste formen
# --------------------------------------------------------------------- #
options: List[Dict[str, Any]] = [ options: List[Dict[str, Any]] = [
{"providercode": parent_code, "items": items} {"providercode": parent_code, "items": items}
for parent_code, items in root_nodes.items() for parent_code, items in root_nodes.items()
] ]
# --------------------------------------------------------------------- # # This dictionary gathers the base table column values ready for JSON serialization.
# Basis-Felder + Details zusammenführen
# --------------------------------------------------------------------- #
base_data: Dict[str, Any] = { base_data: Dict[str, Any] = {
"id": base_record.id_base, "id": base_record.id_base,
"provider": base_record.provider_base, "provider": base_record.provider_base,
...@@ -168,60 +130,43 @@ def _build_base_response(session: Session, base_id: int) -> Dict[str, Any] | Non ...@@ -168,60 +130,43 @@ def _build_base_response(session: Session, base_id: int) -> Dict[str, Any] | Non
"alias": base_record.alias_base, "alias": base_record.alias_base,
"flyerurl": base_record.flyerurl_base, "flyerurl": base_record.flyerurl_base,
"piburl": base_record.piburl_base, "piburl": base_record.piburl_base,
"created": ( "created": base_record.created_base.isoformat() if base_record.created_base else None,
base_record.created_base.isoformat() "updated": base_record.updated_base.isoformat() if base_record.updated_base else None,
if base_record.created_base else None
),
"updated": (
base_record.updated_base.isoformat()
if base_record.updated_base else None
),
} }
# This expression copies the JSON details column or uses an empty dictionary when the column is NULL.
details_data: Dict[str, Any] = ( details_data: Dict[str, Any] = (
base_record.details_base.copy() base_record.details_base.copy()
if base_record.details_base else {} if base_record.details_base else {}
) )
# This line removes the internal tariff_name helper key so it can be returned as a dedicated attribute.
ai_identified_name = details_data.pop("tariff_name", None) ai_identified_name = details_data.pop("tariff_name", None)
# This dictionary merges the core base data with the JSON details and the AI‑identified name.
merged_base = { merged_base = {
**base_data, **base_data,
**details_data, **details_data,
"ai_identified_name": ai_identified_name, "ai_identified_name": ai_identified_name,
} }
# --------------------------------------------------------------------- # # The function returns the assembled base, deal, and option data so the caller can serialize it to JSON.
# Gesamtergebnis
# --------------------------------------------------------------------- #
return { return {
"base": merged_base, "base": merged_base,
"deals": deals, "deals": deals,
"options": options, "options": options,
} }
# This route returns an overview of every base tariff that currently has at
# --------------------------------------------------------------------------- # # least one active deal (stops_deal IS NULL).
# Routen
# --------------------------------------------------------------------------- #
@blueprint.route("/base", methods=["GET"]) @blueprint.route("/base", methods=["GET"])
def base_overview(): def base_overview():
"""
Übersicht aller aktiven Basis-Tarife. # A new database session is opened through the MySQL manager.
SQL-Äquivalent:
SELECT id_base AS id,
provider_base AS provider,
providercode_base AS providercode,
name_base AS name,
alias_base AS alias
FROM base_base
INNER JOIN deal_deal ON base_deal = id_base
WHERE stops_deal IS NULL
GROUP BY id_base
ORDER BY provider_base ASC;
"""
session = MysqlManager().getSession() session = MysqlManager().getSession()
try: try:
# This query selects the distinct base objects that have at least one active deal.
query = ( query = (
session.query( session.query(
BaseBase.id_base.label("id"), BaseBase.id_base.label("id"),
...@@ -236,6 +181,7 @@ def base_overview(): ...@@ -236,6 +181,7 @@ def base_overview():
.order_by(BaseBase.provider_base.asc()) .order_by(BaseBase.provider_base.asc())
) )
# This comprehension converts every result row into a plain dictionary ready for JSON serialization.
records: List[Dict[str, Any]] = [ records: List[Dict[str, Any]] = [
{ {
"id": row.id, "id": row.id,
...@@ -246,25 +192,32 @@ def base_overview(): ...@@ -246,25 +192,32 @@ def base_overview():
} }
for row in query.all() for row in query.all()
] ]
# The finally block guarantees that the session is always closed.
finally: finally:
session.close() session.close()
# The route returns the list of base objects as a JSON array.
return jsonify(records) return jsonify(records)
# This route returns a complete JSON structure for a single base object or raises a 404 error when the id does not exist.
@blueprint.route("/base/<int:id>", methods=["GET"]) @blueprint.route("/base/<int:id>", methods=["GET"])
def base_details(id: int): def base_details(id: int):
"""
Detail-Route – liefert die vollständige JSON-Struktur für das # A new database session is opened through the MySQL manager.
angegebene Base-Objekt (inkl. Deals & Options).
"""
session = MysqlManager().getSession() session = MysqlManager().getSession()
try: try:
# The helper function assembles the complete response structure or returns None when the id is unknown.
data = _build_base_response(session, id) data = _build_base_response(session, id)
# The finally block guarantees that the session is always closed.
finally: finally:
session.close() session.close()
if data is None: # nicht gefunden ⇒ 404 # This conditional branch aborts with a 404 status when the requested base id was not found.
if data is None:
abort(404, description=f"Base object with id={id} not found.") abort(404, description=f"Base object with id={id} not found.")
# The route returns the fully assembled JSON structure for the requested base id.
return jsonify(data) return jsonify(data)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from __future__ import annotations from __future__ import annotations
import sys; sys.path.append("..")
import sys
sys.path.append("..")
import time import time
import hashlib import hashlib
import io import io
import json import json
import os import os
import tempfile import tempfile
from typing import List, Tuple from typing import List, Tuple
import requests import requests
import urllib3 import urllib3
from flask import Blueprint, Response, request from flask import Blueprint, Response, request
from sqlalchemy.orm import joinedload from sqlalchemy.orm import joinedload
from config.MauiConfig import EECCX_TOKEN_URL, EECCX_API_URL, EECCX_CLIENT_ID, EECCX_CLIENT_SECRET, EECCX_CF_CLIENT_ID, EECCX_CF_CLIENT_SECRET, EECCX_HDL_NR, EECCX_PROV_HDL_NR
# --------------------------------------------------------------------------- #
# Eigene Module #
# --------------------------------------------------------------------------- #
from manager.S3Manager import S3Manager from manager.S3Manager import S3Manager
from manager.MysqlManager import MysqlManager from manager.MysqlManager import MysqlManager
from models.deal_deal import DealDeal from models.deal_deal import DealDeal
from models.base_base import BaseBase from models.base_base import BaseBase
from models.option_opti import OptionOpti from models.option_opti import OptionOpti
from config.MauiConfig import EECCX_TOKEN_URL, EECCX_API_URL, EECCX_CLIENT_ID, EECCX_CLIENT_SECRET, EECCX_CF_CLIENT_ID, EECCX_CF_CLIENT_SECRET, EECCX_HDL_NR, EECCX_PROV_HDL_NR
# --------------------------------------------------------------------------- # # A warning is disabled so self‑signed certificate usage does not flood the log in development environments.
# Warnungen zu unsicheren HTTPS-Requests unterdrücken (nur Dev) #
# --------------------------------------------------------------------------- #
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# --------------------------------------------------------------------------- # # The constant TOKEN_URL stores the identity provider endpoint used for OAuth authentication.
# Konfiguration / Konstanten #
# --------------------------------------------------------------------------- #
TOKEN_URL = EECCX_TOKEN_URL TOKEN_URL = EECCX_TOKEN_URL
# The constant API_URL stores the partner API endpoint that generates the PDF.
API_URL = EECCX_API_URL API_URL = EECCX_API_URL
# The constant CLIENT_ID stores the OAuth client identifier for the back‑end application.
CLIENT_ID = EECCX_CLIENT_ID CLIENT_ID = EECCX_CLIENT_ID
# The constant CLIENT_SECRET stores the OAuth client secret matching the client identifier.
CLIENT_SECRET = EECCX_CLIENT_SECRET CLIENT_SECRET = EECCX_CLIENT_SECRET
# The constant CF_CLIENT_ID stores the Cloudflare Access client identifier required by the partner API.
CF_CLIENT_ID = EECCX_CF_CLIENT_ID CF_CLIENT_ID = EECCX_CF_CLIENT_ID
# The constant CF_CLIENT_SECRET stores the Cloudflare Access client secret required by the partner API.
CF_CLIENT_SECRET = EECCX_CF_CLIENT_SECRET CF_CLIENT_SECRET = EECCX_CF_CLIENT_SECRET
# The constant HDL_NR stores the dealer number that must be supplied in every partner API request.
HDL_NR = EECCX_HDL_NR HDL_NR = EECCX_HDL_NR
# The constant PROV_HDL_NR stores the provisioning dealer number that must be supplied in every partner API request.
PROV_HDL_NR = EECCX_PROV_HDL_NR PROV_HDL_NR = EECCX_PROV_HDL_NR
# The constant PRODUKT_KATEGORIE stores the fixed product category for the request payload.
PRODUKT_KATEGORIE = "O" PRODUKT_KATEGORIE = "O"
# --------------------------------------------------------------------------- # # The blueprint instance is created from the module name so the router can be registered once inside the WebManager.
# Blueprint #
# --------------------------------------------------------------------------- #
blueprint = Blueprint(__name__.rsplit(".", 1)[-1], __name__) blueprint = Blueprint(__name__.rsplit(".", 1)[-1], __name__)
# S3-Manager für Uploads # The S3Manager instance manages file uploads to the configured S3 bucket.
s3_manager = S3Manager() s3_manager = S3Manager()
# --------------------------------------------------------------------------- # # The function returns a JSON error response with the supplied message and status code.
# Hilfsfunktionen #
# --------------------------------------------------------------------------- #
def _json_error(message: str, status_code: int = 500) -> Response: def _json_error(message: str, status_code: int = 500) -> Response:
# A JSON payload is assembled with status "ERROR" and the provided message.
payload = json.dumps({"status": "ERROR", "message": message}, ensure_ascii=False) payload = json.dumps({"status": "ERROR", "message": message}, ensure_ascii=False)
return Response(payload, status=status_code, mimetype="application/json") return Response(payload, status=status_code, mimetype="application/json")
# The function extracts the option identifiers from the query string and normalises comma‑separated values into a list.
def _extract_options() -> List[str]: def _extract_options() -> List[str]:
# The raw option list is read from the query argument list to handle multiple "options" parameters.
raw = request.args.getlist("options") raw = request.args.getlist("options")
# The following conditional branch handles the special case in which a single comma‑separated value has been supplied instead of repeated parameters.
if len(raw) == 1 and "," in raw[0]: if len(raw) == 1 and "," in raw[0]:
# The value is split on commas and stripped so empty strings are removed from the final list.
return [opt.strip() for opt in raw[0].split(",") if opt.strip()] return [opt.strip() for opt in raw[0].split(",") if opt.strip()]
return [opt for opt in raw if opt] return [opt for opt in raw if opt]
# The function builds a unique hash from tarif_id, the option list, and the current timestamp so the uploaded PDF file name is collision‑free.
def _hash_id_options(tarif_id: str, options: List[str]) -> str: def _hash_id_options(tarif_id: str, options: List[str]) -> str:
# The key string concatenates tarif_id, the sorted option list, and the current Unix timestamp.
key = f"{tarif_id}:{','.join(sorted(options))}:{int(time.time())}".encode("utf-8") key = f"{tarif_id}:{','.join(sorted(options))}:{int(time.time())}".encode("utf-8")
return hashlib.sha256(key).hexdigest() return hashlib.sha256(key).hexdigest()
# ---------------------------- OAuth-Token ---------------------------------- # # ---------------------------- OAuth-Token ---------------------------------- #
# The function exchanges client credentials for an OAuth access token and returns the token together with an error message if something goes wrong.
def _get_token() -> Tuple[str | None, str | None]: def _get_token() -> Tuple[str | None, str | None]:
# The payload dictionary contains the grant type and client credentials for the token request.
payload = { payload = {
"grant_type": "client_credentials", "grant_type": "client_credentials",
"client_id": CLIENT_ID, "client_id": CLIENT_ID,
"client_secret": CLIENT_SECRET, "client_secret": CLIENT_SECRET,
} }
try: try:
# A POST request is sent to the identity provider to obtain an access token.
r = requests.post(TOKEN_URL, data=payload, verify=False, timeout=10) r = requests.post(TOKEN_URL, data=payload, verify=False, timeout=10)
r.raise_for_status() r.raise_for_status()
except requests.exceptions.RequestException as exc: except requests.exceptions.RequestException as exc:
# An error tuple is returned when the HTTP request fails.
return None, f"Token-Abruf fehlgeschlagen: {exc}" return None, f"Token-Abruf fehlgeschlagen: {exc}"
# The JSON response is parsed to extract the access token field.
token = r.json().get("access_token") token = r.json().get("access_token")
# The following conditional branch returns an error tuple when the response does not contain an access token.
if not token: if not token:
return None, "Kein access_token im Token-Response." return None, "Kein access_token im Token-Response."
return token, None
return token, None
# ------------------------- Partner-API-Aufruf ------------------------------ # # ------------------------- Partner-API-Aufruf ------------------------------ #
# The function assembles the partner API payload, performs the HTTP PUT request, and returns either the JSON result or an error message.
def _partner_api(token: str, tarif_id: str, options: List[str]) -> Tuple[dict | None, str | None]: def _partner_api(token: str, tarif_id: str, options: List[str]) -> Tuple[dict | None, str | None]:
# The headers dictionary includes OAuth, Cloudflare, and content‑type information required by the partner API.
headers = { headers = {
"Authorization": f"Bearer {token}", "Authorization": f"Bearer {token}",
"CF-Access-Client-Id": CF_CLIENT_ID, "CF-Access-Client-Id": CF_CLIENT_ID,
...@@ -104,6 +127,8 @@ def _partner_api(token: str, tarif_id: str, options: List[str]) -> Tuple[dict | ...@@ -104,6 +127,8 @@ def _partner_api(token: str, tarif_id: str, options: List[str]) -> Tuple[dict |
} }
try: try:
# A database session is opened to load the requested deal together with its base tariff information.
session = MysqlManager().getSession() session = MysqlManager().getSession()
deal_int = int(tarif_id) deal_int = int(tarif_id)
deal = ( deal = (
...@@ -113,28 +138,43 @@ def _partner_api(token: str, tarif_id: str, options: List[str]) -> Tuple[dict | ...@@ -113,28 +138,43 @@ def _partner_api(token: str, tarif_id: str, options: List[str]) -> Tuple[dict |
.filter(DealDeal.id_deal == deal_int) .filter(DealDeal.id_deal == deal_int)
.one_or_none() .one_or_none()
) )
# The following conditional branch returns an error whenever the specified deal identifier cannot be found.
if deal is None: if deal is None:
return None, f"Deal {tarif_id} not found." return None, f"Deal {tarif_id} not found."
# The base_obj variable holds the related base tariff for the selected deal.
base_obj: BaseBase = deal.base base_obj: BaseBase = deal.base
# The following conditional branch validates that a provider code exists for the base tariff; otherwise an error is returned.
if not base_obj or not base_obj.providercode_base: if not base_obj or not base_obj.providercode_base:
return None, f"Kein providercode_base für Deal id={tarif_id} gefunden." return None, f"Kein providercode_base für Deal id={tarif_id} gefunden."
providercode_base_value = base_obj.providercode_base providercode_base_value = base_obj.providercode_base
providercode_deal_value = deal.providercode_deal providercode_deal_value = deal.providercode_deal
finally: finally:
# The database session is always closed to free resources regardless of success or failure.
session.close() session.close()
# The am_aktion_id variable is initialised with None and used only when the deal provider code contains digits.
am_aktion_id: int | None = None am_aktion_id: int | None = None
# The following conditional branch extracts numeric characters from the deal provider code to build the am_aktion_id value.
if providercode_deal_value: if providercode_deal_value:
digits = "".join(filter(str.isdigit, providercode_deal_value)) digits = "".join(filter(str.isdigit, providercode_deal_value))
if digits: if digits:
am_aktion_id = int(digits) am_aktion_id = int(digits)
# The service_codes list collects provider codes of selected options and their parent groups to pass them to the API.
service_codes: List[str] = [] service_codes: List[str] = []
try: try:
# A database session is opened to translate option identifiers to provider codes.
session = MysqlManager().getSession() session = MysqlManager().getSession()
for opt_id in options: for opt_id in options:
# The following try/except converts the option identifier to an integer and skips invalid values.
try: try:
opt_int = int(opt_id) opt_int = int(opt_id)
except ValueError: except ValueError:
...@@ -144,11 +184,15 @@ def _partner_api(token: str, tarif_id: str, options: List[str]) -> Tuple[dict | ...@@ -144,11 +184,15 @@ def _partner_api(token: str, tarif_id: str, options: List[str]) -> Tuple[dict |
.filter(OptionOpti.id_opti == opt_int) .filter(OptionOpti.id_opti == opt_int)
.one_or_none() .one_or_none()
) )
# The following conditional branch skips options without a provider code.
if not opt or not opt.providercode_opti: if not opt or not opt.providercode_opti:
continue continue
service_codes.append(opt.providercode_opti) service_codes.append(opt.providercode_opti)
parent_code = opt.providercategory_opti parent_code = opt.providercategory_opti
# This loop climbs up the category hierarchy so parent group codes are included as service codes.
while parent_code: while parent_code:
parent_opt = ( parent_opt = (
session.query(OptionOpti) session.query(OptionOpti)
...@@ -156,14 +200,19 @@ def _partner_api(token: str, tarif_id: str, options: List[str]) -> Tuple[dict | ...@@ -156,14 +200,19 @@ def _partner_api(token: str, tarif_id: str, options: List[str]) -> Tuple[dict |
.limit(1) .limit(1)
.one_or_none() .one_or_none()
) )
# The following conditional branch breaks the traversal when the parent option is missing or lacks a provider code.
if not parent_opt or not parent_opt.providercode_opti: if not parent_opt or not parent_opt.providercode_opti:
break break
service_codes.append(parent_opt.providercode_opti) service_codes.append(parent_opt.providercode_opti)
parent_code = parent_opt.providercategory_opti parent_code = parent_opt.providercategory_opti
finally: finally:
# The database session is always closed to release connections.
session.close() session.close()
# The payload dictionary is prepared according to the partner API specification.
payload = { payload = {
"hdl_nr": HDL_NR, "hdl_nr": HDL_NR,
"prov_hdl_nr": PROV_HDL_NR, "prov_hdl_nr": PROV_HDL_NR,
...@@ -171,21 +220,34 @@ def _partner_api(token: str, tarif_id: str, options: List[str]) -> Tuple[dict | ...@@ -171,21 +220,34 @@ def _partner_api(token: str, tarif_id: str, options: List[str]) -> Tuple[dict |
"produkt_kategorie": PRODUKT_KATEGORIE, "produkt_kategorie": PRODUKT_KATEGORIE,
"service_code": service_codes, "service_code": service_codes,
} }
# The following conditional branch adds the promotional action identifier when one has been detected.
if am_aktion_id is not None: if am_aktion_id is not None:
payload["am_aktion_id"] = am_aktion_id payload["am_aktion_id"] = am_aktion_id
try: try:
# A PUT request is sent to the partner API with the assembled payload and headers.
r = requests.put(API_URL, headers=headers, json=payload, verify=False, timeout=30) r = requests.put(API_URL, headers=headers, json=payload, verify=False, timeout=30)
r.raise_for_status() r.raise_for_status()
except requests.exceptions.RequestException as exc: except requests.exceptions.RequestException as exc:
# An error tuple is returned when the HTTP request fails.
return None, f"API-Aufruf fehlgeschlagen: {exc} – Payload: {payload}" return None, f"API-Aufruf fehlgeschlagen: {exc} – Payload: {payload}"
try: try:
# The JSON body of the HTTP response is parsed.
data = r.json() data = r.json()
except ValueError: except ValueError:
# An error tuple is returned when the response is not valid JSON.
return None, "Antwort der Partner-API ist kein JSON." return None, "Antwort der Partner-API ist kein JSON."
# The err_val variable is inspected for API‑level error information that must be mapped to a user‑friendly string.
err_val = data.get("error") err_val = data.get("error")
# The following conditional branch returns an error tuple when the API embedded error information in its JSON body.
if err_val: if err_val:
if isinstance(err_val, list): if isinstance(err_val, list):
msg = "; ".join(str(e) for e in err_val) msg = "; ".join(str(e) for e in err_val)
...@@ -203,63 +265,68 @@ def _partner_api(token: str, tarif_id: str, options: List[str]) -> Tuple[dict | ...@@ -203,63 +265,68 @@ def _partner_api(token: str, tarif_id: str, options: List[str]) -> Tuple[dict |
return data, None return data, None
# ----------------------------- PDF-Download ------------------------------- # # ----------------------------- PDF-Download ------------------------------- #
# The function downloads the PDF at the supplied URL and returns its binary content together with an optional error message.
def _download_pdf(url: str) -> Tuple[bytes | None, str | None]: def _download_pdf(url: str) -> Tuple[bytes | None, str | None]:
# A streaming GET request is performed so large files do not exhaust memory unnecessarily.
try: try:
r = requests.get(url, stream=True, verify=False, timeout=30) r = requests.get(url, stream=True, verify=False, timeout=30)
r.raise_for_status() r.raise_for_status()
except requests.exceptions.RequestException as exc: except requests.exceptions.RequestException as exc:
# A tuple containing None and an error message is returned when the HTTP request fails.
return None, f"PDF-Download fehlgeschlagen: {exc}" return None, f"PDF-Download fehlgeschlagen: {exc}"
return r.content, None return r.content, None
# The route handler generates the EECCX PDF for a given deal and optional option list and returns the public S3 download URL.
# --------------------------------------------------------------------------- #
# Route #
# --------------------------------------------------------------------------- #
@blueprint.route("/freenet-eeccx/<string:tarif_id>", methods=["GET"]) @blueprint.route("/freenet-eeccx/<string:tarif_id>", methods=["GET"])
def eeccx_pdf(tarif_id: str): def eeccx_pdf(tarif_id: str):
"""
Beispiel: # The options list is extracted from the query string so it can be forwarded to the partner API.
/freenet-eeccx/3877325?options=G343,O3729
/freenet-eeccx/3877325?options=G343&options=B…
"""
options = _extract_options() options = _extract_options()
# 1) OAuth-Token # The OAuth token is obtained and an error response is returned when token retrieval fails.
token, err = _get_token() token, err = _get_token()
if err: if err:
return _json_error(err, 502) return _json_error(err, 502)
# 2) Partner-API # The partner API is called and an error response is returned when the API invocation fails.
api_json, err = _partner_api(token, tarif_id, options) api_json, err = _partner_api(token, tarif_id, options)
if err: if err:
return _json_error(err, 502) return _json_error(err, 502)
# 3) PDF-URL extrahieren # The pdf_url variable tries to extract the PCS or PCI PDF link from the partner API response JSON.
pdf_url = api_json.get("pcsPdf") or api_json.get("pciPdf") pdf_url = api_json.get("pcsPdf") or api_json.get("pciPdf")
# The following conditional branch returns an error response when no PDF URL is present in the API response.
if not pdf_url: if not pdf_url:
msg = api_json.get("message") or "Keine PDF-URL in der API-Antwort." msg = api_json.get("message") or "Keine PDF-URL in der API-Antwort."
return _json_error(msg, 502) return _json_error(msg, 502)
# 4) PDF laden # The PDF is downloaded and an error response is returned when the download fails.
pdf_bytes, err = _download_pdf(pdf_url) pdf_bytes, err = _download_pdf(pdf_url)
if err: if err:
return _json_error(err, 502) return _json_error(err, 502)
# 5) Temporäre Datei zum Upload schreiben # A unique hash is generated so the temporary file and the S3 object name are collision‑free.
hash_name = _hash_id_options(tarif_id, options) hash_name = _hash_id_options(tarif_id, options)
# A temporary file is opened so the PDF can be written to disk for uploading.
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp: with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
tmp.write(pdf_bytes) tmp.write(pdf_bytes)
tmp_path = tmp.name tmp_path = tmp.name
# 6) Upload zu S3 # The PDF file is uploaded to S3 and the local temporary file is removed afterwards.
s3_key = f"eeccx/{hash_name}.pdf" s3_key = f"eeccx/{hash_name}.pdf"
url = s3_manager.uploadFile(tmp_path, s3_key) url = s3_manager.uploadFile(tmp_path, s3_key)
os.remove(tmp_path) os.remove(tmp_path)
# The following conditional branch returns an error response when the S3 upload fails.
if not url: if not url:
return _json_error(f"Upload zu S3 fehlgeschlagen für key={s3_key}", 502) return _json_error(f"Upload zu S3 fehlgeschlagen für key={s3_key}", 502)
# 7) Download-URL als JSON zurückgeben # A JSON response is returned containing the public URL of the uploaded PDF.
payload = json.dumps({"url": url}, ensure_ascii=False) payload = json.dumps({"url": url}, ensure_ascii=False)
return Response(payload, status=200, mimetype="application/json") return Response(payload, status=200, mimetype="application/json")
\ No newline at end of file
"""
Health-Check-Router
Kapselt den Endpunkt / für den System-Gesundheitscheck.
"""
from flask import Blueprint, jsonify from flask import Blueprint, jsonify
# Blueprint-Name = Dateiname ohne Punkte; verhindert ValueError # The blueprint instance is named after the current module without dots to avoid a ValueError on registration.
blueprint = Blueprint(__name__.rsplit(".", 1)[-1], __name__) blueprint = Blueprint(__name__.rsplit(".", 1)[-1], __name__)
# This function handles HTTP GET requests to the root path and returns a simple health-check response.
@blueprint.route("/", methods=["GET"]) @blueprint.route("/", methods=["GET"])
def index(): def index():
"""
GET / # The function returns a JSON object indicating that the API is operational.
Liefert einen einfachen JSON-Status. return jsonify({"message": "The API is working.", "status": "OK"})
"""
return jsonify({"message": "The API is working."})
No preview for this file type
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment