Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Sign in
Toggle navigation
C
crawler
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Project - Tarifs Crawler & API
crawler
Commits
11270df4
Commit
11270df4
authored
Jul 22, 2025
by
Marco Schmiedel
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix
parent
b33ee70f
Changes
14
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
1081 additions
and
44 deletions
+1081
-44
38da158f-ad91-433f-8b7b-60ff4949d7ff.json
.sidekick/database/38da158f-ad91-433f-8b7b-60ff4949d7ff.json
+3
-3
48126029-3c3e-4372-9f3e-1e8b9686114e.json
.sidekick/database/48126029-3c3e-4372-9f3e-1e8b9686114e.json
+7
-1
5a3f6886-edd3-48d5-935d-f15a42e82bac.json
.sidekick/database/5a3f6886-edd3-48d5-935d-f15a42e82bac.json
+18
-0
5f874bee-40e2-4b9a-b102-f0b6d643a840.json
.sidekick/database/5f874bee-40e2-4b9a-b102-f0b6d643a840.json
+7
-1
62aea232-2549-437e-b5a9-72cb2aa92d16.json
.sidekick/database/62aea232-2549-437e-b5a9-72cb2aa92d16.json
+15
-4
8c1b7b54-86c0-453c-839c-95390d883819.json
.sidekick/database/8c1b7b54-86c0-453c-839c-95390d883819.json
+7
-1
986eeb57-8634-4f40-a4ea-a2eae9d87e71.json
.sidekick/database/986eeb57-8634-4f40-a4ea-a2eae9d87e71.json
+3
-3
e3281330-5559-49da-9434-bf3cccd4ddae.json
.sidekick/database/e3281330-5559-49da-9434-bf3cccd4ddae.json
+9
-4
calculateAiPrices.py
commands/calculateAiPrices.py
+56
-22
calculateTarifDetailsWithGpt.py
commands/calculateTarifDetailsWithGpt.py
+25
-1
downloadDataFromMauiPartnercard.py
commands/downloadDataFromMauiPartnercard.py
+925
-0
MysqlConfig.py
config/MysqlConfig.py
+1
-1
_CronConfig.txt
config/_CronConfig.txt
+4
-3
readme.md
readme.md
+1
-0
No files found.
.sidekick/database/38da158f-ad91-433f-8b7b-60ff4949d7ff.json
View file @
11270df4
...
@@ -2,11 +2,11 @@
...
@@ -2,11 +2,11 @@
"fileId"
:
"38da158f-ad91-433f-8b7b-60ff4949d7ff"
,
"fileId"
:
"38da158f-ad91-433f-8b7b-60ff4949d7ff"
,
"originalPath"
:
"work/config/_CronConfig.txt"
,
"originalPath"
:
"work/config/_CronConfig.txt"
,
"currentPath"
:
"work/config/_CronConfig.txt"
,
"currentPath"
:
"work/config/_CronConfig.txt"
,
"hash"
:
"
174ef9fe04e4d6aebb38573991945d535ec049a7e3069a8c033cd8e7ee30820e
"
,
"hash"
:
"
6163e59558a5880e7708c36a39ef2d5bc25a1b24b9be547bdc4c654f2c2cd495
"
,
"docContent"
:
"<p><br></p>"
,
"docContent"
:
"<p><br></p>"
,
"checkedStatus"
:
"done"
,
"checkedStatus"
:
"done"
,
"comments"
:
[],
"comments"
:
[],
"lastCheckedTimestamp"
:
17
49816820741
,
"lastCheckedTimestamp"
:
17
53176964166
,
"lastFileModificationTimestamp"
:
17
49816813585.9177
,
"lastFileModificationTimestamp"
:
17
53176714171.083
,
"hash_version"
:
2
"hash_version"
:
2
}
}
.sidekick/database/48126029-3c3e-4372-9f3e-1e8b9686114e.json
View file @
11270df4
...
@@ -5,7 +5,13 @@
...
@@ -5,7 +5,13 @@
"hash"
:
"35d56b9bc420e57388faa4e15e12cde381048f016b40307c4ed7f829e9aac7e4"
,
"hash"
:
"35d56b9bc420e57388faa4e15e12cde381048f016b40307c4ed7f829e9aac7e4"
,
"docContent"
:
"<p><br></p>"
,
"docContent"
:
"<p><br></p>"
,
"checkedStatus"
:
"done"
,
"checkedStatus"
:
"done"
,
"comments"
:
[],
"comments"
:
[
{
"commentId"
:
"48748f93-8a7a-4f42-b5cb-71e8007dccf1"
,
"text"
:
"For information about this script, refer to the base tutorial series in the README file."
,
"timestamp"
:
1753170811938
}
],
"lastCheckedTimestamp"
:
1747070815750
,
"lastCheckedTimestamp"
:
1747070815750
,
"lastFileModificationTimestamp"
:
1747070802673.0312
,
"lastFileModificationTimestamp"
:
1747070802673.0312
,
"hash_version"
:
2
"hash_version"
:
2
...
...
.sidekick/database/5a3f6886-edd3-48d5-935d-f15a42e82bac.json
0 → 100644
View file @
11270df4
{
"fileId"
:
"5a3f6886-edd3-48d5-935d-f15a42e82bac"
,
"originalPath"
:
"work/commands/downloadDataFromMauiPartnercard.py"
,
"currentPath"
:
"work/commands/downloadDataFromMauiPartnercard.py"
,
"hash"
:
"dcf9d6bada1a0e0b7e1d9608174b5c5e3ae06efdc15f32219e599b266f71f74f"
,
"hash_version"
:
2
,
"docContent"
:
"<p><br></p>"
,
"checkedStatus"
:
"done"
,
"comments"
:
[
{
"commentId"
:
"e85986cf-a5ed-4d22-a9e5-69ad29f26082"
,
"text"
:
"https://s3.eu-central-1.amazonaws.com/monosnap.bugsmasher.online/marcoschmiedel/2025-07-22.m4v"
,
"timestamp"
:
1753177338921
}
],
"lastCheckedTimestamp"
:
1753177340827
,
"lastFileModificationTimestamp"
:
1753176234527.168
}
.sidekick/database/5f874bee-40e2-4b9a-b102-f0b6d643a840.json
View file @
11270df4
...
@@ -5,7 +5,13 @@
...
@@ -5,7 +5,13 @@
"hash"
:
"3e1df1c401fbd00595912583ee4d4d2bb45364f81daca29113db874515eee0a0"
,
"hash"
:
"3e1df1c401fbd00595912583ee4d4d2bb45364f81daca29113db874515eee0a0"
,
"docContent"
:
"<p><br></p>"
,
"docContent"
:
"<p><br></p>"
,
"checkedStatus"
:
"done"
,
"checkedStatus"
:
"done"
,
"comments"
:
[],
"comments"
:
[
{
"commentId"
:
"9b3b2e00-18f1-44cb-a752-0b74eeb492d8"
,
"text"
:
"For information about this script, refer to the base tutorial series in the README file."
,
"timestamp"
:
1753170808598
}
],
"lastCheckedTimestamp"
:
1750323681845
,
"lastCheckedTimestamp"
:
1750323681845
,
"lastFileModificationTimestamp"
:
1750323366660.076
,
"lastFileModificationTimestamp"
:
1750323366660.076
,
"hash_version"
:
2
"hash_version"
:
2
...
...
.sidekick/database/62aea232-2549-437e-b5a9-72cb2aa92d16.json
View file @
11270df4
...
@@ -2,11 +2,22 @@
...
@@ -2,11 +2,22 @@
"fileId"
:
"62aea232-2549-437e-b5a9-72cb2aa92d16"
,
"fileId"
:
"62aea232-2549-437e-b5a9-72cb2aa92d16"
,
"originalPath"
:
"work/commands/calculateTarifDetailsWithGpt.py"
,
"originalPath"
:
"work/commands/calculateTarifDetailsWithGpt.py"
,
"currentPath"
:
"work/commands/calculateTarifDetailsWithGpt.py"
,
"currentPath"
:
"work/commands/calculateTarifDetailsWithGpt.py"
,
"hash"
:
"
6de592dae63250612a453932e1e344699a550e4438c16813d54ba4bf2a13c785
"
,
"hash"
:
"
9a99857070c6c9066089985619e08d943c5c3ec785113239e48ff83d3e352792
"
,
"docContent"
:
"<p><br></p>"
,
"docContent"
:
"<p><br></p>"
,
"checkedStatus"
:
"done"
,
"checkedStatus"
:
"done"
,
"comments"
:
[],
"comments"
:
[
"lastCheckedTimestamp"
:
1747071244862
,
{
"lastFileModificationTimestamp"
:
1747071237273.2832
,
"commentId"
:
"4face178-ab9f-4870-bbf9-73befd035a80"
,
"text"
:
"For information about this script, refer to the base tutorial series in the README file."
,
"timestamp"
:
1753170818842
},
{
"commentId"
:
"22763243-229a-48ca-980b-8eb27433ba6e"
,
"text"
:
"I've added a static field
\"
is_partnercard
\"
based on the partnercards.csv that is created by downloadDataFromMauiPartnercard.py."
,
"timestamp"
:
1753177018105
}
],
"lastCheckedTimestamp"
:
1753176977286
,
"lastFileModificationTimestamp"
:
1753169638397.8057
,
"hash_version"
:
2
"hash_version"
:
2
}
}
.sidekick/database/8c1b7b54-86c0-453c-839c-95390d883819.json
View file @
11270df4
...
@@ -5,7 +5,13 @@
...
@@ -5,7 +5,13 @@
"hash"
:
"4c972fa8de12b095edb942fc260533235001e5b7a508a4e058e28f45340ebc59"
,
"hash"
:
"4c972fa8de12b095edb942fc260533235001e5b7a508a4e058e28f45340ebc59"
,
"docContent"
:
"<p><br></p>"
,
"docContent"
:
"<p><br></p>"
,
"checkedStatus"
:
"done"
,
"checkedStatus"
:
"done"
,
"comments"
:
[],
"comments"
:
[
{
"commentId"
:
"02d64edc-151e-46cd-ad31-7855739ed216"
,
"text"
:
"For information about this script, refer to the base tutorial series in the README file."
,
"timestamp"
:
1753170814981
}
],
"lastCheckedTimestamp"
:
1750663401713
,
"lastCheckedTimestamp"
:
1750663401713
,
"lastFileModificationTimestamp"
:
1750662226645.7258
,
"lastFileModificationTimestamp"
:
1750662226645.7258
,
"hash_version"
:
2
"hash_version"
:
2
...
...
.sidekick/database/986eeb57-8634-4f40-a4ea-a2eae9d87e71.json
View file @
11270df4
...
@@ -2,7 +2,7 @@
...
@@ -2,7 +2,7 @@
"fileId"
:
"986eeb57-8634-4f40-a4ea-a2eae9d87e71"
,
"fileId"
:
"986eeb57-8634-4f40-a4ea-a2eae9d87e71"
,
"originalPath"
:
"work/readme.md"
,
"originalPath"
:
"work/readme.md"
,
"currentPath"
:
"work/README.md"
,
"currentPath"
:
"work/README.md"
,
"hash"
:
"
4f572de1efd35b429b45d9932e8ffa93153c9b0a421c7afec99b4af109aa87d1
"
,
"hash"
:
"
756bb90539f71f054db700f99b053cf8e8b94a2d17499ce170cc1ba5db7276b3
"
,
"docContent"
:
"<p><br></p>"
,
"docContent"
:
"<p><br></p>"
,
"checkedStatus"
:
"done"
,
"checkedStatus"
:
"done"
,
"comments"
:
[
"comments"
:
[
...
@@ -17,7 +17,7 @@
...
@@ -17,7 +17,7 @@
"timestamp"
:
1747069658074
"timestamp"
:
1747069658074
}
}
],
],
"lastCheckedTimestamp"
:
175
0683280697
,
"lastCheckedTimestamp"
:
175
3177346533
,
"lastFileModificationTimestamp"
:
175
0683272808.5942
,
"lastFileModificationTimestamp"
:
175
3177332762.9363
,
"hash_version"
:
2
"hash_version"
:
2
}
}
.sidekick/database/e3281330-5559-49da-9434-bf3cccd4ddae.json
View file @
11270df4
...
@@ -2,17 +2,22 @@
...
@@ -2,17 +2,22 @@
"fileId"
:
"e3281330-5559-49da-9434-bf3cccd4ddae"
,
"fileId"
:
"e3281330-5559-49da-9434-bf3cccd4ddae"
,
"originalPath"
:
"work/commands/calculateAiPrices.py"
,
"originalPath"
:
"work/commands/calculateAiPrices.py"
,
"currentPath"
:
"work/commands/calculateAiPrices.py"
,
"currentPath"
:
"work/commands/calculateAiPrices.py"
,
"hash"
:
"
343345985f2e8153b4e9e6a7efa0479c3938e68080388f23e537609f14cafded
"
,
"hash"
:
"
5853eddf6c87959b28413a7d63edaef015cde95c13b6b28d13a0d24dfdb30af2
"
,
"hash_version"
:
2
,
"hash_version"
:
2
,
"docContent"
:
"<p><br></p>"
,
"docContent"
:
"<p><br></p>"
,
"checkedStatus"
:
"
todo
"
,
"checkedStatus"
:
"
done
"
,
"comments"
:
[
"comments"
:
[
{
{
"commentId"
:
"1d5f60af-fe8c-46f1-8a99-806b835d8ed6"
,
"commentId"
:
"1d5f60af-fe8c-46f1-8a99-806b835d8ed6"
,
"text"
:
"Maybe not stable... Need to run this a few weeks on prod..."
,
"text"
:
"Maybe not stable... Need to run this a few weeks on prod..."
,
"timestamp"
:
1750682733287
"timestamp"
:
1750682733287
},
{
"commentId"
:
"1180a758-e39f-429f-ac9b-415747f29e56"
,
"text"
:
"https://s3.eu-central-1.amazonaws.com/monosnap.bugsmasher.online/marcoschmiedel/2025-06-23.m4v"
,
"timestamp"
:
1753170856993
}
}
],
],
"lastCheckedTimestamp"
:
175
0682714405
,
"lastCheckedTimestamp"
:
175
3177024777
,
"lastFileModificationTimestamp"
:
175
0681385737.0137
"lastFileModificationTimestamp"
:
175
3169895196.4453
}
}
commands/calculateAiPrices.py
View file @
11270df4
...
@@ -12,13 +12,27 @@ from models.deal_deal import DealDeal
...
@@ -12,13 +12,27 @@ from models.deal_deal import DealDeal
from
models.option_opti
import
OptionOpti
from
models.option_opti
import
OptionOpti
from
models.aiprice_aipr
import
AipriceAipr
from
models.aiprice_aipr
import
AipriceAipr
#
Konfiguriert ein einfaches Logging, um den Skriptverlauf zu sehen
.
#
This configures basic logging to monitor the script's execution flow
.
logging
.
basicConfig
(
level
=
logging
.
INFO
,
format
=
"
%(asctime)
s
%(levelname)
s:
%(message)
s"
)
logging
.
basicConfig
(
level
=
logging
.
INFO
,
format
=
"
%(asctime)
s
%(levelname)
s:
%(message)
s"
)
def
get_validated_response
(
gpt_manager
:
OpenAiManager
,
name
:
str
)
->
dict
|
None
:
def
get_validated_response
(
gpt_manager
:
OpenAiManager
,
name
:
str
)
->
dict
|
None
:
"""Sendet eine Anfrage an GPT und validiert die JSON-Antwort auf Konsistenz."""
"""
Sends a request to an LLM to extract structured pricing data from a name string.
This function employs a consistency-checking mechanism to ensure a high-quality
response. It queries the LLM repeatedly (up to 10 times) until it receives
two consecutive, identical, and structurally valid JSON responses. This
mitigates the risk of model hallucinations or inconsistent outputs.
Args:
gpt_manager: An instance of the OpenAiManager to handle the API call.
name: The name string of the mobile plan option to be analyzed.
Returns:
A dictionary containing the structured pricing data if a consistent
response is obtained, otherwise None.
"""
prompt
=
"""
prompt
=
"""
# Teil 1 – Aufgabe
# Teil 1 – Aufgabe
...
@@ -333,13 +347,13 @@ def get_validated_response(gpt_manager: OpenAiManager, name: str) -> dict | None
...
@@ -333,13 +347,13 @@ def get_validated_response(gpt_manager: OpenAiManager, name: str) -> dict | None
last_response
=
None
last_response
=
None
#
GEÄNDERT: Schleife für bis zu 10 Versuche, um eine konsistente Antwort zu erhalten
#
This loop runs up to 10 times to get a consistent and valid response.
for
attempt
in
range
(
1
,
11
):
for
attempt
in
range
(
1
,
11
):
logging
.
info
(
f
"-> GPT-Versuch {attempt}/10 für '{name}'..."
)
logging
.
info
(
f
"-> GPT-Versuch {attempt}/10 für '{name}'..."
)
try
:
try
:
raw_response
=
gpt_manager
.
chat
(
prompt
,
model
=
"gpt-4.1"
)
raw_response
=
gpt_manager
.
chat
(
prompt
,
model
=
"gpt-4.1"
)
#
Bereinige und parse die JSON-Antwort
#
This block cleans and parses the JSON response from the LLM.
if
raw_response
.
strip
()
.
startswith
(
"```"
):
if
raw_response
.
strip
()
.
startswith
(
"```"
):
cleaned_response
=
re
.
sub
(
r"```[\w]*"
,
""
,
raw_response
)
.
strip
()
cleaned_response
=
re
.
sub
(
r"```[\w]*"
,
""
,
raw_response
)
.
strip
()
else
:
else
:
...
@@ -347,35 +361,42 @@ def get_validated_response(gpt_manager: OpenAiManager, name: str) -> dict | None
...
@@ -347,35 +361,42 @@ def get_validated_response(gpt_manager: OpenAiManager, name: str) -> dict | None
data
=
json
.
loads
(
cleaned_response
)
data
=
json
.
loads
(
cleaned_response
)
#
Prüfe, ob die Struktur der Antwort korrekt ist
#
This block validates the structure of the JSON response.
if
isinstance
(
data
,
dict
)
and
all
(
key
in
data
for
key
in
expected_keys
):
if
isinstance
(
data
,
dict
)
and
all
(
key
in
data
for
key
in
expected_keys
):
#
Prüfe, ob die Antwort mit der letzten übereinstimmt
#
This condition checks if the response is identical to the previous valid one.
if
data
==
last_response
:
if
data
==
last_response
:
logging
.
info
(
f
"-> Konsistente Antwort für '{name}' in Versuch {attempt} erhalten. Daten sind gültig."
)
logging
.
info
(
f
"-> Konsistente Antwort für '{name}' in Versuch {attempt} erhalten. Daten sind gültig."
)
return
data
return
data
else
:
else
:
#
Speichere die erste gültige Antwort und fordere eine zweite zur Bestätigung an
#
This line stores the first valid response to verify it against the next one.
last_response
=
data
last_response
=
data
logging
.
warning
(
f
"-> Gültige, aber noch nicht bestätigte Antwort in Versuch {attempt} für '{name}'. Nächster Versuch zur Verifizierung."
)
logging
.
warning
(
f
"-> Gültige, aber noch nicht bestätigte Antwort in Versuch {attempt} für '{name}'. Nächster Versuch zur Verifizierung."
)
else
:
else
:
logging
.
warning
(
f
"-> Ungültige Datenstruktur in Versuch {attempt} für '{name}'."
)
logging
.
warning
(
f
"-> Ungültige Datenstruktur in Versuch {attempt} für '{name}'."
)
last_response
=
None
#
Setze zurück, da die Kette unterbrochen ist
last_response
=
None
#
This line resets the consistency check if the chain of valid responses is broken.
except
Exception
as
e
:
except
Exception
as
e
:
logging
.
error
(
f
"-> Fehler in Versuch {attempt} für '{name}': {e}"
)
logging
.
error
(
f
"-> Fehler in Versuch {attempt} für '{name}': {e}"
)
last_response
=
None
#
Setze bei Fehler zurück
last_response
=
None
#
This line resets the consistency check on error.
logging
.
error
(
f
"-> Konnte nach 10 Versuchen keine zwei aufeinanderfolgenden, identischen Antworten für '{name}' erhalten."
)
logging
.
error
(
f
"-> Konnte nach 10 Versuchen keine zwei aufeinanderfolgenden, identischen Antworten für '{name}' erhalten."
)
return
None
return
None
def
sync_names_to_aiprice
(
session
:
Session
):
def
sync_names_to_aiprice
(
session
:
Session
):
"""PROZESS 1: Synchronisiert eindeutige Namen in die aiprice_aipr Tabelle."""
"""
Process 1: Synchronizes new, relevant names into the aiprice_aipr table.
This function scans the deal_deal and option_opti tables for entries that
likely contain pricing information, based on a list of keywords. It then
inserts any unique names not already present in aiprice_aipr, preparing
them for analysis in the next step.
"""
logging
.
info
(
"Starte Prozess 1: Synchronisiere Namen."
)
logging
.
info
(
"Starte Prozess 1: Synchronisiere Namen."
)
#
Lade existierende Keys, um Duplikate zu vermeiden
#
This line loads all existing primary keys to avoid inserting duplicates.
existing_keys
=
{
key
for
key
,
in
session
.
query
(
AipriceAipr
.
key_aipr
)}
existing_keys
=
{
key
for
key
,
in
session
.
query
(
AipriceAipr
.
key_aipr
)}
#
Schlüsselwörter, die für den Import berücksichtigt werden sollen
#
This list defines keywords to identify deals and options that likely contain pricing information.
include_keywords
=
[
include_keywords
=
[
"
%
Sonderbonus
%
"
,
"
%
Sonderbonus
%
"
,
"
%
Vergütungsverzicht
%
"
,
"
%
Vergütungsverzicht
%
"
,
...
@@ -387,11 +408,11 @@ def sync_names_to_aiprice(session: Session):
...
@@ -387,11 +408,11 @@ def sync_names_to_aiprice(session: Session):
"
%
Monatsgrundpreis
%
"
,
"
%
Monatsgrundpreis
%
"
,
]
]
#
Erstelle eine Liste von 'like'-Bedingungen für die Filterung
#
This line creates a list of 'like' conditions for SQLAlchemy filtering.
deal_conditions
=
[
DealDeal
.
name_deal
.
like
(
keyword
)
for
keyword
in
include_keywords
]
deal_conditions
=
[
DealDeal
.
name_deal
.
like
(
keyword
)
for
keyword
in
include_keywords
]
option_conditions
=
[
OptionOpti
.
name_opti
.
like
(
keyword
)
for
keyword
in
include_keywords
]
option_conditions
=
[
OptionOpti
.
name_opti
.
like
(
keyword
)
for
keyword
in
include_keywords
]
#
Lade eindeutige Namen aus den Quelltabellen und wende die Einschlussfilter an
#
These queries fetch distinct, non-empty names from source tables based on the keyword filters.
deals
=
session
.
query
(
DealDeal
.
name_deal
)
.
distinct
()
.
filter
(
deals
=
session
.
query
(
DealDeal
.
name_deal
)
.
distinct
()
.
filter
(
DealDeal
.
name_deal
!=
""
,
DealDeal
.
name_deal
!=
""
,
or_
(
*
deal_conditions
)
or_
(
*
deal_conditions
)
...
@@ -403,12 +424,12 @@ def sync_names_to_aiprice(session: Session):
...
@@ -403,12 +424,12 @@ def sync_names_to_aiprice(session: Session):
or_
(
*
option_conditions
)
or_
(
*
option_conditions
)
)
.
all
()
)
.
all
()
#
Kombiniere und erstelle neue Einträge
#
This block combines names from both sources and prepares new entries.
new_entries
=
[]
new_entries
=
[]
for
name
,
category
in
[(
d
[
0
],
'deal'
)
for
d
in
deals
]
+
[(
o
[
0
],
'opti'
)
for
o
in
options
]:
for
name
,
category
in
[(
d
[
0
],
'deal'
)
for
d
in
deals
]
+
[(
o
[
0
],
'opti'
)
for
o
in
options
]:
if
name
not
in
existing_keys
:
if
name
not
in
existing_keys
:
new_entries
.
append
(
AipriceAipr
(
key_aipr
=
name
,
category_aipr
=
category
))
new_entries
.
append
(
AipriceAipr
(
key_aipr
=
name
,
category_aipr
=
category
))
existing_keys
.
add
(
name
)
#
Verhindert doppeltes Hinzufügen im selben Lauf
existing_keys
.
add
(
name
)
#
This line prevents adding the same key twice in one run.
if
new_entries
:
if
new_entries
:
session
.
add_all
(
new_entries
)
session
.
add_all
(
new_entries
)
...
@@ -420,11 +441,19 @@ def sync_names_to_aiprice(session: Session):
...
@@ -420,11 +441,19 @@ def sync_names_to_aiprice(session: Session):
def
enrich_aiprice_with_gpt
(
session
:
Session
):
def
enrich_aiprice_with_gpt
(
session
:
Session
):
"""PROZESS 2: Reichert Einträge ohne response_aipr sofort einzeln an."""
"""
Process 2: Enriches records in aiprice_aipr with structured data from an LLM.
This function retrieves all records from the aiprice_aipr table that have
not yet been processed (i.e., `response_aipr` is NULL). It sends each
record's name to the LLM via `get_validated_response` and saves the
resulting structured JSON data back to the database, committing each
record individually to ensure progress is saved.
"""
logging
.
info
(
"Starte Prozess 2: Reichere Daten mit GPT an."
)
logging
.
info
(
"Starte Prozess 2: Reichere Daten mit GPT an."
)
gpt_manager
=
OpenAiManager
()
gpt_manager
=
OpenAiManager
()
#
Verarbeite nur Einträge, bei denen die Antwort noch fehlt
#
This query selects only records from the aiprice table that have not yet been processed.
items_to_process
=
session
.
query
(
AipriceAipr
)
.
filter
(
AipriceAipr
.
response_aipr
.
is_
(
None
))
.
all
()
items_to_process
=
session
.
query
(
AipriceAipr
)
.
filter
(
AipriceAipr
.
response_aipr
.
is_
(
None
))
.
all
()
if
not
items_to_process
:
if
not
items_to_process
:
...
@@ -433,13 +462,15 @@ def enrich_aiprice_with_gpt(session: Session):
...
@@ -433,13 +462,15 @@ def enrich_aiprice_with_gpt(session: Session):
logging
.
info
(
f
"{len(items_to_process)} Einträge werden verarbeitet."
)
logging
.
info
(
f
"{len(items_to_process)} Einträge werden verarbeitet."
)
# This loop processes each item individually.
for
item
in
items_to_process
:
for
item
in
items_to_process
:
validated_data
=
get_validated_response
(
gpt_manager
,
item
.
key_aipr
)
validated_data
=
get_validated_response
(
gpt_manager
,
item
.
key_aipr
)
#
Speichere sofort, wenn die Daten gültig sind
#
This block saves the result if the response data is valid and consistent.
if
validated_data
:
if
validated_data
:
item
.
response_aipr
=
validated_data
item
.
response_aipr
=
validated_data
try
:
try
:
# This line commits the change for the current item immediately.
session
.
commit
()
session
.
commit
()
logging
.
info
(
f
"Eintrag für '{item.key_aipr}' erfolgreich gespeichert."
)
logging
.
info
(
f
"Eintrag für '{item.key_aipr}' erfolgreich gespeichert."
)
except
Exception
as
e
:
except
Exception
as
e
:
...
@@ -448,11 +479,14 @@ def enrich_aiprice_with_gpt(session: Session):
...
@@ -448,11 +479,14 @@ def enrich_aiprice_with_gpt(session: Session):
logging
.
info
(
"Prozess 2 abgeschlossen."
)
logging
.
info
(
"Prozess 2 abgeschlossen."
)
#
--- Hauptausführung ---
#
This block is the main entry point for the script.
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
logging
.
info
(
"=== Starte kombinierte Ausführung: sync & enrich ==="
)
logging
.
info
(
"=== Starte kombinierte Ausführung: sync & enrich ==="
)
# This line initializes a new database session.
db_session
=
MysqlManager
()
.
getSession
()
db_session
=
MysqlManager
()
.
getSession
()
# This line executes the first process to sync new names.
sync_names_to_aiprice
(
db_session
)
sync_names_to_aiprice
(
db_session
)
enrich_aiprice_with_gpt
(
db_session
)
# This line executes the second process to enrich the new names with data.
enrich_aiprice_with_gpt
(
db_session
)
\ No newline at end of file
commands/calculateTarifDetailsWithGpt.py
View file @
11270df4
...
@@ -3,6 +3,7 @@ import os
...
@@ -3,6 +3,7 @@ import os
import
re
import
re
import
ast
import
ast
import
json
import
json
import
csv
import
datetime
as
_dt
import
datetime
as
_dt
import
traceback
import
traceback
from
typing
import
Any
,
Dict
,
List
,
Tuple
from
typing
import
Any
,
Dict
,
List
,
Tuple
...
@@ -215,6 +216,24 @@ if __name__ == "__main__":
...
@@ -215,6 +216,24 @@ if __name__ == "__main__":
if
not
os
.
path
.
isdir
(
cacheDir
):
if
not
os
.
path
.
isdir
(
cacheDir
):
print
(
"FEHLER: Cache-Verzeichnis nicht gefunden."
)
print
(
"FEHLER: Cache-Verzeichnis nicht gefunden."
)
sys
.
exit
(
1
)
sys
.
exit
(
1
)
# Load all partner card IDs from the CSV for quick lookup
partnercardsCsvPath
=
os
.
path
.
join
(
cacheDir
,
"partnercards.csv"
)
partnercardIds
=
set
()
if
os
.
path
.
exists
(
partnercardsCsvPath
):
print
(
f
"INFO: Lese Partnercard-IDs aus '{partnercardsCsvPath}'..."
)
try
:
with
open
(
partnercardsCsvPath
,
mode
=
'r'
,
newline
=
''
,
encoding
=
'utf-8'
)
as
f
:
reader
=
csv
.
reader
(
f
,
delimiter
=
';'
)
header
=
next
(
reader
)
# Skip header
for
row
in
reader
:
if
row
:
partnercardIds
.
add
(
row
[
0
])
print
(
f
"INFO: {len(partnercardIds)} Partnercard-IDs geladen."
)
except
Exception
as
e
:
print
(
f
"WARNUNG: Fehler beim Lesen der Partnercard-CSV: {e}"
)
else
:
print
(
"INFO: Datei 'partnercards.csv' nicht gefunden. 'is_partnercard' wird immer false sein."
)
# This list comprehension gathers all files with .pdf extension
# This list comprehension gathers all files with .pdf extension
pdfFiles
=
[
f
for
f
in
os
.
listdir
(
cacheDir
)
if
f
.
lower
()
.
endswith
(
".pdf"
)]
pdfFiles
=
[
f
for
f
in
os
.
listdir
(
cacheDir
)
if
f
.
lower
()
.
endswith
(
".pdf"
)]
...
@@ -311,6 +330,11 @@ if __name__ == "__main__":
...
@@ -311,6 +330,11 @@ if __name__ == "__main__":
if
not
validatedData
:
if
not
validatedData
:
print
(
"FEHLER: Drei ungültige Antworten – übersprungen."
)
print
(
"FEHLER: Drei ungültige Antworten – übersprungen."
)
continue
continue
# Check if the current tariff ID is a partner card and add the flag to the data.
isPartnercard
=
tariffId
in
partnercardIds
validatedData
[
'is_partnercard'
]
=
isPartnercard
print
(
f
"INFO: Feld 'is_partnercard' auf '{isPartnercard}' gesetzt."
)
# This loop updates the details_base for each relevant BaseBase record
# This loop updates the details_base for each relevant BaseBase record
for
br
in
baseRecords
:
for
br
in
baseRecords
:
...
@@ -326,4 +350,4 @@ if __name__ == "__main__":
...
@@ -326,4 +350,4 @@ if __name__ == "__main__":
# This line closes the database session after processing
# This line closes the database session after processing
dbSession
.
close
()
dbSession
.
close
()
print
(
"INFO: Verarbeitung abgeschlossen."
)
print
(
"INFO: Verarbeitung abgeschlossen."
)
\ No newline at end of file
commands/downloadDataFromMauiPartnercard.py
0 → 100644
View file @
11270df4
This diff is collapsed.
Click to expand it.
config/MysqlConfig.py
View file @
11270df4
...
@@ -4,7 +4,7 @@ MYSQL_PASSWORD = "floz09sx3dTyx144gy"
...
@@ -4,7 +4,7 @@ MYSQL_PASSWORD = "floz09sx3dTyx144gy"
MYSQL_DATABASE
=
"itmax_tarifs"
MYSQL_DATABASE
=
"itmax_tarifs"
MYSQL_PORT
=
3306
MYSQL_PORT
=
3306
USE_SSH_TUNNEL
=
Tru
e
USE_SSH_TUNNEL
=
Fals
e
SSH_HOST
=
"jumphost.bugsmasher.online"
SSH_HOST
=
"jumphost.bugsmasher.online"
SSH_PORT
=
22
SSH_PORT
=
22
SSH_USERNAME
=
"root"
SSH_USERNAME
=
"root"
...
...
config/_CronConfig.txt
View file @
11270df4
# STEP 1 - IMPORT TARIFS FROM MAUI
# STEP 1 - IMPORT TARIFS FROM MAUI
0 3 * * * /maui/cron.sh downloadDataFromMaui.py
0 3 * * * /maui/cron.sh downloadDataFromMaui.py
20 4 * * * /maui/cron.sh downloadDataFromMauiPartnercard.py
# STEP 2 - IMPORT DUMP TO DATABASE
# STEP 2 - IMPORT DUMP TO DATABASE
3
0 4 * * * /maui/cron.sh importCacheToDatabase.py
4
0 4 * * * /maui/cron.sh importCacheToDatabase.py
# STEP 3 - UPLOAD FLYER TO AWS S3
# STEP 3 - UPLOAD FLYER TO AWS S3
0
5 * * * /maui/cron.sh uploadCacheToAwsS3.py
5
5 * * * /maui/cron.sh uploadCacheToAwsS3.py
# STEP 4 - GENERATE TARIF-DETAILS WITH GPT
# STEP 4 - GENERATE TARIF-DETAILS WITH GPT
0
5 * * * /maui/cron.sh calculateTarifDetailsWithGpt.py
5
5 * * * /maui/cron.sh calculateTarifDetailsWithGpt.py
# STEP 5 - GENERATE AI PRICES WITH GPT
# STEP 5 - GENERATE AI PRICES WITH GPT
30 5 * * * /maui/cron.sh calculateAiPrices.py
30 5 * * * /maui/cron.sh calculateAiPrices.py
readme.md
View file @
11270df4
...
@@ -9,6 +9,7 @@ https://s3.eu-central-1.amazonaws.com/monosnap.bugsmasher.online/marcoschmiedel/
...
@@ -9,6 +9,7 @@ https://s3.eu-central-1.amazonaws.com/monosnap.bugsmasher.online/marcoschmiedel/
https://s3.eu-central-1.amazonaws.com/monosnap.bugsmasher.online/marcoschmiedel/2024-05-13.m4v
https://s3.eu-central-1.amazonaws.com/monosnap.bugsmasher.online/marcoschmiedel/2024-05-13.m4v
https://s3.eu-central-1.amazonaws.com/monosnap.bugsmasher.online/marcoschmiedel/2025-06-03.m4v
https://s3.eu-central-1.amazonaws.com/monosnap.bugsmasher.online/marcoschmiedel/2025-06-03.m4v
https://s3.eu-central-1.amazonaws.com/monosnap.bugsmasher.online/marcoschmiedel/2025-06-23.m4v
https://s3.eu-central-1.amazonaws.com/monosnap.bugsmasher.online/marcoschmiedel/2025-06-23.m4v
https://s3.eu-central-1.amazonaws.com/monosnap.bugsmasher.online/marcoschmiedel/2025-07-22.m4v
## JupyterLab
## JupyterLab
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment