Commit 11270df4 authored by Marco Schmiedel

fix

parent b33ee70f
......@@ -2,11 +2,11 @@
"fileId": "38da158f-ad91-433f-8b7b-60ff4949d7ff",
"originalPath": "work/config/_CronConfig.txt",
"currentPath": "work/config/_CronConfig.txt",
"hash": "174ef9fe04e4d6aebb38573991945d535ec049a7e3069a8c033cd8e7ee30820e",
"hash": "6163e59558a5880e7708c36a39ef2d5bc25a1b24b9be547bdc4c654f2c2cd495",
"docContent": "<p><br></p>",
"checkedStatus": "done",
"comments": [],
"lastCheckedTimestamp": 1749816820741,
"lastFileModificationTimestamp": 1749816813585.9177,
"lastCheckedTimestamp": 1753176964166,
"lastFileModificationTimestamp": 1753176714171.083,
"hash_version": 2
}
......@@ -5,7 +5,13 @@
"hash": "35d56b9bc420e57388faa4e15e12cde381048f016b40307c4ed7f829e9aac7e4",
"docContent": "<p><br></p>",
"checkedStatus": "done",
"comments": [],
"comments": [
{
"commentId": "48748f93-8a7a-4f42-b5cb-71e8007dccf1",
"text": "For information about this script, refer to the base tutorial series in the README file.",
"timestamp": 1753170811938
}
],
"lastCheckedTimestamp": 1747070815750,
"lastFileModificationTimestamp": 1747070802673.0312,
"hash_version": 2
......
{
"fileId": "5a3f6886-edd3-48d5-935d-f15a42e82bac",
"originalPath": "work/commands/downloadDataFromMauiPartnercard.py",
"currentPath": "work/commands/downloadDataFromMauiPartnercard.py",
"hash": "dcf9d6bada1a0e0b7e1d9608174b5c5e3ae06efdc15f32219e599b266f71f74f",
"hash_version": 2,
"docContent": "<p><br></p>",
"checkedStatus": "done",
"comments": [
{
"commentId": "e85986cf-a5ed-4d22-a9e5-69ad29f26082",
"text": "https://s3.eu-central-1.amazonaws.com/monosnap.bugsmasher.online/marcoschmiedel/2025-07-22.m4v",
"timestamp": 1753177338921
}
],
"lastCheckedTimestamp": 1753177340827,
"lastFileModificationTimestamp": 1753176234527.168
}
......@@ -5,7 +5,13 @@
"hash": "3e1df1c401fbd00595912583ee4d4d2bb45364f81daca29113db874515eee0a0",
"docContent": "<p><br></p>",
"checkedStatus": "done",
"comments": [],
"comments": [
{
"commentId": "9b3b2e00-18f1-44cb-a752-0b74eeb492d8",
"text": "For information about this script, refer to the base tutorial series in the README file.",
"timestamp": 1753170808598
}
],
"lastCheckedTimestamp": 1750323681845,
"lastFileModificationTimestamp": 1750323366660.076,
"hash_version": 2
......
......@@ -2,11 +2,22 @@
"fileId": "62aea232-2549-437e-b5a9-72cb2aa92d16",
"originalPath": "work/commands/calculateTarifDetailsWithGpt.py",
"currentPath": "work/commands/calculateTarifDetailsWithGpt.py",
"hash": "6de592dae63250612a453932e1e344699a550e4438c16813d54ba4bf2a13c785",
"hash": "9a99857070c6c9066089985619e08d943c5c3ec785113239e48ff83d3e352792",
"docContent": "<p><br></p>",
"checkedStatus": "done",
"comments": [],
"lastCheckedTimestamp": 1747071244862,
"lastFileModificationTimestamp": 1747071237273.2832,
"comments": [
{
"commentId": "4face178-ab9f-4870-bbf9-73befd035a80",
"text": "For information about this script, refer to the base tutorial series in the README file.",
"timestamp": 1753170818842
},
{
"commentId": "22763243-229a-48ca-980b-8eb27433ba6e",
"text": "I've added a static field \"is_partnercard\" based on the partnercard.csv that is created by downloadDataFromMauiPartnercard.py.",
"timestamp": 1753177018105
}
],
"lastCheckedTimestamp": 1753176977286,
"lastFileModificationTimestamp": 1753169638397.8057,
"hash_version": 2
}
......@@ -5,7 +5,13 @@
"hash": "4c972fa8de12b095edb942fc260533235001e5b7a508a4e058e28f45340ebc59",
"docContent": "<p><br></p>",
"checkedStatus": "done",
"comments": [],
"comments": [
{
"commentId": "02d64edc-151e-46cd-ad31-7855739ed216",
"text": "For information about this script, refer to the base tutorial series in the README file.",
"timestamp": 1753170814981
}
],
"lastCheckedTimestamp": 1750663401713,
"lastFileModificationTimestamp": 1750662226645.7258,
"hash_version": 2
......
......@@ -2,7 +2,7 @@
"fileId": "986eeb57-8634-4f40-a4ea-a2eae9d87e71",
"originalPath": "work/readme.md",
"currentPath": "work/README.md",
"hash": "4f572de1efd35b429b45d9932e8ffa93153c9b0a421c7afec99b4af109aa87d1",
"hash": "756bb90539f71f054db700f99b053cf8e8b94a2d17499ce170cc1ba5db7276b3",
"docContent": "<p><br></p>",
"checkedStatus": "done",
"comments": [
......@@ -17,7 +17,7 @@
"timestamp": 1747069658074
}
],
"lastCheckedTimestamp": 1750683280697,
"lastFileModificationTimestamp": 1750683272808.5942,
"lastCheckedTimestamp": 1753177346533,
"lastFileModificationTimestamp": 1753177332762.9363,
"hash_version": 2
}
......@@ -2,17 +2,22 @@
"fileId": "e3281330-5559-49da-9434-bf3cccd4ddae",
"originalPath": "work/commands/calculateAiPrices.py",
"currentPath": "work/commands/calculateAiPrices.py",
"hash": "343345985f2e8153b4e9e6a7efa0479c3938e68080388f23e537609f14cafded",
"hash": "5853eddf6c87959b28413a7d63edaef015cde95c13b6b28d13a0d24dfdb30af2",
"hash_version": 2,
"docContent": "<p><br></p>",
"checkedStatus": "todo",
"checkedStatus": "done",
"comments": [
{
"commentId": "1d5f60af-fe8c-46f1-8a99-806b835d8ed6",
"text": "Maybe not stable... Need to run this a few weeks on prod...",
"timestamp": 1750682733287
},
{
"commentId": "1180a758-e39f-429f-ac9b-415747f29e56",
"text": "https://s3.eu-central-1.amazonaws.com/monosnap.bugsmasher.online/marcoschmiedel/2025-06-23.m4v",
"timestamp": 1753170856993
}
],
"lastCheckedTimestamp": 1750682714405,
"lastFileModificationTimestamp": 1750681385737.0137
"lastCheckedTimestamp": 1753177024777,
"lastFileModificationTimestamp": 1753169895196.4453
}
......@@ -12,13 +12,27 @@ from models.deal_deal import DealDeal
from models.option_opti import OptionOpti
from models.aiprice_aipr import AipriceAipr
# Konfiguriert ein einfaches Logging, um den Skriptverlauf zu sehen.
# This configures basic logging to monitor the script's execution flow.
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s: %(message)s")
def get_validated_response(gpt_manager: OpenAiManager, name: str) -> dict | None:
"""Sendet eine Anfrage an GPT und validiert die JSON-Antwort auf Konsistenz."""
"""
Sends a request to an LLM to extract structured pricing data from a name string.
This function employs a consistency-checking mechanism to ensure a high-quality
response. It queries the LLM repeatedly (up to 10 times) until it receives
two consecutive, identical, and structurally valid JSON responses. This
mitigates the risk of model hallucinations or inconsistent outputs.
Args:
gpt_manager: An instance of the OpenAiManager to handle the API call.
name: The name string of the mobile plan option to be analyzed.
Returns:
A dictionary containing the structured pricing data if a consistent
response is obtained, otherwise None.
"""
prompt = """
# Teil 1 – Aufgabe
......@@ -333,13 +347,13 @@ def get_validated_response(gpt_manager: OpenAiManager, name: str) -> dict | None
last_response = None
# GEÄNDERT: Schleife für bis zu 10 Versuche, um eine konsistente Antwort zu erhalten
# This loop runs up to 10 times to get a consistent and valid response.
for attempt in range(1, 11):
logging.info(f"-> GPT-Versuch {attempt}/10 für '{name}'...")
try:
raw_response = gpt_manager.chat(prompt, model="gpt-4.1")
# Bereinige und parse die JSON-Antwort
# This block cleans and parses the JSON response from the LLM.
if raw_response.strip().startswith("```"):
cleaned_response = re.sub(r"```[\w]*", "", raw_response).strip()
else:
......@@ -347,35 +361,42 @@ def get_validated_response(gpt_manager: OpenAiManager, name: str) -> dict | None
data = json.loads(cleaned_response)
# Prüfe, ob die Struktur der Antwort korrekt ist
# This block validates the structure of the JSON response.
if isinstance(data, dict) and all(key in data for key in expected_keys):
# Prüfe, ob die Antwort mit der letzten übereinstimmt
# This condition checks if the response is identical to the previous valid one.
if data == last_response:
logging.info(f"-> Konsistente Antwort für '{name}' in Versuch {attempt} erhalten. Daten sind gültig.")
return data
else:
# Speichere die erste gültige Antwort und fordere eine zweite zur Bestätigung an
# This line stores the first valid response to verify it against the next one.
last_response = data
logging.warning(f"-> Gültige, aber noch nicht bestätigte Antwort in Versuch {attempt} für '{name}'. Nächster Versuch zur Verifizierung.")
else:
logging.warning(f"-> Ungültige Datenstruktur in Versuch {attempt} für '{name}'.")
last_response = None # Setze zurück, da die Kette unterbrochen ist
last_response = None # This line resets the consistency check because the chain of valid responses is broken.
except Exception as e:
logging.error(f"-> Fehler in Versuch {attempt} für '{name}': {e}")
last_response = None # Setze bei Fehler zurück
last_response = None # This line resets the consistency check on error.
logging.error(f"-> Konnte nach 10 Versuchen keine zwei aufeinanderfolgenden, identischen Antworten für '{name}' erhalten.")
return None
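
# Illustrative sketch (an assumption for clarity, not part of this commit):
# the retry logic above reduces to the generic double-confirmation pattern
# below. `ask` is a hypothetical zero-argument callable returning a raw JSON
# string; the module's existing `json` import is reused.
def query_until_consistent(ask, max_attempts: int = 10) -> dict | None:
    last_response = None
    for _ in range(max_attempts):
        try:
            data = json.loads(ask())
        except (json.JSONDecodeError, TypeError):
            data = None
        if not isinstance(data, dict):
            last_response = None  # an invalid answer breaks the chain
            continue
        if data == last_response:
            return data  # two identical valid answers in a row: accept
        last_response = data  # remember this answer, await confirmation
    return None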
def sync_names_to_aiprice(session: Session):
"""PROZESS 1: Synchronisiert eindeutige Namen in die aiprice_aipr Tabelle."""
"""
Process 1: Synchronizes new, relevant names into the aiprice_aipr table.
This function scans the deal_deal and option_opti tables for entries that
likely contain pricing information, based on a list of keywords. It then
inserts any unique names not already present in aiprice_aipr, preparing
them for analysis in the next step.
"""
logging.info("Starte Prozess 1: Synchronisiere Namen.")
# Lade existierende Keys, um Duplikate zu vermeiden
# This line loads all existing primary keys to avoid inserting duplicates.
existing_keys = {key for key, in session.query(AipriceAipr.key_aipr)}
# Schlüsselwörter, die für den Import berücksichtigt werden sollen
# This list defines keywords to identify deals and options that likely contain pricing information.
include_keywords = [
"%Sonderbonus%",
"%Vergütungsverzicht%",
......@@ -387,11 +408,11 @@ def sync_names_to_aiprice(session: Session):
"%Monatsgrundpreis%",
]
# Erstelle eine Liste von 'like'-Bedingungen für die Filterung
# This line creates a list of 'like' conditions for SQLAlchemy filtering.
deal_conditions = [DealDeal.name_deal.like(keyword) for keyword in include_keywords]
option_conditions = [OptionOpti.name_opti.like(keyword) for keyword in include_keywords]
# Lade eindeutige Namen aus den Quelltabellen und wende die Einschlussfilter an
# These queries fetch distinct, non-empty names from source tables based on the keyword filters.
deals = session.query(DealDeal.name_deal).distinct().filter(
DealDeal.name_deal != "",
or_(*deal_conditions)
......@@ -403,12 +424,12 @@ def sync_names_to_aiprice(session: Session):
or_(*option_conditions)
).all()
# Kombiniere und erstelle neue Einträge
# This block combines names from both sources and prepares new entries.
new_entries = []
for name, category in [(d[0], 'deal') for d in deals] + [(o[0], 'opti') for o in options]:
if name not in existing_keys:
new_entries.append(AipriceAipr(key_aipr=name, category_aipr=category))
existing_keys.add(name) # Verhindert doppeltes Hinzufügen im selben Lauf
existing_keys.add(name) # This line prevents adding the same key twice in one run.
if new_entries:
session.add_all(new_entries)
......@@ -420,11 +441,19 @@ def sync_names_to_aiprice(session: Session):
def enrich_aiprice_with_gpt(session: Session):
"""PROZESS 2: Reichert Einträge ohne response_aipr sofort einzeln an."""
"""
Process 2: Enriches records in aiprice_aipr with structured data from an LLM.
This function retrieves all records from the aiprice_aipr table that have
not yet been processed (i.e., `response_aipr` is NULL). It sends each
record's name to the LLM via `get_validated_response` and saves the
resulting structured JSON data back to the database, committing each
record individually to ensure progress is saved.
"""
logging.info("Starte Prozess 2: Reichere Daten mit GPT an.")
gpt_manager = OpenAiManager()
# Verarbeite nur Einträge, bei denen die Antwort noch fehlt
# This query selects only records from the aiprice table that have not yet been processed.
items_to_process = session.query(AipriceAipr).filter(AipriceAipr.response_aipr.is_(None)).all()
if not items_to_process:
......@@ -433,13 +462,15 @@ def enrich_aiprice_with_gpt(session: Session):
logging.info(f"{len(items_to_process)} Einträge werden verarbeitet.")
# This loop processes each item individually.
for item in items_to_process:
validated_data = get_validated_response(gpt_manager, item.key_aipr)
# Speichere sofort, wenn die Daten gültig sind
# This block saves the result if the response data is valid and consistent.
if validated_data:
item.response_aipr = validated_data
try:
# This line commits the change for the current item immediately.
session.commit()
logging.info(f"Eintrag für '{item.key_aipr}' erfolgreich gespeichert.")
except Exception as e:
......@@ -448,11 +479,14 @@ def enrich_aiprice_with_gpt(session: Session):
logging.info("Prozess 2 abgeschlossen.")
# --- Hauptausführung ---
# This block is the main entry point for the script.
if __name__ == "__main__":
logging.info("=== Starte kombinierte Ausführung: sync & enrich ===")
# This line initializes a new database session.
db_session = MysqlManager().getSession()
# This line executes the first process to sync new names.
sync_names_to_aiprice(db_session)
# This line executes the second process to enrich the new names with data.
enrich_aiprice_with_gpt(db_session)
\ No newline at end of file
......@@ -3,6 +3,7 @@ import os
import re
import ast
import json
import csv
import datetime as _dt
import traceback
from typing import Any, Dict, List, Tuple
......@@ -216,6 +217,24 @@ if __name__ == "__main__":
print("FEHLER: Cache-Verzeichnis nicht gefunden.")
sys.exit(1)
# Load all partner card IDs from the CSV for quick lookup
partnercardsCsvPath = os.path.join(cacheDir, "partnercards.csv")
partnercardIds = set()
if os.path.exists(partnercardsCsvPath):
print(f"INFO: Lese Partnercard-IDs aus '{partnercardsCsvPath}'...")
try:
with open(partnercardsCsvPath, mode='r', newline='', encoding='utf-8') as f:
reader = csv.reader(f, delimiter=';')
header = next(reader) # Skip header
for row in reader:
if row:
partnercardIds.add(row[0])
print(f"INFO: {len(partnercardIds)} Partnercard-IDs geladen.")
except Exception as e:
print(f"WARNUNG: Fehler beim Lesen der Partnercard-CSV: {e}")
else:
print("INFO: Datei 'partnercards.csv' nicht gefunden. 'is_partnercard' wird immer false sein.")
# This list comprehension gathers all files with a .pdf extension
pdfFiles = [f for f in os.listdir(cacheDir) if f.lower().endswith(".pdf")]
......@@ -312,6 +331,11 @@ if __name__ == "__main__":
print("FEHLER: Drei ungültige Antworten – übersprungen.")
continue
# Check if the current tariff ID is a partner card and add the flag to the data.
isPartnercard = tariffId in partnercardIds
validatedData['is_partnercard'] = isPartnercard
print(f"INFO: Feld 'is_partnercard' auf '{isPartnercard}' gesetzt.")
# This loop updates the details_base for each relevant BaseBase record
for br in baseRecords:
......
......@@ -4,7 +4,7 @@ MYSQL_PASSWORD = "floz09sx3dTyx144gy"
MYSQL_DATABASE = "itmax_tarifs"
MYSQL_PORT = 3306
USE_SSH_TUNNEL = True
USE_SSH_TUNNEL = False
SSH_HOST = "jumphost.bugsmasher.online"
SSH_PORT = 22
SSH_USERNAME = "root"
......
# STEP 1 - IMPORT TARIFS FROM MAUI
0 3 * * * /maui/cron.sh downloadDataFromMaui.py
20 4 * * * /maui/cron.sh downloadDataFromMauiPartnercard.py
# STEP 2 - IMPORT DUMP TO DATABASE
30 4 * * * /maui/cron.sh importCacheToDatabase.py
40 4 * * * /maui/cron.sh importCacheToDatabase.py
# STEP 3 - UPLOAD FLYER TO AWS S3
0 5 * * * /maui/cron.sh uploadCacheToAwsS3.py
5 5 * * * /maui/cron.sh uploadCacheToAwsS3.py
# STEP 4 - GENERATE TARIF-DETAILS WITH GPT
0 5 * * * /maui/cron.sh calculateTarifDetailsWithGpt.py
5 5 * * * /maui/cron.sh calculateTarifDetailsWithGpt.py
# STEP 5 - GENERATE AI PRICES WITH GPT
30 5 * * * /maui/cron.sh calculateAiPrices.py
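# NOTE: cron field order is minute hour day-of-month month day-of-week command;
# e.g. "30 5 * * *" runs daily at 05:30 server time.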
......@@ -9,6 +9,7 @@ https://s3.eu-central-1.amazonaws.com/monosnap.bugsmasher.online/marcoschmiedel/
https://s3.eu-central-1.amazonaws.com/monosnap.bugsmasher.online/marcoschmiedel/2024-05-13.m4v
https://s3.eu-central-1.amazonaws.com/monosnap.bugsmasher.online/marcoschmiedel/2025-06-03.m4v
https://s3.eu-central-1.amazonaws.com/monosnap.bugsmasher.online/marcoschmiedel/2025-06-23.m4v
https://s3.eu-central-1.amazonaws.com/monosnap.bugsmasher.online/marcoschmiedel/2025-07-22.m4v
## JupyterLab
......