Commit 11270df4 authored by Marco Schmiedel

fix

parent b33ee70f
......@@ -2,11 +2,11 @@
"fileId": "38da158f-ad91-433f-8b7b-60ff4949d7ff",
"originalPath": "work/config/_CronConfig.txt",
"currentPath": "work/config/_CronConfig.txt",
"hash": "174ef9fe04e4d6aebb38573991945d535ec049a7e3069a8c033cd8e7ee30820e",
"hash": "6163e59558a5880e7708c36a39ef2d5bc25a1b24b9be547bdc4c654f2c2cd495",
"docContent": "<p><br></p>",
"checkedStatus": "done",
"comments": [],
"lastCheckedTimestamp": 1749816820741,
"lastFileModificationTimestamp": 1749816813585.9177,
"lastCheckedTimestamp": 1753176964166,
"lastFileModificationTimestamp": 1753176714171.083,
"hash_version": 2
}
......@@ -5,7 +5,13 @@
"hash": "35d56b9bc420e57388faa4e15e12cde381048f016b40307c4ed7f829e9aac7e4",
"docContent": "<p><br></p>",
"checkedStatus": "done",
"comments": [],
"comments": [
{
"commentId": "48748f93-8a7a-4f42-b5cb-71e8007dccf1",
"text": "For information about this script, refer to the base tutorial series in the README file.",
"timestamp": 1753170811938
}
],
"lastCheckedTimestamp": 1747070815750,
"lastFileModificationTimestamp": 1747070802673.0312,
"hash_version": 2
......
{
"fileId": "5a3f6886-edd3-48d5-935d-f15a42e82bac",
"originalPath": "work/commands/downloadDataFromMauiPartnercard.py",
"currentPath": "work/commands/downloadDataFromMauiPartnercard.py",
"hash": "dcf9d6bada1a0e0b7e1d9608174b5c5e3ae06efdc15f32219e599b266f71f74f",
"hash_version": 2,
"docContent": "<p><br></p>",
"checkedStatus": "done",
"comments": [
{
"commentId": "e85986cf-a5ed-4d22-a9e5-69ad29f26082",
"text": "https://s3.eu-central-1.amazonaws.com/monosnap.bugsmasher.online/marcoschmiedel/2025-07-22.m4v",
"timestamp": 1753177338921
}
],
"lastCheckedTimestamp": 1753177340827,
"lastFileModificationTimestamp": 1753176234527.168
}
......@@ -5,7 +5,13 @@
"hash": "3e1df1c401fbd00595912583ee4d4d2bb45364f81daca29113db874515eee0a0",
"docContent": "<p><br></p>",
"checkedStatus": "done",
"comments": [],
"comments": [
{
"commentId": "9b3b2e00-18f1-44cb-a752-0b74eeb492d8",
"text": "For information about this script, refer to the base tutorial series in the README file.",
"timestamp": 1753170808598
}
],
"lastCheckedTimestamp": 1750323681845,
"lastFileModificationTimestamp": 1750323366660.076,
"hash_version": 2
......
......@@ -2,11 +2,22 @@
"fileId": "62aea232-2549-437e-b5a9-72cb2aa92d16",
"originalPath": "work/commands/calculateTarifDetailsWithGpt.py",
"currentPath": "work/commands/calculateTarifDetailsWithGpt.py",
"hash": "6de592dae63250612a453932e1e344699a550e4438c16813d54ba4bf2a13c785",
"hash": "9a99857070c6c9066089985619e08d943c5c3ec785113239e48ff83d3e352792",
"docContent": "<p><br></p>",
"checkedStatus": "done",
"comments": [],
"lastCheckedTimestamp": 1747071244862,
"lastFileModificationTimestamp": 1747071237273.2832,
"comments": [
{
"commentId": "4face178-ab9f-4870-bbf9-73befd035a80",
"text": "For information about this script, refer to the base tutorial series in the README file.",
"timestamp": 1753170818842
},
{
"commentId": "22763243-229a-48ca-980b-8eb27433ba6e",
"text": "I've added a static field \"is_partnercard\" based on the partnercard.csv that is created by downloadDataFromMauiPartnercard.py.",
"timestamp": 1753177018105
}
],
"lastCheckedTimestamp": 1753176977286,
"lastFileModificationTimestamp": 1753169638397.8057,
"hash_version": 2
}
......@@ -5,7 +5,13 @@
"hash": "4c972fa8de12b095edb942fc260533235001e5b7a508a4e058e28f45340ebc59",
"docContent": "<p><br></p>",
"checkedStatus": "done",
"comments": [],
"comments": [
{
"commentId": "02d64edc-151e-46cd-ad31-7855739ed216",
"text": "For information about this script, refer to the base tutorial series in the README file.",
"timestamp": 1753170814981
}
],
"lastCheckedTimestamp": 1750663401713,
"lastFileModificationTimestamp": 1750662226645.7258,
"hash_version": 2
......
......@@ -2,7 +2,7 @@
"fileId": "986eeb57-8634-4f40-a4ea-a2eae9d87e71",
"originalPath": "work/readme.md",
"currentPath": "work/README.md",
"hash": "4f572de1efd35b429b45d9932e8ffa93153c9b0a421c7afec99b4af109aa87d1",
"hash": "756bb90539f71f054db700f99b053cf8e8b94a2d17499ce170cc1ba5db7276b3",
"docContent": "<p><br></p>",
"checkedStatus": "done",
"comments": [
......@@ -17,7 +17,7 @@
"timestamp": 1747069658074
}
],
"lastCheckedTimestamp": 1750683280697,
"lastFileModificationTimestamp": 1750683272808.5942,
"lastCheckedTimestamp": 1753177346533,
"lastFileModificationTimestamp": 1753177332762.9363,
"hash_version": 2
}
......@@ -2,17 +2,22 @@
"fileId": "e3281330-5559-49da-9434-bf3cccd4ddae",
"originalPath": "work/commands/calculateAiPrices.py",
"currentPath": "work/commands/calculateAiPrices.py",
"hash": "343345985f2e8153b4e9e6a7efa0479c3938e68080388f23e537609f14cafded",
"hash": "5853eddf6c87959b28413a7d63edaef015cde95c13b6b28d13a0d24dfdb30af2",
"hash_version": 2,
"docContent": "<p><br></p>",
"checkedStatus": "todo",
"checkedStatus": "done",
"comments": [
{
"commentId": "1d5f60af-fe8c-46f1-8a99-806b835d8ed6",
"text": "Maybe not stable... Need to run this a few weeks on prod...",
"timestamp": 1750682733287
},
{
"commentId": "1180a758-e39f-429f-ac9b-415747f29e56",
"text": "https://s3.eu-central-1.amazonaws.com/monosnap.bugsmasher.online/marcoschmiedel/2025-06-23.m4v",
"timestamp": 1753170856993
}
],
"lastCheckedTimestamp": 1750682714405,
"lastFileModificationTimestamp": 1750681385737.0137
"lastCheckedTimestamp": 1753177024777,
"lastFileModificationTimestamp": 1753169895196.4453
}
......@@ -12,13 +12,27 @@ from models.deal_deal import DealDeal
from models.option_opti import OptionOpti
from models.aiprice_aipr import AipriceAipr
# Konfiguriert ein einfaches Logging, um den Skriptverlauf zu sehen.
# This configures basic logging to monitor the script's execution flow.
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s: %(message)s")
def get_validated_response(gpt_manager: OpenAiManager, name: str) -> dict | None:
"""Sendet eine Anfrage an GPT und validiert die JSON-Antwort auf Konsistenz."""
"""
Sends a request to an LLM to extract structured pricing data from a name string.
This function employs a consistency-checking mechanism to ensure a high-quality
response. It queries the LLM repeatedly (up to 10 times) until it receives
two consecutive, identical, and structurally valid JSON responses. This
mitigates the risk of model hallucinations or inconsistent outputs.
Args:
gpt_manager: An instance of the OpenAiManager to handle the API call.
name: The name string of the mobile plan option to be analyzed.
Returns:
A dictionary containing the structured pricing data if a consistent
response is obtained, otherwise None.
"""
prompt = """
# Teil 1 – Aufgabe
......@@ -333,13 +347,13 @@ def get_validated_response(gpt_manager: OpenAiManager, name: str) -> dict | None
last_response = None
# GEÄNDERT: Schleife für bis zu 10 Versuche, um eine konsistente Antwort zu erhalten
# This loop runs up to 10 times to get a consistent and valid response.
for attempt in range(1, 11):
logging.info(f"-> GPT-Versuch {attempt}/10 für '{name}'...")
try:
raw_response = gpt_manager.chat(prompt, model="gpt-4.1")
# Bereinige und parse die JSON-Antwort
# This block cleans and parses the JSON response from the LLM.
if raw_response.strip().startswith("```"):
cleaned_response = re.sub(r"```[\w]*", "", raw_response).strip()
else:
......@@ -347,35 +361,42 @@ def get_validated_response(gpt_manager: OpenAiManager, name: str) -> dict | None
data = json.loads(cleaned_response)
# Prüfe, ob die Struktur der Antwort korrekt ist
# This block validates the structure of the JSON response.
if isinstance(data, dict) and all(key in data for key in expected_keys):
# Prüfe, ob die Antwort mit der letzten übereinstimmt
# This condition checks if the response is identical to the previous valid one.
if data == last_response:
logging.info(f"-> Konsistente Antwort für '{name}' in Versuch {attempt} erhalten. Daten sind gültig.")
return data
else:
# Speichere die erste gültige Antwort und fordere eine zweite zur Bestätigung an
# This line stores the first valid response to verify it against the next one.
last_response = data
logging.warning(f"-> Gültige, aber noch nicht bestätigte Antwort in Versuch {attempt} für '{name}'. Nächster Versuch zur Verifizierung.")
else:
logging.warning(f"-> Ungültige Datenstruktur in Versuch {attempt} für '{name}'.")
last_response = None # Setze zurück, da die Kette unterbrochen ist
last_response = None # This line resets the consistency check because the chain of valid responses is broken.
except Exception as e:
logging.error(f"-> Fehler in Versuch {attempt} für '{name}': {e}")
last_response = None # Setze bei Fehler zurück
last_response = None # This line resets the consistency check on error.
logging.error(f"-> Konnte nach 10 Versuchen keine zwei aufeinanderfolgenden, identischen Antworten für '{name}' erhalten.")
return None
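
# Illustrative sketch (an assumption for clarity, not part of this commit):
# the retry logic above reduces to the generic double-confirmation pattern
# below. `ask` is a hypothetical zero-argument callable returning a raw JSON
# string; the module's existing `json` import is reused.
def query_until_consistent(ask, max_attempts: int = 10) -> dict | None:
    last_response = None
    for _ in range(max_attempts):
        try:
            data = json.loads(ask())
        except (json.JSONDecodeError, TypeError):
            data = None
        if not isinstance(data, dict):
            last_response = None  # an invalid answer breaks the chain
            continue
        if data == last_response:
            return data  # two identical valid answers in a row: accept
        last_response = data  # remember this answer, await confirmation
    return None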
def sync_names_to_aiprice(session: Session):
"""PROZESS 1: Synchronisiert eindeutige Namen in die aiprice_aipr Tabelle."""
"""
Process 1: Synchronizes new, relevant names into the aiprice_aipr table.
This function scans the deal_deal and option_opti tables for entries that
likely contain pricing information, based on a list of keywords. It then
inserts any unique names not already present in aiprice_aipr, preparing
them for analysis in the next step.
"""
logging.info("Starte Prozess 1: Synchronisiere Namen.")
# Lade existierende Keys, um Duplikate zu vermeiden
# This line loads all existing primary keys to avoid inserting duplicates.
existing_keys = {key for key, in session.query(AipriceAipr.key_aipr)}
# Schlüsselwörter, die für den Import berücksichtigt werden sollen
# This list defines keywords to identify deals and options that likely contain pricing information.
include_keywords = [
"%Sonderbonus%",
"%Vergütungsverzicht%",
......@@ -387,11 +408,11 @@ def sync_names_to_aiprice(session: Session):
"%Monatsgrundpreis%",
]
# Erstelle eine Liste von 'like'-Bedingungen für die Filterung
# This line creates a list of 'like' conditions for SQLAlchemy filtering.
deal_conditions = [DealDeal.name_deal.like(keyword) for keyword in include_keywords]
option_conditions = [OptionOpti.name_opti.like(keyword) for keyword in include_keywords]
# Lade eindeutige Namen aus den Quelltabellen und wende die Einschlussfilter an
# These queries fetch distinct, non-empty names from source tables based on the keyword filters.
deals = session.query(DealDeal.name_deal).distinct().filter(
DealDeal.name_deal != "",
or_(*deal_conditions)
......@@ -403,12 +424,12 @@ def sync_names_to_aiprice(session: Session):
or_(*option_conditions)
).all()
# Kombiniere und erstelle neue Einträge
# This block combines names from both sources and prepares new entries.
new_entries = []
for name, category in [(d[0], 'deal') for d in deals] + [(o[0], 'opti') for o in options]:
if name not in existing_keys:
new_entries.append(AipriceAipr(key_aipr=name, category_aipr=category))
existing_keys.add(name) # Verhindert doppeltes Hinzufügen im selben Lauf
existing_keys.add(name) # This line prevents adding the same key twice in one run.
if new_entries:
session.add_all(new_entries)
......@@ -420,11 +441,19 @@ def sync_names_to_aiprice(session: Session):
def enrich_aiprice_with_gpt(session: Session):
"""PROZESS 2: Reichert Einträge ohne response_aipr sofort einzeln an."""
"""
Process 2: Enriches records in aiprice_aipr with structured data from an LLM.
This function retrieves all records from the aiprice_aipr table that have
not yet been processed (i.e., `response_aipr` is NULL). It sends each
record's name to the LLM via `get_validated_response` and saves the
resulting structured JSON data back to the database, committing each
record individually to ensure progress is saved.
"""
logging.info("Starte Prozess 2: Reichere Daten mit GPT an.")
gpt_manager = OpenAiManager()
# Verarbeite nur Einträge, bei denen die Antwort noch fehlt
# This query selects only records from the aiprice table that have not yet been processed.
items_to_process = session.query(AipriceAipr).filter(AipriceAipr.response_aipr.is_(None)).all()
if not items_to_process:
......@@ -433,13 +462,15 @@ def enrich_aiprice_with_gpt(session: Session):
logging.info(f"{len(items_to_process)} Einträge werden verarbeitet.")
# This loop processes each item individually.
for item in items_to_process:
validated_data = get_validated_response(gpt_manager, item.key_aipr)
# Speichere sofort, wenn die Daten gültig sind
# This block saves the result if the response data is valid and consistent.
if validated_data:
item.response_aipr = validated_data
try:
# This line commits the change for the current item immediately.
session.commit()
logging.info(f"Eintrag für '{item.key_aipr}' erfolgreich gespeichert.")
except Exception as e:
......@@ -448,11 +479,14 @@ def enrich_aiprice_with_gpt(session: Session):
logging.info("Prozess 2 abgeschlossen.")
# --- Hauptausführung ---
# This block is the main entry point for the script.
if __name__ == "__main__":
logging.info("=== Starte kombinierte Ausführung: sync & enrich ===")
# This line initializes a new database session.
db_session = MysqlManager().getSession()
# This line executes the first process to sync new names.
sync_names_to_aiprice(db_session)
# This line executes the second process to enrich the new names with data.
enrich_aiprice_with_gpt(db_session)
\ No newline at end of file
......@@ -3,6 +3,7 @@ import os
import re
import ast
import json
import csv
import datetime as _dt
import traceback
from typing import Any, Dict, List, Tuple
......@@ -216,6 +217,24 @@ if __name__ == "__main__":
print("FEHLER: Cache-Verzeichnis nicht gefunden.")
sys.exit(1)
# Load all partner card IDs from the CSV for quick lookup
partnercardsCsvPath = os.path.join(cacheDir, "partnercards.csv")
partnercardIds = set()
if os.path.exists(partnercardsCsvPath):
print(f"INFO: Lese Partnercard-IDs aus '{partnercardsCsvPath}'...")
try:
with open(partnercardsCsvPath, mode='r', newline='', encoding='utf-8') as f:
reader = csv.reader(f, delimiter=';')
header = next(reader) # Skip header
for row in reader:
if row:
partnercardIds.add(row[0])
print(f"INFO: {len(partnercardIds)} Partnercard-IDs geladen.")
except Exception as e:
print(f"WARNUNG: Fehler beim Lesen der Partnercard-CSV: {e}")
else:
print("INFO: Datei 'partnercards.csv' nicht gefunden. 'is_partnercard' wird immer false sein.")
# This list comprehension gathers all files with a .pdf extension
pdfFiles = [f for f in os.listdir(cacheDir) if f.lower().endswith(".pdf")]
......@@ -312,6 +331,11 @@ if __name__ == "__main__":
print("FEHLER: Drei ungültige Antworten – übersprungen.")
continue
# Check if the current tariff ID is a partner card and add the flag to the data.
isPartnercard = tariffId in partnercardIds
validatedData['is_partnercard'] = isPartnercard
print(f"INFO: Feld 'is_partnercard' auf '{isPartnercard}' gesetzt.")
# This loop updates the details_base for each relevant BaseBase record
for br in baseRecords:
......
......@@ -4,7 +4,7 @@ MYSQL_PASSWORD = "floz09sx3dTyx144gy"
MYSQL_DATABASE = "itmax_tarifs"
MYSQL_PORT = 3306
USE_SSH_TUNNEL = True
USE_SSH_TUNNEL = False
SSH_HOST = "jumphost.bugsmasher.online"
SSH_PORT = 22
SSH_USERNAME = "root"
......
# STEP 1 - IMPORT TARIFS FROM MAUI
0 3 * * * /maui/cron.sh downloadDataFromMaui.py
20 4 * * * /maui/cron.sh downloadDataFromMauiPartnercard.py
# STEP 2 - IMPORT DUMP TO DATABASE
30 4 * * * /maui/cron.sh importCacheToDatabase.py
40 4 * * * /maui/cron.sh importCacheToDatabase.py
# STEP 3 - UPLOAD FLYER TO AWS S3
0 5 * * * /maui/cron.sh uploadCacheToAwsS3.py
5 5 * * * /maui/cron.sh uploadCacheToAwsS3.py
# STEP 4 - GENERATE TARIF-DETAILS WITH GPT
0 5 * * * /maui/cron.sh calculateTarifDetailsWithGpt.py
5 5 * * * /maui/cron.sh calculateTarifDetailsWithGpt.py
# STEP 5 - GENERATE AI PRICES WITH GPT
30 5 * * * /maui/cron.sh calculateAiPrices.py
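# NOTE: cron field order is minute hour day-of-month month day-of-week command;
# e.g. "30 5 * * *" runs daily at 05:30 server time.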
......@@ -9,6 +9,7 @@ https://s3.eu-central-1.amazonaws.com/monosnap.bugsmasher.online/marcoschmiedel/
https://s3.eu-central-1.amazonaws.com/monosnap.bugsmasher.online/marcoschmiedel/2024-05-13.m4v
https://s3.eu-central-1.amazonaws.com/monosnap.bugsmasher.online/marcoschmiedel/2025-06-03.m4v
https://s3.eu-central-1.amazonaws.com/monosnap.bugsmasher.online/marcoschmiedel/2025-06-23.m4v
https://s3.eu-central-1.amazonaws.com/monosnap.bugsmasher.online/marcoschmiedel/2025-07-22.m4v
## JupyterLab
......