From e2f61da2738fbf579e8380687f2c906de5a97d90 Mon Sep 17 00:00:00 2001
From: Khanh Dinh <info@lsg-digital.de>
Date: Thu, 2 Jan 2025 19:39:39 +0100
Subject: [PATCH] Add SLA analyzer app and utilities package; update OKR prompts

---
 .DS_Store             | Bin 0 -> 6148 bytes
 api.py                |  10 +-
 api_dev.ipynb         | 301 ++++++++++++++++++++++++++++++++++++++++++
 config.py             |  64 ++++++++-
 main.py               |   2 +-
 proposer.py           |  17 +--
 sla_analyzer/app.py   | 106 +++++++++++++++
 test.ipynb            |  42 ------
 utilities/__init__.py |   0
 utilities/api.py      |  42 ++++++
 utilities/utils.py    | 129 ++++++++++++++++++
 utils.py              |   2 +-
 12 files changed, 657 insertions(+), 58 deletions(-)
 create mode 100644 .DS_Store
 create mode 100644 api_dev.ipynb
 create mode 100644 sla_analyzer/app.py
 delete mode 100644 test.ipynb
 create mode 100644 utilities/__init__.py
 create mode 100644 utilities/api.py
 create mode 100644 utilities/utils.py

diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..a08bc9ac1a1555e14a6980cf3e9212ef2b93ac16
GIT binary patch
literal 6148
zcmeHKy-EW?5S}$BMlB3zA=oSkmO{YBM$Y4GMTn(^=7$(aynve4E4B1FQl#?{()td*
zfnY26&F&`ddWofo&cN=sH$OXbU$~tt5vk<1StY6vQ5l0VTER3!*v_LM3r@qqiudTY
ztF=bf>lZvlAq)ruzmWm+;nOa)sY5s6_d7~Za5S;?vNWxCvpRI;__?(F`Zzue?agoP
z{Y&p`h&U}RV($($sYMrI=Cx61^>e@74flpNw-?{bKC{pE2=dbj=U+np0bNsrGOi-5
zf!>QAejUd<<Ff}_$CrKlTIPF&^%y=PRMVm>8u*p0Z@-+PlBvzFVV@4R?kB&4i}kb5
zvio^Ao;0==`KJt^XS0-7Ey^qm2m`{vlmR{;GzMerFt=!q4ov0>0F0nEg0<;oK+OtZ
z>@c?o4Mf>cpbb^-ilJ;c^xFBw4s(k(oRrCoeOzYcZYauRhh7_ZQn5vug#lq8WT0dp
zE8PE&=HLIrAbAo7gn>WBfQr&un&6dOZ!NqW_gal{j={oyxkVF#$+cs%;8wheVFWhK
W3t;Rpw+IbH{s<TvWD*8`lz|Vh_JA`0

literal 0
HcmV?d00001

diff --git a/api.py b/api.py
index 7c1d82b..36cb1a5 100644
--- a/api.py
+++ b/api.py
@@ -5,6 +5,7 @@ from dotenv import load_dotenv
 from settings import load_settings
 from utils import construct_prompt
 
+
 # Load API key from .env file
 load_dotenv()
 api_key = os.getenv("OPENAI_API_KEY")
@@ -23,6 +24,7 @@ def fetch_okrs(user_input: str):
 
     user_prompt = construct_prompt(prompt_template=input_template, user_input=user_input)
     
+    print(user_prompt)
     headers = {"api-key": api_key, "Content-Type": "application/json"}
     body = {
         "messages": [
@@ -36,4 +38,10 @@ def fetch_okrs(user_input: str):
         return response.json()
     except Exception as e:
         st.error(f"Error fetching data from API: {e}")
-        return None
\ No newline at end of file
+        return None
+
+
+if __name__ == "__main__":  # manual smoke test only: importing api (as main.py does) must not call the API
+    from config import INPUT_TEMPLATE
+    result = fetch_okrs(user_input=INPUT_TEMPLATE)  # fetch_okrs returns None when the request failed
+    print(type(result['choices'][0]['message']['content']) if result else "request failed")
\ No newline at end of file
diff --git a/api_dev.ipynb b/api_dev.ipynb
new file mode 100644
index 0000000..0afba09
--- /dev/null
+++ b/api_dev.ipynb
@@ -0,0 +1,301 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 91,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import requests\n",
+    "import os\n",
+    "import streamlit as st\n",
+    "from dotenv import load_dotenv\n",
+    "from settings import load_settings\n",
+    "from utils import construct_prompt\n",
+    "import json"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 92,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load API key from .env file\n",
+    "load_dotenv()\n",
+    "api_key = os.getenv(\"OPENAI_API_KEY\")\n",
+    "\n",
+    "if not api_key:\n",
+    "    st.error(\"API key not found. Please set OPENAI_API_KEY in your .env file.\")\n",
+    "    st.stop()\n",
+    "\n",
+    "api_url = \"https://genai.dev.odp.lhgroup.de/openai/deployments/gpt-4-turbo/chat/completions?api-version=2023-07-01-preview\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 93,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "system_prompt = \"\"\"\n",
+    "You are an OKR coach who helps beginners create well-phrased OKRs (Objectives and Key Results). \n",
+    "Your goal is to provide a proposal for an OKR based on user input, formatted as JSON. \n",
+    "If the input is unclear or incomplete, include a hint with specific questions or suggestions \n",
+    "to help improve the input.\n",
+    "\n",
+    "Always follow this format for your response:\n",
+    "{\n",
+    "    \"hint\": \"Suggestions or questions to improve the input, if needed. If no hint is needed, return an empty string.\",\n",
+    "    \"proposal\": [\n",
+    "        {\n",
+    "            \"objective\": \"A clear and concise objective variant\",\n",
+    "            \"key_results\": [\"Key result 1\", \"Key result 2\", \"... up to 5 key results\"]\n",
+    "        },\n",
+    "        ...\n",
+    "    ]\n",
+    "}\n",
+    "\n",
+    "Keep your responses concise, actionable, and aligned with OKR best practices. If you need more context from the user, ask for it in the `hint`.\n",
+    "Always provide a 'hint' how to further improve the user input in order to get better results.\n",
+    "\n",
+    "\"\"\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 94,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Prompt template for user input\n",
+    "prompt_template = \"\"\"\n",
+    "Please help us in defining proper OKRs.\n",
+    "Here is what we have thought about and we would like to phrase an OKR with maximum 5 key results.\n",
+    "\n",
+    "this is the user input:\n",
+    "{user_input}\n",
+    "\n",
+    "Please provide the response in json format. \n",
+    "\"\"\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 95,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "input_template = \"\"\"\n",
+    "We want to improve our SLA framework by finalizing the current SLA template, which is used for the negotiations with our ground service partners.\n",
+    "The resulting SLA will be used in order to measure the quality of the service partner and issue penalties if targets are not met. \n",
+    "We would like to improve this, to have a better steering function, e.g. by adding a bonus component to the SLA framework.\n",
+    "\n",
+    "To achieve this, we need to develop a bonus concept which is viable, feasible and desirable. \n",
+    "We need to simulate the concept, so that we don't risk issuing too much bonus which we cannot cover.\n",
+    "We need to define organizational processes and responsibilities so that we are able to pay a bonus.\n",
+    "\n",
+    "How should the OKR look like for the next cycle which starts in Jan 2025 and ends in Apr 2025?\n",
+    "We would like to phrase 4 key results.\n",
+    "\"\"\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 96,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def fetch_okrs(user_input: str, system_prompt: str, prompt_template: str):\n",
+    "    #settings = load_settings()\n",
+    "    \n",
+    "    #system_prompt = settings[\"system_prompt\"]\n",
+    "    #input_template = settings[\"input_template\"]\n",
+    "\n",
+    "    user_prompt = construct_prompt(prompt_template=prompt_template, user_input=user_input)\n",
+    "    \n",
+    "    print(user_prompt)\n",
+    "    headers = {\"api-key\": api_key, \"Content-Type\": \"application/json\"}\n",
+    "    body = {\n",
+    "        \"messages\": [\n",
+    "            {\"role\": \"system\", \"content\": system_prompt},\n",
+    "            {\"role\": \"user\", \"content\": user_prompt}\n",
+    "        ]\n",
+    "    }\n",
+    "    try:\n",
+    "        response = requests.post(url=api_url, headers=headers, json=body)\n",
+    "        response.raise_for_status()\n",
+    "        return response.json()\n",
+    "    except Exception as e:\n",
+    "        st.error(f\"Error fetching data from API: {e}\")\n",
+    "        return None\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 97,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def parse_json_content(cleaned_content: str):\n",
+    "    \"\"\"\n",
+    "    Parses the cleaned content to extract valid JSON data.\n",
+    "\n",
+    "    Args:\n",
+    "        cleaned_content (str): The raw content containing JSON data.\n",
+    "\n",
+    "    Returns:\n",
+    "        dict or list: The parsed JSON object.\n",
+    "    \"\"\"\n",
+    "    import re\n",
+    "\n",
+    "    # Step 1: Strip unwanted characters and clean the content\n",
+    "    cleaned_content = cleaned_content.strip()\n",
+    "\n",
+    "    # Step 2: Use regex to extract only the valid JSON block (e.g., starts with [ or {)\n",
+    "    json_match = re.search(r\"(\\{.*\\}|\\[.*\\])\", cleaned_content, re.DOTALL)\n",
+    "    \n",
+    "    if not json_match:\n",
+    "        raise ValueError(\"No valid JSON found in the content.\")\n",
+    "\n",
+    "    # Step 3: Extract and parse the valid JSON\n",
+    "    valid_json = json_match.group(0)  # Extract matched JSON block\n",
+    "    try:\n",
+    "        extracted_data = json.loads(valid_json)\n",
+    "    except json.JSONDecodeError as e:\n",
+    "        raise ValueError(f\"Failed to decode JSON. Error: {e}\\nContent:\\n{valid_json}\")\n",
+    "\n",
+    "    return extracted_data\n",
+    "\n",
+    "# Function to extract and parse JSON response\n",
+    "def extract_llm_response(response):\n",
+    "    \"\"\"\n",
+    "    Extracts and parses the JSON response from the API.\n",
+    "\n",
+    "    Args:\n",
+    "        response (dict): The API response containing a hint and proposals.\n",
+    "\n",
+    "    Returns:\n",
+    "        tuple: A tuple containing the objective (str), key results (list), and hint (str).\n",
+    "    \"\"\"\n",
+    "    #print(\"RESPONSE:\",response)\n",
+    "\n",
+    "    raw_message_content = response[\"choices\"][0][\"message\"][\"content\"]\n",
+    "    #print(\"raw_message_content:\", raw_message_content)\n",
+    "    # Clean and parse the JSON content\n",
+    "    cleaned_content = raw_message_content.replace(\"`\", \"\").split(\"json\")[-1]\n",
+    "    #print(\"cleaned content\", cleaned_content)\n",
+    "\n",
+    "    parsed_data = parse_json_content(cleaned_content=cleaned_content)\n",
+    "    #print(\"parsed_data:\",parsed_data)\n",
+    "\n",
+    "    hint = parsed_data.get(\"hint\", \"\")\n",
+    "\n",
+    "    proposals = parsed_data.get(\"proposal\", [])\n",
+    "\n",
+    "    if proposals:\n",
+    "        # Extract the first proposal's objective and key results\n",
+    "        first_proposal = proposals[0]  # Get the first proposal (assuming it's a list)\n",
+    "        objective = first_proposal.get(\"objective\", \"\")\n",
+    "        key_results = first_proposal.get(\"key_results\", [])\n",
+    "    else:\n",
+    "        objective = \"\"\n",
+    "        key_results = []\n",
+    "\n",
+    "    #print(\"debug:\", parsed_data.get(\"objective\", \"\"))\n",
+    "\n",
+    "    return objective, key_results, hint"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 98,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Please help us in defining proper OKRs.\n",
+      "Here is what we have thought about and we would like to phrase an OKR with maximum 5 key results.\n",
+      "\n",
+      "this is the user input:\n",
+      "\n",
+      "We want to improve our SLA framework by finalizing the current SLA template, which is used for the negotiations with our ground service partners.\n",
+      "The resulting SLA will be used in order to measure the quality of the service partner and issue penalties if targets are not met. \n",
+      "We would like to improve this, to have a better steering function, e.g. by adding a bonus component to the SLA framework.\n",
+      "\n",
+      "To achieve this, we need to develop a bonus concept which is viable, feasible and desirable. \n",
+      "We need to simulate the concept, so that we don't risk issuing too much bonus which we cannot cover.\n",
+      "We need to define organizational processes and responsibilities so that we are able to pay a bonus.\n",
+      "\n",
+      "How should the OKR look like for the next cycle which starts in Jan 2025 and ends in Apr 2025?\n",
+      "We would like to phrase 4 key results.\n",
+      "\n",
+      "\n",
+      "Please provide the response in json format. \n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "from config import INPUT_TEMPLATE, PROMPT_TEMPLATE\n",
+    "result = fetch_okrs(user_input=input_template, system_prompt=system_prompt, prompt_template=prompt_template)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 99,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "objective, key_results, hint = extract_llm_response(response=result)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 100,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Enhance the SLA framework to better incentivize ground service partners by April 2025\n",
+      "4 ['Finalize the updated SLA template with bonus components for better partner performance by February 2025', 'Develop a well-defined bonus concept that is viable, feasible, and desirable by March 2025', 'Simulate the bonus payout scenarios to ensure financial sustainability by end of March 2025', 'Establish clear organizational processes and assign responsibilities for bonus distribution by April 2025']\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(objective)\n",
+    "print(len(key_results), key_results)\n",
+    "print(hint)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/config.py b/config.py
index cbdb656..91c413e 100644
--- a/config.py
+++ b/config.py
@@ -1,4 +1,4 @@
-SYSTEM_PROMPT = """
+SYSTEM_PROMPT2 = """
 You are an OKR coach and support us, as beginners in OKRs, with the correct phrasing of OKRs. 
 Always one proposal and provide the proposal in the requested format (json). 
 If you think the input is not clear enough and the OKR could be improved, then provide a hint,
@@ -18,6 +18,29 @@ Format could look like this:
 }
 """
 
+SYSTEM_PROMPT = """
+You are an OKR coach who helps beginners create well-phrased OKRs (Objectives and Key Results). 
+Your goal is to provide a proposal for an OKR based on user input, formatted as JSON. 
+If the input is unclear or incomplete, include a hint with specific questions or suggestions 
+to help improve the input.
+
+Always follow this format for your response:
+{
+    "hint": "Suggestions or questions to improve the input, if needed. If no hint is needed, return an empty string.",
+    "proposal": [
+        {
+            "objective": "A clear and concise objective variant",
+            "key_results": ["Key result 1", "Key result 2", "... up to 5 key results"]
+        },
+        ...
+    ]
+}
+
+Keep your responses concise, actionable, and aligned with OKR best practices. If you need more context from the user, ask for it in the `hint`.
+Always provide a 'hint' how to further improve the user input in order to get better results.
+
+"""
+
 INPUT_TEMPLATE = """
 We want to improve our SLA framework by finalizing the current SLA template, which is used for the negotiations with our ground service partners.
 The resulting SLA will be used in order to measure the quality of the service partner and issue penalties if targets are not met. 
@@ -44,15 +67,46 @@ Objective (key: objective)
 key results (list with the key results)
 The json could be structured like this:
 
-{
+{{
     "hint": "any additional questions, which can be added to the user input to rerun the prompting",
     "proposals": [
-        {
+        {{
             "objective":"objective variant 1",
             "key_result_1":"key result 1",
             ...
-        },
+        }},
         ...
     ]
-}
+}}
+"""
+
+PROMPT_TEMPLATE = """
+Please help us in defining proper OKRs.
+Here is what we have thought about and we would like to phrase an OKR with maximum 5 key results.
+
+this is the user input:
+{user_input}
+
+Please provide the response in json format. 
+"""
+
+INPUT_TEMPLATE = """
+We want to improve our SLA framework by finalizing the current SLA template, which is used for the negotiations with our ground service partners.
+The resulting SLA will be used in order to measure the quality of the service partner and issue penalties if targets are not met. 
+We would like to improve this, to have a better steering function, e.g. by adding a bonus component to the SLA framework.
+
+To achieve this, we need to develop a bonus concept which is viable, feasible and desirable. 
+We need to simulate the concept, so that we don't risk issuing too much bonus which we cannot cover.
+We need to define organizational processes and responsibilities so that we are able to pay a bonus.
+
+How should the OKR look like for the next cycle which starts in Jan 2025 and ends in Apr 2025?
+"""
+
+# Prompt template for user input
+PROMPT_TEMPLATE2 = """
+Please help us in defining proper OKRs.
+Here is what we have thought about and we would like to phrase an OKR with up to 5 key results.
+
+this is the user input:
+{user_input}
 """
\ No newline at end of file
diff --git a/main.py b/main.py
index 314bb3c..e702647 100644
--- a/main.py
+++ b/main.py
@@ -1,7 +1,7 @@
 import streamlit as st
 from dotenv import load_dotenv
 from api import fetch_okrs
-from utils import construct_prompt, extract_llm_response
+from utilities.utils import construct_prompt, extract_llm_response
 from styles import apply_styles
 from config import SYSTEM_PROMPT, INPUT_TEMPLATE
 
diff --git a/proposer.py b/proposer.py
index 931974f..088ac9d 100644
--- a/proposer.py
+++ b/proposer.py
@@ -6,22 +6,23 @@ from utils import construct_prompt, extract_llm_response
 
 def proposer_page():
     # Streamlit App Layout
-    st.title("OKR Generator")
+    st.title("AO/PM OKR Proposer")
 
     # Input Section and Buttons Row
     st.subheader("Enter your idea or goal:")
     user_input = st.text_area(
         "Input your idea here:",
         value=st.session_state.get("user_input", INPUT_TEMPLATE.strip()),
-        height=200,
+        height=300,
     )
 
-    col1, col2 = st.columns([1, 1])
-    with col1:
-        if st.button("Reset All"):
-            st.session_state.clear()
-    with col2:
-        generate_okrs_clicked = st.button("Generate OKRs")
+    generate_okrs_clicked = st.button("Generate OKR Proposal")
+    #col1, col2 = st.columns([1, 1])
+    #with col1:
+    #    if st.button("Reset All"):
+    #        st.session_state.clear()
+    #with col2:
+        
         
     if generate_okrs_clicked:
         if not user_input.strip():
diff --git a/sla_analyzer/app.py b/sla_analyzer/app.py
new file mode 100644
index 0000000..c657410
--- /dev/null
+++ b/sla_analyzer/app.py
@@ -0,0 +1,106 @@
+import streamlit as st
+import fitz  # PyMuPDF for PDF text extraction
+import pandas as pd
+import json
+
+from utilities.utils import extract_text_from_pdf, chunk_text, construct_prompt_for_pdf
+#from utilities.utils import extract_text_from_pdf, chunk_text, construct_prompt_for_pdf
+from utilities.api import call_prompt
+
+prompt_template = """
+You are an AI assistant analyzing a service level agreement (SLA) document. Extract the following structured information in JSON format:
+1. Airline name and ground handling partner.
+2. Effective date of the agreement.
+3. Airport location.
+4. Delay codes penalized, including target ranges and penalties.
+5. Baggage loss reasons and corresponding penalties.
+
+Here is the document content:
+{pdf_input}
+
+Provide output in JSON format only.
+"""
+
+# Streamlit app layout: title, PDF upload and prompt input
+st.title("PDF SLA Analyzer")
+
+# File uploader restricted to .pdf files; uploaded_file stays None until a file is chosen
+st.header("Upload a PDF File")
+uploaded_file = st.file_uploader("Choose a PDF file to analyze", type=["pdf"])
+
+# Text area for a user-editable prompt; the default repeats prompt_template's instructions without the document section
+st.header("Enter Your Custom Prompt")
+custom_prompt = st.text_area(
+    "Custom Prompt",
+    value="""
+You are an AI assistant analyzing a service level agreement (SLA) document. Extract the following structured information in JSON format:
+1. Airline name and ground handling partner.
+2. Effective date of the agreement.
+3. Airport location.
+4. Delay codes penalized, including target ranges and penalties.
+5. Baggage loss reasons and corresponding penalties.
+
+Provide output in JSON format only.
+"""
+)
+
+# Button to process the PDF and call the API
+if st.button("Analyze"):
+    if uploaded_file is not None:
+        from utilities.utils import parse_json_content  # copes with prose/fences around the JSON
+
+        # Step 1: Extract text from the uploaded PDF
+        with st.spinner("Extracting text from PDF..."):
+            pdf_text = extract_text_from_pdf(uploaded_file)
+
+        # Step 2: Chunk the text if necessary
+        with st.spinner("Chunking large text..."):
+            chunks = chunk_text(pdf_text)
+
+        # Step 3: Query GPT-4-Turbo for each chunk and keep the text of every reply
+        all_results = []
+        with st.spinner("Querying GPT-4-Turbo..."):
+            for i, chunk in enumerate(chunks):
+                st.text(f"Processing chunk {i + 1}/{len(chunks)}...")
+                prompt = construct_prompt_for_pdf(prompt_template=prompt_template, pdf_input=chunk)
+                # The prompt is already complete: pass it through as-is, custom prompt as system message.
+                result = call_prompt(prompt, prompt_template="{user_input}", system_prompt=custom_prompt)
+                if result is not None:  # on failure call_prompt has already shown the error
+                    all_results.append(result["choices"][0]["message"]["content"])
+
+        # Step 4: Parse each reply on its own (replies joined by newlines are not valid JSON) and
+        # merge them: lists are concatenated, for any other value the first chunk wins.
+        st.subheader("Raw JSON Response")
+        json_data = {}
+        for reply in all_results:
+            try:
+                chunk_data = parse_json_content(reply)
+            except ValueError:
+                st.error("Failed to parse JSON response.")
+                st.text(reply)  # Show raw response if parsing fails
+                continue
+            for key, value in (chunk_data.items() if isinstance(chunk_data, dict) else ()):
+                if isinstance(value, list) and isinstance(json_data.get(key, []), list):
+                    json_data.setdefault(key, []).extend(value)
+                else:
+                    json_data.setdefault(key, value)
+        st.json(json_data)
+
+        # Step 5: Display results in a table (if JSON is valid)
+        if 'delay_penalties' in json_data or 'baggage_loss_penalties' in json_data:
+            st.subheader("Extracted Data Table")
+            # Create DataFrame for delay penalties (if available)
+            if 'delay_penalties' in json_data:
+                st.write("**Delay Penalties:**")
+                st.table(pd.DataFrame(json_data.get("delay_penalties", [])))
+            # Create DataFrame for baggage loss penalties (if available)
+            if 'baggage_loss_penalties' in json_data:
+                st.write("**Baggage Loss Penalties:**")
+                st.table(pd.DataFrame(json_data.get("baggage_loss_penalties", [])))
+            # Display other metadata (e.g., airline, airport, etc.)
+            metadata_keys = ["airline", "ground_handling_partner", "effective_date", "airport"]
+            metadata = {key: json_data.get(key, "N/A") for key in metadata_keys}
+            st.write("**Metadata:**")
+            st.table(pd.DataFrame([metadata]))
+    else:
+        st.error("Please upload a PDF file to analyze.")
\ No newline at end of file
diff --git a/test.ipynb b/test.ipynb
deleted file mode 100644
index d01c8a4..0000000
--- a/test.ipynb
+++ /dev/null
@@ -1,42 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "hello\n"
-     ]
-    }
-   ],
-   "source": [
-    "print('hello')"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "venv",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.6"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/utilities/__init__.py b/utilities/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/utilities/api.py b/utilities/api.py
new file mode 100644
index 0000000..eb6dd52
--- /dev/null
+++ b/utilities/api.py
@@ -0,0 +1,42 @@
+import requests
+import os
+import streamlit as st
+from dotenv import load_dotenv
+from settings import load_settings
+#from utils import construct_prompt
+from utilities.utils import construct_prompt
+
+# Load the API key once at import time (load_dotenv() reads the .env file into the environment)
+load_dotenv()
+api_key = os.getenv("OPENAI_API_KEY")
+
+if not api_key:
+    st.error("API key not found. Please set OPENAI_API_KEY in your .env file.")
+    st.stop()  # a missing key is reported in the UI and the script is halted here
+
+api_url = "https://genai.dev.odp.lhgroup.de/openai/deployments/gpt-4-turbo/chat/completions?api-version=2023-07-01-preview"  # endpoint call_prompt posts to
+
+def call_prompt(user_input: str, prompt_template: str = None, system_prompt: str = None):
+    """Send one chat-completion request and return the decoded JSON reply (None if the request fails).
+
+    user_input is inserted into prompt_template's {user_input} placeholder to form the user
+    message; system_prompt is sent as the system message. Prompts left as None are taken
+    from load_settings() ("input_template" / "system_prompt"); explicit ones are never overridden.
+    """
+    if prompt_template is None or system_prompt is None:
+        settings = load_settings()
+        if prompt_template is None:
+            prompt_template = settings["input_template"]
+        if system_prompt is None:
+            system_prompt = settings["system_prompt"]
+    user_prompt = construct_prompt(prompt_template=prompt_template, user_input=user_input)
+    headers = {"api-key": api_key, "Content-Type": "application/json"}
+    body = {"messages": [{"role": "system", "content": system_prompt},
+                         {"role": "user", "content": user_prompt}]}
+    try:
+        response = requests.post(url=api_url, headers=headers, json=body)
+        response.raise_for_status()
+        return response.json()
+    except Exception as e:
+        st.error(f"Error fetching data from API: {e}")
+        return None
\ No newline at end of file
diff --git a/utilities/utils.py b/utilities/utils.py
new file mode 100644
index 0000000..739eb3c
--- /dev/null
+++ b/utilities/utils.py
@@ -0,0 +1,129 @@
+import json
+import streamlit as st
+import re
+import fitz  # PyMuPDF
+
+# Prompt builders: each fills the named placeholder of a str.format-style prompt template.
+def construct_prompt(prompt_template: str, user_input: str) -> str:
+    return prompt_template.format(user_input=user_input)  # fills {user_input}; literal braces in the template must be doubled
+
+def construct_prompt_for_pdf(prompt_template: str, pdf_input: str) -> str:
+    return prompt_template.format(pdf_input=pdf_input)  # fills {pdf_input}; the inserted text itself is used verbatim
+
+def parse_json_content(cleaned_content: str):
+    """
+    Extracts and parses the first valid JSON object or array embedded in the text.
+
+    Args:
+        cleaned_content (str): Text that contains a JSON value, possibly surrounded by prose.
+
+    Returns:
+        dict or list: The parsed JSON object.
+
+    Raises:
+        ValueError: If the text contains no parseable JSON object or array.
+    """
+    decoder = json.JSONDecoder()
+    last_error = None
+
+    # Try every possible JSON start; raw_decode stops at the end of the first complete
+    # value, so trailing prose (even prose containing braces) cannot corrupt the parse.
+    for match in re.finditer(r"[{\[]", cleaned_content):
+        try:
+            extracted_data, _ = decoder.raw_decode(cleaned_content, match.start())
+        except json.JSONDecodeError as e:
+            last_error = e
+            continue
+        return extracted_data
+
+    if last_error is None:
+        raise ValueError("No valid JSON found in the content.")
+    raise ValueError(f"Failed to decode JSON. Error: {last_error}\nContent:\n{cleaned_content.strip()}")
+
+# Function to extract and parse JSON response
+def extract_llm_response(response):
+    """
+    Extracts the hint and the first OKR proposal from a chat-completion API response.
+
+    Args:
+        response (dict): The API response; the model's reply is read from
+            response["choices"][0]["message"]["content"].
+
+    Returns:
+        tuple: A tuple containing the objective (str), key results (list), and hint (str).
+            Objective and key results are empty when the reply contains no proposal.
+
+    Raises:
+        ValueError: If parse_json_content finds no parseable JSON in the reply.
+    """
+    # The reply text sits in the first choice of the chat-completion payload.
+    raw_message_content = response["choices"][0]["message"]["content"]
+    cleaned_content = _unwrap_code_fence(raw_message_content)
+    parsed_data = parse_json_content(cleaned_content=cleaned_content)
+
+    if isinstance(parsed_data, list):
+        # A bare JSON array is taken to be the list of proposals itself.
+        parsed_data = {"proposals": parsed_data}
+
+    hint = parsed_data.get("hint", "")
+
+    # The prompts in config.py spell this key both "proposal" and "proposals",
+    # so accept either instead of silently returning an empty OKR.
+    proposals = parsed_data.get("proposals") or parsed_data.get("proposal") or []
+    if isinstance(proposals, dict):
+        # A single proposal object was returned instead of a list of proposals.
+        proposals = [proposals]
+
+    if not proposals:
+        # Nothing to show, but the hint still tells the user how to refine the input.
+        return "", [], hint
+
+    first_proposal = proposals[0]
+    objective = first_proposal.get("objective", "")
+    key_results = first_proposal.get("key_results", [])
+    if not key_results:
+        # One prompt format in config.py lists results as "key_result_1", "key_result_2", ...
+        key_results = [v for k, v in first_proposal.items() if k.startswith("key_result_")]
+
+    return objective, key_results, hint
+
+
+def _unwrap_code_fence(raw_message_content):
+    """
+    Returns the body of the first Markdown code fence in the text, or the text itself.
+
+    Only the fence markers and an optional "json" language tag are dropped. Splitting on
+    the word "json" or deleting every backtick would also cut text out of JSON string
+    values whenever a value contains that word or a backtick.
+
+    Args:
+        raw_message_content (str): The model's reply, possibly wrapped in ``` fences.
+
+    Returns:
+        str: The fenced body if a complete fence exists, otherwise the unchanged input.
+    """
+    fence_match = re.search(r"```(?:json)?(.*?)```", raw_message_content, re.DOTALL)
+    if fence_match is None:
+        return raw_message_content
+    return fence_match.group(1)
+
+
+def extract_text_from_pdf(pdf_path):
+    """Extract text from a PDF given a file path or a binary file-like object (e.g. an upload)."""
+    # fitz.open() treats its first argument as a filename; in-memory data must go through stream=.
+    is_stream = hasattr(pdf_path, "read")
+    doc = fitz.open(stream=pdf_path.read(), filetype="pdf") if is_stream else fitz.open(pdf_path)
+    with doc:  # close the document once the text of all pages has been collected
+        return "".join(page.get_text() for page in doc)
+
+def chunk_text(text, max_chars=3000):
+    """Split text into chunks of at most max_chars characters, preferring newline boundaries."""
+    chunks = []
+    while len(text) > max_chars:
+        split_index = text[:max_chars].rfind("\n")  # Split at the nearest newline
+        if split_index <= 0:  # no newline, or only a leading one: cutting there would never advance
+            split_index = max_chars
+        chunks.append(text[:split_index])
+        text = text[split_index:]
+    chunks.append(text)
+    return chunks
diff --git a/utils.py b/utils.py
index 8cea80a..5b20412 100644
--- a/utils.py
+++ b/utils.py
@@ -116,7 +116,7 @@ def extract_llm_response(response):
 
     hint = parsed_data.get("hint", "")
 
-    proposals = parsed_data.get("proposals", [])
+    proposals = parsed_data.get("proposal", [])
 
     if proposals:
         # Extract the first proposal's objective and key results