"""Prompt, LLM-response and PDF-text helpers.

NOTE(review): this module was rebuilt from the ``+`` lines of a collapsed
``git format-patch`` dump (commit subject "update code"). The code sits
directly before the ``utils.py`` hunk of that patch, so it appears to be the
tail of the newly added ``utilities/utils.py`` -- confirm against the
repository. The patch envelope and the ``.DS_Store`` binary delta are not code
and are not reproduced here.
"""

import json
import logging
import re

logger = logging.getLogger(__name__)

# Greedy on purpose: the match runs from the first opening bracket to the last
# closing bracket of the same kind, so nested objects/arrays stay in one piece.
_JSON_BLOCK = re.compile(r"(\{.*\}|\[.*\])", re.DOTALL)

_FENCE = "```"
_JSON_FENCE = "```json"

# NOTE(review): the dump also shows the tail of one more helper just above
# ``construct_prompt_for_pdf``. It is annotated ``-> str`` and returns
# ``prompt_template.format(user_input=user_input)``, but its ``def`` line (and
# therefore its name) was lost, so it is not recreated here. Restore it from
# the repository before using this module.


def construct_prompt_for_pdf(prompt_template: str, pdf_input: str) -> str:
    """Fill the ``{pdf_input}`` placeholder of *prompt_template*.

    Args:
        prompt_template: A ``str.format`` template with a ``pdf_input`` field.
        pdf_input: The text substituted for that field.

    Returns:
        The formatted prompt.
    """
    return prompt_template.format(pdf_input=pdf_input)


def parse_json_content(cleaned_content: str):
    """Parse the first JSON object or array embedded in *cleaned_content*.

    Text before the first ``{``/``[`` is ignored. Text after the JSON value is
    ignored as well, even when it contains further brackets.

    Args:
        cleaned_content: Raw text containing a JSON object or array.

    Returns:
        The decoded JSON value (``dict`` or ``list``).

    Raises:
        ValueError: If no bracketed block is present or it is not valid JSON.
    """
    match = _JSON_BLOCK.search(cleaned_content.strip())
    if not match:
        raise ValueError("No valid JSON found in the content.")

    candidate = match.group(0)
    try:
        # raw_decode stops at the end of the first complete value, so a
        # trailing "... {see above}" no longer breaks an otherwise valid reply.
        value, _end = json.JSONDecoder().raw_decode(candidate)
    except json.JSONDecodeError as err:
        raise ValueError(
            f"Failed to decode JSON. Error: {err}\nContent:\n{candidate}"
        ) from err
    return value


def _strip_code_fence(content: str) -> str:
    """Return the text inside the last ```` ```json ```` fence of *content*.

    Content without such a fence is returned unchanged.
    """
    _before, opener, after = content.rpartition(_JSON_FENCE)
    if not opener:
        return content
    inside, closer, _rest = after.rpartition(_FENCE)
    return inside if closer else after


def extract_llm_response(response):
    """Extract the objective, key results and hint from a chat-style response.

    The message text is read from ``response["choices"][0]["message"]["content"]``
    and must contain a JSON object, optionally wrapped in a Markdown
    ```` ```json ```` fence.

    NOTE(review): this reads the ``"proposals"`` key, while the same patch
    changes the top-level ``utils.py`` copy of this function to read
    ``"proposal"``. One of the two is presumably wrong -- confirm the key the
    prompt asks the model to emit.

    Args:
        response (dict): The API response.

    Returns:
        tuple: ``(objective, key_results, hint)``. ``objective`` and ``hint``
        default to ``""`` and ``key_results`` to ``[]`` when absent. Only the
        first proposal is used.

    Raises:
        ValueError: If the message holds no valid JSON, or the JSON is not an
            object.
    """
    logger.debug("RESPONSE: %s", response)

    raw_message_content = response["choices"][0]["message"]["content"]
    logger.debug("raw_message_content: %s", raw_message_content)

    # Only the fence is removed. Splitting on the bare word "json" would cut
    # the payload whenever a value inside it mentions "json".
    cleaned_content = _strip_code_fence(raw_message_content)
    logger.debug("cleaned content: %s", cleaned_content)

    parsed_data = parse_json_content(cleaned_content=cleaned_content)
    logger.debug("parsed_data: %s", parsed_data)

    if not isinstance(parsed_data, dict):
        raise ValueError("Expected a JSON object in the LLM response.")

    hint = parsed_data.get("hint", "")
    proposals = parsed_data.get("proposals", [])
    if not proposals:
        return "", [], hint

    first_proposal = proposals[0]
    objective = first_proposal.get("objective", "")
    key_results = first_proposal.get("key_results", [])
    return objective, key_results, hint


def extract_text_from_pdf(pdf_path):
    """Extract text from a PDF file.

    Args:
        pdf_path: Path handed to ``fitz.open``.

    Returns:
        str: The text of all pages, concatenated in page order.
    """
    # The visible code uses ``fitz`` without showing its import. Importing it
    # here keeps this module importable where the package is not installed.
    import fitz

    # The context manager closes the document even if a page fails to render.
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text() for page in doc)


def chunk_text(text, max_chars=3000):
    """Split text into chunks of at most *max_chars* characters.

    Each cut is made at the last newline inside the window when there is one,
    and the newline stays at the start of the following chunk, so
    ``"".join(chunks) == text`` always holds.

    Args:
        text (str): The text to split.
        max_chars (int): Maximum chunk length; must be at least 1.

    Returns:
        list: The chunks in order; ``[text]`` when the text already fits.

    Raises:
        ValueError: If *max_chars* is smaller than 1.
    """
    if max_chars < 1:
        raise ValueError("max_chars must be a positive integer.")

    chunks = []
    while len(text) > max_chars:
        split_index = text.rfind("\n", 0, max_chars)
        # -1: no newline in the window. 0: the only newline is the one left at
        # the front by the previous cut; cutting there would yield an empty
        # chunk and never make progress, so fall back to a hard cut.
        if split_index <= 0:
            split_index = max_chars
        chunks.append(text[:split_index])
        text = text[split_index:]
    chunks.append(text)
    return chunks