# Source listing header (from the code viewer): Python, 106 lines, 4.2 KiB.
import json
import logging

import fitz  # PyMuPDF for PDF text extraction
import pandas as pd
import streamlit as st

from utilities.api import call_prompt
from utilities.utils import extract_text_from_pdf, chunk_text, construct_prompt_for_pdf
#from utilities.utils import extract_text_from_pdf, chunk_text, construct_prompt_for_pdf

# Prompt sent to the model for each chunk of the SLA document.
#
# The top-level key names spelled out below are the ones the result-display
# code in this file looks up ("delay_penalties", "baggage_loss_penalties",
# "airline", "ground_handling_partner", "effective_date", "airport"), so the
# model is told to use them verbatim instead of inventing its own.
#
# {pdf_input} is the only brace-delimited placeholder; keep any other "{" or
# "}" out of this text.
# NOTE(review): construct_prompt_for_pdf is defined elsewhere; presumably it
# substitutes {pdf_input} (str.format or similar) - confirm before adding
# literal braces here.
prompt_template = """
You are an AI assistant analyzing a service level agreement (SLA) document. Extract the following structured information in JSON format:
1. Airline name and ground handling partner.
2. Effective date of the agreement.
3. Airport location.
4. Delay codes penalized, including target ranges and penalties.
5. Baggage loss reasons and corresponding penalties.

Return a single JSON object that uses exactly these top-level keys:
- "airline": name of the airline.
- "ground_handling_partner": name of the ground handling partner.
- "effective_date": effective date of the agreement.
- "airport": airport location.
- "delay_penalties": a list of objects, one per penalized delay code, each giving the delay code, its target range and its penalty.
- "baggage_loss_penalties": a list of objects, one per baggage loss reason, each giving the reason and its penalty.
Use null (or an empty list) for anything this part of the document does not state.

Here is the document content:
{pdf_input}

Provide output in JSON format only.
"""

# ---- Page layout -----------------------------------------------------------
# Text pre-filled into the "Custom Prompt" box when the page is rendered.
_DEFAULT_CUSTOM_PROMPT = """
You are an AI assistant analyzing a service level agreement (SLA) document. Extract the following structured information in JSON format:
1. Airline name and ground handling partner.
2. Effective date of the agreement.
3. Airport location.
4. Delay codes penalized, including target ranges and penalties.
5. Baggage loss reasons and corresponding penalties.

Provide output in JSON format only.
"""

st.title("PDF SLA Analyzer")

# PDF upload section; only files with a ".pdf" extension are offered.
# The "Analyze" handler checks this value against None before using it.
st.header("Upload a PDF File")
uploaded_file = st.file_uploader(
    label="Choose a PDF file to analyze",
    type=["pdf"],
)

# Free-form prompt section, pre-filled with the default extraction prompt.
st.header("Enter Your Custom Prompt")
custom_prompt = st.text_area(
    label="Custom Prompt",
    value=_DEFAULT_CUSTOM_PROMPT,
)

# Button to process the PDF and call the API

logger = logging.getLogger(__name__)


def _strip_code_fence(text):
    """Return *text* without a surrounding Markdown code fence, if present.

    Model responses are often wrapped in a fenced block (optionally tagged
    "json"), which ``json.loads`` cannot parse directly.
    """
    cleaned = text.strip()
    if not cleaned.startswith("```"):
        return cleaned
    # Drop the whole opening fence line, including any language tag.
    _, _, cleaned = cleaned.partition("\n")
    cleaned = cleaned.rstrip()
    if cleaned.endswith("```"):
        cleaned = cleaned[:-3]
    return cleaned.strip()


def _is_blank(value):
    """Return True for values that carry no information (None, "", [])."""
    return value is None or value == "" or value == []


def _combine_payloads(payloads):
    """Combine the parsed JSON payloads of all chunks into one result.

    A single payload is returned unchanged. Several dict payloads are merged:
    list values are concatenated in chunk order, and for any other value the
    first non-blank one wins. If any payload is not a dict, the payloads are
    returned as a list so nothing is silently dropped.
    """
    if len(payloads) == 1:
        return payloads[0]
    if not all(isinstance(payload, dict) for payload in payloads):
        return payloads

    merged = {}
    for payload in payloads:
        for key, value in payload.items():
            current = merged.get(key)
            if isinstance(value, list):
                merged[key] = (current if isinstance(current, list) else []) + value
            elif key not in merged or (_is_blank(current) and not _is_blank(value)):
                merged[key] = value
    return merged


def _show_penalty_table(title, records):
    """Render *records* as a table under *title*.

    Falls back to ``st.write`` when pandas cannot build a DataFrame from the
    model-provided value (e.g. a bare string or a dict of scalars).
    """
    st.write(title)
    try:
        st.table(pd.DataFrame(records))
    except (ValueError, TypeError):
        st.write(records)


def _show_extracted_tables(json_data):
    """Display penalty tables and agreement metadata from a result dict.

    Nothing is shown unless the dict has a "delay_penalties" or
    "baggage_loss_penalties" key. Missing metadata fields are shown as "N/A".
    """
    has_delay = "delay_penalties" in json_data
    has_baggage = "baggage_loss_penalties" in json_data
    if not (has_delay or has_baggage):
        return

    st.subheader("Extracted Data Table")

    if has_delay:
        _show_penalty_table("**Delay Penalties:**", json_data["delay_penalties"])
    if has_baggage:
        _show_penalty_table(
            "**Baggage Loss Penalties:**", json_data["baggage_loss_penalties"]
        )

    # Display other metadata (airline, airport, etc.) as a one-row table.
    metadata_keys = ["airline", "ground_handling_partner", "effective_date", "airport"]
    metadata = {key: json_data.get(key, "N/A") for key in metadata_keys}
    st.write("**Metadata:**")
    st.table(pd.DataFrame([metadata]))


def _run_analysis(pdf_file):
    """Extract text from *pdf_file*, query the model per chunk, show results.

    Each chunk's response is parsed as JSON on its own; responses that fail
    to parse are shown as raw text, and the ones that parse are combined and
    displayed.
    """
    # Step 1: Extract text from the uploaded PDF
    with st.spinner("Extracting text from PDF..."):
        pdf_text = extract_text_from_pdf(pdf_file)

    # Step 2: Chunk the text if necessary
    with st.spinner("Chunking large text..."):
        chunks = chunk_text(pdf_text)

    if not chunks:
        st.error("No text could be extracted from the PDF.")
        return

    # Step 3: Query the model for each chunk and parse every response
    # separately; concatenating several JSON documents is not valid JSON.
    payloads = []
    unparsed = []
    with st.spinner("Querying GPT-4-Turbo..."):
        for i, chunk in enumerate(chunks):
            st.text(f"Processing chunk {i + 1}/{len(chunks)}...")
            # NOTE(review): the "Custom Prompt" text area value (custom_prompt)
            # is not used here; every request is built from the module-level
            # prompt_template. Confirm whether that is intended.
            prompt = construct_prompt_for_pdf(
                prompt_template=prompt_template, pdf_input=chunk
            )
            result = call_prompt(prompt)
            logger.debug("result for chunk %d: %s", i + 1, result)
            try:
                payloads.append(json.loads(_strip_code_fence(result)))
            except json.JSONDecodeError:
                unparsed.append(result)

    # Step 4: Display raw JSON response
    st.subheader("Raw JSON Response")
    if unparsed:
        st.error("Failed to parse JSON response.")
        st.text("\n".join(unparsed))  # Show raw response if parsing fails
    if not payloads:
        return

    json_data = _combine_payloads(payloads)
    logger.debug("combined_result: %s", json_data)
    st.json(json_data)

    # Step 5: Display results in tables (only meaningful for a JSON object)
    if isinstance(json_data, dict):
        _show_extracted_tables(json_data)


if st.button("Analyze"):
    if uploaded_file is not None:
        _run_analysis(uploaded_file)
    else:
        st.error("Please upload a PDF file to analyze.")